通过 Python 写一些脚本来实现数据处理功能确实很方便,先记录当下用到的内容,后续再补充完善。
1. 读取文件夹中的所有数据并进行随机采样。注:主要调用 os 库的文件操作功能,想要掌握它,自己动手实现一个小功能就能很快了解其基本原理。代码书写时需要注意缩进对齐与空格,不然会报错;另外,读出和写入中文路径时要小心编码问题;其余就是字符串的操作了。具体实现如下:
# -*- coding:UTF-8 -*-
"""Collect every TIFF image under a folder and draw a random sample of them.

Stage 1 walks ``dirname`` recursively and writes the path of each ``.tiff``
file to ``txt_path``, one path per line.  Stage 2 reads that list back, picks
up to ``n`` lines at random and writes them to ``sample_file``.
"""
import os
import random

dirname = "...folder"
txt_path = 'F:/code/Pycham/list.txt'
sample_file = 'F:/code/Pycham/sample.txt'
n = 1000  # number of samples to draw

# The list file is written and read back with the SAME explicit encoding so
# that non-ASCII (e.g. Chinese) paths survive the round trip regardless of the
# platform's default encoding.
file_encoding = 'gbk'


def traverse_path(file_path, extensions=('.tiff',)):
    """Return the paths of all matching files below ``file_path``.

    Args:
        file_path: Directory to scan; sub-directories are scanned recursively.
        extensions: File-name suffixes to keep, compared case-insensitively.

    Returns:
        A list of paths (``file_path`` joined with the relative location of
        each matching file), in directory-listing order.

    Raises:
        OSError: If ``file_path`` (or a sub-directory) cannot be listed.
    """
    suffixes = tuple(ext.lower() for ext in extensions)
    matches = []
    for entry in os.listdir(file_path):
        full_path = os.path.join(file_path, entry)
        if os.path.isdir(full_path):
            matches.extend(traverse_path(full_path, extensions))
        elif entry.lower().endswith(suffixes):
            # Match on the real extension rather than on the last four
            # characters, so names such as "stiff" are not picked up.
            matches.append(full_path)
    return matches


def sample_lines(lines, count):
    """Return ``count`` randomly chosen items of ``lines`` without repetition.

    ``count`` is clamped to ``len(lines)`` so that a folder holding fewer
    images than requested yields all of them instead of raising ValueError.
    """
    return random.sample(lines, min(count, len(lines)))


def main():
    """Write the full image list, then write a random sample of it."""
    image_paths = traverse_path(dirname)

    # The ``with`` block guarantees the list is flushed and closed before it
    # is reopened for reading below.
    with open(txt_path, 'w', encoding=file_encoding) as txt_file:
        txt_file.writelines(path + '\n' for path in image_paths)

    with open(txt_path, encoding=file_encoding) as list_file:
        lines = list_file.readlines()

    chosen = sample_lines(lines, n)
    print("样本总个数:", len(lines), " 抽样个数:", len(chosen))

    with open(sample_file, 'w', encoding=file_encoding) as out_file:
        out_file.writelines(chosen)


if __name__ == "__main__":
    main()