解压VOCdevkit_mask.zip数据集使用PASCAL VOC数据集的目录结构:建立文件夹层次为 VOCdevkit / VOC2007
VOC2007下面建立两个文件夹:Annotations和JPEGImages , JPEGImages放所有的训练和测试图片;Annotations放所有的xml标记文件
3)划分数据集:生成训练集和验证集
编写脚本
import xml.etree.ElementTree as ETimport pickleimport osfrom os import listdir, getcwdfrom os.path import joinimport randomfrom shutil import copyfileclasses=["nomask","mask"]TRAIN_RATIO = 80def clear_hidden_files(path): dir_list = os.listdir(path) for i in dir_list: abspath = os.path.join(os.path.abspath(path), i) if os.path.isfile(abspath): if i.startswith("._"): os.remove(abspath) else: clear_hidden_files(abspath)def convert(size, box): dw = 1./size[0] dh = 1./size[1] x = (box[0] + box[1])/2.0 y = (box[2] + box[3])/2.0 w = box[1] - box[0] h = box[3] - box[2] x = x*dw w = w*dw y = y*dh h = h*dh return (x,y,w,h)def convert_annotation(image_id): in_file = open('VOCdevkit/VOC2007/Annotations/%s.xml' %image_id) out_file = open('VOCdevkit/VOC2007/YOLOLabels/%s.txt' %image_id, 'w') tree=ET.parse(in_file) root = tree.getroot() size = root.find('size') w = int(size.find('width').text) h = int(size.find('height').text) for obj in root.iter('object'): difficult = obj.find('difficult').text cls = obj.find('name').text if cls not in classes or int(difficult) == 1: continue cls_id = classes.index(cls) xmlbox = obj.find('bndbox') b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text)) bb = convert((w,h), b) out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + 'n') in_file.close() out_file.close()wd = os.getcwd()wd = os.getcwd()data_base_dir = os.path.join(wd, "VOCdevkit/")if not os.path.isdir(data_base_dir): os.mkdir(data_base_dir)work_sapce_dir = os.path.join(data_base_dir, "VOC2007/")if not os.path.isdir(work_sapce_dir): os.mkdir(work_sapce_dir)annotation_dir = os.path.join(work_sapce_dir, "Annotations/")if not os.path.isdir(annotation_dir): os.mkdir(annotation_dir)clear_hidden_files(annotation_dir)image_dir = os.path.join(work_sapce_dir, "JPEGImages/")if not os.path.isdir(image_dir): os.mkdir(image_dir)clear_hidden_files(image_dir)yolo_labels_dir = os.path.join(work_sapce_dir, "YOLOLabels/")if not os.path.isdir(yolo_labels_dir): os.mkdir(yolo_labels_dir)clear_hidden_files(yolo_labels_dir)yolov5_images_dir = os.path.join(data_base_dir, "images/")if not os.path.isdir(yolov5_images_dir): os.mkdir(yolov5_images_dir)clear_hidden_files(yolov5_images_dir)yolov5_labels_dir = os.path.join(data_base_dir, "labels/")if not os.path.isdir(yolov5_labels_dir): os.mkdir(yolov5_labels_dir)clear_hidden_files(yolov5_labels_dir)yolov5_images_train_dir = os.path.join(yolov5_images_dir, "train/")if not os.path.isdir(yolov5_images_train_dir): os.mkdir(yolov5_images_train_dir)clear_hidden_files(yolov5_images_train_dir)yolov5_images_test_dir = os.path.join(yolov5_images_dir, "val/")if not os.path.isdir(yolov5_images_test_dir): os.mkdir(yolov5_images_test_dir)clear_hidden_files(yolov5_images_test_dir)yolov5_labels_train_dir = os.path.join(yolov5_labels_dir, "train/")if not os.path.isdir(yolov5_labels_train_dir): os.mkdir(yolov5_labels_train_dir)clear_hidden_files(yolov5_labels_train_dir)yolov5_labels_test_dir = os.path.join(yolov5_labels_dir, "val/")if not os.path.isdir(yolov5_labels_test_dir): os.mkdir(yolov5_labels_test_dir)clear_hidden_files(yolov5_labels_test_dir)train_file = open(os.path.join(wd, "yolov5_train.txt"), 'w')test_file = open(os.path.join(wd, "yolov5_val.txt"), 'w')train_file.close()test_file.close()train_file = open(os.path.join(wd, "yolov5_train.txt"), 'a')test_file = open(os.path.join(wd, "yolov5_val.txt"), 'a')list_imgs = os.listdir(image_dir) # list image filesprobo = random.randint(1, 100)print("Probobility: %d" % probo)for i in range(0,len(list_imgs)): path = os.path.join(image_dir,list_imgs[i]) if os.path.isfile(path): image_path = image_dir + list_imgs[i] voc_path = list_imgs[i] (nameWithoutExtention, extention) = os.path.splitext(os.path.basename(image_path)) (voc_nameWithoutExtention, voc_extention) = os.path.splitext(os.path.basename(voc_path)) annotation_name = nameWithoutExtention + '.xml' annotation_path = os.path.join(annotation_dir, annotation_name) label_name = nameWithoutExtention + '.txt' label_path = os.path.join(yolo_labels_dir, label_name) prob = random.randint(1, 100) print("Probability: %d" % prob) if(prob < TRAIN_RATIO): # train dataset if os.path.exists(annotation_path): train_file.write(image_path + 'n') convert_annotation(nameWithoutExtention) # convert label copyfile(image_path, yolov5_images_train_dir + voc_path) copyfile(label_path, yolov5_labels_train_dir + label_name) else: # test dataset if os.path.exists(annotation_path): test_file.write(image_path + 'n') convert_annotation(nameWithoutExtention) # convert label copyfile(image_path, yolov5_images_test_dir + voc_path) copyfile(label_path, yolov5_labels_test_dir + label_name)train_file.close()test_file.close()
执行python脚本:
python prepare_data.py
注意:classes=[“nomask”,“mask”]要根据自己的数据集类别做相应的修改在VOCdevkit / VOC2007目录下可以看到生成了文件夹YOLOLabelsYOLOLabels下的文件是images文件夹下每一个图像的yolo格式的标注文件,
这是由annotations的xml标注文件转换来的;在VOCdevkit目录下生成了images和labels文件夹
images文件夹下有train和val文件夹,分别放置训练集和测试集图片;
labels文件夹有train和val文件夹,分别放置训练集和测试集标签(yolo格式)在yolov5下生成了两个文件yolov5_train.txt和yolov5_val.txt。
yolov5_train.txt和yolov5_val.txt分别给出了训练图片文件和测试图片文件的列表,
含有每个图片的路径和文件名。 修改配置文件
(1) 新建文件data/voc-mask.yaml
可以复制data/voc.yaml,再重新命名为data/voc-mask.yaml
然后修改配置参数
# YOLOv5 by Ultralytics, GPL-3.0 license# PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC by University of Oxford# Example usage: python train.py --data voc-mask.yaml# parent# ├── yolov5# └── datasets# └── VOC ← downloads here# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]path: ./VOCdevkittrain: # train images (relative to 'path') 16551 images - images/trainval: # val images (relative to 'path') 4952 images - images/valtest: # test images (optional)# Classesnc: 2 # number of classesnames: ['nomask', 'mask'] # class names
(2) 新建文件models/yolov5s-mask.yaml
可以复制models/yolov5s.yaml,再重新命名为models/yolov5s-mask.yaml
然后修改配置参数
# parametersnc: 2 # number of classes
训练数据集
python train.py --data data/voc-mask.yaml --cfg models/yolov5s-mask.yaml --weights weights/yolov5s.pt --epochs 50