计算机视觉 opencv 目标检测数据集格式转换：将labelme格式转为YOLO以及VOC格式

文章目录

前言
第一步：将图片和标签分为两个单独的文件夹
第二步：将jpeg、png等格式都改为jpg格式
第三步：重命名图片和标签
第四步：修改json中的imagePath
第五步：将labelme格式转为YOLO格式
第六步：将YOLO格式转为xml格式
第七步：可视化
最终结果

前言

一个目标检测项目需要自己找图片标注数据进行训练，训练需要YOLO格式，但数据增广需要VOC格式，该文记录如何将labelme标注的数据格式转为YOLO格式，再从YOLO格式转为VOC格式，只作为自己用的记录，如果你刚好也需要这么干，或者需要文中提到的某一种转换，也可以参考一下。文中有些代码是参考其他地方的，时间长已经记不清了，如有侵权请联系更改。注意：路径不要有中文，标签也用相应的英文

第一步：将图片和标签分为两个单独的文件夹

手动完成即可，标签的文件夹最好加个json后缀，因为后面会有其他格式的标签文件。

第二步：将jpeg、png等格式都改为jpg格式

因为搜集的图片什么格式都有，为了方便训练，统一为jpg格式。

代码如下：

# trans_others_to_jpg.py

import os

import cv2 as cv

# Re-encode every supported image found in ``image_path`` as a .jpg file in
# ``save_path``.
image_path = 'D:/DeskTop/Datasets/clothes/images/'  # directory the source images are read from
save_path = 'D:/DeskTop/Datasets/clothes/images_jpg/'  # separate output directory, so files in other formats do not linger next to the converted ones

# Extensions (lower-case, without the dot) that get converted.
SUPPORTED_EXTENSIONS = {'bmp', 'jpg', 'jpeg', 'png'}

# Create the output directory if it does not exist yet.
os.makedirs(save_path, exist_ok=True)

for image in os.listdir(image_path):
    # e.g. "112345.jpeg" -> ("112345", ".jpeg")
    stem, ext = os.path.splitext(image)
    # Compare case-insensitively so "JPEG", "Png", ... are converted as well.
    if ext[1:].lower() not in SUPPORTED_EXTENSIONS:
        continue

    src_file = os.path.join(image_path, image)
    src = cv.imread(src_file)
    if src is None:
        # imread signals an unreadable/corrupt file by returning None;
        # passing that to imwrite would abort the whole batch.
        print('skipped unreadable image:', src_file)
        continue

    cv.imwrite(os.path.join(save_path, stem + '.jpg'), src)

print('FINISHED')

第三步：重命名图片和标签

将图片和对应的标签重命名为六位数字的名字，从000001开始。注意：图片和标签都需要进行重命名（分别对图片文件夹和标签文件夹各运行一次下面的脚本），并且两个文件夹中的文件要一一对应，这样重命名后同一编号的图片和标签才是一对。

代码如下：

# rename.py

import os

# Rename every file in ``path`` to a zero-padded six-digit sequence number
# (000001, 000002, ...), keeping the original extension.
path = "D:/DeskTop/Datasets/clothes/label_json/"  # directory whose files are renamed (json labels here)

# os.listdir returns entries in arbitrary order. Sorting makes the numbering
# deterministic, so running the script on the image folder and on the label
# folder assigns matching numbers to matching files.
filelist = sorted(os.listdir(path))

for file in filelist:
    print(file)

count = 1
for file in filelist:
    Olddir = os.path.join(path, file)
    if os.path.isdir(Olddir):
        # Sub-directories are left untouched and do not consume a number.
        continue

    filetype = os.path.splitext(file)[1]
    Newdir = os.path.join(path, str(count).zfill(6) + filetype)  # zfill(6): pad to six digits

    if Olddir != Newdir:
        if os.path.exists(Newdir):
            # os.rename silently replaces the target on POSIX; refuse instead
            # of destroying a file that has not been processed yet.
            raise FileExistsError('rename target already exists: ' + Newdir)
        os.rename(Olddir, Newdir)
    count += 1

第四步：修改json中的imagePath

因为上一步只改变了名字，标签内的imagePath并没有跟着变，所以还要改一下，和图片对应起来，其实这一步不做也没事，因为YOLO格式就是根据标签文件名读取图片路径的，为了以后可能需要json的标签，还是改一下最好。

代码如下：

# change_json_imagePath.py

import json

import os

import re

# Rewrite the top-level "imagePath" entry of every labelme json file in
# ``path`` so that it names the .jpg with the same stem as the json file.
path = 'D:/DeskTop/Datasets/clothes/label_json/'  # directory containing the json files

num_flag = 0  # number of json files processed

for file in os.listdir(path):
    stem, ext = os.path.splitext(file)
    if ext != ".json":  # only json files are touched
        continue

    num_flag += 1
    print("path = ", file)  # json file name; expected to share its stem with the image
    json_file = os.path.join(path, file)

    # Read as UTF-8 explicitly, matching how the later conversion step opens
    # these files, instead of relying on the platform default encoding.
    with open(json_file, 'r', encoding='utf-8') as load_f:
        load_dict = json.load(load_f)

    print('n = ', len(load_dict))  # number of top-level keys
    # "imagePath" is looked up at the top level of the document; adapt this
    # if your json files nest it differently.
    print("imagePath = ", load_dict['imagePath'])

    filename = stem
    print("filename = ", filename)

    load_dict['imagePath'] = filename + '.jpg'  # bare file name, no directory part
    print("new imagePath = ", load_dict['imagePath'])

    with open(json_file, 'w', encoding='utf-8') as dump_f:
        json.dump(load_dict, dump_f)

if num_flag == 0:
    print('所选文件夹不存在json文件，请重新确认要选择的文件夹')
else:
    print('共{}个json文件'.format(num_flag))

第五步：将labelme格式转为YOLO格式

将labelme的json格式转为YOLO的txt格式，同样保存txt标签的文件夹最好也加个后缀，方便和json区分。注意把代码开头的类别表（id2cls 和 cls2id）改为自己数据集的类别，类别编号从0开始。

代码如下：

# trans_labelme_to_yolo.py

import cv2

import os

import json

import shutil

import numpy as np

from pathlib import Path

from glob import glob

# Class table for the dataset: YOLO class id -> labelme label name.
# Ids start at 0; edit the list to match your own classes.
id2cls = dict(enumerate(['clothing']))

# Reverse lookup (label name -> class id), derived from the table above.
cls2id = {name: idx for idx, name in id2cls.items()}

# Image reader that also works for paths containing non-ASCII characters.
def cv_imread(filePath):
    """Read the file's raw bytes with numpy and decode them as a colour image.

    Returns whatever ``cv2.imdecode`` produces for the byte buffer.
    """
    raw_bytes = np.fromfile(filePath, dtype=np.uint8)
    return cv2.imdecode(raw_bytes, flags=cv2.IMREAD_COLOR)

def labelme2yolo_single(img_path, label_file, class_map=None):
    """Convert one labelme json file into YOLO label rows.

    Args:
        img_path: image directory prefix; it is concatenated with the json's
            ``imagePath`` and only the base name of the result is returned.
        label_file: path of the labelme json file (read as UTF-8).
        class_map: optional ``{label name: class id}`` mapping. Defaults to
            the module-level ``cls2id`` table.

    Returns:
        ``(labels, image_name)`` where ``labels`` is a numpy array with one
        row ``[class_id, x_center, y_center, width, height]`` per shape, all
        box values normalised by the image size stored in the json, and
        ``image_name`` is the image file's base name.

    Raises:
        KeyError: if a shape's label is not present in the class map.
    """
    if class_map is None:
        class_map = cls2id

    # Use a context manager so the file handle is closed deterministically.
    with open(label_file, "r", encoding="utf-8") as f:
        anno = json.load(f)

    w0, h0 = anno['imageWidth'], anno['imageHeight']
    image_path = os.path.basename(img_path + anno['imagePath'])

    labels = []
    for s in anno['shapes']:
        pts = s['points']
        if len(pts) < 2:
            # A single point has no extent and cannot form a box.
            continue

        # Take the extent over all points. For a two-point rectangle this is
        # exactly the original corner arithmetic; for shapes with more
        # points (e.g. polygons) it yields the enclosing box instead of a
        # box built from the first two vertices only.
        xs = [p[0] for p in pts]
        ys = [p[1] for p in pts]
        x_min, x_max = min(xs), max(xs)
        y_min, y_max = min(ys), max(ys)

        x = (x_min + x_max) / 2 / w0
        y = (y_min + y_max) / 2 / h0
        w = (x_max - x_min) / w0
        h = (y_max - y_min) / h0

        cid = class_map[s['label']]
        labels.append([cid, x, y, w, h])

    return np.array(labels), image_path

def labelme2yolo(img_path, labelme_label_dir, save_dir='res/'):
    """Convert every labelme json file in a directory to a YOLO txt label.

    Args:
        img_path: image directory, forwarded to ``labelme2yolo_single``.
        labelme_label_dir: directory containing the ``*.json`` label files.
        save_dir: directory the ``.txt`` files are written to; created when
            missing.

    One ``<json stem>.txt`` is written per json file that yields at least one
    label row; json files without labels produce no txt file.
    """
    labelme_label_dir = str(Path(labelme_label_dir)) + '/'
    yolo_label_dir = str(Path(save_dir)) + '/'
    os.makedirs(yolo_label_dir, exist_ok=True)

    json_files = glob(labelme_label_dir + '*.json')
    for ijf, jf in enumerate(json_files):
        print(ijf+1, '/', len(json_files), jf)
        filename = os.path.basename(jf).rsplit('.', 1)[0]
        labels, _ = labelme2yolo_single(img_path, jf)
        if len(labels) > 0:
            # Write "class x y w h" with an integer class id and fixed-point
            # coordinates. savetxt's default '%.18e' format would emit every
            # column in scientific notation.
            np.savetxt(yolo_label_dir + filename + '.txt', labels,
                       fmt=['%d', '%.6f', '%.6f', '%.6f', '%.6f'])

    print('Completed!')

if __name__ == '__main__':
    # Dataset locations: images, labelme json labels, and the output folder
    # for the generated YOLO txt labels.
    img_path = 'D:/DeskTop/Datasets/clothes/images/'
    json_dir = 'D:/DeskTop/Datasets/clothes/label_json/'
    save_dir = 'D:/DeskTop/Datasets/clothes/label_txt/'

    labelme2yolo(img_path=img_path, labelme_label_dir=json_dir, save_dir=save_dir)

第六步：将YOLO格式转为xml格式

因为数据增广需要xml格式，所以再进行一次转换。注意把代码中的 labels 列表改为自己数据集的类别，顺序要与YOLO标签中的类别编号一致。

代码如下：

# trans_YOLOtxt_to_VOCxml.py

import xml.dom.minidom

import glob

from PIL import Image

from math import ceil

import shutil

import os

# Convert YOLO txt labels to Pascal-VOC style xml annotations, one xml file
# per .jpg image found in ``img_file``.
yolo_file = 'D:/DeskTop/Datasets/clothes/label_txt2/'  # folder holding the YOLO txt label files
turn_xml_file = 'D:/DeskTop/Datasets/clothes/label_xml/'  # folder the generated xml files are written to
img_file = 'D:/DeskTop/Datasets/clothes/images/'  # folder holding the images

labels = ['clothes']  # class names indexed by YOLO class id -- change to your own classes

src_img_dir = img_file
src_txt_dir = yolo_file
src_xml_dir = turn_xml_file  # output folder for the xml files


def yolo_line_to_voc_box(line, width, height):
    """Convert one YOLO label line to ``(class_id, xmin, ymin, xmax, ymax)``.

    ``line`` holds ``class x_center y_center box_width box_height`` separated
    by whitespace (change the ``split`` call if your files use commas), with
    the four box values normalised to the image size. The class id may be
    written as a float (e.g. ``0.000000e+00``). Pixel coordinates are rounded
    to integers and clamped to the image bounds.
    """
    spt = line.split()
    class_id = int(float(spt[0]))
    cx, cy, bw, bh = (float(v) for v in spt[1:5])
    xmin = max(0, int(round((cx - bw / 2) * width)))
    ymin = max(0, int(round((cy - bh / 2) * height)))
    xmax = min(width, int(round((cx + bw / 2) * width)))
    ymax = min(height, int(round((cy + bh / 2) * height)))
    return class_id, xmin, ymin, xmax, ymax


def build_voc_xml(img_filename, width, height, boxes, class_names):
    """Return the annotation xml text for one image.

    ``boxes`` is an iterable of ``(class_id, xmin, ymin, xmax, ymax)`` tuples
    and ``class_names`` maps a class id to its name by index. With no boxes
    the document contains only the header (folder, filename, size).
    Text values are inserted verbatim, without XML escaping.
    """
    lines = [
        '<annotation>',
        '    <folder>VOC2007</folder>',
        '    <filename>' + str(img_filename) + '</filename>',
        '    <size>',
        '        <width>' + str(width) + '</width>',
        '        <height>' + str(height) + '</height>',
        '        <depth>3</depth>',
        '    </size>',
    ]
    for class_id, xmin, ymin, xmax, ymax in boxes:
        lines += [
            '    <object>',
            '        <name>' + str(class_names[class_id]) + '</name>',
            '        <pose>Unspecified</pose>',
            '        <truncated>0</truncated>',
            '        <difficult>0</difficult>',
            '        <bndbox>',
            '            <xmin>' + str(xmin) + '</xmin>',
            '            <ymin>' + str(ymin) + '</ymin>',
            '            <xmax>' + str(xmax) + '</xmax>',
            '            <ymax>' + str(ymax) + '</ymax>',
            '        </bndbox>',
            '    </object>',
        ]
    lines.append('</annotation>')
    return '\n'.join(lines) + '\n'


# Image names without extension, e.g. "000001" for ".../000001.jpg".
img_names = [
    os.path.splitext(os.path.basename(item))[0]
    for item in glob.glob(src_img_dir + '/*.jpg')
]

total_num = len(img_names)  # number of images to convert
if img_names:
    os.makedirs(src_xml_dir, exist_ok=True)

count = 0  # progress counter
for img in img_names:
    count += 1
    if count % 1000 == 0:
        print("当前转换进度{}/{}".format(count, total_num))

    # Only the image size is needed; close the file right away.
    with Image.open(os.path.join(src_img_dir, img + '.jpg')) as im:
        width, height = im.size

    # An image without a txt file is treated as having no objects, so it
    # still gets an xml file instead of aborting the run.
    txt_file = os.path.join(src_txt_dir, img + '.txt')
    if os.path.exists(txt_file):
        with open(txt_file) as txt:
            gt = [row for row in txt.read().splitlines() if row.strip()]
    else:
        gt = []

    boxes = [yolo_line_to_voc_box(row, width, height) for row in gt]

    with open(os.path.join(src_xml_dir, img + '.xml'), 'w') as xml_file:
        xml_file.write(build_voc_xml(img + '.jpg', width, height, boxes, labels))

第七步：可视化

验证标签转换后是否正确：用xml标签进行可视化，多测试几张图片，最好找一些目标较多的图片来验证标签的正确性。

代码如下：

# visualization_xml_OD.py

from lxml import etree

import cv2 as cv

import matplotlib.pyplot as plt

from copy import deepcopy

import numpy as np

def parse_xml_to_dict(xml):
    """Recursively turn an xml element into nested dictionaries.

    Modelled on tensorflow's ``recursive_parse_xml_to_dict``.

    Args:
        xml: xml tree obtained by parsing XML file contents using lxml.etree

    Returns:
        Python dictionary holding XML contents: ``{tag: text}`` for an
        element without children, otherwise ``{tag: {child tag: value}}``.
        Children tagged ``object`` are collected into a list because there
        may be several of them; any other repeated tag keeps the last value.
    """
    # Leaf element: nothing below it, so return its own text.
    if len(xml) == 0:
        return {xml.tag: xml.text}

    children = {}
    for node in xml:
        value = parse_xml_to_dict(node)[node.tag]  # recurse into the child
        if node.tag == 'object':
            children.setdefault(node.tag, []).append(value)
        else:
            children[node.tag] = value
    return {xml.tag: children}

def get_xml_info(xml_path):
    """Read an annotation xml file and return its bounding boxes.

    Args:
        xml_path: path of the xml annotation file.

    Returns:
        A list of ``[xmin, ymin, xmax, ymax]`` integer lists, one per
        ``object`` element; an empty list when the file has no objects.
    """
    with open(xml_path) as fid:
        xml_str = fid.read()

    xml = etree.fromstring(xml_str)
    data = parse_xml_to_dict(xml)["annotation"]

    bboxes = []
    # An annotation without any <object> has no "object" key at all, so fall
    # back to an empty list instead of raising KeyError.
    for obj in data.get("object", []):
        box = obj["bndbox"]
        bboxes.append([
            int(box["xmin"]),
            int(box["ymin"]),
            int(box["xmax"]),
            int(box["ymax"]),
        ])
    return bboxes

# Draw the boxes of one xml annotation onto its image and display the result.
img_path = "D:/DeskTop/Datasets/clothes/images/000056.jpg"  # image to visualise
xml_path = "D:/DeskTop/Datasets/clothes/label_xml/000056.xml"  # annotation belonging to that image

img = cv.imread(img_path)
if img is None:
    # imread returns None instead of raising when the file cannot be read;
    # fail here with a clear message rather than deep inside the drawing code.
    raise FileNotFoundError("cannot read image: " + img_path)

bboxes = np.array(get_xml_info(xml_path))

for box in bboxes:
    # Pass plain Python ints as point coordinates rather than numpy scalars.
    pt1 = (int(box[0]), int(box[1]))
    pt2 = (int(box[2]), int(box[3]))
    cv.rectangle(img, pt1, pt2, (0, 0, 255), 4)

plt.figure(1)
# Reverse the channel axis (BGR -> RGB) before handing the image to matplotlib.
plt.imshow(img[:, :, ::-1], cmap='gray')
plt.show()

最终结果

最终的处理结果包含四个文件夹，数据集图片以及三种类型的标签

至此，从labelme格式转为YOLO和VOC格式的任务就完成了。

下面是将txt标签中的科学计数法表示转为float的代码，有需要的或是强迫症患者可以参考一下。代码如下：先将txt中的'+'替换为'-'（注意：这一步只对指数为 e+00 的数值是安全的，因为 e+00 与 e-00 数值相同；如果文件中出现 e+01 这样的正指数，例如类别编号大于等于10，直接替换会改变数值）。

# change_txt_'+'_to_'-'.py

import os

def trans(input_dir, output_dir, word, splitword):
    """Replace every occurrence of ``word`` with ``splitword`` in .txt files.

    Args:
        input_dir: directory that is walked recursively for ``.txt`` files.
        output_dir: directory the rewritten files are stored in. A file found
            in a sub-directory of ``input_dir`` is written to the same
            relative sub-directory of ``output_dir``. May equal ``input_dir``
            for an in-place rewrite.
        word: text to search for.
        splitword: replacement text.

    Files are read and written as UTF-8.
    """
    for root, dirs, files in os.walk(input_dir):
        for item in files:
            if os.path.splitext(item)[1] != ".txt":
                continue

            # Join with ``root`` (not ``input_dir + item``) so files in
            # sub-directories are found and a missing trailing separator on
            # ``input_dir`` does not produce a wrong path.
            with open(os.path.join(root, item), "r", encoding='UTF-8') as f:
                content = f.read()

            content = content.replace(word, splitword)

            target_dir = os.path.normpath(
                os.path.join(output_dir, os.path.relpath(root, input_dir)))
            os.makedirs(target_dir, exist_ok=True)
            with open(os.path.join(target_dir, item), 'w', encoding='UTF-8') as fval:
                fval.write(content)

if __name__ == '__main__':
    # Raw strings keep the backslashes literal; the values are identical to
    # the previous non-raw literals but no longer rely on invalid escape
    # sequences such as "\D" or "\c".

    # Source folder
    input_dir = r"D:\DeskTop\Datasets\clothes\label_txt/"
    # Destination folder (same as the source: files are rewritten in place)
    output_dir = r"D:\DeskTop\Datasets\clothes\label_txt/"
    # Text to be replaced
    word = '+'
    # Replacement text
    splitword = "-"

    trans(input_dir, output_dir, word, splitword)

再将科学计数法转为float

# !usr/bin env python

# -*- coding: utf-8 -*-

import re

import math

import os

def ConvertELogStrToValue(eLogStr):
    """Convert the first decimal scientific-notation number in a string.

    The number must have the form ``<digits>.<digits>e<exponent>`` with an
    optional leading minus sign and an optionally signed exponent; the ``e``
    is matched case-insensitively.

    Returns:
        ``(convertOK, convertedValue)``: ``(True, value)`` when such a number
        is found, ``(False, 0.0)`` otherwise.

    eg:
        input:  -1.1694737e-03
        output: (True, -0.0011694737)

        input:  8.9455025e-04
        output: (True, 0.00089455025)
    """
    # Raw string so "\d" reaches the regex engine unchanged.
    foundEPower = re.search(r"-?\d+\.\d+e[+-]?\d+", eLogStr, re.I)
    if not foundEPower:
        return (False, 0.0)

    # float() parses scientific notation directly, which avoids the rounding
    # error of multiplying the coefficient by a separately computed power.
    return (True, float(foundEPower.group(0)))

def parseIntEValue(intEValuesStr):
    """Replace the scientific-notation numbers of one text line on disk.

    Every scientific-notation number found in ``intEValuesStr`` is converted
    with ``ConvertELogStrToValue`` and then substituted, via ``trans``, in
    all txt files under the module-level ``txt_path``. Numbers that cannot
    be converted are left untouched.

    Args:
        intEValuesStr: one line of text, e.g.
            ``"2.1690427e-005 -1.1694737e-003"``.
    """
    # Local import: only this function needs Decimal.
    from decimal import Decimal

    intEStrList = re.findall(r"-?\d+\.\d+e[+-]?\d+", intEValuesStr)

    for eachIntEStr in intEStrList:
        (convertOK, convertedValue) = ConvertELogStrToValue(eachIntEStr)
        if not convertOK:
            # Do not overwrite the text with the 0.0 placeholder.
            continue

        print("eachIntEStr=%s,\tconvertedValue=%f" % (eachIntEStr, convertedValue))

        # str(float) keeps scientific notation for small magnitudes (e.g.
        # 2.1690427e-05), which would make the replacement a no-op. Format
        # the value positionally instead, without losing digits.
        plainText = format(Decimal(repr(convertedValue)), "f")
        trans(txt_path, txt_path, eachIntEStr, plainText)

def trans(input_dir, output_dir, word, splitword):
    """Replace every occurrence of ``word`` with ``splitword`` in .txt files.

    Args:
        input_dir: directory that is walked recursively for ``.txt`` files.
        output_dir: directory the rewritten files are stored in. A file found
            in a sub-directory of ``input_dir`` is written to the same
            relative sub-directory of ``output_dir``. May equal ``input_dir``
            for an in-place rewrite.
        word: value to search for; converted with ``str`` first.
        splitword: replacement value; converted with ``str`` first.

    Files are read and written as UTF-8.
    """
    old_text = str(word)
    new_text = str(splitword)

    for root, dirs, files in os.walk(input_dir):
        for item in files:
            if os.path.splitext(item)[1] != ".txt":
                continue

            # Join with ``root`` (not ``input_dir + item``) so files in
            # sub-directories are found and a missing trailing separator on
            # ``input_dir`` does not produce a wrong path.
            with open(os.path.join(root, item), "r", encoding='UTF-8') as f:
                content = f.read()

            content = content.replace(old_text, new_text)

            target_dir = os.path.normpath(
                os.path.join(output_dir, os.path.relpath(root, input_dir)))
            os.makedirs(target_dir, exist_ok=True)
            with open(os.path.join(target_dir, item), 'w', encoding='UTF-8') as fval:
                fval.write(content)

# intEValuesStr= 2.1690427e-005 -1.1694737e-003 -6.1193734e-004

# 8.9455025e-004 -8.6277081e-004 -7.2735757e-004

# intEStrList= ['2.1690427e-005', '-1.1694737e-003', '-6.1193734e-004', '8.9455025e-004', '-8.6277081e-004', '-7.2735757e-004']

# eachIntEStr=2.1690427e-005, convertedValue=0.014615

# eachIntEStr=-1.1694737e-003, convertedValue=-0.058225

# eachIntEStr=-6.1193734e-004, convertedValue=-0.112080

# eachIntEStr=8.9455025e-004, convertedValue=0.163843

# eachIntEStr=-8.6277081e-004, convertedValue=-0.158022

# eachIntEStr=-7.2735757e-004, convertedValue=-0.133220

if __name__ == "__main__":
    # Raw strings keep the backslashes literal; the values are identical to
    # the previous non-raw literals but no longer rely on invalid escape
    # sequences such as "\D" or "\c".
    # txt_path must stay a module-level name: parseIntEValue reads it.
    txt_path = r"D:\DeskTop\Datasets\clothes\label_txt/"
    output_dir = r"D:\DeskTop\Datasets\clothes\label_txt/"  # not used below; files are rewritten in place

    for root, dirs, files in os.walk(txt_path):
        for item in files:
            if os.path.splitext(item)[1] == ".txt":
                # Read the whole file and close it before processing, since
                # parseIntEValue rewrites the txt files on disk. UTF-8 matches
                # the encoding trans uses for the same files.
                with open(os.path.join(root, item), 'r', encoding='UTF-8') as f:
                    lines = f.readlines()

                for line in lines:
                    parseIntEValue(line.strip())

推荐链接

评论可见，请评论后查看内容，谢谢！！！

您阅读本篇文章共花了：

金钥匙

计算机视觉 opencv 目标检测数据集格式转换：将labelme格式转为YOLO以及VOC格式

人工智能计算机视觉（HOTA）多目标跟踪MOT指标计算方法

golang grpc中间件之链路追踪（otel+jaeger）

发表评论取消回复

金钥匙

计算机视觉 opencv 目标检测数据集格式转换：将labelme格式转为YOLO以及VOC格式

人工智能 计算机视觉 （HOTA）多目标跟踪MOT指标计算方法

golang grpc中间件之链路追踪（otel+jaeger）

相关文章

发表评论取消回复

人工智能计算机视觉（HOTA）多目标跟踪MOT指标计算方法