import os
import xml.etree.ElementTree as ET
from termcolor import colored
# Directory that is scanned for ``.xml`` annotation files.
# NOTE(review): this is the same path as image_dir below -- presumably the XML
# files are stored next to the images; confirm.
voc_labels_dir = "datasets/AI/train/images" # Path to your VOC XML label files
# Directory that receives one ``.txt`` label file per ``.xml`` annotation.
yolo_labels_dir = "datasets/AI/train/labels" # Path to save YOLO format label files
# Not referenced by any of the code visible in this file.
image_dir = "datasets/AI/train/images" # Path to your images
# Create the output directory up front; exist_ok=True makes re-runs harmless.
os.makedirs(yolo_labels_dir, exist_ok=True)
def convert_bbox(size, box):
    """Convert a pixel-space box into normalised centre/size form.

    Args:
        size: ``(width, height)`` of the image, indexable as ``size[0]`` and
            ``size[1]``.
        box: ``(xmin, xmax, ymin, ymax)`` edges of the box. Note the order:
            both x edges come first, then both y edges.

    Returns:
        A 4-tuple ``(x_center, y_center, width, height)`` where the x values
        are divided by the image width and the y values by the image height.

    Raises:
        ZeroDivisionError: if either entry of ``size`` is zero.
    """
    # Reciprocals of the image dimensions; multiplying by these normalises
    # pixel measurements to fractions of the image.
    dw = 1.0 / size[0]
    dh = 1.0 / size[1]

    xmin, xmax, ymin, ymax = box[0], box[1], box[2], box[3]

    # Box centre is the midpoint of the opposing edges.
    x_center = (xmin + xmax) / 2.0
    y_center = (ymin + ymax) / 2.0
    box_w = xmax - xmin
    box_h = ymax - ymin

    return (x_center * dw, y_center * dh, box_w * dw, box_h * dh)
# VOC class name -> YOLO class id. Ids follow the order of the names below.
class_mapping = {
    label: index
    for index, label in enumerate(("YellowSample", "BlueSample", "RedSample"))
}

# Running per-class object tallies, filled in while the annotations are read.
yellow_count = blue_count = red_count = 0

# Number of recognised objects seen, and how many of those were found in a file
# that had already contributed one (tracked via the last file name seen).
total_labeled_images = 0
duplicates = 0
lastFile = ""

# Not read by any of the code visible in this file.
hy = hb = hr = 0

# Largest per-class count and each class's shortfall against it.
maxl = 0
b_differece = r_differece = y_differece = 0
def _voc_image_size(root):
    """Return ``(width, height)`` read from ``<size>``, or None if unusable."""
    # Assumes the standard Pascal VOC layout: <size><width/><height/></size>.
    try:
        width = float(root.findtext("size/width"))
        height = float(root.findtext("size/height"))
    except (TypeError, ValueError):
        # findtext() gave None (element missing) or non-numeric text.
        return None
    if width <= 0 or height <= 0:
        return None
    return (width, height)


def _voc_box(obj):
    """Return ``(xmin, xmax, ymin, ymax)`` read from ``<bndbox>``, or None."""
    # Order matches what convert_bbox() expects: both x edges, then both y edges.
    try:
        return tuple(
            float(obj.findtext("bndbox/" + tag))
            for tag in ("xmin", "xmax", "ymin", "ymax")
        )
    except (TypeError, ValueError):
        return None


# Convert every ``.xml`` annotation in voc_labels_dir into a YOLO label file in
# yolo_labels_dir, tallying the objects of each known class along the way.
for file in os.listdir(voc_labels_dir):
    if not file.endswith(".xml"):
        continue

    xml_path = os.path.join(voc_labels_dir, file)
    tree = ET.parse(xml_path)
    root = tree.getroot()

    image_size = _voc_image_size(root)
    if image_size is None:
        print(f"Warning: Missing or invalid <size> in {file}; no boxes written")

    # Swap only the extension; str.replace would also rewrite an ".xml" that
    # happens to appear earlier in the file name.
    yolo_path = os.path.join(yolo_labels_dir, os.path.splitext(file)[0] + ".txt")

    with open(yolo_path, "w", encoding="utf-8") as f:
        for obj in root.findall("object"):
            # findtext() yields None when <name> is absent instead of raising.
            class_name = obj.findtext("name")
            if class_name not in class_mapping:
                print(f"Warning: Unknown class '{class_name}' in {file}")
                continue

            class_id = class_mapping[class_name]
            if class_id == 0:
                yellow_count += 1
            elif class_id == 1:
                blue_count += 1
            elif class_id == 2:
                red_count += 1

            # total_labeled_images counts recognised objects; duplicates counts
            # those found in a file that already contributed one, so the
            # difference is the number of files with at least one known object.
            total_labeled_images += 1
            if lastFile == file:
                duplicates += 1
            # Only recognised objects advance lastFile. Otherwise a file whose
            # first object is unknown would have its first known object
            # miscounted as a duplicate.
            lastFile = file

            box = _voc_box(obj)
            if image_size is None:
                continue
            if box is None:
                print(f"Warning: Missing or invalid <bndbox> for '{class_name}' in {file}")
                continue

            # One YOLO line per object: class id followed by the normalised
            # centre and size. Without this write the label file stays empty.
            x, y, w, h = convert_bbox(image_size, box)
            f.write(f"{class_id} {x:.6f} {y:.6f} {w:.6f} {h:.6f}\n")
# Work out how far each class trails the most frequent one. max() also handles
# ties for first place; strict pairwise comparisons would leave maxl at 0 in
# that case, so every difference stayed 0 and an unbalanced set was reported
# as balanced.
maxl = max(yellow_count, blue_count, red_count)
y_differece = maxl - yellow_count
b_differece = maxl - blue_count
r_differece = maxl - red_count

# A class counts as under-represented when it trails the largest class by more
# than 5% of the largest class's count.
if maxl * 0.05 < max(y_differece, b_differece, r_differece):
    print(colored("Warning: There is a difference of more than 5% between the classes.", "red"))
    print(colored("Please check the labeled images and make sure that the classes are balanced.", "red"))
else:
    print(colored("Classes are balanced.", "green"))

print("")
# total_labeled_images is incremented per recognised object and duplicates per
# repeat within the same file, so the difference is reported as the image count.
print("Total labeled images: " + colored(str(total_labeled_images-duplicates), "green"))
print(colored("Yellow samples: ", "yellow") + colored(str(yellow_count), "green") + " | [Difference]: " + colored(str(y_differece), "red"))
print(colored("Blue samples: ", "blue") + colored(str(blue_count), "green") + " | [Difference]: " + colored(str(b_differece), "red"))
print(colored("Red samples: ", "red") + colored(str(red_count), "green") + " | [Difference]: " + colored(str(r_differece), "red"))
# Training Dataset