SCUT-HEAD-Dataset-Release Incorrect VOC format

There are some issues with the XML files, e.g. the image filename/path being incorrect, and I seem to recall a colleague noticing that the image sizes in the XML didn't always match the actual image sizes (and were sometimes 0 maybe?). I've also noticed some of the bounding boxes are invalid (i.e. have xmin==xmax, and ymin==ymax).

Would it be possible to fix these?

Oct 25 '18 02:10 kodonnell

Hi, I have encountered this issue as well and have made a quick fix for these two problems:

Image size = (0,0): read the actual image size from the image itself. Apart from these zero sizes, I have not noticed any wrong image sizes.
BBox size invalid (simply skip invalid BBoxes)

My script generates labels in a Yolonet-readable format from the VOC annotations. I have checked the correctness of the boxes by looking through the first 500 images of each set, including the problematic ones.


import xml.etree.ElementTree as ET
import os
from os import listdir, getcwd
from os.path import join
from cv2 import imread

# (dataset part, split) pairs to convert; each pair selects one
# <part>/ImageSets/Main/<split>.txt image list.
sets = [
    (part, split)
    for part in ('SCUT_HEAD_Part_A', 'SCUT_HEAD_Part_B')
    for split in ('train', 'val', 'test')
]

# Object classes kept in the output; a class id is its index in this list.
classes = ["person"]


def convert(size, box):
    """Turn a pixel box into a normalized, center-based box.

    Args:
        size: ``(width, height)`` of the image.
        box: ``(xmin, xmax, ymin, ymax)`` in pixels.

    Returns:
        ``(x, y, w, h)``: the box center and the box extent, each scaled
        by the reciprocal of the matching image dimension.
    """
    scale_x = 1. / size[0]
    scale_y = 1. / size[1]

    xmin, xmax, ymin, ymax = box[0], box[1], box[2], box[3]
    center_x = (xmin + xmax) / 2.0
    center_y = (ymin + ymax) / 2.0

    return (
        center_x * scale_x,
        center_y * scale_y,
        (xmax - xmin) * scale_x,
        (ymax - ymin) * scale_y,
    )



def convert_annotation(year, image_id):
    """Convert one VOC XML annotation into a YOLO-style label file.

    Reads ``<year>/Annotations/<image_id>.xml`` and writes
    ``<year>/labels/<image_id>.txt`` with one line per kept object:
    ``<class id> <x> <y> <w> <h>``, where the four numbers come from
    ``convert``. Objects whose class is not in ``classes``, that are marked
    difficult, or whose box has a non-positive width or height are skipped.

    Args:
        year: Dataset directory (the driver loop in this script passes a
            part name such as ``SCUT_HEAD_Part_A``).
        image_id: Annotation/image base name without extension.

    Raises:
        IOError: If the XML declares a non-positive image size and the
            image itself cannot be read to recover the real size.
    """
    # Context managers guarantee both files are closed (and the labels
    # flushed) even if parsing or conversion raises.
    with open('%s/Annotations/%s.xml' % (year, image_id), 'r', encoding='utf-8') as in_file:
        root = ET.parse(in_file).getroot()

    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)

    # Explicit check instead of assert: asserts are stripped under
    # ``python -O``, which would silently disable this fallback.
    if not (w > 0 and h > 0):
        print("WARN: (0,0) size: %s.jpg" % image_id)
        image_path = '%s/%s/JPEGImages/%s.jpg' % (wd, year, image_id)
        im = imread(image_path)
        if im is None:
            # cv2.imread signals failure by returning None, not by raising.
            raise IOError("Could not read image to recover its size: %s" % image_path)
        h, w = im.shape[:2]

    # Opened only after the size is known, so a failure above does not
    # leave an empty label file behind.
    with open('%s/labels/%s.txt' % (year, image_id), 'w') as out_file:
        for obj in root.iter('object'):
            cls = obj.find('name').text
            # A missing <difficult> tag is treated as "not difficult".
            difficult = obj.findtext('difficult', default='0')
            if cls not in classes or int(difficult) == 1:
                continue
            cls_id = classes.index(cls)

            xmlbox = obj.find('bndbox')
            b = (
                float(xmlbox.find('xmin').text),
                float(xmlbox.find('xmax').text),
                float(xmlbox.find('ymin').text),
                float(xmlbox.find('ymax').text),
            )

            # Degenerate boxes (xmin == xmax or ymin == ymax) and inverted
            # ones carry no usable region; skip them.
            if not (b[1] - b[0] > 0 and b[3] - b[2] > 0):
                print("WARN: BBox size cannot be 0, skipping BBox in", image_id)
                continue

            bb = convert((w, h), b)
            out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')


# Absolute base directory for the image paths written to the list files;
# convert_annotation also reads this global to locate images.
wd = getcwd()

for year, image_set in sets:
    # exist_ok replaces the racy exists()-then-makedirs() pair.
    os.makedirs('%s/labels/' % year, exist_ok=True)

    # Read the image ids for this split; the file is closed right away.
    with open('%s/ImageSets/Main/%s.txt' % (year, image_set)) as ids_file:
        image_ids = ids_file.read().split()

    # One list file per (part, split), holding absolute image paths. The
    # context manager closes it even if a conversion raises midway.
    with open('%s_%s.txt' % (year, image_set), 'w') as list_file:
        for image_id in image_ids:
            print(image_id)
            list_file.write('%s/%s/JPEGImages/%s.jpg\n' % (wd, year, image_id))
            convert_annotation(year, image_id)

If you want to visualize and manually check through the images with BBoxes, run this script.

import os
import glob
from os import listdir, getcwd
from os.path import join
import cv2

# Dataset part suffixes; each one selects a SCUT_HEAD_Part_<suffix> directory.
sets = ["A", "B"]
# Object class names.
classes = ["person"]


def deconvert(size, box):
    """Map a normalized, center-based box back to pixel coordinates.

    Args:
        size: ``(height, width)`` of the image, i.e. ``im.shape[:2]``.
        box: ``(x_center, y_center, width, height)`` as fractions of the
            image dimensions.

    Returns:
        ``(x, y, w, h)`` as ints: the top-left corner and the box extent
        in pixels.
    """
    img_w = size[1]
    img_h = size[0]

    # The extent is truncated to whole pixels first; the corner is then
    # derived from that truncated extent.
    box_w = int(box[2] * img_w)
    box_h = int(box[3] * img_h)
    left = int(box[0] * img_w - box_w / 2)
    top = int(box[1] * img_h - box_h / 2)

    return left, top, box_w, box_h


wd = getcwd()
for image_set in sets:
    # Walk the label files that actually exist instead of assuming the
    # ids are contiguous from 00000; a gap in the numbering would
    # otherwise raise on the first missing file.
    label_paths = sorted(glob.glob("SCUT_HEAD_Part_%s/labels/*.txt" % image_set))
    for label_path in label_paths:
        image_id = os.path.splitext(os.path.basename(label_path))[0]
        print(image_id)

        # Each non-empty line is "<class id> <x> <y> <w> <h>"; an empty
        # label file yields a single empty row, skipped below.
        with open(label_path, 'r') as label_file:
            label_lines = label_file.read().strip().split('\n')
        image_bboxes = [list(map(float, i.split())) for i in label_lines]

        image_path = 'SCUT_HEAD_Part_%s/JPEGImages/%s.jpg' % (image_set, image_id)
        im = cv2.imread(image_path)
        if im is None:
            # cv2.imread returns None for a missing or unreadable image.
            print("WARN: could not read image, skipping:", image_path)
            continue

        im_size = im.shape[:2]  # (height, width), as deconvert expects
        for box in image_bboxes:
            if box:
                # box[0] is the class id; the remaining four are the box.
                x, y, w, h = deconvert(im_size, box[1:])
                cv2.rectangle(im, (x, y), (x + w, y + h), (0, 255, 255), 2)

        cv2.imshow('frame', im)
        # Space (key code 32) advances to the next image; any other key
        # stops browsing the current part and moves on to the next part.
        k = cv2.waitKey(0)
        if k != 32:
            break

cv2.destroyAllWindows()

Oct 26 '18 03:10 limwenyao

Could it be that in the case of a bad bbox (xmin == xmax or ymin == ymax), the head is so small that it is only marked with a single dot? Have any of you tried to visualize what the bad bboxes actually look like?

Jun 18 '19 23:06 usamahjundia