Posting my function in case it is useful to anyone who has exported annotations from Label Studio (the AutoML export format is currently not supported there) and wants to use them with TFLite Model Maker.
def convert_coco_json_to_csv(filename, labels):
    """Convert a COCO-style annotation JSON file to an AutoML-style CSV.

    The CSV is written next to the input, using the input name with its
    extension replaced by ``.csv``. It starts with the header row
    ``set,path,label,x_min,y_min,,,x_max,y_max,,`` followed by one row per
    annotation. Box coordinates are normalised by the image width/height.

    Annotated images are shuffled and then split roughly 75% / 20% / 5% into
    ``TRAIN`` / ``VALIDATION`` / ``TEST``. The split is decided per image, so
    every box of one image lands in the same set. Shuffling uses the global
    ``random`` state; call ``random.seed(...)`` beforehand for a reproducible
    split.

    Args:
        filename: Path to the JSON file. It must contain an ``images`` list
            (``id``, ``file_name``, ``width``, ``height``) and an
            ``annotations`` list (``image_id``, ``category_id``, ``bbox`` as
            ``[x, y, width, height]``).
        labels: Container indexed by ``int(category_id)`` that yields the
            label text (for example a list or a dict with integer keys).

    Raises:
        KeyError: If a required key is missing or an annotation references an
            image id that is not listed under ``images``.
        ZeroDivisionError: If a referenced image has a width or height of 0.
    """
    import json
    import os
    import random

    with open(filename, 'r', encoding='utf-8') as json_file:
        coco = json.load(json_file)

    # Remember image paths and sizes by id
    images = {}
    for im in coco['images']:
        images[im['id']] = {
            # Keep only the base name; the split is likely not required in most cases
            'path': im['file_name'].split('/')[-1],
            'width': im['width'],
            'height': im['height'],
        }

    annotations = coco['annotations']

    # Decide the set per *image* (not per annotation) so that boxes of one
    # image never end up in different sets. Only images that actually have
    # annotations take part, so the proportions match what is written out.
    image_ids = list(dict.fromkeys(ann['image_id'] for ann in annotations))
    random.shuffle(image_ids)
    nr_of_images = len(image_ids)
    image_sets = {}
    for rank, image_id in enumerate(image_ids):
        # Integer arithmetic keeps the 75% / 95% boundaries exact and cannot
        # divide by zero, even with a single image.
        if rank * 100 < nr_of_images * 75:
            image_sets[image_id] = 'TRAIN'
        elif rank * 100 < nr_of_images * 95:
            image_sets[image_id] = 'VALIDATION'
        else:
            image_sets[image_id] = 'TEST'

    # Write to Google Cloud AutoML format .csv
    out_file = os.path.splitext(filename)[0] + '.csv'
    with open(out_file, 'w', encoding='utf-8') as out:
        # set,path,label,x_min,y_min,,,x_max,y_max,,
        out.write('set,path,label,x_min,y_min,,,x_max,y_max,,\n')
        for ann in annotations:
            image = images[ann['image_id']]
            bbox = ann['bbox']
            x_min = bbox[0] / image['width']
            x_max = (bbox[0] + bbox[2]) / image['width']
            y_min = bbox[1] / image['height']
            y_max = (bbox[1] + bbox[3]) / image['height']
            img_set = image_sets[ann['image_id']]
            label = labels[int(ann['category_id'])]
            out.write('{},{},{},{},{},{},{},{},{},{},{}\n'.format(
                img_set, image['path'], label,
                x_min, y_min, '', '', x_max, y_max, '', ''))