0

Ive this code which updates a list of dictionary with coordinates of images from a json file. The format I need is : filename, transcription (label name), Coordinates :-

Image_123.jpg   [{"transcription": "HELLO", "points": [[310, 104], [416, 141], [418, 216], [312, 179]]}, {"transcription": "CHARLIE", "points": [[223, 293], [313, 288], [313, 311], [222, 316]]}]

Heres my code, but the problem is that transcription value is getting updated globally for the entire dictionary. Thus Im getting same value of transcription stored in all the dictionaries. I think this line dic["transcription"]=clas is changing all the transcription values with the last value for a particular file. How to stop this global update ? If you see carefully, this isnt happening with the points value in the dictionary. So Im amazed as to why only transcription is getting updated globally.

title = ['filename','width', 'height',  'class', 'xmin', 'ymin', 'xmax','ymax']
ctr=1
df_ = pd.DataFrame(columns = title) 

for i in data['_via_img_metadata']:

    clas=[]
    fil=data['_via_img_metadata'][i]['filename']
    dic={"transcription": None, "points":None }
    lst=[]
    for k in (data['_via_img_metadata'][i]['regions']):
        shp=k['shape_attributes']
        xmin= shp['x']
        ymin= shp['y']
        xmax= shp['width']
        ymax= shp['height']
        clas=None
        try:
            num=k['region_attributes']['num']
        except:
            continue
                   
        try:
            if int(num)==9:
                #clas=(k['region_attributes']['Digit'])
                clas='9'
        except:
            pass

        dic["transcription"]=clas 
        print (clas)
        points=[[xmin, ymin], [xmin+xmax, ymin],  [xmin+xmax, ymin-ymax], [xmin, ymin-ymax]]
        dic["points"]=points
        lst.append(dic)
        print (lst)

        ctr+=1
    
    print(lst)

=====OUTPUT======

[{'transcription': '0', 'points': [[367, 590], [402, 590], [402, 533], [367, 533]]}, {'transcription': '0', 'points': [[367, 590], [402, 590], [402, 533], [367, 533]]}, {'transcription': '0', 'points': [[367, 590], [402, 590], [402, 533], [367, 533]]}, {'transcription': '0', 'points': [[367, 590], [402, 590], [402, 533], [367, 533]]}, {'transcription': '0', 'points': [[367, 590], [402, 590], [402, 533], [367, 533]]}]

====DESIRED OUTPUT====

[{'transcription': '4', 'points': [[367, 590], [402, 590], [402, 533], [367, 533]]}, {'transcription': '3', 'points': [[367, 590], [402, 590], [402, 533], [367, 533]]}, {'transcription': '1', 'points': [[367, 590], [402, 590], [402, 533], [367, 533]]}, {'transcription': '0', 'points': [[367, 590], [402, 590], [402, 533], [367, 533]]}, {'transcription': '8', 'points': [[367, 590], [402, 590], [402, 533], [367, 533]]}]

CONTENT OF data file (json)

{"_via_settings":{"ui":{"annotation_editor_height":25,"annotation_editor_fontsize":0.8,"leftsidebar_width":18,"image_grid":{"img_height":80,"rshape_fill":"none","rshape_fill_opacity":0.3,"rshape_stroke":"yellow","rshape_stroke_width":2,"show_region_shape":true,"show_image_policy":"all"},"image":{"region_label":"__via_region_id__","region_color":"__via_default_region_color__","region_label_font":"10px Sans","on_image_annotation_editor_placement":"NEAR_REGION"}},"core":{"buffer_size":18,"filepath":{},"default_filepath":""},"project":{"name":"via_project_20Jul2021_10h34m"}},"_via_img_metadata":{"im1.jpg165458":{"filename":"im1.jpg","size":165458,"regions":[{"shape_attributes":{"name":"rect","x":193,"y":590,"width":31,"height":69},"region_attributes":{"num":"1"}},{"shape_attributes":{"name":"rect","x":229,"y":597,"width":21,"height":60},"region_attributes":{"num":"2"}},{"shape_attributes":{"name":"rect","x":254,"y":595,"width":33,"height":55},"region_attributes":{"num":"3"}},{"shape_attributes":{"name":"rect","x":290,"y":596,"width":33,"height":53},"region_attributes":{"num":"4"}},{"shape_attributes":{"name":"rect","x":367,"y":590,"width":35,"height":57},"region_attributes":{"num":"0"}},{"shape_attributes":{"name":"rect","x":404,"y":592,"width":31,"height":51},"region_attributes":{"num":"0"}},{"shape_attributes":{"name":"rect","x":438,"y":591,"width":33,"height":54},"region_attributes":{"num":"0"}},{"shape_attributes":{"name":"rect","x":475,"y":589,"width":36,"height":56},"region_attributes":{"num":"0"}},{"shape_attributes":{"name":"rect","x":540,"y":590,"width":36,"height":49},"region_attributes":{"num":"0"}},{"shape_attributes":{"name":"rect","x":615,"y":588,"width":34,"height":49},"region_attributes":{"num":"0"}},{"shape_attributes":{"name":"rect","x":578,"y":589,"width":32,"height":49},"region_attributes":{}},{"shape_attributes":{"name":"rect","x":578,"y":589,"width":32,"height":53},"region_attributes":{}},{"shape_attributes":{"name":"rect","x":650,"y":587,"width":37,"height":51},"region_attributes":{"num":"0"}},{"shape_attributes":{"name":"rect","x":719,"y":587,"width":33,"height":50},"region_attributes":{"num":"0"}},{"shape_attributes":{"name":"rect","x":755,"y":586,"width":35,"height":53},"region_attributes":{"num":"0"}},{"shape_attributes":{"name":"rect","x":794,"y":585,"width":31,"height":49},"region_attributes":{"num":"0"}},{"shape_attributes":{"name":"rect","x":826,"y":583,"width":34,"height":50},"region_attributes":{"num":"0"}}],"file_attributes":{}}}}
  • Could you post a sample of what's in MyFile.txt and also show the code where the data variable is defined? –  Jul 22 '21 at 07:18
  • @AndyKnight sorry but deleted that line, it was irrelevant. Have added the sample content of data (which resides in a json) – Monisha yadav Jul 22 '21 at 17:30
  • The problem is with `list.append(dic)`, which appends *the same object named* `dic` each time (not a copy). A list of dicts has the same problem as a list of lists (as in the linked duplicate). You can avoid this by recreating `dic={"transcription": None, "points":None }` at the top of the loop. – Karl Knechtel Jul 22 '21 at 17:38
  • As an aside, the `ctr` value in your code is not used and the related code can be removed entirely. – Karl Knechtel Jul 22 '21 at 17:39

0 Answers0