2

How we convert XML annotation folder into text or YOLOv3 Format for detection??? I used this code for conversion but it only take one xml image and convert into .txt ..but i want to convert my full folder at once time. You have Any easy solution to convert xml files into text files. i Have 15000+ images.

from xml.dom import minidom
import os
import glob

lut={}
lut["14111"] =0
lut["14131"] =1
lut["14141"] =2

def convert_coordinates(size, box):
  dw = 1.0/size[0]
  dh = 1.0/size[1]
  x = (box[0]+box[1])/2.0
  y = (box[2]+box[3])/2.0
  w = box[1]-box[0]
  h = box[3]-box[2]
  x = x*dw
  w = w*dw
  y = y*dh
  h = h*dh
  return (x,y,w,h)


def convert_xml2yolo( lut ):

  for fname in glob.glob("/content/gdrive/MyDrive/Dataset /Annotation/14111_00000002.xml"):
      
      xmldoc = minidom.parse(fname)
      
      fname_out = (fname[:-4]+'.txt')

      with open(fname_out, "w") as f:

          itemlist = xmldoc.getElementsByTagName('object')
          size = xmldoc.getElementsByTagName('size')[0]
          width = int((size.getElementsByTagName('width')[0]).firstChild.data)
          height = int((size.getElementsByTagName('height')[0]).firstChild.data)

          for item in itemlist:
              # get class label
              classid =  (item.getElementsByTagName('name')[0]).firstChild.data
              if classid in lut:
                  label_str = str(lut[classid])
              else:
                  label_str = "-1"
                  print ("warning: label '%s' not in look-up table" % classid)

              # get bbox coordinates
              xmin = ((item.getElementsByTagName('bndbox')[0]).getElementsByTagName('xmin')[0]).firstChild.data
              ymin = ((item.getElementsByTagName('bndbox')[0]).getElementsByTagName('ymin')[0]).firstChild.data
              xmax = ((item.getElementsByTagName('bndbox')[0]).getElementsByTagName('xmax')[0]).firstChild.data
              ymax = ((item.getElementsByTagName('bndbox')[0]).getElementsByTagName('ymax')[0]).firstChild.data
              b = (float(xmin), float(xmax), float(ymin), float(ymax))
              bb = convert_coordinates((width,height), b)
              #print(bb)

              f.write(label_str + " " + " ".join([("%.6f" % a) for a in bb]) + '\n')

      print ("wrote %s" % fname_out)



def main():
  convert_xml2yolo( lut )


if __name__ == '__main__':
  main()
Dua khan
  • 21
  • 2

2 Answers2

0

Follow this github repository.

  • 1
    Your answer could be improved with additional supporting information. Please [edit] to add further details, such as citations or documentation, so that others can confirm that your answer is correct. You can find more information on how to write good answers [in the help center](/help/how-to-answer). – Community Apr 28 '22 at 05:35
0

You just need to edit this line:

for fname in glob.glob("/content/gdrive/MyDrive/Dataset/Annotation/*.xml"):

This means you will read all the .xml files in the Annotation folder and convert them to .txt files.

0sVoid
  • 2,547
  • 1
  • 10
  • 25