1

I have a dataset consisting of InkML files of handwritten text. I want to convert them to a usable image format to train a CNN. A Python script would be helpful.

I found the method below; here is its source code:

def _parse_trace_coords(trace_text):
    """Turn the text of an InkML ``<trace>`` element into a list of points.

    Points are separated by commas and the values inside a point by
    whitespace. Every value is rounded to the nearest integer.
    """
    points = []
    # Newlines are removed (not treated as separators), as this function has
    # always done; a missing text node is treated as an empty trace.
    for point_text in (trace_text or '').replace('\n', '').split(','):
        # split() without arguments tolerates leading, trailing and repeated
        # whitespace, none of which may produce an empty value.
        values = [round(float(value)) for value in point_text.split()]
        # A trailing comma yields an empty point; skip it instead of failing.
        if values:
            points.append(values)
    return points


def get_traces_data(inkml_file_abs_path):
    """Extract the strokes (traces) of an InkML file, grouped by label.

    Args:
        inkml_file_abs_path: Path of the InkML file to parse.

    Returns:
        A list of dicts. When the file contains a top-level ``traceGroup``
        (treated as a wrapper), there is one dict per nested ``traceGroup``:
        ``{'label': <annotation text or None>, 'trace_group': [trace, ...]}``.
        When there is no ``traceGroup`` at all (unlabelled data), there is one
        dict per trace, ordered by numeric trace id:
        ``{'trace_group': [trace]}``.
        Each trace is a list of points and each point a list of integers.

    Raises:
        IndexError: If a ``traceView`` references a trace id that does not
            exist in the file.
        ValueError: If a trace id, a ``traceDataRef`` or a coordinate is not
            numeric.
    """
    doc_namespace = "{http://www.w3.org/2003/InkML}"
    root = ET.parse(inkml_file_abs_path).getroot()

    # Every trace with its id, ordered by numeric id.
    traces_all = [{'id': trace_tag.get('id'),
                   'coords': _parse_trace_coords(trace_tag.text)}
                  for trace_tag in root.findall(doc_namespace + 'trace')]
    traces_all.sort(key=lambda trace_dict: int(trace_dict['id']))

    # The first traceGroup is only a wrapper around the labelled groups.
    trace_group_wrapper = root.find(doc_namespace + 'traceGroup')
    if trace_group_wrapper is None:
        # Data without labels: every trace forms its own group.
        return [{'trace_group': [trace['coords']]} for trace in traces_all]

    # Look traces up by id rather than by list position, so files whose ids
    # do not run 0..n-1 without gaps still resolve to the right stroke.
    coords_by_id = {int(trace['id']): trace['coords'] for trace in traces_all}

    traces_data = []
    for trace_group in trace_group_wrapper.findall(doc_namespace + 'traceGroup'):
        annotation = trace_group.find(doc_namespace + 'annotation')
        label = annotation.text if annotation is not None else None

        # Traces referenced by the current group, in document order.
        traces_curr = []
        for trace_view in trace_group.findall(doc_namespace + 'traceView'):
            trace_ref = int(trace_view.get('traceDataRef'))
            if trace_ref not in coords_by_id:
                raise IndexError(
                    'traceView references unknown trace id %d' % trace_ref)
            traces_curr.append(coords_by_id[trace_ref])

        traces_data.append({'label': label, 'trace_group': traces_curr})

    return traces_data

1 Answer

0

You may consider using xml.etree.ElementTree in Python to parse your inkml files and use OpenCV's cv2.line method to connect the points to draw the stroke.

סטנלי גרונן
  • 2,917
  • 23
  • 46
  • 68
kaleco
  • 49
  • 3