0
def buildTree(data, *, f=300, trees=10, n_neighbours=10, index_path='test.ann'):
    """Build an Annoy index over ``data["Vektoren"]`` and attach neighbour info.

    Every row's vector is added to an angular-distance Annoy index under the
    row's *position* (0, 1, 2, ...). The index is built, saved to
    ``index_path``, loaded back, and queried once per row.

    Args:
        data: DataFrame with a "Vektoren" column holding one vector of
            length ``f`` per row and a "SAG-Nummer" column.
        f: Length of the item vectors that will be indexed.
        trees: Number of trees passed to ``AnnoyIndex.build``.
        n_neighbours: Number of neighbours requested per row.
        index_path: File the built index is saved to and loaded from.

    Returns:
        The same ``data`` object (modified in place) with three added
        columns, each holding one list per row:
        "Nachbarn" (the "SAG-Nummer" values of the neighbours),
        "Index" (the neighbours' row positions) and
        "Distance" (the distances reported by Annoy).
    """
    t = AnnoyIndex(f, 'angular')
    for position, vector in enumerate(data["Vektoren"]):
        t.add_item(position, vector)

    t.build(trees)
    t.save(index_path)

    # Load the saved file into a fresh index and query that one.
    u = AnnoyIndex(f, 'angular')
    u.load(index_path)

    sag_numbers = data["SAG-Nummer"]
    neighbour_sags = []
    neighbour_positions = []
    neighbour_distances = []

    for position in range(len(data.index)):
        # One query yields both the neighbour ids and their distances.
        ids, distances = u.get_nns_by_item(
            position, n_neighbours, include_distances=True
        )
        neighbour_positions.append(ids)
        neighbour_distances.append(distances)
        # Annoy ids are row positions, so look them up positionally.
        neighbour_sags.append([sag_numbers.iloc[j] for j in ids])

    # Assign whole columns at once: this is positional, so it is correct
    # even when the DataFrame index is not 0..n-1 (label-based ``.at[i, ...]``
    # would hit the wrong rows or create new ones in that case).
    data['Nachbarn'] = neighbour_sags
    data['Index'] = neighbour_positions
    data['Distance'] = neighbour_distances

    return data

The returned data includes, for each element of the dataset, a list of its nearest neighbours and their distances to that element. [Image: enter image description here]

I don't understand how to plot this data as a 2D scatter plot.

Roland
  • 111
  • 7

0 Answers0