I would like to perform a simple_query_string search in Elasticsearch while having a sub-word matching. For example if a would have a filename: "C:\Users\Sven Onderbeke\Documents\Arduino" Than I would want this filename listed if my searchterm is for example "ocumen".
This thread suggested to use ngram to match with parts of the word. I tried to implement it as follows (in Python) but I get zero results while I expect one:
test_mapping = {
"properties": {
"filename": {
"type": "text",
"analyzer": "my_index_analyzer"
},
}
}
def create_index(index_name, mapping):
created = False
# index settings
settings = {
"settings": {
"number_of_shards": 1,
"number_of_replicas": 0,
},
"analysis": {
"index_analyzer": {
"my_index_analyzer": {
"type": "custom",
"tokenizer": "standard",
"filter": [
"lowercase",
"mynGram"
]
}
},
"search_analyzer": {
"my_search_analyzer": {
"type": "custom",
"tokenizer": "standard",
"filter": [
"standard",
"lowercase",
"mynGram"
]
}
},
"filter": {
"mynGram": {
"type": "nGram",
"min_gram": 2,
"max_gram": 50
}
}
},
"mappings": mapping
}
try:
if not es.indices.exists(index_name):
# Ignore 400 means to ignore "Index Already Exist" error.
es.indices.create(index=index_name, ignore=400, body=settings)
print(f'Created Index: {index_name}')
created = True
except Exception as ex:
print(str(ex))
finally:
return created
create_index("test", test_mapping)
doc = {
'filename': r"C:\Users\Sven Onderbeke\Documents\Arduino",
}
es.index(index="test", document=doc)
needle = "ocumen"
q = {
"simple_query_string": {
"query": needle,
"default_operator": "and"
}
}
res = es.search(index="test", query=q)
print(res)
for hit in res['hits']['hits']:
print(hit)