0

I would like to perform a simple_query_string search in Elasticsearch while having a sub-word matching. For example if a would have a filename: "C:\Users\Sven Onderbeke\Documents\Arduino" Than I would want this filename listed if my searchterm is for example "ocumen".

This thread suggested to use ngram to match with parts of the word. I tried to implement it as follows (in Python) but I get zero results while I expect one:

test_mapping = {
    "properties": {
        "filename": {
            "type": "text",
            "analyzer": "my_index_analyzer"
        },
    }
}


def create_index(index_name, mapping):
    created = False
    # index settings
    settings = {
        "settings": {
            "number_of_shards": 1,
            "number_of_replicas": 0,
        },


    "analysis": {
      "index_analyzer": {
        "my_index_analyzer": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "mynGram"
          ]
        }
      },
      "search_analyzer": {
        "my_search_analyzer": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": [
            "standard",
            "lowercase",
            "mynGram"
          ]
        }
      },
      "filter": {
        "mynGram": {
          "type": "nGram",
          "min_gram": 2,
          "max_gram": 50
        }
      }
    },

        "mappings": mapping
    }
    try:
        if not es.indices.exists(index_name):
            # Ignore 400 means to ignore "Index Already Exist" error.
            es.indices.create(index=index_name, ignore=400, body=settings)
            print(f'Created Index: {index_name}')
            created = True
    except Exception as ex:
        print(str(ex))
    finally:
        return created

create_index("test", test_mapping)

doc = {
    'filename': r"C:\Users\Sven Onderbeke\Documents\Arduino",

}
es.index(index="test", document=doc)

needle = "ocumen"

q = {
    "simple_query_string": {
        "query": needle,
        "default_operator": "and"
    }
}

res = es.search(index="test", query=q)
print(res)
for hit in res['hits']['hits']:
    print(hit) 
Niya
  • 61
  • 6

1 Answers1

0

The reason your solution isn't working is because you haven't provided analyzer on the property named as field while defining mapping. Update mapping as below and then reindex all documents.

test_mapping = {
    "properties": {
        "filename": {
            "type": "text",
            "analyzer": "my_index_analyzer"
        },
    }
}
Nishant
  • 7,504
  • 1
  • 21
  • 34
  • I changed my code and question accordingly, but still no results. A needle of "*" does return the line (as a test). needle = "*ocumen *" does get 0 results which is also strange – Niya Oct 31 '21 at 13:04