I have to implement full-text search over a PDF document using the Elasticsearch
ingest attachment plugin. I get an empty hits array when I search for the word someword
in the PDF document.
//Define the "attachment" ingest pipeline: runs the attachment processor on the "data" field, with no limit on extracted characters (indexed_chars = -1)
PUT _ingest/pipeline/attachment
{
  "description": "Extract attachment information",
  "processors": [
    {
      "attachment": {
        "field": "data",
        "indexed_chars": -1
      }
    }
  ]
}
//Index one document into my_index through the attachment pipeline; "data" carries the base64-encoded file content
PUT my_index/my_type/my_id?pipeline=attachment
{
  "filename": "C:\\Users\\myname\\Desktop\\bh1.pdf",
  "title": "Quick",
  "data": "e1xydGYxXGFuc2kNCkxvcmVtIGlwc3VtIGRvbG9yIHNpdCBhbWV0DQpccGFyIH0="
}
//Search the text extracted from the file. The attachment processor stores the extracted text in "attachment.content";
//the "data" field only holds the raw base64 payload, so a match query against "data" never finds a plain word.
//Note: the sample payload indexed above decodes to "Lorem ipsum dolor sit amet", so use a word such as ipsum to verify the setup.
GET /my_index/my_type/_search
{
  "query": {
    "match": {
      "attachment.content": {
        "query": "someword"
      }
    }
  }