Have a look at this; it is a question I asked regarding a similar data set. Below are the index settings and mappings I used to produce some decent results. Development on this has ceased for the interim; however, this is what I've produced so far. You can then develop your queries against it:
{
"settings": {
"number_of_shards": 5,
"number_of_replicas": 0,
"analysis": {
"filter": {
"synonym": {
"type": "synonym",
"synonyms_path": "synonyms/synonyms.txt"
},
"my_metaphone": {
"type": "phonetic",
"encoder": "metaphone",
"replace": false
}
},
"analyzer": {
"synonym": {
"tokenizer": "whitespace",
"filter": [
"lowercase",
"synonym"
]
},
"metaphone": {
"tokenizer": "standard",
"filter": [
"my_metaphone"
]
},
"porter": {
"tokenizer": "standard",
"filter": [
"lowercase",
"porter_stem"
]
}
}
}
},
"mappings": {
"mes": {
"_all": {
"enabled": false
},
"properties": {
"pty_forename": {
"type": "multi_field",
"store": "yes",
"fields": {
"pty_forename": {
"type": "string",
"analyzer": "simple"
},
"metaphone": {
"type": "string",
"analyzer": "metaphone"
},
"porter": {
"type": "string",
"analyzer": "porter"
},
"synonym": {
"type": "string",
"analyzer": "synonym"
}
}
},
"pty_full_name": {
"type": "string",
"index": "not_analyzed",
"store": "yes"
},
"pty_surname": {
"type": "multi_field",
"store": "yes",
"fields": {
"pty_surname": {
"type": "string",
"analyzer": "simple"
},
"metaphone": {
"type": "string",
"analyzer": "metaphone"
},
"porter": {
"type": "string",
"analyzer": "porter"
},
"synonym": {
"type": "string",
"analyzer": "synonym"
}
}
}
}
}
}
}
Note that I have used the phonetic plugin, and I also have a comprehensive list of synonyms, which is critical for returning results for `richard` when `dick` is entered.