5

I'm trying to use a shingle filter together with a synonym filter (see code below). For the input "implemented for testing", this gives me the output:

enforced implemented

implemented for

for examination

examination testing

The synonyms "enforced" and "implemented" are being combined into a shingle with each other, and the same happens with "examination" and "testing". Is it possible to get the following output instead?

enforced for

implemented for

for examination

for testing

JSON Definition

// Index settings: a single shard plus a custom analysis chain that expands
// WordNet synonyms and then emits two-word shingles.
String json = jsonBuilder()
        .startObject()
                .field("number_of_shards", 1)
                .startObject("analysis")
                    .startObject("filter")
                        // Bigrams only: min == max == 2 and unigrams are suppressed.
                        .startObject("my_shingle_filter")
                            .field("type", "shingle")
                            .field("min_shingle_size", 2)
                            .field("max_shingle_size", 2)
                            .field("output_unigrams", false)
                        .endObject()
                        // Synonyms are read from a WordNet prolog file.
                        .startObject("my_syn_filter")
                            .field("type", "synonym")
                            .field("format", "wordnet")
                            .field("synonyms_path", "prolog/wn_s.pl")
                        .endObject()
                    .endObject()
                    .startObject("analyzer")
                        .startObject("my_shingle_analyzer")
                            .field("type", "custom")
                            .field("tokenizer", "standard")
                            // Filter order matters: synonyms are injected before shingling.
                            // NOTE(review): with this order the observed output shingles a
                            // term with its own synonym (e.g. "enforced implemented").
                            .field("filter", new String[]{"lowercase", "my_syn_filter", "my_shingle_filter"})
                        .endObject()
                    .endObject()
                .endObject()
        .endObject().string();

// Create the test index with the settings built above.
client.admin().indices().prepareCreate("testshingle")
        .setSettings(ImmutableSettings.settingsBuilder().loadFromSource(json))
        .execute().actionGet();

// Run the custom analyzer over a sample sentence. actionGet() is used (as for the
// index creation above) instead of Future.get(), so no checked
// InterruptedException/ExecutionException has to be handled here.
AnalyzeResponse resp = client.admin().indices()
        .prepareAnalyze("testshingle", "implemented for testing")
        .setAnalyzer("my_shingle_analyzer")
        .execute().actionGet();

// Print every emitted token (one shingle per line).
for (AnalyzeToken token : resp.getTokens()) {
    System.out.println(token.getTerm());
}
Saeed Zhiany
  • 2,051
  • 9
  • 30
  • 41
ralph
  • 153
  • 2
  • 9

0 Answers0