Lucas has the right idea, but there is a more specialized MultiPhraseQuery
that can be used to build up a query based on the data that is already in the index to get a prefix match as demonstrated in this unit test. The documentation of MultiPhraseQuery
reads:
MultiPhraseQuery
is a generalized version of PhraseQuery
, with an added method Add(Term[])
. To use this class, to search for the phrase "Microsoft app*" first use Add(Term)
on the term "Microsoft", then find all terms that have "app" as prefix using IndexReader.GetTerms(Term)
, and use MultiPhraseQuery.Add(Term[] terms)
to add them to the query.
As Lucas pointed out, a *something
WildCardQuery
is the way to do the suffix match, provided you understand the performance implications.
They can then be combined with a BooleanQuery
to get the result you want.
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.Search;
using Lucene.Net.Store;
using Lucene.Net.Util;
using System;
using System.Collections.Generic;
namespace LuceneSQLLikeSearch
{
class Program
{
static void Main(string[] args)
{
// Prepare...
var dir = new RAMDirectory();
var writer = new IndexWriter(dir,
new IndexWriterConfig(LuceneVersion.LUCENE_48,
new StandardAnalyzer(LuceneVersion.LUCENE_48)));
WriteIndex(writer);
// Search...
var reader = writer.GetReader(false);
// Get all terms that end with tulip
var wildCardQuery = new WildcardQuery(new Term("field", "*tulip"));
var multiPhraseQuery = new MultiPhraseQuery();
multiPhraseQuery.Add(new Term("field", "inn"));
// Get all terms that start with riyadhh
multiPhraseQuery.Add(GetPrefixTerms(reader, "field", "riyadhh"));
var query = new BooleanQuery();
query.Add(wildCardQuery, Occur.SHOULD);
query.Add(multiPhraseQuery, Occur.SHOULD);
var result = ExecuteSearch(writer, query);
foreach (var item in result)
{
Console.WriteLine("Match: {0} - Score: {1:0.0########}",
item.Value, item.Score);
}
Console.ReadKey();
}
}
}
WriteIndex
public static void WriteIndex(IndexWriter writer)
{
Document document;
document = new Document();
document.Add(new TextField("field", "Tulip INN Riyadhh", Field.Store.YES));
writer.AddDocument(document);
document = new Document();
document.Add(new TextField("field", "Tulip INN Riyadhh LUXURY", Field.Store.YES));
writer.AddDocument(document);
document = new Document();
document.Add(new TextField("field", "Suites of Tulip INN RIYAHdhh", Field.Store.YES));
writer.AddDocument(document);
document = new Document();
document.Add(new TextField("field", "Suites of Tulip INN RIYAHdhhll", Field.Store.YES));
writer.AddDocument(document);
document = new Document();
document.Add(new TextField("field", "myTulip INN Riyadhh LUXURY", Field.Store.YES));
writer.AddDocument(document);
document = new Document();
document.Add(new TextField("field", "some bogus data that should not match", Field.Store.YES));
writer.AddDocument(document);
writer.Commit();
}
GetPrefixTerms
Here we scan the index to find all of the terms that start with the passed-in prefix. The terms are then added to the MultiPhraseQuery
.
public static Term[] GetPrefixTerms(IndexReader reader, string field, string prefix)
{
var result = new List<Term>();
TermsEnum te = MultiFields.GetFields(reader).GetTerms(field).GetIterator(null);
te.SeekCeil(new BytesRef(prefix));
do
{
string s = te.Term.Utf8ToString();
if (s.StartsWith(prefix, StringComparison.Ordinal))
{
result.Add(new Term(field, s));
}
else
{
break;
}
} while (te.Next() != null);
return result.ToArray();
}
ExecuteSearch
public static IList<SearchResult> ExecuteSearch(IndexWriter writer, Query query)
{
var result = new List<SearchResult>();
var searcherManager = new SearcherManager(writer, true, null);
// Execute the search with a fresh indexSearcher
searcherManager.MaybeRefreshBlocking();
var searcher = searcherManager.Acquire();
try
{
var topDocs = searcher.Search(query, 10);
foreach (var scoreDoc in topDocs.ScoreDocs)
{
var doc = searcher.Doc(scoreDoc.Doc);
result.Add(new SearchResult
{
Value = doc.GetField("field")?.GetStringValue(),
// Results are automatically sorted by relevance
Score = scoreDoc.Score,
});
}
}
catch (Exception e)
{
Console.WriteLine(e.ToString());
}
finally
{
searcherManager.Release(searcher);
searcher = null; // Don't use searcher after this point!
}
return result;
}
SearchResult
public class SearchResult
{
public string Value { get; set; }
public float Score { get; set; }
}
If this seems cumbersome, note that QueryParser
can mimic a "SQL LIKE" query. As pointed out here, there is an option to AllowLeadingWildCard
on QueryParser
to build up the correct query sequence easily. It is unclear why you have a constraint that you can't use it, as it is definitely the simplest way to get the job done.