I have looked at several related questions, but I'm not sure why their solutions don't work for me.
I would normally use a built-in analyzer, as shown below.
import lucene
from org.apache.lucene.analysis.core import WhitespaceAnalyzer
from org.apache.lucene.index import IndexWriterConfig, IndexWriter
from org.apache.lucene.store import SimpleFSDirectory
from java.nio.file import Paths
from org.apache.lucene.document import Document, Field, TextField
# Directory (relative to the working directory) where the index is written.
index_path = "./index"

# The JVM has to be started before any Lucene (Java) object is instantiated.
lucene.initVM()

analyzer = WhitespaceAnalyzer()
config = IndexWriterConfig(analyzer)
store = SimpleFSDirectory(Paths.get(index_path))
try:
    writer = IndexWriter(store, config)
    try:
        # Index a single document with one stored, tokenized "title" field.
        doc = Document()
        doc.add(Field("title", "The quick brown fox.", TextField.TYPE_STORED))
        writer.addDocument(doc)
    finally:
        # Always release the writer (and its write lock), even if indexing
        # raised, so a later run can open the index again.
        writer.close()
finally:
    # Close the directory last, after the writer that uses it.
    store.close()
Instead of `WhitespaceAnalyzer()`, I would like to use `MyAnalyzer()`, which should combine a `WhitespaceTokenizer` with a `LowerCaseFilter`.
from org.apache.lucene.analysis.core import LowerCaseFilter, WhitespaceTokenizer
from org.apache.pylucene.analysis import PythonAnalyzer
class MyAnalyzer(PythonAnalyzer):
    """Analyzer that splits text on whitespace and lower-cases every token.

    Use it wherever a built-in analyzer would go, e.g.
    ``IndexWriterConfig(MyAnalyzer())``. As with any Lucene object, create it
    only after ``lucene.initVM()`` has been called.
    """

    def __init__(self):
        PythonAnalyzer.__init__(self)

    def createComponents(self, fieldName):
        """Build the tokenizer/filter chain used to analyze ``fieldName``.

        Args:
            fieldName: Name of the field being analyzed. It is ignored here,
                so every field gets the same chain.

        Returns:
            An ``Analyzer.TokenStreamComponents`` whose source is a
            ``WhitespaceTokenizer`` and whose output stream is that tokenizer
            wrapped in a ``LowerCaseFilter``.
        """
        # TokenStreamComponents is a nested class of the Java Analyzer base
        # class; import it here so this class does not depend on an extra
        # top-of-file import.
        from org.apache.lucene.analysis import Analyzer

        source = WhitespaceTokenizer()
        lowered = LowerCaseFilter(source)
        # The first argument is the tokenizer Lucene feeds the reader into;
        # the second is the end of the filter chain that Lucene consumes.
        return Analyzer.TokenStreamComponents(source, lowered)

    def initReader(self, fieldName, reader):
        """Return ``reader`` unchanged (no character filtering is applied).

        NOTE(review): some PyLucene releases appear to require the Python
        subclass to supply this hook; confirm against the installed version.
        Where it is not required, this override should be harmless.
        """
        return reader
Can you please help me write and use `MyAnalyzer()`?