0

Hello, I am using Lucene to index my database records, but I am unable to resolve the following error.

Error:

java.lang.IllegalArgumentException: Document contains at least one immense term in field="contentSum_DES" (whose UTF8 encoding is longer than the max length 32766), all of which were skipped. Please correct the analyzer to not produce such terms. The prefix of the first immense term is: '[71, 101, -61, -92, 110, 100, 101, 114, 116, 32, 97, 109, 58, 32, 49, 57, 46, 49, 49, 46, 50, 48, 49, 53, 32, 49, 52, 58, 49, 55]...', original message: bytes can be at most 32766 in length; got 57172

1074 total milliseconds

Code:

    package lucene;

    import java.io.File;
    import java.nio.file.FileSystems;
    import java.nio.file.Path;
    import java.sql.Connection;
    import java.sql.DriverManager;
    import java.sql.ResultSet;
    import java.sql.SQLException;
    import java.sql.Statement;

    import org.apache.lucene.analysis.core.SimpleAnalyzer;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.document.DateTools;
    import org.apache.lucene.document.DateTools.Resolution;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.document.StringField;
    import org.apache.lucene.document.TextField;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.index.IndexWriterConfig.OpenMode;
    import org.apache.lucene.index.Term;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;
    import org.apache.lucene.util.Version;
    public class Lucenetest {
//database connection
    public static final String PATH = "C:/dbindex/index.txt";
    private static final String JDBC_DRIVER = "com.microsoft.sqlserver.jdbc.SQLServerDriver";
    private static final String CONNECTION_URL = "jjdbc:sqlserver://WMDENTW1\\SQLEXPRESS:1433;" + 
                        "database=FullTextDB;" + 
                        "user=root;" + 
                        "password=root123";
    private static final String QUERY = "select FTID, ID, CLASSID, TEXT, PUBNOTICECONTENT, DOCUMENTCONTENT, contentSum_DE from METADATA_FULLTEXT";
    public static void main(String[] args) throws Exception {
    Lucenetest indexer = new Lucenetest();

//error here

    ***Directory indexDir = FSDirectory.open(new File(PATH));***

    try{  
//index writer
       Class.forName(JDBC_DRIVER).newInstance();  

       Connection conn = DriverManager.getConnection(CONNECTION_URL); 

       StandardAnalyzer analyzer = new StandardAnalyzer();  

       IndexWriterConfig Config = new IndexWriterConfig(analyzer);

       IndexWriter indexWriter = new IndexWriter(indexDir, Config);  

       System.out.println("Indexing to directory '" + indexDir + "'...");  

       int indexedDocumentCount = indexer.indexDocs1(indexWriter, conn);  

       indexWriter.close();  

       System.out.println(indexedDocumentCount + " records have been indexed successfully");

    } catch (Exception e) {  
       e.printStackTrace();  
    } 
    }

    @SuppressWarnings("deprecation")
    int indexDocs1(IndexWriter writer, Connection conn) throws Exception {  
      String sql = QUERY;  
      Statement stmt = conn.createStatement();  
      ResultSet rs = stmt.executeQuery(sql);  
      int i=0;
      while (rs.next()) {  

//checking for null and allowing it to add

        String FTID = resultSet.getString("FTID"); //!= null ? resultSet.getString("FTID"): " ";
        String ID = resultSet.getString("ID")!= null ? resultSet.getString("ID"): " ";

        String CLASSID = resultSet.getString("CLASSID")!= null ? resultSet.getString("CLASSID"): " ";

        String TEXT = resultSet.getString("TEXT")!= null ? resultSet.getString("TEXT"): " ";

        String PUBNOTICECONTENT = resultSet.getString("PUBNOTICECONTENT")!= null ? resultSet.getString("PUBNOTICECONTENT"): " ";
        String DOCUMENTCONTENT = resultSet.getString("DOCUMENTCONTENT")!= null ? resultSet.getString("DOCUMENTCONTENT"): " ";

                String contentSum_DE = resultSet.getString("contentSum_DE")!= null ? resultSet.getString("contentSum_DE"): " ";

         Document doc = new Document();  
          doc.add(new StringField("FTIDS", FTID, Field.Store.YES));
        doc.add(new StringField("IDS", ID, Field.Store.YES));
        doc.add(new StringField("CLASSIDS", CLASSID, Field.Store.YES));
        doc.add(new StringField("TEXTS", TEXT, Field.Store.YES));
        doc.add(new StringField("PUBNOTICECONTENTS", PUBNOTICECONTENT, Field.Store.YES));
       doc.add(new Field("DOCUMENTCONTENTS", DOCUMENTCONTENT, Field.Store.YES, Field.Index.));
        doc.add(new Field("contentSum_DES", contentSum_DE, Field.Store.YES, Field.Index.NO));

           if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
        // New index, so we just add the document (no old document can be there):
        System.out.println("adding " + FTID + " ---- " + ID + "---- " + CLASSID);
          writer.addDocument(doc);

        }

        }

} catch (Exception e) {
  System.out.println(e);
}
gupta k
  • 23
  • 1
  • 9
  • 1
    Possible duplicate of [UTF8 encoding is longer than the max length 32766](http://stackoverflow.com/questions/24019868/utf8-encoding-is-longer-than-the-max-length-32766) – Matt Gibson Mar 29 '16 at 10:58
  • The concept is the same, but I was unable to apply it in my Lucene code. I changed the line to doc.add(new Field("contentSum_DES", contentSum_DE, Field.Store.YES, Field.Index.NO)); but it is still not working. – gupta k Mar 29 '16 at 11:20
  • Well, it's working now! Thanks! – gupta k Mar 29 '16 at 11:23

0 Answers0