UPDATE
Now i'm able parse document clicking the link 'Next'. But only the second time the file is parsed. I have two actions in which first i'll load the file then by clicking next button it should process parse the content from the word document. I'm using apache poi to extract contents.
Struts.xml
<package name="parsing" extends="struts-default">
<action name="moveNext" class="tryupload.NextFileParser" method="execute">
<result name="success">/parser.jsp</result>
</action>
</package>
ParsingDOCAction
package tryupload;
import java.io.File;
import java.io.FileInputStream;
import java.sql.CallableStatement;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;
import java.util.ArrayList;
import javax.servlet.ServletContext;
import org.apache.commons.io.FilenameUtils;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.struts2.util.ServletContextAware;
import com.opensymphony.xwork2.ActionSupport;
import sun.misc.ExtensionInstallationProvider;
public class NextFileParser extends ActionSupport implements ServletContextAware{
/**
*
*/
private static final long serialVersionUID = 1678420053045158846L;
POIFSFileSystem fs = null;
ParseDocBean pdb=new ParseDocBean();
//private File[] fileList;
static int i=0;
private ServletContext servletContext;
private ArrayList<ParseDocBean> list=new ArrayList<ParseDocBean>();
public ServletContext getServletContext() {
return servletContext;
}
@Override
public void setServletContext(ServletContext servletContext) {
this.servletContext = servletContext;
}
public String execute(){
String fileServerpath=getServletContext().getRealPath("/doc/");
System.out.println(fileServerpath);
File f=new File(fileServerpath);
File[] docs=f.listFiles();
//System.out.println(docs.length);
if(parseNext(docs))
System.out.println("parsed");
System.out.println("inside execute method");
//NextFileParser nfp=new NextFileParser();
//nfp.setFileList(docs);
//nfp.setFileList(docs);
//NextFileParser.parseNext(docs);
i++;
return SUCCESS;
}
public boolean parseNext(File[] fileList){
System.out.println(fileList.length);
System.out.println(i);
String Content=null;
try{
if(i==fileList.length){
//msg="Finished";
pdb.setMsg("Finished");
list.add(pdb);
return false;
}
else{
String fname=fileList[i].getName();
System.out.println(fname);
String basename=FilenameUtils.getBaseName(fname);
//String extension=FilenameUtils.getExtension(fname);
if(checkAlreadyExists(basename))
{
//addFieldError("content", "Already Parsed");
//str="No new Files";
pdb.setStr("No new Files");
list.add(pdb);
return false;
}
else{
try
{
fs = new POIFSFileSystem(new FileInputStream(fileList[i]));
HWPFDocument doc = new HWPFDocument(fs);
WordExtractor we = new WordExtractor(doc);
// content= we.getText();//if the extension is .doc
ParseDocBean pdb=new ParseDocBean();
System.out.println(we.getText());
pdb.setContent(we.getText());
list.add(pdb);
/*pdb.setFinishMsg("Finished");*/
we.close();
System.out.println(i);
}
catch (Exception e)
{
e.getStackTrace();
e.getMessage();
System.out.println("document file cant be parsed");
}
}
}
}
catch (Exception e) {
e.getMessage();
}
return true;
/* if(!Content.equals("")){
System.out.println(Content);}
return Content;
*/
}
public static boolean checkAlreadyExists(String basename){
boolean status=false;
try {
Class.forName("com.mysql.jdbc.Driver");
Connection con=DriverManager.getConnection("jdbc:mysql://localhost:3306/resume","root","root");
if(con!=null){
System.out.println("Connection ok");
}
String query="{call checkFileAlreadyExists(?,?)}";
CallableStatement st;
st=con.prepareCall(query);
st.setString(1, basename);
st.registerOutParameter(2, java.sql.Types.INTEGER);
st.execute();
int id=st.getInt(2);
if(id!=0){
status=true;
}
else
{
status=false;
}
con.close();
st.close();
}
catch (ClassNotFoundException | SQLException e) {
e.printStackTrace();
}
if(status){
System.out.println("All ready parsed");
return true;
}
else
{
return false;
}
}
}
I'm inserting some data from the extracted contents into the database and i'm checking weather the file is already parsed in the above class using the method checkAlreadyExists()
parser.jsp
<body>
<fieldset>
<s:form action="parsedoc" method="post" theme="xhtml">
<s:textfield name="cname" label="Candidate Name*"></s:textfield>
<s:textfield name="cemail" label="Email*"></s:textfield>
<s:textfield name="cNo" label="Contact No*"></s:textfield>
<s:textfield name="DOB" label="DOB*"></s:textfield>
<s:textfield name="gender" label="Gender*"></s:textfield>
<s:textfield name="mStatus" label="Marital Status*"></s:textfield>
<s:textfield name="qualification" label="Qualification*"></s:textfield>
<s:textfield name="marks" label="Score(if any)"></s:textfield>
<s:div style="display:block">
<s:reset ></s:reset><s:submit ></s:submit>
</s:div>
</s:form>
</fieldset>
<%@ include file="extracted.jsp" %>
</body>
extracted.jsp
<body>
<s:div style="display:block;">
<s:if test="%{list!=null && list.size>0}">
<s:iterator value="listName">
<%-- <s:textarea name="content"><s:property value="content"/> </s:textarea> --%>
<s:div><p><s:property value="content"/></p></s:div>
<p><s:property value="str"/></p>
<p><s:property value="msg"/></p>
</s:iterator>
</s:if>
<s:else>No files found</s:else>
<s:a href="moveNext">Next<%-- <s:submit type="button" name="Next" value="Next"></s:submit> --%></s:a>
<%-- <s:text name="finishMsg"><s:property value="finishMsg"/></s:text> --%>
</s:div>
</body>
the output i'm gettin when i click the next button is as below.I want to show the the extracted contents in the parser.jsp page. But i'm not getting it.
Nov 05, 2015 10:15:08 PM org.apache.tomcat.util.digester.SetPropertiesRule begin
WARNING: [SetPropertiesRule]{Server/Service/Engine/Host/Context} Setting property 'source' to 'org.eclipse.jst.jee.server:tryupload' did not find a matching property.
Nov 05, 2015 10:15:08 PM org.apache.catalina.startup.VersionLoggerListener log
INFO: Server version: Apache Tomcat/8.0.24
Nov 05, 2015 10:15:08 PM org.apache.catalina.startup.VersionLoggerListener log
INFO: Server built: Jul 1 2015 20:19:55 UTC
Nov 05, 2015 10:15:08 PM org.apache.catalina.startup.VersionLoggerListener log
INFO: Server number: 8.0.24.0
Nov 05, 2015 10:15:08 PM org.apache.catalina.startup.VersionLoggerListener log
INFO: OS Name: Windows 7
Nov 05, 2015 10:15:08 PM org.apache.catalina.startup.VersionLoggerListener log
INFO: OS Version: 6.1
Nov 05, 2015 10:15:08 PM org.apache.catalina.startup.VersionLoggerListener log
INFO: Architecture: amd64
Nov 05, 2015 10:15:08 PM org.apache.catalina.startup.VersionLoggerListener log
INFO: Java Home: C:\Program Files\Java\jdk1.7.0_25\jre
Nov 05, 2015 10:15:08 PM org.apache.catalina.startup.VersionLoggerListener log
INFO: JVM Version: 1.7.0_25-b17
Nov 05, 2015 10:15:08 PM org.apache.catalina.startup.VersionLoggerListener log
INFO: JVM Vendor: Oracle Corporation
Nov 05, 2015 10:15:08 PM org.apache.catalina.startup.VersionLoggerListener log
INFO: CATALINA_BASE: E:\work space\.metadata\.plugins\org.eclipse.wst.server.core\tmp0
Nov 05, 2015 10:15:08 PM org.apache.catalina.startup.VersionLoggerListener log
INFO: CATALINA_HOME: C:\java\apache-tomcat-8.0.24
Nov 05, 2015 10:15:08 PM org.apache.catalina.startup.VersionLoggerListener log
INFO: Command line argument: -Dcatalina.base=E:\work space\.metadata\.plugins\org.eclipse.wst.server.core\tmp0
Nov 05, 2015 10:15:08 PM org.apache.catalina.startup.VersionLoggerListener log
INFO: Command line argument: -Dcatalina.home=C:\java\apache-tomcat-8.0.24
Nov 05, 2015 10:15:08 PM org.apache.catalina.startup.VersionLoggerListener log
INFO: Command line argument: -Dwtp.deploy=E:\work space\.metadata\.plugins\org.eclipse.wst.server.core\tmp0\wtpwebapps
Nov 05, 2015 10:15:08 PM org.apache.catalina.startup.VersionLoggerListener log
INFO: Command line argument: -Djava.endorsed.dirs=C:\java\apache-tomcat-8.0.24\endorsed
Nov 05, 2015 10:15:08 PM org.apache.catalina.startup.VersionLoggerListener log
INFO: Command line argument: -Dfile.encoding=Cp1252
Nov 05, 2015 10:15:08 PM org.apache.catalina.core.AprLifecycleListener lifecycleEvent
INFO: The APR based Apache Tomcat Native library which allows optimal performance in production environments was not found on the java.library.path: C:\Program Files\Java\jdk1.7.0_25\bin;C:\Windows\Sun\Java\bin;C:\Windows\system32;C:\Windows;C:/Program Files/Java/jdk1.7.0_25/bin/../jre/bin/server;C:/Program Files/Java/jdk1.7.0_25/bin/../jre/bin;C:/Program Files/Java/jdk1.7.0_25/bin/../jre/lib/amd64;C:\Program Files\Java\jdk1.7.0_25\bin;c:\Program Files (x86)\Microsoft SQL Server\90\Tools\binn\;c:\Program Files (x86)\Microsoft SQL Server\100\Tools\Binn\;c:\Program Files\Microsoft SQL Server\100\Tools\Binn\;c:\Program Files\Microsoft SQL Server\100\DTS\Binn\;C:\Program Files\MySQL\MySQL Server 5.1\bin;F:\soft\soft\soft\eclipse-jee-mars-R-win32-x86_64\eclipse;;.
Nov 05, 2015 10:15:09 PM org.apache.coyote.AbstractProtocol init
INFO: Initializing ProtocolHandler ["http-nio-8081"]
Nov 05, 2015 10:15:09 PM org.apache.tomcat.util.net.NioSelectorPool getSharedSelector
INFO: Using a shared selector for servlet write/read
Nov 05, 2015 10:15:09 PM org.apache.coyote.AbstractProtocol init
INFO: Initializing ProtocolHandler ["ajp-nio-8009"]
Nov 05, 2015 10:15:09 PM org.apache.tomcat.util.net.NioSelectorPool getSharedSelector
INFO: Using a shared selector for servlet write/read
Nov 05, 2015 10:15:09 PM org.apache.catalina.startup.Catalina load
INFO: Initialization processed in 753 ms
Nov 05, 2015 10:15:09 PM org.apache.catalina.core.StandardService startInternal
INFO: Starting service Catalina
Nov 05, 2015 10:15:09 PM org.apache.catalina.core.StandardEngine startInternal
INFO: Starting Servlet Engine: Apache Tomcat/8.0.24
Nov 05, 2015 10:15:11 PM org.apache.jasper.servlet.TldScanner scanJars
INFO: At least one JAR was scanned for TLDs yet contained no TLDs. Enable debug logging for this logger for a complete list of JARs that were scanned but no TLDs were found in them. Skipping unneeded JARs during scanning can improve startup time and JSP compilation time.
Nov 05, 2015 10:15:11 PM com.opensymphony.xwork2.config.providers.XmlConfigurationProvider info
INFO: Parsing configuration file [struts-default.xml]
Nov 05, 2015 10:15:11 PM com.opensymphony.xwork2.config.providers.XmlConfigurationProvider info
INFO: Parsing configuration file [struts-plugin.xml]
Nov 05, 2015 10:15:11 PM com.opensymphony.xwork2.config.providers.XmlConfigurationProvider info
INFO: Parsing configuration file [struts.xml]
Nov 05, 2015 10:15:11 PM org.apache.struts2.config.AbstractBeanSelectionProvider info
INFO: Choosing bean (struts) for (com.opensymphony.xwork2.ObjectFactory)
Nov 05, 2015 10:15:11 PM org.apache.struts2.config.AbstractBeanSelectionProvider info
INFO: Choosing bean (struts) for (com.opensymphony.xwork2.factory.ActionFactory)
Nov 05, 2015 10:15:11 PM org.apache.struts2.config.AbstractBeanSelectionProvider info
INFO: Choosing bean (struts) for (com.opensymphony.xwork2.factory.ResultFactory)
Nov 05, 2015 10:15:11 PM org.apache.struts2.config.AbstractBeanSelectionProvider info
INFO: Choosing bean (struts) for (com.opensymphony.xwork2.factory.ConverterFactory)
Nov 05, 2015 10:15:11 PM org.apache.struts2.config.AbstractBeanSelectionProvider info
INFO: Choosing bean (struts) for (com.opensymphony.xwork2.factory.InterceptorFactory)
Nov 05, 2015 10:15:11 PM org.apache.struts2.config.AbstractBeanSelectionProvider info
INFO: Choosing bean (struts) for (com.opensymphony.xwork2.factory.ValidatorFactory)
Nov 05, 2015 10:15:11 PM org.apache.struts2.config.AbstractBeanSelectionProvider info
INFO: Choosing bean (struts) for (com.opensymphony.xwork2.factory.UnknownHandlerFactory)
Nov 05, 2015 10:15:11 PM org.apache.struts2.config.AbstractBeanSelectionProvider info
INFO: Choosing bean (struts) for (com.opensymphony.xwork2.FileManagerFactory)
Nov 05, 2015 10:15:11 PM org.apache.struts2.config.AbstractBeanSelectionProvider info
INFO: Choosing bean (struts) for (com.opensymphony.xwork2.conversion.impl.XWorkConverter)
Nov 05, 2015 10:15:11 PM org.apache.struts2.config.AbstractBeanSelectionProvider info
INFO: Choosing bean (struts) for (com.opensymphony.xwork2.conversion.impl.CollectionConverter)
Nov 05, 2015 10:15:11 PM org.apache.struts2.config.AbstractBeanSelectionProvider info
INFO: Choosing bean (struts) for (com.opensymphony.xwork2.conversion.impl.ArrayConverter)
Nov 05, 2015 10:15:11 PM org.apache.struts2.config.AbstractBeanSelectionProvider info
INFO: Choosing bean (struts) for (com.opensymphony.xwork2.conversion.impl.DateConverter)
Nov 05, 2015 10:15:11 PM org.apache.struts2.config.AbstractBeanSelectionProvider info
INFO: Choosing bean (struts) for (com.opensymphony.xwork2.conversion.impl.NumberConverter)
Nov 05, 2015 10:15:11 PM org.apache.struts2.config.AbstractBeanSelectionProvider info
INFO: Choosing bean (struts) for (com.opensymphony.xwork2.conversion.impl.StringConverter)
Nov 05, 2015 10:15:11 PM org.apache.struts2.config.AbstractBeanSelectionProvider info
INFO: Choosing bean (struts) for (com.opensymphony.xwork2.conversion.ConversionPropertiesProcessor)
Nov 05, 2015 10:15:11 PM org.apache.struts2.config.AbstractBeanSelectionProvider info
INFO: Choosing bean (struts) for (com.opensymphony.xwork2.conversion.ConversionFileProcessor)
Nov 05, 2015 10:15:11 PM org.apache.struts2.config.AbstractBeanSelectionProvider info
INFO: Choosing bean (struts) for (com.opensymphony.xwork2.conversion.ConversionAnnotationProcessor)
Nov 05, 2015 10:15:11 PM org.apache.struts2.config.AbstractBeanSelectionProvider info
INFO: Choosing bean (struts) for (com.opensymphony.xwork2.conversion.TypeConverterCreator)
Nov 05, 2015 10:15:11 PM org.apache.struts2.config.AbstractBeanSelectionProvider info
INFO: Choosing bean (struts) for (com.opensymphony.xwork2.conversion.TypeConverterHolder)
Nov 05, 2015 10:15:11 PM org.apache.struts2.config.AbstractBeanSelectionProvider info
INFO: Choosing bean (struts) for (com.opensymphony.xwork2.TextProvider)
Nov 05, 2015 10:15:11 PM org.apache.struts2.config.AbstractBeanSelectionProvider info
INFO: Choosing bean (struts) for (com.opensymphony.xwork2.LocaleProvider)
Nov 05, 2015 10:15:11 PM org.apache.struts2.config.AbstractBeanSelectionProvider info
INFO: Choosing bean (struts) for (com.opensymphony.xwork2.ActionProxyFactory)
Nov 05, 2015 10:15:11 PM org.apache.struts2.config.AbstractBeanSelectionProvider info
INFO: Choosing bean (struts) for (com.opensymphony.xwork2.conversion.ObjectTypeDeterminer)
Nov 05, 2015 10:15:11 PM org.apache.struts2.config.AbstractBeanSelectionProvider info
INFO: Choosing bean (struts) for (org.apache.struts2.dispatcher.mapper.ActionMapper)
Nov 05, 2015 10:15:11 PM org.apache.struts2.config.AbstractBeanSelectionProvider info
INFO: Choosing bean (jakarta) for (org.apache.struts2.dispatcher.multipart.MultiPartRequest)
Nov 05, 2015 10:15:11 PM org.apache.struts2.config.AbstractBeanSelectionProvider info
INFO: Choosing bean (struts) for (org.apache.struts2.views.freemarker.FreemarkerManager)
Nov 05, 2015 10:15:11 PM org.apache.struts2.config.AbstractBeanSelectionProvider info
INFO: Choosing bean (struts) for (org.apache.struts2.components.UrlRenderer)
Nov 05, 2015 10:15:11 PM org.apache.struts2.config.AbstractBeanSelectionProvider info
INFO: Choosing bean (struts) for (com.opensymphony.xwork2.validator.ActionValidatorManager)
Nov 05, 2015 10:15:11 PM org.apache.struts2.config.AbstractBeanSelectionProvider info
INFO: Choosing bean (struts) for (com.opensymphony.xwork2.util.ValueStackFactory)
Nov 05, 2015 10:15:11 PM org.apache.struts2.config.AbstractBeanSelectionProvider info
INFO: Choosing bean (struts) for (com.opensymphony.xwork2.util.reflection.ReflectionProvider)
Nov 05, 2015 10:15:11 PM org.apache.struts2.config.AbstractBeanSelectionProvider info
INFO: Choosing bean (struts) for (com.opensymphony.xwork2.util.reflection.ReflectionContextFactory)
Nov 05, 2015 10:15:11 PM org.apache.struts2.config.AbstractBeanSelectionProvider info
INFO: Choosing bean (struts) for (com.opensymphony.xwork2.util.PatternMatcher)
Nov 05, 2015 10:15:11 PM org.apache.struts2.config.AbstractBeanSelectionProvider info
INFO: Choosing bean (struts) for (org.apache.struts2.util.ContentTypeMatcher)
Nov 05, 2015 10:15:11 PM org.apache.struts2.config.AbstractBeanSelectionProvider info
INFO: Choosing bean (struts) for (org.apache.struts2.dispatcher.StaticContentLoader)
Nov 05, 2015 10:15:11 PM org.apache.struts2.config.AbstractBeanSelectionProvider info
INFO: Choosing bean (struts) for (com.opensymphony.xwork2.UnknownHandlerManager)
Nov 05, 2015 10:15:11 PM org.apache.struts2.config.AbstractBeanSelectionProvider info
INFO: Choosing bean (struts) for (org.apache.struts2.views.util.UrlHelper)
Nov 05, 2015 10:15:11 PM org.apache.struts2.config.AbstractBeanSelectionProvider info
INFO: Choosing bean (struts) for (com.opensymphony.xwork2.util.TextParser)
Nov 05, 2015 10:15:11 PM org.apache.struts2.config.AbstractBeanSelectionProvider info
INFO: Choosing bean (struts) for (org.apache.struts2.dispatcher.DispatcherErrorHandler)
Nov 05, 2015 10:15:11 PM org.apache.struts2.config.AbstractBeanSelectionProvider info
INFO: Choosing bean (struts) for (com.opensymphony.xwork2.security.ExcludedPatternsChecker)
Nov 05, 2015 10:15:11 PM org.apache.struts2.config.AbstractBeanSelectionProvider info
INFO: Choosing bean (struts) for (com.opensymphony.xwork2.security.AcceptedPatternsChecker)
Nov 05, 2015 10:15:12 PM org.apache.coyote.AbstractProtocol start
INFO: Starting ProtocolHandler ["http-nio-8081"]
Nov 05, 2015 10:15:12 PM org.apache.coyote.AbstractProtocol start
INFO: Starting ProtocolHandler ["ajp-nio-8009"]
Nov 05, 2015 10:15:12 PM org.apache.catalina.startup.Catalina start
INFO: Server startup in 3033 ms
Nov 05, 2015 10:15:23 PM org.apache.struts2.components.ServletUrlRenderer warn
WARNING: No configuration found for the specified action: 'parsedoc' in namespace: ''. Form action defaulting to 'action' attribute's literal value.
Nov 05, 2015 10:15:23 PM org.apache.struts2.components.ServletUrlRenderer warn
WARNING: No configuration found for the specified action: 'parsedoc' in namespace: ''. Form action defaulting to 'action' attribute's literal value.
E:\work space\.metadata\.plugins\org.eclipse.wst.server.core\tmp0\wtpwebapps\tryupload\doc\
2
0
pdf extraction and searching1.doc
Connection ok
document file cant be parsed
parsed
inside execute method
Nov 05, 2015 10:15:26 PM org.apache.struts2.components.ServletUrlRenderer warn
WARNING: No configuration found for the specified action: 'parsedoc' in namespace: '/'. Form action defaulting to 'action' attribute's literal value.
Nov 05, 2015 10:15:26 PM org.apache.struts2.components.ServletUrlRenderer warn
WARNING: No configuration found for the specified action: 'parsedoc' in namespace: '/'. Form action defaulting to 'action' attribute's literal value.
E:\work space\.metadata\.plugins\org.eclipse.wst.server.core\tmp0\wtpwebapps\tryupload\doc\
2
1
pdf extraction and searching182.doc
Connection ok
package com.programmingfree.simplepdfsearch;
import org.apache.lucene.queryParser.ParseException;
import org.pdfbox.pdmodel.PDDocument;
import org.pdfbox.util.PDFTextStripper;
import java.io.File;
import java.io.IOException;
public class SimplePDFSearch {
// location where the index will be stored.
private static final String INDEX_DIR = "src/main/resources/index";
private static final int DEFAULT_RESULT_SIZE = 100;
public static void main(String[] args) throws IOException, ParseException {
File pdfFile = new File("src/resources/SamplePDF.pdf");
IndexItem pdfIndexItem = index(pdfFile);
// creating an instance of the indexer class and indexing the items
Indexer indexer = new Indexer(INDEX_DIR);
indexer.index(pdfIndexItem);
indexer.close();
// creating an instance of the Searcher class to the query the index
Searcher searcher = new Searcher(INDEX_DIR);
int result = searcher.findByContent("Hello", DEFAULT_RESULT_SIZE);
print(result);
searcher.close();
}
//Extract text from PDF document
public static IndexItem index(File file) throws IOException {
PDDocument doc = PDDocument.load(file);
String content = new PDFTextStripper().getText(doc);
doc.close();
return new IndexItem((long)file.getName().hashCode(), file.getName(), content);
}
//Print the results
private static void print(int result) {
if(result==1)
System.out.println("The document contains the search keyword");
else
System.out.println("The document does not contain the search keyword");
}
}
package com.programmingfree.simplepdfsearch;
public class IndexItem {
private Long id;
private String title;
private String content;
public static final String ID = "id";
public static final String TITLE = "title";
public static final String CONTENT = "content";
public IndexItem(Long id, String title, String content) {
this.id = id;
this.title = title;
this.content = content;
}
public Long getId() {
return id;
}
public String getTitle() {
return title;
}
public String getContent() {
return content;
}
@Override
public String toString() {
return "IndexItem{" +
"id=" + id +
", title='" + title + '\'' +
", content='" + content + '\'' +
'}';
}
}
package com.programmingfree.simplepdfsearch;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import java.io.File;
import java.io.IOException;
public class Indexer {
private IndexWriter writer;
public Indexer(String indexDir) throws IOException {
// create the index
if(writer == null) {
writer = new IndexWriter(FSDirectory.open(
new File(indexDir)), new IndexWriterConfig(Version.LUCENE_36, new StandardAnalyzer(Version.LUCENE_36)));
}
}
/**
* This method will add the items into index
*/
public void index(IndexItem indexItem) throws IOException {
// deleting the item, if already exists
writer.deleteDocuments(new Term(IndexItem.ID, indexItem.getId().toString()));
Document doc = new Document();
doc.add(new Field(IndexItem.ID, indexItem.getId().toString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
doc.add(new Field(IndexItem.TITLE, indexItem.getTitle(), Field.Store.YES, Field.Index.ANALYZED));
doc.add(new Field(IndexItem.CONTENT, indexItem.getContent(), Field.Store.YES, Field.Index.ANALYZED));
// add the document to the index
writer.addDocument(doc);
}
/**
* Closing the index
*/
public void close() throws IOException {
writer.close();
}
}
package com.programmingfree.simplepdfsearch;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.*;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
public class Searcher {
private IndexSearcher searcher;
private QueryParser contentQueryParser;
public Searcher(String indexDir) throws IOException {
// open the index directory to search
searcher = new IndexSearcher(IndexReader.open(FSDirectory.open(new File(indexDir))));
StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
// defining the query parser to search items by content field.
contentQueryParser = new QueryParser(Version.LUCENE_36, IndexItem.CONTENT, analyzer);
}
/**
* This method is used to find the indexed items by the content.
* @param queryString - the query string to search for
*/
public int findByContent(String queryString, int numOfResults) throws ParseException, IOException {
// create query from the incoming query string.
Query query = contentQueryParser.parse(queryString);
// execute the query and get the results
ScoreDoc[] queryResults = searcher.search(query, numOfResults).scoreDocs;
if(queryResults.length>0)
return 1;
else
return 0;
}
public void close() throws IOException {
searcher.close();
}
}
1
parsed
inside execute method
Nov 05, 2015 10:15:28 PM org.apache.struts2.components.ServletUrlRenderer warn
WARNING: No configuration found for the specified action: 'parsedoc' in namespace: '/'. Form action defaulting to 'action' attribute's literal value.
Nov 05, 2015 10:15:28 PM org.apache.struts2.components.ServletUrlRenderer warn
WARNING: No configuration found for the specified action: 'parsedoc' in namespace: '/'. Form action defaulting to 'action' attribute's literal value.