1

I am using the LingPipe toolkit for the naive Bayes algorithm. I trained it on my training data and it successfully classifies my test data. However, every time I run the program it trains the classifier again from scratch. I don't want to retrain on every run; instead, I want to build the model once, save it, and then apply it to the test data.

/**
 * Trains a character n-gram language-model classifier on the documents under
 * {@code train/<category>/} and classifies the documents under
 * {@code test/<category>/}, printing the best category and the full joint
 * classification for every test document.
 *
 * <p>Usage: {@code java ClassifyNews [modelFile]}
 * <ul>
 *   <li>Without arguments the classifier is trained and compiled in memory on
 *       every run.</li>
 *   <li>With a {@code modelFile} argument the compiled model is read from that
 *       file when it exists; otherwise the classifier is trained once, the
 *       compiled model is written to that file, and later runs reuse it
 *       without retraining.</li>
 * </ul>
 */
public class ClassifyNews {

    private static final File TRAINING_DIR = new File("train");
    private static final File TESTING_DIR = new File("test");
    private static final String[] CATEGORIES = {"c1", "c2", "c3"};
    private static final int NGRAM_SIZE = 6;
    private static final String ENCODING = "ISO-8859-1";

    /**
     * Program entry point.
     *
     * @param args optional; {@code args[0]} is the path of the file used to
     *             store and reload the compiled model
     * @throws ClassNotFoundException if the compiled model cannot be deserialized
     * @throws IOException if a training, testing or model file cannot be read or written
     * @throws IllegalArgumentException if a training or testing category directory is missing
     */
    public static void main(String[] args) throws ClassNotFoundException, IOException {
        File modelFile = (args != null && args.length > 0) ? new File(args[0]) : null;

        JointClassifier<CharSequence> compiledClassifier;
        if (modelFile != null && modelFile.isFile()) {
            // A model was saved by an earlier run: skip training entirely.
            System.out.println("Loading model from " + modelFile);
            compiledClassifier = loadModel(modelFile);
        } else {
            DynamicLMClassifier<NGramProcessLM> classifier = train();
            System.out.println("Compiling");
            if (modelFile != null) {
                // Persist the compiled form, then read it back so the model
                // used now is exactly the one later runs will load.
                AbstractExternalizable.compileTo(classifier, modelFile);
                System.out.println("Saved model to " + modelFile);
                compiledClassifier = loadModel(modelFile);
            } else {
                compiledClassifier = compileInMemory(classifier);
            }
        }

        evaluate(compiledClassifier);
    }

    /** Trains a fresh classifier on every file below the training directory. */
    private static DynamicLMClassifier<NGramProcessLM> train() throws IOException {
        DynamicLMClassifier<NGramProcessLM> classifier =
                DynamicLMClassifier.createNGramProcess(CATEGORIES, NGRAM_SIZE);
        for (String category : CATEGORIES) {
            File classDir = new File(TRAINING_DIR, category);
            if (!classDir.isDirectory()) {
                String msg = "Could not find training directory=" + classDir
                        + "\nTraining directory not found";
                System.out.println(msg);
                throw new IllegalArgumentException(msg);
            }
            Classification classification = new Classification(category);
            for (String fileName : listFileNames(classDir)) {
                String text = Files.readFromFile(new File(classDir, fileName), ENCODING);
                System.out.println("Training on " + category + "/" + fileName);
                classifier.handle(new Classified<CharSequence>(text, classification));
            }
        }
        return classifier;
    }

    /** Compiles the trained classifier via an in-memory serialization round trip. */
    @SuppressWarnings("unchecked") // the compiled form of an LM classifier classifies CharSequence
    private static JointClassifier<CharSequence> compileInMemory(
            DynamicLMClassifier<NGramProcessLM> classifier)
            throws ClassNotFoundException, IOException {
        return (JointClassifier<CharSequence>) AbstractExternalizable.compile(classifier);
    }

    /** Reads a compiled classifier previously written to {@code modelFile}. */
    @SuppressWarnings("unchecked") // the file is expected to hold a model written by this program
    private static JointClassifier<CharSequence> loadModel(File modelFile)
            throws ClassNotFoundException, IOException {
        return (JointClassifier<CharSequence>) AbstractExternalizable.readObject(modelFile);
    }

    /** Classifies every file below the testing directory and prints the result. */
    private static void evaluate(JointClassifier<CharSequence> compiledClassifier)
            throws IOException {
        boolean storeCategories = true;
        JointClassifierEvaluator<CharSequence> evaluator =
                new JointClassifierEvaluator<CharSequence>(
                        compiledClassifier, CATEGORIES, storeCategories);
        for (String category : CATEGORIES) {
            File classDir = new File(TESTING_DIR, category);
            if (!classDir.isDirectory()) {
                // Without this check File.list() returns null and the loop
                // below fails with an uninformative NullPointerException.
                String msg = "Could not find testing directory=" + classDir
                        + "\nTesting directory not found";
                System.out.println(msg);
                throw new IllegalArgumentException(msg);
            }
            Classification classification = new Classification(category);
            for (String fileName : listFileNames(classDir)) {
                String text = Files.readFromFile(new File(classDir, fileName), ENCODING);
                System.out.print("\nTesting on " + category + "/" + fileName + " ");
                evaluator.handle(new Classified<CharSequence>(text, classification));
                JointClassification jc = compiledClassifier.classify(text);
                System.out.println("\tGot best category of: " + jc.bestCategory());
                System.out.println(jc.toString());
            }
        }
    }

    /**
     * Lists the entries of {@code dir}, turning the {@code null} that
     * {@link File#list()} returns on an I/O error into an exception.
     */
    private static String[] listFileNames(File dir) throws IOException {
        String[] names = dir.list();
        if (names == null) {
            throw new IOException("Could not list directory=" + dir);
        }
        return names;
    }
}

  • Just split up the code into a training and a testing/classification part. – qqilihq Feb 13 '14 at 08:54
  • it is not possible since these are all some class files and we can't see what is internally happening – user3304555 Feb 13 '14 at 09:20
  • a) The LingPipe source code is publicly available, you can have a look there. b) The JavaDoc for the classifier classes give you an explanation how to serialize/deserialize trained classifiers, so that you can re-use them later. – qqilihq Feb 13 '14 at 09:38
  • Can you show a code snippet for doing this? I tried the same, but it isn't working. – user3304555 Feb 13 '14 at 10:17
  • What did you try? What was not working? The relevant method for serializing is documented here: http://alias-i.com/lingpipe/docs/api/com/aliasi/classify/DynamicLMClassifier.html#compileTo(java.io.ObjectOutput) – qqilihq Feb 13 '14 at 10:47

0 Answers0