4

I am a newbie to GATE ANNIE. I have tried the GATE GUI and have some experience performing tasks with it. I would like to know how I can implement Named Entity Extraction in Java.

I did some research but was unable to find any tutorial on Named Entity Extraction.

Is there any sample code available for performing Named Entity Extraction with GATE ANNIE in Java?

iNikkz
  • 3,729
  • 5
  • 29
  • 59

2 Answers

6
import gate.*;
import gate.creole.ANNIEConstants;
import gate.util.persistence.PersistenceManager;
import java.io.File;
import java.util.*;

/**
 * Minimal GATE Embedded example: loads the default ANNIE application from the
 * GATE plugins directory, runs it over a single in-memory document and prints
 * every Person, Organization and Location annotation it produced.
 */
public class AnnieNerExample {

    /**
     * Initialises GATE, runs ANNIE on one sentence and prints the named entities.
     *
     * @param args unused
     * @throws Exception if GATE initialisation, application loading or execution fails
     */
    public static void main(String[] args) throws Exception {
        // NOTE(review): installation path is machine-specific; adjust to the local GATE home.
        Gate.setGateHome(new File("C:\\Program Files\\GATE_Developer_8.1"));
        Gate.init();

        // The saved ANNIE application lives inside the ANNIE plugin directory.
        File anniePluginDir = new File(Gate.getPluginsHome(), ANNIEConstants.PLUGIN_DIR);
        File annieApplication = new File(anniePluginDir, ANNIEConstants.DEFAULT_FILE);

        LanguageAnalyser controller =
                (LanguageAnalyser) PersistenceManager.loadObjectFromFile(annieApplication);
        try {
            Corpus corpus = Factory.newCorpus("corpus");
            try {
                Document document = Factory.newDocument(
                        "Michael Jordan is a professor at the University of California, Berkeley.");
                try {
                    corpus.add(document);
                    controller.setCorpus(corpus);
                    controller.execute();

                    // Restrict the default annotation set to the entity types of interest.
                    document.getAnnotations()
                            .get(new HashSet<>(Arrays.asList("Person", "Organization", "Location")))
                            .forEach(a -> System.err.format("%s - \"%s\" [%d to %d]\n",
                                    a.getType(), Utils.stringFor(document, a),
                                    a.getStartNode().getOffset(), a.getEndNode().getOffset()));
                } finally {
                    // Release GATE resources even when processing fails part-way through.
                    Factory.deleteResource(document);
                }
            } finally {
                Factory.deleteResource(corpus);
            }
        } finally {
            Factory.deleteResource(controller);
        }
    }
}

The output:

Person - "Michael Jordan" [0 to 14]
Organization - "University of California" [37 to 61]
Location - "Berkeley" [63 to 71]

Jars

There are two possibilities:

  1. Manual

Quick Start with GATE Embedded:

add $GATE_HOME/bin/gate.jar and the JAR files in $GATE_HOME/lib to the Java CLASSPATH ($GATE_HOME is the GATE root directory)

  2. Maven

    <dependency>
        <groupId>uk.ac.gate</groupId>
        <artifactId>gate-core</artifactId>
        <version>8.4</version>
    </dependency>
    
dedek
  • 7,981
  • 3
  • 38
  • 68
  • Thanks for the code. You saved my time. One question more, which JARS, you used ? – iNikkz May 23 '16 at 06:31
  • @dedek Thank you for this code, really helpful. I wish to run the TwitIE pipeline, which is a gate Plugin. Do you happen to know how to do this? – Mr. Phil May 04 '17 at 14:56
  • @Mr.Phil the only change should be in the GAPP file: `PersistenceManager.loadObjectFromFile(new File(Gate.getPluginsHome(), "Twitter/resources/twitie-english-only.gapp"))` – dedek May 05 '17 at 07:42
0

For development and use in Java code, it is better to use GATE Embedded.

pom.xml file

<dependencies>
    <dependency>
        <groupId>uk.ac.gate</groupId>
        <artifactId>gate-core</artifactId>
        <version>8.6.1</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/uk.ac.gate.plugins/annie -->
    <dependency>
        <groupId>uk.ac.gate.plugins</groupId>
        <artifactId>annie</artifactId>
        <version>8.6</version>
        <scope>provided</scope>
    </dependency>

</dependencies>

Main.java file in src

import gate.*;
import gate.creole.ANNIEConstants;
import gate.creole.Plugin;
import gate.creole.ResourceReference;
import gate.util.persistence.PersistenceManager;

import java.net.URL;
import java.util.Arrays;
import java.util.HashSet;

/**
 * Minimal GATE Embedded example that obtains the ANNIE plugin through its Maven
 * coordinates, runs the default ANNIE application over a single in-memory
 * document and prints every Person, Organization and Location annotation.
 */
public class Main {

    /**
     * Initialises GATE, registers the ANNIE plugin, runs it on one sentence and
     * prints the type, covered text and offsets of each named entity.
     *
     * @param args unused
     * @throws Exception if GATE initialisation, plugin registration, application
     *                   loading or execution fails
     */
    public static void main(String[] args) throws Exception {
        Gate.init();

        // Register ANNIE by Maven coordinates, then locate its saved application inside the plugin.
        Plugin anniePlugin = new Plugin.Maven("uk.ac.gate.plugins", "annie", "8.6");
        Gate.getCreoleRegister().registerPlugin(anniePlugin);
        URL annieFile = new ResourceReference(anniePlugin, "resources/" + ANNIEConstants.DEFAULT_FILE).toURL();

        LanguageAnalyser controller = (LanguageAnalyser) PersistenceManager.loadObjectFromUrl(annieFile);
        try {
            Corpus corpus = Factory.newCorpus("corpus");
            try {
                Document document = Factory.newDocument("Michael Jordan is a professor at the University of California, Berkeley.");
                try {
                    corpus.add(document);
                    controller.setCorpus(corpus);
                    controller.execute();

                    // Restrict the default annotation set to the entity types of interest.
                    for (Annotation obj : document.getAnnotations().get(new HashSet<String>(Arrays.asList("Person", "Organization", "Location")))) {
                        System.out.print("type : " + obj.getType());
                        System.out.print("\t data : " + Utils.stringFor(document, obj));
                        System.out.print("\t start : " + obj.getStartNode().getOffset());
                        System.out.println("\t end : " + obj.getEndNode().getOffset());
                    }
                } finally {
                    // Release GATE resources even when processing fails part-way through.
                    Factory.deleteResource(document);
                }
            } finally {
                Factory.deleteResource(corpus);
            }
        } finally {
            Factory.deleteResource(controller);
        }
    }
}

Example code: https://github.com/hsali/gate-developer-annie-plugin-example

Hafiz Shehbaz Ali
  • 2,566
  • 25
  • 21