5

So far I only have working code for retrieving text from the slide notes of a .ppt file:

// Prints the index of every slide in a legacy (binary) .ppt file, followed by
// the text of its notes, or "null" when the slide has no notes text.
// try-with-resources closes the input stream even when parsing fails.
try (FileInputStream is = new FileInputStream("C:\\sample\\test.ppt")) {
    SlideShow ppt = new SlideShow(is);

    Slide[] slide = ppt.getSlides();
    for (int i = 0; i < slide.length; i++) {

        System.out.println(i);
        // A slide may have no notes sheet at all; report that the same way as
        // a notes sheet without text instead of failing with an NPE.
        if (slide[i].getNotesSheet() == null) {
            System.out.println("null");
            continue;
        }
        TextRun[] runs = slide[i].getNotesSheet().getTextRuns();
        if (runs == null || runs.length < 1) {
            System.out.println("null");
        } else {
            for (TextRun run : runs) {
                System.out.println(" > " + run.getText());
            }
        }
    }

} catch (IOException ioe) {
    // Do not swallow silently: a missing or unreadable file would otherwise
    // look exactly like a presentation without slides.
    ioe.printStackTrace();
}

But how do you retrieve text from pptx slide notes?

Wolfgang Fahl
  • 15,016
  • 11
  • 93
  • 186
Gerard Cruz
  • 641
  • 14
  • 34
  • Did you try looking at the code and examples in Apache POI? For example, [the XSLF text extractor class XSLFPowerPointExtractor](https://svn.apache.org/repos/asf/poi/trunk/src/ooxml/java/org/apache/poi/xslf/extractor/XSLFPowerPointExtractor.java) which is able to extract text from slide notes? – Gagravarr Jul 21 '14 at 20:46

3 Answers

8

After constant trial and error, I found a solution:

// Prints the index of every slide in a .pptx file, followed by the text of
// each paragraph found in the text shapes of the slide's notes.
// try-with-resources closes the input stream even when parsing fails.
try (FileInputStream fis = new FileInputStream("C:\\sample\\sample.pptx")) {
    XMLSlideShow pptxshow = new XMLSlideShow(fis);

    XSLFSlide[] slide2 = pptxshow.getSlides();
    for (int i = 0; i < slide2.length; i++) {
        System.out.println(i);
        try {
            XSLFNotes mynotes = slide2[i].getNotes();
            // A slide without notes yields null; skip it explicitly rather
            // than relying on a swallowed NullPointerException.
            if (mynotes == null) {
                continue;
            }
            for (XSLFShape shape : mynotes) {
                if (shape instanceof XSLFTextShape) {
                    XSLFTextShape txShape = (XSLFTextShape) shape;
                    for (XSLFTextParagraph xslfParagraph : txShape.getTextParagraphs()) {
                        System.out.println(xslfParagraph.getText());
                    }
                }
            }
        } catch (Exception e) {
            // Best effort: a problem with one slide's notes must not stop the
            // remaining slides, but it should still be visible.
            e.printStackTrace();
        }

    }
} catch (IOException e) {
    // Opening or reading the file failed; report it instead of hiding it.
    e.printStackTrace();
}
Gerard Cruz
  • 641
  • 14
  • 34
0

An update on the accepted answer: it works, but if other parts of the notes master are enabled, such as the header or the page number, you will get extra paragraphs that you may not be expecting. You can limit the output to just the actual notes with the following code:

// Prints the index of every slide in a .pptx file, followed by the paragraphs
// of the notes text only: text shapes whose name does not contain
// "Notes Placeholder" (e.g. header or page number) are skipped.
// try-with-resources closes the input stream even when parsing fails.
try (FileInputStream fis = new FileInputStream("C:\\sample\\sample.pptx")) {
    XMLSlideShow pptxshow = new XMLSlideShow(fis);

    XSLFSlide[] slide2 = pptxshow.getSlides();
    for (int i = 0; i < slide2.length; i++) {
        System.out.println(i);
        try {
            XSLFNotes mynotes = slide2[i].getNotes();
            // A slide without notes yields null; skip it explicitly rather
            // than relying on a swallowed NullPointerException.
            if (mynotes == null) {
                continue;
            }
            for (XSLFShape shape : mynotes) {
                if (shape instanceof XSLFTextShape) {
                    XSLFTextShape txShape = (XSLFTextShape) shape;

                    // Look for the actual notes only ...
                    if (!txShape.getShapeName().contains("Notes Placeholder")) {
                        continue;
                    }

                    for (XSLFTextParagraph xslfParagraph : txShape.getTextParagraphs()) {
                        System.out.println(xslfParagraph.getText());
                    }
                }
            }
        } catch (Exception e) {
            // Best effort: a problem with one slide's notes must not stop the
            // remaining slides, but it should still be visible.
            e.printStackTrace();
        }

    }
} catch (IOException e) {
    // Opening or reading the file failed; report it instead of hiding it.
    e.printStackTrace();
}
0

Here is a better solution:

// Prints, for every slide in a .pptx file, the text of the first notes shape
// whose text type is Placeholder.BODY, i.e. the notes text itself rather than
// header, footer or page-number placeholders.
try (FileInputStream fis = new FileInputStream("C:\\sample\\sample.pptx")) {
    XMLSlideShow ppt = new XMLSlideShow(fis);
    List<XSLFSlide> slides = ppt.getSlides();
    for (XSLFSlide slide : slides) {
        try {
            XSLFNotes mynotes = slide.getNotes();
            // A slide without notes yields null; skip it instead of letting
            // the for-each below throw and print a stack trace for it.
            if (mynotes == null) {
                continue;
            }
            for (XSLFShape shape : mynotes) {
                if (shape instanceof XSLFTextShape && Placeholder.BODY == ((XSLFTextShape) shape).getTextType()) {
                    XSLFTextShape txShape = (XSLFTextShape) shape;
                    System.out.println(txShape.getText());
                    // Only the first body placeholder is printed.
                    break;
                }
            }
        } catch (Exception e) {
            // Best effort: keep going with the remaining slides.
            e.printStackTrace();
        }
    }
} catch (IOException e) {
    e.printStackTrace();
}

Unlike the other answers, this code uses Placeholder.BODY == ((XSLFTextShape) shape).getTextType(), so you get only the notes text.