9

Does PDFBox provide a utility to highlight text when I already have its coordinates?

The bounds of the text are known.

I know there are other libraries that provide the same functionality like pdfclown etc. But does pdfbox provide something like that?

f_puras
  • 2,521
  • 4
  • 33
  • 38
Alvin
  • 387
  • 4
  • 7
  • 18

5 Answers5

8

Well, I found this out. It is simple.

// Sketch for the PDFBox 1.x API: adds a highlight text-markup annotation to page i.
// The /* ... */ placeholders must be replaced with real values before this compiles.
PDDocument doc = PDDocument.load(/*path to the file*/);
PDPage page = (PDPage) doc.getDocumentCatalog().getAllPages().get(i);
List annots = page.getAnnotations();
PDAnnotationTextMarkup markup = new PDAnnotationTextMarkup(PDAnnotationTextMarkup.SUB_TYPE_HIGHLIGHT);
markup.setRectangle(/*your PDRectangle*/);
markup.setQuadPoints(/*float array of size eight with all the vertices of the PDRectangle in anticlockwise order*/);
annots.add(markup);
doc.save(/*path to the output file*/);
// Release the resources held by the document once it has been written.
doc.close();
Alvin
  • 387
  • 4
  • 7
  • 18
8

This is an extension of the first answer here; it is basically the same code as above.

It computes the coordinates relative to the page size of the current document, and it changes the yellow color, which is very light and can be hard to see when the highlighted word is short or small.

It also highlights the full word, taking the X and Y coordinates from the top-left to the top-right: the coordinates come from the first and the last character of the string.

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.List;

import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.graphics.color.PDColor;
import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceRGB;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationTextMarkup;
import org.apache.pdfbox.text.PDFTextStripper;
import org.apache.pdfbox.text.TextPosition;

public class MainSource extends PDFTextStripper {

    public MainSource()  throws IOException {
        super();
    }

    public static void main(String[] args)  throws IOException {
        PDDocument document = null;
        String fileName = "C:/AnyPDFFile.pdf";
        try {
            document = PDDocument.load( new File(fileName) );
            PDFTextStripper stripper = new MainSource();
            stripper.setSortByPosition( true );

            stripper.setStartPage( 0 );
            stripper.setEndPage( document.getNumberOfPages() );

            Writer dummy = new OutputStreamWriter(new ByteArrayOutputStream());
            stripper.writeText(document, dummy);

            File file1 = new File("C:/AnyPDFFile-New.pdf");
            document.save(file1);
        }
        finally {
            if( document != null ) {
                document.close();
            }
        }
    }

    /**
     * Override the default functionality of PDFTextStripper.writeString()
     */

    @Override
    protected void writeString(String string, List<TextPosition> textPositions) throws IOException {
        boolean isFound = false;
        float posXInit  = 0, 
              posXEnd   = 0, 
              posYInit  = 0,
              posYEnd   = 0,
              width     = 0, 
              height    = 0, 
              fontHeight = 0;
        String[] criteria = {"Word1", "Word2", "Word3", ....};

        for (int i = 0; i < criteria.length; i++) {
            if (string.contains(criteria[i])) {
                isFound = true;
            } 
        }
        if (isFound) {
            posXInit = textPositions.get(0).getXDirAdj();
            posXEnd  = textPositions.get(textPositions.size() - 1).getXDirAdj() + textPositions.get(textPositions.size() - 1).getWidth();
            posYInit = textPositions.get(0).getPageHeight() - textPositions.get(0).getYDirAdj();
            posYEnd  = textPositions.get(0).getPageHeight() - textPositions.get(textPositions.size() - 1).getYDirAdj();
            width    = textPositions.get(0).getWidthDirAdj();
            height   = textPositions.get(0).getHeightDir();

            System.out.println(string + "X-Init = " + posXInit + "; Y-Init = " + posYInit + "; X-End = " + posXEnd + "; Y-End = " + posYEnd + "; Font-Height = " + fontHeight);

            /* numeration is index-based. Starts from 0 */

            float quadPoints[] = {posXInit, posYEnd + height + 2, posXEnd, posYEnd + height + 2, posXInit, posYInit - 2, posXEnd, posYEnd - 2};

            List<PDAnnotation> annotations = document.getPage(this.getCurrentPageNo() - 1).getAnnotations();
            PDAnnotationTextMarkup highlight = new PDAnnotationTextMarkup(PDAnnotationTextMarkup.SUB_TYPE_HIGHLIGHT);

            PDRectangle position = new PDRectangle();
            position.setLowerLeftX(posXInit);
            position.setLowerLeftY(posYEnd);
            position.setUpperRightX(posXEnd);
            position.setUpperRightY(posYEnd + height);

            highlight.setRectangle(position);

            // quadPoints is array of x,y coordinates in Z-like order (top-left, top-right, bottom-left,bottom-right) 
            // of the area to be highlighted

            highlight.setQuadPoints(quadPoints);

            PDColor yellow = new PDColor(new float[]{1, 1, 1 / 255F}, PDDeviceRGB.INSTANCE);
            highlight.setColor(yellow);
            annotations.add(highlight);
        }
    }

}
JRod
  • 151
  • 1
  • 5
1

This works for pdfbox 2.0.7

// Fragment: adds a highlight text-markup annotation to one page of an already loaded document.
// The document initialiser below is a placeholder, and quadPoints (a float[]) must be
// defined by the caller; neither is shown in this snippet.
PDDocument document = /* get doc */
/* numeration is index-based. Starts from 0 */
List<PDAnnotation> annotations = document.getPage(yourPageNumber - 1).getAnnotations();
PDAnnotationTextMarkup highlight = new PDAnnotationTextMarkup(PDAnnotationTextMarkup.SUB_TYPE_HIGHLIGHT);
// Uses an A4-sized rectangle as the annotation rectangle instead of the text bounds
// NOTE(review): presumably so the rectangle always contains the quad points; confirm for non-A4 pages
highlight.setRectangle(PDRectangle.A4);
// quadPoints is array of x,y coordinates in Z-like order (top-left, top-right, bottom-left,bottom-right) 
// of the area to be highlighted
highlight.setQuadPoints(quadPoints);
// RGB components are given in the range 0..1, hence the division by 255 for the blue channel
PDColor yellow = new PDColor(new float[]{1, 1, 204 / 255F}, PDDeviceRGB.INSTANCE);
highlight.setColor(yellow);
annotations.add(highlight);

Note: such an annotation will be displayed if you save the document to a file, but it will not appear in an image rendered from the page, since no AppearanceStream is created for the annotation. I solved this with the code drafts from PDFBOX-3353.

strkk
  • 589
  • 3
  • 7
1

Simplest way ... draw a rectangle in the desired location and set the height to 1 and the fill color to BLACK. or ...

Using PDFBox ...

//create the document and the page
PDDocument doc = new PDDocument();
PDPage page1 = new PDPage();
doc.addPage(page1);
//create the stream
PDPageContentStream stream1 = new PDPageContentStream(doc, page1);
//to simply draw an underscore with the coordinates
//where the first is x start, second y start, third x end, fourth y end
stream1.drawLine(20, 740, 590, 740);
//to draw an underscore thicker than one pixel
//set the fill color before starting the path, so no color operator sits between path construction and fill
stream1.setNonStrokingColor(Color.BLACK);
//first x begin second y begin third length fourth thickness
stream1.addRect(345, 568, 70, 2);
stream1.fill();
//the content stream must be closed before the document is saved
stream1.close();
Namysh
  • 3,717
  • 2
  • 9
  • 17
0

Another solution could be to draw a yellowish rectangle with a lower alpha, as in the following sample code:

    // Demo: writes "Hello world" on a new page, paints a translucent yellow rectangle
    // starting at the text origin as a highlight, then paints a second, opaque rectangle
    // for comparison and saves the document.
    PDDocument document = new PDDocument();    

    PDPage page = new PDPage();
    document.addPage(page);

    // NOTE(review): the two boolean arguments are presumably "compress" and "resetContext" - confirm against the PDFBox API
    PDPageContentStream contentStream = new PDPageContentStream(document, page, AppendMode.APPEND, true, true);

    PDFont font = PDType1Font.COURIER;      
    final int fontSize = 16;
    
    //Writing text
    contentStream.beginText();
    contentStream.setFont(font, fontSize );
    contentStream.newLineAtOffset(25, 250);
    contentStream.showText("Hello world");
    contentStream.endText();
    
    //Changing alpha mode
    PDExtendedGraphicsState gs = new PDExtendedGraphicsState();
    gs.setNonStrokingAlphaConstant(0.2f);
    gs.setStrokingAlphaConstant(0.2f);
    gs.setBlendMode(BlendMode.MULTIPLY);
    contentStream.setGraphicsStateParameters(gs);
    //Setting color
    contentStream.setNonStrokingColor(new Color(255, 255, 0, 100));
    //Highlighting (that is, drawing a rectangle)
    // Rectangle starts at the same (25, 250) offset as the text; width and height are the
    // font metrics scaled by fontSize/1000 (presumably converting 1/1000 glyph units to user space units)
    contentStream.addRect(25, 250, font.getStringWidth("Hello world")*fontSize/1000, font.getBoundingBox().getHeight()*fontSize/1000);
    contentStream.fill();
    
    contentStream.close();
    
    //Resetting alpha means creating a new content stream...
    //writing a new rectangle just to test alpha changing
    contentStream = new PDPageContentStream(document, page, AppendMode.APPEND, true, true);
    gs = new PDExtendedGraphicsState();

    // Alpha constants of 1 for the second rectangle, in contrast to 0.2 above
    gs.setNonStrokingAlphaConstant(1f);
    gs.setStrokingAlphaConstant(1f);
    gs.setBlendMode(BlendMode.MULTIPLY);
    contentStream.setGraphicsStateParameters(gs);
    
    contentStream.setNonStrokingColor(new Color(255, 255, 0, 100));
    contentStream.addRect(50, 50, 50, 50);
    contentStream.fill();
    
    contentStream.close();
    // Constants.PATH is defined outside this snippet
    document.save(Constants.PATH);
    document.close();

Producing this as result

result