We need to override all position-related classes. Thanks @Tilman Hausherr and @mkl. Please correct my answer if required. Thanks once again.
import java.awt.geom.GeneralPath;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Stack;
import org.apache.pdfbox.contentstream.PDContentStream;
import org.apache.pdfbox.contentstream.PDFStreamEngine;
import org.apache.pdfbox.contentstream.operator.Operator;
import org.apache.pdfbox.contentstream.operator.OperatorProcessor;
import org.apache.pdfbox.cos.COSArray;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSNumber;
import org.apache.pdfbox.cos.COSObject;
import org.apache.pdfbox.cos.COSString;
import org.apache.pdfbox.pdfparser.PDFStreamParser;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDResources;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.font.PDFont;
import org.apache.pdfbox.pdmodel.font.PDFontFactory;
import org.apache.pdfbox.pdmodel.graphics.blend.BlendMode;
import org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject;
import org.apache.pdfbox.pdmodel.graphics.form.PDTransparencyGroup;
import org.apache.pdfbox.pdmodel.graphics.state.PDGraphicsState;
import org.apache.pdfbox.pdmodel.graphics.state.PDTextState;
import org.apache.pdfbox.util.Matrix;
import org.apache.pdfbox.util.Vector;
/**
 * A {@link PDFStreamEngine} subclass that keeps its own copies of the engine state (graphics
 * stack, resources, text matrices, initial matrix) so that the text-showing code path can be
 * re-implemented here and the position of every shown glyph can be recorded.
 *
 * <p>Recorded glyph data is published through the static fields {@link #chars},
 * {@link #charmatrixs} and {@link #tjchars}; after each {@link #showTextStrings(COSArray)} call
 * the collected lists are also stored in the maps of {@code Horizontalparsing}.
 *
 * <p>Because the operator registry and the glyph collectors are static and mutable, they are
 * shared by every instance of this class.
 */
public class PDStreamengine extends PDFStreamEngine {

    /** Operator processors keyed by operator name; shared by all instances. */
    public static Map<String, OperatorProcessor> operators = new HashMap<String, OperatorProcessor>(80);

    private Matrix textMatrix;
    private Matrix textLineMatrix;

    /** Graphics stack owned by this class; the inherited engine's own stack is not used here. */
    private Stack<PDGraphicsState> graphicsStack = new Stack<PDGraphicsState>();
    private PDResources resources;
    private PDPage currentPage;
    private Matrix initialMatrix;

    /**
     * One entry per shown glyph: the text matrix values (2,0) and (2,1) before the glyph
     * displacement is applied, followed by the same two values after it is applied.
     */
    public static ArrayList<ArrayList<Double>> chars;

    /** One entry per shown glyph: the text rendering matrix used for that glyph. */
    public static ArrayList<Matrix> charmatrixs;

    /** One entry per shown glyph: the value returned by {@code font.toUnicode(code)}. */
    public static ArrayList<String> tjchars;

    /**
     * Resets the engine state for the given page and processes its content stream, if any.
     *
     * @param page the page to process; must not be {@code null}
     * @throws IOException if processing the content stream fails
     * @throws IllegalArgumentException if {@code page} is {@code null}
     */
    @Override
    public void processPage(PDPage page) throws IOException
    {
        initPage(page);
        if (page.hasContents())
        {
            processStream(page);
        }
    }

    /**
     * Resets all per-page state: a fresh graphics stack seeded with the page crop box, cleared
     * text matrices and resources, and the page matrix as initial matrix.
     */
    private void initPage(PDPage page)
    {
        if (page == null)
        {
            throw new IllegalArgumentException("Page cannot be null");
        }
        currentPage = page;
        graphicsStack.clear();
        graphicsStack.push(new PDGraphicsState(page.getCropBox()));
        textMatrix = null;
        textLineMatrix = null;
        resources = null;
        initialMatrix = page.getMatrix();
    }

    /**
     * Processes a content stream with its own resources, graphics stack and initial matrix, and
     * restores the caller's state afterwards.
     *
     * @param contentStream the stream to process
     * @throws IOException if parsing or an operator fails
     */
    public void processStream(PDContentStream contentStream) throws IOException
    {
        PDResources parent = pushResources(contentStream);
        Stack<PDGraphicsState> savedStack = saveGraphicsStacks();
        Matrix parentMatrix = initialMatrix;

        // transform the CTM using the stream's matrix
        getGraphicsState().getCurrentTransformationMatrix().concatenate(contentStream.getMatrix());

        // the stream's initial matrix includes the parent CTM, e.g. this allows a scaled form
        initialMatrix = getGraphicsState().getCurrentTransformationMatrix().clone();

        // clip to bounding box
        PDRectangle bbox = contentStream.getBBox();
        clipToRect(bbox);

        processStreamOperators(contentStream);

        initialMatrix = parentMatrix;
        restoreGraphicsStacks(savedStack);
        popResources(parent);
    }

    /**
     * Selects the resources for the given stream and returns the previously active ones so the
     * caller can hand them back to {@link #popResources(PDResources)}.
     */
    private PDResources pushResources(PDContentStream contentStream)
    {
        // resource lookup: first look for stream resources, then fallback to the current page
        PDResources parentResources = resources;
        PDResources streamResources = contentStream.getResources();
        if (streamResources != null)
        {
            resources = streamResources;
        }
        else if (resources != null)
        {
            // inherit directly from parent stream, this is not in the PDF spec, but the file from
            // PDFBOX-1359 does this and works in Acrobat
        }
        else
        {
            resources = currentPage.getResources();
        }

        // resources are required in PDF
        if (resources == null)
        {
            resources = new PDResources();
        }
        return parentResources;
    }

    /**
     * Intersects the current clipping path with the rectangle transformed by the CTM.
     * A {@code null} rectangle leaves the clipping path untouched.
     */
    private void clipToRect(PDRectangle rectangle)
    {
        if (rectangle != null)
        {
            GeneralPath clip = rectangle.transform(getGraphicsState().getCurrentTransformationMatrix());
            getGraphicsState().intersectClippingPath(clip);
        }
    }

    /**
     * Parses the stream token by token, collecting operands until an operator is met and then
     * dispatching that operator with the collected operands.
     */
    private void processStreamOperators(PDContentStream contentStream) throws IOException
    {
        List<COSBase> arguments = new ArrayList<COSBase>();
        PDFStreamParser parser = new PDFStreamParser(contentStream);
        // NOTE(review): instantiated only for its side effects; presumably this registers the
        // operator processors in `operators` - confirm against ProcessClasses.
        new ProcessClasses();
        Object token = parser.parseNextToken();
        while (token != null)
        {
            if (token instanceof COSObject)
            {
                arguments.add(((COSObject) token).getObject());
            }
            else if (token instanceof Operator)
            {
                processOperator((Operator) token, arguments);
                // start a new operand list; the previous one was handed to the processor
                arguments = new ArrayList<COSBase>();
            }
            else
            {
                arguments.add((COSBase) token);
            }
            token = parser.parseNextToken();
        }
    }

    /** Reinstates the resources that were active before the matching {@code pushResources}. */
    private void popResources(PDResources parentResources)
    {
        resources = parentResources;
    }

    /**
     * Looks up the processor registered for the operator in {@link #operators} and runs it with
     * this engine as context. Unregistered operators are passed to
     * {@code unsupportedOperator}; an {@link IOException} thrown by a processor is passed to
     * {@code operatorException}.
     *
     * @param operator the operator to process
     * @param operands the operands collected for it
     * @throws IOException if the error handlers rethrow
     */
    protected void processOperator(Operator operator, List<COSBase> operands) throws IOException
    {
        String name = operator.getName();
        OperatorProcessor processor = operators.get(name);
        if (processor != null)
        {
            // the registry is static, so bind the processor to this instance before each use
            processor.setContext(this);
            try
            {
                // debug trace of every dispatched operator and its operands
                System.out.println(operator);
                System.out.println(operands);
                processor.process(operator, operands);
            }
            catch (IOException e)
            {
                operatorException(operator, operands, e);
            }
        }
        else
        {
            unsupportedOperator(operator, operands);
        }
    }

    /**
     * Replaces this class's graphics stack with a new one holding a clone of the current state.
     *
     * @return the previous stack, to be passed to {@link #restoreGraphicsStacks(Stack)}
     */
    protected final Stack<PDGraphicsState> saveGraphicsStacks()
    {
        Stack<PDGraphicsState> savedStack = graphicsStack;
        graphicsStack = new Stack<PDGraphicsState>();
        graphicsStack.add(savedStack.peek().clone());
        return savedStack;
    }

    /**
     * @return the graphics state on top of this class's graphics stack
     */
    @Override
    public PDGraphicsState getGraphicsState()
    {
        return graphicsStack.peek();
    }

    /**
     * Registers an operator processor under its own name in the shared {@link #operators} map
     * and binds it to this engine.
     *
     * @param op the processor to register
     */
    public void addOperators(OperatorProcessor op)
    {
        op.setContext(this);
        operators.put(op.getName(), op);
    }

    /**
     * Makes the given stack this class's graphics stack again.
     *
     * @param snapshot the stack returned by {@link #saveGraphicsStacks()}
     */
    protected final void restoreGraphicsStacks(Stack<PDGraphicsState> snapshot)
    {
        graphicsStack = snapshot;
    }

    /**
     * @return Returns the size of the graphicsStack.
     */
    public int getGraphicsStackSize()
    {
        return graphicsStack.size();
    }

    /**
     * @return Returns the textLineMatrix.
     */
    public Matrix getTextLineMatrix()
    {
        return textLineMatrix;
    }

    /**
     * @param value The textLineMatrix to set.
     */
    public void setTextLineMatrix(Matrix value)
    {
        textLineMatrix = value;
    }

    /**
     * @return Returns the textMatrix.
     */
    public Matrix getTextMatrix()
    {
        return textMatrix;
    }

    /**
     * @param value The textMatrix to set.
     */
    public void setTextMatrix(Matrix value)
    {
        textMatrix = value;
    }

    /**
     * @return the resources currently selected by this engine
     */
    public PDResources getResources()
    {
        return resources;
    }

    /**
     * Pushes the current graphics state to the stack.
     */
    public void saveGraphicsState()
    {
        graphicsStack.push(graphicsStack.peek().clone());
    }

    /**
     * Pops the current graphics state from the stack.
     */
    public void restoreGraphicsState()
    {
        graphicsStack.pop();
    }

    /**
     * Translates the text matrix by the given offsets.
     *
     * @param tx horizontal offset
     * @param ty vertical offset
     * @throws IOException declared for overriders; not thrown here
     */
    protected void applyTextAdjustment(float tx, float ty) throws IOException
    {
        // update the text matrix
        textMatrix.concatenate(Matrix.getTranslateInstance(tx, ty));
    }

    /**
     * Processes a form XObject as a nested content stream; forms of length 0 are skipped.
     *
     * @param form the form to show
     * @throws IOException if processing the form stream fails
     * @throws IllegalStateException if no page is being processed
     */
    public void showForm(PDFormXObject form) throws IOException
    {
        if (currentPage == null)
        {
            throw new IllegalStateException("No current page, call " +
                    "#processChildStream(PDContentStream, PDPage) instead");
        }
        if (form.getCOSObject().getLength() > 0)
        {
            processStream(form);
        }
    }

    /**
     * Called when a string of text is to be shown.
     *
     * @param string the encoded text
     * @throws IOException if there was an error showing the text
     */
    public void showTextString(byte[] string) throws IOException
    {
        showText(string);
    }

    /**
     * Shows a transparency group by processing it as a nested stream.
     *
     * @param form the transparency group to show
     * @throws IOException if processing the group fails
     */
    @Override
    public void showTransparencyGroup(PDTransparencyGroup form) throws IOException
    {
        processTransparencyGroup(form);
    }

    /**
     * Processes a transparency group with its own resources, graphics stack and initial matrix,
     * and restores the caller's state afterwards.
     *
     * @param group the transparency group to process
     * @throws IOException if parsing or an operator fails
     * @throws IllegalStateException if no page is being processed
     */
    @Override
    protected void processTransparencyGroup(PDTransparencyGroup group) throws IOException
    {
        if (currentPage == null)
        {
            throw new IllegalStateException("No current page, call " +
                    "#processChildStream(PDContentStream, PDPage) instead");
        }

        PDResources parent = pushResources(group);
        Stack<PDGraphicsState> savedStack = saveGraphicsStacks();
        Matrix parentMatrix = initialMatrix;

        // the stream's initial matrix includes the parent CTM, e.g. this allows a scaled form
        initialMatrix = getGraphicsState().getCurrentTransformationMatrix().clone();

        // transform the CTM using the stream's matrix
        getGraphicsState().getCurrentTransformationMatrix().concatenate(group.getMatrix());

        // Before execution of the transparency group XObject's content stream,
        // the current blend mode in the graphics state shall be initialized to Normal,
        // the current stroking and nonstroking alpha constants to 1.0, and the current soft mask to None.
        getGraphicsState().setBlendMode(BlendMode.NORMAL);
        getGraphicsState().setAlphaConstant(1);
        getGraphicsState().setNonStrokeAlphaConstant(1);
        getGraphicsState().setSoftMask(null);

        // clip to bounding box
        clipToRect(group.getBBox());

        processStreamOperators(group);

        initialMatrix = parentMatrix;
        // The stack was swapped by saveGraphicsStacks(), which works on this class's private
        // graphicsStack, so it must be restored through restoreGraphicsStacks(). The inherited
        // restoreGraphicsStack() cannot reach that field and would leave the group's state active.
        restoreGraphicsStacks(savedStack);
        popResources(parent);
    }

    /**
     * Handles an array of strings and position adjustments: numbers move the text matrix,
     * strings are shown glyph by glyph. The static glyph collectors are reset at the start of
     * every call; if any glyph was recorded, the collected lists are stored in
     * {@code Horizontalparsing} under the current {@code tj_ycount}, which is then incremented.
     *
     * @param array the array of {@link COSNumber} and {@link COSString} entries
     * @throws IOException if an entry has another type or showing text fails
     */
    @Override
    public void showTextStrings(COSArray array) throws IOException
    {
        PDTextState textState = getGraphicsState().getTextState();
        float fontSize = textState.getFontSize();
        float horizontalScaling = textState.getHorizontalScaling() / 100f;
        PDFont font = textState.getFont();

        // start a fresh set of collectors for this array
        chars = new ArrayList<ArrayList<Double>>();
        charmatrixs = new ArrayList<Matrix>();
        tjchars = new ArrayList<String>();

        boolean isVertical = false;
        if (font != null)
        {
            isVertical = font.isVertical();
        }

        for (COSBase obj : array)
        {
            if (obj instanceof COSNumber)
            {
                float tj = ((COSNumber) obj).floatValue();

                // calculate the combined displacements
                float tx, ty;
                if (isVertical)
                {
                    tx = 0;
                    ty = -tj / 1000 * fontSize;
                }
                else
                {
                    tx = -tj / 1000 * fontSize * horizontalScaling;
                    ty = 0;
                }
                applyTextAdjustment(tx, ty);
            }
            else if (obj instanceof COSString)
            {
                byte[] string = ((COSString) obj).getBytes();
                showText(string);
            }
            else
            {
                throw new IOException("Unknown type in array for TJ operation:" + obj);
            }
        }

        if (!chars.isEmpty() && !charmatrixs.isEmpty())
        {
            Horizontalparsing.poscharobj.put(Horizontalparsing.tj_ycount, chars);
            Horizontalparsing.txtposmatrix.put(Horizontalparsing.tj_ycount, charmatrixs);
            Horizontalparsing.wordobj.put(Horizontalparsing.tj_ycount, tjchars);
            Horizontalparsing.tj_ycount += 1;
        }
    }

    /**
     * Decodes the encoded string code by code, shows each glyph, advances the text matrix by the
     * glyph displacement and records per-glyph data in {@link #chars}, {@link #charmatrixs} and
     * {@link #tjchars}.
     *
     * @param string the encoded text
     * @throws IOException if decoding or showing a glyph fails
     */
    @Override
    protected void showText(byte[] string) throws IOException
    {
        // This method is also reachable through showTextString(), i.e. without a preceding
        // showTextStrings() call that allocates the collectors.
        ensureGlyphCollectors();

        PDGraphicsState state = getGraphicsState();
        PDTextState textState = state.getTextState();

        // get the current font
        PDFont font = textState.getFont();
        if (font == null)
        {
            // no current font, fall back to the default one
            font = PDFontFactory.createDefaultFont();
        }

        float fontSize = textState.getFontSize();
        float horizontalScaling = textState.getHorizontalScaling() / 100f;
        float charSpacing = textState.getCharacterSpacing();

        // put the text state parameters into matrix form
        Matrix parameters = new Matrix(
                fontSize * horizontalScaling, 0, // 0
                0, fontSize,                     // 0
                0, textState.getRise());         // 1

        // read the stream until it is empty
        InputStream in = new ByteArrayInputStream(string);
        while (in.available() > 0)
        {
            // decode a character
            int before = in.available();
            int code = font.readCode(in);
            int codeLength = before - in.available();
            String unicode = font.toUnicode(code);

            // positions recorded for this glyph (before and after the displacement)
            ArrayList<Double> pstnchar = new ArrayList<Double>();

            // Word spacing shall be applied to every occurrence of the single-byte character code
            // 32 in a string when using a simple font or a composite font that defines code 32 as
            // a single-byte code.
            float wordSpacing = 0;
            if (codeLength == 1 && code == 32)
            {
                wordSpacing += textState.getWordSpacing();
            }

            // text rendering matrix (text space -> device space)
            Matrix ctm = state.getCurrentTransformationMatrix();
            Matrix textRenderingMatrix = parameters.multiply(textMatrix).multiply(ctm);

            // get glyph's position vector if this is vertical text
            // changes to vertical text should be tested with PDFBOX-2294 and PDFBOX-1422
            if (font.isVertical())
            {
                // position vector, in text space
                Vector v = font.getPositionVector(code);

                // apply the position vector to the horizontal origin to get the vertical origin
                textRenderingMatrix.translate(v);
            }

            // get glyph's horizontal and vertical displacements, in text space
            Vector w = font.getDisplacement(code);

            // process the decoded glyph; the matrix references are put back afterwards in case
            // showGlyph replaced them
            saveGraphicsState();
            Matrix textMatrixOld = textMatrix;
            Matrix textLineMatrixOld = textLineMatrix;
            showGlyph(textRenderingMatrix, font, code, unicode, w);
            textMatrix = textMatrixOld;
            textLineMatrix = textLineMatrixOld;

            // record the text matrix position before the displacement is applied
            pstnchar.add((double) textMatrix.getValue(2, 0));
            pstnchar.add((double) textMatrix.getValue(2, 1));
            charmatrixs.add(textRenderingMatrix);
            restoreGraphicsState();

            // calculate the combined displacements
            float tx, ty;
            if (font.isVertical())
            {
                tx = 0;
                ty = w.getY() * fontSize + charSpacing + wordSpacing;
            }
            else
            {
                tx = (w.getX() * fontSize + charSpacing + wordSpacing) * horizontalScaling;
                ty = 0;
            }

            // update the text matrix
            textMatrix.concatenate(Matrix.getTranslateInstance(tx, ty));

            // record the text matrix position after the displacement
            pstnchar.add((double) textMatrix.getValue(2, 0));
            pstnchar.add((double) textMatrix.getValue(2, 1));
            tjchars.add(unicode);
            chars.add(pstnchar);
        }
    }

    /** Allocates any static glyph collector that has not been created yet. */
    private static void ensureGlyphCollectors()
    {
        if (chars == null)
        {
            chars = new ArrayList<ArrayList<Double>>();
        }
        if (charmatrixs == null)
        {
            charmatrixs = new ArrayList<Matrix>();
        }
        if (tjchars == null)
        {
            tjchars = new ArrayList<String>();
        }
    }
}