1

I generate a PDF file in a Java 8 project using the Apache FOP library. English content is displayed without any problems, but Russian characters come out garbled. They look like this: Ð#огР̧н.

It seems that the issue is somehow related to encoding, but how can I fix it?

Here is the class I use to generate the PDF:

/**
 * Renders an in-memory XSL-FO document to PDF with Apache FOP and exposes the
 * result as a stream source.
 *
 * <p>The static helpers ({@link #initDoc()}, {@link #initTable()},
 * {@link #getCell(String...)}, ...) return XSL-FO markup fragments that callers
 * concatenate into the {@code content} string passed to the constructor.
 */
public class PdfGenerationTools implements StreamResource.StreamSource
{
    /** Complete XSL-FO markup that {@link #getStream()} renders. */
    String content;

    /**
     * @param content the full XSL-FO document, normally assembled from
     *                {@link #initDoc()}, the table/cell helpers and {@link #closeDoc()}
     */
    public PdfGenerationTools(String content) {
        this.content = content;
    }

    /**
     * Transforms {@code content} to PDF and returns the PDF bytes as a stream.
     *
     * @return a stream over the generated PDF, or {@code null} if the
     *         transformation failed (the exception is printed to stderr)
     */
    @Override
    public InputStream getStream()
    {
        // The markup is serialised with StringTools.UTF8 here, so the XML
        // declaration produced by initDoc() must announce the same encoding.
        ByteArrayInputStream foStream =
                new ByteArrayInputStream(content.getBytes(StringTools.UTF8));

        // Basic FOP configuration. This object could be created just once and kept.
        // NOTE(review): no user configuration is loaded, so presumably only FOP's
        // built-in fonts are available; glyphs they lack are rendered as '#'.
        FopFactory fopFactory = FopFactory.newInstance();
        fopFactory.setStrictValidation(false); // For an example

        // Configuration for this PDF document - mainly metadata
        FOUserAgent userAgent = getFOUserAgent(fopFactory);

        // Transform to PDF. ByteArrayOutputStream holds no system resources,
        // so it does not need to be closed.
        ByteArrayOutputStream fopOut = new ByteArrayOutputStream();
        try {
            Fop fop = fopFactory.newFop(MimeConstants.MIME_PDF,
                    userAgent, fopOut);
            // Identity transformer: pipes the FO document straight into FOP.
            Transformer transformer =
                    TransformerFactory.newInstance().newTransformer();
            Source src = new
                    javax.xml.transform.stream.StreamSource(foStream);
            Result res = new SAXResult(fop.getDefaultHandler());
            transformer.transform(src, res);
            return new ByteArrayInputStream(fopOut.toByteArray());
        } catch (Exception e) {
            // Existing contract: report the failure and signal it with null.
            e.printStackTrace();
        }

        return null;
    }

    /**
     * Creates the per-document user agent carrying the PDF metadata.
     *
     * @param factory the factory the agent is created from
     * @return a user agent with producer, creation date, title and resolution set
     */
    private FOUserAgent getFOUserAgent(FopFactory factory)
    {
        FOUserAgent userAgent = factory.newFOUserAgent();

        userAgent.setProducer("Company");
        userAgent.setCreationDate(new Date());
        userAgent.setTitle("Printing jobs");
        userAgent.setTargetResolution(300); // DPI

        return userAgent;
    }

    /**
     * Returns the document prologue: XML declaration, page master and the
     * opening tags of the page sequence and body flow.
     *
     * <p>The declared encoding is UTF-8 because {@link #getStream()} encodes the
     * markup with {@code StringTools.UTF8}. Declaring a single-byte encoding such
     * as ISO-8859-1 makes the XML parser decode every byte of a multi-byte UTF-8
     * sequence as a separate character, which garbles non-ASCII text.
     */
    public static String initDoc()
    {
        return "<?xml version='1.0' encoding='UTF-8'?>"+
                "<fo:root xmlns:fo='http://www.w3.org/1999/XSL/Format'>"+
                "<fo:layout-master-set>"+
                "<fo:simple-page-master master-name='A4' margin='2cm'>"+
                "<fo:region-body />"+
                "</fo:simple-page-master>"+
                "</fo:layout-master-set>"+
                "<fo:page-sequence master-reference='A4'>"+
                "<fo:flow flow-name='xsl-region-body'>";
    }

    /** Returns the closing tags matching {@link #initDoc()}. */
    public static String closeDoc()
    {
        return "</fo:flow>"+
                "</fo:page-sequence>"+
                "</fo:root>";
    }

    /**
     * Returns a spacer block followed by the opening tags of a fixed-layout,
     * two-column table (each column 50% wide) and its body.
     */
    public static String initTable()
    {
        return "<fo:block space-before.optimum=\"10pt\"></fo:block>" +
                "<fo:table table-layout=\"fixed\" border-width=\"1mm\" border-style=\"solid\">" +
                "<fo:table-column column-number=\"1\" column-width=\"50%\"/>" +
                "<fo:table-column column-number=\"2\" column-width=\"50%\"/>" +
                "<fo:table-body>";
    }

    /** Returns the closing tags matching {@link #initTable()}. */
    public static String closeTable()
    {
        return "</fo:table-body>" +
                "</fo:table>";
    }

    /** Returns the opening tag of a table row that is kept together on one page. */
    public static String initTableRow()
    {
        return "<fo:table-row keep-together.within-page=\"always\">";
    }

    /** Returns the closing tag matching {@link #initTableRow()}. */
    public static String closeTableRow()
    {
        return  "</fo:table-row>";
    }

    /**
     * Builds one table cell containing one {@code fo:block} per argument.
     *
     * <p>The arguments are inserted verbatim: callers must pass well-formed
     * XML content ({@code &}, {@code <} and {@code >} in plain text have to be
     * escaped by the caller).
     *
     * @param args the contents of the cell, one block each
     * @return the markup of the complete {@code fo:table-cell}
     */
    public static String getCell(String ... args)
    {
        final StringBuilder sb = new StringBuilder();
        sb.append("<fo:table-cell padding=\"1mm\" border-width=\"1mm\" border-style=\"double\">");

        for (String arg : args)
        {
            // NOTE(review): "SansSerif" must be registered in the FOP font
            // configuration with glyphs for the text used - TODO confirm.
            sb.append("<fo:block font-family=\"SansSerif\">")
                    .append(arg)
                    .append("</fo:block>");
        }

        sb.append("</fo:table-cell>");

        return sb.toString();
    }
}

When I changed the encoding from 'ISO-8859-1' to 'UTF-8', my Cyrillic substring looked like this: '#####'. It seems I am missing fonts here.

user1053031
  • 727
  • 1
  • 11
  • 30
  • 1
    That looks like multibyte UTF-8 seen as some one-byte ISO/Windows encoding. For the rest make some small test like http://www.javaranch.com/journal/200409/CreatingMultipleLanguagePDFusingApacheFOP.html – Joop Eggen Jul 01 '16 at 06:57
  • 1
    It is probably either a font configuration issue ([this answer of mine](http://stackoverflow.com/a/28251945/4453460) could come in handy) or an encoding problem. Adding a small FO snippet with cyrillic characters could help getting an answer, as otherwise it's not possible to try and reproduce your problem (see [MCVE](http://stackoverflow.com/help/mcve)). – lfurini Jul 02 '16 at 07:20
  • I added a code snippet above to show how i generate the pdf content – user1053031 Jul 13 '16 at 10:48

1 Answers1

3

You must use a FOP configuration file that specifies the fonts to be embedded in the PDF document, for example:

<?xml version="1.0" encoding="UTF-8"?>
<!-- FOP user configuration: registers TrueType fonts for the PDF renderer.
     Each <font> element points at a font file through embed-url, and its
     <font-triplet> gives the name/style/weight combination under which that
     file is registered. -->
<fop version='1.0'>
    <renderers>
        <!-- The font list below applies to PDF output only. -->
        <renderer mime='application/pdf'>
            <fonts>
                <!-- TTF fonts -->
                <!-- The embed-url values are Windows paths; on another system
                     they must be changed to wherever the font files are. -->
                <!-- Arial: normal, bold, italic, bold italic -->
                <font kerning='yes' embed-url='c:\windows\fonts\arial.ttf'>
                    <font-triplet name='Arial' style='normal' weight='normal' />
                </font>
                <font kerning='yes' embed-url='c:\windows\fonts\arialbd.ttf'>
                    <font-triplet name='Arial' style='normal' weight='bold' />
                </font>
                <font kerning='yes' embed-url='c:\windows\fonts\ariali.ttf'>
                    <font-triplet name='Arial' style='italic' weight='normal' />
                </font>
                <font kerning='yes' embed-url='c:\windows\fonts\arialbi.ttf'>
                    <font-triplet name='Arial' style='italic' weight='bold' />
                </font>
                <!-- Times New Roman, registered under the name 'TimesNewRoman' -->
                <font kerning='yes' embed-url='c:\windows\fonts\times.ttf'>
                    <font-triplet name='TimesNewRoman' style='normal' weight='normal' />
                </font>
                <font kerning='yes' embed-url='c:\windows\fonts\timesbd.ttf'>
                    <font-triplet name='TimesNewRoman' style='normal' weight='bold' />
                </font>
                <font kerning='yes' embed-url='c:\windows\fonts\timesi.ttf'>
                    <font-triplet name='TimesNewRoman' style='italic' weight='normal' />
                </font>
                <font kerning='yes' embed-url='c:\windows\fonts\timesbi.ttf'>
                    <font-triplet name='TimesNewRoman' style='italic' weight='bold' />
                </font>
                <!-- Courier New, registered under the name 'CourierNew' -->
                <font kerning='yes' embed-url='c:\windows\fonts\cour.ttf'>
                    <font-triplet name='CourierNew' style='normal' weight='normal' />
                </font>
                <font kerning='yes' embed-url='c:\windows\fonts\courbd.ttf'>
                    <font-triplet name='CourierNew' style='normal' weight='bold' />
                </font>
                <font kerning='yes' embed-url='c:\windows\fonts\couri.ttf'>
                    <font-triplet name='CourierNew' style='italic' weight='normal' />
                </font>
                <font kerning='yes' embed-url='c:\windows\fonts\courbi.ttf'>
                    <font-triplet name='CourierNew' style='italic' weight='bold' />
                </font>
                <!-- Verdana: normal, bold, italic, bold italic -->
                <font kerning='yes' embed-url='c:\windows\fonts\verdana.ttf'>
                    <font-triplet name='Verdana' style='normal' weight='normal' />
                </font>
                <font kerning='yes' embed-url='c:\windows\fonts\verdanab.ttf'>
                    <font-triplet name='Verdana' style='normal' weight='bold' />
                </font>
                <font kerning='yes' embed-url='c:\windows\fonts\verdanai.ttf'>
                    <font-triplet name='Verdana' style='italic' weight='normal' />
                </font>
                <font kerning='yes' embed-url='c:\windows\fonts\verdanaz.ttf'>
                    <font-triplet name='Verdana' style='italic' weight='bold' />
                </font>
            </fonts>
        </renderer>
    </renderers>
</fop>

How to use:

// Configure fopFactory as desired: create the factory and a user agent, then
// load the user configuration file into the factory.
FopFactory fopFactory = FopFactory.newInstance();
FOUserAgent foUserAgent = fopFactory.newFOUserAgent();
// "fop.xml" is a relative path, so it is resolved against the JVM's current
// working directory.
// NOTE(review): the user agent is created before the configuration is loaded;
// confirm that this order works with the FOP version in use.
fopFactory.setUserConfig(new File("fop.xml"));
Igor Kudryashov
  • 353
  • 2
  • 10
  • I finally returned to this issue. The problem is that I work under Ubuntu 14, so MS fonts are not available here :( – user1053031 Jul 13 '16 at 09:41
  • 2
    You can use any fonts that contain Cyrillic characters. You can also install MS fonts on Ubuntu: open the Ubuntu Software Center and search for "ttf-mscorefonts-installer". This will install Microsoft's core fonts. – Igor Kudryashov Jul 13 '16 at 12:01