0

I want to highlight certain words in a pdf generated with apache-fop. Because both the words to highlight as well as the input-text are dynamic, I thought calling a java function that returns xsl inline element is the easiest way:

<xsl:variable name="lines" select="ext:highlight(.)"/>
<xsl:for-each select="$lines">
  <fo:block><xsl:value-of disable-output-escaping="yes" select="."/></fo:block>
</xsl:for-each>
public String highlight(String input) {
  for (String toHighlight : wordsToHighlight) {
    input = input.replaceAll(toHighlight, "<fo:inline background-color=\"yellow\">toHightlight</fo:inline>");
  }
  return input;
}

Unfortunately, the returned inline element is put literally in the pdf. What am I missing?

Also, I've looked into XSLT XML: highlight a search word in search results, but it becomes too complicated in my case.

Minimal reproducible example:

@Data
public class Example {

    private Set<String> keywords = Set.of("fox", "brown");
    private String inputText = "The quick brown fox jumped over the fence.";

}
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet version="1.0"
                xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                xmlns:exsl="http://exslt.org/common"
                xmlns:fo="http://www.w3.org/1999/XSL/Format">

    <xsl:template match="/">
        <fo:root>
            <fo:layout-master-set>
                <fo:simple-page-master master-name="simpleA4">
                    <fo:region-body />
                </fo:simple-page-master>
            </fo:layout-master-set>

            <fo:page-sequence master-reference="simpleA4">
                <fo:flow flow-name="xsl-region-body">
                    <xsl:apply-templates select="example" />
                </fo:flow>
            </fo:page-sequence>
        </fo:root>
    </xsl:template>

    <xsl:template match="example">
        <xsl:apply-templates select="inputText" />
    </xsl:template>

    <xsl:variable name="dictionary">
        <entry keyword="fox" />
        <entry keyword="brown" />
    </xsl:variable>

    <xsl:template match="inputText">
        <fo:block>
            <xsl:call-template name="multi-hilite">
                <xsl:with-param name="string" select="." />
                <xsl:with-param name="entries" select="exsl:node-set($dictionary)/entry" />
            </xsl:call-template>
        </fo:block>
    </xsl:template>

    <xsl:template name="multi-hilite">
        <xsl:param name="string"/>
        <xsl:param name="entries"/>
        <xsl:choose>
            <xsl:when test="$entries">
                <xsl:call-template name="multi-hilite">
                    <xsl:with-param name="string">
                        <xsl:call-template name="hilite">
                            <xsl:with-param name="text" select="$string" />
                            <xsl:with-param name="search-string" select="$entries[1]/@keyword" />
                        </xsl:call-template>
                    </xsl:with-param>
                    <xsl:with-param name="entries" select="$entries[position() > 1]"/>
                </xsl:call-template>
            </xsl:when>
            <xsl:otherwise>
                <xsl:value-of select="$string"/>
            </xsl:otherwise>
        </xsl:choose>
    </xsl:template>

    <xsl:template name="hilite">
        <xsl:param name="text"/>
        <xsl:param name="search-string"/>
        <xsl:choose>
            <xsl:when test="contains($text, $search-string)">
                <xsl:value-of select="substring-before($text, $search-string)"/>
                <fo:inline background-color="#eeee00">
                    <xsl:value-of select="$search-string"/>
                </fo:inline>
                <xsl:call-template name="hilite">
                    <xsl:with-param name="text" select="substring-after($text, $search-string)"/>
                    <xsl:with-param name="search-string" select="$search-string"/>
                </xsl:call-template>
            </xsl:when>
            <xsl:otherwise>
                <xsl:value-of select="$text"/>
            </xsl:otherwise>
        </xsl:choose>
    </xsl:template>

</xsl:stylesheet>
  • Returning a string from an extension function is not creating nodes on the XSLT side. Why are the XSLT 3 or XSLT 1 samples you linked to too complicated? – Martin Honnen May 30 '22 at 10:47
  • @MartinHonnen Because I have multiple words I need to highlight – Tumelo Galenos May 30 '22 at 11:35
  • @TumeloGalenos https://stackoverflow.com/a/60815422/3016153 – michael.hor257k May 30 '22 at 11:53
  • What exactly is not working, is there no yellow background on a `fo:inline` element? Is there a textual rendering of the markup e.g. you see `` in the PDF? Or is the word `toHightLight` in the PDF instead of the terms? I think in the Java code you want `input.replaceAll(toHighlight, "$0")`. – Martin Honnen May 30 '22 at 11:57
  • P.S. If you able to use Java functions, you may consider doing this in two steps: (1) use Java to place a prefix and a suffix around the words you want to replace, then (2) use a recursive named template to wrap those words in markup. This will allow you to use regex to identify the search phrases - something you cannot do in pure XSLT 1.0. – michael.hor257k May 30 '22 at 11:59
  • @MartinHonnen sorry, I get "" in the pdf as text. I think that's because of what you said: returning a string from an extension function does not create a node on the XSLT side. – Tumelo Galenos May 30 '22 at 12:19
  • @michael.hor257k looking into stackoverflow.com/a/60815422/3016153, I almost got it working for me. The multi-replace function won't budge when calling itself. It seems to lose the values for entries – Tumelo Galenos May 30 '22 at 14:36
  • @TumeloGalenos Post a [mcve] showing the problem. – michael.hor257k May 30 '22 at 14:43
  • @michael.hor257k I edited the question with a minimal reproducible example. It currently produces no output, but in the actual stylesheet the text does not get highlighted at all, although I did find the templates are being called. – Tumelo Galenos May 31 '22 at 08:41
  • I don't see your input. – michael.hor257k May 31 '22 at 09:44
  • @michael.hor257k The input is the Example object serialized to XML: brown fox The quick brown fox jumped over the fence. because the page you refer to uses a dictionary I don't use the keywords from the example, but the final xslt should do that instead. – Tumelo Galenos May 31 '22 at 11:05
  • OK I see the problem now. Those examples do not fit your case because you need to output markup, not text. See if the answer I posted helps. – michael.hor257k May 31 '22 at 11:56
  • @michael.hor257k excellent, thank you very much! – Tumelo Galenos May 31 '22 at 12:09

1 Answers1

1

Consider the following simplified example:

XML

<example>
    <keywords>
        <keyword>brown</keyword>
        <keyword>fox</keyword>
    </keywords>
    <inputText>The quick brown fox jumped over the fence.</inputText>
</example>

XSLT 1.0

<xsl:stylesheet version="1.0" 
xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:output method="xml" version="1.0" encoding="UTF-8" indent="yes"/>
<xsl:strip-space elements="*"/>

<xsl:template match="/example">
    <output>
        <xsl:call-template name="hilite-keywords">
            <xsl:with-param name="string" select="inputText"/>
            <xsl:with-param name="keywords" select="keywords/keyword"/>
        </xsl:call-template>
    </output>
</xsl:template>

<xsl:template name="hilite-keywords">
    <xsl:param name="string"/>
    <xsl:param name="keywords"/>
    <xsl:choose>
        <xsl:when test="$keywords">
            <xsl:variable name="keyword" select="$keywords[1]" />
            <xsl:choose>
                <xsl:when test="contains($string, $keyword)">
                    <!-- process substring-before with the remaining keywords -->
                    <xsl:call-template name="hilite-keywords">
                        <xsl:with-param name="string" select="substring-before($string, $keyword)"/>
                        <xsl:with-param name="keywords" select="$keywords[position() > 1]"/>
                    </xsl:call-template>
                    <!-- matched keyword -->
                    <hilite>
                        <xsl:value-of select="$keyword"/>
                    </hilite>
                    <!-- continue with substring-after -->
                    <xsl:call-template name="hilite-keywords">
                        <xsl:with-param name="string" select="substring-after($string, $keyword)"/>
                        <xsl:with-param name="keywords" select="$keywords"/>
                    </xsl:call-template>
                </xsl:when>
                <xsl:otherwise>
                    <!-- pass the entire string for processing with the remaining keywords -->
                    <xsl:call-template name="hilite-keywords">
                        <xsl:with-param name="string" select="$string"/>
                        <xsl:with-param name="keywords" select="$keywords[position() > 1]"/>
                    </xsl:call-template>
                </xsl:otherwise>
            </xsl:choose>
        </xsl:when>
        <xsl:otherwise>
            <xsl:value-of select="$string"/>
        </xsl:otherwise>
    </xsl:choose>
</xsl:template>

</xsl:stylesheet>

Result

<?xml version="1.0" encoding="UTF-8"?>
<output>The quick <hilite>brown</hilite> <hilite>fox</hilite> jumped over the fence.</output>
michael.hor257k
  • 113,275
  • 6
  • 33
  • 51