1

Say I have some reStructuredText source in a string:

# Sample reStructuredText input: a section title, one paragraph, and a
# ``code-block`` directive. The literal starts with a newline because the text
# begins on the line after the opening quotes.
source = """
============
Introduction
============

Hello world.

.. code-block:: bash

    $ echo Greetings.


"""

import sys

import docutils.nodes
import docutils.parsers.rst
import docutils.utils
import sphinx.writers.text
import sphinx.builders.text

def parse_rst(text: str) -> docutils.nodes.document:
    """Parse reStructuredText source into a docutils document tree.

    Args:
        text: The reStructuredText source to parse.

    Returns:
        The new document node after the reST parser has populated it.
    """
    # The surrounding imports never import ``docutils.frontend`` explicitly, so
    # import it here instead of relying on another docutils module loading it.
    import docutils.frontend

    components = (docutils.parsers.rst.Parser,)
    if hasattr(docutils.frontend, "get_default_settings"):
        # Preferred where available; newer docutils deprecates OptionParser.
        settings = docutils.frontend.get_default_settings(*components)
    else:
        # Fallback for docutils releases that predate get_default_settings().
        settings = docutils.frontend.OptionParser(
            components=components
        ).get_default_values()

    document = docutils.utils.new_document('<rst-doc>', settings=settings)
    docutils.parsers.rst.Parser().parse(text, document)
    return document

if __name__ == '__main__':        
    # Parse the sample reST text above into a docutils document tree.
    document = parse_rst(source)

I'd like to convert it into plain text without the reST markup using Python.

I tried to use sphinx.builders.text.TextBuilder but it seems to want an App object, not a string.


  • Here is a related question about doing it manually on the command-line with files instead of strings.
  • Parsing code comes from this answer
Hatshepsut
  • 5,962
  • 8
  • 44
  • 80

2 Answers

2

This code works. It relies on some hacks, such as passing a fake app object with placeholder directories; there may be a better way.

import sys
import textwrap
import types

import docutils.nodes
import docutils.parsers.rst
import docutils.utils
import sphinx.writers.text
import sphinx.builders.text
import sphinx.util.osutil


def parse_rst(text: str) -> docutils.nodes.document:
    """Parse reStructuredText source into a docutils document tree.

    Args:
        text: The reStructuredText source to parse.

    Returns:
        The new document node after the reST parser has populated it.
    """
    # ``docutils.frontend`` is not among the imports at the top of this
    # snippet; import it explicitly rather than depending on another docutils
    # module having loaded it as a side effect.
    import docutils.frontend

    components = (docutils.parsers.rst.Parser,)
    default_settings = getattr(docutils.frontend, "get_default_settings", None)
    if default_settings is not None:
        # Preferred where available; newer docutils deprecates OptionParser.
        settings = default_settings(*components)
    else:
        # Fallback for docutils releases that predate get_default_settings().
        settings = docutils.frontend.OptionParser(
            components=components
        ).get_default_values()

    document = docutils.utils.new_document("<rst-doc>", settings=settings)
    parser = docutils.parsers.rst.Parser()
    parser.parse(text, document)
    return document


if __name__ == "__main__":
    # Sample reST input; dedent removes the indentation shared by its lines.
    source = textwrap.dedent(
        """\
    ============
    Introduction
    ============

    Hello world.

    .. code-block:: bash

        $ echo Greetings.


    """
    )

    document = parse_rst(source)

    # Stand-in for the Sphinx configuration, carrying only the text_* values
    # set here.
    stub_config = types.SimpleNamespace(
        text_newlines="native",
        text_sectionchars="=",
        text_add_secnumbers=False,
        text_secnumber_suffix=".",
    )

    # Stand-in registry whose create_translator always builds a TextTranslator
    # for the document parsed above.
    stub_registry = types.SimpleNamespace(
        create_translator=lambda self, something, new_builder: sphinx.writers.text.TextTranslator(
            document, new_builder
        )
    )

    # Fake application object in place of a real Sphinx app.
    # NOTE(review): presumably these are the attributes TextBuilder reads;
    # confirm against the installed Sphinx version.
    stub_app = types.SimpleNamespace(
        srcdir=None,
        confdir=None,
        outdir=None,
        doctreedir="/",
        config=stub_config,
        tags=set(),
        registry=stub_registry,
    )

    text_builder = sphinx.builders.text.TextBuilder(stub_app)
    text_translator = sphinx.writers.text.TextTranslator(document, text_builder)

    # Walk the document tree with the translator, then print its body.
    document.walkabout(text_translator)
    print(text_translator.body)

Output:

    Introduction
    ============

    Hello world.

       $ echo Greetings.
Hatshepsut
  • 5,962
  • 8
  • 44
  • 80
0

Sphinx comes with a TextBuilder. From the command line:

make text
Steve Piercy
  • 13,693
  • 1
  • 44
  • 57