Kevin Shannon

35
reputation
5
import argparse
import os
import re


def main(args):
    """Run the conversion selected by the parsed command-line arguments.

    Args:
        args: Namespace with the attributes ``single``, ``batch``, ``input``
            and ``output``. In single mode ``input``/``output`` are file
            paths; in batch mode they are directories.

    In batch mode every ``*.txt`` entry of the input directory is converted
    to a ``*.md`` file of the same stem in the output directory.
    """
    if args.single:
        convert(args.input, args.output)
    elif args.batch:
        # os.listdir(None) lists the current directory, so mirror that here
        # when no input directory was given.
        source_dir = args.input if args.input is not None else os.curdir
        # Without an explicit output directory, write next to the sources.
        target_dir = args.output if args.output is not None else source_dir
        for name in os.listdir(source_dir):
            stem, extension = os.path.splitext(name)
            # Only a real ".txt" suffix qualifies; "notes.txt.bak" does not.
            if extension != '.txt':
                continue
            # os.listdir yields bare names, so both paths must be anchored
            # to their directories before being handed to convert().
            convert(
                os.path.join(source_dir, name),
                os.path.join(target_dir, stem + '.md'),
            )

def _number_ordered_list(match):
    """Rewrite one run of wikitext ``#`` list lines as a Markdown numbered list."""
    counters = []  # counters[d - 1] is the running item number at depth d
    numbered = []
    for line in match.group(0).split('\n'):
        # Only the leading '#' characters express nesting; a '#' inside the
        # item text (e.g. "issue #5") must not change the depth.
        depth = len(line) - len(line.lstrip('#'))
        content = line[depth:].strip()
        # Returning to a shallower level ends any deeper lists, so their
        # numbering restarts from 1 the next time they appear.
        del counters[depth:]
        counters.extend([0] * (depth - len(counters)))
        counters[depth - 1] += 1
        numbered.append('   ' * (depth - 1) + f'{counters[depth - 1]}. {content}')
    return '\n'.join(numbered)


def _convert_table(match):
    """Rewrite the body of a wikitable as pipe-separated Markdown rows."""
    # Rows are separated by "|-"; whatever precedes the first separator is
    # dropped. Inside a row chunk the first and last split pieces are the
    # remains of the separator line and the trailing newline, hence [1:-1].
    rows = [
        [cell.strip('!| ') for cell in chunk.split('\n')[1:-1]]
        for chunk in match.group(1).split('|-')
    ][1:]
    if not rows:
        # Nothing to convert: leave the original markup untouched rather
        # than failing on a table without rows.
        return match.group(0)
    # Markdown needs a delimiter row right after the header row.
    rows.insert(1, ['-'] * len(rows[0]))
    return '\n'.join('|'.join(row) for row in rows)


def convert(input, output):
    """Convert one wikitext file to GitLab-flavored Markdown.

    Handles the ``__TOC__`` marker, ``#`` ordered lists, ``= ... =`` headers,
    ``[[target|label]]`` links, ``*`` unordered lists and ``wikitable``
    tables. All other text is copied through unchanged.

    Args:
        input: Path of the wikitext file to read.
        output: Path of the Markdown file to write (overwritten if present).

    Raises:
        OSError: If ``input`` cannot be read or ``output`` cannot be written.
    """
    with open(input, 'r') as source:
        text = source.read()

    # Table of contents marker.
    text = re.sub(r'__TOC__\n', '[TOC]\n', text)

    # Ordered lists. This must run before headers are converted, because
    # Markdown headers also start with '#'. A list is a line starting with
    # "# " followed by any number of further lines starting with '#'.
    text = re.sub(
        r'^# .*(?:\n#.*)*', _number_ordered_list, text, flags=re.MULTILINE
    )

    # Headers: "== Title ==" (spaces optional) becomes "## Title". Anchored
    # to whole lines so an '=' in running text is never treated as a header.
    text = re.sub(
        r'^(={1,6})[ \t]*([^=\n].*?)[ \t]*={1,6}[ \t]*$',
        lambda m: '#' * len(m.group(1)) + f' {m.group(2)}',
        text,
        flags=re.MULTILINE,
    )

    # Links: "[[target|label]]" -> "[label](target)"; the label defaults to
    # the target when omitted.
    text = re.sub(
        r'\[\[(.+?)(\|(.+?))?\]\]',
        lambda m: f'[{m.group(3) if m.group(3) else m.group(1)}]({m.group(1)})',
        text,
    )

    # Unordered lists: only asterisks at the start of a line are markers;
    # each extra asterisk adds one two-space indentation level.
    text = re.sub(
        r'^(\*+)[ \t]*(.*)$',
        lambda m: '  ' * (len(m.group(1)) - 1) + '- ' + m.group(2),
        text,
        flags=re.MULTILINE,
    )

    # Tables: everything between the "{| class=...wikitable..." line and "|}".
    text = re.sub(
        r'\{.*class.*wikitable.*\n([\S\s]*?)\|\}', _convert_table, text
    )

    with open(output, 'w') as out:
        out.write(text)

# Command-line interface. Exactly one of --batch / --single must be given;
# --input and --output are file paths in single mode and directories in
# batch mode.
parser = argparse.ArgumentParser(description='Convert documents from wikitext to gitlab flavored markdown')
parser.add_argument('-i', '--input', help='path of input')
parser.add_argument('-o', '--output', help='path of output')
group = parser.add_mutually_exclusive_group(required=True)
group.add_argument('-b', '--batch', action='store_true', help='enables batch mode: pass directories to input and output to have all documents converted at once')
group.add_argument('-s', '--single', action='store_true', help='enables single mode: pass file paths to input and output to have one document converted at a time')

if __name__ == '__main__':
    # Parse argv only when executed as a script: doing it at import time
    # would consume the importing program's arguments and could exit it.
    args = parser.parse_args()
    main(args)