# Origin:  commit 513997e127e906d91717c345bf5681ef548993cc
# Author:  Vadim Pisarevsky
# Date:    Mon, 28 Feb 2011 21:31:49 +0000
# Subject: [PATCH] utility to clean up converted from TeX rst docs
# File:    docroot/reformat.py (new file, mode 100644)
"""Clean up reStructuredText documents that were converted from TeX.

Usage:
    reformat.py <input> <output>

The input file is read in full, passed through a fixed, ordered list of
regular-expression rewrites (see ``_REWRITE_RULES``), and the result is
written to the output file.
"""

import os
import re
import sys

# Ordered (pattern, replacement) pairs applied one after another with re.sub.
# ORDER MATTERS: every rule sees the output of the rules before it.  In
# particular the wiki-link rule matches text that the preceding "line break
# before `" rule has already rewritten (e.g. "\n\n`id=..." -> "\n `id=..."),
# so do not reorder these entries without re-checking the output.
_REWRITE_RULES = (
    # normalize line endings
    (r"\r\n", "\n"),
    # remove trailing whitespace
    (r"[ \t]+\n", "\n"),
    # compress any run of empty lines to a single empty line; a quantifier
    # handles runs of arbitrary length in one pass
    (r"\n{3,}", "\n\n"),
    # remove the empty line before ".." that terminates a code block
    (r"\n\n\.\.\n", "\n..\n"),
    # move "::" starting a code block to the end of the previous line
    (r"\n\n::\n", " ::\n"),
    # remove extra line breaks before/after a lone "_" or ","
    (r"\n[ \t]*([_,])\n", r"\1"),
    # remove extra line breaks after `
    (r"`\n", "` "),
    # remove extra line breaks before `
    (r"\n[ \t]*`", " `"),
    # remove links to the wiki
    (r"\n[ \t]*`id=\d[^`]+`__\n", ""),
    # remove trailing whitespace one more time (earlier rules may add some)
    (r"[ \t]+\n", "\n"),
)


def reformat_text(s):
    """Return *s* with all clean-up rewrites applied, in order.

    This is a pure string-to-string function: it performs no I/O.
    """
    for pattern, replacement in _REWRITE_RULES:
        s = re.sub(pattern, replacement, s)
    return s


def main(argv=None):
    """Read ``argv[1]``, clean it up and write the result to ``argv[2]``.

    *argv* defaults to ``sys.argv``.  Returns a process exit status: 0 on
    success, 2 when the two required file arguments are missing.  Arguments
    beyond the first two are ignored.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 3:
        prog = argv[0] if argv else "reformat.py"
        sys.stderr.write("usage: %s <input> <output>\n" % prog)
        return 2

    # Read everything before opening the output, so the same path may be
    # given for both arguments.
    with open(argv[1], "rt") as finput:
        s = finput.read()

    s = reformat_text(s)

    with open(argv[2], "wt") as foutput:
        foutput.write(s)
    return 0


if __name__ == "__main__":
    sys.exit(main())