# /scripts/ob_export_remove_invalid_dashes.py
# https://github.com/wangmxf/lesswrong · Python · 28 lines · 20 code · 7 blank · 1 comment · 1 complexity · f9ade595d7f5e84f5b26e6a7b56c8a16 MD5 · raw file
#!/usr/bin/env python
import io
import re
import sys
# A line made of exactly five or eight dashes (optionally CR-terminated) that
# is NOT followed by an eight-dash line, one of the listed section keywords,
# or the end of the input.
_INVALID_DASHES_RE = re.compile(
    r'^-----(---)?(\r)?\n'
    r'(?!--------|BODY:|EXTENDED BODY:|EXCERPT:|KEYWORDS:|AUTHOR:|COMMENT:|PING:|\Z)',
    re.MULTILINE)

# ASCII whitespace, spelled out instead of ``\s`` so the match set is the same
# on Python 2 (where ``\s`` is ASCII-only without re.UNICODE) and Python 3
# (where ``\s`` on text also matches non-ASCII whitespace).
_WHITESPACE = r'[ \t\n\r\f\v]'

# NOTE(review): the original pattern embedded literal control characters in
# its character class, and they did not survive in the copy under review.
# This range assumes "C0 control characters other than ASCII whitespace";
# confirm against the upstream file before relying on the exact set.
_CONTROL_CHARS = r'[\x00-\x08\x0e-\x1f]'


def _clean_export(text):
    """Return *text* with invalid dash lines and control characters fixed."""
    # Shorten stray five/eight-dash lines to four dashes. The replacement
    # always ends in a bare "\n", so a matched "\r" is dropped.
    text = _INVALID_DASHES_RE.sub(u'----\n', text)
    # A control character next to whitespace is simply removed (the
    # neighbouring whitespace is kept) ...
    text = re.sub('(' + _WHITESPACE + ')' + _CONTROL_CHARS, r'\1', text)
    text = re.sub(_CONTROL_CHARS + '(' + _WHITESPACE + ')', r'\1', text)
    # ... and any control character still left becomes a single space.
    return re.sub(_CONTROL_CHARS, u' ', text)


def main():
    """Clean the file named by argv[1] and write the result to argv[2].

    Both files are treated as UTF-8. Lines consisting of exactly five or
    eight dashes that are not followed by a recognised section keyword, an
    eight-dash line, or the end of the file are replaced by ``----``;
    control characters are then removed or replaced by spaces.

    Raises:
        SystemExit: with a usage message when fewer than two file names
            are given on the command line.
        UnicodeDecodeError: if the input file is not valid UTF-8.
    """
    if len(sys.argv) < 3:
        sys.exit('usage: %s INFILE OUTFILE' % sys.argv[0])
    infilename, outfilename = sys.argv[1:3]

    # newline='' turns off newline translation in both directions, so line
    # endings the patterns do not touch are written back unchanged.
    with io.open(infilename, encoding='utf-8', newline='') as infile:
        buf = infile.read()

    buf = _clean_export(buf)

    with io.open(outfilename, 'w', encoding='utf-8', newline='') as outfile:
        outfile.write(buf)
# Run the clean-up only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()