Annotation of embedaddon/rsync/md2man, revision 1.1
1.1 ! misho 1: #!/usr/bin/env python3
! 2:
! 3: # This script takes a manpage written in markdown and turns it into an html web
! 4: # page and a nroff man page. The input file must have the name of the program
! 5: # and the section in this format: NAME.NUM.md. The output files are written
! 6: # into the current directory named NAME.NUM.html and NAME.NUM. The input
! 7: # format has one extra extension: if a numbered list starts at 0, it is turned
! 8: # into a description list. The dl's dt tag is taken from the contents of the
! 9: # first tag inside the li, which is usually a p, code, or strong tag. The
! 10: # cmarkgfm or commonmark lib is used to transform the input file into html.
! 11: # The html.parser is used as a state machine that both tweaks the html and
! 12: # outputs the nroff data based on the html tags.
! 13: #
! 14: # Copyright (C) 2020 Wayne Davison
! 15: #
! 16: # This program is freely redistributable.
! 17:
! 18: import sys, os, re, argparse, subprocess, time
! 19: from html.parser import HTMLParser
! 20:
# Tags whose closing event consumes the text accumulated in the parser state.
CONSUMES_TXT = set('h1 h2 p li pre'.split())

# Start of the generated web page; the single %s receives the page title.
HTML_START = """\
<html><head>
<title>%s</title>
<link href="https://fonts.googleapis.com/css2?family=Roboto&family=Roboto+Mono&display=swap" rel="stylesheet">
<style>
body {
max-width: 50em;
margin: auto;
}
body, b, strong, u {
font-family: 'Roboto', sans-serif;
}
code {
font-family: 'Roboto Mono', monospace;
font-weight: bold;
white-space: pre;
}
pre code {
display: block;
font-weight: normal;
}
blockquote pre code {
background: #f1f1f1;
}
dd p:first-of-type {
margin-block-start: 0em;
}
</style>
</head><body>
"""

# End of the generated web page; the single %s receives the date string.
HTML_END = """\
<div style="float: right"><p><i>%s</i></p></div>
</body></html>
"""

# The .TH line of the man page; filled with a 4-tuple (see fi.man_headings).
MAN_START = r"""
.TH "%s" "%s" "%s" "%s" "User Commands"
""".lstrip()

# Nothing is appended after the man page body.
MAN_END = """\
"""

# Each pair is (in-text marker, nroff sequence).  The markers are single
# characters placed into the accumulated text while parsing; manify() swaps
# them for the nroff sequence when the text is written to the man page.
NORM_FONT = ('\1', r"\fP")
BOLD_FONT = ('\2', r"\fB")
UNDR_FONT = ('\3', r"\fI")
NBR_DASH = ('\4', r"\-")
NBR_SPACE = ('\xa0', r"\ ")

# Set in the __main__ section to the markdown -> html function that is available.
md_parser = None
! 73:
def main():
    """Turn the NAME.NUM.md file named by args.mdfile into NAME.NUM.html and NAME.NUM.

    Reads the module-level ``args`` (parsed command line) and ``md_parser``
    (markdown -> html function).  The @VERSION@ and @LIBDIR@ placeholders in
    the markdown are substituted before conversion; with --test the values are
    fixed and no output files are written.  Any failure is reported via die(),
    which exits the program.
    """
    fi = re.match(r'^(?P<fn>(?P<srcdir>.+/)?(?P<name>(?P<prog>[^/]+)\.(?P<sect>\d+))\.md)$', args.mdfile)
    if not fi:
        die('Failed to parse NAME.NUM.md out of input file:', args.mdfile)
    fi = argparse.Namespace(**fi.groupdict())

    if not fi.srcdir:
        fi.srcdir = './'

    fi.title = fi.prog + '(' + fi.sect + ') man page'
    fi.mtime = 0

    # Prefer the time of the last git commit when the source dir is a checkout.
    git_dir = fi.srcdir + '.git'
    if os.path.lexists(git_dir):
        fi.mtime = int(subprocess.check_output(['git', '--git-dir', git_dir, 'log', '-1', '--format=%at']))

    env_subs = { 'prefix': os.environ.get('RSYNC_OVERRIDE_PREFIX', None) }

    if args.test:
        env_subs['VERSION'] = '1.0.0'
        env_subs['libdir'] = '/usr'
    else:
        version_fn = fi.srcdir + 'version.h'

        for fn in (version_fn, 'Makefile'):
            try:
                st = os.lstat(fn)
            except OSError:
                # fn already includes the srcdir prefix where one applies.
                die('Failed to find', fn)
            if not fi.mtime:
                fi.mtime = st.st_mtime

        with open(version_fn, 'r', encoding='utf-8') as fh:
            txt = fh.read()
        m = re.search(r'"(.+?)"', txt)
        if not m:
            die('Failed to find a quoted version string in', version_fn)
        env_subs['VERSION'] = m.group(1)

        # Collect the VAR=VALUE lines of the Makefile up to and including srcdir,
        # expanding ${var} references from the values seen so far.
        with open('Makefile', 'r', encoding='utf-8') as fh:
            for line in fh:
                m = re.match(r'^(\w+)=(.+)', line)
                if not m:
                    continue
                var, val = (m.group(1), m.group(2))
                if var == 'prefix' and env_subs[var] is not None:
                    continue # The environment override wins over the Makefile.
                while '${' in val:
                    val, count = re.subn(r'\$\{(\w+)\}', lambda m: env_subs[m.group(1)], val)
                    if not count:
                        # A "${" that is not a plain ${name} would otherwise loop forever.
                        die('Failed to expand Makefile value for', var + ':', val)
                env_subs[var] = val
                if var == 'srcdir':
                    break

        if env_subs.get('libdir') is None:
            die('Failed to find a libdir value in Makefile')

    with open(fi.fn, 'r', encoding='utf-8') as fh:
        txt = fh.read()

    # Plain string replacement: the values must not be treated as regex templates.
    txt = txt.replace('@VERSION@', env_subs['VERSION'])
    txt = txt.replace('@LIBDIR@', env_subs['libdir'])

    fi.html_in = md_parser(txt)
    txt = None

    fi.date = time.strftime('%d %b %Y', time.localtime(fi.mtime))
    fi.man_headings = (fi.prog, fi.sect, fi.date, fi.prog + ' ' + env_subs['VERSION'])

    # The parser stores its results in fi.html_out and fi.man_out.
    HtmlToManPage(fi)

    if args.test:
        print("The test was successful.")
        return

    for fn, txt in ((fi.name + '.html', fi.html_out), (fi.name, fi.man_out)):
        print("Wrote:", fn)
        with open(fn, 'w', encoding='utf-8') as fh:
            fh.write(txt)
! 145:
! 146:
def html_via_commonmark(txt):
    """Return the html rendering of the markdown text using the commonmark lib."""
    # Imported here so the function does not depend on the __main__ section
    # having put the module into the global namespace.
    import commonmark
    return commonmark.HtmlRenderer().render(commonmark.Parser().parse(txt))
! 149:
! 150:
class HtmlToManPage(HTMLParser):
    """State machine that tweaks the html and builds the nroff man page from it.

    Constructing an instance does all the work: fi.html_in is parsed, and the
    results are stored in fi.html_out (the finished web page) and fi.man_out
    (the finished man page).  An ordered list that starts at 0 is turned into
    a description list (dl/dt/dd) in the html.
    """

    def __init__(self, fi):
        """Parse fi.html_in and leave the output in fi.html_out and fi.man_out.

        Reads fi.title, fi.man_headings, and fi.date for the page start/end.
        """
        super().__init__(convert_charrefs=True)

        st = self.state = argparse.Namespace(
            list_state = [ ],            # One entry per open list: 'dl', 'o' (ul), or the next ol number.
            p_macro = ".P\n",            # Paragraph macro; ".IP\n" while inside a list.
            at_first_tag_in_li = False,
            at_first_tag_in_dd = False,
            dt_from = None,              # The tag whose contents become the current dt.
            in_pre = False,
            in_code = False,
            html_out = [ HTML_START % fi.title ],
            man_out = [ MAN_START % fi.man_headings ],
            txt = '',                    # Text accumulated for the man page until a tag consumes it.
            )

        self.feed(fi.html_in)
        fi.html_in = None

        st.html_out.append(HTML_END % fi.date)
        st.man_out.append(MAN_END)

        fi.html_out = ''.join(st.html_out)
        st.html_out = None

        fi.man_out = ''.join(st.man_out)
        st.man_out = None


    def handle_starttag(self, tag, attrs_list):
        """Emit the (possibly renamed) start tag to the html and the matching nroff."""
        st = self.state
        if args.debug:
            self.output_debug('START', (tag, attrs_list))
        if st.at_first_tag_in_li:
            if st.list_state[-1] == 'dl':
                # The first tag inside the li supplies the dt contents.
                st.dt_from = tag
                if tag == 'p':
                    tag = 'dt'
                else:
                    st.html_out.append('<dt>')
            elif tag == 'p':
                st.at_first_tag_in_dd = True # Kluge to suppress a .P at the start of an li.
            st.at_first_tag_in_li = False
        if tag == 'p':
            if not st.at_first_tag_in_dd:
                st.man_out.append(st.p_macro)
        elif tag == 'li':
            st.at_first_tag_in_li = True
            lstate = st.list_state[-1]
            if lstate == 'dl':
                return # No <li> is output for a description list item.
            if lstate == 'o':
                st.man_out.append(".IP o\n")
            else:
                st.man_out.append(".IP " + str(lstate) + ".\n")
                st.list_state[-1] += 1
        elif tag == 'blockquote':
            st.man_out.append(".RS 4\n")
        elif tag == 'pre':
            st.in_pre = True
            st.man_out.append(st.p_macro + ".nf\n")
        elif tag == 'code' and not st.in_pre:
            st.in_code = True
            st.txt += BOLD_FONT[0]
        elif tag == 'strong' or tag == 'b':
            st.txt += BOLD_FONT[0]
        elif tag == 'em' or tag == 'i':
            tag = 'u' # Change it into underline to be more like the man page
            st.txt += UNDR_FONT[0]
        elif tag == 'ol':
            start = 1
            for var, val in attrs_list:
                if var == 'start':
                    start = int(val) # We only support integers.
                    break
            if st.list_state:
                st.man_out.append(".RS\n")
            if start == 0:
                # A list that starts at 0 becomes a description list.
                tag = 'dl'
                attrs_list = [ ]
                st.list_state.append('dl')
            else:
                st.list_state.append(start)
            st.man_out.append(st.p_macro)
            st.p_macro = ".IP\n"
        elif tag == 'ul':
            st.man_out.append(st.p_macro)
            if st.list_state:
                st.man_out.append(".RS\n")
                st.p_macro = ".IP\n"
            st.list_state.append('o')
        st.html_out.append('<' + tag + ''.join(' ' + var + '="' + htmlify(val) + '"' for var, val in attrs_list) + '>')
        st.at_first_tag_in_dd = False


    def handle_endtag(self, tag):
        """Emit the (possibly renamed) end tag and flush consumed text to the man page."""
        st = self.state
        if args.debug:
            self.output_debug('END', (tag,))
        if tag in CONSUMES_TXT or st.dt_from == tag:
            txt = st.txt.strip()
            st.txt = ''
        else:
            txt = None
        add_to_txt = None
        if tag == 'h1':
            st.man_out.append(st.p_macro + '.SH "' + manify(txt) + '"\n')
        elif tag == 'h2':
            st.man_out.append(st.p_macro + '.SS "' + manify(txt) + '"\n')
        elif tag == 'p':
            if st.dt_from == 'p':
                tag = 'dt'
                st.man_out.append('.IP "' + manify(txt) + '"\n')
                st.dt_from = None
            elif txt != '':
                st.man_out.append(manify(txt) + "\n")
        elif tag == 'li':
            if st.list_state[-1] == 'dl':
                if st.at_first_tag_in_li:
                    die("Invalid 0. -> td translation")
                tag = 'dd'
            if txt != '':
                st.man_out.append(manify(txt) + "\n")
            st.at_first_tag_in_li = False
        elif tag == 'blockquote':
            st.man_out.append(".RE\n")
        elif tag == 'pre':
            st.in_pre = False
            st.man_out.append(manify(txt) + "\n.fi\n")
        elif (tag == 'code' and not st.in_pre):
            st.in_code = False
            add_to_txt = NORM_FONT[0]
        elif tag == 'strong' or tag == 'b':
            add_to_txt = NORM_FONT[0]
        elif tag == 'em' or tag == 'i':
            tag = 'u' # Change it into underline to be more like the man page
            add_to_txt = NORM_FONT[0]
        elif tag == 'ol' or tag == 'ul':
            if st.list_state.pop() == 'dl':
                tag = 'dl'
            if st.list_state:
                st.man_out.append(".RE\n")
            else:
                st.p_macro = ".P\n"
            st.at_first_tag_in_dd = False
        st.html_out.append('</' + tag + '>')
        if add_to_txt:
            # The font reset goes onto the consumed text if there is any,
            # otherwise onto the text that is still accumulating.
            if txt is None:
                st.txt += add_to_txt
            else:
                txt += add_to_txt
        if st.dt_from == tag:
            st.man_out.append('.IP "' + manify(txt) + '"\n')
            st.html_out.append('</dt><dd>')
            st.at_first_tag_in_dd = True
            st.dt_from = None
        elif tag == 'dt':
            st.html_out.append('<dd>')
            st.at_first_tag_in_dd = True


    def handle_data(self, txt):
        """Append text to the html output and to the accumulated man page text."""
        st = self.state
        if args.debug:
            self.output_debug('DATA', (txt,))
        if st.in_pre:
            html = htmlify(txt)
        else:
            # Mark dashes (and the space before a stand-alone "--") as non-breaking.
            txt = re.sub(r'\s--(\s)', NBR_SPACE[0] + r'--\1', txt).replace('--', NBR_DASH[0]*2)
            txt = re.sub(r'(^|\W)-', r'\1' + NBR_DASH[0], txt)
            html = htmlify(txt)
            if st.in_code:
                txt = re.sub(r'\s', NBR_SPACE[0], txt)
                html = html.replace(NBR_DASH[0], '-').replace(NBR_SPACE[0], ' ') # <code> is non-breaking in CSS
        # The markers are written to the html as character entities.
        st.html_out.append(html.replace(NBR_SPACE[0], '&nbsp;').replace(NBR_DASH[0], '-&#8288;'))
        st.txt += txt


    def output_debug(self, event, extra):
        """Print the parser event and the current state (abbreviated unless --debug is repeated)."""
        import pprint
        st = self.state
        if args.debug < 2:
            # Work on a shallow copy so that trimming the output lists for
            # display does not affect the real state.
            st = argparse.Namespace(**vars(st))
            if len(st.html_out) > 2:
                st.html_out = ['...'] + st.html_out[-2:]
            if len(st.man_out) > 2:
                st.man_out = ['...'] + st.man_out[-2:]
        print(event, extra)
        pprint.PrettyPrinter(indent=2).pprint(vars(st))
! 341:
! 342:
def manify(txt):
    """Return txt converted into nroff input for the man page."""
    # Backslashes are doubled first so that the nroff escapes substituted for
    # the markers afterward are not doubled as well.
    txt = (txt.replace('\\', '\\\\')
            .replace(NBR_SPACE[0], NBR_SPACE[1])
            .replace(NBR_DASH[0], NBR_DASH[1])
            .replace(NORM_FONT[0], NORM_FONT[1])
            .replace(BOLD_FONT[0], BOLD_FONT[1])
            .replace(UNDR_FONT[0], UNDR_FONT[1]))
    # Prefix any line that starts with a quote or a dot with \& (applied per
    # line because of re.M).
    return re.sub(r"^(['.])", r'\&\1', txt, flags=re.M)
! 350:
! 351:
def htmlify(txt):
    """Return txt with the html-special characters & < > and " turned into entities."""
    # The ampersand must be handled first or it would re-escape the other entities.
    return txt.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;').replace('"', '&quot;')
! 354:
! 355:
def warn(*msg):
    """Print the message args to stderr, formatted as print() would."""
    print(*msg, file=sys.stderr)
! 358:
! 359:
def die(*msg):
    """Print the message args to stderr via warn() and exit with status 1."""
    warn(*msg)
    sys.exit(1)
! 363:
! 364:
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Transform a NAME.NUM.md markdown file into a NAME.NUM.html web page & a NAME.NUM man page.', add_help=False)
    parser.add_argument('--test', action='store_true', help='Test if we can parse the input w/o updating any files.')
    parser.add_argument('--debug', '-D', action='count', default=0, help='Output copious info on the html parsing. Repeat for even more.')
    parser.add_argument("--help", "-h", action="help", help="Output this help message and exit.")
    parser.add_argument('mdfile', help="The NAME.NUM.md file to parse.")
    args = parser.parse_args()

    # Use cmarkgfm when it is installed, falling back to commonmark.  Only a
    # failed import selects the fallback; other exceptions are not swallowed.
    try:
        import cmarkgfm
        md_parser = cmarkgfm.markdown_to_html
    except ImportError:
        try:
            import commonmark
            md_parser = html_via_commonmark
        except ImportError:
            die("Failed to find cmarkgfm or commonmark for python3.")

    main()
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>