3 # This script takes a manpage written in github-flavored markdown and turns it
4 # into a html web page and a nroff man page. The input file must have the name
5 # of the program and the section in the format: NAME.NUM.md. The output files
6 # are written into the current directory named NAME.NUM.html and NAME.NUM. The
7 # input format has one extra extension: if a numbered list starts at 0, it is
8 # turned into a description list. The dl's dt tag is taken from the contents of
9 # the first tag inside the li, which is usually a p tag or a code tag. The
10 # cmarkgfm lib is used to transform the input file into html. The html.parser
11 # is used as a state machine that both tweaks the html and outputs the nroff
12 # data based on the html tags.
14 # Copyright (C) 2020 Wayne Davison
16 # This program is freely redistributable.
18 import sys, os, re, argparse, time
19 from html.parser import HTMLParser
21 CONSUMES_TXT = set('h1 h2 p li pre'.split())
26 <link href="https://fonts.googleapis.com/css2?family=Roboto&display=swap" rel="stylesheet">
32 font-family: 'Roboto', sans-serif;
38 margin-block-start: 0em;
45 <div style="float: right"><p><i>%s</i></p></div>
50 .TH "%s" "%s" "%s" "" ""
# Font markers as (placeholder, nroff escape) pairs. Element 0 is a control
# character appended to the collected text when a font-changing tag is seen
# (BOLD for code/strong/bold, ULIN for i/em, NORM when such a tag closes);
# element 1 is the roff font escape that the .replace() chain substitutes for
# the placeholder after literal backslashes in the text have been doubled.
56 NORM_FONT = ('\1', r"\fP")
57 BOLD_FONT = ('\2', r"\fB")
58 ULIN_FONT = ('\3', r"\fI")
61 fi = re.match(r'^(?P<fn>(?P<srcdir>.+/)?(?P<name>(?P<prog>[^/]+)\.(?P<sect>\d+))\.md)$', args.mdfile)
63 die('Failed to parse NAME.NUM.md out of input file:', args.mdfile)
64 fi = argparse.Namespace(**fi.groupdict())
69 fi.title = fi.prog + '(' + fi.sect + ') man page'
72 chk_files = 'latest-year.h Makefile'.split()
75 st = os.lstat(fi.srcdir + fn)
77 die('Failed to find', fi.srcdir + fn)
79 fi.date = time.strftime('%d %b %Y', time.localtime(st.st_mtime))
83 with open(fi.srcdir + 'Makefile', 'r', encoding='utf-8') as fh:
85 m = re.match(r'^(\w+)=(.+)', line)
88 var, val = (m[1], m[2])
89 while re.search(r'\$\{', val):
90 val = re.sub(r'\$\{(\w+)\}', lambda m: env_subs[m[1]], val)
95 with open(fi.fn, 'r', encoding='utf-8') as fh:
96 txt = re.sub(r'@VERSION@', env_subs['VERSION'], fh.read())
97 txt = re.sub(r'@LIBDIR@', env_subs['libdir'], txt)
98 fi.html_in = cmarkgfm.github_flavored_markdown_to_html(txt)
104 print("The test was successful.")
107 for fn, txt in ((fi.name + '.html', fi.html_out), (fi.name, fi.man_out)):
109 with open(fn, 'w', encoding='utf-8') as fh:
113 class HtmlToManPage(HTMLParser):
114 def __init__(self, fi):
115 HTMLParser.__init__(self, convert_charrefs=True)
117 st = self.state = argparse.Namespace(
120 at_first_tag_in_li = False,
121 at_first_tag_in_dd = False,
124 html_out = [ HTML_START % fi.title ],
125 man_out = [ MAN_START % (fi.prog, fi.sect, fi.date) ],
129 self.feed(fi.html_in)
132 st.html_out.append(HTML_END % fi.date)
133 st.man_out.append(MAN_END)
135 fi.html_out = ''.join(st.html_out)
138 fi.man_out = ''.join(st.man_out)
142 def handle_starttag(self, tag, attrs_list):
145 self.output_debug('START', (tag, attrs_list))
146 if st.at_first_tag_in_li:
147 if st.list_state[-1] == 'dl':
152 st.html_out.append('<dt>')
153 st.at_first_tag_in_li = False
155 if not st.at_first_tag_in_dd:
156 st.man_out.append(st.p_macro)
158 st.at_first_tag_in_li = True
159 lstate = st.list_state[-1]
163 st.man_out.append(".IP o\n")
165 st.man_out.append(".IP " + str(lstate) + ".\n")
166 st.list_state[-1] += 1
167 elif tag == 'blockquote':
168 st.man_out.append(".RS 4\n")
171 st.man_out.append(st.p_macro + ".nf\n")
172 elif tag == 'code' and not st.in_pre:
173 st.txt += BOLD_FONT[0]
174 elif tag == 'strong' or tag == 'bold':
175 st.txt += BOLD_FONT[0]
176 elif tag == 'i' or tag == 'em':
177 st.txt += ULIN_FONT[0]
180 for var, val in attrs_list:
182 start = int(val) # We only support integers.
185 st.man_out.append(".RS\n")
189 st.list_state.append('dl')
191 st.list_state.append(start)
192 st.man_out.append(st.p_macro)
195 st.man_out.append(st.p_macro)
197 st.man_out.append(".RS\n")
199 st.list_state.append('o')
200 st.html_out.append('<' + tag + ' '.join( ' ' + var + '="' + safeText(val) + '"' for var, val in attrs_list) + '>')
201 st.at_first_tag_in_dd = False
204 def handle_endtag(self, tag):
207 self.output_debug('END', (tag,))
208 if tag in CONSUMES_TXT or st.dt_from == tag:
215 st.man_out.append(st.p_macro + '.SH "' + manify(txt) + '"\n')
217 st.man_out.append(st.p_macro + '.SS "' + manify(txt) + '"\n')
219 if st.dt_from == 'p':
221 st.man_out.append('.IP "' + manify(txt) + '"\n')
224 st.man_out.append(manify(txt) + "\n")
226 if st.list_state[-1] == 'dl':
227 if st.at_first_tag_in_li:
228 die("Invalid 0. -> td translation")
231 st.man_out.append(manify(txt) + "\n")
232 st.at_first_tag_in_li = False
233 elif tag == 'blockquote':
234 st.man_out.append(".RE\n")
237 st.man_out.append(manify(txt) + "\n.fi\n")
238 elif tag == 'code' and not st.in_pre:
239 add_to_txt = NORM_FONT[0]
240 elif tag == 'strong' or tag == 'bold':
241 add_to_txt = NORM_FONT[0]
242 elif tag == 'i' or tag == 'em':
243 add_to_txt = NORM_FONT[0]
244 elif tag == 'ol' or tag == 'ul':
245 if st.list_state.pop() == 'dl':
248 st.man_out.append(".RE\n")
251 st.at_first_tag_in_dd = False
252 st.html_out.append('</' + tag + '>')
258 if st.dt_from == tag:
259 st.man_out.append('.IP "' + manify(txt) + '"\n')
260 st.html_out.append('</dt><dd>')
261 st.at_first_tag_in_dd = True
264 st.html_out.append('<dd>')
265 st.at_first_tag_in_dd = True
268 def handle_data(self, data):
271 self.output_debug('DATA', (data,))
272 st.html_out.append(safeText(data))
276 def output_debug(self, event, extra):
280 if len(st.html_out) > 2:
281 st.html_out = ['...'] + st.html_out[-2:]
282 if len(st.man_out) > 2:
283 st.man_out = ['...'] + st.man_out[-2:]
285 pprint.PrettyPrinter(indent=2).pprint(vars(st))
289 return re.sub(r"^(['.])", r'\&\1', txt.replace('\\', '\\\\')
290 .replace(NORM_FONT[0], NORM_FONT[1])
291 .replace(BOLD_FONT[0], BOLD_FONT[1])
292 .replace(ULIN_FONT[0], ULIN_FONT[1]), flags=re.M)
296 return txt.replace('&', '&').replace('<', '<').replace('>', '>').replace('"', '"')
300 print(*msg, file=sys.stderr)
308 if __name__ == '__main__':
309 parser = argparse.ArgumentParser(description='Transform a NAME.NUM.md markdown file into a NAME.NUM.html web page & a NAME.NUM man page.', add_help=False)
310 parser.add_argument('--test', action='store_true', help='Test if we can parse the input w/o updating any files.')
311 parser.add_argument('--debug', '-D', action='count', default=0, help='Output copious info on the html parsing.')
312 parser.add_argument("--help", "-h", action="help", help="Output this help message and exit.")
313 parser.add_argument('mdfile', help="The NAME.NUM.md file to parse.")
314 args = parser.parse_args()
319 die("The cmarkgfm library is not available for python3.")