# This script takes a manpage written in github-flavored markdown and turns it
# into a html web page and a nroff man page. The input file must have the name
# of the program and the section in the format: NAME.NUM.md. The output files
# are written into the current directory named NAME.NUM.html and NAME.NUM. The
# input format has one extra extension: if a numbered list starts at 0, it is
# turned into a description list. The dl's dt tag is taken from the contents of
# the first tag inside the li, which is usually a p tag or a code tag. The
# cmarkgfm lib is used to transform the input file into html. The html.parser
# is used as a state machine that both tweaks the html and outputs the nroff
# data based on the html tags.
14 # Copyright (C) 2020 Wayne Davison
16 # This program is freely redistributable.
18 import sys, os, re, argparse, time
19 from html.parser import HTMLParser
21 CONSUMES_TXT = set('h1 h2 p li pre'.split())
26 <link href="https://fonts.googleapis.com/css2?family=Roboto&display=swap" rel="stylesheet">
32 font-family: 'Roboto', sans-serif;
38 margin-block-start: 0em;
45 <div style="float: right"><p><i>%s</i></p></div>
50 .TH "%s" "%s" "%s" "" ""
56 NORM_FONT = ('\1', r"\fP")
57 BOLD_FONT = ('\2', r"\fB")
58 ULIN_FONT = ('\3', r"\fI")
65 fi = re.match(r'^(?P<fn>(?P<srcdir>.+/)?(?P<name>(?P<prog>[^/]+)\.(?P<sect>\d+))\.md)$', args.mdfile)
67 die('Failed to parse NAME.NUM.md out of input file:', args.mdfile)
68 fi = argparse.Namespace(**fi.groupdict())
72 chk_files = 'latest-year.h Makefile'.split()
75 st = os.lstat(fi.srcdir + fn)
77 die('Failed to find', fi.srcdir + fn)
81 with open(fi.srcdir + 'Makefile', 'r', encoding='utf-8') as fh:
83 m = re.match(r'^(\w+)=(.+)', line)
86 var, val = (m[1], m[2])
87 while re.search(r'\$\{', val):
88 val = re.sub(r'\$\{(\w+)\}', lambda m: env_subs[m[1]], val)
93 MarkdownToManPage(fi, mtime)
96 class MarkdownToManPage(HTMLParser):
97 def __init__(self, fi, mtime):
98 HTMLParser.__init__(self, convert_charrefs=True)
100 self.man_fh = self.html_fh = None
101 self.state = argparse.Namespace(
104 first_li_tag = False,
105 first_dd_tag = False,
111 self.date = time.strftime('%d %b %Y', time.localtime(mtime))
113 with open(fi.fn, 'r', encoding='utf-8') as fh:
114 txt = re.sub(r'@VERSION@', env_subs['VERSION'], fh.read())
115 txt = re.sub(r'@LIBDIR@', env_subs['libdir'], txt)
116 html = cmarkgfm.github_flavored_markdown_to_html(txt)
120 self.html_fh = open(os.devnull, 'w', encoding='utf-8')
121 self.man_fh = self.html_fh
123 self.html_fn = fi.name + '.html'
124 self.html_fh = open(self.html_fn, 'w', encoding='utf-8')
# Build the complete page title ("PROG(SECT) man page") before substituting it
# into HTML_START.  `%` binds tighter than `+`, so without the parentheses only
# the program name is formatted into the template and the rest of the title is
# appended after the formatted text instead of landing in the placeholder.
self.html_fh.write(HTML_START % (fi.prog + '(' + fi.sect + ') man page'))
127 self.man_fn = fi.name
128 self.man_fh = open(self.man_fn, 'w', encoding='utf-8')
129 self.man_fh.write(MAN_START % (fi.prog, fi.sect, self.date))
135 print("The test was successful.")
139 self.html_fh.write(HTML_END % self.date)
141 print("Output HTML page: ", self.html_fn)
144 self.man_fh.write(MAN_END)
146 print("Output man page: ", self.man_fn)
148 def handle_starttag(self, tag, attrs_list):
151 print('START', tag, attrs_list, st)
153 if st.list_state[-1] == 'dl':
158 self.html_fh.write('<dt>')
159 st.first_li_tag = False
161 if not st.first_dd_tag:
162 self.man_fh.write(st.p_macro)
164 st.first_li_tag = True
165 lstate = st.list_state[-1]
169 self.man_fh.write(".IP o\n")
171 self.man_fh.write(".IP " + str(lstate) + ".\n")
172 st.list_state[-1] += 1
173 elif tag == 'blockquote':
174 self.man_fh.write(".RS 4\n")
177 self.man_fh.write(st.p_macro + ".nf\n")
178 elif tag == 'code' and not st.in_pre:
179 st.txt += BOLD_FONT[0]
180 elif tag == 'strong' or tag == 'bold':
181 st.txt += BOLD_FONT[0]
182 elif tag == 'i' or tag == 'em':
183 st.txt += ULIN_FONT[0]
186 for var, val in attrs_list:
188 start = int(val) # We only support integers.
191 self.man_fh.write(".RS\n")
195 st.list_state.append('dl')
197 st.list_state.append(start)
198 self.man_fh.write(st.p_macro)
201 self.man_fh.write(st.p_macro)
203 self.man_fh.write(".RS\n")
205 st.list_state.append('o')
206 outer_tag = '<' + tag
207 for var, val in attrs_list:
# Re-emit the attribute as name="value".  The opening double quote has to
# follow the '=' so that it pairs with the closing quote appended after the
# escaped value; otherwise the generated tag is malformed (name=value").
outer_tag += ' ' + var + '="' + safeText(val) + '"'
209 self.html_fh.write(outer_tag + '>')
210 st.first_dd_tag = False
212 def handle_endtag(self, tag):
215 print(' END', tag, st)
216 if tag in CONSUMES_TXT or st.dt_from == tag:
223 self.man_fh.write(st.p_macro + '.SH "' + manify(txt) + '"\n')
225 if st.dt_from == 'p':
227 self.man_fh.write('.IP "' + manify(txt) + '"\n')
230 self.man_fh.write(manify(txt) + "\n")
232 if st.list_state[-1] == 'dl':
234 die("Invalid 0. -> td translation")
237 self.man_fh.write(manify(txt) + "\n")
238 st.first_li_tag = False
239 elif tag == 'blockquote':
240 self.man_fh.write(".RE\n")
243 self.man_fh.write(manify(txt) + "\n.fi\n")
244 elif tag == 'code' and not st.in_pre:
245 add_to_txt = NORM_FONT[0]
246 elif tag == 'strong' or tag == 'bold':
247 add_to_txt = NORM_FONT[0]
248 elif tag == 'i' or tag == 'em':
249 add_to_txt = NORM_FONT[0]
250 elif tag == 'ol' or tag == 'ul':
251 if st.list_state.pop() == 'dl':
254 self.man_fh.write(".RE\n")
257 st.first_dd_tag = False
258 self.html_fh.write('</' + tag + '>')
264 if st.dt_from == tag:
265 self.man_fh.write('.IP "' + manify(txt) + '"\n')
266 self.html_fh.write('</dt><dd>')
267 st.first_dd_tag = True
270 self.html_fh.write('<dd>')
271 st.first_dd_tag = True
273 def handle_data(self, data):
276 print(' DATA', [data], st)
277 self.html_fh.write(safeText(data))
282 return re.sub(r"^(['.])", r'\&\1', txt.replace('\\', '\\\\')
283 .replace(NORM_FONT[0], NORM_FONT[1])
284 .replace(BOLD_FONT[0], BOLD_FONT[1])
285 .replace(ULIN_FONT[0], ULIN_FONT[1]), flags=re.M)
289 return txt.replace('&', '&').replace('<', '<').replace('>', '>').replace('"', '"')
293 print(*msg, file=sys.stderr)
if __name__ == '__main__':
    # Command-line interface.  argparse's automatic help option is switched
    # off so that --help/-h can be registered explicitly with its own text.
    parser = argparse.ArgumentParser(
        description='Transform a NAME.NUM.md markdown file into a NAME.NUM.html web page & a NAME.NUM man page.',
        add_help=False,
    )
    parser.add_argument(
        '--test',
        action='store_true',
        help='Test if we can parse the input w/o updating any files.',
    )
    parser.add_argument(
        '--debug', '-D',
        action='count',
        default=0,
        help='Output copious info on the html parsing.',
    )
    parser.add_argument(
        "--help", "-h",
        action="help",
        help="Output this help message and exit.",
    )
    parser.add_argument(
        'mdfile',
        help="The NAME.NUM.md file to parse.",
    )
    # The parsed options are kept in the module-level name `args`, which the
    # rest of the script reads directly (for example args.mdfile).
    args = parser.parse_args()
312 die("The cmarkgfm library is not available for python3.")