Unify md parsing scripts & improve non-man html conversions.

author Wayne Davison <wayne@opencoder.net>

Mon, 27 Dec 2021 22:19:11 +0000 (14:19 -0800)

committer Wayne Davison <wayne@opencoder.net>

Mon, 27 Dec 2021 22:24:05 +0000 (14:24 -0800)
author Wayne Davison <wayne@opencoder.net>
Mon, 27 Dec 2021 22:19:11 +0000 (14:19 -0800)
committer Wayne Davison <wayne@opencoder.net>
Mon, 27 Dec 2021 22:24:05 +0000 (14:24 -0800)
diff --git a/Makefile.in b/Makefile.in

index 98d5a7af8e3c5d9be05a68d847d3b77cca76faa3..14d95abed40b4b31a4a4da5b6ca92801c3dddd49 100644 (file)
--- a/Makefile.in
+++ b/Makefile.in
@@ -257,16 +257,16 @@ proto.h-tstamp: $(srcdir)/*.c $(srcdir)/lib/compat.c daemon-parm.h
  .PHONY: man
  man: rsync.1 rsync-ssl.1 rsyncd.conf.5 rrsync.1
  
-rsync.1: rsync.1.md md2man version.h Makefile
+rsync.1: rsync.1.md md-convert version.h Makefile
         @$(srcdir)/maybe-make-man $(srcdir) rsync.1.md
  
-rsync-ssl.1: rsync-ssl.1.md md2man version.h Makefile
+rsync-ssl.1: rsync-ssl.1.md md-convert version.h Makefile
         @$(srcdir)/maybe-make-man $(srcdir) rsync-ssl.1.md
  
-rsyncd.conf.5: rsyncd.conf.5.md md2man version.h Makefile
+rsyncd.conf.5: rsyncd.conf.5.md md-convert version.h Makefile
         @$(srcdir)/maybe-make-man $(srcdir) rsyncd.conf.5.md
  
-rrsync.1: support/rrsync.1.md md2man Makefile
+rrsync.1: support/rrsync.1.md md-convert Makefile
         @$(srcdir)/maybe-make-man $(srcdir) support/rrsync.1.md
  
  .PHONY: clean
diff --git a/NEWS.md b/NEWS.md

index 5994fff643e7a9a9cf45d8dba3ead78653f96ff9..b88208c500733a4f645d7bdb0289eee4134a6b93 100644 (file)
--- a/NEWS.md
+++ b/NEWS.md
@@ -4472,3 +4472,5 @@
  
  \* DATE OF COMMIT is the date the protocol change was committed to version
  control.
+
+@USE_GFM_PARSER@
diff --git a/maybe-make-man b/maybe-make-man

index 59f2dce42a28ad5caa615b3ab5405264d9f650e7..99b8fb8944c84b49e24b50f2d3662fa9e958db6b 100755 (executable)
--- a/maybe-make-man
+++ b/maybe-make-man
@@ -16,7 +16,7 @@ fi
  
  if [ ! -f "$flagfile" ]; then
      # We test our smallest manpage just to see if the python setup works.
-    if "$srcdir/md2man" --test "$srcdir/rsync-ssl.1.md" >/dev/null 2>&1; then
+    if "$srcdir/md-convert" --test "$srcdir/rsync-ssl.1.md" >/dev/null 2>&1; then
         touch $flagfile
      else
         outname=`echo "$inname" | sed 's/\.md$//'`
@@ -37,4 +37,4 @@ if [ ! -f "$flagfile" ]; then
      fi
  fi
  
-"$srcdir/md2man" -s "$srcdir" "$srcdir/$inname"
+"$srcdir/md-convert" "$srcdir/$inname"
diff --git a/md-convert b/md-convert

index fd546f19060740afb28418e9180e371a9f5e0846..7780d06b5f27d1924eb139d45effefdf575b885e 100755 (executable)
--- a/md-convert
+++ b/md-convert
@@ -1,28 +1,35 @@
  #!/usr/bin/env python3
  
-# This script takes a manpage written in markdown and turns it into an html web
-# page and a nroff man page.  The input file must have the name of the program
-# and the section in this format: NAME.NUM.md.  The output files are written
-# into the current directory named NAME.NUM.html and NAME.NUM.  The input
-# format has one extra extension: if a numbered list starts at 0, it is turned
-# into a description list. The dl's dt tag is taken from the contents of the
-# first tag inside the li, which is usually a p, code, or strong tag.  The
-# cmarkgfm or commonmark lib is used to transforms the input file into html.
-# The html.parser is used as a state machine that both tweaks the html and
-# outputs the nroff data based on the html tags.
+# This script transforms markdown files into html and (optionally) nroff. The
+# output files are written into the current directory named for the input file
+# without the .md suffix and either the .html suffix or no suffix.
  #
-# We normally grab the prefix from the generated Makefile, which is then used
-# in the various other grabbed values (see the Makefile for its ${prefix}
-# paths).  However, the maintainer can choose to override this prefix by
-# exporting RSYNC_OVERRIDE_PREFIX=/usr.  This allows the man pages to refer to
-# /usr paths (and are thus compatible with the release-rsync script) while
-# still having the built rsync get installed into /usr/local for local testing.
+# If the input .md file has a section number at the end of the name (e.g.,
+# rsync.1.md) a nroff file is also output (PROJ.NUM.md -> PROJ.NUM).
  #
-# Copyright (C) 2020 Wayne Davison
+# The markdown input format has one extra extension: if a numbered list starts
+# at 0, it is turned into a description list. The dl's dt tag is taken from the
+# contents of the first tag inside the li, which is usually a p, code, or
+# strong tag.
+#
+# The cmarkgfm or commonmark lib is used to transform the input file into
+# html.  Then, the html.parser is used as a state machine that lets us tweak
+# the html and (optionally) output nroff data based on the html tags.
+#
+# If the string @USE_GFM_PARSER@ exists in the file, the string is removed and
+# a github-flavored-markdown parser is used to parse the file.
+#
+# The man-page .md files also get the vars @VERSION@, @BINDIR@, and @LIBDIR@
+# substituted.  Some of these values depend on the Makefile $(prefix) (see the
+# generated Makefile).  If the maintainer wants to build files for /usr/local
+# while creating release-ready man-page files for /usr, use the environment to
+# set RSYNC_OVERRIDE_PREFIX=/usr.
+
+# Copyright (C) 2020 - 2021 Wayne Davison
  #
  # This program is freely redistributable.
  
-import sys, os, re, argparse, subprocess, time
+import os, sys, re, argparse, subprocess, time
  from html.parser import HTMLParser
  
  CONSUMES_TXT = set('h1 h2 p li pre'.split())
@@ -58,8 +65,30 @@ dd p:first-of-type {
  </head><body>
  """
  
-HTML_END = """\
+TABLE_STYLE = """\
+table {
+  border-color: grey;
+  border-spacing: 0;
+}
+tr {
+  border-top: 1px solid grey;
+}
+tr:nth-child(2n) {
+  background-color: #f6f8fa;
+}
+th, td {
+  border: 1px solid #dfe2e5;
+  text-align: center;
+  padding-left: 1em;
+  padding-right: 1em;
+}
+"""
+
+MAN_HTML_END = """\
  <div style="float: right"><p><i>%s</i></p></div>
+"""
+
+HTML_END = """\
  </body></html>
  """
  
@@ -78,41 +107,96 @@ NBR_DASH = ('\4', r"\-")
  NBR_SPACE = ('\xa0', r"\ ")
  
  md_parser = None
+env_subs = { }
  
  def main():
-    fi = re.match(r'^(?P<fn>(?P<srcdir>.+/)?(?P<name>(?P<prog>[^/]+)\.(?P<sect>\d+))\.md)$', args.mdfile)
+    for mdfn in args.mdfiles:
+        parse_md_file(mdfn)
+
+    if args.test:
+        print("The test was successful.")
+
+
+def parse_md_file(mdfn):
+    fi = re.match(r'^(?P<fn>(?P<srcdir>.+/)?(?P<name>(?P<prog>[^/]+?)(\.(?P<sect>\d+))?)\.md)$', mdfn)
      if not fi:
-        die('Failed to parse NAME.NUM.md out of input file:', args.mdfile)
+        die('Failed to parse a md input file name:', mdfn)
      fi = argparse.Namespace(**fi.groupdict())
+    fi.want_manpage = not not fi.sect
+    if fi.want_manpage:
+        fi.title = fi.prog + '(' + fi.sect + ') man page'
+    else:
+        fi.title = fi.prog
+
+    if fi.want_manpage:
+        if not env_subs:
+            find_man_substitutions()
+        prog_ver = 'rsync ' + env_subs['VERSION']
+        if fi.prog != 'rsync':
+            prog_ver = fi.prog + ' from ' + prog_ver
+        fi.man_headings = (fi.prog, fi.sect, env_subs['date'], prog_ver, env_subs['prefix'])
+
+    with open(mdfn, 'r', encoding='utf-8') as fh:
+        txt = fh.read()
+
+    use_gfm_parser = '@USE_GFM_PARSER@' in txt
+    if use_gfm_parser:
+        txt = txt.replace('@USE_GFM_PARSER@', '')
+
+    if fi.want_manpage:
+        txt = (txt.replace('@VERSION@', env_subs['VERSION'])
+                  .replace('@BINDIR@', env_subs['bindir'])
+                  .replace('@LIBDIR@', env_subs['libdir']))
+
+    if use_gfm_parser:
+        if not gfm_parser:
+            die('Input file requires cmarkgfm parser:', mdfn)
+        fi.html_in = gfm_parser(txt)
+    else:
+        fi.html_in = md_parser(txt)
+    txt = None
+
+    TransformHtml(fi)
+
+    if args.test:
+        return
+
+    output_list = [ (fi.name + '.html', fi.html_out) ]
+    if fi.want_manpage:
+        output_list += [ (fi.name, fi.man_out) ]
+    for fn, txt in output_list:
+        if os.path.lexists(fn):
+            os.unlink(fn)
+        print("Wrote:", fn)
+        with open(fn, 'w', encoding='utf-8') as fh:
+            fh.write(txt)
  
-    if args.srcdir:
-        fi.srcdir = args.srcdir + '/'
-    elif not fi.srcdir:
-        fi.srcdir = './'
  
-    fi.title = fi.prog + '(' + fi.sect + ') man page'
-    fi.mtime = 0
+def find_man_substitutions():
+    srcdir = os.path.dirname(sys.argv[0]) + '/'
+    mtime = 0
  
-    git_dir = fi.srcdir + '.git'
+    git_dir = srcdir + '.git'
      if os.path.lexists(git_dir):
-        fi.mtime = int(subprocess.check_output(['git', '--git-dir', git_dir, 'log', '-1', '--format=%at']))
+        mtime = int(subprocess.check_output(['git', '--git-dir', git_dir, 'log', '-1', '--format=%at']))
  
-    env_subs = { 'prefix': os.environ.get('RSYNC_OVERRIDE_PREFIX', None) }
+    # Allow "prefix" to be overridden via the environment:
+    env_subs['prefix'] = os.environ.get('RSYNC_OVERRIDE_PREFIX', None)
  
      if args.test:
          env_subs['VERSION'] = '1.0.0'
          env_subs['bindir'] = '/usr/bin'
          env_subs['libdir'] = '/usr/lib/rsync'
      else:
-        for fn in (fi.srcdir + 'version.h', 'Makefile'):
+        for fn in (srcdir + 'version.h', 'Makefile'):
              try:
                  st = os.lstat(fn)
              except OSError:
-                die('Failed to find', fi.srcdir + fn)
-            if not fi.mtime:
-                fi.mtime = st.st_mtime
+                die('Failed to find', srcdir + fn)
+            if not mtime:
+                mtime = st.st_mtime
  
-        with open(fi.srcdir + 'version.h', 'r', encoding='utf-8') as fh:
+        with open(srcdir + 'version.h', 'r', encoding='utf-8') as fh:
              txt = fh.read()
          m = re.search(r'"(.+?)"', txt)
          env_subs['VERSION'] = m.group(1)
@@ -131,40 +215,14 @@ def main():
                  if var == 'srcdir':
                      break
  
-    fi.prog_ver = 'rsync ' + env_subs['VERSION']
-    if fi.prog != 'rsync':
-        fi.prog_ver = fi.prog + ' from ' + fi.prog_ver
-
-    with open(fi.fn, 'r', encoding='utf-8') as fh:
-        txt = fh.read()
-
-    txt = re.sub(r'@VERSION@', env_subs['VERSION'], txt)
-    txt = re.sub(r'@BINDIR@', env_subs['bindir'], txt)
-    txt = re.sub(r'@LIBDIR@', env_subs['libdir'], txt)
-
-    fi.html_in = md_parser(txt)
-    txt = None
-
-    fi.date = time.strftime('%d %b %Y', time.localtime(fi.mtime))
-    fi.man_headings = (fi.prog, fi.sect, fi.date, fi.prog_ver, env_subs['prefix'])
-
-    HtmlToManPage(fi)
-
-    if args.test:
-        print("The test was successful.")
-        return
-
-    for fn, txt in ((fi.name + '.html', fi.html_out), (fi.name, fi.man_out)):
-        print("Wrote:", fn)
-        with open(fn, 'w', encoding='utf-8') as fh:
-            fh.write(txt)
+    env_subs['date'] = time.strftime('%d %b %Y', time.localtime(mtime))
  
  
  def html_via_commonmark(txt):
      return commonmark.HtmlRenderer().render(commonmark.Parser().parse(txt))
  
  
-class HtmlToManPage(HTMLParser):
+class TransformHtml(HTMLParser):
      def __init__(self, fi):
          HTMLParser.__init__(self, convert_charrefs=True)
  
@@ -177,14 +235,23 @@ class HtmlToManPage(HTMLParser):
                  in_pre = False,
                  in_code = False,
                  html_out = [ HTML_START % fi.title ],
-                man_out = [ MAN_START % fi.man_headings ],
+                man_out = [ ],
                  txt = '',
+                want_manpage = fi.want_manpage,
                  )
  
+        if st.want_manpage:
+            st.man_out.append(MAN_START % fi.man_headings)
+
+        if '</table>' in fi.html_in:
+            st.html_out[0] = st.html_out[0].replace('</style>', TABLE_STYLE + '</style>')
+
          self.feed(fi.html_in)
          fi.html_in = None
  
-        st.html_out.append(HTML_END % fi.date)
+        if st.want_manpage:
+            st.html_out.append(MAN_HTML_END % env_subs['date'])
+        st.html_out.append(HTML_END)
          st.man_out.append(MAN_END)
  
          fi.html_out = ''.join(st.html_out)
@@ -232,8 +299,9 @@ class HtmlToManPage(HTMLParser):
          elif tag == 'strong' or tag == 'b':
              st.txt += BOLD_FONT[0]
          elif tag == 'em' or  tag == 'i':
-            tag = 'u' # Change it into underline to be more like the man page
-            st.txt += UNDR_FONT[0]
+            if st.want_manpage:
+                tag = 'u' # Change it into underline to be more like the man page
+                st.txt += UNDR_FONT[0]
          elif tag == 'ol':
              start = 1
              for var, val in attrs_list:
@@ -256,6 +324,10 @@ class HtmlToManPage(HTMLParser):
                  st.man_out.append(".RS\n")
                  st.p_macro = ".IP\n"
              st.list_state.append('o')
+        elif tag == 'hr':
+            st.man_out.append(".l\n")
+            st.html_out.append("<hr />")
+            return
          st.html_out.append('<' + tag + ''.join(' ' + var + '="' + htmlify(val) + '"' for var, val in attrs_list) + '>')
          st.at_first_tag_in_dd = False
  
@@ -300,8 +372,9 @@ class HtmlToManPage(HTMLParser):
          elif tag == 'strong' or tag == 'b':
              add_to_txt = NORM_FONT[0]
          elif tag == 'em' or  tag == 'i':
-            tag = 'u' # Change it into underline to be more like the man page
-            add_to_txt = NORM_FONT[0]
+            if st.want_manpage:
+                tag = 'u' # Change it into underline to be more like the man page
+                add_to_txt = NORM_FONT[0]
          elif tag == 'ol' or tag == 'ul':
              if st.list_state.pop() == 'dl':
                  tag = 'dl'
@@ -310,6 +383,8 @@ class HtmlToManPage(HTMLParser):
              else:
                  st.p_macro = ".P\n"
              st.at_first_tag_in_dd = False
+        elif tag == 'hr':
+            return
          st.html_out.append('</' + tag + '>')
          if add_to_txt:
              if txt is None:
@@ -379,22 +454,23 @@ def die(*msg):
  
  
  if __name__ == '__main__':
-    parser = argparse.ArgumentParser(description='Transform a NAME.NUM.md markdown file into a NAME.NUM.html web page & a NAME.NUM man page.', add_help=False)
-    parser.add_argument('--srcdir', '-s', help='Specify the source dir if the input file is not in it.')
-    parser.add_argument('--test', action='store_true', help='Test if we can parse the input w/o updating any files.')
+    parser = argparse.ArgumentParser(description="Output html and (optionally) nroff for markdown pages.", add_help=False)
+    parser.add_argument('--test', action='store_true', help="Just test the parsing without outputting any files.")
      parser.add_argument('--debug', '-D', action='count', default=0, help='Output copious info on the html parsing. Repeat for even more.')
      parser.add_argument("--help", "-h", action="help", help="Output this help message and exit.")
-    parser.add_argument('mdfile', help="The NAME.NUM.md file to parse.")
+    parser.add_argument("mdfiles", nargs='+', help="The source .md files to convert.")
      args = parser.parse_args()
  
      try:
          import cmarkgfm
          md_parser = cmarkgfm.markdown_to_html
+        gfm_parser = cmarkgfm.github_flavored_markdown_to_html
      except:
          try:
              import commonmark
              md_parser = html_via_commonmark
          except:
              die("Failed to find cmarkgfm or commonmark for python3.")
+        gfm_parser = None
  
      main()
diff --git a/md2man b/md2man

new file mode 120000 (symlink)

index 0000000..5d1a8fc
--- /dev/null
+++ b/md2man
@@ -0,0 +1 @@
+md-convert
+\ No newline at end of file
diff --git a/packaging/md2html b/packaging/md2html

deleted file mode 100755 (executable)

index 21e42c6..0000000
--- a/packaging/md2html
+++ /dev/null
@@ -1,104 +0,0 @@
-#!/usr/bin/env python3
-
-# Copyright (C) 2020 Wayne Davison
-#
-# This program is freely redistributable.
-
-import os, re, argparse
-
-HTML_START = """\
-<html><head>
-<title>%s</title>
-<link href="https://fonts.googleapis.com/css2?family=Roboto&family=Roboto+Mono&display=swap" rel="stylesheet">
-<style>
-body {
-  max-width: 50em;
-  margin: auto;
-}
-body, b, strong, u {
-  font-family: 'Roboto', sans-serif;
-}
-code {
-  font-family: 'Roboto Mono', monospace;
-  font-weight: bold;
-}
-pre code {
-  display: block;
-  font-weight: normal;
-}
-blockquote pre code {
-  background: #f1f1f1;
-}
-dd p:first-of-type {
-  margin-block-start: 0em;
-}
-table {
-  border-color: grey;
-  border-spacing: 0;
-}
-tr {
-  border-top: 1px solid grey;
-}
-tr:nth-child(2n) {
-  background-color: #f6f8fa;
-}
-th, td {
-  border: 1px solid #dfe2e5;
-  text-align: center;
-  padding-left: 1em;
-  padding-right: 1em;
-}
-</style>
-</head><body>
-"""
-
-HTML_END = """\
-</body></html>
-"""
-
-md_parser = None
-
-def main():
-    for mdfn in args.mdfiles:
-        if not mdfn.endswith('.md'):
-            print('Ignoring non-md input file:', mdfn)
-            continue
-        title = re.sub(r'.*/', '', mdfn).replace('.md', '')
-        htfn = mdfn.replace('.md', '.html')
-
-        print("Parsing", mdfn, '->', htfn)
-
-        with open(mdfn, 'r', encoding='utf-8') as fh:
-            txt = fh.read()
-
-        txt = re.sub(r'\s--\s', '\xa0-- ', txt)
-
-        html = md_parser(txt)
-
-        html = re.sub(r'(?<!<pre>)(<code>)([\s\S]*?)(</code>)', lambda m: m[1] + re.sub(r'\s', '\xa0', m[2]) + m[3], html)
-        html = html.replace('--', '&#8209;&#8209;').replace("\xa0-", '&nbsp;&#8209;').replace("\xa0", '&nbsp;')
-        html = re.sub(r'(\W)-', r'\1&#8209;', html)
-
-        if os.path.lexists(htfn):
-            os.unlink(htfn)
-
-        with open(htfn, 'w', encoding='utf-8') as fh:
-            fh.write(HTML_START % title)
-            fh.write(html)
-            fh.write(HTML_END)
-
-
-if __name__ == '__main__':
-    parser = argparse.ArgumentParser(description='Output html for md pages.', add_help=False)
-    parser.add_argument("--help", "-h", action="help", help="Output this help message and exit.")
-    parser.add_argument("mdfiles", nargs='+', help="The .md files to turn into .html files.")
-    args = parser.parse_args()
-
-    try:
-        import cmarkgfm
-        # Our NEWS.md file has a gfm table in it.
-        md_parser = cmarkgfm.github_flavored_markdown_to_html
-    except:
-        die("Failed to find cmarkgfm for python3.")
-
-    main()
diff --git a/packaging/release-rsync b/packaging/release-rsync

index fa1da234981f4aad2cf4a10d7329721a3e8dedd0..0ffc109503b3b8278a7f56b2272976074dbd83cd 100755 (executable)
--- a/packaging/release-rsync
+++ b/packaging/release-rsync
@@ -341,7 +341,7 @@ About to:
      md_files = 'README.md NEWS.md INSTALL.md'.split()
      html_files = [ fn for fn in gen_pathnames if fn.endswith('.html') ]
      cmd_chk(['rsync', '-a', *md_files, *html_files, dest])
-    cmd_chk(["packaging/md2html"] + [ dest +'/'+ fn for fn in md_files ])
+    cmd_chk(["./md-convert"] + [ dest +'/'+ fn for fn in md_files ])
  
      cmd_chk(f"git log --name-status | gzip -9 >{dest}/ChangeLog.gz")
author	Wayne Davison <wayne@opencoder.net>
	Mon, 27 Dec 2021 22:19:11 +0000 (14:19 -0800)
committer	Wayne Davison <wayne@opencoder.net>
	Mon, 27 Dec 2021 22:24:05 +0000 (14:24 -0800)
Makefile.in		patch \| blob \| history
NEWS.md		patch \| blob \| history
maybe-make-man		patch \| blob \| history
md-convert		patch \| blob \| history
md2man	[new symlink]	patch \| blob
packaging/md2html	[deleted file]	patch \| blob \| history
packaging/release-rsync		patch \| blob \| history