Guido van Rossum | f06ee5f | 1996-11-27 19:52:01 +0000 | [diff] [blame] | 1 | #! /usr/bin/env python |
Guido van Rossum | 03d4c26 | 1995-01-04 19:21:44 +0000 | [diff] [blame] | 2 | |
Guido van Rossum | 694f701 | 1996-09-10 17:59:15 +0000 | [diff] [blame] | 3 | # A somewhat-generalized FAQ-to-HTML converter (by Ka-Ping Yee, 10 Sept 96) |
Guido van Rossum | 03d4c26 | 1995-01-04 19:21:44 +0000 | [diff] [blame] | 4 | |
# Reads a text file given on standard input or named as first argument, and
# generates HTML 2.0 on standard output.  Recognizes these constructions:
#
#     HTML element               pattern at the beginning of a line
#
#     section heading            (<number><period>)+<space>
#     numbered list element      <1-2 spaces>(<number><period>)+<space>
#     unnumbered list element    <0-2 spaces><hyphen or asterisk><space>
#     preformatted section       <more than two spaces>
#
# Heading level is determined by the number of (<number><period>) segments.
# Blank lines force a separation of elements; if none of the above four
# types is indicated, a new paragraph begins.  A line beginning with many
# spaces is interpreted as a continuation (instead of preformatted) after
# a list element.  Headings are anchored; paragraphs starting with "Q." are
# emphasized, and those marked with "A." get their first sentence emphasized.
#
# Hyperlinks are created from references to:
#     URLs, explicitly marked using <URL:scheme://host...>
#     other questions, of the form "question <number>(<period><number>)*"
#     sections, of the form "section <number>".
Guido van Rossum | 03d4c26 | 1995-01-04 19:21:44 +0000 | [diff] [blame] | 26 | |
Guido van Rossum | 694f701 | 1996-09-10 17:59:15 +0000 | [diff] [blame] | 27 | import sys, string, regex, regsub, regex_syntax |
| 28 | regex.set_syntax(regex_syntax.RE_SYNTAX_AWK) |
Guido van Rossum | 03d4c26 | 1995-01-04 19:21:44 +0000 | [diff] [blame] | 29 | |
Guido van Rossum | 694f701 | 1996-09-10 17:59:15 +0000 | [diff] [blame] | 30 | # --------------------------------------------------------- regular expressions |
# Line-classification patterns.  Each is tried with .match() against the start
# of an input line; the callers treat a result > 0 as "matched a prefix of
# that many characters".

# Numbered list item: one or two leading spaces, then "1." / "1.2." etc.
# The mandatory first space is what distinguishes it from a section heading
# (which starts in column 0); without it every heading would be taken for a
# list item, because this pattern is tried first.
orditemprog = regex.compile('  ?([1-9][0-9]*\\.)+ +')
# Unnumbered list item: up to two spaces, then "-" or "*".
itemprog = regex.compile(' ? ?[-*] +')
# Section heading: "1." / "1.2." etc. starting in the first column.
headingprog = regex.compile('([1-9][0-9]*\\.)+ +')
# Preformatted text: a line indented by more than two spaces.
prefmtprog = regex.compile('   ')
# Blank line: a single whitespace character (normally just the newline).
blankprog = regex.compile('^[ \t\r\n]$')
# "Q. " / "A. " paragraph markers, optionally indented.
questionprog = regex.compile(' *Q\\. +')
answerprog = regex.compile(' *A\\. +')
# First sentence: runs up to the first sentence punctuation that is followed
# by whitespace (punctuation followed by a non-space is part of the sentence).
sentprog = regex.compile('(([^.:;?!(]|[.:;?!][^ \t\r\n])+[.:;?!]?)')

# Mail/news header lines that may precede the FAQ body.
mailhdrprog = regex.compile('^(Subject|Newsgroups|Followup-To|From|Reply-To'
    '|Approved|Archive-Name|Version|Last-Modified): +', regex.casefold)

# Hyperlink patterns.  These are substituted over the assembled HTML body,
# where a literal "<" or ">" from the FAQ text is expected to appear as
# "&lt;" / "&gt;"; the [^&] classes stop the match at the "&" of the closing
# "&gt;".  Matching the raw characters instead would let the greedy class run
# across the HTML tags the converter itself emits.
urlprog = regex.compile('&lt;URL:([^&]+)&gt;')
addrprog = regex.compile('&lt;([^>@:]+@[^&@:]+)&gt;')
# "question 4.12": the first number may have several digits, like the rest.
qrefprog = regex.compile('question +([1-9][0-9]*(\\.[0-9]+)*)')
srefprog = regex.compile('section +([1-9][0-9]*)')
# Any character that must be escaped before being emitted as HTML text.
entityprog = regex.compile('[&<>]')
Guido van Rossum | 03d4c26 | 1995-01-04 19:21:44 +0000 | [diff] [blame] | 47 | |
Guido van Rossum | 694f701 | 1996-09-10 17:59:15 +0000 | [diff] [blame] | 48 | # ------------------------------------------------------------ global variables |
# Converter state shared by the list helpers, spew() and the main loop.
body = []        # HTML fragments produced so far, in output order
ollev = 0        # current nesting depth of open <ol> lists
ullev = 0        # non-zero while a <ul> list is open
element = ''     # tag name of the element currently being collected
content = ''     # text collected so far for that element
secnum = ''      # number prefix of the latest heading / numbered item
version = ''     # remainder of the "Version: " header line, once seen
Guido van Rossum | 03d4c26 | 1995-01-04 19:21:44 +0000 | [diff] [blame] | 52 | |
Guido van Rossum | 694f701 | 1996-09-10 17:59:15 +0000 | [diff] [blame] | 53 | # ----------------------------------------------------- for making nested lists |
def dnol():
    """Open one more level of ordered list.

    Increments the global nesting depth `ollev` and appends '<ol>' to `body`.
    If the previous fragment is the '</li>' that closed the preceding item,
    it is dropped first so that the new list nests inside that item.
    """
    global body, ollev
    ollev = ollev + 1
    # `body` is still empty when the very first input line is a numbered
    # item; indexing it unguarded would raise IndexError.
    if body and body[-1] == '</li>':
        del body[-1]
    body.append('<ol>')
Guido van Rossum | 03d4c26 | 1995-01-04 19:21:44 +0000 | [diff] [blame] | 59 | |
def upol():
    """Close the innermost ordered list.

    Decrements the global nesting depth `ollev` and appends the closing tag
    to `body`.  While an outer list is still open, the '</li>' of the item
    that contained the nested list is emitted as well.
    """
    global body, ollev
    remaining = ollev - 1
    if remaining:
        closing = '</ol></li>'
    else:
        closing = '</ol>'
    ollev = remaining
    body.append(closing)
Guido van Rossum | 03d4c26 | 1995-01-04 19:21:44 +0000 | [diff] [blame] | 64 | |
Guido van Rossum | 694f701 | 1996-09-10 17:59:15 +0000 | [diff] [blame] | 65 | # --------------------------------- output one element and convert its contents |
def spew(clearol=0, clearul=0):
    """Emit the pending element into `body`, then optionally close lists.

    If the global `content` is non-empty it is HTML-escaped, decorated
    (question emphasis / index link, heading anchor, answer first-sentence
    emphasis), wrapped in the tag named by the global `element`, appended to
    `body`, and `content` is reset to ''.

    clearol -- if true, close every open ordered list (all `ollev` levels).
    clearul -- if true, close the open unordered list, if any.
    """
    global content, body, ollev, ullev

    if content:
        # Escape HTML metacharacters.  '&' has to be replaced first so the
        # ampersands introduced by the other two replacements are not
        # escaped a second time.
        if entityprog.search(content) > -1:
            content = regsub.gsub('&', '&amp;', content)
            content = regsub.gsub('<', '&lt;', content)
            content = regsub.gsub('>', '&gt;', content)

        n = questionprog.match(content)
        if n > 0:
            # Drop the "Q. " marker and emphasize the question text.
            content = '<em>' + content[n:] + '</em>'
            if ollev:                       # question reference in index
                # Inside a numbered list the question links to the anchor
                # named after its own item number (spaces and the trailing
                # period stripped).
                fragid = regsub.gsub('^ +|\\.? +$', '', secnum)
                content = '<a href="#%s">%s</a>' % (fragid, content)

        if element[0] == 'h':               # heading in the main text
            # Headings keep their number and become the link target.
            fragid = regsub.gsub('^ +|\\.? +$', '', secnum)
            content = secnum + '<a name="%s">%s</a>' % (fragid, content)

        n = answerprog.match(content)
        if n > 0:                           # answer paragraph
            # Drop the "A. " marker and make the first sentence strong.
            content = regsub.sub(sentprog, '<strong>\\1</strong>', content[n:])

        # The closing tag is a separate fragment so that dnol() can remove a
        # trailing '</li>' when a nested list follows.
        body.append('<' + element + '>' + content)
        body.append('</' + element + '>')
        content = ''

    while clearol and ollev:
        upol()
    if clearul and ullev:
        body.append('</ul>')
        ullev = 0
| 96 | |
| 97 | # ---------------------------------------------------------------- main program |
# Input is the file named by the first command-line argument if one is given
# (and non-empty), otherwise standard input.
faq = len(sys.argv)>1 and sys.argv[1] and open(sys.argv[1]) or sys.stdin
lines = faq.readlines()

# Classify each line in turn.  The tests are ordered: rule line, numbered
# item, bulleted item, heading, mail header / preformatted, then blank /
# continuation / new paragraph.  A pattern result n > 0 is used as the length
# of the matched prefix (see the line[:n] / line[n:] slices).
for line in lines:
    if line[2:9] == '=======':              # <hr> will appear *before*
        body.append('<hr>')                 # the underlined heading
        continue

    n = orditemprog.match(line)
    if n > 0:                               # make ordered list item
        spew(0, 'clear ul')
        secnum = line[:n]
        # One period per number segment gives the nesting depth.
        level = string.count(secnum, '.')
        while level > ollev: dnol()
        while level < ollev: upol()
        element, content = 'li', line[n:]
        continue

    n = itemprog.match(line)
    if n > 0:                               # make unordered list item
        spew('clear ol', 0)
        if ullev == 0: body.append('<ul>'); ullev = 1
        element, content = 'li', line[n:]
        continue

    n = headingprog.match(line)
    if n > 0:                               # make heading element
        spew('clear ol', 'clear ul')
        secnum = line[:n]
        # Echo the heading line to stderr.
        sys.stderr.write(line)
        # Heading level = number of periods in the section number.
        element, content = 'h%d' % string.count(secnum, '.'), line[n:]
        continue

    n = 0
    if not secnum:                          # haven't hit body yet
        # A recognized mail header makes n > 0, so the line falls into the
        # preformatted branch below.
        n = mailhdrprog.match(line)
        # Remember the text after the first "Version: " header for the title.
        v = version and -1 or regex.match('Version: ', line)
        if v > 0 and not version: version = line[v:]
    if n <= 0 and element != 'li':          # not pre if after a list item
        n = prefmtprog.match(line)
    if n > 0:                               # make preformatted element
        if element == 'pre':
            # Consecutive preformatted lines share one <pre> element.
            content = content + line
        else:
            spew('clear ol', 'clear ul')
            element, content = 'pre', line
        continue

    if blankprog.match(line) > 0:           # force a new element
        spew()
        element = ''
    elif element:                           # continue current element
        content = content + line
    else:                                   # no element; make paragraph
        spew('clear ol', 'clear ul')
        element, content = 'p', line

spew()                                      # output last element

# Join the fragments and turn URL, mail-address, question and section
# references into hyperlinks.
body = string.joinfields(body, '')
body = regsub.gsub(urlprog, '<a href="\\1">\\1</a>', body)
body = regsub.gsub(addrprog, '<a href="mailto:\\1">\\1</a>', body)
body = regsub.gsub(qrefprog, '<a href="#\\1">question \\1</a>', body)
body = regsub.gsub(srefprog, '<a href="#\\1">section \\1</a>', body)

# Write the complete HTML document to standard output.
print '<!doctype html public "-//IETF//DTD HTML 2.0//EN"><html>'
print '<head><title>Python Frequently-Asked Questions v' + version
print "</title></head><body>(This file was generated using Ping's"
print '<a href="faq2html.py">faq2html.py</a>.)'
print body + '</body></html>'