blob: 948e5e1ddcc58311df844cb423f978537c4e71d9 [file] [log] [blame]
#!/usr/bin/env python

#
# updateDocumentToC.py
#
# Insert table of contents at top of Catch markdown documents.
#
# This script is distributed under the GNU General Public License v3.0
#
# It is based on markdown-toclify version 1.7.1 by Sebastian Raschka,
# https://github.com/rasbt/markdown-toclify
#
13
14from __future__ import print_function
15from scriptCommon import catchPath
16
17import argparse
18import glob
19import os
20import re
21import sys
22
# Configuration:

# Default for the --min-toc-entries command line option: documents with
# fewer collected headlines than this get no table of contents.
minTocEntries = 4

# Heading levels that are left out of the table of contents.
headingExcludeDefault = [1,3,4,5] # by default, use level 2 headers only
headingExcludeRelease = [2,3,4,5] # use level 1 headers for release-notes.md

# Glob pattern of the documents processed when no file is given.
documentsDefault = os.path.join(os.path.relpath(catchPath), 'docs/*.md')
# Base name of the document that uses headingExcludeRelease.
releaseNotesName = 'release-notes.md'

# Text that starts the title line of a generated table of contents.
contentTitle = '**Contents**'
# 1-based line number at which the table of contents is inserted ...
contentLineNo = 4
# ... and the corresponding 0-based index into the list of lines.
contentLineNdx = contentLineNo - 1

# End configuration

# Characters dashifyHeadline keeps in an anchor id; every other character
# is replaced by a dash.
VALIDS = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_-&'
40
def readLines(in_file):
    """
    Reads the markdown file `in_file` and returns its content as a list
    of lines.

    The text is split on the newline character only, so a file that ends
    with a newline yields a trailing empty string and an empty file
    yields [''].
    """
    with open(in_file, 'r') as handle:
        text = handle.read()
    return text.split('\n')
47
def removeLines(lines, remove=('[[back to top]', '<a class="mk-toclify"')):
    """
    Returns a copy of `lines` without the lines that start with one of
    the prefixes in `remove`.

    Keyword arguments:
        lines: list of lines of a markdown document.
        remove: prefix, or tuple of prefixes, as accepted by
            str.startswith; with the default value, existing
            [back to top] links and mk-toclify <a id> tags are dropped.
            A false value keeps every line.
    """
    if remove:
        return [line for line in lines if not line.startswith(remove)]
    return lines[:]
60
def removeToC(lines):
    """
    Removes an existing table of contents starting at index contentLineNdx.

    A table of contents is recognised by a line at index contentLineNdx
    that starts with contentTitle. That line, the directly following
    lines starting with '[' (the entries) and one blank line after them
    are removed.

    Keyword arguments:
        lines: list of lines of a markdown document.

    Returns a new list; `lines` itself is left untouched. Documents that
    are too short to hold a table of contents, or that do not have one,
    are returned as an unchanged copy.
    """
    # A document shorter than contentLineNo lines cannot contain a ToC;
    # guard the index access instead of raising IndexError.
    if len(lines) <= contentLineNdx:
        return lines[:]
    if not lines[contentLineNdx].startswith(contentTitle):
        return lines[:]

    # Skip the ToC entries; stop at the end of the document at the latest.
    pos = contentLineNdx + 1
    while pos < len(lines) and lines[pos].startswith('['):
        pos += 1

    # createToc ends the table with one empty line. Drop it as well, but
    # only if it really is blank, so that text that directly follows a
    # hand-edited table is not lost.
    if pos < len(lines) and not lines[pos].strip():
        pos += 1

    return lines[:contentLineNdx] + lines[pos:]
75
def dashifyHeadline(line, valids=None):
    """
    Takes a header line from a Markdown document and
    returns a list of the
        '#'-stripped version of the head line,
        a string version for <a id=''></a> anchor tags,
        and the level of the headline as integer.
    E.g.,
    >>> dashifyHeadline('### some header lvl3')
    ['some header lvl3', 'some-header-lvl3', 3]

    Keyword arguments:
        line: the header line; surrounding whitespace and an optional
            closing sequence of '#' (separated from the title by
            whitespace) are ignored.
        valids: characters that are kept in the anchor string, every
            other character becomes a dash. Defaults to VALIDS.

    """
    if valids is None:
        valids = VALIDS

    # Surrounding whitespace has to go first: with leading indentation the
    # '#' prefix would not be found (level 0), with trailing whitespace a
    # closing '#' sequence would end up in the title.
    trimmed = line.strip()

    # Only a '#' run that is separated from the title by whitespace is a
    # closing sequence; the '#' of a title such as 'C#' is kept.
    without_closing = re.sub(r'\s+#+$', '', trimmed)
    without_hashes = without_closing.lstrip('#')
    level = len(without_closing) - len(without_hashes)
    title = without_hashes.strip()

    # character replacements: '.' and '/' vanish, anything else that is
    # not a valid anchor character turns into a dash
    anchor = title.replace('.', '').replace('/', '')
    anchor = ''.join([c if c in valids else '-' for c in anchor])
    anchor = anchor.lower()

    anchor = re.sub(r'-{2,}', '-', anchor)  # remove duplicate dashes
    anchor = anchor.strip('-')              # strip dashes from start and end

    # exception '&' (double-dash in github)
    anchor = anchor.replace('-&-', '--')

    return [title, anchor, level]
107
def _fenceMarker(text):
    """Returns the run of 3 or more backticks or tildes `text` starts with, else ''."""
    match = re.match(r'(`{3,}|~{3,})', text)
    return match.group(1) if match else ''

def tagAndCollect(lines, id_tag=True, back_links=False, exclude_h=None):
    """
    Gets headlines from the markdown document and creates anchor tags.

    Keyword arguments:
        lines: a list of strings where every string
            represents a line from a Markdown document.
        id_tag: if true, inserts the <a id> tags (not req. by GitHub)
        back_links: if true, adds "back to top" links below each headline
        exclude_h: header levels to exclude. E.g., [2, 3]
            excludes level 2 and 3 headings.

    Returns a tuple of 2 lists:
        1st list:
            A copy of the input list in which
            <a id="some-header"></a> anchor tags were inserted
            above the collected header lines (if id_tag is true) and
            "back to top" links below each header line (if back_links
            is true). No input line is ever dropped.

        2nd list:
            A list of 3-value sublists as returned by dashifyHeadline,
            where the first value represents the heading, the second
            value the string that is used as ID in the anchor tags,
            and the third value is an integer that represents the
            headline level.
            E.g.,
            [['some header lvl3', 'some-header-lvl3', 3], ...]

    Lines are headlines if they start with 1 to 6 '#' followed by a
    space, are indented by at most 3 spaces, have a non-empty title and
    are not located inside a fenced code block.

    """
    atx_prefixes = ('# ', '## ', '### ', '#### ', '##### ', '###### ')

    out_contents = []
    headlines = []
    open_fence = None   # marker that opened the current fenced code block

    for l in lines:
        l_stripped = l.lstrip()
        fence = _fenceMarker(l_stripped)

        # Content of fenced code blocks is copied verbatim: a '# comment'
        # in a shell or CMake snippet is not a headline.
        if open_fence is not None:
            # A block is closed by a fence of the same character that is
            # at least as long as the opening one and carries no text.
            if (fence and fence[0] == open_fence[0]
                    and len(fence) >= len(open_fence)
                    and not l_stripped[len(fence):].strip()):
                open_fence = None
            out_contents.append(l)
            continue
        if fence:
            open_fence = fence
            out_contents.append(l)
            continue

        saw_headline = bool(
            l_stripped.startswith(atx_prefixes)     # 1-6 '#', then a space
            and len(l) - len(l_stripped) <= 3       # indented by at most 3
            and l_stripped.strip('# '))             # ignore empty headers

        if saw_headline:
            # pass the unindented text, the level is derived from the
            # '#' characters the line starts with
            dashified = dashifyHeadline(l_stripped)

            if not exclude_h or dashified[-1] not in exclude_h:
                if id_tag:
                    out_contents.append(
                        '<a class="mk-toclify" id="%s"></a>' % (dashified[1]))
                headlines.append(dashified)

        # Every line is kept, also candidates that turned out not to be
        # a headline.
        out_contents.append(l)
        if back_links and saw_headline:
            out_contents.append('[[back to top](#table-of-contents)]')
    return out_contents, headlines
175
def positioningHeadlines(headlines):
    """
    Shifts all headlines one level up if no level 1 headline is present.

    Keyword arguments:
        headlines: list of [heading, anchor, level] lists; the level
            (last element) of each entry is decremented in place.

    Returns the same list object that was passed in.
    """
    has_top_level = any(entry[-1] == 1 for entry in headlines)
    if not has_top_level:
        for entry in headlines:
            entry[-1] = entry[-1] - 1
    return headlines
189
def createToc(headlines, hyperlink=True, top_link=False, no_toc_header=False):
    """
    Builds the lines of the table of contents from the headline list
    that was returned by the tagAndCollect function.

    Keyword Arguments:
        headlines: list of lists
            e.g., ['Some header lvl3', 'some-header-lvl3', 3]
        hyperlink: if True, every entry is a Markdown link,
            e.g., '[Some header lvl1](#some-header-lvl1)';
            otherwise it is a list item indented according to its level.
        top_link: if True, an id tag for linking the table
            of contents itself (for the back-to-top-links) precedes
            the title line.
        no_toc_header: suppresses the title line and the id tag if True.

    Returns a list of lines: the optional header, one entry per
    headline, each followed by '<br>', and a final newline string,
    e.g., ['[Some header lvl3](#some-header-lvl3)<br>', ..., '\\n']

    """
    toc = []

    if not no_toc_header:
        if top_link:
            toc.append('<a class="mk-toclify" id="table-of-contents"></a>\n')
        toc.append(contentTitle + '<br>')

    if hyperlink:
        entries = ['[%s](#%s)' % (head[0], head[1]) for head in headlines]
    else:
        entries = ['%s- %s' % ((head[2]-1)*' ', head[0]) for head in headlines]

    toc.extend(entry + '<br>' for entry in entries)
    toc.append('\n')
    return toc
223
def buildMarkdown(toc_headlines, body, spacer=0, placeholder=None):
    """
    Combines table of contents and document body into one string.

    Keyword arguments:
        toc_headlines: lines for the table of contents
            as created by the createToc function.
        body: contents of the Markdown file including
            ID-anchor tags as returned by the
            tagAndCollect function.
        spacer: if non-zero, a <div> of that height in pixels is
            appended to the table of contents.
        placeholder: If a placeholder string is provided, every
            occurrence of it in the body is replaced by the TOC;
            otherwise the TOC is inserted in front of the body line
            at index contentLineNdx.

    Returns the resulting Markdown text.

    """
    toc_lines = toc_headlines
    if spacer:
        toc_lines = toc_headlines + ['\n<div style="height:%spx;"></div>\n' % (spacer)]
    toc_markdown = "\n".join(toc_lines)

    if placeholder:
        return "\n".join(body).replace(placeholder, toc_markdown)

    head = "\n".join(body[:contentLineNdx]) + '\n'
    tail = "\n".join(body[contentLineNdx:])
    return head + toc_markdown + tail
257
def outputMarkdown(markdown_cont, output_file):
    """
    Writes the text `markdown_cont` to the file `output_file`.

    Nothing is written if `output_file` is empty or None.

    """
    if not output_file:
        return

    with open(output_file, 'w') as handle:
        handle.write(markdown_cont)
266
def markdownToclify(
    input_file,
    output_file=None,
    min_toc_len=2,
    github=False,
    back_to_top=False,
    nolink=False,
    no_toc_header=False,
    spacer=0,
    placeholder=None,
    exclude_h=None):
    """ Function to add table of contents to markdown files.

    Parameters
    -----------
      input_file: str
        Path to the markdown input file.

      output_file: str (default: None)
        Path to the markdown output file. Nothing is written if None.

      min_toc_len: int (default: 2)
        Minimum number of entries to create a table of contents for.

      github: bool (default: False)
        Uses GitHub TOC syntax if True (no <a id> tags are inserted).

      back_to_top: bool (default: False)
        Inserts back-to-top links below headings if True.

      nolink: bool (default: False)
        Creates the table of contents without internal links if True.

      no_toc_header: bool (default: False)
        Suppresses the Table of Contents header if True

      spacer: int (default: 0)
        Inserts vertical space (in pixels) after the table of contents.

      placeholder: str (default: None)
        Inserts the TOC at the placeholder string instead
        of inserting the TOC at the top of the document.

      exclude_h: list (default None)
        Excludes header levels, e.g., if [2, 3], ignores header
        levels 2 and 3 in the TOC.

    Returns
    -----------
    changed: Boolean
      True if the generated markdown differs from the content of
      input_file, False otherwise. The output file is written in both
      cases.

    """
    # keep the unmodified lines to be able to report changes
    original_lines = readLines(input_file)

    # existing table of contents, back links and id tags are regenerated
    cleaned_contents = removeLines(
        removeToC(original_lines),
        remove=('[[back to top]', '<a class="mk-toclify"'))

    processed_contents, raw_headlines = tagAndCollect(
        cleaned_contents,
        id_tag=not github,
        back_links=back_to_top,
        exclude_h=exclude_h)

    # add table of contents?
    if len(raw_headlines) < min_toc_len:
        processed_headlines = []
    else:
        leftjustified_headlines = positioningHeadlines(raw_headlines)

        processed_headlines = createToc(
            leftjustified_headlines,
            hyperlink=not nolink,
            top_link=not nolink and not github,
            no_toc_header=no_toc_header)

    # without links there is no use for the inserted tags and back links
    if nolink:
        processed_contents = cleaned_contents

    cont = buildMarkdown(
        toc_headlines=processed_headlines,
        body=processed_contents,
        spacer=spacer,
        placeholder=placeholder)

    if output_file:
        outputMarkdown(cont, output_file)

    # readLines splits on '\n', so joining restores the text as read
    return cont != '\n'.join(original_lines)
353
def isReleaseNotes(f):
    """Returns True if the file name part of path `f` equals releaseNotesName."""
    _, file_name = os.path.split(f)
    return file_name == releaseNotesName
356
def excludeHeadingsFor(f):
    """Returns the list of heading levels to exclude from the table of contents of file `f`."""
    if isReleaseNotes(f):
        return headingExcludeRelease
    return headingExcludeDefault
359
def updateSingleDocumentToC(input_file, min_toc_len, verbose=False):
    """
    Add or update table of contents in specified file.

    The new content is written to a temporary file next to the input
    file (input_file + '.tmp'). The temporary file replaces the input
    file only if both differ; otherwise it is removed and the input
    file is left untouched.

    Keyword arguments:
        input_file: path of the markdown document to update.
        min_toc_len: minimum number of headlines required to create a
            table of contents.
        verbose: if true, prints the name of the file being processed.

    Return 1 if file changed, 0 otherwise.
    """
    if verbose:
        print('file: {}'.format(input_file))

    output_file = input_file + '.tmp'

    markdownToclify(
        input_file=input_file,
        output_file=output_file,
        min_toc_len=min_toc_len,
        github=True,
        back_to_top=False,
        nolink=False,
        no_toc_header=False,
        spacer=0,
        placeholder=None,
        exclude_h=excludeHeadingsFor(input_file))

    # Compare the raw bytes to find out whether anything changed at all.
    with open(input_file, 'rb') as before, open(output_file, 'rb') as after:
        unchanged = before.read() == after.read()

    if unchanged:
        os.remove(output_file)
        return 0

    # prevent race-condition (Python 3.3):
    if sys.version_info >= (3, 3):
        os.replace(output_file, input_file)
    else:
        os.remove(input_file)
        os.rename(output_file, input_file)

    return 1
387
def updateDocumentToC(paths, min_toc_len, verbose):
    """
    Add or update table of contents to specified paths.

    Every entry of `paths` is expanded as a glob pattern; matches that
    are not regular files are skipped.

    Return number of changed files, i.e. the sum of the values returned
    by updateSingleDocumentToC.
    """
    return sum(
        updateSingleDocumentToC(input_file=candidate, min_toc_len=min_toc_len, verbose=verbose)
        for pattern in paths
        for candidate in glob.glob(pattern)
        if os.path.isfile(candidate))
396
def updateDocumentToCMain():
    """
    Add or update table of contents to specified paths.

    Parses the command line, processes the given files (documentsDefault
    if none is given) and prints a summary of the result.
    """

    parser = argparse.ArgumentParser(
        description='Add or update table of contents in markdown documents.',
        formatter_class=argparse.RawTextHelpFormatter)

    # '*' instead of argparse.REMAINDER: REMAINDER collects everything
    # from the first positional on, option flags included, so that
    # options could end up being treated as file names.
    parser.add_argument(
        'Input',
        metavar='file',
        type=str,
        nargs='*',
        help='files to process, at default: docs/*.md')

    parser.add_argument(
        '-v', '--verbose',
        action='store_true',
        help='report the name of the file being processed')

    parser.add_argument(
        '--min-toc-entries',
        dest='minTocEntries',
        default=minTocEntries,
        type=int,
        metavar='N',
        help='the minimum number of entries to create a table of contents for [{deflt}]'.format(deflt=minTocEntries))

    # Removing is implemented as a minimum number of entries that no
    # document can reach.
    parser.add_argument(
        '--remove-toc',
        action='store_const',
        dest='minTocEntries',
        const=sys.maxsize,
        help='remove all tables of contents')

    args = parser.parse_args()

    paths = args.Input if args.Input else [documentsDefault]

    changedFiles = updateDocumentToC(paths=paths, min_toc_len=args.minTocEntries, verbose=args.verbose)

    if changedFiles > 0:
        print( "Processed table of contents in " + str(changedFiles) + " file(s)" )
    else:
        print( "No table of contents added or updated" )
442
# Run the command line interface only when executed as a script, not when
# the module is imported.
if __name__ == '__main__':
    updateDocumentToCMain()
445
446# end of file