Blame - scripts/updateDocumentToC.py - platform/external/catch2

blob: 948e5e1ddcc58311df844cb423f978537c4e71d9 [file] [log] [blame]

Martin Moene	61280e6	2017-08-29 21:34:43 +0200	[diff] [blame]	1	#!/usr/bin/env python
				2
				3	#
				4	# updateDocumentToC.py
				5	#
				6	# Insert table of contents at top of Catch markdown documents.
				7	#
				8	# This script is distributed under the GNU General Public License v3.0
				9	#
				10	# It is based on markdown-toclify version 1.7.1 by Sebastian Raschka,
				11	# https://github.com/rasbt/markdown-toclify
				12	#
				13
				14	from __future__ import print_function
				15	from scriptCommon import catchPath
				16
				17	import argparse
				18	import glob
				19	import os
				20	import re
				21	import sys
				22
# Configuration:

# Default for --min-toc-entries: documents with fewer collected headings
# than this get no table of contents.
minTocEntries = 4

# Heading levels passed as `exclude_h`, i.e. levels left out of the ToC.
headingExcludeDefault = [1,3,4,5]   # use level 2 headers by default
headingExcludeRelease = [2,3,4,5]   # use level 1 headers for release-notes.md

# Glob pattern processed when no files are given on the command line.
documentsDefault = os.path.join(os.path.relpath(catchPath), 'docs/*.md')
# Base name of the document that uses headingExcludeRelease.
releaseNotesName = 'release-notes.md'

# The ToC starts with a line beginning with contentTitle, located at
# 1-based line contentLineNo (0-based index contentLineNdx) of a document.
contentTitle = 'Contents'
contentLineNo = 4
contentLineNdx = contentLineNo - 1

# End configuration

# Characters kept as-is when a headline is turned into an anchor id;
# every other character is replaced by '-' (see dashifyHeadline).
VALIDS = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_-&'
				40
				41	def readLines(in_file):
				42	"""Returns a list of lines from a input markdown file."""
				43
				44	with open(in_file, 'r') as inf:
				45	in_contents = inf.read().split('\n')
				46	return in_contents
				47
				48	def removeLines(lines, remove=('[[back to top]', '<a class="mk-toclify"')):
				49	"""Removes existing [back to top] links and <a id> tags."""
				50
				51	if not remove:
				52	return lines[:]
				53
				54	out = []
				55	for l in lines:
				56	if l.startswith(remove):
				57	continue
				58	out.append(l)
				59	return out
				60
				61	def removeToC(lines):
				62	"""Removes existing table of contents starting at index contentLineNdx."""
				63	if not lines[contentLineNdx ].startswith(contentTitle):
				64	return lines[:]
				65
				66	result_top = lines[:contentLineNdx]
				67
				68	pos = contentLineNdx + 1
				69	while lines[pos].startswith('['):
				70	pos = pos + 1
				71
				72	result_bottom = lines[pos + 1:]
				73
				74	return result_top + result_bottom
				75
				76	def dashifyHeadline(line):
				77	"""
				78	Takes a header line from a Markdown document and
				79	returns a tuple of the
				80	'#'-stripped version of the head line,
				81	a string version for <a id=''></a> anchor tags,
				82	and the level of the headline as integer.
				83	E.g.,
				84	>>> dashifyHeadline('### some header lvl3')
				85	('Some header lvl3', 'some-header-lvl3', 3)
				86
				87	"""
				88	stripped_right = line.rstrip('#')
				89	stripped_both = stripped_right.lstrip('#')
				90	level = len(stripped_right) - len(stripped_both)
				91	stripped_wspace = stripped_both.strip()
				92
				93	# character replacements
				94	replaced_colon = stripped_wspace.replace('.', '')
				95	replaced_slash = replaced_colon.replace('/', '')
				96	rem_nonvalids = ''.join([c if c in VALIDS
				97	else '-' for c in replaced_slash])
				98
				99	lowered = rem_nonvalids.lower()
				100	dashified = re.sub(r'(-)\1+', r'\1', lowered) # remove duplicate dashes
				101	dashified = dashified.strip('-') # strip dashes from start and end
				102
				103	# exception '&' (double-dash in github)
				104	dashified = dashified.replace('-&-', '--')
				105
				106	return [stripped_wspace, dashified, level]
				107
				108	def tagAndCollect(lines, id_tag=True, back_links=False, exclude_h=None):
				109	"""
				110	Gets headlines from the markdown document and creates anchor tags.
				111
				112	Keyword arguments:
				113	lines: a list of sublists where every sublist
				114	represents a line from a Markdown document.
				115	id_tag: if true, creates inserts a the <a id> tags (not req. by GitHub)
				116	back_links: if true, adds "back to top" links below each headline
				117	exclude_h: header levels to exclude. E.g., [2, 3]
				118	excludes level 2 and 3 headings.
				119
				120	Returns a tuple of 2 lists:
				121	1st list:
				122	A modified version of the input list where
				123	<a id="some-header"></a> anchor tags where inserted
				124	above the header lines (if github is False).
				125
				126	2nd list:
				127	A list of 3-value sublists, where the first value
				128	represents the heading, the second value the string
				129	that was inserted assigned to the IDs in the anchor tags,
				130	and the third value is an integer that reprents the headline level.
				131	E.g.,
				132	[['some header lvl3', 'some-header-lvl3', 3], ...]
				133
				134	"""
				135	out_contents = []
				136	headlines = []
				137	for l in lines:
				138	saw_headline = False
				139
				140	orig_len = len(l)
				141	l_stripped = l.lstrip()
				142
				143	if l_stripped.startswith(('# ', '## ', '### ', '#### ', '##### ', '###### ')):
				144
				145	# comply with new markdown standards
				146
				147	# not a headline if '#' not followed by whitespace '##no-header':
				148	if not l.lstrip('#').startswith(' '):
				149	continue
				150	# not a headline if more than 6 '#':
				151	if len(l) - len(l.lstrip('#')) > 6:
				152	continue
				153	# headers can be indented by at most 3 spaces:
				154	if orig_len - len(l_stripped) > 3:
				155	continue
				156
				157	# ignore empty headers
				158	if not set(l) - {'#', ' '}:
				159	continue
				160
				161	saw_headline = True
				162	dashified = dashifyHeadline(l)
				163
				164	if not exclude_h or not dashified[-1] in exclude_h:
				165	if id_tag:
				166	id_tag = '<a class="mk-toclify" id="%s"></a>'\
				167	% (dashified[1])
				168	out_contents.append(id_tag)
				169	headlines.append(dashified)
				170
				171	out_contents.append(l)
				172	if back_links and saw_headline:
				173	out_contents.append('[[back to top](#table-of-contents)]')
				174	return out_contents, headlines
				175
				176	def positioningHeadlines(headlines):
				177	"""
				178	Strips unnecessary whitespaces/tabs if first header is not left-aligned
				179	"""
				180	left_just = False
				181	for row in headlines:
				182	if row[-1] == 1:
				183	left_just = True
				184	break
				185	if not left_just:
				186	for row in headlines:
				187	row[-1] -= 1
				188	return headlines
				189
				190	def createToc(headlines, hyperlink=True, top_link=False, no_toc_header=False):
				191	"""
				192	Creates the table of contents from the headline list
				193	that was returned by the tagAndCollect function.
				194
				195	Keyword Arguments:
				196	headlines: list of lists
				197	e.g., ['Some header lvl3', 'some-header-lvl3', 3]
				198	hyperlink: Creates hyperlinks in Markdown format if True,
				199	e.g., '- [Some header lvl1](#some-header-lvl1)'
				200	top_link: if True, add a id tag for linking the table
				201	of contents itself (for the back-to-top-links)
				202	no_toc_header: suppresses TOC header if True.
				203
				204	Returns a list of headlines for a table of contents
				205	in Markdown format,
				206	e.g., [' - [Some header lvl3](#some-header-lvl3)', ...]
				207
				208	"""
				209	processed = []
				210	if not no_toc_header:
				211	if top_link:
				212	processed.append('<a class="mk-toclify" id="table-of-contents"></a>\n')
Martin Hořeňovský	276393e	2017-10-13 11:14:37 +0200	[diff] [blame]	213	processed.append(contentTitle + '<br>')
Martin Moene	61280e6	2017-08-29 21:34:43 +0200	[diff] [blame]	214
				215	for line in headlines:
				216	if hyperlink:
Martin Hořeňovský	276393e	2017-10-13 11:14:37 +0200	[diff] [blame]	217	item = '[%s](#%s)' % (line[0], line[1])
Martin Moene	61280e6	2017-08-29 21:34:43 +0200	[diff] [blame]	218	else:
				219	item = '%s- %s' % ((line[2]-1)*' ', line[0])
Martin Hořeňovský	276393e	2017-10-13 11:14:37 +0200	[diff] [blame]	220	processed.append(item + '<br>')
Martin Moene	61280e6	2017-08-29 21:34:43 +0200	[diff] [blame]	221	processed.append('\n')
				222	return processed
				223
				224	def buildMarkdown(toc_headlines, body, spacer=0, placeholder=None):
				225	"""
				226	Returns a string with the Markdown output contents incl.
				227	the table of contents.
				228
				229	Keyword arguments:
				230	toc_headlines: lines for the table of contents
				231	as created by the createToc function.
				232	body: contents of the Markdown file including
				233	ID-anchor tags as returned by the
				234	tagAndCollect function.
				235	spacer: Adds vertical space after the table
				236	of contents. Height in pixels.
				237	placeholder: If a placeholder string is provided, the placeholder
				238	will be replaced by the TOC instead of inserting the TOC at
				239	the top of the document
				240
				241	"""
				242	if spacer:
				243	spacer_line = ['\n<div style="height:%spx;"></div>\n' % (spacer)]
				244	toc_markdown = "\n".join(toc_headlines + spacer_line)
				245	else:
				246	toc_markdown = "\n".join(toc_headlines)
				247
				248	if placeholder:
				249	body_markdown = "\n".join(body)
				250	markdown = body_markdown.replace(placeholder, toc_markdown)
				251	else:
				252	body_markdown_p1 = "\n".join(body[:contentLineNdx ]) + '\n'
				253	body_markdown_p2 = "\n".join(body[ contentLineNdx:])
				254	markdown = body_markdown_p1 + toc_markdown + body_markdown_p2
				255
				256	return markdown
				257
				258	def outputMarkdown(markdown_cont, output_file):
				259	"""
				260	Writes to an output file if `outfile` is a valid path.
				261
				262	"""
				263	if output_file:
				264	with open(output_file, 'w') as out:
				265	out.write(markdown_cont)
				266
				267	def markdownToclify(
				268	input_file,
				269	output_file=None,
				270	min_toc_len=2,
				271	github=False,
				272	back_to_top=False,
				273	nolink=False,
				274	no_toc_header=False,
				275	spacer=0,
				276	placeholder=None,
				277	exclude_h=None):
				278	""" Function to add table of contents to markdown files.
				279
				280	Parameters
				281	-----------
				282	input_file: str
				283	Path to the markdown input file.
				284
				285	output_file: str (defaul: None)
				286	Path to the markdown output file.
				287
				288	min_toc_len: int (default: 2)
				289	Miniumum number of entries to create a table of contents for.
				290
				291	github: bool (default: False)
				292	Uses GitHub TOC syntax if True.
				293
				294	back_to_top: bool (default: False)
				295	Inserts back-to-top links below headings if True.
				296
				297	nolink: bool (default: False)
				298	Creates the table of contents without internal links if True.
				299
				300	no_toc_header: bool (default: False)
				301	Suppresses the Table of Contents header if True
				302
				303	spacer: int (default: 0)
				304	Inserts horizontal space (in pixels) after the table of contents.
				305
				306	placeholder: str (default: None)
				307	Inserts the TOC at the placeholder string instead
				308	of inserting the TOC at the top of the document.
				309
				310	exclude_h: list (default None)
				311	Excludes header levels, e.g., if [2, 3], ignores header
				312	levels 2 and 3 in the TOC.
				313
				314	Returns
				315	-----------
				316	changed: Boolean
				317	True if the file has been updated, False otherwise.
				318
				319	"""
				320	cleaned_contents = removeLines(
				321	removeToC(readLines(input_file)),
				322	remove=('[[back to top]', '<a class="mk-toclify"'))
				323
				324	processed_contents, raw_headlines = tagAndCollect(
				325	cleaned_contents,
				326	id_tag=not github,
				327	back_links=back_to_top,
				328	exclude_h=exclude_h)
				329
				330	# add table of contents?
				331	if len(raw_headlines) < min_toc_len:
				332	processed_headlines = []
				333	else:
				334	leftjustified_headlines = positioningHeadlines(raw_headlines)
				335
				336	processed_headlines = createToc(
				337	leftjustified_headlines,
				338	hyperlink=not nolink,
				339	top_link=not nolink and not github,
				340	no_toc_header=no_toc_header)
				341
				342	if nolink:
				343	processed_contents = cleaned_contents
				344
				345	cont = buildMarkdown(
				346	toc_headlines=processed_headlines,
				347	body=processed_contents,
				348	spacer=spacer,
				349	placeholder=placeholder)
				350
				351	if output_file:
				352	outputMarkdown(cont, output_file)
				353
				354	def isReleaseNotes(f):
				355	return os.path.basename(f) == releaseNotesName
				356
				357	def excludeHeadingsFor(f):
				358	return headingExcludeRelease if isReleaseNotes(f) else headingExcludeDefault
				359
				360	def updateSingleDocumentToC(input_file, min_toc_len, verbose=False):
				361	"""Add or update table of contents in specified file. Return 1 if file changed, 0 otherwise."""
				362	if verbose :
				363	print( 'file: {}'.format(input_file))
				364
				365	output_file = input_file + '.tmp'
				366
				367	markdownToclify(
				368	input_file=input_file,
				369	output_file=output_file,
				370	min_toc_len=min_toc_len,
				371	github=True,
				372	back_to_top=False,
				373	nolink=False,
				374	no_toc_header=False,
				375	spacer=False,
				376	placeholder=False,
				377	exclude_h=excludeHeadingsFor(input_file))
				378
				379	# prevent race-condition (Python 3.3):
				380	if sys.version_info >= (3, 3):
				381	os.replace(output_file, input_file)
				382	else:
				383	os.remove(input_file)
				384	os.rename(output_file, input_file)
				385
				386	return 1
				387
				388	def updateDocumentToC(paths, min_toc_len, verbose):
				389	"""Add or update table of contents to specified paths. Return number of changed files"""
				390	n = 0
				391	for g in paths:
				392	for f in glob.glob(g):
				393	if os.path.isfile(f):
				394	n = n + updateSingleDocumentToC(input_file=f, min_toc_len=min_toc_len, verbose=verbose)
				395	return n
				396
				397	def updateDocumentToCMain():
				398	"""Add or update table of contents to specified paths."""
				399
				400	parser = argparse.ArgumentParser(
				401	description='Add or update table of contents in markdown documents.',
				402	epilog="""""",
				403	formatter_class=argparse.RawTextHelpFormatter)
				404
				405	parser.add_argument(
				406	'Input',
				407	metavar='file',
				408	type=str,
				409	nargs=argparse.REMAINDER,
				410	help='files to process, at default: docs/*.md')
				411
				412	parser.add_argument(
				413	'-v', '--verbose',
				414	action='store_true',
				415	help='report the name of the file being processed')
				416
				417	parser.add_argument(
				418	'--min-toc-entries',
				419	dest='minTocEntries',
				420	default=minTocEntries,
				421	type=int,
				422	metavar='N',
				423	help='the minimum number of entries to create a table of contents for [{deflt}]'.format(deflt=minTocEntries))
				424
				425	parser.add_argument(
				426	'--remove-toc',
				427	action='store_const',
				428	dest='minTocEntries',
				429	const=99,
				430	help='remove all tables of contents')
				431
				432	args = parser.parse_args()
				433
				434	paths = args.Input if len(args.Input) > 0 else [documentsDefault]
				435
				436	changedFiles = updateDocumentToC(paths=paths, min_toc_len=args.minTocEntries, verbose=args.verbose)
				437
				438	if changedFiles > 0:
				439	print( "Processed table of contents in " + str(changedFiles) + " file(s)" )
				440	else:
				441	print( "No table of contents added or updated" )
				442
# Run the command line interface only when executed as a script,
# not when imported as a module.
if __name__ == '__main__':
    updateDocumentToCMain()
				445
				446	# end of file