Blame - tools/generate-notice-files.py - platform/build

blob: adbf7c231568ad41bd138f90befa8d43c5d031cb [file] [log] [blame]

Daniel Berlin	f5a97d7	2012-03-29 10:33:19 -0400	[diff] [blame]	1	#!/usr/bin/env python
				2	#
				3	# Copyright (C) 2012 The Android Open Source Project
				4	#
				5	# Licensed under the Apache License, Version 2.0 (the "License");
				6	# you may not use this file except in compliance with the License.
				7	# You may obtain a copy of the License at
				8	#
				9	# http://www.apache.org/licenses/LICENSE-2.0
				10	#
				11	# Unless required by applicable law or agreed to in writing, software
				12	# distributed under the License is distributed on an "AS IS" BASIS,
				13	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				14	# See the License for the specific language governing permissions and
				15	# limitations under the License.
				16	"""
Jaekyun Seok	3b7560b	2017-04-19 15:26:47 +0900	[diff] [blame]	17	Usage: generate-notice-files --text-output [plain text output file] \
				18	--html-output [html output file] \
				19	--xml-output [xml output file] \
				20	-t [file title] -s [directory of notices]
Daniel Berlin	f5a97d7	2012-03-29 10:33:19 -0400	[diff] [blame]	21
Generate the Android notice files: a text file, plus optional HTML and XML files.
				23
				24	-h to display this usage message and exit.
				25	"""
				26	from collections import defaultdict
Jaekyun Seok	3b7560b	2017-04-19 15:26:47 +0900	[diff] [blame]	27	import argparse
Daniel Berlin	f5a97d7	2012-03-29 10:33:19 -0400	[diff] [blame]	28	import hashlib
				29	import itertools
				30	import os
				31	import os.path
				32	import re
				33	import sys
				34
				35	MD5_BLOCKSIZE = 1024 * 1024
				36	HTML_ESCAPE_TABLE = {
				37	"&": "&",
				38	'"': """,
				39	"'": "'",
				40	">": ">",
				41	"<": "<",
				42	}
				43
def hexify(s):
    """Return the lowercase hexadecimal representation of S.

    S may be a byte string (for example the result of a hashlib digest())
    or a text string; every element is rendered as two hex digits.
    """
    if isinstance(s, (bytes, bytearray)):
        # Iterating a bytearray yields integers on both Python 2 and
        # Python 3, whereas iterating bytes yields 1-char strings on
        # Python 2 and integers on Python 3 (where ord() would fail).
        codes = bytearray(s)
    else:
        codes = [ord(c) for c in s]
    return "".join("%02x" % code for code in codes)
				46
				47	def md5sum(filename):
				48	"""Calculate an MD5 of the file given by FILENAME,
				49	and return hex digest as a string.
				50	Output should be compatible with md5sum command"""
				51
				52	f = open(filename, "rb")
				53	sum = hashlib.md5()
				54	while 1:
				55	block = f.read(MD5_BLOCKSIZE)
				56	if not block:
				57	break
				58	sum.update(block)
				59	f.close()
				60	return hexify(sum.digest())
				61
				62
				63	def html_escape(text):
				64	"""Produce entities within text."""
				65	return "".join(HTML_ESCAPE_TABLE.get(c,c) for c in text)
				66
# Inline stylesheet written into the <head> of the generated HTML notice
# file; it styles the per-license table cells, their labels and file lists.
HTML_OUTPUT_CSS="""
<style type="text/css">
body { padding: 0; font-family: sans-serif; }
.same-license { background-color: #eeeeee; border-top: 20px solid white; padding: 10px; }
.label { font-weight: bold; }
.file-list { margin-left: 1em; color: blue; }
</style>
"""
				75
				76	def combine_notice_files_html(file_hash, input_dir, output_filename):
				77	"""Combine notice files in FILE_HASH and output a HTML version to OUTPUT_FILENAME."""
				78
				79	SRC_DIR_STRIP_RE = re.compile(input_dir + "(/.*).txt")
				80
				81	# Set up a filename to row id table (anchors inside tables don't work in
				82	# most browsers, but href's to table row ids do)
				83	id_table = {}
				84	id_count = 0
Dan Willemsen	8ae4984	2015-11-10 12:37:23 -0800	[diff] [blame]	85	for value in file_hash:
Daniel Berlin	f5a97d7	2012-03-29 10:33:19 -0400	[diff] [blame]	86	for filename in value:
				87	id_table[filename] = id_count
				88	id_count += 1
				89
				90	# Open the output file, and output the header pieces
				91	output_file = open(output_filename, "wb")
				92
				93	print >> output_file, "<html><head>"
				94	print >> output_file, HTML_OUTPUT_CSS
				95	print >> output_file, '</head><body topmargin="0" leftmargin="0" rightmargin="0" bottommargin="0">'
				96
				97	# Output our table of contents
				98	print >> output_file, '<div class="toc">'
				99	print >> output_file, "<ul>"
				100
				101	# Flatten the list of lists into a single list of filenames
Dan Willemsen	8ae4984	2015-11-10 12:37:23 -0800	[diff] [blame]	102	sorted_filenames = sorted(itertools.chain.from_iterable(file_hash))
Daniel Berlin	f5a97d7	2012-03-29 10:33:19 -0400	[diff] [blame]	103
				104	# Print out a nice table of contents
				105	for filename in sorted_filenames:
				106	stripped_filename = SRC_DIR_STRIP_RE.sub(r"\1", filename)
				107	print >> output_file, '<li><a href="#id%d">%s</a></li>' % (id_table.get(filename), stripped_filename)
				108
				109	print >> output_file, "</ul>"
				110	print >> output_file, "</div><!-- table of contents -->"
				111	# Output the individual notice file lists
				112	print >>output_file, '<table cellpadding="0" cellspacing="0" border="0">'
Dan Willemsen	8ae4984	2015-11-10 12:37:23 -0800	[diff] [blame]	113	for value in file_hash:
Daniel Berlin	f5a97d7	2012-03-29 10:33:19 -0400	[diff] [blame]	114	print >> output_file, '<tr id="id%d"><td class="same-license">' % id_table.get(value[0])
				115	print >> output_file, '<div class="label">Notices for file(s):</div>'
				116	print >> output_file, '<div class="file-list">'
Dan Willemsen	8ae4984	2015-11-10 12:37:23 -0800	[diff] [blame]	117	for filename in value:
Daniel Berlin	f5a97d7	2012-03-29 10:33:19 -0400	[diff] [blame]	118	print >> output_file, "%s <br/>" % (SRC_DIR_STRIP_RE.sub(r"\1", filename))
				119	print >> output_file, "</div><!-- file-list -->"
				120	print >> output_file
				121	print >> output_file, '<pre class="license-text">'
				122	print >> output_file, html_escape(open(value[0]).read())
				123	print >> output_file, "</pre><!-- license-text -->"
				124	print >> output_file, "</td></tr><!-- same-license -->"
				125	print >> output_file
				126	print >> output_file
				127	print >> output_file
				128
				129	# Finish off the file output
				130	print >> output_file, "</table>"
				131	print >> output_file, "</body></html>"
				132	output_file.close()
				133
				134	def combine_notice_files_text(file_hash, input_dir, output_filename, file_title):
				135	"""Combine notice files in FILE_HASH and output a text version to OUTPUT_FILENAME."""
				136
				137	SRC_DIR_STRIP_RE = re.compile(input_dir + "(/.*).txt")
				138	output_file = open(output_filename, "wb")
				139	print >> output_file, file_title
Dan Willemsen	8ae4984	2015-11-10 12:37:23 -0800	[diff] [blame]	140	for value in file_hash:
Daniel Berlin	f5a97d7	2012-03-29 10:33:19 -0400	[diff] [blame]	141	print >> output_file, "============================================================"
				142	print >> output_file, "Notices for file(s):"
Dan Willemsen	8ae4984	2015-11-10 12:37:23 -0800	[diff] [blame]	143	for filename in value:
Daniel Berlin	f5a97d7	2012-03-29 10:33:19 -0400	[diff] [blame]	144	print >> output_file, SRC_DIR_STRIP_RE.sub(r"\1", filename)
				145	print >> output_file, "------------------------------------------------------------"
				146	print >> output_file, open(value[0]).read()
				147	output_file.close()
				148
def combine_notice_files_xml(files_with_same_hash, input_dir, output_filename):
    """Combine notice files in FILES_WITH_SAME_HASH and output a XML version to OUTPUT_FILENAME.

    FILES_WITH_SAME_HASH maps a key to the list of notice file paths that
    belong to it; the key is written as the contentId attribute.  Every
    path gets a <file-name> element, and every key gets one <file-content>
    element holding the contents of its first path in sorted order.
    INPUT_DIR is the leading directory that is stripped, together with the
    trailing ".txt", from every listed path.
    """

    # input_dir is a filesystem path, not a pattern, so it must be escaped;
    # the dot of the ".txt" suffix is escaped for the same reason.
    SRC_DIR_STRIP_RE = re.compile(re.escape(input_dir) + r"(/.*)\.txt")

    # Set up a filename to content id table
    id_table = {}
    for file_key, filenames in files_with_same_hash.items():
        for filename in filenames:
            id_table[filename] = file_key

    sorted_filenames = sorted(id_table)

    # Text mode lets plain strings be written on both Python 2 and Python 3;
    # "with" closes the file even if reading a notice fails half way.
    with open(output_filename, "w") as output_file:

        def emit(line=""):
            output_file.write("%s\n" % line)

        # Output the header pieces
        emit('<?xml version="1.0" encoding="utf-8"?>')
        emit("<licenses>")

        # Print out the list of file names with the id of their content
        for filename in sorted_filenames:
            stripped_filename = SRC_DIR_STRIP_RE.sub(r"\1", filename)
            emit('<file-name contentId="%s">%s</file-name>' % (id_table[filename], stripped_filename))

        emit()
        emit()

        # A set gives constant-time "already written?" checks.
        processed_file_keys = set()
        # Output each distinct notice once, taken from the first file (in
        # sorted order) that refers to it
        for filename in sorted_filenames:
            file_key = id_table[filename]
            if file_key in processed_file_keys:
                continue
            processed_file_keys.add(file_key)

            with open(filename) as notice_file:
                content = html_escape(notice_file.read())
            emit('<file-content contentId="%s"><![CDATA[%s]]></file-content>' % (file_key, content))
            emit()

        # Finish off the file output
        emit("</licenses>")
				192
				193	def get_args():
				194	parser = argparse.ArgumentParser()
				195	parser.add_argument(
				196	'--text-output', required=True,
				197	help='The text output file path.')
				198	parser.add_argument(
				199	'--html-output',
				200	help='The html output file path.')
				201	parser.add_argument(
				202	'--xml-output',
				203	help='The xml output file path.')
				204	parser.add_argument(
				205	'-t', '--title', required=True,
				206	help='The file title.')
				207	parser.add_argument(
				208	'-s', '--source-dir', required=True,
				209	help='The directory containing notices.')
				210	parser.add_argument(
				211	'-i', '--included-subdirs', action='append',
				212	help='The sub directories which should be included.')
				213	parser.add_argument(
				214	'-e', '--excluded-subdirs', action='append',
				215	help='The sub directories which should be excluded.')
				216	return parser.parse_args()
				217
				218	def main(argv):
				219	args = get_args()
				220
				221	txt_output_file = args.text_output
				222	html_output_file = args.html_output
				223	xml_output_file = args.xml_output
				224	file_title = args.title
				225	included_subdirs = []
				226	excluded_subdirs = []
				227	if args.included_subdirs is not None:
				228	included_subdirs = args.included_subdirs
				229	if args.excluded_subdirs is not None:
				230	excluded_subdirs = args.excluded_subdirs
Daniel Berlin	f5a97d7	2012-03-29 10:33:19 -0400	[diff] [blame]	231
				232	# Find all the notice files and md5 them
Jaekyun Seok	3b7560b	2017-04-19 15:26:47 +0900	[diff] [blame]	233	input_dir = os.path.normpath(args.source_dir)
Daniel Berlin	f5a97d7	2012-03-29 10:33:19 -0400	[diff] [blame]	234	files_with_same_hash = defaultdict(list)
				235	for root, dir, files in os.walk(input_dir):
				236	for file in files:
Jaekyun Seok	3b7560b	2017-04-19 15:26:47 +0900	[diff] [blame]	237	matched = True
				238	if len(included_subdirs) > 0:
				239	matched = False
				240	for subdir in included_subdirs:
				241	if root.startswith(input_dir + '/' + subdir):
				242	matched = True
				243	break
				244	elif len(excluded_subdirs) > 0:
				245	for subdir in excluded_subdirs:
				246	if root.startswith(input_dir + '/' + subdir):
				247	matched = False
				248	break
				249	if matched and file.endswith(".txt"):
Daniel Berlin	f5a97d7	2012-03-29 10:33:19 -0400	[diff] [blame]	250	filename = os.path.join(root, file)
				251	file_md5sum = md5sum(filename)
				252	files_with_same_hash[file_md5sum].append(filename)
				253
Dan Willemsen	8ae4984	2015-11-10 12:37:23 -0800	[diff] [blame]	254	filesets = [sorted(files_with_same_hash[md5]) for md5 in sorted(files_with_same_hash.keys())]
Daniel Berlin	f5a97d7	2012-03-29 10:33:19 -0400	[diff] [blame]	255
Daniel Berlin	f5a97d7	2012-03-29 10:33:19 -0400	[diff] [blame]	256	print "Combining NOTICE files into text"
Dan Willemsen	8ae4984	2015-11-10 12:37:23 -0800	[diff] [blame]	257	combine_notice_files_text(filesets, input_dir, txt_output_file, file_title)
Daniel Berlin	f5a97d7	2012-03-29 10:33:19 -0400	[diff] [blame]	258
Jaekyun Seok	3b7560b	2017-04-19 15:26:47 +0900	[diff] [blame]	259	if html_output_file is not None:
				260	print "Combining NOTICE files into HTML"
				261	combine_notice_files_html(filesets, input_dir, html_output_file)
				262
				263	if xml_output_file is not None:
				264	print "Combining NOTICE files into XML"
				265	combine_notice_files_xml(files_with_same_hash, input_dir, xml_output_file)
				266
Daniel Berlin	f5a97d7	2012-03-29 10:33:19 -0400	[diff] [blame]	267	if __name__ == "__main__":
Jaekyun Seok	3b7560b	2017-04-19 15:26:47 +0900	[diff] [blame]	268	main(sys.argv)