Glenn Kasten | 31ae5a1 | 2016-08-08 09:39:52 -0700 | [diff] [blame] | 1 | #!/usr/bin/python |
Gina Dimino | ee0fa6b | 2016-04-04 17:30:54 -0700 | [diff] [blame] | 2 | """ |
| 3 | Utility for building the CDD from component markdown files. |
| 4 | |
Gina Dimino | 8f4279b | 2017-01-11 16:18:39 -0800 | [diff] [blame] | 5 | From the compatibility/cdd directory, run: |
| 6 | python make-cdd.py --version <version number> --branch <AOSP branch> |
| 7 | --output <output file name> |
Gina Dimino | ee0fa6b | 2016-04-04 17:30:54 -0700 | [diff] [blame] | 8 | |
Gina Dimino | 391dde6 | 2017-11-15 13:48:52 -0800 | [diff] [blame] | 9 | Each generated CDD file is marked with a hash based on the content of the input files. |
Gina Dimino | ee0fa6b | 2016-04-04 17:30:54 -0700 | [diff] [blame] | 10 | |
| 11 | TODO(gdimino): Clean up and comment this code. |
| 12 | """ |
| 13 | |
| 14 | from bs4 import BeautifulSoup |
Gina Dimino | 8f4279b | 2017-01-11 16:18:39 -0800 | [diff] [blame] | 15 | import argparse |
Gina Dimino | 391dde6 | 2017-11-15 13:48:52 -0800 | [diff] [blame] | 16 | import codecs |
| 17 | import jinja2 |
Gina Dimino | ee0fa6b | 2016-04-04 17:30:54 -0700 | [diff] [blame] | 18 | import markdown |
| 19 | import os |
Gina Dimino | ee0fa6b | 2016-04-04 17:30:54 -0700 | [diff] [blame] | 20 | import re |
Gina Dimino | ee0fa6b | 2016-04-04 17:30:54 -0700 | [diff] [blame] | 21 | import subprocess |
Gina Dimino | 391dde6 | 2017-11-15 13:48:52 -0800 | [diff] [blame] | 22 | import tidylib |
Gina Dimino | ee0fa6b | 2016-04-04 17:30:54 -0700 | [diff] [blame] | 23 | |
Gina Dimino | ee0fa6b | 2016-04-04 17:30:54 -0700 | [diff] [blame] | 24 | |
# Heading tag names, h1 through h7.
HEADERS_FOR_TOC = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'h7']
# Default Android version; main() rebinds this module-level name at run time.
ANDROID_VERSION = "10"
# Number of table-of-contents entries placed in one column.
TOC_PER_COL = 34
| 29 | |
def get_section_info(my_path):
    """Collect the CDD sections stored in the subdirectories of my_path.

    Every immediate subdirectory whose name is not purely alphabetic, and is
    not 'older-versions' or '.git', is treated as a section. Each file in it
    whose name ends in '.md' is rendered to HTML with markdown and recorded
    as a child of that section.

    Args:
        my_path: Directory that contains the section subdirectories.

    Returns:
        A list of section dicts with the keys 'id', 'number', 'title', 'html'
        and 'children', sorted by 'number'. 'children' is a list of dicts
        with the keys 'file', 'number', 'title', 'html' and 'children',
        also sorted by 'number'.

    Raises:
        ValueError: If the text before the last '_' of a section directory
            name, or the second '_'-separated field of a file name, is not
            an integer.
        IndexError: If a markdown file other than 'index.md' has no '_' in
            its name.
    """
    section_info = []
    # Get section info from every directory whose name is not purely
    # alphabetic. TODO: fix this ugly hack.
    for section_dir in get_immediate_subdirs(my_path):
        if section_dir.isalpha() or section_dir in ('older-versions', '.git'):
            continue
        print('dir = ' + section_dir)
        name_parts = section_dir.split('_')
        title = name_parts[-1]
        # Resolve the section against my_path so the listing does not depend
        # on my_path also being the current working directory.
        section_path = os.path.join(my_path, section_dir)

        child_data = []
        for file_name in os.listdir(section_path):
            # Match on the extension only; a substring test would also pick
            # up editor leftovers such as 'index.md.swp'.
            if not file_name.endswith('.md'):
                continue
            if file_name == 'index.md':
                number = 0
            else:
                number = int(file_name.split('_')[1])
            print('file = ' + file_name + ', dir = ' + section_dir)
            md_path = os.path.join(section_path, file_name)
            with codecs.open(md_path, 'r', encoding='utf-8') as md_file:
                html_string = markdown.markdown(md_file.read())
            child_data.append({'file': file_name,
                               'number': number,
                               'title': title,
                               'html': html_string,
                               'children': []})
        child_data.sort(key=lambda child: child['number'])

        section_info.append({'id': section_dir,
                             # Everything before the title, with the
                             # underscores dropped, read as one integer.
                             'number': int(''.join(name_parts[:-1])),
                             'title': title,
                             'html': '',
                             'children': child_data})
    section_info.sort(key=lambda section: section['number'])
    return section_info
| 62 | |
| 63 | |
def get_soup(section_info, version):
    """Build the complete HTML page for the CDD and parse it.

    The page consists of a fixed head (title, stylesheet link, charset) and
    a <div id="main"> that holds the HTML of every section child, in the
    order given by section_info.

    Args:
        section_info: List of section dicts; the 'html' value of each entry
            in a section's 'children' list is appended to the page body.
        version: Android version string placed in the page title.

    Returns:
        A BeautifulSoup object for the assembled page.
    """
    html_parts = [u'''<!DOCTYPE html>
<html>
<head>
<title>Android ''' + version + u''' Compatibility Definition</title>
<link rel="stylesheet" type="text/css" href="source/android-cdd.css"/>
<meta charset="utf-8" />
</head>
<body>
<div id="main">''']

    for section in section_info:
        for child in section['children']:
            html_parts.append(child['html'])
    # Close the document properly: the page used to end with a second
    # opening <html> tag instead of </html>.
    html_parts.append(u'</div></body></html>')
    return BeautifulSoup(u''.join(html_parts))
| 79 | |
def get_soup_devsite(section_info):
    """Parse the concatenated HTML of all section children.

    Unlike get_soup(), no surrounding page markup is added.

    Args:
        section_info: List of section dicts; the 'html' value of each entry
            in a section's 'children' list is used, in order.

    Returns:
        A BeautifulSoup object for the concatenated HTML.
    """
    fragments = [child['html']
                 for section in section_info
                 for child in section['children']]
    return BeautifulSoup(''.join(fragments))
| 86 | |
Gina Dimino | ee0fa6b | 2016-04-04 17:30:54 -0700 | [diff] [blame] | 87 | |
def add_id_to_section_headers(soup):
    """Set the 'id' attribute of every h1-h7 tag in soup, in place.

    The id is whatever create_id() returns for the tag.

    Args:
        soup: Parsed document whose heading tags are modified.
    """
    for heading in soup.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'h7']):
        heading['id'] = create_id(heading)
| 92 | |
def generate_toc(soup):
    """Build the table-of-contents markup for the headings in soup.

    The h1-h7 tags of soup are split into chunks of TOC_PER_COL entries.
    Chunks alternate between a <div id="toc_left"> and a
    <div id="toc_right"> column; every right column is followed by a
    page-break div. Each entry links to '#' + create_id(tag).

    Args:
        soup: Parsed document to scan for headings.

    Returns:
        The contents of the <body> of the parsed TOC markup. add_toc() uses
        the first item, which is expected to be the <div id="toc"> element.
    """
    header_tags = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'h7']
    toc_entries = soup.find_all(header_tags)
    toc_chunks = [toc_entries[i:i + TOC_PER_COL]
                  for i in range(0, len(toc_entries), TOC_PER_COL)]
    print('Number of chunks = %d' % len(toc_chunks))

    html_parts = ['<div id="toc">']
    # enumerate() yields each chunk's real position; list.index() rescans
    # the list and reports the first chunk that merely compares equal.
    for chunk_index, chunk in enumerate(toc_chunks):
        is_left_column = chunk_index % 2 == 0
        if is_left_column:
            html_parts.append('<div id="toc_left">')
        else:
            html_parts.append('<div id="toc_right">')
        for tag in chunk:
            html_parts.append('<p class="toc_' + tag.name + '"><a href= "#' +
                              create_id(tag) + '">' + tag.contents[0] +
                              '</a></p>')
        html_parts.append('</div>')
        if not is_left_column:
            # A left/right pair is complete: clear the floats and break
            # the page.
            html_parts.append('<div style="clear: both; '
                              'page-break-after:always; height:1px"></div>')
    html_parts.append('<div style="clear: both"></div>')
    # Close the wrapper explicitly instead of relying on the parser to do it.
    html_parts.append('</div>')
    return BeautifulSoup(''.join(html_parts)).body.contents
| 113 | |
def add_toc(soup):
    """Put a "Table of Contents" heading and the TOC at the top of the body.

    Args:
        soup: Parsed document; its <body> is modified in place.

    Returns:
        The same soup object.
    """
    toc_block = generate_toc(soup)[0]
    title_tag = BeautifulSoup("<h6>Table of Contents</h6>").body.contents[0]
    body = soup.body
    # Both go in at position 0, so the title, inserted last, ends up first.
    body.insert(0, toc_block)
    body.insert(0, title_tag)
    return soup
| 120 | |
def create_id(header_tag):
    """Derive an anchor id from the first child of header_tag.

    The first entry of header_tag.contents is lower-cased; then '. ', ' '
    and '.' are each replaced by '_', in that order.

    Args:
        header_tag: Object whose 'contents' sequence starts with a string.

    Returns:
        The resulting id string.
    """
    anchor = header_tag.contents[0].lower()
    for old in ('. ', ' ', '.'):
        anchor = anchor.replace(old, '_')
    return anchor
| 123 | |
def decrease_headings(soup):
    """Rename every h1-h8 tag in soup to the next heading number.

    For example h1 becomes h2 and h2 becomes h3. The tags are renamed in
    place.

    Args:
        soup: Parsed document whose heading tags are renamed.

    Returns:
        The same soup object.
    """
    for heading in soup.find_all(['h1', 'h2', 'h3', 'h4',
                                  'h5', 'h6', 'h7', 'h8']):
        digit = re.search(r'(\d)', heading.name).group(1)
        heading.name = 'h%d' % (int(digit) + 1)
    return soup
Gina Dimino | ee0fa6b | 2016-04-04 17:30:54 -0700 | [diff] [blame] | 131 | |
def get_version_branch_and_output():
    """Read the Android version, AOSP branch and output base name.

    Values come from the --version, --branch and --output command-line
    options. Any option that is missing or empty is prompted for, in that
    order.

    Returns:
        A (version, branch, output) tuple of strings.
    """
    parser = argparse.ArgumentParser()
    for flag, help_text in (('--version', 'Android version'),
                            ('--branch', 'AOSP branch'),
                            ('--output', 'Base name of output file')):
        parser.add_argument(flag, help=help_text)
    args = parser.parse_args()

    # Fall back to interactive input for whatever was not supplied.
    version = args.version or raw_input('Android version for CDD: ')
    branch = args.branch or raw_input('Current AOSP branch for changelog: ')
    output = args.output or raw_input('Base name of desired output file: ')
    return (version, branch, output)
| 147 | |
Gina Dimino | 391dde6 | 2017-11-15 13:48:52 -0800 | [diff] [blame] | 148 | # Utilities |
def get_immediate_subdirs(dir):
    """Return the names of the directories located directly inside dir.

    Args:
        dir: Path of the directory to list.

    Returns:
        A list of entry names (not full paths), in os.listdir() order.
    """
    subdirs = []
    for entry in os.listdir(dir):
        if os.path.isdir(os.path.join(dir, entry)):
            subdirs.append(entry)
    return subdirs
| 152 | |
def render_content(page_info, template_filename):
    """Render a Jinja2 template file with the values in page_info.

    Args:
        page_info: Mapping of template variable names to values.
        template_filename: Path of the template file.

    Returns:
        The rendered template text.
    """
    # Decode the template while reading it. Calling .encode('utf8') on the
    # bytes already read forces an implicit ASCII decode first, which fails
    # on any non-ASCII template. NOTE(review): assumes templates are UTF-8,
    # like the other files this script reads and writes -- confirm.
    with codecs.open(template_filename, 'r', encoding='utf-8') as fp:
        template_text = fp.read()
    return jinja2.Template(template_text).render(page_info)
| 158 | |
| 159 | # Odds and ends |
| 160 | |
def check_section_numbering(soup):
    """Collect the leading section number of every heading in soup.

    For each heading level h1-h7, the run of digits and dots at the start of
    each heading's first child is gathered. The collected numbers are not
    validated yet, so the function currently always succeeds.

    Args:
        soup: Parsed document to scan; the first child of each heading is
            expected to be a string.

    Returns:
        True.
    """
    header_tags = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'h7']
    for tag in header_tags:
        header_numbers = []
        for heading in soup.find_all(tag):
            if not heading.contents:
                # An empty heading has no number to extract.
                continue
            # '[\d.]*' always matches (possibly the empty string) at the
            # start of the text, so match() never returns None here.
            number = re.match(r'[\d.]*', heading.contents[0]).group(0)
            header_numbers.append(number)
    return True
| 169 | |
| 170 | # Abandoned in favor of tidy. |
def elim_para_whitespace(html):
    """Collapse a paragraph holding a single link, spread over lines, to one line.

    Matches <p>, <a>, the link text, </a> and </p> when they are separated
    by newlines, and rewrites them as one line followed by a newline.

    Args:
        html: HTML text to process.

    Returns:
        The processed HTML text.
    """
    pattern = re.compile(
        r"(<p[^>]*>)\s*\n\s*(<a[^>]*>)\n([^<]*)\n\s*(</a>)\n\s*(</p>)", re.M)
    return pattern.sub(r"\1\2\3\4\5\n", html)
| 174 | |
| 175 | |
def elim_space_before_punc(html):
    """Remove whitespace between a closing </a> and '.', ',', ';' or ':'.

    Args:
        html: HTML text to process.

    Returns:
        The processed HTML text.
    """
    pattern = re.compile(r"</a>\s+([.,;:])", re.M)
    return pattern.sub(r"</a>\1", html)
| 179 | |
Gina Dimino | ee0fa6b | 2016-04-04 17:30:54 -0700 | [diff] [blame] | 180 | |
def main():
    """Generate the CDD HTML files from the markdown under the current directory.

    Writes '<output>.html' (the page with a table of contents, meant for PDF
    generation) and '<output>-devsite.html' (the body rendered through
    'source/devsite_template.html'). The placeholders ANDROID_VERSION and
    CURRENT_BRANCH in both documents are replaced by the values obtained
    from get_version_branch_and_output(). The module-level ANDROID_VERSION
    is rebound to the chosen version.
    """
    # Read version and branch info and output file name.
    global ANDROID_VERSION
    (ANDROID_VERSION, CURRENT_BRANCH, output_filename) = get_version_branch_and_output()

    # Scan current directory for source files and compile info for the toc.
    my_path = os.getcwd()
    section_info = get_section_info(my_path)

    # Get page info
    page_info = {'title': 'Android ANDROID_VERSION Compatibility Definition',
                 'book_path': '/_book.yaml',
                 'project_path': '/_project.yaml'}

    # Generate the HTML for PDF. Ids are assigned after the TOC is inserted
    # so that the "Table of Contents" heading receives one as well.
    soup = get_soup(section_info, ANDROID_VERSION)
    add_toc(soup)
    add_id_to_section_headers(soup)
    html = soup.prettify(formatter='html')

    # Generate the HTML for devsite
    devsite_soup = get_soup_devsite(section_info)
    add_id_to_section_headers(devsite_soup)
    page_info['body_html'] = decrease_headings(devsite_soup)
    devsite_html = render_content(page_info, 'source/devsite_template.html')

    # Add version and branch info. Plain string replacement keeps the values
    # literal; re.sub() would interpret backslashes in them as escapes.
    html = html.replace('ANDROID_VERSION', ANDROID_VERSION)
    html = html.replace('CURRENT_BRANCH', CURRENT_BRANCH)
    devsite_html = devsite_html.replace('ANDROID_VERSION', ANDROID_VERSION)
    devsite_html = devsite_html.replace('CURRENT_BRANCH', CURRENT_BRANCH)

    # Apply HTML Tidy to output
    (document, _) = tidylib.tidy_document(html, options={'doctype': 'omit'})
    (devsite_document, _) = tidylib.tidy_document(devsite_html, options={'doctype': 'omit'})

    # Eliminate space before punctuation. This must run on the tidied
    # documents, because those are what get written below.
    document = elim_space_before_punc(document)
    devsite_document = elim_space_before_punc(devsite_document)

    # Write output files; the with-blocks guarantee both files are flushed
    # and closed.
    with codecs.open('%s.html' % output_filename, 'w', encoding='utf-8') as output:
        output.write(document)

    with codecs.open('%s-devsite.html' % output_filename, 'w', encoding='utf-8') as devsite_output:
        devsite_output.write(devsite_document)

    # Code to generate PDF
    # TODO(gdimino)

    # subprocess.call('wkhtmltopdf -B 1in -T 1in -L .75in -R .75in page ' +
    # output_filename +
    # ' --footer-html source/android-cdd-footer.html /tmp/android-cdd-body.pdf', shell=True)
Gina Dimino | ee0fa6b | 2016-04-04 17:30:54 -0700 | [diff] [blame] | 241 | |
# Run the generator only when this file is executed as a script.
if __name__ == '__main__':
    main()
| 244 | |
| 245 | |
| 246 | |
| 247 | |