#!/usr/bin/python
"""
Utility for building the CDD from component markdown files.

From the compatibility/cdd directory, run:
python make-cdd.py --version <version number> --branch <AOSP branch>
    --output <output file name>

Each generated CDD file is marked with a hash based on the content of the input files.

TODO(gdimino): Clean up and comment this code.
"""
| 13 | |
| 14 | from bs4 import BeautifulSoup |
Gina Dimino | 8f4279b | 2017-01-11 16:18:39 -0800 | [diff] [blame] | 15 | import argparse |
Gina Dimino | 391dde6 | 2017-11-15 13:48:52 -0800 | [diff] [blame] | 16 | import codecs |
| 17 | import jinja2 |
Gina Dimino | ee0fa6b | 2016-04-04 17:30:54 -0700 | [diff] [blame] | 18 | import markdown |
| 19 | import os |
Gina Dimino | ee0fa6b | 2016-04-04 17:30:54 -0700 | [diff] [blame] | 20 | import re |
Gina Dimino | ee0fa6b | 2016-04-04 17:30:54 -0700 | [diff] [blame] | 21 | import subprocess |
Gina Dimino | 391dde6 | 2017-11-15 13:48:52 -0800 | [diff] [blame] | 22 | import tidylib |
Gina Dimino | ee0fa6b | 2016-04-04 17:30:54 -0700 | [diff] [blame] | 23 | |
Gina Dimino | ee0fa6b | 2016-04-04 17:30:54 -0700 | [diff] [blame] | 24 | |
# Heading tag names intended for the table of contents. NOTE(review): the
# functions visible in this file keep their own copies of this list.
HEADERS_FOR_TOC = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'h7']

# Default version string; main() rebinds this global from the --version
# argument (or the interactive prompt). The unresolved merge-conflict markers
# that surrounded this assignment were removed; the HEAD side is kept.
ANDROID_VERSION = "7.0, (N)"

# Number of table-of-contents entries per column chunk (see generate_toc).
TOC_PER_COL = 34
| 32 | |
def get_section_info(my_path):
    """Collect the markdown sections found directly under *my_path*.

    Every immediate subdirectory whose name is not purely alphabetic (and is
    not 'older-versions' or '.git') is treated as a section. Each file in it
    whose name ends with '.md' is read as UTF-8, rendered to HTML with
    markdown.markdown() and recorded as a child of that section.

    Args:
        my_path: directory that contains the section subdirectories.

    Returns:
        A list of section dicts sorted by 'number'. Each has the keys
        'id' (directory name), 'number' (int built by concatenating the
        underscore-separated parts of the directory name before the last one),
        'title' (last underscore-separated part), 'html' (always '') and
        'children'. 'children' is a list of dicts sorted by 'number' with the
        keys 'file', 'number' (0 for index.md, otherwise the int in the second
        underscore-separated field of the file name), 'title' (the
        directory's title, not the file's), 'html' and 'children' (always []).

    Raises:
        ValueError: if a directory or file name does not carry the expected
            numeric fields.
        IndexError: if a non-index markdown file name has no underscore.
    """
    section_info = []
    # TODO: selecting sections by "name is not purely alphabetic" is a hack.
    for section_dir in get_immediate_subdirs(my_path):
        if section_dir.isalpha() or section_dir in ('older-versions', '.git'):
            continue
        print('dir = ' + section_dir)
        # Resolve against my_path so listing and reading use the same
        # directory even when the current working directory differs.
        section_path = os.path.join(my_path, section_dir)
        name_parts = section_dir.split('_')
        title = name_parts[-1]
        child_data = []
        for file_name in os.listdir(section_path):
            # Match on the suffix so editor backups such as 'x.md~' or
            # '.x.md.swp' are not picked up as sections.
            if not file_name.endswith('.md'):
                continue
            if file_name == 'index.md':
                number = 0
            else:
                number = int(file_name.split('_')[1])
            print('file = ' + file_name + ', dir = ' + section_dir)
            # The context manager closes the handle even if rendering fails.
            with codecs.open(os.path.join(section_path, file_name), 'r',
                             encoding='utf-8') as md_file:
                html_string = markdown.markdown(md_file.read())
            child_data.append({'file': file_name,
                               'number': number,
                               'title': title,
                               'html': html_string,
                               'children': []})
        child_data.sort(key=lambda child: child['number'])
        section_info.append({'id': section_dir,
                             'number': int(''.join(name_parts[:-1])),
                             'title': title,
                             'html': '',
                             'children': child_data})
    section_info.sort(key=lambda section: section['number'])
    return section_info
| 65 | |
| 66 | |
def get_soup(section_info, version):
    """Build the standalone (PDF-oriented) HTML document and parse it.

    Args:
        section_info: list of section dicts as returned by get_section_info();
            only each child's 'html' value is used.
        version: Android version string placed in the page <title>.

    Returns:
        A BeautifulSoup object for the document: head boilerplate followed by
        a <div id="main"> that holds the HTML of every child of every section,
        in order.
    """
    # The document used to end with a second opening "<html>" instead of
    # "</html>" and never opened <html> at all; both are corrected here.
    page_start = u'''<!DOCTYPE html>
<html>
<head>
<title>Android ''' + version + ''' Compatibility Definition</title>
<link rel="stylesheet" type="text/css" href="source/android-cdd.css"/>
<meta charset="utf-8" />
</head>
<body>
<div id="main">'''

    sections_html = ''.join(child['html']
                            for section in section_info
                            for child in section['children'])
    html_body_text = page_start + sections_html + '</div></body></html>'
    # No parser is named, so Beautiful Soup picks the best one installed,
    # exactly as before.
    return BeautifulSoup(html_body_text)
| 82 | |
def get_soup_devsite(section_info):
    """Parse the concatenated HTML of all section children, with no page wrapper.

    Args:
        section_info: list of section dicts; each child's 'html' is used.

    Returns:
        A BeautifulSoup object built from the children's HTML joined in order.
    """
    fragments = [child['html']
                 for section in section_info
                 for child in section['children']]
    return BeautifulSoup(''.join(fragments))
| 89 | |
Gina Dimino | ee0fa6b | 2016-04-04 17:30:54 -0700 | [diff] [blame] | 90 | |
def add_id_to_section_headers(soup):
    """Set the ``id`` attribute of every h1-h7 tag in *soup* to create_id(tag).

    The soup is modified in place; nothing is returned.
    """
    heading_names = ['h%d' % level for level in range(1, 8)]
    for heading in soup.find_all(heading_names):
        heading['id'] = create_id(heading)
| 95 | |
def generate_toc(soup):
    """Build the table-of-contents markup for the headings in *soup*.

    The h1-h7 tags are split into chunks of TOC_PER_COL entries. Chunks at
    even positions go into a <div id="toc_left">, chunks at odd positions into
    a <div id="toc_right"> followed by a page-break div. Each entry is a
    <p class="toc_<tag name>"> holding a link to '#' + create_id(tag).

    Args:
        soup: parsed document whose headings are listed.

    Returns:
        The ``contents`` list of the parsed TOC markup's <body>; its first
        element is the <div id="toc"> that wraps everything.
    """
    header_tags = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'h7']
    toc_entries = soup.find_all(header_tags)
    toc_chunks = [toc_entries[start:start + TOC_PER_COL]
                  for start in range(0, len(toc_entries), TOC_PER_COL)]
    print('Number of chunks = %d' % len(toc_chunks))

    parts = ['<div id="toc">']
    # enumerate() supplies the chunk position directly. Looking it up with
    # list.index() was quadratic and returned the first match when two chunks
    # compared equal, which put the later chunk in the wrong column.
    for position, chunk in enumerate(toc_chunks):
        is_left = position % 2 == 0
        parts.append('<div id="toc_left">' if is_left else '<div id="toc_right">')
        for tag in chunk:
            parts.append('<p class="toc_' + tag.name + '"><a href= "#' +
                         create_id(tag) + '">' + tag.contents[0] + '</a></p>')
        parts.append('</div>')
        if not is_left:
            # A left/right pair is complete: force a page break after it.
            parts.append('<div style="clear: both; page-break-after:always; height:1px"></div>')
    parts.append('<div style="clear: both"></div>')
    # Close the outer toc div explicitly instead of relying on the parser to
    # do it at end of input.
    parts.append('</div>')
    return BeautifulSoup(''.join(parts)).body.contents
| 116 | |
def add_toc(soup):
    """Insert a "Table of Contents" heading and the generated TOC at the top of the body.

    Args:
        soup: parsed document; modified in place.

    Returns:
        The same *soup* object.
    """
    toc_block = generate_toc(soup)[0]
    title_tag = BeautifulSoup("<h6>Table of Contents</h6>").body.contents[0]
    body = soup.body
    # Insert the TOC first, then the title in front of it, so the title ends
    # up as the first child of <body>.
    body.insert(0, toc_block)
    body.insert(0, title_tag)
    return soup
| 123 | |
def create_id(header_tag):
    """Derive an anchor id from the first content item of *header_tag*.

    The text is lower-cased, then '. ', ' ' and '.' are each replaced by '_',
    in that order.
    """
    anchor = header_tag.contents[0].lower()
    for separator in ('. ', ' ', '.'):
        anchor = anchor.replace(separator, '_')
    return anchor
| 126 | |
def decrease_headings(soup):
    """Demote every h1-h8 tag in *soup* by one level (h1 becomes h2, and so on).

    Args:
        soup: parsed document; tags are renamed in place.

    Returns:
        The same *soup* object.
    """
    heading_names = ['h%d' % level for level in range(1, 9)]
    for heading in soup.find_all(heading_names):
        # The first digit in the tag name is the current heading level.
        current_level = int(re.search(r'(\d)', heading.name).group(1))
        heading.name = 'h%d' % (current_level + 1)
    return soup
Gina Dimino | ee0fa6b | 2016-04-04 17:30:54 -0700 | [diff] [blame] | 134 | |
def get_version_branch_and_output():
    """Read --version, --branch and --output, prompting for any that are missing.

    Returns:
        A (version, branch, output) tuple of strings.
    """
    parser = argparse.ArgumentParser()
    for flag, description in (('--version', 'Android version'),
                              ('--branch', 'AOSP branch'),
                              ('--output', 'Base name of output file')):
        parser.add_argument(flag, help=description)
    args = parser.parse_args()

    # Prompt, in this order, for every value not supplied on the command line.
    prompts = (('version', 'Android version for CDD: '),
               ('branch', 'Current AOSP branch for changelog: '),
               ('output', 'Base name of desired output file: '))
    for attribute, prompt in prompts:
        if not getattr(args, attribute):
            setattr(args, attribute, raw_input(prompt))
    return (args.version, args.branch, args.output)
| 150 | |
# Utilities
def get_immediate_subdirs(dir):
    """Return the names of the entries directly inside *dir* that are directories.

    Names are returned in os.listdir() order and are not joined with *dir*.
    """
    subdirs = []
    for entry in os.listdir(dir):
        if os.path.isdir(os.path.join(dir, entry)):
            subdirs.append(entry)
    return subdirs
| 155 | |
def render_content(page_info, template_filename):
    """Render the Jinja2 template in *template_filename* with *page_info*.

    Args:
        page_info: mapping of template variables passed to Template.render().
        template_filename: path of the template file, read as UTF-8.

    Returns:
        The rendered template text.
    """
    # Decode while reading. The previous read().encode('utf8') raised on
    # non-ASCII templates under Python 2 and produced bytes under Python 3.
    # The context manager also closes the file if read() fails.
    with codecs.open(template_filename, 'r', encoding='utf-8') as template_file:
        template_source = template_file.read()
    return jinja2.Template(template_source).render(page_info)
| 161 | |
# Odds and ends
| 163 | |
def check_section_numbering(soup):
    """Collect the leading section numbers of the h1-h7 headings in *soup*.

    NOTE(review): this is a placeholder. The numbers are gathered per heading
    level but no check is performed on them yet, so the result is always True.
    The previous body could not run: re.sub() was missing its input string,
    list.append() was given two arguments, and it returned the undefined name
    ``true``.

    Args:
        soup: parsed document to inspect.

    Returns:
        True.
    """
    header_tags = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'h7']
    for tag in header_tags:
        header_numbers = []
        for heading in soup.find_all(tag):
            if not heading.contents:
                # Nothing to read a number from.
                continue
            # Leading run of digits and dots, e.g. '3.2.' from '3.2. Title'.
            # Assumes the first content item is text, as create_id() does.
            header_numbers.append(re.match(r"[\d.]*", heading.contents[0]).group(0))
        # TODO: verify that header_numbers is consecutive for this level.
    return True
| 172 | |
# Abandoned in favor of tidy.
def elim_para_whitespace(html):
    """Collapse a <p> that wraps a single <a> spread over several lines onto one line.

    Each match of the multi-line <p>/<a>/text/</a>/</p> layout is rewritten
    as the five captured pieces joined together, followed by a newline.
    """
    spread_out_link = re.compile(
        r"(<p[^>]*>)\s*\n\s*(<a[^>]*>)\n([^<]*)\n\s*(</a>)\n\s*(</p>)", re.M)
    return spread_out_link.sub(r"\1\2\3\4\5\n", html)
| 177 | |
| 178 | |
def elim_space_before_punc(html):
    """Remove whitespace between a closing </a> and a following '.', ',', ';' or ':'."""
    gap_before_punctuation = re.compile(r"</a>\s+([.,;:])", re.M)
    return gap_before_punctuation.sub(r"</a>\1", html)
| 182 | |
Gina Dimino | ee0fa6b | 2016-04-04 17:30:54 -0700 | [diff] [blame] | 183 | |
def main():
    """Build the CDD HTML outputs from the markdown sections in the current directory.

    Reads the version, branch and output base name (arguments or prompts),
    renders every section, and writes two files: '<output>.html' (standalone
    page with a table of contents) and '<output>-devsite.html' (body rendered
    through source/devsite_template.html with headings demoted one level).
    Both are passed through HTML Tidy before being written.
    """
    # Read version and branch info and output file name.
    global ANDROID_VERSION
    (ANDROID_VERSION, CURRENT_BRANCH, output_filename) = get_version_branch_and_output()

    # Scan current directory for source files and compile info for the toc.
    my_path = os.getcwd()
    section_info = get_section_info(my_path)

    # Get page info. The ANDROID_VERSION placeholder in the title is filled
    # in by the substitution step below.
    page_info = {'title': 'Android ANDROID_VERSION Compatibility Definition',
                 'book_path': '/_book.yaml',
                 'project_path': '/_project.yaml'}

    # Generate the HTML for PDF
    soup = get_soup(section_info, ANDROID_VERSION)
    add_id_to_section_headers(soup)
    add_toc(soup)

    # Generate the HTML for devsite
    devsite_soup = get_soup_devsite(section_info)
    add_id_to_section_headers(devsite_soup)
    # Second pass over the PDF soup: this also gives an id to the
    # "Table of Contents" heading that add_toc() inserted above.
    add_id_to_section_headers(soup)
    page_info['body_html'] = decrease_headings(devsite_soup)
    devsite_html = render_content(page_info, 'source/devsite_template.html')

    html = soup.prettify(formatter='html')

    # Add version and branch info. Plain string replacement keeps backslashes
    # in the user-supplied values from being read as regex escapes.
    html = html.replace('ANDROID_VERSION', ANDROID_VERSION)
    html = html.replace('CURRENT_BRANCH', CURRENT_BRANCH)

    devsite_html = devsite_html.replace('ANDROID_VERSION', ANDROID_VERSION)
    devsite_html = devsite_html.replace('CURRENT_BRANCH', CURRENT_BRANCH)

    # Apply HTML Tidy to output; its diagnostics are not used.
    (document, _errors) = tidylib.tidy_document(html, options={'doctype': 'omit'})
    (devsite_document, _errors) = tidylib.tidy_document(devsite_html, options={'doctype': 'omit'})

    # Eliminate space before punctuation. This must run on the tidied
    # documents, which are what gets written. It used to run on the
    # pre-tidy strings, so its result was thrown away.
    document = elim_space_before_punc(document)
    devsite_document = elim_space_before_punc(devsite_document)

    # Write output files. The context managers close both files; before, the
    # devsite file was never closed because output.close() was called twice.
    with codecs.open('%s.html' % output_filename, 'w', encoding='utf-8') as output:
        output.write(document)

    with codecs.open('%s-devsite.html' % output_filename, 'w', encoding='utf-8') as devsite_output:
        devsite_output.write(devsite_document)

    # Code to generate PDF
    # TODO(gdimino)

    # subprocess.call('wkhtmltopdf -B 1in -T 1in -L .75in -R .75in page ' +
    #                 output_filename +
    #                 ' --footer-html source/android-cdd-footer.html /tmp/android-cdd-body.pdf', shell=True)


if __name__ == '__main__':
    main()
| 247 | |
| 248 | |
| 249 | |
| 250 | |