Glenn Kasten | 31ae5a1 | 2016-08-08 09:39:52 -0700 | [diff] [blame] | 1 | #!/usr/bin/python |
Gina Dimino | ee0fa6b | 2016-04-04 17:30:54 -0700 | [diff] [blame] | 2 | """ |
| 3 | Utility for building the CDD from component markdown files. |
| 4 | |
Gina Dimino | 8f4279b | 2017-01-11 16:18:39 -0800 | [diff] [blame] | 5 | From the compatibility/cdd directory, run: |
| 6 | python make-cdd.py --version <version number> --branch <AOSP branch> |
| 7 | --output <output file name> |
Gina Dimino | ee0fa6b | 2016-04-04 17:30:54 -0700 | [diff] [blame] | 8 | |
Gina Dimino | 391dde6 | 2017-11-15 13:48:52 -0800 | [diff] [blame] | 9 | Each generated CDD file is marked with a hash based on the content of the input files. |
Gina Dimino | ee0fa6b | 2016-04-04 17:30:54 -0700 | [diff] [blame] | 10 | |
| 11 | TODO(gdimino): Clean up and comment this code. |
| 12 | """ |
| 13 | |
| 14 | from bs4 import BeautifulSoup |
Gina Dimino | 8f4279b | 2017-01-11 16:18:39 -0800 | [diff] [blame] | 15 | import argparse |
Gina Dimino | 391dde6 | 2017-11-15 13:48:52 -0800 | [diff] [blame] | 16 | import codecs |
| 17 | import jinja2 |
Gina Dimino | ee0fa6b | 2016-04-04 17:30:54 -0700 | [diff] [blame] | 18 | import markdown |
| 19 | import os |
Gina Dimino | ee0fa6b | 2016-04-04 17:30:54 -0700 | [diff] [blame] | 20 | import re |
Gina Dimino | ee0fa6b | 2016-04-04 17:30:54 -0700 | [diff] [blame] | 21 | import subprocess |
Gina Dimino | 391dde6 | 2017-11-15 13:48:52 -0800 | [diff] [blame] | 22 | import tidylib |
Gina Dimino | ee0fa6b | 2016-04-04 17:30:54 -0700 | [diff] [blame] | 23 | |
Gina Dimino | ee0fa6b | 2016-04-04 17:30:54 -0700 | [diff] [blame] | 24 | |
# Heading tag names. NOTE(review): not referenced by any function visible in
# this file (each one builds its own list) -- confirm before removing.
HEADERS_FOR_TOC = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'h7']
# Version text that get_soup() splices into the page <title>.
ANDROID_VERSION = "7.0, (N)"
# Number of headings per table-of-contents column chunk in generate_toc().
TOC_PER_COL = 34
| 28 | |
def get_section_info(my_path):
    """Collect and render the markdown sections found under my_path.

    Every immediate subdirectory of my_path whose name is not purely
    alphabetic, and is not 'older-versions' or '.git', is treated as one
    section. Each file in it whose name ends with '.md' is read as UTF-8 and
    converted to HTML with markdown.markdown().

    Args:
      my_path: Directory that contains the section subdirectories.

    Returns:
      A list of section dicts sorted by 'number'. Each has the keys 'id'
      (directory name), 'number', 'title', 'html' (always '') and 'children'.
      'children' is a list of per-file dicts with the keys 'file', 'number',
      'title', 'html' and 'children', sorted by 'number'. A child's 'title'
      is taken from the directory name, not from the file name.

    Raises:
      ValueError: If the part of a section directory name before its last
        '_' (underscores removed) is not an integer, or the second
        '_'-separated field of a file name is not an integer.
      IndexError: If a markdown file other than 'index.md' has no '_' in
        its name.
    """
    section_info = []
    # TODO: selecting sections by "name is not purely alphabetic" is a hack.
    for section_dir in get_immediate_subdirs(my_path):
        if section_dir.isalpha() or section_dir in ('older-versions', '.git'):
            continue
        print('dir = ' + section_dir)
        # Resolve against my_path so the scan does not depend on the current
        # working directory.
        section_path = os.path.join(my_path, section_dir)
        title = section_dir.split('_')[-1]
        child_data = []
        for filename in os.listdir(section_path):
            # endswith() keeps editor backups such as 'x.md~' or 'x.md.orig'
            # out of the generated document.
            if not filename.endswith('.md'):
                continue
            if filename == 'index.md':
                number = 0
            else:
                number = int(filename.split('_')[1])
            print('file = ' + filename + ', dir = ' + section_dir)
            md_path = os.path.join(section_path, filename)
            with codecs.open(md_path, 'r', encoding='utf-8') as md_file:
                html_string = markdown.markdown(md_file.read())
            child_data.append({'file': filename,
                               'number': number,
                               'title': title,
                               'html': html_string,
                               'children': []})
        child_data.sort(key=lambda child: child['number'])
        section_info.append({'id': section_dir,
                             'number': int(''.join(section_dir.split('_')[:-1])),
                             'title': title,
                             'html': '',
                             'children': child_data})
    section_info.sort(key=lambda section: section['number'])
    return section_info
| 61 | |
| 62 | |
def get_soup(section_info, android_version=None):
    """Build the complete standalone HTML page and parse it.

    Args:
      section_info: List of section dicts as returned by get_section_info();
        the 'html' of every child is concatenated in order inside
        <div id="main">.
      android_version: Optional version text for the <title>. When omitted,
        the module-level ANDROID_VERSION is used.

    Returns:
      A BeautifulSoup object for the assembled page.
    """
    if android_version is None:
        android_version = ANDROID_VERSION

    pieces = [u'''<!DOCTYPE html>
<html>
<head>
<title>Android ''' + android_version + u''' Compatibility Definition</title>
<link rel="stylesheet" type="text/css" href="source/android-cdd.css"/>
<meta charset="utf-8" />
</head>
<body>
<div id="main">''']

    for section in section_info:
        for child in section['children']:
            pieces.append(child['html'])

    # Close the root element; the previous markup emitted a second opening
    # <html> tag here instead of </html>.
    pieces.append(u'</div></body></html>')
    # NOTE(review): no parser is named, so bs4 picks whichever is installed.
    return BeautifulSoup(u''.join(pieces))
| 78 | |
def get_soup_devsite(section_info):
    """Parse the concatenated section HTML with no page wrapper around it.

    Args:
      section_info: List of section dicts; the 'html' of every child of every
        section is joined in order.

    Returns:
      A BeautifulSoup object built from the joined HTML.
    """
    fragments = [child['html']
                 for section in section_info
                 for child in section['children']]
    return BeautifulSoup(''.join(fragments))
| 85 | |
Gina Dimino | ee0fa6b | 2016-04-04 17:30:54 -0700 | [diff] [blame] | 86 | |
def add_id_to_section_headers(soup):
    """Set the id attribute of every h1-h7 tag in soup to create_id(tag).

    Args:
      soup: Parsed document; modified in place.
    """
    heading_names = ['h%d' % level for level in range(1, 8)]
    for heading in soup.find_all(heading_names):
        heading['id'] = create_id(heading)
| 91 | |
def generate_toc(soup):
    """Build table-of-contents markup from the headings in soup.

    The h1-h7 tags are split into chunks of TOC_PER_COL entries. Chunks
    alternate between a "toc_left" and a "toc_right" column, and a page-break
    div follows each right-hand column. Each entry links to '#' +
    create_id(tag) and shows the first child of the heading as its text.

    Args:
      soup: Parsed document to scan for headings; not modified.

    Returns:
      The contents of the <body> of the parsed TOC markup. Its first element
      is the <div id="toc"> wrapper.
    """
    header_tags = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'h7']
    toc_entries = soup.find_all(header_tags)
    toc_chunks = [toc_entries[i:i + TOC_PER_COL]
                  for i in range(0, len(toc_entries), TOC_PER_COL)]
    print('Number of chunks = %d' % len(toc_chunks))

    pieces = ['<div id="toc">']
    # enumerate() gives each chunk its real position; looking the chunk up
    # with list.index() returned the first *equal* chunk instead.
    for position, chunk in enumerate(toc_chunks):
        is_left = position % 2 == 0
        pieces.append('<div id="toc_left">' if is_left else '<div id="toc_right">')
        for tag in chunk:
            pieces.append('<p class="toc_' + tag.name + '"><a href= "#' +
                          create_id(tag) + '">' + tag.contents[0] + '</a></p>')
        pieces.append('</div>')
        if not is_left:
            # A left/right pair is complete: start the next pair on a new page.
            pieces.append('<div style="clear: both; page-break-after:always; height:1px"></div>')
    pieces.append('<div style="clear: both"></div>')
    # Close the wrapper explicitly instead of relying on parser recovery.
    pieces.append('</div>')
    return BeautifulSoup(''.join(pieces)).body.contents
| 112 | |
def add_toc(soup):
    """Insert a "Table of Contents" heading and the TOC at the top of <body>.

    Args:
      soup: Parsed document; modified in place.

    Returns:
      The same soup object.
    """
    toc_block = generate_toc(soup)[0]
    title_markup = BeautifulSoup("<h6>Table of Contents</h6>")
    toc_heading = title_markup.body.contents[0]
    body = soup.body
    # Both go in at index 0, so the heading inserted last ends up first.
    body.insert(0, toc_block)
    body.insert(0, toc_heading)
    return soup
| 119 | |
def create_id(header_tag):
    """Derive an anchor id from the first child of a heading tag.

    The text is lower-cased, then '. ', ' ' and '.' are each replaced with
    '_', in that order.

    Args:
      header_tag: Object whose contents[0] is the heading text.

    Returns:
      The id string.
    """
    anchor = header_tag.contents[0].lower()
    for needle in ('. ', ' ', '.'):
        anchor = anchor.replace(needle, '_')
    return anchor
| 122 | |
def decrease_headings(soup):
    """Push every h1-h8 heading in soup one level down (h1 -> h2, and so on).

    Args:
      soup: Parsed document; heading tags are renamed in place.

    Returns:
      The same soup object.
    """
    for heading in soup.find_all(['h%d' % n for n in range(1, 9)]):
        digit = re.search(r'(\d)', heading.name).group(1)
        heading.name = 'h%d' % (int(digit) + 1)
    return soup
Gina Dimino | ee0fa6b | 2016-04-04 17:30:54 -0700 | [diff] [blame] | 130 | |
def get_version_branch_and_output(argv=None):
    """Read the version, branch and output base name.

    Values come from --version, --branch and --output; any that is missing
    or empty is requested interactively.

    Args:
      argv: Optional list of argument strings to parse. When None, argparse
        reads sys.argv[1:].

    Returns:
      A (version, branch, output) tuple of strings.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--version', help='Android version')
    parser.add_argument('--branch', help='AOSP branch')
    parser.add_argument('--output', help='Base name of output file')
    args = parser.parse_args(argv)

    # raw_input only exists on Python 2; input is its Python 3 equivalent.
    try:
        prompt = raw_input
    except NameError:
        prompt = input

    questions = (('version', 'Android version for CDD: '),
                 ('branch', 'Current AOSP branch for changelog: '),
                 ('output', 'Base name of desired output file: '))
    for attribute, question in questions:
        if not getattr(args, attribute):
            setattr(args, attribute, prompt(question))
    return (args.version, args.branch, args.output)
| 146 | |
Gina Dimino | 391dde6 | 2017-11-15 13:48:52 -0800 | [diff] [blame] | 147 | # Utilities |
def get_immediate_subdirs(dir):
    """List the names of the directories directly inside dir.

    Args:
      dir: Path of the directory to scan.

    Returns:
      A list of entry names (not full paths), in os.listdir() order.
    """
    subdirs = []
    for entry in os.listdir(dir):
        if os.path.isdir(os.path.join(dir, entry)):
            subdirs.append(entry)
    return subdirs
| 151 | |
def render_content(page_info, template_filename):
    """Render a Jinja2 template file with the values in page_info.

    Args:
      page_info: Mapping of template variable names to values.
      template_filename: Path of the template file, read as UTF-8.

    Returns:
      The rendered template text.
    """
    # Decode while reading. Calling .encode('utf8') on the raw bytes made
    # Python 2 attempt an implicit ASCII decode, which fails on any
    # non-ASCII template.
    with codecs.open(template_filename, 'r', encoding='utf-8') as fp:
        template_text = fp.read()
    return jinja2.Template(template_text).render(page_info)
| 157 | |
| 158 | # Odds and ends |
| 159 | |
def check_section_numbering(soup):
    """Collect the leading section number of every heading, level by level.

    NOTE(review): this is an unfinished check. The numbers are extracted but
    not yet validated, so the function always returns True.

    Args:
      soup: Parsed document exposing find_all(tag_name).

    Returns:
      True.
    """
    header_tags = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'h7']
    for tag in header_tags:
        header_numbers = []
        for heading in soup.find_all(tag):
            if not heading.contents:
                # An empty heading has no number to extract.
                continue
            # Keep only the leading run of digits and dots, e.g. "3.1.".
            header_numbers.append(
                re.sub(r"([\d.]*).*", r"\1", heading.contents[0]))
        # TODO: verify that header_numbers is consecutive for this level.
    return True
| 168 | |
| 169 | # Abandoned in favor of tidy. |
def elim_para_whitespace(html):
    """Collapse a <p> that wraps a single <a>, spread over lines, onto one line.

    Args:
      html: Markup text to clean.

    Returns:
      The text with each matching paragraph rewritten as
      <p><a>text</a></p> followed by a newline.
    """
    spread_out_link = re.compile(r"(<p[^>]*>)\s*\n\s*(<a[^>]*>)\n([^<]*)\n\s*(</a>)\n\s*(</p>)", re.M)
    return spread_out_link.sub(r"\1\2\3\4\5\n", html)
| 173 | |
| 174 | |
def elim_space_before_punc(html):
    """Remove whitespace between a closing </a> and a following . , ; or :

    Args:
      html: Markup text to clean.

    Returns:
      The cleaned text.
    """
    gap_after_link = re.compile(r"</a>\s+([.,;:])", re.M)
    return gap_after_link.sub(r"</a>\1", html)
| 178 | |
Gina Dimino | ee0fa6b | 2016-04-04 17:30:54 -0700 | [diff] [blame] | 179 | |
def main():
    """Generate <output>.html and <output>-devsite.html from the markdown tree.

    Reads the version, branch and output base name, renders every section
    found under the current directory, builds the standalone page (with a
    table of contents) and the devsite page (from
    source/devsite_template.html), substitutes the ANDROID_VERSION and
    CURRENT_BRANCH placeholders, runs HTML Tidy, removes stray whitespace
    before punctuation and writes both files as UTF-8.
    """
    # get_soup() reads the module-level ANDROID_VERSION for the <title>.
    # Without this declaration the assignment below only created a local,
    # and the title always carried the built-in default.
    global ANDROID_VERSION

    # Read version and branch info and output file name.
    (ANDROID_VERSION, current_branch, output_filename) = get_version_branch_and_output()

    # Scan current directory for source files and compile info for the toc.
    my_path = os.getcwd()
    section_info = get_section_info(my_path)

    # Get page info
    page_info = {'title': 'Android ANDROID_VERSION Compatibility Definition',
                 'book_path': '/_book.yaml',
                 'project_path': '/_project.yaml'}

    # Generate the HTML for PDF. Ids are assigned after the TOC is inserted
    # so the "Table of Contents" heading receives one as well.
    soup = get_soup(section_info)
    add_toc(soup)
    add_id_to_section_headers(soup)
    html = soup.prettify(formatter='html')

    # Generate the HTML for devsite
    devsite_soup = get_soup_devsite(section_info)
    add_id_to_section_headers(devsite_soup)
    page_info['body_html'] = decrease_headings(devsite_soup)
    devsite_html = render_content(page_info, 'source/devsite_template.html')

    # Add version and branch info. Plain replace() is used because the
    # placeholders are literals and the values must not be read as regex
    # replacement templates.
    html = html.replace('ANDROID_VERSION', ANDROID_VERSION)
    html = html.replace('CURRENT_BRANCH', current_branch)

    devsite_html = devsite_html.replace('ANDROID_VERSION', ANDROID_VERSION)
    devsite_html = devsite_html.replace('CURRENT_BRANCH', current_branch)

    # Apply HTML Tidy to output
    (document, _) = tidylib.tidy_document(html, options={'doctype': 'omit'})
    (devsite_document, _) = tidylib.tidy_document(devsite_html, options={'doctype': 'omit'})

    # Eliminate space before punctuation. This must run on the tidied
    # documents, because those are what get written below.
    document = elim_space_before_punc(document)
    devsite_document = elim_space_before_punc(devsite_document)

    # Write output files; the with-blocks close each file exactly once.
    with codecs.open('%s.html' % output_filename, 'w', encoding='utf-8') as output:
        output.write(document)

    with codecs.open('%s-devsite.html' % output_filename, 'w', encoding='utf-8') as devsite_output:
        devsite_output.write(devsite_document)

    # Code to generate PDF
    # TODO(gdimino)

    # subprocess.call('wkhtmltopdf -B 1in -T 1in -L .75in -R .75in page ' +
    #                 output_filename +
    #                 ' --footer-html source/android-cdd-footer.html /tmp/android-cdd-body.pdf', shell=True)
Gina Dimino | ee0fa6b | 2016-04-04 17:30:54 -0700 | [diff] [blame] | 239 | |
# Run the generator only when this file is executed as a script.
if __name__ == '__main__':
    main()
| 242 | |
| 243 | |
| 244 | |
| 245 | |