#!/usr/bin/python
"""
Utility for building the CDD from component markdown files.

From the compatibility/cdd directory, run python make-cdd.py.

Each generated CDD file is marked with a hash based on the content of the input files.

TODO(gdimino): Clean up and comment this code.
"""

import hashlib
import os
import pprint
import re
import subprocess

from bs4 import BeautifulSoup
import markdown
import tidylib

# TODO (gdimino): Clean up this code using templates
# from jinja2 import Template

# Heading tag names searched for when building heading ids and the table of
# contents. NOTE: 'h7' is not a standard HTML element; it is kept so the list
# stays identical to the ones the helper functions have always used.
HEADERS_FOR_TOC = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'h7']
# Version string interpolated into the generated document's <title>.
ANDROID_VERSION = "7.0, (N)"
# Maximum number of table-of-contents entries placed in one column.
TOC_PER_COL = 34

def get_section_info(my_path):
    """Render every markdown file found in the section directories of my_path.

    A directory is treated as a section when its name is not purely
    alphabetic and it is neither 'older-versions' nor '.git'.

    Args:
        my_path: Directory whose immediate subdirectories are scanned.

    Returns:
        A list of section dicts sorted by 'number'. Each dict has the keys
        'id' (directory name), 'number', 'title', 'html' (always '') and
        'children'. 'children' is a list of dicts with the keys 'file',
        'number', 'title', 'html' (the rendered markdown) and 'children'
        (always []), sorted by 'number'.

    Raises:
        ValueError, IndexError: If a directory or file name does not carry
            the expected underscore-separated numeric parts.
    """
    section_info = []
    for section_dir in get_immediate_subdirs(my_path):
        if section_dir.isalpha() or section_dir in ('older-versions', '.git'):
            continue
        print('dir = ' + section_dir)
        # Resolve against my_path so the scan does not depend on the current
        # working directory.
        section_path = os.path.join(my_path, section_dir)
        title = section_dir.split('_')[-1]
        child_data = []
        for filename in os.listdir(section_path):
            # Match on the extension only, so that e.g. editor backup files
            # whose names merely contain '.md' are ignored.
            if not filename.endswith('.md'):
                continue
            if filename == 'index.md':
                number = 0
            else:
                number = int(filename.split('_')[1])
            print('file = ' + filename + ', dir = ' + section_dir)
            # Read bytes and decode explicitly: this behaves the same on
            # Python 2 and 3, and the handle is closed deterministically.
            with open(os.path.join(section_path, filename), 'rb') as md_file:
                md_text = md_file.read().decode('utf-8')
            child_data.append({'file': filename,
                               'number': number,
                               'title': title,
                               'html': markdown.markdown(md_text),
                               'children': []})
        child_data.sort(key=lambda child: child['number'])
        # Every underscore-separated part except the last is concatenated and
        # read as one integer (for example '3_software' gives 3).
        section_number = int(''.join(section_dir.split('_')[:-1]))
        section_info.append({'id': section_dir,
                             'number': section_number,
                             'title': title,
                             'html': '',
                             'children': child_data})
    section_info.sort(key=lambda section: section['number'])
    return section_info

def get_soup(section_info):
    """Assemble the complete CDD HTML document and parse it.

    Args:
        section_info: List of section dicts; the 'html' value of every entry
            in each section's 'children' list is concatenated in order.

    Returns:
        A BeautifulSoup object for the assembled document.
    """
    html_head = '''<!DOCTYPE html>
<html>
<head>
<title>Android ''' + ANDROID_VERSION + ''' Compatibility Definition</title>
<link rel="stylesheet" type="text/css" href="source/android-cdd.css"/>
</head>
<body>
<div id="main">'''
    html_sections = ''.join(child['html']
                            for section in section_info
                            for child in section['children'])
    # The document is now opened with <html> and closed with </html>; it used
    # to end with a second opening <html> tag.
    html_body_text = html_head + html_sections + '</div></body></html>'
    # NOTE(review): no parser is named, so BeautifulSoup picks whichever one is
    # installed. Callers in this file use `.body` on parsed fragments, which
    # presumably needs a parser that adds <html>/<body> wrappers -- confirm
    # before pinning one.
    return BeautifulSoup(html_body_text)

def add_id_to_section_headers(soup):
    """Set the 'id' attribute of every heading in soup, modifying it in place.

    The id is whatever create_id() returns for the heading.

    Args:
        soup: Parsed document whose heading tags are updated.
    """
    # Use the shared module-level list rather than a private copy of it.
    for tag in soup.find_all(HEADERS_FOR_TOC):
        tag['id'] = create_id(tag)

def old_generate_toc(soup):
    """Build a single flat list of table-of-contents entries (older layout).

    Args:
        soup: Parsed document whose headings are listed.

    Returns:
        A 2-tuple: the parsed entry nodes (one <p class="toc_hN"> link per
        heading) and an empty string.
    """
    entries = [
        '<p class="toc_' + tag.name + '"><a href= "#' + create_id(tag) + '">'
        + tag.contents[0] + '</a></p>'
        for tag in soup.find_all(HEADERS_FOR_TOC)
    ]
    return (BeautifulSoup(''.join(entries)).body.contents, '')

def generate_toc(soup):
    """Build the table of contents as alternating left/right columns.

    Headings are split into runs of at most TOC_PER_COL entries. Runs at even
    positions go into a 'toc_left' div and runs at odd positions into a
    'toc_right' div. A page-break spacer follows each right-hand column, and a
    final clearing div follows the last column.

    Args:
        soup: Parsed document whose headings are listed.

    Returns:
        The contents of the parsed TOC markup's <body>: a list whose first
        element is the <div id="toc"> wrapper.
    """
    toc_entries = soup.find_all(HEADERS_FOR_TOC)
    toc_chunks = [toc_entries[i:i + TOC_PER_COL]
                  for i in range(0, len(toc_entries), TOC_PER_COL)]
    print('Number of chunks = %d' % len(toc_chunks))

    parts = ['<div id="toc">']
    # enumerate() gives each chunk's real position. Looking it up with
    # list.index() compares chunks by equality, so two chunks with equal
    # content would both resolve to the first one's column.
    for position, chunk in enumerate(toc_chunks):
        is_left = position % 2 == 0
        parts.append('<div id="toc_left">' if is_left
                     else '<div id="toc_right">')
        for tag in chunk:
            parts.append('<p class="toc_' + tag.name + '"><a href= "#'
                         + create_id(tag) + '">' + tag.contents[0]
                         + '</a></p>')
        parts.append('</div>')
        if not is_left:
            parts.append('<div style="clear: both; page-break-after:always;'
                         ' height:1px"></div>')
    parts.append('<div style="clear: both"></div>')
    # Close the wrapper explicitly instead of relying on parser recovery.
    parts.append('</div>')
    return BeautifulSoup(''.join(parts)).body.contents

def old_add_toc(soup):
    """Insert the older two-div table of contents at the top of <body>.

    Args:
        soup: Parsed document; modified in place.

    Returns:
        The same soup object.
    """
    toc = soup.new_tag('div', id='toc')
    toc_left = soup.new_tag('div', id='toc_left')
    toc_right = soup.new_tag('div', id='toc_right')
    toc.append(toc_left)
    toc.append(toc_right)
    # old_generate_toc() is the builder that returns a (left, right) pair.
    # generate_toc() returns one flat list and cannot be unpacked like this.
    toc_left.contents, toc_right.contents = old_generate_toc(soup)
    toc_title = BeautifulSoup("<h6>Table of Contents</h6>").body.contents[0]
    # The title is inserted last so that it ends up above the TOC.
    soup.body.insert(0, toc)
    soup.body.insert(0, toc_title)
    return soup

def add_toc(soup):
    """Insert a "Table of Contents" title and the generated TOC into <body>.

    Args:
        soup: Parsed document; modified in place.

    Returns:
        The same soup object.
    """
    # Only the first node returned by generate_toc() is inserted.
    toc_contents = generate_toc(soup)[0]
    toc_title = BeautifulSoup("<h6>Table of Contents</h6>").body.contents[0]
    # Both go in at index 0; inserting the title last places it above the TOC.
    soup.body.insert(0, toc_contents)
    soup.body.insert(0, toc_title)
    return soup

def create_id(header_tag):
    """Derive an anchor id from the first child of a heading tag.

    The text is lower-cased, then '. ' , ' ' and '.' are each replaced by an
    underscore, in that order (so '3.1. Foo Bar' becomes '3_1_foo_bar').

    Args:
        header_tag: Object whose contents[0] is the heading text.

    Returns:
        The id string.
    """
    anchor = header_tag.contents[0].lower()
    # Order matters: '. ' must collapse to a single underscore before the
    # remaining spaces and dots are handled one by one.
    for separator in ('. ', ' ', '.'):
        anchor = anchor.replace(separator, '_')
    return anchor

# Utilities
def get_immediate_subdirs(dir):
    """Return the names of the directories directly inside dir.

    Args:
        dir: Path of the directory to list.

    Returns:
        A list of entry names (not full paths), in os.listdir() order, for
        which os.path.isdir() is true.
    """
    subdirs = []
    for name in os.listdir(dir):
        if os.path.isdir(os.path.join(dir, name)):
            subdirs.append(name)
    return subdirs

# Odds and ends

def check_section_numbering(soup):
    """Extract the leading section number of every heading, level by level.

    NOTE: the numbers are extracted but not validated yet, so this always
    returns True when it completes.

    Args:
        soup: Object whose find_all(tag_name) returns the headings of one
            level; each heading's contents[0] is expected to be its text.

    Returns:
        True.
    """
    header_tags = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'h7']
    for tag in header_tags:
        header_numbers = []
        for heading in soup.find_all(tag):
            if not heading.contents:
                continue
            # Take the leading run of digits and dots, e.g. '3.1.' from
            # '3.1. Managed API Compatibility'.
            header_numbers.append(
                re.match(r"[\d.]*", heading.contents[0]).group(0))
        # TODO: validate header_numbers for this heading level.
    return True

def elim_para_whitespace(html):
    """Collapse a <p><a>text</a></p> group spread over several lines.

    Wherever an opening <p>, an opening <a>, the link text, </a> and </p>
    each sit on their own line, the newlines and indentation between the
    tags are removed and the group is followed by a single newline.

    Args:
        html: HTML text to rewrite.

    Returns:
        The rewritten text; input without such a group comes back unchanged.
    """
    pattern = re.compile(r"(<p[^>]*>)\s*\n\s*(<a[^>]*>)\n([^<]*)\n\s*(</a>)\n\s*(</p>)", re.M)
    return pattern.sub(r"\1\2\3\4\5\n", html)

def main():
    """Build the CDD HTML from the current directory and write it to a file.

    The output is written to the current working directory as
    test-generated-cdd-<hash>.html, where <hash> is the first five hex digits
    of the MD5 of the generated HTML.
    """
    my_path = os.getcwd()
    section_info = get_section_info(my_path)
    soup = get_soup(section_info)
    add_id_to_section_headers(soup)
    add_toc(soup)
    html = soup.prettify(formatter='html')
    # Encode once and reuse the bytes for hashing and writing: md5() needs
    # bytes, and hashing text fails on non-ASCII content.
    html_bytes = html.encode('utf-8')
    # Add a hash to the filename, so that identical inputs produce the same
    # output file.
    output_filename = "test-generated-cdd-%s.html" % hashlib.md5(html_bytes).hexdigest()[0:5]
    with open(output_filename, "wb") as output:
        output.write(html_bytes)
    # Code to generate PDF, needs work.
    # subprocess.call('wkhtmltopdf -B 1in -T 1in -L .75in -R .75in page ' + output_filename + ' --footer-html source/android-cdd-footer.html /tmp/android-cdd-body.pdf')

# Run the generator only when this file is executed as a script.
if __name__ == '__main__':
    main()