edisonn@google.com | 07f0147 | 2013-06-13 17:24:54 +0000 | [diff] [blame] | 1 | #!/usr/local/bin/python |
| 2 | # coding: utf-8 |
| 3 | |
| 4 | import sys |
| 5 | import re |
| 6 | |
# TODO(edisonn): write the processed part of the input to a new file, and the
# unprocessed part to another new file, so we can see what we miss.
# Keep blank lines, and also generate a version without the blank lines.

# TODO(edisonn): deal manually with
# TABLE 7.11 Restrictions on the entries in a soft-mask image dictionary
# KEY RESTRICTION
| 14 | |
| 15 | |
| 16 | |
| 17 | lines = 0 |
| 18 | table = '' |
| 19 | tableHeaderFound = False |
| 20 | tableLine = 0 |
| 21 | tableRow = 0 |
| 22 | columnWidth = [] |
| 23 | columnValues = ['', '', ''] |
| 24 | mustFollowTableHeader = False |
| 25 | |
| 26 | knownTypes = { |
| 27 | '(any)', |
| 28 | unicode('undefined', 'utf8'), |
| 29 | '(undefined)', |
| 30 | '(various)', |
| 31 | 'array', |
| 32 | 'or', |
| 33 | 'boolean', |
| 34 | 'date', |
| 35 | 'dictionary', |
| 36 | 'function', |
| 37 | 'integer', |
| 38 | unicode('file', 'utf8'), |
| 39 | 'file', |
| 40 | unicode('specification', 'utf8'), |
| 41 | 'specification', |
| 42 | 'name', |
| 43 | 'tree', |
| 44 | 'number', |
| 45 | 'rectangle', |
| 46 | 'stream', |
| 47 | 'string', |
| 48 | 'text', |
| 49 | ',', |
| 50 | ' ' |
| 51 | } |
| 52 | |
| 53 | def acceptType(val): |
| 54 | global knownTypes |
| 55 | |
| 56 | ret = val |
| 57 | |
| 58 | for item in knownTypes: |
| 59 | ret = ret.replace(item, '') |
| 60 | |
| 61 | return ret == '' |
| 62 | |
| 63 | |
| 64 | def inTable(): |
| 65 | global tableHeaderFound |
| 66 | return tableHeaderFound |
| 67 | |
| 68 | def tableDescriptionFound(desc): |
| 69 | global table |
| 70 | table = desc.strip() |
| 71 | |
| 72 | def tableHasHeader(): |
| 73 | global table |
| 74 | global tableHeaderFound |
| 75 | |
| 76 | tableHeaderFound = True |
| 77 | #print table |
| 78 | |
| 79 | |
| 80 | def commitRow(): |
| 81 | global columnValues |
| 82 | #print columnValues |
| 83 | |
| 84 | lastClosed = columnValues[2].find(')') |
| 85 | if lastClosed < 0: |
| 86 | print 'ERRRRRRRRRRRRRRROR' |
| 87 | print columnValues |
| 88 | return |
| 89 | |
| 90 | spec = columnValues[2][:lastClosed + 1] |
| 91 | spec = spec.replace('(', ';') |
| 92 | spec = spec.replace(')', ';') |
| 93 | spec = spec.strip(';') |
| 94 | |
| 95 | specs = spec.split(';') |
| 96 | |
| 97 | # clearly required, but it can be required with conditions. don't handle this ones here, but manually |
| 98 | required = specs[0] == 'Required' |
| 99 | |
| 100 | inheritable = False |
| 101 | version = '' |
| 102 | for s in specs: |
| 103 | if s.strip() == 'inheritable' or s.strip() == 'Inheritable': |
| 104 | inheritable = True |
| 105 | elif re.match('^PDF [0-9]*[\.[0-9]*]*', s.strip()): |
| 106 | version = s.strip() |
| 107 | elif s != 'Required': |
| 108 | required = False |
| 109 | |
| 110 | print spec |
| 111 | print specs |
| 112 | print required |
| 113 | print inheritable |
| 114 | print version |
| 115 | print columnValues |
| 116 | |
| 117 | def newRow(first, second, third): |
| 118 | global columnValues |
| 119 | columnValues = [first.rstrip(), second.rstrip(), third.rstrip()] |
| 120 | |
| 121 | def appendRow(second, third): |
| 122 | global columnValues |
| 123 | if second.rstrip() != '': |
| 124 | columnValues[1] = columnValues[1] + ' ' + second.rstrip() |
| 125 | if third.rstrip() != '': |
| 126 | columnValues[2] = columnValues[2] + ' ' + third.rstrip() |
| 127 | |
| 128 | def rebaseTable(line): |
| 129 | global knownTypes |
| 130 | global columnWidth |
| 131 | |
| 132 | words = line.split() |
| 133 | |
| 134 | if len(words) < 3: |
| 135 | return False |
| 136 | |
| 137 | i = 1 |
| 138 | while i < len(words) - 1 and words[i] in knownTypes: |
| 139 | i = i + 1 |
| 140 | |
| 141 | if words[i].startswith('(Optional') or words[i].startswith('(Required'): |
| 142 | commitRow() |
| 143 | |
| 144 | columnWidth[0] = line.find(words[1]) |
| 145 | |
| 146 | if words[i].startswith('(Optional'): |
| 147 | columnWidth[1] = line.find('(Optional') - columnWidth[0] |
| 148 | if words[i].startswith('(Required'): |
| 149 | columnWidth[1] = line.find('(Required') - columnWidth[0] |
| 150 | return True |
| 151 | |
| 152 | return False |
| 153 | |
| 154 | |
| 155 | def stopTable(): |
| 156 | global tableHeaderFound |
| 157 | commitRow() |
| 158 | tableHeaderFound = False |
| 159 | |
| 160 | |
| 161 | def killTable(): |
| 162 | return |
| 163 | |
| 164 | def processLine(line): |
| 165 | global lines |
| 166 | global tableLine |
| 167 | global tableRow |
| 168 | global columnWidth |
| 169 | global columnValues |
| 170 | global mustFollowTableHeader |
| 171 | |
| 172 | lines = lines + 1 |
| 173 | |
| 174 | line = unicode(line, 'utf8') |
| 175 | |
| 176 | striped = line.rstrip() |
| 177 | |
| 178 | words = line.split() |
| 179 | if len(words) == 0: |
| 180 | return |
| 181 | |
| 182 | if words[0] == 'TABLE': |
| 183 | tableDescriptionFound(striped) |
| 184 | mustFollowTableHeader = True |
| 185 | return |
| 186 | |
| 187 | if mustFollowTableHeader: |
| 188 | mustFollowTableHeader = False |
| 189 | if len(words) != 3: |
| 190 | killTable() |
| 191 | |
| 192 | # TODO(edisonn): support for generic table! |
| 193 | if words[0] != 'KEY' or words[1] != 'TYPE' or words[2] != 'VALUE': |
| 194 | killTable() |
| 195 | return |
| 196 | |
| 197 | tableHasHeader() |
| 198 | columnWidth = [0, 0, 0] |
| 199 | columnWidth[0] = striped.index('TYPE') |
| 200 | columnWidth[1] = striped.index('VALUE') - striped.index('TYPE') |
| 201 | columnWidth[2] = 0 |
| 202 | return |
| 203 | |
| 204 | if inTable(): |
| 205 | tableLine = tableLine + 1 |
| 206 | first = striped[0 : columnWidth[0]] |
| 207 | second = striped[columnWidth[0] : columnWidth[0] + columnWidth[1]] |
| 208 | third = striped[columnWidth[0] + columnWidth[1] :] |
| 209 | |
| 210 | if tableLine == 1: |
| 211 | if third[0] != '(': |
| 212 | killTable() |
| 213 | return |
| 214 | |
| 215 | newRow(first, second, third) |
| 216 | return |
| 217 | |
| 218 | if rebaseTable(striped): |
| 219 | first = striped[0 : columnWidth[0]] |
| 220 | second = striped[columnWidth[0] : columnWidth[0] + columnWidth[1]] |
| 221 | third = striped[columnWidth[0] + columnWidth[1] :] |
| 222 | |
| 223 | first = first.rstrip() |
| 224 | second = second.rstrip() |
| 225 | third = third.rstrip() |
| 226 | |
| 227 | if first == '' and second == '' and third != '': |
| 228 | appendRow(second, third) |
| 229 | return |
| 230 | |
| 231 | if len(first.split()) > 1: |
| 232 | stopTable() |
| 233 | return |
| 234 | |
| 235 | if first != '' and first[0] == ' ': |
| 236 | stopTable() |
| 237 | return |
| 238 | |
| 239 | if first != '' and second != '' and third == '': |
| 240 | stopTable() |
| 241 | return |
| 242 | |
| 243 | if first == '' and second != '' and second[0] != ' ': |
| 244 | if acceptType(second): |
| 245 | appendRow(second, third) |
| 246 | else: |
| 247 | stopTable() |
| 248 | return |
| 249 | |
| 250 | if first != '' and second != '' and third[0] != '(': |
| 251 | stopTable() |
| 252 | return |
| 253 | |
| 254 | if first == '' and second != '' and second[0] == ' ': |
| 255 | stopTable() |
| 256 | return |
| 257 | |
| 258 | if first != '' and second != '' and third[0] == '(': |
| 259 | commitRow() |
| 260 | newRow(first, second, third) |
| 261 | return |
| 262 | |
| 263 | |
| 264 | def generateDef(): |
| 265 | global lines |
| 266 | for line in sys.stdin: |
| 267 | processLine(line) |
| 268 | print lines |
| 269 | |
| 270 | if '__main__' == __name__: |
| 271 | sys.exit(generateDef()) |