blob: fe3e293d59f92c12ee104de434ecad46ded24875 [file] [log] [blame]
edisonn@google.com07f01472013-06-13 17:24:54 +00001#!/usr/local/bin/python
2# coding: utf-8
3
4import sys
5import re
6
# TODO(edisonn): put the processed part of the file in a new file, and the
# unprocessed part in another new file, so we can see what we miss.
# Keep blank lines, and also generate a version without the blank lines.

# TODO(edisonn): deal manually with tables that don't have a "KEY TYPE VALUE"
# header, e.g.
#   TABLE 7.11 Restrictions on the entries in a soft-mask image dictionary
#   KEY RESTRICTION
14
15
16
# Mutable parser state, shared by the functions in this file through `global`.
lines = 0                      # number of input lines handed to processLine()
table = ''                     # stripped caption line of the table being parsed
tableHeaderFound = False       # True once the "KEY TYPE VALUE" header row was seen
tableLine = 0                  # count of lines processed while inside a table
tableRow = 0                   # declared for row counting; nothing visible here updates it
columnWidth = []               # [width of KEY column, width of TYPE column, 0]
columnValues = None            # [key, type, value] of the pending row, or None
mustFollowTableHeader = False  # True when the next non-blank line must be the header row
emitedDitionaryName = ''       # class name emitted for the current table, '' if none yet
edisonn@google.com07f01472013-06-13 17:24:54 +000026
# Tokens that may appear in the TYPE column of a spec table.  acceptType()
# removes every one of them from a candidate string and accepts the string
# when nothing is left, which is why the separators ('or', ',', ' ') are
# listed too.
#
# The \ufb01 entries are the "fi"-ligature spellings of the plain entries next
# to them (presumably how the words appear in the text extracted from the
# spec); fix() performs the matching ligature-to-ASCII replacement.
knownTypes = {
    '(any)',
    u'unde\ufb01ned',
    '(undefined)',
    '(various)',
    'array',
    'or',
    'boolean',
    'date',
    'dictionary',
    'function',
    'integer',
    u'\ufb01le',
    'file',
    u'speci\ufb01cation',
    'specification',
    'name',
    'tree',
    'number',
    'rectangle',
    'stream',
    'string',
    'text',
    ',',
    ' ',
}
53
# TODO(edisonn): add a third element in the vector, the base class; by default it is Dictionary.
# TODO(edisonn): add overrides for types: map<field_name, type_name>,
#   e.g. {'Resources': 'ResourceDictionary'}
# TODO(edisonn): entries can be added one by one, or extracted from the documentation.
58
# Maps a table caption key ("TABLE <chapter>.<number>") to the description of
# the class generated for that table.  Each value is a list:
#   [0] name of the generated class
#   [1] human readable description, emitted as the class comment
#   [2] (optional) base class name; commitRow() uses 'Dictionary' when absent
#   [3] (optional) dict of field name -> expression; commitRow() marks such
#       fields as required and emits the expression in a `.must(...)` clause
tableToClassName = {
    'TABLE 3.4': ['StreamCommonDictionary', 'Entries common to all stream dictionaries'],
    'TABLE 3.7': ['LzwdecodeAndFlatedecodeFiltersDictionary', 'Optional parameters for LZWDecode and FlateDecode filters'],
    'TABLE 3.9': ['CcittfaxdecodeFilterDictionary', 'Optional parameters for the CCITTFaxDecode filter'],
    'TABLE 3.10': ['Jbig2DecodeFilterDictionary', 'Optional parameter for the JBIG2Decode filter'],
    'TABLE 3.11': ['DctdecodeFilterDictionary', 'Optional parameter for the DCTDecode filter'],
    'TABLE 3.12': ['FileTrailerDictionary', 'Entries in the file trailer dictionary'],
    'TABLE 3.13': ['EncryptionCommonDictionary', 'Entries common to all encryption dictionaries'],
    'TABLE 3.14': ['StandardSecurityHandlerDictionary', 'Additional encryption dictionary entries for the standard security handler'],
    'TABLE 3.16': ['CatalogDictionary', 'Entries in the catalog dictionary'],
    'TABLE 3.17': ['PageTreeNodeDictionary', 'Required entries in a page tree node'],
    'TABLE 3.18': ['PageObjectDictionary', 'Entries in a page object'],
    'TABLE 3.19': ['NameDictionary', 'Entries in the name dictionary'],
    'TABLE 3.21': ['ResourceDictionary', 'Entries in a resource dictionary'],
    'TABLE 3.23': ['NameTreeNodeDictionary', 'Entries in a name tree node dictionary'],
    'TABLE 3.25': ['NumberTreeNodeDictionary', 'Entries in a number tree node dictionary'],
    'TABLE 3.26': ['FunctionCommonDictionary', 'Entries common to all function dictionaries'],
    'TABLE 3.27': ['Type0FunctionDictionary', 'Additional entries specific to a type 0 function dictionary'],
    'TABLE 3.28': ['Type2FunctionDictionary', 'Additional entries specific to a type 2 function dictionary'],
    'TABLE 3.29': ['Type3FunctionDictionary', 'Additional entries specific to a type 3 function dictionary'],
    'TABLE 3.32': ['FileSpecificationDictionary', 'Entries in a file specification dictionary'],
    'TABLE 3.33': ['EmbeddedFileStreamDictionary', 'Additional entries in an embedded file stream dictionary'],
    'TABLE 3.34': ['EmbeddedFileParameterDictionary', 'Entries in an embedded file parameter dictionary'],
    'TABLE 3.35': ['MacOsFileInformationDictionary', 'Entries in a Mac OS file information dictionary'],
    'TABLE 4.13': ['CalgrayColorSpaceDictionary', 'Entries in a CalGray color space dictionary'],
    'TABLE 4.14': ['CalrgbColorSpaceDictionary', 'Entries in a CalRGB color space dictionary'],
    'TABLE 4.15': ['LabColorSpaceDictionary', 'Entries in a Lab color space dictionary'],
    'TABLE 4.16': ['IccProfileStreamDictionary', 'Additional entries specific to an ICC profile stream dictionary'],
    'TABLE 4.20': ['DeviceNColorSpaceDictionary', 'Entry in a DeviceN color space attributes dictionary'],
    'TABLE 4.22': ['Type1PatternDictionary', 'Additional entries specific to a type 1 pattern dictionary'],
    'TABLE 4.23': ['Type2PatternDictionary', 'Entries in a type 2 pattern dictionary'],
    'TABLE 4.25': ['ShadingDictionary', 'Entries common to all shading dictionaries'],
    'TABLE 4.26': ['Type1ShadingDictionary', 'Additional entries specific to a type 1 shading dictionary', 'ShadingDictionary'],
    'TABLE 4.27': ['Type2ShadingDictionary', 'Additional entries specific to a type 2 shading dictionary', 'ShadingDictionary'],
    'TABLE 4.28': ['Type3ShadingDictionary', 'Additional entries specific to a type 3 shading dictionary', 'ShadingDictionary'],
    'TABLE 4.29': ['Type4ShadingDictionary', 'Additional entries specific to a type 4 shading dictionary', 'ShadingDictionary'],
    'TABLE 4.30': ['Type5ShadingDictionary', 'Additional entries specific to a type 5 shading dictionary', 'ShadingDictionary'],
    'TABLE 4.31': ['Type6ShadingDictionary', 'Additional entries specific to a type 6 shading dictionary', 'ShadingDictionary'],
    'TABLE 4.35': ['ImageDictionary', 'Additional entries specific to an image dictionary', 'XObjectDictionary', {'Subtype': 'datatypes.PdfName(\'Image\')'}],
    'TABLE 4.37': ['AlternateImageDictionary', 'Entries in an alternate image dictionary'],
    'TABLE 4.41': ['Type1FormDictionary', 'Additional entries specific to a type 1 form dictionary', 'XObjectDictionary', {'Subtype': 'datatypes.PdfName(\'Form\')'}],
    'TABLE 4.42': ['GroupAttributesDictionary', 'Entries common to all group attributes dictionaries'],
    'TABLE 4.43': ['ReferenceDictionary', 'Entries in a reference dictionary'],
    'TABLE 4.44': ['PSXobjectDictionary', 'Additional entries specific to a PostScript XObject dictionary'],
    'TABLE 5.8': ['Type1FontDictionary', 'Entries in a Type 1 font dictionary', 'FontDictionary'],
    'TABLE 5.9': ['Type3FontDictionary', 'Entries in a Type 3 font dictionary', 'FontDictionary'],
    'TABLE 5.11': ['EncodingDictionary', 'Entries in an encoding dictionary'],
    'TABLE 5.12': ['CIDSystemInfoDictionary', 'Entries in a CIDSystemInfo dictionary'],
    'TABLE 5.13': ['CIDFontDictionary', 'Entries in a CIDFont dictionary', 'FontDictionary'],
    'TABLE 5.16': ['CMapDictionary', 'Additional entries in a CMap dictionary'],
    'TABLE 5.17': ['Type0FontDictionary', 'Entries in a Type 0 font dictionary', 'FontDictionary'],
    'TABLE 5.18': ['FontDescriptorDictionary', 'Entries common to all font descriptors'],
    'TABLE 5.20': ['CIDFontDescriptorDictionary', 'Additional font descriptor entries for CIDFonts'],
    'TABLE 5.23': ['EmbeddedFontStreamDictionary', 'Additional entries in an embedded font stream dictionary'],
    'TABLE 6.3': ['Type1HalftoneDictionary', 'Entries in a type 1 halftone dictionary'],
    'TABLE 6.4': ['Type6HalftoneDictionary', 'Additional entries specific to a type 6 halftone dictionary'],
    'TABLE 6.5': ['Type10HalftoneDictionary', 'Additional entries specific to a type 10 halftone dictionary'],
    'TABLE 6.6': ['Type16HalftoneDictionary', 'Additional entries specific to a type 16 halftone dictionary'],
    'TABLE 6.7': ['Type5HalftoneDictionary', 'Entries in a type 5 halftone dictionary'],
    'TABLE 7.10': ['SoftMaskDictionary', 'Entries in a soft-mask dictionary'],
    'TABLE 7.12': ['SoftMaskImageDictionary', 'Additional entry in a soft-mask image dictionary'],
    'TABLE 7.13': ['TransparencyGroupDictionary', 'Additional entries specific to a transparency group attributes dictionary'],
    'TABLE 8.1': ['ViewerPreferencesDictionary', 'Entries in a viewer preferences dictionary'],
    'TABLE 8.3': ['OutlineDictionary', 'Entries in the outline dictionary'],
    'TABLE 8.4': ['OutlineItemDictionary', 'Entries in an outline item dictionary'],
    'TABLE 8.6': ['PageLabelDictionary', 'Entries in a page label dictionary'],
    'TABLE 8.7': ['ThreadDictionary', 'Entries in a thread dictionary'],
    'TABLE 8.8': ['BeadDictionary', 'Entries in a bead dictionary'],
    'TABLE 8.9': ['TransitionDictionary', 'Entries in a transition dictionary'],
    'TABLE 8.10': ['AnnotationDictionary', 'Entries common to all annotation dictionaries'],
    'TABLE 8.12': ['BorderStyleDictionary', 'Entries in a border style dictionary'],
    'TABLE 8.13': ['AppearanceDictionary', 'Entries in an appearance dictionary'],
    'TABLE 8.15': ['TextAnnotationDictionary', 'Additional entries specific to a text annotation'],
    'TABLE 8.16': ['ALinkAnnotationDictionary', 'Additional entries specific to a link annotation'],
    'TABLE 8.17': ['FreeTextAnnotationDictionary', 'Additional entries specific to a free text annotation'],
    'TABLE 8.18': ['LineAnnotationDictionary', 'Additional entries specific to a line annotation'],
    'TABLE 8.20': ['SquareOrCircleAnnotation', 'Additional entries specific to a square or circle annotation'],
    'TABLE 8.21': ['MarkupAnnotationsDictionary', 'Additional entries specific to markup annotations'],
    'TABLE 8.22': ['RubberStampAnnotationDictionary', 'Additional entries specific to a rubber stamp annotation'],
    'TABLE 8.23': ['InkAnnotationDictionary', 'Additional entries specific to an ink annotation'],
    'TABLE 8.24': ['PopUpAnnotationDictionary', 'Additional entries specific to a pop-up annotation'],
    'TABLE 8.25': ['FileAttachmentAnnotationDictionary', 'Additional entries specific to a file attachment annotation'],
    'TABLE 8.26': ['SoundAnnotationDictionary', 'Additional entries specific to a sound annotation'],
    'TABLE 8.27': ['MovieAnnotationDictionary', 'Additional entries specific to a movie annotation'],
    'TABLE 8.28': ['WidgetAnnotationDictionary', 'Additional entries specific to a widget annotation'],
    'TABLE 8.29': ['ActionDictionary', 'Entries common to all action dictionaries'],
    'TABLE 8.30': ['AnnotationActionsDictionary', 'Entries in an annotation\'s additional-actions dictionary'],
    'TABLE 8.31': ['PageObjectActionsDictionary', 'Entries in a page object\'s additional-actions dictionary'],
    'TABLE 8.32': ['FormFieldActionsDictionary', 'Entries in a form field\'s additional-actions dictionary'],
    'TABLE 8.33': ['DocumentCatalogActionsDictionary', 'Entries in the document catalog\'s additional-actions dictionary'],
    'TABLE 8.35': ['GoToActionDictionary', 'Additional entries specific to a go-to action'],
    'TABLE 8.36': ['RemoteGoToActionDictionary', 'Additional entries specific to a remote go-to action'],
    'TABLE 8.37': ['LaunchActionDictionary', 'Additional entries specific to a launch action'],
    'TABLE 8.38': ['WindowsLaunchActionDictionary', 'Entries in a Windows launch parameter dictionary'],
    'TABLE 8.39': ['ThreadActionDictionary', 'Additional entries specific to a thread action'],
    'TABLE 8.40': ['URIActionDictionary', 'Additional entries specific to a URI action'],
    'TABLE 8.41': ['URIDictionary', 'Entry in a URI dictionary'],
    'TABLE 8.42': ['SoundActionDictionary', 'Additional entries specific to a sound action'],
    'TABLE 8.43': ['MovieActionDictionary', 'Additional entries specific to a movie action'],
    'TABLE 8.44': ['HideActionDictionary', 'Additional entries specific to a hide action'],
    'TABLE 8.46': ['NamedActionsDictionary', 'Additional entries specific to named actions'],
    'TABLE 8.47': ['InteractiveFormDictionary', 'Entries in the interactive form dictionary'],
    'TABLE 8.49': ['FieldDictionary', 'Entries common to all field dictionaries'],
    'TABLE 8.51': ['VariableTextFieldDictionary', 'Additional entries common to all fields containing variable text'],
    'TABLE 8.52': ['AppearanceCharacteristicsDictionary', 'Entries in an appearance characteristics dictionary'],
    'TABLE 8.54': ['CheckboxFieldDictionary', 'Additional entry specific to a checkbox field'],
    'TABLE 8.55': ['RadioButtonFieldDictionary', 'Additional entry specific to a radio button field'],
    'TABLE 8.57': ['TextFieldDictionary', 'Additional entry specific to a text field'],
    'TABLE 8.59': ['ChoiceFieldDictionary', 'Additional entries specific to a choice field'],
    'TABLE 8.60': ['SignatureDictionary', 'Entries in a signature dictionary'],
    'TABLE 8.61': ['SubmitFormActionDictionary', 'Additional entries specific to a submit-form action'],
    'TABLE 8.63': ['ResetFormActionDictionary', 'Additional entries specific to a reset-form action'],
    'TABLE 8.65': ['ImportDataActionDictionary', 'Additional entries specific to an import-data action'],
    'TABLE 8.66': ['JavascriptActionDictionary', 'Additional entries specific to a JavaScript action'],
    'TABLE 8.67': ['FDFTrailerDictionary', 'Entry in the FDF trailer dictionary'],
    'TABLE 8.68': ['FDFCatalogDictionary', 'Entries in the FDF catalog dictionary'],
    'TABLE 8.69': ['FDFDictionary', 'Entries in the FDF dictionary'],
    'TABLE 8.70': ['EncryptedEmbeddedFileStreamDictionary', 'Additional entry in an embedded file stream dictionary for an encrypted FDF file'],
    'TABLE 8.71': ['JavascriptDictionary', 'Entries in the JavaScript dictionary'],
    'TABLE 8.72': ['FDFFieldDictionary', 'Entries in an FDF field dictionary'],
    'TABLE 8.73': ['IconFitDictionary', 'Entries in an icon fit dictionary'],
    'TABLE 8.74': ['FDFPageDictionary', 'Entries in an FDF page dictionary'],
    'TABLE 8.75': ['FDFTemplateDictionary', 'Entries in an FDF template dictionary'],
    'TABLE 8.76': ['FDFNamedPageReferenceDictionary', 'Entries in an FDF named page reference dictionary'],
    'TABLE 8.77': ['FDFFileAnnotationDictionary', 'Additional entry for annotation dictionaries in an FDF file'],
    'TABLE 8.78': ['SoundObjectDictionary', 'Additional entries specific to a sound object'],
    'TABLE 8.79': ['MovieDictionary', 'Entries in a movie dictionary'],
    'TABLE 8.80': ['MovieActivationDictionary', 'Entries in a movie activation dictionary'],
    'TABLE 9.2': ['DocumentInformationDictionary', 'Entries in the document information dictionary'],
    'TABLE 9.3': ['MetadataStreamDictionary', 'Additional entries in a metadata stream dictionary'],
    'TABLE 9.4': ['ComponentsWithMetadataDictionary', 'Additional entry for components having metadata'],
    'TABLE 9.6': ['PagePieceDictionary', 'Entries in a page-piece dictionary'],
    'TABLE 9.7': ['ApplicationDataDictionary', 'Entries in an application data dictionary'],
    'TABLE 9.9': ['StructureTreeRootDictionary', 'Entries in the structure tree root'],
    'TABLE 9.10': ['StructureElementDictionary', 'Entries in a structure element dictionary'],
    'TABLE 9.11': ['MarkedContentReferenceDictionary', 'Entries in a marked-content reference dictionary'],
    'TABLE 9.12': ['ObjectReferenceDictionary', 'Entries in an object reference dictionary'],
    'TABLE 9.13': ['StructureElementAccessDictionary', 'Additional dictionary entries for structure element access'],
    'TABLE 9.14': ['AttributeObjectDictionary', 'Entry common to all attribute objects'],
    'TABLE 9.15': ['MarkInformationDictionary', 'Entry in the mark information dictionary'],
    'TABLE 9.16': ['ArtifactsDictionary', 'Property list entries for artifacts'],
    'TABLE 9.27': ['StandardStructureDictionary', 'Standard layout attributes common to all standard structure types'],
    'TABLE 9.28': ['BlockLevelStructureElementsDictionary', 'Additional standard layout attributes specific to block-level structure elements'],
    'TABLE 9.29': ['InlineLevelStructureElementsDictionary', 'Standard layout attributes specific to inline-level structure elements'],
    'TABLE 9.30': ['ListAttributeDictionary', 'Standard list attribute'],
    'TABLE 9.31': ['TableAttributesDictionary', 'Standard table attributes'],
    'TABLE 9.32': ['WebCaptureInformationDictionary', 'Entries in the Web Capture information dictionary'],
    'TABLE 9.33': ['WebCaptureDictionary', 'Entries common to all Web Capture content sets'],
    'TABLE 9.34': ['WebCapturePageSetDictionary', 'Additional entries specific to a Web Capture page set'],
    'TABLE 9.35': ['WebCaptureImageSetDictionary', 'Additional entries specific to a Web Capture image set'],
    'TABLE 9.36': ['SourceInformationDictionary', 'Entries in a source information dictionary'],
    'TABLE 9.37': ['URLAliasDictionary', 'Entries in a URL alias dictionary'],
    'TABLE 9.38': ['WebCaptureCommandDictionary', 'Entries in a Web Capture command dictionary'],
    'TABLE 9.40': ['WebCaptureCommandSettingsDictionary', 'Entries in a Web Capture command settings dictionary'],
    'TABLE 9.41': ['BoxColorInformationDictionary', 'Entries in a box color information dictionary'],
    'TABLE 9.42': ['BoxStyleDictionary', 'Entries in a box style dictionary'],
    'TABLE 9.43': ['PrinterMarkAnnotationDictionary', 'Additional entries specific to a printer\'s mark annotation'],
    'TABLE 9.44': ['PrinterMarkFormDictionary', 'Additional entries specific to a printer\'s mark form dictionary'],
    'TABLE 9.45': ['SeparationDictionary', 'Entries in a separation dictionary'],
    'TABLE 9.46': ['PDF_XOutputIntentDictionary', 'Entries in a PDF/X output intent dictionary'],
    'TABLE 9.47': ['TrapNetworkAnnotationDictionary', 'Additional entries specific to a trap network annotation'],
    'TABLE 9.48': ['TrapNetworkAppearanceStreamDictionary', 'Additional entries specific to a trap network appearance stream'],
    'TABLE 9.49': ['OpiVersionDictionary', 'Entry in an OPI version dictionary'],
}
223
224
def acceptType(val):
    """Return True when *val* is made up solely of known type tokens.

    Every entry of the module-level ``knownTypes`` set is removed from *val*;
    the value is accepted when nothing is left over.

    Tokens are removed longest first (ties broken alphabetically).  Iterating
    the set directly would make the result depend on set ordering whenever one
    token is contained in another: stripping 'undefined' before '(undefined)'
    would leave '()' behind and wrongly reject the value.
    """
    global knownTypes

    remainder = val
    for token in sorted(knownTypes, key=lambda t: (-len(t), t)):
        remainder = remainder.replace(token, '')

    return remainder == ''
234
235
def inTable():
    """Return True while a table whose header row has been seen is being parsed."""
    global tableHeaderFound
    return tableHeaderFound
239
def tableDescriptionFound(desc):
    """Remember *desc*, stripped of surrounding whitespace, as the current table caption."""
    global table
    table = desc.strip()
243
def tableHasHeader():
    """Record that the header row of the current table has been seen.

    From this point on inTable() reports True until stopTable() resets the flag.
    """
    global tableHeaderFound

    tableHeaderFound = True
250
def fix(val):
    """Return *val* converted to ASCII-friendly text escaped for a Python literal.

    Typographic characters are first replaced by plain equivalents.  The text
    is then escaped so that callers can emit it between single quotes in the
    generated Python source: backslashes, single quotes and newlines are
    backslash-escaped.

    Backslashes are escaped first; otherwise a backslash already present in
    the text would combine with the following character into an escape
    sequence inside the generated literal.
    """
    replacements = (
        # fix unicode chars
        (u'\ufb01', 'fi'),   # "fi" ligature
        (u'\u201c', '\"'),
        (u'\u201d', '\"'),
        (u'\u2019', '\''),
        (u'\ufb02', 'fl'),   # "fl" ligature
        (u'\xae', '(R)'),
        (u'\u2026', '...'),
        (u'\xd7', 'x'),
        (u'\u2212', '-'),
        (u'\u2264', '<='),
        (u'\u2014', '-'),
        # NOTE(review): an en dash is mapped to an apostrophe here while the
        # em dash above maps to '-'; kept as found, confirm it is intended.
        (u'\u2013', '\''),
        (u'\u2022', '*'),
        (u'\xb5', 'mu'),
        (u'\xf7', '/'),
        # escape so the text can be emitted as a single-quoted python string
        ('\\', '\\\\'),
        ('\'', '\\\''),
        ('\n', '\\n'),
    )

    ret = val
    for old, new in replacements:
        ret = ret.replace(old, new)

    return ret
edisonn@google.com07f01472013-06-13 17:24:54 +0000278
def commitRow():
    """Print the generator code for the pending table row, then clear the row.

    Does nothing when no row is pending (``columnValues`` is None).  When the
    VALUE column contains no ')' an error marker and the row are printed and
    the row is left pending.

    The text up to the first ')' of the VALUE column is split on '(' , ')' and
    ';' into specifiers.  The field is required only when the first specifier
    is exactly 'Required' and every other specifier is an inheritable marker
    or starts with 'PDF ' (a version such as 'PDF 1.3').  Conditional
    requirements are deliberately not handled here; they are dealt with
    manually.

    For the first row of a table the ``pdfspec.addClass(...)`` line is emitted
    as well, using the entry of ``tableToClassName`` selected by the table
    caption.  A caption with no entry there raises KeyError.
    """
    global columnValues
    global emitedDitionaryName
    global table
    global tableToClassName

    if columnValues is None:
        return

    lastClosed = columnValues[2].find(')')
    if lastClosed < 0:
        print('ERRRRRRRRRRRRRRROR')
        print(columnValues)
        return

    spec = columnValues[2][:lastClosed + 1]
    spec = spec.replace('(', ';').replace(')', ';').strip(';')
    specs = spec.split(';')

    # clearly required, but it can be required with conditions. don't handle
    # these ones here, but manually
    required = specs[0] == 'Required'

    for s in specs:
        stripped = s.strip()
        if stripped == 'inheritable' or stripped == 'Inheritable':
            continue
        if stripped.startswith('PDF '):
            # version specifier, e.g. 'PDF 1.3'
            continue
        if s != 'Required':
            required = False

    columnValues = [fix(columnValues[0]), fix(columnValues[1]), fix(columnValues[2])]

    tableKey = re.search('(TABLE [0-9].[0-9][0-9]?)', table).group(1)
    classInfo = tableToClassName[tableKey]
    # Optional 4th element: field name -> expression the field must equal.
    mustValues = classInfo[3] if len(classInfo) >= 4 else {}

    if emitedDitionaryName == '':
        # First row of this table: emit the class declaration once.
        table = fix(table)
        emitedDitionaryName = classInfo[0]
        comment = fix(classInfo[1])

        if len(classInfo) >= 3 and classInfo[2] != '':
            baseClass = classInfo[2]
        else:
            baseClass = 'Dictionary'
        print(' pdfspec.addClass(\'' + emitedDitionaryName + '\', \'' + baseClass + '\', \'' + comment + '\')\\')

    if columnValues[0] in mustValues:
        required = True

    if required:
        print(' .required(\'NULL\')\\')
    else:
        print(' .optional()\\')

    print(' .field(\'' + columnValues[0] + '\')\\')
    print(' .name(\'' + columnValues[0] + '\')\\')
    print(' .type(\'' + columnValues[1] + '\')\\')
    print(' .comment(\'' + columnValues[2] + '\')\\')

    if columnValues[0] in mustValues:
        print(' .must(' + mustValues[columnValues[0]] + ')\\')

    print(' .done().done()\\')

    columnValues = None
edisonn@google.com07f01472013-06-13 17:24:54 +0000404
def newRow(first, second, third):
    """Start a new pending row from the KEY, TYPE and VALUE column texts.

    Trailing whitespace is removed from each column; leading whitespace is kept.
    """
    global columnValues
    columnValues = [column.rstrip() for column in (first, second, third)]
408
def appendRow(second, third):
    """Append continuation text to the TYPE and VALUE columns of the pending row.

    Non-blank *second* text is joined to the TYPE column with a space and
    non-blank *third* text is joined to the VALUE column with a newline
    (trailing whitespace removed in both cases).

    When no row is pending there is nothing to continue and the text is
    ignored, instead of failing with a TypeError on ``None``.
    """
    global columnValues

    if columnValues is None:
        return

    typeText = second.rstrip()
    if typeText != '':
        columnValues[1] = columnValues[1] + ' ' + typeText

    valueText = third.rstrip()
    if valueText != '':
        columnValues[2] = columnValues[2] + '\n' + valueText
edisonn@google.com07f01472013-06-13 17:24:54 +0000415
def rebaseTable(line):
    """Re-measure the column widths from *line* if it starts a new table row.

    A line is taken to start a row when, after its first word (the key) and
    any following words found in ``knownTypes``, the next word begins with
    '(Optional' or '(Required'.  In that case the pending row is committed,
    ``columnWidth[0]`` and ``columnWidth[1]`` are recomputed from the
    positions of the second word and of the marker, and True is returned.
    Otherwise nothing changes and False is returned.
    """
    global knownTypes
    global columnWidth

    words = line.split()

    if len(words) < 3:
        return False

    # Skip the type tokens that follow the key.
    i = 1
    while i < len(words) - 1 and words[i] in knownTypes:
        i = i + 1

    if words[i].startswith('(Optional'):
        marker = '(Optional'
    elif words[i].startswith('(Required'):
        marker = '(Required'
    else:
        return False

    commitRow()

    # Look for the second word only after the end of the key, so that a key
    # which happens to contain that word cannot be mistaken for the TYPE column.
    keyEnd = line.find(words[0]) + len(words[0])
    columnWidth[0] = line.find(words[1], keyEnd)
    columnWidth[1] = line.find(marker) - columnWidth[0]
    return True
441
442
def stopTable():
    """Close the table being parsed, if any.

    Commits the pending row, resets the in-table flag and the emitted class
    name, and prints the closing ' .done()' line followed by a blank line.
    Does nothing when no table is open.
    """
    global tableHeaderFound
    global emitedDitionaryName

    if not inTable():
        return

    commitRow()
    tableHeaderFound = False
    emitedDitionaryName = ''
    print(' .done()')
    # print('') emits the same blank line on Python 2 and 3; a bare `print`
    # does nothing on Python 3.
    print('')
edisonn@google.com07f01472013-06-13 17:24:54 +0000455
456
def killTable():
    """Placeholder called when a candidate table is rejected; currently does nothing."""
    return
459
def processLine(line):
    """Feed one line of the extracted spec text through the table parser.

    *line* may be UTF-8 encoded bytes (decoded here) or already-decoded text.

    Handling, in order:
      * a blank line closes the current table;
      * a caption line ("TABLE <n>.<m> ...") closes the current table and
        stores the caption; the next non-blank line must then be the
        "KEY TYPE VALUE" header, whose word positions give the column widths;
      * inside a table, the line is cut into KEY / TYPE / VALUE slices and
        either starts a new row, continues the pending one, or closes the
        table when its shape does not look like a row.
    Lines outside a table are ignored.
    """
    global lines
    global tableLine
    global columnWidth
    global mustFollowTableHeader

    lines = lines + 1

    if isinstance(line, bytes):
        line = line.decode('utf8')

    striped = line.rstrip()

    words = line.split()
    if len(words) == 0:
        stopTable()
        return

    if re.search(r'^[\s]*(TABLE [0-9].[0-9][0-9]?)', striped):
        stopTable()
        tableDescriptionFound(striped)
        mustFollowTableHeader = True
        return

    if mustFollowTableHeader:
        mustFollowTableHeader = False
        if len(words) != 3:
            killTable()

        # TODO(edisonn): support for generic table!
        # Comparing a slice keeps this safe for lines with fewer than three
        # words, which used to raise IndexError when they started with 'KEY'.
        if words[:3] != ['KEY', 'TYPE', 'VALUE']:
            killTable()
            return

        tableHasHeader()
        typeStart = striped.index('TYPE')
        columnWidth = [typeStart, striped.index('VALUE') - typeStart, 0]
        return

    if not inTable():
        return

    tableLine = tableLine + 1
    keyEnd = columnWidth[0]
    typeEnd = columnWidth[0] + columnWidth[1]
    first = striped[:keyEnd]
    second = striped[keyEnd:typeEnd]
    third = striped[typeEnd:]

    if tableLine == 1:
        # startswith() also copes with an empty VALUE slice (short line).
        if not third.startswith('('):
            killTable()
            return

        newRow(first, second, third)
        return

    if rebaseTable(striped):
        # Column widths were re-measured from this line; slice again.
        keyEnd = columnWidth[0]
        typeEnd = columnWidth[0] + columnWidth[1]
        first = striped[:keyEnd]
        second = striped[keyEnd:typeEnd]
        third = striped[typeEnd:]

    first = first.rstrip()
    second = second.rstrip()
    third = third.rstrip()

    # Only the VALUE column has text: continuation of the description.
    if first == '' and second == '' and third != '':
        appendRow(second, third)
        return

    # More than one word in the KEY column: not a table row.
    if len(first.split()) > 1:
        stopTable()
        return

    # Key does not start at the left edge.
    if first != '' and first[0] == ' ':
        stopTable()
        return

    # Key and type but no value.
    if first != '' and second != '' and third == '':
        stopTable()
        return

    # No key, type text aligned with the TYPE column: a wrapped type, as long
    # as it consists of known type tokens only.
    if first == '' and second != '' and second[0] != ' ':
        if acceptType(second):
            appendRow(second, third)
        else:
            stopTable()
        return

    # third is non-empty here: the "no value" case returned above.
    if first != '' and second != '' and third[0] != '(':
        stopTable()
        return

    # No key and misaligned type text.
    if first == '' and second != '' and second[0] == ' ':
        stopTable()
        return

    # A complete row: commit the previous one and start collecting this one.
    if first != '' and second != '' and third[0] == '(':
        commitRow()
        newRow(first, second, third)
        return
564
565
def generateDef():
    """Read the spec text from stdin and print the generated definitions to stdout.

    The output starts with 'import datatypes' and the header of
    ``buildPdfSpec(pdfspec)``; the table parser then prints one block per
    recognised table.  Returns None.
    """
    print('import datatypes')
    print('')

    print('def buildPdfSpec(pdfspec):')

    for line in sys.stdin:
        processLine(line)

    # close last table if it was not closed already
    stopTable()

    print('')
edisonn@google.com07f01472013-06-13 17:24:54 +0000583
# Run the generator when executed as a script; generateDef() returns None,
# so the process exits with status 0 on success.
if __name__ == '__main__':
    sys.exit(generateDef())