#!/usr/local/bin/python
# coding: utf-8
3
4import sys
5import re
6
# TODO(edisonn): put processed part of file in a new file
# put unprocessed part, in a new file, so we see what we miss
# keep blank lines, and generate a version without the blank lines

# TODO(edisonn): deal manually with tables that don't have a "KEY TYPE VALUE" header, e.g.
# TABLE 7.11 Restrictions on the entries in a soft-mask image dictionary
# KEY RESTRICTION
14
15
16
# Parser state shared by the functions below through ``global`` statements.

# Counters: input lines seen, lines seen while inside a table, and tableRow
# (declared global by processLineCore but never updated in this file).
lines = tableLine = tableRow = 0

# Caption of the current table, and the class name already emitted for it
# ('' until the first row of the table has been committed).
table = emitedDitionaryName = ''

# tableHeaderFound: a "KEY TYPE VALUE" header was accepted (we are in a table).
# mustFollowTableHeader: the previous line was a "TABLE n.m" caption, so the
# next non-blank line has to be the column header.
tableHeaderFound = mustFollowTableHeader = False

# [offset of the TYPE column, width of the TYPE column, 0] once a header is seen.
columnWidth = []

# Pending row as [key, type, value], or None when no row is pending.
columnValues = None
edisonn@google.com07f01472013-06-13 17:24:54 +000026
# Tokens that may appear in the TYPE column of a spec table.  acceptType()
# deletes every one of these from a candidate string and accepts the string
# when nothing is left, so separators (',' and ' ') are listed as well.
#
# The extracted spec text spells some words with the U+FB01 "fi" ligature, so
# those spellings are listed next to the plain ones.  They are written as
# escapes (like the u'\ufb02' escape used by fix()) so that tools which
# normalise the source text cannot silently turn them into duplicates of the
# plain spellings.
knownTypes = {
    '(any)',
    u'unde\ufb01ned',
    '(undefined)',
    '(various)',
    'array',
    'or',
    'boolean',
    'date',
    'dictionary',
    'function',
    'integer',
    u'\ufb01le',
    'file',
    u'speci\ufb01cation',
    'specification',
    'name',
    'tree',
    'number',
    'rectangle',
    'stream',
    'string',
    'text',
    ',',
    ' ',
}
53
# TODO(edisonn): add a third element in the vector, the base class, by default it is Dictionary
# TODO(edisonn): add overrides for types map<field_name, type_name>
# e.g. ,{'Resources', 'ResourceDictionary'}
# TODO(edisonn): can be added one by one, or extracted from documentation
58
# Maps a table caption key ("TABLE <chapter>.<number>") to the data commitRow()
# needs to emit the class for that table:
#   [0] name of the generated class
#   [1] description, emitted as the class comment
#   [2] (optional) base class name; 'Dictionary' is used when absent or ''
#   [3] (optional) dict: field name -> expression text emitted in .must(...);
#       a field listed here is always emitted as required
tableToClassName = {
    'TABLE 3.4': ['StreamCommonDictionary', 'Entries common to all stream dictionaries'],
    'TABLE 3.7': ['LzwdecodeAndFlatedecodeFiltersDictionary', 'Optional parameters for LZWDecode and FlateDecode filters'],
    'TABLE 3.9': ['CcittfaxdecodeFilterDictionary', 'Optional parameters for the CCITTFaxDecode filter'],
    'TABLE 3.10': ['Jbig2DecodeFilterDictionary', 'Optional parameter for the JBIG2Decode filter'],
    'TABLE 3.11': ['DctdecodeFilterDictionary', 'Optional parameter for the DCTDecode filter'],
    'TABLE 3.12': ['FileTrailerDictionary', 'Entries in the file trailer dictionary'],
    'TABLE 3.13': ['EncryptionCommonDictionary', 'Entries common to all encryption dictionaries'],
    'TABLE 3.14': ['StandardSecurityHandlerDictionary', 'Additional encryption dictionary entries for the standard security handler'],
    'TABLE 3.16': ['CatalogDictionary', 'Entries in the catalog dictionary'],
    'TABLE 3.17': ['PageTreeNodeDictionary', 'Required entries in a page tree node'],
    'TABLE 3.18': ['PageObjectDictionary', 'Entries in a page object'],
    'TABLE 3.19': ['NameDictionary', 'Entries in the name dictionary'],
    'TABLE 3.21': ['ResourceDictionary', 'Entries in a resource dictionary'],
    'TABLE 3.23': ['NameTreeNodeDictionary', 'Entries in a name tree node dictionary'],
    'TABLE 3.25': ['NumberTreeNodeDictionary', 'Entries in a number tree node dictionary'],
    'TABLE 3.26': ['FunctionCommonDictionary', 'Entries common to all function dictionaries'],
    'TABLE 3.27': ['Type0FunctionDictionary', 'Additional entries specific to a type 0 function dictionary'],
    'TABLE 3.28': ['Type2FunctionDictionary', 'Additional entries specific to a type 2 function dictionary'],
    'TABLE 3.29': ['Type3FunctionDictionary', 'Additional entries specific to a type 3 function dictionary'],
    'TABLE 3.32': ['FileSpecificationDictionary', 'Entries in a file specification dictionary'],
    'TABLE 3.33': ['EmbeddedFileStreamDictionary', 'Additional entries in an embedded file stream dictionary'],
    'TABLE 3.34': ['EmbeddedFileParameterDictionary', 'Entries in an embedded file parameter dictionary'],
    'TABLE 3.35': ['MacOsFileInformationDictionary', 'Entries in a Mac OS file information dictionary'],
    'TABLE 4.8': ['GraphicsStateDictionary', 'Entries in a graphics state parameter dictionary'],
    'TABLE 4.13': ['CalgrayColorSpaceDictionary', 'Entries in a CalGray color space dictionary'],
    'TABLE 4.14': ['CalrgbColorSpaceDictionary', 'Entries in a CalRGB color space dictionary'],
    'TABLE 4.15': ['LabColorSpaceDictionary', 'Entries in a Lab color space dictionary'],
    'TABLE 4.16': ['IccProfileStreamDictionary', 'Additional entries specific to an ICC profile stream dictionary'],
    'TABLE 4.20': ['DeviceNColorSpaceDictionary', 'Entry in a DeviceN color space attributes dictionary'],
    'TABLE 4.22': ['Type1PatternDictionary', 'Additional entries specific to a type 1 pattern dictionary'],
    'TABLE 4.23': ['Type2PatternDictionary', 'Entries in a type 2 pattern dictionary'],
    'TABLE 4.25': ['ShadingDictionary', 'Entries common to all shading dictionaries'],
    'TABLE 4.26': ['Type1ShadingDictionary', 'Additional entries specific to a type 1 shading dictionary', 'ShadingDictionary'],
    'TABLE 4.27': ['Type2ShadingDictionary', 'Additional entries specific to a type 2 shading dictionary', 'ShadingDictionary'],
    'TABLE 4.28': ['Type3ShadingDictionary', 'Additional entries specific to a type 3 shading dictionary', 'ShadingDictionary'],
    'TABLE 4.29': ['Type4ShadingDictionary', 'Additional entries specific to a type 4 shading dictionary', 'ShadingDictionary'],
    'TABLE 4.30': ['Type5ShadingDictionary', 'Additional entries specific to a type 5 shading dictionary', 'ShadingDictionary'],
    'TABLE 4.31': ['Type6ShadingDictionary', 'Additional entries specific to a type 6 shading dictionary', 'ShadingDictionary'],
    'TABLE 4.35': ['ImageDictionary', 'Additional entries specific to an image dictionary', 'XObjectDictionary', {'Subtype': "[datatypes.PdfName('Image')]"}],
    'TABLE 4.37': ['AlternateImageDictionary', 'Entries in an alternate image dictionary'],
    'TABLE 4.41': ['Type1FormDictionary', 'Additional entries specific to a type 1 form dictionary', 'XObjectDictionary', {'Subtype': "[datatypes.PdfName('Form')]"}],
    'TABLE 4.42': ['GroupAttributesDictionary', 'Entries common to all group attributes dictionaries'],
    'TABLE 4.43': ['ReferenceDictionary', 'Entries in a reference dictionary'],
    'TABLE 4.44': ['PSXobjectDictionary', 'Additional entries specific to a PostScript XObject dictionary'],
    'TABLE 5.8': ['Type1FontDictionary', 'Entries in a Type 1 font dictionary', 'FontDictionary', {'Subtype': "[datatypes.PdfName('Type1')]"}],
    'TABLE 5.9': ['Type3FontDictionary', 'Entries in a Type 3 font dictionary', 'Type0FontDictionary', {'Subtype': "[datatypes.PdfName('Type3')]"}],
    'TABLE 5.11': ['EncodingDictionary', 'Entries in an encoding dictionary'],
    'TABLE 5.12': ['CIDSystemInfoDictionary', 'Entries in a CIDSystemInfo dictionary'],
    'TABLE 5.13': ['CIDFontDictionary', 'Entries in a CIDFont dictionary', 'FontDictionary', {'Subtype': "[datatypes.PdfName('CIDFontType0'), datatypes.PdfName('CIDFontType2')]"}],
    'TABLE 5.16': ['CMapDictionary', 'Additional entries in a CMap dictionary'],
    'TABLE 5.17': ['Type0FontDictionary', 'Entries in a Type 0 font dictionary', 'FontDictionary', {'Subtype': "[datatypes.PdfName('Type0')]"}],
    'TABLE 5.18': ['FontDescriptorDictionary', 'Entries common to all font descriptors'],
    'TABLE 5.20': ['CIDFontDescriptorDictionary', 'Additional font descriptor entries for CIDFonts'],
    'TABLE 5.23': ['EmbeddedFontStreamDictionary', 'Additional entries in an embedded font stream dictionary'],
    'TABLE 6.3': ['Type1HalftoneDictionary', 'Entries in a type 1 halftone dictionary'],
    'TABLE 6.4': ['Type6HalftoneDictionary', 'Additional entries specific to a type 6 halftone dictionary'],
    'TABLE 6.5': ['Type10HalftoneDictionary', 'Additional entries specific to a type 10 halftone dictionary'],
    'TABLE 6.6': ['Type16HalftoneDictionary', 'Additional entries specific to a type 16 halftone dictionary'],
    'TABLE 6.7': ['Type5HalftoneDictionary', 'Entries in a type 5 halftone dictionary'],
    'TABLE 7.10': ['SoftMaskDictionary', 'Entries in a soft-mask dictionary'],
    'TABLE 7.12': ['SoftMaskImageDictionary', 'Additional entry in a soft-mask image dictionary'],
    'TABLE 7.13': ['TransparencyGroupDictionary', 'Additional entries specific to a transparency group attributes dictionary'],
    'TABLE 8.1': ['ViewerPreferencesDictionary', 'Entries in a viewer preferences dictionary'],
    'TABLE 8.3': ['OutlineDictionary', 'Entries in the outline dictionary'],
    'TABLE 8.4': ['OutlineItemDictionary', 'Entries in an outline item dictionary'],
    'TABLE 8.6': ['PageLabelDictionary', 'Entries in a page label dictionary'],
    'TABLE 8.7': ['ThreadDictionary', 'Entries in a thread dictionary'],
    'TABLE 8.8': ['BeadDictionary', 'Entries in a bead dictionary'],
    'TABLE 8.9': ['TransitionDictionary', 'Entries in a transition dictionary'],
    'TABLE 8.10': ['AnnotationDictionary', 'Entries common to all annotation dictionaries'],
    'TABLE 8.12': ['BorderStyleDictionary', 'Entries in a border style dictionary'],
    'TABLE 8.13': ['AppearanceDictionary', 'Entries in an appearance dictionary'],
    'TABLE 8.15': ['TextAnnotationDictionary', 'Additional entries specific to a text annotation'],
    'TABLE 8.16': ['ALinkAnnotationDictionary', 'Additional entries specific to a link annotation'],
    'TABLE 8.17': ['FreeTextAnnotationDictionary', 'Additional entries specific to a free text annotation'],
    'TABLE 8.18': ['LineAnnotationDictionary', 'Additional entries specific to a line annotation'],
    'TABLE 8.20': ['SquareOrCircleAnnotation', 'Additional entries specific to a square or circle annotation'],
    'TABLE 8.21': ['MarkupAnnotationsDictionary', 'Additional entries specific to markup annotations'],
    'TABLE 8.22': ['RubberStampAnnotationDictionary', 'Additional entries specific to a rubber stamp annotation'],
    'TABLE 8.23': ['InkAnnotationDictionary', 'Additional entries specific to an ink annotation'],
    'TABLE 8.24': ['PopUpAnnotationDictionary', 'Additional entries specific to a pop-up annotation'],
    'TABLE 8.25': ['FileAttachmentAnnotationDictionary', 'Additional entries specific to a file attachment annotation'],
    'TABLE 8.26': ['SoundAnnotationDictionary', 'Additional entries specific to a sound annotation'],
    'TABLE 8.27': ['MovieAnnotationDictionary', 'Additional entries specific to a movie annotation'],
    'TABLE 8.28': ['WidgetAnnotationDictionary', 'Additional entries specific to a widget annotation'],
    'TABLE 8.29': ['ActionDictionary', 'Entries common to all action dictionaries'],
    'TABLE 8.30': ['AnnotationActionsDictionary', "Entries in an annotation's additional-actions dictionary"],
    'TABLE 8.31': ['PageObjectActionsDictionary', "Entries in a page object's additional-actions dictionary"],
    'TABLE 8.32': ['FormFieldActionsDictionary', "Entries in a form field's additional-actions dictionary"],
    'TABLE 8.33': ['DocumentCatalogActionsDictionary', "Entries in the document catalog's additional-actions dictionary"],
    'TABLE 8.35': ['GoToActionDictionary', 'Additional entries specific to a go-to action'],
    'TABLE 8.36': ['RemoteGoToActionDictionary', 'Additional entries specific to a remote go-to action'],
    'TABLE 8.37': ['LaunchActionDictionary', 'Additional entries specific to a launch action'],
    'TABLE 8.38': ['WindowsLaunchActionDictionary', 'Entries in a Windows launch parameter dictionary'],
    'TABLE 8.39': ['ThreadActionDictionary', 'Additional entries specific to a thread action'],
    'TABLE 8.40': ['URIActionDictionary', 'Additional entries specific to a URI action'],
    'TABLE 8.41': ['URIDictionary', 'Entry in a URI dictionary'],
    'TABLE 8.42': ['SoundActionDictionary', 'Additional entries specific to a sound action'],
    'TABLE 8.43': ['MovieActionDictionary', 'Additional entries specific to a movie action'],
    'TABLE 8.44': ['HideActionDictionary', 'Additional entries specific to a hide action'],
    'TABLE 8.46': ['NamedActionsDictionary', 'Additional entries specific to named actions'],
    'TABLE 8.47': ['InteractiveFormDictionary', 'Entries in the interactive form dictionary'],
    'TABLE 8.49': ['FieldDictionary', 'Entries common to all field dictionaries'],
    'TABLE 8.51': ['VariableTextFieldDictionary', 'Additional entries common to all fields containing variable text'],
    'TABLE 8.52': ['AppearanceCharacteristicsDictionary', 'Entries in an appearance characteristics dictionary'],
    'TABLE 8.54': ['CheckboxFieldDictionary', 'Additional entry specific to a checkbox field'],
    'TABLE 8.55': ['RadioButtonFieldDictionary', 'Additional entry specific to a radio button field'],
    'TABLE 8.57': ['TextFieldDictionary', 'Additional entry specific to a text field'],
    'TABLE 8.59': ['ChoiceFieldDictionary', 'Additional entries specific to a choice field'],
    'TABLE 8.60': ['SignatureDictionary', 'Entries in a signature dictionary'],
    'TABLE 8.61': ['SubmitFormActionDictionary', 'Additional entries specific to a submit-form action'],
    'TABLE 8.63': ['ResetFormActionDictionary', 'Additional entries specific to a reset-form action'],
    'TABLE 8.65': ['ImportDataActionDictionary', 'Additional entries specific to an import-data action'],
    'TABLE 8.66': ['JavascriptActionDictionary', 'Additional entries specific to a JavaScript action'],
    'TABLE 8.67': ['FDFTrailerDictionary', 'Entry in the FDF trailer dictionary'],
    'TABLE 8.68': ['FDFCatalogDictionary', 'Entries in the FDF catalog dictionary'],
    'TABLE 8.69': ['FDFDictionary', 'Entries in the FDF dictionary'],
    'TABLE 8.70': ['EncryptedEmbeddedFileStreamDictionary', 'Additional entry in an embedded file stream dictionary for an encrypted FDF file'],
    'TABLE 8.71': ['JavascriptDictionary', 'Entries in the JavaScript dictionary'],
    'TABLE 8.72': ['FDFFieldDictionary', 'Entries in an FDF field dictionary'],
    'TABLE 8.73': ['IconFitDictionary', 'Entries in an icon fit dictionary'],
    'TABLE 8.74': ['FDFPageDictionary', 'Entries in an FDF page dictionary'],
    'TABLE 8.75': ['FDFTemplateDictionary', 'Entries in an FDF template dictionary'],
    'TABLE 8.76': ['FDFNamedPageReferenceDictionary', 'Entries in an FDF named page reference dictionary'],
    'TABLE 8.77': ['FDFFileAnnotationDictionary', 'Additional entry for annotation dictionaries in an FDF file'],
    'TABLE 8.78': ['SoundObjectDictionary', 'Additional entries specific to a sound object'],
    'TABLE 8.79': ['MovieDictionary', 'Entries in a movie dictionary'],
    'TABLE 8.80': ['MovieActivationDictionary', 'Entries in a movie activation dictionary'],
    'TABLE 9.2': ['DocumentInformationDictionary', 'Entries in the document information dictionary'],
    'TABLE 9.3': ['MetadataStreamDictionary', 'Additional entries in a metadata stream dictionary'],
    'TABLE 9.4': ['ComponentsWithMetadataDictionary', 'Additional entry for components having metadata'],
    'TABLE 9.6': ['PagePieceDictionary', 'Entries in a page-piece dictionary'],
    'TABLE 9.7': ['ApplicationDataDictionary', 'Entries in an application data dictionary'],
    'TABLE 9.9': ['StructureTreeRootDictionary', 'Entries in the structure tree root'],
    'TABLE 9.10': ['StructureElementDictionary', 'Entries in a structure element dictionary'],
    'TABLE 9.11': ['MarkedContentReferenceDictionary', 'Entries in a marked-content reference dictionary'],
    'TABLE 9.12': ['ObjectReferenceDictionary', 'Entries in an object reference dictionary'],
    'TABLE 9.13': ['StructureElementAccessDictionary', 'Additional dictionary entries for structure element access'],
    'TABLE 9.14': ['AttributeObjectDictionary', 'Entry common to all attribute objects'],
    'TABLE 9.15': ['MarkInformationDictionary', 'Entry in the mark information dictionary'],
    'TABLE 9.16': ['ArtifactsDictionary', 'Property list entries for artifacts'],
    'TABLE 9.27': ['StandardStructureDictionary', 'Standard layout attributes common to all standard structure types'],
    'TABLE 9.28': ['BlockLevelStructureElementsDictionary', 'Additional standard layout attributes specific to block-level structure elements'],
    'TABLE 9.29': ['InlineLevelStructureElementsDictionary', 'Standard layout attributes specific to inline-level structure elements'],
    'TABLE 9.30': ['ListAttributeDictionary', 'Standard list attribute'],
    'TABLE 9.31': ['TableAttributesDictionary', 'Standard table attributes'],
    'TABLE 9.32': ['WebCaptureInformationDictionary', 'Entries in the Web Capture information dictionary'],
    'TABLE 9.33': ['WebCaptureDictionary', 'Entries common to all Web Capture content sets'],
    'TABLE 9.34': ['WebCapturePageSetDictionary', 'Additional entries specific to a Web Capture page set'],
    'TABLE 9.35': ['WebCaptureImageSetDictionary', 'Additional entries specific to a Web Capture image set'],
    'TABLE 9.36': ['SourceInformationDictionary', 'Entries in a source information dictionary'],
    'TABLE 9.37': ['URLAliasDictionary', 'Entries in a URL alias dictionary'],
    'TABLE 9.38': ['WebCaptureCommandDictionary', 'Entries in a Web Capture command dictionary'],
    'TABLE 9.40': ['WebCaptureCommandSettingsDictionary', 'Entries in a Web Capture command settings dictionary'],
    'TABLE 9.41': ['BoxColorInformationDictionary', 'Entries in a box color information dictionary'],
    'TABLE 9.42': ['BoxStyleDictionary', 'Entries in a box style dictionary'],
    'TABLE 9.43': ['PrinterMarkAnnotationDictionary', "Additional entries specific to a printer's mark annotation"],
    'TABLE 9.44': ['PrinterMarkFormDictionary', "Additional entries specific to a printer's mark form dictionary"],
    'TABLE 9.45': ['SeparationDictionary', 'Entries in a separation dictionary'],
    'TABLE 9.46': ['PDF_XOutputIntentDictionary', 'Entries in a PDF/X output intent dictionary'],
    'TABLE 9.47': ['TrapNetworkAnnotationDictionary', 'Additional entries specific to a trap network annotation'],
    'TABLE 9.48': ['TrapNetworkAppearanceStreamDictionary', 'Additional entries specific to a trap network appearance stream'],
    'TABLE 9.49': ['OpiVersionDictionary', 'Entry in an OPI version dictionary'],
}
224
225
def acceptType(val):
    """Tell whether *val* is made up of nothing but known type tokens.

    Every member of ``knownTypes`` is deleted from a copy of *val*; the text
    is accepted when that leaves the empty string.

    Returns True for an acceptable TYPE-column text, False otherwise.
    """
    remainder = val
    for token in knownTypes:
        remainder = remainder.replace(token, '')
    return remainder == ''
235
236
def inTable():
    """Return the module flag that records whether a table header was accepted."""
    return tableHeaderFound
240
def tableDescriptionFound(desc):
    """Remember *desc*, with surrounding whitespace removed, as the current table caption."""
    global table
    caption = desc.strip()
    table = caption
244
def tableHasHeader():
    """Mark the current table as having an accepted column header.

    After this call inTable() reports True until stopTable() clears the flag.
    """
    global tableHeaderFound
    tableHeaderFound = True
251
def fix(val):
    """Make *val* safe to emit between single quotes in generated Python code.

    Typographic characters found in the extracted spec text are replaced by
    ASCII equivalents, then backslashes, single quotes and newlines are
    escaped.

    val: text to clean up.
    Returns the cleaned text.
    """
    # Each entry maps one typographic character to its ASCII spelling; the
    # characters are all distinct, so the order of this table is irrelevant.
    replacements = (
        (u'\ufb01', 'fi'),    # "fi" ligature
        (u'\ufb02', 'fl'),    # "fl" ligature
        (u'\u201c', '"'),     # left double quotation mark
        (u'\u201d', '"'),     # right double quotation mark
        (u'\u2019', '\''),    # right single quotation mark
        (u'\xae', '(R)'),     # registered sign
        (u'\u2026', '...'),   # horizontal ellipsis
        (u'\xd7', 'x'),       # multiplication sign
        (u'\u2212', '-'),     # minus sign
        (u'\u2264', '<='),    # less-than or equal to
        (u'\u2014', '-'),     # em dash
        (u'\u2013', '-'),     # en dash (a dash, not an apostrophe)
        (u'\u2022', '*'),     # bullet
        (u'\xb5', 'mu'),      # micro sign
        (u'\xf7', '/'),       # division sign
    )

    ret = val
    for fancy, plain in replacements:
        ret = ret.replace(fancy, plain)

    # Escape for a single-quoted Python string literal.  Backslashes go first
    # so the backslashes added for quotes and newlines are not doubled.
    ret = ret.replace('\\', '\\\\')
    ret = ret.replace('\'', '\\\'')
    ret = ret.replace('\n', '\\n')

    return ret
edisonn@google.com07f01472013-06-13 17:24:54 +0000279
def commitRow():
    """Emit the pending table row (``columnValues``) as generated code.

    Does nothing when no row is pending.  For the first row of a table the
    ``pdfspec.addClass(...)`` line is printed first, using the class name,
    comment and optional base class registered in ``tableToClassName`` for the
    current table caption.  The row itself is printed as a
    required/optional field with its name, type and comment, plus a
    ``.must(...)`` clause when the table entry lists one for this field.

    A row whose VALUE text contains no ')' is reported on standard output and
    left pending.  Otherwise ``columnValues`` is reset to None.

    Raises AttributeError if the current caption has no "TABLE n.m" key and
    KeyError if that key is missing from ``tableToClassName``.
    """
    global columnValues
    global emitedDitionaryName

    if columnValues is None:
        return

    # The VALUE column is expected to start with a parenthesised attribute
    # list such as "(Optional; PDF 1.2)"; only the text up to the first ')'
    # is inspected.
    lastClosed = columnValues[2].find(')')
    if lastClosed < 0:
        print('ERRRRRRRRRRRRRRROR')
        print(columnValues)
        return

    spec = columnValues[2][:lastClosed + 1]
    spec = spec.replace('(', ';').replace(')', ';').strip(';')
    specs = spec.split(';')

    # clearly required, but it can be required with conditions. don't handle
    # this ones here, but manually: any attribute other than "Required",
    # "inheritable" or a "PDF <version>" marker turns the flag off.
    required = specs[0] == 'Required'
    for s in specs:
        attribute = s.strip()
        if attribute in ('inheritable', 'Inheritable'):
            continue
        if attribute.startswith('PDF '):
            continue
        if s != 'Required':
            required = False

    columnValues = [fix(columnValues[0]), fix(columnValues[1]), fix(columnValues[2])]

    tableKey = re.search('(TABLE [0-9].[0-9][0-9]?)', table).group(1)
    entry = tableToClassName[tableKey]
    # Optional fourth element: field name -> expression for .must(...).
    overrides = entry[3] if len(entry) >= 4 else {}

    if emitedDitionaryName == '':
        # First row of this table: open the class definition.
        emitedDitionaryName = entry[0]
        comment = fix(entry[1])

        baseClass = 'Dictionary'
        if len(entry) >= 3 and entry[2] != '':
            baseClass = entry[2]

        print(" pdfspec.addClass('" + emitedDitionaryName + "', '" + baseClass + "', '" + comment + "')\\")

    # A field with a .must(...) override is always required.
    hasOverride = columnValues[0] in overrides
    if hasOverride:
        required = True

    if required:
        print(" .required('NULL')\\")
    else:
        print(' .optional()\\')

    print(" .field('" + columnValues[0] + "')\\")
    print(" .name('" + columnValues[0] + "')\\")
    print(" .type('" + columnValues[1] + "')\\")
    print(" .comment('" + columnValues[2] + "')\\")

    if hasOverride:
        print(' .must(' + overrides[columnValues[0]] + ')\\')

    print(' .done().done()\\')

    columnValues = None
edisonn@google.com07f01472013-06-13 17:24:54 +0000405
def newRow(first, second, third):
    """Start a new pending row from the three column texts.

    Trailing whitespace is removed from each cell and the result replaces
    ``columnValues``.
    """
    global columnValues
    columnValues = [cell.rstrip() for cell in (first, second, third)]
409
def appendRow(second, third):
    """Append continuation text to the pending row.

    second: extra TYPE text, joined to the pending type with a space.
    third: extra VALUE text, joined to the pending value with a newline.
    A part that is empty after removing trailing whitespace is ignored.
    """
    moreType = second.rstrip()
    moreValue = third.rstrip()

    if moreType != '':
        columnValues[1] = columnValues[1] + ' ' + moreType
    if moreValue != '':
        columnValues[2] = columnValues[2] + '\n' + moreValue
edisonn@google.com07f01472013-06-13 17:24:54 +0000416
def rebaseTable(line):
    """Re-derive the column offsets from *line* when it starts a new row.

    The line is split into words (commas are treated as separate words).  The
    first word is taken as the key; the words after it are skipped while they
    are known type tokens.  If the word reached begins with "(Optional" or
    "(Required", the pending row is committed, ``columnWidth[0]`` becomes the
    offset of the second word in *line* and ``columnWidth[1]`` the distance
    from there to the marker.

    Returns True when the offsets were updated, False otherwise.
    """
    tokens = line.replace(',', ' , ').split()

    if len(tokens) < 3:
        return False

    # Walk over the type words, never moving past the last word.
    index = 1
    lastIndex = len(tokens) - 1
    while index < lastIndex and tokens[index] in knownTypes:
        index += 1

    candidate = tokens[index]
    for marker in ('(Optional', '(Required'):
        if candidate.startswith(marker):
            commitRow()
            columnWidth[0] = line.find(tokens[1])
            columnWidth[1] = line.find(marker) - columnWidth[0]
            return True

    return False
444
445
def stopTable():
    """Close the current table, if there is one.

    Commits the pending row, clears the in-table flag and the emitted class
    name, then prints the closing " .done()" line followed by an empty line.
    """
    global tableHeaderFound
    global emitedDitionaryName

    if inTable():
        commitRow()
        tableHeaderFound = False
        emitedDitionaryName = ''
        print(' .done()')
        print('')
edisonn@google.com07f01472013-06-13 17:24:54 +0000458
459
def killTable():
    """Placeholder called when a candidate table is rejected; does nothing."""
    pass
462
def processLineCore(line):
    """Run one input line through the table-recognition state machine.

    line: one line of the spec text, as UTF-8 bytes or as already decoded text.

    A blank line or a "TABLE n.m" caption closes the current table.  The line
    after a caption must be the "KEY TYPE VALUE" header, from which the column
    offsets are taken.  While in a table, a line either starts a new row,
    continues the pending one, or ends the table.

    Returns True when the line was accepted as part of a KEY/TYPE/VALUE table
    (header, new row or continuation), False otherwise.
    """
    global lines
    global tableLine
    global columnWidth
    global mustFollowTableHeader

    lines = lines + 1

    # Column offsets are character offsets, so work on decoded text.
    if isinstance(line, bytes):
        line = line.decode('utf8')

    striped = line.rstrip()

    words = line.split()
    if len(words) == 0:
        stopTable()
        return False

    isTableHeader = re.search(r'^\s*(TABLE [0-9].[0-9][0-9]?)', striped)
    if isTableHeader:
        stopTable()
        tableDescriptionFound(striped)
        mustFollowTableHeader = True
        return False

    if mustFollowTableHeader:
        mustFollowTableHeader = False
        if len(words) != 3:
            killTable()
            return False

        # TODO(edisonn): support for generic table!
        if words[0] != 'KEY' or words[1] != 'TYPE' or words[2] != 'VALUE':
            killTable()
            return False

        tableHasHeader()
        typeStart = striped.index('TYPE')
        columnWidth = [typeStart, striped.index('VALUE') - typeStart, 0]
        return True

    if not inTable():
        return False

    tableLine = tableLine + 1
    first = striped[0 : columnWidth[0]]
    second = striped[columnWidth[0] : columnWidth[0] + columnWidth[1]]
    third = striped[columnWidth[0] + columnWidth[1] :]

    # tableLine is never reset, so this branch only applies to the very first
    # table line of the whole input.
    if tableLine == 1:
        # startswith() also covers a line too short to reach the VALUE column.
        if not third.startswith('('):
            killTable()
            return False

        newRow(first, second, third)
        return True

    # A line that starts a new row may shift the columns; re-slice if so.
    if rebaseTable(striped):
        first = striped[0 : columnWidth[0]]
        second = striped[columnWidth[0] : columnWidth[0] + columnWidth[1]]
        third = striped[columnWidth[0] + columnWidth[1] :]

    first = first.rstrip()
    second = second.rstrip()
    third = third.rstrip()

    # Only VALUE text: continuation of the pending row's value.
    if first == '' and second == '' and third != '':
        appendRow(second, third)
        return True

    if len(first.split()) > 1:
        stopTable()
        return False

    if first != '' and first[0] == ' ':
        stopTable()
        return False

    if first != '' and second != '' and third == '':
        stopTable()
        return False

    # No key, TYPE text starting at the column edge: continuation of the type.
    if first == '' and second != '' and second[0] != ' ':
        if acceptType(second):
            appendRow(second, third)
            return True
        stopTable()
        return False

    # third is non-empty from here on whenever first and second are.
    if first != '' and second != '' and not third.startswith('('):
        stopTable()
        return False

    if first == '' and second != '' and second[0] == ' ':
        stopTable()
        return False

    if first != '' and second != '' and third.startswith('('):
        commitRow()
        newRow(first, second, third)
        return True

    return False
571
def processLine(line):
    """Process one input line and copy it to the rewritten spec file.

    line: one line of the spec text, as UTF-8 bytes or as decoded text.

    The line is handed to processLineCore().  When it is reported as part of
    a table, the TYPE and VALUE columns are pushed right with spaces before
    the line is written to ``fnewspec``; other lines are written unchanged.
    """
    inSpec = processLineCore(line)

    #just return, use the next lines if you wish to rewrite spec
    #return

    # columnWidth holds character offsets measured on the decoded text, so
    # the padding has to be inserted into decoded text as well; slicing the
    # raw bytes would misplace it after any multi-byte character.
    wasBytes = isinstance(line, bytes)
    text = line.decode('utf8') if wasBytes else line

    if inSpec:
        #resize colum with types
        typeStart = columnWidth[0]
        valueStart = columnWidth[0] + columnWidth[1]
        text = text[:valueStart] + (' ' * (60 - columnWidth[1])) + text[valueStart:]
        text = text[:typeStart] + (' ' * (40 - typeStart)) + text[typeStart:]

    fnewspec.write(text.encode('utf8') if wasBytes else text)
edisonn@google.com07f01472013-06-13 17:24:54 +0000586
587
def generateDef():
    """Read the spec text from standard input and print the generated module.

    Standard output receives ``import datatypes`` and a ``buildPdfSpec``
    function made of one ``pdfspec.addClass(...)`` chain per recognised table.
    Every input line is also copied, with table columns re-spaced, to
    'PdfReference-okular-2.txt' in the current directory.

    Returns None.
    """
    global fnewspec

    fnewspec = open('PdfReference-okular-2.txt', 'w')
    try:
        print('import datatypes')
        print('')

        print('def buildPdfSpec(pdfspec):')

        for line in sys.stdin:
            processLine(line)

        # close last table if it was not closed already
        stopTable()

        print('')
    finally:
        # Close the rewritten spec even if a line fails to process.
        fnewspec.close()
edisonn@google.com07f01472013-06-13 17:24:54 +0000609
# Script entry point: run the generator and use its return value as exit status.
if __name__ == '__main__':
    sys.exit(generateDef())