edisonn@google.com | 07f0147 | 2013-06-13 17:24:54 +0000 | [diff] [blame] | 1 | #!/usr/local/bin/python |
| 2 | # coding: utf-8 |
| 3 | |
| 4 | import sys |
| 5 | import re |
| 6 | |
| 7 | # TODO(edisonn): put processed part of file in a new file |
| 8 | # put unprocessed part, in a new file, so we see what we miss |
| 9 | # keep blank lines, and generate a version without the blank lines |
| 10 | |
# TODO(edisonn): deal manually with tables that don't have a "KEY TYPE VALUE" header, e.g.
# TABLE 7.11 Restrictions on the entries in a soft-mask image dictionary
# KEY RESTRICTION
| 14 | |
| 15 | |
| 16 | |
# Parser state shared by the helper functions below through `global`.
lines = 0  # not referenced in the visible part of this file
table = ''  # current table description; set by tableDescriptionFound()
tableHeaderFound = False  # set by tableHasHeader(), cleared by stopTable()
tableLine = 0  # not referenced in the visible part of this file
tableRow = 0  # not referenced in the visible part of this file
columnWidth = []  # rebaseTable() writes column offsets into slots 0 and 1
columnValues = None  # [key, type, comment] of the pending row, or None
mustFollowTableHeader = False  # not referenced in the visible part of this file
emitedDitionaryName = ''  # class name emitted for the current table; '' = none yet
edisonn@google.com | 07f0147 | 2013-06-13 17:24:54 +0000 | [diff] [blame] | 26 | |
# Words (and separators) that may make up the TYPE column of a spec table.
# acceptType() strips every member from a candidate string and rebaseTable()
# tests individual words for membership.
#
# Text extracted from the PDF can spell "fi" with the U+FB01 ligature, so the
# affected words are listed in both spellings. The ligature forms are written
# as \ufb01 escapes so they cannot be silently collapsed into duplicates of
# the ASCII entries.
knownTypes = {
    '(any)',
    u'unde\ufb01ned',
    'undefined',
    '(undefined)',
    '(various)',
    'array',
    'or',
    'boolean',
    'date',
    'dictionary',
    'function',
    'integer',
    u'\ufb01le',
    'file',
    u'speci\ufb01cation',
    'specification',
    'name',
    'tree',
    'number',
    'rectangle',
    'stream',
    'string',
    'text',
    ',',
    ' ',
}
| 53 | |
edisonn@google.com | a2fab9d | 2013-06-14 19:22:19 +0000 | [diff] [blame^] | 54 | # TODO(edisonn): add a third element in the vector, the base class, by default it is Dictionary |
| 55 | # TODO(edisonn): add overrides for types map<field_name, type_name> |
| 56 | # e.g. ,{'Resources', 'ResourceDictionary'} |
| 57 | # TODO(edisonn): can be added one by one, or extracted from documentation |
| 58 | |
# Maps a spec table key ('TABLE <chapter>.<number>') to a list describing the
# class generated for that table, as consumed by commitRow():
#   [0] class name passed to pdfspec.addClass()
#   [1] table description, emitted as the class comment
#   [2] (optional) base class name; 'Dictionary' is emitted when it is
#       missing or empty
#   [3] (optional) dict of field name -> expression; a listed field is
#       forced to be required and the expression is emitted in '.must(...)'
tableToClassName = {
    'TABLE 3.4': ['StreamCommonDictionary', 'Entries common to all stream dictionaries'],
    'TABLE 3.7': ['LzwdecodeAndFlatedecodeFiltersDictionary', 'Optional parameters for LZWDecode and FlateDecode filters'],
    'TABLE 3.9': ['CcittfaxdecodeFilterDictionary', 'Optional parameters for the CCITTFaxDecode filter'],
    'TABLE 3.10': ['Jbig2DecodeFilterDictionary', 'Optional parameter for the JBIG2Decode filter'],
    'TABLE 3.11': ['DctdecodeFilterDictionary', 'Optional parameter for the DCTDecode filter'],
    'TABLE 3.12': ['FileTrailerDictionary', 'Entries in the file trailer dictionary'],
    'TABLE 3.13': ['EncryptionCommonDictionary', 'Entries common to all encryption dictionaries'],
    'TABLE 3.14': ['StandardSecurityHandlerDictionary', 'Additional encryption dictionary entries for the standard security handler'],
    'TABLE 3.16': ['CatalogDictionary', 'Entries in the catalog dictionary'],
    'TABLE 3.17': ['PageTreeNodeDictionary', 'Required entries in a page tree node'],
    'TABLE 3.18': ['PageObjectDictionary', 'Entries in a page object'],
    'TABLE 3.19': ['NameDictionary', 'Entries in the name dictionary'],
    'TABLE 3.21': ['ResourceDictionary', 'Entries in a resource dictionary'],
    'TABLE 3.23': ['NameTreeNodeDictionary', 'Entries in a name tree node dictionary'],
    'TABLE 3.25': ['NumberTreeNodeDictionary', 'Entries in a number tree node dictionary'],
    'TABLE 3.26': ['FunctionCommonDictionary', 'Entries common to all function dictionaries'],
    'TABLE 3.27': ['Type0FunctionDictionary', 'Additional entries specific to a type 0 function dictionary'],
    'TABLE 3.28': ['Type2FunctionDictionary', 'Additional entries specific to a type 2 function dictionary'],
    'TABLE 3.29': ['Type3FunctionDictionary', 'Additional entries specific to a type 3 function dictionary'],
    'TABLE 3.32': ['FileSpecificationDictionary', 'Entries in a file specification dictionary'],
    'TABLE 3.33': ['EmbeddedFileStreamDictionary', 'Additional entries in an embedded file stream dictionary'],
    'TABLE 3.34': ['EmbeddedFileParameterDictionary', 'Entries in an embedded file parameter dictionary'],
    'TABLE 3.35': ['MacOsFileInformationDictionary', 'Entries in a Mac OS file information dictionary'],
    'TABLE 4.13': ['CalgrayColorSpaceDictionary', 'Entries in a CalGray color space dictionary'],
    'TABLE 4.14': ['CalrgbColorSpaceDictionary', 'Entries in a CalRGB color space dictionary'],
    'TABLE 4.15': ['LabColorSpaceDictionary', 'Entries in a Lab color space dictionary'],
    'TABLE 4.16': ['IccProfileStreamDictionary', 'Additional entries specific to an ICC profile stream dictionary'],
    'TABLE 4.20': ['DeviceNColorSpaceDictionary', 'Entry in a DeviceN color space attributes dictionary'],
    'TABLE 4.22': ['Type1PatternDictionary', 'Additional entries specific to a type 1 pattern dictionary'],
    'TABLE 4.23': ['Type2PatternDictionary', 'Entries in a type 2 pattern dictionary'],
    'TABLE 4.25': ['ShadingDictionary', 'Entries common to all shading dictionaries'],
    'TABLE 4.26': ['Type1ShadingDictionary', 'Additional entries specific to a type 1 shading dictionary', 'ShadingDictionary'],
    'TABLE 4.27': ['Type2ShadingDictionary', 'Additional entries specific to a type 2 shading dictionary', 'ShadingDictionary'],
    'TABLE 4.28': ['Type3ShadingDictionary', 'Additional entries specific to a type 3 shading dictionary', 'ShadingDictionary'],
    'TABLE 4.29': ['Type4ShadingDictionary', 'Additional entries specific to a type 4 shading dictionary', 'ShadingDictionary'],
    'TABLE 4.30': ['Type5ShadingDictionary', 'Additional entries specific to a type 5 shading dictionary', 'ShadingDictionary'],
    'TABLE 4.31': ['Type6ShadingDictionary', 'Additional entries specific to a type 6 shading dictionary', 'ShadingDictionary'],
    'TABLE 4.35': ['ImageDictionary', 'Additional entries specific to an image dictionary', 'XObjectDictionary', {'Subtype': 'datatypes.PdfName(\'Image\')'}],
    'TABLE 4.37': ['AlternateImageDictionary', 'Entries in an alternate image dictionary'],
    'TABLE 4.41': ['Type1FormDictionary', 'Additional entries specific to a type 1 form dictionary', 'XObjectDictionary', {'Subtype': 'datatypes.PdfName(\'Form\')'}],
    'TABLE 4.42': ['GroupAttributesDictionary', 'Entries common to all group attributes dictionaries'],
    'TABLE 4.43': ['ReferenceDictionary', 'Entries in a reference dictionary'],
    'TABLE 4.44': ['PSXobjectDictionary', 'Additional entries specific to a PostScript XObject dictionary'],
    'TABLE 5.8': ['Type1FontDictionary', 'Entries in a Type 1 font dictionary', 'FontDictionary'],
    'TABLE 5.9': ['Type3FontDictionary', 'Entries in a Type 3 font dictionary', 'FontDictionary'],
    'TABLE 5.11': ['EncodingDictionary', 'Entries in an encoding dictionary'],
    'TABLE 5.12': ['CIDSystemInfoDictionary', 'Entries in a CIDSystemInfo dictionary'],
    'TABLE 5.13': ['CIDFontDictionary', 'Entries in a CIDFont dictionary', 'FontDictionary'],
    'TABLE 5.16': ['CMapDictionary', 'Additional entries in a CMap dictionary'],
    'TABLE 5.17': ['Type0FontDictionary', 'Entries in a Type 0 font dictionary', 'FontDictionary'],
    'TABLE 5.18': ['FontDescriptorDictionary', 'Entries common to all font descriptors'],
    'TABLE 5.20': ['CIDFontDescriptorDictionary', 'Additional font descriptor entries for CIDFonts'],
    'TABLE 5.23': ['EmbeddedFontStreamDictionary', 'Additional entries in an embedded font stream dictionary'],
    'TABLE 6.3': ['Type1HalftoneDictionary', 'Entries in a type 1 halftone dictionary'],
    'TABLE 6.4': ['Type6HalftoneDictionary', 'Additional entries specific to a type 6 halftone dictionary'],
    'TABLE 6.5': ['Type10HalftoneDictionary', 'Additional entries specific to a type 10 halftone dictionary'],
    'TABLE 6.6': ['Type16HalftoneDictionary', 'Additional entries specific to a type 16 halftone dictionary'],
    'TABLE 6.7': ['Type5HalftoneDictionary', 'Entries in a type 5 halftone dictionary'],
    'TABLE 7.10': ['SoftMaskDictionary', 'Entries in a soft-mask dictionary'],
    'TABLE 7.12': ['SoftMaskImageDictionary', 'Additional entry in a soft-mask image dictionary'],
    'TABLE 7.13': ['TransparencyGroupDictionary', 'Additional entries specific to a transparency group attributes dictionary'],
    'TABLE 8.1': ['ViewerPreferencesDictionary', 'Entries in a viewer preferences dictionary'],
    'TABLE 8.3': ['OutlineDictionary', 'Entries in the outline dictionary'],
    'TABLE 8.4': ['OutlineItemDictionary', 'Entries in an outline item dictionary'],
    'TABLE 8.6': ['PageLabelDictionary', 'Entries in a page label dictionary'],
    'TABLE 8.7': ['ThreadDictionary', 'Entries in a thread dictionary'],
    'TABLE 8.8': ['BeadDictionary', 'Entries in a bead dictionary'],
    'TABLE 8.9': ['TransitionDictionary', 'Entries in a transition dictionary'],
    'TABLE 8.10': ['AnnotationDictionary', 'Entries common to all annotation dictionaries'],
    'TABLE 8.12': ['BorderStyleDictionary', 'Entries in a border style dictionary'],
    'TABLE 8.13': ['AppearanceDictionary', 'Entries in an appearance dictionary'],
    'TABLE 8.15': ['TextAnnotationDictionary', 'Additional entries specific to a text annotation'],
    'TABLE 8.16': ['ALinkAnnotationDictionary', 'Additional entries specific to a link annotation'],
    'TABLE 8.17': ['FreeTextAnnotationDictionary', 'Additional entries specific to a free text annotation'],
    'TABLE 8.18': ['LineAnnotationDictionary', 'Additional entries specific to a line annotation'],
    'TABLE 8.20': ['SquareOrCircleAnnotation', 'Additional entries specific to a square or circle annotation'],
    'TABLE 8.21': ['MarkupAnnotationsDictionary', 'Additional entries specific to markup annotations'],
    'TABLE 8.22': ['RubberStampAnnotationDictionary', 'Additional entries specific to a rubber stamp annotation'],
    'TABLE 8.23': ['InkAnnotationDictionary', 'Additional entries specific to an ink annotation'],
    'TABLE 8.24': ['PopUpAnnotationDictionary', 'Additional entries specific to a pop-up annotation'],
    'TABLE 8.25': ['FileAttachmentAnnotationDictionary', 'Additional entries specific to a file attachment annotation'],
    'TABLE 8.26': ['SoundAnnotationDictionary', 'Additional entries specific to a sound annotation'],
    'TABLE 8.27': ['MovieAnnotationDictionary', 'Additional entries specific to a movie annotation'],
    'TABLE 8.28': ['WidgetAnnotationDictionary', 'Additional entries specific to a widget annotation'],
    'TABLE 8.29': ['ActionDictionary', 'Entries common to all action dictionaries'],
    'TABLE 8.30': ['AnnotationActionsDictionary', 'Entries in an annotation\'s additional-actions dictionary'],
    'TABLE 8.31': ['PageObjectActionsDictionary', 'Entries in a page object\'s additional-actions dictionary'],
    'TABLE 8.32': ['FormFieldActionsDictionary', 'Entries in a form field\'s additional-actions dictionary'],
    'TABLE 8.33': ['DocumentCatalogActionsDictionary', 'Entries in the document catalog\'s additional-actions dictionary'],
    'TABLE 8.35': ['GoToActionDictionary', 'Additional entries specific to a go-to action'],
    'TABLE 8.36': ['RemoteGoToActionDictionary', 'Additional entries specific to a remote go-to action'],
    'TABLE 8.37': ['LaunchActionDictionary', 'Additional entries specific to a launch action'],
    'TABLE 8.38': ['WindowsLaunchActionDictionary', 'Entries in a Windows launch parameter dictionary'],
    'TABLE 8.39': ['ThreadActionDictionary', 'Additional entries specific to a thread action'],
    'TABLE 8.40': ['URIActionDictionary', 'Additional entries specific to a URI action'],
    'TABLE 8.41': ['URIDictionary', 'Entry in a URI dictionary'],
    'TABLE 8.42': ['SoundActionDictionary', 'Additional entries specific to a sound action'],
    'TABLE 8.43': ['MovieActionDictionary', 'Additional entries specific to a movie action'],
    'TABLE 8.44': ['HideActionDictionary', 'Additional entries specific to a hide action'],
    'TABLE 8.46': ['NamedActionsDictionary', 'Additional entries specific to named actions'],
    'TABLE 8.47': ['InteractiveFormDictionary', 'Entries in the interactive form dictionary'],
    'TABLE 8.49': ['FieldDictionary', 'Entries common to all field dictionaries'],
    'TABLE 8.51': ['VariableTextFieldDictionary', 'Additional entries common to all fields containing variable text'],
    'TABLE 8.52': ['AppearanceCharacteristicsDictionary', 'Entries in an appearance characteristics dictionary'],
    'TABLE 8.54': ['CheckboxFieldDictionary', 'Additional entry specific to a checkbox field'],
    'TABLE 8.55': ['RadioButtonFieldDictionary', 'Additional entry specific to a radio button field'],
    'TABLE 8.57': ['TextFieldDictionary', 'Additional entry specific to a text field'],
    'TABLE 8.59': ['ChoiceFieldDictionary', 'Additional entries specific to a choice field'],
    'TABLE 8.60': ['SignatureDictionary', 'Entries in a signature dictionary'],
    'TABLE 8.61': ['SubmitFormActionDictionary', 'Additional entries specific to a submit-form action'],
    'TABLE 8.63': ['ResetFormActionDictionary', 'Additional entries specific to a reset-form action'],
    'TABLE 8.65': ['ImportDataActionDictionary', 'Additional entries specific to an import-data action'],
    'TABLE 8.66': ['JavascriptActionDictionary', 'Additional entries specific to a JavaScript action'],
    'TABLE 8.67': ['FDFTrailerDictionary', 'Entry in the FDF trailer dictionary'],
    'TABLE 8.68': ['FDFCatalogDictionary', 'Entries in the FDF catalog dictionary'],
    'TABLE 8.69': ['FDFDictionary', 'Entries in the FDF dictionary'],
    'TABLE 8.70': ['EncryptedEmbeddedFileStreamDictionary', 'Additional entry in an embedded file stream dictionary for an encrypted FDF file'],
    'TABLE 8.71': ['JavascriptDictionary', 'Entries in the JavaScript dictionary'],
    'TABLE 8.72': ['FDFFieldDictionary', 'Entries in an FDF field dictionary'],
    'TABLE 8.73': ['IconFitDictionary', 'Entries in an icon fit dictionary'],
    'TABLE 8.74': ['FDFPageDictionary', 'Entries in an FDF page dictionary'],
    'TABLE 8.75': ['FDFTemplateDictionary', 'Entries in an FDF template dictionary'],
    'TABLE 8.76': ['FDFNamedPageReferenceDictionary', 'Entries in an FDF named page reference dictionary'],
    'TABLE 8.77': ['FDFFileAnnotationDictionary', 'Additional entry for annotation dictionaries in an FDF file'],
    'TABLE 8.78': ['SoundObjectDictionary', 'Additional entries specific to a sound object'],
    'TABLE 8.79': ['MovieDictionary', 'Entries in a movie dictionary'],
    'TABLE 8.80': ['MovieActivationDictionary', 'Entries in a movie activation dictionary'],
    'TABLE 9.2': ['DocumentInformationDictionary', 'Entries in the document information dictionary'],
    'TABLE 9.3': ['MetadataStreamDictionary', 'Additional entries in a metadata stream dictionary'],
    'TABLE 9.4': ['ComponentsWithMetadataDictionary', 'Additional entry for components having metadata'],
    'TABLE 9.6': ['PagePieceDictionary', 'Entries in a page-piece dictionary'],
    'TABLE 9.7': ['ApplicationDataDictionary', 'Entries in an application data dictionary'],
    'TABLE 9.9': ['StructureTreeRootDictionary', 'Entries in the structure tree root'],
    'TABLE 9.10': ['StructureElementDictionary', 'Entries in a structure element dictionary'],
    'TABLE 9.11': ['MarkedContentReferenceDictionary', 'Entries in a marked-content reference dictionary'],
    'TABLE 9.12': ['ObjectReferenceDictionary', 'Entries in an object reference dictionary'],
    'TABLE 9.13': ['StructureElementAccessDictionary', 'Additional dictionary entries for structure element access'],
    'TABLE 9.14': ['AttributeObjectDictionary', 'Entry common to all attribute objects'],
    'TABLE 9.15': ['MarkInformationDictionary', 'Entry in the mark information dictionary'],
    'TABLE 9.16': ['ArtifactsDictionary', 'Property list entries for artifacts'],
    'TABLE 9.27': ['StandardStructureDictionary', 'Standard layout attributes common to all standard structure types'],
    'TABLE 9.28': ['BlockLevelStructureElementsDictionary', 'Additional standard layout attributes specific to block-level structure elements'],
    'TABLE 9.29': ['InlineLevelStructureElementsDictionary', 'Standard layout attributes specific to inline-level structure elements'],
    'TABLE 9.30': ['ListAttributeDictionary', 'Standard list attribute'],
    'TABLE 9.31': ['TableAttributesDictionary', 'Standard table attributes'],
    'TABLE 9.32': ['WebCaptureInformationDictionary', 'Entries in the Web Capture information dictionary'],
    'TABLE 9.33': ['WebCaptureDictionary', 'Entries common to all Web Capture content sets'],
    'TABLE 9.34': ['WebCapturePageSetDictionary', 'Additional entries specific to a Web Capture page set'],
    'TABLE 9.35': ['WebCaptureImageSetDictionary', 'Additional entries specific to a Web Capture image set'],
    'TABLE 9.36': ['SourceInformationDictionary', 'Entries in a source information dictionary'],
    'TABLE 9.37': ['URLAliasDictionary', 'Entries in a URL alias dictionary'],
    'TABLE 9.38': ['WebCaptureCommandDictionary', 'Entries in a Web Capture command dictionary'],
    'TABLE 9.40': ['WebCaptureCommandSettingsDictionary', 'Entries in a Web Capture command settings dictionary'],
    'TABLE 9.41': ['BoxColorInformationDictionary', 'Entries in a box color information dictionary'],
    'TABLE 9.42': ['BoxStyleDictionary', 'Entries in a box style dictionary'],
    'TABLE 9.43': ['PrinterMarkAnnotationDictionary', 'Additional entries specific to a printer\'s mark annotation'],
    'TABLE 9.44': ['PrinterMarkFormDictionary', 'Additional entries specific to a printer\'s mark form dictionary'],
    'TABLE 9.45': ['SeparationDictionary', 'Entries in a separation dictionary'],
    'TABLE 9.46': ['PDF_XOutputIntentDictionary', 'Entries in a PDF/X output intent dictionary'],
    'TABLE 9.47': ['TrapNetworkAnnotationDictionary', 'Additional entries specific to a trap network annotation'],
    'TABLE 9.48': ['TrapNetworkAppearanceStreamDictionary', 'Additional entries specific to a trap network appearance stream'],
    'TABLE 9.49': ['OpiVersionDictionary', 'Entry in an OPI version dictionary'],
}
| 223 | |
| 224 | |
def acceptType(val, types=None):
    """Return True if `val` consists only of known type words and separators.

    Every member of `types` is removed from `val`; the value is accepted
    when nothing is left over. An empty `val` is therefore accepted.

    Tokens are removed longest first (ties broken alphabetically). Removing
    them in set iteration order made the answer depend on hashing: stripping
    'undefined' before '(undefined)' leaves '()' behind and rejects a valid
    type.

    Args:
        val: text of a candidate TYPE column.
        types: iterable of accepted tokens; defaults to the module-level
            `knownTypes`.
    """
    if types is None:
        types = knownTypes

    remainder = val
    for token in sorted(types, key=lambda t: (-len(t), t)):
        remainder = remainder.replace(token, '')

    return remainder == ''
| 234 | |
| 235 | |
def inTable():
    """Return the module-level `tableHeaderFound` flag."""
    # Reading a global needs no `global` declaration.
    return tableHeaderFound
| 239 | |
def tableDescriptionFound(desc):
    """Store `desc`, without surrounding whitespace, in the global `table`."""
    global table
    trimmed = desc.strip()
    table = trimmed
| 243 | |
def tableHasHeader():
    """Set the global `tableHeaderFound` flag to True."""
    global tableHeaderFound
    tableHeaderFound = True
| 250 | |
def fix(val):
    """Return `val` with typographic characters replaced and quotes escaped.

    Typographic characters are first replaced with plain equivalents. The
    result is then escaped so that it can be emitted between single quotes
    as a Python string literal: backslashes, single quotes and newlines are
    backslash-escaped.

    Args:
        val: text taken from the spec (a table description or cell).

    Returns:
        The cleaned and escaped text.
    """
    # All non-ASCII characters are written as escapes so the mapping does
    # not depend on the encoding of this source file.
    replacements = (
        (u'\ufb01', 'fi'),   # "fi" ligature
        (u'\u201c', '"'),    # left double quotation mark
        (u'\u201d', '"'),    # right double quotation mark
        (u'\u2019', '\''),   # right single quotation mark
        (u'\ufb02', 'fl'),   # "fl" ligature
        (u'\xae', '(R)'),    # registered sign
        (u'\u2026', '...'),  # horizontal ellipsis
        (u'\xd7', 'x'),      # multiplication sign
        (u'\u2212', '-'),    # minus sign
        (u'\u2264', '<='),   # less-than or equal to
        (u'\u2014', '-'),    # em dash
        # NOTE(review): the en dash is mapped to an apostrophe, not '-';
        # kept as found - confirm this is intended.
        (u'\u2013', '\''),
        (u'\u2022', '*'),    # bullet
        (u'\xb5', 'mu'),     # micro sign
        (u'\xf7', '/'),      # division sign
    )

    ret = val
    for typographic, plain in replacements:
        ret = ret.replace(typographic, plain)

    # Escape for emission inside a single-quoted Python string. Backslashes
    # go first so the escapes added below are not doubled.
    ret = ret.replace('\\', '\\\\')
    ret = ret.replace('\'', '\\\'')
    ret = ret.replace('\n', '\\n')

    return ret
edisonn@google.com | 07f0147 | 2013-06-13 17:24:54 +0000 | [diff] [blame] | 278 | |
def commitRow():
    """Print the pending table row, if any, as pdfspec builder calls.

    Reads the pending row from the global `columnValues` ([key, type,
    comment]). The comment is expected to start with a parenthesised
    annotation such as "(Required; PDF 1.2)"; if it contains no ')', an
    error marker and the row are printed and the row is left pending.

    For the first row of a table (`emitedDitionaryName` is still '') the
    `pdfspec.addClass(...)` line is printed first, using the entry that
    `tableToClassName` holds for the table key found in `table`. The row is
    then printed as a required or optional field and `columnValues` is
    reset to None.

    Raises:
        AttributeError: if `table` contains no 'TABLE <d>.<d[d]>' key.
        KeyError: if the table key is missing from `tableToClassName`.
    """
    global columnValues
    global emitedDitionaryName
    global table

    if columnValues is None:
        return

    # The leading annotation of the comment ends at the first ')'.
    lastClosed = columnValues[2].find(')')
    if lastClosed < 0:
        print('ERRRRRRRRRRRRRRROR')
        print(columnValues)
        return

    # Turn "(Required; PDF 1.2)" into the parts ['Required', ' PDF 1.2'].
    spec = columnValues[2][:lastClosed + 1]
    spec = spec.replace('(', ';').replace(')', ';').strip(';')
    specs = spec.split(';')

    # A field is required only if the annotation starts with 'Required' and
    # every other part is an inheritance marker or a PDF version.
    # Conditionally required fields are not handled here; they are treated
    # as optional and must be dealt with manually.
    required = specs[0] == 'Required'
    for s in specs:
        part = s.strip()
        if part in ('inheritable', 'Inheritable'):
            continue
        if re.match(r'PDF [0-9]+(\.[0-9]+)*', part):
            continue
        if s != 'Required':
            required = False

    columnValues = [fix(columnValues[0]), fix(columnValues[1]), fix(columnValues[2])]

    tableKey = re.search(r'(TABLE [0-9]\.[0-9][0-9]?)', table).group(1)
    entry = tableToClassName[tableKey]
    # The optional fourth element maps field names to '.must(...)' expressions.
    overrides = entry[3] if len(entry) >= 4 else {}

    if emitedDitionaryName == '':
        # First row of this table: open the class. The class name always
        # comes from tableToClassName.
        table = fix(table)
        emitedDitionaryName = entry[0]
        comment = fix(entry[1])

        baseClass = 'Dictionary'
        if len(entry) >= 3 and entry[2] != '':
            baseClass = entry[2]

        print(' pdfspec.addClass(\'' + emitedDitionaryName + '\', \'' + baseClass + '\', \'' + comment + '\')\\')

    # A field with a mandated value is always required.
    if columnValues[0] in overrides:
        required = True

    if required:
        print(' .required(\'NULL\')\\')
    else:
        print(' .optional()\\')

    print(' .field(\'' + columnValues[0] + '\')\\')
    print(' .name(\'' + columnValues[0] + '\')\\')
    print(' .type(\'' + columnValues[1] + '\')\\')
    print(' .comment(\'' + columnValues[2] + '\')\\')

    if columnValues[0] in overrides:
        print(' .must(' + overrides[columnValues[0]] + ')\\')

    print(' .done().done()\\')

    columnValues = None
edisonn@google.com | 07f0147 | 2013-06-13 17:24:54 +0000 | [diff] [blame] | 404 | |
def newRow(first, second, third):
    """Start a new pending row from three cell texts.

    The global `columnValues` is replaced by a three-item list holding each
    cell with trailing whitespace removed.
    """
    global columnValues
    columnValues = [cell.rstrip() for cell in (first, second, third)]
| 408 | |
def appendRow(second, third):
    """Extend the current row with a continuation line.

    A non-blank type cell is appended to columnValues[1] after a single
    space; a non-blank value cell is appended to columnValues[2] after a
    newline.  Cells that are empty once right-stripped are ignored.
    """
    global columnValues

    typeText = second.rstrip()
    if typeText:
        columnValues[1] += ' ' + typeText

    valueText = third.rstrip()
    if valueText:
        columnValues[2] += '\n' + valueText
edisonn@google.com | 07f0147 | 2013-06-13 17:24:54 +0000 | [diff] [blame] | 415 | |
def rebaseTable(line):
    """Re-align the column offsets if line looks like the start of a new row.

    The line is split into words.  After the first word (the key), words
    found in knownTypes are skipped; if the word reached starts with
    '(Optional' or '(Required', the line is treated as a row start:
    commitRow() is called, columnWidth[0] becomes the offset of the second
    word and columnWidth[1] the distance from there to the marker.

    Args:
        line: one input line, already right-stripped by the caller.

    Returns:
        True if the column offsets were recomputed, False otherwise (fewer
        than three words, or no '(Optional'/'(Required' marker found).
    """
    words = line.split()
    if len(words) < 3:
        return False

    # Skip the type words after the key, but never step past the last word.
    i = 1
    while i < len(words) - 1 and words[i] in knownTypes:
        i = i + 1

    marker = None
    for candidate in ('(Optional', '(Required'):
        if words[i].startswith(candidate):
            marker = candidate
            break
    if marker is None:
        return False

    commitRow()

    # Search only after the key: a type word can also be a substring of the
    # key (key 'Filename', type 'name'), and searching from column 0 would
    # then put the type column inside the key.
    keyEnd = line.find(words[0]) + len(words[0])
    columnWidth[0] = line.find(words[1], keyEnd)
    columnWidth[1] = line.find(marker, keyEnd) - columnWidth[0]
    return True
| 441 | |
| 442 | |
def stopTable():
    """Close the table currently being emitted, if there is one.

    Does nothing when inTable() is false.  Otherwise commitRow() is called
    for the pending row, the header flag and the emitted dictionary name are
    reset, and the closing '.done()' line followed by an empty line is
    printed.
    """
    global tableHeaderFound, emitedDitionaryName

    if inTable():
        # commitRow() must run before the name is cleared below.
        commitRow()
        emitedDitionaryName = ''
        tableHeaderFound = False
        print(' .done()')
        print('')
edisonn@google.com | 07f0147 | 2013-06-13 17:24:54 +0000 | [diff] [blame] | 455 | |
| 456 | |
def killTable():
    """No-op hook: performs no action and returns None.

    NOTE(review): callers invoke this where a table looks malformed, but no
    state is reset here -- confirm whether that is intentional.
    """
    pass
| 459 | |
def _splitColumns(text, widths):
    """Cut text into (key, type, value) cells using the column widths."""
    typeStart = widths[0]
    valueStart = widths[0] + widths[1]
    return text[0:typeStart], text[typeStart:valueStart], text[valueStart:]


def processLine(line):
    """Feed one raw input line through the table-parsing state machine.

    The line is decoded as UTF-8 and handled by the first rule that applies:

    * a blank line closes any open table;
    * a 'TABLE d.dd' caption closes any open table, is handed to
      tableDescriptionFound() and arms the column-header check;
    * the line right after a caption must begin with KEY TYPE VALUE; if it
      does, tableHasHeader() is called and the column offsets are recorded,
      otherwise the line is dropped;
    * while inTable() is true the line is cut into key/type/value cells and
      either starts a row, extends the current row, or ends the table.

    Args:
        line: one undecoded line of input (a byte string).
    """
    global lines
    global tableLine
    global columnWidth
    global mustFollowTableHeader

    lines = lines + 1

    line = unicode(line, 'utf8')
    striped = line.rstrip()

    words = line.split()
    if len(words) == 0:
        stopTable()
        return

    # NOTE(review): the '.' between the digits is unescaped, so any character
    # is accepted there -- confirm before tightening.
    isTableHeader = re.search(r'^[\s]*(TABLE [0-9].[0-9][0-9]?)', striped)
    if isTableHeader:
        stopTable()
        tableDescriptionFound(striped)
        mustFollowTableHeader = True
        return

    if mustFollowTableHeader:
        mustFollowTableHeader = False
        if len(words) != 3:
            killTable()

        # TODO(edisonn): support for generic table!
        # Comparing the slice (instead of indexing words[2]) keeps a line of
        # fewer than three words from raising IndexError.
        if words[:3] != ['KEY', 'TYPE', 'VALUE']:
            killTable()
            return

        tableHasHeader()
        typeStart = striped.index('TYPE')
        columnWidth = [typeStart, striped.index('VALUE') - typeStart, 0]
        return

    if not inTable():
        return

    tableLine = tableLine + 1
    first, second, third = _splitColumns(striped, columnWidth)

    if tableLine == 1:
        # startswith() also copes with an empty value cell, where indexing
        # third[0] would raise IndexError.
        if not third.startswith('('):
            killTable()
            return

        newRow(first, second, third)
        return

    # A row whose columns are shifted updates columnWidth; re-cut the cells.
    if rebaseTable(striped):
        first, second, third = _splitColumns(striped, columnWidth)

    first = first.rstrip()
    second = second.rstrip()
    third = third.rstrip()

    # Only the value column has text: continuation of the current row.
    if first == '' and second == '' and third != '':
        appendRow(second, third)
        return

    # More than one word in the key column: not a table row.
    if len(first.split()) > 1:
        stopTable()
        return

    # Key column starts with a space: not a table row.
    if first.startswith(' '):
        stopTable()
        return

    if first != '' and second != '' and third == '':
        stopTable()
        return

    # Type column continues on this line (e.g. a multi-word type).
    if first == '' and second != '' and not second.startswith(' '):
        if acceptType(second):
            appendRow(second, third)
        else:
            stopTable()
        return

    # third is non-empty here: the empty case returned a few checks above.
    if first != '' and second != '' and not third.startswith('('):
        stopTable()
        return

    if first == '' and second != '' and second.startswith(' '):
        stopTable()
        return

    if first != '' and second != '' and third.startswith('('):
        commitRow()
        newRow(first, second, third)
        return
| 564 | |
| 565 | |
def generateDef():
    """Turn the spec text read from stdin into a buildPdfSpec() definition.

    Prints the preamble of the generated module, passes every stdin line to
    processLine(), closes a table still open at end of input, and ends the
    output with an empty line.  Returns None.
    """
    print('import datatypes')
    print('')
    print('def buildPdfSpec(pdfspec):')

    for inputLine in sys.stdin:
        processLine(inputLine)

    # End of input: a table that is still open needs its terminator.
    stopTable()

    print('')
| 581 | |
edisonn@google.com | 4532711 | 2013-06-13 20:02:29 +0000 | [diff] [blame] | 582 | #print lines |
edisonn@google.com | 07f0147 | 2013-06-13 17:24:54 +0000 | [diff] [blame] | 583 | |
# Script entry point: generateDef() returns None, so sys.exit() reports
# success.
if __name__ == '__main__':
    sys.exit(generateDef())