Parse pdf reference manual for 1.4 to extract dictionary definition, initial code. Use spec2def.py <PdfReference-okular-1.txt
Review URL: https://codereview.chromium.org/16838015
git-svn-id: http://skia.googlecode.com/svn/trunk@9587 2bbb7eff-a529-9590-31e7-b0007b416f81
diff --git a/experimental/PdfViewer/spec2def.py b/experimental/PdfViewer/spec2def.py
new file mode 100644
index 0000000..ab1b26a
--- /dev/null
+++ b/experimental/PdfViewer/spec2def.py
@@ -0,0 +1,271 @@
+#!/usr/local/bin/python
+# coding: utf-8
+
+import sys
+import re
+
+# TODO(edisonn): put processed part of file in a new file
+# put unprocessed part, in a new file, so we see what we miss
+# keep blank lines, and generate a version without the blank lines
+
+#TODO (edisonn): deal manually with
+# TABLE 7.11 Restrictions on the entries in a soft-mask image dictionary
+#KEY RESTRICTION
+
+
+
+lines = 0
+table = ''
+tableHeaderFound = False
+tableLine = 0
+tableRow = 0
+columnWidth = []
+columnValues = ['', '', '']
+mustFollowTableHeader = False
+
+knownTypes = {
+'(any)',
+unicode('undefined', 'utf8'),
+'(undefined)',
+'(various)',
+'array',
+'or',
+'boolean',
+'date',
+'dictionary',
+'function',
+'integer',
+unicode('file', 'utf8'),
+'file',
+unicode('specification', 'utf8'),
+'specification',
+'name',
+'tree',
+'number',
+'rectangle',
+'stream',
+'string',
+'text',
+',',
+' '
+}
+
+def acceptType(val):
+ global knownTypes
+
+ ret = val
+
+ for item in knownTypes:
+ ret = ret.replace(item, '')
+
+ return ret == ''
+
+
+def inTable():
+ global tableHeaderFound
+ return tableHeaderFound
+
+def tableDescriptionFound(desc):
+ global table
+ table = desc.strip()
+
+def tableHasHeader():
+ global table
+ global tableHeaderFound
+
+ tableHeaderFound = True
+ #print table
+
+
+def commitRow():
+ global columnValues
+ #print columnValues
+
+ lastClosed = columnValues[2].find(')')
+ if lastClosed < 0:
+ print 'ERRRRRRRRRRRRRRROR'
+ print columnValues
+ return
+
+ spec = columnValues[2][:lastClosed + 1]
+ spec = spec.replace('(', ';')
+ spec = spec.replace(')', ';')
+ spec = spec.strip(';')
+
+ specs = spec.split(';')
+
+ # clearly required, but it can be required with conditions. don't handle this ones here, but manually
+ required = specs[0] == 'Required'
+
+ inheritable = False
+ version = ''
+ for s in specs:
+ if s.strip() == 'inheritable' or s.strip() == 'Inheritable':
+ inheritable = True
+ elif re.match('^PDF [0-9]*[\.[0-9]*]*', s.strip()):
+ version = s.strip()
+ elif s != 'Required':
+ required = False
+
+ print spec
+ print specs
+ print required
+ print inheritable
+ print version
+ print columnValues
+
+def newRow(first, second, third):
+ global columnValues
+ columnValues = [first.rstrip(), second.rstrip(), third.rstrip()]
+
+def appendRow(second, third):
+ global columnValues
+ if second.rstrip() != '':
+ columnValues[1] = columnValues[1] + ' ' + second.rstrip()
+ if third.rstrip() != '':
+ columnValues[2] = columnValues[2] + ' ' + third.rstrip()
+
+def rebaseTable(line):
+ global knownTypes
+ global columnWidth
+
+ words = line.split()
+
+ if len(words) < 3:
+ return False
+
+ i = 1
+ while i < len(words) - 1 and words[i] in knownTypes:
+ i = i + 1
+
+ if words[i].startswith('(Optional') or words[i].startswith('(Required'):
+ commitRow()
+
+ columnWidth[0] = line.find(words[1])
+
+ if words[i].startswith('(Optional'):
+ columnWidth[1] = line.find('(Optional') - columnWidth[0]
+ if words[i].startswith('(Required'):
+ columnWidth[1] = line.find('(Required') - columnWidth[0]
+ return True
+
+ return False
+
+
+def stopTable():
+ global tableHeaderFound
+ commitRow()
+ tableHeaderFound = False
+
+
+def killTable():
+ return
+
+def processLine(line):
+ global lines
+ global tableLine
+ global tableRow
+ global columnWidth
+ global columnValues
+ global mustFollowTableHeader
+
+ lines = lines + 1
+
+ line = unicode(line, 'utf8')
+
+ striped = line.rstrip()
+
+ words = line.split()
+ if len(words) == 0:
+ return
+
+ if words[0] == 'TABLE':
+ tableDescriptionFound(striped)
+ mustFollowTableHeader = True
+ return
+
+ if mustFollowTableHeader:
+ mustFollowTableHeader = False
+ if len(words) != 3:
+ killTable()
+
+ # TODO(edisonn): support for generic table!
+ if words[0] != 'KEY' or words[1] != 'TYPE' or words[2] != 'VALUE':
+ killTable()
+ return
+
+ tableHasHeader()
+ columnWidth = [0, 0, 0]
+ columnWidth[0] = striped.index('TYPE')
+ columnWidth[1] = striped.index('VALUE') - striped.index('TYPE')
+ columnWidth[2] = 0
+ return
+
+ if inTable():
+ tableLine = tableLine + 1
+ first = striped[0 : columnWidth[0]]
+ second = striped[columnWidth[0] : columnWidth[0] + columnWidth[1]]
+ third = striped[columnWidth[0] + columnWidth[1] :]
+
+ if tableLine == 1:
+ if third[0] != '(':
+ killTable()
+ return
+
+ newRow(first, second, third)
+ return
+
+ if rebaseTable(striped):
+ first = striped[0 : columnWidth[0]]
+ second = striped[columnWidth[0] : columnWidth[0] + columnWidth[1]]
+ third = striped[columnWidth[0] + columnWidth[1] :]
+
+ first = first.rstrip()
+ second = second.rstrip()
+ third = third.rstrip()
+
+ if first == '' and second == '' and third != '':
+ appendRow(second, third)
+ return
+
+ if len(first.split()) > 1:
+ stopTable()
+ return
+
+ if first != '' and first[0] == ' ':
+ stopTable()
+ return
+
+ if first != '' and second != '' and third == '':
+ stopTable()
+ return
+
+ if first == '' and second != '' and second[0] != ' ':
+ if acceptType(second):
+ appendRow(second, third)
+ else:
+ stopTable()
+ return
+
+ if first != '' and second != '' and third[0] != '(':
+ stopTable()
+ return
+
+ if first == '' and second != '' and second[0] == ' ':
+ stopTable()
+ return
+
+ if first != '' and second != '' and third[0] == '(':
+ commitRow()
+ newRow(first, second, third)
+ return
+
+
+def generateDef():
+ global lines
+ for line in sys.stdin:
+ processLine(line)
+ print lines
+
+if '__main__' == __name__:
+ sys.exit(generateDef())
\ No newline at end of file