Initial code to parse the PDF 1.4 reference manual and extract its dictionary definitions. Usage: feed the text dump to the script on standard input (<PdfReference-okular-1.txt)
Review URL:
git-svn-id: 2bbb7eff-a529-9590-31e7-b0007b416f81
diff --git a/experimental/PdfViewer/ b/experimental/PdfViewer/
new file mode 100644
index 0000000..ab1b26a
--- /dev/null
+++ b/experimental/PdfViewer/
@@ -0,0 +1,271 @@
+# coding: utf-8
+import sys
+import re
+# TODO(edisonn): write the processed part of the input to one new file and the
+# unprocessed part to another, so we can see what we miss. Keep blank lines,
+# and also generate a version without the blank lines.
+# TODO(edisonn): deal manually with
+# TABLE 7.11 Restrictions on the entries in a soft-mask image dictionary
+# Number of input lines handed to processLine() so far.
+lines = 0
+# Caption of the most recent 'TABLE ...' line, stripped of outer whitespace.
+table = ''
+# True while the rows of a KEY/TYPE/VALUE table are being collected.
+tableHeaderFound = False
+# Number of non-blank lines seen while inside a table.
+tableLine = 0
+# Declared for row counting; nothing visible in this file reads it.
+tableRow = 0
+# [width of KEY column, width of TYPE column, 0]; set when a header is parsed.
+columnWidth = []
+# Key, type and value text of the row currently being assembled.
+columnValues = ['', '', '']
+# Set by a 'TABLE' caption: the next non-blank line must be the table header.
+mustFollowTableHeader = False
+# Tokens a TYPE cell may be built from; acceptType() deletes each of them from
+# a candidate string and accepts it when nothing remains.
+# NOTE(review): only these entries are visible here - confirm the list covers
+# every type name used by the manual.
+knownTypes = {
+    u'undefined',
+    u'file',
+    u'specification',
+    ' ',
+}
+def acceptType(val):
+    """Tell whether val is made up solely of entries from knownTypes.
+
+    Each entry of knownTypes is deleted from a copy of val; the text is
+    accepted (True) when nothing is left over afterwards.
+    """
+    leftover = val
+    for token in knownTypes:
+        leftover = leftover.replace(token, '')
+    return leftover == ''
+def inTable():
+    """Return the tableHeaderFound flag, i.e. whether a table is being read."""
+    # Reading a module-level name needs no 'global' declaration.
+    return tableHeaderFound
+def tableDescriptionFound(desc):
+    """Store desc, minus leading and trailing whitespace, in the global table."""
+    global table
+    caption = desc.strip()
+    table = caption
+def tableHasHeader():
+    """Switch the parser into table mode by setting tableHeaderFound."""
+    global tableHeaderFound
+    tableHeaderFound = True
+def commitRow():
+    """Parse the annotation of the pending row and print what was found.
+
+    The text of columnValues[2] up to its first ')' is treated as the
+    annotation, e.g. '(Optional; PDF 1.3)'.  Parentheses are turned into ';'
+    and the result is split on ';' into individual specifiers.  From those the
+    function derives whether the entry is unconditionally required, whether
+    it is inheritable, and the PDF version it mentions, then prints the
+    intermediate and final values.
+
+    When the value column contains no ')' the raw row is printed instead and
+    nothing else happens.
+    """
+    value = columnValues[2]
+    lastClosed = value.find(')')
+    if lastClosed < 0:
+        # No annotation: dump the row so it can be inspected by hand.
+        print(columnValues)
+        return
+    spec = value[:lastClosed + 1].replace('(', ';').replace(')', ';')
+    spec = spec.strip(';')
+    specs = spec.split(';')
+    # Clearly required, but it can be required with conditions.  Those cases
+    # are not handled here (they are dealt with manually): any specifier that
+    # is not recognised below clears the flag again.
+    required = specs[0] == 'Required'
+    inheritable = False
+    version = ''
+    for s in specs:
+        item = s.strip()
+        if item in ('inheritable', 'Inheritable'):
+            inheritable = True
+        # 'PDF' followed by a dotted version number such as 1.3.  At least one
+        # digit is required so that free text starting with 'PDF ' is not
+        # mistaken for a version.
+        elif re.match(r'^PDF [0-9]+(\.[0-9]+)*', item):
+            version = item
+        elif s != 'Required':
+            required = False
+    # NOTE(review): the flattened source does not show whether these prints
+    # belong inside the loop; they are emitted once, after it.
+    print(spec)
+    print(specs)
+    print(required)
+    print(inheritable)
+    print(version)
+    print(columnValues)
+def newRow(first, second, third):
+    """Start a fresh pending row from the three column texts.
+
+    Trailing whitespace is removed from each column; leading whitespace is
+    kept.
+    """
+    global columnValues
+    columnValues = [cell.rstrip() for cell in (first, second, third)]
+def appendRow(second, third):
+    """Append continuation text to the type and value cells of the pending row.
+
+    Each argument is right-stripped; a non-empty remainder is joined to the
+    matching cell of columnValues with a single space.
+    """
+    for index, extra in ((1, second), (2, third)):
+        piece = extra.rstrip()
+        if piece != '':
+            # Item assignment mutates the shared list in place.
+            columnValues[index] = columnValues[index] + ' ' + piece
+def rebaseTable(line):
+    """Re-derive the column widths from a line that starts a new row.
+
+    The line is split into words.  Starting at the second word, words found in
+    knownTypes are skipped (the last word is never skipped); if the word
+    reached starts with '(Optional' or '(Required', the line is taken as the
+    first line of a row: the pending row is committed and columnWidth[0] and
+    columnWidth[1] are recomputed from the positions of the second word and of
+    the marker.
+
+    Returns True when the widths were updated, False otherwise (including
+    lines with fewer than three words).
+    """
+    words = line.split()
+    if len(words) < 3:
+        return False
+    i = 1
+    while i < len(words) - 1 and words[i] in knownTypes:
+        i = i + 1
+    marker = words[i]
+    if not marker.startswith(('(Optional', '(Required')):
+        return False
+    commitRow()
+    # Search for the type text only after the key, so that a key which happens
+    # to contain the type name (or the marker) cannot shift the columns.
+    keyEnd = line.find(words[0]) + len(words[0])
+    columnWidth[0] = line.find(words[1], keyEnd)
+    for prefix in ('(Optional', '(Required'):
+        if marker.startswith(prefix):
+            columnWidth[1] = line.find(prefix, columnWidth[0]) - columnWidth[0]
+    return True
+def stopTable():
+    """Commit the pending row, then leave table mode."""
+    global tableHeaderFound
+    # Flush first so the last row is reported before the flag is cleared.
+    commitRow()
+    tableHeaderFound = False
+def killTable():
+    """Placeholder for abandoning the current table; currently does nothing."""
+    return None
+def _splitColumns(text, widths):
+    """Cut text into key, type and value pieces using the two column widths."""
+    keyEnd = widths[0]
+    typeEnd = widths[0] + widths[1]
+    return text[:keyEnd], text[keyEnd:typeEnd], text[typeEnd:]
+def processLine(line):
+    """Feed one line of the text-converted manual to the table parser.
+
+    Blank lines are counted and otherwise ignored.  A line whose first word is
+    'TABLE' records the caption and requires the next non-blank line to be
+    exactly 'KEY TYPE VALUE'; that header fixes the column widths and enters
+    table mode.  While in table mode each line is cut into three columns and
+    either starts a new row, continues the pending row, or ends the table,
+    depending on which columns are filled and how they begin.
+
+    line may be UTF-8 encoded bytes or already-decoded text.
+    """
+    global lines
+    global tableLine
+    global columnWidth
+    global mustFollowTableHeader
+    lines = lines + 1
+    # Python 2 stdin yields byte strings; decode those, pass text through.
+    if isinstance(line, bytes):
+        line = line.decode('utf8')
+    striped = line.rstrip()
+    words = line.split()
+    if not words:
+        return
+    if words[0] == 'TABLE':
+        tableDescriptionFound(striped)
+        mustFollowTableHeader = True
+        return
+    if mustFollowTableHeader:
+        mustFollowTableHeader = False
+        # TODO(edisonn): support for generic table!
+        # Comparing the whole word list also rejects short lines, which
+        # previously raised IndexError on words[1] / words[2].
+        if words != ['KEY', 'TYPE', 'VALUE']:
+            killTable()
+            return
+        if inTable():
+            # A previous table is still open: flush its last row first.
+            stopTable()
+        tableHasHeader()
+        # Restart the per-table line count so the first-row handling below
+        # applies to every table, not only the first one in the input.
+        tableLine = 0
+        typeStart = striped.index('TYPE')
+        columnWidth = [typeStart, striped.index('VALUE') - typeStart, 0]
+        return
+    if not inTable():
+        return
+    tableLine = tableLine + 1
+    first, second, third = _splitColumns(striped, columnWidth)
+    if tableLine == 1:
+        # startswith() tolerates a line too short to reach the value column.
+        if not third.startswith('('):
+            killTable()
+            return
+        newRow(first, second, third)
+        return
+    # rebaseTable() commits the pending row itself when it returns True.
+    rebased = rebaseTable(striped)
+    if rebased:
+        first, second, third = _splitColumns(striped, columnWidth)
+    first = first.rstrip()
+    second = second.rstrip()
+    third = third.rstrip()
+    # Only the value column is filled: continuation of the description.
+    if first == '' and second == '' and third != '':
+        appendRow(second, third)
+        return
+    # Several words, or indentation, in the key column: ordinary text.
+    if len(first.split()) > 1:
+        stopTable()
+        return
+    if first != '' and first[0] == ' ':
+        stopTable()
+        return
+    if first != '' and second != '' and third == '':
+        stopTable()
+        return
+    # Empty key with text flush at the type column: a wrapped type cell,
+    # provided it consists of known type names only.
+    if first == '' and second != '' and second[0] != ' ':
+        if acceptType(second):
+            appendRow(second, third)
+        else:
+            stopTable()
+        return
+    if first != '' and second != '' and not third.startswith('('):
+        stopTable()
+        return
+    if first == '' and second != '' and second[0] == ' ':
+        stopTable()
+        return
+    if first != '' and second != '' and third.startswith('('):
+        # Avoid committing the same row twice after a rebase.
+        if not rebased:
+            commitRow()
+        newRow(first, second, third)
+        return
+def generateDef():
+    """Run processLine() over standard input, then print the line count."""
+    for text in sys.stdin:
+        processLine(text)
+    print(lines)
+# Script entry point: generateDef() returns None, so the exit status is 0.
+if __name__ == '__main__':
+    sys.exit(generateDef())
\ No newline at end of file