blob: ffc4c045dd9d06080d20c70ffefdf500a2b6e9c6 [file] [log] [blame]
#! /usr/bin/env python
"""This script builds the Lib/fontTools/ttLib/tables/otData.py file
from the OpenType HTML documentation. However, it depends on a slightly
patched version of the HTML, as there are some inconsistencies in the
markup and the naming of certain fields. See doco.diff for differences,
but this is probably against a slightly older version of the documentation
than what is currently online. The documentation was taken from this URL:
http://www.microsoft.com/typography/otspec/default.htm
"""
from sgmllib import SGMLParser
class HTMLParser(SGMLParser):
    """Collect the structure tables of one HTML page and emit them.

    The parser tracks three pieces of state:

    data          list of text chunks currently being collected, or None
                  when no text is being collected
    currenttable  list of rows (each a list of cell strings) of the table
                  being collected, or None when outside such a table
    lastcaption   text of the most recent <i> or <b> element seen outside
                  a collected table; used as the caption of the next table

    Only tables whose 'width' attribute is '455' or '460' are collected.
    When such a table ends and its first row is one of the two recognised
    header rows, the remaining rows are handed to buildTable().
    """

    # The two header rows that identify a table worth emitting.
    _HEADER_ROWS = (
        ['Type', 'Name', 'Description'],
        ['Value', 'Type', 'Description'],
    )

    def __init__(self):
        SGMLParser.__init__(self)
        self.data = None
        self.currenttable = None
        self.lastcaption = None

    def handle_data(self, data):
        """Append a chunk of text to the buffer, if one is open."""
        if self.data is not None:
            self.data.append(data)

    def _startCaption(self):
        """Open a text buffer for a caption candidate (outside tables only)."""
        if self.currenttable is None:
            self.data = []

    def _endCaption(self):
        """Store the buffered text as the latest caption (outside tables only).

        The buffer may already have been closed, e.g. by the inner element
        of nested <b><i>...</i></b> markup or by a stray closing tag; in
        that case the caption recorded so far is kept instead of failing.
        """
        if self.currenttable is None and self.data is not None:
            self.lastcaption = " ".join(self.data)
            self.data = None

    def start_i(self, attrs):
        self._startCaption()

    def end_i(self):
        self._endCaption()

    def start_b(self, attrs):
        self._startCaption()

    def end_b(self):
        self._endCaption()

    def start_table(self, attrs):
        """Start collecting rows if the table has one of the known widths."""
        attrs = dict(attrs)
        if attrs.get('width') in ('455', '460'):
            self.currenttable = []
        else:
            self.currenttable = None

    def end_table(self):
        """Emit the collected table through buildTable() when it qualifies.

        A table is skipped when it has no rows, when no usable caption was
        seen, when its first row is not a recognised header row, when the
        caption starts with "LookupType" or "LookupFlag", or when the caption
        starts with "Device" and contains the word "Tables".
        """
        table = self.currenttable
        # Collection always stops at the end of the table.
        self.currenttable = None
        if not table or self.lastcaption is None:
            return
        if table[0] not in self._HEADER_ROWS:
            return
        caption = self.lastcaption.split()
        if not caption:
            # An empty caption gives no name to emit the table under.
            return
        name = caption[0]
        if name == "LookupType" or name == "LookupFlag":
            return
        if name == "Device" and "Tables" in caption:
            # XXX skip this one
            return
        buildTable(name, table[1:], self.lastcaption)

    def start_tr(self, attrs):
        """Open a new row in the table being collected."""
        if self.currenttable is not None:
            self.currenttable.append([])

    def end_tr(self):
        pass

    def start_td(self, attrs):
        """Open a fresh text buffer for the cell."""
        self.data = []

    def end_td(self):
        """Append the buffered cell text to the current row."""
        if self.currenttable is not None and self.data is not None:
            if not self.currenttable:
                # A cell that appears before any <tr>: open a row for it
                # rather than failing on the [-1] lookup below.
                self.currenttable.append([])
            self.currenttable[-1].append(" ".join(self.data))
            self.data = None
# Bookkeeping filled in by buildTable() and reported at the end of a run:
#   globalDups -- table name -> list of every caption that produced that name
#   localDups  -- (table name, field name) -> 1 for a field name that occurs
#                 more than once within the same table
#   not3       -- normalized rows that did not have exactly three cells
globalDups, localDups, not3 = {}, {}, []
def buildTable(name, table, caption):
    """Print one table as a Python tuple literal and record duplicates.

    name     -- table name, printed with repr() as the first tuple item
    table    -- the table's data rows, each a list of cell strings
    caption  -- full caption text; recorded under `name` in globalDups

    Each row is whitespace-normalized, converted with makeRow() and printed
    as a tuple. Rows that do not have exactly three cells are also appended
    to not3, and a field name that occurs more than once within this table
    is recorded in localDups under the key (name, fieldName).

    The single-argument print(...) form is used so the statements mean the
    same thing under both the print statement and the print function.
    """
    globalDups.setdefault(name, []).append(caption)
    print("\t(%s, [" % repr(name))
    seenFields = {}
    for row in table:
        # Collapse runs of whitespace inside every cell.
        row = [" ".join(x.split()) for x in row]
        if len(row) != 3:
            not3.append(row)
        row = makeRow(row)
        fieldName = row[1]
        if fieldName in seenFields:
            localDups[(name, fieldName)] = 1
        seenFields[fieldName] = 1
        print("\t\t%s," % (tuple(row),))
    print("\t]),")
    print("")
def makeRow(rawRow):
    """Split a raw table row into (type, name, repeat, repeatOffset, ...).

    rawRow is a sequence of cell strings whose first two items are the type
    and the field name; any further items are passed through unchanged at
    the end of the result.

    A field name of the form "Name[Count]" or "Name[Count - 1]" is split
    into the bare name, the first whitespace-separated word between the
    brackets (`repeat`) and the remaining words joined and converted with
    int() (`repeatOffset`, 0 when there are none). "Name[]" yields an empty
    string for `repeat`. A name without brackets yields None for both.

    Raises ValueError when rawRow has fewer than two items, when the name
    contains more than one "[", when the bracketed part does not end with
    "]", or when the offset is not a valid integer.
    """
    tp, name = rawRow[:2]
    name = name.strip()
    rest = tuple(rawRow[2:])
    repeat = None
    repeatOffset = None
    if "[" in name:
        name, bracketed = name.split("[")
        name = name.strip()
        # Checked explicitly (not with assert) so the validation survives
        # running under -O and also covers a name that ends with "[".
        if not bracketed.endswith("]"):
            raise ValueError(
                "malformed repeat count in field name %r" % (rawRow[1],))
        words = bracketed[:-1].split()
        if words:
            repeat = words[0]
        else:
            repeat = ""
        if words[1:]:
            repeatOffset = int("".join(words[1:]))
        else:
            repeatOffset = 0
    return (tp, name, repeat, repeatOffset) + rest
if __name__ == "__main__":
    import os
    import sys

    # Unless "-" is given on the command line, the generated module is
    # written to otData.py instead of the original standard output.
    savedStdout = sys.stdout
    outFile = None
    if "-" not in sys.argv:
        outFile = open("otData.py", "w")
        sys.stdout = outFile
    try:
        print("otData = [")
        for fileName in ["chapter2.htm", "gpos.htm", "gsub.htm", "gdef.htm", "base.htm", "jstf.htm"]:
            name = os.path.splitext(fileName)[0]
            if name == "chapter2":
                name = "common"
            print("")
            print("\t#")
            print("\t# %s (generated from %s)" % (name, fileName))
            print("\t#")
            print("")
            # Read the whole page up front so the file handle is closed
            # before parsing starts.
            htmlFile = open(fileName)
            try:
                html = htmlFile.read()
            finally:
                htmlFile.close()
            p = HTMLParser()
            p.feed(html)
            p.close()
        print("]")
        print("")
        # Trailing report: problems noticed while the tables were built.
        for k, v in globalDups.items():
            if len(v) > 1:
                print("# XXX duplicate table name: %s %s" % (k, v))
        for name, fieldName in localDups:
            print("# XXX duplicate field name '%s' in table '%s'" % (fieldName, name))
        for n in not3:
            # Report the individual row, not the whole list once per row.
            print("#XXX %s" % (n,))
    finally:
        # Restore stdout and flush/close the generated file, also on error.
        sys.stdout = savedStdout
        if outFile is not None:
            outFile.close()