new API building Python script, does the C parsing directly, generates a
* doc/apibuild.py doc/libxml2-api.xml doc/Makefile.am: new API
building Python script, does the C parsing directly, generates
a better API description including structure fieds defs and
enums. Still a couple of bugs, but good enough for the python
wrappers now.
* DOCBparser.c SAX.c nanohttp.c parser.c parserInternals.c tree.c
valid.c xmlIO.c xmlmemory.c xmlreader.c xmlregexp.c xmlschemas.c
include/libxml/schemasInternals.h include/libxml/tree.h: more
cleanup based on the python analysis script reports.
* libxml.spec.in: make sure the API XML description is part of the
devel package.
Daniel
diff --git a/doc/apibuild.py b/doc/apibuild.py
new file mode 100755
index 0000000..8d1b204
--- /dev/null
+++ b/doc/apibuild.py
@@ -0,0 +1,1458 @@
+#!/usr/bin/python -u
+#
+# This is the API builder, it parses the C sources and build the
+# API formal description in XML.
+#
+# See Copyright for the status of this software.
+#
+# daniel@veillard.com
+#
+import sys
+import string
+import glob
+
+def escape(raw):
+ raw = string.replace(raw, '&', '&')
+ raw = string.replace(raw, '<', '<')
+ raw = string.replace(raw, '>', '>')
+ raw = string.replace(raw, "'", ''')
+ raw = string.replace(raw, '"', '"')
+ return raw
+
+class identifier:
+ def __init__(self, name, module=None, type=None, info=None, extra=None):
+ self.name = name
+ self.module = module
+ self.type = type
+ self.info = info
+ self.extra = extra
+ self.static = 0
+
+ def __repr__(self):
+ r = "%s %s:" % (self.type, self.name)
+ if self.static:
+ r = r + " static"
+ if self.module != None:
+ r = r + " from %s" % (self.module)
+ if self.info != None:
+ r = r + " " + `self.info`
+ if self.extra != None:
+ r = r + " " + `self.extra`
+ return r
+
+
+ def set_module(self, module):
+ self.module = module
+ def set_type(self, type):
+ self.type = type
+ def set_info(self, info):
+ self.info = info
+ def set_extra(self, extra):
+ self.extra = extra
+ def set_static(self, static):
+ self.static = static
+
+ def update(self, module, type = None, info = None, extra=None):
+ if module != None and self.module == None:
+ self.set_module(module)
+ if type != None and self.type == None:
+ self.set_type(type)
+ if info != None:
+ self.set_info(info)
+ if extra != None:
+ self.set_extra(extra)
+
+
+class index:
+ def __init__(self, name = "noname"):
+ self.name = name;
+ self.identifiers = {}
+ self.functions = {}
+ self.variables = {}
+ self.includes = {}
+ self.structs = {}
+ self.enums = {}
+ self.typedefs = {}
+ self.macros = {}
+ self.references = {}
+
+ def add(self, name, module, static, type, info=None, extra=None):
+ if name[0:2] == '__':
+ return None
+ d = None
+ try:
+ d = self.identifiers[name]
+ d.update(module, type, info, extra)
+ except:
+ d = identifier(name, module, type, info, extra)
+ self.identifiers[name] = d
+
+ if d != None and static == 1:
+ d.set_static(1)
+
+ if d != None and name != None and type != None:
+ if type == "function":
+ self.functions[name] = d
+ elif type == "functype":
+ self.functions[name] = d
+ elif type == "variable":
+ self.variables[name] = d
+ elif type == "include":
+ self.includes[name] = d
+ elif type == "struct":
+ self.structs[name] = d
+ elif type == "enum":
+ self.enums[name] = d
+ elif type == "typedef":
+ self.typedefs[name] = d
+ elif type == "macro":
+ self.macros[name] = d
+ else:
+ print "Unable to register type ", type
+ return d
+
+ def merge(self, idx):
+ for id in idx.functions.keys():
+ #
+ # macro might be used to override functions or variables
+ # definitions
+ #
+ if self.macros.has_key(id):
+ del self.macros[id]
+ if self.functions.has_key(id):
+ print "function %s from %s redeclared in %s" % (
+ id, self.functions[id].module, idx.functions[id].module)
+ else:
+ self.functions[id] = idx.functions[id]
+ self.identifiers[id] = idx.functions[id]
+ for id in idx.variables.keys():
+ #
+ # macro might be used to override functions or variables
+ # definitions
+ #
+ if self.macros.has_key(id):
+ del self.macros[id]
+ if self.variables.has_key(id):
+ print "variable %s from %s redeclared in %s" % (
+ id, self.variables[id].module, idx.variables[id].module)
+ else:
+ self.variables[id] = idx.variables[id]
+ self.identifiers[id] = idx.variables[id]
+ for id in idx.structs.keys():
+ if self.structs.has_key(id):
+ print "struct %s from %s redeclared in %s" % (
+ id, self.structs[id].module, idx.structs[id].module)
+ else:
+ self.structs[id] = idx.structs[id]
+ self.identifiers[id] = idx.structs[id]
+ for id in idx.typedefs.keys():
+ if self.typedefs.has_key(id):
+ print "typedef %s from %s redeclared in %s" % (
+ id, self.typedefs[id].module, idx.typedefs[id].module)
+ else:
+ self.typedefs[id] = idx.typedefs[id]
+ self.identifiers[id] = idx.typedefs[id]
+ for id in idx.macros.keys():
+ #
+ # macro might be used to override functions or variables
+ # definitions
+ #
+ if self.variables.has_key(id):
+ continue
+ if self.functions.has_key(id):
+ continue
+ if self.enums.has_key(id):
+ continue
+ if self.macros.has_key(id):
+ print "macro %s from %s redeclared in %s" % (
+ id, self.macros[id].module, idx.macros[id].module)
+ else:
+ self.macros[id] = idx.macros[id]
+ self.identifiers[id] = idx.macros[id]
+ for id in idx.enums.keys():
+ if self.enums.has_key(id):
+ print "enum %s from %s redeclared in %s" % (
+ id, self.enums[id].module, idx.enums[id].module)
+ else:
+ self.enums[id] = idx.enums[id]
+ self.identifiers[id] = idx.enums[id]
+
+ def merge_public(self, idx):
+ for id in idx.functions.keys():
+ if self.functions.has_key(id):
+ up = idx.functions[id]
+ self.functions[id].update(None, up.type, up.info, up.extra)
+ else:
+ if idx.functions[id].static == 0:
+ self.functions[id] = idx.functions[id]
+
+ def analyze_dict(self, type, dict):
+ count = 0
+ public = 0
+ for name in dict.keys():
+ id = dict[name]
+ count = count + 1
+ if id.static == 0:
+ public = public + 1
+ if count != public:
+ print " %d %s , %d public" % (count, type, public)
+ elif count != 0:
+ print " %d public %s" % (count, type)
+
+
+ def analyze(self):
+ self.analyze_dict("functions", self.functions)
+ self.analyze_dict("variables", self.variables)
+ self.analyze_dict("structs", self.structs)
+ self.analyze_dict("typedefs", self.typedefs)
+ self.analyze_dict("macros", self.macros)
+
+#
+# C parser analysis code
+#
+ignored_files = {
+ "trio": "too many non standard macros",
+ "trio.c": "too many non standard macros",
+ "trionan.c": "too many non standard macros",
+ "triostr.c": "too many non standard macros",
+ "acconfig.h": "generated portability layer",
+ "config.h": "generated portability layer",
+ "libxml.h": "internal only",
+}
+
+ignored_words = {
+ "WINAPI": (0, "Windows keyword"),
+ "LIBXML_DLL_IMPORT": (0, "Special macro to flag external keywords"),
+ "__declspec": (3, "Windows keyword"),
+ "ATTRIBUTE_UNUSED": (0, "macro keyword"),
+}
+
+class CLexer:
+ """A lexer for the C language, tokenize the input by reading and
+ analyzing it line by line"""
+ def __init__(self, input):
+ self.input = input
+ self.tokens = []
+ self.line = ""
+ self.lineno = 0
+
+ def getline(self):
+ line = ''
+ while line == '':
+ line = self.input.readline()
+ if not line:
+ return None
+ self.lineno = self.lineno + 1
+ line = string.lstrip(line)
+ line = string.rstrip(line)
+ if line == '':
+ continue
+ while line[-1] == '\\':
+ line = line[:-1]
+ n = self.input.readline()
+ self.lineno = self.lineno + 1
+ n = string.lstrip(n)
+ n = string.rstrip(n)
+ if not n:
+ break
+ else:
+ line = line + n
+ return line
+
+ def getlineno(self):
+ return self.lineno
+
+ def push(self, token):
+ self.tokens.insert(0, token);
+
+ def debug(self):
+ print "Last token: ", self.last
+ print "Token queue: ", self.tokens
+ print "Line %d end: " % (self.lineno), self.line
+
+ def token(self):
+ while self.tokens == []:
+ if self.line == "":
+ line = self.getline()
+ else:
+ line = self.line
+ self.line = ""
+ if line == None:
+ return None
+
+ if line[0] == '#':
+ self.tokens = map((lambda x: ('preproc', x)),
+ string.split(line))
+ break;
+ l = len(line)
+ if line[0] == '"' or line[0] == "'":
+ end = line[0]
+ line = line[1:]
+ found = 0
+ tok = ""
+ while found == 0:
+ i = 0
+ l = len(line)
+ while i < l:
+ if line[i] == end:
+ self.line = line[i+1:]
+ line = line[:i]
+ l = i
+ found = 1
+ break
+ if line[i] == '\\':
+ i = i + 1
+ i = i + 1
+ tok = tok + line
+ if found == 0:
+ line = self.getline()
+ if line == None:
+ return None
+ self.last = ('string', tok)
+ return self.last
+
+ if l >= 2 and line[0] == '/' and line[1] == '*':
+ line = line[2:]
+ found = 0
+ tok = ""
+ while found == 0:
+ i = 0
+ l = len(line)
+ while i < l:
+ if line[i] == '*' and i+1 < l and line[i+1] == '/':
+ self.line = line[i+2:]
+ line = line[:i-1]
+ l = i
+ found = 1
+ break
+ i = i + 1
+ if tok != "":
+ tok = tok + "\n"
+ tok = tok + line
+ if found == 0:
+ line = self.getline()
+ if line == None:
+ return None
+ self.last = ('comment', tok)
+ return self.last
+ if l >= 2 and line[0] == '/' and line[1] == '/':
+ line = line[2:]
+ self.last = ('comment', line)
+ return self.last
+ i = 0
+ while i < l:
+ if line[i] == '/' and i+1 < l and line[i+1] == '/':
+ self.line = line[i:]
+ line = line[:i]
+ break
+ if line[i] == '/' and i+1 < l and line[i+1] == '*':
+ self.line = line[i:]
+ line = line[:i]
+ break
+ if line[i] == '"' or line[i] == "'":
+ self.line = line[i:]
+ line = line[:i]
+ break
+ i = i + 1
+ l = len(line)
+ i = 0
+ while i < l:
+ if line[i] == ' ' or line[i] == '\t':
+ i = i + 1
+ continue
+ o = ord(line[i])
+ if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \
+ (o >= 48 and o <= 57):
+ s = i
+ while i < l:
+ o = ord(line[i])
+ if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \
+ (o >= 48 and o <= 57) or string.find(
+ " \t(){}:;,+-*/%&!|[]=><", line[i]) == -1:
+ i = i + 1
+ else:
+ break
+ self.tokens.append(('name', line[s:i]))
+ continue
+ if string.find("(){}:;,[]", line[i]) != -1:
+# if line[i] == '(' or line[i] == ')' or line[i] == '{' or \
+# line[i] == '}' or line[i] == ':' or line[i] == ';' or \
+# line[i] == ',' or line[i] == '[' or line[i] == ']':
+ self.tokens.append(('sep', line[i]))
+ i = i + 1
+ continue
+ if string.find("+-*><=/%&!|.", line[i]) != -1:
+# if line[i] == '+' or line[i] == '-' or line[i] == '*' or \
+# line[i] == '>' or line[i] == '<' or line[i] == '=' or \
+# line[i] == '/' or line[i] == '%' or line[i] == '&' or \
+# line[i] == '!' or line[i] == '|' or line[i] == '.':
+ if line[i] == '.' and i + 2 < l and \
+ line[i+1] == '.' and line[i+2] == '.':
+ self.tokens.append(('name', '...'))
+ i = i + 3
+ continue
+
+ j = i + 1
+ if j < l and (
+ string.find("+-*><=/%&!|", line[j]) != -1):
+# line[j] == '+' or line[j] == '-' or line[j] == '*' or \
+# line[j] == '>' or line[j] == '<' or line[j] == '=' or \
+# line[j] == '/' or line[j] == '%' or line[j] == '&' or \
+# line[j] == '!' or line[j] == '|'):
+ self.tokens.append(('op', line[i:j+1]))
+ i = j + 1
+ else:
+ self.tokens.append(('op', line[i]))
+ i = i + 1
+ continue
+ s = i
+ while i < l:
+ o = ord(line[i])
+ if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \
+ (o >= 48 and o <= 57) or (
+ string.find(" \t(){}:;,+-*/%&!|[]=><", line[i]) == -1):
+# line[i] != ' ' and line[i] != '\t' and
+# line[i] != '(' and line[i] != ')' and
+# line[i] != '{' and line[i] != '}' and
+# line[i] != ':' and line[i] != ';' and
+# line[i] != ',' and line[i] != '+' and
+# line[i] != '-' and line[i] != '*' and
+# line[i] != '/' and line[i] != '%' and
+# line[i] != '&' and line[i] != '!' and
+# line[i] != '|' and line[i] != '[' and
+# line[i] != ']' and line[i] != '=' and
+# line[i] != '*' and line[i] != '>' and
+# line[i] != '<'):
+ i = i + 1
+ else:
+ break
+ self.tokens.append(('name', line[s:i]))
+
+ tok = self.tokens[0]
+ self.tokens = self.tokens[1:]
+ self.last = tok
+ return tok
+
+class CParser:
+ """The C module parser"""
+ def __init__(self, filename, idx = None):
+ self.filename = filename
+ if len(filename) > 2 and filename[-2:] == '.h':
+ self.is_header = 1
+ else:
+ self.is_header = 0
+ self.input = open(filename)
+ self.lexer = CLexer(self.input)
+ if idx == None:
+ self.index = index()
+ else:
+ self.index = idx
+ self.top_comment = ""
+ self.last_comment = ""
+ self.comment = None
+
+ def lineno(self):
+ return self.lexer.getlineno()
+
+ def error(self, msg, token=-1):
+ print "Parse Error: " + msg
+ if token != -1:
+ print "Got token ", token
+ self.lexer.debug()
+ sys.exit(1)
+
+ def debug(self, msg, token=-1):
+ print "Debug: " + msg
+ if token != -1:
+ print "Got token ", token
+ self.lexer.debug()
+
+ def parseComment(self, token):
+ if self.top_comment == "":
+ self.top_comment = token[1]
+ if self.comment == None or token[1][0] == '*':
+ self.comment = token[1];
+ else:
+ self.comment = self.comment + token[1]
+ token = self.lexer.token()
+ return token
+
+ #
+ # Parse a comment block associate to a macro
+ #
+ def parseMacroComment(self, name, quiet = 0):
+ if name[0:2] == '__':
+ quiet = 1
+
+ args = []
+ desc = ""
+
+ if self.comment == None:
+ if not quiet:
+ print "Missing comment for macro %s" % (name)
+ return((args, desc))
+ if self.comment[0] != '*':
+ if not quiet:
+ print "Missing * in macro comment for %s" % (name)
+ return((args, desc))
+ lines = string.split(self.comment, '\n')
+ if lines[0] == '*':
+ del lines[0]
+ if lines[0] != "* %s:" % (name):
+ if not quiet:
+ print "Misformatted macro comment for %s" % (name)
+ print " Expecting '* %s:' got '%s'" % (name, lines[0])
+ return((args, desc))
+ del lines[0]
+ while lines[0] == '*':
+ del lines[0]
+ while len(lines) > 0 and lines[0][0:3] == '* @':
+ l = lines[0][3:]
+ try:
+ (arg, desc) = string.split(l, ':', 1)
+ desc=string.strip(desc)
+ arg=string.strip(arg)
+ except:
+ if not quiet:
+ print "Misformatted macro comment for %s" % (name)
+ print " problem with '%s'" % (lines[0])
+ del lines[0]
+ continue
+ del lines[0]
+ l = string.strip(lines[0])
+ while len(l) > 2 and l[0:3] != '* @':
+ while l[0] == '*':
+ l = l[1:]
+ desc = desc + ' ' + string.strip(l)
+ del lines[0]
+ if len(lines) == 0:
+ break
+ l = lines[0]
+ args.append((arg, desc))
+ while len(lines) > 0 and lines[0] == '*':
+ del lines[0]
+ desc = ""
+ while len(lines) > 0:
+ l = lines[0]
+ while len(l) > 0 and l[0] == '*':
+ l = l[1:]
+ l = string.strip(l)
+ desc = desc + " " + l
+ del lines[0]
+
+ desc = string.strip(desc)
+
+ if quiet == 0:
+ if desc == "":
+ print "Macro comment for %s lack description of the macro" % (name)
+
+ return((args, desc))
+
+ #
+ # Parse a comment block and merge the informations found in the
+ # parameters descriptions, finally returns a block as complete
+ # as possible
+ #
+ def mergeFunctionComment(self, name, description, quiet = 0):
+ if name == 'main':
+ quiet = 1
+ if name[0:2] == '__':
+ quiet = 1
+
+ (ret, args) = description
+ desc = ""
+ retdesc = ""
+
+ if self.comment == None:
+ if not quiet:
+ print "Missing comment for function %s" % (name)
+ return(((ret[0], retdesc), args, desc))
+ if self.comment[0] != '*':
+ if not quiet:
+ print "Missing * in function comment for %s" % (name)
+ return(((ret[0], retdesc), args, desc))
+ lines = string.split(self.comment, '\n')
+ if lines[0] == '*':
+ del lines[0]
+ if lines[0] != "* %s:" % (name):
+ if not quiet:
+ print "Misformatted function comment for %s" % (name)
+ print " Expecting '* %s:' got '%s'" % (name, lines[0])
+ return(((ret[0], retdesc), args, desc))
+ del lines[0]
+ while lines[0] == '*':
+ del lines[0]
+ nbargs = len(args)
+ while len(lines) > 0 and lines[0][0:3] == '* @':
+ l = lines[0][3:]
+ try:
+ (arg, desc) = string.split(l, ':', 1)
+ desc=string.strip(desc)
+ arg=string.strip(arg)
+ except:
+ if not quiet:
+ print "Misformatted function comment for %s" % (name)
+ print " problem with '%s'" % (lines[0])
+ del lines[0]
+ continue
+ del lines[0]
+ l = string.strip(lines[0])
+ while len(l) > 2 and l[0:3] != '* @':
+ while l[0] == '*':
+ l = l[1:]
+ desc = desc + ' ' + string.strip(l)
+ del lines[0]
+ if len(lines) == 0:
+ break
+ l = lines[0]
+ i = 0
+ while i < nbargs:
+ if args[i][1] == arg:
+ args[i] = (args[i][0], arg, desc)
+ break;
+ i = i + 1
+ if i >= nbargs:
+ if not quiet:
+ print "Uname to find arg %s from function comment for %s" % (
+ arg, name)
+ while len(lines) > 0 and lines[0] == '*':
+ del lines[0]
+ desc = ""
+ while len(lines) > 0:
+ l = lines[0]
+ while len(l) > 0 and l[0] == '*':
+ l = l[1:]
+ l = string.strip(l)
+ if len(l) >= 6 and l[0:6] == "return" or l[0:6] == "Return":
+ try:
+ l = string.split(l, ' ', 1)[1]
+ except:
+ l = ""
+ retdesc = string.strip(l)
+ del lines[0]
+ while len(lines) > 0:
+ l = lines[0]
+ while len(l) > 0 and l[0] == '*':
+ l = l[1:]
+ l = string.strip(l)
+ retdesc = retdesc + " " + l
+ del lines[0]
+ else:
+ desc = desc + " " + l
+ del lines[0]
+
+ retdesc = string.strip(retdesc)
+ desc = string.strip(desc)
+
+ if quiet == 0:
+ #
+ # report missing comments
+ #
+ i = 0
+ while i < nbargs:
+ if args[i][2] == None and args[i][0] != "void" and args[i][1] != None:
+ print "Function comment for %s lack description of arg %s" % (name, args[i][1])
+ i = i + 1
+ if retdesc == "" and ret[0] != "void":
+ print "Function comment for %s lack description of return value" % (name)
+ if desc == "":
+ print "Function comment for %s lack description of the function" % (name)
+
+
+ return(((ret[0], retdesc), args, desc))
+
+ def parsePreproc(self, token):
+ name = token[1]
+ if name == "#include":
+ token = self.lexer.token()
+ if token == None:
+ return None
+ if token[0] == 'preproc':
+ self.index.add(token[1], self.filename, not self.is_header,
+ "include")
+ return self.lexer.token()
+ return token
+ if name == "#define":
+ token = self.lexer.token()
+ if token == None:
+ return None
+ if token[0] == 'preproc':
+ # TODO macros with arguments
+ name = token[1]
+ lst = []
+ token = self.lexer.token()
+ while token != None and token[0] == 'preproc' and \
+ token[1][0] != '#':
+ lst.append(token[1])
+ token = self.lexer.token()
+ try:
+ name = string.split(name, '(') [0]
+ except:
+ pass
+ info = self.parseMacroComment(name, not self.is_header)
+ self.index.add(name, self.filename, not self.is_header,
+ "macro", info)
+ return token
+ token = self.lexer.token()
+ while token != None and token[0] == 'preproc' and \
+ token[1][0] != '#':
+ token = self.lexer.token()
+ return token
+
+ #
+ # token acquisition on top of the lexer, it handle internally
+ # preprocessor and comments since they are logically not part of
+ # the program structure.
+ #
+ def token(self):
+ global ignored_words
+
+ token = self.lexer.token()
+ while token != None:
+ if token[0] == 'comment':
+ token = self.parseComment(token)
+ continue
+ elif token[0] == 'preproc':
+ token = self.parsePreproc(token)
+ continue
+ elif token[0] == "name" and ignored_words.has_key(token[1]):
+ (n, info) = ignored_words[token[1]]
+ i = 0
+ while i < n:
+ token = self.lexer.token()
+ i = i + 1
+ token = self.lexer.token()
+ continue
+ else:
+ #print "=> ", token
+ return token
+ return None
+
+ #
+ # Parse a typedef, it records the type and its name.
+ #
+ def parseTypedef(self, token):
+ if token == None:
+ return None
+ token = self.parseType(token)
+ if token == None:
+ self.error("parsing typedef")
+ return None
+ base_type = self.type
+ type = base_type
+ #self.debug("end typedef type", token)
+ while token != None:
+ if token[0] == "name":
+ name = token[1]
+ signature = self.signature
+ if signature != None:
+ d = self.mergeFunctionComment(name,
+ ((type, None), signature), 1)
+ self.index.add(name, self.filename, not self.is_header,
+ "functype", d)
+ else:
+ if base_type == "struct":
+ self.index.add(name, self.filename, not self.is_header,
+ "struct", type)
+ base_type = "struct " + name
+ else:
+ self.index.add(name, self.filename, not self.is_header,
+ "typedef", type)
+ token = self.token()
+ else:
+ self.error("parsing typedef: expecting a name")
+ return token
+ #self.debug("end typedef", token)
+ if token != None and token[0] == 'sep' and token[1] == ',':
+ type = base_type
+ token = self.token()
+ while token != None and token[0] == "op":
+ type = type + token[1]
+ token = self.token()
+ elif token != None and token[0] == 'sep' and token[1] == ';':
+ break;
+ elif token != None and token[0] == 'name':
+ type = base_type
+ continue;
+ else:
+ self.error("parsing typedef: expecting ';'", token)
+ return token
+ token = self.token()
+ return token
+
+ #
+ # Parse a C code block, used for functions it parse till
+ # the balancing } included
+ #
+ def parseBlock(self, token):
+ while token != None:
+ if token[0] == "sep" and token[1] == "{":
+ token = self.token()
+ token = self.parseBlock(token)
+ elif token[0] == "sep" and token[1] == "}":
+ self.comment = None
+ token = self.token()
+ return token
+ else:
+ token = self.token()
+ return token
+
+ #
+ # Parse a C struct definition till the balancing }
+ #
+ def parseStruct(self, token):
+ fields = []
+ #self.debug("start parseStruct", token)
+ while token != None:
+ if token[0] == "sep" and token[1] == "{":
+ token = self.token()
+ token = self.parseTypeBlock(token)
+ elif token[0] == "sep" and token[1] == "}":
+ self.struct_fields = fields
+ #self.debug("end parseStruct", token)
+ #print fields
+ token = self.token()
+ return token
+ else:
+ base_type = self.type
+ #self.debug("before parseType", token)
+ token = self.parseType(token)
+ #self.debug("after parseType", token)
+ if token != None and token[0] == "name":
+ fname = token[1]
+ token = self.token()
+ if token[0] == "sep" and token[1] == ";":
+ self.comment = None
+ token = self.token()
+ fields.append((self.type, fname, self.comment))
+ self.comment = None
+ else:
+ self.error("parseStruct: expecting ;", token)
+ elif token != None and token[0] == "sep" and token[1] == "{":
+ token = self.token()
+ token = self.parseTypeBlock(token)
+ if token != None and token[0] == "name":
+ token = self.token()
+ if token != None and token[0] == "sep" and token[1] == ";":
+ token = self.token()
+ else:
+ self.error("parseStruct: expecting ;", token)
+ else:
+ self.error("parseStruct: name", token)
+ token = self.token()
+ self.type = base_type;
+ self.struct_fields = fields
+ #self.debug("end parseStruct", token)
+ #print fields
+ return token
+
+ #
+ # Parse a C enum block, parse till the balancing }
+ #
+ def parseEnumBlock(self, token):
+ self.enums = []
+ name = None
+ self.comment = None
+ comment = ""
+ value = ""
+ while token != None:
+ if token[0] == "sep" and token[1] == "{":
+ token = self.token()
+ token = self.parseTypeBlock(token)
+ elif token[0] == "sep" and token[1] == "}":
+ if name != None:
+ if self.comment != None:
+ comment = self.comment
+ self.comment = None
+ self.enums.append((name, value, comment))
+ token = self.token()
+ return token
+ elif token[0] == "name":
+ if name != None:
+ if self.comment != None:
+ comment = string.strip(self.comment)
+ self.comment = None
+ self.enums.append((name, value, comment))
+ name = token[1]
+ comment = ""
+ value = ""
+ token = self.token()
+ if token[0] == "op" and token[1][0] == "=":
+ if len(token[1]) > 1:
+ value = token[1][1:]
+ token = self.token()
+ while token[0] != "sep" or (token[1] != ',' and
+ token[1] != '}'):
+ value = value + token[1]
+ token = self.token()
+ if token[0] == "sep" and token[1] == ",":
+ token = self.token()
+ else:
+ token = self.token()
+ return token
+
+ #
+ # Parse a C definition block, used for structs it parse till
+ # the balancing }
+ #
+ def parseTypeBlock(self, token):
+ while token != None:
+ if token[0] == "sep" and token[1] == "{":
+ token = self.token()
+ token = self.parseTypeBlock(token)
+ elif token[0] == "sep" and token[1] == "}":
+ token = self.token()
+ return token
+ else:
+ token = self.token()
+ return token
+
+ #
+ # Parse a type: the fact that the type name can either occur after
+ # the definition or within the definition makes it a little harder
+ # if inside, the name token is pushed back before returning
+ #
+ def parseType(self, token):
+ self.type = ""
+ self.struct_fields = []
+ self.signature = None
+ if token == None:
+ return token
+
+ while token[0] == "name" and (
+ token[1] == "const" or token[1] == "unsigned"):
+ if self.type == "":
+ self.type = token[1]
+ else:
+ self.type = self.type + " " + token[1]
+ token = self.token()
+
+ if token[0] == "name" and (token[1] == "long" or token[1] == "short"):
+ if self.type == "":
+ self.type = token[1]
+ else:
+ self.type = self.type + " " + token[1]
+ if token[0] == "name" and token[1] == "int":
+ if self.type == "":
+ self.type = tmp[1]
+ else:
+ self.type = self.type + " " + tmp[1]
+
+ elif token[0] == "name" and token[1] == "struct":
+ if self.type == "":
+ self.type = token[1]
+ else:
+ self.type = self.type + " " + token[1]
+ token = self.token()
+ nametok = None
+ if token[0] == "name":
+ nametok = token
+ token = self.token()
+ if token != None and token[0] == "sep" and token[1] == "{":
+ token = self.token()
+ token = self.parseStruct(token)
+ elif token != None and token[0] == "op" and token[1] == "*":
+ self.type = self.type + " " + nametok[1] + " *"
+ token = self.token()
+ while token != None and token[0] == "op" and token[1] == "*":
+ self.type = self.type + " *"
+ token = self.token()
+ if token[0] == "name":
+ nametok = token
+ token = self.token()
+ else:
+ self.error("struct : expecting name", token)
+ return token
+ elif token != None and token[0] == "name" and nametok != None:
+ self.type = self.type + " " + nametok[1]
+ return token
+
+ if nametok != None:
+ self.lexer.push(token)
+ token = nametok
+ return token
+
+ elif token[0] == "name" and token[1] == "enum":
+ if self.type == "":
+ self.type = token[1]
+ else:
+ self.type = self.type + " " + token[1]
+ self.enums = []
+ token = self.token()
+ if token != None and token[0] == "sep" and token[1] == "{":
+ token = self.token()
+ token = self.parseEnumBlock(token)
+ else:
+ self.error("parsing enum: expecting '{'", token)
+ enum_type = None
+ if token != None and token[0] != "name":
+ self.lexer.push(token)
+ token = ("name", "enum")
+ else:
+ enum_type = token[1]
+ for enum in self.enums:
+ self.index.add(enum[0], self.filename,
+ not self.is_header, "enum",
+ (enum[1], enum[2], enum_type))
+ return token
+
+ elif token[0] == "name":
+ if self.type == "":
+ self.type = token[1]
+ else:
+ self.type = self.type + " " + token[1]
+ else:
+ self.error("parsing type %s: expecting a name" % (self.type),
+ token)
+ return token
+ token = self.token()
+ while token != None and (token[0] == "op" or
+ token[0] == "name" and token[1] == "const"):
+ self.type = self.type + " " + token[1]
+ token = self.token()
+
+ #
+ # if there is a parenthesis here, this means a function type
+ #
+ if token != None and token[0] == "sep" and token[1] == '(':
+ self.type = self.type + token[1]
+ token = self.token()
+ while token != None and token[0] == "op" and token[1] == '*':
+ self.type = self.type + token[1]
+ token = self.token()
+ if token == None or token[0] != "name" :
+ self.error("parsing function type, name expected", token);
+ return token
+ self.type = self.type + token[1]
+ nametok = token
+ token = self.token()
+ if token != None and token[0] == "sep" and token[1] == ')':
+ self.type = self.type + token[1]
+ token = self.token()
+ if token != None and token[0] == "sep" and token[1] == '(':
+ token = self.token()
+ type = self.type;
+ token = self.parseSignature(token);
+ self.type = type;
+ else:
+ self.error("parsing function type, '(' expected", token);
+ return token
+ else:
+ self.error("parsing function type, ')' expected", token);
+ return token
+ self.lexer.push(token)
+ token = nametok
+ return token
+
+ #
+ # do some lookahead for arrays
+ #
+ if token != None and token[0] == "name":
+ nametok = token
+ token = self.token()
+ if token != None and token[0] == "sep" and token[1] == '[':
+ self.type = self.type + nametok[1]
+ while token != None and token[0] == "sep" and token[1] == '[':
+ self.type = self.type + token[1]
+ token = self.token()
+ while token != None and token[0] != 'sep' and \
+ token[1] != ']' and token[1] != ';':
+ self.type = self.type + token[1]
+ token = self.token()
+ if token != None and token[0] == 'sep' and token[1] == ']':
+ self.type = self.type + token[1]
+ token = self.token()
+ else:
+ self.error("parsing array type, ']' expected", token);
+ return token
+ elif token != None and token[0] == "sep" and token[1] == ':':
+ # remove :12 in case it's a limited int size
+ token = self.token()
+ token = self.token()
+ self.lexer.push(token)
+ token = nametok
+
+ return token
+
+ #
+ # Parse a signature: '(' has been parsed and we scan the type definition
+ # up to the ')' included
+ def parseSignature(self, token):
+ signature = []
+ if token != None and token[0] == "sep" and token[1] == ')':
+ self.signature = []
+ token = self.token()
+ return token
+ while token != None:
+ token = self.parseType(token)
+ if token != None and token[0] == "name":
+ signature.append((self.type, token[1], None))
+ token = self.token()
+ elif token != None and token[0] == "sep" and token[1] == ',':
+ token = self.token()
+ continue
+ elif token != None and token[0] == "sep" and token[1] == ')':
+ # only the type was provided
+ if self.type == "...":
+ signature.append((self.type, "...", None))
+ else:
+ signature.append((self.type, None, None))
+ if token != None and token[0] == "sep":
+ if token[1] == ',':
+ token = self.token()
+ continue
+ elif token[1] == ')':
+ token = self.token()
+ break
+ self.signature = signature
+ return token
+
+ #
+ # Parse a global definition, be it a type, variable or function
+ # the extern "C" blocks are a bit nasty and require it to recurse.
+ #
+ def parseGlobal(self, token):
+ static = 0
+ if token[1] == 'extern':
+ token = self.token()
+ if token == None:
+ return token
+ if token[0] == 'string':
+ if token[1] == 'C':
+ token = self.token()
+ if token == None:
+ return token
+ if token[0] == 'sep' and token[1] == "{":
+ token = self.token()
+# print 'Entering extern "C line ', self.lineno()
+ while token != None and (token[0] != 'sep' or
+ token[1] != "}"):
+ if token[0] == 'name':
+ token = self.parseGlobal(token)
+ else:
+ self.error(
+ "token %s %s unexpected at the top level" % (
+ token[0], token[1]))
+ token = self.parseGlobal(token)
+# print 'Exiting extern "C" line', self.lineno()
+ token = self.token()
+ return token
+ else:
+ return token
+ elif token[1] == 'static':
+ static = 1
+ token = self.token()
+ if token == None or token[0] != 'name':
+ return token
+
+ if token[1] == 'typedef':
+ token = self.token()
+ return self.parseTypedef(token)
+ else:
+ token = self.parseType(token)
+ type_orig = self.type
+ if token == None or token[0] != "name":
+ return token
+ type = type_orig
+ self.name = token[1]
+ token = self.token()
+ while token != None and (token[0] == "sep" or token[0] == "op"):
+ if token[0] == "sep":
+ if token[1] == "[":
+ type = type + token[1]
+ token = self.token()
+ while token != None and (token[0] != "sep" or \
+ token[1] != ";"):
+ type = type + token[1]
+ token = self.token()
+
+ if token != None and token[0] == "op" and token[1] == "=":
+ #
+ # Skip the initialization of the variable
+ #
+ token = self.token()
+ if token[0] == 'sep' and token[1] == '{':
+ token = self.token()
+ token = self.parseBlock(token)
+ else:
+ self.comment = None
+ while token != None and (token[0] != "sep" or \
+ (token[1] != ';' and token[1] != ',')):
+ token = self.token()
+ self.comment = None
+ if token == None or token[0] != "sep" or (token[1] != ';' and
+ token[1] != ','):
+ self.error("missing ';' or ',' after value")
+
+ if token != None and token[0] == "sep":
+ if token[1] == ";":
+ self.comment = None
+ token = self.token()
+ if type == "struct":
+ self.index.add(self.name, self.filename,
+ not self.is_header, "struct", self.struct_fields)
+ else:
+ self.index.add(self.name, self.filename,
+ not self.is_header, "variable", type)
+ break
+ elif token[1] == "(":
+ token = self.token()
+ token = self.parseSignature(token)
+ if token == None:
+ return None
+ if token[0] == "sep" and token[1] == ";":
+ d = self.mergeFunctionComment(self.name,
+ ((type, None), self.signature), 1)
+ self.index.add(self.name, self.filename, static,
+ "function", d)
+ token = self.token()
+ if token[0] == "sep" and token[1] == "{":
+ d = self.mergeFunctionComment(self.name,
+ ((type, None), self.signature), static)
+ self.index.add(self.name, self.filename, static,
+ "function", d)
+ token = self.token()
+ token = self.parseBlock(token);
+ elif token[1] == ',':
+ self.comment = None
+ self.index.add(self.name, self.filename, static,
+ "variable", type)
+ type = type_orig
+ token = self.token()
+ while token != None and token[0] == "sep":
+ type = type + token[1]
+ token = self.token()
+ if token != None and token[0] == "name":
+ self.name = token[1]
+ token = self.token()
+ else:
+ break
+
+ return token
+
+ def parse(self):
+ print "Parsing %s" % (self.filename)
+ token = self.token()
+ while token != None:
+ if token[0] == 'name':
+ token = self.parseGlobal(token)
+ else:
+ self.error("token %s %s unexpected at the top level" % (
+ token[0], token[1]))
+ token = self.parseGlobal(token)
+ return
+ return self.index
+
+
+class docBuilder:
+ """A documentation builder"""
+ def __init__(self, name, directories=['.'], excludes=[]):
+ self.name = name
+ self.directories = directories
+ self.excludes = excludes + ignored_files.keys()
+ self.modules = {}
+ self.headers = {}
+ self.idx = index()
+
+ def analyze(self):
+ print "Project %s : %d headers, %d modules" % (self.name, len(self.headers.keys()), len(self.modules.keys()))
+ self.idx.analyze()
+
+ def scanHeaders(self):
+ for header in self.headers.keys():
+ parser = CParser(header)
+ idx = parser.parse()
+ self.headers[header] = idx;
+ self.idx.merge(idx)
+
+ def scanModules(self):
+ for module in self.modules.keys():
+ parser = CParser(module)
+ idx = parser.parse()
+ # idx.analyze()
+ self.modules[module] = idx
+ self.idx.merge_public(idx)
+
+ def scan(self):
+ for directory in self.directories:
+ files = glob.glob(directory + "/*.c")
+ for file in files:
+ skip = 0
+ for excl in self.excludes:
+ if string.find(file, excl) != -1:
+ skip = 1;
+ break
+ if skip == 0:
+ self.modules[file] = None;
+ files = glob.glob(directory + "/*.h")
+ for file in files:
+ skip = 0
+ for excl in self.excludes:
+ if string.find(file, excl) != -1:
+ skip = 1;
+ break
+ if skip == 0:
+ self.headers[file] = None;
+ self.scanHeaders()
+ self.scanModules()
+
+ def modulename_file(self, file):
+ module = string.split(file, '/')[-1]
+ if module[-2:] == '.h':
+ module = module[:-2]
+ return module
+
+ def serialize_enum(self, output, name):
+ id = self.idx.enums[name]
+ output.write(" <enum name='%s' file='%s'" % (name,
+ self.modulename_file(id.module)))
+ if id.info != None:
+ info = id.info
+ if info[0] != None and info[0] != '':
+ output.write(" value='%s'" % info[0]);
+ if info[2] != None and info[2] != '':
+ output.write(" type='%s'" % info[2]);
+ if info[1] != None and info[1] != '':
+ output.write(" info='%s'" % escape(info[1]));
+ output.write("/>\n")
+
+ def serialize_macro(self, output, name):
+ id = self.idx.macros[name]
+ output.write(" <macro name='%s' file='%s'>\n" % (name,
+ self.modulename_file(id.module)))
+ if id.info != None:
+ try:
+ (args, desc) = id.info
+ if desc != None and desc != "":
+ output.write(" <info>%s</info>\n" % (escape(desc)))
+ for arg in args:
+ (name, desc) = arg
+ if desc != None and desc != "":
+ output.write(" <arg name='%s' info='%s'/>\n" % (
+ name, escape(desc)))
+ else:
+ output.write(" <arg name='%s'/>\n" % (name))
+ except:
+ pass
+ output.write(" </macro>\n")
+
+ def serialize_typedef(self, output, name):
+ id = self.idx.typedefs[name]
+ if id.info[0:7] == 'struct ':
+ output.write(" <struct name='%s' file='%s' type='%s'" % (
+ name, self.modulename_file(id.module), id.info))
+ name = id.info[7:]
+ if self.idx.structs.has_key(name):
+ output.write(">\n");
+ for field in self.idx.structs[name].info:
+ desc = field[2]
+ if desc == None:
+ desc = ''
+ else:
+ desc = escape(desc)
+ output.write(" <field name='%s' type='%s' info='%s'/>\n" % (field[1] , field[0], desc))
+ output.write(" </struct>\n")
+ else:
+ output.write("/>\n");
+ else :
+ output.write(" <typedef name='%s' file='%s' type='%s'/>\n" % (
+ name, self.modulename_file(id.module), id.info))
+
+ def serialize_function(self, output, name):
+ id = self.idx.functions[name]
+ output.write(" <%s name='%s' file='%s'>\n" % (id.type, name,
+ self.modulename_file(id.module)))
+ try:
+ (ret, params, desc) = id.info
+ output.write(" <info>%s</info>\n" % (escape(desc)))
+ if ret[0] != None:
+ if ret[0] == "void":
+ output.write(" <return type='void'/>\n")
+ else:
+ output.write(" <return type='%s' info='%s'/>\n" % (
+ ret[0], escape(ret[1])))
+ for param in params:
+ if param[0] == 'void':
+ continue
+ if param[2] == None:
+ output.write(" <arg name='%s' type='%s' info=''/>\n" % (param[1], param[0]))
+ else:
+ output.write(" <arg name='%s' type='%s' info='%s'/>\n" % (param[1], param[0], escape(param[2])))
+ except:
+ print "Failed to save function %s info: " % name, `id.info`
+ output.write(" </%s>\n" % (id.type))
+
+ def serialize_exports(self, output, file):
+ module = self.modulename_file(file)
+ output.write(" <file name='%s'>\n" % (module))
+ dict = self.headers[file]
+ ids = dict.functions.keys() + dict.variables.keys() + \
+ dict.macros.keys() + dict.typedefs.keys() + \
+ dict.structs.keys() + dict.enums.keys()
+ ids.sort()
+ for id in ids:
+ output.write(" <exports symbol='%s'/>\n" % (id))
+ output.write(" </file>\n")
+
+
+ def serialize(self, filename = None):
+ if filename == None:
+ filename = "%s-api.xml" % self.name
+ print "Saving XML description %s" % (filename)
+ output = open(filename, "w")
+ output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
+ output.write("<api name='%s'>\n" % self.name)
+ output.write(" <files>\n")
+ for file in self.headers.keys():
+ self.serialize_exports(output, file)
+ output.write(" </files>\n")
+ output.write(" <symbols>\n")
+ macros = self.idx.macros.keys()
+ macros.sort()
+ for macro in macros:
+ self.serialize_macro(output, macro)
+ enums = self.idx.enums.keys()
+ enums.sort()
+ for enum in enums:
+ self.serialize_enum(output, enum)
+ typedefs = self.idx.typedefs.keys()
+ typedefs.sort()
+ for typedef in typedefs:
+ self.serialize_typedef(output, typedef)
+ functions = self.idx.functions.keys()
+ functions.sort()
+ for function in functions:
+ self.serialize_function(output, function)
+ output.write(" </symbols>\n")
+ output.write("</api>\n")
+ output.close()
+
+
+def rebuild():
+ builder = None
+ if glob.glob("../parser.c") != [] :
+ print "Rebuilding API description for libxml2"
+ builder = docBuilder("libxml2", ["..", "../include/libxml"],
+ ["xmlwin32version.h", "tst.c"])
+ elif glob.glob("../libxslt/transform.c") != [] :
+ print "Rebuilding API description for libxslt"
+ builder = docBuilder("libxslt", ["../libxslt"],
+ ["win32config.h", "tst.c"])
+ else:
+ print "rebuild() failed, unable to guess the module"
+ return None
+ builder.scan()
+ builder.analyze()
+ builder.serialize()
+ return builder
+
+#
+# for debugging the parser
+#
+def parse(filename):
+ parser = CParser(filename)
+ idx = parser.parse()
+ return idx
+
+if __name__ == "__main__":
+ rebuild()