| """Parse a Python file and retrieve classes and methods. | 
 |  | 
 | Parse enough of a Python file to recognize class and method | 
 | definitions and to find out the superclasses of a class. | 
 |  | 
The interface consists of two functions:
	readmodule(module, path)
	readmodule_ex(module, path)
module is the name of a Python module, path is an optional list of
directories in which the module is searched for.  If present, path is
prepended to the system search path sys.path.
The return value is a dictionary.  The keys of the dictionary are
the names of the classes defined in the module (including classes
that are defined via the from XXX import YYY construct).  The values
are instances of the class Class defined here.  readmodule_ex()
additionally returns the top-level functions of the module, as
instances of the class Function; readmodule() leaves them out.
 |  | 
 | A class is described by the class Class in this module.  Instances | 
 | of this class have the following instance variables: | 
 | 	name -- the name of the class | 
 | 	super -- a list of super classes (Class instances) | 
 | 	methods -- a dictionary of methods | 
 | 	file -- the file in which the class was defined | 
 | 	lineno -- the line in the file on which the class statement occurred | 
 | The dictionary of methods uses the method names as keys and the line | 
 | numbers on which the method was defined as values. | 
 | If the name of a super class is not recognized, the corresponding | 
 | entry in the list of super classes is not a class instance but a | 
 | string giving the name of the super class.  Since import statements | 
 | are recognized and imported modules are scanned as well, this | 
 | shouldn't happen often. | 
 |  | 
 | BUGS | 
 | - Continuation lines are not dealt with at all. | 
 | - While triple-quoted strings won't confuse it, lines that look like | 
 |   def, class, import or "from ... import" stmts inside backslash-continued | 
 |   single-quoted strings are treated like code.  The expense of stopping | 
 |   that isn't worth it. | 
- Code that doesn't pass tabnanny or python -t will confuse it, unless
  you set the module variable TABWIDTH (default 8) to the correct tab
  width for the file.
 |  | 
 | PACKAGE RELATED BUGS | 
 | - If you have a package and a module inside that or another package | 
 |   with the same name, module caching doesn't work properly since the | 
 |   key is the base name of the module/package. | 
 | - The only entry that is returned when you readmodule a package is a | 
 |   __path__ whose value is a list which confuses certain class browsers. | 
 | - When code does: | 
 |   from package import subpackage | 
 |   class MyClass(subpackage.SuperClass): | 
 |     ... | 
 |   It can't locate the parent.  It probably needs to have the same | 
 |   hairy logic that the import locator already does.  (This logic | 
 |   exists coded in Python in the freeze package.) | 
 | """ | 
 |  | 
 | import os | 
 | import sys | 
 | import imp | 
 | import re | 
 | import string | 
 |  | 
# Tab width used by _indent() when expanding tabs to measure how deeply
# a "def" or "class" line is indented.  Set it to the file's real tab
# width if the source mixes tabs and spaces inconsistently.
TABWIDTH = 8

# _getnext(src, pos) is the bound search() method of one big verbose
# regexp; it returns the next "interesting" construct at or after pos,
# or None.  Exactly one of the following named alternatives matches:
#   String     -- a whole triple-quoted string (either quote style);
#                 matched only so the scanner steps over its contents
#   Method     -- "def NAME (" at the start of a line; MethodIndent holds
#                 the leading whitespace, MethodName the name
#   Class      -- "class NAME [(supers)] :" at the start of a line;
#                 ClassIndent, ClassName and the optional ClassSupers
#                 (parentheses included) are captured
#   Import     -- "import ..." starting in column 0; ImportList is the
#                 text up to a comment, semicolon or end of line
#   ImportFrom -- "from PATH import ..." starting in column 0;
#                 ImportFromPath and ImportFromList are captured
# re.MULTILINE makes "^" match at the start of every line; re.VERBOSE
# means whitespace inside the pattern is layout only.
_getnext = re.compile(r"""
    (?P<String>
       \""" [^"\\]* (?:
			(?: \\. | "(?!"") )
			[^"\\]*
		    )*
       \"""

    |   ''' [^'\\]* (?:
			(?: \\. | '(?!'') )
			[^'\\]*
		    )*
	'''
    )

|   (?P<Method>
	^
	(?P<MethodIndent> [ \t]* )
	def [ \t]+
	(?P<MethodName> [a-zA-Z_] \w* )
	[ \t]* \(
    )

|   (?P<Class>
	^
	(?P<ClassIndent> [ \t]* )
	class [ \t]+
	(?P<ClassName> [a-zA-Z_] \w* )
	[ \t]*
	(?P<ClassSupers> \( [^)\n]* \) )?
	[ \t]* :
    )

|   (?P<Import>
	^ import [ \t]+
	(?P<ImportList> [^#;\n]+ )
    )

|   (?P<ImportFrom>
	^ from [ \t]+
	(?P<ImportFromPath>
	    [a-zA-Z_] \w*
	    (?:
		[ \t]* \. [ \t]* [a-zA-Z_] \w*
	    )*
	)
	[ \t]+
	import [ \t]+
	(?P<ImportFromList> [^#;\n]+ )
    )
""", re.VERBOSE | re.DOTALL | re.MULTILINE).search

# Maps a module's base name to the dictionary built for it by
# readmodule_ex(), so each module is scanned at most once.
_modules = {}                           # cache of modules we've seen
 |  | 
 | # each Python class is represented by an instance of this class | 
 | class Class: | 
 | 	'''Class to represent a Python class.''' | 
 | 	def __init__(self, module, name, super, file, lineno): | 
 | 		self.module = module | 
 | 		self.name = name | 
 | 		if super is None: | 
 | 			super = [] | 
 | 		self.super = super | 
 | 		self.methods = {} | 
 | 		self.file = file | 
 | 		self.lineno = lineno | 
 |  | 
 | 	def _addmethod(self, name, lineno): | 
 | 		self.methods[name] = lineno | 
 |  | 
 | class Function(Class): | 
 | 	'''Class to represent a top-level Python function''' | 
 | 	def __init__(self, module, name, file, lineno): | 
 | 		Class.__init__(self, module, name, None, file, lineno) | 
 | 	def _addmethod(self, name, lineno): | 
 | 		assert 0, "Function._addmethod() shouldn't be called" | 
 |  | 
 | def readmodule(module, path=[], inpackage=0): | 
 | 	'''Backwards compatible interface. | 
 |  | 
 | 	Like readmodule_ex() but strips Function objects from the | 
 | 	resulting dictionary.''' | 
 |  | 
 | 	dict = readmodule_ex(module, path, inpackage) | 
 | 	res = {} | 
 | 	for key, value in dict.items(): | 
 | 		if not isinstance(value, Function): | 
 | 			res[key] = value | 
 | 	return res | 
 |  | 
 | def readmodule_ex(module, path=[], inpackage=0): | 
 | 	'''Read a module file and return a dictionary of classes. | 
 |  | 
 | 	Search for MODULE in PATH and sys.path, read and parse the | 
 | 	module and return a dictionary with one entry for each class | 
 | 	found in the module.''' | 
 |  | 
 | 	dict = {} | 
 |  | 
 | 	i = string.rfind(module, '.') | 
 | 	if i >= 0: | 
 | 		# Dotted module name | 
 | 		package = string.strip(module[:i]) | 
 | 		submodule = string.strip(module[i+1:]) | 
 | 		parent = readmodule(package, path, inpackage) | 
 | 		child = readmodule(submodule, parent['__path__'], 1) | 
 | 		return child | 
 |  | 
 | 	if _modules.has_key(module): | 
 | 		# we've seen this module before... | 
 | 		return _modules[module] | 
 | 	if module in sys.builtin_module_names: | 
 | 		# this is a built-in module | 
 | 		_modules[module] = dict | 
 | 		return dict | 
 |  | 
 | 	# search the path for the module | 
 | 	f = None | 
 | 	if inpackage: | 
 | 		try: | 
 | 			f, file, (suff, mode, type) = \ | 
 | 				imp.find_module(module, path) | 
 | 		except ImportError: | 
 | 			f = None | 
 | 	if f is None: | 
 | 		fullpath = list(path) + sys.path | 
 | 		f, file, (suff, mode, type) = imp.find_module(module, fullpath) | 
 | 	if type == imp.PKG_DIRECTORY: | 
 | 		dict['__path__'] = [file] | 
 | 		_modules[module] = dict | 
 | 		path = [file] + path | 
 | 		f, file, (suff, mode, type) = \ | 
 | 				imp.find_module('__init__', [file]) | 
 | 	if type != imp.PY_SOURCE: | 
 | 		# not Python source, can't do anything with this module | 
 | 		f.close() | 
 | 		_modules[module] = dict | 
 | 		return dict | 
 |  | 
 | 	_modules[module] = dict | 
 | 	imports = [] | 
 | 	classstack = []	# stack of (class, indent) pairs | 
 | 	src = f.read() | 
 | 	f.close() | 
 |  | 
 | 	# To avoid having to stop the regexp at each newline, instead | 
 | 	# when we need a line number we simply string.count the number of | 
 | 	# newlines in the string since the last time we did this; i.e., | 
 | 	#    lineno = lineno + \ | 
 | 	#             string.count(src, '\n', last_lineno_pos, here) | 
 | 	#    last_lineno_pos = here | 
 | 	countnl = string.count | 
 | 	lineno, last_lineno_pos = 1, 0 | 
 | 	i = 0 | 
 | 	while 1: | 
 | 		m = _getnext(src, i) | 
 | 		if not m: | 
 | 			break | 
 | 		start, i = m.span() | 
 |  | 
 | 		if m.start("Method") >= 0: | 
 | 			# found a method definition or function | 
 | 			thisindent = _indent(m.group("MethodIndent")) | 
 | 			meth_name = m.group("MethodName") | 
 | 			lineno = lineno + \ | 
 | 				 countnl(src, '\n', | 
 | 					 last_lineno_pos, start) | 
 | 			last_lineno_pos = start | 
 | 			# close all classes indented at least as much | 
 | 			while classstack and \ | 
 | 			      classstack[-1][1] >= thisindent: | 
 | 				del classstack[-1] | 
 | 			if classstack: | 
 | 				# it's a class method | 
 | 				cur_class = classstack[-1][0] | 
 | 				cur_class._addmethod(meth_name, lineno) | 
 | 			else: | 
 | 				# it's a function | 
 | 				f = Function(module, meth_name, | 
 | 					     file, lineno) | 
 | 				dict[meth_name] = f | 
 |  | 
 | 		elif m.start("String") >= 0: | 
 | 			pass | 
 |  | 
 | 		elif m.start("Class") >= 0: | 
 | 			# we found a class definition | 
 | 			thisindent = _indent(m.group("ClassIndent")) | 
 | 			# close all classes indented at least as much | 
 | 			while classstack and \ | 
 | 			      classstack[-1][1] >= thisindent: | 
 | 				del classstack[-1] | 
 | 			lineno = lineno + \ | 
 | 				 countnl(src, '\n', last_lineno_pos, start) | 
 | 			last_lineno_pos = start | 
 | 			class_name = m.group("ClassName") | 
 | 			inherit = m.group("ClassSupers") | 
 | 			if inherit: | 
 | 				# the class inherits from other classes | 
 | 				inherit = string.strip(inherit[1:-1]) | 
 | 				names = [] | 
 | 				for n in string.splitfields(inherit, ','): | 
 | 					n = string.strip(n) | 
 | 					if dict.has_key(n): | 
 | 						# we know this super class | 
 | 						n = dict[n] | 
 | 					else: | 
 | 						c = string.splitfields(n, '.') | 
 | 						if len(c) > 1: | 
 | 							# super class | 
 | 							# is of the | 
 | 							# form module.class: | 
 | 							# look in | 
 | 							# module for class | 
 | 							m = c[-2] | 
 | 							c = c[-1] | 
 | 							if _modules.has_key(m): | 
 | 								d = _modules[m] | 
 | 								if d.has_key(c): | 
 | 									n = d[c] | 
 | 					names.append(n) | 
 | 				inherit = names | 
 | 			# remember this class | 
 | 			cur_class = Class(module, class_name, inherit, | 
 | 					  file, lineno) | 
 | 			dict[class_name] = cur_class | 
 | 			classstack.append((cur_class, thisindent)) | 
 |  | 
 | 		elif m.start("Import") >= 0: | 
 | 			# import module | 
 | 			for n in string.split(m.group("ImportList"), ','): | 
 | 				n = string.strip(n) | 
 | 				try: | 
 | 					# recursively read the imported module | 
 | 					d = readmodule(n, path, inpackage) | 
 | 				except: | 
 | 					##print 'module', n, 'not found' | 
 | 					pass | 
 |  | 
 | 		elif m.start("ImportFrom") >= 0: | 
 | 			# from module import stuff | 
 | 			mod = m.group("ImportFromPath") | 
 | 			names = string.split(m.group("ImportFromList"), ',') | 
 | 			try: | 
 | 				# recursively read the imported module | 
 | 				d = readmodule(mod, path, inpackage) | 
 | 			except: | 
 | 				##print 'module', mod, 'not found' | 
 | 				continue | 
 | 			# add any classes that were defined in the | 
 | 			# imported module to our name space if they | 
 | 			# were mentioned in the list | 
 | 			for n in names: | 
 | 				n = string.strip(n) | 
 | 				if d.has_key(n): | 
 | 					dict[n] = d[n] | 
 | 				elif n == '*': | 
 | 					# only add a name if not | 
 | 					# already there (to mimic what | 
 | 					# Python does internally) | 
 | 					# also don't add names that | 
 | 					# start with _ | 
 | 					for n in d.keys(): | 
 | 						if n[0] != '_' and \ | 
 | 						   not dict.has_key(n): | 
 | 							dict[n] = d[n] | 
 | 		else: | 
 | 			assert 0, "regexp _getnext found something unexpected" | 
 |  | 
 | 	return dict | 
 |  | 
 | def _indent(ws, _expandtabs=string.expandtabs): | 
 | 	return len(_expandtabs(ws, TABWIDTH)) |