Initial revision

commit: 7aced17437a6b05bc4b0b5ff93aa6a5d3a374d68 [log] [tgz]
author: David Scherer <dscherer@cmu.edu> Tue Aug 15 01:13:23 2000 +0000
committer: David Scherer <dscherer@cmu.edu> Tue Aug 15 01:13:23 2000 +0000
tree: ce0576a16111fd86ac5f56ff4ec1500f29c4f8db
parent: 33a6da9971a923ceaaee1406d0feaa64b8d1759a [diff] [blame]
diff --git a/Lib/idlelib/pyclbr.py b/Lib/idlelib/pyclbr.py
new file mode 100644
index 0000000..74b7ff7
--- /dev/null
+++ b/Lib/idlelib/pyclbr.py

@@ -0,0 +1,336 @@
+"""Parse a Python file and retrieve classes and methods.
+
+Parse enough of a Python file to recognize class and method
+definitions and to find out the superclasses of a class.
+
+The interface consists of two functions:
+	readmodule(module, path)
+	readmodule_ex(module, path)
+module is the name of a Python module, path is an optional list of
+directories in which the module is searched for.  If present, path is
+prepended to the system search path sys.path.
+The return value is a dictionary.  The keys of the dictionary are
+the names of the classes defined in the module (including classes
+that are defined via the from XXX import YYY construct).  The values
+are instances of the class Class defined here.  readmodule_ex
+additionally returns the top-level functions of the module, as
+instances of the class Function.
+
+A class is described by the class Class in this module.  Instances
+of this class have the following instance variables:
+	name -- the name of the class
+	super -- a list of super classes (Class instances)
+	methods -- a dictionary of methods
+	file -- the file in which the class was defined
+	lineno -- the line in the file on which the class statement occurred
+The dictionary of methods uses the method names as keys and the line
+numbers on which the method was defined as values.
+If the name of a super class is not recognized, the corresponding
+entry in the list of super classes is not a class instance but a
+string giving the name of the super class.  Since import statements
+are recognized and imported modules are scanned as well, this
+shouldn't happen often.
+
+BUGS
+- Continuation lines are not dealt with at all.
+- While triple-quoted strings won't confuse it, lines that look like
+  def, class, import or "from ... import" stmts inside backslash-continued
+  single-quoted strings are treated like code.  The expense of stopping
+  that isn't worth it.
+- Code that doesn't pass tabnanny or python -t will confuse it, unless
+  you set the module TABWIDTH variable (default 8) to the correct tab
+  width for the file.
+
+PACKAGE RELATED BUGS
+- If you have a package and a module inside that or another package
+  with the same name, module caching doesn't work properly since the
+  key is the base name of the module/package.
+- The only entry that is returned when you readmodule a package is a
+  __path__ whose value is a list which confuses certain class browsers.
+- When code does:
+  from package import subpackage
+  class MyClass(subpackage.SuperClass):
+    ...
+  It can't locate the parent.  It probably needs to have the same
+  hairy logic that the import locator already does.  (This logic
+  exists coded in Python in the freeze package.)
+"""
+
+import os
+import sys
+import imp
+import re
+import string
+
+# Tab size handed to string.expandtabs() by _indent() when the
+# indentation of a "def" or "class" line is measured.
+TABWIDTH = 8
+
+# Scanner for the constructs this module cares about.  _getnext is the
+# bound search() method of the compiled pattern, so _getnext(src, pos)
+# returns the next match at or after pos, or None.  The alternatives
+# are the following named groups:
+#   String     -- a complete triple-quoted string; matched only so
+#                 that its contents are skipped instead of scanned
+#   Method     -- a "def name(" line, at any indentation
+#                 (subgroups MethodIndent, MethodName)
+#   Class      -- a "class name[(supers)]:" line, at any indentation
+#                 (subgroups ClassIndent, ClassName, ClassSupers)
+#   Import     -- "import ..." starting at the beginning of a line
+#                 (subgroup ImportList)
+#   ImportFrom -- "from ... import ..." starting at the beginning of
+#                 a line (subgroups ImportFromPath, ImportFromList)
+_getnext = re.compile(r"""
+    (?P<String>
+       \""" [^"\\]* (?:
+			(?: \\. | "(?!"") )
+			[^"\\]*
+		    )*
+       \"""
+
+    |   ''' [^'\\]* (?:
+			(?: \\. | '(?!'') )
+			[^'\\]*
+		    )*
+	'''
+    )
+
+|   (?P<Method>
+	^
+	(?P<MethodIndent> [ \t]* )
+	def [ \t]+
+	(?P<MethodName> [a-zA-Z_] \w* )
+	[ \t]* \(
+    )
+
+|   (?P<Class>
+	^
+	(?P<ClassIndent> [ \t]* )
+	class [ \t]+
+	(?P<ClassName> [a-zA-Z_] \w* )
+	[ \t]*
+	(?P<ClassSupers> \( [^)\n]* \) )?
+	[ \t]* :
+    )
+
+|   (?P<Import>
+	^ import [ \t]+
+	(?P<ImportList> [^#;\n]+ )
+    )
+
+|   (?P<ImportFrom>
+	^ from [ \t]+
+	(?P<ImportFromPath>
+	    [a-zA-Z_] \w*
+	    (?:
+		[ \t]* \. [ \t]* [a-zA-Z_] \w*
+	    )*
+	)
+	[ \t]+
+	import [ \t]+
+	(?P<ImportFromList> [^#;\n]+ )
+    )
+""", re.VERBOSE | re.DOTALL | re.MULTILINE).search
+
+# Maps a module name to the dictionary readmodule_ex() built for it.
+_modules = {}                           # cache of modules we've seen
+
+# each Python class is represented by an instance of this class
+class Class:
+	'''Class to represent a Python class.'''
+	def __init__(self, module, name, super, file, lineno):
+		self.module = module
+		self.name = name
+		if super is None:
+			super = []
+		self.super = super
+		self.methods = {}
+		self.file = file
+		self.lineno = lineno
+
+	def _addmethod(self, name, lineno):
+		self.methods[name] = lineno
+
+class Function(Class):
+	'''Class to represent a top-level Python function'''
+	def __init__(self, module, name, file, lineno):
+		Class.__init__(self, module, name, None, file, lineno)
+	def _addmethod(self, name, lineno):
+		assert 0, "Function._addmethod() shouldn't be called"
+
+def readmodule(module, path=[], inpackage=0):
+	'''Backwards compatible interface.
+
+	Like readmodule_ex() but strips Function objects from the
+	resulting dictionary.'''
+
+	dict = readmodule_ex(module, path, inpackage)
+	res = {}
+	for key, value in dict.items():
+		if not isinstance(value, Function):
+			res[key] = value
+	return res
+
+def readmodule_ex(module, path=[], inpackage=0):
+	'''Read a module file and return a dictionary of classes.
+
+	Search for MODULE in PATH and sys.path, read and parse the
+	module and return a dictionary with one entry for each class
+	found in the module.'''
+
+	dict = {}
+
+	i = string.rfind(module, '.')
+	if i >= 0:
+		# Dotted module name
+		package = string.strip(module[:i])
+		submodule = string.strip(module[i+1:])
+		parent = readmodule(package, path, inpackage)
+		child = readmodule(submodule, parent['__path__'], 1)
+		return child
+
+	if _modules.has_key(module):
+		# we've seen this module before...
+		return _modules[module]
+	if module in sys.builtin_module_names:
+		# this is a built-in module
+		_modules[module] = dict
+		return dict
+
+	# search the path for the module
+	f = None
+	if inpackage:
+		try:
+			f, file, (suff, mode, type) = \
+				imp.find_module(module, path)
+		except ImportError:
+			f = None
+	if f is None:
+		fullpath = list(path) + sys.path
+		f, file, (suff, mode, type) = imp.find_module(module, fullpath)
+	if type == imp.PKG_DIRECTORY:
+		dict['__path__'] = [file]
+		_modules[module] = dict
+		path = [file] + path
+		f, file, (suff, mode, type) = \
+				imp.find_module('__init__', [file])
+	if type != imp.PY_SOURCE:
+		# not Python source, can't do anything with this module
+		f.close()
+		_modules[module] = dict
+		return dict
+
+	_modules[module] = dict
+	imports = []
+	classstack = []	# stack of (class, indent) pairs
+	src = f.read()
+	f.close()
+
+	# To avoid having to stop the regexp at each newline, instead
+	# when we need a line number we simply string.count the number of
+	# newlines in the string since the last time we did this; i.e.,
+	#    lineno = lineno + \
+	#             string.count(src, '\n', last_lineno_pos, here)
+	#    last_lineno_pos = here
+	countnl = string.count
+	lineno, last_lineno_pos = 1, 0
+	i = 0
+	while 1:
+		m = _getnext(src, i)
+		if not m:
+			break
+		start, i = m.span()
+
+		if m.start("Method") >= 0:
+			# found a method definition or function
+			thisindent = _indent(m.group("MethodIndent"))
+			meth_name = m.group("MethodName")
+			lineno = lineno + \
+				 countnl(src, '\n',
+					 last_lineno_pos, start)
+			last_lineno_pos = start
+			# close all classes indented at least as much
+			while classstack and \
+			      classstack[-1][1] >= thisindent:
+				del classstack[-1]
+			if classstack:
+				# it's a class method
+				cur_class = classstack[-1][0]
+				cur_class._addmethod(meth_name, lineno)
+			else:
+				# it's a function
+				f = Function(module, meth_name,
+					     file, lineno)
+				dict[meth_name] = f
+
+		elif m.start("String") >= 0:
+			pass
+
+		elif m.start("Class") >= 0:
+			# we found a class definition
+			thisindent = _indent(m.group("ClassIndent"))
+			# close all classes indented at least as much
+			while classstack and \
+			      classstack[-1][1] >= thisindent:
+				del classstack[-1]
+			lineno = lineno + \
+				 countnl(src, '\n', last_lineno_pos, start)
+			last_lineno_pos = start
+			class_name = m.group("ClassName")
+			inherit = m.group("ClassSupers")
+			if inherit:
+				# the class inherits from other classes
+				inherit = string.strip(inherit[1:-1])
+				names = []
+				for n in string.splitfields(inherit, ','):
+					n = string.strip(n)
+					if dict.has_key(n):
+						# we know this super class
+						n = dict[n]
+					else:
+						c = string.splitfields(n, '.')
+						if len(c) > 1:
+							# super class
+							# is of the
+							# form module.class:
+							# look in
+							# module for class
+							m = c[-2]
+							c = c[-1]
+							if _modules.has_key(m):
+								d = _modules[m]
+								if d.has_key(c):
+									n = d[c]
+					names.append(n)
+				inherit = names
+			# remember this class
+			cur_class = Class(module, class_name, inherit,
+					  file, lineno)
+			dict[class_name] = cur_class
+			classstack.append((cur_class, thisindent))
+
+		elif m.start("Import") >= 0:
+			# import module
+			for n in string.split(m.group("ImportList"), ','):
+				n = string.strip(n)
+				try:
+					# recursively read the imported module
+					d = readmodule(n, path, inpackage)
+				except:
+					##print 'module', n, 'not found'
+					pass
+
+		elif m.start("ImportFrom") >= 0:
+			# from module import stuff
+			mod = m.group("ImportFromPath")
+			names = string.split(m.group("ImportFromList"), ',')
+			try:
+				# recursively read the imported module
+				d = readmodule(mod, path, inpackage)
+			except:
+				##print 'module', mod, 'not found'
+				continue
+			# add any classes that were defined in the
+			# imported module to our name space if they
+			# were mentioned in the list
+			for n in names:
+				n = string.strip(n)
+				if d.has_key(n):
+					dict[n] = d[n]
+				elif n == '*':
+					# only add a name if not
+					# already there (to mimic what
+					# Python does internally)
+					# also don't add names that
+					# start with _
+					for n in d.keys():
+						if n[0] != '_' and \
+						   not dict.has_key(n):
+							dict[n] = d[n]
+		else:
+			assert 0, "regexp _getnext found something unexpected"
+
+	return dict
+
+def _indent(ws, _expandtabs=string.expandtabs):
+	return len(_expandtabs(ws, TABWIDTH))
commit	7aced17437a6b05bc4b0b5ff93aa6a5d3a374d68	[log] [tgz]
author	David Scherer <dscherer@cmu.edu>	Tue Aug 15 01:13:23 2000 +0000
committer	David Scherer <dscherer@cmu.edu>	Tue Aug 15 01:13:23 2000 +0000
tree	ce0576a16111fd86ac5f56ff4ec1500f29c4f8db
parent	33a6da9971a923ceaaee1406d0feaa64b8d1759a [diff] [blame]