blob: fe34208dd65483c6ac5079ed6d39ab08821b0f7e [file] [log] [blame]
Guido van Rossum4b8c6ea2000-02-04 15:39:30 +00001"""Parse a Python file and retrieve classes and methods.
Sjoerd Mullender8cb4b1f1995-07-28 09:30:01 +00002
3Parse enough of a Python file to recognize class and method
4definitions and to find out the superclasses of a class.
5
6The interface consists of a single function:
Guido van Rossum040d7ca2002-08-23 01:36:01 +00007 readmodule_ex(module [, path[, inpackage]])
Sjoerd Mullender8cb4b1f1995-07-28 09:30:01 +00008module is the name of a Python module, path is an optional list of
9directories where the module is to be searched. If present, path is
Guido van Rossum040d7ca2002-08-23 01:36:01 +000010prepended to the system search path sys.path. (inpackage is used
11internally to search for a submodule of a package.)
Sjoerd Mullender8cb4b1f1995-07-28 09:30:01 +000012The return value is a dictionary. The keys of the dictionary are
13the names of the classes defined in the module (including classes
14that are defined via the from XXX import YYY construct). The values
15are class instances of the class Class defined here.
16
17A class is described by the class Class in this module. Instances
18of this class have the following instance variables:
Tim Peters2344fae2001-01-15 00:50:52 +000019 name -- the name of the class
20 super -- a list of super classes (Class instances)
21 methods -- a dictionary of methods
22 file -- the file in which the class was defined
23 lineno -- the line in the file on which the class statement occurred
Sjoerd Mullender8cb4b1f1995-07-28 09:30:01 +000024The dictionary of methods uses the method names as keys and the line
25numbers on which the method was defined as values.
26If the name of a super class is not recognized, the corresponding
27entry in the list of super classes is not a class instance but a
28string giving the name of the super class. Since import statements
29are recognized and imported modules are scanned as well, this
30shouldn't happen often.
31
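A top-level function is described by the class Function, a subclass of
Class.  Its list of super classes and its dictionary of methods are
always empty.  readmodule_ex() includes Function instances in the
dictionary it returns; readmodule() strips them.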
33
Sjoerd Mullender8cb4b1f1995-07-28 09:30:01 +000034BUGS
Tim Petersc6ac8a72001-10-24 20:22:40 +000035- Nested classes and functions can confuse it.
Guido van Rossumb2693021999-06-10 19:05:54 +000036
37PACKAGE RELATED BUGS
38- If you have a package and a module inside that or another package
39 with the same name, module caching doesn't work properly since the
40 key is the base name of the module/package.
41- The only entry that is returned when you readmodule a package is a
42 __path__ whose value is a list which confuses certain class browsers.
43- When code does:
44 from package import subpackage
45 class MyClass(subpackage.SuperClass):
46 ...
47 It can't locate the parent. It probably needs to have the same
48 hairy logic that the import locator already does. (This logic
49 exists coded in Python in the freeze package.)
Guido van Rossum4b8c6ea2000-02-04 15:39:30 +000050"""
Sjoerd Mullender8cb4b1f1995-07-28 09:30:01 +000051
Sjoerd Mullender8cb4b1f1995-07-28 09:30:01 +000052import sys
53import imp
Guido van Rossum040d7ca2002-08-23 01:36:01 +000054import tokenize # Python tokenizer
55from token import NAME
Sjoerd Mullender8cb4b1f1995-07-28 09:30:01 +000056
# Public names.  readmodule_ex() is the documented interface and
# Class/Function are the types of the values it returns, so they are
# exported alongside the backwards-compatible readmodule().
__all__ = ["readmodule", "readmodule_ex", "Class", "Function"]

_modules = {}                           # cache of modules we've seen
Sjoerd Mullender8cb4b1f1995-07-28 09:30:01 +000060
61# each Python class is represented by an instance of this class
class Class:
    '''Record describing one Python class found in a scanned module.'''

    def __init__(self, module, name, super, file, lineno):
        # When no list of super classes is supplied, start from a fresh
        # empty list; a supplied list is stored as-is (not copied).
        if super is not None:
            bases = super
        else:
            bases = []
        self.module, self.name = module, name
        self.super, self.methods = bases, {}
        self.file, self.lineno = file, lineno

    def _addmethod(self, name, lineno):
        # Record the line on which method `name` is defined; defining
        # the same name again overwrites the earlier line number.
        self.methods.update({name: lineno})
Sjoerd Mullender8cb4b1f1995-07-28 09:30:01 +000076
class Function(Class):
    '''Class to represent a top-level Python function.

    A Function carries the same attributes as a Class (module, name,
    file, lineno), but its list of super classes and its dictionary
    of methods are always empty.
    '''

    def __init__(self, module, name, file, lineno):
        # Passing None for the super classes makes Class store [].
        Class.__init__(self, module, name, None, file, lineno)

    def _addmethod(self, name, lineno):
        # Functions have no methods.  Raise explicitly instead of using
        # ``assert 0`` so that the guard still fires when Python runs
        # with -O, which strips assert statements.
        raise AssertionError("Function._addmethod() shouldn't be called")
Guido van Rossuma3b4a331999-06-10 14:39:39 +000083
def readmodule(module, path=[], inpackage=False):
    '''Backwards compatible interface.

    Delegate to readmodule_ex() with the same arguments and return a
    new dictionary holding every entry of its result except those
    whose value is a Function instance.'''

    everything = readmodule_ex(module, path, inpackage)
    classes_only = {}
    for name in everything:
        entry = everything[name]
        if isinstance(entry, Function):
            # top-level functions are not part of the old interface
            continue
        classes_only[name] = entry
    return classes_only
Guido van Rossuma3b4a331999-06-10 14:39:39 +000096
def _resolve_base(name, known):
    # Turn a superclass name into its Class instance when possible:
    # first through the names already known in the module being
    # scanned, then (for a "module.class" spelling) through the cache
    # of scanned modules.  An unrecognized name is returned unchanged,
    # i.e. as a string.
    if name in known:
        return known[name]
    parts = name.split('.')
    if len(parts) > 1:
        modname, clsname = parts[-2], parts[-1]
        if modname in _modules:
            classes = _modules[modname]
            if clsname in classes:
                return classes[clsname]
    return name


def _read_bases(g, known):
    # Consume tokens from g, starting just after the '(' that follows a
    # class name, up to and including the matching ')'.  Return the
    # list of superclasses (Class instances, or strings for names that
    # could not be resolved).
    bases = []
    level = 1           # parenthesis nesting depth
    pieces = []         # tokens making up the current superclass
    while True:
        tokentype, token, start, end, line = g.next()
        if token in (')', ',') and level == 1:
            bases.append(_resolve_base("".join(pieces), known))
            # Start afresh for the next superclass; without this the
            # tokens of earlier superclasses leak into later names
            # ("class C(A, B)" would yield "A" and "AB").
            pieces = []
        if token == '(':
            level += 1
        elif token == ')':
            level -= 1
            if level == 0:
                break
        elif token == ',' and level == 1:
            pass
        else:
            pieces.append(token)
    return bases


def readmodule_ex(module, path=[], inpackage=False):
    '''Read a module file and return a dictionary of classes.

    Search for MODULE in PATH and sys.path, read and parse the
    module and return a dictionary with one entry for each class
    and each top-level function found in the module.

    Arguments:
      module    -- name of the module, possibly dotted ("pkg.mod")
      path      -- optional list of directories searched before
                   sys.path (the default list is never modified)
      inpackage -- true when MODULE is looked up as a submodule of
                   a package; PATH alone is then tried first

    The values of the returned dictionary are Class and Function
    instances.  For a package the dictionary also has a '__path__'
    entry holding a list with the package directory.  Built-in
    modules and modules that are not Python source give a
    dictionary without class or function entries.  Results are
    cached in _modules, keyed by module name.

    ImportError is raised if MODULE itself cannot be found.
    Failures while reading modules that MODULE imports are
    ignored.'''

    result = {}

    i = module.rfind('.')
    if i >= 0:
        # Dotted module name: read the package first, then look the
        # submodule up in the package's own directories.
        package = module[:i].strip()
        submodule = module[i+1:].strip()
        parent = readmodule_ex(package, path, inpackage)
        child = readmodule_ex(submodule, parent['__path__'], True)
        return child

    if module in _modules:
        # we've seen this module before...
        return _modules[module]
    if module in sys.builtin_module_names:
        # this is a built-in module
        _modules[module] = result
        return result

    # search the path for the module
    f = None
    if inpackage:
        try:
            f, fname, (suff, mode, kind) = imp.find_module(module, path)
        except ImportError:
            f = None
    if f is None:
        fullpath = list(path) + sys.path
        f, fname, (suff, mode, kind) = imp.find_module(module, fullpath)
    if kind == imp.PKG_DIRECTORY:
        # A package: remember its directory and scan its __init__.
        result['__path__'] = [fname]
        _modules[module] = result
        path = [fname] + path
        f, fname, (suff, mode, kind) = imp.find_module('__init__', [fname])
    if kind != imp.PY_SOURCE:
        # not Python source, can't do anything with this module
        if f is not None:
            # find_module() returns no file object for some module
            # types, so only close what was actually opened
            f.close()
        _modules[module] = result
        return result

    # Register the (still empty) result before scanning so that
    # recursive reads triggered by import statements find it.
    _modules[module] = result
    classstack = []     # stack of (class, indent) pairs

    g = tokenize.generate_tokens(f.readline)
    try:
        # The outer try/finally closes the file even when tokenizing
        # fails part-way through.
        try:
            for tokentype, token, start, end, line in g:
                if token == 'def':
                    lineno, thisindent = start
                    tokentype, meth_name, start, end, line = g.next()
                    if tokentype != NAME:
                        continue        # Syntax error
                    # close all classes indented at least as much
                    while classstack and \
                          classstack[-1][1] >= thisindent:
                        del classstack[-1]
                    if classstack:
                        # it's a class method
                        cur_class = classstack[-1][0]
                        cur_class._addmethod(meth_name, lineno)
                    else:
                        # it's a function
                        result[meth_name] = Function(module, meth_name,
                                                     fname, lineno)
                elif token == 'class':
                    lineno, thisindent = start
                    tokentype, class_name, start, end, line = g.next()
                    if tokentype != NAME:
                        continue        # Syntax error
                    # close all classes indented at least as much
                    while classstack and \
                          classstack[-1][1] >= thisindent:
                        del classstack[-1]
                    # parse what follows the class name
                    tokentype, token, start, end, line = g.next()
                    inherit = None
                    if token == '(':
                        # there's a list of superclasses
                        inherit = _read_bases(g, result)
                    cur_class = Class(module, class_name, inherit,
                                      fname, lineno)
                    result[class_name] = cur_class
                    classstack.append((cur_class, thisindent))
                elif token == 'import' and start[1] == 0:
                    modules = _getnamelist(g)
                    for mod, mod2 in modules:
                        try:
                            # Recursively read the imported module
                            readmodule_ex(mod, path, inpackage)
                        except Exception:
                            # If we can't find or parse the imported
                            # module, too bad -- don't die here.
                            pass
                elif token == 'from' and start[1] == 0:
                    mod, token = _getname(g)
                    if not mod or token != "import":
                        continue
                    names = _getnamelist(g)
                    try:
                        # Recursively read the imported module
                        d = readmodule_ex(mod, path, inpackage)
                    except Exception:
                        # If we can't find or parse the imported
                        # module, too bad -- don't die here.
                        continue
                    # add any classes that were defined in the imported
                    # module to our name space if they were mentioned
                    # in the list
                    for n, n2 in names:
                        if n in d:
                            result[n2 or n] = d[n]
                        elif n == '*':
                            # only add a name if not already there (to
                            # mimic what Python does internally) also
                            # don't add names that start with _
                            for key in d:
                                if key[0] != '_' and key not in result:
                                    result[key] = d[key]
        except StopIteration:
            # The token stream ended in the middle of a construct.
            pass
    finally:
        f.close()

    return result
Guido van Rossumdf9f7a31999-06-08 12:53:21 +0000258
def _getnamelist(g):
    # Read a comma-separated sequence of dotted names, each optionally
    # followed by an 'as' clause, from the token generator g.  The
    # result is a list of (name, alias) pairs; alias is None when the
    # name has no 'as' clause.
    pairs = []
    more = True
    while more:
        dotted, tok = _getname(g)
        if not dotted:
            return pairs
        alias = None
        if tok == 'as':
            alias, tok = _getname(g)
        pairs.append((dotted, alias))
        # Skip ahead to the separating comma or to the end of the line.
        while not (tok == "," or "\n" in tok):
            toktype, tok, start, end, line = g.next()
        # Only a comma announces another name.
        more = (tok == ",")
    return pairs
278
def _getname(g):
    # Read one dotted name from the token generator g.  The result is
    # a pair (name, token): name is the dotted name (or None when the
    # first token is neither a NAME nor '*') and token is the text of
    # the last token read, i.e. the one that ended the name.
    toktype, tok, start, end, line = g.next()
    if toktype != NAME and tok != '*':
        return (None, tok)
    dotted = tok
    while True:
        toktype, tok, start, end, line = g.next()
        if tok != '.':
            break
        # A dot must be followed by another NAME to extend the name.
        toktype, tok, start, end, line = g.next()
        if toktype != NAME:
            break
        dotted = dotted + '.' + tok
    return (dotted, tok)
296 return (".".join(parts), token)