blob: cb4125f86078d896a32c8c0af3ba3fb42aa242e5 [file] [log] [blame]
'''Parse a Python file and retrieve classes and methods.

Parse enough of a Python file to recognize class and method
definitions and to find out the superclasses of a class.

The interface consists of a single function:
        readmodule(module, path)
module is the name of a Python module, path is an optional list of
directories where the module is to be searched.  If present, path is
prepended to the system search path sys.path.
The return value is a dictionary.  The keys of the dictionary are
the names of the classes defined in the module (including classes
that are defined via the from XXX import YYY construct).  The values
are instances of the class Class defined here.

A class is described by the class Class in this module.  Instances
of this class have the following instance variables:
        module -- the name of the module in which the class was found
        name -- the name of the class
        super -- a list of super classes (Class instances)
        methods -- a dictionary of methods
        file -- the file in which the class was defined
        lineno -- the line in the file on which the class statement occurred
The dictionary of methods uses the method names as keys and the line
numbers on which the method was defined as values.
If the name of a super class is not recognized, the corresponding
entry in the list of super classes is not a class instance but a
string giving the name of the super class.  Since import statements
are recognized and imported modules are scanned as well, this
shouldn't happen often.

BUGS
Continuation lines are not dealt with at all and strings may confuse
the hell out of the parser, but it usually works.'''
34
35import os
36import sys
37import imp
Guido van Rossum31626bc1997-10-24 14:46:16 +000038import re
Sjoerd Mullender8cb4b1f1995-07-28 09:30:01 +000039import string
40
Guido van Rossum31626bc1997-10-24 14:46:16 +000041id = '(?P<id>[A-Za-z_][A-Za-z0-9_]*)' # match identifier
42blank_line = re.compile('^[ \t]*($|#)')
43is_class = re.compile('^class[ \t]+'+id+'[ \t]*(?P<sup>\([^)]*\))?[ \t]*:')
44is_method = re.compile('^[ \t]+def[ \t]+'+id+'[ \t]*\(')
45is_import = re.compile('^import[ \t]*(?P<imp>[^#]+)')
46is_from = re.compile('^from[ \t]+'+id+'[ \t]+import[ \t]+(?P<imp>[^#]+)')
47dedent = re.compile('^[^ \t]')
48indent = re.compile('^[^ \t]*')
Sjoerd Mullender8cb4b1f1995-07-28 09:30:01 +000049
50_modules = {} # cache of modules we've seen
51
# one instance of this class is created for every Python class found
class Class:
    '''Description of a single Python class located by the parser.'''

    def __init__(self, module, name, super, file, lineno):
        # where the class lives
        self.module = module
        self.file = file
        self.lineno = lineno
        # what the class is
        self.name = name
        if super is not None:
            self.super = super
        else:
            # no base list given: start with an empty one
            self.super = []
        # method name -> line number, filled in by _addmethod()
        self.methods = {}

    def _addmethod(self, name, lineno):
        '''Record that method NAME is defined on line LINENO.'''
        self.methods[name] = lineno
67
def _resolve_superclasses(inherit, classes):
    '''Turn the text between the parentheses of a class statement into
    a list of super classes.

    INHERIT is the comma separated list of base names, CLASSES is the
    dictionary of classes known so far in the module being parsed.
    Each name that can be resolved is replaced by the object found for
    it; any other name is kept as a string.'''
    supers = []
    for name in inherit.split(','):
        name = name.strip()
        if not name:
            # "class C():" or a trailing comma: nothing to record
            continue
        if name in classes:
            # we know this super class
            base = classes[name]
        else:
            base = name
            parts = name.split('.')
            if len(parts) > 1:
                # super class is of the form module.class:
                # look in the cached module for the class
                known = _modules.get(parts[-2])
                if known is not None and parts[-1] in known:
                    base = known[parts[-1]]
        supers.append(base)
    return supers


def readmodule(module, path=None, inpackage=0):
    '''Read a module file and return a dictionary of classes.

    Search for MODULE in PATH and sys.path, read and parse the
    module and return a dictionary with one entry for each class
    found in the module.

    MODULE may be a dotted name; the package part is then read first
    and the last component is looked up in the '__path__' entry of the
    package's dictionary.  PATH is an optional list of directories
    that is searched before sys.path.  If INPACKAGE is true, PATH
    alone is tried first and PATH + sys.path is only used as a
    fallback.

    The dictionary returned for a package directory is
    {'__path__': [directory]}; for built-in modules and for anything
    that is not Python source it is empty.  Results are cached in
    _modules under the undotted module name, so reading a module a
    second time returns the same dictionary.

    Whatever imp.find_module() raises when the module cannot be
    located (ImportError, per the imp documentation) is propagated for
    the module requested by the caller.  Failures while reading the
    modules it imports are only reported on standard output.'''

    if path is None:
        path = []

    i = module.rfind('.')
    if i >= 0:
        # Dotted module name
        package = module[:i]
        submodule = module[i+1:]
        parent = readmodule(package, path, inpackage)
        return readmodule(submodule, parent['__path__'], 1)

    if module in _modules:
        # we've seen this module before...
        return _modules[module]
    if module in sys.builtin_module_names:
        # this is a built-in module
        classes = {}
        _modules[module] = classes
        return classes

    # search the path for the module
    f = None
    if inpackage:
        try:
            f, filename, (suff, mode, kind) = imp.find_module(module, path)
        except ImportError:
            f = None
    if f is None:
        fullpath = list(path) + sys.path
        f, filename, (suff, mode, kind) = imp.find_module(module, fullpath)
    if kind == imp.PKG_DIRECTORY:
        classes = {'__path__': [filename]}
        _modules[module] = classes
        # XXX Should we recursively look for submodules?
        return classes
    if kind != imp.PY_SOURCE:
        # not Python source, can't do anything with this module
        if f is not None:
            # find_module() does not open a file for every module type
            f.close()
        classes = {}
        _modules[module] = classes
        return classes

    classes = {}
    # enter the module in the cache before parsing it, so that modules
    # importing each other do not recurse forever
    _modules[module] = classes
    cur_class = None
    lineno = 0
    try:
        for line in f:
            lineno = lineno + 1     # count lines
            if line[-1:] == '\n':
                # remove the line feed; the last line of a file may
                # not have one and must not lose a real character
                line = line[:-1]
            if blank_line.match(line):
                # ignore blank (and comment only) lines
                continue

            res = is_import.match(line)
            if res:
                # import module
                for name in res.group('imp').split(','):
                    name = name.strip()
                    try:
                        # recursively read the imported module
                        readmodule(name, path, inpackage)
                    except Exception:
                        # best effort: a module that cannot be read
                        # must not stop the scan of this one
                        print('module %s not found' % name)
                continue

            res = is_from.match(line)
            if res:
                # from module import stuff
                mod = res.group('id')
                names = res.group('imp').split(',')
                try:
                    # recursively read the imported module
                    d = readmodule(mod, path, inpackage)
                except Exception:
                    print('module %s not found' % mod)
                    continue
                # add any classes that were defined in the imported
                # module to our name space if they were mentioned in
                # the list
                for name in names:
                    name = name.strip()
                    if name in d:
                        classes[name] = d[name]
                    elif name == '*':
                        # only add a name if not already there (to
                        # mimic what Python does internally); also
                        # don't add names that start with _
                        for key in list(d.keys()):
                            if key[:1] != '_' and key not in classes:
                                classes[key] = d[key]
                continue

            res = is_class.match(line)
            if res:
                # we found a class definition
                class_name = res.group('id')
                inherit = res.group('sup')
                if inherit:
                    # the class inherits from other classes; drop the
                    # surrounding parentheses before resolving them
                    inherit = _resolve_superclasses(inherit[1:-1], classes)
                # remember this class
                cur_class = Class(module, class_name, inherit,
                                  filename, lineno)
                classes[class_name] = cur_class
                continue

            res = is_method.match(line)
            if res:
                # found a method definition
                if cur_class is not None:
                    # and we know the class it belongs to
                    cur_class._addmethod(res.group('id'), lineno)
                continue

            if dedent.match(line):
                # end of class definition
                cur_class = None
    finally:
        # close the source file even when parsing fails half way
        f.close()
    return classes
Guido van Rossum31626bc1997-10-24 14:46:16 +0000222