# blob: c19c7a577e844273174c7e3e472204338e69e355
'''Parse a Python file and retrieve classes and methods.

Parse enough of a Python file to recognize class and method
definitions and to find out the superclasses of a class.

The interface consists of a single function:
    readmodule(module, path)
module is the name of a Python module, path is an optional list of
directories in which the module is searched for.  If present, path is
prepended to the system search path sys.path.
The return value is a dictionary.  The keys of the dictionary are
the names of the classes defined in the module (including classes
that are defined via the from XXX import YYY construct).  The values
are class instances of the class Class defined here.

A class is described by the class Class in this module.  Instances
of this class have the following instance variables:
    name -- the name of the class
    super -- a list of super classes (Class instances)
    methods -- a dictionary of methods
    file -- the file in which the class was defined
    lineno -- the line in the file on which the class statement occurred
The dictionary of methods uses the method names as keys and the line
numbers on which the method was defined as values.
If the name of a super class is not recognized, the corresponding
entry in the list of super classes is not a class instance but a
string giving the name of the super class.  Since import statements
are recognized and imported modules are scanned as well, this
shouldn't happen often.

BUGS
Continuation lines are not dealt with at all and strings may confuse
the hell out of the parser, but it usually works.'''
34
35import os
36import sys
37import imp
38import regex
39import string
40
# The patterns below use the syntax of the old `regex` module: \( \)
# delimit a group, \| separates alternatives and a bare parenthesis is
# a literal character.  regex.symcomp() additionally lets a group be
# named by writing \(<name>...\); the name is then used with group().

# an identifier, captured as group 'id'
id = '\\(<id>[A-Za-z_][A-Za-z0-9_]*\\)'  # match identifier

# a line holding only blanks, or whose first non-blank character is '#'
blank_line = regex.compile('^[ \t]*\\($\\|#\\)')

# "class NAME" in column 0, optionally followed by a parenthesized list
# of bases (captured, parentheses included, as group 'sup') and a colon
is_class = regex.symcomp('^class[ \t]+'+id+'[ \t]*\\(<sup>([^)]*)\\)?[ \t]*:')

# an indented "def NAME("
is_method = regex.symcomp('^[ \t]+def[ \t]+'+id+'[ \t]*(')

# "import ..." in column 0; everything up to a '#' is group 'imp'
is_import = regex.symcomp('^import[ \t]*\\(<imp>[^#]+\\)')

# "from NAME import ..." in column 0; the imported names are group 'imp'
is_from = regex.symcomp('^from[ \t]+'+id+'[ \t]+import[ \t]+\\(<imp>[^#]+\\)')

# a line whose first character is not a blank, i.e. code in column 0
dedent = regex.compile('^[^ \t]')

# NOTE(review): not used by any live code in this file.  As written it
# matches a leading run of NON-blank characters, which looks inverted
# for a pattern called "indent" -- confirm before relying on it.
indent = regex.compile('^[^ \t]*')

# cache of modules we've seen: module name -> dictionary of its classes
_modules = {}
51
# each Python class is represented by an instance of this class
class Class:
    '''Class to represent a Python class.

    Instance variables:
    name -- the name of the class
    super -- the list of super classes passed in by the caller
             (an empty list when None was passed)
    methods -- dictionary mapping a method name to the line number
               recorded for it
    file -- the file in which the class was defined
    lineno -- the line on which the class statement occurred'''

    def __init__(self, name, super, file, lineno):
        '''Store the description of one class; SUPER may be None.'''
        self.name = name
        if super is None:
            # no base classes: every instance gets its own empty list
            super = []
        self.super = super
        self.methods = {}
        self.file = file
        self.lineno = lineno

    def _addmethod(self, name, lineno):
        '''Record that method NAME is defined on line LINENO.

        A later definition with the same name replaces the earlier
        entry.'''
        self.methods[name] = lineno
66
def readmodule(module, path = []):
    '''Read a module file and return a dictionary of classes.

    Search for MODULE in PATH and sys.path, read and parse the
    module and return a dictionary with one entry for each class
    found in the module.

    The result is cached in _modules, so asking for the same module
    again returns the same dictionary.  A built-in module, or a module
    whose first match on the search path is not Python source, yields
    an empty dictionary.  IOError is raised when no file for MODULE
    can be opened.  PATH is only read, never modified.'''

    if _modules.has_key(module):
        # we've seen this module before...
        return _modules[module]

    classes = {}
    if module in sys.builtin_module_names:
        # this is a built-in module: there is nothing to parse
        _modules[module] = classes
        return classes

    # raises IOError, before anything is cached, if nothing is found
    f, filename, kind = _open_module(module, path)

    # Cache the (still empty) result before parsing: a module that is
    # imported again while it is being read is answered from the cache
    # instead of being parsed a second time.
    _modules[module] = classes
    try:
        if kind == imp.PY_SOURCE:
            _scan_source(f, filename, classes, path)
        # anything else is not Python source; the result stays empty
    finally:
        # close the file even when parsing is interrupted
        f.close()
    return classes


def _open_module(module, path):
    '''Open the first file for MODULE found along PATH + sys.path.

    Return the tuple (open file, file name, file type), where the file
    type is the one imp.get_suffixes() gives for the matching suffix.
    Raise IOError when no candidate file can be opened.'''
    suffixes = imp.get_suffixes()
    for dirname in path + sys.path:
        for suff, mode, kind in suffixes:
            filename = os.path.join(dirname, module + suff)
            try:
                f = open(filename, mode)
            except IOError:
                # not here; try the next suffix or directory
                pass
            else:
                # found the module
                return f, filename, kind
    raise IOError('module ' + module + ' not found')


def _scan_source(f, filename, classes, path):
    '''Parse the open source file F and fill the dictionary CLASSES.

    FILENAME is stored in every Class that is created; PATH is handed
    on to readmodule() for the modules that F imports.'''
    cur_class = None
    lineno = 0
    while 1:
        line = f.readline()
        if not line:
            break
        lineno = lineno + 1  # count lines
        if line[-1:] == '\n':
            # remove the line feed; the last line of a file may lack
            # one and must not lose a real character
            line = line[:-1]
        if blank_line.match(line) >= 0:
            # ignore blank (and comment only) lines
            continue
        if is_import.match(line) >= 0:
            # import module: recursively read every imported module so
            # that its classes are in the cache
            for n in string.splitfields(is_import.group('imp'), ','):
                n = string.strip(n)
                try:
                    readmodule(n, path)
                except IOError:
                    # best effort: report it and carry on (written so
                    # that it parses as a statement and as a call)
                    print('module ' + n + ' not found')
            continue
        if is_from.match(line) >= 0:
            # from module import stuff
            mod = is_from.group('id')
            names = string.splitfields(is_from.group('imp'), ',')
            try:
                # recursively read the imported module
                d = readmodule(mod, path)
            except IOError:
                print('module ' + mod + ' not found')
                continue
            # add any classes that were defined in the imported module
            # to our name space if they were mentioned in the list
            for n in names:
                n = string.strip(n)
                if d.has_key(n):
                    classes[n] = d[n]
                elif n == '*':
                    # only add a name if not already there (to mimic
                    # what Python does internally)
                    for key in d.keys():
                        if not classes.has_key(key):
                            classes[key] = d[key]
            continue
        if is_class.match(line) >= 0:
            # we found a class definition
            class_name = is_class.group('id')
            inherit = is_class.group('sup')
            if inherit:
                # the class inherits from other classes
                inherit = _resolve_bases(inherit, classes)
            # remember this class
            cur_class = Class(class_name, inherit, filename, lineno)
            classes[class_name] = cur_class
            continue
        if is_method.match(line) >= 0:
            # found a method definition
            if cur_class:
                # and we know the class it belongs to
                cur_class._addmethod(is_method.group('id'), lineno)
            continue
        if dedent.match(line) >= 0:
            # code in column 0: end of class definition
            cur_class = None


def _resolve_bases(inherit, classes):
    '''Turn the text of a base class list into a list of bases.

    INHERIT is the list as matched by is_class, parentheses included.
    A base found in CLASSES, or written as module.class with that
    module in the cache, is replaced by its Class instance; any other
    base is kept as the name string.'''
    bases = []
    for n in string.splitfields(string.strip(inherit[1:-1]), ','):
        n = string.strip(n)
        if not n:
            # "()" or a trailing comma: there is no name here
            continue
        if classes.has_key(n):
            # we know this super class
            n = classes[n]
        else:
            parts = string.splitfields(n, '.')
            if len(parts) > 1:
                # super class is of the form module.class:
                # look in module for class
                m = parts[-2]
                c = parts[-1]
                if _modules.has_key(m) and _modules[m].has_key(c):
                    n = _modules[m][c]
        bases.append(n)
    return bases