blob: f6f5bb932e9b3939d65d447abca0d4f958fcfa4c [file] [log] [blame]
'''Parse a Python file and retrieve classes and methods.

Parse enough of a Python file to recognize class and method
definitions and to find out the superclasses of a class.

The interface consists of a single function:
        readmodule(module, path)
module is the name of a Python module, path is an optional list of
directories where the module is to be searched.  If present, path is
prepended to the system search path sys.path.
The return value is a dictionary.  The keys of the dictionary are
the names of the classes defined in the module (including classes
that are defined via the from XXX import YYY construct).  The values
are class instances of the class Class defined here.

A class is described by the class Class in this module.  Instances
of this class have the following instance variables:
        module -- the name of the module in which the class was defined
        name -- the name of the class
        super -- a list of super classes (Class instances)
        methods -- a dictionary of methods
        file -- the file in which the class was defined
        lineno -- the line in the file on which the class statement occurred
The dictionary of methods uses the method names as keys and the line
numbers on which the method was defined as values.
If the name of a super class is not recognized, the corresponding
entry in the list of super classes is not a class instance but a
string giving the name of the super class.  Since import statements
are recognized and imported modules are scanned as well, this
shouldn't happen often.

BUGS
Continuation lines are not dealt with at all and strings may confuse
the hell out of the parser, but it usually works.'''
34
35import os
36import sys
37import imp
38import regex
39import string
40
41id = '\\(<id>[A-Za-z_][A-Za-z0-9_]*\\)' # match identifier
42blank_line = regex.compile('^[ \t]*\\($\\|#\\)')
43is_class = regex.symcomp('^class[ \t]+'+id+'[ \t]*\\(<sup>([^)]*)\\)?[ \t]*:')
44is_method = regex.symcomp('^[ \t]+def[ \t]+'+id+'[ \t]*(')
45is_import = regex.symcomp('^import[ \t]*\\(<imp>[^#]+\\)')
46is_from = regex.symcomp('^from[ \t]+'+id+'[ \t]+import[ \t]+\\(<imp>[^#]+\\)')
47dedent = regex.compile('^[^ \t]')
48indent = regex.compile('^[^ \t]*')
49
50_modules = {} # cache of modules we've seen
51
52# each Python class is represented by an instance of this class
class Class:
    '''Description of one class found while scanning a module.

    Instance variables:
            module -- value of the MODULE constructor argument
            name -- name of the class
            super -- list of super classes (empty when None was passed)
            methods -- dictionary mapping method name to line number
            file -- value of the FILE constructor argument
            lineno -- value of the LINENO constructor argument
    '''

    def __init__(self, module, name, super, file, lineno):
        # A missing list of super classes is stored as a fresh empty
        # list; a list that was passed in is stored as is.
        if super is None:
            super = []
        self.methods = {}
        self.module = module
        self.name = name
        self.super = super
        self.file = file
        self.lineno = lineno

    def _addmethod(self, name, lineno):
        '''Record that method NAME is defined on line LINENO.'''
        self.methods[name] = lineno
67
68def readmodule(module, path = []):
69 '''Read a module file and return a dictionary of classes.
70
71 Search for MODULE in PATH and sys.path, read and parse the
72 module and return a dictionary with one entry for each class
73 found in the module.'''
74
75 if _modules.has_key(module):
76 # we've seen this module before...
77 return _modules[module]
78 if module in sys.builtin_module_names:
79 # this is a built-in module
80 dict = {}
81 _modules[module] = dict
82 return dict
83
84 # search the path for the module
85 f = None
86 suffixes = imp.get_suffixes()
87 for dir in path + sys.path:
88 for suff, mode, type in suffixes:
89 file = os.path.join(dir, module + suff)
90 try:
91 f = open(file, mode)
92 except IOError:
93 pass
94 else:
95 # found the module
96 break
97 if f:
98 break
99 if not f:
100 raise IOError, 'module ' + module + ' not found'
101 if type != imp.PY_SOURCE:
102 # not Python source, can't do anything with this module
103 f.close()
104 dict = {}
105 _modules[module] = dict
106 return dict
107
108 cur_class = None
109 dict = {}
110 _modules[module] = dict
111 imports = []
112 lineno = 0
113 while 1:
114 line = f.readline()
115 if not line:
116 break
117 lineno = lineno + 1 # count lines
118 line = line[:-1] # remove line feed
119 if blank_line.match(line) >= 0:
120 # ignore blank (and comment only) lines
121 continue
122## if indent.match(line) >= 0:
123## indentation = len(string.expandtabs(indent.group(0), 8))
124 if is_import.match(line) >= 0:
125 # import module
126 for n in string.splitfields(is_import.group('imp'), ','):
127 n = string.strip(n)
128 try:
129 # recursively read the
130 # imported module
131 d = readmodule(n, path)
132 except:
133 print 'module',n,'not found'
134 pass
135 continue
136 if is_from.match(line) >= 0:
137 # from module import stuff
138 mod = is_from.group('id')
139 names = string.splitfields(is_from.group('imp'), ',')
140 try:
141 # recursively read the imported module
142 d = readmodule(mod, path)
143 except:
144 print 'module',mod,'not found'
145 continue
146 # add any classes that were defined in the
147 # imported module to our name space if they
148 # were mentioned in the list
149 for n in names:
150 n = string.strip(n)
151 if d.has_key(n):
152 dict[n] = d[n]
153 elif n == '*':
154 # only add a name if not
155 # already there (to mimic what
156 # Python does internally)
157 for n in d.keys():
158 if not dict.has_key(n):
159 dict[n] = d[n]
160 continue
161 if is_class.match(line) >= 0:
162 # we found a class definition
163 class_name = is_class.group('id')
164 inherit = is_class.group('sup')
165 if inherit:
166 # the class inherits from other classes
167 inherit = string.strip(inherit[1:-1])
168 names = []
169 for n in string.splitfields(inherit, ','):
170 n = string.strip(n)
171 if dict.has_key(n):
172 # we know this super class
173 n = dict[n]
174 else:
175 c = string.splitfields(n, '.')
176 if len(c) > 1:
177 # super class
178 # is of the
179 # form module.class:
180 # look in
181 # module for class
182 m = c[-2]
183 c = c[-1]
184 if _modules.has_key(m):
185 d = _modules[m]
186 if d.has_key(c):
187 n = d[c]
188 names.append(n)
189 inherit = names
190 # remember this class
Sjoerd Mullender825bae71995-11-02 17:21:33 +0000191 cur_class = Class(module, class_name, inherit, file, lineno)
Sjoerd Mullender8cb4b1f1995-07-28 09:30:01 +0000192 dict[class_name] = cur_class
193 continue
194 if is_method.match(line) >= 0:
195 # found a method definition
196 if cur_class:
197 # and we know the class it belongs to
198 meth_name = is_method.group('id')
199 cur_class._addmethod(meth_name, lineno)
200 continue
201 if dedent.match(line) >= 0:
202 # end of class definition
203 cur_class = None
204 f.close()
205 return dict