# blob: cf93f3ea0db3445197d903f74429f425990f2418
'''Parse a Python file and retrieve classes and methods.

Parse enough of a Python file to recognize class and method
definitions and to find out the superclasses of a class.

The interface consists of a single function:
        readmodule(module, path)
module is the name of a Python module, path is an optional list of
directories in which the module is searched for.  If present, path is
prepended to the system search path sys.path.
The return value is a dictionary.  The keys of the dictionary are
the names of the classes defined in the module (including classes
that are defined via the from XXX import YYY construct).  The values
are instances of the class Class defined here.

A class is described by the class Class in this module.  Instances
of this class have the following instance variables:
        module -- the name of the module in which the class was defined
        name -- the name of the class
        super -- a list of super classes (Class instances)
        methods -- a dictionary of methods
        file -- the file in which the class was defined
        lineno -- the line in the file on which the class statement occurred
The dictionary of methods uses the method names as keys and the line
numbers on which the method was defined as values.
If the name of a super class is not recognized, the corresponding
entry in the list of super classes is not a class instance but a
string giving the name of the super class.  Since import statements
are recognized and imported modules are scanned as well, this
shouldn't happen often.

BUGS
Continuation lines are not dealt with at all and strings may confuse
the hell out of the parser, but it usually works.'''

import imp
import os
import re
import string
import sys

# Line-oriented patterns used by readmodule().  Every pattern is applied
# with match(), so it is anchored at the start of one physical source line.
# Raw strings are used so that the backslashes reach the regex engine
# untouched.

# Sub-pattern for an identifier, captured as the named group 'id'.  (The
# name shadows the builtin id(); it is kept as-is for compatibility.)
id = r'(?P<id>[A-Za-z_][A-Za-z0-9_]*)'

# A line that is empty, blank, or holds only a comment.
blank_line = re.compile(r'^[ \t]*($|#)')

# A class statement starting in column 0; the optional parenthesised
# list of base classes is captured (parentheses included) as 'sup'.
is_class = re.compile(r'^class[ \t]+' + id + r'[ \t]*(?P<sup>\([^)]*\))?[ \t]*:')

# An indented def statement, i.e. a candidate method definition.
is_method = re.compile(r'^[ \t]+def[ \t]+' + id + r'[ \t]*\(')

# A column-0 "import a, b" statement; everything up to a comment is
# captured as 'imp'.  At least one blank is required after the keyword so
# that a name which merely starts with "import" is not mistaken for an
# import statement.
is_import = re.compile(r'^import[ \t]+(?P<imp>[^#]+)')

# A column-0 "from mod import a, b" statement: the module name is 'id',
# the imported names (up to a comment) are 'imp'.
is_from = re.compile(r'^from[ \t]+' + id + r'[ \t]+import[ \t]+(?P<imp>[^#]+)')

# A line whose first character is not a blank, i.e. a line at column 0.
dedent = re.compile(r'^[^ \t]')

# The leading run of blanks and tabs of a line (possibly empty).
indent = re.compile(r'^[ \t]*')

_modules = {}                           # cache of modules we've seen

# each Python class is represented by an instance of this class
class Class:
    '''Class to represent a Python class.

    Instance variables:
        module -- name of the module in which the class was defined
        name -- name of the class
        super -- list of super classes; an empty list when None was given
        methods -- dictionary mapping method names to line numbers
        file -- the file in which the class was defined
        lineno -- line number of the class statement
    '''

    def __init__(self, module, name, super, file, lineno):
        self.module = module
        self.name = name
        if super is None:
            # no base-class list given: normalise to an empty list
            super = []
        self.super = super
        self.methods = {}
        self.file = file
        self.lineno = lineno

    def _addmethod(self, name, lineno):
        '''Record that method NAME is defined on line LINENO.

        A later definition with the same name replaces the earlier entry.
        '''
        self.methods[name] = lineno

def _find_module(module, path):
    '''Open the first file for MODULE found on PATH + sys.path.

    Each directory is tried with every suffix reported by
    imp.get_suffixes().  Return a tuple (f, filename, kind) for the first
    file that can be opened, or (None, None, None) when there is none.
    '''
    suffixes = imp.get_suffixes()
    for dirname in path + sys.path:
        for suff, mode, kind in suffixes:
            filename = os.path.join(dirname, module + suff)
            try:
                f = open(filename, mode)
            except IOError:
                continue
            return f, filename, kind
    return None, None, None


def _resolve_super(name, classes):
    '''Return the Class instance for base-class NAME if it is known.

    NAME is looked up in CLASSES first.  A dotted name is then looked up
    by its last component in the cached module named by its
    second-to-last component.  When nothing is found, NAME itself (a
    string) is returned.
    '''
    if name in classes:
        # we know this super class
        return classes[name]
    parts = name.split('.')
    if len(parts) > 1:
        # super class is of the form module.class: look in module for class
        known = _modules.get(parts[-2])
        if known is not None and parts[-1] in known:
            return known[parts[-1]]
    return name


def readmodule(module, path = []):
    '''Read a module file and return a dictionary of classes.

    Search for MODULE in PATH and sys.path, read and parse the
    module and return a dictionary with one entry for each class
    found in the module.

    The result maps class names to Class instances and is cached in
    _modules, so a later call for the same module returns the same
    dictionary.  Built-in modules and modules found only in a non-source
    form yield an empty dictionary.  Modules named in "import" and
    "from ... import" statements are read recursively; a failure to read
    one of them is reported on standard output and otherwise ignored.

    Raises IOError when no file for MODULE can be opened.

    PATH is never modified, so the shared default list is harmless.
    '''

    if module in _modules:
        # we've seen this module before...
        return _modules[module]
    if module in sys.builtin_module_names:
        # this is a built-in module
        classes = {}
        _modules[module] = classes
        return classes

    # search the path for the module
    f, filename, kind = _find_module(module, path)
    if f is None:
        raise IOError('module ' + module + ' not found')
    if kind != imp.PY_SOURCE:
        # not Python source, can't do anything with this module
        f.close()
        classes = {}
        _modules[module] = classes
        return classes

    cur_class = None
    classes = {}
    # Register the result before parsing so that mutually importing
    # modules find each other in the cache instead of recursing forever.
    _modules[module] = classes
    lineno = 0
    try:
        while 1:
            line = f.readline()
            if not line:
                break
            lineno = lineno + 1         # count lines
            if line[-1:] == '\n':
                # remove the line feed; the last line of a file may lack
                # one, in which case nothing must be chopped off
                line = line[:-1]
            if blank_line.match(line):
                # ignore blank (and comment only) lines
                continue

            res = is_import.match(line)
            if res:
                # import module
                for n in res.group('imp').split(','):
                    n = n.strip()
                    try:
                        # recursively read the imported module
                        readmodule(n, path)
                    except Exception:
                        # best effort: an unreadable import must not
                        # abort the scan of the current module
                        print('module ' + n + ' not found')
                continue

            res = is_from.match(line)
            if res:
                # from module import stuff
                mod = res.group('id')
                names = res.group('imp').split(',')
                try:
                    # recursively read the imported module
                    d = readmodule(mod, path)
                except Exception:
                    print('module ' + mod + ' not found')
                    continue
                # add any classes that were defined in the imported
                # module to our name space if they were mentioned in
                # the list
                for n in names:
                    n = n.strip()
                    if n in d:
                        classes[n] = d[n]
                    elif n == '*':
                        # only add a name if not already there (to mimic
                        # what Python does internally); also don't add
                        # names that start with _
                        for key in d.keys():
                            if key[:1] != '_' and key not in classes:
                                classes[key] = d[key]
                continue

            res = is_class.match(line)
            if res:
                # we found a class definition
                class_name = res.group('id')
                inherit = res.group('sup')
                if inherit:
                    # the class inherits from other classes; strip the
                    # parentheses and resolve every listed name
                    names = []
                    for n in inherit[1:-1].split(','):
                        n = n.strip()
                        if n:
                            # skip empty entries, e.g. from "class C():"
                            names.append(_resolve_super(n, classes))
                    inherit = names
                # remember this class
                cur_class = Class(module, class_name, inherit, filename, lineno)
                classes[class_name] = cur_class
                continue

            res = is_method.match(line)
            if res:
                # found a method definition
                if cur_class is not None:
                    # and we know the class it belongs to
                    cur_class._addmethod(res.group('id'), lineno)
                continue

            if dedent.match(line):
                # end of class definition
                cur_class = None
    finally:
        # close the file even when parsing fails part-way through
        f.close()
    return classes

Guido van Rossum31626bc1997-10-24 14:46:16 +0000214