Guido van Rossum | 00ff433 | 1994-10-03 16:33:08 +0000 | [diff] [blame] | 1 | # Determine the names and filenames of the modules imported by a |
| 2 | # script, recursively. This is done by scanning for lines containing |
| 3 | # import statements. (The scanning has only superficial knowledge of |
| 4 | # Python syntax and no knowledge of semantics, so in theory the result |
| 5 | # may be incorrect -- however this is quite unlikely if you don't |
| 6 | # intentionally obscure your Python code.) |
| 7 | |
import os
import re
import regex
import string
import sys
| 12 | |
| 13 | |
| 14 | # Top-level interface. |
| 15 | # First argument is the main program (script). |
| 16 | # Second optional argument is list of modules to be searched as well. |
| 17 | |
def findmodules(scriptfile, modules = (), path = sys.path):
    """Return a dictionary mapping module names to filenames.

    scriptfile is recorded under the name '__main__'.  Each entry of
    modules is a filename that is searched as well; it is recorded under
    its basename with a trailing '.py' or '.pyc' removed.  path is the
    list of directories handed to findmodule() for every module that is
    discovered by scanning and has no filename yet.
    """
    todo = {'__main__': scriptfile}
    for name in modules:
        mod = os.path.basename(name)
        if mod[-3:] == '.py':
            mod = mod[:-3]
        elif mod[-4:] == '.pyc':
            mod = mod[:-4]
        todo[mod] = name
    # Forward the search path; it used to be accepted but never used.
    return closure(todo, path)


# Compute the closure of scanfile() and findmodule().
# Return a dictionary mapping module names to filenames.
# Writes to stderr if a file can't be opened or read.

def closure(todo, path = None):
    """Expand todo until no unseen module names remain.

    todo maps module names to filenames; a filename of None means the
    module still has to be located with findmodule().  When path is
    None, findmodule() is called with its own default search path,
    otherwise path is passed on to it.

    Returns a dictionary mapping every module name reached to its
    filename, '<builtin>' or '<unknown>'.  A file that cannot be scanned
    is reported on stderr and stays in the result with its filename.
    """
    done = {}
    while todo:
        newtodo = {}
        for modname in todo.keys():
            if modname in done:
                continue
            filename = todo[modname]
            if filename is None:
                if path is None:
                    filename = findmodule(modname)
                else:
                    filename = findmodule(modname, path)
            done[modname] = filename
            if filename in ('<builtin>', '<unknown>'):
                # Nothing on disk to scan for further imports.
                continue
            try:
                modules = scanfile(filename)
            except IOError:
                # sys.exc_info() is used instead of binding the exception
                # in the except clause, so the handler is valid in both
                # the old and the new except syntax.
                sys.stderr.write("%s: %s\n" %
                                 (filename, str(sys.exc_info()[1])))
                continue
            for m in modules:
                if m not in done:
                    newtodo[m] = None
        todo = newtodo
    return done
| 57 | |
| 58 | |
| 59 | # Scan a file looking for import statements. |
| 60 | # Return list of module names. |
| 61 | # Can raise IOError. |
| 62 | |
# Patterns for "import a, b" and "from a import ...", anchored at the
# start of the line or directly after a colon (as in "if x: import a").
# Group 2 holds the module list resp. the module name.
importstr = r'(^|:)[ \t]*import[ \t]+([a-zA-Z0-9_, \t]+)'
fromstr = r'(^|:)[ \t]*from[ \t]+([a-zA-Z0-9_]+)[ \t]+import[ \t]+'
isimport = re.compile(importstr)
isfrom = re.compile(fromstr)

def scanfile(filename):
    """Scan a file for import statements and return the module names.

    Lines ending in a backslash are joined with the following line
    before matching.  For "import a, b" every comma-separated entry
    contributes its first word, so "import a as b" yields 'a' and empty
    entries are skipped.  For "from a import ..." the result is 'a'.

    Returns a list of unique module names in no particular order.
    Raises IOError if the file cannot be opened or read.
    """
    allmodules = {}
    f = open(filename, 'r')
    try:
        while 1:
            line = f.readline()
            if not line:
                break  # EOF
            while line[-2:] == '\\\n':  # Continuation line
                line = line[:-2] + ' ' + f.readline()
            match = isimport.search(line)
            if match:
                modules = []
                for entry in match.group(2).split(','):
                    words = entry.split()
                    if words:
                        # Keep only the module name; drops "as alias".
                        modules.append(words[0])
            else:
                match = isfrom.search(line)
                if not match:
                    continue
                modules = [match.group(2)]
            for mod in modules:
                allmodules[mod] = None
    finally:
        f.close()
    return list(allmodules.keys())
| 92 | |
| 93 | |
| 94 | # Find the file containing a module, given its name. |
| 95 | # Return filename, or '<builtin>', or '<unknown>'. |
| 96 | |
builtins = sys.builtin_module_names
tails = ['.py', '.pyc']

def findmodule(modname, path = sys.path):
    """Locate the file that provides module modname.

    Returns '<builtin>' when modname is listed in builtins.  Otherwise
    each directory in path is tried in order, with every suffix in tails
    in turn, and the first file that can be opened for reading is
    returned.  Returns '<unknown>' when nothing is found.
    """
    if modname in builtins:
        return '<builtin>'
    for directory in path:
        stem = os.path.join(directory, modname)
        for suffix in tails:
            candidate = stem + suffix
            try:
                handle = open(candidate, 'r')
            except IOError:
                pass
            else:
                # Opening was only an existence/readability probe.
                handle.close()
                return candidate
    return '<unknown>'
| 112 | |
| 113 | |
| 114 | # Test the above functions. |
| 115 | |
def test():
    """Command-line driver: print a table of modules and their files.

    The first command-line argument is the script, any further arguments
    are extra files handed to findmodules().  Without arguments a usage
    line is printed and the process exits with status 2.
    """
    args = sys.argv[1:]
    if not args:
        sys.stdout.write(
            'usage: python findmodules.py scriptfile [morefiles ...]\n')
        sys.exit(2)
    found = findmodules(args[0], args[1:])
    rows = list(found.items())
    rows.sort()
    # The header row is formatted exactly like the data rows.
    for row in [('Module', 'File')] + rows:
        sys.stdout.write("%-15s %s\n" % row)

if __name__ == '__main__':
    test()