| Guido van Rossum | f06ee5f | 1996-11-27 19:52:01 +0000 | [diff] [blame] | 1 | #! /usr/bin/env python | 
| Guido van Rossum | aa895c7 | 1993-06-10 14:43:53 +0000 | [diff] [blame] | 2 |  | 
|  | 3 | # Perform massive identifier substitution on C source files. | 
|  | 4 | # This actually tokenizes the files (to some extent) so it can | 
|  | 5 | # avoid making substitutions inside strings or comments. | 
|  | 6 | # Inside strings, substitutions are never made; inside comments, | 
| Guido van Rossum | e6fcd33 | 1997-05-15 21:51:16 +0000 | [diff] [blame] | 7 | # it is a user option (off by default). | 
| Guido van Rossum | aa895c7 | 1993-06-10 14:43:53 +0000 | [diff] [blame] | 8 | # | 
|  | 9 | # The substitutions are read from one or more files whose lines, | 
|  | 10 | # when not empty, after stripping comments starting with #, | 
|  | 11 | # must contain exactly two words separated by whitespace: the | 
|  | 12 | # old identifier and its replacement. | 
|  | 13 | # | 
|  | 14 | # The option -r reverses the sense of the substitutions (this may be | 
|  | 15 | # useful to undo a particular substitution). | 
|  | 16 | # | 
|  | 17 | # If the old identifier is prefixed with a '*' (with no intervening | 
|  | 18 | # whitespace), then it will not be substituted inside comments. | 
|  | 19 | # | 
|  | 20 | # Command line arguments are files or directories to be processed. | 
|  | 21 | # Directories are searched recursively for files whose name looks | 
|  | 22 | # like a C file (ends in .h or .c).  The special filename '-' means | 
|  | 23 | # operate in filter mode: read stdin, write stdout. | 
|  | 24 | # | 
|  | 25 | # Symbolic links are always ignored (except as explicit directory | 
|  | 26 | # arguments). | 
|  | 27 | # | 
|  | 28 | # The original files are kept as back-up with a "~" suffix. | 
|  | 29 | # | 
|  | 30 | # Changes made are reported to stdout in a diff-like format. | 
|  | 31 | # | 
|  | 32 | # NB: by changing only the function fixline() you can turn this | 
|  | 33 | # into a program for different changes to C source files; by | 
|  | 34 | # changing the function wanted() you can make a different selection of | 
|  | 35 | # files. | 
|  | 36 |  | 
|  | 37 | import sys | 
|  | 38 | import regex | 
| Guido van Rossum | aa895c7 | 1993-06-10 14:43:53 +0000 | [diff] [blame] | 39 | import os | 
|  | 40 | from stat import * | 
|  | 41 | import getopt | 
|  | 42 |  | 
# Output shortcuts: rep() writes the change report to stdout, err()
# writes diagnostics to stderr, and dbg() is an alias of err().
rep = sys.stdout.write
err = sys.stderr.write
dbg = err
|  | 46 |  | 
def usage():
    """Write the command-line synopsis and option summary via err()."""
    synopsis = ('Usage: ' + sys.argv[0] +
                ' [-c] [-r] [-s file] ... file-or-directory ...\n')
    help_lines = (
        '\n',
        '-c           : substitute inside comments\n',
        '-r           : reverse direction for following -s options\n',
        '-s substfile : add a file of substitutions\n',
        '\n',
        'Each non-empty non-comment line in a substitution file must\n',
        'contain exactly two words: an identifier and its replacement.\n',
        'Comments start with a # character and end at end of line.\n',
        'If an identifier is preceded with a *, it is not substituted\n',
        'inside a comment even when -c is specified.\n',
    )
    err(synopsis)
    for text in help_lines:
        err(text)
| Guido van Rossum | aa895c7 | 1993-06-10 14:43:53 +0000 | [diff] [blame] | 61 |  | 
def main():
    """Parse the command line, load substitutions, process all arguments.

    Exits with status 2 on a usage error (bad option or no file
    arguments).  Otherwise exits with 1 if any argument could not be
    processed completely and with 0 if everything went well.
    """
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'crs:')
    except getopt.error:
        # sys.exc_info() fetches the exception object without relying
        # on version-specific "except ..." binding syntax.
        msg = sys.exc_info()[1]
        err('Options error: ' + str(msg) + '\n')
        usage()
        sys.exit(2)
    if not args: # No arguments
        usage()
        sys.exit(2)

    # Options are applied strictly in command-line order, so a -r only
    # affects the -s files that come after it.
    for opt, arg in opts:
        if opt == '-c':
            setdocomments()
        elif opt == '-r':
            setreverse()
        elif opt == '-s':
            addsubst(arg)

    bad = 0
    for arg in args:
        if os.path.isdir(arg):
            # isdir() is tested first, so a directory argument is
            # recursed into before the symbolic-link check is reached.
            failed = recursedown(arg)
        elif os.path.islink(arg):
            err(arg + ': will not process symbolic links\n')
            failed = 1
        else:
            failed = fix(arg)
        if failed:
            bad = 1
    sys.exit(bad)
| Guido van Rossum | aa895c7 | 1993-06-10 14:43:53 +0000 | [diff] [blame] | 89 |  | 
# Change this regular expression to select a different set of files.
# As written it accepts names made only of letters, digits and
# underscores followed by a single ".c" or ".h" suffix.
Wanted = r'^[a-zA-Z0-9_]+\.[ch]$'

def wanted(name):
    """Return true if name (a bare file name, no directory) matches Wanted."""
    # regex.match() yields the length of the match, or -1 for no match.
    matched = regex.match(Wanted, name)
    return matched >= 0
| Guido van Rossum | aa895c7 | 1993-06-10 14:43:53 +0000 | [diff] [blame] | 94 |  | 
def recursedown(dirname):
    """Fix every wanted file in dirname and, after that, in its subdirectories.

    Entries are visited in sorted order.  Symbolic links found inside
    the directory are skipped, whether they point to files or to
    directories.  Returns 1 if the directory could not be listed or if
    fix() / a nested recursedown() reported a failure, otherwise 0.
    """
    dbg('recursedown(%r)\n' % (dirname,))
    try:
        names = os.listdir(dirname)
    except os.error:
        msg = sys.exc_info()[1]
        err(dirname + ': cannot list directory: ' + str(msg) + '\n')
        return 1
    names.sort()

    bad = 0
    subdirs = []
    for name in names:
        if name in (os.curdir, os.pardir):
            continue
        fullname = os.path.join(dirname, name)
        if os.path.islink(fullname):
            continue
        if os.path.isdir(fullname):
            # Remember it; subdirectories are handled after the files.
            subdirs.append(fullname)
        elif wanted(name) and fix(fullname):
            bad = 1

    for subdir in subdirs:
        if recursedown(subdir):
            bad = 1
    return bad
| Guido van Rossum | aa895c7 | 1993-06-10 14:43:53 +0000 | [diff] [blame] | 116 |  | 
def fix(filename):
    """Run fixline() over one file and install the result in its place.

    filename is a path, or '-' for filter mode (read stdin, write
    stdout).  Physical lines ending in a backslash are joined with the
    line(s) that follow and passed to fixline() as one logical line.

    In file mode nothing is written until the first changed line is
    found.  At that point a temporary file named '@' + basename is
    created next to the original, the input is rewound, and every
    (possibly changed) line is copied to the temporary file while each
    change is reported through rep().  Finally the original is renamed
    to filename + '~' and the temporary file is renamed to filename.

    In filter mode the change report goes through rep() as well, i.e.
    to the same stdout stream as the filtered text.

    Returns 0 on success (including "nothing to change") and 1 if the
    file cannot be opened, the temporary file cannot be created, or
    the final rename fails.
    """
    if filename == '-':
        # Filter mode
        f = sys.stdin
        g = sys.stdout
    else:
        # File replacement mode
        try:
            f = open(filename, 'r')
        except IOError:
            msg = sys.exc_info()[1]
            err(filename + ': cannot open: ' + str(msg) + '\n')
            return 1
        head, tail = os.path.split(filename)
        tempname = os.path.join(head, '@' + tail)
        g = None
    # If we find a match, we rewind the file and start over but
    # now copy everything to a temp file.
    lineno = 0
    initfixline()
    while 1:
        line = f.readline()
        if not line: break
        lineno = lineno + 1
        # Join backslash-continued lines into one logical line.
        while line[-2:] == '\\\n':
            nextline = f.readline()
            if not nextline: break
            line = line + nextline
            lineno = lineno + 1
        newline = fixline(line)
        if newline != line:
            if g is None:
                try:
                    g = open(tempname, 'w')
                except IOError:
                    msg = sys.exc_info()[1]
                    f.close()
                    err(tempname+': cannot create: '+
                        str(msg)+'\n')
                    return 1
                f.seek(0)
                lineno = 0
                initfixline()
                rep(filename + ':\n')
                continue # restart from the beginning
            rep(repr(lineno) + '\n')
            rep('< ' + line)
            rep('> ' + newline)
        if g is not None:
            g.write(newline)

    # End of file
    if filename == '-': return 0 # Done in filter mode
    f.close()
    if g is None: return 0 # No changes

    # Close (and thereby flush) the temp file before it is chmod'ed
    # and renamed; otherwise buffered output may still be unwritten
    # when the file is moved into place, and the handle would leak.
    g.close()

    # Finishing touch -- move files

    # First copy the file's mode to the temp file
    try:
        statbuf = os.stat(filename)
        os.chmod(tempname, S_IMODE(statbuf[ST_MODE]))
    except os.error:
        msg = sys.exc_info()[1]
        err(tempname + ': warning: chmod failed (' + str(msg) + ')\n')
    # Then make a backup of the original file as filename~
    try:
        os.rename(filename, filename + '~')
    except os.error:
        msg = sys.exc_info()[1]
        err(filename + ': warning: backup failed (' + str(msg) + ')\n')
    # Now move the temp file to the original file
    try:
        os.rename(tempname, filename)
    except os.error:
        msg = sys.exc_info()[1]
        err(filename + ': rename failed (' + str(msg) + ')\n')
        return 1
    # Return success
    return 0
| Guido van Rossum | aa895c7 | 1993-06-10 14:43:53 +0000 | [diff] [blame] | 193 |  | 
# Tokenizing ANSI C (partly)
#
# These are pattern strings for the `regex' module, where \( \) group
# and \| separates alternatives.

# An identifier, optionally preceded by "struct ".  Because of the
# final '+', a match needs at least two identifier characters.
Identifier = r'\(struct \)?[a-zA-Z_][a-zA-Z0-9_]+'
# String and character literals: runs of ordinary characters or
# backslash escapes between the quotes.  The '\n' pieces put a real
# newline into the bracket expression, which raw strings cannot spell.
String = r'"\([^' + '\n' + r'\"]\|\\.\)*"'
Char = r"'\([^" + "\n" + r"\']\|\\.\)*'"
CommentStart = r'/\*'
CommentEnd = r'\*/'

# Integer literals, each with optional u/U/l/L suffixes.
Hexnumber = '0[xX][0-9a-fA-F]*[uUlL]*'
Octnumber = '0[0-7]*[uUlL]*'
Decnumber = '[1-9][0-9]*[uUlL]*'
Intnumber = r'\|'.join((Hexnumber, Octnumber, Decnumber))

# Floating point literals.
Exponent = '[eE][-+]?[0-9]+'
Pointfloat = r'\([0-9]+\.[0-9]*\|\.[0-9]+\)\(' + Exponent + r'\)?'
Expfloat = '[0-9]+' + Exponent
Floatnumber = r'\|'.join((Pointfloat, Expfloat))

# Floats are listed before integers.
Number = r'\|'.join((Floatnumber, Intnumber))
|  | 211 |  | 
# Anything else is an operator -- don't list this explicitly because of '/*'

# Tokens recognized while scanning ordinary code.  String and Char are
# included so that their contents are consumed as one token, and
# CommentStart marks the switch to the inside-comment program.
OutsideComment = (Identifier, Number, String, Char, CommentStart)
OutsideCommentPattern = r'\(' + r'\|'.join(OutsideComment) + r'\)'
OutsideCommentProgram = regex.compile(OutsideCommentPattern)

# Tokens recognized inside a /* ... */ comment; CommentEnd marks the
# switch back to the outside-comment program.
InsideComment = (Identifier, Number, CommentEnd)
InsideCommentPattern = r'\(' + r'\|'.join(InsideComment) + r'\)'
InsideCommentProgram = regex.compile(InsideCommentPattern)
|  | 221 |  | 
def initfixline():
    """Reset the tokenizer state to "outside a comment".

    fixline() keeps its comment state in the global Program between
    calls; fix() calls this before (re)reading a file from the start.
    """
    global Program
    Program = OutsideCommentProgram
| Guido van Rossum | aa895c7 | 1993-06-10 14:43:53 +0000 | [diff] [blame] | 225 |  | 
def fixline(line):
    """Return line with the identifiers listed in Dict substituted.

    The line is scanned token by token with the current Program.  The
    tokens '/*' and '*/' switch Program between the outside-comment and
    inside-comment tokenizers; that state is kept in the global Program
    so that a comment may span several calls.

    A token found in Dict is replaced by its Dict value, except inside
    a comment: there it is only reported on stdout when Docomments is
    false, and it is left unchanged when it is listed in NotInComment.
    """
    global Program
    pos = 0
    while pos < len(line):
        # search() returns the index of the next token, or -1 if none.
        pos = Program.search(line, pos)
        if pos < 0:
            break
        found = Program.group(0)
        if found == '/*':
            Program = InsideCommentProgram
        elif found == '*/':
            Program = OutsideCommentProgram
        width = len(found)
        if found in Dict:
            replacement = Dict[found]
            if Program is InsideCommentProgram:
                if not Docomments:
                    sys.stdout.write('Found in comment: ' + found + '\n')
                    pos = pos + width
                    continue
                if found in NotInComment:
                    replacement = found
            line = line[:pos] + replacement + line[pos + width:]
            # Continue scanning after the replacement text.
            width = len(replacement)
        pos = pos + width
    return line
| Guido van Rossum | aa895c7 | 1993-06-10 14:43:53 +0000 | [diff] [blame] | 263 |  | 
# Set to 1 by setdocomments(); fixline() substitutes inside comments
# only when this is true.
Docomments = 0

def setdocomments():
    """Turn on substitution inside comments (the -c option)."""
    global Docomments
    Docomments = 1
| Guido van Rossum | f62f687 | 1994-01-07 10:55:15 +0000 | [diff] [blame] | 268 |  | 
# When true, addsubst() swaps the two words of every substitution line.
Reverse = 0

def setreverse():
    """Flip the substitution direction (the -r option).

    Each call toggles the flag, so a second -r restores the normal
    direction for the substitution files that follow it.
    """
    global Reverse
    Reverse = not Reverse
| Guido van Rossum | aa895c7 | 1993-06-10 14:43:53 +0000 | [diff] [blame] | 273 |  | 
# Maps each identifier to be replaced to its replacement.
Dict = {}
# Identifiers that were marked with a leading '*' in a substitution
# file; fixline() leaves these alone inside comments.
NotInComment = {}

def addsubst(substfile):
    """Read substitutions from substfile into Dict and NotInComment.

    Everything from a '#' to the end of the line is a comment.  Each
    remaining non-empty line must hold two words, "old new"; as a
    special case "struct old new" is read as the two words
    "struct old" and "new".  When the global Reverse is true the two
    words are swapped.  A leading '*' on the key is removed and the key
    is recorded in NotInComment; a leading '*' on the value is simply
    removed.

    Malformed lines and redefinitions of an existing key are reported
    through err() as warnings; a redefinition overrides the earlier
    entry.  If substfile cannot be opened an error is written and the
    program exits with status 1.
    """
    try:
        fp = open(substfile, 'r')
    except IOError:
        msg = sys.exc_info()[1]
        err(substfile + ': cannot read substfile: ' + str(msg) + '\n')
        sys.exit(1)
    try:
        lineno = 0
        while 1:
            line = fp.readline()
            if not line: break
            lineno = lineno + 1
            # Cut off the comment, if any.  Without a '#' the whole
            # line is kept: split() discards the trailing newline, and
            # a final line lacking one does not lose its last character.
            i = line.find('#')
            if i < 0:
                i = len(line)
            words = line[:i].split()
            if not words: continue
            if len(words) == 3 and words[0] == 'struct':
                words[:2] = [words[0] + ' ' + words[1]]
            elif len(words) != 2:
                err('%s:%r: warning: bad line: %r\n' %
                    (substfile, lineno, line))
                continue
            if Reverse:
                [value, key] = words
            else:
                [key, value] = words
            if value[0] == '*':
                value = value[1:]
            if key[0] == '*':
                key = key[1:]
                NotInComment[key] = value
            if key in Dict:
                err('%s:%r: warning: overriding: %r %r\n' %
                    (substfile, lineno, key, value))
                err('%s:%r: warning: previous: %r\n' %
                    (substfile, lineno, Dict[key]))
            Dict[key] = value
    finally:
        # Close the file even if reading or parsing raises.
        fp.close()
| Guido van Rossum | aa895c7 | 1993-06-10 14:43:53 +0000 | [diff] [blame] | 312 |  | 
# Run only when executed as a script, so that importing this module
# does not parse sys.argv and call sys.exit().
if __name__ == '__main__':
    main()