Guido van Rossum | f06ee5f | 1996-11-27 19:52:01 +0000 | [diff] [blame] | 1 | #! /usr/bin/env python |
Guido van Rossum | aa895c7 | 1993-06-10 14:43:53 +0000 | [diff] [blame] | 2 | |
| 3 | # Perform massive identifier substitution on C source files. |
| 4 | # This actually tokenizes the files (to some extent) so it can |
| 5 | # avoid making substitutions inside strings or comments. |
| 6 | # Inside strings, substitutions are never made; inside comments, |
Guido van Rossum | e6fcd33 | 1997-05-15 21:51:16 +0000 | [diff] [blame] | 7 | # it is a user option (off by default). |
Guido van Rossum | aa895c7 | 1993-06-10 14:43:53 +0000 | [diff] [blame] | 8 | # |
| 9 | # The substitutions are read from one or more files whose lines, |
| 10 | # when not empty, after stripping comments starting with #, |
| 11 | # must contain exactly two words separated by whitespace: the |
| 12 | # old identifier and its replacement. |
| 13 | # |
| 14 | # The option -r reverses the sense of the substitutions (this may be |
| 15 | # useful to undo a particular substitution). |
| 16 | # |
| 17 | # If the old identifier is prefixed with a '*' (with no intervening |
| 18 | # whitespace), then it will not be substituted inside comments. |
| 19 | # |
| 20 | # Command line arguments are files or directories to be processed. |
| 21 | # Directories are searched recursively for files whose name looks |
| 22 | # like a C file (ends in .h or .c). The special filename '-' means |
| 23 | # operate in filter mode: read stdin, write stdout. |
| 24 | # |
| 25 | # Symbolic links are always ignored (except as explicit directory |
| 26 | # arguments). |
| 27 | # |
| 28 | # The original files are kept as back-up with a "~" suffix. |
| 29 | # |
| 30 | # Changes made are reported to stdout in a diff-like format. |
| 31 | # |
| 32 | # NB: by changing only the function fixline() you can turn this |
| 33 | # into a program for different changes to C source files; by |
| 34 | # changing the function wanted() you can make a different selection of |
| 35 | # files. |
| 36 | |
| 37 | import sys |
Neal Norwitz | 10be10c | 2006-03-16 06:50:13 +0000 | [diff] [blame] | 38 | import re |
Guido van Rossum | aa895c7 | 1993-06-10 14:43:53 +0000 | [diff] [blame] | 39 | import os |
| 40 | from stat import * |
| 41 | import getopt |
| 42 | |
# Output channels used throughout the script: the diff-like change
# report goes to stdout; error messages and debug traces go to stderr.
rep = sys.stdout.write
err = sys.stderr.write
dbg = err
| 46 | |
def usage():
    """Write the command synopsis and the option summary to stderr."""
    synopsis = ('Usage: ' + sys.argv[0] +
                ' [-c] [-r] [-s file] ... file-or-directory ...\n')
    help_text = (
        synopsis,
        '\n',
        '-c : substitute inside comments\n',
        '-r : reverse direction for following -s options\n',
        '-s substfile : add a file of substitutions\n',
        '\n',
        'Each non-empty non-comment line in a substitution file must\n',
        'contain exactly two words: an identifier and its replacement.\n',
        'Comments start with a # character and end at end of line.\n',
        'If an identifier is preceded with a *, it is not substituted\n',
        'inside a comment even when -c is specified.\n',
    )
    # One err() call per piece, in order.
    for piece in help_text:
        err(piece)
Guido van Rossum | aa895c7 | 1993-06-10 14:43:53 +0000 | [diff] [blame] | 61 | |
def main():
    """Parse the command line, load substitutions and process the arguments.

    Options are handled in the order given, so a -r only affects the -s
    options that follow it.  Every remaining argument is either a
    directory (searched recursively), or a file name ('-' selects filter
    mode); symbolic links to non-directories are refused.

    Always terminates through sys.exit(): status 2 for a usage error,
    otherwise 1 if any argument could not be processed and 0 if all
    went well.
    """
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'crs:')
    except getopt.error as msg:
        err('Options error: ' + str(msg) + '\n')
        usage()
        sys.exit(2)
    if not args:  # No arguments
        usage()
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-c':
            setdocomments()
        elif opt == '-r':
            setreverse()
        elif opt == '-s':
            addsubst(arg)
    bad = 0
    for arg in args:
        if os.path.isdir(arg):
            # isdir() follows symlinks, so a link to a directory is accepted.
            if recursedown(arg):
                bad = 1
        elif os.path.islink(arg):
            err(arg + ': will not process symbolic links\n')
            bad = 1
        else:
            if fix(arg):
                bad = 1
    sys.exit(bad)
Guido van Rossum | aa895c7 | 1993-06-10 14:43:53 +0000 | [diff] [blame] | 89 | |
# Change this regular expression to select a different set of files
Wanted = r'^[a-zA-Z0-9_]+\.[ch]$'


def wanted(name):
    """Return a match object if name matches Wanted, else None.

    name is the bare file name (no directory part).
    """
    pattern = re.compile(Wanted)
    return pattern.match(name)
Guido van Rossum | aa895c7 | 1993-06-10 14:43:53 +0000 | [diff] [blame] | 94 | |
def recursedown(dirname):
    """Process every wanted file below dirname, recursively.

    The files of a directory are handled (in sorted order) before any
    of its subdirectories is entered.  Symbolic links are skipped.

    Returns 1 if the directory could not be listed or if any file or
    subdirectory below it failed, otherwise 0.
    """
    dbg('recursedown(%r)\n' % (dirname,))
    try:
        names = os.listdir(dirname)
    except os.error as msg:
        err(dirname + ': cannot list directory: ' + str(msg) + '\n')
        return 1
    names.sort()
    bad = 0
    subdirs = []
    for name in names:
        if name in (os.curdir, os.pardir):
            continue
        fullname = os.path.join(dirname, name)
        if os.path.islink(fullname):
            continue
        if os.path.isdir(fullname):
            # Postpone subdirectories until this directory's files are done.
            subdirs.append(fullname)
        elif wanted(name):
            if fix(fullname):
                bad = 1
    for fullname in subdirs:
        if recursedown(fullname):
            bad = 1
    return bad
Guido van Rossum | aa895c7 | 1993-06-10 14:43:53 +0000 | [diff] [blame] | 116 | |
def fix(filename):
    """Apply fixline() to every logical line of one file.

    With filename '-' the function works as a filter: stdin is read and
    every (possibly changed) line is written to stdout.

    Otherwise the file is first only scanned.  At the first line that
    fixline() changes, a temp file '@<name>' is created next to the
    original, the input is rewound and the whole file is copied through
    fixline() into it.  Finally the original's mode bits are copied to
    the temp file, the original is renamed to '<name>~' and the temp
    file takes its place.  A file without changes is left untouched.

    Lines ending in a backslash are joined with their continuation
    before being passed to fixline().  Each change is reported through
    rep() in a diff-like format.

    Returns 0 on success (including "nothing to change") and 1 if the
    file could not be opened, the temp file could not be created, or
    the final rename failed.
    """
    filtering = (filename == '-')
    if filtering:
        # Filter mode
        f = sys.stdin
        g = sys.stdout
    else:
        # File replacement mode
        try:
            f = open(filename, 'r')
        except IOError as msg:
            err(filename + ': cannot open: ' + str(msg) + '\n')
            return 1
        head, tail = os.path.split(filename)
        tempname = os.path.join(head, '@' + tail)
        g = None

    try:
        # If we find a match, we rewind the file and start over but
        # now copy everything to a temp file.
        lineno = 0
        initfixline()
        while 1:
            line = f.readline()
            if not line:
                break
            lineno = lineno + 1
            # Glue backslash-continued lines together into one logical line.
            while line[-2:] == '\\\n':
                nextline = f.readline()
                if not nextline:
                    break
                line = line + nextline
                lineno = lineno + 1
            newline = fixline(line)
            if newline != line:
                if g is None:
                    try:
                        g = open(tempname, 'w')
                    except IOError as msg:
                        err(tempname + ': cannot create: ' +
                            str(msg) + '\n')
                        return 1
                    f.seek(0)
                    lineno = 0
                    initfixline()
                    rep(filename + ':\n')
                    continue  # restart from the beginning
                rep(repr(lineno) + '\n')
                rep('< ' + line)
                rep('> ' + newline)
            if g is not None:
                g.write(newline)
    finally:
        # Never close the standard streams; in file mode make sure both
        # handles are released even if processing raised.
        if not filtering:
            f.close()
            if g is not None:
                g.close()

    # End of file
    if filtering:
        return 0  # Done in filter mode
    if g is None:
        return 0  # No changes

    # Finishing touch -- move files

    # First copy the file's mode to the temp file
    try:
        statbuf = os.stat(filename)
        os.chmod(tempname, statbuf.st_mode & 0o7777)
    except os.error as msg:
        err(tempname + ': warning: chmod failed (' + str(msg) + ')\n')
    # Then make a backup of the original file as filename~
    try:
        os.rename(filename, filename + '~')
    except os.error as msg:
        err(filename + ': warning: backup failed (' + str(msg) + ')\n')
    # Now move the temp file to the original file
    try:
        os.rename(tempname, filename)
    except os.error as msg:
        err(filename + ': rename failed (' + str(msg) + ')\n')
        return 1
    # Return success
    return 0
Guido van Rossum | aa895c7 | 1993-06-10 14:43:53 +0000 | [diff] [blame] | 194 | |
# Tokenizing ANSI C (partly)
#
# The names below are regular-expression fragments for the token kinds
# the scanner has to recognize.  Note that Identifier needs at least
# two characters and may carry a leading 'struct ' prefix.

Identifier = '(struct )?[a-zA-Z_][a-zA-Z0-9_]+'
String = r'"([^\n\\"]|\\.)*"'
Char = r"'([^\n\\']|\\.)*'"
CommentStart = r'/\*'
CommentEnd = r'\*/'

# Integer literals, with optional u/U/l/L suffixes.
Hexnumber = '0[xX][0-9a-fA-F]*[uUlL]*'
Octnumber = '0[0-7]*[uUlL]*'
Decnumber = '[1-9][0-9]*[uUlL]*'
Intnumber = '|'.join((Hexnumber, Octnumber, Decnumber))

# Floating-point literals; floats are tried before integers.
Exponent = '[eE][-+]?[0-9]+'
Pointfloat = r'([0-9]+\.[0-9]*|\.[0-9]+)(%s)?' % Exponent
Expfloat = '[0-9]+' + Exponent
Floatnumber = '|'.join((Pointfloat, Expfloat))
Number = '|'.join((Floatnumber, Intnumber))

# Anything else is an operator -- don't list this explicitly because of '/*'

# Tokens looked for while scanning ordinary code.
OutsideComment = (Identifier, Number, String, Char, CommentStart)
OutsideCommentPattern = '(%s)' % '|'.join(OutsideComment)
OutsideCommentProgram = re.compile(OutsideCommentPattern)

# Tokens looked for while scanning the inside of a /* ... */ comment.
InsideComment = (Identifier, Number, CommentEnd)
InsideCommentPattern = '(%s)' % '|'.join(InsideComment)
InsideCommentProgram = re.compile(InsideCommentPattern)
Guido van Rossum | aa895c7 | 1993-06-10 14:43:53 +0000 | [diff] [blame] | 222 | |
def initfixline():
    """Reset the scanner state: the next line starts outside a comment."""
    global Program
    # fixline() flips Program between the two compiled patterns as it
    # crosses comment delimiters; start over with the "outside" one.
    Program = OutsideCommentProgram
Guido van Rossum | aa895c7 | 1993-06-10 14:43:53 +0000 | [diff] [blame] | 226 | |
def fixline(line):
    """Return line with every identifier found in Dict substituted.

    The line is scanned token by token with the current Program, which
    is switched between OutsideCommentProgram and InsideCommentProgram
    whenever a comment delimiter is seen.  That state is kept in the
    global Program, so comments spanning several lines are tracked
    across calls; call initfixline() before the first line of a file.

    String and character literals are matched as single tokens and so
    are never changed.  Inside a comment:
      - if Docomments is false, the identifier is left alone and a
        'Found in comment:' notice is written to stdout;
      - otherwise it is substituted unless it is listed in NotInComment.
    """
    global Program
    i = 0
    while i < len(line):
        match = Program.search(line, i)
        if match is None:
            break
        i = match.start()
        found = match.group(0)
        if found == '/*':
            Program = InsideCommentProgram
        elif found == '*/':
            Program = OutsideCommentProgram
        n = len(found)
        if found in Dict:
            subst = Dict[found]
            if Program is InsideCommentProgram:
                if not Docomments:
                    sys.stdout.write('Found in comment: %s\n' % found)
                    i = i + n
                    continue
                if found in NotInComment:
                    # Marked with '*' in the substitution file: keep as is.
                    subst = found
            line = line[:i] + subst + line[i+n:]
            # Resume scanning after the replacement text, not the old token.
            n = len(subst)
        i = i + n
    return line
Guido van Rossum | aa895c7 | 1993-06-10 14:43:53 +0000 | [diff] [blame] | 265 | |
# Non-zero when identifiers inside comments are to be substituted as
# well (the -c option).
Docomments = 0


def setdocomments():
    """Turn on substitution inside comments."""
    global Docomments
    Docomments = 1
Guido van Rossum | f62f687 | 1994-01-07 10:55:15 +0000 | [diff] [blame] | 270 | |
# True while the substitution files that follow are to be read in the
# reverse direction (replacement -> original); toggled by the -r option.
Reverse = 0


def setreverse():
    """Toggle the direction used for subsequently loaded substitutions."""
    global Reverse
    Reverse = (not Reverse)


# Dict maps each identifier to its replacement.  NotInComment holds the
# identifiers that were marked with a leading '*' and therefore must not
# be substituted inside comments.
Dict = {}
NotInComment = {}


def addsubst(substfile):
    """Load the substitutions listed in substfile into Dict.

    Everything from a '#' to the end of the line is a comment.  Every
    remaining non-empty line must hold two words, the old identifier
    and its replacement; as a special case a three-word line starting
    with 'struct' makes 'struct <name>' the first word.  When Reverse is
    set the two words swap roles.  A leading '*' is stripped from
    either word; on the identifier being replaced it also records the
    entry in NotInComment.

    Malformed lines and redefinitions are reported on stderr as
    warnings; a later definition overrides an earlier one.  If the file
    cannot be opened an error is printed and the program exits with
    status 1.
    """
    try:
        fp = open(substfile, 'r')
    except IOError as msg:
        err(substfile + ': cannot read substfile: ' + str(msg) + '\n')
        sys.exit(1)
    try:
        lineno = 0
        for line in fp:
            lineno = lineno + 1
            # Drop the comment part, if any.  Splitting on '#' (instead of
            # blindly chopping the last character) keeps the final word
            # intact when the last line has no trailing newline.
            words = line.split('#', 1)[0].split()
            if not words:
                continue
            if len(words) == 3 and words[0] == 'struct':
                words[:2] = [words[0] + ' ' + words[1]]
            elif len(words) != 2:
                err('%s:%r: warning: bad line: %r\n' %
                    (substfile, lineno, line))
                continue
            if Reverse:
                [value, key] = words
            else:
                [key, value] = words
            if value[0] == '*':
                value = value[1:]
            if key[0] == '*':
                key = key[1:]
                NotInComment[key] = value
            if key in Dict:
                err('%s:%r: warning: overriding: %r %r\n' %
                    (substfile, lineno, key, value))
                err('%s:%r: warning: previous: %r\n' %
                    (substfile, lineno, Dict[key]))
            Dict[key] = value
    finally:
        fp.close()
Guido van Rossum | aa895c7 | 1993-06-10 14:43:53 +0000 | [diff] [blame] | 314 | |
# Run main() only when executed as a script, not when imported.
if '__main__' == __name__:
    main()