# Copyright (C) 2007 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Awk helper script for opcode-gen.
| 16 | |
# Startup: set the opcode limits, build the lookup tables, read the
# bytecode description, and derive the instruction-fitting chains.
# Exits with status 1 if the bytecode description cannot be parsed.
BEGIN {
    MAX_OPCODE = 65535;
    MAX_LIBDEX_OPCODE = 255; # TODO: Will not be true for long!

    initIndexTypes();
    initFlags();

    if (readBytecodes() != 0) {
        exit 1;
    }

    deriveOpcodeChains();

    # Empty means "not currently skipping a generated section".
    consumeUntil = "";
}
| 26 | |
# While a generated section is being replaced, drop every input line
# until one containing the end marker shows up. The marker line itself
# falls through to the rules below.
consumeUntil != "" {
    if (index($0, consumeUntil) == 0) {
        next;
    }
    consumeUntil = "";
}
| 34 | |
# Section "opcodes": emit one integer constant declaration per opcode
# that is defined and not flagged as optimized.
/BEGIN\(opcodes\)/ {
    print;
    consumeUntil = "END(opcodes)";

    for (i = 0; i <= MAX_OPCODE; i++) {
        if (!isUnused(i) && !isOptimized(i)) {
            printf(" public static final int %s = 0x%s;\n",
                   constName[i], hex[i]);
        }
    }

    next;
}
| 47 | |
# Section "first-opcodes": emit a comment line for every defined,
# non-optimized opcode that heads an instruction-fitting chain.
/BEGIN\(first-opcodes\)/ {
    print;
    consumeUntil = "END(first-opcodes)";

    for (i = 0; i <= MAX_OPCODE; i++) {
        if (!isUnused(i) && !isOptimized(i) && isFirst[i] == "true") {
            printf(" // DalvOps.%s\n", constName[i]);
        }
    }

    next;
}
| 61 | |
# Section "dops": emit a Dop declaration for every defined,
# non-optimized opcode, naming its family, its successor in the fitting
# chain (or NO_NEXT), its format, whether it has a result, and its
# human-oriented name.
/BEGIN\(dops\)/ {
    print;
    consumeUntil = "END(dops)";

    dopTemplate = " public static final Dop %s =\n" \
                  " new Dop(DalvOps.%s, DalvOps.%s,\n" \
                  " DalvOps.%s, Form%s.THE_ONE, %s,\n" \
                  " \"%s\");\n\n";

    for (i = 0; i <= MAX_OPCODE; i++) {
        if (isUnused(i) || isOptimized(i)) continue;

        # -1 marks the end of a chain.
        if (nextOpcode[i] == -1) {
            nextOp = "NO_NEXT";
        } else {
            nextOp = constName[nextOpcode[i]];
        }

        printf(dopTemplate,
               constName[i], constName[i], family[i], nextOp, format[i],
               hasResult[i], name[i]);
    }

    next;
}
| 82 | |
# Section "dops-init": emit one set(...) call per defined,
# non-optimized opcode.
/BEGIN\(dops-init\)/ {
    print;
    consumeUntil = "END(dops-init)";

    for (i = 0; i <= MAX_OPCODE; i++) {
        if (!isUnused(i) && !isOptimized(i)) {
            printf(" set(%s);\n", constName[i]);
        }
    }

    next;
}
| 94 | |
# Section "libcore-opcodes": emit an OP_* integer constant for every
# single-byte opcode that is in use and not flagged as optimized.
/BEGIN\(libcore-opcodes\)/ {
    print;
    consumeUntil = "END(libcore-opcodes)";

    for (i = 0; i <= MAX_LIBDEX_OPCODE; i++) {
        if (!isUnusedByte(i) && !isOptimized(i)) {
            printf(" int OP_%-28s = 0x%02x;\n", constName[i], i);
        }
    }

    next;
}
| 106 | |
# Section "libcore-maximum-value": emit the MAXIMUM_VALUE assignment,
# currently just the single-byte opcode limit.
/BEGIN\(libcore-maximum-value\)/ {
    print;
    consumeUntil = "END(libcore-maximum-value)";

    # TODO: Make this smarter.
    printf(" MAXIMUM_VALUE = %d;\n", MAX_LIBDEX_OPCODE);

    next;
}
| 116 | |
# Section "libdex-opcode-enum": emit one enumerator per single-byte
# opcode value; unused slots get an UNUSED_XX placeholder name.
/BEGIN\(libdex-opcode-enum\)/ {
    print;
    consumeUntil = "END(libdex-opcode-enum)";

    i = 0;
    while (i <= MAX_LIBDEX_OPCODE) {
        printf(" OP_%-28s = 0x%02x,\n", constNameOrUnusedByte(i), i);
        i++;
    }

    next;
}
| 127 | |
# Section "libdex-goto-table": emit one H(OP_*) entry per single-byte
# opcode value, padded to 78 columns and ending in a backslash so the
# entries form a single continued macro body.
/BEGIN\(libdex-goto-table\)/ {
    print;
    consumeUntil = "END(libdex-goto-table)";

    for (i = 0; i <= MAX_LIBDEX_OPCODE; i++) {
        printf("%-78s\\\n", sprintf(" H(OP_%s),", constNameOrUnusedByte(i)));
    }

    next;
}
| 139 | |
# Section "libdex-opcode-names": emit the quoted human-oriented name of
# each single-byte opcode value; unused slots get "unused-xx".
/BEGIN\(libdex-opcode-names\)/ {
    print;
    consumeUntil = "END(libdex-opcode-names)";

    i = 0;
    while (i <= MAX_LIBDEX_OPCODE) {
        printf(" \"%s\",\n", nameOrUnusedByte(i));
        i++;
    }

    next;
}
| 150 | |
# Section "libdex-widths": emit the instruction width of each
# single-byte opcode value (0 for unused slots), 16 entries per row.
/BEGIN\(libdex-widths\)/ {
    print;
    consumeUntil = "END(libdex-widths)";

    col = 1;
    for (i = 0; i <= MAX_LIBDEX_OPCODE; i++) {
        if (isUnusedByte(i)) {
            value = sprintf("%d,", 0);
        } else {
            value = sprintf("%d,", width[i]);
        }
        col = colPrint(value, (i == MAX_LIBDEX_OPCODE), col, 16, 2, " ");
    }

    next;
}
| 163 | |
# Section "libdex-flags": emit the C flag expression for each
# single-byte opcode value, one per line; unused slots get flags "0".
/BEGIN\(libdex-flags\)/ {
    print;
    consumeUntil = "END(libdex-flags)";

    for (i = 0; i <= MAX_LIBDEX_OPCODE; i++) {
        if (isUnusedByte(i)) {
            value = flagsToC(0);
        } else {
            value = flagsToC(flags[i]);
        }
        printf(" %s,\n", value);
    }

    next;
}
| 175 | |
# Section "libdex-formats": emit the kFmt* constant of each single-byte
# opcode value (kFmt00x for unused slots), 7 entries per row.
/BEGIN\(libdex-formats\)/ {
    print;
    consumeUntil = "END(libdex-formats)";

    col = 1;
    for (i = 0; i <= MAX_LIBDEX_OPCODE; i++) {
        if (isUnusedByte(i)) {
            value = "kFmt00x,";
        } else {
            value = "kFmt" format[i] ",";
        }
        col = colPrint(value, (i == MAX_LIBDEX_OPCODE), col, 7, 9, " ");
    }

    next;
}
| 188 | |
# Section "libdex-index-types": emit the kIndex* constant of each
# single-byte opcode value (the "unknown" one for unused slots),
# 3 entries per row.
/BEGIN\(libdex-index-types\)/ {
    print;
    consumeUntil = "END(libdex-index-types)";

    col = 1;
    for (i = 0; i <= MAX_LIBDEX_OPCODE; i++) {
        if (isUnusedByte(i)) {
            value = "unknown";
        } else {
            value = indexType[i];
        }
        value = indexTypeValues[value] ",";
        col = colPrint(value, (i == MAX_LIBDEX_OPCODE), col, 3, 19, " ");
    }

    next;
}
| 202 | |
# Any line not consumed by a rule above is copied through unchanged.
{ print $0; }
| 204 | |
# Print one element of a multi-column listing.
#
#   value      - text of the element
#   isLast     - true if this is the final element overall
#   col        - one-based column the element goes in
#   numCols    - number of columns per row
#   colWidth   - minimum width elements are left-justified to
#   linePrefix - text printed before the first column of each row
#
# The element that ends a row (last column, or last element overall) is
# not padded and is followed by a newline. Returns the one-based column
# number that the next element will be printed in.
function colPrint(value, isLast, col, numCols, colWidth, linePrefix,
                  lead, padTo, tail) {
    # locals: lead, padTo, tail
    if (col == 1) {
        lead = linePrefix;
    } else {
        lead = " ";
    }

    if (isLast || (col == numCols)) {
        padTo = 1;
        tail = "\n";
    } else {
        padTo = colWidth;
        tail = "";
    }

    printf("%s%-*s%s", lead, padTo, value, tail);

    return (col % numCols) + 1;
}
| 217 | |
# Read the bytecode description file named by the variable bytecodeFile.
#
# Each line is normalized (runs of spaces collapsed, "#" comments and
# leading/trailing space removed); blank lines are skipped. The first
# word selects the handler: "op" lines go to defineOpcode() and "format"
# lines go to defineFormat(), each receiving the rest of the line.
#
# Returns 0 on success, or 1 after printing a message if a line has an
# unknown command or its handler reports a failure. Exits the script
# with status 1 if the file cannot be read.
function readBytecodes(parts, line, cmd, status, count, rawLine) {
    # locals: parts, line, cmd, status, count, rawLine
    for (;;) {
        # Read a line.
        status = (getline line < bytecodeFile);
        if (status == 0) break;
        if (status < 0) {
            print "trouble reading bytecode file";
            exit 1;
        }

        # Keep the untouched text for error reporting.
        rawLine = line;

        # Clean up the line and extract the command. The pattern must
        # require at least one space: a pattern that can match the empty
        # string would insert a space between every character.
        gsub(/ +/, " ", line);
        sub(/ *#.*$/, "", line);
        sub(/ $/, "", line);
        sub(/^ /, "", line);
        count = split(line, parts);
        if (count == 0) continue; # Blank or comment line.
        cmd = parts[1];
        sub(/^[a-z][a-z]* */, "", line); # Remove the command from line.

        if (cmd == "op") {
            status = defineOpcode(line);
        } else if (cmd == "format") {
            status = defineFormat(line);
        } else {
            status = -1;
        }

        if (status != 0) {
            # Report the whole offending line, command included.
            printf("syntax error on line: %s\n", rawLine);
            close(bytecodeFile);
            return 1;
        }
    }

    close(bytecodeFile);
    return 0;
}
| 256 | |
# Define an opcode from the remainder of an "op" line.
#
# The line must hold exactly six whitespace-separated fields: the
# opcode value in hex, name, format, result marker ("n" means the
# opcode has no result), index type, and flags (pipe-separated).
#
# On success the per-opcode arrays (hex, name, format, hasResult,
# indexType, flags, constName, family, width) and the familyFormat
# association are filled in for the opcode.
#
# Returns 0 on success, -1 if the field count or hex value is bad, and
# 1 (after printing a message) if the format, index type, or flags are
# not recognized.
function defineOpcode(line, count, parts, idx) {
    # locals: count, parts, idx
    count = split(line, parts);
    if (count != 6) return -1;
    idx = parseHex(parts[1]);
    if (idx < 0) return -1;

    # Extract directly specified values from the line.
    hex[idx] = parts[1];
    name[idx] = parts[2];
    format[idx] = parts[3];
    hasResult[idx] = (parts[4] == "n") ? "false" : "true";
    indexType[idx] = parts[5];
    flags[idx] = parts[6];

    # Calculate derived values.

    constName[idx] = toupper(name[idx]);
    gsub("[-/]", "_", constName[idx]); # Dash and slash become underscore.
    gsub("[+^]", "", constName[idx]);  # Plus and caret are removed.

    # The family is the part of the name before the first slash.
    split(name[idx], parts, "/");
    family[idx] = toupper(parts[1]);
    gsub("-", "_", family[idx]);       # Dash becomes underscore.
    gsub("[+^]", "", family[idx]);     # Plus and caret are removed.

    # Width is the first format char. substr() is used because
    # splitting on an empty separator is not portable across awks.
    width[idx] = substr(format[idx], 1, 1);

    # This association is used when computing "next" opcodes.
    familyFormat[family[idx],format[idx]] = idx;

    # Verify values.

    if (nextFormat[format[idx]] == "") {
        printf("unknown format: %s\n", format[idx]);
        return 1;
    }

    if (indexTypeValues[indexType[idx]] == "") {
        printf("unknown index type: %s\n", indexType[idx]);
        return 1;
    }

    if (flagsToC(flags[idx]) == "") {
        printf("bogus flags: %s\n", flags[idx]);
        return 1;
    }

    return 0;
}
| 309 | |
# Define a format family from the remainder of a "format" line.
#
# The line is a whitespace-separated list of format names. The whole
# line is recorded in formats[] under its first name, and nextFormat[]
# maps each name to the one after it, with the last name mapping to
# "none". Returns 0 on success or -1 if the line has no names.
function defineFormat(line, count, parts, i, successor) {
    # locals: count, parts, i, successor
    count = split(line, parts);
    if (count < 1) {
        return -1;
    }

    formats[parts[1]] = line;

    for (i = 1; i <= count; i++) {
        if (i < count) {
            successor = parts[i + 1];
        } else {
            successor = "none";
        }
        nextFormat[parts[i]] = successor;
    }

    return 0;
}
| 324 | |
# Fill in the nextOpcode and isFirst arrays. nextOpcode[i] is the
# opcode to try after i when doing instruction fitting (-1 if there is
# none). isFirst[i] is "true" for opcodes at the head of a fitting
# chain and "false" for opcodes that are some other opcode's successor.
function deriveOpcodeChains(i, op) {
    # locals: i, op

    # First pass: assume every defined opcode heads a chain. This must
    # finish before the second pass starts clearing entries.
    for (i = 0; i <= MAX_OPCODE; i++) {
        if (!isUnused(i)) {
            isFirst[i] = "true";
        }
    }

    # Second pass: record successors; a successor is not a chain head.
    for (i = 0; i <= MAX_OPCODE; i++) {
        if (!isUnused(i)) {
            op = findNextOpcode(i);
            nextOpcode[i] = op;
            if (op != -1) {
                isFirst[op] = "false";
            }
        }
    }
}
| 346 | |
# Given an opcode by index, find the next opcode in the same family
# (that is, with the same base name) to try when matching instructions
# to opcodes. Returns the index of that opcode, or -1 if there is none.
function findNextOpcode(idx, fam, fmt, result) {
    # locals: fam, fmt, result
    fam = family[idx];

    # Not every family has an opcode in every format, so follow the
    # nextFormat chain until a match turns up or the chain ends.
    fmt = nextFormat[format[idx]];
    while (fmt != "none") {
        result = familyFormat[fam,fmt];
        if (result != "") {
            return result;
        }
        fmt = nextFormat[fmt];
    }

    return -1;
}
| 369 | |
# Convert a hex value to an int.
#
# The input is a string of hex digits in either case, with no prefix.
# Returns the numeric value, or -1 (after printing a message) if the
# string is empty or contains a character that is not a hex digit.
function parseHex(hex, result, count, c, i) {
    # locals: result, count, c, i
    hex = tolower(hex);
    count = length(hex);

    # An empty string is not a number; do not quietly treat it as 0.
    if (count == 0) {
        printf("bogus hex value: %s\n", hex);
        return -1;
    }

    result = 0;
    for (i = 1; i <= count; i++) {
        # index() is one-based, so a digit's value is its position
        # minus one; 0 means the character is not a hex digit.
        # substr() is used because splitting on an empty separator is
        # not portable across awks.
        c = index("0123456789abcdef", substr(hex, i, 1));
        if (c == 0) {
            printf("bogus hex value: %s\n", hex);
            return -1;
        }
        result = (result * 16) + c - 1;
    }
    return result;
}
| 386 | |
# Initialize the indexTypeValues table, which maps each index type name
# used in the bytecode description to its kIndex* constant name.
function initIndexTypes(table, entries, n, k, pair) {
    # locals: table, entries, n, k, pair
    table = "unknown=kIndexUnknown" \
            " none=kIndexNone" \
            " varies=kIndexVaries" \
            " type-ref=kIndexTypeRef" \
            " string-ref=kIndexStringRef" \
            " method-ref=kIndexMethodRef" \
            " field-ref=kIndexFieldRef" \
            " inline-method=kIndexInlineMethod" \
            " vtable-offset=kIndexVtableOffset" \
            " field-offset=kIndexFieldOffset";

    n = split(table, entries, " ");
    for (k = 1; k <= n; k++) {
        split(entries[k], pair, "=");
        indexTypeValues[pair[1]] = pair[2];
    }
}
| 400 | |
# Initialize the flagValues table, which maps each flag name used in
# the bytecode description to its C constant. "optimized" maps to "0"
# because it is not represented in C output; "0" maps to itself.
function initFlags(table, entries, n, k, pair) {
    # locals: table, entries, n, k, pair
    table = "branch=kInstrCanBranch" \
            " continue=kInstrCanContinue" \
            " switch=kInstrCanSwitch" \
            " throw=kInstrCanThrow" \
            " return=kInstrCanReturn" \
            " invoke=kInstrInvoke" \
            " optimized=0" \
            " 0=0";

    n = split(table, entries, " ");
    for (k = 1; k <= n; k++) {
        split(entries[k], pair, "=");
        flagValues[pair[1]] = pair[2];
    }
}
| 412 | |
# Translate the given flags into the equivalent C expression.
#
# The input is a pipe-separated list of flag names as known to the
# flagValues table. The result is the matching C constants joined with
# "|", or "0" if no flag contributes a constant. Returns "" (after
# printing a message naming the offending flag) if a flag is unknown.
function flagsToC(f, parts, result, i, count) {
    # locals: parts, result, i, count
    count = split(f, parts, /\|/); # Split input at pipe characters.
    result = "0";

    for (i = 1; i <= count; i++) {
        f = flagValues[parts[i]];
        if (f == "") {
            # Report the name that failed the lookup, not the (empty)
            # lookup result.
            printf("bogus flag: %s\n", parts[i]);
            return ""; # Bogus flag name.
        } else if (f == "0") {
            # Nothing to append for this case.
        } else if (result == "0") {
            result = f;
        } else {
            result = result "|" f;
        }
    }

    return result;
}
| 436 | |
# Given a packed opcode, returns the raw (unpacked) opcode value.
#
# Packed values up to 255 are returned as-is. Larger packed values are
# extended opcodes: the amount above 256 becomes the high byte and the
# low byte is 255, so packed 256 maps to raw 255 (the first extended
# opcode) and every result fits in 16 bits.
#
# NOTE(review): the extended mapping assumes libdex packs extended
# opcodes starting at 256 -- confirm against libdex/DexOpcodes.h.
function unpackOpcode(idx) {
    # Note: This must be the inverse of the corresponding code in
    # libdex/DexOpcodes.h.
    #
    # Decimal constants are used because hex literals in awk program
    # text are not portable.
    if (idx <= 255) {
        return idx;
    } else {
        idx -= 256;
        return (idx * 256) + 255;
    }
}
| 447 | |
# Returns true if the given opcode (by index) is an "optimized" opcode,
# that is, if "optimized" is one of its pipe-separated flags.
function isOptimized(idx, parts, f, total) {
    # locals: parts, f, total
    total = split(flags[idx], parts, /\|/); # Split flags[idx] at pipes.
    for (f = 1; f <= total; f++) {
        if (parts[f] == "optimized") {
            return 1;
        }
    }
    return 0;
}
| 457 | |
# Returns true if there is no definition for the given opcode (by
# index), i.e. no name has been recorded for it.
function isUnused(idx) {
    if (name[idx] == "") {
        return 1;
    }
    return 0;
}
| 462 | |
# Returns true if there is no definition for the given opcode (by
# index), taken as a single-byte opcode. The odd case is 255, which is
# the first extended (two-byte) opcode: it is always reported as
# unused here. (This is a stop-gap for code that is not yet prepared
# to deal with extended opcodes.)
function isUnusedByte(idx) {
    if (idx == 255) {
        return 1;
    }
    if (name[idx] == "") {
        return 1;
    }
    return 0;
}
| 472 | |
# Returns the constant name of the given single-byte opcode (by index),
# or the string "UNUSED_XX" (where XX is the index in upper-case hex)
# if isUnusedByte() reports the opcode as unused.
function constNameOrUnusedByte(idx) {
    return isUnusedByte(idx) \
        ? toupper(sprintf("UNUSED_%02x", idx)) \
        : constName[idx];
}
| 482 | |
# Returns the (human-oriented) name of the given single-byte opcode (by
# index), or the string "unused-xx" (where xx is the index in hex) if
# isUnusedByte() reports the opcode as unused.
function nameOrUnusedByte(idx) {
    return isUnusedByte(idx) \
        ? sprintf("unused-%02x", idx) \
        : name[idx];
}