Blame - opcode-gen/opcode-gen.awk - platform/dalvik

blob: 20e1a36b391bab9282b8ae937d10a86440b249e2 [file] [log] [blame]

Dan Bornstein	d12de17	2010-12-02 15:21:59 -0800	[diff] [blame^]	1	# Copyright (C) 2007 The Android Open Source Project
				2	#
				3	# Licensed under the Apache License, Version 2.0 (the "License");
				4	# you may not use this file except in compliance with the License.
				5	# You may obtain a copy of the License at
				6	#
				7	# http://www.apache.org/licenses/LICENSE-2.0
				8	#
				9	# Unless required by applicable law or agreed to in writing, software
				10	# distributed under the License is distributed on an "AS IS" BASIS,
				11	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				12	# See the License for the specific language governing permissions and
				13	# limitations under the License.
				14
				15	# Awk helper script for opcode-gen.
				16
				17	BEGIN {
				18	MAX_OPCODE = 65535;
				19	MAX_LIBDEX_OPCODE = 255; # TODO: Will not be true for long!
				20	initIndexTypes();
				21	initFlags();
				22	if (readBytecodes()) exit 1;
				23	deriveOpcodeChains();
				24	consumeUntil = "";
				25	}
				26
				27	consumeUntil != "" {
				28	if (index($0, consumeUntil) != 0) {
				29	consumeUntil = "";
				30	} else {
				31	next;
				32	}
				33	}
				34
				35	/BEGIN$opcodes$/ {
				36	consumeUntil = "END(opcodes)";
				37	print;
				38
				39	for (i = 0; i <= MAX_OPCODE; i++) {
				40	if (isUnused(i) \|\| isOptimized(i)) continue;
				41	printf(" public static final int %s = 0x%s;\n",
				42	constName[i], hex[i]);
				43	}
				44
				45	next;
				46	}
				47
				48	/BEGIN$first-opcodes$/ {
				49	consumeUntil = "END(first-opcodes)";
				50	print;
				51
				52	for (i = 0; i <= MAX_OPCODE; i++) {
				53	if (isUnused(i) \|\| isOptimized(i)) continue;
				54	if (isFirst[i] == "true") {
				55	printf(" // DalvOps.%s\n", constName[i]);
				56	}
				57	}
				58
				59	next;
				60	}
				61
				62	/BEGIN$dops$/ {
				63	consumeUntil = "END(dops)";
				64	print;
				65
				66	for (i = 0; i <= MAX_OPCODE; i++) {
				67	if (isUnused(i) \|\| isOptimized(i)) continue;
				68
				69	nextOp = nextOpcode[i];
				70	nextOp = (nextOp == -1) ? "NO_NEXT" : constName[nextOp];
				71
				72	printf(" public static final Dop %s =\n" \
				73	" new Dop(DalvOps.%s, DalvOps.%s,\n" \
				74	" DalvOps.%s, Form%s.THE_ONE, %s,\n" \
				75	" \"%s\");\n\n",
				76	constName[i], constName[i], family[i], nextOp, format[i],
				77	hasResult[i], name[i]);
				78	}
				79
				80	next;
				81	}
				82
				83	/BEGIN$dops-init$/ {
				84	consumeUntil = "END(dops-init)";
				85	print;
				86
				87	for (i = 0; i <= MAX_OPCODE; i++) {
				88	if (isUnused(i) \|\| isOptimized(i)) continue;
				89	printf(" set(%s);\n", constName[i]);
				90	}
				91
				92	next;
				93	}
				94
				95	/BEGIN$libcore-opcodes$/ {
				96	consumeUntil = "END(libcore-opcodes)";
				97	print;
				98
				99	for (i = 0; i <= MAX_LIBDEX_OPCODE; i++) {
				100	if (isUnusedByte(i) \|\| isOptimized(i)) continue;
				101	printf(" int OP_%-28s = 0x%02x;\n", constName[i], i);
				102	}
				103
				104	next;
				105	}
				106
				107	/BEGIN$libcore-maximum-value$/ {
				108	consumeUntil = "END(libcore-maximum-value)";
				109	print;
				110
				111	# TODO: Make this smarter.
				112	printf(" MAXIMUM_VALUE = %d;\n", MAX_LIBDEX_OPCODE);
				113
				114	next;
				115	}
				116
				117	/BEGIN$libdex-opcode-enum$/ {
				118	consumeUntil = "END(libdex-opcode-enum)";
				119	print;
				120
				121	for (i = 0; i <= MAX_LIBDEX_OPCODE; i++) {
				122	printf(" OP_%-28s = 0x%02x,\n", constNameOrUnusedByte(i), i);
				123	}
				124
				125	next;
				126	}
				127
				128	/BEGIN$libdex-goto-table$/ {
				129	consumeUntil = "END(libdex-goto-table)";
				130	print;
				131
				132	for (i = 0; i <= MAX_LIBDEX_OPCODE; i++) {
				133	content = sprintf(" H(OP_%s),", constNameOrUnusedByte(i));
				134	printf("%-78s\\\n", content);
				135	}
				136
				137	next;
				138	}
				139
				140	/BEGIN$libdex-opcode-names$/ {
				141	consumeUntil = "END(libdex-opcode-names)";
				142	print;
				143
				144	for (i = 0; i <= MAX_LIBDEX_OPCODE; i++) {
				145	printf(" \"%s\",\n", nameOrUnusedByte(i));
				146	}
				147
				148	next;
				149	}
				150
				151	/BEGIN$libdex-widths$/ {
				152	consumeUntil = "END(libdex-widths)";
				153	print;
				154
				155	col = 1;
				156	for (i = 0; i <= MAX_LIBDEX_OPCODE; i++) {
				157	value = sprintf("%d,", isUnusedByte(i) ? 0 : width[i]);
				158	col = colPrint(value, (i == MAX_LIBDEX_OPCODE), col, 16, 2, " ");
				159	}
				160
				161	next;
				162	}
				163
				164	/BEGIN$libdex-flags$/ {
				165	consumeUntil = "END(libdex-flags)";
				166	print;
				167
				168	for (i = 0; i <= MAX_LIBDEX_OPCODE; i++) {
				169	value = flagsToC(isUnusedByte(i) ? 0 : flags[i]);
				170	printf(" %s,\n", value);
				171	}
				172
				173	next;
				174	}
				175
				176	/BEGIN$libdex-formats$/ {
				177	consumeUntil = "END(libdex-formats)";
				178	print;
				179
				180	col = 1;
				181	for (i = 0; i <= MAX_LIBDEX_OPCODE; i++) {
				182	value = sprintf("kFmt%s,", isUnusedByte(i) ? "00x" : format[i]);
				183	col = colPrint(value, (i == MAX_LIBDEX_OPCODE), col, 7, 9, " ");
				184	}
				185
				186	next;
				187	}
				188
				189	/BEGIN$libdex-index-types$/ {
				190	consumeUntil = "END(libdex-index-types)";
				191	print;
				192
				193	col = 1;
				194	for (i = 0; i <= MAX_LIBDEX_OPCODE; i++) {
				195	value = isUnusedByte(i) ? "unknown" : indexType[i];
				196	value = sprintf("%s,", indexTypeValues[value]);
				197	col = colPrint(value, (i == MAX_LIBDEX_OPCODE), col, 3, 19, " ");
				198	}
				199
				200	next;
				201	}
				202
				203	{ print; }
				204
				205	# Helper to print out an element in a multi-column fashion. It returns
				206	# the (one-based) column number that the next element will be printed
				207	# in.
				208	function colPrint(value, isLast, col, numCols, colWidth, linePrefix) {
				209	isLast = (isLast \|\| (col == numCols));
				210	printf("%s%-*s%s",
				211	(col == 1) ? linePrefix : " ",
				212	isLast ? 1 : colWidth, value,
				213	isLast ? "\n" : "");
				214
				215	return (col % numCols) + 1;
				216	}
				217
				218	# Read the bytecode description file.
				219	function readBytecodes(i, parts, line, cmd, status, count) {
				220	# locals: parts, line, cmd, status, count
				221	for (;;) {
				222	# Read a line.
				223	status = getline line <bytecodeFile;
				224	if (status == 0) break;
				225	if (status < 0) {
				226	print "trouble reading bytecode file";
				227	exit 1;
				228	}
				229
				230	# Clean up the line and extract the command.
				231	gsub(/ */, " ", line);
				232	sub(/ #.$/, "", line);
				233	sub(/ $/, "", line);
				234	sub(/^ /, "", line);
				235	count = split(line, parts);
				236	if (count == 0) continue; # Blank or comment line.
				237	cmd = parts[1];
				238	sub(/^[a-z][a-z]* */, "", line); # Remove the command from line.
				239
				240	if (cmd == "op") {
				241	status = defineOpcode(line);
				242	} else if (cmd == "format") {
				243	status = defineFormat(line);
				244	} else {
				245	status = -1;
				246	}
				247
				248	if (status != 0) {
				249	printf("syntax error on line: %s\n", line);
				250	return 1;
				251	}
				252	}
				253
				254	return 0;
				255	}
				256
				257	# Define an opcode.
				258	function defineOpcode(line, count, parts, idx) {
				259	# locals: count, parts, idx
				260	count = split(line, parts);
				261	if (count != 6) return -1;
				262	idx = parseHex(parts[1]);
				263	if (idx < 0) return -1;
				264
				265	# Extract directly specified values from the line.
				266	hex[idx] = parts[1];
				267	name[idx] = parts[2];
				268	format[idx] = parts[3];
				269	hasResult[idx] = (parts[4] == "n") ? "false" : "true";
				270	indexType[idx] = parts[5];
				271	flags[idx] = parts[6];
				272
				273	# Calculate derived values.
				274
				275	constName[idx] = toupper(name[idx]);
				276	gsub("[---/]", "_", constName[idx]); # Dash and slash become underscore.
				277	gsub("[+^]", "", constName[idx]); # Plus and caret are removed.
				278	split(name[idx], parts, "/");
				279
				280	family[idx] = toupper(parts[1]);
				281	gsub("-", "_", family[idx]); # Dash becomes underscore.
				282	gsub("[+^]", "", family[idx]); # Plus and caret are removed.
				283
				284	split(format[idx], parts, ""); # Width is the first format char.
				285	width[idx] = parts[1];
				286
				287	# This association is used when computing "next" opcodes.
				288	familyFormat[family[idx],format[idx]] = idx;
				289
				290	# Verify values.
				291
				292	if (nextFormat[format[idx]] == "") {
				293	printf("unknown format: %s\n", format[idx]);
				294	return 1;
				295	}
				296
				297	if (indexTypeValues[indexType[idx]] == "") {
				298	printf("unknown index type: %s\n", indexType[idx]);
				299	return 1;
				300	}
				301
				302	if (flagsToC(flags[idx]) == "") {
				303	printf("bogus flags: %s\n", flags[idx]);
				304	return 1;
				305	}
				306
				307	return 0;
				308	}
				309
				310	# Define a format family.
				311	function defineFormat(line, count, parts, i) {
				312	# locals: count, parts, i
				313	count = split(line, parts);
				314	if (count < 1) return -1;
				315	formats[parts[1]] = line;
				316
				317	parts[count + 1] = "none";
				318	for (i = 1; i <= count; i++) {
				319	nextFormat[parts[i]] = parts[i + 1];
				320	}
				321
				322	return 0;
				323	}
				324
				325	# Produce the nextOpcode and isFirst arrays. The former indicates, for
				326	# each opcode, which one should be tried next when doing instruction
				327	# fitting. The latter indicates which opcodes are at the head of an
				328	# instruction fitting chain.
				329	function deriveOpcodeChains(i, op) {
				330	# locals: i, op
				331
				332	for (i = 0; i <= MAX_OPCODE; i++) {
				333	if (isUnused(i)) continue;
				334	isFirst[i] = "true";
				335	}
				336
				337	for (i = 0; i <= MAX_OPCODE; i++) {
				338	if (isUnused(i)) continue;
				339	op = findNextOpcode(i);
				340	nextOpcode[i] = op;
				341	if (op != -1) {
				342	isFirst[op] = "false";
				343	}
				344	}
				345	}
				346
				347	# Given an opcode by index, find the next opcode in the same family
				348	# (that is, with the same base name) to try when matching instructions
				349	# to opcodes. This simply walks the nextFormat chain looking for a
				350	# match. This returns the index of the matching opcode or -1 if there
				351	# is none.
				352	function findNextOpcode(idx, fam, fmt, result) {
				353	# locals: fam, fmt, result
				354	fam = family[idx];
				355	fmt = format[idx];
				356
				357	# Not every opcode has a version with every possible format, so
				358	# we have to iterate down the chain until we find one or run out of
				359	# formats to try.
				360	for (fmt = nextFormat[format[idx]]; fmt != "none"; fmt = nextFormat[fmt]) {
				361	result = familyFormat[fam,fmt];
				362	if (result != "") {
				363	return result;
				364	}
				365	}
				366
				367	return -1;
				368	}
				369
				370	# Convert a hex value to an int.
				371	function parseHex(hex, result, chars, count, c, i) {
				372	# locals: result, chars, count, c, i
				373	hex = tolower(hex);
				374	count = split(hex, chars, "");
				375	result = 0;
				376	for (i = 1; i <= count; i++) {
				377	c = index("0123456789abcdef", chars[i]);
				378	if (c == 0) {
				379	printf("bogus hex value: %s\n", hex);
				380	return -1;
				381	}
				382	result = (result * 16) + c - 1;
				383	}
				384	return result;
				385	}
				386
				387	# Initialize the indexTypes data.
				388	function initIndexTypes() {
				389	indexTypeValues["unknown"] = "kIndexUnknown";
				390	indexTypeValues["none"] = "kIndexNone";
				391	indexTypeValues["varies"] = "kIndexVaries";
				392	indexTypeValues["type-ref"] = "kIndexTypeRef";
				393	indexTypeValues["string-ref"] = "kIndexStringRef";
				394	indexTypeValues["method-ref"] = "kIndexMethodRef";
				395	indexTypeValues["field-ref"] = "kIndexFieldRef";
				396	indexTypeValues["inline-method"] = "kIndexInlineMethod";
				397	indexTypeValues["vtable-offset"] = "kIndexVtableOffset";
				398	indexTypeValues["field-offset"] = "kIndexFieldOffset";
				399	}
				400
				401	# Initialize the flags data.
				402	function initFlags() {
				403	flagValues["branch"] = "kInstrCanBranch";
				404	flagValues["continue"] = "kInstrCanContinue";
				405	flagValues["switch"] = "kInstrCanSwitch";
				406	flagValues["throw"] = "kInstrCanThrow";
				407	flagValues["return"] = "kInstrCanReturn";
				408	flagValues["invoke"] = "kInstrInvoke";
				409	flagValues["optimized"] = "0"; # Not represented in C output
				410	flagValues["0"] = "0";
				411	}
				412
				413	# Translate the given flags into the equivalent C expression. Returns
				414	# "" on error.
				415	function flagsToC(f, parts, result, i) {
				416	# locals: parts, result, i
				417	count = split(f, parts, /\\|/); # Split input at pipe characters.
				418	result = "0";
				419
				420	for (i = 1; i <= count; i++) {
				421	f = flagValues[parts[i]];
				422	if (f == "") {
				423	printf("bogus flag: %s\n", f);
				424	return ""; # Bogus flag name.
				425	} else if (f == "0") {
				426	# Nothing to append for this case.
				427	} else if (result == "0") {
				428	result = f;
				429	} else {
				430	result = result "\|" f;
				431	}
				432	}
				433
				434	return result;
				435	}
				436
				437	# Given a packed opcode, returns the raw (unpacked) opcode value.
				438	function unpackOpcode(idx) {
				439	# Note: This must be the inverse of the corresponding code in
				440	# libdex/DexOpcodes.h.
				441	if (idx <= 0xff) {
				442	return idx;
				443	} else {
				444	return (idx * 0x100) + 0xff;
				445	}
				446	}
				447
				448	# Returns true if the given opcode (by index) is an "optimized" opcode.
				449	function isOptimized(idx, parts, f) {
				450	# locals: parts, f
				451	split(flags[idx], parts, /\\|/); # Split flags[idx] at pipes.
				452	for (f in parts) {
				453	if (parts[f] == "optimized") return 1;
				454	}
				455	return 0;
				456	}
				457
				458	# Returns true if there is no definition for the given opcode (by index).
				459	function isUnused(idx) {
				460	return (name[idx] == "");
				461	}
				462
				463	# Returns true if there is no definition for the given opcode (by
				464	# index), taken as a single-byte opcode. The odd case for this
				465	# function is 255, which is the first extended (two-byte) opcode. For
				466	# the purposes of this function, it is considered unused. (This is
				467	# meant as a stop-gap measure for code that is not yet prepared to
				468	# deal with extended opcodes.)
				469	function isUnusedByte(idx) {
				470	return (idx == 255) \|\| (name[idx] == "");
				471	}
				472
				473	# Returns the constant name of the given single-byte opcode (by index)
				474	# or the string "UNUSED_XX" (where XX is the index in hex) if the
				475	# opcode is unused. See isUnusedByte(), above, for more info.
				476	function constNameOrUnusedByte(idx) {
				477	if (isUnusedByte(idx)) {
				478	return toupper(sprintf("UNUSED_%02x", idx));
				479	}
				480	return constName[idx];
				481	}
				482
				483	# Returns the (human-oriented) name of the given single-byte opcode
				484	# (by index) or the string "unused-xx" (where xx is the index in hex)
				485	# if the opcode is unused. See isUnusedByte(), above, for more info.
				486	function nameOrUnusedByte(idx) {
				487	if (isUnusedByte(idx)) {
				488	return sprintf("unused-%02x", idx);
				489	}
				490	return name[idx];
				491	}