blob: 50d90dd38f42ca3dc7194937e97d16e1c016f428 [file] [log] [blame]
Jack Palevichae54f1f2009-05-08 14:54:15 -07001/*
Jack Palevich21a15a22009-05-11 14:49:29 -07002 Obfuscated Tiny C Compiler
Jack Palevich88311482009-05-08 13:57:37 -07003
Jack Palevich21a15a22009-05-11 14:49:29 -07004 Copyright (C) 2001-2003 Fabrice Bellard
Jack Palevichae54f1f2009-05-08 14:54:15 -07005
Jack Palevich21a15a22009-05-11 14:49:29 -07006 This software is provided 'as-is', without any express or implied
7 warranty. In no event will the authors be held liable for any damages
8 arising from the use of this software.
Jack Paleviche27bf3e2009-05-10 14:09:03 -07009
Jack Palevich21a15a22009-05-11 14:49:29 -070010 Permission is granted to anyone to use this software for any purpose,
11 including commercial applications, and to alter it and redistribute it
12 freely, subject to the following restrictions:
Jack Paleviche27bf3e2009-05-10 14:09:03 -070013
Jack Palevich21a15a22009-05-11 14:49:29 -070014 1. The origin of this software must not be misrepresented; you must not
15 claim that you wrote the original software. If you use this software
16 in a product, an acknowledgment in the product and its documentation
17 *is* required.
18 2. Altered source versions must be plainly marked as such, and must not be
19 misrepresented as being the original software.
20 3. This notice may not be removed or altered from any source distribution.
21 */
Jack Paleviche27bf3e2009-05-10 14:09:03 -070022
Jack Palevich77ae76e2009-05-10 19:59:24 -070023#include <ctype.h>
24#include <dlfcn.h>
Jack Paleviche27bf3e2009-05-10 14:09:03 -070025#include <stdarg.h>
Jack Palevichae54f1f2009-05-08 14:54:15 -070026#include <stdio.h>
Jack Palevichf6b5a532009-05-10 19:16:42 -070027#include <stdlib.h>
28#include <string.h>
Jack Palevichae54f1f2009-05-08 14:54:15 -070029
Jack Palevich546b2242009-05-13 15:10:04 -070030#if defined(__arm__)
31#include <unistd.h>
32#endif
33
Jack Palevicha6535612009-05-13 16:24:17 -070034#include "disassem.h"
35
Jack Palevichbbf8ab52009-05-11 11:54:30 -070036namespace acc {
37
Jack Palevich77ae76e2009-05-10 19:59:24 -070038class compiler {
Jack Palevich21a15a22009-05-11 14:49:29 -070039 class CodeBuf {
40 char* ind;
41 char* pProgramBase;
Jack Palevichf0cbc922009-05-08 16:35:13 -070042
Jack Palevich21a15a22009-05-11 14:49:29 -070043 void release() {
44 if (pProgramBase != 0) {
45 free(pProgramBase);
46 pProgramBase = 0;
Jack Palevichae54f1f2009-05-08 14:54:15 -070047 }
Jack Palevich21a15a22009-05-11 14:49:29 -070048 }
49
50 public:
51 CodeBuf() {
52 pProgramBase = 0;
53 ind = 0;
54 }
55
56 ~CodeBuf() {
57 release();
58 }
59
60 void init(int size) {
61 release();
62 pProgramBase = (char*) calloc(1, size);
63 ind = pProgramBase;
64 }
65
66 void o(int n) {
67 /* cannot use unsigned, so we must do a hack */
68 while (n && n != -1) {
69 *ind++ = n;
70 n = n >> 8;
71 }
72 }
73
Jack Palevich546b2242009-05-13 15:10:04 -070074 int o4(int n) {
75 int result = (int) ind;
76 * (int*) ind = n;
77 ind += 4;
78 return result;
79 }
80
Jack Palevich21a15a22009-05-11 14:49:29 -070081 /*
82 * Output a byte. Handles all values, 0..ff.
83 */
84 void ob(int n) {
85 *ind++ = n;
86 }
87
88 /* output a symbol and patch all calls to it */
89 void gsym(int t) {
90 int n;
91 while (t) {
92 n = *(int *) t; /* next value */
93 *(int *) t = ((int) ind) - t - 4;
94 t = n;
95 }
96 }
97
98 /* psym is used to put an instruction with a data field which is a
99 reference to a symbol. It is in fact the same as oad ! */
100 int psym(int n, int t) {
101 return oad(n, t);
102 }
103
104 /* instruction + address */
105 int oad(int n, int t) {
106 o(n);
107 *(int *) ind = t;
108 t = (int) ind;
109 ind = ind + 4;
110 return t;
111 }
112
113 inline void* getBase() {
114 return (void*) pProgramBase;
115 }
116
117 int getSize() {
118 return ind - pProgramBase;
119 }
120
121 int getPC() {
122 return (int) ind;
123 }
124 };
125
126 class CodeGenerator {
127 public:
128 CodeGenerator() {}
129 virtual ~CodeGenerator() {}
130
Jack Palevich22305132009-05-13 10:58:45 -0700131 virtual void init(CodeBuf* pCodeBuf) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700132 this->pCodeBuf = pCodeBuf;
133 }
134
Jack Palevich22305132009-05-13 10:58:45 -0700135 /* returns address to patch with local variable size
136 */
Jack Palevich546b2242009-05-13 15:10:04 -0700137 virtual int functionEntry(int argCount) = 0;
Jack Palevich22305132009-05-13 10:58:45 -0700138
Jack Palevich546b2242009-05-13 15:10:04 -0700139 virtual void functionExit(int argCount, int localVariableAddress, int localVariableSize) = 0;
Jack Palevich22305132009-05-13 10:58:45 -0700140
141 /* load immediate value */
Jack Palevich546b2242009-05-13 15:10:04 -0700142 virtual void li(int t) = 0;
Jack Palevich22305132009-05-13 10:58:45 -0700143
144 virtual int gjmp(int t) = 0;
145
146 /* l = 0: je, l == 1: jne */
147 virtual int gtst(bool l, int t) = 0;
148
149 virtual void gcmp(int op) = 0;
150
Jack Palevich546b2242009-05-13 15:10:04 -0700151 virtual void genOp(int op) = 0;
Jack Palevich22305132009-05-13 10:58:45 -0700152
153 virtual void clearECX() = 0;
154
155 virtual void pushEAX() = 0;
156
157 virtual void popECX() = 0;
158
159 virtual void storeEAXToAddressECX(bool isInt) = 0;
160
161 virtual void loadEAXIndirect(bool isInt) = 0;
162
163 virtual void leaEAX(int ea) = 0;
164
165 virtual void storeEAX(int ea) = 0;
166
167 virtual void loadEAX(int ea) = 0;
168
169 virtual void postIncrementOrDecrement(int n, int op) = 0;
170
Jack Palevichcb1c9ef2009-05-14 11:38:49 -0700171 virtual int beginFunctionCallArguments() = 0;
172
173 virtual void endFunctionCallArguments(int a, int l) = 0;
Jack Palevich22305132009-05-13 10:58:45 -0700174
175 virtual void storeEAToArg(int l) = 0;
176
177 virtual int callForward(int symbol) = 0;
178
179 virtual void callRelative(int t) = 0;
180
181 virtual void callIndirect(int l) = 0;
182
183 virtual void adjustStackAfterCall(int l) = 0;
184
Jack Palevicha6535612009-05-13 16:24:17 -0700185 virtual int disassemble(FILE* out) = 0;
186
Jack Palevich21a15a22009-05-11 14:49:29 -0700187 /* output a symbol and patch all calls to it */
Jack Palevich22305132009-05-13 10:58:45 -0700188 virtual void gsym(int t) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700189 pCodeBuf->gsym(t);
190 }
191
Jack Palevich546b2242009-05-13 15:10:04 -0700192 virtual int finishCompile() {
193#if defined(__arm__)
Jack Palevicha6535612009-05-13 16:24:17 -0700194 const long base = long(pCodeBuf->getBase());
195 const long curr = base + long(pCodeBuf->getSize());
196 int err = cacheflush(base, curr, 0);
197 return err;
Jack Palevich546b2242009-05-13 15:10:04 -0700198#else
Jack Palevicha6535612009-05-13 16:24:17 -0700199 return 0;
Jack Palevich546b2242009-05-13 15:10:04 -0700200#endif
201 }
202
Jack Palevicha6535612009-05-13 16:24:17 -0700203 /**
204 * Adjust relative branches by this amount.
205 */
206 virtual int jumpOffset() = 0;
207
Jack Palevich21a15a22009-05-11 14:49:29 -0700208 protected:
209 void o(int n) {
210 pCodeBuf->o(n);
211 }
212
213 /*
214 * Output a byte. Handles all values, 0..ff.
215 */
216 void ob(int n) {
217 pCodeBuf->ob(n);
218 }
219
220 /* psym is used to put an instruction with a data field which is a
221 reference to a symbol. It is in fact the same as oad ! */
222 int psym(int n, int t) {
223 return oad(n, t);
224 }
225
226 /* instruction + address */
227 int oad(int n, int t) {
228 return pCodeBuf->oad(n,t);
229 }
230
Jack Palevicha6535612009-05-13 16:24:17 -0700231 int getBase() {
232 return (int) pCodeBuf->getBase();
233 }
234
Jack Palevich21a15a22009-05-11 14:49:29 -0700235 int getPC() {
236 return pCodeBuf->getPC();
237 }
238
Jack Palevich546b2242009-05-13 15:10:04 -0700239 int o4(int data) {
240 return pCodeBuf->o4(data);
241 }
Jack Palevich21a15a22009-05-11 14:49:29 -0700242 private:
243 CodeBuf* pCodeBuf;
244 };
245
Jack Palevich22305132009-05-13 10:58:45 -0700246 class ARMCodeGenerator : public CodeGenerator {
247 public:
248 ARMCodeGenerator() {}
249 virtual ~ARMCodeGenerator() {}
250
251 /* returns address to patch with local variable size
252 */
Jack Palevich546b2242009-05-13 15:10:04 -0700253 virtual int functionEntry(int argCount) {
254 fprintf(stderr, "functionEntry(%d);\n", argCount);
Jack Palevich69796b62009-05-14 15:42:26 -0700255 // sp -> arg4 arg5 ...
256 // Push our register-based arguments back on the stack
257 if (argCount > 0) {
258 int regArgCount = argCount <= 4 ? argCount : 4;
259 o4(0xE92D0000 | ((1 << argCount) - 1)); // stmfd sp!, {}
260 }
261 // sp -> arg0 arg1 ...
262 o4(0xE92D4800); // stmfd sp!, {fp, lr}
263 // sp, fp -> oldfp, retadr, arg0 arg1 ....
264 o4(0xE1A0B00D); // mov fp, sp
265 return o4(0xE24DD000); // sub sp, sp, # <local variables>
Jack Palevich22305132009-05-13 10:58:45 -0700266 }
267
Jack Palevich546b2242009-05-13 15:10:04 -0700268 virtual void functionExit(int argCount, int localVariableAddress, int localVariableSize) {
269 fprintf(stderr, "functionExit(%d, %d, %d);\n", argCount, localVariableAddress, localVariableSize);
Jack Palevich69796b62009-05-14 15:42:26 -0700270 // Patch local variable allocation code:
271 if (localVariableSize < 0 || localVariableSize > 255) {
Jack Palevich8de461d2009-05-14 17:21:45 -0700272 error("localVariables out of range: %d", localVariableSize);
Jack Palevich546b2242009-05-13 15:10:04 -0700273 }
Jack Palevich69796b62009-05-14 15:42:26 -0700274 *(char*) (localVariableAddress) = localVariableSize;
275
276 // sp -> locals .... fp -> oldfp, retadr, arg0, arg1, ...
277 o4(0xE1A0E00B); // mov lr, fp
278 o4(0xE59BB000); // ldr fp, [fp]
279 o4(0xE28ED004); // add sp, lr, #4
280 // sp -> retadr, arg0, ...
281 o4(0xE8BD4000); // ldmfd sp!, {lr}
282 // sp -> arg0 ....
283 if (argCount > 0) {
284 // We store the PC into the lr so we can adjust the sp before
Jack Palevich8de461d2009-05-14 17:21:45 -0700285 // returning. We need to pull off the registers we pushed
Jack Palevich69796b62009-05-14 15:42:26 -0700286 // earlier. We don't need to actually store them anywhere,
287 // just adjust the stack.
288 int regArgCount = argCount <= 4 ? argCount : 4;
289 o4(0xE28DD000 | (regArgCount << 2)); // add sp, sp, #argCount << 2
290 }
291 o4(0xE12FFF1E); // bx lr
Jack Palevich22305132009-05-13 10:58:45 -0700292 }
293
294 /* load immediate value */
Jack Palevich546b2242009-05-13 15:10:04 -0700295 virtual void li(int t) {
Jack Palevich22305132009-05-13 10:58:45 -0700296 fprintf(stderr, "li(%d);\n", t);
Jack Palevicha6535612009-05-13 16:24:17 -0700297 if (t >= 0 && t < 255) {
Jack Palevich69796b62009-05-14 15:42:26 -0700298 o4(0xE3A00000 + t); // mov r0, #0
Jack Palevicha6535612009-05-13 16:24:17 -0700299 } else if (t >= -256 && t < 0) {
300 // mvn means move constant ^ ~0
Jack Palevich69796b62009-05-14 15:42:26 -0700301 o4(0xE3E00001 - t); // mvn r0, #0
Jack Palevicha6535612009-05-13 16:24:17 -0700302 } else {
Jack Palevichcb1c9ef2009-05-14 11:38:49 -0700303 o4(0xE51F0000); // ldr r0, .L3
304 o4(0xEA000000); // b .L99
305 o4(t); // .L3: .word 0
306 // .L99:
Jack Palevicha6535612009-05-13 16:24:17 -0700307 }
Jack Palevich22305132009-05-13 10:58:45 -0700308 }
309
310 virtual int gjmp(int t) {
311 fprintf(stderr, "gjmp(%d);\n", t);
Jack Palevich8de461d2009-05-14 17:21:45 -0700312 return o4(0xEA000000 | encodeAddress(t)); // b .L33
Jack Palevich22305132009-05-13 10:58:45 -0700313 }
314
315 /* l = 0: je, l == 1: jne */
316 virtual int gtst(bool l, int t) {
317 fprintf(stderr, "gtst(%d, %d);\n", l, t);
Jack Palevich8de461d2009-05-14 17:21:45 -0700318 o4(0xE3500000); // cmp r0,#0
319 int branch = l ? 0x1A000000 : 0x0A000000; // bne : beq
320 return o4(branch | encodeAddress(t));
Jack Palevich22305132009-05-13 10:58:45 -0700321 }
322
323 virtual void gcmp(int op) {
324 fprintf(stderr, "gcmp(%d);\n", op);
Jack Palevich8de461d2009-05-14 17:21:45 -0700325 o4(0xE1510000); // cmp r1, r1
326 switch(op) {
327 case OP_EQUALS:
328 o4(0x03A00001); // moveq r0,#1
329 o4(0x13A00000); // movne r0,#0
330 break;
331 case OP_NOT_EQUALS:
332 o4(0x03A00000); // moveq r0,#0
333 o4(0x13A00001); // movne r0,#1
334 break;
335 case OP_LESS_EQUAL:
336 o4(0xD3A00001); // movle r0,#1
337 o4(0xC3A00000); // movgt r0,#0
338 break;
339 case OP_GREATER:
340 o4(0xD3A00000); // movle r0,#0
341 o4(0xC3A00001); // movgt r0,#1
342 break;
343 case OP_GREATER_EQUAL:
344 o4(0xA3A00001); // movge r0,#1
345 o4(0xB3A00000); // movlt r0,#0
346 break;
347 case OP_LESS:
348 o4(0xA3A00000); // movge r0,#0
349 o4(0xB3A00001); // movlt r0,#1
350 break;
351 default:
352 error("Unknown comparison op %d", op);
353 break;
354 }
Jack Palevich22305132009-05-13 10:58:45 -0700355 }
356
Jack Palevich546b2242009-05-13 15:10:04 -0700357 virtual void genOp(int op) {
Jack Palevich22305132009-05-13 10:58:45 -0700358 fprintf(stderr, "genOp(%d);\n", op);
Jack Palevichcb1c9ef2009-05-14 11:38:49 -0700359 switch(op) {
360 case OP_MUL:
361 o4(0x0E0000091); // mul r0,r1,r0
362 break;
363 case OP_PLUS:
364 o4(0xE0810000); // add r0,r1,r0
365 break;
366 case OP_MINUS:
367 o4(0xE0410000); // sub r0,r1,r0
368 break;
369 case OP_SHIFT_LEFT:
370 o4(0xE1A00011); // lsl r0,r1,r0
371 break;
372 case OP_SHIFT_RIGHT:
373 o4(0xE1A00051); // asr r0,r1,r0
374 break;
375 case OP_BIT_AND:
376 o4(0xE0010000); // and r0,r1,r0
377 break;
378 case OP_BIT_XOR:
379 o4(0xE0210000); // eor r0,r1,r0
380 break;
381 case OP_BIT_OR:
382 o4(0xE1810000); // orr r0,r1,r0
383 break;
384 case OP_BIT_NOT:
385 o4(0xE1E00000); // mvn r0, r0
386 break;
387 default:
Jack Palevich69796b62009-05-14 15:42:26 -0700388 error("Unimplemented op %d\n", op);
Jack Palevichcb1c9ef2009-05-14 11:38:49 -0700389 break;
390 }
Jack Palevich22305132009-05-13 10:58:45 -0700391#if 0
392 o(decodeOp(op));
393 if (op == OP_MOD)
394 o(0x92); /* xchg %edx, %eax */
395#endif
396 }
397
398 virtual void clearECX() {
399 fprintf(stderr, "clearECX();\n");
Jack Palevichcb1c9ef2009-05-14 11:38:49 -0700400 o4(0xE3A01000); // mov r1, #0
Jack Palevich22305132009-05-13 10:58:45 -0700401 }
402
403 virtual void pushEAX() {
404 fprintf(stderr, "pushEAX();\n");
Jack Palevichcb1c9ef2009-05-14 11:38:49 -0700405 o4(0xE92D0001); // stmfd sp!,{r0}
Jack Palevich22305132009-05-13 10:58:45 -0700406 }
407
408 virtual void popECX() {
409 fprintf(stderr, "popECX();\n");
Jack Palevichcb1c9ef2009-05-14 11:38:49 -0700410 o4(0xE8BD0002); // ldmfd sp!,{r1}
Jack Palevich22305132009-05-13 10:58:45 -0700411 }
412
413 virtual void storeEAXToAddressECX(bool isInt) {
414 fprintf(stderr, "storeEAXToAddressECX(%d);\n", isInt);
Jack Palevichbd894902009-05-14 19:35:31 -0700415 if (isInt) {
416 o4(0xE5810000); // str r0, [r1]
417 } else {
418 o4(0xE5C10000); // strb r0, [r1]
419 }
Jack Palevich22305132009-05-13 10:58:45 -0700420 }
421
422 virtual void loadEAXIndirect(bool isInt) {
423 fprintf(stderr, "loadEAXIndirect(%d);\n", isInt);
424 if (isInt)
Jack Palevich69796b62009-05-14 15:42:26 -0700425 o4(0xE5900000); // ldr r0, [r0]
Jack Palevich22305132009-05-13 10:58:45 -0700426 else
Jack Palevich69796b62009-05-14 15:42:26 -0700427 o4(0xE5D00000); // ldrb r0, [r0]
Jack Palevich22305132009-05-13 10:58:45 -0700428 }
429
430 virtual void leaEAX(int ea) {
Jack Palevichbd894902009-05-14 19:35:31 -0700431 fprintf(stderr, "leaEAX(%d);\n", ea);
432 if (ea < -1023 || ea > 1023 || ((ea & 3) != 0)) {
Jack Palevich69796b62009-05-14 15:42:26 -0700433 error("Offset out of range: %08x", ea);
434 }
Jack Palevichbd894902009-05-14 19:35:31 -0700435 if (ea < 0) {
436 o4(0xE24B0F00 | (0xff & ((-ea) >> 2))); // sub r0, fp, #ea
437 } else {
438 o4(0xE28B0F00 | (0xff & (ea >> 2))); // add r0, fp, #ea
439 }
440
Jack Palevich22305132009-05-13 10:58:45 -0700441 }
442
443 virtual void storeEAX(int ea) {
444 fprintf(stderr, "storeEAX(%d);\n", ea);
Jack Palevichbd894902009-05-14 19:35:31 -0700445 if (ea < -4095 || ea > 4095) {
Jack Palevich69796b62009-05-14 15:42:26 -0700446 error("Offset out of range: %08x", ea);
447 }
Jack Palevichbd894902009-05-14 19:35:31 -0700448 if (ea < 0) {
449 o4(0xE50B0000 | (0xfff & (-ea))); // str r0, [fp,#-ea]
Jack Palevich69796b62009-05-14 15:42:26 -0700450 } else {
Jack Palevichbd894902009-05-14 19:35:31 -0700451 o4(0xE58B0000 | (0xfff & ea)); // str r0, [fp,#ea]
Jack Palevich69796b62009-05-14 15:42:26 -0700452 }
Jack Palevich22305132009-05-13 10:58:45 -0700453 }
454
455 virtual void loadEAX(int ea) {
456 fprintf(stderr, "loadEAX(%d);\n", ea);
Jack Palevichbd894902009-05-14 19:35:31 -0700457 if (ea < -4095 || ea > 4095) {
Jack Palevich69796b62009-05-14 15:42:26 -0700458 error("Offset out of range: %08x", ea);
459 }
Jack Palevichbd894902009-05-14 19:35:31 -0700460 if (ea < 0) {
461 o4(0xE51B0000 | (0xfff & (-ea))); // ldr r0, [fp,#-ea]
Jack Palevich69796b62009-05-14 15:42:26 -0700462 } else {
Jack Palevichbd894902009-05-14 19:35:31 -0700463 o4(0xE59B0000 | (0xfff & ea)); // ldr r0, [fp,#ea]
Jack Palevich69796b62009-05-14 15:42:26 -0700464 }
Jack Palevich22305132009-05-13 10:58:45 -0700465 }
466
Jack Palevichbd894902009-05-14 19:35:31 -0700467 virtual void postIncrementOrDecrement(int ea, int op) {
468 fprintf(stderr, "postIncrementOrDecrement(%d, %d);\n", ea, op);
469 /* R0 has the original value.
Jack Palevich22305132009-05-13 10:58:45 -0700470 */
Jack Palevichbd894902009-05-14 19:35:31 -0700471 switch (op) {
472 case OP_INCREMENT:
473 o4(0xE2801001); // add r1, r0, #1
474 break;
475 case OP_DECREMENT:
476 o4(0xE2401001); // sub r1, r0, #1
477 break;
478 default:
479 error("unknown opcode: %d", op);
480 }
481 if (ea < -4095 || ea > 4095) {
482 error("Offset out of range: %08x", ea);
483 }
484 if (ea < 0) {
485 o4(0xE50B1000 | (0xfff & (-ea))); // str r1, [fp,#-ea]
486 } else {
487 o4(0xE58B1000 | (0xfff & ea)); // str r1, [fp,#ea]
488 }
Jack Palevich22305132009-05-13 10:58:45 -0700489 }
490
Jack Palevichcb1c9ef2009-05-14 11:38:49 -0700491 virtual int beginFunctionCallArguments() {
492 fprintf(stderr, "beginFunctionCallArguments();\n");
493 return o4(0xE24DDF00); // Placeholder
494 }
495
496 virtual void endFunctionCallArguments(int a, int l) {
497 fprintf(stderr, "endFunctionCallArguments(0x%08x, %d);\n", a, l);
498 if (l < 0 || l > 0x3FC) {
499 error("L out of range for stack adjustment: 0x%08x", l);
500 }
501 * (int*) a = 0xE24DDF00 | (l >> 2); // sub sp, sp, #0 << 2
502 int argCount = l >> 2;
503 if (argCount > 0) {
504 int regArgCount = argCount > 4 ? 4 : argCount;
505 o4(0xE8BD0000 | ((1 << regArgCount) - 1)); // ldmfd sp!,{}
506 }
Jack Palevich22305132009-05-13 10:58:45 -0700507 }
508
509 virtual void storeEAToArg(int l) {
510 fprintf(stderr, "storeEAToArg(%d);\n", l);
Jack Palevichcb1c9ef2009-05-14 11:38:49 -0700511 if (l < 0 || l > 4096-4) {
512 error("l out of range for stack offset: 0x%08x", l);
513 }
514 o4(0xE58D0000 + l); // str r0, [sp, #4]
Jack Palevich22305132009-05-13 10:58:45 -0700515 }
516
517 virtual int callForward(int symbol) {
518 fprintf(stderr, "callForward(%d);\n", symbol);
Jack Palevichcb1c9ef2009-05-14 11:38:49 -0700519 // Forward calls are always short (local)
520 return o4(0xEB000000 | encodeAddress(symbol));
Jack Palevich22305132009-05-13 10:58:45 -0700521 }
522
523 virtual void callRelative(int t) {
524 fprintf(stderr, "callRelative(%d);\n", t);
Jack Palevichcb1c9ef2009-05-14 11:38:49 -0700525 int abs = t + getPC() + jumpOffset();
Jack Palevichbd894902009-05-14 19:35:31 -0700526 fprintf(stderr, "abs=%d (0x%08x)\n", abs, abs);
Jack Palevichcb1c9ef2009-05-14 11:38:49 -0700527 if (t >= - (1 << 25) && t < (1 << 25)) {
528 o4(0xEB000000 | encodeAddress(t));
529 } else {
530 // Long call.
531 o4(0xE59FC000); // ldr r12, .L1
532 o4(0xEA000000); // b .L99
Jack Palevichbd894902009-05-14 19:35:31 -0700533 o4(t - 12); // .L1: .word 0
Jack Palevichcb1c9ef2009-05-14 11:38:49 -0700534 o4(0xE08CC00F); // .L99: add r12,pc
535 o4(0xE12FFF3C); // blx r12
536 }
Jack Palevich22305132009-05-13 10:58:45 -0700537 }
538
539 virtual void callIndirect(int l) {
540 fprintf(stderr, "callIndirect(%d);\n", l);
541 oad(0x2494ff, l); /* call *xxx(%esp) */
542 }
543
544 virtual void adjustStackAfterCall(int l) {
545 fprintf(stderr, "adjustStackAfterCall(%d);\n", l);
Jack Palevichcb1c9ef2009-05-14 11:38:49 -0700546 if (l < 0 || l > 0x3FC) {
547 error("L out of range for stack adjustment: 0x%08x", l);
548 }
549 int argCount = l >> 2;
550 if (argCount > 4) {
551 int remainingArgs = argCount - 4;
552 o4(0xE28DDF00 | remainingArgs); // add sp, sp, #0x3fc
553 }
554
Jack Palevich22305132009-05-13 10:58:45 -0700555 }
556
Jack Palevicha6535612009-05-13 16:24:17 -0700557 virtual int jumpOffset() {
Jack Palevichbd894902009-05-14 19:35:31 -0700558 return 8;
Jack Palevicha6535612009-05-13 16:24:17 -0700559 }
560
561 /* output a symbol and patch all calls to it */
562 virtual void gsym(int t) {
563 fprintf(stderr, "gsym(0x%x)\n", t);
564 int n;
565 int base = getBase();
566 int pc = getPC();
567 fprintf(stderr, "pc = 0x%x\n", pc);
568 while (t) {
569 int data = * (int*) t;
570 int decodedOffset = ((BRANCH_REL_ADDRESS_MASK & data) << 2);
571 if (decodedOffset == 0) {
572 n = 0;
573 } else {
574 n = base + decodedOffset; /* next value */
575 }
576 *(int *) t = (data & ~BRANCH_REL_ADDRESS_MASK)
577 | encodeRelAddress(pc - t - 8);
578 t = n;
579 }
580 }
581
582 virtual int disassemble(FILE* out) {
583 disasmOut = out;
584 disasm_interface_t di;
585 di.di_readword = disassemble_readword;
586 di.di_printaddr = disassemble_printaddr;
587 di.di_printf = disassemble_printf;
588
589 int base = getBase();
590 int pc = getPC();
591 for(int i = base; i < pc; i += 4) {
592 fprintf(out, "%08x: %08x ", i, *(int*) i);
593 ::disasm(&di, i, 0);
594 }
595 return 0;
596 }
Jack Palevich22305132009-05-13 10:58:45 -0700597 private:
Jack Palevicha6535612009-05-13 16:24:17 -0700598 static FILE* disasmOut;
599
600 static u_int
601 disassemble_readword(u_int address)
602 {
603 return(*((u_int *)address));
604 }
605
606 static void
607 disassemble_printaddr(u_int address)
608 {
609 fprintf(disasmOut, "0x%08x", address);
610 }
611
612 static void
613 disassemble_printf(const char *fmt, ...) {
614 va_list ap;
615 va_start(ap, fmt);
616 vfprintf(disasmOut, fmt, ap);
617 va_end(ap);
618 }
619
620 static const int BRANCH_REL_ADDRESS_MASK = 0x00ffffff;
621
622 /** Encode a relative address that might also be
623 * a label.
624 */
625 int encodeAddress(int value) {
626 int base = getBase();
627 if (value >= base && value <= getPC() ) {
628 // This is a label, encode it relative to the base.
629 value = value - base;
630 }
631 return encodeRelAddress(value);
632 }
633
634 int encodeRelAddress(int value) {
635 return BRANCH_REL_ADDRESS_MASK & (value >> 2);
636 }
Jack Palevich22305132009-05-13 10:58:45 -0700637
Jack Palevich546b2242009-05-13 15:10:04 -0700638 void error(const char* fmt,...) {
639 va_list ap;
640 va_start(ap, fmt);
641 vfprintf(stderr, fmt, ap);
642 va_end(ap);
643 exit(12);
644 }
Jack Palevich22305132009-05-13 10:58:45 -0700645 };
646
Jack Palevich21a15a22009-05-11 14:49:29 -0700647 class X86CodeGenerator : public CodeGenerator {
648 public:
649 X86CodeGenerator() {}
650 virtual ~X86CodeGenerator() {}
651
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700652 /* returns address to patch with local variable size
653 */
Jack Palevich546b2242009-05-13 15:10:04 -0700654 virtual int functionEntry(int argCount) {
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700655 o(0xe58955); /* push %ebp, mov %esp, %ebp */
656 return oad(0xec81, 0); /* sub $xxx, %esp */
657 }
658
Jack Palevich546b2242009-05-13 15:10:04 -0700659 virtual void functionExit(int argCount, int localVariableAddress, int localVariableSize) {
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700660 o(0xc3c9); /* leave, ret */
Jack Palevich546b2242009-05-13 15:10:04 -0700661 *(int *) localVariableAddress = localVariableSize; /* save local variables */
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700662 }
663
Jack Palevich21a15a22009-05-11 14:49:29 -0700664 /* load immediate value */
Jack Palevich546b2242009-05-13 15:10:04 -0700665 virtual void li(int t) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700666 oad(0xb8, t); /* mov $xx, %eax */
667 }
668
Jack Palevich22305132009-05-13 10:58:45 -0700669 virtual int gjmp(int t) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700670 return psym(0xe9, t);
671 }
672
673 /* l = 0: je, l == 1: jne */
Jack Palevich22305132009-05-13 10:58:45 -0700674 virtual int gtst(bool l, int t) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700675 o(0x0fc085); /* test %eax, %eax, je/jne xxx */
676 return psym(0x84 + l, t);
677 }
678
Jack Palevich22305132009-05-13 10:58:45 -0700679 virtual void gcmp(int op) {
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700680 int t = decodeOp(op);
Jack Palevich21a15a22009-05-11 14:49:29 -0700681 o(0xc139); /* cmp %eax,%ecx */
682 li(0);
683 o(0x0f); /* setxx %al */
684 o(t + 0x90);
685 o(0xc0);
686 }
687
Jack Palevich546b2242009-05-13 15:10:04 -0700688 virtual void genOp(int op) {
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700689 o(decodeOp(op));
690 if (op == OP_MOD)
691 o(0x92); /* xchg %edx, %eax */
692 }
693
Jack Palevich22305132009-05-13 10:58:45 -0700694 virtual void clearECX() {
Jack Palevich21a15a22009-05-11 14:49:29 -0700695 oad(0xb9, 0); /* movl $0, %ecx */
696 }
697
Jack Palevich22305132009-05-13 10:58:45 -0700698 virtual void pushEAX() {
Jack Palevich21a15a22009-05-11 14:49:29 -0700699 o(0x50); /* push %eax */
700 }
701
Jack Palevich22305132009-05-13 10:58:45 -0700702 virtual void popECX() {
Jack Palevich21a15a22009-05-11 14:49:29 -0700703 o(0x59); /* pop %ecx */
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700704 }
705
Jack Palevich22305132009-05-13 10:58:45 -0700706 virtual void storeEAXToAddressECX(bool isInt) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700707 o(0x0188 + isInt); /* movl %eax/%al, (%ecx) */
708 }
709
Jack Palevich22305132009-05-13 10:58:45 -0700710 virtual void loadEAXIndirect(bool isInt) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700711 if (isInt)
712 o(0x8b); /* mov (%eax), %eax */
713 else
714 o(0xbe0f); /* movsbl (%eax), %eax */
715 ob(0); /* add zero in code */
716 }
717
Jack Palevich22305132009-05-13 10:58:45 -0700718 virtual void leaEAX(int ea) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700719 gmov(10, ea); /* leal EA, %eax */
720 }
721
Jack Palevich22305132009-05-13 10:58:45 -0700722 virtual void storeEAX(int ea) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700723 gmov(6, ea); /* mov %eax, EA */
724 }
725
Jack Palevich22305132009-05-13 10:58:45 -0700726 virtual void loadEAX(int ea) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700727 gmov(8, ea); /* mov EA, %eax */
728 }
729
Jack Palevich22305132009-05-13 10:58:45 -0700730 virtual void postIncrementOrDecrement(int n, int op) {
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700731 /* Implement post-increment or post decrement.
Jack Palevich21a15a22009-05-11 14:49:29 -0700732 */
733 gmov(0, n); /* 83 ADD */
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700734 o(decodeOp(op));
Jack Palevich21a15a22009-05-11 14:49:29 -0700735 }
736
Jack Palevichcb1c9ef2009-05-14 11:38:49 -0700737 virtual int beginFunctionCallArguments() {
Jack Palevich21a15a22009-05-11 14:49:29 -0700738 return oad(0xec81, 0); /* sub $xxx, %esp */
739 }
740
Jack Palevichcb1c9ef2009-05-14 11:38:49 -0700741 virtual void endFunctionCallArguments(int a, int l) {
742 * (int*) a = l;
743 }
744
Jack Palevich22305132009-05-13 10:58:45 -0700745 virtual void storeEAToArg(int l) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700746 oad(0x248489, l); /* movl %eax, xxx(%esp) */
747 }
748
Jack Palevich22305132009-05-13 10:58:45 -0700749 virtual int callForward(int symbol) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700750 return psym(0xe8, symbol); /* call xxx */
751 }
752
Jack Palevich22305132009-05-13 10:58:45 -0700753 virtual void callRelative(int t) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700754 psym(0xe8, t); /* call xxx */
755 }
756
Jack Palevich22305132009-05-13 10:58:45 -0700757 virtual void callIndirect(int l) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700758 oad(0x2494ff, l); /* call *xxx(%esp) */
759 }
760
Jack Palevich22305132009-05-13 10:58:45 -0700761 virtual void adjustStackAfterCall(int l) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700762 oad(0xc481, l); /* add $xxx, %esp */
763 }
764
Jack Palevicha6535612009-05-13 16:24:17 -0700765 virtual int jumpOffset() {
766 return 5;
767 }
768
769 virtual int disassemble(FILE* out) {
770 return 1;
771 }
772
Jack Palevich21a15a22009-05-11 14:49:29 -0700773 private:
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700774 static const int operatorHelper[];
775
776 int decodeOp(int op) {
777 if (op < 0 || op > OP_COUNT) {
778 fprintf(stderr, "Out-of-range operator: %d\n", op);
779 exit(1);
780 }
781 return operatorHelper[op];
782 }
Jack Palevich21a15a22009-05-11 14:49:29 -0700783
Jack Palevich546b2242009-05-13 15:10:04 -0700784 void gmov(int l, int t) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700785 o(l + 0x83);
786 oad((t < LOCAL) << 7 | 5, t);
787 }
788 };
789
790 /* vars: value of variables
791 loc : local variable index
792 glo : global variable index
793 ind : output code ptr
794 rsym: return symbol
795 prog: output code
796 dstk: define stack
797 dptr, dch: macro state
798 */
799 int tok, tokc, tokl, ch, vars, rsym, loc, glo, sym_stk, dstk,
800 dptr, dch, last_id;
801 void* pSymbolBase;
802 void* pGlobalBase;
803 void* pVarsBase;
804 FILE* file;
805
806 CodeBuf codeBuf;
Jack Palevich22305132009-05-13 10:58:45 -0700807 CodeGenerator* pGen;
Jack Palevich21a15a22009-05-11 14:49:29 -0700808
static const int ALLOC_SIZE = 99999;

/* depends on the init string */
/* next() computes an identifier token as
 * (offset of its text in the symbol string) * 8 + TOK_IDENT, so the
 * keyword values below presumably mirror the keyword positions in that
 * string. */
static const int TOK_STR_SIZE = 48;
static const int TOK_IDENT    = 0x100;
static const int TOK_INT      = 0x100;
static const int TOK_IF       = 0x120;
static const int TOK_ELSE     = 0x138;
static const int TOK_WHILE    = 0x160;
static const int TOK_BREAK    = 0x190;
static const int TOK_RETURN   = 0x1c0;
static const int TOK_FOR      = 0x1f8;
static const int TOK_DEFINE   = 0x218;
static const int TOK_MAIN     = 0x250;

static const int TOK_DUMMY = 1;
static const int TOK_NUM   = 2;

static const int LOCAL = 0x200;

static const int SYM_FORWARD = 0;
static const int SYM_DEFINE  = 1;

/* tokens in string heap */
static const int TAG_TOK   = ' ';
static const int TAG_MACRO = 2;

/* Operator identifiers passed to the code generators. OP_COUNT is the
 * number of operators, not an operator itself. */
static const int OP_INCREMENT     = 0;
static const int OP_DECREMENT     = 1;
static const int OP_MUL           = 2;
static const int OP_DIV           = 3;
static const int OP_MOD           = 4;
static const int OP_PLUS          = 5;
static const int OP_MINUS         = 6;
static const int OP_SHIFT_LEFT    = 7;
static const int OP_SHIFT_RIGHT   = 8;
static const int OP_LESS_EQUAL    = 9;
static const int OP_GREATER_EQUAL = 10;
static const int OP_LESS          = 11;
static const int OP_GREATER       = 12;
static const int OP_EQUALS        = 13;
static const int OP_NOT_EQUALS    = 14;
static const int OP_LOGICAL_AND   = 15;
static const int OP_LOGICAL_OR    = 16;
static const int OP_BIT_AND       = 17;
static const int OP_BIT_XOR       = 18;
static const int OP_BIT_OR        = 19;
static const int OP_BIT_NOT       = 20;
static const int OP_LOGICAL_NOT   = 21;
static const int OP_COUNT         = 22;
859
860 /* Operators are searched from front, the two-character operators appear
861 * before the single-character operators with the same first character.
862 * @ is used to pad out single-character operators.
863 */
864 static const char* operatorChars;
865 static const char operatorLevel[];
866
Jack Palevich21a15a22009-05-11 14:49:29 -0700867 void pdef(int t) {
868 *(char *) dstk++ = t;
869 }
870
871 void inp() {
872 if (dptr) {
873 ch = *(char *) dptr++;
874 if (ch == TAG_MACRO) {
875 dptr = 0;
876 ch = dch;
877 }
878 } else
879 ch = fgetc(file);
880 /* printf("ch=%c 0x%x\n", ch, ch); */
881 }
882
883 int isid() {
Jack Palevich546b2242009-05-13 15:10:04 -0700884 return isalnum(ch) | (ch == '_');
Jack Palevich21a15a22009-05-11 14:49:29 -0700885 }
886
887 /* read a character constant */
888 void getq() {
889 if (ch == '\\') {
890 inp();
891 if (ch == 'n')
892 ch = '\n';
893 }
894 }
895
896 void next() {
897 int l, a;
898
Jack Palevich546b2242009-05-13 15:10:04 -0700899 while (isspace(ch) | (ch == '#')) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700900 if (ch == '#') {
901 inp();
902 next();
903 if (tok == TOK_DEFINE) {
904 next();
905 pdef(TAG_TOK); /* fill last ident tag */
906 *(int *) tok = SYM_DEFINE;
907 *(int *) (tok + 4) = dstk; /* define stack */
908 }
909 /* well we always save the values ! */
910 while (ch != '\n') {
911 pdef(ch);
912 inp();
913 }
914 pdef(ch);
915 pdef(TAG_MACRO);
916 }
917 inp();
918 }
919 tokl = 0;
920 tok = ch;
921 /* encode identifiers & numbers */
922 if (isid()) {
923 pdef(TAG_TOK);
924 last_id = dstk;
925 while (isid()) {
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700926 pdef(ch);
927 inp();
Jack Palevichae54f1f2009-05-08 14:54:15 -0700928 }
Jack Palevich21a15a22009-05-11 14:49:29 -0700929 if (isdigit(tok)) {
930 tokc = strtol((char*) last_id, 0, 0);
931 tok = TOK_NUM;
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700932 } else {
Jack Palevich21a15a22009-05-11 14:49:29 -0700933 *(char *) dstk = TAG_TOK; /* no need to mark end of string (we
934 suppose data is initialized to zero by calloc) */
935 tok = (int) (strstr((char*) sym_stk, (char*) (last_id - 1))
936 - sym_stk);
937 *(char *) dstk = 0; /* mark real end of ident for dlsym() */
938 tok = tok * 8 + TOK_IDENT;
939 if (tok > TOK_DEFINE) {
940 tok = vars + tok;
941 /* printf("tok=%s %x\n", last_id, tok); */
942 /* define handling */
943 if (*(int *) tok == SYM_DEFINE) {
944 dptr = *(int *) (tok + 4);
945 dch = ch;
946 inp();
947 next();
948 }
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700949 }
950 }
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700951 } else {
Jack Palevich21a15a22009-05-11 14:49:29 -0700952 inp();
953 if (tok == '\'') {
954 tok = TOK_NUM;
955 getq();
956 tokc = ch;
957 inp();
958 inp();
Jack Palevich546b2242009-05-13 15:10:04 -0700959 } else if ((tok == '/') & (ch == '*')) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700960 inp();
961 while (ch) {
962 while (ch != '*')
963 inp();
964 inp();
965 if (ch == '/')
966 ch = 0;
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700967 }
Jack Palevich21a15a22009-05-11 14:49:29 -0700968 inp();
Jack Paleviche27bf3e2009-05-10 14:09:03 -0700969 next();
Jack Palevichbd894902009-05-14 19:35:31 -0700970 } else if ((tok == '/') & (ch == '/')) {
971 inp();
972 while (ch && (ch != '\n')) {
973 inp();
974 }
975 inp();
976 next();
Jack Palevich21a15a22009-05-11 14:49:29 -0700977 } else {
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700978 const char* t = operatorChars;
979 int opIndex = 0;
Jack Palevich546b2242009-05-13 15:10:04 -0700980 while ((l = *t++) != 0) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700981 a = *t++;
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700982 tokl = operatorLevel[opIndex];
983 tokc = opIndex;
Jack Palevich546b2242009-05-13 15:10:04 -0700984 if ((l == tok) & ((a == ch) | (a == '@'))) {
Jack Palevich21a15a22009-05-11 14:49:29 -0700985#if 0
986 printf("%c%c -> tokl=%d tokc=0x%x\n",
987 l, a, tokl, tokc);
988#endif
989 if (a == ch) {
990 inp();
991 tok = TOK_DUMMY; /* dummy token for double tokens */
992 }
993 break;
994 }
Jack Palevichbf42c9c2009-05-12 12:48:35 -0700995 opIndex++;
996 }
997 if (l == 0) {
998 tokl = 0;
999 tokc = 0;
Jack Palevich21a15a22009-05-11 14:49:29 -07001000 }
1001 }
1002 }
1003#if 0
1004 {
1005 int p;
1006
1007 printf("tok=0x%x ", tok);
1008 if (tok >= TOK_IDENT) {
1009 printf("'");
1010 if (tok> TOK_DEFINE)
1011 p = sym_stk + 1 + (tok - vars - TOK_IDENT) / 8;
1012 else
1013 p = sym_stk + 1 + (tok - TOK_IDENT) / 8;
1014 while (*(char *)p != TAG_TOK && *(char *)p)
1015 printf("%c", *(char *)p++);
1016 printf("'\n");
1017 } else if (tok == TOK_NUM) {
1018 printf("%d\n", tokc);
1019 } else {
1020 printf("'%c'\n", tok);
1021 }
1022 }
1023#endif
1024 }
1025
1026 void error(const char *fmt, ...) {
1027 va_list ap;
1028
1029 va_start(ap, fmt);
1030 fprintf(stderr, "%ld: ", ftell((FILE *) file));
1031 vfprintf(stderr, fmt, ap);
1032 fprintf(stderr, "\n");
1033 va_end(ap);
1034 exit(1);
1035 }
1036
1037 void skip(int c) {
1038 if (tok != c) {
1039 error("'%c' expected", c);
1040 }
1041 next();
1042 }
1043
Jack Palevich21a15a22009-05-11 14:49:29 -07001044 /* l is one if '=' parsing wanted (quick hack) */
1045 void unary(int l) {
1046 int n, t, a, c;
Jack Palevich546b2242009-05-13 15:10:04 -07001047 t = 0;
Jack Palevich21a15a22009-05-11 14:49:29 -07001048 n = 1; /* type of expression 0 = forward, 1 = value, other =
1049 lvalue */
1050 if (tok == '\"') {
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001051 pGen->li(glo);
Jack Palevich21a15a22009-05-11 14:49:29 -07001052 while (ch != '\"') {
1053 getq();
1054 *(char *) glo++ = ch;
1055 inp();
1056 }
1057 *(char *) glo = 0;
Jack Palevich546b2242009-05-13 15:10:04 -07001058 glo = (glo + 4) & -4; /* align heap */
Jack Palevich21a15a22009-05-11 14:49:29 -07001059 inp();
1060 next();
1061 } else {
1062 c = tokl;
1063 a = tokc;
1064 t = tok;
1065 next();
1066 if (t == TOK_NUM) {
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001067 pGen->li(a);
Jack Palevich21a15a22009-05-11 14:49:29 -07001068 } else if (c == 2) {
1069 /* -, +, !, ~ */
1070 unary(0);
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001071 pGen->clearECX();
Jack Palevich21a15a22009-05-11 14:49:29 -07001072 if (t == '!')
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001073 pGen->gcmp(a);
Jack Palevich21a15a22009-05-11 14:49:29 -07001074 else
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001075 pGen->genOp(a);
Jack Palevich21a15a22009-05-11 14:49:29 -07001076 } else if (t == '(') {
1077 expr();
1078 skip(')');
1079 } else if (t == '*') {
1080 /* parse cast */
1081 skip('(');
1082 t = tok; /* get type */
1083 next(); /* skip int/char/void */
1084 next(); /* skip '*' or '(' */
1085 if (tok == '*') {
1086 /* function type */
1087 skip('*');
1088 skip(')');
1089 skip('(');
1090 skip(')');
1091 t = 0;
1092 }
1093 skip(')');
1094 unary(0);
1095 if (tok == '=') {
1096 next();
1097 pGen->pushEAX();
1098 expr();
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001099 pGen->popECX();
1100 pGen->storeEAXToAddressECX(t == TOK_INT);
Jack Palevich21a15a22009-05-11 14:49:29 -07001101 } else if (t) {
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001102 pGen->loadEAXIndirect(t == TOK_INT);
Jack Palevich21a15a22009-05-11 14:49:29 -07001103 }
1104 } else if (t == '&') {
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001105 pGen->leaEAX(*(int *) tok);
Jack Palevich21a15a22009-05-11 14:49:29 -07001106 next();
1107 } else {
1108 n = *(int *) t;
1109 /* forward reference: try dlsym */
Jack Palevichcb1c9ef2009-05-14 11:38:49 -07001110 if (!n) {
1111 n = (int) dlsym(RTLD_DEFAULT, (char*) last_id);
1112 }
Jack Palevich546b2242009-05-13 15:10:04 -07001113 if ((tok == '=') & l) {
Jack Palevich21a15a22009-05-11 14:49:29 -07001114 /* assignment */
1115 next();
1116 expr();
1117 pGen->storeEAX(n);
1118 } else if (tok != '(') {
1119 /* variable */
1120 pGen->loadEAX(n);
1121 if (tokl == 11) {
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001122 pGen->postIncrementOrDecrement(n, tokc);
Jack Palevich21a15a22009-05-11 14:49:29 -07001123 next();
1124 }
1125 }
1126 }
1127 }
1128
1129 /* function call */
1130 if (tok == '(') {
1131 if (n == 1)
1132 pGen->pushEAX();
1133
1134 /* push args and invert order */
Jack Palevichcb1c9ef2009-05-14 11:38:49 -07001135 a = pGen->beginFunctionCallArguments();
Jack Palevich21a15a22009-05-11 14:49:29 -07001136 next();
1137 l = 0;
1138 while (tok != ')') {
1139 expr();
1140 pGen->storeEAToArg(l);
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001141 if (tok == ',')
Jack Paleviche27bf3e2009-05-10 14:09:03 -07001142 next();
Jack Palevich21a15a22009-05-11 14:49:29 -07001143 l = l + 4;
Jack Paleviche27bf3e2009-05-10 14:09:03 -07001144 }
Jack Palevichcb1c9ef2009-05-14 11:38:49 -07001145 pGen->endFunctionCallArguments(a, l);
Jack Palevich21a15a22009-05-11 14:49:29 -07001146 next();
1147 if (!n) {
1148 /* forward reference */
1149 t = t + 4;
1150 *(int *) t = pGen->callForward(*(int *) t);
1151 } else if (n == 1) {
1152 pGen->callIndirect(l);
1153 l = l + 4;
1154 } else {
Jack Palevicha6535612009-05-13 16:24:17 -07001155 pGen->callRelative(n - codeBuf.getPC() - pGen->jumpOffset()); /* call xxx */
Jack Palevich21a15a22009-05-11 14:49:29 -07001156 }
1157 if (l)
1158 pGen->adjustStackAfterCall(l);
1159 }
1160 }
1161
1162 void sum(int l) {
1163 int t, n, a;
Jack Palevich546b2242009-05-13 15:10:04 -07001164 t = 0;
Jack Palevich21a15a22009-05-11 14:49:29 -07001165 if (l-- == 1)
1166 unary(1);
1167 else {
1168 sum(l);
1169 a = 0;
1170 while (l == tokl) {
1171 n = tok;
1172 t = tokc;
1173 next();
1174
1175 if (l > 8) {
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001176 a = pGen->gtst(t == OP_LOGICAL_OR, a); /* && and || output code generation */
Jack Palevich21a15a22009-05-11 14:49:29 -07001177 sum(l);
1178 } else {
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001179 pGen->pushEAX();
Jack Palevich21a15a22009-05-11 14:49:29 -07001180 sum(l);
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001181 pGen->popECX();
Jack Palevich21a15a22009-05-11 14:49:29 -07001182
Jack Palevich546b2242009-05-13 15:10:04 -07001183 if ((l == 4) | (l == 5)) {
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001184 pGen->gcmp(t);
Jack Palevich21a15a22009-05-11 14:49:29 -07001185 } else {
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001186 pGen->genOp(t);
Jack Palevich21a15a22009-05-11 14:49:29 -07001187 }
1188 }
1189 }
1190 /* && and || output code generation */
1191 if (a && l > 8) {
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001192 a = pGen->gtst(t == OP_LOGICAL_OR, a);
1193 pGen->li(t != OP_LOGICAL_OR);
Jack Palevicha6535612009-05-13 16:24:17 -07001194 pGen->gjmp(5); /* jmp $ + 5 (sizeof li, FIXME for ARM) */
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001195 pGen->gsym(a);
1196 pGen->li(t == OP_LOGICAL_OR);
Jack Palevich21a15a22009-05-11 14:49:29 -07001197 }
1198 }
1199 }
1200
1201 void expr() {
1202 sum(11);
1203 }
1204
1205 int test_expr() {
1206 expr();
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001207 return pGen->gtst(0, 0);
Jack Palevich21a15a22009-05-11 14:49:29 -07001208 }
1209
1210 void block(int l) {
1211 int a, n, t;
1212
1213 if (tok == TOK_IF) {
Jack Paleviche27bf3e2009-05-10 14:09:03 -07001214 next();
1215 skip('(');
Jack Palevich21a15a22009-05-11 14:49:29 -07001216 a = test_expr();
1217 skip(')');
1218 block(l);
1219 if (tok == TOK_ELSE) {
Jack Paleviche27bf3e2009-05-10 14:09:03 -07001220 next();
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001221 n = pGen->gjmp(0); /* jmp */
1222 pGen->gsym(a);
Jack Palevich21a15a22009-05-11 14:49:29 -07001223 block(l);
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001224 pGen->gsym(n); /* patch else jmp */
Jack Palevich21a15a22009-05-11 14:49:29 -07001225 } else {
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001226 pGen->gsym(a); /* patch if test */
Jack Paleviche27bf3e2009-05-10 14:09:03 -07001227 }
Jack Palevich546b2242009-05-13 15:10:04 -07001228 } else if ((tok == TOK_WHILE) | (tok == TOK_FOR)) {
Jack Palevich21a15a22009-05-11 14:49:29 -07001229 t = tok;
1230 next();
1231 skip('(');
1232 if (t == TOK_WHILE) {
Jack Palevicha6535612009-05-13 16:24:17 -07001233 n = codeBuf.getPC(); // top of loop, target of "next" iteration
Jack Palevich21a15a22009-05-11 14:49:29 -07001234 a = test_expr();
1235 } else {
1236 if (tok != ';')
1237 expr();
1238 skip(';');
1239 n = codeBuf.getPC();
1240 a = 0;
1241 if (tok != ';')
1242 a = test_expr();
1243 skip(';');
1244 if (tok != ')') {
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001245 t = pGen->gjmp(0);
Jack Palevich21a15a22009-05-11 14:49:29 -07001246 expr();
Jack Palevicha6535612009-05-13 16:24:17 -07001247 pGen->gjmp(n - codeBuf.getPC() - pGen->jumpOffset());
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001248 pGen->gsym(t);
Jack Palevich21a15a22009-05-11 14:49:29 -07001249 n = t + 4;
1250 }
1251 }
1252 skip(')');
1253 block((int) &a);
Jack Palevicha6535612009-05-13 16:24:17 -07001254 pGen->gjmp(n - codeBuf.getPC() - pGen->jumpOffset()); /* jmp */
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001255 pGen->gsym(a);
Jack Palevich21a15a22009-05-11 14:49:29 -07001256 } else if (tok == '{') {
1257 next();
1258 /* declarations */
1259 decl(1);
1260 while (tok != '}')
1261 block(l);
1262 next();
1263 } else {
1264 if (tok == TOK_RETURN) {
1265 next();
1266 if (tok != ';')
1267 expr();
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001268 rsym = pGen->gjmp(rsym); /* jmp */
Jack Palevich21a15a22009-05-11 14:49:29 -07001269 } else if (tok == TOK_BREAK) {
1270 next();
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001271 *(int *) l = pGen->gjmp(*(int *) l);
Jack Palevich21a15a22009-05-11 14:49:29 -07001272 } else if (tok != ';')
1273 expr();
1274 skip(';');
Jack Paleviche27bf3e2009-05-10 14:09:03 -07001275 }
1276 }
Jack Palevich21a15a22009-05-11 14:49:29 -07001277
1278 /* 'l' is true if local declarations */
1279 void decl(int l) {
1280 int a;
1281
Jack Palevich546b2242009-05-13 15:10:04 -07001282 while ((tok == TOK_INT) | ((tok != -1) & (!l))) {
Jack Palevich21a15a22009-05-11 14:49:29 -07001283 if (tok == TOK_INT) {
1284 next();
1285 while (tok != ';') {
1286 if (l) {
1287 loc = loc + 4;
1288 *(int *) tok = -loc;
1289 } else {
1290 *(int *) tok = glo;
1291 glo = glo + 4;
1292 }
1293 next();
1294 if (tok == ',')
1295 next();
1296 }
1297 skip(';');
1298 } else {
1299 /* patch forward references (XXX: do not work for function
1300 pointers) */
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001301 pGen->gsym(*(int *) (tok + 4));
Jack Palevich21a15a22009-05-11 14:49:29 -07001302 /* put function address */
1303 *(int *) tok = codeBuf.getPC();
1304 next();
1305 skip('(');
1306 a = 8;
Jack Palevich546b2242009-05-13 15:10:04 -07001307 int argCount = 0;
Jack Palevich21a15a22009-05-11 14:49:29 -07001308 while (tok != ')') {
1309 /* read param name and compute offset */
1310 *(int *) tok = a;
1311 a = a + 4;
1312 next();
1313 if (tok == ',')
1314 next();
Jack Palevich546b2242009-05-13 15:10:04 -07001315 argCount++;
Jack Palevich21a15a22009-05-11 14:49:29 -07001316 }
1317 next(); /* skip ')' */
1318 rsym = loc = 0;
Jack Palevich546b2242009-05-13 15:10:04 -07001319 a = pGen->functionEntry(argCount);
Jack Palevich21a15a22009-05-11 14:49:29 -07001320 block(0);
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001321 pGen->gsym(rsym);
Jack Palevich546b2242009-05-13 15:10:04 -07001322 pGen->functionExit(argCount, a, loc);
Jack Palevich21a15a22009-05-11 14:49:29 -07001323 }
1324 }
1325 }
1326
1327 void cleanup() {
1328 if (sym_stk != 0) {
1329 free((void*) sym_stk);
1330 sym_stk = 0;
1331 }
1332 if (pGlobalBase != 0) {
1333 free((void*) pGlobalBase);
1334 pGlobalBase = 0;
1335 }
1336 if (pVarsBase != 0) {
1337 free(pVarsBase);
1338 pVarsBase = 0;
1339 }
1340 if (pGen) {
1341 delete pGen;
1342 pGen = 0;
1343 }
1344 }
1345
1346 void clear() {
1347 tok = 0;
1348 tokc = 0;
1349 tokl = 0;
1350 ch = 0;
1351 vars = 0;
1352 rsym = 0;
1353 loc = 0;
1354 glo = 0;
1355 sym_stk = 0;
1356 dstk = 0;
1357 dptr = 0;
1358 dch = 0;
1359 last_id = 0;
1360 file = 0;
1361 pGlobalBase = 0;
1362 pVarsBase = 0;
1363 pGen = 0;
1364 }
Jack Paleviche27bf3e2009-05-10 14:09:03 -07001365
Jack Palevich22305132009-05-13 10:58:45 -07001366 void setArchitecture(const char* architecture) {
1367 delete pGen;
1368 pGen = 0;
1369
1370 if (architecture != NULL) {
1371 if (strcmp(architecture, "arm") == 0) {
1372 pGen = new ARMCodeGenerator();
1373 } else if (strcmp(architecture, "x86") == 0) {
1374 pGen = new X86CodeGenerator();
1375 } else {
1376 fprintf(stderr, "Unknown architecture %s", architecture);
1377 }
1378 }
1379
1380 if (pGen == NULL) {
1381 pGen = new ARMCodeGenerator();
1382 }
1383 }
1384
Jack Palevich77ae76e2009-05-10 19:59:24 -07001385public:
/* Options passed to compile(). */
struct args {
    /* architecture name handed to setArchitecture(); null by default */
    const char* architecture;

    args() : architecture(0) {
    }
};
1392
Jack Palevich21a15a22009-05-11 14:49:29 -07001393 compiler() {
1394 clear();
Jack Paleviche27bf3e2009-05-10 14:09:03 -07001395 }
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001396
Jack Palevich21a15a22009-05-11 14:49:29 -07001397 ~compiler() {
1398 cleanup();
1399 }
1400
Jack Palevich22305132009-05-13 10:58:45 -07001401 int compile(FILE* in, args& args) {
Jack Palevich21a15a22009-05-11 14:49:29 -07001402 cleanup();
1403 clear();
1404 codeBuf.init(ALLOC_SIZE);
Jack Palevich22305132009-05-13 10:58:45 -07001405 setArchitecture(args.architecture);
Jack Palevich21a15a22009-05-11 14:49:29 -07001406 pGen->init(&codeBuf);
1407 file = in;
1408 sym_stk = (int) calloc(1, ALLOC_SIZE);
1409 dstk = (int) strcpy((char*) sym_stk,
1410 " int if else while break return for define main ")
1411 + TOK_STR_SIZE;
1412 pGlobalBase = calloc(1, ALLOC_SIZE);
1413 glo = (int) pGlobalBase;
1414 pVarsBase = calloc(1, ALLOC_SIZE);
1415 vars = (int) pVarsBase;
1416 inp();
1417 next();
1418 decl(0);
Jack Palevich546b2242009-05-13 15:10:04 -07001419 pGen->finishCompile();
Jack Palevich21a15a22009-05-11 14:49:29 -07001420 return 0;
1421 }
1422
1423 int run(int argc, char** argv) {
1424 typedef int (*mainPtr)(int argc, char** argv);
1425 mainPtr aMain = (mainPtr) *(int*) (vars + TOK_MAIN);
1426 if (!aMain) {
1427 fprintf(stderr, "Could not find function \"main\".\n");
1428 return -1;
1429 }
1430 return aMain(argc, argv);
1431 }
1432
1433 int dump(FILE* out) {
1434 fwrite(codeBuf.getBase(), 1, codeBuf.getSize(), out);
1435 return 0;
1436 }
Jack Palevich77ae76e2009-05-10 19:59:24 -07001437
Jack Palevicha6535612009-05-13 16:24:17 -07001438 int disassemble(FILE* out) {
1439 return pGen->disassemble(out);
1440 }
1441
Jack Palevich77ae76e2009-05-10 19:59:24 -07001442};
1443
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001444const char* compiler::operatorChars =
1445 "++--*@/@%@+@-@<<>><=>=<@>@==!=&&||&@^@|@~@!@";
1446
1447const char compiler::operatorLevel[] =
1448 {11, 11, 1, 1, 1, 2, 2, 3, 3, 4, 4, 4, 4,
1449 5, 5, /* ==, != */
1450 9, 10, /* &&, || */
1451 6, 7, 8, /* & ^ | */
1452 2, 2 /* ~ ! */
1453 };
1454
Jack Palevicha6535612009-05-13 16:24:17 -07001455FILE* compiler::ARMCodeGenerator::disasmOut;
1456
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001457const int compiler::X86CodeGenerator::operatorHelper[] = {
1458 0x1, // ++
1459 0xff, // --
1460 0xc1af0f, // *
1461 0xf9f79991, // /
1462 0xf9f79991, // % (With manual assist to swap results)
1463 0xc801, // +
1464 0xd8f7c829, // -
1465 0xe0d391, // <<
1466 0xf8d391, // >>
1467 0xe, // <=
1468 0xd, // >=
1469 0xc, // <
1470 0xf, // >
1471 0x4, // ==
1472 0x5, // !=
1473 0x0, // &&
1474 0x1, // ||
1475 0xc821, // &
1476 0xc831, // ^
1477 0xc809, // |
1478 0xd0f7, // ~
1479 0x4 // !
1480};
1481
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001482} // namespace acc
1483
Jack Palevich546b2242009-05-13 15:10:04 -07001484// This is a separate function so it can easily be set by breakpoint in gdb.
1485int run(acc::compiler& c, int argc, char** argv) {
1486 return c.run(argc, argv);
1487}
1488
Jack Palevich77ae76e2009-05-10 19:59:24 -07001489int main(int argc, char** argv) {
Jack Palevich22305132009-05-13 10:58:45 -07001490 bool doDump = false;
Jack Palevicha6535612009-05-13 16:24:17 -07001491 bool doDisassemble = false;
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001492 const char* inFile = NULL;
1493 const char* outFile = NULL;
Jack Palevich22305132009-05-13 10:58:45 -07001494 const char* architecture = "arm";
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001495 int i;
Jack Palevich21a15a22009-05-11 14:49:29 -07001496 for (i = 1; i < argc; i++) {
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001497 char* arg = argv[i];
1498 if (arg[0] == '-') {
1499 switch (arg[1]) {
Jack Palevich22305132009-05-13 10:58:45 -07001500 case 'a':
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001501 if (i + 1 >= argc) {
Jack Palevich22305132009-05-13 10:58:45 -07001502 fprintf(stderr, "Expected architecture after -a\n");
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001503 return 2;
1504 }
Jack Palevich22305132009-05-13 10:58:45 -07001505 architecture = argv[i+1];
1506 i += 1;
1507 break;
1508 case 'd':
1509 if (i + 1 >= argc) {
1510 fprintf(stderr, "Expected filename after -d\n");
1511 return 2;
1512 }
1513 doDump = true;
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001514 outFile = argv[i + 1];
1515 i += 1;
1516 break;
Jack Palevicha6535612009-05-13 16:24:17 -07001517 case 'S':
1518 doDisassemble = true;
1519 break;
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001520 default:
1521 fprintf(stderr, "Unrecognized flag %s\n", arg);
1522 return 3;
1523 }
1524 } else if (inFile == NULL) {
1525 inFile = arg;
1526 } else {
1527 break;
1528 }
1529 }
1530
1531 FILE* in = stdin;
1532 if (inFile) {
1533 in = fopen(inFile, "r");
Jack Palevich21a15a22009-05-11 14:49:29 -07001534 if (!in) {
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001535 fprintf(stderr, "Could not open input file %s\n", inFile);
1536 return 1;
1537 }
1538 }
1539 acc::compiler compiler;
Jack Palevich22305132009-05-13 10:58:45 -07001540 acc::compiler::args args;
1541 args.architecture = architecture;
1542 int compileResult = compiler.compile(in, args);
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001543 if (in != stdin) {
1544 fclose(in);
1545 }
1546 if (compileResult) {
1547 fprintf(stderr, "Compile failed: %d\n", compileResult);
1548 return 6;
1549 }
Jack Palevicha6535612009-05-13 16:24:17 -07001550 if (doDisassemble) {
1551 compiler.disassemble(stderr);
1552 }
Jack Palevich22305132009-05-13 10:58:45 -07001553 if (doDump) {
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001554 FILE* save = fopen(outFile, "w");
Jack Palevich21a15a22009-05-11 14:49:29 -07001555 if (!save) {
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001556 fprintf(stderr, "Could not open output file %s\n", outFile);
1557 return 5;
1558 }
1559 compiler.dump(save);
1560 fclose(save);
1561 } else {
Jack Palevichbf42c9c2009-05-12 12:48:35 -07001562 fprintf(stderr, "Executing compiled code:\n");
Jack Palevich21a15a22009-05-11 14:49:29 -07001563 int codeArgc = argc - i + 1;
1564 char** codeArgv = argv + i - 1;
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001565 codeArgv[0] = (char*) (inFile ? inFile : "stdin");
Jack Palevich546b2242009-05-13 15:10:04 -07001566 int result = run(compiler, codeArgc, codeArgv);
Jack Palevich22305132009-05-13 10:58:45 -07001567 fprintf(stderr, "result: %d\n", result);
1568 return result;
Jack Palevichbbf8ab52009-05-11 11:54:30 -07001569 }
1570
1571 return 0;
1572}