njn | f131b3a | 2005-12-14 05:33:35 +0000 | [diff] [blame] | 1 | // This artificial program runs a lot of code. The exact amount depends on |
| 2 | // the command line -- if any command line args are given, it does exactly |
| 3 | // the same amount of work, but using four times as much code. |
| 4 | // |
| 5 | // It's a stress test for Valgrind's translation speed; natively the two |
| 6 | // modes run in about the same time (the I-cache effects aren't big enough |
| 7 | // to make a difference), but under Valgrind the one running more code is |
| 8 | // significantly slower due to the extra translation time. |
| 9 | |
| 10 | #include <stdio.h> |
| 11 | #include <string.h> |
sewardj | 0d3a1a8 | 2005-12-15 16:11:25 +0000 | [diff] [blame] | 12 | #include <assert.h> |
| 13 | #include <sys/mman.h> |
njn | f131b3a | 2005-12-14 05:33:35 +0000 | [diff] [blame] | 14 | |
// Bytes reserved per copy of f().  main() copies exactly this many bytes
// starting at f's address, so it must be at least as large as the compiled
// f() on the target.
#define FN_SIZE   996

// Number of copies of f() made in mode 2 (the "lots of code" mode).
#define N_LOOPS   20000

// Ratio of code sizes between the two modes: mode 1 makes N_LOOPS/RATIO
// copies and runs them RATIO times.
#define RATIO     4

// main() calls the copies in groups of four, so the number of copies in
// *both* modes (N_LOOPS and N_LOOPS/RATIO) must be a multiple of four;
// otherwise the last group would index past the copies that were made.
// N_LOOPS must also divide evenly by RATIO for the two modes to do the
// same amount of work.
#if (N_LOOPS % RATIO) != 0 || (N_LOOPS % 4) != 0 || ((N_LOOPS / RATIO) % 4) != 0
#error "N_LOOPS must be a multiple of RATIO, and N_LOOPS and N_LOOPS/RATIO must be multiples of 4"
#endif
| 18 | |
// The unit of work whose machine code main() replicates with memcpy() and
// then calls through function pointers.  Because the copies run from a
// different address, this function must stay self-contained: it calls
// nothing and touches no globals.
//
// Runs 5000 iterations; each iteration dispatches on x % 8 and then
// deliberately falls through every later case:
//    1       -> add 3, add x, double, decrement
//    2       -> add x, double, decrement
//    3       -> double, decrement
//    other   -> decrement   (includes 0, 4..7 and negative remainders)
//
// The doubling cases overflow a 32-bit int long before 5000 iterations, and
// signed overflow is undefined behaviour.  The arithmetic is therefore done
// in unsigned int, where wrap-around is well defined, and converted back on
// return (implementation-defined for out-of-range values; modular on
// two's-complement targets).
//
// Returns the final accumulator value as an int.
int f(int x, int y)
{
   unsigned int acc = (unsigned int)y;
   const unsigned int ux = (unsigned int)x;
   int i;

   for (i = 0; i < 5000; i++) {
      switch (x % 8) {
       case 1:  acc += 3;     /* fallthrough */
       case 2:  acc += ux;    /* fallthrough */
       case 3:  acc *= 2;     /* fallthrough */
       default: acc--;
      }
   }
   return (int)acc;
}
| 32 | |
njn | f131b3a | 2005-12-14 05:33:35 +0000 | [diff] [blame] | 33 | int main(int argc, char* argv[]) |
| 34 | { |
| 35 | int h, i, sum1 = 0, sum2 = 0, sum3 = 0, sum4 = 0; |
| 36 | int n_fns, n_reps; |
| 37 | |
sewardj | 0d3a1a8 | 2005-12-15 16:11:25 +0000 | [diff] [blame] | 38 | char* a = mmap(0, FN_SIZE * N_LOOPS, |
| 39 | PROT_EXEC|PROT_WRITE, |
sewardj | 799a284 | 2006-10-17 02:27:41 +0000 | [diff] [blame] | 40 | MAP_PRIVATE|MAP_ANONYMOUS, -1,0); |
sewardj | 0d3a1a8 | 2005-12-15 16:11:25 +0000 | [diff] [blame] | 41 | assert(a != (char*)MAP_FAILED); |
| 42 | |
njn | f131b3a | 2005-12-14 05:33:35 +0000 | [diff] [blame] | 43 | if (argc <= 1) { |
| 44 | // Mode 1: not so much code |
| 45 | n_fns = N_LOOPS / RATIO; |
| 46 | n_reps = RATIO; |
| 47 | printf("mode 1: "); |
| 48 | } else { |
| 49 | // Mode 2: lots of code |
| 50 | n_fns = N_LOOPS; |
| 51 | n_reps = 1; |
| 52 | printf("mode 1: "); |
| 53 | } |
| 54 | printf("%d copies of f(), %d reps\n", n_fns, n_reps); |
| 55 | |
| 56 | // Make a whole lot of copies of f(). FN_SIZE is much bigger than f() |
| 57 | // will ever be (we hope). |
| 58 | for (i = 0; i < n_fns; i++) { |
| 59 | memcpy(&a[FN_SIZE*i], f, FN_SIZE); |
| 60 | } |
| 61 | |
| 62 | for (h = 0; h < n_reps; h += 1) { |
| 63 | for (i = 0; i < n_fns; i += 4) { |
| 64 | int(*f1)(int,int) = (void*)&a[FN_SIZE*(i+0)]; |
| 65 | int(*f2)(int,int) = (void*)&a[FN_SIZE*(i+1)]; |
| 66 | int(*f3)(int,int) = (void*)&a[FN_SIZE*(i+2)]; |
| 67 | int(*f4)(int,int) = (void*)&a[FN_SIZE*(i+3)]; |
| 68 | sum1 += f1(i+0, n_fns-i+0); |
| 69 | sum2 += f2(i+1, n_fns-i+1); |
| 70 | sum3 += f3(i+2, n_fns-i+2); |
| 71 | sum4 += f4(i+3, n_fns-i+3); |
| 72 | if (i % 1000 == 0) |
| 73 | printf("."); |
| 74 | } |
| 75 | } |
| 76 | printf("result = %d\n", sum1 + sum2 + sum3 + sum4); |
| 77 | return 0; |
| 78 | } |