blob: ae31cbb17c31f0e43b2ffcc0230a388b3223a303 [file] [log] [blame]
// This artificial program runs a lot of code.  The exact amount depends on
// the command line -- if an arg "0" is given, it does exactly the same
// amount of work as with no arg, but using four times as much code.
// If an arg >= 1 is given, the amount of code (relative to the "0" case)
// is multiplied by this arg.
//
// It's a stress test for Valgrind's translation speed; natively the two
// modes run in about the same time (the I-cache effects aren't big enough
// to make a difference), but under Valgrind the one running more code is
// significantly slower due to the extra translation time.
10
// 31 Aug 2015: this only "works" on x86/amd64/s390 by accident; the
// test is essentially kludged.  This "generates" code into memory
// (the mmap'd area) and then executes it.  But historically and even
// after this commit (r15601), the test has been run without
// --smc-check=all or all-non-file.  That just happens to work because
// the "generated" code is never modified, so there's never a
// translated-vs-reality coherence problem.  Really we ought to run
// with the new-as-of-r15601 default --smc-check=all-non-file, but that
// hugely slows it down and makes the results non-comparable with
// pre r15601 results, so instead the .vgperf files now specify the
// old default value --smc-check=stack explicitly.
22
23
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <assert.h>
#include <limits.h>
#if defined(__mips__)
#include <asm/cachectl.h>
#include <sys/syscall.h>
#elif defined(__tilegx__)
#include <asm/cachectl.h>
#endif
#include "tests/sys_mman.h"
njnf131b3a2005-12-14 05:33:35 +000035
// Sizing constants for the benchmark.
enum {
   // Bytes reserved for each copy of f().  Must be big enough to hold the
   // compiled f() and any literal pool that might be used.
   FN_SIZE = 1280,

   // Number of copies of f() made when the multiplier argument is "0".
   // Should be divisible by four.
   N_LOOPS = 20000,

   // Ratio of code sizes between the two modes.
   RATIO = 4
};
40
// Payload routine for the benchmark.  main() copies FN_SIZE bytes starting
// at this function's address into an executable mapping, many times over,
// and then calls the copies.
//
// NOTE(review): because the copies run from different addresses, this
// function should presumably stay free of calls to other functions and of
// anything else that is position-dependent -- confirm per target.
//
// x selects (via x % 8) which of the switch arms is entered on every one of
// the 5000 iterations; the arms deliberately fall through into each other.
// y is the starting value of the accumulator.
//
// Returns the final accumulator value.  The arithmetic is done modulo
// 2^N (N = width of unsigned int) and converted back to int at the end.
int f(int x, int y)
{
   // The repeated "double, then decrement" sequence overflows a signed int
   // within a few dozen iterations, which is undefined behaviour.  Unsigned
   // arithmetic wraps instead and yields the same bit pattern on
   // two's-complement machines.
   unsigned int acc = (unsigned int)y;
   const unsigned int ux = (unsigned int)x;
   int i;

   for (i = 0; i < 5000; i++) {
      switch (x % 8) {
         case 1:  acc += 3;    /* fall through */
         case 2:  acc += ux;   /* fall through */
         case 3:  acc *= 2;    /* fall through */
         default: acc--;
      }
   }
   return (int)acc;
}
54
njnf131b3a2005-12-14 05:33:35 +000055int main(int argc, char* argv[])
56{
57 int h, i, sum1 = 0, sum2 = 0, sum3 = 0, sum4 = 0;
58 int n_fns, n_reps;
59
njnf131b3a2005-12-14 05:33:35 +000060 if (argc <= 1) {
61 // Mode 1: not so much code
62 n_fns = N_LOOPS / RATIO;
63 n_reps = RATIO;
64 printf("mode 1: ");
65 } else {
66 // Mode 2: lots of code
philippe8e1bee42013-10-18 00:08:20 +000067 const int mul = atoi(argv[1]);
68 if (mul == 0)
69 n_fns = N_LOOPS;
70 else
71 n_fns = N_LOOPS * mul;
njnf131b3a2005-12-14 05:33:35 +000072 n_reps = 1;
73 printf("mode 1: ");
74 }
75 printf("%d copies of f(), %d reps\n", n_fns, n_reps);
76
philippe8e1bee42013-10-18 00:08:20 +000077 char* a = mmap(0, FN_SIZE * n_fns,
78 PROT_EXEC|PROT_WRITE,
79 MAP_PRIVATE|MAP_ANONYMOUS, -1,0);
80 assert(a != (char*)MAP_FAILED);
81
njnf131b3a2005-12-14 05:33:35 +000082 // Make a whole lot of copies of f(). FN_SIZE is much bigger than f()
83 // will ever be (we hope).
84 for (i = 0; i < n_fns; i++) {
85 memcpy(&a[FN_SIZE*i], f, FN_SIZE);
86 }
petarj213b0d32013-09-15 22:16:38 +000087
88#if defined(__mips__)
89 syscall(__NR_cacheflush, a, FN_SIZE * n_fns, ICACHE);
zliu261fbcb2015-04-15 02:56:20 +000090#elif defined(__tilegx__)
91 cacheflush(a, FN_SIZE * n_fns, ICACHE);
petarj213b0d32013-09-15 22:16:38 +000092#endif
93
njnf131b3a2005-12-14 05:33:35 +000094 for (h = 0; h < n_reps; h += 1) {
95 for (i = 0; i < n_fns; i += 4) {
96 int(*f1)(int,int) = (void*)&a[FN_SIZE*(i+0)];
97 int(*f2)(int,int) = (void*)&a[FN_SIZE*(i+1)];
98 int(*f3)(int,int) = (void*)&a[FN_SIZE*(i+2)];
99 int(*f4)(int,int) = (void*)&a[FN_SIZE*(i+3)];
100 sum1 += f1(i+0, n_fns-i+0);
101 sum2 += f2(i+1, n_fns-i+1);
102 sum3 += f3(i+2, n_fns-i+2);
103 sum4 += f4(i+3, n_fns-i+3);
104 if (i % 1000 == 0)
105 printf(".");
106 }
107 }
108 printf("result = %d\n", sum1 + sum2 + sum3 + sum4);
109 return 0;
110}