sewardj | 8badbaa | 2007-05-08 09:20:25 +0000 | [diff] [blame] | 1 | |
| 2 | /*--------------------------------------------------------------------*/ |
| 3 | /*--- Branch predictor simulation cg_branchpred.c ---*/ |
| 4 | /*--------------------------------------------------------------------*/ |
| 5 | |
| 6 | /* |
| 7 | This file is part of Cachegrind, a Valgrind tool for cache |
| 8 | profiling programs. |
| 9 | |
Elliott Hughes | ed39800 | 2017-06-21 14:41:24 -0700 | [diff] [blame] | 10 | Copyright (C) 2002-2017 Nicholas Nethercote |
sewardj | 8badbaa | 2007-05-08 09:20:25 +0000 | [diff] [blame] | 11 | njn@valgrind.org |
| 12 | |
| 13 | This program is free software; you can redistribute it and/or |
| 14 | modify it under the terms of the GNU General Public License as |
| 15 | published by the Free Software Foundation; either version 2 of the |
| 16 | License, or (at your option) any later version. |
| 17 | |
| 18 | This program is distributed in the hope that it will be useful, but |
| 19 | WITHOUT ANY WARRANTY; without even the implied warranty of |
| 20 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 21 | General Public License for more details. |
| 22 | |
| 23 | You should have received a copy of the GNU General Public License |
| 24 | along with this program; if not, write to the Free Software |
| 25 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA |
| 26 | 02111-1307, USA. |
| 27 | |
| 28 | The GNU General Public License is contained in the file COPYING. |
| 29 | */ |
| 30 | |
| 31 | |
| 32 | /* This file contains the actual branch predictor simulator and its |
| 33 | associated state. As with cg_sim.c it is #included directly into |
| 34 | cg_main.c. It provides: |
| 35 | |
| 36 | - a taken/not-taken predictor for conditional branches |
| 37 | - a branch target address predictor for indirect branches |
| 38 | |
| 39 | Function return-address prediction is not modelled, on the basis |
| 40 | that return stack predictors almost always predict correctly, and |
| 41 | also that it is difficult for Valgrind to robustly identify |
| 42 | function calls and returns. |
| 43 | */ |
| 44 | |
| 45 | /* How many bits at the bottom of an instruction address are |
| 46 | guaranteed to be zero? */ |
carll | cae0cc2 | 2014-08-07 23:17:29 +0000 | [diff] [blame] | 47 | #if defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le) \ |
sewardj | f0c1250 | 2014-01-12 12:54:00 +0000 | [diff] [blame] | 48 | || defined(VGA_mips32) || defined(VGA_mips64) || defined(VGA_arm64) |
sewardj | 8badbaa | 2007-05-08 09:20:25 +0000 | [diff] [blame] | 49 | # define N_IADDR_LO_ZERO_BITS 2 |
| 50 | #elif defined(VGA_x86) || defined(VGA_amd64) |
| 51 | # define N_IADDR_LO_ZERO_BITS 0 |
sewardj | f0c1250 | 2014-01-12 12:54:00 +0000 | [diff] [blame] | 52 | #elif defined(VGA_s390x) || defined(VGA_arm) |
sewardj | b5b8740 | 2011-03-07 16:05:35 +0000 | [diff] [blame] | 53 | # define N_IADDR_LO_ZERO_BITS 1 |
sewardj | 8badbaa | 2007-05-08 09:20:25 +0000 | [diff] [blame] | 54 | #else |
| 55 | # error "Unsupported architecture" |
| 56 | #endif |
| 57 | |
| 58 | |
| 59 | /* Get a taken/not-taken prediction for the instruction (presumably a |
| 60 | conditional branch) at instr_addr. Once that's done, update the |
| 61 | predictor state based on whether or not it was actually taken, as |
| 62 | indicated by 'taken'. Finally, return 1 for a mispredict and 0 for |
| 63 | a successful predict. |
| 64 | |
| 65 | The predictor is an array of 16k (== 2^14) 2-bit saturating |
| 66 | counters. Given the address of the branch instruction, the array |
| 67 | index to use is computed both from the low order bits of the branch |
| 68 | instruction's address, and the global history - that is, from the |
| 69 | taken/not-taken behaviour of the most recent few branches. This |
| 70 | makes the predictor able to correlate this branch's behaviour with |
| 71 | that of other branches. |
| 72 | |
| 73 | TODO: use predictor written by someone who understands this stuff. |
| 74 | Perhaps it would be better to move to a standard GShare predictor |
| 75 | and/or tournament predictor. |
| 76 | */ |
| 77 | /* The index is composed of N_HIST bits at the top and N_IADD bits at |
| 78 | the bottom. These numbers chosen somewhat arbitrarily, but note |
| 79 | that making N_IADD_BITS too small (eg 4) can cause large amounts of |
| 80 | aliasing, and hence misprediction, particularly if the history bits |
| 81 | are mostly unchanging. */ |
| 82 | #define N_HIST_BITS 7 |
| 83 | #define N_IADD_BITS 7 |
| 84 | |
| 85 | #define N_BITS (N_HIST_BITS + N_IADD_BITS) |
| 86 | #define N_COUNTERS (1 << N_BITS) |
| 87 | |
| 88 | static UWord shift_register = 0; /* Contains global history */ |
| 89 | static UChar counters[N_COUNTERS]; /* Counter array; presumably auto-zeroed */ |
| 90 | |
| 91 | |
| 92 | static ULong do_cond_branch_predict ( Addr instr_addr, Word takenW ) |
| 93 | { |
sewardj | 620dd26 | 2007-06-05 20:48:54 +0000 | [diff] [blame] | 94 | UWord indx; |
sewardj | 8badbaa | 2007-05-08 09:20:25 +0000 | [diff] [blame] | 95 | Bool predicted_taken, actually_taken, mispredict; |
| 96 | |
| 97 | const UWord hist_mask = (1 << N_HIST_BITS) - 1; |
| 98 | const UWord iadd_mask = (1 << N_IADD_BITS) - 1; |
| 99 | UWord hist_bits = shift_register & hist_mask; |
| 100 | UWord iadd_bits = (instr_addr >> N_IADDR_LO_ZERO_BITS) |
| 101 | & iadd_mask; |
| 102 | |
| 103 | tl_assert(hist_bits <= hist_mask); |
| 104 | tl_assert(iadd_bits <= iadd_mask); |
sewardj | 620dd26 | 2007-06-05 20:48:54 +0000 | [diff] [blame] | 105 | indx = (hist_bits << N_IADD_BITS) | iadd_bits; |
| 106 | tl_assert(indx < N_COUNTERS); |
| 107 | if (0) VG_(printf)("index = %d\n", (Int)indx); |
sewardj | 8badbaa | 2007-05-08 09:20:25 +0000 | [diff] [blame] | 108 | |
| 109 | tl_assert(takenW <= 1); |
sewardj | 620dd26 | 2007-06-05 20:48:54 +0000 | [diff] [blame] | 110 | predicted_taken = counters[ indx ] >= 2; |
sewardj | 8badbaa | 2007-05-08 09:20:25 +0000 | [diff] [blame] | 111 | actually_taken = takenW > 0; |
| 112 | |
| 113 | mispredict = (actually_taken && (!predicted_taken)) |
| 114 | || ((!actually_taken) && predicted_taken); |
| 115 | |
| 116 | shift_register <<= 1; |
| 117 | shift_register |= (actually_taken ? 1 : 0); |
| 118 | |
| 119 | if (actually_taken) { |
sewardj | 620dd26 | 2007-06-05 20:48:54 +0000 | [diff] [blame] | 120 | if (counters[indx] < 3) |
| 121 | counters[indx]++; |
sewardj | 8badbaa | 2007-05-08 09:20:25 +0000 | [diff] [blame] | 122 | } else { |
sewardj | 620dd26 | 2007-06-05 20:48:54 +0000 | [diff] [blame] | 123 | if (counters[indx] > 0) |
| 124 | counters[indx]--; |
sewardj | 8badbaa | 2007-05-08 09:20:25 +0000 | [diff] [blame] | 125 | } |
| 126 | |
sewardj | 620dd26 | 2007-06-05 20:48:54 +0000 | [diff] [blame] | 127 | tl_assert(counters[indx] <= 3); |
sewardj | 8badbaa | 2007-05-08 09:20:25 +0000 | [diff] [blame] | 128 | |
| 129 | return mispredict ? 1 : 0; |
| 130 | } |
| 131 | |
| 132 | |
| 133 | /* A very simple indirect branch predictor. Use the branch's address |
| 134 | to index a table which records the previous target address for this |
| 135 | branch (or whatever aliased with it) and use that as the |
| 136 | prediction. */ |
| 137 | #define N_BTAC_BITS 9 |
| 138 | #define N_BTAC (1 << N_BTAC_BITS) |
| 139 | static Addr btac[N_BTAC]; /* BTAC; presumably auto-zeroed */ |
| 140 | |
| 141 | static ULong do_ind_branch_predict ( Addr instr_addr, Addr actual ) |
| 142 | { |
| 143 | Bool mispredict; |
sewardj | 620dd26 | 2007-06-05 20:48:54 +0000 | [diff] [blame] | 144 | const UWord mask = (1 << N_BTAC_BITS) - 1; |
| 145 | UWord indx = (instr_addr >> N_IADDR_LO_ZERO_BITS) |
| 146 | & mask; |
| 147 | tl_assert(indx < N_BTAC); |
| 148 | mispredict = btac[indx] != actual; |
| 149 | btac[indx] = actual; |
sewardj | 8badbaa | 2007-05-08 09:20:25 +0000 | [diff] [blame] | 150 | return mispredict ? 1 : 0; |
| 151 | } |
| 152 | |
| 153 | |
| 154 | /*--------------------------------------------------------------------*/ |
| 155 | /*--- end cg_branchpred.c ---*/ |
| 156 | /*--------------------------------------------------------------------*/ |
| 157 | |