sewardj | 8badbaa | 2007-05-08 09:20:25 +0000 | [diff] [blame] | 1 | |
| 2 | /*--------------------------------------------------------------------*/ |
| 3 | /*--- Branch predictor simulation cg_branchpred.c ---*/ |
| 4 | /*--------------------------------------------------------------------*/ |
| 5 | |
| 6 | /* |
| 7 | This file is part of Cachegrind, a Valgrind tool for cache |
| 8 | profiling programs. |
| 9 | |
njn | 9f20746 | 2009-03-10 22:02:09 +0000 | [diff] [blame] | 10 | Copyright (C) 2002-2009 Nicholas Nethercote |
sewardj | 8badbaa | 2007-05-08 09:20:25 +0000 | [diff] [blame] | 11 | njn@valgrind.org |
| 12 | |
| 13 | This program is free software; you can redistribute it and/or |
| 14 | modify it under the terms of the GNU General Public License as |
| 15 | published by the Free Software Foundation; either version 2 of the |
| 16 | License, or (at your option) any later version. |
| 17 | |
| 18 | This program is distributed in the hope that it will be useful, but |
| 19 | WITHOUT ANY WARRANTY; without even the implied warranty of |
| 20 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 21 | General Public License for more details. |
| 22 | |
| 23 | You should have received a copy of the GNU General Public License |
| 24 | along with this program; if not, write to the Free Software |
| 25 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA |
| 26 | 02111-1307, USA. |
| 27 | |
| 28 | The GNU General Public License is contained in the file COPYING. |
| 29 | */ |
| 30 | |
| 31 | |
| 32 | /* This file contains the actual branch predictor simulator and its |
| 33 | associated state. As with cg_sim.c it is #included directly into |
| 34 | cg_main.c. It provides: |
| 35 | |
| 36 | - a taken/not-taken predictor for conditional branches |
| 37 | - a branch target address predictor for indirect branches |
| 38 | |
| 39 | Function return-address prediction is not modelled, on the basis |
| 40 | that return stack predictors almost always predict correctly, and |
| 41 | also that it is difficult for Valgrind to robustly identify |
| 42 | function calls and returns. |
| 43 | */ |
| 44 | |
| 45 | /* How many bits at the bottom of an instruction address are |
| 46 | guaranteed to be zero? */ |
| 47 | #if defined(VGA_ppc32) || defined(VGA_ppc64) |
| 48 | # define N_IADDR_LO_ZERO_BITS 2 |
| 49 | #elif defined(VGA_x86) || defined(VGA_amd64) |
| 50 | # define N_IADDR_LO_ZERO_BITS 0 |
| 51 | #else |
| 52 | # error "Unsupported architecture" |
| 53 | #endif |
| 54 | |
| 55 | |
| 56 | /* Get a taken/not-taken prediction for the instruction (presumably a |
| 57 | conditional branch) at instr_addr. Once that's done, update the |
| 58 | predictor state based on whether or not it was actually taken, as |
| 59 | indicated by 'taken'. Finally, return 1 for a mispredict and 0 for |
| 60 | a successful predict. |
| 61 | |
| 62 | The predictor is an array of 16k (== 2^14) 2-bit saturating |
| 63 | counters. Given the address of the branch instruction, the array |
| 64 | index to use is computed both from the low order bits of the branch |
| 65 | instruction's address, and the global history - that is, from the |
| 66 | taken/not-taken behaviour of the most recent few branches. This |
| 67 | makes the predictor able to correlate this branch's behaviour with |
| 68 | that of other branches. |
| 69 | |
| 70 | TODO: use predictor written by someone who understands this stuff. |
| 71 | Perhaps it would be better to move to a standard GShare predictor |
| 72 | and/or tournament predictor. |
| 73 | */ |
| 74 | /* The index is composed of N_HIST bits at the top and N_IADD bits at |
| 75 | the bottom. These numbers chosen somewhat arbitrarily, but note |
| 76 | that making N_IADD_BITS too small (eg 4) can cause large amounts of |
| 77 | aliasing, and hence misprediction, particularly if the history bits |
| 78 | are mostly unchanging. */ |
| 79 | #define N_HIST_BITS 7 |
| 80 | #define N_IADD_BITS 7 |
| 81 | |
| 82 | #define N_BITS (N_HIST_BITS + N_IADD_BITS) |
| 83 | #define N_COUNTERS (1 << N_BITS) |
| 84 | |
| 85 | static UWord shift_register = 0; /* Contains global history */ |
| 86 | static UChar counters[N_COUNTERS]; /* Counter array; presumably auto-zeroed */ |
| 87 | |
| 88 | |
| 89 | static ULong do_cond_branch_predict ( Addr instr_addr, Word takenW ) |
| 90 | { |
sewardj | 620dd26 | 2007-06-05 20:48:54 +0000 | [diff] [blame] | 91 | UWord indx; |
sewardj | 8badbaa | 2007-05-08 09:20:25 +0000 | [diff] [blame] | 92 | Bool predicted_taken, actually_taken, mispredict; |
| 93 | |
| 94 | const UWord hist_mask = (1 << N_HIST_BITS) - 1; |
| 95 | const UWord iadd_mask = (1 << N_IADD_BITS) - 1; |
| 96 | UWord hist_bits = shift_register & hist_mask; |
| 97 | UWord iadd_bits = (instr_addr >> N_IADDR_LO_ZERO_BITS) |
| 98 | & iadd_mask; |
| 99 | |
| 100 | tl_assert(hist_bits <= hist_mask); |
| 101 | tl_assert(iadd_bits <= iadd_mask); |
sewardj | 620dd26 | 2007-06-05 20:48:54 +0000 | [diff] [blame] | 102 | indx = (hist_bits << N_IADD_BITS) | iadd_bits; |
| 103 | tl_assert(indx < N_COUNTERS); |
| 104 | if (0) VG_(printf)("index = %d\n", (Int)indx); |
sewardj | 8badbaa | 2007-05-08 09:20:25 +0000 | [diff] [blame] | 105 | |
| 106 | tl_assert(takenW <= 1); |
sewardj | 620dd26 | 2007-06-05 20:48:54 +0000 | [diff] [blame] | 107 | predicted_taken = counters[ indx ] >= 2; |
sewardj | 8badbaa | 2007-05-08 09:20:25 +0000 | [diff] [blame] | 108 | actually_taken = takenW > 0; |
| 109 | |
| 110 | mispredict = (actually_taken && (!predicted_taken)) |
| 111 | || ((!actually_taken) && predicted_taken); |
| 112 | |
| 113 | shift_register <<= 1; |
| 114 | shift_register |= (actually_taken ? 1 : 0); |
| 115 | |
| 116 | if (actually_taken) { |
sewardj | 620dd26 | 2007-06-05 20:48:54 +0000 | [diff] [blame] | 117 | if (counters[indx] < 3) |
| 118 | counters[indx]++; |
sewardj | 8badbaa | 2007-05-08 09:20:25 +0000 | [diff] [blame] | 119 | } else { |
sewardj | 620dd26 | 2007-06-05 20:48:54 +0000 | [diff] [blame] | 120 | if (counters[indx] > 0) |
| 121 | counters[indx]--; |
sewardj | 8badbaa | 2007-05-08 09:20:25 +0000 | [diff] [blame] | 122 | } |
| 123 | |
sewardj | 620dd26 | 2007-06-05 20:48:54 +0000 | [diff] [blame] | 124 | tl_assert(counters[indx] <= 3); |
sewardj | 8badbaa | 2007-05-08 09:20:25 +0000 | [diff] [blame] | 125 | |
| 126 | return mispredict ? 1 : 0; |
| 127 | } |
| 128 | |
| 129 | |
| 130 | /* A very simple indirect branch predictor. Use the branch's address |
| 131 | to index a table which records the previous target address for this |
| 132 | branch (or whatever aliased with it) and use that as the |
| 133 | prediction. */ |
| 134 | #define N_BTAC_BITS 9 |
| 135 | #define N_BTAC (1 << N_BTAC_BITS) |
| 136 | static Addr btac[N_BTAC]; /* BTAC; presumably auto-zeroed */ |
| 137 | |
| 138 | static ULong do_ind_branch_predict ( Addr instr_addr, Addr actual ) |
| 139 | { |
| 140 | Bool mispredict; |
sewardj | 620dd26 | 2007-06-05 20:48:54 +0000 | [diff] [blame] | 141 | const UWord mask = (1 << N_BTAC_BITS) - 1; |
| 142 | UWord indx = (instr_addr >> N_IADDR_LO_ZERO_BITS) |
| 143 | & mask; |
| 144 | tl_assert(indx < N_BTAC); |
| 145 | mispredict = btac[indx] != actual; |
| 146 | btac[indx] = actual; |
sewardj | 8badbaa | 2007-05-08 09:20:25 +0000 | [diff] [blame] | 147 | return mispredict ? 1 : 0; |
| 148 | } |
| 149 | |
| 150 | |
| 151 | /*--------------------------------------------------------------------*/ |
| 152 | /*--- end cg_branchpred.c ---*/ |
| 153 | /*--------------------------------------------------------------------*/ |
| 154 | |