Gustavo Romero | a08082f | 2017-12-31 18:20:46 -0500 | [diff] [blame] | 1 | /* |
| 2 | * Copyright 2017, Gustavo Romero, IBM Corp. |
| 3 | * Licensed under GPLv2. |
| 4 | * |
| 5 | * Check if thread endianness is flipped inadvertently to BE on trap |
| 6 | * caught in TM whilst MSR.FP and MSR.VEC are zero (i.e. just after |
| 7 | * load_fp and load_vec overflowed). |
| 8 | * |
| 9 | * The issue can be checked on LE machines simply by zeroing load_fp |
| 10 | * and load_vec and then causing a trap in TM. Since the endianness |
 * changes to BE on return from the signal handler, 'nop' is
 * treated as an illegal instruction in the following sequence:
| 13 | * tbegin. |
| 14 | * beq 1f |
| 15 | * trap |
| 16 | * tend. |
| 17 | * 1: nop |
| 18 | * |
| 19 | * However, although the issue is also present on BE machines, it's a |
| 20 | * bit trickier to check it on BE machines because MSR.LE bit is set |
| 21 | * to zero which determines a BE endianness that is the native |
| 22 | * endianness on BE machines, so nothing notably critical happens, |
| 23 | * i.e. no illegal instruction is observed immediately after returning |
| 24 | * from the signal handler (as it happens on LE machines). Thus to test |
| 25 | * it on BE machines LE endianness is forced after a first trap and then |
| 26 | * the endianness is verified on subsequent traps to determine if the |
| 27 | * endianness "flipped back" to the native endianness (BE). |
| 28 | */ |
| 29 | |
| 30 | #define _GNU_SOURCE |
#include <errno.h>
#include <error.h>
#include <htmintrin.h>
#include <inttypes.h>
#include <pthread.h>
#include <sched.h>
#include <signal.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
| 41 | |
| 42 | #include "tm.h" |
| 43 | #include "utils.h" |
| 44 | |
/*
 * Report a fatal error together with the source location of the call site.
 * error_code is handed to error_at_line() as its errnum argument, and the
 * remaining arguments form a printf-style message. The status argument is
 * hard-coded to 1; per the glibc documentation a non-zero status makes
 * error_at_line() terminate the process after printing.
 * Relies on the GNU ', ##__VA_ARGS__' extension so the macro can be used
 * with a format string only.
 */
#define pr_error(error_code, format, ...) \
	error_at_line(1, error_code, __FILE__, __LINE__, format, ##__VA_ARGS__)

/* Mask used by the trap handler to extract the LE bit from the saved MSR. */
#define MSR_LE 1UL
/* Value of the masked MSR.LE bit when the thread runs little-endian. */
#define LE 1UL
| 50 | |
/* Handles of the two worker threads created by tm_trap_test(). */
pthread_t t0_ping;
pthread_t t1_pong;

/*
 * Set to 1 by usr1_signal_handler() and polled by pong() to know when to
 * stop. It is written from a signal handler, so it is declared
 * 'volatile sig_atomic_t': that is the only object type the C standard
 * guarantees can be safely stored to from a handler and re-read by the
 * interrupted code.
 */
volatile sig_atomic_t exit_from_pong;

/* Number of SIGTRAP events handled so far by trap_signal_handler(). */
int trap_event;
/* Non-zero when the machine's native endianness is little-endian. */
int le;

/* Test verdict, written by ping(): true when endianness did not flip. */
bool success;
| 60 | |
| 61 | void trap_signal_handler(int signo, siginfo_t *si, void *uc) |
| 62 | { |
| 63 | ucontext_t *ucp = uc; |
| 64 | uint64_t thread_endianness; |
| 65 | |
| 66 | /* Get thread endianness: extract bit LE from MSR */ |
| 67 | thread_endianness = MSR_LE & ucp->uc_mcontext.gp_regs[PT_MSR]; |
| 68 | |
| 69 | /*** |
| 70 | * Little-Endian Machine |
| 71 | */ |
| 72 | |
| 73 | if (le) { |
| 74 | /* First trap event */ |
| 75 | if (trap_event == 0) { |
| 76 | /* Do nothing. Since it is returning from this trap |
| 77 | * event that endianness is flipped by the bug, so just |
| 78 | * let the process return from the signal handler and |
| 79 | * check on the second trap event if endianness is |
| 80 | * flipped or not. |
| 81 | */ |
| 82 | } |
| 83 | /* Second trap event */ |
| 84 | else if (trap_event == 1) { |
| 85 | /* |
| 86 | * Since trap was caught in TM on first trap event, if |
| 87 | * endianness was still LE (not flipped inadvertently) |
| 88 | * after returning from the signal handler instruction |
| 89 | * (1) is executed (basically a 'nop'), as it's located |
| 90 | * at address of tbegin. +4 (rollback addr). As (1) on |
| 91 | * LE endianness does in effect nothing, instruction (2) |
| 92 | * is then executed again as 'trap', generating a second |
| 93 | * trap event (note that in that case 'trap' is caught |
| 94 | * not in transacional mode). On te other hand, if after |
| 95 | * the return from the signal handler the endianness in- |
| 96 | * advertently flipped, instruction (1) is tread as a |
| 97 | * branch instruction, i.e. b .+8, hence instruction (3) |
| 98 | * and (4) are executed (tbegin.; trap;) and we get sim- |
| 99 | * ilaly on the trap signal handler, but now in TM mode. |
| 100 | * Either way, it's now possible to check the MSR LE bit |
| 101 | * once in the trap handler to verify if endianness was |
| 102 | * flipped or not after the return from the second trap |
| 103 | * event. If endianness is flipped, the bug is present. |
| 104 | * Finally, getting a trap in TM mode or not is just |
| 105 | * worth noting because it affects the math to determine |
| 106 | * the offset added to the NIP on return: the NIP for a |
| 107 | * trap caught in TM is the rollback address, i.e. the |
| 108 | * next instruction after 'tbegin.', whilst the NIP for |
| 109 | * a trap caught in non-transactional mode is the very |
| 110 | * same address of the 'trap' instruction that generated |
| 111 | * the trap event. |
| 112 | */ |
| 113 | |
| 114 | if (thread_endianness == LE) { |
| 115 | /* Go to 'success', i.e. instruction (6) */ |
| 116 | ucp->uc_mcontext.gp_regs[PT_NIP] += 16; |
| 117 | } else { |
| 118 | /* |
| 119 | * Thread endianness is BE, so it flipped |
| 120 | * inadvertently. Thus we flip back to LE and |
| 121 | * set NIP to go to 'failure', instruction (5). |
| 122 | */ |
| 123 | ucp->uc_mcontext.gp_regs[PT_MSR] |= 1UL; |
| 124 | ucp->uc_mcontext.gp_regs[PT_NIP] += 4; |
| 125 | } |
| 126 | } |
| 127 | } |
| 128 | |
| 129 | /*** |
| 130 | * Big-Endian Machine |
| 131 | */ |
| 132 | |
| 133 | else { |
| 134 | /* First trap event */ |
| 135 | if (trap_event == 0) { |
| 136 | /* |
| 137 | * Force thread endianness to be LE. Instructions (1), |
| 138 | * (3), and (4) will be executed, generating a second |
| 139 | * trap in TM mode. |
| 140 | */ |
| 141 | ucp->uc_mcontext.gp_regs[PT_MSR] |= 1UL; |
| 142 | } |
| 143 | /* Second trap event */ |
| 144 | else if (trap_event == 1) { |
| 145 | /* |
| 146 | * Do nothing. If bug is present on return from this |
| 147 | * second trap event endianness will flip back "automat- |
| 148 | * ically" to BE, otherwise thread endianness will |
| 149 | * continue to be LE, just as it was set above. |
| 150 | */ |
| 151 | } |
| 152 | /* A third trap event */ |
| 153 | else { |
| 154 | /* |
| 155 | * Once here it means that after returning from the sec- |
| 156 | * ond trap event instruction (4) (trap) was executed |
| 157 | * as LE, generating a third trap event. In that case |
| 158 | * endianness is still LE as set on return from the |
| 159 | * first trap event, hence no bug. Otherwise, bug |
| 160 | * flipped back to BE on return from the second trap |
| 161 | * event and instruction (4) was executed as 'tdi' (so |
| 162 | * basically a 'nop') and branch to 'failure' in |
| 163 | * instruction (5) was taken to indicate failure and we |
| 164 | * never get here. |
| 165 | */ |
| 166 | |
| 167 | /* |
| 168 | * Flip back to BE and go to instruction (6), i.e. go to |
| 169 | * 'success'. |
| 170 | */ |
| 171 | ucp->uc_mcontext.gp_regs[PT_MSR] &= ~1UL; |
| 172 | ucp->uc_mcontext.gp_regs[PT_NIP] += 8; |
| 173 | } |
| 174 | } |
| 175 | |
| 176 | trap_event++; |
| 177 | } |
| 178 | |
| 179 | void usr1_signal_handler(int signo, siginfo_t *si, void *not_used) |
| 180 | { |
| 181 | /* Got a USR1 signal from ping(), so just tell pong() to exit */ |
| 182 | exit_from_pong = 1; |
| 183 | } |
| 184 | |
| 185 | void *ping(void *not_used) |
| 186 | { |
| 187 | uint64_t i; |
| 188 | |
| 189 | trap_event = 0; |
| 190 | |
| 191 | /* |
| 192 | * Wait an amount of context switches so load_fp and load_vec overflows |
| 193 | * and MSR_[FP|VEC|V] is 0. |
| 194 | */ |
| 195 | for (i = 0; i < 1024*1024*512; i++) |
| 196 | ; |
| 197 | |
| 198 | asm goto( |
| 199 | /* |
| 200 | * [NA] means "Native Endianness", i.e. it tells how a |
| 201 | * instruction is executed on machine's native endianness (in |
| 202 | * other words, native endianness matches kernel endianness). |
| 203 | * [OP] means "Opposite Endianness", i.e. on a BE machine, it |
| 204 | * tells how a instruction is executed as a LE instruction; con- |
| 205 | * versely, on a LE machine, it tells how a instruction is |
| 206 | * executed as a BE instruction. When [NA] is omitted, it means |
| 207 | * that the native interpretation of a given instruction is not |
| 208 | * relevant for the test. Likewise when [OP] is omitted. |
| 209 | */ |
| 210 | |
| 211 | " tbegin. ;" /* (0) tbegin. [NA] */ |
| 212 | " tdi 0, 0, 0x48;" /* (1) nop [NA]; b (3) [OP] */ |
| 213 | " trap ;" /* (2) trap [NA] */ |
| 214 | ".long 0x1D05007C;" /* (3) tbegin. [OP] */ |
| 215 | ".long 0x0800E07F;" /* (4) trap [OP]; nop [NA] */ |
| 216 | " b %l[failure] ;" /* (5) b [NA]; MSR.LE flipped (bug) */ |
| 217 | " b %l[success] ;" /* (6) b [NA]; MSR.LE did not flip (ok)*/ |
| 218 | |
| 219 | : : : : failure, success); |
| 220 | |
| 221 | failure: |
| 222 | success = false; |
| 223 | goto exit_from_ping; |
| 224 | |
| 225 | success: |
| 226 | success = true; |
| 227 | |
| 228 | exit_from_ping: |
| 229 | /* Tell pong() to exit before leaving */ |
| 230 | pthread_kill(t1_pong, SIGUSR1); |
| 231 | return NULL; |
| 232 | } |
| 233 | |
| 234 | void *pong(void *not_used) |
| 235 | { |
| 236 | while (!exit_from_pong) |
| 237 | /* |
| 238 | * Induce context switches on ping() thread |
| 239 | * until ping() finishes its job and signs |
| 240 | * to exit from this loop. |
| 241 | */ |
| 242 | sched_yield(); |
| 243 | |
| 244 | return NULL; |
| 245 | } |
| 246 | |
| 247 | int tm_trap_test(void) |
| 248 | { |
| 249 | uint16_t k = 1; |
| 250 | |
| 251 | int rc; |
| 252 | |
| 253 | pthread_attr_t attr; |
| 254 | cpu_set_t cpuset; |
| 255 | |
| 256 | struct sigaction trap_sa; |
| 257 | |
Michael Ellerman | 192b2e7 | 2018-02-26 13:17:07 +1100 | [diff] [blame] | 258 | SKIP_IF(!have_htm()); |
| 259 | |
Gustavo Romero | a08082f | 2017-12-31 18:20:46 -0500 | [diff] [blame] | 260 | trap_sa.sa_flags = SA_SIGINFO; |
| 261 | trap_sa.sa_sigaction = trap_signal_handler; |
| 262 | sigaction(SIGTRAP, &trap_sa, NULL); |
| 263 | |
| 264 | struct sigaction usr1_sa; |
| 265 | |
| 266 | usr1_sa.sa_flags = SA_SIGINFO; |
| 267 | usr1_sa.sa_sigaction = usr1_signal_handler; |
| 268 | sigaction(SIGUSR1, &usr1_sa, NULL); |
| 269 | |
| 270 | /* Set only CPU 0 in the mask. Both threads will be bound to cpu 0. */ |
| 271 | CPU_ZERO(&cpuset); |
| 272 | CPU_SET(0, &cpuset); |
| 273 | |
| 274 | /* Init pthread attribute */ |
| 275 | rc = pthread_attr_init(&attr); |
| 276 | if (rc) |
| 277 | pr_error(rc, "pthread_attr_init()"); |
| 278 | |
| 279 | /* |
| 280 | * Bind thread ping() and pong() both to CPU 0 so they ping-pong and |
| 281 | * speed up context switches on ping() thread, speeding up the load_fp |
| 282 | * and load_vec overflow. |
| 283 | */ |
| 284 | rc = pthread_attr_setaffinity_np(&attr, sizeof(cpu_set_t), &cpuset); |
| 285 | if (rc) |
| 286 | pr_error(rc, "pthread_attr_setaffinity()"); |
| 287 | |
| 288 | /* Figure out the machine endianness */ |
| 289 | le = (int) *(uint8_t *)&k; |
| 290 | |
| 291 | printf("%s machine detected. Checking if endianness flips %s", |
| 292 | le ? "Little-Endian" : "Big-Endian", |
| 293 | "inadvertently on trap in TM... "); |
| 294 | |
| 295 | rc = fflush(0); |
| 296 | if (rc) |
| 297 | pr_error(rc, "fflush()"); |
| 298 | |
| 299 | /* Launch ping() */ |
| 300 | rc = pthread_create(&t0_ping, &attr, ping, NULL); |
| 301 | if (rc) |
| 302 | pr_error(rc, "pthread_create()"); |
| 303 | |
| 304 | exit_from_pong = 0; |
| 305 | |
| 306 | /* Launch pong() */ |
| 307 | rc = pthread_create(&t1_pong, &attr, pong, NULL); |
| 308 | if (rc) |
| 309 | pr_error(rc, "pthread_create()"); |
| 310 | |
| 311 | rc = pthread_join(t0_ping, NULL); |
| 312 | if (rc) |
| 313 | pr_error(rc, "pthread_join()"); |
| 314 | |
| 315 | rc = pthread_join(t1_pong, NULL); |
| 316 | if (rc) |
| 317 | pr_error(rc, "pthread_join()"); |
| 318 | |
| 319 | if (success) { |
| 320 | printf("no.\n"); /* no, endianness did not flip inadvertently */ |
| 321 | return EXIT_SUCCESS; |
| 322 | } |
| 323 | |
| 324 | printf("yes!\n"); /* yes, endianness did flip inadvertently */ |
| 325 | return EXIT_FAILURE; |
| 326 | } |
| 327 | |
| 328 | int main(int argc, char **argv) |
| 329 | { |
| 330 | return test_harness(tm_trap_test, "tm_trap_test"); |
| 331 | } |