sewardj | 41863f8 | 2015-08-12 11:35:27 +0000 | [diff] [blame] | 1 | |
| 2 | #include <stdio.h> |
| 3 | #include <stdlib.h> |
| 4 | #include <assert.h> |
| 5 | #include "tests/asm.h" |
| 6 | #include "tests/malloc.h" |
| 7 | #include <string.h> |
| 8 | |
| 9 | #define XSAVE_AREA_SIZE 832 |
| 10 | |
| 11 | typedef unsigned char UChar; |
| 12 | typedef unsigned int UInt; |
| 13 | typedef unsigned long long int ULong; |
| 14 | |
| 15 | typedef unsigned long int UWord; |
| 16 | |
| 17 | typedef unsigned char Bool; |
| 18 | #define True ((Bool)1) |
| 19 | #define False ((Bool)0) |
| 20 | |
/* 256-bit (8 x 32-bit) test patterns loaded into the YMM registers
   before dumping state with XSAVE.  vec0 and vec1 are arbitrary,
   distinct, non-zero bit patterns so that overwrites and zeroing are
   both visible in the dumps. */
const unsigned int vec0[8]
   = { 0x12345678, 0x11223344, 0x55667788, 0x87654321,
       0x15263748, 0x91929394, 0x19293949, 0x48372615 };

const unsigned int vec1[8]
   = { 0xABCDEF01, 0xAABBCCDD, 0xEEFF0011, 0x10FEDCBA,
       0xBADCFE10, 0xFFEE9988, 0x11667722, 0x01EFCDAB };

/* All-zeroes pattern.  NOTE(review): not referenced anywhere in the
   code visible in this file -- possibly vestigial; confirm before
   removing. */
const unsigned int vecZ[8]
   = { 0, 0, 0, 0, 0, 0, 0, 0 };
| 31 | |
| 32 | /* A version of memset that doesn't use XMM or YMM registers. */ |
/* A version of memset that doesn't use XMM or YMM registers, so that
   calling it cannot disturb the vector state under test. */
static __attribute__((noinline))
void* my_memset(void* s, int c, size_t n)
{
   unsigned char* dst = (unsigned char*)s;
   const unsigned char byte = (unsigned char)(unsigned int)c;
   size_t left;
   for (left = n; left > 0; left--) {
      *dst++ = byte;
      /* An empty asm with a memory clobber defeats any attempt by the
         compiler to autovectorise this loop. */
      __asm__ __volatile__("" ::: "cc","memory");
   }
   return s;
}
| 44 | |
| 45 | /* Ditto for memcpy */ |
| 46 | static __attribute__((noinline)) |
| 47 | void* my_memcpy(void *dest, const void *src, size_t n) |
| 48 | { |
| 49 | size_t i; |
| 50 | for (i = 0; i < n; i++) { |
| 51 | ((unsigned char*)dest)[i] = ((unsigned char*)src)[i]; |
| 52 | __asm__ __volatile__("" ::: "cc","memory"); |
| 53 | } |
| 54 | return dest; |
| 55 | } |
| 56 | |
rhyskidd | 07d0c9e | 2015-08-15 11:16:35 +0000 | [diff] [blame^] | 57 | static void* memalign_zeroed64(size_t size) |
sewardj | 41863f8 | 2015-08-12 11:35:27 +0000 | [diff] [blame] | 58 | { |
rhyskidd | 07d0c9e | 2015-08-15 11:16:35 +0000 | [diff] [blame^] | 59 | char* p = memalign64(size); |
sewardj | 41863f8 | 2015-08-12 11:35:27 +0000 | [diff] [blame] | 60 | if (p && size > 0) { |
| 61 | my_memset(p, 0, size); |
| 62 | } |
| 63 | return p; |
| 64 | } |
| 65 | |
/* Dump processor extended state to the XSAVE area at |p| (which must
   be 64-aligned), restricted by the requested-feature bitmap |rfbm|:
   bit 0 = x87, bit 1 = SSE, bit 2 = AVX.  XSAVE takes the mask in
   edx:eax; edx is zeroed here and eax gets |rfbm|. */
__attribute__((noinline))
static void do_xsave ( void* p, UInt rfbm )
{
   assert(rfbm <= 7);
   __asm__ __volatile__(
      "movq %0, %%rax; xorq %%rdx, %%rdx; xsave (%1)"
      : /*OUT*/ : /*IN*/ "r"((ULong)rfbm), "r"(p)
      : /*TRASH*/ "memory", "rax", "rdx"
   );
}
| 76 | |
/* Restore processor extended state from the XSAVE-format area at |p|,
   restricted by the requested-feature bitmap |rfbm| (bit 0 = x87,
   bit 1 = SSE, bit 2 = AVX).  XRSTOR takes the mask in edx:eax.
   The incomplete clobber list is deliberate here: the callers rely on
   the restored register state staying live afterwards. */
__attribute__((noinline))
static void do_xrstor ( void* p, UInt rfbm )
{
   assert(rfbm <= 7);
   __asm__ __volatile__(
      "movq %0, %%rax; xorq %%rdx, %%rdx; xrstor (%1)"
      : /*OUT*/ : /*IN*/ "r"((ULong)rfbm), "r"(p)
      : /*TRASH*/ "rax", "rdx" /* FIXME plus all X87,SSE,AVX regs */
   );
}
| 87 | |
| 88 | /* set up the FP, SSE and AVX state, and then dump it. */ |
| 89 | static void do_setup_then_xsave ( void* p, UInt rfbm ) |
| 90 | { |
| 91 | __asm__ __volatile__("finit"); |
| 92 | __asm__ __volatile__("fldpi"); |
| 93 | __asm__ __volatile__("fld1"); |
| 94 | __asm__ __volatile__("fldln2"); |
| 95 | __asm__ __volatile__("fldlg2"); |
| 96 | __asm__ __volatile__("fld %st(3)"); |
| 97 | __asm__ __volatile__("fld %st(3)"); |
| 98 | __asm__ __volatile__("fld1"); |
florian | 40fc6b2 | 2015-08-12 13:24:33 +0000 | [diff] [blame] | 99 | __asm__ __volatile__("vmovups (%0), %%ymm0" : : "r"(&vec0[0]) : "xmm0" ); |
| 100 | __asm__ __volatile__("vmovups (%0), %%ymm1" : : "r"(&vec1[0]) : "xmm1" ); |
sewardj | 41863f8 | 2015-08-12 11:35:27 +0000 | [diff] [blame] | 101 | __asm__ __volatile__("vxorps %ymm2, %ymm2, %ymm2"); |
| 102 | __asm__ __volatile__("vmovaps %ymm0, %ymm3"); |
| 103 | __asm__ __volatile__("vmovaps %ymm1, %ymm4"); |
| 104 | __asm__ __volatile__("vmovaps %ymm2, %ymm5"); |
| 105 | __asm__ __volatile__("vmovaps %ymm0, %ymm6"); |
| 106 | __asm__ __volatile__("vmovaps %ymm1, %ymm7"); |
| 107 | __asm__ __volatile__("vmovaps %ymm1, %ymm8"); |
| 108 | __asm__ __volatile__("vmovaps %ymm2, %ymm9"); |
| 109 | __asm__ __volatile__("vmovaps %ymm0, %ymm10"); |
| 110 | __asm__ __volatile__("vmovaps %ymm1, %ymm11"); |
| 111 | __asm__ __volatile__("vmovaps %ymm1, %ymm12"); |
| 112 | __asm__ __volatile__("vmovaps %ymm2, %ymm13"); |
| 113 | __asm__ __volatile__("vmovaps %ymm0, %ymm14"); |
| 114 | __asm__ __volatile__("vmovaps %ymm1, %ymm15"); |
| 115 | do_xsave(p, rfbm); |
| 116 | } |
| 117 | |
/* Return 1 iff byte offset |i| in the XSAVE image is one of the two
   least-significant bytes of an 80-bit FP register slot, i.e. offset
   32 + 16*k or 33 + 16*k for k = 0..7; return 0 otherwise.  Used by
   show() to optionally mask those bytes. */
static int isFPLsbs ( int i )
{
   int base;
   for (base = 32; base <= 144; base += 16) {
      if (i == base || i == base + 1)
         return 1;
   }
   return 0;
}
| 131 | |
| 132 | static void show ( unsigned char* buf, Bool hideBits64to79 ) |
| 133 | { |
| 134 | int i; |
| 135 | for (i = 0; i < XSAVE_AREA_SIZE; i++) { |
| 136 | if ((i % 16) == 0) |
| 137 | fprintf(stderr, "%3d ", i); |
| 138 | if (hideBits64to79 && isFPLsbs(i)) |
| 139 | fprintf(stderr, "xx "); |
| 140 | else |
| 141 | fprintf(stderr, "%02x ", buf[i]); |
| 142 | if (i > 0 && ((i % 16) == 15)) |
| 143 | fprintf(stderr, "\n"); |
| 144 | } |
| 145 | } |
| 146 | |
/* Execute CPUID with |index| in eax and |ecx_in| in ecx, returning
   all four result registers through the pointer arguments. */
static void cpuid ( UInt* eax, UInt* ebx, UInt* ecx, UInt* edx,
                    UInt index, UInt ecx_in )
{
   UInt a,b,c,d;
   asm volatile ("cpuid"
                 : "=a" (a), "=b" (b), "=c" (c), "=d" (d) \
                 : "0" (index), "2"(ecx_in) );
   *eax = a; *ebx = b; *ecx = c; *edx = d;
   //fprintf(stderr, "%08x %08x -> %08x %08x %08x %08x\n",
   //        index,ecx_in, a,b,c,d );
}
| 158 | |
/* Execute XGETBV: read the extended control register selected by
   |ecx_in| (0 = XCR0), returning its value as edx:eax. */
static void xgetbv ( UInt* eax, UInt* edx, UInt ecx_in )
{
   UInt a,d;
   asm volatile ("xgetbv"
                 : "=a" (a), "=d" (d) \
                 : "c"(ecx_in) );
   *eax = a; *edx = d;
}
| 167 | |
| 168 | static void check_for_xsave ( void ) |
| 169 | { |
| 170 | UInt eax, ebx, ecx, edx; |
| 171 | Bool ok = True; |
| 172 | |
| 173 | eax = ebx = ecx = edx = 0; |
| 174 | cpuid(&eax, &ebx, &ecx, &edx, 1,0); |
| 175 | //fprintf(stderr, "cpuid(1).ecx[26=xsave] = %u\n", (ecx >> 26) & 1); |
| 176 | ok = ok && (((ecx >> 26) & 1) == 1); |
| 177 | |
| 178 | eax = ebx = ecx = edx = 0; |
| 179 | cpuid(&eax, &ebx, &ecx, &edx, 1,0); |
| 180 | //fprintf(stderr, "cpuid(1).ecx[27=osxsave] = %u\n", (ecx >> 27) & 1); |
| 181 | ok = ok && (((ecx >> 27) & 1) == 1); |
| 182 | |
| 183 | eax = ebx = ecx = edx = 0; |
| 184 | xgetbv(&eax, &edx, 0); |
| 185 | //fprintf(stderr, "xgetbv(0) = %u:%u\n", edx, eax); |
| 186 | ok = ok && (edx == 0) && (eax == 7); |
| 187 | |
| 188 | if (ok) return; |
| 189 | |
| 190 | fprintf(stderr, |
| 191 | "This program must be run on a CPU that supports AVX and XSAVE.\n"); |
| 192 | exit(1); |
| 193 | } |
| 194 | |
| 195 | |
| 196 | void test_xsave ( Bool hideBits64to79 ) |
| 197 | { |
| 198 | /* Testing XSAVE: |
| 199 | |
| 200 | For RBFM in 0 .. 7 (that is, all combinations): set the x87, SSE |
| 201 | and AVX registers with some values, do XSAVE to dump it, and |
| 202 | print the resulting buffer. */ |
| 203 | |
| 204 | UInt rfbm; |
| 205 | for (rfbm = 0; rfbm <= 7; rfbm++) { |
rhyskidd | 07d0c9e | 2015-08-15 11:16:35 +0000 | [diff] [blame^] | 206 | UChar* saved_img = memalign_zeroed64(XSAVE_AREA_SIZE); |
sewardj | 41863f8 | 2015-08-12 11:35:27 +0000 | [diff] [blame] | 207 | |
| 208 | my_memset(saved_img, 0xAA, XSAVE_AREA_SIZE); |
| 209 | saved_img[512] = 0; |
| 210 | do_setup_then_xsave(saved_img, rfbm); |
| 211 | |
| 212 | fprintf(stderr, |
| 213 | "------------------ XSAVE, rfbm = %u ------------------\n", rfbm); |
| 214 | show(saved_img, hideBits64to79); |
| 215 | fprintf(stderr, "\n"); |
| 216 | |
| 217 | free(saved_img); |
| 218 | } |
| 219 | } |
| 220 | |
| 221 | |
| 222 | void test_xrstor ( Bool hideBits64to79 ) |
| 223 | { |
| 224 | /* Testing XRSTOR is more complex than testing XSAVE, because the |
| 225 | loaded value(s) depend not only on what bits are requested (by |
| 226 | RBFM) but also on what bits are actually present in the image |
| 227 | (defined by XSTATE_BV). So we have to test all 64 (8 x 8) |
| 228 | combinations. |
| 229 | |
| 230 | The approach is to fill a memory buffer with data, do XRSTOR |
| 231 | from the buffer, them dump all components with XSAVE in a new |
| 232 | buffer, and print the result. This is complicated by the fact |
| 233 | that we need to be able to see which parts of the state (in |
| 234 | registers) are neither overwritten nor zeroed by the restore. |
| 235 | Hence the registers must be pre-filled with values which are |
| 236 | neither zero nor the data to be loaded. We choose to use 0x55 |
| 237 | where possible. */ |
| 238 | |
rhyskidd | 07d0c9e | 2015-08-15 11:16:35 +0000 | [diff] [blame^] | 239 | UChar* fives = memalign_zeroed64(XSAVE_AREA_SIZE); |
sewardj | 41863f8 | 2015-08-12 11:35:27 +0000 | [diff] [blame] | 240 | my_memset(fives, 0x55, XSAVE_AREA_SIZE); |
| 241 | /* Set MXCSR so that the insn doesn't fault */ |
| 242 | fives[24] = 0x80; |
| 243 | fives[25] = 0x1f; |
| 244 | fives[26] = 0; |
| 245 | fives[27] = 0; |
| 246 | /* Ditto for the XSAVE header area. Also set XSTATE_BV. */ |
| 247 | fives[512] = 7; |
| 248 | UInt i; |
| 249 | for (i = 1; i <= 23; i++) fives[512+i] = 0; |
| 250 | /* Fill the x87 register values with something that VEX's |
| 251 | 80-vs-64-bit kludging won't mess up -- an 80 bit number which is |
| 252 | representable also as 64 bit: 123456789.0123 */ |
| 253 | for (i = 0; i <= 7; i++) { |
| 254 | UChar* p = &fives[32 + 16 * i]; |
| 255 | p[0]=0x00; p[1]=0xf8; p[2]=0xc2; p[3]=0x64; p[4]=0xa0; |
| 256 | p[5]=0xa2; p[6]=0x79; p[7]=0xeb; p[8]=0x19; p[9]=0x40; |
| 257 | } |
| 258 | /* And mark the tags for all 8 dumped regs as "valid". */ |
| 259 | fives[4/*FTW*/] = 0xFF; |
| 260 | |
| 261 | /* (1) (see comment in loop below) */ |
rhyskidd | 07d0c9e | 2015-08-15 11:16:35 +0000 | [diff] [blame^] | 262 | UChar* standard_test_data = memalign_zeroed64(XSAVE_AREA_SIZE); |
sewardj | 41863f8 | 2015-08-12 11:35:27 +0000 | [diff] [blame] | 263 | do_setup_then_xsave(standard_test_data, 7); |
| 264 | |
| 265 | UInt xstate_bv, rfbm; |
| 266 | for (xstate_bv = 0; xstate_bv <= 7; xstate_bv++) { |
| 267 | for (rfbm = 0; rfbm <= 7; rfbm++) { |
| 268 | //{ xstate_bv = 7; |
| 269 | // { rfbm = 6; |
| 270 | /* 1. Copy the "standard test data" into registers, and dump |
| 271 | it with XSAVE. This gives us an image we can try |
| 272 | restoring from. |
| 273 | |
| 274 | 2. Set the register state to all-0x55s (as far as is |
| 275 | possible), so we can see which parts get overwritten |
| 276 | and which parts get zeroed on the test restore. |
| 277 | |
| 278 | 3. Do the restore from the image prepared in (1). |
| 279 | |
| 280 | 4. Dump the state with XSAVE and print it. |
| 281 | */ |
| 282 | |
| 283 | /* (3a). We can't use |standard_test_data| directly, since we |
| 284 | need to put in the required |xstate_bv| value. So make a |
| 285 | copy and modify that instead. */ |
rhyskidd | 07d0c9e | 2015-08-15 11:16:35 +0000 | [diff] [blame^] | 286 | UChar* img_to_restore_from = memalign_zeroed64(XSAVE_AREA_SIZE); |
sewardj | 41863f8 | 2015-08-12 11:35:27 +0000 | [diff] [blame] | 287 | my_memcpy(img_to_restore_from, standard_test_data, XSAVE_AREA_SIZE); |
| 288 | img_to_restore_from[512] = xstate_bv; |
| 289 | |
| 290 | /* (4a) */ |
rhyskidd | 07d0c9e | 2015-08-15 11:16:35 +0000 | [diff] [blame^] | 291 | UChar* saved_img = memalign_zeroed64(XSAVE_AREA_SIZE); |
sewardj | 41863f8 | 2015-08-12 11:35:27 +0000 | [diff] [blame] | 292 | my_memset(saved_img, 0xAA, XSAVE_AREA_SIZE); |
| 293 | saved_img[512] = 0; |
| 294 | |
| 295 | /* (2) */ |
| 296 | do_xrstor(fives, 7); |
| 297 | |
| 298 | // X87, SSE, AVX state LIVE |
| 299 | |
| 300 | /* (3b) */ |
| 301 | /* and this is what we're actually trying to test */ |
| 302 | do_xrstor(img_to_restore_from, rfbm); |
| 303 | |
| 304 | // X87, SSE, AVX state LIVE |
| 305 | |
| 306 | /* (4b) */ |
| 307 | do_xsave(saved_img, 7); |
| 308 | |
| 309 | fprintf(stderr, |
| 310 | "---------- XRSTOR, xstate_bv = %u, rfbm = %u ---------\n", |
| 311 | xstate_bv, rfbm); |
| 312 | show(saved_img, hideBits64to79); |
| 313 | fprintf(stderr, "\n"); |
| 314 | |
| 315 | free(saved_img); |
| 316 | free(img_to_restore_from); |
| 317 | } |
| 318 | } |
| 319 | } |
| 320 | |
| 321 | |
| 322 | int main ( int argc, char** argv ) |
| 323 | { |
| 324 | Bool hideBits64to79 = argc > 1; |
| 325 | fprintf(stderr, "Re-run with any arg to suppress least-significant\n" |
| 326 | " 16 bits of 80-bit FP numbers\n"); |
| 327 | |
| 328 | check_for_xsave(); |
| 329 | |
| 330 | if (1) |
| 331 | test_xsave(hideBits64to79); |
| 332 | |
| 333 | if (1) |
| 334 | test_xrstor(hideBits64to79); |
| 335 | |
| 336 | return 0; |
| 337 | } |