| |
| /* Contrary to what the next comment says, this is now an amd64 CPU |
| test. */ |
| |
| /* |
| * x86 CPU test |
| * |
| * Copyright (c) 2003 Fabrice Bellard |
| * |
| * This program is free software; you can redistribute it and/or modify |
| * it under the terms of the GNU General Public License as published by |
| * the Free Software Foundation; either version 2 of the License, or |
| * (at your option) any later version. |
| * |
| * This program is distributed in the hope that it will be useful, |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| * GNU General Public License for more details. |
| * |
| * You should have received a copy of the GNU General Public License |
| * along with this program; if not, write to the Free Software |
| * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
| */ |
| |
| #include <stdlib.h> |
| #include <stdio.h> |
| #include <string.h> |
| #include <inttypes.h> |
| #include <math.h> |
| #include <stdarg.h> |
| #include <assert.h> |
| |
| |
| ////////////////////////////////////////////////////////////////// |
| ////////////////////////////////////////////////////////////////// |
| |
| /* |
| * This is an OpenSSL-compatible implementation of the RSA Data Security, Inc. |
| * MD5 Message-Digest Algorithm (RFC 1321). |
| * |
| * Homepage: |
| * http://openwall.info/wiki/people/solar/software/public-domain-source-code/md5 |
| * |
| * Author: |
| * Alexander Peslyak, better known as Solar Designer <solar at openwall.com> |
| * |
| * This software was written by Alexander Peslyak in 2001. No copyright is |
| * claimed, and the software is hereby placed in the public domain. |
| * In case this attempt to disclaim copyright and place the software in the |
| * public domain is deemed null and void, then the software is |
| * Copyright (c) 2001 Alexander Peslyak and it is hereby released to the |
| * general public under the following terms: |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted. |
| * |
| * There's ABSOLUTELY NO WARRANTY, express or implied. |
| * |
| * (This is a heavily cut-down "BSD license".) |
| * |
| * This differs from Colin Plumb's older public domain implementation in that |
| * no exactly 32-bit integer data type is required (any 32-bit or wider |
| * unsigned integer data type will do), there's no compile-time endianness |
| * configuration, and the function prototypes match OpenSSL's. No code from |
| * Colin Plumb's implementation has been reused; this comment merely compares |
| * the properties of the two independent implementations. |
| * |
| * The primary goals of this implementation are portability and ease of use. |
| * It is meant to be fast, but not as fast as possible. Some known |
| * optimizations are not included to reduce source code size and avoid |
| * compile-time configuration. |
| */ |
| |
| #include <string.h> |
| |
| // BEGIN #include "md5.h" |
| /* Any 32-bit or wider unsigned integer data type will do */ |
| typedef unsigned int MD5_u32plus; |
| |
| typedef struct { |
| MD5_u32plus lo, hi; |
| MD5_u32plus a, b, c, d; |
| unsigned char buffer[64]; |
| MD5_u32plus block[16]; |
| } MD5_CTX; |
| |
| void MD5_Init(MD5_CTX *ctx); |
| void MD5_Update(MD5_CTX *ctx, const void *data, unsigned long size); |
| void MD5_Final(unsigned char *result, MD5_CTX *ctx); |
| // END #include "md5.h" |
| |
| /* |
| * The basic MD5 functions. |
| * |
| * F and G are optimized compared to their RFC 1321 definitions for |
| * architectures that lack an AND-NOT instruction, just like in Colin Plumb's |
| * implementation. |
| */ |
| #define F(x, y, z) ((z) ^ ((x) & ((y) ^ (z)))) |
| #define G(x, y, z) ((y) ^ ((z) & ((x) ^ (y)))) |
| #define H(x, y, z) (((x) ^ (y)) ^ (z)) |
| #define H2(x, y, z) ((x) ^ ((y) ^ (z))) |
| #define I(x, y, z) ((y) ^ ((x) | ~(z))) |
| |
| /* |
| * The MD5 transformation for all four rounds. |
| */ |
| #define STEP(f, a, b, c, d, x, t, s) \ |
| (a) += f((b), (c), (d)) + (x) + (t); \ |
| (a) = (((a) << (s)) | (((a) & 0xffffffff) >> (32 - (s)))); \ |
| (a) += (b); |
| |
| /* |
| * SET reads 4 input bytes in little-endian byte order and stores them in a |
| * properly aligned word in host byte order. |
| * |
| * The check for little-endian architectures that tolerate unaligned memory |
| * accesses is just an optimization. Nothing will break if it fails to detect |
| * a suitable architecture. |
| * |
| * Unfortunately, this optimization may be a C strict aliasing rules violation |
| * if the caller's data buffer has effective type that cannot be aliased by |
| * MD5_u32plus. In practice, this problem may occur if these MD5 routines are |
| * inlined into a calling function, or with future and dangerously advanced |
| * link-time optimizations. For the time being, keeping these MD5 routines in |
| * their own translation unit avoids the problem. |
| */ |
| #if defined(__i386__) || defined(__x86_64__) || defined(__vax__) |
| #define SET(n) \ |
| (*(MD5_u32plus *)&ptr[(n) * 4]) |
| #define GET(n) \ |
| SET(n) |
| #else |
| #define SET(n) \ |
| (ctx->block[(n)] = \ |
| (MD5_u32plus)ptr[(n) * 4] | \ |
| ((MD5_u32plus)ptr[(n) * 4 + 1] << 8) | \ |
| ((MD5_u32plus)ptr[(n) * 4 + 2] << 16) | \ |
| ((MD5_u32plus)ptr[(n) * 4 + 3] << 24)) |
| #define GET(n) \ |
| (ctx->block[(n)]) |
| #endif |
| |
| /* |
| * This processes one or more 64-byte data blocks, but does NOT update the bit |
| * counters. There are no alignment requirements. |
| */ |
| static const void *body(MD5_CTX *ctx, const void *data, unsigned long size) |
| { |
| const unsigned char *ptr; |
| MD5_u32plus a, b, c, d; |
| MD5_u32plus saved_a, saved_b, saved_c, saved_d; |
| |
| ptr = (const unsigned char *)data; |
| |
| a = ctx->a; |
| b = ctx->b; |
| c = ctx->c; |
| d = ctx->d; |
| |
| do { |
| saved_a = a; |
| saved_b = b; |
| saved_c = c; |
| saved_d = d; |
| |
| /* Round 1 */ |
| STEP(F, a, b, c, d, SET(0), 0xd76aa478, 7) |
| STEP(F, d, a, b, c, SET(1), 0xe8c7b756, 12) |
| STEP(F, c, d, a, b, SET(2), 0x242070db, 17) |
| STEP(F, b, c, d, a, SET(3), 0xc1bdceee, 22) |
| STEP(F, a, b, c, d, SET(4), 0xf57c0faf, 7) |
| STEP(F, d, a, b, c, SET(5), 0x4787c62a, 12) |
| STEP(F, c, d, a, b, SET(6), 0xa8304613, 17) |
| STEP(F, b, c, d, a, SET(7), 0xfd469501, 22) |
| STEP(F, a, b, c, d, SET(8), 0x698098d8, 7) |
| STEP(F, d, a, b, c, SET(9), 0x8b44f7af, 12) |
| STEP(F, c, d, a, b, SET(10), 0xffff5bb1, 17) |
| STEP(F, b, c, d, a, SET(11), 0x895cd7be, 22) |
| STEP(F, a, b, c, d, SET(12), 0x6b901122, 7) |
| STEP(F, d, a, b, c, SET(13), 0xfd987193, 12) |
| STEP(F, c, d, a, b, SET(14), 0xa679438e, 17) |
| STEP(F, b, c, d, a, SET(15), 0x49b40821, 22) |
| |
| /* Round 2 */ |
| STEP(G, a, b, c, d, GET(1), 0xf61e2562, 5) |
| STEP(G, d, a, b, c, GET(6), 0xc040b340, 9) |
| STEP(G, c, d, a, b, GET(11), 0x265e5a51, 14) |
| STEP(G, b, c, d, a, GET(0), 0xe9b6c7aa, 20) |
| STEP(G, a, b, c, d, GET(5), 0xd62f105d, 5) |
| STEP(G, d, a, b, c, GET(10), 0x02441453, 9) |
| STEP(G, c, d, a, b, GET(15), 0xd8a1e681, 14) |
| STEP(G, b, c, d, a, GET(4), 0xe7d3fbc8, 20) |
| STEP(G, a, b, c, d, GET(9), 0x21e1cde6, 5) |
| STEP(G, d, a, b, c, GET(14), 0xc33707d6, 9) |
| STEP(G, c, d, a, b, GET(3), 0xf4d50d87, 14) |
| STEP(G, b, c, d, a, GET(8), 0x455a14ed, 20) |
| STEP(G, a, b, c, d, GET(13), 0xa9e3e905, 5) |
| STEP(G, d, a, b, c, GET(2), 0xfcefa3f8, 9) |
| STEP(G, c, d, a, b, GET(7), 0x676f02d9, 14) |
| STEP(G, b, c, d, a, GET(12), 0x8d2a4c8a, 20) |
| |
| /* Round 3 */ |
| STEP(H, a, b, c, d, GET(5), 0xfffa3942, 4) |
| STEP(H2, d, a, b, c, GET(8), 0x8771f681, 11) |
| STEP(H, c, d, a, b, GET(11), 0x6d9d6122, 16) |
| STEP(H2, b, c, d, a, GET(14), 0xfde5380c, 23) |
| STEP(H, a, b, c, d, GET(1), 0xa4beea44, 4) |
| STEP(H2, d, a, b, c, GET(4), 0x4bdecfa9, 11) |
| STEP(H, c, d, a, b, GET(7), 0xf6bb4b60, 16) |
| STEP(H2, b, c, d, a, GET(10), 0xbebfbc70, 23) |
| STEP(H, a, b, c, d, GET(13), 0x289b7ec6, 4) |
| STEP(H2, d, a, b, c, GET(0), 0xeaa127fa, 11) |
| STEP(H, c, d, a, b, GET(3), 0xd4ef3085, 16) |
| STEP(H2, b, c, d, a, GET(6), 0x04881d05, 23) |
| STEP(H, a, b, c, d, GET(9), 0xd9d4d039, 4) |
| STEP(H2, d, a, b, c, GET(12), 0xe6db99e5, 11) |
| STEP(H, c, d, a, b, GET(15), 0x1fa27cf8, 16) |
| STEP(H2, b, c, d, a, GET(2), 0xc4ac5665, 23) |
| |
| /* Round 4 */ |
| STEP(I, a, b, c, d, GET(0), 0xf4292244, 6) |
| STEP(I, d, a, b, c, GET(7), 0x432aff97, 10) |
| STEP(I, c, d, a, b, GET(14), 0xab9423a7, 15) |
| STEP(I, b, c, d, a, GET(5), 0xfc93a039, 21) |
| STEP(I, a, b, c, d, GET(12), 0x655b59c3, 6) |
| STEP(I, d, a, b, c, GET(3), 0x8f0ccc92, 10) |
| STEP(I, c, d, a, b, GET(10), 0xffeff47d, 15) |
| STEP(I, b, c, d, a, GET(1), 0x85845dd1, 21) |
| STEP(I, a, b, c, d, GET(8), 0x6fa87e4f, 6) |
| STEP(I, d, a, b, c, GET(15), 0xfe2ce6e0, 10) |
| STEP(I, c, d, a, b, GET(6), 0xa3014314, 15) |
| STEP(I, b, c, d, a, GET(13), 0x4e0811a1, 21) |
| STEP(I, a, b, c, d, GET(4), 0xf7537e82, 6) |
| STEP(I, d, a, b, c, GET(11), 0xbd3af235, 10) |
| STEP(I, c, d, a, b, GET(2), 0x2ad7d2bb, 15) |
| STEP(I, b, c, d, a, GET(9), 0xeb86d391, 21) |
| |
| a += saved_a; |
| b += saved_b; |
| c += saved_c; |
| d += saved_d; |
| |
| ptr += 64; |
| } while (size -= 64); |
| |
| ctx->a = a; |
| ctx->b = b; |
| ctx->c = c; |
| ctx->d = d; |
| |
| return ptr; |
| } |
| |
| void MD5_Init(MD5_CTX *ctx) |
| { |
| ctx->a = 0x67452301; |
| ctx->b = 0xefcdab89; |
| ctx->c = 0x98badcfe; |
| ctx->d = 0x10325476; |
| |
| ctx->lo = 0; |
| ctx->hi = 0; |
| } |
| |
| void MD5_Update(MD5_CTX *ctx, const void *data, unsigned long size) |
| { |
| MD5_u32plus saved_lo; |
| unsigned long used, available; |
| |
| saved_lo = ctx->lo; |
| if ((ctx->lo = (saved_lo + size) & 0x1fffffff) < saved_lo) |
| ctx->hi++; |
| ctx->hi += size >> 29; |
| |
| used = saved_lo & 0x3f; |
| |
| if (used) { |
| available = 64 - used; |
| |
| if (size < available) { |
| memcpy(&ctx->buffer[used], data, size); |
| return; |
| } |
| |
| memcpy(&ctx->buffer[used], data, available); |
| data = (const unsigned char *)data + available; |
| size -= available; |
| body(ctx, ctx->buffer, 64); |
| } |
| |
| if (size >= 64) { |
| data = body(ctx, data, size & ~(unsigned long)0x3f); |
| size &= 0x3f; |
| } |
| |
| memcpy(ctx->buffer, data, size); |
| } |
| |
| #define OUT(dst, src) \ |
| (dst)[0] = (unsigned char)(src); \ |
| (dst)[1] = (unsigned char)((src) >> 8); \ |
| (dst)[2] = (unsigned char)((src) >> 16); \ |
| (dst)[3] = (unsigned char)((src) >> 24); |
| |
| void MD5_Final(unsigned char *result, MD5_CTX *ctx) |
| { |
| unsigned long used, available; |
| |
| used = ctx->lo & 0x3f; |
| |
| ctx->buffer[used++] = 0x80; |
| |
| available = 64 - used; |
| |
| if (available < 8) { |
| memset(&ctx->buffer[used], 0, available); |
| body(ctx, ctx->buffer, 64); |
| used = 0; |
| available = 64; |
| } |
| |
| memset(&ctx->buffer[used], 0, available - 8); |
| |
| ctx->lo <<= 3; |
| OUT(&ctx->buffer[56], ctx->lo) |
| OUT(&ctx->buffer[60], ctx->hi) |
| |
| body(ctx, ctx->buffer, 64); |
| |
| OUT(&result[0], ctx->a) |
| OUT(&result[4], ctx->b) |
| OUT(&result[8], ctx->c) |
| OUT(&result[12], ctx->d) |
| |
| memset(ctx, 0, sizeof(*ctx)); |
| } |
| |
| |
| ////////////////////////////////////////////////////////////////// |
| ////////////////////////////////////////////////////////////////// |
| |
| static MD5_CTX md5ctx; |
| |
| void xxprintf_start(void) |
| { |
| MD5_Init(&md5ctx); |
| } |
| |
| void xxprintf_done(void) |
| { |
| const char hexchar[16] = "0123456789abcdef"; |
| unsigned char result[100]; |
| memset(result, 0, sizeof(result)); |
| MD5_Final(&result[0], &md5ctx); |
| printf("final MD5 = "); |
| int i; |
| for (i = 0; i < 16; i++) { |
| printf("%c%c", hexchar[0xF & (result[i] >> 4)], |
| hexchar[0xF & (result[i] >> 0)]); |
| } |
| printf("\n"); |
| } |
| |
| __attribute__((format(__printf__, 1, 2))) |
| void xxprintf (const char *format, ...) |
| { |
| char buf[128]; |
| memset(buf, 0, sizeof(buf)); |
| |
| va_list vargs; |
| va_start(vargs, format); |
| int n = vsnprintf(buf, sizeof(buf)-1, format, vargs); |
| va_end(vargs); |
| |
| assert(n < sizeof(buf)-1); |
| assert(buf[sizeof(buf)-1] == 0); |
| assert(buf[sizeof(buf)-2] == 0); |
| |
| MD5_Update(&md5ctx, buf, strlen(buf)); |
| if (0) printf("QQQ %s", buf); |
| } |
| |
| ////////////////////////////////////////////////////////////////// |
| ////////////////////////////////////////////////////////////////// |
| |
| |
| /* Setting this to 1 creates a very comprehensive test of |
| integer condition codes. */ |
| #define TEST_INTEGER_VERBOSE 1 |
| |
| typedef long long int int64; |
| |
| //#define LINUX_VM86_IOPL_FIX |
| //#define TEST_P4_FLAGS |
| |
| #define xglue(x, y) x ## y |
| #define glue(x, y) xglue(x, y) |
| #define stringify(s) tostring(s) |
| #define tostring(s) #s |
| |
| #define CC_C 0x0001 |
| #define CC_P 0x0004 |
| #define CC_A 0x0010 |
| #define CC_Z 0x0040 |
| #define CC_S 0x0080 |
| #define CC_O 0x0800 |
| |
| #define CC_MASK (CC_C | CC_P | CC_Z | CC_S | CC_O | CC_A) |
| |
| #define OP add |
| #include "fb_test_amd64.h" |
| |
| #define OP sub |
| #include "fb_test_amd64.h" |
| |
| #define OP xor |
| #include "fb_test_amd64.h" |
| |
| #define OP and |
| #include "fb_test_amd64.h" |
| |
| #define OP or |
| #include "fb_test_amd64.h" |
| |
| #define OP cmp |
| #include "fb_test_amd64.h" |
| |
| #define OP adc |
| #define OP_CC |
| #include "fb_test_amd64.h" |
| |
| #define OP sbb |
| #define OP_CC |
| #include "fb_test_amd64.h" |
| |
| #define OP adcx |
| #define NSH |
| #define OP_CC |
| #include "fb_test_amd64.h" |
| |
| #define OP adox |
| #define NSH |
| #define OP_CC |
| #include "fb_test_amd64.h" |
| |
| #define OP inc |
| #define OP_CC |
| #define OP1 |
| #include "fb_test_amd64.h" |
| |
| #define OP dec |
| #define OP_CC |
| #define OP1 |
| #include "fb_test_amd64.h" |
| |
| #define OP neg |
| #define OP_CC |
| #define OP1 |
| #include "fb_test_amd64.h" |
| |
| #define OP not |
| #define OP_CC |
| #define OP1 |
| #include "fb_test_amd64.h" |
| |
| #undef CC_MASK |
| #define CC_MASK (CC_C | CC_P | CC_Z | CC_S | CC_O) |
| |
| #define OP shl |
| #include "fb_test_amd64_shift.h" |
| |
| #define OP shr |
| #include "fb_test_amd64_shift.h" |
| |
| #define OP sar |
| #include "fb_test_amd64_shift.h" |
| |
| #define OP rol |
| #include "fb_test_amd64_shift.h" |
| |
| #define OP ror |
| #include "fb_test_amd64_shift.h" |
| |
| #define OP rcr |
| #define OP_CC |
| #include "fb_test_amd64_shift.h" |
| |
| #define OP rcl |
| #define OP_CC |
| #include "fb_test_amd64_shift.h" |
| |
| /* XXX: should be more precise ? */ |
| #undef CC_MASK |
| #define CC_MASK (CC_C) |
| |
| /* lea test (modrm support) */ |
| #define TEST_LEA(STR)\ |
| {\ |
| asm("leaq " STR ", %0"\ |
| : "=r" (res)\ |
| : "a" (rax), "b" (rbx), "c" (rcx), "d" (rdx), "S" (rsi), "D" (rdi));\ |
| xxprintf("lea %s = %016llx\n", STR, res);\ |
| } |
| |
| #define TEST_LEA16(STR)\ |
| {\ |
| asm(".code16 ; .byte 0x67 ; leal " STR ", %0 ; .code32"\ |
| : "=wq" (res)\ |
| : "a" (eax), "b" (ebx), "c" (ecx), "d" (edx), "S" (esi), "D" (edi));\ |
| xxprintf("lea %s = %08x\n", STR, res);\ |
| } |
| |
| |
| void test_lea(void) |
| { |
| int64 rax, rbx, rcx, rdx, rsi, rdi, res; |
| rax = 0x0001; |
| rbx = 0x0002; |
| rcx = 0x0004; |
| rdx = 0x0008; |
| rsi = 0x0010; |
| rdi = 0x0020; |
| |
| TEST_LEA("0x4000"); |
| |
| TEST_LEA("(%%rax)"); |
| TEST_LEA("(%%rbx)"); |
| TEST_LEA("(%%rcx)"); |
| TEST_LEA("(%%rdx)"); |
| TEST_LEA("(%%rsi)"); |
| TEST_LEA("(%%rdi)"); |
| |
| TEST_LEA("0x40(%%rax)"); |
| TEST_LEA("0x40(%%rbx)"); |
| TEST_LEA("0x40(%%rcx)"); |
| TEST_LEA("0x40(%%rdx)"); |
| TEST_LEA("0x40(%%rsi)"); |
| TEST_LEA("0x40(%%rdi)"); |
| |
| TEST_LEA("0x4000(%%rax)"); |
| TEST_LEA("0x4000(%%rbx)"); |
| TEST_LEA("0x4000(%%rcx)"); |
| TEST_LEA("0x4000(%%rdx)"); |
| TEST_LEA("0x4000(%%rsi)"); |
| TEST_LEA("0x4000(%%rdi)"); |
| |
| TEST_LEA("(%%rax, %%rcx)"); |
| TEST_LEA("(%%rbx, %%rdx)"); |
| TEST_LEA("(%%rcx, %%rcx)"); |
| TEST_LEA("(%%rdx, %%rcx)"); |
| TEST_LEA("(%%rsi, %%rcx)"); |
| TEST_LEA("(%%rdi, %%rcx)"); |
| |
| TEST_LEA("0x40(%%rax, %%rcx)"); |
| TEST_LEA("0x4000(%%rbx, %%rdx)"); |
| |
| TEST_LEA("(%%rcx, %%rcx, 2)"); |
| TEST_LEA("(%%rdx, %%rcx, 4)"); |
| TEST_LEA("(%%rsi, %%rcx, 8)"); |
| |
| TEST_LEA("(,%%rax, 2)"); |
| TEST_LEA("(,%%rbx, 4)"); |
| TEST_LEA("(,%%rcx, 8)"); |
| |
| TEST_LEA("0x40(,%%rax, 2)"); |
| TEST_LEA("0x40(,%%rbx, 4)"); |
| TEST_LEA("0x40(,%%rcx, 8)"); |
| |
| |
| TEST_LEA("-10(%%rcx, %%rcx, 2)"); |
| TEST_LEA("-10(%%rdx, %%rcx, 4)"); |
| TEST_LEA("-10(%%rsi, %%rcx, 8)"); |
| |
| TEST_LEA("0x4000(%%rcx, %%rcx, 2)"); |
| TEST_LEA("0x4000(%%rdx, %%rcx, 4)"); |
| TEST_LEA("0x4000(%%rsi, %%rcx, 8)"); |
| } |
| |
| #define TEST_JCC(JCC, v1, v2)\ |
| { int one = 1; \ |
| int res;\ |
| asm("movl $1, %0\n\t"\ |
| "cmpl %2, %1\n\t"\ |
| "j" JCC " 1f\n\t"\ |
| "movl $0, %0\n\t"\ |
| "1:\n\t"\ |
| : "=r" (res)\ |
| : "r" (v1), "r" (v2));\ |
| xxprintf("%-10s %d\n", "j" JCC, res);\ |
| \ |
| asm("movl $0, %0\n\t"\ |
| "cmpl %2, %1\n\t"\ |
| "set" JCC " %b0\n\t"\ |
| : "=r" (res)\ |
| : "r" (v1), "r" (v2));\ |
| xxprintf("%-10s %d\n", "set" JCC, res);\ |
| {\ |
| asm("movl $0x12345678, %0\n\t"\ |
| "cmpl %2, %1\n\t"\ |
| "cmov" JCC "l %3, %0\n\t"\ |
| : "=r" (res)\ |
| : "r" (v1), "r" (v2), "m" (one));\ |
| xxprintf("%-10s R=0x%08x\n", "cmov" JCC "l", res);\ |
| asm("movl $0x12345678, %0\n\t"\ |
| "cmpl %2, %1\n\t"\ |
| "cmov" JCC "w %w3, %w0\n\t"\ |
| : "=r" (res)\ |
| : "r" (v1), "r" (v2), "r" (one));\ |
| xxprintf("%-10s R=0x%08x\n", "cmov" JCC "w", res);\ |
| } \ |
| } |
| |
| /* various jump tests */ |
| void test_jcc(void) |
| { |
| TEST_JCC("ne", 1, 1); |
| TEST_JCC("ne", 1, 0); |
| |
| TEST_JCC("e", 1, 1); |
| TEST_JCC("e", 1, 0); |
| |
| TEST_JCC("l", 1, 1); |
| TEST_JCC("l", 1, 0); |
| TEST_JCC("l", 1, -1); |
| |
| TEST_JCC("le", 1, 1); |
| TEST_JCC("le", 1, 0); |
| TEST_JCC("le", 1, -1); |
| |
| TEST_JCC("ge", 1, 1); |
| TEST_JCC("ge", 1, 0); |
| TEST_JCC("ge", -1, 1); |
| |
| TEST_JCC("g", 1, 1); |
| TEST_JCC("g", 1, 0); |
| TEST_JCC("g", 1, -1); |
| |
| TEST_JCC("b", 1, 1); |
| TEST_JCC("b", 1, 0); |
| TEST_JCC("b", 1, -1); |
| |
| TEST_JCC("be", 1, 1); |
| TEST_JCC("be", 1, 0); |
| TEST_JCC("be", 1, -1); |
| |
| TEST_JCC("ae", 1, 1); |
| TEST_JCC("ae", 1, 0); |
| TEST_JCC("ae", 1, -1); |
| |
| TEST_JCC("a", 1, 1); |
| TEST_JCC("a", 1, 0); |
| TEST_JCC("a", 1, -1); |
| |
| |
| TEST_JCC("p", 1, 1); |
| TEST_JCC("p", 1, 0); |
| |
| TEST_JCC("np", 1, 1); |
| TEST_JCC("np", 1, 0); |
| |
| TEST_JCC("o", 0x7fffffff, 0); |
| TEST_JCC("o", 0x7fffffff, -1); |
| |
| TEST_JCC("no", 0x7fffffff, 0); |
| TEST_JCC("no", 0x7fffffff, -1); |
| |
| TEST_JCC("s", 0, 1); |
| TEST_JCC("s", 0, -1); |
| TEST_JCC("s", 0, 0); |
| |
| TEST_JCC("ns", 0, 1); |
| TEST_JCC("ns", 0, -1); |
| TEST_JCC("ns", 0, 0); |
| } |
| |
| #undef CC_MASK |
| #ifdef TEST_P4_FLAGS |
| #define CC_MASK (CC_C | CC_P | CC_Z | CC_S | CC_O | CC_A) |
| #else |
| #define CC_MASK (CC_O | CC_C) |
| #endif |
| |
| #define OP mul |
| #include "fb_test_amd64_muldiv.h" |
| |
| #define OP imul |
| #include "fb_test_amd64_muldiv.h" |
| |
| void test_imulw2(int64 op0, int64 op1) |
| { |
| int64 res, s1, s0, flags; |
| s0 = op0; |
| s1 = op1; |
| res = s0; |
| flags = 0; |
| asm ("pushq %4\n\t" |
| "popfq\n\t" |
| "imulw %w2, %w0\n\t" |
| "pushfq\n\t" |
| "popq %1\n\t" |
| : "=q" (res), "=g" (flags) |
| : "q" (s1), "0" (res), "1" (flags)); |
| xxprintf("%-10s A=%016llx B=%016llx R=%016llx CC=%04llx\n", |
| "imulw", s0, s1, res, flags & CC_MASK); |
| } |
| |
| void test_imull2(int64 op0, int64 op1) |
| { |
| int res, s1; |
| int64 s0, flags; |
| s0 = op0; |
| s1 = op1; |
| res = s0; |
| flags = 0; |
| asm ("pushq %4\n\t" |
| "popfq\n\t" |
| "imull %2, %0\n\t" |
| "pushfq\n\t" |
| "popq %1\n\t" |
| : "=q" (res), "=g" (flags) |
| : "q" (s1), "0" (res), "1" (flags)); |
| xxprintf("%-10s A=%016llx B=%08x R=%08x CC=%04llx\n", |
| "imull", s0, s1, res, flags & CC_MASK); |
| } |
| |
| #define TEST_IMUL_IM(size, size1, op0, op1)\ |
| {\ |
| int64 res, flags;\ |
| flags = 0;\ |
| res = 0;\ |
| asm ("pushq %3\n\t"\ |
| "popfq\n\t"\ |
| "imul" size " $" #op0 ", %" size1 "2, %" size1 "0\n\t" \ |
| "pushfq\n\t"\ |
| "popq %1\n\t"\ |
| : "=r" (res), "=g" (flags)\ |
| : "r" (op1), "1" (flags), "0" (res));\ |
| xxprintf("%-10s A=%08x B=%08x R=%016llx CC=%04llx\n",\ |
| "imul" size, op0, op1, res, flags & CC_MASK);\ |
| } |
| |
| #define TEST_IMUL_IM_L(op0, op1)\ |
| {\ |
| int64 flags = 0;\ |
| int res = 0;\ |
| int res64 = 0;\ |
| asm ("pushq %3\n\t"\ |
| "popfq\n\t"\ |
| "imul $" #op0 ", %2, %0\n\t" \ |
| "pushfq\n\t"\ |
| "popq %1\n\t"\ |
| : "=r" (res64), "=g" (flags)\ |
| : "r" (op1), "1" (flags), "0" (res));\ |
| xxprintf("%-10s A=%08x B=%08x R=%08x CC=%04llx\n",\ |
| "imull", op0, op1, res, flags & CC_MASK);\ |
| } |
| |
| |
| #undef CC_MASK |
| #define CC_MASK (0) |
| |
| #define OP div |
| #include "fb_test_amd64_muldiv.h" |
| |
| #define OP idiv |
| #include "fb_test_amd64_muldiv.h" |
| |
| void test_mul(void) |
| { |
| test_imulb(0x1234561d, 4); |
| test_imulb(3, -4); |
| test_imulb(0x80, 0x80); |
| test_imulb(0x10, 0x10); |
| |
| test_imulw(0, 0, 0); |
| test_imulw(0, 0xFF, 0xFF); |
| test_imulw(0, 0xFF, 0x100); |
| test_imulw(0, 0x1234001d, 45); |
| test_imulw(0, 23, -45); |
| test_imulw(0, 0x8000, 0x8000); |
| test_imulw(0, 0x100, 0x100); |
| |
| test_imull(0, 0, 0); |
| test_imull(0, 0xFFFF, 0xFFFF); |
| test_imull(0, 0xFFFF, 0x10000); |
| test_imull(0, 0x1234001d, 45); |
| test_imull(0, 23, -45); |
| test_imull(0, 0x80000000, 0x80000000); |
| test_imull(0, 0x10000, 0x10000); |
| |
| test_mulb(0x1234561d, 4); |
| test_mulb(3, -4); |
| test_mulb(0x80, 0x80); |
| test_mulb(0x10, 0x10); |
| |
| test_mulw(0, 0x1234001d, 45); |
| test_mulw(0, 23, -45); |
| test_mulw(0, 0x8000, 0x8000); |
| test_mulw(0, 0x100, 0x100); |
| |
| test_mull(0, 0x1234001d, 45); |
| test_mull(0, 23, -45); |
| test_mull(0, 0x80000000, 0x80000000); |
| test_mull(0, 0x10000, 0x10000); |
| |
| test_imulw2(0x1234001d, 45); |
| test_imulw2(23, -45); |
| test_imulw2(0x8000, 0x8000); |
| test_imulw2(0x100, 0x100); |
| |
| test_imull2(0x1234001d, 45); |
| test_imull2(23, -45); |
| test_imull2(0x80000000, 0x80000000); |
| test_imull2(0x10000, 0x10000); |
| |
| TEST_IMUL_IM("w", "w", 45, 0x1234); |
| TEST_IMUL_IM("w", "w", -45, 23); |
| TEST_IMUL_IM("w", "w", 0x8000, 0x80000000); |
| TEST_IMUL_IM("w", "w", 0x7fff, 0x1000); |
| |
| TEST_IMUL_IM_L(45, 0x1234); |
| TEST_IMUL_IM_L(-45, 23); |
| TEST_IMUL_IM_L(0x8000, 0x80000000); |
| TEST_IMUL_IM_L(0x7fff, 0x1000); |
| |
| test_idivb(0x12341678, 0x127e); |
| test_idivb(0x43210123, -5); |
| test_idivb(0x12340004, -1); |
| |
| test_idivw(0, 0x12345678, 12347); |
| test_idivw(0, -23223, -45); |
| test_idivw(0, 0x12348000, -1); |
| test_idivw(0x12343, 0x12345678, 0x81238567); |
| |
| test_idivl(0, 0x12345678, 12347); |
| test_idivl(0, -233223, -45); |
| test_idivl(0, 0x80000000, -1); |
| test_idivl(0x12343, 0x12345678, 0x81234567); |
| |
| test_idivq(0, 0x12345678, 12347); |
| test_idivq(0, -233223, -45); |
| test_idivq(0, 0x80000000, -1); |
| test_idivq(0x12343, 0x12345678, 0x81234567); |
| |
| test_divb(0x12341678, 0x127e); |
| test_divb(0x43210123, -5); |
| test_divb(0x12340004, -1); |
| |
| test_divw(0, 0x12345678, 12347); |
| test_divw(0, -23223, -45); |
| test_divw(0, 0x12348000, -1); |
| test_divw(0x12343, 0x12345678, 0x81238567); |
| |
| test_divl(0, 0x12345678, 12347); |
| test_divl(0, -233223, -45); |
| test_divl(0, 0x80000000, -1); |
| test_divl(0x12343, 0x12345678, 0x81234567); |
| |
| test_divq(0, 0x12345678, 12347); |
| test_divq(0, -233223, -45); |
| test_divq(0, 0x80000000, -1); |
| test_divq(0x12343, 0x12345678, 0x81234567); |
| } |
| |
| #define TEST_BSX(op, size, op0)\ |
| {\ |
| int res, val, resz;\ |
| val = op0;\ |
| asm("xorl %1, %1\n"\ |
| "movl $0x12345678, %0\n"\ |
| #op " %" size "2, %" size "0 ; setz %b1" \ |
| : "=r" (res), "=q" (resz)\ |
| : "r" (val));\ |
| xxprintf("%-10s A=%08x R=%08x %d\n", #op, val, res, resz);\ |
| } |
| |
| void test_bsx(void) |
| { |
| TEST_BSX(bsrw, "w", 0); |
| TEST_BSX(bsrw, "w", 0x12340128); |
| TEST_BSX(bsrl, "", 0); |
| TEST_BSX(bsrl, "", 0x00340128); |
| TEST_BSX(bsfw, "w", 0); |
| TEST_BSX(bsfw, "w", 0x12340128); |
| TEST_BSX(bsfl, "", 0); |
| TEST_BSX(bsfl, "", 0x00340128); |
| } |
| |
| /**********************************************/ |
| |
| void test_fops(double a, double b) |
| { |
| xxprintf("a=%f b=%f a+b=%f\n", a, b, a + b); |
| xxprintf("a=%f b=%f a-b=%f\n", a, b, a - b); |
| xxprintf("a=%f b=%f a*b=%f\n", a, b, a * b); |
| xxprintf("a=%f b=%f a/b=%f\n", a, b, a / b); |
| xxprintf("a=%f b=%f fmod(a, b)=%f\n", a, b, fmod(a, b)); |
| xxprintf("a=%f sqrt(a)=%f\n", a, sqrt(a)); |
| xxprintf("a=%f sin(a)=%f\n", a, sin(a)); |
| xxprintf("a=%f cos(a)=%f\n", a, cos(a)); |
| xxprintf("a=%f tan(a)=%f\n", a, tan(a)); |
| xxprintf("a=%f log(a)=%f\n", a, log(a)); |
| xxprintf("a=%f exp(a)=%f\n", a, exp(a)); |
| xxprintf("a=%f b=%f atan2(a, b)=%f\n", a, b, atan2(a, b)); |
| /* just to test some op combining */ |
| xxprintf("a=%f asin(sin(a))=%f\n", a, asin(sin(a))); |
| xxprintf("a=%f acos(cos(a))=%f\n", a, acos(cos(a))); |
| xxprintf("a=%f atan(tan(a))=%f\n", a, atan(tan(a))); |
| } |
| |
| void test_fcmp(double a, double b) |
| { |
| xxprintf("(%f<%f)=%d\n", |
| a, b, a < b); |
| xxprintf("(%f<=%f)=%d\n", |
| a, b, a <= b); |
| xxprintf("(%f==%f)=%d\n", |
| a, b, a == b); |
| xxprintf("(%f>%f)=%d\n", |
| a, b, a > b); |
| xxprintf("(%f<=%f)=%d\n", |
| a, b, a >= b); |
| { |
| unsigned long long int rflags; |
| /* test f(u)comi instruction */ |
| asm("fcomi %2, %1\n" |
| "pushfq\n" |
| "popq %0\n" |
| : "=r" (rflags) |
| : "t" (a), "u" (b)); |
| xxprintf("fcomi(%f %f)=%016llx\n", a, b, rflags & (CC_Z | CC_P | CC_C)); |
| } |
| } |
| |
| void test_fcvt(double a) |
| { |
| float fa; |
| long double la; |
| int16_t fpuc; |
| int i; |
| int64 lla; |
| int ia; |
| int16_t wa; |
| double ra; |
| |
| fa = a; |
| la = a; |
| xxprintf("(float)%f = %f\n", a, fa); |
| xxprintf("(long double)%f = %Lf\n", a, la); |
| xxprintf("a=%016llx\n", *(unsigned long long int *) &a); |
| xxprintf("la=%016llx %04x\n", *(unsigned long long int *) &la, |
| *(unsigned short *) ((char *)(&la) + 8)); |
| |
| /* test all roundings */ |
| asm volatile ("fstcw %0" : "=m" (fpuc)); |
| for(i=0;i<4;i++) { |
| short zz = (fpuc & ~0x0c00) | (i << 10); |
| asm volatile ("fldcw %0" : : "m" (zz)); |
| asm volatile ("fists %0" : "=m" (wa) : "t" (a)); |
| asm volatile ("fistl %0" : "=m" (ia) : "t" (a)); |
| asm volatile ("fistpll %0" : "=m" (lla) : "t" (a) : "st"); |
| asm volatile ("frndint ; fstl %0" : "=m" (ra) : "t" (a)); |
| asm volatile ("fldcw %0" : : "m" (fpuc)); |
| xxprintf("(short)a = %d\n", wa); |
| xxprintf("(int)a = %d\n", ia); |
| xxprintf("(int64_t)a = %lld\n", lla); |
| xxprintf("rint(a) = %f\n", ra); |
| } |
| } |
| |
| #define TEST(N) \ |
| asm("fld" #N : "=t" (a)); \ |
| xxprintf("fld" #N "= %f\n", a); |
| |
| void test_fconst(void) |
| { |
| double a; |
| TEST(1); |
| TEST(l2t); |
| TEST(l2e); |
| TEST(pi); |
| TEST(lg2); |
| TEST(ln2); |
| TEST(z); |
| } |
| |
| void test_fbcd(double a) |
| { |
| unsigned short bcd[5]; |
| double b; |
| |
| asm("fbstp %0" : "=m" (bcd[0]) : "t" (a) : "st"); |
| asm("fbld %1" : "=t" (b) : "m" (bcd[0])); |
| xxprintf("a=%f bcd=%04x%04x%04x%04x%04x b=%f\n", |
| a, bcd[4], bcd[3], bcd[2], bcd[1], bcd[0], b); |
| } |
| |
| #define TEST_ENV(env, save, restore)\ |
| {\ |
| memset((env), 0xaa, sizeof(*(env)));\ |
| for(i=0;i<5;i++)\ |
| asm volatile ("fldl %0" : : "m" (dtab[i]));\ |
| asm(save " %0\n" : : "m" (*(env)));\ |
| asm(restore " %0\n": : "m" (*(env)));\ |
| for(i=0;i<5;i++)\ |
| asm volatile ("fstpl %0" : "=m" (rtab[i]));\ |
| for(i=0;i<5;i++)\ |
| xxprintf("res[%d]=%f\n", i, rtab[i]);\ |
| xxprintf("fpuc=%04x fpus=%04x fptag=%04x\n",\ |
| (env)->fpuc,\ |
| (env)->fpus & 0xff00,\ |
| (env)->fptag);\ |
| } |
| |
| void test_fenv(void) |
| { |
| struct __attribute__((packed)) { |
| uint16_t fpuc; |
| uint16_t dummy1; |
| uint16_t fpus; |
| uint16_t dummy2; |
| uint16_t fptag; |
| uint16_t dummy3; |
| uint32_t ignored[4]; |
| long double fpregs[8]; |
| } float_env32; |
| double dtab[8]; |
| double rtab[8]; |
| int i; |
| |
| for(i=0;i<8;i++) |
| dtab[i] = i + 1; |
| |
| TEST_ENV(&float_env32, "fnstenv", "fldenv"); |
| TEST_ENV(&float_env32, "fnsave", "frstor"); |
| |
| /* test for ffree */ |
| for(i=0;i<5;i++) |
| asm volatile ("fldl %0" : : "m" (dtab[i])); |
| asm volatile("ffree %st(2)"); |
| asm volatile ("fnstenv %0\n" : : "m" (float_env32)); |
| asm volatile ("fninit"); |
| xxprintf("fptag=%04x\n", float_env32.fptag); |
| } |
| |
| |
| #define TEST_FCMOV(a, b, rflags, CC)\ |
| {\ |
| double res;\ |
| asm("pushq %3\n"\ |
| "popfq\n"\ |
| "fcmov" CC " %2, %0\n"\ |
| : "=t" (res)\ |
| : "0" (a), "u" (b), "g" (rflags));\ |
| xxprintf("fcmov%s rflags=0x%04llx-> %f\n", \ |
| CC, rflags, res);\ |
| } |
| |
| void test_fcmov(void) |
| { |
| double a, b; |
| int64 rflags, i; |
| |
| a = 1.0; |
| b = 2.0; |
| for(i = 0; i < 4; i++) { |
| rflags = 0; |
| if (i & 1) |
| rflags |= CC_C; |
| if (i & 2) |
| rflags |= CC_Z; |
| TEST_FCMOV(a, b, rflags, "b"); |
| TEST_FCMOV(a, b, rflags, "e"); |
| TEST_FCMOV(a, b, rflags, "be"); |
| TEST_FCMOV(a, b, rflags, "nb"); |
| TEST_FCMOV(a, b, rflags, "ne"); |
| TEST_FCMOV(a, b, rflags, "nbe"); |
| } |
| TEST_FCMOV(a, b, (int64)0, "u"); |
| TEST_FCMOV(a, b, (int64)CC_P, "u"); |
| TEST_FCMOV(a, b, (int64)0, "nu"); |
| TEST_FCMOV(a, b, (int64)CC_P, "nu"); |
| } |
| |
| void test_floats(void) |
| { |
| test_fops(2, 3); |
| test_fops(1.4, -5); |
| test_fcmp(2, -1); |
| test_fcmp(2, 2); |
| test_fcmp(2, 3); |
| test_fcvt(0.5); |
| test_fcvt(-0.5); |
| test_fcvt(1.0/7.0); |
| test_fcvt(-1.0/9.0); |
| test_fcvt(32768); |
| test_fcvt(-1e20); |
| test_fconst(); |
| // REINSTATE (maybe): test_fbcd(1234567890123456); |
| // REINSTATE (maybe): test_fbcd(-123451234567890); |
| // REINSTATE: test_fenv(); |
| // REINSTATE: test_fcmov(); |
| } |
| |
| /**********************************************/ |
| |
| #define TEST_XCHG(op, size, opconst)\ |
| {\ |
| int op0, op1;\ |
| op0 = 0x12345678;\ |
| op1 = 0xfbca7654;\ |
| asm(#op " %" size "0, %" size "1" \ |
| : "=q" (op0), opconst (op1) \ |
| : "0" (op0), "1" (op1));\ |
| xxprintf("%-10s A=%08x B=%08x\n",\ |
| #op, op0, op1);\ |
| } |
| |
| #define TEST_CMPXCHG(op, size, opconst, eax)\ |
| {\ |
| int op0, op1;\ |
| op0 = 0x12345678;\ |
| op1 = 0xfbca7654;\ |
| asm(#op " %" size "0, %" size "1" \ |
| : "=q" (op0), opconst (op1) \ |
| : "0" (op0), "1" (op1), "a" (eax));\ |
| xxprintf("%-10s EAX=%08x A=%08x C=%08x\n",\ |
| #op, eax, op0, op1);\ |
| } |
| |
| |
| /**********************************************/ |
| /* segmentation tests */ |
| |
| extern char func_lret32; |
| extern char func_iret32; |
| |
| uint8_t str_buffer[4096]; |
| |
| #define TEST_STRING1(OP, size, DF, REP)\ |
| {\ |
| int64 rsi, rdi, rax, rcx, rflags;\ |
| \ |
| rsi = (long)(str_buffer + sizeof(str_buffer) / 2);\ |
| rdi = (long)(str_buffer + sizeof(str_buffer) / 2) + 16;\ |
| rax = 0x12345678;\ |
| rcx = 17;\ |
| \ |
| asm volatile ("pushq $0\n\t"\ |
| "popfq\n\t"\ |
| DF "\n\t"\ |
| REP #OP size "\n\t"\ |
| "cld\n\t"\ |
| "pushfq\n\t"\ |
| "popq %4\n\t"\ |
| : "=S" (rsi), "=D" (rdi), "=a" (rax), "=c" (rcx), "=g" (rflags)\ |
| : "0" (rsi), "1" (rdi), "2" (rax), "3" (rcx));\ |
| xxprintf("%-10s ESI=%016llx EDI=%016llx EAX=%016llx ECX=%016llx EFL=%04llx\n",\ |
| REP #OP size, rsi, rdi, rax, rcx,\ |
| rflags & (CC_C | CC_P | CC_Z | CC_S | CC_O | CC_A));\ |
| } |
| |
| #define TEST_STRING(OP, REP)\ |
| TEST_STRING1(OP, "b", "", REP);\ |
| TEST_STRING1(OP, "w", "", REP);\ |
| TEST_STRING1(OP, "l", "", REP);\ |
| TEST_STRING1(OP, "b", "std", REP);\ |
| TEST_STRING1(OP, "w", "std", REP);\ |
| TEST_STRING1(OP, "l", "std", REP) |
| |
| void test_string(void) |
| { |
| int64 i; |
| for(i = 0;i < sizeof(str_buffer); i++) |
| str_buffer[i] = i + 0x56; |
| TEST_STRING(stos, ""); |
| TEST_STRING(stos, "rep "); |
| TEST_STRING(lods, ""); /* to verify stos */ |
| // TEST_STRING(lods, "rep "); |
| TEST_STRING(movs, ""); |
| TEST_STRING(movs, "rep "); |
| TEST_STRING(lods, ""); /* to verify stos */ |
| |
| /* XXX: better tests */ |
| TEST_STRING(scas, ""); |
| TEST_STRING(scas, "repz "); |
| TEST_STRING(scas, "repnz "); |
| // REINSTATE? TEST_STRING(cmps, ""); |
| TEST_STRING(cmps, "repz "); |
| // REINSTATE? TEST_STRING(cmps, "repnz "); |
| } |
| |
| int main(int argc, char **argv) |
| { |
| // The three commented out test cases produce different results at different |
| // compiler optimisation levels. This suggests to me that their inline |
| // assembly is incorrect. I don't have time to investigate now, though. So |
| // they are disabled. |
| xxprintf_start(); |
| test_adc(); |
| test_adcx(); |
| test_add(); |
| test_adox(); |
| test_and(); |
| // test_bsx(); |
| test_cmp(); |
| test_dec(); |
| test_fcmov(); |
| test_fconst(); |
| test_fenv(); |
| test_floats(); |
| test_inc(); |
| // test_jcc(); |
| test_lea(); |
| test_mul(); |
| test_neg(); |
| test_not(); |
| test_or(); |
| test_rcl(); |
| test_rcr(); |
| test_rol(); |
| test_ror(); |
| test_sar(); |
| test_sbb(); |
| test_shl(); |
| test_shr(); |
| // test_string(); |
| test_sub(); |
| test_xor(); |
| xxprintf_done(); |
| // the expected MD5SUM is 66802c845574c7c69f30d29ef85f7ca3 |
| return 0; |
| } |