| |
| /* |
| gcc -o v8crypto v8crypto.c -march=armv8-a -mfpu=crypto-neon-fp-armv8 |
| gcc -o v8crypto v8crypto.c -mfpu=crypto-neon-fp-armv8 |
| */ |
| |
| #include <stdio.h> |
| #include <assert.h> |
| #include <malloc.h> // memalign |
| #include <string.h> // memset |
| #include "tests/malloc.h" |
| #include <math.h> // isnormal |
| |
/* Short aliases for the scalar types used by this test.  The V128
   union lays these out as 16/8/4/2 lanes over 16 bytes, so the code
   assumes UChar/UShort/UInt/ULong are 1/2/4/8 bytes wide and
   Float/Double are 4/8 bytes wide on the target. */
typedef  unsigned char           UChar;
typedef  unsigned short int      UShort;
typedef  unsigned int            UInt;
typedef  signed int              Int;
typedef  unsigned long long int  ULong;
typedef  signed long long int    Long;
typedef  double                  Double;
typedef  float                   Float;
| |
/* Byte-sized boolean with its two canonical values. */
typedef  unsigned char  Bool;
#define False ((Bool)0)
#define True  ((Bool)1)
| |
| |
/* Number of random input sets each generated test_* function runs
   per call (the loop bound inside the GEN_*VEC_TEST macros). */
#define ITERS 1
| |
/* Lane-type tag passed to every test and on to randV128().
   NOTE(review): presumably HF/SF/DF are half/single/double FP lanes
   and B/H/S/D are 8/16/32/64-bit integer lanes; nothing in this file
   inspects the value, so confirm against the sibling tests it was
   derived from.  Numbering starts at 1234 and increases by one. */
typedef
   enum {
      TyHF = 1234,
      TySF,
      TyDF,
      TyB,
      TyH,
      TyS,
      TyD,
      TyNONE
   }
   LaneTy;
| |
| union _V128 { |
| UChar u8[16]; |
| UShort u16[8]; |
| UInt u32[4]; |
| ULong u64[2]; |
| Float f32[4]; |
| Double f64[2]; |
| }; |
| typedef union _V128 V128; |
| |
| static inline UChar randUChar ( void ) |
| { |
| static UInt seed = 80021; |
| seed = 1103515245 * seed + 12345; |
| return (seed >> 17) & 0xFF; |
| } |
| |
| //static ULong randULong ( LaneTy ty ) |
| //{ |
| // Int i; |
| // ULong r = 0; |
| // for (i = 0; i < 8; i++) { |
| // r = (r << 8) | (ULong)(0xFF & randUChar()); |
| // } |
| // return r; |
| //} |
| |
| /* Generates a random V128. Ensures that that it contains normalised |
| FP numbers when viewed as either F32x4 or F64x2, so that it is |
| reasonable to use in FP test cases. */ |
| static void randV128 ( /*OUT*/V128* v, LaneTy ty ) |
| { |
| static UInt nCalls = 0, nIters = 0; |
| Int i; |
| nCalls++; |
| while (1) { |
| nIters++; |
| for (i = 0; i < 16; i++) { |
| v->u8[i] = randUChar(); |
| } |
| if (isnormal(v->f32[0]) && isnormal(v->f32[1]) && isnormal(v->f32[2]) |
| && isnormal(v->f32[3]) && isnormal(v->f64[0]) && isnormal(v->f64[1])) |
| break; |
| } |
| if (0 == (nCalls & 0xFF)) |
| printf("randV128: %u calls, %u iters\n", nCalls, nIters); |
| } |
| |
| static void showV128 ( V128* v ) |
| { |
| Int i; |
| for (i = 15; i >= 0; i--) |
| printf("%02x", (Int)v->u8[i]); |
| } |
| |
| //static void showBlock ( const char* msg, V128* block, Int nBlock ) |
| //{ |
| // Int i; |
| // printf("%s\n", msg); |
| // for (i = 0; i < nBlock; i++) { |
| // printf(" "); |
| // showV128(&block[i]); |
| // printf("\n"); |
| // } |
| //} |
| |
| |
| /* ---------------------------------------------------------------- */ |
| /* -- Parameterisable test macros -- */ |
| /* ---------------------------------------------------------------- */ |
| |
/* Executes _action 50 times.  Wrapped in do { } while (0) so that it
   behaves as a single statement, e.g. after an unbraced `if'.  The
   loop counter _qq is local to the expansion. */
#define DO50(_action) \
   do { \
      Int _qq; \
      for (_qq = 0; _qq < 50; _qq++) { \
         _action ; \
      } \
   } while (0)
| |
| |
/* Generate a test that involves two vector regs,
   with no bias as towards which is input or output.
   r8 is declared clobbered, so it's OK for INSN to use r8 as scratch;
   r9 is used by the harness itself for addressing and for moving
   FPSCR.

   Defines  static void test_<TESTNAME>(LaneTy ty).  Per iteration:
     - block[0..3] are filled with random normalised values;
     - FPSCR is zeroed;
     - q<VECREG1NO> is loaded from block[0], q<VECREG2NO> from block[1];
     - INSN is executed;
     - q<VECREG1NO> is stored to block[2], q<VECREG2NO> to block[3]
       (overwriting the random fill there);
     - FPSCR is stored to the first word of block[4];
     - one line is printed: INSN, block[0..3] in hex, then fpscr.
   The byte offsets in the asm (#16, #32, ...) rely on sizeof(V128)
   being 16. */
#define GEN_TWOVEC_TEST(TESTNAME,INSN,VECREG1NO,VECREG2NO) \
  __attribute__((noinline)) \
  static void test_##TESTNAME ( LaneTy ty ) { \
     Int i; \
     for (i = 0; i < ITERS; i++) { \
        V128 block[4+1]; \
        memset(block, 0x55, sizeof(block)); \
        randV128(&block[0], ty); \
        randV128(&block[1], ty); \
        randV128(&block[2], ty); \
        randV128(&block[3], ty); \
        __asm__ __volatile__( \
           "mov r9, #0 ; vmsr fpscr, r9 ; " \
           "add r9, %0, #0 ; vld1.8 { q"#VECREG1NO" }, [r9] ; " \
           "add r9, %0, #16 ; vld1.8 { q"#VECREG2NO" }, [r9] ; " \
           INSN " ; " \
           "add r9, %0, #32 ; vst1.8 { q"#VECREG1NO" }, [r9] ; " \
           "add r9, %0, #48 ; vst1.8 { q"#VECREG2NO" }, [r9] ; " \
           "vmrs r9, fpscr ; str r9, [%0, #64] " \
           : : "r"(&block[0]) \
           : "cc", "memory", "q"#VECREG1NO, "q"#VECREG2NO, "r8", "r9" \
        ); \
        printf("%s ", INSN); \
        UInt fpscr = 0xFFFFFFFF & block[4].u32[0]; \
        showV128(&block[0]); printf(" "); \
        showV128(&block[1]); printf(" "); \
        showV128(&block[2]); printf(" "); \
        showV128(&block[3]); printf(" fpscr=%08x\n", fpscr); \
     } \
  }
| |
| |
/* Generate a test that involves three vector regs,
   with no bias as towards which is input or output.  r8 is declared
   clobbered, so it's also OK for INSN to use r8 as scratch; r9 is
   used by the harness itself for addressing and for moving FPSCR.

   Defines  static void test_<TESTNAME>(LaneTy ty).  Per iteration:
     - block[0..5] are filled with random normalised values;
     - FPSCR is zeroed;
     - q<VECREG1NO>, q<VECREG2NO>, q<VECREG3NO> are loaded from
       block[0], block[1], block[2] respectively;
     - INSN is executed;
     - the same three registers are stored to block[3], block[4],
       block[5] (overwriting the random fill there);
     - FPSCR is stored to the first word of block[6];
     - one line is printed: INSN, block[0..5] in hex, then fpscr.
   The byte offsets in the asm (#16, #32, ...) rely on sizeof(V128)
   being 16. */
#define GEN_THREEVEC_TEST(TESTNAME,INSN,VECREG1NO,VECREG2NO,VECREG3NO) \
  __attribute__((noinline)) \
  static void test_##TESTNAME ( LaneTy ty ) { \
     Int i; \
     for (i = 0; i < ITERS; i++) { \
        V128 block[6+1]; \
        memset(block, 0x55, sizeof(block)); \
        randV128(&block[0], ty); \
        randV128(&block[1], ty); \
        randV128(&block[2], ty); \
        randV128(&block[3], ty); \
        randV128(&block[4], ty); \
        randV128(&block[5], ty); \
        __asm__ __volatile__( \
           "mov r9, #0 ; vmsr fpscr, r9 ; " \
           "add r9, %0, #0 ; vld1.8 { q"#VECREG1NO" }, [r9] ; " \
           "add r9, %0, #16 ; vld1.8 { q"#VECREG2NO" }, [r9] ; " \
           "add r9, %0, #32 ; vld1.8 { q"#VECREG3NO" }, [r9] ; " \
           INSN " ; " \
           "add r9, %0, #48 ; vst1.8 { q"#VECREG1NO" }, [r9] ; " \
           "add r9, %0, #64 ; vst1.8 { q"#VECREG2NO" }, [r9] ; " \
           "add r9, %0, #80 ; vst1.8 { q"#VECREG3NO" }, [r9] ; " \
           "vmrs r9, fpscr ; str r9, [%0, #96] " \
           : : "r"(&block[0]) \
           : "cc", "memory", "q"#VECREG1NO, "q"#VECREG2NO, "q"#VECREG3NO, \
             "r8", "r9" \
        ); \
        printf("%s ", INSN); \
        UInt fpscr = 0xFFFFFFFF & block[6].u32[0]; \
        showV128(&block[0]); printf(" "); \
        showV128(&block[1]); printf(" "); \
        showV128(&block[2]); printf(" "); \
        showV128(&block[3]); printf(" "); \
        showV128(&block[4]); printf(" "); \
        showV128(&block[5]); printf(" fpscr=%08x\n", fpscr); \
     } \
  }
| |
| // ======================== CRYPTO ======================== |
| |
| GEN_TWOVEC_TEST(aesd_q_q, "aesd.8 q3, q4", 3, 4) |
| GEN_TWOVEC_TEST(aese_q_q, "aese.8 q12, q13", 12, 13) |
| GEN_TWOVEC_TEST(aesimc_q_q, "aesimc.8 q15, q0", 15, 0) |
| GEN_TWOVEC_TEST(aesmc_q_q, "aesmc.8 q1, q9", 1, 9) |
| |
| GEN_THREEVEC_TEST(sha1c_q_q_q, "sha1c.32 q11, q10, q2", 11, 10, 2) |
| GEN_TWOVEC_TEST(sha1h_q_q, "sha1h.32 q6, q7", 6, 7) |
| GEN_THREEVEC_TEST(sha1m_q_q_q, "sha1m.32 q2, q8, q13", 2, 8, 13) |
| GEN_THREEVEC_TEST(sha1p_q_q_q, "sha1p.32 q3, q9, q14", 3, 9, 14) |
| GEN_THREEVEC_TEST(sha1su0_q_q_q, "sha1su0.32 q4, q10, q15", 4, 10, 15) |
| GEN_TWOVEC_TEST(sha1su1_q_q, "sha1su1.32 q11, q2", 11, 2) |
| |
| GEN_THREEVEC_TEST(sha256h2_q_q_q, "sha256h2.32 q9, q8, q7", 9, 8, 7) |
| GEN_THREEVEC_TEST(sha256h_q_q_q, "sha256h.32 q10, q9, q8", 10, 9, 8) |
| GEN_TWOVEC_TEST(sha256su0_q_q, "sha256su0.32 q11, q10", 11, 10) |
| GEN_THREEVEC_TEST(sha256su1_q_q_q, "sha256su1.32 q12, q11, q10", 12, 11, 10) |
| |
| // This is a bit complex. This really mentions three registers, so it |
| // should really be a THREEVEC variant. But the two source registers |
| // are D registers. So we say it is just a TWOVEC insn, producing a Q |
| // and taking a single Q (q7); q7 is the d14-d15 register pair, which |
| // is why the insn itself is mentions d14 and d15 whereas the |
| // numbers that follow mention q7. The result (q7) is 128 bits wide and |
| // so is unaffected by these shenanigans. |
| GEN_TWOVEC_TEST(pmull_q_d_d, "vmull.p64 q13, d14, d15", 13, 7) |
| |
| int main ( void ) |
| { |
| // ======================== CRYPTO ======================== |
| |
| // aesd.8 q_q (aes single round decryption) |
| // aese.8 q_q (aes single round encryption) |
| // aesimc.8 q_q (aes inverse mix columns) |
| // aesmc.8 q_q (aes mix columns) |
| if (1) DO50( test_aesd_q_q(TyNONE) ); |
| if (1) DO50( test_aese_q_q(TyNONE) ); |
| if (1) DO50( test_aesimc_q_q(TyNONE) ); |
| if (1) DO50( test_aesmc_q_q(TyNONE) ); |
| |
| // sha1c.32 q_q_q |
| // sha1h.32 q_q |
| // sha1m.32 q_q_q |
| // sha1p.32 q_q_q |
| // sha1su0.32 q_q_q |
| // sha1su1.32 q_q |
| if (1) DO50( test_sha1c_q_q_q(TyNONE) ); |
| if (1) DO50( test_sha1h_q_q(TyNONE) ); |
| if (1) DO50( test_sha1m_q_q_q(TyNONE) ); |
| if (1) DO50( test_sha1p_q_q_q(TyNONE) ); |
| if (1) DO50( test_sha1su0_q_q_q(TyNONE) ); |
| if (1) DO50( test_sha1su1_q_q(TyNONE) ); |
| |
| // sha256h2.32 q_q_q |
| // sha256h.32 q_q_q |
| // sha256su0.32 q_q |
| // sha256su1.32 q_q_q |
| if (1) DO50( test_sha256h2_q_q_q(TyNONE) ); |
| if (1) DO50( test_sha256h_q_q_q(TyNONE) ); |
| if (1) DO50( test_sha256su0_q_q(TyNONE) ); |
| if (1) DO50( test_sha256su1_q_q_q(TyNONE) ); |
| |
| // vmull.64 q_d_d |
| if (1) DO50( test_pmull_q_d_d(TyD) ); |
| |
| return 0; |
| } |