| #include "crc32c.h" |
| |
| #define CRC32C3X8(ITR) \ |
| crc1 = __crc32cd(crc1, *((const uint64_t *)data + 42*1 + (ITR)));\ |
| crc2 = __crc32cd(crc2, *((const uint64_t *)data + 42*2 + (ITR)));\ |
| crc0 = __crc32cd(crc0, *((const uint64_t *)data + 42*0 + (ITR))); |
| |
| #define CRC32C7X3X8(ITR) do {\ |
| CRC32C3X8((ITR)*7+0) \ |
| CRC32C3X8((ITR)*7+1) \ |
| CRC32C3X8((ITR)*7+2) \ |
| CRC32C3X8((ITR)*7+3) \ |
| CRC32C3X8((ITR)*7+4) \ |
| CRC32C3X8((ITR)*7+5) \ |
| CRC32C3X8((ITR)*7+6) \ |
| } while(0) |
| |
| #ifndef HWCAP_CRC32 |
| #define HWCAP_CRC32 (1 << 7) |
| #endif /* HWCAP_CRC32 */ |
| |
| int crc32c_arm64_available = 0; |
| |
| #ifdef ARCH_HAVE_ARM64_CRC_CRYPTO |
| |
| #include <sys/auxv.h> |
| #include <arm_acle.h> |
| #include <arm_neon.h> |
| |
| static int crc32c_probed; |
| |
| /* |
| * Function to calculate reflected crc with PMULL Instruction |
| * crc done "by 3" for fixed input block size of 1024 bytes |
| */ |
| uint32_t crc32c_arm64(unsigned char const *data, unsigned long length) |
| { |
| signed long len = length; |
| uint32_t crc = ~0; |
| uint32_t crc0, crc1, crc2; |
| |
| /* Load two consts: K1 and K2 */ |
| const poly64_t k1 = 0xe417f38a, k2 = 0x8f158014; |
| uint64_t t0, t1; |
| |
| while ((len -= 1024) >= 0) { |
| /* Do first 8 bytes here for better pipelining */ |
| crc0 = __crc32cd(crc, *(const uint64_t *)data); |
| crc1 = 0; |
| crc2 = 0; |
| data += sizeof(uint64_t); |
| |
| /* Process block inline |
| Process crc0 last to avoid dependency with above */ |
| CRC32C7X3X8(0); |
| CRC32C7X3X8(1); |
| CRC32C7X3X8(2); |
| CRC32C7X3X8(3); |
| CRC32C7X3X8(4); |
| CRC32C7X3X8(5); |
| |
| data += 42*3*sizeof(uint64_t); |
| |
| /* Merge crc0 and crc1 into crc2 |
| crc1 multiply by K2 |
| crc0 multiply by K1 */ |
| |
| t1 = (uint64_t)vmull_p64(crc1, k2); |
| t0 = (uint64_t)vmull_p64(crc0, k1); |
| crc = __crc32cd(crc2, *(const uint64_t *)data); |
| crc1 = __crc32cd(0, t1); |
| crc ^= crc1; |
| crc0 = __crc32cd(0, t0); |
| crc ^= crc0; |
| |
| data += sizeof(uint64_t); |
| } |
| |
| if (!(len += 1024)) |
| return crc; |
| |
| while ((len -= sizeof(uint64_t)) >= 0) { |
| crc = __crc32cd(crc, *(const uint64_t *)data); |
| data += sizeof(uint64_t); |
| } |
| |
| /* The following is more efficient than the straight loop */ |
| if (len & sizeof(uint32_t)) { |
| crc = __crc32cw(crc, *(const uint32_t *)data); |
| data += sizeof(uint32_t); |
| } |
| if (len & sizeof(uint16_t)) { |
| crc = __crc32ch(crc, *(const uint16_t *)data); |
| data += sizeof(uint16_t); |
| } |
| if (len & sizeof(uint8_t)) { |
| crc = __crc32cb(crc, *(const uint8_t *)data); |
| } |
| |
| return crc; |
| } |
| |
| void crc32c_arm64_probe(void) |
| { |
| unsigned long hwcap; |
| |
| if (!crc32c_probed) { |
| hwcap = getauxval(AT_HWCAP); |
| if (hwcap & HWCAP_CRC32) |
| crc32c_arm64_available = 1; |
| crc32c_probed = 1; |
| } |
| } |
| |
| #endif /* ARCH_HAVE_ARM64_CRC_CRYPTO */ |