Ard Biesheuvel | fdd2389 | 2014-03-26 20:53:05 +0100 | [diff] [blame] | 1 | /* |
| 2 | * Accelerated GHASH implementation with ARMv8 PMULL instructions. |
| 3 | * |
| 4 | * Copyright (C) 2014 Linaro Ltd. <ard.biesheuvel@linaro.org> |
| 5 | * |
Ard Biesheuvel | fdd2389 | 2014-03-26 20:53:05 +0100 | [diff] [blame] | 6 | * This program is free software; you can redistribute it and/or modify it |
| 7 | * under the terms of the GNU General Public License version 2 as published |
| 8 | * by the Free Software Foundation. |
| 9 | */ |
| 10 | |
| 11 | #include <linux/linkage.h> |
| 12 | #include <asm/assembler.h> |
| 13 | |
Ard Biesheuvel | b913a64 | 2014-06-16 11:02:16 +0100 | [diff] [blame] | 14 | SHASH .req v0 |
| 15 | SHASH2 .req v1 |
Ard Biesheuvel | fdd2389 | 2014-03-26 20:53:05 +0100 | [diff] [blame] | 16 | T1 .req v2 |
| 17 | T2 .req v3 |
Ard Biesheuvel | b913a64 | 2014-06-16 11:02:16 +0100 | [diff] [blame] | 18 | MASK .req v4 |
| 19 | XL .req v5 |
| 20 | XM .req v6 |
| 21 | XH .req v7 |
| 22 | IN1 .req v7 |
Ard Biesheuvel | fdd2389 | 2014-03-26 20:53:05 +0100 | [diff] [blame] | 23 | |
| 24 | .text |
| 25 | .arch armv8-a+crypto |
| 26 | |
| 27 | /* |
| 28 | * void pmull_ghash_update(int blocks, u64 dg[], const char *src, |
| 29 | * struct ghash_key const *k, const char *head) |
| 30 | */ |
| 31 | ENTRY(pmull_ghash_update) |
Ard Biesheuvel | fdd2389 | 2014-03-26 20:53:05 +0100 | [diff] [blame] | 32 | ld1 {SHASH.16b}, [x3] |
Ard Biesheuvel | b913a64 | 2014-06-16 11:02:16 +0100 | [diff] [blame] | 33 | ld1 {XL.16b}, [x1] |
| 34 | movi MASK.16b, #0xe1 |
| 35 | ext SHASH2.16b, SHASH.16b, SHASH.16b, #8 |
| 36 | shl MASK.2d, MASK.2d, #57 |
| 37 | eor SHASH2.16b, SHASH2.16b, SHASH.16b |
Ard Biesheuvel | fdd2389 | 2014-03-26 20:53:05 +0100 | [diff] [blame] | 38 | |
| 39 | /* do the head block first, if supplied */ |
| 40 | cbz x4, 0f |
Ard Biesheuvel | b913a64 | 2014-06-16 11:02:16 +0100 | [diff] [blame] | 41 | ld1 {T1.2d}, [x4] |
Ard Biesheuvel | fdd2389 | 2014-03-26 20:53:05 +0100 | [diff] [blame] | 42 | b 1f |
| 43 | |
Ard Biesheuvel | b913a64 | 2014-06-16 11:02:16 +0100 | [diff] [blame] | 44 | 0: ld1 {T1.2d}, [x2], #16 |
Ard Biesheuvel | fdd2389 | 2014-03-26 20:53:05 +0100 | [diff] [blame] | 45 | sub w0, w0, #1 |
Ard Biesheuvel | fdd2389 | 2014-03-26 20:53:05 +0100 | [diff] [blame] | 46 | |
Ard Biesheuvel | b913a64 | 2014-06-16 11:02:16 +0100 | [diff] [blame] | 47 | 1: /* multiply XL by SHASH in GF(2^128) */ |
| 48 | CPU_LE( rev64 T1.16b, T1.16b ) |
Ard Biesheuvel | fdd2389 | 2014-03-26 20:53:05 +0100 | [diff] [blame] | 49 | |
Ard Biesheuvel | b913a64 | 2014-06-16 11:02:16 +0100 | [diff] [blame] | 50 | ext T2.16b, XL.16b, XL.16b, #8 |
| 51 | ext IN1.16b, T1.16b, T1.16b, #8 |
Ard Biesheuvel | fdd2389 | 2014-03-26 20:53:05 +0100 | [diff] [blame] | 52 | eor T1.16b, T1.16b, T2.16b |
Ard Biesheuvel | b913a64 | 2014-06-16 11:02:16 +0100 | [diff] [blame] | 53 | eor XL.16b, XL.16b, IN1.16b |
| 54 | |
| 55 | pmull2 XH.1q, SHASH.2d, XL.2d // a1 * b1 |
| 56 | eor T1.16b, T1.16b, XL.16b |
| 57 | pmull XL.1q, SHASH.1d, XL.1d // a0 * b0 |
| 58 | pmull XM.1q, SHASH2.1d, T1.1d // (a1 + a0)(b1 + b0) |
| 59 | |
| 60 | ext T1.16b, XL.16b, XH.16b, #8 |
| 61 | eor T2.16b, XL.16b, XH.16b |
| 62 | eor XM.16b, XM.16b, T1.16b |
| 63 | eor XM.16b, XM.16b, T2.16b |
| 64 | pmull T2.1q, XL.1d, MASK.1d |
| 65 | |
| 66 | mov XH.d[0], XM.d[1] |
| 67 | mov XM.d[1], XL.d[0] |
| 68 | |
| 69 | eor XL.16b, XM.16b, T2.16b |
| 70 | ext T2.16b, XL.16b, XL.16b, #8 |
| 71 | pmull XL.1q, XL.1d, MASK.1d |
| 72 | eor T2.16b, T2.16b, XH.16b |
| 73 | eor XL.16b, XL.16b, T2.16b |
Ard Biesheuvel | fdd2389 | 2014-03-26 20:53:05 +0100 | [diff] [blame] | 74 | |
| 75 | cbnz w0, 0b |
| 76 | |
Ard Biesheuvel | b913a64 | 2014-06-16 11:02:16 +0100 | [diff] [blame] | 77 | st1 {XL.16b}, [x1] |
Ard Biesheuvel | fdd2389 | 2014-03-26 20:53:05 +0100 | [diff] [blame] | 78 | ret |
| 79 | ENDPROC(pmull_ghash_update) |