/*
 * Accelerated GHASH implementation with Intel PCLMULQDQ-NI
 * instructions. This file contains the accelerated part of the GHASH
 * implementation. More information about PCLMULQDQ can be found at:
 *
 * http://software.intel.com/en-us/articles/carry-less-multiplication-and-its-usage-for-computing-the-gcm-mode/
 *
 * Copyright (c) 2009 Intel Corp.
 *   Author: Huang Ying <ying.huang@intel.com>
 *	     Vinodh Gopal
 *	     Erdinc Ozturk
 *	     Deniz Karakoyunlu
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation.
 */

#include <linux/linkage.h>
#include <asm/inst.h>

/*
 * 128-bit constants used by the routines in this file.  They are only ever
 * read, so they live in .rodata rather than in writable .data.
 *
 * Each .octa occupies 16 bytes and the table starts on a 16-byte boundary,
 * so every entry can be used as an aligned SSE memory operand
 * (movaps, pcmpeqd, pand).
 */
.section .rodata
.align 16
.Lbswap_mask:	/* pshufb control word that reverses the 16 bytes of a register */
	.octa 0x000102030405060708090a0b0c0d0e0f
.Lpoly:		/* XORed into the shifted key by clmul_ghash_setkey on carry-out */
	.octa 0xc2000000000000000000000000000001
.Ltwo_one:	/* compare pattern clmul_ghash_setkey uses to build the .Lpoly mask */
	.octa 0x00000001000000000000000000000001

/* Register roles shared by all routines in this file. */
#define DATA	%xmm0	/* hash value; operand1 and result of __clmul_gf128mul_ble */
#define SHASH	%xmm1	/* operand2: hash_key << 1 mod poly */
#define T1	%xmm2	/* scratch, clobbered by __clmul_gf128mul_ble */
#define T2	%xmm3	/* scratch, clobbered by __clmul_gf128mul_ble */
#define T3	%xmm4	/* scratch, clobbered by __clmul_gf128mul_ble */
#define BSWAP	%xmm5	/* loaded with .Lbswap_mask by the entry points */
#define IN1	%xmm6	/* current 16-byte input block in clmul_ghash_update */

.text

/*
 * __clmul_gf128mul_ble: internal ABI
 *
 * Multiplies two 128-bit values with carry-less multiplication and reduces
 * the 256-bit product back to 128 bits.  It uses no stack and no
 * general-purpose registers.
 *
 * input:
 *	DATA:	operand1
 *	SHASH:	operand2, hash_key << 1 mod poly
 * output:
 *	DATA:	operand1 * operand2 mod poly
 * changed:
 *	T1
 *	T2
 *	T3
 */
__clmul_gf128mul_ble:
	/*
	 * Three 64x64 carry-less multiplies (Karatsuba) instead of four.
	 * a1:a0 are the high:low quadwords of DATA, b1:b0 those of SHASH.
	 */
	movaps DATA, T1
	pshufd $0b01001110, DATA, T2	# T2 = DATA with quadwords swapped
	pshufd $0b01001110, SHASH, T3	# T3 = SHASH with quadwords swapped
	pxor DATA, T2			# low quadword of T2 = a1 + a0
	pxor SHASH, T3			# low quadword of T3 = b1 + b0

	PCLMULQDQ 0x00 SHASH DATA	# DATA = a0 * b0
	PCLMULQDQ 0x11 SHASH T1		# T1 = a1 * b1
	PCLMULQDQ 0x00 T3 T2		# T2 = (a1 + a0) * (b1 + b0)
	pxor DATA, T2
	pxor T1, T2			# T2 = a0 * b1 + a1 * b0

	/* Fold the middle term into the 256-bit product, split across T1:DATA. */
	movaps T2, T3
	pslldq $8, T3			# low half of T2 -> high quadword
	psrldq $8, T2			# high half of T2 -> low quadword
	pxor T3, DATA
	pxor T2, T1			# <T1:DATA> is result of
					# carry-less multiplication

	/*
	 * First phase of the reduction.
	 * Per 64-bit lane: T3 = (DATA << 63) ^ (DATA << 62) ^ (DATA << 57),
	 * built as three shift/xor steps.  The low quadword of T3 is then
	 * folded into the high quadword of DATA and the high quadword of T3
	 * into the low quadword of T1.
	 */
	movaps DATA, T3
	psllq $1, T3
	pxor DATA, T3
	psllq $5, T3
	pxor DATA, T3
	psllq $57, T3
	movaps T3, T2
	pslldq $8, T2
	psrldq $8, T3
	pxor T2, DATA
	pxor T3, T1

	/*
	 * Second phase of the reduction.
	 * Per 64-bit lane: T2 = (DATA >> 1) ^ (DATA >> 2) ^ (DATA >> 7).
	 * The result is DATA ^ T1 ^ T2, left in DATA.
	 */
	movaps DATA, T2
	psrlq $5, T2
	pxor DATA, T2
	psrlq $1, T2
	pxor DATA, T2
	psrlq $1, T2
	pxor T2, T1
	pxor T1, DATA
	ret
ENDPROC(__clmul_gf128mul_ble)

/*
 * void clmul_ghash_mul(char *dst, const be128 *shash)
 *
 * Multiplies the 16-byte value at dst by the key at shash and writes the
 * product back to dst.
 *
 * in:	%rdi = dst   (16 bytes, read and written; no alignment required)
 *	%rsi = shash (16 bytes, read only; no alignment required)
 * clobbers: DATA, SHASH, T1, T2, T3, BSWAP (%xmm0-%xmm5)
 *
 * dst is byte-reversed with .Lbswap_mask before the multiply and reversed
 * again before the store; shash is used exactly as loaded.
 */
ENTRY(clmul_ghash_mul)
	movups (%rdi), DATA
	movups (%rsi), SHASH
	movaps .Lbswap_mask(%rip), BSWAP	# RIP-relative: no absolute relocation
	PSHUFB_XMM BSWAP DATA
	call __clmul_gf128mul_ble
	PSHUFB_XMM BSWAP DATA
	movups DATA, (%rdi)
	ret
ENDPROC(clmul_ghash_mul)

/*
 * void clmul_ghash_update(char *dst, const char *src, unsigned int srclen,
 *			   const be128 *shash);
 *
 * For every complete 16-byte block of src: XOR the block into the running
 * hash held at dst, then multiply the hash by the key at shash.
 *
 * in:	%rdi = dst    (16-byte hash value, read and written)
 *	%rsi = src    (input bytes; advanced by 16 per block)
 *	%edx = srclen (byte count; declared unsigned int, so only the low
 *		       32 bits of %rdx are used and all tests are unsigned)
 *	%rcx = shash  (16-byte key, read only)
 * clobbers: %rsi, %rdx, flags, DATA, SHASH, T1, T2, T3, BSWAP, IN1
 *	     (%xmm0-%xmm6)
 *
 * If srclen < 16 the function returns without touching dst.  A trailing
 * partial block (srclen % 16 bytes) is never read.
 */
ENTRY(clmul_ghash_update)
	cmp $16, %edx
	jb .Lupdate_just_ret	# less than one full block: nothing to do
	movaps .Lbswap_mask(%rip), BSWAP
	movups (%rdi), DATA
	movups (%rcx), SHASH
	PSHUFB_XMM BSWAP DATA
.align 4
.Lupdate_loop:
	/* Invariant: %edx >= 16 here, so one more full block is available. */
	movups (%rsi), IN1
	PSHUFB_XMM BSWAP IN1
	pxor IN1, DATA
	call __clmul_gf128mul_ble
	sub $16, %edx
	add $16, %rsi
	cmp $16, %edx
	jae .Lupdate_loop	# unsigned, matching the entry check
	PSHUFB_XMM BSWAP DATA
	movups DATA, (%rdi)
.Lupdate_just_ret:
	ret
ENDPROC(clmul_ghash_update)

/*
 * void clmul_ghash_setkey(be128 *shash, const u8 *key);
 *
 * Calculate hash_key << 1 mod poly
 *
 * in:	%rdi = shash (16 bytes, written)
 *	%rsi = key   (16 bytes, read; no alignment required)
 * clobbers: %xmm0, %xmm1, %xmm2, BSWAP (%xmm5)
 *
 * The key is byte-reversed with .Lbswap_mask, shifted left by one bit as a
 * 128-bit value, and .Lpoly is XORed in when the bit shifted out of
 * position 127 was set.
 */
ENTRY(clmul_ghash_setkey)
	movaps .Lbswap_mask(%rip), BSWAP
	movups (%rsi), %xmm0
	PSHUFB_XMM BSWAP %xmm0

	/* 128-bit shift left by one, built from two 64-bit lane shifts. */
	movaps %xmm0, %xmm1
	psllq $1, %xmm0			# each lane << 1
	psrlq $63, %xmm1		# each lane: the bit that fell out
	movaps %xmm1, %xmm2
	pslldq $8, %xmm1		# carry of the low lane -> bit 64
	psrldq $8, %xmm2		# carry of the high lane -> bit 0 of xmm2
	por %xmm1, %xmm0

	/*
	 * Reduction.  xmm2 holds the carry-out c (0 or 1) in dword 0 and
	 * zero elsewhere.  The shuffle yields dwords {c, 0, 0, c}; comparing
	 * with .Ltwo_one ({1, 0, 0, 1}) gives all-ones in dwords 0 and 3 only
	 * when c == 1.  Dwords 1 and 2 always compare equal, but .Lpoly is
	 * zero there, so the pand result is .Lpoly when c == 1 and 0 otherwise.
	 */
	pshufd $0b00100100, %xmm2, %xmm1
	pcmpeqd .Ltwo_one(%rip), %xmm1
	pand .Lpoly(%rip), %xmm1
	pxor %xmm1, %xmm0
	movups %xmm0, (%rdi)
	ret
ENDPROC(clmul_ghash_setkey)