| Thomas Gleixner | d2912cb | 2019-06-04 10:11:33 +0200 | [diff] [blame^] | 1 | /* SPDX-License-Identifier: GPL-2.0-only */ | 
| Ondrej Mosnacek | 1d373d4 | 2018-05-11 14:12:51 +0200 | [diff] [blame] | 2 | /* | 
 | 3 |  * AES-NI + SSE2 implementation of AEGIS-256 | 
 | 4 |  * | 
 | 5 |  * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com> | 
 | 6 |  * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved. | 
| Ondrej Mosnacek | 1d373d4 | 2018-05-11 14:12:51 +0200 | [diff] [blame] | 7 |  */ | 
 | 8 |  | 
 | 9 | #include <linux/linkage.h> | 
 | 10 | #include <asm/frame.h> | 
 | 11 |  | 
 | 12 | #define STATE0	%xmm0 | 
 | 13 | #define STATE1	%xmm1 | 
 | 14 | #define STATE2	%xmm2 | 
 | 15 | #define STATE3	%xmm3 | 
 | 16 | #define STATE4	%xmm4 | 
 | 17 | #define STATE5	%xmm5 | 
 | 18 | #define MSG	%xmm6 | 
 | 19 | #define T0	%xmm7 | 
 | 20 | #define T1	%xmm8 | 
 | 21 | #define T2	%xmm9 | 
 | 22 | #define T3	%xmm10 | 
 | 23 |  | 
 | 24 | #define STATEP	%rdi | 
 | 25 | #define LEN	%rsi | 
 | 26 | #define SRC	%rdx | 
 | 27 | #define DST	%rcx | 
 | 28 |  | 
 | 29 | .section .rodata.cst16.aegis256_const, "aM", @progbits, 32 | 
 | 30 | .align 16 | 
 | 31 | .Laegis256_const_0: | 
 | 32 | 	.byte 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d | 
 | 33 | 	.byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62 | 
 | 34 | .Laegis256_const_1: | 
 | 35 | 	.byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1 | 
 | 36 | 	.byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd | 
 | 37 |  | 
 | 38 | .section .rodata.cst16.aegis256_counter, "aM", @progbits, 16 | 
 | 39 | .align 16 | 
 | 40 | .Laegis256_counter: | 
 | 41 | 	.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 | 
 | 42 | 	.byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f | 
 | 43 |  | 
 | 44 | .text | 
 | 45 |  | 
 | 46 | /* | 
 | 47 |  * __load_partial: internal ABI | 
 | 48 |  * input: | 
 | 49 |  *   LEN - bytes | 
 | 50 |  *   SRC - src | 
 | 51 |  * output: | 
 | 52 |  *   MSG  - message block | 
 | 53 |  * changed: | 
 | 54 |  *   T0 | 
 | 55 |  *   %r8 | 
 | 56 |  *   %r9 | 
 | 57 |  */ | 
 | 58 | __load_partial: | 
| Jan Beulich | a7bea83 | 2018-07-02 04:31:54 -0600 | [diff] [blame] | 59 | 	xor %r9d, %r9d | 
| Ondrej Mosnacek | 1d373d4 | 2018-05-11 14:12:51 +0200 | [diff] [blame] | 60 | 	pxor MSG, MSG | 
 | 61 |  | 
 | 62 | 	mov LEN, %r8 | 
 | 63 | 	and $0x1, %r8 | 
 | 64 | 	jz .Lld_partial_1 | 
 | 65 |  | 
 | 66 | 	mov LEN, %r8 | 
 | 67 | 	and $0x1E, %r8 | 
 | 68 | 	add SRC, %r8 | 
 | 69 | 	mov (%r8), %r9b | 
 | 70 |  | 
 | 71 | .Lld_partial_1: | 
 | 72 | 	mov LEN, %r8 | 
 | 73 | 	and $0x2, %r8 | 
 | 74 | 	jz .Lld_partial_2 | 
 | 75 |  | 
 | 76 | 	mov LEN, %r8 | 
 | 77 | 	and $0x1C, %r8 | 
 | 78 | 	add SRC, %r8 | 
 | 79 | 	shl $0x10, %r9 | 
 | 80 | 	mov (%r8), %r9w | 
 | 81 |  | 
 | 82 | .Lld_partial_2: | 
 | 83 | 	mov LEN, %r8 | 
 | 84 | 	and $0x4, %r8 | 
 | 85 | 	jz .Lld_partial_4 | 
 | 86 |  | 
 | 87 | 	mov LEN, %r8 | 
 | 88 | 	and $0x18, %r8 | 
 | 89 | 	add SRC, %r8 | 
 | 90 | 	shl $32, %r9 | 
 | 91 | 	mov (%r8), %r8d | 
 | 92 | 	xor %r8, %r9 | 
 | 93 |  | 
 | 94 | .Lld_partial_4: | 
 | 95 | 	movq %r9, MSG | 
 | 96 |  | 
 | 97 | 	mov LEN, %r8 | 
 | 98 | 	and $0x8, %r8 | 
 | 99 | 	jz .Lld_partial_8 | 
 | 100 |  | 
 | 101 | 	mov LEN, %r8 | 
 | 102 | 	and $0x10, %r8 | 
 | 103 | 	add SRC, %r8 | 
 | 104 | 	pslldq $8, MSG | 
 | 105 | 	movq (%r8), T0 | 
 | 106 | 	pxor T0, MSG | 
 | 107 |  | 
 | 108 | .Lld_partial_8: | 
 | 109 | 	ret | 
 | 110 | ENDPROC(__load_partial) | 
 | 111 |  | 
 | 112 | /* | 
 | 113 |  * __store_partial: internal ABI | 
 | 114 |  * input: | 
 | 115 |  *   LEN - bytes | 
 | 116 |  *   DST - dst | 
 | 117 |  * output: | 
 | 118 |  *   T0   - message block | 
 | 119 |  * changed: | 
 | 120 |  *   %r8 | 
 | 121 |  *   %r9 | 
 | 122 |  *   %r10 | 
 | 123 |  */ | 
 | 124 | __store_partial: | 
 | 125 | 	mov LEN, %r8 | 
 | 126 | 	mov DST, %r9 | 
 | 127 |  | 
 | 128 | 	movq T0, %r10 | 
 | 129 |  | 
 | 130 | 	cmp $8, %r8 | 
 | 131 | 	jl .Lst_partial_8 | 
 | 132 |  | 
 | 133 | 	mov %r10, (%r9) | 
 | 134 | 	psrldq $8, T0 | 
 | 135 | 	movq T0, %r10 | 
 | 136 |  | 
 | 137 | 	sub $8, %r8 | 
 | 138 | 	add $8, %r9 | 
 | 139 |  | 
 | 140 | .Lst_partial_8: | 
 | 141 | 	cmp $4, %r8 | 
 | 142 | 	jl .Lst_partial_4 | 
 | 143 |  | 
 | 144 | 	mov %r10d, (%r9) | 
 | 145 | 	shr $32, %r10 | 
 | 146 |  | 
 | 147 | 	sub $4, %r8 | 
 | 148 | 	add $4, %r9 | 
 | 149 |  | 
 | 150 | .Lst_partial_4: | 
 | 151 | 	cmp $2, %r8 | 
 | 152 | 	jl .Lst_partial_2 | 
 | 153 |  | 
 | 154 | 	mov %r10w, (%r9) | 
 | 155 | 	shr $0x10, %r10 | 
 | 156 |  | 
 | 157 | 	sub $2, %r8 | 
 | 158 | 	add $2, %r9 | 
 | 159 |  | 
 | 160 | .Lst_partial_2: | 
 | 161 | 	cmp $1, %r8 | 
 | 162 | 	jl .Lst_partial_1 | 
 | 163 |  | 
 | 164 | 	mov %r10b, (%r9) | 
 | 165 |  | 
 | 166 | .Lst_partial_1: | 
 | 167 | 	ret | 
 | 168 | ENDPROC(__store_partial) | 
 | 169 |  | 
 | 170 | .macro update | 
 | 171 | 	movdqa STATE5, T0 | 
 | 172 | 	aesenc STATE0, STATE5 | 
 | 173 | 	aesenc STATE1, STATE0 | 
 | 174 | 	aesenc STATE2, STATE1 | 
 | 175 | 	aesenc STATE3, STATE2 | 
 | 176 | 	aesenc STATE4, STATE3 | 
 | 177 | 	aesenc T0,     STATE4 | 
 | 178 | .endm | 
 | 179 |  | 
 | 180 | .macro update0 m | 
 | 181 | 	update | 
 | 182 | 	pxor \m, STATE5 | 
 | 183 | .endm | 
 | 184 |  | 
 | 185 | .macro update1 m | 
 | 186 | 	update | 
 | 187 | 	pxor \m, STATE4 | 
 | 188 | .endm | 
 | 189 |  | 
 | 190 | .macro update2 m | 
 | 191 | 	update | 
 | 192 | 	pxor \m, STATE3 | 
 | 193 | .endm | 
 | 194 |  | 
 | 195 | .macro update3 m | 
 | 196 | 	update | 
 | 197 | 	pxor \m, STATE2 | 
 | 198 | .endm | 
 | 199 |  | 
 | 200 | .macro update4 m | 
 | 201 | 	update | 
 | 202 | 	pxor \m, STATE1 | 
 | 203 | .endm | 
 | 204 |  | 
 | 205 | .macro update5 m | 
 | 206 | 	update | 
 | 207 | 	pxor \m, STATE0 | 
 | 208 | .endm | 
 | 209 |  | 
 | 210 | .macro state_load | 
 | 211 | 	movdqu 0x00(STATEP), STATE0 | 
 | 212 | 	movdqu 0x10(STATEP), STATE1 | 
 | 213 | 	movdqu 0x20(STATEP), STATE2 | 
 | 214 | 	movdqu 0x30(STATEP), STATE3 | 
 | 215 | 	movdqu 0x40(STATEP), STATE4 | 
 | 216 | 	movdqu 0x50(STATEP), STATE5 | 
 | 217 | .endm | 
 | 218 |  | 
 | 219 | .macro state_store s0 s1 s2 s3 s4 s5 | 
 | 220 | 	movdqu \s5, 0x00(STATEP) | 
 | 221 | 	movdqu \s0, 0x10(STATEP) | 
 | 222 | 	movdqu \s1, 0x20(STATEP) | 
 | 223 | 	movdqu \s2, 0x30(STATEP) | 
 | 224 | 	movdqu \s3, 0x40(STATEP) | 
 | 225 | 	movdqu \s4, 0x50(STATEP) | 
 | 226 | .endm | 
 | 227 |  | 
 | 228 | .macro state_store0 | 
 | 229 | 	state_store STATE0 STATE1 STATE2 STATE3 STATE4 STATE5 | 
 | 230 | .endm | 
 | 231 |  | 
 | 232 | .macro state_store1 | 
 | 233 | 	state_store STATE5 STATE0 STATE1 STATE2 STATE3 STATE4 | 
 | 234 | .endm | 
 | 235 |  | 
 | 236 | .macro state_store2 | 
 | 237 | 	state_store STATE4 STATE5 STATE0 STATE1 STATE2 STATE3 | 
 | 238 | .endm | 
 | 239 |  | 
 | 240 | .macro state_store3 | 
 | 241 | 	state_store STATE3 STATE4 STATE5 STATE0 STATE1 STATE2 | 
 | 242 | .endm | 
 | 243 |  | 
 | 244 | .macro state_store4 | 
 | 245 | 	state_store STATE2 STATE3 STATE4 STATE5 STATE0 STATE1 | 
 | 246 | .endm | 
 | 247 |  | 
 | 248 | .macro state_store5 | 
 | 249 | 	state_store STATE1 STATE2 STATE3 STATE4 STATE5 STATE0 | 
 | 250 | .endm | 
 | 251 |  | 
 | 252 | /* | 
 | 253 |  * void crypto_aegis256_aesni_init(void *state, const void *key, const void *iv); | 
 | 254 |  */ | 
 | 255 | ENTRY(crypto_aegis256_aesni_init) | 
 | 256 | 	FRAME_BEGIN | 
 | 257 |  | 
 | 258 | 	/* load key: */ | 
 | 259 | 	movdqa 0x00(%rsi), MSG | 
 | 260 | 	movdqa 0x10(%rsi), T1 | 
 | 261 | 	movdqa MSG, STATE4 | 
 | 262 | 	movdqa T1, STATE5 | 
 | 263 |  | 
 | 264 | 	/* load IV: */ | 
 | 265 | 	movdqu 0x00(%rdx), T2 | 
 | 266 | 	movdqu 0x10(%rdx), T3 | 
 | 267 | 	pxor MSG, T2 | 
 | 268 | 	pxor T1, T3 | 
 | 269 | 	movdqa T2, STATE0 | 
 | 270 | 	movdqa T3, STATE1 | 
 | 271 |  | 
 | 272 | 	/* load the constants: */ | 
 | 273 | 	movdqa .Laegis256_const_0, STATE3 | 
 | 274 | 	movdqa .Laegis256_const_1, STATE2 | 
 | 275 | 	pxor STATE3, STATE4 | 
 | 276 | 	pxor STATE2, STATE5 | 
 | 277 |  | 
 | 278 | 	/* update 10 times with IV and KEY: */ | 
 | 279 | 	update0 MSG | 
 | 280 | 	update1 T1 | 
 | 281 | 	update2 T2 | 
 | 282 | 	update3 T3 | 
 | 283 | 	update4 MSG | 
 | 284 | 	update5 T1 | 
 | 285 | 	update0 T2 | 
 | 286 | 	update1 T3 | 
 | 287 | 	update2 MSG | 
 | 288 | 	update3 T1 | 
 | 289 | 	update4 T2 | 
 | 290 | 	update5 T3 | 
 | 291 | 	update0 MSG | 
 | 292 | 	update1 T1 | 
 | 293 | 	update2 T2 | 
 | 294 | 	update3 T3 | 
 | 295 |  | 
 | 296 | 	state_store3 | 
 | 297 |  | 
 | 298 | 	FRAME_END | 
 | 299 | 	ret | 
 | 300 | ENDPROC(crypto_aegis256_aesni_init) | 
 | 301 |  | 
 | 302 | .macro ad_block a i | 
 | 303 | 	movdq\a (\i * 0x10)(SRC), MSG | 
 | 304 | 	update\i MSG | 
 | 305 | 	sub $0x10, LEN | 
 | 306 | 	cmp $0x10, LEN | 
 | 307 | 	jl .Lad_out_\i | 
 | 308 | .endm | 
 | 309 |  | 
 | 310 | /* | 
 | 311 |  * void crypto_aegis256_aesni_ad(void *state, unsigned int length, | 
 | 312 |  *                               const void *data); | 
 | 313 |  */ | 
 | 314 | ENTRY(crypto_aegis256_aesni_ad) | 
 | 315 | 	FRAME_BEGIN | 
 | 316 |  | 
 | 317 | 	cmp $0x10, LEN | 
 | 318 | 	jb .Lad_out | 
 | 319 |  | 
 | 320 | 	state_load | 
 | 321 |  | 
 | 322 | 	mov  SRC, %r8 | 
 | 323 | 	and $0xf, %r8 | 
 | 324 | 	jnz .Lad_u_loop | 
 | 325 |  | 
 | 326 | .align 8 | 
 | 327 | .Lad_a_loop: | 
 | 328 | 	ad_block a 0 | 
 | 329 | 	ad_block a 1 | 
 | 330 | 	ad_block a 2 | 
 | 331 | 	ad_block a 3 | 
 | 332 | 	ad_block a 4 | 
 | 333 | 	ad_block a 5 | 
 | 334 |  | 
 | 335 | 	add $0x60, SRC | 
 | 336 | 	jmp .Lad_a_loop | 
 | 337 |  | 
 | 338 | .align 8 | 
 | 339 | .Lad_u_loop: | 
 | 340 | 	ad_block u 0 | 
 | 341 | 	ad_block u 1 | 
 | 342 | 	ad_block u 2 | 
 | 343 | 	ad_block u 3 | 
 | 344 | 	ad_block u 4 | 
 | 345 | 	ad_block u 5 | 
 | 346 |  | 
 | 347 | 	add $0x60, SRC | 
 | 348 | 	jmp .Lad_u_loop | 
 | 349 |  | 
 | 350 | .Lad_out_0: | 
 | 351 | 	state_store0 | 
 | 352 | 	FRAME_END | 
 | 353 | 	ret | 
 | 354 |  | 
 | 355 | .Lad_out_1: | 
 | 356 | 	state_store1 | 
 | 357 | 	FRAME_END | 
 | 358 | 	ret | 
 | 359 |  | 
 | 360 | .Lad_out_2: | 
 | 361 | 	state_store2 | 
 | 362 | 	FRAME_END | 
 | 363 | 	ret | 
 | 364 |  | 
 | 365 | .Lad_out_3: | 
 | 366 | 	state_store3 | 
 | 367 | 	FRAME_END | 
 | 368 | 	ret | 
 | 369 |  | 
 | 370 | .Lad_out_4: | 
 | 371 | 	state_store4 | 
 | 372 | 	FRAME_END | 
 | 373 | 	ret | 
 | 374 |  | 
 | 375 | .Lad_out_5: | 
 | 376 | 	state_store5 | 
 | 377 | 	FRAME_END | 
 | 378 | 	ret | 
 | 379 |  | 
 | 380 | .Lad_out: | 
 | 381 | 	FRAME_END | 
 | 382 | 	ret | 
 | 383 | ENDPROC(crypto_aegis256_aesni_ad) | 
 | 384 |  | 
 | 385 | .macro crypt m s0 s1 s2 s3 s4 s5 | 
 | 386 | 	pxor \s1, \m | 
 | 387 | 	pxor \s4, \m | 
 | 388 | 	pxor \s5, \m | 
 | 389 | 	movdqa \s2, T3 | 
 | 390 | 	pand \s3, T3 | 
 | 391 | 	pxor T3, \m | 
 | 392 | .endm | 
 | 393 |  | 
 | 394 | .macro crypt0 m | 
 | 395 | 	crypt \m STATE0 STATE1 STATE2 STATE3 STATE4 STATE5 | 
 | 396 | .endm | 
 | 397 |  | 
 | 398 | .macro crypt1 m | 
 | 399 | 	crypt \m STATE5 STATE0 STATE1 STATE2 STATE3 STATE4 | 
 | 400 | .endm | 
 | 401 |  | 
 | 402 | .macro crypt2 m | 
 | 403 | 	crypt \m STATE4 STATE5 STATE0 STATE1 STATE2 STATE3 | 
 | 404 | .endm | 
 | 405 |  | 
 | 406 | .macro crypt3 m | 
 | 407 | 	crypt \m STATE3 STATE4 STATE5 STATE0 STATE1 STATE2 | 
 | 408 | .endm | 
 | 409 |  | 
 | 410 | .macro crypt4 m | 
 | 411 | 	crypt \m STATE2 STATE3 STATE4 STATE5 STATE0 STATE1 | 
 | 412 | .endm | 
 | 413 |  | 
 | 414 | .macro crypt5 m | 
 | 415 | 	crypt \m STATE1 STATE2 STATE3 STATE4 STATE5 STATE0 | 
 | 416 | .endm | 
 | 417 |  | 
 | 418 | .macro encrypt_block a i | 
 | 419 | 	movdq\a (\i * 0x10)(SRC), MSG | 
 | 420 | 	movdqa MSG, T0 | 
 | 421 | 	crypt\i T0 | 
 | 422 | 	movdq\a T0, (\i * 0x10)(DST) | 
 | 423 |  | 
 | 424 | 	update\i MSG | 
 | 425 |  | 
 | 426 | 	sub $0x10, LEN | 
 | 427 | 	cmp $0x10, LEN | 
 | 428 | 	jl .Lenc_out_\i | 
 | 429 | .endm | 
 | 430 |  | 
 | 431 | .macro decrypt_block a i | 
 | 432 | 	movdq\a (\i * 0x10)(SRC), MSG | 
 | 433 | 	crypt\i MSG | 
 | 434 | 	movdq\a MSG, (\i * 0x10)(DST) | 
 | 435 |  | 
 | 436 | 	update\i MSG | 
 | 437 |  | 
 | 438 | 	sub $0x10, LEN | 
 | 439 | 	cmp $0x10, LEN | 
 | 440 | 	jl .Ldec_out_\i | 
 | 441 | .endm | 
 | 442 |  | 
 | 443 | /* | 
 | 444 |  * void crypto_aegis256_aesni_enc(void *state, unsigned int length, | 
 | 445 |  *                                const void *src, void *dst); | 
 | 446 |  */ | 
 | 447 | ENTRY(crypto_aegis256_aesni_enc) | 
 | 448 | 	FRAME_BEGIN | 
 | 449 |  | 
 | 450 | 	cmp $0x10, LEN | 
 | 451 | 	jb .Lenc_out | 
 | 452 |  | 
 | 453 | 	state_load | 
 | 454 |  | 
 | 455 | 	mov  SRC, %r8 | 
 | 456 | 	or   DST, %r8 | 
 | 457 | 	and $0xf, %r8 | 
 | 458 | 	jnz .Lenc_u_loop | 
 | 459 |  | 
 | 460 | .align 8 | 
 | 461 | .Lenc_a_loop: | 
 | 462 | 	encrypt_block a 0 | 
 | 463 | 	encrypt_block a 1 | 
 | 464 | 	encrypt_block a 2 | 
 | 465 | 	encrypt_block a 3 | 
 | 466 | 	encrypt_block a 4 | 
 | 467 | 	encrypt_block a 5 | 
 | 468 |  | 
 | 469 | 	add $0x60, SRC | 
 | 470 | 	add $0x60, DST | 
 | 471 | 	jmp .Lenc_a_loop | 
 | 472 |  | 
 | 473 | .align 8 | 
 | 474 | .Lenc_u_loop: | 
 | 475 | 	encrypt_block u 0 | 
 | 476 | 	encrypt_block u 1 | 
 | 477 | 	encrypt_block u 2 | 
 | 478 | 	encrypt_block u 3 | 
 | 479 | 	encrypt_block u 4 | 
 | 480 | 	encrypt_block u 5 | 
 | 481 |  | 
 | 482 | 	add $0x60, SRC | 
 | 483 | 	add $0x60, DST | 
 | 484 | 	jmp .Lenc_u_loop | 
 | 485 |  | 
 | 486 | .Lenc_out_0: | 
 | 487 | 	state_store0 | 
 | 488 | 	FRAME_END | 
 | 489 | 	ret | 
 | 490 |  | 
 | 491 | .Lenc_out_1: | 
 | 492 | 	state_store1 | 
 | 493 | 	FRAME_END | 
 | 494 | 	ret | 
 | 495 |  | 
 | 496 | .Lenc_out_2: | 
 | 497 | 	state_store2 | 
 | 498 | 	FRAME_END | 
 | 499 | 	ret | 
 | 500 |  | 
 | 501 | .Lenc_out_3: | 
 | 502 | 	state_store3 | 
 | 503 | 	FRAME_END | 
 | 504 | 	ret | 
 | 505 |  | 
 | 506 | .Lenc_out_4: | 
 | 507 | 	state_store4 | 
 | 508 | 	FRAME_END | 
 | 509 | 	ret | 
 | 510 |  | 
 | 511 | .Lenc_out_5: | 
 | 512 | 	state_store5 | 
 | 513 | 	FRAME_END | 
 | 514 | 	ret | 
 | 515 |  | 
 | 516 | .Lenc_out: | 
 | 517 | 	FRAME_END | 
 | 518 | 	ret | 
 | 519 | ENDPROC(crypto_aegis256_aesni_enc) | 
 | 520 |  | 
 | 521 | /* | 
 | 522 |  * void crypto_aegis256_aesni_enc_tail(void *state, unsigned int length, | 
 | 523 |  *                                     const void *src, void *dst); | 
 | 524 |  */ | 
 | 525 | ENTRY(crypto_aegis256_aesni_enc_tail) | 
 | 526 | 	FRAME_BEGIN | 
 | 527 |  | 
 | 528 | 	state_load | 
 | 529 |  | 
 | 530 | 	/* encrypt message: */ | 
 | 531 | 	call __load_partial | 
 | 532 |  | 
 | 533 | 	movdqa MSG, T0 | 
 | 534 | 	crypt0 T0 | 
 | 535 |  | 
 | 536 | 	call __store_partial | 
 | 537 |  | 
 | 538 | 	update0 MSG | 
 | 539 |  | 
 | 540 | 	state_store0 | 
 | 541 |  | 
 | 542 | 	FRAME_END | 
| Borislav Petkov | 221e00d | 2018-06-23 12:36:22 +0200 | [diff] [blame] | 543 | 	ret | 
| Ondrej Mosnacek | 1d373d4 | 2018-05-11 14:12:51 +0200 | [diff] [blame] | 544 | ENDPROC(crypto_aegis256_aesni_enc_tail) | 
 | 545 |  | 
 | 546 | /* | 
 | 547 |  * void crypto_aegis256_aesni_dec(void *state, unsigned int length, | 
 | 548 |  *                                const void *src, void *dst); | 
 | 549 |  */ | 
 | 550 | ENTRY(crypto_aegis256_aesni_dec) | 
 | 551 | 	FRAME_BEGIN | 
 | 552 |  | 
 | 553 | 	cmp $0x10, LEN | 
 | 554 | 	jb .Ldec_out | 
 | 555 |  | 
 | 556 | 	state_load | 
 | 557 |  | 
 | 558 | 	mov  SRC, %r8 | 
 | 559 | 	or   DST, %r8 | 
 | 560 | 	and $0xF, %r8 | 
 | 561 | 	jnz .Ldec_u_loop | 
 | 562 |  | 
 | 563 | .align 8 | 
 | 564 | .Ldec_a_loop: | 
 | 565 | 	decrypt_block a 0 | 
 | 566 | 	decrypt_block a 1 | 
 | 567 | 	decrypt_block a 2 | 
 | 568 | 	decrypt_block a 3 | 
 | 569 | 	decrypt_block a 4 | 
 | 570 | 	decrypt_block a 5 | 
 | 571 |  | 
 | 572 | 	add $0x60, SRC | 
 | 573 | 	add $0x60, DST | 
 | 574 | 	jmp .Ldec_a_loop | 
 | 575 |  | 
 | 576 | .align 8 | 
 | 577 | .Ldec_u_loop: | 
 | 578 | 	decrypt_block u 0 | 
 | 579 | 	decrypt_block u 1 | 
 | 580 | 	decrypt_block u 2 | 
 | 581 | 	decrypt_block u 3 | 
 | 582 | 	decrypt_block u 4 | 
 | 583 | 	decrypt_block u 5 | 
 | 584 |  | 
 | 585 | 	add $0x60, SRC | 
 | 586 | 	add $0x60, DST | 
 | 587 | 	jmp .Ldec_u_loop | 
 | 588 |  | 
 | 589 | .Ldec_out_0: | 
 | 590 | 	state_store0 | 
 | 591 | 	FRAME_END | 
 | 592 | 	ret | 
 | 593 |  | 
 | 594 | .Ldec_out_1: | 
 | 595 | 	state_store1 | 
 | 596 | 	FRAME_END | 
 | 597 | 	ret | 
 | 598 |  | 
 | 599 | .Ldec_out_2: | 
 | 600 | 	state_store2 | 
 | 601 | 	FRAME_END | 
 | 602 | 	ret | 
 | 603 |  | 
 | 604 | .Ldec_out_3: | 
 | 605 | 	state_store3 | 
 | 606 | 	FRAME_END | 
 | 607 | 	ret | 
 | 608 |  | 
 | 609 | .Ldec_out_4: | 
 | 610 | 	state_store4 | 
 | 611 | 	FRAME_END | 
 | 612 | 	ret | 
 | 613 |  | 
 | 614 | .Ldec_out_5: | 
 | 615 | 	state_store5 | 
 | 616 | 	FRAME_END | 
 | 617 | 	ret | 
 | 618 |  | 
 | 619 | .Ldec_out: | 
 | 620 | 	FRAME_END | 
 | 621 | 	ret | 
 | 622 | ENDPROC(crypto_aegis256_aesni_dec) | 
 | 623 |  | 
 | 624 | /* | 
 | 625 |  * void crypto_aegis256_aesni_dec_tail(void *state, unsigned int length, | 
 | 626 |  *                                     const void *src, void *dst); | 
 | 627 |  */ | 
 | 628 | ENTRY(crypto_aegis256_aesni_dec_tail) | 
 | 629 | 	FRAME_BEGIN | 
 | 630 |  | 
 | 631 | 	state_load | 
 | 632 |  | 
 | 633 | 	/* decrypt message: */ | 
 | 634 | 	call __load_partial | 
 | 635 |  | 
 | 636 | 	crypt0 MSG | 
 | 637 |  | 
 | 638 | 	movdqa MSG, T0 | 
 | 639 | 	call __store_partial | 
 | 640 |  | 
 | 641 | 	/* mask with byte count: */ | 
 | 642 | 	movq LEN, T0 | 
 | 643 | 	punpcklbw T0, T0 | 
 | 644 | 	punpcklbw T0, T0 | 
 | 645 | 	punpcklbw T0, T0 | 
 | 646 | 	punpcklbw T0, T0 | 
 | 647 | 	movdqa .Laegis256_counter, T1 | 
 | 648 | 	pcmpgtb T1, T0 | 
 | 649 | 	pand T0, MSG | 
 | 650 |  | 
 | 651 | 	update0 MSG | 
 | 652 |  | 
 | 653 | 	state_store0 | 
 | 654 |  | 
 | 655 | 	FRAME_END | 
 | 656 | 	ret | 
 | 657 | ENDPROC(crypto_aegis256_aesni_dec_tail) | 
 | 658 |  | 
 | 659 | /* | 
 | 660 |  * void crypto_aegis256_aesni_final(void *state, void *tag_xor, | 
 | 661 |  *                                  u64 assoclen, u64 cryptlen); | 
 | 662 |  */ | 
 | 663 | ENTRY(crypto_aegis256_aesni_final) | 
 | 664 | 	FRAME_BEGIN | 
 | 665 |  | 
 | 666 | 	state_load | 
 | 667 |  | 
 | 668 | 	/* prepare length block: */ | 
 | 669 | 	movq %rdx, MSG | 
 | 670 | 	movq %rcx, T0 | 
 | 671 | 	pslldq $8, T0 | 
 | 672 | 	pxor T0, MSG | 
 | 673 | 	psllq $3, MSG /* multiply by 8 (to get bit count) */ | 
 | 674 |  | 
 | 675 | 	pxor STATE3, MSG | 
 | 676 |  | 
 | 677 | 	/* update state: */ | 
 | 678 | 	update0 MSG | 
 | 679 | 	update1 MSG | 
 | 680 | 	update2 MSG | 
 | 681 | 	update3 MSG | 
 | 682 | 	update4 MSG | 
 | 683 | 	update5 MSG | 
 | 684 | 	update0 MSG | 
 | 685 |  | 
 | 686 | 	/* xor tag: */ | 
 | 687 | 	movdqu (%rsi), MSG | 
 | 688 |  | 
 | 689 | 	pxor STATE0, MSG | 
 | 690 | 	pxor STATE1, MSG | 
 | 691 | 	pxor STATE2, MSG | 
 | 692 | 	pxor STATE3, MSG | 
 | 693 | 	pxor STATE4, MSG | 
 | 694 | 	pxor STATE5, MSG | 
 | 695 |  | 
 | 696 | 	movdqu MSG, (%rsi) | 
 | 697 |  | 
 | 698 | 	FRAME_END | 
 | 699 | 	ret | 
 | 700 | ENDPROC(crypto_aegis256_aesni_final) |