/*
 * AES-NI + SSE2 implementation of AEGIS-256
 *
 * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
 * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation.
 */
11
#include <linux/linkage.h>
#include <asm/frame.h>

/* The six 128-bit state blocks are kept in %xmm0-%xmm5. */
#define STATE0	%xmm0
#define STATE1	%xmm1
#define STATE2	%xmm2
#define STATE3	%xmm3
#define STATE4	%xmm4
#define STATE5	%xmm5

/* Current message block and scratch vector registers. */
#define MSG	%xmm6
#define T0	%xmm7
#define T1	%xmm8
#define T2	%xmm9
#define T3	%xmm10

/*
 * Integer registers holding the first four arguments of the exported
 * functions: state pointer, byte count, source and destination.
 */
#define STATEP	%rdi
#define LEN	%rsi
#define SRC	%rdx
#define DST	%rcx
31
/*
 * Initialization constants, loaded by crypto_aegis256_aesni_init.  The 32
 * bytes are consecutive Fibonacci numbers reduced modulo 256: const_0 holds
 * terms 0-15 and const_1 holds terms 16-31.
 *
 * NOTE(review): the section is named ".rodata.cst16" but declares a 32-byte
 * entity size (both constants form one mergeable entity) - confirm that this
 * naming is intended.
 */
.section	.rodata.cst16.aegis256_const, "aM", @progbits, 32
.balign 16
.Laegis256_const_0:
	.byte 0x00, 0x01, 0x01, 0x02
	.byte 0x03, 0x05, 0x08, 0x0d
	.byte 0x15, 0x22, 0x37, 0x59
	.byte 0x90, 0xe9, 0x79, 0x62
.Laegis256_const_1:
	.byte 0xdb, 0x3d, 0x18, 0x55
	.byte 0x6d, 0xc2, 0x2f, 0xf1
	.byte 0x20, 0x11, 0x31, 0x42
	.byte 0x73, 0xb5, 0x28, 0xdd

/*
 * Byte indices 0..15.  crypto_aegis256_aesni_dec_tail compares these against
 * the byte count to build a mask that keeps only the first LEN bytes.
 */
.section	.rodata.cst16.aegis256_counter, "aM", @progbits, 16
.balign 16
.Laegis256_counter:
	.byte 0x00, 0x01, 0x02, 0x03
	.byte 0x04, 0x05, 0x06, 0x07
	.byte 0x08, 0x09, 0x0a, 0x0b
	.byte 0x0c, 0x0d, 0x0e, 0x0f

.text
48
/*
 * __load_partial: internal ABI
 *
 * Gathers the (LEN & 0xf) bytes of a partial block into MSG, zero-padding
 * the remaining high bytes.  The pieces are read largest-offset first
 * (1, 2, 4, then 8 bytes) so that only bytes inside the partial block are
 * touched.  All offsets are derived from LEN with masks that keep bit 4,
 * so the bytes are taken starting at SRC + (LEN & 0x10).
 *
 * input:
 *   LEN - bytes
 *   SRC - src
 * output:
 *   MSG  - message block
 * changed:
 *   T0
 *   %r8
 *   %r9
 *   flags
 */
__load_partial:
	xor %r9d, %r9d			/* %r9 collects the low 8 bytes */
	pxor MSG, MSG

	/* odd length: fetch the single last byte */
	test $0x1, LEN
	jz .Lld_partial_1

	mov LEN, %r8
	and $0x1E, %r8			/* offset of that byte */
	mov (SRC,%r8), %r9b

.Lld_partial_1:
	/* bit 1 set: make room and fetch the 2 bytes below */
	test $0x2, LEN
	jz .Lld_partial_2

	mov LEN, %r8
	and $0x1C, %r8
	shl $0x10, %r9
	mov (SRC,%r8), %r9w

.Lld_partial_2:
	/* bit 2 set: make room and fetch the 4 bytes below */
	test $0x4, LEN
	jz .Lld_partial_4

	mov LEN, %r8
	and $0x18, %r8
	shl $32, %r9
	mov (SRC,%r8), %r8d		/* 32-bit load zero-extends %r8 */
	xor %r8, %r9

.Lld_partial_4:
	movq %r9, MSG

	/* bit 3 set: what we have is the high half; fetch the low 8 bytes */
	test $0x8, LEN
	jz .Lld_partial_8

	mov LEN, %r8
	and $0x10, %r8
	pslldq $8, MSG
	movq (SRC,%r8), T0
	pxor T0, MSG

.Lld_partial_8:
	ret
ENDPROC(__load_partial)
114
/*
 * __store_partial: internal ABI
 *
 * Writes the low LEN bytes of T0 to DST in pieces of 8, 4, 2 and 1 bytes,
 * so at most 15 bytes are ever written.
 *
 * input:
 *   LEN - bytes
 *   DST - dst
 *   T0  - message block to store
 * changed:
 *   T0 (shifted right by 8 bytes when LEN >= 8)
 *   %r8
 *   %r9
 *   %r10
 *   flags
 */
__store_partial:
	mov LEN, %r8
	/*
	 * The exported callers receive the length as a 32-bit unsigned int,
	 * so the upper half of the 64-bit register is not guaranteed to be
	 * zero.  Zero-extend before the 64-bit compares below.
	 */
	mov %r8d, %r8d
	mov DST, %r9

	movq T0, %r10			/* %r10 = low 8 bytes still to store */

	cmp $8, %r8
	jb .Lst_partial_8

	mov %r10, (%r9)
	psrldq $8, T0			/* continue with the high half */
	movq T0, %r10

	sub $8, %r8
	add $8, %r9

.Lst_partial_8:
	cmp $4, %r8
	jb .Lst_partial_4

	mov %r10d, (%r9)
	shr $32, %r10

	sub $4, %r8
	add $4, %r9

.Lst_partial_4:
	cmp $2, %r8
	jb .Lst_partial_2

	mov %r10w, (%r9)
	shr $0x10, %r10

	sub $2, %r8
	add $2, %r9

.Lst_partial_2:
	cmp $1, %r8
	jb .Lst_partial_1

	mov %r10b, (%r9)

.Lst_partial_1:
	ret
ENDPROC(__store_partial)
172
/*
 * update: one state update round, without message injection.
 *
 * Every STATEk becomes aesenc(STATEk, old STATE(k+1)), with STATE4 taking
 * the old STATE5 (saved in T0) and STATE5 taking the old STATE0.
 * Clobbers T0.
 */
.macro update
	movdqa STATE5, T0		/* keep old STATE5 for the last step */
	aesenc STATE0, STATE5
	aesenc STATE1, STATE0
	aesenc STATE2, STATE1
	aesenc STATE3, STATE2
	aesenc STATE4, STATE3
	aesenc T0, STATE4
.endm

/* update_into: run one update round, then XOR block \m into \dst. */
.macro update_into m dst
	update
	pxor \m, \dst
.endm

/*
 * update0..update5: update round followed by injection of message block \m.
 * The target register steps down by one from update0 (STATE5) to update5
 * (STATE0); the matching state_storeN / cryptN macros use the same numbering.
 */
.macro update0 m
	update_into \m STATE5
.endm

.macro update1 m
	update_into \m STATE4
.endm

.macro update2 m
	update_into \m STATE3
.endm

.macro update3 m
	update_into \m STATE2
.endm

.macro update4 m
	update_into \m STATE1
.endm

.macro update5 m
	update_into \m STATE0
.endm
212
/* state_load: read the six state blocks at STATEP into STATE0..STATE5. */
.macro state_load
	movdqu (0 * 0x10)(STATEP), STATE0
	movdqu (1 * 0x10)(STATEP), STATE1
	movdqu (2 * 0x10)(STATEP), STATE2
	movdqu (3 * 0x10)(STATEP), STATE3
	movdqu (4 * 0x10)(STATEP), STATE4
	movdqu (5 * 0x10)(STATEP), STATE5
.endm

/*
 * state_store: write six registers back to STATEP.  The last argument goes
 * to slot 0 and the first five arguments to slots 1..5.
 */
.macro state_store s0 s1 s2 s3 s4 s5
	movdqu \s5, (0 * 0x10)(STATEP)
	movdqu \s0, (1 * 0x10)(STATEP)
	movdqu \s1, (2 * 0x10)(STATEP)
	movdqu \s2, (3 * 0x10)(STATEP)
	movdqu \s3, (4 * 0x10)(STATEP)
	movdqu \s4, (5 * 0x10)(STATEP)
.endm

/*
 * state_storeN: store variant to use when the last update executed was
 * updateN; each step rotates the register-to-slot assignment by one more.
 */
.macro state_store0
	state_store STATE0 STATE1 STATE2 STATE3 STATE4 STATE5
.endm

.macro state_store1
	state_store STATE5 STATE0 STATE1 STATE2 STATE3 STATE4
.endm

.macro state_store2
	state_store STATE4 STATE5 STATE0 STATE1 STATE2 STATE3
.endm

.macro state_store3
	state_store STATE3 STATE4 STATE5 STATE0 STATE1 STATE2
.endm

.macro state_store4
	state_store STATE2 STATE3 STATE4 STATE5 STATE0 STATE1
.endm

.macro state_store5
	state_store STATE1 STATE2 STATE3 STATE4 STATE5 STATE0
.endm
254
/*
 * void crypto_aegis256_aesni_init(void *state, const void *key, const void *iv);
 *
 * Builds the initial state from the 32-byte key and the 32-byte IV, runs 16
 * update rounds and stores the result at state.
 *
 * The key is read with movdqa and therefore must be 16-byte aligned; the IV
 * is read with movdqu and may be unaligned.
 * Clobbers %xmm0-%xmm10 and flags.
 */
ENTRY(crypto_aegis256_aesni_init)
	FRAME_BEGIN

	/* load key: */
	movdqa 0x00(%rsi), MSG
	movdqa 0x10(%rsi), T1
	movdqa MSG, STATE4
	movdqa T1, STATE5

	/* load IV and XOR it with the key: */
	movdqu 0x00(%rdx), T2
	movdqu 0x10(%rdx), T3
	pxor MSG, T2
	pxor T1, T3
	movdqa T2, STATE0
	movdqa T3, STATE1

	/*
	 * load the constants (RIP-relative, so the code carries no absolute
	 * address relocations and stays position independent):
	 */
	movdqa .Laegis256_const_0(%rip), STATE3
	movdqa .Laegis256_const_1(%rip), STATE2
	pxor STATE3, STATE4
	pxor STATE2, STATE5

	/*
	 * update 16 times, cycling through the key halves (MSG, T1) and
	 * the key^IV halves (T2, T3):
	 */
	update0 MSG
	update1 T1
	update2 T2
	update3 T3
	update4 MSG
	update5 T1
	update0 T2
	update1 T3
	update2 MSG
	update3 T1
	update4 T2
	update5 T3
	update0 MSG
	update1 T1
	update2 T2
	update3 T3

	/* the last round was update3, so store with the matching rotation */
	state_store3

	FRAME_END
	ret
ENDPROC(crypto_aegis256_aesni_init)
304
/*
 * ad_block: absorb the 16-byte block number \i (0..5) of the current
 * 96-byte group at SRC.
 *   \a - "a" for aligned loads (movdqa), "u" for unaligned loads (movdqu)
 * Leaves through .Lad_out_\i once fewer than 16 bytes remain in LEN.
 */
.macro ad_block a i
	movdq\a (\i * 0x10)(SRC), MSG
	update\i MSG
	sub $16, LEN
	cmp $16, LEN
	jl .Lad_out_\i
.endm
312
/*
 * void crypto_aegis256_aesni_ad(void *state, unsigned int length,
 *                               const void *data);
 *
 * Absorbs every complete 16-byte block of data into the state.  A trailing
 * partial block (length % 16 bytes) is not consumed here.  If length is
 * below 16 the function returns without touching the state.
 *
 * Clobbers LEN, SRC, %r8, %xmm0-%xmm7 and flags.
 */
ENTRY(crypto_aegis256_aesni_ad)
	FRAME_BEGIN

	/*
	 * length is a 32-bit unsigned int, so the upper half of LEN (%rsi) is
	 * not guaranteed to be zero on entry.  Zero-extend it before the
	 * 64-bit arithmetic below.
	 */
	mov %esi, %esi

	cmp $0x10, LEN
	jb .Lad_out

	state_load

	/* choose the aligned or unaligned load loop */
	mov SRC, %r8
	and $0xf, %r8
	jnz .Lad_u_loop

.align 8
.Lad_a_loop:
	ad_block a 0
	ad_block a 1
	ad_block a 2
	ad_block a 3
	ad_block a 4
	ad_block a 5

	add $0x60, SRC
	jmp .Lad_a_loop

.align 8
.Lad_u_loop:
	ad_block u 0
	ad_block u 1
	ad_block u 2
	ad_block u 3
	ad_block u 4
	ad_block u 5

	add $0x60, SRC
	jmp .Lad_u_loop

	/* exits: store with the rotation matching the last update executed */
.Lad_out_0:
	state_store0
	FRAME_END
	ret

.Lad_out_1:
	state_store1
	FRAME_END
	ret

.Lad_out_2:
	state_store2
	FRAME_END
	ret

.Lad_out_3:
	state_store3
	FRAME_END
	ret

.Lad_out_4:
	state_store4
	FRAME_END
	ret

.Lad_out_5:
	state_store5
	FRAME_END
	ret

.Lad_out:
	FRAME_END
	ret
ENDPROC(crypto_aegis256_aesni_ad)
387
/*
 * crypt: XOR the keystream block into \m:
 *   \m ^= \s1 ^ \s4 ^ \s5 ^ (\s2 & \s3)
 * \s0 is accepted only to keep the argument list uniform; it is not used.
 * Clobbers T3.
 */
.macro crypt m s0 s1 s2 s3 s4 s5
	movdqa \s2, T3
	pand \s3, T3
	pxor T3, \m
	pxor \s1, \m
	pxor \s4, \m
	pxor \s5, \m
.endm

/*
 * cryptN: crypt with the register rotation that is in effect just before
 * updateN is executed.
 */
.macro crypt0 m
	crypt \m STATE0 STATE1 STATE2 STATE3 STATE4 STATE5
.endm

.macro crypt1 m
	crypt \m STATE5 STATE0 STATE1 STATE2 STATE3 STATE4
.endm

.macro crypt2 m
	crypt \m STATE4 STATE5 STATE0 STATE1 STATE2 STATE3
.endm

.macro crypt3 m
	crypt \m STATE3 STATE4 STATE5 STATE0 STATE1 STATE2
.endm

.macro crypt4 m
	crypt \m STATE2 STATE3 STATE4 STATE5 STATE0 STATE1
.endm

.macro crypt5 m
	crypt \m STATE1 STATE2 STATE3 STATE4 STATE5 STATE0
.endm
420
/*
 * encrypt_block: encrypt block number \i (0..5) of the current 96-byte group.
 *   \a - "a" for aligned, "u" for unaligned loads and stores
 * The ciphertext is produced in T0 and written to DST; the state is then
 * updated with the plaintext still held in MSG.
 * Leaves through .Lenc_out_\i once fewer than 16 bytes remain in LEN.
 */
.macro encrypt_block a i
	movdq\a (\i * 0x10)(SRC), MSG
	movdqa MSG, T0
	crypt\i T0
	movdq\a T0, (\i * 0x10)(DST)

	update\i MSG

	sub $16, LEN
	cmp $16, LEN
	jl .Lenc_out_\i
.endm
433
/*
 * decrypt_block: decrypt block number \i (0..5) of the current 96-byte group.
 *   \a - "a" for aligned, "u" for unaligned loads and stores
 * The ciphertext in MSG is turned into plaintext in place, written to DST
 * and then used for the state update.
 * Leaves through .Ldec_out_\i once fewer than 16 bytes remain in LEN.
 */
.macro decrypt_block a i
	movdq\a (\i * 0x10)(SRC), MSG
	crypt\i MSG
	movdq\a MSG, (\i * 0x10)(DST)

	update\i MSG

	sub $16, LEN
	cmp $16, LEN
	jl .Ldec_out_\i
.endm
445
/*
 * void crypto_aegis256_aesni_enc(void *state, unsigned int length,
 *                                const void *src, void *dst);
 *
 * Encrypts every complete 16-byte block of src into dst and updates the
 * state.  A trailing partial block (length % 16 bytes) is not processed
 * here.  If length is below 16 the function returns without touching the
 * state.
 *
 * Clobbers LEN, SRC, DST, %r8, %xmm0-%xmm7, %xmm10 and flags.
 */
ENTRY(crypto_aegis256_aesni_enc)
	FRAME_BEGIN

	/*
	 * length is a 32-bit unsigned int, so the upper half of LEN (%rsi) is
	 * not guaranteed to be zero on entry.  Zero-extend it before the
	 * 64-bit arithmetic below.
	 */
	mov %esi, %esi

	cmp $0x10, LEN
	jb .Lenc_out

	state_load

	/* the aligned loop requires both src and dst to be 16-byte aligned */
	mov SRC, %r8
	or DST, %r8
	and $0xf, %r8
	jnz .Lenc_u_loop

.align 8
.Lenc_a_loop:
	encrypt_block a 0
	encrypt_block a 1
	encrypt_block a 2
	encrypt_block a 3
	encrypt_block a 4
	encrypt_block a 5

	add $0x60, SRC
	add $0x60, DST
	jmp .Lenc_a_loop

.align 8
.Lenc_u_loop:
	encrypt_block u 0
	encrypt_block u 1
	encrypt_block u 2
	encrypt_block u 3
	encrypt_block u 4
	encrypt_block u 5

	add $0x60, SRC
	add $0x60, DST
	jmp .Lenc_u_loop

	/* exits: store with the rotation matching the last update executed */
.Lenc_out_0:
	state_store0
	FRAME_END
	ret

.Lenc_out_1:
	state_store1
	FRAME_END
	ret

.Lenc_out_2:
	state_store2
	FRAME_END
	ret

.Lenc_out_3:
	state_store3
	FRAME_END
	ret

.Lenc_out_4:
	state_store4
	FRAME_END
	ret

.Lenc_out_5:
	state_store5
	FRAME_END
	ret

.Lenc_out:
	FRAME_END
	ret
ENDPROC(crypto_aegis256_aesni_enc)
523
/*
 * void crypto_aegis256_aesni_enc_tail(void *state, unsigned int length,
 *                                     const void *src, void *dst);
 *
 * Encrypts a final partial block: the bytes are gathered zero-padded into
 * MSG by __load_partial, the low length bytes of the ciphertext are written
 * by __store_partial, and the state is updated with the padded plaintext.
 */
ENTRY(crypto_aegis256_aesni_enc_tail)
	FRAME_BEGIN

	state_load

	/* fetch the zero-padded plaintext */
	call __load_partial

	/* ciphertext goes to T0; MSG keeps the plaintext for the update */
	movdqa MSG, T0
	crypt0 T0

	call __store_partial

	update0 MSG

	state_store0

	FRAME_END
	ret
ENDPROC(crypto_aegis256_aesni_enc_tail)
548
/*
 * void crypto_aegis256_aesni_dec(void *state, unsigned int length,
 *                                const void *src, void *dst);
 *
 * Decrypts every complete 16-byte block of src into dst and updates the
 * state with the recovered plaintext.  A trailing partial block
 * (length % 16 bytes) is not processed here.  If length is below 16 the
 * function returns without touching the state.
 *
 * Clobbers LEN, SRC, DST, %r8, %xmm0-%xmm7, %xmm10 and flags.
 */
ENTRY(crypto_aegis256_aesni_dec)
	FRAME_BEGIN

	/*
	 * length is a 32-bit unsigned int, so the upper half of LEN (%rsi) is
	 * not guaranteed to be zero on entry.  Zero-extend it before the
	 * 64-bit arithmetic below.
	 */
	mov %esi, %esi

	cmp $0x10, LEN
	jb .Ldec_out

	state_load

	/* the aligned loop requires both src and dst to be 16-byte aligned */
	mov SRC, %r8
	or DST, %r8
	and $0xF, %r8
	jnz .Ldec_u_loop

.align 8
.Ldec_a_loop:
	decrypt_block a 0
	decrypt_block a 1
	decrypt_block a 2
	decrypt_block a 3
	decrypt_block a 4
	decrypt_block a 5

	add $0x60, SRC
	add $0x60, DST
	jmp .Ldec_a_loop

.align 8
.Ldec_u_loop:
	decrypt_block u 0
	decrypt_block u 1
	decrypt_block u 2
	decrypt_block u 3
	decrypt_block u 4
	decrypt_block u 5

	add $0x60, SRC
	add $0x60, DST
	jmp .Ldec_u_loop

	/* exits: store with the rotation matching the last update executed */
.Ldec_out_0:
	state_store0
	FRAME_END
	ret

.Ldec_out_1:
	state_store1
	FRAME_END
	ret

.Ldec_out_2:
	state_store2
	FRAME_END
	ret

.Ldec_out_3:
	state_store3
	FRAME_END
	ret

.Ldec_out_4:
	state_store4
	FRAME_END
	ret

.Ldec_out_5:
	state_store5
	FRAME_END
	ret

.Ldec_out:
	FRAME_END
	ret
ENDPROC(crypto_aegis256_aesni_dec)
626
/*
 * void crypto_aegis256_aesni_dec_tail(void *state, unsigned int length,
 *                                     const void *src, void *dst);
 *
 * Decrypts a final partial block.  The ciphertext bytes are gathered
 * zero-padded into MSG, decrypted, and the low length bytes are written to
 * dst.  Before the state update, the bytes of MSG at index >= length are
 * cleared again, because decrypting the zero padding leaves keystream there.
 */
ENTRY(crypto_aegis256_aesni_dec_tail)
	FRAME_BEGIN

	state_load

	/* decrypt message: */
	call __load_partial

	crypt0 MSG

	movdqa MSG, T0
	call __store_partial

	/*
	 * mask with byte count: four byte-interleaves replicate the low byte
	 * of LEN into all 16 lanes of T0; the signed byte compare against the
	 * indices 0..15 then yields 0xff exactly in the lanes below LEN.
	 */
	movq LEN, T0
	punpcklbw T0, T0
	punpcklbw T0, T0
	punpcklbw T0, T0
	punpcklbw T0, T0
	/* RIP-relative load: no absolute address relocation */
	movdqa .Laegis256_counter(%rip), T1
	pcmpgtb T1, T0
	pand T0, MSG

	update0 MSG

	state_store0

	FRAME_END
	ret
ENDPROC(crypto_aegis256_aesni_dec_tail)
661
/*
 * void crypto_aegis256_aesni_final(void *state, void *tag_xor,
 *                                  u64 assoclen, u64 cryptlen);
 *
 * Mixes the two lengths (converted to bit counts) into the state with seven
 * update rounds, then XORs all six state blocks into the 16 bytes at
 * tag_xor.  The state in memory is not written back.
 *
 * NOTE(review): the full 64-bit %rdx and %rcx are consumed below; confirm
 * that the C-side prototype really passes 64-bit lengths as stated above.
 */
ENTRY(crypto_aegis256_aesni_final)
	FRAME_BEGIN

	state_load

	/* length block: low qword = %rdx, high qword = %rcx */
	movq %rdx, MSG
	movq %rcx, T0
	punpcklqdq T0, MSG
	psllq $3, MSG			/* bytes -> bits in both qwords */

	pxor STATE3, MSG

	/* seven update rounds with the length block: */
	update0 MSG
	update1 MSG
	update2 MSG
	update3 MSG
	update4 MSG
	update5 MSG
	update0 MSG

	/* fold the whole state into the caller's tag buffer: */
	movdqu (%rsi), MSG

	pxor STATE0, MSG
	pxor STATE1, MSG
	pxor STATE2, MSG
	pxor STATE3, MSG
	pxor STATE4, MSG
	pxor STATE5, MSG

	movdqu MSG, (%rsi)

	FRAME_END
	ret
ENDPROC(crypto_aegis256_aesni_final)