blob: 483cbb23ab8d0da13c7f6c8262d95fc6206ef7b3 [file] [log] [blame]
/* AES (Rijndael) implementation (FIPS PUB 197) for x86_64
 *
 * Copyright (C) 2005 Andreas Steinmetz, <ast@domdv.de>
 *
 * License:
 * This code can be distributed under the terms of the GNU General Public
 * License (GPL) Version 2 provided that the above header down to and
 * including this sentence is retained in full.
 */
10
/*
 * Lookup tables used by the round macros in this file; they are only
 * declared here, not defined. Each one is addressed as four consecutive
 * 256-entry dword tables at TAB, TAB+1024, TAB+2048 and TAB+3072.
 * aes_ft_tab / aes_fl_tab are used by the encrypt rounds / final round,
 * aes_it_tab / aes_il_tab by the decrypt rounds / final round.
 */
.extern aes_ft_tab
.extern aes_it_tab
.extern aes_fl_tab
.extern aes_il_tab

.text
17
/*
 * Register aliases.
 *
 * The macros in this file receive a base alias (R1, R2, ...) and build the
 * sized name by token pasting a suffix onto it:
 *   (none) 64-bit, E 32-bit, X 16-bit, H bits 15:8, L bits 7:0.
 * Only R1..R4 (%rax, %rbx, %rcx, %rdx) provide the X/H/L forms, so only
 * they may be passed where a macro extracts individual bytes.
 * R5..R7 provide the 64-bit and E forms; R8..R11 are 64-bit only.
 */
#define R1	%rax
#define R1E	%eax
#define R1X	%ax
#define R1H	%ah
#define R1L	%al
#define R2	%rbx
#define R2E	%ebx
#define R2X	%bx
#define R2H	%bh
#define R2L	%bl
#define R3	%rcx
#define R3E	%ecx
#define R3X	%cx
#define R3H	%ch
#define R3L	%cl
#define R4	%rdx
#define R4E	%edx
#define R4X	%dx
#define R4H	%dh
#define R4L	%dl
#define R5	%rsi
#define R5E	%esi
#define R6	%rdi
#define R6E	%edi
#define R7	%rbp
#define R7E	%ebp
#define R8	%r8
#define R9	%r9
#define R10	%r10
#define R11	%r11
48
/*
 * prologue(FUNC,BASE,B128,B192,r1..r11)
 *
 * Emits the global function label FUNC (8-byte aligned, typed @function)
 * and the code that runs before the first table round:
 *
 *   r1 -> r2, r3 -> r4  register copies (entry() passes %rbx -> %r8 and
 *                       %rbp -> %r9; epilogue() copies them back)
 *   r9                  set to r8 + BASE + 52, later advanced by 0/32/64
 *   r10 -> r11          copy kept because r10 is overwritten just below
 *   r5, r1, r6, r7      receive the dwords at 0, 4, 8, 12 from (r7); r7 is
 *                       loaded last because it is also the pointer read
 *   r10                 receives the dword at (r8) and is compared with 24
 *                       NOTE(review): presumably the key length in bytes
 *                       (16/24/32) - confirm against the context layout.
 *
 * The four loaded dwords are XORed with the dwords at -48 .. -36 from r9,
 * i.e. r8 + BASE + 4 .. r8 + BASE + 16.
 *
 * Control flow on the compare: below 24 jumps to B128 with r9 unchanged,
 * equal to 24 jumps to B192 with r9 advanced by 32, anything larger falls
 * through with r9 advanced by 64.
 */
#define prologue(FUNC,BASE,B128,B192,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11) \
	.global	FUNC;			\
	.type	FUNC,@function;		\
	.align	8;			\
FUNC:	movq	r1,r2;			\
	movq	r3,r4;			\
	leaq	BASE+52(r8),r9;		\
	movq	r10,r11;		\
	movl	(r7),r5 ## E;		\
	movl	4(r7),r1 ## E;		\
	movl	8(r7),r6 ## E;		\
	movl	12(r7),r7 ## E;		\
	movl	(r8),r10 ## E;		\
	xorl	-48(r9),r5 ## E;	\
	xorl	-44(r9),r1 ## E;	\
	xorl	-40(r9),r6 ## E;	\
	xorl	-36(r9),r7 ## E;	\
	cmpl	$24,r10 ## E;		\
	jb	B128;			\
	/* leaq leaves the flags from cmpl intact, so je still tests it */ \
	leaq	32(r9),r9;		\
	je	B192;			\
	leaq	32(r9),r9;
71
/*
 * epilogue(r1..r9)
 *
 * Copies r1 -> r2 and r3 -> r4 (the `return` macro uses this to copy
 * %r8 -> %rbx and %r9 -> %rbp), stores the 32-bit forms of r5, r6, r7, r8
 * at offsets 0, 4, 8, 12 from the pointer in r9, and returns.
 */
#define epilogue(r1,r2,r3,r4,r5,r6,r7,r8,r9) \
	movq	r1,r2;			\
	movq	r3,r4;			\
	movl	r5 ## E,(r9);		\
	movl	r6 ## E,4(r9);		\
	movl	r7 ## E,8(r9);		\
	movl	r8 ## E,12(r9);		\
	ret;
80
/*
 * round(TAB,OFFSET,r1,r2,r3,r4,r5,r6,r7,r8,ra,rb,rc,rd)
 *
 * One table-lookup round over four 32-bit words.
 *
 * In:   r1..r4   the four input words (32-bit forms); must be aliases that
 *                have X/H/L forms, because single bytes are extracted
 *       r8       pointer from which the 16 bytes at OFFSET .. OFFSET+15
 *                are read
 *       TAB      four consecutive 256-entry dword tables; Tn below means
 *                the table at TAB + n*1024
 * Out (bN = byte N of the input word, b0 least significant):
 *       r5 = T0[r1.b0] ^ T1[r2.b1] ^ T2[r3.b2] ^ T3[r4.b3]
 *       r6 = T0[r2.b0] ^ T1[r3.b1] ^ T2[r4.b2] ^ T3[r1.b3]
 *       r3 = T0[r3.b0] ^ T1[r4.b1] ^ T2[r1.b2] ^ T3[r2.b3]
 *       r4 = T0[r4.b0] ^ T1[r1.b1] ^ T2[r2.b2] ^ T3[r3.b3]
 *       Additionally the dwords at OFFSET, OFFSET+4, OFFSET+8, OFFSET+12
 *       from r8 are XORed into ra, rb, rc, rd. The callers choose
 *       ra..rd among r5, r6, r3, r4 to decide which output word takes
 *       which of the four dwords.
 * Clobbers: r1, r2, r7 (left holding byte indices) and the flags.
 *
 * Only the H and L byte forms can be extracted directly, so the upper
 * halves of r1/r2 are brought down with movw + roll/shrl: after
 * "movw r4X,r2X; roll $16,r2E" the low half of r2 holds bytes 2..3 of the
 * old r2 and the high half holds bytes 0..1 of the old r4 (likewise for
 * r1 with r3). The instruction order is significant; do not reorder.
 */
#define round(TAB,OFFSET,r1,r2,r3,r4,r5,r6,r7,r8,ra,rb,rc,rd) \
	movzbl	r2 ## H,r5 ## E;	\
	movzbl	r2 ## L,r6 ## E;	\
	movl	TAB+1024(,r5,4),r5 ## E;\
	movw	r4 ## X,r2 ## X;	\
	movl	TAB(,r6,4),r6 ## E;	\
	roll	$16,r2 ## E;		\
	shrl	$16,r4 ## E;		\
	movzbl	r4 ## H,r7 ## E;	\
	movzbl	r4 ## L,r4 ## E;	\
	xorl	OFFSET(r8),ra ## E;	\
	xorl	OFFSET+4(r8),rb ## E;	\
	xorl	TAB+3072(,r7,4),r5 ## E;\
	xorl	TAB+2048(,r4,4),r6 ## E;\
	movzbl	r1 ## L,r7 ## E;	\
	movzbl	r1 ## H,r4 ## E;	\
	movl	TAB+1024(,r4,4),r4 ## E;\
	movw	r3 ## X,r1 ## X;	\
	roll	$16,r1 ## E;		\
	shrl	$16,r3 ## E;		\
	xorl	TAB(,r7,4),r5 ## E;	\
	movzbl	r3 ## H,r7 ## E;	\
	movzbl	r3 ## L,r3 ## E;	\
	xorl	TAB+3072(,r7,4),r4 ## E;\
	xorl	TAB+2048(,r3,4),r5 ## E;\
	movzbl	r1 ## H,r7 ## E;	\
	movzbl	r1 ## L,r3 ## E;	\
	shrl	$16,r1 ## E;		\
	xorl	TAB+3072(,r7,4),r6 ## E;\
	movl	TAB+2048(,r3,4),r3 ## E;\
	movzbl	r1 ## H,r7 ## E;	\
	movzbl	r1 ## L,r1 ## E;	\
	xorl	TAB+1024(,r7,4),r6 ## E;\
	xorl	TAB(,r1,4),r3 ## E;	\
	movzbl	r2 ## H,r1 ## E;	\
	movzbl	r2 ## L,r7 ## E;	\
	shrl	$16,r2 ## E;		\
	xorl	TAB+3072(,r1,4),r3 ## E;\
	xorl	TAB+2048(,r7,4),r4 ## E;\
	movzbl	r2 ## H,r1 ## E;	\
	movzbl	r2 ## L,r2 ## E;	\
	xorl	OFFSET+8(r8),rc ## E;	\
	xorl	OFFSET+12(r8),rd ## E;	\
	xorl	TAB+1024(,r1,4),r3 ## E;\
	xorl	TAB(,r2,4),r4 ## E;
126
/*
 * move_regs(r1,r2,r3,r4): copy the 32-bit value of r3 into r1 and of r4
 * into r2. Used after round() to put the two words it leaves in its
 * scratch registers back where the next round() expects its inputs.
 */
#define move_regs(r1,r2,r3,r4) \
	movl	r3 ## E,r1 ## E;	\
	movl	r4 ## E,r2 ## E;
130
/*
 * entry(FUNC,BASE,B128,B192): prologue() bound to concrete registers.
 *
 *   %rbx -> %r8, %rbp -> %r9   copies made before %rbx/%rbp are reused
 *   %rdi                       first argument; base for the key pointer
 *                              and for the dword compared with 24
 *   %r10                       key pointer (%rdi + BASE + 52, then advanced)
 *   %rsi -> %r11               second argument, copied before %esi is
 *                              overwritten
 *   %rdx                       third argument; the 16 bytes it points to
 *                              are loaded into %eax, %ebx, %ecx, %edx
 */
#define entry(FUNC,BASE,B128,B192) \
	prologue(FUNC,BASE,B128,B192,R2,R8,R7,R9,R1,R3,R4,R6,R10,R5,R11)
133
/*
 * return: epilogue() bound to concrete registers. Copies %r8 -> %rbx and
 * %r9 -> %rbp (undoing the copies made by entry()), stores %esi, %edi,
 * %ecx, %edx at offsets 0, 4, 8, 12 from %r11, and executes ret.
 */
#define return epilogue(R8,R2,R9,R7,R5,R6,R3,R4,R11)
135
/*
 * encrypt_round(TAB,OFFSET): round() with input words in %eax, %ebx, %ecx,
 * %edx and the 16 bytes at OFFSET(%r10) XORed in. round() leaves the new
 * words in %esi, %edi, %ecx, %edx; move_regs then copies %esi/%edi back to
 * %eax/%ebx so the result is again in %eax, %ebx, %ecx, %edx.
 * Scratch: %esi, %edi, %ebp.
 */
#define encrypt_round(TAB,OFFSET) \
	round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4) \
	move_regs(R1,R2,R5,R6)

/*
 * encrypt_final(TAB,OFFSET): same as encrypt_round but without the copy
 * back, so the result stays in %esi, %edi, %ecx, %edx, which is the order
 * in which the `return` macro stores them.
 */
#define encrypt_final(TAB,OFFSET) \
	round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4)
142
/*
 * decrypt_round(TAB,OFFSET): round() with its input and scratch registers
 * pairwise swapped relative to encrypt_round (R2,R1,R4,R3 and R6,R5), which
 * reverses the direction in which bytes are gathered across the four
 * words. The ra..rd arguments are unchanged, so the dwords at OFFSET,
 * +4, +8, +12 from %r10 still go into %esi, %edi, %ecx, %edx respectively.
 * move_regs then copies %esi/%edi back to %eax/%ebx, leaving the result in
 * %eax, %ebx, %ecx, %edx. Scratch: %esi, %edi, %ebp.
 */
#define decrypt_round(TAB,OFFSET) \
	round(TAB,OFFSET,R2,R1,R4,R3,R6,R5,R7,R10,R5,R6,R3,R4) \
	move_regs(R1,R2,R5,R6)

/*
 * decrypt_final(TAB,OFFSET): same as decrypt_round but without the copy
 * back, so the result stays in %esi, %edi, %ecx, %edx, which is the order
 * in which the `return` macro stores them.
 */
#define decrypt_final(TAB,OFFSET) \
	round(TAB,OFFSET,R2,R1,R4,R3,R6,R5,R7,R10,R5,R6,R3,R4)
149
/*
 * void aes_encrypt(void *ctx, u8 *out, const u8 *in)
 *
 * Reads 16 bytes from in, runs them through the table rounds and writes
 * 16 bytes to out. entry() is passed BASE = 0, so the key pointer starts
 * at ctx + 52 and is advanced by 64, 32 or 0 depending on the dword at
 * (ctx); the same OFFSET values therefore address consecutive 16-byte
 * round keys on every path:
 *   fall-through  13 rounds + final
 *   enc192        11 rounds + final
 *   enc128         9 rounds + final
 * %rbx and %rbp are used as scratch and restored from %r8/%r9 on return;
 * the stack is not touched.
 */
	entry(aes_encrypt,0,enc128,enc192)
	encrypt_round(aes_ft_tab,-96)
	encrypt_round(aes_ft_tab,-80)
enc192:	encrypt_round(aes_ft_tab,-64)
	encrypt_round(aes_ft_tab,-48)
enc128:	encrypt_round(aes_ft_tab,-32)
	encrypt_round(aes_ft_tab,-16)
	encrypt_round(aes_ft_tab,  0)
	encrypt_round(aes_ft_tab, 16)
	encrypt_round(aes_ft_tab, 32)
	encrypt_round(aes_ft_tab, 48)
	encrypt_round(aes_ft_tab, 64)
	encrypt_round(aes_ft_tab, 80)
	encrypt_round(aes_ft_tab, 96)
	encrypt_final(aes_fl_tab,112)
	return
	/* record the symbol size to go with the @function type set in prologue */
	.size	aes_encrypt, .-aes_encrypt
168
/*
 * void aes_decrypt(void *ctx, u8 *out, const u8 *in)
 *
 * Reads 16 bytes from in, runs them through the table rounds and writes
 * 16 bytes to out. entry() is passed BASE = 240, so the key pointer starts
 * at ctx + 292, i.e. the round keys used here begin 240 bytes after the
 * ones aes_encrypt uses. The pointer is advanced by 64, 32 or 0 depending
 * on the dword at (ctx), so the same OFFSET values address consecutive
 * 16-byte round keys on every path:
 *   fall-through  13 rounds + final
 *   dec192        11 rounds + final
 *   dec128         9 rounds + final
 * %rbx and %rbp are used as scratch and restored from %r8/%r9 on return;
 * the stack is not touched.
 */
	entry(aes_decrypt,240,dec128,dec192)
	decrypt_round(aes_it_tab,-96)
	decrypt_round(aes_it_tab,-80)
dec192:	decrypt_round(aes_it_tab,-64)
	decrypt_round(aes_it_tab,-48)
dec128:	decrypt_round(aes_it_tab,-32)
	decrypt_round(aes_it_tab,-16)
	decrypt_round(aes_it_tab,  0)
	decrypt_round(aes_it_tab, 16)
	decrypt_round(aes_it_tab, 32)
	decrypt_round(aes_it_tab, 48)
	decrypt_round(aes_it_tab, 64)
	decrypt_round(aes_it_tab, 80)
	decrypt_round(aes_it_tab, 96)
	decrypt_final(aes_il_tab,112)
	return
	/* record the symbol size to go with the @function type set in prologue */
	.size	aes_decrypt, .-aes_decrypt