blob: 2849dbc59e11738a3f7d2a9e063e6700eec7b694 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001// -------------------------------------------------------------------------
2// Copyright (c) 2001, Dr Brian Gladman < >, Worcester, UK.
3// All rights reserved.
4//
5// LICENSE TERMS
6//
7// The free distribution and use of this software in both source and binary
8// form is allowed (with or without changes) provided that:
9//
10// 1. distributions of this source code include the above copyright
11// notice, this list of conditions and the following disclaimer//
12//
13// 2. distributions in binary form include the above copyright
14// notice, this list of conditions and the following disclaimer
15// in the documentation and/or other associated materials//
16//
17// 3. the copyright holder's name is not used to endorse products
18// built using this software without specific written permission.
19//
20//
21// ALTERNATIVELY, provided that this notice is retained in full, this product
22// may be distributed under the terms of the GNU General Public License (GPL),
23// in which case the provisions of the GPL apply INSTEAD OF those given above.
24//
25// Copyright (c) 2004 Linus Torvalds <torvalds@osdl.org>
26// Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>
27
28// DISCLAIMER
29//
30// This software is provided 'as is' with no explicit or implied warranties
31// in respect of its properties including, but not limited to, correctness
32// and fitness for purpose.
33// -------------------------------------------------------------------------
34// Issue Date: 29/07/2002
35
36.file "aes-i586-asm.S"
37.text
38
Jussi Kivilinna3f299742013-01-19 13:38:50 +020039#include <linux/linkage.h>
Herbert Xu6c2bb982006-05-16 22:09:29 +100040#include <asm/asm-offsets.h>
41
// tlen is the byte distance between consecutive lookup tables: the column
// macros address them as table, table+tlen, table+2*tlen and table+3*tlen.
Linus Torvalds1da177e2005-04-16 15:20:36 -070042#define tlen 1024 // length of each of 4 'xor' arrays (256 32-bit words)
43
Herbert Xu6c2bb982006-05-16 22:09:29 +100044/* offsets to parameters with one register pushed onto stack */
// %esp-relative offsets of the three arguments, valid while exactly one
// register has been pushed.  Code that has pushed more adds 4 per extra
// push at the point of use (in_blk+4 and out_blk+12 in the routines below).
Huang Ying07bf44f2009-01-09 17:25:50 +110045#define ctx 8
Herbert Xu6c2bb982006-05-16 22:09:29 +100046#define out_blk 12
47#define in_blk 16
Linus Torvalds1da177e2005-04-16 15:20:36 -070048
Huang Ying07bf44f2009-01-09 17:25:50 +110049/* offsets in crypto_aes_ctx structure */
// klen: offset of the key-size field that is compared against 24 below.
// ekey: offset of the key schedule read by aes_enc_blk.
// dkey: offset of the key schedule read by aes_dec_blk.
// NOTE(review): these are hard-coded numbers although asm-offsets.h is
// included; presumably they must match the C layout of crypto_aes_ctx.
// TODO confirm against the structure definition.
50#define klen (480)
51#define ekey (0)
52#define dkey (240)
Linus Torvalds1da177e2005-04-16 15:20:36 -070053
54// register mapping for encrypt and decrypt subroutines
55
// Symbolic register names used by every macro and routine in this file.
// r0-r3 are the four registers that have byte aliases defined below, so
// only they can be passed as idx to the column macros; r4 and r5 are used
// purely as 32-bit accumulators.
56#define r0 eax
57#define r1 ebx
58#define r2 ecx
59#define r3 edx
60#define r4 esi
61#define r5 edi
62
// Targets of the l()/h() token pasting: <reg>l names bits 0-7 and <reg>h
// names bits 8-15 of the 32-bit register.  No aliases are defined for esi
// or edi, so l(r4), h(r4), l(r5) and h(r5) would not expand to a register.
63#define eaxl al
64#define eaxh ah
65#define ebxl bl
66#define ebxh bh
67#define ecxl cl
68#define ecxh ch
69#define edxl dl
70#define edxh dh
71
// h(reg) / l(reg): name the high / low byte register of reg.
// The extra level of indirection is deliberate: h() lets the preprocessor
// expand its argument first (r0 becomes eax) and only then does _h() paste
// the suffix, giving eaxh, which the aliases above turn into ah.  Pasting
// directly would produce r0h, which is not defined.
72#define _h(reg) reg##h
73#define h(reg) _h(reg)
74
75#define _l(reg) reg##l
76#define l(reg) _l(reg)
77
78// This macro takes a 32-bit word representing a column and uses
79// each of its four bytes to index into four tables of 256 32-bit
80// words to obtain values that are then xored into the appropriate
81// output registers r0, r1, r4 or r5.
82
83// Parameters:
84// table table base address
85// a1 out_state[0]
86// a2 out_state[1]
87// a3 out_state[2]
88// a4 out_state[3]
89// idx input register for the round (destroyed)
90// tmp scratch register for the round
91// sched key schedule (do_fcol and do_icol only)
92
// do_col(table, a1,a2,a3,a4, idx, tmp)
// Splits the 32-bit value in idx into its four bytes and xors one table
// entry per byte into the four accumulators:
//   bits  0-7   select an entry of table          -> xored into a1
//   bits  8-15  select an entry of table+tlen     -> xored into a2
//   bits 16-23  select an entry of table+2*tlen   -> xored into a3
//   bits 24-31  select an entry of table+3*tlen   -> xored into a4
// Entries are 4 bytes wide (index scaled by 4).  idx must be one of r0-r3
// because l(idx) and h(idx) are used; it is shifted right by 16 half way
// through and finally overwritten with its own top byte.  tmp is
// overwritten with byte indices.  This macro takes no key-schedule
// argument; the accumulators must already hold valid partial results.
93#define do_col(table, a1,a2,a3,a4, idx, tmp) \
94 movzx %l(idx),%tmp; \
95 xor table(,%tmp,4),%a1; \
96 movzx %h(idx),%tmp; \
97 shr $16,%idx; \
98 xor table+tlen(,%tmp,4),%a2; \
99 movzx %l(idx),%tmp; \
100 movzx %h(idx),%idx; \
101 xor table+2*tlen(,%tmp,4),%a3; \
102 xor table+3*tlen(,%idx,4),%a4;
103
104// initialise output registers from the key schedule
105// NB1: original value of a3 is in idx on exit
106// NB2: original values of a1,a2,a4 aren't used
// do_fcol(table, a1,a2,a3,a4, idx, tmp, sched)
// First column step of a forward round.  Same byte-to-table mapping as
// do_col, but each accumulator is first loaded from the key schedule
// instead of being assumed valid:
//   a1 = word at  0 sched, then xored with the table entry for bits 0-7
//   a2 = word at 12 sched, then xored with the table+tlen entry for bits 8-15
//   a4 = word at  4 sched, then xored with the table+3*tlen entry for bits 24-31
//   a3 = word at  8 sched, then xored with the table+2*tlen entry for bits 16-23
// sched is pasted textually after the numeric offset, so it has to be an
// operand that still forms a valid displacement there; the call sites in
// this file pass forms such as -64(%ebp), (%ebp) and +16(%ebp).
// The old contents of a3 are copied into idx before a3 is reloaded, so on
// exit idx holds the column that was in a3.  The old contents of a1, a2
// and a4 are lost; callers must have saved any they still need.
107#define do_fcol(table, a1,a2,a3,a4, idx, tmp, sched) \
108 mov 0 sched,%a1; \
109 movzx %l(idx),%tmp; \
110 mov 12 sched,%a2; \
111 xor table(,%tmp,4),%a1; \
112 mov 4 sched,%a4; \
113 movzx %h(idx),%tmp; \
114 shr $16,%idx; \
115 xor table+tlen(,%tmp,4),%a2; \
116 movzx %l(idx),%tmp; \
117 movzx %h(idx),%idx; \
118 xor table+3*tlen(,%idx,4),%a4; \
119 mov %a3,%idx; \
120 mov 8 sched,%a3; \
121 xor table+2*tlen(,%tmp,4),%a3;
122
123// initialise output registers from the key schedule
124// NB1: original value of a3 is in idx on exit
125// NB2: original values of a1,a2,a4 aren't used
// do_icol(table, a1,a2,a3,a4, idx, tmp, sched)
// First column step of an inverse round.  Instruction for instruction the
// same as do_fcol except for which key-schedule words seed a2 and a4:
//   a1 = word at  0 sched, then xored with the table entry for bits 0-7
//   a2 = word at  4 sched, then xored with the table+tlen entry for bits 8-15
//   a4 = word at 12 sched, then xored with the table+3*tlen entry for bits 24-31
//   a3 = word at  8 sched, then xored with the table+2*tlen entry for bits 16-23
// sched is pasted textually after the numeric offset (call sites pass
// forms such as -64(%ebp), (%ebp) and +16(%ebp)).  On exit idx holds the
// old contents of a3; the old contents of a1, a2 and a4 are lost.
126#define do_icol(table, a1,a2,a3,a4, idx, tmp, sched) \
127 mov 0 sched,%a1; \
128 movzx %l(idx),%tmp; \
129 mov 4 sched,%a2; \
130 xor table(,%tmp,4),%a1; \
131 mov 12 sched,%a4; \
132 movzx %h(idx),%tmp; \
133 shr $16,%idx; \
134 xor table+tlen(,%tmp,4),%a2; \
135 movzx %l(idx),%tmp; \
136 movzx %h(idx),%idx; \
137 xor table+3*tlen(,%idx,4),%a4; \
138 mov %a3,%idx; \
139 mov 8 sched,%a3; \
140 xor table+2*tlen(,%tmp,4),%a3;
141
142
143// original Gladman had conditional saves to MMX regs.
// save(a1, a2): store register a2 into 4-byte stack slot number a1, i.e.
// at 4*a1(%esp).  Note the argument order: slot first, register second.
// The routines below reserve the slots with sub $8,%esp before any round.
144#define save(a1, a2) \
145 mov %a2,4*a1(%esp)
146
// restore(a1, a2): load register a1 from 4-byte stack slot number a2, i.e.
// from 4*a2(%esp).  The argument order is the reverse of save(): register
// first, slot second.
147#define restore(a1, a2) \
148 mov 4*a2(%esp),%a1
149
150// These macros perform a forward encryption cycle. They are entered with
151// the first previous round column values in r0,r1,r4,r5 and
152// exit with the final values in the same registers, using stack
153// for temporary storage.
154
155// round column values
156// on entry: r0,r1,r4,r5
157// on exit: r2,r1,r4,r5
// fwd_rnd1(arg, table): one forward round, state r0,r1,r4,r5 -> r2,r1,r4,r5.
//   arg   - round-key operand handed to do_fcol as sched
//   table - base of the four lookup tables used for this round
// r1 and r5 are parked in stack slots 0 and 1 first, because do_fcol
// reloads both from the key schedule before their old contents have been
// consumed.  The old r4 needs no slot: do_fcol leaves it in r0.  The four
// input columns are then consumed through r0 in the order old r0, old r4,
// old r1, old r5 (see the idx= notes), each call permuting which
// accumulator receives which byte lookup.  r0 and r3 carry no state on exit.
158#define fwd_rnd1(arg, table) \
159 save (0,r1); \
160 save (1,r5); \
161 \
162 /* compute new column values */ \
163 do_fcol(table, r2,r5,r4,r1, r0,r3, arg); /* idx=r0 */ \
164 do_col (table, r4,r1,r2,r5, r0,r3); /* idx=r4 */ \
165 restore(r0,0); \
166 do_col (table, r1,r2,r5,r4, r0,r3); /* idx=r1 */ \
167 restore(r0,1); \
168 do_col (table, r5,r4,r1,r2, r0,r3); /* idx=r5 */
169
170// round column values
171// on entry: r2,r1,r4,r5
172// on exit: r0,r1,r4,r5
// fwd_rnd2(arg, table): one forward round, state r2,r1,r4,r5 -> r0,r1,r4,r5.
// Identical to fwd_rnd1 with the roles of r0 and r2 exchanged, so that it
// accepts what fwd_rnd1 produces and hands back the layout fwd_rnd1
// expects; the two are meant to be used alternately.
//   arg   - round-key operand handed to do_fcol as sched
//   table - base of the four lookup tables used for this round
// r1 and r5 go through stack slots 0 and 1; r2 serves as the index
// register and r3 as scratch, and neither carries state on exit.
173#define fwd_rnd2(arg, table) \
174 save (0,r1); \
175 save (1,r5); \
176 \
177 /* compute new column values */ \
178 do_fcol(table, r0,r5,r4,r1, r2,r3, arg); /* idx=r2 */ \
179 do_col (table, r4,r1,r0,r5, r2,r3); /* idx=r4 */ \
180 restore(r2,0); \
181 do_col (table, r1,r0,r5,r4, r2,r3); /* idx=r1 */ \
182 restore(r2,1); \
183 do_col (table, r5,r4,r1,r0, r2,r3); /* idx=r5 */
184
185// These macros perform an inverse encryption cycle. They are entered with
186// the first previous round column values in r0,r1,r4,r5 and
187// exit with the final values in the same registers, using stack
188// for temporary storage
189
190// round column values
191// on entry: r0,r1,r4,r5
192// on exit: r2,r1,r4,r5
// inv_rnd1(arg, table): one inverse round, state r0,r1,r4,r5 -> r2,r1,r4,r5.
//   arg   - round-key operand handed to do_icol as sched
//   table - base of the four lookup tables used for this round
// Same skeleton as fwd_rnd1 (r1 and r5 parked in slots 0 and 1, old r4
// carried over in r0 by the first column macro, columns consumed in the
// order old r0, old r4, old r1, old r5), but it uses do_icol and a
// different assignment of accumulators to byte positions in every call.
// r0 and r3 carry no state on exit.
193#define inv_rnd1(arg, table) \
194 save (0,r1); \
195 save (1,r5); \
196 \
197 /* compute new column values */ \
198 do_icol(table, r2,r1,r4,r5, r0,r3, arg); /* idx=r0 */ \
199 do_col (table, r4,r5,r2,r1, r0,r3); /* idx=r4 */ \
200 restore(r0,0); \
201 do_col (table, r1,r4,r5,r2, r0,r3); /* idx=r1 */ \
202 restore(r0,1); \
203 do_col (table, r5,r2,r1,r4, r0,r3); /* idx=r5 */
204
205// round column values
206// on entry: r2,r1,r4,r5
207// on exit: r0,r1,r4,r5
// inv_rnd2(arg, table): one inverse round, state r2,r1,r4,r5 -> r0,r1,r4,r5.
// Identical to inv_rnd1 with the roles of r0 and r2 exchanged, so the two
// can be used alternately.
//   arg   - round-key operand handed to do_icol as sched
//   table - base of the four lookup tables used for this round
// r1 and r5 go through stack slots 0 and 1; r2 serves as the index
// register and r3 as scratch, and neither carries state on exit.
208#define inv_rnd2(arg, table) \
209 save (0,r1); \
210 save (1,r5); \
211 \
212 /* compute new column values */ \
213 do_icol(table, r0,r1,r4,r5, r2,r3, arg); /* idx=r2 */ \
214 do_col (table, r4,r5,r0,r1, r2,r3); /* idx=r4 */ \
215 restore(r2,0); \
216 do_col (table, r1,r4,r5,r0, r2,r3); /* idx=r1 */ \
217 restore(r2,1); \
218 do_col (table, r5,r0,r1,r4, r2,r3); /* idx=r5 */
219
220// AES (Rijndael) Encryption Subroutine
Huang Ying07bf44f2009-01-09 17:25:50 +1100221/* void aes_enc_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700222
Sebastian Siewior5157dea2007-11-10 19:07:16 +0800223.extern crypto_ft_tab
224.extern crypto_fl_tab
Linus Torvalds1da177e2005-04-16 15:20:36 -0700225
// aes_enc_blk: encrypt one 16-byte block.
//   ctx     - context pointer; the key schedule is read from offset ekey
//             and the key size from offset klen
//   out_blk - receives the four 32-bit result words
//   in_blk  - supplies the four 32-bit input words
// All three arguments are read from the stack.  ebp, ebx, esi and edi are
// pushed on entry and popped before ret; eax, ecx and edx are overwritten.
// Eight further bytes of stack serve as the save()/restore() slots.
Jussi Kivilinna3f299742013-01-19 13:38:50 +0200226ENTRY(aes_enc_blk)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700227 push %ebp
// exactly one register is on the stack here, so ctx needs no adjustment
Huang Ying07bf44f2009-01-09 17:25:50 +1100228 mov ctx(%esp),%ebp
Linus Torvalds1da177e2005-04-16 15:20:36 -0700229
230// CAUTION: the order and the values used in these assigns
231// rely on the register mappings
232
2331: push %ebx
// two registers pushed now, hence the +4 on the argument offset
234 mov in_blk+4(%esp),%r2
235 push %esi
Sebastian Siewior5157dea2007-11-10 19:07:16 +0800236 mov klen(%ebp),%r3 // key size
Linus Torvalds1da177e2005-04-16 15:20:36 -0700237 push %edi
// ekey is defined as 0 in this file, so the lea below is compiled out
238#if ekey != 0
239 lea ekey(%ebp),%ebp // key pointer
240#endif
241
242// input four columns and xor in first round key
243
244 mov (%r2),%r0
245 mov 4(%r2),%r1
246 mov 8(%r2),%r4
247 mov 12(%r2),%r5
248 xor (%ebp),%r0
249 xor 4(%ebp),%r1
250 xor 8(%ebp),%r4
251 xor 12(%ebp),%r5
252
// The cmp sets the flags once and both branches below consume them: the
// two lea instructions in between advance ebp without touching the flags,
// so je still sees the result of the comparison with 24.
// ebp ends up 16, 48 or 80 bytes past the start of the schedule, which
// makes the last ten rounds use offsets 0..+144 for every key size and
// puts the extra leading rounds at negative offsets.
Denis Vlasenkoe6a3a922005-11-29 22:23:20 +1100253 sub $8,%esp // space for register saves on stack
254 add $16,%ebp // increment to next round key
Sebastian Siewior5157dea2007-11-10 19:07:16 +0800255 cmp $24,%r3
Denis Vlasenkoe6a3a922005-11-29 22:23:20 +1100256 jb 4f // 10 rounds for 128-bit key
257 lea 32(%ebp),%ebp
258 je 3f // 12 rounds for 192-bit key
259 lea 32(%ebp),%ebp
Linus Torvalds1da177e2005-04-16 15:20:36 -0700260
// fwd_rnd1 and fwd_rnd2 alternate so that each one receives the register
// layout the other produces; every entry label sits on a fwd_rnd1.
Sebastian Siewior5157dea2007-11-10 19:07:16 +08002612: fwd_rnd1( -64(%ebp), crypto_ft_tab) // 14 rounds for 256-bit key
262 fwd_rnd2( -48(%ebp), crypto_ft_tab)
2633: fwd_rnd1( -32(%ebp), crypto_ft_tab) // 12 rounds for 192-bit key
264 fwd_rnd2( -16(%ebp), crypto_ft_tab)
2654: fwd_rnd1( (%ebp), crypto_ft_tab) // 10 rounds for 128-bit key
266 fwd_rnd2( +16(%ebp), crypto_ft_tab)
267 fwd_rnd1( +32(%ebp), crypto_ft_tab)
268 fwd_rnd2( +48(%ebp), crypto_ft_tab)
269 fwd_rnd1( +64(%ebp), crypto_ft_tab)
270 fwd_rnd2( +80(%ebp), crypto_ft_tab)
271 fwd_rnd1( +96(%ebp), crypto_ft_tab)
272 fwd_rnd2(+112(%ebp), crypto_ft_tab)
273 fwd_rnd1(+128(%ebp), crypto_ft_tab)
274 fwd_rnd2(+144(%ebp), crypto_fl_tab) // last round uses a different table
Linus Torvalds1da177e2005-04-16 15:20:36 -0700275
276// move final values to the output array. CAUTION: the
277// order of these assigns rely on the register mappings
278
// The last macro is a fwd_rnd2, so the result is in r0,r1,r4,r5.  With the
// slots released, four registers remain pushed, hence out_blk+12.  Each of
// r5 (edi), r4 (esi) and r1 (ebx) is stored before the pop that would
// overwrite it with the saved value of that register.
279 add $8,%esp
280 mov out_blk+12(%esp),%ebp
281 mov %r5,12(%ebp)
282 pop %edi
283 mov %r4,8(%ebp)
284 pop %esi
285 mov %r1,4(%ebp)
286 pop %ebx
287 mov %r0,(%ebp)
288 pop %ebp
Linus Torvalds1da177e2005-04-16 15:20:36 -0700289 ret
Jussi Kivilinna3f299742013-01-19 13:38:50 +0200290ENDPROC(aes_enc_blk)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700291
292// AES (Rijndael) Decryption Subroutine
Huang Ying07bf44f2009-01-09 17:25:50 +1100293/* void aes_dec_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700294
Sebastian Siewior5157dea2007-11-10 19:07:16 +0800295.extern crypto_it_tab
296.extern crypto_il_tab
Linus Torvalds1da177e2005-04-16 15:20:36 -0700297
// aes_dec_blk: decrypt one 16-byte block.
//   ctx     - context pointer; the key schedule is read from offset dkey
//             and the key size from offset klen
//   out_blk - receives the four 32-bit result words
//   in_blk  - supplies the four 32-bit input words
// All three arguments are read from the stack.  ebp, ebx, esi and edi are
// pushed on entry and popped before ret; eax, ecx and edx are overwritten.
// Eight further bytes of stack serve as the save()/restore() slots.
Jussi Kivilinna3f299742013-01-19 13:38:50 +0200298ENTRY(aes_dec_blk)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700299 push %ebp
// exactly one register is on the stack here, so ctx needs no adjustment
Huang Ying07bf44f2009-01-09 17:25:50 +1100300 mov ctx(%esp),%ebp
Linus Torvalds1da177e2005-04-16 15:20:36 -0700301
302// CAUTION: the order and the values used in these assigns
303// rely on the register mappings
304
3051: push %ebx
// two registers pushed now, hence the +4 on the argument offset
306 mov in_blk+4(%esp),%r2
307 push %esi
// klen is read while ebp still points at the start of the context; the
// lea below then moves ebp on to the schedule at offset dkey
Sebastian Siewior5157dea2007-11-10 19:07:16 +0800308 mov klen(%ebp),%r3 // key size
Linus Torvalds1da177e2005-04-16 15:20:36 -0700309 push %edi
310#if dkey != 0
311 lea dkey(%ebp),%ebp // key pointer
312#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700313
314// input four columns and xor in first round key
315
316 mov (%r2),%r0
317 mov 4(%r2),%r1
318 mov 8(%r2),%r4
319 mov 12(%r2),%r5
320 xor (%ebp),%r0
321 xor 4(%ebp),%r1
322 xor 8(%ebp),%r4
323 xor 12(%ebp),%r5
324
// The cmp sets the flags once and both branches below consume them: the
// two lea instructions in between advance ebp without touching the flags,
// so je still sees the result of the comparison with 24.
// ebp ends up 16, 48 or 80 bytes past the start of the schedule, which
// makes the last ten rounds use offsets 0..+144 for every key size and
// puts the extra leading rounds at negative offsets.
Denis Vlasenkoe6a3a922005-11-29 22:23:20 +1100325 sub $8,%esp // space for register saves on stack
Sebastian Siewior5157dea2007-11-10 19:07:16 +0800326 add $16,%ebp // increment to next round key
327 cmp $24,%r3
Denis Vlasenkoe6a3a922005-11-29 22:23:20 +1100328 jb 4f // 10 rounds for 128-bit key
Sebastian Siewior5157dea2007-11-10 19:07:16 +0800329 lea 32(%ebp),%ebp
Denis Vlasenkoe6a3a922005-11-29 22:23:20 +1100330 je 3f // 12 rounds for 192-bit key
Sebastian Siewior5157dea2007-11-10 19:07:16 +0800331 lea 32(%ebp),%ebp
Linus Torvalds1da177e2005-04-16 15:20:36 -0700332
// inv_rnd1 and inv_rnd2 alternate so that each one receives the register
// layout the other produces; every entry label sits on an inv_rnd1.
Sebastian Siewior5157dea2007-11-10 19:07:16 +08003332: inv_rnd1( -64(%ebp), crypto_it_tab) // 14 rounds for 256-bit key
334 inv_rnd2( -48(%ebp), crypto_it_tab)
3353: inv_rnd1( -32(%ebp), crypto_it_tab) // 12 rounds for 192-bit key
336 inv_rnd2( -16(%ebp), crypto_it_tab)
3374: inv_rnd1( (%ebp), crypto_it_tab) // 10 rounds for 128-bit key
338 inv_rnd2( +16(%ebp), crypto_it_tab)
339 inv_rnd1( +32(%ebp), crypto_it_tab)
340 inv_rnd2( +48(%ebp), crypto_it_tab)
341 inv_rnd1( +64(%ebp), crypto_it_tab)
342 inv_rnd2( +80(%ebp), crypto_it_tab)
343 inv_rnd1( +96(%ebp), crypto_it_tab)
344 inv_rnd2(+112(%ebp), crypto_it_tab)
345 inv_rnd1(+128(%ebp), crypto_it_tab)
346 inv_rnd2(+144(%ebp), crypto_il_tab) // last round uses a different table
Linus Torvalds1da177e2005-04-16 15:20:36 -0700347
348// move final values to the output array. CAUTION: the
349// order of these assigns rely on the register mappings
350
// The last macro is an inv_rnd2, so the result is in r0,r1,r4,r5.  With
// the slots released, four registers remain pushed, hence out_blk+12.
// Each of r5 (edi), r4 (esi) and r1 (ebx) is stored before the pop that
// would overwrite it with the saved value of that register.
351 add $8,%esp
352 mov out_blk+12(%esp),%ebp
353 mov %r5,12(%ebp)
354 pop %edi
355 mov %r4,8(%ebp)
356 pop %esi
357 mov %r1,4(%ebp)
358 pop %ebx
359 mov %r0,(%ebp)
360 pop %ebp
Linus Torvalds1da177e2005-04-16 15:20:36 -0700361 ret
Jussi Kivilinna3f299742013-01-19 13:38:50 +0200362ENDPROC(aes_dec_blk)