blob: b949ec2f9af444e06377492d39b9db9960f3a626 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001// -------------------------------------------------------------------------
2// Copyright (c) 2001, Dr Brian Gladman < >, Worcester, UK.
3// All rights reserved.
4//
5// LICENSE TERMS
6//
7// The free distribution and use of this software in both source and binary
8// form is allowed (with or without changes) provided that:
9//
10// 1. distributions of this source code include the above copyright
11// notice, this list of conditions and the following disclaimer//
12//
13// 2. distributions in binary form include the above copyright
14// notice, this list of conditions and the following disclaimer
15// in the documentation and/or other associated materials//
16//
17// 3. the copyright holder's name is not used to endorse products
18// built using this software without specific written permission.
19//
20//
21// ALTERNATIVELY, provided that this notice is retained in full, this product
22// may be distributed under the terms of the GNU General Public License (GPL),
23// in which case the provisions of the GPL apply INSTEAD OF those given above.
24//
25// Copyright (c) 2004 Linus Torvalds <torvalds@osdl.org>
26// Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>
27
28// DISCLAIMER
29//
30// This software is provided 'as is' with no explicit or implied warranties
31// in respect of its properties including, but not limited to, correctness
32// and fitness for purpose.
33// -------------------------------------------------------------------------
34// Issue Date: 29/07/2002
35
// GNU as (AT&T syntax) source, run through the C preprocessor.
.file "aes-i586-asm.S"
.text

#include <asm/asm-offsets.h>
40
// Size in bytes of one lookup sub-table (256 entries of 32 bits each).
// The round macros address four consecutive sub-tables as
// table, table+tlen, table+2*tlen and table+3*tlen.
#define tlen 1024
42
/*
 * Byte offsets from %esp to the three stack-passed arguments, valid when
 * exactly one register has been pushed since function entry (saved
 * register at 0, return address at 4).  Code that has pushed more
 * registers adds 4 per extra push, e.g. in_blk+4 or out_blk+12.
 */
#define ctx 8
#define out_blk 12
#define in_blk 16
Linus Torvalds1da177e2005-04-16 15:20:36 -070047
/*
 * Byte offsets into the structure that the ctx argument points to:
 *	ekey	start of the encryption round keys
 *	dkey	start of the decryption round keys
 *	klen	32-bit key length; compared against 24 by the routines below
 * NOTE(review): these are hard-coded and must match the layout of
 * struct crypto_aes_ctx, which is declared elsewhere - verify on change.
 */
#define klen (480)
#define ekey (0)
#define dkey (240)
Linus Torvalds1da177e2005-04-16 15:20:36 -070052
// Register mapping for the encrypt and decrypt subroutines.
//
// The round macros are written in terms of r0..r5 so that the register
// allocation lives in one place.  r0..r3 map to registers that have
// byte-addressable low/high parts and can therefore be used as the
// index operand of the column macros; r4 and r5 (esi/edi) cannot.

#define r0 eax
#define r1 ebx
#define r2 ecx
#define r3 edx
#define r4 esi
#define r5 edi
61
// Names of the low (bits 0-7) and high (bits 8-15) byte registers of
// eax/ebx/ecx/edx, spelled as <reg>l / <reg>h so that the l() and h()
// macros can build them by token pasting.

#define eaxl al
#define eaxh ah
#define ebxl bl
#define ebxh bh
#define ecxl cl
#define ecxh ch
#define edxl dl
#define edxh dh
70
// h(reg) / l(reg): high / low byte register of a 32-bit register name.
// The extra level of indirection lets an argument such as r0 be expanded
// to eax before the suffix is pasted on, giving eaxh -> ah, eaxl -> al.

#define _h(reg) reg##h
#define h(reg) _h(reg)

#define _l(reg) reg##l
#define l(reg) _l(reg)
76
// do_col: takes a 32-bit word representing a column and uses each of
// its four bytes to index into four tables of 256 32-bit words; the
// values obtained are xored into the four output registers:
//
//	a1 ^= word at table        [4 * (bits  0-7  of idx)]
//	a2 ^= word at table+tlen   [4 * (bits  8-15 of idx)]
//	a3 ^= word at table+2*tlen [4 * (bits 16-23 of idx)]
//	a4 ^= word at table+3*tlen [4 * (bits 24-31 of idx)]
//
// Parameters:
//	table		base symbol of the four consecutive tables
//	a1,a2,a3,a4	output registers (accumulated into, not overwritten)
//	idx		input column for the round; must be one of
//			eax/ebx/ecx/edx because l() and h() are applied
//			to it; destroyed (left holding its top byte)
//	tmp		scratch register, clobbered
//
// The arithmetic flags are clobbered (shr, xor).

#define do_col(table, a1,a2,a3,a4, idx, tmp)	\
	movzx	%l(idx),%tmp;			\
	xor	table(,%tmp,4),%a1;		\
	movzx	%h(idx),%tmp;			\
	shr	$16,%idx;			\
	xor	table+tlen(,%tmp,4),%a2;	\
	movzx	%l(idx),%tmp;			\
	movzx	%h(idx),%idx;			\
	xor	table+2*tlen(,%tmp,4),%a3;	\
	xor	table+3*tlen(,%idx,4),%a4;
102
// do_fcol: first column step of a forward round.  Initialises the four
// output registers from the round key and xors in the table words
// selected by the bytes of idx:
//
//	a1 = key word at sched+0  ^ table        [byte 0 of idx]
//	a2 = key word at sched+12 ^ table+tlen   [byte 1 of idx]
//	a3 = key word at sched+8  ^ table+2*tlen [byte 2 of idx]
//	a4 = key word at sched+4  ^ table+3*tlen [byte 3 of idx]
//
// sched is an address operand such as -64(%ebp) or (%ebp); writing
// "12 sched" lets the assembler fold the constant into the displacement.
//
// NB1: original value of a3 is in idx on exit
// NB2: original values of a1,a2,a4 are not used
// tmp is a scratch register; the arithmetic flags are clobbered.
#define do_fcol(table, a1,a2,a3,a4, idx, tmp, sched) \
	mov	0 sched,%a1;			\
	movzx	%l(idx),%tmp;			\
	mov	12 sched,%a2;			\
	xor	table(,%tmp,4),%a1;		\
	mov	4 sched,%a4;			\
	movzx	%h(idx),%tmp;			\
	shr	$16,%idx;			\
	xor	table+tlen(,%tmp,4),%a2;	\
	movzx	%l(idx),%tmp;			\
	movzx	%h(idx),%idx;			\
	xor	table+3*tlen(,%idx,4),%a4;	\
	mov	%a3,%idx;			\
	mov	8 sched,%a3;			\
	xor	table+2*tlen(,%tmp,4),%a3;
121
// do_icol: first column step of an inverse round.  Initialises the four
// output registers from the round key and xors in the table words
// selected by the bytes of idx:
//
//	a1 = key word at sched+0  ^ table        [byte 0 of idx]
//	a2 = key word at sched+4  ^ table+tlen   [byte 1 of idx]
//	a3 = key word at sched+8  ^ table+2*tlen [byte 2 of idx]
//	a4 = key word at sched+12 ^ table+3*tlen [byte 3 of idx]
//
// sched is an address operand such as -64(%ebp) or (%ebp); writing
// "12 sched" lets the assembler fold the constant into the displacement.
//
// NB1: original value of a3 is in idx on exit
// NB2: original values of a1,a2,a4 are not used
// tmp is a scratch register; the arithmetic flags are clobbered.
#define do_icol(table, a1,a2,a3,a4, idx, tmp, sched) \
	mov	0 sched,%a1;			\
	movzx	%l(idx),%tmp;			\
	mov	4 sched,%a2;			\
	xor	table(,%tmp,4),%a1;		\
	mov	12 sched,%a4;			\
	movzx	%h(idx),%tmp;			\
	shr	$16,%idx;			\
	xor	table+tlen(,%tmp,4),%a2;	\
	movzx	%l(idx),%tmp;			\
	movzx	%h(idx),%idx;			\
	xor	table+3*tlen(,%idx,4),%a4;	\
	mov	%a3,%idx;			\
	mov	8 sched,%a3;			\
	xor	table+2*tlen(,%tmp,4),%a3;
140
141
// Spill and reload a register through a 32-bit stack slot at 4*slot(%esp).
// The caller must have reserved the slots below the current stack data.
// Note the argument order differs between the two macros:
//	save(slot, reg)		store reg into the slot
//	restore(reg, slot)	load reg from the slot
// (The original Gladman code had conditional saves to MMX registers.)
#define save(a1, a2)		\
	mov	%a2,4*a1(%esp)

#define restore(a1, a2)		\
	mov	4*a2(%esp),%a1
148
// Forward (encryption) round, first of an alternating pair.  The state
// is held as four 32-bit columns; this variant reads column 0 from r0
// and leaves the new column 0 in r2.
//
// round column values
// on entry: r0,r1,r4,r5
// on exit:  r2,r1,r4,r5
//
//	arg	address operand of this round's 16-byte round key
//	table	base symbol of the four lookup tables
//
// r0 and r3 are clobbered.  Stack slots 0 and 1 (0(%esp) and 4(%esp))
// are used to hold the old r1 and r5 while their registers are reused
// as outputs, so 8 bytes must be reserved at %esp.
#define fwd_rnd1(arg, table)						\
	save   (0,r1);							\
	save   (1,r5);							\
									\
	/* compute new column values */					\
	do_fcol(table, r2,r5,r4,r1, r0,r3, arg);	/* r0 = column 0; old r4 left in r0 */	\
	do_col (table, r4,r1,r2,r5, r0,r3);		/* r0 = old r4 */	\
	restore(r0,0);							\
	do_col (table, r1,r2,r5,r4, r0,r3);		/* r0 = old r1 */	\
	restore(r0,1);							\
	do_col (table, r5,r4,r1,r2, r0,r3);		/* r0 = old r5 */
168
// Forward (encryption) round, second of an alternating pair: identical
// to the first variant except that column 0 is read from r2 and the new
// column 0 is left in r0.
//
// round column values
// on entry: r2,r1,r4,r5
// on exit:  r0,r1,r4,r5
//
//	arg	address operand of this round's 16-byte round key
//	table	base symbol of the four lookup tables
//
// r2 and r3 are clobbered; stack slots 0 and 1 at %esp are used.
#define fwd_rnd2(arg, table)						\
	save   (0,r1);							\
	save   (1,r5);							\
									\
	/* compute new column values */					\
	do_fcol(table, r0,r5,r4,r1, r2,r3, arg);	/* r2 = column 0; old r4 left in r2 */	\
	do_col (table, r4,r1,r0,r5, r2,r3);		/* r2 = old r4 */	\
	restore(r2,0);							\
	do_col (table, r1,r0,r5,r4, r2,r3);		/* r2 = old r1 */	\
	restore(r2,1);							\
	do_col (table, r5,r4,r1,r0, r2,r3);		/* r2 = old r5 */
183
// Inverse (decryption) round, first of an alternating pair.  The state
// is held as four 32-bit columns; this variant reads column 0 from r0
// and leaves the new column 0 in r2.
//
// round column values
// on entry: r0,r1,r4,r5
// on exit:  r2,r1,r4,r5
//
//	arg	address operand of this round's 16-byte round key
//	table	base symbol of the four lookup tables
//
// r0 and r3 are clobbered.  Stack slots 0 and 1 (0(%esp) and 4(%esp))
// are used to hold the old r1 and r5 while their registers are reused
// as outputs, so 8 bytes must be reserved at %esp.
#define inv_rnd1(arg, table)						\
	save    (0,r1);							\
	save    (1,r5);							\
									\
	/* compute new column values */					\
	do_icol(table, r2,r1,r4,r5, r0,r3, arg);	/* r0 = column 0; old r4 left in r0 */	\
	do_col (table, r4,r5,r2,r1, r0,r3);		/* r0 = old r4 */	\
	restore(r0,0);							\
	do_col (table, r1,r4,r5,r2, r0,r3);		/* r0 = old r1 */	\
	restore(r0,1);							\
	do_col (table, r5,r2,r1,r4, r0,r3);		/* r0 = old r5 */
203
// Inverse (decryption) round, second of an alternating pair: identical
// to the first variant except that column 0 is read from r2 and the new
// column 0 is left in r0.
//
// round column values
// on entry: r2,r1,r4,r5
// on exit:  r0,r1,r4,r5
//
//	arg	address operand of this round's 16-byte round key
//	table	base symbol of the four lookup tables
//
// r2 and r3 are clobbered; stack slots 0 and 1 at %esp are used.
#define inv_rnd2(arg, table)						\
	save    (0,r1);							\
	save    (1,r5);							\
									\
	/* compute new column values */					\
	do_icol(table, r0,r1,r4,r5, r2,r3, arg);	/* r2 = column 0; old r4 left in r2 */	\
	do_col (table, r4,r5,r0,r1, r2,r3);		/* r2 = old r4 */	\
	restore(r2,0);							\
	do_col (table, r1,r4,r5,r0, r2,r3);		/* r2 = old r1 */	\
	restore(r2,1);							\
	do_col (table, r5,r0,r1,r4, r2,r3);		/* r2 = old r5 */
218
// AES (Rijndael) Encryption Subroutine
/* void aes_enc_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */
//
// Reads 16 bytes from in_blk, encrypts them with the round keys found at
// offset ekey in *ctx, and writes 16 bytes to out_blk.
//
// In:     all three arguments are taken from the stack
// Out:    no return value; result stored through out_blk
// Saved:  %ebp, %ebx, %esi, %edi are pushed on entry and popped on exit
// Clobb:  %eax, %ecx, %edx, flags
// Stack:  16 bytes of saved registers plus 8 bytes of scratch slots
//
// The key length word at offset klen selects the number of rounds:
// below 24 -> 10 rounds, equal to 24 -> 12 rounds, above 24 -> 14 rounds.

.global aes_enc_blk
.type aes_enc_blk,@function

.extern crypto_ft_tab
.extern crypto_fl_tab

.align 4

aes_enc_blk:
	push	%ebp
	mov	ctx(%esp),%ebp

// CAUTION: the order and the values used in these assignments
// rely on the register mappings

1:	push	%ebx
	mov	in_blk+4(%esp),%r2	// +4: two registers pushed so far
	push	%esi
	mov	klen(%ebp),%r3		// key size
	push	%edi
#if ekey != 0
	lea	ekey(%ebp),%ebp		// key pointer
#endif

// input four columns and xor in first round key

	mov	(%r2),%r0
	mov	4(%r2),%r1
	mov	8(%r2),%r4
	mov	12(%r2),%r5
	xor	(%ebp),%r0
	xor	4(%ebp),%r1
	xor	8(%ebp),%r4
	xor	12(%ebp),%r5

	sub	$8,%esp			// space for register saves on stack
	add	$16,%ebp		// increment to next round key
	cmp	$24,%r3
	jb	4f			// 10 rounds for 128-bit key
	lea	32(%ebp),%ebp		// lea keeps the flags from cmp for the je below
	je	3f			// 12 rounds for 192-bit key
	lea	32(%ebp),%ebp

// %ebp has been advanced so that the last ten rounds always use the
// displacements 0..+144, whatever the key size.

2:	fwd_rnd1( -64(%ebp), crypto_ft_tab)	// 14 rounds for 256-bit key
	fwd_rnd2( -48(%ebp), crypto_ft_tab)
3:	fwd_rnd1( -32(%ebp), crypto_ft_tab)	// 12 rounds for 192-bit key
	fwd_rnd2( -16(%ebp), crypto_ft_tab)
4:	fwd_rnd1(    (%ebp), crypto_ft_tab)	// 10 rounds for 128-bit key
	fwd_rnd2( +16(%ebp), crypto_ft_tab)
	fwd_rnd1( +32(%ebp), crypto_ft_tab)
	fwd_rnd2( +48(%ebp), crypto_ft_tab)
	fwd_rnd1( +64(%ebp), crypto_ft_tab)
	fwd_rnd2( +80(%ebp), crypto_ft_tab)
	fwd_rnd1( +96(%ebp), crypto_ft_tab)
	fwd_rnd2(+112(%ebp), crypto_ft_tab)
	fwd_rnd1(+128(%ebp), crypto_ft_tab)
	fwd_rnd2(+144(%ebp), crypto_fl_tab)	// last round uses a different table

// move final values to the output array.  CAUTION: the
// order of these assignments relies on the register mappings

	add	$8,%esp			// drop the two scratch slots
	mov	out_blk+12(%esp),%ebp	// +12: four registers are on the stack
	mov	%r5,12(%ebp)
	pop	%edi
	mov	%r4,8(%ebp)
	pop	%esi
	mov	%r1,4(%ebp)
	pop	%ebx
	mov	%r0,(%ebp)
	pop	%ebp
	ret
.size aes_enc_blk,.-aes_enc_blk
293
// AES (Rijndael) Decryption Subroutine
/* void aes_dec_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */
//
// Reads 16 bytes from in_blk, decrypts them with the round keys found at
// offset dkey in *ctx, and writes 16 bytes to out_blk.
//
// In:     all three arguments are taken from the stack
// Out:    no return value; result stored through out_blk
// Saved:  %ebp, %ebx, %esi, %edi are pushed on entry and popped on exit
// Clobb:  %eax, %ecx, %edx, flags
// Stack:  16 bytes of saved registers plus 8 bytes of scratch slots
//
// The key length word at offset klen selects the number of rounds:
// below 24 -> 10 rounds, equal to 24 -> 12 rounds, above 24 -> 14 rounds.

.global aes_dec_blk
.type aes_dec_blk,@function

.extern crypto_it_tab
.extern crypto_il_tab

.align 4

aes_dec_blk:
	push	%ebp
	mov	ctx(%esp),%ebp

// CAUTION: the order and the values used in these assignments
// rely on the register mappings

1:	push	%ebx
	mov	in_blk+4(%esp),%r2	// +4: two registers pushed so far
	push	%esi
	mov	klen(%ebp),%r3		// key size
	push	%edi
#if dkey != 0
	lea	dkey(%ebp),%ebp		// key pointer
#endif

// input four columns and xor in first round key

	mov	(%r2),%r0
	mov	4(%r2),%r1
	mov	8(%r2),%r4
	mov	12(%r2),%r5
	xor	(%ebp),%r0
	xor	4(%ebp),%r1
	xor	8(%ebp),%r4
	xor	12(%ebp),%r5

	sub	$8,%esp			// space for register saves on stack
	add	$16,%ebp		// increment to next round key
	cmp	$24,%r3
	jb	4f			// 10 rounds for 128-bit key
	lea	32(%ebp),%ebp		// lea keeps the flags from cmp for the je below
	je	3f			// 12 rounds for 192-bit key
	lea	32(%ebp),%ebp

// %ebp has been advanced so that the last ten rounds always use the
// displacements 0..+144, whatever the key size.

2:	inv_rnd1( -64(%ebp), crypto_it_tab)	// 14 rounds for 256-bit key
	inv_rnd2( -48(%ebp), crypto_it_tab)
3:	inv_rnd1( -32(%ebp), crypto_it_tab)	// 12 rounds for 192-bit key
	inv_rnd2( -16(%ebp), crypto_it_tab)
4:	inv_rnd1(    (%ebp), crypto_it_tab)	// 10 rounds for 128-bit key
	inv_rnd2( +16(%ebp), crypto_it_tab)
	inv_rnd1( +32(%ebp), crypto_it_tab)
	inv_rnd2( +48(%ebp), crypto_it_tab)
	inv_rnd1( +64(%ebp), crypto_it_tab)
	inv_rnd2( +80(%ebp), crypto_it_tab)
	inv_rnd1( +96(%ebp), crypto_it_tab)
	inv_rnd2(+112(%ebp), crypto_it_tab)
	inv_rnd1(+128(%ebp), crypto_it_tab)
	inv_rnd2(+144(%ebp), crypto_il_tab)	// last round uses a different table

// move final values to the output array.  CAUTION: the
// order of these assignments relies on the register mappings

	add	$8,%esp			// drop the two scratch slots
	mov	out_blk+12(%esp),%ebp	// +12: four registers are on the stack
	mov	%r5,12(%ebp)
	pop	%edi
	mov	%r4,8(%ebp)
	pop	%esi
	mov	%r1,4(%ebp)
	pop	%ebx
	mov	%r0,(%ebp)
	pop	%ebp
	ret
.size aes_dec_blk,.-aes_dec_blk