// -------------------------------------------------------------------------
// Copyright (c) 2001, Dr Brian Gladman < >, Worcester, UK.
// All rights reserved.
//
// LICENSE TERMS
//
// The free distribution and use of this software in both source and binary
// form is allowed (with or without changes) provided that:
//
//   1. distributions of this source code include the above copyright
//      notice, this list of conditions and the following disclaimer;
//
//   2. distributions in binary form include the above copyright
//      notice, this list of conditions and the following disclaimer
//      in the documentation and/or other associated materials;
//
//   3. the copyright holder's name is not used to endorse products
//      built using this software without specific written permission.
//
//
// ALTERNATIVELY, provided that this notice is retained in full, this product
// may be distributed under the terms of the GNU General Public License (GPL),
// in which case the provisions of the GPL apply INSTEAD OF those given above.
//
// Copyright (c) 2004 Linus Torvalds <torvalds@osdl.org>
// Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>

// DISCLAIMER
//
// This software is provided 'as is' with no explicit or implied warranties
// in respect of its properties including, but not limited to, correctness
// and fitness for purpose.
// -------------------------------------------------------------------------
// Issue Date: 29/07/2002

.file "aes-i586-asm.S"
.text

// C-level prototypes of the two entry points defined in this file:
//
// aes_rval aes_enc_blk(const unsigned char in_blk[], unsigned char out_blk[], const aes_ctx cx[1]);
// aes_rval aes_dec_blk(const unsigned char in_blk[], unsigned char out_blk[], const aes_ctx cx[1]);

#define tlen 1024   // length of each of 4 'xor' arrays (256 32-bit words)

// offsets to parameters with one register pushed onto stack
// (0 = pushed register, 4 = return address, then the three arguments)

#define in_blk    8  // input byte array address parameter
#define out_blk  12  // output byte array address parameter
#define ctx      16  // AES context structure

// offsets in context structure
// NOTE(review): these must agree with the aes_ctx layout declared on the
// C side, which is not visible in this file.

#define ekey     0   // encryption key schedule base address
#define nrnd   256   // number of rounds
#define dkey   260   // decryption key schedule base address

// register mapping for encrypt and decrypt subroutines

#define r0  eax
#define r1  ebx
#define r2  ecx
#define r3  edx
#define r4  esi
#define r5  edi

// Byte sub-register names, spelled so that they can be produced by pasting
// an 'l' or 'h' suffix onto a 32-bit register name.  Only eax, ebx, ecx and
// edx (i.e. r0..r3) have entries, so only those may be passed to l()/h().

#define eaxl  al
#define eaxh  ah
#define ebxl  bl
#define ebxh  bh
#define ecxl  cl
#define ecxh  ch
#define edxl  dl
#define edxh  dh

// h(reg) / l(reg) yield the high / low byte register of bits 15..0 of reg.
// The two-level definition makes the preprocessor expand the argument first
// (e.g. r0 -> eax) before the suffix is pasted on (eax -> eaxl -> al).

#define _h(reg) reg##h
#define h(reg) _h(reg)

#define _l(reg) reg##l
#define l(reg) _l(reg)

// This macro takes a 32-bit word representing a column and uses
// each of its four bytes to index into four tables of 256 32-bit
// words to obtain values that are then xored into the four output
// registers.

// Parameters:
//	table	base address of the first of four consecutive tables,
//		each tlen bytes long
//	a1	output register, xored with table[byte 0 of idx]
//	a2	output register, xored with (table+tlen)[byte 1 of idx]
//	a3	output register, xored with (table+2*tlen)[byte 2 of idx]
//	a4	output register, xored with (table+3*tlen)[byte 3 of idx]
//	idx	input register for the round (destroyed); it is passed to
//		l()/h(), so it must be one of eax, ebx, ecx, edx
//	tmp	scratch register for the round (destroyed)
//
// The shr and xor instructions also modify the flags.

#define do_col(table, a1,a2,a3,a4, idx, tmp)	\
	movzx   %l(idx),%tmp;			\
	xor     table(,%tmp,4),%a1;		\
	movzx   %h(idx),%tmp;			\
	shr     $16,%idx;			\
	xor     table+tlen(,%tmp,4),%a2;	\
	movzx   %l(idx),%tmp;			\
	movzx   %h(idx),%idx;			\
	xor     table+2*tlen(,%tmp,4),%a3;	\
	xor     table+3*tlen(,%idx,4),%a4;

// initialise output registers from the key schedule, then xor in the
// table values selected by the four bytes of idx (forward direction)
//
//	a1 = word at  0 sched  ^ table[byte 0 of idx]
//	a2 = word at 12 sched  ^ (table+tlen)[byte 1 of idx]
//	a3 = word at  8 sched  ^ (table+2*tlen)[byte 2 of idx]
//	a4 = word at  4 sched  ^ (table+3*tlen)[byte 3 of idx]
//
// 'sched' is a memory operand such as -16(%ebp); it is pasted textually
// after a byte offset, so "4 sched" becomes e.g. "4 -16(%ebp)" and the
// two numbers form a single displacement expression.
//
// NB1: original value of a3 is in idx on exit
// NB2: original values of a1,a2,a4 aren't used
// NB3: idx must be one of eax, ebx, ecx, edx (it is passed to l()/h());
//      tmp is destroyed
#define do_fcol(table, a1,a2,a3,a4, idx, tmp, sched) \
	mov     0 sched,%a1;			\
	movzx   %l(idx),%tmp;			\
	mov     12 sched,%a2;			\
	xor     table(,%tmp,4),%a1;		\
	mov     4 sched,%a4;			\
	movzx   %h(idx),%tmp;			\
	shr     $16,%idx;			\
	xor     table+tlen(,%tmp,4),%a2;	\
	movzx   %l(idx),%tmp;			\
	movzx   %h(idx),%idx;			\
	xor     table+3*tlen(,%idx,4),%a4;	\
	mov     %a3,%idx;			\
	mov     8 sched,%a3;			\
	xor     table+2*tlen(,%tmp,4),%a3;

// initialise output registers from the key schedule, then xor in the
// table values selected by the four bytes of idx (inverse direction)
//
//	a1 = word at  0 sched  ^ table[byte 0 of idx]
//	a2 = word at  4 sched  ^ (table+tlen)[byte 1 of idx]
//	a3 = word at  8 sched  ^ (table+2*tlen)[byte 2 of idx]
//	a4 = word at 12 sched  ^ (table+3*tlen)[byte 3 of idx]
//
// 'sched' is a memory operand such as -16(%ebp); it is pasted textually
// after a byte offset, so "4 sched" becomes e.g. "4 -16(%ebp)" and the
// two numbers form a single displacement expression.
//
// NB1: original value of a3 is in idx on exit
// NB2: original values of a1,a2,a4 aren't used
// NB3: idx must be one of eax, ebx, ecx, edx (it is passed to l()/h());
//      tmp is destroyed
#define do_icol(table, a1,a2,a3,a4, idx, tmp, sched) \
	mov     0 sched,%a1;			\
	movzx   %l(idx),%tmp;			\
	mov     4 sched,%a2;			\
	xor     table(,%tmp,4),%a1;		\
	mov     12 sched,%a4;			\
	movzx   %h(idx),%tmp;			\
	shr     $16,%idx;			\
	xor     table+tlen(,%tmp,4),%a2;	\
	movzx   %l(idx),%tmp;			\
	movzx   %h(idx),%idx;			\
	xor     table+3*tlen(,%idx,4),%a4;	\
	mov     %a3,%idx;			\
	mov     8 sched,%a3;			\
	xor     table+2*tlen(,%tmp,4),%a3;


// original Gladman had conditional saves to MMX regs.
//
// save(slot, reg)     stores 32-bit register 'reg' at 4*slot(%esp)
// restore(reg, slot)  loads 32-bit register 'reg' from 4*slot(%esp)
//
// Note that the argument order differs between the two macros.  The
// caller must have reserved the stack slots below the saved registers
// before using them.
#define save(a1, a2)		\
	mov     %a2,4*a1(%esp)

#define restore(a1, a2)		\
	mov     4*a2(%esp),%a1

// These macros perform a forward encryption cycle. They are used in
// pairs: fwd_rnd1 is entered with the previous round column values in
// r0,r1,r4,r5 and leaves them in r2,r1,r4,r5; fwd_rnd2 takes them from
// there and leaves the result in r0,r1,r4,r5 again. The stack is used
// for temporary storage.

// round column values
// on entry: r0,r1,r4,r5
// on exit:  r2,r1,r4,r5
//
// arg   memory operand addressing the 16-byte round key
// table base of the four lookup tables used for this round
//
// r1 and r5 are parked in stack slots 0 and 1 because they are
// overwritten as outputs before they have been consumed as inputs.
// r0 and r3 are destroyed (r0 is the index register, r3 the scratch).
#define fwd_rnd1(arg, table)						\
	save   (0,r1);							\
	save   (1,r5);							\
									\
	/* compute new column values */					\
	do_fcol(table, r2,r5,r4,r1, r0,r3, arg);	/* idx=r0 */	\
	do_col (table, r4,r1,r2,r5, r0,r3);		/* idx=r4 */	\
	restore(r0,0);							\
	do_col (table, r1,r2,r5,r4, r0,r3);		/* idx=r1 */	\
	restore(r0,1);							\
	do_col (table, r5,r4,r1,r2, r0,r3);		/* idx=r5 */

// Forward encryption round, second of a pair.
//
// round column values
// on entry: r2,r1,r4,r5
// on exit:  r0,r1,r4,r5
//
// arg   memory operand addressing the 16-byte round key
// table base of the four lookup tables used for this round
//
// r1 and r5 are parked in stack slots 0 and 1 because they are
// overwritten as outputs before they have been consumed as inputs.
// r2 and r3 are destroyed (r2 is the index register, r3 the scratch).
#define fwd_rnd2(arg, table)						\
	save   (0,r1);							\
	save   (1,r5);							\
									\
	/* compute new column values */					\
	do_fcol(table, r0,r5,r4,r1, r2,r3, arg);	/* idx=r2 */	\
	do_col (table, r4,r1,r0,r5, r2,r3);		/* idx=r4 */	\
	restore(r2,0);							\
	do_col (table, r1,r0,r5,r4, r2,r3);		/* idx=r1 */	\
	restore(r2,1);							\
	do_col (table, r5,r4,r1,r0, r2,r3);		/* idx=r5 */

// These macros perform an inverse (decryption) cycle. They are used in
// pairs: inv_rnd1 is entered with the previous round column values in
// r0,r1,r4,r5 and leaves them in r2,r1,r4,r5; inv_rnd2 takes them from
// there and leaves the result in r0,r1,r4,r5 again. The stack is used
// for temporary storage.

// round column values
// on entry: r0,r1,r4,r5
// on exit:  r2,r1,r4,r5
//
// arg   memory operand addressing the 16-byte round key
// table base of the four lookup tables used for this round
//
// r1 and r5 are parked in stack slots 0 and 1 because they are
// overwritten as outputs before they have been consumed as inputs.
// r0 and r3 are destroyed (r0 is the index register, r3 the scratch).
#define inv_rnd1(arg, table)						\
	save   (0,r1);							\
	save   (1,r5);							\
									\
	/* compute new column values */					\
	do_icol(table, r2,r1,r4,r5, r0,r3, arg);	/* idx=r0 */	\
	do_col (table, r4,r5,r2,r1, r0,r3);		/* idx=r4 */	\
	restore(r0,0);							\
	do_col (table, r1,r4,r5,r2, r0,r3);		/* idx=r1 */	\
	restore(r0,1);							\
	do_col (table, r5,r2,r1,r4, r0,r3);		/* idx=r5 */

// Inverse (decryption) round, second of a pair.
//
// round column values
// on entry: r2,r1,r4,r5
// on exit:  r0,r1,r4,r5
//
// arg   memory operand addressing the 16-byte round key
// table base of the four lookup tables used for this round
//
// r1 and r5 are parked in stack slots 0 and 1 because they are
// overwritten as outputs before they have been consumed as inputs.
// r2 and r3 are destroyed (r2 is the index register, r3 the scratch).
#define inv_rnd2(arg, table)						\
	save   (0,r1);							\
	save   (1,r5);							\
									\
	/* compute new column values */					\
	do_icol(table, r0,r1,r4,r5, r2,r3, arg);	/* idx=r2 */	\
	do_col (table, r4,r5,r0,r1, r2,r3);		/* idx=r4 */	\
	restore(r2,0);							\
	do_col (table, r1,r4,r5,r0, r2,r3);		/* idx=r1 */	\
	restore(r2,1);							\
	do_col (table, r5,r0,r1,r4, r2,r3);		/* idx=r5 */

// AES (Rijndael) Encryption Subroutine
//
// Encrypts one 16-byte block.
//
// In (all on the stack, in this order above the return address):
//	in_blk	address of the 16 input bytes
//	out_blk	address of the 16 output bytes
//	ctx	address of the AES context (key schedule at ekey,
//		round count at nrnd)
// Out:	%eax = 1
// Preserves %ebx, %esi, %edi, %ebp (pushed on entry, popped on exit);
// destroys %ecx, %edx and the flags.
// Stack: four saved registers plus 8 bytes of scratch for the round macros.
//
// The round count selects the entry point into one unrolled sequence of
// 14 rounds: fewer than 12 enters at label 4 (10 rounds), exactly 12 at
// label 3 (12 rounds), anything larger falls through to label 2 (14 rounds).

.global  aes_enc_blk

.extern  ft_tab
.extern  fl_tab

.align 4

aes_enc_blk:
	push    %ebp
	mov     ctx(%esp),%ebp      // pointer to context

// CAUTION: the order and the values used in these assigns
// rely on the register mappings

1:	push    %ebx
	mov     in_blk+4(%esp),%r2  // +4: a second register is now on the stack
	push    %esi
	mov     nrnd(%ebp),%r3      // number of rounds
	push    %edi
#if ekey != 0
	lea     ekey(%ebp),%ebp     // key pointer
#endif

// input four columns and xor in first round key

	mov     (%r2),%r0
	mov     4(%r2),%r1
	mov     8(%r2),%r4
	mov     12(%r2),%r5
	xor     (%ebp),%r0
	xor     4(%ebp),%r1
	xor     8(%ebp),%r4
	xor     12(%ebp),%r5

	sub     $8,%esp             // space for register saves on stack
	add     $16,%ebp            // increment to next round key
	cmp     $12,%r3
	jb      4f                  // 10 rounds for 128-bit key
	lea     32(%ebp),%ebp       // lea leaves the flags from cmp intact
	je      3f                  // 12 rounds for 192-bit key
	lea     32(%ebp),%ebp

// %ebp has been advanced by 32 bytes for each extra pair of rounds, so the
// last ten rounds use the same offsets (0..+144) for every key size.

2:	fwd_rnd1( -64(%ebp) ,ft_tab)	// 14 rounds for 256-bit key
	fwd_rnd2( -48(%ebp) ,ft_tab)
3:	fwd_rnd1( -32(%ebp) ,ft_tab)	// 12 rounds for 192-bit key
	fwd_rnd2( -16(%ebp) ,ft_tab)
4:	fwd_rnd1(    (%ebp) ,ft_tab)	// 10 rounds for 128-bit key
	fwd_rnd2( +16(%ebp) ,ft_tab)
	fwd_rnd1( +32(%ebp) ,ft_tab)
	fwd_rnd2( +48(%ebp) ,ft_tab)
	fwd_rnd1( +64(%ebp) ,ft_tab)
	fwd_rnd2( +80(%ebp) ,ft_tab)
	fwd_rnd1( +96(%ebp) ,ft_tab)
	fwd_rnd2(+112(%ebp) ,ft_tab)
	fwd_rnd1(+128(%ebp) ,ft_tab)
	fwd_rnd2(+144(%ebp) ,fl_tab)	// last round uses a different table

// move final values to the output array.  CAUTION: the
// order of these assigns rely on the register mappings

	add     $8,%esp             // release the scratch slots
	mov     out_blk+12(%esp),%ebp  // +12: three more registers still pushed
	mov     %r5,12(%ebp)
	pop     %edi
	mov     %r4,8(%ebp)
	pop     %esi
	mov     %r1,4(%ebp)
	pop     %ebx
	mov     %r0,(%ebp)
	pop     %ebp
	mov     $1,%eax
	ret

// AES (Rijndael) Decryption Subroutine
//
// Decrypts one 16-byte block.
//
// In (all on the stack, in this order above the return address):
//	in_blk	address of the 16 input bytes
//	out_blk	address of the 16 output bytes
//	ctx	address of the AES context (key schedule at dkey,
//		round count at nrnd)
// Out:	%eax = 1
// Preserves %ebx, %esi, %edi, %ebp (pushed on entry, popped on exit);
// destroys %ecx, %edx and the flags.
// Stack: four saved registers plus 8 bytes of scratch for the round macros.
//
// The decryption key schedule is walked from its end towards its start:
// the initial key is taken at dkey + 16*nrnd and the last round uses the
// 16 bytes at dkey.  The round count selects the entry point into one
// unrolled sequence of 14 rounds: fewer than 12 enters at label 4
// (10 rounds), exactly 12 at label 3 (12 rounds), anything larger falls
// through to label 2 (14 rounds).

.global  aes_dec_blk

.extern  it_tab
.extern  il_tab

.align 4

aes_dec_blk:
	push    %ebp
	mov     ctx(%esp),%ebp      // pointer to context

// CAUTION: the order and the values used in these assigns
// rely on the register mappings

1:	push    %ebx
	mov     in_blk+4(%esp),%r2  // +4: a second register is now on the stack
	push    %esi
	mov     nrnd(%ebp),%r3      // number of rounds
	push    %edi
#if dkey != 0
	lea     dkey(%ebp),%ebp     // key pointer
#endif
	mov     %r3,%r0
	shl     $4,%r0              // 16 bytes of key material per round
	add     %r0,%ebp            // point at the last round key

// input four columns and xor in first round key

	mov     (%r2),%r0
	mov     4(%r2),%r1
	mov     8(%r2),%r4
	mov     12(%r2),%r5
	xor     (%ebp),%r0
	xor     4(%ebp),%r1
	xor     8(%ebp),%r4
	xor     12(%ebp),%r5

	sub     $8,%esp             // space for register saves on stack
	sub     $16,%ebp            // step back to the next round key
	cmp     $12,%r3
	jb      4f                  // 10 rounds for 128-bit key
	lea     -32(%ebp),%ebp      // lea leaves the flags from cmp intact
	je      3f                  // 12 rounds for 192-bit key
	lea     -32(%ebp),%ebp

// %ebp has been moved back by 32 bytes for each extra pair of rounds, so the
// last ten rounds use the same offsets (0..-144) for every key size.

2:	inv_rnd1( +64(%ebp), it_tab)	// 14 rounds for 256-bit key
	inv_rnd2( +48(%ebp), it_tab)
3:	inv_rnd1( +32(%ebp), it_tab)	// 12 rounds for 192-bit key
	inv_rnd2( +16(%ebp), it_tab)
4:	inv_rnd1(    (%ebp), it_tab)	// 10 rounds for 128-bit key
	inv_rnd2( -16(%ebp), it_tab)
	inv_rnd1( -32(%ebp), it_tab)
	inv_rnd2( -48(%ebp), it_tab)
	inv_rnd1( -64(%ebp), it_tab)
	inv_rnd2( -80(%ebp), it_tab)
	inv_rnd1( -96(%ebp), it_tab)
	inv_rnd2(-112(%ebp), it_tab)
	inv_rnd1(-128(%ebp), it_tab)
	inv_rnd2(-144(%ebp), il_tab)	// last round uses a different table

// move final values to the output array.  CAUTION: the
// order of these assigns rely on the register mappings

	add     $8,%esp             // release the scratch slots
	mov     out_blk+12(%esp),%ebp  // +12: three more registers still pushed
	mov     %r5,12(%ebp)
	pop     %edi
	mov     %r4,8(%ebp)
	pop     %esi
	mov     %r1,4(%ebp)
	pop     %ebx
	mov     %r0,(%ebp)
	pop     %ebp
	mov     $1,%eax
	ret
