Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1 | // ------------------------------------------------------------------------- |
| 2 | // Copyright (c) 2001, Dr Brian Gladman < >, Worcester, UK. |
| 3 | // All rights reserved. |
| 4 | // |
| 5 | // LICENSE TERMS |
| 6 | // |
| 7 | // The free distribution and use of this software in both source and binary |
| 8 | // form is allowed (with or without changes) provided that: |
| 9 | // |
| 10 | // 1. distributions of this source code include the above copyright |
| 11 | // notice, this list of conditions and the following disclaimer// |
| 12 | // |
| 13 | // 2. distributions in binary form include the above copyright |
| 14 | // notice, this list of conditions and the following disclaimer |
| 15 | // in the documentation and/or other associated materials// |
| 16 | // |
| 17 | // 3. the copyright holder's name is not used to endorse products |
| 18 | // built using this software without specific written permission. |
| 19 | // |
| 20 | // |
| 21 | // ALTERNATIVELY, provided that this notice is retained in full, this product |
| 22 | // may be distributed under the terms of the GNU General Public License (GPL), |
| 23 | // in which case the provisions of the GPL apply INSTEAD OF those given above. |
| 24 | // |
| 25 | // Copyright (c) 2004 Linus Torvalds <torvalds@osdl.org> |
| 26 | // Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com> |
| 27 | |
| 28 | // DISCLAIMER |
| 29 | // |
| 30 | // This software is provided 'as is' with no explicit or implied warranties |
| 31 | // in respect of its properties including, but not limited to, correctness |
| 32 | // and fitness for purpose. |
| 33 | // ------------------------------------------------------------------------- |
| 34 | // Issue Date: 29/07/2002 |
| 35 | |
// Name the source file for the assembler and place what follows in the
// code section.
	.file	"aes-i586-asm.S"
	.text
| 38 | |
Jussi Kivilinna | 3f29974 | 2013-01-19 13:38:50 +0200 | [diff] [blame] | 39 | #include <linux/linkage.h> |
Herbert Xu | 6c2bb98 | 2006-05-16 22:09:29 +1000 | [diff] [blame] | 40 | #include <asm/asm-offsets.h> |
| 41 | |
// Size in bytes of each of the four 'xor' lookup tables
// (256 entries of 32 bits each).
#define tlen	1024

/*
 * Stack offsets of the C arguments, valid while exactly one register
 * (%ebp) has been pushed on top of the return address.  Code that has
 * pushed further registers adds 4 per extra push (e.g. in_blk+4).
 */
#define ctx	8
#define out_blk	12
#define in_blk	16

/*
 * Byte offsets of the fields used here inside struct crypto_aes_ctx:
 * klen is the key length field, ekey/dkey the encryption/decryption
 * key schedules.
 * NOTE(review): these are hard-coded and presumably have to mirror the
 * layout of the C structure - confirm against its definition.
 */
#define klen	(480)
#define ekey	(0)
#define dkey	(240)
// Register mapping for the encrypt and decrypt subroutines.
//
// r0..r3 map to eax/ebx/ecx/edx, the registers whose low and second
// bytes have their own names (al/ah, ...).  r4 and r5 (esi/edi) have no
// such byte names defined here, so they cannot be passed to l() or h().

#define r0	eax
#define r1	ebx
#define r2	ecx
#define r3	edx
#define r4	esi
#define r5	edi

// Byte sub-register names, spelled so that they can be produced by
// pasting an 'l' or 'h' suffix onto the 32-bit register name.

#define eaxl	al
#define eaxh	ah
#define ebxl	bl
#define ebxh	bh
#define ecxl	cl
#define ecxh	ch
#define edxl	dl
#define edxh	dh

// h(reg) / l(reg) yield the name of bits 8-15 / bits 0-7 of reg.
// The extra level of macro (h -> _h, l -> _l) lets an argument such as
// r0 expand to eax before the suffix is pasted on, giving eaxh -> ah
// rather than the undefined r0h.

#define _h(reg)	reg##h
#define h(reg)	_h(reg)

#define _l(reg)	reg##l
#define l(reg)	_l(reg)
| 77 | |
// do_col: fold one 32-bit state column into four accumulator registers.
//
// Each of the four bytes of idx is used as an index into one of four
// consecutive tables of 256 32-bit words (table, table+tlen,
// table+2*tlen, table+3*tlen).  The word selected from each table is
// xored into the corresponding output register.
//
// Parameters:
//	table	base address of the first of the four tables
//	a1	register xored with table[byte 0 of idx]         (bits 0-7)
//	a2	register xored with (table+tlen)[byte 1 of idx]   (bits 8-15)
//	a3	register xored with (table+2*tlen)[byte 2 of idx] (bits 16-23)
//	a4	register xored with (table+3*tlen)[byte 3 of idx] (bits 24-31)
//	idx	input column for the round (destroyed); must be one of
//		eax/ebx/ecx/edx because l(idx) and h(idx) are used
//	tmp	scratch register (destroyed)

#define do_col(table, a1,a2,a3,a4, idx, tmp)	\
	movzx	%l(idx),%tmp;			\
	xor	table(,%tmp,4),%a1;		\
	movzx	%h(idx),%tmp;			\
	shr	$16,%idx;			\
	xor	table+tlen(,%tmp,4),%a2;	\
	movzx	%l(idx),%tmp;			\
	movzx	%h(idx),%idx;			\
	xor	table+2*tlen(,%tmp,4),%a3;	\
	xor	table+3*tlen(,%idx,4),%a4;
| 103 | |
// do_fcol: first column step of a forward round.
//
// Like do_col, but the output registers are first initialised from the
// round key instead of being accumulated into:
//	a1 = word at offset  0 of sched, then ^= table[byte 0 of idx]
//	a2 = word at offset 12 of sched, then ^= (table+tlen)[byte 1]
//	a4 = word at offset  4 of sched, then ^= (table+3*tlen)[byte 3]
//	a3 = word at offset  8 of sched, then ^= (table+2*tlen)[byte 2]
//
// sched is a memory operand such as -64(%ebp); the numeric offset is
// written directly in front of it so the assembler sums the two
// displacements.
//
// NB1: the original value of a3 is in idx on exit (it is copied there
//      just before a3 is overwritten)
// NB2: the original values of a1,a2,a4 aren't used
// idx must be one of eax/ebx/ecx/edx; tmp is a scratch register.
#define do_fcol(table, a1,a2,a3,a4, idx, tmp, sched)	\
	mov	0 sched,%a1;			\
	movzx	%l(idx),%tmp;			\
	mov	12 sched,%a2;			\
	xor	table(,%tmp,4),%a1;		\
	mov	4 sched,%a4;			\
	movzx	%h(idx),%tmp;			\
	shr	$16,%idx;			\
	xor	table+tlen(,%tmp,4),%a2;	\
	movzx	%l(idx),%tmp;			\
	movzx	%h(idx),%idx;			\
	xor	table+3*tlen(,%idx,4),%a4;	\
	mov	%a3,%idx;			\
	mov	8 sched,%a3;			\
	xor	table+2*tlen(,%tmp,4),%a3;
| 122 | |
// do_icol: first column step of an inverse round.
//
// Same instruction sequence as do_fcol except for which round-key word
// initialises a2 and a4:
//	a1 = word at offset  0 of sched, then ^= table[byte 0 of idx]
//	a2 = word at offset  4 of sched, then ^= (table+tlen)[byte 1]
//	a4 = word at offset 12 of sched, then ^= (table+3*tlen)[byte 3]
//	a3 = word at offset  8 of sched, then ^= (table+2*tlen)[byte 2]
//
// sched is a memory operand such as -64(%ebp); the numeric offset is
// written directly in front of it so the assembler sums the two
// displacements.
//
// NB1: the original value of a3 is in idx on exit (it is copied there
//      just before a3 is overwritten)
// NB2: the original values of a1,a2,a4 aren't used
// idx must be one of eax/ebx/ecx/edx; tmp is a scratch register.
#define do_icol(table, a1,a2,a3,a4, idx, tmp, sched)	\
	mov	0 sched,%a1;			\
	movzx	%l(idx),%tmp;			\
	mov	4 sched,%a2;			\
	xor	table(,%tmp,4),%a1;		\
	mov	12 sched,%a4;			\
	movzx	%h(idx),%tmp;			\
	shr	$16,%idx;			\
	xor	table+tlen(,%tmp,4),%a2;	\
	movzx	%l(idx),%tmp;			\
	movzx	%h(idx),%idx;			\
	xor	table+3*tlen(,%idx,4),%a4;	\
	mov	%a3,%idx;			\
	mov	8 sched,%a3;			\
	xor	table+2*tlen(,%tmp,4),%a3;
| 141 | |
| 142 | |
// Spill and reload helpers using 4-byte slots at the top of the stack.
// (The original Gladman code had conditional saves to MMX regs.)
//
// Note the mirrored argument order:
//	save(slot, reg)		store reg into slot number 'slot'
//	restore(reg, slot)	load reg from slot number 'slot'
// The caller must already have reserved the slots below the current
// stack contents (slot n lives at 4*n(%esp)).
#define save(a1, a2)		\
	mov	%a2,4*a1(%esp)

#define restore(a1, a2)		\
	mov	4*a2(%esp),%a1
| 149 | |
// Forward (encryption) round macros.  fwd_rnd1 and fwd_rnd2 are meant to
// be used alternately: fwd_rnd1 leaves the first column in r2 instead of
// r0, and fwd_rnd2 moves it back, so a fwd_rnd1/fwd_rnd2 pair is entered
// with the column values in r0,r1,r4,r5 and exits with the new values in
// the same registers.  Stack slots 0 and 1 are used for temporary
// storage and r3 is used as scratch.

// fwd_rnd1(arg, table)
//	arg	memory operand addressing the 16-byte round key
//	table	base of the four lookup tables for this round
// round column values
// on entry: r0,r1,r4,r5
// on exit:  r2,r1,r4,r5
#define fwd_rnd1(arg, table)					\
	/* r1 and r5 are overwritten below; keep copies */	\
	save   (0,r1);						\
	save   (1,r5);						\
								\
	/* compute new column values */				\
	do_fcol(table, r2,r5,r4,r1, r0,r3, arg);	/* idx=r0 */ \
	do_col (table, r4,r1,r2,r5, r0,r3);		/* idx=r4 */ \
	restore(r0,0);						\
	do_col (table, r1,r2,r5,r4, r0,r3);		/* idx=r1 */ \
	restore(r0,1);						\
	do_col (table, r5,r4,r1,r2, r0,r3);		/* idx=r5 */
| 169 | |
// fwd_rnd2(arg, table): forward round with the first column arriving in
// r2 and leaving in r0; otherwise the same sequence as fwd_rnd1 with the
// roles of r0 and r2 exchanged.
//	arg	memory operand addressing the 16-byte round key
//	table	base of the four lookup tables for this round
// Uses stack slots 0 and 1 for temporary storage and r3 as scratch.
// round column values
// on entry: r2,r1,r4,r5
// on exit:  r0,r1,r4,r5
#define fwd_rnd2(arg, table)					\
	/* r1 and r5 are overwritten below; keep copies */	\
	save   (0,r1);						\
	save   (1,r5);						\
								\
	/* compute new column values */				\
	do_fcol(table, r0,r5,r4,r1, r2,r3, arg);	/* idx=r2 */ \
	do_col (table, r4,r1,r0,r5, r2,r3);		/* idx=r4 */ \
	restore(r2,0);						\
	do_col (table, r1,r0,r5,r4, r2,r3);		/* idx=r1 */ \
	restore(r2,1);						\
	do_col (table, r5,r4,r1,r0, r2,r3);		/* idx=r5 */
| 184 | |
// Inverse (decryption) round macros.  inv_rnd1 and inv_rnd2 are meant to
// be used alternately: inv_rnd1 leaves the first column in r2 instead of
// r0, and inv_rnd2 moves it back, so an inv_rnd1/inv_rnd2 pair is entered
// with the column values in r0,r1,r4,r5 and exits with the new values in
// the same registers.  Stack slots 0 and 1 are used for temporary
// storage and r3 is used as scratch.

// inv_rnd1(arg, table)
//	arg	memory operand addressing the 16-byte round key
//	table	base of the four lookup tables for this round
// round column values
// on entry: r0,r1,r4,r5
// on exit:  r2,r1,r4,r5
#define inv_rnd1(arg, table)					\
	/* r1 and r5 are overwritten below; keep copies */	\
	save    (0,r1);						\
	save    (1,r5);						\
								\
	/* compute new column values */				\
	do_icol(table, r2,r1,r4,r5, r0,r3, arg);	/* idx=r0 */ \
	do_col (table, r4,r5,r2,r1, r0,r3);		/* idx=r4 */ \
	restore(r0,0);						\
	do_col (table, r1,r4,r5,r2, r0,r3);		/* idx=r1 */ \
	restore(r0,1);						\
	do_col (table, r5,r2,r1,r4, r0,r3);		/* idx=r5 */
| 204 | |
// inv_rnd2(arg, table): inverse round with the first column arriving in
// r2 and leaving in r0; otherwise the same sequence as inv_rnd1 with the
// roles of r0 and r2 exchanged.
//	arg	memory operand addressing the 16-byte round key
//	table	base of the four lookup tables for this round
// Uses stack slots 0 and 1 for temporary storage and r3 as scratch.
// round column values
// on entry: r2,r1,r4,r5
// on exit:  r0,r1,r4,r5
#define inv_rnd2(arg, table)					\
	/* r1 and r5 are overwritten below; keep copies */	\
	save    (0,r1);						\
	save    (1,r5);						\
								\
	/* compute new column values */				\
	do_icol(table, r0,r1,r4,r5, r2,r3, arg);	/* idx=r2 */ \
	do_col (table, r4,r5,r0,r1, r2,r3);		/* idx=r4 */ \
	restore(r2,0);						\
	do_col (table, r1,r4,r5,r0, r2,r3);		/* idx=r1 */ \
	restore(r2,1);						\
	do_col (table, r5,r0,r1,r4, r2,r3);		/* idx=r5 */
| 219 | |
// AES (Rijndael) Encryption Subroutine
/* void aes_enc_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */
//
// Encrypts the 16 bytes at in_blk into out_blk using the encryption key
// schedule at offset ekey of ctx; the number of rounds (10, 12 or 14) is
// chosen from the key length stored at offset klen of ctx.
//
// All three arguments are read from the stack (offsets ctx, out_blk,
// in_blk, adjusted for the registers pushed so far).
// %ebp, %ebx, %esi and %edi are saved and restored; %eax, %ecx and %edx
// are used without being preserved.  8 further bytes of stack hold the
// two save()/restore() slots used by the round macros.

.extern  crypto_ft_tab
.extern  crypto_fl_tab

ENTRY(aes_enc_blk)
	push	%ebp
	mov	ctx(%esp),%ebp		// ebp = ctx

// CAUTION: the order and the values used in these assigns
// rely on the register mappings

1:	push	%ebx
	mov	in_blk+4(%esp),%r2	// +4: two registers pushed so far
	push	%esi
	mov	klen(%ebp),%r3		// key size
	push	%edi
#if ekey != 0
	lea	ekey(%ebp),%ebp		// key pointer
#endif

// input four columns and xor in first round key

	mov	(%r2),%r0
	mov	4(%r2),%r1
	mov	8(%r2),%r4
	mov	12(%r2),%r5
	xor	(%ebp),%r0
	xor	4(%ebp),%r1
	xor	8(%ebp),%r4
	xor	12(%ebp),%r5

	sub	$8,%esp			// space for register saves on stack
	add	$16,%ebp		// increment to next round key
	// The two lea instructions below leave the flags from this cmp
	// untouched, so the je still tests the key size.
	cmp	$24,%r3
	jb	4f			// 10 rounds for 128-bit key
	lea	32(%ebp),%ebp
	je	3f			// 12 rounds for 192-bit key
	lea	32(%ebp),%ebp

2:	fwd_rnd1( -64(%ebp), crypto_ft_tab)	// 14 rounds for 256-bit key
	fwd_rnd2( -48(%ebp), crypto_ft_tab)
3:	fwd_rnd1( -32(%ebp), crypto_ft_tab)	// 12 rounds for 192-bit key
	fwd_rnd2( -16(%ebp), crypto_ft_tab)
4:	fwd_rnd1(    (%ebp), crypto_ft_tab)	// 10 rounds for 128-bit key
	fwd_rnd2( +16(%ebp), crypto_ft_tab)
	fwd_rnd1( +32(%ebp), crypto_ft_tab)
	fwd_rnd2( +48(%ebp), crypto_ft_tab)
	fwd_rnd1( +64(%ebp), crypto_ft_tab)
	fwd_rnd2( +80(%ebp), crypto_ft_tab)
	fwd_rnd1( +96(%ebp), crypto_ft_tab)
	fwd_rnd2(+112(%ebp), crypto_ft_tab)
	fwd_rnd1(+128(%ebp), crypto_ft_tab)
	fwd_rnd2(+144(%ebp), crypto_fl_tab)	// last round uses a different table

// move final values to the output array.  CAUTION: the
// order of these assigns relies on the register mappings
// (r4/r5/r1 must be stored before the pops that overwrite
// esi/edi/ebx)

	add	$8,%esp			// release the two save slots
	mov	out_blk+12(%esp),%ebp	// +12: ebx, esi, edi still pushed
	mov	%r5,12(%ebp)
	pop	%edi
	mov	%r4,8(%ebp)
	pop	%esi
	mov	%r1,4(%ebp)
	pop	%ebx
	mov	%r0,(%ebp)
	pop	%ebp
	ret
ENDPROC(aes_enc_blk)
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 291 | |
// AES (Rijndael) Decryption Subroutine
/* void aes_dec_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */
//
// Decrypts the 16 bytes at in_blk into out_blk using the decryption key
// schedule at offset dkey of ctx; the number of rounds (10, 12 or 14) is
// chosen from the key length stored at offset klen of ctx.
//
// All three arguments are read from the stack (offsets ctx, out_blk,
// in_blk, adjusted for the registers pushed so far).
// %ebp, %ebx, %esi and %edi are saved and restored; %eax, %ecx and %edx
// are used without being preserved.  8 further bytes of stack hold the
// two save()/restore() slots used by the round macros.

.extern  crypto_it_tab
.extern  crypto_il_tab

ENTRY(aes_dec_blk)
	push	%ebp
	mov	ctx(%esp),%ebp		// ebp = ctx

// CAUTION: the order and the values used in these assigns
// rely on the register mappings

1:	push	%ebx
	mov	in_blk+4(%esp),%r2	// +4: two registers pushed so far
	push	%esi
	mov	klen(%ebp),%r3		// key size
	push	%edi
#if dkey != 0
	lea	dkey(%ebp),%ebp		// key pointer
#endif

// input four columns and xor in first round key

	mov	(%r2),%r0
	mov	4(%r2),%r1
	mov	8(%r2),%r4
	mov	12(%r2),%r5
	xor	(%ebp),%r0
	xor	4(%ebp),%r1
	xor	8(%ebp),%r4
	xor	12(%ebp),%r5

	sub	$8,%esp			// space for register saves on stack
	add	$16,%ebp		// increment to next round key
	// The two lea instructions below leave the flags from this cmp
	// untouched, so the je still tests the key size.
	cmp	$24,%r3
	jb	4f			// 10 rounds for 128-bit key
	lea	32(%ebp),%ebp
	je	3f			// 12 rounds for 192-bit key
	lea	32(%ebp),%ebp

2:	inv_rnd1( -64(%ebp), crypto_it_tab)	// 14 rounds for 256-bit key
	inv_rnd2( -48(%ebp), crypto_it_tab)
3:	inv_rnd1( -32(%ebp), crypto_it_tab)	// 12 rounds for 192-bit key
	inv_rnd2( -16(%ebp), crypto_it_tab)
4:	inv_rnd1(    (%ebp), crypto_it_tab)	// 10 rounds for 128-bit key
	inv_rnd2( +16(%ebp), crypto_it_tab)
	inv_rnd1( +32(%ebp), crypto_it_tab)
	inv_rnd2( +48(%ebp), crypto_it_tab)
	inv_rnd1( +64(%ebp), crypto_it_tab)
	inv_rnd2( +80(%ebp), crypto_it_tab)
	inv_rnd1( +96(%ebp), crypto_it_tab)
	inv_rnd2(+112(%ebp), crypto_it_tab)
	inv_rnd1(+128(%ebp), crypto_it_tab)
	inv_rnd2(+144(%ebp), crypto_il_tab)	// last round uses a different table

// move final values to the output array.  CAUTION: the
// order of these assigns relies on the register mappings
// (r4/r5/r1 must be stored before the pops that overwrite
// esi/edi/ebx)

	add	$8,%esp			// release the two save slots
	mov	out_blk+12(%esp),%ebp	// +12: ebx, esi, edi still pushed
	mov	%r5,12(%ebp)
	pop	%edi
	mov	%r4,8(%ebp)
	pop	%esi
	mov	%r1,4(%ebp)
	pop	%ebx
	mov	%r0,(%ebp)
	pop	%ebp
	ret
ENDPROC(aes_dec_blk)