/*
 * Key handling functions for PPC AES implementation
 *
 * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the Free
 * Software Foundation; either version 2 of the License, or (at your option)
 * any later version.
 *
 */
| 12 | |
| 13 | #include <asm/ppc_asm.h> |
| 14 | |
/*
 * LOAD_KEY(d, s, off): load the 32 bit word at byte offset off from the
 * address in s into d. Big endian builds use a plain lwz. All other
 * builds use the byte-reversing lwbrx, which takes its offset from a
 * register and therefore clobbers r0.
 */
#ifndef __BIG_ENDIAN__
#define LOAD_KEY(d, s, off) \
	li	r0,off; \
	lwbrx	d,s,r0;
#else
#define LOAD_KEY(d, s, off) \
	lwz	d,off(s);
#endif
| 23 | |
/*
 * INITIALIZE_KEY: prologue of the key expansion routines. Pushes a
 * 32 byte stack frame and stores r14, r15 and r16 at frame offsets
 * 8, 12 and 16. FINALIZE_KEY is the matching epilogue.
 */
#define INITIALIZE_KEY \
	stwu	r1,-32(r1);	/* push 32 byte stack frame */ \
	stw	r16,16(r1);	/* park r14-r16 in the frame */ \
	stw	r15,12(r1); \
	stw	r14,8(r1);
| 29 | |
/*
 * FINALIZE_KEY: epilogue of the key expansion routines. Reloads r14-r16
 * from the frame created by INITIALIZE_KEY, zeroes the volatile registers
 * that may still hold key derived data and pops the 32 byte frame.
 *
 * r4 is wiped together with r5-r12: LS_BOX leaves the table address
 * combined with the index of its last lookup (one byte of a round key
 * word) in it, and the 192/256 bit routines branch here on their last
 * round without overwriting it.
 */
#define FINALIZE_KEY \
	lwz	r14,8(r1);	/* restore registers */ \
	lwz	r15,12(r1); \
	lwz	r16,16(r1); \
	xor	r4,r4,r4;	/* clear sensitive data */ \
	xor	r5,r5,r5; \
	xor	r6,r6,r6; \
	xor	r7,r7,r7; \
	xor	r8,r8,r8; \
	xor	r9,r9,r9; \
	xor	r10,r10,r10; \
	xor	r11,r11,r11; \
	xor	r12,r12,r12; \
	addi	r1,r1,32;	/* cleanup stack */
| 43 | |
/*
 * LS_BOX(word, sbyte, addr): substitute all four bytes of word in place.
 *
 * addr is loaded with the address of PPC_AES_4K_ENCTAB. For each byte of
 * word, least significant first, the byte is inserted into bits 20..27
 * of addr (i.e. used as index * 16 into the table) and replaced by the
 * byte found at offset 8 of the selected entry. sbyte and addr are
 * clobbered.
 *
 * The index insertion relies on bits 20..27 of the table address being
 * zero - presumably the table is 4K aligned, as its name suggests.
 */
#define LS_BOX(word, sbyte, addr) \
	lis	addr,PPC_AES_4K_ENCTAB@h;	/* addr = table base */ \
	ori	addr,addr,PPC_AES_4K_ENCTAB@l; \
	rlwimi	addr,word,4,20,27;	/* index = bits 24..31 */ \
	lbz	sbyte,8(addr); \
	insrwi	word,sbyte,8,24; \
	rlwimi	addr,word,28,20,27;	/* index = bits 16..23 */ \
	lbz	sbyte,8(addr); \
	insrwi	word,sbyte,8,16; \
	rlwimi	addr,word,20,20,27;	/* index = bits 8..15 */ \
	lbz	sbyte,8(addr); \
	insrwi	word,sbyte,8,8; \
	rlwimi	addr,word,12,20,27;	/* index = bits 0..7 */ \
	lbz	sbyte,8(addr); \
	insrwi	word,sbyte,8,0;
| 59 | |
/*
 * GF8_MUL(out, in, hi, lo): multiply each of the four bytes packed in
 * `in` by 2 in GF(2^8). The low seven bits of every byte are shifted
 * left by one, and 0x1b is xored into every byte whose top bit was set.
 *
 * hi and lo are scratch registers. They must differ from each other and
 * from `in`; out may be the same register as `in` or as a scratch
 * register because it is written last.
 */
#define GF8_MUL(out, in, hi, lo) \
	lis	lo,0x7f7f;	/* low 7 bits of each byte ... */ \
	ori	lo,lo,0x7f7f; \
	and	lo,lo,in; \
	slwi	lo,lo,1;	/* ... shifted up by one */ \
	lis	hi,0x8080;	/* top bit of each byte ... */ \
	ori	hi,hi,0x8080; \
	and	hi,hi,in; \
	srwi	hi,hi,7;	/* ... moved to bit 0 of its byte */ \
	mulli	hi,hi,0x1b;	/* 0x1b for every byte with top bit set */ \
	xor	out,hi,lo;
| 71 | |
/*
 * ppc_expand_key_128(u32 *key_enc, const u8 *key)
 *
 * Derive the 176 byte encryption key schedule of a 128 bit key: the key
 * itself followed by 10 round keys of 16 bytes each.
 *
 * Register use: r3 = key_enc write pointer, r4 = key (scratch once the
 * key has been loaded), r5-r8 = current round key, r0 = round constant,
 * r16 = rounds left, r14/r15 = scratch.
 */
_GLOBAL(ppc_expand_key_128)
	INITIALIZE_KEY
	LOAD_KEY(r5,r4,0)
	LOAD_KEY(r6,r4,4)
	LOAD_KEY(r7,r4,8)
	LOAD_KEY(r8,r4,12)
	stw	r5,0(r3)		/* round key 0 is the key itself */
	stw	r6,4(r3)
	stw	r7,8(r3)
	stw	r8,12(r3)
	lis	r0,0x0100		/* first round constant */
	li	r16,10			/* 10 round keys to derive */
ppc_expand_128_loop:
	addi	r3,r3,16		/* advance to next round key slot */
	rotlwi	r14,r8,8		/* rotate 4th word by one byte ... */
	LS_BOX(r14, r15, r4)		/* ... and substitute its bytes */
	xor	r14,r14,r0		/* mix in round constant */
	xor	r5,r5,r14		/* chain through the 4 key words */
	stw	r5,0(r3)
	xor	r6,r6,r5
	stw	r6,4(r3)
	xor	r7,r7,r6
	stw	r7,8(r3)
	xor	r8,r8,r7
	stw	r8,12(r3)
	GF8_MUL(r0, r0, r4, r14)	/* round constant * 2 in GF(2^8) */
	subi	r16,r16,1
	cmpwi	r16,0
	bne	ppc_expand_128_loop	/* rounds left: go again */
	FINALIZE_KEY
	blr
| 113 | |
/*
 * ppc_expand_key_192(u32 *key_enc, const u8 *key)
 *
 * Derive the 208 byte encryption key schedule of a 192 bit key, i.e.
 * 13 round keys of 16 bytes each. The schedule is produced in steps of
 * 24 bytes: the key itself, 7 full steps and a final step of which only
 * the first 16 bytes are stored.
 *
 * Register use: r3 = key_enc write pointer, r4 = key (scratch once the
 * key has been loaded), r5-r10 = current 6 key words, r0 = round
 * constant, r16 = steps left, r14/r15 = scratch.
 */
_GLOBAL(ppc_expand_key_192)
	INITIALIZE_KEY
	LOAD_KEY(r5,r4,0)
	LOAD_KEY(r6,r4,4)
	LOAD_KEY(r7,r4,8)
	LOAD_KEY(r8,r4,12)
	LOAD_KEY(r9,r4,16)
	LOAD_KEY(r10,r4,20)
	stw	r5,0(r3)		/* schedule starts with the key itself */
	stw	r6,4(r3)
	stw	r7,8(r3)
	stw	r8,12(r3)
	stw	r9,16(r3)
	stw	r10,20(r3)
	lis	r0,0x0100		/* first round constant */
	li	r16,8			/* 8 expansion steps */
ppc_expand_192_loop:
	addi	r3,r3,24		/* advance to next 6 word slot */
	rotlwi	r14,r10,8		/* rotate 6th word by one byte ... */
	LS_BOX(r14, r15, r4)		/* ... and substitute its bytes */
	xor	r14,r14,r0		/* mix in round constant */
	xor	r5,r5,r14		/* chain through the 6 key words */
	stw	r5,0(r3)
	xor	r6,r6,r5
	stw	r6,4(r3)
	xor	r7,r7,r6
	stw	r7,8(r3)
	xor	r8,r8,r7
	stw	r8,12(r3)
	xor	r9,r9,r8
	xor	r10,r10,r9
	subi	r16,r16,1
	cmpwi	r16,0
	beq	ppc_expand_192_end	/* last step stores 4 words only */
	stw	r9,16(r3)
	stw	r10,20(r3)
	GF8_MUL(r0, r0, r4, r14)	/* round constant * 2 in GF(2^8) */
	b	ppc_expand_192_loop
ppc_expand_192_end:
	FINALIZE_KEY
	blr
| 163 | |
/*
 * ppc_expand_key_256(u32 *key_enc, const u8 *key)
 *
 * Derive the 240 byte encryption key schedule of a 256 bit key, i.e.
 * 15 round keys of 16 bytes each. The schedule is produced in steps of
 * 32 bytes: the key itself, 6 full steps and a final step of which only
 * the first 16 bytes are stored.
 *
 * Register use: r3 = key_enc write pointer, r4 = key (scratch once the
 * key has been loaded), r5-r12 = current 8 key words, r0 = round
 * constant, r16 = steps left, r14/r15 = scratch.
 */
_GLOBAL(ppc_expand_key_256)
	INITIALIZE_KEY
	LOAD_KEY(r5,r4,0)
	LOAD_KEY(r6,r4,4)
	LOAD_KEY(r7,r4,8)
	LOAD_KEY(r8,r4,12)
	LOAD_KEY(r9,r4,16)
	LOAD_KEY(r10,r4,20)
	LOAD_KEY(r11,r4,24)
	LOAD_KEY(r12,r4,28)
	stw	r5,0(r3)		/* schedule starts with the key itself */
	stw	r6,4(r3)
	stw	r7,8(r3)
	stw	r8,12(r3)
	stw	r9,16(r3)
	stw	r10,20(r3)
	stw	r11,24(r3)
	stw	r12,28(r3)
	lis	r0,0x0100		/* first round constant */
	li	r16,7			/* 7 expansion steps */
ppc_expand_256_loop:
	addi	r3,r3,32		/* advance to next 8 word slot */
	rotlwi	r14,r12,8		/* rotate 8th word by one byte ... */
	LS_BOX(r14, r15, r4)		/* ... and substitute its bytes */
	xor	r14,r14,r0		/* mix in round constant */
	xor	r5,r5,r14		/* chain through the first 4 words */
	stw	r5,0(r3)
	xor	r6,r6,r5
	stw	r6,4(r3)
	xor	r7,r7,r6
	stw	r7,8(r3)
	xor	r8,r8,r7
	stw	r8,12(r3)
	mr	r14,r8			/* 4th word: substitute only, */
	LS_BOX(r14, r15, r4)		/* no rotation, no round constant */
	xor	r9,r9,r14		/* chain through the last 4 words */
	xor	r10,r10,r9
	xor	r11,r11,r10
	xor	r12,r12,r11
	subi	r16,r16,1
	cmpwi	r16,0
	beq	ppc_expand_256_end	/* last step stores 4 words only */
	stw	r9,16(r3)
	stw	r10,20(r3)
	stw	r11,24(r3)
	stw	r12,28(r3)
	GF8_MUL(r0, r0, r4, r14)	/* round constant * 2 in GF(2^8) */
	b	ppc_expand_256_loop
ppc_expand_256_end:
	FINALIZE_KEY
	blr
| 223 | |
/*
 * ppc_generate_decrypt_key: derive decryption key from encryption key
 *
 * r3 = decryption key schedule (written)
 * r4 = encryption key schedule (read)
 * r5 = length of key in bytes (16/24/32)
 *
 * The 16 byte round keys are copied in reverse order. The first and the
 * last round key are copied unchanged. Each of the (r5 + 20) / 4 =
 * 9/11/13 round keys in between is passed word by word through the AES
 * InvMixColumns transform (GF(2^8) coefficients 14, 11, 13 and 9).
 *
 * Clobbers r0, r3-r12, cr0 and ctr. Every register that held round key
 * data (r0, r6-r12) is cleared before returning, in line with what
 * FINALIZE_KEY does for the key expansion routines.
 */
_GLOBAL(ppc_generate_decrypt_key)
	addi	r6,r5,24
	slwi	r6,r6,2			/* r6 = offset of last round key */
	lwzx	r7,r4,r6		/* first/last 4 words are same */
	stw	r7,0(r3)
	lwz	r7,0(r4)
	stwx	r7,r3,r6
	addi	r6,r6,4
	lwzx	r7,r4,r6
	stw	r7,4(r3)
	lwz	r7,4(r4)
	stwx	r7,r3,r6
	addi	r6,r6,4
	lwzx	r7,r4,r6
	stw	r7,8(r3)
	lwz	r7,8(r4)
	stwx	r7,r3,r6
	addi	r6,r6,4
	lwzx	r7,r4,r6
	stw	r7,12(r3)
	lwz	r7,12(r4)
	stwx	r7,r3,r6
	addi	r3,r3,16		/* output: second round key slot */
	add	r4,r4,r6		/* r6 is last key offset + 12 here, */
	subi	r4,r4,28		/* so r4 = last but one round key */
	addi	r5,r5,20
	srwi	r5,r5,2			/* r5 = inner round keys to convert */
ppc_generate_decrypt_block:
	li	r6,4			/* 4 words per round key */
	mtctr	r6
ppc_generate_decrypt_word:
	lwz	r6,0(r4)		/* r6 = w */
	GF8_MUL(r7, r6, r0, r7)		/* r7 = 2 * w */
	GF8_MUL(r8, r7, r0, r8)		/* r8 = 4 * w */
	GF8_MUL(r9, r8, r0, r9)		/* r9 = 8 * w */
	xor	r10,r9,r6		/* r10 = 9 * w */
	xor	r11,r7,r8
	xor	r11,r11,r9		/* r11 = 14 * w */
	xor	r12,r7,r10		/* 11 * w ... */
	rotrwi	r12,r12,24		/* ... rotated right by 24 */
	xor	r11,r11,r12
	xor	r12,r8,r10		/* 13 * w ... */
	rotrwi	r12,r12,16		/* ... rotated right by 16 */
	xor	r11,r11,r12
	rotrwi	r12,r10,8		/* 9 * w rotated right by 8 */
	xor	r11,r11,r12
	stw	r11,0(r3)
	addi	r3,r3,4
	addi	r4,r4,4
	bdnz	ppc_generate_decrypt_word
	subi	r4,r4,32		/* step back to previous round key */
	subi	r5,r5,1
	cmpwi	r5,0
	bt	gt,ppc_generate_decrypt_block
	xor	r0,r0,r0		/* clear sensitive data */
	xor	r6,r6,r6
	xor	r7,r7,r7
	xor	r8,r8,r8
	xor	r9,r9,r9
	xor	r10,r10,r10
	xor	r11,r11,r11
	xor	r12,r12,r12
	blr