#! /usr/bin/env perl
# Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# This module implements support for AES instructions as per PowerISA
# specification version 2.07, first implemented by the POWER8 processor.
# The module is endian-agnostic in the sense that it supports both big-
# and little-endian cases. Data alignment in parallelizable modes is
# handled with VSX loads and stores, which implies that the MSR.VSX
# flag is set. It should also be noted that the ISA specification
# doesn't prohibit alignment exceptions for these instructions on page
# boundaries. Initially alignment was handled in a pure AltiVec/VMX way
# [with data aligned programmatically, which in turn guarantees
# exception-free execution], but that turned out to hamper performance
# when vcipher instructions are interleaved. It's reckoned that the
# eventual misalignment penalties at page boundaries are on average
# lower than the additional overhead of the pure AltiVec approach.
#
# May 2016
#
# Added XTS subroutine: a 9x improvement on little- and a 12x
# improvement on big-endian systems was measured.
#
######################################################################
# Current large-block performance in cycles per byte processed with
# 128-bit key (less is better).
#
#		CBC en-/decrypt	CTR	XTS
# POWER8[le]	3.96/0.72	0.74	1.1
# POWER8[be]	3.75/0.65	0.66	1.0

$flavour = shift;

if ($flavour =~ /64/) {
	$SIZE_T	=8;
	$LRSAVE	=2*$SIZE_T;
	$STU	="stdu";
	$POP	="ld";
	$PUSH	="std";
	$UCMP	="cmpld";
	$SHL	="sldi";
} elsif ($flavour =~ /32/) {
	$SIZE_T	=4;
	$LRSAVE	=$SIZE_T;
	$STU	="stwu";
	$POP	="lwz";
	$PUSH	="stw";
	$UCMP	="cmplw";
	$SHL	="slwi";
} else { die "nonsense $flavour"; }
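
# A note on invocation: the first argument selects the target "flavour"
# and the second names the output file (it is handed to the translator
# below). The exact flavour strings are whatever ppc-xlate.pl accepts
# (linux64le, linux64, linux32 and the like are typical); the command
# line here is illustrative only:
#
#	perl aesp8-ppc.pl linux64le aesp8-ppc.s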

$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;

$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!";

$FRAME=8*$SIZE_T;
$prefix="aes_p8";

$sp="r1";
$vrsave="r12";

#########################################################################
{{{	# Key setup procedures						#
my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));

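# A reading aid for the generated code below: "le?"/"be?" prefixes mark
# instructions emitted only on little-/big-endian targets, while a bare
# "?" marks instructions that ppc-xlate.pl rewrites per endianness
# (e.g. swapping lvsl/lvsr or vperm operand order on little-endian).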
$code.=<<___;
.machine	"any"

.text

.align	7
rcon:
.long	0x01000000, 0x01000000, 0x01000000, 0x01000000	?rev
.long	0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000	?rev
.long	0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c	?rev
.long	0,0,0,0						?asis
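# Lconsts computes the run-time address of the rcon table above: the
# "bcl 20,31" idiom loads the address of the next instruction into LR,
# and rcon sits a fixed 0x48 bytes (64 bytes of table plus two
# instructions) before that point.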
Lconsts:
	mflr	r0
	bcl	20,31,\$+4
	mflr	$ptr			# "." (address of next insn)
	addi	$ptr,$ptr,-0x48		# distance between "." and rcon
	mtlr	r0
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0
.asciz	"AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"

.globl	.${prefix}_set_encrypt_key
Lset_encrypt_key:
	mflr		r11
	$PUSH		r11,$LRSAVE($sp)

	li		$ptr,-1
	${UCMP}i	$inp,0
	beq-		Lenc_key_abort		# if ($inp==0) return -1;
	${UCMP}i	$out,0
	beq-		Lenc_key_abort		# if ($out==0) return -1;
	li		$ptr,-2
	cmpwi		$bits,128
	blt-		Lenc_key_abort
	cmpwi		$bits,256
	bgt-		Lenc_key_abort
	andi.		r0,$bits,0x3f
	bne-		Lenc_key_abort

	lis		r0,0xfff0
	mfspr		$vrsave,256
	mtspr		256,r0

	bl		Lconsts
	mtlr		r11

	neg		r9,$inp
	lvx		$in0,0,$inp
	addi		$inp,$inp,15		# 15 is not typo
	lvsr		$key,0,r9		# borrow $key
	li		r8,0x20
	cmpwi		$bits,192
	lvx		$in1,0,$inp
	le?vspltisb	$mask,0x0f		# borrow $mask
	lvx		$rcon,0,$ptr
	le?vxor		$key,$key,$mask		# adjust for byte swap
	lvx		$mask,r8,$ptr
	addi		$ptr,$ptr,0x10
	vperm		$in0,$in0,$in1,$key	# align [and byte swap in LE]
	li		$cnt,8
	vxor		$zero,$zero,$zero
	mtctr		$cnt

	?lvsr		$outperm,0,$out
	vspltisb	$outmask,-1
	lvx		$outhead,0,$out
	?vperm		$outmask,$zero,$outmask,$outperm

	blt		Loop128
	addi		$inp,$inp,8
	beq		L192
	addi		$inp,$inp,8
	b		L256

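# Each Loop128 iteration produces one round key of the AES-128
# schedule: vperm rotates and splats the last key word, vcipherlast
# against $rcon acts as SubWord plus the round-constant xor, and the
# vsldoi/vxor pairs fold the result through the other three words
# (the classic w[i] ^= w[i-1] recurrence done vector-wide).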
.align	4
Loop128:
	vperm	$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi	$tmp,$zero,$in0,12	# >>32
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx	$stage,0,$out
	addi	$out,$out,16

	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vadduwm	$rcon,$rcon,$rcon
	vxor	$in0,$in0,$key
	bdnz	Loop128

	lvx	$rcon,0,$ptr		# last two round keys

	vperm	$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi	$tmp,$zero,$in0,12	# >>32
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx	$stage,0,$out
	addi	$out,$out,16

	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vadduwm	$rcon,$rcon,$rcon
	vxor	$in0,$in0,$key

	vperm	$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi	$tmp,$zero,$in0,12	# >>32
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx	$stage,0,$out
	addi	$out,$out,16

	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vxor	$in0,$in0,$key
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	stvx	$stage,0,$out

	addi	$inp,$out,15		# 15 is not typo
	addi	$out,$out,0x50

	li	$rounds,10
	b	Ldone

.align	4
L192:
	lvx	$tmp,0,$inp
	li	$cnt,4
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	stvx	$stage,0,$out
	addi	$out,$out,16
	vperm	$in1,$in1,$tmp,$key	# align [and byte swap in LE]
	vspltisb	$key,8		# borrow $key
	mtctr	$cnt
	vsububm	$mask,$mask,$key	# adjust the mask

Loop192:
	vperm	$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi	$tmp,$zero,$in0,12	# >>32
	vcipherlast	$key,$key,$rcon

	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp

	vsldoi	$stage,$zero,$in1,8
	vspltw	$tmp,$in0,3
	vxor	$tmp,$tmp,$in1
	vsldoi	$in1,$zero,$in1,12	# >>32
	vadduwm	$rcon,$rcon,$rcon
	vxor	$in1,$in1,$tmp
	vxor	$in0,$in0,$key
	vxor	$in1,$in1,$key
	vsldoi	$stage,$stage,$in0,8

	vperm	$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi	$tmp,$zero,$in0,12	# >>32
	vperm	$outtail,$stage,$stage,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx	$stage,0,$out
	addi	$out,$out,16

	vsldoi	$stage,$in0,$in1,8
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vperm	$outtail,$stage,$stage,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	stvx	$stage,0,$out
	addi	$out,$out,16

	vspltw	$tmp,$in0,3
	vxor	$tmp,$tmp,$in1
	vsldoi	$in1,$zero,$in1,12	# >>32
	vadduwm	$rcon,$rcon,$rcon
	vxor	$in1,$in1,$tmp
	vxor	$in0,$in0,$key
	vxor	$in1,$in1,$key
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	stvx	$stage,0,$out
	addi	$inp,$out,15		# 15 is not typo
	addi	$out,$out,16
	bdnz	Loop192

	li	$rounds,12
	addi	$out,$out,0x20
	b	Ldone

.align	4
L256:
	lvx	$tmp,0,$inp
	li	$cnt,7
	li	$rounds,14
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	stvx	$stage,0,$out
	addi	$out,$out,16
	vperm	$in1,$in1,$tmp,$key	# align [and byte swap in LE]
	mtctr	$cnt

Loop256:
	vperm	$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi	$tmp,$zero,$in0,12	# >>32
	vperm	$outtail,$in1,$in1,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx	$stage,0,$out
	addi	$out,$out,16

	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in0,$in0,$tmp
	vadduwm	$rcon,$rcon,$rcon
	vxor	$in0,$in0,$key
	vperm	$outtail,$in0,$in0,$outperm	# rotate
	vsel	$stage,$outhead,$outtail,$outmask
	vmr	$outhead,$outtail
	stvx	$stage,0,$out
	addi	$inp,$out,15		# 15 is not typo
	addi	$out,$out,16
	bdz	Ldone

	vspltw	$key,$in0,3		# just splat
	vsldoi	$tmp,$zero,$in1,12	# >>32
	vsbox	$key,$key

	vxor	$in1,$in1,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in1,$in1,$tmp
	vsldoi	$tmp,$zero,$tmp,12	# >>32
	vxor	$in1,$in1,$tmp

	vxor	$in1,$in1,$key
	b	Loop256

.align	4
Ldone:
	lvx	$in1,0,$inp		# redundant in aligned case
	vsel	$in1,$outhead,$in1,$outmask
	stvx	$in1,0,$inp
	li	$ptr,0
	mtspr	256,$vrsave
	stw	$rounds,0($out)

Lenc_key_abort:
	mr	r3,$ptr
	blr
	.long	0
	.byte	0,12,0x14,1,0,0,3,0
	.long	0
.size	.${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key

.globl	.${prefix}_set_decrypt_key
	$STU	$sp,-$FRAME($sp)
	mflr	r10
	$PUSH	r10,$FRAME+$LRSAVE($sp)
	bl	Lset_encrypt_key
	mtlr	r10

	cmpwi	r3,0
	bne-	Ldec_key_abort

	slwi	$cnt,$rounds,4
	subi	$inp,$out,240		# first round key
	srwi	$rounds,$rounds,1
	add	$out,$inp,$cnt		# last round key
	mtctr	$rounds

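# The decryption key schedule is simply the encryption schedule with
# the round keys in reverse order, so after generating the encryption
# schedule this loop swaps 16-byte round keys from the two ends of the
# schedule, one 32-bit word at a time, meeting in the middle.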
Ldeckey:
	lwz	r0, 0($inp)
	lwz	r6, 4($inp)
	lwz	r7, 8($inp)
	lwz	r8, 12($inp)
	addi	$inp,$inp,16
	lwz	r9, 0($out)
	lwz	r10,4($out)
	lwz	r11,8($out)
	lwz	r12,12($out)
	stw	r0, 0($out)
	stw	r6, 4($out)
	stw	r7, 8($out)
	stw	r8, 12($out)
	subi	$out,$out,16
	stw	r9, -16($inp)
	stw	r10,-12($inp)
	stw	r11,-8($inp)
	stw	r12,-4($inp)
	bdnz	Ldeckey

	xor	r3,r3,r3		# return value
Ldec_key_abort:
	addi	$sp,$sp,$FRAME
	blr
	.long	0
	.byte	0,12,4,1,0x80,0,3,0
	.long	0
.size	.${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
___
}}}
#########################################################################
{{{	# Single block en- and decrypt procedures			#
sub gen_block () {
my $dir = shift;
my $n   = $dir eq "de" ? "n" : "";
my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));
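
# gen_block() is invoked twice (below) and emits both single-block
# entry points: $n is "" for encryption and "n" for decryption,
# selecting the vcipher/vcipherlast vs. vncipher/vncipherlast
# instruction variants.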

$code.=<<___;
.globl	.${prefix}_${dir}crypt
	lwz	$rounds,240($key)
	lis	r0,0xfc00
	mfspr	$vrsave,256
	li	$idx,15			# 15 is not typo
	mtspr	256,r0

	lvx	v0,0,$inp
	neg	r11,$out
	lvx	v1,$idx,$inp
	lvsl	v2,0,$inp		# inpperm
	le?vspltisb	v4,0x0f
	?lvsl	v3,0,r11		# outperm
	le?vxor	v2,v2,v4
	li	$idx,16
	vperm	v0,v0,v1,v2		# align [and byte swap in LE]
	lvx	v1,0,$key
	?lvsl	v5,0,$key		# keyperm
	srwi	$rounds,$rounds,1
	lvx	v2,$idx,$key
	addi	$idx,$idx,16
	subi	$rounds,$rounds,1
	?vperm	v1,v1,v2,v5		# align round key

	vxor	v0,v0,v1
	lvx	v1,$idx,$key
	addi	$idx,$idx,16
	mtctr	$rounds

Loop_${dir}c:
	?vperm	v2,v2,v1,v5
	v${n}cipher	v0,v0,v2
	lvx	v2,$idx,$key
	addi	$idx,$idx,16
	?vperm	v1,v1,v2,v5
	v${n}cipher	v0,v0,v1
	lvx	v1,$idx,$key
	addi	$idx,$idx,16
	bdnz	Loop_${dir}c

	?vperm	v2,v2,v1,v5
	v${n}cipher	v0,v0,v2
	lvx	v2,$idx,$key
	?vperm	v1,v1,v2,v5
	v${n}cipherlast	v0,v0,v1

	vspltisb	v2,-1
	vxor	v1,v1,v1
	li	$idx,15			# 15 is not typo
	?vperm	v2,v1,v2,v3		# outmask
	le?vxor	v3,v3,v4
	lvx	v1,0,$out		# outhead
	vperm	v0,v0,v0,v3		# rotate [and byte swap in LE]
	vsel	v1,v1,v0,v2
	lvx	v4,$idx,$out
	stvx	v1,0,$out
	vsel	v0,v0,v4,v2
	stvx	v0,$idx,$out

	mtspr	256,$vrsave
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,3,0
	.long	0
.size	.${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
___
}
&gen_block("en");
&gen_block("de");
}}}
#########################################################################
{{{	# CBC en- and decrypt procedures				#
my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout,$tmp)=		map("v$_",(0..3));
my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
						map("v$_",(4..10));
$code.=<<___;
.globl	.${prefix}_cbc_encrypt
	${UCMP}i	$len,16
	bltlr-

	cmpwi	$enc,0			# test direction
	lis	r0,0xffe0
	mfspr	$vrsave,256
	mtspr	256,r0

	li	$idx,15
	vxor	$rndkey0,$rndkey0,$rndkey0
	le?vspltisb	$tmp,0x0f

	lvx	$ivec,0,$ivp		# load [unaligned] iv
	lvsl	$inpperm,0,$ivp
	lvx	$inptail,$idx,$ivp
	le?vxor	$inpperm,$inpperm,$tmp
	vperm	$ivec,$ivec,$inptail,$inpperm

	neg	r11,$inp
	?lvsl	$keyperm,0,$key		# prepare for unaligned key
	lwz	$rounds,240($key)

	lvsr	$inpperm,0,r11		# prepare for unaligned load
	lvx	$inptail,0,$inp
	addi	$inp,$inp,15		# 15 is not typo
	le?vxor	$inpperm,$inpperm,$tmp

	?lvsr	$outperm,0,$out		# prepare for unaligned store
	vspltisb	$outmask,-1
	lvx	$outhead,0,$out
	?vperm	$outmask,$rndkey0,$outmask,$outperm
	le?vxor	$outperm,$outperm,$tmp

	srwi	$rounds,$rounds,1
	li	$idx,16
	subi	$rounds,$rounds,1
	beq	Lcbc_dec

Lcbc_enc:
	vmr	$inout,$inptail
	lvx	$inptail,0,$inp
	addi	$inp,$inp,16
	mtctr	$rounds
	subi	$len,$len,16		# len-=16

	lvx	$rndkey0,0,$key
	vperm	$inout,$inout,$inptail,$inpperm
	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16
	vxor	$inout,$inout,$ivec

Loop_cbc_enc:
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16
	bdnz	Loop_cbc_enc

	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key
	li	$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipherlast	$ivec,$inout,$rndkey0
	${UCMP}i	$len,16

	vperm	$tmp,$ivec,$ivec,$outperm
	vsel	$inout,$outhead,$tmp,$outmask
	vmr	$outhead,$tmp
	stvx	$inout,0,$out
	addi	$out,$out,16
	bge	Lcbc_enc

	b	Lcbc_done

.align	4
Lcbc_dec:
	${UCMP}i	$len,128
	bge	_aesp8_cbc_decrypt8x
	vmr	$tmp,$inptail
	lvx	$inptail,0,$inp
	addi	$inp,$inp,16
	mtctr	$rounds
	subi	$len,$len,16		# len-=16

	lvx	$rndkey0,0,$key
	vperm	$tmp,$tmp,$inptail,$inpperm
	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$inout,$tmp,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16

Loop_cbc_dec:
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipher	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16
	bdnz	Loop_cbc_dec

	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key
	li	$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipherlast	$inout,$inout,$rndkey0
	${UCMP}i	$len,16

	vxor	$inout,$inout,$ivec
	vmr	$ivec,$tmp
	vperm	$tmp,$inout,$inout,$outperm
	vsel	$inout,$outhead,$tmp,$outmask
	vmr	$outhead,$tmp
	stvx	$inout,0,$out
	addi	$out,$out,16
	bge	Lcbc_dec

Lcbc_done:
	addi	$out,$out,-1
	lvx	$inout,0,$out		# redundant in aligned case
	vsel	$inout,$outhead,$inout,$outmask
	stvx	$inout,0,$out

	neg	$enc,$ivp		# write [unaligned] iv
	li	$idx,15			# 15 is not typo
	vxor	$rndkey0,$rndkey0,$rndkey0
	vspltisb	$outmask,-1
	le?vspltisb	$tmp,0x0f
	?lvsl	$outperm,0,$enc
	?vperm	$outmask,$rndkey0,$outmask,$outperm
	le?vxor	$outperm,$outperm,$tmp
	lvx	$outhead,0,$ivp
	vperm	$ivec,$ivec,$ivec,$outperm
	vsel	$inout,$outhead,$ivec,$outmask
	lvx	$inptail,$idx,$ivp
	stvx	$inout,0,$ivp
	vsel	$inout,$ivec,$inptail,$outmask
	stvx	$inout,$idx,$ivp

	mtspr	256,$vrsave
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,6,0
	.long	0
___
#########################################################################
{{	# Optimized CBC decrypt procedure				#
my $key_="r11";
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
my $rndkey0="v23";	# v24-v25 rotating buffer for first round keys
			# v26-v31 last 6 round keys
my ($tmp,$keyperm)=($in3,$in4);	# aliases with "caller", redundant assignment
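
# Rationale for the 8x path: a single vncipher has multi-cycle latency,
# so eight independent blocks are kept in flight to fill the pipeline.
# Round keys are ?vperm-aligned once and staged on the stack, letting
# the inner loop fetch them with plain aligned lvx.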

$code.=<<___;
.align	5
_aesp8_cbc_decrypt8x:
	$STU	$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
	li	r10,`$FRAME+8*16+15`
	li	r11,`$FRAME+8*16+31`
	stvx	v20,r10,$sp		# ABI says so
	addi	r10,r10,32
	stvx	v21,r11,$sp
	addi	r11,r11,32
	stvx	v22,r10,$sp
	addi	r10,r10,32
	stvx	v23,r11,$sp
	addi	r11,r11,32
	stvx	v24,r10,$sp
	addi	r10,r10,32
	stvx	v25,r11,$sp
	addi	r11,r11,32
	stvx	v26,r10,$sp
	addi	r10,r10,32
	stvx	v27,r11,$sp
	addi	r11,r11,32
	stvx	v28,r10,$sp
	addi	r10,r10,32
	stvx	v29,r11,$sp
	addi	r11,r11,32
	stvx	v30,r10,$sp
	stvx	v31,r11,$sp
	li	r0,-1
	stw	$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
	li	$x10,0x10
	$PUSH	r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	li	$x20,0x20
	$PUSH	r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	li	$x30,0x30
	$PUSH	r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	li	$x40,0x40
	$PUSH	r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	li	$x50,0x50
	$PUSH	r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	li	$x60,0x60
	$PUSH	r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	li	$x70,0x70
	mtspr	256,r0

	subi	$rounds,$rounds,3	# -4 in total
	subi	$len,$len,128		# bias

	lvx	$rndkey0,$x00,$key	# load key schedule
	lvx	v30,$x10,$key
	addi	$key,$key,0x20
	lvx	v31,$x00,$key
	?vperm	$rndkey0,$rndkey0,v30,$keyperm
	addi	$key_,$sp,$FRAME+15
	mtctr	$rounds

Load_cbc_dec_key:
	?vperm	v24,v30,v31,$keyperm
	lvx	v30,$x10,$key
	addi	$key,$key,0x20
	stvx	v24,$x00,$key_		# off-load round[1]
	?vperm	v25,v31,v30,$keyperm
	lvx	v31,$x00,$key
	stvx	v25,$x10,$key_		# off-load round[2]
	addi	$key_,$key_,0x20
	bdnz	Load_cbc_dec_key

	lvx	v26,$x10,$key
	?vperm	v24,v30,v31,$keyperm
	lvx	v27,$x20,$key
	stvx	v24,$x00,$key_		# off-load round[3]
	?vperm	v25,v31,v26,$keyperm
	lvx	v28,$x30,$key
	stvx	v25,$x10,$key_		# off-load round[4]
	addi	$key_,$sp,$FRAME+15	# rewind $key_
	?vperm	v26,v26,v27,$keyperm
	lvx	v29,$x40,$key
	?vperm	v27,v27,v28,$keyperm
	lvx	v30,$x50,$key
	?vperm	v28,v28,v29,$keyperm
	lvx	v31,$x60,$key
	?vperm	v29,v29,v30,$keyperm
	lvx	$out0,$x70,$key		# borrow $out0
	?vperm	v30,v30,v31,$keyperm
	lvx	v24,$x00,$key_		# pre-load round[1]
	?vperm	v31,v31,$out0,$keyperm
	lvx	v25,$x10,$key_		# pre-load round[2]

	#lvx	$inptail,0,$inp		# "caller" already did this
	#addi	$inp,$inp,15		# 15 is not typo
	subi	$inp,$inp,15		# undo "caller"

	le?li	$idx,8
	lvx_u	$in0,$x00,$inp		# load first 8 "words"
	le?lvsl	$inpperm,0,$idx
	le?vspltisb	$tmp,0x0f
	lvx_u	$in1,$x10,$inp
	le?vxor	$inpperm,$inpperm,$tmp	# transform for lvx_u/stvx_u
	lvx_u	$in2,$x20,$inp
	le?vperm	$in0,$in0,$in0,$inpperm
	lvx_u	$in3,$x30,$inp
	le?vperm	$in1,$in1,$in1,$inpperm
	lvx_u	$in4,$x40,$inp
	le?vperm	$in2,$in2,$in2,$inpperm
	vxor	$out0,$in0,$rndkey0
	lvx_u	$in5,$x50,$inp
	le?vperm	$in3,$in3,$in3,$inpperm
	vxor	$out1,$in1,$rndkey0
	lvx_u	$in6,$x60,$inp
	le?vperm	$in4,$in4,$in4,$inpperm
	vxor	$out2,$in2,$rndkey0
	lvx_u	$in7,$x70,$inp
	addi	$inp,$inp,0x80
	le?vperm	$in5,$in5,$in5,$inpperm
	vxor	$out3,$in3,$rndkey0
	le?vperm	$in6,$in6,$in6,$inpperm
	vxor	$out4,$in4,$rndkey0
	le?vperm	$in7,$in7,$in7,$inpperm
	vxor	$out5,$in5,$rndkey0
	vxor	$out6,$in6,$rndkey0
	vxor	$out7,$in7,$rndkey0

	mtctr	$rounds
	b	Loop_cbc_dec8x
.align	5
Loop_cbc_dec8x:
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24
	lvx	v24,$x20,$key_		# round[3]
	addi	$key_,$key_,0x20

	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25
	lvx	v25,$x10,$key_		# round[4]
	bdnz	Loop_cbc_dec8x

	subic	$len,$len,128		# $len-=128
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24

	subfe.	r0,r0,r0		# borrow?-1:0
	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25

	and	r0,r0,$len
	vncipher	$out0,$out0,v26
	vncipher	$out1,$out1,v26
	vncipher	$out2,$out2,v26
	vncipher	$out3,$out3,v26
	vncipher	$out4,$out4,v26
	vncipher	$out5,$out5,v26
	vncipher	$out6,$out6,v26
	vncipher	$out7,$out7,v26

	add	$inp,$inp,r0		# $inp is adjusted in such
					# way that at exit from the
					# loop inX-in7 are loaded
					# with last "words"
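	# The subic/subfe pair above materializes (borrow ? -1 : 0)
	# without a branch; ANDing that with the (then negative)
	# remaining length and adding it to $inp rewinds the input
	# pointer on the final iteration only.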
	vncipher	$out0,$out0,v27
	vncipher	$out1,$out1,v27
	vncipher	$out2,$out2,v27
	vncipher	$out3,$out3,v27
	vncipher	$out4,$out4,v27
	vncipher	$out5,$out5,v27
	vncipher	$out6,$out6,v27
	vncipher	$out7,$out7,v27

	addi	$key_,$sp,$FRAME+15	# rewind $key_
	vncipher	$out0,$out0,v28
	vncipher	$out1,$out1,v28
	vncipher	$out2,$out2,v28
	vncipher	$out3,$out3,v28
	vncipher	$out4,$out4,v28
	vncipher	$out5,$out5,v28
	vncipher	$out6,$out6,v28
	vncipher	$out7,$out7,v28
	lvx	v24,$x00,$key_		# re-pre-load round[1]

	vncipher	$out0,$out0,v29
	vncipher	$out1,$out1,v29
	vncipher	$out2,$out2,v29
	vncipher	$out3,$out3,v29
	vncipher	$out4,$out4,v29
	vncipher	$out5,$out5,v29
	vncipher	$out6,$out6,v29
	vncipher	$out7,$out7,v29
	lvx	v25,$x10,$key_		# re-pre-load round[2]

	vncipher	$out0,$out0,v30
	vxor	$ivec,$ivec,v31		# xor with last round key
	vncipher	$out1,$out1,v30
	vxor	$in0,$in0,v31
	vncipher	$out2,$out2,v30
	vxor	$in1,$in1,v31
	vncipher	$out3,$out3,v30
	vxor	$in2,$in2,v31
	vncipher	$out4,$out4,v30
	vxor	$in3,$in3,v31
	vncipher	$out5,$out5,v30
	vxor	$in4,$in4,v31
	vncipher	$out6,$out6,v30
	vxor	$in5,$in5,v31
	vncipher	$out7,$out7,v30
	vxor	$in6,$in6,v31

	vncipherlast	$out0,$out0,$ivec
	vncipherlast	$out1,$out1,$in0
	lvx_u	$in0,$x00,$inp		# load next input block
	vncipherlast	$out2,$out2,$in1
	lvx_u	$in1,$x10,$inp
	vncipherlast	$out3,$out3,$in2
	le?vperm	$in0,$in0,$in0,$inpperm
	lvx_u	$in2,$x20,$inp
	vncipherlast	$out4,$out4,$in3
	le?vperm	$in1,$in1,$in1,$inpperm
	lvx_u	$in3,$x30,$inp
	vncipherlast	$out5,$out5,$in4
	le?vperm	$in2,$in2,$in2,$inpperm
	lvx_u	$in4,$x40,$inp
	vncipherlast	$out6,$out6,$in5
	le?vperm	$in3,$in3,$in3,$inpperm
	lvx_u	$in5,$x50,$inp
	vncipherlast	$out7,$out7,$in6
	le?vperm	$in4,$in4,$in4,$inpperm
	lvx_u	$in6,$x60,$inp
	vmr	$ivec,$in7
	le?vperm	$in5,$in5,$in5,$inpperm
	lvx_u	$in7,$x70,$inp
	addi	$inp,$inp,0x80

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$in6,$in6,$in6,$inpperm
	vxor	$out0,$in0,$rndkey0
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	le?vperm	$in7,$in7,$in7,$inpperm
	vxor	$out1,$in1,$rndkey0
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x20,$out
	vxor	$out2,$in2,$rndkey0
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x30,$out
	vxor	$out3,$in3,$rndkey0
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x40,$out
	vxor	$out4,$in4,$rndkey0
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x50,$out
	vxor	$out5,$in5,$rndkey0
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out6,$x60,$out
	vxor	$out6,$in6,$rndkey0
	stvx_u	$out7,$x70,$out
	addi	$out,$out,0x80
	vxor	$out7,$in7,$rndkey0

	mtctr	$rounds
	beq	Loop_cbc_dec8x		# did $len-=128 borrow?

	addic.	$len,$len,128
	beq	Lcbc_dec8x_done
	nop
	nop

Loop_cbc_dec8x_tail:			# up to 7 "words" tail...
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24
	lvx	v24,$x20,$key_		# round[3]
	addi	$key_,$key_,0x20

	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25
	lvx	v25,$x10,$key_		# round[4]
	bdnz	Loop_cbc_dec8x_tail

	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24

	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25

	vncipher	$out1,$out1,v26
	vncipher	$out2,$out2,v26
	vncipher	$out3,$out3,v26
	vncipher	$out4,$out4,v26
	vncipher	$out5,$out5,v26
	vncipher	$out6,$out6,v26
	vncipher	$out7,$out7,v26

	vncipher	$out1,$out1,v27
	vncipher	$out2,$out2,v27
	vncipher	$out3,$out3,v27
	vncipher	$out4,$out4,v27
	vncipher	$out5,$out5,v27
	vncipher	$out6,$out6,v27
	vncipher	$out7,$out7,v27

	vncipher	$out1,$out1,v28
	vncipher	$out2,$out2,v28
	vncipher	$out3,$out3,v28
	vncipher	$out4,$out4,v28
	vncipher	$out5,$out5,v28
	vncipher	$out6,$out6,v28
	vncipher	$out7,$out7,v28

	vncipher	$out1,$out1,v29
	vncipher	$out2,$out2,v29
	vncipher	$out3,$out3,v29
	vncipher	$out4,$out4,v29
	vncipher	$out5,$out5,v29
	vncipher	$out6,$out6,v29
	vncipher	$out7,$out7,v29

	vncipher	$out1,$out1,v30
	vxor	$ivec,$ivec,v31		# last round key
	vncipher	$out2,$out2,v30
	vxor	$in1,$in1,v31
	vncipher	$out3,$out3,v30
	vxor	$in2,$in2,v31
	vncipher	$out4,$out4,v30
	vxor	$in3,$in3,v31
	vncipher	$out5,$out5,v30
	vxor	$in4,$in4,v31
	vncipher	$out6,$out6,v30
	vxor	$in5,$in5,v31
	vncipher	$out7,$out7,v30
	vxor	$in6,$in6,v31

	cmplwi	$len,32			# switch($len)
	blt	Lcbc_dec8x_one
	nop
	beq	Lcbc_dec8x_two
	cmplwi	$len,64
	blt	Lcbc_dec8x_three
	nop
	beq	Lcbc_dec8x_four
	cmplwi	$len,96
	blt	Lcbc_dec8x_five
	nop
	beq	Lcbc_dec8x_six

Lcbc_dec8x_seven:
	vncipherlast	$out1,$out1,$ivec
	vncipherlast	$out2,$out2,$in1
	vncipherlast	$out3,$out3,$in2
	vncipherlast	$out4,$out4,$in3
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr	$ivec,$in7

	le?vperm	$out1,$out1,$out1,$inpperm
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x00,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x10,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x20,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x30,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x40,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out6,$x50,$out
	stvx_u	$out7,$x60,$out
	addi	$out,$out,0x70
	b	Lcbc_dec8x_done

.align	5
Lcbc_dec8x_six:
	vncipherlast	$out2,$out2,$ivec
	vncipherlast	$out3,$out3,$in2
	vncipherlast	$out4,$out4,$in3
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr	$ivec,$in7

	le?vperm	$out2,$out2,$out2,$inpperm
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x00,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x10,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x20,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x30,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out6,$x40,$out
	stvx_u	$out7,$x50,$out
	addi	$out,$out,0x60
	b	Lcbc_dec8x_done

.align	5
Lcbc_dec8x_five:
	vncipherlast	$out3,$out3,$ivec
	vncipherlast	$out4,$out4,$in3
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr	$ivec,$in7

	le?vperm	$out3,$out3,$out3,$inpperm
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x00,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x10,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x20,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out6,$x30,$out
	stvx_u	$out7,$x40,$out
	addi	$out,$out,0x50
	b	Lcbc_dec8x_done

.align	5
Lcbc_dec8x_four:
	vncipherlast	$out4,$out4,$ivec
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr	$ivec,$in7

	le?vperm	$out4,$out4,$out4,$inpperm
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x00,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x10,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out6,$x20,$out
	stvx_u	$out7,$x30,$out
	addi	$out,$out,0x40
	b	Lcbc_dec8x_done

.align	5
Lcbc_dec8x_three:
	vncipherlast	$out5,$out5,$ivec
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr	$ivec,$in7

	le?vperm	$out5,$out5,$out5,$inpperm
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x00,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out6,$x10,$out
	stvx_u	$out7,$x20,$out
	addi	$out,$out,0x30
	b	Lcbc_dec8x_done

.align	5
Lcbc_dec8x_two:
	vncipherlast	$out6,$out6,$ivec
	vncipherlast	$out7,$out7,$in6
	vmr	$ivec,$in7

	le?vperm	$out6,$out6,$out6,$inpperm
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out6,$x00,$out
	stvx_u	$out7,$x10,$out
	addi	$out,$out,0x20
	b	Lcbc_dec8x_done

.align	5
Lcbc_dec8x_one:
	vncipherlast	$out7,$out7,$ivec
	vmr	$ivec,$in7

	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out7,0,$out
	addi	$out,$out,0x10

Lcbc_dec8x_done:
	le?vperm	$ivec,$ivec,$ivec,$inpperm
	stvx_u	$ivec,0,$ivp		# write [unaligned] iv

	li	r10,`$FRAME+15`
	li	r11,`$FRAME+31`
	stvx	$inpperm,r10,$sp	# wipe copies of round keys
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32
	stvx	$inpperm,r10,$sp
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32
	stvx	$inpperm,r10,$sp
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32
	stvx	$inpperm,r10,$sp
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32

	mtspr	256,$vrsave
	lvx	v20,r10,$sp		# ABI says so
	addi	r10,r10,32
	lvx	v21,r11,$sp
	addi	r11,r11,32
	lvx	v22,r10,$sp
	addi	r10,r10,32
	lvx	v23,r11,$sp
	addi	r11,r11,32
	lvx	v24,r10,$sp
	addi	r10,r10,32
	lvx	v25,r11,$sp
	addi	r11,r11,32
	lvx	v26,r10,$sp
	addi	r10,r10,32
	lvx	v27,r11,$sp
	addi	r11,r11,32
	lvx	v28,r10,$sp
	addi	r10,r10,32
	lvx	v29,r11,$sp
	addi	r11,r11,32
	lvx	v30,r10,$sp
	lvx	v31,r11,$sp
	$POP	r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	$POP	r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	$POP	r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	$POP	r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	$POP	r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	$POP	r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	addi	$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
	blr
	.long	0
	.byte	0,12,0x14,0,0x80,6,6,0
	.long	0
.size	.${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt
___
}}	}}}

#########################################################################
{{{	# CTR procedure[s]						#
my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout,$tmp)=		map("v$_",(0..3));
my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
						map("v$_",(4..11));
my $dat=$tmp;

$code.=<<___;
.globl	.${prefix}_ctr32_encrypt_blocks
	${UCMP}i	$len,1
	bltlr-

	lis	r0,0xfff0
	mfspr	$vrsave,256
	mtspr	256,r0

	li	$idx,15
	vxor	$rndkey0,$rndkey0,$rndkey0
	le?vspltisb	$tmp,0x0f

	lvx	$ivec,0,$ivp		# load [unaligned] iv
	lvsl	$inpperm,0,$ivp
	lvx	$inptail,$idx,$ivp
	vspltisb	$one,1
	le?vxor	$inpperm,$inpperm,$tmp
	vperm	$ivec,$ivec,$inptail,$inpperm
	vsldoi	$one,$rndkey0,$one,1
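	# $one is now the 128-bit integer 1; the counter below is
	# stepped with vadduqm, i.e. the full 16-byte IV is treated as
	# one big-endian quadword counter, not just its low 32 bits.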

	neg	r11,$inp
	?lvsl	$keyperm,0,$key		# prepare for unaligned key
	lwz	$rounds,240($key)

	lvsr	$inpperm,0,r11		# prepare for unaligned load
	lvx	$inptail,0,$inp
	addi	$inp,$inp,15		# 15 is not typo
	le?vxor	$inpperm,$inpperm,$tmp

	srwi	$rounds,$rounds,1
	li	$idx,16
	subi	$rounds,$rounds,1

	${UCMP}i	$len,8
	bge	_aesp8_ctr32_encrypt8x

	?lvsr	$outperm,0,$out		# prepare for unaligned store
	vspltisb	$outmask,-1
	lvx	$outhead,0,$out
	?vperm	$outmask,$rndkey0,$outmask,$outperm
	le?vxor	$outperm,$outperm,$tmp

	lvx	$rndkey0,0,$key
	mtctr	$rounds
	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$inout,$ivec,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16
	b	Loop_ctr32_enc

.align	5
Loop_ctr32_enc:
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16
	bdnz	Loop_ctr32_enc

	vadduqm	$ivec,$ivec,$one	# increment IV as quadword,
					# matching the 8x path below
	vmr	$dat,$inptail
	lvx	$inptail,0,$inp
	addi	$inp,$inp,16
	subic.	$len,$len,1		# blocks--

	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key
	vperm	$dat,$dat,$inptail,$inpperm
	li	$idx,16
	?vperm	$rndkey1,$rndkey0,$rndkey1,$keyperm
	lvx	$rndkey0,0,$key
	vxor	$dat,$dat,$rndkey1	# last round key
	vcipherlast	$inout,$inout,$dat

	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	vperm	$inout,$inout,$inout,$outperm
	vsel	$dat,$outhead,$inout,$outmask
	mtctr	$rounds
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vmr	$outhead,$inout
	vxor	$inout,$ivec,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16
	stvx	$dat,0,$out
	addi	$out,$out,16
	bne	Loop_ctr32_enc

	addi	$out,$out,-1
	lvx	$inout,0,$out		# redundant in aligned case
	vsel	$inout,$outhead,$inout,$outmask
	stvx	$inout,0,$out

	mtspr	256,$vrsave
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,6,0
	.long	0
___
#########################################################################
{{	# Optimized CTR procedure					#
my $key_="r11";
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14));
my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22));
my $rndkey0="v23";	# v24-v25 rotating buffer for first round keys
			# v26-v31 last 6 round keys
my ($tmp,$keyperm)=($in3,$in4);	# aliases with "caller", redundant assignment
my ($two,$three,$four)=($outhead,$outperm,$outmask);

$code.=<<___;
.align	5
_aesp8_ctr32_encrypt8x:
	$STU	$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
	li	r10,`$FRAME+8*16+15`
	li	r11,`$FRAME+8*16+31`
	stvx	v20,r10,$sp		# ABI says so
	addi	r10,r10,32
	stvx	v21,r11,$sp
	addi	r11,r11,32
	stvx	v22,r10,$sp
	addi	r10,r10,32
	stvx	v23,r11,$sp
	addi	r11,r11,32
	stvx	v24,r10,$sp
	addi	r10,r10,32
	stvx	v25,r11,$sp
	addi	r11,r11,32
	stvx	v26,r10,$sp
	addi	r10,r10,32
	stvx	v27,r11,$sp
	addi	r11,r11,32
	stvx	v28,r10,$sp
	addi	r10,r10,32
	stvx	v29,r11,$sp
	addi	r11,r11,32
	stvx	v30,r10,$sp
	stvx	v31,r11,$sp
	li	r0,-1
	stw	$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
	li	$x10,0x10
	$PUSH	r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	li	$x20,0x20
	$PUSH	r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	li	$x30,0x30
	$PUSH	r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	li	$x40,0x40
	$PUSH	r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	li	$x50,0x50
	$PUSH	r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	li	$x60,0x60
	$PUSH	r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	li	$x70,0x70
	mtspr	256,r0

	subi	$rounds,$rounds,3	# -4 in total

	lvx	$rndkey0,$x00,$key	# load key schedule
	lvx	v30,$x10,$key
	addi	$key,$key,0x20
	lvx	v31,$x00,$key
	?vperm	$rndkey0,$rndkey0,v30,$keyperm
	addi	$key_,$sp,$FRAME+15
	mtctr	$rounds

Load_ctr32_enc_key:
	?vperm	v24,v30,v31,$keyperm
	lvx	v30,$x10,$key
	addi	$key,$key,0x20
	stvx	v24,$x00,$key_		# off-load round[1]
	?vperm	v25,v31,v30,$keyperm
	lvx	v31,$x00,$key
	stvx	v25,$x10,$key_		# off-load round[2]
	addi	$key_,$key_,0x20
	bdnz	Load_ctr32_enc_key

	lvx	v26,$x10,$key
	?vperm	v24,v30,v31,$keyperm
	lvx	v27,$x20,$key
	stvx	v24,$x00,$key_		# off-load round[3]
	?vperm	v25,v31,v26,$keyperm
	lvx	v28,$x30,$key
	stvx	v25,$x10,$key_		# off-load round[4]
	addi	$key_,$sp,$FRAME+15	# rewind $key_
	?vperm	v26,v26,v27,$keyperm
	lvx	v29,$x40,$key
	?vperm	v27,v27,v28,$keyperm
	lvx	v30,$x50,$key
	?vperm	v28,v28,v29,$keyperm
	lvx	v31,$x60,$key
	?vperm	v29,v29,v30,$keyperm
	lvx	$out0,$x70,$key		# borrow $out0
	?vperm	v30,v30,v31,$keyperm
	lvx	v24,$x00,$key_		# pre-load round[1]
	?vperm	v31,v31,$out0,$keyperm
	lvx	v25,$x10,$key_		# pre-load round[2]

	vadduqm	$two,$one,$one
	subi	$inp,$inp,15		# undo "caller"
	$SHL	$len,$len,4

	vadduqm	$out1,$ivec,$one	# counter values ...
	vadduqm	$out2,$ivec,$two
	vxor	$out0,$ivec,$rndkey0	# ... xored with rndkey[0]
	le?li	$idx,8
	vadduqm	$out3,$out1,$two
	vxor	$out1,$out1,$rndkey0
	le?lvsl	$inpperm,0,$idx
	vadduqm	$out4,$out2,$two
	vxor	$out2,$out2,$rndkey0
	le?vspltisb	$tmp,0x0f
	vadduqm	$out5,$out3,$two
	vxor	$out3,$out3,$rndkey0
	le?vxor	$inpperm,$inpperm,$tmp	# transform for lvx_u/stvx_u
	vadduqm	$out6,$out4,$two
	vxor	$out4,$out4,$rndkey0
	vadduqm	$out7,$out5,$two
	vxor	$out5,$out5,$rndkey0
	vadduqm	$ivec,$out6,$two	# next counter value
	vxor	$out6,$out6,$rndkey0
	vxor	$out7,$out7,$rndkey0

	mtctr	$rounds
	b	Loop_ctr32_enc8x
.align	5
Loop_ctr32_enc8x:
	vcipher	$out0,$out0,v24
	vcipher	$out1,$out1,v24
	vcipher	$out2,$out2,v24
	vcipher	$out3,$out3,v24
	vcipher	$out4,$out4,v24
	vcipher	$out5,$out5,v24
	vcipher	$out6,$out6,v24
	vcipher	$out7,$out7,v24
Loop_ctr32_enc8x_middle:
	lvx	v24,$x20,$key_		# round[3]
	addi	$key_,$key_,0x20

	vcipher	$out0,$out0,v25
	vcipher	$out1,$out1,v25
	vcipher	$out2,$out2,v25
	vcipher	$out3,$out3,v25
	vcipher	$out4,$out4,v25
	vcipher	$out5,$out5,v25
	vcipher	$out6,$out6,v25
	vcipher	$out7,$out7,v25
	lvx	v25,$x10,$key_		# round[4]
	bdnz	Loop_ctr32_enc8x

	subic	r11,$len,256		# $len-256, borrow $key_
	vcipher	$out0,$out0,v24
	vcipher	$out1,$out1,v24
	vcipher	$out2,$out2,v24
	vcipher	$out3,$out3,v24
	vcipher	$out4,$out4,v24
	vcipher	$out5,$out5,v24
	vcipher	$out6,$out6,v24
	vcipher	$out7,$out7,v24

	subfe	r0,r0,r0		# borrow?-1:0
	vcipher	$out0,$out0,v25
	vcipher	$out1,$out1,v25
	vcipher	$out2,$out2,v25
	vcipher	$out3,$out3,v25
	vcipher	$out4,$out4,v25
	vcipher	$out5,$out5,v25
	vcipher	$out6,$out6,v25
	vcipher	$out7,$out7,v25

	and	r0,r0,r11
	addi	$key_,$sp,$FRAME+15	# rewind $key_
	vcipher	$out0,$out0,v26
	vcipher	$out1,$out1,v26
	vcipher	$out2,$out2,v26
	vcipher	$out3,$out3,v26
	vcipher	$out4,$out4,v26
	vcipher	$out5,$out5,v26
	vcipher	$out6,$out6,v26
	vcipher	$out7,$out7,v26
	lvx	v24,$x00,$key_		# re-pre-load round[1]

	subic	$len,$len,129		# $len-=129
	vcipher	$out0,$out0,v27
	addi	$len,$len,1		# $len-=128 really
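	# Why 129 and then +1: subic must borrow when 128 bytes or
	# fewer remain, i.e. exactly when this is the final batch of
	# eight blocks; the addi then restores the net $len-=128.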
	vcipher	$out1,$out1,v27
	vcipher	$out2,$out2,v27
	vcipher	$out3,$out3,v27
	vcipher	$out4,$out4,v27
	vcipher	$out5,$out5,v27
	vcipher	$out6,$out6,v27
	vcipher	$out7,$out7,v27
	lvx	v25,$x10,$key_		# re-pre-load round[2]

	vcipher	$out0,$out0,v28
	lvx_u	$in0,$x00,$inp		# load input
	vcipher	$out1,$out1,v28
	lvx_u	$in1,$x10,$inp
	vcipher	$out2,$out2,v28
	lvx_u	$in2,$x20,$inp
	vcipher	$out3,$out3,v28
	lvx_u	$in3,$x30,$inp
	vcipher	$out4,$out4,v28
	lvx_u	$in4,$x40,$inp
	vcipher	$out5,$out5,v28
	lvx_u	$in5,$x50,$inp
	vcipher	$out6,$out6,v28
	lvx_u	$in6,$x60,$inp
	vcipher	$out7,$out7,v28
	lvx_u	$in7,$x70,$inp
	addi	$inp,$inp,0x80

	vcipher	$out0,$out0,v29
	le?vperm	$in0,$in0,$in0,$inpperm
	vcipher	$out1,$out1,v29
	le?vperm	$in1,$in1,$in1,$inpperm
	vcipher	$out2,$out2,v29
	le?vperm	$in2,$in2,$in2,$inpperm
	vcipher	$out3,$out3,v29
	le?vperm	$in3,$in3,$in3,$inpperm
	vcipher	$out4,$out4,v29
	le?vperm	$in4,$in4,$in4,$inpperm
	vcipher	$out5,$out5,v29
	le?vperm	$in5,$in5,$in5,$inpperm
	vcipher	$out6,$out6,v29
	le?vperm	$in6,$in6,$in6,$inpperm
	vcipher	$out7,$out7,v29
	le?vperm	$in7,$in7,$in7,$inpperm

	add	$inp,$inp,r0		# $inp is adjusted in such
					# way that at exit from the
					# loop inX-in7 are loaded
					# with last "words"
	subfe.	r0,r0,r0		# borrow?-1:0
	vcipher	$out0,$out0,v30
	vxor	$in0,$in0,v31		# xor with last round key
	vcipher	$out1,$out1,v30
	vxor	$in1,$in1,v31
	vcipher	$out2,$out2,v30
	vxor	$in2,$in2,v31
	vcipher	$out3,$out3,v30
	vxor	$in3,$in3,v31
	vcipher	$out4,$out4,v30
	vxor	$in4,$in4,v31
	vcipher	$out5,$out5,v30
	vxor	$in5,$in5,v31
	vcipher	$out6,$out6,v30
	vxor	$in6,$in6,v31
	vcipher	$out7,$out7,v30
	vxor	$in7,$in7,v31

	bne	Lctr32_enc8x_break	# did $len-129 borrow?

	vcipherlast	$in0,$out0,$in0
	vcipherlast	$in1,$out1,$in1
	vadduqm	$out1,$ivec,$one	# counter values ...
	vcipherlast	$in2,$out2,$in2
	vadduqm	$out2,$ivec,$two
	vxor	$out0,$ivec,$rndkey0	# ... xored with rndkey[0]
	vcipherlast	$in3,$out3,$in3
	vadduqm	$out3,$out1,$two
	vxor	$out1,$out1,$rndkey0
	vcipherlast	$in4,$out4,$in4
	vadduqm	$out4,$out2,$two
	vxor	$out2,$out2,$rndkey0
	vcipherlast	$in5,$out5,$in5
	vadduqm	$out5,$out3,$two
	vxor	$out3,$out3,$rndkey0
	vcipherlast	$in6,$out6,$in6
	vadduqm	$out6,$out4,$two
	vxor	$out4,$out4,$rndkey0
	vcipherlast	$in7,$out7,$in7
	vadduqm	$out7,$out5,$two
	vxor	$out5,$out5,$rndkey0
	le?vperm	$in0,$in0,$in0,$inpperm
	vadduqm	$ivec,$out6,$two	# next counter value
	vxor	$out6,$out6,$rndkey0
	le?vperm	$in1,$in1,$in1,$inpperm
	vxor	$out7,$out7,$rndkey0
	mtctr	$rounds

	vcipher	$out0,$out0,v24
	stvx_u	$in0,$x00,$out
	le?vperm	$in2,$in2,$in2,$inpperm
	vcipher	$out1,$out1,v24
	stvx_u	$in1,$x10,$out
	le?vperm	$in3,$in3,$in3,$inpperm
	vcipher	$out2,$out2,v24
	stvx_u	$in2,$x20,$out
	le?vperm	$in4,$in4,$in4,$inpperm
	vcipher	$out3,$out3,v24
	stvx_u	$in3,$x30,$out
	le?vperm	$in5,$in5,$in5,$inpperm
	vcipher	$out4,$out4,v24
	stvx_u	$in4,$x40,$out
	le?vperm	$in6,$in6,$in6,$inpperm
	vcipher	$out5,$out5,v24
	stvx_u	$in5,$x50,$out
	le?vperm	$in7,$in7,$in7,$inpperm
	vcipher	$out6,$out6,v24
	stvx_u	$in6,$x60,$out
	vcipher	$out7,$out7,v24
	stvx_u	$in7,$x70,$out
	addi	$out,$out,0x80

	b	Loop_ctr32_enc8x_middle

.align	5
Lctr32_enc8x_break:
	cmpwi	$len,-0x60
	blt	Lctr32_enc8x_one
	nop
	beq	Lctr32_enc8x_two
	cmpwi	$len,-0x40
	blt	Lctr32_enc8x_three
	nop
	beq	Lctr32_enc8x_four
	cmpwi	$len,-0x20
	blt	Lctr32_enc8x_five
	nop
	beq	Lctr32_enc8x_six
	cmpwi	$len,0x00
	blt	Lctr32_enc8x_seven

Lctr32_enc8x_eight:
	vcipherlast	$out0,$out0,$in0
	vcipherlast	$out1,$out1,$in1
	vcipherlast	$out2,$out2,$in2
	vcipherlast	$out3,$out3,$in3
	vcipherlast	$out4,$out4,$in4
	vcipherlast	$out5,$out5,$in5
	vcipherlast	$out6,$out6,$in6
	vcipherlast	$out7,$out7,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x30,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x40,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x50,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out6,$x60,$out
	stvx_u	$out7,$x70,$out
	addi	$out,$out,0x80
	b	Lctr32_enc8x_done

.align	5
Lctr32_enc8x_seven:
	vcipherlast	$out0,$out0,$in1
	vcipherlast	$out1,$out1,$in2
	vcipherlast	$out2,$out2,$in3
	vcipherlast	$out3,$out3,$in4
	vcipherlast	$out4,$out4,$in5
	vcipherlast	$out5,$out5,$in6
	vcipherlast	$out6,$out6,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x30,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x40,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x50,$out
	stvx_u	$out6,$x60,$out
	addi	$out,$out,0x70
	b	Lctr32_enc8x_done

.align	5
Lctr32_enc8x_six:
	vcipherlast	$out0,$out0,$in2
	vcipherlast	$out1,$out1,$in3
	vcipherlast	$out2,$out2,$in4
	vcipherlast	$out3,$out3,$in5
	vcipherlast	$out4,$out4,$in6
	vcipherlast	$out5,$out5,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x30,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x40,$out
	stvx_u	$out5,$x50,$out
	addi	$out,$out,0x60
	b	Lctr32_enc8x_done

.align	5
Lctr32_enc8x_five:
	vcipherlast	$out0,$out0,$in3
	vcipherlast	$out1,$out1,$in4
	vcipherlast	$out2,$out2,$in5
	vcipherlast	$out3,$out3,$in6
	vcipherlast	$out4,$out4,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x30,$out
	stvx_u	$out4,$x40,$out
	addi	$out,$out,0x50
	b	Lctr32_enc8x_done

.align	5
Lctr32_enc8x_four:
	vcipherlast	$out0,$out0,$in4
	vcipherlast	$out1,$out1,$in5
	vcipherlast	$out2,$out2,$in6
	vcipherlast	$out3,$out3,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x20,$out
	stvx_u	$out3,$x30,$out
	addi	$out,$out,0x40
	b	Lctr32_enc8x_done

.align	5
Lctr32_enc8x_three:
	vcipherlast	$out0,$out0,$in5
	vcipherlast	$out1,$out1,$in6
	vcipherlast	$out2,$out2,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	stvx_u	$out2,$x20,$out
	addi	$out,$out,0x30
	b	Lctr32_enc8x_done

.align	5
Lctr32_enc8x_two:
	vcipherlast	$out0,$out0,$in6
	vcipherlast	$out1,$out1,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	stvx_u	$out1,$x10,$out
	addi	$out,$out,0x20
	b	Lctr32_enc8x_done

.align	5
Lctr32_enc8x_one:
	vcipherlast	$out0,$out0,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	stvx_u	$out0,0,$out
	addi	$out,$out,0x10

Lctr32_enc8x_done:
	li	r10,`$FRAME+15`
	li	r11,`$FRAME+31`
	stvx	$inpperm,r10,$sp	# wipe copies of round keys
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32
	stvx	$inpperm,r10,$sp
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32
	stvx	$inpperm,r10,$sp
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32
	stvx	$inpperm,r10,$sp
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32

	mtspr	256,$vrsave
	lvx	v20,r10,$sp		# ABI says so
	addi	r10,r10,32
	lvx	v21,r11,$sp
	addi	r11,r11,32
	lvx	v22,r10,$sp
	addi	r10,r10,32
	lvx	v23,r11,$sp
	addi	r11,r11,32
	lvx	v24,r10,$sp
	addi	r10,r10,32
	lvx	v25,r11,$sp
	addi	r11,r11,32
	lvx	v26,r10,$sp
	addi	r10,r10,32
	lvx	v27,r11,$sp
	addi	r11,r11,32
	lvx	v28,r10,$sp
	addi	r10,r10,32
	lvx	v29,r11,$sp
	addi	r11,r11,32
	lvx	v30,r10,$sp
	lvx	v31,r11,$sp
	$POP	r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	$POP	r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	$POP	r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	$POP	r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	$POP	r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	$POP	r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	addi	$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
	blr
	.long	0
	.byte	0,12,0x14,0,0x80,6,6,0
	.long	0
.size	.${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks
___
}}	}}}

#########################################################################
{{{	# XTS procedures						#
# int aes_p8_xts_[en|de]crypt(const char *inp, char *out, size_t len,	#
#                             const AES_KEY *key1, const AES_KEY *key2,#
#                             [const] unsigned char iv[16]);		#
# If $key2 is NULL, then a "tweak chaining" mode is engaged, in which	#
# the input tweak value is assumed to be encrypted already, and the	#
# last tweak value, one suitable for a consecutive call on the same	#
# chunk of data, is written back to the original buffer. In addition,	#
# in "tweak chaining" mode only complete input blocks are processed.	#
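# In other words, a long stream can be split across several calls:	#
# pass key2 on the first call so the tweak gets encrypted, then pass	#
# key2==NULL on later calls, reusing the iv buffer into which the	#
# previous call wrote the chained tweak.				#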

my ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) =	map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout) =				map("v$_",(0..2));
my ($output,$inptail,$inpperm,$leperm,$keyperm) =	map("v$_",(3..7));
my ($tweak,$seven,$eighty7,$tmp,$tweak1) =		map("v$_",(8..12));
my $taillen = $key2;

	($inp,$idx) = ($idx,$inp);	# reassign

$code.=<<___;
.globl	.${prefix}_xts_encrypt
	mr	$inp,r3			# reassign
	li	r3,-1
	${UCMP}i	$len,16
	bltlr-

	lis	r0,0xfff0
	mfspr	r12,256			# save vrsave
	li	r11,0
	mtspr	256,r0

	vspltisb	$seven,0x07	# 0x070707..07
	le?lvsl	$leperm,r11,r11
	le?vspltisb	$tmp,0x0f
	le?vxor	$leperm,$leperm,$seven

	li	$idx,15
	lvx	$tweak,0,$ivp		# load [unaligned] iv
	lvsl	$inpperm,0,$ivp
	lvx	$inptail,$idx,$ivp
	le?vxor	$inpperm,$inpperm,$tmp
	vperm	$tweak,$tweak,$inptail,$inpperm

	neg	r11,$inp
	lvsr	$inpperm,0,r11		# prepare for unaligned load
	lvx	$inout,0,$inp
	addi	$inp,$inp,15		# 15 is not typo
	le?vxor	$inpperm,$inpperm,$tmp

	${UCMP}i	$key2,0		# key2==NULL?
	beq	Lxts_enc_no_key2

	?lvsl	$keyperm,0,$key2	# prepare for unaligned key
	lwz	$rounds,240($key2)
	srwi	$rounds,$rounds,1
	subi	$rounds,$rounds,1
	li	$idx,16

	lvx	$rndkey0,0,$key2
	lvx	$rndkey1,$idx,$key2
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$tweak,$tweak,$rndkey0
	lvx	$rndkey0,$idx,$key2
	addi	$idx,$idx,16
	mtctr	$rounds

Ltweak_xts_enc:
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$tweak,$tweak,$rndkey1
	lvx	$rndkey1,$idx,$key2
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher	$tweak,$tweak,$rndkey0
	lvx	$rndkey0,$idx,$key2
	addi	$idx,$idx,16
	bdnz	Ltweak_xts_enc

	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$tweak,$tweak,$rndkey1
	lvx	$rndkey1,$idx,$key2
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipherlast	$tweak,$tweak,$rndkey0

	li	$ivp,0			# don't chain the tweak
	b	Lxts_enc

Lxts_enc_no_key2:
	li	$idx,-16
	and	$len,$len,$idx		# in "tweak chaining"
					# mode only complete
					# blocks are processed
Lxts_enc:
	lvx	$inptail,0,$inp
	addi	$inp,$inp,16

	?lvsl	$keyperm,0,$key1	# prepare for unaligned key
	lwz	$rounds,240($key1)
	srwi	$rounds,$rounds,1
	subi	$rounds,$rounds,1
	li	$idx,16

	vslb	$eighty7,$seven,$seven		# 0x808080..80
	vor	$eighty7,$eighty7,$seven	# 0x878787..87
	vspltisb	$tmp,1			# 0x010101..01
	vsldoi	$eighty7,$eighty7,$tmp,15	# 0x870101..01
2004
2005 ${UCMP}i $len,96
2006 bge _aesp8_xts_encrypt6x
2007
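	# branchless tail fixup: if fewer than 32 bytes remain, pull the
	# input pointer back by 16-taillen bytes (a full block when there
	# is no tail) so the next 16-byte load cannot run past the end of
	# the input; taillen is left negative here on purpose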
2008 andi. $taillen,$len,15
2009 subic r0,$len,32
2010 subi $taillen,$taillen,16
2011 subfe r0,r0,r0
2012 and r0,r0,$taillen
2013 add $inp,$inp,r0
2014
2015 lvx $rndkey0,0,$key1
2016 lvx $rndkey1,$idx,$key1
2017 addi $idx,$idx,16
2018 vperm $inout,$inout,$inptail,$inpperm
2019 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2020 vxor $inout,$inout,$tweak
2021 vxor $inout,$inout,$rndkey0
2022 lvx $rndkey0,$idx,$key1
2023 addi $idx,$idx,16
2024 mtctr $rounds
2025 b Loop_xts_enc
2026
2027.align 5
2028Loop_xts_enc:
2029 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2030 vcipher $inout,$inout,$rndkey1
2031 lvx $rndkey1,$idx,$key1
2032 addi $idx,$idx,16
2033 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2034 vcipher $inout,$inout,$rndkey0
2035 lvx $rndkey0,$idx,$key1
2036 addi $idx,$idx,16
2037 bdnz Loop_xts_enc
2038
2039 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2040 vcipher $inout,$inout,$rndkey1
2041 lvx $rndkey1,$idx,$key1
2042 li $idx,16
2043 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2044 vxor $rndkey0,$rndkey0,$tweak
2045 vcipherlast $output,$inout,$rndkey0
2046
2047 le?vperm $tmp,$output,$output,$leperm
2048 be?nop
2049 le?stvx_u $tmp,0,$out
2050 be?stvx_u $output,0,$out
2051 addi $out,$out,16
2052
2053 subic. $len,$len,16
2054 beq Lxts_enc_done
2055
2056 vmr $inout,$inptail
2057 lvx $inptail,0,$inp
2058 addi $inp,$inp,16
2059 lvx $rndkey0,0,$key1
2060 lvx $rndkey1,$idx,$key1
2061 addi $idx,$idx,16
2062
2063 subic r0,$len,32
2064 subfe r0,r0,r0
2065 and r0,r0,$taillen
2066 add $inp,$inp,r0
2067
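	# multiply the tweak by alpha in GF(2^128) without branching:
	# vsrab smears each byte's top bit into a 0x00/0xff mask, vaddubm
	# doubles every byte, vsldoi rotates each mask to the next byte
	# (the top byte's mask wraps around to the 0x87 position), and
	# the 0x870101..01 constant turns the masks into inter-byte
	# carries plus the x^128=x^7+x^2+x+1 reduction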
2068 vsrab $tmp,$tweak,$seven # next tweak value
2069 vaddubm $tweak,$tweak,$tweak
2070 vsldoi $tmp,$tmp,$tmp,15
2071 vand $tmp,$tmp,$eighty7
2072 vxor $tweak,$tweak,$tmp
2073
2074 vperm $inout,$inout,$inptail,$inpperm
2075 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2076 vxor $inout,$inout,$tweak
2077 vxor $output,$output,$rndkey0 # just in case $len<16
2078 vxor $inout,$inout,$rndkey0
2079 lvx $rndkey0,$idx,$key1
2080 addi $idx,$idx,16
2081
2082 mtctr $rounds
2083 ${UCMP}i $len,16
2084 bge Loop_xts_enc
2085
2086 vxor $output,$output,$tweak
2087 lvsr $inpperm,0,$len # $inpperm is no longer needed
2088 vxor $inptail,$inptail,$inptail # $inptail is no longer needed
2089 vspltisb $tmp,-1
2090 vperm $inptail,$inptail,$tmp,$inpperm
2091 vsel $inout,$inout,$output,$inptail
2092
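	# ciphertext stealing: vsel above padded the short tail with
	# bytes of the last ciphertext block; the loop below copies the
	# leading taillen bytes of that block forward to become the final
	# partial block, then the merged block is encrypted once more via
	# Loop_xts_enc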
2093 subi r11,$out,17
2094 subi $out,$out,16
2095 mtctr $len
2096 li $len,16
2097Loop_xts_enc_steal:
2098 lbzu r0,1(r11)
2099 stb r0,16(r11)
2100 bdnz Loop_xts_enc_steal
2101
2102 mtctr $rounds
2103 b Loop_xts_enc # one more time...
2104
2105Lxts_enc_done:
2106 ${UCMP}i $ivp,0
2107 beq Lxts_enc_ret
2108
2109 vsrab $tmp,$tweak,$seven # next tweak value
2110 vaddubm $tweak,$tweak,$tweak
2111 vsldoi $tmp,$tmp,$tmp,15
2112 vand $tmp,$tmp,$eighty7
2113 vxor $tweak,$tweak,$tmp
2114
2115 le?vperm $tweak,$tweak,$tweak,$leperm
2116 stvx_u $tweak,0,$ivp
2117
2118Lxts_enc_ret:
2119 mtspr 256,r12 # restore vrsave
2120 li r3,0
2121 blr
2122 .long 0
2123 .byte 0,12,0x04,0,0x80,6,6,0
2124 .long 0
2125.size .${prefix}_xts_encrypt,.-.${prefix}_xts_encrypt
2126
2127.globl .${prefix}_xts_decrypt
2128.align 5
2129.${prefix}_xts_decrypt:
2130 mr $inp,r3 # reassign
2131 li r3,-1
2132 ${UCMP}i $len,16
2133 bltlr-
2134
2135 lis r0,0xfff8
2136 mfspr r12,256 # save vrsave
2137 li r11,0
2138 mtspr 256,r0
2139
2140 andi. r0,$len,15
2141 neg r0,r0
2142 andi. r0,r0,16
2143 sub $len,$len,r0
2144
2145 vspltisb $seven,0x07 # 0x070707..07
2146 le?lvsl $leperm,r11,r11
2147 le?vspltisb $tmp,0x0f
2148 le?vxor $leperm,$leperm,$seven
2149
2150 li $idx,15
2151 lvx $tweak,0,$ivp # load [unaligned] iv
2152 lvsl $inpperm,0,$ivp
2153 lvx $inptail,$idx,$ivp
2154 le?vxor $inpperm,$inpperm,$tmp
2155 vperm $tweak,$tweak,$inptail,$inpperm
2156
2157 neg r11,$inp
2158 lvsr $inpperm,0,r11 # prepare for unaligned load
2159 lvx $inout,0,$inp
2160 addi $inp,$inp,15 # 15 is not a typo: the second lvx below fetches the rest of the unaligned block
2161 le?vxor $inpperm,$inpperm,$tmp
2162
2163 ${UCMP}i $key2,0 # key2==NULL?
2164 beq Lxts_dec_no_key2
2165
2166 ?lvsl $keyperm,0,$key2 # prepare for unaligned key
2167 lwz $rounds,240($key2)
2168 srwi $rounds,$rounds,1
2169 subi $rounds,$rounds,1
2170 li $idx,16
2171
2172 lvx $rndkey0,0,$key2
2173 lvx $rndkey1,$idx,$key2
2174 addi $idx,$idx,16
2175 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2176 vxor $tweak,$tweak,$rndkey0
2177 lvx $rndkey0,$idx,$key2
2178 addi $idx,$idx,16
2179 mtctr $rounds
2180
2181Ltweak_xts_dec:
2182 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2183 vcipher $tweak,$tweak,$rndkey1
2184 lvx $rndkey1,$idx,$key2
2185 addi $idx,$idx,16
2186 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2187 vcipher $tweak,$tweak,$rndkey0
2188 lvx $rndkey0,$idx,$key2
2189 addi $idx,$idx,16
2190 bdnz Ltweak_xts_dec
2191
2192 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2193 vcipher $tweak,$tweak,$rndkey1
2194 lvx $rndkey1,$idx,$key2
2195 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2196 vcipherlast $tweak,$tweak,$rndkey0
2197
2198 li $ivp,0 # don't chain the tweak
2199 b Lxts_dec
2200
2201Lxts_dec_no_key2:
2202 neg $idx,$len
2203 andi. $idx,$idx,15
2204 add $len,$len,$idx # in "tweak chaining"
2205 # mode only complete
2206 # blocks are processed
2207Lxts_dec:
2208 lvx $inptail,0,$inp
2209 addi $inp,$inp,16
2210
2211 ?lvsl $keyperm,0,$key1 # prepare for unaligned key
2212 lwz $rounds,240($key1)
2213 srwi $rounds,$rounds,1
2214 subi $rounds,$rounds,1
2215 li $idx,16
2216
2217 vslb $eighty7,$seven,$seven # 0x808080..80
2218 vor $eighty7,$eighty7,$seven # 0x878787..87
2219 vspltisb $tmp,1 # 0x010101..01
2220 vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01
2221
2222 ${UCMP}i $len,96
2223 bge _aesp8_xts_decrypt6x
2224
2225 lvx $rndkey0,0,$key1
2226 lvx $rndkey1,$idx,$key1
2227 addi $idx,$idx,16
2228 vperm $inout,$inout,$inptail,$inpperm
2229 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2230 vxor $inout,$inout,$tweak
2231 vxor $inout,$inout,$rndkey0
2232 lvx $rndkey0,$idx,$key1
2233 addi $idx,$idx,16
2234 mtctr $rounds
2235
2236 ${UCMP}i $len,16
2237 blt Ltail_xts_dec
2238 be?b Loop_xts_dec
2239
2240.align 5
2241Loop_xts_dec:
2242 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2243 vncipher $inout,$inout,$rndkey1
2244 lvx $rndkey1,$idx,$key1
2245 addi $idx,$idx,16
2246 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2247 vncipher $inout,$inout,$rndkey0
2248 lvx $rndkey0,$idx,$key1
2249 addi $idx,$idx,16
2250 bdnz Loop_xts_dec
2251
2252 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2253 vncipher $inout,$inout,$rndkey1
2254 lvx $rndkey1,$idx,$key1
2255 li $idx,16
2256 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2257 vxor $rndkey0,$rndkey0,$tweak
2258 vncipherlast $output,$inout,$rndkey0
2259
2260 le?vperm $tmp,$output,$output,$leperm
2261 be?nop
2262 le?stvx_u $tmp,0,$out
2263 be?stvx_u $output,0,$out
2264 addi $out,$out,16
2265
2266 subic. $len,$len,16
2267 beq Lxts_dec_done
2268
2269 vmr $inout,$inptail
2270 lvx $inptail,0,$inp
2271 addi $inp,$inp,16
2272 lvx $rndkey0,0,$key1
2273 lvx $rndkey1,$idx,$key1
2274 addi $idx,$idx,16
2275
2276 vsrab $tmp,$tweak,$seven # next tweak value
2277 vaddubm $tweak,$tweak,$tweak
2278 vsldoi $tmp,$tmp,$tmp,15
2279 vand $tmp,$tmp,$eighty7
2280 vxor $tweak,$tweak,$tmp
2281
2282 vperm $inout,$inout,$inptail,$inpperm
2283 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2284 vxor $inout,$inout,$tweak
2285 vxor $inout,$inout,$rndkey0
2286 lvx $rndkey0,$idx,$key1
2287 addi $idx,$idx,16
2288
2289 mtctr $rounds
2290 ${UCMP}i $len,16
2291 bge Loop_xts_dec
2292
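	# XTS-CTS decryption swaps tweak order: the last full ciphertext
	# block is decrypted with the next tweak (tweak1) computed below,
	# while the current tweak is kept for the stolen partial block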
2293Ltail_xts_dec:
2294 vsrab $tmp,$tweak,$seven # next tweak value
2295 vaddubm $tweak1,$tweak,$tweak
2296 vsldoi $tmp,$tmp,$tmp,15
2297 vand $tmp,$tmp,$eighty7
2298 vxor $tweak1,$tweak1,$tmp
2299
2300 subi $inp,$inp,16
2301 add $inp,$inp,$len
2302
2303 vxor $inout,$inout,$tweak # undo the tweak applied earlier...
2304 vxor $inout,$inout,$tweak1 # ...the last full block takes tweak1
2305
2306Loop_xts_dec_short:
2307 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2308 vncipher $inout,$inout,$rndkey1
2309 lvx $rndkey1,$idx,$key1
2310 addi $idx,$idx,16
2311 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2312 vncipher $inout,$inout,$rndkey0
2313 lvx $rndkey0,$idx,$key1
2314 addi $idx,$idx,16
2315 bdnz Loop_xts_dec_short
2316
2317 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2318 vncipher $inout,$inout,$rndkey1
2319 lvx $rndkey1,$idx,$key1
2320 li $idx,16
2321 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2322 vxor $rndkey0,$rndkey0,$tweak1
2323 vncipherlast $output,$inout,$rndkey0
2324
2325 le?vperm $tmp,$output,$output,$leperm
2326 be?nop
2327 le?stvx_u $tmp,0,$out
2328 be?stvx_u $output,0,$out
2329
2330 vmr $inout,$inptail
2331 lvx $inptail,0,$inp
2332 #addi $inp,$inp,16
2333 lvx $rndkey0,0,$key1
2334 lvx $rndkey1,$idx,$key1
2335 addi $idx,$idx,16
2336 vperm $inout,$inout,$inptail,$inpperm
2337 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2338
2339 lvsr $inpperm,0,$len # $inpperm is no longer needed
2340 vxor $inptail,$inptail,$inptail # $inptail is no longer needed
2341 vspltisb $tmp,-1
2342 vperm $inptail,$inptail,$tmp,$inpperm
2343 vsel $inout,$inout,$output,$inptail
2344
2345 vxor $rndkey0,$rndkey0,$tweak
2346 vxor $inout,$inout,$rndkey0
2347 lvx $rndkey0,$idx,$key1
2348 addi $idx,$idx,16
2349
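	# steal loop: copy the leading taillen bytes of the block just
	# stored forward into the partial tail, then decrypt the merged
	# block once more via Loop_xts_dec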
2350 subi r11,$out,1
2351 mtctr $len
2352 li $len,16
2353Loop_xts_dec_steal:
2354 lbzu r0,1(r11)
2355 stb r0,16(r11)
2356 bdnz Loop_xts_dec_steal
2357
2358 mtctr $rounds
2359 b Loop_xts_dec # one more time...
2360
2361Lxts_dec_done:
2362 ${UCMP}i $ivp,0
2363 beq Lxts_dec_ret
2364
2365 vsrab $tmp,$tweak,$seven # next tweak value
2366 vaddubm $tweak,$tweak,$tweak
2367 vsldoi $tmp,$tmp,$tmp,15
2368 vand $tmp,$tmp,$eighty7
2369 vxor $tweak,$tweak,$tmp
2370
2371 le?vperm $tweak,$tweak,$tweak,$leperm
2372 stvx_u $tweak,0,$ivp
2373
2374Lxts_dec_ret:
2375 mtspr 256,r12 # restore vrsave
2376 li r3,0
2377 blr
2378 .long 0
2379 .byte 0,12,0x04,0,0x80,6,6,0
2380 .long 0
2381.size .${prefix}_xts_decrypt,.-.${prefix}_xts_decrypt
2382___
2383#########################################################################
2384{{ # Optimized XTS procedures #
2385my $key_=$key2;
2386my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,3,26..31));
2387 $x00=0 if ($flavour =~ /osx/);
2388my ($in0, $in1, $in2, $in3, $in4, $in5 )=map("v$_",(0..5));
2389my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16));
2390my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22));
2391my $rndkey0="v23"; # v24-v25 rotating buffer for first few round keys
2392 # v26-v31 last 6 round keys
2393my ($keyperm)=($out0); # aliases with "caller", redundant assignment
2394my $taillen=$x70;
2395
2396$code.=<<___;
2397.align 5
2398_aesp8_xts_encrypt6x:
2399 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
2400 mflr r11
2401 li r7,`$FRAME+8*16+15`
2402 li r3,`$FRAME+8*16+31`
2403 $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
2404 stvx v20,r7,$sp # ABI says so
2405 addi r7,r7,32
2406 stvx v21,r3,$sp
2407 addi r3,r3,32
2408 stvx v22,r7,$sp
2409 addi r7,r7,32
2410 stvx v23,r3,$sp
2411 addi r3,r3,32
2412 stvx v24,r7,$sp
2413 addi r7,r7,32
2414 stvx v25,r3,$sp
2415 addi r3,r3,32
2416 stvx v26,r7,$sp
2417 addi r7,r7,32
2418 stvx v27,r3,$sp
2419 addi r3,r3,32
2420 stvx v28,r7,$sp
2421 addi r7,r7,32
2422 stvx v29,r3,$sp
2423 addi r3,r3,32
2424 stvx v30,r7,$sp
2425 stvx v31,r3,$sp
2426 li r0,-1
2427 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
2428 li $x10,0x10
2429 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
2430 li $x20,0x20
2431 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
2432 li $x30,0x30
2433 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
2434 li $x40,0x40
2435 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
2436 li $x50,0x50
2437 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
2438 li $x60,0x60
2439 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
2440 li $x70,0x70
2441 mtspr 256,r0
2442
2443 subi $rounds,$rounds,3 # -4 in total
2444
2445 lvx $rndkey0,$x00,$key1 # load key schedule
2446 lvx v30,$x10,$key1
2447 addi $key1,$key1,0x20
2448 lvx v31,$x00,$key1
2449 ?vperm $rndkey0,$rndkey0,v30,$keyperm
2450 addi $key_,$sp,$FRAME+15
2451 mtctr $rounds
2452
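	# vperm-align the key schedule once and stash it on the stack so
	# the unrolled loop can stream round keys back with plain lvx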
2453Load_xts_enc_key:
2454 ?vperm v24,v30,v31,$keyperm
2455 lvx v30,$x10,$key1
2456 addi $key1,$key1,0x20
2457 stvx v24,$x00,$key_ # off-load round[1]
2458 ?vperm v25,v31,v30,$keyperm
2459 lvx v31,$x00,$key1
2460 stvx v25,$x10,$key_ # off-load round[2]
2461 addi $key_,$key_,0x20
2462 bdnz Load_xts_enc_key
2463
2464 lvx v26,$x10,$key1
2465 ?vperm v24,v30,v31,$keyperm
2466 lvx v27,$x20,$key1
2467 stvx v24,$x00,$key_ # off-load round[3]
2468 ?vperm v25,v31,v26,$keyperm
2469 lvx v28,$x30,$key1
2470 stvx v25,$x10,$key_ # off-load round[4]
2471 addi $key_,$sp,$FRAME+15 # rewind $key_
2472 ?vperm v26,v26,v27,$keyperm
2473 lvx v29,$x40,$key1
2474 ?vperm v27,v27,v28,$keyperm
2475 lvx v30,$x50,$key1
2476 ?vperm v28,v28,v29,$keyperm
2477 lvx v31,$x60,$key1
2478 ?vperm v29,v29,v30,$keyperm
2479 lvx $twk5,$x70,$key1 # borrow $twk5
2480 ?vperm v30,v30,v31,$keyperm
2481 lvx v24,$x00,$key_ # pre-load round[1]
2482 ?vperm v31,v31,$twk5,$keyperm
2483 lvx v25,$x10,$key_ # pre-load round[2]
2484
2485 vperm $in0,$inout,$inptail,$inpperm
2486 subi $inp,$inp,31 # undo "caller"
2487 vxor $twk0,$tweak,$rndkey0
2488 vsrab $tmp,$tweak,$seven # next tweak value
2489 vaddubm $tweak,$tweak,$tweak
2490 vsldoi $tmp,$tmp,$tmp,15
2491 vand $tmp,$tmp,$eighty7
2492 vxor $out0,$in0,$twk0
2493 vxor $tweak,$tweak,$tmp
2494
2495 lvx_u $in1,$x10,$inp
2496 vxor $twk1,$tweak,$rndkey0
2497 vsrab $tmp,$tweak,$seven # next tweak value
2498 vaddubm $tweak,$tweak,$tweak
2499 vsldoi $tmp,$tmp,$tmp,15
2500 le?vperm $in1,$in1,$in1,$leperm
2501 vand $tmp,$tmp,$eighty7
2502 vxor $out1,$in1,$twk1
2503 vxor $tweak,$tweak,$tmp
2504
2505 lvx_u $in2,$x20,$inp
2506 andi. $taillen,$len,15
2507 vxor $twk2,$tweak,$rndkey0
2508 vsrab $tmp,$tweak,$seven # next tweak value
2509 vaddubm $tweak,$tweak,$tweak
2510 vsldoi $tmp,$tmp,$tmp,15
2511 le?vperm $in2,$in2,$in2,$leperm
2512 vand $tmp,$tmp,$eighty7
2513 vxor $out2,$in2,$twk2
2514 vxor $tweak,$tweak,$tmp
2515
2516 lvx_u $in3,$x30,$inp
2517 sub $len,$len,$taillen
2518 vxor $twk3,$tweak,$rndkey0
2519 vsrab $tmp,$tweak,$seven # next tweak value
2520 vaddubm $tweak,$tweak,$tweak
2521 vsldoi $tmp,$tmp,$tmp,15
2522 le?vperm $in3,$in3,$in3,$leperm
2523 vand $tmp,$tmp,$eighty7
2524 vxor $out3,$in3,$twk3
2525 vxor $tweak,$tweak,$tmp
2526
2527 lvx_u $in4,$x40,$inp
2528 subi $len,$len,0x60
2529 vxor $twk4,$tweak,$rndkey0
2530 vsrab $tmp,$tweak,$seven # next tweak value
2531 vaddubm $tweak,$tweak,$tweak
2532 vsldoi $tmp,$tmp,$tmp,15
2533 le?vperm $in4,$in4,$in4,$leperm
2534 vand $tmp,$tmp,$eighty7
2535 vxor $out4,$in4,$twk4
2536 vxor $tweak,$tweak,$tmp
2537
2538 lvx_u $in5,$x50,$inp
2539 addi $inp,$inp,0x60
2540 vxor $twk5,$tweak,$rndkey0
2541 vsrab $tmp,$tweak,$seven # next tweak value
2542 vaddubm $tweak,$tweak,$tweak
2543 vsldoi $tmp,$tmp,$tmp,15
2544 le?vperm $in5,$in5,$in5,$leperm
2545 vand $tmp,$tmp,$eighty7
2546 vxor $out5,$in5,$twk5
2547 vxor $tweak,$tweak,$tmp
2548
2549 vxor v31,v31,$rndkey0
2550 mtctr $rounds
2551 b Loop_xts_enc6x
2552
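	# six blocks in flight: the counted loop runs two rounds per
	# iteration, and the post-loop rounds below interleave the
	# next-iteration tweak arithmetic and input loads between
	# vcipher groups to hide their latency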
2553.align 5
2554Loop_xts_enc6x:
2555 vcipher $out0,$out0,v24
2556 vcipher $out1,$out1,v24
2557 vcipher $out2,$out2,v24
2558 vcipher $out3,$out3,v24
2559 vcipher $out4,$out4,v24
2560 vcipher $out5,$out5,v24
2561 lvx v24,$x20,$key_ # round[3]
2562 addi $key_,$key_,0x20
2563
2564 vcipher $out0,$out0,v25
2565 vcipher $out1,$out1,v25
2566 vcipher $out2,$out2,v25
2567 vcipher $out3,$out3,v25
2568 vcipher $out4,$out4,v25
2569 vcipher $out5,$out5,v25
2570 lvx v25,$x10,$key_ # round[4]
2571 bdnz Loop_xts_enc6x
2572
2573 subic $len,$len,96 # $len-=96
2574 vxor $in0,$twk0,v31 # xor with last round key
2575 vcipher $out0,$out0,v24
2576 vcipher $out1,$out1,v24
2577 vsrab $tmp,$tweak,$seven # next tweak value
2578 vxor $twk0,$tweak,$rndkey0
2579 vaddubm $tweak,$tweak,$tweak
2580 vcipher $out2,$out2,v24
2581 vcipher $out3,$out3,v24
2582 vsldoi $tmp,$tmp,$tmp,15
2583 vcipher $out4,$out4,v24
2584 vcipher $out5,$out5,v24
2585
2586 subfe. r0,r0,r0 # borrow?-1:0
2587 vand $tmp,$tmp,$eighty7
2588 vcipher $out0,$out0,v25
2589 vcipher $out1,$out1,v25
2590 vxor $tweak,$tweak,$tmp
2591 vcipher $out2,$out2,v25
2592 vcipher $out3,$out3,v25
2593 vxor $in1,$twk1,v31
2594 vsrab $tmp,$tweak,$seven # next tweak value
2595 vxor $twk1,$tweak,$rndkey0
2596 vcipher $out4,$out4,v25
2597 vcipher $out5,$out5,v25
2598
2599 and r0,r0,$len
2600 vaddubm $tweak,$tweak,$tweak
2601 vsldoi $tmp,$tmp,$tmp,15
2602 vcipher $out0,$out0,v26
2603 vcipher $out1,$out1,v26
2604 vand $tmp,$tmp,$eighty7
2605 vcipher $out2,$out2,v26
2606 vcipher $out3,$out3,v26
2607 vxor $tweak,$tweak,$tmp
2608 vcipher $out4,$out4,v26
2609 vcipher $out5,$out5,v26
2610
2611 add $inp,$inp,r0 # $inp is adjusted in such a
2612 # way that at exit from the
2613 # loop in0-in5 are loaded
2614 # with the last "words"
2615 vxor $in2,$twk2,v31
2616 vsrab $tmp,$tweak,$seven # next tweak value
2617 vxor $twk2,$tweak,$rndkey0
2618 vaddubm $tweak,$tweak,$tweak
2619 vcipher $out0,$out0,v27
2620 vcipher $out1,$out1,v27
2621 vsldoi $tmp,$tmp,$tmp,15
2622 vcipher $out2,$out2,v27
2623 vcipher $out3,$out3,v27
2624 vand $tmp,$tmp,$eighty7
2625 vcipher $out4,$out4,v27
2626 vcipher $out5,$out5,v27
2627
2628 addi $key_,$sp,$FRAME+15 # rewind $key_
2629 vxor $tweak,$tweak,$tmp
2630 vcipher $out0,$out0,v28
2631 vcipher $out1,$out1,v28
2632 vxor $in3,$twk3,v31
2633 vsrab $tmp,$tweak,$seven # next tweak value
2634 vxor $twk3,$tweak,$rndkey0
2635 vcipher $out2,$out2,v28
2636 vcipher $out3,$out3,v28
2637 vaddubm $tweak,$tweak,$tweak
2638 vsldoi $tmp,$tmp,$tmp,15
2639 vcipher $out4,$out4,v28
2640 vcipher $out5,$out5,v28
2641 lvx v24,$x00,$key_ # re-pre-load round[1]
2642 vand $tmp,$tmp,$eighty7
2643
2644 vcipher $out0,$out0,v29
2645 vcipher $out1,$out1,v29
2646 vxor $tweak,$tweak,$tmp
2647 vcipher $out2,$out2,v29
2648 vcipher $out3,$out3,v29
2649 vxor $in4,$twk4,v31
2650 vsrab $tmp,$tweak,$seven # next tweak value
2651 vxor $twk4,$tweak,$rndkey0
2652 vcipher $out4,$out4,v29
2653 vcipher $out5,$out5,v29
2654 lvx v25,$x10,$key_ # re-pre-load round[2]
2655 vaddubm $tweak,$tweak,$tweak
2656 vsldoi $tmp,$tmp,$tmp,15
2657
2658 vcipher $out0,$out0,v30
2659 vcipher $out1,$out1,v30
2660 vand $tmp,$tmp,$eighty7
2661 vcipher $out2,$out2,v30
2662 vcipher $out3,$out3,v30
2663 vxor $tweak,$tweak,$tmp
2664 vcipher $out4,$out4,v30
2665 vcipher $out5,$out5,v30
2666 vxor $in5,$twk5,v31
2667 vsrab $tmp,$tweak,$seven # next tweak value
2668 vxor $twk5,$tweak,$rndkey0
2669
2670 vcipherlast $out0,$out0,$in0
2671 lvx_u $in0,$x00,$inp # load next input block
2672 vaddubm $tweak,$tweak,$tweak
2673 vsldoi $tmp,$tmp,$tmp,15
2674 vcipherlast $out1,$out1,$in1
2675 lvx_u $in1,$x10,$inp
2676 vcipherlast $out2,$out2,$in2
2677 le?vperm $in0,$in0,$in0,$leperm
2678 lvx_u $in2,$x20,$inp
2679 vand $tmp,$tmp,$eighty7
2680 vcipherlast $out3,$out3,$in3
2681 le?vperm $in1,$in1,$in1,$leperm
2682 lvx_u $in3,$x30,$inp
2683 vcipherlast $out4,$out4,$in4
2684 le?vperm $in2,$in2,$in2,$leperm
2685 lvx_u $in4,$x40,$inp
2686 vxor $tweak,$tweak,$tmp
2687 vcipherlast $tmp,$out5,$in5 # last block might be needed
2688 # in stealing mode
2689 le?vperm $in3,$in3,$in3,$leperm
2690 lvx_u $in5,$x50,$inp
2691 addi $inp,$inp,0x60
2692 le?vperm $in4,$in4,$in4,$leperm
2693 le?vperm $in5,$in5,$in5,$leperm
2694
2695 le?vperm $out0,$out0,$out0,$leperm
2696 le?vperm $out1,$out1,$out1,$leperm
2697 stvx_u $out0,$x00,$out # store output
2698 vxor $out0,$in0,$twk0
2699 le?vperm $out2,$out2,$out2,$leperm
2700 stvx_u $out1,$x10,$out
2701 vxor $out1,$in1,$twk1
2702 le?vperm $out3,$out3,$out3,$leperm
2703 stvx_u $out2,$x20,$out
2704 vxor $out2,$in2,$twk2
2705 le?vperm $out4,$out4,$out4,$leperm
2706 stvx_u $out3,$x30,$out
2707 vxor $out3,$in3,$twk3
2708 le?vperm $out5,$tmp,$tmp,$leperm
2709 stvx_u $out4,$x40,$out
2710 vxor $out4,$in4,$twk4
2711 le?stvx_u $out5,$x50,$out
2712 be?stvx_u $tmp, $x50,$out
2713 vxor $out5,$in5,$twk5
2714 addi $out,$out,0x60
2715
2716 mtctr $rounds
2717 beq Loop_xts_enc6x # did $len-=96 borrow?
2718
2719 addic. $len,$len,0x60
2720 beq Lxts_enc6x_zero
2721 cmpwi $len,0x20
2722 blt Lxts_enc6x_one
2723 nop
2724 beq Lxts_enc6x_two
2725 cmpwi $len,0x40
2726 blt Lxts_enc6x_three
2727 nop
2728 beq Lxts_enc6x_four
2729
2730Lxts_enc6x_five:
2731 vxor $out0,$in1,$twk0
2732 vxor $out1,$in2,$twk1
2733 vxor $out2,$in3,$twk2
2734 vxor $out3,$in4,$twk3
2735 vxor $out4,$in5,$twk4
2736
2737 bl _aesp8_xts_enc5x
2738
2739 le?vperm $out0,$out0,$out0,$leperm
2740 vmr $twk0,$twk5 # unused tweak
2741 le?vperm $out1,$out1,$out1,$leperm
2742 stvx_u $out0,$x00,$out # store output
2743 le?vperm $out2,$out2,$out2,$leperm
2744 stvx_u $out1,$x10,$out
2745 le?vperm $out3,$out3,$out3,$leperm
2746 stvx_u $out2,$x20,$out
2747 vxor $tmp,$out4,$twk5 # last block prep for stealing
2748 le?vperm $out4,$out4,$out4,$leperm
2749 stvx_u $out3,$x30,$out
2750 stvx_u $out4,$x40,$out
2751 addi $out,$out,0x50
2752 bne Lxts_enc6x_steal
2753 b Lxts_enc6x_done
2754
2755.align 4
2756Lxts_enc6x_four:
2757 vxor $out0,$in2,$twk0
2758 vxor $out1,$in3,$twk1
2759 vxor $out2,$in4,$twk2
2760 vxor $out3,$in5,$twk3
2761 vxor $out4,$out4,$out4
2762
2763 bl _aesp8_xts_enc5x
2764
2765 le?vperm $out0,$out0,$out0,$leperm
2766 vmr $twk0,$twk4 # unused tweak
2767 le?vperm $out1,$out1,$out1,$leperm
2768 stvx_u $out0,$x00,$out # store output
2769 le?vperm $out2,$out2,$out2,$leperm
2770 stvx_u $out1,$x10,$out
2771 vxor $tmp,$out3,$twk4 # last block prep for stealing
2772 le?vperm $out3,$out3,$out3,$leperm
2773 stvx_u $out2,$x20,$out
2774 stvx_u $out3,$x30,$out
2775 addi $out,$out,0x40
2776 bne Lxts_enc6x_steal
2777 b Lxts_enc6x_done
2778
2779.align 4
2780Lxts_enc6x_three:
2781 vxor $out0,$in3,$twk0
2782 vxor $out1,$in4,$twk1
2783 vxor $out2,$in5,$twk2
2784 vxor $out3,$out3,$out3
2785 vxor $out4,$out4,$out4
2786
2787 bl _aesp8_xts_enc5x
2788
2789 le?vperm $out0,$out0,$out0,$leperm
2790 vmr $twk0,$twk3 # unused tweak
2791 le?vperm $out1,$out1,$out1,$leperm
2792 stvx_u $out0,$x00,$out # store output
2793 vxor $tmp,$out2,$twk3 # last block prep for stealing
2794 le?vperm $out2,$out2,$out2,$leperm
2795 stvx_u $out1,$x10,$out
2796 stvx_u $out2,$x20,$out
2797 addi $out,$out,0x30
2798 bne Lxts_enc6x_steal
2799 b Lxts_enc6x_done
2800
2801.align 4
2802Lxts_enc6x_two:
2803 vxor $out0,$in4,$twk0
2804 vxor $out1,$in5,$twk1
2805 vxor $out2,$out2,$out2
2806 vxor $out3,$out3,$out3
2807 vxor $out4,$out4,$out4
2808
2809 bl _aesp8_xts_enc5x
2810
2811 le?vperm $out0,$out0,$out0,$leperm
2812 vmr $twk0,$twk2 # unused tweak
2813 vxor $tmp,$out1,$twk2 # last block prep for stealing
2814 le?vperm $out1,$out1,$out1,$leperm
2815 stvx_u $out0,$x00,$out # store output
2816 stvx_u $out1,$x10,$out
2817 addi $out,$out,0x20
2818 bne Lxts_enc6x_steal
2819 b Lxts_enc6x_done
2820
2821.align 4
2822Lxts_enc6x_one:
2823 vxor $out0,$in5,$twk0
2824 nop
2825Loop_xts_enc1x:
2826 vcipher $out0,$out0,v24
2827 lvx v24,$x20,$key_ # round[3]
2828 addi $key_,$key_,0x20
2829
2830 vcipher $out0,$out0,v25
2831 lvx v25,$x10,$key_ # round[4]
2832 bdnz Loop_xts_enc1x
2833
2834 add $inp,$inp,$taillen
2835 cmpwi $taillen,0
2836 vcipher $out0,$out0,v24
2837
2838 subi $inp,$inp,16
2839 vcipher $out0,$out0,v25
2840
2841 lvsr $inpperm,0,$taillen
2842 vcipher $out0,$out0,v26
2843
2844 lvx_u $in0,0,$inp
2845 vcipher $out0,$out0,v27
2846
2847 addi $key_,$sp,$FRAME+15 # rewind $key_
2848 vcipher $out0,$out0,v28
2849 lvx v24,$x00,$key_ # re-pre-load round[1]
2850
2851 vcipher $out0,$out0,v29
2852 lvx v25,$x10,$key_ # re-pre-load round[2]
2853 vxor $twk0,$twk0,v31
2854
2855 le?vperm $in0,$in0,$in0,$leperm
2856 vcipher $out0,$out0,v30
2857
2858 vperm $in0,$in0,$in0,$inpperm
2859 vcipherlast $out0,$out0,$twk0
2860
2861 vmr $twk0,$twk1 # unused tweak
2862 vxor $tmp,$out0,$twk1 # last block prep for stealing
2863 le?vperm $out0,$out0,$out0,$leperm
2864 stvx_u $out0,$x00,$out # store output
2865 addi $out,$out,0x10
2866 bne Lxts_enc6x_steal
2867 b Lxts_enc6x_done
2868
2869.align 4
2870Lxts_enc6x_zero:
2871 cmpwi $taillen,0
2872 beq Lxts_enc6x_done
2873
2874 add $inp,$inp,$taillen
2875 subi $inp,$inp,16
2876 lvx_u $in0,0,$inp
2877 lvsr $inpperm,0,$taillen # $in5 is no more
2878 le?vperm $in0,$in0,$in0,$leperm
2879 vperm $in0,$in0,$in0,$inpperm
2880 vxor $tmp,$tmp,$twk0
2881Lxts_enc6x_steal:
2882 vxor $in0,$in0,$twk0
2883 vxor $out0,$out0,$out0
2884 vspltisb $out1,-1
2885 vperm $out0,$out0,$out1,$inpperm
2886 vsel $out0,$in0,$tmp,$out0 # $tmp is last block, remember?
2887
2888 subi r30,$out,17
2889 subi $out,$out,16
2890 mtctr $taillen
2891Loop_xts_enc6x_steal:
2892 lbzu r0,1(r30)
2893 stb r0,16(r30)
2894 bdnz Loop_xts_enc6x_steal
2895
2896 li $taillen,0
2897 mtctr $rounds
2898 b Loop_xts_enc1x # one more time...
2899
2900.align 4
2901Lxts_enc6x_done:
2902 ${UCMP}i $ivp,0
2903 beq Lxts_enc6x_ret
2904
2905 vxor $tweak,$twk0,$rndkey0
2906 le?vperm $tweak,$tweak,$tweak,$leperm
2907 stvx_u $tweak,0,$ivp
2908
2909Lxts_enc6x_ret:
2910 mtlr r11
2911 li r10,`$FRAME+15`
2912 li r11,`$FRAME+31`
2913 stvx $seven,r10,$sp # wipe copies of round keys
2914 addi r10,r10,32
2915 stvx $seven,r11,$sp
2916 addi r11,r11,32
2917 stvx $seven,r10,$sp
2918 addi r10,r10,32
2919 stvx $seven,r11,$sp
2920 addi r11,r11,32
2921 stvx $seven,r10,$sp
2922 addi r10,r10,32
2923 stvx $seven,r11,$sp
2924 addi r11,r11,32
2925 stvx $seven,r10,$sp
2926 addi r10,r10,32
2927 stvx $seven,r11,$sp
2928 addi r11,r11,32
2929
2930 mtspr 256,$vrsave
2931 lvx v20,r10,$sp # ABI says so
2932 addi r10,r10,32
2933 lvx v21,r11,$sp
2934 addi r11,r11,32
2935 lvx v22,r10,$sp
2936 addi r10,r10,32
2937 lvx v23,r11,$sp
2938 addi r11,r11,32
2939 lvx v24,r10,$sp
2940 addi r10,r10,32
2941 lvx v25,r11,$sp
2942 addi r11,r11,32
2943 lvx v26,r10,$sp
2944 addi r10,r10,32
2945 lvx v27,r11,$sp
2946 addi r11,r11,32
2947 lvx v28,r10,$sp
2948 addi r10,r10,32
2949 lvx v29,r11,$sp
2950 addi r11,r11,32
2951 lvx v30,r10,$sp
2952 lvx v31,r11,$sp
2953 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
2954 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
2955 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
2956 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
2957 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
2958 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
2959 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
2960 blr
2961 .long 0
2962 .byte 0,12,0x04,1,0x80,6,6,0
2963 .long 0
2964
2965.align 5
2966_aesp8_xts_enc5x:
2967 vcipher $out0,$out0,v24
2968 vcipher $out1,$out1,v24
2969 vcipher $out2,$out2,v24
2970 vcipher $out3,$out3,v24
2971 vcipher $out4,$out4,v24
2972 lvx v24,$x20,$key_ # round[3]
2973 addi $key_,$key_,0x20
2974
2975 vcipher $out0,$out0,v25
2976 vcipher $out1,$out1,v25
2977 vcipher $out2,$out2,v25
2978 vcipher $out3,$out3,v25
2979 vcipher $out4,$out4,v25
2980 lvx v25,$x10,$key_ # round[4]
2981 bdnz _aesp8_xts_enc5x
2982
2983 add $inp,$inp,$taillen
2984 cmpwi $taillen,0
2985 vcipher $out0,$out0,v24
2986 vcipher $out1,$out1,v24
2987 vcipher $out2,$out2,v24
2988 vcipher $out3,$out3,v24
2989 vcipher $out4,$out4,v24
2990
2991 subi $inp,$inp,16
2992 vcipher $out0,$out0,v25
2993 vcipher $out1,$out1,v25
2994 vcipher $out2,$out2,v25
2995 vcipher $out3,$out3,v25
2996 vcipher $out4,$out4,v25
2997 vxor $twk0,$twk0,v31
2998
2999 vcipher $out0,$out0,v26
3000 lvsr $inpperm,r0,$taillen # $in5 is no more
3001 vcipher $out1,$out1,v26
3002 vcipher $out2,$out2,v26
3003 vcipher $out3,$out3,v26
3004 vcipher $out4,$out4,v26
3005 vxor $in1,$twk1,v31
3006
3007 vcipher $out0,$out0,v27
3008 lvx_u $in0,0,$inp
3009 vcipher $out1,$out1,v27
3010 vcipher $out2,$out2,v27
3011 vcipher $out3,$out3,v27
3012 vcipher $out4,$out4,v27
3013 vxor $in2,$twk2,v31
3014
3015 addi $key_,$sp,$FRAME+15 # rewind $key_
3016 vcipher $out0,$out0,v28
3017 vcipher $out1,$out1,v28
3018 vcipher $out2,$out2,v28
3019 vcipher $out3,$out3,v28
3020 vcipher $out4,$out4,v28
3021 lvx v24,$x00,$key_ # re-pre-load round[1]
3022 vxor $in3,$twk3,v31
3023
3024 vcipher $out0,$out0,v29
3025 le?vperm $in0,$in0,$in0,$leperm
3026 vcipher $out1,$out1,v29
3027 vcipher $out2,$out2,v29
3028 vcipher $out3,$out3,v29
3029 vcipher $out4,$out4,v29
3030 lvx v25,$x10,$key_ # re-pre-load round[2]
3031 vxor $in4,$twk4,v31
3032
3033 vcipher $out0,$out0,v30
3034 vperm $in0,$in0,$in0,$inpperm
3035 vcipher $out1,$out1,v30
3036 vcipher $out2,$out2,v30
3037 vcipher $out3,$out3,v30
3038 vcipher $out4,$out4,v30
3039
3040 vcipherlast $out0,$out0,$twk0
3041 vcipherlast $out1,$out1,$in1
3042 vcipherlast $out2,$out2,$in2
3043 vcipherlast $out3,$out3,$in3
3044 vcipherlast $out4,$out4,$in4
3045 blr
3046 .long 0
3047 .byte 0,12,0x14,0,0,0,0,0
3048
3049.align 5
3050_aesp8_xts_decrypt6x:
3051 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
3052 mflr r11
3053 li r7,`$FRAME+8*16+15`
3054 li r3,`$FRAME+8*16+31`
3055 $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
3056 stvx v20,r7,$sp # ABI says so
3057 addi r7,r7,32
3058 stvx v21,r3,$sp
3059 addi r3,r3,32
3060 stvx v22,r7,$sp
3061 addi r7,r7,32
3062 stvx v23,r3,$sp
3063 addi r3,r3,32
3064 stvx v24,r7,$sp
3065 addi r7,r7,32
3066 stvx v25,r3,$sp
3067 addi r3,r3,32
3068 stvx v26,r7,$sp
3069 addi r7,r7,32
3070 stvx v27,r3,$sp
3071 addi r3,r3,32
3072 stvx v28,r7,$sp
3073 addi r7,r7,32
3074 stvx v29,r3,$sp
3075 addi r3,r3,32
3076 stvx v30,r7,$sp
3077 stvx v31,r3,$sp
3078 li r0,-1
3079 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
3080 li $x10,0x10
3081 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
3082 li $x20,0x20
3083 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
3084 li $x30,0x30
3085 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
3086 li $x40,0x40
3087 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
3088 li $x50,0x50
3089 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
3090 li $x60,0x60
3091 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
3092 li $x70,0x70
3093 mtspr 256,r0
3094
3095 subi $rounds,$rounds,3 # -4 in total
3096
3097 lvx $rndkey0,$x00,$key1 # load key schedule
3098 lvx v30,$x10,$key1
3099 addi $key1,$key1,0x20
3100 lvx v31,$x00,$key1
3101 ?vperm $rndkey0,$rndkey0,v30,$keyperm
3102 addi $key_,$sp,$FRAME+15
3103 mtctr $rounds
3104
3105Load_xts_dec_key:
3106 ?vperm v24,v30,v31,$keyperm
3107 lvx v30,$x10,$key1
3108 addi $key1,$key1,0x20
3109 stvx v24,$x00,$key_ # off-load round[1]
3110 ?vperm v25,v31,v30,$keyperm
3111 lvx v31,$x00,$key1
3112 stvx v25,$x10,$key_ # off-load round[2]
3113 addi $key_,$key_,0x20
3114 bdnz Load_xts_dec_key
3115
3116 lvx v26,$x10,$key1
3117 ?vperm v24,v30,v31,$keyperm
3118 lvx v27,$x20,$key1
3119 stvx v24,$x00,$key_ # off-load round[3]
3120 ?vperm v25,v31,v26,$keyperm
3121 lvx v28,$x30,$key1
3122 stvx v25,$x10,$key_ # off-load round[4]
3123 addi $key_,$sp,$FRAME+15 # rewind $key_
3124 ?vperm v26,v26,v27,$keyperm
3125 lvx v29,$x40,$key1
3126 ?vperm v27,v27,v28,$keyperm
3127 lvx v30,$x50,$key1
3128 ?vperm v28,v28,v29,$keyperm
3129 lvx v31,$x60,$key1
3130 ?vperm v29,v29,v30,$keyperm
3131 lvx $twk5,$x70,$key1 # borrow $twk5
3132 ?vperm v30,v30,v31,$keyperm
3133 lvx v24,$x00,$key_ # pre-load round[1]
3134 ?vperm v31,v31,$twk5,$keyperm
3135 lvx v25,$x10,$key_ # pre-load round[2]
3136
3137 vperm $in0,$inout,$inptail,$inpperm
3138 subi $inp,$inp,31 # undo "caller"
3139 vxor $twk0,$tweak,$rndkey0
3140 vsrab $tmp,$tweak,$seven # next tweak value
3141 vaddubm $tweak,$tweak,$tweak
3142 vsldoi $tmp,$tmp,$tmp,15
3143 vand $tmp,$tmp,$eighty7
3144 vxor $out0,$in0,$twk0
3145 vxor $tweak,$tweak,$tmp
3146
3147 lvx_u $in1,$x10,$inp
3148 vxor $twk1,$tweak,$rndkey0
3149 vsrab $tmp,$tweak,$seven # next tweak value
3150 vaddubm $tweak,$tweak,$tweak
3151 vsldoi $tmp,$tmp,$tmp,15
3152 le?vperm $in1,$in1,$in1,$leperm
3153 vand $tmp,$tmp,$eighty7
3154 vxor $out1,$in1,$twk1
3155 vxor $tweak,$tweak,$tmp
3156
3157 lvx_u $in2,$x20,$inp
3158 andi. $taillen,$len,15
3159 vxor $twk2,$tweak,$rndkey0
3160 vsrab $tmp,$tweak,$seven # next tweak value
3161 vaddubm $tweak,$tweak,$tweak
3162 vsldoi $tmp,$tmp,$tmp,15
3163 le?vperm $in2,$in2,$in2,$leperm
3164 vand $tmp,$tmp,$eighty7
3165 vxor $out2,$in2,$twk2
3166 vxor $tweak,$tweak,$tmp
3167
3168 lvx_u $in3,$x30,$inp
3169 sub $len,$len,$taillen
3170 vxor $twk3,$tweak,$rndkey0
3171 vsrab $tmp,$tweak,$seven # next tweak value
3172 vaddubm $tweak,$tweak,$tweak
3173 vsldoi $tmp,$tmp,$tmp,15
3174 le?vperm $in3,$in3,$in3,$leperm
3175 vand $tmp,$tmp,$eighty7
3176 vxor $out3,$in3,$twk3
3177 vxor $tweak,$tweak,$tmp
3178
3179 lvx_u $in4,$x40,$inp
3180 subi $len,$len,0x60
3181 vxor $twk4,$tweak,$rndkey0
3182 vsrab $tmp,$tweak,$seven # next tweak value
3183 vaddubm $tweak,$tweak,$tweak
3184 vsldoi $tmp,$tmp,$tmp,15
3185 le?vperm $in4,$in4,$in4,$leperm
3186 vand $tmp,$tmp,$eighty7
3187 vxor $out4,$in4,$twk4
3188 vxor $tweak,$tweak,$tmp
3189
3190 lvx_u $in5,$x50,$inp
3191 addi $inp,$inp,0x60
3192 vxor $twk5,$tweak,$rndkey0
3193 vsrab $tmp,$tweak,$seven # next tweak value
3194 vaddubm $tweak,$tweak,$tweak
3195 vsldoi $tmp,$tmp,$tmp,15
3196 le?vperm $in5,$in5,$in5,$leperm
3197 vand $tmp,$tmp,$eighty7
3198 vxor $out5,$in5,$twk5
3199 vxor $tweak,$tweak,$tmp
3200
3201 vxor v31,v31,$rndkey0
3202 mtctr $rounds
3203 b Loop_xts_dec6x
3204
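	# decryption mirror of Loop_xts_enc6x: vncipher rounds with the
	# same interleaved tweak pipeline and branchless tail handling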
3205.align 5
3206Loop_xts_dec6x:
3207 vncipher $out0,$out0,v24
3208 vncipher $out1,$out1,v24
3209 vncipher $out2,$out2,v24
3210 vncipher $out3,$out3,v24
3211 vncipher $out4,$out4,v24
3212 vncipher $out5,$out5,v24
3213 lvx v24,$x20,$key_ # round[3]
3214 addi $key_,$key_,0x20
3215
3216 vncipher $out0,$out0,v25
3217 vncipher $out1,$out1,v25
3218 vncipher $out2,$out2,v25
3219 vncipher $out3,$out3,v25
3220 vncipher $out4,$out4,v25
3221 vncipher $out5,$out5,v25
3222 lvx v25,$x10,$key_ # round[4]
3223 bdnz Loop_xts_dec6x
3224
3225 subic $len,$len,96 # $len-=96
3226 vxor $in0,$twk0,v31 # xor with last round key
3227 vncipher $out0,$out0,v24
3228 vncipher $out1,$out1,v24
3229 vsrab $tmp,$tweak,$seven # next tweak value
3230 vxor $twk0,$tweak,$rndkey0
3231 vaddubm $tweak,$tweak,$tweak
3232 vncipher $out2,$out2,v24
3233 vncipher $out3,$out3,v24
3234 vsldoi $tmp,$tmp,$tmp,15
3235 vncipher $out4,$out4,v24
3236 vncipher $out5,$out5,v24
3237
3238 subfe. r0,r0,r0 # borrow?-1:0
3239 vand $tmp,$tmp,$eighty7
3240 vncipher $out0,$out0,v25
3241 vncipher $out1,$out1,v25
3242 vxor $tweak,$tweak,$tmp
3243 vncipher $out2,$out2,v25
3244 vncipher $out3,$out3,v25
3245 vxor $in1,$twk1,v31
3246 vsrab $tmp,$tweak,$seven # next tweak value
3247 vxor $twk1,$tweak,$rndkey0
3248 vncipher $out4,$out4,v25
3249 vncipher $out5,$out5,v25
3250
3251 and r0,r0,$len
3252 vaddubm $tweak,$tweak,$tweak
3253 vsldoi $tmp,$tmp,$tmp,15
3254 vncipher $out0,$out0,v26
3255 vncipher $out1,$out1,v26
3256 vand $tmp,$tmp,$eighty7
3257 vncipher $out2,$out2,v26
3258 vncipher $out3,$out3,v26
3259 vxor $tweak,$tweak,$tmp
3260 vncipher $out4,$out4,v26
3261 vncipher $out5,$out5,v26
3262
3263 add $inp,$inp,r0 # $inp is adjusted in such a
3264 # way that at exit from the
3265 # loop in0-in5 are loaded
3266 # with the last "words"
3267 vxor $in2,$twk2,v31
3268 vsrab $tmp,$tweak,$seven # next tweak value
3269 vxor $twk2,$tweak,$rndkey0
3270 vaddubm $tweak,$tweak,$tweak
3271 vncipher $out0,$out0,v27
3272 vncipher $out1,$out1,v27
3273 vsldoi $tmp,$tmp,$tmp,15
3274 vncipher $out2,$out2,v27
3275 vncipher $out3,$out3,v27
3276 vand $tmp,$tmp,$eighty7
3277 vncipher $out4,$out4,v27
3278 vncipher $out5,$out5,v27
3279
3280 addi $key_,$sp,$FRAME+15 # rewind $key_
3281 vxor $tweak,$tweak,$tmp
3282 vncipher $out0,$out0,v28
3283 vncipher $out1,$out1,v28
3284 vxor $in3,$twk3,v31
3285 vsrab $tmp,$tweak,$seven # next tweak value
3286 vxor $twk3,$tweak,$rndkey0
3287 vncipher $out2,$out2,v28
3288 vncipher $out3,$out3,v28
3289 vaddubm $tweak,$tweak,$tweak
3290 vsldoi $tmp,$tmp,$tmp,15
3291 vncipher $out4,$out4,v28
3292 vncipher $out5,$out5,v28
3293 lvx v24,$x00,$key_ # re-pre-load round[1]
3294 vand $tmp,$tmp,$eighty7
3295
3296 vncipher $out0,$out0,v29
3297 vncipher $out1,$out1,v29
3298 vxor $tweak,$tweak,$tmp
3299 vncipher $out2,$out2,v29
3300 vncipher $out3,$out3,v29
3301 vxor $in4,$twk4,v31
3302 vsrab $tmp,$tweak,$seven # next tweak value
3303 vxor $twk4,$tweak,$rndkey0
3304 vncipher $out4,$out4,v29
3305 vncipher $out5,$out5,v29
3306 lvx v25,$x10,$key_ # re-pre-load round[2]
3307 vaddubm $tweak,$tweak,$tweak
3308 vsldoi $tmp,$tmp,$tmp,15
3309
3310 vncipher $out0,$out0,v30
3311 vncipher $out1,$out1,v30
3312 vand $tmp,$tmp,$eighty7
3313 vncipher $out2,$out2,v30
3314 vncipher $out3,$out3,v30
3315 vxor $tweak,$tweak,$tmp
3316 vncipher $out4,$out4,v30
3317 vncipher $out5,$out5,v30
3318 vxor $in5,$twk5,v31
3319 vsrab $tmp,$tweak,$seven # next tweak value
3320 vxor $twk5,$tweak,$rndkey0
3321
3322 vncipherlast $out0,$out0,$in0
3323 lvx_u $in0,$x00,$inp # load next input block
3324 vaddubm $tweak,$tweak,$tweak
3325 vsldoi $tmp,$tmp,$tmp,15
3326 vncipherlast $out1,$out1,$in1
3327 lvx_u $in1,$x10,$inp
3328 vncipherlast $out2,$out2,$in2
3329 le?vperm $in0,$in0,$in0,$leperm
3330 lvx_u $in2,$x20,$inp
3331 vand $tmp,$tmp,$eighty7
3332 vncipherlast $out3,$out3,$in3
3333 le?vperm $in1,$in1,$in1,$leperm
3334 lvx_u $in3,$x30,$inp
3335 vncipherlast $out4,$out4,$in4
3336 le?vperm $in2,$in2,$in2,$leperm
3337 lvx_u $in4,$x40,$inp
3338 vxor $tweak,$tweak,$tmp
3339 vncipherlast $out5,$out5,$in5
3340 le?vperm $in3,$in3,$in3,$leperm
3341 lvx_u $in5,$x50,$inp
3342 addi $inp,$inp,0x60
3343 le?vperm $in4,$in4,$in4,$leperm
3344 le?vperm $in5,$in5,$in5,$leperm
3345
3346 le?vperm $out0,$out0,$out0,$leperm
3347 le?vperm $out1,$out1,$out1,$leperm
3348 stvx_u $out0,$x00,$out # store output
3349 vxor $out0,$in0,$twk0
3350 le?vperm $out2,$out2,$out2,$leperm
3351 stvx_u $out1,$x10,$out
3352 vxor $out1,$in1,$twk1
3353 le?vperm $out3,$out3,$out3,$leperm
3354 stvx_u $out2,$x20,$out
3355 vxor $out2,$in2,$twk2
3356 le?vperm $out4,$out4,$out4,$leperm
3357 stvx_u $out3,$x30,$out
3358 vxor $out3,$in3,$twk3
3359 le?vperm $out5,$out5,$out5,$leperm
3360 stvx_u $out4,$x40,$out
3361 vxor $out4,$in4,$twk4
3362 stvx_u $out5,$x50,$out
3363 vxor $out5,$in5,$twk5
3364 addi $out,$out,0x60
3365
3366 mtctr $rounds
3367 beq Loop_xts_dec6x # did $len-=96 borrow?
3368
3369 addic. $len,$len,0x60
3370 beq Lxts_dec6x_zero
3371 cmpwi $len,0x20
3372 blt Lxts_dec6x_one
3373 nop
3374 beq Lxts_dec6x_two
3375 cmpwi $len,0x40
3376 blt Lxts_dec6x_three
3377 nop
3378 beq Lxts_dec6x_four
3379
3380Lxts_dec6x_five:
3381 vxor $out0,$in1,$twk0
3382 vxor $out1,$in2,$twk1
3383 vxor $out2,$in3,$twk2
3384 vxor $out3,$in4,$twk3
3385 vxor $out4,$in5,$twk4
3386
3387 bl _aesp8_xts_dec5x
3388
3389 le?vperm $out0,$out0,$out0,$leperm
3390 vmr $twk0,$twk5 # unused tweak
3391 vxor $twk1,$tweak,$rndkey0
3392 le?vperm $out1,$out1,$out1,$leperm
3393 stvx_u $out0,$x00,$out # store output
3394 vxor $out0,$in0,$twk1
3395 le?vperm $out2,$out2,$out2,$leperm
3396 stvx_u $out1,$x10,$out
3397 le?vperm $out3,$out3,$out3,$leperm
3398 stvx_u $out2,$x20,$out
3399 le?vperm $out4,$out4,$out4,$leperm
3400 stvx_u $out3,$x30,$out
3401 stvx_u $out4,$x40,$out
3402 addi $out,$out,0x50
3403 bne Lxts_dec6x_steal
3404 b Lxts_dec6x_done
3405
3406.align 4
3407Lxts_dec6x_four:
3408 vxor $out0,$in2,$twk0
3409 vxor $out1,$in3,$twk1
3410 vxor $out2,$in4,$twk2
3411 vxor $out3,$in5,$twk3
3412 vxor $out4,$out4,$out4
3413
3414 bl _aesp8_xts_dec5x
3415
3416 le?vperm $out0,$out0,$out0,$leperm
3417 vmr $twk0,$twk4 # unused tweak
3418 vmr $twk1,$twk5
3419 le?vperm $out1,$out1,$out1,$leperm
3420 stvx_u $out0,$x00,$out # store output
3421 vxor $out0,$in0,$twk5
3422 le?vperm $out2,$out2,$out2,$leperm
3423 stvx_u $out1,$x10,$out
3424 le?vperm $out3,$out3,$out3,$leperm
3425 stvx_u $out2,$x20,$out
3426 stvx_u $out3,$x30,$out
3427 addi $out,$out,0x40
3428 bne Lxts_dec6x_steal
3429 b Lxts_dec6x_done
3430
3431.align 4
3432Lxts_dec6x_three:
3433 vxor $out0,$in3,$twk0
3434 vxor $out1,$in4,$twk1
3435 vxor $out2,$in5,$twk2
3436 vxor $out3,$out3,$out3
3437 vxor $out4,$out4,$out4
3438
3439 bl _aesp8_xts_dec5x
3440
3441 le?vperm $out0,$out0,$out0,$leperm
3442 vmr $twk0,$twk3 # unused tweak
3443 vmr $twk1,$twk4
3444 le?vperm $out1,$out1,$out1,$leperm
3445 stvx_u $out0,$x00,$out # store output
3446 vxor $out0,$in0,$twk4
3447 le?vperm $out2,$out2,$out2,$leperm
3448 stvx_u $out1,$x10,$out
3449 stvx_u $out2,$x20,$out
3450 addi $out,$out,0x30
3451 bne Lxts_dec6x_steal
3452 b Lxts_dec6x_done
3453
3454.align 4
3455Lxts_dec6x_two:
3456 vxor $out0,$in4,$twk0
3457 vxor $out1,$in5,$twk1
3458 vxor $out2,$out2,$out2
3459 vxor $out3,$out3,$out3
3460 vxor $out4,$out4,$out4
3461
3462 bl _aesp8_xts_dec5x
3463
3464 le?vperm $out0,$out0,$out0,$leperm
3465 vmr $twk0,$twk2 # unused tweak
3466 vmr $twk1,$twk3
3467 le?vperm $out1,$out1,$out1,$leperm
3468 stvx_u $out0,$x00,$out # store output
3469 vxor $out0,$in0,$twk3
3470 stvx_u $out1,$x10,$out
3471 addi $out,$out,0x20
3472 bne Lxts_dec6x_steal
3473 b Lxts_dec6x_done
3474
3475.align 4
3476Lxts_dec6x_one:
3477 vxor $out0,$in5,$twk0
3478 nop
3479Loop_xts_dec1x:
3480 vncipher $out0,$out0,v24
3481 lvx v24,$x20,$key_ # round[3]
3482 addi $key_,$key_,0x20
3483
3484 vncipher $out0,$out0,v25
3485 lvx v25,$x10,$key_ # round[4]
3486 bdnz Loop_xts_dec1x
3487
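	# r0 is 16 when taillen is zero, 0 otherwise: with no partial
	# tail, step the input pointer back one block so the load below
	# stays within the buffer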
3488 subi r0,$taillen,1
3489 vncipher $out0,$out0,v24
3490
3491 andi. r0,r0,16
3492 cmpwi $taillen,0
3493 vncipher $out0,$out0,v25
3494
3495 sub $inp,$inp,r0
3496 vncipher $out0,$out0,v26
3497
3498 lvx_u $in0,0,$inp
3499 vncipher $out0,$out0,v27
3500
3501 addi $key_,$sp,$FRAME+15 # rewind $key_
3502 vncipher $out0,$out0,v28
3503 lvx v24,$x00,$key_ # re-pre-load round[1]
3504
3505 vncipher $out0,$out0,v29
3506 lvx v25,$x10,$key_ # re-pre-load round[2]
3507 vxor $twk0,$twk0,v31
3508
3509 le?vperm $in0,$in0,$in0,$leperm
3510 vncipher $out0,$out0,v30
3511
3512 mtctr $rounds
3513 vncipherlast $out0,$out0,$twk0
3514
3515 vmr $twk0,$twk1 # unused tweak
3516 vmr $twk1,$twk2
3517 le?vperm $out0,$out0,$out0,$leperm
3518 stvx_u $out0,$x00,$out # store output
3519 addi $out,$out,0x10
3520 vxor $out0,$in0,$twk2
3521 bne Lxts_dec6x_steal
3522 b Lxts_dec6x_done
3523
3524.align 4
3525Lxts_dec6x_zero:
3526 cmpwi $taillen,0
3527 beq Lxts_dec6x_done
3528
3529 lvx_u $in0,0,$inp
3530 le?vperm $in0,$in0,$in0,$leperm
3531 vxor $out0,$in0,$twk1
3532Lxts_dec6x_steal:
3533 vncipher $out0,$out0,v24
3534 lvx v24,$x20,$key_ # round[3]
3535 addi $key_,$key_,0x20
3536
3537 vncipher $out0,$out0,v25
3538 lvx v25,$x10,$key_ # round[4]
3539 bdnz Lxts_dec6x_steal
3540
3541 add $inp,$inp,$taillen
3542 vncipher $out0,$out0,v24
3543
3544 cmpwi $taillen,0
3545 vncipher $out0,$out0,v25
3546
3547 lvx_u $in0,0,$inp
3548 vncipher $out0,$out0,v26
3549
3550 lvsr $inpperm,0,$taillen # $in5 is no more
3551 vncipher $out0,$out0,v27
3552
3553 addi $key_,$sp,$FRAME+15 # rewind $key_
3554 vncipher $out0,$out0,v28
3555 lvx v24,$x00,$key_ # re-pre-load round[1]
3556
3557 vncipher $out0,$out0,v29
3558 lvx v25,$x10,$key_ # re-pre-load round[2]
3559 vxor $twk1,$twk1,v31
3560
3561 le?vperm $in0,$in0,$in0,$leperm
3562 vncipher $out0,$out0,v30
3563
3564 vperm $in0,$in0,$in0,$inpperm
3565 vncipherlast $tmp,$out0,$twk1
3566
3567 le?vperm $out0,$tmp,$tmp,$leperm
3568 le?stvx_u $out0,0,$out
3569 be?stvx_u $tmp,0,$out
3570
3571 vxor $out0,$out0,$out0
3572 vspltisb $out1,-1
3573 vperm $out0,$out0,$out1,$inpperm
3574 vsel $out0,$in0,$tmp,$out0
3575 vxor $out0,$out0,$twk0
3576
3577 subi r30,$out,1
3578 mtctr $taillen
3579Loop_xts_dec6x_steal:
3580 lbzu r0,1(r30)
3581 stb r0,16(r30)
3582 bdnz Loop_xts_dec6x_steal
3583
3584 li $taillen,0
3585 mtctr $rounds
3586 b Loop_xts_dec1x # one more time...
3587
3588.align 4
3589Lxts_dec6x_done:
3590 ${UCMP}i $ivp,0
3591 beq Lxts_dec6x_ret
3592
3593 vxor $tweak,$twk0,$rndkey0
3594 le?vperm $tweak,$tweak,$tweak,$leperm
3595 stvx_u $tweak,0,$ivp
3596
3597Lxts_dec6x_ret:
3598 mtlr r11
3599 li r10,`$FRAME+15`
3600 li r11,`$FRAME+31`
3601 stvx $seven,r10,$sp # wipe copies of round keys
3602 addi r10,r10,32
3603 stvx $seven,r11,$sp
3604 addi r11,r11,32
3605 stvx $seven,r10,$sp
3606 addi r10,r10,32
3607 stvx $seven,r11,$sp
3608 addi r11,r11,32
3609 stvx $seven,r10,$sp
3610 addi r10,r10,32
3611 stvx $seven,r11,$sp
3612 addi r11,r11,32
3613 stvx $seven,r10,$sp
3614 addi r10,r10,32
3615 stvx $seven,r11,$sp
3616 addi r11,r11,32
3617
3618 mtspr 256,$vrsave
3619 lvx v20,r10,$sp # ABI says so
3620 addi r10,r10,32
3621 lvx v21,r11,$sp
3622 addi r11,r11,32
3623 lvx v22,r10,$sp
3624 addi r10,r10,32
3625 lvx v23,r11,$sp
3626 addi r11,r11,32
3627 lvx v24,r10,$sp
3628 addi r10,r10,32
3629 lvx v25,r11,$sp
3630 addi r11,r11,32
3631 lvx v26,r10,$sp
3632 addi r10,r10,32
3633 lvx v27,r11,$sp
3634 addi r11,r11,32
3635 lvx v28,r10,$sp
3636 addi r10,r10,32
3637 lvx v29,r11,$sp
3638 addi r11,r11,32
3639 lvx v30,r10,$sp
3640 lvx v31,r11,$sp
3641 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
3642 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
3643 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
3644 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
3645 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
3646 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
3647 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
3648 blr
3649 .long 0
3650 .byte 0,12,0x04,1,0x80,6,6,0
3651 .long 0
3652
3653.align 5
3654_aesp8_xts_dec5x:
3655 vncipher $out0,$out0,v24
3656 vncipher $out1,$out1,v24
3657 vncipher $out2,$out2,v24
3658 vncipher $out3,$out3,v24
3659 vncipher $out4,$out4,v24
3660 lvx v24,$x20,$key_ # round[3]
3661 addi $key_,$key_,0x20
3662
3663 vncipher $out0,$out0,v25
3664 vncipher $out1,$out1,v25
3665 vncipher $out2,$out2,v25
3666 vncipher $out3,$out3,v25
3667 vncipher $out4,$out4,v25
3668 lvx v25,$x10,$key_ # round[4]
3669 bdnz _aesp8_xts_dec5x
3670
3671 subi r0,$taillen,1
3672 vncipher $out0,$out0,v24
3673 vncipher $out1,$out1,v24
3674 vncipher $out2,$out2,v24
3675 vncipher $out3,$out3,v24
3676 vncipher $out4,$out4,v24
3677
3678 andi. r0,r0,16
3679 cmpwi $taillen,0
3680 vncipher $out0,$out0,v25
3681 vncipher $out1,$out1,v25
3682 vncipher $out2,$out2,v25
3683 vncipher $out3,$out3,v25
3684 vncipher $out4,$out4,v25
3685 vxor $twk0,$twk0,v31
3686
3687 sub $inp,$inp,r0
3688 vncipher $out0,$out0,v26
3689 vncipher $out1,$out1,v26
3690 vncipher $out2,$out2,v26
3691 vncipher $out3,$out3,v26
3692 vncipher $out4,$out4,v26
3693 vxor $in1,$twk1,v31
3694
3695 vncipher $out0,$out0,v27
3696 lvx_u $in0,0,$inp
3697 vncipher $out1,$out1,v27
3698 vncipher $out2,$out2,v27
3699 vncipher $out3,$out3,v27
3700 vncipher $out4,$out4,v27
3701 vxor $in2,$twk2,v31
3702
3703 addi $key_,$sp,$FRAME+15 # rewind $key_
3704 vncipher $out0,$out0,v28
3705 vncipher $out1,$out1,v28
3706 vncipher $out2,$out2,v28
3707 vncipher $out3,$out3,v28
3708 vncipher $out4,$out4,v28
3709 lvx v24,$x00,$key_ # re-pre-load round[1]
3710 vxor $in3,$twk3,v31
3711
3712 vncipher $out0,$out0,v29
3713 le?vperm $in0,$in0,$in0,$leperm
3714 vncipher $out1,$out1,v29
3715 vncipher $out2,$out2,v29
3716 vncipher $out3,$out3,v29
3717 vncipher $out4,$out4,v29
3718 lvx v25,$x10,$key_ # re-pre-load round[2]
3719 vxor $in4,$twk4,v31
3720
3721 vncipher $out0,$out0,v30
3722 vncipher $out1,$out1,v30
3723 vncipher $out2,$out2,v30
3724 vncipher $out3,$out3,v30
3725 vncipher $out4,$out4,v30
3726
3727 vncipherlast $out0,$out0,$twk0
3728 vncipherlast $out1,$out1,$in1
3729 vncipherlast $out2,$out2,$in2
3730 vncipherlast $out3,$out3,$in3
3731 vncipherlast $out4,$out4,$in4
3732 mtctr $rounds
3733 blr
3734 .long 0
3735 .byte 0,12,0x14,0,0,0,0,0
3736___
3737}} }}}
3738
3739my $consts=1;
3740foreach(split("\n",$code)) {
3741 s/\`([^\`]*)\`/eval($1)/geo;
3742
3743 # constants table endian-specific conversion
3744 if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
3745 my $conv=$3;
3746 my @bytes=();
3747
3748 # convert to endian-agnostic format
3749 if ($1 eq "long") {
3750 foreach (split(/,\s*/,$2)) {
3751 my $l = /^0/?oct:int;
3752 push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
3753 }
3754 } else {
3755 @bytes = map(/^0/?oct:int,split(/,\s*/,$2));
3756 }
3757
3758 # little-endian conversion
3759 if ($flavour =~ /le$/o) {
3760 SWITCH: for($conv) {
3761 /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; };
3762 /\?rev/ && do { @bytes=reverse(@bytes); last; };
3763 }
3764 }
3765
3766 #emit
3767 print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
3768 next;
3769 }
3770 $consts=0 if (m/Lconsts:/o); # end of table
3771
3772 # instructions prefixed with '?' are endian-specific and need
3773 # to be adjusted accordingly...
3774 if ($flavour =~ /le$/o) { # little-endian
3775 s/le\?//o or
3776 s/be\?/#be#/o or
3777 s/\?lvsr/lvsl/o or
3778 s/\?lvsl/lvsr/o or
3779 s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
3780 s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
3781 s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
3782 } else { # big-endian
3783 s/le\?/#le#/o or
3784 s/be\?//o or
3785 s/\?([a-z]+)/$1/o;
3786 }
3787
3788 print $_,"\n";
3789}
3790
3791close STDOUT;