1#! /usr/bin/env perl
2# Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
3#
4# Licensed under the OpenSSL license (the "License"). You may not use
5# this file except in compliance with the License. You can obtain a copy
6# in the file LICENSE in the source distribution or at
7# https://www.openssl.org/source/license.html
8
9#
10# ====================================================================
11# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
12# project. The module is, however, dual licensed under OpenSSL and
13# CRYPTOGAMS licenses depending on where you obtain it. For further
14# details see http://www.openssl.org/~appro/cryptogams/.
15# ====================================================================
16#
17# This module implements support for AES instructions as per PowerISA
18# specification version 2.07, first implemented by the POWER8 processor.
19# The module is endian-agnostic in the sense that it supports both big-
20# and little-endian cases. Data alignment in parallelizable modes is
21# handled with VSX loads and stores, which implies the MSR.VSX flag being
22# set. It should also be noted that the ISA specification doesn't prohibit
23# alignment exceptions for these instructions on page boundaries.
24# Initially alignment was handled in the pure AltiVec/VMX way [with data
25# aligned programmatically, which in turn guarantees exception-
26# free execution], but that turned out to hamper performance when vcipher
27# instructions are interleaved. It's reckoned that occasional
28# misalignment penalties at page boundaries are on average lower
29# than the additional overhead of the pure AltiVec approach.
30#
31# May 2016
32#
33# Added XTS subroutine; a 9x improvement on little- and 12x on big-endian
34# systems was measured.
35#
36######################################################################
37# Current large-block performance in cycles per byte processed with
38# 128-bit key (less is better).
39#
40# CBC en-/decrypt CTR XTS
41# POWER8[le] 3.96/0.72 0.74 1.1
42# POWER8[be] 3.75/0.65 0.66 1.0
43
44$flavour = shift;
45
46if ($flavour =~ /64/) {
47 $SIZE_T =8;
48 $LRSAVE =2*$SIZE_T;
49 $STU ="stdu";
50 $POP ="ld";
51 $PUSH ="std";
52 $UCMP ="cmpld";
53 $SHL ="sldi";
54} elsif ($flavour =~ /32/) {
55 $SIZE_T =4;
56 $LRSAVE =$SIZE_T;
57 $STU ="stwu";
58 $POP ="lwz";
59 $PUSH ="stw";
60 $UCMP ="cmplw";
61 $SHL ="slwi";
62} else { die "nonsense $flavour"; }
63
64$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;
65
66$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
67( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
68( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
69die "can't locate ppc-xlate.pl";
70
71open STDOUT,"| $^X $xlate $flavour ".shift or die "can't call $xlate: $!"; # low-precedence "or" so the die guards open(), not shift
72
73$FRAME=8*$SIZE_T;
74$prefix="aes_hw";
75
76$sp="r1";
77$vrsave="r12";
78
79#########################################################################
80{{{ # Key setup procedures #
81my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
82my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
83my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));
84
85$code.=<<___;
86.machine "any"
87
88.text
89
90.align 7
91rcon:
92.long 0x01000000, 0x01000000, 0x01000000, 0x01000000 ?rev
93.long 0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000 ?rev
94.long 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c ?rev
95.long 0,0,0,0 ?asis
96Lconsts:
97 mflr r0
98 bcl 20,31,\$+4
99 	mflr	$ptr		# distance between . and rcon
100 addi $ptr,$ptr,-0x48
101 mtlr r0
102 blr
103 .long 0
104 .byte 0,12,0x14,0,0,0,0,0
105.asciz "AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
106
107.globl .${prefix}_set_encrypt_key
108.align 5
109.${prefix}_set_encrypt_key:
110Lset_encrypt_key:
111 mflr r11
112 $PUSH r11,$LRSAVE($sp)
113
114 li $ptr,-1
115 ${UCMP}i $inp,0
116 beq- Lenc_key_abort # if ($inp==0) return -1;
117 ${UCMP}i $out,0
118 beq- Lenc_key_abort # if ($out==0) return -1;
119 li $ptr,-2
120 cmpwi $bits,128
121 blt- Lenc_key_abort
122 cmpwi $bits,256
123 bgt- Lenc_key_abort
124 andi. r0,$bits,0x3f
125 bne- Lenc_key_abort
126
127 lis r0,0xfff0
128 mfspr $vrsave,256
129 mtspr 256,r0
130
131 bl Lconsts
132 mtlr r11
133
134 neg r9,$inp
135 lvx $in0,0,$inp
136 addi $inp,$inp,15 # 15 is not typo
137 lvsr $key,0,r9 # borrow $key
138 li r8,0x20
139 cmpwi $bits,192
140 lvx $in1,0,$inp
141 le?vspltisb $mask,0x0f # borrow $mask
142 lvx $rcon,0,$ptr
143 le?vxor $key,$key,$mask # adjust for byte swap
144 lvx $mask,r8,$ptr
145 addi $ptr,$ptr,0x10
146 vperm $in0,$in0,$in1,$key # align [and byte swap in LE]
147 li $cnt,8
148 vxor $zero,$zero,$zero
149 mtctr $cnt
150
151 ?lvsr $outperm,0,$out
152 vspltisb $outmask,-1
153 lvx $outhead,0,$out
154 ?vperm $outmask,$zero,$outmask,$outperm
155
156 blt Loop128
157 addi $inp,$inp,8
158 beq L192
159 addi $inp,$inp,8
160 b L256
161
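# A sketch of what each Loop128 iteration computes (assuming the
# standard FIPS-197 schedule): vperm rotates the last key word and
# splats it across all four lanes, vcipherlast then applies SubBytes
# to it (ShiftRows is a no-op on a splatted vector) and xors in the
# round constant, and the vsldoi/vxor ladder folds in the previous
# words, i.e. w[i] = w[i-4] ^ w[i-1] for the remaining lanes.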
162.align 4
163Loop128:
164 vperm $key,$in0,$in0,$mask # rotate-n-splat
165 vsldoi $tmp,$zero,$in0,12 # >>32
166 vperm $outtail,$in0,$in0,$outperm # rotate
167 vsel $stage,$outhead,$outtail,$outmask
168 vmr $outhead,$outtail
169 vcipherlast $key,$key,$rcon
170 stvx $stage,0,$out
171 addi $out,$out,16
172
173 vxor $in0,$in0,$tmp
174 vsldoi $tmp,$zero,$tmp,12 # >>32
175 vxor $in0,$in0,$tmp
176 vsldoi $tmp,$zero,$tmp,12 # >>32
177 vxor $in0,$in0,$tmp
178 vadduwm $rcon,$rcon,$rcon
179 vxor $in0,$in0,$key
180 bdnz Loop128
181
182 lvx $rcon,0,$ptr # last two round keys
183
184 vperm $key,$in0,$in0,$mask # rotate-n-splat
185 vsldoi $tmp,$zero,$in0,12 # >>32
186 vperm $outtail,$in0,$in0,$outperm # rotate
187 vsel $stage,$outhead,$outtail,$outmask
188 vmr $outhead,$outtail
189 vcipherlast $key,$key,$rcon
190 stvx $stage,0,$out
191 addi $out,$out,16
192
193 vxor $in0,$in0,$tmp
194 vsldoi $tmp,$zero,$tmp,12 # >>32
195 vxor $in0,$in0,$tmp
196 vsldoi $tmp,$zero,$tmp,12 # >>32
197 vxor $in0,$in0,$tmp
198 vadduwm $rcon,$rcon,$rcon
199 vxor $in0,$in0,$key
200
201 vperm $key,$in0,$in0,$mask # rotate-n-splat
202 vsldoi $tmp,$zero,$in0,12 # >>32
203 vperm $outtail,$in0,$in0,$outperm # rotate
204 vsel $stage,$outhead,$outtail,$outmask
205 vmr $outhead,$outtail
206 vcipherlast $key,$key,$rcon
207 stvx $stage,0,$out
208 addi $out,$out,16
209
210 vxor $in0,$in0,$tmp
211 vsldoi $tmp,$zero,$tmp,12 # >>32
212 vxor $in0,$in0,$tmp
213 vsldoi $tmp,$zero,$tmp,12 # >>32
214 vxor $in0,$in0,$tmp
215 vxor $in0,$in0,$key
216 vperm $outtail,$in0,$in0,$outperm # rotate
217 vsel $stage,$outhead,$outtail,$outmask
218 vmr $outhead,$outtail
219 stvx $stage,0,$out
220
221 addi $inp,$out,15 # 15 is not typo
222 addi $out,$out,0x50
223
224 li $rounds,10
225 b Ldone
226
227.align 4
228L192:
229 lvx $tmp,0,$inp
230 li $cnt,4
231 vperm $outtail,$in0,$in0,$outperm # rotate
232 vsel $stage,$outhead,$outtail,$outmask
233 vmr $outhead,$outtail
234 stvx $stage,0,$out
235 addi $out,$out,16
236 vperm $in1,$in1,$tmp,$key # align [and byte swap in LE]
237 vspltisb $key,8 # borrow $key
238 mtctr $cnt
239 vsububm $mask,$mask,$key # adjust the mask
240
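# The 192-bit schedule advances six 32-bit words at a time, which does
# not line up with 16-byte round keys; the code below therefore
# stitches round keys together from halves of the two schedule vectors
# with vsldoi (the stage vector), emitting three round keys per loop
# iteration (two schedule steps).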
241Loop192:
242 	vperm	$key,$in1,$in1,$mask	# rotate-n-splat
243 vsldoi $tmp,$zero,$in0,12 # >>32
244 vcipherlast $key,$key,$rcon
245
246 vxor $in0,$in0,$tmp
247 vsldoi $tmp,$zero,$tmp,12 # >>32
248 vxor $in0,$in0,$tmp
249 vsldoi $tmp,$zero,$tmp,12 # >>32
250 vxor $in0,$in0,$tmp
251
252 vsldoi $stage,$zero,$in1,8
253 vspltw $tmp,$in0,3
254 vxor $tmp,$tmp,$in1
255 vsldoi $in1,$zero,$in1,12 # >>32
256 vadduwm $rcon,$rcon,$rcon
257 vxor $in1,$in1,$tmp
258 vxor $in0,$in0,$key
259 vxor $in1,$in1,$key
260 vsldoi $stage,$stage,$in0,8
261
262 vperm $key,$in1,$in1,$mask # rotate-n-splat
263 vsldoi $tmp,$zero,$in0,12 # >>32
264 vperm $outtail,$stage,$stage,$outperm # rotate
265 vsel $stage,$outhead,$outtail,$outmask
266 vmr $outhead,$outtail
267 vcipherlast $key,$key,$rcon
268 stvx $stage,0,$out
269 addi $out,$out,16
270
271 vsldoi $stage,$in0,$in1,8
272 vxor $in0,$in0,$tmp
273 vsldoi $tmp,$zero,$tmp,12 # >>32
274 vperm $outtail,$stage,$stage,$outperm # rotate
275 vsel $stage,$outhead,$outtail,$outmask
276 vmr $outhead,$outtail
277 vxor $in0,$in0,$tmp
278 vsldoi $tmp,$zero,$tmp,12 # >>32
279 vxor $in0,$in0,$tmp
280 stvx $stage,0,$out
281 addi $out,$out,16
282
283 vspltw $tmp,$in0,3
284 vxor $tmp,$tmp,$in1
285 vsldoi $in1,$zero,$in1,12 # >>32
286 vadduwm $rcon,$rcon,$rcon
287 vxor $in1,$in1,$tmp
288 vxor $in0,$in0,$key
289 vxor $in1,$in1,$key
290 vperm $outtail,$in0,$in0,$outperm # rotate
291 vsel $stage,$outhead,$outtail,$outmask
292 vmr $outhead,$outtail
293 stvx $stage,0,$out
294 addi $inp,$out,15 # 15 is not typo
295 addi $out,$out,16
296 bdnz Loop192
297
298 li $rounds,12
299 addi $out,$out,0x20
300 b Ldone
301
302.align 4
303L256:
304 lvx $tmp,0,$inp
305 li $cnt,7
306 li $rounds,14
307 vperm $outtail,$in0,$in0,$outperm # rotate
308 vsel $stage,$outhead,$outtail,$outmask
309 vmr $outhead,$outtail
310 stvx $stage,0,$out
311 addi $out,$out,16
312 vperm $in1,$in1,$tmp,$key # align [and byte swap in LE]
313 mtctr $cnt
314
315Loop256:
316 vperm $key,$in1,$in1,$mask # rotate-n-splat
317 vsldoi $tmp,$zero,$in0,12 # >>32
318 vperm $outtail,$in1,$in1,$outperm # rotate
319 vsel $stage,$outhead,$outtail,$outmask
320 vmr $outhead,$outtail
321 vcipherlast $key,$key,$rcon
322 stvx $stage,0,$out
323 addi $out,$out,16
324
325 vxor $in0,$in0,$tmp
326 vsldoi $tmp,$zero,$tmp,12 # >>32
327 vxor $in0,$in0,$tmp
328 vsldoi $tmp,$zero,$tmp,12 # >>32
329 vxor $in0,$in0,$tmp
330 vadduwm $rcon,$rcon,$rcon
331 vxor $in0,$in0,$key
332 vperm $outtail,$in0,$in0,$outperm # rotate
333 vsel $stage,$outhead,$outtail,$outmask
334 vmr $outhead,$outtail
335 stvx $stage,0,$out
336 addi $inp,$out,15 # 15 is not typo
337 addi $out,$out,16
338 bdz Ldone
339
340 vspltw $key,$in0,3 # just splat
341 vsldoi $tmp,$zero,$in1,12 # >>32
342 vsbox $key,$key
343
344 vxor $in1,$in1,$tmp
345 vsldoi $tmp,$zero,$tmp,12 # >>32
346 vxor $in1,$in1,$tmp
347 vsldoi $tmp,$zero,$tmp,12 # >>32
348 vxor $in1,$in1,$tmp
349
350 vxor $in1,$in1,$key
351 b Loop256
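# Odd 256-bit schedule steps (above) use vsbox: SubBytes without the
# word rotation, applied to a splat of the last word -- matching the
# FIPS-197 rule that every other step of the 256-bit schedule
# substitutes but does not rotate.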
352
353.align 4
354Ldone:
355 lvx $in1,0,$inp # redundant in aligned case
356 vsel $in1,$outhead,$in1,$outmask
357 stvx $in1,0,$inp
358 li $ptr,0
359 mtspr 256,$vrsave
360 stw $rounds,0($out)
361
362Lenc_key_abort:
363 mr r3,$ptr
364 blr
365 .long 0
366 .byte 0,12,0x14,1,0,0,3,0
367 .long 0
368.size .${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key
369
370.globl .${prefix}_set_decrypt_key
371.align 5
372.${prefix}_set_decrypt_key:
373 $STU $sp,-$FRAME($sp)
374 mflr r10
375 $PUSH r10,$FRAME+$LRSAVE($sp)
376 bl Lset_encrypt_key
377 mtlr r10
378
379 cmpwi r3,0
380 bne- Ldec_key_abort
381
382 slwi $cnt,$rounds,4
383 subi $inp,$out,240 # first round key
384 srwi $rounds,$rounds,1
385 add $out,$inp,$cnt # last round key
386 mtctr $rounds
387
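# The decryption key schedule is the encryption schedule in reverse
# order: the loop below swaps 16-byte round keys pairwise from both
# ends of the buffer (rounds/2 iterations), using word loads and
# stores so any alignment works.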
388Ldeckey:
389 lwz r0, 0($inp)
390 lwz r6, 4($inp)
391 lwz r7, 8($inp)
392 lwz r8, 12($inp)
393 addi $inp,$inp,16
394 lwz r9, 0($out)
395 lwz r10,4($out)
396 lwz r11,8($out)
397 lwz r12,12($out)
398 stw r0, 0($out)
399 stw r6, 4($out)
400 stw r7, 8($out)
401 stw r8, 12($out)
402 subi $out,$out,16
403 stw r9, -16($inp)
404 stw r10,-12($inp)
405 stw r11,-8($inp)
406 stw r12,-4($inp)
407 bdnz Ldeckey
408
409 xor r3,r3,r3 # return value
410Ldec_key_abort:
411 addi $sp,$sp,$FRAME
412 blr
413 .long 0
414 .byte 0,12,4,1,0x80,0,3,0
415 .long 0
416.size .${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
417___
418}}}
419#########################################################################
420{{{ # Single block en- and decrypt procedures #
421sub gen_block () {
422my $dir = shift;
423my $n = $dir eq "de" ? "n" : "";
424my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));
425
426$code.=<<___;
427.globl .${prefix}_${dir}crypt
428.align 5
429.${prefix}_${dir}crypt:
430 lwz $rounds,240($key)
431 lis r0,0xfc00
432 mfspr $vrsave,256
433 li $idx,15 # 15 is not typo
434 mtspr 256,r0
435
436 lvx v0,0,$inp
437 neg r11,$out
438 lvx v1,$idx,$inp
439 lvsl v2,0,$inp # inpperm
440 le?vspltisb v4,0x0f
441 ?lvsl v3,0,r11 # outperm
442 le?vxor v2,v2,v4
443 li $idx,16
444 vperm v0,v0,v1,v2 # align [and byte swap in LE]
445 lvx v1,0,$key
446 ?lvsl v5,0,$key # keyperm
447 srwi $rounds,$rounds,1
448 lvx v2,$idx,$key
449 addi $idx,$idx,16
450 subi $rounds,$rounds,1
451 ?vperm v1,v1,v2,v5 # align round key
452
453 vxor v0,v0,v1
454 lvx v1,$idx,$key
455 addi $idx,$idx,16
456 mtctr $rounds
457
458Loop_${dir}c:
459 ?vperm v2,v2,v1,v5
460 v${n}cipher v0,v0,v2
461 lvx v2,$idx,$key
462 addi $idx,$idx,16
463 ?vperm v1,v1,v2,v5
464 v${n}cipher v0,v0,v1
465 lvx v1,$idx,$key
466 addi $idx,$idx,16
467 bdnz Loop_${dir}c
468
469 ?vperm v2,v2,v1,v5
470 v${n}cipher v0,v0,v2
471 lvx v2,$idx,$key
472 ?vperm v1,v1,v2,v5
473 v${n}cipherlast v0,v0,v1
474
475 vspltisb v2,-1
476 vxor v1,v1,v1
477 li $idx,15 # 15 is not typo
478 ?vperm v2,v1,v2,v3 # outmask
479 le?vxor v3,v3,v4
480 lvx v1,0,$out # outhead
481 vperm v0,v0,v0,v3 # rotate [and byte swap in LE]
482 vsel v1,v1,v0,v2
483 lvx v4,$idx,$out
484 stvx v1,0,$out
485 vsel v0,v0,v4,v2
486 stvx v0,$idx,$out
487
488 mtspr 256,$vrsave
489 blr
490 .long 0
491 .byte 0,12,0x14,0,0,0,3,0
492 .long 0
493.size .${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
494___
495}
496&gen_block("en");
497&gen_block("de");
498}}}
499#########################################################################
500{{{ # CBC en- and decrypt procedures #
501my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
502my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3));
503my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
504 map("v$_",(4..10));
505$code.=<<___;
506.globl .${prefix}_cbc_encrypt
507.align 5
508.${prefix}_cbc_encrypt:
509 ${UCMP}i $len,16
510 bltlr-
511
512 cmpwi $enc,0 # test direction
513 lis r0,0xffe0
514 mfspr $vrsave,256
515 mtspr 256,r0
516
517 li $idx,15
518 vxor $rndkey0,$rndkey0,$rndkey0
519 le?vspltisb $tmp,0x0f
520
521 lvx $ivec,0,$ivp # load [unaligned] iv
522 lvsl $inpperm,0,$ivp
523 lvx $inptail,$idx,$ivp
524 le?vxor $inpperm,$inpperm,$tmp
525 vperm $ivec,$ivec,$inptail,$inpperm
526
527 neg r11,$inp
528 ?lvsl $keyperm,0,$key # prepare for unaligned key
529 lwz $rounds,240($key)
530
531 lvsr $inpperm,0,r11 # prepare for unaligned load
532 lvx $inptail,0,$inp
533 addi $inp,$inp,15 # 15 is not typo
534 le?vxor $inpperm,$inpperm,$tmp
535
536 ?lvsr $outperm,0,$out # prepare for unaligned store
537 vspltisb $outmask,-1
538 lvx $outhead,0,$out
539 ?vperm $outmask,$rndkey0,$outmask,$outperm
540 le?vxor $outperm,$outperm,$tmp
541
542 srwi $rounds,$rounds,1
543 li $idx,16
544 subi $rounds,$rounds,1
545 beq Lcbc_dec
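# rounds was halved (minus one) above because the Loop_cbc_enc and
# Loop_cbc_dec bodies process two AES rounds per iteration, with the
# initial AddRoundKey and the final vcipherlast/vncipherlast peeled
# out of the loop.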
546
547Lcbc_enc:
548 vmr $inout,$inptail
549 lvx $inptail,0,$inp
550 addi $inp,$inp,16
551 mtctr $rounds
552 subi $len,$len,16 # len-=16
553
554 lvx $rndkey0,0,$key
555 vperm $inout,$inout,$inptail,$inpperm
556 lvx $rndkey1,$idx,$key
557 addi $idx,$idx,16
558 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
559 vxor $inout,$inout,$rndkey0
560 lvx $rndkey0,$idx,$key
561 addi $idx,$idx,16
562 vxor $inout,$inout,$ivec
563
564Loop_cbc_enc:
565 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
566 vcipher $inout,$inout,$rndkey1
567 lvx $rndkey1,$idx,$key
568 addi $idx,$idx,16
569 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
570 vcipher $inout,$inout,$rndkey0
571 lvx $rndkey0,$idx,$key
572 addi $idx,$idx,16
573 bdnz Loop_cbc_enc
574
575 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
576 vcipher $inout,$inout,$rndkey1
577 lvx $rndkey1,$idx,$key
578 li $idx,16
579 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
580 vcipherlast $ivec,$inout,$rndkey0
581 ${UCMP}i $len,16
582
583 vperm $tmp,$ivec,$ivec,$outperm
584 vsel $inout,$outhead,$tmp,$outmask
585 vmr $outhead,$tmp
586 stvx $inout,0,$out
587 addi $out,$out,16
588 bge Lcbc_enc
589
590 b Lcbc_done
591
592.align 4
593Lcbc_dec:
594 ${UCMP}i $len,128
595 bge _aesp8_cbc_decrypt8x
596 vmr $tmp,$inptail
597 lvx $inptail,0,$inp
598 addi $inp,$inp,16
599 mtctr $rounds
600 subi $len,$len,16 # len-=16
601
602 lvx $rndkey0,0,$key
603 vperm $tmp,$tmp,$inptail,$inpperm
604 lvx $rndkey1,$idx,$key
605 addi $idx,$idx,16
606 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
607 vxor $inout,$tmp,$rndkey0
608 lvx $rndkey0,$idx,$key
609 addi $idx,$idx,16
610
611Loop_cbc_dec:
612 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
613 vncipher $inout,$inout,$rndkey1
614 lvx $rndkey1,$idx,$key
615 addi $idx,$idx,16
616 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
617 vncipher $inout,$inout,$rndkey0
618 lvx $rndkey0,$idx,$key
619 addi $idx,$idx,16
620 bdnz Loop_cbc_dec
621
622 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
623 vncipher $inout,$inout,$rndkey1
624 lvx $rndkey1,$idx,$key
625 li $idx,16
626 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
627 vncipherlast $inout,$inout,$rndkey0
628 ${UCMP}i $len,16
629
630 vxor $inout,$inout,$ivec
631 vmr $ivec,$tmp
632 vperm $tmp,$inout,$inout,$outperm
633 vsel $inout,$outhead,$tmp,$outmask
634 vmr $outhead,$tmp
635 stvx $inout,0,$out
636 addi $out,$out,16
637 bge Lcbc_dec
638
639Lcbc_done:
640 addi $out,$out,-1
641 lvx $inout,0,$out # redundant in aligned case
642 vsel $inout,$outhead,$inout,$outmask
643 stvx $inout,0,$out
644
645 neg $enc,$ivp # write [unaligned] iv
646 li $idx,15 # 15 is not typo
647 vxor $rndkey0,$rndkey0,$rndkey0
648 vspltisb $outmask,-1
649 le?vspltisb $tmp,0x0f
650 ?lvsl $outperm,0,$enc
651 ?vperm $outmask,$rndkey0,$outmask,$outperm
652 le?vxor $outperm,$outperm,$tmp
653 lvx $outhead,0,$ivp
654 vperm $ivec,$ivec,$ivec,$outperm
655 vsel $inout,$outhead,$ivec,$outmask
656 lvx $inptail,$idx,$ivp
657 stvx $inout,0,$ivp
658 vsel $inout,$ivec,$inptail,$outmask
659 stvx $inout,$idx,$ivp
660
661 mtspr 256,$vrsave
662 blr
663 .long 0
664 .byte 0,12,0x14,0,0,0,6,0
665 .long 0
666___
667#########################################################################
668{{ # Optimized CBC decrypt procedure #
669my $key_="r11";
670my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
671 $x00=0 if ($flavour =~ /osx/);
672my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
673my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
674my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys
675 # v26-v31 last 6 round keys
676my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
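# Only 32 vector registers exist, and eight parallel blocks plus the
# last six round keys (v26-v31) consume most of them, so the earlier
# round keys are staged on the stack and streamed through the v24/v25
# rotating pair inside the main loop.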
677
678$code.=<<___;
679.align 5
680_aesp8_cbc_decrypt8x:
681 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
682 li r10,`$FRAME+8*16+15`
683 li r11,`$FRAME+8*16+31`
684 stvx v20,r10,$sp # ABI says so
685 addi r10,r10,32
686 stvx v21,r11,$sp
687 addi r11,r11,32
688 stvx v22,r10,$sp
689 addi r10,r10,32
690 stvx v23,r11,$sp
691 addi r11,r11,32
692 stvx v24,r10,$sp
693 addi r10,r10,32
694 stvx v25,r11,$sp
695 addi r11,r11,32
696 stvx v26,r10,$sp
697 addi r10,r10,32
698 stvx v27,r11,$sp
699 addi r11,r11,32
700 stvx v28,r10,$sp
701 addi r10,r10,32
702 stvx v29,r11,$sp
703 addi r11,r11,32
704 stvx v30,r10,$sp
705 stvx v31,r11,$sp
706 li r0,-1
707 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
708 li $x10,0x10
709 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
710 li $x20,0x20
711 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
712 li $x30,0x30
713 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
714 li $x40,0x40
715 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
716 li $x50,0x50
717 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
718 li $x60,0x60
719 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
720 li $x70,0x70
721 mtspr 256,r0
722
723 subi $rounds,$rounds,3 # -4 in total
724 subi $len,$len,128 # bias
725
726 lvx $rndkey0,$x00,$key # load key schedule
727 lvx v30,$x10,$key
728 addi $key,$key,0x20
729 lvx v31,$x00,$key
730 ?vperm $rndkey0,$rndkey0,v30,$keyperm
731 addi $key_,$sp,$FRAME+15
732 mtctr $rounds
733
734Load_cbc_dec_key:
735 ?vperm v24,v30,v31,$keyperm
736 lvx v30,$x10,$key
737 addi $key,$key,0x20
738 stvx v24,$x00,$key_ # off-load round[1]
739 ?vperm v25,v31,v30,$keyperm
740 lvx v31,$x00,$key
741 stvx v25,$x10,$key_ # off-load round[2]
742 addi $key_,$key_,0x20
743 bdnz Load_cbc_dec_key
744
745 lvx v26,$x10,$key
746 ?vperm v24,v30,v31,$keyperm
747 lvx v27,$x20,$key
748 stvx v24,$x00,$key_ # off-load round[3]
749 ?vperm v25,v31,v26,$keyperm
750 lvx v28,$x30,$key
751 stvx v25,$x10,$key_ # off-load round[4]
752 addi $key_,$sp,$FRAME+15 # rewind $key_
753 ?vperm v26,v26,v27,$keyperm
754 lvx v29,$x40,$key
755 ?vperm v27,v27,v28,$keyperm
756 lvx v30,$x50,$key
757 ?vperm v28,v28,v29,$keyperm
758 lvx v31,$x60,$key
759 ?vperm v29,v29,v30,$keyperm
760 lvx $out0,$x70,$key # borrow $out0
761 ?vperm v30,v30,v31,$keyperm
762 lvx v24,$x00,$key_ # pre-load round[1]
763 ?vperm v31,v31,$out0,$keyperm
764 lvx v25,$x10,$key_ # pre-load round[2]
765
766 #lvx $inptail,0,$inp # "caller" already did this
767 #addi $inp,$inp,15 # 15 is not typo
768 subi $inp,$inp,15 # undo "caller"
769
770 le?li $idx,8
771 lvx_u $in0,$x00,$inp # load first 8 "words"
772 le?lvsl $inpperm,0,$idx
773 le?vspltisb $tmp,0x0f
774 lvx_u $in1,$x10,$inp
775 le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
776 lvx_u $in2,$x20,$inp
777 le?vperm $in0,$in0,$in0,$inpperm
778 lvx_u $in3,$x30,$inp
779 le?vperm $in1,$in1,$in1,$inpperm
780 lvx_u $in4,$x40,$inp
781 le?vperm $in2,$in2,$in2,$inpperm
782 vxor $out0,$in0,$rndkey0
783 lvx_u $in5,$x50,$inp
784 le?vperm $in3,$in3,$in3,$inpperm
785 vxor $out1,$in1,$rndkey0
786 lvx_u $in6,$x60,$inp
787 le?vperm $in4,$in4,$in4,$inpperm
788 vxor $out2,$in2,$rndkey0
789 lvx_u $in7,$x70,$inp
790 addi $inp,$inp,0x80
791 le?vperm $in5,$in5,$in5,$inpperm
792 vxor $out3,$in3,$rndkey0
793 le?vperm $in6,$in6,$in6,$inpperm
794 vxor $out4,$in4,$rndkey0
795 le?vperm $in7,$in7,$in7,$inpperm
796 vxor $out5,$in5,$rndkey0
797 vxor $out6,$in6,$rndkey0
798 vxor $out7,$in7,$rndkey0
799
800 mtctr $rounds
801 b Loop_cbc_dec8x
802.align 5
803Loop_cbc_dec8x:
804 vncipher $out0,$out0,v24
805 vncipher $out1,$out1,v24
806 vncipher $out2,$out2,v24
807 vncipher $out3,$out3,v24
808 vncipher $out4,$out4,v24
809 vncipher $out5,$out5,v24
810 vncipher $out6,$out6,v24
811 vncipher $out7,$out7,v24
812 lvx v24,$x20,$key_ # round[3]
813 addi $key_,$key_,0x20
814
815 vncipher $out0,$out0,v25
816 vncipher $out1,$out1,v25
817 vncipher $out2,$out2,v25
818 vncipher $out3,$out3,v25
819 vncipher $out4,$out4,v25
820 vncipher $out5,$out5,v25
821 vncipher $out6,$out6,v25
822 vncipher $out7,$out7,v25
823 lvx v25,$x10,$key_ # round[4]
824 bdnz Loop_cbc_dec8x
825
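# Branchless tail handling: subic subtracts 128 from the biased length
# and records the borrow, subfe. turns that borrow into an all-ones or
# all-zero mask (also setting cr0 for the loop-back test below), and
# the and/add pair backs the input pointer up by the negative
# remainder, so the final eight loads end exactly at the last input
# block instead of reading past the end of the buffer.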
826 subic $len,$len,128 # $len-=128
827 vncipher $out0,$out0,v24
828 vncipher $out1,$out1,v24
829 vncipher $out2,$out2,v24
830 vncipher $out3,$out3,v24
831 vncipher $out4,$out4,v24
832 vncipher $out5,$out5,v24
833 vncipher $out6,$out6,v24
834 vncipher $out7,$out7,v24
835
836 subfe. r0,r0,r0 # borrow?-1:0
837 vncipher $out0,$out0,v25
838 vncipher $out1,$out1,v25
839 vncipher $out2,$out2,v25
840 vncipher $out3,$out3,v25
841 vncipher $out4,$out4,v25
842 vncipher $out5,$out5,v25
843 vncipher $out6,$out6,v25
844 vncipher $out7,$out7,v25
845
846 and r0,r0,$len
847 vncipher $out0,$out0,v26
848 vncipher $out1,$out1,v26
849 vncipher $out2,$out2,v26
850 vncipher $out3,$out3,v26
851 vncipher $out4,$out4,v26
852 vncipher $out5,$out5,v26
853 vncipher $out6,$out6,v26
854 vncipher $out7,$out7,v26
855
856 	add	$inp,$inp,r0		# $inp is adjusted in such
857 					# a way that at exit from the
858 					# loop inX-in7 are loaded
859 					# with the last "words"
860 vncipher $out0,$out0,v27
861 vncipher $out1,$out1,v27
862 vncipher $out2,$out2,v27
863 vncipher $out3,$out3,v27
864 vncipher $out4,$out4,v27
865 vncipher $out5,$out5,v27
866 vncipher $out6,$out6,v27
867 vncipher $out7,$out7,v27
868
869 addi $key_,$sp,$FRAME+15 # rewind $key_
870 vncipher $out0,$out0,v28
871 vncipher $out1,$out1,v28
872 vncipher $out2,$out2,v28
873 vncipher $out3,$out3,v28
874 vncipher $out4,$out4,v28
875 vncipher $out5,$out5,v28
876 vncipher $out6,$out6,v28
877 vncipher $out7,$out7,v28
878 lvx v24,$x00,$key_ # re-pre-load round[1]
879
880 vncipher $out0,$out0,v29
881 vncipher $out1,$out1,v29
882 vncipher $out2,$out2,v29
883 vncipher $out3,$out3,v29
884 vncipher $out4,$out4,v29
885 vncipher $out5,$out5,v29
886 vncipher $out6,$out6,v29
887 vncipher $out7,$out7,v29
888 lvx v25,$x10,$key_ # re-pre-load round[2]
889
890 vncipher $out0,$out0,v30
891 vxor $ivec,$ivec,v31 # xor with last round key
892 vncipher $out1,$out1,v30
893 vxor $in0,$in0,v31
894 vncipher $out2,$out2,v30
895 vxor $in1,$in1,v31
896 vncipher $out3,$out3,v30
897 vxor $in2,$in2,v31
898 vncipher $out4,$out4,v30
899 vxor $in3,$in3,v31
900 vncipher $out5,$out5,v30
901 vxor $in4,$in4,v31
902 vncipher $out6,$out6,v30
903 vxor $in5,$in5,v31
904 vncipher $out7,$out7,v30
905 vxor $in6,$in6,v31
906
907 vncipherlast $out0,$out0,$ivec
908 vncipherlast $out1,$out1,$in0
909 lvx_u $in0,$x00,$inp # load next input block
910 vncipherlast $out2,$out2,$in1
911 lvx_u $in1,$x10,$inp
912 vncipherlast $out3,$out3,$in2
913 le?vperm $in0,$in0,$in0,$inpperm
914 lvx_u $in2,$x20,$inp
915 vncipherlast $out4,$out4,$in3
916 le?vperm $in1,$in1,$in1,$inpperm
917 lvx_u $in3,$x30,$inp
918 vncipherlast $out5,$out5,$in4
919 le?vperm $in2,$in2,$in2,$inpperm
920 lvx_u $in4,$x40,$inp
921 vncipherlast $out6,$out6,$in5
922 le?vperm $in3,$in3,$in3,$inpperm
923 lvx_u $in5,$x50,$inp
924 vncipherlast $out7,$out7,$in6
925 le?vperm $in4,$in4,$in4,$inpperm
926 lvx_u $in6,$x60,$inp
927 vmr $ivec,$in7
928 le?vperm $in5,$in5,$in5,$inpperm
929 lvx_u $in7,$x70,$inp
930 addi $inp,$inp,0x80
931
932 le?vperm $out0,$out0,$out0,$inpperm
933 le?vperm $out1,$out1,$out1,$inpperm
934 stvx_u $out0,$x00,$out
935 le?vperm $in6,$in6,$in6,$inpperm
936 vxor $out0,$in0,$rndkey0
937 le?vperm $out2,$out2,$out2,$inpperm
938 stvx_u $out1,$x10,$out
939 le?vperm $in7,$in7,$in7,$inpperm
940 vxor $out1,$in1,$rndkey0
941 le?vperm $out3,$out3,$out3,$inpperm
942 stvx_u $out2,$x20,$out
943 vxor $out2,$in2,$rndkey0
944 le?vperm $out4,$out4,$out4,$inpperm
945 stvx_u $out3,$x30,$out
946 vxor $out3,$in3,$rndkey0
947 le?vperm $out5,$out5,$out5,$inpperm
948 stvx_u $out4,$x40,$out
949 vxor $out4,$in4,$rndkey0
950 le?vperm $out6,$out6,$out6,$inpperm
951 stvx_u $out5,$x50,$out
952 vxor $out5,$in5,$rndkey0
953 le?vperm $out7,$out7,$out7,$inpperm
954 stvx_u $out6,$x60,$out
955 vxor $out6,$in6,$rndkey0
956 stvx_u $out7,$x70,$out
957 addi $out,$out,0x80
958 vxor $out7,$in7,$rndkey0
959
960 mtctr $rounds
961 beq Loop_cbc_dec8x # did $len-=128 borrow?
962
963 addic. $len,$len,128
964 beq Lcbc_dec8x_done
965 nop
966 nop
967
968Loop_cbc_dec8x_tail: # up to 7 "words" tail...
969 vncipher $out1,$out1,v24
970 vncipher $out2,$out2,v24
971 vncipher $out3,$out3,v24
972 vncipher $out4,$out4,v24
973 vncipher $out5,$out5,v24
974 vncipher $out6,$out6,v24
975 vncipher $out7,$out7,v24
976 lvx v24,$x20,$key_ # round[3]
977 addi $key_,$key_,0x20
978
979 vncipher $out1,$out1,v25
980 vncipher $out2,$out2,v25
981 vncipher $out3,$out3,v25
982 vncipher $out4,$out4,v25
983 vncipher $out5,$out5,v25
984 vncipher $out6,$out6,v25
985 vncipher $out7,$out7,v25
986 lvx v25,$x10,$key_ # round[4]
987 bdnz Loop_cbc_dec8x_tail
988
989 vncipher $out1,$out1,v24
990 vncipher $out2,$out2,v24
991 vncipher $out3,$out3,v24
992 vncipher $out4,$out4,v24
993 vncipher $out5,$out5,v24
994 vncipher $out6,$out6,v24
995 vncipher $out7,$out7,v24
996
997 vncipher $out1,$out1,v25
998 vncipher $out2,$out2,v25
999 vncipher $out3,$out3,v25
1000 vncipher $out4,$out4,v25
1001 vncipher $out5,$out5,v25
1002 vncipher $out6,$out6,v25
1003 vncipher $out7,$out7,v25
1004
1005 vncipher $out1,$out1,v26
1006 vncipher $out2,$out2,v26
1007 vncipher $out3,$out3,v26
1008 vncipher $out4,$out4,v26
1009 vncipher $out5,$out5,v26
1010 vncipher $out6,$out6,v26
1011 vncipher $out7,$out7,v26
1012
1013 vncipher $out1,$out1,v27
1014 vncipher $out2,$out2,v27
1015 vncipher $out3,$out3,v27
1016 vncipher $out4,$out4,v27
1017 vncipher $out5,$out5,v27
1018 vncipher $out6,$out6,v27
1019 vncipher $out7,$out7,v27
1020
1021 vncipher $out1,$out1,v28
1022 vncipher $out2,$out2,v28
1023 vncipher $out3,$out3,v28
1024 vncipher $out4,$out4,v28
1025 vncipher $out5,$out5,v28
1026 vncipher $out6,$out6,v28
1027 vncipher $out7,$out7,v28
1028
1029 vncipher $out1,$out1,v29
1030 vncipher $out2,$out2,v29
1031 vncipher $out3,$out3,v29
1032 vncipher $out4,$out4,v29
1033 vncipher $out5,$out5,v29
1034 vncipher $out6,$out6,v29
1035 vncipher $out7,$out7,v29
1036
1037 vncipher $out1,$out1,v30
1038 vxor $ivec,$ivec,v31 # last round key
1039 vncipher $out2,$out2,v30
1040 vxor $in1,$in1,v31
1041 vncipher $out3,$out3,v30
1042 vxor $in2,$in2,v31
1043 vncipher $out4,$out4,v30
1044 vxor $in3,$in3,v31
1045 vncipher $out5,$out5,v30
1046 vxor $in4,$in4,v31
1047 vncipher $out6,$out6,v30
1048 vxor $in5,$in5,v31
1049 vncipher $out7,$out7,v30
1050 vxor $in6,$in6,v31
1051
1052 cmplwi $len,32 # switch($len)
1053 blt Lcbc_dec8x_one
1054 nop
1055 beq Lcbc_dec8x_two
1056 cmplwi $len,64
1057 blt Lcbc_dec8x_three
1058 nop
1059 beq Lcbc_dec8x_four
1060 cmplwi $len,96
1061 blt Lcbc_dec8x_five
1062 nop
1063 beq Lcbc_dec8x_six
1064
1065Lcbc_dec8x_seven:
1066 vncipherlast $out1,$out1,$ivec
1067 vncipherlast $out2,$out2,$in1
1068 vncipherlast $out3,$out3,$in2
1069 vncipherlast $out4,$out4,$in3
1070 vncipherlast $out5,$out5,$in4
1071 vncipherlast $out6,$out6,$in5
1072 vncipherlast $out7,$out7,$in6
1073 vmr $ivec,$in7
1074
1075 le?vperm $out1,$out1,$out1,$inpperm
1076 le?vperm $out2,$out2,$out2,$inpperm
1077 stvx_u $out1,$x00,$out
1078 le?vperm $out3,$out3,$out3,$inpperm
1079 stvx_u $out2,$x10,$out
1080 le?vperm $out4,$out4,$out4,$inpperm
1081 stvx_u $out3,$x20,$out
1082 le?vperm $out5,$out5,$out5,$inpperm
1083 stvx_u $out4,$x30,$out
1084 le?vperm $out6,$out6,$out6,$inpperm
1085 stvx_u $out5,$x40,$out
1086 le?vperm $out7,$out7,$out7,$inpperm
1087 stvx_u $out6,$x50,$out
1088 stvx_u $out7,$x60,$out
1089 addi $out,$out,0x70
1090 b Lcbc_dec8x_done
1091
1092.align 5
1093Lcbc_dec8x_six:
1094 vncipherlast $out2,$out2,$ivec
1095 vncipherlast $out3,$out3,$in2
1096 vncipherlast $out4,$out4,$in3
1097 vncipherlast $out5,$out5,$in4
1098 vncipherlast $out6,$out6,$in5
1099 vncipherlast $out7,$out7,$in6
1100 vmr $ivec,$in7
1101
1102 le?vperm $out2,$out2,$out2,$inpperm
1103 le?vperm $out3,$out3,$out3,$inpperm
1104 stvx_u $out2,$x00,$out
1105 le?vperm $out4,$out4,$out4,$inpperm
1106 stvx_u $out3,$x10,$out
1107 le?vperm $out5,$out5,$out5,$inpperm
1108 stvx_u $out4,$x20,$out
1109 le?vperm $out6,$out6,$out6,$inpperm
1110 stvx_u $out5,$x30,$out
1111 le?vperm $out7,$out7,$out7,$inpperm
1112 stvx_u $out6,$x40,$out
1113 stvx_u $out7,$x50,$out
1114 addi $out,$out,0x60
1115 b Lcbc_dec8x_done
1116
1117.align 5
1118Lcbc_dec8x_five:
1119 vncipherlast $out3,$out3,$ivec
1120 vncipherlast $out4,$out4,$in3
1121 vncipherlast $out5,$out5,$in4
1122 vncipherlast $out6,$out6,$in5
1123 vncipherlast $out7,$out7,$in6
1124 vmr $ivec,$in7
1125
1126 le?vperm $out3,$out3,$out3,$inpperm
1127 le?vperm $out4,$out4,$out4,$inpperm
1128 stvx_u $out3,$x00,$out
1129 le?vperm $out5,$out5,$out5,$inpperm
1130 stvx_u $out4,$x10,$out
1131 le?vperm $out6,$out6,$out6,$inpperm
1132 stvx_u $out5,$x20,$out
1133 le?vperm $out7,$out7,$out7,$inpperm
1134 stvx_u $out6,$x30,$out
1135 stvx_u $out7,$x40,$out
1136 addi $out,$out,0x50
1137 b Lcbc_dec8x_done
1138
1139.align 5
1140Lcbc_dec8x_four:
1141 vncipherlast $out4,$out4,$ivec
1142 vncipherlast $out5,$out5,$in4
1143 vncipherlast $out6,$out6,$in5
1144 vncipherlast $out7,$out7,$in6
1145 vmr $ivec,$in7
1146
1147 le?vperm $out4,$out4,$out4,$inpperm
1148 le?vperm $out5,$out5,$out5,$inpperm
1149 stvx_u $out4,$x00,$out
1150 le?vperm $out6,$out6,$out6,$inpperm
1151 stvx_u $out5,$x10,$out
1152 le?vperm $out7,$out7,$out7,$inpperm
1153 stvx_u $out6,$x20,$out
1154 stvx_u $out7,$x30,$out
1155 addi $out,$out,0x40
1156 b Lcbc_dec8x_done
1157
1158.align 5
1159Lcbc_dec8x_three:
1160 vncipherlast $out5,$out5,$ivec
1161 vncipherlast $out6,$out6,$in5
1162 vncipherlast $out7,$out7,$in6
1163 vmr $ivec,$in7
1164
1165 le?vperm $out5,$out5,$out5,$inpperm
1166 le?vperm $out6,$out6,$out6,$inpperm
1167 stvx_u $out5,$x00,$out
1168 le?vperm $out7,$out7,$out7,$inpperm
1169 stvx_u $out6,$x10,$out
1170 stvx_u $out7,$x20,$out
1171 addi $out,$out,0x30
1172 b Lcbc_dec8x_done
1173
1174.align 5
1175Lcbc_dec8x_two:
1176 vncipherlast $out6,$out6,$ivec
1177 vncipherlast $out7,$out7,$in6
1178 vmr $ivec,$in7
1179
1180 le?vperm $out6,$out6,$out6,$inpperm
1181 le?vperm $out7,$out7,$out7,$inpperm
1182 stvx_u $out6,$x00,$out
1183 stvx_u $out7,$x10,$out
1184 addi $out,$out,0x20
1185 b Lcbc_dec8x_done
1186
1187.align 5
1188Lcbc_dec8x_one:
1189 vncipherlast $out7,$out7,$ivec
1190 vmr $ivec,$in7
1191
1192 le?vperm $out7,$out7,$out7,$inpperm
1193 stvx_u $out7,0,$out
1194 addi $out,$out,0x10
1195
1196Lcbc_dec8x_done:
1197 le?vperm $ivec,$ivec,$ivec,$inpperm
1198 stvx_u $ivec,0,$ivp # write [unaligned] iv
1199
1200 li r10,`$FRAME+15`
1201 li r11,`$FRAME+31`
1202 stvx $inpperm,r10,$sp # wipe copies of round keys
1203 addi r10,r10,32
1204 stvx $inpperm,r11,$sp
1205 addi r11,r11,32
1206 stvx $inpperm,r10,$sp
1207 addi r10,r10,32
1208 stvx $inpperm,r11,$sp
1209 addi r11,r11,32
1210 stvx $inpperm,r10,$sp
1211 addi r10,r10,32
1212 stvx $inpperm,r11,$sp
1213 addi r11,r11,32
1214 stvx $inpperm,r10,$sp
1215 addi r10,r10,32
1216 stvx $inpperm,r11,$sp
1217 addi r11,r11,32
1218
1219 mtspr 256,$vrsave
1220 lvx v20,r10,$sp # ABI says so
1221 addi r10,r10,32
1222 lvx v21,r11,$sp
1223 addi r11,r11,32
1224 lvx v22,r10,$sp
1225 addi r10,r10,32
1226 lvx v23,r11,$sp
1227 addi r11,r11,32
1228 lvx v24,r10,$sp
1229 addi r10,r10,32
1230 lvx v25,r11,$sp
1231 addi r11,r11,32
1232 lvx v26,r10,$sp
1233 addi r10,r10,32
1234 lvx v27,r11,$sp
1235 addi r11,r11,32
1236 lvx v28,r10,$sp
1237 addi r10,r10,32
1238 lvx v29,r11,$sp
1239 addi r11,r11,32
1240 lvx v30,r10,$sp
1241 lvx v31,r11,$sp
1242 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
1243 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
1244 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
1245 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
1246 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
1247 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
1248 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
1249 blr
1250 .long 0
1251 .byte 0,12,0x04,0,0x80,6,6,0
1252 .long 0
1253.size .${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt
1254___
1255}} }}}
1256
1257#########################################################################
1258{{{ # CTR procedure[s] #
1259my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10));
1260my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3));
1261my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
1262 map("v$_",(4..11));
1263my $dat=$tmp;
1264
1265$code.=<<___;
1266.globl .${prefix}_ctr32_encrypt_blocks
1267.align 5
1268.${prefix}_ctr32_encrypt_blocks:
1269 ${UCMP}i $len,1
1270 bltlr-
1271
1272 lis r0,0xfff0
1273 mfspr $vrsave,256
1274 mtspr 256,r0
1275
1276 li $idx,15
1277 vxor $rndkey0,$rndkey0,$rndkey0
1278 le?vspltisb $tmp,0x0f
1279
1280 lvx $ivec,0,$ivp # load [unaligned] iv
1281 lvsl $inpperm,0,$ivp
1282 lvx $inptail,$idx,$ivp
1283 vspltisb $one,1
1284 le?vxor $inpperm,$inpperm,$tmp
1285 vperm $ivec,$ivec,$inptail,$inpperm
1286 vsldoi $one,$rndkey0,$one,1
1287
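# "one" now holds 0x00..0001 (a single 1 in the last byte), so the
# vadduwm in the loop below increments only the low 32-bit word of
# the counter block, i.e. a standard big-endian CTR32 increment.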
1288 neg r11,$inp
1289 ?lvsl $keyperm,0,$key # prepare for unaligned key
1290 lwz $rounds,240($key)
1291
1292 lvsr $inpperm,0,r11 # prepare for unaligned load
1293 lvx $inptail,0,$inp
1294 addi $inp,$inp,15 # 15 is not typo
1295 le?vxor $inpperm,$inpperm,$tmp
1296
1297 srwi $rounds,$rounds,1
1298 li $idx,16
1299 subi $rounds,$rounds,1
1300
1301 ${UCMP}i $len,8
1302 bge _aesp8_ctr32_encrypt8x
1303
1304 ?lvsr $outperm,0,$out # prepare for unaligned store
1305 vspltisb $outmask,-1
1306 lvx $outhead,0,$out
1307 ?vperm $outmask,$rndkey0,$outmask,$outperm
1308 le?vxor $outperm,$outperm,$tmp
1309
1310 lvx $rndkey0,0,$key
1311 mtctr $rounds
1312 lvx $rndkey1,$idx,$key
1313 addi $idx,$idx,16
1314 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
1315 vxor $inout,$ivec,$rndkey0
1316 lvx $rndkey0,$idx,$key
1317 addi $idx,$idx,16
1318 b Loop_ctr32_enc
1319
1320.align 5
1321Loop_ctr32_enc:
1322 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
1323 vcipher $inout,$inout,$rndkey1
1324 lvx $rndkey1,$idx,$key
1325 addi $idx,$idx,16
1326 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
1327 vcipher $inout,$inout,$rndkey0
1328 lvx $rndkey0,$idx,$key
1329 addi $idx,$idx,16
1330 bdnz Loop_ctr32_enc
1331
1332 vadduwm $ivec,$ivec,$one
1333 vmr $dat,$inptail
1334 lvx $inptail,0,$inp
1335 addi $inp,$inp,16
1336 subic. $len,$len,1 # blocks--
1337
1338 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
1339 vcipher $inout,$inout,$rndkey1
1340 lvx $rndkey1,$idx,$key
1341 vperm $dat,$dat,$inptail,$inpperm
1342 li $idx,16
1343 ?vperm $rndkey1,$rndkey0,$rndkey1,$keyperm
1344 lvx $rndkey0,0,$key
1345 vxor $dat,$dat,$rndkey1 # last round key
1346 vcipherlast $inout,$inout,$dat
1347
1348 lvx $rndkey1,$idx,$key
1349 addi $idx,$idx,16
1350 vperm $inout,$inout,$inout,$outperm
1351 vsel $dat,$outhead,$inout,$outmask
1352 mtctr $rounds
1353 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
1354 vmr $outhead,$inout
1355 vxor $inout,$ivec,$rndkey0
1356 lvx $rndkey0,$idx,$key
1357 addi $idx,$idx,16
1358 stvx $dat,0,$out
1359 addi $out,$out,16
1360 bne Loop_ctr32_enc
1361
1362 addi $out,$out,-1
1363 lvx $inout,0,$out # redundant in aligned case
1364 vsel $inout,$outhead,$inout,$outmask
1365 stvx $inout,0,$out
1366
1367 mtspr 256,$vrsave
1368 blr
1369 .long 0
1370 .byte 0,12,0x14,0,0,0,6,0
1371 .long 0
1372___
1373#########################################################################
1374{{ # Optimized CTR procedure #
1375my $key_="r11";
1376my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
1377 $x00=0 if ($flavour =~ /osx/);
1378my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14));
1379my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22));
1380my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys
1381 # v26-v31 last 6 round keys
1382my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
1383my ($two,$three,$four)=($outhead,$outperm,$outmask);
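# Counter step vectors reuse the registers of the scalar path's
# output-alignment vectors, which are dead in the 8x path; eight
# consecutive counter values are materialized up front with $one/$two
# and pre-xored with round key 0 before the loop.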
1384
1385$code.=<<___;
1386.align 5
1387_aesp8_ctr32_encrypt8x:
1388 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
1389 li r10,`$FRAME+8*16+15`
1390 li r11,`$FRAME+8*16+31`
1391 stvx v20,r10,$sp # ABI says so
1392 addi r10,r10,32
1393 stvx v21,r11,$sp
1394 addi r11,r11,32
1395 stvx v22,r10,$sp
1396 addi r10,r10,32
1397 stvx v23,r11,$sp
1398 addi r11,r11,32
1399 stvx v24,r10,$sp
1400 addi r10,r10,32
1401 stvx v25,r11,$sp
1402 addi r11,r11,32
1403 stvx v26,r10,$sp
1404 addi r10,r10,32
1405 stvx v27,r11,$sp
1406 addi r11,r11,32
1407 stvx v28,r10,$sp
1408 addi r10,r10,32
1409 stvx v29,r11,$sp
1410 addi r11,r11,32
1411 stvx v30,r10,$sp
1412 stvx v31,r11,$sp
1413 li r0,-1
1414 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
1415 li $x10,0x10
1416 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
1417 li $x20,0x20
1418 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
1419 li $x30,0x30
1420 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
1421 li $x40,0x40
1422 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
1423 li $x50,0x50
1424 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
1425 li $x60,0x60
1426 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
1427 li $x70,0x70
1428 mtspr 256,r0
1429
1430 subi $rounds,$rounds,3 # -4 in total
1431
1432 lvx $rndkey0,$x00,$key # load key schedule
1433 lvx v30,$x10,$key
1434 addi $key,$key,0x20
1435 lvx v31,$x00,$key
1436 ?vperm $rndkey0,$rndkey0,v30,$keyperm
1437 addi $key_,$sp,$FRAME+15
1438 mtctr $rounds
1439
1440Load_ctr32_enc_key:
1441 ?vperm v24,v30,v31,$keyperm
1442 lvx v30,$x10,$key
1443 addi $key,$key,0x20
1444 stvx v24,$x00,$key_ # off-load round[1]
1445 ?vperm v25,v31,v30,$keyperm
1446 lvx v31,$x00,$key
1447 stvx v25,$x10,$key_ # off-load round[2]
1448 addi $key_,$key_,0x20
1449 bdnz Load_ctr32_enc_key
1450
1451 lvx v26,$x10,$key
1452 ?vperm v24,v30,v31,$keyperm
1453 lvx v27,$x20,$key
1454 stvx v24,$x00,$key_ # off-load round[3]
1455 ?vperm v25,v31,v26,$keyperm
1456 lvx v28,$x30,$key
1457 stvx v25,$x10,$key_ # off-load round[4]
1458 addi $key_,$sp,$FRAME+15 # rewind $key_
1459 ?vperm v26,v26,v27,$keyperm
1460 lvx v29,$x40,$key
1461 ?vperm v27,v27,v28,$keyperm
1462 lvx v30,$x50,$key
1463 ?vperm v28,v28,v29,$keyperm
1464 lvx v31,$x60,$key
1465 ?vperm v29,v29,v30,$keyperm
1466 lvx $out0,$x70,$key # borrow $out0
1467 ?vperm v30,v30,v31,$keyperm
1468 lvx v24,$x00,$key_ # pre-load round[1]
1469 ?vperm v31,v31,$out0,$keyperm
1470 lvx v25,$x10,$key_ # pre-load round[2]
1471
1472 vadduwm $two,$one,$one
1473 subi $inp,$inp,15 # undo "caller"
1474 $SHL $len,$len,4
1475
1476 vadduwm $out1,$ivec,$one # counter values ...
1477 vadduwm $out2,$ivec,$two
1478 vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0]
1479 le?li $idx,8
1480 vadduwm $out3,$out1,$two
1481 vxor $out1,$out1,$rndkey0
1482 le?lvsl $inpperm,0,$idx
1483 vadduwm $out4,$out2,$two
1484 vxor $out2,$out2,$rndkey0
1485 le?vspltisb $tmp,0x0f
1486 vadduwm $out5,$out3,$two
1487 vxor $out3,$out3,$rndkey0
1488 le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
1489 vadduwm $out6,$out4,$two
1490 vxor $out4,$out4,$rndkey0
1491 vadduwm $out7,$out5,$two
1492 vxor $out5,$out5,$rndkey0
1493 vadduwm $ivec,$out6,$two # next counter value
1494 vxor $out6,$out6,$rndkey0
1495 vxor $out7,$out7,$rndkey0
1496
1497 mtctr $rounds
1498 b Loop_ctr32_enc8x
1499.align 5
1500Loop_ctr32_enc8x:
1501 vcipher $out0,$out0,v24
1502 vcipher $out1,$out1,v24
1503 vcipher $out2,$out2,v24
1504 vcipher $out3,$out3,v24
1505 vcipher $out4,$out4,v24
1506 vcipher $out5,$out5,v24
1507 vcipher $out6,$out6,v24
1508 vcipher $out7,$out7,v24
1509Loop_ctr32_enc8x_middle:
1510 lvx v24,$x20,$key_ # round[3]
1511 addi $key_,$key_,0x20
1512
1513 vcipher $out0,$out0,v25
1514 vcipher $out1,$out1,v25
1515 vcipher $out2,$out2,v25
1516 vcipher $out3,$out3,v25
1517 vcipher $out4,$out4,v25
1518 vcipher $out5,$out5,v25
1519 vcipher $out6,$out6,v25
1520 vcipher $out7,$out7,v25
1521 lvx v25,$x10,$key_ # round[4]
1522 bdnz Loop_ctr32_enc8x
1523
1524 subic r11,$len,256 # $len-256, borrow $key_
1525 vcipher $out0,$out0,v24
1526 vcipher $out1,$out1,v24
1527 vcipher $out2,$out2,v24
1528 vcipher $out3,$out3,v24
1529 vcipher $out4,$out4,v24
1530 vcipher $out5,$out5,v24
1531 vcipher $out6,$out6,v24
1532 vcipher $out7,$out7,v24
1533
1534 subfe r0,r0,r0 # borrow?-1:0
1535 vcipher $out0,$out0,v25
1536 vcipher $out1,$out1,v25
1537 vcipher $out2,$out2,v25
1538 vcipher $out3,$out3,v25
1539 vcipher $out4,$out4,v25
1540 vcipher $out5,$out5,v25
1541 vcipher $out6,$out6,v25
1542 vcipher $out7,$out7,v25
1543
1544 and r0,r0,r11
1545 addi $key_,$sp,$FRAME+15 # rewind $key_
1546 vcipher $out0,$out0,v26
1547 vcipher $out1,$out1,v26
1548 vcipher $out2,$out2,v26
1549 vcipher $out3,$out3,v26
1550 vcipher $out4,$out4,v26
1551 vcipher $out5,$out5,v26
1552 vcipher $out6,$out6,v26
1553 vcipher $out7,$out7,v26
1554 lvx v24,$x00,$key_ # re-pre-load round[1]
1555
1556 subic $len,$len,129 # $len-=129
1557 vcipher $out0,$out0,v27
1558 addi $len,$len,1 # $len-=128 really
1559 vcipher $out1,$out1,v27
1560 vcipher $out2,$out2,v27
1561 vcipher $out3,$out3,v27
1562 vcipher $out4,$out4,v27
1563 vcipher $out5,$out5,v27
1564 vcipher $out6,$out6,v27
1565 vcipher $out7,$out7,v27
1566 lvx v25,$x10,$key_ # re-pre-load round[2]
1567
1568 vcipher $out0,$out0,v28
1569 lvx_u $in0,$x00,$inp # load input
1570 vcipher $out1,$out1,v28
1571 lvx_u $in1,$x10,$inp
1572 vcipher $out2,$out2,v28
1573 lvx_u $in2,$x20,$inp
1574 vcipher $out3,$out3,v28
1575 lvx_u $in3,$x30,$inp
1576 vcipher $out4,$out4,v28
1577 lvx_u $in4,$x40,$inp
1578 vcipher $out5,$out5,v28
1579 lvx_u $in5,$x50,$inp
1580 vcipher $out6,$out6,v28
1581 lvx_u $in6,$x60,$inp
1582 vcipher $out7,$out7,v28
1583 lvx_u $in7,$x70,$inp
1584 addi $inp,$inp,0x80
1585
1586 vcipher $out0,$out0,v29
1587 le?vperm $in0,$in0,$in0,$inpperm
1588 vcipher $out1,$out1,v29
1589 le?vperm $in1,$in1,$in1,$inpperm
1590 vcipher $out2,$out2,v29
1591 le?vperm $in2,$in2,$in2,$inpperm
1592 vcipher $out3,$out3,v29
1593 le?vperm $in3,$in3,$in3,$inpperm
1594 vcipher $out4,$out4,v29
1595 le?vperm $in4,$in4,$in4,$inpperm
1596 vcipher $out5,$out5,v29
1597 le?vperm $in5,$in5,$in5,$inpperm
1598 vcipher $out6,$out6,v29
1599 le?vperm $in6,$in6,$in6,$inpperm
1600 vcipher $out7,$out7,v29
1601 le?vperm $in7,$in7,$in7,$inpperm
1602
1603 	add	$inp,$inp,r0		# $inp is adjusted in such
1604 					# a way that at exit from the
1605 					# loop inX-in7 are loaded
1606 					# with the last "words"
1607 subfe. r0,r0,r0 # borrow?-1:0
1608 vcipher $out0,$out0,v30
1609 vxor $in0,$in0,v31 # xor with last round key
1610 vcipher $out1,$out1,v30
1611 vxor $in1,$in1,v31
1612 vcipher $out2,$out2,v30
1613 vxor $in2,$in2,v31
1614 vcipher $out3,$out3,v30
1615 vxor $in3,$in3,v31
1616 vcipher $out4,$out4,v30
1617 vxor $in4,$in4,v31
1618 vcipher $out5,$out5,v30
1619 vxor $in5,$in5,v31
1620 vcipher $out6,$out6,v30
1621 vxor $in6,$in6,v31
1622 vcipher $out7,$out7,v30
1623 vxor $in7,$in7,v31
1624
1625 bne Lctr32_enc8x_break # did $len-129 borrow?
1626
1627 vcipherlast $in0,$out0,$in0
1628 vcipherlast $in1,$out1,$in1
1629 vadduwm $out1,$ivec,$one # counter values ...
1630 vcipherlast $in2,$out2,$in2
1631 vadduwm $out2,$ivec,$two
1632 vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0]
1633 vcipherlast $in3,$out3,$in3
1634 vadduwm $out3,$out1,$two
1635 vxor $out1,$out1,$rndkey0
1636 vcipherlast $in4,$out4,$in4
1637 vadduwm $out4,$out2,$two
1638 vxor $out2,$out2,$rndkey0
1639 vcipherlast $in5,$out5,$in5
1640 vadduwm $out5,$out3,$two
1641 vxor $out3,$out3,$rndkey0
1642 vcipherlast $in6,$out6,$in6
1643 vadduwm $out6,$out4,$two
1644 vxor $out4,$out4,$rndkey0
1645 vcipherlast $in7,$out7,$in7
1646 vadduwm $out7,$out5,$two
1647 vxor $out5,$out5,$rndkey0
1648 le?vperm $in0,$in0,$in0,$inpperm
1649 vadduwm $ivec,$out6,$two # next counter value
1650 vxor $out6,$out6,$rndkey0
1651 le?vperm $in1,$in1,$in1,$inpperm
1652 vxor $out7,$out7,$rndkey0
1653 mtctr $rounds
1654
1655 vcipher $out0,$out0,v24
1656 stvx_u $in0,$x00,$out
1657 le?vperm $in2,$in2,$in2,$inpperm
1658 vcipher $out1,$out1,v24
1659 stvx_u $in1,$x10,$out
1660 le?vperm $in3,$in3,$in3,$inpperm
1661 vcipher $out2,$out2,v24
1662 stvx_u $in2,$x20,$out
1663 le?vperm $in4,$in4,$in4,$inpperm
1664 vcipher $out3,$out3,v24
1665 stvx_u $in3,$x30,$out
1666 le?vperm $in5,$in5,$in5,$inpperm
1667 vcipher $out4,$out4,v24
1668 stvx_u $in4,$x40,$out
1669 le?vperm $in6,$in6,$in6,$inpperm
1670 vcipher $out5,$out5,v24
1671 stvx_u $in5,$x50,$out
1672 le?vperm $in7,$in7,$in7,$inpperm
1673 vcipher $out6,$out6,v24
1674 stvx_u $in6,$x60,$out
1675 vcipher $out7,$out7,v24
1676 stvx_u $in7,$x70,$out
1677 addi $out,$out,0x80
1678
1679 b Loop_ctr32_enc8x_middle
1680
1681.align 5
1682Lctr32_enc8x_break:
1683 cmpwi $len,-0x60
1684 blt Lctr32_enc8x_one
1685 nop
1686 beq Lctr32_enc8x_two
1687 cmpwi $len,-0x40
1688 blt Lctr32_enc8x_three
1689 nop
1690 beq Lctr32_enc8x_four
1691 cmpwi $len,-0x20
1692 blt Lctr32_enc8x_five
1693 nop
1694 beq Lctr32_enc8x_six
1695 cmpwi $len,0x00
1696 blt Lctr32_enc8x_seven
1697
1698Lctr32_enc8x_eight:
1699 vcipherlast $out0,$out0,$in0
1700 vcipherlast $out1,$out1,$in1
1701 vcipherlast $out2,$out2,$in2
1702 vcipherlast $out3,$out3,$in3
1703 vcipherlast $out4,$out4,$in4
1704 vcipherlast $out5,$out5,$in5
1705 vcipherlast $out6,$out6,$in6
1706 vcipherlast $out7,$out7,$in7
1707
1708 le?vperm $out0,$out0,$out0,$inpperm
1709 le?vperm $out1,$out1,$out1,$inpperm
1710 stvx_u $out0,$x00,$out
1711 le?vperm $out2,$out2,$out2,$inpperm
1712 stvx_u $out1,$x10,$out
1713 le?vperm $out3,$out3,$out3,$inpperm
1714 stvx_u $out2,$x20,$out
1715 le?vperm $out4,$out4,$out4,$inpperm
1716 stvx_u $out3,$x30,$out
1717 le?vperm $out5,$out5,$out5,$inpperm
1718 stvx_u $out4,$x40,$out
1719 le?vperm $out6,$out6,$out6,$inpperm
1720 stvx_u $out5,$x50,$out
1721 le?vperm $out7,$out7,$out7,$inpperm
1722 stvx_u $out6,$x60,$out
1723 stvx_u $out7,$x70,$out
1724 addi $out,$out,0x80
1725 b Lctr32_enc8x_done
1726
1727.align 5
1728Lctr32_enc8x_seven:
1729 vcipherlast $out0,$out0,$in1
1730 vcipherlast $out1,$out1,$in2
1731 vcipherlast $out2,$out2,$in3
1732 vcipherlast $out3,$out3,$in4
1733 vcipherlast $out4,$out4,$in5
1734 vcipherlast $out5,$out5,$in6
1735 vcipherlast $out6,$out6,$in7
1736
1737 le?vperm $out0,$out0,$out0,$inpperm
1738 le?vperm $out1,$out1,$out1,$inpperm
1739 stvx_u $out0,$x00,$out
1740 le?vperm $out2,$out2,$out2,$inpperm
1741 stvx_u $out1,$x10,$out
1742 le?vperm $out3,$out3,$out3,$inpperm
1743 stvx_u $out2,$x20,$out
1744 le?vperm $out4,$out4,$out4,$inpperm
1745 stvx_u $out3,$x30,$out
1746 le?vperm $out5,$out5,$out5,$inpperm
1747 stvx_u $out4,$x40,$out
1748 le?vperm $out6,$out6,$out6,$inpperm
1749 stvx_u $out5,$x50,$out
1750 stvx_u $out6,$x60,$out
1751 addi $out,$out,0x70
1752 b Lctr32_enc8x_done
1753
1754.align 5
1755Lctr32_enc8x_six:
1756 vcipherlast $out0,$out0,$in2
1757 vcipherlast $out1,$out1,$in3
1758 vcipherlast $out2,$out2,$in4
1759 vcipherlast $out3,$out3,$in5
1760 vcipherlast $out4,$out4,$in6
1761 vcipherlast $out5,$out5,$in7
1762
1763 le?vperm $out0,$out0,$out0,$inpperm
1764 le?vperm $out1,$out1,$out1,$inpperm
1765 stvx_u $out0,$x00,$out
1766 le?vperm $out2,$out2,$out2,$inpperm
1767 stvx_u $out1,$x10,$out
1768 le?vperm $out3,$out3,$out3,$inpperm
1769 stvx_u $out2,$x20,$out
1770 le?vperm $out4,$out4,$out4,$inpperm
1771 stvx_u $out3,$x30,$out
1772 le?vperm $out5,$out5,$out5,$inpperm
1773 stvx_u $out4,$x40,$out
1774 stvx_u $out5,$x50,$out
1775 addi $out,$out,0x60
1776 b Lctr32_enc8x_done
1777
1778.align 5
1779Lctr32_enc8x_five:
1780 vcipherlast $out0,$out0,$in3
1781 vcipherlast $out1,$out1,$in4
1782 vcipherlast $out2,$out2,$in5
1783 vcipherlast $out3,$out3,$in6
1784 vcipherlast $out4,$out4,$in7
1785
1786 le?vperm $out0,$out0,$out0,$inpperm
1787 le?vperm $out1,$out1,$out1,$inpperm
1788 stvx_u $out0,$x00,$out
1789 le?vperm $out2,$out2,$out2,$inpperm
1790 stvx_u $out1,$x10,$out
1791 le?vperm $out3,$out3,$out3,$inpperm
1792 stvx_u $out2,$x20,$out
1793 le?vperm $out4,$out4,$out4,$inpperm
1794 stvx_u $out3,$x30,$out
1795 stvx_u $out4,$x40,$out
1796 addi $out,$out,0x50
1797 b Lctr32_enc8x_done
1798
1799.align 5
1800Lctr32_enc8x_four:
1801 vcipherlast $out0,$out0,$in4
1802 vcipherlast $out1,$out1,$in5
1803 vcipherlast $out2,$out2,$in6
1804 vcipherlast $out3,$out3,$in7
1805
1806 le?vperm $out0,$out0,$out0,$inpperm
1807 le?vperm $out1,$out1,$out1,$inpperm
1808 stvx_u $out0,$x00,$out
1809 le?vperm $out2,$out2,$out2,$inpperm
1810 stvx_u $out1,$x10,$out
1811 le?vperm $out3,$out3,$out3,$inpperm
1812 stvx_u $out2,$x20,$out
1813 stvx_u $out3,$x30,$out
1814 addi $out,$out,0x40
1815 b Lctr32_enc8x_done
1816
1817.align 5
1818Lctr32_enc8x_three:
1819 vcipherlast $out0,$out0,$in5
1820 vcipherlast $out1,$out1,$in6
1821 vcipherlast $out2,$out2,$in7
1822
1823 le?vperm $out0,$out0,$out0,$inpperm
1824 le?vperm $out1,$out1,$out1,$inpperm
1825 stvx_u $out0,$x00,$out
1826 le?vperm $out2,$out2,$out2,$inpperm
1827 stvx_u $out1,$x10,$out
1828 stvx_u $out2,$x20,$out
1829 addi $out,$out,0x30
1830 	b	Lctr32_enc8x_done
1831
1832.align 5
1833Lctr32_enc8x_two:
1834 vcipherlast $out0,$out0,$in6
1835 vcipherlast $out1,$out1,$in7
1836
1837 le?vperm $out0,$out0,$out0,$inpperm
1838 le?vperm $out1,$out1,$out1,$inpperm
1839 stvx_u $out0,$x00,$out
1840 stvx_u $out1,$x10,$out
1841 addi $out,$out,0x20
1842 	b	Lctr32_enc8x_done
1843
1844.align 5
1845Lctr32_enc8x_one:
1846 vcipherlast $out0,$out0,$in7
1847
1848 le?vperm $out0,$out0,$out0,$inpperm
1849 stvx_u $out0,0,$out
1850 addi $out,$out,0x10
1851
1852Lctr32_enc8x_done:
1853 li r10,`$FRAME+15`
1854 li r11,`$FRAME+31`
1855 stvx $inpperm,r10,$sp # wipe copies of round keys
1856 addi r10,r10,32
1857 stvx $inpperm,r11,$sp
1858 addi r11,r11,32
1859 stvx $inpperm,r10,$sp
1860 addi r10,r10,32
1861 stvx $inpperm,r11,$sp
1862 addi r11,r11,32
1863 stvx $inpperm,r10,$sp
1864 addi r10,r10,32
1865 stvx $inpperm,r11,$sp
1866 addi r11,r11,32
1867 stvx $inpperm,r10,$sp
1868 addi r10,r10,32
1869 stvx $inpperm,r11,$sp
1870 addi r11,r11,32
1871
1872 mtspr 256,$vrsave
1873 lvx v20,r10,$sp # ABI says so
1874 addi r10,r10,32
1875 lvx v21,r11,$sp
1876 addi r11,r11,32
1877 lvx v22,r10,$sp
1878 addi r10,r10,32
1879 lvx v23,r11,$sp
1880 addi r11,r11,32
1881 lvx v24,r10,$sp
1882 addi r10,r10,32
1883 lvx v25,r11,$sp
1884 addi r11,r11,32
1885 lvx v26,r10,$sp
1886 addi r10,r10,32
1887 lvx v27,r11,$sp
1888 addi r11,r11,32
1889 lvx v28,r10,$sp
1890 addi r10,r10,32
1891 lvx v29,r11,$sp
1892 addi r11,r11,32
1893 lvx v30,r10,$sp
1894 lvx v31,r11,$sp
1895 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
1896 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
1897 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
1898 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
1899 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
1900 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
1901 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
1902 blr
1903 .long 0
1904 .byte 0,12,0x04,0,0x80,6,6,0
1905 .long 0
1906.size .${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks
1907___
1908}} }}}
1909
1910#########################################################################
1911{{{ # XTS procedures #
1912# int aes_p8_xts_[en|de]crypt(const char *inp, char *out, size_t len, #
1913# const AES_KEY *key1, const AES_KEY *key2, #
1914# [const] unsigned char iv[16]); #
1915# If $key2 is NULL, then a "tweak chaining" mode is engaged, in which  #
1916# the input tweak value is assumed to be encrypted already, and the    #
1917# last tweak value, suitable for a consecutive call on the same chunk  #
1918# of data, is written back to the original buffer. In addition, in     #
1919# "tweak chaining" mode only complete input blocks are processed.      #
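#
# A hypothetical call sequence for "tweak chaining" (key2==NULL); the
# generated symbol is ${prefix}_xts_encrypt, i.e. aes_hw_xts_encrypt
# with the prefix set above. n1 must be a multiple of 16 and iv is
# updated in place between the calls:
#
#   aes_hw_xts_encrypt(in1, out1, n1, key1, NULL, iv);
#   aes_hw_xts_encrypt(in2, out2, n2, key1, NULL, iv);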
1920
1921my ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) = map("r$_",(3..10));
1922my ($rndkey0,$rndkey1,$inout) = map("v$_",(0..2));
1923my ($output,$inptail,$inpperm,$leperm,$keyperm) = map("v$_",(3..7));
1924my ($tweak,$seven,$eighty7,$tmp,$tweak1) = map("v$_",(8..12));
1925my $taillen = $key2;
1926
1927 ($inp,$idx) = ($idx,$inp); # reassign
1928
1929$code.=<<___;
1930.globl .${prefix}_xts_encrypt
1931.align 5
1932.${prefix}_xts_encrypt:
1933 mr $inp,r3 # reassign
1934 li r3,-1
1935 ${UCMP}i $len,16
1936 bltlr-
1937
1938 lis r0,0xfff0
1939 mfspr r12,256 # save vrsave
1940 li r11,0
1941 mtspr 256,r0
1942
1943 vspltisb $seven,0x07 # 0x070707..07
1944 le?lvsl $leperm,r11,r11
1945 le?vspltisb $tmp,0x0f
1946 le?vxor $leperm,$leperm,$seven
1947
1948 li $idx,15
1949 lvx $tweak,0,$ivp # load [unaligned] iv
1950 lvsl $inpperm,0,$ivp
1951 lvx $inptail,$idx,$ivp
1952 le?vxor $inpperm,$inpperm,$tmp
1953 vperm $tweak,$tweak,$inptail,$inpperm
1954
1955 neg r11,$inp
1956 lvsr $inpperm,0,r11 # prepare for unaligned load
1957 lvx $inout,0,$inp
1958 addi $inp,$inp,15 # 15 is not typo
1959 le?vxor $inpperm,$inpperm,$tmp
1960
1961 ${UCMP}i $key2,0 # key2==NULL?
1962 beq Lxts_enc_no_key2
1963
1964 ?lvsl $keyperm,0,$key2 # prepare for unaligned key
1965 lwz $rounds,240($key2)
1966 srwi $rounds,$rounds,1
1967 subi $rounds,$rounds,1
1968 li $idx,16
1969
1970 lvx $rndkey0,0,$key2
1971 lvx $rndkey1,$idx,$key2
1972 addi $idx,$idx,16
1973 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
1974 vxor $tweak,$tweak,$rndkey0
1975 lvx $rndkey0,$idx,$key2
1976 addi $idx,$idx,16
1977 mtctr $rounds
1978
1979Ltweak_xts_enc:
1980 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
1981 vcipher $tweak,$tweak,$rndkey1
1982 lvx $rndkey1,$idx,$key2
1983 addi $idx,$idx,16
1984 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
1985 vcipher $tweak,$tweak,$rndkey0
1986 lvx $rndkey0,$idx,$key2
1987 addi $idx,$idx,16
1988 bdnz Ltweak_xts_enc
1989
1990 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
1991 vcipher $tweak,$tweak,$rndkey1
1992 lvx $rndkey1,$idx,$key2
1993 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
1994 vcipherlast $tweak,$tweak,$rndkey0
1995
1996 li $ivp,0 # don't chain the tweak
1997 b Lxts_enc
1998
1999Lxts_enc_no_key2:
2000 li $idx,-16
2001 and $len,$len,$idx # in "tweak chaining"
2002 # mode only complete
2003 # blocks are processed
2004Lxts_enc:
2005 lvx $inptail,0,$inp
2006 addi $inp,$inp,16
2007
2008 ?lvsl $keyperm,0,$key1 # prepare for unaligned key
2009 lwz $rounds,240($key1)
2010 srwi $rounds,$rounds,1
2011 subi $rounds,$rounds,1
2012 li $idx,16
2013
2014 vslb $eighty7,$seven,$seven # 0x808080..80
2015 vor $eighty7,$eighty7,$seven # 0x878787..87
2016 vspltisb $tmp,1 # 0x010101..01
2017 vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01
2018
2019 ${UCMP}i $len,96
2020 bge _aesp8_xts_encrypt6x
2021
2022 andi. $taillen,$len,15
2023 subic r0,$len,32
2024 subi $taillen,$taillen,16
2025 subfe r0,r0,r0
2026 and r0,r0,$taillen
2027 add $inp,$inp,r0
2028
2029 lvx $rndkey0,0,$key1
2030 lvx $rndkey1,$idx,$key1
2031 addi $idx,$idx,16
2032 vperm $inout,$inout,$inptail,$inpperm
2033 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2034 vxor $inout,$inout,$tweak
2035 vxor $inout,$inout,$rndkey0
2036 lvx $rndkey0,$idx,$key1
2037 addi $idx,$idx,16
2038 mtctr $rounds
2039 b Loop_xts_enc
2040
2041.align 5
2042Loop_xts_enc:
2043 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2044 vcipher $inout,$inout,$rndkey1
2045 lvx $rndkey1,$idx,$key1
2046 addi $idx,$idx,16
2047 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2048 vcipher $inout,$inout,$rndkey0
2049 lvx $rndkey0,$idx,$key1
2050 addi $idx,$idx,16
2051 bdnz Loop_xts_enc
2052
2053 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2054 vcipher $inout,$inout,$rndkey1
2055 lvx $rndkey1,$idx,$key1
2056 li $idx,16
2057 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2058 vxor $rndkey0,$rndkey0,$tweak
2059 vcipherlast $output,$inout,$rndkey0
2060
2061 le?vperm $tmp,$output,$output,$leperm
2062 be?nop
2063 le?stvx_u $tmp,0,$out
2064 be?stvx_u $output,0,$out
2065 addi $out,$out,16
2066
2067 subic. $len,$len,16
2068 beq Lxts_enc_done
2069
2070 vmr $inout,$inptail
2071 lvx $inptail,0,$inp
2072 addi $inp,$inp,16
2073 lvx $rndkey0,0,$key1
2074 lvx $rndkey1,$idx,$key1
2075 addi $idx,$idx,16
2076
2077 subic r0,$len,32
2078 subfe r0,r0,r0
2079 and r0,r0,$taillen
2080 add $inp,$inp,r0
2081
2082 vsrab $tmp,$tweak,$seven # next tweak value
2083 vaddubm $tweak,$tweak,$tweak
2084 vsldoi $tmp,$tmp,$tmp,15
2085 vand $tmp,$tmp,$eighty7
2086 vxor $tweak,$tweak,$tmp
2087
2088 vperm $inout,$inout,$inptail,$inpperm
2089 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2090 vxor $inout,$inout,$tweak
2091 vxor $output,$output,$rndkey0 # just in case $len<16
2092 vxor $inout,$inout,$rndkey0
2093 lvx $rndkey0,$idx,$key1
2094 addi $idx,$idx,16
2095
2096 mtctr $rounds
2097 ${UCMP}i $len,16
2098 bge Loop_xts_enc
2099
2100 vxor $output,$output,$tweak
2101 lvsr $inpperm,0,$len # $inpperm is no longer needed
2102 vxor $inptail,$inptail,$inptail # $inptail is no longer needed
2103 vspltisb $tmp,-1
2104 vperm $inptail,$inptail,$tmp,$inpperm
2105 vsel $inout,$inout,$output,$inptail
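	# ciphertext stealing: vperm of a zero and an all-ones vector
	# through lvsr of the tail length builds a mask whose first
	# taillen bytes are clear, so vsel keeps the plaintext tail and
	# pads it with bytes of the previous ciphertext block; the byte
	# loop below then moves the ciphertext head into the tail slot
	# on output before this combined block is encrypted once more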
2106
2107 subi r11,$out,17
2108 subi $out,$out,16
2109 mtctr $len
2110 li $len,16
2111Loop_xts_enc_steal:
2112 lbzu r0,1(r11)
2113 stb r0,16(r11)
2114 bdnz Loop_xts_enc_steal
2115
2116 mtctr $rounds
2117 b Loop_xts_enc # one more time...
2118
2119Lxts_enc_done:
2120 ${UCMP}i $ivp,0
2121 beq Lxts_enc_ret
2122
2123 vsrab $tmp,$tweak,$seven # next tweak value
2124 vaddubm $tweak,$tweak,$tweak
2125 vsldoi $tmp,$tmp,$tmp,15
2126 vand $tmp,$tmp,$eighty7
2127 vxor $tweak,$tweak,$tmp
2128
2129 le?vperm $tweak,$tweak,$tweak,$leperm
2130 stvx_u $tweak,0,$ivp
2131
2132Lxts_enc_ret:
2133 mtspr 256,r12 # restore vrsave
2134 li r3,0
2135 blr
2136 .long 0
2137 .byte 0,12,0x04,0,0x80,6,6,0
2138 .long 0
2139.size .${prefix}_xts_encrypt,.-.${prefix}_xts_encrypt
2140
2141.globl .${prefix}_xts_decrypt
2142.align 5
2143.${prefix}_xts_decrypt:
2144 mr $inp,r3 # reassign
2145 li r3,-1
2146 ${UCMP}i $len,16
2147 bltlr-
2148
2149 lis r0,0xfff8
2150 mfspr r12,256 # save vrsave
2151 li r11,0
2152 mtspr 256,r0
2153
2154 andi. r0,$len,15
2155 neg r0,r0
2156 andi. r0,r0,16
2157 sub $len,$len,r0
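	# when a tail exists, hold one extra complete block back from
	# the bulk count: decrypt-side stealing ties the last full block
	# to the partial one, and both are finished via Ltail_xts_dec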
2158
2159 vspltisb $seven,0x07 # 0x070707..07
2160 le?lvsl $leperm,r11,r11
2161 le?vspltisb $tmp,0x0f
2162 le?vxor $leperm,$leperm,$seven
2163
2164 li $idx,15
2165 lvx $tweak,0,$ivp # load [unaligned] iv
2166 lvsl $inpperm,0,$ivp
2167 lvx $inptail,$idx,$ivp
2168 le?vxor $inpperm,$inpperm,$tmp
2169 vperm $tweak,$tweak,$inptail,$inpperm
2170
2171 neg r11,$inp
2172 lvsr $inpperm,0,r11 # prepare for unaligned load
2173 lvx $inout,0,$inp
2174	addi	$inp,$inp,15		# 15 is not a typo
2175 le?vxor $inpperm,$inpperm,$tmp
2176
2177 ${UCMP}i $key2,0 # key2==NULL?
2178 beq Lxts_dec_no_key2
2179
2180 ?lvsl $keyperm,0,$key2 # prepare for unaligned key
2181 lwz $rounds,240($key2)
2182 srwi $rounds,$rounds,1
2183 subi $rounds,$rounds,1
2184 li $idx,16
2185
2186 lvx $rndkey0,0,$key2
2187 lvx $rndkey1,$idx,$key2
2188 addi $idx,$idx,16
2189 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2190 vxor $tweak,$tweak,$rndkey0
2191 lvx $rndkey0,$idx,$key2
2192 addi $idx,$idx,16
2193 mtctr $rounds
2194
2195Ltweak_xts_dec:
2196 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2197 vcipher $tweak,$tweak,$rndkey1
2198 lvx $rndkey1,$idx,$key2
2199 addi $idx,$idx,16
2200 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2201 vcipher $tweak,$tweak,$rndkey0
2202 lvx $rndkey0,$idx,$key2
2203 addi $idx,$idx,16
2204 bdnz Ltweak_xts_dec
2205
2206 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2207 vcipher $tweak,$tweak,$rndkey1
2208 lvx $rndkey1,$idx,$key2
2209 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2210 vcipherlast $tweak,$tweak,$rndkey0
2211
2212 li $ivp,0 # don't chain the tweak
2213 b Lxts_dec
2214
2215Lxts_dec_no_key2:
2216 neg $idx,$len
2217 andi. $idx,$idx,15
2218 add $len,$len,$idx # in "tweak chaining"
2219 # mode only complete
2220 # blocks are processed
2221Lxts_dec:
2222 lvx $inptail,0,$inp
2223 addi $inp,$inp,16
2224
2225 ?lvsl $keyperm,0,$key1 # prepare for unaligned key
2226 lwz $rounds,240($key1)
2227 srwi $rounds,$rounds,1
2228 subi $rounds,$rounds,1
2229 li $idx,16
2230
2231 vslb $eighty7,$seven,$seven # 0x808080..80
2232 vor $eighty7,$eighty7,$seven # 0x878787..87
2233 vspltisb $tmp,1 # 0x010101..01
2234 vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01
2235
2236 ${UCMP}i $len,96
2237 bge _aesp8_xts_decrypt6x
2238
2239 lvx $rndkey0,0,$key1
2240 lvx $rndkey1,$idx,$key1
2241 addi $idx,$idx,16
2242 vperm $inout,$inout,$inptail,$inpperm
2243 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2244 vxor $inout,$inout,$tweak
2245 vxor $inout,$inout,$rndkey0
2246 lvx $rndkey0,$idx,$key1
2247 addi $idx,$idx,16
2248 mtctr $rounds
2249
2250 ${UCMP}i $len,16
2251 blt Ltail_xts_dec
2252 be?b Loop_xts_dec
2253
2254.align 5
2255Loop_xts_dec:
2256 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2257 vncipher $inout,$inout,$rndkey1
2258 lvx $rndkey1,$idx,$key1
2259 addi $idx,$idx,16
2260 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2261 vncipher $inout,$inout,$rndkey0
2262 lvx $rndkey0,$idx,$key1
2263 addi $idx,$idx,16
2264 bdnz Loop_xts_dec
2265
2266 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2267 vncipher $inout,$inout,$rndkey1
2268 lvx $rndkey1,$idx,$key1
2269 li $idx,16
2270 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2271 vxor $rndkey0,$rndkey0,$tweak
2272 vncipherlast $output,$inout,$rndkey0
2273
2274 le?vperm $tmp,$output,$output,$leperm
2275 be?nop
2276 le?stvx_u $tmp,0,$out
2277 be?stvx_u $output,0,$out
2278 addi $out,$out,16
2279
2280 subic. $len,$len,16
2281 beq Lxts_dec_done
2282
2283 vmr $inout,$inptail
2284 lvx $inptail,0,$inp
2285 addi $inp,$inp,16
2286 lvx $rndkey0,0,$key1
2287 lvx $rndkey1,$idx,$key1
2288 addi $idx,$idx,16
2289
2290 vsrab $tmp,$tweak,$seven # next tweak value
2291 vaddubm $tweak,$tweak,$tweak
2292 vsldoi $tmp,$tmp,$tmp,15
2293 vand $tmp,$tmp,$eighty7
2294 vxor $tweak,$tweak,$tmp
2295
2296 vperm $inout,$inout,$inptail,$inpperm
2297 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2298 vxor $inout,$inout,$tweak
2299 vxor $inout,$inout,$rndkey0
2300 lvx $rndkey0,$idx,$key1
2301 addi $idx,$idx,16
2302
2303 mtctr $rounds
2304 ${UCMP}i $len,16
2305 bge Loop_xts_dec
2306
2307Ltail_xts_dec:
2308 vsrab $tmp,$tweak,$seven # next tweak value
2309 vaddubm $tweak1,$tweak,$tweak
2310 vsldoi $tmp,$tmp,$tmp,15
2311 vand $tmp,$tmp,$eighty7
2312 vxor $tweak1,$tweak1,$tmp
2313
2314 subi $inp,$inp,16
2315 add $inp,$inp,$len
2316
2317 vxor $inout,$inout,$tweak # :-(
2318 vxor $inout,$inout,$tweak1 # :-)
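	# decryption steals in the opposite order: the last complete
	# block must be processed under the next tweak, so undo the
	# $tweak whitening and re-whiten with $tweak1; the partial
	# block is finished under $tweak afterwards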
2319
2320Loop_xts_dec_short:
2321 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2322 vncipher $inout,$inout,$rndkey1
2323 lvx $rndkey1,$idx,$key1
2324 addi $idx,$idx,16
2325 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2326 vncipher $inout,$inout,$rndkey0
2327 lvx $rndkey0,$idx,$key1
2328 addi $idx,$idx,16
2329 bdnz Loop_xts_dec_short
2330
2331 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2332 vncipher $inout,$inout,$rndkey1
2333 lvx $rndkey1,$idx,$key1
2334 li $idx,16
2335 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2336 vxor $rndkey0,$rndkey0,$tweak1
2337 vncipherlast $output,$inout,$rndkey0
2338
2339 le?vperm $tmp,$output,$output,$leperm
2340 be?nop
2341 le?stvx_u $tmp,0,$out
2342 be?stvx_u $output,0,$out
2343
2344 vmr $inout,$inptail
2345 lvx $inptail,0,$inp
2346 #addi $inp,$inp,16
2347 lvx $rndkey0,0,$key1
2348 lvx $rndkey1,$idx,$key1
2349 addi $idx,$idx,16
2350 vperm $inout,$inout,$inptail,$inpperm
2351 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2352
2353 lvsr $inpperm,0,$len # $inpperm is no longer needed
2354 vxor $inptail,$inptail,$inptail # $inptail is no longer needed
2355 vspltisb $tmp,-1
2356 vperm $inptail,$inptail,$tmp,$inpperm
2357 vsel $inout,$inout,$output,$inptail
2358
2359 vxor $rndkey0,$rndkey0,$tweak
2360 vxor $inout,$inout,$rndkey0
2361 lvx $rndkey0,$idx,$key1
2362 addi $idx,$idx,16
2363
2364 subi r11,$out,1
2365 mtctr $len
2366 li $len,16
2367Loop_xts_dec_steal:
2368 lbzu r0,1(r11)
2369 stb r0,16(r11)
2370 bdnz Loop_xts_dec_steal
2371
2372 mtctr $rounds
2373 b Loop_xts_dec # one more time...
2374
2375Lxts_dec_done:
2376 ${UCMP}i $ivp,0
2377 beq Lxts_dec_ret
2378
2379 vsrab $tmp,$tweak,$seven # next tweak value
2380 vaddubm $tweak,$tweak,$tweak
2381 vsldoi $tmp,$tmp,$tmp,15
2382 vand $tmp,$tmp,$eighty7
2383 vxor $tweak,$tweak,$tmp
2384
2385 le?vperm $tweak,$tweak,$tweak,$leperm
2386 stvx_u $tweak,0,$ivp
2387
2388Lxts_dec_ret:
2389 mtspr 256,r12 # restore vrsave
2390 li r3,0
2391 blr
2392 .long 0
2393 .byte 0,12,0x04,0,0x80,6,6,0
2394 .long 0
2395.size .${prefix}_xts_decrypt,.-.${prefix}_xts_decrypt
2396___
2397#########################################################################
2398{{ # Optimized XTS procedures #
2399my $key_=$key2;
2400my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,3,26..31));
2401 $x00=0 if ($flavour =~ /osx/);
2402my ($in0, $in1, $in2, $in3, $in4, $in5 )=map("v$_",(0..5));
2403my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16));
2404my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22));
2405my $rndkey0="v23";	# v24-v25 rotating buffer for first round keys
2406 # v26-v31 last 6 round keys
2407my ($keyperm)=($out0);	# same register as "caller", so the assignment is redundant
2408my $taillen=$x70;
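# All 32 vector registers are in use on this path: six input blocks,
# six outputs, six tweaks, the constants, $rndkey0 and the v24-v31 key
# window. Only eight round keys fit in registers at once, so the early
# rounds are off-loaded to an aligned stack area and streamed back
# through the rotating v24/v25 pair inside the main loops.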
2409
2410$code.=<<___;
2411.align 5
2412_aesp8_xts_encrypt6x:
2413 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
2414 mflr r11
2415 li r7,`$FRAME+8*16+15`
2416 li r3,`$FRAME+8*16+31`
2417 $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
2418 stvx v20,r7,$sp # ABI says so
2419 addi r7,r7,32
2420 stvx v21,r3,$sp
2421 addi r3,r3,32
2422 stvx v22,r7,$sp
2423 addi r7,r7,32
2424 stvx v23,r3,$sp
2425 addi r3,r3,32
2426 stvx v24,r7,$sp
2427 addi r7,r7,32
2428 stvx v25,r3,$sp
2429 addi r3,r3,32
2430 stvx v26,r7,$sp
2431 addi r7,r7,32
2432 stvx v27,r3,$sp
2433 addi r3,r3,32
2434 stvx v28,r7,$sp
2435 addi r7,r7,32
2436 stvx v29,r3,$sp
2437 addi r3,r3,32
2438 stvx v30,r7,$sp
2439 stvx v31,r3,$sp
2440 li r0,-1
2441 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
2442 li $x10,0x10
2443 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
2444 li $x20,0x20
2445 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
2446 li $x30,0x30
2447 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
2448 li $x40,0x40
2449 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
2450 li $x50,0x50
2451 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
2452 li $x60,0x60
2453 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
2454 li $x70,0x70
2455 mtspr 256,r0
2456
2457 subi $rounds,$rounds,3 # -4 in total
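	# ctr = rounds/2-4: both the key off-load loop and the cipher
	# loops handle two rounds per pass; the remaining eight rounds
	# are applied explicitly after loop exit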
2458
2459 lvx $rndkey0,$x00,$key1 # load key schedule
2460 lvx v30,$x10,$key1
2461 addi $key1,$key1,0x20
2462 lvx v31,$x00,$key1
2463 ?vperm $rndkey0,$rndkey0,v30,$keyperm
2464 addi $key_,$sp,$FRAME+15
2465 mtctr $rounds
2466
2467Load_xts_enc_key:
2468 ?vperm v24,v30,v31,$keyperm
2469 lvx v30,$x10,$key1
2470 addi $key1,$key1,0x20
2471 stvx v24,$x00,$key_ # off-load round[1]
2472 ?vperm v25,v31,v30,$keyperm
2473 lvx v31,$x00,$key1
2474 stvx v25,$x10,$key_ # off-load round[2]
2475 addi $key_,$key_,0x20
2476 bdnz Load_xts_enc_key
2477
2478 lvx v26,$x10,$key1
2479 ?vperm v24,v30,v31,$keyperm
2480 lvx v27,$x20,$key1
2481 stvx v24,$x00,$key_ # off-load round[3]
2482 ?vperm v25,v31,v26,$keyperm
2483 lvx v28,$x30,$key1
2484 stvx v25,$x10,$key_ # off-load round[4]
2485 addi $key_,$sp,$FRAME+15 # rewind $key_
2486 ?vperm v26,v26,v27,$keyperm
2487 lvx v29,$x40,$key1
2488 ?vperm v27,v27,v28,$keyperm
2489 lvx v30,$x50,$key1
2490 ?vperm v28,v28,v29,$keyperm
2491 lvx v31,$x60,$key1
2492 ?vperm v29,v29,v30,$keyperm
2493 lvx $twk5,$x70,$key1 # borrow $twk5
2494 ?vperm v30,v30,v31,$keyperm
2495 lvx v24,$x00,$key_ # pre-load round[1]
2496 ?vperm v31,v31,$twk5,$keyperm
2497 lvx v25,$x10,$key_ # pre-load round[2]
2498
2499 vperm $in0,$inout,$inptail,$inpperm
2500 subi $inp,$inp,31 # undo "caller"
2501 vxor $twk0,$tweak,$rndkey0
2502 vsrab $tmp,$tweak,$seven # next tweak value
2503 vaddubm $tweak,$tweak,$tweak
2504 vsldoi $tmp,$tmp,$tmp,15
2505 vand $tmp,$tmp,$eighty7
2506 vxor $out0,$in0,$twk0
2507 vxor $tweak,$tweak,$tmp
2508
2509 lvx_u $in1,$x10,$inp
2510 vxor $twk1,$tweak,$rndkey0
2511 vsrab $tmp,$tweak,$seven # next tweak value
2512 vaddubm $tweak,$tweak,$tweak
2513 vsldoi $tmp,$tmp,$tmp,15
2514 le?vperm $in1,$in1,$in1,$leperm
2515 vand $tmp,$tmp,$eighty7
2516 vxor $out1,$in1,$twk1
2517 vxor $tweak,$tweak,$tmp
2518
2519 lvx_u $in2,$x20,$inp
2520 andi. $taillen,$len,15
2521 vxor $twk2,$tweak,$rndkey0
2522 vsrab $tmp,$tweak,$seven # next tweak value
2523 vaddubm $tweak,$tweak,$tweak
2524 vsldoi $tmp,$tmp,$tmp,15
2525 le?vperm $in2,$in2,$in2,$leperm
2526 vand $tmp,$tmp,$eighty7
2527 vxor $out2,$in2,$twk2
2528 vxor $tweak,$tweak,$tmp
2529
2530 lvx_u $in3,$x30,$inp
2531 sub $len,$len,$taillen
2532 vxor $twk3,$tweak,$rndkey0
2533 vsrab $tmp,$tweak,$seven # next tweak value
2534 vaddubm $tweak,$tweak,$tweak
2535 vsldoi $tmp,$tmp,$tmp,15
2536 le?vperm $in3,$in3,$in3,$leperm
2537 vand $tmp,$tmp,$eighty7
2538 vxor $out3,$in3,$twk3
2539 vxor $tweak,$tweak,$tmp
2540
2541 lvx_u $in4,$x40,$inp
2542 subi $len,$len,0x60
2543 vxor $twk4,$tweak,$rndkey0
2544 vsrab $tmp,$tweak,$seven # next tweak value
2545 vaddubm $tweak,$tweak,$tweak
2546 vsldoi $tmp,$tmp,$tmp,15
2547 le?vperm $in4,$in4,$in4,$leperm
2548 vand $tmp,$tmp,$eighty7
2549 vxor $out4,$in4,$twk4
2550 vxor $tweak,$tweak,$tmp
2551
2552 lvx_u $in5,$x50,$inp
2553 addi $inp,$inp,0x60
2554 vxor $twk5,$tweak,$rndkey0
2555 vsrab $tmp,$tweak,$seven # next tweak value
2556 vaddubm $tweak,$tweak,$tweak
2557 vsldoi $tmp,$tmp,$tmp,15
2558 le?vperm $in5,$in5,$in5,$leperm
2559 vand $tmp,$tmp,$eighty7
2560 vxor $out5,$in5,$twk5
2561 vxor $tweak,$tweak,$tmp
2562
2563 vxor v31,v31,$rndkey0
2564 mtctr $rounds
2565 b Loop_xts_enc6x
2566
2567.align 5
2568Loop_xts_enc6x:
2569 vcipher $out0,$out0,v24
2570 vcipher $out1,$out1,v24
2571 vcipher $out2,$out2,v24
2572 vcipher $out3,$out3,v24
2573 vcipher $out4,$out4,v24
2574 vcipher $out5,$out5,v24
2575 lvx v24,$x20,$key_ # round[3]
2576 addi $key_,$key_,0x20
2577
2578 vcipher $out0,$out0,v25
2579 vcipher $out1,$out1,v25
2580 vcipher $out2,$out2,v25
2581 vcipher $out3,$out3,v25
2582 vcipher $out4,$out4,v25
2583 vcipher $out5,$out5,v25
2584 lvx v25,$x10,$key_ # round[4]
2585 bdnz Loop_xts_enc6x
2586
2587 subic $len,$len,96 # $len-=96
2588 vxor $in0,$twk0,v31 # xor with last round key
2589 vcipher $out0,$out0,v24
2590 vcipher $out1,$out1,v24
2591 vsrab $tmp,$tweak,$seven # next tweak value
2592 vxor $twk0,$tweak,$rndkey0
2593 vaddubm $tweak,$tweak,$tweak
2594 vcipher $out2,$out2,v24
2595 vcipher $out3,$out3,v24
2596 vsldoi $tmp,$tmp,$tmp,15
2597 vcipher $out4,$out4,v24
2598 vcipher $out5,$out5,v24
2599
2600 subfe. r0,r0,r0 # borrow?-1:0
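	# subfe on identical operands turns the subic borrow into 0 when
	# another full 96 bytes remain, or -1 otherwise: a branch-free
	# mask that gates the $inp rewind for the final, short pass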
2601 vand $tmp,$tmp,$eighty7
2602 vcipher $out0,$out0,v25
2603 vcipher $out1,$out1,v25
2604 vxor $tweak,$tweak,$tmp
2605 vcipher $out2,$out2,v25
2606 vcipher $out3,$out3,v25
2607 vxor $in1,$twk1,v31
2608 vsrab $tmp,$tweak,$seven # next tweak value
2609 vxor $twk1,$tweak,$rndkey0
2610 vcipher $out4,$out4,v25
2611 vcipher $out5,$out5,v25
2612
2613 and r0,r0,$len
2614 vaddubm $tweak,$tweak,$tweak
2615 vsldoi $tmp,$tmp,$tmp,15
2616 vcipher $out0,$out0,v26
2617 vcipher $out1,$out1,v26
2618 vand $tmp,$tmp,$eighty7
2619 vcipher $out2,$out2,v26
2620 vcipher $out3,$out3,v26
2621 vxor $tweak,$tweak,$tmp
2622 vcipher $out4,$out4,v26
2623 vcipher $out5,$out5,v26
2624
2625	add	$inp,$inp,r0		# $inp is adjusted in such a
2626					# way that at exit from the
2627					# loop inX-in5 are loaded
2628					# with the last "words"
2629 vxor $in2,$twk2,v31
2630 vsrab $tmp,$tweak,$seven # next tweak value
2631 vxor $twk2,$tweak,$rndkey0
2632 vaddubm $tweak,$tweak,$tweak
2633 vcipher $out0,$out0,v27
2634 vcipher $out1,$out1,v27
2635 vsldoi $tmp,$tmp,$tmp,15
2636 vcipher $out2,$out2,v27
2637 vcipher $out3,$out3,v27
2638 vand $tmp,$tmp,$eighty7
2639 vcipher $out4,$out4,v27
2640 vcipher $out5,$out5,v27
2641
2642 addi $key_,$sp,$FRAME+15 # rewind $key_
2643 vxor $tweak,$tweak,$tmp
2644 vcipher $out0,$out0,v28
2645 vcipher $out1,$out1,v28
2646 vxor $in3,$twk3,v31
2647 vsrab $tmp,$tweak,$seven # next tweak value
2648 vxor $twk3,$tweak,$rndkey0
2649 vcipher $out2,$out2,v28
2650 vcipher $out3,$out3,v28
2651 vaddubm $tweak,$tweak,$tweak
2652 vsldoi $tmp,$tmp,$tmp,15
2653 vcipher $out4,$out4,v28
2654 vcipher $out5,$out5,v28
2655 lvx v24,$x00,$key_ # re-pre-load round[1]
2656 vand $tmp,$tmp,$eighty7
2657
2658 vcipher $out0,$out0,v29
2659 vcipher $out1,$out1,v29
2660 vxor $tweak,$tweak,$tmp
2661 vcipher $out2,$out2,v29
2662 vcipher $out3,$out3,v29
2663 vxor $in4,$twk4,v31
2664 vsrab $tmp,$tweak,$seven # next tweak value
2665 vxor $twk4,$tweak,$rndkey0
2666 vcipher $out4,$out4,v29
2667 vcipher $out5,$out5,v29
2668 lvx v25,$x10,$key_ # re-pre-load round[2]
2669 vaddubm $tweak,$tweak,$tweak
2670 vsldoi $tmp,$tmp,$tmp,15
2671
2672 vcipher $out0,$out0,v30
2673 vcipher $out1,$out1,v30
2674 vand $tmp,$tmp,$eighty7
2675 vcipher $out2,$out2,v30
2676 vcipher $out3,$out3,v30
2677 vxor $tweak,$tweak,$tmp
2678 vcipher $out4,$out4,v30
2679 vcipher $out5,$out5,v30
2680 vxor $in5,$twk5,v31
2681 vsrab $tmp,$tweak,$seven # next tweak value
2682 vxor $twk5,$tweak,$rndkey0
2683
2684 vcipherlast $out0,$out0,$in0
2685 lvx_u $in0,$x00,$inp # load next input block
2686 vaddubm $tweak,$tweak,$tweak
2687 vsldoi $tmp,$tmp,$tmp,15
2688 vcipherlast $out1,$out1,$in1
2689 lvx_u $in1,$x10,$inp
2690 vcipherlast $out2,$out2,$in2
2691 le?vperm $in0,$in0,$in0,$leperm
2692 lvx_u $in2,$x20,$inp
2693 vand $tmp,$tmp,$eighty7
2694 vcipherlast $out3,$out3,$in3
2695 le?vperm $in1,$in1,$in1,$leperm
2696 lvx_u $in3,$x30,$inp
2697 vcipherlast $out4,$out4,$in4
2698 le?vperm $in2,$in2,$in2,$leperm
2699 lvx_u $in4,$x40,$inp
2700 vxor $tweak,$tweak,$tmp
2701 vcipherlast $tmp,$out5,$in5 # last block might be needed
2702 # in stealing mode
2703 le?vperm $in3,$in3,$in3,$leperm
2704 lvx_u $in5,$x50,$inp
2705 addi $inp,$inp,0x60
2706 le?vperm $in4,$in4,$in4,$leperm
2707 le?vperm $in5,$in5,$in5,$leperm
2708
2709 le?vperm $out0,$out0,$out0,$leperm
2710 le?vperm $out1,$out1,$out1,$leperm
2711 stvx_u $out0,$x00,$out # store output
2712 vxor $out0,$in0,$twk0
2713 le?vperm $out2,$out2,$out2,$leperm
2714 stvx_u $out1,$x10,$out
2715 vxor $out1,$in1,$twk1
2716 le?vperm $out3,$out3,$out3,$leperm
2717 stvx_u $out2,$x20,$out
2718 vxor $out2,$in2,$twk2
2719 le?vperm $out4,$out4,$out4,$leperm
2720 stvx_u $out3,$x30,$out
2721 vxor $out3,$in3,$twk3
2722 le?vperm $out5,$tmp,$tmp,$leperm
2723 stvx_u $out4,$x40,$out
2724 vxor $out4,$in4,$twk4
2725 le?stvx_u $out5,$x50,$out
2726 be?stvx_u $tmp, $x50,$out
2727 vxor $out5,$in5,$twk5
2728 addi $out,$out,0x60
2729
2730 mtctr $rounds
2731 beq Loop_xts_enc6x # did $len-=96 borrow?
2732
2733 addic. $len,$len,0x60
2734 beq Lxts_enc6x_zero
2735 cmpwi $len,0x20
2736 blt Lxts_enc6x_one
2737 nop
2738 beq Lxts_enc6x_two
2739 cmpwi $len,0x40
2740 blt Lxts_enc6x_three
2741 nop
2742 beq Lxts_enc6x_four
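	# between 0x10 and 0x50 bytes remain at this point; the compare
	# ladder above picks the epilogue matching the block count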
2743
2744Lxts_enc6x_five:
2745 vxor $out0,$in1,$twk0
2746 vxor $out1,$in2,$twk1
2747 vxor $out2,$in3,$twk2
2748 vxor $out3,$in4,$twk3
2749 vxor $out4,$in5,$twk4
2750
2751 bl _aesp8_xts_enc5x
2752
2753 le?vperm $out0,$out0,$out0,$leperm
2754 vmr $twk0,$twk5 # unused tweak
2755 le?vperm $out1,$out1,$out1,$leperm
2756 stvx_u $out0,$x00,$out # store output
2757 le?vperm $out2,$out2,$out2,$leperm
2758 stvx_u $out1,$x10,$out
2759 le?vperm $out3,$out3,$out3,$leperm
2760 stvx_u $out2,$x20,$out
2761 vxor $tmp,$out4,$twk5 # last block prep for stealing
2762 le?vperm $out4,$out4,$out4,$leperm
2763 stvx_u $out3,$x30,$out
2764 stvx_u $out4,$x40,$out
2765 addi $out,$out,0x50
2766 bne Lxts_enc6x_steal
2767 b Lxts_enc6x_done
2768
2769.align 4
2770Lxts_enc6x_four:
2771 vxor $out0,$in2,$twk0
2772 vxor $out1,$in3,$twk1
2773 vxor $out2,$in4,$twk2
2774 vxor $out3,$in5,$twk3
2775 vxor $out4,$out4,$out4
2776
2777 bl _aesp8_xts_enc5x
2778
2779 le?vperm $out0,$out0,$out0,$leperm
2780 vmr $twk0,$twk4 # unused tweak
2781 le?vperm $out1,$out1,$out1,$leperm
2782 stvx_u $out0,$x00,$out # store output
2783 le?vperm $out2,$out2,$out2,$leperm
2784 stvx_u $out1,$x10,$out
2785 vxor $tmp,$out3,$twk4 # last block prep for stealing
2786 le?vperm $out3,$out3,$out3,$leperm
2787 stvx_u $out2,$x20,$out
2788 stvx_u $out3,$x30,$out
2789 addi $out,$out,0x40
2790 bne Lxts_enc6x_steal
2791 b Lxts_enc6x_done
2792
2793.align 4
2794Lxts_enc6x_three:
2795 vxor $out0,$in3,$twk0
2796 vxor $out1,$in4,$twk1
2797 vxor $out2,$in5,$twk2
2798 vxor $out3,$out3,$out3
2799 vxor $out4,$out4,$out4
2800
2801 bl _aesp8_xts_enc5x
2802
2803 le?vperm $out0,$out0,$out0,$leperm
2804 vmr $twk0,$twk3 # unused tweak
2805 le?vperm $out1,$out1,$out1,$leperm
2806 stvx_u $out0,$x00,$out # store output
2807 vxor $tmp,$out2,$twk3 # last block prep for stealing
2808 le?vperm $out2,$out2,$out2,$leperm
2809 stvx_u $out1,$x10,$out
2810 stvx_u $out2,$x20,$out
2811 addi $out,$out,0x30
2812 bne Lxts_enc6x_steal
2813 b Lxts_enc6x_done
2814
2815.align 4
2816Lxts_enc6x_two:
2817 vxor $out0,$in4,$twk0
2818 vxor $out1,$in5,$twk1
2819 vxor $out2,$out2,$out2
2820 vxor $out3,$out3,$out3
2821 vxor $out4,$out4,$out4
2822
2823 bl _aesp8_xts_enc5x
2824
2825 le?vperm $out0,$out0,$out0,$leperm
2826 vmr $twk0,$twk2 # unused tweak
2827 vxor $tmp,$out1,$twk2 # last block prep for stealing
2828 le?vperm $out1,$out1,$out1,$leperm
2829 stvx_u $out0,$x00,$out # store output
2830 stvx_u $out1,$x10,$out
2831 addi $out,$out,0x20
2832 bne Lxts_enc6x_steal
2833 b Lxts_enc6x_done
2834
2835.align 4
2836Lxts_enc6x_one:
2837 vxor $out0,$in5,$twk0
2838 nop
2839Loop_xts_enc1x:
2840 vcipher $out0,$out0,v24
2841 lvx v24,$x20,$key_ # round[3]
2842 addi $key_,$key_,0x20
2843
2844 vcipher $out0,$out0,v25
2845 lvx v25,$x10,$key_ # round[4]
2846 bdnz Loop_xts_enc1x
2847
2848 add $inp,$inp,$taillen
2849 cmpwi $taillen,0
2850 vcipher $out0,$out0,v24
2851
2852 subi $inp,$inp,16
2853 vcipher $out0,$out0,v25
2854
2855 lvsr $inpperm,0,$taillen
2856 vcipher $out0,$out0,v26
2857
2858 lvx_u $in0,0,$inp
2859 vcipher $out0,$out0,v27
2860
2861 addi $key_,$sp,$FRAME+15 # rewind $key_
2862 vcipher $out0,$out0,v28
2863 lvx v24,$x00,$key_ # re-pre-load round[1]
2864
2865 vcipher $out0,$out0,v29
2866 lvx v25,$x10,$key_ # re-pre-load round[2]
2867 vxor $twk0,$twk0,v31
2868
2869 le?vperm $in0,$in0,$in0,$leperm
2870 vcipher $out0,$out0,v30
2871
2872 vperm $in0,$in0,$in0,$inpperm
2873 vcipherlast $out0,$out0,$twk0
2874
2875 vmr $twk0,$twk1 # unused tweak
2876 vxor $tmp,$out0,$twk1 # last block prep for stealing
2877 le?vperm $out0,$out0,$out0,$leperm
2878 stvx_u $out0,$x00,$out # store output
2879 addi $out,$out,0x10
2880 bne Lxts_enc6x_steal
2881 b Lxts_enc6x_done
2882
2883.align 4
2884Lxts_enc6x_zero:
2885 cmpwi $taillen,0
2886 beq Lxts_enc6x_done
2887
2888 add $inp,$inp,$taillen
2889 subi $inp,$inp,16
2890 lvx_u $in0,0,$inp
2891 lvsr $inpperm,0,$taillen # $in5 is no more
2892 le?vperm $in0,$in0,$in0,$leperm
2893 vperm $in0,$in0,$in0,$inpperm
2894 vxor $tmp,$tmp,$twk0
2895Lxts_enc6x_steal:
2896 vxor $in0,$in0,$twk0
2897 vxor $out0,$out0,$out0
2898 vspltisb $out1,-1
2899 vperm $out0,$out0,$out1,$inpperm
2900 vsel $out0,$in0,$tmp,$out0 # $tmp is last block, remember?
2901
2902 subi r30,$out,17
2903 subi $out,$out,16
2904 mtctr $taillen
2905Loop_xts_enc6x_steal:
2906 lbzu r0,1(r30)
2907 stb r0,16(r30)
2908 bdnz Loop_xts_enc6x_steal
2909
2910 li $taillen,0
2911 mtctr $rounds
2912 b Loop_xts_enc1x # one more time...
2913
2914.align 4
2915Lxts_enc6x_done:
2916 ${UCMP}i $ivp,0
2917 beq Lxts_enc6x_ret
2918
2919 vxor $tweak,$twk0,$rndkey0
2920 le?vperm $tweak,$tweak,$tweak,$leperm
2921 stvx_u $tweak,0,$ivp
2922
2923Lxts_enc6x_ret:
2924 mtlr r11
2925 li r10,`$FRAME+15`
2926 li r11,`$FRAME+31`
2927 stvx $seven,r10,$sp # wipe copies of round keys
2928 addi r10,r10,32
2929 stvx $seven,r11,$sp
2930 addi r11,r11,32
2931 stvx $seven,r10,$sp
2932 addi r10,r10,32
2933 stvx $seven,r11,$sp
2934 addi r11,r11,32
2935 stvx $seven,r10,$sp
2936 addi r10,r10,32
2937 stvx $seven,r11,$sp
2938 addi r11,r11,32
2939 stvx $seven,r10,$sp
2940 addi r10,r10,32
2941 stvx $seven,r11,$sp
2942 addi r11,r11,32
2943
2944 mtspr 256,$vrsave
2945 lvx v20,r10,$sp # ABI says so
2946 addi r10,r10,32
2947 lvx v21,r11,$sp
2948 addi r11,r11,32
2949 lvx v22,r10,$sp
2950 addi r10,r10,32
2951 lvx v23,r11,$sp
2952 addi r11,r11,32
2953 lvx v24,r10,$sp
2954 addi r10,r10,32
2955 lvx v25,r11,$sp
2956 addi r11,r11,32
2957 lvx v26,r10,$sp
2958 addi r10,r10,32
2959 lvx v27,r11,$sp
2960 addi r11,r11,32
2961 lvx v28,r10,$sp
2962 addi r10,r10,32
2963 lvx v29,r11,$sp
2964 addi r11,r11,32
2965 lvx v30,r10,$sp
2966 lvx v31,r11,$sp
2967 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
2968 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
2969 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
2970 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
2971 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
2972 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
2973 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
2974 blr
2975 .long 0
2976 .byte 0,12,0x04,1,0x80,6,6,0
2977 .long 0
2978
2979.align 5
2980_aesp8_xts_enc5x:
2981 vcipher $out0,$out0,v24
2982 vcipher $out1,$out1,v24
2983 vcipher $out2,$out2,v24
2984 vcipher $out3,$out3,v24
2985 vcipher $out4,$out4,v24
2986 lvx v24,$x20,$key_ # round[3]
2987 addi $key_,$key_,0x20
2988
2989 vcipher $out0,$out0,v25
2990 vcipher $out1,$out1,v25
2991 vcipher $out2,$out2,v25
2992 vcipher $out3,$out3,v25
2993 vcipher $out4,$out4,v25
2994 lvx v25,$x10,$key_ # round[4]
2995 bdnz _aesp8_xts_enc5x
2996
2997 add $inp,$inp,$taillen
2998 cmpwi $taillen,0
2999 vcipher $out0,$out0,v24
3000 vcipher $out1,$out1,v24
3001 vcipher $out2,$out2,v24
3002 vcipher $out3,$out3,v24
3003 vcipher $out4,$out4,v24
3004
3005 subi $inp,$inp,16
3006 vcipher $out0,$out0,v25
3007 vcipher $out1,$out1,v25
3008 vcipher $out2,$out2,v25
3009 vcipher $out3,$out3,v25
3010 vcipher $out4,$out4,v25
3011 vxor $twk0,$twk0,v31
3012
3013 vcipher $out0,$out0,v26
3014	lvsr	$inpperm,0,$taillen	# $in5 is no more
3015	vcipher		$out1,$out1,v26
3016 vcipher $out2,$out2,v26
3017 vcipher $out3,$out3,v26
3018 vcipher $out4,$out4,v26
3019 vxor $in1,$twk1,v31
3020
3021 vcipher $out0,$out0,v27
3022 lvx_u $in0,0,$inp
3023 vcipher $out1,$out1,v27
3024 vcipher $out2,$out2,v27
3025 vcipher $out3,$out3,v27
3026 vcipher $out4,$out4,v27
3027 vxor $in2,$twk2,v31
3028
3029 addi $key_,$sp,$FRAME+15 # rewind $key_
3030 vcipher $out0,$out0,v28
3031 vcipher $out1,$out1,v28
3032 vcipher $out2,$out2,v28
3033 vcipher $out3,$out3,v28
3034 vcipher $out4,$out4,v28
3035 lvx v24,$x00,$key_ # re-pre-load round[1]
3036 vxor $in3,$twk3,v31
3037
3038 vcipher $out0,$out0,v29
3039 le?vperm $in0,$in0,$in0,$leperm
3040 vcipher $out1,$out1,v29
3041 vcipher $out2,$out2,v29
3042 vcipher $out3,$out3,v29
3043 vcipher $out4,$out4,v29
3044 lvx v25,$x10,$key_ # re-pre-load round[2]
3045 vxor $in4,$twk4,v31
3046
3047 vcipher $out0,$out0,v30
3048 vperm $in0,$in0,$in0,$inpperm
3049 vcipher $out1,$out1,v30
3050 vcipher $out2,$out2,v30
3051 vcipher $out3,$out3,v30
3052 vcipher $out4,$out4,v30
3053
3054 vcipherlast $out0,$out0,$twk0
3055 vcipherlast $out1,$out1,$in1
3056 vcipherlast $out2,$out2,$in2
3057 vcipherlast $out3,$out3,$in3
3058 vcipherlast $out4,$out4,$in4
3059 blr
3060 .long 0
3061 .byte 0,12,0x14,0,0,0,0,0
3062
3063.align 5
3064_aesp8_xts_decrypt6x:
3065 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
3066 mflr r11
3067 li r7,`$FRAME+8*16+15`
3068 li r3,`$FRAME+8*16+31`
3069 $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
3070 stvx v20,r7,$sp # ABI says so
3071 addi r7,r7,32
3072 stvx v21,r3,$sp
3073 addi r3,r3,32
3074 stvx v22,r7,$sp
3075 addi r7,r7,32
3076 stvx v23,r3,$sp
3077 addi r3,r3,32
3078 stvx v24,r7,$sp
3079 addi r7,r7,32
3080 stvx v25,r3,$sp
3081 addi r3,r3,32
3082 stvx v26,r7,$sp
3083 addi r7,r7,32
3084 stvx v27,r3,$sp
3085 addi r3,r3,32
3086 stvx v28,r7,$sp
3087 addi r7,r7,32
3088 stvx v29,r3,$sp
3089 addi r3,r3,32
3090 stvx v30,r7,$sp
3091 stvx v31,r3,$sp
3092 li r0,-1
3093 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
3094 li $x10,0x10
3095 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
3096 li $x20,0x20
3097 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
3098 li $x30,0x30
3099 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
3100 li $x40,0x40
3101 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
3102 li $x50,0x50
3103 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
3104 li $x60,0x60
3105 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
3106 li $x70,0x70
3107 mtspr 256,r0
3108
3109 subi $rounds,$rounds,3 # -4 in total
3110
3111 lvx $rndkey0,$x00,$key1 # load key schedule
3112 lvx v30,$x10,$key1
3113 addi $key1,$key1,0x20
3114 lvx v31,$x00,$key1
3115 ?vperm $rndkey0,$rndkey0,v30,$keyperm
3116 addi $key_,$sp,$FRAME+15
3117 mtctr $rounds
3118
3119Load_xts_dec_key:
3120 ?vperm v24,v30,v31,$keyperm
3121 lvx v30,$x10,$key1
3122 addi $key1,$key1,0x20
3123 stvx v24,$x00,$key_ # off-load round[1]
3124 ?vperm v25,v31,v30,$keyperm
3125 lvx v31,$x00,$key1
3126 stvx v25,$x10,$key_ # off-load round[2]
3127 addi $key_,$key_,0x20
3128 bdnz Load_xts_dec_key
3129
3130 lvx v26,$x10,$key1
3131 ?vperm v24,v30,v31,$keyperm
3132 lvx v27,$x20,$key1
3133 stvx v24,$x00,$key_ # off-load round[3]
3134 ?vperm v25,v31,v26,$keyperm
3135 lvx v28,$x30,$key1
3136 stvx v25,$x10,$key_ # off-load round[4]
3137 addi $key_,$sp,$FRAME+15 # rewind $key_
3138 ?vperm v26,v26,v27,$keyperm
3139 lvx v29,$x40,$key1
3140 ?vperm v27,v27,v28,$keyperm
3141 lvx v30,$x50,$key1
3142 ?vperm v28,v28,v29,$keyperm
3143 lvx v31,$x60,$key1
3144 ?vperm v29,v29,v30,$keyperm
3145 lvx $twk5,$x70,$key1 # borrow $twk5
3146 ?vperm v30,v30,v31,$keyperm
3147 lvx v24,$x00,$key_ # pre-load round[1]
3148 ?vperm v31,v31,$twk5,$keyperm
3149 lvx v25,$x10,$key_ # pre-load round[2]
3150
3151 vperm $in0,$inout,$inptail,$inpperm
3152 subi $inp,$inp,31 # undo "caller"
3153 vxor $twk0,$tweak,$rndkey0
3154 vsrab $tmp,$tweak,$seven # next tweak value
3155 vaddubm $tweak,$tweak,$tweak
3156 vsldoi $tmp,$tmp,$tmp,15
3157 vand $tmp,$tmp,$eighty7
3158 vxor $out0,$in0,$twk0
3159 vxor $tweak,$tweak,$tmp
3160
3161 lvx_u $in1,$x10,$inp
3162 vxor $twk1,$tweak,$rndkey0
3163 vsrab $tmp,$tweak,$seven # next tweak value
3164 vaddubm $tweak,$tweak,$tweak
3165 vsldoi $tmp,$tmp,$tmp,15
3166 le?vperm $in1,$in1,$in1,$leperm
3167 vand $tmp,$tmp,$eighty7
3168 vxor $out1,$in1,$twk1
3169 vxor $tweak,$tweak,$tmp
3170
3171 lvx_u $in2,$x20,$inp
3172 andi. $taillen,$len,15
3173 vxor $twk2,$tweak,$rndkey0
3174 vsrab $tmp,$tweak,$seven # next tweak value
3175 vaddubm $tweak,$tweak,$tweak
3176 vsldoi $tmp,$tmp,$tmp,15
3177 le?vperm $in2,$in2,$in2,$leperm
3178 vand $tmp,$tmp,$eighty7
3179 vxor $out2,$in2,$twk2
3180 vxor $tweak,$tweak,$tmp
3181
3182 lvx_u $in3,$x30,$inp
3183 sub $len,$len,$taillen
3184 vxor $twk3,$tweak,$rndkey0
3185 vsrab $tmp,$tweak,$seven # next tweak value
3186 vaddubm $tweak,$tweak,$tweak
3187 vsldoi $tmp,$tmp,$tmp,15
3188 le?vperm $in3,$in3,$in3,$leperm
3189 vand $tmp,$tmp,$eighty7
3190 vxor $out3,$in3,$twk3
3191 vxor $tweak,$tweak,$tmp
3192
3193 lvx_u $in4,$x40,$inp
3194 subi $len,$len,0x60
3195 vxor $twk4,$tweak,$rndkey0
3196 vsrab $tmp,$tweak,$seven # next tweak value
3197 vaddubm $tweak,$tweak,$tweak
3198 vsldoi $tmp,$tmp,$tmp,15
3199 le?vperm $in4,$in4,$in4,$leperm
3200 vand $tmp,$tmp,$eighty7
3201 vxor $out4,$in4,$twk4
3202 vxor $tweak,$tweak,$tmp
3203
3204 lvx_u $in5,$x50,$inp
3205 addi $inp,$inp,0x60
3206 vxor $twk5,$tweak,$rndkey0
3207 vsrab $tmp,$tweak,$seven # next tweak value
3208 vaddubm $tweak,$tweak,$tweak
3209 vsldoi $tmp,$tmp,$tmp,15
3210 le?vperm $in5,$in5,$in5,$leperm
3211 vand $tmp,$tmp,$eighty7
3212 vxor $out5,$in5,$twk5
3213 vxor $tweak,$tweak,$tmp
3214
3215 vxor v31,v31,$rndkey0
3216 mtctr $rounds
3217 b Loop_xts_dec6x
3218
3219.align 5
3220Loop_xts_dec6x:
3221 vncipher $out0,$out0,v24
3222 vncipher $out1,$out1,v24
3223 vncipher $out2,$out2,v24
3224 vncipher $out3,$out3,v24
3225 vncipher $out4,$out4,v24
3226 vncipher $out5,$out5,v24
3227 lvx v24,$x20,$key_ # round[3]
3228 addi $key_,$key_,0x20
3229
3230 vncipher $out0,$out0,v25
3231 vncipher $out1,$out1,v25
3232 vncipher $out2,$out2,v25
3233 vncipher $out3,$out3,v25
3234 vncipher $out4,$out4,v25
3235 vncipher $out5,$out5,v25
3236 lvx v25,$x10,$key_ # round[4]
3237 bdnz Loop_xts_dec6x
3238
3239 subic $len,$len,96 # $len-=96
3240 vxor $in0,$twk0,v31 # xor with last round key
3241 vncipher $out0,$out0,v24
3242 vncipher $out1,$out1,v24
3243 vsrab $tmp,$tweak,$seven # next tweak value
3244 vxor $twk0,$tweak,$rndkey0
3245 vaddubm $tweak,$tweak,$tweak
3246 vncipher $out2,$out2,v24
3247 vncipher $out3,$out3,v24
3248 vsldoi $tmp,$tmp,$tmp,15
3249 vncipher $out4,$out4,v24
3250 vncipher $out5,$out5,v24
3251
3252 subfe. r0,r0,r0 # borrow?-1:0
3253 vand $tmp,$tmp,$eighty7
3254 vncipher $out0,$out0,v25
3255 vncipher $out1,$out1,v25
3256 vxor $tweak,$tweak,$tmp
3257 vncipher $out2,$out2,v25
3258 vncipher $out3,$out3,v25
3259 vxor $in1,$twk1,v31
3260 vsrab $tmp,$tweak,$seven # next tweak value
3261 vxor $twk1,$tweak,$rndkey0
3262 vncipher $out4,$out4,v25
3263 vncipher $out5,$out5,v25
3264
3265 and r0,r0,$len
3266 vaddubm $tweak,$tweak,$tweak
3267 vsldoi $tmp,$tmp,$tmp,15
3268 vncipher $out0,$out0,v26
3269 vncipher $out1,$out1,v26
3270 vand $tmp,$tmp,$eighty7
3271 vncipher $out2,$out2,v26
3272 vncipher $out3,$out3,v26
3273 vxor $tweak,$tweak,$tmp
3274 vncipher $out4,$out4,v26
3275 vncipher $out5,$out5,v26
3276
3277	add	$inp,$inp,r0		# $inp is adjusted in such a
3278					# way that at exit from the
3279					# loop inX-in5 are loaded
3280					# with the last "words"
3281 vxor $in2,$twk2,v31
3282 vsrab $tmp,$tweak,$seven # next tweak value
3283 vxor $twk2,$tweak,$rndkey0
3284 vaddubm $tweak,$tweak,$tweak
3285 vncipher $out0,$out0,v27
3286 vncipher $out1,$out1,v27
3287 vsldoi $tmp,$tmp,$tmp,15
3288 vncipher $out2,$out2,v27
3289 vncipher $out3,$out3,v27
3290 vand $tmp,$tmp,$eighty7
3291 vncipher $out4,$out4,v27
3292 vncipher $out5,$out5,v27
3293
3294 addi $key_,$sp,$FRAME+15 # rewind $key_
3295 vxor $tweak,$tweak,$tmp
3296 vncipher $out0,$out0,v28
3297 vncipher $out1,$out1,v28
3298 vxor $in3,$twk3,v31
3299 vsrab $tmp,$tweak,$seven # next tweak value
3300 vxor $twk3,$tweak,$rndkey0
3301 vncipher $out2,$out2,v28
3302 vncipher $out3,$out3,v28
3303 vaddubm $tweak,$tweak,$tweak
3304 vsldoi $tmp,$tmp,$tmp,15
3305 vncipher $out4,$out4,v28
3306 vncipher $out5,$out5,v28
3307 lvx v24,$x00,$key_ # re-pre-load round[1]
3308 vand $tmp,$tmp,$eighty7
3309
3310 vncipher $out0,$out0,v29
3311 vncipher $out1,$out1,v29
3312 vxor $tweak,$tweak,$tmp
3313 vncipher $out2,$out2,v29
3314 vncipher $out3,$out3,v29
3315 vxor $in4,$twk4,v31
3316 vsrab $tmp,$tweak,$seven # next tweak value
3317 vxor $twk4,$tweak,$rndkey0
3318 vncipher $out4,$out4,v29
3319 vncipher $out5,$out5,v29
3320 lvx v25,$x10,$key_ # re-pre-load round[2]
3321 vaddubm $tweak,$tweak,$tweak
3322 vsldoi $tmp,$tmp,$tmp,15
3323
3324 vncipher $out0,$out0,v30
3325 vncipher $out1,$out1,v30
3326 vand $tmp,$tmp,$eighty7
3327 vncipher $out2,$out2,v30
3328 vncipher $out3,$out3,v30
3329 vxor $tweak,$tweak,$tmp
3330 vncipher $out4,$out4,v30
3331 vncipher $out5,$out5,v30
3332 vxor $in5,$twk5,v31
3333 vsrab $tmp,$tweak,$seven # next tweak value
3334 vxor $twk5,$tweak,$rndkey0
3335
3336 vncipherlast $out0,$out0,$in0
3337 lvx_u $in0,$x00,$inp # load next input block
3338 vaddubm $tweak,$tweak,$tweak
3339 vsldoi $tmp,$tmp,$tmp,15
3340 vncipherlast $out1,$out1,$in1
3341 lvx_u $in1,$x10,$inp
3342 vncipherlast $out2,$out2,$in2
3343 le?vperm $in0,$in0,$in0,$leperm
3344 lvx_u $in2,$x20,$inp
3345 vand $tmp,$tmp,$eighty7
3346 vncipherlast $out3,$out3,$in3
3347 le?vperm $in1,$in1,$in1,$leperm
3348 lvx_u $in3,$x30,$inp
3349 vncipherlast $out4,$out4,$in4
3350 le?vperm $in2,$in2,$in2,$leperm
3351 lvx_u $in4,$x40,$inp
3352 vxor $tweak,$tweak,$tmp
3353 vncipherlast $out5,$out5,$in5
3354 le?vperm $in3,$in3,$in3,$leperm
3355 lvx_u $in5,$x50,$inp
3356 addi $inp,$inp,0x60
3357 le?vperm $in4,$in4,$in4,$leperm
3358 le?vperm $in5,$in5,$in5,$leperm
3359
3360 le?vperm $out0,$out0,$out0,$leperm
3361 le?vperm $out1,$out1,$out1,$leperm
3362 stvx_u $out0,$x00,$out # store output
3363 vxor $out0,$in0,$twk0
3364 le?vperm $out2,$out2,$out2,$leperm
3365 stvx_u $out1,$x10,$out
3366 vxor $out1,$in1,$twk1
3367 le?vperm $out3,$out3,$out3,$leperm
3368 stvx_u $out2,$x20,$out
3369 vxor $out2,$in2,$twk2
3370 le?vperm $out4,$out4,$out4,$leperm
3371 stvx_u $out3,$x30,$out
3372 vxor $out3,$in3,$twk3
3373 le?vperm $out5,$out5,$out5,$leperm
3374 stvx_u $out4,$x40,$out
3375 vxor $out4,$in4,$twk4
3376 stvx_u $out5,$x50,$out
3377 vxor $out5,$in5,$twk5
3378 addi $out,$out,0x60
3379
3380 mtctr $rounds
3381 beq Loop_xts_dec6x # did $len-=96 borrow?
3382
3383 addic. $len,$len,0x60
3384 beq Lxts_dec6x_zero
3385 cmpwi $len,0x20
3386 blt Lxts_dec6x_one
3387 nop
3388 beq Lxts_dec6x_two
3389 cmpwi $len,0x40
3390 blt Lxts_dec6x_three
3391 nop
3392 beq Lxts_dec6x_four
3393
3394Lxts_dec6x_five:
3395 vxor $out0,$in1,$twk0
3396 vxor $out1,$in2,$twk1
3397 vxor $out2,$in3,$twk2
3398 vxor $out3,$in4,$twk3
3399 vxor $out4,$in5,$twk4
3400
3401 bl _aesp8_xts_dec5x
3402
3403 le?vperm $out0,$out0,$out0,$leperm
3404 vmr $twk0,$twk5 # unused tweak
3405 vxor $twk1,$tweak,$rndkey0
3406 le?vperm $out1,$out1,$out1,$leperm
3407 stvx_u $out0,$x00,$out # store output
3408 vxor $out0,$in0,$twk1
3409 le?vperm $out2,$out2,$out2,$leperm
3410 stvx_u $out1,$x10,$out
3411 le?vperm $out3,$out3,$out3,$leperm
3412 stvx_u $out2,$x20,$out
3413 le?vperm $out4,$out4,$out4,$leperm
3414 stvx_u $out3,$x30,$out
3415 stvx_u $out4,$x40,$out
3416 addi $out,$out,0x50
3417 bne Lxts_dec6x_steal
3418 b Lxts_dec6x_done
3419
3420.align 4
3421Lxts_dec6x_four:
3422 vxor $out0,$in2,$twk0
3423 vxor $out1,$in3,$twk1
3424 vxor $out2,$in4,$twk2
3425 vxor $out3,$in5,$twk3
3426 vxor $out4,$out4,$out4
3427
3428 bl _aesp8_xts_dec5x
3429
3430 le?vperm $out0,$out0,$out0,$leperm
3431 vmr $twk0,$twk4 # unused tweak
3432 vmr $twk1,$twk5
3433 le?vperm $out1,$out1,$out1,$leperm
3434 stvx_u $out0,$x00,$out # store output
3435 vxor $out0,$in0,$twk5
3436 le?vperm $out2,$out2,$out2,$leperm
3437 stvx_u $out1,$x10,$out
3438 le?vperm $out3,$out3,$out3,$leperm
3439 stvx_u $out2,$x20,$out
3440 stvx_u $out3,$x30,$out
3441 addi $out,$out,0x40
3442 bne Lxts_dec6x_steal
3443 b Lxts_dec6x_done
3444
3445.align 4
3446Lxts_dec6x_three:
3447 vxor $out0,$in3,$twk0
3448 vxor $out1,$in4,$twk1
3449 vxor $out2,$in5,$twk2
3450 vxor $out3,$out3,$out3
3451 vxor $out4,$out4,$out4
3452
3453 bl _aesp8_xts_dec5x
3454
3455 le?vperm $out0,$out0,$out0,$leperm
3456 vmr $twk0,$twk3 # unused tweak
3457 vmr $twk1,$twk4
3458 le?vperm $out1,$out1,$out1,$leperm
3459 stvx_u $out0,$x00,$out # store output
3460 vxor $out0,$in0,$twk4
3461 le?vperm $out2,$out2,$out2,$leperm
3462 stvx_u $out1,$x10,$out
3463 stvx_u $out2,$x20,$out
3464 addi $out,$out,0x30
3465 bne Lxts_dec6x_steal
3466 b Lxts_dec6x_done
3467
3468.align 4
3469Lxts_dec6x_two:
3470 vxor $out0,$in4,$twk0
3471 vxor $out1,$in5,$twk1
3472 vxor $out2,$out2,$out2
3473 vxor $out3,$out3,$out3
3474 vxor $out4,$out4,$out4
3475
3476 bl _aesp8_xts_dec5x
3477
3478 le?vperm $out0,$out0,$out0,$leperm
3479 vmr $twk0,$twk2 # unused tweak
3480 vmr $twk1,$twk3
3481 le?vperm $out1,$out1,$out1,$leperm
3482 stvx_u $out0,$x00,$out # store output
3483 vxor $out0,$in0,$twk3
3484 stvx_u $out1,$x10,$out
3485 addi $out,$out,0x20
3486 bne Lxts_dec6x_steal
3487 b Lxts_dec6x_done
3488
3489.align 4
3490Lxts_dec6x_one:
3491 vxor $out0,$in5,$twk0
3492 nop
3493Loop_xts_dec1x:
3494 vncipher $out0,$out0,v24
3495 lvx v24,$x20,$key_ # round[3]
3496 addi $key_,$key_,0x20
3497
3498 vncipher $out0,$out0,v25
3499 lvx v25,$x10,$key_ # round[4]
3500 bdnz Loop_xts_dec1x
3501
3502 subi r0,$taillen,1
3503 vncipher $out0,$out0,v24
3504
3505 andi. r0,r0,16
3506 cmpwi $taillen,0
3507 vncipher $out0,$out0,v25
3508
3509 sub $inp,$inp,r0
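	# bit 4 of taillen-1 is set only when taillen==0: with no tail
	# there is no held-back block, so step back and reload the
	# previous one harmlessly instead of reading past the input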
3510 vncipher $out0,$out0,v26
3511
3512 lvx_u $in0,0,$inp
3513 vncipher $out0,$out0,v27
3514
3515 addi $key_,$sp,$FRAME+15 # rewind $key_
3516 vncipher $out0,$out0,v28
3517 lvx v24,$x00,$key_ # re-pre-load round[1]
3518
3519 vncipher $out0,$out0,v29
3520 lvx v25,$x10,$key_ # re-pre-load round[2]
3521 vxor $twk0,$twk0,v31
3522
3523 le?vperm $in0,$in0,$in0,$leperm
3524 vncipher $out0,$out0,v30
3525
3526 mtctr $rounds
3527 vncipherlast $out0,$out0,$twk0
3528
3529 vmr $twk0,$twk1 # unused tweak
3530 vmr $twk1,$twk2
3531 le?vperm $out0,$out0,$out0,$leperm
3532 stvx_u $out0,$x00,$out # store output
3533 addi $out,$out,0x10
3534 vxor $out0,$in0,$twk2
3535 bne Lxts_dec6x_steal
3536 b Lxts_dec6x_done
3537
3538.align 4
3539Lxts_dec6x_zero:
3540 cmpwi $taillen,0
3541 beq Lxts_dec6x_done
3542
3543 lvx_u $in0,0,$inp
3544 le?vperm $in0,$in0,$in0,$leperm
3545 vxor $out0,$in0,$twk1
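	# despite the label, Lxts_dec6x_steal below first runs the round
	# loop to finish this block under the proper tweak; the actual
	# byte stealing only happens after vncipherlast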
3546Lxts_dec6x_steal:
3547 vncipher $out0,$out0,v24
3548 lvx v24,$x20,$key_ # round[3]
3549 addi $key_,$key_,0x20
3550
3551 vncipher $out0,$out0,v25
3552 lvx v25,$x10,$key_ # round[4]
3553 bdnz Lxts_dec6x_steal
3554
3555 add $inp,$inp,$taillen
3556 vncipher $out0,$out0,v24
3557
3558 cmpwi $taillen,0
3559 vncipher $out0,$out0,v25
3560
3561 lvx_u $in0,0,$inp
3562 vncipher $out0,$out0,v26
3563
3564 lvsr $inpperm,0,$taillen # $in5 is no more
3565 vncipher $out0,$out0,v27
3566
3567 addi $key_,$sp,$FRAME+15 # rewind $key_
3568 vncipher $out0,$out0,v28
3569 lvx v24,$x00,$key_ # re-pre-load round[1]
3570
3571 vncipher $out0,$out0,v29
3572 lvx v25,$x10,$key_ # re-pre-load round[2]
3573 vxor $twk1,$twk1,v31
3574
3575 le?vperm $in0,$in0,$in0,$leperm
3576 vncipher $out0,$out0,v30
3577
3578 vperm $in0,$in0,$in0,$inpperm
3579 vncipherlast $tmp,$out0,$twk1
3580
3581 le?vperm $out0,$tmp,$tmp,$leperm
3582 le?stvx_u $out0,0,$out
3583 be?stvx_u $tmp,0,$out
3584
3585 vxor $out0,$out0,$out0
3586 vspltisb $out1,-1
3587 vperm $out0,$out0,$out1,$inpperm
3588 vsel $out0,$in0,$tmp,$out0
3589 vxor $out0,$out0,$twk0
3590
3591 subi r30,$out,1
3592 mtctr $taillen
3593Loop_xts_dec6x_steal:
3594 lbzu r0,1(r30)
3595 stb r0,16(r30)
3596 bdnz Loop_xts_dec6x_steal
3597
3598 li $taillen,0
3599 mtctr $rounds
3600 b Loop_xts_dec1x # one more time...
3601
3602.align 4
3603Lxts_dec6x_done:
3604 ${UCMP}i $ivp,0
3605 beq Lxts_dec6x_ret
3606
3607 vxor $tweak,$twk0,$rndkey0
3608 le?vperm $tweak,$tweak,$tweak,$leperm
3609 stvx_u $tweak,0,$ivp
3610
3611Lxts_dec6x_ret:
3612 mtlr r11
3613 li r10,`$FRAME+15`
3614 li r11,`$FRAME+31`
3615 stvx $seven,r10,$sp # wipe copies of round keys
3616 addi r10,r10,32
3617 stvx $seven,r11,$sp
3618 addi r11,r11,32
3619 stvx $seven,r10,$sp
3620 addi r10,r10,32
3621 stvx $seven,r11,$sp
3622 addi r11,r11,32
3623 stvx $seven,r10,$sp
3624 addi r10,r10,32
3625 stvx $seven,r11,$sp
3626 addi r11,r11,32
3627 stvx $seven,r10,$sp
3628 addi r10,r10,32
3629 stvx $seven,r11,$sp
3630 addi r11,r11,32
3631
3632 mtspr 256,$vrsave
3633 lvx v20,r10,$sp # ABI says so
3634 addi r10,r10,32
3635 lvx v21,r11,$sp
3636 addi r11,r11,32
3637 lvx v22,r10,$sp
3638 addi r10,r10,32
3639 lvx v23,r11,$sp
3640 addi r11,r11,32
3641 lvx v24,r10,$sp
3642 addi r10,r10,32
3643 lvx v25,r11,$sp
3644 addi r11,r11,32
3645 lvx v26,r10,$sp
3646 addi r10,r10,32
3647 lvx v27,r11,$sp
3648 addi r11,r11,32
3649 lvx v28,r10,$sp
3650 addi r10,r10,32
3651 lvx v29,r11,$sp
3652 addi r11,r11,32
3653 lvx v30,r10,$sp
3654 lvx v31,r11,$sp
3655 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
3656 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
3657 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
3658 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
3659 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
3660 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
3661 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
3662 blr
3663 .long 0
3664 .byte 0,12,0x04,1,0x80,6,6,0
3665 .long 0
3666
3667.align 5
3668_aesp8_xts_dec5x:
3669 vncipher $out0,$out0,v24
3670 vncipher $out1,$out1,v24
3671 vncipher $out2,$out2,v24
3672 vncipher $out3,$out3,v24
3673 vncipher $out4,$out4,v24
3674 lvx v24,$x20,$key_ # round[3]
3675 addi $key_,$key_,0x20
3676
3677 vncipher $out0,$out0,v25
3678 vncipher $out1,$out1,v25
3679 vncipher $out2,$out2,v25
3680 vncipher $out3,$out3,v25
3681 vncipher $out4,$out4,v25
3682 lvx v25,$x10,$key_ # round[4]
3683 bdnz _aesp8_xts_dec5x
3684
3685 subi r0,$taillen,1
3686 vncipher $out0,$out0,v24
3687 vncipher $out1,$out1,v24
3688 vncipher $out2,$out2,v24
3689 vncipher $out3,$out3,v24
3690 vncipher $out4,$out4,v24
3691
3692 andi. r0,r0,16
3693 cmpwi $taillen,0
3694 vncipher $out0,$out0,v25
3695 vncipher $out1,$out1,v25
3696 vncipher $out2,$out2,v25
3697 vncipher $out3,$out3,v25
3698 vncipher $out4,$out4,v25
3699 vxor $twk0,$twk0,v31
3700
3701 sub $inp,$inp,r0
3702 vncipher $out0,$out0,v26
3703 vncipher $out1,$out1,v26
3704 vncipher $out2,$out2,v26
3705 vncipher $out3,$out3,v26
3706 vncipher $out4,$out4,v26
3707 vxor $in1,$twk1,v31
3708
3709 vncipher $out0,$out0,v27
3710 lvx_u $in0,0,$inp
3711 vncipher $out1,$out1,v27
3712 vncipher $out2,$out2,v27
3713 vncipher $out3,$out3,v27
3714 vncipher $out4,$out4,v27
3715 vxor $in2,$twk2,v31
3716
3717 addi $key_,$sp,$FRAME+15 # rewind $key_
3718 vncipher $out0,$out0,v28
3719 vncipher $out1,$out1,v28
3720 vncipher $out2,$out2,v28
3721 vncipher $out3,$out3,v28
3722 vncipher $out4,$out4,v28
3723 lvx v24,$x00,$key_ # re-pre-load round[1]
3724 vxor $in3,$twk3,v31
3725
3726 vncipher $out0,$out0,v29
3727 le?vperm $in0,$in0,$in0,$leperm
3728 vncipher $out1,$out1,v29
3729 vncipher $out2,$out2,v29
3730 vncipher $out3,$out3,v29
3731 vncipher $out4,$out4,v29
3732 lvx v25,$x10,$key_ # re-pre-load round[2]
3733 vxor $in4,$twk4,v31
3734
3735 vncipher $out0,$out0,v30
3736 vncipher $out1,$out1,v30
3737 vncipher $out2,$out2,v30
3738 vncipher $out3,$out3,v30
3739 vncipher $out4,$out4,v30
3740
3741 vncipherlast $out0,$out0,$twk0
3742 vncipherlast $out1,$out1,$in1
3743 vncipherlast $out2,$out2,$in2
3744 vncipherlast $out3,$out3,$in3
3745 vncipherlast $out4,$out4,$in4
3746 mtctr $rounds
3747 blr
3748 .long 0
3749 .byte 0,12,0x14,0,0,0,0,0
3750___
3751}} }}}
3752
3753my $consts=1;
3754foreach(split("\n",$code)) {
3755 s/\`([^\`]*)\`/eval($1)/geo;
3756
3757 # constants table endian-specific conversion
3758 if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
3759 my $conv=$3;
3760 my @bytes=();
3761
3762 # convert to endian-agnostic format
3763 if ($1 eq "long") {
3764 foreach (split(/,\s*/,$2)) {
3765 my $l = /^0/?oct:int;
3766 push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
3767 }
3768 } else {
3769 @bytes = map(/^0/?oct:int,split(/,\s*/,$2));
3770 }
3771
3772 # little-endian conversion
3773 if ($flavour =~ /le$/o) {
3774 SWITCH: for($conv) {
3775 /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; };
3776	    /\?rev/ && do	{ @bytes=reverse(@bytes);    last; };
3777	 }
3778 }
3779
3780 #emit
3781 print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
3782 next;
3783 }
3784 $consts=0 if (m/Lconsts:/o); # end of table
3785
3786 # instructions prefixed with '?' are endian-specific and need
3787 # to be adjusted accordingly...
3788 if ($flavour =~ /le$/o) { # little-endian
3789 s/le\?//o or
3790 s/be\?/#be#/o or
3791 s/\?lvsr/lvsl/o or
3792 s/\?lvsl/lvsr/o or
3793 s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
3794 s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
3795 s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
3796 } else { # big-endian
3797 s/le\?/#le#/o or
3798 s/be\?//o or
3799 s/\?([a-z]+)/$1/o;
3800 }
3801
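 # e.g. for little-endian flavours "?vperm v0,v1,v2,v3" is emitted as
 # "vperm v0,v2,v1,v3", a "le?" prefix is simply dropped, and a "be?"
 # line is commented out; for big-endian flavours the bare '?' prefix
 # is stripped unchanged and "le?" lines are commented out instead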
3802 print $_,"\n";
3803}
3804
3805close STDOUT;