#! /usr/bin/env perl
# Copyright 2014-2018 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# This module implements support for AES instructions as per PowerISA
# specification version 2.07, first implemented by the POWER8 processor.
# The module is endian-agnostic in the sense that it supports both big-
# and little-endian cases. Data alignment in parallelizable modes is
# handled with VSX loads and stores, which implies the MSR.VSX flag being
# set. It should also be noted that the ISA specification doesn't prohibit
# alignment exceptions for these instructions on page boundaries.
# Initially alignment was handled in a pure AltiVec/VMX way [with data
# aligned programmatically, which in turn guarantees exception-free
# execution], but that turned out to hamper performance when vcipher
# instructions are interleaved. It's reckoned that the occasional
# misalignment penalties at page boundaries are on average lower than
# the additional overhead of the pure AltiVec approach.
#
# May 2016
#
# Added an XTS subroutine; a 9x improvement on little-endian and a 12x
# improvement on big-endian systems was measured.
#
######################################################################
# Current large-block performance in cycles per byte processed with
# 128-bit key (less is better).
#
#			CBC en-/decrypt	CTR	XTS
# POWER8[le]		3.96/0.72	0.74	1.1
# POWER8[be]		3.75/0.65	0.66	1.0
# POWER9[le]		4.02/0.86	0.84	1.05
# POWER9[be]		3.99/0.78	0.79	0.97

$flavour = shift;

if ($flavour =~ /64/) {
	$SIZE_T	=8;
	$LRSAVE	=2*$SIZE_T;
	$STU	="stdu";
	$POP	="ld";
	$PUSH	="std";
	$UCMP	="cmpld";
	$SHL	="sldi";
} elsif ($flavour =~ /32/) {
	$SIZE_T	=4;
	$LRSAVE	=$SIZE_T;
	$STU	="stwu";
	$POP	="lwz";
	$PUSH	="stw";
	$UCMP	="cmplw";
	$SHL	="slwi";
} else { die "nonsense $flavour"; }

$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;

$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!";
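
# A minimal sketch of manual invocation (the build system normally drives
# this; the flavour names below assume the standard ppc-xlate.pl set, and
# the file is assumed to be saved under its usual name, aesp8-ppc.pl):
#
#	perl aesp8-ppc.pl linux64le aesp8-ppc.s	# 64-bit little-endian
#	perl aesp8-ppc.pl linux32 aesp8-ppc.s	# 32-bit big-endian
#
# The flavour string selects the size-dependent mnemonics assigned above;
# the second argument, consumed by the "shift" in the open() call, names
# the output file handed to ppc-xlate.pl.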

$FRAME=8*$SIZE_T;
$prefix="aes_hw";

$sp="r1";
$vrsave="r12";

#########################################################################
{{{	# Key setup procedures						#
my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));

$code.=<<___;
.machine	"any"

.text

.align	7
Lrcon:
.long	0x01000000, 0x01000000, 0x01000000, 0x01000000	?rev
.long	0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000	?rev
.long	0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c	?rev
.long	0,0,0,0						?asis
Lconsts:
	mflr	r0
	bcl	20,31,\$+4
	mflr	$ptr			# distance between . and Lrcon
	addi	$ptr,$ptr,-0x48
	mtlr	r0
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0
.asciz	"AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"

.globl	.${prefix}_set_encrypt_key
.align	5
.${prefix}_set_encrypt_key:
Lset_encrypt_key:
	mflr		r11
	$PUSH		r11,$LRSAVE($sp)

	li		$ptr,-1
	${UCMP}i	$inp,0
	beq-		Lenc_key_abort		# if ($inp==0) return -1;
	${UCMP}i	$out,0
	beq-		Lenc_key_abort		# if ($out==0) return -1;
	li		$ptr,-2
	cmpwi		$bits,128
	blt-		Lenc_key_abort
	cmpwi		$bits,256
	bgt-		Lenc_key_abort
	andi.		r0,$bits,0x3f
	bne-		Lenc_key_abort

	lis		r0,0xfff0
	mfspr		$vrsave,256
	mtspr		256,r0

	bl		Lconsts
	mtlr		r11

	neg		r9,$inp
	lvx		$in0,0,$inp
	addi		$inp,$inp,15		# 15 is not a typo
	lvsr		$key,0,r9		# borrow $key
	li		r8,0x20
	cmpwi		$bits,192
	lvx		$in1,0,$inp
	le?vspltisb	$mask,0x0f		# borrow $mask
	lvx		$rcon,0,$ptr
	le?vxor		$key,$key,$mask		# adjust for byte swap
	lvx		$mask,r8,$ptr
	addi		$ptr,$ptr,0x10
	vperm		$in0,$in0,$in1,$key	# align [and byte swap in LE]
	li		$cnt,8
	vxor		$zero,$zero,$zero
	mtctr		$cnt

	?lvsr		$outperm,0,$out
	vspltisb	$outmask,-1
	lvx		$outhead,0,$out
	?vperm		$outmask,$zero,$outmask,$outperm

	blt		Loop128
	addi		$inp,$inp,8
	beq		L192
	addi		$inp,$inp,8
	b		L256

.align	4
Loop128:
	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx		$stage,0,$out
	addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vadduwm		$rcon,$rcon,$rcon
	vxor		$in0,$in0,$key
	bdnz		Loop128
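
	# A note on the idiom above (a sketch of the mapping, not part of
	# the original commentary): vperm with $mask rotate-n-splats the
	# last word of $in0 into all four lanes; vcipherlast against $rcon
	# then effectively applies SubBytes (ShiftRows is neutralized by
	# the splat) and xors in the round constant, i.e. the
	# SubWord(RotWord(w))^Rcon step of FIPS-197, while the vsldoi/vxor
	# ladder propagates w[i] ^= w[i-1] across the round key's words.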

	lvx		$rcon,0,$ptr		# last two round keys

	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx		$stage,0,$out
	addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vadduwm		$rcon,$rcon,$rcon
	vxor		$in0,$in0,$key

	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx		$stage,0,$out
	addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vxor		$in0,$in0,$key
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	stvx		$stage,0,$out

	addi		$inp,$out,15		# 15 is not a typo
	addi		$out,$out,0x50

	li		$rounds,10
	b		Ldone

.align	4
L192:
	lvx		$tmp,0,$inp
	li		$cnt,4
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	stvx		$stage,0,$out
	addi		$out,$out,16
	vperm		$in1,$in1,$tmp,$key	# align [and byte swap in LE]
	vspltisb	$key,8			# borrow $key
	mtctr		$cnt
	vsububm		$mask,$mask,$key	# adjust the mask

Loop192:
	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vcipherlast	$key,$key,$rcon

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp

	vsldoi		$stage,$zero,$in1,8
	vspltw		$tmp,$in0,3
	vxor		$tmp,$tmp,$in1
	vsldoi		$in1,$zero,$in1,12	# >>32
	vadduwm		$rcon,$rcon,$rcon
	vxor		$in1,$in1,$tmp
	vxor		$in0,$in0,$key
	vxor		$in1,$in1,$key
	vsldoi		$stage,$stage,$in0,8

	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vperm		$outtail,$stage,$stage,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx		$stage,0,$out
	addi		$out,$out,16

	vsldoi		$stage,$in0,$in1,8
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vperm		$outtail,$stage,$stage,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	stvx		$stage,0,$out
	addi		$out,$out,16

	vspltw		$tmp,$in0,3
	vxor		$tmp,$tmp,$in1
	vsldoi		$in1,$zero,$in1,12	# >>32
	vadduwm		$rcon,$rcon,$rcon
	vxor		$in1,$in1,$tmp
	vxor		$in0,$in0,$key
	vxor		$in1,$in1,$key
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	stvx		$stage,0,$out
	addi		$inp,$out,15		# 15 is not a typo
	addi		$out,$out,16
	bdnz		Loop192

	li		$rounds,12
	addi		$out,$out,0x20
	b		Ldone

.align	4
L256:
	lvx		$tmp,0,$inp
	li		$cnt,7
	li		$rounds,14
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	stvx		$stage,0,$out
	addi		$out,$out,16
	vperm		$in1,$in1,$tmp,$key	# align [and byte swap in LE]
	mtctr		$cnt

Loop256:
	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vperm		$outtail,$in1,$in1,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx		$stage,0,$out
	addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vadduwm		$rcon,$rcon,$rcon
	vxor		$in0,$in0,$key
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	stvx		$stage,0,$out
	addi		$inp,$out,15		# 15 is not a typo
	addi		$out,$out,16
	bdz		Ldone

	vspltw		$key,$in0,3		# just splat
	vsldoi		$tmp,$zero,$in1,12	# >>32
	vsbox		$key,$key

	vxor		$in1,$in1,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in1,$in1,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in1,$in1,$tmp

	vxor		$in1,$in1,$key
	b		Loop256

.align	4
Ldone:
	lvx		$in1,0,$inp		# redundant in aligned case
	vsel		$in1,$outhead,$in1,$outmask
	stvx		$in1,0,$inp
	li		$ptr,0
	mtspr		256,$vrsave
	stw		$rounds,0($out)

Lenc_key_abort:
	mr		r3,$ptr
	blr
	.long		0
	.byte		0,12,0x14,1,0,0,3,0
	.long		0
.size	.${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key

.globl	.${prefix}_set_decrypt_key
.align	5
.${prefix}_set_decrypt_key:
	$STU		$sp,-$FRAME($sp)
	mflr		r10
	$PUSH		r10,`$FRAME+$LRSAVE`($sp)
	bl		Lset_encrypt_key
	mtlr		r10

	cmpwi		r3,0
	bne-		Ldec_key_abort

	slwi		$cnt,$rounds,4
	subi		$inp,$out,240		# first round key
	srwi		$rounds,$rounds,1
	add		$out,$inp,$cnt		# last round key
	mtctr		$rounds

Ldeckey:
	lwz		r0, 0($inp)
	lwz		r6, 4($inp)
	lwz		r7, 8($inp)
	lwz		r8, 12($inp)
	addi		$inp,$inp,16
	lwz		r9, 0($out)
	lwz		r10,4($out)
	lwz		r11,8($out)
	lwz		r12,12($out)
	stw		r0, 0($out)
	stw		r6, 4($out)
	stw		r7, 8($out)
	stw		r8, 12($out)
	subi		$out,$out,16
	stw		r9, -16($inp)
	stw		r10,-12($inp)
	stw		r11,-8($inp)
	stw		r12,-4($inp)
	bdnz		Ldeckey

	xor		r3,r3,r3		# return value
Ldec_key_abort:
	addi		$sp,$sp,$FRAME
	blr
	.long		0
	.byte		0,12,4,1,0x80,0,3,0
	.long		0
.size	.${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
___
}}}
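
# For reference, the Ldeckey loop above derives the decryption schedule
# purely by reversing the order of the round keys; no InvMixColumns is
# applied to them, because vncipher is defined to take the untransformed
# round key. A sketch of the same swap in Perl (hypothetical @rk holding
# the schedule as 32-bit words, four per round key; shown commented out
# to keep this file self-contained):
#
#	my @rk;				# flat list of 32-bit words
#	my ($lo, $hi) = (0, 4*$rounds);
#	while ($lo < $hi) {
#		@rk[$lo..$lo+3, $hi..$hi+3] = @rk[$hi..$hi+3, $lo..$lo+3];
#		$lo += 4; $hi -= 4;	# swap 16-byte round keys end-to-end
#	}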
#########################################################################
{{{	# Single block en- and decrypt procedures			#
sub gen_block () {
my $dir = shift;
my $n   = $dir eq "de" ? "n" : "";
my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));

$code.=<<___;
.globl	.${prefix}_${dir}crypt
.align	5
.${prefix}_${dir}crypt:
	lwz	$rounds,240($key)
	lis	r0,0xfc00
	mfspr	$vrsave,256
	li	$idx,15			# 15 is not a typo
	mtspr	256,r0

	lvx	v0,0,$inp
	neg	r11,$out
	lvx	v1,$idx,$inp
	lvsl	v2,0,$inp		# inpperm
	le?vspltisb	v4,0x0f
	?lvsl	v3,0,r11		# outperm
	le?vxor	v2,v2,v4
	li	$idx,16
	vperm	v0,v0,v1,v2		# align [and byte swap in LE]
	lvx	v1,0,$key
	?lvsl	v5,0,$key		# keyperm
	srwi	$rounds,$rounds,1
	lvx	v2,$idx,$key
	addi	$idx,$idx,16
	subi	$rounds,$rounds,1
	?vperm	v1,v1,v2,v5		# align round key

	vxor	v0,v0,v1
	lvx	v1,$idx,$key
	addi	$idx,$idx,16
	mtctr	$rounds

Loop_${dir}c:
	?vperm	v2,v2,v1,v5
	v${n}cipher	v0,v0,v2
	lvx	v2,$idx,$key
	addi	$idx,$idx,16
	?vperm	v1,v1,v2,v5
	v${n}cipher	v0,v0,v1
	lvx	v1,$idx,$key
	addi	$idx,$idx,16
	bdnz	Loop_${dir}c

	?vperm	v2,v2,v1,v5
	v${n}cipher	v0,v0,v2
	lvx	v2,$idx,$key
	?vperm	v1,v1,v2,v5
	v${n}cipherlast	v0,v0,v1

	vspltisb	v2,-1
	vxor	v1,v1,v1
	li	$idx,15			# 15 is not a typo
	?vperm	v2,v1,v2,v3		# outmask
	le?vxor	v3,v3,v4
	lvx	v1,0,$out		# outhead
	vperm	v0,v0,v0,v3		# rotate [and byte swap in LE]
	vsel	v1,v1,v0,v2
	lvx	v4,$idx,$out
	stvx	v1,0,$out
	vsel	v0,v0,v4,v2
	stvx	v0,$idx,$out

	mtspr	256,$vrsave
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,3,0
	.long	0
.size	.${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
___
}
&gen_block("en");
&gen_block("de");
}}}
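
# The two gen_block() calls above emit the one-block primitives
# .aes_hw_encrypt and .aes_hw_decrypt (given $prefix="aes_hw"); their
# argument order mirrors AES_encrypt/AES_decrypt, i.e. (const uint8_t *in,
# uint8_t *out, const AES_KEY *key) in r3..r5.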
#########################################################################
{{{	# CBC en- and decrypt procedures				#
my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout,$tmp)=		map("v$_",(0..3));
my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
						map("v$_",(4..10));
$code.=<<___;
.globl	.${prefix}_cbc_encrypt
.align	5
.${prefix}_cbc_encrypt:
	${UCMP}i	$len,16
	bltlr-

	cmpwi	$enc,0			# test direction
	lis	r0,0xffe0
	mfspr	$vrsave,256
	mtspr	256,r0

	li	$idx,15
	vxor	$rndkey0,$rndkey0,$rndkey0
	le?vspltisb	$tmp,0x0f

	lvx	$ivec,0,$ivp		# load [unaligned] iv
	lvsl	$inpperm,0,$ivp
	lvx	$inptail,$idx,$ivp
	le?vxor	$inpperm,$inpperm,$tmp
	vperm	$ivec,$ivec,$inptail,$inpperm

	neg	r11,$inp
	?lvsl	$keyperm,0,$key		# prepare for unaligned key
	lwz	$rounds,240($key)

	lvsr	$inpperm,0,r11		# prepare for unaligned load
	lvx	$inptail,0,$inp
	addi	$inp,$inp,15		# 15 is not a typo
	le?vxor	$inpperm,$inpperm,$tmp

	?lvsr	$outperm,0,$out		# prepare for unaligned store
	vspltisb	$outmask,-1
	lvx	$outhead,0,$out
	?vperm	$outmask,$rndkey0,$outmask,$outperm
	le?vxor	$outperm,$outperm,$tmp

	srwi	$rounds,$rounds,1
	li	$idx,16
	subi	$rounds,$rounds,1
	beq	Lcbc_dec

Lcbc_enc:
	vmr	$inout,$inptail
	lvx	$inptail,0,$inp
	addi	$inp,$inp,16
	mtctr	$rounds
	subi	$len,$len,16		# len-=16

	lvx	$rndkey0,0,$key
	vperm	$inout,$inout,$inptail,$inpperm
	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16
	vxor	$inout,$inout,$ivec

Loop_cbc_enc:
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16
	bdnz	Loop_cbc_enc

	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key
	li	$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipherlast	$ivec,$inout,$rndkey0
	${UCMP}i	$len,16

	vperm	$tmp,$ivec,$ivec,$outperm
	vsel	$inout,$outhead,$tmp,$outmask
	vmr	$outhead,$tmp
	stvx	$inout,0,$out
	addi	$out,$out,16
	bge	Lcbc_enc

	b	Lcbc_done

.align	4
Lcbc_dec:
	${UCMP}i	$len,128
	bge	_aesp8_cbc_decrypt8x
	vmr	$tmp,$inptail
	lvx	$inptail,0,$inp
	addi	$inp,$inp,16
	mtctr	$rounds
	subi	$len,$len,16		# len-=16

	lvx	$rndkey0,0,$key
	vperm	$tmp,$tmp,$inptail,$inpperm
	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$inout,$tmp,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16

Loop_cbc_dec:
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipher	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16
	bdnz	Loop_cbc_dec

	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key
	li	$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipherlast	$inout,$inout,$rndkey0
	${UCMP}i	$len,16

	vxor	$inout,$inout,$ivec
	vmr	$ivec,$tmp
	vperm	$tmp,$inout,$inout,$outperm
	vsel	$inout,$outhead,$tmp,$outmask
	vmr	$outhead,$tmp
	stvx	$inout,0,$out
	addi	$out,$out,16
	bge	Lcbc_dec

Lcbc_done:
	addi	$out,$out,-1
	lvx	$inout,0,$out		# redundant in aligned case
	vsel	$inout,$outhead,$inout,$outmask
	stvx	$inout,0,$out

	neg	$enc,$ivp		# write [unaligned] iv
	li	$idx,15			# 15 is not a typo
	vxor	$rndkey0,$rndkey0,$rndkey0
	vspltisb	$outmask,-1
	le?vspltisb	$tmp,0x0f
	?lvsl	$outperm,0,$enc
	?vperm	$outmask,$rndkey0,$outmask,$outperm
	le?vxor	$outperm,$outperm,$tmp
	lvx	$outhead,0,$ivp
	vperm	$ivec,$ivec,$ivec,$outperm
	vsel	$inout,$outhead,$ivec,$outmask
	lvx	$inptail,$idx,$ivp
	stvx	$inout,0,$ivp
	vsel	$inout,$ivec,$inptail,$outmask
	stvx	$inout,$idx,$ivp

	mtspr	256,$vrsave
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,6,0
	.long	0
___
#########################################################################
{{	# Optimized CBC decrypt procedure				#
my $key_="r11";
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
	$x00=0 if ($flavour =~ /osx/);
my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
my $rndkey0="v23";	# v24-v25 rotating buffer for first few round keys
			# v26-v31 last 6 round keys
my ($tmp,$keyperm)=($in3,$in4);	# aliases with "caller", redundant assignment

$code.=<<___;
.align	5
_aesp8_cbc_decrypt8x:
	$STU	$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
	li	r10,`$FRAME+8*16+15`
	li	r11,`$FRAME+8*16+31`
	stvx	v20,r10,$sp		# ABI says so
	addi	r10,r10,32
	stvx	v21,r11,$sp
	addi	r11,r11,32
	stvx	v22,r10,$sp
	addi	r10,r10,32
	stvx	v23,r11,$sp
	addi	r11,r11,32
	stvx	v24,r10,$sp
	addi	r10,r10,32
	stvx	v25,r11,$sp
	addi	r11,r11,32
	stvx	v26,r10,$sp
	addi	r10,r10,32
	stvx	v27,r11,$sp
	addi	r11,r11,32
	stvx	v28,r10,$sp
	addi	r10,r10,32
	stvx	v29,r11,$sp
	addi	r11,r11,32
	stvx	v30,r10,$sp
	stvx	v31,r11,$sp
	li	r0,-1
	stw	$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
	li	$x10,0x10
	$PUSH	r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	li	$x20,0x20
	$PUSH	r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	li	$x30,0x30
	$PUSH	r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	li	$x40,0x40
	$PUSH	r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	li	$x50,0x50
	$PUSH	r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	li	$x60,0x60
	$PUSH	r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	li	$x70,0x70
	mtspr	256,r0

	subi	$rounds,$rounds,3	# -4 in total
	subi	$len,$len,128		# bias

	lvx	$rndkey0,$x00,$key	# load key schedule
	lvx	v30,$x10,$key
	addi	$key,$key,0x20
	lvx	v31,$x00,$key
	?vperm	$rndkey0,$rndkey0,v30,$keyperm
	addi	$key_,$sp,`$FRAME+15`
	mtctr	$rounds

Load_cbc_dec_key:
	?vperm	v24,v30,v31,$keyperm
	lvx	v30,$x10,$key
	addi	$key,$key,0x20
	stvx	v24,$x00,$key_		# off-load round[1]
	?vperm	v25,v31,v30,$keyperm
	lvx	v31,$x00,$key
	stvx	v25,$x10,$key_		# off-load round[2]
	addi	$key_,$key_,0x20
	bdnz	Load_cbc_dec_key

	lvx	v26,$x10,$key
	?vperm	v24,v30,v31,$keyperm
	lvx	v27,$x20,$key
	stvx	v24,$x00,$key_		# off-load round[3]
	?vperm	v25,v31,v26,$keyperm
	lvx	v28,$x30,$key
	stvx	v25,$x10,$key_		# off-load round[4]
	addi	$key_,$sp,`$FRAME+15`	# rewind $key_
	?vperm	v26,v26,v27,$keyperm
	lvx	v29,$x40,$key
	?vperm	v27,v27,v28,$keyperm
	lvx	v30,$x50,$key
	?vperm	v28,v28,v29,$keyperm
	lvx	v31,$x60,$key
	?vperm	v29,v29,v30,$keyperm
	lvx	$out0,$x70,$key		# borrow $out0
	?vperm	v30,v30,v31,$keyperm
	lvx	v24,$x00,$key_		# pre-load round[1]
	?vperm	v31,v31,$out0,$keyperm
	lvx	v25,$x10,$key_		# pre-load round[2]

	#lvx	$inptail,0,$inp		# "caller" already did this
	#addi	$inp,$inp,15		# 15 is not a typo
	subi	$inp,$inp,15		# undo "caller"

	le?li	$idx,8
	lvx_u	$in0,$x00,$inp		# load first 8 "words"
	le?lvsl	$inpperm,0,$idx
	le?vspltisb	$tmp,0x0f
	lvx_u	$in1,$x10,$inp
	le?vxor	$inpperm,$inpperm,$tmp	# transform for lvx_u/stvx_u
	lvx_u	$in2,$x20,$inp
	le?vperm	$in0,$in0,$in0,$inpperm
	lvx_u	$in3,$x30,$inp
	le?vperm	$in1,$in1,$in1,$inpperm
	lvx_u	$in4,$x40,$inp
	le?vperm	$in2,$in2,$in2,$inpperm
	vxor	$out0,$in0,$rndkey0
	lvx_u	$in5,$x50,$inp
	le?vperm	$in3,$in3,$in3,$inpperm
	vxor	$out1,$in1,$rndkey0
	lvx_u	$in6,$x60,$inp
	le?vperm	$in4,$in4,$in4,$inpperm
	vxor	$out2,$in2,$rndkey0
	lvx_u	$in7,$x70,$inp
	addi	$inp,$inp,0x80
	le?vperm	$in5,$in5,$in5,$inpperm
	vxor	$out3,$in3,$rndkey0
	le?vperm	$in6,$in6,$in6,$inpperm
	vxor	$out4,$in4,$rndkey0
	le?vperm	$in7,$in7,$in7,$inpperm
	vxor	$out5,$in5,$rndkey0
	vxor	$out6,$in6,$rndkey0
	vxor	$out7,$in7,$rndkey0

	mtctr	$rounds
	b	Loop_cbc_dec8x
.align	5
Loop_cbc_dec8x:
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24
	lvx	v24,$x20,$key_		# round[3]
	addi	$key_,$key_,0x20

	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25
	lvx	v25,$x10,$key_		# round[4]
	bdnz	Loop_cbc_dec8x

	subic	$len,$len,128		# $len-=128
	vncipher	$out0,$out0,v24
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24

	subfe.	r0,r0,r0		# borrow?-1:0
	vncipher	$out0,$out0,v25
	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25

	and	r0,r0,$len
	vncipher	$out0,$out0,v26
	vncipher	$out1,$out1,v26
	vncipher	$out2,$out2,v26
	vncipher	$out3,$out3,v26
	vncipher	$out4,$out4,v26
	vncipher	$out5,$out5,v26
	vncipher	$out6,$out6,v26
	vncipher	$out7,$out7,v26

	add	$inp,$inp,r0		# $inp is adjusted in such a
					# way that at exit from the
					# loop inX-in7 are loaded
					# with last "words"
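
	# A note on the idiom above (a sketch, not original commentary):
	# subic sets CA when $len-128 does not borrow, subfe. r0,r0,r0 then
	# yields 0 (CR0 "eq") or -1, and the and/add pair rewinds $inp by
	# the shortfall whenever fewer than 128 bytes remain, so the final
	# iteration reloads the last blocks instead of reading past the end
	# of the input.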
	vncipher	$out0,$out0,v27
	vncipher	$out1,$out1,v27
	vncipher	$out2,$out2,v27
	vncipher	$out3,$out3,v27
	vncipher	$out4,$out4,v27
	vncipher	$out5,$out5,v27
	vncipher	$out6,$out6,v27
	vncipher	$out7,$out7,v27

	addi	$key_,$sp,`$FRAME+15`	# rewind $key_
	vncipher	$out0,$out0,v28
	vncipher	$out1,$out1,v28
	vncipher	$out2,$out2,v28
	vncipher	$out3,$out3,v28
	vncipher	$out4,$out4,v28
	vncipher	$out5,$out5,v28
	vncipher	$out6,$out6,v28
	vncipher	$out7,$out7,v28
	lvx	v24,$x00,$key_		# re-pre-load round[1]

	vncipher	$out0,$out0,v29
	vncipher	$out1,$out1,v29
	vncipher	$out2,$out2,v29
	vncipher	$out3,$out3,v29
	vncipher	$out4,$out4,v29
	vncipher	$out5,$out5,v29
	vncipher	$out6,$out6,v29
	vncipher	$out7,$out7,v29
	lvx	v25,$x10,$key_		# re-pre-load round[2]

	vncipher	$out0,$out0,v30
	vxor	$ivec,$ivec,v31		# xor with last round key
	vncipher	$out1,$out1,v30
	vxor	$in0,$in0,v31
	vncipher	$out2,$out2,v30
	vxor	$in1,$in1,v31
	vncipher	$out3,$out3,v30
	vxor	$in2,$in2,v31
	vncipher	$out4,$out4,v30
	vxor	$in3,$in3,v31
	vncipher	$out5,$out5,v30
	vxor	$in4,$in4,v31
	vncipher	$out6,$out6,v30
	vxor	$in5,$in5,v31
	vncipher	$out7,$out7,v30
	vxor	$in6,$in6,v31

	vncipherlast	$out0,$out0,$ivec
	vncipherlast	$out1,$out1,$in0
	lvx_u	$in0,$x00,$inp		# load next input block
	vncipherlast	$out2,$out2,$in1
	lvx_u	$in1,$x10,$inp
	vncipherlast	$out3,$out3,$in2
	le?vperm	$in0,$in0,$in0,$inpperm
	lvx_u	$in2,$x20,$inp
	vncipherlast	$out4,$out4,$in3
	le?vperm	$in1,$in1,$in1,$inpperm
	lvx_u	$in3,$x30,$inp
	vncipherlast	$out5,$out5,$in4
	le?vperm	$in2,$in2,$in2,$inpperm
	lvx_u	$in4,$x40,$inp
	vncipherlast	$out6,$out6,$in5
	le?vperm	$in3,$in3,$in3,$inpperm
	lvx_u	$in5,$x50,$inp
	vncipherlast	$out7,$out7,$in6
	le?vperm	$in4,$in4,$in4,$inpperm
	lvx_u	$in6,$x60,$inp
	vmr	$ivec,$in7
	le?vperm	$in5,$in5,$in5,$inpperm
	lvx_u	$in7,$x70,$inp
	addi	$inp,$inp,0x80

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$in6,$in6,$in6,$inpperm
	vxor	$out0,$in0,$rndkey0
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	le?vperm	$in7,$in7,$in7,$inpperm
	vxor	$out1,$in1,$rndkey0
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x20,$out
	vxor	$out2,$in2,$rndkey0
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x30,$out
	vxor	$out3,$in3,$rndkey0
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x40,$out
	vxor	$out4,$in4,$rndkey0
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x50,$out
	vxor	$out5,$in5,$rndkey0
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out6,$x60,$out
	vxor	$out6,$in6,$rndkey0
	stvx_u	$out7,$x70,$out
	addi	$out,$out,0x80
	vxor	$out7,$in7,$rndkey0

	mtctr	$rounds
	beq	Loop_cbc_dec8x		# did $len-=128 borrow?

	addic.	$len,$len,128
	beq	Lcbc_dec8x_done
	nop
	nop

Loop_cbc_dec8x_tail:			# up to 7 "words" tail...
	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24
	lvx	v24,$x20,$key_		# round[3]
	addi	$key_,$key_,0x20

	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25
	lvx	v25,$x10,$key_		# round[4]
	bdnz	Loop_cbc_dec8x_tail

	vncipher	$out1,$out1,v24
	vncipher	$out2,$out2,v24
	vncipher	$out3,$out3,v24
	vncipher	$out4,$out4,v24
	vncipher	$out5,$out5,v24
	vncipher	$out6,$out6,v24
	vncipher	$out7,$out7,v24

	vncipher	$out1,$out1,v25
	vncipher	$out2,$out2,v25
	vncipher	$out3,$out3,v25
	vncipher	$out4,$out4,v25
	vncipher	$out5,$out5,v25
	vncipher	$out6,$out6,v25
	vncipher	$out7,$out7,v25

	vncipher	$out1,$out1,v26
	vncipher	$out2,$out2,v26
	vncipher	$out3,$out3,v26
	vncipher	$out4,$out4,v26
	vncipher	$out5,$out5,v26
	vncipher	$out6,$out6,v26
	vncipher	$out7,$out7,v26

	vncipher	$out1,$out1,v27
	vncipher	$out2,$out2,v27
	vncipher	$out3,$out3,v27
	vncipher	$out4,$out4,v27
	vncipher	$out5,$out5,v27
	vncipher	$out6,$out6,v27
	vncipher	$out7,$out7,v27

	vncipher	$out1,$out1,v28
	vncipher	$out2,$out2,v28
	vncipher	$out3,$out3,v28
	vncipher	$out4,$out4,v28
	vncipher	$out5,$out5,v28
	vncipher	$out6,$out6,v28
	vncipher	$out7,$out7,v28

	vncipher	$out1,$out1,v29
	vncipher	$out2,$out2,v29
	vncipher	$out3,$out3,v29
	vncipher	$out4,$out4,v29
	vncipher	$out5,$out5,v29
	vncipher	$out6,$out6,v29
	vncipher	$out7,$out7,v29

	vncipher	$out1,$out1,v30
	vxor	$ivec,$ivec,v31		# last round key
	vncipher	$out2,$out2,v30
	vxor	$in1,$in1,v31
	vncipher	$out3,$out3,v30
	vxor	$in2,$in2,v31
	vncipher	$out4,$out4,v30
	vxor	$in3,$in3,v31
	vncipher	$out5,$out5,v30
	vxor	$in4,$in4,v31
	vncipher	$out6,$out6,v30
	vxor	$in5,$in5,v31
	vncipher	$out7,$out7,v30
	vxor	$in6,$in6,v31

	cmplwi	$len,32			# switch($len)
	blt	Lcbc_dec8x_one
	nop
	beq	Lcbc_dec8x_two
	cmplwi	$len,64
	blt	Lcbc_dec8x_three
	nop
	beq	Lcbc_dec8x_four
	cmplwi	$len,96
	blt	Lcbc_dec8x_five
	nop
	beq	Lcbc_dec8x_six

Lcbc_dec8x_seven:
	vncipherlast	$out1,$out1,$ivec
	vncipherlast	$out2,$out2,$in1
	vncipherlast	$out3,$out3,$in2
	vncipherlast	$out4,$out4,$in3
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr	$ivec,$in7

	le?vperm	$out1,$out1,$out1,$inpperm
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x00,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x10,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x20,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x30,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x40,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out6,$x50,$out
	stvx_u	$out7,$x60,$out
	addi	$out,$out,0x70
	b	Lcbc_dec8x_done

.align	5
Lcbc_dec8x_six:
	vncipherlast	$out2,$out2,$ivec
	vncipherlast	$out3,$out3,$in2
	vncipherlast	$out4,$out4,$in3
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr	$ivec,$in7

	le?vperm	$out2,$out2,$out2,$inpperm
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x00,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x10,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x20,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x30,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out6,$x40,$out
	stvx_u	$out7,$x50,$out
	addi	$out,$out,0x60
	b	Lcbc_dec8x_done

.align	5
Lcbc_dec8x_five:
	vncipherlast	$out3,$out3,$ivec
	vncipherlast	$out4,$out4,$in3
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr	$ivec,$in7

	le?vperm	$out3,$out3,$out3,$inpperm
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x00,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x10,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x20,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out6,$x30,$out
	stvx_u	$out7,$x40,$out
	addi	$out,$out,0x50
	b	Lcbc_dec8x_done

.align	5
Lcbc_dec8x_four:
	vncipherlast	$out4,$out4,$ivec
	vncipherlast	$out5,$out5,$in4
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr	$ivec,$in7

	le?vperm	$out4,$out4,$out4,$inpperm
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x00,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x10,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out6,$x20,$out
	stvx_u	$out7,$x30,$out
	addi	$out,$out,0x40
	b	Lcbc_dec8x_done

.align	5
Lcbc_dec8x_three:
	vncipherlast	$out5,$out5,$ivec
	vncipherlast	$out6,$out6,$in5
	vncipherlast	$out7,$out7,$in6
	vmr	$ivec,$in7

	le?vperm	$out5,$out5,$out5,$inpperm
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x00,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out6,$x10,$out
	stvx_u	$out7,$x20,$out
	addi	$out,$out,0x30
	b	Lcbc_dec8x_done

.align	5
Lcbc_dec8x_two:
	vncipherlast	$out6,$out6,$ivec
	vncipherlast	$out7,$out7,$in6
	vmr	$ivec,$in7

	le?vperm	$out6,$out6,$out6,$inpperm
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out6,$x00,$out
	stvx_u	$out7,$x10,$out
	addi	$out,$out,0x20
	b	Lcbc_dec8x_done

.align	5
Lcbc_dec8x_one:
	vncipherlast	$out7,$out7,$ivec
	vmr	$ivec,$in7

	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out7,0,$out
	addi	$out,$out,0x10

Lcbc_dec8x_done:
	le?vperm	$ivec,$ivec,$ivec,$inpperm
	stvx_u	$ivec,0,$ivp		# write [unaligned] iv

	li	r10,`$FRAME+15`
	li	r11,`$FRAME+31`
	stvx	$inpperm,r10,$sp	# wipe copies of round keys
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32
	stvx	$inpperm,r10,$sp
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32
	stvx	$inpperm,r10,$sp
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32
	stvx	$inpperm,r10,$sp
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32

	mtspr	256,$vrsave
	lvx	v20,r10,$sp		# ABI says so
	addi	r10,r10,32
	lvx	v21,r11,$sp
	addi	r11,r11,32
	lvx	v22,r10,$sp
	addi	r10,r10,32
	lvx	v23,r11,$sp
	addi	r11,r11,32
	lvx	v24,r10,$sp
	addi	r10,r10,32
	lvx	v25,r11,$sp
	addi	r11,r11,32
	lvx	v26,r10,$sp
	addi	r10,r10,32
	lvx	v27,r11,$sp
	addi	r11,r11,32
	lvx	v28,r10,$sp
	addi	r10,r10,32
	lvx	v29,r11,$sp
	addi	r11,r11,32
	lvx	v30,r10,$sp
	lvx	v31,r11,$sp
	$POP	r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	$POP	r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	$POP	r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	$POP	r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	$POP	r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	$POP	r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	addi	$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
	blr
	.long	0
	.byte	0,12,0x04,0,0x80,6,6,0
	.long	0
.size	.${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt
___
}}	}}}
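
# The 8x interleave factor in the procedure above (and in the CTR path
# below) is presumably chosen to cover the multi-cycle latency of
# vcipher/vncipher on POWER8: with eight independent streams in flight,
# each round's result is ready by the time its register is needed again,
# so throughput approaches one block per group of round instructions
# rather than being latency-bound.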

#########################################################################
{{{	# CTR procedure[s]						#
my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout,$tmp)=		map("v$_",(0..3));
my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
						map("v$_",(4..11));
my $dat=$tmp;

$code.=<<___;
.globl	.${prefix}_ctr32_encrypt_blocks
.align	5
.${prefix}_ctr32_encrypt_blocks:
	${UCMP}i	$len,1
	bltlr-

	lis	r0,0xfff0
	mfspr	$vrsave,256
	mtspr	256,r0

	li	$idx,15
	vxor	$rndkey0,$rndkey0,$rndkey0
	le?vspltisb	$tmp,0x0f

	lvx	$ivec,0,$ivp		# load [unaligned] iv
	lvsl	$inpperm,0,$ivp
	lvx	$inptail,$idx,$ivp
	vspltisb	$one,1
	le?vxor	$inpperm,$inpperm,$tmp
	vperm	$ivec,$ivec,$inptail,$inpperm
	vsldoi	$one,$rndkey0,$one,1
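
	# A sketch of the constant built above: $one ends up as the 128-bit
	# value 1 (15 zero bytes followed by 0x01), so the vadduwm additions
	# below bump only the rightmost 32-bit word of the IV, the
	# big-endian counter lane; carries never propagate into the upper
	# 96 bits, which is exactly the ctr32 semantics the caller expects.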

	neg	r11,$inp
	?lvsl	$keyperm,0,$key		# prepare for unaligned key
	lwz	$rounds,240($key)

	lvsr	$inpperm,0,r11		# prepare for unaligned load
	lvx	$inptail,0,$inp
	addi	$inp,$inp,15		# 15 is not a typo
	le?vxor	$inpperm,$inpperm,$tmp

	srwi	$rounds,$rounds,1
	li	$idx,16
	subi	$rounds,$rounds,1

	${UCMP}i	$len,8
	bge	_aesp8_ctr32_encrypt8x

	?lvsr	$outperm,0,$out		# prepare for unaligned store
	vspltisb	$outmask,-1
	lvx	$outhead,0,$out
	?vperm	$outmask,$rndkey0,$outmask,$outperm
	le?vxor	$outperm,$outperm,$tmp

	lvx	$rndkey0,0,$key
	mtctr	$rounds
	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$inout,$ivec,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16
	b	Loop_ctr32_enc

.align	5
Loop_ctr32_enc:
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher	$inout,$inout,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16
	bdnz	Loop_ctr32_enc

	vadduwm	$ivec,$ivec,$one
	vmr	$dat,$inptail
	lvx	$inptail,0,$inp
	addi	$inp,$inp,16
	subic.	$len,$len,1		# blocks--

	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$inout,$inout,$rndkey1
	lvx	$rndkey1,$idx,$key
	vperm	$dat,$dat,$inptail,$inpperm
	li	$idx,16
	?vperm	$rndkey1,$rndkey0,$rndkey1,$keyperm
	lvx	$rndkey0,0,$key
	vxor	$dat,$dat,$rndkey1	# last round key
	vcipherlast	$inout,$inout,$dat

	lvx	$rndkey1,$idx,$key
	addi	$idx,$idx,16
	vperm	$inout,$inout,$inout,$outperm
	vsel	$dat,$outhead,$inout,$outmask
	mtctr	$rounds
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vmr	$outhead,$inout
	vxor	$inout,$ivec,$rndkey0
	lvx	$rndkey0,$idx,$key
	addi	$idx,$idx,16
	stvx	$dat,0,$out
	addi	$out,$out,16
	bne	Loop_ctr32_enc

	addi	$out,$out,-1
	lvx	$inout,0,$out		# redundant in aligned case
	vsel	$inout,$outhead,$inout,$outmask
	stvx	$inout,0,$out

	mtspr	256,$vrsave
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,6,0
	.long	0
___
#########################################################################
{{	# Optimized CTR procedure					#
my $key_="r11";
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
	$x00=0 if ($flavour =~ /osx/);
my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14));
my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22));
my $rndkey0="v23";	# v24-v25 rotating buffer for first few round keys
			# v26-v31 last 6 round keys
my ($tmp,$keyperm)=($in3,$in4);	# aliases with "caller", redundant assignment
my ($two,$three,$four)=($outhead,$outperm,$outmask);

$code.=<<___;
.align	5
_aesp8_ctr32_encrypt8x:
	$STU	$sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
	li	r10,`$FRAME+8*16+15`
	li	r11,`$FRAME+8*16+31`
	stvx	v20,r10,$sp		# ABI says so
	addi	r10,r10,32
	stvx	v21,r11,$sp
	addi	r11,r11,32
	stvx	v22,r10,$sp
	addi	r10,r10,32
	stvx	v23,r11,$sp
	addi	r11,r11,32
	stvx	v24,r10,$sp
	addi	r10,r10,32
	stvx	v25,r11,$sp
	addi	r11,r11,32
	stvx	v26,r10,$sp
	addi	r10,r10,32
	stvx	v27,r11,$sp
	addi	r11,r11,32
	stvx	v28,r10,$sp
	addi	r10,r10,32
	stvx	v29,r11,$sp
	addi	r11,r11,32
	stvx	v30,r10,$sp
	stvx	v31,r11,$sp
	li	r0,-1
	stw	$vrsave,`$FRAME+21*16-4`($sp)	# save vrsave
	li	$x10,0x10
	$PUSH	r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	li	$x20,0x20
	$PUSH	r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	li	$x30,0x30
	$PUSH	r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	li	$x40,0x40
	$PUSH	r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	li	$x50,0x50
	$PUSH	r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	li	$x60,0x60
	$PUSH	r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	li	$x70,0x70
	mtspr	256,r0

	subi	$rounds,$rounds,3	# -4 in total

	lvx	$rndkey0,$x00,$key	# load key schedule
	lvx	v30,$x10,$key
	addi	$key,$key,0x20
	lvx	v31,$x00,$key
	?vperm	$rndkey0,$rndkey0,v30,$keyperm
	addi	$key_,$sp,`$FRAME+15`
	mtctr	$rounds

Load_ctr32_enc_key:
	?vperm	v24,v30,v31,$keyperm
	lvx	v30,$x10,$key
	addi	$key,$key,0x20
	stvx	v24,$x00,$key_		# off-load round[1]
	?vperm	v25,v31,v30,$keyperm
	lvx	v31,$x00,$key
	stvx	v25,$x10,$key_		# off-load round[2]
	addi	$key_,$key_,0x20
	bdnz	Load_ctr32_enc_key

	lvx	v26,$x10,$key
	?vperm	v24,v30,v31,$keyperm
	lvx	v27,$x20,$key
	stvx	v24,$x00,$key_		# off-load round[3]
	?vperm	v25,v31,v26,$keyperm
	lvx	v28,$x30,$key
	stvx	v25,$x10,$key_		# off-load round[4]
	addi	$key_,$sp,`$FRAME+15`	# rewind $key_
	?vperm	v26,v26,v27,$keyperm
	lvx	v29,$x40,$key
	?vperm	v27,v27,v28,$keyperm
	lvx	v30,$x50,$key
	?vperm	v28,v28,v29,$keyperm
	lvx	v31,$x60,$key
	?vperm	v29,v29,v30,$keyperm
	lvx	$out0,$x70,$key		# borrow $out0
	?vperm	v30,v30,v31,$keyperm
	lvx	v24,$x00,$key_		# pre-load round[1]
	?vperm	v31,v31,$out0,$keyperm
	lvx	v25,$x10,$key_		# pre-load round[2]

	vadduwm	$two,$one,$one
	subi	$inp,$inp,15		# undo "caller"
	$SHL	$len,$len,4

	vadduwm	$out1,$ivec,$one	# counter values ...
	vadduwm	$out2,$ivec,$two
	vxor	$out0,$ivec,$rndkey0	# ... xored with rndkey[0]
	le?li	$idx,8
	vadduwm	$out3,$out1,$two
	vxor	$out1,$out1,$rndkey0
	le?lvsl	$inpperm,0,$idx
	vadduwm	$out4,$out2,$two
	vxor	$out2,$out2,$rndkey0
	le?vspltisb	$tmp,0x0f
	vadduwm	$out5,$out3,$two
	vxor	$out3,$out3,$rndkey0
	le?vxor	$inpperm,$inpperm,$tmp	# transform for lvx_u/stvx_u
	vadduwm	$out6,$out4,$two
	vxor	$out4,$out4,$rndkey0
	vadduwm	$out7,$out5,$two
	vxor	$out5,$out5,$rndkey0
	vadduwm	$ivec,$out6,$two	# next counter value
	vxor	$out6,$out6,$rndkey0
	vxor	$out7,$out7,$rndkey0

	mtctr	$rounds
	b	Loop_ctr32_enc8x
.align	5
Loop_ctr32_enc8x:
	vcipher	$out0,$out0,v24
	vcipher	$out1,$out1,v24
	vcipher	$out2,$out2,v24
	vcipher	$out3,$out3,v24
	vcipher	$out4,$out4,v24
	vcipher	$out5,$out5,v24
	vcipher	$out6,$out6,v24
	vcipher	$out7,$out7,v24
Loop_ctr32_enc8x_middle:
	lvx	v24,$x20,$key_		# round[3]
	addi	$key_,$key_,0x20

	vcipher	$out0,$out0,v25
	vcipher	$out1,$out1,v25
	vcipher	$out2,$out2,v25
	vcipher	$out3,$out3,v25
	vcipher	$out4,$out4,v25
	vcipher	$out5,$out5,v25
	vcipher	$out6,$out6,v25
	vcipher	$out7,$out7,v25
	lvx	v25,$x10,$key_		# round[4]
	bdnz	Loop_ctr32_enc8x

	subic	r11,$len,256		# $len-256, borrow $key_
	vcipher	$out0,$out0,v24
	vcipher	$out1,$out1,v24
	vcipher	$out2,$out2,v24
	vcipher	$out3,$out3,v24
	vcipher	$out4,$out4,v24
	vcipher	$out5,$out5,v24
	vcipher	$out6,$out6,v24
	vcipher	$out7,$out7,v24

	subfe	r0,r0,r0		# borrow?-1:0
	vcipher	$out0,$out0,v25
	vcipher	$out1,$out1,v25
	vcipher	$out2,$out2,v25
	vcipher	$out3,$out3,v25
	vcipher	$out4,$out4,v25
	vcipher	$out5,$out5,v25
	vcipher	$out6,$out6,v25
	vcipher	$out7,$out7,v25

	and	r0,r0,r11
	addi	$key_,$sp,`$FRAME+15`	# rewind $key_
	vcipher	$out0,$out0,v26
	vcipher	$out1,$out1,v26
	vcipher	$out2,$out2,v26
	vcipher	$out3,$out3,v26
	vcipher	$out4,$out4,v26
	vcipher	$out5,$out5,v26
	vcipher	$out6,$out6,v26
	vcipher	$out7,$out7,v26
	lvx	v24,$x00,$key_		# re-pre-load round[1]

	subic	$len,$len,129		# $len-=129
	vcipher	$out0,$out0,v27
	addi	$len,$len,1		# $len-=128 really
	vcipher	$out1,$out1,v27
	vcipher	$out2,$out2,v27
	vcipher	$out3,$out3,v27
	vcipher	$out4,$out4,v27
	vcipher	$out5,$out5,v27
	vcipher	$out6,$out6,v27
	vcipher	$out7,$out7,v27
	lvx	v25,$x10,$key_		# re-pre-load round[2]

	vcipher	$out0,$out0,v28
	lvx_u	$in0,$x00,$inp		# load input
	vcipher	$out1,$out1,v28
	lvx_u	$in1,$x10,$inp
	vcipher	$out2,$out2,v28
	lvx_u	$in2,$x20,$inp
	vcipher	$out3,$out3,v28
	lvx_u	$in3,$x30,$inp
	vcipher	$out4,$out4,v28
	lvx_u	$in4,$x40,$inp
	vcipher	$out5,$out5,v28
	lvx_u	$in5,$x50,$inp
	vcipher	$out6,$out6,v28
	lvx_u	$in6,$x60,$inp
	vcipher	$out7,$out7,v28
	lvx_u	$in7,$x70,$inp
	addi	$inp,$inp,0x80

	vcipher	$out0,$out0,v29
	le?vperm	$in0,$in0,$in0,$inpperm
	vcipher	$out1,$out1,v29
	le?vperm	$in1,$in1,$in1,$inpperm
	vcipher	$out2,$out2,v29
	le?vperm	$in2,$in2,$in2,$inpperm
	vcipher	$out3,$out3,v29
	le?vperm	$in3,$in3,$in3,$inpperm
	vcipher	$out4,$out4,v29
	le?vperm	$in4,$in4,$in4,$inpperm
	vcipher	$out5,$out5,v29
	le?vperm	$in5,$in5,$in5,$inpperm
	vcipher	$out6,$out6,v29
	le?vperm	$in6,$in6,$in6,$inpperm
	vcipher	$out7,$out7,v29
	le?vperm	$in7,$in7,$in7,$inpperm

	add	$inp,$inp,r0		# $inp is adjusted in such a
					# way that at exit from the
					# loop inX-in7 are loaded
					# with last "words"
	subfe.	r0,r0,r0		# borrow?-1:0
	vcipher	$out0,$out0,v30
	vxor	$in0,$in0,v31		# xor with last round key
	vcipher	$out1,$out1,v30
	vxor	$in1,$in1,v31
	vcipher	$out2,$out2,v30
	vxor	$in2,$in2,v31
	vcipher	$out3,$out3,v30
	vxor	$in3,$in3,v31
	vcipher	$out4,$out4,v30
	vxor	$in4,$in4,v31
	vcipher	$out5,$out5,v30
	vxor	$in5,$in5,v31
	vcipher	$out6,$out6,v30
	vxor	$in6,$in6,v31
	vcipher	$out7,$out7,v30
	vxor	$in7,$in7,v31

	bne	Lctr32_enc8x_break	# did $len-129 borrow?
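
	# The length arithmetic above is a carry trick (sketched here for
	# clarity): subic $len,$len,129 sets CA only when $len>=129, and
	# the addi nets $len-=128; subfe. therefore produces 0/"eq" while
	# at least one more full 8x128-byte pass remains and -1/"ne" on the
	# final pass, in which case r0 has already rewound $inp so the last
	# blocks are re-loaded rather than read past the end of the input.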

	vcipherlast	$in0,$out0,$in0
	vcipherlast	$in1,$out1,$in1
	vadduwm	$out1,$ivec,$one	# counter values ...
	vcipherlast	$in2,$out2,$in2
	vadduwm	$out2,$ivec,$two
	vxor	$out0,$ivec,$rndkey0	# ... xored with rndkey[0]
	vcipherlast	$in3,$out3,$in3
	vadduwm	$out3,$out1,$two
	vxor	$out1,$out1,$rndkey0
	vcipherlast	$in4,$out4,$in4
	vadduwm	$out4,$out2,$two
	vxor	$out2,$out2,$rndkey0
	vcipherlast	$in5,$out5,$in5
	vadduwm	$out5,$out3,$two
	vxor	$out3,$out3,$rndkey0
	vcipherlast	$in6,$out6,$in6
	vadduwm	$out6,$out4,$two
	vxor	$out4,$out4,$rndkey0
	vcipherlast	$in7,$out7,$in7
	vadduwm	$out7,$out5,$two
	vxor	$out5,$out5,$rndkey0
	le?vperm	$in0,$in0,$in0,$inpperm
	vadduwm	$ivec,$out6,$two	# next counter value
	vxor	$out6,$out6,$rndkey0
	le?vperm	$in1,$in1,$in1,$inpperm
	vxor	$out7,$out7,$rndkey0
	mtctr	$rounds

	vcipher	$out0,$out0,v24
	stvx_u	$in0,$x00,$out
	le?vperm	$in2,$in2,$in2,$inpperm
	vcipher	$out1,$out1,v24
	stvx_u	$in1,$x10,$out
	le?vperm	$in3,$in3,$in3,$inpperm
	vcipher	$out2,$out2,v24
	stvx_u	$in2,$x20,$out
	le?vperm	$in4,$in4,$in4,$inpperm
	vcipher	$out3,$out3,v24
	stvx_u	$in3,$x30,$out
	le?vperm	$in5,$in5,$in5,$inpperm
	vcipher	$out4,$out4,v24
	stvx_u	$in4,$x40,$out
	le?vperm	$in6,$in6,$in6,$inpperm
	vcipher	$out5,$out5,v24
	stvx_u	$in5,$x50,$out
	le?vperm	$in7,$in7,$in7,$inpperm
	vcipher	$out6,$out6,v24
	stvx_u	$in6,$x60,$out
	vcipher	$out7,$out7,v24
	stvx_u	$in7,$x70,$out
	addi	$out,$out,0x80

	b	Loop_ctr32_enc8x_middle

.align	5
Lctr32_enc8x_break:
	cmpwi	$len,-0x60
	blt	Lctr32_enc8x_one
	nop
	beq	Lctr32_enc8x_two
	cmpwi	$len,-0x40
	blt	Lctr32_enc8x_three
	nop
	beq	Lctr32_enc8x_four
	cmpwi	$len,-0x20
	blt	Lctr32_enc8x_five
	nop
	beq	Lctr32_enc8x_six
	cmpwi	$len,0x00
	blt	Lctr32_enc8x_seven

Lctr32_enc8x_eight:
	vcipherlast	$out0,$out0,$in0
	vcipherlast	$out1,$out1,$in1
	vcipherlast	$out2,$out2,$in2
	vcipherlast	$out3,$out3,$in3
	vcipherlast	$out4,$out4,$in4
	vcipherlast	$out5,$out5,$in5
	vcipherlast	$out6,$out6,$in6
	vcipherlast	$out7,$out7,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x30,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x40,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x50,$out
	le?vperm	$out7,$out7,$out7,$inpperm
	stvx_u	$out6,$x60,$out
	stvx_u	$out7,$x70,$out
	addi	$out,$out,0x80
	b	Lctr32_enc8x_done

.align	5
Lctr32_enc8x_seven:
	vcipherlast	$out0,$out0,$in1
	vcipherlast	$out1,$out1,$in2
	vcipherlast	$out2,$out2,$in3
	vcipherlast	$out3,$out3,$in4
	vcipherlast	$out4,$out4,$in5
	vcipherlast	$out5,$out5,$in6
	vcipherlast	$out6,$out6,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x30,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x40,$out
	le?vperm	$out6,$out6,$out6,$inpperm
	stvx_u	$out5,$x50,$out
	stvx_u	$out6,$x60,$out
	addi	$out,$out,0x70
	b	Lctr32_enc8x_done

.align	5
Lctr32_enc8x_six:
	vcipherlast	$out0,$out0,$in2
	vcipherlast	$out1,$out1,$in3
	vcipherlast	$out2,$out2,$in4
	vcipherlast	$out3,$out3,$in5
	vcipherlast	$out4,$out4,$in6
	vcipherlast	$out5,$out5,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x30,$out
	le?vperm	$out5,$out5,$out5,$inpperm
	stvx_u	$out4,$x40,$out
	stvx_u	$out5,$x50,$out
	addi	$out,$out,0x60
	b	Lctr32_enc8x_done

.align	5
Lctr32_enc8x_five:
	vcipherlast	$out0,$out0,$in3
	vcipherlast	$out1,$out1,$in4
	vcipherlast	$out2,$out2,$in5
	vcipherlast	$out3,$out3,$in6
	vcipherlast	$out4,$out4,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x20,$out
	le?vperm	$out4,$out4,$out4,$inpperm
	stvx_u	$out3,$x30,$out
	stvx_u	$out4,$x40,$out
	addi	$out,$out,0x50
	b	Lctr32_enc8x_done

.align	5
Lctr32_enc8x_four:
	vcipherlast	$out0,$out0,$in4
	vcipherlast	$out1,$out1,$in5
	vcipherlast	$out2,$out2,$in6
	vcipherlast	$out3,$out3,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	le?vperm	$out3,$out3,$out3,$inpperm
	stvx_u	$out2,$x20,$out
	stvx_u	$out3,$x30,$out
	addi	$out,$out,0x40
	b	Lctr32_enc8x_done

.align	5
Lctr32_enc8x_three:
	vcipherlast	$out0,$out0,$in5
	vcipherlast	$out1,$out1,$in6
	vcipherlast	$out2,$out2,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	le?vperm	$out2,$out2,$out2,$inpperm
	stvx_u	$out1,$x10,$out
	stvx_u	$out2,$x20,$out
	addi	$out,$out,0x30
	b	Lctr32_enc8x_done

.align	5
Lctr32_enc8x_two:
	vcipherlast	$out0,$out0,$in6
	vcipherlast	$out1,$out1,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	le?vperm	$out1,$out1,$out1,$inpperm
	stvx_u	$out0,$x00,$out
	stvx_u	$out1,$x10,$out
	addi	$out,$out,0x20
	b	Lctr32_enc8x_done

.align	5
Lctr32_enc8x_one:
	vcipherlast	$out0,$out0,$in7

	le?vperm	$out0,$out0,$out0,$inpperm
	stvx_u	$out0,0,$out
	addi	$out,$out,0x10

Lctr32_enc8x_done:
	li	r10,`$FRAME+15`
	li	r11,`$FRAME+31`
	stvx	$inpperm,r10,$sp	# wipe copies of round keys
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32
	stvx	$inpperm,r10,$sp
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32
	stvx	$inpperm,r10,$sp
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32
	stvx	$inpperm,r10,$sp
	addi	r10,r10,32
	stvx	$inpperm,r11,$sp
	addi	r11,r11,32

	mtspr	256,$vrsave
	lvx	v20,r10,$sp		# ABI says so
	addi	r10,r10,32
	lvx	v21,r11,$sp
	addi	r11,r11,32
	lvx	v22,r10,$sp
	addi	r10,r10,32
	lvx	v23,r11,$sp
	addi	r11,r11,32
	lvx	v24,r10,$sp
	addi	r10,r10,32
	lvx	v25,r11,$sp
	addi	r11,r11,32
	lvx	v26,r10,$sp
	addi	r10,r10,32
	lvx	v27,r11,$sp
	addi	r11,r11,32
	lvx	v28,r10,$sp
	addi	r10,r10,32
	lvx	v29,r11,$sp
	addi	r11,r11,32
	lvx	v30,r10,$sp
	lvx	v31,r11,$sp
	$POP	r26,`$FRAME+21*16+0*$SIZE_T`($sp)
	$POP	r27,`$FRAME+21*16+1*$SIZE_T`($sp)
	$POP	r28,`$FRAME+21*16+2*$SIZE_T`($sp)
	$POP	r29,`$FRAME+21*16+3*$SIZE_T`($sp)
	$POP	r30,`$FRAME+21*16+4*$SIZE_T`($sp)
	$POP	r31,`$FRAME+21*16+5*$SIZE_T`($sp)
	addi	$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
	blr
	.long	0
	.byte	0,12,0x04,0,0x80,6,6,0
	.long	0
.size	.${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks
___
}}	}}}

#########################################################################
{{{	# XTS procedures						#
# int aes_p8_xts_[en|de]crypt(const char *inp, char *out, size_t len,	#
#                             const AES_KEY *key1, const AES_KEY *key2,	#
#                             [const] unsigned char iv[16]);		#
# If $key2 is NULL, a "tweak chaining" mode is engaged, in which the	#
# input tweak value is assumed to be encrypted already, and the last	#
# tweak value, one suitable for a consecutive call on the same chunk	#
# of data, is written back to the original buffer. In addition, in	#
# "tweak chaining" mode only complete input blocks are processed.	#

my ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) =	map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout) =				map("v$_",(0..2));
my ($output,$inptail,$inpperm,$leperm,$keyperm) =	map("v$_",(3..7));
my ($tweak,$seven,$eighty7,$tmp,$tweak1) =		map("v$_",(8..12));
my $taillen = $key2;

	($inp,$idx) = ($idx,$inp);	# reassign

$code.=<<___;
.globl	.${prefix}_xts_encrypt
.align	5
.${prefix}_xts_encrypt:
	mr	$inp,r3			# reassign
	li	r3,-1
	${UCMP}i	$len,16
	bltlr-

	lis	r0,0xfff0
	mfspr	r12,256			# save vrsave
	li	r11,0
	mtspr	256,r0

	vspltisb	$seven,0x07	# 0x070707..07
	le?lvsl	$leperm,r11,r11
	le?vspltisb	$tmp,0x0f
	le?vxor	$leperm,$leperm,$seven

	li	$idx,15
	lvx	$tweak,0,$ivp		# load [unaligned] iv
	lvsl	$inpperm,0,$ivp
	lvx	$inptail,$idx,$ivp
	le?vxor	$inpperm,$inpperm,$tmp
	vperm	$tweak,$tweak,$inptail,$inpperm

	neg	r11,$inp
	lvsr	$inpperm,0,r11		# prepare for unaligned load
	lvx	$inout,0,$inp
	addi	$inp,$inp,15		# 15 is not a typo
	le?vxor	$inpperm,$inpperm,$tmp

	${UCMP}i	$key2,0		# key2==NULL?
	beq	Lxts_enc_no_key2

	?lvsl	$keyperm,0,$key2	# prepare for unaligned key
	lwz	$rounds,240($key2)
	srwi	$rounds,$rounds,1
	subi	$rounds,$rounds,1
	li	$idx,16

	lvx	$rndkey0,0,$key2
	lvx	$rndkey1,$idx,$key2
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor	$tweak,$tweak,$rndkey0
	lvx	$rndkey0,$idx,$key2
	addi	$idx,$idx,16
	mtctr	$rounds

Ltweak_xts_enc:
	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$tweak,$tweak,$rndkey1
	lvx	$rndkey1,$idx,$key2
	addi	$idx,$idx,16
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher	$tweak,$tweak,$rndkey0
	lvx	$rndkey0,$idx,$key2
	addi	$idx,$idx,16
	bdnz	Ltweak_xts_enc

	?vperm	$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher	$tweak,$tweak,$rndkey1
	lvx	$rndkey1,$idx,$key2
	?vperm	$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipherlast	$tweak,$tweak,$rndkey0

	li	$ivp,0			# don't chain the tweak
	b	Lxts_enc

Lxts_enc_no_key2:
	li	$idx,-16
	and	$len,$len,$idx		# in "tweak chaining"
					# mode only complete
					# blocks are processed
Lxts_enc:
	lvx	$inptail,0,$inp
	addi	$inp,$inp,16

	?lvsl	$keyperm,0,$key1	# prepare for unaligned key
	lwz	$rounds,240($key1)
	srwi	$rounds,$rounds,1
	subi	$rounds,$rounds,1
	li	$idx,16

	vslb	$eighty7,$seven,$seven	# 0x808080..80
	vor	$eighty7,$eighty7,$seven	# 0x878787..87
	vspltisb	$tmp,1		# 0x010101..01
	vsldoi	$eighty7,$eighty7,$tmp,15	# 0x870101..01
2020
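#	The recurring vsrab/vaddubm/vsldoi/vand/vxor pattern below
#	multiplies the tweak by x in GF(2^128), with 0x87 as the
#	reduction byte. A byte-wise C sketch of the same update
#	(illustrative only; IEEE 1619 little-endian tweak layout):
#
#	    void xts_mul_x(unsigned char t[16]) {
#	        unsigned carry = t[15] >> 7;
#	        for (int i = 15; i > 0; i--)
#	            t[i] = (unsigned char)((t[i] << 1) | (t[i-1] >> 7));
#	        t[0] = (unsigned char)((t[0] << 1) ^ (carry ? 0x87 : 0));
#	    }
#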
2021 ${UCMP}i $len,96
2022 bge _aesp8_xts_encrypt6x
2023
2024 andi. $taillen,$len,15
2025 subic r0,$len,32
2026 subi $taillen,$taillen,16
2027 subfe r0,r0,r0
2028 and r0,r0,$taillen
2029 add $inp,$inp,r0
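#	carry trick: subic/subfe yield an all-ones mask iff $len < 32,
#	so $inp is pulled back by 16-($len % 16) bytes exactly when the
#	last full block is followed by a partial tail, lining the final
#	16-byte load up for the stealing path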
2030
2031 lvx $rndkey0,0,$key1
2032 lvx $rndkey1,$idx,$key1
2033 addi $idx,$idx,16
2034 vperm $inout,$inout,$inptail,$inpperm
2035 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2036 vxor $inout,$inout,$tweak
2037 vxor $inout,$inout,$rndkey0
2038 lvx $rndkey0,$idx,$key1
2039 addi $idx,$idx,16
2040 mtctr $rounds
2041 b Loop_xts_enc
2042
2043.align 5
2044Loop_xts_enc:
2045 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2046 vcipher $inout,$inout,$rndkey1
2047 lvx $rndkey1,$idx,$key1
2048 addi $idx,$idx,16
2049 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2050 vcipher $inout,$inout,$rndkey0
2051 lvx $rndkey0,$idx,$key1
2052 addi $idx,$idx,16
2053 bdnz Loop_xts_enc
2054
2055 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2056 vcipher $inout,$inout,$rndkey1
2057 lvx $rndkey1,$idx,$key1
2058 li $idx,16
2059 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2060 vxor $rndkey0,$rndkey0,$tweak
2061 vcipherlast $output,$inout,$rndkey0
2062
2063 le?vperm $tmp,$output,$output,$leperm
2064 be?nop
2065 le?stvx_u $tmp,0,$out
2066 be?stvx_u $output,0,$out
2067 addi $out,$out,16
2068
2069 subic. $len,$len,16
2070 beq Lxts_enc_done
2071
2072 vmr $inout,$inptail
2073 lvx $inptail,0,$inp
2074 addi $inp,$inp,16
2075 lvx $rndkey0,0,$key1
2076 lvx $rndkey1,$idx,$key1
2077 addi $idx,$idx,16
2078
2079 subic r0,$len,32
2080 subfe r0,r0,r0
2081 and r0,r0,$taillen
2082 add $inp,$inp,r0
2083
2084 vsrab $tmp,$tweak,$seven # next tweak value
2085 vaddubm $tweak,$tweak,$tweak
2086 vsldoi $tmp,$tmp,$tmp,15
2087 vand $tmp,$tmp,$eighty7
2088 vxor $tweak,$tweak,$tmp
2089
2090 vperm $inout,$inout,$inptail,$inpperm
2091 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2092 vxor $inout,$inout,$tweak
2093 vxor $output,$output,$rndkey0 # just in case $len<16
2094 vxor $inout,$inout,$rndkey0
2095 lvx $rndkey0,$idx,$key1
2096 addi $idx,$idx,16
2097
2098 mtctr $rounds
2099 ${UCMP}i $len,16
2100 bge Loop_xts_enc
2101
2102 vxor $output,$output,$tweak
2103 lvsr $inpperm,0,$len # $inpperm is no longer needed
2104 vxor $inptail,$inptail,$inptail # $inptail is no longer needed
2105 vspltisb $tmp,-1
2106 vperm $inptail,$inptail,$tmp,$inpperm
2107 vsel $inout,$inout,$output,$inptail
2108
2109 subi r11,$out,17
2110 subi $out,$out,16
2111 mtctr $len
2112 li $len,16
2113Loop_xts_enc_steal:
2114 lbzu r0,1(r11)
2115 stb r0,16(r11)
2116 bdnz Loop_xts_enc_steal
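#	the byte loop above implements the ciphertext-stealing copy,
#	roughly memcpy(out, out - 16, taillen): the head of the previous
#	ciphertext block becomes the tail output before the merged block
#	is encrypted one more time below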
2117
2118 mtctr $rounds
2119 b Loop_xts_enc # one more time...
2120
2121Lxts_enc_done:
2122 ${UCMP}i $ivp,0
2123 beq Lxts_enc_ret
2124
2125 vsrab $tmp,$tweak,$seven # next tweak value
2126 vaddubm $tweak,$tweak,$tweak
2127 vsldoi $tmp,$tmp,$tmp,15
2128 vand $tmp,$tmp,$eighty7
2129 vxor $tweak,$tweak,$tmp
2130
2131 le?vperm $tweak,$tweak,$tweak,$leperm
2132 stvx_u $tweak,0,$ivp
2133
2134Lxts_enc_ret:
2135 mtspr 256,r12 # restore vrsave
2136 li r3,0
2137 blr
2138 .long 0
2139 .byte 0,12,0x04,0,0x80,6,6,0
2140 .long 0
2141.size .${prefix}_xts_encrypt,.-.${prefix}_xts_encrypt
2142
2143.globl .${prefix}_xts_decrypt
2144.align 5
2145.${prefix}_xts_decrypt:
2146 mr $inp,r3 # reassign
2147 li r3,-1
2148	 ${UCMP}i	$len,16
2149 bltlr-
2150
2151 lis r0,0xfff8
2152 mfspr r12,256 # save vrsave
2153 li r11,0
2154 mtspr 256,r0
2155
2156 andi. r0,$len,15
2157 neg r0,r0
2158 andi. r0,r0,16
2159 sub $len,$len,r0
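#	if a partial tail exists, withhold one extra complete block from
#	the bulk loop ($len -= 16) so the stealing path has a full block
#	to borrow from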
2160
2161 vspltisb $seven,0x07 # 0x070707..07
2162 le?lvsl $leperm,r11,r11
2163 le?vspltisb $tmp,0x0f
2164 le?vxor $leperm,$leperm,$seven
2165
2166 li $idx,15
2167 lvx $tweak,0,$ivp # load [unaligned] iv
2168 lvsl $inpperm,0,$ivp
2169 lvx $inptail,$idx,$ivp
2170 le?vxor $inpperm,$inpperm,$tmp
2171 vperm $tweak,$tweak,$inptail,$inpperm
2172
2173 neg r11,$inp
2174 lvsr $inpperm,0,r11 # prepare for unaligned load
2175 lvx $inout,0,$inp
2176	 addi		$inp,$inp,15		# 15 is not a typo
2177 le?vxor $inpperm,$inpperm,$tmp
2178
2179 ${UCMP}i $key2,0 # key2==NULL?
2180 beq Lxts_dec_no_key2
2181
2182 ?lvsl $keyperm,0,$key2 # prepare for unaligned key
2183 lwz $rounds,240($key2)
2184 srwi $rounds,$rounds,1
2185 subi $rounds,$rounds,1
2186 li $idx,16
2187
2188 lvx $rndkey0,0,$key2
2189 lvx $rndkey1,$idx,$key2
2190 addi $idx,$idx,16
2191 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2192 vxor $tweak,$tweak,$rndkey0
2193 lvx $rndkey0,$idx,$key2
2194 addi $idx,$idx,16
2195 mtctr $rounds
2196
2197Ltweak_xts_dec:
2198 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2199 vcipher $tweak,$tweak,$rndkey1
2200 lvx $rndkey1,$idx,$key2
2201 addi $idx,$idx,16
2202 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2203 vcipher $tweak,$tweak,$rndkey0
2204 lvx $rndkey0,$idx,$key2
2205 addi $idx,$idx,16
2206 bdnz Ltweak_xts_dec
2207
2208 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2209 vcipher $tweak,$tweak,$rndkey1
2210 lvx $rndkey1,$idx,$key2
2211 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2212 vcipherlast $tweak,$tweak,$rndkey0
2213
2214 li $ivp,0 # don't chain the tweak
2215 b Lxts_dec
2216
2217Lxts_dec_no_key2:
2218 neg $idx,$len
2219 andi. $idx,$idx,15
2220 add $len,$len,$idx # in "tweak chaining"
2221 # mode only complete
2222 # blocks are processed
2223Lxts_dec:
2224 lvx $inptail,0,$inp
2225 addi $inp,$inp,16
2226
2227 ?lvsl $keyperm,0,$key1 # prepare for unaligned key
2228 lwz $rounds,240($key1)
2229 srwi $rounds,$rounds,1
2230 subi $rounds,$rounds,1
2231 li $idx,16
2232
2233 vslb $eighty7,$seven,$seven # 0x808080..80
2234 vor $eighty7,$eighty7,$seven # 0x878787..87
2235 vspltisb $tmp,1 # 0x010101..01
2236 vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01
2237
2238 ${UCMP}i $len,96
2239 bge _aesp8_xts_decrypt6x
2240
2241 lvx $rndkey0,0,$key1
2242 lvx $rndkey1,$idx,$key1
2243 addi $idx,$idx,16
2244 vperm $inout,$inout,$inptail,$inpperm
2245 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2246 vxor $inout,$inout,$tweak
2247 vxor $inout,$inout,$rndkey0
2248 lvx $rndkey0,$idx,$key1
2249 addi $idx,$idx,16
2250 mtctr $rounds
2251
2252 ${UCMP}i $len,16
2253 blt Ltail_xts_dec
2254 be?b Loop_xts_dec
2255
2256.align 5
2257Loop_xts_dec:
2258 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2259 vncipher $inout,$inout,$rndkey1
2260 lvx $rndkey1,$idx,$key1
2261 addi $idx,$idx,16
2262 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2263 vncipher $inout,$inout,$rndkey0
2264 lvx $rndkey0,$idx,$key1
2265 addi $idx,$idx,16
2266 bdnz Loop_xts_dec
2267
2268 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2269 vncipher $inout,$inout,$rndkey1
2270 lvx $rndkey1,$idx,$key1
2271 li $idx,16
2272 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2273 vxor $rndkey0,$rndkey0,$tweak
2274 vncipherlast $output,$inout,$rndkey0
2275
2276 le?vperm $tmp,$output,$output,$leperm
2277 be?nop
2278 le?stvx_u $tmp,0,$out
2279 be?stvx_u $output,0,$out
2280 addi $out,$out,16
2281
2282 subic. $len,$len,16
2283 beq Lxts_dec_done
2284
2285 vmr $inout,$inptail
2286 lvx $inptail,0,$inp
2287 addi $inp,$inp,16
2288 lvx $rndkey0,0,$key1
2289 lvx $rndkey1,$idx,$key1
2290 addi $idx,$idx,16
2291
2292 vsrab $tmp,$tweak,$seven # next tweak value
2293 vaddubm $tweak,$tweak,$tweak
2294 vsldoi $tmp,$tmp,$tmp,15
2295 vand $tmp,$tmp,$eighty7
2296 vxor $tweak,$tweak,$tmp
2297
2298 vperm $inout,$inout,$inptail,$inpperm
2299 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2300 vxor $inout,$inout,$tweak
2301 vxor $inout,$inout,$rndkey0
2302 lvx $rndkey0,$idx,$key1
2303 addi $idx,$idx,16
2304
2305 mtctr $rounds
2306 ${UCMP}i $len,16
2307 bge Loop_xts_dec
2308
2309Ltail_xts_dec:
2310 vsrab $tmp,$tweak,$seven # next tweak value
2311 vaddubm $tweak1,$tweak,$tweak
2312 vsldoi $tmp,$tmp,$tmp,15
2313 vand $tmp,$tmp,$eighty7
2314 vxor $tweak1,$tweak1,$tmp
2315
2316 subi $inp,$inp,16
2317 add $inp,$inp,$len
2318
2319 vxor $inout,$inout,$tweak # :-(
2320 vxor $inout,$inout,$tweak1 # :-)
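#	decryption with stealing swaps the tweak order: the last complete
#	block is processed with the *next* tweak value ($tweak1) while the
#	partial tail keeps the current one, hence the xor-out/xor-in above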
2321
2322Loop_xts_dec_short:
2323 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2324 vncipher $inout,$inout,$rndkey1
2325 lvx $rndkey1,$idx,$key1
2326 addi $idx,$idx,16
2327 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2328 vncipher $inout,$inout,$rndkey0
2329 lvx $rndkey0,$idx,$key1
2330 addi $idx,$idx,16
2331 bdnz Loop_xts_dec_short
2332
2333 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2334 vncipher $inout,$inout,$rndkey1
2335 lvx $rndkey1,$idx,$key1
2336 li $idx,16
2337 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2338 vxor $rndkey0,$rndkey0,$tweak1
2339 vncipherlast $output,$inout,$rndkey0
2340
2341 le?vperm $tmp,$output,$output,$leperm
2342 be?nop
2343 le?stvx_u $tmp,0,$out
2344 be?stvx_u $output,0,$out
2345
2346 vmr $inout,$inptail
2347 lvx $inptail,0,$inp
2348 #addi $inp,$inp,16
2349 lvx $rndkey0,0,$key1
2350 lvx $rndkey1,$idx,$key1
2351 addi $idx,$idx,16
2352 vperm $inout,$inout,$inptail,$inpperm
2353 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2354
2355 lvsr $inpperm,0,$len # $inpperm is no longer needed
2356 vxor $inptail,$inptail,$inptail # $inptail is no longer needed
2357 vspltisb $tmp,-1
2358 vperm $inptail,$inptail,$tmp,$inpperm
2359 vsel $inout,$inout,$output,$inptail
2360
2361 vxor $rndkey0,$rndkey0,$tweak
2362 vxor $inout,$inout,$rndkey0
2363 lvx $rndkey0,$idx,$key1
2364 addi $idx,$idx,16
2365
2366 subi r11,$out,1
2367 mtctr $len
2368 li $len,16
2369Loop_xts_dec_steal:
2370 lbzu r0,1(r11)
2371 stb r0,16(r11)
2372 bdnz Loop_xts_dec_steal
2373
2374 mtctr $rounds
2375 b Loop_xts_dec # one more time...
2376
2377Lxts_dec_done:
2378 ${UCMP}i $ivp,0
2379 beq Lxts_dec_ret
2380
2381 vsrab $tmp,$tweak,$seven # next tweak value
2382 vaddubm $tweak,$tweak,$tweak
2383 vsldoi $tmp,$tmp,$tmp,15
2384 vand $tmp,$tmp,$eighty7
2385 vxor $tweak,$tweak,$tmp
2386
2387 le?vperm $tweak,$tweak,$tweak,$leperm
2388 stvx_u $tweak,0,$ivp
2389
2390Lxts_dec_ret:
2391 mtspr 256,r12 # restore vrsave
2392 li r3,0
2393 blr
2394 .long 0
2395 .byte 0,12,0x04,0,0x80,6,6,0
2396 .long 0
2397.size .${prefix}_xts_decrypt,.-.${prefix}_xts_decrypt
2398___
2399#########################################################################
2400{{ # Optimized XTS procedures #
2401my $key_=$key2;
2402my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,3,26..31));
2403 $x00=0 if ($flavour =~ /osx/);
2404my ($in0, $in1, $in2, $in3, $in4, $in5 )=map("v$_",(0..5));
2405my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16));
2406my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22));
2407my $rndkey0="v23";	# v24-v25 rotating buffer for first round keys
2408 # v26-v31 last 6 round keys
2409my ($keyperm)=($out0); # aliases with "caller", redundant assignment
2410my $taillen=$x70;
2411
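# The 6x path keeps six blocks in flight so consecutive vcipher results
# are never immediately dependent on one another. Structurally (a
# pseudo-C sketch, illustrative only):
#
#	for (; len >= 6*16; len -= 6*16)
#		for (r = 1; r < rounds; r++)
#			for (i = 0; i < 6; i++)	/* unrolled below */
#				blk[i] = vcipher(blk[i], roundkey[r]);
#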
2412$code.=<<___;
2413.align 5
2414_aesp8_xts_encrypt6x:
2415 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
2416 mflr r11
2417 li r7,`$FRAME+8*16+15`
2418 li r3,`$FRAME+8*16+31`
2419 $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
2420 stvx v20,r7,$sp # ABI says so
2421 addi r7,r7,32
2422 stvx v21,r3,$sp
2423 addi r3,r3,32
2424 stvx v22,r7,$sp
2425 addi r7,r7,32
2426 stvx v23,r3,$sp
2427 addi r3,r3,32
2428 stvx v24,r7,$sp
2429 addi r7,r7,32
2430 stvx v25,r3,$sp
2431 addi r3,r3,32
2432 stvx v26,r7,$sp
2433 addi r7,r7,32
2434 stvx v27,r3,$sp
2435 addi r3,r3,32
2436 stvx v28,r7,$sp
2437 addi r7,r7,32
2438 stvx v29,r3,$sp
2439 addi r3,r3,32
2440 stvx v30,r7,$sp
2441 stvx v31,r3,$sp
2442 li r0,-1
2443 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
2444 li $x10,0x10
2445 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
2446 li $x20,0x20
2447 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
2448 li $x30,0x30
2449 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
2450 li $x40,0x40
2451 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
2452 li $x50,0x50
2453 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
2454 li $x60,0x60
2455 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
2456 li $x70,0x70
2457 mtspr 256,r0
2458
2459 subi $rounds,$rounds,3 # -4 in total
2460
2461 lvx $rndkey0,$x00,$key1 # load key schedule
2462 lvx v30,$x10,$key1
2463 addi $key1,$key1,0x20
2464 lvx v31,$x00,$key1
2465 ?vperm $rndkey0,$rndkey0,v30,$keyperm
2466	 addi		$key_,$sp,`$FRAME+15`
2467	 mtctr		$rounds
2468
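#	copy the (possibly unaligned) key schedule into an aligned stack
#	area; v24/v25 rotate through it during the rounds while v26-v31
#	keep the last six round keys resident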
2469Load_xts_enc_key:
2470 ?vperm v24,v30,v31,$keyperm
2471 lvx v30,$x10,$key1
2472 addi $key1,$key1,0x20
2473 stvx v24,$x00,$key_ # off-load round[1]
2474 ?vperm v25,v31,v30,$keyperm
2475 lvx v31,$x00,$key1
2476 stvx v25,$x10,$key_ # off-load round[2]
2477 addi $key_,$key_,0x20
2478 bdnz Load_xts_enc_key
2479
2480 lvx v26,$x10,$key1
2481 ?vperm v24,v30,v31,$keyperm
2482 lvx v27,$x20,$key1
2483 stvx v24,$x00,$key_ # off-load round[3]
2484 ?vperm v25,v31,v26,$keyperm
2485 lvx v28,$x30,$key1
2486 stvx v25,$x10,$key_ # off-load round[4]
2487	 addi		$key_,$sp,`$FRAME+15`	# rewind $key_
2488	 ?vperm		v26,v26,v27,$keyperm
2489 lvx v29,$x40,$key1
2490 ?vperm v27,v27,v28,$keyperm
2491 lvx v30,$x50,$key1
2492 ?vperm v28,v28,v29,$keyperm
2493 lvx v31,$x60,$key1
2494 ?vperm v29,v29,v30,$keyperm
2495 lvx $twk5,$x70,$key1 # borrow $twk5
2496 ?vperm v30,v30,v31,$keyperm
2497 lvx v24,$x00,$key_ # pre-load round[1]
2498 ?vperm v31,v31,$twk5,$keyperm
2499 lvx v25,$x10,$key_ # pre-load round[2]
2500
2501 vperm $in0,$inout,$inptail,$inpperm
2502 subi $inp,$inp,31 # undo "caller"
2503 vxor $twk0,$tweak,$rndkey0
2504 vsrab $tmp,$tweak,$seven # next tweak value
2505 vaddubm $tweak,$tweak,$tweak
2506 vsldoi $tmp,$tmp,$tmp,15
2507 vand $tmp,$tmp,$eighty7
2508 vxor $out0,$in0,$twk0
2509 vxor $tweak,$tweak,$tmp
2510
2511 lvx_u $in1,$x10,$inp
2512 vxor $twk1,$tweak,$rndkey0
2513 vsrab $tmp,$tweak,$seven # next tweak value
2514 vaddubm $tweak,$tweak,$tweak
2515 vsldoi $tmp,$tmp,$tmp,15
2516 le?vperm $in1,$in1,$in1,$leperm
2517 vand $tmp,$tmp,$eighty7
2518 vxor $out1,$in1,$twk1
2519 vxor $tweak,$tweak,$tmp
2520
2521 lvx_u $in2,$x20,$inp
2522 andi. $taillen,$len,15
2523 vxor $twk2,$tweak,$rndkey0
2524 vsrab $tmp,$tweak,$seven # next tweak value
2525 vaddubm $tweak,$tweak,$tweak
2526 vsldoi $tmp,$tmp,$tmp,15
2527 le?vperm $in2,$in2,$in2,$leperm
2528 vand $tmp,$tmp,$eighty7
2529 vxor $out2,$in2,$twk2
2530 vxor $tweak,$tweak,$tmp
2531
2532 lvx_u $in3,$x30,$inp
2533 sub $len,$len,$taillen
2534 vxor $twk3,$tweak,$rndkey0
2535 vsrab $tmp,$tweak,$seven # next tweak value
2536 vaddubm $tweak,$tweak,$tweak
2537 vsldoi $tmp,$tmp,$tmp,15
2538 le?vperm $in3,$in3,$in3,$leperm
2539 vand $tmp,$tmp,$eighty7
2540 vxor $out3,$in3,$twk3
2541 vxor $tweak,$tweak,$tmp
2542
2543 lvx_u $in4,$x40,$inp
2544 subi $len,$len,0x60
2545 vxor $twk4,$tweak,$rndkey0
2546 vsrab $tmp,$tweak,$seven # next tweak value
2547 vaddubm $tweak,$tweak,$tweak
2548 vsldoi $tmp,$tmp,$tmp,15
2549 le?vperm $in4,$in4,$in4,$leperm
2550 vand $tmp,$tmp,$eighty7
2551 vxor $out4,$in4,$twk4
2552 vxor $tweak,$tweak,$tmp
2553
2554 lvx_u $in5,$x50,$inp
2555 addi $inp,$inp,0x60
2556 vxor $twk5,$tweak,$rndkey0
2557 vsrab $tmp,$tweak,$seven # next tweak value
2558 vaddubm $tweak,$tweak,$tweak
2559 vsldoi $tmp,$tmp,$tmp,15
2560 le?vperm $in5,$in5,$in5,$leperm
2561 vand $tmp,$tmp,$eighty7
2562 vxor $out5,$in5,$twk5
2563 vxor $tweak,$tweak,$tmp
2564
2565 vxor v31,v31,$rndkey0
2566 mtctr $rounds
2567 b Loop_xts_enc6x
2568
2569.align 5
2570Loop_xts_enc6x:
2571 vcipher $out0,$out0,v24
2572 vcipher $out1,$out1,v24
2573 vcipher $out2,$out2,v24
2574 vcipher $out3,$out3,v24
2575 vcipher $out4,$out4,v24
2576 vcipher $out5,$out5,v24
2577 lvx v24,$x20,$key_ # round[3]
2578 addi $key_,$key_,0x20
2579
2580 vcipher $out0,$out0,v25
2581 vcipher $out1,$out1,v25
2582 vcipher $out2,$out2,v25
2583 vcipher $out3,$out3,v25
2584 vcipher $out4,$out4,v25
2585 vcipher $out5,$out5,v25
2586 lvx v25,$x10,$key_ # round[4]
2587 bdnz Loop_xts_enc6x
2588
2589 subic $len,$len,96 # $len-=96
2590 vxor $in0,$twk0,v31 # xor with last round key
2591 vcipher $out0,$out0,v24
2592 vcipher $out1,$out1,v24
2593 vsrab $tmp,$tweak,$seven # next tweak value
2594 vxor $twk0,$tweak,$rndkey0
2595 vaddubm $tweak,$tweak,$tweak
2596 vcipher $out2,$out2,v24
2597 vcipher $out3,$out3,v24
2598 vsldoi $tmp,$tmp,$tmp,15
2599 vcipher $out4,$out4,v24
2600 vcipher $out5,$out5,v24
2601
2602 subfe. r0,r0,r0 # borrow?-1:0
2603 vand $tmp,$tmp,$eighty7
2604 vcipher $out0,$out0,v25
2605 vcipher $out1,$out1,v25
2606 vxor $tweak,$tweak,$tmp
2607 vcipher $out2,$out2,v25
2608 vcipher $out3,$out3,v25
2609 vxor $in1,$twk1,v31
2610 vsrab $tmp,$tweak,$seven # next tweak value
2611 vxor $twk1,$tweak,$rndkey0
2612 vcipher $out4,$out4,v25
2613 vcipher $out5,$out5,v25
2614
2615 and r0,r0,$len
2616 vaddubm $tweak,$tweak,$tweak
2617 vsldoi $tmp,$tmp,$tmp,15
2618 vcipher $out0,$out0,v26
2619 vcipher $out1,$out1,v26
2620 vand $tmp,$tmp,$eighty7
2621 vcipher $out2,$out2,v26
2622 vcipher $out3,$out3,v26
2623 vxor $tweak,$tweak,$tmp
2624 vcipher $out4,$out4,v26
2625 vcipher $out5,$out5,v26
2626
2627	 add		$inp,$inp,r0		# $inp is adjusted in such a
2628						# way that at exit from the
2629						# loop inX-in5 are loaded
2630						# with the last "words"
2631 vxor $in2,$twk2,v31
2632 vsrab $tmp,$tweak,$seven # next tweak value
2633 vxor $twk2,$tweak,$rndkey0
2634 vaddubm $tweak,$tweak,$tweak
2635 vcipher $out0,$out0,v27
2636 vcipher $out1,$out1,v27
2637 vsldoi $tmp,$tmp,$tmp,15
2638 vcipher $out2,$out2,v27
2639 vcipher $out3,$out3,v27
2640 vand $tmp,$tmp,$eighty7
2641 vcipher $out4,$out4,v27
2642 vcipher $out5,$out5,v27
2643
2644	 addi		$key_,$sp,`$FRAME+15`	# rewind $key_
2645	 vxor		$tweak,$tweak,$tmp
2646 vcipher $out0,$out0,v28
2647 vcipher $out1,$out1,v28
2648 vxor $in3,$twk3,v31
2649 vsrab $tmp,$tweak,$seven # next tweak value
2650 vxor $twk3,$tweak,$rndkey0
2651 vcipher $out2,$out2,v28
2652 vcipher $out3,$out3,v28
2653 vaddubm $tweak,$tweak,$tweak
2654 vsldoi $tmp,$tmp,$tmp,15
2655 vcipher $out4,$out4,v28
2656 vcipher $out5,$out5,v28
2657 lvx v24,$x00,$key_ # re-pre-load round[1]
2658 vand $tmp,$tmp,$eighty7
2659
2660 vcipher $out0,$out0,v29
2661 vcipher $out1,$out1,v29
2662 vxor $tweak,$tweak,$tmp
2663 vcipher $out2,$out2,v29
2664 vcipher $out3,$out3,v29
2665 vxor $in4,$twk4,v31
2666 vsrab $tmp,$tweak,$seven # next tweak value
2667 vxor $twk4,$tweak,$rndkey0
2668 vcipher $out4,$out4,v29
2669 vcipher $out5,$out5,v29
2670 lvx v25,$x10,$key_ # re-pre-load round[2]
2671 vaddubm $tweak,$tweak,$tweak
2672 vsldoi $tmp,$tmp,$tmp,15
2673
2674 vcipher $out0,$out0,v30
2675 vcipher $out1,$out1,v30
2676 vand $tmp,$tmp,$eighty7
2677 vcipher $out2,$out2,v30
2678 vcipher $out3,$out3,v30
2679 vxor $tweak,$tweak,$tmp
2680 vcipher $out4,$out4,v30
2681 vcipher $out5,$out5,v30
2682 vxor $in5,$twk5,v31
2683 vsrab $tmp,$tweak,$seven # next tweak value
2684 vxor $twk5,$tweak,$rndkey0
2685
2686 vcipherlast $out0,$out0,$in0
2687 lvx_u $in0,$x00,$inp # load next input block
2688 vaddubm $tweak,$tweak,$tweak
2689 vsldoi $tmp,$tmp,$tmp,15
2690 vcipherlast $out1,$out1,$in1
2691 lvx_u $in1,$x10,$inp
2692 vcipherlast $out2,$out2,$in2
2693 le?vperm $in0,$in0,$in0,$leperm
2694 lvx_u $in2,$x20,$inp
2695 vand $tmp,$tmp,$eighty7
2696 vcipherlast $out3,$out3,$in3
2697 le?vperm $in1,$in1,$in1,$leperm
2698 lvx_u $in3,$x30,$inp
2699 vcipherlast $out4,$out4,$in4
2700 le?vperm $in2,$in2,$in2,$leperm
2701 lvx_u $in4,$x40,$inp
2702 vxor $tweak,$tweak,$tmp
2703 vcipherlast $tmp,$out5,$in5 # last block might be needed
2704 # in stealing mode
2705 le?vperm $in3,$in3,$in3,$leperm
2706 lvx_u $in5,$x50,$inp
2707 addi $inp,$inp,0x60
2708 le?vperm $in4,$in4,$in4,$leperm
2709 le?vperm $in5,$in5,$in5,$leperm
2710
2711 le?vperm $out0,$out0,$out0,$leperm
2712 le?vperm $out1,$out1,$out1,$leperm
2713 stvx_u $out0,$x00,$out # store output
2714 vxor $out0,$in0,$twk0
2715 le?vperm $out2,$out2,$out2,$leperm
2716 stvx_u $out1,$x10,$out
2717 vxor $out1,$in1,$twk1
2718 le?vperm $out3,$out3,$out3,$leperm
2719 stvx_u $out2,$x20,$out
2720 vxor $out2,$in2,$twk2
2721 le?vperm $out4,$out4,$out4,$leperm
2722 stvx_u $out3,$x30,$out
2723 vxor $out3,$in3,$twk3
2724 le?vperm $out5,$tmp,$tmp,$leperm
2725 stvx_u $out4,$x40,$out
2726 vxor $out4,$in4,$twk4
2727 le?stvx_u $out5,$x50,$out
2728 be?stvx_u $tmp, $x50,$out
2729 vxor $out5,$in5,$twk5
2730 addi $out,$out,0x60
2731
2732 mtctr $rounds
2733 beq Loop_xts_enc6x # did $len-=96 borrow?
2734
2735 addic. $len,$len,0x60
2736 beq Lxts_enc6x_zero
2737 cmpwi $len,0x20
2738 blt Lxts_enc6x_one
2739 nop
2740 beq Lxts_enc6x_two
2741 cmpwi $len,0x40
2742 blt Lxts_enc6x_three
2743 nop
2744 beq Lxts_enc6x_four
2745
2746Lxts_enc6x_five:
2747 vxor $out0,$in1,$twk0
2748 vxor $out1,$in2,$twk1
2749 vxor $out2,$in3,$twk2
2750 vxor $out3,$in4,$twk3
2751 vxor $out4,$in5,$twk4
2752
2753 bl _aesp8_xts_enc5x
2754
2755 le?vperm $out0,$out0,$out0,$leperm
2756 vmr $twk0,$twk5 # unused tweak
2757 le?vperm $out1,$out1,$out1,$leperm
2758 stvx_u $out0,$x00,$out # store output
2759 le?vperm $out2,$out2,$out2,$leperm
2760 stvx_u $out1,$x10,$out
2761 le?vperm $out3,$out3,$out3,$leperm
2762 stvx_u $out2,$x20,$out
2763 vxor $tmp,$out4,$twk5 # last block prep for stealing
2764 le?vperm $out4,$out4,$out4,$leperm
2765 stvx_u $out3,$x30,$out
2766 stvx_u $out4,$x40,$out
2767 addi $out,$out,0x50
2768 bne Lxts_enc6x_steal
2769 b Lxts_enc6x_done
2770
2771.align 4
2772Lxts_enc6x_four:
2773 vxor $out0,$in2,$twk0
2774 vxor $out1,$in3,$twk1
2775 vxor $out2,$in4,$twk2
2776 vxor $out3,$in5,$twk3
2777 vxor $out4,$out4,$out4
2778
2779 bl _aesp8_xts_enc5x
2780
2781 le?vperm $out0,$out0,$out0,$leperm
2782 vmr $twk0,$twk4 # unused tweak
2783 le?vperm $out1,$out1,$out1,$leperm
2784 stvx_u $out0,$x00,$out # store output
2785 le?vperm $out2,$out2,$out2,$leperm
2786 stvx_u $out1,$x10,$out
2787 vxor $tmp,$out3,$twk4 # last block prep for stealing
2788 le?vperm $out3,$out3,$out3,$leperm
2789 stvx_u $out2,$x20,$out
2790 stvx_u $out3,$x30,$out
2791 addi $out,$out,0x40
2792 bne Lxts_enc6x_steal
2793 b Lxts_enc6x_done
2794
2795.align 4
2796Lxts_enc6x_three:
2797 vxor $out0,$in3,$twk0
2798 vxor $out1,$in4,$twk1
2799 vxor $out2,$in5,$twk2
2800 vxor $out3,$out3,$out3
2801 vxor $out4,$out4,$out4
2802
2803 bl _aesp8_xts_enc5x
2804
2805 le?vperm $out0,$out0,$out0,$leperm
2806 vmr $twk0,$twk3 # unused tweak
2807 le?vperm $out1,$out1,$out1,$leperm
2808 stvx_u $out0,$x00,$out # store output
2809 vxor $tmp,$out2,$twk3 # last block prep for stealing
2810 le?vperm $out2,$out2,$out2,$leperm
2811 stvx_u $out1,$x10,$out
2812 stvx_u $out2,$x20,$out
2813 addi $out,$out,0x30
2814 bne Lxts_enc6x_steal
2815 b Lxts_enc6x_done
2816
2817.align 4
2818Lxts_enc6x_two:
2819 vxor $out0,$in4,$twk0
2820 vxor $out1,$in5,$twk1
2821 vxor $out2,$out2,$out2
2822 vxor $out3,$out3,$out3
2823 vxor $out4,$out4,$out4
2824
2825 bl _aesp8_xts_enc5x
2826
2827 le?vperm $out0,$out0,$out0,$leperm
2828 vmr $twk0,$twk2 # unused tweak
2829 vxor $tmp,$out1,$twk2 # last block prep for stealing
2830 le?vperm $out1,$out1,$out1,$leperm
2831 stvx_u $out0,$x00,$out # store output
2832 stvx_u $out1,$x10,$out
2833 addi $out,$out,0x20
2834 bne Lxts_enc6x_steal
2835 b Lxts_enc6x_done
2836
2837.align 4
2838Lxts_enc6x_one:
2839 vxor $out0,$in5,$twk0
2840 nop
2841Loop_xts_enc1x:
2842 vcipher $out0,$out0,v24
2843 lvx v24,$x20,$key_ # round[3]
2844 addi $key_,$key_,0x20
2845
2846 vcipher $out0,$out0,v25
2847 lvx v25,$x10,$key_ # round[4]
2848 bdnz Loop_xts_enc1x
2849
2850 add $inp,$inp,$taillen
2851 cmpwi $taillen,0
2852 vcipher $out0,$out0,v24
2853
2854 subi $inp,$inp,16
2855 vcipher $out0,$out0,v25
2856
2857 lvsr $inpperm,0,$taillen
2858 vcipher $out0,$out0,v26
2859
2860 lvx_u $in0,0,$inp
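#	$in0 now holds the last 16 input bytes (partial tail included);
#	the lvsr mask derived from $taillen above rotates them into
#	place for the stealing merge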
2861 vcipher $out0,$out0,v27
2862
2863	 addi		$key_,$sp,`$FRAME+15`	# rewind $key_
2864	 vcipher	$out0,$out0,v28
2865 lvx v24,$x00,$key_ # re-pre-load round[1]
2866
2867 vcipher $out0,$out0,v29
2868 lvx v25,$x10,$key_ # re-pre-load round[2]
2869 vxor $twk0,$twk0,v31
2870
2871 le?vperm $in0,$in0,$in0,$leperm
2872 vcipher $out0,$out0,v30
2873
2874 vperm $in0,$in0,$in0,$inpperm
2875 vcipherlast $out0,$out0,$twk0
2876
2877 vmr $twk0,$twk1 # unused tweak
2878 vxor $tmp,$out0,$twk1 # last block prep for stealing
2879 le?vperm $out0,$out0,$out0,$leperm
2880 stvx_u $out0,$x00,$out # store output
2881 addi $out,$out,0x10
2882 bne Lxts_enc6x_steal
2883 b Lxts_enc6x_done
2884
2885.align 4
2886Lxts_enc6x_zero:
2887 cmpwi $taillen,0
2888 beq Lxts_enc6x_done
2889
2890 add $inp,$inp,$taillen
2891 subi $inp,$inp,16
2892 lvx_u $in0,0,$inp
2893 lvsr $inpperm,0,$taillen # $in5 is no more
2894 le?vperm $in0,$in0,$in0,$leperm
2895 vperm $in0,$in0,$in0,$inpperm
2896 vxor $tmp,$tmp,$twk0
2897Lxts_enc6x_steal:
2898 vxor $in0,$in0,$twk0
2899 vxor $out0,$out0,$out0
2900 vspltisb $out1,-1
2901 vperm $out0,$out0,$out1,$inpperm
2902 vsel $out0,$in0,$tmp,$out0 # $tmp is last block, remember?
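#	$out0 now merges the masked tail of the new input block with the
#	stolen bytes of the previous ciphertext block kept in $tmp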
2903
2904 subi r30,$out,17
2905 subi $out,$out,16
2906 mtctr $taillen
2907Loop_xts_enc6x_steal:
2908 lbzu r0,1(r30)
2909 stb r0,16(r30)
2910 bdnz Loop_xts_enc6x_steal
2911
2912 li $taillen,0
2913 mtctr $rounds
2914 b Loop_xts_enc1x # one more time...
2915
2916.align 4
2917Lxts_enc6x_done:
2918 ${UCMP}i $ivp,0
2919 beq Lxts_enc6x_ret
2920
2921 vxor $tweak,$twk0,$rndkey0
2922 le?vperm $tweak,$tweak,$tweak,$leperm
2923 stvx_u $tweak,0,$ivp
2924
2925Lxts_enc6x_ret:
2926 mtlr r11
2927 li r10,`$FRAME+15`
2928 li r11,`$FRAME+31`
2929 stvx $seven,r10,$sp # wipe copies of round keys
2930 addi r10,r10,32
2931 stvx $seven,r11,$sp
2932 addi r11,r11,32
2933 stvx $seven,r10,$sp
2934 addi r10,r10,32
2935 stvx $seven,r11,$sp
2936 addi r11,r11,32
2937 stvx $seven,r10,$sp
2938 addi r10,r10,32
2939 stvx $seven,r11,$sp
2940 addi r11,r11,32
2941 stvx $seven,r10,$sp
2942 addi r10,r10,32
2943 stvx $seven,r11,$sp
2944 addi r11,r11,32
2945
2946 mtspr 256,$vrsave
2947 lvx v20,r10,$sp # ABI says so
2948 addi r10,r10,32
2949 lvx v21,r11,$sp
2950 addi r11,r11,32
2951 lvx v22,r10,$sp
2952 addi r10,r10,32
2953 lvx v23,r11,$sp
2954 addi r11,r11,32
2955 lvx v24,r10,$sp
2956 addi r10,r10,32
2957 lvx v25,r11,$sp
2958 addi r11,r11,32
2959 lvx v26,r10,$sp
2960 addi r10,r10,32
2961 lvx v27,r11,$sp
2962 addi r11,r11,32
2963 lvx v28,r10,$sp
2964 addi r10,r10,32
2965 lvx v29,r11,$sp
2966 addi r11,r11,32
2967 lvx v30,r10,$sp
2968 lvx v31,r11,$sp
2969 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
2970 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
2971 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
2972 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
2973 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
2974 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
2975 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
2976 blr
2977 .long 0
2978 .byte 0,12,0x04,1,0x80,6,6,0
2979 .long 0
2980
2981.align 5
2982_aesp8_xts_enc5x:
2983 vcipher $out0,$out0,v24
2984 vcipher $out1,$out1,v24
2985 vcipher $out2,$out2,v24
2986 vcipher $out3,$out3,v24
2987 vcipher $out4,$out4,v24
2988 lvx v24,$x20,$key_ # round[3]
2989 addi $key_,$key_,0x20
2990
2991 vcipher $out0,$out0,v25
2992 vcipher $out1,$out1,v25
2993 vcipher $out2,$out2,v25
2994 vcipher $out3,$out3,v25
2995 vcipher $out4,$out4,v25
2996 lvx v25,$x10,$key_ # round[4]
2997 bdnz _aesp8_xts_enc5x
2998
2999 add $inp,$inp,$taillen
3000 cmpwi $taillen,0
3001 vcipher $out0,$out0,v24
3002 vcipher $out1,$out1,v24
3003 vcipher $out2,$out2,v24
3004 vcipher $out3,$out3,v24
3005 vcipher $out4,$out4,v24
3006
3007 subi $inp,$inp,16
3008 vcipher $out0,$out0,v25
3009 vcipher $out1,$out1,v25
3010 vcipher $out2,$out2,v25
3011 vcipher $out3,$out3,v25
3012 vcipher $out4,$out4,v25
3013 vxor $twk0,$twk0,v31
3014
3015 vcipher $out0,$out0,v26
3016	 lvsr		$inpperm,0,$taillen	# $in5 is no more
3017	 vcipher	$out1,$out1,v26
3018 vcipher $out2,$out2,v26
3019 vcipher $out3,$out3,v26
3020 vcipher $out4,$out4,v26
3021 vxor $in1,$twk1,v31
3022
3023 vcipher $out0,$out0,v27
3024 lvx_u $in0,0,$inp
3025 vcipher $out1,$out1,v27
3026 vcipher $out2,$out2,v27
3027 vcipher $out3,$out3,v27
3028 vcipher $out4,$out4,v27
3029 vxor $in2,$twk2,v31
3030
3031	 addi		$key_,$sp,`$FRAME+15`	# rewind $key_
3032	 vcipher	$out0,$out0,v28
3033 vcipher $out1,$out1,v28
3034 vcipher $out2,$out2,v28
3035 vcipher $out3,$out3,v28
3036 vcipher $out4,$out4,v28
3037 lvx v24,$x00,$key_ # re-pre-load round[1]
3038 vxor $in3,$twk3,v31
3039
3040 vcipher $out0,$out0,v29
3041 le?vperm $in0,$in0,$in0,$leperm
3042 vcipher $out1,$out1,v29
3043 vcipher $out2,$out2,v29
3044 vcipher $out3,$out3,v29
3045 vcipher $out4,$out4,v29
3046 lvx v25,$x10,$key_ # re-pre-load round[2]
3047 vxor $in4,$twk4,v31
3048
3049 vcipher $out0,$out0,v30
3050 vperm $in0,$in0,$in0,$inpperm
3051 vcipher $out1,$out1,v30
3052 vcipher $out2,$out2,v30
3053 vcipher $out3,$out3,v30
3054 vcipher $out4,$out4,v30
3055
3056 vcipherlast $out0,$out0,$twk0
3057 vcipherlast $out1,$out1,$in1
3058 vcipherlast $out2,$out2,$in2
3059 vcipherlast $out3,$out3,$in3
3060 vcipherlast $out4,$out4,$in4
3061 blr
3062 .long 0
3063 .byte 0,12,0x14,0,0,0,0,0
3064
3065.align 5
3066_aesp8_xts_decrypt6x:
3067 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
3068 mflr r11
3069 li r7,`$FRAME+8*16+15`
3070 li r3,`$FRAME+8*16+31`
3071 $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
3072 stvx v20,r7,$sp # ABI says so
3073 addi r7,r7,32
3074 stvx v21,r3,$sp
3075 addi r3,r3,32
3076 stvx v22,r7,$sp
3077 addi r7,r7,32
3078 stvx v23,r3,$sp
3079 addi r3,r3,32
3080 stvx v24,r7,$sp
3081 addi r7,r7,32
3082 stvx v25,r3,$sp
3083 addi r3,r3,32
3084 stvx v26,r7,$sp
3085 addi r7,r7,32
3086 stvx v27,r3,$sp
3087 addi r3,r3,32
3088 stvx v28,r7,$sp
3089 addi r7,r7,32
3090 stvx v29,r3,$sp
3091 addi r3,r3,32
3092 stvx v30,r7,$sp
3093 stvx v31,r3,$sp
3094 li r0,-1
3095 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
3096 li $x10,0x10
3097 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
3098 li $x20,0x20
3099 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
3100 li $x30,0x30
3101 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
3102 li $x40,0x40
3103 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
3104 li $x50,0x50
3105 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
3106 li $x60,0x60
3107 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
3108 li $x70,0x70
3109 mtspr 256,r0
3110
3111 subi $rounds,$rounds,3 # -4 in total
3112
3113 lvx $rndkey0,$x00,$key1 # load key schedule
3114 lvx v30,$x10,$key1
3115 addi $key1,$key1,0x20
3116 lvx v31,$x00,$key1
3117 ?vperm $rndkey0,$rndkey0,v30,$keyperm
3118	 addi		$key_,$sp,`$FRAME+15`
3119	 mtctr		$rounds
3120
3121Load_xts_dec_key:
3122 ?vperm v24,v30,v31,$keyperm
3123 lvx v30,$x10,$key1
3124 addi $key1,$key1,0x20
3125 stvx v24,$x00,$key_ # off-load round[1]
3126 ?vperm v25,v31,v30,$keyperm
3127 lvx v31,$x00,$key1
3128 stvx v25,$x10,$key_ # off-load round[2]
3129 addi $key_,$key_,0x20
3130 bdnz Load_xts_dec_key
3131
3132 lvx v26,$x10,$key1
3133 ?vperm v24,v30,v31,$keyperm
3134 lvx v27,$x20,$key1
3135 stvx v24,$x00,$key_ # off-load round[3]
3136 ?vperm v25,v31,v26,$keyperm
3137 lvx v28,$x30,$key1
3138 stvx v25,$x10,$key_ # off-load round[4]
3139	 addi		$key_,$sp,`$FRAME+15`	# rewind $key_
3140	 ?vperm		v26,v26,v27,$keyperm
3141 lvx v29,$x40,$key1
3142 ?vperm v27,v27,v28,$keyperm
3143 lvx v30,$x50,$key1
3144 ?vperm v28,v28,v29,$keyperm
3145 lvx v31,$x60,$key1
3146 ?vperm v29,v29,v30,$keyperm
3147 lvx $twk5,$x70,$key1 # borrow $twk5
3148 ?vperm v30,v30,v31,$keyperm
3149 lvx v24,$x00,$key_ # pre-load round[1]
3150 ?vperm v31,v31,$twk5,$keyperm
3151 lvx v25,$x10,$key_ # pre-load round[2]
3152
3153 vperm $in0,$inout,$inptail,$inpperm
3154 subi $inp,$inp,31 # undo "caller"
3155 vxor $twk0,$tweak,$rndkey0
3156 vsrab $tmp,$tweak,$seven # next tweak value
3157 vaddubm $tweak,$tweak,$tweak
3158 vsldoi $tmp,$tmp,$tmp,15
3159 vand $tmp,$tmp,$eighty7
3160 vxor $out0,$in0,$twk0
3161 vxor $tweak,$tweak,$tmp
3162
3163 lvx_u $in1,$x10,$inp
3164 vxor $twk1,$tweak,$rndkey0
3165 vsrab $tmp,$tweak,$seven # next tweak value
3166 vaddubm $tweak,$tweak,$tweak
3167 vsldoi $tmp,$tmp,$tmp,15
3168 le?vperm $in1,$in1,$in1,$leperm
3169 vand $tmp,$tmp,$eighty7
3170 vxor $out1,$in1,$twk1
3171 vxor $tweak,$tweak,$tmp
3172
3173 lvx_u $in2,$x20,$inp
3174 andi. $taillen,$len,15
3175 vxor $twk2,$tweak,$rndkey0
3176 vsrab $tmp,$tweak,$seven # next tweak value
3177 vaddubm $tweak,$tweak,$tweak
3178 vsldoi $tmp,$tmp,$tmp,15
3179 le?vperm $in2,$in2,$in2,$leperm
3180 vand $tmp,$tmp,$eighty7
3181 vxor $out2,$in2,$twk2
3182 vxor $tweak,$tweak,$tmp
3183
3184 lvx_u $in3,$x30,$inp
3185 sub $len,$len,$taillen
3186 vxor $twk3,$tweak,$rndkey0
3187 vsrab $tmp,$tweak,$seven # next tweak value
3188 vaddubm $tweak,$tweak,$tweak
3189 vsldoi $tmp,$tmp,$tmp,15
3190 le?vperm $in3,$in3,$in3,$leperm
3191 vand $tmp,$tmp,$eighty7
3192 vxor $out3,$in3,$twk3
3193 vxor $tweak,$tweak,$tmp
3194
3195 lvx_u $in4,$x40,$inp
3196 subi $len,$len,0x60
3197 vxor $twk4,$tweak,$rndkey0
3198 vsrab $tmp,$tweak,$seven # next tweak value
3199 vaddubm $tweak,$tweak,$tweak
3200 vsldoi $tmp,$tmp,$tmp,15
3201 le?vperm $in4,$in4,$in4,$leperm
3202 vand $tmp,$tmp,$eighty7
3203 vxor $out4,$in4,$twk4
3204 vxor $tweak,$tweak,$tmp
3205
3206 lvx_u $in5,$x50,$inp
3207 addi $inp,$inp,0x60
3208 vxor $twk5,$tweak,$rndkey0
3209 vsrab $tmp,$tweak,$seven # next tweak value
3210 vaddubm $tweak,$tweak,$tweak
3211 vsldoi $tmp,$tmp,$tmp,15
3212 le?vperm $in5,$in5,$in5,$leperm
3213 vand $tmp,$tmp,$eighty7
3214 vxor $out5,$in5,$twk5
3215 vxor $tweak,$tweak,$tmp
3216
3217 vxor v31,v31,$rndkey0
3218 mtctr $rounds
3219 b Loop_xts_dec6x
3220
3221.align 5
3222Loop_xts_dec6x:
3223 vncipher $out0,$out0,v24
3224 vncipher $out1,$out1,v24
3225 vncipher $out2,$out2,v24
3226 vncipher $out3,$out3,v24
3227 vncipher $out4,$out4,v24
3228 vncipher $out5,$out5,v24
3229 lvx v24,$x20,$key_ # round[3]
3230 addi $key_,$key_,0x20
3231
3232 vncipher $out0,$out0,v25
3233 vncipher $out1,$out1,v25
3234 vncipher $out2,$out2,v25
3235 vncipher $out3,$out3,v25
3236 vncipher $out4,$out4,v25
3237 vncipher $out5,$out5,v25
3238 lvx v25,$x10,$key_ # round[4]
3239 bdnz Loop_xts_dec6x
3240
3241 subic $len,$len,96 # $len-=96
3242 vxor $in0,$twk0,v31 # xor with last round key
3243 vncipher $out0,$out0,v24
3244 vncipher $out1,$out1,v24
3245 vsrab $tmp,$tweak,$seven # next tweak value
3246 vxor $twk0,$tweak,$rndkey0
3247 vaddubm $tweak,$tweak,$tweak
3248 vncipher $out2,$out2,v24
3249 vncipher $out3,$out3,v24
3250 vsldoi $tmp,$tmp,$tmp,15
3251 vncipher $out4,$out4,v24
3252 vncipher $out5,$out5,v24
3253
3254 subfe. r0,r0,r0 # borrow?-1:0
3255 vand $tmp,$tmp,$eighty7
3256 vncipher $out0,$out0,v25
3257 vncipher $out1,$out1,v25
3258 vxor $tweak,$tweak,$tmp
3259 vncipher $out2,$out2,v25
3260 vncipher $out3,$out3,v25
3261 vxor $in1,$twk1,v31
3262 vsrab $tmp,$tweak,$seven # next tweak value
3263 vxor $twk1,$tweak,$rndkey0
3264 vncipher $out4,$out4,v25
3265 vncipher $out5,$out5,v25
3266
3267 and r0,r0,$len
3268 vaddubm $tweak,$tweak,$tweak
3269 vsldoi $tmp,$tmp,$tmp,15
3270 vncipher $out0,$out0,v26
3271 vncipher $out1,$out1,v26
3272 vand $tmp,$tmp,$eighty7
3273 vncipher $out2,$out2,v26
3274 vncipher $out3,$out3,v26
3275 vxor $tweak,$tweak,$tmp
3276 vncipher $out4,$out4,v26
3277 vncipher $out5,$out5,v26
3278
3279	 add		$inp,$inp,r0		# $inp is adjusted in such a
3280						# way that at exit from the
3281						# loop inX-in5 are loaded
3282						# with the last "words"
3283 vxor $in2,$twk2,v31
3284 vsrab $tmp,$tweak,$seven # next tweak value
3285 vxor $twk2,$tweak,$rndkey0
3286 vaddubm $tweak,$tweak,$tweak
3287 vncipher $out0,$out0,v27
3288 vncipher $out1,$out1,v27
3289 vsldoi $tmp,$tmp,$tmp,15
3290 vncipher $out2,$out2,v27
3291 vncipher $out3,$out3,v27
3292 vand $tmp,$tmp,$eighty7
3293 vncipher $out4,$out4,v27
3294 vncipher $out5,$out5,v27
3295
3296	 addi		$key_,$sp,`$FRAME+15`	# rewind $key_
3297	 vxor		$tweak,$tweak,$tmp
3298 vncipher $out0,$out0,v28
3299 vncipher $out1,$out1,v28
3300 vxor $in3,$twk3,v31
3301 vsrab $tmp,$tweak,$seven # next tweak value
3302 vxor $twk3,$tweak,$rndkey0
3303 vncipher $out2,$out2,v28
3304 vncipher $out3,$out3,v28
3305 vaddubm $tweak,$tweak,$tweak
3306 vsldoi $tmp,$tmp,$tmp,15
3307 vncipher $out4,$out4,v28
3308 vncipher $out5,$out5,v28
3309 lvx v24,$x00,$key_ # re-pre-load round[1]
3310 vand $tmp,$tmp,$eighty7
3311
3312 vncipher $out0,$out0,v29
3313 vncipher $out1,$out1,v29
3314 vxor $tweak,$tweak,$tmp
3315 vncipher $out2,$out2,v29
3316 vncipher $out3,$out3,v29
3317 vxor $in4,$twk4,v31
3318 vsrab $tmp,$tweak,$seven # next tweak value
3319 vxor $twk4,$tweak,$rndkey0
3320 vncipher $out4,$out4,v29
3321 vncipher $out5,$out5,v29
3322 lvx v25,$x10,$key_ # re-pre-load round[2]
3323 vaddubm $tweak,$tweak,$tweak
3324 vsldoi $tmp,$tmp,$tmp,15
3325
3326 vncipher $out0,$out0,v30
3327 vncipher $out1,$out1,v30
3328 vand $tmp,$tmp,$eighty7
3329 vncipher $out2,$out2,v30
3330 vncipher $out3,$out3,v30
3331 vxor $tweak,$tweak,$tmp
3332 vncipher $out4,$out4,v30
3333 vncipher $out5,$out5,v30
3334 vxor $in5,$twk5,v31
3335 vsrab $tmp,$tweak,$seven # next tweak value
3336 vxor $twk5,$tweak,$rndkey0
3337
3338 vncipherlast $out0,$out0,$in0
3339 lvx_u $in0,$x00,$inp # load next input block
3340 vaddubm $tweak,$tweak,$tweak
3341 vsldoi $tmp,$tmp,$tmp,15
3342 vncipherlast $out1,$out1,$in1
3343 lvx_u $in1,$x10,$inp
3344 vncipherlast $out2,$out2,$in2
3345 le?vperm $in0,$in0,$in0,$leperm
3346 lvx_u $in2,$x20,$inp
3347 vand $tmp,$tmp,$eighty7
3348 vncipherlast $out3,$out3,$in3
3349 le?vperm $in1,$in1,$in1,$leperm
3350 lvx_u $in3,$x30,$inp
3351 vncipherlast $out4,$out4,$in4
3352 le?vperm $in2,$in2,$in2,$leperm
3353 lvx_u $in4,$x40,$inp
3354 vxor $tweak,$tweak,$tmp
3355 vncipherlast $out5,$out5,$in5
3356 le?vperm $in3,$in3,$in3,$leperm
3357 lvx_u $in5,$x50,$inp
3358 addi $inp,$inp,0x60
3359 le?vperm $in4,$in4,$in4,$leperm
3360 le?vperm $in5,$in5,$in5,$leperm
3361
3362 le?vperm $out0,$out0,$out0,$leperm
3363 le?vperm $out1,$out1,$out1,$leperm
3364 stvx_u $out0,$x00,$out # store output
3365 vxor $out0,$in0,$twk0
3366 le?vperm $out2,$out2,$out2,$leperm
3367 stvx_u $out1,$x10,$out
3368 vxor $out1,$in1,$twk1
3369 le?vperm $out3,$out3,$out3,$leperm
3370 stvx_u $out2,$x20,$out
3371 vxor $out2,$in2,$twk2
3372 le?vperm $out4,$out4,$out4,$leperm
3373 stvx_u $out3,$x30,$out
3374 vxor $out3,$in3,$twk3
3375 le?vperm $out5,$out5,$out5,$leperm
3376 stvx_u $out4,$x40,$out
3377 vxor $out4,$in4,$twk4
3378 stvx_u $out5,$x50,$out
3379 vxor $out5,$in5,$twk5
3380 addi $out,$out,0x60
3381
3382 mtctr $rounds
3383 beq Loop_xts_dec6x # did $len-=96 borrow?
3384
3385 addic. $len,$len,0x60
3386 beq Lxts_dec6x_zero
3387 cmpwi $len,0x20
3388 blt Lxts_dec6x_one
3389 nop
3390 beq Lxts_dec6x_two
3391 cmpwi $len,0x40
3392 blt Lxts_dec6x_three
3393 nop
3394 beq Lxts_dec6x_four
3395
3396Lxts_dec6x_five:
3397 vxor $out0,$in1,$twk0
3398 vxor $out1,$in2,$twk1
3399 vxor $out2,$in3,$twk2
3400 vxor $out3,$in4,$twk3
3401 vxor $out4,$in5,$twk4
3402
3403 bl _aesp8_xts_dec5x
3404
3405 le?vperm $out0,$out0,$out0,$leperm
3406 vmr $twk0,$twk5 # unused tweak
3407 vxor $twk1,$tweak,$rndkey0
3408 le?vperm $out1,$out1,$out1,$leperm
3409 stvx_u $out0,$x00,$out # store output
3410 vxor $out0,$in0,$twk1
3411 le?vperm $out2,$out2,$out2,$leperm
3412 stvx_u $out1,$x10,$out
3413 le?vperm $out3,$out3,$out3,$leperm
3414 stvx_u $out2,$x20,$out
3415 le?vperm $out4,$out4,$out4,$leperm
3416 stvx_u $out3,$x30,$out
3417 stvx_u $out4,$x40,$out
3418 addi $out,$out,0x50
3419 bne Lxts_dec6x_steal
3420 b Lxts_dec6x_done
3421
3422.align 4
3423Lxts_dec6x_four:
3424 vxor $out0,$in2,$twk0
3425 vxor $out1,$in3,$twk1
3426 vxor $out2,$in4,$twk2
3427 vxor $out3,$in5,$twk3
3428 vxor $out4,$out4,$out4
3429
3430 bl _aesp8_xts_dec5x
3431
3432 le?vperm $out0,$out0,$out0,$leperm
3433 vmr $twk0,$twk4 # unused tweak
3434 vmr $twk1,$twk5
3435 le?vperm $out1,$out1,$out1,$leperm
3436 stvx_u $out0,$x00,$out # store output
3437 vxor $out0,$in0,$twk5
3438 le?vperm $out2,$out2,$out2,$leperm
3439 stvx_u $out1,$x10,$out
3440 le?vperm $out3,$out3,$out3,$leperm
3441 stvx_u $out2,$x20,$out
3442 stvx_u $out3,$x30,$out
3443 addi $out,$out,0x40
3444 bne Lxts_dec6x_steal
3445 b Lxts_dec6x_done
3446
3447.align 4
3448Lxts_dec6x_three:
3449 vxor $out0,$in3,$twk0
3450 vxor $out1,$in4,$twk1
3451 vxor $out2,$in5,$twk2
3452 vxor $out3,$out3,$out3
3453 vxor $out4,$out4,$out4
3454
3455 bl _aesp8_xts_dec5x
3456
3457 le?vperm $out0,$out0,$out0,$leperm
3458 vmr $twk0,$twk3 # unused tweak
3459 vmr $twk1,$twk4
3460 le?vperm $out1,$out1,$out1,$leperm
3461 stvx_u $out0,$x00,$out # store output
3462 vxor $out0,$in0,$twk4
3463 le?vperm $out2,$out2,$out2,$leperm
3464 stvx_u $out1,$x10,$out
3465 stvx_u $out2,$x20,$out
3466 addi $out,$out,0x30
3467 bne Lxts_dec6x_steal
3468 b Lxts_dec6x_done
3469
3470.align 4
3471Lxts_dec6x_two:
3472 vxor $out0,$in4,$twk0
3473 vxor $out1,$in5,$twk1
3474 vxor $out2,$out2,$out2
3475 vxor $out3,$out3,$out3
3476 vxor $out4,$out4,$out4
3477
3478 bl _aesp8_xts_dec5x
3479
3480 le?vperm $out0,$out0,$out0,$leperm
3481 vmr $twk0,$twk2 # unused tweak
3482 vmr $twk1,$twk3
3483 le?vperm $out1,$out1,$out1,$leperm
3484 stvx_u $out0,$x00,$out # store output
3485 vxor $out0,$in0,$twk3
3486 stvx_u $out1,$x10,$out
3487 addi $out,$out,0x20
3488 bne Lxts_dec6x_steal
3489 b Lxts_dec6x_done
3490
3491.align 4
3492Lxts_dec6x_one:
3493 vxor $out0,$in5,$twk0
3494 nop
3495Loop_xts_dec1x:
3496 vncipher $out0,$out0,v24
3497 lvx v24,$x20,$key_ # round[3]
3498 addi $key_,$key_,0x20
3499
3500 vncipher $out0,$out0,v25
3501 lvx v25,$x10,$key_ # round[4]
3502 bdnz Loop_xts_dec1x
3503
3504 subi r0,$taillen,1
3505 vncipher $out0,$out0,v24
3506
3507 andi. r0,r0,16
3508 cmpwi $taillen,0
3509 vncipher $out0,$out0,v25
3510
3511 sub $inp,$inp,r0
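#	r0 = ($taillen == 0) ? 16 : 0, so the load of $in0 below stays
#	inside the input buffer whether or not a partial tail follows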
3512 vncipher $out0,$out0,v26
3513
3514 lvx_u $in0,0,$inp
3515 vncipher $out0,$out0,v27
3516
3517	 addi		$key_,$sp,`$FRAME+15`	# rewind $key_
3518	 vncipher	$out0,$out0,v28
3519 lvx v24,$x00,$key_ # re-pre-load round[1]
3520
3521 vncipher $out0,$out0,v29
3522 lvx v25,$x10,$key_ # re-pre-load round[2]
3523 vxor $twk0,$twk0,v31
3524
3525 le?vperm $in0,$in0,$in0,$leperm
3526 vncipher $out0,$out0,v30
3527
3528 mtctr $rounds
3529 vncipherlast $out0,$out0,$twk0
3530
3531 vmr $twk0,$twk1 # unused tweak
3532 vmr $twk1,$twk2
3533 le?vperm $out0,$out0,$out0,$leperm
3534 stvx_u $out0,$x00,$out # store output
3535 addi $out,$out,0x10
3536 vxor $out0,$in0,$twk2
3537 bne Lxts_dec6x_steal
3538 b Lxts_dec6x_done
3539
3540.align 4
3541Lxts_dec6x_zero:
3542 cmpwi $taillen,0
3543 beq Lxts_dec6x_done
3544
3545 lvx_u $in0,0,$inp
3546 le?vperm $in0,$in0,$in0,$leperm
3547 vxor $out0,$in0,$twk1
3548Lxts_dec6x_steal:
3549 vncipher $out0,$out0,v24
3550 lvx v24,$x20,$key_ # round[3]
3551 addi $key_,$key_,0x20
3552
3553 vncipher $out0,$out0,v25
3554 lvx v25,$x10,$key_ # round[4]
3555 bdnz Lxts_dec6x_steal
3556
3557 add $inp,$inp,$taillen
3558 vncipher $out0,$out0,v24
3559
3560 cmpwi $taillen,0
3561 vncipher $out0,$out0,v25
3562
3563 lvx_u $in0,0,$inp
3564 vncipher $out0,$out0,v26
3565
3566 lvsr $inpperm,0,$taillen # $in5 is no more
3567 vncipher $out0,$out0,v27
3568
3569	 addi		$key_,$sp,`$FRAME+15`	# rewind $key_
3570	 vncipher	$out0,$out0,v28
3571 lvx v24,$x00,$key_ # re-pre-load round[1]
3572
3573 vncipher $out0,$out0,v29
3574 lvx v25,$x10,$key_ # re-pre-load round[2]
3575 vxor $twk1,$twk1,v31
3576
3577 le?vperm $in0,$in0,$in0,$leperm
3578 vncipher $out0,$out0,v30
3579
3580 vperm $in0,$in0,$in0,$inpperm
3581 vncipherlast $tmp,$out0,$twk1
3582
3583 le?vperm $out0,$tmp,$tmp,$leperm
3584 le?stvx_u $out0,0,$out
3585 be?stvx_u $tmp,0,$out
3586
3587 vxor $out0,$out0,$out0
3588 vspltisb $out1,-1
3589 vperm $out0,$out0,$out1,$inpperm
3590 vsel $out0,$in0,$tmp,$out0
3591 vxor $out0,$out0,$twk0
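#	as in the encrypt path, merge the partial input with the stolen
#	ciphertext bytes kept in $tmp, then xor in the tail's tweak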
3592
3593 subi r30,$out,1
3594 mtctr $taillen
3595Loop_xts_dec6x_steal:
3596 lbzu r0,1(r30)
3597 stb r0,16(r30)
3598 bdnz Loop_xts_dec6x_steal
3599
3600 li $taillen,0
3601 mtctr $rounds
3602 b Loop_xts_dec1x # one more time...
3603
3604.align 4
3605Lxts_dec6x_done:
3606 ${UCMP}i $ivp,0
3607 beq Lxts_dec6x_ret
3608
3609 vxor $tweak,$twk0,$rndkey0
3610 le?vperm $tweak,$tweak,$tweak,$leperm
3611 stvx_u $tweak,0,$ivp
3612
3613Lxts_dec6x_ret:
3614 mtlr r11
3615 li r10,`$FRAME+15`
3616 li r11,`$FRAME+31`
3617 stvx $seven,r10,$sp # wipe copies of round keys
3618 addi r10,r10,32
3619 stvx $seven,r11,$sp
3620 addi r11,r11,32
3621 stvx $seven,r10,$sp
3622 addi r10,r10,32
3623 stvx $seven,r11,$sp
3624 addi r11,r11,32
3625 stvx $seven,r10,$sp
3626 addi r10,r10,32
3627 stvx $seven,r11,$sp
3628 addi r11,r11,32
3629 stvx $seven,r10,$sp
3630 addi r10,r10,32
3631 stvx $seven,r11,$sp
3632 addi r11,r11,32
3633
3634 mtspr 256,$vrsave
3635 lvx v20,r10,$sp # ABI says so
3636 addi r10,r10,32
3637 lvx v21,r11,$sp
3638 addi r11,r11,32
3639 lvx v22,r10,$sp
3640 addi r10,r10,32
3641 lvx v23,r11,$sp
3642 addi r11,r11,32
3643 lvx v24,r10,$sp
3644 addi r10,r10,32
3645 lvx v25,r11,$sp
3646 addi r11,r11,32
3647 lvx v26,r10,$sp
3648 addi r10,r10,32
3649 lvx v27,r11,$sp
3650 addi r11,r11,32
3651 lvx v28,r10,$sp
3652 addi r10,r10,32
3653 lvx v29,r11,$sp
3654 addi r11,r11,32
3655 lvx v30,r10,$sp
3656 lvx v31,r11,$sp
3657 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
3658 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
3659 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
3660 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
3661 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
3662 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
3663 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
3664 blr
3665 .long 0
3666 .byte 0,12,0x04,1,0x80,6,6,0
3667 .long 0
3668
3669.align 5
3670_aesp8_xts_dec5x:
3671 vncipher $out0,$out0,v24
3672 vncipher $out1,$out1,v24
3673 vncipher $out2,$out2,v24
3674 vncipher $out3,$out3,v24
3675 vncipher $out4,$out4,v24
3676 lvx v24,$x20,$key_ # round[3]
3677 addi $key_,$key_,0x20
3678
3679 vncipher $out0,$out0,v25
3680 vncipher $out1,$out1,v25
3681 vncipher $out2,$out2,v25
3682 vncipher $out3,$out3,v25
3683 vncipher $out4,$out4,v25
3684 lvx v25,$x10,$key_ # round[4]
3685 bdnz _aesp8_xts_dec5x
3686
3687 subi r0,$taillen,1
3688 vncipher $out0,$out0,v24
3689 vncipher $out1,$out1,v24
3690 vncipher $out2,$out2,v24
3691 vncipher $out3,$out3,v24
3692 vncipher $out4,$out4,v24
3693
3694 andi. r0,r0,16
3695 cmpwi $taillen,0
3696 vncipher $out0,$out0,v25
3697 vncipher $out1,$out1,v25
3698 vncipher $out2,$out2,v25
3699 vncipher $out3,$out3,v25
3700 vncipher $out4,$out4,v25
3701 vxor $twk0,$twk0,v31
3702
3703 sub $inp,$inp,r0
3704 vncipher $out0,$out0,v26
3705 vncipher $out1,$out1,v26
3706 vncipher $out2,$out2,v26
3707 vncipher $out3,$out3,v26
3708 vncipher $out4,$out4,v26
3709 vxor $in1,$twk1,v31
3710
3711 vncipher $out0,$out0,v27
3712 lvx_u $in0,0,$inp
3713 vncipher $out1,$out1,v27
3714 vncipher $out2,$out2,v27
3715 vncipher $out3,$out3,v27
3716 vncipher $out4,$out4,v27
3717 vxor $in2,$twk2,v31
3718
3719	 addi		$key_,$sp,`$FRAME+15`	# rewind $key_
3720	 vncipher	$out0,$out0,v28
3721 vncipher $out1,$out1,v28
3722 vncipher $out2,$out2,v28
3723 vncipher $out3,$out3,v28
3724 vncipher $out4,$out4,v28
3725 lvx v24,$x00,$key_ # re-pre-load round[1]
3726 vxor $in3,$twk3,v31
3727
3728 vncipher $out0,$out0,v29
3729 le?vperm $in0,$in0,$in0,$leperm
3730 vncipher $out1,$out1,v29
3731 vncipher $out2,$out2,v29
3732 vncipher $out3,$out3,v29
3733 vncipher $out4,$out4,v29
3734 lvx v25,$x10,$key_ # re-pre-load round[2]
3735 vxor $in4,$twk4,v31
3736
3737 vncipher $out0,$out0,v30
3738 vncipher $out1,$out1,v30
3739 vncipher $out2,$out2,v30
3740 vncipher $out3,$out3,v30
3741 vncipher $out4,$out4,v30
3742
3743 vncipherlast $out0,$out0,$twk0
3744 vncipherlast $out1,$out1,$in1
3745 vncipherlast $out2,$out2,$in2
3746 vncipherlast $out3,$out3,$in3
3747 vncipherlast $out4,$out4,$in4
3748 mtctr $rounds
3749 blr
3750 .long 0
3751 .byte 0,12,0x14,0,0,0,0,0
3752___
3753}} }}}
3754
3755my $consts=1;
3756foreach(split("\n",$code)) {
3757 s/\`([^\`]*)\`/eval($1)/geo;
3758
3759 # constants table endian-specific conversion
3760 if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
3761 my $conv=$3;
3762 my @bytes=();
3763
3764 # convert to endian-agnostic format
3765 if ($1 eq "long") {
3766 foreach (split(/,\s*/,$2)) {
3767 my $l = /^0/?oct:int;
3768 push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
3769 }
3770 } else {
3771 @bytes = map(/^0/?oct:int,split(/,\s*/,$2));
3772 }
3773
3774 # little-endian conversion
3775 if ($flavour =~ /le$/o) {
3776 SWITCH: for($conv) {
3777 /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; };
3778			/\?rev/ && do	{ @bytes=reverse(@bytes);    last; };
3779		}
3780 }
3781
3782	# emit
3783 print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
3784 next;
3785 }
3786 $consts=0 if (m/Lconsts:/o); # end of table
3787
3788 # instructions prefixed with '?' are endian-specific and need
3789 # to be adjusted accordingly...
3790 if ($flavour =~ /le$/o) { # little-endian
3791 s/le\?//o or
3792 s/be\?/#be#/o or
3793 s/\?lvsr/lvsl/o or
3794 s/\?lvsl/lvsr/o or
3795 s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
3796 s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
3797 s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
3798 } else { # big-endian
3799 s/le\?/#le#/o or
3800 s/be\?//o or
3801 s/\?([a-z]+)/$1/o;
3802 }
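# e.g. on little-endian targets "?lvsl" assembles as lvsr (and vice
# versa) and "?vperm vD,vA,vB,vC" becomes "vperm vD,vB,vA,vC", while on
# big-endian targets the '?' marker is simply dropped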
3803
3804 print $_,"\n";
3805}
3806
3807close STDOUT or die "error closing STDOUT";