#!/usr/bin/env perl
#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# This module implements support for ARMv8 AES instructions. The
# module is endian-agnostic in the sense that it supports both big-
# and little-endian cases. It likewise supports both 32- and 64-bit
# modes of operation. The latter is achieved by limiting the number
# of utilized registers to 16, which implies additional NEON load and
# integer instructions. This has no effect on mighty Apple A7, where
# results are literally equal to the theoretical estimates based on
# AES instruction latencies and issue rates. On Cortex-A53, an
# in-order execution core, this costs up to 10-15%, which is
# partially compensated by implementing a dedicated code path for the
# 128-bit CBC encrypt case. On Cortex-A57 parallelizable mode
# performance seems to be limited by the sheer amount of NEON
# instructions...
#
# Performance in cycles per byte processed with 128-bit key:
#
#			CBC enc		CBC dec		CTR
# Apple A7		2.39		1.20		1.20
# Cortex-A53		1.32		1.29		1.46
# Cortex-A57(*)		1.95		0.85		0.93
# Denver		1.96		0.86		0.80
#
# (*)	original 3.64/1.34/1.32 results were for r0p0 revision
#	and are still the same even for the updated module;

# Command-line arguments; both are forwarded verbatim to arm-xlate.pl:
#   $flavour - perlasm target flavour; it is matched against /64/ throughout
#              this file to choose between the 64-bit and 32-bit code paths
#   $output  - output file name
$flavour = shift;
$output = shift;

# Locate arm-xlate.pl: first in the directory this script lives in, then in
# the perlasm directory three levels up from it.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../../perlasm/arm-xlate.pl" and -f $xlate) or
die "can't locate arm-xlate.pl";

# Everything printed to STDOUT from here on is piped through arm-xlate.pl.
# The command stays a single shell string exactly as before; the only change
# is that failing to start the pipe is now fatal instead of being ignored
# (which would otherwise leave the build with a missing or empty output).
open OUT,"| \"$^X\" $xlate $flavour $output"
    or die "can't call $xlate: $!";
*STDOUT=*OUT;

# Prefix of every symbol emitted by this module (e.g. aes_hw_set_encrypt_key).
$prefix="aes_hw";
Adam Langleyd9e397b2015-01-22 14:27:53 -080046
47$code=<<___;
Kenny Rootb8494592015-09-25 02:29:14 +000048#include <openssl/arm_arch.h>
Adam Langleyd9e397b2015-01-22 14:27:53 -080049
50#if __ARM_MAX_ARCH__>=7
51.text
52___
Adam Langleye9ada862015-05-11 17:20:37 -070053$code.=<<___ if ($flavour =~ /64/);
David Benjaminf0c4a6c2016-08-11 13:26:41 -040054#if !defined(__clang__) || defined(BORINGSSL_CLANG_SUPPORTS_DOT_ARCH)
Adam Langleye9ada862015-05-11 17:20:37 -070055.arch armv8-a+crypto
Kenny Rootd18b6332015-04-18 14:27:55 -070056#endif
57___
Adam Langleyd9e397b2015-01-22 14:27:53 -080058$code.=".arch armv7-a\n.fpu neon\n.code 32\n" if ($flavour !~ /64/);
59 #^^^^^^ this is done to simplify adoption by not depending
60 # on latest binutils.
61
# Assembler mnemonics are an eclectic mix of 32- and 64-bit syntax,
# NEON is mostly 32-bit mnemonics, integer - mostly 64. Goal is to
# maintain both 32- and 64-bit codes within single module and
# transliterate common code to either flavour with regex voodoo.
#
67{{{
# Register assignment for the key-schedule routines that follow.
#
# Integer registers (written in 64-bit syntax; the 32-bit pass renames
# wN/xN to rN):
#   $inp    - pointer the user key bytes are loaded from
#   $bits   - key length in bits (checked against 128/192/256), later
#             reused as a loop counter
#   $out    - pointer the expanded round keys are stored to
#   $ptr    - scratch: holds the return code and the address of .Lrcon
#   $rounds - round count stored after the key schedule
#
# NEON registers: q0-q6 for 64-bit flavours, q0-q3 and q8-q10 otherwise.
# NOTE(review): the 32-bit set presumably skips q4-q7 because they overlap
# d8-d15, which the other routines in this file save and restore explicitly
# for the 32-bit ABI -- confirm.
my ($inp,$bits,$out,$ptr,$rounds)=("x0","w1","x2","x3","w12");
my ($zero,$rcon,$mask,$in0,$in1,$tmp,$key)=
	$flavour=~/64/? map("q$_",(0..6)) : map("q$_",(0..3,8..10));
71
72
73$code.=<<___;
74.align 5
Adam Langleye9ada862015-05-11 17:20:37 -070075.Lrcon:
Adam Langleyd9e397b2015-01-22 14:27:53 -080076.long 0x01,0x01,0x01,0x01
77.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d // rotate-n-splat
78.long 0x1b,0x1b,0x1b,0x1b
79
80.globl ${prefix}_set_encrypt_key
81.type ${prefix}_set_encrypt_key,%function
82.align 5
83${prefix}_set_encrypt_key:
84.Lenc_key:
85___
86$code.=<<___ if ($flavour =~ /64/);
87 stp x29,x30,[sp,#-16]!
88 add x29,sp,#0
89___
90$code.=<<___;
91 mov $ptr,#-1
92 cmp $inp,#0
93 b.eq .Lenc_key_abort
94 cmp $out,#0
95 b.eq .Lenc_key_abort
96 mov $ptr,#-2
97 cmp $bits,#128
98 b.lt .Lenc_key_abort
99 cmp $bits,#256
100 b.gt .Lenc_key_abort
101 tst $bits,#0x3f
102 b.ne .Lenc_key_abort
103
Adam Langleye9ada862015-05-11 17:20:37 -0700104 adr $ptr,.Lrcon
Adam Langleyd9e397b2015-01-22 14:27:53 -0800105 cmp $bits,#192
106
107 veor $zero,$zero,$zero
108 vld1.8 {$in0},[$inp],#16
109 mov $bits,#8 // reuse $bits
110 vld1.32 {$rcon,$mask},[$ptr],#32
111
112 b.lt .Loop128
113 b.eq .L192
114 b .L256
115
116.align 4
117.Loop128:
118 vtbl.8 $key,{$in0},$mask
119 vext.8 $tmp,$zero,$in0,#12
120 vst1.32 {$in0},[$out],#16
121 aese $key,$zero
122 subs $bits,$bits,#1
123
124 veor $in0,$in0,$tmp
125 vext.8 $tmp,$zero,$tmp,#12
126 veor $in0,$in0,$tmp
127 vext.8 $tmp,$zero,$tmp,#12
128 veor $key,$key,$rcon
129 veor $in0,$in0,$tmp
130 vshl.u8 $rcon,$rcon,#1
131 veor $in0,$in0,$key
132 b.ne .Loop128
133
134 vld1.32 {$rcon},[$ptr]
135
136 vtbl.8 $key,{$in0},$mask
137 vext.8 $tmp,$zero,$in0,#12
138 vst1.32 {$in0},[$out],#16
139 aese $key,$zero
140
141 veor $in0,$in0,$tmp
142 vext.8 $tmp,$zero,$tmp,#12
143 veor $in0,$in0,$tmp
144 vext.8 $tmp,$zero,$tmp,#12
145 veor $key,$key,$rcon
146 veor $in0,$in0,$tmp
147 vshl.u8 $rcon,$rcon,#1
148 veor $in0,$in0,$key
149
150 vtbl.8 $key,{$in0},$mask
151 vext.8 $tmp,$zero,$in0,#12
152 vst1.32 {$in0},[$out],#16
153 aese $key,$zero
154
155 veor $in0,$in0,$tmp
156 vext.8 $tmp,$zero,$tmp,#12
157 veor $in0,$in0,$tmp
158 vext.8 $tmp,$zero,$tmp,#12
159 veor $key,$key,$rcon
160 veor $in0,$in0,$tmp
161 veor $in0,$in0,$key
162 vst1.32 {$in0},[$out]
163 add $out,$out,#0x50
164
165 mov $rounds,#10
166 b .Ldone
167
168.align 4
169.L192:
170 vld1.8 {$in1},[$inp],#8
171 vmov.i8 $key,#8 // borrow $key
172 vst1.32 {$in0},[$out],#16
173 vsub.i8 $mask,$mask,$key // adjust the mask
174
175.Loop192:
176 vtbl.8 $key,{$in1},$mask
177 vext.8 $tmp,$zero,$in0,#12
178 vst1.32 {$in1},[$out],#8
179 aese $key,$zero
180 subs $bits,$bits,#1
181
182 veor $in0,$in0,$tmp
183 vext.8 $tmp,$zero,$tmp,#12
184 veor $in0,$in0,$tmp
185 vext.8 $tmp,$zero,$tmp,#12
186 veor $in0,$in0,$tmp
187
188 vdup.32 $tmp,${in0}[3]
189 veor $tmp,$tmp,$in1
190 veor $key,$key,$rcon
191 vext.8 $in1,$zero,$in1,#12
192 vshl.u8 $rcon,$rcon,#1
193 veor $in1,$in1,$tmp
194 veor $in0,$in0,$key
195 veor $in1,$in1,$key
196 vst1.32 {$in0},[$out],#16
197 b.ne .Loop192
198
199 mov $rounds,#12
200 add $out,$out,#0x20
201 b .Ldone
202
203.align 4
204.L256:
205 vld1.8 {$in1},[$inp]
206 mov $bits,#7
207 mov $rounds,#14
208 vst1.32 {$in0},[$out],#16
209
210.Loop256:
211 vtbl.8 $key,{$in1},$mask
212 vext.8 $tmp,$zero,$in0,#12
213 vst1.32 {$in1},[$out],#16
214 aese $key,$zero
215 subs $bits,$bits,#1
216
217 veor $in0,$in0,$tmp
218 vext.8 $tmp,$zero,$tmp,#12
219 veor $in0,$in0,$tmp
220 vext.8 $tmp,$zero,$tmp,#12
221 veor $key,$key,$rcon
222 veor $in0,$in0,$tmp
223 vshl.u8 $rcon,$rcon,#1
224 veor $in0,$in0,$key
225 vst1.32 {$in0},[$out],#16
226 b.eq .Ldone
227
228 vdup.32 $key,${in0}[3] // just splat
229 vext.8 $tmp,$zero,$in1,#12
230 aese $key,$zero
231
232 veor $in1,$in1,$tmp
233 vext.8 $tmp,$zero,$tmp,#12
234 veor $in1,$in1,$tmp
235 vext.8 $tmp,$zero,$tmp,#12
236 veor $in1,$in1,$tmp
237
238 veor $in1,$in1,$key
239 b .Loop256
240
241.Ldone:
242 str $rounds,[$out]
243 mov $ptr,#0
244
245.Lenc_key_abort:
246 mov x0,$ptr // return value
247 `"ldr x29,[sp],#16" if ($flavour =~ /64/)`
248 ret
249.size ${prefix}_set_encrypt_key,.-${prefix}_set_encrypt_key
250
251.globl ${prefix}_set_decrypt_key
252.type ${prefix}_set_decrypt_key,%function
253.align 5
254${prefix}_set_decrypt_key:
255___
256$code.=<<___ if ($flavour =~ /64/);
257 stp x29,x30,[sp,#-16]!
258 add x29,sp,#0
259___
260$code.=<<___ if ($flavour !~ /64/);
261 stmdb sp!,{r4,lr}
262___
263$code.=<<___;
264 bl .Lenc_key
265
266 cmp x0,#0
267 b.ne .Ldec_key_abort
268
269 sub $out,$out,#240 // restore original $out
270 mov x4,#-16
271 add $inp,$out,x12,lsl#4 // end of key schedule
272
273 vld1.32 {v0.16b},[$out]
274 vld1.32 {v1.16b},[$inp]
275 vst1.32 {v0.16b},[$inp],x4
276 vst1.32 {v1.16b},[$out],#16
277
278.Loop_imc:
279 vld1.32 {v0.16b},[$out]
280 vld1.32 {v1.16b},[$inp]
281 aesimc v0.16b,v0.16b
282 aesimc v1.16b,v1.16b
283 vst1.32 {v0.16b},[$inp],x4
284 vst1.32 {v1.16b},[$out],#16
285 cmp $inp,$out
286 b.hi .Loop_imc
287
288 vld1.32 {v0.16b},[$out]
289 aesimc v0.16b,v0.16b
290 vst1.32 {v0.16b},[$inp]
291
292 eor x0,x0,x0 // return value
293.Ldec_key_abort:
294___
295$code.=<<___ if ($flavour !~ /64/);
296 ldmia sp!,{r4,pc}
297___
298$code.=<<___ if ($flavour =~ /64/);
299 ldp x29,x30,[sp],#16
300 ret
301___
302$code.=<<___;
303.size ${prefix}_set_decrypt_key,.-${prefix}_set_decrypt_key
304___
305}}}
306{{{
# gen_block($dir)
#
# Appends to the global $code the assembly for the single-block routine
# ${prefix}_${dir}crypt, where $dir is "en" or "de".
#
#   $dir eq "en"  -> uses aese/aesmc
#   anything else -> uses aesd/aesimc
#
# The emitted routine reads the round count from offset 240 of the key
# structure, loads one 16-byte block from $inp, runs the rounds two at a
# time with the round keys streamed through $rndkey0/$rndkey1, and stores
# the 16-byte result to $out.
#
# The sub used to be declared with an empty prototype "()" although it takes
# one argument; that only worked because callers invoke it as &gen_block(...),
# which bypasses prototypes. The prototype is dropped so that both
# &gen_block("en") and gen_block("en") are valid.
#
# Returns nothing useful; the result is the side effect on $code.
sub gen_block {
my $dir = shift;
my ($e,$mc) = $dir eq "en" ? ("e","mc") : ("d","imc");
my ($inp,$out,$key)=map("x$_",(0..2));
my $rounds="w3";
# Exactly three NEON registers are needed: two round keys and the data block.
my ($rndkey0,$rndkey1,$inout)=map("q$_",(0..2));

$code.=<<___;
.globl	${prefix}_${dir}crypt
.type	${prefix}_${dir}crypt,%function
.align	5
${prefix}_${dir}crypt:
	ldr	$rounds,[$key,#240]
	vld1.32	{$rndkey0},[$key],#16
	vld1.8	{$inout},[$inp]
	sub	$rounds,$rounds,#2
	vld1.32	{$rndkey1},[$key],#16

.Loop_${dir}c:
	aes$e	$inout,$rndkey0
	aes$mc	$inout,$inout
	vld1.32	{$rndkey0},[$key],#16
	subs	$rounds,$rounds,#2
	aes$e	$inout,$rndkey1
	aes$mc	$inout,$inout
	vld1.32	{$rndkey1},[$key],#16
	b.gt	.Loop_${dir}c

	aes$e	$inout,$rndkey0
	aes$mc	$inout,$inout
	vld1.32	{$rndkey0},[$key]
	aes$e	$inout,$rndkey1
	veor	$inout,$inout,$rndkey0

	vst1.8	{$inout},[$out]
	ret
.size	${prefix}_${dir}crypt,.-${prefix}_${dir}crypt
___
}
346&gen_block("en");
347&gen_block("de");
348}}}
349{{{
# Register assignment for ${prefix}_cbc_encrypt.
#
# x0-x4 and w5 are named after the values the routine reads from them:
# input pointer, output pointer, length, key pointer, IV pointer and the
# encrypt/decrypt selector.
#
# Several names deliberately share a register:
#   $rounds is $enc (w5)     - the selector is tested with "cmp" before
#                              the round count is loaded over it
#   $key4 (x6) overlaps $cnt (w6)
#   $key5 (x12) overlaps $step1 (x12)
#   $key7 is $key itself (x3)
#   $dat/$tmp/$rndzero_n_last are aliases of $dat0/$tmp0/$tmp1
my ($inp,$out,$len,$key,$ivp)=map("x$_",(0..4));	my $enc="w5";
my ($rounds,$cnt,$key_,$step,$step1)=($enc,"w6","x7","x8","x12");
my ($dat0,$dat1,$in0,$in1,$tmp0,$tmp1,$ivec,$rndlast)=map("q$_",(0..7));

my ($dat,$tmp,$rndzero_n_last)=($dat0,$tmp0,$tmp1);
my ($key4,$key5,$key6,$key7)=("x6","x12","x14",$key);
Adam Langleyd9e397b2015-01-22 14:27:53 -0800356
357### q8-q15 preloaded key schedule
358
359$code.=<<___;
360.globl ${prefix}_cbc_encrypt
361.type ${prefix}_cbc_encrypt,%function
362.align 5
363${prefix}_cbc_encrypt:
364___
365$code.=<<___ if ($flavour =~ /64/);
366 stp x29,x30,[sp,#-16]!
367 add x29,sp,#0
368___
369$code.=<<___ if ($flavour !~ /64/);
370 mov ip,sp
371 stmdb sp!,{r4-r8,lr}
372 vstmdb sp!,{d8-d15} @ ABI specification says so
373 ldmia ip,{r4-r5} @ load remaining args
374___
375$code.=<<___;
376 subs $len,$len,#16
377 mov $step,#16
378 b.lo .Lcbc_abort
379 cclr $step,eq
380
381 cmp $enc,#0 // en- or decrypting?
382 ldr $rounds,[$key,#240]
383 and $len,$len,#-16
384 vld1.8 {$ivec},[$ivp]
385 vld1.8 {$dat},[$inp],$step
386
387 vld1.32 {q8-q9},[$key] // load key schedule...
388 sub $rounds,$rounds,#6
389 add $key_,$key,x5,lsl#4 // pointer to last 7 round keys
390 sub $rounds,$rounds,#2
391 vld1.32 {q10-q11},[$key_],#32
392 vld1.32 {q12-q13},[$key_],#32
393 vld1.32 {q14-q15},[$key_],#32
394 vld1.32 {$rndlast},[$key_]
395
396 add $key_,$key,#32
397 mov $cnt,$rounds
398 b.eq .Lcbc_dec
399
400 cmp $rounds,#2
401 veor $dat,$dat,$ivec
402 veor $rndzero_n_last,q8,$rndlast
403 b.eq .Lcbc_enc128
404
Adam Langleye9ada862015-05-11 17:20:37 -0700405 vld1.32 {$in0-$in1},[$key_]
406 add $key_,$key,#16
407 add $key4,$key,#16*4
408 add $key5,$key,#16*5
409 aese $dat,q8
410 aesmc $dat,$dat
411 add $key6,$key,#16*6
412 add $key7,$key,#16*7
413 b .Lenter_cbc_enc
414
415.align 4
Adam Langleyd9e397b2015-01-22 14:27:53 -0800416.Loop_cbc_enc:
417 aese $dat,q8
Adam Langleyd9e397b2015-01-22 14:27:53 -0800418 aesmc $dat,$dat
Adam Langleye9ada862015-05-11 17:20:37 -0700419 vst1.8 {$ivec},[$out],#16
420.Lenter_cbc_enc:
Adam Langleyd9e397b2015-01-22 14:27:53 -0800421 aese $dat,q9
Adam Langleyd9e397b2015-01-22 14:27:53 -0800422 aesmc $dat,$dat
Adam Langleye9ada862015-05-11 17:20:37 -0700423 aese $dat,$in0
424 aesmc $dat,$dat
425 vld1.32 {q8},[$key4]
426 cmp $rounds,#4
427 aese $dat,$in1
428 aesmc $dat,$dat
429 vld1.32 {q9},[$key5]
430 b.eq .Lcbc_enc192
Adam Langleyd9e397b2015-01-22 14:27:53 -0800431
432 aese $dat,q8
433 aesmc $dat,$dat
Adam Langleye9ada862015-05-11 17:20:37 -0700434 vld1.32 {q8},[$key6]
435 aese $dat,q9
436 aesmc $dat,$dat
437 vld1.32 {q9},[$key7]
438 nop
439
440.Lcbc_enc192:
441 aese $dat,q8
442 aesmc $dat,$dat
Adam Langleyd9e397b2015-01-22 14:27:53 -0800443 subs $len,$len,#16
444 aese $dat,q9
445 aesmc $dat,$dat
446 cclr $step,eq
447 aese $dat,q10
448 aesmc $dat,$dat
Adam Langleyd9e397b2015-01-22 14:27:53 -0800449 aese $dat,q11
450 aesmc $dat,$dat
451 vld1.8 {q8},[$inp],$step
452 aese $dat,q12
453 aesmc $dat,$dat
454 veor q8,q8,$rndzero_n_last
455 aese $dat,q13
456 aesmc $dat,$dat
Adam Langleye9ada862015-05-11 17:20:37 -0700457 vld1.32 {q9},[$key_] // re-pre-load rndkey[1]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800458 aese $dat,q14
459 aesmc $dat,$dat
460 aese $dat,q15
Adam Langleyd9e397b2015-01-22 14:27:53 -0800461 veor $ivec,$dat,$rndlast
Adam Langleyd9e397b2015-01-22 14:27:53 -0800462 b.hs .Loop_cbc_enc
463
Adam Langleye9ada862015-05-11 17:20:37 -0700464 vst1.8 {$ivec},[$out],#16
Adam Langleyd9e397b2015-01-22 14:27:53 -0800465 b .Lcbc_done
466
467.align 5
468.Lcbc_enc128:
469 vld1.32 {$in0-$in1},[$key_]
470 aese $dat,q8
471 aesmc $dat,$dat
472 b .Lenter_cbc_enc128
473.Loop_cbc_enc128:
474 aese $dat,q8
475 aesmc $dat,$dat
476 vst1.8 {$ivec},[$out],#16
477.Lenter_cbc_enc128:
478 aese $dat,q9
479 aesmc $dat,$dat
480 subs $len,$len,#16
481 aese $dat,$in0
482 aesmc $dat,$dat
483 cclr $step,eq
484 aese $dat,$in1
485 aesmc $dat,$dat
486 aese $dat,q10
487 aesmc $dat,$dat
488 aese $dat,q11
489 aesmc $dat,$dat
490 vld1.8 {q8},[$inp],$step
491 aese $dat,q12
492 aesmc $dat,$dat
493 aese $dat,q13
494 aesmc $dat,$dat
495 aese $dat,q14
496 aesmc $dat,$dat
497 veor q8,q8,$rndzero_n_last
498 aese $dat,q15
499 veor $ivec,$dat,$rndlast
500 b.hs .Loop_cbc_enc128
501
502 vst1.8 {$ivec},[$out],#16
503 b .Lcbc_done
504___
505{
506my ($dat2,$in2,$tmp2)=map("q$_",(10,11,9));
507$code.=<<___;
508.align 5
509.Lcbc_dec:
510 vld1.8 {$dat2},[$inp],#16
511 subs $len,$len,#32 // bias
512 add $cnt,$rounds,#2
513 vorr $in1,$dat,$dat
514 vorr $dat1,$dat,$dat
515 vorr $in2,$dat2,$dat2
516 b.lo .Lcbc_dec_tail
517
518 vorr $dat1,$dat2,$dat2
519 vld1.8 {$dat2},[$inp],#16
520 vorr $in0,$dat,$dat
521 vorr $in1,$dat1,$dat1
522 vorr $in2,$dat2,$dat2
523
524.Loop3x_cbc_dec:
525 aesd $dat0,q8
Adam Langleyd9e397b2015-01-22 14:27:53 -0800526 aesimc $dat0,$dat0
Adam Langleye9ada862015-05-11 17:20:37 -0700527 aesd $dat1,q8
Adam Langleyd9e397b2015-01-22 14:27:53 -0800528 aesimc $dat1,$dat1
Adam Langleye9ada862015-05-11 17:20:37 -0700529 aesd $dat2,q8
Adam Langleyd9e397b2015-01-22 14:27:53 -0800530 aesimc $dat2,$dat2
Adam Langleye9ada862015-05-11 17:20:37 -0700531 vld1.32 {q8},[$key_],#16
Adam Langleyd9e397b2015-01-22 14:27:53 -0800532 subs $cnt,$cnt,#2
533 aesd $dat0,q9
Adam Langleyd9e397b2015-01-22 14:27:53 -0800534 aesimc $dat0,$dat0
Adam Langleye9ada862015-05-11 17:20:37 -0700535 aesd $dat1,q9
Adam Langleyd9e397b2015-01-22 14:27:53 -0800536 aesimc $dat1,$dat1
Adam Langleye9ada862015-05-11 17:20:37 -0700537 aesd $dat2,q9
Adam Langleyd9e397b2015-01-22 14:27:53 -0800538 aesimc $dat2,$dat2
Adam Langleye9ada862015-05-11 17:20:37 -0700539 vld1.32 {q9},[$key_],#16
Adam Langleyd9e397b2015-01-22 14:27:53 -0800540 b.gt .Loop3x_cbc_dec
541
542 aesd $dat0,q8
Adam Langleye9ada862015-05-11 17:20:37 -0700543 aesimc $dat0,$dat0
Adam Langleyd9e397b2015-01-22 14:27:53 -0800544 aesd $dat1,q8
Adam Langleye9ada862015-05-11 17:20:37 -0700545 aesimc $dat1,$dat1
Adam Langleyd9e397b2015-01-22 14:27:53 -0800546 aesd $dat2,q8
Adam Langleye9ada862015-05-11 17:20:37 -0700547 aesimc $dat2,$dat2
Adam Langleyd9e397b2015-01-22 14:27:53 -0800548 veor $tmp0,$ivec,$rndlast
Adam Langleyd9e397b2015-01-22 14:27:53 -0800549 subs $len,$len,#0x30
Adam Langleye9ada862015-05-11 17:20:37 -0700550 veor $tmp1,$in0,$rndlast
Adam Langleyd9e397b2015-01-22 14:27:53 -0800551 mov.lo x6,$len // x6, $cnt, is zero at this point
Adam Langleye9ada862015-05-11 17:20:37 -0700552 aesd $dat0,q9
553 aesimc $dat0,$dat0
554 aesd $dat1,q9
555 aesimc $dat1,$dat1
556 aesd $dat2,q9
557 aesimc $dat2,$dat2
558 veor $tmp2,$in1,$rndlast
Adam Langleyd9e397b2015-01-22 14:27:53 -0800559 add $inp,$inp,x6 // $inp is adjusted in such way that
560 // at exit from the loop $dat1-$dat2
561 // are loaded with last "words"
Adam Langleye9ada862015-05-11 17:20:37 -0700562 vorr $ivec,$in2,$in2
Adam Langleyd9e397b2015-01-22 14:27:53 -0800563 mov $key_,$key
Adam Langleye9ada862015-05-11 17:20:37 -0700564 aesd $dat0,q12
Adam Langleyd9e397b2015-01-22 14:27:53 -0800565 aesimc $dat0,$dat0
Adam Langleye9ada862015-05-11 17:20:37 -0700566 aesd $dat1,q12
Adam Langleyd9e397b2015-01-22 14:27:53 -0800567 aesimc $dat1,$dat1
Adam Langleye9ada862015-05-11 17:20:37 -0700568 aesd $dat2,q12
569 aesimc $dat2,$dat2
570 vld1.8 {$in0},[$inp],#16
571 aesd $dat0,q13
572 aesimc $dat0,$dat0
573 aesd $dat1,q13
574 aesimc $dat1,$dat1
575 aesd $dat2,q13
Adam Langleyd9e397b2015-01-22 14:27:53 -0800576 aesimc $dat2,$dat2
577 vld1.8 {$in1},[$inp],#16
578 aesd $dat0,q14
Adam Langleyd9e397b2015-01-22 14:27:53 -0800579 aesimc $dat0,$dat0
Adam Langleye9ada862015-05-11 17:20:37 -0700580 aesd $dat1,q14
Adam Langleyd9e397b2015-01-22 14:27:53 -0800581 aesimc $dat1,$dat1
Adam Langleye9ada862015-05-11 17:20:37 -0700582 aesd $dat2,q14
Adam Langleyd9e397b2015-01-22 14:27:53 -0800583 aesimc $dat2,$dat2
Adam Langleye9ada862015-05-11 17:20:37 -0700584 vld1.8 {$in2},[$inp],#16
Adam Langleyd9e397b2015-01-22 14:27:53 -0800585 aesd $dat0,q15
586 aesd $dat1,q15
587 aesd $dat2,q15
Adam Langleye9ada862015-05-11 17:20:37 -0700588 vld1.32 {q8},[$key_],#16 // re-pre-load rndkey[0]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800589 add $cnt,$rounds,#2
590 veor $tmp0,$tmp0,$dat0
591 veor $tmp1,$tmp1,$dat1
592 veor $dat2,$dat2,$tmp2
593 vld1.32 {q9},[$key_],#16 // re-pre-load rndkey[1]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800594 vst1.8 {$tmp0},[$out],#16
Adam Langleye9ada862015-05-11 17:20:37 -0700595 vorr $dat0,$in0,$in0
Adam Langleyd9e397b2015-01-22 14:27:53 -0800596 vst1.8 {$tmp1},[$out],#16
Adam Langleye9ada862015-05-11 17:20:37 -0700597 vorr $dat1,$in1,$in1
Adam Langleyd9e397b2015-01-22 14:27:53 -0800598 vst1.8 {$dat2},[$out],#16
599 vorr $dat2,$in2,$in2
600 b.hs .Loop3x_cbc_dec
601
602 cmn $len,#0x30
603 b.eq .Lcbc_done
604 nop
605
606.Lcbc_dec_tail:
607 aesd $dat1,q8
Adam Langleyd9e397b2015-01-22 14:27:53 -0800608 aesimc $dat1,$dat1
Adam Langleye9ada862015-05-11 17:20:37 -0700609 aesd $dat2,q8
Adam Langleyd9e397b2015-01-22 14:27:53 -0800610 aesimc $dat2,$dat2
Adam Langleye9ada862015-05-11 17:20:37 -0700611 vld1.32 {q8},[$key_],#16
Adam Langleyd9e397b2015-01-22 14:27:53 -0800612 subs $cnt,$cnt,#2
613 aesd $dat1,q9
Adam Langleyd9e397b2015-01-22 14:27:53 -0800614 aesimc $dat1,$dat1
Adam Langleye9ada862015-05-11 17:20:37 -0700615 aesd $dat2,q9
Adam Langleyd9e397b2015-01-22 14:27:53 -0800616 aesimc $dat2,$dat2
Adam Langleye9ada862015-05-11 17:20:37 -0700617 vld1.32 {q9},[$key_],#16
Adam Langleyd9e397b2015-01-22 14:27:53 -0800618 b.gt .Lcbc_dec_tail
619
620 aesd $dat1,q8
Adam Langleyd9e397b2015-01-22 14:27:53 -0800621 aesimc $dat1,$dat1
Adam Langleye9ada862015-05-11 17:20:37 -0700622 aesd $dat2,q8
Adam Langleyd9e397b2015-01-22 14:27:53 -0800623 aesimc $dat2,$dat2
624 aesd $dat1,q9
Adam Langleyd9e397b2015-01-22 14:27:53 -0800625 aesimc $dat1,$dat1
Adam Langleye9ada862015-05-11 17:20:37 -0700626 aesd $dat2,q9
Adam Langleyd9e397b2015-01-22 14:27:53 -0800627 aesimc $dat2,$dat2
628 aesd $dat1,q12
Adam Langleyd9e397b2015-01-22 14:27:53 -0800629 aesimc $dat1,$dat1
Adam Langleye9ada862015-05-11 17:20:37 -0700630 aesd $dat2,q12
Adam Langleyd9e397b2015-01-22 14:27:53 -0800631 aesimc $dat2,$dat2
632 cmn $len,#0x20
633 aesd $dat1,q13
Adam Langleyd9e397b2015-01-22 14:27:53 -0800634 aesimc $dat1,$dat1
Adam Langleye9ada862015-05-11 17:20:37 -0700635 aesd $dat2,q13
Adam Langleyd9e397b2015-01-22 14:27:53 -0800636 aesimc $dat2,$dat2
637 veor $tmp1,$ivec,$rndlast
638 aesd $dat1,q14
Adam Langleyd9e397b2015-01-22 14:27:53 -0800639 aesimc $dat1,$dat1
Adam Langleye9ada862015-05-11 17:20:37 -0700640 aesd $dat2,q14
Adam Langleyd9e397b2015-01-22 14:27:53 -0800641 aesimc $dat2,$dat2
642 veor $tmp2,$in1,$rndlast
643 aesd $dat1,q15
644 aesd $dat2,q15
645 b.eq .Lcbc_dec_one
646 veor $tmp1,$tmp1,$dat1
647 veor $tmp2,$tmp2,$dat2
648 vorr $ivec,$in2,$in2
649 vst1.8 {$tmp1},[$out],#16
650 vst1.8 {$tmp2},[$out],#16
651 b .Lcbc_done
652
653.Lcbc_dec_one:
654 veor $tmp1,$tmp1,$dat2
655 vorr $ivec,$in2,$in2
656 vst1.8 {$tmp1},[$out],#16
657
658.Lcbc_done:
659 vst1.8 {$ivec},[$ivp]
660.Lcbc_abort:
661___
662}
663$code.=<<___ if ($flavour !~ /64/);
664 vldmia sp!,{d8-d15}
665 ldmia sp!,{r4-r8,pc}
666___
667$code.=<<___ if ($flavour =~ /64/);
668 ldr x29,[sp],#16
669 ret
670___
671$code.=<<___;
672.size ${prefix}_cbc_encrypt,.-${prefix}_cbc_encrypt
673___
674}}}
675{{{
# Register assignment for ${prefix}_ctr32_encrypt_blocks.
#
#   $inp,$out,$len,$key,$ivp - x0-x4: input pointer, output pointer, length,
#                              key pointer and counter-block pointer
#   $rounds,$cnt,$key_       - round count, per-iteration round counter and
#                              running round-key pointer
#   $ctr                     - the 32-bit counter word, loaded from offset 12
#                              of the counter block
#   $tctr0..$tctr2           - temporaries for the next three counter values
#   $step                    - post-increment used for the tail input load;
#                              shares x12 with $tctr2 (w12)
#
# $dat and $tmp are aliases of $dat0 and $tmp0.
my ($inp,$out,$len,$key,$ivp)=map("x$_",(0..4));
my ($rounds,$cnt,$key_)=("w5","w6","x7");
my ($ctr,$tctr0,$tctr1,$tctr2)=map("w$_",(8..10,12));
my $step="x12";		# aliases with $tctr2

my ($dat0,$dat1,$in0,$in1,$tmp0,$tmp1,$ivec,$rndlast)=map("q$_",(0..7));
my ($dat2,$in2,$tmp2)=map("q$_",(10,11,9));

my ($dat,$tmp)=($dat0,$tmp0);
685
686### q8-q15 preloaded key schedule
687
688$code.=<<___;
689.globl ${prefix}_ctr32_encrypt_blocks
690.type ${prefix}_ctr32_encrypt_blocks,%function
691.align 5
692${prefix}_ctr32_encrypt_blocks:
693___
694$code.=<<___ if ($flavour =~ /64/);
695 stp x29,x30,[sp,#-16]!
696 add x29,sp,#0
697___
698$code.=<<___ if ($flavour !~ /64/);
699 mov ip,sp
700 stmdb sp!,{r4-r10,lr}
701 vstmdb sp!,{d8-d15} @ ABI specification says so
702 ldr r4, [ip] @ load remaining arg
703___
704$code.=<<___;
705 ldr $rounds,[$key,#240]
706
707 ldr $ctr, [$ivp, #12]
708 vld1.32 {$dat0},[$ivp]
709
710 vld1.32 {q8-q9},[$key] // load key schedule...
711 sub $rounds,$rounds,#4
712 mov $step,#16
713 cmp $len,#2
714 add $key_,$key,x5,lsl#4 // pointer to last 5 round keys
715 sub $rounds,$rounds,#2
716 vld1.32 {q12-q13},[$key_],#32
717 vld1.32 {q14-q15},[$key_],#32
718 vld1.32 {$rndlast},[$key_]
719 add $key_,$key,#32
720 mov $cnt,$rounds
721 cclr $step,lo
722#ifndef __ARMEB__
723 rev $ctr, $ctr
724#endif
725 vorr $dat1,$dat0,$dat0
726 add $tctr1, $ctr, #1
727 vorr $dat2,$dat0,$dat0
728 add $ctr, $ctr, #2
729 vorr $ivec,$dat0,$dat0
730 rev $tctr1, $tctr1
731 vmov.32 ${dat1}[3],$tctr1
732 b.ls .Lctr32_tail
733 rev $tctr2, $ctr
734 sub $len,$len,#3 // bias
735 vmov.32 ${dat2}[3],$tctr2
736 b .Loop3x_ctr32
737
738.align 4
739.Loop3x_ctr32:
740 aese $dat0,q8
Adam Langleyd9e397b2015-01-22 14:27:53 -0800741 aesmc $dat0,$dat0
Adam Langleye9ada862015-05-11 17:20:37 -0700742 aese $dat1,q8
Adam Langleyd9e397b2015-01-22 14:27:53 -0800743 aesmc $dat1,$dat1
Adam Langleye9ada862015-05-11 17:20:37 -0700744 aese $dat2,q8
Adam Langleyd9e397b2015-01-22 14:27:53 -0800745 aesmc $dat2,$dat2
Adam Langleye9ada862015-05-11 17:20:37 -0700746 vld1.32 {q8},[$key_],#16
Adam Langleyd9e397b2015-01-22 14:27:53 -0800747 subs $cnt,$cnt,#2
748 aese $dat0,q9
Adam Langleyd9e397b2015-01-22 14:27:53 -0800749 aesmc $dat0,$dat0
Adam Langleye9ada862015-05-11 17:20:37 -0700750 aese $dat1,q9
Adam Langleyd9e397b2015-01-22 14:27:53 -0800751 aesmc $dat1,$dat1
Adam Langleye9ada862015-05-11 17:20:37 -0700752 aese $dat2,q9
Adam Langleyd9e397b2015-01-22 14:27:53 -0800753 aesmc $dat2,$dat2
Adam Langleye9ada862015-05-11 17:20:37 -0700754 vld1.32 {q9},[$key_],#16
Adam Langleyd9e397b2015-01-22 14:27:53 -0800755 b.gt .Loop3x_ctr32
756
757 aese $dat0,q8
Adam Langleyd9e397b2015-01-22 14:27:53 -0800758 aesmc $tmp0,$dat0
Adam Langleye9ada862015-05-11 17:20:37 -0700759 aese $dat1,q8
Adam Langleyd9e397b2015-01-22 14:27:53 -0800760 aesmc $tmp1,$dat1
Adam Langleye9ada862015-05-11 17:20:37 -0700761 vld1.8 {$in0},[$inp],#16
Adam Langleyd9e397b2015-01-22 14:27:53 -0800762 vorr $dat0,$ivec,$ivec
Adam Langleye9ada862015-05-11 17:20:37 -0700763 aese $dat2,q8
764 aesmc $dat2,$dat2
Adam Langleyd9e397b2015-01-22 14:27:53 -0800765 vld1.8 {$in1},[$inp],#16
Adam Langleyd9e397b2015-01-22 14:27:53 -0800766 vorr $dat1,$ivec,$ivec
Adam Langleye9ada862015-05-11 17:20:37 -0700767 aese $tmp0,q9
Adam Langleyd9e397b2015-01-22 14:27:53 -0800768 aesmc $tmp0,$tmp0
Adam Langleye9ada862015-05-11 17:20:37 -0700769 aese $tmp1,q9
Adam Langleyd9e397b2015-01-22 14:27:53 -0800770 aesmc $tmp1,$tmp1
Adam Langleye9ada862015-05-11 17:20:37 -0700771 vld1.8 {$in2},[$inp],#16
772 mov $key_,$key
773 aese $dat2,q9
Adam Langleyd9e397b2015-01-22 14:27:53 -0800774 aesmc $tmp2,$dat2
775 vorr $dat2,$ivec,$ivec
776 add $tctr0,$ctr,#1
777 aese $tmp0,q12
Adam Langleye9ada862015-05-11 17:20:37 -0700778 aesmc $tmp0,$tmp0
Adam Langleyd9e397b2015-01-22 14:27:53 -0800779 aese $tmp1,q12
Adam Langleye9ada862015-05-11 17:20:37 -0700780 aesmc $tmp1,$tmp1
Adam Langleyd9e397b2015-01-22 14:27:53 -0800781 veor $in0,$in0,$rndlast
782 add $tctr1,$ctr,#2
Adam Langleye9ada862015-05-11 17:20:37 -0700783 aese $tmp2,q12
Adam Langleyd9e397b2015-01-22 14:27:53 -0800784 aesmc $tmp2,$tmp2
785 veor $in1,$in1,$rndlast
786 add $ctr,$ctr,#3
787 aese $tmp0,q13
Adam Langleye9ada862015-05-11 17:20:37 -0700788 aesmc $tmp0,$tmp0
Adam Langleyd9e397b2015-01-22 14:27:53 -0800789 aese $tmp1,q13
Adam Langleye9ada862015-05-11 17:20:37 -0700790 aesmc $tmp1,$tmp1
Adam Langleyd9e397b2015-01-22 14:27:53 -0800791 veor $in2,$in2,$rndlast
792 rev $tctr0,$tctr0
Adam Langleye9ada862015-05-11 17:20:37 -0700793 aese $tmp2,q13
Adam Langleyd9e397b2015-01-22 14:27:53 -0800794 aesmc $tmp2,$tmp2
795 vmov.32 ${dat0}[3], $tctr0
796 rev $tctr1,$tctr1
797 aese $tmp0,q14
Adam Langleye9ada862015-05-11 17:20:37 -0700798 aesmc $tmp0,$tmp0
Adam Langleyd9e397b2015-01-22 14:27:53 -0800799 aese $tmp1,q14
Adam Langleye9ada862015-05-11 17:20:37 -0700800 aesmc $tmp1,$tmp1
Adam Langleyd9e397b2015-01-22 14:27:53 -0800801 vmov.32 ${dat1}[3], $tctr1
802 rev $tctr2,$ctr
Adam Langleye9ada862015-05-11 17:20:37 -0700803 aese $tmp2,q14
Adam Langleyd9e397b2015-01-22 14:27:53 -0800804 aesmc $tmp2,$tmp2
805 vmov.32 ${dat2}[3], $tctr2
806 subs $len,$len,#3
807 aese $tmp0,q15
808 aese $tmp1,q15
809 aese $tmp2,q15
810
Adam Langleyd9e397b2015-01-22 14:27:53 -0800811 veor $in0,$in0,$tmp0
Adam Langleye9ada862015-05-11 17:20:37 -0700812 vld1.32 {q8},[$key_],#16 // re-pre-load rndkey[0]
813 vst1.8 {$in0},[$out],#16
Adam Langleyd9e397b2015-01-22 14:27:53 -0800814 veor $in1,$in1,$tmp1
Adam Langleye9ada862015-05-11 17:20:37 -0700815 mov $cnt,$rounds
816 vst1.8 {$in1},[$out],#16
Adam Langleyd9e397b2015-01-22 14:27:53 -0800817 veor $in2,$in2,$tmp2
818 vld1.32 {q9},[$key_],#16 // re-pre-load rndkey[1]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800819 vst1.8 {$in2},[$out],#16
820 b.hs .Loop3x_ctr32
821
822 adds $len,$len,#3
823 b.eq .Lctr32_done
824 cmp $len,#1
825 mov $step,#16
826 cclr $step,eq
827
828.Lctr32_tail:
829 aese $dat0,q8
Adam Langleyd9e397b2015-01-22 14:27:53 -0800830 aesmc $dat0,$dat0
Adam Langleye9ada862015-05-11 17:20:37 -0700831 aese $dat1,q8
Adam Langleyd9e397b2015-01-22 14:27:53 -0800832 aesmc $dat1,$dat1
Adam Langleye9ada862015-05-11 17:20:37 -0700833 vld1.32 {q8},[$key_],#16
Adam Langleyd9e397b2015-01-22 14:27:53 -0800834 subs $cnt,$cnt,#2
835 aese $dat0,q9
Adam Langleyd9e397b2015-01-22 14:27:53 -0800836 aesmc $dat0,$dat0
Adam Langleye9ada862015-05-11 17:20:37 -0700837 aese $dat1,q9
Adam Langleyd9e397b2015-01-22 14:27:53 -0800838 aesmc $dat1,$dat1
Adam Langleye9ada862015-05-11 17:20:37 -0700839 vld1.32 {q9},[$key_],#16
Adam Langleyd9e397b2015-01-22 14:27:53 -0800840 b.gt .Lctr32_tail
841
842 aese $dat0,q8
Adam Langleyd9e397b2015-01-22 14:27:53 -0800843 aesmc $dat0,$dat0
Adam Langleye9ada862015-05-11 17:20:37 -0700844 aese $dat1,q8
Adam Langleyd9e397b2015-01-22 14:27:53 -0800845 aesmc $dat1,$dat1
846 aese $dat0,q9
Adam Langleyd9e397b2015-01-22 14:27:53 -0800847 aesmc $dat0,$dat0
Adam Langleye9ada862015-05-11 17:20:37 -0700848 aese $dat1,q9
Adam Langleyd9e397b2015-01-22 14:27:53 -0800849 aesmc $dat1,$dat1
850 vld1.8 {$in0},[$inp],$step
851 aese $dat0,q12
Adam Langleye9ada862015-05-11 17:20:37 -0700852 aesmc $dat0,$dat0
Adam Langleyd9e397b2015-01-22 14:27:53 -0800853 aese $dat1,q12
Adam Langleye9ada862015-05-11 17:20:37 -0700854 aesmc $dat1,$dat1
Adam Langleyd9e397b2015-01-22 14:27:53 -0800855 vld1.8 {$in1},[$inp]
Adam Langleyd9e397b2015-01-22 14:27:53 -0800856 aese $dat0,q13
Adam Langleye9ada862015-05-11 17:20:37 -0700857 aesmc $dat0,$dat0
Adam Langleyd9e397b2015-01-22 14:27:53 -0800858 aese $dat1,q13
Adam Langleyd9e397b2015-01-22 14:27:53 -0800859 aesmc $dat1,$dat1
Adam Langleyd9e397b2015-01-22 14:27:53 -0800860 veor $in0,$in0,$rndlast
Adam Langleye9ada862015-05-11 17:20:37 -0700861 aese $dat0,q14
Adam Langleyd9e397b2015-01-22 14:27:53 -0800862 aesmc $dat0,$dat0
Adam Langleye9ada862015-05-11 17:20:37 -0700863 aese $dat1,q14
Adam Langleyd9e397b2015-01-22 14:27:53 -0800864 aesmc $dat1,$dat1
865 veor $in1,$in1,$rndlast
866 aese $dat0,q15
867 aese $dat1,q15
868
869 cmp $len,#1
870 veor $in0,$in0,$dat0
871 veor $in1,$in1,$dat1
872 vst1.8 {$in0},[$out],#16
873 b.eq .Lctr32_done
874 vst1.8 {$in1},[$out]
875
876.Lctr32_done:
877___
878$code.=<<___ if ($flavour !~ /64/);
879 vldmia sp!,{d8-d15}
880 ldmia sp!,{r4-r10,pc}
881___
882$code.=<<___ if ($flavour =~ /64/);
883 ldr x29,[sp],#16
884 ret
885___
886$code.=<<___;
887.size ${prefix}_ctr32_encrypt_blocks,.-${prefix}_ctr32_encrypt_blocks
888___
889}}}
890$code.=<<___;
891#endif
892___
893########################################
# Final pass: walk the accumulated $code one line at a time, expand the
# `...` escapes with eval, transliterate the mixed 32-/64-bit mnemonics and
# register names into the syntax of the selected flavour, and print the
# result to STDOUT. The order of the substitutions matters; the "or" chains
# apply at most one mnemonic rewrite per line.
if ($flavour =~ /64/) {			######## 64-bit code
    # Base encodings of the AES instructions. unaes() ORs the destination
    # register number into the low bits and the source register number
    # shifted left by 5.
    my %opcode = (
	"aesd"	=>	0x4e285800,	"aese"	=>	0x4e284800,
	"aesimc"=>	0x4e287800,	"aesmc"	=>	0x4e286800	);

    # unaes($mnemonic,$arg): encode "aesX <reg>,<reg>" as a raw .inst word.
    # Evaluates to a false value when $arg does not contain two q/v
    # registers. Not called by the loop below: the substitution that used it
    # is commented out, so 64-bit builds emit the AES mnemonics as written.
    local *unaes = sub {
	my ($mnemonic,$arg)=@_;

	$arg =~ m/[qv]([0-9]+)[^,]*,\s*[qv]([0-9]+)/o	&&
	sprintf ".inst\t0x%08x\t//%s %s",
			$opcode{$mnemonic}|$1|($2<<5),
			$mnemonic,$arg;
    };

    foreach(split("\n",$code)) {
	s/\`([^\`]*)\`/eval($1)/geo;

	# qN -> vN.16b for N<8, v(N+8).16b otherwise
	s/\bq([0-9]+)\b/"v".($1<8?$1:$1+8).".16b"/geo;	# old->new registers
	s/@\s/\/\//o;			# old->new style commentary

	#s/[v]?(aes\w+)\s+([qv].*)/unaes($1,$2)/geo	or
	s/cclr\s+([wx])([^,]+),\s*([a-z]+)/csel $1$2,$1zr,$1$2,$3/o	or
	s/mov\.([a-z]+)\s+([wx][0-9]+),\s*([wx][0-9]+)/csel $2,$3,$2,$1/o	or
	s/vmov\.i8/movi/o	or	# fix up legacy mnemonics
	s/vext\.8/ext/o		or
	s/vrev32\.8/rev32/o	or
	s/vtst\.8/cmtst/o	or
	s/vshr/ushr/o		or
	s/^(\s+)v/$1/o		or	# strip off v prefix
	s/\bbx\s+lr\b/ret/o;

	# fix up remaining legacy suffixes
	s/\.[ui]?8//o;
	m/\],#8/o and s/\.16b/\.8b/go;
	s/\.[ui]?32//o and s/\.16b/\.4s/go;
	s/\.[ui]?64//o and s/\.16b/\.2d/go;
	s/\.[42]([sd])\[([0-3])\]/\.$1\[$2\]/o;

	print $_,"\n";
    }
} else {				######## 32-bit code
    # Base encodings of the AES instructions; unaes() ORs the two Q register
    # numbers into them, each split into its low three bits and its high bit.
    my %opcode = (
	"aesd"	=>	0xf3b00340,	"aese"	=>	0xf3b00300,
	"aesimc"=>	0xf3b003c0,	"aesmc"	=>	0xf3b00380	);

    # unaes($mnemonic,$arg): encode "aesX qD,qM" as four raw bytes, with the
    # original instruction kept as a trailing comment. Yields nothing when
    # $arg does not contain two q/v registers.
    local *unaes = sub {
	my ($mnemonic,$arg)=@_;

	if ($arg =~ m/[qv]([0-9]+)[^,]*,\s*[qv]([0-9]+)/o) {
	    my $word = $opcode{$mnemonic}|(($1&7)<<13)|(($1&8)<<19)
					 |(($2&7)<<1) |(($2&8)<<2);
	    # since ARMv7 instructions are always encoded little-endian.
	    # correct solution is to use .inst directive, but older
	    # assemblers don't implement it:-(
	    sprintf ".byte\t0x%02x,0x%02x,0x%02x,0x%02x\t@ %s %s",
			$word&0xff,($word>>8)&0xff,
			($word>>16)&0xff,($word>>24)&0xff,
			$mnemonic,$arg;
	}
    };

    # unvtbl("qD,{qN},qM"): split a Q-register vtbl.8 into two D-register
    # lookups, one for each half of qD/qM, both indexing the same {qN}.
    sub unvtbl {
	my $arg=shift;

	$arg =~ m/q([0-9]+),\s*\{q([0-9]+)\},\s*q([0-9]+)/o &&
	sprintf	"vtbl.8 d%d,{q%d},d%d\n\t".
		"vtbl.8 d%d,{q%d},d%d", 2*$1,$2,2*$3, 2*$1+1,$2,2*$3+1;
    }

    # unvdup32("qD,qM[i]"): rewrite the lane reference qM[i] as the
    # D-register lane d(2*M + i/2)[i%2].
    sub unvdup32 {
	my $arg=shift;

	$arg =~ m/q([0-9]+),\s*q([0-9]+)\[([0-3])\]/o &&
	sprintf	"vdup.32 q%d,d%d[%d]",$1,2*$2+($3>>1),$3&1;
    }

    # unvmov32("qD[i],<src>"): rewrite the destination lane qD[i] as
    # d(2*D + i/2)[i%2]; <src> is passed through unchanged.
    sub unvmov32 {
	my $arg=shift;

	$arg =~ m/q([0-9]+)\[([0-3])\],(.*)/o &&
	sprintf	"vmov.32 d%d[%d],%s",2*$1+($2>>1),$2&1,$3;
    }

    foreach(split("\n",$code)) {
	s/\`([^\`]*)\`/eval($1)/geo;

	s/\b[wx]([0-9]+)\b/r$1/go;		# new->old registers
	s/\bv([0-9])\.[12468]+[bsd]\b/q$1/go;	# new->old registers
	s/\/\/\s?/@ /o;				# new->old style commentary

	# fix up remaining new-style suffixes
	s/\{q([0-9]+)\},\s*\[(.+)\],#8/sprintf "{d%d},[$2]!",2*$1/eo	or
	s/\],#[0-9]+/]!/o;

	s/[v]?(aes\w+)\s+([qv].*)/unaes($1,$2)/geo	or
	s/cclr\s+([^,]+),\s*([a-z]+)/mov$2 $1,#0/o	or
	s/vtbl\.8\s+(.*)/unvtbl($1)/geo			or
	s/vdup\.32\s+(.*)/unvdup32($1)/geo		or
	s/vmov\.32\s+(.*)/unvmov32($1)/geo		or
	s/^(\s+)b\./$1b/o				or
	s/^(\s+)mov\./$1mov/o				or
	s/^(\s+)ret/$1bx\tlr/o;

	print $_,"\n";
    }
}
1000
# STDOUT was re-pointed at the pipe to arm-xlate.pl near the top of this
# file. Closing a pipe waits for the child and returns false if the write
# failed or the child exited with a non-zero status, so check it: otherwise
# a failing translator leaves a truncated output file and a zero exit code.
close STDOUT or die "error closing STDOUT: $! (child status $?)";