// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif

#if !defined(OPENSSL_NO_ASM)
#if defined(__arm__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
@ Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved.
@
@ Licensed under the OpenSSL license (the "License"). You may not use
@ this file except in compliance with the License. You can obtain a copy
@ in the file LICENSE in the source distribution or at
@ https://www.openssl.org/source/license.html


@ ====================================================================
@ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@ project. The module is, however, dual licensed under OpenSSL and
@ CRYPTOGAMS licenses depending on where you obtain it. For further
@ details see http://www.openssl.org/~appro/cryptogams/.
@
@ Permission to use under GPL terms is granted.
@ ====================================================================

@ SHA256 block procedure for ARMv4. May 2007.

@ Performance is ~2x better than gcc 3.4 generated code and in "abso-
@ lute" terms is ~2250 cycles per 64-byte block or ~35 cycles per
@ byte [on single-issue Xscale PXA250 core].

@ July 2010.
@
@ Rescheduling for dual-issue pipeline resulted in 22% improvement on
@ Cortex A8 core and ~20 cycles per processed byte.

@ February 2011.
@
@ Profiler-assisted and platform-specific optimization resulted in 16%
@ improvement on Cortex A8 core and ~15.4 cycles per processed byte.

@ September 2013.
@
@ Add NEON implementation. On Cortex A8 it was measured to process one
@ byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon
@ S4 does it in 12.5 cycles too, but it's 50% faster than integer-only
@ code (meaning that the latter performs sub-optimally, nothing was
@ done about it).

@ May 2014.
@
@ Add ARMv8 code path performing at 2.0 cpb on Apple A7.

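@ Reference (editor's note, standard FIPS 180-4 definitions, not part
@ of the generated script output): the compression rounds below compute
@	Sigma0(x) = (x ror 2)  ^ (x ror 13) ^ (x ror 22)
@	Sigma1(x) = (x ror 6)  ^ (x ror 11) ^ (x ror 25)
@	Ch(e,f,g) = (e & f) ^ (~e & g),    computed as ((f^g)&e)^g
@	Maj(a,b,c) = (a&b) ^ (a&c) ^ (b&c), computed as ((a^b)&(b^c))^b
@ Each Sigma is folded into two EORs plus one deferred rotate, e.g.
@ Sigma1(e) = (e ^ (e ror 5) ^ (e ror 19)) ror 6, which is why the
@ rotate counts in the instructions differ from the ones above.
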
#ifndef __KERNEL__
# include <openssl/arm_arch.h>
#else
# define __ARM_ARCH__ __LINUX_ARM_ARCH__
# define __ARM_MAX_ARCH__ 7
#endif

@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both
@ ARMv7 and ARMv8 processors. It does have ARMv8-only code, but those
@ instructions are manually-encoded. (See unsha256.)
.arch armv7-a

.text
#if defined(__thumb2__)
.syntax unified
.thumb
#else
.code 32
#endif

.type K256,%object
.align 5
K256:
.word 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
.word 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
.word 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
.word 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
.word 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
.word 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
.word 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
.word 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
.word 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
.word 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
.word 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
.word 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
.word 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
.word 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
.word 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
.word 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
.size K256,.-K256
.word 0 @ terminator
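@ K256 holds the 64 SHA-256 round constants: the first 32 bits of the
@ fractional parts of the cube roots of the first 64 primes. The round
@ loop detects the end of the table by the low byte (0xf2) of the last
@ constant rather than by a counter; see the "done?" checks below.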
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
.LOPENSSL_armcap:
.word OPENSSL_armcap_P-.Lsha256_block_data_order
#endif
.align 5

.globl sha256_block_data_order
.hidden sha256_block_data_order
.type sha256_block_data_order,%function
sha256_block_data_order:
.Lsha256_block_data_order:
#if __ARM_ARCH__<7 && !defined(__thumb2__)
	sub r3,pc,#8	@ sha256_block_data_order
#else
	adr r3,.Lsha256_block_data_order
#endif
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
	ldr r12,.LOPENSSL_armcap
	ldr r12,[r3,r12]	@ OPENSSL_armcap_P
#ifdef __APPLE__
	ldr r12,[r12]
#endif
	tst r12,#ARMV8_SHA256
	bne .LARMv8
	tst r12,#ARMV7_NEON
	bne .LNEON
#endif
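@ Integer-only code follows; the capability tests above divert CPUs
@ with the ARMv8 SHA-256 extension to .LARMv8 and NEON-capable CPUs
@ to .LNEON at run time.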
	add r2,r1,r2,lsl#6	@ len to point at the end of inp
	stmdb sp!,{r0,r1,r2,r4-r11,lr}
	ldmia r0,{r4,r5,r6,r7,r8,r9,r10,r11}
	sub r14,r3,#256+32	@ K256
	sub sp,sp,#16*4	@ alloca(X[16])
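	@ Register use in the integer loop: r4-r11 hold the working
	@ variables a-h, r14 walks K256, r1 is the input pointer, and the
	@ 16 words below sp hold the X[] ring buffer; ctx, inp and
	@ inp+len were parked at sp+16*4, sp+17*4 and sp+18*4 above.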
.Loop:
# if __ARM_ARCH__>=7
	ldr r2,[r1],#4
# else
	ldrb r2,[r1,#3]
# endif
	eor r3,r5,r6	@ magic
	eor r12,r12,r12
#if __ARM_ARCH__>=7
	@ ldr r2,[r1],#4	@ 0
# if 0==15
	str r1,[sp,#17*4]	@ make room for r1
# endif
	eor r0,r8,r8,ror#5
	add r4,r4,r12	@ h+=Maj(a,b,c) from the past
	eor r0,r0,r8,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev r2,r2
# endif
#else
	@ ldrb r2,[r1,#3]	@ 0
	add r4,r4,r12	@ h+=Maj(a,b,c) from the past
	ldrb r12,[r1,#2]
	ldrb r0,[r1,#1]
	orr r2,r2,r12,lsl#8
	ldrb r12,[r1],#4
	orr r2,r2,r0,lsl#16
# if 0==15
	str r1,[sp,#17*4]	@ make room for r1
# endif
	eor r0,r8,r8,ror#5
	orr r2,r2,r12,lsl#24
	eor r0,r0,r8,ror#19	@ Sigma1(e)
#endif
	ldr r12,[r14],#4	@ *K256++
	add r11,r11,r2	@ h+=X[i]
	str r2,[sp,#0*4]
	eor r2,r9,r10
	add r11,r11,r0,ror#6	@ h+=Sigma1(e)
	and r2,r2,r8
	add r11,r11,r12	@ h+=K256[i]
	eor r2,r2,r10	@ Ch(e,f,g)
	eor r0,r4,r4,ror#11
	add r11,r11,r2	@ h+=Ch(e,f,g)
#if 0==31
	and r12,r12,#0xff
	cmp r12,#0xf2	@ done?
#endif
#if 0<15
# if __ARM_ARCH__>=7
	ldr r2,[r1],#4	@ prefetch
# else
	ldrb r2,[r1,#3]
# endif
	eor r12,r4,r5	@ a^b, b^c in next round
#else
	ldr r2,[sp,#2*4]	@ from future BODY_16_xx
	eor r12,r4,r5	@ a^b, b^c in next round
	ldr r1,[sp,#15*4]	@ from future BODY_16_xx
#endif
	eor r0,r0,r4,ror#20	@ Sigma0(a)
	and r3,r3,r12	@ (b^c)&=(a^b)
	add r7,r7,r11	@ d+=h
	eor r3,r3,r5	@ Maj(a,b,c)
	add r11,r11,r0,ror#2	@ h+=Sigma0(a)
	@ add r11,r11,r3	@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr r2,[r1],#4	@ 1
# if 1==15
	str r1,[sp,#17*4]	@ make room for r1
# endif
	eor r0,r7,r7,ror#5
	add r11,r11,r3	@ h+=Maj(a,b,c) from the past
	eor r0,r0,r7,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev r2,r2
# endif
#else
	@ ldrb r2,[r1,#3]	@ 1
	add r11,r11,r3	@ h+=Maj(a,b,c) from the past
	ldrb r3,[r1,#2]
	ldrb r0,[r1,#1]
	orr r2,r2,r3,lsl#8
	ldrb r3,[r1],#4
	orr r2,r2,r0,lsl#16
# if 1==15
	str r1,[sp,#17*4]	@ make room for r1
# endif
	eor r0,r7,r7,ror#5
	orr r2,r2,r3,lsl#24
	eor r0,r0,r7,ror#19	@ Sigma1(e)
#endif
	ldr r3,[r14],#4	@ *K256++
	add r10,r10,r2	@ h+=X[i]
	str r2,[sp,#1*4]
	eor r2,r8,r9
	add r10,r10,r0,ror#6	@ h+=Sigma1(e)
	and r2,r2,r7
	add r10,r10,r3	@ h+=K256[i]
	eor r2,r2,r9	@ Ch(e,f,g)
	eor r0,r11,r11,ror#11
	add r10,r10,r2	@ h+=Ch(e,f,g)
#if 1==31
	and r3,r3,#0xff
	cmp r3,#0xf2	@ done?
#endif
#if 1<15
# if __ARM_ARCH__>=7
	ldr r2,[r1],#4	@ prefetch
# else
	ldrb r2,[r1,#3]
# endif
	eor r3,r11,r4	@ a^b, b^c in next round
#else
	ldr r2,[sp,#3*4]	@ from future BODY_16_xx
	eor r3,r11,r4	@ a^b, b^c in next round
	ldr r1,[sp,#0*4]	@ from future BODY_16_xx
#endif
	eor r0,r0,r11,ror#20	@ Sigma0(a)
	and r12,r12,r3	@ (b^c)&=(a^b)
	add r6,r6,r10	@ d+=h
	eor r12,r12,r4	@ Maj(a,b,c)
	add r10,r10,r0,ror#2	@ h+=Sigma0(a)
	@ add r10,r10,r12	@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr r2,[r1],#4	@ 2
# if 2==15
	str r1,[sp,#17*4]	@ make room for r1
# endif
	eor r0,r6,r6,ror#5
	add r10,r10,r12	@ h+=Maj(a,b,c) from the past
	eor r0,r0,r6,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev r2,r2
# endif
#else
	@ ldrb r2,[r1,#3]	@ 2
	add r10,r10,r12	@ h+=Maj(a,b,c) from the past
	ldrb r12,[r1,#2]
	ldrb r0,[r1,#1]
	orr r2,r2,r12,lsl#8
	ldrb r12,[r1],#4
	orr r2,r2,r0,lsl#16
# if 2==15
	str r1,[sp,#17*4]	@ make room for r1
# endif
	eor r0,r6,r6,ror#5
	orr r2,r2,r12,lsl#24
	eor r0,r0,r6,ror#19	@ Sigma1(e)
#endif
	ldr r12,[r14],#4	@ *K256++
	add r9,r9,r2	@ h+=X[i]
	str r2,[sp,#2*4]
	eor r2,r7,r8
	add r9,r9,r0,ror#6	@ h+=Sigma1(e)
	and r2,r2,r6
	add r9,r9,r12	@ h+=K256[i]
	eor r2,r2,r8	@ Ch(e,f,g)
	eor r0,r10,r10,ror#11
	add r9,r9,r2	@ h+=Ch(e,f,g)
#if 2==31
	and r12,r12,#0xff
	cmp r12,#0xf2	@ done?
#endif
#if 2<15
# if __ARM_ARCH__>=7
	ldr r2,[r1],#4	@ prefetch
# else
	ldrb r2,[r1,#3]
# endif
	eor r12,r10,r11	@ a^b, b^c in next round
#else
	ldr r2,[sp,#4*4]	@ from future BODY_16_xx
	eor r12,r10,r11	@ a^b, b^c in next round
	ldr r1,[sp,#1*4]	@ from future BODY_16_xx
#endif
	eor r0,r0,r10,ror#20	@ Sigma0(a)
	and r3,r3,r12	@ (b^c)&=(a^b)
	add r5,r5,r9	@ d+=h
	eor r3,r3,r11	@ Maj(a,b,c)
	add r9,r9,r0,ror#2	@ h+=Sigma0(a)
	@ add r9,r9,r3	@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr r2,[r1],#4	@ 3
# if 3==15
	str r1,[sp,#17*4]	@ make room for r1
# endif
	eor r0,r5,r5,ror#5
	add r9,r9,r3	@ h+=Maj(a,b,c) from the past
	eor r0,r0,r5,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev r2,r2
# endif
#else
	@ ldrb r2,[r1,#3]	@ 3
	add r9,r9,r3	@ h+=Maj(a,b,c) from the past
	ldrb r3,[r1,#2]
	ldrb r0,[r1,#1]
	orr r2,r2,r3,lsl#8
	ldrb r3,[r1],#4
	orr r2,r2,r0,lsl#16
# if 3==15
	str r1,[sp,#17*4]	@ make room for r1
# endif
	eor r0,r5,r5,ror#5
	orr r2,r2,r3,lsl#24
	eor r0,r0,r5,ror#19	@ Sigma1(e)
#endif
	ldr r3,[r14],#4	@ *K256++
	add r8,r8,r2	@ h+=X[i]
	str r2,[sp,#3*4]
	eor r2,r6,r7
	add r8,r8,r0,ror#6	@ h+=Sigma1(e)
	and r2,r2,r5
	add r8,r8,r3	@ h+=K256[i]
	eor r2,r2,r7	@ Ch(e,f,g)
	eor r0,r9,r9,ror#11
	add r8,r8,r2	@ h+=Ch(e,f,g)
#if 3==31
	and r3,r3,#0xff
	cmp r3,#0xf2	@ done?
#endif
#if 3<15
# if __ARM_ARCH__>=7
	ldr r2,[r1],#4	@ prefetch
# else
	ldrb r2,[r1,#3]
# endif
	eor r3,r9,r10	@ a^b, b^c in next round
#else
	ldr r2,[sp,#5*4]	@ from future BODY_16_xx
	eor r3,r9,r10	@ a^b, b^c in next round
	ldr r1,[sp,#2*4]	@ from future BODY_16_xx
#endif
	eor r0,r0,r9,ror#20	@ Sigma0(a)
	and r12,r12,r3	@ (b^c)&=(a^b)
	add r4,r4,r8	@ d+=h
	eor r12,r12,r10	@ Maj(a,b,c)
	add r8,r8,r0,ror#2	@ h+=Sigma0(a)
	@ add r8,r8,r12	@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr r2,[r1],#4	@ 4
# if 4==15
	str r1,[sp,#17*4]	@ make room for r1
# endif
	eor r0,r4,r4,ror#5
	add r8,r8,r12	@ h+=Maj(a,b,c) from the past
	eor r0,r0,r4,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev r2,r2
# endif
#else
	@ ldrb r2,[r1,#3]	@ 4
	add r8,r8,r12	@ h+=Maj(a,b,c) from the past
	ldrb r12,[r1,#2]
	ldrb r0,[r1,#1]
	orr r2,r2,r12,lsl#8
	ldrb r12,[r1],#4
	orr r2,r2,r0,lsl#16
# if 4==15
	str r1,[sp,#17*4]	@ make room for r1
# endif
	eor r0,r4,r4,ror#5
	orr r2,r2,r12,lsl#24
	eor r0,r0,r4,ror#19	@ Sigma1(e)
#endif
	ldr r12,[r14],#4	@ *K256++
	add r7,r7,r2	@ h+=X[i]
	str r2,[sp,#4*4]
	eor r2,r5,r6
	add r7,r7,r0,ror#6	@ h+=Sigma1(e)
	and r2,r2,r4
	add r7,r7,r12	@ h+=K256[i]
	eor r2,r2,r6	@ Ch(e,f,g)
	eor r0,r8,r8,ror#11
	add r7,r7,r2	@ h+=Ch(e,f,g)
#if 4==31
	and r12,r12,#0xff
	cmp r12,#0xf2	@ done?
#endif
#if 4<15
# if __ARM_ARCH__>=7
	ldr r2,[r1],#4	@ prefetch
# else
	ldrb r2,[r1,#3]
# endif
	eor r12,r8,r9	@ a^b, b^c in next round
#else
	ldr r2,[sp,#6*4]	@ from future BODY_16_xx
	eor r12,r8,r9	@ a^b, b^c in next round
	ldr r1,[sp,#3*4]	@ from future BODY_16_xx
#endif
	eor r0,r0,r8,ror#20	@ Sigma0(a)
	and r3,r3,r12	@ (b^c)&=(a^b)
	add r11,r11,r7	@ d+=h
	eor r3,r3,r9	@ Maj(a,b,c)
	add r7,r7,r0,ror#2	@ h+=Sigma0(a)
	@ add r7,r7,r3	@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr r2,[r1],#4	@ 5
# if 5==15
	str r1,[sp,#17*4]	@ make room for r1
# endif
	eor r0,r11,r11,ror#5
	add r7,r7,r3	@ h+=Maj(a,b,c) from the past
	eor r0,r0,r11,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev r2,r2
# endif
#else
	@ ldrb r2,[r1,#3]	@ 5
	add r7,r7,r3	@ h+=Maj(a,b,c) from the past
	ldrb r3,[r1,#2]
	ldrb r0,[r1,#1]
	orr r2,r2,r3,lsl#8
	ldrb r3,[r1],#4
	orr r2,r2,r0,lsl#16
# if 5==15
	str r1,[sp,#17*4]	@ make room for r1
# endif
	eor r0,r11,r11,ror#5
	orr r2,r2,r3,lsl#24
	eor r0,r0,r11,ror#19	@ Sigma1(e)
#endif
	ldr r3,[r14],#4	@ *K256++
	add r6,r6,r2	@ h+=X[i]
	str r2,[sp,#5*4]
	eor r2,r4,r5
	add r6,r6,r0,ror#6	@ h+=Sigma1(e)
	and r2,r2,r11
	add r6,r6,r3	@ h+=K256[i]
	eor r2,r2,r5	@ Ch(e,f,g)
	eor r0,r7,r7,ror#11
	add r6,r6,r2	@ h+=Ch(e,f,g)
#if 5==31
	and r3,r3,#0xff
	cmp r3,#0xf2	@ done?
#endif
#if 5<15
# if __ARM_ARCH__>=7
	ldr r2,[r1],#4	@ prefetch
# else
	ldrb r2,[r1,#3]
# endif
	eor r3,r7,r8	@ a^b, b^c in next round
#else
	ldr r2,[sp,#7*4]	@ from future BODY_16_xx
	eor r3,r7,r8	@ a^b, b^c in next round
	ldr r1,[sp,#4*4]	@ from future BODY_16_xx
#endif
	eor r0,r0,r7,ror#20	@ Sigma0(a)
	and r12,r12,r3	@ (b^c)&=(a^b)
	add r10,r10,r6	@ d+=h
	eor r12,r12,r8	@ Maj(a,b,c)
	add r6,r6,r0,ror#2	@ h+=Sigma0(a)
	@ add r6,r6,r12	@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr r2,[r1],#4	@ 6
# if 6==15
	str r1,[sp,#17*4]	@ make room for r1
# endif
	eor r0,r10,r10,ror#5
	add r6,r6,r12	@ h+=Maj(a,b,c) from the past
	eor r0,r0,r10,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev r2,r2
# endif
#else
	@ ldrb r2,[r1,#3]	@ 6
	add r6,r6,r12	@ h+=Maj(a,b,c) from the past
	ldrb r12,[r1,#2]
	ldrb r0,[r1,#1]
	orr r2,r2,r12,lsl#8
	ldrb r12,[r1],#4
	orr r2,r2,r0,lsl#16
# if 6==15
	str r1,[sp,#17*4]	@ make room for r1
# endif
	eor r0,r10,r10,ror#5
	orr r2,r2,r12,lsl#24
	eor r0,r0,r10,ror#19	@ Sigma1(e)
#endif
	ldr r12,[r14],#4	@ *K256++
	add r5,r5,r2	@ h+=X[i]
	str r2,[sp,#6*4]
	eor r2,r11,r4
	add r5,r5,r0,ror#6	@ h+=Sigma1(e)
	and r2,r2,r10
	add r5,r5,r12	@ h+=K256[i]
	eor r2,r2,r4	@ Ch(e,f,g)
	eor r0,r6,r6,ror#11
	add r5,r5,r2	@ h+=Ch(e,f,g)
#if 6==31
	and r12,r12,#0xff
	cmp r12,#0xf2	@ done?
#endif
#if 6<15
# if __ARM_ARCH__>=7
	ldr r2,[r1],#4	@ prefetch
# else
	ldrb r2,[r1,#3]
# endif
	eor r12,r6,r7	@ a^b, b^c in next round
#else
	ldr r2,[sp,#8*4]	@ from future BODY_16_xx
	eor r12,r6,r7	@ a^b, b^c in next round
	ldr r1,[sp,#5*4]	@ from future BODY_16_xx
#endif
	eor r0,r0,r6,ror#20	@ Sigma0(a)
	and r3,r3,r12	@ (b^c)&=(a^b)
	add r9,r9,r5	@ d+=h
	eor r3,r3,r7	@ Maj(a,b,c)
	add r5,r5,r0,ror#2	@ h+=Sigma0(a)
	@ add r5,r5,r3	@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr r2,[r1],#4	@ 7
# if 7==15
	str r1,[sp,#17*4]	@ make room for r1
# endif
	eor r0,r9,r9,ror#5
	add r5,r5,r3	@ h+=Maj(a,b,c) from the past
	eor r0,r0,r9,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev r2,r2
# endif
#else
	@ ldrb r2,[r1,#3]	@ 7
	add r5,r5,r3	@ h+=Maj(a,b,c) from the past
	ldrb r3,[r1,#2]
	ldrb r0,[r1,#1]
	orr r2,r2,r3,lsl#8
	ldrb r3,[r1],#4
	orr r2,r2,r0,lsl#16
# if 7==15
	str r1,[sp,#17*4]	@ make room for r1
# endif
	eor r0,r9,r9,ror#5
	orr r2,r2,r3,lsl#24
	eor r0,r0,r9,ror#19	@ Sigma1(e)
#endif
	ldr r3,[r14],#4	@ *K256++
	add r4,r4,r2	@ h+=X[i]
	str r2,[sp,#7*4]
	eor r2,r10,r11
	add r4,r4,r0,ror#6	@ h+=Sigma1(e)
	and r2,r2,r9
	add r4,r4,r3	@ h+=K256[i]
	eor r2,r2,r11	@ Ch(e,f,g)
	eor r0,r5,r5,ror#11
	add r4,r4,r2	@ h+=Ch(e,f,g)
#if 7==31
	and r3,r3,#0xff
	cmp r3,#0xf2	@ done?
#endif
#if 7<15
# if __ARM_ARCH__>=7
	ldr r2,[r1],#4	@ prefetch
# else
	ldrb r2,[r1,#3]
# endif
	eor r3,r5,r6	@ a^b, b^c in next round
#else
	ldr r2,[sp,#9*4]	@ from future BODY_16_xx
	eor r3,r5,r6	@ a^b, b^c in next round
	ldr r1,[sp,#6*4]	@ from future BODY_16_xx
#endif
	eor r0,r0,r5,ror#20	@ Sigma0(a)
	and r12,r12,r3	@ (b^c)&=(a^b)
	add r8,r8,r4	@ d+=h
	eor r12,r12,r6	@ Maj(a,b,c)
	add r4,r4,r0,ror#2	@ h+=Sigma0(a)
	@ add r4,r4,r12	@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr r2,[r1],#4	@ 8
# if 8==15
	str r1,[sp,#17*4]	@ make room for r1
# endif
	eor r0,r8,r8,ror#5
	add r4,r4,r12	@ h+=Maj(a,b,c) from the past
	eor r0,r0,r8,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev r2,r2
# endif
#else
	@ ldrb r2,[r1,#3]	@ 8
	add r4,r4,r12	@ h+=Maj(a,b,c) from the past
	ldrb r12,[r1,#2]
	ldrb r0,[r1,#1]
	orr r2,r2,r12,lsl#8
	ldrb r12,[r1],#4
	orr r2,r2,r0,lsl#16
# if 8==15
	str r1,[sp,#17*4]	@ make room for r1
# endif
	eor r0,r8,r8,ror#5
	orr r2,r2,r12,lsl#24
	eor r0,r0,r8,ror#19	@ Sigma1(e)
#endif
	ldr r12,[r14],#4	@ *K256++
	add r11,r11,r2	@ h+=X[i]
	str r2,[sp,#8*4]
	eor r2,r9,r10
	add r11,r11,r0,ror#6	@ h+=Sigma1(e)
	and r2,r2,r8
	add r11,r11,r12	@ h+=K256[i]
	eor r2,r2,r10	@ Ch(e,f,g)
	eor r0,r4,r4,ror#11
	add r11,r11,r2	@ h+=Ch(e,f,g)
#if 8==31
	and r12,r12,#0xff
	cmp r12,#0xf2	@ done?
#endif
#if 8<15
# if __ARM_ARCH__>=7
	ldr r2,[r1],#4	@ prefetch
# else
	ldrb r2,[r1,#3]
# endif
	eor r12,r4,r5	@ a^b, b^c in next round
#else
	ldr r2,[sp,#10*4]	@ from future BODY_16_xx
	eor r12,r4,r5	@ a^b, b^c in next round
	ldr r1,[sp,#7*4]	@ from future BODY_16_xx
#endif
	eor r0,r0,r4,ror#20	@ Sigma0(a)
	and r3,r3,r12	@ (b^c)&=(a^b)
	add r7,r7,r11	@ d+=h
	eor r3,r3,r5	@ Maj(a,b,c)
	add r11,r11,r0,ror#2	@ h+=Sigma0(a)
	@ add r11,r11,r3	@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr r2,[r1],#4	@ 9
# if 9==15
	str r1,[sp,#17*4]	@ make room for r1
# endif
	eor r0,r7,r7,ror#5
	add r11,r11,r3	@ h+=Maj(a,b,c) from the past
	eor r0,r0,r7,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev r2,r2
# endif
#else
	@ ldrb r2,[r1,#3]	@ 9
	add r11,r11,r3	@ h+=Maj(a,b,c) from the past
	ldrb r3,[r1,#2]
	ldrb r0,[r1,#1]
	orr r2,r2,r3,lsl#8
	ldrb r3,[r1],#4
	orr r2,r2,r0,lsl#16
# if 9==15
	str r1,[sp,#17*4]	@ make room for r1
# endif
	eor r0,r7,r7,ror#5
	orr r2,r2,r3,lsl#24
	eor r0,r0,r7,ror#19	@ Sigma1(e)
#endif
	ldr r3,[r14],#4	@ *K256++
	add r10,r10,r2	@ h+=X[i]
	str r2,[sp,#9*4]
	eor r2,r8,r9
	add r10,r10,r0,ror#6	@ h+=Sigma1(e)
	and r2,r2,r7
	add r10,r10,r3	@ h+=K256[i]
	eor r2,r2,r9	@ Ch(e,f,g)
	eor r0,r11,r11,ror#11
	add r10,r10,r2	@ h+=Ch(e,f,g)
#if 9==31
	and r3,r3,#0xff
	cmp r3,#0xf2	@ done?
#endif
#if 9<15
# if __ARM_ARCH__>=7
	ldr r2,[r1],#4	@ prefetch
# else
	ldrb r2,[r1,#3]
# endif
	eor r3,r11,r4	@ a^b, b^c in next round
#else
	ldr r2,[sp,#11*4]	@ from future BODY_16_xx
	eor r3,r11,r4	@ a^b, b^c in next round
	ldr r1,[sp,#8*4]	@ from future BODY_16_xx
#endif
	eor r0,r0,r11,ror#20	@ Sigma0(a)
	and r12,r12,r3	@ (b^c)&=(a^b)
	add r6,r6,r10	@ d+=h
	eor r12,r12,r4	@ Maj(a,b,c)
	add r10,r10,r0,ror#2	@ h+=Sigma0(a)
	@ add r10,r10,r12	@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr r2,[r1],#4	@ 10
# if 10==15
	str r1,[sp,#17*4]	@ make room for r1
# endif
	eor r0,r6,r6,ror#5
	add r10,r10,r12	@ h+=Maj(a,b,c) from the past
	eor r0,r0,r6,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev r2,r2
# endif
#else
	@ ldrb r2,[r1,#3]	@ 10
	add r10,r10,r12	@ h+=Maj(a,b,c) from the past
	ldrb r12,[r1,#2]
	ldrb r0,[r1,#1]
	orr r2,r2,r12,lsl#8
	ldrb r12,[r1],#4
	orr r2,r2,r0,lsl#16
# if 10==15
	str r1,[sp,#17*4]	@ make room for r1
# endif
	eor r0,r6,r6,ror#5
	orr r2,r2,r12,lsl#24
	eor r0,r0,r6,ror#19	@ Sigma1(e)
#endif
	ldr r12,[r14],#4	@ *K256++
	add r9,r9,r2	@ h+=X[i]
	str r2,[sp,#10*4]
	eor r2,r7,r8
	add r9,r9,r0,ror#6	@ h+=Sigma1(e)
	and r2,r2,r6
	add r9,r9,r12	@ h+=K256[i]
	eor r2,r2,r8	@ Ch(e,f,g)
	eor r0,r10,r10,ror#11
	add r9,r9,r2	@ h+=Ch(e,f,g)
#if 10==31
	and r12,r12,#0xff
	cmp r12,#0xf2	@ done?
#endif
#if 10<15
# if __ARM_ARCH__>=7
	ldr r2,[r1],#4	@ prefetch
# else
	ldrb r2,[r1,#3]
# endif
	eor r12,r10,r11	@ a^b, b^c in next round
#else
	ldr r2,[sp,#12*4]	@ from future BODY_16_xx
	eor r12,r10,r11	@ a^b, b^c in next round
	ldr r1,[sp,#9*4]	@ from future BODY_16_xx
#endif
	eor r0,r0,r10,ror#20	@ Sigma0(a)
	and r3,r3,r12	@ (b^c)&=(a^b)
	add r5,r5,r9	@ d+=h
	eor r3,r3,r11	@ Maj(a,b,c)
	add r9,r9,r0,ror#2	@ h+=Sigma0(a)
	@ add r9,r9,r3	@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr r2,[r1],#4	@ 11
# if 11==15
	str r1,[sp,#17*4]	@ make room for r1
# endif
	eor r0,r5,r5,ror#5
	add r9,r9,r3	@ h+=Maj(a,b,c) from the past
	eor r0,r0,r5,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev r2,r2
# endif
#else
	@ ldrb r2,[r1,#3]	@ 11
	add r9,r9,r3	@ h+=Maj(a,b,c) from the past
	ldrb r3,[r1,#2]
	ldrb r0,[r1,#1]
	orr r2,r2,r3,lsl#8
	ldrb r3,[r1],#4
	orr r2,r2,r0,lsl#16
# if 11==15
	str r1,[sp,#17*4]	@ make room for r1
# endif
	eor r0,r5,r5,ror#5
	orr r2,r2,r3,lsl#24
	eor r0,r0,r5,ror#19	@ Sigma1(e)
#endif
	ldr r3,[r14],#4	@ *K256++
	add r8,r8,r2	@ h+=X[i]
	str r2,[sp,#11*4]
	eor r2,r6,r7
	add r8,r8,r0,ror#6	@ h+=Sigma1(e)
	and r2,r2,r5
	add r8,r8,r3	@ h+=K256[i]
	eor r2,r2,r7	@ Ch(e,f,g)
	eor r0,r9,r9,ror#11
	add r8,r8,r2	@ h+=Ch(e,f,g)
#if 11==31
	and r3,r3,#0xff
	cmp r3,#0xf2	@ done?
#endif
#if 11<15
# if __ARM_ARCH__>=7
	ldr r2,[r1],#4	@ prefetch
# else
	ldrb r2,[r1,#3]
# endif
	eor r3,r9,r10	@ a^b, b^c in next round
#else
	ldr r2,[sp,#13*4]	@ from future BODY_16_xx
	eor r3,r9,r10	@ a^b, b^c in next round
	ldr r1,[sp,#10*4]	@ from future BODY_16_xx
#endif
	eor r0,r0,r9,ror#20	@ Sigma0(a)
	and r12,r12,r3	@ (b^c)&=(a^b)
	add r4,r4,r8	@ d+=h
	eor r12,r12,r10	@ Maj(a,b,c)
	add r8,r8,r0,ror#2	@ h+=Sigma0(a)
	@ add r8,r8,r12	@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr r2,[r1],#4	@ 12
# if 12==15
	str r1,[sp,#17*4]	@ make room for r1
# endif
	eor r0,r4,r4,ror#5
	add r8,r8,r12	@ h+=Maj(a,b,c) from the past
	eor r0,r0,r4,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev r2,r2
# endif
#else
	@ ldrb r2,[r1,#3]	@ 12
	add r8,r8,r12	@ h+=Maj(a,b,c) from the past
	ldrb r12,[r1,#2]
	ldrb r0,[r1,#1]
	orr r2,r2,r12,lsl#8
	ldrb r12,[r1],#4
	orr r2,r2,r0,lsl#16
# if 12==15
	str r1,[sp,#17*4]	@ make room for r1
# endif
	eor r0,r4,r4,ror#5
	orr r2,r2,r12,lsl#24
	eor r0,r0,r4,ror#19	@ Sigma1(e)
#endif
	ldr r12,[r14],#4	@ *K256++
	add r7,r7,r2	@ h+=X[i]
	str r2,[sp,#12*4]
	eor r2,r5,r6
	add r7,r7,r0,ror#6	@ h+=Sigma1(e)
	and r2,r2,r4
	add r7,r7,r12	@ h+=K256[i]
	eor r2,r2,r6	@ Ch(e,f,g)
	eor r0,r8,r8,ror#11
	add r7,r7,r2	@ h+=Ch(e,f,g)
#if 12==31
	and r12,r12,#0xff
	cmp r12,#0xf2	@ done?
#endif
#if 12<15
# if __ARM_ARCH__>=7
	ldr r2,[r1],#4	@ prefetch
# else
	ldrb r2,[r1,#3]
# endif
	eor r12,r8,r9	@ a^b, b^c in next round
#else
	ldr r2,[sp,#14*4]	@ from future BODY_16_xx
	eor r12,r8,r9	@ a^b, b^c in next round
	ldr r1,[sp,#11*4]	@ from future BODY_16_xx
#endif
	eor r0,r0,r8,ror#20	@ Sigma0(a)
	and r3,r3,r12	@ (b^c)&=(a^b)
	add r11,r11,r7	@ d+=h
	eor r3,r3,r9	@ Maj(a,b,c)
	add r7,r7,r0,ror#2	@ h+=Sigma0(a)
	@ add r7,r7,r3	@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr r2,[r1],#4	@ 13
# if 13==15
	str r1,[sp,#17*4]	@ make room for r1
# endif
	eor r0,r11,r11,ror#5
	add r7,r7,r3	@ h+=Maj(a,b,c) from the past
	eor r0,r0,r11,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev r2,r2
# endif
#else
	@ ldrb r2,[r1,#3]	@ 13
	add r7,r7,r3	@ h+=Maj(a,b,c) from the past
	ldrb r3,[r1,#2]
	ldrb r0,[r1,#1]
	orr r2,r2,r3,lsl#8
	ldrb r3,[r1],#4
	orr r2,r2,r0,lsl#16
# if 13==15
	str r1,[sp,#17*4]	@ make room for r1
# endif
	eor r0,r11,r11,ror#5
	orr r2,r2,r3,lsl#24
	eor r0,r0,r11,ror#19	@ Sigma1(e)
#endif
	ldr r3,[r14],#4	@ *K256++
	add r6,r6,r2	@ h+=X[i]
	str r2,[sp,#13*4]
	eor r2,r4,r5
	add r6,r6,r0,ror#6	@ h+=Sigma1(e)
	and r2,r2,r11
	add r6,r6,r3	@ h+=K256[i]
	eor r2,r2,r5	@ Ch(e,f,g)
	eor r0,r7,r7,ror#11
	add r6,r6,r2	@ h+=Ch(e,f,g)
#if 13==31
	and r3,r3,#0xff
	cmp r3,#0xf2	@ done?
#endif
#if 13<15
# if __ARM_ARCH__>=7
	ldr r2,[r1],#4	@ prefetch
# else
	ldrb r2,[r1,#3]
# endif
	eor r3,r7,r8	@ a^b, b^c in next round
#else
	ldr r2,[sp,#15*4]	@ from future BODY_16_xx
	eor r3,r7,r8	@ a^b, b^c in next round
	ldr r1,[sp,#12*4]	@ from future BODY_16_xx
#endif
	eor r0,r0,r7,ror#20	@ Sigma0(a)
	and r12,r12,r3	@ (b^c)&=(a^b)
	add r10,r10,r6	@ d+=h
	eor r12,r12,r8	@ Maj(a,b,c)
	add r6,r6,r0,ror#2	@ h+=Sigma0(a)
	@ add r6,r6,r12	@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr r2,[r1],#4	@ 14
# if 14==15
	str r1,[sp,#17*4]	@ make room for r1
# endif
	eor r0,r10,r10,ror#5
	add r6,r6,r12	@ h+=Maj(a,b,c) from the past
	eor r0,r0,r10,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev r2,r2
# endif
#else
	@ ldrb r2,[r1,#3]	@ 14
	add r6,r6,r12	@ h+=Maj(a,b,c) from the past
	ldrb r12,[r1,#2]
	ldrb r0,[r1,#1]
	orr r2,r2,r12,lsl#8
	ldrb r12,[r1],#4
	orr r2,r2,r0,lsl#16
# if 14==15
	str r1,[sp,#17*4]	@ make room for r1
# endif
	eor r0,r10,r10,ror#5
	orr r2,r2,r12,lsl#24
	eor r0,r0,r10,ror#19	@ Sigma1(e)
#endif
	ldr r12,[r14],#4	@ *K256++
	add r5,r5,r2	@ h+=X[i]
	str r2,[sp,#14*4]
	eor r2,r11,r4
	add r5,r5,r0,ror#6	@ h+=Sigma1(e)
	and r2,r2,r10
	add r5,r5,r12	@ h+=K256[i]
	eor r2,r2,r4	@ Ch(e,f,g)
	eor r0,r6,r6,ror#11
	add r5,r5,r2	@ h+=Ch(e,f,g)
#if 14==31
	and r12,r12,#0xff
	cmp r12,#0xf2	@ done?
#endif
#if 14<15
# if __ARM_ARCH__>=7
	ldr r2,[r1],#4	@ prefetch
# else
	ldrb r2,[r1,#3]
# endif
	eor r12,r6,r7	@ a^b, b^c in next round
#else
	ldr r2,[sp,#0*4]	@ from future BODY_16_xx
	eor r12,r6,r7	@ a^b, b^c in next round
	ldr r1,[sp,#13*4]	@ from future BODY_16_xx
#endif
	eor r0,r0,r6,ror#20	@ Sigma0(a)
	and r3,r3,r12	@ (b^c)&=(a^b)
	add r9,r9,r5	@ d+=h
	eor r3,r3,r7	@ Maj(a,b,c)
	add r5,r5,r0,ror#2	@ h+=Sigma0(a)
	@ add r5,r5,r3	@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr r2,[r1],#4	@ 15
# if 15==15
	str r1,[sp,#17*4]	@ make room for r1
# endif
	eor r0,r9,r9,ror#5
	add r5,r5,r3	@ h+=Maj(a,b,c) from the past
	eor r0,r0,r9,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev r2,r2
# endif
#else
	@ ldrb r2,[r1,#3]	@ 15
	add r5,r5,r3	@ h+=Maj(a,b,c) from the past
	ldrb r3,[r1,#2]
	ldrb r0,[r1,#1]
	orr r2,r2,r3,lsl#8
	ldrb r3,[r1],#4
	orr r2,r2,r0,lsl#16
# if 15==15
	str r1,[sp,#17*4]	@ make room for r1
# endif
	eor r0,r9,r9,ror#5
	orr r2,r2,r3,lsl#24
	eor r0,r0,r9,ror#19	@ Sigma1(e)
#endif
	ldr r3,[r14],#4	@ *K256++
	add r4,r4,r2	@ h+=X[i]
	str r2,[sp,#15*4]
	eor r2,r10,r11
	add r4,r4,r0,ror#6	@ h+=Sigma1(e)
	and r2,r2,r9
	add r4,r4,r3	@ h+=K256[i]
	eor r2,r2,r11	@ Ch(e,f,g)
	eor r0,r5,r5,ror#11
	add r4,r4,r2	@ h+=Ch(e,f,g)
#if 15==31
	and r3,r3,#0xff
	cmp r3,#0xf2	@ done?
#endif
#if 15<15
# if __ARM_ARCH__>=7
	ldr r2,[r1],#4	@ prefetch
# else
	ldrb r2,[r1,#3]
# endif
	eor r3,r5,r6	@ a^b, b^c in next round
#else
	ldr r2,[sp,#1*4]	@ from future BODY_16_xx
	eor r3,r5,r6	@ a^b, b^c in next round
	ldr r1,[sp,#14*4]	@ from future BODY_16_xx
#endif
	eor r0,r0,r5,ror#20	@ Sigma0(a)
	and r12,r12,r3	@ (b^c)&=(a^b)
	add r8,r8,r4	@ d+=h
	eor r12,r12,r6	@ Maj(a,b,c)
	add r4,r4,r0,ror#2	@ h+=Sigma0(a)
	@ add r4,r4,r12	@ h+=Maj(a,b,c)
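@ Rounds 16..63: X[] now lives in the 16-word ring buffer on the stack
@ and is expanded in place as
@	X[i&15] += sigma0(X[(i+1)&15]) + X[(i+9)&15] + sigma1(X[(i+14)&15])
@ with sigma0(x) = (x ror 7)^(x ror 18)^(x lsr 3) and
@ sigma1(x) = (x ror 17)^(x ror 19)^(x lsr 10).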
.Lrounds_16_xx:
	@ ldr r2,[sp,#1*4]	@ 16
	@ ldr r1,[sp,#14*4]
	mov r0,r2,ror#7
	add r4,r4,r12	@ h+=Maj(a,b,c) from the past
	mov r12,r1,ror#17
	eor r0,r0,r2,ror#18
	eor r12,r12,r1,ror#19
	eor r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr r2,[sp,#0*4]
	eor r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr r1,[sp,#9*4]

	add r12,r12,r0
	eor r0,r8,r8,ror#5	@ from BODY_00_15
	add r2,r2,r12
	eor r0,r0,r8,ror#19	@ Sigma1(e)
	add r2,r2,r1	@ X[i]
	ldr r12,[r14],#4	@ *K256++
	add r11,r11,r2	@ h+=X[i]
	str r2,[sp,#0*4]
	eor r2,r9,r10
	add r11,r11,r0,ror#6	@ h+=Sigma1(e)
	and r2,r2,r8
	add r11,r11,r12	@ h+=K256[i]
	eor r2,r2,r10	@ Ch(e,f,g)
	eor r0,r4,r4,ror#11
	add r11,r11,r2	@ h+=Ch(e,f,g)
#if 16==31
	and r12,r12,#0xff
	cmp r12,#0xf2	@ done?
#endif
#if 16<15
# if __ARM_ARCH__>=7
	ldr r2,[r1],#4	@ prefetch
# else
	ldrb r2,[r1,#3]
# endif
	eor r12,r4,r5	@ a^b, b^c in next round
#else
	ldr r2,[sp,#2*4]	@ from future BODY_16_xx
	eor r12,r4,r5	@ a^b, b^c in next round
	ldr r1,[sp,#15*4]	@ from future BODY_16_xx
#endif
	eor r0,r0,r4,ror#20	@ Sigma0(a)
	and r3,r3,r12	@ (b^c)&=(a^b)
	add r7,r7,r11	@ d+=h
	eor r3,r3,r5	@ Maj(a,b,c)
	add r11,r11,r0,ror#2	@ h+=Sigma0(a)
	@ add r11,r11,r3	@ h+=Maj(a,b,c)
	@ ldr r2,[sp,#2*4]	@ 17
	@ ldr r1,[sp,#15*4]
	mov r0,r2,ror#7
	add r11,r11,r3	@ h+=Maj(a,b,c) from the past
	mov r3,r1,ror#17
	eor r0,r0,r2,ror#18
	eor r3,r3,r1,ror#19
	eor r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr r2,[sp,#1*4]
	eor r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr r1,[sp,#10*4]

	add r3,r3,r0
	eor r0,r7,r7,ror#5	@ from BODY_00_15
	add r2,r2,r3
	eor r0,r0,r7,ror#19	@ Sigma1(e)
	add r2,r2,r1	@ X[i]
	ldr r3,[r14],#4	@ *K256++
	add r10,r10,r2	@ h+=X[i]
	str r2,[sp,#1*4]
	eor r2,r8,r9
	add r10,r10,r0,ror#6	@ h+=Sigma1(e)
	and r2,r2,r7
	add r10,r10,r3	@ h+=K256[i]
	eor r2,r2,r9	@ Ch(e,f,g)
	eor r0,r11,r11,ror#11
	add r10,r10,r2	@ h+=Ch(e,f,g)
#if 17==31
	and r3,r3,#0xff
	cmp r3,#0xf2	@ done?
#endif
#if 17<15
# if __ARM_ARCH__>=7
	ldr r2,[r1],#4	@ prefetch
# else
	ldrb r2,[r1,#3]
# endif
	eor r3,r11,r4	@ a^b, b^c in next round
#else
	ldr r2,[sp,#3*4]	@ from future BODY_16_xx
	eor r3,r11,r4	@ a^b, b^c in next round
	ldr r1,[sp,#0*4]	@ from future BODY_16_xx
#endif
	eor r0,r0,r11,ror#20	@ Sigma0(a)
	and r12,r12,r3	@ (b^c)&=(a^b)
	add r6,r6,r10	@ d+=h
	eor r12,r12,r4	@ Maj(a,b,c)
	add r10,r10,r0,ror#2	@ h+=Sigma0(a)
	@ add r10,r10,r12	@ h+=Maj(a,b,c)
	@ ldr r2,[sp,#3*4]	@ 18
	@ ldr r1,[sp,#0*4]
	mov r0,r2,ror#7
	add r10,r10,r12	@ h+=Maj(a,b,c) from the past
	mov r12,r1,ror#17
	eor r0,r0,r2,ror#18
	eor r12,r12,r1,ror#19
	eor r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr r2,[sp,#2*4]
	eor r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr r1,[sp,#11*4]

	add r12,r12,r0
	eor r0,r6,r6,ror#5	@ from BODY_00_15
	add r2,r2,r12
	eor r0,r0,r6,ror#19	@ Sigma1(e)
	add r2,r2,r1	@ X[i]
	ldr r12,[r14],#4	@ *K256++
	add r9,r9,r2	@ h+=X[i]
	str r2,[sp,#2*4]
	eor r2,r7,r8
	add r9,r9,r0,ror#6	@ h+=Sigma1(e)
	and r2,r2,r6
	add r9,r9,r12	@ h+=K256[i]
	eor r2,r2,r8	@ Ch(e,f,g)
	eor r0,r10,r10,ror#11
	add r9,r9,r2	@ h+=Ch(e,f,g)
#if 18==31
	and r12,r12,#0xff
	cmp r12,#0xf2	@ done?
#endif
#if 18<15
# if __ARM_ARCH__>=7
	ldr r2,[r1],#4	@ prefetch
# else
	ldrb r2,[r1,#3]
# endif
	eor r12,r10,r11	@ a^b, b^c in next round
#else
	ldr r2,[sp,#4*4]	@ from future BODY_16_xx
	eor r12,r10,r11	@ a^b, b^c in next round
	ldr r1,[sp,#1*4]	@ from future BODY_16_xx
#endif
	eor r0,r0,r10,ror#20	@ Sigma0(a)
	and r3,r3,r12	@ (b^c)&=(a^b)
	add r5,r5,r9	@ d+=h
	eor r3,r3,r11	@ Maj(a,b,c)
	add r9,r9,r0,ror#2	@ h+=Sigma0(a)
	@ add r9,r9,r3	@ h+=Maj(a,b,c)
	@ ldr r2,[sp,#4*4]	@ 19
	@ ldr r1,[sp,#1*4]
	mov r0,r2,ror#7
	add r9,r9,r3	@ h+=Maj(a,b,c) from the past
	mov r3,r1,ror#17
	eor r0,r0,r2,ror#18
	eor r3,r3,r1,ror#19
	eor r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr r2,[sp,#3*4]
	eor r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr r1,[sp,#12*4]

	add r3,r3,r0
	eor r0,r5,r5,ror#5	@ from BODY_00_15
	add r2,r2,r3
	eor r0,r0,r5,ror#19	@ Sigma1(e)
	add r2,r2,r1	@ X[i]
	ldr r3,[r14],#4	@ *K256++
	add r8,r8,r2	@ h+=X[i]
	str r2,[sp,#3*4]
	eor r2,r6,r7
	add r8,r8,r0,ror#6	@ h+=Sigma1(e)
	and r2,r2,r5
	add r8,r8,r3	@ h+=K256[i]
	eor r2,r2,r7	@ Ch(e,f,g)
	eor r0,r9,r9,ror#11
	add r8,r8,r2	@ h+=Ch(e,f,g)
#if 19==31
	and r3,r3,#0xff
	cmp r3,#0xf2	@ done?
#endif
#if 19<15
# if __ARM_ARCH__>=7
	ldr r2,[r1],#4	@ prefetch
# else
	ldrb r2,[r1,#3]
# endif
	eor r3,r9,r10	@ a^b, b^c in next round
#else
	ldr r2,[sp,#5*4]	@ from future BODY_16_xx
	eor r3,r9,r10	@ a^b, b^c in next round
	ldr r1,[sp,#2*4]	@ from future BODY_16_xx
#endif
	eor r0,r0,r9,ror#20	@ Sigma0(a)
	and r12,r12,r3	@ (b^c)&=(a^b)
	add r4,r4,r8	@ d+=h
	eor r12,r12,r10	@ Maj(a,b,c)
	add r8,r8,r0,ror#2	@ h+=Sigma0(a)
	@ add r8,r8,r12	@ h+=Maj(a,b,c)
	@ ldr r2,[sp,#5*4]	@ 20
	@ ldr r1,[sp,#2*4]
	mov r0,r2,ror#7
	add r8,r8,r12	@ h+=Maj(a,b,c) from the past
	mov r12,r1,ror#17
	eor r0,r0,r2,ror#18
	eor r12,r12,r1,ror#19
	eor r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr r2,[sp,#4*4]
	eor r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr r1,[sp,#13*4]

	add r12,r12,r0
	eor r0,r4,r4,ror#5	@ from BODY_00_15
	add r2,r2,r12
	eor r0,r0,r4,ror#19	@ Sigma1(e)
	add r2,r2,r1	@ X[i]
	ldr r12,[r14],#4	@ *K256++
	add r7,r7,r2	@ h+=X[i]
	str r2,[sp,#4*4]
	eor r2,r5,r6
	add r7,r7,r0,ror#6	@ h+=Sigma1(e)
	and r2,r2,r4
	add r7,r7,r12	@ h+=K256[i]
	eor r2,r2,r6	@ Ch(e,f,g)
	eor r0,r8,r8,ror#11
	add r7,r7,r2	@ h+=Ch(e,f,g)
#if 20==31
	and r12,r12,#0xff
	cmp r12,#0xf2	@ done?
#endif
#if 20<15
# if __ARM_ARCH__>=7
	ldr r2,[r1],#4	@ prefetch
# else
	ldrb r2,[r1,#3]
# endif
	eor r12,r8,r9	@ a^b, b^c in next round
#else
	ldr r2,[sp,#6*4]	@ from future BODY_16_xx
	eor r12,r8,r9	@ a^b, b^c in next round
	ldr r1,[sp,#3*4]	@ from future BODY_16_xx
#endif
	eor r0,r0,r8,ror#20	@ Sigma0(a)
	and r3,r3,r12	@ (b^c)&=(a^b)
	add r11,r11,r7	@ d+=h
	eor r3,r3,r9	@ Maj(a,b,c)
	add r7,r7,r0,ror#2	@ h+=Sigma0(a)
	@ add r7,r7,r3	@ h+=Maj(a,b,c)
	@ ldr r2,[sp,#6*4]	@ 21
	@ ldr r1,[sp,#3*4]
	mov r0,r2,ror#7
	add r7,r7,r3	@ h+=Maj(a,b,c) from the past
	mov r3,r1,ror#17
	eor r0,r0,r2,ror#18
	eor r3,r3,r1,ror#19
	eor r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr r2,[sp,#5*4]
	eor r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr r1,[sp,#14*4]

	add r3,r3,r0
	eor r0,r11,r11,ror#5	@ from BODY_00_15
	add r2,r2,r3
	eor r0,r0,r11,ror#19	@ Sigma1(e)
	add r2,r2,r1	@ X[i]
	ldr r3,[r14],#4	@ *K256++
	add r6,r6,r2	@ h+=X[i]
	str r2,[sp,#5*4]
	eor r2,r4,r5
	add r6,r6,r0,ror#6	@ h+=Sigma1(e)
	and r2,r2,r11
	add r6,r6,r3	@ h+=K256[i]
	eor r2,r2,r5	@ Ch(e,f,g)
	eor r0,r7,r7,ror#11
	add r6,r6,r2	@ h+=Ch(e,f,g)
#if 21==31
	and r3,r3,#0xff
	cmp r3,#0xf2	@ done?
#endif
#if 21<15
# if __ARM_ARCH__>=7
	ldr r2,[r1],#4	@ prefetch
# else
	ldrb r2,[r1,#3]
# endif
	eor r3,r7,r8	@ a^b, b^c in next round
#else
	ldr r2,[sp,#7*4]	@ from future BODY_16_xx
	eor r3,r7,r8	@ a^b, b^c in next round
	ldr r1,[sp,#4*4]	@ from future BODY_16_xx
#endif
	eor r0,r0,r7,ror#20	@ Sigma0(a)
	and r12,r12,r3	@ (b^c)&=(a^b)
	add r10,r10,r6	@ d+=h
	eor r12,r12,r8	@ Maj(a,b,c)
	add r6,r6,r0,ror#2	@ h+=Sigma0(a)
	@ add r6,r6,r12	@ h+=Maj(a,b,c)
	@ ldr r2,[sp,#7*4]	@ 22
	@ ldr r1,[sp,#4*4]
	mov r0,r2,ror#7
	add r6,r6,r12	@ h+=Maj(a,b,c) from the past
	mov r12,r1,ror#17
	eor r0,r0,r2,ror#18
	eor r12,r12,r1,ror#19
	eor r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr r2,[sp,#6*4]
	eor r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr r1,[sp,#15*4]

	add r12,r12,r0
	eor r0,r10,r10,ror#5	@ from BODY_00_15
	add r2,r2,r12
	eor r0,r0,r10,ror#19	@ Sigma1(e)
	add r2,r2,r1	@ X[i]
	ldr r12,[r14],#4	@ *K256++
	add r5,r5,r2	@ h+=X[i]
	str r2,[sp,#6*4]
	eor r2,r11,r4
	add r5,r5,r0,ror#6	@ h+=Sigma1(e)
	and r2,r2,r10
	add r5,r5,r12	@ h+=K256[i]
	eor r2,r2,r4	@ Ch(e,f,g)
	eor r0,r6,r6,ror#11
	add r5,r5,r2	@ h+=Ch(e,f,g)
#if 22==31
	and r12,r12,#0xff
	cmp r12,#0xf2	@ done?
#endif
#if 22<15
# if __ARM_ARCH__>=7
	ldr r2,[r1],#4	@ prefetch
# else
	ldrb r2,[r1,#3]
# endif
	eor r12,r6,r7	@ a^b, b^c in next round
#else
	ldr r2,[sp,#8*4]	@ from future BODY_16_xx
	eor r12,r6,r7	@ a^b, b^c in next round
	ldr r1,[sp,#5*4]	@ from future BODY_16_xx
#endif
	eor r0,r0,r6,ror#20	@ Sigma0(a)
	and r3,r3,r12	@ (b^c)&=(a^b)
	add r9,r9,r5	@ d+=h
	eor r3,r3,r7	@ Maj(a,b,c)
	add r5,r5,r0,ror#2	@ h+=Sigma0(a)
	@ add r5,r5,r3	@ h+=Maj(a,b,c)
	@ ldr r2,[sp,#8*4]	@ 23
	@ ldr r1,[sp,#5*4]
	mov r0,r2,ror#7
	add r5,r5,r3	@ h+=Maj(a,b,c) from the past
	mov r3,r1,ror#17
	eor r0,r0,r2,ror#18
	eor r3,r3,r1,ror#19
	eor r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr r2,[sp,#7*4]
	eor r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr r1,[sp,#0*4]

	add r3,r3,r0
	eor r0,r9,r9,ror#5	@ from BODY_00_15
	add r2,r2,r3
	eor r0,r0,r9,ror#19	@ Sigma1(e)
	add r2,r2,r1	@ X[i]
	ldr r3,[r14],#4	@ *K256++
	add r4,r4,r2	@ h+=X[i]
	str r2,[sp,#7*4]
	eor r2,r10,r11
	add r4,r4,r0,ror#6	@ h+=Sigma1(e)
	and r2,r2,r9
	add r4,r4,r3	@ h+=K256[i]
	eor r2,r2,r11	@ Ch(e,f,g)
	eor r0,r5,r5,ror#11
	add r4,r4,r2	@ h+=Ch(e,f,g)
#if 23==31
	and r3,r3,#0xff
	cmp r3,#0xf2	@ done?
#endif
#if 23<15
# if __ARM_ARCH__>=7
	ldr r2,[r1],#4	@ prefetch
# else
	ldrb r2,[r1,#3]
# endif
	eor r3,r5,r6	@ a^b, b^c in next round
#else
	ldr r2,[sp,#9*4]	@ from future BODY_16_xx
	eor r3,r5,r6	@ a^b, b^c in next round
	ldr r1,[sp,#6*4]	@ from future BODY_16_xx
#endif
	eor r0,r0,r5,ror#20	@ Sigma0(a)
	and r12,r12,r3	@ (b^c)&=(a^b)
	add r8,r8,r4	@ d+=h
	eor r12,r12,r6	@ Maj(a,b,c)
	add r4,r4,r0,ror#2	@ h+=Sigma0(a)
	@ add r4,r4,r12	@ h+=Maj(a,b,c)
	@ ldr r2,[sp,#9*4]	@ 24
	@ ldr r1,[sp,#6*4]
	mov r0,r2,ror#7
	add r4,r4,r12	@ h+=Maj(a,b,c) from the past
	mov r12,r1,ror#17
	eor r0,r0,r2,ror#18
	eor r12,r12,r1,ror#19
	eor r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr r2,[sp,#8*4]
	eor r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr r1,[sp,#1*4]

	add r12,r12,r0
	eor r0,r8,r8,ror#5	@ from BODY_00_15
	add r2,r2,r12
	eor r0,r0,r8,ror#19	@ Sigma1(e)
	add r2,r2,r1	@ X[i]
	ldr r12,[r14],#4	@ *K256++
	add r11,r11,r2	@ h+=X[i]
	str r2,[sp,#8*4]
	eor r2,r9,r10
	add r11,r11,r0,ror#6	@ h+=Sigma1(e)
	and r2,r2,r8
	add r11,r11,r12	@ h+=K256[i]
	eor r2,r2,r10	@ Ch(e,f,g)
	eor r0,r4,r4,ror#11
	add r11,r11,r2	@ h+=Ch(e,f,g)
#if 24==31
	and r12,r12,#0xff
	cmp r12,#0xf2	@ done?
#endif
#if 24<15
# if __ARM_ARCH__>=7
	ldr r2,[r1],#4	@ prefetch
# else
	ldrb r2,[r1,#3]
# endif
	eor r12,r4,r5	@ a^b, b^c in next round
#else
	ldr r2,[sp,#10*4]	@ from future BODY_16_xx
	eor r12,r4,r5	@ a^b, b^c in next round
	ldr r1,[sp,#7*4]	@ from future BODY_16_xx
#endif
	eor r0,r0,r4,ror#20	@ Sigma0(a)
	and r3,r3,r12	@ (b^c)&=(a^b)
	add r7,r7,r11	@ d+=h
	eor r3,r3,r5	@ Maj(a,b,c)
	add r11,r11,r0,ror#2	@ h+=Sigma0(a)
	@ add r11,r11,r3	@ h+=Maj(a,b,c)
	@ ldr r2,[sp,#10*4]	@ 25
	@ ldr r1,[sp,#7*4]
	mov r0,r2,ror#7
	add r11,r11,r3	@ h+=Maj(a,b,c) from the past
	mov r3,r1,ror#17
	eor r0,r0,r2,ror#18
	eor r3,r3,r1,ror#19
	eor r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr r2,[sp,#9*4]
	eor r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr r1,[sp,#2*4]

	add r3,r3,r0
	eor r0,r7,r7,ror#5	@ from BODY_00_15
	add r2,r2,r3
	eor r0,r0,r7,ror#19	@ Sigma1(e)
	add r2,r2,r1	@ X[i]
	ldr r3,[r14],#4	@ *K256++
	add r10,r10,r2	@ h+=X[i]
	str r2,[sp,#9*4]
	eor r2,r8,r9
	add r10,r10,r0,ror#6	@ h+=Sigma1(e)
	and r2,r2,r7
	add r10,r10,r3	@ h+=K256[i]
	eor r2,r2,r9	@ Ch(e,f,g)
	eor r0,r11,r11,ror#11
	add r10,r10,r2	@ h+=Ch(e,f,g)
#if 25==31
	and r3,r3,#0xff
	cmp r3,#0xf2	@ done?
#endif
#if 25<15
# if __ARM_ARCH__>=7
	ldr r2,[r1],#4	@ prefetch
# else
	ldrb r2,[r1,#3]
# endif
	eor r3,r11,r4	@ a^b, b^c in next round
#else
	ldr r2,[sp,#11*4]	@ from future BODY_16_xx
	eor r3,r11,r4	@ a^b, b^c in next round
	ldr r1,[sp,#8*4]	@ from future BODY_16_xx
#endif
	eor r0,r0,r11,ror#20	@ Sigma0(a)
	and r12,r12,r3	@ (b^c)&=(a^b)
	add r6,r6,r10	@ d+=h
	eor r12,r12,r4	@ Maj(a,b,c)
	add r10,r10,r0,ror#2	@ h+=Sigma0(a)
	@ add r10,r10,r12	@ h+=Maj(a,b,c)
	@ ldr r2,[sp,#11*4]	@ 26
	@ ldr r1,[sp,#8*4]
	mov r0,r2,ror#7
	add r10,r10,r12	@ h+=Maj(a,b,c) from the past
	mov r12,r1,ror#17
	eor r0,r0,r2,ror#18
	eor r12,r12,r1,ror#19
	eor r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr r2,[sp,#10*4]
	eor r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr r1,[sp,#3*4]

	add r12,r12,r0
	eor r0,r6,r6,ror#5	@ from BODY_00_15
	add r2,r2,r12
	eor r0,r0,r6,ror#19	@ Sigma1(e)
	add r2,r2,r1	@ X[i]
	ldr r12,[r14],#4	@ *K256++
	add r9,r9,r2	@ h+=X[i]
	str r2,[sp,#10*4]
	eor r2,r7,r8
	add r9,r9,r0,ror#6	@ h+=Sigma1(e)
	and r2,r2,r6
	add r9,r9,r12	@ h+=K256[i]
	eor r2,r2,r8	@ Ch(e,f,g)
	eor r0,r10,r10,ror#11
	add r9,r9,r2	@ h+=Ch(e,f,g)
#if 26==31
	and r12,r12,#0xff
	cmp r12,#0xf2	@ done?
#endif
#if 26<15
# if __ARM_ARCH__>=7
	ldr r2,[r1],#4	@ prefetch
# else
	ldrb r2,[r1,#3]
# endif
	eor r12,r10,r11	@ a^b, b^c in next round
#else
	ldr r2,[sp,#12*4]	@ from future BODY_16_xx
	eor r12,r10,r11	@ a^b, b^c in next round
	ldr r1,[sp,#9*4]	@ from future BODY_16_xx
#endif
	eor r0,r0,r10,ror#20	@ Sigma0(a)
	and r3,r3,r12	@ (b^c)&=(a^b)
	add r5,r5,r9	@ d+=h
	eor r3,r3,r11	@ Maj(a,b,c)
	add r9,r9,r0,ror#2	@ h+=Sigma0(a)
	@ add r9,r9,r3	@ h+=Maj(a,b,c)
	@ ldr r2,[sp,#12*4]	@ 27
	@ ldr r1,[sp,#9*4]
	mov r0,r2,ror#7
	add r9,r9,r3	@ h+=Maj(a,b,c) from the past
	mov r3,r1,ror#17
	eor r0,r0,r2,ror#18
	eor r3,r3,r1,ror#19
	eor r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr r2,[sp,#11*4]
	eor r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr r1,[sp,#4*4]

	add r3,r3,r0
	eor r0,r5,r5,ror#5	@ from BODY_00_15
	add r2,r2,r3
	eor r0,r0,r5,ror#19	@ Sigma1(e)
	add r2,r2,r1	@ X[i]
	ldr r3,[r14],#4	@ *K256++
	add r8,r8,r2	@ h+=X[i]
	str r2,[sp,#11*4]
	eor r2,r6,r7
	add r8,r8,r0,ror#6	@ h+=Sigma1(e)
	and r2,r2,r5
	add r8,r8,r3	@ h+=K256[i]
	eor r2,r2,r7	@ Ch(e,f,g)
	eor r0,r9,r9,ror#11
	add r8,r8,r2	@ h+=Ch(e,f,g)
#if 27==31
	and r3,r3,#0xff
	cmp r3,#0xf2	@ done?
#endif
#if 27<15
# if __ARM_ARCH__>=7
	ldr r2,[r1],#4	@ prefetch
# else
	ldrb r2,[r1,#3]
# endif
	eor r3,r9,r10	@ a^b, b^c in next round
#else
	ldr r2,[sp,#13*4]	@ from future BODY_16_xx
	eor r3,r9,r10	@ a^b, b^c in next round
	ldr r1,[sp,#10*4]	@ from future BODY_16_xx
#endif
	eor r0,r0,r9,ror#20	@ Sigma0(a)
	and r12,r12,r3	@ (b^c)&=(a^b)
	add r4,r4,r8	@ d+=h
	eor r12,r12,r10	@ Maj(a,b,c)
	add r8,r8,r0,ror#2	@ h+=Sigma0(a)
	@ add r8,r8,r12	@ h+=Maj(a,b,c)
	@ ldr r2,[sp,#13*4]	@ 28
	@ ldr r1,[sp,#10*4]
	mov r0,r2,ror#7
	add r8,r8,r12	@ h+=Maj(a,b,c) from the past
	mov r12,r1,ror#17
	eor r0,r0,r2,ror#18
	eor r12,r12,r1,ror#19
	eor r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr r2,[sp,#12*4]
	eor r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr r1,[sp,#5*4]

	add r12,r12,r0
	eor r0,r4,r4,ror#5	@ from BODY_00_15
	add r2,r2,r12
	eor r0,r0,r4,ror#19	@ Sigma1(e)
	add r2,r2,r1	@ X[i]
	ldr r12,[r14],#4	@ *K256++
	add r7,r7,r2	@ h+=X[i]
	str r2,[sp,#12*4]
	eor r2,r5,r6
	add r7,r7,r0,ror#6	@ h+=Sigma1(e)
	and r2,r2,r4
	add r7,r7,r12	@ h+=K256[i]
	eor r2,r2,r6	@ Ch(e,f,g)
	eor r0,r8,r8,ror#11
	add r7,r7,r2	@ h+=Ch(e,f,g)
#if 28==31
	and r12,r12,#0xff
	cmp r12,#0xf2	@ done?
#endif
#if 28<15
# if __ARM_ARCH__>=7
	ldr r2,[r1],#4	@ prefetch
# else
	ldrb r2,[r1,#3]
# endif
	eor r12,r8,r9	@ a^b, b^c in next round
#else
	ldr r2,[sp,#14*4]	@ from future BODY_16_xx
	eor r12,r8,r9	@ a^b, b^c in next round
	ldr r1,[sp,#11*4]	@ from future BODY_16_xx
#endif
	eor r0,r0,r8,ror#20	@ Sigma0(a)
	and r3,r3,r12	@ (b^c)&=(a^b)
	add r11,r11,r7	@ d+=h
	eor r3,r3,r9	@ Maj(a,b,c)
	add r7,r7,r0,ror#2	@ h+=Sigma0(a)
	@ add r7,r7,r3	@ h+=Maj(a,b,c)
	@ ldr r2,[sp,#14*4]	@ 29
	@ ldr r1,[sp,#11*4]
	mov r0,r2,ror#7
	add r7,r7,r3	@ h+=Maj(a,b,c) from the past
	mov r3,r1,ror#17
	eor r0,r0,r2,ror#18
	eor r3,r3,r1,ror#19
	eor r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr r2,[sp,#13*4]
	eor r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr r1,[sp,#6*4]

	add r3,r3,r0
	eor r0,r11,r11,ror#5	@ from BODY_00_15
	add r2,r2,r3
	eor r0,r0,r11,ror#19	@ Sigma1(e)
	add r2,r2,r1	@ X[i]
	ldr r3,[r14],#4	@ *K256++
	add r6,r6,r2	@ h+=X[i]
	str r2,[sp,#13*4]
	eor r2,r4,r5
	add r6,r6,r0,ror#6	@ h+=Sigma1(e)
	and r2,r2,r11
	add r6,r6,r3	@ h+=K256[i]
	eor r2,r2,r5	@ Ch(e,f,g)
	eor r0,r7,r7,ror#11
	add r6,r6,r2	@ h+=Ch(e,f,g)
#if 29==31
	and r3,r3,#0xff
	cmp r3,#0xf2	@ done?
#endif
#if 29<15
# if __ARM_ARCH__>=7
	ldr r2,[r1],#4	@ prefetch
# else
	ldrb r2,[r1,#3]
# endif
	eor r3,r7,r8	@ a^b, b^c in next round
#else
	ldr r2,[sp,#15*4]	@ from future BODY_16_xx
	eor r3,r7,r8	@ a^b, b^c in next round
	ldr r1,[sp,#12*4]	@ from future BODY_16_xx
#endif
	eor r0,r0,r7,ror#20	@ Sigma0(a)
	and r12,r12,r3	@ (b^c)&=(a^b)
	add r10,r10,r6	@ d+=h
	eor r12,r12,r8	@ Maj(a,b,c)
	add r6,r6,r0,ror#2	@ h+=Sigma0(a)
	@ add r6,r6,r12	@ h+=Maj(a,b,c)
	@ ldr r2,[sp,#15*4]	@ 30
	@ ldr r1,[sp,#12*4]
	mov r0,r2,ror#7
	add r6,r6,r12	@ h+=Maj(a,b,c) from the past
	mov r12,r1,ror#17
	eor r0,r0,r2,ror#18
	eor r12,r12,r1,ror#19
	eor r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr r2,[sp,#14*4]
	eor r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr r1,[sp,#7*4]

	add r12,r12,r0
	eor r0,r10,r10,ror#5	@ from BODY_00_15
	add r2,r2,r12
	eor r0,r0,r10,ror#19	@ Sigma1(e)
	add r2,r2,r1	@ X[i]
	ldr r12,[r14],#4	@ *K256++
	add r5,r5,r2	@ h+=X[i]
	str r2,[sp,#14*4]
	eor r2,r11,r4
	add r5,r5,r0,ror#6	@ h+=Sigma1(e)
	and r2,r2,r10
	add r5,r5,r12	@ h+=K256[i]
	eor r2,r2,r4	@ Ch(e,f,g)
	eor r0,r6,r6,ror#11
	add r5,r5,r2	@ h+=Ch(e,f,g)
#if 30==31
	and r12,r12,#0xff
	cmp r12,#0xf2	@ done?
#endif
#if 30<15
# if __ARM_ARCH__>=7
	ldr r2,[r1],#4	@ prefetch
# else
	ldrb r2,[r1,#3]
# endif
	eor r12,r6,r7	@ a^b, b^c in next round
#else
	ldr r2,[sp,#0*4]	@ from future BODY_16_xx
	eor r12,r6,r7	@ a^b, b^c in next round
	ldr r1,[sp,#13*4]	@ from future BODY_16_xx
#endif
	eor r0,r0,r6,ror#20	@ Sigma0(a)
	and r3,r3,r12	@ (b^c)&=(a^b)
	add r9,r9,r5	@ d+=h
	eor r3,r3,r7	@ Maj(a,b,c)
	add r5,r5,r0,ror#2	@ h+=Sigma0(a)
	@ add r5,r5,r3	@ h+=Maj(a,b,c)
	@ ldr r2,[sp,#0*4]	@ 31
	@ ldr r1,[sp,#13*4]
	mov r0,r2,ror#7
	add r5,r5,r3	@ h+=Maj(a,b,c) from the past
	mov r3,r1,ror#17
	eor r0,r0,r2,ror#18
	eor r3,r3,r1,ror#19
	eor r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr r2,[sp,#15*4]
	eor r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr r1,[sp,#8*4]

	add r3,r3,r0
	eor r0,r9,r9,ror#5	@ from BODY_00_15
	add r2,r2,r3
	eor r0,r0,r9,ror#19	@ Sigma1(e)
	add r2,r2,r1	@ X[i]
	ldr r3,[r14],#4	@ *K256++
	add r4,r4,r2	@ h+=X[i]
	str r2,[sp,#15*4]
	eor r2,r10,r11
	add r4,r4,r0,ror#6	@ h+=Sigma1(e)
	and r2,r2,r9
	add r4,r4,r3	@ h+=K256[i]
	eor r2,r2,r11	@ Ch(e,f,g)
	eor r0,r5,r5,ror#11
	add r4,r4,r2	@ h+=Ch(e,f,g)
#if 31==31
	and r3,r3,#0xff
	cmp r3,#0xf2	@ done?
#endif
#if 31<15
# if __ARM_ARCH__>=7
	ldr r2,[r1],#4	@ prefetch
# else
	ldrb r2,[r1,#3]
# endif
	eor r3,r5,r6	@ a^b, b^c in next round
#else
	ldr r2,[sp,#1*4]	@ from future BODY_16_xx
	eor r3,r5,r6	@ a^b, b^c in next round
	ldr r1,[sp,#14*4]	@ from future BODY_16_xx
#endif
	eor r0,r0,r5,ror#20	@ Sigma0(a)
	and r12,r12,r3	@ (b^c)&=(a^b)
	add r8,r8,r4	@ d+=h
	eor r12,r12,r6	@ Maj(a,b,c)
	add r4,r4,r0,ror#2	@ h+=Sigma0(a)
	@ add r4,r4,r12	@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	ite eq	@ Thumb2 thing, sanity check in ARM
#endif
	ldreq r3,[sp,#16*4]	@ pull ctx
	bne .Lrounds_16_xx

	add r4,r4,r12	@ h+=Maj(a,b,c) from the past
	ldr r0,[r3,#0]
	ldr r2,[r3,#4]
	ldr r12,[r3,#8]
	add r4,r4,r0
	ldr r0,[r3,#12]
	add r5,r5,r2
	ldr r2,[r3,#16]
	add r6,r6,r12
	ldr r12,[r3,#20]
	add r7,r7,r0
	ldr r0,[r3,#24]
	add r8,r8,r2
	ldr r2,[r3,#28]
	add r9,r9,r12
	ldr r1,[sp,#17*4]	@ pull inp
	ldr r12,[sp,#18*4]	@ pull inp+len
	add r10,r10,r0
	add r11,r11,r2
	stmia r3,{r4,r5,r6,r7,r8,r9,r10,r11}
	cmp r1,r12
	sub r14,r14,#256	@ rewind Ktbl
	bne .Loop

	add sp,sp,#19*4	@ destroy frame
#if __ARM_ARCH__>=5
	ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc}
#else
	ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr}
	tst lr,#1
	moveq pc,lr	@ be binary compatible with V4, yet
.word 0xe12fff1e	@ interoperable with Thumb ISA:-)
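@ (0xe12fff1e is the ARM encoding of "bx lr"; it is emitted as a raw
@ word so the file still assembles when the assembler targets ARMv4.)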
#endif
.size sha256_block_data_order,.-sha256_block_data_order
1895#if __ARM_MAX_ARCH__>=7
1896.arch armv7-a
1897.fpu neon
1898
Adam Langleye9ada862015-05-11 17:20:37 -07001899.globl sha256_block_data_order_neon
David Benjamin4969cc92016-04-22 15:02:23 -04001900.hidden sha256_block_data_order_neon
Adam Langleyd9e397b2015-01-22 14:27:53 -08001901.type sha256_block_data_order_neon,%function
David Benjamin1b249672016-12-06 18:25:50 -05001902.align 5
1903.skip 16
Adam Langleyd9e397b2015-01-22 14:27:53 -08001904sha256_block_data_order_neon:
1905.LNEON:
Adam Langleye9ada862015-05-11 17:20:37 -07001906 stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
Adam Langleyd9e397b2015-01-22 14:27:53 -08001907
Adam Langleye9ada862015-05-11 17:20:37 -07001908 sub r11,sp,#16*4+16
David Benjamin1b249672016-12-06 18:25:50 -05001909 adr r14,K256
Adam Langleye9ada862015-05-11 17:20:37 -07001910 bic r11,r11,#15 @ align for 128-bit stores
Adam Langleyd9e397b2015-01-22 14:27:53 -08001911 mov r12,sp
Adam Langleye9ada862015-05-11 17:20:37 -07001912 mov sp,r11 @ alloca
1913 add r2,r1,r2,lsl#6 @ len to point at the end of inp
Adam Langleyd9e397b2015-01-22 14:27:53 -08001914
	vld1.8	{q0},[r1]!
	vld1.8	{q1},[r1]!
	vld1.8	{q2},[r1]!
	vld1.8	{q3},[r1]!
	vld1.32	{q8},[r14,:128]!
	vld1.32	{q9},[r14,:128]!
	vld1.32	{q10},[r14,:128]!
	vld1.32	{q11},[r14,:128]!
	vrev32.8	q0,q0		@ yes, even on
	str	r0,[sp,#64]
	vrev32.8	q1,q1		@ big-endian
	str	r1,[sp,#68]
	mov	r1,sp
	vrev32.8	q2,q2
	str	r2,[sp,#72]
	vrev32.8	q3,q3
	str	r12,[sp,#76]		@ save original sp
	vadd.i32	q8,q8,q0
	vadd.i32	q9,q9,q1
	vst1.32	{q8},[r1,:128]!
	vadd.i32	q10,q10,q2
	vst1.32	{q9},[r1,:128]!
	vadd.i32	q11,q11,q3
	vst1.32	{q10},[r1,:128]!
	vst1.32	{q11},[r1,:128]!

	ldmia	r0,{r4,r5,r6,r7,r8,r9,r10,r11}
	sub	r1,r1,#64
	ldr	r2,[sp,#0]
	eor	r12,r12,r12
	eor	r3,r5,r6
	b	.L_00_48

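@ Each .L_00_48 iteration retires four scalar rounds while NEON computes
@ the next four schedule words. One scalar round, sketched in C-like
@ pseudocode (a..h rotate through r4-r11, T1 is folded into h):
@
@	T1 = h + Sigma1(e) + Ch(e,f,g) + K256[i] + W[i];
@	d += T1;  h = T1 + Sigma0(a) + Maj(a,b,c);
@
@ The vector half computes sigma0(x) = ROR(x,7)^ROR(x,18)^(x>>3) and
@ sigma1(x) = ROR(x,17)^ROR(x,19)^(x>>10); each vshr/vsli pair below
@ forms one of the rotates (shift right plus shift-left-and-insert).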
.align	4
.L_00_48:
	vext.8	q8,q0,q1,#4
	add	r11,r11,r2
	eor	r2,r9,r10
	eor	r0,r8,r8,ror#5
	vext.8	q9,q2,q3,#4
	add	r4,r4,r12
	and	r2,r2,r8
	eor	r12,r0,r8,ror#19
	vshr.u32	q10,q8,#7
	eor	r0,r4,r4,ror#11
	eor	r2,r2,r10
	vadd.i32	q0,q0,q9
	add	r11,r11,r12,ror#6
	eor	r12,r4,r5
	vshr.u32	q9,q8,#3
	eor	r0,r0,r4,ror#20
	add	r11,r11,r2
	vsli.32	q10,q8,#25
	ldr	r2,[sp,#4]
	and	r3,r3,r12
	vshr.u32	q11,q8,#18
	add	r7,r7,r11
	add	r11,r11,r0,ror#2
	eor	r3,r3,r5
	veor	q9,q9,q10
	add	r10,r10,r2
	vsli.32	q11,q8,#14
	eor	r2,r8,r9
	eor	r0,r7,r7,ror#5
	vshr.u32	d24,d7,#17
	add	r11,r11,r3
	and	r2,r2,r7
	veor	q9,q9,q11
	eor	r3,r0,r7,ror#19
	eor	r0,r11,r11,ror#11
	vsli.32	d24,d7,#15
	eor	r2,r2,r9
	add	r10,r10,r3,ror#6
	vshr.u32	d25,d7,#10
	eor	r3,r11,r4
	eor	r0,r0,r11,ror#20
	vadd.i32	q0,q0,q9
	add	r10,r10,r2
	ldr	r2,[sp,#8]
	veor	d25,d25,d24
	and	r12,r12,r3
	add	r6,r6,r10
	vshr.u32	d24,d7,#19
	add	r10,r10,r0,ror#2
	eor	r12,r12,r4
	vsli.32	d24,d7,#13
	add	r9,r9,r2
	eor	r2,r7,r8
	veor	d25,d25,d24
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12
	vadd.i32	d0,d0,d25
	and	r2,r2,r6
	eor	r12,r0,r6,ror#19
	vshr.u32	d24,d0,#17
	eor	r0,r10,r10,ror#11
	eor	r2,r2,r8
	vsli.32	d24,d0,#15
	add	r9,r9,r12,ror#6
	eor	r12,r10,r11
	vshr.u32	d25,d0,#10
	eor	r0,r0,r10,ror#20
	add	r9,r9,r2
	veor	d25,d25,d24
	ldr	r2,[sp,#12]
	and	r3,r3,r12
	vshr.u32	d24,d0,#19
	add	r5,r5,r9
	add	r9,r9,r0,ror#2
	eor	r3,r3,r11
	vld1.32	{q8},[r14,:128]!
	add	r8,r8,r2
	vsli.32	d24,d0,#13
	eor	r2,r6,r7
	eor	r0,r5,r5,ror#5
	veor	d25,d25,d24
	add	r9,r9,r3
	and	r2,r2,r5
	vadd.i32	d1,d1,d25
	eor	r3,r0,r5,ror#19
	eor	r0,r9,r9,ror#11
	vadd.i32	q8,q8,q0
	eor	r2,r2,r7
	add	r8,r8,r3,ror#6
	eor	r3,r9,r10
	eor	r0,r0,r9,ror#20
	add	r8,r8,r2
	ldr	r2,[sp,#16]
	and	r12,r12,r3
	add	r4,r4,r8
	vst1.32	{q8},[r1,:128]!
	add	r8,r8,r0,ror#2
	eor	r12,r12,r10
	vext.8	q8,q1,q2,#4
	add	r7,r7,r2
	eor	r2,r5,r6
	eor	r0,r4,r4,ror#5
	vext.8	q9,q3,q0,#4
	add	r8,r8,r12
	and	r2,r2,r4
	eor	r12,r0,r4,ror#19
	vshr.u32	q10,q8,#7
	eor	r0,r8,r8,ror#11
	eor	r2,r2,r6
	vadd.i32	q1,q1,q9
	add	r7,r7,r12,ror#6
	eor	r12,r8,r9
	vshr.u32	q9,q8,#3
	eor	r0,r0,r8,ror#20
	add	r7,r7,r2
	vsli.32	q10,q8,#25
	ldr	r2,[sp,#20]
	and	r3,r3,r12
	vshr.u32	q11,q8,#18
	add	r11,r11,r7
	add	r7,r7,r0,ror#2
	eor	r3,r3,r9
	veor	q9,q9,q10
	add	r6,r6,r2
	vsli.32	q11,q8,#14
	eor	r2,r4,r5
	eor	r0,r11,r11,ror#5
	vshr.u32	d24,d1,#17
	add	r7,r7,r3
	and	r2,r2,r11
	veor	q9,q9,q11
	eor	r3,r0,r11,ror#19
	eor	r0,r7,r7,ror#11
	vsli.32	d24,d1,#15
	eor	r2,r2,r5
	add	r6,r6,r3,ror#6
	vshr.u32	d25,d1,#10
	eor	r3,r7,r8
	eor	r0,r0,r7,ror#20
	vadd.i32	q1,q1,q9
	add	r6,r6,r2
	ldr	r2,[sp,#24]
	veor	d25,d25,d24
	and	r12,r12,r3
	add	r10,r10,r6
	vshr.u32	d24,d1,#19
	add	r6,r6,r0,ror#2
	eor	r12,r12,r8
	vsli.32	d24,d1,#13
	add	r5,r5,r2
	eor	r2,r11,r4
	veor	d25,d25,d24
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12
	vadd.i32	d2,d2,d25
	and	r2,r2,r10
	eor	r12,r0,r10,ror#19
	vshr.u32	d24,d2,#17
	eor	r0,r6,r6,ror#11
	eor	r2,r2,r4
	vsli.32	d24,d2,#15
	add	r5,r5,r12,ror#6
	eor	r12,r6,r7
	vshr.u32	d25,d2,#10
	eor	r0,r0,r6,ror#20
	add	r5,r5,r2
	veor	d25,d25,d24
	ldr	r2,[sp,#28]
	and	r3,r3,r12
	vshr.u32	d24,d2,#19
	add	r9,r9,r5
	add	r5,r5,r0,ror#2
	eor	r3,r3,r7
	vld1.32	{q8},[r14,:128]!
	add	r4,r4,r2
	vsli.32	d24,d2,#13
	eor	r2,r10,r11
	eor	r0,r9,r9,ror#5
	veor	d25,d25,d24
	add	r5,r5,r3
	and	r2,r2,r9
	vadd.i32	d3,d3,d25
	eor	r3,r0,r9,ror#19
	eor	r0,r5,r5,ror#11
	vadd.i32	q8,q8,q1
	eor	r2,r2,r11
	add	r4,r4,r3,ror#6
	eor	r3,r5,r6
	eor	r0,r0,r5,ror#20
	add	r4,r4,r2
	ldr	r2,[sp,#32]
	and	r12,r12,r3
	add	r8,r8,r4
	vst1.32	{q8},[r1,:128]!
	add	r4,r4,r0,ror#2
	eor	r12,r12,r6
	vext.8	q8,q2,q3,#4
	add	r11,r11,r2
	eor	r2,r9,r10
	eor	r0,r8,r8,ror#5
	vext.8	q9,q0,q1,#4
	add	r4,r4,r12
	and	r2,r2,r8
	eor	r12,r0,r8,ror#19
	vshr.u32	q10,q8,#7
	eor	r0,r4,r4,ror#11
	eor	r2,r2,r10
	vadd.i32	q2,q2,q9
	add	r11,r11,r12,ror#6
	eor	r12,r4,r5
	vshr.u32	q9,q8,#3
	eor	r0,r0,r4,ror#20
	add	r11,r11,r2
	vsli.32	q10,q8,#25
	ldr	r2,[sp,#36]
	and	r3,r3,r12
	vshr.u32	q11,q8,#18
	add	r7,r7,r11
	add	r11,r11,r0,ror#2
	eor	r3,r3,r5
	veor	q9,q9,q10
	add	r10,r10,r2
	vsli.32	q11,q8,#14
	eor	r2,r8,r9
	eor	r0,r7,r7,ror#5
	vshr.u32	d24,d3,#17
	add	r11,r11,r3
	and	r2,r2,r7
	veor	q9,q9,q11
	eor	r3,r0,r7,ror#19
	eor	r0,r11,r11,ror#11
	vsli.32	d24,d3,#15
	eor	r2,r2,r9
	add	r10,r10,r3,ror#6
	vshr.u32	d25,d3,#10
	eor	r3,r11,r4
	eor	r0,r0,r11,ror#20
	vadd.i32	q2,q2,q9
	add	r10,r10,r2
	ldr	r2,[sp,#40]
	veor	d25,d25,d24
	and	r12,r12,r3
	add	r6,r6,r10
	vshr.u32	d24,d3,#19
	add	r10,r10,r0,ror#2
	eor	r12,r12,r4
	vsli.32	d24,d3,#13
	add	r9,r9,r2
	eor	r2,r7,r8
	veor	d25,d25,d24
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12
	vadd.i32	d4,d4,d25
	and	r2,r2,r6
	eor	r12,r0,r6,ror#19
	vshr.u32	d24,d4,#17
	eor	r0,r10,r10,ror#11
	eor	r2,r2,r8
	vsli.32	d24,d4,#15
	add	r9,r9,r12,ror#6
	eor	r12,r10,r11
	vshr.u32	d25,d4,#10
	eor	r0,r0,r10,ror#20
	add	r9,r9,r2
	veor	d25,d25,d24
	ldr	r2,[sp,#44]
	and	r3,r3,r12
	vshr.u32	d24,d4,#19
	add	r5,r5,r9
	add	r9,r9,r0,ror#2
	eor	r3,r3,r11
	vld1.32	{q8},[r14,:128]!
	add	r8,r8,r2
	vsli.32	d24,d4,#13
	eor	r2,r6,r7
	eor	r0,r5,r5,ror#5
	veor	d25,d25,d24
	add	r9,r9,r3
	and	r2,r2,r5
	vadd.i32	d5,d5,d25
	eor	r3,r0,r5,ror#19
	eor	r0,r9,r9,ror#11
	vadd.i32	q8,q8,q2
	eor	r2,r2,r7
	add	r8,r8,r3,ror#6
	eor	r3,r9,r10
	eor	r0,r0,r9,ror#20
	add	r8,r8,r2
	ldr	r2,[sp,#48]
	and	r12,r12,r3
	add	r4,r4,r8
	vst1.32	{q8},[r1,:128]!
	add	r8,r8,r0,ror#2
	eor	r12,r12,r10
	vext.8	q8,q3,q0,#4
	add	r7,r7,r2
	eor	r2,r5,r6
	eor	r0,r4,r4,ror#5
	vext.8	q9,q1,q2,#4
	add	r8,r8,r12
	and	r2,r2,r4
	eor	r12,r0,r4,ror#19
	vshr.u32	q10,q8,#7
	eor	r0,r8,r8,ror#11
	eor	r2,r2,r6
	vadd.i32	q3,q3,q9
	add	r7,r7,r12,ror#6
	eor	r12,r8,r9
	vshr.u32	q9,q8,#3
	eor	r0,r0,r8,ror#20
	add	r7,r7,r2
	vsli.32	q10,q8,#25
	ldr	r2,[sp,#52]
	and	r3,r3,r12
	vshr.u32	q11,q8,#18
	add	r11,r11,r7
	add	r7,r7,r0,ror#2
	eor	r3,r3,r9
	veor	q9,q9,q10
	add	r6,r6,r2
	vsli.32	q11,q8,#14
	eor	r2,r4,r5
	eor	r0,r11,r11,ror#5
	vshr.u32	d24,d5,#17
	add	r7,r7,r3
	and	r2,r2,r11
	veor	q9,q9,q11
	eor	r3,r0,r11,ror#19
	eor	r0,r7,r7,ror#11
	vsli.32	d24,d5,#15
	eor	r2,r2,r5
	add	r6,r6,r3,ror#6
	vshr.u32	d25,d5,#10
	eor	r3,r7,r8
	eor	r0,r0,r7,ror#20
	vadd.i32	q3,q3,q9
	add	r6,r6,r2
	ldr	r2,[sp,#56]
	veor	d25,d25,d24
	and	r12,r12,r3
	add	r10,r10,r6
	vshr.u32	d24,d5,#19
	add	r6,r6,r0,ror#2
	eor	r12,r12,r8
	vsli.32	d24,d5,#13
	add	r5,r5,r2
	eor	r2,r11,r4
	veor	d25,d25,d24
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12
	vadd.i32	d6,d6,d25
	and	r2,r2,r10
	eor	r12,r0,r10,ror#19
	vshr.u32	d24,d6,#17
	eor	r0,r6,r6,ror#11
	eor	r2,r2,r4
	vsli.32	d24,d6,#15
	add	r5,r5,r12,ror#6
	eor	r12,r6,r7
	vshr.u32	d25,d6,#10
	eor	r0,r0,r6,ror#20
	add	r5,r5,r2
	veor	d25,d25,d24
	ldr	r2,[sp,#60]
	and	r3,r3,r12
	vshr.u32	d24,d6,#19
	add	r9,r9,r5
	add	r5,r5,r0,ror#2
	eor	r3,r3,r7
	vld1.32	{q8},[r14,:128]!
	add	r4,r4,r2
	vsli.32	d24,d6,#13
	eor	r2,r10,r11
	eor	r0,r9,r9,ror#5
	veor	d25,d25,d24
	add	r5,r5,r3
	and	r2,r2,r9
	vadd.i32	d7,d7,d25
	eor	r3,r0,r9,ror#19
	eor	r0,r5,r5,ror#11
	vadd.i32	q8,q8,q3
	eor	r2,r2,r11
	add	r4,r4,r3,ror#6
	eor	r3,r5,r6
	eor	r0,r0,r5,ror#20
	add	r4,r4,r2
	ldr	r2,[r14]
	and	r12,r12,r3
	add	r8,r8,r4
	vst1.32	{q8},[r1,:128]!
	add	r4,r4,r0,ror#2
	eor	r12,r12,r6
	teq	r2,#0				@ check for K256 terminator
	ldr	r2,[sp,#0]
	sub	r1,r1,#64
	bne	.L_00_48

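@ Final 16 rounds: there is no more schedule to compute, so the vector
@ unit instead loads, byte-swaps and pre-adds K256 to the next input
@ block. At the end of the input the pointer is stepped back one block
@ so the speculative loads stay in bounds ("avoid SEGV" below); the
@ redundantly precomputed data is simply discarded when the loop exits.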
	ldr	r1,[sp,#68]
	ldr	r0,[sp,#72]
	sub	r14,r14,#256	@ rewind r14
	teq	r1,r0
	it	eq
	subeq	r1,r1,#64	@ avoid SEGV
	vld1.8	{q0},[r1]!	@ load next input block
	vld1.8	{q1},[r1]!
	vld1.8	{q2},[r1]!
	vld1.8	{q3},[r1]!
	it	ne
	strne	r1,[sp,#68]
	mov	r1,sp
	add	r11,r11,r2
	eor	r2,r9,r10
	eor	r0,r8,r8,ror#5
	add	r4,r4,r12
	vld1.32	{q8},[r14,:128]!
	and	r2,r2,r8
	eor	r12,r0,r8,ror#19
	eor	r0,r4,r4,ror#11
	eor	r2,r2,r10
	vrev32.8	q0,q0
	add	r11,r11,r12,ror#6
	eor	r12,r4,r5
	eor	r0,r0,r4,ror#20
	add	r11,r11,r2
	vadd.i32	q8,q8,q0
	ldr	r2,[sp,#4]
	and	r3,r3,r12
	add	r7,r7,r11
	add	r11,r11,r0,ror#2
	eor	r3,r3,r5
	add	r10,r10,r2
	eor	r2,r8,r9
	eor	r0,r7,r7,ror#5
	add	r11,r11,r3
	and	r2,r2,r7
	eor	r3,r0,r7,ror#19
	eor	r0,r11,r11,ror#11
	eor	r2,r2,r9
	add	r10,r10,r3,ror#6
	eor	r3,r11,r4
	eor	r0,r0,r11,ror#20
	add	r10,r10,r2
	ldr	r2,[sp,#8]
	and	r12,r12,r3
	add	r6,r6,r10
	add	r10,r10,r0,ror#2
	eor	r12,r12,r4
	add	r9,r9,r2
	eor	r2,r7,r8
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12
	and	r2,r2,r6
	eor	r12,r0,r6,ror#19
	eor	r0,r10,r10,ror#11
	eor	r2,r2,r8
	add	r9,r9,r12,ror#6
	eor	r12,r10,r11
	eor	r0,r0,r10,ror#20
	add	r9,r9,r2
	ldr	r2,[sp,#12]
	and	r3,r3,r12
	add	r5,r5,r9
	add	r9,r9,r0,ror#2
	eor	r3,r3,r11
	add	r8,r8,r2
	eor	r2,r6,r7
	eor	r0,r5,r5,ror#5
	add	r9,r9,r3
	and	r2,r2,r5
	eor	r3,r0,r5,ror#19
	eor	r0,r9,r9,ror#11
	eor	r2,r2,r7
	add	r8,r8,r3,ror#6
	eor	r3,r9,r10
	eor	r0,r0,r9,ror#20
	add	r8,r8,r2
	ldr	r2,[sp,#16]
	and	r12,r12,r3
	add	r4,r4,r8
	add	r8,r8,r0,ror#2
	eor	r12,r12,r10
	vst1.32	{q8},[r1,:128]!
	add	r7,r7,r2
	eor	r2,r5,r6
	eor	r0,r4,r4,ror#5
	add	r8,r8,r12
	vld1.32	{q8},[r14,:128]!
	and	r2,r2,r4
	eor	r12,r0,r4,ror#19
	eor	r0,r8,r8,ror#11
	eor	r2,r2,r6
	vrev32.8	q1,q1
	add	r7,r7,r12,ror#6
	eor	r12,r8,r9
	eor	r0,r0,r8,ror#20
	add	r7,r7,r2
	vadd.i32	q8,q8,q1
	ldr	r2,[sp,#20]
	and	r3,r3,r12
	add	r11,r11,r7
	add	r7,r7,r0,ror#2
	eor	r3,r3,r9
	add	r6,r6,r2
	eor	r2,r4,r5
	eor	r0,r11,r11,ror#5
	add	r7,r7,r3
	and	r2,r2,r11
	eor	r3,r0,r11,ror#19
	eor	r0,r7,r7,ror#11
	eor	r2,r2,r5
	add	r6,r6,r3,ror#6
	eor	r3,r7,r8
	eor	r0,r0,r7,ror#20
	add	r6,r6,r2
	ldr	r2,[sp,#24]
	and	r12,r12,r3
	add	r10,r10,r6
	add	r6,r6,r0,ror#2
	eor	r12,r12,r8
	add	r5,r5,r2
	eor	r2,r11,r4
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12
	and	r2,r2,r10
	eor	r12,r0,r10,ror#19
	eor	r0,r6,r6,ror#11
	eor	r2,r2,r4
	add	r5,r5,r12,ror#6
	eor	r12,r6,r7
	eor	r0,r0,r6,ror#20
	add	r5,r5,r2
	ldr	r2,[sp,#28]
	and	r3,r3,r12
	add	r9,r9,r5
	add	r5,r5,r0,ror#2
	eor	r3,r3,r7
	add	r4,r4,r2
	eor	r2,r10,r11
	eor	r0,r9,r9,ror#5
	add	r5,r5,r3
	and	r2,r2,r9
	eor	r3,r0,r9,ror#19
	eor	r0,r5,r5,ror#11
	eor	r2,r2,r11
	add	r4,r4,r3,ror#6
	eor	r3,r5,r6
	eor	r0,r0,r5,ror#20
	add	r4,r4,r2
	ldr	r2,[sp,#32]
	and	r12,r12,r3
	add	r8,r8,r4
	add	r4,r4,r0,ror#2
	eor	r12,r12,r6
	vst1.32	{q8},[r1,:128]!
	add	r11,r11,r2
	eor	r2,r9,r10
	eor	r0,r8,r8,ror#5
	add	r4,r4,r12
	vld1.32	{q8},[r14,:128]!
	and	r2,r2,r8
	eor	r12,r0,r8,ror#19
	eor	r0,r4,r4,ror#11
	eor	r2,r2,r10
	vrev32.8	q2,q2
	add	r11,r11,r12,ror#6
	eor	r12,r4,r5
	eor	r0,r0,r4,ror#20
	add	r11,r11,r2
	vadd.i32	q8,q8,q2
	ldr	r2,[sp,#36]
	and	r3,r3,r12
	add	r7,r7,r11
	add	r11,r11,r0,ror#2
	eor	r3,r3,r5
	add	r10,r10,r2
	eor	r2,r8,r9
	eor	r0,r7,r7,ror#5
	add	r11,r11,r3
	and	r2,r2,r7
	eor	r3,r0,r7,ror#19
	eor	r0,r11,r11,ror#11
	eor	r2,r2,r9
	add	r10,r10,r3,ror#6
	eor	r3,r11,r4
	eor	r0,r0,r11,ror#20
	add	r10,r10,r2
	ldr	r2,[sp,#40]
	and	r12,r12,r3
	add	r6,r6,r10
	add	r10,r10,r0,ror#2
	eor	r12,r12,r4
	add	r9,r9,r2
	eor	r2,r7,r8
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12
	and	r2,r2,r6
	eor	r12,r0,r6,ror#19
	eor	r0,r10,r10,ror#11
	eor	r2,r2,r8
	add	r9,r9,r12,ror#6
	eor	r12,r10,r11
	eor	r0,r0,r10,ror#20
	add	r9,r9,r2
	ldr	r2,[sp,#44]
	and	r3,r3,r12
	add	r5,r5,r9
	add	r9,r9,r0,ror#2
	eor	r3,r3,r11
	add	r8,r8,r2
	eor	r2,r6,r7
	eor	r0,r5,r5,ror#5
	add	r9,r9,r3
	and	r2,r2,r5
	eor	r3,r0,r5,ror#19
	eor	r0,r9,r9,ror#11
	eor	r2,r2,r7
	add	r8,r8,r3,ror#6
	eor	r3,r9,r10
	eor	r0,r0,r9,ror#20
	add	r8,r8,r2
	ldr	r2,[sp,#48]
	and	r12,r12,r3
	add	r4,r4,r8
	add	r8,r8,r0,ror#2
	eor	r12,r12,r10
	vst1.32	{q8},[r1,:128]!
	add	r7,r7,r2
	eor	r2,r5,r6
	eor	r0,r4,r4,ror#5
	add	r8,r8,r12
	vld1.32	{q8},[r14,:128]!
	and	r2,r2,r4
	eor	r12,r0,r4,ror#19
	eor	r0,r8,r8,ror#11
	eor	r2,r2,r6
	vrev32.8	q3,q3
	add	r7,r7,r12,ror#6
	eor	r12,r8,r9
	eor	r0,r0,r8,ror#20
	add	r7,r7,r2
	vadd.i32	q8,q8,q3
	ldr	r2,[sp,#52]
	and	r3,r3,r12
	add	r11,r11,r7
	add	r7,r7,r0,ror#2
	eor	r3,r3,r9
	add	r6,r6,r2
	eor	r2,r4,r5
	eor	r0,r11,r11,ror#5
	add	r7,r7,r3
	and	r2,r2,r11
	eor	r3,r0,r11,ror#19
	eor	r0,r7,r7,ror#11
	eor	r2,r2,r5
	add	r6,r6,r3,ror#6
	eor	r3,r7,r8
	eor	r0,r0,r7,ror#20
	add	r6,r6,r2
	ldr	r2,[sp,#56]
	and	r12,r12,r3
	add	r10,r10,r6
	add	r6,r6,r0,ror#2
	eor	r12,r12,r8
	add	r5,r5,r2
	eor	r2,r11,r4
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12
	and	r2,r2,r10
	eor	r12,r0,r10,ror#19
	eor	r0,r6,r6,ror#11
	eor	r2,r2,r4
	add	r5,r5,r12,ror#6
	eor	r12,r6,r7
	eor	r0,r0,r6,ror#20
	add	r5,r5,r2
	ldr	r2,[sp,#60]
	and	r3,r3,r12
	add	r9,r9,r5
	add	r5,r5,r0,ror#2
	eor	r3,r3,r7
	add	r4,r4,r2
	eor	r2,r10,r11
	eor	r0,r9,r9,ror#5
	add	r5,r5,r3
	and	r2,r2,r9
	eor	r3,r0,r9,ror#19
	eor	r0,r5,r5,ror#11
	eor	r2,r2,r11
	add	r4,r4,r3,ror#6
	eor	r3,r5,r6
	eor	r0,r0,r5,ror#20
	add	r4,r4,r2
	ldr	r2,[sp,#64]
	and	r12,r12,r3
	add	r8,r8,r4
	add	r4,r4,r0,ror#2
	eor	r12,r12,r6
	vst1.32	{q8},[r1,:128]!
	ldr	r0,[r2,#0]
	add	r4,r4,r12	@ h+=Maj(a,b,c) from the past
	ldr	r12,[r2,#4]
	ldr	r3,[r2,#8]
	ldr	r1,[r2,#12]
	add	r4,r4,r0	@ accumulate
	ldr	r0,[r2,#16]
	add	r5,r5,r12
	ldr	r12,[r2,#20]
	add	r6,r6,r3
	ldr	r3,[r2,#24]
	add	r7,r7,r1
	ldr	r1,[r2,#28]
	add	r8,r8,r0
	str	r4,[r2],#4
	add	r9,r9,r12
	str	r5,[r2],#4
	add	r10,r10,r3
	str	r6,[r2],#4
	add	r11,r11,r1
	str	r7,[r2],#4
	stmia	r2,{r8,r9,r10,r11}

	ittte	ne
	movne	r1,sp
	ldrne	r2,[sp,#0]
	eorne	r12,r12,r12
	ldreq	sp,[sp,#76]	@ restore original sp
	itt	ne
	eorne	r3,r5,r6
	bne	.L_00_48

	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc}
.size	sha256_block_data_order_neon,.-sha256_block_data_order_neon
#endif
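@ ARMv8 variant: processes blocks with the SHA-256 crypto extensions
@ (sha256h, sha256h2, sha256su0, sha256su1), retiring four rounds per
@ sha256h/sha256h2 pair.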
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)

# if defined(__thumb2__)
#  define INST(a,b,c,d)	.byte	c,d|0xc,a,b
# else
#  define INST(a,b,c,d)	.byte	a,b,c,d
# endif

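@ INST emits the sha256* instructions as raw bytes, so the file still
@ assembles with toolchains that do not accept these mnemonics; the two
@ definitions reflect the different byte order of the ARM and Thumb-2
@ encodings of the same instruction.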
.type	sha256_block_data_order_armv8,%function
.align	5
sha256_block_data_order_armv8:
.LARMv8:
	vld1.32	{q0,q1},[r0]
	sub	r3,r3,#256+32
	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
	b	.Loop_v8

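@ One 64-byte block per .Loop_v8 iteration: q8-q11 carry the message
@ words, q0/q1 the state (copied aside to q14/q15 for the final
@ accumulation), while r3 walks the K256 table and is rewound at the
@ bottom of the loop.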
.align	4
.Loop_v8:
	vld1.8	{q8,q9},[r1]!
	vld1.8	{q10,q11},[r1]!
	vld1.32	{q12},[r3]!
	vrev32.8	q8,q8
	vrev32.8	q9,q9
	vrev32.8	q10,q10
	vrev32.8	q11,q11
	vmov	q14,q0	@ offload
	vmov	q15,q1
	teq	r1,r2
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q8
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12

	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q9
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13

	vld1.32	{q13},[r3]
	vadd.i32	q12,q12,q10
	sub	r3,r3,#256-16	@ rewind
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12

	vadd.i32	q13,q13,q11
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13

	vadd.i32	q0,q0,q14
	vadd.i32	q1,q1,q15
	it	ne
	bne	.Loop_v8

	vst1.32	{q0,q1},[r0]

	bx	lr	@ bx lr
.size	sha256_block_data_order_armv8,.-sha256_block_data_order_armv8
#endif
.byte	83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,47,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	2
.align	2
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
.comm	OPENSSL_armcap_P,4,4
.hidden	OPENSSL_armcap_P
#endif
#endif
#endif  // !OPENSSL_NO_ASM