// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif

#if !defined(OPENSSL_NO_ASM)
#if defined(__arm__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
@ Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved.
@
@ Licensed under the OpenSSL license (the "License"). You may not use
@ this file except in compliance with the License. You can obtain a copy
@ in the file LICENSE in the source distribution or at
@ https://www.openssl.org/source/license.html


@ ====================================================================
@ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@ project. The module is, however, dual licensed under OpenSSL and
@ CRYPTOGAMS licenses depending on where you obtain it. For further
@ details see http://www.openssl.org/~appro/cryptogams/.
@
@ Permission to use under GPL terms is granted.
@ ====================================================================

@ SHA256 block procedure for ARMv4. May 2007.

@ Performance is ~2x better than gcc 3.4 generated code and in "abso-
@ lute" terms is ~2250 cycles per 64-byte block or ~35 cycles per
@ byte [on single-issue Xscale PXA250 core].

@ July 2010.
@
@ Rescheduling for dual-issue pipeline resulted in 22% improvement on
@ Cortex A8 core and ~20 cycles per processed byte.

@ February 2011.
@
@ Profiler-assisted and platform-specific optimization resulted in 16%
@ improvement on Cortex A8 core and ~15.4 cycles per processed byte.

@ September 2013.
@
@ Add NEON implementation. On Cortex A8 it was measured to process one
@ byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon
@ S4 does it in 12.5 cycles too, but it's 50% faster than integer-only
@ code (meaning that latter performs sub-optimally, nothing was done
@ about it).

@ May 2014.
@
@ Add ARMv8 code path performing at 2.0 cpb on Apple A7.

#ifndef __KERNEL__
# include <openssl/arm_arch.h>
#else
# define __ARM_ARCH__ __LINUX_ARM_ARCH__
# define __ARM_MAX_ARCH__ 7
#endif

@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both
@ ARMv7 and ARMv8 processors. It does have ARMv8-only code, but those
@ instructions are manually-encoded. (See unsha256.)
.arch armv7-a

.text
#if defined(__thumb2__)
.syntax unified
.thumb
#else
.code 32
#endif

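@ K256 below holds the SHA-256 round constants of FIPS 180-4: the
@ first 32 bits of the fractional parts of the cube roots of the
@ first 64 primes.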
.type	K256,%object
.align	5
K256:
.word	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
.word	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
.word	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
.word	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
.word	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
.word	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
.word	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
.word	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
.word	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
.word	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
.word	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
.word	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
.word	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
.word	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
.word	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
.word	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
.size	K256,.-K256
.word	0				@ terminator
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
.LOPENSSL_armcap:
.word	OPENSSL_armcap_P-.Lsha256_block_data_order
#endif
.align	5

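@ The entry point below follows the usual OpenSSL/BoringSSL block-
@ function convention. As a hedged C-level sketch (this prototype is
@ an assumption, not stated in this file):
@
@	void sha256_block_data_order(uint32_t state[8],
@	                             const uint8_t *data, size_t num);
@
@ r0 = state (the eight working words a..h), r1 = input, r2 = number
@ of 64-byte blocks; the "add r2,r1,r2,lsl#6" below converts the
@ block count into an end-of-input pointer.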
.globl	sha256_block_data_order
.hidden	sha256_block_data_order
.type	sha256_block_data_order,%function
sha256_block_data_order:
.Lsha256_block_data_order:
#if __ARM_ARCH__<7 && !defined(__thumb2__)
	sub	r3,pc,#8		@ sha256_block_data_order
#else
	adr	r3,.Lsha256_block_data_order
#endif
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
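@ Runtime dispatch: OPENSSL_armcap_P is a bitmask of CPU features
@ detected at startup. If the ARMv8 SHA-256 extension or NEON is
@ available, control branches to the .LARMv8 or .LNEON code paths
@ instead of falling through to the generic integer code.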
	ldr	r12,.LOPENSSL_armcap
	ldr	r12,[r3,r12]		@ OPENSSL_armcap_P
#ifdef __APPLE__
	ldr	r12,[r12]
#endif
	tst	r12,#ARMV8_SHA256
	bne	.LARMv8
	tst	r12,#ARMV7_NEON
	bne	.LNEON
#endif
	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
	stmdb	sp!,{r0,r1,r2,r4-r11,lr}
	ldmia	r0,{r4,r5,r6,r7,r8,r9,r10,r11}
	sub	r14,r3,#256+32	@ K256
	sub	sp,sp,#16*4		@ alloca(X[16])
.Loop:
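@ Each unrolled round below is the standard FIPS 180-4 step; as a
@ sketch in the spec's notation:
@
@	T1 = h + Sigma1(e) + Ch(e,f,g) + K256[i] + X[i]
@	T2 = Sigma0(a) + Maj(a,b,c)
@	(a,b,c,d,e,f,g,h) = (T1+T2, a, b, c, d+T1, e, f, g)
@
@ Scheduling tricks: Sigma1(e) is formed as ROTR(e^ROTR(e,5)^ROTR(e,19),6),
@ Sigma0(a) as ROTR(a^ROTR(a,11)^ROTR(a,20),2), Ch(e,f,g) as ((f^g)&e)^g,
@ and the h+=Maj(a,b,c) addition is deferred into the next round
@ ("from the past"), reusing the a^b value as that round's b^c.
@ Frame layout inside the loop: sp[0..15] hold X[16],
@ sp+#16*4 = ctx, sp+#17*4 = inp, sp+#18*4 = inp+len.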
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r5,r6		@ magic
	eor	r12,r12,r12
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 0
# if 0==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r8,r8,ror#5
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 0
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 0==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r8,r8,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r11,r11,r2			@ h+=X[i]
	str	r2,[sp,#0*4]
	eor	r2,r9,r10
	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r8
	add	r11,r11,r12			@ h+=K256[i]
	eor	r2,r2,r10			@ Ch(e,f,g)
	eor	r0,r4,r4,ror#11
	add	r11,r11,r2			@ h+=Ch(e,f,g)
#if 0==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 0<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r4,r5			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#2*4]		@ from future BODY_16_xx
	eor	r12,r4,r5			@ a^b, b^c in next round
	ldr	r1,[sp,#15*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r4,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r7,r7,r11			@ d+=h
	eor	r3,r3,r5			@ Maj(a,b,c)
	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 1
# if 1==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r7,r7,ror#5
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 1
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 1==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r7,r7,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r10,r10,r2			@ h+=X[i]
	str	r2,[sp,#1*4]
	eor	r2,r8,r9
	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r7
	add	r10,r10,r3			@ h+=K256[i]
	eor	r2,r2,r9			@ Ch(e,f,g)
	eor	r0,r11,r11,ror#11
	add	r10,r10,r2			@ h+=Ch(e,f,g)
#if 1==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 1<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r11,r4			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#3*4]		@ from future BODY_16_xx
	eor	r3,r11,r4			@ a^b, b^c in next round
	ldr	r1,[sp,#0*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r11,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r6,r6,r10			@ d+=h
	eor	r12,r12,r4			@ Maj(a,b,c)
	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 2
# if 2==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 2
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 2==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r6,r6,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r9,r9,r2			@ h+=X[i]
	str	r2,[sp,#2*4]
	eor	r2,r7,r8
	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r6
	add	r9,r9,r12			@ h+=K256[i]
	eor	r2,r2,r8			@ Ch(e,f,g)
	eor	r0,r10,r10,ror#11
	add	r9,r9,r2			@ h+=Ch(e,f,g)
#if 2==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 2<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r10,r11			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#4*4]		@ from future BODY_16_xx
	eor	r12,r10,r11			@ a^b, b^c in next round
	ldr	r1,[sp,#1*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r10,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r5,r5,r9			@ d+=h
	eor	r3,r3,r11			@ Maj(a,b,c)
	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 3
# if 3==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r5,r5,ror#5
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 3
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 3==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r5,r5,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r8,r8,r2			@ h+=X[i]
	str	r2,[sp,#3*4]
	eor	r2,r6,r7
	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r5
	add	r8,r8,r3			@ h+=K256[i]
	eor	r2,r2,r7			@ Ch(e,f,g)
	eor	r0,r9,r9,ror#11
	add	r8,r8,r2			@ h+=Ch(e,f,g)
#if 3==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 3<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r9,r10			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#5*4]		@ from future BODY_16_xx
	eor	r3,r9,r10			@ a^b, b^c in next round
	ldr	r1,[sp,#2*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r9,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r4,r4,r8			@ d+=h
	eor	r12,r12,r10			@ Maj(a,b,c)
	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 4
# if 4==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r4,r4,ror#5
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 4
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 4==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r4,r4,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r7,r7,r2			@ h+=X[i]
	str	r2,[sp,#4*4]
	eor	r2,r5,r6
	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r4
	add	r7,r7,r12			@ h+=K256[i]
	eor	r2,r2,r6			@ Ch(e,f,g)
	eor	r0,r8,r8,ror#11
	add	r7,r7,r2			@ h+=Ch(e,f,g)
#if 4==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 4<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r8,r9			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#6*4]		@ from future BODY_16_xx
	eor	r12,r8,r9			@ a^b, b^c in next round
	ldr	r1,[sp,#3*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r8,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r11,r11,r7			@ d+=h
	eor	r3,r3,r9			@ Maj(a,b,c)
	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 5
# if 5==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r11,r11,ror#5
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 5
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 5==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r11,r11,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r6,r6,r2			@ h+=X[i]
	str	r2,[sp,#5*4]
	eor	r2,r4,r5
	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r11
	add	r6,r6,r3			@ h+=K256[i]
	eor	r2,r2,r5			@ Ch(e,f,g)
	eor	r0,r7,r7,ror#11
	add	r6,r6,r2			@ h+=Ch(e,f,g)
#if 5==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 5<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r7,r8			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#7*4]		@ from future BODY_16_xx
	eor	r3,r7,r8			@ a^b, b^c in next round
	ldr	r1,[sp,#4*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r7,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r10,r10,r6			@ d+=h
	eor	r12,r12,r8			@ Maj(a,b,c)
	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 6
# if 6==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 6
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 6==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r10,r10,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r5,r5,r2			@ h+=X[i]
	str	r2,[sp,#6*4]
	eor	r2,r11,r4
	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r10
	add	r5,r5,r12			@ h+=K256[i]
	eor	r2,r2,r4			@ Ch(e,f,g)
	eor	r0,r6,r6,ror#11
	add	r5,r5,r2			@ h+=Ch(e,f,g)
#if 6==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 6<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r6,r7			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#8*4]		@ from future BODY_16_xx
	eor	r12,r6,r7			@ a^b, b^c in next round
	ldr	r1,[sp,#5*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r6,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r9,r9,r5			@ d+=h
	eor	r3,r3,r7			@ Maj(a,b,c)
	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 7
# if 7==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r9,r9,ror#5
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 7
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 7==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r9,r9,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r4,r4,r2			@ h+=X[i]
	str	r2,[sp,#7*4]
	eor	r2,r10,r11
	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r9
	add	r4,r4,r3			@ h+=K256[i]
	eor	r2,r2,r11			@ Ch(e,f,g)
	eor	r0,r5,r5,ror#11
	add	r4,r4,r2			@ h+=Ch(e,f,g)
#if 7==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 7<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r5,r6			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#9*4]		@ from future BODY_16_xx
	eor	r3,r5,r6			@ a^b, b^c in next round
	ldr	r1,[sp,#6*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r5,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r8,r8,r4			@ d+=h
	eor	r12,r12,r6			@ Maj(a,b,c)
	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 8
# if 8==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r8,r8,ror#5
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 8
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 8==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r8,r8,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r11,r11,r2			@ h+=X[i]
	str	r2,[sp,#8*4]
	eor	r2,r9,r10
	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r8
	add	r11,r11,r12			@ h+=K256[i]
	eor	r2,r2,r10			@ Ch(e,f,g)
	eor	r0,r4,r4,ror#11
	add	r11,r11,r2			@ h+=Ch(e,f,g)
#if 8==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 8<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r4,r5			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#10*4]		@ from future BODY_16_xx
	eor	r12,r4,r5			@ a^b, b^c in next round
	ldr	r1,[sp,#7*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r4,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r7,r7,r11			@ d+=h
	eor	r3,r3,r5			@ Maj(a,b,c)
	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 9
# if 9==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r7,r7,ror#5
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 9
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 9==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r7,r7,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r10,r10,r2			@ h+=X[i]
	str	r2,[sp,#9*4]
	eor	r2,r8,r9
	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r7
	add	r10,r10,r3			@ h+=K256[i]
	eor	r2,r2,r9			@ Ch(e,f,g)
	eor	r0,r11,r11,ror#11
	add	r10,r10,r2			@ h+=Ch(e,f,g)
#if 9==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 9<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r11,r4			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#11*4]		@ from future BODY_16_xx
	eor	r3,r11,r4			@ a^b, b^c in next round
	ldr	r1,[sp,#8*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r11,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r6,r6,r10			@ d+=h
	eor	r12,r12,r4			@ Maj(a,b,c)
	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 10
# if 10==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 10
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 10==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r6,r6,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r9,r9,r2			@ h+=X[i]
	str	r2,[sp,#10*4]
	eor	r2,r7,r8
	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r6
	add	r9,r9,r12			@ h+=K256[i]
	eor	r2,r2,r8			@ Ch(e,f,g)
	eor	r0,r10,r10,ror#11
	add	r9,r9,r2			@ h+=Ch(e,f,g)
#if 10==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 10<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r10,r11			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#12*4]		@ from future BODY_16_xx
	eor	r12,r10,r11			@ a^b, b^c in next round
	ldr	r1,[sp,#9*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r10,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r5,r5,r9			@ d+=h
	eor	r3,r3,r11			@ Maj(a,b,c)
	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 11
# if 11==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r5,r5,ror#5
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 11
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 11==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r5,r5,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r8,r8,r2			@ h+=X[i]
	str	r2,[sp,#11*4]
	eor	r2,r6,r7
	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r5
	add	r8,r8,r3			@ h+=K256[i]
	eor	r2,r2,r7			@ Ch(e,f,g)
	eor	r0,r9,r9,ror#11
	add	r8,r8,r2			@ h+=Ch(e,f,g)
#if 11==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 11<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r9,r10			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#13*4]		@ from future BODY_16_xx
	eor	r3,r9,r10			@ a^b, b^c in next round
	ldr	r1,[sp,#10*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r9,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r4,r4,r8			@ d+=h
	eor	r12,r12,r10			@ Maj(a,b,c)
	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 12
# if 12==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r4,r4,ror#5
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 12
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 12==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r4,r4,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r7,r7,r2			@ h+=X[i]
	str	r2,[sp,#12*4]
	eor	r2,r5,r6
	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r4
	add	r7,r7,r12			@ h+=K256[i]
	eor	r2,r2,r6			@ Ch(e,f,g)
	eor	r0,r8,r8,ror#11
	add	r7,r7,r2			@ h+=Ch(e,f,g)
#if 12==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 12<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r8,r9			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#14*4]		@ from future BODY_16_xx
	eor	r12,r8,r9			@ a^b, b^c in next round
	ldr	r1,[sp,#11*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r8,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r11,r11,r7			@ d+=h
	eor	r3,r3,r9			@ Maj(a,b,c)
	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 13
# if 13==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r11,r11,ror#5
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 13
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 13==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r11,r11,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r6,r6,r2			@ h+=X[i]
	str	r2,[sp,#13*4]
	eor	r2,r4,r5
	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r11
	add	r6,r6,r3			@ h+=K256[i]
	eor	r2,r2,r5			@ Ch(e,f,g)
	eor	r0,r7,r7,ror#11
	add	r6,r6,r2			@ h+=Ch(e,f,g)
#if 13==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 13<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r7,r8			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#15*4]		@ from future BODY_16_xx
	eor	r3,r7,r8			@ a^b, b^c in next round
	ldr	r1,[sp,#12*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r7,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r10,r10,r6			@ d+=h
	eor	r12,r12,r8			@ Maj(a,b,c)
	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 14
# if 14==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 14
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 14==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r10,r10,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r5,r5,r2			@ h+=X[i]
	str	r2,[sp,#14*4]
	eor	r2,r11,r4
	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r10
	add	r5,r5,r12			@ h+=K256[i]
	eor	r2,r2,r4			@ Ch(e,f,g)
	eor	r0,r6,r6,ror#11
	add	r5,r5,r2			@ h+=Ch(e,f,g)
#if 14==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 14<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r6,r7			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#0*4]		@ from future BODY_16_xx
	eor	r12,r6,r7			@ a^b, b^c in next round
	ldr	r1,[sp,#13*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r6,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r9,r9,r5			@ d+=h
	eor	r3,r3,r7			@ Maj(a,b,c)
	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 15
# if 15==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r9,r9,ror#5
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 15
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 15==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r9,r9,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r4,r4,r2			@ h+=X[i]
	str	r2,[sp,#15*4]
	eor	r2,r10,r11
	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r9
	add	r4,r4,r3			@ h+=K256[i]
	eor	r2,r2,r11			@ Ch(e,f,g)
	eor	r0,r5,r5,ror#11
	add	r4,r4,r2			@ h+=Ch(e,f,g)
#if 15==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 15<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r5,r6			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#1*4]		@ from future BODY_16_xx
	eor	r3,r5,r6			@ a^b, b^c in next round
	ldr	r1,[sp,#14*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r5,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r8,r8,r4			@ d+=h
	eor	r12,r12,r6			@ Maj(a,b,c)
	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
.Lrounds_16_xx:
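@ Rounds 16..63: the message schedule is extended in place; as a
@ sketch in the spec's notation, with indices taken modulo 16 into
@ the 16-word window at sp[0..15]:
@
@	X[i] = X[i-16] + sigma0(X[i-15]) + X[i-7] + sigma1(X[i-2])
@	sigma0(x) = ROTR(x,7)  ^ ROTR(x,18) ^ (x>>3)
@	sigma1(x) = ROTR(x,17) ^ ROTR(x,19) ^ (x>>10)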
1070 @ ldr r2,[sp,#1*4] @ 16
1071 @ ldr r1,[sp,#14*4]
1072 mov r0,r2,ror#7
1073 add r4,r4,r12 @ h+=Maj(a,b,c) from the past
1074 mov r12,r1,ror#17
1075 eor r0,r0,r2,ror#18
1076 eor r12,r12,r1,ror#19
1077 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1078 ldr r2,[sp,#0*4]
1079 eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
1080 ldr r1,[sp,#9*4]
1081
1082 add r12,r12,r0
1083 eor r0,r8,r8,ror#5 @ from BODY_00_15
1084 add r2,r2,r12
1085 eor r0,r0,r8,ror#19 @ Sigma1(e)
1086 add r2,r2,r1 @ X[i]
1087 ldr r12,[r14],#4 @ *K256++
1088 add r11,r11,r2 @ h+=X[i]
1089 str r2,[sp,#0*4]
1090 eor r2,r9,r10
1091 add r11,r11,r0,ror#6 @ h+=Sigma1(e)
1092 and r2,r2,r8
1093 add r11,r11,r12 @ h+=K256[i]
1094 eor r2,r2,r10 @ Ch(e,f,g)
1095 eor r0,r4,r4,ror#11
1096 add r11,r11,r2 @ h+=Ch(e,f,g)
1097#if 16==31
1098 and r12,r12,#0xff
1099 cmp r12,#0xf2 @ done?
1100#endif
1101#if 16<15
1102# if __ARM_ARCH__>=7
1103 ldr r2,[r1],#4 @ prefetch
1104# else
1105 ldrb r2,[r1,#3]
1106# endif
1107 eor r12,r4,r5 @ a^b, b^c in next round
1108#else
1109 ldr r2,[sp,#2*4] @ from future BODY_16_xx
1110 eor r12,r4,r5 @ a^b, b^c in next round
1111 ldr r1,[sp,#15*4] @ from future BODY_16_xx
1112#endif
1113 eor r0,r0,r4,ror#20 @ Sigma0(a)
1114 and r3,r3,r12 @ (b^c)&=(a^b)
1115 add r7,r7,r11 @ d+=h
1116 eor r3,r3,r5 @ Maj(a,b,c)
1117 add r11,r11,r0,ror#2 @ h+=Sigma0(a)
1118 @ add r11,r11,r3 @ h+=Maj(a,b,c)
1119 @ ldr r2,[sp,#2*4] @ 17
1120 @ ldr r1,[sp,#15*4]
1121 mov r0,r2,ror#7
1122 add r11,r11,r3 @ h+=Maj(a,b,c) from the past
1123 mov r3,r1,ror#17
1124 eor r0,r0,r2,ror#18
1125 eor r3,r3,r1,ror#19
1126 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1127 ldr r2,[sp,#1*4]
1128 eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1129 ldr r1,[sp,#10*4]
1130
1131 add r3,r3,r0
1132 eor r0,r7,r7,ror#5 @ from BODY_00_15
1133 add r2,r2,r3
1134 eor r0,r0,r7,ror#19 @ Sigma1(e)
1135 add r2,r2,r1 @ X[i]
1136 ldr r3,[r14],#4 @ *K256++
1137 add r10,r10,r2 @ h+=X[i]
1138 str r2,[sp,#1*4]
1139 eor r2,r8,r9
1140 add r10,r10,r0,ror#6 @ h+=Sigma1(e)
1141 and r2,r2,r7
1142 add r10,r10,r3 @ h+=K256[i]
1143 eor r2,r2,r9 @ Ch(e,f,g)
1144 eor r0,r11,r11,ror#11
1145 add r10,r10,r2 @ h+=Ch(e,f,g)
1146#if 17==31
1147 and r3,r3,#0xff
1148 cmp r3,#0xf2 @ done?
1149#endif
1150#if 17<15
1151# if __ARM_ARCH__>=7
1152 ldr r2,[r1],#4 @ prefetch
1153# else
1154 ldrb r2,[r1,#3]
1155# endif
1156 eor r3,r11,r4 @ a^b, b^c in next round
1157#else
1158 ldr r2,[sp,#3*4] @ from future BODY_16_xx
1159 eor r3,r11,r4 @ a^b, b^c in next round
1160 ldr r1,[sp,#0*4] @ from future BODY_16_xx
1161#endif
1162 eor r0,r0,r11,ror#20 @ Sigma0(a)
1163 and r12,r12,r3 @ (b^c)&=(a^b)
1164 add r6,r6,r10 @ d+=h
1165 eor r12,r12,r4 @ Maj(a,b,c)
1166 add r10,r10,r0,ror#2 @ h+=Sigma0(a)
1167 @ add r10,r10,r12 @ h+=Maj(a,b,c)
1168 @ ldr r2,[sp,#3*4] @ 18
1169 @ ldr r1,[sp,#0*4]
1170 mov r0,r2,ror#7
1171 add r10,r10,r12 @ h+=Maj(a,b,c) from the past
1172 mov r12,r1,ror#17
1173 eor r0,r0,r2,ror#18
1174 eor r12,r12,r1,ror#19
1175 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1176 ldr r2,[sp,#2*4]
1177 eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
1178 ldr r1,[sp,#11*4]
1179
1180 add r12,r12,r0
1181 eor r0,r6,r6,ror#5 @ from BODY_00_15
1182 add r2,r2,r12
1183 eor r0,r0,r6,ror#19 @ Sigma1(e)
1184 add r2,r2,r1 @ X[i]
1185 ldr r12,[r14],#4 @ *K256++
1186 add r9,r9,r2 @ h+=X[i]
1187 str r2,[sp,#2*4]
1188 eor r2,r7,r8
1189 add r9,r9,r0,ror#6 @ h+=Sigma1(e)
1190 and r2,r2,r6
1191 add r9,r9,r12 @ h+=K256[i]
1192 eor r2,r2,r8 @ Ch(e,f,g)
1193 eor r0,r10,r10,ror#11
1194 add r9,r9,r2 @ h+=Ch(e,f,g)
1195#if 18==31
1196 and r12,r12,#0xff
1197 cmp r12,#0xf2 @ done?
1198#endif
1199#if 18<15
1200# if __ARM_ARCH__>=7
1201 ldr r2,[r1],#4 @ prefetch
1202# else
1203 ldrb r2,[r1,#3]
1204# endif
1205 eor r12,r10,r11 @ a^b, b^c in next round
1206#else
1207 ldr r2,[sp,#4*4] @ from future BODY_16_xx
1208 eor r12,r10,r11 @ a^b, b^c in next round
1209 ldr r1,[sp,#1*4] @ from future BODY_16_xx
1210#endif
1211 eor r0,r0,r10,ror#20 @ Sigma0(a)
1212 and r3,r3,r12 @ (b^c)&=(a^b)
1213 add r5,r5,r9 @ d+=h
1214 eor r3,r3,r11 @ Maj(a,b,c)
1215 add r9,r9,r0,ror#2 @ h+=Sigma0(a)
1216 @ add r9,r9,r3 @ h+=Maj(a,b,c)
1217 @ ldr r2,[sp,#4*4] @ 19
1218 @ ldr r1,[sp,#1*4]
1219 mov r0,r2,ror#7
1220 add r9,r9,r3 @ h+=Maj(a,b,c) from the past
1221 mov r3,r1,ror#17
1222 eor r0,r0,r2,ror#18
1223 eor r3,r3,r1,ror#19
1224 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1225 ldr r2,[sp,#3*4]
1226 eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1227 ldr r1,[sp,#12*4]
1228
1229 add r3,r3,r0
1230 eor r0,r5,r5,ror#5 @ from BODY_00_15
1231 add r2,r2,r3
1232 eor r0,r0,r5,ror#19 @ Sigma1(e)
1233 add r2,r2,r1 @ X[i]
1234 ldr r3,[r14],#4 @ *K256++
1235 add r8,r8,r2 @ h+=X[i]
1236 str r2,[sp,#3*4]
1237 eor r2,r6,r7
1238 add r8,r8,r0,ror#6 @ h+=Sigma1(e)
1239 and r2,r2,r5
1240 add r8,r8,r3 @ h+=K256[i]
1241 eor r2,r2,r7 @ Ch(e,f,g)
1242 eor r0,r9,r9,ror#11
1243 add r8,r8,r2 @ h+=Ch(e,f,g)
1244#if 19==31
1245 and r3,r3,#0xff
1246 cmp r3,#0xf2 @ done?
1247#endif
1248#if 19<15
1249# if __ARM_ARCH__>=7
1250 ldr r2,[r1],#4 @ prefetch
1251# else
1252 ldrb r2,[r1,#3]
1253# endif
1254 eor r3,r9,r10 @ a^b, b^c in next round
1255#else
1256 ldr r2,[sp,#5*4] @ from future BODY_16_xx
1257 eor r3,r9,r10 @ a^b, b^c in next round
1258 ldr r1,[sp,#2*4] @ from future BODY_16_xx
1259#endif
1260 eor r0,r0,r9,ror#20 @ Sigma0(a)
1261 and r12,r12,r3 @ (b^c)&=(a^b)
1262 add r4,r4,r8 @ d+=h
1263 eor r12,r12,r10 @ Maj(a,b,c)
1264 add r8,r8,r0,ror#2 @ h+=Sigma0(a)
1265 @ add r8,r8,r12 @ h+=Maj(a,b,c)
1266 @ ldr r2,[sp,#5*4] @ 20
1267 @ ldr r1,[sp,#2*4]
1268 mov r0,r2,ror#7
1269 add r8,r8,r12 @ h+=Maj(a,b,c) from the past
1270 mov r12,r1,ror#17
1271 eor r0,r0,r2,ror#18
1272 eor r12,r12,r1,ror#19
1273 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1274 ldr r2,[sp,#4*4]
1275 eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
1276 ldr r1,[sp,#13*4]
1277
1278 add r12,r12,r0
1279 eor r0,r4,r4,ror#5 @ from BODY_00_15
1280 add r2,r2,r12
1281 eor r0,r0,r4,ror#19 @ Sigma1(e)
1282 add r2,r2,r1 @ X[i]
1283 ldr r12,[r14],#4 @ *K256++
1284 add r7,r7,r2 @ h+=X[i]
1285 str r2,[sp,#4*4]
1286 eor r2,r5,r6
1287 add r7,r7,r0,ror#6 @ h+=Sigma1(e)
1288 and r2,r2,r4
1289 add r7,r7,r12 @ h+=K256[i]
1290 eor r2,r2,r6 @ Ch(e,f,g)
1291 eor r0,r8,r8,ror#11
1292 add r7,r7,r2 @ h+=Ch(e,f,g)
1293#if 20==31
1294 and r12,r12,#0xff
1295 cmp r12,#0xf2 @ done?
1296#endif
1297#if 20<15
1298# if __ARM_ARCH__>=7
1299 ldr r2,[r1],#4 @ prefetch
1300# else
1301 ldrb r2,[r1,#3]
1302# endif
1303 eor r12,r8,r9 @ a^b, b^c in next round
1304#else
1305 ldr r2,[sp,#6*4] @ from future BODY_16_xx
1306 eor r12,r8,r9 @ a^b, b^c in next round
1307 ldr r1,[sp,#3*4] @ from future BODY_16_xx
1308#endif
1309 eor r0,r0,r8,ror#20 @ Sigma0(a)
1310 and r3,r3,r12 @ (b^c)&=(a^b)
1311 add r11,r11,r7 @ d+=h
1312 eor r3,r3,r9 @ Maj(a,b,c)
1313 add r7,r7,r0,ror#2 @ h+=Sigma0(a)
1314 @ add r7,r7,r3 @ h+=Maj(a,b,c)
1315 @ ldr r2,[sp,#6*4] @ 21
1316 @ ldr r1,[sp,#3*4]
1317 mov r0,r2,ror#7
1318 add r7,r7,r3 @ h+=Maj(a,b,c) from the past
1319 mov r3,r1,ror#17
1320 eor r0,r0,r2,ror#18
1321 eor r3,r3,r1,ror#19
1322 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1323 ldr r2,[sp,#5*4]
1324 eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1325 ldr r1,[sp,#14*4]
1326
1327 add r3,r3,r0
1328 eor r0,r11,r11,ror#5 @ from BODY_00_15
1329 add r2,r2,r3
1330 eor r0,r0,r11,ror#19 @ Sigma1(e)
1331 add r2,r2,r1 @ X[i]
1332 ldr r3,[r14],#4 @ *K256++
1333 add r6,r6,r2 @ h+=X[i]
1334 str r2,[sp,#5*4]
1335 eor r2,r4,r5
1336 add r6,r6,r0,ror#6 @ h+=Sigma1(e)
1337 and r2,r2,r11
1338 add r6,r6,r3 @ h+=K256[i]
1339 eor r2,r2,r5 @ Ch(e,f,g)
1340 eor r0,r7,r7,ror#11
1341 add r6,r6,r2 @ h+=Ch(e,f,g)
1342#if 21==31
1343 and r3,r3,#0xff
1344 cmp r3,#0xf2 @ done?
1345#endif
1346#if 21<15
1347# if __ARM_ARCH__>=7
1348 ldr r2,[r1],#4 @ prefetch
1349# else
1350 ldrb r2,[r1,#3]
1351# endif
1352 eor r3,r7,r8 @ a^b, b^c in next round
1353#else
1354 ldr r2,[sp,#7*4] @ from future BODY_16_xx
1355 eor r3,r7,r8 @ a^b, b^c in next round
1356 ldr r1,[sp,#4*4] @ from future BODY_16_xx
1357#endif
1358 eor r0,r0,r7,ror#20 @ Sigma0(a)
1359 and r12,r12,r3 @ (b^c)&=(a^b)
1360 add r10,r10,r6 @ d+=h
1361 eor r12,r12,r8 @ Maj(a,b,c)
1362 add r6,r6,r0,ror#2 @ h+=Sigma0(a)
1363 @ add r6,r6,r12 @ h+=Maj(a,b,c)
1364 @ ldr r2,[sp,#7*4] @ 22
1365 @ ldr r1,[sp,#4*4]
1366 mov r0,r2,ror#7
1367 add r6,r6,r12 @ h+=Maj(a,b,c) from the past
1368 mov r12,r1,ror#17
1369 eor r0,r0,r2,ror#18
1370 eor r12,r12,r1,ror#19
1371 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1372 ldr r2,[sp,#6*4]
1373 eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
1374 ldr r1,[sp,#15*4]
1375
1376 add r12,r12,r0
1377 eor r0,r10,r10,ror#5 @ from BODY_00_15
1378 add r2,r2,r12
1379 eor r0,r0,r10,ror#19 @ Sigma1(e)
1380 add r2,r2,r1 @ X[i]
1381 ldr r12,[r14],#4 @ *K256++
1382 add r5,r5,r2 @ h+=X[i]
1383 str r2,[sp,#6*4]
1384 eor r2,r11,r4
1385 add r5,r5,r0,ror#6 @ h+=Sigma1(e)
1386 and r2,r2,r10
1387 add r5,r5,r12 @ h+=K256[i]
1388 eor r2,r2,r4 @ Ch(e,f,g)
1389 eor r0,r6,r6,ror#11
1390 add r5,r5,r2 @ h+=Ch(e,f,g)
1391#if 22==31
1392 and r12,r12,#0xff
1393 cmp r12,#0xf2 @ done?
1394#endif
1395#if 22<15
1396# if __ARM_ARCH__>=7
1397 ldr r2,[r1],#4 @ prefetch
1398# else
1399 ldrb r2,[r1,#3]
1400# endif
1401 eor r12,r6,r7 @ a^b, b^c in next round
1402#else
1403 ldr r2,[sp,#8*4] @ from future BODY_16_xx
1404 eor r12,r6,r7 @ a^b, b^c in next round
1405 ldr r1,[sp,#5*4] @ from future BODY_16_xx
1406#endif
1407 eor r0,r0,r6,ror#20 @ Sigma0(a)
1408 and r3,r3,r12 @ (b^c)&=(a^b)
1409 add r9,r9,r5 @ d+=h
1410 eor r3,r3,r7 @ Maj(a,b,c)
1411 add r5,r5,r0,ror#2 @ h+=Sigma0(a)
1412 @ add r5,r5,r3 @ h+=Maj(a,b,c)
1413 @ ldr r2,[sp,#8*4] @ 23
1414 @ ldr r1,[sp,#5*4]
1415 mov r0,r2,ror#7
1416 add r5,r5,r3 @ h+=Maj(a,b,c) from the past
1417 mov r3,r1,ror#17
1418 eor r0,r0,r2,ror#18
1419 eor r3,r3,r1,ror#19
1420 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1421 ldr r2,[sp,#7*4]
1422 eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1423 ldr r1,[sp,#0*4]
1424
1425 add r3,r3,r0
1426 eor r0,r9,r9,ror#5 @ from BODY_00_15
1427 add r2,r2,r3
1428 eor r0,r0,r9,ror#19 @ Sigma1(e)
1429 add r2,r2,r1 @ X[i]
1430 ldr r3,[r14],#4 @ *K256++
1431 add r4,r4,r2 @ h+=X[i]
1432 str r2,[sp,#7*4]
1433 eor r2,r10,r11
1434 add r4,r4,r0,ror#6 @ h+=Sigma1(e)
1435 and r2,r2,r9
1436 add r4,r4,r3 @ h+=K256[i]
1437 eor r2,r2,r11 @ Ch(e,f,g)
1438 eor r0,r5,r5,ror#11
1439 add r4,r4,r2 @ h+=Ch(e,f,g)
1440#if 23==31
1441 and r3,r3,#0xff
1442 cmp r3,#0xf2 @ done?
1443#endif
1444#if 23<15
1445# if __ARM_ARCH__>=7
1446 ldr r2,[r1],#4 @ prefetch
1447# else
1448 ldrb r2,[r1,#3]
1449# endif
1450 eor r3,r5,r6 @ a^b, b^c in next round
1451#else
1452 ldr r2,[sp,#9*4] @ from future BODY_16_xx
1453 eor r3,r5,r6 @ a^b, b^c in next round
1454 ldr r1,[sp,#6*4] @ from future BODY_16_xx
1455#endif
1456 eor r0,r0,r5,ror#20 @ Sigma0(a)
1457 and r12,r12,r3 @ (b^c)&=(a^b)
1458 add r8,r8,r4 @ d+=h
1459 eor r12,r12,r6 @ Maj(a,b,c)
1460 add r4,r4,r0,ror#2 @ h+=Sigma0(a)
1461 @ add r4,r4,r12 @ h+=Maj(a,b,c)
1462 @ ldr r2,[sp,#9*4] @ 24
1463 @ ldr r1,[sp,#6*4]
1464 mov r0,r2,ror#7
1465 add r4,r4,r12 @ h+=Maj(a,b,c) from the past
1466 mov r12,r1,ror#17
1467 eor r0,r0,r2,ror#18
1468 eor r12,r12,r1,ror#19
1469 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1470 ldr r2,[sp,#8*4]
1471 eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
1472 ldr r1,[sp,#1*4]
1473
1474 add r12,r12,r0
1475 eor r0,r8,r8,ror#5 @ from BODY_00_15
1476 add r2,r2,r12
1477 eor r0,r0,r8,ror#19 @ Sigma1(e)
1478 add r2,r2,r1 @ X[i]
1479 ldr r12,[r14],#4 @ *K256++
1480 add r11,r11,r2 @ h+=X[i]
1481 str r2,[sp,#8*4]
1482 eor r2,r9,r10
1483 add r11,r11,r0,ror#6 @ h+=Sigma1(e)
1484 and r2,r2,r8
1485 add r11,r11,r12 @ h+=K256[i]
1486 eor r2,r2,r10 @ Ch(e,f,g)
1487 eor r0,r4,r4,ror#11
1488 add r11,r11,r2 @ h+=Ch(e,f,g)
1489#if 24==31
1490 and r12,r12,#0xff
1491 cmp r12,#0xf2 @ done?
1492#endif
1493#if 24<15
1494# if __ARM_ARCH__>=7
1495 ldr r2,[r1],#4 @ prefetch
1496# else
1497 ldrb r2,[r1,#3]
1498# endif
1499 eor r12,r4,r5 @ a^b, b^c in next round
1500#else
1501 ldr r2,[sp,#10*4] @ from future BODY_16_xx
1502 eor r12,r4,r5 @ a^b, b^c in next round
1503 ldr r1,[sp,#7*4] @ from future BODY_16_xx
1504#endif
1505 eor r0,r0,r4,ror#20 @ Sigma0(a)
1506 and r3,r3,r12 @ (b^c)&=(a^b)
1507 add r7,r7,r11 @ d+=h
1508 eor r3,r3,r5 @ Maj(a,b,c)
1509 add r11,r11,r0,ror#2 @ h+=Sigma0(a)
1510 @ add r11,r11,r3 @ h+=Maj(a,b,c)
1511 @ ldr r2,[sp,#10*4] @ 25
1512 @ ldr r1,[sp,#7*4]
1513 mov r0,r2,ror#7
1514 add r11,r11,r3 @ h+=Maj(a,b,c) from the past
1515 mov r3,r1,ror#17
1516 eor r0,r0,r2,ror#18
1517 eor r3,r3,r1,ror#19
1518 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1519 ldr r2,[sp,#9*4]
1520 eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1521 ldr r1,[sp,#2*4]
1522
1523 add r3,r3,r0
1524 eor r0,r7,r7,ror#5 @ from BODY_00_15
1525 add r2,r2,r3
1526 eor r0,r0,r7,ror#19 @ Sigma1(e)
1527 add r2,r2,r1 @ X[i]
1528 ldr r3,[r14],#4 @ *K256++
1529 add r10,r10,r2 @ h+=X[i]
1530 str r2,[sp,#9*4]
1531 eor r2,r8,r9
1532 add r10,r10,r0,ror#6 @ h+=Sigma1(e)
1533 and r2,r2,r7
1534 add r10,r10,r3 @ h+=K256[i]
1535 eor r2,r2,r9 @ Ch(e,f,g)
1536 eor r0,r11,r11,ror#11
1537 add r10,r10,r2 @ h+=Ch(e,f,g)
1538#if 25==31
1539 and r3,r3,#0xff
1540 cmp r3,#0xf2 @ done?
1541#endif
1542#if 25<15
1543# if __ARM_ARCH__>=7
1544 ldr r2,[r1],#4 @ prefetch
1545# else
1546 ldrb r2,[r1,#3]
1547# endif
1548 eor r3,r11,r4 @ a^b, b^c in next round
1549#else
1550 ldr r2,[sp,#11*4] @ from future BODY_16_xx
1551 eor r3,r11,r4 @ a^b, b^c in next round
1552 ldr r1,[sp,#8*4] @ from future BODY_16_xx
1553#endif
1554 eor r0,r0,r11,ror#20 @ Sigma0(a)
1555 and r12,r12,r3 @ (b^c)&=(a^b)
1556 add r6,r6,r10 @ d+=h
1557 eor r12,r12,r4 @ Maj(a,b,c)
1558 add r10,r10,r0,ror#2 @ h+=Sigma0(a)
1559 @ add r10,r10,r12 @ h+=Maj(a,b,c)
1560 @ ldr r2,[sp,#11*4] @ 26
1561 @ ldr r1,[sp,#8*4]
1562 mov r0,r2,ror#7
1563 add r10,r10,r12 @ h+=Maj(a,b,c) from the past
1564 mov r12,r1,ror#17
1565 eor r0,r0,r2,ror#18
1566 eor r12,r12,r1,ror#19
1567 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1568 ldr r2,[sp,#10*4]
1569 eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
1570 ldr r1,[sp,#3*4]
1571
1572 add r12,r12,r0
1573 eor r0,r6,r6,ror#5 @ from BODY_00_15
1574 add r2,r2,r12
1575 eor r0,r0,r6,ror#19 @ Sigma1(e)
1576 add r2,r2,r1 @ X[i]
1577 ldr r12,[r14],#4 @ *K256++
1578 add r9,r9,r2 @ h+=X[i]
1579 str r2,[sp,#10*4]
1580 eor r2,r7,r8
1581 add r9,r9,r0,ror#6 @ h+=Sigma1(e)
1582 and r2,r2,r6
1583 add r9,r9,r12 @ h+=K256[i]
1584 eor r2,r2,r8 @ Ch(e,f,g)
1585 eor r0,r10,r10,ror#11
1586 add r9,r9,r2 @ h+=Ch(e,f,g)
1587#if 26==31
1588 and r12,r12,#0xff
1589 cmp r12,#0xf2 @ done?
1590#endif
1591#if 26<15
1592# if __ARM_ARCH__>=7
1593 ldr r2,[r1],#4 @ prefetch
1594# else
1595 ldrb r2,[r1,#3]
1596# endif
1597 eor r12,r10,r11 @ a^b, b^c in next round
1598#else
1599 ldr r2,[sp,#12*4] @ from future BODY_16_xx
1600 eor r12,r10,r11 @ a^b, b^c in next round
1601 ldr r1,[sp,#9*4] @ from future BODY_16_xx
1602#endif
1603 eor r0,r0,r10,ror#20 @ Sigma0(a)
1604 and r3,r3,r12 @ (b^c)&=(a^b)
1605 add r5,r5,r9 @ d+=h
1606 eor r3,r3,r11 @ Maj(a,b,c)
1607 add r9,r9,r0,ror#2 @ h+=Sigma0(a)
1608 @ add r9,r9,r3 @ h+=Maj(a,b,c)
1609 @ ldr r2,[sp,#12*4] @ 27
1610 @ ldr r1,[sp,#9*4]
1611 mov r0,r2,ror#7
1612 add r9,r9,r3 @ h+=Maj(a,b,c) from the past
1613 mov r3,r1,ror#17
1614 eor r0,r0,r2,ror#18
1615 eor r3,r3,r1,ror#19
1616 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1617 ldr r2,[sp,#11*4]
1618 eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1619 ldr r1,[sp,#4*4]
1620
1621 add r3,r3,r0
1622 eor r0,r5,r5,ror#5 @ from BODY_00_15
1623 add r2,r2,r3
1624 eor r0,r0,r5,ror#19 @ Sigma1(e)
1625 add r2,r2,r1 @ X[i]
1626 ldr r3,[r14],#4 @ *K256++
1627 add r8,r8,r2 @ h+=X[i]
1628 str r2,[sp,#11*4]
1629 eor r2,r6,r7
1630 add r8,r8,r0,ror#6 @ h+=Sigma1(e)
1631 and r2,r2,r5
1632 add r8,r8,r3 @ h+=K256[i]
1633 eor r2,r2,r7 @ Ch(e,f,g)
1634 eor r0,r9,r9,ror#11
1635 add r8,r8,r2 @ h+=Ch(e,f,g)
1636#if 27==31
1637 and r3,r3,#0xff
1638 cmp r3,#0xf2 @ done?
1639#endif
1640#if 27<15
1641# if __ARM_ARCH__>=7
1642 ldr r2,[r1],#4 @ prefetch
1643# else
1644 ldrb r2,[r1,#3]
1645# endif
1646 eor r3,r9,r10 @ a^b, b^c in next round
1647#else
1648 ldr r2,[sp,#13*4] @ from future BODY_16_xx
1649 eor r3,r9,r10 @ a^b, b^c in next round
1650 ldr r1,[sp,#10*4] @ from future BODY_16_xx
1651#endif
1652 eor r0,r0,r9,ror#20 @ Sigma0(a)
1653 and r12,r12,r3 @ (b^c)&=(a^b)
1654 add r4,r4,r8 @ d+=h
1655 eor r12,r12,r10 @ Maj(a,b,c)
1656 add r8,r8,r0,ror#2 @ h+=Sigma0(a)
1657 @ add r8,r8,r12 @ h+=Maj(a,b,c)
1658 @ ldr r2,[sp,#13*4] @ 28
1659 @ ldr r1,[sp,#10*4]
1660 mov r0,r2,ror#7
1661 add r8,r8,r12 @ h+=Maj(a,b,c) from the past
1662 mov r12,r1,ror#17
1663 eor r0,r0,r2,ror#18
1664 eor r12,r12,r1,ror#19
1665 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1666 ldr r2,[sp,#12*4]
1667 eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
1668 ldr r1,[sp,#5*4]
1669
1670 add r12,r12,r0
1671 eor r0,r4,r4,ror#5 @ from BODY_00_15
1672 add r2,r2,r12
1673 eor r0,r0,r4,ror#19 @ Sigma1(e)
1674 add r2,r2,r1 @ X[i]
1675 ldr r12,[r14],#4 @ *K256++
1676 add r7,r7,r2 @ h+=X[i]
1677 str r2,[sp,#12*4]
1678 eor r2,r5,r6
1679 add r7,r7,r0,ror#6 @ h+=Sigma1(e)
1680 and r2,r2,r4
1681 add r7,r7,r12 @ h+=K256[i]
1682 eor r2,r2,r6 @ Ch(e,f,g)
1683 eor r0,r8,r8,ror#11
1684 add r7,r7,r2 @ h+=Ch(e,f,g)
1685#if 28==31
1686 and r12,r12,#0xff
1687 cmp r12,#0xf2 @ done?
1688#endif
1689#if 28<15
1690# if __ARM_ARCH__>=7
1691 ldr r2,[r1],#4 @ prefetch
1692# else
1693 ldrb r2,[r1,#3]
1694# endif
1695 eor r12,r8,r9 @ a^b, b^c in next round
1696#else
1697 ldr r2,[sp,#14*4] @ from future BODY_16_xx
1698 eor r12,r8,r9 @ a^b, b^c in next round
1699 ldr r1,[sp,#11*4] @ from future BODY_16_xx
1700#endif
1701 eor r0,r0,r8,ror#20 @ Sigma0(a)
1702 and r3,r3,r12 @ (b^c)&=(a^b)
1703 add r11,r11,r7 @ d+=h
1704 eor r3,r3,r9 @ Maj(a,b,c)
1705 add r7,r7,r0,ror#2 @ h+=Sigma0(a)
1706 @ add r7,r7,r3 @ h+=Maj(a,b,c)
1707 @ ldr r2,[sp,#14*4] @ 29
1708 @ ldr r1,[sp,#11*4]
1709 mov r0,r2,ror#7
1710 add r7,r7,r3 @ h+=Maj(a,b,c) from the past
1711 mov r3,r1,ror#17
1712 eor r0,r0,r2,ror#18
1713 eor r3,r3,r1,ror#19
1714 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1715 ldr r2,[sp,#13*4]
1716 eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1717 ldr r1,[sp,#6*4]
1718
1719 add r3,r3,r0
1720 eor r0,r11,r11,ror#5 @ from BODY_00_15
1721 add r2,r2,r3
1722 eor r0,r0,r11,ror#19 @ Sigma1(e)
1723 add r2,r2,r1 @ X[i]
1724 ldr r3,[r14],#4 @ *K256++
1725 add r6,r6,r2 @ h+=X[i]
1726 str r2,[sp,#13*4]
1727 eor r2,r4,r5
1728 add r6,r6,r0,ror#6 @ h+=Sigma1(e)
1729 and r2,r2,r11
1730 add r6,r6,r3 @ h+=K256[i]
1731 eor r2,r2,r5 @ Ch(e,f,g)
1732 eor r0,r7,r7,ror#11
1733 add r6,r6,r2 @ h+=Ch(e,f,g)
1734#if 29==31
1735 and r3,r3,#0xff
1736 cmp r3,#0xf2 @ done?
1737#endif
1738#if 29<15
1739# if __ARM_ARCH__>=7
1740 ldr r2,[r1],#4 @ prefetch
1741# else
1742 ldrb r2,[r1,#3]
1743# endif
1744 eor r3,r7,r8 @ a^b, b^c in next round
1745#else
1746 ldr r2,[sp,#15*4] @ from future BODY_16_xx
1747 eor r3,r7,r8 @ a^b, b^c in next round
1748 ldr r1,[sp,#12*4] @ from future BODY_16_xx
1749#endif
1750 eor r0,r0,r7,ror#20 @ Sigma0(a)
1751 and r12,r12,r3 @ (b^c)&=(a^b)
1752 add r10,r10,r6 @ d+=h
1753 eor r12,r12,r8 @ Maj(a,b,c)
1754 add r6,r6,r0,ror#2 @ h+=Sigma0(a)
1755 @ add r6,r6,r12 @ h+=Maj(a,b,c)
1756 @ ldr r2,[sp,#15*4] @ 30
1757 @ ldr r1,[sp,#12*4]
1758 mov r0,r2,ror#7
1759 add r6,r6,r12 @ h+=Maj(a,b,c) from the past
1760 mov r12,r1,ror#17
1761 eor r0,r0,r2,ror#18
1762 eor r12,r12,r1,ror#19
1763 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1764 ldr r2,[sp,#14*4]
1765 eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
1766 ldr r1,[sp,#7*4]
1767
1768 add r12,r12,r0
1769 eor r0,r10,r10,ror#5 @ from BODY_00_15
1770 add r2,r2,r12
1771 eor r0,r0,r10,ror#19 @ Sigma1(e)
1772 add r2,r2,r1 @ X[i]
1773 ldr r12,[r14],#4 @ *K256++
1774 add r5,r5,r2 @ h+=X[i]
1775 str r2,[sp,#14*4]
1776 eor r2,r11,r4
1777 add r5,r5,r0,ror#6 @ h+=Sigma1(e)
1778 and r2,r2,r10
1779 add r5,r5,r12 @ h+=K256[i]
1780 eor r2,r2,r4 @ Ch(e,f,g)
1781 eor r0,r6,r6,ror#11
1782 add r5,r5,r2 @ h+=Ch(e,f,g)
1783#if 30==31
1784 and r12,r12,#0xff
1785 cmp r12,#0xf2 @ done?
1786#endif
1787#if 30<15
1788# if __ARM_ARCH__>=7
1789 ldr r2,[r1],#4 @ prefetch
1790# else
1791 ldrb r2,[r1,#3]
1792# endif
1793 eor r12,r6,r7 @ a^b, b^c in next round
1794#else
1795 ldr r2,[sp,#0*4] @ from future BODY_16_xx
1796 eor r12,r6,r7 @ a^b, b^c in next round
1797 ldr r1,[sp,#13*4] @ from future BODY_16_xx
1798#endif
1799 eor r0,r0,r6,ror#20 @ Sigma0(a)
1800 and r3,r3,r12 @ (b^c)&=(a^b)
1801 add r9,r9,r5 @ d+=h
1802 eor r3,r3,r7 @ Maj(a,b,c)
1803 add r5,r5,r0,ror#2 @ h+=Sigma0(a)
1804 @ add r5,r5,r3 @ h+=Maj(a,b,c)
1805 @ ldr r2,[sp,#0*4] @ 31
1806 @ ldr r1,[sp,#13*4]
1807 mov r0,r2,ror#7
1808 add r5,r5,r3 @ h+=Maj(a,b,c) from the past
1809 mov r3,r1,ror#17
1810 eor r0,r0,r2,ror#18
1811 eor r3,r3,r1,ror#19
1812 eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
1813 ldr r2,[sp,#15*4]
1814 eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
1815 ldr r1,[sp,#8*4]
1816
1817 add r3,r3,r0
1818 eor r0,r9,r9,ror#5 @ from BODY_00_15
1819 add r2,r2,r3
1820 eor r0,r0,r9,ror#19 @ Sigma1(e)
1821 add r2,r2,r1 @ X[i]
1822 ldr r3,[r14],#4 @ *K256++
1823 add r4,r4,r2 @ h+=X[i]
1824 str r2,[sp,#15*4]
1825 eor r2,r10,r11
1826 add r4,r4,r0,ror#6 @ h+=Sigma1(e)
1827 and r2,r2,r9
1828 add r4,r4,r3 @ h+=K256[i]
1829 eor r2,r2,r11 @ Ch(e,f,g)
1830 eor r0,r5,r5,ror#11
1831 add r4,r4,r2 @ h+=Ch(e,f,g)
1832#if 31==31
1833 and r3,r3,#0xff
1834 cmp r3,#0xf2 @ done?
1835#endif
1836#if 31<15
1837# if __ARM_ARCH__>=7
1838 ldr r2,[r1],#4 @ prefetch
1839# else
1840 ldrb r2,[r1,#3]
1841# endif
1842 eor r3,r5,r6 @ a^b, b^c in next round
1843#else
1844 ldr r2,[sp,#1*4] @ from future BODY_16_xx
1845 eor r3,r5,r6 @ a^b, b^c in next round
1846 ldr r1,[sp,#14*4] @ from future BODY_16_xx
1847#endif
1848 eor r0,r0,r5,ror#20 @ Sigma0(a)
1849 and r12,r12,r3 @ (b^c)&=(a^b)
1850 add r8,r8,r4 @ d+=h
1851 eor r12,r12,r6 @ Maj(a,b,c)
1852 add r4,r4,r0,ror#2 @ h+=Sigma0(a)
1853 @ add r4,r4,r12 @ h+=Maj(a,b,c)
Adam Langleye9ada862015-05-11 17:20:37 -07001854#if __ARM_ARCH__>=7
1855 ite eq @ Thumb2 thing, sanity check in ARM
1856#endif
Adam Langleyd9e397b2015-01-22 14:27:53 -08001857 ldreq r3,[sp,#16*4] @ pull ctx
1858 bne .Lrounds_16_xx
1859
1860 add r4,r4,r12 @ h+=Maj(a,b,c) from the past
1861 ldr r0,[r3,#0]
1862 ldr r2,[r3,#4]
1863 ldr r12,[r3,#8]
1864 add r4,r4,r0
1865 ldr r0,[r3,#12]
1866 add r5,r5,r2
1867 ldr r2,[r3,#16]
1868 add r6,r6,r12
1869 ldr r12,[r3,#20]
1870 add r7,r7,r0
1871 ldr r0,[r3,#24]
1872 add r8,r8,r2
1873 ldr r2,[r3,#28]
1874 add r9,r9,r12
1875 ldr r1,[sp,#17*4] @ pull inp
1876 ldr r12,[sp,#18*4] @ pull inp+len
1877 add r10,r10,r0
1878 add r11,r11,r2
1879 stmia r3,{r4,r5,r6,r7,r8,r9,r10,r11}
1880 cmp r1,r12
1881 sub r14,r14,#256 @ rewind Ktbl
1882 bne .Loop
1883
1884 add sp,sp,#19*4 @ destroy frame
1885#if __ARM_ARCH__>=5
Adam Langleye9ada862015-05-11 17:20:37 -07001886 ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc}
Adam Langleyd9e397b2015-01-22 14:27:53 -08001887#else
Adam Langleye9ada862015-05-11 17:20:37 -07001888 ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr}
Adam Langleyd9e397b2015-01-22 14:27:53 -08001889 tst lr,#1
1890 moveq pc,lr @ be binary compatible with V4, yet
Adam Langleye9ada862015-05-11 17:20:37 -07001891.word 0xe12fff1e @ interoperable with Thumb ISA:-)
Adam Langleyd9e397b2015-01-22 14:27:53 -08001892#endif
1893.size sha256_block_data_order,.-sha256_block_data_order
1894#if __ARM_MAX_ARCH__>=7
1895.arch armv7-a
1896.fpu neon
1897
Adam Langleye9ada862015-05-11 17:20:37 -07001898.globl sha256_block_data_order_neon
David Benjamin4969cc92016-04-22 15:02:23 -04001899.hidden sha256_block_data_order_neon
Adam Langleyd9e397b2015-01-22 14:27:53 -08001900.type sha256_block_data_order_neon,%function
David Benjamin1b249672016-12-06 18:25:50 -05001901.align 5
1902.skip 16
Adam Langleyd9e397b2015-01-22 14:27:53 -08001903sha256_block_data_order_neon:
1904.LNEON:
Adam Langleye9ada862015-05-11 17:20:37 -07001905 stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
Adam Langleyd9e397b2015-01-22 14:27:53 -08001906
Adam Langleye9ada862015-05-11 17:20:37 -07001907 sub r11,sp,#16*4+16
David Benjamin1b249672016-12-06 18:25:50 -05001908 adr r14,K256
Adam Langleye9ada862015-05-11 17:20:37 -07001909 bic r11,r11,#15 @ align for 128-bit stores
Adam Langleyd9e397b2015-01-22 14:27:53 -08001910 mov r12,sp
Adam Langleye9ada862015-05-11 17:20:37 -07001911 mov sp,r11 @ alloca
1912 add r2,r1,r2,lsl#6 @ len to point at the end of inp

	vld1.8 {q0},[r1]!
	vld1.8 {q1},[r1]!
	vld1.8 {q2},[r1]!
	vld1.8 {q3},[r1]!
	vld1.32 {q8},[r14,:128]!
	vld1.32 {q9},[r14,:128]!
	vld1.32 {q10},[r14,:128]!
	vld1.32 {q11},[r14,:128]!
	vrev32.8 q0,q0 @ yes, even on
	str r0,[sp,#64]
	vrev32.8 q1,q1 @ big-endian
	str r1,[sp,#68]
	mov r1,sp
	vrev32.8 q2,q2
	str r2,[sp,#72]
	vrev32.8 q3,q3
	str r12,[sp,#76] @ save original sp
	vadd.i32 q8,q8,q0
	vadd.i32 q9,q9,q1
	vst1.32 {q8},[r1,:128]!
	vadd.i32 q10,q10,q2
	vst1.32 {q9},[r1,:128]!
	vadd.i32 q11,q11,q3
	vst1.32 {q10},[r1,:128]!
	vst1.32 {q11},[r1,:128]!

	ldmia r0,{r4,r5,r6,r7,r8,r9,r10,r11}
	sub r1,r1,#64
	ldr r2,[sp,#0]
	eor r12,r12,r12
	eor r3,r5,r6
	b .L_00_48
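@ .L_00_48 interleaves the scalar rounds with NEON message-schedule
@ expansion for the next 16 words: vshr.u32/vsli.32 pairs emulate the
@ rotates of sigma0/sigma1 on q0-q3 while the integer pipe runs the
@ round function. r12 starts as zero because no Maj(a,b,c) has been
@ deferred yet.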

.align 4
.L_00_48:
	vext.8 q8,q0,q1,#4
	add r11,r11,r2
	eor r2,r9,r10
	eor r0,r8,r8,ror#5
	vext.8 q9,q2,q3,#4
	add r4,r4,r12
	and r2,r2,r8
	eor r12,r0,r8,ror#19
	vshr.u32 q10,q8,#7
	eor r0,r4,r4,ror#11
	eor r2,r2,r10
	vadd.i32 q0,q0,q9
	add r11,r11,r12,ror#6
	eor r12,r4,r5
	vshr.u32 q9,q8,#3
	eor r0,r0,r4,ror#20
	add r11,r11,r2
	vsli.32 q10,q8,#25
	ldr r2,[sp,#4]
	and r3,r3,r12
	vshr.u32 q11,q8,#18
	add r7,r7,r11
	add r11,r11,r0,ror#2
	eor r3,r3,r5
	veor q9,q9,q10
	add r10,r10,r2
	vsli.32 q11,q8,#14
	eor r2,r8,r9
	eor r0,r7,r7,ror#5
	vshr.u32 d24,d7,#17
	add r11,r11,r3
	and r2,r2,r7
	veor q9,q9,q11
	eor r3,r0,r7,ror#19
	eor r0,r11,r11,ror#11
	vsli.32 d24,d7,#15
	eor r2,r2,r9
	add r10,r10,r3,ror#6
	vshr.u32 d25,d7,#10
	eor r3,r11,r4
	eor r0,r0,r11,ror#20
	vadd.i32 q0,q0,q9
	add r10,r10,r2
	ldr r2,[sp,#8]
	veor d25,d25,d24
	and r12,r12,r3
	add r6,r6,r10
	vshr.u32 d24,d7,#19
	add r10,r10,r0,ror#2
	eor r12,r12,r4
	vsli.32 d24,d7,#13
	add r9,r9,r2
	eor r2,r7,r8
	veor d25,d25,d24
	eor r0,r6,r6,ror#5
	add r10,r10,r12
	vadd.i32 d0,d0,d25
	and r2,r2,r6
	eor r12,r0,r6,ror#19
	vshr.u32 d24,d0,#17
	eor r0,r10,r10,ror#11
	eor r2,r2,r8
	vsli.32 d24,d0,#15
	add r9,r9,r12,ror#6
	eor r12,r10,r11
	vshr.u32 d25,d0,#10
	eor r0,r0,r10,ror#20
	add r9,r9,r2
	veor d25,d25,d24
	ldr r2,[sp,#12]
	and r3,r3,r12
	vshr.u32 d24,d0,#19
	add r5,r5,r9
	add r9,r9,r0,ror#2
	eor r3,r3,r11
	vld1.32 {q8},[r14,:128]!
	add r8,r8,r2
	vsli.32 d24,d0,#13
	eor r2,r6,r7
	eor r0,r5,r5,ror#5
	veor d25,d25,d24
	add r9,r9,r3
	and r2,r2,r5
	vadd.i32 d1,d1,d25
	eor r3,r0,r5,ror#19
	eor r0,r9,r9,ror#11
	vadd.i32 q8,q8,q0
	eor r2,r2,r7
	add r8,r8,r3,ror#6
	eor r3,r9,r10
	eor r0,r0,r9,ror#20
	add r8,r8,r2
	ldr r2,[sp,#16]
	and r12,r12,r3
	add r4,r4,r8
	vst1.32 {q8},[r1,:128]!
	add r8,r8,r0,ror#2
	eor r12,r12,r10
	vext.8 q8,q1,q2,#4
	add r7,r7,r2
	eor r2,r5,r6
	eor r0,r4,r4,ror#5
	vext.8 q9,q3,q0,#4
	add r8,r8,r12
	and r2,r2,r4
	eor r12,r0,r4,ror#19
	vshr.u32 q10,q8,#7
	eor r0,r8,r8,ror#11
	eor r2,r2,r6
	vadd.i32 q1,q1,q9
	add r7,r7,r12,ror#6
	eor r12,r8,r9
	vshr.u32 q9,q8,#3
	eor r0,r0,r8,ror#20
	add r7,r7,r2
	vsli.32 q10,q8,#25
	ldr r2,[sp,#20]
	and r3,r3,r12
	vshr.u32 q11,q8,#18
	add r11,r11,r7
	add r7,r7,r0,ror#2
	eor r3,r3,r9
	veor q9,q9,q10
	add r6,r6,r2
	vsli.32 q11,q8,#14
	eor r2,r4,r5
	eor r0,r11,r11,ror#5
	vshr.u32 d24,d1,#17
	add r7,r7,r3
	and r2,r2,r11
	veor q9,q9,q11
	eor r3,r0,r11,ror#19
	eor r0,r7,r7,ror#11
	vsli.32 d24,d1,#15
	eor r2,r2,r5
	add r6,r6,r3,ror#6
	vshr.u32 d25,d1,#10
	eor r3,r7,r8
	eor r0,r0,r7,ror#20
	vadd.i32 q1,q1,q9
	add r6,r6,r2
	ldr r2,[sp,#24]
	veor d25,d25,d24
	and r12,r12,r3
	add r10,r10,r6
	vshr.u32 d24,d1,#19
	add r6,r6,r0,ror#2
	eor r12,r12,r8
	vsli.32 d24,d1,#13
	add r5,r5,r2
	eor r2,r11,r4
	veor d25,d25,d24
	eor r0,r10,r10,ror#5
	add r6,r6,r12
	vadd.i32 d2,d2,d25
	and r2,r2,r10
	eor r12,r0,r10,ror#19
	vshr.u32 d24,d2,#17
	eor r0,r6,r6,ror#11
	eor r2,r2,r4
	vsli.32 d24,d2,#15
	add r5,r5,r12,ror#6
	eor r12,r6,r7
	vshr.u32 d25,d2,#10
	eor r0,r0,r6,ror#20
	add r5,r5,r2
	veor d25,d25,d24
	ldr r2,[sp,#28]
	and r3,r3,r12
	vshr.u32 d24,d2,#19
	add r9,r9,r5
	add r5,r5,r0,ror#2
	eor r3,r3,r7
	vld1.32 {q8},[r14,:128]!
	add r4,r4,r2
	vsli.32 d24,d2,#13
	eor r2,r10,r11
	eor r0,r9,r9,ror#5
	veor d25,d25,d24
	add r5,r5,r3
	and r2,r2,r9
	vadd.i32 d3,d3,d25
	eor r3,r0,r9,ror#19
	eor r0,r5,r5,ror#11
	vadd.i32 q8,q8,q1
	eor r2,r2,r11
	add r4,r4,r3,ror#6
	eor r3,r5,r6
	eor r0,r0,r5,ror#20
	add r4,r4,r2
	ldr r2,[sp,#32]
	and r12,r12,r3
	add r8,r8,r4
	vst1.32 {q8},[r1,:128]!
	add r4,r4,r0,ror#2
	eor r12,r12,r6
	vext.8 q8,q2,q3,#4
	add r11,r11,r2
	eor r2,r9,r10
	eor r0,r8,r8,ror#5
	vext.8 q9,q0,q1,#4
	add r4,r4,r12
	and r2,r2,r8
	eor r12,r0,r8,ror#19
	vshr.u32 q10,q8,#7
	eor r0,r4,r4,ror#11
	eor r2,r2,r10
	vadd.i32 q2,q2,q9
	add r11,r11,r12,ror#6
	eor r12,r4,r5
	vshr.u32 q9,q8,#3
	eor r0,r0,r4,ror#20
	add r11,r11,r2
	vsli.32 q10,q8,#25
	ldr r2,[sp,#36]
	and r3,r3,r12
	vshr.u32 q11,q8,#18
	add r7,r7,r11
	add r11,r11,r0,ror#2
	eor r3,r3,r5
	veor q9,q9,q10
	add r10,r10,r2
	vsli.32 q11,q8,#14
	eor r2,r8,r9
	eor r0,r7,r7,ror#5
	vshr.u32 d24,d3,#17
	add r11,r11,r3
	and r2,r2,r7
	veor q9,q9,q11
	eor r3,r0,r7,ror#19
	eor r0,r11,r11,ror#11
	vsli.32 d24,d3,#15
	eor r2,r2,r9
	add r10,r10,r3,ror#6
	vshr.u32 d25,d3,#10
	eor r3,r11,r4
	eor r0,r0,r11,ror#20
	vadd.i32 q2,q2,q9
	add r10,r10,r2
	ldr r2,[sp,#40]
	veor d25,d25,d24
	and r12,r12,r3
	add r6,r6,r10
	vshr.u32 d24,d3,#19
	add r10,r10,r0,ror#2
	eor r12,r12,r4
	vsli.32 d24,d3,#13
	add r9,r9,r2
	eor r2,r7,r8
	veor d25,d25,d24
	eor r0,r6,r6,ror#5
	add r10,r10,r12
	vadd.i32 d4,d4,d25
	and r2,r2,r6
	eor r12,r0,r6,ror#19
	vshr.u32 d24,d4,#17
	eor r0,r10,r10,ror#11
	eor r2,r2,r8
	vsli.32 d24,d4,#15
	add r9,r9,r12,ror#6
	eor r12,r10,r11
	vshr.u32 d25,d4,#10
	eor r0,r0,r10,ror#20
	add r9,r9,r2
	veor d25,d25,d24
	ldr r2,[sp,#44]
	and r3,r3,r12
	vshr.u32 d24,d4,#19
	add r5,r5,r9
	add r9,r9,r0,ror#2
	eor r3,r3,r11
	vld1.32 {q8},[r14,:128]!
	add r8,r8,r2
	vsli.32 d24,d4,#13
	eor r2,r6,r7
	eor r0,r5,r5,ror#5
	veor d25,d25,d24
	add r9,r9,r3
	and r2,r2,r5
	vadd.i32 d5,d5,d25
	eor r3,r0,r5,ror#19
	eor r0,r9,r9,ror#11
	vadd.i32 q8,q8,q2
	eor r2,r2,r7
	add r8,r8,r3,ror#6
	eor r3,r9,r10
	eor r0,r0,r9,ror#20
	add r8,r8,r2
	ldr r2,[sp,#48]
	and r12,r12,r3
	add r4,r4,r8
	vst1.32 {q8},[r1,:128]!
	add r8,r8,r0,ror#2
	eor r12,r12,r10
	vext.8 q8,q3,q0,#4
	add r7,r7,r2
	eor r2,r5,r6
	eor r0,r4,r4,ror#5
	vext.8 q9,q1,q2,#4
	add r8,r8,r12
	and r2,r2,r4
	eor r12,r0,r4,ror#19
	vshr.u32 q10,q8,#7
	eor r0,r8,r8,ror#11
	eor r2,r2,r6
	vadd.i32 q3,q3,q9
	add r7,r7,r12,ror#6
	eor r12,r8,r9
	vshr.u32 q9,q8,#3
	eor r0,r0,r8,ror#20
	add r7,r7,r2
	vsli.32 q10,q8,#25
	ldr r2,[sp,#52]
	and r3,r3,r12
	vshr.u32 q11,q8,#18
	add r11,r11,r7
	add r7,r7,r0,ror#2
	eor r3,r3,r9
	veor q9,q9,q10
	add r6,r6,r2
	vsli.32 q11,q8,#14
	eor r2,r4,r5
	eor r0,r11,r11,ror#5
	vshr.u32 d24,d5,#17
	add r7,r7,r3
	and r2,r2,r11
	veor q9,q9,q11
	eor r3,r0,r11,ror#19
	eor r0,r7,r7,ror#11
	vsli.32 d24,d5,#15
	eor r2,r2,r5
	add r6,r6,r3,ror#6
	vshr.u32 d25,d5,#10
	eor r3,r7,r8
	eor r0,r0,r7,ror#20
	vadd.i32 q3,q3,q9
	add r6,r6,r2
	ldr r2,[sp,#56]
	veor d25,d25,d24
	and r12,r12,r3
	add r10,r10,r6
	vshr.u32 d24,d5,#19
	add r6,r6,r0,ror#2
	eor r12,r12,r8
	vsli.32 d24,d5,#13
	add r5,r5,r2
	eor r2,r11,r4
	veor d25,d25,d24
	eor r0,r10,r10,ror#5
	add r6,r6,r12
	vadd.i32 d6,d6,d25
	and r2,r2,r10
	eor r12,r0,r10,ror#19
	vshr.u32 d24,d6,#17
	eor r0,r6,r6,ror#11
	eor r2,r2,r4
	vsli.32 d24,d6,#15
	add r5,r5,r12,ror#6
	eor r12,r6,r7
	vshr.u32 d25,d6,#10
	eor r0,r0,r6,ror#20
	add r5,r5,r2
	veor d25,d25,d24
	ldr r2,[sp,#60]
	and r3,r3,r12
	vshr.u32 d24,d6,#19
	add r9,r9,r5
	add r5,r5,r0,ror#2
	eor r3,r3,r7
	vld1.32 {q8},[r14,:128]!
	add r4,r4,r2
	vsli.32 d24,d6,#13
	eor r2,r10,r11
	eor r0,r9,r9,ror#5
	veor d25,d25,d24
	add r5,r5,r3
	and r2,r2,r9
	vadd.i32 d7,d7,d25
	eor r3,r0,r9,ror#19
	eor r0,r5,r5,ror#11
	vadd.i32 q8,q8,q3
	eor r2,r2,r11
	add r4,r4,r3,ror#6
	eor r3,r5,r6
	eor r0,r0,r5,ror#20
	add r4,r4,r2
	ldr r2,[r14]
	and r12,r12,r3
	add r8,r8,r4
	vst1.32 {q8},[r1,:128]!
	add r4,r4,r0,ror#2
	eor r12,r12,r6
	teq r2,#0 @ check for K256 terminator
	ldr r2,[sp,#0]
	sub r1,r1,#64
	bne .L_00_48
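@ The K256 table in this file is followed by a zero word, so the teq
@ above ends the first 48 rounds (the ones that also expand the
@ message schedule) without a separate counter.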

	ldr r1,[sp,#68]
	ldr r0,[sp,#72]
	sub r14,r14,#256 @ rewind r14
	teq r1,r0
	it eq
	subeq r1,r1,#64 @ avoid SEGV
	vld1.8 {q0},[r1]! @ load next input block
	vld1.8 {q1},[r1]!
	vld1.8 {q2},[r1]!
	vld1.8 {q3},[r1]!
	it ne
	strne r1,[sp,#68]
	mov r1,sp
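@ When the last block has been reached (inp == inp+len), the subeq
@ above rewinds r1 so the final block is read once more instead of
@ running past the buffer; since the loop is not re-entered in that
@ case, the re-loaded data is never used.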
	add r11,r11,r2
	eor r2,r9,r10
	eor r0,r8,r8,ror#5
	add r4,r4,r12
	vld1.32 {q8},[r14,:128]!
	and r2,r2,r8
	eor r12,r0,r8,ror#19
	eor r0,r4,r4,ror#11
	eor r2,r2,r10
	vrev32.8 q0,q0
	add r11,r11,r12,ror#6
	eor r12,r4,r5
	eor r0,r0,r4,ror#20
	add r11,r11,r2
	vadd.i32 q8,q8,q0
	ldr r2,[sp,#4]
	and r3,r3,r12
	add r7,r7,r11
	add r11,r11,r0,ror#2
	eor r3,r3,r5
	add r10,r10,r2
	eor r2,r8,r9
	eor r0,r7,r7,ror#5
	add r11,r11,r3
	and r2,r2,r7
	eor r3,r0,r7,ror#19
	eor r0,r11,r11,ror#11
	eor r2,r2,r9
	add r10,r10,r3,ror#6
	eor r3,r11,r4
	eor r0,r0,r11,ror#20
	add r10,r10,r2
	ldr r2,[sp,#8]
	and r12,r12,r3
	add r6,r6,r10
	add r10,r10,r0,ror#2
	eor r12,r12,r4
	add r9,r9,r2
	eor r2,r7,r8
	eor r0,r6,r6,ror#5
	add r10,r10,r12
	and r2,r2,r6
	eor r12,r0,r6,ror#19
	eor r0,r10,r10,ror#11
	eor r2,r2,r8
	add r9,r9,r12,ror#6
	eor r12,r10,r11
	eor r0,r0,r10,ror#20
	add r9,r9,r2
	ldr r2,[sp,#12]
	and r3,r3,r12
	add r5,r5,r9
	add r9,r9,r0,ror#2
	eor r3,r3,r11
	add r8,r8,r2
	eor r2,r6,r7
	eor r0,r5,r5,ror#5
	add r9,r9,r3
	and r2,r2,r5
	eor r3,r0,r5,ror#19
	eor r0,r9,r9,ror#11
	eor r2,r2,r7
	add r8,r8,r3,ror#6
	eor r3,r9,r10
	eor r0,r0,r9,ror#20
	add r8,r8,r2
	ldr r2,[sp,#16]
	and r12,r12,r3
	add r4,r4,r8
	add r8,r8,r0,ror#2
	eor r12,r12,r10
	vst1.32 {q8},[r1,:128]!
	add r7,r7,r2
	eor r2,r5,r6
	eor r0,r4,r4,ror#5
	add r8,r8,r12
	vld1.32 {q8},[r14,:128]!
	and r2,r2,r4
	eor r12,r0,r4,ror#19
	eor r0,r8,r8,ror#11
	eor r2,r2,r6
	vrev32.8 q1,q1
	add r7,r7,r12,ror#6
	eor r12,r8,r9
	eor r0,r0,r8,ror#20
	add r7,r7,r2
	vadd.i32 q8,q8,q1
	ldr r2,[sp,#20]
	and r3,r3,r12
	add r11,r11,r7
	add r7,r7,r0,ror#2
	eor r3,r3,r9
	add r6,r6,r2
	eor r2,r4,r5
	eor r0,r11,r11,ror#5
	add r7,r7,r3
	and r2,r2,r11
	eor r3,r0,r11,ror#19
	eor r0,r7,r7,ror#11
	eor r2,r2,r5
	add r6,r6,r3,ror#6
	eor r3,r7,r8
	eor r0,r0,r7,ror#20
	add r6,r6,r2
	ldr r2,[sp,#24]
	and r12,r12,r3
	add r10,r10,r6
	add r6,r6,r0,ror#2
	eor r12,r12,r8
	add r5,r5,r2
	eor r2,r11,r4
	eor r0,r10,r10,ror#5
	add r6,r6,r12
	and r2,r2,r10
	eor r12,r0,r10,ror#19
	eor r0,r6,r6,ror#11
	eor r2,r2,r4
	add r5,r5,r12,ror#6
	eor r12,r6,r7
	eor r0,r0,r6,ror#20
	add r5,r5,r2
	ldr r2,[sp,#28]
	and r3,r3,r12
	add r9,r9,r5
	add r5,r5,r0,ror#2
	eor r3,r3,r7
	add r4,r4,r2
	eor r2,r10,r11
	eor r0,r9,r9,ror#5
	add r5,r5,r3
	and r2,r2,r9
	eor r3,r0,r9,ror#19
	eor r0,r5,r5,ror#11
	eor r2,r2,r11
	add r4,r4,r3,ror#6
	eor r3,r5,r6
	eor r0,r0,r5,ror#20
	add r4,r4,r2
	ldr r2,[sp,#32]
	and r12,r12,r3
	add r8,r8,r4
	add r4,r4,r0,ror#2
	eor r12,r12,r6
	vst1.32 {q8},[r1,:128]!
	add r11,r11,r2
	eor r2,r9,r10
	eor r0,r8,r8,ror#5
	add r4,r4,r12
	vld1.32 {q8},[r14,:128]!
	and r2,r2,r8
	eor r12,r0,r8,ror#19
	eor r0,r4,r4,ror#11
	eor r2,r2,r10
	vrev32.8 q2,q2
	add r11,r11,r12,ror#6
	eor r12,r4,r5
	eor r0,r0,r4,ror#20
	add r11,r11,r2
	vadd.i32 q8,q8,q2
	ldr r2,[sp,#36]
	and r3,r3,r12
	add r7,r7,r11
	add r11,r11,r0,ror#2
	eor r3,r3,r5
	add r10,r10,r2
	eor r2,r8,r9
	eor r0,r7,r7,ror#5
	add r11,r11,r3
	and r2,r2,r7
	eor r3,r0,r7,ror#19
	eor r0,r11,r11,ror#11
	eor r2,r2,r9
	add r10,r10,r3,ror#6
	eor r3,r11,r4
	eor r0,r0,r11,ror#20
	add r10,r10,r2
	ldr r2,[sp,#40]
	and r12,r12,r3
	add r6,r6,r10
	add r10,r10,r0,ror#2
	eor r12,r12,r4
	add r9,r9,r2
	eor r2,r7,r8
	eor r0,r6,r6,ror#5
	add r10,r10,r12
	and r2,r2,r6
	eor r12,r0,r6,ror#19
	eor r0,r10,r10,ror#11
	eor r2,r2,r8
	add r9,r9,r12,ror#6
	eor r12,r10,r11
	eor r0,r0,r10,ror#20
	add r9,r9,r2
	ldr r2,[sp,#44]
	and r3,r3,r12
	add r5,r5,r9
	add r9,r9,r0,ror#2
	eor r3,r3,r11
	add r8,r8,r2
	eor r2,r6,r7
	eor r0,r5,r5,ror#5
	add r9,r9,r3
	and r2,r2,r5
	eor r3,r0,r5,ror#19
	eor r0,r9,r9,ror#11
	eor r2,r2,r7
	add r8,r8,r3,ror#6
	eor r3,r9,r10
	eor r0,r0,r9,ror#20
	add r8,r8,r2
	ldr r2,[sp,#48]
	and r12,r12,r3
	add r4,r4,r8
	add r8,r8,r0,ror#2
	eor r12,r12,r10
	vst1.32 {q8},[r1,:128]!
	add r7,r7,r2
	eor r2,r5,r6
	eor r0,r4,r4,ror#5
	add r8,r8,r12
	vld1.32 {q8},[r14,:128]!
	and r2,r2,r4
	eor r12,r0,r4,ror#19
	eor r0,r8,r8,ror#11
	eor r2,r2,r6
	vrev32.8 q3,q3
	add r7,r7,r12,ror#6
	eor r12,r8,r9
	eor r0,r0,r8,ror#20
	add r7,r7,r2
	vadd.i32 q8,q8,q3
	ldr r2,[sp,#52]
	and r3,r3,r12
	add r11,r11,r7
	add r7,r7,r0,ror#2
	eor r3,r3,r9
	add r6,r6,r2
	eor r2,r4,r5
	eor r0,r11,r11,ror#5
	add r7,r7,r3
	and r2,r2,r11
	eor r3,r0,r11,ror#19
	eor r0,r7,r7,ror#11
	eor r2,r2,r5
	add r6,r6,r3,ror#6
	eor r3,r7,r8
	eor r0,r0,r7,ror#20
	add r6,r6,r2
	ldr r2,[sp,#56]
	and r12,r12,r3
	add r10,r10,r6
	add r6,r6,r0,ror#2
	eor r12,r12,r8
	add r5,r5,r2
	eor r2,r11,r4
	eor r0,r10,r10,ror#5
	add r6,r6,r12
	and r2,r2,r10
	eor r12,r0,r10,ror#19
	eor r0,r6,r6,ror#11
	eor r2,r2,r4
	add r5,r5,r12,ror#6
	eor r12,r6,r7
	eor r0,r0,r6,ror#20
	add r5,r5,r2
	ldr r2,[sp,#60]
	and r3,r3,r12
	add r9,r9,r5
	add r5,r5,r0,ror#2
	eor r3,r3,r7
	add r4,r4,r2
	eor r2,r10,r11
	eor r0,r9,r9,ror#5
	add r5,r5,r3
	and r2,r2,r9
	eor r3,r0,r9,ror#19
	eor r0,r5,r5,ror#11
	eor r2,r2,r11
	add r4,r4,r3,ror#6
	eor r3,r5,r6
	eor r0,r0,r5,ror#20
	add r4,r4,r2
	ldr r2,[sp,#64]
	and r12,r12,r3
	add r8,r8,r4
	add r4,r4,r0,ror#2
	eor r12,r12,r6
	vst1.32 {q8},[r1,:128]!
	ldr r0,[r2,#0]
	add r4,r4,r12 @ h+=Maj(a,b,c) from the past
	ldr r12,[r2,#4]
	ldr r3,[r2,#8]
	ldr r1,[r2,#12]
	add r4,r4,r0 @ accumulate
	ldr r0,[r2,#16]
	add r5,r5,r12
	ldr r12,[r2,#20]
	add r6,r6,r3
	ldr r3,[r2,#24]
	add r7,r7,r1
	ldr r1,[r2,#28]
	add r8,r8,r0
	str r4,[r2],#4
	add r9,r9,r12
	str r5,[r2],#4
	add r10,r10,r3
	str r6,[r2],#4
	add r11,r11,r1
	str r7,[r2],#4
	stmia r2,{r8,r9,r10,r11}

	ittte ne
	movne r1,sp
	ldrne r2,[sp,#0]
	eorne r12,r12,r12
	ldreq sp,[sp,#76] @ restore original sp
	itt ne
	eorne r3,r5,r6
	bne .L_00_48

	ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc}
.size sha256_block_data_order_neon,.-sha256_block_data_order_neon
#endif
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)

# if defined(__thumb2__)
# define INST(a,b,c,d) .byte c,d|0xc,a,b
# else
# define INST(a,b,c,d) .byte a,b,c,d
# endif
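@ INST() emits the ARMv8 SHA-256 instructions as raw bytes so the file
@ assembles even where the assembler lacks the crypto-extension
@ mnemonics; for Thumb-2 the bytes are reordered (and, it appears, the
@ leading nibble adjusted via |0xc) to match the halfword encoding of
@ the same instruction.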

.type sha256_block_data_order_armv8,%function
.align 5
sha256_block_data_order_armv8:
.LARMv8:
	vld1.32 {q0,q1},[r0]
	sub r3,r3,#256+32
	add r2,r1,r2,lsl#6 @ len to point at the end of inp
	b .Loop_v8
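@ Hardware path, a sketch of its operation: q0/q1 hold the eight state
@ words, r3 (assumed to arrive pointing past the K256 table and rewound
@ by the sub above) walks the constants, and each iteration of .Loop_v8
@ below runs a whole block with sha256su0/su1 schedule updates and
@ sha256h/h2 rounds emitted via INST().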

.align 4
.Loop_v8:
	vld1.8 {q8,q9},[r1]!
	vld1.8 {q10,q11},[r1]!
	vld1.32 {q12},[r3]!
	vrev32.8 q8,q8
	vrev32.8 q9,q9
	vrev32.8 q10,q10
	vrev32.8 q11,q11
	vmov q14,q0 @ offload
	vmov q15,q1
	teq r1,r2
	vld1.32 {q13},[r3]!
	vadd.i32 q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3) @ sha256su0 q8,q9
	vmov q2,q0
	INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3) @ sha256su1 q8,q10,q11
	vld1.32 {q12},[r3]!
	vadd.i32 q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3) @ sha256su0 q9,q10
	vmov q2,q0
	INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3) @ sha256su1 q9,q11,q8
	vld1.32 {q13},[r3]!
	vadd.i32 q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3) @ sha256su0 q10,q11
	vmov q2,q0
	INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3) @ sha256su1 q10,q8,q9
	vld1.32 {q12},[r3]!
	vadd.i32 q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3) @ sha256su0 q11,q8
	vmov q2,q0
	INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3) @ sha256su1 q11,q9,q10
	vld1.32 {q13},[r3]!
	vadd.i32 q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3) @ sha256su0 q8,q9
	vmov q2,q0
	INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3) @ sha256su1 q8,q10,q11
	vld1.32 {q12},[r3]!
	vadd.i32 q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3) @ sha256su0 q9,q10
	vmov q2,q0
	INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3) @ sha256su1 q9,q11,q8
	vld1.32 {q13},[r3]!
	vadd.i32 q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3) @ sha256su0 q10,q11
	vmov q2,q0
	INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3) @ sha256su1 q10,q8,q9
	vld1.32 {q12},[r3]!
	vadd.i32 q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3) @ sha256su0 q11,q8
	vmov q2,q0
	INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3) @ sha256su1 q11,q9,q10
	vld1.32 {q13},[r3]!
	vadd.i32 q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3) @ sha256su0 q8,q9
	vmov q2,q0
	INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3) @ sha256su1 q8,q10,q11
	vld1.32 {q12},[r3]!
	vadd.i32 q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3) @ sha256su0 q9,q10
	vmov q2,q0
	INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3) @ sha256su1 q9,q11,q8
	vld1.32 {q13},[r3]!
	vadd.i32 q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3) @ sha256su0 q10,q11
	vmov q2,q0
	INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3) @ sha256su1 q10,q8,q9
	vld1.32 {q12},[r3]!
	vadd.i32 q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3) @ sha256su0 q11,q8
	vmov q2,q0
	INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3) @ sha256su1 q11,q9,q10
	vld1.32 {q13},[r3]!
	vadd.i32 q12,q12,q8
	vmov q2,q0
	INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12

	vld1.32 {q12},[r3]!
	vadd.i32 q13,q13,q9
	vmov q2,q0
	INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13

	vld1.32 {q13},[r3]
	vadd.i32 q12,q12,q10
	sub r3,r3,#256-16 @ rewind
	vmov q2,q0
	INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12

	vadd.i32 q13,q13,q11
	vmov q2,q0
	INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13

	vadd.i32 q0,q0,q14
	vadd.i32 q1,q1,q15
	it ne
	bne .Loop_v8

	vst1.32 {q0,q1},[r0]

	bx lr @ bx lr
.size sha256_block_data_order_armv8,.-sha256_block_data_order_armv8
#endif
.byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,47,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
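@ The .byte string above decodes to "SHA256 block transform for
@ ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro@openssl.org>" (NUL-terminated).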
.align 2
.align 2
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
.comm OPENSSL_armcap_P,4,4
.hidden OPENSSL_armcap_P
#endif
#endif
#endif  // !OPENSSL_NO_ASM