#if defined(__arm__)
@ Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved.
@
@ Licensed under the OpenSSL license (the "License"). You may not use
@ this file except in compliance with the License. You can obtain a copy
@ in the file LICENSE in the source distribution or at
@ https://www.openssl.org/source/license.html


@ ====================================================================
@ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@ project. The module is, however, dual licensed under OpenSSL and
@ CRYPTOGAMS licenses depending on where you obtain it. For further
@ details see http://www.openssl.org/~appro/cryptogams/.
@
@ Permission to use under GPL terms is granted.
@ ====================================================================

@ SHA512 block procedure for ARMv4. September 2007.

@ This code is ~4.5 (four and a half) times faster than code generated
@ by gcc 3.4 and it spends ~72 clock cycles per byte [on single-issue
@ Xscale PXA250 core].
@
@ July 2010.
@
@ Rescheduling for dual-issue pipeline resulted in 6% improvement on
@ Cortex A8 core and ~40 cycles per processed byte.

@ February 2011.
@
@ Profiler-assisted and platform-specific optimization resulted in 7%
@ improvement on Cortex A8 core and ~38 cycles per byte.

@ March 2011.
@
@ Add NEON implementation. On Cortex A8 it was measured to process
@ one byte in 23.3 cycles or ~60% faster than integer-only code.

@ August 2012.
@
@ Improve NEON performance by 12% on Snapdragon S4. In absolute
@ terms it's 22.6 cycles per byte, which is a disappointing result.
@ Technical writers asserted that the 3-way S4 pipeline can sustain
@ multiple NEON instructions per cycle, but dual NEON issue could
@ not be observed; see http://www.openssl.org/~appro/Snapdragon-S4.html
@ for further details. As a side note, Cortex-A15 processes one byte
@ in 16 cycles.

@ Byte order [in]dependence. =========================================
@
@ Originally the caller was expected to maintain a specific *dword*
@ order in h[0-7], namely with the most significant dword at the
@ *lower* address, which was reflected in the two parameters below as
@ 0 and 4. Now the caller is expected to maintain native byte order
@ for whole 64-bit values (see the illustrative WORD64 expansion below).
#ifndef __KERNEL__
# include <openssl/arm_arch.h>
# define VFP_ABI_PUSH vstmdb sp!,{d8-d15}
# define VFP_ABI_POP vldmia sp!,{d8-d15}
#else
# define __ARM_ARCH__ __LINUX_ARM_ARCH__
# define __ARM_MAX_ARCH__ 7
# define VFP_ABI_PUSH
# define VFP_ABI_POP
#endif

#ifdef __ARMEL__
# define LO 0
# define HI 4
# define WORD64(hi0,lo0,hi1,lo1) .word lo0,hi0, lo1,hi1
#else
# define HI 0
# define LO 4
# define WORD64(hi0,lo0,hi1,lo1) .word hi0,lo0, hi1,lo1
#endif
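@ For example, on a little-endian target the table entry
@ WORD64(0x428a2f98,0xd728ae22, ...) assembles to .word 0xd728ae22,0x428a2f98, ...
@ so each 64-bit constant ends up in native byte order in memory; a
@ big-endian target emits the high word first instead.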

.text
#if defined(__thumb2__)
.syntax	unified
.thumb
# define adrl adr
#else
.code	32
#endif

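@ K512 below holds the 80 64-bit SHA-512 round constants (the first 64
@ bits of the fractional parts of the cube roots of the first 80 primes),
@ stored via WORD64 so the table matches the target's byte order.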
.type	K512,%object
.align	5
K512:
	WORD64(0x428a2f98,0xd728ae22, 0x71374491,0x23ef65cd)
	WORD64(0xb5c0fbcf,0xec4d3b2f, 0xe9b5dba5,0x8189dbbc)
	WORD64(0x3956c25b,0xf348b538, 0x59f111f1,0xb605d019)
	WORD64(0x923f82a4,0xaf194f9b, 0xab1c5ed5,0xda6d8118)
	WORD64(0xd807aa98,0xa3030242, 0x12835b01,0x45706fbe)
	WORD64(0x243185be,0x4ee4b28c, 0x550c7dc3,0xd5ffb4e2)
	WORD64(0x72be5d74,0xf27b896f, 0x80deb1fe,0x3b1696b1)
	WORD64(0x9bdc06a7,0x25c71235, 0xc19bf174,0xcf692694)
	WORD64(0xe49b69c1,0x9ef14ad2, 0xefbe4786,0x384f25e3)
	WORD64(0x0fc19dc6,0x8b8cd5b5, 0x240ca1cc,0x77ac9c65)
	WORD64(0x2de92c6f,0x592b0275, 0x4a7484aa,0x6ea6e483)
	WORD64(0x5cb0a9dc,0xbd41fbd4, 0x76f988da,0x831153b5)
	WORD64(0x983e5152,0xee66dfab, 0xa831c66d,0x2db43210)
	WORD64(0xb00327c8,0x98fb213f, 0xbf597fc7,0xbeef0ee4)
	WORD64(0xc6e00bf3,0x3da88fc2, 0xd5a79147,0x930aa725)
	WORD64(0x06ca6351,0xe003826f, 0x14292967,0x0a0e6e70)
	WORD64(0x27b70a85,0x46d22ffc, 0x2e1b2138,0x5c26c926)
	WORD64(0x4d2c6dfc,0x5ac42aed, 0x53380d13,0x9d95b3df)
	WORD64(0x650a7354,0x8baf63de, 0x766a0abb,0x3c77b2a8)
	WORD64(0x81c2c92e,0x47edaee6, 0x92722c85,0x1482353b)
	WORD64(0xa2bfe8a1,0x4cf10364, 0xa81a664b,0xbc423001)
	WORD64(0xc24b8b70,0xd0f89791, 0xc76c51a3,0x0654be30)
	WORD64(0xd192e819,0xd6ef5218, 0xd6990624,0x5565a910)
	WORD64(0xf40e3585,0x5771202a, 0x106aa070,0x32bbd1b8)
	WORD64(0x19a4c116,0xb8d2d0c8, 0x1e376c08,0x5141ab53)
	WORD64(0x2748774c,0xdf8eeb99, 0x34b0bcb5,0xe19b48a8)
	WORD64(0x391c0cb3,0xc5c95a63, 0x4ed8aa4a,0xe3418acb)
	WORD64(0x5b9cca4f,0x7763e373, 0x682e6ff3,0xd6b2b8a3)
	WORD64(0x748f82ee,0x5defb2fc, 0x78a5636f,0x43172f60)
	WORD64(0x84c87814,0xa1f0ab72, 0x8cc70208,0x1a6439ec)
	WORD64(0x90befffa,0x23631e28, 0xa4506ceb,0xde82bde9)
	WORD64(0xbef9a3f7,0xb2c67915, 0xc67178f2,0xe372532b)
	WORD64(0xca273ece,0xea26619c, 0xd186b8c7,0x21c0c207)
	WORD64(0xeada7dd6,0xcde0eb1e, 0xf57d4f7f,0xee6ed178)
	WORD64(0x06f067aa,0x72176fba, 0x0a637dc5,0xa2c898a6)
	WORD64(0x113f9804,0xbef90dae, 0x1b710b35,0x131c471b)
	WORD64(0x28db77f5,0x23047d84, 0x32caab7b,0x40c72493)
	WORD64(0x3c9ebe0a,0x15c9bebc, 0x431d67c4,0x9c100d4c)
	WORD64(0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a)
	WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817)
.size	K512,.-K512
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
.LOPENSSL_armcap:
.word	OPENSSL_armcap_P-.Lsha512_block_data_order
.skip	32-4
#else
.skip	32
#endif

.globl	sha512_block_data_order
.hidden	sha512_block_data_order
.type	sha512_block_data_order,%function
sha512_block_data_order:
.Lsha512_block_data_order:
#if __ARM_ARCH__<7 && !defined(__thumb2__)
	sub	r3,pc,#8 @ sha512_block_data_order
#else
	adr	r3,.Lsha512_block_data_order
#endif
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
	ldr	r12,.LOPENSSL_armcap
	ldr	r12,[r3,r12] @ OPENSSL_armcap_P
#ifdef __APPLE__
	ldr	r12,[r12]
#endif
	tst	r12,#ARMV7_NEON
	bne	.LNEON
#endif
	add	r2,r1,r2,lsl#7 @ len to point at the end of inp
	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
	sub	r14,r3,#672 @ K512
	sub	sp,sp,#9*8

	ldr	r7,[r0,#32+LO]
	ldr	r8,[r0,#32+HI]
	ldr	r9, [r0,#48+LO]
	ldr	r10, [r0,#48+HI]
	ldr	r11, [r0,#56+LO]
	ldr	r12, [r0,#56+HI]
.Loop:
	str	r9, [sp,#48+0]
	str	r10, [sp,#48+4]
	str	r11, [sp,#56+0]
	str	r12, [sp,#56+4]
	ldr	r5,[r0,#0+LO]
	ldr	r6,[r0,#0+HI]
	ldr	r3,[r0,#8+LO]
	ldr	r4,[r0,#8+HI]
	ldr	r9, [r0,#16+LO]
	ldr	r10, [r0,#16+HI]
	ldr	r11, [r0,#24+LO]
	ldr	r12, [r0,#24+HI]
	str	r3,[sp,#8+0]
	str	r4,[sp,#8+4]
	str	r9, [sp,#16+0]
	str	r10, [sp,#16+4]
	str	r11, [sp,#24+0]
	str	r12, [sp,#24+4]
	ldr	r3,[r0,#40+LO]
	ldr	r4,[r0,#40+HI]
	str	r3,[sp,#40+0]
	str	r4,[sp,#40+4]

.L00_15:
#if __ARM_ARCH__<7
	ldrb	r3,[r1,#7]
	ldrb	r9, [r1,#6]
	ldrb	r10, [r1,#5]
	ldrb	r11, [r1,#4]
	ldrb	r4,[r1,#3]
	ldrb	r12, [r1,#2]
	orr	r3,r3,r9,lsl#8
	ldrb	r9, [r1,#1]
	orr	r3,r3,r10,lsl#16
	ldrb	r10, [r1],#8
	orr	r3,r3,r11,lsl#24
	orr	r4,r4,r12,lsl#8
	orr	r4,r4,r9,lsl#16
	orr	r4,r4,r10,lsl#24
#else
	ldr	r3,[r1,#4]
	ldr	r4,[r1],#8
#ifdef __ARMEL__
	rev	r3,r3
	rev	r4,r4
#endif
#endif
	@ Sigma1(x) (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
	@ LO lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23
	@ HI hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23
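	@ (i.e. with a 64-bit word split into 32-bit halves (hi,lo), ROTR by
	@ n<32 gives lo'=lo>>n^hi<<(32-n), hi'=hi>>n^lo<<(32-n); for n>32 the
	@ halves swap roles with shift n-32, hence the >>9/<<23 terms above
	@ for the rotate by 41)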
	mov	r9,r7,lsr#14
	str	r3,[sp,#64+0]
	mov	r10,r8,lsr#14
	str	r4,[sp,#64+4]
	eor	r9,r9,r8,lsl#18
	ldr	r11,[sp,#56+0] @ h.lo
	eor	r10,r10,r7,lsl#18
	ldr	r12,[sp,#56+4] @ h.hi
	eor	r9,r9,r7,lsr#18
	eor	r10,r10,r8,lsr#18
	eor	r9,r9,r8,lsl#14
	eor	r10,r10,r7,lsl#14
	eor	r9,r9,r8,lsr#9
	eor	r10,r10,r7,lsr#9
	eor	r9,r9,r7,lsl#23
	eor	r10,r10,r8,lsl#23 @ Sigma1(e)
	adds	r3,r3,r9
	ldr	r9,[sp,#40+0] @ f.lo
	adc	r4,r4,r10 @ T += Sigma1(e)
	ldr	r10,[sp,#40+4] @ f.hi
	adds	r3,r3,r11
	ldr	r11,[sp,#48+0] @ g.lo
	adc	r4,r4,r12 @ T += h
	ldr	r12,[sp,#48+4] @ g.hi

	eor	r9,r9,r11
	str	r7,[sp,#32+0]
	eor	r10,r10,r12
	str	r8,[sp,#32+4]
	and	r9,r9,r7
	str	r5,[sp,#0+0]
	and	r10,r10,r8
	str	r6,[sp,#0+4]
	eor	r9,r9,r11
	ldr	r11,[r14,#LO] @ K[i].lo
	eor	r10,r10,r12 @ Ch(e,f,g)
	ldr	r12,[r14,#HI] @ K[i].hi

	adds	r3,r3,r9
	ldr	r7,[sp,#24+0] @ d.lo
	adc	r4,r4,r10 @ T += Ch(e,f,g)
	ldr	r8,[sp,#24+4] @ d.hi
	adds	r3,r3,r11
	and	r9,r11,#0xff
	adc	r4,r4,r12 @ T += K[i]
	adds	r7,r7,r3
	ldr	r11,[sp,#8+0] @ b.lo
	adc	r8,r8,r4 @ d += T
	teq	r9,#148

	ldr	r12,[sp,#16+0] @ c.lo
#if __ARM_ARCH__>=7
	it	eq @ Thumb2 thing, sanity check in ARM
#endif
	orreq	r14,r14,#1
	@ Sigma0(x) (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
	@ LO lo>>28^hi<<4 ^ hi>>2^lo<<30 ^ hi>>7^lo<<25
	@ HI hi>>28^lo<<4 ^ lo>>2^hi<<30 ^ lo>>7^hi<<25
	mov	r9,r5,lsr#28
	mov	r10,r6,lsr#28
	eor	r9,r9,r6,lsl#4
	eor	r10,r10,r5,lsl#4
	eor	r9,r9,r6,lsr#2
	eor	r10,r10,r5,lsr#2
	eor	r9,r9,r5,lsl#30
	eor	r10,r10,r6,lsl#30
	eor	r9,r9,r6,lsr#7
	eor	r10,r10,r5,lsr#7
	eor	r9,r9,r5,lsl#25
	eor	r10,r10,r6,lsl#25 @ Sigma0(a)
	adds	r3,r3,r9
	and	r9,r5,r11
	adc	r4,r4,r10 @ T += Sigma0(a)

	ldr	r10,[sp,#8+4] @ b.hi
	orr	r5,r5,r11
	ldr	r11,[sp,#16+4] @ c.hi
	and	r5,r5,r12
	and	r12,r6,r10
	orr	r6,r6,r10
	orr	r5,r5,r9 @ Maj(a,b,c).lo
	and	r6,r6,r11
	adds	r5,r5,r3
	orr	r6,r6,r12 @ Maj(a,b,c).hi
	sub	sp,sp,#8
	adc	r6,r6,r4 @ h += T
	tst	r14,#1
	add	r14,r14,#8
	tst	r14,#1
	beq	.L00_15
	ldr	r9,[sp,#184+0]
	ldr	r10,[sp,#184+4]
	bic	r14,r14,#1
.L16_79:
	@ sigma0(x) (ROTR((x),1) ^ ROTR((x),8) ^ ((x)>>7))
	@ LO lo>>1^hi<<31 ^ lo>>8^hi<<24 ^ lo>>7^hi<<25
	@ HI hi>>1^lo<<31 ^ hi>>8^lo<<24 ^ hi>>7
	mov	r3,r9,lsr#1
	ldr	r11,[sp,#80+0]
	mov	r4,r10,lsr#1
	ldr	r12,[sp,#80+4]
	eor	r3,r3,r10,lsl#31
	eor	r4,r4,r9,lsl#31
	eor	r3,r3,r9,lsr#8
	eor	r4,r4,r10,lsr#8
	eor	r3,r3,r10,lsl#24
	eor	r4,r4,r9,lsl#24
	eor	r3,r3,r9,lsr#7
	eor	r4,r4,r10,lsr#7
	eor	r3,r3,r10,lsl#25

	@ sigma1(x) (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
	@ LO lo>>19^hi<<13 ^ hi>>29^lo<<3 ^ lo>>6^hi<<26
	@ HI hi>>19^lo<<13 ^ lo>>29^hi<<3 ^ hi>>6
	mov	r9,r11,lsr#19
	mov	r10,r12,lsr#19
	eor	r9,r9,r12,lsl#13
	eor	r10,r10,r11,lsl#13
	eor	r9,r9,r12,lsr#29
	eor	r10,r10,r11,lsr#29
	eor	r9,r9,r11,lsl#3
	eor	r10,r10,r12,lsl#3
	eor	r9,r9,r11,lsr#6
	eor	r10,r10,r12,lsr#6
	ldr	r11,[sp,#120+0]
	eor	r9,r9,r12,lsl#26

	ldr	r12,[sp,#120+4]
	adds	r3,r3,r9
	ldr	r9,[sp,#192+0]
	adc	r4,r4,r10

	ldr	r10,[sp,#192+4]
	adds	r3,r3,r11
	adc	r4,r4,r12
	adds	r3,r3,r9
	adc	r4,r4,r10
	@ Sigma1(x) (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
	@ LO lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23
	@ HI hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23
	mov	r9,r7,lsr#14
	str	r3,[sp,#64+0]
	mov	r10,r8,lsr#14
	str	r4,[sp,#64+4]
	eor	r9,r9,r8,lsl#18
	ldr	r11,[sp,#56+0] @ h.lo
	eor	r10,r10,r7,lsl#18
	ldr	r12,[sp,#56+4] @ h.hi
	eor	r9,r9,r7,lsr#18
	eor	r10,r10,r8,lsr#18
	eor	r9,r9,r8,lsl#14
	eor	r10,r10,r7,lsl#14
	eor	r9,r9,r8,lsr#9
	eor	r10,r10,r7,lsr#9
	eor	r9,r9,r7,lsl#23
	eor	r10,r10,r8,lsl#23 @ Sigma1(e)
	adds	r3,r3,r9
	ldr	r9,[sp,#40+0] @ f.lo
	adc	r4,r4,r10 @ T += Sigma1(e)
	ldr	r10,[sp,#40+4] @ f.hi
	adds	r3,r3,r11
	ldr	r11,[sp,#48+0] @ g.lo
	adc	r4,r4,r12 @ T += h
	ldr	r12,[sp,#48+4] @ g.hi

	eor	r9,r9,r11
	str	r7,[sp,#32+0]
	eor	r10,r10,r12
	str	r8,[sp,#32+4]
	and	r9,r9,r7
	str	r5,[sp,#0+0]
	and	r10,r10,r8
	str	r6,[sp,#0+4]
	eor	r9,r9,r11
	ldr	r11,[r14,#LO] @ K[i].lo
	eor	r10,r10,r12 @ Ch(e,f,g)
	ldr	r12,[r14,#HI] @ K[i].hi

	adds	r3,r3,r9
	ldr	r7,[sp,#24+0] @ d.lo
	adc	r4,r4,r10 @ T += Ch(e,f,g)
	ldr	r8,[sp,#24+4] @ d.hi
	adds	r3,r3,r11
	and	r9,r11,#0xff
	adc	r4,r4,r12 @ T += K[i]
	adds	r7,r7,r3
	ldr	r11,[sp,#8+0] @ b.lo
	adc	r8,r8,r4 @ d += T
	teq	r9,#23

	ldr	r12,[sp,#16+0] @ c.lo
#if __ARM_ARCH__>=7
	it	eq @ Thumb2 thing, sanity check in ARM
#endif
	orreq	r14,r14,#1
	@ Sigma0(x) (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
	@ LO lo>>28^hi<<4 ^ hi>>2^lo<<30 ^ hi>>7^lo<<25
	@ HI hi>>28^lo<<4 ^ lo>>2^hi<<30 ^ lo>>7^hi<<25
	mov	r9,r5,lsr#28
	mov	r10,r6,lsr#28
	eor	r9,r9,r6,lsl#4
	eor	r10,r10,r5,lsl#4
	eor	r9,r9,r6,lsr#2
	eor	r10,r10,r5,lsr#2
	eor	r9,r9,r5,lsl#30
	eor	r10,r10,r6,lsl#30
	eor	r9,r9,r6,lsr#7
	eor	r10,r10,r5,lsr#7
	eor	r9,r9,r5,lsl#25
	eor	r10,r10,r6,lsl#25 @ Sigma0(a)
	adds	r3,r3,r9
	and	r9,r5,r11
	adc	r4,r4,r10 @ T += Sigma0(a)

	ldr	r10,[sp,#8+4] @ b.hi
	orr	r5,r5,r11
	ldr	r11,[sp,#16+4] @ c.hi
	and	r5,r5,r12
	and	r12,r6,r10
	orr	r6,r6,r10
	orr	r5,r5,r9 @ Maj(a,b,c).lo
	and	r6,r6,r11
	adds	r5,r5,r3
	orr	r6,r6,r12 @ Maj(a,b,c).hi
	sub	sp,sp,#8
	adc	r6,r6,r4 @ h += T
	tst	r14,#1
	add	r14,r14,#8
#if __ARM_ARCH__>=7
	ittt	eq @ Thumb2 thing, sanity check in ARM
#endif
	ldreq	r9,[sp,#184+0]
	ldreq	r10,[sp,#184+4]
	beq	.L16_79
	bic	r14,r14,#1

	ldr	r3,[sp,#8+0]
	ldr	r4,[sp,#8+4]
	ldr	r9, [r0,#0+LO]
	ldr	r10, [r0,#0+HI]
	ldr	r11, [r0,#8+LO]
	ldr	r12, [r0,#8+HI]
	adds	r9,r5,r9
	str	r9, [r0,#0+LO]
	adc	r10,r6,r10
	str	r10, [r0,#0+HI]
	adds	r11,r3,r11
	str	r11, [r0,#8+LO]
	adc	r12,r4,r12
	str	r12, [r0,#8+HI]

	ldr	r5,[sp,#16+0]
	ldr	r6,[sp,#16+4]
	ldr	r3,[sp,#24+0]
	ldr	r4,[sp,#24+4]
	ldr	r9, [r0,#16+LO]
	ldr	r10, [r0,#16+HI]
	ldr	r11, [r0,#24+LO]
	ldr	r12, [r0,#24+HI]
	adds	r9,r5,r9
	str	r9, [r0,#16+LO]
	adc	r10,r6,r10
	str	r10, [r0,#16+HI]
	adds	r11,r3,r11
	str	r11, [r0,#24+LO]
	adc	r12,r4,r12
	str	r12, [r0,#24+HI]

	ldr	r3,[sp,#40+0]
	ldr	r4,[sp,#40+4]
	ldr	r9, [r0,#32+LO]
	ldr	r10, [r0,#32+HI]
	ldr	r11, [r0,#40+LO]
	ldr	r12, [r0,#40+HI]
	adds	r7,r7,r9
	str	r7,[r0,#32+LO]
	adc	r8,r8,r10
	str	r8,[r0,#32+HI]
	adds	r11,r3,r11
	str	r11, [r0,#40+LO]
	adc	r12,r4,r12
	str	r12, [r0,#40+HI]

	ldr	r5,[sp,#48+0]
	ldr	r6,[sp,#48+4]
	ldr	r3,[sp,#56+0]
	ldr	r4,[sp,#56+4]
	ldr	r9, [r0,#48+LO]
	ldr	r10, [r0,#48+HI]
	ldr	r11, [r0,#56+LO]
	ldr	r12, [r0,#56+HI]
	adds	r9,r5,r9
	str	r9, [r0,#48+LO]
	adc	r10,r6,r10
	str	r10, [r0,#48+HI]
	adds	r11,r3,r11
	str	r11, [r0,#56+LO]
	adc	r12,r4,r12
	str	r12, [r0,#56+HI]

	add	sp,sp,#640
	sub	r14,r14,#640

	teq	r1,r2
	bne	.Loop

	add	sp,sp,#8*9 @ destroy frame
#if __ARM_ARCH__>=5
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc}
#else
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
	tst	lr,#1
	moveq	pc,lr @ be binary compatible with V4, yet
.word	0xe12fff1e @ interoperable with Thumb ISA:-)
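	@ (note: 0xe12fff1e encodes "bx lr"; it is only reached when the
	@ return address has the Thumb bit set, i.e. on cores that do have
	@ BX, while plain ARM-mode callers return via the "moveq pc,lr" above)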
#endif
.size	sha512_block_data_order,.-sha512_block_data_order
#if __ARM_MAX_ARCH__>=7
.arch	armv7-a
.fpu	neon

.globl	sha512_block_data_order_neon
.hidden	sha512_block_data_order_neon
.type	sha512_block_data_order_neon,%function
.align	4
sha512_block_data_order_neon:
.LNEON:
	dmb	@ errata #451034 on early Cortex A8
	add	r2,r1,r2,lsl#7 @ len to point at the end of inp
	adr	r3,K512
	VFP_ABI_PUSH
	vldmia	r0,{d16,d17,d18,d19,d20,d21,d22,d23} @ load context
.Loop_neon:
	vshr.u64	d24,d20,#14 @ 0
#if 0<16
	vld1.64	{d0},[r1]! @ handles unaligned
#endif
	vshr.u64	d25,d20,#18
#if 0>0
	vadd.i64	d16,d30 @ h+=Maj from the past
#endif
	vshr.u64	d26,d20,#41
	vld1.64	{d28},[r3,:64]! @ K[i++]
	vsli.64	d24,d20,#50
	vsli.64	d25,d20,#46
	vmov	d29,d20
	vsli.64	d26,d20,#23
#if 0<16 && defined(__ARMEL__)
	vrev64.8	d0,d0
#endif
	veor	d25,d24
	vbsl	d29,d21,d22 @ Ch(e,f,g)
	vshr.u64	d24,d16,#28
	veor	d26,d25 @ Sigma1(e)
	vadd.i64	d27,d29,d23
	vshr.u64	d25,d16,#34
	vsli.64	d24,d16,#36
	vadd.i64	d27,d26
	vshr.u64	d26,d16,#39
	vadd.i64	d28,d0
	vsli.64	d25,d16,#30
	veor	d30,d16,d17
	vsli.64	d26,d16,#25
	veor	d23,d24,d25
	vadd.i64	d27,d28
	vbsl	d30,d18,d17 @ Maj(a,b,c)
	veor	d23,d26 @ Sigma0(a)
	vadd.i64	d19,d27
	vadd.i64	d30,d27
	@ vadd.i64	d23,d30
	vshr.u64	d24,d19,#14 @ 1
#if 1<16
	vld1.64	{d1},[r1]! @ handles unaligned
#endif
	vshr.u64	d25,d19,#18
#if 1>0
	vadd.i64	d23,d30 @ h+=Maj from the past
#endif
	vshr.u64	d26,d19,#41
	vld1.64	{d28},[r3,:64]! @ K[i++]
	vsli.64	d24,d19,#50
	vsli.64	d25,d19,#46
	vmov	d29,d19
	vsli.64	d26,d19,#23
#if 1<16 && defined(__ARMEL__)
	vrev64.8	d1,d1
#endif
	veor	d25,d24
	vbsl	d29,d20,d21 @ Ch(e,f,g)
	vshr.u64	d24,d23,#28
	veor	d26,d25 @ Sigma1(e)
	vadd.i64	d27,d29,d22
	vshr.u64	d25,d23,#34
	vsli.64	d24,d23,#36
	vadd.i64	d27,d26
	vshr.u64	d26,d23,#39
	vadd.i64	d28,d1
	vsli.64	d25,d23,#30
	veor	d30,d23,d16
	vsli.64	d26,d23,#25
	veor	d22,d24,d25
	vadd.i64	d27,d28
	vbsl	d30,d17,d16 @ Maj(a,b,c)
	veor	d22,d26 @ Sigma0(a)
	vadd.i64	d18,d27
	vadd.i64	d30,d27
	@ vadd.i64	d22,d30
	vshr.u64	d24,d18,#14 @ 2
#if 2<16
	vld1.64	{d2},[r1]! @ handles unaligned
#endif
	vshr.u64	d25,d18,#18
#if 2>0
	vadd.i64	d22,d30 @ h+=Maj from the past
#endif
	vshr.u64	d26,d18,#41
	vld1.64	{d28},[r3,:64]! @ K[i++]
	vsli.64	d24,d18,#50
	vsli.64	d25,d18,#46
	vmov	d29,d18
	vsli.64	d26,d18,#23
#if 2<16 && defined(__ARMEL__)
	vrev64.8	d2,d2
#endif
	veor	d25,d24
	vbsl	d29,d19,d20 @ Ch(e,f,g)
	vshr.u64	d24,d22,#28
	veor	d26,d25 @ Sigma1(e)
	vadd.i64	d27,d29,d21
	vshr.u64	d25,d22,#34
	vsli.64	d24,d22,#36
	vadd.i64	d27,d26
	vshr.u64	d26,d22,#39
	vadd.i64	d28,d2
	vsli.64	d25,d22,#30
	veor	d30,d22,d23
	vsli.64	d26,d22,#25
	veor	d21,d24,d25
	vadd.i64	d27,d28
	vbsl	d30,d16,d23 @ Maj(a,b,c)
	veor	d21,d26 @ Sigma0(a)
	vadd.i64	d17,d27
	vadd.i64	d30,d27
	@ vadd.i64	d21,d30
	vshr.u64	d24,d17,#14 @ 3
#if 3<16
	vld1.64	{d3},[r1]! @ handles unaligned
#endif
	vshr.u64	d25,d17,#18
#if 3>0
	vadd.i64	d21,d30 @ h+=Maj from the past
#endif
	vshr.u64	d26,d17,#41
	vld1.64	{d28},[r3,:64]! @ K[i++]
	vsli.64	d24,d17,#50
	vsli.64	d25,d17,#46
	vmov	d29,d17
	vsli.64	d26,d17,#23
#if 3<16 && defined(__ARMEL__)
	vrev64.8	d3,d3
#endif
	veor	d25,d24
	vbsl	d29,d18,d19 @ Ch(e,f,g)
	vshr.u64	d24,d21,#28
	veor	d26,d25 @ Sigma1(e)
	vadd.i64	d27,d29,d20
	vshr.u64	d25,d21,#34
	vsli.64	d24,d21,#36
	vadd.i64	d27,d26
	vshr.u64	d26,d21,#39
	vadd.i64	d28,d3
	vsli.64	d25,d21,#30
	veor	d30,d21,d22
	vsli.64	d26,d21,#25
	veor	d20,d24,d25
	vadd.i64	d27,d28
	vbsl	d30,d23,d22 @ Maj(a,b,c)
	veor	d20,d26 @ Sigma0(a)
	vadd.i64	d16,d27
	vadd.i64	d30,d27
	@ vadd.i64	d20,d30
	vshr.u64	d24,d16,#14 @ 4
#if 4<16
	vld1.64	{d4},[r1]! @ handles unaligned
#endif
	vshr.u64	d25,d16,#18
#if 4>0
	vadd.i64	d20,d30 @ h+=Maj from the past
#endif
	vshr.u64	d26,d16,#41
	vld1.64	{d28},[r3,:64]! @ K[i++]
	vsli.64	d24,d16,#50
	vsli.64	d25,d16,#46
	vmov	d29,d16
	vsli.64	d26,d16,#23
#if 4<16 && defined(__ARMEL__)
	vrev64.8	d4,d4
#endif
	veor	d25,d24
	vbsl	d29,d17,d18 @ Ch(e,f,g)
	vshr.u64	d24,d20,#28
	veor	d26,d25 @ Sigma1(e)
	vadd.i64	d27,d29,d19
	vshr.u64	d25,d20,#34
	vsli.64	d24,d20,#36
	vadd.i64	d27,d26
	vshr.u64	d26,d20,#39
	vadd.i64	d28,d4
	vsli.64	d25,d20,#30
	veor	d30,d20,d21
	vsli.64	d26,d20,#25
	veor	d19,d24,d25
	vadd.i64	d27,d28
	vbsl	d30,d22,d21 @ Maj(a,b,c)
	veor	d19,d26 @ Sigma0(a)
	vadd.i64	d23,d27
	vadd.i64	d30,d27
	@ vadd.i64	d19,d30
	vshr.u64	d24,d23,#14 @ 5
#if 5<16
	vld1.64	{d5},[r1]! @ handles unaligned
#endif
	vshr.u64	d25,d23,#18
#if 5>0
	vadd.i64	d19,d30 @ h+=Maj from the past
#endif
	vshr.u64	d26,d23,#41
	vld1.64	{d28},[r3,:64]! @ K[i++]
	vsli.64	d24,d23,#50
	vsli.64	d25,d23,#46
	vmov	d29,d23
	vsli.64	d26,d23,#23
#if 5<16 && defined(__ARMEL__)
	vrev64.8	d5,d5
#endif
	veor	d25,d24
	vbsl	d29,d16,d17 @ Ch(e,f,g)
	vshr.u64	d24,d19,#28
	veor	d26,d25 @ Sigma1(e)
	vadd.i64	d27,d29,d18
	vshr.u64	d25,d19,#34
	vsli.64	d24,d19,#36
	vadd.i64	d27,d26
	vshr.u64	d26,d19,#39
	vadd.i64	d28,d5
	vsli.64	d25,d19,#30
	veor	d30,d19,d20
	vsli.64	d26,d19,#25
	veor	d18,d24,d25
	vadd.i64	d27,d28
	vbsl	d30,d21,d20 @ Maj(a,b,c)
	veor	d18,d26 @ Sigma0(a)
	vadd.i64	d22,d27
	vadd.i64	d30,d27
	@ vadd.i64	d18,d30
	vshr.u64	d24,d22,#14 @ 6
#if 6<16
	vld1.64	{d6},[r1]! @ handles unaligned
#endif
	vshr.u64	d25,d22,#18
#if 6>0
	vadd.i64	d18,d30 @ h+=Maj from the past
#endif
	vshr.u64	d26,d22,#41
	vld1.64	{d28},[r3,:64]! @ K[i++]
	vsli.64	d24,d22,#50
	vsli.64	d25,d22,#46
	vmov	d29,d22
	vsli.64	d26,d22,#23
#if 6<16 && defined(__ARMEL__)
	vrev64.8	d6,d6
#endif
	veor	d25,d24
	vbsl	d29,d23,d16 @ Ch(e,f,g)
	vshr.u64	d24,d18,#28
	veor	d26,d25 @ Sigma1(e)
	vadd.i64	d27,d29,d17
	vshr.u64	d25,d18,#34
	vsli.64	d24,d18,#36
	vadd.i64	d27,d26
	vshr.u64	d26,d18,#39
	vadd.i64	d28,d6
	vsli.64	d25,d18,#30
	veor	d30,d18,d19
	vsli.64	d26,d18,#25
	veor	d17,d24,d25
	vadd.i64	d27,d28
	vbsl	d30,d20,d19 @ Maj(a,b,c)
	veor	d17,d26 @ Sigma0(a)
	vadd.i64	d21,d27
	vadd.i64	d30,d27
	@ vadd.i64	d17,d30
	vshr.u64	d24,d21,#14 @ 7
#if 7<16
	vld1.64	{d7},[r1]! @ handles unaligned
#endif
	vshr.u64	d25,d21,#18
#if 7>0
	vadd.i64	d17,d30 @ h+=Maj from the past
#endif
	vshr.u64	d26,d21,#41
	vld1.64	{d28},[r3,:64]! @ K[i++]
	vsli.64	d24,d21,#50
	vsli.64	d25,d21,#46
	vmov	d29,d21
	vsli.64	d26,d21,#23
#if 7<16 && defined(__ARMEL__)
	vrev64.8	d7,d7
#endif
	veor	d25,d24
	vbsl	d29,d22,d23 @ Ch(e,f,g)
	vshr.u64	d24,d17,#28
	veor	d26,d25 @ Sigma1(e)
	vadd.i64	d27,d29,d16
	vshr.u64	d25,d17,#34
	vsli.64	d24,d17,#36
	vadd.i64	d27,d26
	vshr.u64	d26,d17,#39
	vadd.i64	d28,d7
	vsli.64	d25,d17,#30
	veor	d30,d17,d18
	vsli.64	d26,d17,#25
	veor	d16,d24,d25
	vadd.i64	d27,d28
	vbsl	d30,d19,d18 @ Maj(a,b,c)
	veor	d16,d26 @ Sigma0(a)
	vadd.i64	d20,d27
	vadd.i64	d30,d27
	@ vadd.i64	d16,d30
	vshr.u64	d24,d20,#14 @ 8
#if 8<16
	vld1.64	{d8},[r1]! @ handles unaligned
#endif
	vshr.u64	d25,d20,#18
#if 8>0
	vadd.i64	d16,d30 @ h+=Maj from the past
#endif
	vshr.u64	d26,d20,#41
	vld1.64	{d28},[r3,:64]! @ K[i++]
	vsli.64	d24,d20,#50
	vsli.64	d25,d20,#46
	vmov	d29,d20
	vsli.64	d26,d20,#23
#if 8<16 && defined(__ARMEL__)
	vrev64.8	d8,d8
#endif
	veor	d25,d24
	vbsl	d29,d21,d22 @ Ch(e,f,g)
	vshr.u64	d24,d16,#28
	veor	d26,d25 @ Sigma1(e)
	vadd.i64	d27,d29,d23
	vshr.u64	d25,d16,#34
	vsli.64	d24,d16,#36
	vadd.i64	d27,d26
	vshr.u64	d26,d16,#39
	vadd.i64	d28,d8
	vsli.64	d25,d16,#30
	veor	d30,d16,d17
	vsli.64	d26,d16,#25
	veor	d23,d24,d25
	vadd.i64	d27,d28
	vbsl	d30,d18,d17 @ Maj(a,b,c)
	veor	d23,d26 @ Sigma0(a)
	vadd.i64	d19,d27
	vadd.i64	d30,d27
	@ vadd.i64	d23,d30
	vshr.u64	d24,d19,#14 @ 9
#if 9<16
	vld1.64	{d9},[r1]! @ handles unaligned
#endif
	vshr.u64	d25,d19,#18
#if 9>0
	vadd.i64	d23,d30 @ h+=Maj from the past
#endif
	vshr.u64	d26,d19,#41
	vld1.64	{d28},[r3,:64]! @ K[i++]
	vsli.64	d24,d19,#50
	vsli.64	d25,d19,#46
	vmov	d29,d19
	vsli.64	d26,d19,#23
#if 9<16 && defined(__ARMEL__)
	vrev64.8	d9,d9
#endif
	veor	d25,d24
	vbsl	d29,d20,d21 @ Ch(e,f,g)
	vshr.u64	d24,d23,#28
	veor	d26,d25 @ Sigma1(e)
	vadd.i64	d27,d29,d22
	vshr.u64	d25,d23,#34
	vsli.64	d24,d23,#36
	vadd.i64	d27,d26
	vshr.u64	d26,d23,#39
	vadd.i64	d28,d9
	vsli.64	d25,d23,#30
	veor	d30,d23,d16
	vsli.64	d26,d23,#25
	veor	d22,d24,d25
	vadd.i64	d27,d28
	vbsl	d30,d17,d16 @ Maj(a,b,c)
	veor	d22,d26 @ Sigma0(a)
	vadd.i64	d18,d27
	vadd.i64	d30,d27
	@ vadd.i64	d22,d30
	vshr.u64	d24,d18,#14 @ 10
#if 10<16
	vld1.64	{d10},[r1]! @ handles unaligned
#endif
	vshr.u64	d25,d18,#18
#if 10>0
	vadd.i64	d22,d30 @ h+=Maj from the past
#endif
	vshr.u64	d26,d18,#41
	vld1.64	{d28},[r3,:64]! @ K[i++]
	vsli.64	d24,d18,#50
	vsli.64	d25,d18,#46
	vmov	d29,d18
	vsli.64	d26,d18,#23
#if 10<16 && defined(__ARMEL__)
	vrev64.8	d10,d10
#endif
	veor	d25,d24
	vbsl	d29,d19,d20 @ Ch(e,f,g)
	vshr.u64	d24,d22,#28
	veor	d26,d25 @ Sigma1(e)
	vadd.i64	d27,d29,d21
	vshr.u64	d25,d22,#34
	vsli.64	d24,d22,#36
	vadd.i64	d27,d26
	vshr.u64	d26,d22,#39
	vadd.i64	d28,d10
	vsli.64	d25,d22,#30
	veor	d30,d22,d23
	vsli.64	d26,d22,#25
	veor	d21,d24,d25
	vadd.i64	d27,d28
	vbsl	d30,d16,d23 @ Maj(a,b,c)
	veor	d21,d26 @ Sigma0(a)
	vadd.i64	d17,d27
	vadd.i64	d30,d27
	@ vadd.i64	d21,d30
	vshr.u64	d24,d17,#14 @ 11
#if 11<16
	vld1.64	{d11},[r1]! @ handles unaligned
#endif
	vshr.u64	d25,d17,#18
#if 11>0
	vadd.i64	d21,d30 @ h+=Maj from the past
#endif
	vshr.u64	d26,d17,#41
	vld1.64	{d28},[r3,:64]! @ K[i++]
	vsli.64	d24,d17,#50
	vsli.64	d25,d17,#46
	vmov	d29,d17
	vsli.64	d26,d17,#23
#if 11<16 && defined(__ARMEL__)
	vrev64.8	d11,d11
#endif
	veor	d25,d24
	vbsl	d29,d18,d19 @ Ch(e,f,g)
	vshr.u64	d24,d21,#28
	veor	d26,d25 @ Sigma1(e)
	vadd.i64	d27,d29,d20
	vshr.u64	d25,d21,#34
	vsli.64	d24,d21,#36
	vadd.i64	d27,d26
	vshr.u64	d26,d21,#39
	vadd.i64	d28,d11
	vsli.64	d25,d21,#30
	veor	d30,d21,d22
	vsli.64	d26,d21,#25
	veor	d20,d24,d25
	vadd.i64	d27,d28
	vbsl	d30,d23,d22 @ Maj(a,b,c)
	veor	d20,d26 @ Sigma0(a)
	vadd.i64	d16,d27
	vadd.i64	d30,d27
	@ vadd.i64	d20,d30
	vshr.u64	d24,d16,#14 @ 12
#if 12<16
	vld1.64	{d12},[r1]! @ handles unaligned
#endif
	vshr.u64	d25,d16,#18
#if 12>0
	vadd.i64	d20,d30 @ h+=Maj from the past
#endif
	vshr.u64	d26,d16,#41
	vld1.64	{d28},[r3,:64]! @ K[i++]
	vsli.64	d24,d16,#50
	vsli.64	d25,d16,#46
	vmov	d29,d16
	vsli.64	d26,d16,#23
#if 12<16 && defined(__ARMEL__)
	vrev64.8	d12,d12
#endif
	veor	d25,d24
	vbsl	d29,d17,d18 @ Ch(e,f,g)
	vshr.u64	d24,d20,#28
	veor	d26,d25 @ Sigma1(e)
	vadd.i64	d27,d29,d19
	vshr.u64	d25,d20,#34
	vsli.64	d24,d20,#36
	vadd.i64	d27,d26
	vshr.u64	d26,d20,#39
	vadd.i64	d28,d12
	vsli.64	d25,d20,#30
	veor	d30,d20,d21
	vsli.64	d26,d20,#25
	veor	d19,d24,d25
	vadd.i64	d27,d28
	vbsl	d30,d22,d21 @ Maj(a,b,c)
	veor	d19,d26 @ Sigma0(a)
	vadd.i64	d23,d27
	vadd.i64	d30,d27
	@ vadd.i64	d19,d30
	vshr.u64	d24,d23,#14 @ 13
#if 13<16
	vld1.64	{d13},[r1]! @ handles unaligned
#endif
	vshr.u64	d25,d23,#18
#if 13>0
	vadd.i64	d19,d30 @ h+=Maj from the past
#endif
	vshr.u64	d26,d23,#41
	vld1.64	{d28},[r3,:64]! @ K[i++]
	vsli.64	d24,d23,#50
	vsli.64	d25,d23,#46
	vmov	d29,d23
	vsli.64	d26,d23,#23
#if 13<16 && defined(__ARMEL__)
	vrev64.8	d13,d13
#endif
	veor	d25,d24
	vbsl	d29,d16,d17 @ Ch(e,f,g)
	vshr.u64	d24,d19,#28
	veor	d26,d25 @ Sigma1(e)
	vadd.i64	d27,d29,d18
	vshr.u64	d25,d19,#34
	vsli.64	d24,d19,#36
	vadd.i64	d27,d26
	vshr.u64	d26,d19,#39
	vadd.i64	d28,d13
	vsli.64	d25,d19,#30
	veor	d30,d19,d20
	vsli.64	d26,d19,#25
	veor	d18,d24,d25
	vadd.i64	d27,d28
	vbsl	d30,d21,d20 @ Maj(a,b,c)
	veor	d18,d26 @ Sigma0(a)
	vadd.i64	d22,d27
	vadd.i64	d30,d27
	@ vadd.i64	d18,d30
	vshr.u64	d24,d22,#14 @ 14
#if 14<16
	vld1.64	{d14},[r1]! @ handles unaligned
#endif
	vshr.u64	d25,d22,#18
#if 14>0
	vadd.i64	d18,d30 @ h+=Maj from the past
#endif
	vshr.u64	d26,d22,#41
	vld1.64	{d28},[r3,:64]! @ K[i++]
	vsli.64	d24,d22,#50
	vsli.64	d25,d22,#46
	vmov	d29,d22
	vsli.64	d26,d22,#23
#if 14<16 && defined(__ARMEL__)
	vrev64.8	d14,d14
#endif
	veor	d25,d24
	vbsl	d29,d23,d16 @ Ch(e,f,g)
	vshr.u64	d24,d18,#28
	veor	d26,d25 @ Sigma1(e)
	vadd.i64	d27,d29,d17
	vshr.u64	d25,d18,#34
	vsli.64	d24,d18,#36
	vadd.i64	d27,d26
	vshr.u64	d26,d18,#39
	vadd.i64	d28,d14
	vsli.64	d25,d18,#30
	veor	d30,d18,d19
	vsli.64	d26,d18,#25
	veor	d17,d24,d25
	vadd.i64	d27,d28
	vbsl	d30,d20,d19 @ Maj(a,b,c)
	veor	d17,d26 @ Sigma0(a)
	vadd.i64	d21,d27
	vadd.i64	d30,d27
	@ vadd.i64	d17,d30
	vshr.u64	d24,d21,#14 @ 15
#if 15<16
	vld1.64	{d15},[r1]! @ handles unaligned
#endif
	vshr.u64	d25,d21,#18
#if 15>0
	vadd.i64	d17,d30 @ h+=Maj from the past
#endif
	vshr.u64	d26,d21,#41
	vld1.64	{d28},[r3,:64]! @ K[i++]
	vsli.64	d24,d21,#50
	vsli.64	d25,d21,#46
	vmov	d29,d21
	vsli.64	d26,d21,#23
#if 15<16 && defined(__ARMEL__)
	vrev64.8	d15,d15
#endif
	veor	d25,d24
	vbsl	d29,d22,d23 @ Ch(e,f,g)
	vshr.u64	d24,d17,#28
	veor	d26,d25 @ Sigma1(e)
	vadd.i64	d27,d29,d16
	vshr.u64	d25,d17,#34
	vsli.64	d24,d17,#36
	vadd.i64	d27,d26
	vshr.u64	d26,d17,#39
	vadd.i64	d28,d15
	vsli.64	d25,d17,#30
	veor	d30,d17,d18
	vsli.64	d26,d17,#25
	veor	d16,d24,d25
	vadd.i64	d27,d28
	vbsl	d30,d19,d18 @ Maj(a,b,c)
	veor	d16,d26 @ Sigma0(a)
	vadd.i64	d20,d27
	vadd.i64	d30,d27
	@ vadd.i64	d16,d30
	mov	r12,#4
.L16_79_neon:
	subs	r12,#1
	vshr.u64	q12,q7,#19
	vshr.u64	q13,q7,#61
	vadd.i64	d16,d30 @ h+=Maj from the past
	vshr.u64	q15,q7,#6
	vsli.64	q12,q7,#45
	vext.8	q14,q0,q1,#8 @ X[i+1]
	vsli.64	q13,q7,#3
	veor	q15,q12
	vshr.u64	q12,q14,#1
	veor	q15,q13 @ sigma1(X[i+14])
	vshr.u64	q13,q14,#8
	vadd.i64	q0,q15
	vshr.u64	q15,q14,#7
	vsli.64	q12,q14,#63
	vsli.64	q13,q14,#56
	vext.8	q14,q4,q5,#8 @ X[i+9]
	veor	q15,q12
	vshr.u64	d24,d20,#14 @ from NEON_00_15
	vadd.i64	q0,q14
	vshr.u64	d25,d20,#18 @ from NEON_00_15
	veor	q15,q13 @ sigma0(X[i+1])
	vshr.u64	d26,d20,#41 @ from NEON_00_15
	vadd.i64	q0,q15
	vld1.64	{d28},[r3,:64]! @ K[i++]
	vsli.64	d24,d20,#50
	vsli.64	d25,d20,#46
	vmov	d29,d20
	vsli.64	d26,d20,#23
#if 16<16 && defined(__ARMEL__)
	vrev64.8	,
#endif
	veor	d25,d24
	vbsl	d29,d21,d22 @ Ch(e,f,g)
	vshr.u64	d24,d16,#28
	veor	d26,d25 @ Sigma1(e)
	vadd.i64	d27,d29,d23
	vshr.u64	d25,d16,#34
	vsli.64	d24,d16,#36
	vadd.i64	d27,d26
	vshr.u64	d26,d16,#39
	vadd.i64	d28,d0
	vsli.64	d25,d16,#30
	veor	d30,d16,d17
	vsli.64	d26,d16,#25
	veor	d23,d24,d25
	vadd.i64	d27,d28
	vbsl	d30,d18,d17 @ Maj(a,b,c)
	veor	d23,d26 @ Sigma0(a)
	vadd.i64	d19,d27
	vadd.i64	d30,d27
	@ vadd.i64	d23,d30
	vshr.u64	d24,d19,#14 @ 17
#if 17<16
	vld1.64	{d1},[r1]! @ handles unaligned
#endif
	vshr.u64	d25,d19,#18
#if 17>0
	vadd.i64	d23,d30 @ h+=Maj from the past
#endif
	vshr.u64	d26,d19,#41
	vld1.64	{d28},[r3,:64]! @ K[i++]
	vsli.64	d24,d19,#50
	vsli.64	d25,d19,#46
	vmov	d29,d19
	vsli.64	d26,d19,#23
#if 17<16 && defined(__ARMEL__)
	vrev64.8	,
#endif
	veor	d25,d24
	vbsl	d29,d20,d21 @ Ch(e,f,g)
	vshr.u64	d24,d23,#28
	veor	d26,d25 @ Sigma1(e)
	vadd.i64	d27,d29,d22
	vshr.u64	d25,d23,#34
	vsli.64	d24,d23,#36
	vadd.i64	d27,d26
	vshr.u64	d26,d23,#39
	vadd.i64	d28,d1
	vsli.64	d25,d23,#30
	veor	d30,d23,d16
	vsli.64	d26,d23,#25
	veor	d22,d24,d25
	vadd.i64	d27,d28
	vbsl	d30,d17,d16 @ Maj(a,b,c)
	veor	d22,d26 @ Sigma0(a)
	vadd.i64	d18,d27
	vadd.i64	d30,d27
	@ vadd.i64	d22,d30
	vshr.u64	q12,q0,#19
	vshr.u64	q13,q0,#61
	vadd.i64	d22,d30 @ h+=Maj from the past
	vshr.u64	q15,q0,#6
	vsli.64	q12,q0,#45
	vext.8	q14,q1,q2,#8 @ X[i+1]
	vsli.64	q13,q0,#3
	veor	q15,q12
	vshr.u64	q12,q14,#1
	veor	q15,q13 @ sigma1(X[i+14])
	vshr.u64	q13,q14,#8
	vadd.i64	q1,q15
	vshr.u64	q15,q14,#7
	vsli.64	q12,q14,#63
	vsli.64	q13,q14,#56
	vext.8	q14,q5,q6,#8 @ X[i+9]
	veor	q15,q12
	vshr.u64	d24,d18,#14 @ from NEON_00_15
	vadd.i64	q1,q14
	vshr.u64	d25,d18,#18 @ from NEON_00_15
	veor	q15,q13 @ sigma0(X[i+1])
	vshr.u64	d26,d18,#41 @ from NEON_00_15
	vadd.i64	q1,q15
	vld1.64	{d28},[r3,:64]! @ K[i++]
	vsli.64	d24,d18,#50
	vsli.64	d25,d18,#46
	vmov	d29,d18
	vsli.64	d26,d18,#23
#if 18<16 && defined(__ARMEL__)
	vrev64.8	,
#endif
	veor	d25,d24
	vbsl	d29,d19,d20 @ Ch(e,f,g)
	vshr.u64	d24,d22,#28
	veor	d26,d25 @ Sigma1(e)
	vadd.i64	d27,d29,d21
	vshr.u64	d25,d22,#34
	vsli.64	d24,d22,#36
	vadd.i64	d27,d26
	vshr.u64	d26,d22,#39
	vadd.i64	d28,d2
	vsli.64	d25,d22,#30
	veor	d30,d22,d23
	vsli.64	d26,d22,#25
	veor	d21,d24,d25
	vadd.i64	d27,d28
	vbsl	d30,d16,d23 @ Maj(a,b,c)
	veor	d21,d26 @ Sigma0(a)
	vadd.i64	d17,d27
	vadd.i64	d30,d27
	@ vadd.i64	d21,d30
	vshr.u64	d24,d17,#14 @ 19
#if 19<16
	vld1.64	{d3},[r1]! @ handles unaligned
#endif
	vshr.u64	d25,d17,#18
#if 19>0
	vadd.i64	d21,d30 @ h+=Maj from the past
#endif
	vshr.u64	d26,d17,#41
	vld1.64	{d28},[r3,:64]! @ K[i++]
	vsli.64	d24,d17,#50
	vsli.64	d25,d17,#46
	vmov	d29,d17
	vsli.64	d26,d17,#23
#if 19<16 && defined(__ARMEL__)
	vrev64.8	,
#endif
	veor	d25,d24
	vbsl	d29,d18,d19 @ Ch(e,f,g)
	vshr.u64	d24,d21,#28
	veor	d26,d25 @ Sigma1(e)
	vadd.i64	d27,d29,d20
	vshr.u64	d25,d21,#34
	vsli.64	d24,d21,#36
	vadd.i64	d27,d26
	vshr.u64	d26,d21,#39
	vadd.i64	d28,d3
	vsli.64	d25,d21,#30
	veor	d30,d21,d22
	vsli.64	d26,d21,#25
	veor	d20,d24,d25
	vadd.i64	d27,d28
	vbsl	d30,d23,d22 @ Maj(a,b,c)
	veor	d20,d26 @ Sigma0(a)
	vadd.i64	d16,d27
	vadd.i64	d30,d27
	@ vadd.i64	d20,d30
1322 vshr.u64 q12,q1,#19
1323 vshr.u64 q13,q1,#61
Adam Langleye9ada862015-05-11 17:20:37 -07001324 vadd.i64 d20,d30 @ h+=Maj from the past
Adam Langleyd9e397b2015-01-22 14:27:53 -08001325 vshr.u64 q15,q1,#6
Adam Langleye9ada862015-05-11 17:20:37 -07001326 vsli.64 q12,q1,#45
1327 vext.8 q14,q2,q3,#8 @ X[i+1]
1328 vsli.64 q13,q1,#3
1329 veor q15,q12
Adam Langleyd9e397b2015-01-22 14:27:53 -08001330 vshr.u64 q12,q14,#1
Adam Langleye9ada862015-05-11 17:20:37 -07001331 veor q15,q13 @ sigma1(X[i+14])
Adam Langleyd9e397b2015-01-22 14:27:53 -08001332 vshr.u64 q13,q14,#8
1333 vadd.i64 q2,q15
1334 vshr.u64 q15,q14,#7
Adam Langleye9ada862015-05-11 17:20:37 -07001335 vsli.64 q12,q14,#63
1336 vsli.64 q13,q14,#56
1337 vext.8 q14,q6,q7,#8 @ X[i+9]
1338 veor q15,q12
Adam Langleyd9e397b2015-01-22 14:27:53 -08001339 vshr.u64 d24,d16,#14 @ from NEON_00_15
1340 vadd.i64 q2,q14
1341 vshr.u64 d25,d16,#18 @ from NEON_00_15
Adam Langleye9ada862015-05-11 17:20:37 -07001342 veor q15,q13 @ sigma0(X[i+1])
Adam Langleyd9e397b2015-01-22 14:27:53 -08001343 vshr.u64 d26,d16,#41 @ from NEON_00_15
1344 vadd.i64 q2,q15
Adam Langleye9ada862015-05-11 17:20:37 -07001345 vld1.64 {d28},[r3,:64]! @ K[i++]
1346 vsli.64 d24,d16,#50
1347 vsli.64 d25,d16,#46
1348 vmov d29,d16
1349 vsli.64 d26,d16,#23
Adam Langleyd9e397b2015-01-22 14:27:53 -08001350#if 20<16 && defined(__ARMEL__)
1351 vrev64.8 ,
1352#endif
Adam Langleye9ada862015-05-11 17:20:37 -07001353 veor d25,d24
1354 vbsl d29,d17,d18 @ Ch(e,f,g)
Adam Langleyd9e397b2015-01-22 14:27:53 -08001355 vshr.u64 d24,d20,#28
Adam Langleye9ada862015-05-11 17:20:37 -07001356 veor d26,d25 @ Sigma1(e)
Adam Langleyd9e397b2015-01-22 14:27:53 -08001357 vadd.i64 d27,d29,d19
1358 vshr.u64 d25,d20,#34
Adam Langleye9ada862015-05-11 17:20:37 -07001359 vsli.64 d24,d20,#36
Adam Langleyd9e397b2015-01-22 14:27:53 -08001360 vadd.i64 d27,d26
1361 vshr.u64 d26,d20,#39
1362 vadd.i64 d28,d4
Adam Langleye9ada862015-05-11 17:20:37 -07001363 vsli.64 d25,d20,#30
1364 veor d30,d20,d21
1365 vsli.64 d26,d20,#25
1366 veor d19,d24,d25
Adam Langleyd9e397b2015-01-22 14:27:53 -08001367 vadd.i64 d27,d28
Adam Langleye9ada862015-05-11 17:20:37 -07001368 vbsl d30,d22,d21 @ Maj(a,b,c)
1369 veor d19,d26 @ Sigma0(a)
Adam Langleyd9e397b2015-01-22 14:27:53 -08001370 vadd.i64 d23,d27
1371 vadd.i64 d30,d27
1372 @ vadd.i64 d19,d30
1373 vshr.u64 d24,d23,#14 @ 21
1374#if 21<16
Adam Langleye9ada862015-05-11 17:20:37 -07001375 vld1.64 {d5},[r1]! @ handles unaligned
Adam Langleyd9e397b2015-01-22 14:27:53 -08001376#endif
1377 vshr.u64 d25,d23,#18
1378#if 21>0
Adam Langleye9ada862015-05-11 17:20:37 -07001379 vadd.i64 d19,d30 @ h+=Maj from the past
Adam Langleyd9e397b2015-01-22 14:27:53 -08001380#endif
1381 vshr.u64 d26,d23,#41
Adam Langleye9ada862015-05-11 17:20:37 -07001382 vld1.64 {d28},[r3,:64]! @ K[i++]
1383 vsli.64 d24,d23,#50
1384 vsli.64 d25,d23,#46
1385 vmov d29,d23
1386 vsli.64 d26,d23,#23
Adam Langleyd9e397b2015-01-22 14:27:53 -08001387#if 21<16 && defined(__ARMEL__)
1388 vrev64.8 ,
1389#endif
Adam Langleye9ada862015-05-11 17:20:37 -07001390 veor d25,d24
1391 vbsl d29,d16,d17 @ Ch(e,f,g)
Adam Langleyd9e397b2015-01-22 14:27:53 -08001392 vshr.u64 d24,d19,#28
Adam Langleye9ada862015-05-11 17:20:37 -07001393 veor d26,d25 @ Sigma1(e)
Adam Langleyd9e397b2015-01-22 14:27:53 -08001394 vadd.i64 d27,d29,d18
1395 vshr.u64 d25,d19,#34
Adam Langleye9ada862015-05-11 17:20:37 -07001396 vsli.64 d24,d19,#36
Adam Langleyd9e397b2015-01-22 14:27:53 -08001397 vadd.i64 d27,d26
1398 vshr.u64 d26,d19,#39
1399 vadd.i64 d28,d5
Adam Langleye9ada862015-05-11 17:20:37 -07001400 vsli.64 d25,d19,#30
1401 veor d30,d19,d20
1402 vsli.64 d26,d19,#25
1403 veor d18,d24,d25
Adam Langleyd9e397b2015-01-22 14:27:53 -08001404 vadd.i64 d27,d28
Adam Langleye9ada862015-05-11 17:20:37 -07001405 vbsl d30,d21,d20 @ Maj(a,b,c)
1406 veor d18,d26 @ Sigma0(a)
Adam Langleyd9e397b2015-01-22 14:27:53 -08001407 vadd.i64 d22,d27
1408 vadd.i64 d30,d27
1409 @ vadd.i64 d18,d30
	vshr.u64	q12,q2,#19
	vshr.u64	q13,q2,#61
	vadd.i64	d18,d30	@ h+=Maj from the past
	vshr.u64	q15,q2,#6
	vsli.64	q12,q2,#45
	vext.8	q14,q3,q4,#8	@ X[i+1]
	vsli.64	q13,q2,#3
	veor	q15,q12
	vshr.u64	q12,q14,#1
	veor	q15,q13				@ sigma1(X[i+14])
	vshr.u64	q13,q14,#8
	vadd.i64	q3,q15
	vshr.u64	q15,q14,#7
	vsli.64	q12,q14,#63
	vsli.64	q13,q14,#56
	vext.8	q14,q7,q0,#8	@ X[i+9]
	veor	q15,q12
	vshr.u64	d24,d22,#14	@ from NEON_00_15
	vadd.i64	q3,q14
	vshr.u64	d25,d22,#18	@ from NEON_00_15
	veor	q15,q13				@ sigma0(X[i+1])
	vshr.u64	d26,d22,#41	@ from NEON_00_15
	vadd.i64	q3,q15
	vld1.64	{d28},[r3,:64]!	@ K[i++]
	vsli.64	d24,d22,#50
	vsli.64	d25,d22,#46
	vmov	d29,d22
	vsli.64	d26,d22,#23
#if 22<16 && defined(__ARMEL__)
	vrev64.8	,
#endif
	veor	d25,d24
	vbsl	d29,d23,d16		@ Ch(e,f,g)
	vshr.u64	d24,d18,#28
	veor	d26,d25			@ Sigma1(e)
	vadd.i64	d27,d29,d17
	vshr.u64	d25,d18,#34
	vsli.64	d24,d18,#36
	vadd.i64	d27,d26
	vshr.u64	d26,d18,#39
	vadd.i64	d28,d6
	vsli.64	d25,d18,#30
	veor	d30,d18,d19
	vsli.64	d26,d18,#25
	veor	d17,d24,d25
	vadd.i64	d27,d28
	vbsl	d30,d20,d19		@ Maj(a,b,c)
	veor	d17,d26			@ Sigma0(a)
	vadd.i64	d21,d27
	vadd.i64	d30,d27
	@ vadd.i64	d17,d30
	vshr.u64	d24,d21,#14	@ 23
#if 23<16
	vld1.64	{d7},[r1]!	@ handles unaligned
#endif
	vshr.u64	d25,d21,#18
#if 23>0
	vadd.i64	d17,d30	@ h+=Maj from the past
#endif
	vshr.u64	d26,d21,#41
	vld1.64	{d28},[r3,:64]!	@ K[i++]
	vsli.64	d24,d21,#50
	vsli.64	d25,d21,#46
	vmov	d29,d21
	vsli.64	d26,d21,#23
#if 23<16 && defined(__ARMEL__)
	vrev64.8	,
#endif
	veor	d25,d24
	vbsl	d29,d22,d23		@ Ch(e,f,g)
	vshr.u64	d24,d17,#28
	veor	d26,d25			@ Sigma1(e)
	vadd.i64	d27,d29,d16
	vshr.u64	d25,d17,#34
	vsli.64	d24,d17,#36
	vadd.i64	d27,d26
	vshr.u64	d26,d17,#39
	vadd.i64	d28,d7
	vsli.64	d25,d17,#30
	veor	d30,d17,d18
	vsli.64	d26,d17,#25
	veor	d16,d24,d25
	vadd.i64	d27,d28
	vbsl	d30,d19,d18		@ Maj(a,b,c)
	veor	d16,d26			@ Sigma0(a)
	vadd.i64	d20,d27
	vadd.i64	d30,d27
	@ vadd.i64	d16,d30
	vshr.u64	q12,q3,#19
	vshr.u64	q13,q3,#61
	vadd.i64	d16,d30	@ h+=Maj from the past
	vshr.u64	q15,q3,#6
	vsli.64	q12,q3,#45
	vext.8	q14,q4,q5,#8	@ X[i+1]
	vsli.64	q13,q3,#3
	veor	q15,q12
	vshr.u64	q12,q14,#1
	veor	q15,q13				@ sigma1(X[i+14])
	vshr.u64	q13,q14,#8
	vadd.i64	q4,q15
	vshr.u64	q15,q14,#7
	vsli.64	q12,q14,#63
	vsli.64	q13,q14,#56
	vext.8	q14,q0,q1,#8	@ X[i+9]
	veor	q15,q12
	vshr.u64	d24,d20,#14	@ from NEON_00_15
	vadd.i64	q4,q14
	vshr.u64	d25,d20,#18	@ from NEON_00_15
	veor	q15,q13				@ sigma0(X[i+1])
	vshr.u64	d26,d20,#41	@ from NEON_00_15
	vadd.i64	q4,q15
	vld1.64	{d28},[r3,:64]!	@ K[i++]
	vsli.64	d24,d20,#50
	vsli.64	d25,d20,#46
	vmov	d29,d20
	vsli.64	d26,d20,#23
#if 24<16 && defined(__ARMEL__)
	vrev64.8	,
#endif
	veor	d25,d24
	vbsl	d29,d21,d22		@ Ch(e,f,g)
	vshr.u64	d24,d16,#28
	veor	d26,d25			@ Sigma1(e)
	vadd.i64	d27,d29,d23
	vshr.u64	d25,d16,#34
	vsli.64	d24,d16,#36
	vadd.i64	d27,d26
	vshr.u64	d26,d16,#39
	vadd.i64	d28,d8
	vsli.64	d25,d16,#30
	veor	d30,d16,d17
	vsli.64	d26,d16,#25
	veor	d23,d24,d25
	vadd.i64	d27,d28
	vbsl	d30,d18,d17		@ Maj(a,b,c)
	veor	d23,d26			@ Sigma0(a)
	vadd.i64	d19,d27
	vadd.i64	d30,d27
	@ vadd.i64	d23,d30
	vshr.u64	d24,d19,#14	@ 25
#if 25<16
	vld1.64	{d9},[r1]!	@ handles unaligned
#endif
	vshr.u64	d25,d19,#18
#if 25>0
	vadd.i64	d23,d30	@ h+=Maj from the past
#endif
	vshr.u64	d26,d19,#41
	vld1.64	{d28},[r3,:64]!	@ K[i++]
	vsli.64	d24,d19,#50
	vsli.64	d25,d19,#46
	vmov	d29,d19
	vsli.64	d26,d19,#23
#if 25<16 && defined(__ARMEL__)
	vrev64.8	,
#endif
	veor	d25,d24
	vbsl	d29,d20,d21		@ Ch(e,f,g)
	vshr.u64	d24,d23,#28
	veor	d26,d25			@ Sigma1(e)
	vadd.i64	d27,d29,d22
	vshr.u64	d25,d23,#34
	vsli.64	d24,d23,#36
	vadd.i64	d27,d26
	vshr.u64	d26,d23,#39
	vadd.i64	d28,d9
	vsli.64	d25,d23,#30
	veor	d30,d23,d16
	vsli.64	d26,d23,#25
	veor	d22,d24,d25
	vadd.i64	d27,d28
	vbsl	d30,d17,d16		@ Maj(a,b,c)
	veor	d22,d26			@ Sigma0(a)
	vadd.i64	d18,d27
	vadd.i64	d30,d27
	@ vadd.i64	d22,d30
	vshr.u64	q12,q4,#19
	vshr.u64	q13,q4,#61
	vadd.i64	d22,d30	@ h+=Maj from the past
	vshr.u64	q15,q4,#6
	vsli.64	q12,q4,#45
	vext.8	q14,q5,q6,#8	@ X[i+1]
	vsli.64	q13,q4,#3
	veor	q15,q12
	vshr.u64	q12,q14,#1
	veor	q15,q13				@ sigma1(X[i+14])
	vshr.u64	q13,q14,#8
	vadd.i64	q5,q15
	vshr.u64	q15,q14,#7
	vsli.64	q12,q14,#63
	vsli.64	q13,q14,#56
	vext.8	q14,q1,q2,#8	@ X[i+9]
	veor	q15,q12
	vshr.u64	d24,d18,#14	@ from NEON_00_15
	vadd.i64	q5,q14
	vshr.u64	d25,d18,#18	@ from NEON_00_15
	veor	q15,q13				@ sigma0(X[i+1])
	vshr.u64	d26,d18,#41	@ from NEON_00_15
	vadd.i64	q5,q15
	vld1.64	{d28},[r3,:64]!	@ K[i++]
	vsli.64	d24,d18,#50
	vsli.64	d25,d18,#46
	vmov	d29,d18
	vsli.64	d26,d18,#23
#if 26<16 && defined(__ARMEL__)
	vrev64.8	,
#endif
	veor	d25,d24
	vbsl	d29,d19,d20		@ Ch(e,f,g)
	vshr.u64	d24,d22,#28
	veor	d26,d25			@ Sigma1(e)
	vadd.i64	d27,d29,d21
	vshr.u64	d25,d22,#34
	vsli.64	d24,d22,#36
	vadd.i64	d27,d26
	vshr.u64	d26,d22,#39
	vadd.i64	d28,d10
	vsli.64	d25,d22,#30
	veor	d30,d22,d23
	vsli.64	d26,d22,#25
	veor	d21,d24,d25
	vadd.i64	d27,d28
	vbsl	d30,d16,d23		@ Maj(a,b,c)
	veor	d21,d26			@ Sigma0(a)
	vadd.i64	d17,d27
	vadd.i64	d30,d27
	@ vadd.i64	d21,d30
	vshr.u64	d24,d17,#14	@ 27
#if 27<16
	vld1.64	{d11},[r1]!	@ handles unaligned
#endif
	vshr.u64	d25,d17,#18
#if 27>0
	vadd.i64	d21,d30	@ h+=Maj from the past
#endif
	vshr.u64	d26,d17,#41
	vld1.64	{d28},[r3,:64]!	@ K[i++]
	vsli.64	d24,d17,#50
	vsli.64	d25,d17,#46
	vmov	d29,d17
	vsli.64	d26,d17,#23
#if 27<16 && defined(__ARMEL__)
	vrev64.8	,
#endif
	veor	d25,d24
	vbsl	d29,d18,d19		@ Ch(e,f,g)
	vshr.u64	d24,d21,#28
	veor	d26,d25			@ Sigma1(e)
	vadd.i64	d27,d29,d20
	vshr.u64	d25,d21,#34
	vsli.64	d24,d21,#36
	vadd.i64	d27,d26
	vshr.u64	d26,d21,#39
	vadd.i64	d28,d11
	vsli.64	d25,d21,#30
	veor	d30,d21,d22
	vsli.64	d26,d21,#25
	veor	d20,d24,d25
	vadd.i64	d27,d28
	vbsl	d30,d23,d22		@ Maj(a,b,c)
	veor	d20,d26			@ Sigma0(a)
	vadd.i64	d16,d27
	vadd.i64	d30,d27
	@ vadd.i64	d20,d30
	vshr.u64	q12,q5,#19
	vshr.u64	q13,q5,#61
	vadd.i64	d20,d30	@ h+=Maj from the past
	vshr.u64	q15,q5,#6
	vsli.64	q12,q5,#45
	vext.8	q14,q6,q7,#8	@ X[i+1]
	vsli.64	q13,q5,#3
	veor	q15,q12
	vshr.u64	q12,q14,#1
	veor	q15,q13				@ sigma1(X[i+14])
	vshr.u64	q13,q14,#8
	vadd.i64	q6,q15
	vshr.u64	q15,q14,#7
	vsli.64	q12,q14,#63
	vsli.64	q13,q14,#56
	vext.8	q14,q2,q3,#8	@ X[i+9]
	veor	q15,q12
	vshr.u64	d24,d16,#14	@ from NEON_00_15
	vadd.i64	q6,q14
	vshr.u64	d25,d16,#18	@ from NEON_00_15
	veor	q15,q13				@ sigma0(X[i+1])
	vshr.u64	d26,d16,#41	@ from NEON_00_15
	vadd.i64	q6,q15
	vld1.64	{d28},[r3,:64]!	@ K[i++]
	vsli.64	d24,d16,#50
	vsli.64	d25,d16,#46
	vmov	d29,d16
	vsli.64	d26,d16,#23
#if 28<16 && defined(__ARMEL__)
	vrev64.8	,
#endif
	veor	d25,d24
	vbsl	d29,d17,d18		@ Ch(e,f,g)
	vshr.u64	d24,d20,#28
	veor	d26,d25			@ Sigma1(e)
	vadd.i64	d27,d29,d19
	vshr.u64	d25,d20,#34
	vsli.64	d24,d20,#36
	vadd.i64	d27,d26
	vshr.u64	d26,d20,#39
	vadd.i64	d28,d12
	vsli.64	d25,d20,#30
	veor	d30,d20,d21
	vsli.64	d26,d20,#25
	veor	d19,d24,d25
	vadd.i64	d27,d28
	vbsl	d30,d22,d21		@ Maj(a,b,c)
	veor	d19,d26			@ Sigma0(a)
	vadd.i64	d23,d27
	vadd.i64	d30,d27
	@ vadd.i64	d19,d30
	vshr.u64	d24,d23,#14	@ 29
#if 29<16
	vld1.64	{d13},[r1]!	@ handles unaligned
#endif
	vshr.u64	d25,d23,#18
#if 29>0
	vadd.i64	d19,d30	@ h+=Maj from the past
#endif
	vshr.u64	d26,d23,#41
	vld1.64	{d28},[r3,:64]!	@ K[i++]
	vsli.64	d24,d23,#50
	vsli.64	d25,d23,#46
	vmov	d29,d23
	vsli.64	d26,d23,#23
#if 29<16 && defined(__ARMEL__)
	vrev64.8	,
#endif
	veor	d25,d24
	vbsl	d29,d16,d17		@ Ch(e,f,g)
	vshr.u64	d24,d19,#28
	veor	d26,d25			@ Sigma1(e)
	vadd.i64	d27,d29,d18
	vshr.u64	d25,d19,#34
	vsli.64	d24,d19,#36
	vadd.i64	d27,d26
	vshr.u64	d26,d19,#39
	vadd.i64	d28,d13
	vsli.64	d25,d19,#30
	veor	d30,d19,d20
	vsli.64	d26,d19,#25
	veor	d18,d24,d25
	vadd.i64	d27,d28
	vbsl	d30,d21,d20		@ Maj(a,b,c)
	veor	d18,d26			@ Sigma0(a)
	vadd.i64	d22,d27
	vadd.i64	d30,d27
	@ vadd.i64	d18,d30
	vshr.u64	q12,q6,#19
	vshr.u64	q13,q6,#61
	vadd.i64	d18,d30	@ h+=Maj from the past
	vshr.u64	q15,q6,#6
	vsli.64	q12,q6,#45
	vext.8	q14,q7,q0,#8	@ X[i+1]
	vsli.64	q13,q6,#3
	veor	q15,q12
	vshr.u64	q12,q14,#1
	veor	q15,q13				@ sigma1(X[i+14])
	vshr.u64	q13,q14,#8
	vadd.i64	q7,q15
	vshr.u64	q15,q14,#7
	vsli.64	q12,q14,#63
	vsli.64	q13,q14,#56
	vext.8	q14,q3,q4,#8	@ X[i+9]
	veor	q15,q12
	vshr.u64	d24,d22,#14	@ from NEON_00_15
	vadd.i64	q7,q14
	vshr.u64	d25,d22,#18	@ from NEON_00_15
	veor	q15,q13				@ sigma0(X[i+1])
	vshr.u64	d26,d22,#41	@ from NEON_00_15
	vadd.i64	q7,q15
	vld1.64	{d28},[r3,:64]!	@ K[i++]
	vsli.64	d24,d22,#50
	vsli.64	d25,d22,#46
	vmov	d29,d22
	vsli.64	d26,d22,#23
#if 30<16 && defined(__ARMEL__)
	vrev64.8	,
#endif
	veor	d25,d24
	vbsl	d29,d23,d16		@ Ch(e,f,g)
	vshr.u64	d24,d18,#28
	veor	d26,d25			@ Sigma1(e)
	vadd.i64	d27,d29,d17
	vshr.u64	d25,d18,#34
	vsli.64	d24,d18,#36
	vadd.i64	d27,d26
	vshr.u64	d26,d18,#39
	vadd.i64	d28,d14
	vsli.64	d25,d18,#30
	veor	d30,d18,d19
	vsli.64	d26,d18,#25
	veor	d17,d24,d25
	vadd.i64	d27,d28
	vbsl	d30,d20,d19		@ Maj(a,b,c)
	veor	d17,d26			@ Sigma0(a)
	vadd.i64	d21,d27
	vadd.i64	d30,d27
	@ vadd.i64	d17,d30
	vshr.u64	d24,d21,#14	@ 31
#if 31<16
	vld1.64	{d15},[r1]!	@ handles unaligned
#endif
	vshr.u64	d25,d21,#18
#if 31>0
	vadd.i64	d17,d30	@ h+=Maj from the past
#endif
	vshr.u64	d26,d21,#41
	vld1.64	{d28},[r3,:64]!	@ K[i++]
	vsli.64	d24,d21,#50
	vsli.64	d25,d21,#46
	vmov	d29,d21
	vsli.64	d26,d21,#23
#if 31<16 && defined(__ARMEL__)
	vrev64.8	,
#endif
	veor	d25,d24
	vbsl	d29,d22,d23		@ Ch(e,f,g)
	vshr.u64	d24,d17,#28
	veor	d26,d25			@ Sigma1(e)
	vadd.i64	d27,d29,d16
	vshr.u64	d25,d17,#34
	vsli.64	d24,d17,#36
	vadd.i64	d27,d26
	vshr.u64	d26,d17,#39
	vadd.i64	d28,d15
	vsli.64	d25,d17,#30
	veor	d30,d17,d18
	vsli.64	d26,d17,#25
	veor	d16,d24,d25
	vadd.i64	d27,d28
	vbsl	d30,d19,d18		@ Maj(a,b,c)
	veor	d16,d26			@ Sigma0(a)
	vadd.i64	d20,d27
	vadd.i64	d30,d27
	@ vadd.i64	d16,d30
	bne	.L16_79_neon

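@ Once all 80 rounds of the block are done, the working variables in
@ d16-d23 are added to the hash state loaded from [r0] into d24-d31,
@ the updated state is written back, and r3 is rewound by 640 bytes
@ (80 entries of 8 bytes each) so K512 can be reused for the next
@ 128-byte block while input remains (r1 != r2).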
	vadd.i64	d16,d30		@ h+=Maj from the past
	vldmia	r0,{d24,d25,d26,d27,d28,d29,d30,d31}	@ load context to temp
	vadd.i64	q8,q12		@ vectorized accumulate
	vadd.i64	q9,q13
	vadd.i64	q10,q14
	vadd.i64	q11,q15
	vstmia	r0,{d16,d17,d18,d19,d20,d21,d22,d23}	@ save context
	teq	r1,r2
	sub	r3,#640	@ rewind K512
	bne	.Loop_neon

	VFP_ABI_POP
	bx	lr				@ .word 0xe12fff1e
.size	sha512_block_data_order_neon,.-sha512_block_data_order_neon
#endif
.byte	83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	2
.align	2
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
.comm	OPENSSL_armcap_P,4,4
.hidden	OPENSSL_armcap_P
#endif
#endif