blob: 8693b82a1345e24be41452821707008c0cf4140a [file] [log] [blame]
#if defined(__i386__)
.file "ghash-x86.S"
.text
.globl _gcm_gmult_4bit_x86
.private_extern _gcm_gmult_4bit_x86
.align 4
_gcm_gmult_4bit_x86:
L_gcm_gmult_4bit_x86_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
subl $84,%esp
movl 104(%esp),%edi
movl 108(%esp),%esi
movl (%edi),%ebp
movl 4(%edi),%edx
movl 8(%edi),%ecx
movl 12(%edi),%ebx
movl $0,16(%esp)
movl $471859200,20(%esp)
movl $943718400,24(%esp)
movl $610271232,28(%esp)
movl $1887436800,32(%esp)
movl $1822425088,36(%esp)
movl $1220542464,40(%esp)
movl $1423966208,44(%esp)
movl $3774873600,48(%esp)
movl $4246732800,52(%esp)
movl $3644850176,56(%esp)
movl $3311403008,60(%esp)
movl $2441084928,64(%esp)
movl $2376073216,68(%esp)
movl $2847932416,72(%esp)
movl $3051356160,76(%esp)
movl %ebp,(%esp)
movl %edx,4(%esp)
movl %ecx,8(%esp)
movl %ebx,12(%esp)
shrl $20,%ebx
andl $240,%ebx
movl 4(%esi,%ebx,1),%ebp
movl (%esi,%ebx,1),%edx
movl 12(%esi,%ebx,1),%ecx
movl 8(%esi,%ebx,1),%ebx
xorl %eax,%eax
movl $15,%edi
jmp L000x86_loop
.align 4,0x90
L000x86_loop:
movb %bl,%al
shrdl $4,%ecx,%ebx
andb $15,%al
shrdl $4,%edx,%ecx
shrdl $4,%ebp,%edx
shrl $4,%ebp
xorl 16(%esp,%eax,4),%ebp
movb (%esp,%edi,1),%al
andb $240,%al
xorl 8(%esi,%eax,1),%ebx
xorl 12(%esi,%eax,1),%ecx
xorl (%esi,%eax,1),%edx
xorl 4(%esi,%eax,1),%ebp
decl %edi
js L001x86_break
movb %bl,%al
shrdl $4,%ecx,%ebx
andb $15,%al
shrdl $4,%edx,%ecx
shrdl $4,%ebp,%edx
shrl $4,%ebp
xorl 16(%esp,%eax,4),%ebp
movb (%esp,%edi,1),%al
shlb $4,%al
xorl 8(%esi,%eax,1),%ebx
xorl 12(%esi,%eax,1),%ecx
xorl (%esi,%eax,1),%edx
xorl 4(%esi,%eax,1),%ebp
jmp L000x86_loop
.align 4,0x90
L001x86_break:
bswap %ebx
bswap %ecx
bswap %edx
bswap %ebp
movl 104(%esp),%edi
movl %ebx,12(%edi)
movl %ecx,8(%edi)
movl %edx,4(%edi)
movl %ebp,(%edi)
addl $84,%esp
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.globl _gcm_ghash_4bit_x86
.private_extern _gcm_ghash_4bit_x86
.align 4
_gcm_ghash_4bit_x86:
L_gcm_ghash_4bit_x86_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
subl $84,%esp
movl 104(%esp),%ebx
movl 108(%esp),%esi
movl 112(%esp),%edi
movl 116(%esp),%ecx
addl %edi,%ecx
movl %ecx,116(%esp)
movl (%ebx),%ebp
movl 4(%ebx),%edx
movl 8(%ebx),%ecx
movl 12(%ebx),%ebx
movl $0,16(%esp)
movl $471859200,20(%esp)
movl $943718400,24(%esp)
movl $610271232,28(%esp)
movl $1887436800,32(%esp)
movl $1822425088,36(%esp)
movl $1220542464,40(%esp)
movl $1423966208,44(%esp)
movl $3774873600,48(%esp)
movl $4246732800,52(%esp)
movl $3644850176,56(%esp)
movl $3311403008,60(%esp)
movl $2441084928,64(%esp)
movl $2376073216,68(%esp)
movl $2847932416,72(%esp)
movl $3051356160,76(%esp)
.align 4,0x90
L002x86_outer_loop:
xorl 12(%edi),%ebx
xorl 8(%edi),%ecx
xorl 4(%edi),%edx
xorl (%edi),%ebp
movl %ebx,12(%esp)
movl %ecx,8(%esp)
movl %edx,4(%esp)
movl %ebp,(%esp)
shrl $20,%ebx
andl $240,%ebx
movl 4(%esi,%ebx,1),%ebp
movl (%esi,%ebx,1),%edx
movl 12(%esi,%ebx,1),%ecx
movl 8(%esi,%ebx,1),%ebx
xorl %eax,%eax
movl $15,%edi
jmp L003x86_loop
.align 4,0x90
L003x86_loop:
movb %bl,%al
shrdl $4,%ecx,%ebx
andb $15,%al
shrdl $4,%edx,%ecx
shrdl $4,%ebp,%edx
shrl $4,%ebp
xorl 16(%esp,%eax,4),%ebp
movb (%esp,%edi,1),%al
andb $240,%al
xorl 8(%esi,%eax,1),%ebx
xorl 12(%esi,%eax,1),%ecx
xorl (%esi,%eax,1),%edx
xorl 4(%esi,%eax,1),%ebp
decl %edi
js L004x86_break
movb %bl,%al
shrdl $4,%ecx,%ebx
andb $15,%al
shrdl $4,%edx,%ecx
shrdl $4,%ebp,%edx
shrl $4,%ebp
xorl 16(%esp,%eax,4),%ebp
movb (%esp,%edi,1),%al
shlb $4,%al
xorl 8(%esi,%eax,1),%ebx
xorl 12(%esi,%eax,1),%ecx
xorl (%esi,%eax,1),%edx
xorl 4(%esi,%eax,1),%ebp
jmp L003x86_loop
.align 4,0x90
L004x86_break:
bswap %ebx
bswap %ecx
bswap %edx
bswap %ebp
movl 112(%esp),%edi
leal 16(%edi),%edi
cmpl 116(%esp),%edi
movl %edi,112(%esp)
jb L002x86_outer_loop
movl 104(%esp),%edi
movl %ebx,12(%edi)
movl %ecx,8(%edi)
movl %edx,4(%edi)
movl %ebp,(%edi)
addl $84,%esp
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.globl _gcm_gmult_4bit_mmx
.private_extern _gcm_gmult_4bit_mmx
.align 4
_gcm_gmult_4bit_mmx:
L_gcm_gmult_4bit_mmx_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%edi
movl 24(%esp),%esi
call L005pic_point
L005pic_point:
popl %eax
leal Lrem_4bit-L005pic_point(%eax),%eax
movzbl 15(%edi),%ebx
xorl %ecx,%ecx
movl %ebx,%edx
movb %dl,%cl
movl $14,%ebp
shlb $4,%cl
andl $240,%edx
movq 8(%esi,%ecx,1),%mm0
movq (%esi,%ecx,1),%mm1
movd %mm0,%ebx
jmp L006mmx_loop
.align 4,0x90
L006mmx_loop:
psrlq $4,%mm0
andl $15,%ebx
movq %mm1,%mm2
psrlq $4,%mm1
pxor 8(%esi,%edx,1),%mm0
movb (%edi,%ebp,1),%cl
psllq $60,%mm2
pxor (%eax,%ebx,8),%mm1
decl %ebp
movd %mm0,%ebx
pxor (%esi,%edx,1),%mm1
movl %ecx,%edx
pxor %mm2,%mm0
js L007mmx_break
shlb $4,%cl
andl $15,%ebx
psrlq $4,%mm0
andl $240,%edx
movq %mm1,%mm2
psrlq $4,%mm1
pxor 8(%esi,%ecx,1),%mm0
psllq $60,%mm2
pxor (%eax,%ebx,8),%mm1
movd %mm0,%ebx
pxor (%esi,%ecx,1),%mm1
pxor %mm2,%mm0
jmp L006mmx_loop
.align 4,0x90
L007mmx_break:
shlb $4,%cl
andl $15,%ebx
psrlq $4,%mm0
andl $240,%edx
movq %mm1,%mm2
psrlq $4,%mm1
pxor 8(%esi,%ecx,1),%mm0
psllq $60,%mm2
pxor (%eax,%ebx,8),%mm1
movd %mm0,%ebx
pxor (%esi,%ecx,1),%mm1
pxor %mm2,%mm0
psrlq $4,%mm0
andl $15,%ebx
movq %mm1,%mm2
psrlq $4,%mm1
pxor 8(%esi,%edx,1),%mm0
psllq $60,%mm2
pxor (%eax,%ebx,8),%mm1
movd %mm0,%ebx
pxor (%esi,%edx,1),%mm1
pxor %mm2,%mm0
psrlq $32,%mm0
movd %mm1,%edx
psrlq $32,%mm1
movd %mm0,%ecx
movd %mm1,%ebp
bswap %ebx
bswap %edx
bswap %ecx
bswap %ebp
emms
movl %ebx,12(%edi)
movl %edx,4(%edi)
movl %ecx,8(%edi)
movl %ebp,(%edi)
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.globl _gcm_ghash_4bit_mmx
.private_extern _gcm_ghash_4bit_mmx
.align 4
_gcm_ghash_4bit_mmx:
L_gcm_ghash_4bit_mmx_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%eax
movl 24(%esp),%ebx
movl 28(%esp),%ecx
movl 32(%esp),%edx
movl %esp,%ebp
call L008pic_point
L008pic_point:
popl %esi
leal Lrem_8bit-L008pic_point(%esi),%esi
subl $544,%esp
andl $-64,%esp
subl $16,%esp
addl %ecx,%edx
movl %eax,544(%esp)
movl %edx,552(%esp)
movl %ebp,556(%esp)
addl $128,%ebx
leal 144(%esp),%edi
leal 400(%esp),%ebp
movl -120(%ebx),%edx
movq -120(%ebx),%mm0
movq -128(%ebx),%mm3
shll $4,%edx
movb %dl,(%esp)
movl -104(%ebx),%edx
movq -104(%ebx),%mm2
movq -112(%ebx),%mm5
movq %mm0,-128(%edi)
psrlq $4,%mm0
movq %mm3,(%edi)
movq %mm3,%mm7
psrlq $4,%mm3
shll $4,%edx
movb %dl,1(%esp)
movl -88(%ebx),%edx
movq -88(%ebx),%mm1
psllq $60,%mm7
movq -96(%ebx),%mm4
por %mm7,%mm0
movq %mm2,-120(%edi)
psrlq $4,%mm2
movq %mm5,8(%edi)
movq %mm5,%mm6
movq %mm0,-128(%ebp)
psrlq $4,%mm5
movq %mm3,(%ebp)
shll $4,%edx
movb %dl,2(%esp)
movl -72(%ebx),%edx
movq -72(%ebx),%mm0
psllq $60,%mm6
movq -80(%ebx),%mm3
por %mm6,%mm2
movq %mm1,-112(%edi)
psrlq $4,%mm1
movq %mm4,16(%edi)
movq %mm4,%mm7
movq %mm2,-120(%ebp)
psrlq $4,%mm4
movq %mm5,8(%ebp)
shll $4,%edx
movb %dl,3(%esp)
movl -56(%ebx),%edx
movq -56(%ebx),%mm2
psllq $60,%mm7
movq -64(%ebx),%mm5
por %mm7,%mm1
movq %mm0,-104(%edi)
psrlq $4,%mm0
movq %mm3,24(%edi)
movq %mm3,%mm6
movq %mm1,-112(%ebp)
psrlq $4,%mm3
movq %mm4,16(%ebp)
shll $4,%edx
movb %dl,4(%esp)
movl -40(%ebx),%edx
movq -40(%ebx),%mm1
psllq $60,%mm6
movq -48(%ebx),%mm4
por %mm6,%mm0
movq %mm2,-96(%edi)
psrlq $4,%mm2
movq %mm5,32(%edi)
movq %mm5,%mm7
movq %mm0,-104(%ebp)
psrlq $4,%mm5
movq %mm3,24(%ebp)
shll $4,%edx
movb %dl,5(%esp)
movl -24(%ebx),%edx
movq -24(%ebx),%mm0
psllq $60,%mm7
movq -32(%ebx),%mm3
por %mm7,%mm2
movq %mm1,-88(%edi)
psrlq $4,%mm1
movq %mm4,40(%edi)
movq %mm4,%mm6
movq %mm2,-96(%ebp)
psrlq $4,%mm4
movq %mm5,32(%ebp)
shll $4,%edx
movb %dl,6(%esp)
movl -8(%ebx),%edx
movq -8(%ebx),%mm2
psllq $60,%mm6
movq -16(%ebx),%mm5
por %mm6,%mm1
movq %mm0,-80(%edi)
psrlq $4,%mm0
movq %mm3,48(%edi)
movq %mm3,%mm7
movq %mm1,-88(%ebp)
psrlq $4,%mm3
movq %mm4,40(%ebp)
shll $4,%edx
movb %dl,7(%esp)
movl 8(%ebx),%edx
movq 8(%ebx),%mm1
psllq $60,%mm7
movq (%ebx),%mm4
por %mm7,%mm0
movq %mm2,-72(%edi)
psrlq $4,%mm2
movq %mm5,56(%edi)
movq %mm5,%mm6
movq %mm0,-80(%ebp)
psrlq $4,%mm5
movq %mm3,48(%ebp)
shll $4,%edx
movb %dl,8(%esp)
movl 24(%ebx),%edx
movq 24(%ebx),%mm0
psllq $60,%mm6
movq 16(%ebx),%mm3
por %mm6,%mm2
movq %mm1,-64(%edi)
psrlq $4,%mm1
movq %mm4,64(%edi)
movq %mm4,%mm7
movq %mm2,-72(%ebp)
psrlq $4,%mm4
movq %mm5,56(%ebp)
shll $4,%edx
movb %dl,9(%esp)
movl 40(%ebx),%edx
movq 40(%ebx),%mm2
psllq $60,%mm7
movq 32(%ebx),%mm5
por %mm7,%mm1
movq %mm0,-56(%edi)
psrlq $4,%mm0
movq %mm3,72(%edi)
movq %mm3,%mm6
movq %mm1,-64(%ebp)
psrlq $4,%mm3
movq %mm4,64(%ebp)
shll $4,%edx
movb %dl,10(%esp)
movl 56(%ebx),%edx
movq 56(%ebx),%mm1
psllq $60,%mm6
movq 48(%ebx),%mm4
por %mm6,%mm0
movq %mm2,-48(%edi)
psrlq $4,%mm2
movq %mm5,80(%edi)
movq %mm5,%mm7
movq %mm0,-56(%ebp)
psrlq $4,%mm5
movq %mm3,72(%ebp)
shll $4,%edx
movb %dl,11(%esp)
movl 72(%ebx),%edx
movq 72(%ebx),%mm0
psllq $60,%mm7
movq 64(%ebx),%mm3
por %mm7,%mm2
movq %mm1,-40(%edi)
psrlq $4,%mm1
movq %mm4,88(%edi)
movq %mm4,%mm6
movq %mm2,-48(%ebp)
psrlq $4,%mm4
movq %mm5,80(%ebp)
shll $4,%edx
movb %dl,12(%esp)
movl 88(%ebx),%edx
movq 88(%ebx),%mm2
psllq $60,%mm6
movq 80(%ebx),%mm5
por %mm6,%mm1
movq %mm0,-32(%edi)
psrlq $4,%mm0
movq %mm3,96(%edi)
movq %mm3,%mm7
movq %mm1,-40(%ebp)
psrlq $4,%mm3
movq %mm4,88(%ebp)
shll $4,%edx
movb %dl,13(%esp)
movl 104(%ebx),%edx
movq 104(%ebx),%mm1
psllq $60,%mm7
movq 96(%ebx),%mm4
por %mm7,%mm0
movq %mm2,-24(%edi)
psrlq $4,%mm2
movq %mm5,104(%edi)
movq %mm5,%mm6
movq %mm0,-32(%ebp)
psrlq $4,%mm5
movq %mm3,96(%ebp)
shll $4,%edx
movb %dl,14(%esp)
movl 120(%ebx),%edx
movq 120(%ebx),%mm0
psllq $60,%mm6
movq 112(%ebx),%mm3
por %mm6,%mm2
movq %mm1,-16(%edi)
psrlq $4,%mm1
movq %mm4,112(%edi)
movq %mm4,%mm7
movq %mm2,-24(%ebp)
psrlq $4,%mm4
movq %mm5,104(%ebp)
shll $4,%edx
movb %dl,15(%esp)
psllq $60,%mm7
por %mm7,%mm1
movq %mm0,-8(%edi)
psrlq $4,%mm0
movq %mm3,120(%edi)
movq %mm3,%mm6
movq %mm1,-16(%ebp)
psrlq $4,%mm3
movq %mm4,112(%ebp)
psllq $60,%mm6
por %mm6,%mm0
movq %mm0,-8(%ebp)
movq %mm3,120(%ebp)
movq (%eax),%mm6
movl 8(%eax),%ebx
movl 12(%eax),%edx
.align 4,0x90
L009outer:
xorl 12(%ecx),%edx
xorl 8(%ecx),%ebx
pxor (%ecx),%mm6
leal 16(%ecx),%ecx
movl %ebx,536(%esp)
movq %mm6,528(%esp)
movl %ecx,548(%esp)
xorl %eax,%eax
roll $8,%edx
movb %dl,%al
movl %eax,%ebp
andb $15,%al
shrl $4,%ebp
pxor %mm0,%mm0
roll $8,%edx
pxor %mm1,%mm1
pxor %mm2,%mm2
movq 16(%esp,%eax,8),%mm7
movq 144(%esp,%eax,8),%mm6
movb %dl,%al
movd %mm7,%ebx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%edi
psrlq $8,%mm6
pxor 272(%esp,%ebp,8),%mm7
andb $15,%al
psllq $56,%mm3
shrl $4,%edi
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%ebp,8),%mm6
xorb (%esp,%ebp,1),%bl
movb %dl,%al
movd %mm7,%ecx
movzbl %bl,%ebx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%ebp
psrlq $8,%mm6
pxor 272(%esp,%edi,8),%mm7
andb $15,%al
psllq $56,%mm3
shrl $4,%ebp
pinsrw $2,(%esi,%ebx,2),%mm2
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%edi,8),%mm6
xorb (%esp,%edi,1),%cl
movb %dl,%al
movl 536(%esp),%edx
movd %mm7,%ebx
movzbl %cl,%ecx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%edi
psrlq $8,%mm6
pxor 272(%esp,%ebp,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm2,%mm6
shrl $4,%edi
pinsrw $2,(%esi,%ecx,2),%mm1
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%ebp,8),%mm6
xorb (%esp,%ebp,1),%bl
movb %dl,%al
movd %mm7,%ecx
movzbl %bl,%ebx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%ebp
psrlq $8,%mm6
pxor 272(%esp,%edi,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm1,%mm6
shrl $4,%ebp
pinsrw $2,(%esi,%ebx,2),%mm0
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%edi,8),%mm6
xorb (%esp,%edi,1),%cl
movb %dl,%al
movd %mm7,%ebx
movzbl %cl,%ecx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%edi
psrlq $8,%mm6
pxor 272(%esp,%ebp,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm0,%mm6
shrl $4,%edi
pinsrw $2,(%esi,%ecx,2),%mm2
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%ebp,8),%mm6
xorb (%esp,%ebp,1),%bl
movb %dl,%al
movd %mm7,%ecx
movzbl %bl,%ebx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%ebp
psrlq $8,%mm6
pxor 272(%esp,%edi,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm2,%mm6
shrl $4,%ebp
pinsrw $2,(%esi,%ebx,2),%mm1
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%edi,8),%mm6
xorb (%esp,%edi,1),%cl
movb %dl,%al
movl 532(%esp),%edx
movd %mm7,%ebx
movzbl %cl,%ecx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%edi
psrlq $8,%mm6
pxor 272(%esp,%ebp,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm1,%mm6
shrl $4,%edi
pinsrw $2,(%esi,%ecx,2),%mm0
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%ebp,8),%mm6
xorb (%esp,%ebp,1),%bl
movb %dl,%al
movd %mm7,%ecx
movzbl %bl,%ebx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%ebp
psrlq $8,%mm6
pxor 272(%esp,%edi,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm0,%mm6
shrl $4,%ebp
pinsrw $2,(%esi,%ebx,2),%mm2
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%edi,8),%mm6
xorb (%esp,%edi,1),%cl
movb %dl,%al
movd %mm7,%ebx
movzbl %cl,%ecx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%edi
psrlq $8,%mm6
pxor 272(%esp,%ebp,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm2,%mm6
shrl $4,%edi
pinsrw $2,(%esi,%ecx,2),%mm1
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%ebp,8),%mm6
xorb (%esp,%ebp,1),%bl
movb %dl,%al
movd %mm7,%ecx
movzbl %bl,%ebx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%ebp
psrlq $8,%mm6
pxor 272(%esp,%edi,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm1,%mm6
shrl $4,%ebp
pinsrw $2,(%esi,%ebx,2),%mm0
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%edi,8),%mm6
xorb (%esp,%edi,1),%cl
movb %dl,%al
movl 528(%esp),%edx
movd %mm7,%ebx
movzbl %cl,%ecx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%edi
psrlq $8,%mm6
pxor 272(%esp,%ebp,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm0,%mm6
shrl $4,%edi
pinsrw $2,(%esi,%ecx,2),%mm2
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%ebp,8),%mm6
xorb (%esp,%ebp,1),%bl
movb %dl,%al
movd %mm7,%ecx
movzbl %bl,%ebx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%ebp
psrlq $8,%mm6
pxor 272(%esp,%edi,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm2,%mm6
shrl $4,%ebp
pinsrw $2,(%esi,%ebx,2),%mm1
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%edi,8),%mm6
xorb (%esp,%edi,1),%cl
movb %dl,%al
movd %mm7,%ebx
movzbl %cl,%ecx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%edi
psrlq $8,%mm6
pxor 272(%esp,%ebp,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm1,%mm6
shrl $4,%edi
pinsrw $2,(%esi,%ecx,2),%mm0
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%ebp,8),%mm6
xorb (%esp,%ebp,1),%bl
movb %dl,%al
movd %mm7,%ecx
movzbl %bl,%ebx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%ebp
psrlq $8,%mm6
pxor 272(%esp,%edi,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm0,%mm6
shrl $4,%ebp
pinsrw $2,(%esi,%ebx,2),%mm2
pxor 16(%esp,%eax,8),%mm7
roll $8,%edx
pxor 144(%esp,%eax,8),%mm6
pxor %mm3,%mm7
pxor 400(%esp,%edi,8),%mm6
xorb (%esp,%edi,1),%cl
movb %dl,%al
movl 524(%esp),%edx
movd %mm7,%ebx
movzbl %cl,%ecx
psrlq $8,%mm7
movq %mm6,%mm3
movl %eax,%edi
psrlq $8,%mm6
pxor 272(%esp,%ebp,8),%mm7
andb $15,%al
psllq $56,%mm3
pxor %mm2,%mm6
shrl $4,%edi
pinsrw $2,(%esi,%ecx,2),%mm1
pxor 16(%esp,%eax,8),%mm7
pxor 144(%esp,%eax,8),%mm6
xorb (%esp,%ebp,1),%bl
pxor %mm3,%mm7
pxor 400(%esp,%ebp,8),%mm6
movzbl %bl,%ebx
pxor %mm2,%mm2
psllq $4,%mm1
movd %mm7,%ecx
psrlq $4,%mm7
movq %mm6,%mm3
psrlq $4,%mm6
shll $4,%ecx
pxor 16(%esp,%edi,8),%mm7
psllq $60,%mm3
movzbl %cl,%ecx
pxor %mm3,%mm7
pxor 144(%esp,%edi,8),%mm6
pinsrw $2,(%esi,%ebx,2),%mm0
pxor %mm1,%mm6
movd %mm7,%edx
pinsrw $3,(%esi,%ecx,2),%mm2
psllq $12,%mm0
pxor %mm0,%mm6
psrlq $32,%mm7
pxor %mm2,%mm6
movl 548(%esp),%ecx
movd %mm7,%ebx
movq %mm6,%mm3
psllw $8,%mm6
psrlw $8,%mm3
por %mm3,%mm6
bswap %edx
pshufw $27,%mm6,%mm6
bswap %ebx
cmpl 552(%esp),%ecx
jne L009outer
movl 544(%esp),%eax
movl %edx,12(%eax)
movl %ebx,8(%eax)
movq %mm6,(%eax)
movl 556(%esp),%esp
emms
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.globl _gcm_init_clmul
.private_extern _gcm_init_clmul
.align 4
_gcm_init_clmul:
L_gcm_init_clmul_begin:
movl 4(%esp),%edx
movl 8(%esp),%eax
call L010pic
L010pic:
popl %ecx
leal Lbswap-L010pic(%ecx),%ecx
movdqu (%eax),%xmm2
pshufd $78,%xmm2,%xmm2
pshufd $255,%xmm2,%xmm4
movdqa %xmm2,%xmm3
psllq $1,%xmm2
pxor %xmm5,%xmm5
psrlq $63,%xmm3
pcmpgtd %xmm4,%xmm5
pslldq $8,%xmm3
por %xmm3,%xmm2
pand 16(%ecx),%xmm5
pxor %xmm5,%xmm2
movdqa %xmm2,%xmm0
movdqa %xmm0,%xmm1
pshufd $78,%xmm0,%xmm3
pshufd $78,%xmm2,%xmm4
pxor %xmm0,%xmm3
pxor %xmm2,%xmm4
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
.byte 102,15,58,68,220,0
xorps %xmm0,%xmm3
xorps %xmm1,%xmm3
movdqa %xmm3,%xmm4
psrldq $8,%xmm3
pslldq $8,%xmm4
pxor %xmm3,%xmm1
pxor %xmm4,%xmm0
movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm3
psllq $5,%xmm0
pxor %xmm0,%xmm3
psllq $1,%xmm0
pxor %xmm3,%xmm0
psllq $57,%xmm0
movdqa %xmm0,%xmm3
pslldq $8,%xmm0
psrldq $8,%xmm3
pxor %xmm4,%xmm0
pxor %xmm3,%xmm1
movdqa %xmm0,%xmm4
psrlq $1,%xmm0
pxor %xmm4,%xmm1
pxor %xmm0,%xmm4
psrlq $5,%xmm0
pxor %xmm4,%xmm0
psrlq $1,%xmm0
pxor %xmm1,%xmm0
pshufd $78,%xmm2,%xmm3
pshufd $78,%xmm0,%xmm4
pxor %xmm2,%xmm3
movdqu %xmm2,(%edx)
pxor %xmm0,%xmm4
movdqu %xmm0,16(%edx)
.byte 102,15,58,15,227,8
movdqu %xmm4,32(%edx)
ret
.globl _gcm_gmult_clmul
.private_extern _gcm_gmult_clmul
.align 4
_gcm_gmult_clmul:
L_gcm_gmult_clmul_begin:
movl 4(%esp),%eax
movl 8(%esp),%edx
call L011pic
L011pic:
popl %ecx
leal Lbswap-L011pic(%ecx),%ecx
movdqu (%eax),%xmm0
movdqa (%ecx),%xmm5
movups (%edx),%xmm2
.byte 102,15,56,0,197
movups 32(%edx),%xmm4
movdqa %xmm0,%xmm1
pshufd $78,%xmm0,%xmm3
pxor %xmm0,%xmm3
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
.byte 102,15,58,68,220,0
xorps %xmm0,%xmm3
xorps %xmm1,%xmm3
movdqa %xmm3,%xmm4
psrldq $8,%xmm3
pslldq $8,%xmm4
pxor %xmm3,%xmm1
pxor %xmm4,%xmm0
movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm3
psllq $5,%xmm0
pxor %xmm0,%xmm3
psllq $1,%xmm0
pxor %xmm3,%xmm0
psllq $57,%xmm0
movdqa %xmm0,%xmm3
pslldq $8,%xmm0
psrldq $8,%xmm3
pxor %xmm4,%xmm0
pxor %xmm3,%xmm1
movdqa %xmm0,%xmm4
psrlq $1,%xmm0
pxor %xmm4,%xmm1
pxor %xmm0,%xmm4
psrlq $5,%xmm0
pxor %xmm4,%xmm0
psrlq $1,%xmm0
pxor %xmm1,%xmm0
.byte 102,15,56,0,197
movdqu %xmm0,(%eax)
ret
.globl _gcm_ghash_clmul
.private_extern _gcm_ghash_clmul
.align 4
_gcm_ghash_clmul:
L_gcm_ghash_clmul_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%eax
movl 24(%esp),%edx
movl 28(%esp),%esi
movl 32(%esp),%ebx
call L012pic
L012pic:
popl %ecx
leal Lbswap-L012pic(%ecx),%ecx
movdqu (%eax),%xmm0
movdqa (%ecx),%xmm5
movdqu (%edx),%xmm2
.byte 102,15,56,0,197
subl $16,%ebx
jz L013odd_tail
movdqu (%esi),%xmm3
movdqu 16(%esi),%xmm6
.byte 102,15,56,0,221
.byte 102,15,56,0,245
movdqu 32(%edx),%xmm5
pxor %xmm3,%xmm0
pshufd $78,%xmm6,%xmm3
movdqa %xmm6,%xmm7
pxor %xmm6,%xmm3
leal 32(%esi),%esi
.byte 102,15,58,68,242,0
.byte 102,15,58,68,250,17
.byte 102,15,58,68,221,0
movups 16(%edx),%xmm2
nop
subl $32,%ebx
jbe L014even_tail
jmp L015mod_loop
.align 5,0x90
L015mod_loop:
pshufd $78,%xmm0,%xmm4
movdqa %xmm0,%xmm1
pxor %xmm0,%xmm4
nop
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
.byte 102,15,58,68,229,16
movups (%edx),%xmm2
xorps %xmm6,%xmm0
movdqa (%ecx),%xmm5
xorps %xmm7,%xmm1
movdqu (%esi),%xmm7
pxor %xmm0,%xmm3
movdqu 16(%esi),%xmm6
pxor %xmm1,%xmm3
.byte 102,15,56,0,253
pxor %xmm3,%xmm4
movdqa %xmm4,%xmm3
psrldq $8,%xmm4
pslldq $8,%xmm3
pxor %xmm4,%xmm1
pxor %xmm3,%xmm0
.byte 102,15,56,0,245
pxor %xmm7,%xmm1
movdqa %xmm6,%xmm7
movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm3
psllq $5,%xmm0
pxor %xmm0,%xmm3
psllq $1,%xmm0
pxor %xmm3,%xmm0
.byte 102,15,58,68,242,0
movups 32(%edx),%xmm5
psllq $57,%xmm0
movdqa %xmm0,%xmm3
pslldq $8,%xmm0
psrldq $8,%xmm3
pxor %xmm4,%xmm0
pxor %xmm3,%xmm1
pshufd $78,%xmm7,%xmm3
movdqa %xmm0,%xmm4
psrlq $1,%xmm0
pxor %xmm7,%xmm3
pxor %xmm4,%xmm1
.byte 102,15,58,68,250,17
movups 16(%edx),%xmm2
pxor %xmm0,%xmm4
psrlq $5,%xmm0
pxor %xmm4,%xmm0
psrlq $1,%xmm0
pxor %xmm1,%xmm0
.byte 102,15,58,68,221,0
leal 32(%esi),%esi
subl $32,%ebx
ja L015mod_loop
L014even_tail:
pshufd $78,%xmm0,%xmm4
movdqa %xmm0,%xmm1
pxor %xmm0,%xmm4
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
.byte 102,15,58,68,229,16
movdqa (%ecx),%xmm5
xorps %xmm6,%xmm0
xorps %xmm7,%xmm1
pxor %xmm0,%xmm3
pxor %xmm1,%xmm3
pxor %xmm3,%xmm4
movdqa %xmm4,%xmm3
psrldq $8,%xmm4
pslldq $8,%xmm3
pxor %xmm4,%xmm1
pxor %xmm3,%xmm0
movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm3
psllq $5,%xmm0
pxor %xmm0,%xmm3
psllq $1,%xmm0
pxor %xmm3,%xmm0
psllq $57,%xmm0
movdqa %xmm0,%xmm3
pslldq $8,%xmm0
psrldq $8,%xmm3
pxor %xmm4,%xmm0
pxor %xmm3,%xmm1
movdqa %xmm0,%xmm4
psrlq $1,%xmm0
pxor %xmm4,%xmm1
pxor %xmm0,%xmm4
psrlq $5,%xmm0
pxor %xmm4,%xmm0
psrlq $1,%xmm0
pxor %xmm1,%xmm0
testl %ebx,%ebx
jnz L016done
movups (%edx),%xmm2
L013odd_tail:
movdqu (%esi),%xmm3
.byte 102,15,56,0,221
pxor %xmm3,%xmm0
movdqa %xmm0,%xmm1
pshufd $78,%xmm0,%xmm3
pshufd $78,%xmm2,%xmm4
pxor %xmm0,%xmm3
pxor %xmm2,%xmm4
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
.byte 102,15,58,68,220,0
xorps %xmm0,%xmm3
xorps %xmm1,%xmm3
movdqa %xmm3,%xmm4
psrldq $8,%xmm3
pslldq $8,%xmm4
pxor %xmm3,%xmm1
pxor %xmm4,%xmm0
movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm3
psllq $5,%xmm0
pxor %xmm0,%xmm3
psllq $1,%xmm0
pxor %xmm3,%xmm0
psllq $57,%xmm0
movdqa %xmm0,%xmm3
pslldq $8,%xmm0
psrldq $8,%xmm3
pxor %xmm4,%xmm0
pxor %xmm3,%xmm1
movdqa %xmm0,%xmm4
psrlq $1,%xmm0
pxor %xmm4,%xmm1
pxor %xmm0,%xmm4
psrlq $5,%xmm0
pxor %xmm4,%xmm0
psrlq $1,%xmm0
pxor %xmm1,%xmm0
L016done:
.byte 102,15,56,0,197
movdqu %xmm0,(%eax)
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.align 6,0x90
Lbswap:
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194
.align 6,0x90
Lrem_8bit:
.value 0,450,900,582,1800,1738,1164,1358
.value 3600,4050,3476,3158,2328,2266,2716,2910
.value 7200,7650,8100,7782,6952,6890,6316,6510
.value 4656,5106,4532,4214,5432,5370,5820,6014
.value 14400,14722,15300,14854,16200,16010,15564,15630
.value 13904,14226,13780,13334,12632,12442,13020,13086
.value 9312,9634,10212,9766,9064,8874,8428,8494
.value 10864,11186,10740,10294,11640,11450,12028,12094
.value 28800,28994,29444,29382,30600,30282,29708,30158
.value 32400,32594,32020,31958,31128,30810,31260,31710
.value 27808,28002,28452,28390,27560,27242,26668,27118
.value 25264,25458,24884,24822,26040,25722,26172,26622
.value 18624,18690,19268,19078,20424,19978,19532,19854
.value 18128,18194,17748,17558,16856,16410,16988,17310
.value 21728,21794,22372,22182,21480,21034,20588,20910
.value 23280,23346,22900,22710,24056,23610,24188,24510
.value 57600,57538,57988,58182,58888,59338,58764,58446
.value 61200,61138,60564,60758,59416,59866,60316,59998
.value 64800,64738,65188,65382,64040,64490,63916,63598
.value 62256,62194,61620,61814,62520,62970,63420,63102
.value 55616,55426,56004,56070,56904,57226,56780,56334
.value 55120,54930,54484,54550,53336,53658,54236,53790
.value 50528,50338,50916,50982,49768,50090,49644,49198
.value 52080,51890,51444,51510,52344,52666,53244,52798
.value 37248,36930,37380,37830,38536,38730,38156,38094
.value 40848,40530,39956,40406,39064,39258,39708,39646
.value 36256,35938,36388,36838,35496,35690,35116,35054
.value 33712,33394,32820,33270,33976,34170,34620,34558
.value 43456,43010,43588,43910,44744,44810,44364,44174
.value 42960,42514,42068,42390,41176,41242,41820,41630
.value 46560,46114,46692,47014,45800,45866,45420,45230
.value 48112,47666,47220,47542,48376,48442,49020,48830
.align 6,0x90
Lrem_4bit:
.long 0,0,0,471859200,0,943718400,0,610271232
.long 0,1887436800,0,1822425088,0,1220542464,0,1423966208
.long 0,3774873600,0,4246732800,0,3644850176,0,3311403008
.long 0,2441084928,0,2376073216,0,2847932416,0,3051356160
.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67
.byte 82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112
.byte 112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62
.byte 0
#endif