blob: caa9e2964015fa4aaabcb1595fdf8a3921406027 [file] [log] [blame]
; vim:filetype=nasm ts=8
; libFLAC - Free Lossless Audio Codec library
; Copyright (C) 2001-2009 Josh Coalson
; Copyright (C) 2011-2014 Xiph.Org Foundation
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions
; are met:
;
; - Redistributions of source code must retain the above copyright
; notice, this list of conditions and the following disclaimer.
;
; - Redistributions in binary form must reproduce the above copyright
; notice, this list of conditions and the following disclaimer in the
; documentation and/or other materials provided with the distribution.
;
; - Neither the name of the Xiph.org Foundation nor the names of its
; contributors may be used to endorse or promote products derived from
; this software without specific prior written permission.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
; ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
; [CR] is a note to flag that the instruction can be easily reordered
%include "nasm.h"
data_section
cglobal FLAC__lpc_compute_autocorrelation_asm
code_section
; **********************************************************************
;
; void FLAC__lpc_compute_autocorrelation_asm(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[])
; {
; FLAC__real d;
; unsigned sample, coeff;
; const unsigned limit = data_len - lag;
;
; assert(lag > 0);
; assert(lag <= data_len);
;
; for(coeff = 0; coeff < lag; coeff++)
; autoc[coeff] = 0.0;
; for(sample = 0; sample <= limit; sample++){
; d = data[sample];
; for(coeff = 0; coeff < lag; coeff++)
; autoc[coeff] += d * data[sample+coeff];
; }
; for(; sample < data_len; sample++){
; d = data[sample];
; for(coeff = 0; coeff < data_len - sample; coeff++)
; autoc[coeff] += d * data[sample+coeff];
; }
; }
;
FLAC__lpc_compute_autocorrelation_asm:
push ebp
lea ebp, [esp + 8]
push ebx
push esi
push edi
mov edx, [ebp + 8] ; edx == lag
mov ecx, [ebp + 4] ; ecx == data_len
mov esi, [ebp] ; esi == data
mov edi, [ebp + 12] ; edi == autoc
cmp edx, 1
ja short .lag_above_1
.lag_eq_1:
fldz ; will accumulate autoc[0]
ALIGN 16
.lag_1_loop:
fld dword [esi]
add esi, byte 4 ; sample++
fmul st0, st0
faddp st1, st0
dec ecx
jnz .lag_1_loop
fstp dword [edi]
jmp .end
.lag_above_1:
cmp edx, 2
ja short .lag_above_2
.lag_eq_2:
fldz ; will accumulate autoc[1]
dec ecx
fldz ; will accumulate autoc[0]
fld dword [esi]
ALIGN 16
.lag_2_loop:
add esi, byte 4 ; [CR] sample++
fld st0
fmul st0, st0
faddp st2, st0 ; add to autoc[0]
fld dword [esi]
fmul st1, st0
fxch
faddp st3, st0 ; add to autoc[1]
dec ecx
jnz .lag_2_loop
; clean up the leftovers
fmul st0, st0
faddp st1, st0 ; add to autoc[0]
fstp dword [edi]
fstp dword [edi + 4]
jmp .end
.lag_above_2:
cmp edx, 3
ja short .lag_above_3
.lag_eq_3:
fldz ; will accumulate autoc[2]
dec ecx
fldz ; will accumulate autoc[1]
dec ecx
fldz ; will accumulate autoc[0]
ALIGN 16
.lag_3_loop:
fld dword [esi]
fld st0
fmul st0, st0
faddp st2, st0 ; add to autoc[0]
fld dword [esi + 4]
fmul st0, st1
faddp st3, st0 ; add to autoc[1]
fld dword [esi + 8]
fmulp st1, st0
add esi, byte 4 ; [CR] sample++
faddp st3, st0 ; add to autoc[2]
dec ecx
jnz .lag_3_loop
; clean up the leftovers
fld dword [esi]
fld st0
fmul st0, st0
faddp st2, st0 ; add to autoc[0]
fld dword [esi + 4]
fmul st1, st0
fxch
faddp st3, st0 ; add to autoc[1]
fmul st0, st0
faddp st1, st0 ; add to autoc[0]
fstp dword [edi]
fstp dword [edi + 4]
fstp dword [edi + 8]
jmp .end
.lag_above_3:
cmp edx, 4
ja near .lag_above_4
.lag_eq_4:
fldz ; will accumulate autoc[3]
dec ecx
fldz ; will accumulate autoc[2]
dec ecx
fldz ; will accumulate autoc[1]
dec ecx
fldz ; will accumulate autoc[0]
ALIGN 16
.lag_4_loop:
fld dword [esi]
fld st0
fmul st0, st0
faddp st2, st0 ; add to autoc[0]
fld dword [esi + 4]
fmul st0, st1
faddp st3, st0 ; add to autoc[1]
fld dword [esi + 8]
fmul st0, st1
faddp st4, st0 ; add to autoc[2]
fld dword [esi + 12]
fmulp st1, st0
add esi, byte 4 ; [CR] sample++
faddp st4, st0 ; add to autoc[3]
dec ecx
jnz .lag_4_loop
; clean up the leftovers
fld dword [esi]
fld st0
fmul st0, st0
faddp st2, st0 ; add to autoc[0]
fld dword [esi + 4]
fmul st0, st1
faddp st3, st0 ; add to autoc[1]
fld dword [esi + 8]
fmulp st1, st0
add esi, byte 4 ; [CR] sample++
faddp st3, st0 ; add to autoc[2]
fld dword [esi]
fld st0
fmul st0, st0
faddp st2, st0 ; add to autoc[0]
fld dword [esi + 4]
fmul st1, st0
fxch
faddp st3, st0 ; add to autoc[1]
fmul st0, st0
faddp st1, st0 ; add to autoc[0]
fstp dword [edi]
fstp dword [edi + 4]
fstp dword [edi + 8]
fstp dword [edi + 12]
jmp .end
.lag_above_4:
cmp edx, 5
ja near .lag_above_5
.lag_eq_5:
fldz ; will accumulate autoc[4]
fldz ; will accumulate autoc[3]
fldz ; will accumulate autoc[2]
fldz ; will accumulate autoc[1]
fldz ; will accumulate autoc[0]
sub ecx, byte 4
ALIGN 16
.lag_5_loop:
fld dword [esi]
fld st0
fmul st0, st0
faddp st2, st0 ; add to autoc[0]
fld dword [esi + 4]
fmul st0, st1
faddp st3, st0 ; add to autoc[1]
fld dword [esi + 8]
fmul st0, st1
faddp st4, st0 ; add to autoc[2]
fld dword [esi + 12]
fmul st0, st1
faddp st5, st0 ; add to autoc[3]
fld dword [esi + 16]
fmulp st1, st0
add esi, byte 4 ; [CR] sample++
faddp st5, st0 ; add to autoc[4]
dec ecx
jnz .lag_5_loop
; clean up the leftovers
fld dword [esi]
fld st0
fmul st0, st0
faddp st2, st0 ; add to autoc[0]
fld dword [esi + 4]
fmul st0, st1
faddp st3, st0 ; add to autoc[1]
fld dword [esi + 8]
fmul st0, st1
faddp st4, st0 ; add to autoc[2]
fld dword [esi + 12]
fmulp st1, st0
add esi, byte 4 ; [CR] sample++
faddp st4, st0 ; add to autoc[3]
fld dword [esi]
fld st0
fmul st0, st0
faddp st2, st0 ; add to autoc[0]
fld dword [esi + 4]
fmul st0, st1
faddp st3, st0 ; add to autoc[1]
fld dword [esi + 8]
fmulp st1, st0
add esi, byte 4 ; [CR] sample++
faddp st3, st0 ; add to autoc[2]
fld dword [esi]
fld st0
fmul st0, st0
faddp st2, st0 ; add to autoc[0]
fld dword [esi + 4]
fmul st1, st0
fxch
faddp st3, st0 ; add to autoc[1]
fmul st0, st0
faddp st1, st0 ; add to autoc[0]
fstp dword [edi]
fstp dword [edi + 4]
fstp dword [edi + 8]
fstp dword [edi + 12]
fstp dword [edi + 16]
jmp .end
.lag_above_5:
cmp edx, 6
ja .lag_above_6
.lag_eq_6:
fldz ; will accumulate autoc[5]
fldz ; will accumulate autoc[4]
fldz ; will accumulate autoc[3]
fldz ; will accumulate autoc[2]
fldz ; will accumulate autoc[1]
fldz ; will accumulate autoc[0]
sub ecx, byte 5
ALIGN 16
.lag_6_loop:
fld dword [esi]
fld st0
fmul st0, st0
faddp st2, st0 ; add to autoc[0]
fld dword [esi + 4]
fmul st0, st1
faddp st3, st0 ; add to autoc[1]
fld dword [esi + 8]
fmul st0, st1
faddp st4, st0 ; add to autoc[2]
fld dword [esi + 12]
fmul st0, st1
faddp st5, st0 ; add to autoc[3]
fld dword [esi + 16]
fmul st0, st1
faddp st6, st0 ; add to autoc[4]
fld dword [esi + 20]
fmulp st1, st0
add esi, byte 4 ; [CR] sample++
faddp st6, st0 ; add to autoc[5]
dec ecx
jnz .lag_6_loop
; clean up the leftovers
fld dword [esi]
fld st0
fmul st0, st0
faddp st2, st0 ; add to autoc[0]
fld dword [esi + 4]
fmul st0, st1
faddp st3, st0 ; add to autoc[1]
fld dword [esi + 8]
fmul st0, st1
faddp st4, st0 ; add to autoc[2]
fld dword [esi + 12]
fmul st0, st1
faddp st5, st0 ; add to autoc[3]
fld dword [esi + 16]
fmulp st1, st0
add esi, byte 4 ; [CR] sample++
faddp st5, st0 ; add to autoc[4]
fld dword [esi]
fld st0
fmul st0, st0
faddp st2, st0 ; add to autoc[0]
fld dword [esi + 4]
fmul st0, st1
faddp st3, st0 ; add to autoc[1]
fld dword [esi + 8]
fmul st0, st1
faddp st4, st0 ; add to autoc[2]
fld dword [esi + 12]
fmulp st1, st0
add esi, byte 4 ; [CR] sample++
faddp st4, st0 ; add to autoc[3]
fld dword [esi]
fld st0
fmul st0, st0
faddp st2, st0 ; add to autoc[0]
fld dword [esi + 4]
fmul st0, st1
faddp st3, st0 ; add to autoc[1]
fld dword [esi + 8]
fmulp st1, st0
add esi, byte 4 ; [CR] sample++
faddp st3, st0 ; add to autoc[2]
fld dword [esi]
fld st0
fmul st0, st0
faddp st2, st0 ; add to autoc[0]
fld dword [esi + 4]
fmul st1, st0
fxch
faddp st3, st0 ; add to autoc[1]
fmul st0, st0
faddp st1, st0 ; add to autoc[0]
fstp dword [edi]
fstp dword [edi + 4]
fstp dword [edi + 8]
fstp dword [edi + 12]
fstp dword [edi + 16]
fstp dword [edi + 20]
jmp .end
.lag_above_6:
; for(coeff = 0; coeff < lag; coeff++)
; autoc[coeff] = 0.0;
lea ecx, [edx * 2] ; ecx = # of dwords of 0 to write
xor eax, eax
rep stosd
mov ecx, [ebp + 4] ; ecx == data_len
mov edi, [ebp + 12] ; edi == autoc
; const unsigned limit = data_len - lag;
sub ecx, edx
inc ecx ; we are looping <= limit so we add one to the counter
; for(sample = 0; sample <= limit; sample++){
; d = data[sample];
; for(coeff = 0; coeff < lag; coeff++)
; autoc[coeff] += d * data[sample+coeff];
; }
xor eax, eax ; eax == sample <- 0
ALIGN 16
.outer_loop:
push eax ; save sample
fld dword [esi + eax * 4] ; ST = d <- data[sample]
mov ebx, eax ; ebx == sample+coeff <- sample
mov edx, [ebp + 8] ; edx <- lag
xor eax, eax ; eax == coeff <- 0
ALIGN 16
.inner_loop:
fld st0 ; ST = d d
fmul dword [esi + ebx * 4] ; ST = d*data[sample+coeff] d
fadd dword [edi + eax * 4] ; ST = autoc[coeff]+d*data[sample+coeff] d
fstp dword [edi + eax * 4] ; autoc[coeff]+=d*data[sample+coeff] ST = d
inc ebx ; (sample+coeff)++
inc eax ; coeff++
dec edx
jnz .inner_loop
pop eax ; restore sample
fstp st0 ; pop d, ST = empty
inc eax ; sample++
loop .outer_loop
; for(; sample < data_len; sample++){
; d = data[sample];
; for(coeff = 0; coeff < data_len - sample; coeff++)
; autoc[coeff] += d * data[sample+coeff];
; }
mov ecx, [ebp + 8] ; ecx <- lag
dec ecx ; ecx <- lag - 1
jz .outer_end ; skip loop if 0
.outer_loop2:
push eax ; save sample
fld dword [esi + eax * 4] ; ST = d <- data[sample]
mov ebx, eax ; ebx == sample+coeff <- sample
mov edx, [ebp + 4] ; edx <- data_len
sub edx, eax ; edx <- data_len-sample
xor eax, eax ; eax == coeff <- 0
.inner_loop2:
fld st0 ; ST = d d
fmul dword [esi + ebx * 4] ; ST = d*data[sample+coeff] d
fadd dword [edi + eax * 4] ; ST = autoc[coeff]+d*data[sample+coeff] d
fstp dword [edi + eax * 4] ; autoc[coeff]+=d*data[sample+coeff] ST = d
inc ebx ; (sample+coeff)++
inc eax ; coeff++
dec edx
jnz .inner_loop2
pop eax ; restore sample
fstp st0 ; pop d, ST = empty
inc eax ; sample++
loop .outer_loop2
.outer_end:
jmp .end
.lag_eq_6_plus_1:
mov ecx, [ebp + 4] ; ecx == data_len
mov esi, [ebp] ; esi == data
mov edi, [ebp + 12] ; edi == autoc
fldz ; will accumulate autoc[6]
sub ecx, byte 6
ALIGN 16
.lag_6_1_loop:
fld dword [esi]
fld dword [esi + 24]
fmulp st1, st0
add esi, byte 4 ; [CR] sample++
faddp st1, st0 ; add to autoc[6]
dec ecx
jnz .lag_6_1_loop
fstp dword [edi + 24]
jmp .end
.lag_eq_6_plus_2:
mov ecx, [ebp + 4] ; ecx == data_len
mov esi, [ebp] ; esi == data
mov edi, [ebp + 12] ; edi == autoc
fldz ; will accumulate autoc[7]
fldz ; will accumulate autoc[6]
sub ecx, byte 7
ALIGN 16
.lag_6_2_loop:
fld dword [esi]
fld dword [esi + 24]
fmul st0, st1
faddp st2, st0 ; add to autoc[6]
fld dword [esi + 28]
fmulp st1, st0
add esi, byte 4 ; [CR] sample++
faddp st2, st0 ; add to autoc[7]
dec ecx
jnz .lag_6_2_loop
; clean up the leftovers
fld dword [esi]
fld dword [esi + 24]
fmulp st1, st0
faddp st1, st0 ; add to autoc[6]
fstp dword [edi + 24]
fstp dword [edi + 28]
jmp .end
.lag_eq_6_plus_3:
mov ecx, [ebp + 4] ; ecx == data_len
mov esi, [ebp] ; esi == data
mov edi, [ebp + 12] ; edi == autoc
fldz ; will accumulate autoc[8]
fldz ; will accumulate autoc[7]
fldz ; will accumulate autoc[6]
sub ecx, byte 8
ALIGN 16
.lag_6_3_loop:
fld dword [esi]
fld dword [esi + 24]
fmul st0, st1
faddp st2, st0 ; add to autoc[6]
fld dword [esi + 28]
fmul st0, st1
faddp st3, st0 ; add to autoc[7]
fld dword [esi + 32]
fmulp st1, st0
add esi, byte 4 ; [CR] sample++
faddp st3, st0 ; add to autoc[8]
dec ecx
jnz .lag_6_3_loop
; clean up the leftovers
fld dword [esi]
fld dword [esi + 24]
fmul st0, st1
faddp st2, st0 ; add to autoc[6]
fld dword [esi + 28]
fmulp st1, st0
add esi, byte 4 ; [CR] sample++
faddp st2, st0 ; add to autoc[7]
fld dword [esi]
fld dword [esi + 24]
fmulp st1, st0
faddp st1, st0 ; add to autoc[6]
fstp dword [edi + 24]
fstp dword [edi + 28]
fstp dword [edi + 32]
jmp .end
.lag_eq_6_plus_4:
mov ecx, [ebp + 4] ; ecx == data_len
mov esi, [ebp] ; esi == data
mov edi, [ebp + 12] ; edi == autoc
fldz ; will accumulate autoc[9]
fldz ; will accumulate autoc[8]
fldz ; will accumulate autoc[7]
fldz ; will accumulate autoc[6]
sub ecx, byte 9
ALIGN 16
.lag_6_4_loop:
fld dword [esi]
fld dword [esi + 24]
fmul st0, st1
faddp st2, st0 ; add to autoc[6]
fld dword [esi + 28]
fmul st0, st1
faddp st3, st0 ; add to autoc[7]
fld dword [esi + 32]
fmul st0, st1
faddp st4, st0 ; add to autoc[8]
fld dword [esi + 36]
fmulp st1, st0
add esi, byte 4 ; [CR] sample++
faddp st4, st0 ; add to autoc[9]
dec ecx
jnz .lag_6_4_loop
; clean up the leftovers
fld dword [esi]
fld dword [esi + 24]
fmul st0, st1
faddp st2, st0 ; add to autoc[6]
fld dword [esi + 28]
fmul st0, st1
faddp st3, st0 ; add to autoc[7]
fld dword [esi + 32]
fmulp st1, st0
add esi, byte 4 ; [CR] sample++
faddp st3, st0 ; add to autoc[8]
fld dword [esi]
fld dword [esi + 24]
fmul st0, st1
faddp st2, st0 ; add to autoc[6]
fld dword [esi + 28]
fmulp st1, st0
add esi, byte 4 ; [CR] sample++
faddp st2, st0 ; add to autoc[7]
fld dword [esi]
fld dword [esi + 24]
fmulp st1, st0
faddp st1, st0 ; add to autoc[6]
fstp dword [edi + 24]
fstp dword [edi + 28]
fstp dword [edi + 32]
fstp dword [edi + 36]
jmp .end
.lag_eq_6_plus_5:
mov ecx, [ebp + 4] ; ecx == data_len
mov esi, [ebp] ; esi == data
mov edi, [ebp + 12] ; edi == autoc
fldz ; will accumulate autoc[10]
fldz ; will accumulate autoc[9]
fldz ; will accumulate autoc[8]
fldz ; will accumulate autoc[7]
fldz ; will accumulate autoc[6]
sub ecx, byte 10
ALIGN 16
.lag_6_5_loop:
fld dword [esi]
fld dword [esi + 24]
fmul st0, st1
faddp st2, st0 ; add to autoc[6]
fld dword [esi + 28]
fmul st0, st1
faddp st3, st0 ; add to autoc[7]
fld dword [esi + 32]
fmul st0, st1
faddp st4, st0 ; add to autoc[8]
fld dword [esi + 36]
fmul st0, st1
faddp st5, st0 ; add to autoc[9]
fld dword [esi + 40]
fmulp st1, st0
add esi, byte 4 ; [CR] sample++
faddp st5, st0 ; add to autoc[10]
dec ecx
jnz .lag_6_5_loop
; clean up the leftovers
fld dword [esi]
fld dword [esi + 24]
fmul st0, st1
faddp st2, st0 ; add to autoc[6]
fld dword [esi + 28]
fmul st0, st1
faddp st3, st0 ; add to autoc[7]
fld dword [esi + 32]
fmul st0, st1
faddp st4, st0 ; add to autoc[8]
fld dword [esi + 36]
fmulp st1, st0
add esi, byte 4 ; [CR] sample++
faddp st4, st0 ; add to autoc[9]
fld dword [esi]
fld dword [esi + 24]
fmul st0, st1
faddp st2, st0 ; add to autoc[6]
fld dword [esi + 28]
fmul st0, st1
faddp st3, st0 ; add to autoc[7]
fld dword [esi + 32]
fmulp st1, st0
add esi, byte 4 ; [CR] sample++
faddp st3, st0 ; add to autoc[8]
fld dword [esi]
fld dword [esi + 24]
fmul st0, st1
faddp st2, st0 ; add to autoc[6]
fld dword [esi + 28]
fmulp st1, st0
add esi, byte 4 ; [CR] sample++
faddp st2, st0 ; add to autoc[7]
fld dword [esi]
fld dword [esi + 24]
fmulp st1, st0
faddp st1, st0 ; add to autoc[6]
fstp dword [edi + 24]
fstp dword [edi + 28]
fstp dword [edi + 32]
fstp dword [edi + 36]
fstp dword [edi + 40]
jmp .end
.lag_eq_6_plus_6:
mov ecx, [ebp + 4] ; ecx == data_len
mov esi, [ebp] ; esi == data
mov edi, [ebp + 12] ; edi == autoc
fldz ; will accumulate autoc[11]
fldz ; will accumulate autoc[10]
fldz ; will accumulate autoc[9]
fldz ; will accumulate autoc[8]
fldz ; will accumulate autoc[7]
fldz ; will accumulate autoc[6]
sub ecx, byte 11
ALIGN 16
.lag_6_6_loop:
fld dword [esi]
fld dword [esi + 24]
fmul st0, st1
faddp st2, st0 ; add to autoc[6]
fld dword [esi + 28]
fmul st0, st1
faddp st3, st0 ; add to autoc[7]
fld dword [esi + 32]
fmul st0, st1
faddp st4, st0 ; add to autoc[8]
fld dword [esi + 36]
fmul st0, st1
faddp st5, st0 ; add to autoc[9]
fld dword [esi + 40]
fmul st0, st1
faddp st6, st0 ; add to autoc[10]
fld dword [esi + 44]
fmulp st1, st0
add esi, byte 4 ; [CR] sample++
faddp st6, st0 ; add to autoc[11]
dec ecx
jnz .lag_6_6_loop
; clean up the leftovers
fld dword [esi]
fld dword [esi + 24]
fmul st0, st1
faddp st2, st0 ; add to autoc[6]
fld dword [esi + 28]
fmul st0, st1
faddp st3, st0 ; add to autoc[7]
fld dword [esi + 32]
fmul st0, st1
faddp st4, st0 ; add to autoc[8]
fld dword [esi + 36]
fmul st0, st1
faddp st5, st0 ; add to autoc[9]
fld dword [esi + 40]
fmulp st1, st0
add esi, byte 4 ; [CR] sample++
faddp st5, st0 ; add to autoc[10]
fld dword [esi]
fld dword [esi + 24]
fmul st0, st1
faddp st2, st0 ; add to autoc[6]
fld dword [esi + 28]
fmul st0, st1
faddp st3, st0 ; add to autoc[7]
fld dword [esi + 32]
fmul st0, st1
faddp st4, st0 ; add to autoc[8]
fld dword [esi + 36]
fmulp st1, st0
add esi, byte 4 ; [CR] sample++
faddp st4, st0 ; add to autoc[9]
fld dword [esi]
fld dword [esi + 24]
fmul st0, st1
faddp st2, st0 ; add to autoc[6]
fld dword [esi + 28]
fmul st0, st1
faddp st3, st0 ; add to autoc[7]
fld dword [esi + 32]
fmulp st1, st0
add esi, byte 4 ; [CR] sample++
faddp st3, st0 ; add to autoc[8]
fld dword [esi]
fld dword [esi + 24]
fmul st0, st1
faddp st2, st0 ; add to autoc[6]
fld dword [esi + 28]
fmulp st1, st0
add esi, byte 4 ; [CR] sample++
faddp st2, st0 ; add to autoc[7]
fld dword [esi]
fld dword [esi + 24]
fmulp st1, st0
faddp st1, st0 ; add to autoc[6]
fstp dword [edi + 24]
fstp dword [edi + 28]
fstp dword [edi + 32]
fstp dword [edi + 36]
fstp dword [edi + 40]
fstp dword [edi + 44]
jmp .end
.end:
pop edi
pop esi
pop ebx
pop ebp
ret
; end