blob: 4abbf482c1eeb37cd63dd8f3aaaa6f12ca12d5af [file] [log] [blame]
/* Intel SIMD MMX implementation of Viterbi ACS butterflies
   for 64-state (k=7) convolutional code
   Copyright 2004 Phil Karn, KA9Q
   This code may be used under the terms of the GNU Lesser General Public License (LGPL)

   int update_viterbi27_blk_mmx(struct v27 *vp,unsigned char *syms,int nbits) ;
*/
# MMX (64-bit SIMD) build of the decoder kernel.
# Needs a Pentium-MMX, Pentium-II or any later CPU.

# Byte offsets of fields inside struct v27 (the struct is defined in viterbi27_mmx.c)
        .equ    DP,128
        .equ    OLDMETRICS,132
        .equ    NEWMETRICS,136

        .text
        .globl  update_viterbi27_blk_mmx
        .globl  Mettab27_1
        .globl  Mettab27_2
        .type   update_viterbi27_blk_mmx,@function
        .p2align 4                      # 16-byte alignment for the entry point

19
/*
 * int update_viterbi27_blk_mmx(struct v27 *vp, unsigned char *syms, int nbits)
 *
 * Runs the add-compare-select butterflies for nbits consecutive bits,
 * consuming two symbol bytes from syms for every bit.
 *
 * ABI:     32-bit x86, all arguments on the stack:
 *            8(%ebp) = vp, 12(%ebp) = syms, 16(%ebp) = nbits
 * Returns: eax = 0 on success, eax = -1 if vp is NULL.
 * Effects: per bit, writes 64 decision bytes through the DP pointer and
 *          64 new metric bytes through the NEWMETRICS pointer, then swaps
 *          the old/new metric pointers.  On exit the (possibly swapped)
 *          metric pointers and the advanced decision pointer are stored
 *          back into *vp.  The syms and nbits argument slots on the stack
 *          double as loop variables and are overwritten.
 * Clobbers: eax, flags, mm0-mm7 (emms is issued before a successful
 *          return).  ebx, esi, edi and edx are saved and restored.
 *
 * Register roles inside the loop:
 *   esi -> old metrics      edi -> new metrics      edx -> decisions
 *   eax  = 32 * first symbol   ebx = 32 * second symbol (metric table row offsets)
 */
update_viterbi27_blk_mmx:
        pushl %ebp
        movl %esp,%ebp
        pushl %esi
        pushl %edi
        pushl %edx
        pushl %ebx

        movl 8(%ebp),%edx               # edx = vp
        testl %edx,%edx
        jnz  0f
        # NULL vp: return -1.  The '$' matters: a bare -1 is a memory
        # operand (load from address 0xffffffff), not an immediate.
        movl $-1,%eax
        jmp  err
0:      movl OLDMETRICS(%edx),%esi      # esi -> old metrics
        movl NEWMETRICS(%edx),%edi      # edi -> new metrics
        movl DP(%edx),%edx              # edx -> decisions

1:      movl 16(%ebp),%eax              # eax = nbits
        decl %eax
        jl   2f                         # passed zero, we're done
        movl %eax,16(%ebp)              # remaining count lives in the argument slot

        movl 12(%ebp),%ebx              # ebx = syms
        movw (%ebx),%ax                 # ax = second symbol : first symbol
        addl $2,%ebx
        movl %ebx,12(%ebp)              # advanced syms pointer lives in the argument slot

        movb %ah,%bl                    # bl = second symbol
        andl $255,%eax                  # eax = first symbol, zero-extended
        andl $255,%ebx                  # ebx = second symbol, zero-extended

        # shift into first array index dimension slot
        # (each symbol value selects a 32-byte row of its metric table)
        shll $5,%eax
        shll $5,%ebx

        # each invocation of this macro will do 8 butterflies in parallel
        .MACRO butterfly GROUP
        # Compute branch metrics
        movq (Mettab27_1+8*\GROUP)(%eax),%mm3
        movq fifteens,%mm0

        paddb (Mettab27_2+8*\GROUP)(%ebx),%mm3
        paddb ones,%mm3                 # emulate pavgb - this may not be necessary
        psrlq $1,%mm3                   # 64-bit shift: bits cross byte lanes ...
        pand %mm0,%mm3                  # ... so mask every byte back to 4 bits

        movq (8*\GROUP)(%esi),%mm6      # Incoming path metric, high bit = 0
        movq ((8*\GROUP)+32)(%esi),%mm2 # Incoming path metric, high bit = 1
        movq %mm6,%mm1
        movq %mm2,%mm7

        paddb %mm3,%mm6
        paddb %mm3,%mm2
        pxor %mm0,%mm3                  # invert branch metric
        paddb %mm3,%mm7                 # path metric for inverted symbols
        paddb %mm3,%mm1

        # live registers 1 2 6 7
        # Compare mm6 and mm7; mm1 and mm2
        pxor %mm3,%mm3                  # mm3 = 0 for the signed compares below
        movq %mm6,%mm4
        movq %mm1,%mm5
        psubb %mm7,%mm4                 # mm4 = mm6 - mm7
        psubb %mm2,%mm5                 # mm5 = mm1 - mm2
        pcmpgtb %mm3,%mm4               # mm4 = first set of decisions (ff = 1 better)
        pcmpgtb %mm3,%mm5               # mm5 = second set of decisions

        # live registers 1 2 4 5 6 7
        # select survivors
        movq %mm4,%mm0
        pand %mm4,%mm7
        movq %mm5,%mm3
        pand %mm5,%mm2
        pandn %mm6,%mm0
        pandn %mm1,%mm3
        por %mm0,%mm7                   # mm7 = first set of survivors
        por %mm3,%mm2                   # mm2 = second set of survivors

        # live registers 2 4 5 7
        # interleave & store decisions in mm4, mm5
        # interleave & store new path metrics (survivors) in mm2, mm7
        movq %mm4,%mm3
        movq %mm7,%mm0
        punpckhbw %mm5,%mm4             # high halves of the two decision sets
        punpcklbw %mm5,%mm3             # low halves of the two decision sets
        punpcklbw %mm2,%mm7             # low halves of the two survivor sets
        punpckhbw %mm2,%mm0             # high halves of the two survivor sets
        movq %mm4,(16*\GROUP+8)(%edx)
        movq %mm3,(16*\GROUP)(%edx)
        movq %mm7,(16*\GROUP)(%edi)
        movq %mm0,(16*\GROUP+8)(%edi)

        .endm

# invoke macro 4 times for a total of 32 butterflies
        butterfly GROUP=0
        butterfly GROUP=1
        butterfly GROUP=2
        butterfly GROUP=3

        addl $64,%edx                   # bump decision pointer

        # swap metrics
        movl %esi,%eax
        movl %edi,%esi
        movl %eax,%edi
        jmp  1b

2:      emms
        movl 8(%ebp),%ebx               # ebx = vp
        # stash metric pointers
        movl %esi,OLDMETRICS(%ebx)
        movl %edi,NEWMETRICS(%ebx)
        movl %edx,DP(%ebx)              # stash incremented value of vp->dp
        xorl %eax,%eax                  # return 0
err:    popl %ebx
        popl %edx
        popl %edi
        popl %esi
        popl %ebp
        ret
        .size update_viterbi27_blk_mmx, .-update_viterbi27_blk_mmx
141
142 .data
143 .align 8
144fifteens:
145 .byte 15,15,15,15,15,15,15,15
146
147 .align 8
148ones: .byte 1,1,1,1,1,1,1,1