blob: e37cab86b68dd98b9d5a285d2c5b6c1ba0f142cd [file] [log] [blame]
/* Intel SIMD MMX implementation of Viterbi ACS butterflies
   for 256-state (k=9) convolutional code
   Copyright 2004 Phil Karn, KA9Q
   This code may be used under the terms of the GNU Lesser General Public License (LGPL)

   void update_viterbi29_blk_mmx(struct v29 *vp,unsigned char *syms,int nbits);
*/
8
# Byte offsets of the pointer fields inside struct v29 (see viterbi29.h).
	.equ	DP,512			# vp->dp: decision write pointer
	.equ	OLDMETRICS,516		# vp->old_metrics pointer
	.equ	NEWMETRICS,520		# vp->new_metrics pointer

	.text
	# Entry point plus the two branch-metric tables referenced by the code.
	.globl	update_viterbi29_blk_mmx
	.globl	Mettab29_1
	.globl	Mettab29_2
	.type	update_viterbi29_blk_mmx,@function
	.p2align 4			# 16-byte alignment for the entry point
17
# MMX (64-bit SIMD) version
# requires Pentium-MMX, Pentium-II or better
#
# update_viterbi29_blk_mmx(struct v29 *vp, unsigned char *syms, int nbits)
#
# Syntax: GNU as, AT&T operand order.  Target: 32-bit x86, arguments on the
# stack relative to the frame pointer:
#	 8(%ebp) = vp     pointer to the decoder state (struct v29)
#	12(%ebp) = syms   input symbols, two bytes consumed per decoded bit
#	16(%ebp) = nbits  number of bits (trellis steps) to process
#
# For every bit the routine runs 16 groups of 8 add-compare-select
# butterflies, writes 256 decision bytes through vp->dp, writes 256 new
# path-metric bytes, and then swaps the old/new metric pointers.  On exit the
# advanced decision pointer and the current metric pointers are stored back
# into *vp.
#
# Return: the header comment declares the function void, but %eax is set to
#	0 on the normal path and -1 when vp is NULL.
# Notes:  the stack copies of syms and nbits are used as loop state and are
#	overwritten.  %esi, %edi, %edx and %ebx are saved and restored;
#	%eax, the flags and mm0-mm7 are clobbered.  emms is executed before
#	a normal return so x87 code can run afterwards.

# butterfly GROUP
# Each expansion performs 8 butterflies in parallel for group GROUP (0..15).
# Register contract at the point of expansion:
#	%eax = first symbol  << 7  (row offset into Mettab29_1)
#	%ebx = second symbol << 7  (row offset into Mettab29_2)
#	%esi -> old path metrics, %edi -> new path metrics, %edx -> decisions
# Clobbers mm0-mm7.  Defining the macro emits no code, so it is placed ahead
# of the entry label rather than in the middle of the loop body.
	.macro butterfly GROUP
	# Compute branch metrics
	movq (Mettab29_1+8*\GROUP)(%eax),%mm3
	movq fifteens,%mm0
	paddb (Mettab29_2+8*\GROUP)(%ebx),%mm3
	paddb ones,%mm3		# emulate pavgb - this may not be necessary
	psrlq $1,%mm3		# 64-bit shift: bits leak across byte lanes...
	pand %mm0,%mm3		# ...so mask every byte down to 0..15

	movq (8*\GROUP)(%esi),%mm6	# Incoming path metric, high bit = 0
	movq ((8*\GROUP)+128)(%esi),%mm2 # Incoming path metric, high bit = 1
	movq %mm6,%mm1
	movq %mm2,%mm7

	paddb %mm3,%mm6
	paddb %mm3,%mm2
	pxor %mm0,%mm3		# invert branch metric (15 - metric)
	paddb %mm3,%mm7		# path metric for inverted symbols
	paddb %mm3,%mm1

	# live registers 1 2 6 7
	# Compare mm6 and mm7; mm1 and mm2
	pxor %mm3,%mm3		# mm3 = 0, reference for the signed compare
	movq %mm6,%mm4
	movq %mm1,%mm5
	psubb %mm7,%mm4		# mm4 = mm6 - mm7
	psubb %mm2,%mm5		# mm5 = mm1 - mm2
	pcmpgtb %mm3,%mm4	# mm4 = first set of decisions (ff = 1 better)
	pcmpgtb %mm3,%mm5	# mm5 = second set of decisions

	# live registers 1 2 4 5 6 7
	# select survivors: take mm7/mm2 where the decision byte is ff,
	# otherwise mm6/mm1
	movq %mm4,%mm0
	pand %mm4,%mm7
	movq %mm5,%mm3
	pand %mm5,%mm2
	pandn %mm6,%mm0
	pandn %mm1,%mm3
	por %mm0,%mm7		# mm7 = first set of survivors
	por %mm3,%mm2		# mm2 = second set of survivors

	# live registers 2 4 5 7
	# interleave & store decisions in mm4, mm5
	# interleave & store new path metrics in mm2, mm7
	movq %mm4,%mm3
	movq %mm7,%mm0
	punpckhbw %mm5,%mm4	# decisions, high four bytes of each set
	punpcklbw %mm5,%mm3	# decisions, low four bytes of each set
	punpcklbw %mm2,%mm7	# new metrics, low four bytes of each set
	punpckhbw %mm2,%mm0	# new metrics, high four bytes of each set
	movq %mm4,(16*\GROUP+8)(%edx)
	movq %mm3,(16*\GROUP)(%edx)
	movq %mm7,(16*\GROUP)(%edi)
	movq %mm0,(16*\GROUP+8)(%edi)

	.endm

update_viterbi29_blk_mmx:
	pushl %ebp
	movl %esp,%ebp
	pushl %esi
	pushl %edi
	pushl %edx
	pushl %ebx

	movl 8(%ebp),%edx	# edx = vp
	testl %edx,%edx
	jnz 0f
	movl $-1,%eax		# NULL vp: return -1 ('$' makes this an immediate,
				# not a load from address 0xffffffff)
	jmp err
0:	movl OLDMETRICS(%edx),%esi	# esi -> old metrics
	movl NEWMETRICS(%edx),%edi	# edi -> new metrics
	movl DP(%edx),%edx	# edx -> decisions

	# Top of the per-bit loop.  The remaining count lives in the nbits
	# stack slot and the symbol cursor in the syms stack slot.
1:	movl 16(%ebp),%eax	# eax = nbits
	decl %eax
	jl 2f			# passed zero, we're done
	movl %eax,16(%ebp)

	movl 12(%ebp),%ebx	# ebx = syms
	movw (%ebx),%ax		# ax = second symbol : first symbol
	addl $2,%ebx
	movl %ebx,12(%ebp)

	movb %ah,%bl		# bl = second symbol
	andl $255,%eax		# eax = first symbol, zero-extended
	andl $255,%ebx		# ebx = second symbol, zero-extended

	# shift into first array index dimension slot
	# (the tables are indexed in 128-byte rows, one row per symbol value)
	shll $7,%eax
	shll $7,%ebx

	# invoke macro 16 times for a total of 128 butterflies
	butterfly GROUP=0
	butterfly GROUP=1
	butterfly GROUP=2
	butterfly GROUP=3
	butterfly GROUP=4
	butterfly GROUP=5
	butterfly GROUP=6
	butterfly GROUP=7
	butterfly GROUP=8
	butterfly GROUP=9
	butterfly GROUP=10
	butterfly GROUP=11
	butterfly GROUP=12
	butterfly GROUP=13
	butterfly GROUP=14
	butterfly GROUP=15

	addl $256,%edx		# bump decision pointer

	# swap metrics: this step's output is the next step's input
	movl %esi,%eax
	movl %edi,%esi
	movl %eax,%edi
	jmp 1b

2:	emms			# leave MMX state before returning
	movl 8(%ebp),%ebx	# ebx = vp
	# stash metric pointers
	movl %esi,OLDMETRICS(%ebx)
	movl %edi,NEWMETRICS(%ebx)
	movl %edx,DP(%ebx)	# stash incremented value of vp->dp
	xorl %eax,%eax		# return 0
err:	popl %ebx
	popl %edx
	popl %edi
	popl %esi
	popl %ebp
	ret
154
# 8-byte packed constants read with 64-bit MMX memory operands.
	.data

	.p2align 3
fifteens:			# eight bytes of 15: 4-bit mask / inversion constant
	.fill 8,1,15

	.p2align 3
ones:				# eight bytes of 1: rounding term added before the shift
	.fill 8,1,1