Blame - viterbi615_av.c - platform/external/fec

blob: 4a6ce9c914054119d7f5f47ad0a5bac0f8d87225 [file] [log] [blame]

Bill Yi	4e213d5	2015-06-23 13:53:11 -0700	[diff] [blame]	1	/* K=15 r=1/6 Viterbi decoder for PowerPC G4/G5 Altivec vector instructions
				2	* 8-bit offset-binary soft decision samples
				3	* Copyright Mar 2004, Phil Karn, KA9Q
				4	* May be used under the terms of the GNU Lesser General Public License (LGPL)
				5	*/
				6	#include <stdio.h>
				7	#include <stdlib.h>
				8	#include <memory.h>
				9	#include <limits.h>
				10	#include "fec.h"
				11
				12	typedef union { unsigned char c[128][16]; vector unsigned char v[128]; } decision_t;
				13	typedef union { unsigned short s[16384]; vector unsigned short v[2048]; } metric_t;
				14
				15	static union branchtab615 { unsigned short s[8192]; vector unsigned short v[1024];} Branchtab615[6];
				16	static int Init = 0;
				17
				18	/* State info for instance of Viterbi decoder */
				19	struct v615 {
				20	metric_t metrics1; /* path metric buffer 1 */
				21	metric_t metrics2; /* path metric buffer 2 */
				22	void dp; / Pointer to current decision */
				23	metric_t old_metrics,new_metrics; /* Pointers to path metrics, swapped on every bit */
				24	void decisions; / Beginning of decisions for block */
				25	};
				26
				27	/* Initialize Viterbi decoder for start of new frame */
				28	int init_viterbi615_av(void *p,int starting_state){
				29	struct v615 *vp = p;
				30	int i;
				31
				32	if(p == NULL)
				33	return -1;
				34
				35	for(i=0;i<2048;i++)
				36	vp->metrics1.v[i] = (vector unsigned short)(5000);
				37
				38	vp->old_metrics = &vp->metrics1;
				39	vp->new_metrics = &vp->metrics2;
				40	vp->dp = vp->decisions;
				41	vp->old_metrics->s[starting_state & 16383] = 0; /* Bias known start state */
				42	return 0;
				43	}
				44
				45	/* Create a new instance of a Viterbi decoder */
				46	void *create_viterbi615_av(int len){
				47	struct v615 *vp;
				48
				49	if(!Init){
				50	int polys[6] = { V615POLYA,V615POLYB,V615POLYC,V615POLYD,V615POLYE,V615POLYF };
				51	set_viterbi615_polynomial_av(polys);
				52	}
				53	vp = (struct v615 *)malloc(sizeof(struct v615));
				54	vp->decisions = malloc(sizeof(decision_t)*(len+14));
				55	init_viterbi615_av(vp,0);
				56	return vp;
				57	}
				58
				59	void set_viterbi615_polynomial_av(int polys[6]){
				60	int state;
				61	int i;
				62
				63	for(state=0;state < 8192;state++){
				64	for(i=0;i<6;i++)
				65	Branchtab615[i].s[state] = (polys[i] < 0) ^ parity((2*state) & abs(polys[i])) ? 255 : 0;
				66	}
				67	Init++;
				68	}
				69
				70
				71	/* Viterbi chainback */
				72	int chainback_viterbi615_av(
				73	void *p,
				74	unsigned char data, / Decoded output data */
				75	unsigned int nbits, /* Number of data bits */
				76	unsigned int endstate){ /* Terminal encoder state */
				77	struct v615 *vp = p;
				78	decision_t d = (decision_t )vp->decisions;
				79	int path_metric;
				80
				81	endstate %= 16384;
				82
				83	path_metric = vp->old_metrics->s[endstate];
				84
				85	/* The store into data[] only needs to be done every 8 bits.
				86	* But this avoids a conditional branch, and the writes will
				87	* combine in the cache anyway
				88	*/
				89	d += 14; /* Look past tail */
				90	while(nbits-- != 0){
				91	int k;
				92
				93	k = (d[nbits].c[endstate >> 7][endstate & 15] & (0x80 >> ((endstate>>4)&7)) ) ? 1 : 0;
				94	endstate = (k << 13) \| (endstate >> 1);
				95	data[nbits>>3] = endstate >> 6;
				96	}
				97	return path_metric;
				98	}
				99
				100	/* Delete instance of a Viterbi decoder */
				101	void delete_viterbi615_av(void *p){
				102	struct v615 *vp = p;
				103
				104	if(vp != NULL){
				105	free(vp->decisions);
				106	free(vp);
				107	}
				108	}
				109
				110	int update_viterbi615_blk_av(void p,unsigned char syms,int nbits){
				111	struct v615 *vp = p;
				112	decision_t d = (decision_t )vp->dp;
				113	int path_metric = 0;
				114	vector unsigned char decisions = (vector unsigned char)(0);
				115
				116	while(nbits--){
				117	vector unsigned short symv,sym0v,sym1v,sym2v,sym3v,sym4v,sym5v;
				118	vector unsigned char s;
				119	void *tmp;
				120	int i;
				121
				122	/* Splat the 0th symbol across sym0v, the 1st symbol across sym1v, etc */
				123	s = (vector unsigned char)vec_perm(vec_ld(0,syms),vec_ld(5,syms),vec_lvsl(0,syms));
				124
				125	symv = (vector unsigned short)vec_mergeh((vector unsigned char)(0),s); /* Unsigned byte->word unpack */
				126	sym0v = vec_splat(symv,0);
				127	sym1v = vec_splat(symv,1);
				128	sym2v = vec_splat(symv,2);
				129	sym3v = vec_splat(symv,3);
				130	sym4v = vec_splat(symv,4);
				131	sym5v = vec_splat(symv,5);
				132	syms += 6;
				133
				134	for(i=0;i<1024;i++){
				135	vector bool short decision0,decision1;
				136	vector unsigned short metric,m_metric,m0,m1,m2,m3,survivor0,survivor1;
				137
				138	/* Form branch metrics
				139	* Because Branchtab takes on values 0 and 255, and the values of sym?v are offset binary in the range 0-255,
				140	* the XOR operations constitute conditional negation.
				141	* metric and m_metric (-metric) are in the range 0-1530
				142	*/
				143	m0 = vec_add(vec_xor(Branchtab615[0].v[i],sym0v),vec_xor(Branchtab615[1].v[i],sym1v));
				144	m1 = vec_add(vec_xor(Branchtab615[2].v[i],sym2v),vec_xor(Branchtab615[3].v[i],sym3v));
				145	m2 = vec_add(vec_xor(Branchtab615[4].v[i],sym4v),vec_xor(Branchtab615[5].v[i],sym5v));
				146	metric = vec_add(m0,m1);
				147	metric = vec_add(metric,m2);
				148	m_metric = vec_sub((vector unsigned short)(1530),metric);
				149
				150	/* Add branch metrics to path metrics */
				151	m0 = vec_adds(vp->old_metrics->v[i],metric);
				152	m3 = vec_adds(vp->old_metrics->v[1024+i],metric);
				153	m1 = vec_adds(vp->old_metrics->v[1024+i],m_metric);
				154	m2 = vec_adds(vp->old_metrics->v[i],m_metric);
				155
				156	/* Compare and select */
				157	decision0 = vec_cmpgt(m0,m1);
				158	decision1 = vec_cmpgt(m2,m3);
				159	survivor0 = vec_min(m0,m1);
				160	survivor1 = vec_min(m2,m3);
				161
				162	/* Store decisions and survivors.
				163	* To save space without SSE2's handy PMOVMSKB instruction, we pack and store them in
				164	* a funny interleaved fashion that we undo in the chainback function.
				165	*/
				166	decisions = vec_add(decisions,decisions); /* Shift each byte 1 bit to the left */
				167
				168	/* Booleans are either 0xff or 0x00. Subtracting 0x00 leaves the lsb zero; subtracting
				169	* 0xff is equivalent to adding 1, which sets the lsb.
				170	*/
				171	decisions = vec_sub(decisions,(vector unsigned char)vec_pack(vec_mergeh(decision0,decision1),vec_mergel(decision0,decision1)));
				172
				173	vp->new_metrics->v[2*i] = vec_mergeh(survivor0,survivor1);
				174	vp->new_metrics->v[2*i+1] = vec_mergel(survivor0,survivor1);
				175
				176	if((i % 8) == 7){
				177	/* We've accumulated a total of 128 decisions, stash and start again */
				178	d->v[i>>3] = decisions; /* No need to clear, the new bits will replace the old */
				179	}
				180	}
				181	#if 0
				182	/* Experimentally determine metric spread
				183	* The results are fixed for a given code and input symbol size
				184	*/
				185	{
				186	int i;
				187	vector unsigned short min_metric;
				188	vector unsigned short max_metric;
				189	union { vector unsigned short v; unsigned short s[8];} t;
				190	int minimum,maximum;
				191	static int max_spread = 0;
				192
				193	min_metric = max_metric = vp->new_metrics->v[0];
				194	for(i=1;i<2048;i++){
				195	min_metric = vec_min(min_metric,vp->new_metrics->v[i]);
				196	max_metric = vec_max(max_metric,vp->new_metrics->v[i]);
				197	}
				198	min_metric = vec_min(min_metric,vec_sld(min_metric,min_metric,8));
				199	max_metric = vec_max(max_metric,vec_sld(max_metric,max_metric,8));
				200	min_metric = vec_min(min_metric,vec_sld(min_metric,min_metric,4));
				201	max_metric = vec_max(max_metric,vec_sld(max_metric,max_metric,4));
				202	min_metric = vec_min(min_metric,vec_sld(min_metric,min_metric,2));
				203	max_metric = vec_max(max_metric,vec_sld(max_metric,max_metric,2));
				204
				205	t.v = min_metric;
				206	minimum = t.s[0];
				207	t.v = max_metric;
				208	maximum = t.s[0];
				209	if(maximum-minimum > max_spread){
				210	max_spread = maximum-minimum;
				211	printf("metric spread = %d\n",max_spread);
				212	}
				213	}
				214	#endif
				215
				216	/* Renormalize if necessary. This deserves some explanation.
				217
				218	* The maximum possible spread, found by experiment, for 4-bit symbols is 405; for 8 bit symbols, it's 12750.
				219	* So by looking at one arbitrary metric we can tell if any of them have possibly saturated.
				220	* However, this is very conservative. Large spreads occur only at very high Eb/No, where
				221	* saturating a bad path metric doesn't do much to increase its chances of being erroneously chosen as a survivor.
				222
				223	* At more interesting (low) Eb/No ratios, the spreads are much smaller so our chances of saturating a metric
				224	* by not not normalizing when we should are extremely low. So either way, the risk to performance is small.
				225
				226	* All this is borne out by experiment.
				227	*/
				228	if(vp->new_metrics->s[0] >= USHRT_MAX-12750){
				229	vector unsigned short scale;
				230	union { vector unsigned short v; unsigned short s[8];} t;
				231
				232	/* Find smallest metric and splat */
				233	scale = vp->new_metrics->v[0];
				234	for(i=1;i<2048;i++)
				235	scale = vec_min(scale,vp->new_metrics->v[i]);
				236
				237	scale = vec_min(scale,vec_sld(scale,scale,8));
				238	scale = vec_min(scale,vec_sld(scale,scale,4));
				239	scale = vec_min(scale,vec_sld(scale,scale,2));
				240
				241	/* Subtract it from all metrics
				242	* Work backwards to try to improve the cache hit ratio, assuming LRU
				243	*/
				244	for(i=2047;i>=0;i--)
				245	vp->new_metrics->v[i] = vec_subs(vp->new_metrics->v[i],scale);
				246	t.v = scale;
				247	path_metric += t.s[0];
				248	}
				249	d++;
				250	/* Swap pointers to old and new metrics */
				251	tmp = vp->old_metrics;
				252	vp->old_metrics = vp->new_metrics;
				253	vp->new_metrics = tmp;
				254	}
				255	vp->dp = d;
				256	return path_metric;
				257	}