Blame - src/dsp/dec_mips32.c - fp2-dev/platform/external/webp

blob: 3e89ed37aae7de6423b099383e591395a54c1c97 [file] [log] [blame]

Vikas Arora	af51b94	2014-08-28 10:51:12 -0700	[diff] [blame]	1	// Copyright 2014 Google Inc. All Rights Reserved.
				2	//
				3	// Use of this source code is governed by a BSD-style license
				4	// that can be found in the COPYING file in the root of the source
				5	// tree. An additional intellectual property rights grant can be found
				6	// in the file PATENTS. All contributing project authors may
				7	// be found in the AUTHORS file in the root of the source tree.
				8	// -----------------------------------------------------------------------------
				9	//
				10	// MIPS version of dsp functions
				11	//
				12	// Author(s): Djordje Pesut (djordje.pesut@imgtec.com)
				13	// Jovan Zelincevic (jovan.zelincevic@imgtec.com)
				14
				15	#include "./dsp.h"
				16
				17	#if defined(WEBP_USE_MIPS32)
				18
				19	static const int kC1 = 20091 + (1 << 16);
				20	static const int kC2 = 35468;
				21
				22	static WEBP_INLINE int abs_mips32(int x) {
				23	const int sign = x >> 31;
				24	return (x ^ sign) - sign;
				25	}
				26
				27	// 4 pixels in, 2 pixels out
				28	static WEBP_INLINE void do_filter2(uint8_t* p, int step) {
				29	const int p1 = p[-2 * step], p0 = p[-step], q0 = p[0], q1 = p[step];
				30	const int a = 3 * (q0 - p0) + VP8ksclip1[p1 - q1];
				31	const int a1 = VP8ksclip2[(a + 4) >> 3];
				32	const int a2 = VP8ksclip2[(a + 3) >> 3];
				33	p[-step] = VP8kclip1[p0 + a2];
				34	p[ 0] = VP8kclip1[q0 - a1];
				35	}
				36
				37	// 4 pixels in, 4 pixels out
				38	static WEBP_INLINE void do_filter4(uint8_t* p, int step) {
				39	const int p1 = p[-2 * step], p0 = p[-step], q0 = p[0], q1 = p[step];
				40	const int a = 3 * (q0 - p0);
				41	const int a1 = VP8ksclip2[(a + 4) >> 3];
				42	const int a2 = VP8ksclip2[(a + 3) >> 3];
				43	const int a3 = (a1 + 1) >> 1;
				44	p[-2 * step] = VP8kclip1[p1 + a3];
				45	p[- step] = VP8kclip1[p0 + a2];
				46	p[ 0] = VP8kclip1[q0 - a1];
				47	p[ step] = VP8kclip1[q1 - a3];
				48	}
				49
				50	// 6 pixels in, 6 pixels out
				51	static WEBP_INLINE void do_filter6(uint8_t* p, int step) {
				52	const int p2 = p[-3 * step], p1 = p[-2 * step], p0 = p[-step];
				53	const int q0 = p[0], q1 = p[step], q2 = p[2 * step];
				54	const int a = VP8ksclip1[3 * (q0 - p0) + VP8ksclip1[p1 - q1]];
				55	const int a1 = (27 * a + 63) >> 7; // eq. to ((3 * a + 7) * 9) >> 7
				56	const int a2 = (18 * a + 63) >> 7; // eq. to ((2 * a + 7) * 9) >> 7
				57	const int a3 = (9 * a + 63) >> 7; // eq. to ((1 * a + 7) * 9) >> 7
				58	p[-3 * step] = VP8kclip1[p2 + a3];
				59	p[-2 * step] = VP8kclip1[p1 + a2];
				60	p[- step] = VP8kclip1[p0 + a1];
				61	p[ 0] = VP8kclip1[q0 - a1];
				62	p[ step] = VP8kclip1[q1 - a2];
				63	p[ 2 * step] = VP8kclip1[q2 - a3];
				64	}
				65
				66	static WEBP_INLINE int hev(const uint8_t* p, int step, int thresh) {
				67	const int p1 = p[-2 * step], p0 = p[-step], q0 = p[0], q1 = p[step];
				68	return (abs_mips32(p1 - p0) > thresh) \|\| (abs_mips32(q1 - q0) > thresh);
				69	}
				70
				71	static WEBP_INLINE int needs_filter(const uint8_t* p, int step, int thresh) {
				72	const int p1 = p[-2 * step], p0 = p[-step], q0 = p[0], q1 = p[step];
				73	return ((2 * abs_mips32(p0 - q0) + (abs_mips32(p1 - q1) >> 1)) <= thresh);
				74	}
				75
				76	static WEBP_INLINE int needs_filter2(const uint8_t* p,
				77	int step, int t, int it) {
				78	const int p3 = p[-4 * step], p2 = p[-3 * step];
				79	const int p1 = p[-2 * step], p0 = p[-step];
				80	const int q0 = p[0], q1 = p[step], q2 = p[2 * step], q3 = p[3 * step];
				81	if ((2 * abs_mips32(p0 - q0) + (abs_mips32(p1 - q1) >> 1)) > t) {
				82	return 0;
				83	}
				84	return abs_mips32(p3 - p2) <= it && abs_mips32(p2 - p1) <= it &&
				85	abs_mips32(p1 - p0) <= it && abs_mips32(q3 - q2) <= it &&
				86	abs_mips32(q2 - q1) <= it && abs_mips32(q1 - q0) <= it;
				87	}
				88
				89	static WEBP_INLINE void FilterLoop26(uint8_t* p,
				90	int hstride, int vstride, int size,
				91	int thresh, int ithresh, int hev_thresh) {
				92	while (size-- > 0) {
				93	if (needs_filter2(p, hstride, thresh, ithresh)) {
				94	if (hev(p, hstride, hev_thresh)) {
				95	do_filter2(p, hstride);
				96	} else {
				97	do_filter6(p, hstride);
				98	}
				99	}
				100	p += vstride;
				101	}
				102	}
				103
				104	static WEBP_INLINE void FilterLoop24(uint8_t* p,
				105	int hstride, int vstride, int size,
				106	int thresh, int ithresh, int hev_thresh) {
				107	while (size-- > 0) {
				108	if (needs_filter2(p, hstride, thresh, ithresh)) {
				109	if (hev(p, hstride, hev_thresh)) {
				110	do_filter2(p, hstride);
				111	} else {
				112	do_filter4(p, hstride);
				113	}
				114	}
				115	p += vstride;
				116	}
				117	}
				118
				119	// on macroblock edges
				120	static void VFilter16(uint8_t* p, int stride,
				121	int thresh, int ithresh, int hev_thresh) {
				122	FilterLoop26(p, stride, 1, 16, thresh, ithresh, hev_thresh);
				123	}
				124
				125	static void HFilter16(uint8_t* p, int stride,
				126	int thresh, int ithresh, int hev_thresh) {
				127	FilterLoop26(p, 1, stride, 16, thresh, ithresh, hev_thresh);
				128	}
				129
				130	// 8-pixels wide variant, for chroma filtering
				131	static void VFilter8(uint8_t* u, uint8_t* v, int stride,
				132	int thresh, int ithresh, int hev_thresh) {
				133	FilterLoop26(u, stride, 1, 8, thresh, ithresh, hev_thresh);
				134	FilterLoop26(v, stride, 1, 8, thresh, ithresh, hev_thresh);
				135	}
				136
				137	static void HFilter8(uint8_t* u, uint8_t* v, int stride,
				138	int thresh, int ithresh, int hev_thresh) {
				139	FilterLoop26(u, 1, stride, 8, thresh, ithresh, hev_thresh);
				140	FilterLoop26(v, 1, stride, 8, thresh, ithresh, hev_thresh);
				141	}
				142
				143	static void VFilter8i(uint8_t* u, uint8_t* v, int stride,
				144	int thresh, int ithresh, int hev_thresh) {
				145	FilterLoop24(u + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
				146	FilterLoop24(v + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
				147	}
				148
				149	static void HFilter8i(uint8_t* u, uint8_t* v, int stride,
				150	int thresh, int ithresh, int hev_thresh) {
				151	FilterLoop24(u + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
				152	FilterLoop24(v + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
				153	}
				154
				155	// on three inner edges
				156	static void VFilter16i(uint8_t* p, int stride,
				157	int thresh, int ithresh, int hev_thresh) {
				158	int k;
				159	for (k = 3; k > 0; --k) {
				160	p += 4 * stride;
				161	FilterLoop24(p, stride, 1, 16, thresh, ithresh, hev_thresh);
				162	}
				163	}
				164
				165	static void HFilter16i(uint8_t* p, int stride,
				166	int thresh, int ithresh, int hev_thresh) {
				167	int k;
				168	for (k = 3; k > 0; --k) {
				169	p += 4;
				170	FilterLoop24(p, 1, stride, 16, thresh, ithresh, hev_thresh);
				171	}
				172	}
				173
				174	//------------------------------------------------------------------------------
				175	// Simple In-loop filtering (Paragraph 15.2)
				176
				177	static void SimpleVFilter16(uint8_t* p, int stride, int thresh) {
				178	int i;
				179	for (i = 0; i < 16; ++i) {
				180	if (needs_filter(p + i, stride, thresh)) {
				181	do_filter2(p + i, stride);
				182	}
				183	}
				184	}
				185
				186	static void SimpleHFilter16(uint8_t* p, int stride, int thresh) {
				187	int i;
				188	for (i = 0; i < 16; ++i) {
				189	if (needs_filter(p + i * stride, 1, thresh)) {
				190	do_filter2(p + i * stride, 1);
				191	}
				192	}
				193	}
				194
				195	static void SimpleVFilter16i(uint8_t* p, int stride, int thresh) {
				196	int k;
				197	for (k = 3; k > 0; --k) {
				198	p += 4 * stride;
				199	SimpleVFilter16(p, stride, thresh);
				200	}
				201	}
				202
				203	static void SimpleHFilter16i(uint8_t* p, int stride, int thresh) {
				204	int k;
				205	for (k = 3; k > 0; --k) {
				206	p += 4;
				207	SimpleHFilter16(p, stride, thresh);
				208	}
				209	}
				210
				211	static void TransformOne(const int16_t* in, uint8_t* dst) {
				212	int temp0, temp1, temp2, temp3, temp4;
				213	int temp5, temp6, temp7, temp8, temp9;
				214	int temp10, temp11, temp12, temp13, temp14;
				215	int temp15, temp16, temp17, temp18;
				216	int16_t* p_in = (int16_t*)in;
				217
				218	// loops unrolled and merged to avoid usage of tmp buffer
				219	// and to reduce number of stalls. MUL macro is written
				220	// in assembler and inlined
				221	__asm__ volatile(
				222	"lh %[temp0], 0(%[in]) \n\t"
				223	"lh %[temp8], 16(%[in]) \n\t"
				224	"lh %[temp4], 8(%[in]) \n\t"
				225	"lh %[temp12], 24(%[in]) \n\t"
				226	"addu %[temp16], %[temp0], %[temp8] \n\t"
				227	"subu %[temp0], %[temp0], %[temp8] \n\t"
				228	"mul %[temp8], %[temp4], %[kC2] \n\t"
				229	"mul %[temp17], %[temp12], %[kC1] \n\t"
				230	"mul %[temp4], %[temp4], %[kC1] \n\t"
				231	"mul %[temp12], %[temp12], %[kC2] \n\t"
				232	"lh %[temp1], 2(%[in]) \n\t"
				233	"lh %[temp5], 10(%[in]) \n\t"
				234	"lh %[temp9], 18(%[in]) \n\t"
				235	"lh %[temp13], 26(%[in]) \n\t"
				236	"sra %[temp8], %[temp8], 16 \n\t"
				237	"sra %[temp17], %[temp17], 16 \n\t"
				238	"sra %[temp4], %[temp4], 16 \n\t"
				239	"sra %[temp12], %[temp12], 16 \n\t"
				240	"lh %[temp2], 4(%[in]) \n\t"
				241	"lh %[temp6], 12(%[in]) \n\t"
				242	"lh %[temp10], 20(%[in]) \n\t"
				243	"lh %[temp14], 28(%[in]) \n\t"
				244	"subu %[temp17], %[temp8], %[temp17] \n\t"
				245	"addu %[temp4], %[temp4], %[temp12] \n\t"
				246	"addu %[temp8], %[temp16], %[temp4] \n\t"
				247	"subu %[temp4], %[temp16], %[temp4] \n\t"
				248	"addu %[temp16], %[temp1], %[temp9] \n\t"
				249	"subu %[temp1], %[temp1], %[temp9] \n\t"
				250	"lh %[temp3], 6(%[in]) \n\t"
				251	"lh %[temp7], 14(%[in]) \n\t"
				252	"lh %[temp11], 22(%[in]) \n\t"
				253	"lh %[temp15], 30(%[in]) \n\t"
				254	"addu %[temp12], %[temp0], %[temp17] \n\t"
				255	"subu %[temp0], %[temp0], %[temp17] \n\t"
				256	"mul %[temp9], %[temp5], %[kC2] \n\t"
				257	"mul %[temp17], %[temp13], %[kC1] \n\t"
				258	"mul %[temp5], %[temp5], %[kC1] \n\t"
				259	"mul %[temp13], %[temp13], %[kC2] \n\t"
				260	"sra %[temp9], %[temp9], 16 \n\t"
				261	"sra %[temp17], %[temp17], 16 \n\t"
				262	"subu %[temp17], %[temp9], %[temp17] \n\t"
				263	"sra %[temp5], %[temp5], 16 \n\t"
				264	"sra %[temp13], %[temp13], 16 \n\t"
				265	"addu %[temp5], %[temp5], %[temp13] \n\t"
				266	"addu %[temp13], %[temp1], %[temp17] \n\t"
				267	"subu %[temp1], %[temp1], %[temp17] \n\t"
				268	"mul %[temp17], %[temp14], %[kC1] \n\t"
				269	"mul %[temp14], %[temp14], %[kC2] \n\t"
				270	"addu %[temp9], %[temp16], %[temp5] \n\t"
				271	"subu %[temp5], %[temp16], %[temp5] \n\t"
				272	"addu %[temp16], %[temp2], %[temp10] \n\t"
				273	"subu %[temp2], %[temp2], %[temp10] \n\t"
				274	"mul %[temp10], %[temp6], %[kC2] \n\t"
				275	"mul %[temp6], %[temp6], %[kC1] \n\t"
				276	"sra %[temp17], %[temp17], 16 \n\t"
				277	"sra %[temp14], %[temp14], 16 \n\t"
				278	"sra %[temp10], %[temp10], 16 \n\t"
				279	"sra %[temp6], %[temp6], 16 \n\t"
				280	"subu %[temp17], %[temp10], %[temp17] \n\t"
				281	"addu %[temp6], %[temp6], %[temp14] \n\t"
				282	"addu %[temp10], %[temp16], %[temp6] \n\t"
				283	"subu %[temp6], %[temp16], %[temp6] \n\t"
				284	"addu %[temp14], %[temp2], %[temp17] \n\t"
				285	"subu %[temp2], %[temp2], %[temp17] \n\t"
				286	"mul %[temp17], %[temp15], %[kC1] \n\t"
				287	"mul %[temp15], %[temp15], %[kC2] \n\t"
				288	"addu %[temp16], %[temp3], %[temp11] \n\t"
				289	"subu %[temp3], %[temp3], %[temp11] \n\t"
				290	"mul %[temp11], %[temp7], %[kC2] \n\t"
				291	"mul %[temp7], %[temp7], %[kC1] \n\t"
				292	"addiu %[temp8], %[temp8], 4 \n\t"
				293	"addiu %[temp12], %[temp12], 4 \n\t"
				294	"addiu %[temp0], %[temp0], 4 \n\t"
				295	"addiu %[temp4], %[temp4], 4 \n\t"
				296	"sra %[temp17], %[temp17], 16 \n\t"
				297	"sra %[temp15], %[temp15], 16 \n\t"
				298	"sra %[temp11], %[temp11], 16 \n\t"
				299	"sra %[temp7], %[temp7], 16 \n\t"
				300	"subu %[temp17], %[temp11], %[temp17] \n\t"
				301	"addu %[temp7], %[temp7], %[temp15] \n\t"
				302	"addu %[temp15], %[temp3], %[temp17] \n\t"
				303	"subu %[temp3], %[temp3], %[temp17] \n\t"
				304	"addu %[temp11], %[temp16], %[temp7] \n\t"
				305	"subu %[temp7], %[temp16], %[temp7] \n\t"
				306	"addu %[temp16], %[temp8], %[temp10] \n\t"
				307	"subu %[temp8], %[temp8], %[temp10] \n\t"
				308	"mul %[temp10], %[temp9], %[kC2] \n\t"
				309	"mul %[temp17], %[temp11], %[kC1] \n\t"
				310	"mul %[temp9], %[temp9], %[kC1] \n\t"
				311	"mul %[temp11], %[temp11], %[kC2] \n\t"
				312	"sra %[temp10], %[temp10], 16 \n\t"
				313	"sra %[temp17], %[temp17], 16 \n\t"
				314	"sra %[temp9], %[temp9], 16 \n\t"
				315	"sra %[temp11], %[temp11], 16 \n\t"
				316	"subu %[temp17], %[temp10], %[temp17] \n\t"
				317	"addu %[temp11], %[temp9], %[temp11] \n\t"
				318	"addu %[temp10], %[temp12], %[temp14] \n\t"
				319	"subu %[temp12], %[temp12], %[temp14] \n\t"
				320	"mul %[temp14], %[temp13], %[kC2] \n\t"
				321	"mul %[temp9], %[temp15], %[kC1] \n\t"
				322	"mul %[temp13], %[temp13], %[kC1] \n\t"
				323	"mul %[temp15], %[temp15], %[kC2] \n\t"
				324	"sra %[temp14], %[temp14], 16 \n\t"
				325	"sra %[temp9], %[temp9], 16 \n\t"
				326	"sra %[temp13], %[temp13], 16 \n\t"
				327	"sra %[temp15], %[temp15], 16 \n\t"
				328	"subu %[temp9], %[temp14], %[temp9] \n\t"
				329	"addu %[temp15], %[temp13], %[temp15] \n\t"
				330	"addu %[temp14], %[temp0], %[temp2] \n\t"
				331	"subu %[temp0], %[temp0], %[temp2] \n\t"
				332	"mul %[temp2], %[temp1], %[kC2] \n\t"
				333	"mul %[temp13], %[temp3], %[kC1] \n\t"
				334	"mul %[temp1], %[temp1], %[kC1] \n\t"
				335	"mul %[temp3], %[temp3], %[kC2] \n\t"
				336	"sra %[temp2], %[temp2], 16 \n\t"
				337	"sra %[temp13], %[temp13], 16 \n\t"
				338	"sra %[temp1], %[temp1], 16 \n\t"
				339	"sra %[temp3], %[temp3], 16 \n\t"
				340	"subu %[temp13], %[temp2], %[temp13] \n\t"
				341	"addu %[temp3], %[temp1], %[temp3] \n\t"
				342	"addu %[temp2], %[temp4], %[temp6] \n\t"
				343	"subu %[temp4], %[temp4], %[temp6] \n\t"
				344	"mul %[temp6], %[temp5], %[kC2] \n\t"
				345	"mul %[temp1], %[temp7], %[kC1] \n\t"
				346	"mul %[temp5], %[temp5], %[kC1] \n\t"
				347	"mul %[temp7], %[temp7], %[kC2] \n\t"
				348	"sra %[temp6], %[temp6], 16 \n\t"
				349	"sra %[temp1], %[temp1], 16 \n\t"
				350	"sra %[temp5], %[temp5], 16 \n\t"
				351	"sra %[temp7], %[temp7], 16 \n\t"
				352	"subu %[temp1], %[temp6], %[temp1] \n\t"
				353	"addu %[temp7], %[temp5], %[temp7] \n\t"
				354	"addu %[temp5], %[temp16], %[temp11] \n\t"
				355	"subu %[temp16], %[temp16], %[temp11] \n\t"
				356	"addu %[temp11], %[temp8], %[temp17] \n\t"
				357	"subu %[temp8], %[temp8], %[temp17] \n\t"
				358	"sra %[temp5], %[temp5], 3 \n\t"
				359	"sra %[temp16], %[temp16], 3 \n\t"
				360	"sra %[temp11], %[temp11], 3 \n\t"
				361	"sra %[temp8], %[temp8], 3 \n\t"
				362	"addu %[temp17], %[temp10], %[temp15] \n\t"
				363	"subu %[temp10], %[temp10], %[temp15] \n\t"
				364	"addu %[temp15], %[temp12], %[temp9] \n\t"
				365	"subu %[temp12], %[temp12], %[temp9] \n\t"
				366	"sra %[temp17], %[temp17], 3 \n\t"
				367	"sra %[temp10], %[temp10], 3 \n\t"
				368	"sra %[temp15], %[temp15], 3 \n\t"
				369	"sra %[temp12], %[temp12], 3 \n\t"
				370	"addu %[temp9], %[temp14], %[temp3] \n\t"
				371	"subu %[temp14], %[temp14], %[temp3] \n\t"
				372	"addu %[temp3], %[temp0], %[temp13] \n\t"
				373	"subu %[temp0], %[temp0], %[temp13] \n\t"
				374	"sra %[temp9], %[temp9], 3 \n\t"
				375	"sra %[temp14], %[temp14], 3 \n\t"
				376	"sra %[temp3], %[temp3], 3 \n\t"
				377	"sra %[temp0], %[temp0], 3 \n\t"
				378	"addu %[temp13], %[temp2], %[temp7] \n\t"
				379	"subu %[temp2], %[temp2], %[temp7] \n\t"
				380	"addu %[temp7], %[temp4], %[temp1] \n\t"
				381	"subu %[temp4], %[temp4], %[temp1] \n\t"
				382	"sra %[temp13], %[temp13], 3 \n\t"
				383	"sra %[temp2], %[temp2], 3 \n\t"
				384	"sra %[temp7], %[temp7], 3 \n\t"
				385	"sra %[temp4], %[temp4], 3 \n\t"
				386	"addiu %[temp6], $zero, 255 \n\t"
				387	"lbu %[temp1], 0(%[dst]) \n\t"
				388	"addu %[temp1], %[temp1], %[temp5] \n\t"
				389	"sra %[temp5], %[temp1], 8 \n\t"
				390	"sra %[temp18], %[temp1], 31 \n\t"
				391	"beqz %[temp5], 1f \n\t"
				392	"xor %[temp1], %[temp1], %[temp1] \n\t"
				393	"movz %[temp1], %[temp6], %[temp18] \n\t"
				394	"1: \n\t"
				395	"lbu %[temp18], 1(%[dst]) \n\t"
				396	"sb %[temp1], 0(%[dst]) \n\t"
				397	"addu %[temp18], %[temp18], %[temp11] \n\t"
				398	"sra %[temp11], %[temp18], 8 \n\t"
				399	"sra %[temp1], %[temp18], 31 \n\t"
				400	"beqz %[temp11], 2f \n\t"
				401	"xor %[temp18], %[temp18], %[temp18] \n\t"
				402	"movz %[temp18], %[temp6], %[temp1] \n\t"
				403	"2: \n\t"
				404	"lbu %[temp1], 2(%[dst]) \n\t"
				405	"sb %[temp18], 1(%[dst]) \n\t"
				406	"addu %[temp1], %[temp1], %[temp8] \n\t"
				407	"sra %[temp8], %[temp1], 8 \n\t"
				408	"sra %[temp18], %[temp1], 31 \n\t"
				409	"beqz %[temp8], 3f \n\t"
				410	"xor %[temp1], %[temp1], %[temp1] \n\t"
				411	"movz %[temp1], %[temp6], %[temp18] \n\t"
				412	"3: \n\t"
				413	"lbu %[temp18], 3(%[dst]) \n\t"
				414	"sb %[temp1], 2(%[dst]) \n\t"
				415	"addu %[temp18], %[temp18], %[temp16] \n\t"
				416	"sra %[temp16], %[temp18], 8 \n\t"
				417	"sra %[temp1], %[temp18], 31 \n\t"
				418	"beqz %[temp16], 4f \n\t"
				419	"xor %[temp18], %[temp18], %[temp18] \n\t"
				420	"movz %[temp18], %[temp6], %[temp1] \n\t"
				421	"4: \n\t"
				422	"sb %[temp18], 3(%[dst]) \n\t"
				423	"lbu %[temp5], 32(%[dst]) \n\t"
				424	"lbu %[temp8], 33(%[dst]) \n\t"
				425	"lbu %[temp11], 34(%[dst]) \n\t"
				426	"lbu %[temp16], 35(%[dst]) \n\t"
				427	"addu %[temp5], %[temp5], %[temp17] \n\t"
				428	"addu %[temp8], %[temp8], %[temp15] \n\t"
				429	"addu %[temp11], %[temp11], %[temp12] \n\t"
				430	"addu %[temp16], %[temp16], %[temp10] \n\t"
				431	"sra %[temp18], %[temp5], 8 \n\t"
				432	"sra %[temp1], %[temp5], 31 \n\t"
				433	"beqz %[temp18], 5f \n\t"
				434	"xor %[temp5], %[temp5], %[temp5] \n\t"
				435	"movz %[temp5], %[temp6], %[temp1] \n\t"
				436	"5: \n\t"
				437	"sra %[temp18], %[temp8], 8 \n\t"
				438	"sra %[temp1], %[temp8], 31 \n\t"
				439	"beqz %[temp18], 6f \n\t"
				440	"xor %[temp8], %[temp8], %[temp8] \n\t"
				441	"movz %[temp8], %[temp6], %[temp1] \n\t"
				442	"6: \n\t"
				443	"sra %[temp18], %[temp11], 8 \n\t"
				444	"sra %[temp1], %[temp11], 31 \n\t"
				445	"sra %[temp17], %[temp16], 8 \n\t"
				446	"sra %[temp15], %[temp16], 31 \n\t"
				447	"beqz %[temp18], 7f \n\t"
				448	"xor %[temp11], %[temp11], %[temp11] \n\t"
				449	"movz %[temp11], %[temp6], %[temp1] \n\t"
				450	"7: \n\t"
				451	"beqz %[temp17], 8f \n\t"
				452	"xor %[temp16], %[temp16], %[temp16] \n\t"
				453	"movz %[temp16], %[temp6], %[temp15] \n\t"
				454	"8: \n\t"
				455	"sb %[temp5], 32(%[dst]) \n\t"
				456	"sb %[temp8], 33(%[dst]) \n\t"
				457	"sb %[temp11], 34(%[dst]) \n\t"
				458	"sb %[temp16], 35(%[dst]) \n\t"
				459	"lbu %[temp5], 64(%[dst]) \n\t"
				460	"lbu %[temp8], 65(%[dst]) \n\t"
				461	"lbu %[temp11], 66(%[dst]) \n\t"
				462	"lbu %[temp16], 67(%[dst]) \n\t"
				463	"addu %[temp5], %[temp5], %[temp9] \n\t"
				464	"addu %[temp8], %[temp8], %[temp3] \n\t"
				465	"addu %[temp11], %[temp11], %[temp0] \n\t"
				466	"addu %[temp16], %[temp16], %[temp14] \n\t"
				467	"sra %[temp18], %[temp5], 8 \n\t"
				468	"sra %[temp1], %[temp5], 31 \n\t"
				469	"sra %[temp17], %[temp8], 8 \n\t"
				470	"sra %[temp15], %[temp8], 31 \n\t"
				471	"sra %[temp12], %[temp11], 8 \n\t"
				472	"sra %[temp10], %[temp11], 31 \n\t"
				473	"sra %[temp9], %[temp16], 8 \n\t"
				474	"sra %[temp3], %[temp16], 31 \n\t"
				475	"beqz %[temp18], 9f \n\t"
				476	"xor %[temp5], %[temp5], %[temp5] \n\t"
				477	"movz %[temp5], %[temp6], %[temp1] \n\t"
				478	"9: \n\t"
				479	"beqz %[temp17], 10f \n\t"
				480	"xor %[temp8], %[temp8], %[temp8] \n\t"
				481	"movz %[temp8], %[temp6], %[temp15] \n\t"
				482	"10: \n\t"
				483	"beqz %[temp12], 11f \n\t"
				484	"xor %[temp11], %[temp11], %[temp11] \n\t"
				485	"movz %[temp11], %[temp6], %[temp10] \n\t"
				486	"11: \n\t"
				487	"beqz %[temp9], 12f \n\t"
				488	"xor %[temp16], %[temp16], %[temp16] \n\t"
				489	"movz %[temp16], %[temp6], %[temp3] \n\t"
				490	"12: \n\t"
				491	"sb %[temp5], 64(%[dst]) \n\t"
				492	"sb %[temp8], 65(%[dst]) \n\t"
				493	"sb %[temp11], 66(%[dst]) \n\t"
				494	"sb %[temp16], 67(%[dst]) \n\t"
				495	"lbu %[temp5], 96(%[dst]) \n\t"
				496	"lbu %[temp8], 97(%[dst]) \n\t"
				497	"lbu %[temp11], 98(%[dst]) \n\t"
				498	"lbu %[temp16], 99(%[dst]) \n\t"
				499	"addu %[temp5], %[temp5], %[temp13] \n\t"
				500	"addu %[temp8], %[temp8], %[temp7] \n\t"
				501	"addu %[temp11], %[temp11], %[temp4] \n\t"
				502	"addu %[temp16], %[temp16], %[temp2] \n\t"
				503	"sra %[temp18], %[temp5], 8 \n\t"
				504	"sra %[temp1], %[temp5], 31 \n\t"
				505	"sra %[temp17], %[temp8], 8 \n\t"
				506	"sra %[temp15], %[temp8], 31 \n\t"
				507	"sra %[temp12], %[temp11], 8 \n\t"
				508	"sra %[temp10], %[temp11], 31 \n\t"
				509	"sra %[temp9], %[temp16], 8 \n\t"
				510	"sra %[temp3], %[temp16], 31 \n\t"
				511	"beqz %[temp18], 13f \n\t"
				512	"xor %[temp5], %[temp5], %[temp5] \n\t"
				513	"movz %[temp5], %[temp6], %[temp1] \n\t"
				514	"13: \n\t"
				515	"beqz %[temp17], 14f \n\t"
				516	"xor %[temp8], %[temp8], %[temp8] \n\t"
				517	"movz %[temp8], %[temp6], %[temp15] \n\t"
				518	"14: \n\t"
				519	"beqz %[temp12], 15f \n\t"
				520	"xor %[temp11], %[temp11], %[temp11] \n\t"
				521	"movz %[temp11], %[temp6], %[temp10] \n\t"
				522	"15: \n\t"
				523	"beqz %[temp9], 16f \n\t"
				524	"xor %[temp16], %[temp16], %[temp16] \n\t"
				525	"movz %[temp16], %[temp6], %[temp3] \n\t"
				526	"16: \n\t"
				527	"sb %[temp5], 96(%[dst]) \n\t"
				528	"sb %[temp8], 97(%[dst]) \n\t"
				529	"sb %[temp11], 98(%[dst]) \n\t"
				530	"sb %[temp16], 99(%[dst]) \n\t"
				531
				532	: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
				533	[temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
				534	[temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8),
				535	[temp9]"=&r"(temp9), [temp10]"=&r"(temp10), [temp11]"=&r"(temp11),
				536	[temp12]"=&r"(temp12), [temp13]"=&r"(temp13), [temp14]"=&r"(temp14),
				537	[temp15]"=&r"(temp15), [temp16]"=&r"(temp16), [temp17]"=&r"(temp17),
				538	[temp18]"=&r"(temp18)
				539	: [in]"r"(p_in), [kC1]"r"(kC1), [kC2]"r"(kC2), [dst]"r"(dst)
				540	: "memory", "hi", "lo"
				541	);
				542	}
				543
				544	static void TransformTwo(const int16_t* in, uint8_t* dst, int do_two) {
				545	TransformOne(in, dst);
				546	if (do_two) {
				547	TransformOne(in + 16, dst + 4);
				548	}
				549	}
				550
				551	#endif // WEBP_USE_MIPS32
				552
				553	//------------------------------------------------------------------------------
				554	// Entry point
				555
				556	extern void VP8DspInitMIPS32(void);
				557
				558	void VP8DspInitMIPS32(void) {
				559	#if defined(WEBP_USE_MIPS32)
				560	VP8InitClipTables();
				561
				562	VP8Transform = TransformTwo;
				563
				564	VP8VFilter16 = VFilter16;
				565	VP8HFilter16 = HFilter16;
				566	VP8VFilter8 = VFilter8;
				567	VP8HFilter8 = HFilter8;
				568	VP8VFilter16i = VFilter16i;
				569	VP8HFilter16i = HFilter16i;
				570	VP8VFilter8i = VFilter8i;
				571	VP8HFilter8i = HFilter8i;
				572
				573	VP8SimpleVFilter16 = SimpleVFilter16;
				574	VP8SimpleHFilter16 = SimpleHFilter16;
				575	VP8SimpleVFilter16i = SimpleVFilter16i;
				576	VP8SimpleHFilter16i = SimpleHFilter16i;
				577	#endif // WEBP_USE_MIPS32
				578	}