blob: 7235abb11cc3e51f2b1a480e42112bb09a190226 [file] [log] [blame]
J. Duke319a3b92007-12-01 00:00:00 +00001/*
2 * Copyright 1998-2003 Sun Microsystems, Inc. All Rights Reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Sun designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Sun in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
22 * CA 95054 USA or visit www.sun.com if you need additional information or
23 * have any questions.
24 */
25
26
27
28#include "vis_proto.h"
29#include "mlib_image.h"
30#include "mlib_v_ImageLookUpFunc.h"
31
32/***************************************************************/
33static void mlib_v_ImageLookUpSI_S16_U8_2_DstA8D1(const mlib_s16 *src,
34 mlib_u8 *dst,
35 mlib_s32 xsize,
36 const mlib_u8 **table);
37
38static void mlib_v_ImageLookUpSI_S16_U8_2_D1(const mlib_s16 *src,
39 mlib_u8 *dst,
40 mlib_s32 xsize,
41 const mlib_u8 **table);
42
43static void mlib_v_ImageLookUpSI_S16_U8_3_D1(const mlib_s16 *src,
44 mlib_u8 *dst,
45 mlib_s32 xsize,
46 const mlib_u8 **table);
47
48static void mlib_v_ImageLookUpSI_S16_U8_4_DstOff0_D1(const mlib_s16 *src,
49 mlib_u8 *dst,
50 mlib_s32 xsize,
51 const mlib_u8 **table);
52
53static void mlib_v_ImageLookUpSI_S16_U8_4_DstOff1_D1(const mlib_s16 *src,
54 mlib_u8 *dst,
55 mlib_s32 xsize,
56 const mlib_u8 **table);
57
58static void mlib_v_ImageLookUpSI_S16_U8_4_DstOff2_D1(const mlib_s16 *src,
59 mlib_u8 *dst,
60 mlib_s32 xsize,
61 const mlib_u8 **table);
62
63static void mlib_v_ImageLookUpSI_S16_U8_4_DstOff3_D1(const mlib_s16 *src,
64 mlib_u8 *dst,
65 mlib_s32 xsize,
66 const mlib_u8 **table);
67
68/***************************************************************/
/*
 * Wrapper around vis_ld_u8_i(): casts the table base to an untyped pointer
 * and forwards the index unchanged.
 */
#define VIS_LD_U8_I(BASE, IDX)  vis_ld_u8_i((void *)(BASE), (IDX))
70
71/***************************************************************/
72void mlib_v_ImageLookUpSI_S16_U8_2_DstA8D1(const mlib_s16 *src,
73 mlib_u8 *dst,
74 mlib_s32 xsize,
75 const mlib_u8 **table)
76{
77 mlib_s16 *sp; /* pointer to source data */
78 mlib_s32 s0, s1, s2, s3; /* source data */
79 mlib_u16 *dl; /* pointer to start of destination */
80 mlib_u16 *dend; /* pointer to end of destination */
81 mlib_d64 *dp; /* aligned pointer to destination */
82 mlib_d64 t0, t1, t2; /* destination data */
83 mlib_d64 t3, t4, t5; /* destination data */
84 mlib_d64 t6, t7, acc; /* destination data */
85 mlib_s32 emask; /* edge mask */
86 mlib_s32 i, num; /* loop variable */
87 const mlib_u8 *tab0 = &table[0][32768];
88 const mlib_u8 *tab1 = &table[1][32768];
89
90 sp = (void *)src;
91 dl = (mlib_u16 *) dst;
92 dp = (mlib_d64 *) dl;
93 dend = dl + xsize - 1;
94
95 vis_alignaddr((void *)0, 7);
96
97 if (xsize >= 4) {
98
99 s0 = sp[0];
100 s1 = sp[1];
101 s2 = sp[2];
102 s3 = sp[3];
103 sp += 4;
104
105#pragma pipeloop(0)
106 for (i = 0; i <= xsize - 8; i += 4, sp += 4) {
107 t7 = VIS_LD_U8_I(tab1, s3);
108 t6 = VIS_LD_U8_I(tab0, s3);
109 t5 = VIS_LD_U8_I(tab1, s2);
110 t4 = VIS_LD_U8_I(tab0, s2);
111 t3 = VIS_LD_U8_I(tab1, s1);
112 t2 = VIS_LD_U8_I(tab0, s1);
113 t1 = VIS_LD_U8_I(tab1, s0);
114 t0 = VIS_LD_U8_I(tab0, s0);
115 acc = vis_faligndata(t7, acc);
116 acc = vis_faligndata(t6, acc);
117 acc = vis_faligndata(t5, acc);
118 acc = vis_faligndata(t4, acc);
119 acc = vis_faligndata(t3, acc);
120 acc = vis_faligndata(t2, acc);
121 acc = vis_faligndata(t1, acc);
122 acc = vis_faligndata(t0, acc);
123 s0 = sp[0];
124 s1 = sp[1];
125 s2 = sp[2];
126 s3 = sp[3];
127 *dp++ = acc;
128 }
129
130 t7 = VIS_LD_U8_I(tab1, s3);
131 t6 = VIS_LD_U8_I(tab0, s3);
132 t5 = VIS_LD_U8_I(tab1, s2);
133 t4 = VIS_LD_U8_I(tab0, s2);
134 t3 = VIS_LD_U8_I(tab1, s1);
135 t2 = VIS_LD_U8_I(tab0, s1);
136 t1 = VIS_LD_U8_I(tab1, s0);
137 t0 = VIS_LD_U8_I(tab0, s0);
138 acc = vis_faligndata(t7, acc);
139 acc = vis_faligndata(t6, acc);
140 acc = vis_faligndata(t5, acc);
141 acc = vis_faligndata(t4, acc);
142 acc = vis_faligndata(t3, acc);
143 acc = vis_faligndata(t2, acc);
144 acc = vis_faligndata(t1, acc);
145 acc = vis_faligndata(t0, acc);
146 *dp++ = acc;
147 }
148
149 if ((mlib_addr) dp <= (mlib_addr) dend) {
150
151 num = (mlib_u16 *) dend - (mlib_u16 *) dp;
152 sp += num;
153 num++;
154#pragma pipeloop(0)
155 for (i = 0; i < num; i++) {
156 s0 = (mlib_s32) * sp;
157 sp--;
158
159 t0 = VIS_LD_U8_I(tab1, s0);
160 acc = vis_faligndata(t0, acc);
161
162 t0 = VIS_LD_U8_I(tab0, s0);
163 acc = vis_faligndata(t0, acc);
164 }
165
166 emask = vis_edge16(dp, dend);
167 vis_pst_16(acc, dp, emask);
168 }
169}
170
171/***************************************************************/
172void mlib_v_ImageLookUpSI_S16_U8_2_D1(const mlib_s16 *src,
173 mlib_u8 *dst,
174 mlib_s32 xsize,
175 const mlib_u8 **table)
176{
177 mlib_s16 *sp; /* pointer to source data */
178 mlib_s32 s0, s1, s2, s3, s4; /* source data */
179 mlib_u8 *dl; /* pointer to start of destination */
180 mlib_u8 *dend; /* pointer to end of destination */
181 mlib_d64 *dp; /* aligned pointer to destination */
182 mlib_d64 t0, t1, t2; /* destination data */
183 mlib_d64 t3, t4, t5; /* destination data */
184 mlib_d64 t6, t7, acc; /* destination data */
185 mlib_s32 emask; /* edge mask */
186 mlib_s32 i, num; /* loop variable */
187 const mlib_u8 *tab0 = &table[0][32768];
188 const mlib_u8 *tab1 = &table[1][32768];
189
190 sp = (void *)src;
191 dl = dst;
192
193 dend = dl + 2 * xsize - 1;
194
195 vis_alignaddr((void *)0, 7);
196
197 s0 = *sp++;
198 *dl++ = tab0[s0];
199 dp = (mlib_d64 *) dl;
200 xsize--;
201
202 if (xsize >= 4) {
203
204 s1 = sp[0];
205 s2 = sp[1];
206 s3 = sp[2];
207 s4 = sp[3];
208 sp += 4;
209
210#pragma pipeloop(0)
211 for (i = 0; i <= xsize - 8; i += 4, sp += 4) {
212 t7 = VIS_LD_U8_I(tab0, s4);
213 t6 = VIS_LD_U8_I(tab1, s3);
214 t5 = VIS_LD_U8_I(tab0, s3);
215 t4 = VIS_LD_U8_I(tab1, s2);
216 t3 = VIS_LD_U8_I(tab0, s2);
217 t2 = VIS_LD_U8_I(tab1, s1);
218 t1 = VIS_LD_U8_I(tab0, s1);
219 t0 = VIS_LD_U8_I(tab1, s0);
220 acc = vis_faligndata(t7, acc);
221 acc = vis_faligndata(t6, acc);
222 acc = vis_faligndata(t5, acc);
223 acc = vis_faligndata(t4, acc);
224 acc = vis_faligndata(t3, acc);
225 acc = vis_faligndata(t2, acc);
226 acc = vis_faligndata(t1, acc);
227 acc = vis_faligndata(t0, acc);
228 s0 = s4;
229 s1 = sp[0];
230 s2 = sp[1];
231 s3 = sp[2];
232 s4 = sp[3];
233 *dp++ = acc;
234 }
235
236 t7 = VIS_LD_U8_I(tab0, s4);
237 t6 = VIS_LD_U8_I(tab1, s3);
238 t5 = VIS_LD_U8_I(tab0, s3);
239 t4 = VIS_LD_U8_I(tab1, s2);
240 t3 = VIS_LD_U8_I(tab0, s2);
241 t2 = VIS_LD_U8_I(tab1, s1);
242 t1 = VIS_LD_U8_I(tab0, s1);
243 t0 = VIS_LD_U8_I(tab1, s0);
244 acc = vis_faligndata(t7, acc);
245 acc = vis_faligndata(t6, acc);
246 acc = vis_faligndata(t5, acc);
247 acc = vis_faligndata(t4, acc);
248 acc = vis_faligndata(t3, acc);
249 acc = vis_faligndata(t2, acc);
250 acc = vis_faligndata(t1, acc);
251 acc = vis_faligndata(t0, acc);
252 s0 = s4;
253 *dp++ = acc;
254 }
255
256 num = ((mlib_u8 *) dend - (mlib_u8 *) dp) >> 1;
257 sp += num;
258 num++;
259
260#pragma pipeloop(0)
261 for (i = 0; i < num; i++) {
262 s1 = (mlib_s32) * sp;
263 sp--;
264
265 t0 = VIS_LD_U8_I(tab1, s1);
266 acc = vis_faligndata(t0, acc);
267
268 t0 = VIS_LD_U8_I(tab0, s1);
269 acc = vis_faligndata(t0, acc);
270 }
271
272 t0 = VIS_LD_U8_I(tab1, s0);
273 acc = vis_faligndata(t0, acc);
274 emask = vis_edge8(dp, dend);
275 vis_pst_8(acc, dp, emask);
276}
277
278/***************************************************************/
279void mlib_v_ImageLookUpSI_S16_U8_2(const mlib_s16 *src,
280 mlib_s32 slb,
281 mlib_u8 *dst,
282 mlib_s32 dlb,
283 mlib_s32 xsize,
284 mlib_s32 ysize,
285 const mlib_u8 **table)
286{
287 mlib_s16 *sl;
288 mlib_u8 *dl;
289 mlib_s32 i, j;
290 const mlib_u8 *tab0 = &table[0][32768];
291 const mlib_u8 *tab1 = &table[1][32768];
292
293 sl = (void *)src;
294 dl = dst;
295
296 /* row loop */
297 for (j = 0; j < ysize; j++) {
298 mlib_s16 *sp = sl;
299 mlib_u8 *dp = dl;
300 mlib_s32 off, s0, size = xsize;
301
302 off = ((8 - ((mlib_addr) dp & 7)) & 7) >> 1;
303 off = (off < size) ? off : size;
304
305 for (i = 0; i < off; i++) {
306 s0 = *sp++;
307 *dp++ = tab0[s0];
308 *dp++ = tab1[s0];
309 size--;
310 }
311
312 if (size > 0) {
313
314 if (((mlib_addr) dp & 1) == 0) {
315 mlib_v_ImageLookUpSI_S16_U8_2_DstA8D1(sp, dp, size, table);
316 }
317 else {
318 mlib_v_ImageLookUpSI_S16_U8_2_D1(sp, dp, size, table);
319 }
320 }
321
322 sl = (mlib_s16 *) ((mlib_u8 *) sl + slb);
323 dl = (mlib_u8 *) ((mlib_u8 *) dl + dlb);
324 }
325}
326
327/***************************************************************/
328void mlib_v_ImageLookUpSI_S16_U8_3_D1(const mlib_s16 *src,
329 mlib_u8 *dst,
330 mlib_s32 xsize,
331 const mlib_u8 **table)
332{
333 mlib_s16 *sp; /* pointer to source data */
334 mlib_u8 *dl; /* pointer to start of destination */
335 mlib_d64 *dp; /* aligned pointer to destination */
336 mlib_d64 t0, t1, t2; /* destination data */
337 mlib_d64 t3, t4, t5; /* destination data */
338 mlib_d64 t6, t7; /* destination data */
339 mlib_d64 acc0, acc1, acc2; /* destination data */
340 mlib_s32 i; /* loop variable */
341 const mlib_u8 *tab0 = &table[0][32768];
342 const mlib_u8 *tab1 = &table[1][32768];
343 const mlib_u8 *tab2 = &table[2][32768];
344 mlib_s32 s00, s01, s02, s03;
345 mlib_s32 s10, s11, s12, s13;
346
347 sp = (void *)src;
348 dl = dst;
349 dp = (mlib_d64 *) dl;
350
351 vis_alignaddr((void *)0, 7);
352
353 i = 0;
354
355 if (xsize >= 8) {
356
357 s00 = sp[0];
358 s01 = sp[1];
359 s02 = sp[2];
360 s03 = sp[3];
361 s10 = sp[4];
362 s11 = sp[5];
363 s12 = sp[6];
364 s13 = sp[7];
365 sp += 8;
366
367#pragma pipeloop(0)
368 for (i = 0; i <= xsize - 16; i += 8, sp += 8) {
369 t7 = VIS_LD_U8_I(tab1, s02);
370 t6 = VIS_LD_U8_I(tab0, s02);
371 t5 = VIS_LD_U8_I(tab2, s01);
372 t4 = VIS_LD_U8_I(tab1, s01);
373 t3 = VIS_LD_U8_I(tab0, s01);
374 t2 = VIS_LD_U8_I(tab2, s00);
375 t1 = VIS_LD_U8_I(tab1, s00);
376 t0 = VIS_LD_U8_I(tab0, s00);
377 acc0 = vis_faligndata(t7, acc0);
378 acc0 = vis_faligndata(t6, acc0);
379 acc0 = vis_faligndata(t5, acc0);
380 acc0 = vis_faligndata(t4, acc0);
381 acc0 = vis_faligndata(t3, acc0);
382 acc0 = vis_faligndata(t2, acc0);
383 acc0 = vis_faligndata(t1, acc0);
384 acc0 = vis_faligndata(t0, acc0);
385 t7 = VIS_LD_U8_I(tab0, s11);
386 t6 = VIS_LD_U8_I(tab2, s10);
387 t5 = VIS_LD_U8_I(tab1, s10);
388 t4 = VIS_LD_U8_I(tab0, s10);
389 t3 = VIS_LD_U8_I(tab2, s03);
390 t2 = VIS_LD_U8_I(tab1, s03);
391 t1 = VIS_LD_U8_I(tab0, s03);
392 t0 = VIS_LD_U8_I(tab2, s02);
393 acc1 = vis_faligndata(t7, acc1);
394 acc1 = vis_faligndata(t6, acc1);
395 acc1 = vis_faligndata(t5, acc1);
396 acc1 = vis_faligndata(t4, acc1);
397 acc1 = vis_faligndata(t3, acc1);
398 acc1 = vis_faligndata(t2, acc1);
399 acc1 = vis_faligndata(t1, acc1);
400 acc1 = vis_faligndata(t0, acc1);
401 t7 = VIS_LD_U8_I(tab2, s13);
402 t6 = VIS_LD_U8_I(tab1, s13);
403 t5 = VIS_LD_U8_I(tab0, s13);
404 t4 = VIS_LD_U8_I(tab2, s12);
405 t3 = VIS_LD_U8_I(tab1, s12);
406 t2 = VIS_LD_U8_I(tab0, s12);
407 t1 = VIS_LD_U8_I(tab2, s11);
408 t0 = VIS_LD_U8_I(tab1, s11);
409 acc2 = vis_faligndata(t7, acc2);
410 acc2 = vis_faligndata(t6, acc2);
411 acc2 = vis_faligndata(t5, acc2);
412 acc2 = vis_faligndata(t4, acc2);
413 acc2 = vis_faligndata(t3, acc2);
414 acc2 = vis_faligndata(t2, acc2);
415 acc2 = vis_faligndata(t1, acc2);
416 acc2 = vis_faligndata(t0, acc2);
417 s00 = sp[0];
418 s01 = sp[1];
419 s02 = sp[2];
420 s03 = sp[3];
421 s10 = sp[4];
422 s11 = sp[5];
423 s12 = sp[6];
424 s13 = sp[7];
425 *dp++ = acc0;
426 *dp++ = acc1;
427 *dp++ = acc2;
428 }
429
430 t7 = VIS_LD_U8_I(tab1, s02);
431 t6 = VIS_LD_U8_I(tab0, s02);
432 t5 = VIS_LD_U8_I(tab2, s01);
433 t4 = VIS_LD_U8_I(tab1, s01);
434 t3 = VIS_LD_U8_I(tab0, s01);
435 t2 = VIS_LD_U8_I(tab2, s00);
436 t1 = VIS_LD_U8_I(tab1, s00);
437 t0 = VIS_LD_U8_I(tab0, s00);
438 acc0 = vis_faligndata(t7, acc0);
439 acc0 = vis_faligndata(t6, acc0);
440 acc0 = vis_faligndata(t5, acc0);
441 acc0 = vis_faligndata(t4, acc0);
442 acc0 = vis_faligndata(t3, acc0);
443 acc0 = vis_faligndata(t2, acc0);
444 acc0 = vis_faligndata(t1, acc0);
445 acc0 = vis_faligndata(t0, acc0);
446 t7 = VIS_LD_U8_I(tab0, s11);
447 t6 = VIS_LD_U8_I(tab2, s10);
448 t5 = VIS_LD_U8_I(tab1, s10);
449 t4 = VIS_LD_U8_I(tab0, s10);
450 t3 = VIS_LD_U8_I(tab2, s03);
451 t2 = VIS_LD_U8_I(tab1, s03);
452 t1 = VIS_LD_U8_I(tab0, s03);
453 t0 = VIS_LD_U8_I(tab2, s02);
454 acc1 = vis_faligndata(t7, acc1);
455 acc1 = vis_faligndata(t6, acc1);
456 acc1 = vis_faligndata(t5, acc1);
457 acc1 = vis_faligndata(t4, acc1);
458 acc1 = vis_faligndata(t3, acc1);
459 acc1 = vis_faligndata(t2, acc1);
460 acc1 = vis_faligndata(t1, acc1);
461 acc1 = vis_faligndata(t0, acc1);
462 t7 = VIS_LD_U8_I(tab2, s13);
463 t6 = VIS_LD_U8_I(tab1, s13);
464 t5 = VIS_LD_U8_I(tab0, s13);
465 t4 = VIS_LD_U8_I(tab2, s12);
466 t3 = VIS_LD_U8_I(tab1, s12);
467 t2 = VIS_LD_U8_I(tab0, s12);
468 t1 = VIS_LD_U8_I(tab2, s11);
469 t0 = VIS_LD_U8_I(tab1, s11);
470 acc2 = vis_faligndata(t7, acc2);
471 acc2 = vis_faligndata(t6, acc2);
472 acc2 = vis_faligndata(t5, acc2);
473 acc2 = vis_faligndata(t4, acc2);
474 acc2 = vis_faligndata(t3, acc2);
475 acc2 = vis_faligndata(t2, acc2);
476 acc2 = vis_faligndata(t1, acc2);
477 acc2 = vis_faligndata(t0, acc2);
478 *dp++ = acc0;
479 *dp++ = acc1;
480 *dp++ = acc2;
481 i += 8;
482 }
483
484 dl = (mlib_u8 *) dp;
485
486#pragma pipeloop(0)
487 for (; i < xsize; i++) {
488 s00 = sp[0];
489 dl[0] = tab0[s00];
490 dl[1] = tab1[s00];
491 dl[2] = tab2[s00];
492 dl += 3;
493 sp++;
494 }
495}
496
497/***************************************************************/
498void mlib_v_ImageLookUpSI_S16_U8_3(const mlib_s16 *src,
499 mlib_s32 slb,
500 mlib_u8 *dst,
501 mlib_s32 dlb,
502 mlib_s32 xsize,
503 mlib_s32 ysize,
504 const mlib_u8 **table)
505{
506 mlib_s16 *sl;
507 mlib_u8 *dl;
508 mlib_s32 i, j;
509 const mlib_u8 *tab0 = &table[0][32768];
510 const mlib_u8 *tab1 = &table[1][32768];
511 const mlib_u8 *tab2 = &table[2][32768];
512
513 sl = (void *)src;
514 dl = dst;
515
516 /* row loop */
517 for (j = 0; j < ysize; j++) {
518 mlib_s16 *sp = sl;
519 mlib_u8 *dp = dl;
520 mlib_s32 off, s0, size = xsize;
521
522 off = (mlib_addr) dp & 7;
523 off = (off * 5) & 7;
524 off = (off < size) ? off : size;
525
526 for (i = 0; i < off; i++) {
527 s0 = *sp++;
528 *dp++ = tab0[s0];
529 *dp++ = tab1[s0];
530 *dp++ = tab2[s0];
531 size--;
532 }
533
534 if (size > 0) {
535 mlib_v_ImageLookUpSI_S16_U8_3_D1(sp, dp, size, table);
536 }
537
538 sl = (mlib_s16 *) ((mlib_u8 *) sl + slb);
539 dl = (mlib_u8 *) ((mlib_u8 *) dl + dlb);
540 }
541}
542
543/***************************************************************/
544void mlib_v_ImageLookUpSI_S16_U8_4_DstOff0_D1(const mlib_s16 *src,
545 mlib_u8 *dst,
546 mlib_s32 xsize,
547 const mlib_u8 **table)
548{
549 mlib_s16 *sp; /* pointer to source data */
550 mlib_s32 s0, s1; /* source data */
551 mlib_u8 *dl; /* pointer to start of destination */
552 mlib_d64 *dp; /* aligned pointer to destination */
553 mlib_d64 t0, t1, t2; /* destination data */
554 mlib_d64 t3, t4, t5; /* destination data */
555 mlib_d64 t6, t7, acc; /* destination data */
556 mlib_s32 i; /* loop variable */
557 const mlib_u8 *tab0 = &table[0][32768];
558 const mlib_u8 *tab1 = &table[1][32768];
559 const mlib_u8 *tab2 = &table[2][32768];
560 const mlib_u8 *tab3 = &table[3][32768];
561
562 sp = (void *)src;
563 dl = dst;
564 dp = (mlib_d64 *) dl;
565
566 vis_alignaddr((void *)0, 7);
567
568 if (xsize >= 2) {
569
570 s0 = sp[0];
571 s1 = sp[1];
572 sp += 2;
573
574#pragma pipeloop(0)
575 for (i = 0; i <= xsize - 4; i += 2, sp += 2) {
576 t7 = VIS_LD_U8_I(tab3, s1);
577 t6 = VIS_LD_U8_I(tab2, s1);
578 t5 = VIS_LD_U8_I(tab1, s1);
579 t4 = VIS_LD_U8_I(tab0, s1);
580 t3 = VIS_LD_U8_I(tab3, s0);
581 t2 = VIS_LD_U8_I(tab2, s0);
582 t1 = VIS_LD_U8_I(tab1, s0);
583 t0 = VIS_LD_U8_I(tab0, s0);
584 acc = vis_faligndata(t7, acc);
585 acc = vis_faligndata(t6, acc);
586 acc = vis_faligndata(t5, acc);
587 acc = vis_faligndata(t4, acc);
588 acc = vis_faligndata(t3, acc);
589 acc = vis_faligndata(t2, acc);
590 acc = vis_faligndata(t1, acc);
591 acc = vis_faligndata(t0, acc);
592 s0 = sp[0];
593 s1 = sp[1];
594 *dp++ = acc;
595 }
596
597 t7 = VIS_LD_U8_I(tab3, s1);
598 t6 = VIS_LD_U8_I(tab2, s1);
599 t5 = VIS_LD_U8_I(tab1, s1);
600 t4 = VIS_LD_U8_I(tab0, s1);
601 t3 = VIS_LD_U8_I(tab3, s0);
602 t2 = VIS_LD_U8_I(tab2, s0);
603 t1 = VIS_LD_U8_I(tab1, s0);
604 t0 = VIS_LD_U8_I(tab0, s0);
605 acc = vis_faligndata(t7, acc);
606 acc = vis_faligndata(t6, acc);
607 acc = vis_faligndata(t5, acc);
608 acc = vis_faligndata(t4, acc);
609 acc = vis_faligndata(t3, acc);
610 acc = vis_faligndata(t2, acc);
611 acc = vis_faligndata(t1, acc);
612 acc = vis_faligndata(t0, acc);
613 *dp++ = acc;
614 }
615
616 if ((xsize & 1) != 0) {
617 s0 = sp[0];
618 t7 = VIS_LD_U8_I(tab3, s0);
619 t6 = VIS_LD_U8_I(tab2, s0);
620 t5 = VIS_LD_U8_I(tab1, s0);
621 t4 = VIS_LD_U8_I(tab0, s0);
622 acc = vis_faligndata(t7, acc);
623 acc = vis_faligndata(t6, acc);
624 acc = vis_faligndata(t5, acc);
625 acc = vis_faligndata(t4, acc);
626 *(mlib_f32 *) dp = vis_read_hi(acc);
627 }
628}
629
630/***************************************************************/
631void mlib_v_ImageLookUpSI_S16_U8_4_DstOff1_D1(const mlib_s16 *src,
632 mlib_u8 *dst,
633 mlib_s32 xsize,
634 const mlib_u8 **table)
635{
636 mlib_s16 *sp; /* pointer to source data */
637 mlib_s32 s0, s1, s2; /* source data */
638 mlib_u8 *dl; /* pointer to start of destination */
639 mlib_d64 *dp; /* aligned pointer to destination */
640 mlib_d64 t0, t1, t2; /* destination data */
641 mlib_d64 t3, t4, t5; /* destination data */
642 mlib_d64 t6, t7, acc; /* destination data */
643 mlib_s32 i; /* loop variable */
644 const mlib_u8 *tab0 = &table[0][32768];
645 const mlib_u8 *tab1 = &table[1][32768];
646 const mlib_u8 *tab2 = &table[2][32768];
647 const mlib_u8 *tab3 = &table[3][32768];
648
649 sp = (void *)src;
650 dl = dst;
651 dp = (mlib_d64 *) dl;
652
653 vis_alignaddr((void *)0, 7);
654
655 s0 = *sp++;
656
657 if (xsize >= 2) {
658
659 s1 = sp[0];
660 s2 = sp[1];
661 sp += 2;
662
663#pragma pipeloop(0)
664 for (i = 0; i <= xsize - 4; i += 2, sp += 2) {
665 t7 = VIS_LD_U8_I(tab0, s2);
666 t6 = VIS_LD_U8_I(tab3, s1);
667 t5 = VIS_LD_U8_I(tab2, s1);
668 t4 = VIS_LD_U8_I(tab1, s1);
669 t3 = VIS_LD_U8_I(tab0, s1);
670 t2 = VIS_LD_U8_I(tab3, s0);
671 t1 = VIS_LD_U8_I(tab2, s0);
672 t0 = VIS_LD_U8_I(tab1, s0);
673 acc = vis_faligndata(t7, acc);
674 acc = vis_faligndata(t6, acc);
675 acc = vis_faligndata(t5, acc);
676 acc = vis_faligndata(t4, acc);
677 acc = vis_faligndata(t3, acc);
678 acc = vis_faligndata(t2, acc);
679 acc = vis_faligndata(t1, acc);
680 acc = vis_faligndata(t0, acc);
681 s0 = s2;
682 s1 = sp[0];
683 s2 = sp[1];
684 *dp++ = acc;
685 }
686
687 t7 = VIS_LD_U8_I(tab0, s2);
688 t6 = VIS_LD_U8_I(tab3, s1);
689 t5 = VIS_LD_U8_I(tab2, s1);
690 t4 = VIS_LD_U8_I(tab1, s1);
691 t3 = VIS_LD_U8_I(tab0, s1);
692 t2 = VIS_LD_U8_I(tab3, s0);
693 t1 = VIS_LD_U8_I(tab2, s0);
694 t0 = VIS_LD_U8_I(tab1, s0);
695 acc = vis_faligndata(t7, acc);
696 acc = vis_faligndata(t6, acc);
697 acc = vis_faligndata(t5, acc);
698 acc = vis_faligndata(t4, acc);
699 acc = vis_faligndata(t3, acc);
700 acc = vis_faligndata(t2, acc);
701 acc = vis_faligndata(t1, acc);
702 acc = vis_faligndata(t0, acc);
703 s0 = s2;
704 *dp++ = acc;
705 }
706
707 dl = (mlib_u8 *) dp;
708
709 if ((xsize & 1) != 0) {
710 s1 = sp[0];
711 t7 = VIS_LD_U8_I(tab0, s1);
712 t6 = VIS_LD_U8_I(tab3, s0);
713 t5 = VIS_LD_U8_I(tab2, s0);
714 t4 = VIS_LD_U8_I(tab1, s0);
715 acc = vis_faligndata(t7, acc);
716 acc = vis_faligndata(t6, acc);
717 acc = vis_faligndata(t5, acc);
718 acc = vis_faligndata(t4, acc);
719 *(mlib_f32 *) dl = vis_read_hi(acc);
720 dl += 4;
721 s0 = s1;
722 }
723
724 dl[0] = tab1[s0];
725 dl[1] = tab2[s0];
726 dl[2] = tab3[s0];
727}
728
729/***************************************************************/
730void mlib_v_ImageLookUpSI_S16_U8_4_DstOff2_D1(const mlib_s16 *src,
731 mlib_u8 *dst,
732 mlib_s32 xsize,
733 const mlib_u8 **table)
734{
735 mlib_s16 *sp; /* pointer to source data */
736 mlib_s32 s0, s1, s2; /* source data */
737 mlib_u8 *dl; /* pointer to start of destination */
738 mlib_d64 *dp; /* aligned pointer to destination */
739 mlib_d64 t0, t1, t2; /* destination data */
740 mlib_d64 t3, t4, t5; /* destination data */
741 mlib_d64 t6, t7, acc; /* destination data */
742 mlib_s32 i; /* loop variable */
743 const mlib_u8 *tab0 = &table[0][32768];
744 const mlib_u8 *tab1 = &table[1][32768];
745 const mlib_u8 *tab2 = &table[2][32768];
746 const mlib_u8 *tab3 = &table[3][32768];
747
748 sp = (void *)src;
749 dl = dst;
750 dp = (mlib_d64 *) dl;
751
752 vis_alignaddr((void *)0, 7);
753
754 s0 = *sp++;
755
756 if (xsize >= 2) {
757
758 s1 = sp[0];
759 s2 = sp[1];
760 sp += 2;
761
762#pragma pipeloop(0)
763 for (i = 0; i <= xsize - 4; i += 2, sp += 2) {
764 t7 = VIS_LD_U8_I(tab1, s2);
765 t6 = VIS_LD_U8_I(tab0, s2);
766 t5 = VIS_LD_U8_I(tab3, s1);
767 t4 = VIS_LD_U8_I(tab2, s1);
768 t3 = VIS_LD_U8_I(tab1, s1);
769 t2 = VIS_LD_U8_I(tab0, s1);
770 t1 = VIS_LD_U8_I(tab3, s0);
771 t0 = VIS_LD_U8_I(tab2, s0);
772 acc = vis_faligndata(t7, acc);
773 acc = vis_faligndata(t6, acc);
774 acc = vis_faligndata(t5, acc);
775 acc = vis_faligndata(t4, acc);
776 acc = vis_faligndata(t3, acc);
777 acc = vis_faligndata(t2, acc);
778 acc = vis_faligndata(t1, acc);
779 acc = vis_faligndata(t0, acc);
780 s0 = s2;
781 s1 = sp[0];
782 s2 = sp[1];
783 *dp++ = acc;
784 }
785
786 t7 = VIS_LD_U8_I(tab1, s2);
787 t6 = VIS_LD_U8_I(tab0, s2);
788 t5 = VIS_LD_U8_I(tab3, s1);
789 t4 = VIS_LD_U8_I(tab2, s1);
790 t3 = VIS_LD_U8_I(tab1, s1);
791 t2 = VIS_LD_U8_I(tab0, s1);
792 t1 = VIS_LD_U8_I(tab3, s0);
793 t0 = VIS_LD_U8_I(tab2, s0);
794 acc = vis_faligndata(t7, acc);
795 acc = vis_faligndata(t6, acc);
796 acc = vis_faligndata(t5, acc);
797 acc = vis_faligndata(t4, acc);
798 acc = vis_faligndata(t3, acc);
799 acc = vis_faligndata(t2, acc);
800 acc = vis_faligndata(t1, acc);
801 acc = vis_faligndata(t0, acc);
802 s0 = s2;
803 *dp++ = acc;
804 }
805
806 dl = (mlib_u8 *) dp;
807
808 if ((xsize & 1) != 0) {
809 s1 = sp[0];
810 t7 = VIS_LD_U8_I(tab1, s1);
811 t6 = VIS_LD_U8_I(tab0, s1);
812 t5 = VIS_LD_U8_I(tab3, s0);
813 t4 = VIS_LD_U8_I(tab2, s0);
814 acc = vis_faligndata(t7, acc);
815 acc = vis_faligndata(t6, acc);
816 acc = vis_faligndata(t5, acc);
817 acc = vis_faligndata(t4, acc);
818 *(mlib_f32 *) dl = vis_read_hi(acc);
819 dl += 4;
820 s0 = s1;
821 }
822
823 dl[0] = tab2[s0];
824 dl[1] = tab3[s0];
825}
826
827/***************************************************************/
828void mlib_v_ImageLookUpSI_S16_U8_4_DstOff3_D1(const mlib_s16 *src,
829 mlib_u8 *dst,
830 mlib_s32 xsize,
831 const mlib_u8 **table)
832{
833 mlib_s16 *sp; /* pointer to source data */
834 mlib_s32 s0, s1, s2; /* source data */
835 mlib_u8 *dl; /* pointer to start of destination */
836 mlib_d64 *dp; /* aligned pointer to destination */
837 mlib_d64 t0, t1, t2; /* destination data */
838 mlib_d64 t3, t4, t5; /* destination data */
839 mlib_d64 t6, t7, acc; /* destination data */
840 mlib_s32 i; /* loop variable */
841 const mlib_u8 *tab0 = &table[0][32768];
842 const mlib_u8 *tab1 = &table[1][32768];
843 const mlib_u8 *tab2 = &table[2][32768];
844 const mlib_u8 *tab3 = &table[3][32768];
845
846 sp = (void *)src;
847 dl = dst;
848 dp = (mlib_d64 *) dl;
849
850 vis_alignaddr((void *)0, 7);
851
852 s0 = *sp++;
853
854 if (xsize >= 2) {
855
856 s1 = sp[0];
857 s2 = sp[1];
858 sp += 2;
859
860#pragma pipeloop(0)
861 for (i = 0; i <= xsize - 4; i += 2, sp += 2) {
862 t7 = VIS_LD_U8_I(tab2, s2);
863 t6 = VIS_LD_U8_I(tab1, s2);
864 t5 = VIS_LD_U8_I(tab0, s2);
865 t4 = VIS_LD_U8_I(tab3, s1);
866 t3 = VIS_LD_U8_I(tab2, s1);
867 t2 = VIS_LD_U8_I(tab1, s1);
868 t1 = VIS_LD_U8_I(tab0, s1);
869 t0 = VIS_LD_U8_I(tab3, s0);
870 acc = vis_faligndata(t7, acc);
871 acc = vis_faligndata(t6, acc);
872 acc = vis_faligndata(t5, acc);
873 acc = vis_faligndata(t4, acc);
874 acc = vis_faligndata(t3, acc);
875 acc = vis_faligndata(t2, acc);
876 acc = vis_faligndata(t1, acc);
877 acc = vis_faligndata(t0, acc);
878 s0 = s2;
879 s1 = sp[0];
880 s2 = sp[1];
881 *dp++ = acc;
882 }
883
884 t7 = VIS_LD_U8_I(tab2, s2);
885 t6 = VIS_LD_U8_I(tab1, s2);
886 t5 = VIS_LD_U8_I(tab0, s2);
887 t4 = VIS_LD_U8_I(tab3, s1);
888 t3 = VIS_LD_U8_I(tab2, s1);
889 t2 = VIS_LD_U8_I(tab1, s1);
890 t1 = VIS_LD_U8_I(tab0, s1);
891 t0 = VIS_LD_U8_I(tab3, s0);
892 acc = vis_faligndata(t7, acc);
893 acc = vis_faligndata(t6, acc);
894 acc = vis_faligndata(t5, acc);
895 acc = vis_faligndata(t4, acc);
896 acc = vis_faligndata(t3, acc);
897 acc = vis_faligndata(t2, acc);
898 acc = vis_faligndata(t1, acc);
899 acc = vis_faligndata(t0, acc);
900 s0 = s2;
901 *dp++ = acc;
902 }
903
904 dl = (mlib_u8 *) dp;
905
906 if ((xsize & 1) != 0) {
907 s1 = sp[0];
908 t7 = VIS_LD_U8_I(tab2, s1);
909 t6 = VIS_LD_U8_I(tab1, s1);
910 t5 = VIS_LD_U8_I(tab0, s1);
911 t4 = VIS_LD_U8_I(tab3, s0);
912 acc = vis_faligndata(t7, acc);
913 acc = vis_faligndata(t6, acc);
914 acc = vis_faligndata(t5, acc);
915 acc = vis_faligndata(t4, acc);
916 *(mlib_f32 *) dl = vis_read_hi(acc);
917 dl += 4;
918 s0 = s1;
919 }
920
921 dl[0] = tab3[s0];
922}
923
924/***************************************************************/
925void mlib_v_ImageLookUpSI_S16_U8_4(const mlib_s16 *src,
926 mlib_s32 slb,
927 mlib_u8 *dst,
928 mlib_s32 dlb,
929 mlib_s32 xsize,
930 mlib_s32 ysize,
931 const mlib_u8 **table)
932{
933 mlib_s16 *sl;
934 mlib_u8 *dl;
935 mlib_s32 j;
936 const mlib_u8 *tab0 = &table[0][32768];
937 const mlib_u8 *tab1 = &table[1][32768];
938 const mlib_u8 *tab2 = &table[2][32768];
939 const mlib_u8 *tab3 = &table[3][32768];
940
941 sl = (void *)src;
942 dl = dst;
943
944 /* row loop */
945 for (j = 0; j < ysize; j++) {
946 mlib_s16 *sp = sl;
947 mlib_u8 *dp = dl;
948 mlib_s32 off, s0, size = xsize;
949
950 off = (8 - ((mlib_addr) dp & 7)) & 7;
951
952 if ((off >= 4) && (size > 0)) {
953 s0 = *sp++;
954 *dp++ = tab0[s0];
955 *dp++ = tab1[s0];
956 *dp++ = tab2[s0];
957 *dp++ = tab3[s0];
958 size--;
959 }
960
961 if (size > 0) {
962 off = (4 - ((mlib_addr) dp & 3)) & 3;
963
964 if (off == 0) {
965 mlib_v_ImageLookUpSI_S16_U8_4_DstOff0_D1(sp, dp, size, table);
966 }
967 else if (off == 1) {
968 s0 = *sp;
969 *dp++ = tab0[s0];
970 size--;
971 mlib_v_ImageLookUpSI_S16_U8_4_DstOff1_D1(sp, dp, size, table);
972 }
973 else if (off == 2) {
974 s0 = *sp;
975 *dp++ = tab0[s0];
976 *dp++ = tab1[s0];
977 size--;
978 mlib_v_ImageLookUpSI_S16_U8_4_DstOff2_D1(sp, dp, size, table);
979 }
980 else if (off == 3) {
981 s0 = *sp;
982 *dp++ = tab0[s0];
983 *dp++ = tab1[s0];
984 *dp++ = tab2[s0];
985 size--;
986 mlib_v_ImageLookUpSI_S16_U8_4_DstOff3_D1(sp, dp, size, table);
987 }
988 }
989
990 sl = (mlib_s16 *) ((mlib_u8 *) sl + slb);
991 dl = (mlib_u8 *) ((mlib_u8 *) dl + dlb);
992 }
993}
994
995/***************************************************************/