blob: de054fef2b18b01f4580db731ee27a1ef82450a5 [file] [log] [blame]
J. Duke319a3b92007-12-01 00:00:00 +00001/*
2 * Copyright 1998-2003 Sun Microsystems, Inc. All Rights Reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Sun designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Sun in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
22 * CA 95054 USA or visit www.sun.com if you need additional information or
23 * have any questions.
24 */
25
26
27
28#include "vis_proto.h"
29#include "mlib_image.h"
30#include "mlib_v_ImageLookUpFunc.h"
31
32/***************************************************************/
/*
 * Forward declarations of the file-local row kernels.
 *
 * The "_124_" kernels take four tables (table0..table3); the 1-, 2- and
 * 4-table entry points in this file call them, choosing SrcOff0..SrcOff3
 * from the low two bits of the source address.
 * The "_3_" kernels take three tables (table0..table2).
 * NOTE(review): the caller of the "_3_" kernels is not visible in this part
 * of the file; presumably a 3-channel entry point dispatches to them the
 * same way -- confirm.
 */
33static void mlib_v_ImageLookUp_U8_S16_124_SrcOff0_D1(const mlib_u8 *src,
34 mlib_s16 *dst,
35 mlib_s32 xsize,
36 const mlib_s16 *table0,
37 const mlib_s16 *table1,
38 const mlib_s16 *table2,
39 const mlib_s16 *table3);
40
41static void mlib_v_ImageLookUp_U8_S16_124_SrcOff1_D1(const mlib_u8 *src,
42 mlib_s16 *dst,
43 mlib_s32 xsize,
44 const mlib_s16 *table0,
45 const mlib_s16 *table1,
46 const mlib_s16 *table2,
47 const mlib_s16 *table3);
48
49static void mlib_v_ImageLookUp_U8_S16_124_SrcOff2_D1(const mlib_u8 *src,
50 mlib_s16 *dst,
51 mlib_s32 xsize,
52 const mlib_s16 *table0,
53 const mlib_s16 *table1,
54 const mlib_s16 *table2,
55 const mlib_s16 *table3);
56
57static void mlib_v_ImageLookUp_U8_S16_124_SrcOff3_D1(const mlib_u8 *src,
58 mlib_s16 *dst,
59 mlib_s32 xsize,
60 const mlib_s16 *table0,
61 const mlib_s16 *table1,
62 const mlib_s16 *table2,
63 const mlib_s16 *table3);
64
/* Three-table variants of the row kernels. */
65static void mlib_v_ImageLookUp_U8_S16_3_SrcOff0_D1(const mlib_u8 *src,
66 mlib_s16 *dst,
67 mlib_s32 xsize,
68 const mlib_s16 *table0,
69 const mlib_s16 *table1,
70 const mlib_s16 *table2);
71
72static void mlib_v_ImageLookUp_U8_S16_3_SrcOff1_D1(const mlib_u8 *src,
73 mlib_s16 *dst,
74 mlib_s32 xsize,
75 const mlib_s16 *table0,
76 const mlib_s16 *table1,
77 const mlib_s16 *table2);
78
79static void mlib_v_ImageLookUp_U8_S16_3_SrcOff2_D1(const mlib_u8 *src,
80 mlib_s16 *dst,
81 mlib_s32 xsize,
82 const mlib_s16 *table0,
83 const mlib_s16 *table1,
84 const mlib_s16 *table2);
85
86static void mlib_v_ImageLookUp_U8_S16_3_SrcOff3_D1(const mlib_u8 *src,
87 mlib_s16 *dst,
88 mlib_s32 xsize,
89 const mlib_s16 *table0,
90 const mlib_s16 *table1,
91 const mlib_s16 *table2);
92
93/***************************************************************/
/*
 * Thin wrapper around vis_ld_u16_i() that casts the first argument to
 * "void *", so the const-qualified table pointers used in this file can be
 * passed directly.  Every use in this file passes a table pointer as X and
 * an even value (source byte * 2) as Y.
 * NOTE(review): presumably vis_ld_u16_i loads the 16-bit value at byte
 * address X + Y -- confirm against vis_proto.h.
 */
94#define VIS_LD_U16_I(X, Y) vis_ld_u16_i((void *)(X), (Y))
95
96/***************************************************************/
/*
 * Row kernel: translate xsize U8 source values into S16 values through
 * look-up tables, reading the source directly as 32-bit words (the entry
 * points in this file call it when (source address & 3) == 0).
 *
 *   src             first source byte; read through a mlib_u32 pointer
 *   dst             first destination element; written through a mlib_d64
 *                   pointer (the entry points in this file first advance
 *                   dst to an 8-byte boundary)
 *   xsize           number of S16 elements to produce
 *   table0..table3  tables for the 1st, 2nd, 3rd and 4th element of every
 *                   group of four
 *
 * Each source word yields four table byte offsets:
 *   (s0 >> 23) & 0x1FE   most significant byte * 2   -> table0
 *   (s0 >> 15) & 0x1FE   next byte * 2               -> table1
 *   (s0 >>  7) & 0x1FE   next byte * 2               -> table2
 *   (s0 <<  1) & 0x1FE   least significant byte * 2  -> table3
 *
 * NOTE(review): acc0 is read by vis_faligndata before it is ever assigned.
 * Presumably vis_alignaddr(0, 6) makes each vis_faligndata shift one 16-bit
 * lane into acc0, so four steps replace its whole content -- confirm against
 * the VIS documentation.
 */
97void mlib_v_ImageLookUp_U8_S16_124_SrcOff0_D1(const mlib_u8 *src,
98 mlib_s16 *dst,
99 mlib_s32 xsize,
100 const mlib_s16 *table0,
101 const mlib_s16 *table1,
102 const mlib_s16 *table2,
103 const mlib_s16 *table3)
104{
105 mlib_u32 *sa; /* aligned pointer to source data */
106 mlib_u8 *sp; /* pointer to source data */
107 mlib_u32 s0; /* source data */
108 mlib_s16 *dl; /* pointer to start of destination */
109 mlib_s16 *dend; /* pointer to end of destination */
110 mlib_d64 *dp; /* aligned pointer to destination */
111 mlib_d64 t0, t1, t2; /* destination data */
112 mlib_d64 t3, acc0; /* destination data */
113 mlib_s32 emask; /* edge mask */
114 mlib_s32 i, num; /* loop variable */
115
116 sa = (mlib_u32 *) src;
117 dl = dst;
118 dp = (mlib_d64 *) dl;
/* dend addresses the last destination element, not one past it */
119 dend = dl + xsize - 1;
120
121 vis_alignaddr((void *)0, 6);
122
123 i = 0;
124
/* at least one full group of four elements */
125 if (xsize >= 4) {
126
127 s0 = *sa++;
128
/*
 * Steady state: s0 holds the word for the current group; the word for the
 * next group is loaded before the current result is stored.  The loop stops
 * one group early so that this look-ahead load is always consumed.
 */
129#pragma pipeloop(0)
130 for (i = 0; i <= xsize - 8; i += 4) {
131 t3 = VIS_LD_U16_I(table3, (s0 << 1) & 0x1FE);
132 t2 = VIS_LD_U16_I(table2, (s0 >> 7) & 0x1FE);
133 t1 = VIS_LD_U16_I(table1, (s0 >> 15) & 0x1FE);
134 t0 = VIS_LD_U16_I(table0, (s0 >> 23) & 0x1FE);
135 acc0 = vis_faligndata(t3, acc0);
136 acc0 = vis_faligndata(t2, acc0);
137 acc0 = vis_faligndata(t1, acc0);
138 acc0 = vis_faligndata(t0, acc0);
139 s0 = *sa++;
140 *dp++ = acc0;
141 }
142
/* last full group: same work as the loop body, without a further load */
143 t3 = VIS_LD_U16_I(table3, (s0 << 1) & 0x1FE);
144 t2 = VIS_LD_U16_I(table2, (s0 >> 7) & 0x1FE);
145 t1 = VIS_LD_U16_I(table1, (s0 >> 15) & 0x1FE);
146 t0 = VIS_LD_U16_I(table0, (s0 >> 23) & 0x1FE);
147 acc0 = vis_faligndata(t3, acc0);
148 acc0 = vis_faligndata(t2, acc0);
149 acc0 = vis_faligndata(t1, acc0);
150 acc0 = vis_faligndata(t0, acc0);
151 *dp++ = acc0;
152 }
153
/* sa now points just past the last consumed word: first unprocessed byte */
154 sp = (mlib_u8 *) sa;
155
/*
 * Tail of 1..3 elements.  sp is moved to the last remaining source byte and
 * the look-ups run from the last element back to the first, so the first
 * remaining element always uses table0.
 */
156 if ((mlib_addr) dp <= (mlib_addr) dend) {
157
158 num = (mlib_s16 *) dend - (mlib_s16 *) dp;
159 sp += num;
160 num++;
161
162 if (num == 1) {
163 s0 = (mlib_s32) * sp;
164 sp--;
165
166 t0 = VIS_LD_U16_I(table0, 2 * s0);
167 acc0 = vis_faligndata(t0, acc0);
168 }
169 else if (num == 2) {
170 s0 = (mlib_s32) * sp;
171 sp--;
172
173 t0 = VIS_LD_U16_I(table1, 2 * s0);
174 acc0 = vis_faligndata(t0, acc0);
175
176 s0 = (mlib_s32) * sp;
177 sp--;
178
179 t0 = VIS_LD_U16_I(table0, 2 * s0);
180 acc0 = vis_faligndata(t0, acc0);
181 }
182 else if (num == 3) {
183 s0 = (mlib_s32) * sp;
184 sp--;
185
186 t0 = VIS_LD_U16_I(table2, 2 * s0);
187 acc0 = vis_faligndata(t0, acc0);
188
189 s0 = (mlib_s32) * sp;
190 sp--;
191
192 t0 = VIS_LD_U16_I(table1, 2 * s0);
193 acc0 = vis_faligndata(t0, acc0);
194
195 s0 = (mlib_s32) * sp;
196 sp--;
197
198 t0 = VIS_LD_U16_I(table0, 2 * s0);
199 acc0 = vis_faligndata(t0, acc0);
200 }
201
/* NOTE(review): presumably a masked store of only the lanes from dp to dend -- confirm */
202 emask = vis_edge16(dp, dend);
203 vis_pst_16(acc0, dp, emask);
204 }
205}
206
207/***************************************************************/
/*
 * Row kernel: translate xsize U8 source values into S16 values through
 * look-up tables, for a source pointer that lies 1 byte past a 32-bit word
 * boundary (the entry points in this file call it when
 * (source address & 3) == 1).
 *
 *   src             first source byte; words are read starting at src - 1
 *   dst             first destination element; written through a mlib_d64
 *                   pointer (the entry points in this file first advance
 *                   dst to an 8-byte boundary)
 *   xsize           number of S16 elements to produce
 *   table0..table3  tables for the 1st, 2nd, 3rd and 4th element of every
 *                   group of four
 *
 * Every group of four is assembled from two consecutive words: the three low
 * bytes of s0 (table0, table1, table2) and the most significant byte of s1
 * (table3).  s1 then becomes s0 for the next group.
 *
 * The word at src - 1 is read even when xsize < 4, and inside the loop one
 * word beyond the current group is loaded ahead.
 *
 * NOTE(review): acc0 is read by vis_faligndata before it is ever assigned.
 * Presumably vis_alignaddr(0, 6) makes each vis_faligndata shift one 16-bit
 * lane into acc0, so four steps replace its whole content -- confirm against
 * the VIS documentation.
 */
208void mlib_v_ImageLookUp_U8_S16_124_SrcOff1_D1(const mlib_u8 *src,
209 mlib_s16 *dst,
210 mlib_s32 xsize,
211 const mlib_s16 *table0,
212 const mlib_s16 *table1,
213 const mlib_s16 *table2,
214 const mlib_s16 *table3)
215{
216 mlib_u32 *sa; /* aligned pointer to source data */
217 mlib_u8 *sp; /* pointer to source data */
218 mlib_u32 s0, s1; /* source data */
219 mlib_s16 *dl; /* pointer to start of destination */
220 mlib_s16 *dend; /* pointer to end of destination */
221 mlib_d64 *dp; /* aligned pointer to destination */
222 mlib_d64 t0, t1, t2; /* destination data */
223 mlib_d64 t3, acc0; /* destination data */
224 mlib_s32 emask; /* edge mask */
225 mlib_s32 i, num; /* loop variable */
226
/* step back to the word that contains the first source byte */
227 sa = (mlib_u32 *) (src - 1);
228 dl = dst;
229 dp = (mlib_d64 *) dl;
/* dend addresses the last destination element, not one past it */
230 dend = dl + xsize - 1;
231
232 vis_alignaddr((void *)0, 6);
233
234 s0 = *sa++;
235
/* at least one full group of four elements */
236 if (xsize >= 4) {
237
238 s1 = *sa++;
239
/* steady state: s0/s1 straddle the current group; next word is loaded ahead */
240#pragma pipeloop(0)
241 for (i = 0; i <= xsize - 8; i += 4) {
242 t3 = VIS_LD_U16_I(table3, (s1 >> 23) & 0x1FE);
243 t2 = VIS_LD_U16_I(table2, (s0 << 1) & 0x1FE);
244 t1 = VIS_LD_U16_I(table1, (s0 >> 7) & 0x1FE);
245 t0 = VIS_LD_U16_I(table0, (s0 >> 15) & 0x1FE);
246 acc0 = vis_faligndata(t3, acc0);
247 acc0 = vis_faligndata(t2, acc0);
248 acc0 = vis_faligndata(t1, acc0);
249 acc0 = vis_faligndata(t0, acc0);
250 s0 = s1;
251 s1 = *sa++;
252 *dp++ = acc0;
253 }
254
/* last full group: same work as the loop body, without a further load */
255 t3 = VIS_LD_U16_I(table3, (s1 >> 23) & 0x1FE);
256 t2 = VIS_LD_U16_I(table2, (s0 << 1) & 0x1FE);
257 t1 = VIS_LD_U16_I(table1, (s0 >> 7) & 0x1FE);
258 t0 = VIS_LD_U16_I(table0, (s0 >> 15) & 0x1FE);
259 acc0 = vis_faligndata(t3, acc0);
260 acc0 = vis_faligndata(t2, acc0);
261 acc0 = vis_faligndata(t1, acc0);
262 acc0 = vis_faligndata(t0, acc0);
263 s0 = s1;
264 *dp++ = acc0;
265 }
266
/*
 * sa is one word past the last word loaded, and three bytes of that word are
 * still unprocessed, so the next source byte is at sa - 3.
 */
267 sp = (mlib_u8 *) sa;
268 sp -= 3;
269
/*
 * Tail of 1..3 elements, read byte by byte.  sp is moved to the last
 * remaining source byte and the look-ups run from the last element back to
 * the first, so the first remaining element always uses table0.
 */
270 if ((mlib_addr) dp <= (mlib_addr) dend) {
271
272 num = (mlib_s16 *) dend - (mlib_s16 *) dp;
273 sp += num;
274 num++;
275
276 if (num == 1) {
277 s0 = (mlib_s32) * sp;
278 sp--;
279
280 t0 = VIS_LD_U16_I(table0, 2 * s0);
281 acc0 = vis_faligndata(t0, acc0);
282 }
283 else if (num == 2) {
284 s0 = (mlib_s32) * sp;
285 sp--;
286
287 t0 = VIS_LD_U16_I(table1, 2 * s0);
288 acc0 = vis_faligndata(t0, acc0);
289
290 s0 = (mlib_s32) * sp;
291 sp--;
292
293 t0 = VIS_LD_U16_I(table0, 2 * s0);
294 acc0 = vis_faligndata(t0, acc0);
295 }
296 else if (num == 3) {
297 s0 = (mlib_s32) * sp;
298 sp--;
299
300 t0 = VIS_LD_U16_I(table2, 2 * s0);
301 acc0 = vis_faligndata(t0, acc0);
302
303 s0 = (mlib_s32) * sp;
304 sp--;
305
306 t0 = VIS_LD_U16_I(table1, 2 * s0);
307 acc0 = vis_faligndata(t0, acc0);
308
309 s0 = (mlib_s32) * sp;
310 sp--;
311
312 t0 = VIS_LD_U16_I(table0, 2 * s0);
313 acc0 = vis_faligndata(t0, acc0);
314 }
315
/* NOTE(review): presumably a masked store of only the lanes from dp to dend -- confirm */
316 emask = vis_edge16(dp, dend);
317 vis_pst_16(acc0, dp, emask);
318 }
319}
320
321/***************************************************************/
/*
 * Row kernel: translate xsize U8 source values into S16 values through
 * look-up tables, for a source pointer that lies 2 bytes past a 32-bit word
 * boundary (the entry points in this file call it when
 * (source address & 3) == 2).
 *
 *   src             first source byte; words are read starting at src - 2
 *   dst             first destination element; written through a mlib_d64
 *                   pointer (the entry points in this file first advance
 *                   dst to an 8-byte boundary)
 *   xsize           number of S16 elements to produce
 *   table0..table3  tables for the 1st, 2nd, 3rd and 4th element of every
 *                   group of four
 *
 * Every group of four is assembled from two consecutive words: the two low
 * bytes of s0 (table0, table1) and the two high bytes of s1 (table2,
 * table3).  s1 then becomes s0 for the next group.
 *
 * The word at src - 2 is read even when xsize < 4, and inside the loop one
 * word beyond the current group is loaded ahead.
 *
 * NOTE(review): acc0 is read by vis_faligndata before it is ever assigned.
 * Presumably vis_alignaddr(0, 6) makes each vis_faligndata shift one 16-bit
 * lane into acc0, so four steps replace its whole content -- confirm against
 * the VIS documentation.
 */
322void mlib_v_ImageLookUp_U8_S16_124_SrcOff2_D1(const mlib_u8 *src,
323 mlib_s16 *dst,
324 mlib_s32 xsize,
325 const mlib_s16 *table0,
326 const mlib_s16 *table1,
327 const mlib_s16 *table2,
328 const mlib_s16 *table3)
329{
330 mlib_u32 *sa; /* aligned pointer to source data */
331 mlib_u8 *sp; /* pointer to source data */
332 mlib_u32 s0, s1; /* source data */
333 mlib_s16 *dl; /* pointer to start of destination */
334 mlib_s16 *dend; /* pointer to end of destination */
335 mlib_d64 *dp; /* aligned pointer to destination */
336 mlib_d64 t0, t1, t2; /* destination data */
337 mlib_d64 t3, acc0; /* destination data */
338 mlib_s32 emask; /* edge mask */
339 mlib_s32 i, num; /* loop variable */
340
/* step back to the word that contains the first source byte */
341 sa = (mlib_u32 *) (src - 2);
342 dl = dst;
343 dp = (mlib_d64 *) dl;
/* dend addresses the last destination element, not one past it */
344 dend = dl + xsize - 1;
345
346 vis_alignaddr((void *)0, 6);
347
348 s0 = *sa++;
349
/* at least one full group of four elements */
350 if (xsize >= 4) {
351
352 s1 = *sa++;
353
/* steady state: s0/s1 straddle the current group; next word is loaded ahead */
354#pragma pipeloop(0)
355 for (i = 0; i <= xsize - 8; i += 4) {
356 t3 = VIS_LD_U16_I(table3, (s1 >> 15) & 0x1FE);
357 t2 = VIS_LD_U16_I(table2, (s1 >> 23) & 0x1FE);
358 t1 = VIS_LD_U16_I(table1, (s0 << 1) & 0x1FE);
359 t0 = VIS_LD_U16_I(table0, (s0 >> 7) & 0x1FE);
360 acc0 = vis_faligndata(t3, acc0);
361 acc0 = vis_faligndata(t2, acc0);
362 acc0 = vis_faligndata(t1, acc0);
363 acc0 = vis_faligndata(t0, acc0);
364 s0 = s1;
365 s1 = *sa++;
366 *dp++ = acc0;
367 }
368
/* last full group: same work as the loop body, without a further load */
369 t3 = VIS_LD_U16_I(table3, (s1 >> 15) & 0x1FE);
370 t2 = VIS_LD_U16_I(table2, (s1 >> 23) & 0x1FE);
371 t1 = VIS_LD_U16_I(table1, (s0 << 1) & 0x1FE);
372 t0 = VIS_LD_U16_I(table0, (s0 >> 7) & 0x1FE);
373 acc0 = vis_faligndata(t3, acc0);
374 acc0 = vis_faligndata(t2, acc0);
375 acc0 = vis_faligndata(t1, acc0);
376 acc0 = vis_faligndata(t0, acc0);
377 s0 = s1;
378 *dp++ = acc0;
379 }
380
/*
 * sa is one word past the last word loaded, and two bytes of that word are
 * still unprocessed, so the next source byte is at sa - 2.
 */
381 sp = (mlib_u8 *) sa;
382 sp -= 2;
383
/*
 * Tail of 1..3 elements, read byte by byte.  sp is moved to the last
 * remaining source byte and the look-ups run from the last element back to
 * the first, so the first remaining element always uses table0.
 */
384 if ((mlib_addr) dp <= (mlib_addr) dend) {
385
386 num = (mlib_s16 *) dend - (mlib_s16 *) dp;
387 sp += num;
388 num++;
389
390 if (num == 1) {
391 s0 = (mlib_s32) * sp;
392 sp--;
393
394 t0 = VIS_LD_U16_I(table0, 2 * s0);
395 acc0 = vis_faligndata(t0, acc0);
396 }
397 else if (num == 2) {
398 s0 = (mlib_s32) * sp;
399 sp--;
400
401 t0 = VIS_LD_U16_I(table1, 2 * s0);
402 acc0 = vis_faligndata(t0, acc0);
403
404 s0 = (mlib_s32) * sp;
405 sp--;
406
407 t0 = VIS_LD_U16_I(table0, 2 * s0);
408 acc0 = vis_faligndata(t0, acc0);
409 }
410 else if (num == 3) {
411 s0 = (mlib_s32) * sp;
412 sp--;
413
414 t0 = VIS_LD_U16_I(table2, 2 * s0);
415 acc0 = vis_faligndata(t0, acc0);
416
417 s0 = (mlib_s32) * sp;
418 sp--;
419
420 t0 = VIS_LD_U16_I(table1, 2 * s0);
421 acc0 = vis_faligndata(t0, acc0);
422
423 s0 = (mlib_s32) * sp;
424 sp--;
425
426 t0 = VIS_LD_U16_I(table0, 2 * s0);
427 acc0 = vis_faligndata(t0, acc0);
428 }
429
/* NOTE(review): presumably a masked store of only the lanes from dp to dend -- confirm */
430 emask = vis_edge16(dp, dend);
431 vis_pst_16(acc0, dp, emask);
432 }
433}
434
435/***************************************************************/
/*
 * Row kernel: translate xsize U8 source values into S16 values through
 * look-up tables, for a source pointer that lies 3 bytes past a 32-bit word
 * boundary (the entry points in this file call it when
 * (source address & 3) == 3).
 *
 *   src             first source byte; words are read starting at src - 3
 *   dst             first destination element; written through a mlib_d64
 *                   pointer (the entry points in this file first advance
 *                   dst to an 8-byte boundary)
 *   xsize           number of S16 elements to produce
 *   table0..table3  tables for the 1st, 2nd, 3rd and 4th element of every
 *                   group of four
 *
 * Every group of four is assembled from two consecutive words: the least
 * significant byte of s0 (table0) and the three high bytes of s1 (table1,
 * table2, table3).  s1 then becomes s0 for the next group.
 *
 * The word at src - 3 is read even when xsize < 4, and inside the loop one
 * word beyond the current group is loaded ahead.
 *
 * NOTE(review): acc0 is read by vis_faligndata before it is ever assigned.
 * Presumably vis_alignaddr(0, 6) makes each vis_faligndata shift one 16-bit
 * lane into acc0, so four steps replace its whole content -- confirm against
 * the VIS documentation.
 */
436void mlib_v_ImageLookUp_U8_S16_124_SrcOff3_D1(const mlib_u8 *src,
437 mlib_s16 *dst,
438 mlib_s32 xsize,
439 const mlib_s16 *table0,
440 const mlib_s16 *table1,
441 const mlib_s16 *table2,
442 const mlib_s16 *table3)
443{
444 mlib_u32 *sa; /* aligned pointer to source data */
445 mlib_u8 *sp; /* pointer to source data */
446 mlib_u32 s0, s1; /* source data */
447 mlib_s16 *dl; /* pointer to start of destination */
448 mlib_s16 *dend; /* pointer to end of destination */
449 mlib_d64 *dp; /* aligned pointer to destination */
450 mlib_d64 t0, t1, t2; /* destination data */
451 mlib_d64 t3, acc0; /* destination data */
452 mlib_s32 emask; /* edge mask */
453 mlib_s32 i, num; /* loop variable */
454
/* step back to the word that contains the first source byte */
455 sa = (mlib_u32 *) (src - 3);
456 dl = dst;
457 dp = (mlib_d64 *) dl;
/* dend addresses the last destination element, not one past it */
458 dend = dl + xsize - 1;
459
460 vis_alignaddr((void *)0, 6);
461
462 s0 = *sa++;
463
/* at least one full group of four elements */
464 if (xsize >= 4) {
465
466 s1 = *sa++;
467
/* steady state: s0/s1 straddle the current group; next word is loaded ahead */
468#pragma pipeloop(0)
469 for (i = 0; i <= xsize - 8; i += 4) {
470 t3 = VIS_LD_U16_I(table3, (s1 >> 7) & 0x1FE);
471 t2 = VIS_LD_U16_I(table2, (s1 >> 15) & 0x1FE);
472 t1 = VIS_LD_U16_I(table1, (s1 >> 23) & 0x1FE);
473 t0 = VIS_LD_U16_I(table0, (s0 << 1) & 0x1FE);
474 acc0 = vis_faligndata(t3, acc0);
475 acc0 = vis_faligndata(t2, acc0);
476 acc0 = vis_faligndata(t1, acc0);
477 acc0 = vis_faligndata(t0, acc0);
478 s0 = s1;
479 s1 = *sa++;
480 *dp++ = acc0;
481 }
482
/* last full group: same work as the loop body, without a further load */
483 t3 = VIS_LD_U16_I(table3, (s1 >> 7) & 0x1FE);
484 t2 = VIS_LD_U16_I(table2, (s1 >> 15) & 0x1FE);
485 t1 = VIS_LD_U16_I(table1, (s1 >> 23) & 0x1FE);
486 t0 = VIS_LD_U16_I(table0, (s0 << 1) & 0x1FE);
487 acc0 = vis_faligndata(t3, acc0);
488 acc0 = vis_faligndata(t2, acc0);
489 acc0 = vis_faligndata(t1, acc0);
490 acc0 = vis_faligndata(t0, acc0);
491 s0 = s1;
492 *dp++ = acc0;
493 }
494
/*
 * sa is one word past the last word loaded, and one byte of that word is
 * still unprocessed, so the next source byte is at sa - 1.
 */
495 sp = (mlib_u8 *) sa;
496 sp -= 1;
497
/*
 * Tail of 1..3 elements, read byte by byte.  sp is moved to the last
 * remaining source byte and the look-ups run from the last element back to
 * the first, so the first remaining element always uses table0.
 */
498 if ((mlib_addr) dp <= (mlib_addr) dend) {
499
500 num = (mlib_s16 *) dend - (mlib_s16 *) dp;
501 sp += num;
502 num++;
503
504 if (num == 1) {
505 s0 = (mlib_s32) * sp;
506 sp--;
507
508 t0 = VIS_LD_U16_I(table0, 2 * s0);
509 acc0 = vis_faligndata(t0, acc0);
510 }
511 else if (num == 2) {
512 s0 = (mlib_s32) * sp;
513 sp--;
514
515 t0 = VIS_LD_U16_I(table1, 2 * s0);
516 acc0 = vis_faligndata(t0, acc0);
517
518 s0 = (mlib_s32) * sp;
519 sp--;
520
521 t0 = VIS_LD_U16_I(table0, 2 * s0);
522 acc0 = vis_faligndata(t0, acc0);
523 }
524 else if (num == 3) {
525 s0 = (mlib_s32) * sp;
526 sp--;
527
528 t0 = VIS_LD_U16_I(table2, 2 * s0);
529 acc0 = vis_faligndata(t0, acc0);
530
531 s0 = (mlib_s32) * sp;
532 sp--;
533
534 t0 = VIS_LD_U16_I(table1, 2 * s0);
535 acc0 = vis_faligndata(t0, acc0);
536
537 s0 = (mlib_s32) * sp;
538 sp--;
539
540 t0 = VIS_LD_U16_I(table0, 2 * s0);
541 acc0 = vis_faligndata(t0, acc0);
542 }
543
/* NOTE(review): presumably a masked store of only the lanes from dp to dend -- confirm */
544 emask = vis_edge16(dp, dend);
545 vis_pst_16(acc0, dp, emask);
546 }
547}
548
549/***************************************************************/
550void mlib_v_ImageLookUp_U8_S16_1(const mlib_u8 *src,
551 mlib_s32 slb,
552 mlib_s16 *dst,
553 mlib_s32 dlb,
554 mlib_s32 xsize,
555 mlib_s32 ysize,
556 const mlib_s16 **table)
557{
558 mlib_u8 *sl;
559 mlib_s16 *dl;
560 const mlib_s16 *tab = table[0];
561 mlib_s32 j, i;
562
563 sl = (void *)src;
564 dl = dst;
565
566 /* row loop */
567 for (j = 0; j < ysize; j++) {
568 mlib_u8 *sp = sl;
569 mlib_s16 *dp = dl;
570 mlib_s32 off, size = xsize;
571
572 off = ((8 - ((mlib_addr) dp & 7)) & 7) >> 1;
573
574 off = (off < size) ? off : size;
575
576 for (i = 0; i < off; i++) {
577 *dp++ = tab[(*sp++)];
578 size--;
579 }
580
581 if (size > 0) {
582
583 off = (mlib_addr) sp & 3;
584
585 if (off == 0) {
586 mlib_v_ImageLookUp_U8_S16_124_SrcOff0_D1(sp, dp, size, tab, tab, tab,
587 tab);
588 }
589 else if (off == 1) {
590 mlib_v_ImageLookUp_U8_S16_124_SrcOff1_D1(sp, dp, size, tab, tab, tab,
591 tab);
592 }
593 else if (off == 2) {
594 mlib_v_ImageLookUp_U8_S16_124_SrcOff2_D1(sp, dp, size, tab, tab, tab,
595 tab);
596 }
597 else {
598 mlib_v_ImageLookUp_U8_S16_124_SrcOff3_D1(sp, dp, size, tab, tab, tab,
599 tab);
600 }
601 }
602
603 sl = (mlib_u8 *) ((mlib_u8 *) sl + slb);
604 dl = (mlib_s16 *) ((mlib_u8 *) dl + dlb);
605 }
606}
607
608/***************************************************************/
609void mlib_v_ImageLookUp_U8_S16_2(const mlib_u8 *src,
610 mlib_s32 slb,
611 mlib_s16 *dst,
612 mlib_s32 dlb,
613 mlib_s32 xsize,
614 mlib_s32 ysize,
615 const mlib_s16 **table)
616{
617 mlib_u8 *sl;
618 mlib_s16 *dl;
619 const mlib_s16 *tab;
620 mlib_s32 j, i;
621
622 sl = (void *)src;
623 dl = dst;
624
625 /* row loop */
626 for (j = 0; j < ysize; j++) {
627 mlib_u8 *sp = sl;
628 mlib_s16 *dp = dl;
629 mlib_s32 off, size = xsize * 2;
630 const mlib_s16 *tab0 = table[0];
631 const mlib_s16 *tab1 = table[1];
632
633 off = ((8 - ((mlib_addr) dp & 7)) & 7) >> 1;
634
635 off = (off < size) ? off : size;
636
637 for (i = 0; i < off - 1; i += 2) {
638 *dp++ = tab0[(*sp++)];
639 *dp++ = tab1[(*sp++)];
640 size -= 2;
641 }
642
643 if ((off & 1) != 0) {
644 *dp++ = tab0[(*sp++)];
645 size--;
646 tab = tab0;
647 tab0 = tab1;
648 tab1 = tab;
649 }
650
651 if (size > 0) {
652
653 off = (mlib_addr) sp & 3;
654
655 if (off == 0) {
656 mlib_v_ImageLookUp_U8_S16_124_SrcOff0_D1(sp, dp, size, tab0, tab1, tab0,
657 tab1);
658 }
659 else if (off == 1) {
660 mlib_v_ImageLookUp_U8_S16_124_SrcOff1_D1(sp, dp, size, tab0, tab1, tab0,
661 tab1);
662 }
663 else if (off == 2) {
664 mlib_v_ImageLookUp_U8_S16_124_SrcOff2_D1(sp, dp, size, tab0, tab1, tab0,
665 tab1);
666 }
667 else {
668 mlib_v_ImageLookUp_U8_S16_124_SrcOff3_D1(sp, dp, size, tab0, tab1, tab0,
669 tab1);
670 }
671 }
672
673 sl = (mlib_u8 *) ((mlib_u8 *) sl + slb);
674 dl = (mlib_s16 *) ((mlib_u8 *) dl + dlb);
675 }
676}
677
678/***************************************************************/
679void mlib_v_ImageLookUp_U8_S16_4(const mlib_u8 *src,
680 mlib_s32 slb,
681 mlib_s16 *dst,
682 mlib_s32 dlb,
683 mlib_s32 xsize,
684 mlib_s32 ysize,
685 const mlib_s16 **table)
686{
687 mlib_u8 *sl;
688 mlib_s16 *dl;
689 const mlib_s16 *tab;
690 mlib_s32 j;
691
692 sl = (void *)src;
693 dl = dst;
694
695 /* row loop */
696 for (j = 0; j < ysize; j++) {
697 mlib_u8 *sp = sl;
698 mlib_s16 *dp = dl;
699 const mlib_s16 *tab0 = table[0];
700 const mlib_s16 *tab1 = table[1];
701 const mlib_s16 *tab2 = table[2];
702 const mlib_s16 *tab3 = table[3];
703 mlib_s32 off, size = xsize * 4;
704
705 off = ((8 - ((mlib_addr) dp & 7)) & 7) >> 1;
706
707 off = (off < size) ? off : size;
708
709 if (off == 1) {
710 *dp++ = tab0[(*sp++)];
711 tab = tab0;
712 tab0 = tab1;
713 tab1 = tab2;
714 tab2 = tab3;
715 tab3 = tab;
716 size--;
717 }
718 else if (off == 2) {
719 *dp++ = tab0[(*sp++)];
720 *dp++ = tab1[(*sp++)];
721 tab = tab0;
722 tab0 = tab2;
723 tab2 = tab;
724 tab = tab1;
725 tab1 = tab3;
726 tab3 = tab;
727 size -= 2;
728 }
729 else if (off == 3) {
730 *dp++ = tab0[(*sp++)];
731 *dp++ = tab1[(*sp++)];
732 *dp++ = tab2[(*sp++)];
733 tab = tab3;
734 tab3 = tab2;
735 tab2 = tab1;
736 tab1 = tab0;
737 tab0 = tab;
738 size -= 3;
739 }
740
741 if (size > 0) {
742
743 off = (mlib_addr) sp & 3;
744
745 if (off == 0) {
746 mlib_v_ImageLookUp_U8_S16_124_SrcOff0_D1(sp, dp, size, tab0, tab1, tab2,
747 tab3);
748 }
749 else if (off == 1) {
750 mlib_v_ImageLookUp_U8_S16_124_SrcOff1_D1(sp, dp, size, tab0, tab1, tab2,
751 tab3);
752 }
753 else if (off == 2) {
754 mlib_v_ImageLookUp_U8_S16_124_SrcOff2_D1(sp, dp, size, tab0, tab1, tab2,
755 tab3);
756 }
757 else {
758 mlib_v_ImageLookUp_U8_S16_124_SrcOff3_D1(sp, dp, size, tab0, tab1, tab2,
759 tab3);
760 }
761 }
762
763 sl = (mlib_u8 *) ((mlib_u8 *) sl + slb);
764 dl = (mlib_s16 *) ((mlib_u8 *) dl + dlb);
765 }
766}
767
768/***************************************************************/
/*
 * Row kernel for three tables: translate xsize U8 source values into S16
 * values, reading the source directly as 32-bit words (no byte offset).
 *
 *   src             first source byte; read through a mlib_u32 pointer
 *   dst             first destination element; written through a mlib_d64
 *                   pointer
 *   xsize           number of S16 elements to produce
 *   table0..table2  tables applied cyclically: element k uses table (k mod 3)
 *
 * Work is done in groups of 12 elements (three source words, three 64-bit
 * stores), the smallest group in which both the 4-element store width and
 * the 3-table cycle line up.  A possible 8-element and 4-element group
 * follow, each rotating the table pointers so that table0 always names the
 * table of the next unprocessed element.  A final 1..3 elements are handled
 * byte by byte and written with an edge mask.
 *
 * Table byte offsets taken from a word: (s >> 23) & 0x1FE is the most
 * significant byte * 2, then (s >> 15), (s >> 7), and (s << 1) & 0x1FE for
 * the least significant byte * 2.
 *
 * NOTE(review): acc0/acc1/acc2 are read by vis_faligndata before they are
 * ever assigned.  Presumably vis_alignaddr(0, 6) makes each vis_faligndata
 * shift one 16-bit lane in, so four steps replace the whole content --
 * confirm against the VIS documentation.
 * NOTE(review): callers are outside this view; presumably dst is 8-byte
 * aligned on entry as for the four-table kernels -- confirm.
 */
769void mlib_v_ImageLookUp_U8_S16_3_SrcOff0_D1(const mlib_u8 *src,
770 mlib_s16 *dst,
771 mlib_s32 xsize,
772 const mlib_s16 *table0,
773 const mlib_s16 *table1,
774 const mlib_s16 *table2)
775{
776 mlib_u32 *sa; /* aligned pointer to source data */
777 mlib_u8 *sp; /* pointer to source data */
778 mlib_u32 s0, s1, s2; /* source data */
779 mlib_s16 *dl; /* pointer to start of destination */
780 mlib_s16 *dend; /* pointer to end of destination */
781 mlib_d64 *dp; /* aligned pointer to destination */
782 mlib_d64 t0, t1, t2; /* destination data */
783 mlib_d64 t3, t4, t5; /* destination data */
784 mlib_d64 t6, t7, t8; /* destination data */
785 mlib_d64 t9, t10, t11; /* destination data */
786 mlib_d64 acc0, acc1, acc2; /* destination data */
787 mlib_s32 emask; /* edge mask */
788 mlib_s32 i, num; /* loop variable */
789 const mlib_s16 *table;
790
791 sa = (mlib_u32 *) src;
792 dl = dst;
793 dp = (mlib_d64 *) dl;
/* dend addresses the last destination element, not one past it */
794 dend = dl + xsize - 1;
795
796 vis_alignaddr((void *)0, 6);
797
/* i counts the elements already produced by the full-word groups below */
798 i = 0;
799
/* at least one full group of twelve elements */
800 if (xsize >= 12) {
801
802 s0 = sa[0];
803 s1 = sa[1];
804 s2 = sa[2];
805 sa += 3;
806
/*
 * Steady state: s0..s2 hold the current group; the three words of the next
 * group are loaded before the current results are stored.  The loop stops
 * one group early so that this look-ahead load is always consumed.
 */
807#pragma pipeloop(0)
808 for (i = 0; i <= xsize - 24; i += 12, sa += 3, dp += 3) {
809 t3 = VIS_LD_U16_I(table0, (s0 << 1) & 0x1FE);
810 t2 = VIS_LD_U16_I(table2, (s0 >> 7) & 0x1FE);
811 t1 = VIS_LD_U16_I(table1, (s0 >> 15) & 0x1FE);
812 t0 = VIS_LD_U16_I(table0, (s0 >> 23) & 0x1FE);
813 t7 = VIS_LD_U16_I(table1, (s1 << 1) & 0x1FE);
814 t6 = VIS_LD_U16_I(table0, (s1 >> 7) & 0x1FE);
815 t5 = VIS_LD_U16_I(table2, (s1 >> 15) & 0x1FE);
816 t4 = VIS_LD_U16_I(table1, (s1 >> 23) & 0x1FE);
817 t11 = VIS_LD_U16_I(table2, (s2 << 1) & 0x1FE);
818 t10 = VIS_LD_U16_I(table1, (s2 >> 7) & 0x1FE);
819 t9 = VIS_LD_U16_I(table0, (s2 >> 15) & 0x1FE);
820 t8 = VIS_LD_U16_I(table2, (s2 >> 23) & 0x1FE);
821 acc0 = vis_faligndata(t3, acc0);
822 acc0 = vis_faligndata(t2, acc0);
823 acc0 = vis_faligndata(t1, acc0);
824 acc0 = vis_faligndata(t0, acc0);
825 acc1 = vis_faligndata(t7, acc1);
826 acc1 = vis_faligndata(t6, acc1);
827 acc1 = vis_faligndata(t5, acc1);
828 acc1 = vis_faligndata(t4, acc1);
829 acc2 = vis_faligndata(t11, acc2);
830 acc2 = vis_faligndata(t10, acc2);
831 acc2 = vis_faligndata(t9, acc2);
832 acc2 = vis_faligndata(t8, acc2);
833 s0 = sa[0];
834 s1 = sa[1];
835 s2 = sa[2];
836 dp[0] = acc0;
837 dp[1] = acc1;
838 dp[2] = acc2;
839 }
840
/* last full group of twelve: same work as the loop body, no further load */
841 t3 = VIS_LD_U16_I(table0, (s0 << 1) & 0x1FE);
842 t2 = VIS_LD_U16_I(table2, (s0 >> 7) & 0x1FE);
843 t1 = VIS_LD_U16_I(table1, (s0 >> 15) & 0x1FE);
844 t0 = VIS_LD_U16_I(table0, (s0 >> 23) & 0x1FE);
845 t7 = VIS_LD_U16_I(table1, (s1 << 1) & 0x1FE);
846 t6 = VIS_LD_U16_I(table0, (s1 >> 7) & 0x1FE);
847 t5 = VIS_LD_U16_I(table2, (s1 >> 15) & 0x1FE);
848 t4 = VIS_LD_U16_I(table1, (s1 >> 23) & 0x1FE);
849 t11 = VIS_LD_U16_I(table2, (s2 << 1) & 0x1FE);
850 t10 = VIS_LD_U16_I(table1, (s2 >> 7) & 0x1FE);
851 t9 = VIS_LD_U16_I(table0, (s2 >> 15) & 0x1FE);
852 t8 = VIS_LD_U16_I(table2, (s2 >> 23) & 0x1FE);
853 acc0 = vis_faligndata(t3, acc0);
854 acc0 = vis_faligndata(t2, acc0);
855 acc0 = vis_faligndata(t1, acc0);
856 acc0 = vis_faligndata(t0, acc0);
857 acc1 = vis_faligndata(t7, acc1);
858 acc1 = vis_faligndata(t6, acc1);
859 acc1 = vis_faligndata(t5, acc1);
860 acc1 = vis_faligndata(t4, acc1);
861 acc2 = vis_faligndata(t11, acc2);
862 acc2 = vis_faligndata(t10, acc2);
863 acc2 = vis_faligndata(t9, acc2);
864 acc2 = vis_faligndata(t8, acc2);
865 dp[0] = acc0;
866 dp[1] = acc1;
867 dp[2] = acc2;
868 dp += 3;
869 i += 12;
870 }
871
/* eight more elements (two words) if available */
872 if (i <= xsize - 8) {
873 s0 = sa[0];
874 s1 = sa[1];
875 t3 = VIS_LD_U16_I(table0, (s0 << 1) & 0x1FE);
876 t2 = VIS_LD_U16_I(table2, (s0 >> 7) & 0x1FE);
877 t1 = VIS_LD_U16_I(table1, (s0 >> 15) & 0x1FE);
878 t0 = VIS_LD_U16_I(table0, (s0 >> 23) & 0x1FE);
879 t7 = VIS_LD_U16_I(table1, (s1 << 1) & 0x1FE);
880 t6 = VIS_LD_U16_I(table0, (s1 >> 7) & 0x1FE);
881 t5 = VIS_LD_U16_I(table2, (s1 >> 15) & 0x1FE);
882 t4 = VIS_LD_U16_I(table1, (s1 >> 23) & 0x1FE);
883 acc0 = vis_faligndata(t3, acc0);
884 acc0 = vis_faligndata(t2, acc0);
885 acc0 = vis_faligndata(t1, acc0);
886 acc0 = vis_faligndata(t0, acc0);
887 acc1 = vis_faligndata(t7, acc1);
888 acc1 = vis_faligndata(t6, acc1);
889 acc1 = vis_faligndata(t5, acc1);
890 acc1 = vis_faligndata(t4, acc1);
891 dp[0] = acc0;
892 dp[1] = acc1;
/* 8 mod 3 == 2: the next element uses the old table2, then table0, table1 */
893 table = table0;
894 table0 = table2;
895 table2 = table1;
896 table1 = table;
897 sa += 2;
898 i += 8;
899 dp += 2;
900 }
901
/* four more elements (one word) if available */
902 if (i <= xsize - 4) {
903 s0 = sa[0];
904 t3 = VIS_LD_U16_I(table0, (s0 << 1) & 0x1FE);
905 t2 = VIS_LD_U16_I(table2, (s0 >> 7) & 0x1FE);
906 t1 = VIS_LD_U16_I(table1, (s0 >> 15) & 0x1FE);
907 t0 = VIS_LD_U16_I(table0, (s0 >> 23) & 0x1FE);
908 acc0 = vis_faligndata(t3, acc0);
909 acc0 = vis_faligndata(t2, acc0);
910 acc0 = vis_faligndata(t1, acc0);
911 acc0 = vis_faligndata(t0, acc0);
912 dp[0] = acc0;
/* 4 mod 3 == 1: the next element uses the old table1, then table2, table0 */
913 table = table0;
914 table0 = table1;
915 table1 = table2;
916 table2 = table;
917 sa++;
918 i += 4;
919 dp++;
920 }
921
/* sa points just past the last consumed word: first unprocessed byte */
922 sp = (mlib_u8 *) sa;
923
/*
 * Tail of 1..3 elements, read byte by byte.  sp is moved to the last
 * remaining source byte and the look-ups run from the last element back to
 * the first, so the first remaining element always uses table0.
 */
924 if ((mlib_addr) dp <= (mlib_addr) dend) {
925
926 num = (mlib_s16 *) dend - (mlib_s16 *) dp;
927 sp += num;
928 num++;
929
930 if (num == 1) {
931 s0 = (mlib_s32) * sp;
932 sp--;
933
934 t0 = VIS_LD_U16_I(table0, 2 * s0);
935 acc0 = vis_faligndata(t0, acc0);
936 }
937 else if (num == 2) {
938 s0 = (mlib_s32) * sp;
939 sp--;
940
941 t0 = VIS_LD_U16_I(table1, 2 * s0);
942 acc0 = vis_faligndata(t0, acc0);
943
944 s0 = (mlib_s32) * sp;
945 sp--;
946
947 t0 = VIS_LD_U16_I(table0, 2 * s0);
948 acc0 = vis_faligndata(t0, acc0);
949 }
950 else if (num == 3) {
951 s0 = (mlib_s32) * sp;
952 sp--;
953
954 t0 = VIS_LD_U16_I(table2, 2 * s0);
955 acc0 = vis_faligndata(t0, acc0);
956
957 s0 = (mlib_s32) * sp;
958 sp--;
959
960 t0 = VIS_LD_U16_I(table1, 2 * s0);
961 acc0 = vis_faligndata(t0, acc0);
962
963 s0 = (mlib_s32) * sp;
964 sp--;
965
966 t0 = VIS_LD_U16_I(table0, 2 * s0);
967 acc0 = vis_faligndata(t0, acc0);
968 }
969
/* NOTE(review): presumably a masked store of only the lanes from dp to dend -- confirm */
970 emask = vis_edge16(dp, dend);
971 vis_pst_16(acc0, dp, emask);
972 }
973}
974
975/***************************************************************/
/*
 * Row kernel for three tables: translate xsize U8 source values into S16
 * values, for a source pointer that lies 1 byte past the 32-bit word that is
 * read first (words are read starting at src - 1).
 *
 *   src             first source byte
 *   dst             first destination element; written through a mlib_d64
 *                   pointer
 *   xsize           number of S16 elements to produce
 *   table0..table2  tables applied cyclically: element k uses table (k mod 3)
 *
 * Because of the 1-byte offset every four elements straddle two words: three
 * low bytes of one word plus the most significant byte of the next.  s0
 * always carries the word whose three low bytes are still unprocessed.
 *
 * Work is done in groups of 12 elements, then a possible 8-element and
 * 4-element group, each rotating the table pointers so that table0 always
 * names the table of the next unprocessed element.  A final 1..3 elements
 * are handled byte by byte and written with an edge mask.
 *
 * The word at src - 1 is always read, and the main loop loads the next
 * group's words ahead of use.
 *
 * NOTE(review): acc0/acc1/acc2 are read by vis_faligndata before they are
 * ever assigned.  Presumably vis_alignaddr(0, 6) makes each vis_faligndata
 * shift one 16-bit lane in, so four steps replace the whole content --
 * confirm against the VIS documentation.
 * NOTE(review): callers are outside this view; presumably dst is 8-byte
 * aligned on entry as for the four-table kernels -- confirm.
 */
976void mlib_v_ImageLookUp_U8_S16_3_SrcOff1_D1(const mlib_u8 *src,
977 mlib_s16 *dst,
978 mlib_s32 xsize,
979 const mlib_s16 *table0,
980 const mlib_s16 *table1,
981 const mlib_s16 *table2)
982{
983 mlib_u32 *sa; /* aligned pointer to source data */
984 mlib_u8 *sp; /* pointer to source data */
985 mlib_u32 s0, s1, s2, s3; /* source data */
986 mlib_s16 *dl; /* pointer to start of destination */
987 mlib_s16 *dend; /* pointer to end of destination */
988 mlib_d64 *dp; /* aligned pointer to destination */
989 mlib_d64 t0, t1, t2; /* destination data */
990 mlib_d64 t3, t4, t5; /* destination data */
991 mlib_d64 t6, t7, t8; /* destination data */
992 mlib_d64 t9, t10, t11; /* destination data */
993 mlib_d64 acc0, acc1, acc2; /* destination data */
994 mlib_s32 emask; /* edge mask */
995 mlib_s32 i, num; /* loop variable */
996 const mlib_s16 *table;
997
/* step back to the word that contains the first source byte */
998 sa = (mlib_u32 *) (src - 1);
999 dl = dst;
1000 dp = (mlib_d64 *) dl;
/* dend addresses the last destination element, not one past it */
1001 dend = dl + xsize - 1;
1002
1003 vis_alignaddr((void *)0, 6);
1004
/* i counts the elements already produced by the full-word groups below */
1005 i = 0;
1006
1007 s0 = *sa++;
1008
/* at least one full group of twelve elements */
1009 if (xsize >= 12) {
1010
1011 s1 = sa[0];
1012 s2 = sa[1];
1013 s3 = sa[2];
1014 sa += 3;
1015
/*
 * Steady state: s0..s3 cover the current group; s3 is carried over as the
 * next s0 and three more words are loaded before the results are stored.
 */
1016#pragma pipeloop(0)
1017 for (i = 0; i <= xsize - 24; i += 12, sa += 3, dp += 3) {
1018 t3 = VIS_LD_U16_I(table0, (s1 >> 23) & 0x1FE);
1019 t2 = VIS_LD_U16_I(table2, (s0 << 1) & 0x1FE);
1020 t1 = VIS_LD_U16_I(table1, (s0 >> 7) & 0x1FE);
1021 t0 = VIS_LD_U16_I(table0, (s0 >> 15) & 0x1FE);
1022 t7 = VIS_LD_U16_I(table1, (s2 >> 23) & 0x1FE);
1023 t6 = VIS_LD_U16_I(table0, (s1 << 1) & 0x1FE);
1024 t5 = VIS_LD_U16_I(table2, (s1 >> 7) & 0x1FE);
1025 t4 = VIS_LD_U16_I(table1, (s1 >> 15) & 0x1FE);
1026 t11 = VIS_LD_U16_I(table2, (s3 >> 23) & 0x1FE);
1027 t10 = VIS_LD_U16_I(table1, (s2 << 1) & 0x1FE);
1028 t9 = VIS_LD_U16_I(table0, (s2 >> 7) & 0x1FE);
1029 t8 = VIS_LD_U16_I(table2, (s2 >> 15) & 0x1FE);
1030 acc0 = vis_faligndata(t3, acc0);
1031 acc0 = vis_faligndata(t2, acc0);
1032 acc0 = vis_faligndata(t1, acc0);
1033 acc0 = vis_faligndata(t0, acc0);
1034 acc1 = vis_faligndata(t7, acc1);
1035 acc1 = vis_faligndata(t6, acc1);
1036 acc1 = vis_faligndata(t5, acc1);
1037 acc1 = vis_faligndata(t4, acc1);
1038 acc2 = vis_faligndata(t11, acc2);
1039 acc2 = vis_faligndata(t10, acc2);
1040 acc2 = vis_faligndata(t9, acc2);
1041 acc2 = vis_faligndata(t8, acc2);
1042 s0 = s3;
1043 s1 = sa[0];
1044 s2 = sa[1];
1045 s3 = sa[2];
1046 dp[0] = acc0;
1047 dp[1] = acc1;
1048 dp[2] = acc2;
1049 }
1050
/* last full group of twelve: same work as the loop body, no further load */
1051 t3 = VIS_LD_U16_I(table0, (s1 >> 23) & 0x1FE);
1052 t2 = VIS_LD_U16_I(table2, (s0 << 1) & 0x1FE);
1053 t1 = VIS_LD_U16_I(table1, (s0 >> 7) & 0x1FE);
1054 t0 = VIS_LD_U16_I(table0, (s0 >> 15) & 0x1FE);
1055 t7 = VIS_LD_U16_I(table1, (s2 >> 23) & 0x1FE);
1056 t6 = VIS_LD_U16_I(table0, (s1 << 1) & 0x1FE);
1057 t5 = VIS_LD_U16_I(table2, (s1 >> 7) & 0x1FE);
1058 t4 = VIS_LD_U16_I(table1, (s1 >> 15) & 0x1FE);
1059 t11 = VIS_LD_U16_I(table2, (s3 >> 23) & 0x1FE);
1060 t10 = VIS_LD_U16_I(table1, (s2 << 1) & 0x1FE);
1061 t9 = VIS_LD_U16_I(table0, (s2 >> 7) & 0x1FE);
1062 t8 = VIS_LD_U16_I(table2, (s2 >> 15) & 0x1FE);
1063 acc0 = vis_faligndata(t3, acc0);
1064 acc0 = vis_faligndata(t2, acc0);
1065 acc0 = vis_faligndata(t1, acc0);
1066 acc0 = vis_faligndata(t0, acc0);
1067 acc1 = vis_faligndata(t7, acc1);
1068 acc1 = vis_faligndata(t6, acc1);
1069 acc1 = vis_faligndata(t5, acc1);
1070 acc1 = vis_faligndata(t4, acc1);
1071 acc2 = vis_faligndata(t11, acc2);
1072 acc2 = vis_faligndata(t10, acc2);
1073 acc2 = vis_faligndata(t9, acc2);
1074 acc2 = vis_faligndata(t8, acc2);
1075 dp[0] = acc0;
1076 dp[1] = acc1;
1077 dp[2] = acc2;
/* s3 still holds three unprocessed bytes; carry it forward */
1078 s0 = s3;
1079 dp += 3;
1080 i += 12;
1081 }
1082
/* eight more elements (two further words) if available */
1083 if (i <= xsize - 8) {
1084 s1 = sa[0];
1085 s2 = sa[1];
1086 t3 = VIS_LD_U16_I(table0, (s1 >> 23) & 0x1FE);
1087 t2 = VIS_LD_U16_I(table2, (s0 << 1) & 0x1FE);
1088 t1 = VIS_LD_U16_I(table1, (s0 >> 7) & 0x1FE);
1089 t0 = VIS_LD_U16_I(table0, (s0 >> 15) & 0x1FE);
1090 t7 = VIS_LD_U16_I(table1, (s2 >> 23) & 0x1FE);
1091 t6 = VIS_LD_U16_I(table0, (s1 << 1) & 0x1FE);
1092 t5 = VIS_LD_U16_I(table2, (s1 >> 7) & 0x1FE);
1093 t4 = VIS_LD_U16_I(table1, (s1 >> 15) & 0x1FE);
1094 acc0 = vis_faligndata(t3, acc0);
1095 acc0 = vis_faligndata(t2, acc0);
1096 acc0 = vis_faligndata(t1, acc0);
1097 acc0 = vis_faligndata(t0, acc0);
1098 acc1 = vis_faligndata(t7, acc1);
1099 acc1 = vis_faligndata(t6, acc1);
1100 acc1 = vis_faligndata(t5, acc1);
1101 acc1 = vis_faligndata(t4, acc1);
1102 dp[0] = acc0;
1103 dp[1] = acc1;
/* 8 mod 3 == 2: the next element uses the old table2, then table0, table1 */
1104 table = table0;
1105 table0 = table2;
1106 table2 = table1;
1107 table1 = table;
1108 sa += 2;
1109 i += 8;
1110 dp += 2;
1111 s0 = s2;
1112 }
1113
/* four more elements (one further word) if available */
1114 if (i <= xsize - 4) {
1115 s1 = sa[0];
1116 t3 = VIS_LD_U16_I(table0, (s1 >> 23) & 0x1FE);
1117 t2 = VIS_LD_U16_I(table2, (s0 << 1) & 0x1FE);
1118 t1 = VIS_LD_U16_I(table1, (s0 >> 7) & 0x1FE);
1119 t0 = VIS_LD_U16_I(table0, (s0 >> 15) & 0x1FE);
1120 acc0 = vis_faligndata(t3, acc0);
1121 acc0 = vis_faligndata(t2, acc0);
1122 acc0 = vis_faligndata(t1, acc0);
1123 acc0 = vis_faligndata(t0, acc0);
1124 dp[0] = acc0;
/* 4 mod 3 == 1: the next element uses the old table1, then table2, table0 */
1125 table = table0;
1126 table0 = table1;
1127 table1 = table2;
1128 table2 = table;
1129 sa++;
1130 i += 4;
1131 dp++;
1132 s0 = s1;
1133 }
1134
/*
 * sa is one word past the last word loaded, and three bytes of that word are
 * still unprocessed, so the next source byte is at sa - 3.
 */
1135 sp = (mlib_u8 *) sa;
1136 sp -= 3;
1137
/*
 * Tail of 1..3 elements, read byte by byte.  sp is moved to the last
 * remaining source byte and the look-ups run from the last element back to
 * the first, so the first remaining element always uses table0.
 */
1138 if ((mlib_addr) dp <= (mlib_addr) dend) {
1139
1140 num = (mlib_s16 *) dend - (mlib_s16 *) dp;
1141 sp += num;
1142 num++;
1143
1144 if (num == 1) {
1145 s0 = (mlib_s32) * sp;
1146 sp--;
1147
1148 t0 = VIS_LD_U16_I(table0, 2 * s0);
1149 acc0 = vis_faligndata(t0, acc0);
1150 }
1151 else if (num == 2) {
1152 s0 = (mlib_s32) * sp;
1153 sp--;
1154
1155 t0 = VIS_LD_U16_I(table1, 2 * s0);
1156 acc0 = vis_faligndata(t0, acc0);
1157
1158 s0 = (mlib_s32) * sp;
1159 sp--;
1160
1161 t0 = VIS_LD_U16_I(table0, 2 * s0);
1162 acc0 = vis_faligndata(t0, acc0);
1163 }
1164 else if (num == 3) {
1165 s0 = (mlib_s32) * sp;
1166 sp--;
1167
1168 t0 = VIS_LD_U16_I(table2, 2 * s0);
1169 acc0 = vis_faligndata(t0, acc0);
1170
1171 s0 = (mlib_s32) * sp;
1172 sp--;
1173
1174 t0 = VIS_LD_U16_I(table1, 2 * s0);
1175 acc0 = vis_faligndata(t0, acc0);
1176
1177 s0 = (mlib_s32) * sp;
1178 sp--;
1179
1180 t0 = VIS_LD_U16_I(table0, 2 * s0);
1181 acc0 = vis_faligndata(t0, acc0);
1182 }
1183
/* NOTE(review): presumably a masked store of only the lanes from dp to dend -- confirm */
1184 emask = vis_edge16(dp, dend);
1185 vis_pst_16(acc0, dp, emask);
1186 }
1187}
1188
1189/***************************************************************/
/*
 * Look up one row of interleaved 3-channel U8 samples through three S16
 * tables, for a source pointer that lies 2 bytes past a 4-byte boundary
 * (mlib_v_ImageLookUp_U8_S16_3 calls this routine when
 * ((mlib_addr) src & 3) == 2).
 *
 *   src     first source sample. Data is fetched as aligned 32-bit words
 *           starting at src - 2, so up to 2 bytes before the row and up
 *           to 2 bytes after it are loaded (and ignored).
 *   dst     destination row. It is written through an mlib_d64 pointer,
 *           so it is assumed to be 8-byte aligned; the caller in this
 *           file aligns it before calling.
 *   xsize   number of S16 values to produce (samples, not pixels).
 *   table0  table for output elements 0, 3, 6, ...
 *   table1  table for output elements 1, 4, 7, ...
 *   table2  table for output elements 2, 5, 8, ...
 *
 * The shift/mask expressions treat bits 31..24 of a source word as its
 * first byte (big-endian order). Each one isolates a byte and doubles it
 * (mask 0x1FE), giving a byte offset into a table of 2-byte entries.
 *
 * NOTE(review): VIS_LD_U16_I, vis_alignaddr, vis_faligndata, vis_edge16
 * and vis_pst_16 are defined outside this chunk; the comments below
 * describe them per the VIS instruction set definition - confirm against
 * vis_proto.h.
 */
1190void mlib_v_ImageLookUp_U8_S16_3_SrcOff2_D1(const mlib_u8 *src,
1191 mlib_s16 *dst,
1192 mlib_s32 xsize,
1193 const mlib_s16 *table0,
1194 const mlib_s16 *table1,
1195 const mlib_s16 *table2)
1196{
1197 mlib_u32 *sa; /* aligned pointer to source data */
1198 mlib_u8 *sp; /* byte pointer to source data, used for the tail */
1199 mlib_u32 s0, s1, s2, s3; /* source words; s0 holds the word carried over */
1200 mlib_s16 *dl; /* pointer to start of destination */
1201 mlib_s16 *dend; /* pointer to the last destination element */
1202 mlib_d64 *dp; /* aligned pointer to destination */
1203 mlib_d64 t0, t1, t2; /* values returned by VIS_LD_U16_I */
1204 mlib_d64 t3, t4, t5; /* values returned by VIS_LD_U16_I */
1205 mlib_d64 t6, t7, t8; /* values returned by VIS_LD_U16_I */
1206 mlib_d64 t9, t10, t11; /* values returned by VIS_LD_U16_I */
1207 mlib_d64 acc0, acc1, acc2; /* output doubles being assembled */
1208 mlib_s32 emask; /* edge mask */
1209 mlib_s32 i, num; /* elements produced so far; tail element count */
1210 const mlib_s16 *table; /* scratch for rotating table0..table2 */
1211
  /* src - 2 is the 4-byte-aligned word that contains src[0] and src[1] */
1212 sa = (mlib_u32 *) (src - 2);
1213 dl = dst;
1214 dp = (mlib_d64 *) dl;
1215 dend = dl + xsize - 1;
1216
  /*
   * Set the alignment offset to 6. With that offset each
   * vis_faligndata(t, acc) below is expected to return the low 2 bytes of
   * t followed by the upper 6 bytes of acc, i.e. it pushes one 16-bit
   * value in at the front of acc. Four pushes replace all four lanes,
   * which is why acc0..acc2 are never initialized, and why lookups are
   * pushed last-element-first (t3, t2, t1, t0).
   */
1217 vis_alignaddr((void *)0, 6);
1218
1219 i = 0;
1220
  /* first word: its low two bytes are src[0] and src[1] */
1221 s0 = *sa++;
1222
  /* main path: groups of 12 outputs (4 pixels), built from 3 new words */
1223 if (xsize >= 12) {
1224
1225 s1 = sa[0];
1226 s2 = sa[1];
1227 s3 = sa[2];
1228 sa += 3;
1229
  /*
   * Each pass converts the 12 samples already held in s0..s3 and loads
   * the words for the next group; the condition guarantees that at least
   * 12 further samples exist after the current group.
   */
1230#pragma pipeloop(0)
1231 for (i = 0; i <= xsize - 24; i += 12, sa += 3, dp += 3) {
  /* elements 0..3: s0 bytes 2,3 and s1 bytes 0,1 */
1232 t3 = VIS_LD_U16_I(table0, (s1 >> 15) & 0x1FE);
1233 t2 = VIS_LD_U16_I(table2, (s1 >> 23) & 0x1FE);
1234 t1 = VIS_LD_U16_I(table1, (s0 << 1) & 0x1FE);
1235 t0 = VIS_LD_U16_I(table0, (s0 >> 7) & 0x1FE);
  /* elements 4..7: s1 bytes 2,3 and s2 bytes 0,1 */
1236 t7 = VIS_LD_U16_I(table1, (s2 >> 15) & 0x1FE);
1237 t6 = VIS_LD_U16_I(table0, (s2 >> 23) & 0x1FE);
1238 t5 = VIS_LD_U16_I(table2, (s1 << 1) & 0x1FE);
1239 t4 = VIS_LD_U16_I(table1, (s1 >> 7) & 0x1FE);
  /* elements 8..11: s2 bytes 2,3 and s3 bytes 0,1 */
1240 t11 = VIS_LD_U16_I(table2, (s3 >> 15) & 0x1FE);
1241 t10 = VIS_LD_U16_I(table1, (s3 >> 23) & 0x1FE);
1242 t9 = VIS_LD_U16_I(table0, (s2 << 1) & 0x1FE);
1243 t8 = VIS_LD_U16_I(table2, (s2 >> 7) & 0x1FE);
1244 acc0 = vis_faligndata(t3, acc0);
1245 acc0 = vis_faligndata(t2, acc0);
1246 acc0 = vis_faligndata(t1, acc0);
1247 acc0 = vis_faligndata(t0, acc0);
1248 acc1 = vis_faligndata(t7, acc1);
1249 acc1 = vis_faligndata(t6, acc1);
1250 acc1 = vis_faligndata(t5, acc1);
1251 acc1 = vis_faligndata(t4, acc1);
1252 acc2 = vis_faligndata(t11, acc2);
1253 acc2 = vis_faligndata(t10, acc2);
1254 acc2 = vis_faligndata(t9, acc2);
1255 acc2 = vis_faligndata(t8, acc2);
  /* bytes 2,3 of s3 are the first two samples of the next group */
1256 s0 = s3;
1257 s1 = sa[0];
1258 s2 = sa[1];
1259 s3 = sa[2];
1260 dp[0] = acc0;
1261 dp[1] = acc1;
1262 dp[2] = acc2;
1263 }
1264
  /* convert the group whose words are already loaded (no further loads) */
1265 t3 = VIS_LD_U16_I(table0, (s1 >> 15) & 0x1FE);
1266 t2 = VIS_LD_U16_I(table2, (s1 >> 23) & 0x1FE);
1267 t1 = VIS_LD_U16_I(table1, (s0 << 1) & 0x1FE);
1268 t0 = VIS_LD_U16_I(table0, (s0 >> 7) & 0x1FE);
1269 t7 = VIS_LD_U16_I(table1, (s2 >> 15) & 0x1FE);
1270 t6 = VIS_LD_U16_I(table0, (s2 >> 23) & 0x1FE);
1271 t5 = VIS_LD_U16_I(table2, (s1 << 1) & 0x1FE);
1272 t4 = VIS_LD_U16_I(table1, (s1 >> 7) & 0x1FE);
1273 t11 = VIS_LD_U16_I(table2, (s3 >> 15) & 0x1FE);
1274 t10 = VIS_LD_U16_I(table1, (s3 >> 23) & 0x1FE);
1275 t9 = VIS_LD_U16_I(table0, (s2 << 1) & 0x1FE);
1276 t8 = VIS_LD_U16_I(table2, (s2 >> 7) & 0x1FE);
1277 acc0 = vis_faligndata(t3, acc0);
1278 acc0 = vis_faligndata(t2, acc0);
1279 acc0 = vis_faligndata(t1, acc0);
1280 acc0 = vis_faligndata(t0, acc0);
1281 acc1 = vis_faligndata(t7, acc1);
1282 acc1 = vis_faligndata(t6, acc1);
1283 acc1 = vis_faligndata(t5, acc1);
1284 acc1 = vis_faligndata(t4, acc1);
1285 acc2 = vis_faligndata(t11, acc2);
1286 acc2 = vis_faligndata(t10, acc2);
1287 acc2 = vis_faligndata(t9, acc2);
1288 acc2 = vis_faligndata(t8, acc2);
1289 dp[0] = acc0;
1290 dp[1] = acc1;
1291 dp[2] = acc2;
1292 s0 = s3;
1293 dp += 3;
1294 i += 12;
1295 }
1296
  /* at most 11 elements remain; handle 8 of them if possible */
1297 if (i <= xsize - 8) {
1298 s1 = sa[0];
1299 s2 = sa[1];
1300 t3 = VIS_LD_U16_I(table0, (s1 >> 15) & 0x1FE);
1301 t2 = VIS_LD_U16_I(table2, (s1 >> 23) & 0x1FE);
1302 t1 = VIS_LD_U16_I(table1, (s0 << 1) & 0x1FE);
1303 t0 = VIS_LD_U16_I(table0, (s0 >> 7) & 0x1FE);
1304 t7 = VIS_LD_U16_I(table1, (s2 >> 15) & 0x1FE);
1305 t6 = VIS_LD_U16_I(table0, (s2 >> 23) & 0x1FE);
1306 t5 = VIS_LD_U16_I(table2, (s1 << 1) & 0x1FE);
1307 t4 = VIS_LD_U16_I(table1, (s1 >> 7) & 0x1FE);
1308 acc0 = vis_faligndata(t3, acc0);
1309 acc0 = vis_faligndata(t2, acc0);
1310 acc0 = vis_faligndata(t1, acc0);
1311 acc0 = vis_faligndata(t0, acc0);
1312 acc1 = vis_faligndata(t7, acc1);
1313 acc1 = vis_faligndata(t6, acc1);
1314 acc1 = vis_faligndata(t5, acc1);
1315 acc1 = vis_faligndata(t4, acc1);
1316 dp[0] = acc0;
1317 dp[1] = acc1;
  /*
   * 8 is 2 modulo 3, so the next element belongs to channel 2: rotate so
   * that table0 <- table2, table1 <- table0, table2 <- table1.
   */
1318 table = table0;
1319 table0 = table2;
1320 table2 = table1;
1321 table1 = table;
1322 sa += 2;
1323 i += 8;
1324 dp += 2;
1325 s0 = s2;
1326 }
1327
  /* handle 4 more elements if possible; afterwards fewer than 4 remain */
1328 if (i <= xsize - 4) {
1329 s1 = sa[0];
1330 t3 = VIS_LD_U16_I(table0, (s1 >> 15) & 0x1FE);
1331 t2 = VIS_LD_U16_I(table2, (s1 >> 23) & 0x1FE);
1332 t1 = VIS_LD_U16_I(table1, (s0 << 1) & 0x1FE);
1333 t0 = VIS_LD_U16_I(table0, (s0 >> 7) & 0x1FE);
1334 acc0 = vis_faligndata(t3, acc0);
1335 acc0 = vis_faligndata(t2, acc0);
1336 acc0 = vis_faligndata(t1, acc0);
1337 acc0 = vis_faligndata(t0, acc0);
1338 dp[0] = acc0;
  /*
   * 4 is 1 modulo 3, so the next element belongs to channel 1: rotate so
   * that table0 <- table1, table1 <- table2, table2 <- table0.
   */
1339 table = table0;
1340 table0 = table1;
1341 table1 = table2;
1342 table2 = table;
1343 sa++;
1344 i += 4;
1345 dp++;
1346 s0 = s1;
1347 }
1348
  /*
   * sa runs one word ahead of the carried word, whose last two bytes are
   * still unconsumed, so sa - 2 bytes is the first unprocessed sample.
   */
1349 sp = (mlib_u8 *) sa;
1350 sp -= 2;
1351
  /* tail: 1..3 elements, read byte by byte */
1352 if ((mlib_addr) dp <= (mlib_addr) dend) {
1353
  /* move sp to the last remaining sample; num becomes the tail count */
1354 num = (mlib_s16 *) dend - (mlib_s16 *) dp;
1355 sp += num;
1356 num++;
1357
  /*
   * Walk backwards from the last sample so that the first remaining
   * element is pushed last and ends up at the front of acc0.
   */
1358 if (num == 1) {
1359 s0 = (mlib_s32) * sp;
1360 sp--;
1361
1362 t0 = VIS_LD_U16_I(table0, 2 * s0);
1363 acc0 = vis_faligndata(t0, acc0);
1364 }
1365 else if (num == 2) {
1366 s0 = (mlib_s32) * sp;
1367 sp--;
1368
1369 t0 = VIS_LD_U16_I(table1, 2 * s0);
1370 acc0 = vis_faligndata(t0, acc0);
1371
1372 s0 = (mlib_s32) * sp;
1373 sp--;
1374
1375 t0 = VIS_LD_U16_I(table0, 2 * s0);
1376 acc0 = vis_faligndata(t0, acc0);
1377 }
1378 else if (num == 3) {
1379 s0 = (mlib_s32) * sp;
1380 sp--;
1381
1382 t0 = VIS_LD_U16_I(table2, 2 * s0);
1383 acc0 = vis_faligndata(t0, acc0);
1384
1385 s0 = (mlib_s32) * sp;
1386 sp--;
1387
1388 t0 = VIS_LD_U16_I(table1, 2 * s0);
1389 acc0 = vis_faligndata(t0, acc0);
1390
1391 s0 = (mlib_s32) * sp;
1392 sp--;
1393
1394 t0 = VIS_LD_U16_I(table0, 2 * s0);
1395 acc0 = vis_faligndata(t0, acc0);
1396 }
1397
  /*
   * Masked store: the mask is built from dp and dend, so the stale lanes
   * of acc0 beyond dend are presumably not written (confirm against the
   * VIS edge16 / partial-store definitions).
   */
1398 emask = vis_edge16(dp, dend);
1399 vis_pst_16(acc0, dp, emask);
1400 }
1401}
1402
1403/***************************************************************/
/*
 * Look up one row of interleaved 3-channel U8 samples through three S16
 * tables, for a source pointer that lies 3 bytes past a 4-byte boundary
 * (mlib_v_ImageLookUp_U8_S16_3 calls this routine when
 * ((mlib_addr) src & 3) == 3).
 *
 *   src     first source sample. Data is fetched as aligned 32-bit words
 *           starting at src - 3, so up to 3 bytes before the row and up
 *           to 3 bytes after it are loaded (and ignored).
 *   dst     destination row. It is written through an mlib_d64 pointer,
 *           so it is assumed to be 8-byte aligned; the caller in this
 *           file aligns it before calling.
 *   xsize   number of S16 values to produce (samples, not pixels).
 *   table0  table for output elements 0, 3, 6, ...
 *   table1  table for output elements 1, 4, 7, ...
 *   table2  table for output elements 2, 5, 8, ...
 *
 * The shift/mask expressions treat bits 31..24 of a source word as its
 * first byte (big-endian order). Each one isolates a byte and doubles it
 * (mask 0x1FE), giving a byte offset into a table of 2-byte entries.
 *
 * NOTE(review): VIS_LD_U16_I, vis_alignaddr, vis_faligndata, vis_edge16
 * and vis_pst_16 are defined outside this chunk; the comments below
 * describe them per the VIS instruction set definition - confirm against
 * vis_proto.h.
 */
1404void mlib_v_ImageLookUp_U8_S16_3_SrcOff3_D1(const mlib_u8 *src,
1405 mlib_s16 *dst,
1406 mlib_s32 xsize,
1407 const mlib_s16 *table0,
1408 const mlib_s16 *table1,
1409 const mlib_s16 *table2)
1410{
1411 mlib_u32 *sa; /* aligned pointer to source data */
1412 mlib_u8 *sp; /* byte pointer to source data, used for the tail */
1413 mlib_u32 s0, s1, s2, s3; /* source words; s0 holds the word carried over */
1414 mlib_s16 *dl; /* pointer to start of destination */
1415 mlib_s16 *dend; /* pointer to the last destination element */
1416 mlib_d64 *dp; /* aligned pointer to destination */
1417 mlib_d64 t0, t1, t2; /* values returned by VIS_LD_U16_I */
1418 mlib_d64 t3, t4, t5; /* values returned by VIS_LD_U16_I */
1419 mlib_d64 t6, t7, t8; /* values returned by VIS_LD_U16_I */
1420 mlib_d64 t9, t10, t11; /* values returned by VIS_LD_U16_I */
1421 mlib_d64 acc0, acc1, acc2; /* output doubles being assembled */
1422 mlib_s32 emask; /* edge mask */
1423 mlib_s32 i, num; /* elements produced so far; tail element count */
1424 const mlib_s16 *table; /* scratch for rotating table0..table2 */
1425
  /* src - 3 is the 4-byte-aligned word whose last byte is src[0] */
1426 sa = (mlib_u32 *) (src - 3);
1427 dl = dst;
1428 dp = (mlib_d64 *) dl;
1429 dend = dl + xsize - 1;
1430
  /*
   * Set the alignment offset to 6. With that offset each
   * vis_faligndata(t, acc) below is expected to return the low 2 bytes of
   * t followed by the upper 6 bytes of acc, i.e. it pushes one 16-bit
   * value in at the front of acc. Four pushes replace all four lanes,
   * which is why acc0..acc2 are never initialized, and why lookups are
   * pushed last-element-first (t3, t2, t1, t0).
   */
1431 vis_alignaddr((void *)0, 6);
1432
1433 i = 0;
1434
  /* first word: only its low byte (src[0]) is used */
1435 s0 = *sa++;
1436
  /* main path: groups of 12 outputs (4 pixels), built from 3 new words */
1437 if (xsize >= 12) {
1438
1439 s1 = sa[0];
1440 s2 = sa[1];
1441 s3 = sa[2];
1442 sa += 3;
1443
  /*
   * Each pass converts the 12 samples already held in s0..s3 and loads
   * the words for the next group; the condition guarantees that at least
   * 12 further samples exist after the current group.
   */
1444#pragma pipeloop(0)
1445 for (i = 0; i <= xsize - 24; i += 12, sa += 3, dp += 3) {
  /* elements 0..3: s0 byte 3 and s1 bytes 0,1,2 */
1446 t3 = VIS_LD_U16_I(table0, (s1 >> 7) & 0x1FE);
1447 t2 = VIS_LD_U16_I(table2, (s1 >> 15) & 0x1FE);
1448 t1 = VIS_LD_U16_I(table1, (s1 >> 23) & 0x1FE);
1449 t0 = VIS_LD_U16_I(table0, (s0 << 1) & 0x1FE);
  /* elements 4..7: s1 byte 3 and s2 bytes 0,1,2 */
1450 t7 = VIS_LD_U16_I(table1, (s2 >> 7) & 0x1FE);
1451 t6 = VIS_LD_U16_I(table0, (s2 >> 15) & 0x1FE);
1452 t5 = VIS_LD_U16_I(table2, (s2 >> 23) & 0x1FE);
1453 t4 = VIS_LD_U16_I(table1, (s1 << 1) & 0x1FE);
  /* elements 8..11: s2 byte 3 and s3 bytes 0,1,2 */
1454 t11 = VIS_LD_U16_I(table2, (s3 >> 7) & 0x1FE);
1455 t10 = VIS_LD_U16_I(table1, (s3 >> 15) & 0x1FE);
1456 t9 = VIS_LD_U16_I(table0, (s3 >> 23) & 0x1FE);
1457 t8 = VIS_LD_U16_I(table2, (s2 << 1) & 0x1FE);
1458 acc0 = vis_faligndata(t3, acc0);
1459 acc0 = vis_faligndata(t2, acc0);
1460 acc0 = vis_faligndata(t1, acc0);
1461 acc0 = vis_faligndata(t0, acc0);
1462 acc1 = vis_faligndata(t7, acc1);
1463 acc1 = vis_faligndata(t6, acc1);
1464 acc1 = vis_faligndata(t5, acc1);
1465 acc1 = vis_faligndata(t4, acc1);
1466 acc2 = vis_faligndata(t11, acc2);
1467 acc2 = vis_faligndata(t10, acc2);
1468 acc2 = vis_faligndata(t9, acc2);
1469 acc2 = vis_faligndata(t8, acc2);
  /* byte 3 of s3 is the first sample of the next group */
1470 s0 = s3;
1471 s1 = sa[0];
1472 s2 = sa[1];
1473 s3 = sa[2];
1474 dp[0] = acc0;
1475 dp[1] = acc1;
1476 dp[2] = acc2;
1477 }
1478
  /* convert the group whose words are already loaded (no further loads) */
1479 t3 = VIS_LD_U16_I(table0, (s1 >> 7) & 0x1FE);
1480 t2 = VIS_LD_U16_I(table2, (s1 >> 15) & 0x1FE);
1481 t1 = VIS_LD_U16_I(table1, (s1 >> 23) & 0x1FE);
1482 t0 = VIS_LD_U16_I(table0, (s0 << 1) & 0x1FE);
1483 t7 = VIS_LD_U16_I(table1, (s2 >> 7) & 0x1FE);
1484 t6 = VIS_LD_U16_I(table0, (s2 >> 15) & 0x1FE);
1485 t5 = VIS_LD_U16_I(table2, (s2 >> 23) & 0x1FE);
1486 t4 = VIS_LD_U16_I(table1, (s1 << 1) & 0x1FE);
1487 t11 = VIS_LD_U16_I(table2, (s3 >> 7) & 0x1FE);
1488 t10 = VIS_LD_U16_I(table1, (s3 >> 15) & 0x1FE);
1489 t9 = VIS_LD_U16_I(table0, (s3 >> 23) & 0x1FE);
1490 t8 = VIS_LD_U16_I(table2, (s2 << 1) & 0x1FE);
1491 acc0 = vis_faligndata(t3, acc0);
1492 acc0 = vis_faligndata(t2, acc0);
1493 acc0 = vis_faligndata(t1, acc0);
1494 acc0 = vis_faligndata(t0, acc0);
1495 acc1 = vis_faligndata(t7, acc1);
1496 acc1 = vis_faligndata(t6, acc1);
1497 acc1 = vis_faligndata(t5, acc1);
1498 acc1 = vis_faligndata(t4, acc1);
1499 acc2 = vis_faligndata(t11, acc2);
1500 acc2 = vis_faligndata(t10, acc2);
1501 acc2 = vis_faligndata(t9, acc2);
1502 acc2 = vis_faligndata(t8, acc2);
1503 dp[0] = acc0;
1504 dp[1] = acc1;
1505 dp[2] = acc2;
1506 s0 = s3;
1507 dp += 3;
1508 i += 12;
1509 }
1510
  /* at most 11 elements remain; handle 8 of them if possible */
1511 if (i <= xsize - 8) {
1512 s1 = sa[0];
1513 s2 = sa[1];
1514 t3 = VIS_LD_U16_I(table0, (s1 >> 7) & 0x1FE);
1515 t2 = VIS_LD_U16_I(table2, (s1 >> 15) & 0x1FE);
1516 t1 = VIS_LD_U16_I(table1, (s1 >> 23) & 0x1FE);
1517 t0 = VIS_LD_U16_I(table0, (s0 << 1) & 0x1FE);
1518 t7 = VIS_LD_U16_I(table1, (s2 >> 7) & 0x1FE);
1519 t6 = VIS_LD_U16_I(table0, (s2 >> 15) & 0x1FE);
1520 t5 = VIS_LD_U16_I(table2, (s2 >> 23) & 0x1FE);
1521 t4 = VIS_LD_U16_I(table1, (s1 << 1) & 0x1FE);
1522 acc0 = vis_faligndata(t3, acc0);
1523 acc0 = vis_faligndata(t2, acc0);
1524 acc0 = vis_faligndata(t1, acc0);
1525 acc0 = vis_faligndata(t0, acc0);
1526 acc1 = vis_faligndata(t7, acc1);
1527 acc1 = vis_faligndata(t6, acc1);
1528 acc1 = vis_faligndata(t5, acc1);
1529 acc1 = vis_faligndata(t4, acc1);
1530 dp[0] = acc0;
1531 dp[1] = acc1;
  /*
   * 8 is 2 modulo 3, so the next element belongs to channel 2: rotate so
   * that table0 <- table2, table1 <- table0, table2 <- table1.
   */
1532 table = table0;
1533 table0 = table2;
1534 table2 = table1;
1535 table1 = table;
1536 sa += 2;
1537 i += 8;
1538 dp += 2;
1539 s0 = s2;
1540 }
1541
  /* handle 4 more elements if possible; afterwards fewer than 4 remain */
1542 if (i <= xsize - 4) {
1543 s1 = sa[0];
1544 t3 = VIS_LD_U16_I(table0, (s1 >> 7) & 0x1FE);
1545 t2 = VIS_LD_U16_I(table2, (s1 >> 15) & 0x1FE);
1546 t1 = VIS_LD_U16_I(table1, (s1 >> 23) & 0x1FE);
1547 t0 = VIS_LD_U16_I(table0, (s0 << 1) & 0x1FE);
1548 acc0 = vis_faligndata(t3, acc0);
1549 acc0 = vis_faligndata(t2, acc0);
1550 acc0 = vis_faligndata(t1, acc0);
1551 acc0 = vis_faligndata(t0, acc0);
1552 dp[0] = acc0;
  /*
   * 4 is 1 modulo 3, so the next element belongs to channel 1: rotate so
   * that table0 <- table1, table1 <- table2, table2 <- table0.
   */
1553 table = table0;
1554 table0 = table1;
1555 table1 = table2;
1556 table2 = table;
1557 sa++;
1558 i += 4;
1559 dp++;
1560 s0 = s1;
1561 }
1562
  /*
   * sa runs one word ahead of the carried word, whose last byte is still
   * unconsumed, so sa - 1 byte is the first unprocessed sample.
   */
1563 sp = (mlib_u8 *) sa;
1564 sp -= 1;
1565
  /* tail: 1..3 elements, read byte by byte */
1566 if ((mlib_addr) dp <= (mlib_addr) dend) {
1567
  /* move sp to the last remaining sample; num becomes the tail count */
1568 num = (mlib_s16 *) dend - (mlib_s16 *) dp;
1569 sp += num;
1570 num++;
1571
  /*
   * Walk backwards from the last sample so that the first remaining
   * element is pushed last and ends up at the front of acc0.
   */
1572 if (num == 1) {
1573 s0 = (mlib_s32) * sp;
1574 sp--;
1575
1576 t0 = VIS_LD_U16_I(table0, 2 * s0);
1577 acc0 = vis_faligndata(t0, acc0);
1578 }
1579 else if (num == 2) {
1580 s0 = (mlib_s32) * sp;
1581 sp--;
1582
1583 t0 = VIS_LD_U16_I(table1, 2 * s0);
1584 acc0 = vis_faligndata(t0, acc0);
1585
1586 s0 = (mlib_s32) * sp;
1587 sp--;
1588
1589 t0 = VIS_LD_U16_I(table0, 2 * s0);
1590 acc0 = vis_faligndata(t0, acc0);
1591 }
1592 else if (num == 3) {
1593 s0 = (mlib_s32) * sp;
1594 sp--;
1595
1596 t0 = VIS_LD_U16_I(table2, 2 * s0);
1597 acc0 = vis_faligndata(t0, acc0);
1598
1599 s0 = (mlib_s32) * sp;
1600 sp--;
1601
1602 t0 = VIS_LD_U16_I(table1, 2 * s0);
1603 acc0 = vis_faligndata(t0, acc0);
1604
1605 s0 = (mlib_s32) * sp;
1606 sp--;
1607
1608 t0 = VIS_LD_U16_I(table0, 2 * s0);
1609 acc0 = vis_faligndata(t0, acc0);
1610 }
1611
  /*
   * Masked store: the mask is built from dp and dend, so the stale lanes
   * of acc0 beyond dend are presumably not written (confirm against the
   * VIS edge16 / partial-store definitions).
   */
1612 emask = vis_edge16(dp, dend);
1613 vis_pst_16(acc0, dp, emask);
1614 }
1615}
1616
1617/***************************************************************/
1618void mlib_v_ImageLookUp_U8_S16_3(const mlib_u8 *src,
1619 mlib_s32 slb,
1620 mlib_s16 *dst,
1621 mlib_s32 dlb,
1622 mlib_s32 xsize,
1623 mlib_s32 ysize,
1624 const mlib_s16 **table)
1625{
1626 mlib_u8 *sl;
1627 mlib_s16 *dl;
1628 const mlib_s16 *tab;
1629 mlib_s32 j, i;
1630
1631 sl = (void *)src;
1632 dl = dst;
1633
1634 /* row loop */
1635 for (j = 0; j < ysize; j++) {
1636 mlib_u8 *sp = sl;
1637 mlib_s16 *dp = dl;
1638 const mlib_s16 *tab0 = table[0];
1639 const mlib_s16 *tab1 = table[1];
1640 const mlib_s16 *tab2 = table[2];
1641 mlib_s32 off, size = xsize * 3;
1642
1643 off = ((8 - ((mlib_addr) dp & 7)) & 7) >> 1;
1644
1645 off = (off < size) ? off : size;
1646
1647 for (i = 0; i < off - 2; i += 3) {
1648 *dp++ = tab0[(*sp++)];
1649 *dp++ = tab1[(*sp++)];
1650 *dp++ = tab2[(*sp++)];
1651 size -= 3;
1652 }
1653
1654 off -= i;
1655
1656 if (off == 1) {
1657 *dp++ = tab0[(*sp++)];
1658 tab = tab0;
1659 tab0 = tab1;
1660 tab1 = tab2;
1661 tab2 = tab;
1662 size--;
1663 }
1664 else if (off == 2) {
1665 *dp++ = tab0[(*sp++)];
1666 *dp++ = tab1[(*sp++)];
1667 tab = tab2;
1668 tab2 = tab1;
1669 tab1 = tab0;
1670 tab0 = tab;
1671 size -= 2;
1672 }
1673
1674 if (size > 0) {
1675
1676 off = (mlib_addr) sp & 3;
1677
1678 if (off == 0) {
1679 mlib_v_ImageLookUp_U8_S16_3_SrcOff0_D1(sp, dp, size, tab0, tab1, tab2);
1680 }
1681 else if (off == 1) {
1682 mlib_v_ImageLookUp_U8_S16_3_SrcOff1_D1(sp, dp, size, tab0, tab1, tab2);
1683 }
1684 else if (off == 2) {
1685 mlib_v_ImageLookUp_U8_S16_3_SrcOff2_D1(sp, dp, size, tab0, tab1, tab2);
1686 }
1687 else {
1688 mlib_v_ImageLookUp_U8_S16_3_SrcOff3_D1(sp, dp, size, tab0, tab1, tab2);
1689 }
1690 }
1691
1692 sl = (mlib_u8 *) ((mlib_u8 *) sl + slb);
1693 dl = (mlib_s16 *) ((mlib_u8 *) dl + dlb);
1694 }
1695}
1696
1697/***************************************************************/