blob: 97662f197d2061a325600d197c5feb76052f9a29 [file] [log] [blame]
J. Duke319a3b92007-12-01 00:00:00 +00001/*
2 * Copyright 1998-2003 Sun Microsystems, Inc. All Rights Reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Sun designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Sun in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
22 * CA 95054 USA or visit www.sun.com if you need additional information or
23 * have any questions.
24 */
25
26
27
28#include "vis_proto.h"
29#include "mlib_image.h"
30#include "mlib_v_ImageLookUpFunc.h"
31
32/***************************************************************/
/*
 * Forward declarations of the file-local single-row lookup kernels.
 *
 * The "_124_" kernels take four tables (table0..table3); the "_3_" kernels
 * take three (table0..table2).  Each family has four variants, SrcOff0 ..
 * SrcOff3.  In the definitions visible in this file, the SrcOffN variant
 * starts its 32-bit word reads at (src - N); the 1-, 2- and 4-channel entry
 * points pick the variant from the low two bits of the source address.
 *
 * All kernels share the same leading parameters:
 *   src    - first source byte of the row segment
 *   dst    - first destination byte of the row segment
 *   xsize  - number of bytes to translate
 */
33static void mlib_v_ImageLookUp_U8_U8_124_SrcOff0_D1(const mlib_u8 *src,
34 mlib_u8 *dst,
35 mlib_s32 xsize,
36 const mlib_u8 *table0,
37 const mlib_u8 *table1,
38 const mlib_u8 *table2,
39 const mlib_u8 *table3);
40
41static void mlib_v_ImageLookUp_U8_U8_124_SrcOff1_D1(const mlib_u8 *src,
42 mlib_u8 *dst,
43 mlib_s32 xsize,
44 const mlib_u8 *table0,
45 const mlib_u8 *table1,
46 const mlib_u8 *table2,
47 const mlib_u8 *table3);
48
49static void mlib_v_ImageLookUp_U8_U8_124_SrcOff2_D1(const mlib_u8 *src,
50 mlib_u8 *dst,
51 mlib_s32 xsize,
52 const mlib_u8 *table0,
53 const mlib_u8 *table1,
54 const mlib_u8 *table2,
55 const mlib_u8 *table3);
56
57static void mlib_v_ImageLookUp_U8_U8_124_SrcOff3_D1(const mlib_u8 *src,
58 mlib_u8 *dst,
59 mlib_s32 xsize,
60 const mlib_u8 *table0,
61 const mlib_u8 *table1,
62 const mlib_u8 *table2,
63 const mlib_u8 *table3);
64
65static void mlib_v_ImageLookUp_U8_U8_3_SrcOff0_D1(const mlib_u8 *src,
66 mlib_u8 *dst,
67 mlib_s32 xsize,
68 const mlib_u8 *table0,
69 const mlib_u8 *table1,
70 const mlib_u8 *table2);
71
72static void mlib_v_ImageLookUp_U8_U8_3_SrcOff1_D1(const mlib_u8 *src,
73 mlib_u8 *dst,
74 mlib_s32 xsize,
75 const mlib_u8 *table0,
76 const mlib_u8 *table1,
77 const mlib_u8 *table2);
78
79static void mlib_v_ImageLookUp_U8_U8_3_SrcOff2_D1(const mlib_u8 *src,
80 mlib_u8 *dst,
81 mlib_s32 xsize,
82 const mlib_u8 *table0,
83 const mlib_u8 *table1,
84 const mlib_u8 *table2);
85
86static void mlib_v_ImageLookUp_U8_U8_3_SrcOff3_D1(const mlib_u8 *src,
87 mlib_u8 *dst,
88 mlib_s32 xsize,
89 const mlib_u8 *table0,
90 const mlib_u8 *table1,
91 const mlib_u8 *table2);
92
93/***************************************************************/
/*
 * Convenience wrapper around vis_ld_u8_i(): casts the base pointer X to
 * (void *) (the kernels pass const table pointers) and forwards Y unchanged.
 * In this file X is always a lookup table and Y a source byte value.
 * NOTE(review): presumably vis_ld_u8_i loads the single byte at X + Y into
 * an mlib_d64 -- confirm against vis_proto.h.
 */
94#define VIS_LD_U8_I(X, Y) vis_ld_u8_i((void *)(X), (Y))
95
96/***************************************************************/
/*
 * Row kernel: U8 -> U8 lookup through four cyclically applied tables, for a
 * source pointer that is used directly as a 32-bit word pointer (offset 0).
 *
 *   src             first source byte; read through an mlib_u32 pointer.
 *                   The entry points in this file call this variant only
 *                   when (src & 3) == 0.
 *   dst             first destination byte; written through an mlib_d64
 *                   pointer.  The entry points in this file advance dst to
 *                   an 8-byte boundary before calling.
 *   xsize           number of bytes to translate.
 *   table0..table3  byte k of the segment is translated through
 *                   table(k mod 4).
 *
 * Groups of 8 output bytes are assembled in `acc` and stored whole; the
 * final 1..7 bytes are assembled back-to-front and written with
 * vis_pst_8() under the mask returned by vis_edge8().
 *
 * NOTE(review): `acc` is never initialised before its first use as the
 * second operand of vis_faligndata(); presumably each call shifts one byte
 * out of acc and one looked-up byte in, so stale bytes are either pushed
 * out (full groups) or masked off by the edge store (tail) -- confirm
 * against the VIS manual.
 * NOTE(review): the shift/mask extraction treats (word >> 24) as the first
 * byte in memory, i.e. it assumes a big-endian word layout -- confirm.
 */
97void mlib_v_ImageLookUp_U8_U8_124_SrcOff0_D1(const mlib_u8 *src,
98 mlib_u8 *dst,
99 mlib_s32 xsize,
100 const mlib_u8 *table0,
101 const mlib_u8 *table1,
102 const mlib_u8 *table2,
103 const mlib_u8 *table3)
104{
105 mlib_u32 *sa; /* aligned pointer to source data */
106 mlib_u8 *sp; /* pointer to source data */
107 mlib_u32 s0, s1; /* source data */
108 mlib_u8 *dl; /* pointer to start of destination */
109 mlib_u8 *dend; /* pointer to end of destination */
110 mlib_d64 *dp; /* aligned pointer to destination */
111 mlib_d64 t0, t1, t2; /* destination data */
112 mlib_d64 t3, t4, t5; /* destination data */
113 mlib_d64 t6, t7, acc; /* destination data */
114 mlib_s32 emask; /* edge mask */
115 mlib_s32 i, num; /* loop variable */
116
117 sa = (mlib_u32 *) src;
118 dl = dst;
119 dp = (mlib_d64 *) dl;
 /* dend addresses the LAST destination byte (inclusive end) */
120 dend = dl + xsize - 1;
121
 /* NOTE(review): presumably sets the faligndata byte offset to 7 -- confirm */
122 vis_alignaddr((void *)0, 7);
123
124 if (xsize >= 8) {
125
 /* preload the first two source words (8 source bytes) */
126 s0 = sa[0];
127 s1 = sa[1];
128 sa += 2;
129
 /*
  * Steady state: runs while at least 16 bytes remain from position i, so
  * the two words loaded at the bottom for the next group are always
  * source bytes that will be consumed.
  */
130#pragma pipeloop(0)
131 for (i = 0; i <= xsize - 16; i += 8, sa += 2) {
 /* lookups are issued for byte 7 down to byte 0 of the group */
132 t7 = VIS_LD_U8_I(table3, s1 & 0xFF);
133 t6 = VIS_LD_U8_I(table2, (s1 >> 8) & 0xFF);
134 t5 = VIS_LD_U8_I(table1, (s1 >> 16) & 0xFF);
135 t4 = VIS_LD_U8_I(table0, s1 >> 24);
136 t3 = VIS_LD_U8_I(table3, s0 & 0xFF);
137 t2 = VIS_LD_U8_I(table2, (s0 >> 8) & 0xFF);
138 t1 = VIS_LD_U8_I(table1, (s0 >> 16) & 0xFF);
139 t0 = VIS_LD_U8_I(table0, s0 >> 24);
 /* merged into acc in the same order: last byte first, first byte last */
140 acc = vis_faligndata(t7, acc);
141 acc = vis_faligndata(t6, acc);
142 acc = vis_faligndata(t5, acc);
143 acc = vis_faligndata(t4, acc);
144 acc = vis_faligndata(t3, acc);
145 acc = vis_faligndata(t2, acc);
146 acc = vis_faligndata(t1, acc);
147 acc = vis_faligndata(t0, acc);
 /* load the words for the next group, then store the finished 8 bytes */
148 s0 = sa[0];
149 s1 = sa[1];
150 *dp++ = acc;
151 }
152
 /* last full group: same as the loop body but without loading ahead */
153 t7 = VIS_LD_U8_I(table3, s1 & 0xFF);
154 t6 = VIS_LD_U8_I(table2, (s1 >> 8) & 0xFF);
155 t5 = VIS_LD_U8_I(table1, (s1 >> 16) & 0xFF);
156 t4 = VIS_LD_U8_I(table0, s1 >> 24);
157 t3 = VIS_LD_U8_I(table3, s0 & 0xFF);
158 t2 = VIS_LD_U8_I(table2, (s0 >> 8) & 0xFF);
159 t1 = VIS_LD_U8_I(table1, (s0 >> 16) & 0xFF);
160 t0 = VIS_LD_U8_I(table0, s0 >> 24);
161 acc = vis_faligndata(t7, acc);
162 acc = vis_faligndata(t6, acc);
163 acc = vis_faligndata(t5, acc);
164 acc = vis_faligndata(t4, acc);
165 acc = vis_faligndata(t3, acc);
166 acc = vis_faligndata(t2, acc);
167 acc = vis_faligndata(t1, acc);
168 acc = vis_faligndata(t0, acc);
169 *dp++ = acc;
170 }
171
 /* sa now addresses the first source byte that has not been consumed */
172 sp = (mlib_u8 *) sa;
173
 /* tail: 1..7 bytes left (xsize mod 8), handled byte by byte */
174 if ((mlib_addr) dp <= (mlib_addr) dend) {
175
 /* move sp to the LAST source byte; num becomes the remaining byte count */
176 num = (mlib_addr) dend - (mlib_addr) dp;
177 sp += num;
178 num++;
179
 /*
  * Walk backwards.  First consume the (num mod 4) trailing bytes so that
  * what is left is either nothing or one complete table0..table3 quad.
  */
180 if ((num & 3) == 1) {
181 s0 = (mlib_s32) * sp;
182 sp--;
183
184 t0 = VIS_LD_U8_I(table0, s0);
185 acc = vis_faligndata(t0, acc);
186 num--;
187 }
188 else if ((num & 3) == 2) {
189 s0 = (mlib_s32) * sp;
190 sp--;
191
192 t0 = VIS_LD_U8_I(table1, s0);
193 acc = vis_faligndata(t0, acc);
194
195 s0 = (mlib_s32) * sp;
196 sp--;
197
198 t0 = VIS_LD_U8_I(table0, s0);
199 acc = vis_faligndata(t0, acc);
200 num -= 2;
201 }
202 else if ((num & 3) == 3) {
203 s0 = (mlib_s32) * sp;
204 sp--;
205
206 t0 = VIS_LD_U8_I(table2, s0);
207 acc = vis_faligndata(t0, acc);
208
209 s0 = (mlib_s32) * sp;
210 sp--;
211
212 t0 = VIS_LD_U8_I(table1, s0);
213 acc = vis_faligndata(t0, acc);
214
215 s0 = (mlib_s32) * sp;
216 sp--;
217
218 t0 = VIS_LD_U8_I(table0, s0);
219 acc = vis_faligndata(t0, acc);
220 num -= 3;
221 }
222
 /* num is now 0 or 4: one remaining quad, table3 down to table0 */
223 if (num != 0) {
224 s0 = (mlib_s32) * sp;
225 sp--;
226
227 t0 = VIS_LD_U8_I(table3, s0);
228 acc = vis_faligndata(t0, acc);
229
230 s0 = (mlib_s32) * sp;
231 sp--;
232
233 t0 = VIS_LD_U8_I(table2, s0);
234 acc = vis_faligndata(t0, acc);
235
236 s0 = (mlib_s32) * sp;
237 sp--;
238
239 t0 = VIS_LD_U8_I(table1, s0);
240 acc = vis_faligndata(t0, acc);
241
242 s0 = (mlib_s32) * sp;
243 sp--;
244
245 t0 = VIS_LD_U8_I(table0, s0);
246 acc = vis_faligndata(t0, acc);
247 }
248
 /* NOTE(review): presumably stores only the bytes in [dp, dend] -- confirm */
249 emask = vis_edge8(dp, dend);
250 vis_pst_8(acc, dp, emask);
251 }
252}
253
254/***************************************************************/
/*
 * Row kernel: U8 -> U8 lookup through four cyclically applied tables, for a
 * source pointer that lies 1 byte past the word the kernel reads from
 * (word reads start at src - 1).
 *
 *   src             first source byte.  The entry points in this file call
 *                   this variant only when (src & 3) == 1.
 *   dst             first destination byte; written through an mlib_d64
 *                   pointer.  The entry points in this file advance dst to
 *                   an 8-byte boundary before calling.
 *   xsize           number of bytes to translate.
 *   table0..table3  byte k of the segment is translated through
 *                   table(k mod 4).
 *
 * Each 8-byte group is taken from three words: the low three bytes of s0,
 * all of s1 and the top byte of s2; s2 is then carried over as the next s0.
 * The final 1..7 bytes are assembled back-to-front and written with
 * vis_pst_8() under the mask returned by vis_edge8().
 *
 * NOTE(review): `acc` is never initialised before its first use as the
 * second operand of vis_faligndata(); presumably stale bytes are shifted
 * out or masked off by the edge store -- confirm against the VIS manual.
 * NOTE(review): the shift/mask extraction assumes (word >> 24) is the first
 * byte in memory (big-endian layout) -- confirm.
 */
255void mlib_v_ImageLookUp_U8_U8_124_SrcOff1_D1(const mlib_u8 *src,
256 mlib_u8 *dst,
257 mlib_s32 xsize,
258 const mlib_u8 *table0,
259 const mlib_u8 *table1,
260 const mlib_u8 *table2,
261 const mlib_u8 *table3)
262{
263 mlib_u32 *sa; /* aligned pointer to source data */
264 mlib_u8 *sp; /* pointer to source data */
265 mlib_u32 s0, s1, s2; /* source data */
266 mlib_u8 *dl; /* pointer to start of destination */
267 mlib_u8 *dend; /* pointer to end of destination */
268 mlib_d64 *dp; /* aligned pointer to destination */
269 mlib_d64 t0, t1, t2; /* destination data */
270 mlib_d64 t3, t4, t5; /* destination data */
271 mlib_d64 t6, t7, acc; /* destination data */
272 mlib_s32 emask; /* edge mask */
273 mlib_s32 i, num; /* loop variable */
274
 /* back up one byte so that word reads start at src - 1 */
275 sa = (mlib_u32 *) (src - 1);
276 dl = dst;
277 dp = (mlib_d64 *) dl;
 /* dend addresses the LAST destination byte (inclusive end) */
278 dend = dl + xsize - 1;
279
 /* NOTE(review): presumably sets the faligndata byte offset to 7 -- confirm */
280 vis_alignaddr((void *)0, 7);
281
 /* first word is read unconditionally; its top byte (s0 >> 24) is never used */
282 s0 = *sa++;
283
284 if (xsize >= 8) {
285
286 s1 = sa[0];
287 s2 = sa[1];
288 sa += 2;
289
 /* steady state: runs while at least 16 bytes remain from position i */
290#pragma pipeloop(0)
291 for (i = 0; i <= xsize - 16; i += 8, sa += 2) {
 /* lookups are issued for byte 7 down to byte 0 of the group */
292 t7 = VIS_LD_U8_I(table3, s2 >> 24);
293 t6 = VIS_LD_U8_I(table2, s1 & 0xFF);
294 t5 = VIS_LD_U8_I(table1, (s1 >> 8) & 0xFF);
295 t4 = VIS_LD_U8_I(table0, (s1 >> 16) & 0xFF);
296 t3 = VIS_LD_U8_I(table3, s1 >> 24);
297 t2 = VIS_LD_U8_I(table2, s0 & 0xFF);
298 t1 = VIS_LD_U8_I(table1, (s0 >> 8) & 0xFF);
299 t0 = VIS_LD_U8_I(table0, (s0 >> 16) & 0xFF);
300 acc = vis_faligndata(t7, acc);
301 acc = vis_faligndata(t6, acc);
302 acc = vis_faligndata(t5, acc);
303 acc = vis_faligndata(t4, acc);
304 acc = vis_faligndata(t3, acc);
305 acc = vis_faligndata(t2, acc);
306 acc = vis_faligndata(t1, acc);
307 acc = vis_faligndata(t0, acc);
 /* only the top byte of s2 was used; its low 3 bytes start the next group */
308 s0 = s2;
309 s1 = sa[0];
310 s2 = sa[1];
311 *dp++ = acc;
312 }
313
 /* last full group: same as the loop body but without loading ahead */
314 t7 = VIS_LD_U8_I(table3, s2 >> 24);
315 t6 = VIS_LD_U8_I(table2, s1 & 0xFF);
316 t5 = VIS_LD_U8_I(table1, (s1 >> 8) & 0xFF);
317 t4 = VIS_LD_U8_I(table0, (s1 >> 16) & 0xFF);
318 t3 = VIS_LD_U8_I(table3, s1 >> 24);
319 t2 = VIS_LD_U8_I(table2, s0 & 0xFF);
320 t1 = VIS_LD_U8_I(table1, (s0 >> 8) & 0xFF);
321 t0 = VIS_LD_U8_I(table0, (s0 >> 16) & 0xFF);
322 acc = vis_faligndata(t7, acc);
323 acc = vis_faligndata(t6, acc);
324 acc = vis_faligndata(t5, acc);
325 acc = vis_faligndata(t4, acc);
326 acc = vis_faligndata(t3, acc);
327 acc = vis_faligndata(t2, acc);
328 acc = vis_faligndata(t1, acc);
329 acc = vis_faligndata(t0, acc);
330 *dp++ = acc;
331 }
332
 /*
  * sa is one word past the last word read, which is 3 bytes beyond the
  * first unconsumed source byte; rewind so sp addresses that byte.
  */
333 sp = (mlib_u8 *) sa;
334 sp -= 3;
335
 /* tail: 1..7 bytes left (xsize mod 8), handled byte by byte */
336 if ((mlib_addr) dp <= (mlib_addr) dend) {
337
 /* move sp to the LAST source byte; num becomes the remaining byte count */
338 num = (mlib_addr) dend - (mlib_addr) dp;
339 sp += num;
340 num++;
341
 /* walk backwards: first the (num mod 4) trailing bytes ... */
342 if ((num & 3) == 1) {
343 s0 = (mlib_s32) * sp;
344 sp--;
345
346 t0 = VIS_LD_U8_I(table0, s0);
347 acc = vis_faligndata(t0, acc);
348 num--;
349 }
350 else if ((num & 3) == 2) {
351 s0 = (mlib_s32) * sp;
352 sp--;
353
354 t0 = VIS_LD_U8_I(table1, s0);
355 acc = vis_faligndata(t0, acc);
356
357 s0 = (mlib_s32) * sp;
358 sp--;
359
360 t0 = VIS_LD_U8_I(table0, s0);
361 acc = vis_faligndata(t0, acc);
362 num -= 2;
363 }
364 else if ((num & 3) == 3) {
365 s0 = (mlib_s32) * sp;
366 sp--;
367
368 t0 = VIS_LD_U8_I(table2, s0);
369 acc = vis_faligndata(t0, acc);
370
371 s0 = (mlib_s32) * sp;
372 sp--;
373
374 t0 = VIS_LD_U8_I(table1, s0);
375 acc = vis_faligndata(t0, acc);
376
377 s0 = (mlib_s32) * sp;
378 sp--;
379
380 t0 = VIS_LD_U8_I(table0, s0);
381 acc = vis_faligndata(t0, acc);
382 num -= 3;
383 }
384
 /* ... then, if num is 4 rather than 0, one full quad table3..table0 */
385 if (num != 0) {
386 s0 = (mlib_s32) * sp;
387 sp--;
388
389 t0 = VIS_LD_U8_I(table3, s0);
390 acc = vis_faligndata(t0, acc);
391
392 s0 = (mlib_s32) * sp;
393 sp--;
394
395 t0 = VIS_LD_U8_I(table2, s0);
396 acc = vis_faligndata(t0, acc);
397
398 s0 = (mlib_s32) * sp;
399 sp--;
400
401 t0 = VIS_LD_U8_I(table1, s0);
402 acc = vis_faligndata(t0, acc);
403
404 s0 = (mlib_s32) * sp;
405 sp--;
406
407 t0 = VIS_LD_U8_I(table0, s0);
408 acc = vis_faligndata(t0, acc);
409 }
410
 /* NOTE(review): presumably stores only the bytes in [dp, dend] -- confirm */
411 emask = vis_edge8(dp, dend);
412 vis_pst_8(acc, dp, emask);
413 }
414}
415
416/***************************************************************/
/*
 * Row kernel: U8 -> U8 lookup through four cyclically applied tables, for a
 * source pointer that lies 2 bytes past the word the kernel reads from
 * (word reads start at src - 2).
 *
 *   src             first source byte.  The entry points in this file call
 *                   this variant only when (src & 3) == 2.
 *   dst             first destination byte; written through an mlib_d64
 *                   pointer.  The entry points in this file advance dst to
 *                   an 8-byte boundary before calling.
 *   xsize           number of bytes to translate.
 *   table0..table3  byte k of the segment is translated through
 *                   table(k mod 4).
 *
 * Each 8-byte group is taken from three words: the low two bytes of s0,
 * all of s1 and the top two bytes of s2; s2 is then carried over as the
 * next s0.  The final 1..7 bytes are assembled back-to-front and written
 * with vis_pst_8() under the mask returned by vis_edge8().
 *
 * NOTE(review): `acc` is never initialised before its first use as the
 * second operand of vis_faligndata(); presumably stale bytes are shifted
 * out or masked off by the edge store -- confirm against the VIS manual.
 * NOTE(review): the shift/mask extraction assumes (word >> 24) is the first
 * byte in memory (big-endian layout) -- confirm.
 */
417void mlib_v_ImageLookUp_U8_U8_124_SrcOff2_D1(const mlib_u8 *src,
418 mlib_u8 *dst,
419 mlib_s32 xsize,
420 const mlib_u8 *table0,
421 const mlib_u8 *table1,
422 const mlib_u8 *table2,
423 const mlib_u8 *table3)
424{
425 mlib_u32 *sa; /* aligned pointer to source data */
426 mlib_u8 *sp; /* pointer to source data */
427 mlib_u32 s0, s1, s2; /* source data */
428 mlib_u8 *dl; /* pointer to start of destination */
429 mlib_u8 *dend; /* pointer to end of destination */
430 mlib_d64 *dp; /* aligned pointer to destination */
431 mlib_d64 t0, t1, t2; /* destination data */
432 mlib_d64 t3, t4, t5; /* destination data */
433 mlib_d64 t6, t7, acc; /* destination data */
434 mlib_s32 emask; /* edge mask */
435 mlib_s32 i, num; /* loop variable */
436
 /* back up two bytes so that word reads start at src - 2 */
437 sa = (mlib_u32 *) (src - 2);
438 dl = dst;
439 dp = (mlib_d64 *) dl;
 /* dend addresses the LAST destination byte (inclusive end) */
440 dend = dl + xsize - 1;
441
 /* NOTE(review): presumably sets the faligndata byte offset to 7 -- confirm */
442 vis_alignaddr((void *)0, 7);
443
 /* first word is read unconditionally; only its low two bytes are used */
444 s0 = *sa++;
445
446 if (xsize >= 8) {
447
448 s1 = sa[0];
449 s2 = sa[1];
450 sa += 2;
451
 /* steady state: runs while at least 16 bytes remain from position i */
452#pragma pipeloop(0)
453 for (i = 0; i <= xsize - 16; i += 8, sa += 2) {
 /* lookups are issued for byte 7 down to byte 0 of the group */
454 t7 = VIS_LD_U8_I(table3, (s2 >> 16) & 0xFF);
455 t6 = VIS_LD_U8_I(table2, s2 >> 24);
456 t5 = VIS_LD_U8_I(table1, s1 & 0xFF);
457 t4 = VIS_LD_U8_I(table0, (s1 >> 8) & 0xFF);
458 t3 = VIS_LD_U8_I(table3, (s1 >> 16) & 0xFF);
459 t2 = VIS_LD_U8_I(table2, s1 >> 24);
460 t1 = VIS_LD_U8_I(table1, s0 & 0xFF);
461 t0 = VIS_LD_U8_I(table0, (s0 >> 8) & 0xFF);
462 acc = vis_faligndata(t7, acc);
463 acc = vis_faligndata(t6, acc);
464 acc = vis_faligndata(t5, acc);
465 acc = vis_faligndata(t4, acc);
466 acc = vis_faligndata(t3, acc);
467 acc = vis_faligndata(t2, acc);
468 acc = vis_faligndata(t1, acc);
469 acc = vis_faligndata(t0, acc);
 /* the low two bytes of s2 are still unused; they start the next group */
470 s0 = s2;
471 s1 = sa[0];
472 s2 = sa[1];
473 *dp++ = acc;
474 }
475
 /* last full group: same as the loop body but without loading ahead */
476 t7 = VIS_LD_U8_I(table3, (s2 >> 16) & 0xFF);
477 t6 = VIS_LD_U8_I(table2, s2 >> 24);
478 t5 = VIS_LD_U8_I(table1, s1 & 0xFF);
479 t4 = VIS_LD_U8_I(table0, (s1 >> 8) & 0xFF);
480 t3 = VIS_LD_U8_I(table3, (s1 >> 16) & 0xFF);
481 t2 = VIS_LD_U8_I(table2, s1 >> 24);
482 t1 = VIS_LD_U8_I(table1, s0 & 0xFF);
483 t0 = VIS_LD_U8_I(table0, (s0 >> 8) & 0xFF);
484 acc = vis_faligndata(t7, acc);
485 acc = vis_faligndata(t6, acc);
486 acc = vis_faligndata(t5, acc);
487 acc = vis_faligndata(t4, acc);
488 acc = vis_faligndata(t3, acc);
489 acc = vis_faligndata(t2, acc);
490 acc = vis_faligndata(t1, acc);
491 acc = vis_faligndata(t0, acc);
492 *dp++ = acc;
493 }
494
 /*
  * sa is one word past the last word read, which is 2 bytes beyond the
  * first unconsumed source byte; rewind so sp addresses that byte.
  */
495 sp = (mlib_u8 *) sa;
496 sp -= 2;
497
 /* tail: 1..7 bytes left (xsize mod 8), handled byte by byte */
498 if ((mlib_addr) dp <= (mlib_addr) dend) {
499
 /* move sp to the LAST source byte; num becomes the remaining byte count */
500 num = (mlib_addr) dend - (mlib_addr) dp;
501 sp += num;
502 num++;
503
 /* walk backwards: first the (num mod 4) trailing bytes ... */
504 if ((num & 3) == 1) {
505 s0 = (mlib_s32) * sp;
506 sp--;
507
508 t0 = VIS_LD_U8_I(table0, s0);
509 acc = vis_faligndata(t0, acc);
510 num--;
511 }
512 else if ((num & 3) == 2) {
513 s0 = (mlib_s32) * sp;
514 sp--;
515
516 t0 = VIS_LD_U8_I(table1, s0);
517 acc = vis_faligndata(t0, acc);
518
519 s0 = (mlib_s32) * sp;
520 sp--;
521
522 t0 = VIS_LD_U8_I(table0, s0);
523 acc = vis_faligndata(t0, acc);
524 num -= 2;
525 }
526 else if ((num & 3) == 3) {
527 s0 = (mlib_s32) * sp;
528 sp--;
529
530 t0 = VIS_LD_U8_I(table2, s0);
531 acc = vis_faligndata(t0, acc);
532
533 s0 = (mlib_s32) * sp;
534 sp--;
535
536 t0 = VIS_LD_U8_I(table1, s0);
537 acc = vis_faligndata(t0, acc);
538
539 s0 = (mlib_s32) * sp;
540 sp--;
541
542 t0 = VIS_LD_U8_I(table0, s0);
543 acc = vis_faligndata(t0, acc);
544 num -= 3;
545 }
546
 /* ... then, if num is 4 rather than 0, one full quad table3..table0 */
547 if (num != 0) {
548 s0 = (mlib_s32) * sp;
549 sp--;
550
551 t0 = VIS_LD_U8_I(table3, s0);
552 acc = vis_faligndata(t0, acc);
553
554 s0 = (mlib_s32) * sp;
555 sp--;
556
557 t0 = VIS_LD_U8_I(table2, s0);
558 acc = vis_faligndata(t0, acc);
559
560 s0 = (mlib_s32) * sp;
561 sp--;
562
563 t0 = VIS_LD_U8_I(table1, s0);
564 acc = vis_faligndata(t0, acc);
565
566 s0 = (mlib_s32) * sp;
567 sp--;
568
569 t0 = VIS_LD_U8_I(table0, s0);
570 acc = vis_faligndata(t0, acc);
571 }
572
 /* NOTE(review): presumably stores only the bytes in [dp, dend] -- confirm */
573 emask = vis_edge8(dp, dend);
574 vis_pst_8(acc, dp, emask);
575 }
576}
577
578/***************************************************************/
/*
 * Row kernel: U8 -> U8 lookup through four cyclically applied tables, for a
 * source pointer that lies 3 bytes past the word the kernel reads from
 * (word reads start at src - 3).
 *
 *   src             first source byte.  The entry points in this file call
 *                   this variant when (src & 3) == 3.
 *   dst             first destination byte; written through an mlib_d64
 *                   pointer.  The entry points in this file advance dst to
 *                   an 8-byte boundary before calling.
 *   xsize           number of bytes to translate.
 *   table0..table3  byte k of the segment is translated through
 *                   table(k mod 4).
 *
 * Each 8-byte group is taken from three words: the low byte of s0, all of
 * s1 and the top three bytes of s2; s2 is then carried over as the next s0.
 * The final 1..7 bytes are assembled back-to-front and written with
 * vis_pst_8() under the mask returned by vis_edge8().
 *
 * NOTE(review): `acc` is never initialised before its first use as the
 * second operand of vis_faligndata(); presumably stale bytes are shifted
 * out or masked off by the edge store -- confirm against the VIS manual.
 * NOTE(review): the shift/mask extraction assumes (word >> 24) is the first
 * byte in memory (big-endian layout) -- confirm.
 */
579void mlib_v_ImageLookUp_U8_U8_124_SrcOff3_D1(const mlib_u8 *src,
580 mlib_u8 *dst,
581 mlib_s32 xsize,
582 const mlib_u8 *table0,
583 const mlib_u8 *table1,
584 const mlib_u8 *table2,
585 const mlib_u8 *table3)
586{
587 mlib_u32 *sa; /* aligned pointer to source data */
588 mlib_u8 *sp; /* pointer to source data */
589 mlib_u32 s0, s1, s2; /* source data */
590 mlib_u8 *dl; /* pointer to start of destination */
591 mlib_u8 *dend; /* pointer to end of destination */
592 mlib_d64 *dp; /* aligned pointer to destination */
593 mlib_d64 t0, t1, t2; /* destination data */
594 mlib_d64 t3, t4, t5; /* destination data */
595 mlib_d64 t6, t7, acc; /* destination data */
596 mlib_s32 emask; /* edge mask */
597 mlib_s32 i, num; /* loop variable */
598
 /* back up three bytes so that word reads start at src - 3 */
599 sa = (mlib_u32 *) (src - 3);
600 dl = dst;
601 dp = (mlib_d64 *) dl;
 /* dend addresses the LAST destination byte (inclusive end) */
602 dend = dl + xsize - 1;
603
 /* NOTE(review): presumably sets the faligndata byte offset to 7 -- confirm */
604 vis_alignaddr((void *)0, 7);
605
 /* first word is read unconditionally; only its low byte is used */
606 s0 = *sa++;
607
608 if (xsize >= 8) {
609
610 s1 = sa[0];
611 s2 = sa[1];
612 sa += 2;
613
 /* steady state: runs while at least 16 bytes remain from position i */
614#pragma pipeloop(0)
615 for (i = 0; i <= xsize - 16; i += 8, sa += 2) {
 /* lookups are issued for byte 7 down to byte 0 of the group */
616 t7 = VIS_LD_U8_I(table3, (s2 >> 8) & 0xFF);
617 t6 = VIS_LD_U8_I(table2, (s2 >> 16) & 0xFF);
618 t5 = VIS_LD_U8_I(table1, s2 >> 24);
619 t4 = VIS_LD_U8_I(table0, s1 & 0xFF);
620 t3 = VIS_LD_U8_I(table3, (s1 >> 8) & 0xFF);
621 t2 = VIS_LD_U8_I(table2, (s1 >> 16) & 0xFF);
622 t1 = VIS_LD_U8_I(table1, s1 >> 24);
623 t0 = VIS_LD_U8_I(table0, s0 & 0xFF);
624 acc = vis_faligndata(t7, acc);
625 acc = vis_faligndata(t6, acc);
626 acc = vis_faligndata(t5, acc);
627 acc = vis_faligndata(t4, acc);
628 acc = vis_faligndata(t3, acc);
629 acc = vis_faligndata(t2, acc);
630 acc = vis_faligndata(t1, acc);
631 acc = vis_faligndata(t0, acc);
 /* the low byte of s2 is still unused; it starts the next group */
632 s0 = s2;
633 s1 = sa[0];
634 s2 = sa[1];
635 *dp++ = acc;
636 }
637
 /* last full group: same as the loop body but without loading ahead */
638 t7 = VIS_LD_U8_I(table3, (s2 >> 8) & 0xFF);
639 t6 = VIS_LD_U8_I(table2, (s2 >> 16) & 0xFF);
640 t5 = VIS_LD_U8_I(table1, s2 >> 24);
641 t4 = VIS_LD_U8_I(table0, s1 & 0xFF);
642 t3 = VIS_LD_U8_I(table3, (s1 >> 8) & 0xFF);
643 t2 = VIS_LD_U8_I(table2, (s1 >> 16) & 0xFF);
644 t1 = VIS_LD_U8_I(table1, s1 >> 24);
645 t0 = VIS_LD_U8_I(table0, s0 & 0xFF);
646 acc = vis_faligndata(t7, acc);
647 acc = vis_faligndata(t6, acc);
648 acc = vis_faligndata(t5, acc);
649 acc = vis_faligndata(t4, acc);
650 acc = vis_faligndata(t3, acc);
651 acc = vis_faligndata(t2, acc);
652 acc = vis_faligndata(t1, acc);
653 acc = vis_faligndata(t0, acc);
654 *dp++ = acc;
655 }
656
 /*
  * sa is one word past the last word read, which is 1 byte beyond the
  * first unconsumed source byte; rewind so sp addresses that byte.
  */
657 sp = (mlib_u8 *) sa;
658 sp--;
659
 /* tail: 1..7 bytes left (xsize mod 8), handled byte by byte */
660 if ((mlib_addr) dp <= (mlib_addr) dend) {
661
 /* move sp to the LAST source byte; num becomes the remaining byte count */
662 num = (mlib_addr) dend - (mlib_addr) dp;
663 sp += num;
664 num++;
665
 /* walk backwards: first the (num mod 4) trailing bytes ... */
666 if ((num & 3) == 1) {
667 s0 = (mlib_s32) * sp;
668 sp--;
669
670 t0 = VIS_LD_U8_I(table0, s0);
671 acc = vis_faligndata(t0, acc);
672 num--;
673 }
674 else if ((num & 3) == 2) {
675 s0 = (mlib_s32) * sp;
676 sp--;
677
678 t0 = VIS_LD_U8_I(table1, s0);
679 acc = vis_faligndata(t0, acc);
680
681 s0 = (mlib_s32) * sp;
682 sp--;
683
684 t0 = VIS_LD_U8_I(table0, s0);
685 acc = vis_faligndata(t0, acc);
686 num -= 2;
687 }
688 else if ((num & 3) == 3) {
689 s0 = (mlib_s32) * sp;
690 sp--;
691
692 t0 = VIS_LD_U8_I(table2, s0);
693 acc = vis_faligndata(t0, acc);
694
695 s0 = (mlib_s32) * sp;
696 sp--;
697
698 t0 = VIS_LD_U8_I(table1, s0);
699 acc = vis_faligndata(t0, acc);
700
701 s0 = (mlib_s32) * sp;
702 sp--;
703
704 t0 = VIS_LD_U8_I(table0, s0);
705 acc = vis_faligndata(t0, acc);
706 num -= 3;
707 }
708
 /* ... then, if num is 4 rather than 0, one full quad table3..table0 */
709 if (num != 0) {
710 s0 = (mlib_s32) * sp;
711 sp--;
712
713 t0 = VIS_LD_U8_I(table3, s0);
714 acc = vis_faligndata(t0, acc);
715
716 s0 = (mlib_s32) * sp;
717 sp--;
718
719 t0 = VIS_LD_U8_I(table2, s0);
720 acc = vis_faligndata(t0, acc);
721
722 s0 = (mlib_s32) * sp;
723 sp--;
724
725 t0 = VIS_LD_U8_I(table1, s0);
726 acc = vis_faligndata(t0, acc);
727
728 s0 = (mlib_s32) * sp;
729 sp--;
730
731 t0 = VIS_LD_U8_I(table0, s0);
732 acc = vis_faligndata(t0, acc);
733 }
734
 /* NOTE(review): presumably stores only the bytes in [dp, dend] -- confirm */
735 emask = vis_edge8(dp, dend);
736 vis_pst_8(acc, dp, emask);
737 }
738}
739
740/***************************************************************/
741void mlib_v_ImageLookUp_U8_U8_1(const mlib_u8 *src,
742 mlib_s32 slb,
743 mlib_u8 *dst,
744 mlib_s32 dlb,
745 mlib_s32 xsize,
746 mlib_s32 ysize,
747 const mlib_u8 **table)
748{
749 mlib_u8 *sl;
750 mlib_u8 *dl;
751 const mlib_u8 *tab = table[0];
752 mlib_s32 j, i;
753
754 sl = (void *)src;
755 dl = dst;
756
757 /* row loop */
758 for (j = 0; j < ysize; j++) {
759 mlib_u8 *sp = sl;
760 mlib_u8 *dp = dl;
761 mlib_s32 off, size = xsize;
762
763 off = (8 - ((mlib_addr) dp & 7)) & 7;
764
765 off = (off < size) ? off : size;
766
767 for (i = 0; i < off; i++) {
768 *dp++ = tab[(*sp++)];
769 size--;
770 }
771
772 if (size > 0) {
773
774 off = (mlib_addr) sp & 3;
775
776 if (off == 0) {
777 mlib_v_ImageLookUp_U8_U8_124_SrcOff0_D1(sp, dp, size, tab, tab, tab,
778 tab);
779 }
780 else if (off == 1) {
781 mlib_v_ImageLookUp_U8_U8_124_SrcOff1_D1(sp, dp, size, tab, tab, tab,
782 tab);
783 }
784 else if (off == 2) {
785 mlib_v_ImageLookUp_U8_U8_124_SrcOff2_D1(sp, dp, size, tab, tab, tab,
786 tab);
787 }
788 else {
789 mlib_v_ImageLookUp_U8_U8_124_SrcOff3_D1(sp, dp, size, tab, tab, tab,
790 tab);
791 }
792 }
793
794 sl = (mlib_u8 *) ((mlib_u8 *) sl + slb);
795 dl = (mlib_u8 *) ((mlib_u8 *) dl + dlb);
796 }
797}
798
799/***************************************************************/
800void mlib_v_ImageLookUp_U8_U8_2(const mlib_u8 *src,
801 mlib_s32 slb,
802 mlib_u8 *dst,
803 mlib_s32 dlb,
804 mlib_s32 xsize,
805 mlib_s32 ysize,
806 const mlib_u8 **table)
807{
808 mlib_u8 *sl;
809 mlib_u8 *dl;
810 const mlib_u8 *tab;
811 mlib_s32 j, i;
812
813 sl = (void *)src;
814 dl = dst;
815
816 /* row loop */
817 for (j = 0; j < ysize; j++) {
818 mlib_u8 *sp = sl;
819 mlib_u8 *dp = dl;
820 mlib_s32 off, size = xsize * 2;
821 const mlib_u8 *tab0 = table[0];
822 const mlib_u8 *tab1 = table[1];
823
824 off = (8 - ((mlib_addr) dp & 7)) & 7;
825
826 off = (off < size) ? off : size;
827
828 for (i = 0; i < off - 1; i += 2) {
829 *dp++ = tab0[(*sp++)];
830 *dp++ = tab1[(*sp++)];
831 size -= 2;
832 }
833
834 if ((off & 1) != 0) {
835 *dp++ = tab0[(*sp++)];
836 size--;
837 tab = tab0;
838 tab0 = tab1;
839 tab1 = tab;
840 }
841
842 if (size > 0) {
843
844 off = (mlib_addr) sp & 3;
845
846 if (off == 0) {
847 mlib_v_ImageLookUp_U8_U8_124_SrcOff0_D1(sp, dp, size, tab0, tab1, tab0,
848 tab1);
849 }
850 else if (off == 1) {
851 mlib_v_ImageLookUp_U8_U8_124_SrcOff1_D1(sp, dp, size, tab0, tab1, tab0,
852 tab1);
853 }
854 else if (off == 2) {
855 mlib_v_ImageLookUp_U8_U8_124_SrcOff2_D1(sp, dp, size, tab0, tab1, tab0,
856 tab1);
857 }
858 else {
859 mlib_v_ImageLookUp_U8_U8_124_SrcOff3_D1(sp, dp, size, tab0, tab1, tab0,
860 tab1);
861 }
862 }
863
864 sl = (mlib_u8 *) ((mlib_u8 *) sl + slb);
865 dl = (mlib_u8 *) ((mlib_u8 *) dl + dlb);
866 }
867}
868
869/***************************************************************/
870void mlib_v_ImageLookUp_U8_U8_4(const mlib_u8 *src,
871 mlib_s32 slb,
872 mlib_u8 *dst,
873 mlib_s32 dlb,
874 mlib_s32 xsize,
875 mlib_s32 ysize,
876 const mlib_u8 **table)
877{
878 mlib_u8 *sl;
879 mlib_u8 *dl;
880 const mlib_u8 *tab;
881 mlib_s32 j;
882
883 sl = (void *)src;
884 dl = dst;
885
886 /* row loop */
887 for (j = 0; j < ysize; j++) {
888 mlib_u8 *sp = sl;
889 mlib_u8 *dp = dl;
890 const mlib_u8 *tab0 = table[0];
891 const mlib_u8 *tab1 = table[1];
892 const mlib_u8 *tab2 = table[2];
893 const mlib_u8 *tab3 = table[3];
894 mlib_s32 off, size = xsize * 4;
895
896 off = (8 - ((mlib_addr) dp & 7)) & 7;
897
898 off = (off < size) ? off : size;
899
900 if (off >= 4) {
901 *dp++ = tab0[(*sp++)];
902 *dp++ = tab1[(*sp++)];
903 *dp++ = tab2[(*sp++)];
904 *dp++ = tab3[(*sp++)];
905 size -= 4;
906 off -= 4;
907 }
908
909 if (off == 1) {
910 *dp++ = tab0[(*sp++)];
911 tab = tab0;
912 tab0 = tab1;
913 tab1 = tab2;
914 tab2 = tab3;
915 tab3 = tab;
916 size--;
917 }
918 else if (off == 2) {
919 *dp++ = tab0[(*sp++)];
920 *dp++ = tab1[(*sp++)];
921 tab = tab0;
922 tab0 = tab2;
923 tab2 = tab;
924 tab = tab1;
925 tab1 = tab3;
926 tab3 = tab;
927 size -= 2;
928 }
929 else if (off == 3) {
930 *dp++ = tab0[(*sp++)];
931 *dp++ = tab1[(*sp++)];
932 *dp++ = tab2[(*sp++)];
933 tab = tab3;
934 tab3 = tab2;
935 tab2 = tab1;
936 tab1 = tab0;
937 tab0 = tab;
938 size -= 3;
939 }
940
941 if (size > 0) {
942
943 off = (mlib_addr) sp & 3;
944
945 if (off == 0) {
946 mlib_v_ImageLookUp_U8_U8_124_SrcOff0_D1(sp, dp, size, tab0, tab1, tab2,
947 tab3);
948 }
949 else if (off == 1) {
950 mlib_v_ImageLookUp_U8_U8_124_SrcOff1_D1(sp, dp, size, tab0, tab1, tab2,
951 tab3);
952 }
953 else if (off == 2) {
954 mlib_v_ImageLookUp_U8_U8_124_SrcOff2_D1(sp, dp, size, tab0, tab1, tab2,
955 tab3);
956 }
957 else {
958 mlib_v_ImageLookUp_U8_U8_124_SrcOff3_D1(sp, dp, size, tab0, tab1, tab2,
959 tab3);
960 }
961 }
962
963 sl = (mlib_u8 *) ((mlib_u8 *) sl + slb);
964 dl = (mlib_u8 *) ((mlib_u8 *) dl + dlb);
965 }
966}
967
968/***************************************************************/
/*
 * Row kernel: U8 -> U8 lookup through three cyclically applied tables, for
 * a source pointer that is used directly as a 32-bit word pointer
 * (offset 0).
 *
 *   src             first source byte; read through an mlib_u32 pointer,
 *                   so presumably 4-byte aligned -- TODO confirm with caller
 *   dst             first destination byte; written through an mlib_d64
 *                   pointer, so presumably 8-byte aligned -- TODO confirm
 *                   with caller
 *   xsize           number of bytes to translate
 *   table0..table2  byte k of the segment is translated through
 *                   table(k mod 3)
 *
 * Output is produced 8 bytes at a time.  Because 8 mod 3 == 2, the three
 * table pointers are rotated after every group so that "table0" is always
 * the table for the first byte of the next group.  The final 1..7 bytes are
 * assembled back-to-front and written with vis_pst_8() under the mask
 * returned by vis_edge8().
 *
 * NOTE(review): `acc` is never initialised before its first use as the
 * second operand of vis_faligndata(); presumably stale bytes are shifted
 * out or masked off by the edge store -- confirm against the VIS manual.
 * NOTE(review): the shift/mask extraction assumes (word >> 24) is the first
 * byte in memory (big-endian layout) -- confirm.
 */
969void mlib_v_ImageLookUp_U8_U8_3_SrcOff0_D1(const mlib_u8 *src,
970 mlib_u8 *dst,
971 mlib_s32 xsize,
972 const mlib_u8 *table0,
973 const mlib_u8 *table1,
974 const mlib_u8 *table2)
975{
976 mlib_u32 *sa; /* aligned pointer to source data */
977 mlib_u8 *sp; /* pointer to source data */
978 mlib_u32 s0, s1; /* source data */
979 mlib_u8 *dl; /* pointer to start of destination */
980 mlib_u8 *dend; /* pointer to end of destination */
981 mlib_d64 *dp; /* aligned pointer to destination */
982 mlib_d64 t0, t1, t2; /* destination data */
983 mlib_d64 t3, t4, t5; /* destination data */
984 mlib_d64 t6, t7, acc; /* destination data */
985 mlib_s32 emask; /* edge mask */
986 mlib_s32 i, num; /* loop variable */
 /* scratch pointer used while rotating table0..table2 */
987 const mlib_u8 *table;
988
989 sa = (mlib_u32 *) src;
990 dl = dst;
991 dp = (mlib_d64 *) dl;
 /* dend addresses the LAST destination byte (inclusive end) */
992 dend = dl + xsize - 1;
993
 /* NOTE(review): presumably sets the faligndata byte offset to 7 -- confirm */
994 vis_alignaddr((void *)0, 7);
995
996 if (xsize >= 8) {
997
 /* preload the first two source words (8 source bytes) */
998 s0 = sa[0];
999 s1 = sa[1];
1000 sa += 2;
1001
 /* steady state: runs while at least 16 bytes remain from position i */
1002#pragma pipeloop(0)
1003 for (i = 0; i <= xsize - 16; i += 8, sa += 2) {
 /* bytes 0..7 of the group use tables 0,1,2,0,1,2,0,1 (issued 7 -> 0) */
1004 t7 = VIS_LD_U8_I(table1, s1 & 0xFF);
1005 t6 = VIS_LD_U8_I(table0, (s1 >> 8) & 0xFF);
1006 t5 = VIS_LD_U8_I(table2, (s1 >> 16) & 0xFF);
1007 t4 = VIS_LD_U8_I(table1, s1 >> 24);
1008 t3 = VIS_LD_U8_I(table0, s0 & 0xFF);
1009 t2 = VIS_LD_U8_I(table2, (s0 >> 8) & 0xFF);
1010 t1 = VIS_LD_U8_I(table1, (s0 >> 16) & 0xFF);
1011 t0 = VIS_LD_U8_I(table0, s0 >> 24);
1012 acc = vis_faligndata(t7, acc);
1013 acc = vis_faligndata(t6, acc);
1014 acc = vis_faligndata(t5, acc);
1015 acc = vis_faligndata(t4, acc);
1016 acc = vis_faligndata(t3, acc);
1017 acc = vis_faligndata(t2, acc);
1018 acc = vis_faligndata(t1, acc);
1019 acc = vis_faligndata(t0, acc);
 /* rotate: the next group starts on old table2, then old table0, old table1 */
1020 table = table0;
1021 table0 = table2;
1022 table2 = table1;
1023 table1 = table;
1024 s0 = sa[0];
1025 s1 = sa[1];
1026 *dp++ = acc;
1027 }
1028
 /* last full group: same as the loop body but without loading ahead */
1029 t7 = VIS_LD_U8_I(table1, s1 & 0xFF);
1030 t6 = VIS_LD_U8_I(table0, (s1 >> 8) & 0xFF);
1031 t5 = VIS_LD_U8_I(table2, (s1 >> 16) & 0xFF);
1032 t4 = VIS_LD_U8_I(table1, s1 >> 24);
1033 t3 = VIS_LD_U8_I(table0, s0 & 0xFF);
1034 t2 = VIS_LD_U8_I(table2, (s0 >> 8) & 0xFF);
1035 t1 = VIS_LD_U8_I(table1, (s0 >> 16) & 0xFF);
1036 t0 = VIS_LD_U8_I(table0, s0 >> 24);
1037 acc = vis_faligndata(t7, acc);
1038 acc = vis_faligndata(t6, acc);
1039 acc = vis_faligndata(t5, acc);
1040 acc = vis_faligndata(t4, acc);
1041 acc = vis_faligndata(t3, acc);
1042 acc = vis_faligndata(t2, acc);
1043 acc = vis_faligndata(t1, acc);
1044 acc = vis_faligndata(t0, acc);
 /* rotate once more so the tail below starts on the correct table */
1045 table = table0;
1046 table0 = table2;
1047 table2 = table1;
1048 table1 = table;
1049 *dp++ = acc;
1050 }
1051
 /* sa now addresses the first source byte that has not been consumed */
1052 sp = (mlib_u8 *) sa;
1053
 /* tail: 1..7 bytes left (xsize mod 8), handled byte by byte */
1054 if ((mlib_addr) dp <= (mlib_addr) dend) {
1055
 /* move sp to the LAST source byte; num becomes the remaining byte count */
1056 num = (mlib_addr) dend - (mlib_addr) dp;
1057 sp += num;
1058 num++;
 /* i = num mod 3: bytes beyond the last complete table0..table2 triple */
1059 i = num - 3 * (num / 3);
1060
 /* walk backwards: first the partial triple at the end ... */
1061 if (i == 2) {
1062 s0 = (mlib_s32) * sp;
1063 sp--;
1064
1065 t0 = VIS_LD_U8_I(table1, s0);
1066 acc = vis_faligndata(t0, acc);
1067
1068 s0 = (mlib_s32) * sp;
1069 sp--;
1070
1071 t0 = VIS_LD_U8_I(table0, s0);
1072 acc = vis_faligndata(t0, acc);
1073 num -= 2;
1074 }
1075 else if (i == 1) {
1076 s0 = (mlib_s32) * sp;
1077 sp--;
1078
1079 t0 = VIS_LD_U8_I(table0, s0);
1080 acc = vis_faligndata(t0, acc);
1081 num--;
1082 }
1083
 /* ... then the complete triples (num is now 0, 3 or 6), table2 -> table0 */
1084#pragma pipeloop(0)
1085 for (i = 0; i < num; i += 3) {
1086 s0 = (mlib_s32) * sp;
1087 sp--;
1088
1089 t0 = VIS_LD_U8_I(table2, s0);
1090 acc = vis_faligndata(t0, acc);
1091
1092 s0 = (mlib_s32) * sp;
1093 sp--;
1094
1095 t0 = VIS_LD_U8_I(table1, s0);
1096 acc = vis_faligndata(t0, acc);
1097
1098 s0 = (mlib_s32) * sp;
1099 sp--;
1100
1101 t0 = VIS_LD_U8_I(table0, s0);
1102 acc = vis_faligndata(t0, acc);
1103 }
1104
 /* NOTE(review): presumably stores only the bytes in [dp, dend] -- confirm */
1105 emask = vis_edge8(dp, dend);
1106 vis_pst_8(acc, dp, emask);
1107 }
1108}
1109
1110/***************************************************************/
/*
 * Row kernel: U8 -> U8 lookup through three cyclically applied tables, for
 * a source pointer that lies 1 byte past the word the kernel reads from
 * (word reads start at src - 1).
 *
 *   src             first source byte; (src - 1) is read through an
 *                   mlib_u32 pointer, so presumably (src & 3) == 1 --
 *                   TODO confirm with caller
 *   dst             first destination byte; written through an mlib_d64
 *                   pointer, so presumably 8-byte aligned -- TODO confirm
 *                   with caller
 *   xsize           number of bytes to translate
 *   table0..table2  byte k of the segment is translated through
 *                   table(k mod 3)
 *
 * Each 8-byte group is taken from three words: the low three bytes of s0,
 * all of s1 and the top byte of s2; s2 is then carried over as the next s0.
 * Because 8 mod 3 == 2, the three table pointers are rotated after every
 * group.  The final 1..7 bytes are assembled back-to-front and written with
 * vis_pst_8() under the mask returned by vis_edge8().
 *
 * NOTE(review): `acc` is never initialised before its first use as the
 * second operand of vis_faligndata(); presumably stale bytes are shifted
 * out or masked off by the edge store -- confirm against the VIS manual.
 * NOTE(review): the shift/mask extraction assumes (word >> 24) is the first
 * byte in memory (big-endian layout) -- confirm.
 */
1111void mlib_v_ImageLookUp_U8_U8_3_SrcOff1_D1(const mlib_u8 *src,
1112 mlib_u8 *dst,
1113 mlib_s32 xsize,
1114 const mlib_u8 *table0,
1115 const mlib_u8 *table1,
1116 const mlib_u8 *table2)
1117{
1118 mlib_u32 *sa; /* aligned pointer to source data */
1119 mlib_u8 *sp; /* pointer to source data */
1120 mlib_u32 s0, s1, s2; /* source data */
1121 mlib_u8 *dl; /* pointer to start of destination */
1122 mlib_u8 *dend; /* pointer to end of destination */
1123 mlib_d64 *dp; /* aligned pointer to destination */
1124 mlib_d64 t0, t1, t2; /* destination data */
1125 mlib_d64 t3, t4, t5; /* destination data */
1126 mlib_d64 t6, t7, acc; /* destination data */
1127 mlib_s32 emask; /* edge mask */
1128 mlib_s32 i, num; /* loop variable */
 /* scratch pointer used while rotating table0..table2 */
1129 const mlib_u8 *table;
1130
 /* back up one byte so that word reads start at src - 1 */
1131 sa = (mlib_u32 *) (src - 1);
1132 dl = dst;
1133 dp = (mlib_d64 *) dl;
 /* dend addresses the LAST destination byte (inclusive end) */
1134 dend = dl + xsize - 1;
1135
 /* NOTE(review): presumably sets the faligndata byte offset to 7 -- confirm */
1136 vis_alignaddr((void *)0, 7);
1137
 /* first word is read unconditionally; its top byte (s0 >> 24) is never used */
1138 s0 = *sa++;
1139
1140 if (xsize >= 8) {
1141
1142 s1 = sa[0];
1143 s2 = sa[1];
1144 sa += 2;
1145
 /* steady state: runs while at least 16 bytes remain from position i */
1146#pragma pipeloop(0)
1147 for (i = 0; i <= xsize - 16; i += 8, sa += 2) {
 /* bytes 0..7 of the group use tables 0,1,2,0,1,2,0,1 (issued 7 -> 0) */
1148 t7 = VIS_LD_U8_I(table1, s2 >> 24);
1149 t6 = VIS_LD_U8_I(table0, s1 & 0xFF);
1150 t5 = VIS_LD_U8_I(table2, (s1 >> 8) & 0xFF);
1151 t4 = VIS_LD_U8_I(table1, (s1 >> 16) & 0xFF);
1152 t3 = VIS_LD_U8_I(table0, s1 >> 24);
1153 t2 = VIS_LD_U8_I(table2, s0 & 0xFF);
1154 t1 = VIS_LD_U8_I(table1, (s0 >> 8) & 0xFF);
1155 t0 = VIS_LD_U8_I(table0, (s0 >> 16) & 0xFF);
1156 acc = vis_faligndata(t7, acc);
1157 acc = vis_faligndata(t6, acc);
1158 acc = vis_faligndata(t5, acc);
1159 acc = vis_faligndata(t4, acc);
1160 acc = vis_faligndata(t3, acc);
1161 acc = vis_faligndata(t2, acc);
1162 acc = vis_faligndata(t1, acc);
1163 acc = vis_faligndata(t0, acc);
 /* rotate: the next group starts on old table2, then old table0, old table1 */
1164 table = table0;
1165 table0 = table2;
1166 table2 = table1;
1167 table1 = table;
 /* only the top byte of s2 was used; its low 3 bytes start the next group */
1168 s0 = s2;
1169 s1 = sa[0];
1170 s2 = sa[1];
1171 *dp++ = acc;
1172 }
1173
 /* last full group: same as the loop body but without loading ahead */
1174 t7 = VIS_LD_U8_I(table1, s2 >> 24);
1175 t6 = VIS_LD_U8_I(table0, s1 & 0xFF);
1176 t5 = VIS_LD_U8_I(table2, (s1 >> 8) & 0xFF);
1177 t4 = VIS_LD_U8_I(table1, (s1 >> 16) & 0xFF);
1178 t3 = VIS_LD_U8_I(table0, s1 >> 24);
1179 t2 = VIS_LD_U8_I(table2, s0 & 0xFF);
1180 t1 = VIS_LD_U8_I(table1, (s0 >> 8) & 0xFF);
1181 t0 = VIS_LD_U8_I(table0, (s0 >> 16) & 0xFF);
1182 acc = vis_faligndata(t7, acc);
1183 acc = vis_faligndata(t6, acc);
1184 acc = vis_faligndata(t5, acc);
1185 acc = vis_faligndata(t4, acc);
1186 acc = vis_faligndata(t3, acc);
1187 acc = vis_faligndata(t2, acc);
1188 acc = vis_faligndata(t1, acc);
1189 acc = vis_faligndata(t0, acc);
 /* rotate once more so the tail below starts on the correct table */
1190 table = table0;
1191 table0 = table2;
1192 table2 = table1;
1193 table1 = table;
1194 *dp++ = acc;
1195 }
1196
 /*
  * sa is one word past the last word read, which is 3 bytes beyond the
  * first unconsumed source byte; rewind so sp addresses that byte.
  */
1197 sp = (mlib_u8 *) sa;
1198 sp -= 3;
1199
 /* tail: 1..7 bytes left (xsize mod 8), handled byte by byte */
1200 if ((mlib_addr) dp <= (mlib_addr) dend) {
1201
 /* move sp to the LAST source byte; num becomes the remaining byte count */
1202 num = (mlib_addr) dend - (mlib_addr) dp;
1203 sp += num;
1204 num++;
 /* i = num mod 3: bytes beyond the last complete table0..table2 triple */
1205 i = num - 3 * (num / 3);
1206
 /* walk backwards: first the partial triple at the end ... */
1207 if (i == 2) {
1208 s0 = (mlib_s32) * sp;
1209 sp--;
1210
1211 t0 = VIS_LD_U8_I(table1, s0);
1212 acc = vis_faligndata(t0, acc);
1213
1214 s0 = (mlib_s32) * sp;
1215 sp--;
1216
1217 t0 = VIS_LD_U8_I(table0, s0);
1218 acc = vis_faligndata(t0, acc);
1219 num -= 2;
1220 }
1221 else if (i == 1) {
1222 s0 = (mlib_s32) * sp;
1223 sp--;
1224
1225 t0 = VIS_LD_U8_I(table0, s0);
1226 acc = vis_faligndata(t0, acc);
1227 num--;
1228 }
1229
 /* ... then the complete triples (num is now 0, 3 or 6), table2 -> table0 */
1230#pragma pipeloop(0)
1231 for (i = 0; i < num; i += 3) {
1232 s0 = (mlib_s32) * sp;
1233 sp--;
1234
1235 t0 = VIS_LD_U8_I(table2, s0);
1236 acc = vis_faligndata(t0, acc);
1237
1238 s0 = (mlib_s32) * sp;
1239 sp--;
1240
1241 t0 = VIS_LD_U8_I(table1, s0);
1242 acc = vis_faligndata(t0, acc);
1243
1244 s0 = (mlib_s32) * sp;
1245 sp--;
1246
1247 t0 = VIS_LD_U8_I(table0, s0);
1248 acc = vis_faligndata(t0, acc);
1249 }
1250
 /* NOTE(review): presumably stores only the bytes in [dp, dend] -- confirm */
1251 emask = vis_edge8(dp, dend);
1252 vis_pst_8(acc, dp, emask);
1253 }
1254}
1255
1256/***************************************************************/
1257void mlib_v_ImageLookUp_U8_U8_3_SrcOff2_D1(const mlib_u8 *src,
1258 mlib_u8 *dst,
1259 mlib_s32 xsize,
1260 const mlib_u8 *table0,
1261 const mlib_u8 *table1,
1262 const mlib_u8 *table2)
1263{
1264 mlib_u32 *sa; /* aligned pointer to source data */
1265 mlib_u8 *sp; /* pointer to source data */
1266 mlib_u32 s0, s1, s2; /* source data */
1267 mlib_u8 *dl; /* pointer to start of destination */
1268 mlib_u8 *dend; /* pointer to end of destination */
1269 mlib_d64 *dp; /* aligned pointer to destination */
1270 mlib_d64 t0, t1, t2; /* destination data */
1271 mlib_d64 t3, t4, t5; /* destination data */
1272 mlib_d64 t6, t7, acc; /* destination data */
1273 mlib_s32 emask; /* edge mask */
1274 mlib_s32 i, num; /* loop variable */
1275 const mlib_u8 *table;
1276
1277 sa = (mlib_u32 *) (src - 2);
1278 dl = dst;
1279 dp = (mlib_d64 *) dl;
1280 dend = dl + xsize - 1;
1281
1282 vis_alignaddr((void *)0, 7);
1283
1284 s0 = *sa++;
1285
1286 if (xsize >= 8) {
1287
1288 s1 = sa[0];
1289 s2 = sa[1];
1290 sa += 2;
1291
1292#pragma pipeloop(0)
1293 for (i = 0; i <= xsize - 16; i += 8, sa += 2) {
1294 t7 = VIS_LD_U8_I(table1, (s2 >> 16) & 0xFF);
1295 t6 = VIS_LD_U8_I(table0, s2 >> 24);
1296 t5 = VIS_LD_U8_I(table2, s1 & 0xFF);
1297 t4 = VIS_LD_U8_I(table1, (s1 >> 8) & 0xFF);
1298 t3 = VIS_LD_U8_I(table0, (s1 >> 16) & 0xFF);
1299 t2 = VIS_LD_U8_I(table2, s1 >> 24);
1300 t1 = VIS_LD_U8_I(table1, s0 & 0xFF);
1301 t0 = VIS_LD_U8_I(table0, (s0 >> 8) & 0xFF);
1302 acc = vis_faligndata(t7, acc);
1303 acc = vis_faligndata(t6, acc);
1304 acc = vis_faligndata(t5, acc);
1305 acc = vis_faligndata(t4, acc);
1306 acc = vis_faligndata(t3, acc);
1307 acc = vis_faligndata(t2, acc);
1308 acc = vis_faligndata(t1, acc);
1309 acc = vis_faligndata(t0, acc);
1310 table = table0;
1311 table0 = table2;
1312 table2 = table1;
1313 table1 = table;
1314 s0 = s2;
1315 s1 = sa[0];
1316 s2 = sa[1];
1317 *dp++ = acc;
1318 }
1319
1320 t7 = VIS_LD_U8_I(table1, (s2 >> 16) & 0xFF);
1321 t6 = VIS_LD_U8_I(table0, s2 >> 24);
1322 t5 = VIS_LD_U8_I(table2, s1 & 0xFF);
1323 t4 = VIS_LD_U8_I(table1, (s1 >> 8) & 0xFF);
1324 t3 = VIS_LD_U8_I(table0, (s1 >> 16) & 0xFF);
1325 t2 = VIS_LD_U8_I(table2, s1 >> 24);
1326 t1 = VIS_LD_U8_I(table1, s0 & 0xFF);
1327 t0 = VIS_LD_U8_I(table0, (s0 >> 8) & 0xFF);
1328 acc = vis_faligndata(t7, acc);
1329 acc = vis_faligndata(t6, acc);
1330 acc = vis_faligndata(t5, acc);
1331 acc = vis_faligndata(t4, acc);
1332 acc = vis_faligndata(t3, acc);
1333 acc = vis_faligndata(t2, acc);
1334 acc = vis_faligndata(t1, acc);
1335 acc = vis_faligndata(t0, acc);
1336 table = table0;
1337 table0 = table2;
1338 table2 = table1;
1339 table1 = table;
1340 *dp++ = acc;
1341 }
1342
1343 sp = (mlib_u8 *) sa;
1344 sp -= 2;
1345
1346 if ((mlib_addr) dp <= (mlib_addr) dend) {
1347
1348 num = (mlib_addr) dend - (mlib_addr) dp;
1349 sp += num;
1350 num++;
1351 i = num - 3 * (num / 3);
1352
1353 if (i == 2) {
1354 s0 = (mlib_s32) * sp;
1355 sp--;
1356
1357 t0 = VIS_LD_U8_I(table1, s0);
1358 acc = vis_faligndata(t0, acc);
1359
1360 s0 = (mlib_s32) * sp;
1361 sp--;
1362
1363 t0 = VIS_LD_U8_I(table0, s0);
1364 acc = vis_faligndata(t0, acc);
1365 num -= 2;
1366 }
1367 else if (i == 1) {
1368 s0 = (mlib_s32) * sp;
1369 sp--;
1370
1371 t0 = VIS_LD_U8_I(table0, s0);
1372 acc = vis_faligndata(t0, acc);
1373 num--;
1374 }
1375
1376#pragma pipeloop(0)
1377 for (i = 0; i < num; i += 3) {
1378 s0 = (mlib_s32) * sp;
1379 sp--;
1380
1381 t0 = VIS_LD_U8_I(table2, s0);
1382 acc = vis_faligndata(t0, acc);
1383
1384 s0 = (mlib_s32) * sp;
1385 sp--;
1386
1387 t0 = VIS_LD_U8_I(table1, s0);
1388 acc = vis_faligndata(t0, acc);
1389
1390 s0 = (mlib_s32) * sp;
1391 sp--;
1392
1393 t0 = VIS_LD_U8_I(table0, s0);
1394 acc = vis_faligndata(t0, acc);
1395 }
1396
1397 emask = vis_edge8(dp, dend);
1398 vis_pst_8(acc, dp, emask);
1399 }
1400}
1401
1402/***************************************************************/
1403void mlib_v_ImageLookUp_U8_U8_3_SrcOff3_D1(const mlib_u8 *src,
1404 mlib_u8 *dst,
1405 mlib_s32 xsize,
1406 const mlib_u8 *table0,
1407 const mlib_u8 *table1,
1408 const mlib_u8 *table2)
1409{
1410 mlib_u32 *sa; /* aligned pointer to source data */
1411 mlib_u8 *sp; /* pointer to source data */
1412 mlib_u32 s0, s1, s2; /* source data */
1413 mlib_u8 *dl; /* pointer to start of destination */
1414 mlib_u8 *dend; /* pointer to end of destination */
1415 mlib_d64 *dp; /* aligned pointer to destination */
1416 mlib_d64 t0, t1, t2; /* destination data */
1417 mlib_d64 t3, t4, t5; /* destination data */
1418 mlib_d64 t6, t7, acc; /* destination data */
1419 mlib_s32 emask; /* edge mask */
1420 mlib_s32 i, num; /* loop variable */
1421 const mlib_u8 *table;
1422
1423 sa = (mlib_u32 *) (src - 3);
1424 dl = dst;
1425 dp = (mlib_d64 *) dl;
1426 dend = dl + xsize - 1;
1427
1428 vis_alignaddr((void *)0, 7);
1429
1430 s0 = *sa++;
1431
1432 if (xsize >= 8) {
1433
1434 s1 = sa[0];
1435 s2 = sa[1];
1436 sa += 2;
1437
1438#pragma pipeloop(0)
1439 for (i = 0; i <= xsize - 16; i += 8, sa += 2) {
1440 t7 = VIS_LD_U8_I(table1, (s2 >> 8) & 0xFF);
1441 t6 = VIS_LD_U8_I(table0, (s2 >> 16) & 0xFF);
1442 t5 = VIS_LD_U8_I(table2, s2 >> 24);
1443 t4 = VIS_LD_U8_I(table1, s1 & 0xFF);
1444 t3 = VIS_LD_U8_I(table0, (s1 >> 8) & 0xFF);
1445 t2 = VIS_LD_U8_I(table2, (s1 >> 16) & 0xFF);
1446 t1 = VIS_LD_U8_I(table1, s1 >> 24);
1447 t0 = VIS_LD_U8_I(table0, s0 & 0xFF);
1448 acc = vis_faligndata(t7, acc);
1449 acc = vis_faligndata(t6, acc);
1450 acc = vis_faligndata(t5, acc);
1451 acc = vis_faligndata(t4, acc);
1452 acc = vis_faligndata(t3, acc);
1453 acc = vis_faligndata(t2, acc);
1454 acc = vis_faligndata(t1, acc);
1455 acc = vis_faligndata(t0, acc);
1456 table = table0;
1457 table0 = table2;
1458 table2 = table1;
1459 table1 = table;
1460 s0 = s2;
1461 s1 = sa[0];
1462 s2 = sa[1];
1463 *dp++ = acc;
1464 }
1465
1466 t7 = VIS_LD_U8_I(table1, (s2 >> 8) & 0xFF);
1467 t6 = VIS_LD_U8_I(table0, (s2 >> 16) & 0xFF);
1468 t5 = VIS_LD_U8_I(table2, s2 >> 24);
1469 t4 = VIS_LD_U8_I(table1, s1 & 0xFF);
1470 t3 = VIS_LD_U8_I(table0, (s1 >> 8) & 0xFF);
1471 t2 = VIS_LD_U8_I(table2, (s1 >> 16) & 0xFF);
1472 t1 = VIS_LD_U8_I(table1, s1 >> 24);
1473 t0 = VIS_LD_U8_I(table0, s0 & 0xFF);
1474 acc = vis_faligndata(t7, acc);
1475 acc = vis_faligndata(t6, acc);
1476 acc = vis_faligndata(t5, acc);
1477 acc = vis_faligndata(t4, acc);
1478 acc = vis_faligndata(t3, acc);
1479 acc = vis_faligndata(t2, acc);
1480 acc = vis_faligndata(t1, acc);
1481 acc = vis_faligndata(t0, acc);
1482 table = table0;
1483 table0 = table2;
1484 table2 = table1;
1485 table1 = table;
1486 *dp++ = acc;
1487 }
1488
1489 sp = (mlib_u8 *) sa;
1490 sp--;
1491
1492 if ((mlib_addr) dp <= (mlib_addr) dend) {
1493
1494 num = (mlib_addr) dend - (mlib_addr) dp;
1495 sp += num;
1496 num++;
1497 i = num - 3 * (num / 3);
1498
1499 if (i == 2) {
1500 s0 = (mlib_s32) * sp;
1501 sp--;
1502
1503 t0 = VIS_LD_U8_I(table1, s0);
1504 acc = vis_faligndata(t0, acc);
1505
1506 s0 = (mlib_s32) * sp;
1507 sp--;
1508
1509 t0 = VIS_LD_U8_I(table0, s0);
1510 acc = vis_faligndata(t0, acc);
1511 num -= 2;
1512 }
1513 else if (i == 1) {
1514 s0 = (mlib_s32) * sp;
1515 sp--;
1516
1517 t0 = VIS_LD_U8_I(table0, s0);
1518 acc = vis_faligndata(t0, acc);
1519 num--;
1520 }
1521
1522#pragma pipeloop(0)
1523 for (i = 0; i < num; i += 3) {
1524 s0 = (mlib_s32) * sp;
1525 sp--;
1526
1527 t0 = VIS_LD_U8_I(table2, s0);
1528 acc = vis_faligndata(t0, acc);
1529
1530 s0 = (mlib_s32) * sp;
1531 sp--;
1532
1533 t0 = VIS_LD_U8_I(table1, s0);
1534 acc = vis_faligndata(t0, acc);
1535
1536 s0 = (mlib_s32) * sp;
1537 sp--;
1538
1539 t0 = VIS_LD_U8_I(table0, s0);
1540 acc = vis_faligndata(t0, acc);
1541 }
1542
1543 emask = vis_edge8(dp, dend);
1544 vis_pst_8(acc, dp, emask);
1545 }
1546}
1547
1548/***************************************************************/
1549void mlib_v_ImageLookUp_U8_U8_3(const mlib_u8 *src,
1550 mlib_s32 slb,
1551 mlib_u8 *dst,
1552 mlib_s32 dlb,
1553 mlib_s32 xsize,
1554 mlib_s32 ysize,
1555 const mlib_u8 **table)
1556{
1557 mlib_u8 *sl;
1558 mlib_u8 *dl;
1559 const mlib_u8 *tab;
1560 mlib_s32 j, i;
1561
1562 sl = (void *)src;
1563 dl = dst;
1564
1565 /* row loop */
1566 for (j = 0; j < ysize; j++) {
1567 mlib_u8 *sp = sl;
1568 mlib_u8 *dp = dl;
1569 const mlib_u8 *tab0 = table[0];
1570 const mlib_u8 *tab1 = table[1];
1571 const mlib_u8 *tab2 = table[2];
1572 mlib_s32 off, size = xsize * 3;
1573
1574 off = (8 - ((mlib_addr) dp & 7)) & 7;
1575
1576 off = (off < size) ? off : size;
1577
1578 for (i = 0; i < off - 2; i += 3) {
1579 *dp++ = tab0[(*sp++)];
1580 *dp++ = tab1[(*sp++)];
1581 *dp++ = tab2[(*sp++)];
1582 size -= 3;
1583 }
1584
1585 off -= i;
1586
1587 if (off == 1) {
1588 *dp++ = tab0[(*sp++)];
1589 tab = tab0;
1590 tab0 = tab1;
1591 tab1 = tab2;
1592 tab2 = tab;
1593 size--;
1594 }
1595 else if (off == 2) {
1596 *dp++ = tab0[(*sp++)];
1597 *dp++ = tab1[(*sp++)];
1598 tab = tab2;
1599 tab2 = tab1;
1600 tab1 = tab0;
1601 tab0 = tab;
1602 size -= 2;
1603 }
1604
1605 if (size > 0) {
1606
1607 off = (mlib_addr) sp & 3;
1608
1609 if (off == 0) {
1610 mlib_v_ImageLookUp_U8_U8_3_SrcOff0_D1(sp, dp, size, tab0, tab1, tab2);
1611 }
1612 else if (off == 1) {
1613 mlib_v_ImageLookUp_U8_U8_3_SrcOff1_D1(sp, dp, size, tab0, tab1, tab2);
1614 }
1615 else if (off == 2) {
1616 mlib_v_ImageLookUp_U8_U8_3_SrcOff2_D1(sp, dp, size, tab0, tab1, tab2);
1617 }
1618 else {
1619 mlib_v_ImageLookUp_U8_U8_3_SrcOff3_D1(sp, dp, size, tab0, tab1, tab2);
1620 }
1621 }
1622
1623 sl = (mlib_u8 *) ((mlib_u8 *) sl + slb);
1624 dl = (mlib_u8 *) ((mlib_u8 *) dl + dlb);
1625 }
1626}
1627
1628/***************************************************************/