/*
 * Copyright 1998-2003 Sun Microsystems, Inc. All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Sun designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Sun in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 */
25
26
27
28#include "vis_proto.h"
29#include "mlib_image.h"
30#include "mlib_v_ImageLookUpFunc.h"
31
32/***************************************************************/
33static void mlib_v_ImageLookUpSI_U8_U8_2_SrcOff0_D1(const mlib_u8 *src,
34 mlib_u8 *dst,
35 mlib_s32 xsize,
36 const mlib_u16 *table);
37
38static void mlib_v_ImageLookUpSI_U8_U8_2_SrcOff1_D1(const mlib_u8 *src,
39 mlib_u8 *dst,
40 mlib_s32 xsize,
41 const mlib_u16 *table);
42
43static void mlib_v_ImageLookUpSI_U8_U8_2_SrcOff2_D1(const mlib_u8 *src,
44 mlib_u8 *dst,
45 mlib_s32 xsize,
46 const mlib_u16 *table);
47
48static void mlib_v_ImageLookUpSI_U8_U8_2_SrcOff3_D1(const mlib_u8 *src,
49 mlib_u8 *dst,
50 mlib_s32 xsize,
51 const mlib_u16 *table);
52
53static void mlib_v_ImageLookUpSI_U8_U8_2_DstNonAl_D1(const mlib_u8 *src,
54 mlib_u8 *dst,
55 mlib_s32 xsize,
56 const mlib_u16 *table);
57
58static void mlib_v_ImageLookUpSI_U8_U8_2_DstA8D1_SMALL(const mlib_u8 *src,
59 mlib_u8 *dst,
60 mlib_s32 xsize,
61 const mlib_u8 **table);
62
63static void mlib_v_ImageLookUpSI_U8_U8_2_D1_SMALL(const mlib_u8 *src,
64 mlib_u8 *dst,
65 mlib_s32 xsize,
66 const mlib_u8 **table);
67
68static void mlib_v_ImageLookUpSI_U8_U8_3_SrcOff0_D1(const mlib_u8 *src,
69 mlib_u8 *dst,
70 mlib_s32 xsize,
71 const mlib_d64 *table);
72
73static void mlib_v_ImageLookUpSI_U8_U8_3_SrcOff1_D1(const mlib_u8 *src,
74 mlib_u8 *dst,
75 mlib_s32 xsize,
76 const mlib_d64 *table);
77
78static void mlib_v_ImageLookUpSI_U8_U8_3_SrcOff2_D1(const mlib_u8 *src,
79 mlib_u8 *dst,
80 mlib_s32 xsize,
81 const mlib_d64 *table);
82
83static void mlib_v_ImageLookUpSI_U8_U8_3_SrcOff3_D1(const mlib_u8 *src,
84 mlib_u8 *dst,
85 mlib_s32 xsize,
86 const mlib_d64 *table);
87
88static void mlib_v_ImageLookUpSI_U8_U8_3_D1_SMALL(const mlib_u8 *src,
89 mlib_u8 *dst,
90 mlib_s32 xsize,
91 const mlib_u8 **table);
92
93static void mlib_v_ImageLookUpSI_U8_U8_4_SrcOff0_D1(const mlib_u8 *src,
94 mlib_u8 *dst,
95 mlib_s32 xsize,
96 const mlib_f32 *table);
97
98static void mlib_v_ImageLookUpSI_U8_U8_4_DstNonAl_D1(const mlib_u8 *src,
99 mlib_u8 *dst,
100 mlib_s32 xsize,
101 const mlib_f32 *table);
102
103static void mlib_v_ImageLookUpSI_U8_U8_4_DstOff0_D1_SMALL(const mlib_u8 *src,
104 mlib_u8 *dst,
105 mlib_s32 xsize,
106 const mlib_u8 **table);
107
108static void mlib_v_ImageLookUpSI_U8_U8_4_DstOff1_D1_SMALL(const mlib_u8 *src,
109 mlib_u8 *dst,
110 mlib_s32 xsize,
111 const mlib_u8 **table);
112
113static void mlib_v_ImageLookUpSI_U8_U8_4_DstOff2_D1_SMALL(const mlib_u8 *src,
114 mlib_u8 *dst,
115 mlib_s32 xsize,
116 const mlib_u8 **table);
117
118static void mlib_v_ImageLookUpSI_U8_U8_4_DstOff3_D1_SMALL(const mlib_u8 *src,
119 mlib_u8 *dst,
120 mlib_s32 xsize,
121 const mlib_u8 **table);
122
123/***************************************************************/
/*
 * Thin wrappers around the indexed VIS loads.  `base` is cast to a plain
 * `void *` so that const-qualified tables can be passed; `off` is forwarded
 * unchanged (the kernels in this file pass a byte offset into the table).
 */
#define VIS_LD_U8_I(base, off)   vis_ld_u8_i((void *)(base), (off))
#define VIS_LD_U16_I(base, off)  vis_ld_u16_i((void *)(base), (off))
126
127/***************************************************************/
128void mlib_v_ImageLookUpSI_U8_U8_2_SrcOff0_D1(const mlib_u8 *src,
129 mlib_u8 *dst,
130 mlib_s32 xsize,
131 const mlib_u16 *table)
132{
133 mlib_u32 *sa; /* aligned pointer to source data */
134 mlib_u8 *sp; /* pointer to source data */
135 mlib_u32 s0; /* source data */
136 mlib_u16 *dl; /* pointer to start of destination */
137 mlib_u16 *dend; /* pointer to end of destination */
138 mlib_d64 *dp; /* aligned pointer to destination */
139 mlib_d64 t0, t1, t2; /* destination data */
140 mlib_d64 t3, acc; /* destination data */
141 mlib_s32 emask; /* edge mask */
142 mlib_s32 i, num; /* loop variable */
143
144 sa = (mlib_u32*)src;
145 dl = (mlib_u16*)dst;
146 dp = (mlib_d64 *) dl;
147 dend = dl + xsize - 1;
148
149 vis_alignaddr((void *) 0, 6);
150
151 if (xsize >= 4) {
152
153 s0 = sa[0];
154 sa ++;
155
156#pragma pipeloop(0)
157 for(i = 0; i <= xsize - 8; i+=4, sa++) {
158 t3 = VIS_LD_U16_I(table, (s0 << 1) & 0x1FE);
159 t2 = VIS_LD_U16_I(table, (s0 >> 7) & 0x1FE);
160 t1 = VIS_LD_U16_I(table, (s0 >> 15) & 0x1FE);
161 t0 = VIS_LD_U16_I(table, (s0 >> 23) & 0x1FE);
162 acc = vis_faligndata(t3, acc);
163 acc = vis_faligndata(t2, acc);
164 acc = vis_faligndata(t1, acc);
165 acc = vis_faligndata(t0, acc);
166 s0 = sa[0];
167 *dp++ = acc;
168 }
169
170 t3 = VIS_LD_U16_I(table, (s0 << 1) & 0x1FE);
171 t2 = VIS_LD_U16_I(table, (s0 >> 7) & 0x1FE);
172 t1 = VIS_LD_U16_I(table, (s0 >> 15) & 0x1FE);
173 t0 = VIS_LD_U16_I(table, (s0 >> 23) & 0x1FE);
174 acc = vis_faligndata(t3, acc);
175 acc = vis_faligndata(t2, acc);
176 acc = vis_faligndata(t1, acc);
177 acc = vis_faligndata(t0, acc);
178 *dp++ = acc;
179 }
180
181 sp = (mlib_u8*)sa;
182
183 if ((mlib_addr) dp <= (mlib_addr) dend) {
184
185 num = (mlib_u16*) dend - (mlib_u16*) dp;
186 sp += num;
187 num ++;
188#pragma pipeloop(0)
189 for (i = 0; i < num; i ++) {
190 s0 = (mlib_s32) *sp;
191 sp --;
192
193 t0 = VIS_LD_U16_I(table, 2*s0);
194 acc = vis_faligndata(t0, acc);
195 }
196
197 emask = vis_edge16(dp, dend);
198 vis_pst_16(acc, dp, emask);
199 }
200}
201
202/***************************************************************/
203void mlib_v_ImageLookUpSI_U8_U8_2_SrcOff1_D1(const mlib_u8 *src,
204 mlib_u8 *dst,
205 mlib_s32 xsize,
206 const mlib_u16 *table)
207{
208 mlib_u32 *sa; /* aligned pointer to source data */
209 mlib_u8 *sp; /* pointer to source data */
210 mlib_u32 s0, s1; /* source data */
211 mlib_u16 *dl; /* pointer to start of destination */
212 mlib_u16 *dend; /* pointer to end of destination */
213 mlib_d64 *dp; /* aligned pointer to destination */
214 mlib_d64 t0, t1, t2; /* destination data */
215 mlib_d64 t3, acc; /* destination data */
216 mlib_s32 emask; /* edge mask */
217 mlib_s32 i, num; /* loop variable */
218
219 sa = (mlib_u32*)(src-1);
220 dl = (mlib_u16*)dst;
221 dp = (mlib_d64 *) dl;
222 dend = dl + xsize - 1;
223
224 vis_alignaddr((void *) 0, 6);
225
226 s0 = *sa++;
227
228 if (xsize >= 4) {
229
230 s1 = sa[0];
231 sa ++;
232
233#pragma pipeloop(0)
234 for(i = 0; i <= xsize - 8; i+=4, sa++) {
235 t3 = VIS_LD_U16_I(table, (s1 >> 23) & 0x1FE);
236 t2 = VIS_LD_U16_I(table, (s0 << 1) & 0x1FE);
237 t1 = VIS_LD_U16_I(table, (s0 >> 7) & 0x1FE);
238 t0 = VIS_LD_U16_I(table, (s0 >> 15) & 0x1FE);
239 acc = vis_faligndata(t3, acc);
240 acc = vis_faligndata(t2, acc);
241 acc = vis_faligndata(t1, acc);
242 acc = vis_faligndata(t0, acc);
243 s0 = s1;
244 s1 = sa[0];
245 *dp++ = acc;
246 }
247
248 t3 = VIS_LD_U16_I(table, (s1 >> 23) & 0x1FE);
249 t2 = VIS_LD_U16_I(table, (s0 << 1) & 0x1FE);
250 t1 = VIS_LD_U16_I(table, (s0 >> 7) & 0x1FE);
251 t0 = VIS_LD_U16_I(table, (s0 >> 15) & 0x1FE);
252 acc = vis_faligndata(t3, acc);
253 acc = vis_faligndata(t2, acc);
254 acc = vis_faligndata(t1, acc);
255 acc = vis_faligndata(t0, acc);
256 *dp++ = acc;
257 }
258
259 sp = (mlib_u8*)sa;
260 sp -= 3;
261
262 if ((mlib_addr) dp <= (mlib_addr) dend) {
263
264 num = (mlib_u16*) dend - (mlib_u16*) dp;
265 sp += num;
266 num ++;
267#pragma pipeloop(0)
268 for (i = 0; i < num; i ++) {
269 s0 = (mlib_s32) *sp;
270 sp --;
271
272 t0 = VIS_LD_U16_I(table, 2*s0);
273 acc = vis_faligndata(t0, acc);
274 }
275
276 emask = vis_edge16(dp, dend);
277 vis_pst_16(acc, dp, emask);
278 }
279}
280
281/***************************************************************/
282void mlib_v_ImageLookUpSI_U8_U8_2_SrcOff2_D1(const mlib_u8 *src,
283 mlib_u8 *dst,
284 mlib_s32 xsize,
285 const mlib_u16 *table)
286{
287 mlib_u32 *sa; /* pointer to source data */
288 mlib_u8 *sp; /* pointer to source data */
289 mlib_u32 s0, s1; /* source data */
290 mlib_u16 *dl; /* pointer to start of destination */
291 mlib_u16 *dend; /* pointer to end of destination */
292 mlib_d64 *dp; /* aligned pointer to destination */
293 mlib_d64 t0, t1, t2; /* destination data */
294 mlib_d64 t3, acc; /* destination data */
295 mlib_s32 emask; /* edge mask */
296 mlib_s32 i, num; /* loop variable */
297
298 sa = (mlib_u32*)(src-2);
299 dl = (mlib_u16*)dst;
300 dp = (mlib_d64 *) dl;
301 dend = dl + xsize - 1;
302
303 vis_alignaddr((void *) 0, 6);
304
305 s0 = *sa++;
306
307 if (xsize >= 4) {
308
309 s1 = sa[0];
310 sa ++;
311
312#pragma pipeloop(0)
313 for(i = 0; i <= xsize - 8; i+=4, sa++) {
314 t3 = VIS_LD_U16_I(table, (s1 >> 15) & 0x1FE);
315 t2 = VIS_LD_U16_I(table, (s1 >> 23) & 0x1FE);
316 t1 = VIS_LD_U16_I(table, (s0 << 1) & 0x1FE);
317 t0 = VIS_LD_U16_I(table, (s0 >> 7) & 0x1FE);
318 acc = vis_faligndata(t3, acc);
319 acc = vis_faligndata(t2, acc);
320 acc = vis_faligndata(t1, acc);
321 acc = vis_faligndata(t0, acc);
322 s0 = s1;
323 s1 = sa[0];
324 *dp++ = acc;
325 }
326
327 t3 = VIS_LD_U16_I(table, (s1 >> 15) & 0x1FE);
328 t2 = VIS_LD_U16_I(table, (s1 >> 23) & 0x1FE);
329 t1 = VIS_LD_U16_I(table, (s0 << 1) & 0x1FE);
330 t0 = VIS_LD_U16_I(table, (s0 >> 7) & 0x1FE);
331 acc = vis_faligndata(t3, acc);
332 acc = vis_faligndata(t2, acc);
333 acc = vis_faligndata(t1, acc);
334 acc = vis_faligndata(t0, acc);
335 *dp++ = acc;
336 }
337
338 sp = (mlib_u8*)sa;
339 sp -= 2;
340
341 if ((mlib_addr) dp <= (mlib_addr) dend) {
342
343 num = (mlib_u16*) dend - (mlib_u16*) dp;
344 sp += num;
345 num ++;
346#pragma pipeloop(0)
347 for (i = 0; i < num; i ++) {
348 s0 = (mlib_s32) *sp;
349 sp --;
350
351 t0 = VIS_LD_U16_I(table, 2*s0);
352 acc = vis_faligndata(t0, acc);
353 }
354
355 emask = vis_edge16(dp, dend);
356 vis_pst_16(acc, dp, emask);
357 }
358}
359
360/***************************************************************/
361void mlib_v_ImageLookUpSI_U8_U8_2_SrcOff3_D1(const mlib_u8 *src,
362 mlib_u8 *dst,
363 mlib_s32 xsize,
364 const mlib_u16 *table)
365{
366 mlib_u32 *sa; /* aligned pointer to source data */
367 mlib_u8 *sp; /* pointer to source data */
368 mlib_u32 s0, s1; /* source data */
369 mlib_u16 *dl; /* pointer to start of destination */
370 mlib_u16 *dend; /* pointer to end of destination */
371 mlib_d64 *dp; /* aligned pointer to destination */
372 mlib_d64 t0, t1, t2; /* destination data */
373 mlib_d64 t3, acc; /* destination data */
374 mlib_s32 emask; /* edge mask */
375 mlib_s32 i, num; /* loop variable */
376
377 sa = (mlib_u32*)(src-3);
378 dl = (mlib_u16*)dst;
379 dp = (mlib_d64 *) dl;
380 dend = dl + xsize - 1;
381
382 vis_alignaddr((void *) 0, 6);
383
384 s0 = *sa++;
385
386 if (xsize >= 4) {
387
388 s1 = sa[0];
389 sa ++;
390
391#pragma pipeloop(0)
392 for(i = 0; i <= xsize - 8; i+=4, sa++) {
393 t3 = VIS_LD_U16_I(table, (s1 >> 7) & 0x1FE);
394 t2 = VIS_LD_U16_I(table, (s1 >> 15) & 0x1FE);
395 t1 = VIS_LD_U16_I(table, (s1 >> 23) & 0x1FE);
396 t0 = VIS_LD_U16_I(table, (s0 << 1) & 0x1FE);
397 acc = vis_faligndata(t3, acc);
398 acc = vis_faligndata(t2, acc);
399 acc = vis_faligndata(t1, acc);
400 acc = vis_faligndata(t0, acc);
401 s0 = s1;
402 s1 = sa[0];
403 *dp++ = acc;
404 }
405
406 t3 = VIS_LD_U16_I(table, (s1 >> 7) & 0x1FE);
407 t2 = VIS_LD_U16_I(table, (s1 >> 15) & 0x1FE);
408 t1 = VIS_LD_U16_I(table, (s1 >> 23) & 0x1FE);
409 t0 = VIS_LD_U16_I(table, (s0 << 1) & 0x1FE);
410 acc = vis_faligndata(t3, acc);
411 acc = vis_faligndata(t2, acc);
412 acc = vis_faligndata(t1, acc);
413 acc = vis_faligndata(t0, acc);
414 *dp++ = acc;
415 }
416
417 sp = (mlib_u8*)sa;
418 sp -= 1;
419
420 if ((mlib_addr) dp <= (mlib_addr) dend) {
421
422 num = (mlib_u16*) dend - (mlib_u16*) dp;
423 sp += num;
424 num ++;
425#pragma pipeloop(0)
426 for (i = 0; i < num; i ++) {
427 s0 = (mlib_s32) *sp;
428 sp --;
429
430 t0 = VIS_LD_U16_I(table, 2*s0);
431 acc = vis_faligndata(t0, acc);
432 }
433
434 emask = vis_edge16(dp, dend);
435 vis_pst_16(acc, dp, emask);
436 }
437}
438
439/***************************************************************/
440void mlib_v_ImageLookUpSI_U8_U8_2_DstNonAl_D1(const mlib_u8 *src,
441 mlib_u8 *dst,
442 mlib_s32 xsize,
443 const mlib_u16 *table)
444{
445 mlib_u32 *sa; /* aligned pointer to source data */
446 mlib_u8 *sp; /* pointer to source data */
447 mlib_u32 s0, s1, s2, s3; /* source data */
448 mlib_u8 *dl; /* pointer to start of destination */
449 mlib_u8 *dend; /* pointer to end of destination */
450 mlib_d64 *dp; /* aligned pointer to destination */
451 mlib_d64 t0, t1, t2; /* destination data */
452 mlib_d64 t3, t4, t5; /* destination data */
453 mlib_d64 t6, t7, acc0; /* destination data */
454 mlib_d64 acc1, acc2; /* destination data */
455 mlib_d64 acc3, acc4; /* destination data */
456 mlib_s32 emask; /* edge mask */
457 mlib_s32 i, num; /* loop variable */
458 mlib_s32 off; /* offset */
459
460 sa = (mlib_u32*)src;
461 dl = dst;
462 sp = (void *)src;
463 dend = dl + 2*xsize - 1;
464 dp = (mlib_d64 *) ((mlib_addr) dl & (~7));
465 off = (mlib_addr) dp - (mlib_addr) dl;
466
467 emask = vis_edge8(dl, dend);
468 num = (xsize < 4) ? xsize : 4;
469
470 sp += (num-1);
471
472 vis_alignaddr(dp, 6);
473
474 for (i = 0; i < num; i ++) {
475 s0 = (mlib_s32) *sp;
476 sp --;
477
478 t0 = VIS_LD_U16_I(table, 2*s0);
479 acc0 = vis_faligndata(t0, acc0);
480 }
481
482 vis_alignaddr(dp, off);
483 vis_pst_8(vis_faligndata(acc0, acc0), dp++, emask);
484
485 sa++;
486
487 xsize -= 4;
488
489 i = 0;
490
491 if (xsize >= 16) {
492
493 s0 = sa[0];
494 s1 = sa[1];
495 s2 = sa[2];
496 s3 = sa[3];
497 sa += 4;
498
499#pragma pipeloop(0)
500 for(i = 0; i <= xsize - 32; i+=16, sa+=4) {
501 vis_alignaddr(dp, 6);
502 t3 = VIS_LD_U16_I(table, (s0 << 1) & 0x1FE);
503 t2 = VIS_LD_U16_I(table, (s0 >> 7) & 0x1FE);
504 t1 = VIS_LD_U16_I(table, (s0 >> 15) & 0x1FE);
505 t0 = VIS_LD_U16_I(table, (s0 >> 23) & 0x1FE);
506 acc1 = vis_faligndata(t3, acc1);
507 acc1 = vis_faligndata(t2, acc1);
508 acc1 = vis_faligndata(t1, acc1);
509 acc1 = vis_faligndata(t0, acc1);
510 t7 = VIS_LD_U16_I(table, (s1 << 1) & 0x1FE);
511 t6 = VIS_LD_U16_I(table, (s1 >> 7) & 0x1FE);
512 t5 = VIS_LD_U16_I(table, (s1 >> 15) & 0x1FE);
513 t4 = VIS_LD_U16_I(table, (s1 >> 23) & 0x1FE);
514 acc2 = vis_faligndata(t7, acc2);
515 acc2 = vis_faligndata(t6, acc2);
516 acc2 = vis_faligndata(t5, acc2);
517 acc2 = vis_faligndata(t4, acc2);
518 t3 = VIS_LD_U16_I(table, (s2 << 1) & 0x1FE);
519 t2 = VIS_LD_U16_I(table, (s2 >> 7) & 0x1FE);
520 t1 = VIS_LD_U16_I(table, (s2 >> 15) & 0x1FE);
521 t0 = VIS_LD_U16_I(table, (s2 >> 23) & 0x1FE);
522 acc3 = vis_faligndata(t3, acc3);
523 acc3 = vis_faligndata(t2, acc3);
524 acc3 = vis_faligndata(t1, acc3);
525 acc3 = vis_faligndata(t0, acc3);
526 t7 = VIS_LD_U16_I(table, (s3 << 1) & 0x1FE);
527 t6 = VIS_LD_U16_I(table, (s3 >> 7) & 0x1FE);
528 t5 = VIS_LD_U16_I(table, (s3 >> 15) & 0x1FE);
529 t4 = VIS_LD_U16_I(table, (s3 >> 23) & 0x1FE);
530 acc4 = vis_faligndata(t7, acc4);
531 acc4 = vis_faligndata(t6, acc4);
532 acc4 = vis_faligndata(t5, acc4);
533 acc4 = vis_faligndata(t4, acc4);
534 vis_alignaddr(dp, off);
535 s0 = sa[0];
536 s1 = sa[1];
537 s2 = sa[2];
538 s3 = sa[3];
539 *dp++ = vis_faligndata(acc0, acc1);
540 *dp++ = vis_faligndata(acc1, acc2);
541 *dp++ = vis_faligndata(acc2, acc3);
542 *dp++ = vis_faligndata(acc3, acc4);
543 acc0 = acc4;
544 }
545
546 vis_alignaddr(dp, 6);
547 t3 = VIS_LD_U16_I(table, (s0 << 1) & 0x1FE);
548 t2 = VIS_LD_U16_I(table, (s0 >> 7) & 0x1FE);
549 t1 = VIS_LD_U16_I(table, (s0 >> 15) & 0x1FE);
550 t0 = VIS_LD_U16_I(table, (s0 >> 23) & 0x1FE);
551 acc1 = vis_faligndata(t3, acc1);
552 acc1 = vis_faligndata(t2, acc1);
553 acc1 = vis_faligndata(t1, acc1);
554 acc1 = vis_faligndata(t0, acc1);
555 t7 = VIS_LD_U16_I(table, (s1 << 1) & 0x1FE);
556 t6 = VIS_LD_U16_I(table, (s1 >> 7) & 0x1FE);
557 t5 = VIS_LD_U16_I(table, (s1 >> 15) & 0x1FE);
558 t4 = VIS_LD_U16_I(table, (s1 >> 23) & 0x1FE);
559 acc2 = vis_faligndata(t7, acc2);
560 acc2 = vis_faligndata(t6, acc2);
561 acc2 = vis_faligndata(t5, acc2);
562 acc2 = vis_faligndata(t4, acc2);
563 t3 = VIS_LD_U16_I(table, (s2 << 1) & 0x1FE);
564 t2 = VIS_LD_U16_I(table, (s2 >> 7) & 0x1FE);
565 t1 = VIS_LD_U16_I(table, (s2 >> 15) & 0x1FE);
566 t0 = VIS_LD_U16_I(table, (s2 >> 23) & 0x1FE);
567 acc3 = vis_faligndata(t3, acc3);
568 acc3 = vis_faligndata(t2, acc3);
569 acc3 = vis_faligndata(t1, acc3);
570 acc3 = vis_faligndata(t0, acc3);
571 t7 = VIS_LD_U16_I(table, (s3 << 1) & 0x1FE);
572 t6 = VIS_LD_U16_I(table, (s3 >> 7) & 0x1FE);
573 t5 = VIS_LD_U16_I(table, (s3 >> 15) & 0x1FE);
574 t4 = VIS_LD_U16_I(table, (s3 >> 23) & 0x1FE);
575 acc4 = vis_faligndata(t7, acc4);
576 acc4 = vis_faligndata(t6, acc4);
577 acc4 = vis_faligndata(t5, acc4);
578 acc4 = vis_faligndata(t4, acc4);
579 vis_alignaddr(dp, off);
580 *dp++ = vis_faligndata(acc0, acc1);
581 *dp++ = vis_faligndata(acc1, acc2);
582 *dp++ = vis_faligndata(acc2, acc3);
583 *dp++ = vis_faligndata(acc3, acc4);
584 acc0 = acc4; i+=16;
585 }
586
587 if (i <= xsize - 8) {
588 s0 = sa[0];
589 s1 = sa[1];
590 vis_alignaddr(dp, 6);
591 t3 = VIS_LD_U16_I(table, (s0 << 1) & 0x1FE);
592 t2 = VIS_LD_U16_I(table, (s0 >> 7) & 0x1FE);
593 t1 = VIS_LD_U16_I(table, (s0 >> 15) & 0x1FE);
594 t0 = VIS_LD_U16_I(table, (s0 >> 23) & 0x1FE);
595 acc1 = vis_faligndata(t3, acc1);
596 acc1 = vis_faligndata(t2, acc1);
597 acc1 = vis_faligndata(t1, acc1);
598 acc1 = vis_faligndata(t0, acc1);
599 t7 = VIS_LD_U16_I(table, (s1 << 1) & 0x1FE);
600 t6 = VIS_LD_U16_I(table, (s1 >> 7) & 0x1FE);
601 t5 = VIS_LD_U16_I(table, (s1 >> 15) & 0x1FE);
602 t4 = VIS_LD_U16_I(table, (s1 >> 23) & 0x1FE);
603 acc2 = vis_faligndata(t7, acc2);
604 acc2 = vis_faligndata(t6, acc2);
605 acc2 = vis_faligndata(t5, acc2);
606 acc2 = vis_faligndata(t4, acc2);
607 vis_alignaddr(dp, off);
608 *dp++ = vis_faligndata(acc0, acc1);
609 *dp++ = vis_faligndata(acc1, acc2);
610 acc0 = acc2; i += 8; sa += 2;
611 }
612
613 if (i <= xsize - 4) {
614 s0 = *sa++;
615 vis_alignaddr(dp, 6);
616 t3 = VIS_LD_U16_I(table, (s0 << 1) & 0x1FE);
617 t2 = VIS_LD_U16_I(table, (s0 >> 7) & 0x1FE);
618 t1 = VIS_LD_U16_I(table, (s0 >> 15) & 0x1FE);
619 t0 = VIS_LD_U16_I(table, (s0 >> 23) & 0x1FE);
620 acc1 = vis_faligndata(t3, acc1);
621 acc1 = vis_faligndata(t2, acc1);
622 acc1 = vis_faligndata(t1, acc1);
623 acc1 = vis_faligndata(t0, acc1);
624 vis_alignaddr(dp, off);
625 *dp++ = vis_faligndata(acc0, acc1);
626 acc0 = acc1;
627 }
628
629 sp = (mlib_u8*)sa;
630
631 if ((mlib_addr) dp <= (mlib_addr) dend) {
632
633 num = (((mlib_u8*) dend - (mlib_u8*) dp) + off + 1) >> 1;
634 sp += (num - 1);
635 vis_alignaddr(dp, 6);
636#pragma pipeloop(0)
637 for (i = 0; i < num; i ++) {
638 s0 = (mlib_s32) *sp;
639 sp --;
640
641 t0 = VIS_LD_U16_I(table, 2*s0);
642 acc1 = vis_faligndata(t0, acc1);
643 }
644
645 vis_alignaddr(dp, off);
646 emask = vis_edge8(dp, dend);
647 vis_pst_8(vis_faligndata(acc0, acc1), dp++, emask);
648 }
649
650 if ((mlib_addr) dp <= (mlib_addr) dend) {
651 emask = vis_edge8(dp, dend);
652 vis_pst_8(vis_faligndata(acc1, acc1), dp++, emask);
653 }
654}
655
656/***************************************************************/
657void mlib_v_ImageLookUpSI_U8_U8_2_DstA8D1_SMALL(const mlib_u8 *src,
658 mlib_u8 *dst,
659 mlib_s32 xsize,
660 const mlib_u8 **table)
661{
662 mlib_u8 *sp; /* pointer to source data */
663 mlib_u32 s0, s1, s2, s3; /* source data */
664 mlib_u16 *dl; /* pointer to start of destination */
665 mlib_u16 *dend; /* pointer to end of destination */
666 mlib_d64 *dp; /* aligned pointer to destination */
667 mlib_d64 t0, t1, t2; /* destination data */
668 mlib_d64 t3, t4, t5; /* destination data */
669 mlib_d64 t6, t7, acc; /* destination data */
670 mlib_s32 emask; /* edge mask */
671 mlib_s32 i, num; /* loop variable */
672 const mlib_u8 *tab0 = table[0];
673 const mlib_u8 *tab1 = table[1];
674
675 sp = (void *)src;
676 dl = (mlib_u16*)dst;
677 dp = (mlib_d64 *) dl;
678 dend = dl + xsize - 1;
679
680 vis_alignaddr((void *) 0, 7);
681
682 if (xsize >= 4) {
683
684 s0 = sp[0];
685 s1 = sp[1];
686 s2 = sp[2];
687 s3 = sp[3];
688 sp += 4;
689
690#pragma pipeloop(0)
691 for(i = 0; i <= xsize - 8; i+=4, sp+=4) {
692 t7 = VIS_LD_U8_I(tab1, s3);
693 t6 = VIS_LD_U8_I(tab0, s3);
694 t5 = VIS_LD_U8_I(tab1, s2);
695 t4 = VIS_LD_U8_I(tab0, s2);
696 t3 = VIS_LD_U8_I(tab1, s1);
697 t2 = VIS_LD_U8_I(tab0, s1);
698 t1 = VIS_LD_U8_I(tab1, s0);
699 t0 = VIS_LD_U8_I(tab0, s0);
700 acc = vis_faligndata(t7, acc);
701 acc = vis_faligndata(t6, acc);
702 acc = vis_faligndata(t5, acc);
703 acc = vis_faligndata(t4, acc);
704 acc = vis_faligndata(t3, acc);
705 acc = vis_faligndata(t2, acc);
706 acc = vis_faligndata(t1, acc);
707 acc = vis_faligndata(t0, acc);
708 s0 = sp[0];
709 s1 = sp[1];
710 s2 = sp[2];
711 s3 = sp[3];
712 *dp++ = acc;
713 }
714
715 t7 = VIS_LD_U8_I(tab1, s3);
716 t6 = VIS_LD_U8_I(tab0, s3);
717 t5 = VIS_LD_U8_I(tab1, s2);
718 t4 = VIS_LD_U8_I(tab0, s2);
719 t3 = VIS_LD_U8_I(tab1, s1);
720 t2 = VIS_LD_U8_I(tab0, s1);
721 t1 = VIS_LD_U8_I(tab1, s0);
722 t0 = VIS_LD_U8_I(tab0, s0);
723 acc = vis_faligndata(t7, acc);
724 acc = vis_faligndata(t6, acc);
725 acc = vis_faligndata(t5, acc);
726 acc = vis_faligndata(t4, acc);
727 acc = vis_faligndata(t3, acc);
728 acc = vis_faligndata(t2, acc);
729 acc = vis_faligndata(t1, acc);
730 acc = vis_faligndata(t0, acc);
731 *dp++ = acc;
732 }
733
734 if ((mlib_addr) dp <= (mlib_addr) dend) {
735
736 num = (mlib_u16*) dend - (mlib_u16*) dp;
737 sp += num;
738 num ++;
739#pragma pipeloop(0)
740 for (i = 0; i < num; i ++) {
741 s0 = (mlib_s32) *sp;
742 sp --;
743
744 t0 = VIS_LD_U8_I(tab1, s0);
745 acc = vis_faligndata(t0, acc);
746
747 t0 = VIS_LD_U8_I(tab0, s0);
748 acc = vis_faligndata(t0, acc);
749 }
750
751 emask = vis_edge16(dp, dend);
752 vis_pst_16(acc, dp, emask);
753 }
754}
755
756/***************************************************************/
757void mlib_v_ImageLookUpSI_U8_U8_2_D1_SMALL(const mlib_u8 *src,
758 mlib_u8 *dst,
759 mlib_s32 xsize,
760 const mlib_u8 **table)
761{
762 mlib_u8 *sp; /* pointer to source data */
763 mlib_u32 s0, s1, s2, s3, s4; /* source data */
764 mlib_u8 *dl; /* pointer to start of destination */
765 mlib_u8 *dend; /* pointer to end of destination */
766 mlib_d64 *dp; /* aligned pointer to destination */
767 mlib_d64 t0, t1, t2; /* destination data */
768 mlib_d64 t3, t4, t5; /* destination data */
769 mlib_d64 t6, t7, acc; /* destination data */
770 mlib_s32 emask; /* edge mask */
771 mlib_s32 i, num; /* loop variable */
772 const mlib_u8 *tab0 = table[0];
773 const mlib_u8 *tab1 = table[1];
774
775 sp = (void *)src;
776 dl = dst;
777
778 dend = dl + 2 * xsize - 1;
779
780 vis_alignaddr((void *) 0, 7);
781
782 s0 = *sp++;
783 *dl++ = tab0[s0];
784 dp = (mlib_d64 *) dl;
785 xsize--;
786
787 if (xsize >= 4) {
788
789 s1 = sp[0];
790 s2 = sp[1];
791 s3 = sp[2];
792 s4 = sp[3];
793 sp += 4;
794
795#pragma pipeloop(0)
796 for(i = 0; i <= xsize - 8; i+=4, sp+=4) {
797 t7 = VIS_LD_U8_I(tab0, s4);
798 t6 = VIS_LD_U8_I(tab1, s3);
799 t5 = VIS_LD_U8_I(tab0, s3);
800 t4 = VIS_LD_U8_I(tab1, s2);
801 t3 = VIS_LD_U8_I(tab0, s2);
802 t2 = VIS_LD_U8_I(tab1, s1);
803 t1 = VIS_LD_U8_I(tab0, s1);
804 t0 = VIS_LD_U8_I(tab1, s0);
805 acc = vis_faligndata(t7, acc);
806 acc = vis_faligndata(t6, acc);
807 acc = vis_faligndata(t5, acc);
808 acc = vis_faligndata(t4, acc);
809 acc = vis_faligndata(t3, acc);
810 acc = vis_faligndata(t2, acc);
811 acc = vis_faligndata(t1, acc);
812 acc = vis_faligndata(t0, acc);
813 s0 = s4;
814 s1 = sp[0];
815 s2 = sp[1];
816 s3 = sp[2];
817 s4 = sp[3];
818 *dp++ = acc;
819 }
820
821 t7 = VIS_LD_U8_I(tab0, s4);
822 t6 = VIS_LD_U8_I(tab1, s3);
823 t5 = VIS_LD_U8_I(tab0, s3);
824 t4 = VIS_LD_U8_I(tab1, s2);
825 t3 = VIS_LD_U8_I(tab0, s2);
826 t2 = VIS_LD_U8_I(tab1, s1);
827 t1 = VIS_LD_U8_I(tab0, s1);
828 t0 = VIS_LD_U8_I(tab1, s0);
829 acc = vis_faligndata(t7, acc);
830 acc = vis_faligndata(t6, acc);
831 acc = vis_faligndata(t5, acc);
832 acc = vis_faligndata(t4, acc);
833 acc = vis_faligndata(t3, acc);
834 acc = vis_faligndata(t2, acc);
835 acc = vis_faligndata(t1, acc);
836 acc = vis_faligndata(t0, acc);
837 s0 = s4;
838 *dp++ = acc;
839 }
840
841 num = ((mlib_u8*) dend - (mlib_u8*) dp) >> 1;
842 sp += num;
843 num ++;
844
845#pragma pipeloop(0)
846 for (i = 0; i < num; i ++) {
847 s1 = (mlib_s32) *sp;
848 sp --;
849
850 t0 = VIS_LD_U8_I(tab1, s1);
851 acc = vis_faligndata(t0, acc);
852
853 t0 = VIS_LD_U8_I(tab0, s1);
854 acc = vis_faligndata(t0, acc);
855 }
856
857 t0 = VIS_LD_U8_I(tab1, s0);
858 acc = vis_faligndata(t0, acc);
859 emask = vis_edge8(dp, dend);
860 vis_pst_8(acc, dp, emask);
861}
862
863/***************************************************************/
864void mlib_v_ImageLookUpSI_U8_U8_2(const mlib_u8 *src,
865 mlib_s32 slb,
866 mlib_u8 *dst,
867 mlib_s32 dlb,
868 mlib_s32 xsize,
869 mlib_s32 ysize,
870 const mlib_u8 **table)
871{
872 if ((xsize * ysize) < 650) {
873 mlib_u8 *sl;
874 mlib_u8 *dl;
875 mlib_s32 i, j;
876
877 sl = (void *)src;
878 dl = dst;
879
880 /* row loop */
881 for (j = 0; j < ysize; j ++) {
882 mlib_u8 *sp = sl;
883 mlib_u8 *dp = dl;
884 mlib_s32 off, s0, size = xsize;
885
886 off = ((8 - ((mlib_addr)dp & 7)) & 7) >> 1;
887 off = (off < size) ? off : size;
888
889 for (i = 0; i < off; i++) {
890 s0 = *sp++;
891 *dp++ = table[0][s0];
892 *dp++ = table[1][s0];
893 size--;
894 }
895
896 if (size > 0) {
897
898 if (((mlib_addr)dp & 1) == 0) {
899 mlib_v_ImageLookUpSI_U8_U8_2_DstA8D1_SMALL(sp, dp, size, table);
900 } else {
901 mlib_v_ImageLookUpSI_U8_U8_2_D1_SMALL(sp, dp, size, table);
902 }
903 }
904
905 sl = (mlib_u8 *) ((mlib_u8 *) sl + slb);
906 dl = (mlib_u8 *) ((mlib_u8 *) dl + dlb);
907 }
908
909 } else {
910 mlib_u8 *sl;
911 mlib_u8 *dl;
912 mlib_u16 tab[256];
913 const mlib_u8 *tab0 = table[0];
914 const mlib_u8 *tab1 = table[1];
915 mlib_s32 i, j, s0, s1, s2;
916
917 s0 = tab0[0];
918 s1 = tab1[0];
919 for (i = 1; i < 256; i++) {
920 s2 = (s0 << 8) + s1;
921 s0 = tab0[i];
922 s1 = tab1[i];
923 tab[i-1] = (mlib_u16)s2;
924 }
925
926 s2 = (s0 << 8) + s1;
927 tab[255] = (mlib_u16)s2;
928
929 sl = (void *)src;
930 dl = dst;
931
932 /* row loop */
933 for (j = 0; j < ysize; j ++) {
934 mlib_u8 *sp = sl;
935 mlib_u8 *dp = dl;
936 mlib_s32 off, s0, size = xsize;
937
938 if (((mlib_addr)dp & 1) == 0) {
939
940 off = ((8 - ((mlib_addr)dp & 7)) & 7) >> 1;
941 off = (off < size) ? off : size;
942
943 for (i = 0; i < off; i++) {
944 *(mlib_u16*)dp = tab[(*sp)];
945 dp += 2;
946 size--; sp++;
947 }
948
949 if (size > 0) {
950
951 off = (mlib_addr)sp & 3;
952
953 if (off == 0) {
954 mlib_v_ImageLookUpSI_U8_U8_2_SrcOff0_D1(sp, dp, size, tab);
955 } else if (off == 1) {
956 mlib_v_ImageLookUpSI_U8_U8_2_SrcOff1_D1(sp, dp, size, tab);
957 } else if (off == 2) {
958 mlib_v_ImageLookUpSI_U8_U8_2_SrcOff2_D1(sp, dp, size, tab);
959 } else {
960 mlib_v_ImageLookUpSI_U8_U8_2_SrcOff3_D1(sp, dp, size, tab);
961 }
962 }
963
964 } else {
965
966 off = ((4 - ((mlib_addr)sp & 3)) & 3);
967 off = (off < size) ? off : size;
968
969 for (i = 0; i < off; i++) {
970 s0 = tab[(*sp)];
971 *dp++ = (s0 >> 8);
972 *dp++ = (s0 & 0xFF);
973 size--; sp++;
974 }
975
976 if (size > 0) {
977 mlib_v_ImageLookUpSI_U8_U8_2_DstNonAl_D1(sp, dp, size, tab);
978 }
979 }
980
981 sl = (mlib_u8 *) ((mlib_u8 *) sl + slb);
982 dl = (mlib_u8 *) ((mlib_u8 *) dl + dlb);
983 }
984 }
985}
986
987/***************************************************************/
988void mlib_v_ImageLookUpSI_U8_U8_3_SrcOff0_D1(const mlib_u8 *src,
989 mlib_u8 *dst,
990 mlib_s32 xsize,
991 const mlib_d64 *table)
992{
993 mlib_u8 *sp; /* pointer to source data */
994 mlib_u32 *sa; /* aligned pointer to source data */
995 mlib_u32 s0; /* source data */
996 mlib_u8 *dl; /* pointer to start of destination */
997 mlib_f32 *dp; /* aligned pointer to destination */
998 mlib_d64 t0, t1, t2, t3; /* destination data */
999 mlib_d64 acc0, acc1; /* destination data */
1000 mlib_s32 i; /* loop variable */
1001 mlib_u8 *ptr;
1002
1003 dl = dst;
1004 dp = (mlib_f32 *) dl;
1005 sp = (void *)src;
1006 sa = (mlib_u32*)sp;
1007
1008 vis_alignaddr((void *) 0, 3);
1009
1010 i = 0;
1011
1012 if (xsize >= 4) {
1013
1014 s0 = *sa++;
1015
1016#pragma pipeloop(0)
1017 for(i = 0; i <= xsize - 8; i+=4, dp+=3) {
1018 t0 = *(mlib_d64*)((mlib_u8*)table + ((s0 >> 21) & 0x7F8 ));
1019 t1 = *(mlib_d64*)((mlib_u8*)table + ((s0 >> 13) & 0x7F8 ));
1020 t2 = *(mlib_d64*)((mlib_u8*)table + ((s0 >> 5) & 0x7F8 ));
1021 t3 = *(mlib_d64*)((mlib_u8*)table + ((s0 << 3) & 0x7F8 ));
1022 acc0 = vis_faligndata(t0, t0);
1023 acc0 = vis_faligndata(acc0, t1);
1024 acc1 = vis_faligndata(acc0, acc0);
1025 acc0 = vis_faligndata(acc0, t2);
1026 acc1 = vis_faligndata(acc1, acc0);
1027 acc0 = vis_faligndata(acc0, t3);
1028 s0 = *sa++;
1029 dp[0] = vis_read_lo(acc1);
1030 dp[1] = vis_read_hi(acc0);
1031 dp[2] = vis_read_lo(acc0);
1032 }
1033
1034 t0 = *(mlib_d64*)((mlib_u8*)table + ((s0 >> 21) & 0x7F8 ));
1035 t1 = *(mlib_d64*)((mlib_u8*)table + ((s0 >> 13) & 0x7F8 ));
1036 t2 = *(mlib_d64*)((mlib_u8*)table + ((s0 >> 5) & 0x7F8 ));
1037 t3 = *(mlib_d64*)((mlib_u8*)table + ((s0 << 3) & 0x7F8 ));
1038 acc0 = vis_faligndata(t0, t0);
1039 acc0 = vis_faligndata(acc0, t1);
1040 acc1 = vis_faligndata(acc0, acc0);
1041 acc0 = vis_faligndata(acc0, t2);
1042 acc1 = vis_faligndata(acc1, acc0);
1043 acc0 = vis_faligndata(acc0, t3);
1044 dp[0] = vis_read_lo(acc1);
1045 dp[1] = vis_read_hi(acc0);
1046 dp[2] = vis_read_lo(acc0);
1047 dp += 3;
1048 i += 4;
1049 }
1050
1051 dl = (mlib_u8*)dp;
1052
1053#pragma pipeloop(0)
1054 for (; i < xsize; i++) {
1055 ptr = (mlib_u8*)(table + src[i]);
1056 dl[0] = ptr[0];
1057 dl[1] = ptr[1];
1058 dl[2] = ptr[2];
1059 dl += 3;
1060 }
1061}
1062
1063/***************************************************************/
1064void mlib_v_ImageLookUpSI_U8_U8_3_SrcOff1_D1(const mlib_u8 *src,
1065 mlib_u8 *dst,
1066 mlib_s32 xsize,
1067 const mlib_d64 *table)
1068{
1069 mlib_u8 *sp; /* pointer to source data */
1070 mlib_u32 *sa; /* aligned pointer to source data */
1071 mlib_u32 s0, s1; /* source data */
1072 mlib_u8 *dl; /* pointer to start of destination */
1073 mlib_f32 *dp; /* aligned pointer to destination */
1074 mlib_d64 t0, t1, t2, t3; /* destination data */
1075 mlib_d64 acc0, acc1; /* destination data */
1076 mlib_s32 i; /* loop variable */
1077 mlib_u8 *ptr;
1078
1079 dl = dst;
1080 dp = (mlib_f32 *) dl;
1081 sp = (void *)src;
1082 sa = (mlib_u32*)(sp - 1);
1083
1084 vis_alignaddr((void *) 0, 3);
1085
1086 i = 0;
1087 s0 = *sa++;
1088
1089 if (xsize >= 4) {
1090
1091 s1 = *sa++;
1092
1093#pragma pipeloop(0)
1094 for(i = 0; i <= xsize - 8; i+=4, dp+=3) {
1095 t0 = *(mlib_d64*)((mlib_u8*)table + ((s0 >> 13) & 0x7F8 ));
1096 t1 = *(mlib_d64*)((mlib_u8*)table + ((s0 >> 5) & 0x7F8 ));
1097 t2 = *(mlib_d64*)((mlib_u8*)table + ((s0 << 3) & 0x7F8 ));
1098 t3 = *(mlib_d64*)((mlib_u8*)table + ((s1 >> 21) & 0x7F8 ));
1099 acc0 = vis_faligndata(t0, t0);
1100 acc0 = vis_faligndata(acc0, t1);
1101 acc1 = vis_faligndata(acc0, acc0);
1102 acc0 = vis_faligndata(acc0, t2);
1103 acc1 = vis_faligndata(acc1, acc0);
1104 acc0 = vis_faligndata(acc0, t3);
1105 s0 = s1;
1106 s1 = *sa++;
1107 dp[0] = vis_read_lo(acc1);
1108 dp[1] = vis_read_hi(acc0);
1109 dp[2] = vis_read_lo(acc0);
1110 }
1111
1112 t0 = *(mlib_d64*)((mlib_u8*)table + ((s0 >> 13) & 0x7F8 ));
1113 t1 = *(mlib_d64*)((mlib_u8*)table + ((s0 >> 5) & 0x7F8 ));
1114 t2 = *(mlib_d64*)((mlib_u8*)table + ((s0 << 3) & 0x7F8 ));
1115 t3 = *(mlib_d64*)((mlib_u8*)table + ((s1 >> 21) & 0x7F8 ));
1116 acc0 = vis_faligndata(t0, t0);
1117 acc0 = vis_faligndata(acc0, t1);
1118 acc1 = vis_faligndata(acc0, acc0);
1119 acc0 = vis_faligndata(acc0, t2);
1120 acc1 = vis_faligndata(acc1, acc0);
1121 acc0 = vis_faligndata(acc0, t3);
1122 dp[0] = vis_read_lo(acc1);
1123 dp[1] = vis_read_hi(acc0);
1124 dp[2] = vis_read_lo(acc0);
1125 dp += 3;
1126 i += 4;
1127 }
1128
1129 dl = (mlib_u8*)dp;
1130
1131#pragma pipeloop(0)
1132 for (; i < xsize; i++) {
1133 ptr = (mlib_u8*)(table + src[i]);
1134 dl[0] = ptr[0];
1135 dl[1] = ptr[1];
1136 dl[2] = ptr[2];
1137 dl += 3;
1138 }
1139}
1140
1141/***************************************************************/
1142void mlib_v_ImageLookUpSI_U8_U8_3_SrcOff2_D1(const mlib_u8 *src,
1143 mlib_u8 *dst,
1144 mlib_s32 xsize,
1145 const mlib_d64 *table)
1146{
1147 mlib_u8 *sp; /* pointer to source data */
1148 mlib_u32 *sa; /* aligned pointer to source data */
1149 mlib_u32 s0, s1; /* source data */
1150 mlib_u8 *dl; /* pointer to start of destination */
1151 mlib_f32 *dp; /* aligned pointer to destination */
1152 mlib_d64 t0, t1, t2, t3; /* destination data */
1153 mlib_d64 acc0, acc1; /* destination data */
1154 mlib_s32 i; /* loop variable */
1155 mlib_u8 *ptr;
1156
1157 dl = dst;
1158 dp = (mlib_f32 *) dl;
1159 sp = (void *)src;
1160 sa = (mlib_u32*)(sp - 2);
1161
1162 vis_alignaddr((void *) 0, 3);
1163
1164 i = 0;
1165 s0 = *sa++;
1166
1167 if (xsize >= 4) {
1168
1169 s1 = *sa++;
1170
1171#pragma pipeloop(0)
1172 for(i = 0; i <= xsize - 8; i+=4, dp+=3) {
1173 t0 = *(mlib_d64*)((mlib_u8*)table + ((s0 >> 5) & 0x7F8 ));
1174 t1 = *(mlib_d64*)((mlib_u8*)table + ((s0 << 3) & 0x7F8 ));
1175 t2 = *(mlib_d64*)((mlib_u8*)table + ((s1 >> 21) & 0x7F8 ));
1176 t3 = *(mlib_d64*)((mlib_u8*)table + ((s1 >> 13) & 0x7F8 ));
1177 acc0 = vis_faligndata(t0, t0);
1178 acc0 = vis_faligndata(acc0, t1);
1179 acc1 = vis_faligndata(acc0, acc0);
1180 acc0 = vis_faligndata(acc0, t2);
1181 acc1 = vis_faligndata(acc1, acc0);
1182 acc0 = vis_faligndata(acc0, t3);
1183 s0 = s1;
1184 s1 = *sa++;
1185 dp[0] = vis_read_lo(acc1);
1186 dp[1] = vis_read_hi(acc0);
1187 dp[2] = vis_read_lo(acc0);
1188 }
1189
1190 t0 = *(mlib_d64*)((mlib_u8*)table + ((s0 >> 5) & 0x7F8 ));
1191 t1 = *(mlib_d64*)((mlib_u8*)table + ((s0 << 3) & 0x7F8 ));
1192 t2 = *(mlib_d64*)((mlib_u8*)table + ((s1 >> 21) & 0x7F8 ));
1193 t3 = *(mlib_d64*)((mlib_u8*)table + ((s1 >> 13) & 0x7F8 ));
1194 acc0 = vis_faligndata(t0, t0);
1195 acc0 = vis_faligndata(acc0, t1);
1196 acc1 = vis_faligndata(acc0, acc0);
1197 acc0 = vis_faligndata(acc0, t2);
1198 acc1 = vis_faligndata(acc1, acc0);
1199 acc0 = vis_faligndata(acc0, t3);
1200 dp[0] = vis_read_lo(acc1);
1201 dp[1] = vis_read_hi(acc0);
1202 dp[2] = vis_read_lo(acc0);
1203 dp += 3;
1204 i += 4;
1205 }
1206
1207 dl = (mlib_u8*)dp;
1208
1209#pragma pipeloop(0)
1210 for (; i < xsize; i++) {
1211 ptr = (mlib_u8*)(table + src[i]);
1212 dl[0] = ptr[0];
1213 dl[1] = ptr[1];
1214 dl[2] = ptr[2];
1215 dl += 3;
1216 }
1217}
1218
1219/***************************************************************/
1220void mlib_v_ImageLookUpSI_U8_U8_3_SrcOff3_D1(const mlib_u8 *src,
1221 mlib_u8 *dst,
1222 mlib_s32 xsize,
1223 const mlib_d64 *table)
1224{
1225 mlib_u8 *sp; /* pointer to source data */
1226 mlib_u32 *sa; /* aligned pointer to source data */
1227 mlib_u32 s0, s1; /* source data */
1228 mlib_u8 *dl; /* pointer to start of destination */
1229 mlib_f32 *dp; /* aligned pointer to destination */
1230 mlib_d64 t0, t1, t2, t3; /* destination data */
1231 mlib_d64 acc0, acc1; /* destination data */
1232 mlib_s32 i; /* loop variable */
1233 mlib_u8 *ptr;
1234
1235 dl = dst;
1236 dp = (mlib_f32 *) dl;
1237 sp = (void *)src;
1238 sa = (mlib_u32*)(sp - 3);
1239
1240 vis_alignaddr((void *) 0, 3);
1241
1242 i = 0;
1243 s0 = *sa++;
1244
1245 if (xsize >= 4) {
1246
1247 s1 = *sa++;
1248
1249#pragma pipeloop(0)
1250 for(i = 0; i <= xsize - 8; i+=4, dp+=3) {
1251 t0 = *(mlib_d64*)((mlib_u8*)table + ((s0 << 3) & 0x7F8 ));
1252 t1 = *(mlib_d64*)((mlib_u8*)table + ((s1 >> 21) & 0x7F8 ));
1253 t2 = *(mlib_d64*)((mlib_u8*)table + ((s1 >> 13) & 0x7F8 ));
1254 t3 = *(mlib_d64*)((mlib_u8*)table + ((s1 >> 5) & 0x7F8 ));
1255 acc0 = vis_faligndata(t0, t0);
1256 acc0 = vis_faligndata(acc0, t1);
1257 acc1 = vis_faligndata(acc0, acc0);
1258 acc0 = vis_faligndata(acc0, t2);
1259 acc1 = vis_faligndata(acc1, acc0);
1260 acc0 = vis_faligndata(acc0, t3);
1261 s0 = s1;
1262 s1 = *sa++;
1263 dp[0] = vis_read_lo(acc1);
1264 dp[1] = vis_read_hi(acc0);
1265 dp[2] = vis_read_lo(acc0);
1266 }
1267
1268 t0 = *(mlib_d64*)((mlib_u8*)table + ((s0 << 3) & 0x7F8 ));
1269 t1 = *(mlib_d64*)((mlib_u8*)table + ((s1 >> 21) & 0x7F8 ));
1270 t2 = *(mlib_d64*)((mlib_u8*)table + ((s1 >> 13) & 0x7F8 ));
1271 t3 = *(mlib_d64*)((mlib_u8*)table + ((s1 >> 5) & 0x7F8 ));
1272 acc0 = vis_faligndata(t0, t0);
1273 acc0 = vis_faligndata(acc0, t1);
1274 acc1 = vis_faligndata(acc0, acc0);
1275 acc0 = vis_faligndata(acc0, t2);
1276 acc1 = vis_faligndata(acc1, acc0);
1277 acc0 = vis_faligndata(acc0, t3);
1278 dp[0] = vis_read_lo(acc1);
1279 dp[1] = vis_read_hi(acc0);
1280 dp[2] = vis_read_lo(acc0);
1281 dp += 3;
1282 i += 4;
1283 }
1284
1285 dl = (mlib_u8*)dp;
1286
1287#pragma pipeloop(0)
1288 for (; i < xsize; i++) {
1289 ptr = (mlib_u8*)(table + src[i]);
1290 dl[0] = ptr[0];
1291 dl[1] = ptr[1];
1292 dl[2] = ptr[2];
1293 dl += 3;
1294 }
1295}
1296
1297/***************************************************************/
1298void mlib_v_ImageLookUpSI_U8_U8_3_D1_SMALL(const mlib_u8 *src,
1299 mlib_u8 *dst,
1300 mlib_s32 xsize,
1301 const mlib_u8 **table)
1302{
1303 mlib_u8 *sp; /* pointer to source data */
1304 mlib_u8 *dl; /* pointer to start of destination */
1305 mlib_d64 *dp; /* aligned pointer to destination */
1306 mlib_d64 t0, t1, t2; /* destination data */
1307 mlib_d64 t3, t4, t5; /* destination data */
1308 mlib_d64 t6, t7; /* destination data */
1309 mlib_d64 acc0, acc1, acc2; /* destination data */
1310 mlib_s32 i; /* loop variable */
1311 const mlib_u8 *tab0 = table[0];
1312 const mlib_u8 *tab1 = table[1];
1313 const mlib_u8 *tab2 = table[2];
1314 mlib_u32 s00, s01, s02, s03;
1315 mlib_u32 s10, s11, s12, s13;
1316
1317 sp = (void *)src;
1318 dl = dst;
1319 dp = (mlib_d64 *) dl;
1320
1321 vis_alignaddr((void *) 0, 7);
1322
1323 i = 0;
1324
1325 if (xsize >= 8) {
1326
1327 s00 = sp[0];
1328 s01 = sp[1];
1329 s02 = sp[2];
1330 s03 = sp[3];
1331 s10 = sp[4];
1332 s11 = sp[5];
1333 s12 = sp[6];
1334 s13 = sp[7];
1335 sp += 8;
1336
1337#pragma pipeloop(0)
1338 for(i = 0; i <= xsize - 16; i+=8, sp+=8) {
1339 t7 = VIS_LD_U8_I(tab1, s02);
1340 t6 = VIS_LD_U8_I(tab0, s02);
1341 t5 = VIS_LD_U8_I(tab2, s01);
1342 t4 = VIS_LD_U8_I(tab1, s01);
1343 t3 = VIS_LD_U8_I(tab0, s01);
1344 t2 = VIS_LD_U8_I(tab2, s00);
1345 t1 = VIS_LD_U8_I(tab1, s00);
1346 t0 = VIS_LD_U8_I(tab0, s00);
1347 acc0 = vis_faligndata(t7, acc0);
1348 acc0 = vis_faligndata(t6, acc0);
1349 acc0 = vis_faligndata(t5, acc0);
1350 acc0 = vis_faligndata(t4, acc0);
1351 acc0 = vis_faligndata(t3, acc0);
1352 acc0 = vis_faligndata(t2, acc0);
1353 acc0 = vis_faligndata(t1, acc0);
1354 acc0 = vis_faligndata(t0, acc0);
1355 t7 = VIS_LD_U8_I(tab0, s11);
1356 t6 = VIS_LD_U8_I(tab2, s10);
1357 t5 = VIS_LD_U8_I(tab1, s10);
1358 t4 = VIS_LD_U8_I(tab0, s10);
1359 t3 = VIS_LD_U8_I(tab2, s03);
1360 t2 = VIS_LD_U8_I(tab1, s03);
1361 t1 = VIS_LD_U8_I(tab0, s03);
1362 t0 = VIS_LD_U8_I(tab2, s02);
1363 acc1 = vis_faligndata(t7, acc1);
1364 acc1 = vis_faligndata(t6, acc1);
1365 acc1 = vis_faligndata(t5, acc1);
1366 acc1 = vis_faligndata(t4, acc1);
1367 acc1 = vis_faligndata(t3, acc1);
1368 acc1 = vis_faligndata(t2, acc1);
1369 acc1 = vis_faligndata(t1, acc1);
1370 acc1 = vis_faligndata(t0, acc1);
1371 t7 = VIS_LD_U8_I(tab2, s13);
1372 t6 = VIS_LD_U8_I(tab1, s13);
1373 t5 = VIS_LD_U8_I(tab0, s13);
1374 t4 = VIS_LD_U8_I(tab2, s12);
1375 t3 = VIS_LD_U8_I(tab1, s12);
1376 t2 = VIS_LD_U8_I(tab0, s12);
1377 t1 = VIS_LD_U8_I(tab2, s11);
1378 t0 = VIS_LD_U8_I(tab1, s11);
1379 acc2 = vis_faligndata(t7, acc2);
1380 acc2 = vis_faligndata(t6, acc2);
1381 acc2 = vis_faligndata(t5, acc2);
1382 acc2 = vis_faligndata(t4, acc2);
1383 acc2 = vis_faligndata(t3, acc2);
1384 acc2 = vis_faligndata(t2, acc2);
1385 acc2 = vis_faligndata(t1, acc2);
1386 acc2 = vis_faligndata(t0, acc2);
1387 s00 = sp[0];
1388 s01 = sp[1];
1389 s02 = sp[2];
1390 s03 = sp[3];
1391 s10 = sp[4];
1392 s11 = sp[5];
1393 s12 = sp[6];
1394 s13 = sp[7];
1395 *dp++ = acc0;
1396 *dp++ = acc1;
1397 *dp++ = acc2;
1398 }
1399
1400 t7 = VIS_LD_U8_I(tab1, s02);
1401 t6 = VIS_LD_U8_I(tab0, s02);
1402 t5 = VIS_LD_U8_I(tab2, s01);
1403 t4 = VIS_LD_U8_I(tab1, s01);
1404 t3 = VIS_LD_U8_I(tab0, s01);
1405 t2 = VIS_LD_U8_I(tab2, s00);
1406 t1 = VIS_LD_U8_I(tab1, s00);
1407 t0 = VIS_LD_U8_I(tab0, s00);
1408 acc0 = vis_faligndata(t7, acc0);
1409 acc0 = vis_faligndata(t6, acc0);
1410 acc0 = vis_faligndata(t5, acc0);
1411 acc0 = vis_faligndata(t4, acc0);
1412 acc0 = vis_faligndata(t3, acc0);
1413 acc0 = vis_faligndata(t2, acc0);
1414 acc0 = vis_faligndata(t1, acc0);
1415 acc0 = vis_faligndata(t0, acc0);
1416 t7 = VIS_LD_U8_I(tab0, s11);
1417 t6 = VIS_LD_U8_I(tab2, s10);
1418 t5 = VIS_LD_U8_I(tab1, s10);
1419 t4 = VIS_LD_U8_I(tab0, s10);
1420 t3 = VIS_LD_U8_I(tab2, s03);
1421 t2 = VIS_LD_U8_I(tab1, s03);
1422 t1 = VIS_LD_U8_I(tab0, s03);
1423 t0 = VIS_LD_U8_I(tab2, s02);
1424 acc1 = vis_faligndata(t7, acc1);
1425 acc1 = vis_faligndata(t6, acc1);
1426 acc1 = vis_faligndata(t5, acc1);
1427 acc1 = vis_faligndata(t4, acc1);
1428 acc1 = vis_faligndata(t3, acc1);
1429 acc1 = vis_faligndata(t2, acc1);
1430 acc1 = vis_faligndata(t1, acc1);
1431 acc1 = vis_faligndata(t0, acc1);
1432 t7 = VIS_LD_U8_I(tab2, s13);
1433 t6 = VIS_LD_U8_I(tab1, s13);
1434 t5 = VIS_LD_U8_I(tab0, s13);
1435 t4 = VIS_LD_U8_I(tab2, s12);
1436 t3 = VIS_LD_U8_I(tab1, s12);
1437 t2 = VIS_LD_U8_I(tab0, s12);
1438 t1 = VIS_LD_U8_I(tab2, s11);
1439 t0 = VIS_LD_U8_I(tab1, s11);
1440 acc2 = vis_faligndata(t7, acc2);
1441 acc2 = vis_faligndata(t6, acc2);
1442 acc2 = vis_faligndata(t5, acc2);
1443 acc2 = vis_faligndata(t4, acc2);
1444 acc2 = vis_faligndata(t3, acc2);
1445 acc2 = vis_faligndata(t2, acc2);
1446 acc2 = vis_faligndata(t1, acc2);
1447 acc2 = vis_faligndata(t0, acc2);
1448 *dp++ = acc0;
1449 *dp++ = acc1;
1450 *dp++ = acc2;
1451 i += 8;
1452 }
1453
1454 dl = (mlib_u8*)dp;
1455
1456#pragma pipeloop(0)
1457 for (; i < xsize; i++) {
1458 s00 = sp[0];
1459 dl[0] = tab0[s00];
1460 dl[1] = tab1[s00];
1461 dl[2] = tab2[s00];
1462 dl += 3; sp ++;
1463 }
1464}
1465
1466/***************************************************************/
1467void mlib_v_ImageLookUpSI_U8_U8_3(const mlib_u8 *src,
1468 mlib_s32 slb,
1469 mlib_u8 *dst,
1470 mlib_s32 dlb,
1471 mlib_s32 xsize,
1472 mlib_s32 ysize,
1473 const mlib_u8 **table)
1474{
1475 if ((xsize * ysize) < 650) {
1476 mlib_u8 *sl;
1477 mlib_u8 *dl;
1478 mlib_s32 i, j;
1479 const mlib_u8 *tab0 = table[0];
1480 const mlib_u8 *tab1 = table[1];
1481 const mlib_u8 *tab2 = table[2];
1482
1483 sl = (void *)src;
1484 dl = dst;
1485
1486 /* row loop */
1487 for (j = 0; j < ysize; j ++) {
1488 mlib_u8 *sp = sl;
1489 mlib_u8 *dp = dl;
1490 mlib_s32 off, s0, size = xsize;
1491
1492 off = (mlib_addr)dp & 7;
1493 off = (off * 5) & 7;
1494 off = (off < size) ? off : size;
1495
1496 for (i = 0; i < off; i++) {
1497 s0 = *sp++;
1498 *dp++ = tab0[s0];
1499 *dp++ = tab1[s0];
1500 *dp++ = tab2[s0];
1501 size--;
1502 }
1503
1504 if (size > 0) {
1505 mlib_v_ImageLookUpSI_U8_U8_3_D1_SMALL(sp, dp, size, table);
1506 }
1507
1508 sl = (mlib_u8 *) ((mlib_u8 *) sl + slb);
1509 dl = (mlib_u8 *) ((mlib_u8 *) dl + dlb);
1510 }
1511
1512 } else {
1513 mlib_u8 *sl;
1514 mlib_u8 *dl;
1515 mlib_u32 tab[512];
1516 const mlib_u8 *tab0 = table[0];
1517 const mlib_u8 *tab1 = table[1];
1518 const mlib_u8 *tab2 = table[2];
1519 mlib_s32 i, j;
1520 mlib_u32 s0, s1, s2, s3;
1521
1522 s0 = tab0[0];
1523 s1 = tab1[0];
1524 s2 = tab2[0];
1525 for (i = 1; i < 256; i++) {
1526 s3 = (s0 << 24) + (s1 << 16) + (s2 << 8);
1527 s0 = tab0[i];
1528 s1 = tab1[i];
1529 s2 = tab2[i];
1530 tab[2*i-2] = s3;
1531 }
1532
1533 s3 = (s0 << 24) + (s1 << 16) + (s2 << 8);
1534 tab[510] = s3;
1535
1536 sl = (void *)src;
1537 dl = dst;
1538
1539 /* row loop */
1540 for (j = 0; j < ysize; j ++) {
1541 mlib_u8 *sp = sl;
1542 mlib_u8 *dp = dl;
1543 mlib_s32 off, size = xsize;
1544 mlib_u8 *ptr;
1545
1546 off = ((mlib_addr)dp & 3);
1547 off = (off < size) ? off : size;
1548
1549#pragma pipeloop(0)
1550 for (i = 0; i < off; i++) {
1551 ptr = (mlib_u8*)(tab + 2*sp[i]);
1552 dp[0] = ptr[0];
1553 dp[1] = ptr[1];
1554 dp[2] = ptr[2];
1555 dp += 3;
1556 }
1557
1558 size -= off;
1559 sp += off;
1560
1561 if (size > 0) {
1562 off = (mlib_addr)sp & 3;
1563
1564 if (off == 0) {
1565 mlib_v_ImageLookUpSI_U8_U8_3_SrcOff0_D1(sp, dp, size, (mlib_d64*)tab);
1566 } else if (off == 1) {
1567 mlib_v_ImageLookUpSI_U8_U8_3_SrcOff1_D1(sp, dp, size, (mlib_d64*)tab);
1568 } else if (off == 2) {
1569 mlib_v_ImageLookUpSI_U8_U8_3_SrcOff2_D1(sp, dp, size, (mlib_d64*)tab);
1570 } else if (off == 3) {
1571 mlib_v_ImageLookUpSI_U8_U8_3_SrcOff3_D1(sp, dp, size, (mlib_d64*)tab);
1572 }
1573 }
1574
1575 sl = (mlib_u8 *) ((mlib_u8 *) sl + slb);
1576 dl = (mlib_u8 *) ((mlib_u8 *) dl + dlb);
1577 }
1578 }
1579}
1580
1581/***************************************************************/
1582void mlib_v_ImageLookUpSI_U8_U8_4_SrcOff0_D1(const mlib_u8 *src,
1583 mlib_u8 *dst,
1584 mlib_s32 xsize,
1585 const mlib_f32 *table)
1586{
1587 mlib_u32 *sa; /* aligned pointer to source data */
1588 mlib_u8 *sp; /* pointer to source data */
1589 mlib_u32 s0; /* source data */
1590 mlib_f32 *dp; /* aligned pointer to destination */
1591 mlib_f32 acc0, acc1; /* destination data */
1592 mlib_f32 acc2, acc3; /* destination data */
1593 mlib_s32 i; /* loop variable */
1594 mlib_u32 s00, s01, s02, s03;
1595
1596 sa = (mlib_u32*)src;
1597 dp = (mlib_f32 *) dst;
1598
1599 i = 0;
1600
1601 if (xsize >= 4) {
1602
1603 s0 = *sa++;
1604 s00 = (s0 >> 22) & 0x3FC;
1605 s01 = (s0 >> 14) & 0x3FC;
1606
1607#pragma pipeloop(0)
1608 for(i = 0; i <= xsize - 8; i+=4, dp += 4) {
1609 s02 = (s0 >> 6) & 0x3FC;
1610 s03 = (s0 << 2) & 0x3FC;
1611 acc0 = *(mlib_f32*)((mlib_u8*)table + s00);
1612 acc1 = *(mlib_f32*)((mlib_u8*)table + s01);
1613 acc2 = *(mlib_f32*)((mlib_u8*)table + s02);
1614 acc3 = *(mlib_f32*)((mlib_u8*)table + s03);
1615 s0 = *sa++;
1616 s00 = (s0 >> 22) & 0x3FC;
1617 s01 = (s0 >> 14) & 0x3FC;
1618 dp[0] = acc0;
1619 dp[1] = acc1;
1620 dp[2] = acc2;
1621 dp[3] = acc3;
1622 }
1623
1624 s02 = (s0 >> 6) & 0x3FC;
1625 s03 = (s0 << 2) & 0x3FC;
1626 acc0 = *(mlib_f32*)((mlib_u8*)table + s00);
1627 acc1 = *(mlib_f32*)((mlib_u8*)table + s01);
1628 acc2 = *(mlib_f32*)((mlib_u8*)table + s02);
1629 acc3 = *(mlib_f32*)((mlib_u8*)table + s03);
1630 dp[0] = acc0;
1631 dp[1] = acc1;
1632 dp[2] = acc2;
1633 dp[3] = acc3;
1634 dp += 4;
1635 i += 4;
1636 }
1637
1638 sp = (mlib_u8*)sa;
1639
1640 if ( i <= xsize - 2) {
1641 *dp++ = table[sp[0]];
1642 *dp++ = table[sp[1]];
1643 i+=2; sp += 2;
1644 }
1645
1646 if ( i < xsize) *dp = table[sp[0]];
1647}
1648
1649/***************************************************************/
1650void mlib_v_ImageLookUpSI_U8_U8_4_DstNonAl_D1(const mlib_u8 *src,
1651 mlib_u8 *dst,
1652 mlib_s32 xsize,
1653 const mlib_f32 *table)
1654{
1655 mlib_u32 *sa; /* aligned pointer to source data */
1656 mlib_u8 *sp; /* pointer to source data */
1657 mlib_u32 s0; /* source data */
1658 mlib_u8 *dl; /* pointer to start of destination */
1659 mlib_d64 *dp; /* aligned pointer to destination */
1660 mlib_d64 acc0, acc1, acc2; /* destination data */
1661 mlib_s32 i; /* loop variable */
1662 mlib_u8 *dend; /* pointer to end of destination */
1663 mlib_s32 emask; /* edge mask */
1664 mlib_s32 off;
1665 mlib_u32 s00, s01, s02, s03;
1666
1667 sa = (mlib_u32*)src;
1668 sp = (void *)src;
1669 dl = dst;
1670 dend = dl + (xsize << 2) - 1;
1671 dp = (mlib_d64 *) ((mlib_addr) dl & (~7));
1672 off = (mlib_addr) dp - (mlib_addr) dl;
1673 vis_alignaddr(dp, off);
1674
1675 emask = vis_edge8(dl, dend);
1676 acc0 = vis_freg_pair(table[sp[0]], table[sp[1]]);
1677 vis_pst_8(vis_faligndata(acc0, acc0), dp++, emask);
1678 sp += 2;
1679
1680 xsize -= 2;
1681
1682 if (xsize >= 2) {
1683 acc1 = vis_freg_pair(table[sp[0]], table[sp[1]]);
1684 *dp++ = vis_faligndata(acc0, acc1);
1685 acc0 = acc1;
1686 sp += 2; xsize -= 2;
1687 }
1688
1689 sa++;
1690
1691 i = 0;
1692
1693 if (xsize >= 4) {
1694
1695 s0 = *sa++;
1696 s00 = (s0 >> 22) & 0x3FC;
1697 s01 = (s0 >> 14) & 0x3FC;
1698
1699#pragma pipeloop(0)
1700 for(i = 0; i <= xsize - 8; i+=4, dp += 2) {
1701 s02 = (s0 >> 6) & 0x3FC;
1702 s03 = (s0 << 2) & 0x3FC;
1703 acc1 = vis_freg_pair(*(mlib_f32*)((mlib_u8*)table + s00),
1704 *(mlib_f32*)((mlib_u8*)table + s01));
1705 acc2 = vis_freg_pair(*(mlib_f32*)((mlib_u8*)table + s02),
1706 *(mlib_f32*)((mlib_u8*)table + s03));
1707 s0 = *sa++;
1708 s00 = (s0 >> 22) & 0x3FC;
1709 s01 = (s0 >> 14) & 0x3FC;
1710 dp[0] = vis_faligndata(acc0, acc1);
1711 dp[1] = vis_faligndata(acc1, acc2);
1712 acc0 = acc2;
1713 }
1714
1715 s02 = (s0 >> 6) & 0x3FC;
1716 s03 = (s0 << 2) & 0x3FC;
1717 acc1 = vis_freg_pair(*(mlib_f32*)((mlib_u8*)table + s00),
1718 *(mlib_f32*)((mlib_u8*)table + s01));
1719 acc2 = vis_freg_pair(*(mlib_f32*)((mlib_u8*)table + s02),
1720 *(mlib_f32*)((mlib_u8*)table + s03));
1721 dp[0] = vis_faligndata(acc0, acc1);
1722 dp[1] = vis_faligndata(acc1, acc2);
1723 acc0 = acc2;
1724 sp = (mlib_u8*)sa;
1725 dp += 2;
1726 i += 4;
1727 }
1728
1729 if ( i <= xsize - 2) {
1730 acc1 = vis_freg_pair(table[sp[0]], table[sp[1]]);
1731 *dp++ = vis_faligndata(acc0, acc1);
1732 acc0 = acc1;
1733 i+=2; sp += 2;
1734 }
1735
1736 if ((mlib_addr) dp <= (mlib_addr) dend) {
1737 emask = vis_edge8(dp, dend);
1738 acc1 = vis_freg_pair(table[sp[0]], table[sp[1]]);
1739 vis_pst_8(vis_faligndata(acc0, acc1), dp++, emask);
1740 }
1741
1742 if ((mlib_addr) dp <= (mlib_addr) dend) {
1743 emask = vis_edge8(dp, dend);
1744 vis_pst_8(vis_faligndata(acc1, acc1), dp++, emask);
1745 }
1746}
1747
1748/***************************************************************/
1749void mlib_v_ImageLookUpSI_U8_U8_4_DstOff0_D1_SMALL(const mlib_u8 *src,
1750 mlib_u8 *dst,
1751 mlib_s32 xsize,
1752 const mlib_u8 **table)
1753{
1754 mlib_u8 *sp; /* pointer to source data */
1755 mlib_u32 s0, s1; /* source data */
1756 mlib_u8 *dl; /* pointer to start of destination */
1757 mlib_d64 *dp; /* aligned pointer to destination */
1758 mlib_d64 t0, t1, t2; /* destination data */
1759 mlib_d64 t3, t4, t5; /* destination data */
1760 mlib_d64 t6, t7, acc; /* destination data */
1761 mlib_s32 i; /* loop variable */
1762 const mlib_u8 *tab0 = table[0];
1763 const mlib_u8 *tab1 = table[1];
1764 const mlib_u8 *tab2 = table[2];
1765 const mlib_u8 *tab3 = table[3];
1766
1767 sp = (void *)src;
1768 dl = dst;
1769 dp = (mlib_d64 *) dl;
1770
1771 vis_alignaddr((void *) 0, 7);
1772
1773 if (xsize >= 2) {
1774
1775 s0 = sp[0];
1776 s1 = sp[1];
1777 sp += 2;
1778
1779#pragma pipeloop(0)
1780 for(i = 0; i <= xsize - 4; i+=2, sp+=2) {
1781 t7 = VIS_LD_U8_I(tab3, s1);
1782 t6 = VIS_LD_U8_I(tab2, s1);
1783 t5 = VIS_LD_U8_I(tab1, s1);
1784 t4 = VIS_LD_U8_I(tab0, s1);
1785 t3 = VIS_LD_U8_I(tab3, s0);
1786 t2 = VIS_LD_U8_I(tab2, s0);
1787 t1 = VIS_LD_U8_I(tab1, s0);
1788 t0 = VIS_LD_U8_I(tab0, s0);
1789 acc = vis_faligndata(t7, acc);
1790 acc = vis_faligndata(t6, acc);
1791 acc = vis_faligndata(t5, acc);
1792 acc = vis_faligndata(t4, acc);
1793 acc = vis_faligndata(t3, acc);
1794 acc = vis_faligndata(t2, acc);
1795 acc = vis_faligndata(t1, acc);
1796 acc = vis_faligndata(t0, acc);
1797 s0 = sp[0];
1798 s1 = sp[1];
1799 *dp++ = acc;
1800 }
1801
1802 t7 = VIS_LD_U8_I(tab3, s1);
1803 t6 = VIS_LD_U8_I(tab2, s1);
1804 t5 = VIS_LD_U8_I(tab1, s1);
1805 t4 = VIS_LD_U8_I(tab0, s1);
1806 t3 = VIS_LD_U8_I(tab3, s0);
1807 t2 = VIS_LD_U8_I(tab2, s0);
1808 t1 = VIS_LD_U8_I(tab1, s0);
1809 t0 = VIS_LD_U8_I(tab0, s0);
1810 acc = vis_faligndata(t7, acc);
1811 acc = vis_faligndata(t6, acc);
1812 acc = vis_faligndata(t5, acc);
1813 acc = vis_faligndata(t4, acc);
1814 acc = vis_faligndata(t3, acc);
1815 acc = vis_faligndata(t2, acc);
1816 acc = vis_faligndata(t1, acc);
1817 acc = vis_faligndata(t0, acc);
1818 *dp++ = acc;
1819 }
1820
1821 if ((xsize & 1) != 0) {
1822 s0 = sp[0];
1823 t7 = VIS_LD_U8_I(tab3, s0);
1824 t6 = VIS_LD_U8_I(tab2, s0);
1825 t5 = VIS_LD_U8_I(tab1, s0);
1826 t4 = VIS_LD_U8_I(tab0, s0);
1827 acc = vis_faligndata(t7, acc);
1828 acc = vis_faligndata(t6, acc);
1829 acc = vis_faligndata(t5, acc);
1830 acc = vis_faligndata(t4, acc);
1831 *(mlib_f32*)dp = vis_read_hi(acc);
1832 }
1833}
1834
1835/***************************************************************/
1836void mlib_v_ImageLookUpSI_U8_U8_4_DstOff1_D1_SMALL(const mlib_u8 *src,
1837 mlib_u8 *dst,
1838 mlib_s32 xsize,
1839 const mlib_u8 **table)
1840{
1841 mlib_u8 *sp; /* pointer to source data */
1842 mlib_u32 s0, s1, s2; /* source data */
1843 mlib_u8 *dl; /* pointer to start of destination */
1844 mlib_d64 *dp; /* aligned pointer to destination */
1845 mlib_d64 t0, t1, t2; /* destination data */
1846 mlib_d64 t3, t4, t5; /* destination data */
1847 mlib_d64 t6, t7, acc; /* destination data */
1848 mlib_s32 i; /* loop variable */
1849 const mlib_u8 *tab0 = table[0];
1850 const mlib_u8 *tab1 = table[1];
1851 const mlib_u8 *tab2 = table[2];
1852 const mlib_u8 *tab3 = table[3];
1853
1854 sp = (void *)src;
1855 dl = dst;
1856 dp = (mlib_d64 *) dl;
1857
1858 vis_alignaddr((void *) 0, 7);
1859
1860 s0 = *sp++;
1861
1862 if (xsize >= 2) {
1863
1864 s1 = sp[0];
1865 s2 = sp[1];
1866 sp += 2;
1867
1868#pragma pipeloop(0)
1869 for(i = 0; i <= xsize - 4; i+=2, sp+=2) {
1870 t7 = VIS_LD_U8_I(tab0, s2);
1871 t6 = VIS_LD_U8_I(tab3, s1);
1872 t5 = VIS_LD_U8_I(tab2, s1);
1873 t4 = VIS_LD_U8_I(tab1, s1);
1874 t3 = VIS_LD_U8_I(tab0, s1);
1875 t2 = VIS_LD_U8_I(tab3, s0);
1876 t1 = VIS_LD_U8_I(tab2, s0);
1877 t0 = VIS_LD_U8_I(tab1, s0);
1878 acc = vis_faligndata(t7, acc);
1879 acc = vis_faligndata(t6, acc);
1880 acc = vis_faligndata(t5, acc);
1881 acc = vis_faligndata(t4, acc);
1882 acc = vis_faligndata(t3, acc);
1883 acc = vis_faligndata(t2, acc);
1884 acc = vis_faligndata(t1, acc);
1885 acc = vis_faligndata(t0, acc);
1886 s0 = s2;
1887 s1 = sp[0];
1888 s2 = sp[1];
1889 *dp++ = acc;
1890 }
1891
1892 t7 = VIS_LD_U8_I(tab0, s2);
1893 t6 = VIS_LD_U8_I(tab3, s1);
1894 t5 = VIS_LD_U8_I(tab2, s1);
1895 t4 = VIS_LD_U8_I(tab1, s1);
1896 t3 = VIS_LD_U8_I(tab0, s1);
1897 t2 = VIS_LD_U8_I(tab3, s0);
1898 t1 = VIS_LD_U8_I(tab2, s0);
1899 t0 = VIS_LD_U8_I(tab1, s0);
1900 acc = vis_faligndata(t7, acc);
1901 acc = vis_faligndata(t6, acc);
1902 acc = vis_faligndata(t5, acc);
1903 acc = vis_faligndata(t4, acc);
1904 acc = vis_faligndata(t3, acc);
1905 acc = vis_faligndata(t2, acc);
1906 acc = vis_faligndata(t1, acc);
1907 acc = vis_faligndata(t0, acc);
1908 s0 = s2;
1909 *dp++ = acc;
1910 }
1911
1912 dl = (mlib_u8*)dp;
1913
1914 if ((xsize & 1) != 0) {
1915 s1 = sp[0];
1916 t7 = VIS_LD_U8_I(tab0, s1);
1917 t6 = VIS_LD_U8_I(tab3, s0);
1918 t5 = VIS_LD_U8_I(tab2, s0);
1919 t4 = VIS_LD_U8_I(tab1, s0);
1920 acc = vis_faligndata(t7, acc);
1921 acc = vis_faligndata(t6, acc);
1922 acc = vis_faligndata(t5, acc);
1923 acc = vis_faligndata(t4, acc);
1924 *(mlib_f32*)dl = vis_read_hi(acc);
1925 dl += 4;
1926 s0 = s1;
1927 }
1928
1929 dl[0] = tab1[s0];
1930 dl[1] = tab2[s0];
1931 dl[2] = tab3[s0];
1932}
1933
1934/***************************************************************/
1935void mlib_v_ImageLookUpSI_U8_U8_4_DstOff2_D1_SMALL(const mlib_u8 *src,
1936 mlib_u8 *dst,
1937 mlib_s32 xsize,
1938 const mlib_u8 **table)
1939{
1940 mlib_u8 *sp; /* pointer to source data */
1941 mlib_u32 s0, s1, s2; /* source data */
1942 mlib_u8 *dl; /* pointer to start of destination */
1943 mlib_d64 *dp; /* aligned pointer to destination */
1944 mlib_d64 t0, t1, t2; /* destination data */
1945 mlib_d64 t3, t4, t5; /* destination data */
1946 mlib_d64 t6, t7, acc; /* destination data */
1947 mlib_s32 i; /* loop variable */
1948 const mlib_u8 *tab0 = table[0];
1949 const mlib_u8 *tab1 = table[1];
1950 const mlib_u8 *tab2 = table[2];
1951 const mlib_u8 *tab3 = table[3];
1952
1953 sp = (void *)src;
1954 dl = dst;
1955 dp = (mlib_d64 *) dl;
1956
1957 vis_alignaddr((void *) 0, 7);
1958
1959 s0 = *sp++;
1960
1961 if (xsize >= 2) {
1962
1963 s1 = sp[0];
1964 s2 = sp[1];
1965 sp += 2;
1966
1967#pragma pipeloop(0)
1968 for(i = 0; i <= xsize - 4; i+=2, sp+=2) {
1969 t7 = VIS_LD_U8_I(tab1, s2);
1970 t6 = VIS_LD_U8_I(tab0, s2);
1971 t5 = VIS_LD_U8_I(tab3, s1);
1972 t4 = VIS_LD_U8_I(tab2, s1);
1973 t3 = VIS_LD_U8_I(tab1, s1);
1974 t2 = VIS_LD_U8_I(tab0, s1);
1975 t1 = VIS_LD_U8_I(tab3, s0);
1976 t0 = VIS_LD_U8_I(tab2, s0);
1977 acc = vis_faligndata(t7, acc);
1978 acc = vis_faligndata(t6, acc);
1979 acc = vis_faligndata(t5, acc);
1980 acc = vis_faligndata(t4, acc);
1981 acc = vis_faligndata(t3, acc);
1982 acc = vis_faligndata(t2, acc);
1983 acc = vis_faligndata(t1, acc);
1984 acc = vis_faligndata(t0, acc);
1985 s0 = s2;
1986 s1 = sp[0];
1987 s2 = sp[1];
1988 *dp++ = acc;
1989 }
1990
1991 t7 = VIS_LD_U8_I(tab1, s2);
1992 t6 = VIS_LD_U8_I(tab0, s2);
1993 t5 = VIS_LD_U8_I(tab3, s1);
1994 t4 = VIS_LD_U8_I(tab2, s1);
1995 t3 = VIS_LD_U8_I(tab1, s1);
1996 t2 = VIS_LD_U8_I(tab0, s1);
1997 t1 = VIS_LD_U8_I(tab3, s0);
1998 t0 = VIS_LD_U8_I(tab2, s0);
1999 acc = vis_faligndata(t7, acc);
2000 acc = vis_faligndata(t6, acc);
2001 acc = vis_faligndata(t5, acc);
2002 acc = vis_faligndata(t4, acc);
2003 acc = vis_faligndata(t3, acc);
2004 acc = vis_faligndata(t2, acc);
2005 acc = vis_faligndata(t1, acc);
2006 acc = vis_faligndata(t0, acc);
2007 s0 = s2;
2008 *dp++ = acc;
2009 }
2010
2011 dl = (mlib_u8*)dp;
2012
2013 if ((xsize & 1) != 0) {
2014 s1 = sp[0];
2015 t7 = VIS_LD_U8_I(tab1, s1);
2016 t6 = VIS_LD_U8_I(tab0, s1);
2017 t5 = VIS_LD_U8_I(tab3, s0);
2018 t4 = VIS_LD_U8_I(tab2, s0);
2019 acc = vis_faligndata(t7, acc);
2020 acc = vis_faligndata(t6, acc);
2021 acc = vis_faligndata(t5, acc);
2022 acc = vis_faligndata(t4, acc);
2023 *(mlib_f32*)dl = vis_read_hi(acc);
2024 dl += 4;
2025 s0 = s1;
2026 }
2027
2028 dl[0] = tab2[s0];
2029 dl[1] = tab3[s0];
2030}
2031
2032/***************************************************************/
2033void mlib_v_ImageLookUpSI_U8_U8_4_DstOff3_D1_SMALL(const mlib_u8 *src,
2034 mlib_u8 *dst,
2035 mlib_s32 xsize,
2036 const mlib_u8 **table)
2037{
2038 mlib_u8 *sp; /* pointer to source data */
2039 mlib_u32 s0, s1, s2; /* source data */
2040 mlib_u8 *dl; /* pointer to start of destination */
2041 mlib_d64 *dp; /* aligned pointer to destination */
2042 mlib_d64 t0, t1, t2; /* destination data */
2043 mlib_d64 t3, t4, t5; /* destination data */
2044 mlib_d64 t6, t7, acc; /* destination data */
2045 mlib_s32 i; /* loop variable */
2046 const mlib_u8 *tab0 = table[0];
2047 const mlib_u8 *tab1 = table[1];
2048 const mlib_u8 *tab2 = table[2];
2049 const mlib_u8 *tab3 = table[3];
2050
2051 sp = (void *)src;
2052 dl = dst;
2053 dp = (mlib_d64 *) dl;
2054
2055 vis_alignaddr((void *) 0, 7);
2056
2057 s0 = *sp++;
2058
2059 if (xsize >= 2) {
2060
2061 s1 = sp[0];
2062 s2 = sp[1];
2063 sp += 2;
2064
2065#pragma pipeloop(0)
2066 for(i = 0; i <= xsize - 4; i+=2, sp+=2) {
2067 t7 = VIS_LD_U8_I(tab2, s2);
2068 t6 = VIS_LD_U8_I(tab1, s2);
2069 t5 = VIS_LD_U8_I(tab0, s2);
2070 t4 = VIS_LD_U8_I(tab3, s1);
2071 t3 = VIS_LD_U8_I(tab2, s1);
2072 t2 = VIS_LD_U8_I(tab1, s1);
2073 t1 = VIS_LD_U8_I(tab0, s1);
2074 t0 = VIS_LD_U8_I(tab3, s0);
2075 acc = vis_faligndata(t7, acc);
2076 acc = vis_faligndata(t6, acc);
2077 acc = vis_faligndata(t5, acc);
2078 acc = vis_faligndata(t4, acc);
2079 acc = vis_faligndata(t3, acc);
2080 acc = vis_faligndata(t2, acc);
2081 acc = vis_faligndata(t1, acc);
2082 acc = vis_faligndata(t0, acc);
2083 s0 = s2;
2084 s1 = sp[0];
2085 s2 = sp[1];
2086 *dp++ = acc;
2087 }
2088
2089 t7 = VIS_LD_U8_I(tab2, s2);
2090 t6 = VIS_LD_U8_I(tab1, s2);
2091 t5 = VIS_LD_U8_I(tab0, s2);
2092 t4 = VIS_LD_U8_I(tab3, s1);
2093 t3 = VIS_LD_U8_I(tab2, s1);
2094 t2 = VIS_LD_U8_I(tab1, s1);
2095 t1 = VIS_LD_U8_I(tab0, s1);
2096 t0 = VIS_LD_U8_I(tab3, s0);
2097 acc = vis_faligndata(t7, acc);
2098 acc = vis_faligndata(t6, acc);
2099 acc = vis_faligndata(t5, acc);
2100 acc = vis_faligndata(t4, acc);
2101 acc = vis_faligndata(t3, acc);
2102 acc = vis_faligndata(t2, acc);
2103 acc = vis_faligndata(t1, acc);
2104 acc = vis_faligndata(t0, acc);
2105 s0 = s2;
2106 *dp++ = acc;
2107 }
2108
2109 dl = (mlib_u8*)dp;
2110
2111 if ((xsize & 1) != 0) {
2112 s1 = sp[0];
2113 t7 = VIS_LD_U8_I(tab2, s1);
2114 t6 = VIS_LD_U8_I(tab1, s1);
2115 t5 = VIS_LD_U8_I(tab0, s1);
2116 t4 = VIS_LD_U8_I(tab3, s0);
2117 acc = vis_faligndata(t7, acc);
2118 acc = vis_faligndata(t6, acc);
2119 acc = vis_faligndata(t5, acc);
2120 acc = vis_faligndata(t4, acc);
2121 *(mlib_f32*)dl = vis_read_hi(acc);
2122 dl += 4;
2123 s0 = s1;
2124 }
2125
2126 dl[0] = tab3[s0];
2127}
2128
2129/***************************************************************/
2130void mlib_v_ImageLookUpSI_U8_U8_4(const mlib_u8 *src,
2131 mlib_s32 slb,
2132 mlib_u8 *dst,
2133 mlib_s32 dlb,
2134 mlib_s32 xsize,
2135 mlib_s32 ysize,
2136 const mlib_u8 **table)
2137{
2138 if ((xsize * ysize) < 500) {
2139 mlib_u8 *sl;
2140 mlib_u8 *dl;
2141 mlib_s32 j;
2142 const mlib_u8 *tab0 = table[0];
2143 const mlib_u8 *tab1 = table[1];
2144 const mlib_u8 *tab2 = table[2];
2145 const mlib_u8 *tab3 = table[3];
2146
2147 sl = (void *)src;
2148 dl = dst;
2149
2150 /* row loop */
2151 for (j = 0; j < ysize; j ++) {
2152 mlib_u8 *sp = sl;
2153 mlib_u8 *dp = dl;
2154 mlib_s32 off, s0, size = xsize;
2155
2156 off = (8 - ((mlib_addr)dp & 7)) & 7;
2157
2158 if ((off >= 4) && (size > 0)) {
2159 s0 = *sp++;
2160 *dp++ = tab0[s0];
2161 *dp++ = tab1[s0];
2162 *dp++ = tab2[s0];
2163 *dp++ = tab3[s0];
2164 size--;
2165 }
2166
2167 if (size > 0) {
2168 off = (4 - ((mlib_addr)dp & 3)) & 3;
2169
2170 if (off == 0) {
2171 mlib_v_ImageLookUpSI_U8_U8_4_DstOff0_D1_SMALL(sp, dp, size, table);
2172 } else if (off == 1) {
2173 s0 = *sp;
2174 *dp++ = tab0[s0];
2175 size--;
2176 mlib_v_ImageLookUpSI_U8_U8_4_DstOff1_D1_SMALL(sp, dp, size, table);
2177 } else if (off == 2) {
2178 s0 = *sp;
2179 *dp++ = tab0[s0];
2180 *dp++ = tab1[s0];
2181 size--;
2182 mlib_v_ImageLookUpSI_U8_U8_4_DstOff2_D1_SMALL(sp, dp, size, table);
2183 } else if (off == 3) {
2184 s0 = *sp;
2185 *dp++ = tab0[s0];
2186 *dp++ = tab1[s0];
2187 *dp++ = tab2[s0];
2188 size--;
2189 mlib_v_ImageLookUpSI_U8_U8_4_DstOff3_D1_SMALL(sp, dp, size, table);
2190 }
2191 }
2192
2193 sl = (mlib_u8 *) ((mlib_u8 *) sl + slb);
2194 dl = (mlib_u8 *) ((mlib_u8 *) dl + dlb);
2195 }
2196
2197 } else {
2198 mlib_u8 *sl;
2199 mlib_u8 *dl;
2200 mlib_u32 tab[256];
2201 const mlib_u8 *tab0 = table[0];
2202 const mlib_u8 *tab1 = table[1];
2203 const mlib_u8 *tab2 = table[2];
2204 const mlib_u8 *tab3 = table[3];
2205 mlib_s32 i, j;
2206 mlib_u32 s0, s1, s2, s3, s4;
2207
2208 s0 = tab0[0];
2209 s1 = tab1[0];
2210 s2 = tab2[0];
2211 s3 = tab3[0];
2212 for (i = 1; i < 256; i++) {
2213 s4 = (s0 << 24) + (s1 << 16) + (s2 << 8) + s3;
2214 s0 = tab0[i];
2215 s1 = tab1[i];
2216 s2 = tab2[i];
2217 s3 = tab3[i];
2218 tab[i-1] = s4;
2219 }
2220
2221 s4 = (s0 << 24) + (s1 << 16) + (s2 << 8) + s3;
2222 tab[255] = s4;
2223
2224 sl = (void *)src;
2225 dl = dst;
2226
2227 /* row loop */
2228 for (j = 0; j < ysize; j ++) {
2229 mlib_u8 *sp = sl;
2230 mlib_u8 *dp = dl;
2231 mlib_s32 off, size = xsize;
2232
2233 if (((mlib_addr)dp & 3) == 0) {
2234 off = (4 - (mlib_addr)sp & 3) & 3;
2235
2236 off = (off < size) ? off : size;
2237
2238#pragma pipeloop(0)
2239 for (i = 0; i < off; i++) {
2240 *(mlib_u32*)dp = tab[(*sp)];
2241 dp += 4; sp++;
2242 }
2243
2244 size -= off;
2245
2246 if (size > 0) {
2247 mlib_v_ImageLookUpSI_U8_U8_4_SrcOff0_D1(sp, dp, size, (mlib_f32*)tab);
2248 }
2249
2250 } else {
2251
2252 off = ((4 - ((mlib_addr)sp & 3)) & 3);
2253 off = (off < size) ? off : size;
2254
2255 for (i = 0; i < off; i++) {
2256 s0 = tab[(*sp)];
2257 *dp++ = (s0 >> 24);
2258 *dp++ = (s0 >> 16);
2259 *dp++ = (s0 >> 8);
2260 *dp++ = s0;
2261 size--; sp++;
2262 }
2263
2264 if (size > 0) {
2265 mlib_v_ImageLookUpSI_U8_U8_4_DstNonAl_D1(sp, dp, size, (mlib_f32*)tab);
2266 }
2267 }
2268
2269 sl = (mlib_u8 *) ((mlib_u8 *) sl + slb);
2270 dl = (mlib_u8 *) ((mlib_u8 *) dl + dlb);
2271 }
2272 }
2273}
2274
2275/***************************************************************/