blob: 3c719cd6c178a75f05176028fa3e6f40475ba0a5 [file] [log] [blame]
J. Duke319a3b92007-12-01 00:00:00 +00001/*
2 * Copyright 2003 Sun Microsystems, Inc. All Rights Reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Sun designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Sun in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
22 * CA 95054 USA or visit www.sun.com if you need additional information or
23 * have any questions.
24 */
25
26
27
28#include "vis_proto.h"
29#include "mlib_image.h"
30#include "mlib_v_ImageLookUpFunc.h"
31
32/***************************************************************/
33static void mlib_v_ImageLookUp_U8_U16_124_SrcOff0_D1(const mlib_u8 *src,
34 mlib_u16 *dst,
35 mlib_s32 xsize,
36 const mlib_u16 *table0,
37 const mlib_u16 *table1,
38 const mlib_u16 *table2,
39 const mlib_u16 *table3);
40
41static void mlib_v_ImageLookUp_U8_U16_124_SrcOff1_D1(const mlib_u8 *src,
42 mlib_u16 *dst,
43 mlib_s32 xsize,
44 const mlib_u16 *table0,
45 const mlib_u16 *table1,
46 const mlib_u16 *table2,
47 const mlib_u16 *table3);
48
49static void mlib_v_ImageLookUp_U8_U16_124_SrcOff2_D1(const mlib_u8 *src,
50 mlib_u16 *dst,
51 mlib_s32 xsize,
52 const mlib_u16 *table0,
53 const mlib_u16 *table1,
54 const mlib_u16 *table2,
55 const mlib_u16 *table3);
56
57static void mlib_v_ImageLookUp_U8_U16_124_SrcOff3_D1(const mlib_u8 *src,
58 mlib_u16 *dst,
59 mlib_s32 xsize,
60 const mlib_u16 *table0,
61 const mlib_u16 *table1,
62 const mlib_u16 *table2,
63 const mlib_u16 *table3);
64
65static void mlib_v_ImageLookUp_U8_U16_3_SrcOff0_D1(const mlib_u8 *src,
66 mlib_u16 *dst,
67 mlib_s32 xsize,
68 const mlib_u16 *table0,
69 const mlib_u16 *table1,
70 const mlib_u16 *table2);
71
72static void mlib_v_ImageLookUp_U8_U16_3_SrcOff1_D1(const mlib_u8 *src,
73 mlib_u16 *dst,
74 mlib_s32 xsize,
75 const mlib_u16 *table0,
76 const mlib_u16 *table1,
77 const mlib_u16 *table2);
78
79static void mlib_v_ImageLookUp_U8_U16_3_SrcOff2_D1(const mlib_u8 *src,
80 mlib_u16 *dst,
81 mlib_s32 xsize,
82 const mlib_u16 *table0,
83 const mlib_u16 *table1,
84 const mlib_u16 *table2);
85
86static void mlib_v_ImageLookUp_U8_U16_3_SrcOff3_D1(const mlib_u8 *src,
87 mlib_u16 *dst,
88 mlib_s32 xsize,
89 const mlib_u16 *table0,
90 const mlib_u16 *table1,
91 const mlib_u16 *table2);
92
93/***************************************************************/
/*
 * Thin wrapper over vis_ld_u16_i() that casts the (const) table pointer to
 * void *.  Throughout this file the second argument is twice a sample value
 * (0..0x1FE), i.e. it is used as a byte offset into a table of 16-bit entries.
 */
#define VIS_LD_U16_I(tbl, byte_off) vis_ld_u16_i((void *)(tbl), (byte_off))
95
96/***************************************************************/
97void mlib_v_ImageLookUp_U8_U16_124_SrcOff0_D1(const mlib_u8 *src,
98 mlib_u16 *dst,
99 mlib_s32 xsize,
100 const mlib_u16 *table0,
101 const mlib_u16 *table1,
102 const mlib_u16 *table2,
103 const mlib_u16 *table3)
104{
105 mlib_u32 *sa; /* aligned pointer to source data */
106 mlib_u8 *sp; /* pointer to source data */
107 mlib_u32 s0; /* source data */
108 mlib_u16 *dl; /* pointer to start of destination */
109 mlib_u16 *dend; /* pointer to end of destination */
110 mlib_d64 *dp; /* aligned pointer to destination */
111 mlib_d64 t0, t1, t2; /* destination data */
112 mlib_d64 t3, acc0; /* destination data */
113 mlib_s32 emask; /* edge mask */
114 mlib_s32 i, num; /* loop variable */
115
116 sa = (mlib_u32*)src;
117 dl = dst;
118 dp = (mlib_d64 *) dl;
119 dend = dl + xsize - 1;
120
121 vis_alignaddr((void *) 0, 6);
122
123 i = 0;
124
125 if (xsize >= 4) {
126
127 s0 = *sa++;
128
129#pragma pipeloop(0)
130 for(i = 0; i <= xsize - 8; i+=4) {
131 t3 = VIS_LD_U16_I(table3, (s0 << 1) & 0x1FE);
132 t2 = VIS_LD_U16_I(table2, (s0 >> 7) & 0x1FE);
133 t1 = VIS_LD_U16_I(table1, (s0 >> 15) & 0x1FE);
134 t0 = VIS_LD_U16_I(table0, (s0 >> 23) & 0x1FE);
135 acc0 = vis_faligndata(t3, acc0);
136 acc0 = vis_faligndata(t2, acc0);
137 acc0 = vis_faligndata(t1, acc0);
138 acc0 = vis_faligndata(t0, acc0);
139 s0 = *sa++;
140 *dp++ = acc0;
141 }
142
143 t3 = VIS_LD_U16_I(table3, (s0 << 1) & 0x1FE);
144 t2 = VIS_LD_U16_I(table2, (s0 >> 7) & 0x1FE);
145 t1 = VIS_LD_U16_I(table1, (s0 >> 15) & 0x1FE);
146 t0 = VIS_LD_U16_I(table0, (s0 >> 23) & 0x1FE);
147 acc0 = vis_faligndata(t3, acc0);
148 acc0 = vis_faligndata(t2, acc0);
149 acc0 = vis_faligndata(t1, acc0);
150 acc0 = vis_faligndata(t0, acc0);
151 *dp++ = acc0;
152 }
153
154 sp = (mlib_u8*)sa;
155
156 if ((mlib_addr) dp <= (mlib_addr) dend) {
157
158 num = (mlib_u16*) dend - (mlib_u16*) dp;
159 sp += num;
160 num ++;
161
162 if (num == 1) {
163 s0 = (mlib_s32) *sp;
164 sp --;
165
166 t0 = VIS_LD_U16_I(table0, 2*s0);
167 acc0 = vis_faligndata(t0, acc0);
168 } else if (num == 2) {
169 s0 = (mlib_s32) *sp;
170 sp --;
171
172 t0 = VIS_LD_U16_I(table1, 2*s0);
173 acc0 = vis_faligndata(t0, acc0);
174
175 s0 = (mlib_s32) *sp;
176 sp --;
177
178 t0 = VIS_LD_U16_I(table0, 2*s0);
179 acc0 = vis_faligndata(t0, acc0);
180 } else if (num == 3) {
181 s0 = (mlib_s32) *sp;
182 sp --;
183
184 t0 = VIS_LD_U16_I(table2, 2*s0);
185 acc0 = vis_faligndata(t0, acc0);
186
187 s0 = (mlib_s32) *sp;
188 sp --;
189
190 t0 = VIS_LD_U16_I(table1, 2*s0);
191 acc0 = vis_faligndata(t0, acc0);
192
193 s0 = (mlib_s32) *sp;
194 sp --;
195
196 t0 = VIS_LD_U16_I(table0, 2*s0);
197 acc0 = vis_faligndata(t0, acc0);
198 }
199
200 emask = vis_edge16(dp, dend);
201 vis_pst_16(acc0, dp, emask);
202 }
203}
204
205/***************************************************************/
206void mlib_v_ImageLookUp_U8_U16_124_SrcOff1_D1(const mlib_u8 *src,
207 mlib_u16 *dst,
208 mlib_s32 xsize,
209 const mlib_u16 *table0,
210 const mlib_u16 *table1,
211 const mlib_u16 *table2,
212 const mlib_u16 *table3)
213{
214 mlib_u32 *sa; /* aligned pointer to source data */
215 mlib_u8 *sp; /* pointer to source data */
216 mlib_u32 s0, s1; /* source data */
217 mlib_u16 *dl; /* pointer to start of destination */
218 mlib_u16 *dend; /* pointer to end of destination */
219 mlib_d64 *dp; /* aligned pointer to destination */
220 mlib_d64 t0, t1, t2; /* destination data */
221 mlib_d64 t3, acc0; /* destination data */
222 mlib_s32 emask; /* edge mask */
223 mlib_s32 i, num; /* loop variable */
224
225 sa = (mlib_u32*)(src - 1);
226 dl = dst;
227 dp = (mlib_d64 *) dl;
228 dend = dl + xsize - 1;
229
230 vis_alignaddr((void *) 0, 6);
231
232 s0 = *sa++;
233
234 if (xsize >= 4) {
235
236 s1 = *sa++;
237
238#pragma pipeloop(0)
239 for(i = 0; i <= xsize - 8; i+=4) {
240 t3 = VIS_LD_U16_I(table3, (s1 >> 23) & 0x1FE);
241 t2 = VIS_LD_U16_I(table2, (s0 << 1) & 0x1FE);
242 t1 = VIS_LD_U16_I(table1, (s0 >> 7) & 0x1FE);
243 t0 = VIS_LD_U16_I(table0, (s0 >> 15) & 0x1FE);
244 acc0 = vis_faligndata(t3, acc0);
245 acc0 = vis_faligndata(t2, acc0);
246 acc0 = vis_faligndata(t1, acc0);
247 acc0 = vis_faligndata(t0, acc0);
248 s0 = s1;
249 s1 = *sa++;
250 *dp++ = acc0;
251 }
252
253 t3 = VIS_LD_U16_I(table3, (s1 >> 23) & 0x1FE);
254 t2 = VIS_LD_U16_I(table2, (s0 << 1) & 0x1FE);
255 t1 = VIS_LD_U16_I(table1, (s0 >> 7) & 0x1FE);
256 t0 = VIS_LD_U16_I(table0, (s0 >> 15) & 0x1FE);
257 acc0 = vis_faligndata(t3, acc0);
258 acc0 = vis_faligndata(t2, acc0);
259 acc0 = vis_faligndata(t1, acc0);
260 acc0 = vis_faligndata(t0, acc0);
261 s0 = s1;
262 *dp++ = acc0;
263 }
264
265 sp = (mlib_u8*)sa;
266 sp -= 3;
267
268 if ((mlib_addr) dp <= (mlib_addr) dend) {
269
270 num = (mlib_u16*) dend - (mlib_u16*) dp;
271 sp += num;
272 num ++;
273
274 if (num == 1) {
275 s0 = (mlib_s32) *sp;
276 sp --;
277
278 t0 = VIS_LD_U16_I(table0, 2*s0);
279 acc0 = vis_faligndata(t0, acc0);
280 } else if (num == 2) {
281 s0 = (mlib_s32) *sp;
282 sp --;
283
284 t0 = VIS_LD_U16_I(table1, 2*s0);
285 acc0 = vis_faligndata(t0, acc0);
286
287 s0 = (mlib_s32) *sp;
288 sp --;
289
290 t0 = VIS_LD_U16_I(table0, 2*s0);
291 acc0 = vis_faligndata(t0, acc0);
292 } else if (num == 3) {
293 s0 = (mlib_s32) *sp;
294 sp --;
295
296 t0 = VIS_LD_U16_I(table2, 2*s0);
297 acc0 = vis_faligndata(t0, acc0);
298
299 s0 = (mlib_s32) *sp;
300 sp --;
301
302 t0 = VIS_LD_U16_I(table1, 2*s0);
303 acc0 = vis_faligndata(t0, acc0);
304
305 s0 = (mlib_s32) *sp;
306 sp --;
307
308 t0 = VIS_LD_U16_I(table0, 2*s0);
309 acc0 = vis_faligndata(t0, acc0);
310 }
311
312 emask = vis_edge16(dp, dend);
313 vis_pst_16(acc0, dp, emask);
314 }
315}
316
317/***************************************************************/
318void mlib_v_ImageLookUp_U8_U16_124_SrcOff2_D1(const mlib_u8 *src,
319 mlib_u16 *dst,
320 mlib_s32 xsize,
321 const mlib_u16 *table0,
322 const mlib_u16 *table1,
323 const mlib_u16 *table2,
324 const mlib_u16 *table3)
325{
326 mlib_u32 *sa; /* aligned pointer to source data */
327 mlib_u8 *sp; /* pointer to source data */
328 mlib_u32 s0, s1; /* source data */
329 mlib_u16 *dl; /* pointer to start of destination */
330 mlib_u16 *dend; /* pointer to end of destination */
331 mlib_d64 *dp; /* aligned pointer to destination */
332 mlib_d64 t0, t1, t2; /* destination data */
333 mlib_d64 t3, acc0; /* destination data */
334 mlib_s32 emask; /* edge mask */
335 mlib_s32 i, num; /* loop variable */
336
337 sa = (mlib_u32*)(src - 2);
338 dl = dst;
339 dp = (mlib_d64 *) dl;
340 dend = dl + xsize - 1;
341
342 vis_alignaddr((void *) 0, 6);
343
344 s0 = *sa++;
345
346 if (xsize >= 4) {
347
348 s1 = *sa++;
349
350#pragma pipeloop(0)
351 for(i = 0; i <= xsize - 8; i+=4) {
352 t3 = VIS_LD_U16_I(table3, (s1 >> 15) & 0x1FE);
353 t2 = VIS_LD_U16_I(table2, (s1 >> 23) & 0x1FE);
354 t1 = VIS_LD_U16_I(table1, (s0 << 1) & 0x1FE);
355 t0 = VIS_LD_U16_I(table0, (s0 >> 7) & 0x1FE);
356 acc0 = vis_faligndata(t3, acc0);
357 acc0 = vis_faligndata(t2, acc0);
358 acc0 = vis_faligndata(t1, acc0);
359 acc0 = vis_faligndata(t0, acc0);
360 s0 = s1;
361 s1 = *sa++;
362 *dp++ = acc0;
363 }
364
365 t3 = VIS_LD_U16_I(table3, (s1 >> 15) & 0x1FE);
366 t2 = VIS_LD_U16_I(table2, (s1 >> 23) & 0x1FE);
367 t1 = VIS_LD_U16_I(table1, (s0 << 1) & 0x1FE);
368 t0 = VIS_LD_U16_I(table0, (s0 >> 7) & 0x1FE);
369 acc0 = vis_faligndata(t3, acc0);
370 acc0 = vis_faligndata(t2, acc0);
371 acc0 = vis_faligndata(t1, acc0);
372 acc0 = vis_faligndata(t0, acc0);
373 s0 = s1;
374 *dp++ = acc0;
375 }
376
377 sp = (mlib_u8*)sa;
378 sp -= 2;
379
380 if ((mlib_addr) dp <= (mlib_addr) dend) {
381
382 num = (mlib_u16*) dend - (mlib_u16*) dp;
383 sp += num;
384 num ++;
385
386 if (num == 1) {
387 s0 = (mlib_s32) *sp;
388 sp --;
389
390 t0 = VIS_LD_U16_I(table0, 2*s0);
391 acc0 = vis_faligndata(t0, acc0);
392 } else if (num == 2) {
393 s0 = (mlib_s32) *sp;
394 sp --;
395
396 t0 = VIS_LD_U16_I(table1, 2*s0);
397 acc0 = vis_faligndata(t0, acc0);
398
399 s0 = (mlib_s32) *sp;
400 sp --;
401
402 t0 = VIS_LD_U16_I(table0, 2*s0);
403 acc0 = vis_faligndata(t0, acc0);
404 } else if (num == 3) {
405 s0 = (mlib_s32) *sp;
406 sp --;
407
408 t0 = VIS_LD_U16_I(table2, 2*s0);
409 acc0 = vis_faligndata(t0, acc0);
410
411 s0 = (mlib_s32) *sp;
412 sp --;
413
414 t0 = VIS_LD_U16_I(table1, 2*s0);
415 acc0 = vis_faligndata(t0, acc0);
416
417 s0 = (mlib_s32) *sp;
418 sp --;
419
420 t0 = VIS_LD_U16_I(table0, 2*s0);
421 acc0 = vis_faligndata(t0, acc0);
422 }
423
424 emask = vis_edge16(dp, dend);
425 vis_pst_16(acc0, dp, emask);
426 }
427}
428
429/***************************************************************/
430void mlib_v_ImageLookUp_U8_U16_124_SrcOff3_D1(const mlib_u8 *src,
431 mlib_u16 *dst,
432 mlib_s32 xsize,
433 const mlib_u16 *table0,
434 const mlib_u16 *table1,
435 const mlib_u16 *table2,
436 const mlib_u16 *table3)
437{
438 mlib_u32 *sa; /* aligned pointer to source data */
439 mlib_u8 *sp; /* pointer to source data */
440 mlib_u32 s0, s1; /* source data */
441 mlib_u16 *dl; /* pointer to start of destination */
442 mlib_u16 *dend; /* pointer to end of destination */
443 mlib_d64 *dp; /* aligned pointer to destination */
444 mlib_d64 t0, t1, t2; /* destination data */
445 mlib_d64 t3, acc0; /* destination data */
446 mlib_s32 emask; /* edge mask */
447 mlib_s32 i, num; /* loop variable */
448
449 sa = (mlib_u32*)(src - 3);
450 dl = dst;
451 dp = (mlib_d64 *) dl;
452 dend = dl + xsize - 1;
453
454 vis_alignaddr((void *) 0, 6);
455
456 s0 = *sa++;
457
458 if (xsize >= 4) {
459
460 s1 = *sa++;
461
462#pragma pipeloop(0)
463 for(i = 0; i <= xsize - 8; i+=4) {
464 t3 = VIS_LD_U16_I(table3, (s1 >> 7) & 0x1FE);
465 t2 = VIS_LD_U16_I(table2, (s1 >> 15) & 0x1FE);
466 t1 = VIS_LD_U16_I(table1, (s1 >> 23) & 0x1FE);
467 t0 = VIS_LD_U16_I(table0, (s0 << 1) & 0x1FE);
468 acc0 = vis_faligndata(t3, acc0);
469 acc0 = vis_faligndata(t2, acc0);
470 acc0 = vis_faligndata(t1, acc0);
471 acc0 = vis_faligndata(t0, acc0);
472 s0 = s1;
473 s1 = *sa++;
474 *dp++ = acc0;
475 }
476
477 t3 = VIS_LD_U16_I(table3, (s1 >> 7) & 0x1FE);
478 t2 = VIS_LD_U16_I(table2, (s1 >> 15) & 0x1FE);
479 t1 = VIS_LD_U16_I(table1, (s1 >> 23) & 0x1FE);
480 t0 = VIS_LD_U16_I(table0, (s0 << 1) & 0x1FE);
481 acc0 = vis_faligndata(t3, acc0);
482 acc0 = vis_faligndata(t2, acc0);
483 acc0 = vis_faligndata(t1, acc0);
484 acc0 = vis_faligndata(t0, acc0);
485 s0 = s1;
486 *dp++ = acc0;
487 }
488
489 sp = (mlib_u8*)sa;
490 sp -= 1;
491
492 if ((mlib_addr) dp <= (mlib_addr) dend) {
493
494 num = (mlib_u16*) dend - (mlib_u16*) dp;
495 sp += num;
496 num ++;
497
498 if (num == 1) {
499 s0 = (mlib_s32) *sp;
500 sp --;
501
502 t0 = VIS_LD_U16_I(table0, 2*s0);
503 acc0 = vis_faligndata(t0, acc0);
504 } else if (num == 2) {
505 s0 = (mlib_s32) *sp;
506 sp --;
507
508 t0 = VIS_LD_U16_I(table1, 2*s0);
509 acc0 = vis_faligndata(t0, acc0);
510
511 s0 = (mlib_s32) *sp;
512 sp --;
513
514 t0 = VIS_LD_U16_I(table0, 2*s0);
515 acc0 = vis_faligndata(t0, acc0);
516 } else if (num == 3) {
517 s0 = (mlib_s32) *sp;
518 sp --;
519
520 t0 = VIS_LD_U16_I(table2, 2*s0);
521 acc0 = vis_faligndata(t0, acc0);
522
523 s0 = (mlib_s32) *sp;
524 sp --;
525
526 t0 = VIS_LD_U16_I(table1, 2*s0);
527 acc0 = vis_faligndata(t0, acc0);
528
529 s0 = (mlib_s32) *sp;
530 sp --;
531
532 t0 = VIS_LD_U16_I(table0, 2*s0);
533 acc0 = vis_faligndata(t0, acc0);
534 }
535
536 emask = vis_edge16(dp, dend);
537 vis_pst_16(acc0, dp, emask);
538 }
539}
540
541/***************************************************************/
542void mlib_v_ImageLookUp_U8_U16_1(const mlib_u8 *src,
543 mlib_s32 slb,
544 mlib_u16 *dst,
545 mlib_s32 dlb,
546 mlib_s32 xsize,
547 mlib_s32 ysize,
548 const mlib_u16 **table)
549{
550 mlib_u8 *sl;
551 mlib_u16 *dl;
552 const mlib_u16 *tab = table[0];
553 mlib_s32 j, i;
554
555 sl = (void *)src;
556 dl = dst;
557
558 /* row loop */
559 for (j = 0; j < ysize; j ++) {
560 mlib_u8 *sp = sl;
561 mlib_u16 *dp = dl;
562 mlib_s32 off, size = xsize;
563
564 off = ((8 - ((mlib_addr)dp & 7)) & 7) >> 1;
565
566 off = (off < size) ? off : size;
567
568 for (i = 0; i < off; i++) {
569 *dp++ = tab[(*sp++)];
570 size--;
571 }
572
573 if (size > 0) {
574
575 off = (mlib_addr)sp & 3;
576
577 if (off == 0) {
578 mlib_v_ImageLookUp_U8_U16_124_SrcOff0_D1(sp, dp, size, tab, tab, tab, tab);
579 } else if (off == 1) {
580 mlib_v_ImageLookUp_U8_U16_124_SrcOff1_D1(sp, dp, size, tab, tab, tab, tab);
581 } else if (off == 2) {
582 mlib_v_ImageLookUp_U8_U16_124_SrcOff2_D1(sp, dp, size, tab, tab, tab, tab);
583 } else {
584 mlib_v_ImageLookUp_U8_U16_124_SrcOff3_D1(sp, dp, size, tab, tab, tab, tab);
585 }
586 }
587
588 sl = (mlib_u8 *) ((mlib_u8 *) sl + slb);
589 dl = (mlib_u16 *) ((mlib_u8 *) dl + dlb);
590 }
591}
592
593/***************************************************************/
594void mlib_v_ImageLookUp_U8_U16_2(const mlib_u8 *src,
595 mlib_s32 slb,
596 mlib_u16 *dst,
597 mlib_s32 dlb,
598 mlib_s32 xsize,
599 mlib_s32 ysize,
600 const mlib_u16 **table)
601{
602 mlib_u8 *sl;
603 mlib_u16 *dl;
604 const mlib_u16 *tab;
605 mlib_s32 j, i;
606
607 sl = (void *)src;
608 dl = dst;
609
610 /* row loop */
611 for (j = 0; j < ysize; j ++) {
612 mlib_u8 *sp = sl;
613 mlib_u16 *dp = dl;
614 mlib_s32 off, size = xsize * 2;
615 const mlib_u16 *tab0 = table[0];
616 const mlib_u16 *tab1 = table[1];
617
618 off = ((8 - ((mlib_addr)dp & 7)) & 7) >> 1;
619
620 off = (off < size) ? off : size;
621
622 for (i = 0; i < off - 1; i+=2) {
623 *dp++ = tab0[(*sp++)];
624 *dp++ = tab1[(*sp++)];
625 size-=2;
626 }
627
628 if ((off & 1) != 0) {
629 *dp++ = tab0[(*sp++)];
630 size--;
631 tab = tab0; tab0 = tab1; tab1 = tab;
632 }
633
634 if (size > 0) {
635
636 off = (mlib_addr)sp & 3;
637
638 if (off == 0) {
639 mlib_v_ImageLookUp_U8_U16_124_SrcOff0_D1(sp, dp, size, tab0, tab1, tab0, tab1);
640 } else if (off == 1) {
641 mlib_v_ImageLookUp_U8_U16_124_SrcOff1_D1(sp, dp, size, tab0, tab1, tab0, tab1);
642 } else if (off == 2) {
643 mlib_v_ImageLookUp_U8_U16_124_SrcOff2_D1(sp, dp, size, tab0, tab1, tab0, tab1);
644 } else {
645 mlib_v_ImageLookUp_U8_U16_124_SrcOff3_D1(sp, dp, size, tab0, tab1, tab0, tab1);
646 }
647 }
648
649 sl = (mlib_u8 *) ((mlib_u8 *) sl + slb);
650 dl = (mlib_u16 *) ((mlib_u8 *) dl + dlb);
651 }
652}
653
654/***************************************************************/
655void mlib_v_ImageLookUp_U8_U16_4(const mlib_u8 *src,
656 mlib_s32 slb,
657 mlib_u16 *dst,
658 mlib_s32 dlb,
659 mlib_s32 xsize,
660 mlib_s32 ysize,
661 const mlib_u16 **table)
662{
663 mlib_u8 *sl;
664 mlib_u16 *dl;
665 const mlib_u16 *tab;
666 mlib_s32 j;
667
668 sl = (void *)src;
669 dl = dst;
670
671 /* row loop */
672 for (j = 0; j < ysize; j ++) {
673 mlib_u8 *sp = sl;
674 mlib_u16 *dp = dl;
675 const mlib_u16 *tab0 = table[0];
676 const mlib_u16 *tab1 = table[1];
677 const mlib_u16 *tab2 = table[2];
678 const mlib_u16 *tab3 = table[3];
679 mlib_s32 off, size = xsize * 4;
680
681 off = ((8 - ((mlib_addr)dp & 7)) & 7) >> 1;
682
683 off = (off < size) ? off : size;
684
685 if (off == 1) {
686 *dp++ = tab0[(*sp++)];
687 tab = tab0; tab0 = tab1;
688 tab1 = tab2; tab2 = tab3; tab3 = tab;
689 size--;
690 } else if (off == 2) {
691 *dp++ = tab0[(*sp++)];
692 *dp++ = tab1[(*sp++)];
693 tab = tab0; tab0 = tab2; tab2 = tab;
694 tab = tab1; tab1 = tab3; tab3 = tab;
695 size-=2;
696 } else if (off == 3) {
697 *dp++ = tab0[(*sp++)];
698 *dp++ = tab1[(*sp++)];
699 *dp++ = tab2[(*sp++)];
700 tab = tab3; tab3 = tab2;
701 tab2 = tab1; tab1 = tab0; tab0 = tab;
702 size-=3;
703 }
704
705 if (size > 0) {
706
707 off = (mlib_addr)sp & 3;
708
709 if (off == 0) {
710 mlib_v_ImageLookUp_U8_U16_124_SrcOff0_D1(sp, dp, size, tab0, tab1, tab2, tab3);
711 } else if (off == 1) {
712 mlib_v_ImageLookUp_U8_U16_124_SrcOff1_D1(sp, dp, size, tab0, tab1, tab2, tab3);
713 } else if (off == 2) {
714 mlib_v_ImageLookUp_U8_U16_124_SrcOff2_D1(sp, dp, size, tab0, tab1, tab2, tab3);
715 } else {
716 mlib_v_ImageLookUp_U8_U16_124_SrcOff3_D1(sp, dp, size, tab0, tab1, tab2, tab3);
717 }
718 }
719
720 sl = (mlib_u8 *) ((mlib_u8 *) sl + slb);
721 dl = (mlib_u16 *) ((mlib_u8 *) dl + dlb);
722 }
723}
724
725/***************************************************************/
726void mlib_v_ImageLookUp_U8_U16_3_SrcOff0_D1(const mlib_u8 *src,
727 mlib_u16 *dst,
728 mlib_s32 xsize,
729 const mlib_u16 *table0,
730 const mlib_u16 *table1,
731 const mlib_u16 *table2)
732{
733 mlib_u32 *sa; /* aligned pointer to source data */
734 mlib_u8 *sp; /* pointer to source data */
735 mlib_u32 s0, s1, s2; /* source data */
736 mlib_u16 *dl; /* pointer to start of destination */
737 mlib_u16 *dend; /* pointer to end of destination */
738 mlib_d64 *dp; /* aligned pointer to destination */
739 mlib_d64 t0, t1, t2; /* destination data */
740 mlib_d64 t3, t4, t5; /* destination data */
741 mlib_d64 t6, t7, t8; /* destination data */
742 mlib_d64 t9, t10, t11; /* destination data */
743 mlib_d64 acc0, acc1, acc2; /* destination data */
744 mlib_s32 emask; /* edge mask */
745 mlib_s32 i, num; /* loop variable */
746 const mlib_u16 *table;
747
748 sa = (mlib_u32*)src;
749 dl = dst;
750 dp = (mlib_d64 *) dl;
751 dend = dl + xsize - 1;
752
753 vis_alignaddr((void *) 0, 6);
754
755 i = 0;
756
757 if (xsize >= 12) {
758
759 s0 = sa[0];
760 s1 = sa[1];
761 s2 = sa[2];
762 sa += 3;
763
764#pragma pipeloop(0)
765 for(i = 0; i <= xsize - 24; i+=12, sa += 3, dp += 3) {
766 t3 = VIS_LD_U16_I(table0, (s0 << 1) & 0x1FE);
767 t2 = VIS_LD_U16_I(table2, (s0 >> 7) & 0x1FE);
768 t1 = VIS_LD_U16_I(table1, (s0 >> 15) & 0x1FE);
769 t0 = VIS_LD_U16_I(table0, (s0 >> 23) & 0x1FE);
770 t7 = VIS_LD_U16_I(table1, (s1 << 1) & 0x1FE);
771 t6 = VIS_LD_U16_I(table0, (s1 >> 7) & 0x1FE);
772 t5 = VIS_LD_U16_I(table2, (s1 >> 15) & 0x1FE);
773 t4 = VIS_LD_U16_I(table1, (s1 >> 23) & 0x1FE);
774 t11 = VIS_LD_U16_I(table2, (s2 << 1) & 0x1FE);
775 t10 = VIS_LD_U16_I(table1, (s2 >> 7) & 0x1FE);
776 t9 = VIS_LD_U16_I(table0, (s2 >> 15) & 0x1FE);
777 t8 = VIS_LD_U16_I(table2, (s2 >> 23) & 0x1FE);
778 acc0 = vis_faligndata(t3, acc0);
779 acc0 = vis_faligndata(t2, acc0);
780 acc0 = vis_faligndata(t1, acc0);
781 acc0 = vis_faligndata(t0, acc0);
782 acc1 = vis_faligndata(t7, acc1);
783 acc1 = vis_faligndata(t6, acc1);
784 acc1 = vis_faligndata(t5, acc1);
785 acc1 = vis_faligndata(t4, acc1);
786 acc2 = vis_faligndata(t11, acc2);
787 acc2 = vis_faligndata(t10, acc2);
788 acc2 = vis_faligndata(t9, acc2);
789 acc2 = vis_faligndata(t8, acc2);
790 s0 = sa[0];
791 s1 = sa[1];
792 s2 = sa[2];
793 dp[0] = acc0;
794 dp[1] = acc1;
795 dp[2] = acc2;
796 }
797
798 t3 = VIS_LD_U16_I(table0, (s0 << 1) & 0x1FE);
799 t2 = VIS_LD_U16_I(table2, (s0 >> 7) & 0x1FE);
800 t1 = VIS_LD_U16_I(table1, (s0 >> 15) & 0x1FE);
801 t0 = VIS_LD_U16_I(table0, (s0 >> 23) & 0x1FE);
802 t7 = VIS_LD_U16_I(table1, (s1 << 1) & 0x1FE);
803 t6 = VIS_LD_U16_I(table0, (s1 >> 7) & 0x1FE);
804 t5 = VIS_LD_U16_I(table2, (s1 >> 15) & 0x1FE);
805 t4 = VIS_LD_U16_I(table1, (s1 >> 23) & 0x1FE);
806 t11 = VIS_LD_U16_I(table2, (s2 << 1) & 0x1FE);
807 t10 = VIS_LD_U16_I(table1, (s2 >> 7) & 0x1FE);
808 t9 = VIS_LD_U16_I(table0, (s2 >> 15) & 0x1FE);
809 t8 = VIS_LD_U16_I(table2, (s2 >> 23) & 0x1FE);
810 acc0 = vis_faligndata(t3, acc0);
811 acc0 = vis_faligndata(t2, acc0);
812 acc0 = vis_faligndata(t1, acc0);
813 acc0 = vis_faligndata(t0, acc0);
814 acc1 = vis_faligndata(t7, acc1);
815 acc1 = vis_faligndata(t6, acc1);
816 acc1 = vis_faligndata(t5, acc1);
817 acc1 = vis_faligndata(t4, acc1);
818 acc2 = vis_faligndata(t11, acc2);
819 acc2 = vis_faligndata(t10, acc2);
820 acc2 = vis_faligndata(t9, acc2);
821 acc2 = vis_faligndata(t8, acc2);
822 dp[0] = acc0;
823 dp[1] = acc1;
824 dp[2] = acc2;
825 dp += 3; i += 12;
826 }
827
828 if (i <= xsize - 8) {
829 s0 = sa[0];
830 s1 = sa[1];
831 t3 = VIS_LD_U16_I(table0, (s0 << 1) & 0x1FE);
832 t2 = VIS_LD_U16_I(table2, (s0 >> 7) & 0x1FE);
833 t1 = VIS_LD_U16_I(table1, (s0 >> 15) & 0x1FE);
834 t0 = VIS_LD_U16_I(table0, (s0 >> 23) & 0x1FE);
835 t7 = VIS_LD_U16_I(table1, (s1 << 1) & 0x1FE);
836 t6 = VIS_LD_U16_I(table0, (s1 >> 7) & 0x1FE);
837 t5 = VIS_LD_U16_I(table2, (s1 >> 15) & 0x1FE);
838 t4 = VIS_LD_U16_I(table1, (s1 >> 23) & 0x1FE);
839 acc0 = vis_faligndata(t3, acc0);
840 acc0 = vis_faligndata(t2, acc0);
841 acc0 = vis_faligndata(t1, acc0);
842 acc0 = vis_faligndata(t0, acc0);
843 acc1 = vis_faligndata(t7, acc1);
844 acc1 = vis_faligndata(t6, acc1);
845 acc1 = vis_faligndata(t5, acc1);
846 acc1 = vis_faligndata(t4, acc1);
847 dp[0] = acc0;
848 dp[1] = acc1;
849 table = table0; table0 = table2;
850 table2 = table1; table1 = table;
851 sa += 2; i += 8; dp += 2;
852 }
853
854 if (i <= xsize - 4) {
855 s0 = sa[0];
856 t3 = VIS_LD_U16_I(table0, (s0 << 1) & 0x1FE);
857 t2 = VIS_LD_U16_I(table2, (s0 >> 7) & 0x1FE);
858 t1 = VIS_LD_U16_I(table1, (s0 >> 15) & 0x1FE);
859 t0 = VIS_LD_U16_I(table0, (s0 >> 23) & 0x1FE);
860 acc0 = vis_faligndata(t3, acc0);
861 acc0 = vis_faligndata(t2, acc0);
862 acc0 = vis_faligndata(t1, acc0);
863 acc0 = vis_faligndata(t0, acc0);
864 dp[0] = acc0;
865 table = table0; table0 = table1;
866 table1 = table2; table2 = table;
867 sa++; i += 4; dp++;
868 }
869
870 sp = (mlib_u8*)sa;
871
872 if ((mlib_addr) dp <= (mlib_addr) dend) {
873
874 num = (mlib_u16*) dend - (mlib_u16*) dp;
875 sp += num;
876 num ++;
877
878 if (num == 1) {
879 s0 = (mlib_s32) *sp;
880 sp --;
881
882 t0 = VIS_LD_U16_I(table0, 2*s0);
883 acc0 = vis_faligndata(t0, acc0);
884 } else if (num == 2) {
885 s0 = (mlib_s32) *sp;
886 sp --;
887
888 t0 = VIS_LD_U16_I(table1, 2*s0);
889 acc0 = vis_faligndata(t0, acc0);
890
891 s0 = (mlib_s32) *sp;
892 sp --;
893
894 t0 = VIS_LD_U16_I(table0, 2*s0);
895 acc0 = vis_faligndata(t0, acc0);
896 } else if (num == 3) {
897 s0 = (mlib_s32) *sp;
898 sp --;
899
900 t0 = VIS_LD_U16_I(table2, 2*s0);
901 acc0 = vis_faligndata(t0, acc0);
902
903 s0 = (mlib_s32) *sp;
904 sp --;
905
906 t0 = VIS_LD_U16_I(table1, 2*s0);
907 acc0 = vis_faligndata(t0, acc0);
908
909 s0 = (mlib_s32) *sp;
910 sp --;
911
912 t0 = VIS_LD_U16_I(table0, 2*s0);
913 acc0 = vis_faligndata(t0, acc0);
914 }
915
916 emask = vis_edge16(dp, dend);
917 vis_pst_16(acc0, dp, emask);
918 }
919}
920
921/***************************************************************/
922void mlib_v_ImageLookUp_U8_U16_3_SrcOff1_D1(const mlib_u8 *src,
923 mlib_u16 *dst,
924 mlib_s32 xsize,
925 const mlib_u16 *table0,
926 const mlib_u16 *table1,
927 const mlib_u16 *table2)
928{
929 mlib_u32 *sa; /* aligned pointer to source data */
930 mlib_u8 *sp; /* pointer to source data */
931 mlib_u32 s0, s1, s2, s3; /* source data */
932 mlib_u16 *dl; /* pointer to start of destination */
933 mlib_u16 *dend; /* pointer to end of destination */
934 mlib_d64 *dp; /* aligned pointer to destination */
935 mlib_d64 t0, t1, t2; /* destination data */
936 mlib_d64 t3, t4, t5; /* destination data */
937 mlib_d64 t6, t7, t8; /* destination data */
938 mlib_d64 t9, t10, t11; /* destination data */
939 mlib_d64 acc0, acc1, acc2; /* destination data */
940 mlib_s32 emask; /* edge mask */
941 mlib_s32 i, num; /* loop variable */
942 const mlib_u16 *table;
943
944 sa = (mlib_u32*)(src - 1);
945 dl = dst;
946 dp = (mlib_d64 *) dl;
947 dend = dl + xsize - 1;
948
949 vis_alignaddr((void *) 0, 6);
950
951 i = 0;
952
953 s0 = *sa++;
954
955 if (xsize >= 12) {
956
957 s1 = sa[0];
958 s2 = sa[1];
959 s3 = sa[2];
960 sa += 3;
961
962#pragma pipeloop(0)
963 for(i = 0; i <= xsize - 24; i+=12, sa += 3, dp += 3) {
964 t3 = VIS_LD_U16_I(table0, (s1 >> 23) & 0x1FE);
965 t2 = VIS_LD_U16_I(table2, (s0 << 1) & 0x1FE);
966 t1 = VIS_LD_U16_I(table1, (s0 >> 7) & 0x1FE);
967 t0 = VIS_LD_U16_I(table0, (s0 >> 15) & 0x1FE);
968 t7 = VIS_LD_U16_I(table1, (s2 >> 23) & 0x1FE);
969 t6 = VIS_LD_U16_I(table0, (s1 << 1) & 0x1FE);
970 t5 = VIS_LD_U16_I(table2, (s1 >> 7) & 0x1FE);
971 t4 = VIS_LD_U16_I(table1, (s1 >> 15) & 0x1FE);
972 t11 = VIS_LD_U16_I(table2, (s3 >> 23) & 0x1FE);
973 t10 = VIS_LD_U16_I(table1, (s2 << 1) & 0x1FE);
974 t9 = VIS_LD_U16_I(table0, (s2 >> 7) & 0x1FE);
975 t8 = VIS_LD_U16_I(table2, (s2 >> 15) & 0x1FE);
976 acc0 = vis_faligndata(t3, acc0);
977 acc0 = vis_faligndata(t2, acc0);
978 acc0 = vis_faligndata(t1, acc0);
979 acc0 = vis_faligndata(t0, acc0);
980 acc1 = vis_faligndata(t7, acc1);
981 acc1 = vis_faligndata(t6, acc1);
982 acc1 = vis_faligndata(t5, acc1);
983 acc1 = vis_faligndata(t4, acc1);
984 acc2 = vis_faligndata(t11, acc2);
985 acc2 = vis_faligndata(t10, acc2);
986 acc2 = vis_faligndata(t9, acc2);
987 acc2 = vis_faligndata(t8, acc2);
988 s0 = s3;
989 s1 = sa[0];
990 s2 = sa[1];
991 s3 = sa[2];
992 dp[0] = acc0;
993 dp[1] = acc1;
994 dp[2] = acc2;
995 }
996
997 t3 = VIS_LD_U16_I(table0, (s1 >> 23) & 0x1FE);
998 t2 = VIS_LD_U16_I(table2, (s0 << 1) & 0x1FE);
999 t1 = VIS_LD_U16_I(table1, (s0 >> 7) & 0x1FE);
1000 t0 = VIS_LD_U16_I(table0, (s0 >> 15) & 0x1FE);
1001 t7 = VIS_LD_U16_I(table1, (s2 >> 23) & 0x1FE);
1002 t6 = VIS_LD_U16_I(table0, (s1 << 1) & 0x1FE);
1003 t5 = VIS_LD_U16_I(table2, (s1 >> 7) & 0x1FE);
1004 t4 = VIS_LD_U16_I(table1, (s1 >> 15) & 0x1FE);
1005 t11 = VIS_LD_U16_I(table2, (s3 >> 23) & 0x1FE);
1006 t10 = VIS_LD_U16_I(table1, (s2 << 1) & 0x1FE);
1007 t9 = VIS_LD_U16_I(table0, (s2 >> 7) & 0x1FE);
1008 t8 = VIS_LD_U16_I(table2, (s2 >> 15) & 0x1FE);
1009 acc0 = vis_faligndata(t3, acc0);
1010 acc0 = vis_faligndata(t2, acc0);
1011 acc0 = vis_faligndata(t1, acc0);
1012 acc0 = vis_faligndata(t0, acc0);
1013 acc1 = vis_faligndata(t7, acc1);
1014 acc1 = vis_faligndata(t6, acc1);
1015 acc1 = vis_faligndata(t5, acc1);
1016 acc1 = vis_faligndata(t4, acc1);
1017 acc2 = vis_faligndata(t11, acc2);
1018 acc2 = vis_faligndata(t10, acc2);
1019 acc2 = vis_faligndata(t9, acc2);
1020 acc2 = vis_faligndata(t8, acc2);
1021 dp[0] = acc0;
1022 dp[1] = acc1;
1023 dp[2] = acc2;
1024 s0 = s3;
1025 dp += 3; i += 12;
1026 }
1027
1028 if (i <= xsize - 8) {
1029 s1 = sa[0];
1030 s2 = sa[1];
1031 t3 = VIS_LD_U16_I(table0, (s1 >> 23) & 0x1FE);
1032 t2 = VIS_LD_U16_I(table2, (s0 << 1) & 0x1FE);
1033 t1 = VIS_LD_U16_I(table1, (s0 >> 7) & 0x1FE);
1034 t0 = VIS_LD_U16_I(table0, (s0 >> 15) & 0x1FE);
1035 t7 = VIS_LD_U16_I(table1, (s2 >> 23) & 0x1FE);
1036 t6 = VIS_LD_U16_I(table0, (s1 << 1) & 0x1FE);
1037 t5 = VIS_LD_U16_I(table2, (s1 >> 7) & 0x1FE);
1038 t4 = VIS_LD_U16_I(table1, (s1 >> 15) & 0x1FE);
1039 acc0 = vis_faligndata(t3, acc0);
1040 acc0 = vis_faligndata(t2, acc0);
1041 acc0 = vis_faligndata(t1, acc0);
1042 acc0 = vis_faligndata(t0, acc0);
1043 acc1 = vis_faligndata(t7, acc1);
1044 acc1 = vis_faligndata(t6, acc1);
1045 acc1 = vis_faligndata(t5, acc1);
1046 acc1 = vis_faligndata(t4, acc1);
1047 dp[0] = acc0;
1048 dp[1] = acc1;
1049 table = table0; table0 = table2;
1050 table2 = table1; table1 = table;
1051 sa += 2; i += 8; dp += 2;
1052 s0 = s2;
1053 }
1054
1055 if (i <= xsize - 4) {
1056 s1 = sa[0];
1057 t3 = VIS_LD_U16_I(table0, (s1 >> 23) & 0x1FE);
1058 t2 = VIS_LD_U16_I(table2, (s0 << 1) & 0x1FE);
1059 t1 = VIS_LD_U16_I(table1, (s0 >> 7) & 0x1FE);
1060 t0 = VIS_LD_U16_I(table0, (s0 >> 15) & 0x1FE);
1061 acc0 = vis_faligndata(t3, acc0);
1062 acc0 = vis_faligndata(t2, acc0);
1063 acc0 = vis_faligndata(t1, acc0);
1064 acc0 = vis_faligndata(t0, acc0);
1065 dp[0] = acc0;
1066 table = table0; table0 = table1;
1067 table1 = table2; table2 = table;
1068 sa++; i += 4; dp++;
1069 s0 = s1;
1070 }
1071
1072 sp = (mlib_u8*)sa;
1073 sp -= 3;
1074
1075 if ((mlib_addr) dp <= (mlib_addr) dend) {
1076
1077 num = (mlib_u16*) dend - (mlib_u16*) dp;
1078 sp += num;
1079 num ++;
1080
1081 if (num == 1) {
1082 s0 = (mlib_s32) *sp;
1083 sp --;
1084
1085 t0 = VIS_LD_U16_I(table0, 2*s0);
1086 acc0 = vis_faligndata(t0, acc0);
1087 } else if (num == 2) {
1088 s0 = (mlib_s32) *sp;
1089 sp --;
1090
1091 t0 = VIS_LD_U16_I(table1, 2*s0);
1092 acc0 = vis_faligndata(t0, acc0);
1093
1094 s0 = (mlib_s32) *sp;
1095 sp --;
1096
1097 t0 = VIS_LD_U16_I(table0, 2*s0);
1098 acc0 = vis_faligndata(t0, acc0);
1099 } else if (num == 3) {
1100 s0 = (mlib_s32) *sp;
1101 sp --;
1102
1103 t0 = VIS_LD_U16_I(table2, 2*s0);
1104 acc0 = vis_faligndata(t0, acc0);
1105
1106 s0 = (mlib_s32) *sp;
1107 sp --;
1108
1109 t0 = VIS_LD_U16_I(table1, 2*s0);
1110 acc0 = vis_faligndata(t0, acc0);
1111
1112 s0 = (mlib_s32) *sp;
1113 sp --;
1114
1115 t0 = VIS_LD_U16_I(table0, 2*s0);
1116 acc0 = vis_faligndata(t0, acc0);
1117 }
1118
1119 emask = vis_edge16(dp, dend);
1120 vis_pst_16(acc0, dp, emask);
1121 }
1122}
1123
1124/***************************************************************/
1125void mlib_v_ImageLookUp_U8_U16_3_SrcOff2_D1(const mlib_u8 *src,
1126 mlib_u16 *dst,
1127 mlib_s32 xsize,
1128 const mlib_u16 *table0,
1129 const mlib_u16 *table1,
1130 const mlib_u16 *table2)
1131{
1132 mlib_u32 *sa; /* aligned pointer to source data */
1133 mlib_u8 *sp; /* pointer to source data */
1134 mlib_u32 s0, s1, s2, s3; /* source data */
1135 mlib_u16 *dl; /* pointer to start of destination */
1136 mlib_u16 *dend; /* pointer to end of destination */
1137 mlib_d64 *dp; /* aligned pointer to destination */
1138 mlib_d64 t0, t1, t2; /* destination data */
1139 mlib_d64 t3, t4, t5; /* destination data */
1140 mlib_d64 t6, t7, t8; /* destination data */
1141 mlib_d64 t9, t10, t11; /* destination data */
1142 mlib_d64 acc0, acc1, acc2; /* destination data */
1143 mlib_s32 emask; /* edge mask */
1144 mlib_s32 i, num; /* loop variable */
1145 const mlib_u16 *table;
1146
1147 sa = (mlib_u32*)(src - 2);
1148 dl = dst;
1149 dp = (mlib_d64 *) dl;
1150 dend = dl + xsize - 1;
1151
1152 vis_alignaddr((void *) 0, 6);
1153
1154 i = 0;
1155
1156 s0 = *sa++;
1157
1158 if (xsize >= 12) {
1159
1160 s1 = sa[0];
1161 s2 = sa[1];
1162 s3 = sa[2];
1163 sa += 3;
1164
1165#pragma pipeloop(0)
1166 for(i = 0; i <= xsize - 24; i+=12, sa += 3, dp += 3) {
1167 t3 = VIS_LD_U16_I(table0, (s1 >> 15) & 0x1FE);
1168 t2 = VIS_LD_U16_I(table2, (s1 >> 23) & 0x1FE);
1169 t1 = VIS_LD_U16_I(table1, (s0 << 1) & 0x1FE);
1170 t0 = VIS_LD_U16_I(table0, (s0 >> 7) & 0x1FE);
1171 t7 = VIS_LD_U16_I(table1, (s2 >> 15) & 0x1FE);
1172 t6 = VIS_LD_U16_I(table0, (s2 >> 23) & 0x1FE);
1173 t5 = VIS_LD_U16_I(table2, (s1 << 1) & 0x1FE);
1174 t4 = VIS_LD_U16_I(table1, (s1 >> 7) & 0x1FE);
1175 t11 = VIS_LD_U16_I(table2, (s3 >> 15) & 0x1FE);
1176 t10 = VIS_LD_U16_I(table1, (s3 >> 23) & 0x1FE);
1177 t9 = VIS_LD_U16_I(table0, (s2 << 1) & 0x1FE);
1178 t8 = VIS_LD_U16_I(table2, (s2 >> 7) & 0x1FE);
1179 acc0 = vis_faligndata(t3, acc0);
1180 acc0 = vis_faligndata(t2, acc0);
1181 acc0 = vis_faligndata(t1, acc0);
1182 acc0 = vis_faligndata(t0, acc0);
1183 acc1 = vis_faligndata(t7, acc1);
1184 acc1 = vis_faligndata(t6, acc1);
1185 acc1 = vis_faligndata(t5, acc1);
1186 acc1 = vis_faligndata(t4, acc1);
1187 acc2 = vis_faligndata(t11, acc2);
1188 acc2 = vis_faligndata(t10, acc2);
1189 acc2 = vis_faligndata(t9, acc2);
1190 acc2 = vis_faligndata(t8, acc2);
1191 s0 = s3;
1192 s1 = sa[0];
1193 s2 = sa[1];
1194 s3 = sa[2];
1195 dp[0] = acc0;
1196 dp[1] = acc1;
1197 dp[2] = acc2;
1198 }
1199
1200 t3 = VIS_LD_U16_I(table0, (s1 >> 15) & 0x1FE);
1201 t2 = VIS_LD_U16_I(table2, (s1 >> 23) & 0x1FE);
1202 t1 = VIS_LD_U16_I(table1, (s0 << 1) & 0x1FE);
1203 t0 = VIS_LD_U16_I(table0, (s0 >> 7) & 0x1FE);
1204 t7 = VIS_LD_U16_I(table1, (s2 >> 15) & 0x1FE);
1205 t6 = VIS_LD_U16_I(table0, (s2 >> 23) & 0x1FE);
1206 t5 = VIS_LD_U16_I(table2, (s1 << 1) & 0x1FE);
1207 t4 = VIS_LD_U16_I(table1, (s1 >> 7) & 0x1FE);
1208 t11 = VIS_LD_U16_I(table2, (s3 >> 15) & 0x1FE);
1209 t10 = VIS_LD_U16_I(table1, (s3 >> 23) & 0x1FE);
1210 t9 = VIS_LD_U16_I(table0, (s2 << 1) & 0x1FE);
1211 t8 = VIS_LD_U16_I(table2, (s2 >> 7) & 0x1FE);
1212 acc0 = vis_faligndata(t3, acc0);
1213 acc0 = vis_faligndata(t2, acc0);
1214 acc0 = vis_faligndata(t1, acc0);
1215 acc0 = vis_faligndata(t0, acc0);
1216 acc1 = vis_faligndata(t7, acc1);
1217 acc1 = vis_faligndata(t6, acc1);
1218 acc1 = vis_faligndata(t5, acc1);
1219 acc1 = vis_faligndata(t4, acc1);
1220 acc2 = vis_faligndata(t11, acc2);
1221 acc2 = vis_faligndata(t10, acc2);
1222 acc2 = vis_faligndata(t9, acc2);
1223 acc2 = vis_faligndata(t8, acc2);
1224 dp[0] = acc0;
1225 dp[1] = acc1;
1226 dp[2] = acc2;
1227 s0 = s3;
1228 dp += 3; i += 12;
1229 }
1230
1231 if (i <= xsize - 8) {
1232 s1 = sa[0];
1233 s2 = sa[1];
1234 t3 = VIS_LD_U16_I(table0, (s1 >> 15) & 0x1FE);
1235 t2 = VIS_LD_U16_I(table2, (s1 >> 23) & 0x1FE);
1236 t1 = VIS_LD_U16_I(table1, (s0 << 1) & 0x1FE);
1237 t0 = VIS_LD_U16_I(table0, (s0 >> 7) & 0x1FE);
1238 t7 = VIS_LD_U16_I(table1, (s2 >> 15) & 0x1FE);
1239 t6 = VIS_LD_U16_I(table0, (s2 >> 23) & 0x1FE);
1240 t5 = VIS_LD_U16_I(table2, (s1 << 1) & 0x1FE);
1241 t4 = VIS_LD_U16_I(table1, (s1 >> 7) & 0x1FE);
1242 acc0 = vis_faligndata(t3, acc0);
1243 acc0 = vis_faligndata(t2, acc0);
1244 acc0 = vis_faligndata(t1, acc0);
1245 acc0 = vis_faligndata(t0, acc0);
1246 acc1 = vis_faligndata(t7, acc1);
1247 acc1 = vis_faligndata(t6, acc1);
1248 acc1 = vis_faligndata(t5, acc1);
1249 acc1 = vis_faligndata(t4, acc1);
1250 dp[0] = acc0;
1251 dp[1] = acc1;
1252 table = table0; table0 = table2;
1253 table2 = table1; table1 = table;
1254 sa += 2; i += 8; dp += 2;
1255 s0 = s2;
1256 }
1257
1258 if (i <= xsize - 4) {
1259 s1 = sa[0];
1260 t3 = VIS_LD_U16_I(table0, (s1 >> 15) & 0x1FE);
1261 t2 = VIS_LD_U16_I(table2, (s1 >> 23) & 0x1FE);
1262 t1 = VIS_LD_U16_I(table1, (s0 << 1) & 0x1FE);
1263 t0 = VIS_LD_U16_I(table0, (s0 >> 7) & 0x1FE);
1264 acc0 = vis_faligndata(t3, acc0);
1265 acc0 = vis_faligndata(t2, acc0);
1266 acc0 = vis_faligndata(t1, acc0);
1267 acc0 = vis_faligndata(t0, acc0);
1268 dp[0] = acc0;
1269 table = table0; table0 = table1;
1270 table1 = table2; table2 = table;
1271 sa++; i += 4; dp++;
1272 s0 = s1;
1273 }
1274
1275 sp = (mlib_u8*)sa;
1276 sp -= 2;
1277
1278 if ((mlib_addr) dp <= (mlib_addr) dend) {
1279
1280 num = (mlib_u16*) dend - (mlib_u16*) dp;
1281 sp += num;
1282 num ++;
1283
1284 if (num == 1) {
1285 s0 = (mlib_s32) *sp;
1286 sp --;
1287
1288 t0 = VIS_LD_U16_I(table0, 2*s0);
1289 acc0 = vis_faligndata(t0, acc0);
1290 } else if (num == 2) {
1291 s0 = (mlib_s32) *sp;
1292 sp --;
1293
1294 t0 = VIS_LD_U16_I(table1, 2*s0);
1295 acc0 = vis_faligndata(t0, acc0);
1296
1297 s0 = (mlib_s32) *sp;
1298 sp --;
1299
1300 t0 = VIS_LD_U16_I(table0, 2*s0);
1301 acc0 = vis_faligndata(t0, acc0);
1302 } else if (num == 3) {
1303 s0 = (mlib_s32) *sp;
1304 sp --;
1305
1306 t0 = VIS_LD_U16_I(table2, 2*s0);
1307 acc0 = vis_faligndata(t0, acc0);
1308
1309 s0 = (mlib_s32) *sp;
1310 sp --;
1311
1312 t0 = VIS_LD_U16_I(table1, 2*s0);
1313 acc0 = vis_faligndata(t0, acc0);
1314
1315 s0 = (mlib_s32) *sp;
1316 sp --;
1317
1318 t0 = VIS_LD_U16_I(table0, 2*s0);
1319 acc0 = vis_faligndata(t0, acc0);
1320 }
1321
1322 emask = vis_edge16(dp, dend);
1323 vis_pst_16(acc0, dp, emask);
1324 }
1325}
1326
/***************************************************************/
1328void mlib_v_ImageLookUp_U8_U16_3_SrcOff3_D1(const mlib_u8 *src,
1329 mlib_u16 *dst,
1330 mlib_s32 xsize,
1331 const mlib_u16 *table0,
1332 const mlib_u16 *table1,
1333 const mlib_u16 *table2)
1334{
1335 mlib_u32 *sa; /* aligned pointer to source data */
1336 mlib_u8 *sp; /* pointer to source data */
1337 mlib_u32 s0, s1, s2, s3; /* source data */
1338 mlib_u16 *dl; /* pointer to start of destination */
1339 mlib_u16 *dend; /* pointer to end of destination */
1340 mlib_d64 *dp; /* aligned pointer to destination */
1341 mlib_d64 t0, t1, t2; /* destination data */
1342 mlib_d64 t3, t4, t5; /* destination data */
1343 mlib_d64 t6, t7, t8; /* destination data */
1344 mlib_d64 t9, t10, t11; /* destination data */
1345 mlib_d64 acc0, acc1, acc2; /* destination data */
1346 mlib_s32 emask; /* edge mask */
1347 mlib_s32 i, num; /* loop variable */
1348 const mlib_u16 *table;
1349
1350 sa = (mlib_u32*)(src - 3);
1351 dl = dst;
1352 dp = (mlib_d64 *) dl;
1353 dend = dl + xsize - 1;
1354
1355 vis_alignaddr((void *) 0, 6);
1356
1357 i = 0;
1358
1359 s0 = *sa++;
1360
1361 if (xsize >= 12) {
1362
1363 s1 = sa[0];
1364 s2 = sa[1];
1365 s3 = sa[2];
1366 sa += 3;
1367
1368#pragma pipeloop(0)
1369 for(i = 0; i <= xsize - 24; i+=12, sa += 3, dp += 3) {
1370 t3 = VIS_LD_U16_I(table0, (s1 >> 7) & 0x1FE);
1371 t2 = VIS_LD_U16_I(table2, (s1 >> 15) & 0x1FE);
1372 t1 = VIS_LD_U16_I(table1, (s1 >> 23) & 0x1FE);
1373 t0 = VIS_LD_U16_I(table0, (s0 << 1) & 0x1FE);
1374 t7 = VIS_LD_U16_I(table1, (s2 >> 7) & 0x1FE);
1375 t6 = VIS_LD_U16_I(table0, (s2 >> 15) & 0x1FE);
1376 t5 = VIS_LD_U16_I(table2, (s2 >> 23) & 0x1FE);
1377 t4 = VIS_LD_U16_I(table1, (s1 << 1) & 0x1FE);
1378 t11 = VIS_LD_U16_I(table2, (s3 >> 7) & 0x1FE);
1379 t10 = VIS_LD_U16_I(table1, (s3 >> 15) & 0x1FE);
1380 t9 = VIS_LD_U16_I(table0, (s3 >> 23) & 0x1FE);
1381 t8 = VIS_LD_U16_I(table2, (s2 << 1) & 0x1FE);
1382 acc0 = vis_faligndata(t3, acc0);
1383 acc0 = vis_faligndata(t2, acc0);
1384 acc0 = vis_faligndata(t1, acc0);
1385 acc0 = vis_faligndata(t0, acc0);
1386 acc1 = vis_faligndata(t7, acc1);
1387 acc1 = vis_faligndata(t6, acc1);
1388 acc1 = vis_faligndata(t5, acc1);
1389 acc1 = vis_faligndata(t4, acc1);
1390 acc2 = vis_faligndata(t11, acc2);
1391 acc2 = vis_faligndata(t10, acc2);
1392 acc2 = vis_faligndata(t9, acc2);
1393 acc2 = vis_faligndata(t8, acc2);
1394 s0 = s3;
1395 s1 = sa[0];
1396 s2 = sa[1];
1397 s3 = sa[2];
1398 dp[0] = acc0;
1399 dp[1] = acc1;
1400 dp[2] = acc2;
1401 }
1402
1403 t3 = VIS_LD_U16_I(table0, (s1 >> 7) & 0x1FE);
1404 t2 = VIS_LD_U16_I(table2, (s1 >> 15) & 0x1FE);
1405 t1 = VIS_LD_U16_I(table1, (s1 >> 23) & 0x1FE);
1406 t0 = VIS_LD_U16_I(table0, (s0 << 1) & 0x1FE);
1407 t7 = VIS_LD_U16_I(table1, (s2 >> 7) & 0x1FE);
1408 t6 = VIS_LD_U16_I(table0, (s2 >> 15) & 0x1FE);
1409 t5 = VIS_LD_U16_I(table2, (s2 >> 23) & 0x1FE);
1410 t4 = VIS_LD_U16_I(table1, (s1 << 1) & 0x1FE);
1411 t11 = VIS_LD_U16_I(table2, (s3 >> 7) & 0x1FE);
1412 t10 = VIS_LD_U16_I(table1, (s3 >> 15) & 0x1FE);
1413 t9 = VIS_LD_U16_I(table0, (s3 >> 23) & 0x1FE);
1414 t8 = VIS_LD_U16_I(table2, (s2 << 1) & 0x1FE);
1415 acc0 = vis_faligndata(t3, acc0);
1416 acc0 = vis_faligndata(t2, acc0);
1417 acc0 = vis_faligndata(t1, acc0);
1418 acc0 = vis_faligndata(t0, acc0);
1419 acc1 = vis_faligndata(t7, acc1);
1420 acc1 = vis_faligndata(t6, acc1);
1421 acc1 = vis_faligndata(t5, acc1);
1422 acc1 = vis_faligndata(t4, acc1);
1423 acc2 = vis_faligndata(t11, acc2);
1424 acc2 = vis_faligndata(t10, acc2);
1425 acc2 = vis_faligndata(t9, acc2);
1426 acc2 = vis_faligndata(t8, acc2);
1427 dp[0] = acc0;
1428 dp[1] = acc1;
1429 dp[2] = acc2;
1430 s0 = s3;
1431 dp += 3; i += 12;
1432 }
1433
1434 if (i <= xsize - 8) {
1435 s1 = sa[0];
1436 s2 = sa[1];
1437 t3 = VIS_LD_U16_I(table0, (s1 >> 7) & 0x1FE);
1438 t2 = VIS_LD_U16_I(table2, (s1 >> 15) & 0x1FE);
1439 t1 = VIS_LD_U16_I(table1, (s1 >> 23) & 0x1FE);
1440 t0 = VIS_LD_U16_I(table0, (s0 << 1) & 0x1FE);
1441 t7 = VIS_LD_U16_I(table1, (s2 >> 7) & 0x1FE);
1442 t6 = VIS_LD_U16_I(table0, (s2 >> 15) & 0x1FE);
1443 t5 = VIS_LD_U16_I(table2, (s2 >> 23) & 0x1FE);
1444 t4 = VIS_LD_U16_I(table1, (s1 << 1) & 0x1FE);
1445 acc0 = vis_faligndata(t3, acc0);
1446 acc0 = vis_faligndata(t2, acc0);
1447 acc0 = vis_faligndata(t1, acc0);
1448 acc0 = vis_faligndata(t0, acc0);
1449 acc1 = vis_faligndata(t7, acc1);
1450 acc1 = vis_faligndata(t6, acc1);
1451 acc1 = vis_faligndata(t5, acc1);
1452 acc1 = vis_faligndata(t4, acc1);
1453 dp[0] = acc0;
1454 dp[1] = acc1;
1455 table = table0; table0 = table2;
1456 table2 = table1; table1 = table;
1457 sa += 2; i += 8; dp += 2;
1458 s0 = s2;
1459 }
1460
1461 if (i <= xsize - 4) {
1462 s1 = sa[0];
1463 t3 = VIS_LD_U16_I(table0, (s1 >> 7) & 0x1FE);
1464 t2 = VIS_LD_U16_I(table2, (s1 >> 15) & 0x1FE);
1465 t1 = VIS_LD_U16_I(table1, (s1 >> 23) & 0x1FE);
1466 t0 = VIS_LD_U16_I(table0, (s0 << 1) & 0x1FE);
1467 acc0 = vis_faligndata(t3, acc0);
1468 acc0 = vis_faligndata(t2, acc0);
1469 acc0 = vis_faligndata(t1, acc0);
1470 acc0 = vis_faligndata(t0, acc0);
1471 dp[0] = acc0;
1472 table = table0; table0 = table1;
1473 table1 = table2; table2 = table;
1474 sa++; i += 4; dp++;
1475 s0 = s1;
1476 }
1477
1478 sp = (mlib_u8*)sa;
1479 sp -= 1;
1480
1481 if ((mlib_addr) dp <= (mlib_addr) dend) {
1482
1483 num = (mlib_u16*) dend - (mlib_u16*) dp;
1484 sp += num;
1485 num ++;
1486
1487 if (num == 1) {
1488 s0 = (mlib_s32) *sp;
1489 sp --;
1490
1491 t0 = VIS_LD_U16_I(table0, 2*s0);
1492 acc0 = vis_faligndata(t0, acc0);
1493 } else if (num == 2) {
1494 s0 = (mlib_s32) *sp;
1495 sp --;
1496
1497 t0 = VIS_LD_U16_I(table1, 2*s0);
1498 acc0 = vis_faligndata(t0, acc0);
1499
1500 s0 = (mlib_s32) *sp;
1501 sp --;
1502
1503 t0 = VIS_LD_U16_I(table0, 2*s0);
1504 acc0 = vis_faligndata(t0, acc0);
1505 } else if (num == 3) {
1506 s0 = (mlib_s32) *sp;
1507 sp --;
1508
1509 t0 = VIS_LD_U16_I(table2, 2*s0);
1510 acc0 = vis_faligndata(t0, acc0);
1511
1512 s0 = (mlib_s32) *sp;
1513 sp --;
1514
1515 t0 = VIS_LD_U16_I(table1, 2*s0);
1516 acc0 = vis_faligndata(t0, acc0);
1517
1518 s0 = (mlib_s32) *sp;
1519 sp --;
1520
1521 t0 = VIS_LD_U16_I(table0, 2*s0);
1522 acc0 = vis_faligndata(t0, acc0);
1523 }
1524
1525 emask = vis_edge16(dp, dend);
1526 vis_pst_16(acc0, dp, emask);
1527 }
1528}
1529
/***************************************************************/
1531void mlib_v_ImageLookUp_U8_U16_3(const mlib_u8 *src,
1532 mlib_s32 slb,
1533 mlib_u16 *dst,
1534 mlib_s32 dlb,
1535 mlib_s32 xsize,
1536 mlib_s32 ysize,
1537 const mlib_u16 **table)
1538{
1539 mlib_u8 *sl;
1540 mlib_u16 *dl;
1541 const mlib_u16 *tab;
1542 mlib_s32 j, i;
1543
1544 sl = (void *)src;
1545 dl = dst;
1546
1547 /* row loop */
1548 for (j = 0; j < ysize; j ++) {
1549 mlib_u8 *sp = sl;
1550 mlib_u16 *dp = dl;
1551 const mlib_u16 *tab0 = table[0];
1552 const mlib_u16 *tab1 = table[1];
1553 const mlib_u16 *tab2 = table[2];
1554 mlib_s32 off, size = xsize * 3;
1555
1556 off = ((8 - ((mlib_addr)dp & 7)) & 7) >> 1;
1557
1558 off = (off < size) ? off : size;
1559
1560 for (i = 0; i < off - 2; i += 3) {
1561 *dp++ = tab0[(*sp++)];
1562 *dp++ = tab1[(*sp++)];
1563 *dp++ = tab2[(*sp++)];
1564 size-=3;
1565 }
1566
1567 off -= i;
1568
1569 if (off == 1) {
1570 *dp++ = tab0[(*sp++)];
1571 tab = tab0; tab0 = tab1;
1572 tab1 = tab2; tab2 = tab;
1573 size--;
1574 } else if (off == 2) {
1575 *dp++ = tab0[(*sp++)];
1576 *dp++ = tab1[(*sp++)];
1577 tab = tab2; tab2 = tab1;
1578 tab1 = tab0; tab0 = tab;
1579 size-=2;
1580 }
1581
1582 if (size > 0) {
1583
1584 off = (mlib_addr)sp & 3;
1585
1586 if (off == 0) {
1587 mlib_v_ImageLookUp_U8_U16_3_SrcOff0_D1(sp, dp, size, tab0, tab1, tab2);
1588 } else if (off == 1) {
1589 mlib_v_ImageLookUp_U8_U16_3_SrcOff1_D1(sp, dp, size, tab0, tab1, tab2);
1590 } else if (off == 2) {
1591 mlib_v_ImageLookUp_U8_U16_3_SrcOff2_D1(sp, dp, size, tab0, tab1, tab2);
1592 } else {
1593 mlib_v_ImageLookUp_U8_U16_3_SrcOff3_D1(sp, dp, size, tab0, tab1, tab2);
1594 }
1595 }
1596
1597 sl = (mlib_u8 *) ((mlib_u8 *) sl + slb);
1598 dl = (mlib_u16 *) ((mlib_u8 *) dl + dlb);
1599 }
1600}
1601
1602/***************************************************************/