blob: 90189cde00c78b1fb875c2490acc3c61a65efc5c [file] [log] [blame]
J. Duke319a3b92007-12-01 00:00:00 +00001/*
2 * Copyright 1998-2003 Sun Microsystems, Inc. All Rights Reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Sun designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Sun in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
22 * CA 95054 USA or visit www.sun.com if you need additional information or
23 * have any questions.
24 */
25
26
27
28/*
29 * FILENAME: mlib_v_ImageChannelExtract_43.c
30 *
31 * FUNCTIONS
32 * mlib_v_ImageChannelExtract_U8_43R_A8D1X8
33 * mlib_v_ImageChannelExtract_U8_43R_A8D2X8
34 * mlib_v_ImageChannelExtract_U8_43R_D1
35 * mlib_v_ImageChannelExtract_U8_43R
36 * mlib_v_ImageChannelExtract_S16_43R_A8D1X4
37 * mlib_v_ImageChannelExtract_S16_43R_A8D2X4
38 * mlib_v_ImageChannelExtract_S16_43R_D1
39 * mlib_v_ImageChannelExtract_S16_43R
40 * mlib_v_ImageChannelExtract_U8_43L_A8D1X8
41 * mlib_v_ImageChannelExtract_U8_43L_A8D2X8
42 * mlib_v_ImageChannelExtract_U8_43L_D1
43 * mlib_v_ImageChannelExtract_U8_43L
44 * mlib_v_ImageChannelExtract_S16_43L_A8D1X4
45 * mlib_v_ImageChannelExtract_S16_43L_A8D2X4
46 * mlib_v_ImageChannelExtract_S16_43L_D1
47 * mlib_v_ImageChannelExtract_S16_43L
48 *
49 * SYNOPSIS
50 *
51 * ARGUMENT
52 * src pointer to source image data
53 * dst pointer to destination image data
54 * slb source image line stride in bytes
55 * dlb destination image line stride in bytes
56 * dsize image data size in pixels
57 * xsize image width in pixels
58 * ysize image height in lines
59 * cmask channel mask
60 *
61 * DESCRIPTION
62 * extract the right or left 3 channels of a 4-channel image to
63 * a 3-channel image -- VIS version low level functions.
64 *
65 * ABGR => BGR (43R), or RGBA => RGB (43L)
66 *
67 * NOTE
68 * These functions are separated from mlib_v_ImageChannelExtract.c
69 * for loop unrolling and structure clarity.
70 */
71
72#include "vis_proto.h"
73#include "mlib_image.h"
74#include "mlib_v_ImageChannelExtract.h"
75
76/***************************************************************/
/*
 * EXTRACT_U8_43R_old -- byte-at-a-time predecessor of EXTRACT_U8_43R.
 *
 * Inputs : sd0..sd3 (eight 4-byte pixels), dd0..dd2 (previous contents
 *          are shifted out completely).
 * Outputs: dd0..dd2 hold the 24 packed bytes shown in the last comment
 *          of each group; sd0..sd3 are overwritten along the way.
 *
 * The macro never sets GSR.offset itself.  The caller has to execute
 *     vis_alignaddr((void *)0, 7);
 * beforehand so that each vis_faligndata() moves the data by one byte.
 *
 * The body is wrapped in do { } while (0) so that the expansion is a
 * single statement wherever it is used.
 */
#define EXTRACT_U8_43R_old      /* shift right */                       \
  do {                                                                  \
    dd2 = vis_faligndata(sd3, dd2);     /* r7-------------- */          \
    sd3 = vis_faligndata(sd3, sd3);                                     \
    dd2 = vis_faligndata(sd3, dd2);     /* g7r7------------ */          \
    sd3 = vis_faligndata(sd3, sd3);                                     \
    dd2 = vis_faligndata(sd3, dd2);     /* b7g7r7---------- */          \
    sd3 = vis_faligndata(sd3, sd3);                                     \
    sd3 = vis_faligndata(sd3, sd3);                                     \
    dd2 = vis_faligndata(sd3, dd2);     /* r6b7g7r7-------- */          \
    sd3 = vis_faligndata(sd3, sd3);                                     \
    dd2 = vis_faligndata(sd3, dd2);     /* g6r6b7g7r7------ */          \
    sd3 = vis_faligndata(sd3, sd3);                                     \
    dd2 = vis_faligndata(sd3, dd2);     /* b6g6r6b7g7r7---- */          \
                                                                        \
    dd2 = vis_faligndata(sd2, dd2);     /* r5b6g6r6b7g7r7-- */          \
    sd2 = vis_faligndata(sd2, sd2);                                     \
    dd2 = vis_faligndata(sd2, dd2);     /* g5r5b6g6r6b7g7r7 */          \
                                                                        \
    sd2 = vis_faligndata(sd2, sd2);                                     \
    dd1 = vis_faligndata(sd2, dd1);     /* b5-------------- */          \
    sd2 = vis_faligndata(sd2, sd2);                                     \
    sd2 = vis_faligndata(sd2, sd2);                                     \
    dd1 = vis_faligndata(sd2, dd1);     /* r4b5------------ */          \
    sd2 = vis_faligndata(sd2, sd2);                                     \
    dd1 = vis_faligndata(sd2, dd1);     /* g4r4b5---------- */          \
    sd2 = vis_faligndata(sd2, sd2);                                     \
    dd1 = vis_faligndata(sd2, dd1);     /* b4g4r4b5-------- */          \
                                                                        \
    dd1 = vis_faligndata(sd1, dd1);     /* r3b4g4r4b5------ */          \
    sd1 = vis_faligndata(sd1, sd1);                                     \
    dd1 = vis_faligndata(sd1, dd1);     /* g3r3b4g4r4b5---- */          \
    sd1 = vis_faligndata(sd1, sd1);                                     \
    dd1 = vis_faligndata(sd1, dd1);     /* b3g3r3b4g4r4b5-- */          \
    sd1 = vis_faligndata(sd1, sd1);                                     \
    sd1 = vis_faligndata(sd1, sd1);                                     \
    dd1 = vis_faligndata(sd1, dd1);     /* r2b3g3r3b4g4r4b5 */          \
                                                                        \
    sd1 = vis_faligndata(sd1, sd1);                                     \
    dd0 = vis_faligndata(sd1, dd0);     /* g2-------------- */          \
    sd1 = vis_faligndata(sd1, sd1);                                     \
    dd0 = vis_faligndata(sd1, dd0);     /* b2g2------------ */          \
                                                                        \
    dd0 = vis_faligndata(sd0, dd0);     /* r1b2g2---------- */          \
    sd0 = vis_faligndata(sd0, sd0);                                     \
    dd0 = vis_faligndata(sd0, dd0);     /* g1r1b2g2-------- */          \
    sd0 = vis_faligndata(sd0, sd0);                                     \
    dd0 = vis_faligndata(sd0, dd0);     /* b1g1r1b2g2------ */          \
    sd0 = vis_faligndata(sd0, sd0);                                     \
    sd0 = vis_faligndata(sd0, sd0);                                     \
    dd0 = vis_faligndata(sd0, dd0);     /* r0b1g1r1b2g2---- */          \
    sd0 = vis_faligndata(sd0, sd0);                                     \
    dd0 = vis_faligndata(sd0, dd0);     /* g0r0b1g1r1b2g2-- */          \
    sd0 = vis_faligndata(sd0, sd0);                                     \
    dd0 = vis_faligndata(sd0, dd0);     /* b0g0r0b1g1r1b2g2 */          \
  } while (0)
131
132/***************************************************************/
/*
 * EXTRACT_U8_43R -- drop the leading byte of each 4-byte pixel.
 *
 * Inputs : sd0..sd3, each holding two pixels laid out as "--b g r".
 * Outputs: dd0..dd2, the 24 packed b/g/r bytes of those eight pixels
 *          (layout after each step is shown in the trailing comments).
 * Scratch: sda (high half of a source word duplicated into both halves,
 *          so that the first pixel of the word sits in the low bytes).
 *
 * The macro programs GSR.offset itself through vis_alignaddr() and
 * leaves it at 5 on exit, so a caller that needs another offset has to
 * set it again afterwards.  Previous contents of dd0..dd2 are shifted
 * out entirely and do not reach the result.
 *
 * The body is wrapped in do { } while (0) so that the expansion is a
 * single statement wherever it is used.
 */
#define EXTRACT_U8_43R          /* shift right */                        \
  do {                                                                   \
    vis_alignaddr((void *)0, 5);                                         \
    dd2 = vis_faligndata(sd3, dd2);     /* b7g7r7---------- */           \
    sda = vis_freg_pair(vis_read_hi(sd3), vis_read_hi(sd3));             \
    dd2 = vis_faligndata(sda, dd2);     /* b6g6r6b7g7r7---- */           \
                                                                         \
    vis_alignaddr((void *)0, 6);                                         \
    dd2 = vis_faligndata(sd2, dd2);     /* g5r5b6g6r6b7g7r7 */           \
                                                                         \
    vis_alignaddr((void *)0, 5);                                         \
    dd1 = vis_faligndata(sd2, dd1);     /* b5g5r5---------- */           \
    sda = vis_freg_pair(vis_read_hi(sd2), vis_read_hi(sd2));             \
    dd1 = vis_faligndata(sda, dd1);     /* b4g4r4b5g5r5---- */           \
    dd1 = vis_faligndata(sd1, dd1);     /* b3g3r3b4g4r4b5g5 */           \
    sda = vis_freg_pair(vis_read_hi(sd1), vis_read_hi(sd1));             \
    vis_alignaddr((void *)0, 7);                                         \
    dd1 = vis_faligndata(sda, dd1);     /* r2b3g3r3b4g4r4b5 */           \
                                                                         \
    vis_alignaddr((void *)0, 5);                                         \
    dd0 = vis_faligndata(sda, dd0);     /* b2g2r2---------- */           \
    dd0 = vis_faligndata(sd0, dd0);     /* b1g1r1b2g2r2---- */           \
    sda = vis_freg_pair(vis_read_hi(sd0), vis_read_hi(sd0));             \
    dd0 = vis_faligndata(sda, dd0);     /* b0g0r0b1g1r1b2g2 */           \
  } while (0)
156
157/***************************************************************/
/*
 * LOAD_EXTRACT_U8_43R_STORE -- one step of the fully aligned U8 43R loop.
 *
 * Loads four 8-byte words (eight 4-channel pixels) through sp, packs
 * them with EXTRACT_U8_43R and writes the resulting three 8-byte words
 * through dp.  Both sp and dp are advanced and are dereferenced as
 * mlib_d64, so they have to be 8-byte aligned.
 *
 * The body is wrapped in do { } while (0) so that the expansion is a
 * single statement wherever it is used.
 */
#define LOAD_EXTRACT_U8_43R_STORE                                       \
  do {                                                                  \
    sd0 = *sp++;                        /* --b0g0r0--b1g1r1 */          \
    sd1 = *sp++;                        /* --b2g2r2--b3g3r3 */          \
    sd2 = *sp++;                        /* --b4g4r4--b5g5r5 */          \
    sd3 = *sp++;                        /* --b6g6r6--b7g7r7 */          \
    EXTRACT_U8_43R;                                                     \
    *dp++ = dd0;                        /* b0g0r0b1g1r1b2g2 */          \
    *dp++ = dd1;                        /* r2b3g3r3b4g4r4b5 */          \
    *dp++ = dd2;                        /* g5r5b6g6r6b7g7r7 */          \
  } while (0)
167
168/***************************************************************/
/*
 * LOAD_EXTRACT_U8_43R -- one step of the unaligned U8 43R path.
 *
 * sp is the source address rounded down to 8 bytes and soff the byte
 * offset that was rounded away.  The macro
 *   - reuses the word kept in s4 by the previous step as s0 (the caller
 *     primes s4 with sp[0] before the first use),
 *   - loads sp[1]..sp[4]; sp[4] lies one word past the 32 bytes that
 *     are consumed by this step,
 *   - realigns the five words into the pixel-aligned sd0..sd3 using
 *     GSR.offset = soff,
 *   - advances sp by four words,
 *   - saves the previous dd2 in dd2old (needed by callers that store
 *     with a destination offset), and
 *   - runs EXTRACT_U8_43R, which leaves the packed data in dd0..dd2 and
 *     changes GSR.offset.
 *
 * The body is wrapped in do { } while (0) so that the expansion is a
 * single statement wherever it is used.
 */
#define LOAD_EXTRACT_U8_43R                                             \
  do {                                                                  \
    vis_alignaddr((void *)soff, 0);                                     \
    s0 = s4;                                                            \
    s1 = sp[1];                                                         \
    s2 = sp[2];                                                         \
    s3 = sp[3];                                                         \
    s4 = sp[4];                                                         \
    sd0 = vis_faligndata(s0, s1);                                       \
    sd1 = vis_faligndata(s1, s2);                                       \
    sd2 = vis_faligndata(s2, s3);                                       \
    sd3 = vis_faligndata(s3, s4);                                       \
    sp += 4;                                                            \
    dd2old = dd2;                                                       \
    EXTRACT_U8_43R;                                                     \
  } while (0)
183
184/***************************************************************/
185/*
186 * Both source and destination image data are 1-d vectors and
187 * 8-byte aligned. And dsize is multiple of 8.
188 */
189
190void mlib_v_ImageChannelExtract_U8_43R_A8D1X8(const mlib_u8 *src,
191 mlib_u8 *dst,
192 mlib_s32 dsize)
193{
194 mlib_d64 *sp, *dp;
195 mlib_d64 sd0, sd1, sd2, sd3; /* source data */
196 mlib_d64 dd0, dd1, dd2; /* dst data */
197 mlib_d64 sda;
198 mlib_s32 i;
199
200 sp = (mlib_d64 *) src;
201 dp = (mlib_d64 *) dst;
202
203 /* set GSR.offset for vis_faligndata() */
204/* vis_alignaddr((void *)0, 7); *//* only for _old */
205
206#pragma pipeloop(0)
207 for (i = 0; i < dsize / 8; i++) {
208 LOAD_EXTRACT_U8_43R_STORE;
209 }
210}
211
212/***************************************************************/
213/*
214 * Either source or destination image data are not 1-d vectors, but
215 * they are 8-byte aligned. And slb and dlb are multiple of 8.
216 * The xsize is multiple of 8.
217 */
218
219void mlib_v_ImageChannelExtract_U8_43R_A8D2X8(const mlib_u8 *src,
220 mlib_s32 slb,
221 mlib_u8 *dst,
222 mlib_s32 dlb,
223 mlib_s32 xsize,
224 mlib_s32 ysize)
225{
226 mlib_d64 *sp, *dp; /* 8-byte aligned pointer for pixel */
227 mlib_d64 *sl, *dl; /* 8-byte aligned pointer for line */
228 mlib_d64 sd0, sd1, sd2, sd3; /* source data */
229 mlib_d64 dd0, dd1, dd2; /* dst data */
230 mlib_d64 sda;
231 mlib_s32 i, j; /* indices for x, y */
232
233 /* set GSR.offset for vis_faligndata() */
234/* vis_alignaddr((void *)0, 7); *//* only for _old */
235
236 sp = sl = (mlib_d64 *) src;
237 dp = dl = (mlib_d64 *) dst;
238
239 /* row loop */
240 for (j = 0; j < ysize; j++) {
241 /* 8-byte column loop */
242#pragma pipeloop(0)
243 for (i = 0; i < xsize / 8; i++) {
244 LOAD_EXTRACT_U8_43R_STORE;
245 }
246
247 sp = sl = (mlib_d64 *) ((mlib_u8 *) sl + slb);
248 dp = dl = (mlib_d64 *) ((mlib_u8 *) dl + dlb);
249 }
250}
251
252/***************************************************************/
253/*
254 * Either source or destination data are not 8-byte aligned.
255 * And dsize is is in pixels.
256 */
257
258void mlib_v_ImageChannelExtract_U8_43R_D1(const mlib_u8 *src,
259 mlib_u8 *dst,
260 mlib_s32 dsize)
261{
262 mlib_u8 *sa, *da;
263 mlib_u8 *dend, *dend2; /* end points in dst */
264 mlib_d64 *dp; /* 8-byte aligned start points in dst */
265 mlib_d64 *sp; /* 8-byte aligned start point in src */
266 mlib_d64 s0, s1, s2, s3, s4; /* 8-byte source row data */
267 mlib_d64 sd0, sd1, sd2, sd3; /* 8-byte source data */
268 mlib_d64 dd0, dd1, dd2; /* dst data */
269 mlib_d64 dd2old; /* the last datum of the last step */
270 mlib_d64 sda;
271 mlib_s32 soff; /* offset of address in src */
272 mlib_s32 doff; /* offset of address in dst */
273 mlib_s32 emask; /* edge mask */
274 mlib_s32 i, n;
275
276 sa = (void *)src;
277 da = dst;
278
279 /* prepare the source address */
280 sp = (mlib_d64 *) ((mlib_addr) sa & (~7));
281 soff = ((mlib_addr) sa & 7);
282
283 /* prepare the destination addresses */
284 dp = (mlib_d64 *) ((mlib_addr) da & (~7));
285 dend = da + dsize * 3 - 1;
286 dend2 = dend - 23;
287 doff = 8 - ((mlib_addr) da & 7);
288
289 /* generate edge mask for the start point */
290 emask = vis_edge8(da, dend);
291
292 /* load 32 byte, convert, store 24 bytes */
293 s4 = sp[0]; /* initial value */
294 LOAD_EXTRACT_U8_43R;
295
296 if (dsize >= 8) {
297 if (doff == 8) {
298 vis_pst_8(dd0, dp++, emask);
299 *dp++ = dd1;
300 *dp++ = dd2;
301 }
302 else {
303 vis_alignaddr((void *)doff, 0);
304 vis_pst_8(vis_faligndata(dd0, dd0), dp++, emask);
305 *dp++ = vis_faligndata(dd0, dd1);
306 *dp++ = vis_faligndata(dd1, dd2);
307 }
308 }
309 else { /* for very small size */
310 if (doff == 8) {
311 vis_pst_8(dd0, dp++, emask);
312 if ((mlib_addr) dp <= (mlib_addr) dend) {
313 emask = vis_edge8(dp, dend);
314 vis_pst_8(dd1, dp++, emask);
315 if ((mlib_addr) dp <= (mlib_addr) dend) {
316 emask = vis_edge8(dp, dend);
317 vis_pst_8(dd2, dp++, emask);
318 }
319 }
320 }
321 else {
322 vis_alignaddr((void *)doff, 0);
323 vis_pst_8(vis_faligndata(dd0, dd0), dp++, emask);
324 if ((mlib_addr) dp <= (mlib_addr) dend) {
325 emask = vis_edge8(dp, dend);
326 vis_pst_8(vis_faligndata(dd0, dd1), dp++, emask);
327 if ((mlib_addr) dp <= (mlib_addr) dend) {
328 emask = vis_edge8(dp, dend);
329 vis_pst_8(vis_faligndata(dd1, dd2), dp++, emask);
330 if ((mlib_addr) dp <= (mlib_addr) dend) {
331 emask = vis_edge8(dp, dend);
332 vis_pst_8(vis_faligndata(dd2, dd2), dp++, emask);
333 }
334 }
335 }
336 }
337 }
338
339 /* no edge handling is needed in the loop */
340 if (doff == 8) {
341 if ((mlib_addr) dp <= (mlib_addr) dend2) {
342 n = ((mlib_u8 *) dend2 - (mlib_u8 *) dp) / 24 + 1;
343#pragma pipeloop(0)
344 for (i = 0; i < n; i++) {
345 LOAD_EXTRACT_U8_43R;
346 *dp++ = dd0;
347 *dp++ = dd1;
348 *dp++ = dd2;
349 }
350 }
351 }
352 else {
353 if ((mlib_addr) dp <= (mlib_addr) dend2) {
354 n = ((mlib_u8 *) dend2 - (mlib_u8 *) dp) / 24 + 1;
355#pragma pipeloop(0)
356 for (i = 0; i < n; i++) {
357 LOAD_EXTRACT_U8_43R;
358 vis_alignaddr((void *)doff, 0);
359 *dp++ = vis_faligndata(dd2old, dd0);
360 *dp++ = vis_faligndata(dd0, dd1);
361 *dp++ = vis_faligndata(dd1, dd2);
362 }
363 }
364 }
365
366 if ((mlib_addr) dp <= (mlib_addr) dend) {
367 LOAD_EXTRACT_U8_43R;
368 emask = vis_edge8(dp, dend);
369 if (doff == 8) {
370 vis_pst_8(dd0, dp++, emask);
371 if ((mlib_addr) dp <= (mlib_addr) dend) {
372 emask = vis_edge8(dp, dend);
373 vis_pst_8(dd1, dp++, emask);
374 if ((mlib_addr) dp <= (mlib_addr) dend) {
375 emask = vis_edge8(dp, dend);
376 vis_pst_8(dd2, dp++, emask);
377 }
378 }
379 }
380 else {
381 vis_alignaddr((void *)doff, 0);
382 vis_pst_8(vis_faligndata(dd2old, dd0), dp++, emask);
383 if ((mlib_addr) dp <= (mlib_addr) dend) {
384 emask = vis_edge8(dp, dend);
385 vis_pst_8(vis_faligndata(dd0, dd1), dp++, emask);
386 if ((mlib_addr) dp <= (mlib_addr) dend) {
387 emask = vis_edge8(dp, dend);
388 vis_pst_8(vis_faligndata(dd1, dd2), dp++, emask);
389 }
390 }
391 }
392 }
393}
394
395/***************************************************************/
396void mlib_v_ImageChannelExtract_U8_43R(const mlib_u8 *src,
397 mlib_s32 slb,
398 mlib_u8 *dst,
399 mlib_s32 dlb,
400 mlib_s32 xsize,
401 mlib_s32 ysize)
402{
403 mlib_u8 *sa, *da;
404 mlib_u8 *sl, *dl;
405 mlib_s32 j;
406
407 sa = sl = (void *)src;
408 da = dl = dst;
409
410 for (j = 0; j < ysize; j++) {
411 mlib_v_ImageChannelExtract_U8_43R_D1(sa, da, xsize);
412 sa = sl += slb;
413 da = dl += dlb;
414 }
415}
416
417/***************************************************************/
/*
 * EXTRACT_S16_43R_old -- element-at-a-time predecessor of EXTRACT_S16_43R.
 *
 * Inputs : sd0..sd3 (four 4-channel S16 pixels), dd0..dd2 (previous
 *          contents are shifted out completely).
 * Outputs: dd0..dd2 hold the twelve packed 16-bit values shown in the
 *          last comment of each group; sd0..sd2 are overwritten.
 *
 * The macro never sets GSR.offset itself.  The caller has to execute
 *     vis_alignaddr((void *)0, 6);
 * beforehand so that each vis_faligndata() moves the data by one
 * 16-bit element.
 *
 * The body is wrapped in do { } while (0) so that the expansion is a
 * single statement wherever it is used.
 */
#define EXTRACT_S16_43R_old     /* shift right */                       \
  do {                                                                  \
    dd2 = vis_faligndata(sd3, dd2);     /* r3------ */                  \
    sd3 = vis_faligndata(sd3, sd3);                                     \
    dd2 = vis_faligndata(sd3, dd2);     /* g3r3---- */                  \
    sd3 = vis_faligndata(sd3, sd3);                                     \
    dd2 = vis_faligndata(sd3, dd2);     /* b3g3r3-- */                  \
                                                                        \
    dd2 = vis_faligndata(sd2, dd2);     /* r2b3g3r3 */                  \
    sd2 = vis_faligndata(sd2, sd2);                                     \
    dd1 = vis_faligndata(sd2, dd1);     /* g2------ */                  \
    sd2 = vis_faligndata(sd2, sd2);                                     \
    dd1 = vis_faligndata(sd2, dd1);     /* b2g2---- */                  \
                                                                        \
    dd1 = vis_faligndata(sd1, dd1);     /* r1b2g2-- */                  \
    sd1 = vis_faligndata(sd1, sd1);                                     \
    dd1 = vis_faligndata(sd1, dd1);     /* g1r1b2g2 */                  \
    sd1 = vis_faligndata(sd1, sd1);                                     \
    dd0 = vis_faligndata(sd1, dd0);     /* b1------ */                  \
                                                                        \
    dd0 = vis_faligndata(sd0, dd0);     /* r0b1---- */                  \
    sd0 = vis_faligndata(sd0, sd0);                                     \
    dd0 = vis_faligndata(sd0, dd0);     /* g0r0b1-- */                  \
    sd0 = vis_faligndata(sd0, sd0);                                     \
    dd0 = vis_faligndata(sd0, dd0);     /* b0g0r0b1 */                  \
  } while (0)
443
444/***************************************************************/
/*
 * EXTRACT_S16_43R -- drop the leading element of each 4-channel S16 pixel.
 *
 * Inputs : sd0..sd3, one pixel each, laid out as "-- b g r".
 * Outputs: dd0..dd2, the twelve packed b/g/r values of those four
 *          pixels (layout after each step is shown in the comments).
 *
 * The macro programs GSR.offset itself through vis_alignaddr() and
 * leaves it at 2 on exit, so a caller that needs another offset has to
 * set it again afterwards.  sd0..sd3 are not modified.  Previous
 * contents of dd0..dd2 are shifted out entirely and do not reach the
 * result.
 *
 * The body is wrapped in do { } while (0) so that the expansion is a
 * single statement wherever it is used.
 */
#define EXTRACT_S16_43R         /* shift right */                       \
  do {                                                                  \
    vis_alignaddr((void *)0, 2);                                        \
    dd2 = vis_faligndata(sd3, dd2);     /* b3g3r3-- */                  \
                                                                        \
    vis_alignaddr((void *)0, 6);                                        \
    dd2 = vis_faligndata(sd2, dd2);     /* r2b3g3r3 */                  \
    vis_alignaddr((void *)0, 2);                                        \
    dd1 = vis_faligndata(sd2, dd1);     /* b2g2r2-- */                  \
                                                                        \
    vis_alignaddr((void *)0, 4);                                        \
    dd1 = vis_faligndata(sd1, dd1);     /* g1r1b2g2 */                  \
    vis_alignaddr((void *)0, 2);                                        \
    dd0 = vis_faligndata(sd1, dd0);     /* b1g1r1-- */                  \
    dd0 = vis_faligndata(sd0, dd0);     /* b0g0r0b1 */                  \
  } while (0)
460
461/***************************************************************/
/*
 * LOAD_EXTRACT_S16_43R_STORE -- one step of the fully aligned S16 43R loop.
 *
 * Loads four 8-byte words (four 4-channel S16 pixels) through sp, packs
 * them with EXTRACT_S16_43R and writes the resulting three 8-byte words
 * through dp.  Both sp and dp are advanced and are dereferenced as
 * mlib_d64, so they have to be 8-byte aligned.
 *
 * The body is wrapped in do { } while (0) so that the expansion is a
 * single statement wherever it is used.
 */
#define LOAD_EXTRACT_S16_43R_STORE                                      \
  do {                                                                  \
    sd0 = *sp++;                        /* --b0g0r0 */                  \
    sd1 = *sp++;                        /* --b1g1r1 */                  \
    sd2 = *sp++;                        /* --b2g2r2 */                  \
    sd3 = *sp++;                        /* --b3g3r3 */                  \
                                                                        \
    EXTRACT_S16_43R;                                                    \
                                                                        \
    *dp++ = dd0;                        /* b0g0r0b1 */                  \
    *dp++ = dd1;                        /* g1r1b2g2 */                  \
    *dp++ = dd2;                        /* r2b3g3r3 */                  \
  } while (0)
474
475/***************************************************************/
/*
 * LOAD_EXTRACT_S16_43R -- one step of the unaligned S16 43R path.
 *
 * sp is the source address rounded down to 8 bytes and soff the byte
 * offset that was rounded away.  The macro
 *   - reuses the word kept in s4 by the previous step as s0 (the caller
 *     primes s4 with sp[0] before the first use),
 *   - loads sp[1]..sp[4]; sp[4] lies one word past the 32 bytes that
 *     are consumed by this step,
 *   - realigns the five words into the pixel-aligned sd0..sd3 using
 *     GSR.offset = soff,
 *   - advances sp by four words,
 *   - saves the previous dd2 in dd2old (needed by callers that store
 *     with a destination offset), and
 *   - runs EXTRACT_S16_43R, which leaves the packed data in dd0..dd2
 *     and changes GSR.offset.
 *
 * The body is wrapped in do { } while (0) so that the expansion is a
 * single statement wherever it is used.
 */
#define LOAD_EXTRACT_S16_43R                                            \
  do {                                                                  \
    vis_alignaddr((void *)soff, 0);                                     \
    s0 = s4;                                                            \
    s1 = sp[1];                                                         \
    s2 = sp[2];                                                         \
    s3 = sp[3];                                                         \
    s4 = sp[4];                                                         \
    sd0 = vis_faligndata(s0, s1);                                       \
    sd1 = vis_faligndata(s1, s2);                                       \
    sd2 = vis_faligndata(s2, s3);                                       \
    sd3 = vis_faligndata(s3, s4);                                       \
    sp += 4;                                                            \
    dd2old = dd2;                                                       \
    EXTRACT_S16_43R;                                                    \
  } while (0)
491
492/***************************************************************/
493/*
494 * Both source and destination image data are 1-d vectors and
495 * 8-byte aligned. And size is in 4-pixels.
496 */
497
498void mlib_v_ImageChannelExtract_S16_43R_A8D1X4(const mlib_s16 *src,
499 mlib_s16 *dst,
500 mlib_s32 dsize)
501{
502 mlib_d64 *sp, *dp; /* 8-byte aligned pointer for pixel */
503 mlib_d64 sd0, sd1, sd2, sd3; /* source data */
504 mlib_d64 dd0, dd1, dd2; /* dst data */
505 mlib_s32 i;
506
507 sp = (mlib_d64 *) src;
508 dp = (mlib_d64 *) dst;
509
510 /* set GSR.offset for vis_faligndata() */
511/* vis_alignaddr((void *)0, 6); *//* only for _old */
512
513#pragma pipeloop(0)
514 for (i = 0; i < dsize / 4; i++) {
515 LOAD_EXTRACT_S16_43R_STORE;
516 }
517}
518
519/***************************************************************/
520/*
521 * Either source or destination image data are not 1-d vectors, but
522 * they are 8-byte aligned. The xsize is multiple of 8.
523 * slb and dlb are multiple of 8.
524 */
525
526void mlib_v_ImageChannelExtract_S16_43R_A8D2X4(const mlib_s16 *src,
527 mlib_s32 slb,
528 mlib_s16 *dst,
529 mlib_s32 dlb,
530 mlib_s32 xsize,
531 mlib_s32 ysize)
532{
533 mlib_d64 *sp, *dp; /* 8-byte aligned pointer for pixel */
534 mlib_d64 *sl, *dl; /* 8-byte aligned pointer for line */
535 mlib_d64 sd0, sd1, sd2, sd3; /* source data */
536 mlib_d64 dd0, dd1, dd2; /* dst data */
537 mlib_s32 i, j; /* indices for x, y */
538
539 /* set GSR.offset for vis_faligndata() */
540/* vis_alignaddr((void *)0, 6); *//* only for _old */
541
542 sp = sl = (mlib_d64 *) src;
543 dp = dl = (mlib_d64 *) dst;
544
545 /* row loop */
546 for (j = 0; j < ysize; j++) {
547 /* 4-pixel column loop */
548#pragma pipeloop(0)
549 for (i = 0; i < xsize / 4; i++) {
550 LOAD_EXTRACT_S16_43R_STORE;
551 }
552
553 sp = sl = (mlib_d64 *) ((mlib_u8 *) sl + slb);
554 dp = dl = (mlib_d64 *) ((mlib_u8 *) dl + dlb);
555 }
556}
557
558/***************************************************************/
559/*
560 * Either source or destination data are not 8-byte aligned.
561 * And dsize is multiple of 8.
562 */
563
564void mlib_v_ImageChannelExtract_S16_43R_D1(const mlib_s16 *src,
565 mlib_s16 *dst,
566 mlib_s32 dsize)
567{
568 mlib_s16 *sa, *da; /* pointer for pixel */
569 mlib_s16 *dend, *dend2; /* end points in dst */
570 mlib_d64 *dp; /* 8-byte aligned start points in dst */
571 mlib_d64 *sp; /* 8-byte aligned start point in src */
572 mlib_d64 s0, s1, s2, s3, s4; /* 8-byte source row data */
573 mlib_d64 sd0, sd1, sd2, sd3; /* 8-byte source data */
574 mlib_d64 dd0, dd1, dd2; /* dst data */
575 mlib_d64 dd2old; /* the last datum of the last step */
576 mlib_s32 soff; /* offset of address in src */
577 mlib_s32 doff; /* offset of address in dst */
578 mlib_s32 emask; /* edge mask */
579 mlib_s32 i, n;
580
581 sa = (void *)src;
582 da = dst;
583
584 /* prepare the source address */
585 sp = (mlib_d64 *) ((mlib_addr) sa & (~7));
586 soff = ((mlib_addr) sa & 7);
587
588 /* prepare the destination addresses */
589 dp = (mlib_d64 *) ((mlib_addr) da & (~7));
590 dend = da + dsize * 3 - 1;
591 dend2 = dend - 11;
592 doff = 8 - ((mlib_addr) da & 7);
593
594 /* generate edge mask for the start point */
595 emask = vis_edge16(da, dend);
596
597 /* load 32 byte, convert, store 24 bytes */
598 s4 = sp[0]; /* initial value */
599 LOAD_EXTRACT_S16_43R;
600
601 if (dsize >= 4) {
602 if (doff == 8) {
603 vis_pst_16(dd0, dp++, emask);
604 *dp++ = dd1;
605 *dp++ = dd2;
606 }
607 else {
608 vis_alignaddr((void *)doff, 0);
609 vis_pst_16(vis_faligndata(dd0, dd0), dp++, emask);
610 *dp++ = vis_faligndata(dd0, dd1);
611 *dp++ = vis_faligndata(dd1, dd2);
612 }
613 }
614 else { /* for very small size */
615 if (doff == 8) {
616 vis_pst_16(dd0, dp++, emask);
617 if ((mlib_addr) dp <= (mlib_addr) dend) {
618 emask = vis_edge16(dp, dend);
619 vis_pst_16(dd1, dp++, emask);
620 if ((mlib_addr) dp <= (mlib_addr) dend) {
621 emask = vis_edge16(dp, dend);
622 vis_pst_16(dd2, dp++, emask);
623 }
624 }
625 }
626 else {
627 vis_alignaddr((void *)doff, 0);
628 vis_pst_16(vis_faligndata(dd0, dd0), dp++, emask);
629 if ((mlib_addr) dp <= (mlib_addr) dend) {
630 emask = vis_edge16(dp, dend);
631 vis_pst_16(vis_faligndata(dd0, dd1), dp++, emask);
632 if ((mlib_addr) dp <= (mlib_addr) dend) {
633 emask = vis_edge16(dp, dend);
634 vis_pst_16(vis_faligndata(dd1, dd2), dp++, emask);
635 }
636 }
637 }
638 }
639
640 /* no edge handling is needed in the loop */
641 if (doff == 8) {
642 if ((mlib_addr) dp <= (mlib_addr) dend2) {
643 n = ((mlib_u8 *) dend2 - (mlib_u8 *) dp) / 24 + 1;
644#pragma pipeloop(0)
645 for (i = 0; i < n; i++) {
646 LOAD_EXTRACT_S16_43R;
647 *dp++ = dd0;
648 *dp++ = dd1;
649 *dp++ = dd2;
650 }
651 }
652 }
653 else {
654 if ((mlib_addr) dp <= (mlib_addr) dend2) {
655 n = ((mlib_u8 *) dend2 - (mlib_u8 *) dp) / 24 + 1;
656#pragma pipeloop(0)
657 for (i = 0; i < n; i++) {
658 LOAD_EXTRACT_S16_43R;
659 vis_alignaddr((void *)doff, 0);
660 *dp++ = vis_faligndata(dd2old, dd0);
661 *dp++ = vis_faligndata(dd0, dd1);
662 *dp++ = vis_faligndata(dd1, dd2);
663 }
664 }
665 }
666
667 if ((mlib_addr) dp <= (mlib_addr) dend) {
668 LOAD_EXTRACT_S16_43R;
669 emask = vis_edge16(dp, dend);
670 if (doff == 8) {
671 vis_pst_16(dd0, dp++, emask);
672 if ((mlib_addr) dp <= (mlib_addr) dend) {
673 emask = vis_edge16(dp, dend);
674 vis_pst_16(dd1, dp++, emask);
675 if ((mlib_addr) dp <= (mlib_addr) dend) {
676 emask = vis_edge16(dp, dend);
677 vis_pst_16(dd2, dp++, emask);
678 }
679 }
680 }
681 else {
682 vis_alignaddr((void *)doff, 0);
683 vis_pst_16(vis_faligndata(dd2old, dd0), dp++, emask);
684 if ((mlib_addr) dp <= (mlib_addr) dend) {
685 emask = vis_edge16(dp, dend);
686 vis_pst_16(vis_faligndata(dd0, dd1), dp++, emask);
687 if ((mlib_addr) dp <= (mlib_addr) dend) {
688 emask = vis_edge16(dp, dend);
689 vis_pst_16(vis_faligndata(dd1, dd2), dp++, emask);
690 }
691 }
692 }
693 }
694}
695
696/***************************************************************/
697void mlib_v_ImageChannelExtract_S16_43R(const mlib_s16 *src,
698 mlib_s32 slb,
699 mlib_s16 *dst,
700 mlib_s32 dlb,
701 mlib_s32 xsize,
702 mlib_s32 ysize)
703{
704 mlib_s16 *sa, *da;
705 mlib_s16 *sl, *dl;
706 mlib_s32 j;
707
708 sa = sl = (void *)src;
709 da = dl = dst;
710
711 for (j = 0; j < ysize; j++) {
712 mlib_v_ImageChannelExtract_S16_43R_D1(sa, da, xsize);
713 sa = sl = (mlib_s16 *) ((mlib_u8 *) sl + slb);
714 da = dl = (mlib_s16 *) ((mlib_u8 *) dl + dlb);
715 }
716}
717
718/***************************************************************/
/*
 * EXTRACT_U8_43L_old -- byte-at-a-time predecessor of EXTRACT_U8_43L.
 *
 * Inputs : sd0..sd3 (eight 4-byte pixels), dd0..dd2 (previous contents
 *          are shifted out completely).
 * Outputs: dd0..dd2 hold the 24 packed bytes shown in the last comment
 *          of each group; sd0..sd3 are overwritten along the way.
 *
 * The macro never sets GSR.offset itself.  The caller has to execute
 *     vis_alignaddr((void *)0, 1);
 * beforehand so that each vis_faligndata() moves the data by one byte.
 *
 * The body is wrapped in do { } while (0) so that the expansion is a
 * single statement wherever it is used.
 */
#define EXTRACT_U8_43L_old      /* shift left */                        \
  do {                                                                  \
    dd0 = vis_faligndata(dd0, sd0);     /* --------------r0 */          \
    sd0 = vis_faligndata(sd0, sd0);                                     \
    dd0 = vis_faligndata(dd0, sd0);     /* ------------r0g0 */          \
    sd0 = vis_faligndata(sd0, sd0);                                     \
    dd0 = vis_faligndata(dd0, sd0);     /* ----------r0g0b0 */          \
    sd0 = vis_faligndata(sd0, sd0);                                     \
    sd0 = vis_faligndata(sd0, sd0);                                     \
    dd0 = vis_faligndata(dd0, sd0);     /* --------r0g0b0r1 */          \
    sd0 = vis_faligndata(sd0, sd0);                                     \
    dd0 = vis_faligndata(dd0, sd0);     /* ------r0g0b0r1g1 */          \
    sd0 = vis_faligndata(sd0, sd0);                                     \
    dd0 = vis_faligndata(dd0, sd0);     /* ----r0g0b0r1g1b1 */          \
                                                                        \
    dd0 = vis_faligndata(dd0, sd1);     /* --r0g0b0r1g1b1r2 */          \
    sd1 = vis_faligndata(sd1, sd1);                                     \
    dd0 = vis_faligndata(dd0, sd1);     /* r0g0b0r1g1b1r2g2 */          \
                                                                        \
    sd1 = vis_faligndata(sd1, sd1);                                     \
    dd1 = vis_faligndata(dd1, sd1);     /* --------------b2 */          \
    sd1 = vis_faligndata(sd1, sd1);                                     \
    sd1 = vis_faligndata(sd1, sd1);                                     \
    dd1 = vis_faligndata(dd1, sd1);     /* ------------b2r3 */          \
    sd1 = vis_faligndata(sd1, sd1);                                     \
    dd1 = vis_faligndata(dd1, sd1);     /* ----------b2r3g3 */          \
    sd1 = vis_faligndata(sd1, sd1);                                     \
    dd1 = vis_faligndata(dd1, sd1);     /* --------b2r3g3b3 */          \
                                                                        \
    dd1 = vis_faligndata(dd1, sd2);     /* ------b2r3g3b3r4 */          \
    sd2 = vis_faligndata(sd2, sd2);                                     \
    dd1 = vis_faligndata(dd1, sd2);     /* ----b2r3g3b3r4g4 */          \
    sd2 = vis_faligndata(sd2, sd2);                                     \
    dd1 = vis_faligndata(dd1, sd2);     /* --b2r3g3b3r4g4b4 */          \
    sd2 = vis_faligndata(sd2, sd2);                                     \
    sd2 = vis_faligndata(sd2, sd2);                                     \
    dd1 = vis_faligndata(dd1, sd2);     /* b2r3g3b3r4g4b4r5 */          \
                                                                        \
    sd2 = vis_faligndata(sd2, sd2);                                     \
    dd2 = vis_faligndata(dd2, sd2);     /* --------------g5 */          \
    sd2 = vis_faligndata(sd2, sd2);                                     \
    dd2 = vis_faligndata(dd2, sd2);     /* ------------g5b5 */          \
                                                                        \
    dd2 = vis_faligndata(dd2, sd3);     /* ----------g5b5r6 */          \
    sd3 = vis_faligndata(sd3, sd3);                                     \
    dd2 = vis_faligndata(dd2, sd3);     /* --------g5b5r6g6 */          \
    sd3 = vis_faligndata(sd3, sd3);                                     \
    dd2 = vis_faligndata(dd2, sd3);     /* ------g5b5r6g6b6 */          \
    sd3 = vis_faligndata(sd3, sd3);                                     \
    sd3 = vis_faligndata(sd3, sd3);                                     \
    dd2 = vis_faligndata(dd2, sd3);     /* ----g5b5r6g6b6r7 */          \
    sd3 = vis_faligndata(sd3, sd3);                                     \
    dd2 = vis_faligndata(dd2, sd3);     /* --g5b5r6g6b6r7g7 */          \
    sd3 = vis_faligndata(sd3, sd3);                                     \
    dd2 = vis_faligndata(dd2, sd3);     /* g5b5r6g6b6r7g7b7 */          \
  } while (0)
774
775/***************************************************************/
/*
 * EXTRACT_U8_43L -- drop the trailing byte of each 4-byte pixel.
 *
 * Inputs : sd0..sd3, each holding two pixels laid out as "r g b --".
 * Outputs: dd0..dd2, the 24 packed r/g/b bytes of those eight pixels
 *          (layout after each step is shown in the trailing comments).
 * Scratch: sda (the two halves of a source word swapped, so that the
 *          second pixel of the word sits in the leading bytes).
 *
 * The macro programs GSR.offset itself through vis_alignaddr() and
 * leaves it at 3 on exit, so a caller that needs another offset has to
 * set it again afterwards.  Previous contents of dd0..dd2 are shifted
 * out entirely and do not reach the result.
 *
 * The body is wrapped in do { } while (0) so that the expansion is a
 * single statement wherever it is used.
 */
#define EXTRACT_U8_43L          /* shift left */                         \
  do {                                                                   \
    vis_alignaddr((void *)0, 3);                                         \
    dd0 = vis_faligndata(dd0, sd0);     /* ----------r0g0b0 */           \
    sda = vis_freg_pair(vis_read_lo(sd0), vis_read_hi(sd0));             \
    dd0 = vis_faligndata(dd0, sda);     /* ----r0g0b0r1g1b1 */           \
                                                                         \
    vis_alignaddr((void *)0, 2);                                         \
    dd0 = vis_faligndata(dd0, sd1);     /* r0g0b0r1g1b1r2g2 */           \
                                                                         \
    vis_alignaddr((void *)0, 3);                                         \
    dd1 = vis_faligndata(dd1, sd1);     /* ----------r2g2b2 */           \
    sda = vis_freg_pair(vis_read_lo(sd1), vis_read_hi(sd1));             \
    dd1 = vis_faligndata(dd1, sda);     /* ----r2g2b2r3g3b3 */           \
    dd1 = vis_faligndata(dd1, sd2);     /* g2b2r3g3b3r4g4b4 */           \
                                                                         \
    sda = vis_freg_pair(vis_read_lo(sd2), vis_read_hi(sd2));             \
    vis_alignaddr((void *)0, 1);                                         \
    dd1 = vis_faligndata(dd1, sda);     /* b2r3g3b3r4g4b4r5 */           \
                                                                         \
    vis_alignaddr((void *)0, 3);                                         \
    dd2 = vis_faligndata(dd2, sda);     /* ----------r5g5b5 */           \
                                                                         \
    dd2 = vis_faligndata(dd2, sd3);     /* ----r5g5b5r6g6b6 */           \
    sda = vis_freg_pair(vis_read_lo(sd3), vis_read_hi(sd3));             \
    dd2 = vis_faligndata(dd2, sda);     /* g5b5r6g6b6r7g7b7 */           \
  } while (0)
802
803/***************************************************************/
/*
 * LOAD_EXTRACT_U8_43L_STORE -- one step of the fully aligned U8 43L loop.
 *
 * Loads four 8-byte words (eight 4-channel pixels) through sp, packs
 * them with EXTRACT_U8_43L and writes the resulting three 8-byte words
 * through dp.  Both sp and dp are advanced and are dereferenced as
 * mlib_d64, so they have to be 8-byte aligned.
 *
 * The body is wrapped in do { } while (0) so that the expansion is a
 * single statement wherever it is used.
 */
#define LOAD_EXTRACT_U8_43L_STORE                                       \
  do {                                                                  \
    sd0 = *sp++;                        /* r0g0b0--r1g1b1-- */          \
    sd1 = *sp++;                        /* r2g2b2--r3g3b3-- */          \
    sd2 = *sp++;                        /* r4g4b4--r5g5b5-- */          \
    sd3 = *sp++;                        /* r6g6b6--r7g7b7-- */          \
                                                                        \
    EXTRACT_U8_43L;                                                     \
                                                                        \
    *dp++ = dd0;                        /* r0g0b0r1g1b1r2g2 */          \
    *dp++ = dd1;                        /* b2r3g3b3r4g4b4r5 */          \
    *dp++ = dd2;                        /* g5b5r6g6b6r7g7b7 */          \
  } while (0)
816
817/***************************************************************/
/*
 * LOAD_EXTRACT_U8_43L -- one step of the unaligned U8 43L path.
 *
 * sp is the source address rounded down to 8 bytes and soff the byte
 * offset that was rounded away.  The macro
 *   - reuses the word kept in s4 by the previous step as s0 (the caller
 *     primes s4 with sp[0] before the first use),
 *   - loads sp[1]..sp[4]; sp[4] lies one word past the 32 bytes that
 *     are consumed by this step,
 *   - realigns the five words into the pixel-aligned sd0..sd3 using
 *     GSR.offset = soff,
 *   - advances sp by four words,
 *   - saves the previous dd2 in dd2old (needed by callers that store
 *     with a destination offset), and
 *   - runs EXTRACT_U8_43L, which leaves the packed data in dd0..dd2 and
 *     changes GSR.offset.  (The _old extractor would instead need
 *     vis_alignaddr((void *)0, 1) right before it.)
 *
 * The body is wrapped in do { } while (0) so that the expansion is a
 * single statement wherever it is used.
 */
#define LOAD_EXTRACT_U8_43L                                             \
  do {                                                                  \
    vis_alignaddr((void *)soff, 0);                                     \
    s0 = s4;                                                            \
    s1 = sp[1];                                                         \
    s2 = sp[2];                                                         \
    s3 = sp[3];                                                         \
    s4 = sp[4];                                                         \
    sd0 = vis_faligndata(s0, s1);       /* sd0..sd3 are pixel aligned */ \
    sd1 = vis_faligndata(s1, s2);                                       \
    sd2 = vis_faligndata(s2, s3);                                       \
    sd3 = vis_faligndata(s3, s4);                                       \
    sp += 4;                                                            \
                                                                        \
    dd2old = dd2;                                                       \
    EXTRACT_U8_43L;                                                     \
  } while (0)
835
836/***************************************************************/
837/*
838 * Both source and destination image data are 1-d vectors and
839 * 8-byte aligned. And dsize is multiple of 8.
840 */
841
842void mlib_v_ImageChannelExtract_U8_43L_A8D1X8(const mlib_u8 *src,
843 mlib_u8 *dst,
844 mlib_s32 dsize)
845{
846 mlib_d64 *sp, *dp;
847 mlib_d64 sd0, sd1, sd2, sd3; /* source data */
848 mlib_d64 dd0, dd1, dd2; /* dst data */
849 mlib_d64 sda;
850 mlib_s32 i;
851
852 sp = (mlib_d64 *) src;
853 dp = (mlib_d64 *) dst;
854
855 /* set GSR.offset for vis_faligndata() */
856/* vis_alignaddr((void *)0, 1); *//* for _old only */
857
858#pragma pipeloop(0)
859 for (i = 0; i < dsize / 8; i++) {
860 LOAD_EXTRACT_U8_43L_STORE;
861 }
862}
863
864/***************************************************************/
865/*
866 * Either source or destination image data are not 1-d vectors, but
867 * they are 8-byte aligned. And slb and dlb are multiple of 8.
868 * The xsize is multiple of 8.
869 */
870
871void mlib_v_ImageChannelExtract_U8_43L_A8D2X8(const mlib_u8 *src,
872 mlib_s32 slb,
873 mlib_u8 *dst,
874 mlib_s32 dlb,
875 mlib_s32 xsize,
876 mlib_s32 ysize)
877{
878 mlib_d64 *sp, *dp; /* 8-byte aligned pointer for pixel */
879 mlib_d64 *sl, *dl; /* 8-byte aligned pointer for line */
880 mlib_d64 sd0, sd1, sd2, sd3; /* source data */
881 mlib_d64 dd0, dd1, dd2; /* dst data */
882 mlib_d64 sda;
883 mlib_s32 i, j; /* indices for x, y */
884
885 /* set GSR.offset for vis_faligndata() */
886/* vis_alignaddr((void *)0, 1); *//* for _old only */
887
888 sp = sl = (mlib_d64 *) src;
889 dp = dl = (mlib_d64 *) dst;
890
891 /* row loop */
892 for (j = 0; j < ysize; j++) {
893 /* 8-byte column loop */
894#pragma pipeloop(0)
895 for (i = 0; i < xsize / 8; i++) {
896 LOAD_EXTRACT_U8_43L_STORE;
897 }
898
899 sp = sl = (mlib_d64 *) ((mlib_u8 *) sl + slb);
900 dp = dl = (mlib_d64 *) ((mlib_u8 *) dl + dlb);
901 }
902}
903
904/***************************************************************/
905/*
906 * Either source or destination data are not 8-byte aligned.
907 * And ssize is multiple of 8.
908 */
909
910void mlib_v_ImageChannelExtract_U8_43L_D1(const mlib_u8 *src,
911 mlib_u8 *dst,
912 mlib_s32 dsize)
913{
914 mlib_u8 *sa, *da;
915 mlib_u8 *dend, *dend2; /* end points in dst */
916 mlib_d64 *dp; /* 8-byte aligned start points in dst */
917 mlib_d64 *sp; /* 8-byte aligned start point in src */
918 mlib_d64 s0, s1, s2, s3, s4; /* 8-byte source row data */
919 mlib_d64 sd0, sd1, sd2, sd3; /* 8-byte source data */
920 mlib_d64 dd0, dd1, dd2; /* dst data */
921 mlib_d64 dd2old; /* the last datum of the last step */
922 mlib_d64 sda;
923 mlib_s32 soff; /* offset of address in src */
924 mlib_s32 doff; /* offset of address in dst */
925 mlib_s32 emask; /* edge mask */
926 mlib_s32 i, n;
927
928 sa = (void *)src;
929 da = dst;
930
931 /* prepare the source address */
932 sp = (mlib_d64 *) ((mlib_addr) sa & (~7));
933 soff = ((mlib_addr) sa & 7);
934
935 /* prepare the destination addresses */
936 dp = (mlib_d64 *) ((mlib_addr) da & (~7));
937 dend = da + dsize * 3 - 1;
938 dend2 = dend - 23;
939 doff = 8 - ((mlib_addr) da & 7);
940
941 /* generate edge mask for the start point */
942 emask = vis_edge8(da, dend);
943
944 /* load 32 byte, convert, store 24 bytes */
945 s4 = sp[0]; /* initial value */
946 LOAD_EXTRACT_U8_43L;
947
948 if (dsize >= 8) {
949 if (doff == 8) {
950 vis_pst_8(dd0, dp++, emask);
951 *dp++ = dd1;
952 *dp++ = dd2;
953 }
954 else {
955 vis_alignaddr((void *)doff, 0);
956 vis_pst_8(vis_faligndata(dd0, dd0), dp++, emask);
957 *dp++ = vis_faligndata(dd0, dd1);
958 *dp++ = vis_faligndata(dd1, dd2);
959 }
960 }
961 else { /* for very small size */
962 if (doff == 8) {
963 vis_pst_8(dd0, dp++, emask);
964 if ((mlib_addr) dp <= (mlib_addr) dend) {
965 emask = vis_edge8(dp, dend);
966 vis_pst_8(dd1, dp++, emask);
967 if ((mlib_addr) dp <= (mlib_addr) dend) {
968 emask = vis_edge8(dp, dend);
969 vis_pst_8(dd2, dp++, emask);
970 }
971 }
972 }
973 else {
974 vis_alignaddr((void *)doff, 0);
975 vis_pst_8(vis_faligndata(dd0, dd0), dp++, emask);
976 if ((mlib_addr) dp <= (mlib_addr) dend) {
977 emask = vis_edge8(dp, dend);
978 vis_pst_8(vis_faligndata(dd0, dd1), dp++, emask);
979 if ((mlib_addr) dp <= (mlib_addr) dend) {
980 emask = vis_edge8(dp, dend);
981 vis_pst_8(vis_faligndata(dd1, dd2), dp++, emask);
982 if ((mlib_addr) dp <= (mlib_addr) dend) {
983 emask = vis_edge8(dp, dend);
984 vis_pst_8(vis_faligndata(dd2, dd2), dp++, emask);
985 }
986 }
987 }
988 }
989 }
990
991 /* no edge handling is needed in the loop */
992 if (doff == 8) {
993 if ((mlib_addr) dp <= (mlib_addr) dend2) {
994 n = ((mlib_u8 *) dend2 - (mlib_u8 *) dp) / 24 + 1;
995#pragma pipeloop(0)
996 for (i = 0; i < n; i++) {
997 LOAD_EXTRACT_U8_43L;
998 *dp++ = dd0;
999 *dp++ = dd1;
1000 *dp++ = dd2;
1001 }
1002 }
1003 }
1004 else {
1005 if ((mlib_addr) dp <= (mlib_addr) dend2) {
1006 n = ((mlib_u8 *) dend2 - (mlib_u8 *) dp) / 24 + 1;
1007#pragma pipeloop(0)
1008 for (i = 0; i < n; i++) {
1009 LOAD_EXTRACT_U8_43L;
1010 vis_alignaddr((void *)doff, 0);
1011 *dp++ = vis_faligndata(dd2old, dd0);
1012 *dp++ = vis_faligndata(dd0, dd1);
1013 *dp++ = vis_faligndata(dd1, dd2);
1014 }
1015 }
1016 }
1017
1018 if ((mlib_addr) dp <= (mlib_addr) dend) {
1019 LOAD_EXTRACT_U8_43L;
1020 emask = vis_edge8(dp, dend);
1021 if (doff == 8) {
1022 vis_pst_8(dd0, dp++, emask);
1023 if ((mlib_addr) dp <= (mlib_addr) dend) {
1024 emask = vis_edge8(dp, dend);
1025 vis_pst_8(dd1, dp++, emask);
1026 if ((mlib_addr) dp <= (mlib_addr) dend) {
1027 emask = vis_edge8(dp, dend);
1028 vis_pst_8(dd2, dp++, emask);
1029 }
1030 }
1031 }
1032 else {
1033 vis_alignaddr((void *)doff, 0);
1034 vis_pst_8(vis_faligndata(dd2old, dd0), dp++, emask);
1035 if ((mlib_addr) dp <= (mlib_addr) dend) {
1036 emask = vis_edge8(dp, dend);
1037 vis_pst_8(vis_faligndata(dd0, dd1), dp++, emask);
1038 if ((mlib_addr) dp <= (mlib_addr) dend) {
1039 emask = vis_edge8(dp, dend);
1040 vis_pst_8(vis_faligndata(dd1, dd2), dp++, emask);
1041 }
1042 }
1043 }
1044 }
1045}
1046
1047/***************************************************************/
1048void mlib_v_ImageChannelExtract_U8_43L(const mlib_u8 *src,
1049 mlib_s32 slb,
1050 mlib_u8 *dst,
1051 mlib_s32 dlb,
1052 mlib_s32 xsize,
1053 mlib_s32 ysize)
1054{
1055 mlib_u8 *sa, *da;
1056 mlib_u8 *sl, *dl;
1057 mlib_s32 j;
1058
1059 sa = sl = (void *)src;
1060 da = dl = dst;
1061
1062 for (j = 0; j < ysize; j++) {
1063 mlib_v_ImageChannelExtract_U8_43L_D1(sa, da, xsize);
1064 sa = sl += slb;
1065 da = dl += dlb;
1066 }
1067}
1068
1069/***************************************************************/
/*
 * EXTRACT_S16_43L
 *
 * Packs four 4-channel S16 pixels into three 8-byte words, dropping the
 * last channel of each pixel (the per-line comments use "--" for the
 * dropped channel).
 *
 *   in  : sd0..sd3  one pixel each, laid out as r g b --
 *   out : dd0 = r0 g0 b0 r1
 *         dd1 = g1 b1 r2 g2
 *         dd2 = b2 r3 g3 b3
 *
 * Each ddN is built in two steps: the first vis_faligndata() shifts the
 * three wanted channels of one pixel in behind the stale tail of ddN,
 * the second one shifts that stale part out again while appending the
 * channels of the next pixel.  The previous contents of dd0..dd2
 * therefore never reach the result.
 *
 * Side effect: the alignment offset set through vis_alignaddr() is left
 * at 6 on exit, so callers that need another offset must set it again.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to
 * exactly one statement; invoke it with a trailing semicolon.
 */
#define EXTRACT_S16_43L                                          \
  do {                                                           \
    vis_alignaddr((void *)0, 6);                                 \
    dd0 = vis_faligndata(dd0, sd0);     /* --r0g0b0 */           \
    vis_alignaddr((void *)0, 2);                                 \
    dd0 = vis_faligndata(dd0, sd1);     /* r0g0b0r1 */           \
                                                                 \
    vis_alignaddr((void *)0, 6);                                 \
    dd1 = vis_faligndata(dd1, sd1);     /* --r1g1b1 */           \
    vis_alignaddr((void *)0, 4);                                 \
    dd1 = vis_faligndata(dd1, sd2);     /* g1b1r2g2 */           \
                                                                 \
    vis_alignaddr((void *)0, 6);                                 \
    dd2 = vis_faligndata(dd2, sd2);     /* --r2g2b2 */           \
    /* offset is still 6 here, which is what the last step needs */ \
    dd2 = vis_faligndata(dd2, sd3);     /* b2r3g3b3 */           \
  } while (0)
1084
1085/***************************************************************/
/*
 * LOAD_EXTRACT_S16_43L_STORE
 *
 * Fully aligned fast path: reads four 8-byte words (four 4-channel S16
 * pixels) through sp, packs them with EXTRACT_S16_43L and writes the
 * resulting three 8-byte words through dp.  Both sp and dp are
 * post-incremented, so sp advances by 4 words and dp by 3 words.
 *
 * Uses the caller's locals sp, dp, sd0..sd3 and dd0..dd2.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to
 * exactly one statement; invoke it with a trailing semicolon.
 */
#define LOAD_EXTRACT_S16_43L_STORE                               \
  do {                                                           \
    sd0 = *sp++;                        /* r0g0b0-- */           \
    sd1 = *sp++;                        /* r1g1b1-- */           \
    sd2 = *sp++;                        /* r2g2b2-- */           \
    sd3 = *sp++;                        /* r3g3b3-- */           \
                                                                 \
    EXTRACT_S16_43L;                                             \
                                                                 \
    *dp++ = dd0;                        /* r0g0b0r1 */           \
    *dp++ = dd1;                        /* g1b1r2g2 */           \
    *dp++ = dd2;                        /* b2r3g3b3 */           \
  } while (0)
1098
1099/***************************************************************/
/*
 * LOAD_EXTRACT_S16_43L
 *
 * Unaligned-source step: produces the next three packed destination
 * words in dd0..dd2 without storing them.
 *
 *   - s4 must hold the word at sp[0] on entry (the caller seeds it once,
 *     afterwards each invocation leaves the right value behind);
 *   - sp[1]..sp[4] are loaded, and together with the carried word they
 *     are realigned by soff bytes into sd0..sd3;
 *   - sp is advanced by 4 words;
 *   - the previous dd2 is saved in dd2old before EXTRACT_S16_43L
 *     overwrites dd0..dd2, so the caller can still merge its tail into
 *     an unaligned destination.
 *
 * Side effect: the vis_alignaddr() offset is changed (first to soff,
 * then by EXTRACT_S16_43L), so callers must set it again before doing
 * their own vis_faligndata().
 *
 * Uses the caller's locals sp, soff, s0..s4, sd0..sd3, dd0..dd2, dd2old.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to
 * exactly one statement; invoke it with a trailing semicolon.
 */
#define LOAD_EXTRACT_S16_43L                                     \
  do {                                                           \
    vis_alignaddr((void *)soff, 0);                              \
    s0 = s4;                            /* carried over word */  \
    s1 = sp[1];                                                  \
    s2 = sp[2];                                                  \
    s3 = sp[3];                                                  \
    s4 = sp[4];                                                  \
    sd0 = vis_faligndata(s0, s1);                                \
    sd1 = vis_faligndata(s1, s2);                                \
    sd2 = vis_faligndata(s2, s3);                                \
    sd3 = vis_faligndata(s3, s4);                                \
    sp += 4;                                                     \
    dd2old = dd2;                                                \
    EXTRACT_S16_43L;                                             \
  } while (0)
1115
1116/***************************************************************/
1117/*
1118 * Both source and destination image data are 1-d vectors and
1119 * 8-byte aligned. And dsize is multiple of 4.
1120 */
1121
1122void mlib_v_ImageChannelExtract_S16_43L_A8D1X4(const mlib_s16 *src,
1123 mlib_s16 *dst,
1124 mlib_s32 dsize)
1125{
1126 mlib_d64 *sp, *dp; /* 8-byte aligned pointer for pixel */
1127 mlib_d64 sd0, sd1, sd2, sd3; /* source data */
1128 mlib_d64 dd0, dd1, dd2; /* dst data */
1129 mlib_s32 i;
1130
1131 sp = (mlib_d64 *) src;
1132 dp = (mlib_d64 *) dst;
1133
1134 /* set GSR.offset for vis_faligndata() */
1135/* vis_alignaddr((void *)0, 2); *//* only for _old */
1136
1137#pragma pipeloop(0)
1138 for (i = 0; i < dsize / 4; i++) {
1139 LOAD_EXTRACT_S16_43L_STORE;
1140 }
1141}
1142
1143/***************************************************************/
1144/*
1145 * Either source or destination image data are not 1-d vectors, but
1146 * they are 8-byte aligned. The xsize is multiple of 4.
1147 * And slb and dlb are multiple of 8.
1148 */
1149
1150void mlib_v_ImageChannelExtract_S16_43L_A8D2X4(const mlib_s16 *src,
1151 mlib_s32 slb,
1152 mlib_s16 *dst,
1153 mlib_s32 dlb,
1154 mlib_s32 xsize,
1155 mlib_s32 ysize)
1156{
1157 mlib_d64 *sp, *dp; /* 8-byte aligned pointer for pixel */
1158 mlib_d64 *sl, *dl; /* 8-byte aligned pointer for line */
1159 mlib_d64 sd0, sd1, sd2, sd3; /* source data */
1160 mlib_d64 dd0, dd1, dd2; /* dst data */
1161 mlib_s32 i, j; /* indices for x, y */
1162
1163 /* set GSR.offset for vis_faligndata() */
1164/* vis_alignaddr((void *)0, 2); *//* only for _old */
1165
1166 sp = sl = (mlib_d64 *) src;
1167 dp = dl = (mlib_d64 *) dst;
1168
1169 /* row loop */
1170 for (j = 0; j < ysize; j++) {
1171 /* 4-pixel column loop */
1172#pragma pipeloop(0)
1173 for (i = 0; i < xsize / 4; i++) {
1174 LOAD_EXTRACT_S16_43L_STORE;
1175 }
1176
1177 sp = sl = (mlib_d64 *) ((mlib_u8 *) sl + slb);
1178 dp = dl = (mlib_d64 *) ((mlib_u8 *) dl + dlb);
1179 }
1180}
1181
1182/***************************************************************/
1183/*
1184 * Either source or destination data are not 8-byte aligned.
1185 * And size is is in pixels.
1186 */
1187
1188void mlib_v_ImageChannelExtract_S16_43L_D1(const mlib_s16 *src,
1189 mlib_s16 *dst,
1190 mlib_s32 dsize)
1191{
1192 mlib_s16 *sa, *da; /* pointer for pixel */
1193 mlib_s16 *dend, *dend2; /* end points in dst */
1194 mlib_d64 *dp; /* 8-byte aligned start points in dst */
1195 mlib_d64 *sp; /* 8-byte aligned start point in src */
1196 mlib_d64 s0, s1, s2, s3, s4; /* 8-byte source row data */
1197 mlib_d64 sd0, sd1, sd2, sd3; /* 8-byte source data */
1198 mlib_d64 dd0, dd1, dd2; /* dst data */
1199 mlib_d64 dd2old; /* the last datum of the last step */
1200 mlib_s32 soff; /* offset of address in src */
1201 mlib_s32 doff; /* offset of address in dst */
1202 mlib_s32 emask; /* edge mask */
1203 mlib_s32 i, n;
1204
1205 sa = (void *)src;
1206 da = dst;
1207
1208 /* prepare the source address */
1209 sp = (mlib_d64 *) ((mlib_addr) sa & (~7));
1210 soff = ((mlib_addr) sa & 7);
1211
1212 /* prepare the destination addresses */
1213 dp = (mlib_d64 *) ((mlib_addr) da & (~7));
1214 dend = da + dsize * 3 - 1;
1215 dend2 = dend - 11;
1216 doff = 8 - ((mlib_addr) da & 7);
1217
1218 /* generate edge mask for the start point */
1219 emask = vis_edge16(da, dend);
1220
1221 /* load 32 byte, convert, store 24 bytes */
1222 s4 = sp[0]; /* initial value */
1223 LOAD_EXTRACT_S16_43L;
1224
1225 if (dsize >= 4) {
1226 if (doff == 8) {
1227 vis_pst_16(dd0, dp++, emask);
1228 *dp++ = dd1;
1229 *dp++ = dd2;
1230 }
1231 else {
1232 vis_alignaddr((void *)doff, 0);
1233 vis_pst_16(vis_faligndata(dd0, dd0), dp++, emask);
1234 *dp++ = vis_faligndata(dd0, dd1);
1235 *dp++ = vis_faligndata(dd1, dd2);
1236 }
1237 }
1238 else { /* for very small size */
1239 if (doff == 8) {
1240 vis_pst_16(dd0, dp++, emask);
1241 if ((mlib_addr) dp <= (mlib_addr) dend) {
1242 emask = vis_edge16(dp, dend);
1243 vis_pst_16(dd1, dp++, emask);
1244 if ((mlib_addr) dp <= (mlib_addr) dend) {
1245 emask = vis_edge16(dp, dend);
1246 vis_pst_16(dd2, dp++, emask);
1247 }
1248 }
1249 }
1250 else {
1251 vis_alignaddr((void *)doff, 0);
1252 vis_pst_16(vis_faligndata(dd0, dd0), dp++, emask);
1253 if ((mlib_addr) dp <= (mlib_addr) dend) {
1254 emask = vis_edge16(dp, dend);
1255 vis_pst_16(vis_faligndata(dd0, dd1), dp++, emask);
1256 if ((mlib_addr) dp <= (mlib_addr) dend) {
1257 emask = vis_edge16(dp, dend);
1258 vis_pst_16(vis_faligndata(dd1, dd2), dp++, emask);
1259 }
1260 }
1261 }
1262 }
1263
1264 /* no edge handling is needed in the loop */
1265 if (doff == 8) {
1266 if ((mlib_addr) dp <= (mlib_addr) dend2) {
1267 n = ((mlib_u8 *) dend2 - (mlib_u8 *) dp) / 24 + 1;
1268#pragma pipeloop(0)
1269 for (i = 0; i < n; i++) {
1270 LOAD_EXTRACT_S16_43L;
1271 *dp++ = dd0;
1272 *dp++ = dd1;
1273 *dp++ = dd2;
1274 }
1275 }
1276 }
1277 else {
1278 if ((mlib_addr) dp <= (mlib_addr) dend2) {
1279 n = ((mlib_u8 *) dend2 - (mlib_u8 *) dp) / 24 + 1;
1280#pragma pipeloop(0)
1281 for (i = 0; i < n; i++) {
1282 LOAD_EXTRACT_S16_43L;
1283 vis_alignaddr((void *)doff, 0);
1284 *dp++ = vis_faligndata(dd2old, dd0);
1285 *dp++ = vis_faligndata(dd0, dd1);
1286 *dp++ = vis_faligndata(dd1, dd2);
1287 }
1288 }
1289 }
1290
1291 if ((mlib_addr) dp <= (mlib_addr) dend) {
1292 LOAD_EXTRACT_S16_43L;
1293 emask = vis_edge16(dp, dend);
1294 if (doff == 8) {
1295 vis_pst_16(dd0, dp++, emask);
1296 if ((mlib_addr) dp <= (mlib_addr) dend) {
1297 emask = vis_edge16(dp, dend);
1298 vis_pst_16(dd1, dp++, emask);
1299 if ((mlib_addr) dp <= (mlib_addr) dend) {
1300 emask = vis_edge16(dp, dend);
1301 vis_pst_16(dd2, dp++, emask);
1302 }
1303 }
1304 }
1305 else {
1306 vis_alignaddr((void *)doff, 0);
1307 vis_pst_16(vis_faligndata(dd2old, dd0), dp++, emask);
1308 if ((mlib_addr) dp <= (mlib_addr) dend) {
1309 emask = vis_edge16(dp, dend);
1310 vis_pst_16(vis_faligndata(dd0, dd1), dp++, emask);
1311 if ((mlib_addr) dp <= (mlib_addr) dend) {
1312 emask = vis_edge16(dp, dend);
1313 vis_pst_16(vis_faligndata(dd1, dd2), dp++, emask);
1314 }
1315 }
1316 }
1317 }
1318}
1319
1320/***************************************************************/
1321void mlib_v_ImageChannelExtract_S16_43L(const mlib_s16 *src,
1322 mlib_s32 slb,
1323 mlib_s16 *dst,
1324 mlib_s32 dlb,
1325 mlib_s32 xsize,
1326 mlib_s32 ysize)
1327{
1328 mlib_s16 *sa, *da;
1329 mlib_s16 *sl, *dl;
1330 mlib_s32 j;
1331
1332 sa = sl = (void *)src;
1333 da = dl = dst;
1334
1335 for (j = 0; j < ysize; j++) {
1336 mlib_v_ImageChannelExtract_S16_43L_D1(sa, da, xsize);
1337 sa = sl = (mlib_s16 *) ((mlib_u8 *) sl + slb);
1338 da = dl = (mlib_s16 *) ((mlib_u8 *) dl + dlb);
1339 }
1340}
1341
1342/***************************************************************/