blob: 9d7f6e66c0104744c7cc14d42b546b7f3928ec09 [file] [log] [blame]
J. Duke319a3b92007-12-01 00:00:00 +00001/*
2 * Copyright 1998-2003 Sun Microsystems, Inc. All Rights Reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Sun designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Sun in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
22 * CA 95054 USA or visit www.sun.com if you need additional information or
23 * have any questions.
24 */
25
26
27
28/*
29 * FUNCTIONS
30 * mlib_ImageConvCopyEdge - Copy src edges to dst edges
31 *
32 *
33 * SYNOPSIS
34 * mlib_status mlib_ImageConvCopyEdge(mlib_image *dst,
35 * const mlib_image *src,
36 * mlib_s32 dx_l,
37 * mlib_s32 dx_r,
38 * mlib_s32 dy_t,
39 * mlib_s32 dy_b,
40 * mlib_s32 cmask)
41 *
42 * ARGUMENT
43 * dst Pointer to the dst image.
44 * src Pointer to the src image.
45 * dx_l Number of columns on the left side of the
46 * image to be copied.
47 * dx_r Number of columns on the right side of the
48 * image to be copied.
49 * dy_t Number of rows on the top edge of the
50 * image to be copied.
51 * dy_b Number of rows on the bottom edge of the
52 * image to be copied.
53 * cmask Channel mask to indicate the channels to be convolved.
54 * Each bit represents a channel in the image. The
55 * channels corresponding to 1 bits are those to be processed.
56 *
57 * RESTRICTION
58 * The src and the dst must be the same type, same width, same height and have same number
59 * of channels (1, 2, 3, or 4). The unselected channels are not
60 * overwritten. If both src and dst have just one channel,
61 * cmask is ignored.
62 *
63 * DESCRIPTION
64 * Copy src edges to dst edges.
65
66 * The unselected channels are not overwritten.
67 * If src and dst have just one channel,
68 * cmask is ignored.
69 */
70
71#include "vis_proto.h"
72#include "mlib_image.h"
73#include "mlib_ImageConvEdge.h"
74
75/***************************************************************/
/*
 * Forward declarations of the file-local edge-copy helpers, one per element
 * type / channel layout.  The public entry point mlib_ImageConvCopyEdge()
 * selects among them from the destination image type and channel count.
 *
 * The variants that take a trailing nchan argument serve several channel
 * counts; the _3 and _4 variants are dedicated to 3- and 4-channel images.
 * All of them share the (dst, src, dx_l, dx_r, dy_t, dy_b, cmask) argument
 * list of the public function.
 */
76static void mlib_ImageConvCopyEdge_U8(mlib_image *dst,
77 const mlib_image *src,
78 mlib_s32 dx_l,
79 mlib_s32 dx_r,
80 mlib_s32 dy_t,
81 mlib_s32 dy_b,
82 mlib_s32 cmask,
83 mlib_s32 nchan);
84
85static void mlib_ImageConvCopyEdge_U8_3(mlib_image *dst,
86 const mlib_image *src,
87 mlib_s32 dx_l,
88 mlib_s32 dx_r,
89 mlib_s32 dy_t,
90 mlib_s32 dy_b,
91 mlib_s32 cmask);
92
93static void mlib_ImageConvCopyEdge_S16(mlib_image *dst,
94 const mlib_image *src,
95 mlib_s32 dx_l,
96 mlib_s32 dx_r,
97 mlib_s32 dy_t,
98 mlib_s32 dy_b,
99 mlib_s32 cmask,
100 mlib_s32 nchan);
101
102static void mlib_ImageConvCopyEdge_S16_3(mlib_image *dst,
103 const mlib_image *src,
104 mlib_s32 dx_l,
105 mlib_s32 dx_r,
106 mlib_s32 dy_t,
107 mlib_s32 dy_b,
108 mlib_s32 cmask);
109
110static void mlib_ImageConvCopyEdge_S32(mlib_image *dst,
111 const mlib_image *src,
112 mlib_s32 dx_l,
113 mlib_s32 dx_r,
114 mlib_s32 dy_t,
115 mlib_s32 dy_b,
116 mlib_s32 cmask,
117 mlib_s32 nchan);
118
119static void mlib_ImageConvCopyEdge_S32_3(mlib_image *dst,
120 const mlib_image *src,
121 mlib_s32 dx_l,
122 mlib_s32 dx_r,
123 mlib_s32 dy_t,
124 mlib_s32 dy_b,
125 mlib_s32 cmask);
126
127static void mlib_ImageConvCopyEdge_S32_4(mlib_image *dst,
128 const mlib_image *src,
129 mlib_s32 dx_l,
130 mlib_s32 dx_r,
131 mlib_s32 dy_t,
132 mlib_s32 dy_b,
133 mlib_s32 cmask);
134
135/***************************************************************/
/*
 * VERT_EDGES(chan, type, mask)
 *
 * Must be expanded at the top of an edge-copy function whose parameters are
 * named dst, src, dx_l, dx_r, dy_t and dy_b.  It does three things:
 *
 *   1. Declares the working locals used by the rest of the function
 *      (pdst, psrc, the row pointers, img_width/img_height, the strides
 *      expressed in elements of `type`, the loop counters i/j/l, emask,
 *      testchan, img_width_t/img_width_b and the mlib_d64 temporaries).
 *
 *   2. Copies the vertical edges element by element: for every channel
 *      selected in `mask`, the dx_l left-most and dx_r right-most columns of
 *      rows dy_t .. img_height - dy_b - 1.  Bit 0 of `mask` corresponds to
 *      channel index chan - 1, bit 1 to chan - 2, and so on.
 *
 *   3. Prepares the horizontal pass.  img_width_t and img_width_b start out
 *      equal to img_width.  When both images are contiguous (stride equals
 *      img_width * chan elements) the top and bottom edges are each folded
 *      into one long row: the widths are multiplied by dy_t / dy_b, the
 *      strides are multiplied by (img_height - dy_b), img_height becomes 2
 *      and dy_t / dy_b are reduced to 0 or 1.
 *
 * `chan` and `mask` are parenthesized at every use so that expression
 * arguments (for example `a | b` or `n + 1`) expand with the intended
 * precedence.  `type` must be a type name and therefore cannot be
 * parenthesized.  The expansion is a declaration/statement sequence, not an
 * expression, and writes to the caller's dy_t and dy_b.
 */
#define VERT_EDGES(chan, type, mask)                                    \
  type *pdst = (type *) mlib_ImageGetData(dst);                         \
  type *psrc = (type *) mlib_ImageGetData(src);                         \
  type *pdst_row, *psrc_row, *pdst_row_end;                             \
  mlib_s32 img_height = mlib_ImageGetHeight(dst);                       \
  mlib_s32 img_width  = mlib_ImageGetWidth(dst);                        \
  mlib_s32 dst_stride = mlib_ImageGetStride(dst) / sizeof(type);        \
  mlib_s32 src_stride = mlib_ImageGetStride(src) / sizeof(type);        \
  mlib_s32 i, j, l;                                                     \
  mlib_s32 emask, testchan;                                             \
  mlib_s32 img_width_t, img_width_b;                                    \
  mlib_d64 *dpdst, *dpsrc, data0, data1;                                \
                                                                        \
  testchan = 1;                                                         \
  for (l = (chan) - 1; l >= 0; l--) {                                   \
    if (((mask) & testchan) == 0) {                                     \
      testchan <<= 1;                                                   \
      continue;                                                         \
    }                                                                   \
    testchan <<= 1;                                                     \
    for (j = 0; j < dx_l; j++) {                                        \
      for (i = dy_t; i < (img_height - dy_b); i++) {                    \
        pdst[i*dst_stride + l + j*(chan)] =                             \
          psrc[i*src_stride + l + j*(chan)];                            \
      }                                                                 \
    }                                                                   \
    for (j = 0; j < dx_r; j++) {                                        \
      for (i = dy_t; i < (img_height - dy_b); i++) {                    \
        pdst[i*dst_stride + l+(img_width-1 - j)*(chan)] =               \
          psrc[i*src_stride + l+(img_width-1 - j)*(chan)];              \
      }                                                                 \
    }                                                                   \
  }                                                                     \
  img_width_t = img_width;                                              \
  img_width_b = img_width;                                              \
  if (((img_width * (chan)) == dst_stride) &&                           \
      ((img_width * (chan)) == src_stride)) {                           \
    img_width_t *= dy_t;                                                \
    img_width_b *= dy_b;                                                \
    dst_stride *= (img_height - dy_b);                                  \
    src_stride *= (img_height - dy_b);                                  \
    img_height = 2;                                                     \
    dy_t = ((dy_t == 0) ? 0 : 1);                                       \
    dy_b = ((dy_b == 0) ? 0 : 1);                                       \
  }

/***************************************************************/
/*
 * HORIZ_EDGES(chan, type, mask)
 *
 * Scalar copy of the horizontal edges, for use after VERT_EDGES in the same
 * function (it relies on the locals VERT_EDGES declares).  For every channel
 * selected in `mask` it copies img_width_t pixels of each of the first dy_t
 * rows and img_width_b pixels of each of the last dy_b rows, then RETURNS
 * from the enclosing function, which must therefore return void.
 *
 * The body is wrapped in do { } while (0) so that `HORIZ_EDGES(...);` is a
 * single statement, and `chan` / `mask` are parenthesized at every use.
 * `type` is accepted for symmetry with VERT_EDGES and is not used.
 */
#define HORIZ_EDGES(chan, type, mask)                                   \
  do {                                                                  \
    testchan = 1;                                                       \
    for (l = (chan) - 1; l >= 0; l--) {                                 \
      if (((mask) & testchan) == 0) {                                   \
        testchan <<= 1;                                                 \
        continue;                                                       \
      }                                                                 \
      testchan <<= 1;                                                   \
      for (i = 0; i < dy_t; i++) {                                      \
        for (j = 0; j < img_width_t; j++) {                             \
          pdst[i*dst_stride + l + j*(chan)] =                           \
            psrc[i*src_stride + l + j*(chan)];                          \
        }                                                               \
      }                                                                 \
      for (i = 0; i < dy_b; i++) {                                      \
        for (j = 0; j < img_width_b; j++) {                             \
          pdst[(img_height-1 - i)*dst_stride + l + j*(chan)] =          \
            psrc[(img_height-1 - i)*src_stride + l + j*(chan)];         \
        }                                                               \
      }                                                                 \
    }                                                                   \
    return;                                                             \
  } while (0)
206
207/***************************************************************/
208mlib_status mlib_ImageConvCopyEdge(mlib_image *dst,
209 const mlib_image *src,
210 mlib_s32 dx_l,
211 mlib_s32 dx_r,
212 mlib_s32 dy_t,
213 mlib_s32 dy_b,
214 mlib_s32 cmask)
215{
216 mlib_s32 img_width = mlib_ImageGetWidth(dst);
217 mlib_s32 img_height = mlib_ImageGetHeight(dst);
218
219 if (dx_l + dx_r > img_width) {
220 dx_l = img_width;
221 dx_r = 0;
222 }
223
224 if (dy_t + dy_b > img_height) {
225 dy_t = img_height;
226 dy_b = 0;
227 }
228
229 switch (mlib_ImageGetType(dst)) {
230 case MLIB_BIT:
231 return mlib_ImageConvCopyEdge_Bit(dst, src, dx_l, dx_r, dy_t, dy_b, cmask);
232
233 case MLIB_BYTE:
234 switch (mlib_ImageGetChannels(dst)) {
235
236 case 1:
237 mlib_ImageConvCopyEdge_U8(dst, src, dx_l, dx_r, dy_t, dy_b, 1, 1);
238 break;
239
240 case 2:
241 mlib_ImageConvCopyEdge_U8(dst, src, dx_l, dx_r, dy_t, dy_b, cmask, 2);
242 break;
243
244 case 3:
245 mlib_ImageConvCopyEdge_U8_3(dst, src, dx_l, dx_r, dy_t, dy_b, cmask);
246 break;
247
248 case 4:
249 mlib_ImageConvCopyEdge_U8(dst, src, dx_l, dx_r, dy_t, dy_b, cmask, 4);
250 break;
251
252 default:
253 return MLIB_FAILURE;
254 }
255
256 break;
257
258 case MLIB_SHORT:
259 case MLIB_USHORT:
260 switch (mlib_ImageGetChannels(dst)) {
261
262 case 1:
263 mlib_ImageConvCopyEdge_S16(dst, src, dx_l, dx_r, dy_t, dy_b, 1, 1);
264 break;
265
266 case 2:
267 mlib_ImageConvCopyEdge_S16(dst, src, dx_l, dx_r, dy_t, dy_b, cmask, 2);
268 break;
269
270 case 3:
271 mlib_ImageConvCopyEdge_S16_3(dst, src, dx_l, dx_r, dy_t, dy_b, cmask);
272 break;
273
274 case 4:
275 mlib_ImageConvCopyEdge_S16(dst, src, dx_l, dx_r, dy_t, dy_b, cmask, 4);
276 break;
277
278 default:
279 return MLIB_FAILURE;
280 }
281
282 break;
283
284 case MLIB_INT:
285 case MLIB_FLOAT:
286 switch (mlib_ImageGetChannels(dst)) {
287
288 case 1:
289 mlib_ImageConvCopyEdge_S32(dst, src, dx_l, dx_r, dy_t, dy_b, 1, 1);
290 break;
291
292 case 2:
293 mlib_ImageConvCopyEdge_S32(dst, src, dx_l, dx_r, dy_t, dy_b, cmask, 2);
294 break;
295
296 case 3:
297 mlib_ImageConvCopyEdge_S32_3(dst, src, dx_l, dx_r, dy_t, dy_b, cmask);
298 break;
299
300 case 4:
301 mlib_ImageConvCopyEdge_S32_4(dst, src, dx_l, dx_r, dy_t, dy_b, cmask);
302 break;
303
304 default:
305 return MLIB_FAILURE;
306 }
307
308 break;
309
310 case MLIB_DOUBLE:
311 return mlib_ImageConvCopyEdge_Fp(dst, src, dx_l, dx_r, dy_t, dy_b, cmask);
312
313 default:
314 return MLIB_FAILURE;
315 }
316
317 return MLIB_SUCCESS;
318}
319
320/***************************************************************/
/*
 * mlib_ImageConvCopyEdge_U8 - edge copy for mlib_u8 data.  The dispatcher in
 * this file calls it for 1-, 2- and 4-channel MLIB_BYTE images; nchan is the
 * channel count and cmask selects the channels to copy.
 *
 * VERT_EDGES declares the working locals, copies the dx_l left and dx_r right
 * columns element by element and sets up img_width_t / img_width_b (and, for
 * contiguous images, folded strides) for the row passes.  When img_width is
 * below 16 / nchan the rows are finished by the scalar HORIZ_EDGES path,
 * which returns from this function.  Otherwise the dy_t top rows and dy_b
 * bottom rows are written one mlib_d64 at a time through masked vis_pst_8
 * stores.
 *
 * tmask holds the low nchan bits of cmask replicated by shift-or (0xFFFF for
 * nchan == 1); each row shifts it right by the destination offset to obtain
 * the per-store mask mask1.
 *
 * NOTE(review): vis_alignaddr is called before the loads and presumably sets
 * the alignment state consumed by vis_faligndata - confirm against the VIS
 * documentation before reordering any statement in the row loops.
 */
321void mlib_ImageConvCopyEdge_U8(mlib_image *dst,
322 const mlib_image *src,
323 mlib_s32 dx_l,
324 mlib_s32 dx_r,
325 mlib_s32 dy_t,
326 mlib_s32 dy_b,
327 mlib_s32 cmask,
328 mlib_s32 nchan)
329{
330 mlib_s32 tmask = cmask & ((1 << nchan) - 1), mask1, offset;
331 VERT_EDGES(nchan, mlib_u8, cmask);
332
/* Narrow image: scalar copy of the rows; HORIZ_EDGES returns. */
333 if (img_width < 16 / nchan)
334 HORIZ_EDGES(nchan, mlib_u8, cmask);
335
/* Replicate the channel selection across the 16 low bits of tmask. */
336 if (nchan == 1)
337 tmask = 0xFFFF;
338 else if (nchan == 2) {
339 tmask |= (tmask << 2);
340 tmask |= (tmask << 4);
341 tmask |= (tmask << 8);
342 }
343 else if (nchan == 4) {
344 tmask |= (tmask << 4);
345 tmask |= (tmask << 8);
346 }
347
/* Top edge: rows 0 .. dy_t - 1, img_width_t pixels each. */
348 for (i = 0; i < dy_t; i++) {
349 pdst_row = pdst + i * dst_stride,
350 psrc_row = psrc + i * src_stride, pdst_row_end = pdst_row + img_width_t * nchan - 1;
/* dpdst is pdst_row rounded down to a multiple of 8; offset is the distance back to it. */
351 dpdst = (mlib_d64 *) ((mlib_addr) pdst_row & ~7);
352 offset = pdst_row - (mlib_u8 *) dpdst;
353 dpsrc = (mlib_d64 *) vis_alignaddr(psrc_row, -offset);
354 mask1 = (tmask >> offset);
355 data0 = *dpsrc++;
356 data1 = *dpsrc++;
/* First store: channel mask combined with the vis_edge8 mask for the row start. */
357 emask = vis_edge8(pdst_row, pdst_row_end) & mask1;
358 vis_pst_8(vis_faligndata(data0, data1), dpdst++, emask);
359 j = (mlib_s32) ((mlib_u8 *) dpdst - pdst_row);
360 data0 = data1;
/* Middle stores use the channel mask only. */
361 for (; j < (img_width_t * nchan - 8); j += 8) {
362 data1 = *dpsrc++;
363 vis_pst_8(vis_faligndata(data0, data1), dpdst++, mask1);
364 data0 = data1;
365 }
366
/* Last store: channel mask combined with the vis_edge8 mask for the row end. */
367 data1 = *dpsrc++;
368 emask = vis_edge8(dpdst, pdst_row_end) & mask1;
369 vis_pst_8(vis_faligndata(data0, data1), dpdst++, emask);
370 }
371
/* Bottom edge: rows img_height - 1 downward, img_width_b pixels each. */
372 for (i = 0; i < dy_b; i++) {
373 pdst_row = pdst + (img_height - 1 - i) * dst_stride;
374 psrc_row = psrc + (img_height - 1 - i) * src_stride;
375 pdst_row_end = pdst_row + img_width_b * nchan - 1;
376 dpdst = (mlib_d64 *) ((mlib_addr) pdst_row & ~7);
377 offset = pdst_row - (mlib_u8 *) dpdst;
378 dpsrc = (mlib_d64 *) vis_alignaddr(psrc_row, -offset);
379 mask1 = (tmask >> offset);
380 data0 = *dpsrc++;
381 data1 = *dpsrc++;
382 emask = vis_edge8(pdst_row, pdst_row_end) & mask1;
383 vis_pst_8(vis_faligndata(data0, data1), dpdst++, emask);
384 j = (mlib_s32) ((mlib_u8 *) dpdst - pdst_row);
385 data0 = data1;
386 for (; j < (img_width_b * nchan - 8); j += 8) {
387 data1 = *dpsrc++;
388 vis_pst_8(vis_faligndata(data0, data1), dpdst++, mask1);
389 data0 = data1;
390 }
391
392 data1 = *dpsrc++;
393 emask = vis_edge8(dpdst, pdst_row_end) & mask1;
394 vis_pst_8(vis_faligndata(data0, data1), dpdst++, emask);
395 }
396}
397
398/***************************************************************/
/*
 * mlib_ImageConvCopyEdge_U8_3 - edge copy for 3-channel mlib_u8 data.
 *
 * VERT_EDGES declares the working locals and copies the left/right columns.
 * When img_width is below 16 the rows are finished by the scalar HORIZ_EDGES
 * path, which returns from this function.  Otherwise the dy_t top rows and
 * dy_b bottom rows are written through masked vis_pst_8 stores.
 *
 * tmask is the low 3 bits of cmask replicated by shift-or.  Three shifted
 * views of it are derived per row (mask2, then mask0 = mask2 >> 1 and
 * mask1 = mask0 >> 1): mask2 is used for the first store and the main loop
 * then applies mask0, mask1, mask2 to three consecutive mlib_d64 stores.
 * NOTE(review): presumably this rotation tracks how the 3-element channel
 * pattern falls across successive 8-byte words - confirm before changing.
 *
 * NOTE(review): vis_alignaddr is called before the loads and presumably sets
 * the alignment state consumed by vis_faligndata - confirm against the VIS
 * documentation before reordering any statement in the row loops.
 */
399void mlib_ImageConvCopyEdge_U8_3(mlib_image *dst,
400 const mlib_image *src,
401 mlib_s32 dx_l,
402 mlib_s32 dx_r,
403 mlib_s32 dy_t,
404 mlib_s32 dy_b,
405 mlib_s32 cmask)
406{
407 mlib_s32 tmask = cmask & 7, mask0, mask1, mask2, offset;
408
409 VERT_EDGES(3, mlib_u8, cmask);
410
/* Narrow image: scalar copy of the rows; HORIZ_EDGES returns. */
411 if (img_width < 16)
412 HORIZ_EDGES(3, mlib_u8, cmask);
413
/* Replicate the 3-bit channel selection across 24 bits. */
414 tmask |= (tmask << 3);
415 tmask |= (tmask << 6);
416 tmask |= (tmask << 12);
/* Top edge: rows 0 .. dy_t - 1, img_width_t pixels each. */
417 for (i = 0; i < dy_t; i++) {
418 pdst_row = pdst + i * dst_stride,
419 psrc_row = psrc + i * src_stride, pdst_row_end = pdst_row + img_width_t * 3 - 1;
/* dpdst is pdst_row rounded down to a multiple of 8; offset is the distance back to it. */
420 dpdst = (mlib_d64 *) ((mlib_addr) pdst_row & ~7);
421 offset = pdst_row - (mlib_u8 *) dpdst;
422 dpsrc = (mlib_d64 *) vis_alignaddr(psrc_row, -offset);
423 mask2 = (tmask >> (offset + 1));
424 mask0 = mask2 >> 1;
425 mask1 = mask0 >> 1;
426 data0 = *dpsrc++;
427 data1 = *dpsrc++;
428 emask = vis_edge8(pdst_row, pdst_row_end) & mask2;
429 vis_pst_8(vis_faligndata(data0, data1), dpdst++, emask);
430 data0 = data1;
431 j = (mlib_s32) ((mlib_u8 *) dpdst - pdst_row);
/* Main loop: three stores per iteration, one per mask. */
432 for (; j < (img_width_t * 3 - 24); j += 24) {
433 data1 = *dpsrc++;
434 vis_pst_8(vis_faligndata(data0, data1), dpdst, mask0);
435 data0 = data1;
436 data1 = *dpsrc++;
437 vis_pst_8(vis_faligndata(data0, data1), dpdst + 1, mask1);
438 data0 = data1;
439 data1 = *dpsrc++;
440 vis_pst_8(vis_faligndata(data0, data1), dpdst + 2, mask2);
441 data0 = data1;
442 dpdst += 3;
443 }
444
/* Up to two more full stores; mask0 is then set to the mask for the final store. */
445 if (j < (img_width_t * 3 - 8)) {
446 data1 = *dpsrc++;
447 vis_pst_8(vis_faligndata(data0, data1), dpdst++, mask0);
448 data0 = data1;
449
450 if (j < (img_width_t * 3 - 16)) {
451 data1 = *dpsrc++;
452 vis_pst_8(vis_faligndata(data0, data1), dpdst++, mask1);
453 data0 = data1;
454 mask0 = mask2;
455 }
456 else {
457 mask0 = mask1;
458 }
459 }
460
/* Final store, limited to the row end by the vis_edge8 mask. */
461 data1 = *dpsrc++;
462 emask = vis_edge8(dpdst, pdst_row_end) & mask0;
463 vis_pst_8(vis_faligndata(data0, data1), dpdst, emask);
464 }
465
/* Bottom edge: rows img_height - 1 downward, img_width_b pixels each. */
466 for (i = 0; i < dy_b; i++) {
467 pdst_row = pdst + (img_height - 1 - i) * dst_stride;
468 psrc_row = psrc + (img_height - 1 - i) * src_stride;
469 pdst_row_end = pdst_row + img_width_b * 3 - 1;
470
471 dpdst = (mlib_d64 *) ((mlib_addr) pdst_row & ~7);
472 offset = pdst_row - (mlib_u8 *) dpdst;
473 dpsrc = (mlib_d64 *) vis_alignaddr(psrc_row, -offset);
474 mask2 = (tmask >> (offset + 1));
475 mask0 = mask2 >> 1;
476 mask1 = mask0 >> 1;
477 data0 = *dpsrc++;
478 data1 = *dpsrc++;
479 emask = vis_edge8(pdst_row, pdst_row_end) & mask2;
480 vis_pst_8(vis_faligndata(data0, data1), dpdst++, emask);
481 data0 = data1;
482 j = (mlib_s32) ((mlib_u8 *) dpdst - pdst_row);
483 for (; j < (img_width_b * 3 - 24); j += 24) {
484 data1 = *dpsrc++;
485 vis_pst_8(vis_faligndata(data0, data1), dpdst, mask0);
486 data0 = data1;
487 data1 = *dpsrc++;
488 vis_pst_8(vis_faligndata(data0, data1), dpdst + 1, mask1);
489 data0 = data1;
490 data1 = *dpsrc++;
491 vis_pst_8(vis_faligndata(data0, data1), dpdst + 2, mask2);
492 data0 = data1;
493 dpdst += 3;
494 }
495
496 if (j < (img_width_b * 3 - 8)) {
497 data1 = *dpsrc++;
498 vis_pst_8(vis_faligndata(data0, data1), dpdst++, mask0);
499 data0 = data1;
500
501 if (j < (img_width_b * 3 - 16)) {
502 data1 = *dpsrc++;
503 vis_pst_8(vis_faligndata(data0, data1), dpdst++, mask1);
504 data0 = data1;
505 mask0 = mask2;
506 }
507 else {
508 mask0 = mask1;
509 }
510 }
511
512 data1 = *dpsrc++;
513 emask = vis_edge8(dpdst, pdst_row_end) & mask0;
514 vis_pst_8(vis_faligndata(data0, data1), dpdst, emask);
515 }
516}
517
518/***************************************************************/
/*
 * mlib_ImageConvCopyEdge_S16 - edge copy for mlib_s16 data.  The dispatcher
 * in this file calls it for 1-, 2- and 4-channel MLIB_SHORT and MLIB_USHORT
 * images; nchan is the channel count and cmask selects the channels to copy.
 *
 * VERT_EDGES declares the working locals and copies the left/right columns.
 * When img_width is below 16 / nchan the rows are finished by the scalar
 * HORIZ_EDGES path, which returns from this function.  Otherwise the dy_t
 * top rows and dy_b bottom rows are written one mlib_d64 (j advances by 4
 * elements) at a time through masked vis_pst_16 stores.
 *
 * tmask holds the low nchan bits of cmask replicated by shift-or (0xFFFF for
 * nchan == 1); each row shifts it right by the destination offset, counted
 * in mlib_s16 elements, to obtain mask1.  The same offset is shifted left by
 * 1 before it is negated and passed to vis_alignaddr - presumably the
 * conversion from elements to bytes; TODO confirm.
 *
 * NOTE(review): vis_alignaddr is called before the loads and presumably sets
 * the alignment state consumed by vis_faligndata - confirm against the VIS
 * documentation before reordering any statement in the row loops.
 */
519void mlib_ImageConvCopyEdge_S16(mlib_image *dst,
520 const mlib_image *src,
521 mlib_s32 dx_l,
522 mlib_s32 dx_r,
523 mlib_s32 dy_t,
524 mlib_s32 dy_b,
525 mlib_s32 cmask,
526 mlib_s32 nchan)
527{
528 mlib_s32 tmask = cmask & ((1 << nchan) - 1), mask1, offset;
529 VERT_EDGES(nchan, mlib_s16, cmask);
530
/* Narrow image: scalar copy of the rows; HORIZ_EDGES returns. */
531 if (img_width < 16 / nchan)
532 HORIZ_EDGES(nchan, mlib_s16, cmask);
533
/* Replicate the channel selection across the low bits of tmask. */
534 if (nchan == 1)
535 tmask = 0xFFFF;
536 else if (nchan == 2) {
537 tmask |= (tmask << 2);
538 tmask |= (tmask << 4);
539 }
540 else if (nchan == 4)
541 tmask |= (tmask << 4);
542
/* Top edge: rows 0 .. dy_t - 1, img_width_t pixels each. */
543 for (i = 0; i < dy_t; i++) {
544 pdst_row = pdst + i * dst_stride,
545 psrc_row = psrc + i * src_stride, pdst_row_end = pdst_row + img_width_t * nchan - 1;
/* dpdst is pdst_row rounded down to a multiple of 8; offset is the distance back to it in elements. */
546 dpdst = (mlib_d64 *) ((mlib_addr) pdst_row & ~7);
547 offset = pdst_row - (mlib_s16 *) dpdst;
548 dpsrc = (mlib_d64 *) vis_alignaddr(psrc_row, -(offset << 1));
549 mask1 = (tmask >> offset);
550 data0 = *dpsrc++;
551 data1 = *dpsrc++;
/* First store: channel mask combined with the vis_edge16 mask for the row start. */
552 emask = vis_edge16(pdst_row, pdst_row_end) & mask1;
553 vis_pst_16(vis_faligndata(data0, data1), dpdst++, emask);
554 j = (mlib_s32) ((mlib_s16 *) dpdst - pdst_row);
555 data0 = data1;
/* Middle stores use the channel mask only. */
556 for (; j < (img_width_t * nchan - 4); j += 4) {
557 data1 = *dpsrc++;
558 vis_pst_16(vis_faligndata(data0, data1), dpdst++, mask1);
559 data0 = data1;
560 }
561
/* Last store: channel mask combined with the vis_edge16 mask for the row end. */
562 data1 = *dpsrc++;
563 emask = vis_edge16(dpdst, pdst_row_end) & mask1;
564 vis_pst_16(vis_faligndata(data0, data1), dpdst++, emask);
565 }
566
/* Bottom edge: rows img_height - 1 downward, img_width_b pixels each. */
567 for (i = 0; i < dy_b; i++) {
568 pdst_row = pdst + (img_height - 1 - i) * dst_stride;
569 psrc_row = psrc + (img_height - 1 - i) * src_stride;
570 pdst_row_end = pdst_row + img_width_b * nchan - 1;
571 dpdst = (mlib_d64 *) ((mlib_addr) pdst_row & ~7);
572 offset = pdst_row - (mlib_s16 *) dpdst;
573 dpsrc = (mlib_d64 *) vis_alignaddr(psrc_row, -(offset << 1));
574 mask1 = (tmask >> offset);
575 data0 = *dpsrc++;
576 data1 = *dpsrc++;
577 emask = vis_edge16(pdst_row, pdst_row_end) & mask1;
578 vis_pst_16(vis_faligndata(data0, data1), dpdst++, emask);
579 j = (mlib_s32) ((mlib_s16 *) dpdst - pdst_row);
580 data0 = data1;
581 for (; j < (img_width_b * nchan - 4); j += 4) {
582 data1 = *dpsrc++;
583 vis_pst_16(vis_faligndata(data0, data1), dpdst++, mask1);
584 data0 = data1;
585 }
586
587 data1 = *dpsrc++;
588 emask = vis_edge16(dpdst, pdst_row_end) & mask1;
589 vis_pst_16(vis_faligndata(data0, data1), dpdst++, emask);
590 }
591}
592
593/***************************************************************/
/*
 * mlib_ImageConvCopyEdge_S16_3 - edge copy for 3-channel mlib_s16 data
 * (the dispatcher in this file uses it for MLIB_SHORT and MLIB_USHORT).
 *
 * VERT_EDGES declares the working locals and copies the left/right columns.
 * When img_width is below 16 the rows are finished by the scalar HORIZ_EDGES
 * path, which returns from this function.  Otherwise the dy_t top rows and
 * dy_b bottom rows are written through masked vis_pst_16 stores.
 *
 * tmask is the low 3 bits of cmask replicated by shift-or.  Three shifted
 * views of it are derived per row (mask2 = tmask >> (offset + 2), then
 * mask0 = mask2 >> 2 and mask1 = mask0 >> 2): mask2 is used for the first
 * store and the main loop then applies mask0, mask1, mask2 to three
 * consecutive mlib_d64 stores.
 * NOTE(review): presumably this rotation tracks how the 3-element channel
 * pattern falls across successive 4-element words - confirm before changing.
 *
 * NOTE(review): vis_alignaddr is called before the loads and presumably sets
 * the alignment state consumed by vis_faligndata - confirm against the VIS
 * documentation before reordering any statement in the row loops.
 */
594void mlib_ImageConvCopyEdge_S16_3(mlib_image *dst,
595 const mlib_image *src,
596 mlib_s32 dx_l,
597 mlib_s32 dx_r,
598 mlib_s32 dy_t,
599 mlib_s32 dy_b,
600 mlib_s32 cmask)
601{
602 mlib_s32 tmask = cmask & 7, mask0, mask1, mask2, offset;
603
604 VERT_EDGES(3, mlib_s16, cmask);
605
/* Narrow image: scalar copy of the rows; HORIZ_EDGES returns. */
606 if (img_width < 16)
607 HORIZ_EDGES(3, mlib_s16, cmask);
608
/* Replicate the 3-bit channel selection across 24 bits. */
609 tmask |= (tmask << 3);
610 tmask |= (tmask << 6);
611 tmask |= (tmask << 12);
/* Top edge: rows 0 .. dy_t - 1, img_width_t pixels each. */
612 for (i = 0; i < dy_t; i++) {
613 pdst_row = pdst + i * dst_stride,
614 psrc_row = psrc + i * src_stride, pdst_row_end = pdst_row + img_width_t * 3 - 1;
/* dpdst is pdst_row rounded down to a multiple of 8; offset is the distance back to it in elements. */
615 dpdst = (mlib_d64 *) ((mlib_addr) pdst_row & ~7);
616 offset = pdst_row - (mlib_s16 *) dpdst;
617 dpsrc = (mlib_d64 *) vis_alignaddr(psrc_row, -(offset << 1));
618 mask2 = (tmask >> (offset + 2));
619 mask0 = mask2 >> 2;
620 mask1 = mask0 >> 2;
621 data0 = *dpsrc++;
622 data1 = *dpsrc++;
623 emask = vis_edge16(pdst_row, pdst_row_end) & mask2;
624 vis_pst_16(vis_faligndata(data0, data1), dpdst++, emask);
625 data0 = data1;
626 j = (mlib_s32) ((mlib_s16 *) dpdst - pdst_row);
/* Main loop: three stores per iteration, one per mask. */
627 for (; j < (img_width_t * 3 - 12); j += 12) {
628 data1 = *dpsrc++;
629 vis_pst_16(vis_faligndata(data0, data1), dpdst, mask0);
630 data0 = data1;
631 data1 = *dpsrc++;
632 vis_pst_16(vis_faligndata(data0, data1), dpdst + 1, mask1);
633 data0 = data1;
634 data1 = *dpsrc++;
635 vis_pst_16(vis_faligndata(data0, data1), dpdst + 2, mask2);
636 data0 = data1;
637 dpdst += 3;
638 }
639
/* Up to two more full stores; mask0 is then set to the mask for the final store. */
640 if (j < (img_width_t * 3 - 4)) {
641 data1 = *dpsrc++;
642 vis_pst_16(vis_faligndata(data0, data1), dpdst++, mask0);
643 data0 = data1;
644
645 if (j < (img_width_t * 3 - 8)) {
646 data1 = *dpsrc++;
647 vis_pst_16(vis_faligndata(data0, data1), dpdst++, mask1);
648 data0 = data1;
649 mask0 = mask2;
650 }
651 else {
652 mask0 = mask1;
653 }
654 }
655
/* Final store, limited to the row end by the vis_edge16 mask. */
656 data1 = *dpsrc++;
657 emask = vis_edge16(dpdst, pdst_row_end) & mask0;
658 vis_pst_16(vis_faligndata(data0, data1), dpdst, emask);
659 }
660
/* Bottom edge: rows img_height - 1 downward, img_width_b pixels each. */
661 for (i = 0; i < dy_b; i++) {
662 pdst_row = pdst + (img_height - 1 - i) * dst_stride;
663 psrc_row = psrc + (img_height - 1 - i) * src_stride;
664 pdst_row_end = pdst_row + img_width_b * 3 - 1;
665
666 dpdst = (mlib_d64 *) ((mlib_addr) pdst_row & ~7);
667 offset = pdst_row - (mlib_s16 *) dpdst;
668 dpsrc = (mlib_d64 *) vis_alignaddr(psrc_row, -(offset << 1));
669 mask2 = (tmask >> (offset + 2));
670 mask0 = mask2 >> 2;
671 mask1 = mask0 >> 2;
672 data0 = *dpsrc++;
673 data1 = *dpsrc++;
674 emask = vis_edge16(pdst_row, pdst_row_end) & mask2;
675 vis_pst_16(vis_faligndata(data0, data1), dpdst++, emask);
676 data0 = data1;
677 j = (mlib_s32) ((mlib_s16 *) dpdst - pdst_row);
678 for (; j < (img_width_b * 3 - 12); j += 12) {
679 data1 = *dpsrc++;
680 vis_pst_16(vis_faligndata(data0, data1), dpdst, mask0);
681 data0 = data1;
682 data1 = *dpsrc++;
683 vis_pst_16(vis_faligndata(data0, data1), dpdst + 1, mask1);
684 data0 = data1;
685 data1 = *dpsrc++;
686 vis_pst_16(vis_faligndata(data0, data1), dpdst + 2, mask2);
687 data0 = data1;
688 dpdst += 3;
689 }
690
691 if (j < (img_width_b * 3 - 4)) {
692 data1 = *dpsrc++;
693 vis_pst_16(vis_faligndata(data0, data1), dpdst++, mask0);
694 data0 = data1;
695
696 if (j < (img_width_b * 3 - 8)) {
697 data1 = *dpsrc++;
698 vis_pst_16(vis_faligndata(data0, data1), dpdst++, mask1);
699 data0 = data1;
700 mask0 = mask2;
701 }
702 else {
703 mask0 = mask1;
704 }
705 }
706
707 data1 = *dpsrc++;
708 emask = vis_edge16(dpdst, pdst_row_end) & mask0;
709 vis_pst_16(vis_faligndata(data0, data1), dpdst, emask);
710 }
711}
712
713/***************************************************************/
/*
 * mlib_ImageConvCopyEdge_S32 - edge copy for mlib_s32-sized data.  The
 * dispatcher in this file calls it for 1- and 2-channel MLIB_INT and
 * MLIB_FLOAT images (4-channel images go to the _S32_4 variant); nchan is
 * the channel count and cmask selects the channels to copy.
 *
 * VERT_EDGES declares the working locals and copies the left/right columns.
 * When img_width is below 16 / nchan the rows are finished by the scalar
 * HORIZ_EDGES path, which returns from this function.  Otherwise the dy_t
 * top rows and dy_b bottom rows are written one mlib_d64 (j advances by 2
 * elements) at a time through masked vis_pst_32 stores.
 *
 * tmask holds the low nchan bits of cmask, replicated by shift-or for
 * nchan == 2 and set to 0xFFFF for nchan == 1; each row shifts it right by
 * the destination offset, counted in mlib_s32 elements, to obtain mask1.
 * The same offset is shifted left by 2 before it is negated and passed to
 * vis_alignaddr - presumably the conversion from elements to bytes; TODO
 * confirm.
 *
 * NOTE(review): vis_alignaddr is called before the loads and presumably sets
 * the alignment state consumed by vis_faligndata - confirm against the VIS
 * documentation before reordering any statement in the row loops.
 */
714void mlib_ImageConvCopyEdge_S32(mlib_image *dst,
715 const mlib_image *src,
716 mlib_s32 dx_l,
717 mlib_s32 dx_r,
718 mlib_s32 dy_t,
719 mlib_s32 dy_b,
720 mlib_s32 cmask,
721 mlib_s32 nchan)
722{
723 mlib_s32 tmask = cmask & ((1 << nchan) - 1), mask1, offset;
724 VERT_EDGES(nchan, mlib_s32, cmask);
725
/* Narrow image: scalar copy of the rows; HORIZ_EDGES returns. */
726 if (img_width < 16 / nchan)
727 HORIZ_EDGES(nchan, mlib_s32, cmask);
728
/* Replicate the channel selection across the low bits of tmask. */
729 if (nchan == 1)
730 tmask = 0xFFFF;
731 else if (nchan == 2) {
732 tmask |= (tmask << 2);
733 tmask |= (tmask << 4);
734 }
735
/* Top edge: rows 0 .. dy_t - 1, img_width_t pixels each. */
736 for (i = 0; i < dy_t; i++) {
737 pdst_row = pdst + i * dst_stride,
738 psrc_row = psrc + i * src_stride, pdst_row_end = pdst_row + img_width_t * nchan - 1;
/* dpdst is pdst_row rounded down to a multiple of 8; offset is the distance back to it in elements. */
739 dpdst = (mlib_d64 *) ((mlib_addr) pdst_row & ~7);
740 offset = pdst_row - (mlib_s32 *) dpdst;
741 dpsrc = (mlib_d64 *) vis_alignaddr(psrc_row, -(offset << 2));
742 mask1 = (tmask >> offset);
743 data0 = *dpsrc++;
744 data1 = *dpsrc++;
/* First store: channel mask combined with the vis_edge32 mask for the row start. */
745 emask = vis_edge32(pdst_row, pdst_row_end) & mask1;
746 vis_pst_32(vis_faligndata(data0, data1), dpdst++, emask);
747 j = (mlib_s32) ((mlib_s32 *) dpdst - pdst_row);
748 data0 = data1;
/* Middle stores use the channel mask only. */
749 for (; j < (img_width_t * nchan - 2); j += 2) {
750 data1 = *dpsrc++;
751 vis_pst_32(vis_faligndata(data0, data1), dpdst++, mask1);
752 data0 = data1;
753 }
754
/* Last store: channel mask combined with the vis_edge32 mask for the row end. */
755 data1 = *dpsrc++;
756 emask = vis_edge32(dpdst, pdst_row_end) & mask1;
757 vis_pst_32(vis_faligndata(data0, data1), dpdst++, emask);
758 }
759
/* Bottom edge: rows img_height - 1 downward, img_width_b pixels each. */
760 for (i = 0; i < dy_b; i++) {
761 pdst_row = pdst + (img_height - 1 - i) * dst_stride;
762 psrc_row = psrc + (img_height - 1 - i) * src_stride;
763 pdst_row_end = pdst_row + img_width_b * nchan - 1;
764 dpdst = (mlib_d64 *) ((mlib_addr) pdst_row & ~7);
765 offset = pdst_row - (mlib_s32 *) dpdst;
766 dpsrc = (mlib_d64 *) vis_alignaddr(psrc_row, -(offset << 2));
767 mask1 = (tmask >> offset);
768 data0 = *dpsrc++;
769 data1 = *dpsrc++;
770 emask = vis_edge32(pdst_row, pdst_row_end) & mask1;
771 vis_pst_32(vis_faligndata(data0, data1), dpdst++, emask);
772 j = (mlib_s32) ((mlib_s32 *) dpdst - pdst_row);
773 data0 = data1;
774 for (; j < (img_width_b * nchan - 2); j += 2) {
775 data1 = *dpsrc++;
776 vis_pst_32(vis_faligndata(data0, data1), dpdst++, mask1);
777 data0 = data1;
778 }
779
780 data1 = *dpsrc++;
781 emask = vis_edge32(dpdst, pdst_row_end) & mask1;
782 vis_pst_32(vis_faligndata(data0, data1), dpdst++, emask);
783 }
784}
785
786/***************************************************************/
/*
 * mlib_ImageConvCopyEdge_S32_3 - edge copy for 3-channel mlib_s32-sized data
 * (the dispatcher in this file uses it for MLIB_INT and MLIB_FLOAT).
 *
 * VERT_EDGES declares the working locals and copies the left/right columns.
 * When img_width is below 16 the rows are finished by the scalar HORIZ_EDGES
 * path, which returns from this function.  Otherwise the dy_t top rows and
 * dy_b bottom rows are written through masked vis_pst_32 stores.
 *
 * tmask is the low 3 bits of cmask replicated by shift-or.  Three shifted
 * views of it are derived per row (mask2 = tmask >> (offset + 1), then
 * mask0 = mask2 >> 1 and mask1 = mask0 >> 1): mask2 is used for the first
 * store and the main loop then applies mask0, mask1, mask2 to three
 * consecutive mlib_d64 stores.
 * NOTE(review): presumably this rotation tracks how the 3-element channel
 * pattern falls across successive 2-element words - confirm before changing.
 *
 * NOTE(review): vis_alignaddr is called before the loads and presumably sets
 * the alignment state consumed by vis_faligndata - confirm against the VIS
 * documentation before reordering any statement in the row loops.
 */
787void mlib_ImageConvCopyEdge_S32_3(mlib_image *dst,
788 const mlib_image *src,
789 mlib_s32 dx_l,
790 mlib_s32 dx_r,
791 mlib_s32 dy_t,
792 mlib_s32 dy_b,
793 mlib_s32 cmask)
794{
795 mlib_s32 tmask = cmask & 7, mask0, mask1, mask2, offset;
796
797 VERT_EDGES(3, mlib_s32, cmask);
798
/* Narrow image: scalar copy of the rows; HORIZ_EDGES returns. */
799 if (img_width < 16)
800 HORIZ_EDGES(3, mlib_s32, cmask);
801
/* Replicate the 3-bit channel selection across 24 bits. */
802 tmask |= (tmask << 3);
803 tmask |= (tmask << 6);
804 tmask |= (tmask << 12);
/* Top edge: rows 0 .. dy_t - 1, img_width_t pixels each. */
805 for (i = 0; i < dy_t; i++) {
806 pdst_row = pdst + i * dst_stride,
807 psrc_row = psrc + i * src_stride, pdst_row_end = pdst_row + img_width_t * 3 - 1;
/* dpdst is pdst_row rounded down to a multiple of 8; offset is the distance back to it in elements. */
808 dpdst = (mlib_d64 *) ((mlib_addr) pdst_row & ~7);
809 offset = pdst_row - (mlib_s32 *) dpdst;
810 dpsrc = (mlib_d64 *) vis_alignaddr(psrc_row, -(offset << 2));
811 mask2 = (tmask >> (offset + 1));
812 mask0 = mask2 >> 1;
813 mask1 = mask0 >> 1;
814 data0 = *dpsrc++;
815 data1 = *dpsrc++;
816 emask = vis_edge32(pdst_row, pdst_row_end) & mask2;
817 vis_pst_32(vis_faligndata(data0, data1), dpdst++, emask);
818 data0 = data1;
819 j = (mlib_s32) ((mlib_s32 *) dpdst - pdst_row);
/* Main loop: three stores per iteration, one per mask. */
820 for (; j < (img_width_t * 3 - 6); j += 6) {
821 data1 = *dpsrc++;
822 vis_pst_32(vis_faligndata(data0, data1), dpdst, mask0);
823 data0 = data1;
824 data1 = *dpsrc++;
825 vis_pst_32(vis_faligndata(data0, data1), dpdst + 1, mask1);
826 data0 = data1;
827 data1 = *dpsrc++;
828 vis_pst_32(vis_faligndata(data0, data1), dpdst + 2, mask2);
829 data0 = data1;
830 dpdst += 3;
831 }
832
/* Up to two more full stores; mask0 is then set to the mask for the final store. */
833 if (j < (img_width_t * 3 - 2)) {
834 data1 = *dpsrc++;
835 vis_pst_32(vis_faligndata(data0, data1), dpdst++, mask0);
836 data0 = data1;
837
838 if (j < (img_width_t * 3 - 4)) {
839 data1 = *dpsrc++;
840 vis_pst_32(vis_faligndata(data0, data1), dpdst++, mask1);
841 data0 = data1;
842 mask0 = mask2;
843 }
844 else {
845 mask0 = mask1;
846 }
847 }
848
/* Final store, limited to the row end by the vis_edge32 mask. */
849 data1 = *dpsrc++;
850 emask = vis_edge32(dpdst, pdst_row_end) & mask0;
851 vis_pst_32(vis_faligndata(data0, data1), dpdst, emask);
852 }
853
/* Bottom edge: rows img_height - 1 downward, img_width_b pixels each. */
854 for (i = 0; i < dy_b; i++) {
855 pdst_row = pdst + (img_height - 1 - i) * dst_stride;
856 psrc_row = psrc + (img_height - 1 - i) * src_stride;
857 pdst_row_end = pdst_row + img_width_b * 3 - 1;
858
859 dpdst = (mlib_d64 *) ((mlib_addr) pdst_row & ~7);
860 offset = pdst_row - (mlib_s32 *) dpdst;
861 dpsrc = (mlib_d64 *) vis_alignaddr(psrc_row, -(offset << 2));
862 mask2 = (tmask >> (offset + 1));
863 mask0 = mask2 >> 1;
864 mask1 = mask0 >> 1;
865 data0 = *dpsrc++;
866 data1 = *dpsrc++;
867 emask = vis_edge32(pdst_row, pdst_row_end) & mask2;
868 vis_pst_32(vis_faligndata(data0, data1), dpdst++, emask);
869 data0 = data1;
870 j = (mlib_s32) ((mlib_s32 *) dpdst - pdst_row);
871 for (; j < (img_width_b * 3 - 6); j += 6) {
872 data1 = *dpsrc++;
873 vis_pst_32(vis_faligndata(data0, data1), dpdst, mask0);
874 data0 = data1;
875 data1 = *dpsrc++;
876 vis_pst_32(vis_faligndata(data0, data1), dpdst + 1, mask1);
877 data0 = data1;
878 data1 = *dpsrc++;
879 vis_pst_32(vis_faligndata(data0, data1), dpdst + 2, mask2);
880 data0 = data1;
881 dpdst += 3;
882 }
883
884 if (j < (img_width_b * 3 - 2)) {
885 data1 = *dpsrc++;
886 vis_pst_32(vis_faligndata(data0, data1), dpdst++, mask0);
887 data0 = data1;
888
889 if (j < (img_width_b * 3 - 4)) {
890 data1 = *dpsrc++;
891 vis_pst_32(vis_faligndata(data0, data1), dpdst++, mask1);
892 data0 = data1;
893 mask0 = mask2;
894 }
895 else {
896 mask0 = mask1;
897 }
898 }
899
900 data1 = *dpsrc++;
901 emask = vis_edge32(dpdst, pdst_row_end) & mask0;
902 vis_pst_32(vis_faligndata(data0, data1), dpdst, emask);
903 }
904}
905
906/***************************************************************/
/*
 * mlib_ImageConvCopyEdge_S32_4 - edge copy for 4-channel mlib_s32-sized data
 * (the dispatcher in this file uses it for MLIB_INT and MLIB_FLOAT).
 *
 * VERT_EDGES declares the working locals and copies the left/right columns.
 * When img_width is below 16 the rows are finished by the scalar HORIZ_EDGES
 * path, which returns from this function.  Otherwise the dy_t top rows and
 * dy_b bottom rows are written through masked vis_pst_32 stores.
 *
 * tmask is the low 4 bits of cmask replicated by shift-or.  Two shifted
 * views of it are derived per row (mask1 = tmask >> (offset + 2) and
 * mask0 = mask1 >> 2): mask1 is used for the first store and the main loop
 * then alternates mask0 and mask1 over pairs of mlib_d64 stores.  The main
 * loop also alternates which of data0 / data1 holds the newer source word
 * instead of copying data1 into data0 after every store.
 *
 * NOTE(review): vis_alignaddr is called before the loads and presumably sets
 * the alignment state consumed by vis_faligndata - confirm against the VIS
 * documentation before reordering any statement in the row loops.
 */
907void mlib_ImageConvCopyEdge_S32_4(mlib_image *dst,
908 const mlib_image *src,
909 mlib_s32 dx_l,
910 mlib_s32 dx_r,
911 mlib_s32 dy_t,
912 mlib_s32 dy_b,
913 mlib_s32 cmask)
914{
915 mlib_s32 tmask = cmask & 15, mask0, mask1, offset;
916
917 VERT_EDGES(4, mlib_s32, cmask);
918
/* Narrow image: scalar copy of the rows; HORIZ_EDGES returns. */
919 if (img_width < 16)
920 HORIZ_EDGES(4, mlib_s32, cmask);
921
/* Replicate the 4-bit channel selection across 16 bits. */
922 tmask |= (tmask << 4);
923 tmask |= (tmask << 8);
/* Top edge: rows 0 .. dy_t - 1, img_width_t pixels each. */
924 for (i = 0; i < dy_t; i++) {
925 pdst_row = pdst + i * dst_stride,
926 psrc_row = psrc + i * src_stride, pdst_row_end = pdst_row + img_width_t * 4 - 1;
/* dpdst is pdst_row rounded down to a multiple of 8; offset is the distance back to it in elements. */
927 dpdst = (mlib_d64 *) ((mlib_addr) pdst_row & ~7);
928 offset = pdst_row - (mlib_s32 *) dpdst;
929 dpsrc = (mlib_d64 *) vis_alignaddr(psrc_row, -(offset << 2));
930 mask1 = (tmask >> (offset + 2));
931 mask0 = mask1 >> 2;
932 data0 = *dpsrc++;
933 data1 = *dpsrc++;
934 emask = vis_edge32(pdst_row, pdst_row_end) & mask1;
935 vis_pst_32(vis_faligndata(data0, data1), dpdst++, emask);
936 data0 = data1;
937 j = (mlib_s32) ((mlib_s32 *) dpdst - pdst_row);
/* Main loop: two stores per iteration; data0 ends up holding the newest word. */
938 for (; j < (img_width_t * 4 - 4); j += 4) {
939 data1 = *dpsrc++;
940 vis_pst_32(vis_faligndata(data0, data1), dpdst, mask0);
941 data0 = *dpsrc++;
942 vis_pst_32(vis_faligndata(data1, data0), dpdst + 1, mask1);
943 dpdst += 2;
944 }
945
/* One more full store if needed; mask0 is then set to the mask for the final store. */
946 if (j < (img_width_t * 4 - 2)) {
947 data1 = *dpsrc++;
948 vis_pst_32(vis_faligndata(data0, data1), dpdst++, mask0);
949 data0 = data1;
950 mask0 = mask1;
951 }
952
/* Final store, limited to the row end by the vis_edge32 mask. */
953 data1 = *dpsrc++;
954 emask = vis_edge32(dpdst, pdst_row_end) & mask0;
955 vis_pst_32(vis_faligndata(data0, data1), dpdst, emask);
956 }
957
/* Bottom edge: rows img_height - 1 downward, img_width_b pixels each. */
958 for (i = 0; i < dy_b; i++) {
959 pdst_row = pdst + (img_height - 1 - i) * dst_stride;
960 psrc_row = psrc + (img_height - 1 - i) * src_stride;
961 pdst_row_end = pdst_row + img_width_b * 4 - 1;
962
963 dpdst = (mlib_d64 *) ((mlib_addr) pdst_row & ~7);
964 offset = pdst_row - (mlib_s32 *) dpdst;
965 dpsrc = (mlib_d64 *) vis_alignaddr(psrc_row, -(offset << 2));
966 mask1 = (tmask >> (offset + 2));
967 mask0 = mask1 >> 2;
968 data0 = *dpsrc++;
969 data1 = *dpsrc++;
970 emask = vis_edge32(pdst_row, pdst_row_end) & mask1;
971 vis_pst_32(vis_faligndata(data0, data1), dpdst++, emask);
972 data0 = data1;
973 j = (mlib_s32) ((mlib_s32 *) dpdst - pdst_row);
974 for (; j < (img_width_b * 4 - 4); j += 4) {
975 data1 = *dpsrc++;
976 vis_pst_32(vis_faligndata(data0, data1), dpdst, mask0);
977 data0 = *dpsrc++;
978 vis_pst_32(vis_faligndata(data1, data0), dpdst + 1, mask1);
979 dpdst += 2;
980 }
981
982 if (j < (img_width_b * 4 - 2)) {
983 data1 = *dpsrc++;
984 vis_pst_32(vis_faligndata(data0, data1), dpdst++, mask0);
985 data0 = data1;
986 mask0 = mask1;
987 }
988
989 data1 = *dpsrc++;
990 emask = vis_edge32(dpdst, pdst_row_end) & mask0;
991 vis_pst_32(vis_faligndata(data0, data1), dpdst, emask);
992 }
993}
994
995/***************************************************************/