blob: 0c61236ce7a119a2f42d52d1fc5a5f0000ea96ed [file] [log] [blame]
J. Duke319a3b92007-12-01 00:00:00 +00001/*
2 * Copyright 2003 Sun Microsystems, Inc. All Rights Reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Sun designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Sun in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
22 * CA 95054 USA or visit www.sun.com if you need additional information or
23 * have any questions.
24 */
25
26
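/*
 * FUNCTION
 *      Internal functions for mlib_ImageConv2x2 on U8/S16/U16 types:
 *      the mlib_c_conv2x2nw_* variants (MLIB_EDGE_DST_NO_WRITE mask) and
 *      the mlib_c_conv2x2ext_* variants, which replicate the last source
 *      column/row as directed by their dx_r/dy_b arguments.
 */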
32
33#include "mlib_image.h"
34#include "mlib_ImageConv.h"
35#include "mlib_c_ImageConv.h"
36
37/***************************************************************/
/*
 * TYPE_64BIT is the type used to move a 64-bit chunk in one assignment.
 * When i386 is defined a pair of 32-bit integers is used instead of
 * mlib_d64 (do not copy by mlib_d64 data type for x86).
 */
#if defined(i386)

typedef struct {
  mlib_s32 int0, int1;
} two_int;

#define TYPE_64BIT two_int

#else /* defined(i386) */

#define TYPE_64BIT mlib_d64

#endif /* defined(i386) */
51
52/***************************************************************/
/*
 * Turn the four integer kernel coefficients kern[0..3] into the doubles
 * k0..k3, each multiplied by scalef / 2^scalef_expon.
 *
 * Works on the caller's locals scalef, scalef_expon, kern and k0..k3;
 * scalef and scalef_expon are modified.  The division is done in steps
 * of 2^30 so that the final shift count never exceeds 30.
 *
 * Wrapped in do { } while (0) so the macro is a single statement.
 */
#define LOAD_KERNEL_INTO_DOUBLE()                                       \
  do {                                                                  \
    while (scalef_expon > 30) {                                         \
      scalef /= (1 << 30);                                              \
      scalef_expon -= 30;                                               \
    }                                                                   \
                                                                        \
    scalef /= (1 << scalef_expon);                                      \
                                                                        \
    /* keep kernel in regs */                                           \
    k0 = scalef * kern[0]; k1 = scalef * kern[1];                       \
    k2 = scalef * kern[2]; k3 = scalef * kern[3];                       \
  } while (0)
64
65/***************************************************************/
/*
 * Fetch the image geometry into the caller's locals:
 *   hgt, wid, nchannel - height, width and channel count of src
 *   sll, dll           - strides of src and dst divided by sizeof(type)
 *   adr_src, adr_dst   - data pointers of src and dst, cast to type *
 *
 * Wrapped in do { } while (0) so the macro is a single statement.
 */
#define GET_SRC_DST_PARAMETERS(type)                            \
  do {                                                          \
    hgt      = mlib_ImageGetHeight(src);                        \
    wid      = mlib_ImageGetWidth(src);                         \
    nchannel = mlib_ImageGetChannels(src);                      \
    sll      = mlib_ImageGetStride(src) / sizeof(type);         \
    dll      = mlib_ImageGetStride(dst) / sizeof(type);         \
    adr_src  = (type *)mlib_ImageGetData(src);                  \
    adr_dst  = (type *)mlib_ImageGetData(dst);                  \
  } while (0)
74
75/***************************************************************/
/*
 * CLAMP_S32(x) converts x to mlib_s32.
 *
 * With MLIB_USE_FTOI_CLAMPING defined it is a plain cast; otherwise x is
 * first limited to [MLIB_S32_MIN, MLIB_S32_MAX].  The argument may be
 * evaluated more than once, so it must be free of side effects.
 */
#ifdef MLIB_USE_FTOI_CLAMPING

#define CLAMP_S32(x) ((mlib_s32)(x))

#else /* MLIB_USE_FTOI_CLAMPING */

#define CLAMP_S32(x)                                            \
  (((x) >= MLIB_S32_MAX) ? MLIB_S32_MAX :                       \
   (((x) <= MLIB_S32_MIN) ? MLIB_S32_MIN : (mlib_s32)(x)))

#endif /* MLIB_USE_FTOI_CLAMPING */
87
88/***************************************************************/
/*
 * STORE2(res0, res1) writes two masked results to dp[0] and dp[chan1].
 *
 * The caller passes the upper and lower halves of one 64-bit word holding
 * two packed 32-bit results, so which argument belongs to dp[0] depends on
 * byte order: on little-endian builds with long long support the arguments
 * are swapped.
 *
 * NB: the explicit cast to DTYPE avoids a warning from the Microsoft VC
 * compiler, and the DTYPE_MASK makes the narrowing explicit because the
 * macro is typically used to extract bit regions.
 */
#if !defined(_LITTLE_ENDIAN) || defined(_NO_LONGLONG)

#define STORE2(res0, res1)                              \
  dp[0    ] = (DTYPE) ((res0) & DTYPE_MASK);            \
  dp[chan1] = (DTYPE) ((res1) & DTYPE_MASK)

#else /* little-endian with long long */

#define STORE2(res0, res1)                              \
  dp[0    ] = (DTYPE) ((res1) & DTYPE_MASK);            \
  dp[chan1] = (DTYPE) ((res0) & DTYPE_MASK)

#endif /* !defined(_LITTLE_ENDIAN) || defined(_NO_LONGLONG) */
107
108/***************************************************************/
/*
 * LOAD_BUFF(buff) copies the two source samples sp[0] and sp[chan1] into
 * buff[i] and buff[i + 1]; sp, chan1 and i are the caller's locals.
 *
 * A single definition is used for every configuration.  The former
 * 64-bit variants packed both samples into one mlib_s64 store, which
 * left-shifted negative sample values and wrote the mlib_s32 buffer
 * through an mlib_s64 pointer; the two plain stores below put exactly the
 * same values into buff[i] and buff[i + 1] on either byte order.
 *
 * Wrapped in do { } while (0) so the macro is a single statement.
 */
#define LOAD_BUFF(buff)                         \
  do {                                          \
    buff[i    ] = sp[0];                        \
    buff[i + 1] = sp[chan1];                    \
  } while (0)
130
131/***************************************************************/
132typedef union {
133 TYPE_64BIT d64;
134 struct {
135 mlib_s32 i0, i1;
136 } i32s;
137} d64_2x32;
138
139/***************************************************************/
/*
 * D_KER: the no-write variants shrink the processed width and height by
 * this amount, and the ext variants read this many extra source columns.
 */
#define D_KER     1

/*
 * BUFF_LINE: widest (even-rounded) line for which the on-stack line
 * buffers are used; wider lines are allocated with mlib_malloc.
 */
#define BUFF_LINE 256
143
144/***************************************************************/
145#define XOR_80(x) x ^= 0x80
146
147void mlib_ImageXor80_aa(mlib_u8 *dl,
148 mlib_s32 wid,
149 mlib_s32 hgt,
150 mlib_s32 str)
151{
152 mlib_u8 *dp, *dend;
153#ifdef _NO_LONGLONG
154 mlib_u32 cadd = 0x80808080;
155#else /* _NO_LONGLONG */
156 mlib_u64 cadd = MLIB_U64_CONST(0x8080808080808080);
157#endif /* _NO_LONGLONG */
158 mlib_s32 j;
159
160 if (wid == str) {
161 wid *= hgt;
162 hgt = 1;
163 }
164
165 for (j = 0; j < hgt; j++) {
166 dend = dl + wid;
167
168 for (dp = dl; ((mlib_addr)dp & 7) && (dp < dend); dp++) XOR_80(dp[0]);
169
170#ifdef __SUNPRO_C
171#pragma pipeloop(0)
172#endif /* __SUNPRO_C */
173 for (; dp <= (dend - 8); dp += 8) {
174#ifdef _NO_LONGLONG
175 *((mlib_s32*)dp) ^= cadd;
176 *((mlib_s32*)dp+1) ^= cadd;
177#else /* _NO_LONGLONG */
178 *((mlib_u64*)dp) ^= cadd;
179#endif /* _NO_LONGLONG */
180 }
181
182 for (; (dp < dend); dp++) XOR_80(dp[0]);
183
184 dl += str;
185 }
186}
187
188/***************************************************************/
189void mlib_ImageXor80(mlib_u8 *dl,
190 mlib_s32 wid,
191 mlib_s32 hgt,
192 mlib_s32 str,
193 mlib_s32 nchan,
194 mlib_s32 cmask)
195{
196 mlib_s32 i, j, c;
197
198 for (j = 0; j < hgt; j++) {
199 for (c = 0; c < nchan; c++) {
200 if (cmask & (1 << (nchan - 1 - c))) {
201 mlib_u8 *dp = dl + c;
202
203#ifdef __SUNPRO_C
204#pragma pipeloop(0)
205#endif /* __SUNPRO_C */
206 for (i = 0; i < wid; i++) XOR_80(dp[i*nchan]);
207 }
208 }
209
210 dl += str;
211 }
212}
213
214/***************************************************************/
215#define DTYPE mlib_s16
216#define DTYPE_MASK 0xffff
217
218mlib_status mlib_c_conv2x2nw_s16(mlib_image *dst,
219 const mlib_image *src,
220 const mlib_s32 *kern,
221 mlib_s32 scalef_expon,
222 mlib_s32 cmask)
223{
224 mlib_d64 buff_arr[2*BUFF_LINE];
225 mlib_s32 *pbuff = (mlib_s32*)buff_arr, *buffo, *buff0, *buff1, *buff2, *buffT;
226 DTYPE *adr_src, *sl, *sp, *sl1;
227 DTYPE *adr_dst, *dl, *dp;
228 mlib_d64 k0, k1, k2, k3, scalef = 65536.0;
229 mlib_d64 p00, p01, p02,
230 p10, p11, p12;
231 mlib_s32 wid, hgt, sll, dll, wid1;
232 mlib_s32 nchannel, chan1, chan2;
233 mlib_s32 i, j, c;
234 LOAD_KERNEL_INTO_DOUBLE();
235 GET_SRC_DST_PARAMETERS(DTYPE);
236
237 wid1 = (wid + 1) &~ 1;
238
239 if (wid1 > BUFF_LINE) {
240 pbuff = mlib_malloc(4*sizeof(mlib_s32)*wid1);
241
242 if (pbuff == NULL) return MLIB_FAILURE;
243 }
244
245 buffo = pbuff;
246 buff0 = buffo + wid1;
247 buff1 = buff0 + wid1;
248 buff2 = buff1 + wid1;
249
250 chan1 = nchannel;
251 chan2 = chan1 + chan1;
252
253 wid -= D_KER;
254 hgt -= D_KER;
255
256 for (c = 0; c < nchannel; c++) {
257 if (!(cmask & (1 << (nchannel - 1 - c)))) continue;
258
259 sl = adr_src + c;
260 dl = adr_dst + c;
261
262 sl1 = sl + sll;
263#ifdef __SUNPRO_C
264#pragma pipeloop(0)
265#endif /* __SUNPRO_C */
266 for (i = 0; i < wid + D_KER; i++) {
267 buff0[i - 1] = (mlib_s32)sl[i*chan1];
268 buff1[i - 1] = (mlib_s32)sl1[i*chan1];
269 }
270
271 sl += (D_KER + 1)*sll;
272
273 for (j = 0; j < hgt; j++) {
274 sp = sl;
275 dp = dl;
276
277 buff2[-1] = (mlib_s32)sp[0];
278 sp += chan1;
279
280 p02 = buff0[-1];
281 p12 = buff1[-1];
282
283#ifdef __SUNPRO_C
284#pragma pipeloop(0)
285#endif /* __SUNPRO_C */
286 for (i = 0; i <= (wid - 2); i += 2) {
287#ifdef _NO_LONGLONG
288 mlib_s32 o64_1, o64_2;
289#else /* _NO_LONGLONG */
290 mlib_s64 o64;
291#endif /* _NO_LONGLONG */
292 d64_2x32 sd0, sd1, dd;
293
294 p00 = p02; p10 = p12;
295
296 sd0.d64 = *(TYPE_64BIT*)(buff0 + i);
297 sd1.d64 = *(TYPE_64BIT*)(buff1 + i);
298 p01 = (mlib_d64)sd0.i32s.i0;
299 p02 = (mlib_d64)sd0.i32s.i1;
300 p11 = (mlib_d64)sd1.i32s.i0;
301 p12 = (mlib_d64)sd1.i32s.i1;
302
303 LOAD_BUFF(buff2);
304
305 dd.i32s.i0 = CLAMP_S32(p00 * k0 + p01 * k1 + p10 * k2 + p11 * k3);
306 dd.i32s.i1 = CLAMP_S32(p01 * k0 + p02 * k1 + p11 * k2 + p12 * k3);
307 *(TYPE_64BIT*)(buffo + i) = dd.d64;
308
309#ifdef _NO_LONGLONG
310
311 o64_1 = buffo[i];
312 o64_2 = buffo[i+1];
313 STORE2(o64_1 >> 16, o64_2 >> 16);
314
315#else /* _NO_LONGLONG */
316
317 o64 = *(mlib_s64*)(buffo + i);
318 STORE2(o64 >> 48, o64 >> 16);
319
320#endif /* _NO_LONGLONG */
321
322 sp += chan2;
323 dp += chan2;
324 }
325
326 for (; i < wid; i++) {
327 p00 = buff0[i - 1]; p10 = buff1[i - 1];
328 p01 = buff0[i]; p11 = buff1[i];
329
330 buff2[i] = (mlib_s32)sp[0];
331
332 buffo[i] = CLAMP_S32(p00 * k0 + p01 * k1 + p10 * k2 + p11 * k3);
333 dp[0] = buffo[i] >> 16;
334
335 sp += chan1;
336 dp += chan1;
337 }
338
339 sl += sll;
340 dl += dll;
341
342 buffT = buff0;
343 buff0 = buff1;
344 buff1 = buff2;
345 buff2 = buffT;
346 }
347 }
348
349 if (pbuff != (mlib_s32*)buff_arr) mlib_free(pbuff);
350
351 return MLIB_SUCCESS;
352}
353
354/***************************************************************/
355mlib_status mlib_c_conv2x2ext_s16(mlib_image *dst,
356 const mlib_image *src,
357 mlib_s32 dx_l,
358 mlib_s32 dx_r,
359 mlib_s32 dy_t,
360 mlib_s32 dy_b,
361 const mlib_s32 *kern,
362 mlib_s32 scalef_expon,
363 mlib_s32 cmask)
364{
365 mlib_d64 buff_arr[2*BUFF_LINE];
366 mlib_s32 *pbuff = (mlib_s32*)buff_arr, *buffo, *buff0, *buff1, *buff2, *buffT;
367 DTYPE *adr_src, *sl, *sp, *sl1;
368 DTYPE *adr_dst, *dl, *dp;
369 mlib_d64 k0, k1, k2, k3, scalef = 65536.0;
370 mlib_d64 p00, p01, p02,
371 p10, p11, p12;
372 mlib_s32 wid, hgt, sll, dll, wid1;
373 mlib_s32 nchannel, chan1, chan2;
374 mlib_s32 i, j, c, swid;
375 LOAD_KERNEL_INTO_DOUBLE();
376 GET_SRC_DST_PARAMETERS(DTYPE);
377
378 swid = wid + D_KER;
379
380 wid1 = (swid + 1) &~ 1;
381
382 if (wid1 > BUFF_LINE) {
383 pbuff = mlib_malloc(4*sizeof(mlib_s32)*wid1);
384
385 if (pbuff == NULL) return MLIB_FAILURE;
386 }
387
388 buffo = pbuff;
389 buff0 = buffo + wid1;
390 buff1 = buff0 + wid1;
391 buff2 = buff1 + wid1;
392
393 swid -= dx_r;
394
395 chan1 = nchannel;
396 chan2 = chan1 + chan1;
397
398 for (c = 0; c < nchannel; c++) {
399 if (!(cmask & (1 << (nchannel - 1 - c)))) continue;
400
401 sl = adr_src + c;
402 dl = adr_dst + c;
403
404 if ((hgt - dy_b) > 0) sl1 = sl + sll;
405 else sl1 = sl;
406
407#ifdef __SUNPRO_C
408#pragma pipeloop(0)
409#endif /* __SUNPRO_C */
410 for (i = 0; i < swid; i++) {
411 buff0[i - 1] = (mlib_s32)sl[i*chan1];
412 buff1[i - 1] = (mlib_s32)sl1[i*chan1];
413 }
414
415 if (dx_r != 0) {
416 buff0[swid - 1] = buff0[swid - 2];
417 buff1[swid - 1] = buff1[swid - 2];
418 }
419
420 if ((hgt - dy_b) > 1) sl = sl1 + sll;
421 else sl = sl1;
422
423 for (j = 0; j < hgt; j++) {
424 sp = sl;
425 dp = dl;
426
427 buff2[-1] = (mlib_s32)sp[0];
428 sp += chan1;
429
430 p02 = buff0[-1];
431 p12 = buff1[-1];
432
433#ifdef __SUNPRO_C
434#pragma pipeloop(0)
435#endif /* __SUNPRO_C */
436 for (i = 0; i <= (wid - 2); i += 2) {
437#ifdef _NO_LONGLONG
438 mlib_s32 o64_1, o64_2;
439#else /* _NO_LONGLONG */
440 mlib_s64 o64;
441#endif /* _NO_LONGLONG */
442 d64_2x32 sd0, sd1, dd;
443
444 p00 = p02; p10 = p12;
445
446 sd0.d64 = *(TYPE_64BIT*)(buff0 + i);
447 sd1.d64 = *(TYPE_64BIT*)(buff1 + i);
448 p01 = (mlib_d64)sd0.i32s.i0;
449 p02 = (mlib_d64)sd0.i32s.i1;
450 p11 = (mlib_d64)sd1.i32s.i0;
451 p12 = (mlib_d64)sd1.i32s.i1;
452
453 LOAD_BUFF(buff2);
454
455 dd.i32s.i0 = CLAMP_S32(p00 * k0 + p01 * k1 + p10 * k2 + p11 * k3);
456 dd.i32s.i1 = CLAMP_S32(p01 * k0 + p02 * k1 + p11 * k2 + p12 * k3);
457 *(TYPE_64BIT*)(buffo + i) = dd.d64;
458
459#ifdef _NO_LONGLONG
460
461 o64_1 = buffo[i];
462 o64_2 = buffo[i+1];
463 STORE2(o64_1 >> 16, o64_2 >> 16);
464
465#else /* _NO_LONGLONG */
466
467 o64 = *(mlib_s64*)(buffo + i);
468 STORE2(o64 >> 48, o64 >> 16);
469
470#endif /* _NO_LONGLONG */
471
472 sp += chan2;
473 dp += chan2;
474 }
475
476 for (; i < wid; i++) {
477 p00 = buff0[i - 1]; p10 = buff1[i - 1];
478 p01 = buff0[i]; p11 = buff1[i];
479
480 buff2[i] = (mlib_s32)sp[0];
481
482 buffo[i] = CLAMP_S32(p00 * k0 + p01 * k1 + p10 * k2 + p11 * k3);
483 dp[0] = buffo[i] >> 16;
484
485 sp += chan1;
486 dp += chan1;
487 }
488
489 if (dx_r != 0) buff2[swid - 1] = buff2[swid - 2];
490
491 if (j < hgt - dy_b - 2) sl += sll;
492 dl += dll;
493
494 buffT = buff0;
495 buff0 = buff1;
496 buff1 = buff2;
497 buff2 = buffT;
498 }
499 }
500
501 if (pbuff != (mlib_s32*)buff_arr) mlib_free(pbuff);
502
503 return MLIB_SUCCESS;
504}
505
506/***************************************************************/
507#undef DTYPE
508#define DTYPE mlib_u16
509
510mlib_status mlib_c_conv2x2nw_u16(mlib_image *dst,
511 const mlib_image *src,
512 const mlib_s32 *kern,
513 mlib_s32 scalef_expon,
514 mlib_s32 cmask)
515{
516 mlib_d64 buff_arr[2*BUFF_LINE];
517 mlib_s32 *pbuff = (mlib_s32*)buff_arr, *buffo, *buff0, *buff1, *buff2, *buffT;
518 DTYPE *adr_src, *sl, *sp, *sl1;
519 DTYPE *adr_dst, *dl, *dp;
520 mlib_d64 k0, k1, k2, k3, scalef = 65536.0;
521 mlib_d64 p00, p01, p02,
522 p10, p11, p12;
523 mlib_s32 wid, hgt, sll, dll, wid1;
524 mlib_s32 nchannel, chan1, chan2;
525 mlib_s32 i, j, c;
526 mlib_d64 doff = 0x7FFF8000;
527 LOAD_KERNEL_INTO_DOUBLE();
528 GET_SRC_DST_PARAMETERS(DTYPE);
529
530 wid1 = (wid + 1) &~ 1;
531
532 if (wid1 > BUFF_LINE) {
533 pbuff = mlib_malloc(4*sizeof(mlib_s32)*wid1);
534
535 if (pbuff == NULL) return MLIB_FAILURE;
536 }
537
538 buffo = pbuff;
539 buff0 = buffo + wid1;
540 buff1 = buff0 + wid1;
541 buff2 = buff1 + wid1;
542
543 chan1 = nchannel;
544 chan2 = chan1 + chan1;
545
546 wid -= D_KER;
547 hgt -= D_KER;
548
549 for (c = 0; c < nchannel; c++) {
550 if (!(cmask & (1 << (nchannel - 1 - c)))) continue;
551
552 sl = adr_src + c;
553 dl = adr_dst + c;
554
555 sl1 = sl + sll;
556#ifdef __SUNPRO_C
557#pragma pipeloop(0)
558#endif /* __SUNPRO_C */
559 for (i = 0; i < wid + D_KER; i++) {
560 buff0[i - 1] = (mlib_s32)sl[i*chan1];
561 buff1[i - 1] = (mlib_s32)sl1[i*chan1];
562 }
563
564 sl += (D_KER + 1)*sll;
565
566 for (j = 0; j < hgt; j++) {
567 sp = sl;
568 dp = dl;
569
570 buff2[-1] = (mlib_s32)sp[0];
571 sp += chan1;
572
573 p02 = buff0[-1];
574 p12 = buff1[-1];
575
576#ifdef __SUNPRO_C
577#pragma pipeloop(0)
578#endif /* __SUNPRO_C */
579 for (i = 0; i <= (wid - 2); i += 2) {
580#ifdef _NO_LONGLONG
581 mlib_s32 o64_1, o64_2;
582#else /* _NO_LONGLONG */
583 mlib_s64 o64;
584#endif /* _NO_LONGLONG */
585 d64_2x32 sd0, sd1, dd;
586
587 p00 = p02; p10 = p12;
588
589 sd0.d64 = *(TYPE_64BIT*)(buff0 + i);
590 sd1.d64 = *(TYPE_64BIT*)(buff1 + i);
591 p01 = (mlib_d64)sd0.i32s.i0;
592 p02 = (mlib_d64)sd0.i32s.i1;
593 p11 = (mlib_d64)sd1.i32s.i0;
594 p12 = (mlib_d64)sd1.i32s.i1;
595
596 LOAD_BUFF(buff2);
597
598 dd.i32s.i0 = CLAMP_S32(p00 * k0 + p01 * k1 + p10 * k2 + p11 * k3 - doff);
599 dd.i32s.i1 = CLAMP_S32(p01 * k0 + p02 * k1 + p11 * k2 + p12 * k3 - doff);
600 *(TYPE_64BIT*)(buffo + i) = dd.d64;
601
602#ifdef _NO_LONGLONG
603
604 o64_1 = buffo[i];
605 o64_2 = buffo[i+1];
606 o64_1 = o64_1 ^ 0x80000000U;
607 o64_2 = o64_2 ^ 0x80000000U;
608 STORE2(o64_1 >> 16, o64_2 >> 16);
609
610#else /* _NO_LONGLONG */
611
612 o64 = *(mlib_s64*)(buffo + i);
613 o64 = o64 ^ MLIB_U64_CONST(0x8000000080000000);
614 STORE2(o64 >> 48, o64 >> 16);
615
616#endif /* _NO_LONGLONG */
617
618 sp += chan2;
619 dp += chan2;
620 }
621
622 for (; i < wid; i++) {
623 p00 = buff0[i - 1]; p10 = buff1[i - 1];
624 p01 = buff0[i]; p11 = buff1[i];
625
626 buff2[i] = (mlib_s32)sp[0];
627
628 buffo[i] = CLAMP_S32(p00 * k0 + p01 * k1 + p10 * k2 + p11 * k3 - doff);
629 dp[0] = (buffo[i] >> 16) ^ 0x8000;
630
631 sp += chan1;
632 dp += chan1;
633 }
634
635 sl += sll;
636 dl += dll;
637
638 buffT = buff0;
639 buff0 = buff1;
640 buff1 = buff2;
641 buff2 = buffT;
642 }
643 }
644
645 if (pbuff != (mlib_s32*)buff_arr) mlib_free(pbuff);
646
647 return MLIB_SUCCESS;
648}
649
650/***************************************************************/
651mlib_status mlib_c_conv2x2ext_u16(mlib_image *dst,
652 const mlib_image *src,
653 mlib_s32 dx_l,
654 mlib_s32 dx_r,
655 mlib_s32 dy_t,
656 mlib_s32 dy_b,
657 const mlib_s32 *kern,
658 mlib_s32 scalef_expon,
659 mlib_s32 cmask)
660{
661 mlib_d64 buff_arr[2*BUFF_LINE];
662 mlib_s32 *pbuff = (mlib_s32*)buff_arr, *buffo, *buff0, *buff1, *buff2, *buffT;
663 DTYPE *adr_src, *sl, *sp, *sl1;
664 DTYPE *adr_dst, *dl, *dp;
665 mlib_d64 k0, k1, k2, k3, scalef = 65536.0;
666 mlib_d64 p00, p01, p02,
667 p10, p11, p12;
668 mlib_s32 wid, hgt, sll, dll, wid1;
669 mlib_s32 nchannel, chan1, chan2;
670 mlib_s32 i, j, c, swid;
671 mlib_d64 doff = 0x7FFF8000;
672 LOAD_KERNEL_INTO_DOUBLE();
673 GET_SRC_DST_PARAMETERS(DTYPE);
674
675 swid = wid + D_KER;
676
677 wid1 = (swid + 1) &~ 1;
678
679 if (wid1 > BUFF_LINE) {
680 pbuff = mlib_malloc(4*sizeof(mlib_s32)*wid1);
681
682 if (pbuff == NULL) return MLIB_FAILURE;
683 }
684
685 buffo = pbuff;
686 buff0 = buffo + wid1;
687 buff1 = buff0 + wid1;
688 buff2 = buff1 + wid1;
689
690 swid -= dx_r;
691
692 chan1 = nchannel;
693 chan2 = chan1 + chan1;
694
695 for (c = 0; c < nchannel; c++) {
696 if (!(cmask & (1 << (nchannel - 1 - c)))) continue;
697
698 sl = adr_src + c;
699 dl = adr_dst + c;
700
701 if ((hgt - dy_b) > 0) sl1 = sl + sll;
702 else sl1 = sl;
703
704#ifdef __SUNPRO_C
705#pragma pipeloop(0)
706#endif /* __SUNPRO_C */
707 for (i = 0; i < swid; i++) {
708 buff0[i - 1] = (mlib_s32)sl[i*chan1];
709 buff1[i - 1] = (mlib_s32)sl1[i*chan1];
710 }
711
712 if (dx_r != 0) {
713 buff0[swid - 1] = buff0[swid - 2];
714 buff1[swid - 1] = buff1[swid - 2];
715 }
716
717 if ((hgt - dy_b) > 1) sl = sl1 + sll;
718 else sl = sl1;
719
720 for (j = 0; j < hgt; j++) {
721 sp = sl;
722 dp = dl;
723
724 buff2[-1] = (mlib_s32)sp[0];
725 sp += chan1;
726
727 p02 = buff0[-1];
728 p12 = buff1[-1];
729
730#ifdef __SUNPRO_C
731#pragma pipeloop(0)
732#endif /* __SUNPRO_C */
733 for (i = 0; i <= (wid - 2); i += 2) {
734#ifdef _NO_LONGLONG
735 mlib_s32 o64_1, o64_2;
736#else /* _NO_LONGLONG */
737 mlib_s64 o64;
738#endif /* _NO_LONGLONG */
739 d64_2x32 sd0, sd1, dd;
740
741 p00 = p02; p10 = p12;
742
743 sd0.d64 = *(TYPE_64BIT*)(buff0 + i);
744 sd1.d64 = *(TYPE_64BIT*)(buff1 + i);
745 p01 = (mlib_d64)sd0.i32s.i0;
746 p02 = (mlib_d64)sd0.i32s.i1;
747 p11 = (mlib_d64)sd1.i32s.i0;
748 p12 = (mlib_d64)sd1.i32s.i1;
749
750 LOAD_BUFF(buff2);
751
752 dd.i32s.i0 = CLAMP_S32(p00 * k0 + p01 * k1 + p10 * k2 + p11 * k3 - doff);
753 dd.i32s.i1 = CLAMP_S32(p01 * k0 + p02 * k1 + p11 * k2 + p12 * k3 - doff);
754 *(TYPE_64BIT*)(buffo + i) = dd.d64;
755
756#ifdef _NO_LONGLONG
757
758 o64_1 = buffo[i];
759 o64_2 = buffo[i+1];
760 o64_1 = o64_1 ^ 0x80000000U;
761 o64_2 = o64_2 ^ 0x80000000U;
762 STORE2(o64_1 >> 16, o64_2 >> 16);
763
764#else /* _NO_LONGLONG */
765
766 o64 = *(mlib_s64*)(buffo + i);
767 o64 = o64 ^ MLIB_U64_CONST(0x8000000080000000);
768 STORE2(o64 >> 48, o64 >> 16);
769
770#endif /* _NO_LONGLONG */
771
772 sp += chan2;
773 dp += chan2;
774 }
775
776 for (; i < wid; i++) {
777 p00 = buff0[i - 1]; p10 = buff1[i - 1];
778 p01 = buff0[i]; p11 = buff1[i];
779
780 buff2[i] = (mlib_s32)sp[0];
781
782 buffo[i] = CLAMP_S32(p00 * k0 + p01 * k1 + p10 * k2 + p11 * k3 - doff);
783 dp[0] = (buffo[i] >> 16) ^ 0x8000;
784
785 sp += chan1;
786 dp += chan1;
787 }
788
789 if (dx_r != 0) buff2[swid - 1] = buff2[swid - 2];
790
791 if (j < hgt - dy_b - 2) sl += sll;
792 dl += dll;
793
794 buffT = buff0;
795 buff0 = buff1;
796 buff1 = buff2;
797 buff2 = buffT;
798 }
799 }
800
801 if (pbuff != (mlib_s32*)buff_arr) mlib_free(pbuff);
802
803 return MLIB_SUCCESS;
804}
805
806/***************************************************************/
807#undef DTYPE
808#define DTYPE mlib_u8
809
810mlib_status mlib_c_conv2x2nw_u8(mlib_image *dst,
811 const mlib_image *src,
812 const mlib_s32 *kern,
813 mlib_s32 scalef_expon,
814 mlib_s32 cmask)
815{
816 mlib_d64 buff_arr[2*BUFF_LINE];
817 mlib_s32 *pbuff = (mlib_s32*)buff_arr, *buffo, *buff0, *buff1, *buff2, *buffT;
818 DTYPE *adr_src, *sl, *sp, *sl1;
819 DTYPE *adr_dst, *dl, *dp;
820 mlib_d64 k0, k1, k2, k3, scalef = (1 << 24);
821 mlib_d64 p00, p01, p02,
822 p10, p11, p12;
823 mlib_s32 wid, hgt, sll, dll, wid1;
824 mlib_s32 nchannel, chan1, chan2;
825 mlib_s32 i, j, c;
826 LOAD_KERNEL_INTO_DOUBLE();
827 GET_SRC_DST_PARAMETERS(DTYPE);
828
829 wid1 = (wid + 1) &~ 1;
830
831 if (wid1 > BUFF_LINE) {
832 pbuff = mlib_malloc(4*sizeof(mlib_s32)*wid1);
833
834 if (pbuff == NULL) return MLIB_FAILURE;
835 }
836
837 buffo = pbuff;
838 buff0 = buffo + wid1;
839 buff1 = buff0 + wid1;
840 buff2 = buff1 + wid1;
841
842 chan1 = nchannel;
843 chan2 = chan1 + chan1;
844
845 wid -= D_KER;
846 hgt -= D_KER;
847
848 for (c = 0; c < nchannel; c++) {
849 if (!(cmask & (1 << (nchannel - 1 - c)))) continue;
850
851 sl = adr_src + c;
852 dl = adr_dst + c;
853
854 sl1 = sl + sll;
855#ifdef __SUNPRO_C
856#pragma pipeloop(0)
857#endif /* __SUNPRO_C */
858 for (i = 0; i < wid + D_KER; i++) {
859 buff0[i - 1] = (mlib_s32)sl[i*chan1];
860 buff1[i - 1] = (mlib_s32)sl1[i*chan1];
861 }
862
863 sl += (D_KER + 1)*sll;
864
865 for (j = 0; j < hgt; j++) {
866 sp = sl;
867 dp = dl;
868
869 buff2[-1] = (mlib_s32)sp[0];
870 sp += chan1;
871
872 p02 = buff0[-1];
873 p12 = buff1[-1];
874
875#ifdef __SUNPRO_C
876#pragma pipeloop(0)
877#endif /* __SUNPRO_C */
878 for (i = 0; i <= (wid - 2); i += 2) {
879#ifdef _NO_LONGLONG
880 mlib_s32 o64_1, o64_2;
881#else /* _NO_LONGLONG */
882 mlib_s64 o64;
883#endif /* _NO_LONGLONG */
884 d64_2x32 sd0, sd1, dd;
885
886 p00 = p02; p10 = p12;
887
888 sd0.d64 = *(TYPE_64BIT*)(buff0 + i);
889 sd1.d64 = *(TYPE_64BIT*)(buff1 + i);
890 p01 = (mlib_d64)sd0.i32s.i0;
891 p02 = (mlib_d64)sd0.i32s.i1;
892 p11 = (mlib_d64)sd1.i32s.i0;
893 p12 = (mlib_d64)sd1.i32s.i1;
894
895 LOAD_BUFF(buff2);
896
897 dd.i32s.i0 = CLAMP_S32(p00 * k0 + p01 * k1 + p10 * k2 + p11 * k3 - (1u << 31));
898 dd.i32s.i1 = CLAMP_S32(p01 * k0 + p02 * k1 + p11 * k2 + p12 * k3 - (1u << 31));
899 *(TYPE_64BIT*)(buffo + i) = dd.d64;
900
901#ifdef _NO_LONGLONG
902
903 o64_1 = buffo[i];
904 o64_2 = buffo[i+1];
905 STORE2(o64_1 >> 24, o64_2 >> 24);
906
907#else /* _NO_LONGLONG */
908
909 o64 = *(mlib_s64*)(buffo + i);
910 STORE2(o64 >> 56, o64 >> 24);
911
912#endif /* _NO_LONGLONG */
913
914 sp += chan2;
915 dp += chan2;
916 }
917
918 for (; i < wid; i++) {
919 p00 = buff0[i - 1]; p10 = buff1[i - 1];
920 p01 = buff0[i]; p11 = buff1[i];
921
922 buff2[i] = (mlib_s32)sp[0];
923
924 buffo[i] = CLAMP_S32(p00 * k0 + p01 * k1 + p10 * k2 + p11 * k3 - (1u << 31));
925 dp[0] = (buffo[i] >> 24);
926
927 sp += chan1;
928 dp += chan1;
929 }
930
931 sl += sll;
932 dl += dll;
933
934 buffT = buff0;
935 buff0 = buff1;
936 buff1 = buff2;
937 buff2 = buffT;
938 }
939 }
940
941 {
942 mlib_s32 amask = (1 << nchannel) - 1;
943
944 if ((cmask & amask) != amask) {
945 mlib_ImageXor80(adr_dst, wid, hgt, dll, nchannel, cmask);
946 } else {
947 mlib_ImageXor80_aa(adr_dst, wid*nchannel, hgt, dll);
948 }
949 }
950
951 if (pbuff != (mlib_s32*)buff_arr) mlib_free(pbuff);
952
953 return MLIB_SUCCESS;
954}
955
956/***************************************************************/
957mlib_status mlib_c_conv2x2ext_u8(mlib_image *dst,
958 const mlib_image *src,
959 mlib_s32 dx_l,
960 mlib_s32 dx_r,
961 mlib_s32 dy_t,
962 mlib_s32 dy_b,
963 const mlib_s32 *kern,
964 mlib_s32 scalef_expon,
965 mlib_s32 cmask)
966{
967 mlib_d64 buff_arr[4*BUFF_LINE];
968 mlib_s32 *pbuff = (mlib_s32*)buff_arr, *buffo, *buff0, *buff1, *buff2, *buffT;
969 DTYPE *adr_src, *sl, *sp, *sl1;
970 DTYPE *adr_dst, *dl, *dp;
971 mlib_d64 k0, k1, k2, k3, scalef = (1 << 24);
972 mlib_d64 p00, p01, p02,
973 p10, p11, p12;
974 mlib_s32 wid, hgt, sll, dll, wid1;
975 mlib_s32 nchannel, chan1, chan2;
976 mlib_s32 i, j, c, swid;
977 LOAD_KERNEL_INTO_DOUBLE();
978 GET_SRC_DST_PARAMETERS(DTYPE);
979
980 swid = wid + D_KER;
981
982 wid1 = (swid + 1) &~ 1;
983
984 if (wid1 > BUFF_LINE) {
985 pbuff = mlib_malloc(4*sizeof(mlib_s32)*wid1);
986
987 if (pbuff == NULL) return MLIB_FAILURE;
988 }
989
990 buffo = pbuff;
991 buff0 = buffo + wid1;
992 buff1 = buff0 + wid1;
993 buff2 = buff1 + wid1;
994
995 chan1 = nchannel;
996 chan2 = chan1 + chan1;
997
998 swid -= dx_r;
999
1000 for (c = 0; c < nchannel; c++) {
1001 if (!(cmask & (1 << (nchannel - 1 - c)))) continue;
1002
1003 sl = adr_src + c;
1004 dl = adr_dst + c;
1005
1006 if ((hgt - dy_b) > 0) sl1 = sl + sll;
1007 else sl1 = sl;
1008
1009#ifdef __SUNPRO_C
1010#pragma pipeloop(0)
1011#endif /* __SUNPRO_C */
1012 for (i = 0; i < swid; i++) {
1013 buff0[i - 1] = (mlib_s32)sl[i*chan1];
1014 buff1[i - 1] = (mlib_s32)sl1[i*chan1];
1015 }
1016
1017 if (dx_r != 0) {
1018 buff0[swid - 1] = buff0[swid - 2];
1019 buff1[swid - 1] = buff1[swid - 2];
1020 }
1021
1022 if ((hgt - dy_b) > 1) sl = sl1 + sll;
1023 else sl = sl1;
1024
1025 for (j = 0; j < hgt; j++) {
1026 sp = sl;
1027 dp = dl;
1028
1029 buff2[-1] = (mlib_s32)sp[0];
1030 sp += chan1;
1031
1032 p02 = buff0[-1];
1033 p12 = buff1[-1];
1034
1035#ifdef __SUNPRO_C
1036#pragma pipeloop(0)
1037#endif /* __SUNPRO_C */
1038 for (i = 0; i <= (wid - 2); i += 2) {
1039#ifdef _NO_LONGLONG
1040 mlib_s32 o64_1, o64_2;
1041#else /* _NO_LONGLONG */
1042 mlib_s64 o64;
1043#endif /* _NO_LONGLONG */
1044 d64_2x32 sd0, sd1, dd;
1045
1046 p00 = p02; p10 = p12;
1047
1048 sd0.d64 = *(TYPE_64BIT*)(buff0 + i);
1049 sd1.d64 = *(TYPE_64BIT*)(buff1 + i);
1050 p01 = (mlib_d64)sd0.i32s.i0;
1051 p02 = (mlib_d64)sd0.i32s.i1;
1052 p11 = (mlib_d64)sd1.i32s.i0;
1053 p12 = (mlib_d64)sd1.i32s.i1;
1054
1055 LOAD_BUFF(buff2);
1056
1057 dd.i32s.i0 = CLAMP_S32(p00 * k0 + p01 * k1 + p10 * k2 + p11 * k3 - (1u << 31));
1058 dd.i32s.i1 = CLAMP_S32(p01 * k0 + p02 * k1 + p11 * k2 + p12 * k3 - (1u << 31));
1059 *(TYPE_64BIT*)(buffo + i) = dd.d64;
1060
1061#ifdef _NO_LONGLONG
1062
1063 o64_1 = buffo[i];
1064 o64_2 = buffo[i+1];
1065 STORE2(o64_1 >> 24, o64_2 >> 24);
1066
1067#else /* _NO_LONGLONG */
1068
1069 o64 = *(mlib_s64*)(buffo + i);
1070 STORE2(o64 >> 56, o64 >> 24);
1071
1072#endif /* _NO_LONGLONG */
1073
1074 sp += chan2;
1075 dp += chan2;
1076 }
1077
1078 for (; i < wid; i++) {
1079 p00 = buff0[i - 1]; p10 = buff1[i - 1];
1080 p01 = buff0[i]; p11 = buff1[i];
1081
1082 buff2[i] = (mlib_s32)sp[0];
1083
1084 buffo[i] = CLAMP_S32(p00 * k0 + p01 * k1 + p10 * k2 + p11 * k3 - (1u << 31));
1085 dp[0] = (buffo[i] >> 24);
1086
1087 sp += chan1;
1088 dp += chan1;
1089 }
1090
1091 if (dx_r != 0) buff2[swid - 1] = buff2[swid - 2];
1092
1093 if (j < hgt - dy_b - 2) sl += sll;
1094 dl += dll;
1095
1096 buffT = buff0;
1097 buff0 = buff1;
1098 buff1 = buff2;
1099 buff2 = buffT;
1100 }
1101 }
1102
1103 {
1104 mlib_s32 amask = (1 << nchannel) - 1;
1105
1106 if ((cmask & amask) != amask) {
1107 mlib_ImageXor80(adr_dst, wid, hgt, dll, nchannel, cmask);
1108 } else {
1109 mlib_ImageXor80_aa(adr_dst, wid*nchannel, hgt, dll);
1110 }
1111 }
1112
1113 if (pbuff != (mlib_s32*)buff_arr) mlib_free(pbuff);
1114
1115 return MLIB_SUCCESS;
1116}
1117
1118/***************************************************************/