/*
 * Copyright 2003 Sun Microsystems, Inc. All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Sun designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Sun in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 */



/*
 * The functions in this file step along each destination line from xLeft
 * to xRight and apply bilinear filtering to the source pixels.
 */

34#include "vis_proto.h"
35#include "mlib_image.h"
36#include "mlib_ImageColormap.h"
37#include "mlib_ImageCopy.h"
38#include "mlib_ImageAffine.h"
39#include "mlib_v_ImageFilters.h"
40#include "mlib_v_ImageChannelExtract.h"
41#include "mlib_v_ImageAffine_BL_S16.h"
42
43/*#define MLIB_VIS2*/
44
/***************************************************************/
/*
 * Element type for this file. It is not referenced directly below;
 * presumably it is consumed by the shared macros pulled in from
 * mlib_ImageAffine.h (TODO confirm).
 */
#define DTYPE mlib_s16

/*
 * Builds the name of an entry point from its channel suffix, e.g.
 * FUN_NAME(1ch) expands to mlib_ImageAffine_u16_1ch_bl.
 */
#define FUN_NAME(CHAN) mlib_ImageAffine_u16_##CHAN##_bl

50/***************************************************************/
51mlib_status FUN_NAME(2ch_na)(mlib_affine_param *param);
52mlib_status FUN_NAME(4ch_na)(mlib_affine_param *param);
53
/***************************************************************/
/*
 * XORs x in place with mask_8000 (0x8000 replicated into every 16-bit
 * lane, declared by DECLAREVAR), i.e. toggles the top bit of each lane.
 * BL_SUM applies it to the sources before the arithmetic and to the
 * result afterwards.
 */
#define XOR_8000(x) (x) = vis_fxor((x), mask_8000)

/***************************************************************/
/*
 * Writes the byte-shuffle mask when MLIB_VIS2 is defined; expands to
 * nothing (the argument is not evaluated) otherwise.
 */
#ifdef MLIB_VIS2
#define MLIB_WRITE_BMASK(bmask) vis_write_bmask(bmask, 0)
#else
#define MLIB_WRITE_BMASK(bmask)
#endif /* MLIB_VIS2 */

/***************************************************************/
/*
 * Local declarations shared by every entry point in this file.
 * Replaces any earlier DECLAREVAR definition and extends DECLAREVAR0()
 * (defined outside this file) with:
 *   warp_tbl, srcYStride   - copied from *param
 *   dl, i, size            - per-line destination pointer, loop index, count
 *   mask_8000, mask_7fff   - 0x8000 / 0x7FFF replicated into all four lanes
 *   dx64, dy64             - per-iteration increments of deltax / deltay
 *   deltax, deltay         - current fractional offsets
 *   delta1_x, delta1_y     - mask_7fff minus deltax / deltay
 *   s0..s3, d0..d3, dd     - source samples, temporaries and the result
 * The expansion is a declaration list, so it must appear where
 * declarations are allowed and is terminated by the caller's ';'.
 */
#undef  DECLAREVAR
#define DECLAREVAR()                                            \
  DECLAREVAR0();                                                \
  mlib_s32 *warp_tbl   = param -> warp_tbl;                     \
  mlib_s32 srcYStride  = param -> srcYStride;                   \
  mlib_u8  *dl;                                                 \
  mlib_s32 i, size;                                             \
  mlib_d64 mask_8000 = vis_to_double_dup(0x80008000);           \
  mlib_d64 mask_7fff = vis_to_double_dup(0x7FFF7FFF);           \
  mlib_d64 dx64, dy64, deltax, deltay, delta1_x, delta1_y;      \
  mlib_d64 s0, s1, s2, s3;                                      \
  mlib_d64 d0, d1, d2, d3, dd

/***************************************************************/

/*
 * Lane-wise 16x16 multiply assembled from the two VIS partial products
 * (vis_fmul8sux16 + vis_fmul8ulx16).
 * The arguments (x, y) are swapped to prevent overflow: y is passed as
 * the first operand of both intrinsics and x as the second.
 */
#define FMUL_16x16(x, y)                                        \
  vis_fpadd16(vis_fmul8sux16((y), (x)),                         \
              vis_fmul8ulx16((y), (x)))

/***************************************************************/
/*
 * Number of mlib_d64 elements in the on-stack line buffer used by the
 * 2ch_na, 3ch and 4ch_na variants; longer lines get a heap buffer.
 */
#define BUF_SIZE 512

/***************************************************************/
/*
 * Packs four coordinates into the four 16-bit lanes of a 64-bit value.
 * Each lane receives (x & 0xFFFE) >> 1, i.e. the low 16 bits of the
 * coordinate reduced to a 15-bit value with the top bit of the lane clear.
 * x0 lands in the most significant lane, x3 in the least significant.
 */
#define DOUBLE_4U16(x0, x1, x2, x3)                                      \
  vis_to_double(((((x0) & 0xFFFE) << 15) | (((x1) & 0xFFFE) >> 1)),      \
                ((((x2) & 0xFFFE) << 15) | (((x3) & 0xFFFE) >> 1)))

/***************************************************************/
/*
 * Bilinear blend of four packed samples, one result per 16-bit lane.
 *
 * Inputs (all from the enclosing scope, see DECLAREVAR):
 *   s0, s1  - upper-left / upper-right samples
 *   s2, s3  - lower-left / lower-right samples
 *   deltax, deltay - fractional offsets per lane
 * Output: dd.
 *
 * Steps:
 *   1. XOR_8000 on the sources toggles the top bit of every lane.
 *   2. delta1_* = mask_7fff - delta*, the complementary weights.
 *   3. Each row is blended horizontally (s0*delta1_x + s1*deltax, same
 *      for s2/s3); the rows are then blended vertically with
 *      delta1_y / deltay.
 *   4. Every partial sum is doubled with vis_fpadd16(d, d); presumably
 *      this compensates for the scaling of FMUL_16x16 with 15-bit
 *      weights (TODO confirm against the VIS manual).
 *   5. XOR_8000 on dd toggles the top bit back.
 *   6. deltax / deltay are advanced by dx64 / dy64 and wrapped to 15 bits,
 *      ready for the next call.
 *
 * Side effects: overwrites s0..s3, d0..d3, delta1_x, delta1_y, deltax,
 * deltay and dd.
 */
#define BL_SUM()                                                \
  XOR_8000(s0);                                                 \
  XOR_8000(s1);                                                 \
  XOR_8000(s2);                                                 \
  XOR_8000(s3);                                                 \
                                                                \
  delta1_x = vis_fpsub16(mask_7fff, deltax);                    \
  delta1_y = vis_fpsub16(mask_7fff, deltay);                    \
                                                                \
  d0 = FMUL_16x16(s0, delta1_x);                                \
  d1 = FMUL_16x16(s1, deltax);                                  \
  d0 = vis_fpadd16(d0, d1);                                     \
  d0 = vis_fpadd16(d0, d0);                                     \
  d0 = FMUL_16x16(d0, delta1_y);                                \
                                                                \
  d2 = FMUL_16x16(s2, delta1_x);                                \
  d3 = FMUL_16x16(s3, deltax);                                  \
  d2 = vis_fpadd16(d2, d3);                                     \
  d2 = vis_fpadd16(d2, d2);                                     \
  d2 = FMUL_16x16(d2, deltay);                                  \
                                                                \
  dd = vis_fpadd16(d0, d2);                                     \
  dd = vis_fpadd16(dd, dd);                                     \
  XOR_8000(dd);                                                 \
                                                                \
  deltax = vis_fpadd16(deltax, dx64);                           \
  deltay = vis_fpadd16(deltay, dy64);                           \
  deltax = vis_fand(deltax, mask_7fff);                         \
  deltay = vis_fand(deltay, mask_7fff)

/***************************************************************/
/*
 * Bilinear blend used by the 3-channel path.
 *
 * Differs from BL_SUM in two ways:
 *   - the vertical blend is done first (s0 with s2, s1 with s3) and the
 *     horizontal weights delta1_x / deltax are applied afterwards;
 *   - before the final add, d0 is rotated by two bytes
 *     (vis_alignaddr(0, 2) + vis_faligndata(d0, d0)) so that the lanes
 *     of the left-hand pixel line up with those of the right-hand pixel.
 *     The 3ch caller loads s0/s2 starting one 16-bit element before the
 *     left pixel, which is why the two vectors are offset by one lane.
 *
 * Side effects: overwrites s0..s3, d0..d3, delta1_x, delta1_y, deltax,
 * deltay and dd, and changes the alignment offset set by vis_alignaddr,
 * so callers must re-issue vis_alignaddr before the next aligned load.
 */
#define BL_SUM_3CH()                                            \
  XOR_8000(s0);                                                 \
  XOR_8000(s1);                                                 \
  XOR_8000(s2);                                                 \
  XOR_8000(s3);                                                 \
                                                                \
  delta1_x = vis_fpsub16(mask_7fff, deltax);                    \
  delta1_y = vis_fpsub16(mask_7fff, deltay);                    \
                                                                \
  d0 = FMUL_16x16(s0, delta1_y);                                \
  d2 = FMUL_16x16(s2, deltay);                                  \
  d0 = vis_fpadd16(d0, d2);                                     \
  d0 = vis_fpadd16(d0, d0);                                     \
  d0 = FMUL_16x16(d0, delta1_x);                                \
                                                                \
  d1 = FMUL_16x16(s1, delta1_y);                                \
  d3 = FMUL_16x16(s3, deltay);                                  \
  d1 = vis_fpadd16(d1, d3);                                     \
  d1 = vis_fpadd16(d1, d1);                                     \
  d1 = FMUL_16x16(d1, deltax);                                  \
                                                                \
  vis_alignaddr((void*)0, 2);                                   \
  d0 = vis_faligndata(d0, d0);                                  \
  dd = vis_fpadd16(d0, d1);                                     \
  dd = vis_fpadd16(dd, dd);                                     \
  XOR_8000(dd);                                                 \
                                                                \
  deltax = vis_fpadd16(deltax, dx64);                           \
  deltay = vis_fpadd16(deltay, dy64);                           \
  deltax = vis_fand(deltax, mask_7fff);                         \
  deltay = vis_fand(deltay, mask_7fff)

/***************************************************************/
/* Loads one 16-bit element located ind bytes past the byte pointer sp. */
#define LD_U16(sp, ind) vis_ld_u16((sp) + (ind))

/***************************************************************/
/*
 * Gathers the 2x2 neighbourhoods of four single-channel pixels.
 *
 * sp0..sp3 are byte pointers to the upper-left sample of four
 * consecutive destination pixels. On exit:
 *   s0 = { sp0[0],              sp1[0],              sp2[0],  sp3[0]  }
 *   s1 = the samples 2 bytes to the right of those in s0
 *   s2 = the samples srcYStride bytes below those in s0
 *   s3 = the samples srcYStride + 2 bytes from those in s0
 *
 * Non-VIS2 build: each vis_faligndata pushes one freshly loaded 16-bit
 * value in front of the vector built so far, so the loads run from sp3
 * back to sp0. This relies on the caller having issued
 * vis_alignaddr((void*)0, 6) beforehand.
 *
 * VIS2 build: the same gather is done with two levels of vis_bshuffle;
 * it relies on the bmask written by the caller and on the temporaries
 * t0..t3 declared there.
 */
#ifndef MLIB_VIS2

#define LOAD_1CH()                                              \
  s0 = vis_faligndata(LD_U16(sp3, 0), mask_7fff);               \
  s1 = vis_faligndata(LD_U16(sp3, 2), mask_7fff);               \
  s2 = vis_faligndata(LD_U16(sp3, srcYStride), mask_7fff);      \
  s3 = vis_faligndata(LD_U16(sp3, srcYStride + 2), mask_7fff);  \
                                                                \
  s0 = vis_faligndata(LD_U16(sp2, 0), s0);                      \
  s1 = vis_faligndata(LD_U16(sp2, 2), s1);                      \
  s2 = vis_faligndata(LD_U16(sp2, srcYStride), s2);             \
  s3 = vis_faligndata(LD_U16(sp2, srcYStride + 2), s3);         \
                                                                \
  s0 = vis_faligndata(LD_U16(sp1, 0), s0);                      \
  s1 = vis_faligndata(LD_U16(sp1, 2), s1);                      \
  s2 = vis_faligndata(LD_U16(sp1, srcYStride), s2);             \
  s3 = vis_faligndata(LD_U16(sp1, srcYStride + 2), s3);         \
                                                                \
  s0 = vis_faligndata(LD_U16(sp0, 0), s0);                      \
  s1 = vis_faligndata(LD_U16(sp0, 2), s1);                      \
  s2 = vis_faligndata(LD_U16(sp0, srcYStride), s2);             \
  s3 = vis_faligndata(LD_U16(sp0, srcYStride + 2), s3)

#else

#define LOAD_1CH()                                                              \
  s0 = vis_bshuffle(LD_U16(sp0, 0), LD_U16(sp2, 0));                            \
  s1 = vis_bshuffle(LD_U16(sp0, 2), LD_U16(sp2, 2));                            \
  s2 = vis_bshuffle(LD_U16(sp0, srcYStride), LD_U16(sp2, srcYStride));          \
  s3 = vis_bshuffle(LD_U16(sp0, srcYStride + 2), LD_U16(sp2, srcYStride + 2));  \
                                                                                \
  t0 = vis_bshuffle(LD_U16(sp1, 0), LD_U16(sp3, 0));                            \
  t1 = vis_bshuffle(LD_U16(sp1, 2), LD_U16(sp3, 2));                            \
  t2 = vis_bshuffle(LD_U16(sp1, srcYStride), LD_U16(sp3, srcYStride));          \
  t3 = vis_bshuffle(LD_U16(sp1, srcYStride + 2), LD_U16(sp3, srcYStride + 2));  \
                                                                                \
  s0 = vis_bshuffle(s0, t0);                                                    \
  s1 = vis_bshuffle(s1, t1);                                                    \
  s2 = vis_bshuffle(s2, t2);                                                    \
  s3 = vis_bshuffle(s3, t3)

#endif /* MLIB_VIS2 */

/***************************************************************/
/*
 * 1-channel variant: sets sp to the byte address of the source sample
 * for the current (X, Y) - row pointer looked up in lineAddr via
 * PTR_SHIFT(Y), plus 2 bytes per integer X step - and then advances
 * X and Y by dX and dY.
 * Expands to three statements, so use it only inside a braced block.
 */
#define GET_POINTER(sp)                                                         \
  (sp) = *(mlib_u8**)((mlib_u8*)lineAddr + PTR_SHIFT(Y)) + 2*(X >> MLIB_SHIFT); \
  X += dX;                                                                      \
  Y += dY

/***************************************************************/
/*
 * Per-line increment setup for the 1-channel path. It is not invoked
 * directly in this file; presumably NEW_LINE() expands it (TODO confirm).
 *
 * When a warp table is supplied, dX / dY are reloaded for line j and the
 * packed increments dx64 / dy64 are rebuilt. The 1-channel loop handles
 * four pixels per iteration and the offsets are kept halved (see
 * DOUBLE_4U16), so each lane advances by 4*dX/2 = dX << 1.
 *
 * NOTE(review): dX << 1 shifts a signed value; if dX can be negative this
 * relies on the compiler's two's-complement behaviour.
 */
#undef  PREPARE_DELTAS
#define PREPARE_DELTAS                                                             \
  if (warp_tbl != NULL) {                                                          \
    dX = warp_tbl[2*j    ];                                                        \
    dY = warp_tbl[2*j + 1];                                                        \
    dx64 = vis_to_double_dup((((dX << 1) & 0xFFFF) << 16) | ((dX << 1) & 0xFFFF)); \
    dy64 = vis_to_double_dup((((dY << 1) & 0xFFFF) << 16) | ((dY << 1) & 0xFFFF)); \
  }

220/***************************************************************/
221mlib_status FUN_NAME(1ch)(mlib_affine_param *param)
222{
223 DECLAREVAR();
224 mlib_s32 off;
225 mlib_s32 x0, x1, x2, x3, y0, y1, y2, y3;
226#ifdef MLIB_VIS2
227 mlib_d64 t0, t1, t2, t3;
228 vis_write_bmask(0x45CD67EF, 0);
229#else
230 vis_alignaddr((void*)0, 6);
231#endif /* MLIB_VIS2 */
232
233 dx64 = vis_to_double_dup((((dX << 1) & 0xFFFF) << 16) | ((dX << 1) & 0xFFFF));
234 dy64 = vis_to_double_dup((((dY << 1) & 0xFFFF) << 16) | ((dY << 1) & 0xFFFF));
235
236 for (j = yStart; j <= yFinish; j++) {
237 mlib_u8 *sp0, *sp1, *sp2, *sp3;
238 mlib_d64 *dp, dmask;
239
240 NEW_LINE(1);
241
242 off = (mlib_s32)dl & 7;
243 dp = (mlib_d64*)(dl - off);
244 off >>= 1;
245
246 x0 = X - off*dX; y0 = Y - off*dY;
247 x1 = x0 + dX; y1 = y0 + dY;
248 x2 = x1 + dX; y2 = y1 + dY;
249 x3 = x2 + dX; y3 = y2 + dY;
250
251 deltax = DOUBLE_4U16(x0, x1, x2, x3);
252 deltay = DOUBLE_4U16(y0, y1, y2, y3);
253
254 if (off) {
255 mlib_s32 emask = vis_edge16((void*)(2*off), (void*)(2*(off + size - 1)));
256
257 off = 4 - off;
258 GET_POINTER(sp3);
259 sp0 = sp1 = sp2 = sp3;
260
261 if (off > 1 && size > 1) {
262 GET_POINTER(sp3);
263 }
264
265 if (off > 2) {
266 sp2 = sp3;
267
268 if (size > 2) {
269 GET_POINTER(sp3);
270 }
271 }
272
273 LOAD_1CH();
274 BL_SUM();
275
276 dmask = ((mlib_d64*)mlib_dmask_arr)[emask];
277 *dp++ = vis_for (vis_fand(dmask, dd), vis_fandnot(dmask, dp[0]));
278
279 size -= off;
280
281 if (size < 0) size = 0;
282 }
283
284#pragma pipeloop(0)
285 for (i = 0; i < size/4; i++) {
286 GET_POINTER(sp0);
287 GET_POINTER(sp1);
288 GET_POINTER(sp2);
289 GET_POINTER(sp3);
290
291 LOAD_1CH();
292 BL_SUM();
293
294 dp[i] = dd;
295 }
296
297 off = size & 3;
298
299 if (off) {
300 GET_POINTER(sp0);
301 sp1 = sp2 = sp3 = sp0;
302
303 if (off > 1) {
304 GET_POINTER(sp1);
305 }
306
307 if (off > 2) {
308 GET_POINTER(sp2);
309 }
310
311 LOAD_1CH();
312 BL_SUM();
313
314 dmask = ((mlib_d64*)mlib_dmask_arr)[(0xF0 >> off) & 0x0F];
315 dp[i] = vis_for (vis_fand(dmask, dd), vis_fandnot(dmask, dp[i]));
316 }
317 }
318
319 return MLIB_SUCCESS;
320}
321
/***************************************************************/
/*
 * Aligned 2-channel variant: sets sp to the source pixel for the current
 * (X, Y) as an mlib_f32 pointer, so one element covers one pixel (two
 * 16-bit channels), and then advances X and Y by dX and dY.
 * Expands to three statements, so use it only inside a braced block.
 */
#undef  GET_POINTER
#define GET_POINTER(sp)                                                        \
  (sp) = *(mlib_f32**)((mlib_u8*)lineAddr + PTR_SHIFT(Y)) + (X >> MLIB_SHIFT); \
  X += dX;                                                                     \
  Y += dY

/***************************************************************/
/*
 * Loads the 2x2 neighbourhoods of two 2-channel pixels through the
 * mlib_f32 pointers sp0 and sp1. Each sN pairs the sample for sp0 (upper
 * half) with the one for sp1 (lower half):
 *   s0 - upper-left, s1 - upper-right, s2 - lower-left, s3 - lower-right.
 * srcYStride must be expressed in mlib_f32 units here.
 */
#define LOAD_2CH()                                              \
  s0 = vis_freg_pair(sp0[0], sp1[0]);                           \
  s1 = vis_freg_pair(sp0[1], sp1[1]);                           \
  s2 = vis_freg_pair(sp0[srcYStride], sp1[srcYStride]);         \
  s3 = vis_freg_pair(sp0[srcYStride + 1], sp1[srcYStride + 1])

/***************************************************************/
/*
 * Per-line increment setup for the 2-channel paths. It is not invoked
 * directly in this file; presumably NEW_LINE() expands it (TODO confirm).
 *
 * When a warp table is supplied, dX / dY are reloaded for line j and
 * dx64 / dy64 are rebuilt. Two pixels are handled per iteration with
 * halved offsets (see DOUBLE_4U16), so each lane advances by 2*dX/2 = dX.
 */
#undef  PREPARE_DELTAS
#define PREPARE_DELTAS                                                  \
  if (warp_tbl != NULL) {                                               \
    dX = warp_tbl[2*j    ];                                             \
    dY = warp_tbl[2*j + 1];                                             \
    dx64 = vis_to_double_dup(((dX & 0xFFFF) << 16) | (dX & 0xFFFF));    \
    dy64 = vis_to_double_dup(((dY & 0xFFFF) << 16) | (dY & 0xFFFF));    \
  }

346/***************************************************************/
347mlib_status FUN_NAME(2ch)(mlib_affine_param *param)
348{
349 DECLAREVAR();
350 mlib_s32 off;
351 mlib_s32 x0, x1, y0, y1;
352
353 if (((mlib_s32)lineAddr[0] | (mlib_s32)dstData | srcYStride | dstYStride) & 3) {
354 return FUN_NAME(2ch_na)(param);
355 }
356
357 srcYStride >>= 2;
358
359 dx64 = vis_to_double_dup(((dX & 0xFFFF) << 16) | (dX & 0xFFFF));
360 dy64 = vis_to_double_dup(((dY & 0xFFFF) << 16) | (dY & 0xFFFF));
361
362 for (j = yStart; j <= yFinish; j++) {
363 mlib_f32 *sp0, *sp1;
364 mlib_d64 *dp;
365
366 NEW_LINE(2);
367
368 off = (mlib_s32)dl & 7;
369 dp = (mlib_d64*)(dl - off);
370
371 if (off) {
372 x0 = X - dX; y0 = Y - dY;
373 x1 = X; y1 = Y;
374 } else {
375 x0 = X; y0 = Y;
376 x1 = X + dX; y1 = Y + dY;
377 }
378
379 deltax = DOUBLE_4U16(x0, x0, x1, x1);
380 deltay = DOUBLE_4U16(y0, y0, y1, y1);
381
382 if (off) {
383 GET_POINTER(sp1);
384 sp0 = sp1;
385 LOAD_2CH();
386
387 BL_SUM();
388
389 ((mlib_f32*)dp)[1] = vis_read_lo(dd);
390 dp++;
391 size--;
392 }
393
394#pragma pipeloop(0)
395 for (i = 0; i < size/2; i++) {
396 GET_POINTER(sp0);
397 GET_POINTER(sp1);
398 LOAD_2CH();
399
400 BL_SUM();
401
402 *dp++ = dd;
403 }
404
405 if (size & 1) {
406 GET_POINTER(sp0);
407 sp1 = sp0;
408 LOAD_2CH();
409
410 BL_SUM();
411
412 ((mlib_f32*)dp)[0] = vis_read_hi(dd);
413 }
414 }
415
416 return MLIB_SUCCESS;
417}
418
/***************************************************************/
/*
 * Unaligned 2-channel variant: sets sp to the byte address of the source
 * pixel for the current (X, Y), 4 bytes per integer X step, and then
 * advances X and Y by dX and dY.
 * Expands to three statements, so use it only inside a braced block.
 */
#undef  GET_POINTER
#define GET_POINTER(sp)                                                         \
  (sp) = *(mlib_u8**)((mlib_u8*)lineAddr + PTR_SHIFT(Y)) + 4*(X >> MLIB_SHIFT); \
  X += dX;                                                                      \
  Y += dY

/***************************************************************/
/*
 * Gathers the 2x2 neighbourhoods of two 2-channel pixels from byte
 * pointers sp0 and sp1 without assuming any alignment beyond 2 bytes.
 * On exit each sN holds { sp0 ch0, sp0 ch1, sp1 ch0, sp1 ch1 } for:
 *   s0 - the pixel itself          (byte offsets 0 and 2)
 *   s1 - the pixel to the right    (byte offsets 4 and 6)
 *   s2 - the pixel below           (srcYStride + 0 / + 2)
 *   s3 - the pixel below-right     (srcYStride + 4 / + 6)
 *
 * Non-VIS2 build: values are shifted in 16 bits at a time with
 * vis_faligndata, last lane first; requires vis_alignaddr((void*)0, 6).
 * VIS2 build: uses vis_bshuffle with the caller's bmask and the
 * temporaries t0..t3.
 */
#ifndef MLIB_VIS2

#define LOAD_2CH_NA()                                           \
  s0 = vis_faligndata(LD_U16(sp1, 2), mask_7fff);               \
  s1 = vis_faligndata(LD_U16(sp1, 6), mask_7fff);               \
  s2 = vis_faligndata(LD_U16(sp1, srcYStride + 2), mask_7fff);  \
  s3 = vis_faligndata(LD_U16(sp1, srcYStride + 6), mask_7fff);  \
                                                                \
  s0 = vis_faligndata(LD_U16(sp1, 0), s0);                      \
  s1 = vis_faligndata(LD_U16(sp1, 4), s1);                      \
  s2 = vis_faligndata(LD_U16(sp1, srcYStride), s2);             \
  s3 = vis_faligndata(LD_U16(sp1, srcYStride + 4), s3);         \
                                                                \
  s0 = vis_faligndata(LD_U16(sp0, 2), s0);                      \
  s1 = vis_faligndata(LD_U16(sp0, 6), s1);                      \
  s2 = vis_faligndata(LD_U16(sp0, srcYStride + 2), s2);         \
  s3 = vis_faligndata(LD_U16(sp0, srcYStride + 6), s3);         \
                                                                \
  s0 = vis_faligndata(LD_U16(sp0, 0), s0);                      \
  s1 = vis_faligndata(LD_U16(sp0, 4), s1);                      \
  s2 = vis_faligndata(LD_U16(sp0, srcYStride), s2);             \
  s3 = vis_faligndata(LD_U16(sp0, srcYStride + 4), s3)

#else

#define LOAD_2CH_NA()                                                           \
  s0 = vis_bshuffle(LD_U16(sp0, 0), LD_U16(sp1, 0));                            \
  s1 = vis_bshuffle(LD_U16(sp0, 4), LD_U16(sp1, 4));                            \
  s2 = vis_bshuffle(LD_U16(sp0, srcYStride), LD_U16(sp1, srcYStride));          \
  s3 = vis_bshuffle(LD_U16(sp0, srcYStride + 4), LD_U16(sp1, srcYStride + 4));  \
                                                                                \
  t0 = vis_bshuffle(LD_U16(sp0, 2), LD_U16(sp1, 2));                            \
  t1 = vis_bshuffle(LD_U16(sp0, 6), LD_U16(sp1, 6));                            \
  t2 = vis_bshuffle(LD_U16(sp0, srcYStride + 2), LD_U16(sp1, srcYStride + 2));  \
  t3 = vis_bshuffle(LD_U16(sp0, srcYStride + 6), LD_U16(sp1, srcYStride + 6));  \
                                                                                \
  s0 = vis_bshuffle(s0, t0);                                                    \
  s1 = vis_bshuffle(s1, t1);                                                    \
  s2 = vis_bshuffle(s2, t2);                                                    \
  s3 = vis_bshuffle(s3, t3)

#endif /* MLIB_VIS2 */

470/***************************************************************/
471mlib_status FUN_NAME(2ch_na)(mlib_affine_param *param)
472{
473 DECLAREVAR();
474 mlib_s32 max_xsize = param -> max_xsize, bsize;
475 mlib_s32 x0, x1, y0, y1;
476 mlib_d64 buff[BUF_SIZE], *pbuff = buff;
477#ifdef MLIB_VIS2
478 mlib_d64 t0, t1, t2, t3;
479#endif /* MLIB_VIS2 */
480
481 bsize = (max_xsize + 1)/2;
482
483 if (bsize > BUF_SIZE) {
484 pbuff = mlib_malloc(bsize*sizeof(mlib_d64));
485
486 if (pbuff == NULL) return MLIB_FAILURE;
487 }
488
489 MLIB_WRITE_BMASK(0x45CD67EF);
490
491 dx64 = vis_to_double_dup(((dX & 0xFFFF) << 16) | (dX & 0xFFFF));
492 dy64 = vis_to_double_dup(((dY & 0xFFFF) << 16) | (dY & 0xFFFF));
493
494 for (j = yStart; j <= yFinish; j++) {
495 mlib_u8 *sp0, *sp1;
496
497#ifndef MLIB_VIS2
498 vis_alignaddr((void*)0, 6);
499#endif /* MLIB_VIS2 */
500
501 NEW_LINE(2);
502
503 x0 = X; y0 = Y;
504 x1 = X + dX; y1 = Y + dY;
505
506 deltax = DOUBLE_4U16(x0, x0, x1, x1);
507 deltay = DOUBLE_4U16(y0, y0, y1, y1);
508
509#pragma pipeloop(0)
510 for (i = 0; i < size/2; i++) {
511 GET_POINTER(sp0);
512 GET_POINTER(sp1);
513 LOAD_2CH_NA();
514
515 BL_SUM();
516
517 pbuff[i] = dd;
518 }
519
520 if (size & 1) {
521 GET_POINTER(sp0);
522 sp1 = sp0;
523 LOAD_2CH_NA();
524
525 BL_SUM();
526
527 pbuff[i] = dd;
528 }
529
530 mlib_ImageCopy_na((mlib_u8*)pbuff, dl, 4*size);
531 }
532
533 if (pbuff != buff) {
534 mlib_free(pbuff);
535 }
536
537 return MLIB_SUCCESS;
538}
539
/***************************************************************/
/*
 * Per-line increment setup for the 3- and 4-channel paths. It is not
 * invoked directly in this file; presumably NEW_LINE() expands it
 * (TODO confirm).
 *
 * When a warp table is supplied, dX / dY are reloaded for line j. One
 * pixel is handled per iteration with halved offsets (see DOUBLE_4U16),
 * so each lane advances by dX >> 1. dX and dY are first forced to even
 * values, rounding towards zero, so that the halved increment is exact
 * and stays in step with the integer X += dX in the loops.
 */
#undef  PREPARE_DELTAS
#define PREPARE_DELTAS                                                             \
  if (warp_tbl != NULL) {                                                          \
    dX = warp_tbl[2*j    ];                                                        \
    dY = warp_tbl[2*j + 1];                                                        \
    dX = (dX - (dX >> 31)) &~ 1; /* rounding towards ZERO */                       \
    dY = (dY - (dY >> 31)) &~ 1; /* rounding towards ZERO */                       \
    dx64 = vis_to_double_dup((((dX >> 1) & 0xFFFF) << 16) | ((dX >> 1) & 0xFFFF)); \
    dy64 = vis_to_double_dup((((dY >> 1) & 0xFFFF) << 16) | ((dY >> 1) & 0xFFFF)); \
  }

552/***************************************************************/
553mlib_status FUN_NAME(3ch)(mlib_affine_param *param)
554{
555 DECLAREVAR();
556 mlib_s32 max_xsize = param -> max_xsize;
557 mlib_d64 buff[BUF_SIZE], *pbuff = buff;
558
559 if (max_xsize > BUF_SIZE) {
560 pbuff = mlib_malloc(max_xsize*sizeof(mlib_d64));
561
562 if (pbuff == NULL) return MLIB_FAILURE;
563 }
564
565 dX = (dX - (dX >> 31)) &~ 1; /* rounding towards ZERO */
566 dY = (dY - (dY >> 31)) &~ 1; /* rounding towards ZERO */
567 dx64 = vis_to_double_dup((((dX >> 1) & 0xFFFF) << 16) | ((dX >> 1) & 0xFFFF));
568 dy64 = vis_to_double_dup((((dY >> 1) & 0xFFFF) << 16) | ((dY >> 1) & 0xFFFF));
569
570 for (j = yStart; j <= yFinish; j++) {
571 mlib_u8 *sp;
572 mlib_d64 *sp0, *sp1;
573
574 NEW_LINE(3);
575
576 deltax = DOUBLE_4U16(X, X, X, X);
577 deltay = DOUBLE_4U16(Y, Y, Y, Y);
578
579#pragma pipeloop(0)
580 for (i = 0; i < size; i++) {
581 sp = *(mlib_u8**)((mlib_u8*)lineAddr + PTR_SHIFT(Y)) + 6*(X >> MLIB_SHIFT) - 2;
582
583 vis_alignaddr(sp, 0);
584 sp0 = AL_ADDR(sp, 0);
585 s0 = vis_faligndata(sp0[0], sp0[1]);
586 s1 = vis_faligndata(sp0[1], sp0[2]);
587
588 vis_alignaddr(sp, srcYStride);
589 sp1 = AL_ADDR(sp, srcYStride);
590 s2 = vis_faligndata(sp1[0], sp1[1]);
591 s3 = vis_faligndata(sp1[1], sp1[2]);
592
593 BL_SUM_3CH();
594
595 pbuff[i] = dd;
596 X += dX;
597 Y += dY;
598 }
599
600 mlib_v_ImageChannelExtract_S16_43L_D1((void *)pbuff, (void *)dl, size);
601 }
602
603 if (pbuff != buff) {
604 mlib_free(pbuff);
605 }
606
607 return MLIB_SUCCESS;
608}
609
610/***************************************************************/
611mlib_status FUN_NAME(4ch)(mlib_affine_param *param)
612{
613 DECLAREVAR();
614
615 if (((mlib_s32)lineAddr[0] | (mlib_s32)dstData | srcYStride | dstYStride) & 7) {
616 return FUN_NAME(4ch_na)(param);
617 }
618
619 srcYStride >>= 3;
620
621 dX = (dX - (dX >> 31)) &~ 1; /* rounding towards ZERO */
622 dY = (dY - (dY >> 31)) &~ 1; /* rounding towards ZERO */
623 dx64 = vis_to_double_dup((((dX >> 1) & 0xFFFF) << 16) | ((dX >> 1) & 0xFFFF));
624 dy64 = vis_to_double_dup((((dY >> 1) & 0xFFFF) << 16) | ((dY >> 1) & 0xFFFF));
625
626 for (j = yStart; j <= yFinish; j++) {
627 mlib_d64 *sp;
628
629 NEW_LINE(4);
630
631 deltax = DOUBLE_4U16(X, X, X, X);
632 deltay = DOUBLE_4U16(Y, Y, Y, Y);
633
634#pragma pipeloop(0)
635 for (i = 0; i < size; i++) {
636 sp = *(mlib_d64**)((mlib_u8*)lineAddr + PTR_SHIFT(Y)) + (X >> MLIB_SHIFT);
637 s0 = sp[0];
638 s1 = sp[1];
639 s2 = sp[srcYStride];
640 s3 = sp[srcYStride + 1];
641
642 BL_SUM();
643
644 ((mlib_d64*)dl)[i] = dd;
645 X += dX;
646 Y += dY;
647 }
648 }
649
650 return MLIB_SUCCESS;
651}
652
653/***************************************************************/
654mlib_status FUN_NAME(4ch_na)(mlib_affine_param *param)
655{
656 DECLAREVAR();
657 mlib_s32 max_xsize = param -> max_xsize;
658 mlib_d64 buff[BUF_SIZE], *pbuff = buff;
659
660 if (max_xsize > BUF_SIZE) {
661 pbuff = mlib_malloc(max_xsize*sizeof(mlib_d64));
662
663 if (pbuff == NULL) return MLIB_FAILURE;
664 }
665
666 dX = (dX - (dX >> 31)) &~ 1; /* rounding towards ZERO */
667 dY = (dY - (dY >> 31)) &~ 1; /* rounding towards ZERO */
668 dx64 = vis_to_double_dup((((dX >> 1) & 0xFFFF) << 16) | ((dX >> 1) & 0xFFFF));
669 dy64 = vis_to_double_dup((((dY >> 1) & 0xFFFF) << 16) | ((dY >> 1) & 0xFFFF));
670
671 for (j = yStart; j <= yFinish; j++) {
672 mlib_u8 *sp;
673 mlib_d64 *sp0, *sp1;
674
675 NEW_LINE(4);
676
677 deltax = DOUBLE_4U16(X, X, X, X);
678 deltay = DOUBLE_4U16(Y, Y, Y, Y);
679
680#pragma pipeloop(0)
681 for (i = 0; i < size; i++) {
682 sp = *(mlib_u8**)((mlib_u8*)lineAddr + PTR_SHIFT(Y)) + 8*(X >> MLIB_SHIFT);
683
684 vis_alignaddr(sp, 0);
685 sp0 = AL_ADDR(sp, 0);
686 s0 = vis_faligndata(sp0[0], sp0[1]);
687 s1 = vis_faligndata(sp0[1], sp0[2]);
688
689 vis_alignaddr(sp, srcYStride);
690 sp1 = AL_ADDR(sp, srcYStride);
691 s2 = vis_faligndata(sp1[0], sp1[1]);
692 s3 = vis_faligndata(sp1[1], sp1[2]);
693
694 BL_SUM();
695
696 pbuff[i] = dd;
697 X += dX;
698 Y += dY;
699 }
700
701 mlib_ImageCopy_na((mlib_u8*)pbuff, dl, 8*size);
702 }
703
704 if (pbuff != buff) {
705 mlib_free(pbuff);
706 }
707
708 return MLIB_SUCCESS;
709}
710
711/***************************************************************/