blob: 2745801a1815cec1adccda2bfe793527eb42dc50 [file] [log] [blame]
J. Duke319a3b92007-12-01 00:00:00 +00001/*
2 * Copyright 2003 Sun Microsystems, Inc. All Rights Reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Sun designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Sun in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
22 * CA 95054 USA or visit www.sun.com if you need additional information or
23 * have any questions.
24 */
25
26#if !defined(JAVA2D_NO_MLIB) || defined(MLIB_ADD_SUFF)
27
28#include "vis_AlphaMacros.h"
29
30/***************************************************************/
31
/*
 * Expand an 8-bit gray level into a 32-bit pixel: 0xff in the top byte and
 * the gray value replicated into each of the three lower bytes.
 *
 * The argument and the whole expansion are parenthesized so the macro can
 * be used with compound arguments and inside larger expressions.
 * Note: x is still evaluated three times, so it must be free of side effects.
 */
#define Gray2Argb(x)          \
    (0xff000000 | ((x) << 16) | ((x) << 8) | (x))
34
35/***************************************************************/
36
#if VIS < 0x200

/* Older VIS levels: nothing to set up, the macro expands to nothing. */
#define BMASK_FOR_ARGB

#else

/*
 * VIS 2.0 and later: load the byte mask once before the conversion loops.
 * The selector nibbles are 0,3,2,1 / 4,7,6,5 -- presumably consumed by a
 * byte shuffle inside the ARGB2ABGR_* macros (defined elsewhere; confirm)
 * to reverse the last three bytes of each 4-byte pixel.
 * The trailing ';' is part of the macro: call sites use it without one.
 */
#define BMASK_FOR_ARGB vis_write_bmask(0x03214765, 0);

#endif
47
48/***************************************************************/
49
/*
 * Two-pixel RGB -> ABGR step: OR the pixel pair with `amask` (which must be
 * in scope at the point of use), then apply ARGB2ABGR_DB to the result.
 * Used as a statement without a trailing ';'.
 */
#define RGB2ABGR_DB(x)                \
    (x) = vis_for((x), amask);        \
    ARGB2ABGR_DB(x)
53
54/***************************************************************/
55
/*
 * Expand 8 packed 3-byte pixels (24 bytes in sd0, sd1, sd2) into 8 4-byte
 * pixels (32 bytes in dd0..dd3).  Each output pixel is one byte taken from
 * sFF followed by the pixel's three source bytes in their original order.
 *
 * All of sd0..sd2, sda..sdm, sFF and dd0..dd3 must be declared by the
 * caller.  The macro has no trailing ';' -- the call site supplies it.
 */
#define INSERT_U8_34R                                           \
    /* three merge rounds gather every third byte together */   \
    sda = vis_fpmerge(vis_read_hi(sd0), vis_read_lo(sd1));      \
    sdb = vis_fpmerge(vis_read_lo(sd0), vis_read_hi(sd2));      \
    sdc = vis_fpmerge(vis_read_hi(sd1), vis_read_lo(sd2));      \
                                                                \
    sdd = vis_fpmerge(vis_read_hi(sda), vis_read_lo(sdb));      \
    sde = vis_fpmerge(vis_read_lo(sda), vis_read_hi(sdc));      \
    sdf = vis_fpmerge(vis_read_hi(sdb), vis_read_lo(sdc));      \
                                                                \
    /* sdg, sdh, sdi = first, second, third byte of each pixel */ \
    sdg = vis_fpmerge(vis_read_hi(sdd), vis_read_lo(sde));      \
    sdh = vis_fpmerge(vis_read_lo(sdd), vis_read_hi(sdf));      \
    sdi = vis_fpmerge(vis_read_hi(sde), vis_read_lo(sdf));      \
                                                                \
    /* pair the sFF bytes with the second byte of each pixel */ \
    sdl = vis_fpmerge(vis_read_hi(sFF), vis_read_hi(sdh));      \
    sdm = vis_fpmerge(vis_read_lo(sFF), vis_read_lo(sdh));      \
    /* pair the first byte with the third byte of each pixel */ \
    sdj = vis_fpmerge(vis_read_hi(sdg), vis_read_hi(sdi));      \
    sdk = vis_fpmerge(vis_read_lo(sdg), vis_read_lo(sdi));      \
                                                                \
    /* final interleave: two complete 4-byte pixels per result */ \
    dd0 = vis_fpmerge(vis_read_hi(sdl), vis_read_hi(sdj));      \
    dd1 = vis_fpmerge(vis_read_lo(sdl), vis_read_lo(sdj));      \
    dd2 = vis_fpmerge(vis_read_hi(sdm), vis_read_hi(sdk));      \
    dd3 = vis_fpmerge(vis_read_lo(sdm), vis_read_lo(sdk))
74
75/***************************************************************/
76
77void IntArgbToIntAbgrConvert_line(mlib_s32 *srcBase,
78 mlib_s32 *dstBase,
79 mlib_s32 width)
80{
81 mlib_s32 *dst_end = dstBase + width;
82 mlib_d64 dd;
83 mlib_f32 ff;
84
85 BMASK_FOR_ARGB
86
87 if ((mlib_s32)srcBase & 7) {
88 ff = *(mlib_f32*)srcBase;
89 ARGB2ABGR_FL(ff)
90 *(mlib_f32*)dstBase = ff;
91 srcBase++;
92 dstBase++;
93 }
94
95 if ((mlib_s32)dstBase & 7) {
96#pragma pipeloop(0)
97 for (; dstBase <= (dst_end - 2); dstBase += 2) {
98 dd = *(mlib_d64*)srcBase;
99 ARGB2ABGR_DB(dd)
100 ((mlib_f32*)dstBase)[0] = vis_read_hi(dd);
101 ((mlib_f32*)dstBase)[1] = vis_read_lo(dd);
102 srcBase += 2;
103 }
104 } else {
105#pragma pipeloop(0)
106 for (; dstBase <= (dst_end - 2); dstBase += 2) {
107 dd = *(mlib_d64*)srcBase;
108 ARGB2ABGR_DB(dd)
109 *(mlib_d64*)dstBase = dd;
110 srcBase += 2;
111 }
112 }
113
114 if (dstBase < dst_end) {
115 ff = *(mlib_f32*)srcBase;
116 ARGB2ABGR_FL(ff)
117 *(mlib_f32*)dstBase = ff;
118 }
119}
120
121/***************************************************************/
122
123void ADD_SUFF(FourByteAbgrToIntArgbConvert)(BLIT_PARAMS)
124{
125 mlib_u32 *argb = (mlib_u32 *)dstBase;
126 mlib_u8 *pabgr = (mlib_u8 *)srcBase;
127 mlib_s32 dstScan = (pDstInfo)->scanStride;
128 mlib_s32 srcScan = (pSrcInfo)->scanStride;
129 mlib_s32 i, j, count, left;
130 mlib_d64 w_abgr;
131
132 if (width < 16) {
133 for (j = 0; j < height; j++) {
134 mlib_u8 *src = srcBase;
135 mlib_s32 *dst = dstBase;
136
137 for (i = 0; i < width; i++) {
138 *dst++ = (src[0] << 24) | (src[3] << 16) |
139 (src[2] << 8) | (src[1]);
140 src += 4;
141 }
142
143 PTR_ADD(dstBase, dstScan);
144 PTR_ADD(srcBase, srcScan);
145 }
146 return;
147 }
148
149 if (dstScan == 4*width && srcScan == dstScan) {
150 width *= height;
151 height = 1;
152 }
153 count = width >> 1;
154 left = width & 1;
155
156 BMASK_FOR_ARGB
157
158 if ((((mlib_addr)pabgr & 3) == 0) && ((srcScan & 3) == 0)) {
159 mlib_u32 *abgr = (mlib_u32 *)pabgr;
160
161 dstScan >>= 2;
162 srcScan >>= 2;
163
164 for (i = 0; i < height; i++, argb += dstScan, abgr += srcScan) {
165 if ((((mlib_addr) argb | (mlib_addr) abgr) & 7) == 0) {
166 mlib_d64 *d_abgr = (mlib_d64 *) abgr;
167 mlib_d64 *d_argb = (mlib_d64 *) argb;
168
169#pragma pipeloop(0)
170 for (j = 0; j < count; j++) {
171 w_abgr = d_abgr[j];
172 ARGB2ABGR_DB(w_abgr)
173 d_argb[j] = w_abgr;
174 }
175
176 if (left) {
177 w_abgr = d_abgr[count];
178 ARGB2ABGR_DB(w_abgr)
179 ((mlib_f32 *) argb)[2 * count] = vis_read_hi(w_abgr);
180 }
181 } else {
182 mlib_f32 v_abgr0, v_abgr1;
183
184#pragma pipeloop(0)
185 for (j = 0; j < count; j++) {
186 v_abgr0 = ((mlib_f32 *) abgr)[2 * j];
187 v_abgr1 = ((mlib_f32 *) abgr)[2 * j + 1];
188 w_abgr = vis_freg_pair(v_abgr0, v_abgr1);
189 ARGB2ABGR_DB(w_abgr)
190 ((mlib_f32 *) argb)[2 * j] = vis_read_hi(w_abgr);
191 ((mlib_f32 *) argb)[2 * j + 1] = vis_read_lo(w_abgr);
192 }
193
194 if (left) {
195 v_abgr0 = ((mlib_f32 *) abgr)[2 * count];
196 w_abgr = vis_freg_pair(v_abgr0, 0);
197 ARGB2ABGR_DB(w_abgr)
198 ((mlib_f32 *) argb)[2 * count] = vis_read_hi(w_abgr);
199 }
200 }
201 }
202 } else { /* abgr is not aligned */
203 mlib_u8 *abgr = pabgr;
204 mlib_d64 *d_abgr, db0, db1;
205
206 dstScan >>= 2;
207
208 for (i = 0; i < height; i++, argb += dstScan, abgr += srcScan) {
209 d_abgr = vis_alignaddr(abgr, 0);
210 db0 = *d_abgr++;
211
212 if (((mlib_addr) argb & 7) == 0) {
213 mlib_d64 *d_argb = (mlib_d64 *) argb;
214
215#pragma pipeloop(0)
216 for (j = 0; j < count; j++) {
217 db1 = d_abgr[j];
218 w_abgr = vis_faligndata(db0, db1);
219 db0 = db1;
220 ARGB2ABGR_DB(w_abgr)
221 d_argb[j] = w_abgr;
222 }
223
224 if (left) {
225 db1 = d_abgr[j];
226 w_abgr = vis_faligndata(db0, db1);
227 ARGB2ABGR_DB(w_abgr)
228 ((mlib_f32 *) argb)[2 * count] = vis_read_hi(w_abgr);
229 }
230 } else {
231 mlib_d64 w_abgr;
232
233 db1 = *d_abgr++;
234 w_abgr = vis_faligndata(db0, db1);
235 db0 = db1;
236#pragma pipeloop(0)
237 for (j = 0; j < count; j++) {
238 ARGB2ABGR_DB(w_abgr)
239 ((mlib_f32 *) argb)[2 * j] = vis_read_hi(w_abgr);
240 ((mlib_f32 *) argb)[2 * j + 1] = vis_read_lo(w_abgr);
241 db1 = d_abgr[j];
242 w_abgr = vis_faligndata(db0, db1);
243 db0 = db1;
244 }
245
246 if (left) {
247 ARGB2ABGR_DB(w_abgr)
248 ((mlib_f32 *) argb)[2 * count] = vis_read_hi(w_abgr);
249 }
250 }
251 }
252 }
253}
254
255/***************************************************************/
256
257void ADD_SUFF(IntArgbToFourByteAbgrConvert)(BLIT_PARAMS)
258{
259 mlib_u32 *argb = (mlib_u32 *)srcBase;
260 mlib_u8 *abgr = (mlib_u8 *)dstBase;
261 mlib_s32 dstScan = (pDstInfo)->scanStride;
262 mlib_s32 srcScan = (pSrcInfo)->scanStride;
263 mlib_s32 i, j, count, left;
264 mlib_d64 w_abgr;
265
266 if (width < 16) {
267 for (j = 0; j < height; j++) {
268 mlib_s32 *src = srcBase;
269 mlib_u8 *dst = dstBase;
270
271 for (i = 0; i < width; i++) {
272 mlib_u32 x = *src++;
273 dst[0] = x >> 24;
274 dst[1] = x;
275 dst[2] = x >> 8;
276 dst[3] = x >> 16;
277 dst += 4;
278 }
279
280 PTR_ADD(dstBase, dstScan);
281 PTR_ADD(srcBase, srcScan);
282 }
283 return;
284 }
285
286 if (dstScan == 4*width && srcScan == dstScan) {
287 width *= height;
288 height = 1;
289 }
290 count = width >> 1;
291 left = width & 1;
292
293 BMASK_FOR_ARGB
294
295 srcScan >>= 2;
296
297 for (i = 0; i < height; i++, argb += srcScan, abgr += dstScan) {
298
299 if ((((mlib_addr) abgr | (mlib_addr) argb) & 7) == 0) {
300 mlib_d64 *d_argb = (mlib_d64 *) argb;
301 mlib_d64 *d_abgr = (mlib_d64 *) abgr;
302
303#pragma pipeloop(0)
304 for (j = 0; j < count; j++) {
305 w_abgr = d_argb[j];
306 ARGB2ABGR_DB(w_abgr)
307 d_abgr[j] = w_abgr;
308 }
309
310 if (left) {
311 w_abgr = d_argb[count];
312 ARGB2ABGR_DB(w_abgr)
313 ((mlib_f32 *) abgr)[2 * count] = vis_read_hi(w_abgr);
314 }
315
316 } else if (((mlib_addr) abgr & 3) == 0) {
317 mlib_f32 v_argb0, v_argb1;
318
319#pragma pipeloop(0)
320 for (j = 0; j < count; j++) {
321 v_argb0 = ((mlib_f32 *) argb)[2 * j];
322 v_argb1 = ((mlib_f32 *) argb)[2 * j + 1];
323 w_abgr = vis_freg_pair(v_argb0, v_argb1);
324
325 ARGB2ABGR_DB(w_abgr)
326 ((mlib_f32 *) abgr)[2 * j] = vis_read_hi(w_abgr);
327 ((mlib_f32 *) abgr)[2 * j + 1] = vis_read_lo(w_abgr);
328 }
329
330 if (left) {
331 v_argb0 = ((mlib_f32 *) argb)[2 * count];
332 w_abgr = vis_freg_pair(v_argb0, vis_fzeros());
333
334 ARGB2ABGR_DB(w_abgr)
335 ((mlib_f32 *) abgr)[2 * count] = vis_read_hi(w_abgr);
336 }
337
338 } else { /* abgr is not aligned */
339
340 mlib_u8 *pend = abgr + (width << 2) - 1;
341 mlib_d64 *d_abgr, db0, db1;
342 mlib_s32 emask, off;
343 mlib_f32 *f_argb = (mlib_f32 *) argb;
344
345 off = (mlib_addr)abgr & 7;
346 vis_alignaddr((void *)(8 - off), 0);
347 d_abgr = (mlib_d64 *) (abgr - off);
348
349 db1 = vis_freg_pair(*f_argb++, *f_argb++);
350 ARGB2ABGR_DB(db1)
351 w_abgr = vis_faligndata(db1, db1);
352 emask = vis_edge8(abgr, pend);
353 vis_pst_8(w_abgr, d_abgr++, emask);
354 db0 = db1;
355
356 db1 = vis_freg_pair(f_argb[0], f_argb[1]);
357#pragma pipeloop(0)
358 for (; (mlib_addr)d_abgr < (mlib_addr)(pend - 6); ) {
359 ARGB2ABGR_DB(db1)
360 w_abgr = vis_faligndata(db0, db1);
361 *d_abgr++ = w_abgr;
362 db0 = db1;
363 f_argb += 2;
364 db1 = vis_freg_pair(f_argb[0], f_argb[1]);
365 }
366
367 if ((mlib_addr)d_abgr <= (mlib_addr)pend) {
368 ARGB2ABGR_DB(db1)
369 w_abgr = vis_faligndata(db0, db1);
370 emask = vis_edge8(d_abgr, pend);
371 vis_pst_8(w_abgr, d_abgr, emask);
372 }
373 }
374 }
375}
376
377/***************************************************************/
378
379void ADD_SUFF(IntRgbToFourByteAbgrConvert)(BLIT_PARAMS)
380{
381 mlib_u32 *argb = (mlib_u32 *)srcBase;
382 mlib_u8 *abgr = (mlib_u8 *)dstBase;
383 mlib_s32 dstScan = (pDstInfo)->scanStride;
384 mlib_s32 srcScan = (pSrcInfo)->scanStride;
385 mlib_s32 i, j, count, left;
386 mlib_d64 w_abgr;
387 mlib_d64 amask = vis_to_double_dup(0xFF000000);
388
389 if (width < 16) {
390 for (j = 0; j < height; j++) {
391 mlib_s32 *src = srcBase;
392 mlib_u8 *dst = dstBase;
393
394 for (i = 0; i < width; i++) {
395 mlib_u32 x = *src++;
396 dst[0] = 0xFF;
397 dst[1] = x;
398 dst[2] = x >> 8;
399 dst[3] = x >> 16;
400 dst += 4;
401 }
402
403 PTR_ADD(dstBase, dstScan);
404 PTR_ADD(srcBase, srcScan);
405 }
406 return;
407 }
408
409 if (dstScan == 4*width && srcScan == dstScan) {
410 width *= height;
411 height = 1;
412 }
413 count = width >> 1;
414 left = width & 1;
415
416 BMASK_FOR_ARGB
417
418 srcScan >>= 2;
419
420 for (i = 0; i < height; i++, argb += srcScan, abgr += dstScan) {
421
422 if ((((mlib_addr) abgr | (mlib_addr) argb) & 7) == 0) {
423 mlib_d64 *d_argb = (mlib_d64 *) argb;
424 mlib_d64 *d_abgr = (mlib_d64 *) abgr;
425
426#pragma pipeloop(0)
427 for (j = 0; j < count; j++) {
428 w_abgr = d_argb[j];
429 RGB2ABGR_DB(w_abgr)
430 d_abgr[j] = w_abgr;
431 }
432
433 if (left) {
434 w_abgr = d_argb[count];
435 RGB2ABGR_DB(w_abgr)
436 ((mlib_f32 *) abgr)[2 * count] = vis_read_hi(w_abgr);
437 }
438
439 } else if (((mlib_addr) abgr & 3) == 0) {
440 mlib_f32 v_argb0, v_argb1;
441
442#pragma pipeloop(0)
443 for (j = 0; j < count; j++) {
444 v_argb0 = ((mlib_f32 *) argb)[2 * j];
445 v_argb1 = ((mlib_f32 *) argb)[2 * j + 1];
446 w_abgr = vis_freg_pair(v_argb0, v_argb1);
447
448 RGB2ABGR_DB(w_abgr)
449 ((mlib_f32 *) abgr)[2 * j] = vis_read_hi(w_abgr);
450 ((mlib_f32 *) abgr)[2 * j + 1] = vis_read_lo(w_abgr);
451 }
452
453 if (left) {
454 v_argb0 = ((mlib_f32 *) argb)[2 * count];
455 w_abgr = vis_freg_pair(v_argb0, vis_fzeros());
456
457 RGB2ABGR_DB(w_abgr)
458 ((mlib_f32 *) abgr)[2 * count] = vis_read_hi(w_abgr);
459 }
460
461 } else { /* abgr is not aligned */
462
463 mlib_u8 *pend = abgr + (width << 2) - 1;
464 mlib_d64 *d_abgr, db0, db1;
465 mlib_s32 emask, off;
466 mlib_f32 *f_argb = (mlib_f32 *) argb;
467
468 off = (mlib_addr)abgr & 7;
469 vis_alignaddr((void *)(8 - off), 0);
470 d_abgr = (mlib_d64 *) (abgr - off);
471
472 db1 = vis_freg_pair(*f_argb++, *f_argb++);
473 RGB2ABGR_DB(db1)
474 w_abgr = vis_faligndata(db1, db1);
475 emask = vis_edge8(abgr, pend);
476 vis_pst_8(w_abgr, d_abgr++, emask);
477 db0 = db1;
478
479 db1 = vis_freg_pair(f_argb[0], f_argb[1]);
480#pragma pipeloop(0)
481 for (; (mlib_addr)d_abgr < (mlib_addr)(pend - 6); ) {
482 RGB2ABGR_DB(db1)
483 w_abgr = vis_faligndata(db0, db1);
484 *d_abgr++ = w_abgr;
485 db0 = db1;
486 f_argb += 2;
487 db1 = vis_freg_pair(f_argb[0], f_argb[1]);
488 }
489
490 if ((mlib_addr)d_abgr <= (mlib_addr)pend) {
491 RGB2ABGR_DB(db1)
492 w_abgr = vis_faligndata(db0, db1);
493 emask = vis_edge8(d_abgr, pend);
494 vis_pst_8(w_abgr, d_abgr, emask);
495 }
496 }
497 }
498}
499
500/***************************************************************/
501
502void ADD_SUFF(ThreeByteBgrToFourByteAbgrConvert)(BLIT_PARAMS)
503{
504 mlib_s32 dstScan = pDstInfo->scanStride;
505 mlib_s32 srcScan = pSrcInfo->scanStride;
506 mlib_d64 sd0, sd1, sd2;
507 mlib_d64 dd0, dd1, dd2, dd3;
508 mlib_d64 sda, sdb, sdc, sdd;
509 mlib_d64 sde, sdf, sdg, sdh;
510 mlib_d64 sdi, sdj, sdk, sdl;
511 mlib_d64 sdm;
512 mlib_d64 sFF;
513 mlib_s32 r, g, b;
514 mlib_s32 i, j;
515
516 if (width < 16) {
517 for (j = 0; j < height; j++) {
518 mlib_u8 *src = srcBase;
519 mlib_u8 *dst = dstBase;
520
521#pragma pipeloop(0)
522 for (i = 0; i < width; i++) {
523 dst[0] = 0xFF;
524 dst[1] = src[0];
525 dst[2] = src[1];
526 dst[3] = src[2];
527 src += 3;
528 dst += 4;
529 }
530
531 PTR_ADD(dstBase, dstScan);
532 PTR_ADD(srcBase, srcScan);
533 }
534 return;
535 }
536
537 if (dstScan == 4*width && srcScan == 3*width) {
538 width *= height;
539 height = 1;
540 }
541
542 sFF = vis_fone();
543
544 for (j = 0; j < height; j++) {
545 mlib_u8 *pSrc = srcBase;
546 mlib_u8 *pDst = dstBase;
547
548 if (!(((mlib_s32)pSrc | (mlib_s32)pDst) & 7)) {
549#pragma pipeloop(0)
550 for (i = 0; i <= ((mlib_s32)width - 8); i += 8) {
551 sd0 = ((mlib_d64*)pSrc)[0];
552 sd1 = ((mlib_d64*)pSrc)[1];
553 sd2 = ((mlib_d64*)pSrc)[2];
554 pSrc += 3*8;
555 INSERT_U8_34R;
556 ((mlib_d64*)pDst)[0] = dd0;
557 ((mlib_d64*)pDst)[1] = dd1;
558 ((mlib_d64*)pDst)[2] = dd2;
559 ((mlib_d64*)pDst)[3] = dd3;
560 pDst += 4*8;
561 }
562
563 for (; i < width; i++) {
564 b = pSrc[0];
565 g = pSrc[1];
566 r = pSrc[2];
567 ((mlib_u16*)pDst)[0] = 0xff00 | b;
568 ((mlib_u16*)pDst)[1] = (g << 8) | r;
569 pSrc += 3;
570 pDst += 4;
571 }
572 } else if (!((mlib_s32)pDst & 1)) {
573#pragma pipeloop(0)
574 for (i = 0; i < width; i++) {
575 b = pSrc[0];
576 g = pSrc[1];
577 r = pSrc[2];
578 ((mlib_u16*)pDst)[0] = 0xff00 | b;
579 ((mlib_u16*)pDst)[1] = (g << 8) | r;
580 pSrc += 3;
581 pDst += 4;
582 }
583 } else {
584 *pDst++ = 0xff;
585#pragma pipeloop(0)
586 for (i = 0; i < (mlib_s32)width - 1; i++) {
587 b = pSrc[0];
588 g = pSrc[1];
589 r = pSrc[2];
590 ((mlib_u16*)pDst)[0] = (b << 8) | g;
591 ((mlib_u16*)pDst)[1] = (r << 8) | 0xff;
592 pSrc += 3;
593 pDst += 4;
594 }
595 if (width) {
596 pDst[0] = pSrc[0];
597 pDst[1] = pSrc[1];
598 pDst[2] = pSrc[2];
599 }
600 }
601
602 PTR_ADD(dstBase, dstScan);
603 PTR_ADD(srcBase, srcScan);
604 }
605}
606
607/***************************************************************/
608
#if 1

/*
 * Fetch the 3-byte pixel selected by the current scaled x position into the
 * upper half of `dd` and advance the position by sxinc.
 *
 * Eight bytes are read starting one byte BEFORE the pixel, so bytes 1..3 of
 * `dd` hold the pixel and byte 0 holds whatever precedes it (the caller is
 * expected to overwrite or mask it).  The load is assembled from the two
 * 8-byte-aligned doubles covering that address.
 *
 * Uses pSrc, tmpsxloc, shift and sxinc from the enclosing scope and
 * re-programs the alignment offset via vis_alignaddr on every use.
 */
#define LOAD_BGR(dd) {                                                   \
    mlib_u8  *bgr_m1  = pSrc - 1 + 3*(tmpsxloc >> shift);                \
    mlib_d64 *aligned = (void*)((mlib_addr)bgr_m1 &~ 7);                 \
    vis_alignaddr(bgr_m1, 0);                                            \
    dd = vis_faligndata(aligned[0], aligned[1]);                         \
    tmpsxloc += sxinc;                                                   \
}

#else

/*
 * Alternative (currently disabled) variant: shifts the three pixel bytes
 * and then `amask` into `dd` with successive vis_faligndata steps, using
 * single-byte loads.  Requires `amask` in scope and relies on the alignment
 * offset configured by the caller.
 */
#define LOAD_BGR(dd) {                                                   \
    mlib_u8 *bgr = pSrc + 3*(tmpsxloc >> shift);                         \
    dd = vis_faligndata(vis_ld_u8(bgr + 2), dd);                         \
    dd = vis_faligndata(vis_ld_u8(bgr + 1), dd);                         \
    dd = vis_faligndata(vis_ld_u8(bgr    ), dd);                         \
    dd = vis_faligndata(amask, dd);                                      \
    tmpsxloc += sxinc;                                                   \
}

#endif
631
632/***************************************************************/
633
634void ADD_SUFF(ThreeByteBgrToFourByteAbgrScaleConvert)(SCALE_PARAMS)
635{
636 mlib_s32 dstScan = pDstInfo->scanStride;
637 mlib_s32 srcScan = pSrcInfo->scanStride;
638 mlib_d64 d0;
639 mlib_d64 amask;
640 mlib_s32 r, g, b;
641 mlib_s32 i, j;
642
643 if (width < 16 /*|| (((mlib_s32)dstBase | dstScan) & 3)*/) {
644 for (j = 0; j < height; j++) {
645 mlib_u8 *pSrc = srcBase;
646 mlib_u8 *pDst = dstBase;
647 mlib_s32 tmpsxloc = sxloc;
648
649 PTR_ADD(pSrc, (syloc >> shift) * srcScan);
650
651#pragma pipeloop(0)
652 for (i = 0; i < width; i++) {
653 mlib_u8 *pp = pSrc + 3*(tmpsxloc >> shift);
654 pDst[0] = 0xff;
655 pDst[1] = pp[0];
656 pDst[2] = pp[1];
657 pDst[3] = pp[2];
658 tmpsxloc += sxinc;
659 pDst += 4;
660 }
661
662 PTR_ADD(dstBase, dstScan);
663 syloc += syinc;
664 }
665 return;
666 }
667
668 vis_alignaddr(NULL, 7);
669 amask = vis_to_double_dup(0xFF000000);
670
671 for (j = 0; j < height; j++) {
672 mlib_u8 *pSrc = srcBase;
673 mlib_u8 *pDst = dstBase;
674 mlib_s32 tmpsxloc = sxloc;
675
676 PTR_ADD(pSrc, (syloc >> shift) * srcScan);
677
678 if (!((mlib_s32)pDst & 3)) {
679#pragma pipeloop(0)
680 for (i = 0; i < width; i++) {
681 LOAD_BGR(d0);
682 ((mlib_f32*)pDst)[0] = vis_fors(vis_read_hi(d0),
683 vis_read_hi(amask));
684 pDst += 4;
685 }
686 } else if (!((mlib_s32)pDst & 1)) {
687#pragma pipeloop(0)
688 for (i = 0; i < width; i++) {
689 mlib_u8 *pp = pSrc + 3*(tmpsxloc >> shift);
690 tmpsxloc += sxinc;
691 b = pp[0];
692 g = pp[1];
693 r = pp[2];
694 ((mlib_u16*)pDst)[2*i ] = 0xff00 | b;
695 ((mlib_u16*)pDst)[2*i + 1] = (g << 8) | r;
696 }
697 } else {
698 *pDst++ = 0xff;
699#pragma pipeloop(0)
700 for (i = 0; i < (mlib_s32)width - 1; i++) {
701 mlib_u8 *pp = pSrc + 3*(tmpsxloc >> shift);
702 tmpsxloc += sxinc;
703 b = pp[0];
704 g = pp[1];
705 r = pp[2];
706 ((mlib_u16*)pDst)[2*i ] = (b << 8) | g;
707 ((mlib_u16*)pDst)[2*i + 1] = (r << 8) | 0xff;
708 }
709 if (width) {
710 mlib_u8 *pp = pSrc + 3*(tmpsxloc >> shift);
711 tmpsxloc += sxinc;
712 pDst[4*i ] = pp[0];
713 pDst[4*i+1] = pp[1];
714 pDst[4*i+2] = pp[2];
715 }
716 }
717
718 PTR_ADD(dstBase, dstScan);
719 syloc += syinc;
720 }
721}
722
723/***************************************************************/
724
725void ADD_SUFF(ByteGrayToFourByteAbgrConvert)(BLIT_PARAMS)
726{
727 mlib_s32 dstScan = pDstInfo->scanStride;
728 mlib_s32 srcScan = pSrcInfo->scanStride;
729 mlib_d64 d0, d1, d2, d3;
730 mlib_f32 ff, aa = vis_fones();
731 mlib_s32 i, j, x;
732
733 if (!(((mlib_s32)dstBase | dstScan) & 3)) {
734 ADD_SUFF(ByteGrayToIntArgbConvert)(BLIT_CALL_PARAMS);
735 return;
736 }
737
738 if (width < 16) {
739 for (j = 0; j < height; j++) {
740 mlib_u8 *src = srcBase;
741 mlib_u8 *dst = dstBase;
742
743 for (i = 0; i < width; i++) {
744 x = *src++;
745 dst[0] = 0xff;
746 dst[1] = x;
747 dst[2] = x;
748 dst[3] = x;
749 dst += 4;
750 }
751
752 PTR_ADD(dstBase, dstScan);
753 PTR_ADD(srcBase, srcScan);
754 }
755 return;
756 }
757
758 if (srcScan == width && dstScan == 4*width) {
759 width *= height;
760 height = 1;
761 }
762
763 for (j = 0; j < height; j++) {
764 mlib_u8 *src = srcBase;
765 mlib_u8 *dst = dstBase;
766 mlib_u8 *dst_end;
767
768 dst_end = dst + 4*width;
769
770 while (((mlib_s32)src & 3) && dst < dst_end) {
771 x = *src++;
772 dst[0] = 0xff;
773 dst[1] = x;
774 dst[2] = x;
775 dst[3] = x;
776 dst += 4;
777 }
778
779 if (!((mlib_s32)dst & 3)) {
780#pragma pipeloop(0)
781 for (; dst <= (dst_end - 4*4); dst += 4*4) {
782 ff = *(mlib_f32*)src;
783 d0 = vis_fpmerge(aa, ff);
784 d1 = vis_fpmerge(ff, ff);
785 d2 = vis_fpmerge(vis_read_hi(d0), vis_read_hi(d1));
786 d3 = vis_fpmerge(vis_read_lo(d0), vis_read_lo(d1));
787 ((mlib_f32*)dst)[0] = vis_read_hi(d2);
788 ((mlib_f32*)dst)[1] = vis_read_lo(d2);
789 ((mlib_f32*)dst)[2] = vis_read_hi(d3);
790 ((mlib_f32*)dst)[3] = vis_read_lo(d3);
791 src += 4;
792 }
793 } else {
794 mlib_d64 *dp;
795
796 dp = vis_alignaddr(dst, 0);
797 d3 = vis_faligndata(dp[0], dp[0]);
798 vis_alignaddrl(dst, 0);
799
800#pragma pipeloop(0)
801 for (; dst <= (dst_end - 4*4); dst += 4*4) {
802 ff = *(mlib_f32*)src;
803 d0 = vis_fpmerge(aa, ff);
804 d1 = vis_fpmerge(ff, ff);
805 d2 = vis_fpmerge(vis_read_hi(d0), vis_read_hi(d1));
806 *dp++ = vis_faligndata(d3, d2);
807 d3 = vis_fpmerge(vis_read_lo(d0), vis_read_lo(d1));
808 *dp++ = vis_faligndata(d2, d3);
809 src += 4;
810 }
811
812 vis_pst_8(vis_faligndata(d3, d3), dp, vis_edge8(dp, dst - 1));
813 }
814
815 while (dst < dst_end) {
816 x = *src++;
817 dst[0] = 0xff;
818 dst[1] = x;
819 dst[2] = x;
820 dst[3] = x;
821 dst += 4;
822 }
823
824 PTR_ADD(dstBase, dstScan);
825 PTR_ADD(srcBase, srcScan);
826 }
827}
828
829/***************************************************************/
830
831void ADD_SUFF(IntArgbToFourByteAbgrXorBlit)(BLIT_PARAMS)
832{
833 mlib_s32 dstScan = pDstInfo->scanStride;
834 mlib_s32 srcScan = pSrcInfo->scanStride;
835 mlib_u32 xorpixel = pCompInfo->details.xorPixel;
836 mlib_u32 alphamask = pCompInfo->alphaMask;
837 mlib_d64 dd, d_xorpixel, d_alphamask, d_zero;
838 mlib_s32 i, j, x, neg_mask;
839
840 if (width < 16) {
841 xorpixel = (xorpixel << 24) | (xorpixel >> 8);
842 alphamask = (alphamask << 24) | (alphamask >> 8);
843
844 for (j = 0; j < height; j++) {
845 mlib_s32 *src = srcBase;
846 mlib_u8 *dst = dstBase;
847
848 for (i = 0; i < width; i++) {
849 x = src[i];
850 neg_mask = x >> 31;
851 x = (x ^ xorpixel) & (neg_mask &~ alphamask);
852 dst[0] ^= x >> 24;
853 dst[1] ^= x;
854 dst[2] ^= x >> 8;
855 dst[3] ^= x >> 16;
856 dst += 4;
857 }
858
859 PTR_ADD(dstBase, dstScan);
860 PTR_ADD(srcBase, srcScan);
861 }
862 return;
863 }
864
865 if (srcScan == 4*width && dstScan == 4*width) {
866 width *= height;
867 height = 1;
868 }
869
870 d_zero = vis_fzero();
871 d_xorpixel = vis_freg_pair(vis_ldfa_ASI_PL(&xorpixel),
872 vis_ldfa_ASI_PL(&xorpixel));
873 d_alphamask = vis_freg_pair(vis_ldfa_ASI_PL(&alphamask),
874 vis_ldfa_ASI_PL(&alphamask));
875
876 dd = vis_freg_pair(vis_read_hi(d_xorpixel), vis_read_hi(d_alphamask));
877 ARGB2ABGR_DB(dd)
878 xorpixel = ((mlib_s32*)&dd)[0];
879 alphamask = ((mlib_s32*)&dd)[1];
880
881 for (j = 0; j < height; j++) {
882 mlib_s32 *src = srcBase;
883 mlib_u8 *dst = dstBase;
884 mlib_u8 *dst_end;
885
886 dst_end = dst + 4*width;
887
888 if (!((mlib_s32)dst & 7)) {
889#pragma pipeloop(0)
890 for (; dst <= (dst_end - 8); dst += 8) {
891 dd = vis_freg_pair(((mlib_f32*)src)[0], ((mlib_f32*)src)[1]);
892 src += 2;
893 neg_mask = vis_fcmplt32(dd, d_zero);
894 ARGB2ABGR_DB(dd)
895 dd = vis_fxor(dd, d_xorpixel);
896 dd = vis_fandnot(d_alphamask, dd);
897 dd = vis_fxor(dd, *(mlib_d64*)dst);
898 vis_pst_32(dd, dst, neg_mask);
899 }
900 }
901
902 while (dst < dst_end) {
903 x = *src++;
904 neg_mask = x >> 31;
905 x = (x ^ xorpixel) & (neg_mask &~ alphamask);
906 dst[0] ^= x >> 24;
907 dst[1] ^= x;
908 dst[2] ^= x >> 8;
909 dst[3] ^= x >> 16;
910 dst += 4;
911 }
912
913 PTR_ADD(dstBase, dstScan);
914 PTR_ADD(srcBase, srcScan);
915 }
916}
917
918/***************************************************************/
919
920void ADD_SUFF(ByteGrayToFourByteAbgrScaleConvert)(SCALE_PARAMS)
921{
922 mlib_s32 dstScan = pDstInfo->scanStride;
923 mlib_s32 srcScan = pSrcInfo->scanStride;
924 mlib_d64 d0, d1, d2, d3, dd;
925 mlib_f32 ff, aa;
926 mlib_s32 i, j, x;
927
928/* if (!(((mlib_s32)dstBase | dstScan) & 3)) {
929 ADD_SUFF(ByteGrayToIntArgbScaleConvert)(SCALE_CALL_PARAMS);
930 return;
931 }*/
932
933 if (width < 16) {
934 for (j = 0; j < height; j++) {
935 mlib_u8 *src = srcBase;
936 mlib_u8 *dst = dstBase;
937 mlib_s32 tmpsxloc = sxloc;
938
939 PTR_ADD(src, (syloc >> shift) * srcScan);
940
941 for (i = 0; i < width; i++) {
942 x = src[tmpsxloc >> shift];
943 tmpsxloc += sxinc;
944 dst[4*i ] = 0xff;
945 dst[4*i + 1] = x;
946 dst[4*i + 2] = x;
947 dst[4*i + 3] = x;
948 }
949
950 PTR_ADD(dstBase, dstScan);
951 syloc += syinc;
952 }
953 return;
954 }
955
956 aa = vis_fones();
957
958 for (j = 0; j < height; j++) {
959 mlib_u8 *src = srcBase;
960 mlib_u8 *dst = dstBase;
961 mlib_u8 *dst_end;
962 mlib_s32 tmpsxloc = sxloc;
963
964 PTR_ADD(src, (syloc >> shift) * srcScan);
965
966 dst_end = dst + 4*width;
967
968 if (!((mlib_s32)dst & 3)) {
969 vis_alignaddr(NULL, 7);
970#pragma pipeloop(0)
971 for (; dst <= (dst_end - 4*4); dst += 4*4) {
972 LOAD_NEXT_U8(dd, src + ((tmpsxloc + 3*sxinc) >> shift));
973 LOAD_NEXT_U8(dd, src + ((tmpsxloc + 2*sxinc) >> shift));
974 LOAD_NEXT_U8(dd, src + ((tmpsxloc + sxinc) >> shift));
975 LOAD_NEXT_U8(dd, src + ((tmpsxloc ) >> shift));
976 tmpsxloc += 4*sxinc;
977 ff = vis_read_hi(dd);
978 d0 = vis_fpmerge(aa, ff);
979 d1 = vis_fpmerge(ff, ff);
980 d2 = vis_fpmerge(vis_read_hi(d0), vis_read_hi(d1));
981 d3 = vis_fpmerge(vis_read_lo(d0), vis_read_lo(d1));
982 ((mlib_f32*)dst)[0] = vis_read_hi(d2);
983 ((mlib_f32*)dst)[1] = vis_read_lo(d2);
984 ((mlib_f32*)dst)[2] = vis_read_hi(d3);
985 ((mlib_f32*)dst)[3] = vis_read_lo(d3);
986 }
987 } else {
988 mlib_d64 *dp;
989
990 dp = vis_alignaddr(dst, 0);
991 d3 = vis_faligndata(dp[0], dp[0]);
992 vis_alignaddrl(dst, 0);
993
994#pragma pipeloop(0)
995 for (; dst <= (dst_end - 4*4); dst += 4*4) {
996 mlib_d64 s0, s1, s2, s3;
997 s0 = vis_ld_u8(src + ((tmpsxloc ) >> shift));
998 s1 = vis_ld_u8(src + ((tmpsxloc + sxinc) >> shift));
999 s2 = vis_ld_u8(src + ((tmpsxloc + 2*sxinc) >> shift));
1000 s3 = vis_ld_u8(src + ((tmpsxloc + 3*sxinc) >> shift));
1001 tmpsxloc += 4*sxinc;
1002 s0 = vis_fpmerge(vis_read_lo(s0), vis_read_lo(s2));
1003 s1 = vis_fpmerge(vis_read_lo(s1), vis_read_lo(s3));
1004 dd = vis_fpmerge(vis_read_lo(s0), vis_read_lo(s1));
1005 ff = vis_read_lo(dd);
1006 d0 = vis_fpmerge(aa, ff);
1007 d1 = vis_fpmerge(ff, ff);
1008 d2 = vis_fpmerge(vis_read_hi(d0), vis_read_hi(d1));
1009 *dp++ = vis_faligndata(d3, d2);
1010 d3 = vis_fpmerge(vis_read_lo(d0), vis_read_lo(d1));
1011 *dp++ = vis_faligndata(d2, d3);
1012 }
1013
1014 vis_pst_8(vis_faligndata(d3, d3), dp, vis_edge8(dp, dst - 1));
1015 }
1016
1017 while (dst < dst_end) {
1018 x = src[tmpsxloc >> shift];
1019 tmpsxloc += sxinc;
1020 dst[0] = 0xff;
1021 dst[1] = x;
1022 dst[2] = x;
1023 dst[3] = x;
1024 dst += 4;
1025 }
1026
1027 PTR_ADD(dstBase, dstScan);
1028 syloc += syinc;
1029 }
1030}
1031
1032/***************************************************************/
1033
1034void ADD_SUFF(ByteIndexedToFourByteAbgrConvert)(BLIT_PARAMS)
1035{
1036 jint *pixLut = pSrcInfo->lutBase;
1037 mlib_s32 dstScan = pDstInfo->scanStride;
1038 mlib_s32 srcScan = pSrcInfo->scanStride;
1039 mlib_d64 dd, d_old;
1040 mlib_s32 i, j, x;
1041
1042/* if (!(((mlib_s32)dstBase | dstScan) & 3)) {
1043 ADD_SUFF(ByteIndexedToIntAbgrConvert)(BLIT_CALL_PARAMS);
1044 return;
1045 }*/
1046
1047 if (width < 8) {
1048 for (j = 0; j < height; j++) {
1049 mlib_u8 *src = srcBase;
1050 mlib_u8 *dst = dstBase;
1051
1052 for (i = 0; i < width; i++) {
1053 x = pixLut[src[i]];
1054 dst[4*i ] = x >> 24;
1055 dst[4*i + 1] = x;
1056 dst[4*i + 2] = x >> 8;
1057 dst[4*i + 3] = x >> 16;
1058 }
1059
1060 PTR_ADD(dstBase, dstScan);
1061 PTR_ADD(srcBase, srcScan);
1062 }
1063 return;
1064 }
1065
1066 if (srcScan == width && dstScan == 4*width) {
1067 width *= height;
1068 height = 1;
1069 }
1070
1071 BMASK_FOR_ARGB
1072
1073 for (j = 0; j < height; j++) {
1074 mlib_u8 *src = srcBase;
1075 mlib_u8 *dst = dstBase;
1076 mlib_u8 *dst_end;
1077
1078 dst_end = dst + 4*width;
1079
1080 if (!((mlib_s32)dst & 7)) {
1081#pragma pipeloop(0)
1082 for (; dst <= (dst_end - 2*4); dst += 2*4) {
1083 dd = vis_freg_pair(((mlib_f32*)pixLut)[src[0]],
1084 ((mlib_f32*)pixLut)[src[1]]);
1085 ARGB2ABGR_DB(dd)
1086 *(mlib_d64*)dst = dd;
1087 src += 2;
1088 }
1089 } else {
1090 mlib_d64 *dp;
1091
1092 dp = vis_alignaddr(dst, 0);
1093 dd = vis_faligndata(dp[0], dp[0]);
1094 vis_alignaddrl(dst, 0);
1095
1096#pragma pipeloop(0)
1097 for (; dst <= (dst_end - 2*4); dst += 2*4) {
1098 d_old = dd;
1099 dd = vis_freg_pair(((mlib_f32*)pixLut)[src[0]],
1100 ((mlib_f32*)pixLut)[src[1]]);
1101 ARGB2ABGR_DB(dd)
1102 *dp++ = vis_faligndata(d_old, dd);
1103 src += 2;
1104 }
1105
1106 vis_pst_8(vis_faligndata(dd, dd), dp, vis_edge8(dp, dst - 1));
1107 }
1108
1109 while (dst < dst_end) {
1110 x = pixLut[*src++];
1111 dst[0] = x >> 24;
1112 dst[1] = x;
1113 dst[2] = x >> 8;
1114 dst[3] = x >> 16;
1115 dst += 4;
1116 }
1117
1118 PTR_ADD(dstBase, dstScan);
1119 PTR_ADD(srcBase, srcScan);
1120 }
1121}
1122
1123/***************************************************************/
1124
1125void ADD_SUFF(ByteIndexedBmToFourByteAbgrXparOver)(BLIT_PARAMS)
1126{
1127 jint *pixLut = pSrcInfo->lutBase;
1128 mlib_s32 dstScan = pDstInfo->scanStride;
1129 mlib_s32 srcScan = pSrcInfo->scanStride;
1130 mlib_d64 dd, dzero;
1131 mlib_s32 i, j, x, mask;
1132
1133/* if (!(((mlib_s32)dstBase | dstScan) & 3)) {
1134 ADD_SUFF(ByteIndexedToIntAbgrConvert)(BLIT_CALL_PARAMS);
1135 return;
1136 }*/
1137
1138 if (width < 8) {
1139 for (j = 0; j < height; j++) {
1140 mlib_u8 *src = srcBase;
1141 mlib_u8 *dst = dstBase;
1142
1143 for (i = 0; i < width; i++) {
1144 x = pixLut[src[i]];
1145 if (x < 0) {
1146 dst[4*i ] = x >> 24;
1147 dst[4*i + 1] = x;
1148 dst[4*i + 2] = x >> 8;
1149 dst[4*i + 3] = x >> 16;
1150 }
1151 }
1152
1153 PTR_ADD(dstBase, dstScan);
1154 PTR_ADD(srcBase, srcScan);
1155 }
1156 return;
1157 }
1158
1159 if (srcScan == width && dstScan == 4*width) {
1160 width *= height;
1161 height = 1;
1162 }
1163
1164 BMASK_FOR_ARGB
1165
1166 dzero = vis_fzero();
1167
1168 for (j = 0; j < height; j++) {
1169 mlib_u8 *src = srcBase;
1170 mlib_u8 *dst = dstBase;
1171 mlib_u8 *dst_end;
1172
1173 dst_end = dst + 4*width;
1174
1175 if (!((mlib_s32)dst & 7)) {
1176#pragma pipeloop(0)
1177 for (; dst <= (dst_end - 2*4); dst += 2*4) {
1178 dd = vis_freg_pair(((mlib_f32*)pixLut)[src[0]],
1179 ((mlib_f32*)pixLut)[src[1]]);
1180 mask = vis_fcmplt32(dd, dzero);
1181 ARGB2ABGR_DB(dd)
1182 vis_pst_32(dd, dst, mask);
1183 src += 2;
1184 }
1185 }
1186
1187 while (dst < dst_end) {
1188 x = pixLut[*src++];
1189 if (x < 0) {
1190 dst[0] = x >> 24;
1191 dst[1] = x;
1192 dst[2] = x >> 8;
1193 dst[3] = x >> 16;
1194 }
1195 dst += 4;
1196 }
1197
1198 PTR_ADD(dstBase, dstScan);
1199 PTR_ADD(srcBase, srcScan);
1200 }
1201}
1202
1203/***************************************************************/
1204
1205void ADD_SUFF(ByteIndexedBmToFourByteAbgrXparBgCopy)(BCOPY_PARAMS)
1206{
1207 jint *pixLut = pSrcInfo->lutBase;
1208 mlib_s32 dstScan = pDstInfo->scanStride;
1209 mlib_s32 srcScan = pSrcInfo->scanStride;
1210 mlib_d64 dd, dzero, d_bgpixel;
1211 mlib_s32 i, j, x, mask;
1212 mlib_s32 bgpix0 = bgpixel;
1213 mlib_s32 bgpix1 = bgpixel >> 8;
1214 mlib_s32 bgpix2 = bgpixel >> 16;
1215 mlib_s32 bgpix3 = bgpixel >> 24;
1216
1217 if (width < 8) {
1218 for (j = 0; j < height; j++) {
1219 mlib_u8 *src = srcBase;
1220 mlib_u8 *dst = dstBase;
1221
1222 for (i = 0; i < width; i++) {
1223 x = pixLut[src[i]];
1224 if (x < 0) {
1225 dst[4*i ] = x >> 24;
1226 dst[4*i + 1] = x;
1227 dst[4*i + 2] = x >> 8;
1228 dst[4*i + 3] = x >> 16;
1229 } else {
1230 dst[4*i ] = bgpix0;
1231 dst[4*i + 1] = bgpix1;
1232 dst[4*i + 2] = bgpix2;
1233 dst[4*i + 3] = bgpix3;
1234 }
1235 }
1236
1237 PTR_ADD(dstBase, dstScan);
1238 PTR_ADD(srcBase, srcScan);
1239 }
1240 return;
1241 }
1242
1243 if (srcScan == width && dstScan == 4*width) {
1244 width *= height;
1245 height = 1;
1246 }
1247
1248 BMASK_FOR_ARGB
1249
1250 dzero = vis_fzero();
1251 d_bgpixel = vis_freg_pair(vis_ldfa_ASI_PL(&bgpixel),
1252 vis_ldfa_ASI_PL(&bgpixel));
1253
1254 for (j = 0; j < height; j++) {
1255 mlib_u8 *src = srcBase;
1256 mlib_u8 *dst = dstBase;
1257 mlib_u8 *dst_end;
1258
1259 dst_end = dst + 4*width;
1260
1261 if (!((mlib_s32)dst & 7)) {
1262#pragma pipeloop(0)
1263 for (; dst <= (dst_end - 2*4); dst += 2*4) {
1264 dd = vis_freg_pair(((mlib_f32*)pixLut)[src[0]],
1265 ((mlib_f32*)pixLut)[src[1]]);
1266 mask = vis_fcmplt32(dd, dzero);
1267 ARGB2ABGR_DB(dd)
1268 *(mlib_d64*)dst = d_bgpixel;
1269 vis_pst_32(dd, dst, mask);
1270 src += 2;
1271 }
1272 }
1273
1274 while (dst < dst_end) {
1275 x = pixLut[*src++];
1276 if (x < 0) {
1277 dst[0] = x >> 24;
1278 dst[1] = x;
1279 dst[2] = x >> 8;
1280 dst[3] = x >> 16;
1281 } else {
1282 dst[0] = bgpix0;
1283 dst[1] = bgpix1;
1284 dst[2] = bgpix2;
1285 dst[3] = bgpix3;
1286 }
1287 dst += 4;
1288 }
1289
1290 PTR_ADD(dstBase, dstScan);
1291 PTR_ADD(srcBase, srcScan);
1292 }
1293}
1294
1295/***************************************************************/
1296
1297void ADD_SUFF(ByteIndexedToFourByteAbgrScaleConvert)(SCALE_PARAMS)
1298{
1299 jint *pixLut = pSrcInfo->lutBase;
1300 mlib_s32 dstScan = pDstInfo->scanStride;
1301 mlib_s32 srcScan = pSrcInfo->scanStride;
1302 mlib_d64 dd, d_old;
1303 mlib_s32 i, j, x;
1304
1305/*
1306 if (!(((mlib_s32)dstBase | dstScan) & 3)) {
1307 ADD_SUFF(ByteIndexedToIntAbgrScaleConvert)(SCALE_CALL_PARAMS);
1308 return;
1309 }
1310*/
1311
1312 if (width < 8) {
1313 for (j = 0; j < height; j++) {
1314 mlib_u8 *src = srcBase;
1315 mlib_u8 *dst = dstBase;
1316 mlib_s32 tmpsxloc = sxloc;
1317
1318 PTR_ADD(src, (syloc >> shift) * srcScan);
1319
1320 for (i = 0; i < width; i++) {
1321 x = pixLut[src[tmpsxloc >> shift]];
1322 tmpsxloc += sxinc;
1323 dst[4*i ] = x >> 24;
1324 dst[4*i + 1] = x;
1325 dst[4*i + 2] = x >> 8;
1326 dst[4*i + 3] = x >> 16;
1327 }
1328
1329 PTR_ADD(dstBase, dstScan);
1330 syloc += syinc;
1331 }
1332 return;
1333 }
1334
1335 BMASK_FOR_ARGB
1336
1337 for (j = 0; j < height; j++) {
1338 mlib_u8 *src = srcBase;
1339 mlib_u8 *dst = dstBase;
1340 mlib_u8 *dst_end;
1341 mlib_s32 tmpsxloc = sxloc;
1342
1343 PTR_ADD(src, (syloc >> shift) * srcScan);
1344
1345 dst_end = dst + 4*width;
1346
1347 if (!((mlib_s32)dst & 7)) {
1348#pragma pipeloop(0)
1349 for (; dst <= (dst_end - 2*4); dst += 2*4) {
1350 dd = LOAD_2F32(pixLut, src[tmpsxloc >> shift],
1351 src[(tmpsxloc + sxinc) >> shift]);
1352 tmpsxloc += 2*sxinc;
1353 ARGB2ABGR_DB(dd)
1354 *(mlib_d64*)dst = dd;
1355 }
1356 } else {
1357 mlib_d64 *dp;
1358
1359 dp = vis_alignaddr(dst, 0);
1360 dd = vis_faligndata(dp[0], dp[0]);
1361 vis_alignaddrl(dst, 0);
1362
1363#pragma pipeloop(0)
1364 for (; dst <= (dst_end - 2*4); dst += 2*4) {
1365 d_old = dd;
1366 dd = LOAD_2F32(pixLut, src[tmpsxloc >> shift],
1367 src[(tmpsxloc + sxinc) >> shift]);
1368 tmpsxloc += 2*sxinc;
1369 ARGB2ABGR_DB(dd)
1370 *dp++ = vis_faligndata(d_old, dd);
1371 }
1372
1373 vis_pst_8(vis_faligndata(dd, dd), dp, vis_edge8(dp, dst - 1));
1374 }
1375
1376 while (dst < dst_end) {
1377 x = pixLut[src[tmpsxloc >> shift]];
1378 tmpsxloc += sxinc;
1379 dst[0] = x >> 24;
1380 dst[1] = x;
1381 dst[2] = x >> 8;
1382 dst[3] = x >> 16;
1383 dst += 4;
1384 }
1385
1386 PTR_ADD(dstBase, dstScan);
1387 syloc += syinc;
1388 }
1389}
1390
1391/***************************************************************/
1392
1393void ADD_SUFF(ByteIndexedBmToFourByteAbgrScaleXparOver)(SCALE_PARAMS)
1394{
1395 jint *pixLut = pSrcInfo->lutBase;
1396 mlib_s32 dstScan = pDstInfo->scanStride;
1397 mlib_s32 srcScan = pSrcInfo->scanStride;
1398 mlib_d64 dd, dzero;
1399 mlib_s32 i, j, x, mask;
1400
1401/*
1402 if (!(((mlib_s32)dstBase | dstScan) & 3)) {
1403 ADD_SUFF(ByteIndexedToIntAbgrScaleConvert)(SCALE_CALL_PARAMS);
1404 return;
1405 }
1406*/
1407
1408 if (width < 8) {
1409 for (j = 0; j < height; j++) {
1410 mlib_u8 *src = srcBase;
1411 mlib_u8 *dst = dstBase;
1412 mlib_s32 tmpsxloc = sxloc;
1413
1414 PTR_ADD(src, (syloc >> shift) * srcScan);
1415
1416 for (i = 0; i < width; i++) {
1417 x = pixLut[src[tmpsxloc >> shift]];
1418 tmpsxloc += sxinc;
1419 if (x < 0) {
1420 dst[4*i ] = x >> 24;
1421 dst[4*i + 1] = x;
1422 dst[4*i + 2] = x >> 8;
1423 dst[4*i + 3] = x >> 16;
1424 }
1425 }
1426
1427 PTR_ADD(dstBase, dstScan);
1428 syloc += syinc;
1429 }
1430 return;
1431 }
1432
1433 BMASK_FOR_ARGB
1434
1435 dzero = vis_fzero();
1436
1437 for (j = 0; j < height; j++) {
1438 mlib_u8 *src = srcBase;
1439 mlib_u8 *dst = dstBase;
1440 mlib_u8 *dst_end;
1441 mlib_s32 tmpsxloc = sxloc;
1442
1443 PTR_ADD(src, (syloc >> shift) * srcScan);
1444
1445 dst_end = dst + 4*width;
1446
1447 if (!((mlib_s32)dst & 7)) {
1448#pragma pipeloop(0)
1449 for (; dst <= (dst_end - 2*4); dst += 2*4) {
1450 dd = LOAD_2F32(pixLut, src[tmpsxloc >> shift],
1451 src[(tmpsxloc + sxinc) >> shift]);
1452 tmpsxloc += 2*sxinc;
1453 mask = vis_fcmplt32(dd, dzero);
1454 ARGB2ABGR_DB(dd)
1455 vis_pst_32(dd, dst, mask);
1456 }
1457 }
1458
1459 while (dst < dst_end) {
1460 x = pixLut[src[tmpsxloc >> shift]];
1461 tmpsxloc += sxinc;
1462 if (x < 0) {
1463 dst[0] = x >> 24;
1464 dst[1] = x;
1465 dst[2] = x >> 8;
1466 dst[3] = x >> 16;
1467 }
1468 dst += 4;
1469 }
1470
1471 PTR_ADD(dstBase, dstScan);
1472 syloc += syinc;
1473 }
1474}
1475
1476/***************************************************************/
1477
1478void ADD_SUFF(IntArgbBmToFourByteAbgrScaleXparOver)(SCALE_PARAMS)
1479{
1480 mlib_s32 dstScan = pDstInfo->scanStride;
1481 mlib_s32 srcScan = pSrcInfo->scanStride;
1482 mlib_d64 dd, amask;
1483 mlib_s32 i, j, x, mask;
1484
1485 if (width < 16) {
1486 for (j = 0; j < height; j++) {
1487 mlib_s32 *src = srcBase;
1488 mlib_u8 *dst = dstBase;
1489 mlib_s32 tmpsxloc = sxloc;
1490
1491 PTR_ADD(src, (syloc >> shift) * srcScan);
1492
1493 for (i = 0; i < width; i++) {
1494 x = src[tmpsxloc >> shift];
1495 tmpsxloc += sxinc;
1496 if (x >> 24) {
1497 dst[4*i ] = 0xFF;
1498 dst[4*i + 1] = x;
1499 dst[4*i + 2] = x >> 8;
1500 dst[4*i + 3] = x >> 16;
1501 }
1502 }
1503
1504 PTR_ADD(dstBase, dstScan);
1505 syloc += syinc;
1506 }
1507 return;
1508 }
1509
1510 BMASK_FOR_ARGB
1511
1512 amask = vis_to_double_dup(0xFF000000);
1513
1514 for (j = 0; j < height; j++) {
1515 mlib_s32 *src = srcBase;
1516 mlib_u8 *dst = dstBase;
1517 mlib_u8 *dst_end;
1518 mlib_s32 tmpsxloc = sxloc;
1519
1520 PTR_ADD(src, (syloc >> shift) * srcScan);
1521
1522 dst_end = dst + 4*width;
1523
1524 if (!((mlib_s32)dst & 7)) {
1525#pragma pipeloop(0)
1526 for (; dst <= (dst_end - 2*4); dst += 2*4) {
1527 mlib_s32 *pp0 = src + (tmpsxloc >> shift);
1528 mlib_s32 *pp1 = src + ((tmpsxloc + sxinc) >> shift);
1529 dd = vis_freg_pair(*(mlib_f32*)pp0, *(mlib_f32*)pp1);
1530 tmpsxloc += 2*sxinc;
1531 ARGB2ABGR_DB(dd)
1532 dd = vis_for(dd, amask);
1533 mask = (((-*(mlib_u8*)pp0) >> 31) & 2) |
1534 (((-*(mlib_u8*)pp1) >> 31) & 1);
1535 vis_pst_32(dd, dst, mask);
1536 }
1537 }
1538
1539 while (dst < dst_end) {
1540 x = src[tmpsxloc >> shift];
1541 tmpsxloc += sxinc;
1542 if (x >> 24) {
1543 dst[0] = 0xFF;
1544 dst[1] = x;
1545 dst[2] = x >> 8;
1546 dst[3] = x >> 16;
1547 }
1548 dst += 4;
1549 }
1550
1551 PTR_ADD(dstBase, dstScan);
1552 syloc += syinc;
1553 }
1554}
1555
1556/***************************************************************/
1557
1558#ifdef MLIB_ADD_SUFF
1559#pragma weak IntArgbBmToFourByteAbgrPreScaleXparOver_F = \
1560 IntArgbBmToFourByteAbgrScaleXparOver_F
1561#else
1562#pragma weak IntArgbBmToFourByteAbgrPreScaleXparOver = \
1563 IntArgbBmToFourByteAbgrScaleXparOver
1564#endif
1565
1566/***************************************************************/
1567
1568void ADD_SUFF(FourByteAbgrToIntArgbScaleConvert)(SCALE_PARAMS)
1569{
1570 mlib_s32 dstScan = pDstInfo->scanStride;
1571 mlib_s32 srcScan = pSrcInfo->scanStride;
1572 mlib_s32 i, j;
1573
1574 if (width < 16) {
1575 for (j = 0; j < height; j++) {
1576 mlib_u8 *src = srcBase;
1577 mlib_s32 *dst = dstBase;
1578 mlib_s32 tmpsxloc = sxloc;
1579
1580 PTR_ADD(src, (syloc >> shift) * srcScan);
1581
1582 for (i = 0; i < width; i++) {
1583 mlib_u8 *pp = src + 4*(tmpsxloc >> shift);
1584 *dst++ = (pp[0] << 24) | (pp[3] << 16) | (pp[2] << 8) | pp[1];
1585 tmpsxloc += sxinc;
1586 }
1587
1588 PTR_ADD(dstBase, dstScan);
1589 syloc += syinc;
1590 }
1591 return;
1592 }
1593
1594 BMASK_FOR_ARGB
1595
1596 for (j = 0; j < height; j++) {
1597 mlib_u8 *src = srcBase;
1598 mlib_s32 *dst = dstBase;
1599 mlib_s32 *dst_end = dst + width;
1600 mlib_s32 tmpsxloc = sxloc;
1601 mlib_s32 off;
1602 mlib_d64 dd, dd0, dd1;
1603 mlib_f32 *pp0, *pp1;
1604
1605 PTR_ADD(src, (syloc >> shift) * srcScan);
1606
1607 if ((mlib_s32)dst & 7) {
1608 mlib_u8 *pp = src + 4*(tmpsxloc >> shift);
1609 *dst++ = (pp[0] << 24) | (pp[3] << 16) | (pp[2] << 8) | pp[1];
1610 tmpsxloc += sxinc;
1611 }
1612
1613 off = (mlib_s32)src & 3;
1614 if (!off) {
1615#pragma pipeloop(0)
1616 for (; dst <= (dst_end - 2); dst += 2) {
1617 pp0 = (mlib_f32*)src + (tmpsxloc >> shift);
1618 pp1 = (mlib_f32*)src + ((tmpsxloc + sxinc) >> shift);
1619 tmpsxloc += 2*sxinc;
1620 dd = vis_freg_pair(pp0[0], pp1[0]);
1621 ARGB2ABGR_DB(dd)
1622 *(mlib_d64*)dst = dd;
1623 }
1624 } else {
1625 vis_alignaddr(NULL, off);
1626#pragma pipeloop(0)
1627 for (; dst <= (dst_end - 2); dst += 2) {
1628 pp0 = (mlib_f32*)(src - off) + (tmpsxloc >> shift);
1629 pp1 = (mlib_f32*)(src - off) + ((tmpsxloc + sxinc) >> shift);
1630 tmpsxloc += 2*sxinc;
1631 dd0 = vis_freg_pair(pp0[0], pp0[1]);
1632 dd1 = vis_freg_pair(pp1[0], pp1[1]);
1633 dd0 = vis_faligndata(dd0, dd0);
1634 dd1 = vis_faligndata(dd1, dd1);
1635 ARGB2ABGR_FL2(dd, vis_read_hi(dd0), vis_read_hi(dd1))
1636 *(mlib_d64*)dst = dd;
1637 }
1638 }
1639
1640 if (dst < dst_end) {
1641 mlib_u8 *pp = src + 4*(tmpsxloc >> shift);
1642 *dst++ = (pp[0] << 24) | (pp[3] << 16) | (pp[2] << 8) | pp[1];
1643 tmpsxloc += sxinc;
1644 }
1645
1646 PTR_ADD(dstBase, dstScan);
1647 syloc += syinc;
1648 }
1649}
1650
1651/***************************************************************/
1652
1653void ADD_SUFF(IntArgbToFourByteAbgrScaleConvert)(SCALE_PARAMS)
1654{
1655 mlib_s32 dstScan = pDstInfo->scanStride;
1656 mlib_s32 srcScan = pSrcInfo->scanStride;
1657 mlib_s32 i, j;
1658 mlib_s32 x;
1659
1660 if (width < 16) {
1661 for (j = 0; j < height; j++) {
1662 mlib_s32 *src = srcBase;
1663 mlib_u8 *dst = dstBase;
1664 mlib_s32 tmpsxloc = sxloc;
1665
1666 PTR_ADD(src, (syloc >> shift) * srcScan);
1667
1668 for (i = 0; i < width; i++) {
1669 x = src[tmpsxloc >> shift];
1670 tmpsxloc += sxinc;
1671 dst[4*i ] = x >> 24;
1672 dst[4*i + 1] = x;
1673 dst[4*i + 2] = x >> 8;
1674 dst[4*i + 3] = x >> 16;
1675 }
1676
1677 PTR_ADD(dstBase, dstScan);
1678 syloc += syinc;
1679 }
1680 return;
1681 }
1682
1683 BMASK_FOR_ARGB
1684
1685 for (j = 0; j < height; j++) {
1686 mlib_s32 *src = srcBase;
1687 mlib_u8 *dst = dstBase;
1688 mlib_u8 *dst_end = dst + 4*width;
1689 mlib_s32 tmpsxloc = sxloc;
1690 mlib_d64 dd, d_old;
1691 mlib_f32 *pp0, *pp1;
1692
1693 PTR_ADD(src, (syloc >> shift) * srcScan);
1694
1695 if (!((mlib_s32)dst & 3)) {
1696 if ((mlib_s32)dst & 7) {
1697 x = src[tmpsxloc >> shift];
1698 tmpsxloc += sxinc;
1699 dst[0] = x >> 24;
1700 dst[1] = x;
1701 dst[2] = x >> 8;
1702 dst[3] = x >> 16;
1703 dst += 4;
1704 }
1705#pragma pipeloop(0)
1706 for (; dst <= (dst_end - 2*4); dst += 2*4) {
1707 pp0 = (mlib_f32*)src + (tmpsxloc >> shift);
1708 pp1 = (mlib_f32*)src + ((tmpsxloc + sxinc) >> shift);
1709 tmpsxloc += 2*sxinc;
1710 dd = vis_freg_pair(pp0[0], pp1[0]);
1711 ARGB2ABGR_DB(dd)
1712 *(mlib_d64*)dst = dd;
1713 }
1714 } else {
1715 mlib_d64 *dp;
1716
1717 dp = vis_alignaddr(dst, 0);
1718 dd = vis_faligndata(dp[0], dp[0]);
1719 vis_alignaddrl(dst, 0);
1720
1721#pragma pipeloop(0)
1722 for (; dst <= (dst_end - 2*4); dst += 2*4) {
1723 d_old = dd;
1724 pp0 = (mlib_f32*)src + (tmpsxloc >> shift);
1725 pp1 = (mlib_f32*)src + ((tmpsxloc + sxinc) >> shift);
1726 tmpsxloc += 2*sxinc;
1727 dd = vis_freg_pair(pp0[0], pp1[0]);
1728 ARGB2ABGR_DB(dd)
1729 *dp++ = vis_faligndata(d_old, dd);
1730 }
1731
1732 vis_pst_8(vis_faligndata(dd, dd), dp, vis_edge8(dp, dst - 1));
1733 }
1734
1735 if (dst < dst_end) {
1736 x = src[tmpsxloc >> shift];
1737 tmpsxloc += sxinc;
1738 dst[0] = x >> 24;
1739 dst[1] = x;
1740 dst[2] = x >> 8;
1741 dst[3] = x >> 16;
1742 dst += 4;
1743 }
1744
1745 PTR_ADD(dstBase, dstScan);
1746 syloc += syinc;
1747 }
1748}
1749
1750/***************************************************************/
1751
1752void ADD_SUFF(IntRgbToFourByteAbgrScaleConvert)(SCALE_PARAMS)
1753{
1754 mlib_s32 dstScan = pDstInfo->scanStride;
1755 mlib_s32 srcScan = pSrcInfo->scanStride;
1756 mlib_s32 i, j;
1757 mlib_s32 x;
1758 mlib_d64 amask = vis_to_double_dup(0xFF000000);
1759
1760 if (width < 16) {
1761 for (j = 0; j < height; j++) {
1762 mlib_s32 *src = srcBase;
1763 mlib_u8 *dst = dstBase;
1764 mlib_s32 tmpsxloc = sxloc;
1765
1766 PTR_ADD(src, (syloc >> shift) * srcScan);
1767
1768 for (i = 0; i < width; i++) {
1769 x = src[tmpsxloc >> shift];
1770 tmpsxloc += sxinc;
1771 dst[4*i ] = 0xFF;
1772 dst[4*i + 1] = x;
1773 dst[4*i + 2] = x >> 8;
1774 dst[4*i + 3] = x >> 16;
1775 }
1776
1777 PTR_ADD(dstBase, dstScan);
1778 syloc += syinc;
1779 }
1780 return;
1781 }
1782
1783 BMASK_FOR_ARGB
1784
1785 for (j = 0; j < height; j++) {
1786 mlib_s32 *src = srcBase;
1787 mlib_u8 *dst = dstBase;
1788 mlib_u8 *dst_end = dst + 4*width;
1789 mlib_s32 tmpsxloc = sxloc;
1790 mlib_d64 dd, d_old;
1791 mlib_f32 *pp0, *pp1;
1792
1793 PTR_ADD(src, (syloc >> shift) * srcScan);
1794
1795 if (!((mlib_s32)dst & 3)) {
1796 if ((mlib_s32)dst & 7) {
1797 x = src[tmpsxloc >> shift];
1798 tmpsxloc += sxinc;
1799 dst[0] = 0xFF;
1800 dst[1] = x;
1801 dst[2] = x >> 8;
1802 dst[3] = x >> 16;
1803 dst += 4;
1804 }
1805#pragma pipeloop(0)
1806 for (; dst <= (dst_end - 2*4); dst += 2*4) {
1807 pp0 = (mlib_f32*)src + (tmpsxloc >> shift);
1808 pp1 = (mlib_f32*)src + ((tmpsxloc + sxinc) >> shift);
1809 tmpsxloc += 2*sxinc;
1810 dd = vis_freg_pair(pp0[0], pp1[0]);
1811 RGB2ABGR_DB(dd)
1812 *(mlib_d64*)dst = dd;
1813 }
1814 } else {
1815 mlib_d64 *dp;
1816
1817 dp = vis_alignaddr(dst, 0);
1818 dd = vis_faligndata(dp[0], dp[0]);
1819 vis_alignaddrl(dst, 0);
1820
1821#pragma pipeloop(0)
1822 for (; dst <= (dst_end - 2*4); dst += 2*4) {
1823 d_old = dd;
1824 pp0 = (mlib_f32*)src + (tmpsxloc >> shift);
1825 pp1 = (mlib_f32*)src + ((tmpsxloc + sxinc) >> shift);
1826 tmpsxloc += 2*sxinc;
1827 dd = vis_freg_pair(pp0[0], pp1[0]);
1828 RGB2ABGR_DB(dd)
1829 *dp++ = vis_faligndata(d_old, dd);
1830 }
1831
1832 vis_pst_8(vis_faligndata(dd, dd), dp, vis_edge8(dp, dst - 1));
1833 }
1834
1835 if (dst < dst_end) {
1836 x = src[tmpsxloc >> shift];
1837 tmpsxloc += sxinc;
1838 dst[0] = 0xFF;
1839 dst[1] = x;
1840 dst[2] = x >> 8;
1841 dst[3] = x >> 16;
1842 dst += 4;
1843 }
1844
1845 PTR_ADD(dstBase, dstScan);
1846 syloc += syinc;
1847 }
1848}
1849
1850/***************************************************************/
1851
1852void ADD_SUFF(FourByteAbgrDrawGlyphListAA)(SurfaceDataRasInfo * pRasInfo,
1853 ImageRef *glyphs,
1854 jint totalGlyphs,
1855 jint fgpixel, jint argbcolor,
1856 jint clipLeft, jint clipTop,
1857 jint clipRight, jint clipBottom,
1858 NativePrimitive * pPrim,
1859 CompositeInfo * pCompInfo)
1860{
1861 mlib_d64 buff[BUFF_SIZE/2];
1862 void *pbuff = buff;
1863 mlib_s32 glyphCounter;
1864 mlib_s32 scan = pRasInfo->scanStride;
1865 mlib_u8 *dstBase;
1866 mlib_s32 i, j;
1867 mlib_d64 dmix0, dmix1, dd, d0, d1, e0, e1, fgpixel_d;
1868 mlib_d64 done, done16, d_half;
1869 mlib_s32 pix, mask;
1870 mlib_f32 fgpixel_f, srcG_f;
1871 mlib_s32 max_width = BUFF_SIZE;
1872
1873 done = vis_to_double_dup(0x7fff7fff);
1874 done16 = vis_to_double_dup(0x7fff);
1875 d_half = vis_to_double_dup((1 << (16 + 6)) | (1 << 6));
1876
1877 fgpixel_f = vis_ldfa_ASI_PL(&fgpixel);
1878 fgpixel_d = vis_freg_pair(fgpixel_f, fgpixel_f);
1879 srcG_f = vis_to_float(argbcolor);
1880 ARGB2ABGR_FL(srcG_f)
1881
1882 vis_write_gsr(0 << 3);
1883
1884 for (glyphCounter = 0; glyphCounter < totalGlyphs; glyphCounter++) {
1885 const jubyte *pixels;
1886 unsigned int rowBytes;
1887 int left, top;
1888 int width, height;
1889 int right, bottom;
1890
1891 pixels = (const jubyte *) glyphs[glyphCounter].pixels;
1892
1893 if (!pixels) continue;
1894
1895 left = glyphs[glyphCounter].x;
1896 top = glyphs[glyphCounter].y;
1897 width = glyphs[glyphCounter].width;
1898 height = glyphs[glyphCounter].height;
1899 rowBytes = width;
1900 right = left + width;
1901 bottom = top + height;
1902 if (left < clipLeft) {
1903 pixels += clipLeft - left;
1904 left = clipLeft;
1905 }
1906 if (top < clipTop) {
1907 pixels += (clipTop - top) * rowBytes;
1908 top = clipTop;
1909 }
1910 if (right > clipRight) {
1911 right = clipRight;
1912 }
1913 if (bottom > clipBottom) {
1914 bottom = clipBottom;
1915 }
1916 if (right <= left || bottom <= top) {
1917 continue;
1918 }
1919 width = right - left;
1920 height = bottom - top;
1921
1922 dstBase = pRasInfo->rasBase;
1923 PTR_ADD(dstBase, top*scan + 4*left);
1924
1925 if (((mlib_s32)dstBase | scan) & 3) {
1926 if (width > max_width) {
1927 if (pbuff != buff) {
1928 mlib_free(pbuff);
1929 }
1930 pbuff = mlib_malloc(width*sizeof(mlib_s32));
1931 if (pbuff == NULL) return;
1932 max_width = width;
1933 }
1934 }
1935
1936 for (j = 0; j < height; j++) {
1937 mlib_u8 *src = (void*)pixels;
1938 mlib_s32 *dst, *dst_end;
1939
1940 if ((mlib_s32)dstBase & 3) {
1941 COPY_NA(dstBase, pbuff, width*sizeof(mlib_s32));
1942 dst = pbuff;
1943 } else {
1944 dst = (void*)dstBase;
1945 }
1946 dst_end = dst + width;
1947
1948 if ((mlib_s32)dst & 7) {
1949 pix = *src++;
1950 dd = vis_fpadd16(MUL8_VIS(srcG_f, pix), d_half);
1951 dd = vis_fpadd16(MUL8_VIS(*(mlib_f32*)dst, 255 - pix), dd);
1952 *(mlib_f32*)dst = vis_fpack16(dd);
1953 if (pix == 255) *(mlib_f32*)dst = vis_read_hi(fgpixel_d);
1954 dst++;
1955 }
1956
1957#pragma pipeloop(0)
1958 for (; dst <= (dst_end - 2); dst += 2) {
1959 dmix0 = vis_freg_pair(((mlib_f32 *)vis_mul8s_tbl)[src[0]],
1960 ((mlib_f32 *)vis_mul8s_tbl)[src[1]]);
1961 mask = vis_fcmplt32(dmix0, done16);
1962 dmix1 = vis_fpsub16(done, dmix0);
1963 src += 2;
1964
1965 dd = *(mlib_d64*)dst;
1966 d0 = vis_fmul8x16al(srcG_f, vis_read_hi(dmix0));
1967 d1 = vis_fmul8x16al(srcG_f, vis_read_lo(dmix0));
1968 e0 = vis_fmul8x16al(vis_read_hi(dd), vis_read_hi(dmix1));
1969 e1 = vis_fmul8x16al(vis_read_lo(dd), vis_read_lo(dmix1));
1970 d0 = vis_fpadd16(vis_fpadd16(d0, d_half), e0);
1971 d1 = vis_fpadd16(vis_fpadd16(d1, d_half), e1);
1972 dd = vis_fpack16_pair(d0, d1);
1973
1974 *(mlib_d64*)dst = fgpixel_d;
1975 vis_pst_32(dd, dst, mask);
1976 }
1977
1978 while (dst < dst_end) {
1979 pix = *src++;
1980 dd = vis_fpadd16(MUL8_VIS(srcG_f, pix), d_half);
1981 dd = vis_fpadd16(MUL8_VIS(*(mlib_f32*)dst, 255 - pix), dd);
1982 *(mlib_f32*)dst = vis_fpack16(dd);
1983 if (pix == 255) *(mlib_f32*)dst = vis_read_hi(fgpixel_d);
1984 dst++;
1985 }
1986
1987 if ((mlib_s32)dstBase & 3) {
1988 COPY_NA(pbuff, dstBase, width*sizeof(mlib_s32));
1989 }
1990
1991 PTR_ADD(dstBase, scan);
1992 pixels += rowBytes;
1993 }
1994 }
1995
1996 if (pbuff != buff) {
1997 mlib_free(pbuff);
1998 }
1999}
2000
2001/***************************************************************/
2002
2003#endif /* JAVA2D_NO_MLIB */