blob: cd773365ea85c0c33792a7164448ff496709efcd [file] [log] [blame]
J. Duke319a3b92007-12-01 00:00:00 +00001/*
2 * Copyright 2003 Sun Microsystems, Inc. All Rights Reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Sun designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Sun in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
22 * CA 95054 USA or visit www.sun.com if you need additional information or
23 * have any questions.
24 */
25
26#if !defined(JAVA2D_NO_MLIB) || defined(MLIB_ADD_SUFF)
27
28#include "vis_AlphaMacros.h"
29
30/***************************************************************/
31
/*
 * Build one 32-bit pixel from the three bytes starting at src[3*i]:
 * 0xFF in bits 31..24, src[3*i + 2] in bits 23..16, src[3*i + 1] in
 * bits 15..8 and src[3*i] in bits 7..0.
 *
 * Expects a byte pointer named `src` in the enclosing scope.  The argument
 * and the whole expansion are parenthesized so the macro stays correct when
 * `i` is an expression or when the result is used inside a larger one.
 */
#define GET_ARGBPRE(i)                                                   \
    (0xFF000000 | (src[3*(i) + 2] << 16) | (src[3*(i) + 1] << 8) | src[3*(i)])
34
35/***************************************************************/
36
/*
 * rr = vis_fmul8x16(dstARGB, T[dstA]), where T is vis_div8pre_tbl viewed as
 * an array of mlib_d64 and dstA is the pixel's alpha value.
 * This is the "un-premultiply" step of the IntArgbPre -> IntArgb converters
 * that follow.  Parameters are parenthesized so that expression arguments
 * expand safely.
 */
#define CONVERT_PRE(rr, dstA, dstARGB) \
    (rr) = vis_fmul8x16((dstARGB), ((mlib_d64*)vis_div8pre_tbl)[(dstA)])
39
40/***************************************************************/
41
42void ADD_SUFF(IntArgbPreToIntArgbConvert)(BLIT_PARAMS)
43{
44 mlib_s32 dstScan = pDstInfo->scanStride;
45 mlib_s32 srcScan = pSrcInfo->scanStride;
46 mlib_s32 dstA0, dstA1;
47 mlib_d64 res0, res1, dstARGB;
48 mlib_f32 dstARGB0;
49 mlib_s32 i, i0, j;
50
51 vis_write_gsr(7 << 3);
52
53 if (dstScan == 4*width && srcScan == 4*width) {
54 width *= height;
55 height = 1;
56 }
57
58 for (j = 0; j < height; j++) {
59 mlib_f32 *src = srcBase;
60 mlib_f32 *dst = dstBase;
61
62 i = i0 = 0;
63
64 if ((mlib_s32)dst & 7) {
65 dstA0 = *(mlib_u8*)(src + i);
66 dstARGB0 = src[i];
67 CONVERT_PRE(res0, dstA0, dstARGB0);
68 dst[i] = vis_fpack16(res0);
69
70 i0 = 1;
71 }
72
73#pragma pipeloop(0)
74 for (i = i0; i <= (mlib_s32)width - 2; i += 2) {
75 dstA0 = *(mlib_u8*)(src + i);
76 dstA1 = *(mlib_u8*)(src + i + 1);
77 dstARGB = vis_freg_pair(src[i], src[i + 1]);
78
79 CONVERT_PRE(res0, dstA0, vis_read_hi(dstARGB));
80 CONVERT_PRE(res1, dstA1, vis_read_lo(dstARGB));
81
82 res0 = vis_fpack16_pair(res0, res1);
83
84 *(mlib_d64*)(dst + i) = res0;
85 }
86
87 if (i < width) {
88 dstA0 = *(mlib_u8*)(src + i);
89 dstARGB0 = src[i];
90 CONVERT_PRE(res0, dstA0, dstARGB0);
91 dst[i] = vis_fpack16(res0);
92 }
93
94 PTR_ADD(dstBase, dstScan);
95 PTR_ADD(srcBase, srcScan);
96 }
97}
98
99/***************************************************************/
100
101void ADD_SUFF(IntArgbPreToIntArgbScaleConvert)(SCALE_PARAMS)
102{
103 mlib_s32 dstScan = pDstInfo->scanStride;
104 mlib_s32 srcScan = pSrcInfo->scanStride;
105 mlib_s32 dstA0, dstA1;
106 mlib_d64 res0, res1, dstARGB;
107 mlib_f32 dstARGB0;
108 mlib_s32 i, i0, j, ind0, ind1;
109
110 if (width < 16) {
111 for (j = 0; j < height; j++) {
112 mlib_s32 *src = srcBase;
113 mlib_u8 *dst = dstBase;
114 mlib_s32 tmpsxloc = sxloc;
115
116 PTR_ADD(src, (syloc >> shift) * srcScan);
117
118 for (i = 0; i < width; i++) {
119 mlib_u32 argb = src[tmpsxloc >> shift];
120 mlib_u32 a, r, g, b;
121 b = argb & 0xff;
122 g = (argb >> 8) & 0xff;
123 r = (argb >> 16) & 0xff;
124 a = argb >> 24;
125 dst[4*i] = a;
126 if (a == 0) a = 255; /* a |= (a - 1) >> 24; */
127 dst[4*i + 1] = div8table[a][r];
128 dst[4*i + 2] = div8table[a][g];
129 dst[4*i + 3] = div8table[a][b];
130 tmpsxloc += sxinc;
131 }
132
133 PTR_ADD(dstBase, dstScan);
134 syloc += syinc;
135 }
136 return;
137 }
138
139 vis_write_gsr(7 << 3);
140
141 for (j = 0; j < height; j++) {
142 mlib_f32 *src = srcBase;
143 mlib_f32 *dst = dstBase;
144 mlib_s32 tmpsxloc = sxloc;
145
146 PTR_ADD(src, (syloc >> shift) * srcScan);
147
148 i = i0 = 0;
149
150 if ((mlib_s32)dst & 7) {
151 ind0 = tmpsxloc >> shift;
152 tmpsxloc += sxinc;
153 dstA0 = *(mlib_u8*)(src + ind0);
154 dstARGB0 = src[ind0];
155 CONVERT_PRE(res0, dstA0, dstARGB0);
156 dst[i] = vis_fpack16(res0);
157
158 i0 = 1;
159 }
160
161#pragma pipeloop(0)
162 for (i = i0; i <= (mlib_s32)width - 2; i += 2) {
163 ind0 = tmpsxloc >> shift;
164 tmpsxloc += sxinc;
165 ind1 = tmpsxloc >> shift;
166 tmpsxloc += sxinc;
167 dstA0 = *(mlib_u8*)(src + ind0);
168 dstA1 = *(mlib_u8*)(src + ind1);
169
170 dstARGB = vis_freg_pair(src[ind0], src[ind1]);
171
172 CONVERT_PRE(res0, dstA0, vis_read_hi(dstARGB));
173 CONVERT_PRE(res1, dstA1, vis_read_lo(dstARGB));
174
175 res0 = vis_fpack16_pair(res0, res1);
176
177 *(mlib_d64*)(dst + i) = res0;
178 }
179
180 if (i < width) {
181 ind0 = tmpsxloc >> shift;
182 tmpsxloc += sxinc;
183 dstA0 = *(mlib_u8*)(src + ind0);
184 dstARGB0 = src[ind0];
185 CONVERT_PRE(res0, dstA0, dstARGB0);
186 dst[i] = vis_fpack16(res0);
187 }
188
189 PTR_ADD(dstBase, dstScan);
190 syloc += syinc;
191 }
192}
193
194/***************************************************************/
195
/*
 * From here on CONVERT_PRE premultiplies instead of dividing:
 * rr = MUL8_VIS(dstARGB, dstA), i.e. the pixel scaled by its alpha value.
 * Parameters are parenthesized so that expression arguments expand safely.
 */
#undef  CONVERT_PRE
#define CONVERT_PRE(rr, dstA, dstARGB) \
    (rr) = MUL8_VIS((dstARGB), (dstA))
199
200void ADD_SUFF(IntArgbToIntArgbPreConvert)(BLIT_PARAMS)
201{
202 mlib_s32 dstScan = pDstInfo->scanStride;
203 mlib_s32 srcScan = pSrcInfo->scanStride;
204 mlib_s32 dstA0, dstA1;
205 mlib_d64 res0, res1, dstARGB;
206 mlib_f32 dstARGB0;
207 mlib_s32 i, i0, j;
208
209 vis_write_gsr(0 << 3);
210
211 if (dstScan == 4*width && srcScan == 4*width) {
212 width *= height;
213 height = 1;
214 }
215
216 for (j = 0; j < height; j++) {
217 mlib_f32 *src = srcBase;
218 mlib_f32 *dst = dstBase;
219
220 i = i0 = 0;
221
222 if ((mlib_s32)dst & 7) {
223 dstA0 = *(mlib_u8*)(src + i);
224 dstARGB0 = src[i];
225 CONVERT_PRE(res0, dstA0, dstARGB0);
226 dst[i] = vis_fpack16(res0);
227 *(mlib_u8*)(dst + i) = dstA0;
228
229 i0 = 1;
230 }
231
232#pragma pipeloop(0)
233 for (i = i0; i <= (mlib_s32)width - 2; i += 2) {
234 dstA0 = *(mlib_u8*)(src + i);
235 dstA1 = *(mlib_u8*)(src + i + 1);
236 dstARGB = vis_freg_pair(src[i], src[i + 1]);
237
238 CONVERT_PRE(res0, dstA0, vis_read_hi(dstARGB));
239 CONVERT_PRE(res1, dstA1, vis_read_lo(dstARGB));
240
241 res0 = vis_fpack16_pair(res0, res1);
242
243 *(mlib_d64*)(dst + i) = res0;
244 vis_pst_8(dstARGB, dst + i, 0x88);
245 }
246
247 if (i < width) {
248 dstA0 = *(mlib_u8*)(src + i);
249 dstARGB0 = src[i];
250 CONVERT_PRE(res0, dstA0, dstARGB0);
251 dst[i] = vis_fpack16(res0);
252 *(mlib_u8*)(dst + i) = dstA0;
253 }
254
255 PTR_ADD(dstBase, dstScan);
256 PTR_ADD(srcBase, srcScan);
257 }
258}
259
260/***************************************************************/
261
262void ADD_SUFF(IntArgbToIntArgbPreScaleConvert)(SCALE_PARAMS)
263{
264 mlib_s32 dstScan = pDstInfo->scanStride;
265 mlib_s32 srcScan = pSrcInfo->scanStride;
266 mlib_s32 dstA0, dstA1;
267 mlib_d64 res0, res1, dstARGB;
268 mlib_f32 dstARGB0;
269 mlib_s32 i, i0, j, ind0, ind1;
270
271 if (width < 16) {
272 for (j = 0; j < height; j++) {
273 mlib_s32 *src = srcBase;
274 mlib_u8 *dst = dstBase;
275 mlib_s32 tmpsxloc = sxloc;
276
277 PTR_ADD(src, (syloc >> shift) * srcScan);
278
279 for (i = 0; i < width; i++) {
280 mlib_u32 argb = src[tmpsxloc >> shift];
281 mlib_u32 a, r, g, b;
282 b = argb & 0xff;
283 g = (argb >> 8) & 0xff;
284 r = (argb >> 16) & 0xff;
285 a = argb >> 24;
286 dst[4*i] = a;
287 dst[4*i + 1] = mul8table[a][r];
288 dst[4*i + 2] = mul8table[a][g];
289 dst[4*i + 3] = mul8table[a][b];
290 tmpsxloc += sxinc;
291 }
292
293 PTR_ADD(dstBase, dstScan);
294 syloc += syinc;
295 }
296 return;
297 }
298
299 vis_write_gsr(0 << 3);
300
301 for (j = 0; j < height; j++) {
302 mlib_f32 *src = srcBase;
303 mlib_f32 *dst = dstBase;
304 mlib_s32 tmpsxloc = sxloc;
305
306 PTR_ADD(src, (syloc >> shift) * srcScan);
307
308 i = i0 = 0;
309
310 if ((mlib_s32)dst & 7) {
311 ind0 = tmpsxloc >> shift;
312 tmpsxloc += sxinc;
313 dstA0 = *(mlib_u8*)(src + ind0);
314 dstARGB0 = src[ind0];
315 CONVERT_PRE(res0, dstA0, dstARGB0);
316 dst[i] = vis_fpack16(res0);
317 *(mlib_u8*)(dst + i) = dstA0;
318
319 i0 = 1;
320 }
321
322#pragma pipeloop(0)
323 for (i = i0; i <= (mlib_s32)width - 2; i += 2) {
324 ind0 = tmpsxloc >> shift;
325 tmpsxloc += sxinc;
326 ind1 = tmpsxloc >> shift;
327 tmpsxloc += sxinc;
328 dstA0 = *(mlib_u8*)(src + ind0);
329 dstA1 = *(mlib_u8*)(src + ind1);
330
331 dstARGB = vis_freg_pair(src[ind0], src[ind1]);
332
333 CONVERT_PRE(res0, dstA0, vis_read_hi(dstARGB));
334 CONVERT_PRE(res1, dstA1, vis_read_lo(dstARGB));
335
336 res0 = vis_fpack16_pair(res0, res1);
337
338 *(mlib_d64*)(dst + i) = res0;
339 vis_pst_8(dstARGB, dst + i, 0x88);
340 }
341
342 if (i < width) {
343 ind0 = tmpsxloc >> shift;
344 tmpsxloc += sxinc;
345 dstA0 = *(mlib_u8*)(src + ind0);
346 dstARGB0 = src[ind0];
347 CONVERT_PRE(res0, dstA0, dstARGB0);
348 dst[i] = vis_fpack16(res0);
349 *(mlib_u8*)(dst + i) = dstA0;
350 }
351
352 PTR_ADD(dstBase, dstScan);
353 syloc += syinc;
354 }
355}
356
357/***************************************************************/
358
359void ADD_SUFF(IntArgbToIntArgbPreXorBlit)(BLIT_PARAMS)
360{
361 mlib_s32 dstScan = pDstInfo->scanStride;
362 mlib_s32 srcScan = pSrcInfo->scanStride;
363 mlib_s32 xorpixel = pCompInfo->details.xorPixel;
364 mlib_s32 alphamask = pCompInfo->alphaMask;
365 mlib_s32 dstA0, dstA1;
366 mlib_d64 res0, res1, dstARGB, dd, d_xorpixel, d_alphamask, maskRGB;
367 mlib_d64 d_round;
368 mlib_f32 dstARGB0, ff;
369 mlib_s32 i, i0, j;
370
371 vis_write_gsr(0 << 3);
372
373 if (dstScan == 4*width && srcScan == 4*width) {
374 width *= height;
375 height = 1;
376 }
377
378 d_xorpixel = vis_to_double_dup(xorpixel);
379 d_alphamask = vis_to_double_dup(alphamask);
380 maskRGB = vis_to_double_dup(0xFFFFFF);
381 d_round = vis_to_double_dup(((1 << 16) | 1) << 6);
382
383 xorpixel >>= 24;
384 alphamask >>= 24;
385
386 for (j = 0; j < height; j++) {
387 mlib_f32 *src = srcBase;
388 mlib_f32 *dst = dstBase;
389
390 i = i0 = 0;
391
392 if ((mlib_s32)dst & 7) {
393 dstA0 = *(mlib_u8*)(src + i);
394 dstARGB0 = src[i];
395 if (dstA0 & 0x80) {
396 CONVERT_PRE(res0, dstA0, dstARGB0);
397 res0 = vis_fpadd16(res0, d_round);
398 ff = vis_fpack16(res0);
399 ff = vis_fxors(ff, vis_read_hi(d_xorpixel));
400 ff = vis_fandnots(vis_read_hi(d_alphamask), ff);
401 ff = vis_fxors(ff, dst[i]);
402 dstA0 = *(mlib_u8*)(dst + i) ^
403 ((dstA0 ^ xorpixel) &~ alphamask);
404 dst[i] = ff;
405 *(mlib_u8*)(dst + i) = dstA0;
406 }
407
408 i0 = 1;
409 }
410
411#pragma pipeloop(0)
412 for (i = i0; i <= (mlib_s32)width - 2; i += 2) {
413 dstA0 = *(mlib_u8*)(src + i);
414 dstA1 = *(mlib_u8*)(src + i + 1);
415 dstARGB = vis_freg_pair(src[i], src[i + 1]);
416
417 CONVERT_PRE(res0, dstA0, vis_read_hi(dstARGB));
418 CONVERT_PRE(res1, dstA1, vis_read_lo(dstARGB));
419 res0 = vis_fpadd16(res0, d_round);
420 res1 = vis_fpadd16(res1, d_round);
421 dd = vis_fpack16_pair(res0, res1);
422
423 dd = vis_for(vis_fand(maskRGB, dd), vis_fandnot(maskRGB, dstARGB));
424
425 dd = vis_fxor(dd, d_xorpixel);
426 dd = vis_fandnot(d_alphamask, dd);
427 dd = vis_fxor(dd, *(mlib_d64*)(dst + i));
428
429 vis_pst_32(dd, dst + i, ((dstA0 >> 6) & 2) | (dstA1 >> 7));
430 }
431
432 if (i < width) {
433 dstA0 = *(mlib_u8*)(src + i);
434 dstARGB0 = src[i];
435 if (dstA0 & 0x80) {
436 CONVERT_PRE(res0, dstA0, dstARGB0);
437 res0 = vis_fpadd16(res0, d_round);
438 ff = vis_fpack16(res0);
439 ff = vis_fxors(ff, vis_read_hi(d_xorpixel));
440 ff = vis_fandnots(vis_read_hi(d_alphamask), ff);
441 ff = vis_fxors(ff, dst[i]);
442 dstA0 = *(mlib_u8*)(dst + i) ^
443 ((dstA0 ^ xorpixel) &~ alphamask);
444 dst[i] = ff;
445 *(mlib_u8*)(dst + i) = dstA0;
446 }
447 }
448
449 PTR_ADD(dstBase, dstScan);
450 PTR_ADD(srcBase, srcScan);
451 }
452}
453
454/***************************************************************/
455
456void ADD_SUFF(IntRgbToIntArgbPreConvert)(BLIT_PARAMS)
457{
458 mlib_s32 dstScan = pDstInfo->scanStride;
459 mlib_s32 srcScan = pSrcInfo->scanStride;
460 mlib_d64 dd, mask;
461 mlib_s32 i, i0, j;
462
463 if (dstScan == 4*width && srcScan == 4*width) {
464 width *= height;
465 height = 1;
466 }
467
468 mask = vis_to_double_dup(0xFF000000);
469
470 for (j = 0; j < height; j++) {
471 mlib_f32 *src = srcBase;
472 mlib_f32 *dst = dstBase;
473
474 i = i0 = 0;
475
476 if ((mlib_s32)dst & 7) {
477 dst[i] = vis_fors(src[i], vis_read_hi(mask));
478 i0 = 1;
479 }
480
481#pragma pipeloop(0)
482 for (i = i0; i <= (mlib_s32)width - 2; i += 2) {
483 dd = vis_freg_pair(src[i], src[i + 1]);
484
485 *(mlib_d64*)(dst + i) = vis_for(dd, mask);
486 }
487
488 if (i < width) {
489 dst[i] = vis_fors(src[i], vis_read_hi(mask));
490 }
491
492 PTR_ADD(dstBase, dstScan);
493 PTR_ADD(srcBase, srcScan);
494 }
495}
496
497/***************************************************************/
498
499void ADD_SUFF(IntRgbToIntArgbPreScaleConvert)(SCALE_PARAMS)
500{
501 mlib_s32 dstScan = pDstInfo->scanStride;
502 mlib_s32 srcScan = pSrcInfo->scanStride;
503 mlib_d64 dd, mask;
504 mlib_s32 j;
505
506 mask = vis_to_double_dup(0xFF000000);
507
508 for (j = 0; j < height; j++) {
509 mlib_f32 *src = srcBase;
510 mlib_f32 *dst = dstBase;
511 mlib_f32 *dst_end = dst + width;
512 mlib_s32 tmpsxloc = sxloc;
513
514 PTR_ADD(src, (syloc >> shift) * srcScan);
515
516 if ((mlib_s32)dst & 7) {
517 *dst++ = vis_fors(src[tmpsxloc >> shift], vis_read_hi(mask));
518 tmpsxloc += sxinc;
519 }
520
521#pragma pipeloop(0)
522 for (; dst <= dst_end - 2; dst += 2) {
523 dd = vis_freg_pair(src[tmpsxloc >> shift],
524 src[(tmpsxloc + sxinc) >> shift]);
525 *(mlib_d64*)dst = vis_for(dd, mask);
526 tmpsxloc += 2*sxinc;
527 }
528
529 if (dst < dst_end) {
530 *dst = vis_fors(src[tmpsxloc >> shift], vis_read_hi(mask));
531 }
532
533 PTR_ADD(dstBase, dstScan);
534 syloc += syinc;
535 }
536}
537
538/***************************************************************/
539
/*
 * Shuffle 24 source bytes (sd0, sd1, sd2) into eight 4-byte pixels
 * (dd0..dd3, two pixels each) using vis_fpmerge only.
 *
 * Reads   sd0, sd1, sd2, s_0   from the enclosing scope.
 * Writes  dd0, dd1, dd2, dd3   in the enclosing scope.
 *
 * Stage 1-3 separate the interleaved 3-byte samples into three 8-byte
 * planes (every third byte each); stage 4-5 interleave those planes with
 * s_0 so that each output pixel is: one byte of s_0, then the third,
 * second and first byte of the source triple.
 * NOTE(review): this relies on vis_fpmerge interleaving its two 4-byte
 * operands byte by byte -- confirm against the VIS manual.
 */
#define BGR_TO_ARGB {                                          \
    mlib_d64 m1a, m1b, m1c, m2a, m2b, m2c;                     \
    mlib_d64 pl1, pl2, pl3, hi13, lo13, hi02, lo02;            \
                                                               \
    /* stage 1 */                                              \
    m1a = vis_fpmerge(vis_read_hi(sd0), vis_read_lo(sd1));     \
    m1b = vis_fpmerge(vis_read_lo(sd0), vis_read_hi(sd2));     \
    m1c = vis_fpmerge(vis_read_hi(sd1), vis_read_lo(sd2));     \
                                                               \
    /* stage 2 */                                              \
    m2a = vis_fpmerge(vis_read_hi(m1a), vis_read_lo(m1b));     \
    m2b = vis_fpmerge(vis_read_lo(m1a), vis_read_hi(m1c));     \
    m2c = vis_fpmerge(vis_read_hi(m1b), vis_read_lo(m1c));     \
                                                               \
    /* stage 3: one plane per source byte position */          \
    pl3 = vis_fpmerge(vis_read_hi(m2a), vis_read_lo(m2b));     \
    pl2 = vis_fpmerge(vis_read_lo(m2a), vis_read_hi(m2c));     \
    pl1 = vis_fpmerge(vis_read_hi(m2b), vis_read_lo(m2c));     \
                                                               \
    /* stage 4: pair up planes 1/3 and s_0/2 */                \
    hi13 = vis_fpmerge(vis_read_hi(pl1), vis_read_hi(pl3));    \
    lo13 = vis_fpmerge(vis_read_lo(pl1), vis_read_lo(pl3));    \
    hi02 = vis_fpmerge(vis_read_hi(s_0), vis_read_hi(pl2));    \
    lo02 = vis_fpmerge(vis_read_lo(s_0), vis_read_lo(pl2));    \
                                                               \
    /* stage 5: final 4-byte pixels */                         \
    dd0 = vis_fpmerge(vis_read_hi(hi02), vis_read_hi(hi13));   \
    dd1 = vis_fpmerge(vis_read_lo(hi02), vis_read_lo(hi13));   \
    dd2 = vis_fpmerge(vis_read_hi(lo02), vis_read_hi(lo13));   \
    dd3 = vis_fpmerge(vis_read_lo(lo02), vis_read_lo(lo13));   \
}
566
567/***************************************************************/
568
569void ADD_SUFF(ThreeByteBgrToIntArgbPreConvert)(BLIT_PARAMS)
570{
571 mlib_s32 dstScan = pDstInfo->scanStride;
572 mlib_s32 srcScan = pSrcInfo->scanStride;
573 mlib_d64 *sp;
574 mlib_d64 s_0;
575 mlib_d64 s0, s1, s2, s3, sd0, sd1, sd2, dd0, dd1, dd2, dd3;
576 mlib_s32 i, i0, j;
577
578 if (srcScan == 3*width && dstScan == 4*width) {
579 width *= height;
580 height = 1;
581 }
582
583 s_0 = vis_fone();
584
585 for (j = 0; j < height; j++) {
586 mlib_u8 *src = srcBase;
587 mlib_f32 *dst = dstBase;
588
589 i = i0 = 0;
590
591 if ((mlib_s32)dst & 7) {
592 ((mlib_s32*)dst)[i] = GET_ARGBPRE(i);
593 i0 = 1;
594 }
595
596 sp = vis_alignaddr(src, 3*i0);
597 s3 = *sp++;
598
599#pragma pipeloop(0)
600 for (i = i0; i <= (mlib_s32)width - 8; i += 8) {
601 s0 = s3;
602 s1 = *sp++;
603 s2 = *sp++;
604 s3 = *sp++;
605 sd0 = vis_faligndata(s0, s1);
606 sd1 = vis_faligndata(s1, s2);
607 sd2 = vis_faligndata(s2, s3);
608
609 BGR_TO_ARGB
610
611 *(mlib_d64*)(dst + i ) = dd0;
612 *(mlib_d64*)(dst + i + 2) = dd1;
613 *(mlib_d64*)(dst + i + 4) = dd2;
614 *(mlib_d64*)(dst + i + 6) = dd3;
615 }
616
617 for (; i < width; i++) {
618 ((mlib_s32*)dst)[i] = GET_ARGBPRE(i);
619 }
620
621 PTR_ADD(dstBase, dstScan);
622 PTR_ADD(srcBase, srcScan);
623 }
624}
625
626/***************************************************************/
627
628void ADD_SUFF(ThreeByteBgrToIntArgbPreScaleConvert)(SCALE_PARAMS)
629{
630 mlib_s32 dstScan = pDstInfo->scanStride;
631 mlib_s32 srcScan = pSrcInfo->scanStride;
632 mlib_d64 dd, maskFF;
633 mlib_s32 i, i0, i1, j;
634
635 maskFF = vis_fone();
636
637 vis_alignaddr(NULL, 7);
638
639 for (j = 0; j < height; j++) {
640 mlib_u8 *src = srcBase;
641 mlib_f32 *dst = dstBase;
642 mlib_f32 *dst_end = dst + width;
643 mlib_s32 tmpsxloc = sxloc;
644
645 PTR_ADD(src, (syloc >> shift) * srcScan);
646
647 if ((mlib_s32)dst & 7) {
648 i = tmpsxloc >> shift;
649 tmpsxloc += sxinc;
650 *(mlib_s32*)dst = GET_ARGBPRE(i);
651 dst++;
652 }
653
654#pragma pipeloop(0)
655 for (; dst <= dst_end - 2; dst += 2) {
656 i0 = tmpsxloc >> shift;
657 i1 = (tmpsxloc + sxinc) >> shift;
658 tmpsxloc += 2*sxinc;
659
660 dd = vis_faligndata(vis_ld_u8(src + 3*i1 ), dd);
661 dd = vis_faligndata(vis_ld_u8(src + 3*i1 + 1), dd);
662 dd = vis_faligndata(vis_ld_u8(src + 3*i1 + 2), dd);
663 dd = vis_faligndata(maskFF, dd);
664 dd = vis_faligndata(vis_ld_u8(src + 3*i0 ), dd);
665 dd = vis_faligndata(vis_ld_u8(src + 3*i0 + 1), dd);
666 dd = vis_faligndata(vis_ld_u8(src + 3*i0 + 2), dd);
667 dd = vis_faligndata(maskFF, dd);
668
669 *(mlib_d64*)dst = dd;
670 }
671
672 for (; dst < dst_end; dst++) {
673 i = tmpsxloc >> shift;
674 tmpsxloc += sxinc;
675 *(mlib_s32*)dst = GET_ARGBPRE(i);
676 }
677
678 PTR_ADD(dstBase, dstScan);
679 syloc += syinc;
680 }
681}
682
683/***************************************************************/
684
685void ADD_SUFF(ByteIndexedToIntArgbPreConvert)(BLIT_PARAMS)
686{
687 jint *pixLut = pSrcInfo->lutBase;
688 mlib_s32 buff[256];
689 mlib_s32 dstScan = pDstInfo->scanStride;
690 mlib_s32 srcScan = pSrcInfo->scanStride;
691 mlib_s32 i, i0, j;
692
693 if (width < 16) {
694 for (j = 0; j < height; j++) {
695 mlib_u8 *src = srcBase;
696 mlib_s32 *dst = dstBase;
697
698 for (i = 0; i < width; i++) {
699 mlib_s32 a, r, g, b;
700 mlib_u32 x = pixLut[src[i]];
701 b = x & 0xff;
702 g = (x >> 8) & 0xff;
703 r = (x >> 16) & 0xff;
704 a = x >> 24;
705 r = mul8table[a][r];
706 g = mul8table[a][g];
707 b = mul8table[a][b];
708 dst[i] = (a << 24) | (r << 16) | (g << 8) | b;
709 }
710
711 PTR_ADD(dstBase, dstScan);
712 PTR_ADD(srcBase, srcScan);
713 }
714 return;
715 }
716
717 if (srcScan == width && dstScan == 4*width) {
718 width *= height;
719 height = 1;
720 }
721
722 ADD_SUFF(IntArgbToIntArgbPreConvert)(pixLut, buff, 256, 1,
723 pSrcInfo, pDstInfo, pPrim, pCompInfo);
724
725 for (j = 0; j < height; j++) {
726 mlib_u8 *src = srcBase;
727 mlib_s32 *dst = dstBase;
728
729 i = i0 = 0;
730
731 if ((mlib_s32)dst & 7) {
732 dst[i] = buff[src[i]];
733 i0 = 1;
734 }
735
736#pragma pipeloop(0)
737 for (i = i0; i <= (mlib_s32)width - 2; i += 2) {
738 *(mlib_d64*)(dst + i) = LOAD_2F32(buff, src[i], src[i + 1]);
739 }
740
741 for (; i < width; i++) {
742 dst[i] = buff[src[i]];
743 }
744
745 PTR_ADD(dstBase, dstScan);
746 PTR_ADD(srcBase, srcScan);
747 }
748}
749
750/***************************************************************/
751
752void ADD_SUFF(ByteIndexedToIntArgbPreScaleConvert)(SCALE_PARAMS)
753{
754 jint *pixLut = pSrcInfo->lutBase;
755 mlib_s32 buff[256];
756 mlib_s32 dstScan = pDstInfo->scanStride;
757 mlib_s32 srcScan = pSrcInfo->scanStride;
758 mlib_s32 i, j;
759
760 if (width < 16) {
761 for (j = 0; j < height; j++) {
762 mlib_u8 *src = srcBase;
763 mlib_s32 *dst = dstBase;
764 mlib_s32 tmpsxloc = sxloc;
765
766 PTR_ADD(src, (syloc >> shift) * srcScan);
767
768 for (i = 0; i < width; i++) {
769 mlib_s32 a, r, g, b;
770 mlib_u32 x = pixLut[src[tmpsxloc >> shift]];
771 tmpsxloc += sxinc;
772 b = x & 0xff;
773 g = (x >> 8) & 0xff;
774 r = (x >> 16) & 0xff;
775 a = x >> 24;
776 r = mul8table[a][r];
777 g = mul8table[a][g];
778 b = mul8table[a][b];
779 dst[i] = (a << 24) | (r << 16) | (g << 8) | b;
780 }
781
782 PTR_ADD(dstBase, dstScan);
783 syloc += syinc;
784 }
785 return;
786 }
787
788 ADD_SUFF(IntArgbToIntArgbPreConvert)(pixLut, buff, 256, 1,
789 pSrcInfo, pDstInfo, pPrim, pCompInfo);
790
791 for (j = 0; j < height; j++) {
792 mlib_u8 *src = srcBase;
793 mlib_s32 *dst = dstBase;
794 mlib_s32 *dst_end = dst + width;
795 mlib_s32 tmpsxloc = sxloc;
796
797 PTR_ADD(src, (syloc >> shift) * srcScan);
798
799 if ((mlib_s32)dst & 7) {
800 *dst++ = buff[src[tmpsxloc >> shift]];
801 tmpsxloc += sxinc;
802 }
803
804#pragma pipeloop(0)
805 for (; dst <= dst_end - 2; dst += 2) {
806 *(mlib_d64*)dst = LOAD_2F32(buff, src[tmpsxloc >> shift],
807 src[(tmpsxloc + sxinc) >> shift]);
808 tmpsxloc += 2*sxinc;
809 }
810
811 for (; dst < dst_end; dst++) {
812 *dst = buff[src[tmpsxloc >> shift]];
813 tmpsxloc += sxinc;
814 }
815
816 PTR_ADD(dstBase, dstScan);
817 syloc += syinc;
818 }
819}
820
821/***************************************************************/
822
823void ADD_SUFF(ByteIndexedBmToIntArgbPreXparOver)(BLIT_PARAMS)
824{
825 jint *pixLut = pSrcInfo->lutBase;
826 mlib_s32 buff[256];
827 mlib_s32 dstScan = pDstInfo->scanStride;
828 mlib_s32 srcScan = pSrcInfo->scanStride;
829 mlib_d64 dd, dzero;
830 mlib_s32 i, i0, j, x, mask;
831
832 if (width < 16) {
833 for (j = 0; j < height; j++) {
834 mlib_u8 *src = srcBase;
835 mlib_s32 *dst = dstBase;
836
837 for (i = 0; i < width; i++) {
838 mlib_s32 a, r, g, b;
839 mlib_s32 x = pixLut[src[i]];
840 if (x < 0) {
841 b = x & 0xff;
842 g = (x >> 8) & 0xff;
843 r = (x >> 16) & 0xff;
844 a = (mlib_u32)x >> 24;
845 r = mul8table[a][r];
846 g = mul8table[a][g];
847 b = mul8table[a][b];
848 dst[i] = (a << 24) | (r << 16) | (g << 8) | b;
849 }
850 }
851
852 PTR_ADD(dstBase, dstScan);
853 PTR_ADD(srcBase, srcScan);
854 }
855 return;
856 }
857
858 if (srcScan == width && dstScan == 4*width) {
859 width *= height;
860 height = 1;
861 }
862
863 ADD_SUFF(IntArgbToIntArgbPreConvert)(pixLut, buff, 256, 1,
864 pSrcInfo, pDstInfo, pPrim, pCompInfo);
865
866 dzero = vis_fzero();
867
868 for (j = 0; j < height; j++) {
869 mlib_u8 *src = srcBase;
870 mlib_s32 *dst = dstBase;
871
872 i = i0 = 0;
873
874 if ((mlib_s32)dst & 7) {
875 x = buff[src[i]];
876 if (x < 0) {
877 dst[i] = x;
878 }
879 i0 = 1;
880 }
881
882#pragma pipeloop(0)
883 for (i = i0; i <= (mlib_s32)width - 2; i += 2) {
884 dd = vis_freg_pair(((mlib_f32*)buff)[src[i]],
885 ((mlib_f32*)buff)[src[i + 1]]);
886 mask = vis_fcmplt32(dd, dzero);
887 vis_pst_32(dd, dst + i, mask);
888 }
889
890 for (; i < width; i++) {
891 x = buff[src[i]];
892 if (x < 0) {
893 dst[i] = x;
894 }
895 }
896
897 PTR_ADD(dstBase, dstScan);
898 PTR_ADD(srcBase, srcScan);
899 }
900}
901
902/***************************************************************/
903
904void ADD_SUFF(ByteIndexedBmToIntArgbPreScaleXparOver)(SCALE_PARAMS)
905{
906 jint *pixLut = pSrcInfo->lutBase;
907 mlib_s32 buff[256];
908 mlib_s32 dstScan = pDstInfo->scanStride;
909 mlib_s32 srcScan = pSrcInfo->scanStride;
910 mlib_d64 dd, dzero;
911 mlib_s32 i, j, x, mask;
912
913 if (width < 16) {
914 for (j = 0; j < height; j++) {
915 mlib_u8 *src = srcBase;
916 mlib_s32 *dst = dstBase;
917 mlib_s32 tmpsxloc = sxloc;
918
919 PTR_ADD(src, (syloc >> shift) * srcScan);
920
921 for (i = 0; i < width; i++) {
922 mlib_s32 a, r, g, b;
923 mlib_s32 x = pixLut[src[tmpsxloc >> shift]];
924 tmpsxloc += sxinc;
925 if (x < 0) {
926 b = x & 0xff;
927 g = (x >> 8) & 0xff;
928 r = (x >> 16) & 0xff;
929 a = (mlib_u32)x >> 24;
930 r = mul8table[a][r];
931 g = mul8table[a][g];
932 b = mul8table[a][b];
933 dst[i] = (a << 24) | (r << 16) | (g << 8) | b;
934 }
935 }
936
937 PTR_ADD(dstBase, dstScan);
938 syloc += syinc;
939 }
940 return;
941 }
942
943 ADD_SUFF(IntArgbToIntArgbPreConvert)(pixLut, buff, 256, 1,
944 pSrcInfo, pDstInfo, pPrim, pCompInfo);
945
946 dzero = vis_fzero();
947
948 for (j = 0; j < height; j++) {
949 mlib_u8 *src = srcBase;
950 mlib_s32 *dst = dstBase;
951 mlib_s32 *dst_end = dst + width;
952 mlib_s32 tmpsxloc = sxloc;
953
954 PTR_ADD(src, (syloc >> shift) * srcScan);
955
956 if ((mlib_s32)dst & 7) {
957 x = buff[src[tmpsxloc >> shift]];
958 tmpsxloc += sxinc;
959 if (x < 0) {
960 *dst = x;
961 }
962 dst++;
963 }
964
965#pragma pipeloop(0)
966 for (; dst <= dst_end - 2; dst += 2) {
967 dd = LOAD_2F32(buff, src[tmpsxloc >> shift],
968 src[(tmpsxloc + sxinc) >> shift]);
969 tmpsxloc += 2*sxinc;
970 mask = vis_fcmplt32(dd, dzero);
971 vis_pst_32(dd, dst, mask);
972 }
973
974 for (; dst < dst_end; dst++) {
975 x = buff[src[tmpsxloc >> shift]];
976 tmpsxloc += sxinc;
977 if (x < 0) {
978 *dst = x;
979 }
980 }
981
982 PTR_ADD(dstBase, dstScan);
983 syloc += syinc;
984 }
985}
986
987/***************************************************************/
988
989void ADD_SUFF(ByteIndexedBmToIntArgbPreXparBgCopy)(BCOPY_PARAMS)
990{
991 jint *pixLut = pSrcInfo->lutBase;
992 mlib_s32 buff[256];
993 mlib_s32 dstScan = pDstInfo->scanStride;
994 mlib_s32 srcScan = pSrcInfo->scanStride;
995 mlib_d64 dd, dzero, d_bgpixel;
996 mlib_s32 i, j, x, mask;
997
998 if (width < 16) {
999 for (j = 0; j < height; j++) {
1000 mlib_u8 *src = srcBase;
1001 mlib_s32 *dst = dstBase;
1002
1003 for (i = 0; i < width; i++) {
1004 x = pixLut[src[i]];
1005 if (x < 0) {
1006 mlib_s32 a, r, g, b;
1007 b = x & 0xff;
1008 g = (x >> 8) & 0xff;
1009 r = (x >> 16) & 0xff;
1010 a = (mlib_u32)x >> 24;
1011 r = mul8table[a][r];
1012 g = mul8table[a][g];
1013 b = mul8table[a][b];
1014 dst[i] = (a << 24) | (r << 16) | (g << 8) | b;
1015 } else {
1016 dst[i] = bgpixel;
1017 }
1018 }
1019
1020 PTR_ADD(dstBase, dstScan);
1021 PTR_ADD(srcBase, srcScan);
1022 }
1023 return;
1024 }
1025
1026 ADD_SUFF(IntArgbToIntArgbPreConvert)(pixLut, buff, 256, 1,
1027 pSrcInfo, pDstInfo, pPrim, pCompInfo);
1028
1029 if (srcScan == width && dstScan == 4*width) {
1030 width *= height;
1031 height = 1;
1032 }
1033
1034 dzero = vis_fzero();
1035 d_bgpixel = vis_to_double_dup(bgpixel);
1036
1037 for (j = 0; j < height; j++) {
1038 mlib_u8 *src = srcBase;
1039 mlib_s32 *dst = dstBase;
1040 mlib_s32 *dst_end;
1041
1042 dst_end = dst + width;
1043
1044 if ((mlib_s32)dst & 7) {
1045 x = buff[*src++];
1046 if (x < 0) {
1047 *dst = x;
1048 } else {
1049 *dst = bgpixel;
1050 }
1051 dst++;
1052 }
1053
1054#pragma pipeloop(0)
1055 for (; dst <= (dst_end - 2); dst += 2) {
1056 dd = vis_freg_pair(((mlib_f32*)buff)[src[0]],
1057 ((mlib_f32*)buff)[src[1]]);
1058 mask = vis_fcmplt32(dd, dzero);
1059 *(mlib_d64*)dst = d_bgpixel;
1060 vis_pst_32(dd, dst, mask);
1061 src += 2;
1062 }
1063
1064 while (dst < dst_end) {
1065 x = buff[*src++];
1066 if (x < 0) {
1067 *dst = x;
1068 } else {
1069 *dst = bgpixel;
1070 }
1071 dst++;
1072 }
1073
1074 PTR_ADD(dstBase, dstScan);
1075 PTR_ADD(srcBase, srcScan);
1076 }
1077}
1078
1079/***************************************************************/
1080
1081void ADD_SUFF(IntArgbPreDrawGlyphListAA)(SurfaceDataRasInfo * pRasInfo,
1082 ImageRef *glyphs,
1083 jint totalGlyphs,
1084 jint fgpixel, jint argbcolor,
1085 jint clipLeft, jint clipTop,
1086 jint clipRight, jint clipBottom,
1087 NativePrimitive * pPrim,
1088 CompositeInfo * pCompInfo)
1089{
1090 mlib_s32 glyphCounter;
1091 mlib_s32 scan = pRasInfo->scanStride;
1092 mlib_u8 *dstBase, *dstBase0;
1093 mlib_s32 i, j;
1094 mlib_d64 dmix0, dmix1, dd, d0, d1, e0, e1;
1095 mlib_d64 done, d_half;
1096 mlib_s32 pix;
1097 mlib_f32 srcG_f;
1098
1099 done = vis_to_double_dup(0x7fff7fff);
1100 d_half = vis_to_double_dup((1 << (16 + 6)) | (1 << 6));
1101
1102 srcG_f = vis_to_float(argbcolor);
1103
1104 for (glyphCounter = 0; glyphCounter < totalGlyphs; glyphCounter++) {
1105 const jubyte *pixels, *pixels0;
1106 unsigned int rowBytes;
1107 int left, top;
1108 int width, height;
1109 int right, bottom;
1110
1111 pixels = (const jubyte *) glyphs[glyphCounter].pixels;
1112
1113 if (!pixels) continue;
1114
1115 left = glyphs[glyphCounter].x;
1116 top = glyphs[glyphCounter].y;
1117 width = glyphs[glyphCounter].width;
1118 height = glyphs[glyphCounter].height;
1119 rowBytes = width;
1120 right = left + width;
1121 bottom = top + height;
1122 if (left < clipLeft) {
1123 pixels += clipLeft - left;
1124 left = clipLeft;
1125 }
1126 if (top < clipTop) {
1127 pixels += (clipTop - top) * rowBytes;
1128 top = clipTop;
1129 }
1130 if (right > clipRight) {
1131 right = clipRight;
1132 }
1133 if (bottom > clipBottom) {
1134 bottom = clipBottom;
1135 }
1136 if (right <= left || bottom <= top) {
1137 continue;
1138 }
1139 width = right - left;
1140 height = bottom - top;
1141
1142 dstBase = pRasInfo->rasBase;
1143 PTR_ADD(dstBase, top*scan + 4*left);
1144
1145 pixels0 = pixels;
1146 dstBase0 = dstBase;
1147
1148 for (j = 0; j < height; j++) {
1149 mlib_u8 *src = (void*)pixels;
1150 mlib_s32 *dst, *dst_end;
1151
1152 dst = (void*)dstBase;
1153 dst_end = dst + width;
1154
1155 ADD_SUFF(IntArgbPreToIntArgbConvert)(dstBase, dstBase, width, 1,
1156 pRasInfo, pRasInfo,
1157 pPrim, pCompInfo);
1158
1159 vis_write_gsr(0 << 3);
1160
1161 if ((mlib_s32)dst & 7) {
1162 pix = *src++;
1163 dd = vis_fpadd16(MUL8_VIS(srcG_f, pix), d_half);
1164 dd = vis_fpadd16(MUL8_VIS(*(mlib_f32*)dst, 255 - pix), dd);
1165 *(mlib_f32*)dst = vis_fpack16(dd);
1166 dst++;
1167 }
1168
1169#pragma pipeloop(0)
1170 for (; dst <= (dst_end - 2); dst += 2) {
1171 dmix0 = vis_freg_pair(((mlib_f32 *)vis_mul8s_tbl)[src[0]],
1172 ((mlib_f32 *)vis_mul8s_tbl)[src[1]]);
1173 dmix1 = vis_fpsub16(done, dmix0);
1174 src += 2;
1175
1176 dd = *(mlib_d64*)dst;
1177 d0 = vis_fmul8x16al(srcG_f, vis_read_hi(dmix0));
1178 d1 = vis_fmul8x16al(srcG_f, vis_read_lo(dmix0));
1179 e0 = vis_fmul8x16al(vis_read_hi(dd), vis_read_hi(dmix1));
1180 e1 = vis_fmul8x16al(vis_read_lo(dd), vis_read_lo(dmix1));
1181 d0 = vis_fpadd16(vis_fpadd16(d0, d_half), e0);
1182 d1 = vis_fpadd16(vis_fpadd16(d1, d_half), e1);
1183 dd = vis_fpack16_pair(d0, d1);
1184
1185 *(mlib_d64*)dst = dd;
1186 }
1187
1188 while (dst < dst_end) {
1189 pix = *src++;
1190 dd = vis_fpadd16(MUL8_VIS(srcG_f, pix), d_half);
1191 dd = vis_fpadd16(MUL8_VIS(*(mlib_f32*)dst, 255 - pix), dd);
1192 *(mlib_f32*)dst = vis_fpack16(dd);
1193 dst++;
1194 }
1195
1196 ADD_SUFF(IntArgbToIntArgbPreConvert)(dstBase, dstBase, width, 1,
1197 pRasInfo, pRasInfo,
1198 pPrim, pCompInfo);
1199
1200 PTR_ADD(dstBase, scan);
1201 pixels += rowBytes;
1202 }
1203
1204 pixels = pixels0;
1205 dstBase = dstBase0;
1206
1207 for (j = 0; j < height; j++) {
1208 mlib_u8 *src = (void*)pixels;
1209 mlib_s32 *dst = (void*)dstBase;
1210
1211 for (i = 0; i < width; i++) {
1212 if (src[i] == 255) dst[i] = fgpixel;
1213 }
1214 PTR_ADD(dstBase, scan);
1215 pixels += rowBytes;
1216 }
1217 }
1218}
1219
1220/***************************************************************/
1221
1222#endif /* JAVA2D_NO_MLIB */