blob: 37f21b9d65e3b6a6e3d7c003974d0cd431343687 [file] [log] [blame]
/*
 * Copyright 2003-2005 Sun Microsystems, Inc. All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Sun designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Sun in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 */
25
26#if !defined(JAVA2D_NO_MLIB) || defined(MLIB_ADD_SUFF)
27
28#include <vis_proto.h>
29#include "java2d_Mlib.h"
30#include "vis_AlphaMacros.h"
31
32/***************************************************************/
33
34mlib_d64 vis_d64_div_tbl[256] = {
35 0 , 1.0000000000, 0.5000000000, 0.3333333333,
36 0.2500000000, 0.2000000000, 0.1666666667, 0.1428571429,
37 0.1250000000, 0.1111111111, 0.1000000000, 0.0909090909,
38 0.0833333333, 0.0769230769, 0.0714285714, 0.0666666667,
39 0.0625000000, 0.0588235294, 0.0555555556, 0.0526315789,
40 0.0500000000, 0.0476190476, 0.0454545455, 0.0434782609,
41 0.0416666667, 0.0400000000, 0.0384615385, 0.0370370370,
42 0.0357142857, 0.0344827586, 0.0333333333, 0.0322580645,
43 0.0312500000, 0.0303030303, 0.0294117647, 0.0285714286,
44 0.0277777778, 0.0270270270, 0.0263157895, 0.0256410256,
45 0.0250000000, 0.0243902439, 0.0238095238, 0.0232558140,
46 0.0227272727, 0.0222222222, 0.0217391304, 0.0212765957,
47 0.0208333333, 0.0204081633, 0.0200000000, 0.0196078431,
48 0.0192307692, 0.0188679245, 0.0185185185, 0.0181818182,
49 0.0178571429, 0.0175438596, 0.0172413793, 0.0169491525,
50 0.0166666667, 0.0163934426, 0.0161290323, 0.0158730159,
51 0.0156250000, 0.0153846154, 0.0151515152, 0.0149253731,
52 0.0147058824, 0.0144927536, 0.0142857143, 0.0140845070,
53 0.0138888889, 0.0136986301, 0.0135135135, 0.0133333333,
54 0.0131578947, 0.0129870130, 0.0128205128, 0.0126582278,
55 0.0125000000, 0.0123456790, 0.0121951220, 0.0120481928,
56 0.0119047619, 0.0117647059, 0.0116279070, 0.0114942529,
57 0.0113636364, 0.0112359551, 0.0111111111, 0.0109890110,
58 0.0108695652, 0.0107526882, 0.0106382979, 0.0105263158,
59 0.0104166667, 0.0103092784, 0.0102040816, 0.0101010101,
60 0.0100000000, 0.0099009901, 0.0098039216, 0.0097087379,
61 0.0096153846, 0.0095238095, 0.0094339623, 0.0093457944,
62 0.0092592593, 0.0091743119, 0.0090909091, 0.0090090090,
63 0.0089285714, 0.0088495575, 0.0087719298, 0.0086956522,
64 0.0086206897, 0.0085470085, 0.0084745763, 0.0084033613,
65 0.0083333333, 0.0082644628, 0.0081967213, 0.0081300813,
66 0.0080645161, 0.0080000000, 0.0079365079, 0.0078740157,
67 0.0078125000, 0.0077519380, 0.0076923077, 0.0076335878,
68 0.0075757576, 0.0075187970, 0.0074626866, 0.0074074074,
69 0.0073529412, 0.0072992701, 0.0072463768, 0.0071942446,
70 0.0071428571, 0.0070921986, 0.0070422535, 0.0069930070,
71 0.0069444444, 0.0068965517, 0.0068493151, 0.0068027211,
72 0.0067567568, 0.0067114094, 0.0066666667, 0.0066225166,
73 0.0065789474, 0.0065359477, 0.0064935065, 0.0064516129,
74 0.0064102564, 0.0063694268, 0.0063291139, 0.0062893082,
75 0.0062500000, 0.0062111801, 0.0061728395, 0.0061349693,
76 0.0060975610, 0.0060606061, 0.0060240964, 0.0059880240,
77 0.0059523810, 0.0059171598, 0.0058823529, 0.0058479532,
78 0.0058139535, 0.0057803468, 0.0057471264, 0.0057142857,
79 0.0056818182, 0.0056497175, 0.0056179775, 0.0055865922,
80 0.0055555556, 0.0055248619, 0.0054945055, 0.0054644809,
81 0.0054347826, 0.0054054054, 0.0053763441, 0.0053475936,
82 0.0053191489, 0.0052910053, 0.0052631579, 0.0052356021,
83 0.0052083333, 0.0051813472, 0.0051546392, 0.0051282051,
84 0.0051020408, 0.0050761421, 0.0050505051, 0.0050251256,
85 0.0050000000, 0.0049751244, 0.0049504950, 0.0049261084,
86 0.0049019608, 0.0048780488, 0.0048543689, 0.0048309179,
87 0.0048076923, 0.0047846890, 0.0047619048, 0.0047393365,
88 0.0047169811, 0.0046948357, 0.0046728972, 0.0046511628,
89 0.0046296296, 0.0046082949, 0.0045871560, 0.0045662100,
90 0.0045454545, 0.0045248869, 0.0045045045, 0.0044843049,
91 0.0044642857, 0.0044444444, 0.0044247788, 0.0044052863,
92 0.0043859649, 0.0043668122, 0.0043478261, 0.0043290043,
93 0.0043103448, 0.0042918455, 0.0042735043, 0.0042553191,
94 0.0042372881, 0.0042194093, 0.0042016807, 0.0041841004,
95 0.0041666667, 0.0041493776, 0.0041322314, 0.0041152263,
96 0.0040983607, 0.0040816327, 0.0040650407, 0.0040485830,
97 0.0040322581, 0.0040160643, 0.0040000000, 0.0039840637,
98 0.0039682540, 0.0039525692, 0.0039370079, 0.0039215686
99};
100
101/***************************************************************/
102
/* Build a 64-bit VIS value by passing the same 32-bit value as both
 * arguments of vis_freg_pair.  Note: ff is expanded twice. */
#define D64_FROM_F32x2(ff) vis_freg_pair(ff, ff)
105
106/***************************************************************/
107
/*
 * Integer RGB -> gray conversion: weighted sum with weights 77/150/29
 * (which add up to 256), plus 128 for rounding, then divided by 256.
 * Each argument is expanded exactly once.
 */
#define RGB2GRAY(r, g, b)                                          \
    ((77 * (r) + 150 * (g) + 29 * (b) + 128) >> 8)
110
111/***************************************************************/
112
113static void vis_ByteGrayBlendMask(mlib_u8 *rasBase,
114 mlib_u8 *pMask,
115 mlib_s32 rasScan,
116 mlib_s32 maskScan,
117 mlib_s32 width,
118 mlib_s32 height,
119 mlib_s32 *a0_S32,
120 mlib_s32 srcG)
121{
122 mlib_f32 ff, srcG_f;
123 mlib_d64 dd, a0, a1;
124 mlib_d64 d_one = vis_to_double_dup(0x7FFF7FFF);
125 mlib_d64 d_round = vis_to_double_dup(((1 << 16) | 1) << 6);
126 mlib_s32 j, pathA;
127
128 maskScan -= width;
129
130 srcG = (srcG << 8) | srcG;
131 srcG_f = vis_to_float((srcG << 16) | srcG);
132
133 vis_write_gsr((0 << 3) | 6);
134
135 for (j = 0; j < height; j++) {
136 mlib_u8 *dst = rasBase;
137 mlib_u8 *dst_end;
138
139 dst_end = dst + width;
140
141 while (((mlib_s32)dst & 3) && dst < dst_end) {
142 dd = vis_ld_u8(dst);
143 pathA = *pMask++;
144 a0 = vis_ld_u16(a0_S32 + pathA);
145 a1 = vis_fpsub16(d_one, a0);
146 a0 = vis_fmul8x16(vis_read_lo(dd), a0);
147 a1 = vis_fmul8x16(srcG_f, a1);
148 a0 = vis_fpadd16(a0, d_round);
149 a0 = vis_fpadd16(a0, a1);
150 ff = vis_fpack16(a0);
151 dd = D64_FROM_F32x2(ff);
152 vis_st_u8(dd, dst);
153 dst++;
154 }
155
156#pragma pipeloop(0)
157 for (; dst <= (dst_end - 4); dst += 4) {
158 ff = *(mlib_f32*)dst;
159 a0 = vis_faligndata(vis_ld_u16(a0_S32 + pMask[3]), a0);
160 a0 = vis_faligndata(vis_ld_u16(a0_S32 + pMask[2]), a0);
161 a0 = vis_faligndata(vis_ld_u16(a0_S32 + pMask[1]), a0);
162 a0 = vis_faligndata(vis_ld_u16(a0_S32 + pMask[0]), a0);
163 a1 = vis_fpsub16(d_one, a0);
164 a0 = vis_fmul8x16(ff, a0);
165 a1 = vis_fmul8x16(srcG_f, a1);
166 a0 = vis_fpadd16(a0, d_round);
167 a0 = vis_fpadd16(a0, a1);
168 ff = vis_fpack16(a0);
169 *(mlib_f32*)dst = ff;
170 pMask += 4;
171 }
172
173 while (dst < dst_end) {
174 dd = vis_ld_u8(dst);
175 pathA = *pMask++;
176 a0 = vis_ld_u16(a0_S32 + pathA);
177 a1 = vis_fpsub16(d_one, a0);
178 a0 = vis_fmul8x16(vis_read_lo(dd), a0);
179 a1 = vis_fmul8x16(srcG_f, a1);
180 a0 = vis_fpadd16(a0, d_round);
181 a0 = vis_fpadd16(a0, a1);
182 ff = vis_fpack16(a0);
183 dd = D64_FROM_F32x2(ff);
184 vis_st_u8(dd, dst);
185 dst++;
186 }
187
188 PTR_ADD(rasBase, rasScan);
189 PTR_ADD(pMask, maskScan);
190 }
191}
192
193/***************************************************************/
194
195static void vis_ByteGrayBlendMask2(mlib_u8 *rasBase,
196 mlib_u8 *pMask,
197 mlib_s32 rasScan,
198 mlib_s32 maskScan,
199 mlib_s32 width,
200 mlib_s32 height,
201 mlib_s32 *a0_S32,
202 mlib_s16 *d1_S16)
203{
204 mlib_f32 ff;
205 mlib_d64 dd, a0, a1;
206 mlib_s32 j, pathA;
207
208 maskScan -= width;
209
210 vis_write_gsr((0 << 3) | 6);
211
212 for (j = 0; j < height; j++) {
213 mlib_u8 *dst = rasBase;
214 mlib_u8 *dst_end;
215
216 dst_end = dst + width;
217
218 while (((mlib_s32)dst & 3) && dst < dst_end) {
219 dd = vis_ld_u8(dst);
220 pathA = *pMask++;
221 a0 = vis_ld_u16(a0_S32 + pathA);
222 a1 = vis_ld_u16(d1_S16 + pathA);
223 a0 = vis_fmul8x16(vis_read_lo(dd), a0);
224 a0 = vis_fpadd16(a0, a1);
225 ff = vis_fpack16(a0);
226 dd = D64_FROM_F32x2(ff);
227 vis_st_u8(dd, dst);
228 dst++;
229 }
230
231#pragma pipeloop(0)
232 for (; dst <= (dst_end - 4); dst += 4) {
233 ff = *(mlib_f32*)dst;
234 a0 = vis_faligndata(vis_ld_u16(a0_S32 + pMask[3]), a0);
235 a0 = vis_faligndata(vis_ld_u16(a0_S32 + pMask[2]), a0);
236 a0 = vis_faligndata(vis_ld_u16(a0_S32 + pMask[1]), a0);
237 a0 = vis_faligndata(vis_ld_u16(a0_S32 + pMask[0]), a0);
238 a1 = vis_faligndata(vis_ld_u16(d1_S16 + pMask[3]), a1);
239 a1 = vis_faligndata(vis_ld_u16(d1_S16 + pMask[2]), a1);
240 a1 = vis_faligndata(vis_ld_u16(d1_S16 + pMask[1]), a1);
241 a1 = vis_faligndata(vis_ld_u16(d1_S16 + pMask[0]), a1);
242 a0 = vis_fmul8x16(ff, a0);
243 a0 = vis_fpadd16(a0, a1);
244 ff = vis_fpack16(a0);
245 *(mlib_f32*)dst = ff;
246 pMask += 4;
247 }
248
249 while (dst < dst_end) {
250 dd = vis_ld_u8(dst);
251 pathA = *pMask++;
252 a0 = vis_ld_u16(a0_S32 + pathA);
253 a1 = vis_ld_u16(d1_S16 + pathA);
254 a0 = vis_fmul8x16(vis_read_lo(dd), a0);
255 a0 = vis_fpadd16(a0, a1);
256 ff = vis_fpack16(a0);
257 dd = D64_FROM_F32x2(ff);
258 vis_st_u8(dd, dst);
259 dst++;
260 }
261
262 PTR_ADD(rasBase, rasScan);
263 PTR_ADD(pMask, maskScan);
264 }
265}
266
267/***************************************************************/
268
269static void vis_ByteGrayBlend(mlib_u8 *rasBase,
270 mlib_s32 rasScan,
271 mlib_s32 width,
272 mlib_s32 height,
273 mlib_f32 a0,
274 mlib_d64 d1)
275{
276 mlib_f32 ff;
277 mlib_d64 dd;
278 mlib_s32 j;
279
280 vis_write_gsr((0 << 3) | 6);
281
282 for (j = 0; j < height; j++) {
283 mlib_u8 *dst = rasBase;
284 mlib_u8 *dst_end;
285
286 dst_end = dst + width;
287
288 while (((mlib_s32)dst & 3) && dst < dst_end) {
289 dd = vis_ld_u8(dst);
290 dd = vis_fmul8x16al(vis_read_lo(dd), a0);
291 dd = vis_fpadd16(dd, d1);
292 ff = vis_fpack16(dd);
293 dd = D64_FROM_F32x2(ff);
294 vis_st_u8(dd, dst);
295 dst++;
296 }
297
298#pragma pipeloop(0)
299 for (; dst <= (dst_end - 4); dst += 4) {
300 ff = *(mlib_f32*)dst;
301 dd = vis_fmul8x16al(ff, a0);
302 dd = vis_fpadd16(dd, d1);
303 ff = vis_fpack16(dd);
304 *(mlib_f32*)dst = ff;
305 }
306
307 while (dst < dst_end) {
308 dd = vis_ld_u8(dst);
309 dd = vis_fmul8x16al(vis_read_lo(dd), a0);
310 dd = vis_fpadd16(dd, d1);
311 ff = vis_fpack16(dd);
312 dd = D64_FROM_F32x2(ff);
313 vis_st_u8(dd, dst);
314 dst++;
315 }
316
317 PTR_ADD(rasBase, rasScan);
318 }
319}
320
321/***************************************************************/
322
323void ADD_SUFF(ByteGraySrcMaskFill)(void *rasBase,
324 jubyte *pMask,
325 jint maskOff,
326 jint maskScan,
327 jint width,
328 jint height,
329 jint fgColor,
330 SurfaceDataRasInfo *pRasInfo,
331 NativePrimitive *pPrim,
332 CompositeInfo *pCompInfo)
333{
334 mlib_s32 rasScan = pRasInfo->scanStride;
335 mlib_s32 r, g, b, i, j;
336 mlib_s32 a0_S32[256];
337 mlib_s32 resA, resG, dstF, pathA, srcA, srcG;
338 mlib_d64 dscale;
339
340 b = (fgColor) & 0xff;
341 g = (fgColor >> 8) & 0xff;
342 r = (fgColor >> 16) & 0xff;
343 srcA = (fgColor >> 24) & 0xff;
344 srcG = RGB2GRAY(r, g, b);
345
346#ifdef LOOPS_OLD_VERSION
347 if (srcA == 0) return;
348
349 if (pMask == NULL) {
350 AnyByteSetRect(pRasInfo, 0, 0, width, height, srcG, pPrim, pCompInfo);
351 return;
352 }
353#else
354 if (pMask == NULL) {
355 if (srcA == 0) srcG = 0;
356 ADD_SUFF(AnyByteSetRect)(pRasInfo,
357 pRasInfo->bounds.x1, pRasInfo->bounds.y1,
358 pRasInfo->bounds.x2, pRasInfo->bounds.y2,
359 srcG, pPrim, pCompInfo);
360 return;
361 }
362#endif
363
364 pMask += maskOff;
365
366 if (width < 32) {
367 srcG = mul8table[srcA][srcG];
368
369 for (j = 0; j < height; j++) {
370 mlib_u8 *dst = rasBase;
371
372 for (i = 0; i < width; i++) {
373 pathA = pMask[i];
374 resG = dst[i];
375 dstF = 0xff - pathA;
376 resA = dstF + mul8table[pathA][srcA];
377 resG = mul8table[dstF][resG] + mul8table[pathA][srcG];
378 resG = div8table[resA][resG];
379 dst[i] = resG;
380 }
381
382 PTR_ADD(rasBase, rasScan);
383 PTR_ADD(pMask, maskScan);
384 }
385 return;
386 }
387
388 dscale = (mlib_d64)(1 << 15)*(1 << 16);
389 a0_S32[0] = dscale - 1;
390#pragma pipeloop(0)
391 for (pathA = 1; pathA < 256; pathA++) {
392 dstF = 0xff - pathA;
393 resA = dstF + mul8table[pathA][srcA];
394 dstF = dscale*dstF*vis_d64_div_tbl[resA];
395 a0_S32[pathA] = dstF;
396 }
397
398 vis_ByteGrayBlendMask(rasBase, pMask, rasScan, maskScan,
399 width, height, a0_S32, srcG);
400}
401
402/***************************************************************/
403
404void ADD_SUFF(ByteGraySrcOverMaskFill)(void *rasBase,
405 jubyte *pMask,
406 jint maskOff,
407 jint maskScan,
408 jint width,
409 jint height,
410 jint fgColor,
411 SurfaceDataRasInfo *pRasInfo,
412 NativePrimitive *pPrim,
413 CompositeInfo *pCompInfo)
414{
415 mlib_s32 rasScan = pRasInfo->scanStride;
416 mlib_s32 r, g, b, i, j;
417 mlib_s32 dstA, pathA, srcA, srcG;
418
419 b = (fgColor) & 0xff;
420 g = (fgColor >> 8) & 0xff;
421 r = (fgColor >> 16) & 0xff;
422 srcA = (fgColor >> 24) & 0xff;
423 srcG = RGB2GRAY(r, g, b);
424
425 if (srcA == 0) return;
426
427 if (pMask != NULL) pMask += maskOff;
428
429 if (width < 16) {
430 srcG = mul8table[srcA][srcG];
431
432 if (pMask != NULL) {
433 for (j = 0; j < height; j++) {
434 mlib_u8 *dst = rasBase;
435
436 for (i = 0; i < width; i++) {
437 pathA = pMask[i];
438 dstA = 0xff - mul8table[pathA][srcA];
439 dst[i] = mul8table[dstA][dst[i]] + mul8table[pathA][srcG];
440 }
441
442 PTR_ADD(rasBase, rasScan);
443 PTR_ADD(pMask, maskScan);
444 }
445 } else {
446 mlib_u8 *mul8_dstA = mul8table[0xff - srcA];
447
448 for (j = 0; j < height; j++) {
449 mlib_u8 *dst = rasBase;
450
451 for (i = 0; i < width; i++) {
452 dst[i] = mul8_dstA[dst[i]] + srcG;
453 }
454
455 PTR_ADD(rasBase, rasScan);
456 }
457 }
458 return;
459 }
460
461 if (pMask != NULL) {
462 mlib_s32 a0_S32[256];
463 mlib_d64 dscale = (mlib_d64)(1 << 15)*(1 << 16);
464
465 a0_S32[0] = dscale - 1;
466#pragma pipeloop(0)
467 for (pathA = 1; pathA < 256; pathA++) {
468 a0_S32[pathA] = dscale - pathA*srcA*(dscale*(1.0/(255*255)));
469 }
470
471 vis_ByteGrayBlendMask(rasBase, pMask, rasScan, maskScan,
472 width, height, a0_S32, srcG);
473 } else {
474 mlib_s32 a0_int = (1 << 15)*(1.0 - srcA*(1.0/255));
475 mlib_f32 a0, a1, srcG_f;
476 mlib_d64 d1;
477 mlib_d64 d_round = vis_to_double_dup(((1 << 16) | 1) << 6);
478
479 srcG = (srcG << 8) | srcG;
480 srcG_f = vis_to_float((srcG << 16) | srcG);
481
482 a0 = vis_to_float(a0_int);
483 a1 = vis_to_float(0x7FFF - a0_int);
484 d1 = vis_fmul8x16al(srcG_f, a1);
485 d1 = vis_fpadd16(d1, d_round);
486
487 vis_ByteGrayBlend(rasBase, rasScan, width, height, a0, d1);
488 }
489}
490
491/***************************************************************/
492
493void ADD_SUFF(ByteGrayAlphaMaskFill)(void *rasBase,
494 jubyte *pMask,
495 jint maskOff,
496 jint maskScan,
497 jint width,
498 jint height,
499 jint fgColor,
500 SurfaceDataRasInfo *pRasInfo,
501 NativePrimitive *pPrim,
502 CompositeInfo *pCompInfo)
503{
504 mlib_s32 rasScan = pRasInfo->scanStride;
505 mlib_s32 pathA, srcA, srcG, dstA, dstFbase, srcFbase;
506 mlib_s32 SrcOpAnd, SrcOpXor, SrcOpAdd;
507 mlib_s32 DstOpAnd, DstOpXor, DstOpAdd;
508 mlib_s32 r, g, b;
509 mlib_s32 resA, resG, srcF, i, j;
510
511 b = (fgColor) & 0xff;
512 g = (fgColor >> 8) & 0xff;
513 r = (fgColor >> 16) & 0xff;
514 srcA = (fgColor >> 24) & 0xff;
515 srcG = RGB2GRAY(r, g, b);
516
517 SrcOpAnd = (AlphaRules[pCompInfo->rule].srcOps).andval;
518 SrcOpXor = (AlphaRules[pCompInfo->rule].srcOps).xorval;
519 SrcOpAdd = (AlphaRules[pCompInfo->rule].srcOps).addval - SrcOpXor;
520
521 DstOpAnd = (AlphaRules[pCompInfo->rule].dstOps).andval;
522 DstOpXor = (AlphaRules[pCompInfo->rule].dstOps).xorval;
523 DstOpAdd = (AlphaRules[pCompInfo->rule].dstOps).addval - DstOpXor;
524
525 dstFbase = ((((srcA) & DstOpAnd) ^ DstOpXor) + DstOpAdd);
526 srcFbase = ((((0xff) & SrcOpAnd) ^ SrcOpXor) + SrcOpAdd);
527
528 if (pMask != NULL) pMask += maskOff;
529
530 srcG = mul8table[srcA][srcG];
531
532 if (width < 100) {
533 if (pMask != NULL) {
534 for (j = 0; j < height; j++) {
535 mlib_u8 *dst = rasBase;
536
537 for (i = 0; i < width; i++) {
538 pathA = pMask[i];
539 srcF = mul8table[pathA][srcFbase];
540 dstA = 0xff - pathA + mul8table[pathA][dstFbase];
541
542 resA = dstA + mul8table[srcF][srcA];
543 resG = mul8table[dstA][dst[i]] + mul8table[srcF][srcG];
544
545 dst[i] = div8table[resA][resG];
546 }
547
548 PTR_ADD(rasBase, rasScan);
549 PTR_ADD(pMask, maskScan);
550 }
551 } else {
552 mlib_u8 *mul8_dstA;
553
554 srcF = srcFbase;
555 dstA = dstFbase;
556 resA = dstA + mul8table[srcF][srcA];
557 srcG = mul8table[srcF][srcG];
558 mul8_dstA = mul8table[dstA];
559
560 for (j = 0; j < height; j++) {
561 mlib_u8 *dst = rasBase;
562
563 for (i = 0; i < width; i++) {
564 resG = mul8_dstA[dst[i]] + srcG;
565 dst[i] = div8table[resA][resG];
566 }
567
568 PTR_ADD(rasBase, rasScan);
569 }
570 }
571 return;
572 }
573
574 if (pMask != NULL) {
575 mlib_s32 a0_S32[256];
576 mlib_s16 d1_S16[256];
577 mlib_d64 dscale = (mlib_d64)(1 << 15)*(1 << 16);
578
579 a0_S32[0] = dscale - 1;
580 d1_S16[0] = (1 << 6);
581#pragma pipeloop(0)
582 for (pathA = 1; pathA < 256; pathA++) {
583 srcF = mul8table[pathA][srcFbase];
584 dstA = 0xff - pathA + mul8table[pathA][dstFbase];
585 resA = dstA + mul8table[srcF][srcA];
586 a0_S32[pathA] = dscale*dstA*vis_d64_div_tbl[resA] + (1 << 15);
587 d1_S16[pathA] = (1 << 7)*srcG*srcF*vis_d64_div_tbl[resA] + (1 << 6);
588 }
589
590 vis_ByteGrayBlendMask2(rasBase, pMask, rasScan, maskScan,
591 width, height, a0_S32, d1_S16);
592 } else {
593 mlib_d64 dscale = (mlib_d64)(1 << 15)*(1 << 16);
594 mlib_s32 _a0, _d1;
595 mlib_f32 a0;
596 mlib_d64 d1;
597
598 srcF = srcFbase;
599 dstA = dstFbase;
600 resA = dstA + mul8table[srcF][srcA];
601 _a0 = dscale*dstA*vis_d64_div_tbl[resA] + (1 << 15);
602 _d1 = (1 << 7)*vis_d64_div_tbl[resA]*srcF*srcG + (1 << 6);
603
604 a0 = vis_to_float(_a0 >> 16);
605 d1 = vis_to_double_dup((_d1 << 16) | _d1);
606
607 vis_ByteGrayBlend(rasBase, rasScan, width, height, a0, d1);
608 }
609}
610
611/***************************************************************/
612
/* vis_mul8s_tbl viewed as an array of 16-bit values, starting at element 1. */
#define TBL_MUL (((mlib_s16 *)vis_mul8s_tbl) + 1)
614
615void ADD_SUFF(ByteGrayDrawGlyphListAA)(GLYPH_LIST_PARAMS)
616{
617 mlib_s32 glyphCounter;
618 mlib_s32 scan = pRasInfo->scanStride;
619 mlib_u8 *pPix;
620 mlib_s32 srcG;
621 int i, j, r, g, b;
622 mlib_d64 mix0, mix1, dd, d0, d1, e0, e1, fgpixel_d;
623 mlib_d64 done, d_half;
624 mlib_s32 pix, mask0, mask1;
625 mlib_f32 fgpixel_f, srcG_f;
626
627 b = (argbcolor) & 0xff;
628 g = (argbcolor >> 8) & 0xff;
629 r = (argbcolor >> 16) & 0xff;
630 srcG = RGB2GRAY(r, g, b);
631
632 if (clipRight - clipLeft >= 16) {
633 done = vis_to_double_dup(0x7fff7fff);
634 d_half = vis_to_double_dup((1 << (16 + 6)) | (1 << 6));
635
636 fgpixel &= 0xff;
637 fgpixel_f = F32_FROM_U8x4(fgpixel, fgpixel, fgpixel, fgpixel);
638 fgpixel_d = vis_freg_pair(fgpixel_f, fgpixel_f);
639 srcG_f = F32_FROM_U8x4(srcG, srcG, srcG, srcG);
640
641 vis_write_gsr((0 << 3) | 6);
642 }
643
644 for (glyphCounter = 0; glyphCounter < totalGlyphs; glyphCounter++) {
645 const jubyte *pixels;
646 unsigned int rowBytes;
647 int left, top;
648 int width, height;
649 int right, bottom;
650
651 pixels = (const jubyte *) glyphs[glyphCounter].pixels;
652
653 if (!pixels) continue;
654
655 left = glyphs[glyphCounter].x;
656 top = glyphs[glyphCounter].y;
657 width = glyphs[glyphCounter].width;
658 height = glyphs[glyphCounter].height;
659 rowBytes = width;
660 right = left + width;
661 bottom = top + height;
662 if (left < clipLeft) {
663 pixels += clipLeft - left;
664 left = clipLeft;
665 }
666 if (top < clipTop) {
667 pixels += (clipTop - top) * rowBytes;
668 top = clipTop;
669 }
670 if (right > clipRight) {
671 right = clipRight;
672 }
673 if (bottom > clipBottom) {
674 bottom = clipBottom;
675 }
676 if (right <= left || bottom <= top) {
677 continue;
678 }
679 width = right - left;
680 height = bottom - top;
681
682 pPix = pRasInfo->rasBase;
683 PTR_ADD(pPix, top * scan + left);
684
685 if (width < 16) {
686 for (j = 0; j < height; j++) {
687 for (i = 0; i < width; i++) {
688 jint dstG;
689 jint mixValSrc = pixels[i];
690 if (mixValSrc) {
691 if (mixValSrc < 255) {
692 jint mixValDst = 255 - mixValSrc;
693 dstG = pPix[i];
694 dstG =
695 mul8table[mixValDst][dstG] +
696 mul8table[mixValSrc][srcG];
697 pPix[i] = dstG;
698 } else {
699 pPix[i] = fgpixel;
700 }
701 }
702 }
703
704 PTR_ADD(pPix, scan);
705 pixels += rowBytes;
706 }
707 } else {
708 for (j = 0; j < height; j++) {
709 mlib_u8 *src = (void*)pixels;
710 mlib_u8 *dst = pPix;
711 mlib_u8 *dst_end = dst + width;
712
713 while (((mlib_s32)dst & 7) && dst < dst_end) {
714 pix = *src++;
715 d0 = vis_fpadd16(MUL8_VIS(srcG_f, pix), d_half);
716 d1 = MUL8_VIS(vis_read_lo(vis_ld_u8(dst)), 255 - pix);
717 dd = vis_fpadd16(d0, d1);
718 vis_st_u8(D64_FROM_F32x2(vis_fpack16(dd)), dst);
719 if (pix == 255) *dst = fgpixel;
720 dst++;
721 }
722
723#pragma pipeloop(0)
724 for (; dst <= (dst_end - 8); dst += 8) {
725 mix0 = vis_faligndata(vis_ld_u16(TBL_MUL + 2*src[3]), mix0);
726 mix1 = vis_faligndata(vis_ld_u16(TBL_MUL + 2*src[7]), mix1);
727 mix0 = vis_faligndata(vis_ld_u16(TBL_MUL + 2*src[2]), mix0);
728 mix1 = vis_faligndata(vis_ld_u16(TBL_MUL + 2*src[6]), mix1);
729 mix0 = vis_faligndata(vis_ld_u16(TBL_MUL + 2*src[1]), mix0);
730 mix1 = vis_faligndata(vis_ld_u16(TBL_MUL + 2*src[5]), mix1);
731 mix0 = vis_faligndata(vis_ld_u16(TBL_MUL + 2*src[0]), mix0);
732 mix1 = vis_faligndata(vis_ld_u16(TBL_MUL + 2*src[4]), mix1);
733 src += 8;
734
735 dd = *(mlib_d64*)dst;
736 d0 = vis_fpadd16(vis_fmul8x16(srcG_f, mix0), d_half);
737 d1 = vis_fpadd16(vis_fmul8x16(srcG_f, mix1), d_half);
738 e0 = vis_fmul8x16(vis_read_hi(dd), vis_fpsub16(done, mix0));
739 e1 = vis_fmul8x16(vis_read_lo(dd), vis_fpsub16(done, mix1));
740 d0 = vis_fpadd16(e0, d0);
741 d1 = vis_fpadd16(e1, d1);
742 dd = vis_fpack16_pair(d0, d1);
743
744 mask0 = vis_fcmplt16(mix0, done);
745 mask1 = vis_fcmplt16(mix1, done);
746
747 *(mlib_d64*)dst = fgpixel_d;
748 vis_pst_8(dd, dst, (mask0 << 4) | mask1);
749 }
750
751 while (dst < dst_end) {
752 pix = *src++;
753 d0 = vis_fpadd16(MUL8_VIS(srcG_f, pix), d_half);
754 d1 = MUL8_VIS(vis_read_lo(vis_ld_u8(dst)), 255 - pix);
755 dd = vis_fpadd16(d0, d1);
756 vis_st_u8(D64_FROM_F32x2(vis_fpack16(dd)), dst);
757 if (pix == 255) *dst = fgpixel;
758 dst++;
759 }
760
761 PTR_ADD(pPix, scan);
762 pixels += rowBytes;
763 }
764 }
765 }
766}
767
768/***************************************************************/
769
770#endif