J. Duke | 319a3b9 | 2007-12-01 00:00:00 +0000 | [diff] [blame^] | 1 | /* |
| 2 | * Copyright 2003-2005 Sun Microsystems, Inc. All Rights Reserved. |
| 3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
| 4 | * |
| 5 | * This code is free software; you can redistribute it and/or modify it |
| 6 | * under the terms of the GNU General Public License version 2 only, as |
| 7 | * published by the Free Software Foundation. Sun designates this |
| 8 | * particular file as subject to the "Classpath" exception as provided |
| 9 | * by Sun in the LICENSE file that accompanied this code. |
| 10 | * |
| 11 | * This code is distributed in the hope that it will be useful, but WITHOUT |
| 12 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| 13 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| 14 | * version 2 for more details (a copy is included in the LICENSE file that |
| 15 | * accompanied this code). |
| 16 | * |
| 17 | * You should have received a copy of the GNU General Public License version |
| 18 | * 2 along with this work; if not, write to the Free Software Foundation, |
| 19 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
| 20 | * |
| 21 | * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, |
| 22 | * CA 95054 USA or visit www.sun.com if you need additional information or |
| 23 | * have any questions. |
| 24 | */ |
| 25 | |
| 26 | #if !defined(JAVA2D_NO_MLIB) || defined(MLIB_ADD_SUFF) |
| 27 | |
| 28 | #include <vis_proto.h> |
| 29 | #include "java2d_Mlib.h" |
| 30 | #include "vis_AlphaMacros.h" |
| 31 | |
| 32 | /***************************************************************/ |
| 33 | |
| 34 | mlib_d64 vis_d64_div_tbl[256] = { |
| 35 | 0 , 1.0000000000, 0.5000000000, 0.3333333333, |
| 36 | 0.2500000000, 0.2000000000, 0.1666666667, 0.1428571429, |
| 37 | 0.1250000000, 0.1111111111, 0.1000000000, 0.0909090909, |
| 38 | 0.0833333333, 0.0769230769, 0.0714285714, 0.0666666667, |
| 39 | 0.0625000000, 0.0588235294, 0.0555555556, 0.0526315789, |
| 40 | 0.0500000000, 0.0476190476, 0.0454545455, 0.0434782609, |
| 41 | 0.0416666667, 0.0400000000, 0.0384615385, 0.0370370370, |
| 42 | 0.0357142857, 0.0344827586, 0.0333333333, 0.0322580645, |
| 43 | 0.0312500000, 0.0303030303, 0.0294117647, 0.0285714286, |
| 44 | 0.0277777778, 0.0270270270, 0.0263157895, 0.0256410256, |
| 45 | 0.0250000000, 0.0243902439, 0.0238095238, 0.0232558140, |
| 46 | 0.0227272727, 0.0222222222, 0.0217391304, 0.0212765957, |
| 47 | 0.0208333333, 0.0204081633, 0.0200000000, 0.0196078431, |
| 48 | 0.0192307692, 0.0188679245, 0.0185185185, 0.0181818182, |
| 49 | 0.0178571429, 0.0175438596, 0.0172413793, 0.0169491525, |
| 50 | 0.0166666667, 0.0163934426, 0.0161290323, 0.0158730159, |
| 51 | 0.0156250000, 0.0153846154, 0.0151515152, 0.0149253731, |
| 52 | 0.0147058824, 0.0144927536, 0.0142857143, 0.0140845070, |
| 53 | 0.0138888889, 0.0136986301, 0.0135135135, 0.0133333333, |
| 54 | 0.0131578947, 0.0129870130, 0.0128205128, 0.0126582278, |
| 55 | 0.0125000000, 0.0123456790, 0.0121951220, 0.0120481928, |
| 56 | 0.0119047619, 0.0117647059, 0.0116279070, 0.0114942529, |
| 57 | 0.0113636364, 0.0112359551, 0.0111111111, 0.0109890110, |
| 58 | 0.0108695652, 0.0107526882, 0.0106382979, 0.0105263158, |
| 59 | 0.0104166667, 0.0103092784, 0.0102040816, 0.0101010101, |
| 60 | 0.0100000000, 0.0099009901, 0.0098039216, 0.0097087379, |
| 61 | 0.0096153846, 0.0095238095, 0.0094339623, 0.0093457944, |
| 62 | 0.0092592593, 0.0091743119, 0.0090909091, 0.0090090090, |
| 63 | 0.0089285714, 0.0088495575, 0.0087719298, 0.0086956522, |
| 64 | 0.0086206897, 0.0085470085, 0.0084745763, 0.0084033613, |
| 65 | 0.0083333333, 0.0082644628, 0.0081967213, 0.0081300813, |
| 66 | 0.0080645161, 0.0080000000, 0.0079365079, 0.0078740157, |
| 67 | 0.0078125000, 0.0077519380, 0.0076923077, 0.0076335878, |
| 68 | 0.0075757576, 0.0075187970, 0.0074626866, 0.0074074074, |
| 69 | 0.0073529412, 0.0072992701, 0.0072463768, 0.0071942446, |
| 70 | 0.0071428571, 0.0070921986, 0.0070422535, 0.0069930070, |
| 71 | 0.0069444444, 0.0068965517, 0.0068493151, 0.0068027211, |
| 72 | 0.0067567568, 0.0067114094, 0.0066666667, 0.0066225166, |
| 73 | 0.0065789474, 0.0065359477, 0.0064935065, 0.0064516129, |
| 74 | 0.0064102564, 0.0063694268, 0.0063291139, 0.0062893082, |
| 75 | 0.0062500000, 0.0062111801, 0.0061728395, 0.0061349693, |
| 76 | 0.0060975610, 0.0060606061, 0.0060240964, 0.0059880240, |
| 77 | 0.0059523810, 0.0059171598, 0.0058823529, 0.0058479532, |
| 78 | 0.0058139535, 0.0057803468, 0.0057471264, 0.0057142857, |
| 79 | 0.0056818182, 0.0056497175, 0.0056179775, 0.0055865922, |
| 80 | 0.0055555556, 0.0055248619, 0.0054945055, 0.0054644809, |
| 81 | 0.0054347826, 0.0054054054, 0.0053763441, 0.0053475936, |
| 82 | 0.0053191489, 0.0052910053, 0.0052631579, 0.0052356021, |
| 83 | 0.0052083333, 0.0051813472, 0.0051546392, 0.0051282051, |
| 84 | 0.0051020408, 0.0050761421, 0.0050505051, 0.0050251256, |
| 85 | 0.0050000000, 0.0049751244, 0.0049504950, 0.0049261084, |
| 86 | 0.0049019608, 0.0048780488, 0.0048543689, 0.0048309179, |
| 87 | 0.0048076923, 0.0047846890, 0.0047619048, 0.0047393365, |
| 88 | 0.0047169811, 0.0046948357, 0.0046728972, 0.0046511628, |
| 89 | 0.0046296296, 0.0046082949, 0.0045871560, 0.0045662100, |
| 90 | 0.0045454545, 0.0045248869, 0.0045045045, 0.0044843049, |
| 91 | 0.0044642857, 0.0044444444, 0.0044247788, 0.0044052863, |
| 92 | 0.0043859649, 0.0043668122, 0.0043478261, 0.0043290043, |
| 93 | 0.0043103448, 0.0042918455, 0.0042735043, 0.0042553191, |
| 94 | 0.0042372881, 0.0042194093, 0.0042016807, 0.0041841004, |
| 95 | 0.0041666667, 0.0041493776, 0.0041322314, 0.0041152263, |
| 96 | 0.0040983607, 0.0040816327, 0.0040650407, 0.0040485830, |
| 97 | 0.0040322581, 0.0040160643, 0.0040000000, 0.0039840637, |
| 98 | 0.0039682540, 0.0039525692, 0.0039370079, 0.0039215686 |
| 99 | }; |
| 100 | |
| 101 | /***************************************************************/ |
| 102 | |
/* Pairs the 32-bit value ff with itself through vis_freg_pair(). */
#define D64_FROM_F32x2(ff) vis_freg_pair((ff), (ff))
| 105 | |
| 106 | /***************************************************************/ |
| 107 | |
/*
 * Integer RGB-to-gray conversion: weights 77, 150 and 29 (which sum to 256)
 * are applied to r, g and b; 128 is added so that the final shift by 8
 * rounds to nearest instead of truncating.
 */
#define RGB2GRAY(r, g, b)                                         \
    ((77 * (r) + 150 * (g) + 29 * (b) + 128) >> 8)
| 110 | |
| 111 | /***************************************************************/ |
| 112 | |
| 113 | static void vis_ByteGrayBlendMask(mlib_u8 *rasBase, |
| 114 | mlib_u8 *pMask, |
| 115 | mlib_s32 rasScan, |
| 116 | mlib_s32 maskScan, |
| 117 | mlib_s32 width, |
| 118 | mlib_s32 height, |
| 119 | mlib_s32 *a0_S32, |
| 120 | mlib_s32 srcG) |
| 121 | { |
| 122 | mlib_f32 ff, srcG_f; |
| 123 | mlib_d64 dd, a0, a1; |
| 124 | mlib_d64 d_one = vis_to_double_dup(0x7FFF7FFF); |
| 125 | mlib_d64 d_round = vis_to_double_dup(((1 << 16) | 1) << 6); |
| 126 | mlib_s32 j, pathA; |
| 127 | |
| 128 | maskScan -= width; |
| 129 | |
| 130 | srcG = (srcG << 8) | srcG; |
| 131 | srcG_f = vis_to_float((srcG << 16) | srcG); |
| 132 | |
| 133 | vis_write_gsr((0 << 3) | 6); |
| 134 | |
| 135 | for (j = 0; j < height; j++) { |
| 136 | mlib_u8 *dst = rasBase; |
| 137 | mlib_u8 *dst_end; |
| 138 | |
| 139 | dst_end = dst + width; |
| 140 | |
| 141 | while (((mlib_s32)dst & 3) && dst < dst_end) { |
| 142 | dd = vis_ld_u8(dst); |
| 143 | pathA = *pMask++; |
| 144 | a0 = vis_ld_u16(a0_S32 + pathA); |
| 145 | a1 = vis_fpsub16(d_one, a0); |
| 146 | a0 = vis_fmul8x16(vis_read_lo(dd), a0); |
| 147 | a1 = vis_fmul8x16(srcG_f, a1); |
| 148 | a0 = vis_fpadd16(a0, d_round); |
| 149 | a0 = vis_fpadd16(a0, a1); |
| 150 | ff = vis_fpack16(a0); |
| 151 | dd = D64_FROM_F32x2(ff); |
| 152 | vis_st_u8(dd, dst); |
| 153 | dst++; |
| 154 | } |
| 155 | |
| 156 | #pragma pipeloop(0) |
| 157 | for (; dst <= (dst_end - 4); dst += 4) { |
| 158 | ff = *(mlib_f32*)dst; |
| 159 | a0 = vis_faligndata(vis_ld_u16(a0_S32 + pMask[3]), a0); |
| 160 | a0 = vis_faligndata(vis_ld_u16(a0_S32 + pMask[2]), a0); |
| 161 | a0 = vis_faligndata(vis_ld_u16(a0_S32 + pMask[1]), a0); |
| 162 | a0 = vis_faligndata(vis_ld_u16(a0_S32 + pMask[0]), a0); |
| 163 | a1 = vis_fpsub16(d_one, a0); |
| 164 | a0 = vis_fmul8x16(ff, a0); |
| 165 | a1 = vis_fmul8x16(srcG_f, a1); |
| 166 | a0 = vis_fpadd16(a0, d_round); |
| 167 | a0 = vis_fpadd16(a0, a1); |
| 168 | ff = vis_fpack16(a0); |
| 169 | *(mlib_f32*)dst = ff; |
| 170 | pMask += 4; |
| 171 | } |
| 172 | |
| 173 | while (dst < dst_end) { |
| 174 | dd = vis_ld_u8(dst); |
| 175 | pathA = *pMask++; |
| 176 | a0 = vis_ld_u16(a0_S32 + pathA); |
| 177 | a1 = vis_fpsub16(d_one, a0); |
| 178 | a0 = vis_fmul8x16(vis_read_lo(dd), a0); |
| 179 | a1 = vis_fmul8x16(srcG_f, a1); |
| 180 | a0 = vis_fpadd16(a0, d_round); |
| 181 | a0 = vis_fpadd16(a0, a1); |
| 182 | ff = vis_fpack16(a0); |
| 183 | dd = D64_FROM_F32x2(ff); |
| 184 | vis_st_u8(dd, dst); |
| 185 | dst++; |
| 186 | } |
| 187 | |
| 188 | PTR_ADD(rasBase, rasScan); |
| 189 | PTR_ADD(pMask, maskScan); |
| 190 | } |
| 191 | } |
| 192 | |
| 193 | /***************************************************************/ |
| 194 | |
| 195 | static void vis_ByteGrayBlendMask2(mlib_u8 *rasBase, |
| 196 | mlib_u8 *pMask, |
| 197 | mlib_s32 rasScan, |
| 198 | mlib_s32 maskScan, |
| 199 | mlib_s32 width, |
| 200 | mlib_s32 height, |
| 201 | mlib_s32 *a0_S32, |
| 202 | mlib_s16 *d1_S16) |
| 203 | { |
| 204 | mlib_f32 ff; |
| 205 | mlib_d64 dd, a0, a1; |
| 206 | mlib_s32 j, pathA; |
| 207 | |
| 208 | maskScan -= width; |
| 209 | |
| 210 | vis_write_gsr((0 << 3) | 6); |
| 211 | |
| 212 | for (j = 0; j < height; j++) { |
| 213 | mlib_u8 *dst = rasBase; |
| 214 | mlib_u8 *dst_end; |
| 215 | |
| 216 | dst_end = dst + width; |
| 217 | |
| 218 | while (((mlib_s32)dst & 3) && dst < dst_end) { |
| 219 | dd = vis_ld_u8(dst); |
| 220 | pathA = *pMask++; |
| 221 | a0 = vis_ld_u16(a0_S32 + pathA); |
| 222 | a1 = vis_ld_u16(d1_S16 + pathA); |
| 223 | a0 = vis_fmul8x16(vis_read_lo(dd), a0); |
| 224 | a0 = vis_fpadd16(a0, a1); |
| 225 | ff = vis_fpack16(a0); |
| 226 | dd = D64_FROM_F32x2(ff); |
| 227 | vis_st_u8(dd, dst); |
| 228 | dst++; |
| 229 | } |
| 230 | |
| 231 | #pragma pipeloop(0) |
| 232 | for (; dst <= (dst_end - 4); dst += 4) { |
| 233 | ff = *(mlib_f32*)dst; |
| 234 | a0 = vis_faligndata(vis_ld_u16(a0_S32 + pMask[3]), a0); |
| 235 | a0 = vis_faligndata(vis_ld_u16(a0_S32 + pMask[2]), a0); |
| 236 | a0 = vis_faligndata(vis_ld_u16(a0_S32 + pMask[1]), a0); |
| 237 | a0 = vis_faligndata(vis_ld_u16(a0_S32 + pMask[0]), a0); |
| 238 | a1 = vis_faligndata(vis_ld_u16(d1_S16 + pMask[3]), a1); |
| 239 | a1 = vis_faligndata(vis_ld_u16(d1_S16 + pMask[2]), a1); |
| 240 | a1 = vis_faligndata(vis_ld_u16(d1_S16 + pMask[1]), a1); |
| 241 | a1 = vis_faligndata(vis_ld_u16(d1_S16 + pMask[0]), a1); |
| 242 | a0 = vis_fmul8x16(ff, a0); |
| 243 | a0 = vis_fpadd16(a0, a1); |
| 244 | ff = vis_fpack16(a0); |
| 245 | *(mlib_f32*)dst = ff; |
| 246 | pMask += 4; |
| 247 | } |
| 248 | |
| 249 | while (dst < dst_end) { |
| 250 | dd = vis_ld_u8(dst); |
| 251 | pathA = *pMask++; |
| 252 | a0 = vis_ld_u16(a0_S32 + pathA); |
| 253 | a1 = vis_ld_u16(d1_S16 + pathA); |
| 254 | a0 = vis_fmul8x16(vis_read_lo(dd), a0); |
| 255 | a0 = vis_fpadd16(a0, a1); |
| 256 | ff = vis_fpack16(a0); |
| 257 | dd = D64_FROM_F32x2(ff); |
| 258 | vis_st_u8(dd, dst); |
| 259 | dst++; |
| 260 | } |
| 261 | |
| 262 | PTR_ADD(rasBase, rasScan); |
| 263 | PTR_ADD(pMask, maskScan); |
| 264 | } |
| 265 | } |
| 266 | |
| 267 | /***************************************************************/ |
| 268 | |
| 269 | static void vis_ByteGrayBlend(mlib_u8 *rasBase, |
| 270 | mlib_s32 rasScan, |
| 271 | mlib_s32 width, |
| 272 | mlib_s32 height, |
| 273 | mlib_f32 a0, |
| 274 | mlib_d64 d1) |
| 275 | { |
| 276 | mlib_f32 ff; |
| 277 | mlib_d64 dd; |
| 278 | mlib_s32 j; |
| 279 | |
| 280 | vis_write_gsr((0 << 3) | 6); |
| 281 | |
| 282 | for (j = 0; j < height; j++) { |
| 283 | mlib_u8 *dst = rasBase; |
| 284 | mlib_u8 *dst_end; |
| 285 | |
| 286 | dst_end = dst + width; |
| 287 | |
| 288 | while (((mlib_s32)dst & 3) && dst < dst_end) { |
| 289 | dd = vis_ld_u8(dst); |
| 290 | dd = vis_fmul8x16al(vis_read_lo(dd), a0); |
| 291 | dd = vis_fpadd16(dd, d1); |
| 292 | ff = vis_fpack16(dd); |
| 293 | dd = D64_FROM_F32x2(ff); |
| 294 | vis_st_u8(dd, dst); |
| 295 | dst++; |
| 296 | } |
| 297 | |
| 298 | #pragma pipeloop(0) |
| 299 | for (; dst <= (dst_end - 4); dst += 4) { |
| 300 | ff = *(mlib_f32*)dst; |
| 301 | dd = vis_fmul8x16al(ff, a0); |
| 302 | dd = vis_fpadd16(dd, d1); |
| 303 | ff = vis_fpack16(dd); |
| 304 | *(mlib_f32*)dst = ff; |
| 305 | } |
| 306 | |
| 307 | while (dst < dst_end) { |
| 308 | dd = vis_ld_u8(dst); |
| 309 | dd = vis_fmul8x16al(vis_read_lo(dd), a0); |
| 310 | dd = vis_fpadd16(dd, d1); |
| 311 | ff = vis_fpack16(dd); |
| 312 | dd = D64_FROM_F32x2(ff); |
| 313 | vis_st_u8(dd, dst); |
| 314 | dst++; |
| 315 | } |
| 316 | |
| 317 | PTR_ADD(rasBase, rasScan); |
| 318 | } |
| 319 | } |
| 320 | |
| 321 | /***************************************************************/ |
| 322 | |
| 323 | void ADD_SUFF(ByteGraySrcMaskFill)(void *rasBase, |
| 324 | jubyte *pMask, |
| 325 | jint maskOff, |
| 326 | jint maskScan, |
| 327 | jint width, |
| 328 | jint height, |
| 329 | jint fgColor, |
| 330 | SurfaceDataRasInfo *pRasInfo, |
| 331 | NativePrimitive *pPrim, |
| 332 | CompositeInfo *pCompInfo) |
| 333 | { |
| 334 | mlib_s32 rasScan = pRasInfo->scanStride; |
| 335 | mlib_s32 r, g, b, i, j; |
| 336 | mlib_s32 a0_S32[256]; |
| 337 | mlib_s32 resA, resG, dstF, pathA, srcA, srcG; |
| 338 | mlib_d64 dscale; |
| 339 | |
| 340 | b = (fgColor) & 0xff; |
| 341 | g = (fgColor >> 8) & 0xff; |
| 342 | r = (fgColor >> 16) & 0xff; |
| 343 | srcA = (fgColor >> 24) & 0xff; |
| 344 | srcG = RGB2GRAY(r, g, b); |
| 345 | |
| 346 | #ifdef LOOPS_OLD_VERSION |
| 347 | if (srcA == 0) return; |
| 348 | |
| 349 | if (pMask == NULL) { |
| 350 | AnyByteSetRect(pRasInfo, 0, 0, width, height, srcG, pPrim, pCompInfo); |
| 351 | return; |
| 352 | } |
| 353 | #else |
| 354 | if (pMask == NULL) { |
| 355 | if (srcA == 0) srcG = 0; |
| 356 | ADD_SUFF(AnyByteSetRect)(pRasInfo, |
| 357 | pRasInfo->bounds.x1, pRasInfo->bounds.y1, |
| 358 | pRasInfo->bounds.x2, pRasInfo->bounds.y2, |
| 359 | srcG, pPrim, pCompInfo); |
| 360 | return; |
| 361 | } |
| 362 | #endif |
| 363 | |
| 364 | pMask += maskOff; |
| 365 | |
| 366 | if (width < 32) { |
| 367 | srcG = mul8table[srcA][srcG]; |
| 368 | |
| 369 | for (j = 0; j < height; j++) { |
| 370 | mlib_u8 *dst = rasBase; |
| 371 | |
| 372 | for (i = 0; i < width; i++) { |
| 373 | pathA = pMask[i]; |
| 374 | resG = dst[i]; |
| 375 | dstF = 0xff - pathA; |
| 376 | resA = dstF + mul8table[pathA][srcA]; |
| 377 | resG = mul8table[dstF][resG] + mul8table[pathA][srcG]; |
| 378 | resG = div8table[resA][resG]; |
| 379 | dst[i] = resG; |
| 380 | } |
| 381 | |
| 382 | PTR_ADD(rasBase, rasScan); |
| 383 | PTR_ADD(pMask, maskScan); |
| 384 | } |
| 385 | return; |
| 386 | } |
| 387 | |
| 388 | dscale = (mlib_d64)(1 << 15)*(1 << 16); |
| 389 | a0_S32[0] = dscale - 1; |
| 390 | #pragma pipeloop(0) |
| 391 | for (pathA = 1; pathA < 256; pathA++) { |
| 392 | dstF = 0xff - pathA; |
| 393 | resA = dstF + mul8table[pathA][srcA]; |
| 394 | dstF = dscale*dstF*vis_d64_div_tbl[resA]; |
| 395 | a0_S32[pathA] = dstF; |
| 396 | } |
| 397 | |
| 398 | vis_ByteGrayBlendMask(rasBase, pMask, rasScan, maskScan, |
| 399 | width, height, a0_S32, srcG); |
| 400 | } |
| 401 | |
| 402 | /***************************************************************/ |
| 403 | |
| 404 | void ADD_SUFF(ByteGraySrcOverMaskFill)(void *rasBase, |
| 405 | jubyte *pMask, |
| 406 | jint maskOff, |
| 407 | jint maskScan, |
| 408 | jint width, |
| 409 | jint height, |
| 410 | jint fgColor, |
| 411 | SurfaceDataRasInfo *pRasInfo, |
| 412 | NativePrimitive *pPrim, |
| 413 | CompositeInfo *pCompInfo) |
| 414 | { |
| 415 | mlib_s32 rasScan = pRasInfo->scanStride; |
| 416 | mlib_s32 r, g, b, i, j; |
| 417 | mlib_s32 dstA, pathA, srcA, srcG; |
| 418 | |
| 419 | b = (fgColor) & 0xff; |
| 420 | g = (fgColor >> 8) & 0xff; |
| 421 | r = (fgColor >> 16) & 0xff; |
| 422 | srcA = (fgColor >> 24) & 0xff; |
| 423 | srcG = RGB2GRAY(r, g, b); |
| 424 | |
| 425 | if (srcA == 0) return; |
| 426 | |
| 427 | if (pMask != NULL) pMask += maskOff; |
| 428 | |
| 429 | if (width < 16) { |
| 430 | srcG = mul8table[srcA][srcG]; |
| 431 | |
| 432 | if (pMask != NULL) { |
| 433 | for (j = 0; j < height; j++) { |
| 434 | mlib_u8 *dst = rasBase; |
| 435 | |
| 436 | for (i = 0; i < width; i++) { |
| 437 | pathA = pMask[i]; |
| 438 | dstA = 0xff - mul8table[pathA][srcA]; |
| 439 | dst[i] = mul8table[dstA][dst[i]] + mul8table[pathA][srcG]; |
| 440 | } |
| 441 | |
| 442 | PTR_ADD(rasBase, rasScan); |
| 443 | PTR_ADD(pMask, maskScan); |
| 444 | } |
| 445 | } else { |
| 446 | mlib_u8 *mul8_dstA = mul8table[0xff - srcA]; |
| 447 | |
| 448 | for (j = 0; j < height; j++) { |
| 449 | mlib_u8 *dst = rasBase; |
| 450 | |
| 451 | for (i = 0; i < width; i++) { |
| 452 | dst[i] = mul8_dstA[dst[i]] + srcG; |
| 453 | } |
| 454 | |
| 455 | PTR_ADD(rasBase, rasScan); |
| 456 | } |
| 457 | } |
| 458 | return; |
| 459 | } |
| 460 | |
| 461 | if (pMask != NULL) { |
| 462 | mlib_s32 a0_S32[256]; |
| 463 | mlib_d64 dscale = (mlib_d64)(1 << 15)*(1 << 16); |
| 464 | |
| 465 | a0_S32[0] = dscale - 1; |
| 466 | #pragma pipeloop(0) |
| 467 | for (pathA = 1; pathA < 256; pathA++) { |
| 468 | a0_S32[pathA] = dscale - pathA*srcA*(dscale*(1.0/(255*255))); |
| 469 | } |
| 470 | |
| 471 | vis_ByteGrayBlendMask(rasBase, pMask, rasScan, maskScan, |
| 472 | width, height, a0_S32, srcG); |
| 473 | } else { |
| 474 | mlib_s32 a0_int = (1 << 15)*(1.0 - srcA*(1.0/255)); |
| 475 | mlib_f32 a0, a1, srcG_f; |
| 476 | mlib_d64 d1; |
| 477 | mlib_d64 d_round = vis_to_double_dup(((1 << 16) | 1) << 6); |
| 478 | |
| 479 | srcG = (srcG << 8) | srcG; |
| 480 | srcG_f = vis_to_float((srcG << 16) | srcG); |
| 481 | |
| 482 | a0 = vis_to_float(a0_int); |
| 483 | a1 = vis_to_float(0x7FFF - a0_int); |
| 484 | d1 = vis_fmul8x16al(srcG_f, a1); |
| 485 | d1 = vis_fpadd16(d1, d_round); |
| 486 | |
| 487 | vis_ByteGrayBlend(rasBase, rasScan, width, height, a0, d1); |
| 488 | } |
| 489 | } |
| 490 | |
| 491 | /***************************************************************/ |
| 492 | |
| 493 | void ADD_SUFF(ByteGrayAlphaMaskFill)(void *rasBase, |
| 494 | jubyte *pMask, |
| 495 | jint maskOff, |
| 496 | jint maskScan, |
| 497 | jint width, |
| 498 | jint height, |
| 499 | jint fgColor, |
| 500 | SurfaceDataRasInfo *pRasInfo, |
| 501 | NativePrimitive *pPrim, |
| 502 | CompositeInfo *pCompInfo) |
| 503 | { |
| 504 | mlib_s32 rasScan = pRasInfo->scanStride; |
| 505 | mlib_s32 pathA, srcA, srcG, dstA, dstFbase, srcFbase; |
| 506 | mlib_s32 SrcOpAnd, SrcOpXor, SrcOpAdd; |
| 507 | mlib_s32 DstOpAnd, DstOpXor, DstOpAdd; |
| 508 | mlib_s32 r, g, b; |
| 509 | mlib_s32 resA, resG, srcF, i, j; |
| 510 | |
| 511 | b = (fgColor) & 0xff; |
| 512 | g = (fgColor >> 8) & 0xff; |
| 513 | r = (fgColor >> 16) & 0xff; |
| 514 | srcA = (fgColor >> 24) & 0xff; |
| 515 | srcG = RGB2GRAY(r, g, b); |
| 516 | |
| 517 | SrcOpAnd = (AlphaRules[pCompInfo->rule].srcOps).andval; |
| 518 | SrcOpXor = (AlphaRules[pCompInfo->rule].srcOps).xorval; |
| 519 | SrcOpAdd = (AlphaRules[pCompInfo->rule].srcOps).addval - SrcOpXor; |
| 520 | |
| 521 | DstOpAnd = (AlphaRules[pCompInfo->rule].dstOps).andval; |
| 522 | DstOpXor = (AlphaRules[pCompInfo->rule].dstOps).xorval; |
| 523 | DstOpAdd = (AlphaRules[pCompInfo->rule].dstOps).addval - DstOpXor; |
| 524 | |
| 525 | dstFbase = ((((srcA) & DstOpAnd) ^ DstOpXor) + DstOpAdd); |
| 526 | srcFbase = ((((0xff) & SrcOpAnd) ^ SrcOpXor) + SrcOpAdd); |
| 527 | |
| 528 | if (pMask != NULL) pMask += maskOff; |
| 529 | |
| 530 | srcG = mul8table[srcA][srcG]; |
| 531 | |
| 532 | if (width < 100) { |
| 533 | if (pMask != NULL) { |
| 534 | for (j = 0; j < height; j++) { |
| 535 | mlib_u8 *dst = rasBase; |
| 536 | |
| 537 | for (i = 0; i < width; i++) { |
| 538 | pathA = pMask[i]; |
| 539 | srcF = mul8table[pathA][srcFbase]; |
| 540 | dstA = 0xff - pathA + mul8table[pathA][dstFbase]; |
| 541 | |
| 542 | resA = dstA + mul8table[srcF][srcA]; |
| 543 | resG = mul8table[dstA][dst[i]] + mul8table[srcF][srcG]; |
| 544 | |
| 545 | dst[i] = div8table[resA][resG]; |
| 546 | } |
| 547 | |
| 548 | PTR_ADD(rasBase, rasScan); |
| 549 | PTR_ADD(pMask, maskScan); |
| 550 | } |
| 551 | } else { |
| 552 | mlib_u8 *mul8_dstA; |
| 553 | |
| 554 | srcF = srcFbase; |
| 555 | dstA = dstFbase; |
| 556 | resA = dstA + mul8table[srcF][srcA]; |
| 557 | srcG = mul8table[srcF][srcG]; |
| 558 | mul8_dstA = mul8table[dstA]; |
| 559 | |
| 560 | for (j = 0; j < height; j++) { |
| 561 | mlib_u8 *dst = rasBase; |
| 562 | |
| 563 | for (i = 0; i < width; i++) { |
| 564 | resG = mul8_dstA[dst[i]] + srcG; |
| 565 | dst[i] = div8table[resA][resG]; |
| 566 | } |
| 567 | |
| 568 | PTR_ADD(rasBase, rasScan); |
| 569 | } |
| 570 | } |
| 571 | return; |
| 572 | } |
| 573 | |
| 574 | if (pMask != NULL) { |
| 575 | mlib_s32 a0_S32[256]; |
| 576 | mlib_s16 d1_S16[256]; |
| 577 | mlib_d64 dscale = (mlib_d64)(1 << 15)*(1 << 16); |
| 578 | |
| 579 | a0_S32[0] = dscale - 1; |
| 580 | d1_S16[0] = (1 << 6); |
| 581 | #pragma pipeloop(0) |
| 582 | for (pathA = 1; pathA < 256; pathA++) { |
| 583 | srcF = mul8table[pathA][srcFbase]; |
| 584 | dstA = 0xff - pathA + mul8table[pathA][dstFbase]; |
| 585 | resA = dstA + mul8table[srcF][srcA]; |
| 586 | a0_S32[pathA] = dscale*dstA*vis_d64_div_tbl[resA] + (1 << 15); |
| 587 | d1_S16[pathA] = (1 << 7)*srcG*srcF*vis_d64_div_tbl[resA] + (1 << 6); |
| 588 | } |
| 589 | |
| 590 | vis_ByteGrayBlendMask2(rasBase, pMask, rasScan, maskScan, |
| 591 | width, height, a0_S32, d1_S16); |
| 592 | } else { |
| 593 | mlib_d64 dscale = (mlib_d64)(1 << 15)*(1 << 16); |
| 594 | mlib_s32 _a0, _d1; |
| 595 | mlib_f32 a0; |
| 596 | mlib_d64 d1; |
| 597 | |
| 598 | srcF = srcFbase; |
| 599 | dstA = dstFbase; |
| 600 | resA = dstA + mul8table[srcF][srcA]; |
| 601 | _a0 = dscale*dstA*vis_d64_div_tbl[resA] + (1 << 15); |
| 602 | _d1 = (1 << 7)*vis_d64_div_tbl[resA]*srcF*srcG + (1 << 6); |
| 603 | |
| 604 | a0 = vis_to_float(_a0 >> 16); |
| 605 | d1 = vis_to_double_dup((_d1 << 16) | _d1); |
| 606 | |
| 607 | vis_ByteGrayBlend(rasBase, rasScan, width, height, a0, d1); |
| 608 | } |
| 609 | } |
| 610 | |
| 611 | /***************************************************************/ |
| 612 | |
| 613 | #define TBL_MUL ((mlib_s16*)vis_mul8s_tbl + 1) |
| 614 | |
| 615 | void ADD_SUFF(ByteGrayDrawGlyphListAA)(GLYPH_LIST_PARAMS) |
| 616 | { |
| 617 | mlib_s32 glyphCounter; |
| 618 | mlib_s32 scan = pRasInfo->scanStride; |
| 619 | mlib_u8 *pPix; |
| 620 | mlib_s32 srcG; |
| 621 | int i, j, r, g, b; |
| 622 | mlib_d64 mix0, mix1, dd, d0, d1, e0, e1, fgpixel_d; |
| 623 | mlib_d64 done, d_half; |
| 624 | mlib_s32 pix, mask0, mask1; |
| 625 | mlib_f32 fgpixel_f, srcG_f; |
| 626 | |
| 627 | b = (argbcolor) & 0xff; |
| 628 | g = (argbcolor >> 8) & 0xff; |
| 629 | r = (argbcolor >> 16) & 0xff; |
| 630 | srcG = RGB2GRAY(r, g, b); |
| 631 | |
| 632 | if (clipRight - clipLeft >= 16) { |
| 633 | done = vis_to_double_dup(0x7fff7fff); |
| 634 | d_half = vis_to_double_dup((1 << (16 + 6)) | (1 << 6)); |
| 635 | |
| 636 | fgpixel &= 0xff; |
| 637 | fgpixel_f = F32_FROM_U8x4(fgpixel, fgpixel, fgpixel, fgpixel); |
| 638 | fgpixel_d = vis_freg_pair(fgpixel_f, fgpixel_f); |
| 639 | srcG_f = F32_FROM_U8x4(srcG, srcG, srcG, srcG); |
| 640 | |
| 641 | vis_write_gsr((0 << 3) | 6); |
| 642 | } |
| 643 | |
| 644 | for (glyphCounter = 0; glyphCounter < totalGlyphs; glyphCounter++) { |
| 645 | const jubyte *pixels; |
| 646 | unsigned int rowBytes; |
| 647 | int left, top; |
| 648 | int width, height; |
| 649 | int right, bottom; |
| 650 | |
| 651 | pixels = (const jubyte *) glyphs[glyphCounter].pixels; |
| 652 | |
| 653 | if (!pixels) continue; |
| 654 | |
| 655 | left = glyphs[glyphCounter].x; |
| 656 | top = glyphs[glyphCounter].y; |
| 657 | width = glyphs[glyphCounter].width; |
| 658 | height = glyphs[glyphCounter].height; |
| 659 | rowBytes = width; |
| 660 | right = left + width; |
| 661 | bottom = top + height; |
| 662 | if (left < clipLeft) { |
| 663 | pixels += clipLeft - left; |
| 664 | left = clipLeft; |
| 665 | } |
| 666 | if (top < clipTop) { |
| 667 | pixels += (clipTop - top) * rowBytes; |
| 668 | top = clipTop; |
| 669 | } |
| 670 | if (right > clipRight) { |
| 671 | right = clipRight; |
| 672 | } |
| 673 | if (bottom > clipBottom) { |
| 674 | bottom = clipBottom; |
| 675 | } |
| 676 | if (right <= left || bottom <= top) { |
| 677 | continue; |
| 678 | } |
| 679 | width = right - left; |
| 680 | height = bottom - top; |
| 681 | |
| 682 | pPix = pRasInfo->rasBase; |
| 683 | PTR_ADD(pPix, top * scan + left); |
| 684 | |
| 685 | if (width < 16) { |
| 686 | for (j = 0; j < height; j++) { |
| 687 | for (i = 0; i < width; i++) { |
| 688 | jint dstG; |
| 689 | jint mixValSrc = pixels[i]; |
| 690 | if (mixValSrc) { |
| 691 | if (mixValSrc < 255) { |
| 692 | jint mixValDst = 255 - mixValSrc; |
| 693 | dstG = pPix[i]; |
| 694 | dstG = |
| 695 | mul8table[mixValDst][dstG] + |
| 696 | mul8table[mixValSrc][srcG]; |
| 697 | pPix[i] = dstG; |
| 698 | } else { |
| 699 | pPix[i] = fgpixel; |
| 700 | } |
| 701 | } |
| 702 | } |
| 703 | |
| 704 | PTR_ADD(pPix, scan); |
| 705 | pixels += rowBytes; |
| 706 | } |
| 707 | } else { |
| 708 | for (j = 0; j < height; j++) { |
| 709 | mlib_u8 *src = (void*)pixels; |
| 710 | mlib_u8 *dst = pPix; |
| 711 | mlib_u8 *dst_end = dst + width; |
| 712 | |
| 713 | while (((mlib_s32)dst & 7) && dst < dst_end) { |
| 714 | pix = *src++; |
| 715 | d0 = vis_fpadd16(MUL8_VIS(srcG_f, pix), d_half); |
| 716 | d1 = MUL8_VIS(vis_read_lo(vis_ld_u8(dst)), 255 - pix); |
| 717 | dd = vis_fpadd16(d0, d1); |
| 718 | vis_st_u8(D64_FROM_F32x2(vis_fpack16(dd)), dst); |
| 719 | if (pix == 255) *dst = fgpixel; |
| 720 | dst++; |
| 721 | } |
| 722 | |
| 723 | #pragma pipeloop(0) |
| 724 | for (; dst <= (dst_end - 8); dst += 8) { |
| 725 | mix0 = vis_faligndata(vis_ld_u16(TBL_MUL + 2*src[3]), mix0); |
| 726 | mix1 = vis_faligndata(vis_ld_u16(TBL_MUL + 2*src[7]), mix1); |
| 727 | mix0 = vis_faligndata(vis_ld_u16(TBL_MUL + 2*src[2]), mix0); |
| 728 | mix1 = vis_faligndata(vis_ld_u16(TBL_MUL + 2*src[6]), mix1); |
| 729 | mix0 = vis_faligndata(vis_ld_u16(TBL_MUL + 2*src[1]), mix0); |
| 730 | mix1 = vis_faligndata(vis_ld_u16(TBL_MUL + 2*src[5]), mix1); |
| 731 | mix0 = vis_faligndata(vis_ld_u16(TBL_MUL + 2*src[0]), mix0); |
| 732 | mix1 = vis_faligndata(vis_ld_u16(TBL_MUL + 2*src[4]), mix1); |
| 733 | src += 8; |
| 734 | |
| 735 | dd = *(mlib_d64*)dst; |
| 736 | d0 = vis_fpadd16(vis_fmul8x16(srcG_f, mix0), d_half); |
| 737 | d1 = vis_fpadd16(vis_fmul8x16(srcG_f, mix1), d_half); |
| 738 | e0 = vis_fmul8x16(vis_read_hi(dd), vis_fpsub16(done, mix0)); |
| 739 | e1 = vis_fmul8x16(vis_read_lo(dd), vis_fpsub16(done, mix1)); |
| 740 | d0 = vis_fpadd16(e0, d0); |
| 741 | d1 = vis_fpadd16(e1, d1); |
| 742 | dd = vis_fpack16_pair(d0, d1); |
| 743 | |
| 744 | mask0 = vis_fcmplt16(mix0, done); |
| 745 | mask1 = vis_fcmplt16(mix1, done); |
| 746 | |
| 747 | *(mlib_d64*)dst = fgpixel_d; |
| 748 | vis_pst_8(dd, dst, (mask0 << 4) | mask1); |
| 749 | } |
| 750 | |
| 751 | while (dst < dst_end) { |
| 752 | pix = *src++; |
| 753 | d0 = vis_fpadd16(MUL8_VIS(srcG_f, pix), d_half); |
| 754 | d1 = MUL8_VIS(vis_read_lo(vis_ld_u8(dst)), 255 - pix); |
| 755 | dd = vis_fpadd16(d0, d1); |
| 756 | vis_st_u8(D64_FROM_F32x2(vis_fpack16(dd)), dst); |
| 757 | if (pix == 255) *dst = fgpixel; |
| 758 | dst++; |
| 759 | } |
| 760 | |
| 761 | PTR_ADD(pPix, scan); |
| 762 | pixels += rowBytes; |
| 763 | } |
| 764 | } |
| 765 | } |
| 766 | } |
| 767 | |
| 768 | /***************************************************************/ |
| 769 | |
| 770 | #endif |