mtklein | 4977983 | 2015-08-10 12:58:17 -0700 | [diff] [blame] | 1 | /* |
| 2 | * Copyright 2015 Google Inc. |
| 3 | * |
| 4 | * Use of this source code is governed by a BSD-style license that can be |
| 5 | * found in the LICENSE file. |
| 6 | */ |
| 7 | |
| 8 | #ifndef SkBlitMask_opts_DEFINED |
| 9 | #define SkBlitMask_opts_DEFINED |
| 10 | |
| 11 | #include "Sk4px.h" |
| 12 | |
| 13 | namespace SK_OPTS_NS { |
| 14 | |
mtklein | 9b34114 | 2015-11-18 18:59:18 -0800 | [diff] [blame^] | 15 | #if defined(SK_ARM_HAS_NEON) |
// The Sk4px versions below will work fine with NEON, but we have had many indications
// that they don't perform as well as this NEON-specific code. TODO(mtklein): why?
| 18 | #include "SkColor_opts_neon.h" |
| 19 | |
| 20 | template <bool isColor> |
| 21 | static void D32_A8_Opaque_Color_neon(void* SK_RESTRICT dst, size_t dstRB, |
| 22 | const void* SK_RESTRICT maskPtr, size_t maskRB, |
| 23 | SkColor color, int width, int height) { |
| 24 | SkPMColor pmc = SkPreMultiplyColor(color); |
| 25 | SkPMColor* SK_RESTRICT device = (SkPMColor*)dst; |
| 26 | const uint8_t* SK_RESTRICT mask = (const uint8_t*)maskPtr; |
| 27 | uint8x8x4_t vpmc; |
| 28 | |
| 29 | maskRB -= width; |
| 30 | dstRB -= (width << 2); |
| 31 | |
| 32 | if (width >= 8) { |
| 33 | vpmc.val[NEON_A] = vdup_n_u8(SkGetPackedA32(pmc)); |
| 34 | vpmc.val[NEON_R] = vdup_n_u8(SkGetPackedR32(pmc)); |
| 35 | vpmc.val[NEON_G] = vdup_n_u8(SkGetPackedG32(pmc)); |
| 36 | vpmc.val[NEON_B] = vdup_n_u8(SkGetPackedB32(pmc)); |
mtklein | 12d40c1 | 2015-09-01 11:03:11 -0700 | [diff] [blame] | 37 | } |
mtklein | 9b34114 | 2015-11-18 18:59:18 -0800 | [diff] [blame^] | 38 | do { |
| 39 | int w = width; |
| 40 | while (w >= 8) { |
| 41 | uint8x8_t vmask = vld1_u8(mask); |
| 42 | uint16x8_t vscale, vmask256 = SkAlpha255To256_neon8(vmask); |
| 43 | if (isColor) { |
| 44 | vscale = vsubw_u8(vdupq_n_u16(256), |
| 45 | SkAlphaMul_neon8(vpmc.val[NEON_A], vmask256)); |
| 46 | } else { |
| 47 | vscale = vsubw_u8(vdupq_n_u16(256), vmask); |
| 48 | } |
| 49 | uint8x8x4_t vdev = vld4_u8((uint8_t*)device); |
| 50 | |
| 51 | vdev.val[NEON_A] = SkAlphaMul_neon8(vpmc.val[NEON_A], vmask256) |
| 52 | + SkAlphaMul_neon8(vdev.val[NEON_A], vscale); |
| 53 | vdev.val[NEON_R] = SkAlphaMul_neon8(vpmc.val[NEON_R], vmask256) |
| 54 | + SkAlphaMul_neon8(vdev.val[NEON_R], vscale); |
| 55 | vdev.val[NEON_G] = SkAlphaMul_neon8(vpmc.val[NEON_G], vmask256) |
| 56 | + SkAlphaMul_neon8(vdev.val[NEON_G], vscale); |
| 57 | vdev.val[NEON_B] = SkAlphaMul_neon8(vpmc.val[NEON_B], vmask256) |
| 58 | + SkAlphaMul_neon8(vdev.val[NEON_B], vscale); |
| 59 | |
| 60 | vst4_u8((uint8_t*)device, vdev); |
| 61 | |
| 62 | mask += 8; |
| 63 | device += 8; |
| 64 | w -= 8; |
| 65 | } |
| 66 | |
| 67 | while (w--) { |
| 68 | unsigned aa = *mask++; |
| 69 | if (isColor) { |
| 70 | *device = SkBlendARGB32(pmc, *device, aa); |
| 71 | } else { |
| 72 | *device = SkAlphaMulQ(pmc, SkAlpha255To256(aa)) |
| 73 | + SkAlphaMulQ(*device, SkAlpha255To256(255 - aa)); |
| 74 | } |
| 75 | device += 1; |
| 76 | }; |
| 77 | |
| 78 | device = (uint32_t*)((char*)device + dstRB); |
| 79 | mask += maskRB; |
| 80 | |
| 81 | } while (--height != 0); |
mtklein | 5015176 | 2015-08-26 12:35:14 -0700 | [diff] [blame] | 82 | } |
mtklein | 12d40c1 | 2015-09-01 11:03:11 -0700 | [diff] [blame] | 83 | |
mtklein | 9b34114 | 2015-11-18 18:59:18 -0800 | [diff] [blame^] | 84 | static void blit_mask_d32_a8_general(SkPMColor* dst, size_t dstRB, |
| 85 | const SkAlpha* mask, size_t maskRB, |
| 86 | SkColor color, int w, int h) { |
| 87 | D32_A8_Opaque_Color_neon<true>(dst, dstRB, mask, maskRB, color, w, h); |
| 88 | } |
mtklein | 12d40c1 | 2015-09-01 11:03:11 -0700 | [diff] [blame] | 89 | |
mtklein | 9b34114 | 2015-11-18 18:59:18 -0800 | [diff] [blame^] | 90 | // As above, but made slightly simpler by requiring that color is opaque. |
| 91 | static void blit_mask_d32_a8_opaque(SkPMColor* dst, size_t dstRB, |
| 92 | const SkAlpha* mask, size_t maskRB, |
| 93 | SkColor color, int w, int h) { |
| 94 | D32_A8_Opaque_Color_neon<false>(dst, dstRB, mask, maskRB, color, w, h); |
| 95 | } |
| 96 | |
| 97 | // Same as _opaque, but assumes color == SK_ColorBLACK, a very common and even simpler case. |
| 98 | static void blit_mask_d32_a8_black(SkPMColor* dst, size_t dstRB, |
| 99 | const SkAlpha* maskPtr, size_t maskRB, |
| 100 | int width, int height) { |
| 101 | SkPMColor* SK_RESTRICT device = (SkPMColor*)dst; |
| 102 | const uint8_t* SK_RESTRICT mask = (const uint8_t*)maskPtr; |
| 103 | |
| 104 | maskRB -= width; |
| 105 | dstRB -= (width << 2); |
| 106 | do { |
| 107 | int w = width; |
| 108 | while (w >= 8) { |
| 109 | uint8x8_t vmask = vld1_u8(mask); |
| 110 | uint16x8_t vscale = vsubw_u8(vdupq_n_u16(256), vmask); |
| 111 | uint8x8x4_t vdevice = vld4_u8((uint8_t*)device); |
| 112 | |
| 113 | vdevice = SkAlphaMulQ_neon8(vdevice, vscale); |
| 114 | vdevice.val[NEON_A] += vmask; |
| 115 | |
| 116 | vst4_u8((uint8_t*)device, vdevice); |
| 117 | |
| 118 | mask += 8; |
| 119 | device += 8; |
| 120 | w -= 8; |
| 121 | } |
| 122 | while (w-- > 0) { |
| 123 | unsigned aa = *mask++; |
| 124 | *device = (aa << SK_A32_SHIFT) |
| 125 | + SkAlphaMulQ(*device, SkAlpha255To256(255 - aa)); |
| 126 | device += 1; |
| 127 | }; |
| 128 | device = (uint32_t*)((char*)device + dstRB); |
| 129 | mask += maskRB; |
| 130 | } while (--height != 0); |
| 131 | } |
| 132 | |
| 133 | #else |
| 134 | static void blit_mask_d32_a8_general(SkPMColor* dst, size_t dstRB, |
| 135 | const SkAlpha* mask, size_t maskRB, |
| 136 | SkColor color, int w, int h) { |
| 137 | auto s = Sk4px::DupPMColor(SkPreMultiplyColor(color)); |
| 138 | auto fn = [&](const Sk4px& d, const Sk4px& aa) { |
| 139 | // = (s + d(1-sa))aa + d(1-aa) |
| 140 | // = s*aa + d(1-sa*aa) |
| 141 | auto left = s.approxMulDiv255(aa), |
| 142 | right = d.approxMulDiv255(left.alphas().inv()); |
| 143 | return left + right; // This does not overflow (exhaustively checked). |
mtklein | 12d40c1 | 2015-09-01 11:03:11 -0700 | [diff] [blame] | 144 | }; |
mtklein | 9b34114 | 2015-11-18 18:59:18 -0800 | [diff] [blame^] | 145 | while (h --> 0) { |
| 146 | Sk4px::MapDstAlpha(w, dst, mask, fn); |
| 147 | dst += dstRB / sizeof(*dst); |
| 148 | mask += maskRB / sizeof(*mask); |
| 149 | } |
| 150 | } |
| 151 | |
| 152 | // As above, but made slightly simpler by requiring that color is opaque. |
| 153 | static void blit_mask_d32_a8_opaque(SkPMColor* dst, size_t dstRB, |
| 154 | const SkAlpha* mask, size_t maskRB, |
| 155 | SkColor color, int w, int h) { |
| 156 | SkASSERT(SkColorGetA(color) == 0xFF); |
| 157 | auto s = Sk4px::DupPMColor(SkPreMultiplyColor(color)); |
| 158 | auto fn = [&](const Sk4px& d, const Sk4px& aa) { |
mtklein | 12d40c1 | 2015-09-01 11:03:11 -0700 | [diff] [blame] | 159 | // = (s + d(1-sa))aa + d(1-aa) |
| 160 | // = s*aa + d(1-sa*aa) |
| 161 | // ~~~> |
| 162 | // = s*aa + d(1-aa) |
| 163 | return s.approxMulDiv255(aa) + d.approxMulDiv255(aa.inv()); |
| 164 | }; |
mtklein | 9b34114 | 2015-11-18 18:59:18 -0800 | [diff] [blame^] | 165 | while (h --> 0) { |
| 166 | Sk4px::MapDstAlpha(w, dst, mask, fn); |
| 167 | dst += dstRB / sizeof(*dst); |
| 168 | mask += maskRB / sizeof(*mask); |
| 169 | } |
| 170 | } |
| 171 | |
| 172 | // Same as _opaque, but assumes color == SK_ColorBLACK, a very common and even simpler case. |
| 173 | static void blit_mask_d32_a8_black(SkPMColor* dst, size_t dstRB, |
| 174 | const SkAlpha* mask, size_t maskRB, |
| 175 | int w, int h) { |
| 176 | auto fn = [](const Sk4px& d, const Sk4px& aa) { |
| 177 | // = (s + d(1-sa))aa + d(1-aa) |
| 178 | // = s*aa + d(1-sa*aa) |
| 179 | // ~~~> |
| 180 | // a = 1*aa + d(1-1*aa) = aa + d(1-aa) |
| 181 | // c = 0*aa + d(1-1*aa) = d(1-aa) |
| 182 | return aa.zeroColors() + d.approxMulDiv255(aa.inv()); |
mtklein | e8e17cf | 2015-11-06 14:10:48 -0800 | [diff] [blame] | 183 | }; |
mtklein | 9b34114 | 2015-11-18 18:59:18 -0800 | [diff] [blame^] | 184 | while (h --> 0) { |
| 185 | Sk4px::MapDstAlpha(w, dst, mask, fn); |
| 186 | dst += dstRB / sizeof(*dst); |
| 187 | mask += maskRB / sizeof(*mask); |
| 188 | } |
| 189 | } |
| 190 | #endif |
| 191 | |
| 192 | static void blit_mask_d32_a8(SkPMColor* dst, size_t dstRB, |
| 193 | const SkAlpha* mask, size_t maskRB, |
| 194 | SkColor color, int w, int h) { |
| 195 | if (color == SK_ColorBLACK) { |
| 196 | blit_mask_d32_a8_black(dst, dstRB, mask, maskRB, w, h); |
| 197 | } else if (SkColorGetA(color) == 0xFF) { |
| 198 | blit_mask_d32_a8_opaque(dst, dstRB, mask, maskRB, color, w, h); |
| 199 | } else { |
| 200 | blit_mask_d32_a8_general(dst, dstRB, mask, maskRB, color, w, h); |
mtklein | 5015176 | 2015-08-26 12:35:14 -0700 | [diff] [blame] | 201 | } |
| 202 | } |
| 203 | |
mtklein | 4977983 | 2015-08-10 12:58:17 -0700 | [diff] [blame] | 204 | } // SK_OPTS_NS |
| 205 | |
| 206 | #endif//SkBlitMask_opts_DEFINED |