mtklein | 4977983 | 2015-08-10 12:58:17 -0700 | [diff] [blame] | 1 | /* |
| 2 | * Copyright 2015 Google Inc. |
| 3 | * |
| 4 | * Use of this source code is governed by a BSD-style license that can be |
| 5 | * found in the LICENSE file. |
| 6 | */ |
| 7 | |
| 8 | #ifndef SkBlitMask_opts_DEFINED |
| 9 | #define SkBlitMask_opts_DEFINED |
| 10 | |
Mike Klein | c0bd9f9 | 2019-04-23 12:05:21 -0500 | [diff] [blame] | 11 | #include "src/core/Sk4px.h" |
mtklein | 4977983 | 2015-08-10 12:58:17 -0700 | [diff] [blame] | 12 | |
| 13 | namespace SK_OPTS_NS { |
| 14 | |
mtklein | 9b34114 | 2015-11-18 18:59:18 -0800 | [diff] [blame] | 15 | #if defined(SK_ARM_HAS_NEON) |
    // The Sk4px versions below work fine with NEON, but we have had many indications
    // that they don't perform as well as this NEON-specific code.  TODO(mtklein): why?
Mike Klein | 16777f7 | 2018-12-04 14:44:59 -0500 | [diff] [blame] | 18 | |
| 19 | #define NEON_A (SK_A32_SHIFT / 8) |
| 20 | #define NEON_R (SK_R32_SHIFT / 8) |
| 21 | #define NEON_G (SK_G32_SHIFT / 8) |
| 22 | #define NEON_B (SK_B32_SHIFT / 8) |
| 23 | |
| 24 | static inline uint16x8_t SkAlpha255To256_neon8(uint8x8_t alpha) { |
| 25 | return vaddw_u8(vdupq_n_u16(1), alpha); |
| 26 | } |
| 27 | |
| 28 | static inline uint8x8_t SkAlphaMul_neon8(uint8x8_t color, uint16x8_t scale) { |
| 29 | return vshrn_n_u16(vmovl_u8(color) * scale, 8); |
| 30 | } |
| 31 | |
| 32 | static inline uint8x8x4_t SkAlphaMulQ_neon8(uint8x8x4_t color, uint16x8_t scale) { |
| 33 | uint8x8x4_t ret; |
| 34 | |
| 35 | ret.val[0] = SkAlphaMul_neon8(color.val[0], scale); |
| 36 | ret.val[1] = SkAlphaMul_neon8(color.val[1], scale); |
| 37 | ret.val[2] = SkAlphaMul_neon8(color.val[2], scale); |
| 38 | ret.val[3] = SkAlphaMul_neon8(color.val[3], scale); |
| 39 | |
| 40 | return ret; |
| 41 | } |
| 42 | |
mtklein | 9b34114 | 2015-11-18 18:59:18 -0800 | [diff] [blame] | 43 | |
| 44 | template <bool isColor> |
| 45 | static void D32_A8_Opaque_Color_neon(void* SK_RESTRICT dst, size_t dstRB, |
| 46 | const void* SK_RESTRICT maskPtr, size_t maskRB, |
| 47 | SkColor color, int width, int height) { |
| 48 | SkPMColor pmc = SkPreMultiplyColor(color); |
| 49 | SkPMColor* SK_RESTRICT device = (SkPMColor*)dst; |
| 50 | const uint8_t* SK_RESTRICT mask = (const uint8_t*)maskPtr; |
| 51 | uint8x8x4_t vpmc; |
| 52 | |
| 53 | maskRB -= width; |
| 54 | dstRB -= (width << 2); |
| 55 | |
| 56 | if (width >= 8) { |
| 57 | vpmc.val[NEON_A] = vdup_n_u8(SkGetPackedA32(pmc)); |
| 58 | vpmc.val[NEON_R] = vdup_n_u8(SkGetPackedR32(pmc)); |
| 59 | vpmc.val[NEON_G] = vdup_n_u8(SkGetPackedG32(pmc)); |
| 60 | vpmc.val[NEON_B] = vdup_n_u8(SkGetPackedB32(pmc)); |
mtklein | 12d40c1 | 2015-09-01 11:03:11 -0700 | [diff] [blame] | 61 | } |
mtklein | 9b34114 | 2015-11-18 18:59:18 -0800 | [diff] [blame] | 62 | do { |
| 63 | int w = width; |
| 64 | while (w >= 8) { |
| 65 | uint8x8_t vmask = vld1_u8(mask); |
| 66 | uint16x8_t vscale, vmask256 = SkAlpha255To256_neon8(vmask); |
| 67 | if (isColor) { |
| 68 | vscale = vsubw_u8(vdupq_n_u16(256), |
| 69 | SkAlphaMul_neon8(vpmc.val[NEON_A], vmask256)); |
| 70 | } else { |
| 71 | vscale = vsubw_u8(vdupq_n_u16(256), vmask); |
| 72 | } |
| 73 | uint8x8x4_t vdev = vld4_u8((uint8_t*)device); |
| 74 | |
| 75 | vdev.val[NEON_A] = SkAlphaMul_neon8(vpmc.val[NEON_A], vmask256) |
| 76 | + SkAlphaMul_neon8(vdev.val[NEON_A], vscale); |
| 77 | vdev.val[NEON_R] = SkAlphaMul_neon8(vpmc.val[NEON_R], vmask256) |
| 78 | + SkAlphaMul_neon8(vdev.val[NEON_R], vscale); |
| 79 | vdev.val[NEON_G] = SkAlphaMul_neon8(vpmc.val[NEON_G], vmask256) |
| 80 | + SkAlphaMul_neon8(vdev.val[NEON_G], vscale); |
| 81 | vdev.val[NEON_B] = SkAlphaMul_neon8(vpmc.val[NEON_B], vmask256) |
| 82 | + SkAlphaMul_neon8(vdev.val[NEON_B], vscale); |
| 83 | |
| 84 | vst4_u8((uint8_t*)device, vdev); |
| 85 | |
| 86 | mask += 8; |
| 87 | device += 8; |
| 88 | w -= 8; |
| 89 | } |
| 90 | |
| 91 | while (w--) { |
| 92 | unsigned aa = *mask++; |
| 93 | if (isColor) { |
| 94 | *device = SkBlendARGB32(pmc, *device, aa); |
| 95 | } else { |
| 96 | *device = SkAlphaMulQ(pmc, SkAlpha255To256(aa)) |
| 97 | + SkAlphaMulQ(*device, SkAlpha255To256(255 - aa)); |
| 98 | } |
| 99 | device += 1; |
Brian Osman | 7e2c061 | 2019-03-20 13:00:48 -0400 | [diff] [blame] | 100 | } |
mtklein | 9b34114 | 2015-11-18 18:59:18 -0800 | [diff] [blame] | 101 | |
| 102 | device = (uint32_t*)((char*)device + dstRB); |
| 103 | mask += maskRB; |
| 104 | |
| 105 | } while (--height != 0); |
mtklein | 5015176 | 2015-08-26 12:35:14 -0700 | [diff] [blame] | 106 | } |
mtklein | 12d40c1 | 2015-09-01 11:03:11 -0700 | [diff] [blame] | 107 | |
mtklein | 9b34114 | 2015-11-18 18:59:18 -0800 | [diff] [blame] | 108 | static void blit_mask_d32_a8_general(SkPMColor* dst, size_t dstRB, |
| 109 | const SkAlpha* mask, size_t maskRB, |
| 110 | SkColor color, int w, int h) { |
| 111 | D32_A8_Opaque_Color_neon<true>(dst, dstRB, mask, maskRB, color, w, h); |
| 112 | } |
mtklein | 12d40c1 | 2015-09-01 11:03:11 -0700 | [diff] [blame] | 113 | |
mtklein | 9b34114 | 2015-11-18 18:59:18 -0800 | [diff] [blame] | 114 | // As above, but made slightly simpler by requiring that color is opaque. |
| 115 | static void blit_mask_d32_a8_opaque(SkPMColor* dst, size_t dstRB, |
| 116 | const SkAlpha* mask, size_t maskRB, |
| 117 | SkColor color, int w, int h) { |
| 118 | D32_A8_Opaque_Color_neon<false>(dst, dstRB, mask, maskRB, color, w, h); |
| 119 | } |
| 120 | |
| 121 | // Same as _opaque, but assumes color == SK_ColorBLACK, a very common and even simpler case. |
| 122 | static void blit_mask_d32_a8_black(SkPMColor* dst, size_t dstRB, |
| 123 | const SkAlpha* maskPtr, size_t maskRB, |
| 124 | int width, int height) { |
| 125 | SkPMColor* SK_RESTRICT device = (SkPMColor*)dst; |
| 126 | const uint8_t* SK_RESTRICT mask = (const uint8_t*)maskPtr; |
| 127 | |
| 128 | maskRB -= width; |
| 129 | dstRB -= (width << 2); |
| 130 | do { |
| 131 | int w = width; |
| 132 | while (w >= 8) { |
| 133 | uint8x8_t vmask = vld1_u8(mask); |
| 134 | uint16x8_t vscale = vsubw_u8(vdupq_n_u16(256), vmask); |
| 135 | uint8x8x4_t vdevice = vld4_u8((uint8_t*)device); |
| 136 | |
| 137 | vdevice = SkAlphaMulQ_neon8(vdevice, vscale); |
| 138 | vdevice.val[NEON_A] += vmask; |
| 139 | |
| 140 | vst4_u8((uint8_t*)device, vdevice); |
| 141 | |
| 142 | mask += 8; |
| 143 | device += 8; |
| 144 | w -= 8; |
| 145 | } |
| 146 | while (w-- > 0) { |
| 147 | unsigned aa = *mask++; |
| 148 | *device = (aa << SK_A32_SHIFT) |
| 149 | + SkAlphaMulQ(*device, SkAlpha255To256(255 - aa)); |
| 150 | device += 1; |
Brian Osman | 7e2c061 | 2019-03-20 13:00:48 -0400 | [diff] [blame] | 151 | } |
mtklein | 9b34114 | 2015-11-18 18:59:18 -0800 | [diff] [blame] | 152 | device = (uint32_t*)((char*)device + dstRB); |
| 153 | mask += maskRB; |
| 154 | } while (--height != 0); |
| 155 | } |
| 156 | |
| 157 | #else |
| 158 | static void blit_mask_d32_a8_general(SkPMColor* dst, size_t dstRB, |
| 159 | const SkAlpha* mask, size_t maskRB, |
| 160 | SkColor color, int w, int h) { |
| 161 | auto s = Sk4px::DupPMColor(SkPreMultiplyColor(color)); |
| 162 | auto fn = [&](const Sk4px& d, const Sk4px& aa) { |
| 163 | // = (s + d(1-sa))aa + d(1-aa) |
| 164 | // = s*aa + d(1-sa*aa) |
| 165 | auto left = s.approxMulDiv255(aa), |
| 166 | right = d.approxMulDiv255(left.alphas().inv()); |
| 167 | return left + right; // This does not overflow (exhaustively checked). |
mtklein | 12d40c1 | 2015-09-01 11:03:11 -0700 | [diff] [blame] | 168 | }; |
mtklein | 9b34114 | 2015-11-18 18:59:18 -0800 | [diff] [blame] | 169 | while (h --> 0) { |
| 170 | Sk4px::MapDstAlpha(w, dst, mask, fn); |
| 171 | dst += dstRB / sizeof(*dst); |
| 172 | mask += maskRB / sizeof(*mask); |
| 173 | } |
| 174 | } |
| 175 | |
| 176 | // As above, but made slightly simpler by requiring that color is opaque. |
| 177 | static void blit_mask_d32_a8_opaque(SkPMColor* dst, size_t dstRB, |
| 178 | const SkAlpha* mask, size_t maskRB, |
| 179 | SkColor color, int w, int h) { |
| 180 | SkASSERT(SkColorGetA(color) == 0xFF); |
| 181 | auto s = Sk4px::DupPMColor(SkPreMultiplyColor(color)); |
| 182 | auto fn = [&](const Sk4px& d, const Sk4px& aa) { |
mtklein | 12d40c1 | 2015-09-01 11:03:11 -0700 | [diff] [blame] | 183 | // = (s + d(1-sa))aa + d(1-aa) |
| 184 | // = s*aa + d(1-sa*aa) |
| 185 | // ~~~> |
| 186 | // = s*aa + d(1-aa) |
| 187 | return s.approxMulDiv255(aa) + d.approxMulDiv255(aa.inv()); |
| 188 | }; |
mtklein | 9b34114 | 2015-11-18 18:59:18 -0800 | [diff] [blame] | 189 | while (h --> 0) { |
| 190 | Sk4px::MapDstAlpha(w, dst, mask, fn); |
| 191 | dst += dstRB / sizeof(*dst); |
| 192 | mask += maskRB / sizeof(*mask); |
| 193 | } |
| 194 | } |
| 195 | |
| 196 | // Same as _opaque, but assumes color == SK_ColorBLACK, a very common and even simpler case. |
| 197 | static void blit_mask_d32_a8_black(SkPMColor* dst, size_t dstRB, |
| 198 | const SkAlpha* mask, size_t maskRB, |
| 199 | int w, int h) { |
| 200 | auto fn = [](const Sk4px& d, const Sk4px& aa) { |
| 201 | // = (s + d(1-sa))aa + d(1-aa) |
| 202 | // = s*aa + d(1-sa*aa) |
| 203 | // ~~~> |
| 204 | // a = 1*aa + d(1-1*aa) = aa + d(1-aa) |
| 205 | // c = 0*aa + d(1-1*aa) = d(1-aa) |
Mike Klein | 7dfe6d9 | 2018-12-18 14:53:37 -0500 | [diff] [blame] | 206 | return Sk4px(Sk16b(aa) & Sk16b(0,0,0,255, 0,0,0,255, 0,0,0,255, 0,0,0,255)) |
| 207 | + d.approxMulDiv255(aa.inv()); |
mtklein | e8e17cf | 2015-11-06 14:10:48 -0800 | [diff] [blame] | 208 | }; |
mtklein | 9b34114 | 2015-11-18 18:59:18 -0800 | [diff] [blame] | 209 | while (h --> 0) { |
| 210 | Sk4px::MapDstAlpha(w, dst, mask, fn); |
| 211 | dst += dstRB / sizeof(*dst); |
| 212 | mask += maskRB / sizeof(*mask); |
| 213 | } |
| 214 | } |
| 215 | #endif |
| 216 | |
Mike Klein | cd71f11 | 2017-08-23 11:11:55 -0400 | [diff] [blame] | 217 | /*not static*/ inline void blit_mask_d32_a8(SkPMColor* dst, size_t dstRB, |
| 218 | const SkAlpha* mask, size_t maskRB, |
| 219 | SkColor color, int w, int h) { |
mtklein | 9b34114 | 2015-11-18 18:59:18 -0800 | [diff] [blame] | 220 | if (color == SK_ColorBLACK) { |
| 221 | blit_mask_d32_a8_black(dst, dstRB, mask, maskRB, w, h); |
| 222 | } else if (SkColorGetA(color) == 0xFF) { |
| 223 | blit_mask_d32_a8_opaque(dst, dstRB, mask, maskRB, color, w, h); |
| 224 | } else { |
| 225 | blit_mask_d32_a8_general(dst, dstRB, mask, maskRB, color, w, h); |
mtklein | 5015176 | 2015-08-26 12:35:14 -0700 | [diff] [blame] | 226 | } |
| 227 | } |
| 228 | |
mtklein | 4977983 | 2015-08-10 12:58:17 -0700 | [diff] [blame] | 229 | } // SK_OPTS_NS |
| 230 | |
| 231 | #endif//SkBlitMask_opts_DEFINED |