Blame - include/core/SkColorPriv.h - fp2-dev/platform/external/chromium_org/third_party/skia

blob: 041c0380a0e397c4fb55964ed06814aa94704bb5 [file] [log] [blame]

reed@android.com	8a1c16f	2008-12-17 15:59:43 +0000	[diff] [blame^]	1	/*
				2	* Copyright (C) 2006 The Android Open Source Project
				3	*
				4	* Licensed under the Apache License, Version 2.0 (the "License");
				5	* you may not use this file except in compliance with the License.
				6	* You may obtain a copy of the License at
				7	*
				8	* http://www.apache.org/licenses/LICENSE-2.0
				9	*
				10	* Unless required by applicable law or agreed to in writing, software
				11	* distributed under the License is distributed on an "AS IS" BASIS,
				12	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				13	* See the License for the specific language governing permissions and
				14	* limitations under the License.
				15	*/
				16
				17	#ifndef SkColorPriv_DEFINED
				18	#define SkColorPriv_DEFINED
				19
				20	// turn this on for extra debug checking when blending onto 565
				21	#ifdef SK_DEBUG
				22	#define CHECK_FOR_565_OVERFLOW
				23	#endif
				24
				25	#include "SkColor.h"
				26	#include "SkMath.h"
				27
				28	/** Turn 0..255 into 0..256 by adding 1 at the half-way point. Used to turn a
				29	byte into a scale value, so that we can say scale * value >> 8 instead of
				30	alpha * value / 255.
				31
				32	In debugging, asserts that alpha is 0..255
				33	*/
				34	static inline unsigned SkAlpha255To256(U8CPU alpha) {
				35	SkASSERT(SkToU8(alpha) == alpha);
				36	return alpha + (alpha >> 7);
				37	}
				38
				39	/** Multiplify value by 0..256, and shift the result down 8
				40	(i.e. return (value * alpha256) >> 8)
				41	*/
				42	#define SkAlphaMul(value, alpha256) (SkMulS16(value, alpha256) >> 8)
				43
				44	// The caller may want negative values, so keep all params signed (int)
				45	// so we don't accidentally slip into unsigned math and lose the sign
				46	// extension when we shift (in SkAlphaMul)
				47	inline int SkAlphaBlend(int src, int dst, int scale256) {
				48	SkASSERT((unsigned)scale256 <= 256);
				49	return dst + SkAlphaMul(src - dst, scale256);
				50	}
				51
				52	#define SK_R16_BITS 5
				53	#define SK_G16_BITS 6
				54	#define SK_B16_BITS 5
				55
				56	#define SK_R16_SHIFT (SK_B16_BITS + SK_G16_BITS)
				57	#define SK_G16_SHIFT (SK_B16_BITS)
				58	#define SK_B16_SHIFT 0
				59
				60	#define SK_R16_MASK ((1 << SK_R16_BITS) - 1)
				61	#define SK_G16_MASK ((1 << SK_G16_BITS) - 1)
				62	#define SK_B16_MASK ((1 << SK_B16_BITS) - 1)
				63
				64	#define SkGetPackedR16(color) (((unsigned)(color) >> SK_R16_SHIFT) & SK_R16_MASK)
				65	#define SkGetPackedG16(color) (((unsigned)(color) >> SK_G16_SHIFT) & SK_G16_MASK)
				66	#define SkGetPackedB16(color) (((unsigned)(color) >> SK_B16_SHIFT) & SK_B16_MASK)
				67
				68	#define SkR16Assert(r) SkASSERT((unsigned)(r) <= SK_R16_MASK)
				69	#define SkG16Assert(g) SkASSERT((unsigned)(g) <= SK_G16_MASK)
				70	#define SkB16Assert(b) SkASSERT((unsigned)(b) <= SK_B16_MASK)
				71
				72	static inline uint16_t SkPackRGB16(unsigned r, unsigned g, unsigned b) {
				73	SkASSERT(r <= SK_R16_MASK);
				74	SkASSERT(g <= SK_G16_MASK);
				75	SkASSERT(b <= SK_B16_MASK);
				76
				77	return SkToU16((r << SK_R16_SHIFT) \| (g << SK_G16_SHIFT) \| (b << SK_B16_SHIFT));
				78	}
				79
				80	#define SK_R16_MASK_IN_PLACE (SK_R16_MASK << SK_R16_SHIFT)
				81	#define SK_G16_MASK_IN_PLACE (SK_G16_MASK << SK_G16_SHIFT)
				82	#define SK_B16_MASK_IN_PLACE (SK_B16_MASK << SK_B16_SHIFT)
				83
				84	/** Expand the 16bit color into a 32bit value that can be scaled all at once
				85	by a value up to 32. Used in conjunction with SkCompact_rgb_16.
				86	*/
				87	static inline uint32_t SkExpand_rgb_16(U16CPU c) {
				88	SkASSERT(c == (uint16_t)c);
				89
				90	return ((c & SK_G16_MASK_IN_PLACE) << 16) \| (c & ~SK_G16_MASK_IN_PLACE);
				91	}
				92
				93	/** Compress an expanded value (from SkExpand_rgb_16) back down to a 16bit
				94	color value. The computation yields only 16bits of valid data, but we claim
				95	to return 32bits, so that the compiler won't generate extra instructions to
				96	"clean" the top 16bits. However, the top 16 can contain garbage, so it is
				97	up to the caller to safely ignore them.
				98	*/
				99	static inline U16CPU SkCompact_rgb_16(uint32_t c) {
				100	return ((c >> 16) & SK_G16_MASK_IN_PLACE) \| (c & ~SK_G16_MASK_IN_PLACE);
				101	}
				102
				103	/** Scale the 16bit color value by the 0..256 scale parameter.
				104	The computation yields only 16bits of valid data, but we claim
				105	to return 32bits, so that the compiler won't generate extra instructions to
				106	"clean" the top 16bits.
				107	*/
				108	static inline U16CPU SkAlphaMulRGB16(U16CPU c, unsigned scale) {
				109	return SkCompact_rgb_16(SkExpand_rgb_16(c) * (scale >> 3) >> 5);
				110	}
				111
				112	// this helper explicitly returns a clean 16bit value (but slower)
				113	#define SkAlphaMulRGB16_ToU16(c, s) (uint16_t)SkAlphaMulRGB16(c, s)
				114
				115	/** Blend src and dst 16bit colors by the 0..256 scale parameter.
				116	The computation yields only 16bits of valid data, but we claim
				117	to return 32bits, so that the compiler won't generate extra instructions to
				118	"clean" the top 16bits.
				119	*/
				120	static inline U16CPU SkBlendRGB16(U16CPU src, U16CPU dst, int srcScale) {
				121	SkASSERT((unsigned)srcScale <= 256);
				122
				123	srcScale >>= 3;
				124
				125	uint32_t src32 = SkExpand_rgb_16(src);
				126	uint32_t dst32 = SkExpand_rgb_16(dst);
				127	return SkCompact_rgb_16(dst32 + ((src32 - dst32) * srcScale >> 5));
				128	}
				129
				130	static inline void SkBlendRGB16(const uint16_t src[], uint16_t dst[],
				131	int srcScale, int count) {
				132	SkASSERT(count > 0);
				133	SkASSERT((unsigned)srcScale <= 256);
				134
				135	srcScale >>= 3;
				136
				137	do {
				138	uint32_t src32 = SkExpand_rgb_16(*src++);
				139	uint32_t dst32 = SkExpand_rgb_16(*dst);
				140	dst++ = SkCompact_rgb_16(dst32 + ((src32 - dst32) srcScale >> 5));
				141	} while (--count > 0);
				142	}
				143
				144	#ifdef SK_DEBUG
				145	static U16CPU SkRGB16Add(U16CPU a, U16CPU b) {
				146	SkASSERT(SkGetPackedR16(a) + SkGetPackedR16(b) <= SK_R16_MASK);
				147	SkASSERT(SkGetPackedG16(a) + SkGetPackedG16(b) <= SK_G16_MASK);
				148	SkASSERT(SkGetPackedB16(a) + SkGetPackedB16(b) <= SK_B16_MASK);
				149
				150	return a + b;
				151	}
				152	#else
				153	#define SkRGB16Add(a, b) ((a) + (b))
				154	#endif
				155
				156	/////////////////////////////////////////////////////////////////////////////////////////////
				157
				158	#define SK_A32_BITS 8
				159	#define SK_R32_BITS 8
				160	#define SK_G32_BITS 8
				161	#define SK_B32_BITS 8
				162
				163	/* we check to see if the SHIFT value has already been defined (SkUserConfig.h)
				164	if not, we define it ourself to some default values. We default to OpenGL
				165	order (in memory: r,g,b,a)
				166	*/
				167	#ifndef SK_A32_SHIFT
				168	#ifdef SK_CPU_BENDIAN
				169	#define SK_R32_SHIFT 24
				170	#define SK_G32_SHIFT 16
				171	#define SK_B32_SHIFT 8
				172	#define SK_A32_SHIFT 0
				173	#else
				174	#define SK_R32_SHIFT 0
				175	#define SK_G32_SHIFT 8
				176	#define SK_B32_SHIFT 16
				177	#define SK_A32_SHIFT 24
				178	#endif
				179	#endif
				180
				181	#define SK_A32_MASK ((1 << SK_A32_BITS) - 1)
				182	#define SK_R32_MASK ((1 << SK_R32_BITS) - 1)
				183	#define SK_G32_MASK ((1 << SK_G32_BITS) - 1)
				184	#define SK_B32_MASK ((1 << SK_B32_BITS) - 1)
				185
				186	#define SkGetPackedA32(packed) ((uint32_t)((packed) << (24 - SK_A32_SHIFT)) >> 24)
				187	#define SkGetPackedR32(packed) ((uint32_t)((packed) << (24 - SK_R32_SHIFT)) >> 24)
				188	#define SkGetPackedG32(packed) ((uint32_t)((packed) << (24 - SK_G32_SHIFT)) >> 24)
				189	#define SkGetPackedB32(packed) ((uint32_t)((packed) << (24 - SK_B32_SHIFT)) >> 24)
				190
				191	#define SkA32Assert(a) SkASSERT((unsigned)(a) <= SK_A32_MASK)
				192	#define SkR32Assert(r) SkASSERT((unsigned)(r) <= SK_R32_MASK)
				193	#define SkG32Assert(g) SkASSERT((unsigned)(g) <= SK_G32_MASK)
				194	#define SkB32Assert(b) SkASSERT((unsigned)(b) <= SK_B32_MASK)
				195
				196	#ifdef SK_DEBUG
				197	inline void SkPMColorAssert(SkPMColor c) {
				198	unsigned a = SkGetPackedA32(c);
				199	unsigned r = SkGetPackedR32(c);
				200	unsigned g = SkGetPackedG32(c);
				201	unsigned b = SkGetPackedB32(c);
				202
				203	SkA32Assert(a);
				204	SkASSERT(r <= a);
				205	SkASSERT(g <= a);
				206	SkASSERT(b <= a);
				207	}
				208	#else
				209	#define SkPMColorAssert(c)
				210	#endif
				211
				212	inline SkPMColor SkPackARGB32(U8CPU a, U8CPU r, U8CPU g, U8CPU b) {
				213	SkA32Assert(a);
				214	SkASSERT(r <= a);
				215	SkASSERT(g <= a);
				216	SkASSERT(b <= a);
				217
				218	return (a << SK_A32_SHIFT) \| (r << SK_R32_SHIFT) \|
				219	(g << SK_G32_SHIFT) \| (b << SK_B32_SHIFT);
				220	}
				221
				222	extern const uint32_t gMask_00FF00FF;
				223
				224	inline uint32_t SkAlphaMulQ(uint32_t c, unsigned scale) {
				225	uint32_t mask = gMask_00FF00FF;
				226	// uint32_t mask = 0xFF00FF;
				227
				228	uint32_t rb = ((c & mask) * scale) >> 8;
				229	uint32_t ag = ((c >> 8) & mask) * scale;
				230	return (rb & mask) \| (ag & ~mask);
				231	}
				232
				233	inline SkPMColor SkPMSrcOver(SkPMColor src, SkPMColor dst) {
				234	return src + SkAlphaMulQ(dst, SkAlpha255To256(255 - SkGetPackedA32(src)));
				235	}
				236
				237	inline SkPMColor SkBlendARGB32(SkPMColor src, SkPMColor dst, U8CPU aa) {
				238	SkASSERT((unsigned)aa <= 255);
				239
				240	unsigned src_scale = SkAlpha255To256(aa);
				241	unsigned dst_scale = SkAlpha255To256(255 - SkAlphaMul(SkGetPackedA32(src), src_scale));
				242
				243	return SkAlphaMulQ(src, src_scale) + SkAlphaMulQ(dst, dst_scale);
				244	}
				245
				246	////////////////////////////////////////////////////////////////////////////////////////////
				247	// Convert a 32bit pixel to a 16bit pixel (no dither)
				248
				249	#define SkR32ToR16_MACRO(r) ((unsigned)(r) >> (SK_R32_BITS - SK_R16_BITS))
				250	#define SkG32ToG16_MACRO(g) ((unsigned)(g) >> (SK_G32_BITS - SK_G16_BITS))
				251	#define SkB32ToB16_MACRO(b) ((unsigned)(b) >> (SK_B32_BITS - SK_B16_BITS))
				252
				253	#ifdef SK_DEBUG
				254	inline unsigned SkR32ToR16(unsigned r)
				255	{
				256	SkR32Assert(r);
				257	return SkR32ToR16_MACRO(r);
				258	}
				259	inline unsigned SkG32ToG16(unsigned g)
				260	{
				261	SkG32Assert(g);
				262	return SkG32ToG16_MACRO(g);
				263	}
				264	inline unsigned SkB32ToB16(unsigned b)
				265	{
				266	SkB32Assert(b);
				267	return SkB32ToB16_MACRO(b);
				268	}
				269	#else
				270	#define SkR32ToR16(r) SkR32ToR16_MACRO(r)
				271	#define SkG32ToG16(g) SkG32ToG16_MACRO(g)
				272	#define SkB32ToB16(b) SkB32ToB16_MACRO(b)
				273	#endif
				274
				275	#define SkPacked32ToR16(c) (((unsigned)(c) >> (SK_R32_SHIFT + SK_R32_BITS - SK_R16_BITS)) & SK_R16_MASK)
				276	#define SkPacked32ToG16(c) (((unsigned)(c) >> (SK_G32_SHIFT + SK_G32_BITS - SK_G16_BITS)) & SK_G16_MASK)
				277	#define SkPacked32ToB16(c) (((unsigned)(c) >> (SK_B32_SHIFT + SK_B32_BITS - SK_B16_BITS)) & SK_B16_MASK)
				278
				279	inline U16CPU SkPixel32ToPixel16(SkPMColor c)
				280	{
				281	unsigned r = ((c >> (SK_R32_SHIFT + (8 - SK_R16_BITS))) & SK_R16_MASK) << SK_R16_SHIFT;
				282	unsigned g = ((c >> (SK_G32_SHIFT + (8 - SK_G16_BITS))) & SK_G16_MASK) << SK_G16_SHIFT;
				283	unsigned b = ((c >> (SK_B32_SHIFT + (8 - SK_B16_BITS))) & SK_B16_MASK) << SK_B16_SHIFT;
				284	return r \| g \| b;
				285	}
				286
				287	inline U16CPU SkPack888ToRGB16(U8CPU r, U8CPU g, U8CPU b)
				288	{
				289	return (SkR32ToR16(r) << SK_R16_SHIFT) \|
				290	(SkG32ToG16(g) << SK_G16_SHIFT) \|
				291	(SkB32ToB16(b) << SK_B16_SHIFT);
				292	}
				293
				294	#define SkPixel32ToPixel16_ToU16(src) SkToU16(SkPixel32ToPixel16(src))
				295
				296	/////////////////////////////////////////////////////////////////////////////////////////
				297	// Fast dither from 32->16
				298
				299	#define SkShouldDitherXY(x, y) (((x) ^ (y)) & 1)
				300
				301	inline uint16_t SkDitherPack888ToRGB16(U8CPU r, U8CPU g, U8CPU b)
				302	{
				303	r = ((r << 1) - ((r >> (8 - SK_R16_BITS) << (8 - SK_R16_BITS)) \| (r >> SK_R16_BITS))) >> (8 - SK_R16_BITS);
				304	g = ((g << 1) - ((g >> (8 - SK_G16_BITS) << (8 - SK_G16_BITS)) \| (g >> SK_G16_BITS))) >> (8 - SK_G16_BITS);
				305	b = ((b << 1) - ((b >> (8 - SK_B16_BITS) << (8 - SK_B16_BITS)) \| (b >> SK_B16_BITS))) >> (8 - SK_B16_BITS);
				306
				307	return SkPackRGB16(r, g, b);
				308	}
				309
				310	inline uint16_t SkDitherPixel32ToPixel16(SkPMColor c)
				311	{
				312	return SkDitherPack888ToRGB16(SkGetPackedR32(c), SkGetPackedG32(c), SkGetPackedB32(c));
				313	}
				314
				315	/* Return c in expanded_rgb_16 format, but also scaled up by 32 (5 bits)
				316	It is now suitable for combining with a scaled expanded_rgb_16 color
				317	as in SkSrcOver32To16().
				318	We must do this 565 high-bit replication, in order for the subsequent add
				319	to saturate properly (and not overflow). If we take the 8 bits as is, it is
				320	possible to overflow.
				321	*/
				322	static inline uint32_t SkPMColorToExpanded16x5(SkPMColor c)
				323	{
				324	unsigned sr = SkPacked32ToR16(c);
				325	unsigned sg = SkPacked32ToG16(c);
				326	unsigned sb = SkPacked32ToB16(c);
				327
				328	sr = (sr << 5) \| sr;
				329	sg = (sg << 5) \| (sg >> 1);
				330	sb = (sb << 5) \| sb;
				331	return (sr << 11) \| (sg << 21) \| (sb << 0);
				332	}
				333
				334	/* SrcOver the 32bit src color with the 16bit dst, returning a 16bit value
				335	(with dirt in the high 16bits, so caller beware).
				336	*/
				337	static inline U16CPU SkSrcOver32To16(SkPMColor src, uint16_t dst) {
				338	unsigned sr = SkGetPackedR32(src);
				339	unsigned sg = SkGetPackedG32(src);
				340	unsigned sb = SkGetPackedB32(src);
				341
				342	unsigned dr = SkGetPackedR16(dst);
				343	unsigned dg = SkGetPackedG16(dst);
				344	unsigned db = SkGetPackedB16(dst);
				345
				346	unsigned isa = 255 - SkGetPackedA32(src);
				347
				348	dr = (sr + SkMul16ShiftRound(dr, isa, SK_R16_BITS)) >> (8 - SK_R16_BITS);
				349	dg = (sg + SkMul16ShiftRound(dg, isa, SK_G16_BITS)) >> (8 - SK_G16_BITS);
				350	db = (sb + SkMul16ShiftRound(db, isa, SK_B16_BITS)) >> (8 - SK_B16_BITS);
				351
				352	return SkPackRGB16(dr, dg, db);
				353	}
				354
				355	////////////////////////////////////////////////////////////////////////////////////////////
				356	// Convert a 16bit pixel to a 32bit pixel
				357
				358	inline unsigned SkR16ToR32(unsigned r)
				359	{
				360	return (r << (8 - SK_R16_BITS)) \| (r >> (2 * SK_R16_BITS - 8));
				361	}
				362	inline unsigned SkG16ToG32(unsigned g)
				363	{
				364	return (g << (8 - SK_G16_BITS)) \| (g >> (2 * SK_G16_BITS - 8));
				365	}
				366	inline unsigned SkB16ToB32(unsigned b)
				367	{
				368	return (b << (8 - SK_B16_BITS)) \| (b >> (2 * SK_B16_BITS - 8));
				369	}
				370
				371	#define SkPacked16ToR32(c) SkR16ToR32(SkGetPackedR16(c))
				372	#define SkPacked16ToG32(c) SkG16ToG32(SkGetPackedG16(c))
				373	#define SkPacked16ToB32(c) SkB16ToB32(SkGetPackedB16(c))
				374
				375	inline SkPMColor SkPixel16ToPixel32(U16CPU src)
				376	{
				377	SkASSERT(src == SkToU16(src));
				378
				379	unsigned r = SkPacked16ToR32(src);
				380	unsigned g = SkPacked16ToG32(src);
				381	unsigned b = SkPacked16ToB32(src);
				382
				383	SkASSERT((r >> (8 - SK_R16_BITS)) == SkGetPackedR16(src));
				384	SkASSERT((g >> (8 - SK_G16_BITS)) == SkGetPackedG16(src));
				385	SkASSERT((b >> (8 - SK_B16_BITS)) == SkGetPackedB16(src));
				386
				387	return SkPackARGB32(0xFF, r, g, b);
				388	}
				389
				390	///////////////////////////////////////////////////////////////////////////////
				391
				392	typedef uint16_t SkPMColor16;
				393
				394	// Put in OpenGL order (r g b a)
				395	#define SK_A4444_SHIFT 0
				396	#define SK_R4444_SHIFT 12
				397	#define SK_G4444_SHIFT 8
				398	#define SK_B4444_SHIFT 4
				399
				400	#define SkA32To4444(a) ((unsigned)(a) >> 4)
				401	#define SkR32To4444(r) ((unsigned)(r) >> 4)
				402	#define SkG32To4444(g) ((unsigned)(g) >> 4)
				403	#define SkB32To4444(b) ((unsigned)(b) >> 4)
				404
				405	static U8CPU SkReplicateNibble(unsigned nib)
				406	{
				407	SkASSERT(nib <= 0xF);
				408	return (nib << 4) \| nib;
				409	}
				410
				411	#define SkA4444ToA32(a) SkReplicateNibble(a)
				412	#define SkR4444ToR32(r) SkReplicateNibble(r)
				413	#define SkG4444ToG32(g) SkReplicateNibble(g)
				414	#define SkB4444ToB32(b) SkReplicateNibble(b)
				415
				416	#define SkGetPackedA4444(c) (((unsigned)(c) >> SK_A4444_SHIFT) & 0xF)
				417	#define SkGetPackedR4444(c) (((unsigned)(c) >> SK_R4444_SHIFT) & 0xF)
				418	#define SkGetPackedG4444(c) (((unsigned)(c) >> SK_G4444_SHIFT) & 0xF)
				419	#define SkGetPackedB4444(c) (((unsigned)(c) >> SK_B4444_SHIFT) & 0xF)
				420
				421	#define SkPacked4444ToA32(c) SkReplicateNibble(SkGetPackedA4444(c))
				422	#define SkPacked4444ToR32(c) SkReplicateNibble(SkGetPackedR4444(c))
				423	#define SkPacked4444ToG32(c) SkReplicateNibble(SkGetPackedG4444(c))
				424	#define SkPacked4444ToB32(c) SkReplicateNibble(SkGetPackedB4444(c))
				425
				426	#ifdef SK_DEBUG
				427	static inline void SkPMColor16Assert(U16CPU c)
				428	{
				429	unsigned a = SkGetPackedA4444(c);
				430	unsigned r = SkGetPackedR4444(c);
				431	unsigned g = SkGetPackedG4444(c);
				432	unsigned b = SkGetPackedB4444(c);
				433
				434	SkASSERT(a <= 0xF);
				435	SkASSERT(r <= a);
				436	SkASSERT(g <= a);
				437	SkASSERT(b <= a);
				438	}
				439	#else
				440	#define SkPMColor16Assert(c)
				441	#endif
				442
				443	static inline unsigned SkAlpha15To16(unsigned a)
				444	{
				445	SkASSERT(a <= 0xF);
				446	return a + (a >> 3);
				447	}
				448
				449	#ifdef SK_DEBUG
				450	static inline int SkAlphaMul4(int value, int scale)
				451	{
				452	SkASSERT((unsigned)scale <= 0x10);
				453	return value * scale >> 4;
				454	}
				455	#else
				456	#define SkAlphaMul4(value, scale) ((value) * (scale) >> 4)
				457	#endif
				458
				459	static inline unsigned SkR4444ToR565(unsigned r)
				460	{
				461	SkASSERT(r <= 0xF);
				462	return (r << (SK_R16_BITS - 4)) \| (r >> (8 - SK_R16_BITS));
				463	}
				464
				465	static inline unsigned SkG4444ToG565(unsigned g)
				466	{
				467	SkASSERT(g <= 0xF);
				468	return (g << (SK_G16_BITS - 4)) \| (g >> (8 - SK_G16_BITS));
				469	}
				470
				471	static inline unsigned SkB4444ToB565(unsigned b)
				472	{
				473	SkASSERT(b <= 0xF);
				474	return (b << (SK_B16_BITS - 4)) \| (b >> (8 - SK_B16_BITS));
				475	}
				476
				477	static inline SkPMColor16 SkPackARGB4444(unsigned a, unsigned r,
				478	unsigned g, unsigned b)
				479	{
				480	SkASSERT(a <= 0xF);
				481	SkASSERT(r <= a);
				482	SkASSERT(g <= a);
				483	SkASSERT(b <= a);
				484
				485	return (SkPMColor16)((a << SK_A4444_SHIFT) \| (r << SK_R4444_SHIFT) \|
				486	(g << SK_G4444_SHIFT) \| (b << SK_B4444_SHIFT));
				487	}
				488
				489	extern const uint16_t gMask_0F0F;
				490
				491	inline U16CPU SkAlphaMulQ4(U16CPU c, unsigned scale)
				492	{
				493	SkASSERT(scale <= 16);
				494
				495	const unsigned mask = 0xF0F; //gMask_0F0F;
				496
				497	#if 0
				498	unsigned rb = ((c & mask) * scale) >> 4;
				499	unsigned ag = ((c >> 4) & mask) * scale;
				500	return (rb & mask) \| (ag & ~mask);
				501	#else
				502	c = (c & mask) \| ((c & (mask << 4)) << 12);
				503	c = c * scale >> 4;
				504	return (c & mask) \| ((c >> 12) & (mask << 4));
				505	#endif
				506	}
				507
				508	/** Expand the SkPMColor16 color into a 32bit value that can be scaled all at
				509	once by a value up to 16. Used in conjunction with SkCompact_4444.
				510	*/
				511	inline uint32_t SkExpand_4444(U16CPU c)
				512	{
				513	SkASSERT(c == (uint16_t)c);
				514
				515	const unsigned mask = 0xF0F; //gMask_0F0F;
				516	return (c & mask) \| ((c & ~mask) << 12);
				517	}
				518
				519	/** Compress an expanded value (from SkExpand_4444) back down to a SkPMColor16.
				520	NOTE: this explicitly does not clean the top 16 bits (which may be garbage).
				521	It does this for speed, since if it is being written directly to 16bits of
				522	memory, the top 16bits will be ignored. Casting the result to uint16_t here
				523	would add 2 more instructions, slow us down. It is up to the caller to
				524	perform the cast if needed.
				525	*/
				526	static inline U16CPU SkCompact_4444(uint32_t c)
				527	{
				528	const unsigned mask = 0xF0F; //gMask_0F0F;
				529	return (c & mask) \| ((c >> 12) & ~mask);
				530	}
				531
				532	static inline uint16_t SkSrcOver4444To16(SkPMColor16 s, uint16_t d)
				533	{
				534	unsigned sa = SkGetPackedA4444(s);
				535	unsigned sr = SkR4444ToR565(SkGetPackedR4444(s));
				536	unsigned sg = SkG4444ToG565(SkGetPackedG4444(s));
				537	unsigned sb = SkB4444ToB565(SkGetPackedB4444(s));
				538
				539	// To avoid overflow, we have to clear the low bit of the synthetic sg
				540	// if the src alpha is <= 7.
				541	// to see why, try blending 0x4444 on top of 565-white and watch green
				542	// overflow (sum == 64)
				543	sg &= ~(~(sa >> 3) & 1);
				544
				545	unsigned scale = SkAlpha15To16(15 - sa);
				546	unsigned dr = SkAlphaMul4(SkGetPackedR16(d), scale);
				547	unsigned dg = SkAlphaMul4(SkGetPackedG16(d), scale);
				548	unsigned db = SkAlphaMul4(SkGetPackedB16(d), scale);
				549
				550	#if 0
				551	if (sg + dg > 63) {
				552	SkDebugf("---- SkSrcOver4444To16 src=%x dst=%x scale=%d, sg=%d dg=%d\n", s, d, scale, sg, dg);
				553	}
				554	#endif
				555	return SkPackRGB16(sr + dr, sg + dg, sb + db);
				556	}
				557
				558	static inline uint16_t SkBlend4444To16(SkPMColor16 src, uint16_t dst, int scale16)
				559	{
				560	SkASSERT((unsigned)scale16 <= 16);
				561
				562	return SkSrcOver4444To16(SkAlphaMulQ4(src, scale16), dst);
				563	}
				564
				565	static inline uint16_t SkBlend4444(SkPMColor16 src, SkPMColor16 dst, int scale16)
				566	{
				567	SkASSERT((unsigned)scale16 <= 16);
				568
				569	uint32_t src32 = SkExpand_4444(src) * scale16;
				570	// the scaled srcAlpha is the bottom byte
				571	#ifdef SK_DEBUG
				572	{
				573	unsigned srcA = SkGetPackedA4444(src) * scale16;
				574	SkASSERT(srcA == (src32 & 0xFF));
				575	}
				576	#endif
				577	unsigned dstScale = SkAlpha255To256(255 - (src32 & 0xFF)) >> 4;
				578	uint32_t dst32 = SkExpand_4444(dst) * dstScale;
				579	return SkCompact_4444((src32 + dst32) >> 4);
				580	}
				581
				582	static inline SkPMColor SkPixel4444ToPixel32(U16CPU c)
				583	{
				584	uint32_t d = (SkGetPackedA4444(c) << SK_A32_SHIFT) \|
				585	(SkGetPackedR4444(c) << SK_R32_SHIFT) \|
				586	(SkGetPackedG4444(c) << SK_G32_SHIFT) \|
				587	(SkGetPackedB4444(c) << SK_B32_SHIFT);
				588	return d \| (d << 4);
				589	}
				590
				591	static inline SkPMColor16 SkPixel32ToPixel4444(SkPMColor c)
				592	{
				593	return (((c >> (SK_A32_SHIFT + 4)) & 0xF) << SK_A4444_SHIFT) \|
				594	(((c >> (SK_R32_SHIFT + 4)) & 0xF) << SK_R4444_SHIFT) \|
				595	(((c >> (SK_G32_SHIFT + 4)) & 0xF) << SK_G4444_SHIFT) \|
				596	(((c >> (SK_B32_SHIFT + 4)) & 0xF) << SK_B4444_SHIFT);
				597	}
				598
				599	// cheap 2x2 dither
				600	static inline SkPMColor16 SkDitherARGB32To4444(U8CPU a, U8CPU r,
				601	U8CPU g, U8CPU b)
				602	{
				603	a = ((a << 1) - ((a >> 4 << 4) \| (a >> 4))) >> 4;
				604	r = ((r << 1) - ((r >> 4 << 4) \| (r >> 4))) >> 4;
				605	g = ((g << 1) - ((g >> 4 << 4) \| (g >> 4))) >> 4;
				606	b = ((b << 1) - ((b >> 4 << 4) \| (b >> 4))) >> 4;
				607
				608	return SkPackARGB4444(a, r, g, b);
				609	}
				610
				611	static inline SkPMColor16 SkDitherPixel32To4444(SkPMColor c)
				612	{
				613	return SkDitherARGB32To4444(SkGetPackedA32(c), SkGetPackedR32(c),
				614	SkGetPackedG32(c), SkGetPackedB32(c));
				615	}
				616
				617	/* Assumes 16bit is in standard RGBA order.
				618	Transforms a normal ARGB_8888 into the same byte order as
				619	expanded ARGB_4444, but keeps each component 8bits
				620	*/
				621	static uint32_t SkExpand_8888(SkPMColor c)
				622	{
				623	return (((c >> SK_R32_SHIFT) & 0xFF) << 24) \|
				624	(((c >> SK_G32_SHIFT) & 0xFF) << 8) \|
				625	(((c >> SK_B32_SHIFT) & 0xFF) << 16) \|
				626	(((c >> SK_A32_SHIFT) & 0xFF) << 0);
				627	}
				628
				629	/* Undo the operation of SkExpand_8888, turning the argument back into
				630	a SkPMColor.
				631	*/
				632	static SkPMColor SkCompact_8888(uint32_t c)
				633	{
				634	return (((c >> 24) & 0xFF) << SK_R32_SHIFT) \|
				635	(((c >> 8) & 0xFF) << SK_G32_SHIFT) \|
				636	(((c >> 16) & 0xFF) << SK_B32_SHIFT) \|
				637	(((c >> 0) & 0xFF) << SK_A32_SHIFT);
				638	}
				639
				640	/* Like SkExpand_8888, this transforms a pmcolor into the expanded 4444 format,
				641	but this routine just keeps the high 4bits of each component in the low
				642	4bits of the result (just like a newly expanded PMColor16).
				643	*/
				644	static uint32_t SkExpand32_4444(SkPMColor c)
				645	{
				646	return (((c >> (SK_R32_SHIFT + 4)) & 0xF) << 24) \|
				647	(((c >> (SK_G32_SHIFT + 4)) & 0xF) << 8) \|
				648	(((c >> (SK_B32_SHIFT + 4)) & 0xF) << 16) \|
				649	(((c >> (SK_A32_SHIFT + 4)) & 0xF) << 0);
				650	}
				651
				652	// takes two values and alternates them as part of a memset16
				653	// used for cheap 2x2 dithering when the colors are opaque
				654	void sk_dither_memset16(uint16_t dst[], uint16_t value, uint16_t other, int n);
				655
				656	#endif
				657