Blame - libpixelflinger/scanline.cpp - platform/system/core

blob: 8fba14740b6520654ff481061157e79088bcbc8f [file] [log] [blame]

The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	1	/* libs/pixelflinger/scanline.cpp
				2	**
David 'Digit' Turner	39764f4	2011-04-15 20:12:07 +0200	[diff] [blame]	3	** Copyright 2006-2011, The Android Open Source Project
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	4	**
				5	** Licensed under the Apache License, Version 2.0 (the "License");
				6	** you may not use this file except in compliance with the License.
				7	** You may obtain a copy of the License at
				8	**
				9	** http://www.apache.org/licenses/LICENSE-2.0
				10	**
				11	** Unless required by applicable law or agreed to in writing, software
				12	** distributed under the License is distributed on an "AS IS" BASIS,
				13	** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				14	** See the License for the specific language governing permissions and
				15	** limitations under the License.
				16	*/
				17
				18
				19	#define LOG_TAG "pixelflinger"
				20
				21	#include <assert.h>
				22	#include <stdlib.h>
				23	#include <stdio.h>
				24	#include <string.h>
				25
				26	#include <cutils/memory.h>
				27	#include <cutils/log.h>
				28
				29	#include "buffer.h"
				30	#include "scanline.h"
				31
				32	#include "codeflinger/CodeCache.h"
				33	#include "codeflinger/GGLAssembler.h"
				34	#include "codeflinger/ARMAssembler.h"
				35	//#include "codeflinger/ARMAssemblerOptimizer.h"
				36
				37	// ----------------------------------------------------------------------------
				38
				39	#define ANDROID_CODEGEN_GENERIC 0 // force generic pixel pipeline
				40	#define ANDROID_CODEGEN_C 1 // hand-written C, fallback generic
				41	#define ANDROID_CODEGEN_ASM 2 // hand-written asm, fallback generic
				42	#define ANDROID_CODEGEN_GENERATED 3 // hand-written asm, fallback codegen
				43
				44	#ifdef NDEBUG
				45	# define ANDROID_RELEASE
				46	# define ANDROID_CODEGEN ANDROID_CODEGEN_GENERATED
				47	#else
				48	# define ANDROID_DEBUG
				49	# define ANDROID_CODEGEN ANDROID_CODEGEN_GENERATED
				50	#endif
				51
				52	#if defined(__arm__)
				53	# define ANDROID_ARM_CODEGEN 1
				54	#else
				55	# define ANDROID_ARM_CODEGEN 0
				56	#endif
				57
				58	#define DEBUG__CODEGEN_ONLY 0
				59
David 'Digit' Turner	39764f4	2011-04-15 20:12:07 +0200	[diff] [blame]	60	/* Set to 1 to dump to the log the states that need a new
				61	* code-generated scanline callback, i.e. those that don't
				62	* have a corresponding shortcut function.
				63	*/
				64	#define DEBUG_NEEDS 0
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	65
				66	#define ASSEMBLY_SCRATCH_SIZE 2048
				67
				68	// ----------------------------------------------------------------------------
				69	namespace android {
				70	// ----------------------------------------------------------------------------
				71
				72	static void init_y(context_t*, int32_t);
				73	static void init_y_noop(context_t*, int32_t);
				74	static void init_y_packed(context_t*, int32_t);
				75	static void init_y_error(context_t*, int32_t);
				76
				77	static void step_y__generic(context_t* c);
				78	static void step_y__nop(context_t*);
				79	static void step_y__smooth(context_t* c);
				80	static void step_y__tmu(context_t* c);
				81	static void step_y__w(context_t* c);
				82
				83	static void scanline(context_t* c);
				84	static void scanline_perspective(context_t* c);
				85	static void scanline_perspective_single(context_t* c);
				86	static void scanline_t32cb16blend(context_t* c);
David 'Digit' Turner	39764f4	2011-04-15 20:12:07 +0200	[diff] [blame]	87	static void scanline_t32cb16blend_dither(context_t* c);
				88	static void scanline_t32cb16blend_srca(context_t* c);
				89	static void scanline_t32cb16blend_clamp(context_t* c);
				90	static void scanline_t32cb16blend_clamp_dither(context_t* c);
				91	static void scanline_t32cb16blend_clamp_mod(context_t* c);
				92	static void scanline_x32cb16blend_clamp_mod(context_t* c);
				93	static void scanline_t32cb16blend_clamp_mod_dither(context_t* c);
				94	static void scanline_x32cb16blend_clamp_mod_dither(context_t* c);
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	95	static void scanline_t32cb16(context_t* c);
David 'Digit' Turner	39764f4	2011-04-15 20:12:07 +0200	[diff] [blame]	96	static void scanline_t32cb16_dither(context_t* c);
				97	static void scanline_t32cb16_clamp(context_t* c);
				98	static void scanline_t32cb16_clamp_dither(context_t* c);
Martyn Capewell	f9e8ab0	2009-12-07 15:00:19 +0000	[diff] [blame]	99	static void scanline_col32cb16blend(context_t* c);
David 'Digit' Turner	39764f4	2011-04-15 20:12:07 +0200	[diff] [blame]	100	static void scanline_t16cb16_clamp(context_t* c);
				101	static void scanline_t16cb16blend_clamp_mod(context_t* c);
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	102	static void scanline_memcpy(context_t* c);
				103	static void scanline_memset8(context_t* c);
				104	static void scanline_memset16(context_t* c);
				105	static void scanline_memset32(context_t* c);
				106	static void scanline_noop(context_t* c);
				107	static void scanline_set(context_t* c);
				108	static void scanline_clear(context_t* c);
				109
				110	static void rect_generic(context_t* c, size_t yc);
				111	static void rect_memcpy(context_t* c, size_t yc);
				112
				113	extern "C" void scanline_t32cb16blend_arm(uint16_t, uint32_t, size_t);
				114	extern "C" void scanline_t32cb16_arm(uint16_t dst, uint32_t src, size_t ct);
Martyn Capewell	f9e8ab0	2009-12-07 15:00:19 +0000	[diff] [blame]	115	extern "C" void scanline_col32cb16blend_neon(uint16_t dst, uint32_t col, size_t ct);
				116	extern "C" void scanline_col32cb16blend_arm(uint16_t *dst, uint32_t col, size_t ct);
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	117
				118	// ----------------------------------------------------------------------------
				119
/* Packs a 32-bit pixel into a 16-bit 5:6:5 value.
 *
 * Bits 3..7 of the input end up in bits 11..15 of the result, bits 10..15 in
 * bits 5..10, and bits 19..23 in bits 0..4. The low-order bits of each
 * 8-bit channel and the whole top byte (bits 24..31) are discarded.
 * The red/green/blue labels below follow the function name (red in the
 * lowest byte of the input).
 */
static inline uint16_t convertAbgr8888ToRgb565(uint32_t pix)
{
    const uint32_t red   = (pix << 8)  & 0xf800;
    const uint32_t green = (pix >> 5)  & 0x07e0;
    const uint32_t blue  = (pix >> 19) & 0x001f;
    return (uint16_t)(red | green | blue);
}
				126
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	127	struct shortcut_t {
				128	needs_filter_t filter;
				129	const char* desc;
				130	void (scanline)(context_t);
				131	void (init_y)(context_t, int32_t);
				132	};
				133
				134	// Keep in sync with needs
David 'Digit' Turner	39764f4	2011-04-15 20:12:07 +0200	[diff] [blame]	135
				136	/* To understand the values here, have a look at:
				137	* system/core/include/private/pixelflinger/ggl_context.h
				138	*
				139	* Especially the lines defining and using GGL_RESERVE_NEEDS
				140	*
				141	* Quick reminders:
				142	* - the last nibble of the first value is the destination buffer format.
				143	* - the last nibble of the third value is the source texture format
				144	* - formats: 4=rgb565 1=abgr8888 2=xbgr8888
				145	*
				146	* In the descriptions below:
				147	*
				148	* SRC means we copy the source pixels to the destination
				149	*
				150	* SRC_OVER means we blend the source pixels to the destination
				151	* with dstFactor = 1-srcA, srcFactor=1 (premultiplied source).
				152	* This mode is otherwise called 'blend'.
				153	*
				154	* SRCA_OVER means we blend the source pixels to the destination
				155	* with dstFactor=1-srcA srcFactor=srcA (non-premul source).
				156	* This mode is otherwise called 'blend_srca'
				157	*
				158	* clamp means we fetch source pixels from a texture with u/v clamping
				159	*
				160	* mod means the source pixels are modulated (multiplied) by the
				161	* a/r/g/b of the current context's color. Typically used for
				162	* fade-in / fade-out.
				163	*
				164	* dither means we dither 32 bit values to 16 bits
				165	*/
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	166	static shortcut_t shortcuts[] = {
				167	{ { { 0x03515104, 0x00000077, { 0x00000A01, 0x00000000 } },
				168	{ 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
David 'Digit' Turner	39764f4	2011-04-15 20:12:07 +0200	[diff] [blame]	169	"565 fb, 8888 tx, blend SRC_OVER", scanline_t32cb16blend, init_y_noop },
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	170	{ { { 0x03010104, 0x00000077, { 0x00000A01, 0x00000000 } },
				171	{ 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
David 'Digit' Turner	39764f4	2011-04-15 20:12:07 +0200	[diff] [blame]	172	"565 fb, 8888 tx, SRC", scanline_t32cb16, init_y_noop },
				173	/* same as first entry, but with dithering */
				174	{ { { 0x03515104, 0x00000177, { 0x00000A01, 0x00000000 } },
				175	{ 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
				176	"565 fb, 8888 tx, blend SRC_OVER dither", scanline_t32cb16blend_dither, init_y_noop },
				177	/* same as second entry, but with dithering */
				178	{ { { 0x03010104, 0x00000177, { 0x00000A01, 0x00000000 } },
				179	{ 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
				180	"565 fb, 8888 tx, SRC dither", scanline_t32cb16_dither, init_y_noop },
				181	/* this is used during the boot animation - CHEAT: ignore dithering */
				182	{ { { 0x03545404, 0x00000077, { 0x00000A01, 0x00000000 } },
				183	{ 0xFFFFFFFF, 0xFFFFFEFF, { 0xFFFFFFFF, 0x0000003F } } },
				184	"565 fb, 8888 tx, blend dst:ONE_MINUS_SRCA src:SRCA", scanline_t32cb16blend_srca, init_y_noop },
				185	/* special case for arbitrary texture coordinates (think scaling) */
				186	{ { { 0x03515104, 0x00000077, { 0x00000001, 0x00000000 } },
				187	{ 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
				188	"565 fb, 8888 tx, SRC_OVER clamp", scanline_t32cb16blend_clamp, init_y },
				189	{ { { 0x03515104, 0x00000177, { 0x00000001, 0x00000000 } },
				190	{ 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
				191	"565 fb, 8888 tx, SRC_OVER clamp dither", scanline_t32cb16blend_clamp_dither, init_y },
				192	/* another case used during emulation */
				193	{ { { 0x03515104, 0x00000077, { 0x00001001, 0x00000000 } },
				194	{ 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
				195	"565 fb, 8888 tx, SRC_OVER clamp modulate", scanline_t32cb16blend_clamp_mod, init_y },
				196	/* and this */
				197	{ { { 0x03515104, 0x00000077, { 0x00001002, 0x00000000 } },
				198	{ 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
				199	"565 fb, x888 tx, SRC_OVER clamp modulate", scanline_x32cb16blend_clamp_mod, init_y },
				200	{ { { 0x03515104, 0x00000177, { 0x00001001, 0x00000000 } },
				201	{ 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
				202	"565 fb, 8888 tx, SRC_OVER clamp modulate dither", scanline_t32cb16blend_clamp_mod_dither, init_y },
				203	{ { { 0x03515104, 0x00000177, { 0x00001002, 0x00000000 } },
				204	{ 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
				205	"565 fb, x888 tx, SRC_OVER clamp modulate dither", scanline_x32cb16blend_clamp_mod_dither, init_y },
				206	{ { { 0x03010104, 0x00000077, { 0x00000001, 0x00000000 } },
				207	{ 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
				208	"565 fb, 8888 tx, SRC clamp", scanline_t32cb16_clamp, init_y },
				209	{ { { 0x03010104, 0x00000077, { 0x00000002, 0x00000000 } },
				210	{ 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
				211	"565 fb, x888 tx, SRC clamp", scanline_t32cb16_clamp, init_y },
				212	{ { { 0x03010104, 0x00000177, { 0x00000001, 0x00000000 } },
				213	{ 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
				214	"565 fb, 8888 tx, SRC clamp dither", scanline_t32cb16_clamp_dither, init_y },
				215	{ { { 0x03010104, 0x00000177, { 0x00000002, 0x00000000 } },
				216	{ 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
				217	"565 fb, x888 tx, SRC clamp dither", scanline_t32cb16_clamp_dither, init_y },
				218	{ { { 0x03010104, 0x00000077, { 0x00000004, 0x00000000 } },
				219	{ 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
				220	"565 fb, 565 tx, SRC clamp", scanline_t16cb16_clamp, init_y },
				221	{ { { 0x03515104, 0x00000077, { 0x00001004, 0x00000000 } },
				222	{ 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
				223	"565 fb, 565 tx, SRC_OVER clamp", scanline_t16cb16blend_clamp_mod, init_y },
Martyn Capewell	f9e8ab0	2009-12-07 15:00:19 +0000	[diff] [blame]	224	{ { { 0x03515104, 0x00000077, { 0x00000000, 0x00000000 } },
				225	{ 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0xFFFFFFFF } } },
				226	"565 fb, 8888 fixed color", scanline_col32cb16blend, init_y_packed },
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	227	{ { { 0x00000000, 0x00000000, { 0x00000000, 0x00000000 } },
				228	{ 0x00000000, 0x00000007, { 0x00000000, 0x00000000 } } },
				229	"(nop) alpha test", scanline_noop, init_y_noop },
				230	{ { { 0x00000000, 0x00000000, { 0x00000000, 0x00000000 } },
				231	{ 0x00000000, 0x00000070, { 0x00000000, 0x00000000 } } },
				232	"(nop) depth test", scanline_noop, init_y_noop },
				233	{ { { 0x05000000, 0x00000000, { 0x00000000, 0x00000000 } },
				234	{ 0x0F000000, 0x00000080, { 0x00000000, 0x00000000 } } },
				235	"(nop) logic_op", scanline_noop, init_y_noop },
				236	{ { { 0xF0000000, 0x00000000, { 0x00000000, 0x00000000 } },
				237	{ 0xF0000000, 0x00000080, { 0x00000000, 0x00000000 } } },
				238	"(nop) color mask", scanline_noop, init_y_noop },
				239	{ { { 0x0F000000, 0x00000077, { 0x00000000, 0x00000000 } },
				240	{ 0xFF000000, 0x000000F7, { 0x00000000, 0x00000000 } } },
				241	"(set) logic_op", scanline_set, init_y_noop },
				242	{ { { 0x00000000, 0x00000077, { 0x00000000, 0x00000000 } },
				243	{ 0xFF000000, 0x000000F7, { 0x00000000, 0x00000000 } } },
				244	"(clear) logic_op", scanline_clear, init_y_noop },
				245	{ { { 0x03000000, 0x00000077, { 0x00000000, 0x00000000 } },
				246	{ 0xFFFFFF00, 0x000000F7, { 0x00000000, 0x00000000 } } },
				247	"(clear) blending 0/0", scanline_clear, init_y_noop },
				248	{ { { 0x00000000, 0x00000000, { 0x00000000, 0x00000000 } },
				249	{ 0x0000003F, 0x00000000, { 0x00000000, 0x00000000 } } },
				250	"(error) invalid color-buffer format", scanline_noop, init_y_error },
				251	};
				252	static const needs_filter_t noblend1to1 = {
				253	// (disregard dithering, see below)
				254	{ 0x03010100, 0x00000077, { 0x00000A00, 0x00000000 } },
				255	{ 0xFFFFFFC0, 0xFFFFFEFF, { 0xFFFFFFC0, 0x0000003F } }
				256	};
				257	static const needs_filter_t fill16noblend = {
				258	{ 0x03010100, 0x00000077, { 0x00000000, 0x00000000 } },
				259	{ 0xFFFFFFC0, 0xFFFFFFFF, { 0x0000003F, 0x0000003F } }
				260	};
				261
				262	// ----------------------------------------------------------------------------
				263
				264	#if ANDROID_ARM_CODEGEN
				265	static CodeCache gCodeCache(12 * 1024);
				266
				267	class ScanlineAssembly : public Assembly {
				268	AssemblyKey<needs_t> mKey;
				269	public:
				270	ScanlineAssembly(needs_t needs, size_t size)
				271	: Assembly(size), mKey(needs) { }
				272	const AssemblyKey<needs_t>& key() const { return mKey; }
				273	};
				274	#endif
				275
				276	// ----------------------------------------------------------------------------
				277
				278	void ggl_init_scanline(context_t* c)
				279	{
				280	c->init_y = init_y;
				281	c->step_y = step_y__generic;
				282	c->scanline = scanline;
				283	}
				284
				285	void ggl_uninit_scanline(context_t* c)
				286	{
				287	if (c->state.buffers.coverage)
				288	free(c->state.buffers.coverage);
				289	#if ANDROID_ARM_CODEGEN
				290	if (c->scanline_as)
				291	c->scanline_as->decStrong(c);
				292	#endif
				293	}
				294
				295	// ----------------------------------------------------------------------------
				296
				297	static void pick_scanline(context_t* c)
				298	{
				299	#if (!defined(DEBUG__CODEGEN_ONLY) \|\| (DEBUG__CODEGEN_ONLY == 0))
				300
				301	#if ANDROID_CODEGEN == ANDROID_CODEGEN_GENERIC
				302	c->init_y = init_y;
				303	c->step_y = step_y__generic;
				304	c->scanline = scanline;
				305	return;
				306	#endif
				307
				308	//printf("*** needs [%08lx:%08lx:%08lx:%08lx]\n",
				309	// c->state.needs.n, c->state.needs.p,
				310	// c->state.needs.t[0], c->state.needs.t[1]);
				311
				312	// first handle the special case that we cannot test with a filter
				313	const uint32_t cb_format = GGL_READ_NEEDS(CB_FORMAT, c->state.needs.n);
				314	if (GGL_READ_NEEDS(T_FORMAT, c->state.needs.t[0]) == cb_format) {
				315	if (c->state.needs.match(noblend1to1)) {
				316	// this will match regardless of dithering state, since both
				317	// src and dest have the same format anyway, there is no dithering
				318	// to be done.
				319	const GGLFormat* f =
				320	&(c->formats[GGL_READ_NEEDS(T_FORMAT, c->state.needs.t[0])]);
				321	if ((f->components == GGL_RGB) \|\|
				322	(f->components == GGL_RGBA) \|\|
				323	(f->components == GGL_LUMINANCE) \|\|
				324	(f->components == GGL_LUMINANCE_ALPHA))
				325	{
				326	// format must have all of RGB components
				327	// (so the current color doesn't show through)
				328	c->scanline = scanline_memcpy;
				329	c->init_y = init_y_noop;
				330	return;
				331	}
				332	}
				333	}
				334
				335	if (c->state.needs.match(fill16noblend)) {
				336	c->init_y = init_y_packed;
				337	switch (c->formats[cb_format].size) {
				338	case 1: c->scanline = scanline_memset8; return;
				339	case 2: c->scanline = scanline_memset16; return;
				340	case 4: c->scanline = scanline_memset32; return;
				341	}
				342	}
				343
				344	const int numFilters = sizeof(shortcuts)/sizeof(shortcut_t);
				345	for (int i=0 ; i<numFilters ; i++) {
				346	if (c->state.needs.match(shortcuts[i].filter)) {
				347	c->scanline = shortcuts[i].scanline;
				348	c->init_y = shortcuts[i].init_y;
				349	return;
				350	}
				351	}
				352
David 'Digit' Turner	39764f4	2011-04-15 20:12:07 +0200	[diff] [blame]	353	#ifdef DEBUG_NEEDS
				354	LOGI("Needs: n=0x%08x p=0x%08x t0=0x%08x t1=0x%08x",
				355	c->state.needs.n, c->state.needs.p,
				356	c->state.needs.t[0], c->state.needs.t[1]);
				357	#endif
				358
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	359	#endif // DEBUG__CODEGEN_ONLY
				360
				361	c->init_y = init_y;
				362	c->step_y = step_y__generic;
				363
				364	#if ANDROID_ARM_CODEGEN
				365	// we're going to have to generate some code...
				366	// here, generate code for our pixel pipeline
				367	const AssemblyKey<needs_t> key(c->state.needs);
				368	sp<Assembly> assembly = gCodeCache.lookup(key);
				369	if (assembly == 0) {
				370	// create a new assembly region
				371	sp<ScanlineAssembly> a = new ScanlineAssembly(c->state.needs,
				372	ASSEMBLY_SCRATCH_SIZE);
				373	// initialize our assembler
				374	GGLAssembler assembler( new ARMAssembler(a) );
				375	//GGLAssembler assembler(
				376	// new ARMAssemblerOptimizer(new ARMAssembler(a)) );
				377	// generate the scanline code for the given needs
				378	int err = assembler.scanline(c->state.needs, c);
				379	if (ggl_likely(!err)) {
				380	// finally, cache this assembly
				381	err = gCodeCache.cache(a->key(), a);
				382	}
				383	if (ggl_unlikely(err)) {
				384	LOGE("error generating or caching assembly. Reverting to NOP.");
				385	c->scanline = scanline_noop;
				386	c->init_y = init_y_noop;
				387	c->step_y = step_y__nop;
				388	return;
				389	}
				390	assembly = a;
				391	}
				392
				393	// release the previous assembly
				394	if (c->scanline_as) {
				395	c->scanline_as->decStrong(c);
				396	}
				397
				398	//LOGI("using generated pixel-pipeline");
				399	c->scanline_as = assembly.get();
				400	c->scanline_as->incStrong(c); // hold on to assembly
				401	c->scanline = (void()(context_t c))assembly->base();
				402	#else
				403	// LOGW("using generic (slow) pixel-pipeline");
				404	c->scanline = scanline;
				405	#endif
				406	}
				407
				408	void ggl_pick_scanline(context_t* c)
				409	{
				410	pick_scanline(c);
				411	if ((c->state.enables & GGL_ENABLE_W) &&
				412	(c->state.enables & GGL_ENABLE_TMUS))
				413	{
				414	c->span = c->scanline;
				415	c->scanline = scanline_perspective;
				416	if (!(c->state.enabled_tmu & (c->state.enabled_tmu - 1))) {
				417	// only one TMU enabled
				418	c->scanline = scanline_perspective_single;
				419	}
				420	}
				421	}
				422
				423	// ----------------------------------------------------------------------------
				424
				425	static void blending(context_t* c, pixel_t* fragment, pixel_t* fb);
				426	static void blend_factor(context_t* c, pixel_t* r, uint32_t factor,
				427	const pixel_t* src, const pixel_t* dst);
				428	static void rescale(uint32_t& u, uint8_t& su, uint32_t& v, uint8_t& sv);
				429
				430	#if ANDROID_ARM_CODEGEN && (ANDROID_CODEGEN == ANDROID_CODEGEN_GENERATED)
				431
				432	// no need to compile the generic-pipeline, it can't be reached
				433	void scanline(context_t*)
				434	{
				435	}
				436
				437	#else
				438
				439	void rescale(uint32_t& u, uint8_t& su, uint32_t& v, uint8_t& sv)
				440	{
				441	if (su && sv) {
				442	if (su > sv) {
				443	v = ggl_expand(v, sv, su);
				444	sv = su;
				445	} else if (su < sv) {
				446	u = ggl_expand(u, su, sv);
				447	su = sv;
				448	}
				449	}
				450	}
				451
				452	void blending(context_t* c, pixel_t* fragment, pixel_t* fb)
				453	{
				454	rescale(fragment->c[0], fragment->s[0], fb->c[0], fb->s[0]);
				455	rescale(fragment->c[1], fragment->s[1], fb->c[1], fb->s[1]);
				456	rescale(fragment->c[2], fragment->s[2], fb->c[2], fb->s[2]);
				457	rescale(fragment->c[3], fragment->s[3], fb->c[3], fb->s[3]);
				458
				459	pixel_t sf, df;
				460	blend_factor(c, &sf, c->state.blend.src, fragment, fb);
				461	blend_factor(c, &df, c->state.blend.dst, fragment, fb);
				462
				463	fragment->c[1] =
				464	gglMulAddx(fragment->c[1], sf.c[1], gglMulx(fb->c[1], df.c[1]));
				465	fragment->c[2] =
				466	gglMulAddx(fragment->c[2], sf.c[2], gglMulx(fb->c[2], df.c[2]));
				467	fragment->c[3] =
				468	gglMulAddx(fragment->c[3], sf.c[3], gglMulx(fb->c[3], df.c[3]));
				469
				470	if (c->state.blend.alpha_separate) {
				471	blend_factor(c, &sf, c->state.blend.src_alpha, fragment, fb);
				472	blend_factor(c, &df, c->state.blend.dst_alpha, fragment, fb);
				473	}
				474
				475	fragment->c[0] =
				476	gglMulAddx(fragment->c[0], sf.c[0], gglMulx(fb->c[0], df.c[0]));
				477
				478	// clamp to 1.0
				479	if (fragment->c[0] >= (1LU<<fragment->s[0]))
				480	fragment->c[0] = (1<<fragment->s[0])-1;
				481	if (fragment->c[1] >= (1LU<<fragment->s[1]))
				482	fragment->c[1] = (1<<fragment->s[1])-1;
				483	if (fragment->c[2] >= (1LU<<fragment->s[2]))
				484	fragment->c[2] = (1<<fragment->s[2])-1;
				485	if (fragment->c[3] >= (1LU<<fragment->s[3]))
				486	fragment->c[3] = (1<<fragment->s[3])-1;
				487	}
				488
				489	static inline int blendfactor(uint32_t x, uint32_t size, uint32_t def = 0)
				490	{
				491	if (!size)
				492	return def;
				493
				494	// scale to 16 bits
				495	if (size > 16) {
				496	x >>= (size - 16);
				497	} else if (size < 16) {
				498	x = ggl_expand(x, size, 16);
				499	}
				500	x += x >> 15;
				501	return x;
				502	}
				503
				504	void blend_factor(context_t* c, pixel_t* r,
				505	uint32_t factor, const pixel_t* src, const pixel_t* dst)
				506	{
				507	switch (factor) {
				508	case GGL_ZERO:
				509	r->c[1] =
				510	r->c[2] =
				511	r->c[3] =
				512	r->c[0] = 0;
				513	break;
				514	case GGL_ONE:
				515	r->c[1] =
				516	r->c[2] =
				517	r->c[3] =
				518	r->c[0] = FIXED_ONE;
				519	break;
				520	case GGL_DST_COLOR:
				521	r->c[1] = blendfactor(dst->c[1], dst->s[1]);
				522	r->c[2] = blendfactor(dst->c[2], dst->s[2]);
				523	r->c[3] = blendfactor(dst->c[3], dst->s[3]);
				524	r->c[0] = blendfactor(dst->c[0], dst->s[0]);
				525	break;
				526	case GGL_SRC_COLOR:
				527	r->c[1] = blendfactor(src->c[1], src->s[1]);
				528	r->c[2] = blendfactor(src->c[2], src->s[2]);
				529	r->c[3] = blendfactor(src->c[3], src->s[3]);
				530	r->c[0] = blendfactor(src->c[0], src->s[0]);
				531	break;
				532	case GGL_ONE_MINUS_DST_COLOR:
				533	r->c[1] = FIXED_ONE - blendfactor(dst->c[1], dst->s[1]);
				534	r->c[2] = FIXED_ONE - blendfactor(dst->c[2], dst->s[2]);
				535	r->c[3] = FIXED_ONE - blendfactor(dst->c[3], dst->s[3]);
				536	r->c[0] = FIXED_ONE - blendfactor(dst->c[0], dst->s[0]);
				537	break;
				538	case GGL_ONE_MINUS_SRC_COLOR:
				539	r->c[1] = FIXED_ONE - blendfactor(src->c[1], src->s[1]);
				540	r->c[2] = FIXED_ONE - blendfactor(src->c[2], src->s[2]);
				541	r->c[3] = FIXED_ONE - blendfactor(src->c[3], src->s[3]);
				542	r->c[0] = FIXED_ONE - blendfactor(src->c[0], src->s[0]);
				543	break;
				544	case GGL_SRC_ALPHA:
				545	r->c[1] =
				546	r->c[2] =
				547	r->c[3] =
				548	r->c[0] = blendfactor(src->c[0], src->s[0], FIXED_ONE);
				549	break;
				550	case GGL_ONE_MINUS_SRC_ALPHA:
				551	r->c[1] =
				552	r->c[2] =
				553	r->c[3] =
				554	r->c[0] = FIXED_ONE - blendfactor(src->c[0], src->s[0], FIXED_ONE);
				555	break;
				556	case GGL_DST_ALPHA:
				557	r->c[1] =
				558	r->c[2] =
				559	r->c[3] =
				560	r->c[0] = blendfactor(dst->c[0], dst->s[0], FIXED_ONE);
				561	break;
				562	case GGL_ONE_MINUS_DST_ALPHA:
				563	r->c[1] =
				564	r->c[2] =
				565	r->c[3] =
				566	r->c[0] = FIXED_ONE - blendfactor(dst->c[0], dst->s[0], FIXED_ONE);
				567	break;
				568	case GGL_SRC_ALPHA_SATURATE:
				569	// XXX: GGL_SRC_ALPHA_SATURATE
				570	break;
				571	}
				572	}
				573
				574	static GGLfixed wrapping(int32_t coord, uint32_t size, int tx_wrap)
				575	{
				576	GGLfixed d;
				577	if (tx_wrap == GGL_REPEAT) {
				578	d = (uint32_t(coord)>>16) * size;
				579	} else if (tx_wrap == GGL_CLAMP) { // CLAMP_TO_EDGE semantics
				580	const GGLfixed clamp_min = FIXED_HALF;
				581	const GGLfixed clamp_max = (size << 16) - FIXED_HALF;
				582	if (coord < clamp_min) coord = clamp_min;
				583	if (coord > clamp_max) coord = clamp_max;
				584	d = coord;
				585	} else { // 1:1
				586	const GGLfixed clamp_min = 0;
				587	const GGLfixed clamp_max = (size << 16);
				588	if (coord < clamp_min) coord = clamp_min;
				589	if (coord > clamp_max) coord = clamp_max;
				590	d = coord;
				591	}
				592	return d;
				593	}
				594
				595	static inline
				596	GGLcolor ADJUST_COLOR_ITERATOR(GGLcolor v, GGLcolor dvdx, int len)
				597	{
				598	const int32_t end = dvdx * (len-1) + v;
				599	if (end < 0)
				600	v -= end;
				601	v &= ~(v>>31);
				602	return v;
				603	}
				604
				605	void scanline(context_t* c)
				606	{
				607	const uint32_t enables = c->state.enables;
				608	const int xs = c->iterators.xl;
				609	const int x1 = c->iterators.xr;
				610	int xc = x1 - xs;
				611	const int16_t* covPtr = c->state.buffers.coverage + xs;
				612
				613	// All iterated values are sampled at the pixel center
				614
				615	// reset iterators for that scanline...
				616	GGLcolor r, g, b, a;
				617	iterators_t& ci = c->iterators;
				618	if (enables & GGL_ENABLE_SMOOTH) {
				619	r = (xs * c->shade.drdx) + ci.ydrdy;
				620	g = (xs * c->shade.dgdx) + ci.ydgdy;
				621	b = (xs * c->shade.dbdx) + ci.ydbdy;
				622	a = (xs * c->shade.dadx) + ci.ydady;
				623	r = ADJUST_COLOR_ITERATOR(r, c->shade.drdx, xc);
				624	g = ADJUST_COLOR_ITERATOR(g, c->shade.dgdx, xc);
				625	b = ADJUST_COLOR_ITERATOR(b, c->shade.dbdx, xc);
				626	a = ADJUST_COLOR_ITERATOR(a, c->shade.dadx, xc);
				627	} else {
				628	r = ci.ydrdy;
				629	g = ci.ydgdy;
				630	b = ci.ydbdy;
				631	a = ci.ydady;
				632	}
				633
				634	// z iterators are 1.31
				635	GGLfixed z = (xs * c->shade.dzdx) + ci.ydzdy;
				636	GGLfixed f = (xs * c->shade.dfdx) + ci.ydfdy;
				637
				638	struct {
				639	GGLfixed s, t;
				640	} tc[GGL_TEXTURE_UNIT_COUNT];
				641	if (enables & GGL_ENABLE_TMUS) {
				642	for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) {
				643	if (c->state.texture[i].enable) {
				644	texture_iterators_t& ti = c->state.texture[i].iterators;
				645	if (enables & GGL_ENABLE_W) {
				646	tc[i].s = ti.ydsdy;
				647	tc[i].t = ti.ydtdy;
				648	} else {
				649	tc[i].s = (xs * ti.dsdx) + ti.ydsdy;
				650	tc[i].t = (xs * ti.dtdx) + ti.ydtdy;
				651	}
				652	}
				653	}
				654	}
				655
				656	pixel_t fragment;
				657	pixel_t texel;
				658	pixel_t fb;
				659
				660	uint32_t x = xs;
				661	uint32_t y = c->iterators.y;
				662
				663	while (xc--) {
				664
				665	{ // just a scope
				666
				667	// read color (convert to 8 bits by keeping only the integer part)
				668	fragment.s[1] = fragment.s[2] =
				669	fragment.s[3] = fragment.s[0] = 8;
				670	fragment.c[1] = r >> (GGL_COLOR_BITS-8);
				671	fragment.c[2] = g >> (GGL_COLOR_BITS-8);
				672	fragment.c[3] = b >> (GGL_COLOR_BITS-8);
				673	fragment.c[0] = a >> (GGL_COLOR_BITS-8);
				674
				675	// texturing
				676	if (enables & GGL_ENABLE_TMUS) {
				677	for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) {
				678	texture_t& tx = c->state.texture[i];
				679	if (!tx.enable)
				680	continue;
				681	texture_iterators_t& ti = tx.iterators;
				682	int32_t u, v;
				683
				684	// s-coordinate
				685	if (tx.s_coord != GGL_ONE_TO_ONE) {
				686	const int w = tx.surface.width;
				687	u = wrapping(tc[i].s, w, tx.s_wrap);
				688	tc[i].s += ti.dsdx;
				689	} else {
				690	u = (((tx.shade.is0>>16) + x)<<16) + FIXED_HALF;
				691	}
				692
				693	// t-coordinate
				694	if (tx.t_coord != GGL_ONE_TO_ONE) {
				695	const int h = tx.surface.height;
				696	v = wrapping(tc[i].t, h, tx.t_wrap);
				697	tc[i].t += ti.dtdx;
				698	} else {
				699	v = (((tx.shade.it0>>16) + y)<<16) + FIXED_HALF;
				700	}
				701
				702	// read texture
				703	if (tx.mag_filter == GGL_NEAREST &&
				704	tx.min_filter == GGL_NEAREST)
				705	{
				706	u >>= 16;
				707	v >>= 16;
				708	tx.surface.read(&tx.surface, c, u, v, &texel);
				709	} else {
				710	const int w = tx.surface.width;
				711	const int h = tx.surface.height;
				712	u -= FIXED_HALF;
				713	v -= FIXED_HALF;
				714	int u0 = u >> 16;
				715	int v0 = v >> 16;
				716	int u1 = u0 + 1;
				717	int v1 = v0 + 1;
				718	if (tx.s_wrap == GGL_REPEAT) {
				719	if (u0<0) u0 += w;
				720	if (u1<0) u1 += w;
				721	if (u0>=w) u0 -= w;
				722	if (u1>=w) u1 -= w;
				723	} else {
				724	if (u0<0) u0 = 0;
				725	if (u1<0) u1 = 0;
				726	if (u0>=w) u0 = w-1;
				727	if (u1>=w) u1 = w-1;
				728	}
				729	if (tx.t_wrap == GGL_REPEAT) {
				730	if (v0<0) v0 += h;
				731	if (v1<0) v1 += h;
				732	if (v0>=h) v0 -= h;
				733	if (v1>=h) v1 -= h;
				734	} else {
				735	if (v0<0) v0 = 0;
				736	if (v1<0) v1 = 0;
				737	if (v0>=h) v0 = h-1;
				738	if (v1>=h) v1 = h-1;
				739	}
				740	pixel_t texels[4];
				741	uint32_t mm[4];
				742	tx.surface.read(&tx.surface, c, u0, v0, &texels[0]);
				743	tx.surface.read(&tx.surface, c, u0, v1, &texels[1]);
				744	tx.surface.read(&tx.surface, c, u1, v0, &texels[2]);
				745	tx.surface.read(&tx.surface, c, u1, v1, &texels[3]);
				746	u = (u >> 12) & 0xF;
				747	v = (v >> 12) & 0xF;
				748	u += u>>3;
				749	v += v>>3;
				750	mm[0] = (0x10 - u) * (0x10 - v);
				751	mm[1] = (0x10 - u) * v;
				752	mm[2] = u * (0x10 - v);
				753	mm[3] = 0x100 - (mm[0] + mm[1] + mm[2]);
				754	for (int j=0 ; j<4 ; j++) {
				755	texel.s[j] = texels[0].s[j];
				756	if (!texel.s[j]) continue;
				757	texel.s[j] += 8;
				758	texel.c[j] = texels[0].c[j]*mm[0] +
				759	texels[1].c[j]*mm[1] +
				760	texels[2].c[j]*mm[2] +
				761	texels[3].c[j]*mm[3] ;
				762	}
				763	}
				764
				765	// Texture environnement...
				766	for (int j=0 ; j<4 ; j++) {
				767	uint32_t& Cf = fragment.c[j];
				768	uint32_t& Ct = texel.c[j];
				769	uint8_t& sf = fragment.s[j];
				770	uint8_t& st = texel.s[j];
				771	uint32_t At = texel.c[0];
				772	uint8_t sat = texel.s[0];
				773	switch (tx.env) {
				774	case GGL_REPLACE:
				775	if (st) {
				776	Cf = Ct;
				777	sf = st;
				778	}
				779	break;
				780	case GGL_MODULATE:
				781	if (st) {
				782	uint32_t factor = Ct + (Ct>>(st-1));
				783	Cf = (Cf * factor) >> st;
				784	}
				785	break;
				786	case GGL_DECAL:
				787	if (sat) {
				788	rescale(Cf, sf, Ct, st);
				789	Cf += ((Ct - Cf) * (At + (At>>(sat-1)))) >> sat;
				790	}
				791	break;
				792	case GGL_BLEND:
				793	if (st) {
				794	uint32_t Cc = tx.env_color[i];
				795	if (sf>8) Cc = (Cc * ((1<<sf)-1))>>8;
				796	else if (sf<8) Cc = (Cc - (Cc>>(8-sf)))>>(8-sf);
				797	uint32_t factor = Ct + (Ct>>(st-1));
				798	Cf = ((((1<<st) - factor) * Cf) + Ct*Cc)>>st;
				799	}
				800	break;
				801	case GGL_ADD:
				802	if (st) {
				803	rescale(Cf, sf, Ct, st);
				804	Cf += Ct;
				805	}
				806	break;
				807	}
				808	}
				809	}
				810	}
				811
				812	// coverage application
				813	if (enables & GGL_ENABLE_AA) {
				814	int16_t cf = *covPtr++;
				815	fragment.c[0] = (int64_t(fragment.c[0]) * cf) >> 15;
				816	}
				817
				818	// alpha-test
				819	if (enables & GGL_ENABLE_ALPHA_TEST) {
				820	GGLcolor ref = c->state.alpha_test.ref;
				821	GGLcolor alpha = (uint64_t(fragment.c[0]) *
				822	((1<<GGL_COLOR_BITS)-1)) / ((1<<fragment.s[0])-1);
				823	switch (c->state.alpha_test.func) {
				824	case GGL_NEVER: goto discard;
				825	case GGL_LESS: if (alpha<ref) break; goto discard;
				826	case GGL_EQUAL: if (alpha==ref) break; goto discard;
				827	case GGL_LEQUAL: if (alpha<=ref) break; goto discard;
				828	case GGL_GREATER: if (alpha>ref) break; goto discard;
				829	case GGL_NOTEQUAL: if (alpha!=ref) break; goto discard;
				830	case GGL_GEQUAL: if (alpha>=ref) break; goto discard;
				831	}
				832	}
				833
				834	// depth test
				835	if (c->state.buffers.depth.format) {
				836	if (enables & GGL_ENABLE_DEPTH_TEST) {
				837	surface_t* cb = &(c->state.buffers.depth);
				838	uint16_t* p = (uint16_t)(cb->data)+(x+(cb->stridey));
				839	uint16_t zz = uint32_t(z)>>(16);
				840	uint16_t depth = *p;
				841	switch (c->state.depth_test.func) {
				842	case GGL_NEVER: goto discard;
				843	case GGL_LESS: if (zz<depth) break; goto discard;
				844	case GGL_EQUAL: if (zz==depth) break; goto discard;
				845	case GGL_LEQUAL: if (zz<=depth) break; goto discard;
				846	case GGL_GREATER: if (zz>depth) break; goto discard;
				847	case GGL_NOTEQUAL: if (zz!=depth) break; goto discard;
				848	case GGL_GEQUAL: if (zz>=depth) break; goto discard;
				849	}
				850	// depth buffer is not enabled, if depth-test is not enabled
				851	/*
				852	fragment.s[1] = fragment.s[2] =
				853	fragment.s[3] = fragment.s[0] = 8;
				854	fragment.c[1] =
				855	fragment.c[2] =
				856	fragment.c[3] =
				857	fragment.c[0] = 255 - (zz>>8);
				858	*/
				859	if (c->state.mask.depth) {
				860	*p = zz;
				861	}
				862	}
				863	}
				864
				865	// fog
				866	if (enables & GGL_ENABLE_FOG) {
				867	for (int i=1 ; i<=3 ; i++) {
				868	GGLfixed fc = (c->state.fog.color[i] * 0x10000) / 0xFF;
				869	uint32_t& c = fragment.c[i];
				870	uint8_t& s = fragment.s[i];
				871	c = (c * 0x10000) / ((1<<s)-1);
				872	c = gglMulAddx(c, f, gglMulx(fc, 0x10000 - f));
				873	s = 16;
				874	}
				875	}
				876
				877	// blending
				878	if (enables & GGL_ENABLE_BLENDING) {
				879	fb.c[1] = fb.c[2] = fb.c[3] = fb.c[0] = 0; // placate valgrind
				880	fb.s[1] = fb.s[2] = fb.s[3] = fb.s[0] = 0;
				881	c->state.buffers.color.read(
				882	&(c->state.buffers.color), c, x, y, &fb);
				883	blending( c, &fragment, &fb );
				884	}
				885
				886	// write
				887	c->state.buffers.color.write(
				888	&(c->state.buffers.color), c, x, y, &fragment);
				889	}
				890
				891	discard:
				892	// iterate...
				893	x += 1;
				894	if (enables & GGL_ENABLE_SMOOTH) {
				895	r += c->shade.drdx;
				896	g += c->shade.dgdx;
				897	b += c->shade.dbdx;
				898	a += c->shade.dadx;
				899	}
				900	z += c->shade.dzdx;
				901	f += c->shade.dfdx;
				902	}
				903	}
				904
				905	#endif // ANDROID_ARM_CODEGEN && (ANDROID_CODEGEN == ANDROID_CODEGEN_GENERATED)
				906
				907	// ----------------------------------------------------------------------------
				908	#if 0
				909	#pragma mark -
				910	#pragma mark Scanline
				911	#endif
				912
David 'Digit' Turner	39764f4	2011-04-15 20:12:07 +0200	[diff] [blame]	913	/* Used to parse a 32-bit source texture linearly. Usage is:
				914	*
				915	* horz_iterator32 hi(context);
				916	* while (...) {
				917	* uint32_t src_pixel = hi.get_pixel32();
				918	* ...
				919	* }
				920	*
				921	* Use only for one-to-one texture mapping.
				922	*/
				923	struct horz_iterator32 {
				924	horz_iterator32(context_t* c) {
				925	const int x = c->iterators.xl;
				926	const int y = c->iterators.y;
				927	texture_t& tx = c->state.texture[0];
				928	const int32_t u = (tx.shade.is0>>16) + x;
				929	const int32_t v = (tx.shade.it0>>16) + y;
				930	m_src = reinterpret_cast<uint32_t>(tx.surface.data)+(u+(tx.surface.stridev));
				931	}
				932	uint32_t get_pixel32() {
				933	return *m_src++;
				934	}
				935	protected:
				936	uint32_t* m_src;
				937	};
				938
				939	/* A variant for 16-bit source textures. */
				940	struct horz_iterator16 {
				941	horz_iterator16(context_t* c) {
				942	const int x = c->iterators.xl;
				943	const int y = c->iterators.y;
				944	texture_t& tx = c->state.texture[0];
				945	const int32_t u = (tx.shade.is0>>16) + x;
				946	const int32_t v = (tx.shade.it0>>16) + y;
				947	m_src = reinterpret_cast<uint16_t>(tx.surface.data)+(u+(tx.surface.stridev));
				948	}
				949	uint16_t get_pixel16() {
				950	return *m_src++;
				951	}
				952	protected:
				953	uint16_t* m_src;
				954	};
				955
				956	/* A clamp iterator is used to iterate inside a texture with GGL_CLAMP.
				957	* After initialization, call get_src16() or get_src32() to get the current
				958	* texture pixel value.
				959	*/
				960	struct clamp_iterator {
				961	clamp_iterator(context_t* c) {
				962	const int xs = c->iterators.xl;
				963	texture_t& tx = c->state.texture[0];
				964	texture_iterators_t& ti = tx.iterators;
				965	m_s = (xs * ti.dsdx) + ti.ydsdy;
				966	m_t = (xs * ti.dtdx) + ti.ydtdy;
				967	m_ds = ti.dsdx;
				968	m_dt = ti.dtdx;
				969	m_width_m1 = tx.surface.width - 1;
				970	m_height_m1 = tx.surface.height - 1;
				971	m_data = tx.surface.data;
				972	m_stride = tx.surface.stride;
				973	}
				974	uint16_t get_pixel16() {
				975	int u, v;
				976	get_uv(u, v);
				977	uint16_t* src = reinterpret_cast<uint16_t>(m_data) + (u + (m_stridev));
				978	return src[0];
				979	}
				980	uint32_t get_pixel32() {
				981	int u, v;
				982	get_uv(u, v);
				983	uint32_t* src = reinterpret_cast<uint32_t>(m_data) + (u + (m_stridev));
				984	return src[0];
				985	}
				986	private:
				987	void get_uv(int& u, int& v) {
				988	int uu = m_s >> 16;
				989	int vv = m_t >> 16;
				990	if (uu < 0)
				991	uu = 0;
				992	if (uu > m_width_m1)
				993	uu = m_width_m1;
				994	if (vv < 0)
				995	vv = 0;
				996	if (vv > m_height_m1)
				997	vv = m_height_m1;
				998	u = uu;
				999	v = vv;
				1000	m_s += m_ds;
				1001	m_t += m_dt;
				1002	}
				1003
				1004	GGLfixed m_s, m_t;
				1005	GGLfixed m_ds, m_dt;
				1006	int m_width_m1, m_height_m1;
				1007	uint8_t* m_data;
				1008	int m_stride;
				1009	};
				1010
				1011	/*
				1012	* The 'horizontal clamp iterator' variant corresponds to the case where
				1013	* the 'v' coordinate doesn't change. This is useful to avoid one mult and
				1014	* extra adds / checks per pixels, if the blending/processing operation after
				1015	* this is very fast.
				1016	*/
				1017	static int is_context_horizontal(const context_t* c) {
				1018	return (c->state.texture[0].iterators.dtdx == 0);
				1019	}
				1020
				1021	struct horz_clamp_iterator {
				1022	uint16_t get_pixel16() {
				1023	int u = m_s >> 16;
				1024	m_s += m_ds;
				1025	if (u < 0)
				1026	u = 0;
				1027	if (u > m_width_m1)
				1028	u = m_width_m1;
				1029	const uint16_t* src = reinterpret_cast<const uint16_t*>(m_data);
				1030	return src[u];
				1031	}
				1032	uint32_t get_pixel32() {
				1033	int u = m_s >> 16;
				1034	m_s += m_ds;
				1035	if (u < 0)
				1036	u = 0;
				1037	if (u > m_width_m1)
				1038	u = m_width_m1;
				1039	const uint32_t* src = reinterpret_cast<const uint32_t*>(m_data);
				1040	return src[u];
				1041	}
				1042	protected:
				1043	void init(const context_t* c, int shift);
				1044	GGLfixed m_s;
				1045	GGLfixed m_ds;
				1046	int m_width_m1;
				1047	const uint8_t* m_data;
				1048	};
				1049
				1050	void horz_clamp_iterator::init(const context_t* c, int shift)
				1051	{
				1052	const int xs = c->iterators.xl;
				1053	const texture_t& tx = c->state.texture[0];
				1054	const texture_iterators_t& ti = tx.iterators;
				1055	m_s = (xs * ti.dsdx) + ti.ydsdy;
				1056	m_ds = ti.dsdx;
				1057	m_width_m1 = tx.surface.width-1;
				1058	m_data = tx.surface.data;
				1059
				1060	GGLfixed t = (xs * ti.dtdx) + ti.ydtdy;
				1061	int v = t >> 16;
				1062	if (v < 0)
				1063	v = 0;
				1064	else if (v >= (int)tx.surface.height)
				1065	v = (int)tx.surface.height-1;
				1066
				1067	m_data += (tx.surface.stride*v) << shift;
				1068	}
				1069
				1070	struct horz_clamp_iterator16 : horz_clamp_iterator {
				1071	horz_clamp_iterator16(const context_t* c) {
				1072	init(c,1);
				1073	};
				1074	};
				1075
				1076	struct horz_clamp_iterator32 : horz_clamp_iterator {
				1077	horz_clamp_iterator32(context_t* c) {
				1078	init(c,2);
				1079	};
				1080	};
				1081
				1082	/* This is used to perform dithering operations.
				1083	*/
				1084	struct ditherer {
				1085	ditherer(const context_t* c) {
				1086	const int x = c->iterators.xl;
				1087	const int y = c->iterators.y;
				1088	m_line = &c->ditherMatrix[ ((y & GGL_DITHER_MASK)<<GGL_DITHER_ORDER_SHIFT) ];
				1089	m_index = x & GGL_DITHER_MASK;
				1090	}
				1091	void step(void) {
				1092	m_index++;
				1093	}
				1094	int get_value(void) {
				1095	int ret = m_line[m_index & GGL_DITHER_MASK];
				1096	m_index++;
				1097	return ret;
				1098	}
				1099	uint16_t abgr8888ToRgb565(uint32_t s) {
				1100	uint32_t r = s & 0xff;
				1101	uint32_t g = (s >> 8) & 0xff;
				1102	uint32_t b = (s >> 16) & 0xff;
				1103	return rgb888ToRgb565(r,g,b);
				1104	}
				1105	/* The following assumes that r/g/b are in the 0..255 range each */
				1106	uint16_t rgb888ToRgb565(uint32_t& r, uint32_t& g, uint32_t &b) {
				1107	int threshold = get_value();
				1108	/* dither in on GGL_DITHER_BITS, and each of r, g, b is on 8 bits */
				1109	r += (threshold >> (GGL_DITHER_BITS-8 +5));
				1110	g += (threshold >> (GGL_DITHER_BITS-8 +6));
				1111	b += (threshold >> (GGL_DITHER_BITS-8 +5));
				1112	if (r > 0xff)
				1113	r = 0xff;
				1114	if (g > 0xff)
				1115	g = 0xff;
				1116	if (b > 0xff)
				1117	b = 0xff;
				1118	return uint16_t(((r & 0xf8) << 8) \| ((g & 0xfc) << 3) \| (b >> 3));
				1119	}
				1120	protected:
				1121	const uint8_t* m_line;
				1122	int m_index;
				1123	};
				1124
				1125	/* This structure is used to blend (SRC_OVER) 32-bit source pixels
				1126	* onto 16-bit destination ones. Usage is simply:
				1127	*
				1128	* blender.blend(<32-bit-src-pixel-value>,<ptr-to-16-bit-dest-pixel>)
				1129	*/
				1130	struct blender_32to16 {
				1131	blender_32to16(context_t* c) { }
				1132	void write(uint32_t s, uint16_t* dst) {
				1133	if (s == 0)
				1134	return;
				1135	s = GGL_RGBA_TO_HOST(s);
				1136	int sA = (s>>24);
				1137	if (sA == 0xff) {
				1138	*dst = convertAbgr8888ToRgb565(s);
				1139	} else {
				1140	int f = 0x100 - (sA + (sA>>7));
				1141	int sR = (s >> ( 3))&0x1F;
				1142	int sG = (s >> ( 8+2))&0x3F;
				1143	int sB = (s >> (16+3))&0x1F;
				1144	uint16_t d = *dst;
				1145	int dR = (d>>11)&0x1f;
				1146	int dG = (d>>5)&0x3f;
				1147	int dB = (d)&0x1f;
				1148	sR += (f*dR)>>8;
				1149	sG += (f*dG)>>8;
				1150	sB += (f*dB)>>8;
				1151	*dst = uint16_t((sR<<11)\|(sG<<5)\|sB);
				1152	}
				1153	}
				1154	void write(uint32_t s, uint16_t* dst, ditherer& di) {
				1155	if (s == 0) {
				1156	di.step();
				1157	return;
				1158	}
				1159	s = GGL_RGBA_TO_HOST(s);
				1160	int sA = (s>>24);
				1161	if (sA == 0xff) {
				1162	*dst = di.abgr8888ToRgb565(s);
				1163	} else {
				1164	int threshold = di.get_value() << (8 - GGL_DITHER_BITS);
				1165	int f = 0x100 - (sA + (sA>>7));
				1166	int sR = (s >> ( 3))&0x1F;
				1167	int sG = (s >> ( 8+2))&0x3F;
				1168	int sB = (s >> (16+3))&0x1F;
				1169	uint16_t d = *dst;
				1170	int dR = (d>>11)&0x1f;
				1171	int dG = (d>>5)&0x3f;
				1172	int dB = (d)&0x1f;
				1173	sR = ((sR << 8) + f*dR + threshold)>>8;
				1174	sG = ((sG << 8) + f*dG + threshold)>>8;
				1175	sB = ((sB << 8) + f*dB + threshold)>>8;
				1176	if (sR > 0x1f) sR = 0x1f;
				1177	if (sG > 0x3f) sG = 0x3f;
				1178	if (sB > 0x1f) sB = 0x1f;
				1179	*dst = uint16_t((sR<<11)\|(sG<<5)\|sB);
				1180	}
				1181	}
				1182	};
				1183
				1184	/* This blender does the same for the 'blend_srca' operation.
				1185	* where dstFactor=srcA*(1-srcA) srcFactor=srcA
				1186	*/
				1187	struct blender_32to16_srcA {
				1188	blender_32to16_srcA(const context_t* c) { }
				1189	void write(uint32_t s, uint16_t* dst) {
				1190	if (!s) {
				1191	return;
				1192	}
				1193	uint16_t d = *dst;
				1194	s = GGL_RGBA_TO_HOST(s);
				1195	int sR = (s >> ( 3))&0x1F;
				1196	int sG = (s >> ( 8+2))&0x3F;
				1197	int sB = (s >> (16+3))&0x1F;
				1198	int sA = (s>>24);
				1199	int f1 = (sA + (sA>>7));
				1200	int f2 = 0x100-f1;
				1201	int dR = (d>>11)&0x1f;
				1202	int dG = (d>>5)&0x3f;
				1203	int dB = (d)&0x1f;
				1204	sR = (f1sR + f2dR)>>8;
				1205	sG = (f1sG + f2dG)>>8;
				1206	sB = (f1sB + f2dB)>>8;
				1207	*dst = uint16_t((sR<<11)\|(sG<<5)\|sB);
				1208	}
				1209	};
				1210
				1211	/* Common init code the modulating blenders */
				1212	struct blender_modulate {
				1213	void init(const context_t* c) {
				1214	const int r = c->iterators.ydrdy >> (GGL_COLOR_BITS-8);
				1215	const int g = c->iterators.ydgdy >> (GGL_COLOR_BITS-8);
				1216	const int b = c->iterators.ydbdy >> (GGL_COLOR_BITS-8);
				1217	const int a = c->iterators.ydady >> (GGL_COLOR_BITS-8);
				1218	m_r = r + (r >> 7);
				1219	m_g = g + (g >> 7);
				1220	m_b = b + (b >> 7);
				1221	m_a = a + (a >> 7);
				1222	}
				1223	protected:
				1224	int m_r, m_g, m_b, m_a;
				1225	};
				1226
				1227	/* This blender does a normal blend after modulation.
				1228	*/
				1229	struct blender_32to16_modulate : blender_modulate {
				1230	blender_32to16_modulate(const context_t* c) {
				1231	init(c);
				1232	}
				1233	void write(uint32_t s, uint16_t* dst) {
				1234	// blend source and destination
				1235	if (!s) {
				1236	return;
				1237	}
				1238	s = GGL_RGBA_TO_HOST(s);
				1239
				1240	/* We need to modulate s */
				1241	uint32_t sA = (s >> 24);
				1242	uint32_t sB = (s >> 16) & 0xff;
				1243	uint32_t sG = (s >> 8) & 0xff;
				1244	uint32_t sR = s & 0xff;
				1245
				1246	sA = (sA*m_a) >> 8;
				1247	/* Keep R/G/B scaled to 5.8 or 6.8 fixed float format */
				1248	sR = (sR*m_r) >> (8 - 5);
				1249	sG = (sG*m_g) >> (8 - 6);
				1250	sB = (sB*m_b) >> (8 - 5);
				1251
				1252	/* Now do a normal blend */
				1253	int f = 0x100 - (sA + (sA>>7));
				1254	uint16_t d = *dst;
				1255	int dR = (d>>11)&0x1f;
				1256	int dG = (d>>5)&0x3f;
				1257	int dB = (d)&0x1f;
				1258	sR = (sR + f*dR)>>8;
				1259	sG = (sG + f*dG)>>8;
				1260	sB = (sB + f*dB)>>8;
				1261	*dst = uint16_t((sR<<11)\|(sG<<5)\|sB);
				1262	}
				1263	void write(uint32_t s, uint16_t* dst, ditherer& di) {
				1264	// blend source and destination
				1265	if (!s) {
				1266	di.step();
				1267	return;
				1268	}
				1269	s = GGL_RGBA_TO_HOST(s);
				1270
				1271	/* We need to modulate s */
				1272	uint32_t sA = (s >> 24);
				1273	uint32_t sB = (s >> 16) & 0xff;
				1274	uint32_t sG = (s >> 8) & 0xff;
				1275	uint32_t sR = s & 0xff;
				1276
				1277	sA = (sA*m_a) >> 8;
				1278	/* keep R/G/B scaled to 5.8 or 6.8 fixed float format */
				1279	sR = (sR*m_r) >> (8 - 5);
				1280	sG = (sG*m_g) >> (8 - 6);
				1281	sB = (sB*m_b) >> (8 - 5);
				1282
				1283	/* Scale threshold to 0.8 fixed float format */
				1284	int threshold = di.get_value() << (8 - GGL_DITHER_BITS);
				1285	int f = 0x100 - (sA + (sA>>7));
				1286	uint16_t d = *dst;
				1287	int dR = (d>>11)&0x1f;
				1288	int dG = (d>>5)&0x3f;
				1289	int dB = (d)&0x1f;
				1290	sR = (sR + f*dR + threshold)>>8;
				1291	sG = (sG + f*dG + threshold)>>8;
				1292	sB = (sB + f*dB + threshold)>>8;
				1293	if (sR > 0x1f) sR = 0x1f;
				1294	if (sG > 0x3f) sG = 0x3f;
				1295	if (sB > 0x1f) sB = 0x1f;
				1296	*dst = uint16_t((sR<<11)\|(sG<<5)\|sB);
				1297	}
				1298	};
				1299
				1300	/* same as 32to16_modulate, except that the input is xRGB, instead of ARGB */
				1301	struct blender_x32to16_modulate : blender_modulate {
				1302	blender_x32to16_modulate(const context_t* c) {
				1303	init(c);
				1304	}
				1305	void write(uint32_t s, uint16_t* dst) {
				1306	s = GGL_RGBA_TO_HOST(s);
				1307
				1308	uint32_t sB = (s >> 16) & 0xff;
				1309	uint32_t sG = (s >> 8) & 0xff;
				1310	uint32_t sR = s & 0xff;
				1311
				1312	/* Keep R/G/B in 5.8 or 6.8 format */
				1313	sR = (sR*m_r) >> (8 - 5);
				1314	sG = (sG*m_g) >> (8 - 6);
				1315	sB = (sB*m_b) >> (8 - 5);
				1316
				1317	int f = 0x100 - m_a;
				1318	uint16_t d = *dst;
				1319	int dR = (d>>11)&0x1f;
				1320	int dG = (d>>5)&0x3f;
				1321	int dB = (d)&0x1f;
				1322	sR = (sR + f*dR)>>8;
				1323	sG = (sG + f*dG)>>8;
				1324	sB = (sB + f*dB)>>8;
				1325	*dst = uint16_t((sR<<11)\|(sG<<5)\|sB);
				1326	}
				1327	void write(uint32_t s, uint16_t* dst, ditherer& di) {
				1328	s = GGL_RGBA_TO_HOST(s);
				1329
				1330	uint32_t sB = (s >> 16) & 0xff;
				1331	uint32_t sG = (s >> 8) & 0xff;
				1332	uint32_t sR = s & 0xff;
				1333
				1334	sR = (sR*m_r) >> (8 - 5);
				1335	sG = (sG*m_g) >> (8 - 6);
				1336	sB = (sB*m_b) >> (8 - 5);
				1337
				1338	/* Now do a normal blend */
				1339	int threshold = di.get_value() << (8 - GGL_DITHER_BITS);
				1340	int f = 0x100 - m_a;
				1341	uint16_t d = *dst;
				1342	int dR = (d>>11)&0x1f;
				1343	int dG = (d>>5)&0x3f;
				1344	int dB = (d)&0x1f;
				1345	sR = (sR + f*dR + threshold)>>8;
				1346	sG = (sG + f*dG + threshold)>>8;
				1347	sB = (sB + f*dB + threshold)>>8;
				1348	if (sR > 0x1f) sR = 0x1f;
				1349	if (sG > 0x3f) sG = 0x3f;
				1350	if (sB > 0x1f) sB = 0x1f;
				1351	*dst = uint16_t((sR<<11)\|(sG<<5)\|sB);
				1352	}
				1353	};
				1354
				1355	/* Same as above, but source is 16bit rgb565 */
				1356	struct blender_16to16_modulate : blender_modulate {
				1357	blender_16to16_modulate(const context_t* c) {
				1358	init(c);
				1359	}
				1360	void write(uint16_t s16, uint16_t* dst) {
				1361	uint32_t s = s16;
				1362
				1363	uint32_t sR = s >> 11;
				1364	uint32_t sG = (s >> 5) & 0x3f;
				1365	uint32_t sB = s & 0x1f;
				1366
				1367	sR = (sR*m_r);
				1368	sG = (sG*m_g);
				1369	sB = (sB*m_b);
				1370
				1371	int f = 0x100 - m_a;
				1372	uint16_t d = *dst;
				1373	int dR = (d>>11)&0x1f;
				1374	int dG = (d>>5)&0x3f;
				1375	int dB = (d)&0x1f;
				1376	sR = (sR + f*dR)>>8;
				1377	sG = (sG + f*dG)>>8;
				1378	sB = (sB + f*dB)>>8;
				1379	*dst = uint16_t((sR<<11)\|(sG<<5)\|sB);
				1380	}
				1381	};
				1382
				1383	/* This is used to iterate over a 16-bit destination color buffer.
				1384	* Usage is:
				1385	*
				1386	* dst_iterator16 di(context);
				1387	* while (di.count--) {
				1388	* <do stuff with dest pixel at di.dst>
				1389	* di.dst++;
				1390	* }
				1391	*/
				1392	struct dst_iterator16 {
				1393	dst_iterator16(const context_t* c) {
				1394	const int x = c->iterators.xl;
				1395	const int width = c->iterators.xr - x;
				1396	const int32_t y = c->iterators.y;
				1397	const surface_t* cb = &(c->state.buffers.color);
				1398	count = width;
				1399	dst = reinterpret_cast<uint16_t>(cb->data) + (x+(cb->stridey));
				1400	}
				1401	int count;
				1402	uint16_t* dst;
				1403	};
				1404
				1405
				1406	static void scanline_t32cb16_clamp(context_t* c)
				1407	{
				1408	dst_iterator16 di(c);
				1409
				1410	if (is_context_horizontal(c)) {
				1411	/* Special case for simple horizontal scaling */
				1412	horz_clamp_iterator32 ci(c);
				1413	while (di.count--) {
				1414	uint32_t s = ci.get_pixel32();
				1415	*di.dst++ = convertAbgr8888ToRgb565(s);
				1416	}
				1417	} else {
				1418	/* General case */
				1419	clamp_iterator ci(c);
				1420	while (di.count--) {
				1421	uint32_t s = ci.get_pixel32();
				1422	*di.dst++ = convertAbgr8888ToRgb565(s);
				1423	}
				1424	}
				1425	}
				1426
				1427	static void scanline_t32cb16_dither(context_t* c)
				1428	{
				1429	horz_iterator32 si(c);
				1430	dst_iterator16 di(c);
				1431	ditherer dither(c);
				1432
				1433	while (di.count--) {
				1434	uint32_t s = si.get_pixel32();
				1435	*di.dst++ = dither.abgr8888ToRgb565(s);
				1436	}
				1437	}
				1438
				1439	static void scanline_t32cb16_clamp_dither(context_t* c)
				1440	{
				1441	dst_iterator16 di(c);
				1442	ditherer dither(c);
				1443
				1444	if (is_context_horizontal(c)) {
				1445	/* Special case for simple horizontal scaling */
				1446	horz_clamp_iterator32 ci(c);
				1447	while (di.count--) {
				1448	uint32_t s = ci.get_pixel32();
				1449	*di.dst++ = dither.abgr8888ToRgb565(s);
				1450	}
				1451	} else {
				1452	/* General case */
				1453	clamp_iterator ci(c);
				1454	while (di.count--) {
				1455	uint32_t s = ci.get_pixel32();
				1456	*di.dst++ = dither.abgr8888ToRgb565(s);
				1457	}
				1458	}
				1459	}
				1460
				1461	static void scanline_t32cb16blend_dither(context_t* c)
				1462	{
				1463	dst_iterator16 di(c);
				1464	ditherer dither(c);
				1465	blender_32to16 bl(c);
				1466	horz_iterator32 hi(c);
				1467	while (di.count--) {
				1468	uint32_t s = hi.get_pixel32();
				1469	bl.write(s, di.dst, dither);
				1470	di.dst++;
				1471	}
				1472	}
				1473
				1474	static void scanline_t32cb16blend_clamp(context_t* c)
				1475	{
				1476	dst_iterator16 di(c);
				1477	blender_32to16 bl(c);
				1478
				1479	if (is_context_horizontal(c)) {
				1480	horz_clamp_iterator32 ci(c);
				1481	while (di.count--) {
				1482	uint32_t s = ci.get_pixel32();
				1483	bl.write(s, di.dst);
				1484	di.dst++;
				1485	}
				1486	} else {
				1487	clamp_iterator ci(c);
				1488	while (di.count--) {
				1489	uint32_t s = ci.get_pixel32();
				1490	bl.write(s, di.dst);
				1491	di.dst++;
				1492	}
				1493	}
				1494	}
				1495
				1496	static void scanline_t32cb16blend_clamp_dither(context_t* c)
				1497	{
				1498	dst_iterator16 di(c);
				1499	ditherer dither(c);
				1500	blender_32to16 bl(c);
				1501
				1502	clamp_iterator ci(c);
				1503	while (di.count--) {
				1504	uint32_t s = ci.get_pixel32();
				1505	bl.write(s, di.dst, dither);
				1506	di.dst++;
				1507	}
				1508	}
				1509
				1510	void scanline_t32cb16blend_clamp_mod(context_t* c)
				1511	{
				1512	dst_iterator16 di(c);
				1513	blender_32to16_modulate bl(c);
				1514
				1515	clamp_iterator ci(c);
				1516	while (di.count--) {
				1517	uint32_t s = ci.get_pixel32();
				1518	bl.write(s, di.dst);
				1519	di.dst++;
				1520	}
				1521	}
				1522
				1523	void scanline_t32cb16blend_clamp_mod_dither(context_t* c)
				1524	{
				1525	dst_iterator16 di(c);
				1526	blender_32to16_modulate bl(c);
				1527	ditherer dither(c);
				1528
				1529	clamp_iterator ci(c);
				1530	while (di.count--) {
				1531	uint32_t s = ci.get_pixel32();
				1532	bl.write(s, di.dst, dither);
				1533	di.dst++;
				1534	}
				1535	}
				1536
				1537	/* Variant of scanline_t32cb16blend_clamp_mod with a xRGB texture */
				1538	void scanline_x32cb16blend_clamp_mod(context_t* c)
				1539	{
				1540	dst_iterator16 di(c);
				1541	blender_x32to16_modulate bl(c);
				1542
				1543	clamp_iterator ci(c);
				1544	while (di.count--) {
				1545	uint32_t s = ci.get_pixel32();
				1546	bl.write(s, di.dst);
				1547	di.dst++;
				1548	}
				1549	}
				1550
				1551	void scanline_x32cb16blend_clamp_mod_dither(context_t* c)
				1552	{
				1553	dst_iterator16 di(c);
				1554	blender_x32to16_modulate bl(c);
				1555	ditherer dither(c);
				1556
				1557	clamp_iterator ci(c);
				1558	while (di.count--) {
				1559	uint32_t s = ci.get_pixel32();
				1560	bl.write(s, di.dst, dither);
				1561	di.dst++;
				1562	}
				1563	}
				1564
				1565	void scanline_t16cb16_clamp(context_t* c)
				1566	{
				1567	dst_iterator16 di(c);
				1568
				1569	/* Special case for simple horizontal scaling */
				1570	if (is_context_horizontal(c)) {
				1571	horz_clamp_iterator16 ci(c);
				1572	while (di.count--) {
				1573	*di.dst++ = ci.get_pixel16();
				1574	}
				1575	} else {
				1576	clamp_iterator ci(c);
				1577	while (di.count--) {
				1578	*di.dst++ = ci.get_pixel16();
				1579	}
				1580	}
				1581	}
				1582
				1583
				1584
/* Evaluates an interpolated quantity at the center of the first pixel of
 * scanline 'y':
 *
 *      v = v0 + (y + 0.5) * dvdy + (0.5 * dvdx)
 *
 * The two half-steps are folded into a single (dvdx + dvdy) >> 1 term.
 */
template <typename T, typename U>
static inline __attribute__((const))
T interpolate(int y, T v0, U dvdx, U dvdy) {
    return (v0 + ((dvdx + dvdy) >> 1)) + (y * dvdy);
}
				1592
				1593	// ----------------------------------------------------------------------------
				1594	#if 0
				1595	#pragma mark -
				1596	#endif
				1597
				1598	void init_y(context_t* c, int32_t ys)
				1599	{
				1600	const uint32_t enables = c->state.enables;
				1601
				1602	// compute iterators...
				1603	iterators_t& ci = c->iterators;
				1604
				1605	// sample in the center
				1606	ci.y = ys;
				1607
				1608	if (enables & (GGL_ENABLE_DEPTH_TEST\|GGL_ENABLE_W\|GGL_ENABLE_FOG)) {
				1609	ci.ydzdy = interpolate(ys, c->shade.z0, c->shade.dzdx, c->shade.dzdy);
				1610	ci.ydwdy = interpolate(ys, c->shade.w0, c->shade.dwdx, c->shade.dwdy);
				1611	ci.ydfdy = interpolate(ys, c->shade.f0, c->shade.dfdx, c->shade.dfdy);
				1612	}
				1613
				1614	if (ggl_unlikely(enables & GGL_ENABLE_SMOOTH)) {
				1615	ci.ydrdy = interpolate(ys, c->shade.r0, c->shade.drdx, c->shade.drdy);
				1616	ci.ydgdy = interpolate(ys, c->shade.g0, c->shade.dgdx, c->shade.dgdy);
				1617	ci.ydbdy = interpolate(ys, c->shade.b0, c->shade.dbdx, c->shade.dbdy);
				1618	ci.ydady = interpolate(ys, c->shade.a0, c->shade.dadx, c->shade.dady);
				1619	c->step_y = step_y__smooth;
				1620	} else {
				1621	ci.ydrdy = c->shade.r0;
				1622	ci.ydgdy = c->shade.g0;
				1623	ci.ydbdy = c->shade.b0;
				1624	ci.ydady = c->shade.a0;
				1625	// XXX: do only if needed, or make sure this is fast
				1626	c->packed = ggl_pack_color(c, c->state.buffers.color.format,
				1627	ci.ydrdy, ci.ydgdy, ci.ydbdy, ci.ydady);
				1628	c->packed8888 = ggl_pack_color(c, GGL_PIXEL_FORMAT_RGBA_8888,
				1629	ci.ydrdy, ci.ydgdy, ci.ydbdy, ci.ydady);
				1630	}
				1631
				1632	// initialize the variables we need in the shader
				1633	generated_vars_t& gen = c->generated_vars;
				1634	gen.argb[GGLFormat::ALPHA].c = ci.ydady;
				1635	gen.argb[GGLFormat::ALPHA].dx = c->shade.dadx;
				1636	gen.argb[GGLFormat::RED ].c = ci.ydrdy;
				1637	gen.argb[GGLFormat::RED ].dx = c->shade.drdx;
				1638	gen.argb[GGLFormat::GREEN].c = ci.ydgdy;
				1639	gen.argb[GGLFormat::GREEN].dx = c->shade.dgdx;
				1640	gen.argb[GGLFormat::BLUE ].c = ci.ydbdy;
				1641	gen.argb[GGLFormat::BLUE ].dx = c->shade.dbdx;
				1642	gen.dzdx = c->shade.dzdx;
				1643	gen.f = ci.ydfdy;
				1644	gen.dfdx = c->shade.dfdx;
				1645
				1646	if (enables & GGL_ENABLE_TMUS) {
				1647	for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) {
				1648	texture_t& t = c->state.texture[i];
				1649	if (!t.enable) continue;
				1650
				1651	texture_iterators_t& ti = t.iterators;
				1652	if (t.s_coord == GGL_ONE_TO_ONE && t.t_coord == GGL_ONE_TO_ONE) {
				1653	// we need to set all of these to 0 because in some cases
				1654	// step_y__generic() or step_y__tmu() will be used and
				1655	// therefore will update dtdy, however, in 1:1 mode
				1656	// this is always done by the scanline rasterizer.
				1657	ti.dsdx = ti.dsdy = ti.dtdx = ti.dtdy = 0;
				1658	ti.ydsdy = t.shade.is0;
				1659	ti.ydtdy = t.shade.it0;
				1660	} else {
				1661	const int adjustSWrap = ((t.s_wrap==GGL_CLAMP)?0:16);
				1662	const int adjustTWrap = ((t.t_wrap==GGL_CLAMP)?0:16);
				1663	ti.sscale = t.shade.sscale + adjustSWrap;
				1664	ti.tscale = t.shade.tscale + adjustTWrap;
				1665	if (!(enables & GGL_ENABLE_W)) {
				1666	// S coordinate
				1667	const int32_t sscale = ti.sscale;
				1668	const int32_t sy = interpolate(ys,
				1669	t.shade.is0, t.shade.idsdx, t.shade.idsdy);
				1670	if (sscale>=0) {
				1671	ti.ydsdy= sy << sscale;
				1672	ti.dsdx = t.shade.idsdx << sscale;
				1673	ti.dsdy = t.shade.idsdy << sscale;
				1674	} else {
				1675	ti.ydsdy= sy >> -sscale;
				1676	ti.dsdx = t.shade.idsdx >> -sscale;
				1677	ti.dsdy = t.shade.idsdy >> -sscale;
				1678	}
				1679	// T coordinate
				1680	const int32_t tscale = ti.tscale;
				1681	const int32_t ty = interpolate(ys,
				1682	t.shade.it0, t.shade.idtdx, t.shade.idtdy);
				1683	if (tscale>=0) {
				1684	ti.ydtdy= ty << tscale;
				1685	ti.dtdx = t.shade.idtdx << tscale;
				1686	ti.dtdy = t.shade.idtdy << tscale;
				1687	} else {
				1688	ti.ydtdy= ty >> -tscale;
				1689	ti.dtdx = t.shade.idtdx >> -tscale;
				1690	ti.dtdy = t.shade.idtdy >> -tscale;
				1691	}
				1692	}
				1693	}
				1694	// mirror for generated code...
				1695	generated_tex_vars_t& gen = c->generated_vars.texture[i];
				1696	gen.width = t.surface.width;
				1697	gen.height = t.surface.height;
				1698	gen.stride = t.surface.stride;
				1699	gen.data = int32_t(t.surface.data);
				1700	gen.dsdx = ti.dsdx;
				1701	gen.dtdx = ti.dtdx;
				1702	}
				1703	}
				1704
				1705	// choose the y-stepper
				1706	c->step_y = step_y__nop;
				1707	if (enables & GGL_ENABLE_FOG) {
				1708	c->step_y = step_y__generic;
				1709	} else if (enables & GGL_ENABLE_TMUS) {
				1710	if (enables & GGL_ENABLE_SMOOTH) {
				1711	c->step_y = step_y__generic;
				1712	} else if (enables & GGL_ENABLE_W) {
				1713	c->step_y = step_y__w;
				1714	} else {
				1715	c->step_y = step_y__tmu;
				1716	}
				1717	} else {
				1718	if (enables & GGL_ENABLE_SMOOTH) {
				1719	c->step_y = step_y__smooth;
				1720	}
				1721	}
				1722
				1723	// choose the rectangle blitter
				1724	c->rect = rect_generic;
				1725	if ((c->step_y == step_y__nop) &&
				1726	(c->scanline == scanline_memcpy))
				1727	{
				1728	c->rect = rect_memcpy;
				1729	}
				1730	}
				1731
				1732	void init_y_packed(context_t* c, int32_t y0)
				1733	{
				1734	uint8_t f = c->state.buffers.color.format;
				1735	c->packed = ggl_pack_color(c, f,
				1736	c->shade.r0, c->shade.g0, c->shade.b0, c->shade.a0);
Martyn Capewell	f9e8ab0	2009-12-07 15:00:19 +0000	[diff] [blame]	1737	c->packed8888 = ggl_pack_color(c, GGL_PIXEL_FORMAT_RGBA_8888,
				1738	c->shade.r0, c->shade.g0, c->shade.b0, c->shade.a0);
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	1739	c->iterators.y = y0;
				1740	c->step_y = step_y__nop;
				1741	// choose the rectangle blitter
				1742	c->rect = rect_generic;
				1743	if (c->scanline == scanline_memcpy) {
				1744	c->rect = rect_memcpy;
				1745	}
				1746	}
				1747
				1748	void init_y_noop(context_t* c, int32_t y0)
				1749	{
				1750	c->iterators.y = y0;
				1751	c->step_y = step_y__nop;
				1752	// choose the rectangle blitter
				1753	c->rect = rect_generic;
				1754	if (c->scanline == scanline_memcpy) {
				1755	c->rect = rect_memcpy;
				1756	}
				1757	}
				1758
				1759	void init_y_error(context_t* c, int32_t y0)
				1760	{
				1761	// woooops, shoud never happen,
				1762	// fail gracefully (don't display anything)
				1763	init_y_noop(c, y0);
				1764	LOGE("color-buffer has an invalid format!");
				1765	}
				1766
				1767	// ----------------------------------------------------------------------------
				1768	#if 0
				1769	#pragma mark -
				1770	#endif
				1771
				1772	void step_y__generic(context_t* c)
				1773	{
				1774	const uint32_t enables = c->state.enables;
				1775
				1776	// iterate...
				1777	iterators_t& ci = c->iterators;
				1778	ci.y += 1;
				1779
				1780	if (enables & GGL_ENABLE_SMOOTH) {
				1781	ci.ydrdy += c->shade.drdy;
				1782	ci.ydgdy += c->shade.dgdy;
				1783	ci.ydbdy += c->shade.dbdy;
				1784	ci.ydady += c->shade.dady;
				1785	}
				1786
				1787	const uint32_t mask =
				1788	GGL_ENABLE_DEPTH_TEST \|
				1789	GGL_ENABLE_W \|
				1790	GGL_ENABLE_FOG;
				1791	if (enables & mask) {
				1792	ci.ydzdy += c->shade.dzdy;
				1793	ci.ydwdy += c->shade.dwdy;
				1794	ci.ydfdy += c->shade.dfdy;
				1795	}
				1796
				1797	if ((enables & GGL_ENABLE_TMUS) && (!(enables & GGL_ENABLE_W))) {
				1798	for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) {
				1799	if (c->state.texture[i].enable) {
				1800	texture_iterators_t& ti = c->state.texture[i].iterators;
				1801	ti.ydsdy += ti.dsdy;
				1802	ti.ydtdy += ti.dtdy;
				1803	}
				1804	}
				1805	}
				1806	}
				1807
				1808	void step_y__nop(context_t* c)
				1809	{
				1810	c->iterators.y += 1;
				1811	c->iterators.ydzdy += c->shade.dzdy;
				1812	}
				1813
				1814	void step_y__smooth(context_t* c)
				1815	{
				1816	iterators_t& ci = c->iterators;
				1817	ci.y += 1;
				1818	ci.ydrdy += c->shade.drdy;
				1819	ci.ydgdy += c->shade.dgdy;
				1820	ci.ydbdy += c->shade.dbdy;
				1821	ci.ydady += c->shade.dady;
				1822	ci.ydzdy += c->shade.dzdy;
				1823	}
				1824
				1825	void step_y__w(context_t* c)
				1826	{
				1827	iterators_t& ci = c->iterators;
				1828	ci.y += 1;
				1829	ci.ydzdy += c->shade.dzdy;
				1830	ci.ydwdy += c->shade.dwdy;
				1831	}
				1832
				1833	void step_y__tmu(context_t* c)
				1834	{
				1835	iterators_t& ci = c->iterators;
				1836	ci.y += 1;
				1837	ci.ydzdy += c->shade.dzdy;
				1838	for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) {
				1839	if (c->state.texture[i].enable) {
				1840	texture_iterators_t& ti = c->state.texture[i].iterators;
				1841	ti.ydsdy += ti.dsdy;
				1842	ti.ydtdy += ti.dtdy;
				1843	}
				1844	}
				1845	}
				1846
				1847	// ----------------------------------------------------------------------------
				1848	#if 0
				1849	#pragma mark -
				1850	#endif
				1851
				1852	void scanline_perspective(context_t* c)
				1853	{
				1854	struct {
				1855	union {
				1856	struct {
				1857	int32_t s, sq;
				1858	int32_t t, tq;
				1859	};
				1860	struct {
				1861	int32_t v, q;
				1862	} st[2];
				1863	};
				1864	} tc[GGL_TEXTURE_UNIT_COUNT] __attribute__((aligned(16)));
				1865
				1866	// XXX: we should have a special case when dwdx = 0
				1867
				1868	// 32 pixels spans works okay. 16 is a lot better,
				1869	// but hey, it's a software renderer...
				1870	const uint32_t SPAN_BITS = 5;
				1871	const uint32_t ys = c->iterators.y;
				1872	const uint32_t xs = c->iterators.xl;
				1873	const uint32_t x1 = c->iterators.xr;
				1874	const uint32_t xc = x1 - xs;
				1875	uint32_t remainder = xc & ((1<<SPAN_BITS)-1);
				1876	uint32_t numSpans = xc >> SPAN_BITS;
				1877
				1878	const iterators_t& ci = c->iterators;
				1879	int32_t w0 = (xs * c->shade.dwdx) + ci.ydwdy;
				1880	int32_t q0 = gglRecipQ(w0, 30);
				1881	const int iwscale = 32 - gglClz(q0);
				1882
				1883	const int32_t dwdx = c->shade.dwdx << SPAN_BITS;
				1884	int32_t xl = c->iterators.xl;
				1885
				1886	// We process s & t with a loop to reduce the code size
				1887	// (and i-cache pressure).
				1888
				1889	for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) {
				1890	const texture_t& tmu = c->state.texture[i];
				1891	if (!tmu.enable) continue;
				1892	int32_t s = tmu.shade.is0 +
				1893	(tmu.shade.idsdy * ys) + (tmu.shade.idsdx * xs) +
				1894	((tmu.shade.idsdx + tmu.shade.idsdy)>>1);
				1895	int32_t t = tmu.shade.it0 +
				1896	(tmu.shade.idtdy * ys) + (tmu.shade.idtdx * xs) +
				1897	((tmu.shade.idtdx + tmu.shade.idtdy)>>1);
				1898	tc[i].s = s;
				1899	tc[i].t = t;
				1900	tc[i].sq = gglMulx(s, q0, iwscale);
				1901	tc[i].tq = gglMulx(t, q0, iwscale);
				1902	}
				1903
				1904	int32_t span = 0;
				1905	do {
				1906	int32_t w1;
				1907	if (ggl_likely(numSpans)) {
				1908	w1 = w0 + dwdx;
				1909	} else {
				1910	if (remainder) {
				1911	// finish off the scanline...
				1912	span = remainder;
				1913	w1 = (c->shade.dwdx * span) + w0;
				1914	} else {
				1915	break;
				1916	}
				1917	}
				1918	int32_t q1 = gglRecipQ(w1, 30);
				1919	for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) {
				1920	texture_t& tmu = c->state.texture[i];
				1921	if (!tmu.enable) continue;
				1922	texture_iterators_t& ti = tmu.iterators;
				1923
				1924	for (int j=0 ; j<2 ; j++) {
				1925	int32_t v = tc[i].st[j].v;
				1926	if (span) v += (tmu.shade.st[j].dx)*span;
				1927	else v += (tmu.shade.st[j].dx)<<SPAN_BITS;
				1928	const int32_t v0 = tc[i].st[j].q;
				1929	const int32_t v1 = gglMulx(v, q1, iwscale);
				1930	int32_t dvdx = v1 - v0;
				1931	if (span) dvdx /= span;
				1932	else dvdx >>= SPAN_BITS;
				1933	tc[i].st[j].v = v;
				1934	tc[i].st[j].q = v1;
				1935
				1936	const int scale = ti.st[j].scale + (iwscale - 30);
				1937	if (scale >= 0) {
				1938	ti.st[j].ydvdy = v0 << scale;
				1939	ti.st[j].dvdx = dvdx << scale;
				1940	} else {
				1941	ti.st[j].ydvdy = v0 >> -scale;
				1942	ti.st[j].dvdx = dvdx >> -scale;
				1943	}
				1944	}
				1945	generated_tex_vars_t& gen = c->generated_vars.texture[i];
				1946	gen.dsdx = ti.st[0].dvdx;
				1947	gen.dtdx = ti.st[1].dvdx;
				1948	}
				1949	c->iterators.xl = xl;
				1950	c->iterators.xr = xl = xl + (span ? span : (1<<SPAN_BITS));
				1951	w0 = w1;
				1952	q0 = q1;
				1953	c->span(c);
				1954	} while(numSpans--);
				1955	}
				1956
				1957	void scanline_perspective_single(context_t* c)
				1958	{
				1959	// 32 pixels spans works okay. 16 is a lot better,
				1960	// but hey, it's a software renderer...
				1961	const uint32_t SPAN_BITS = 5;
				1962	const uint32_t ys = c->iterators.y;
				1963	const uint32_t xs = c->iterators.xl;
				1964	const uint32_t x1 = c->iterators.xr;
				1965	const uint32_t xc = x1 - xs;
				1966
				1967	const iterators_t& ci = c->iterators;
				1968	int32_t w = (xs * c->shade.dwdx) + ci.ydwdy;
				1969	int32_t iw = gglRecipQ(w, 30);
				1970	const int iwscale = 32 - gglClz(iw);
				1971
				1972	const int i = 31 - gglClz(c->state.enabled_tmu);
				1973	generated_tex_vars_t& gen = c->generated_vars.texture[i];
				1974	texture_t& tmu = c->state.texture[i];
				1975	texture_iterators_t& ti = tmu.iterators;
				1976	const int sscale = ti.sscale + (iwscale - 30);
				1977	const int tscale = ti.tscale + (iwscale - 30);
				1978	int32_t s = tmu.shade.is0 +
				1979	(tmu.shade.idsdy * ys) + (tmu.shade.idsdx * xs) +
				1980	((tmu.shade.idsdx + tmu.shade.idsdy)>>1);
				1981	int32_t t = tmu.shade.it0 +
				1982	(tmu.shade.idtdy * ys) + (tmu.shade.idtdx * xs) +
				1983	((tmu.shade.idtdx + tmu.shade.idtdy)>>1);
				1984	int32_t s0 = gglMulx(s, iw, iwscale);
				1985	int32_t t0 = gglMulx(t, iw, iwscale);
				1986	int32_t xl = c->iterators.xl;
				1987
				1988	int32_t sq, tq, dsdx, dtdx;
				1989	int32_t premainder = xc & ((1<<SPAN_BITS)-1);
				1990	uint32_t numSpans = xc >> SPAN_BITS;
				1991	if (c->shade.dwdx == 0) {
				1992	// XXX: we could choose to do this if the error is small enough
				1993	numSpans = 0;
				1994	premainder = xc;
				1995	goto no_perspective;
				1996	}
				1997
				1998	if (premainder) {
				1999	w += c->shade.dwdx * premainder;
				2000	iw = gglRecipQ(w, 30);
				2001	no_perspective:
				2002	s += tmu.shade.idsdx * premainder;
				2003	t += tmu.shade.idtdx * premainder;
				2004	sq = gglMulx(s, iw, iwscale);
				2005	tq = gglMulx(t, iw, iwscale);
				2006	dsdx = (sq - s0) / premainder;
				2007	dtdx = (tq - t0) / premainder;
				2008	c->iterators.xl = xl;
				2009	c->iterators.xr = xl = xl + premainder;
				2010	goto finish;
				2011	}
				2012
				2013	while (numSpans--) {
				2014	w += c->shade.dwdx << SPAN_BITS;
				2015	s += tmu.shade.idsdx << SPAN_BITS;
				2016	t += tmu.shade.idtdx << SPAN_BITS;
				2017	iw = gglRecipQ(w, 30);
				2018	sq = gglMulx(s, iw, iwscale);
				2019	tq = gglMulx(t, iw, iwscale);
				2020	dsdx = (sq - s0) >> SPAN_BITS;
				2021	dtdx = (tq - t0) >> SPAN_BITS;
				2022	c->iterators.xl = xl;
				2023	c->iterators.xr = xl = xl + (1<<SPAN_BITS);
				2024	finish:
				2025	if (sscale >= 0) {
				2026	ti.ydsdy = s0 << sscale;
				2027	ti.dsdx = dsdx << sscale;
				2028	} else {
				2029	ti.ydsdy = s0 >>-sscale;
				2030	ti.dsdx = dsdx >>-sscale;
				2031	}
				2032	if (tscale >= 0) {
				2033	ti.ydtdy = t0 << tscale;
				2034	ti.dtdx = dtdx << tscale;
				2035	} else {
				2036	ti.ydtdy = t0 >>-tscale;
				2037	ti.dtdx = dtdx >>-tscale;
				2038	}
				2039	s0 = sq;
				2040	t0 = tq;
				2041	gen.dsdx = ti.dsdx;
				2042	gen.dtdx = ti.dtdx;
				2043	c->span(c);
				2044	}
				2045	}
				2046
				2047	// ----------------------------------------------------------------------------
				2048
Martyn Capewell	f9e8ab0	2009-12-07 15:00:19 +0000	[diff] [blame]	2049	void scanline_col32cb16blend(context_t* c)
				2050	{
				2051	int32_t x = c->iterators.xl;
				2052	size_t ct = c->iterators.xr - x;
				2053	int32_t y = c->iterators.y;
				2054	surface_t* cb = &(c->state.buffers.color);
				2055	union {
				2056	uint16_t* dst;
				2057	uint32_t* dst32;
				2058	};
				2059	dst = reinterpret_cast<uint16_t>(cb->data) + (x+(cb->stridey));
				2060
				2061	#if ((ANDROID_CODEGEN >= ANDROID_CODEGEN_ASM) && defined(__arm__))
				2062	#if defined(__ARM_HAVE_NEON) && BYTE_ORDER == LITTLE_ENDIAN
				2063	scanline_col32cb16blend_neon(dst, &(c->packed8888), ct);
				2064	#else // defined(__ARM_HAVE_NEON) && BYTE_ORDER == LITTLE_ENDIAN
				2065	scanline_col32cb16blend_arm(dst, GGL_RGBA_TO_HOST(c->packed8888), ct);
				2066	#endif // defined(__ARM_HAVE_NEON) && BYTE_ORDER == LITTLE_ENDIAN
				2067	#else
				2068	uint32_t s = GGL_RGBA_TO_HOST(c->packed8888);
				2069	int sA = (s>>24);
				2070	int f = 0x100 - (sA + (sA>>7));
				2071	while (ct--) {
				2072	uint16_t d = *dst;
				2073	int dR = (d>>11)&0x1f;
				2074	int dG = (d>>5)&0x3f;
				2075	int dB = (d)&0x1f;
				2076	int sR = (s >> ( 3))&0x1F;
				2077	int sG = (s >> ( 8+2))&0x3F;
				2078	int sB = (s >> (16+3))&0x1F;
				2079	sR += (f*dR)>>8;
				2080	sG += (f*dG)>>8;
				2081	sB += (f*dB)>>8;
				2082	*dst++ = uint16_t((sR<<11)\|(sG<<5)\|sB);
				2083	}
				2084	#endif
				2085
				2086	}
				2087
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	2088	void scanline_t32cb16(context_t* c)
				2089	{
				2090	int32_t x = c->iterators.xl;
				2091	size_t ct = c->iterators.xr - x;
				2092	int32_t y = c->iterators.y;
				2093	surface_t* cb = &(c->state.buffers.color);
				2094	union {
				2095	uint16_t* dst;
				2096	uint32_t* dst32;
				2097	};
				2098	dst = reinterpret_cast<uint16_t>(cb->data) + (x+(cb->stridey));
				2099
				2100	surface_t* tex = &(c->state.texture[0].surface);
				2101	const int32_t u = (c->state.texture[0].shade.is0>>16) + x;
				2102	const int32_t v = (c->state.texture[0].shade.it0>>16) + y;
				2103	uint32_t src = reinterpret_cast<uint32_t>(tex->data)+(u+(tex->stride*v));
				2104	int sR, sG, sB;
				2105	uint32_t s, d;
				2106
				2107	if (ct==1 \|\| uint32_t(dst)&2) {
				2108	last_one:
				2109	s = GGL_RGBA_TO_HOST( *src++ );
David 'Digit' Turner	39764f4	2011-04-15 20:12:07 +0200	[diff] [blame]	2110	*dst++ = convertAbgr8888ToRgb565(s);
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	2111	ct--;
				2112	}
				2113
				2114	while (ct >= 2) {
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	2115	#if BYTE_ORDER == BIG_ENDIAN
David 'Digit' Turner	39764f4	2011-04-15 20:12:07 +0200	[diff] [blame]	2116	s = GGL_RGBA_TO_HOST( *src++ );
				2117	d = convertAbgr8888ToRgb565_hi16(s);
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	2118
David 'Digit' Turner	39764f4	2011-04-15 20:12:07 +0200	[diff] [blame]	2119	s = GGL_RGBA_TO_HOST( *src++ );
				2120	d \|= convertAbgr8888ToRgb565(s);
				2121	#else
				2122	s = GGL_RGBA_TO_HOST( *src++ );
				2123	d = convertAbgr8888ToRgb565(s);
				2124
				2125	s = GGL_RGBA_TO_HOST( *src++ );
				2126	d \|= convertAbgr8888ToRgb565(s) << 16;
				2127	#endif
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	2128	*dst32++ = d;
				2129	ct -= 2;
				2130	}
				2131
				2132	if (ct > 0) {
				2133	goto last_one;
				2134	}
				2135	}
				2136
				2137	void scanline_t32cb16blend(context_t* c)
				2138	{
David 'Digit' Turner	39764f4	2011-04-15 20:12:07 +0200	[diff] [blame]	2139	#if ((ANDROID_CODEGEN >= ANDROID_CODEGEN_ASM) && defined(__arm__))
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	2140	int32_t x = c->iterators.xl;
				2141	size_t ct = c->iterators.xr - x;
				2142	int32_t y = c->iterators.y;
				2143	surface_t* cb = &(c->state.buffers.color);
				2144	uint16_t* dst = reinterpret_cast<uint16_t>(cb->data) + (x+(cb->stridey));
				2145
				2146	surface_t* tex = &(c->state.texture[0].surface);
				2147	const int32_t u = (c->state.texture[0].shade.is0>>16) + x;
				2148	const int32_t v = (c->state.texture[0].shade.it0>>16) + y;
				2149	uint32_t src = reinterpret_cast<uint32_t>(tex->data)+(u+(tex->stride*v));
				2150
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	2151	scanline_t32cb16blend_arm(dst, src, ct);
				2152	#else
David 'Digit' Turner	39764f4	2011-04-15 20:12:07 +0200	[diff] [blame]	2153	dst_iterator16 di(c);
				2154	horz_iterator32 hi(c);
				2155	blender_32to16 bl(c);
				2156	while (di.count--) {
				2157	uint32_t s = hi.get_pixel32();
				2158	bl.write(s, di.dst);
				2159	di.dst++;
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	2160	}
				2161	#endif
				2162	}
				2163
David 'Digit' Turner	39764f4	2011-04-15 20:12:07 +0200	[diff] [blame]	2164	void scanline_t32cb16blend_srca(context_t* c)
				2165	{
				2166	dst_iterator16 di(c);
				2167	horz_iterator32 hi(c);
				2168	blender_32to16_srcA blender(c);
				2169
				2170	while (di.count--) {
				2171	uint32_t s = hi.get_pixel32();
				2172	blender.write(s,di.dst);
				2173	di.dst++;
				2174	}
				2175	}
				2176
				2177	void scanline_t16cb16blend_clamp_mod(context_t* c)
				2178	{
				2179	const int a = c->iterators.ydady >> (GGL_COLOR_BITS-8);
				2180	if (a == 0) {
				2181	return;
				2182	}
				2183
				2184	if (a == 255) {
				2185	scanline_t16cb16_clamp(c);
				2186	return;
				2187	}
				2188
				2189	dst_iterator16 di(c);
				2190	blender_16to16_modulate blender(c);
				2191	clamp_iterator ci(c);
				2192
				2193	while (di.count--) {
				2194	uint16_t s = ci.get_pixel16();
				2195	blender.write(s, di.dst);
				2196	di.dst++;
				2197	}
				2198	}
				2199
The Android Open Source Project	dd7bc33	2009-03-03 19:32:55 -0800	[diff] [blame]	2200	void scanline_memcpy(context_t* c)
				2201	{
				2202	int32_t x = c->iterators.xl;
				2203	size_t ct = c->iterators.xr - x;
				2204	int32_t y = c->iterators.y;
				2205	surface_t* cb = &(c->state.buffers.color);
				2206	const GGLFormat* fp = &(c->formats[cb->format]);
				2207	uint8_t* dst = reinterpret_cast<uint8_t*>(cb->data) +
				2208	(x + (cb->stride * y)) * fp->size;
				2209
				2210	surface_t* tex = &(c->state.texture[0].surface);
				2211	const int32_t u = (c->state.texture[0].shade.is0>>16) + x;
				2212	const int32_t v = (c->state.texture[0].shade.it0>>16) + y;
				2213	uint8_t src = reinterpret_cast<uint8_t>(tex->data) +
				2214	(u + (tex->stride * v)) * fp->size;
				2215
				2216	const size_t size = ct * fp->size;
				2217	memcpy(dst, src, size);
				2218	}
				2219
				2220	void scanline_memset8(context_t* c)
				2221	{
				2222	int32_t x = c->iterators.xl;
				2223	size_t ct = c->iterators.xr - x;
				2224	int32_t y = c->iterators.y;
				2225	surface_t* cb = &(c->state.buffers.color);
				2226	uint8_t* dst = reinterpret_cast<uint8_t>(cb->data) + (x+(cb->stridey));
				2227	uint32_t packed = c->packed;
				2228	memset(dst, packed, ct);
				2229	}
				2230
				2231	void scanline_memset16(context_t* c)
				2232	{
				2233	int32_t x = c->iterators.xl;
				2234	size_t ct = c->iterators.xr - x;
				2235	int32_t y = c->iterators.y;
				2236	surface_t* cb = &(c->state.buffers.color);
				2237	uint16_t* dst = reinterpret_cast<uint16_t>(cb->data) + (x+(cb->stridey));
				2238	uint32_t packed = c->packed;
				2239	android_memset16(dst, packed, ct*2);
				2240	}
				2241
				2242	void scanline_memset32(context_t* c)
				2243	{
				2244	int32_t x = c->iterators.xl;
				2245	size_t ct = c->iterators.xr - x;
				2246	int32_t y = c->iterators.y;
				2247	surface_t* cb = &(c->state.buffers.color);
				2248	uint32_t* dst = reinterpret_cast<uint32_t>(cb->data) + (x+(cb->stridey));
				2249	uint32_t packed = GGL_HOST_TO_RGBA(c->packed);
				2250	android_memset32(dst, packed, ct*4);
				2251	}
				2252
				2253	void scanline_clear(context_t* c)
				2254	{
				2255	int32_t x = c->iterators.xl;
				2256	size_t ct = c->iterators.xr - x;
				2257	int32_t y = c->iterators.y;
				2258	surface_t* cb = &(c->state.buffers.color);
				2259	const GGLFormat* fp = &(c->formats[cb->format]);
				2260	uint8_t* dst = reinterpret_cast<uint8_t*>(cb->data) +
				2261	(x + (cb->stride * y)) * fp->size;
				2262	const size_t size = ct * fp->size;
				2263	memset(dst, 0, size);
				2264	}
				2265
				2266	void scanline_set(context_t* c)
				2267	{
				2268	int32_t x = c->iterators.xl;
				2269	size_t ct = c->iterators.xr - x;
				2270	int32_t y = c->iterators.y;
				2271	surface_t* cb = &(c->state.buffers.color);
				2272	const GGLFormat* fp = &(c->formats[cb->format]);
				2273	uint8_t* dst = reinterpret_cast<uint8_t*>(cb->data) +
				2274	(x + (cb->stride * y)) * fp->size;
				2275	const size_t size = ct * fp->size;
				2276	memset(dst, 0xFF, size);
				2277	}
				2278
				2279	void scanline_noop(context_t* c)
				2280	{
				2281	}
				2282
				2283	void rect_generic(context_t* c, size_t yc)
				2284	{
				2285	do {
				2286	c->scanline(c);
				2287	c->step_y(c);
				2288	} while (--yc);
				2289	}
				2290
				2291	void rect_memcpy(context_t* c, size_t yc)
				2292	{
				2293	int32_t x = c->iterators.xl;
				2294	size_t ct = c->iterators.xr - x;
				2295	int32_t y = c->iterators.y;
				2296	surface_t* cb = &(c->state.buffers.color);
				2297	const GGLFormat* fp = &(c->formats[cb->format]);
				2298	uint8_t* dst = reinterpret_cast<uint8_t*>(cb->data) +
				2299	(x + (cb->stride * y)) * fp->size;
				2300
				2301	surface_t* tex = &(c->state.texture[0].surface);
				2302	const int32_t u = (c->state.texture[0].shade.is0>>16) + x;
				2303	const int32_t v = (c->state.texture[0].shade.it0>>16) + y;
				2304	uint8_t src = reinterpret_cast<uint8_t>(tex->data) +
				2305	(u + (tex->stride * v)) * fp->size;
				2306
				2307	if (cb->stride == tex->stride && ct == size_t(cb->stride)) {
				2308	memcpy(dst, src, ct * fp->size * yc);
				2309	} else {
				2310	const size_t size = ct * fp->size;
				2311	const size_t dbpr = cb->stride * fp->size;
				2312	const size_t sbpr = tex->stride * fp->size;
				2313	do {
				2314	memcpy(dst, src, size);
				2315	dst += dbpr;
				2316	src += sbpr;
				2317	} while (--yc);
				2318	}
				2319	}
				2320	// ----------------------------------------------------------------------------
				2321	}; // namespace android
				2322