blob: aa1836053eade5233fda9174b03c851bafaa6fd2 [file] [log] [blame]
The Android Open Source Projectdd7bc332009-03-03 19:32:55 -08001/* libs/pixelflinger/scanline.cpp
2**
David 'Digit' Turner39764f42011-04-15 20:12:07 +02003** Copyright 2006-2011, The Android Open Source Project
The Android Open Source Projectdd7bc332009-03-03 19:32:55 -08004**
5** Licensed under the Apache License, Version 2.0 (the "License");
6** you may not use this file except in compliance with the License.
7** You may obtain a copy of the License at
8**
9** http://www.apache.org/licenses/LICENSE-2.0
10**
11** Unless required by applicable law or agreed to in writing, software
12** distributed under the License is distributed on an "AS IS" BASIS,
13** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14** See the License for the specific language governing permissions and
15** limitations under the License.
16*/
17
18
19#define LOG_TAG "pixelflinger"
20
21#include <assert.h>
22#include <stdlib.h>
23#include <stdio.h>
24#include <string.h>
25
26#include <cutils/memory.h>
27#include <cutils/log.h>
28
29#include "buffer.h"
30#include "scanline.h"
31
32#include "codeflinger/CodeCache.h"
33#include "codeflinger/GGLAssembler.h"
Ashok Bhat658f89d2013-02-28 18:32:03 +000034#if defined(__arm__)
The Android Open Source Projectdd7bc332009-03-03 19:32:55 -080035#include "codeflinger/ARMAssembler.h"
Ashok Bhat658f89d2013-02-28 18:32:03 +000036#elif defined(__aarch64__)
Colin Crossd4146e62014-01-21 20:12:28 -080037#include "codeflinger/Arm64Assembler.h"
Duane Sand734f50c2014-06-28 18:55:26 -070038#elif defined(__mips__) && !defined(__LP64__) && __mips_isa_rev < 6
Paul Lind2bc2b792012-02-01 10:54:19 -080039#include "codeflinger/MIPSAssembler.h"
Elliott Hughes606d4ae2015-11-05 18:55:20 +000040#elif defined(__mips__) && defined(__LP64__)
41#include "codeflinger/MIPS64Assembler.h"
Paul Lind2bc2b792012-02-01 10:54:19 -080042#endif
The Android Open Source Projectdd7bc332009-03-03 19:32:55 -080043//#include "codeflinger/ARMAssemblerOptimizer.h"
44
45// ----------------------------------------------------------------------------
46
47#define ANDROID_CODEGEN_GENERIC 0 // force generic pixel pipeline
48#define ANDROID_CODEGEN_C 1 // hand-written C, fallback generic
49#define ANDROID_CODEGEN_ASM 2 // hand-written asm, fallback generic
50#define ANDROID_CODEGEN_GENERATED 3 // hand-written asm, fallback codegen
51
52#ifdef NDEBUG
53# define ANDROID_RELEASE
54# define ANDROID_CODEGEN ANDROID_CODEGEN_GENERATED
55#else
56# define ANDROID_DEBUG
57# define ANDROID_CODEGEN ANDROID_CODEGEN_GENERATED
58#endif
59
Elliott Hughes606d4ae2015-11-05 18:55:20 +000060#if defined(__arm__) || (defined(__mips__) && ((!defined(__LP64__) && __mips_isa_rev < 6) || defined(__LP64__))) || defined(__aarch64__)
The Android Open Source Projectdd7bc332009-03-03 19:32:55 -080061# define ANDROID_ARM_CODEGEN 1
62#else
63# define ANDROID_ARM_CODEGEN 0
64#endif
65
66#define DEBUG__CODEGEN_ONLY 0
67
David 'Digit' Turner39764f42011-04-15 20:12:07 +020068/* Set to 1 to dump to the log the states that need a new
69 * code-generated scanline callback, i.e. those that don't
70 * have a corresponding shortcut function.
71 */
72#define DEBUG_NEEDS 0
The Android Open Source Projectdd7bc332009-03-03 19:32:55 -080073
Elliott Hughes606d4ae2015-11-05 18:55:20 +000074#if defined( __mips__) && ((!defined(__LP64__) && __mips_isa_rev < 6) || defined(__LP64__))
Paul Lind2bc2b792012-02-01 10:54:19 -080075#define ASSEMBLY_SCRATCH_SIZE 4096
Ashok Bhat658f89d2013-02-28 18:32:03 +000076#elif defined(__aarch64__)
77#define ASSEMBLY_SCRATCH_SIZE 8192
Paul Lind2bc2b792012-02-01 10:54:19 -080078#else
The Android Open Source Projectdd7bc332009-03-03 19:32:55 -080079#define ASSEMBLY_SCRATCH_SIZE 2048
Paul Lind2bc2b792012-02-01 10:54:19 -080080#endif
The Android Open Source Projectdd7bc332009-03-03 19:32:55 -080081
82// ----------------------------------------------------------------------------
83namespace android {
84// ----------------------------------------------------------------------------
85
86static void init_y(context_t*, int32_t);
87static void init_y_noop(context_t*, int32_t);
88static void init_y_packed(context_t*, int32_t);
89static void init_y_error(context_t*, int32_t);
90
91static void step_y__generic(context_t* c);
92static void step_y__nop(context_t*);
93static void step_y__smooth(context_t* c);
94static void step_y__tmu(context_t* c);
95static void step_y__w(context_t* c);
96
97static void scanline(context_t* c);
98static void scanline_perspective(context_t* c);
99static void scanline_perspective_single(context_t* c);
100static void scanline_t32cb16blend(context_t* c);
David 'Digit' Turner39764f42011-04-15 20:12:07 +0200101static void scanline_t32cb16blend_dither(context_t* c);
102static void scanline_t32cb16blend_srca(context_t* c);
103static void scanline_t32cb16blend_clamp(context_t* c);
104static void scanline_t32cb16blend_clamp_dither(context_t* c);
105static void scanline_t32cb16blend_clamp_mod(context_t* c);
106static void scanline_x32cb16blend_clamp_mod(context_t* c);
107static void scanline_t32cb16blend_clamp_mod_dither(context_t* c);
108static void scanline_x32cb16blend_clamp_mod_dither(context_t* c);
The Android Open Source Projectdd7bc332009-03-03 19:32:55 -0800109static void scanline_t32cb16(context_t* c);
David 'Digit' Turner39764f42011-04-15 20:12:07 +0200110static void scanline_t32cb16_dither(context_t* c);
111static void scanline_t32cb16_clamp(context_t* c);
112static void scanline_t32cb16_clamp_dither(context_t* c);
Martyn Capewellf9e8ab02009-12-07 15:00:19 +0000113static void scanline_col32cb16blend(context_t* c);
David 'Digit' Turner39764f42011-04-15 20:12:07 +0200114static void scanline_t16cb16_clamp(context_t* c);
115static void scanline_t16cb16blend_clamp_mod(context_t* c);
The Android Open Source Projectdd7bc332009-03-03 19:32:55 -0800116static void scanline_memcpy(context_t* c);
117static void scanline_memset8(context_t* c);
118static void scanline_memset16(context_t* c);
119static void scanline_memset32(context_t* c);
120static void scanline_noop(context_t* c);
121static void scanline_set(context_t* c);
122static void scanline_clear(context_t* c);
123
124static void rect_generic(context_t* c, size_t yc);
125static void rect_memcpy(context_t* c, size_t yc);
126
Duane Sand068f9f32012-05-24 22:09:24 -0700127#if defined( __arm__)
The Android Open Source Projectdd7bc332009-03-03 19:32:55 -0800128extern "C" void scanline_t32cb16blend_arm(uint16_t*, uint32_t*, size_t);
129extern "C" void scanline_t32cb16_arm(uint16_t *dst, uint32_t *src, size_t ct);
Martyn Capewellf9e8ab02009-12-07 15:00:19 +0000130extern "C" void scanline_col32cb16blend_neon(uint16_t *dst, uint32_t *col, size_t ct);
131extern "C" void scanline_col32cb16blend_arm(uint16_t *dst, uint32_t col, size_t ct);
Ashok Bhat658f89d2013-02-28 18:32:03 +0000132#elif defined(__aarch64__)
Colin Crossd4146e62014-01-21 20:12:28 -0800133extern "C" void scanline_t32cb16blend_arm64(uint16_t*, uint32_t*, size_t);
134extern "C" void scanline_col32cb16blend_arm64(uint16_t *dst, uint32_t col, size_t ct);
Duane Sand734f50c2014-06-28 18:55:26 -0700135#elif defined(__mips__) && !defined(__LP64__) && __mips_isa_rev < 6
Duane Sand068f9f32012-05-24 22:09:24 -0700136extern "C" void scanline_t32cb16blend_mips(uint16_t*, uint32_t*, size_t);
Elliott Hughes606d4ae2015-11-05 18:55:20 +0000137#elif defined(__mips__) && defined(__LP64__)
138extern "C" void scanline_t32cb16blend_mips64(uint16_t*, uint32_t*, size_t);
139extern "C" void scanline_col32cb16blend_mips64(uint16_t *dst, uint32_t col, size_t ct);
Duane Sand068f9f32012-05-24 22:09:24 -0700140#endif
The Android Open Source Projectdd7bc332009-03-03 19:32:55 -0800141
142// ----------------------------------------------------------------------------
143
/*
 * Pack a 32-bit pixel into a 16-bit 5/6/5 value.
 *
 * Bits 3-7 of the input land in bits 11-15 of the result, bits 10-15 land
 * in bits 5-10, and bits 19-23 land in bits 0-4. All other input bits
 * (including the top byte) are discarded.
 */
static inline uint16_t convertAbgr8888ToRgb565(uint32_t pix)
{
    const uint32_t hi5  = (pix << 8)  & 0xf800;
    const uint32_t mid6 = (pix >> 5)  & 0x07e0;
    const uint32_t lo5  = (pix >> 19) & 0x001f;
    return (uint16_t)(hi5 | mid6 | lo5);
}
150
/*
 * One row of the shortcut table: a needs filter paired with the callbacks
 * to install on the context when the current needs match that filter.
 * Instances are aggregate-initialised, so the field order must not change.
 */
struct shortcut_t {
    // pattern handed to needs.match()
    needs_filter_t filter;
    // human-readable description of the case this entry covers
    const char* desc;
    // copied into c->scanline on a match
    void (*scanline)(context_t*);
    // copied into c->init_y on a match
    void (*init_y)(context_t*, int32_t);
};
157
158// Keep in sync with needs
David 'Digit' Turner39764f42011-04-15 20:12:07 +0200159
160/* To understand the values here, have a look at:
161 * system/core/include/private/pixelflinger/ggl_context.h
162 *
163 * Especially the lines defining and using GGL_RESERVE_NEEDS
164 *
165 * Quick reminders:
166 * - the last nibble of the first value is the destination buffer format.
167 * - the last nibble of the third value is the source texture format
168 * - formats: 4=rgb565 1=abgr8888 2=xbgr8888
169 *
170 * In the descriptions below:
171 *
172 * SRC means we copy the source pixels to the destination
173 *
174 * SRC_OVER means we blend the source pixels to the destination
175 * with dstFactor = 1-srcA, srcFactor=1 (premultiplied source).
176 * This mode is otherwise called 'blend'.
177 *
 * SRCA_OVER means we blend the source pixels to the destination
 * with dstFactor=1-srcA, srcFactor=srcA (non-premultiplied source).
 * This mode is otherwise called 'blend_srca'.
181 *
182 * clamp means we fetch source pixels from a texture with u/v clamping
183 *
184 * mod means the source pixels are modulated (multiplied) by the
185 * a/r/g/b of the current context's color. Typically used for
186 * fade-in / fade-out.
187 *
188 * dither means we dither 32 bit values to 16 bits
189 */
/*
 * Needs patterns for which a dedicated scanline routine exists.
 *
 * Each entry is { { value, mask }, description, scanline, init_y }.
 * pick_scanline() walks the table from the top and installs the callbacks
 * of the first entry whose filter matches, so earlier entries take
 * precedence over later ones.
 */
static shortcut_t shortcuts[] = {
    /* --- 8888 texture to 565 framebuffer, no arbitrary texture coords --- */
    { { { 0x03515104, 0x00000077, { 0x00000A01, 0x00000000 } },
        { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
        "565 fb, 8888 tx, blend SRC_OVER", scanline_t32cb16blend, init_y_noop },
    { { { 0x03010104, 0x00000077, { 0x00000A01, 0x00000000 } },
        { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
        "565 fb, 8888 tx, SRC", scanline_t32cb16, init_y_noop },
    /* same as first entry, but with dithering */
    { { { 0x03515104, 0x00000177, { 0x00000A01, 0x00000000 } },
        { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
        "565 fb, 8888 tx, blend SRC_OVER dither", scanline_t32cb16blend_dither, init_y_noop },
    /* same as second entry, but with dithering */
    { { { 0x03010104, 0x00000177, { 0x00000A01, 0x00000000 } },
        { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
        "565 fb, 8888 tx, SRC dither", scanline_t32cb16_dither, init_y_noop },
    /* this is used during the boot animation - CHEAT: ignore dithering
     * (the second mask word, 0xFFFFFEFF, leaves bit 8 unchecked) */
    { { { 0x03545404, 0x00000077, { 0x00000A01, 0x00000000 } },
        { 0xFFFFFFFF, 0xFFFFFEFF, { 0xFFFFFFFF, 0x0000003F } } },
        "565 fb, 8888 tx, blend dst:ONE_MINUS_SRCA src:SRCA", scanline_t32cb16blend_srca, init_y_noop },
    /* special case for arbitrary texture coordinates (think scaling) */
    { { { 0x03515104, 0x00000077, { 0x00000001, 0x00000000 } },
        { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
        "565 fb, 8888 tx, SRC_OVER clamp", scanline_t32cb16blend_clamp, init_y },
    { { { 0x03515104, 0x00000177, { 0x00000001, 0x00000000 } },
        { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
        "565 fb, 8888 tx, SRC_OVER clamp dither", scanline_t32cb16blend_clamp_dither, init_y },
    /* another case used during emulation */
    { { { 0x03515104, 0x00000077, { 0x00001001, 0x00000000 } },
        { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
        "565 fb, 8888 tx, SRC_OVER clamp modulate", scanline_t32cb16blend_clamp_mod, init_y },
    /* and this */
    { { { 0x03515104, 0x00000077, { 0x00001002, 0x00000000 } },
        { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
        "565 fb, x888 tx, SRC_OVER clamp modulate", scanline_x32cb16blend_clamp_mod, init_y },
    /* dithered variants of the two "clamp modulate" entries above */
    { { { 0x03515104, 0x00000177, { 0x00001001, 0x00000000 } },
        { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
        "565 fb, 8888 tx, SRC_OVER clamp modulate dither", scanline_t32cb16blend_clamp_mod_dither, init_y },
    { { { 0x03515104, 0x00000177, { 0x00001002, 0x00000000 } },
        { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
        "565 fb, x888 tx, SRC_OVER clamp modulate dither", scanline_x32cb16blend_clamp_mod_dither, init_y },
    /* plain copy (SRC) with clamping; the 8888 and x888 texture formats
     * share one routine */
    { { { 0x03010104, 0x00000077, { 0x00000001, 0x00000000 } },
        { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
        "565 fb, 8888 tx, SRC clamp", scanline_t32cb16_clamp, init_y },
    { { { 0x03010104, 0x00000077, { 0x00000002, 0x00000000 } },
        { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
        "565 fb, x888 tx, SRC clamp", scanline_t32cb16_clamp, init_y },
    { { { 0x03010104, 0x00000177, { 0x00000001, 0x00000000 } },
        { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
        "565 fb, 8888 tx, SRC clamp dither", scanline_t32cb16_clamp_dither, init_y },
    { { { 0x03010104, 0x00000177, { 0x00000002, 0x00000000 } },
        { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
        "565 fb, x888 tx, SRC clamp dither", scanline_t32cb16_clamp_dither, init_y },
    /* 565 texture to 565 framebuffer */
    { { { 0x03010104, 0x00000077, { 0x00000004, 0x00000000 } },
        { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
        "565 fb, 565 tx, SRC clamp", scanline_t16cb16_clamp, init_y },
    /* NOTE(review): the texture word carries the same 0x1000 bit as the
     * "clamp modulate" entries above and the routine is named *_clamp_mod,
     * although the description string does not say "modulate". */
    { { { 0x03515104, 0x00000077, { 0x00001004, 0x00000000 } },
        { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } },
        "565 fb, 565 tx, SRC_OVER clamp", scanline_t16cb16blend_clamp_mod, init_y },
    /* no texture: blend a fixed color (both texture words fully masked) */
    { { { 0x03515104, 0x00000077, { 0x00000000, 0x00000000 } },
        { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0xFFFFFFFF } } },
        "565 fb, 8888 fixed color", scanline_col32cb16blend, init_y_packed },
    /* --- states that reduce to a no-op --- */
    { { { 0x00000000, 0x00000000, { 0x00000000, 0x00000000 } },
        { 0x00000000, 0x00000007, { 0x00000000, 0x00000000 } } },
        "(nop) alpha test", scanline_noop, init_y_noop },
    { { { 0x00000000, 0x00000000, { 0x00000000, 0x00000000 } },
        { 0x00000000, 0x00000070, { 0x00000000, 0x00000000 } } },
        "(nop) depth test", scanline_noop, init_y_noop },
    { { { 0x05000000, 0x00000000, { 0x00000000, 0x00000000 } },
        { 0x0F000000, 0x00000080, { 0x00000000, 0x00000000 } } },
        "(nop) logic_op", scanline_noop, init_y_noop },
    { { { 0xF0000000, 0x00000000, { 0x00000000, 0x00000000 } },
        { 0xF0000000, 0x00000080, { 0x00000000, 0x00000000 } } },
        "(nop) color mask", scanline_noop, init_y_noop },
    /* --- states that reduce to a constant fill --- */
    { { { 0x0F000000, 0x00000077, { 0x00000000, 0x00000000 } },
        { 0xFF000000, 0x000000F7, { 0x00000000, 0x00000000 } } },
        "(set) logic_op", scanline_set, init_y_noop },
    { { { 0x00000000, 0x00000077, { 0x00000000, 0x00000000 } },
        { 0xFF000000, 0x000000F7, { 0x00000000, 0x00000000 } } },
        "(clear) logic_op", scanline_clear, init_y_noop },
    { { { 0x03000000, 0x00000077, { 0x00000000, 0x00000000 } },
        { 0xFFFFFF00, 0x000000F7, { 0x00000000, 0x00000000 } } },
        "(clear) blending 0/0", scanline_clear, init_y_noop },
    /* --- color-buffer format field (low 6 bits of the first word) is 0 --- */
    { { { 0x00000000, 0x00000000, { 0x00000000, 0x00000000 } },
        { 0x0000003F, 0x00000000, { 0x00000000, 0x00000000 } } },
        "(error) invalid color-buffer format", scanline_noop, init_y_error },
};
/*
 * Filter used by pick_scanline() to select scanline_memcpy once it has
 * checked that the texture and color-buffer formats are equal.
 * The 0xFFFFFFC0 masks leave the low 6 bits of the first word and of the
 * first texture word unchecked; 0xFFFFFEFF leaves bit 8 of the second
 * word unchecked.
 */
static const needs_filter_t noblend1to1 = {
        // (disregard dithering, see below)
        { 0x03010100, 0x00000077, { 0x00000A00, 0x00000000 } },
        { 0xFFFFFFC0, 0xFFFFFEFF, { 0xFFFFFFC0, 0x0000003F } }
};
/*
 * Filter used by pick_scanline() to select one of the scanline_memset*
 * routines according to the color-buffer format's size.
 * The first mask word (0xFFFFFFC0) leaves the low 6 bits unchecked; only
 * the low 6 bits of each texture word are tested, and they must be zero.
 */
static const needs_filter_t fill16noblend = {
        { 0x03010100, 0x00000077, { 0x00000000, 0x00000000 } },
        { 0xFFFFFFC0, 0xFFFFFFFF, { 0x0000003F, 0x0000003F } }
};
285
286// ----------------------------------------------------------------------------
287
288#if ANDROID_ARM_CODEGEN
Paul Lind2bc2b792012-02-01 10:54:19 -0800289
Elliott Hughes606d4ae2015-11-05 18:55:20 +0000290#if defined(__mips__) && ((!defined(__LP64__) && __mips_isa_rev < 6) || defined(__LP64__))
Paul Lind2bc2b792012-02-01 10:54:19 -0800291static CodeCache gCodeCache(32 * 1024);
Ashok Bhat658f89d2013-02-28 18:32:03 +0000292#elif defined(__aarch64__)
293static CodeCache gCodeCache(48 * 1024);
Paul Lind2bc2b792012-02-01 10:54:19 -0800294#else
The Android Open Source Projectdd7bc332009-03-03 19:32:55 -0800295static CodeCache gCodeCache(12 * 1024);
Paul Lind2bc2b792012-02-01 10:54:19 -0800296#endif
The Android Open Source Projectdd7bc332009-03-03 19:32:55 -0800297
298class ScanlineAssembly : public Assembly {
299 AssemblyKey<needs_t> mKey;
300public:
301 ScanlineAssembly(needs_t needs, size_t size)
302 : Assembly(size), mKey(needs) { }
303 const AssemblyKey<needs_t>& key() const { return mKey; }
304};
305#endif
306
307// ----------------------------------------------------------------------------
308
309void ggl_init_scanline(context_t* c)
310{
311 c->init_y = init_y;
312 c->step_y = step_y__generic;
313 c->scanline = scanline;
314}
315
316void ggl_uninit_scanline(context_t* c)
317{
318 if (c->state.buffers.coverage)
319 free(c->state.buffers.coverage);
320#if ANDROID_ARM_CODEGEN
321 if (c->scanline_as)
322 c->scanline_as->decStrong(c);
323#endif
324}
325
326// ----------------------------------------------------------------------------
327
/*
 * Choose the scanline / init_y / step_y callbacks for the state currently
 * described by c->state.needs.
 *
 * Unless DEBUG__CODEGEN_ONLY is non-zero, the candidates are tried in
 * this order, and the first hit returns immediately:
 *   1. forced generic pipeline (ANDROID_CODEGEN_GENERIC builds only)
 *   2. same-format copy        -> scanline_memcpy
 *   3. solid fill              -> scanline_memset8/16/32
 *   4. first matching entry of shortcuts[]
 * Otherwise, with ANDROID_ARM_CODEGEN, a generated routine is looked up in
 * (or created and added to) gCodeCache; without it the generic scanline()
 * is installed.
 */
static void pick_scanline(context_t* c)
{
#if (!defined(DEBUG__CODEGEN_ONLY) || (DEBUG__CODEGEN_ONLY == 0))

#if ANDROID_CODEGEN == ANDROID_CODEGEN_GENERIC
    c->init_y = init_y;
    c->step_y = step_y__generic;
    c->scanline = scanline;
    return;
#endif

    //printf("*** needs [%08lx:%08lx:%08lx:%08lx]\n",
    //    c->state.needs.n, c->state.needs.p,
    //    c->state.needs.t[0], c->state.needs.t[1]);

    // first handle the special case that we cannot test with a filter
    // (texture format equal to color-buffer format)
    const uint32_t cb_format = GGL_READ_NEEDS(CB_FORMAT, c->state.needs.n);
    if (GGL_READ_NEEDS(T_FORMAT, c->state.needs.t[0]) == cb_format) {
        if (c->state.needs.match(noblend1to1)) {
            // this will match regardless of dithering state, since both
            // src and dest have the same format anyway, there is no dithering
            // to be done.
            const GGLFormat* f =
                &(c->formats[GGL_READ_NEEDS(T_FORMAT, c->state.needs.t[0])]);
            if ((f->components == GGL_RGB) ||
                (f->components == GGL_RGBA) ||
                (f->components == GGL_LUMINANCE) ||
                (f->components == GGL_LUMINANCE_ALPHA))
            {
                // format must have all of RGB components
                // (so the current color doesn't show through)
                c->scanline = scanline_memcpy;
                c->init_y = init_y_noop;
                return;
            }
        }
    }

    if (c->state.needs.match(fill16noblend)) {
        c->init_y = init_y_packed;
        // any other pixel size falls out of the switch and continues below;
        // init_y is then overwritten by whichever later path is taken
        switch (c->formats[cb_format].size) {
        case 1: c->scanline = scanline_memset8; return;
        case 2: c->scanline = scanline_memset16; return;
        case 4: c->scanline = scanline_memset32; return;
        }
    }

    // first matching table entry wins
    const int numFilters = sizeof(shortcuts)/sizeof(shortcut_t);
    for (int i=0 ; i<numFilters ; i++) {
        if (c->state.needs.match(shortcuts[i].filter)) {
            c->scanline = shortcuts[i].scanline;
            c->init_y = shortcuts[i].init_y;
            return;
        }
    }

#if DEBUG_NEEDS
    ALOGI("Needs: n=0x%08x p=0x%08x t0=0x%08x t1=0x%08x",
         c->state.needs.n, c->state.needs.p,
         c->state.needs.t[0], c->state.needs.t[1]);
#endif

#endif // DEBUG__CODEGEN_ONLY

    c->init_y = init_y;
    c->step_y = step_y__generic;

#if ANDROID_ARM_CODEGEN
    // we're going to have to generate some code...
    // here, generate code for our pixel pipeline
    const AssemblyKey<needs_t> key(c->state.needs);
    sp<Assembly> assembly = gCodeCache.lookup(key);
    if (assembly == 0) {
        // create a new assembly region
        sp<ScanlineAssembly> a = new ScanlineAssembly(c->state.needs,
                ASSEMBLY_SCRATCH_SIZE);
        // initialize our assembler
        // (exactly one of the target-specific branches below declares it)
#if defined(__arm__)
        GGLAssembler assembler( new ARMAssembler(a) );
        //GGLAssembler assembler(
        //        new ARMAssemblerOptimizer(new ARMAssembler(a)) );
#endif
#if defined(__mips__) && !defined(__LP64__) && __mips_isa_rev < 6
        GGLAssembler assembler( new ArmToMipsAssembler(a) );
#elif defined(__mips__) && defined(__LP64__)
        GGLAssembler assembler( new ArmToMips64Assembler(a) );
#elif defined(__aarch64__)
        GGLAssembler assembler( new ArmToArm64Assembler(a) );
#endif
        // generate the scanline code for the given needs
        bool err = assembler.scanline(c->state.needs, c) != 0;
        if (ggl_likely(!err)) {
            // finally, cache this assembly
            err = gCodeCache.cache(a->key(), a) < 0;
        }
        if (ggl_unlikely(err)) {
            // the previously held assembly (c->scanline_as), if any, is
            // left untouched on this path
            ALOGE("error generating or caching assembly. Reverting to NOP.");
            c->scanline = scanline_noop;
            c->init_y = init_y_noop;
            c->step_y = step_y__nop;
            return;
        }
        assembly = a;
    }

    // release the previous assembly
    if (c->scanline_as) {
        c->scanline_as->decStrong(c);
    }

    //ALOGI("using generated pixel-pipeline");
    c->scanline_as = assembly.get();
    c->scanline_as->incStrong(c); // hold on to assembly
    c->scanline = (void(*)(context_t* c))assembly->base();
#else
//    ALOGW("using generic (slow) pixel-pipeline");
    c->scanline = scanline;
#endif
}
447
448void ggl_pick_scanline(context_t* c)
449{
450 pick_scanline(c);
451 if ((c->state.enables & GGL_ENABLE_W) &&
452 (c->state.enables & GGL_ENABLE_TMUS))
453 {
454 c->span = c->scanline;
455 c->scanline = scanline_perspective;
456 if (!(c->state.enabled_tmu & (c->state.enabled_tmu - 1))) {
457 // only one TMU enabled
458 c->scanline = scanline_perspective_single;
459 }
460 }
461}
462
463// ----------------------------------------------------------------------------
464
465static void blending(context_t* c, pixel_t* fragment, pixel_t* fb);
466static void blend_factor(context_t* c, pixel_t* r, uint32_t factor,
467 const pixel_t* src, const pixel_t* dst);
468static void rescale(uint32_t& u, uint8_t& su, uint32_t& v, uint8_t& sv);
469
470#if ANDROID_ARM_CODEGEN && (ANDROID_CODEGEN == ANDROID_CODEGEN_GENERATED)
471
472// no need to compile the generic-pipeline, it can't be reached
// Empty stand-in for the generic pipeline. It is compiled only when
// ANDROID_ARM_CODEGEN is set and ANDROID_CODEGEN is
// ANDROID_CODEGEN_GENERATED; it keeps the symbol defined for the code that
// still refers to it.
void scanline(context_t*)
{
}
476
477#else
478
479void rescale(uint32_t& u, uint8_t& su, uint32_t& v, uint8_t& sv)
480{
481 if (su && sv) {
482 if (su > sv) {
483 v = ggl_expand(v, sv, su);
484 sv = su;
485 } else if (su < sv) {
486 u = ggl_expand(u, su, sv);
487 su = sv;
488 }
489 }
490}
491
492void blending(context_t* c, pixel_t* fragment, pixel_t* fb)
493{
494 rescale(fragment->c[0], fragment->s[0], fb->c[0], fb->s[0]);
495 rescale(fragment->c[1], fragment->s[1], fb->c[1], fb->s[1]);
496 rescale(fragment->c[2], fragment->s[2], fb->c[2], fb->s[2]);
497 rescale(fragment->c[3], fragment->s[3], fb->c[3], fb->s[3]);
498
499 pixel_t sf, df;
500 blend_factor(c, &sf, c->state.blend.src, fragment, fb);
501 blend_factor(c, &df, c->state.blend.dst, fragment, fb);
502
503 fragment->c[1] =
504 gglMulAddx(fragment->c[1], sf.c[1], gglMulx(fb->c[1], df.c[1]));
505 fragment->c[2] =
506 gglMulAddx(fragment->c[2], sf.c[2], gglMulx(fb->c[2], df.c[2]));
507 fragment->c[3] =
508 gglMulAddx(fragment->c[3], sf.c[3], gglMulx(fb->c[3], df.c[3]));
509
510 if (c->state.blend.alpha_separate) {
511 blend_factor(c, &sf, c->state.blend.src_alpha, fragment, fb);
512 blend_factor(c, &df, c->state.blend.dst_alpha, fragment, fb);
513 }
514
515 fragment->c[0] =
516 gglMulAddx(fragment->c[0], sf.c[0], gglMulx(fb->c[0], df.c[0]));
517
518 // clamp to 1.0
519 if (fragment->c[0] >= (1LU<<fragment->s[0]))
520 fragment->c[0] = (1<<fragment->s[0])-1;
521 if (fragment->c[1] >= (1LU<<fragment->s[1]))
522 fragment->c[1] = (1<<fragment->s[1])-1;
523 if (fragment->c[2] >= (1LU<<fragment->s[2]))
524 fragment->c[2] = (1<<fragment->s[2])-1;
525 if (fragment->c[3] >= (1LU<<fragment->s[3]))
526 fragment->c[3] = (1<<fragment->s[3])-1;
527}
528
529static inline int blendfactor(uint32_t x, uint32_t size, uint32_t def = 0)
530{
531 if (!size)
532 return def;
533
534 // scale to 16 bits
535 if (size > 16) {
536 x >>= (size - 16);
537 } else if (size < 16) {
538 x = ggl_expand(x, size, 16);
539 }
540 x += x >> 15;
541 return x;
542}
543
Ashok Bhat3078b132014-02-17 15:15:46 +0000544void blend_factor(context_t* /*c*/, pixel_t* r,
The Android Open Source Projectdd7bc332009-03-03 19:32:55 -0800545 uint32_t factor, const pixel_t* src, const pixel_t* dst)
546{
547 switch (factor) {
548 case GGL_ZERO:
549 r->c[1] =
550 r->c[2] =
551 r->c[3] =
552 r->c[0] = 0;
553 break;
554 case GGL_ONE:
555 r->c[1] =
556 r->c[2] =
557 r->c[3] =
558 r->c[0] = FIXED_ONE;
559 break;
560 case GGL_DST_COLOR:
561 r->c[1] = blendfactor(dst->c[1], dst->s[1]);
562 r->c[2] = blendfactor(dst->c[2], dst->s[2]);
563 r->c[3] = blendfactor(dst->c[3], dst->s[3]);
564 r->c[0] = blendfactor(dst->c[0], dst->s[0]);
565 break;
566 case GGL_SRC_COLOR:
567 r->c[1] = blendfactor(src->c[1], src->s[1]);
568 r->c[2] = blendfactor(src->c[2], src->s[2]);
569 r->c[3] = blendfactor(src->c[3], src->s[3]);
570 r->c[0] = blendfactor(src->c[0], src->s[0]);
571 break;
572 case GGL_ONE_MINUS_DST_COLOR:
573 r->c[1] = FIXED_ONE - blendfactor(dst->c[1], dst->s[1]);
574 r->c[2] = FIXED_ONE - blendfactor(dst->c[2], dst->s[2]);
575 r->c[3] = FIXED_ONE - blendfactor(dst->c[3], dst->s[3]);
576 r->c[0] = FIXED_ONE - blendfactor(dst->c[0], dst->s[0]);
577 break;
578 case GGL_ONE_MINUS_SRC_COLOR:
579 r->c[1] = FIXED_ONE - blendfactor(src->c[1], src->s[1]);
580 r->c[2] = FIXED_ONE - blendfactor(src->c[2], src->s[2]);
581 r->c[3] = FIXED_ONE - blendfactor(src->c[3], src->s[3]);
582 r->c[0] = FIXED_ONE - blendfactor(src->c[0], src->s[0]);
583 break;
584 case GGL_SRC_ALPHA:
585 r->c[1] =
586 r->c[2] =
587 r->c[3] =
588 r->c[0] = blendfactor(src->c[0], src->s[0], FIXED_ONE);
589 break;
590 case GGL_ONE_MINUS_SRC_ALPHA:
591 r->c[1] =
592 r->c[2] =
593 r->c[3] =
594 r->c[0] = FIXED_ONE - blendfactor(src->c[0], src->s[0], FIXED_ONE);
595 break;
596 case GGL_DST_ALPHA:
597 r->c[1] =
598 r->c[2] =
599 r->c[3] =
600 r->c[0] = blendfactor(dst->c[0], dst->s[0], FIXED_ONE);
601 break;
602 case GGL_ONE_MINUS_DST_ALPHA:
603 r->c[1] =
604 r->c[2] =
605 r->c[3] =
606 r->c[0] = FIXED_ONE - blendfactor(dst->c[0], dst->s[0], FIXED_ONE);
607 break;
608 case GGL_SRC_ALPHA_SATURATE:
609 // XXX: GGL_SRC_ALPHA_SATURATE
610 break;
611 }
612}
613
614static GGLfixed wrapping(int32_t coord, uint32_t size, int tx_wrap)
615{
616 GGLfixed d;
617 if (tx_wrap == GGL_REPEAT) {
618 d = (uint32_t(coord)>>16) * size;
619 } else if (tx_wrap == GGL_CLAMP) { // CLAMP_TO_EDGE semantics
620 const GGLfixed clamp_min = FIXED_HALF;
621 const GGLfixed clamp_max = (size << 16) - FIXED_HALF;
622 if (coord < clamp_min) coord = clamp_min;
623 if (coord > clamp_max) coord = clamp_max;
624 d = coord;
625 } else { // 1:1
626 const GGLfixed clamp_min = 0;
627 const GGLfixed clamp_max = (size << 16);
628 if (coord < clamp_min) coord = clamp_min;
629 if (coord > clamp_max) coord = clamp_max;
630 d = coord;
631 }
632 return d;
633}
634
635static inline
636GGLcolor ADJUST_COLOR_ITERATOR(GGLcolor v, GGLcolor dvdx, int len)
637{
638 const int32_t end = dvdx * (len-1) + v;
639 if (end < 0)
640 v -= end;
641 v &= ~(v>>31);
642 return v;
643}
644
645void scanline(context_t* c)
646{
647 const uint32_t enables = c->state.enables;
648 const int xs = c->iterators.xl;
649 const int x1 = c->iterators.xr;
650 int xc = x1 - xs;
651 const int16_t* covPtr = c->state.buffers.coverage + xs;
652
653 // All iterated values are sampled at the pixel center
654
655 // reset iterators for that scanline...
656 GGLcolor r, g, b, a;
657 iterators_t& ci = c->iterators;
658 if (enables & GGL_ENABLE_SMOOTH) {
659 r = (xs * c->shade.drdx) + ci.ydrdy;
660 g = (xs * c->shade.dgdx) + ci.ydgdy;
661 b = (xs * c->shade.dbdx) + ci.ydbdy;
662 a = (xs * c->shade.dadx) + ci.ydady;
663 r = ADJUST_COLOR_ITERATOR(r, c->shade.drdx, xc);
664 g = ADJUST_COLOR_ITERATOR(g, c->shade.dgdx, xc);
665 b = ADJUST_COLOR_ITERATOR(b, c->shade.dbdx, xc);
666 a = ADJUST_COLOR_ITERATOR(a, c->shade.dadx, xc);
667 } else {
668 r = ci.ydrdy;
669 g = ci.ydgdy;
670 b = ci.ydbdy;
671 a = ci.ydady;
672 }
673
674 // z iterators are 1.31
675 GGLfixed z = (xs * c->shade.dzdx) + ci.ydzdy;
676 GGLfixed f = (xs * c->shade.dfdx) + ci.ydfdy;
677
678 struct {
679 GGLfixed s, t;
680 } tc[GGL_TEXTURE_UNIT_COUNT];
681 if (enables & GGL_ENABLE_TMUS) {
682 for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) {
683 if (c->state.texture[i].enable) {
684 texture_iterators_t& ti = c->state.texture[i].iterators;
685 if (enables & GGL_ENABLE_W) {
686 tc[i].s = ti.ydsdy;
687 tc[i].t = ti.ydtdy;
688 } else {
689 tc[i].s = (xs * ti.dsdx) + ti.ydsdy;
690 tc[i].t = (xs * ti.dtdx) + ti.ydtdy;
691 }
692 }
693 }
694 }
695
696 pixel_t fragment;
697 pixel_t texel;
698 pixel_t fb;
699
700 uint32_t x = xs;
701 uint32_t y = c->iterators.y;
702
703 while (xc--) {
704
705 { // just a scope
706
707 // read color (convert to 8 bits by keeping only the integer part)
708 fragment.s[1] = fragment.s[2] =
709 fragment.s[3] = fragment.s[0] = 8;
710 fragment.c[1] = r >> (GGL_COLOR_BITS-8);
711 fragment.c[2] = g >> (GGL_COLOR_BITS-8);
712 fragment.c[3] = b >> (GGL_COLOR_BITS-8);
713 fragment.c[0] = a >> (GGL_COLOR_BITS-8);
714
715 // texturing
716 if (enables & GGL_ENABLE_TMUS) {
717 for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) {
718 texture_t& tx = c->state.texture[i];
719 if (!tx.enable)
720 continue;
721 texture_iterators_t& ti = tx.iterators;
722 int32_t u, v;
723
724 // s-coordinate
725 if (tx.s_coord != GGL_ONE_TO_ONE) {
726 const int w = tx.surface.width;
727 u = wrapping(tc[i].s, w, tx.s_wrap);
728 tc[i].s += ti.dsdx;
729 } else {
730 u = (((tx.shade.is0>>16) + x)<<16) + FIXED_HALF;
731 }
732
733 // t-coordinate
734 if (tx.t_coord != GGL_ONE_TO_ONE) {
735 const int h = tx.surface.height;
736 v = wrapping(tc[i].t, h, tx.t_wrap);
737 tc[i].t += ti.dtdx;
738 } else {
739 v = (((tx.shade.it0>>16) + y)<<16) + FIXED_HALF;
740 }
741
742 // read texture
743 if (tx.mag_filter == GGL_NEAREST &&
744 tx.min_filter == GGL_NEAREST)
745 {
746 u >>= 16;
747 v >>= 16;
748 tx.surface.read(&tx.surface, c, u, v, &texel);
749 } else {
750 const int w = tx.surface.width;
751 const int h = tx.surface.height;
752 u -= FIXED_HALF;
753 v -= FIXED_HALF;
754 int u0 = u >> 16;
755 int v0 = v >> 16;
756 int u1 = u0 + 1;
757 int v1 = v0 + 1;
758 if (tx.s_wrap == GGL_REPEAT) {
759 if (u0<0) u0 += w;
760 if (u1<0) u1 += w;
761 if (u0>=w) u0 -= w;
762 if (u1>=w) u1 -= w;
763 } else {
764 if (u0<0) u0 = 0;
765 if (u1<0) u1 = 0;
766 if (u0>=w) u0 = w-1;
767 if (u1>=w) u1 = w-1;
768 }
769 if (tx.t_wrap == GGL_REPEAT) {
770 if (v0<0) v0 += h;
771 if (v1<0) v1 += h;
772 if (v0>=h) v0 -= h;
773 if (v1>=h) v1 -= h;
774 } else {
775 if (v0<0) v0 = 0;
776 if (v1<0) v1 = 0;
777 if (v0>=h) v0 = h-1;
778 if (v1>=h) v1 = h-1;
779 }
780 pixel_t texels[4];
781 uint32_t mm[4];
782 tx.surface.read(&tx.surface, c, u0, v0, &texels[0]);
783 tx.surface.read(&tx.surface, c, u0, v1, &texels[1]);
784 tx.surface.read(&tx.surface, c, u1, v0, &texels[2]);
785 tx.surface.read(&tx.surface, c, u1, v1, &texels[3]);
786 u = (u >> 12) & 0xF;
787 v = (v >> 12) & 0xF;
788 u += u>>3;
789 v += v>>3;
790 mm[0] = (0x10 - u) * (0x10 - v);
791 mm[1] = (0x10 - u) * v;
792 mm[2] = u * (0x10 - v);
793 mm[3] = 0x100 - (mm[0] + mm[1] + mm[2]);
794 for (int j=0 ; j<4 ; j++) {
795 texel.s[j] = texels[0].s[j];
796 if (!texel.s[j]) continue;
797 texel.s[j] += 8;
798 texel.c[j] = texels[0].c[j]*mm[0] +
799 texels[1].c[j]*mm[1] +
800 texels[2].c[j]*mm[2] +
801 texels[3].c[j]*mm[3] ;
802 }
803 }
804
805 // Texture environnement...
806 for (int j=0 ; j<4 ; j++) {
807 uint32_t& Cf = fragment.c[j];
808 uint32_t& Ct = texel.c[j];
809 uint8_t& sf = fragment.s[j];
810 uint8_t& st = texel.s[j];
811 uint32_t At = texel.c[0];
812 uint8_t sat = texel.s[0];
813 switch (tx.env) {
814 case GGL_REPLACE:
815 if (st) {
816 Cf = Ct;
817 sf = st;
818 }
819 break;
820 case GGL_MODULATE:
821 if (st) {
822 uint32_t factor = Ct + (Ct>>(st-1));
823 Cf = (Cf * factor) >> st;
824 }
825 break;
826 case GGL_DECAL:
827 if (sat) {
828 rescale(Cf, sf, Ct, st);
829 Cf += ((Ct - Cf) * (At + (At>>(sat-1)))) >> sat;
830 }
831 break;
832 case GGL_BLEND:
833 if (st) {
834 uint32_t Cc = tx.env_color[i];
835 if (sf>8) Cc = (Cc * ((1<<sf)-1))>>8;
836 else if (sf<8) Cc = (Cc - (Cc>>(8-sf)))>>(8-sf);
837 uint32_t factor = Ct + (Ct>>(st-1));
838 Cf = ((((1<<st) - factor) * Cf) + Ct*Cc)>>st;
839 }
840 break;
841 case GGL_ADD:
842 if (st) {
843 rescale(Cf, sf, Ct, st);
844 Cf += Ct;
845 }
846 break;
847 }
848 }
849 }
850 }
851
852 // coverage application
853 if (enables & GGL_ENABLE_AA) {
854 int16_t cf = *covPtr++;
855 fragment.c[0] = (int64_t(fragment.c[0]) * cf) >> 15;
856 }
857
858 // alpha-test
859 if (enables & GGL_ENABLE_ALPHA_TEST) {
860 GGLcolor ref = c->state.alpha_test.ref;
861 GGLcolor alpha = (uint64_t(fragment.c[0]) *
862 ((1<<GGL_COLOR_BITS)-1)) / ((1<<fragment.s[0])-1);
863 switch (c->state.alpha_test.func) {
864 case GGL_NEVER: goto discard;
865 case GGL_LESS: if (alpha<ref) break; goto discard;
866 case GGL_EQUAL: if (alpha==ref) break; goto discard;
867 case GGL_LEQUAL: if (alpha<=ref) break; goto discard;
868 case GGL_GREATER: if (alpha>ref) break; goto discard;
869 case GGL_NOTEQUAL: if (alpha!=ref) break; goto discard;
870 case GGL_GEQUAL: if (alpha>=ref) break; goto discard;
871 }
872 }
873
874 // depth test
875 if (c->state.buffers.depth.format) {
876 if (enables & GGL_ENABLE_DEPTH_TEST) {
877 surface_t* cb = &(c->state.buffers.depth);
878 uint16_t* p = (uint16_t*)(cb->data)+(x+(cb->stride*y));
879 uint16_t zz = uint32_t(z)>>(16);
880 uint16_t depth = *p;
881 switch (c->state.depth_test.func) {
882 case GGL_NEVER: goto discard;
883 case GGL_LESS: if (zz<depth) break; goto discard;
884 case GGL_EQUAL: if (zz==depth) break; goto discard;
885 case GGL_LEQUAL: if (zz<=depth) break; goto discard;
886 case GGL_GREATER: if (zz>depth) break; goto discard;
887 case GGL_NOTEQUAL: if (zz!=depth) break; goto discard;
888 case GGL_GEQUAL: if (zz>=depth) break; goto discard;
889 }
890 // depth buffer is not enabled, if depth-test is not enabled
891/*
892 fragment.s[1] = fragment.s[2] =
893 fragment.s[3] = fragment.s[0] = 8;
894 fragment.c[1] =
895 fragment.c[2] =
896 fragment.c[3] =
897 fragment.c[0] = 255 - (zz>>8);
898*/
899 if (c->state.mask.depth) {
900 *p = zz;
901 }
902 }
903 }
904
905 // fog
906 if (enables & GGL_ENABLE_FOG) {
907 for (int i=1 ; i<=3 ; i++) {
908 GGLfixed fc = (c->state.fog.color[i] * 0x10000) / 0xFF;
909 uint32_t& c = fragment.c[i];
910 uint8_t& s = fragment.s[i];
911 c = (c * 0x10000) / ((1<<s)-1);
912 c = gglMulAddx(c, f, gglMulx(fc, 0x10000 - f));
913 s = 16;
914 }
915 }
916
917 // blending
918 if (enables & GGL_ENABLE_BLENDING) {
919 fb.c[1] = fb.c[2] = fb.c[3] = fb.c[0] = 0; // placate valgrind
920 fb.s[1] = fb.s[2] = fb.s[3] = fb.s[0] = 0;
921 c->state.buffers.color.read(
922 &(c->state.buffers.color), c, x, y, &fb);
923 blending( c, &fragment, &fb );
924 }
925
926 // write
927 c->state.buffers.color.write(
928 &(c->state.buffers.color), c, x, y, &fragment);
929 }
930
931discard:
932 // iterate...
933 x += 1;
934 if (enables & GGL_ENABLE_SMOOTH) {
935 r += c->shade.drdx;
936 g += c->shade.dgdx;
937 b += c->shade.dbdx;
938 a += c->shade.dadx;
939 }
940 z += c->shade.dzdx;
941 f += c->shade.dfdx;
942 }
943}
944
945#endif // ANDROID_ARM_CODEGEN && (ANDROID_CODEGEN == ANDROID_CODEGEN_GENERATED)
946
947// ----------------------------------------------------------------------------
948#if 0
949#pragma mark -
950#pragma mark Scanline
951#endif
952
David 'Digit' Turner39764f42011-04-15 20:12:07 +0200953/* Used to parse a 32-bit source texture linearly. Usage is:
954 *
955 * horz_iterator32 hi(context);
956 * while (...) {
957 * uint32_t src_pixel = hi.get_pixel32();
958 * ...
959 * }
960 *
961 * Use only for one-to-one texture mapping.
962 */
963struct horz_iterator32 {
Chih-Hung Hsieh75935ef2016-04-25 15:28:36 -0700964 explicit horz_iterator32(context_t* c) {
David 'Digit' Turner39764f42011-04-15 20:12:07 +0200965 const int x = c->iterators.xl;
966 const int y = c->iterators.y;
967 texture_t& tx = c->state.texture[0];
968 const int32_t u = (tx.shade.is0>>16) + x;
969 const int32_t v = (tx.shade.it0>>16) + y;
970 m_src = reinterpret_cast<uint32_t*>(tx.surface.data)+(u+(tx.surface.stride*v));
971 }
972 uint32_t get_pixel32() {
973 return *m_src++;
974 }
975protected:
976 uint32_t* m_src;
977};
978
979/* A variant for 16-bit source textures. */
980struct horz_iterator16 {
Chih-Hung Hsieh75935ef2016-04-25 15:28:36 -0700981 explicit horz_iterator16(context_t* c) {
David 'Digit' Turner39764f42011-04-15 20:12:07 +0200982 const int x = c->iterators.xl;
983 const int y = c->iterators.y;
984 texture_t& tx = c->state.texture[0];
985 const int32_t u = (tx.shade.is0>>16) + x;
986 const int32_t v = (tx.shade.it0>>16) + y;
987 m_src = reinterpret_cast<uint16_t*>(tx.surface.data)+(u+(tx.surface.stride*v));
988 }
989 uint16_t get_pixel16() {
990 return *m_src++;
991 }
992protected:
993 uint16_t* m_src;
994};
995
996/* A clamp iterator is used to iterate inside a texture with GGL_CLAMP.
997 * After initialization, call get_src16() or get_src32() to get the current
998 * texture pixel value.
999 */
1000struct clamp_iterator {
Chih-Hung Hsieh75935ef2016-04-25 15:28:36 -07001001 explicit clamp_iterator(context_t* c) {
David 'Digit' Turner39764f42011-04-15 20:12:07 +02001002 const int xs = c->iterators.xl;
1003 texture_t& tx = c->state.texture[0];
1004 texture_iterators_t& ti = tx.iterators;
1005 m_s = (xs * ti.dsdx) + ti.ydsdy;
1006 m_t = (xs * ti.dtdx) + ti.ydtdy;
1007 m_ds = ti.dsdx;
1008 m_dt = ti.dtdx;
1009 m_width_m1 = tx.surface.width - 1;
1010 m_height_m1 = tx.surface.height - 1;
1011 m_data = tx.surface.data;
1012 m_stride = tx.surface.stride;
1013 }
1014 uint16_t get_pixel16() {
1015 int u, v;
1016 get_uv(u, v);
1017 uint16_t* src = reinterpret_cast<uint16_t*>(m_data) + (u + (m_stride*v));
1018 return src[0];
1019 }
1020 uint32_t get_pixel32() {
1021 int u, v;
1022 get_uv(u, v);
1023 uint32_t* src = reinterpret_cast<uint32_t*>(m_data) + (u + (m_stride*v));
1024 return src[0];
1025 }
1026private:
1027 void get_uv(int& u, int& v) {
1028 int uu = m_s >> 16;
1029 int vv = m_t >> 16;
1030 if (uu < 0)
1031 uu = 0;
1032 if (uu > m_width_m1)
1033 uu = m_width_m1;
1034 if (vv < 0)
1035 vv = 0;
1036 if (vv > m_height_m1)
1037 vv = m_height_m1;
1038 u = uu;
1039 v = vv;
1040 m_s += m_ds;
1041 m_t += m_dt;
1042 }
1043
1044 GGLfixed m_s, m_t;
1045 GGLfixed m_ds, m_dt;
1046 int m_width_m1, m_height_m1;
1047 uint8_t* m_data;
1048 int m_stride;
1049};
1050
1051/*
1052 * The 'horizontal clamp iterator' variant corresponds to the case where
1053 * the 'v' coordinate doesn't change. This is useful to avoid one mult and
1054 * extra adds / checks per pixels, if the blending/processing operation after
1055 * this is very fast.
1056 */
1057static int is_context_horizontal(const context_t* c) {
1058 return (c->state.texture[0].iterators.dtdx == 0);
1059}
1060
1061struct horz_clamp_iterator {
1062 uint16_t get_pixel16() {
1063 int u = m_s >> 16;
1064 m_s += m_ds;
1065 if (u < 0)
1066 u = 0;
1067 if (u > m_width_m1)
1068 u = m_width_m1;
1069 const uint16_t* src = reinterpret_cast<const uint16_t*>(m_data);
1070 return src[u];
1071 }
1072 uint32_t get_pixel32() {
1073 int u = m_s >> 16;
1074 m_s += m_ds;
1075 if (u < 0)
1076 u = 0;
1077 if (u > m_width_m1)
1078 u = m_width_m1;
1079 const uint32_t* src = reinterpret_cast<const uint32_t*>(m_data);
1080 return src[u];
1081 }
1082protected:
1083 void init(const context_t* c, int shift);
1084 GGLfixed m_s;
1085 GGLfixed m_ds;
1086 int m_width_m1;
1087 const uint8_t* m_data;
1088};
1089
1090void horz_clamp_iterator::init(const context_t* c, int shift)
1091{
1092 const int xs = c->iterators.xl;
1093 const texture_t& tx = c->state.texture[0];
1094 const texture_iterators_t& ti = tx.iterators;
1095 m_s = (xs * ti.dsdx) + ti.ydsdy;
1096 m_ds = ti.dsdx;
1097 m_width_m1 = tx.surface.width-1;
1098 m_data = tx.surface.data;
1099
1100 GGLfixed t = (xs * ti.dtdx) + ti.ydtdy;
1101 int v = t >> 16;
1102 if (v < 0)
1103 v = 0;
1104 else if (v >= (int)tx.surface.height)
1105 v = (int)tx.surface.height-1;
1106
1107 m_data += (tx.surface.stride*v) << shift;
1108}
1109
1110struct horz_clamp_iterator16 : horz_clamp_iterator {
Chih-Hung Hsieh75935ef2016-04-25 15:28:36 -07001111 explicit horz_clamp_iterator16(const context_t* c) {
David 'Digit' Turner39764f42011-04-15 20:12:07 +02001112 init(c,1);
1113 };
1114};
1115
1116struct horz_clamp_iterator32 : horz_clamp_iterator {
Chih-Hung Hsieh75935ef2016-04-25 15:28:36 -07001117 explicit horz_clamp_iterator32(context_t* c) {
David 'Digit' Turner39764f42011-04-15 20:12:07 +02001118 init(c,2);
1119 };
1120};
1121
1122/* This is used to perform dithering operations.
1123 */
1124struct ditherer {
Chih-Hung Hsieh75935ef2016-04-25 15:28:36 -07001125 explicit ditherer(const context_t* c) {
David 'Digit' Turner39764f42011-04-15 20:12:07 +02001126 const int x = c->iterators.xl;
1127 const int y = c->iterators.y;
1128 m_line = &c->ditherMatrix[ ((y & GGL_DITHER_MASK)<<GGL_DITHER_ORDER_SHIFT) ];
1129 m_index = x & GGL_DITHER_MASK;
1130 }
1131 void step(void) {
1132 m_index++;
1133 }
1134 int get_value(void) {
1135 int ret = m_line[m_index & GGL_DITHER_MASK];
1136 m_index++;
1137 return ret;
1138 }
1139 uint16_t abgr8888ToRgb565(uint32_t s) {
1140 uint32_t r = s & 0xff;
1141 uint32_t g = (s >> 8) & 0xff;
1142 uint32_t b = (s >> 16) & 0xff;
1143 return rgb888ToRgb565(r,g,b);
1144 }
1145 /* The following assumes that r/g/b are in the 0..255 range each */
1146 uint16_t rgb888ToRgb565(uint32_t& r, uint32_t& g, uint32_t &b) {
1147 int threshold = get_value();
1148 /* dither in on GGL_DITHER_BITS, and each of r, g, b is on 8 bits */
1149 r += (threshold >> (GGL_DITHER_BITS-8 +5));
1150 g += (threshold >> (GGL_DITHER_BITS-8 +6));
1151 b += (threshold >> (GGL_DITHER_BITS-8 +5));
1152 if (r > 0xff)
1153 r = 0xff;
1154 if (g > 0xff)
1155 g = 0xff;
1156 if (b > 0xff)
1157 b = 0xff;
1158 return uint16_t(((r & 0xf8) << 8) | ((g & 0xfc) << 3) | (b >> 3));
1159 }
1160protected:
1161 const uint8_t* m_line;
1162 int m_index;
1163};
1164
1165/* This structure is used to blend (SRC_OVER) 32-bit source pixels
1166 * onto 16-bit destination ones. Usage is simply:
1167 *
1168 * blender.blend(<32-bit-src-pixel-value>,<ptr-to-16-bit-dest-pixel>)
1169 */
1170struct blender_32to16 {
Chih-Hung Hsieh75935ef2016-04-25 15:28:36 -07001171 explicit blender_32to16(context_t* /*c*/) { }
David 'Digit' Turner39764f42011-04-15 20:12:07 +02001172 void write(uint32_t s, uint16_t* dst) {
1173 if (s == 0)
1174 return;
1175 s = GGL_RGBA_TO_HOST(s);
1176 int sA = (s>>24);
1177 if (sA == 0xff) {
1178 *dst = convertAbgr8888ToRgb565(s);
1179 } else {
1180 int f = 0x100 - (sA + (sA>>7));
1181 int sR = (s >> ( 3))&0x1F;
1182 int sG = (s >> ( 8+2))&0x3F;
1183 int sB = (s >> (16+3))&0x1F;
1184 uint16_t d = *dst;
1185 int dR = (d>>11)&0x1f;
1186 int dG = (d>>5)&0x3f;
1187 int dB = (d)&0x1f;
1188 sR += (f*dR)>>8;
1189 sG += (f*dG)>>8;
1190 sB += (f*dB)>>8;
1191 *dst = uint16_t((sR<<11)|(sG<<5)|sB);
1192 }
1193 }
1194 void write(uint32_t s, uint16_t* dst, ditherer& di) {
1195 if (s == 0) {
1196 di.step();
1197 return;
1198 }
1199 s = GGL_RGBA_TO_HOST(s);
1200 int sA = (s>>24);
1201 if (sA == 0xff) {
1202 *dst = di.abgr8888ToRgb565(s);
1203 } else {
1204 int threshold = di.get_value() << (8 - GGL_DITHER_BITS);
1205 int f = 0x100 - (sA + (sA>>7));
1206 int sR = (s >> ( 3))&0x1F;
1207 int sG = (s >> ( 8+2))&0x3F;
1208 int sB = (s >> (16+3))&0x1F;
1209 uint16_t d = *dst;
1210 int dR = (d>>11)&0x1f;
1211 int dG = (d>>5)&0x3f;
1212 int dB = (d)&0x1f;
1213 sR = ((sR << 8) + f*dR + threshold)>>8;
1214 sG = ((sG << 8) + f*dG + threshold)>>8;
1215 sB = ((sB << 8) + f*dB + threshold)>>8;
1216 if (sR > 0x1f) sR = 0x1f;
1217 if (sG > 0x3f) sG = 0x3f;
1218 if (sB > 0x1f) sB = 0x1f;
1219 *dst = uint16_t((sR<<11)|(sG<<5)|sB);
1220 }
1221 }
1222};
1223
1224/* This blender does the same for the 'blend_srca' operation.
1225 * where dstFactor=srcA*(1-srcA) srcFactor=srcA
1226 */
1227struct blender_32to16_srcA {
Chih-Hung Hsieh75935ef2016-04-25 15:28:36 -07001228 explicit blender_32to16_srcA(const context_t* /*c*/) { }
David 'Digit' Turner39764f42011-04-15 20:12:07 +02001229 void write(uint32_t s, uint16_t* dst) {
1230 if (!s) {
1231 return;
1232 }
1233 uint16_t d = *dst;
1234 s = GGL_RGBA_TO_HOST(s);
1235 int sR = (s >> ( 3))&0x1F;
1236 int sG = (s >> ( 8+2))&0x3F;
1237 int sB = (s >> (16+3))&0x1F;
1238 int sA = (s>>24);
1239 int f1 = (sA + (sA>>7));
1240 int f2 = 0x100-f1;
1241 int dR = (d>>11)&0x1f;
1242 int dG = (d>>5)&0x3f;
1243 int dB = (d)&0x1f;
1244 sR = (f1*sR + f2*dR)>>8;
1245 sG = (f1*sG + f2*dG)>>8;
1246 sB = (f1*sB + f2*dB)>>8;
1247 *dst = uint16_t((sR<<11)|(sG<<5)|sB);
1248 }
1249};
1250
1251/* Common init code the modulating blenders */
1252struct blender_modulate {
1253 void init(const context_t* c) {
1254 const int r = c->iterators.ydrdy >> (GGL_COLOR_BITS-8);
1255 const int g = c->iterators.ydgdy >> (GGL_COLOR_BITS-8);
1256 const int b = c->iterators.ydbdy >> (GGL_COLOR_BITS-8);
1257 const int a = c->iterators.ydady >> (GGL_COLOR_BITS-8);
1258 m_r = r + (r >> 7);
1259 m_g = g + (g >> 7);
1260 m_b = b + (b >> 7);
1261 m_a = a + (a >> 7);
1262 }
1263protected:
1264 int m_r, m_g, m_b, m_a;
1265};
1266
1267/* This blender does a normal blend after modulation.
1268 */
1269struct blender_32to16_modulate : blender_modulate {
Chih-Hung Hsieh75935ef2016-04-25 15:28:36 -07001270 explicit blender_32to16_modulate(const context_t* c) {
David 'Digit' Turner39764f42011-04-15 20:12:07 +02001271 init(c);
1272 }
1273 void write(uint32_t s, uint16_t* dst) {
1274 // blend source and destination
1275 if (!s) {
1276 return;
1277 }
1278 s = GGL_RGBA_TO_HOST(s);
1279
1280 /* We need to modulate s */
1281 uint32_t sA = (s >> 24);
1282 uint32_t sB = (s >> 16) & 0xff;
1283 uint32_t sG = (s >> 8) & 0xff;
1284 uint32_t sR = s & 0xff;
1285
1286 sA = (sA*m_a) >> 8;
1287 /* Keep R/G/B scaled to 5.8 or 6.8 fixed float format */
1288 sR = (sR*m_r) >> (8 - 5);
1289 sG = (sG*m_g) >> (8 - 6);
1290 sB = (sB*m_b) >> (8 - 5);
1291
1292 /* Now do a normal blend */
1293 int f = 0x100 - (sA + (sA>>7));
1294 uint16_t d = *dst;
1295 int dR = (d>>11)&0x1f;
1296 int dG = (d>>5)&0x3f;
1297 int dB = (d)&0x1f;
1298 sR = (sR + f*dR)>>8;
1299 sG = (sG + f*dG)>>8;
1300 sB = (sB + f*dB)>>8;
1301 *dst = uint16_t((sR<<11)|(sG<<5)|sB);
1302 }
1303 void write(uint32_t s, uint16_t* dst, ditherer& di) {
1304 // blend source and destination
1305 if (!s) {
1306 di.step();
1307 return;
1308 }
1309 s = GGL_RGBA_TO_HOST(s);
1310
1311 /* We need to modulate s */
1312 uint32_t sA = (s >> 24);
1313 uint32_t sB = (s >> 16) & 0xff;
1314 uint32_t sG = (s >> 8) & 0xff;
1315 uint32_t sR = s & 0xff;
1316
1317 sA = (sA*m_a) >> 8;
1318 /* keep R/G/B scaled to 5.8 or 6.8 fixed float format */
1319 sR = (sR*m_r) >> (8 - 5);
1320 sG = (sG*m_g) >> (8 - 6);
1321 sB = (sB*m_b) >> (8 - 5);
1322
1323 /* Scale threshold to 0.8 fixed float format */
1324 int threshold = di.get_value() << (8 - GGL_DITHER_BITS);
1325 int f = 0x100 - (sA + (sA>>7));
1326 uint16_t d = *dst;
1327 int dR = (d>>11)&0x1f;
1328 int dG = (d>>5)&0x3f;
1329 int dB = (d)&0x1f;
1330 sR = (sR + f*dR + threshold)>>8;
1331 sG = (sG + f*dG + threshold)>>8;
1332 sB = (sB + f*dB + threshold)>>8;
1333 if (sR > 0x1f) sR = 0x1f;
1334 if (sG > 0x3f) sG = 0x3f;
1335 if (sB > 0x1f) sB = 0x1f;
1336 *dst = uint16_t((sR<<11)|(sG<<5)|sB);
1337 }
1338};
1339
1340/* same as 32to16_modulate, except that the input is xRGB, instead of ARGB */
1341struct blender_x32to16_modulate : blender_modulate {
Chih-Hung Hsieh75935ef2016-04-25 15:28:36 -07001342 explicit blender_x32to16_modulate(const context_t* c) {
David 'Digit' Turner39764f42011-04-15 20:12:07 +02001343 init(c);
1344 }
1345 void write(uint32_t s, uint16_t* dst) {
1346 s = GGL_RGBA_TO_HOST(s);
1347
1348 uint32_t sB = (s >> 16) & 0xff;
1349 uint32_t sG = (s >> 8) & 0xff;
1350 uint32_t sR = s & 0xff;
1351
1352 /* Keep R/G/B in 5.8 or 6.8 format */
1353 sR = (sR*m_r) >> (8 - 5);
1354 sG = (sG*m_g) >> (8 - 6);
1355 sB = (sB*m_b) >> (8 - 5);
1356
1357 int f = 0x100 - m_a;
1358 uint16_t d = *dst;
1359 int dR = (d>>11)&0x1f;
1360 int dG = (d>>5)&0x3f;
1361 int dB = (d)&0x1f;
1362 sR = (sR + f*dR)>>8;
1363 sG = (sG + f*dG)>>8;
1364 sB = (sB + f*dB)>>8;
1365 *dst = uint16_t((sR<<11)|(sG<<5)|sB);
1366 }
1367 void write(uint32_t s, uint16_t* dst, ditherer& di) {
1368 s = GGL_RGBA_TO_HOST(s);
1369
1370 uint32_t sB = (s >> 16) & 0xff;
1371 uint32_t sG = (s >> 8) & 0xff;
1372 uint32_t sR = s & 0xff;
1373
1374 sR = (sR*m_r) >> (8 - 5);
1375 sG = (sG*m_g) >> (8 - 6);
1376 sB = (sB*m_b) >> (8 - 5);
1377
1378 /* Now do a normal blend */
1379 int threshold = di.get_value() << (8 - GGL_DITHER_BITS);
1380 int f = 0x100 - m_a;
1381 uint16_t d = *dst;
1382 int dR = (d>>11)&0x1f;
1383 int dG = (d>>5)&0x3f;
1384 int dB = (d)&0x1f;
1385 sR = (sR + f*dR + threshold)>>8;
1386 sG = (sG + f*dG + threshold)>>8;
1387 sB = (sB + f*dB + threshold)>>8;
1388 if (sR > 0x1f) sR = 0x1f;
1389 if (sG > 0x3f) sG = 0x3f;
1390 if (sB > 0x1f) sB = 0x1f;
1391 *dst = uint16_t((sR<<11)|(sG<<5)|sB);
1392 }
1393};
1394
1395/* Same as above, but source is 16bit rgb565 */
1396struct blender_16to16_modulate : blender_modulate {
Chih-Hung Hsieh75935ef2016-04-25 15:28:36 -07001397 explicit blender_16to16_modulate(const context_t* c) {
David 'Digit' Turner39764f42011-04-15 20:12:07 +02001398 init(c);
1399 }
1400 void write(uint16_t s16, uint16_t* dst) {
1401 uint32_t s = s16;
1402
1403 uint32_t sR = s >> 11;
1404 uint32_t sG = (s >> 5) & 0x3f;
1405 uint32_t sB = s & 0x1f;
1406
1407 sR = (sR*m_r);
1408 sG = (sG*m_g);
1409 sB = (sB*m_b);
1410
1411 int f = 0x100 - m_a;
1412 uint16_t d = *dst;
1413 int dR = (d>>11)&0x1f;
1414 int dG = (d>>5)&0x3f;
1415 int dB = (d)&0x1f;
1416 sR = (sR + f*dR)>>8;
1417 sG = (sG + f*dG)>>8;
1418 sB = (sB + f*dB)>>8;
1419 *dst = uint16_t((sR<<11)|(sG<<5)|sB);
1420 }
1421};
1422
1423/* This is used to iterate over a 16-bit destination color buffer.
1424 * Usage is:
1425 *
1426 * dst_iterator16 di(context);
1427 * while (di.count--) {
1428 * <do stuff with dest pixel at di.dst>
1429 * di.dst++;
1430 * }
1431 */
1432struct dst_iterator16 {
Chih-Hung Hsieh75935ef2016-04-25 15:28:36 -07001433 explicit dst_iterator16(const context_t* c) {
David 'Digit' Turner39764f42011-04-15 20:12:07 +02001434 const int x = c->iterators.xl;
1435 const int width = c->iterators.xr - x;
1436 const int32_t y = c->iterators.y;
1437 const surface_t* cb = &(c->state.buffers.color);
1438 count = width;
1439 dst = reinterpret_cast<uint16_t*>(cb->data) + (x+(cb->stride*y));
1440 }
1441 int count;
1442 uint16_t* dst;
1443};
1444
1445
1446static void scanline_t32cb16_clamp(context_t* c)
1447{
1448 dst_iterator16 di(c);
1449
1450 if (is_context_horizontal(c)) {
1451 /* Special case for simple horizontal scaling */
1452 horz_clamp_iterator32 ci(c);
1453 while (di.count--) {
1454 uint32_t s = ci.get_pixel32();
1455 *di.dst++ = convertAbgr8888ToRgb565(s);
1456 }
1457 } else {
1458 /* General case */
1459 clamp_iterator ci(c);
1460 while (di.count--) {
1461 uint32_t s = ci.get_pixel32();
1462 *di.dst++ = convertAbgr8888ToRgb565(s);
1463 }
1464 }
1465}
1466
1467static void scanline_t32cb16_dither(context_t* c)
1468{
1469 horz_iterator32 si(c);
1470 dst_iterator16 di(c);
1471 ditherer dither(c);
1472
1473 while (di.count--) {
1474 uint32_t s = si.get_pixel32();
1475 *di.dst++ = dither.abgr8888ToRgb565(s);
1476 }
1477}
1478
1479static void scanline_t32cb16_clamp_dither(context_t* c)
1480{
1481 dst_iterator16 di(c);
1482 ditherer dither(c);
1483
1484 if (is_context_horizontal(c)) {
1485 /* Special case for simple horizontal scaling */
1486 horz_clamp_iterator32 ci(c);
1487 while (di.count--) {
1488 uint32_t s = ci.get_pixel32();
1489 *di.dst++ = dither.abgr8888ToRgb565(s);
1490 }
1491 } else {
1492 /* General case */
1493 clamp_iterator ci(c);
1494 while (di.count--) {
1495 uint32_t s = ci.get_pixel32();
1496 *di.dst++ = dither.abgr8888ToRgb565(s);
1497 }
1498 }
1499}
1500
1501static void scanline_t32cb16blend_dither(context_t* c)
1502{
1503 dst_iterator16 di(c);
1504 ditherer dither(c);
1505 blender_32to16 bl(c);
1506 horz_iterator32 hi(c);
1507 while (di.count--) {
1508 uint32_t s = hi.get_pixel32();
1509 bl.write(s, di.dst, dither);
1510 di.dst++;
1511 }
1512}
1513
1514static void scanline_t32cb16blend_clamp(context_t* c)
1515{
1516 dst_iterator16 di(c);
1517 blender_32to16 bl(c);
1518
1519 if (is_context_horizontal(c)) {
1520 horz_clamp_iterator32 ci(c);
1521 while (di.count--) {
1522 uint32_t s = ci.get_pixel32();
1523 bl.write(s, di.dst);
1524 di.dst++;
1525 }
1526 } else {
1527 clamp_iterator ci(c);
1528 while (di.count--) {
1529 uint32_t s = ci.get_pixel32();
1530 bl.write(s, di.dst);
1531 di.dst++;
1532 }
1533 }
1534}
1535
1536static void scanline_t32cb16blend_clamp_dither(context_t* c)
1537{
1538 dst_iterator16 di(c);
1539 ditherer dither(c);
1540 blender_32to16 bl(c);
1541
1542 clamp_iterator ci(c);
1543 while (di.count--) {
1544 uint32_t s = ci.get_pixel32();
1545 bl.write(s, di.dst, dither);
1546 di.dst++;
1547 }
1548}
1549
1550void scanline_t32cb16blend_clamp_mod(context_t* c)
1551{
1552 dst_iterator16 di(c);
1553 blender_32to16_modulate bl(c);
1554
1555 clamp_iterator ci(c);
1556 while (di.count--) {
1557 uint32_t s = ci.get_pixel32();
1558 bl.write(s, di.dst);
1559 di.dst++;
1560 }
1561}
1562
1563void scanline_t32cb16blend_clamp_mod_dither(context_t* c)
1564{
1565 dst_iterator16 di(c);
1566 blender_32to16_modulate bl(c);
1567 ditherer dither(c);
1568
1569 clamp_iterator ci(c);
1570 while (di.count--) {
1571 uint32_t s = ci.get_pixel32();
1572 bl.write(s, di.dst, dither);
1573 di.dst++;
1574 }
1575}
1576
1577/* Variant of scanline_t32cb16blend_clamp_mod with a xRGB texture */
1578void scanline_x32cb16blend_clamp_mod(context_t* c)
1579{
1580 dst_iterator16 di(c);
1581 blender_x32to16_modulate bl(c);
1582
1583 clamp_iterator ci(c);
1584 while (di.count--) {
1585 uint32_t s = ci.get_pixel32();
1586 bl.write(s, di.dst);
1587 di.dst++;
1588 }
1589}
1590
1591void scanline_x32cb16blend_clamp_mod_dither(context_t* c)
1592{
1593 dst_iterator16 di(c);
1594 blender_x32to16_modulate bl(c);
1595 ditherer dither(c);
1596
1597 clamp_iterator ci(c);
1598 while (di.count--) {
1599 uint32_t s = ci.get_pixel32();
1600 bl.write(s, di.dst, dither);
1601 di.dst++;
1602 }
1603}
1604
1605void scanline_t16cb16_clamp(context_t* c)
1606{
1607 dst_iterator16 di(c);
1608
1609 /* Special case for simple horizontal scaling */
1610 if (is_context_horizontal(c)) {
1611 horz_clamp_iterator16 ci(c);
1612 while (di.count--) {
1613 *di.dst++ = ci.get_pixel16();
1614 }
1615 } else {
1616 clamp_iterator ci(c);
1617 while (di.count--) {
1618 *di.dst++ = ci.get_pixel16();
1619 }
1620 }
1621}
1622
1623
1624
/* Evaluates an interpolant at the center of the first pixel of row 'y':
 *
 *   v = v0 + (y + 0.5) * dvdy + (0.5 * dvdx)
 *
 * y          - row index
 * v0         - value at the origin
 * dvdx, dvdy - per-pixel gradients along x and y
 *
 * The two half-steps are combined as (dvdy + dvdx) >> 1 and the result is
 * converted to T.
 */
template <typename T, typename U>
static inline __attribute__((const))
T interpolate(int y, T v0, U dvdx, U dvdy) {
    // origin plus the half-pixel offset, then 'y' whole rows
    return (v0 + ((dvdx + dvdy) >> 1)) + (y * dvdy);
}
1632
1633// ----------------------------------------------------------------------------
1634#if 0
1635#pragma mark -
1636#endif
1637
1638void init_y(context_t* c, int32_t ys)
1639{
1640 const uint32_t enables = c->state.enables;
1641
1642 // compute iterators...
1643 iterators_t& ci = c->iterators;
1644
1645 // sample in the center
1646 ci.y = ys;
1647
1648 if (enables & (GGL_ENABLE_DEPTH_TEST|GGL_ENABLE_W|GGL_ENABLE_FOG)) {
1649 ci.ydzdy = interpolate(ys, c->shade.z0, c->shade.dzdx, c->shade.dzdy);
1650 ci.ydwdy = interpolate(ys, c->shade.w0, c->shade.dwdx, c->shade.dwdy);
1651 ci.ydfdy = interpolate(ys, c->shade.f0, c->shade.dfdx, c->shade.dfdy);
1652 }
1653
1654 if (ggl_unlikely(enables & GGL_ENABLE_SMOOTH)) {
1655 ci.ydrdy = interpolate(ys, c->shade.r0, c->shade.drdx, c->shade.drdy);
1656 ci.ydgdy = interpolate(ys, c->shade.g0, c->shade.dgdx, c->shade.dgdy);
1657 ci.ydbdy = interpolate(ys, c->shade.b0, c->shade.dbdx, c->shade.dbdy);
1658 ci.ydady = interpolate(ys, c->shade.a0, c->shade.dadx, c->shade.dady);
1659 c->step_y = step_y__smooth;
1660 } else {
1661 ci.ydrdy = c->shade.r0;
1662 ci.ydgdy = c->shade.g0;
1663 ci.ydbdy = c->shade.b0;
1664 ci.ydady = c->shade.a0;
1665 // XXX: do only if needed, or make sure this is fast
1666 c->packed = ggl_pack_color(c, c->state.buffers.color.format,
1667 ci.ydrdy, ci.ydgdy, ci.ydbdy, ci.ydady);
1668 c->packed8888 = ggl_pack_color(c, GGL_PIXEL_FORMAT_RGBA_8888,
1669 ci.ydrdy, ci.ydgdy, ci.ydbdy, ci.ydady);
1670 }
1671
1672 // initialize the variables we need in the shader
1673 generated_vars_t& gen = c->generated_vars;
1674 gen.argb[GGLFormat::ALPHA].c = ci.ydady;
1675 gen.argb[GGLFormat::ALPHA].dx = c->shade.dadx;
1676 gen.argb[GGLFormat::RED ].c = ci.ydrdy;
1677 gen.argb[GGLFormat::RED ].dx = c->shade.drdx;
1678 gen.argb[GGLFormat::GREEN].c = ci.ydgdy;
1679 gen.argb[GGLFormat::GREEN].dx = c->shade.dgdx;
1680 gen.argb[GGLFormat::BLUE ].c = ci.ydbdy;
1681 gen.argb[GGLFormat::BLUE ].dx = c->shade.dbdx;
1682 gen.dzdx = c->shade.dzdx;
1683 gen.f = ci.ydfdy;
1684 gen.dfdx = c->shade.dfdx;
1685
1686 if (enables & GGL_ENABLE_TMUS) {
1687 for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) {
1688 texture_t& t = c->state.texture[i];
1689 if (!t.enable) continue;
1690
1691 texture_iterators_t& ti = t.iterators;
1692 if (t.s_coord == GGL_ONE_TO_ONE && t.t_coord == GGL_ONE_TO_ONE) {
1693 // we need to set all of these to 0 because in some cases
1694 // step_y__generic() or step_y__tmu() will be used and
1695 // therefore will update dtdy, however, in 1:1 mode
1696 // this is always done by the scanline rasterizer.
1697 ti.dsdx = ti.dsdy = ti.dtdx = ti.dtdy = 0;
1698 ti.ydsdy = t.shade.is0;
1699 ti.ydtdy = t.shade.it0;
1700 } else {
1701 const int adjustSWrap = ((t.s_wrap==GGL_CLAMP)?0:16);
1702 const int adjustTWrap = ((t.t_wrap==GGL_CLAMP)?0:16);
1703 ti.sscale = t.shade.sscale + adjustSWrap;
1704 ti.tscale = t.shade.tscale + adjustTWrap;
1705 if (!(enables & GGL_ENABLE_W)) {
1706 // S coordinate
1707 const int32_t sscale = ti.sscale;
1708 const int32_t sy = interpolate(ys,
1709 t.shade.is0, t.shade.idsdx, t.shade.idsdy);
1710 if (sscale>=0) {
1711 ti.ydsdy= sy << sscale;
1712 ti.dsdx = t.shade.idsdx << sscale;
1713 ti.dsdy = t.shade.idsdy << sscale;
1714 } else {
1715 ti.ydsdy= sy >> -sscale;
1716 ti.dsdx = t.shade.idsdx >> -sscale;
1717 ti.dsdy = t.shade.idsdy >> -sscale;
1718 }
1719 // T coordinate
1720 const int32_t tscale = ti.tscale;
1721 const int32_t ty = interpolate(ys,
1722 t.shade.it0, t.shade.idtdx, t.shade.idtdy);
1723 if (tscale>=0) {
1724 ti.ydtdy= ty << tscale;
1725 ti.dtdx = t.shade.idtdx << tscale;
1726 ti.dtdy = t.shade.idtdy << tscale;
1727 } else {
1728 ti.ydtdy= ty >> -tscale;
1729 ti.dtdx = t.shade.idtdx >> -tscale;
1730 ti.dtdy = t.shade.idtdy >> -tscale;
1731 }
1732 }
1733 }
1734 // mirror for generated code...
1735 generated_tex_vars_t& gen = c->generated_vars.texture[i];
1736 gen.width = t.surface.width;
1737 gen.height = t.surface.height;
1738 gen.stride = t.surface.stride;
Ashok Bhatd10afb12013-11-14 11:13:41 +00001739 gen.data = uintptr_t(t.surface.data);
The Android Open Source Projectdd7bc332009-03-03 19:32:55 -08001740 gen.dsdx = ti.dsdx;
1741 gen.dtdx = ti.dtdx;
1742 }
1743 }
1744
1745 // choose the y-stepper
1746 c->step_y = step_y__nop;
1747 if (enables & GGL_ENABLE_FOG) {
1748 c->step_y = step_y__generic;
1749 } else if (enables & GGL_ENABLE_TMUS) {
1750 if (enables & GGL_ENABLE_SMOOTH) {
1751 c->step_y = step_y__generic;
1752 } else if (enables & GGL_ENABLE_W) {
1753 c->step_y = step_y__w;
1754 } else {
1755 c->step_y = step_y__tmu;
1756 }
1757 } else {
1758 if (enables & GGL_ENABLE_SMOOTH) {
1759 c->step_y = step_y__smooth;
1760 }
1761 }
1762
1763 // choose the rectangle blitter
1764 c->rect = rect_generic;
1765 if ((c->step_y == step_y__nop) &&
1766 (c->scanline == scanline_memcpy))
1767 {
1768 c->rect = rect_memcpy;
1769 }
1770}
1771
1772void init_y_packed(context_t* c, int32_t y0)
1773{
1774 uint8_t f = c->state.buffers.color.format;
1775 c->packed = ggl_pack_color(c, f,
1776 c->shade.r0, c->shade.g0, c->shade.b0, c->shade.a0);
Martyn Capewellf9e8ab02009-12-07 15:00:19 +00001777 c->packed8888 = ggl_pack_color(c, GGL_PIXEL_FORMAT_RGBA_8888,
1778 c->shade.r0, c->shade.g0, c->shade.b0, c->shade.a0);
The Android Open Source Projectdd7bc332009-03-03 19:32:55 -08001779 c->iterators.y = y0;
1780 c->step_y = step_y__nop;
1781 // choose the rectangle blitter
1782 c->rect = rect_generic;
1783 if (c->scanline == scanline_memcpy) {
1784 c->rect = rect_memcpy;
1785 }
1786}
1787
1788void init_y_noop(context_t* c, int32_t y0)
1789{
1790 c->iterators.y = y0;
1791 c->step_y = step_y__nop;
1792 // choose the rectangle blitter
1793 c->rect = rect_generic;
1794 if (c->scanline == scanline_memcpy) {
1795 c->rect = rect_memcpy;
1796 }
1797}
1798
1799void init_y_error(context_t* c, int32_t y0)
1800{
1801 // woooops, shoud never happen,
1802 // fail gracefully (don't display anything)
1803 init_y_noop(c, y0);
Steve Block8aeb6e22012-01-06 14:13:42 +00001804 ALOGE("color-buffer has an invalid format!");
The Android Open Source Projectdd7bc332009-03-03 19:32:55 -08001805}
1806
1807// ----------------------------------------------------------------------------
1808#if 0
1809#pragma mark -
1810#endif
1811
1812void step_y__generic(context_t* c)
1813{
1814 const uint32_t enables = c->state.enables;
1815
1816 // iterate...
1817 iterators_t& ci = c->iterators;
1818 ci.y += 1;
1819
1820 if (enables & GGL_ENABLE_SMOOTH) {
1821 ci.ydrdy += c->shade.drdy;
1822 ci.ydgdy += c->shade.dgdy;
1823 ci.ydbdy += c->shade.dbdy;
1824 ci.ydady += c->shade.dady;
1825 }
1826
1827 const uint32_t mask =
1828 GGL_ENABLE_DEPTH_TEST |
1829 GGL_ENABLE_W |
1830 GGL_ENABLE_FOG;
1831 if (enables & mask) {
1832 ci.ydzdy += c->shade.dzdy;
1833 ci.ydwdy += c->shade.dwdy;
1834 ci.ydfdy += c->shade.dfdy;
1835 }
1836
1837 if ((enables & GGL_ENABLE_TMUS) && (!(enables & GGL_ENABLE_W))) {
1838 for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) {
1839 if (c->state.texture[i].enable) {
1840 texture_iterators_t& ti = c->state.texture[i].iterators;
1841 ti.ydsdy += ti.dsdy;
1842 ti.ydtdy += ti.dtdy;
1843 }
1844 }
1845 }
1846}
1847
1848void step_y__nop(context_t* c)
1849{
1850 c->iterators.y += 1;
1851 c->iterators.ydzdy += c->shade.dzdy;
1852}
1853
1854void step_y__smooth(context_t* c)
1855{
1856 iterators_t& ci = c->iterators;
1857 ci.y += 1;
1858 ci.ydrdy += c->shade.drdy;
1859 ci.ydgdy += c->shade.dgdy;
1860 ci.ydbdy += c->shade.dbdy;
1861 ci.ydady += c->shade.dady;
1862 ci.ydzdy += c->shade.dzdy;
1863}
1864
1865void step_y__w(context_t* c)
1866{
1867 iterators_t& ci = c->iterators;
1868 ci.y += 1;
1869 ci.ydzdy += c->shade.dzdy;
1870 ci.ydwdy += c->shade.dwdy;
1871}
1872
1873void step_y__tmu(context_t* c)
1874{
1875 iterators_t& ci = c->iterators;
1876 ci.y += 1;
1877 ci.ydzdy += c->shade.dzdy;
1878 for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) {
1879 if (c->state.texture[i].enable) {
1880 texture_iterators_t& ti = c->state.texture[i].iterators;
1881 ti.ydsdy += ti.dsdy;
1882 ti.ydtdy += ti.dtdy;
1883 }
1884 }
1885}
1886
1887// ----------------------------------------------------------------------------
1888#if 0
1889#pragma mark -
1890#endif
1891
// Scanline function for perspective texturing with any number of enabled
// texture units.
//
// The scanline [xl, xr) is cut into spans of (1 << SPAN_BITS) pixels,
// followed by one shorter span holding the remainder. For every span the
// s/t coordinates of each enabled unit are evaluated at both ends using the
// value returned by gglRecipQ(w, 30) (presumably the fixed-point reciprocal
// of w -- see gglRecipQ), a linear gradient is derived from the two end
// values, and c->span() is invoked with c->iterators.xl/xr narrowed to that
// span.
//
// Side effects: c->iterators.xl/xr, the per-unit texture iterators
// (ydvdy/dvdx) and c->generated_vars.texture[].dsdx/dtdx are overwritten
// and NOT restored on return.
void scanline_perspective(context_t* c)
{
    // Per-TMU scratch state. The anonymous union overlays the named view
    // {s, sq, t, tq} with st[0] = {s, sq} and st[1] = {t, tq}, which lets
    // the span loop below handle s and t with the same code.
    //   v: coordinate before the multiply by the reciprocal (stepped
    //      linearly in x)
    //   q: coordinate after gglMulx() with the reciprocal, evaluated at the
    //      start of the span about to be drawn
    struct {
        union {
            struct {
                int32_t s, sq;
                int32_t t, tq;
            } sqtq;
            struct {
                int32_t v, q;
            } st[2];
        };
    } tc[GGL_TEXTURE_UNIT_COUNT] __attribute__((aligned(16)));

    // XXX: we should have a special case when dwdx = 0

    // 32 pixels spans works okay. 16 is a lot better,
    // but hey, it's a software renderer...
    const uint32_t SPAN_BITS = 5;
    const uint32_t ys = c->iterators.y;
    const uint32_t xs = c->iterators.xl;
    const uint32_t x1 = c->iterators.xr;
    const uint32_t xc = x1 - xs;                        // pixels in this scanline
    uint32_t remainder = xc & ((1<<SPAN_BITS)-1);       // size of the final, partial span
    uint32_t numSpans = xc >> SPAN_BITS;                // number of full spans

    // w and its reciprocal at the left edge of the scanline
    const iterators_t& ci = c->iterators;
    int32_t w0 = (xs * c->shade.dwdx) + ci.ydwdy;
    int32_t q0 = gglRecipQ(w0, 30);
    // derived from the leading-zero count of q0; computed once and reused
    // as the gglMulx() shift for the whole scanline
    const int iwscale = 32 - gglClz(q0);

    // w increment across one full span
    const int32_t dwdx = c->shade.dwdx << SPAN_BITS;
    int32_t xl = c->iterators.xl;

    // We process s & t with a loop to reduce the code size
    // (and i-cache pressure).

    // Seed tc[] with the coordinates at (xs, ys), offset by half a step in
    // both x and y (presumably to sample at the pixel center), and with
    // their products with the reciprocal at the left edge.
    for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) {
        const texture_t& tmu = c->state.texture[i];
        if (!tmu.enable) continue;
        int32_t s = tmu.shade.is0 +
                (tmu.shade.idsdy * ys) + (tmu.shade.idsdx * xs) +
                ((tmu.shade.idsdx + tmu.shade.idsdy)>>1);
        int32_t t = tmu.shade.it0 +
                (tmu.shade.idtdy * ys) + (tmu.shade.idtdx * xs) +
                ((tmu.shade.idtdx + tmu.shade.idtdy)>>1);
        tc[i].sqtq.s = s;
        tc[i].sqtq.t = t;
        tc[i].sqtq.sq = gglMulx(s, q0, iwscale);
        tc[i].sqtq.tq = gglMulx(t, q0, iwscale);
    }

    // 'span' stays 0 while full spans are being drawn and is set to
    // 'remainder' for the last, partial span. The do/while runs once per
    // full span and then one extra time for the remainder (or breaks out
    // immediately if there is none).
    int32_t span = 0;
    do {
        // w at the right end of the current span
        int32_t w1;
        if (ggl_likely(numSpans)) {
            w1 = w0 + dwdx;
        } else {
            if (remainder) {
                // finish off the scanline...
                span = remainder;
                w1 = (c->shade.dwdx * span) + w0;
            } else {
                break;
            }
        }
        int32_t q1 = gglRecipQ(w1, 30);
        for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) {
            texture_t& tmu = c->state.texture[i];
            if (!tmu.enable) continue;
            texture_iterators_t& ti = tmu.iterators;

            // j == 0 handles s, j == 1 handles t (see the tc[] union above;
            // tmu.shade.st[] and ti.st[] are presumably laid out the same
            // way -- confirm against their declarations)
            for (int j=0 ; j<2 ; j++) {
                // advance the linear coordinate to the right end of the span
                int32_t v = tc[i].st[j].v;
                if (span)   v += (tmu.shade.st[j].dx)*span;
                else        v += (tmu.shade.st[j].dx)<<SPAN_BITS;
                // v0/v1: coordinate at the left/right end of the span after
                // the multiply by the reciprocal
                const int32_t v0 = tc[i].st[j].q;
                const int32_t v1 = gglMulx(v, q1, iwscale);
                // per-pixel gradient across the span
                int32_t dvdx = v1 - v0;
                if (span)   dvdx /= span;
                else        dvdx >>= SPAN_BITS;
                // the right end becomes the left end of the next span
                tc[i].st[j].v = v;
                tc[i].st[j].q = v1;

                // rescale start value and gradient before storing them in
                // the texture iterators; a negative scale shifts right
                const int scale = ti.st[j].scale + (iwscale - 30);
                if (scale >= 0) {
                    ti.st[j].ydvdy = v0 << scale;
                    ti.st[j].dvdx  = dvdx << scale;
                } else {
                    ti.st[j].ydvdy = v0 >> -scale;
                    ti.st[j].dvdx  = dvdx >> -scale;
                }
            }
            // mirror the gradients into the generated-code variables
            generated_tex_vars_t& gen = c->generated_vars.texture[i];
            gen.dsdx = ti.st[0].dvdx;
            gen.dtdx = ti.st[1].dvdx;
        }
        // narrow the iterators to this span and draw it
        c->iterators.xl = xl;
        c->iterators.xr = xl = xl + (span ? span : (1<<SPAN_BITS));
        w0 = w1;
        q0 = q1;
        c->span(c);
    } while(numSpans--);
}
1996
1997void scanline_perspective_single(context_t* c)
1998{
1999 // 32 pixels spans works okay. 16 is a lot better,
2000 // but hey, it's a software renderer...
2001 const uint32_t SPAN_BITS = 5;
2002 const uint32_t ys = c->iterators.y;
2003 const uint32_t xs = c->iterators.xl;
2004 const uint32_t x1 = c->iterators.xr;
2005 const uint32_t xc = x1 - xs;
2006
2007 const iterators_t& ci = c->iterators;
2008 int32_t w = (xs * c->shade.dwdx) + ci.ydwdy;
2009 int32_t iw = gglRecipQ(w, 30);
2010 const int iwscale = 32 - gglClz(iw);
2011
2012 const int i = 31 - gglClz(c->state.enabled_tmu);
2013 generated_tex_vars_t& gen = c->generated_vars.texture[i];
2014 texture_t& tmu = c->state.texture[i];
2015 texture_iterators_t& ti = tmu.iterators;
2016 const int sscale = ti.sscale + (iwscale - 30);
2017 const int tscale = ti.tscale + (iwscale - 30);
2018 int32_t s = tmu.shade.is0 +
2019 (tmu.shade.idsdy * ys) + (tmu.shade.idsdx * xs) +
2020 ((tmu.shade.idsdx + tmu.shade.idsdy)>>1);
2021 int32_t t = tmu.shade.it0 +
2022 (tmu.shade.idtdy * ys) + (tmu.shade.idtdx * xs) +
2023 ((tmu.shade.idtdx + tmu.shade.idtdy)>>1);
2024 int32_t s0 = gglMulx(s, iw, iwscale);
2025 int32_t t0 = gglMulx(t, iw, iwscale);
2026 int32_t xl = c->iterators.xl;
2027
2028 int32_t sq, tq, dsdx, dtdx;
2029 int32_t premainder = xc & ((1<<SPAN_BITS)-1);
2030 uint32_t numSpans = xc >> SPAN_BITS;
2031 if (c->shade.dwdx == 0) {
2032 // XXX: we could choose to do this if the error is small enough
2033 numSpans = 0;
2034 premainder = xc;
2035 goto no_perspective;
2036 }
2037
2038 if (premainder) {
2039 w += c->shade.dwdx * premainder;
2040 iw = gglRecipQ(w, 30);
2041no_perspective:
2042 s += tmu.shade.idsdx * premainder;
2043 t += tmu.shade.idtdx * premainder;
2044 sq = gglMulx(s, iw, iwscale);
2045 tq = gglMulx(t, iw, iwscale);
2046 dsdx = (sq - s0) / premainder;
2047 dtdx = (tq - t0) / premainder;
2048 c->iterators.xl = xl;
2049 c->iterators.xr = xl = xl + premainder;
2050 goto finish;
2051 }
2052
2053 while (numSpans--) {
2054 w += c->shade.dwdx << SPAN_BITS;
2055 s += tmu.shade.idsdx << SPAN_BITS;
2056 t += tmu.shade.idtdx << SPAN_BITS;
2057 iw = gglRecipQ(w, 30);
2058 sq = gglMulx(s, iw, iwscale);
2059 tq = gglMulx(t, iw, iwscale);
2060 dsdx = (sq - s0) >> SPAN_BITS;
2061 dtdx = (tq - t0) >> SPAN_BITS;
2062 c->iterators.xl = xl;
2063 c->iterators.xr = xl = xl + (1<<SPAN_BITS);
2064finish:
2065 if (sscale >= 0) {
2066 ti.ydsdy = s0 << sscale;
2067 ti.dsdx = dsdx << sscale;
2068 } else {
2069 ti.ydsdy = s0 >>-sscale;
2070 ti.dsdx = dsdx >>-sscale;
2071 }
2072 if (tscale >= 0) {
2073 ti.ydtdy = t0 << tscale;
2074 ti.dtdx = dtdx << tscale;
2075 } else {
2076 ti.ydtdy = t0 >>-tscale;
2077 ti.dtdx = dtdx >>-tscale;
2078 }
2079 s0 = sq;
2080 t0 = tq;
2081 gen.dsdx = ti.dsdx;
2082 gen.dtdx = ti.dtdx;
2083 c->span(c);
2084 }
2085}
2086
2087// ----------------------------------------------------------------------------
2088
Martyn Capewellf9e8ab02009-12-07 15:00:19 +00002089void scanline_col32cb16blend(context_t* c)
2090{
2091 int32_t x = c->iterators.xl;
2092 size_t ct = c->iterators.xr - x;
2093 int32_t y = c->iterators.y;
2094 surface_t* cb = &(c->state.buffers.color);
2095 union {
2096 uint16_t* dst;
2097 uint32_t* dst32;
2098 };
2099 dst = reinterpret_cast<uint16_t*>(cb->data) + (x+(cb->stride*y));
2100
2101#if ((ANDROID_CODEGEN >= ANDROID_CODEGEN_ASM) && defined(__arm__))
2102#if defined(__ARM_HAVE_NEON) && BYTE_ORDER == LITTLE_ENDIAN
2103 scanline_col32cb16blend_neon(dst, &(c->packed8888), ct);
2104#else // defined(__ARM_HAVE_NEON) && BYTE_ORDER == LITTLE_ENDIAN
2105 scanline_col32cb16blend_arm(dst, GGL_RGBA_TO_HOST(c->packed8888), ct);
2106#endif // defined(__ARM_HAVE_NEON) && BYTE_ORDER == LITTLE_ENDIAN
Ashok Bhat658f89d2013-02-28 18:32:03 +00002107#elif ((ANDROID_CODEGEN >= ANDROID_CODEGEN_ASM) && defined(__aarch64__))
Colin Crossd4146e62014-01-21 20:12:28 -08002108 scanline_col32cb16blend_arm64(dst, GGL_RGBA_TO_HOST(c->packed8888), ct);
Elliott Hughes606d4ae2015-11-05 18:55:20 +00002109#elif ((ANDROID_CODEGEN >= ANDROID_CODEGEN_ASM) && (defined(__mips__) && defined(__LP64__)))
2110 scanline_col32cb16blend_mips64(dst, GGL_RGBA_TO_HOST(c->packed8888), ct);
Martyn Capewellf9e8ab02009-12-07 15:00:19 +00002111#else
2112 uint32_t s = GGL_RGBA_TO_HOST(c->packed8888);
2113 int sA = (s>>24);
2114 int f = 0x100 - (sA + (sA>>7));
2115 while (ct--) {
2116 uint16_t d = *dst;
2117 int dR = (d>>11)&0x1f;
2118 int dG = (d>>5)&0x3f;
2119 int dB = (d)&0x1f;
2120 int sR = (s >> ( 3))&0x1F;
2121 int sG = (s >> ( 8+2))&0x3F;
2122 int sB = (s >> (16+3))&0x1F;
2123 sR += (f*dR)>>8;
2124 sG += (f*dG)>>8;
2125 sB += (f*dB)>>8;
2126 *dst++ = uint16_t((sR<<11)|(sG<<5)|sB);
2127 }
2128#endif
2129
2130}
2131
The Android Open Source Projectdd7bc332009-03-03 19:32:55 -08002132void scanline_t32cb16(context_t* c)
2133{
2134 int32_t x = c->iterators.xl;
2135 size_t ct = c->iterators.xr - x;
2136 int32_t y = c->iterators.y;
2137 surface_t* cb = &(c->state.buffers.color);
2138 union {
2139 uint16_t* dst;
2140 uint32_t* dst32;
2141 };
2142 dst = reinterpret_cast<uint16_t*>(cb->data) + (x+(cb->stride*y));
2143
2144 surface_t* tex = &(c->state.texture[0].surface);
2145 const int32_t u = (c->state.texture[0].shade.is0>>16) + x;
2146 const int32_t v = (c->state.texture[0].shade.it0>>16) + y;
2147 uint32_t *src = reinterpret_cast<uint32_t*>(tex->data)+(u+(tex->stride*v));
2148 int sR, sG, sB;
2149 uint32_t s, d;
2150
Ashok Bhatd10afb12013-11-14 11:13:41 +00002151 if (ct==1 || uintptr_t(dst)&2) {
The Android Open Source Projectdd7bc332009-03-03 19:32:55 -08002152last_one:
2153 s = GGL_RGBA_TO_HOST( *src++ );
David 'Digit' Turner39764f42011-04-15 20:12:07 +02002154 *dst++ = convertAbgr8888ToRgb565(s);
The Android Open Source Projectdd7bc332009-03-03 19:32:55 -08002155 ct--;
2156 }
2157
2158 while (ct >= 2) {
The Android Open Source Projectdd7bc332009-03-03 19:32:55 -08002159#if BYTE_ORDER == BIG_ENDIAN
David 'Digit' Turner39764f42011-04-15 20:12:07 +02002160 s = GGL_RGBA_TO_HOST( *src++ );
2161 d = convertAbgr8888ToRgb565_hi16(s);
The Android Open Source Projectdd7bc332009-03-03 19:32:55 -08002162
David 'Digit' Turner39764f42011-04-15 20:12:07 +02002163 s = GGL_RGBA_TO_HOST( *src++ );
2164 d |= convertAbgr8888ToRgb565(s);
2165#else
2166 s = GGL_RGBA_TO_HOST( *src++ );
2167 d = convertAbgr8888ToRgb565(s);
2168
2169 s = GGL_RGBA_TO_HOST( *src++ );
2170 d |= convertAbgr8888ToRgb565(s) << 16;
2171#endif
The Android Open Source Projectdd7bc332009-03-03 19:32:55 -08002172 *dst32++ = d;
2173 ct -= 2;
2174 }
2175
2176 if (ct > 0) {
2177 goto last_one;
2178 }
2179}
2180
2181void scanline_t32cb16blend(context_t* c)
2182{
Elliott Hughes606d4ae2015-11-05 18:55:20 +00002183#if ((ANDROID_CODEGEN >= ANDROID_CODEGEN_ASM) && (defined(__arm__) || defined(__aarch64__) || \
2184 (defined(__mips__) && ((!defined(__LP64__) && __mips_isa_rev < 6) || defined(__LP64__)))))
The Android Open Source Projectdd7bc332009-03-03 19:32:55 -08002185 int32_t x = c->iterators.xl;
2186 size_t ct = c->iterators.xr - x;
2187 int32_t y = c->iterators.y;
2188 surface_t* cb = &(c->state.buffers.color);
2189 uint16_t* dst = reinterpret_cast<uint16_t*>(cb->data) + (x+(cb->stride*y));
2190
2191 surface_t* tex = &(c->state.texture[0].surface);
2192 const int32_t u = (c->state.texture[0].shade.is0>>16) + x;
2193 const int32_t v = (c->state.texture[0].shade.it0>>16) + y;
2194 uint32_t *src = reinterpret_cast<uint32_t*>(tex->data)+(u+(tex->stride*v));
2195
Duane Sand068f9f32012-05-24 22:09:24 -07002196#ifdef __arm__
The Android Open Source Projectdd7bc332009-03-03 19:32:55 -08002197 scanline_t32cb16blend_arm(dst, src, ct);
Ashok Bhat658f89d2013-02-28 18:32:03 +00002198#elif defined(__aarch64__)
Colin Crossd4146e62014-01-21 20:12:28 -08002199 scanline_t32cb16blend_arm64(dst, src, ct);
Elliott Hughes606d4ae2015-11-05 18:55:20 +00002200#elif defined(__mips__) && !defined(__LP64__) && __mips_isa_rev < 6
Duane Sand068f9f32012-05-24 22:09:24 -07002201 scanline_t32cb16blend_mips(dst, src, ct);
Elliott Hughes606d4ae2015-11-05 18:55:20 +00002202#elif defined(__mips__) && defined(__LP64__)
2203 scanline_t32cb16blend_mips64(dst, src, ct);
Duane Sand068f9f32012-05-24 22:09:24 -07002204#endif
2205#else
David 'Digit' Turner39764f42011-04-15 20:12:07 +02002206 dst_iterator16 di(c);
2207 horz_iterator32 hi(c);
2208 blender_32to16 bl(c);
2209 while (di.count--) {
2210 uint32_t s = hi.get_pixel32();
2211 bl.write(s, di.dst);
2212 di.dst++;
The Android Open Source Projectdd7bc332009-03-03 19:32:55 -08002213 }
2214#endif
2215}
2216
David 'Digit' Turner39764f42011-04-15 20:12:07 +02002217void scanline_t32cb16blend_srca(context_t* c)
2218{
2219 dst_iterator16 di(c);
2220 horz_iterator32 hi(c);
2221 blender_32to16_srcA blender(c);
2222
2223 while (di.count--) {
2224 uint32_t s = hi.get_pixel32();
2225 blender.write(s,di.dst);
2226 di.dst++;
2227 }
2228}
2229
2230void scanline_t16cb16blend_clamp_mod(context_t* c)
2231{
2232 const int a = c->iterators.ydady >> (GGL_COLOR_BITS-8);
2233 if (a == 0) {
2234 return;
2235 }
2236
2237 if (a == 255) {
2238 scanline_t16cb16_clamp(c);
2239 return;
2240 }
2241
2242 dst_iterator16 di(c);
2243 blender_16to16_modulate blender(c);
2244 clamp_iterator ci(c);
2245
2246 while (di.count--) {
2247 uint16_t s = ci.get_pixel16();
2248 blender.write(s, di.dst);
2249 di.dst++;
2250 }
2251}
2252
The Android Open Source Projectdd7bc332009-03-03 19:32:55 -08002253void scanline_memcpy(context_t* c)
2254{
2255 int32_t x = c->iterators.xl;
2256 size_t ct = c->iterators.xr - x;
2257 int32_t y = c->iterators.y;
2258 surface_t* cb = &(c->state.buffers.color);
2259 const GGLFormat* fp = &(c->formats[cb->format]);
2260 uint8_t* dst = reinterpret_cast<uint8_t*>(cb->data) +
2261 (x + (cb->stride * y)) * fp->size;
2262
2263 surface_t* tex = &(c->state.texture[0].surface);
2264 const int32_t u = (c->state.texture[0].shade.is0>>16) + x;
2265 const int32_t v = (c->state.texture[0].shade.it0>>16) + y;
2266 uint8_t *src = reinterpret_cast<uint8_t*>(tex->data) +
2267 (u + (tex->stride * v)) * fp->size;
2268
2269 const size_t size = ct * fp->size;
2270 memcpy(dst, src, size);
2271}
2272
2273void scanline_memset8(context_t* c)
2274{
2275 int32_t x = c->iterators.xl;
2276 size_t ct = c->iterators.xr - x;
2277 int32_t y = c->iterators.y;
2278 surface_t* cb = &(c->state.buffers.color);
2279 uint8_t* dst = reinterpret_cast<uint8_t*>(cb->data) + (x+(cb->stride*y));
2280 uint32_t packed = c->packed;
2281 memset(dst, packed, ct);
2282}
2283
2284void scanline_memset16(context_t* c)
2285{
2286 int32_t x = c->iterators.xl;
2287 size_t ct = c->iterators.xr - x;
2288 int32_t y = c->iterators.y;
2289 surface_t* cb = &(c->state.buffers.color);
2290 uint16_t* dst = reinterpret_cast<uint16_t*>(cb->data) + (x+(cb->stride*y));
2291 uint32_t packed = c->packed;
2292 android_memset16(dst, packed, ct*2);
2293}
2294
2295void scanline_memset32(context_t* c)
2296{
2297 int32_t x = c->iterators.xl;
2298 size_t ct = c->iterators.xr - x;
2299 int32_t y = c->iterators.y;
2300 surface_t* cb = &(c->state.buffers.color);
2301 uint32_t* dst = reinterpret_cast<uint32_t*>(cb->data) + (x+(cb->stride*y));
2302 uint32_t packed = GGL_HOST_TO_RGBA(c->packed);
2303 android_memset32(dst, packed, ct*4);
2304}
2305
2306void scanline_clear(context_t* c)
2307{
2308 int32_t x = c->iterators.xl;
2309 size_t ct = c->iterators.xr - x;
2310 int32_t y = c->iterators.y;
2311 surface_t* cb = &(c->state.buffers.color);
2312 const GGLFormat* fp = &(c->formats[cb->format]);
2313 uint8_t* dst = reinterpret_cast<uint8_t*>(cb->data) +
2314 (x + (cb->stride * y)) * fp->size;
2315 const size_t size = ct * fp->size;
2316 memset(dst, 0, size);
2317}
2318
2319void scanline_set(context_t* c)
2320{
2321 int32_t x = c->iterators.xl;
2322 size_t ct = c->iterators.xr - x;
2323 int32_t y = c->iterators.y;
2324 surface_t* cb = &(c->state.buffers.color);
2325 const GGLFormat* fp = &(c->formats[cb->format]);
2326 uint8_t* dst = reinterpret_cast<uint8_t*>(cb->data) +
2327 (x + (cb->stride * y)) * fp->size;
2328 const size_t size = ct * fp->size;
2329 memset(dst, 0xFF, size);
2330}
2331
Ashok Bhat3078b132014-02-17 15:15:46 +00002332void scanline_noop(context_t* /*c*/)
The Android Open Source Projectdd7bc332009-03-03 19:32:55 -08002333{
2334}
2335
2336void rect_generic(context_t* c, size_t yc)
2337{
2338 do {
2339 c->scanline(c);
2340 c->step_y(c);
2341 } while (--yc);
2342}
2343
2344void rect_memcpy(context_t* c, size_t yc)
2345{
2346 int32_t x = c->iterators.xl;
2347 size_t ct = c->iterators.xr - x;
2348 int32_t y = c->iterators.y;
2349 surface_t* cb = &(c->state.buffers.color);
2350 const GGLFormat* fp = &(c->formats[cb->format]);
2351 uint8_t* dst = reinterpret_cast<uint8_t*>(cb->data) +
2352 (x + (cb->stride * y)) * fp->size;
2353
2354 surface_t* tex = &(c->state.texture[0].surface);
2355 const int32_t u = (c->state.texture[0].shade.is0>>16) + x;
2356 const int32_t v = (c->state.texture[0].shade.it0>>16) + y;
2357 uint8_t *src = reinterpret_cast<uint8_t*>(tex->data) +
2358 (u + (tex->stride * v)) * fp->size;
2359
2360 if (cb->stride == tex->stride && ct == size_t(cb->stride)) {
2361 memcpy(dst, src, ct * fp->size * yc);
2362 } else {
2363 const size_t size = ct * fp->size;
2364 const size_t dbpr = cb->stride * fp->size;
2365 const size_t sbpr = tex->stride * fp->size;
2366 do {
2367 memcpy(dst, src, size);
2368 dst += dbpr;
2369 src += sbpr;
2370 } while (--yc);
2371 }
2372}
2373// ----------------------------------------------------------------------------
2374}; // namespace android
2375