blob: 99dafd331f2247e38adef4c793f7598b1d5d82d6 [file] [log] [blame]
John Bauman89401822014-05-06 15:04:28 -04001// SwiftShader Software Renderer
2//
John Bauman66b8ab22014-05-06 15:57:45 -04003// Copyright(c) 2005-2013 TransGaming Inc.
John Bauman89401822014-05-06 15:04:28 -04004//
5// All rights reserved. No part of this software may be copied, distributed, transmitted,
6// transcribed, stored in a retrieval system, translated into any human or computer
7// language by any means, or disclosed to third parties without the explicit written
8// agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express
9// or implied, including but not limited to any patent rights, are granted to you.
10//
11
12#include "Surface.hpp"
13
14#include "Color.hpp"
15#include "Context.hpp"
John Bauman19bac1e2014-05-06 15:23:49 -040016#include "Renderer.hpp"
John Bauman89401822014-05-06 15:04:28 -040017#include "Common/Half.hpp"
18#include "Common/Memory.hpp"
19#include "Common/CPUID.hpp"
20#include "Common/Resource.hpp"
21#include "Common/Debug.hpp"
John Bauman19bac1e2014-05-06 15:23:49 -040022#include "Reactor/Reactor.hpp"
John Bauman89401822014-05-06 15:04:28 -040023
24#include <xmmintrin.h>
25#include <emmintrin.h>
26
27#undef min
28#undef max
29
30namespace sw
31{
32 extern bool quadLayoutEnabled;
33 extern bool complementaryDepthBuffer;
34 extern TranscendentalPrecision logPrecision;
35
36 unsigned int *Surface::palette = 0;
37 unsigned int Surface::paletteID = 0;
38
John Bauman19bac1e2014-05-06 15:23:49 -040039 void Rect::clip(int minX, int minY, int maxX, int maxY)
40 {
Nicolas Capens22658242014-11-29 00:31:41 -050041 x0 = clamp(x0, minX, maxX);
42 y0 = clamp(y0, minY, maxY);
43 x1 = clamp(x1, minX, maxX);
44 y1 = clamp(y1, minY, maxY);
John Bauman19bac1e2014-05-06 15:23:49 -040045 }
46
John Bauman89401822014-05-06 15:04:28 -040047 void Surface::Buffer::write(int x, int y, int z, const Color<float> &color)
48 {
49 void *element = (unsigned char*)buffer + x * bytes + y * pitchB + z * sliceB;
50
51 write(element, color);
52 }
53
54 void Surface::Buffer::write(int x, int y, const Color<float> &color)
55 {
56 void *element = (unsigned char*)buffer + x * bytes + y * pitchB;
57
58 write(element, color);
59 }
60
61 inline void Surface::Buffer::write(void *element, const Color<float> &color)
62 {
63 switch(format)
64 {
65 case FORMAT_A8:
66 *(unsigned char*)element = unorm<8>(color.a);
67 break;
68 case FORMAT_R8:
69 *(unsigned char*)element = unorm<8>(color.r);
70 break;
71 case FORMAT_R3G3B2:
72 *(unsigned char*)element = (unorm<3>(color.r) << 5) | (unorm<3>(color.g) << 2) | (unorm<2>(color.b) << 0);
73 break;
74 case FORMAT_A8R3G3B2:
75 *(unsigned short*)element = (unorm<8>(color.a) << 8) | (unorm<3>(color.r) << 5) | (unorm<3>(color.g) << 2) | (unorm<2>(color.b) << 0);
76 break;
77 case FORMAT_X4R4G4B4:
78 *(unsigned short*)element = 0xF000 | (unorm<4>(color.r) << 8) | (unorm<4>(color.g) << 4) | (unorm<4>(color.b) << 0);
79 break;
80 case FORMAT_A4R4G4B4:
81 *(unsigned short*)element = (unorm<4>(color.a) << 12) | (unorm<4>(color.r) << 8) | (unorm<4>(color.g) << 4) | (unorm<4>(color.b) << 0);
82 break;
Nicolas Capens80594422015-06-09 16:42:56 -040083 case FORMAT_R4G4B4A4:
84 *(unsigned short*)element = (unorm<4>(color.r) << 12) | (unorm<4>(color.g) << 8) | (unorm<4>(color.b) << 4) | (unorm<4>(color.a) << 0);
85 break;
John Bauman89401822014-05-06 15:04:28 -040086 case FORMAT_R5G6B5:
87 *(unsigned short*)element = (unorm<5>(color.r) << 11) | (unorm<6>(color.g) << 5) | (unorm<5>(color.b) << 0);
88 break;
89 case FORMAT_A1R5G5B5:
90 *(unsigned short*)element = (unorm<1>(color.a) << 15) | (unorm<5>(color.r) << 10) | (unorm<5>(color.g) << 5) | (unorm<5>(color.b) << 0);
91 break;
Nicolas Capens80594422015-06-09 16:42:56 -040092 case FORMAT_R5G5B5A1:
93 *(unsigned short*)element = (unorm<5>(color.r) << 11) | (unorm<5>(color.g) << 6) | (unorm<5>(color.b) << 1) | (unorm<5>(color.a) << 0);
94 break;
John Bauman89401822014-05-06 15:04:28 -040095 case FORMAT_X1R5G5B5:
96 *(unsigned short*)element = 0x8000 | (unorm<5>(color.r) << 10) | (unorm<5>(color.g) << 5) | (unorm<5>(color.b) << 0);
97 break;
98 case FORMAT_A8R8G8B8:
99 *(unsigned int*)element = (unorm<8>(color.a) << 24) | (unorm<8>(color.r) << 16) | (unorm<8>(color.g) << 8) | (unorm<8>(color.b) << 0);
100 break;
101 case FORMAT_X8R8G8B8:
102 *(unsigned int*)element = 0xFF000000 | (unorm<8>(color.r) << 16) | (unorm<8>(color.g) << 8) | (unorm<8>(color.b) << 0);
103 break;
104 case FORMAT_A8B8G8R8:
105 *(unsigned int*)element = (unorm<8>(color.a) << 24) | (unorm<8>(color.b) << 16) | (unorm<8>(color.g) << 8) | (unorm<8>(color.r) << 0);
106 break;
107 case FORMAT_X8B8G8R8:
108 *(unsigned int*)element = 0xFF000000 | (unorm<8>(color.b) << 16) | (unorm<8>(color.g) << 8) | (unorm<8>(color.r) << 0);
109 break;
110 case FORMAT_A2R10G10B10:
111 *(unsigned int*)element = (unorm<2>(color.a) << 30) | (unorm<10>(color.r) << 20) | (unorm<10>(color.g) << 10) | (unorm<10>(color.b) << 0);
112 break;
113 case FORMAT_A2B10G10R10:
114 *(unsigned int*)element = (unorm<2>(color.a) << 30) | (unorm<10>(color.b) << 20) | (unorm<10>(color.g) << 10) | (unorm<10>(color.r) << 0);
115 break;
116 case FORMAT_G8R8:
117 *(unsigned int*)element = (unorm<8>(color.g) << 8) | (unorm<8>(color.r) << 0);
118 break;
119 case FORMAT_G16R16:
120 *(unsigned int*)element = (unorm<16>(color.g) << 16) | (unorm<16>(color.r) << 0);
121 break;
122 case FORMAT_A16B16G16R16:
123 ((unsigned short*)element)[0] = unorm<16>(color.r);
124 ((unsigned short*)element)[1] = unorm<16>(color.g);
125 ((unsigned short*)element)[2] = unorm<16>(color.b);
126 ((unsigned short*)element)[3] = unorm<16>(color.a);
127 break;
128 case FORMAT_V8U8:
129 *(unsigned short*)element = (snorm<8>(color.g) << 8) | (snorm<8>(color.r) << 0);
130 break;
131 case FORMAT_L6V5U5:
132 *(unsigned short*)element = (unorm<6>(color.b) << 10) | (snorm<5>(color.g) << 5) | (snorm<5>(color.r) << 0);
133 break;
134 case FORMAT_Q8W8V8U8:
135 *(unsigned int*)element = (snorm<8>(color.a) << 24) | (snorm<8>(color.b) << 16) | (snorm<8>(color.g) << 8) | (snorm<8>(color.r) << 0);
136 break;
137 case FORMAT_X8L8V8U8:
138 *(unsigned int*)element = 0xFF000000 | (unorm<8>(color.b) << 16) | (snorm<8>(color.g) << 8) | (snorm<8>(color.r) << 0);
139 break;
140 case FORMAT_V16U16:
141 *(unsigned int*)element = (snorm<16>(color.g) << 16) | (snorm<16>(color.r) << 0);
142 break;
143 case FORMAT_A2W10V10U10:
144 *(unsigned int*)element = (unorm<2>(color.a) << 30) | (snorm<10>(color.b) << 20) | (snorm<10>(color.g) << 10) | (snorm<10>(color.r) << 0);
145 break;
146 case FORMAT_A16W16V16U16:
147 ((unsigned short*)element)[0] = snorm<16>(color.r);
148 ((unsigned short*)element)[1] = snorm<16>(color.g);
149 ((unsigned short*)element)[2] = snorm<16>(color.b);
150 ((unsigned short*)element)[3] = unorm<16>(color.a);
151 break;
152 case FORMAT_Q16W16V16U16:
153 ((unsigned short*)element)[0] = snorm<16>(color.r);
154 ((unsigned short*)element)[1] = snorm<16>(color.g);
155 ((unsigned short*)element)[2] = snorm<16>(color.b);
156 ((unsigned short*)element)[3] = snorm<16>(color.a);
157 break;
158 case FORMAT_R8G8B8:
159 ((unsigned char*)element)[0] = unorm<8>(color.b);
160 ((unsigned char*)element)[1] = unorm<8>(color.g);
161 ((unsigned char*)element)[2] = unorm<8>(color.r);
162 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400163 case FORMAT_B8G8R8:
164 ((unsigned char*)element)[0] = unorm<8>(color.r);
165 ((unsigned char*)element)[1] = unorm<8>(color.g);
166 ((unsigned char*)element)[2] = unorm<8>(color.b);
167 break;
John Bauman89401822014-05-06 15:04:28 -0400168 case FORMAT_R16F:
169 *(half*)element = (half)color.r;
170 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400171 case FORMAT_A16F:
172 *(half*)element = (half)color.a;
173 break;
John Bauman89401822014-05-06 15:04:28 -0400174 case FORMAT_G16R16F:
175 ((half*)element)[0] = (half)color.r;
176 ((half*)element)[1] = (half)color.g;
177 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400178 case FORMAT_B16G16R16F:
179 ((half*)element)[0] = (half)color.r;
180 ((half*)element)[1] = (half)color.g;
181 ((half*)element)[2] = (half)color.b;
182 break;
John Bauman89401822014-05-06 15:04:28 -0400183 case FORMAT_A16B16G16R16F:
184 ((half*)element)[0] = (half)color.r;
185 ((half*)element)[1] = (half)color.g;
186 ((half*)element)[2] = (half)color.b;
187 ((half*)element)[3] = (half)color.a;
188 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400189 case FORMAT_A32F:
190 *(float*)element = color.a;
191 break;
John Bauman89401822014-05-06 15:04:28 -0400192 case FORMAT_R32F:
193 *(float*)element = color.r;
194 break;
195 case FORMAT_G32R32F:
196 ((float*)element)[0] = color.r;
197 ((float*)element)[1] = color.g;
198 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400199 case FORMAT_B32G32R32F:
200 ((float*)element)[0] = color.r;
201 ((float*)element)[1] = color.g;
202 ((float*)element)[2] = color.b;
203 break;
John Bauman89401822014-05-06 15:04:28 -0400204 case FORMAT_A32B32G32R32F:
205 ((float*)element)[0] = color.r;
206 ((float*)element)[1] = color.g;
207 ((float*)element)[2] = color.b;
208 ((float*)element)[3] = color.a;
209 break;
210 case FORMAT_D32F:
211 case FORMAT_D32F_LOCKABLE:
John Bauman66b8ab22014-05-06 15:57:45 -0400212 case FORMAT_D32FS8_TEXTURE:
213 case FORMAT_D32FS8_SHADOW:
John Bauman89401822014-05-06 15:04:28 -0400214 *((float*)element) = color.r;
215 break;
216 case FORMAT_D32F_COMPLEMENTARY:
217 *((float*)element) = 1 - color.r;
218 break;
219 case FORMAT_S8:
220 *((unsigned char*)element) = unorm<8>(color.r);
221 break;
222 case FORMAT_L8:
223 *(unsigned char*)element = unorm<8>(color.r);
224 break;
225 case FORMAT_A4L4:
226 *(unsigned char*)element = (unorm<4>(color.a) << 4) | (unorm<4>(color.r) << 0);
227 break;
228 case FORMAT_L16:
229 *(unsigned short*)element = unorm<16>(color.r);
230 break;
231 case FORMAT_A8L8:
232 *(unsigned short*)element = (unorm<8>(color.a) << 8) | (unorm<8>(color.r) << 0);
233 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400234 case FORMAT_L16F:
235 *(half*)element = (half)color.r;
236 break;
237 case FORMAT_A16L16F:
238 ((half*)element)[0] = (half)color.r;
239 ((half*)element)[1] = (half)color.a;
240 break;
241 case FORMAT_L32F:
242 *(float*)element = color.r;
243 break;
244 case FORMAT_A32L32F:
245 ((float*)element)[0] = color.r;
246 ((float*)element)[1] = color.a;
247 break;
John Bauman89401822014-05-06 15:04:28 -0400248 default:
249 ASSERT(false);
250 }
251 }
252
253 Color<float> Surface::Buffer::read(int x, int y, int z) const
254 {
255 void *element = (unsigned char*)buffer + x * bytes + y * pitchB + z * sliceB;
256
257 return read(element);
258 }
259
260 Color<float> Surface::Buffer::read(int x, int y) const
261 {
262 void *element = (unsigned char*)buffer + x * bytes + y * pitchB;
263
264 return read(element);
265 }
266
267 inline Color<float> Surface::Buffer::read(void *element) const
268 {
Nicolas Capens3f439242015-06-09 16:33:50 -0400269 float r = 0.0f;
270 float g = 0.0f;
271 float b = 0.0f;
272 float a = 1.0f;
John Bauman89401822014-05-06 15:04:28 -0400273
274 switch(format)
275 {
276 case FORMAT_P8:
277 {
278 ASSERT(palette);
279
280 unsigned int abgr = palette[*(unsigned char*)element];
281
282 r = (abgr & 0x000000FF) * (1.0f / 0x000000FF);
283 g = (abgr & 0x0000FF00) * (1.0f / 0x0000FF00);
284 b = (abgr & 0x00FF0000) * (1.0f / 0x00FF0000);
285 a = (abgr & 0xFF000000) * (1.0f / 0xFF000000);
286 }
287 break;
288 case FORMAT_A8P8:
289 {
290 ASSERT(palette);
291
292 unsigned int bgr = palette[((unsigned char*)element)[0]];
293
294 r = (bgr & 0x000000FF) * (1.0f / 0x000000FF);
295 g = (bgr & 0x0000FF00) * (1.0f / 0x0000FF00);
296 b = (bgr & 0x00FF0000) * (1.0f / 0x00FF0000);
297 a = ((unsigned char*)element)[1] * (1.0f / 0xFF);
298 }
299 break;
300 case FORMAT_A8:
301 r = 0;
302 g = 0;
303 b = 0;
304 a = *(unsigned char*)element * (1.0f / 0xFF);
305 break;
306 case FORMAT_R8:
307 r = *(unsigned char*)element * (1.0f / 0xFF);
308 break;
309 case FORMAT_R3G3B2:
310 {
311 unsigned char rgb = *(unsigned char*)element;
312
313 r = (rgb & 0xE0) * (1.0f / 0xE0);
314 g = (rgb & 0x1C) * (1.0f / 0x1C);
315 b = (rgb & 0x03) * (1.0f / 0x03);
316 }
317 break;
318 case FORMAT_A8R3G3B2:
319 {
320 unsigned short argb = *(unsigned short*)element;
321
322 a = (argb & 0xFF00) * (1.0f / 0xFF00);
323 r = (argb & 0x00E0) * (1.0f / 0x00E0);
324 g = (argb & 0x001C) * (1.0f / 0x001C);
325 b = (argb & 0x0003) * (1.0f / 0x0003);
326 }
327 break;
328 case FORMAT_X4R4G4B4:
329 {
330 unsigned short rgb = *(unsigned short*)element;
331
332 r = (rgb & 0x0F00) * (1.0f / 0x0F00);
333 g = (rgb & 0x00F0) * (1.0f / 0x00F0);
334 b = (rgb & 0x000F) * (1.0f / 0x000F);
335 }
336 break;
337 case FORMAT_A4R4G4B4:
338 {
339 unsigned short argb = *(unsigned short*)element;
340
341 a = (argb & 0xF000) * (1.0f / 0xF000);
342 r = (argb & 0x0F00) * (1.0f / 0x0F00);
343 g = (argb & 0x00F0) * (1.0f / 0x00F0);
344 b = (argb & 0x000F) * (1.0f / 0x000F);
345 }
346 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400347 case FORMAT_R4G4B4A4:
348 {
349 unsigned short rgba = *(unsigned short*)element;
350
351 r = (rgba & 0xF000) * (1.0f / 0xF000);
352 g = (rgba & 0x0F00) * (1.0f / 0x0F00);
353 b = (rgba & 0x00F0) * (1.0f / 0x00F0);
354 a = (rgba & 0x000F) * (1.0f / 0x000F);
355 }
356 break;
John Bauman89401822014-05-06 15:04:28 -0400357 case FORMAT_R5G6B5:
358 {
359 unsigned short rgb = *(unsigned short*)element;
360
361 r = (rgb & 0xF800) * (1.0f / 0xF800);
362 g = (rgb & 0x07E0) * (1.0f / 0x07E0);
363 b = (rgb & 0x001F) * (1.0f / 0x001F);
364 }
365 break;
366 case FORMAT_A1R5G5B5:
367 {
368 unsigned short argb = *(unsigned short*)element;
369
370 a = (argb & 0x8000) * (1.0f / 0x8000);
371 r = (argb & 0x7C00) * (1.0f / 0x7C00);
372 g = (argb & 0x03E0) * (1.0f / 0x03E0);
373 b = (argb & 0x001F) * (1.0f / 0x001F);
374 }
375 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400376 case FORMAT_R5G5B5A1:
377 {
378 unsigned short rgba = *(unsigned short*)element;
379
380 r = (rgba & 0xF800) * (1.0f / 0xF800);
381 g = (rgba & 0x07C0) * (1.0f / 0x07C0);
382 b = (rgba & 0x003E) * (1.0f / 0x003E);
383 a = (rgba & 0x0001) * (1.0f / 0x0001);
384 }
385 break;
John Bauman89401822014-05-06 15:04:28 -0400386 case FORMAT_X1R5G5B5:
387 {
388 unsigned short xrgb = *(unsigned short*)element;
389
390 r = (xrgb & 0x7C00) * (1.0f / 0x7C00);
391 g = (xrgb & 0x03E0) * (1.0f / 0x03E0);
392 b = (xrgb & 0x001F) * (1.0f / 0x001F);
393 }
394 break;
395 case FORMAT_A8R8G8B8:
396 {
397 unsigned int argb = *(unsigned int*)element;
398
399 a = (argb & 0xFF000000) * (1.0f / 0xFF000000);
400 r = (argb & 0x00FF0000) * (1.0f / 0x00FF0000);
401 g = (argb & 0x0000FF00) * (1.0f / 0x0000FF00);
402 b = (argb & 0x000000FF) * (1.0f / 0x000000FF);
403 }
404 break;
405 case FORMAT_X8R8G8B8:
406 {
407 unsigned int xrgb = *(unsigned int*)element;
408
409 r = (xrgb & 0x00FF0000) * (1.0f / 0x00FF0000);
410 g = (xrgb & 0x0000FF00) * (1.0f / 0x0000FF00);
411 b = (xrgb & 0x000000FF) * (1.0f / 0x000000FF);
412 }
413 break;
414 case FORMAT_A8B8G8R8:
415 {
416 unsigned int abgr = *(unsigned int*)element;
417
418 a = (abgr & 0xFF000000) * (1.0f / 0xFF000000);
419 b = (abgr & 0x00FF0000) * (1.0f / 0x00FF0000);
420 g = (abgr & 0x0000FF00) * (1.0f / 0x0000FF00);
421 r = (abgr & 0x000000FF) * (1.0f / 0x000000FF);
422 }
423 break;
424 case FORMAT_X8B8G8R8:
425 {
426 unsigned int xbgr = *(unsigned int*)element;
427
428 b = (xbgr & 0x00FF0000) * (1.0f / 0x00FF0000);
429 g = (xbgr & 0x0000FF00) * (1.0f / 0x0000FF00);
430 r = (xbgr & 0x000000FF) * (1.0f / 0x000000FF);
431 }
432 break;
433 case FORMAT_G8R8:
434 {
435 unsigned short gr = *(unsigned short*)element;
436
437 g = (gr & 0xFF00) * (1.0f / 0xFF00);
438 r = (gr & 0x00FF) * (1.0f / 0x00FF);
439 }
440 break;
441 case FORMAT_G16R16:
442 {
443 unsigned int gr = *(unsigned int*)element;
444
445 g = (gr & 0xFFFF0000) * (1.0f / 0xFFFF0000);
446 r = (gr & 0x0000FFFF) * (1.0f / 0x0000FFFF);
447 }
448 break;
449 case FORMAT_A2R10G10B10:
450 {
451 unsigned int argb = *(unsigned int*)element;
452
453 a = (argb & 0xC0000000) * (1.0f / 0xC0000000);
454 r = (argb & 0x3FF00000) * (1.0f / 0x3FF00000);
455 g = (argb & 0x000FFC00) * (1.0f / 0x000FFC00);
456 b = (argb & 0x000003FF) * (1.0f / 0x000003FF);
457 }
458 break;
459 case FORMAT_A2B10G10R10:
460 {
461 unsigned int abgr = *(unsigned int*)element;
462
463 a = (abgr & 0xC0000000) * (1.0f / 0xC0000000);
464 b = (abgr & 0x3FF00000) * (1.0f / 0x3FF00000);
465 g = (abgr & 0x000FFC00) * (1.0f / 0x000FFC00);
466 r = (abgr & 0x000003FF) * (1.0f / 0x000003FF);
467 }
468 break;
469 case FORMAT_A16B16G16R16:
470 r = ((unsigned short*)element)[0] * (1.0f / 0xFFFF);
471 g = ((unsigned short*)element)[1] * (1.0f / 0xFFFF);
472 b = ((unsigned short*)element)[2] * (1.0f / 0xFFFF);
473 a = ((unsigned short*)element)[3] * (1.0f / 0xFFFF);
474 break;
475 case FORMAT_V8U8:
476 {
477 unsigned short vu = *(unsigned short*)element;
478
479 r = ((int)(vu & 0x00FF) << 24) * (1.0f / 0x7F000000);
480 g = ((int)(vu & 0xFF00) << 16) * (1.0f / 0x7F000000);
481 }
482 break;
483 case FORMAT_L6V5U5:
484 {
485 unsigned short lvu = *(unsigned short*)element;
486
487 r = ((int)(lvu & 0x001F) << 27) * (1.0f / 0x78000000);
488 g = ((int)(lvu & 0x03E0) << 22) * (1.0f / 0x78000000);
489 b = (lvu & 0xFC00) * (1.0f / 0xFC00);
490 }
491 break;
492 case FORMAT_Q8W8V8U8:
493 {
494 unsigned int qwvu = *(unsigned int*)element;
495
496 r = ((int)(qwvu & 0x000000FF) << 24) * (1.0f / 0x7F000000);
497 g = ((int)(qwvu & 0x0000FF00) << 16) * (1.0f / 0x7F000000);
498 b = ((int)(qwvu & 0x00FF0000) << 8) * (1.0f / 0x7F000000);
499 a = ((int)(qwvu & 0xFF000000) << 0) * (1.0f / 0x7F000000);
500 }
501 break;
502 case FORMAT_X8L8V8U8:
503 {
504 unsigned int xlvu = *(unsigned int*)element;
505
506 r = ((int)(xlvu & 0x000000FF) << 24) * (1.0f / 0x7F000000);
507 g = ((int)(xlvu & 0x0000FF00) << 16) * (1.0f / 0x7F000000);
508 b = (xlvu & 0x00FF0000) * (1.0f / 0x00FF0000);
509 }
510 break;
511 case FORMAT_R8G8B8:
512 r = ((unsigned char*)element)[2] * (1.0f / 0xFF);
513 g = ((unsigned char*)element)[1] * (1.0f / 0xFF);
514 b = ((unsigned char*)element)[0] * (1.0f / 0xFF);
515 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400516 case FORMAT_B8G8R8:
517 r = ((unsigned char*)element)[0] * (1.0f / 0xFF);
518 g = ((unsigned char*)element)[1] * (1.0f / 0xFF);
519 b = ((unsigned char*)element)[2] * (1.0f / 0xFF);
520 break;
John Bauman89401822014-05-06 15:04:28 -0400521 case FORMAT_V16U16:
522 {
523 unsigned int vu = *(unsigned int*)element;
524
525 r = ((int)(vu & 0x0000FFFF) << 16) * (1.0f / 0x7FFF0000);
526 g = ((int)(vu & 0xFFFF0000) << 0) * (1.0f / 0x7FFF0000);
527 }
528 break;
529 case FORMAT_A2W10V10U10:
530 {
531 unsigned int awvu = *(unsigned int*)element;
532
533 r = ((int)(awvu & 0x000003FF) << 22) * (1.0f / 0x7FC00000);
534 g = ((int)(awvu & 0x000FFC00) << 12) * (1.0f / 0x7FC00000);
535 b = ((int)(awvu & 0x3FF00000) << 2) * (1.0f / 0x7FC00000);
536 a = (awvu & 0xC0000000) * (1.0f / 0xC0000000);
537 }
538 break;
539 case FORMAT_A16W16V16U16:
540 r = ((signed short*)element)[0] * (1.0f / 0x7FFF);
541 g = ((signed short*)element)[1] * (1.0f / 0x7FFF);
542 b = ((signed short*)element)[2] * (1.0f / 0x7FFF);
543 a = ((unsigned short*)element)[3] * (1.0f / 0xFFFF);
544 break;
545 case FORMAT_Q16W16V16U16:
546 r = ((signed short*)element)[0] * (1.0f / 0x7FFF);
547 g = ((signed short*)element)[1] * (1.0f / 0x7FFF);
548 b = ((signed short*)element)[2] * (1.0f / 0x7FFF);
549 a = ((signed short*)element)[3] * (1.0f / 0x7FFF);
550 break;
551 case FORMAT_L8:
552 r =
553 g =
554 b = *(unsigned char*)element * (1.0f / 0xFF);
555 break;
556 case FORMAT_A4L4:
557 {
558 unsigned char al = *(unsigned char*)element;
559
560 r =
561 g =
562 b = (al & 0x0F) * (1.0f / 0x0F);
563 a = (al & 0xF0) * (1.0f / 0xF0);
564 }
565 break;
566 case FORMAT_L16:
567 r =
568 g =
569 b = *(unsigned short*)element * (1.0f / 0xFFFF);
570 break;
571 case FORMAT_A8L8:
572 r =
573 g =
574 b = ((unsigned char*)element)[0] * (1.0f / 0xFF);
575 a = ((unsigned char*)element)[1] * (1.0f / 0xFF);
576 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400577 case FORMAT_L16F:
578 r =
579 g =
580 b = *(half*)element;
581 break;
582 case FORMAT_A16L16F:
583 r =
584 g =
585 b = ((half*)element)[0];
586 a = ((half*)element)[1];
587 break;
588 case FORMAT_L32F:
589 r =
590 g =
591 b = *(float*)element;
592 break;
593 case FORMAT_A32L32F:
594 r =
595 g =
596 b = ((float*)element)[0];
597 a = ((float*)element)[1];
598 break;
599 case FORMAT_A16F:
600 a = *(half*)element;
601 break;
John Bauman89401822014-05-06 15:04:28 -0400602 case FORMAT_R16F:
603 r = *(half*)element;
604 break;
605 case FORMAT_G16R16F:
606 r = ((half*)element)[0];
607 g = ((half*)element)[1];
608 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400609 case FORMAT_B16G16R16F:
610 r = ((half*)element)[0];
611 g = ((half*)element)[1];
612 b = ((half*)element)[2];
613 break;
John Bauman89401822014-05-06 15:04:28 -0400614 case FORMAT_A16B16G16R16F:
615 r = ((half*)element)[0];
616 g = ((half*)element)[1];
617 b = ((half*)element)[2];
618 a = ((half*)element)[3];
619 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400620 case FORMAT_A32F:
621 a = *(float*)element;
622 break;
John Bauman89401822014-05-06 15:04:28 -0400623 case FORMAT_R32F:
624 r = *(float*)element;
625 break;
626 case FORMAT_G32R32F:
627 r = ((float*)element)[0];
628 g = ((float*)element)[1];
629 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400630 case FORMAT_B32G32R32F:
631 r = ((float*)element)[0];
632 g = ((float*)element)[1];
633 b = ((float*)element)[2];
634 break;
John Bauman89401822014-05-06 15:04:28 -0400635 case FORMAT_A32B32G32R32F:
636 r = ((float*)element)[0];
637 g = ((float*)element)[1];
638 b = ((float*)element)[2];
639 a = ((float*)element)[3];
640 break;
641 case FORMAT_D32F:
642 case FORMAT_D32F_LOCKABLE:
John Bauman66b8ab22014-05-06 15:57:45 -0400643 case FORMAT_D32FS8_TEXTURE:
644 case FORMAT_D32FS8_SHADOW:
John Bauman89401822014-05-06 15:04:28 -0400645 r = *(float*)element;
646 g = r;
647 b = r;
648 a = r;
649 break;
650 case FORMAT_D32F_COMPLEMENTARY:
John Bauman66b8ab22014-05-06 15:57:45 -0400651 r = 1.0f - *(float*)element;
John Bauman89401822014-05-06 15:04:28 -0400652 g = r;
653 b = r;
654 a = r;
655 break;
656 case FORMAT_S8:
657 r = *(unsigned char*)element * (1.0f / 0xFF);
658 break;
659 default:
660 ASSERT(false);
661 }
662
663 // if(sRGB)
664 // {
665 // r = sRGBtoLinear(r);
666 // g = sRGBtoLinear(g);
667 // b = sRGBtoLinear(b);
668 // }
669
670 return Color<float>(r, g, b, a);
671 }
672
673 Color<float> Surface::Buffer::sample(float x, float y, float z) const
674 {
675 x -= 0.5f;
676 y -= 0.5f;
677 z -= 0.5f;
678
679 int x0 = clamp((int)x, 0, width - 1);
680 int x1 = (x0 + 1 >= width) ? x0 : x0 + 1;
681
682 int y0 = clamp((int)y, 0, height - 1);
683 int y1 = (y0 + 1 >= height) ? y0 : y0 + 1;
684
685 int z0 = clamp((int)z, 0, depth - 1);
686 int z1 = (z0 + 1 >= depth) ? z0 : z0 + 1;
687
688 Color<float> c000 = read(x0, y0, z0);
689 Color<float> c100 = read(x1, y0, z0);
690 Color<float> c010 = read(x0, y1, z0);
691 Color<float> c110 = read(x1, y1, z0);
692 Color<float> c001 = read(x0, y0, z1);
693 Color<float> c101 = read(x1, y0, z1);
694 Color<float> c011 = read(x0, y1, z1);
695 Color<float> c111 = read(x1, y1, z1);
696
697 float fx = x - x0;
698 float fy = y - y0;
699 float fz = z - z0;
700
701 c000 *= (1 - fx) * (1 - fy) * (1 - fz);
702 c100 *= fx * (1 - fy) * (1 - fz);
703 c010 *= (1 - fx) * fy * (1 - fz);
704 c110 *= fx * fy * (1 - fz);
705 c001 *= (1 - fx) * (1 - fy) * fz;
706 c101 *= fx * (1 - fy) * fz;
707 c011 *= (1 - fx) * fy * fz;
708 c111 *= fx * fy * fz;
709
710 return c000 + c100 + c010 + c110 + c001 + c101 + c011 + c111;
711 }
712
713 Color<float> Surface::Buffer::sample(float x, float y) const
714 {
715 x -= 0.5f;
716 y -= 0.5f;
717
718 int x0 = clamp((int)x, 0, width - 1);
719 int x1 = (x0 + 1 >= width) ? x0 : x0 + 1;
720
721 int y0 = clamp((int)y, 0, height - 1);
722 int y1 = (y0 + 1 >= height) ? y0 : y0 + 1;
723
724 Color<float> c00 = read(x0, y0);
725 Color<float> c10 = read(x1, y0);
726 Color<float> c01 = read(x0, y1);
727 Color<float> c11 = read(x1, y1);
728
729 float fx = x - x0;
730 float fy = y - y0;
731
732 c00 *= (1 - fx) * (1 - fy);
733 c10 *= fx * (1 - fy);
734 c01 *= (1 - fx) * fy;
735 c11 *= fx * fy;
736
737 return c00 + c10 + c01 + c11;
738 }
739
John Bauman19bac1e2014-05-06 15:23:49 -0400740 void *Surface::Buffer::lockRect(int x, int y, int z, Lock lock)
John Bauman89401822014-05-06 15:04:28 -0400741 {
742 this->lock = lock;
743
744 switch(lock)
745 {
746 case LOCK_UNLOCKED:
747 case LOCK_READONLY:
748 break;
749 case LOCK_WRITEONLY:
750 case LOCK_READWRITE:
751 case LOCK_DISCARD:
752 dirty = true;
753 break;
754 default:
755 ASSERT(false);
756 }
757
John Baumand4ae8632014-05-06 16:18:33 -0400758 if(buffer)
John Bauman89401822014-05-06 15:04:28 -0400759 {
John Baumand4ae8632014-05-06 16:18:33 -0400760 switch(format)
761 {
762 #if S3TC_SUPPORT
763 case FORMAT_DXT1:
764 #endif
765 case FORMAT_ATI1:
Nicolas Capens22658242014-11-29 00:31:41 -0500766 case FORMAT_ETC1:
John Baumand4ae8632014-05-06 16:18:33 -0400767 return (unsigned char*)buffer + 8 * (x / 4) + (y / 4) * pitchB + z * sliceB;
768 #if S3TC_SUPPORT
769 case FORMAT_DXT3:
770 case FORMAT_DXT5:
771 #endif
772 case FORMAT_ATI2:
773 return (unsigned char*)buffer + 16 * (x / 4) + (y / 4) * pitchB + z * sliceB;
774 default:
775 return (unsigned char*)buffer + x * bytes + y * pitchB + z * sliceB;
776 }
John Bauman89401822014-05-06 15:04:28 -0400777 }
778
779 return 0;
780 }
781
782 void Surface::Buffer::unlockRect()
783 {
784 lock = LOCK_UNLOCKED;
785 }
786
Nicolas Capens477314b2015-06-09 16:47:29 -0400787 Surface::Surface(int width, int height, int depth, Format format, void *pixels, int pitch, int slice) : lockable(true), renderTarget(false)
788 {
789 resource = new Resource(0);
790 hasParent = false;
791 ownExternal = false;
792 depth = max(1, depth);
793
794 external.buffer = pixels;
795 external.width = width;
796 external.height = height;
797 external.depth = depth;
798 external.format = format;
799 external.bytes = bytes(external.format);
800 external.pitchB = pitch;
801 external.pitchP = pitch / external.bytes;
802 external.sliceB = slice;
803 external.sliceP = slice / external.bytes;
804 external.lock = LOCK_UNLOCKED;
805 external.dirty = true;
806
807 internal.buffer = 0;
808 internal.width = width;
809 internal.height = height;
810 internal.depth = depth;
811 internal.format = selectInternalFormat(format);
812 internal.bytes = bytes(internal.format);
813 internal.pitchB = pitchB(internal.width, internal.format, false);
814 internal.pitchP = pitchP(internal.width, internal.format, false);
815 internal.sliceB = sliceB(internal.width, internal.height, internal.format, false);
816 internal.sliceP = sliceP(internal.width, internal.height, internal.format, false);
817 internal.lock = LOCK_UNLOCKED;
818 internal.dirty = false;
819
820 stencil.buffer = 0;
821 stencil.width = width;
822 stencil.height = height;
823 stencil.depth = depth;
824 stencil.format = FORMAT_S8;
825 stencil.bytes = bytes(stencil.format);
826 stencil.pitchB = pitchB(stencil.width, stencil.format, false);
827 stencil.pitchP = pitchP(stencil.width, stencil.format, false);
828 stencil.sliceB = sliceB(stencil.width, stencil.height, stencil.format, false);
829 stencil.sliceP = sliceP(stencil.width, stencil.height, stencil.format, false);
830 stencil.lock = LOCK_UNLOCKED;
831 stencil.dirty = false;
832
833 dirtyMipmaps = true;
834 paletteUsed = 0;
835 }
836
John Bauman89401822014-05-06 15:04:28 -0400837 Surface::Surface(Resource *texture, int width, int height, int depth, Format format, bool lockable, bool renderTarget) : lockable(lockable), renderTarget(renderTarget)
838 {
839 resource = texture ? texture : new Resource(0);
John Bauman19bac1e2014-05-06 15:23:49 -0400840 hasParent = texture != 0;
Nicolas Capens477314b2015-06-09 16:47:29 -0400841 ownExternal = true;
John Bauman89401822014-05-06 15:04:28 -0400842 depth = max(1, depth);
843
844 external.buffer = 0;
845 external.width = width;
846 external.height = height;
847 external.depth = depth;
848 external.format = format;
849 external.bytes = bytes(external.format);
850 external.pitchB = pitchB(external.width, external.format, renderTarget && !texture);
851 external.pitchP = pitchP(external.width, external.format, renderTarget && !texture);
852 external.sliceB = sliceB(external.width, external.height, external.format, renderTarget && !texture);
853 external.sliceP = sliceP(external.width, external.height, external.format, renderTarget && !texture);
854 external.lock = LOCK_UNLOCKED;
855 external.dirty = false;
John Bauman89401822014-05-06 15:04:28 -0400856
857 internal.buffer = 0;
858 internal.width = width;
859 internal.height = height;
860 internal.depth = depth;
861 internal.format = selectInternalFormat(format);
862 internal.bytes = bytes(internal.format);
863 internal.pitchB = pitchB(internal.width, internal.format, renderTarget);
864 internal.pitchP = pitchP(internal.width, internal.format, renderTarget);
865 internal.sliceB = sliceB(internal.width, internal.height, internal.format, renderTarget);
866 internal.sliceP = sliceP(internal.width, internal.height, internal.format, renderTarget);
867 internal.lock = LOCK_UNLOCKED;
868 internal.dirty = false;
John Bauman89401822014-05-06 15:04:28 -0400869
870 stencil.buffer = 0;
871 stencil.width = width;
872 stencil.height = height;
873 stencil.depth = depth;
874 stencil.format = FORMAT_S8;
875 stencil.bytes = bytes(stencil.format);
876 stencil.pitchB = pitchB(stencil.width, stencil.format, renderTarget);
877 stencil.pitchP = pitchP(stencil.width, stencil.format, renderTarget);
878 stencil.sliceB = sliceB(stencil.width, stencil.height, stencil.format, renderTarget);
879 stencil.sliceP = sliceP(stencil.width, stencil.height, stencil.format, renderTarget);
880 stencil.lock = LOCK_UNLOCKED;
881 stencil.dirty = false;
John Bauman89401822014-05-06 15:04:28 -0400882
883 dirtyMipmaps = true;
John Bauman66b8ab22014-05-06 15:57:45 -0400884 paletteUsed = 0;
John Bauman89401822014-05-06 15:04:28 -0400885 }
886
887 Surface::~Surface()
888 {
John Bauman8a4f6fc2014-05-06 15:26:18 -0400889 // Synchronize so we can deallocate the buffers below
890 resource->lock(DESTRUCT);
891 resource->unlock();
892
John Bauman89401822014-05-06 15:04:28 -0400893 if(!hasParent)
894 {
895 resource->destruct();
896 }
897
Nicolas Capens477314b2015-06-09 16:47:29 -0400898 if(ownExternal)
899 {
900 deallocate(external.buffer);
901 }
John Bauman89401822014-05-06 15:04:28 -0400902
903 if(internal.buffer != external.buffer)
904 {
905 deallocate(internal.buffer);
906 }
907
908 deallocate(stencil.buffer);
909
910 external.buffer = 0;
911 internal.buffer = 0;
912 stencil.buffer = 0;
913 }
914
John Bauman19bac1e2014-05-06 15:23:49 -0400915 void *Surface::lockExternal(int x, int y, int z, Lock lock, Accessor client)
John Bauman89401822014-05-06 15:04:28 -0400916 {
917 resource->lock(client);
918
919 if(!external.buffer)
920 {
921 if(internal.buffer && identicalFormats())
922 {
923 external.buffer = internal.buffer;
924 }
925 else
926 {
927 external.buffer = allocateBuffer(external.width, external.height, external.depth, external.format);
928 }
929 }
930
931 if(internal.dirty)
932 {
933 if(lock != LOCK_DISCARD)
934 {
935 update(external, internal);
936 }
John Bauman66b8ab22014-05-06 15:57:45 -0400937
938 internal.dirty = false;
John Bauman89401822014-05-06 15:04:28 -0400939 }
940
941 switch(lock)
942 {
943 case LOCK_READONLY:
944 break;
945 case LOCK_WRITEONLY:
946 case LOCK_READWRITE:
947 case LOCK_DISCARD:
948 dirtyMipmaps = true;
949 break;
950 default:
951 ASSERT(false);
952 }
953
John Bauman19bac1e2014-05-06 15:23:49 -0400954 return external.lockRect(x, y, z, lock);
John Bauman89401822014-05-06 15:04:28 -0400955 }
956
957 void Surface::unlockExternal()
958 {
959 resource->unlock();
960
961 external.unlockRect();
962 }
963
John Bauman19bac1e2014-05-06 15:23:49 -0400964 void *Surface::lockInternal(int x, int y, int z, Lock lock, Accessor client)
John Bauman89401822014-05-06 15:04:28 -0400965 {
966 if(lock != LOCK_UNLOCKED)
967 {
968 resource->lock(client);
969 }
970
971 if(!internal.buffer)
972 {
973 if(external.buffer && identicalFormats())
974 {
975 internal.buffer = external.buffer;
976 }
977 else
978 {
979 internal.buffer = allocateBuffer(internal.width, internal.height, internal.depth, internal.format);
980 }
981 }
982
983 // FIXME: WHQL requires conversion to lower external precision and back
984 if(logPrecision >= WHQL)
985 {
986 if(internal.dirty && renderTarget && internal.format != external.format)
987 {
988 if(lock != LOCK_DISCARD)
989 {
990 switch(external.format)
991 {
992 case FORMAT_R3G3B2:
993 case FORMAT_A8R3G3B2:
994 case FORMAT_A1R5G5B5:
995 case FORMAT_A2R10G10B10:
996 case FORMAT_A2B10G10R10:
997 lockExternal(0, 0, 0, LOCK_READWRITE, client);
998 unlockExternal();
999 break;
1000 default:
1001 // Difference passes WHQL
1002 break;
1003 }
1004 }
1005 }
1006 }
1007
John Bauman66b8ab22014-05-06 15:57:45 -04001008 if(external.dirty || (isPalette(external.format) && paletteUsed != Surface::paletteID))
John Bauman89401822014-05-06 15:04:28 -04001009 {
1010 if(lock != LOCK_DISCARD)
1011 {
1012 update(internal, external);
1013 }
John Bauman89401822014-05-06 15:04:28 -04001014
John Bauman66b8ab22014-05-06 15:57:45 -04001015 external.dirty = false;
1016 paletteUsed = Surface::paletteID;
John Bauman89401822014-05-06 15:04:28 -04001017 }
1018
1019 switch(lock)
1020 {
1021 case LOCK_UNLOCKED:
1022 case LOCK_READONLY:
1023 break;
1024 case LOCK_WRITEONLY:
1025 case LOCK_READWRITE:
1026 case LOCK_DISCARD:
1027 dirtyMipmaps = true;
1028 break;
1029 default:
1030 ASSERT(false);
1031 }
1032
1033 if(lock == LOCK_READONLY && client == PUBLIC)
1034 {
1035 resolve();
1036 }
1037
John Bauman19bac1e2014-05-06 15:23:49 -04001038 return internal.lockRect(x, y, z, lock);
John Bauman89401822014-05-06 15:04:28 -04001039 }
1040
1041 void Surface::unlockInternal()
1042 {
1043 resource->unlock();
1044
1045 internal.unlockRect();
1046 }
1047
1048 void *Surface::lockStencil(int front, Accessor client)
1049 {
1050 resource->lock(client);
1051
1052 if(!stencil.buffer)
1053 {
1054 stencil.buffer = allocateBuffer(stencil.width, stencil.height, stencil.depth, stencil.format);
1055 }
1056
John Bauman89401822014-05-06 15:04:28 -04001057 return stencil.lockRect(0, 0, front, LOCK_READWRITE); // FIXME
1058 }
1059
1060 void Surface::unlockStencil()
1061 {
1062 resource->unlock();
1063
1064 stencil.unlockRect();
1065 }
1066
1067 int Surface::bytes(Format format)
1068 {
1069 switch(format)
1070 {
1071 case FORMAT_NULL: return 0;
1072 case FORMAT_P8: return 1;
1073 case FORMAT_A8P8: return 2;
1074 case FORMAT_A8: return 1;
1075 case FORMAT_R8: return 1;
1076 case FORMAT_R3G3B2: return 1;
1077 case FORMAT_A8R3G3B2: return 2;
1078 case FORMAT_R5G6B5: return 2;
1079 case FORMAT_A1R5G5B5: return 2;
1080 case FORMAT_X1R5G5B5: return 2;
Nicolas Capens80594422015-06-09 16:42:56 -04001081 case FORMAT_R5G5B5A1: return 2;
John Bauman89401822014-05-06 15:04:28 -04001082 case FORMAT_X4R4G4B4: return 2;
1083 case FORMAT_A4R4G4B4: return 2;
Nicolas Capens80594422015-06-09 16:42:56 -04001084 case FORMAT_R4G4B4A4: return 2;
John Bauman89401822014-05-06 15:04:28 -04001085 case FORMAT_R8G8B8: return 3;
Nicolas Capens80594422015-06-09 16:42:56 -04001086 case FORMAT_B8G8R8: return 3;
John Bauman89401822014-05-06 15:04:28 -04001087 case FORMAT_X8R8G8B8: return 4;
1088 // case FORMAT_X8G8R8B8Q: return 4;
1089 case FORMAT_A8R8G8B8: return 4;
1090 // case FORMAT_A8G8R8B8Q: return 4;
1091 case FORMAT_X8B8G8R8: return 4;
1092 case FORMAT_A8B8G8R8: return 4;
1093 case FORMAT_A2R10G10B10: return 4;
1094 case FORMAT_A2B10G10R10: return 4;
1095 case FORMAT_G8R8: return 2;
1096 case FORMAT_G16R16: return 4;
1097 case FORMAT_A16B16G16R16: return 8;
1098 // Compressed formats
1099 #if S3TC_SUPPORT
1100 case FORMAT_DXT1: return 2; // Column of four pixels
1101 case FORMAT_DXT3: return 4; // Column of four pixels
1102 case FORMAT_DXT5: return 4; // Column of four pixels
John Bauman66b8ab22014-05-06 15:57:45 -04001103 #endif
John Bauman89401822014-05-06 15:04:28 -04001104 case FORMAT_ATI1: return 2; // Column of four pixels
1105 case FORMAT_ATI2: return 4; // Column of four pixels
Nicolas Capens22658242014-11-29 00:31:41 -05001106 case FORMAT_ETC1: return 2; // Column of four pixels
John Bauman89401822014-05-06 15:04:28 -04001107 // Bumpmap formats
1108 case FORMAT_V8U8: return 2;
1109 case FORMAT_L6V5U5: return 2;
1110 case FORMAT_Q8W8V8U8: return 4;
1111 case FORMAT_X8L8V8U8: return 4;
1112 case FORMAT_A2W10V10U10: return 4;
1113 case FORMAT_V16U16: return 4;
1114 case FORMAT_A16W16V16U16: return 8;
1115 case FORMAT_Q16W16V16U16: return 8;
1116 // Luminance formats
1117 case FORMAT_L8: return 1;
1118 case FORMAT_A4L4: return 1;
1119 case FORMAT_L16: return 2;
1120 case FORMAT_A8L8: return 2;
Nicolas Capens80594422015-06-09 16:42:56 -04001121 case FORMAT_L16F: return 2;
1122 case FORMAT_A16L16F: return 4;
1123 case FORMAT_L32F: return 4;
1124 case FORMAT_A32L32F: return 8;
John Bauman89401822014-05-06 15:04:28 -04001125 // Floating-point formats
Nicolas Capens80594422015-06-09 16:42:56 -04001126 case FORMAT_A16F: return 2;
John Bauman89401822014-05-06 15:04:28 -04001127 case FORMAT_R16F: return 2;
1128 case FORMAT_G16R16F: return 4;
Nicolas Capens80594422015-06-09 16:42:56 -04001129 case FORMAT_B16G16R16F: return 6;
John Bauman89401822014-05-06 15:04:28 -04001130 case FORMAT_A16B16G16R16F: return 8;
Nicolas Capens80594422015-06-09 16:42:56 -04001131 case FORMAT_A32F: return 4;
John Bauman89401822014-05-06 15:04:28 -04001132 case FORMAT_R32F: return 4;
1133 case FORMAT_G32R32F: return 8;
Nicolas Capens80594422015-06-09 16:42:56 -04001134 case FORMAT_B32G32R32F: return 12;
John Bauman89401822014-05-06 15:04:28 -04001135 case FORMAT_A32B32G32R32F: return 16;
1136 // Depth/stencil formats
1137 case FORMAT_D16: return 2;
1138 case FORMAT_D32: return 4;
1139 case FORMAT_D24X8: return 4;
1140 case FORMAT_D24S8: return 4;
1141 case FORMAT_D24FS8: return 4;
1142 case FORMAT_D32F: return 4;
1143 case FORMAT_D32F_COMPLEMENTARY: return 4;
1144 case FORMAT_D32F_LOCKABLE: return 4;
John Bauman66b8ab22014-05-06 15:57:45 -04001145 case FORMAT_D32FS8_TEXTURE: return 4;
1146 case FORMAT_D32FS8_SHADOW: return 4;
1147 case FORMAT_DF24S8: return 4;
1148 case FORMAT_DF16S8: return 2;
John Bauman89401822014-05-06 15:04:28 -04001149 case FORMAT_INTZ: return 4;
1150 case FORMAT_S8: return 1;
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04001151 case FORMAT_YV12_BT601: return 1; // Y plane only
1152 case FORMAT_YV12_BT709: return 1; // Y plane only
1153 case FORMAT_YV12_JFIF: return 1; // Y plane only
John Bauman89401822014-05-06 15:04:28 -04001154 default:
1155 ASSERT(false);
1156 }
1157
1158 return 0;
1159 }
1160
1161 int Surface::pitchB(int width, Format format, bool target)
1162 {
1163 if(target || isDepth(format) || isStencil(format))
1164 {
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04001165 width = align(width, 2);
John Bauman89401822014-05-06 15:04:28 -04001166 }
1167
1168 switch(format)
1169 {
1170 #if S3TC_SUPPORT
1171 case FORMAT_DXT1:
Nicolas Capens22658242014-11-29 00:31:41 -05001172 #endif
1173 case FORMAT_ETC1:
John Bauman89401822014-05-06 15:04:28 -04001174 return 8 * ((width + 3) / 4); // 64 bit per 4x4 block, computed per 4 rows
Nicolas Capens22658242014-11-29 00:31:41 -05001175 #if S3TC_SUPPORT
John Bauman89401822014-05-06 15:04:28 -04001176 case FORMAT_DXT3:
1177 case FORMAT_DXT5:
1178 return 16 * ((width + 3) / 4); // 128 bit per 4x4 block, computed per 4 rows
John Bauman66b8ab22014-05-06 15:57:45 -04001179 #endif
John Bauman89401822014-05-06 15:04:28 -04001180 case FORMAT_ATI1:
1181 return 2 * ((width + 3) / 4); // 64 bit per 4x4 block, computed per row
1182 case FORMAT_ATI2:
1183 return 4 * ((width + 3) / 4); // 128 bit per 4x4 block, computed per row
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04001184 case FORMAT_YV12_BT601:
1185 case FORMAT_YV12_BT709:
1186 case FORMAT_YV12_JFIF:
1187 return align(width, 16);
John Bauman89401822014-05-06 15:04:28 -04001188 default:
1189 return bytes(format) * width;
1190 }
1191 }
1192
1193 int Surface::pitchP(int width, Format format, bool target)
1194 {
1195 int B = bytes(format);
1196
1197 return B > 0 ? pitchB(width, format, target) / B : 0;
1198 }
1199
1200 int Surface::sliceB(int width, int height, Format format, bool target)
1201 {
1202 if(target || isDepth(format) || isStencil(format))
1203 {
1204 height = ((height + 1) & ~1);
1205 }
1206
1207 switch(format)
1208 {
1209 #if S3TC_SUPPORT
1210 case FORMAT_DXT1:
1211 case FORMAT_DXT3:
1212 case FORMAT_DXT5:
John Bauman66b8ab22014-05-06 15:57:45 -04001213 #endif
Nicolas Capens22658242014-11-29 00:31:41 -05001214 case FORMAT_ETC1:
1215 return pitchB(width, format, target) * ((height + 3) / 4); // Pitch computed per 4 rows
1216 case FORMAT_ATI1:
1217 case FORMAT_ATI2:
John Bauman89401822014-05-06 15:04:28 -04001218 default:
Nicolas Capens22658242014-11-29 00:31:41 -05001219 return pitchB(width, format, target) * height; // Pitch computed per row
John Bauman89401822014-05-06 15:04:28 -04001220 }
1221 }
1222
1223 int Surface::sliceP(int width, int height, Format format, bool target)
1224 {
1225 int B = bytes(format);
1226
1227 return B > 0 ? sliceB(width, height, format, target) / B : 0;
1228 }
1229
1230 void Surface::update(Buffer &destination, Buffer &source)
1231 {
1232 // ASSERT(source.lock != LOCK_UNLOCKED);
1233 // ASSERT(destination.lock != LOCK_UNLOCKED);
1234
1235 if(destination.buffer != source.buffer)
1236 {
1237 ASSERT(source.dirty && !destination.dirty);
1238
1239 switch(source.format)
1240 {
1241 case FORMAT_R8G8B8: decodeR8G8B8(destination, source); break; // FIXME: Check destination format
John Bauman89401822014-05-06 15:04:28 -04001242 case FORMAT_R5G6B5: decodeR5G6B5(destination, source); break; // FIXME: Check destination format
1243 case FORMAT_X1R5G5B5: decodeX1R5G5B5(destination, source); break; // FIXME: Check destination format
1244 case FORMAT_A1R5G5B5: decodeA1R5G5B5(destination, source); break; // FIXME: Check destination format
1245 case FORMAT_X4R4G4B4: decodeX4R4G4B4(destination, source); break; // FIXME: Check destination format
1246 case FORMAT_A4R4G4B4: decodeA4R4G4B4(destination, source); break; // FIXME: Check destination format
1247 case FORMAT_P8: decodeP8(destination, source); break; // FIXME: Check destination format
1248 #if S3TC_SUPPORT
1249 case FORMAT_DXT1: decodeDXT1(destination, source); break; // FIXME: Check destination format
1250 case FORMAT_DXT3: decodeDXT3(destination, source); break; // FIXME: Check destination format
1251 case FORMAT_DXT5: decodeDXT5(destination, source); break; // FIXME: Check destination format
Nicolas Capens22658242014-11-29 00:31:41 -05001252 #endif
John Bauman89401822014-05-06 15:04:28 -04001253 case FORMAT_ATI1: decodeATI1(destination, source); break; // FIXME: Check destination format
1254 case FORMAT_ATI2: decodeATI2(destination, source); break; // FIXME: Check destination format
Nicolas Capens22658242014-11-29 00:31:41 -05001255 case FORMAT_ETC1: decodeETC1(destination, source); break; // FIXME: Check destination format
John Bauman89401822014-05-06 15:04:28 -04001256 default: genericUpdate(destination, source); break;
1257 }
1258 }
John Bauman89401822014-05-06 15:04:28 -04001259 }
1260
1261 void Surface::genericUpdate(Buffer &destination, Buffer &source)
1262 {
1263 unsigned char *sourceSlice = (unsigned char*)source.buffer;
1264 unsigned char *destinationSlice = (unsigned char*)destination.buffer;
1265
1266 int depth = min(destination.depth, source.depth);
1267 int height = min(destination.height, source.height);
1268 int width = min(destination.width, source.width);
1269 int rowBytes = width * source.bytes;
1270
1271 for(int z = 0; z < depth; z++)
1272 {
1273 unsigned char *sourceRow = sourceSlice;
1274 unsigned char *destinationRow = destinationSlice;
1275
1276 for(int y = 0; y < height; y++)
1277 {
1278 if(source.format == destination.format)
1279 {
1280 memcpy(destinationRow, sourceRow, rowBytes);
1281 }
1282 else
1283 {
1284 unsigned char *sourceElement = sourceRow;
1285 unsigned char *destinationElement = destinationRow;
1286
1287 for(int x = 0; x < width; x++)
1288 {
1289 Color<float> color = source.read(sourceElement);
1290 destination.write(destinationElement, color);
1291
1292 sourceElement += source.bytes;
1293 destinationElement += destination.bytes;
1294 }
1295 }
1296
1297 sourceRow += source.pitchB;
1298 destinationRow += destination.pitchB;
1299 }
1300
1301 sourceSlice += source.sliceB;
1302 destinationSlice += destination.sliceB;
1303 }
1304 }
1305
1306 void Surface::decodeR8G8B8(Buffer &destination, const Buffer &source)
1307 {
1308 unsigned char *sourceSlice = (unsigned char*)source.buffer;
1309 unsigned char *destinationSlice = (unsigned char*)destination.buffer;
1310
1311 for(int z = 0; z < destination.depth && z < source.depth; z++)
1312 {
1313 unsigned char *sourceRow = sourceSlice;
1314 unsigned char *destinationRow = destinationSlice;
1315
1316 for(int y = 0; y < destination.height && y < source.height; y++)
1317 {
1318 unsigned char *sourceElement = sourceRow;
1319 unsigned char *destinationElement = destinationRow;
1320
1321 for(int x = 0; x < destination.width && x < source.width; x++)
1322 {
1323 unsigned int b = sourceElement[0];
1324 unsigned int g = sourceElement[1];
1325 unsigned int r = sourceElement[2];
1326
1327 *(unsigned int*)destinationElement = 0xFF000000 | (r << 16) | (g << 8) | (b << 0);
1328
1329 sourceElement += source.bytes;
1330 destinationElement += destination.bytes;
1331 }
1332
1333 sourceRow += source.pitchB;
1334 destinationRow += destination.pitchB;
1335 }
1336
1337 sourceSlice += source.sliceB;
1338 destinationSlice += destination.sliceB;
1339 }
1340 }
1341
John Bauman89401822014-05-06 15:04:28 -04001342 void Surface::decodeR5G6B5(Buffer &destination, const Buffer &source)
1343 {
1344 unsigned char *sourceSlice = (unsigned char*)source.buffer;
1345 unsigned char *destinationSlice = (unsigned char*)destination.buffer;
1346
1347 for(int z = 0; z < destination.depth && z < source.depth; z++)
1348 {
1349 unsigned char *sourceRow = sourceSlice;
1350 unsigned char *destinationRow = destinationSlice;
1351
1352 for(int y = 0; y < destination.height && y < source.height; y++)
1353 {
1354 unsigned char *sourceElement = sourceRow;
1355 unsigned char *destinationElement = destinationRow;
1356
1357 for(int x = 0; x < destination.width && x < source.width; x++)
1358 {
1359 unsigned int rgb = *(unsigned short*)sourceElement;
1360
1361 unsigned int r = (((rgb & 0xF800) * 67385 + 0x800000) >> 8) & 0x00FF0000;
1362 unsigned int g = (((rgb & 0x07E0) * 8289 + 0x8000) >> 8) & 0x0000FF00;
1363 unsigned int b = (((rgb & 0x001F) * 2106 + 0x80) >> 8);
1364
1365 *(unsigned int*)destinationElement = 0xFF000000 | r | g | b;
1366
1367 sourceElement += source.bytes;
1368 destinationElement += destination.bytes;
1369 }
1370
1371 sourceRow += source.pitchB;
1372 destinationRow += destination.pitchB;
1373 }
1374
1375 sourceSlice += source.sliceB;
1376 destinationSlice += destination.sliceB;
1377 }
1378 }
1379
1380 void Surface::decodeX1R5G5B5(Buffer &destination, const Buffer &source)
1381 {
1382 unsigned char *sourceSlice = (unsigned char*)source.buffer;
1383 unsigned char *destinationSlice = (unsigned char*)destination.buffer;
1384
1385 for(int z = 0; z < destination.depth && z < source.depth; z++)
1386 {
1387 unsigned char *sourceRow = sourceSlice;
1388 unsigned char *destinationRow = destinationSlice;
1389
1390 for(int y = 0; y < destination.height && y < source.height; y++)
1391 {
1392 unsigned char *sourceElement = sourceRow;
1393 unsigned char *destinationElement = destinationRow;
1394
1395 for(int x = 0; x < destination.width && x < source.width; x++)
1396 {
1397 unsigned int xrgb = *(unsigned short*)sourceElement;
1398
1399 unsigned int r = (((xrgb & 0x7C00) * 134771 + 0x800000) >> 8) & 0x00FF0000;
1400 unsigned int g = (((xrgb & 0x03E0) * 16846 + 0x8000) >> 8) & 0x0000FF00;
1401 unsigned int b = (((xrgb & 0x001F) * 2106 + 0x80) >> 8);
1402
1403 *(unsigned int*)destinationElement = 0xFF000000 | r | g | b;
1404
1405 sourceElement += source.bytes;
1406 destinationElement += destination.bytes;
1407 }
1408
1409 sourceRow += source.pitchB;
1410 destinationRow += destination.pitchB;
1411 }
1412
1413 sourceSlice += source.sliceB;
1414 destinationSlice += destination.sliceB;
1415 }
1416 }
1417
1418 void Surface::decodeA1R5G5B5(Buffer &destination, const Buffer &source)
1419 {
1420 unsigned char *sourceSlice = (unsigned char*)source.buffer;
1421 unsigned char *destinationSlice = (unsigned char*)destination.buffer;
1422
1423 for(int z = 0; z < destination.depth && z < source.depth; z++)
1424 {
1425 unsigned char *sourceRow = sourceSlice;
1426 unsigned char *destinationRow = destinationSlice;
1427
1428 for(int y = 0; y < destination.height && y < source.height; y++)
1429 {
1430 unsigned char *sourceElement = sourceRow;
1431 unsigned char *destinationElement = destinationRow;
1432
1433 for(int x = 0; x < destination.width && x < source.width; x++)
1434 {
1435 unsigned int argb = *(unsigned short*)sourceElement;
1436
1437 unsigned int a = (argb & 0x8000) * 130560;
1438 unsigned int r = (((argb & 0x7C00) * 134771 + 0x800000) >> 8) & 0x00FF0000;
1439 unsigned int g = (((argb & 0x03E0) * 16846 + 0x8000) >> 8) & 0x0000FF00;
1440 unsigned int b = (((argb & 0x001F) * 2106 + 0x80) >> 8);
1441
1442 *(unsigned int*)destinationElement = a | r | g | b;
1443
1444 sourceElement += source.bytes;
1445 destinationElement += destination.bytes;
1446 }
1447
1448 sourceRow += source.pitchB;
1449 destinationRow += destination.pitchB;
1450 }
1451
1452 sourceSlice += source.sliceB;
1453 destinationSlice += destination.sliceB;
1454 }
1455 }
1456
1457 void Surface::decodeX4R4G4B4(Buffer &destination, const Buffer &source)
1458 {
1459 unsigned char *sourceSlice = (unsigned char*)source.buffer;
1460 unsigned char *destinationSlice = (unsigned char*)destination.buffer;
1461
1462 for(int z = 0; z < destination.depth && z < source.depth; z++)
1463 {
1464 unsigned char *sourceRow = sourceSlice;
1465 unsigned char *destinationRow = destinationSlice;
1466
1467 for(int y = 0; y < destination.height && y < source.height; y++)
1468 {
1469 unsigned char *sourceElement = sourceRow;
1470 unsigned char *destinationElement = destinationRow;
1471
1472 for(int x = 0; x < destination.width && x < source.width; x++)
1473 {
1474 unsigned int xrgb = *(unsigned short*)sourceElement;
1475
1476 unsigned int r = ((xrgb & 0x0F00) * 0x00001100) & 0x00FF0000;
1477 unsigned int g = ((xrgb & 0x00F0) * 0x00000110) & 0x0000FF00;
1478 unsigned int b = (xrgb & 0x000F) * 0x00000011;
1479
1480 *(unsigned int*)destinationElement = 0xFF000000 | r | g | b;
1481
1482 sourceElement += source.bytes;
1483 destinationElement += destination.bytes;
1484 }
1485
1486 sourceRow += source.pitchB;
1487 destinationRow += destination.pitchB;
1488 }
1489
1490 sourceSlice += source.sliceB;
1491 destinationSlice += destination.sliceB;
1492 }
1493 }
1494
1495 void Surface::decodeA4R4G4B4(Buffer &destination, const Buffer &source)
1496 {
1497 unsigned char *sourceSlice = (unsigned char*)source.buffer;
1498 unsigned char *destinationSlice = (unsigned char*)destination.buffer;
1499
1500 for(int z = 0; z < destination.depth && z < source.depth; z++)
1501 {
1502 unsigned char *sourceRow = sourceSlice;
1503 unsigned char *destinationRow = destinationSlice;
1504
1505 for(int y = 0; y < destination.height && y < source.height; y++)
1506 {
1507 unsigned char *sourceElement = sourceRow;
1508 unsigned char *destinationElement = destinationRow;
1509
1510 for(int x = 0; x < destination.width && x < source.width; x++)
1511 {
1512 unsigned int argb = *(unsigned short*)sourceElement;
1513
1514 unsigned int a = ((argb & 0xF000) * 0x00011000) & 0xFF000000;
1515 unsigned int r = ((argb & 0x0F00) * 0x00001100) & 0x00FF0000;
1516 unsigned int g = ((argb & 0x00F0) * 0x00000110) & 0x0000FF00;
1517 unsigned int b = (argb & 0x000F) * 0x00000011;
1518
1519 *(unsigned int*)destinationElement = a | r | g | b;
1520
1521 sourceElement += source.bytes;
1522 destinationElement += destination.bytes;
1523 }
1524
1525 sourceRow += source.pitchB;
1526 destinationRow += destination.pitchB;
1527 }
1528
1529 sourceSlice += source.sliceB;
1530 destinationSlice += destination.sliceB;
1531 }
1532 }
1533
1534 void Surface::decodeP8(Buffer &destination, const Buffer &source)
1535 {
1536 unsigned char *sourceSlice = (unsigned char*)source.buffer;
1537 unsigned char *destinationSlice = (unsigned char*)destination.buffer;
1538
1539 for(int z = 0; z < destination.depth && z < source.depth; z++)
1540 {
1541 unsigned char *sourceRow = sourceSlice;
1542 unsigned char *destinationRow = destinationSlice;
1543
1544 for(int y = 0; y < destination.height && y < source.height; y++)
1545 {
1546 unsigned char *sourceElement = sourceRow;
1547 unsigned char *destinationElement = destinationRow;
1548
1549 for(int x = 0; x < destination.width && x < source.width; x++)
1550 {
1551 unsigned int abgr = palette[*(unsigned char*)sourceElement];
1552
1553 unsigned int r = (abgr & 0x000000FF) << 16;
1554 unsigned int g = (abgr & 0x0000FF00) << 0;
1555 unsigned int b = (abgr & 0x00FF0000) >> 16;
1556 unsigned int a = (abgr & 0xFF000000) >> 0;
1557
1558 *(unsigned int*)destinationElement = a | r | g | b;
1559
1560 sourceElement += source.bytes;
1561 destinationElement += destination.bytes;
1562 }
1563
1564 sourceRow += source.pitchB;
1565 destinationRow += destination.pitchB;
1566 }
1567
1568 sourceSlice += source.sliceB;
1569 destinationSlice += destination.sliceB;
1570 }
1571 }
1572
1573#if S3TC_SUPPORT
1574 void Surface::decodeDXT1(Buffer &internal, const Buffer &external)
1575 {
1576 unsigned int *destSlice = (unsigned int*)internal.buffer;
Nicolas Capens22658242014-11-29 00:31:41 -05001577 const DXT1 *source = (const DXT1*)external.buffer;
John Bauman89401822014-05-06 15:04:28 -04001578
1579 for(int z = 0; z < external.depth; z++)
1580 {
1581 unsigned int *dest = destSlice;
1582
1583 for(int y = 0; y < external.height; y += 4)
1584 {
1585 for(int x = 0; x < external.width; x += 4)
1586 {
1587 Color<byte> c[4];
1588
1589 c[0] = source->c0;
1590 c[1] = source->c1;
1591
1592 if(source->c0 > source->c1) // No transparency
1593 {
1594 // c2 = 2 / 3 * c0 + 1 / 3 * c1
1595 c[2].r = (byte)((2 * (word)c[0].r + (word)c[1].r + 1) / 3);
1596 c[2].g = (byte)((2 * (word)c[0].g + (word)c[1].g + 1) / 3);
1597 c[2].b = (byte)((2 * (word)c[0].b + (word)c[1].b + 1) / 3);
1598 c[2].a = 0xFF;
1599
1600 // c3 = 1 / 3 * c0 + 2 / 3 * c1
1601 c[3].r = (byte)(((word)c[0].r + 2 * (word)c[1].r + 1) / 3);
1602 c[3].g = (byte)(((word)c[0].g + 2 * (word)c[1].g + 1) / 3);
1603 c[3].b = (byte)(((word)c[0].b + 2 * (word)c[1].b + 1) / 3);
1604 c[3].a = 0xFF;
1605 }
1606 else // c3 transparent
1607 {
1608 // c2 = 1 / 2 * c0 + 1 / 2 * c1
1609 c[2].r = (byte)(((word)c[0].r + (word)c[1].r) / 2);
1610 c[2].g = (byte)(((word)c[0].g + (word)c[1].g) / 2);
1611 c[2].b = (byte)(((word)c[0].b + (word)c[1].b) / 2);
1612 c[2].a = 0xFF;
1613
1614 c[3].r = 0;
1615 c[3].g = 0;
1616 c[3].b = 0;
1617 c[3].a = 0;
1618 }
1619
1620 for(int j = 0; j < 4 && (y + j) < internal.height; j++)
1621 {
1622 for(int i = 0; i < 4 && (x + i) < internal.width; i++)
1623 {
1624 dest[(x + i) + (y + j) * internal.width] = c[(unsigned int)(source->lut >> 2 * (i + j * 4)) % 4];
1625 }
1626 }
1627
1628 source++;
1629 }
1630 }
1631
1632 (byte*&)destSlice += internal.sliceB;
1633 }
1634 }
1635
1636 void Surface::decodeDXT3(Buffer &internal, const Buffer &external)
1637 {
1638 unsigned int *destSlice = (unsigned int*)internal.buffer;
Nicolas Capens22658242014-11-29 00:31:41 -05001639 const DXT3 *source = (const DXT3*)external.buffer;
John Bauman89401822014-05-06 15:04:28 -04001640
1641 for(int z = 0; z < external.depth; z++)
1642 {
1643 unsigned int *dest = destSlice;
1644
1645 for(int y = 0; y < external.height; y += 4)
1646 {
1647 for(int x = 0; x < external.width; x += 4)
1648 {
1649 Color<byte> c[4];
1650
1651 c[0] = source->c0;
1652 c[1] = source->c1;
1653
1654 // c2 = 2 / 3 * c0 + 1 / 3 * c1
1655 c[2].r = (byte)((2 * (word)c[0].r + (word)c[1].r + 1) / 3);
1656 c[2].g = (byte)((2 * (word)c[0].g + (word)c[1].g + 1) / 3);
1657 c[2].b = (byte)((2 * (word)c[0].b + (word)c[1].b + 1) / 3);
1658
1659 // c3 = 1 / 3 * c0 + 2 / 3 * c1
1660 c[3].r = (byte)(((word)c[0].r + 2 * (word)c[1].r + 1) / 3);
1661 c[3].g = (byte)(((word)c[0].g + 2 * (word)c[1].g + 1) / 3);
1662 c[3].b = (byte)(((word)c[0].b + 2 * (word)c[1].b + 1) / 3);
1663
1664 for(int j = 0; j < 4 && (y + j) < internal.height; j++)
1665 {
1666 for(int i = 0; i < 4 && (x + i) < internal.width; i++)
1667 {
1668 unsigned int a = (unsigned int)(source->a >> 4 * (i + j * 4)) & 0x0F;
1669 unsigned int color = (c[(unsigned int)(source->lut >> 2 * (i + j * 4)) % 4] & 0x00FFFFFF) | ((a << 28) + (a << 24));
1670
1671 dest[(x + i) + (y + j) * internal.width] = color;
1672 }
1673 }
1674
1675 source++;
1676 }
1677 }
1678
1679 (byte*&)destSlice += internal.sliceB;
1680 }
1681 }
1682
1683 void Surface::decodeDXT5(Buffer &internal, const Buffer &external)
1684 {
1685 unsigned int *destSlice = (unsigned int*)internal.buffer;
Nicolas Capens22658242014-11-29 00:31:41 -05001686 const DXT5 *source = (const DXT5*)external.buffer;
John Bauman89401822014-05-06 15:04:28 -04001687
1688 for(int z = 0; z < external.depth; z++)
1689 {
1690 unsigned int *dest = destSlice;
1691
1692 for(int y = 0; y < external.height; y += 4)
1693 {
1694 for(int x = 0; x < external.width; x += 4)
1695 {
1696 Color<byte> c[4];
1697
1698 c[0] = source->c0;
1699 c[1] = source->c1;
1700
1701 // c2 = 2 / 3 * c0 + 1 / 3 * c1
1702 c[2].r = (byte)((2 * (word)c[0].r + (word)c[1].r + 1) / 3);
1703 c[2].g = (byte)((2 * (word)c[0].g + (word)c[1].g + 1) / 3);
1704 c[2].b = (byte)((2 * (word)c[0].b + (word)c[1].b + 1) / 3);
1705
1706 // c3 = 1 / 3 * c0 + 2 / 3 * c1
1707 c[3].r = (byte)(((word)c[0].r + 2 * (word)c[1].r + 1) / 3);
1708 c[3].g = (byte)(((word)c[0].g + 2 * (word)c[1].g + 1) / 3);
1709 c[3].b = (byte)(((word)c[0].b + 2 * (word)c[1].b + 1) / 3);
1710
1711 byte a[8];
1712
1713 a[0] = source->a0;
1714 a[1] = source->a1;
1715
1716 if(a[0] > a[1])
1717 {
1718 a[2] = (byte)((6 * (word)a[0] + 1 * (word)a[1] + 3) / 7);
1719 a[3] = (byte)((5 * (word)a[0] + 2 * (word)a[1] + 3) / 7);
1720 a[4] = (byte)((4 * (word)a[0] + 3 * (word)a[1] + 3) / 7);
1721 a[5] = (byte)((3 * (word)a[0] + 4 * (word)a[1] + 3) / 7);
1722 a[6] = (byte)((2 * (word)a[0] + 5 * (word)a[1] + 3) / 7);
1723 a[7] = (byte)((1 * (word)a[0] + 6 * (word)a[1] + 3) / 7);
1724 }
1725 else
1726 {
1727 a[2] = (byte)((4 * (word)a[0] + 1 * (word)a[1] + 2) / 5);
1728 a[3] = (byte)((3 * (word)a[0] + 2 * (word)a[1] + 2) / 5);
1729 a[4] = (byte)((2 * (word)a[0] + 3 * (word)a[1] + 2) / 5);
1730 a[5] = (byte)((1 * (word)a[0] + 4 * (word)a[1] + 2) / 5);
1731 a[6] = 0;
1732 a[7] = 0xFF;
1733 }
1734
1735 for(int j = 0; j < 4 && (y + j) < internal.height; j++)
1736 {
1737 for(int i = 0; i < 4 && (x + i) < internal.width; i++)
1738 {
1739 unsigned int alpha = (unsigned int)a[(unsigned int)(source->alut >> (16 + 3 * (i + j * 4))) % 8] << 24;
1740 unsigned int color = (c[(source->clut >> 2 * (i + j * 4)) % 4] & 0x00FFFFFF) | alpha;
1741
1742 dest[(x + i) + (y + j) * internal.width] = color;
1743 }
1744 }
1745
1746 source++;
1747 }
1748 }
1749
1750 (byte*&)destSlice += internal.sliceB;
1751 }
1752 }
Nicolas Capens22658242014-11-29 00:31:41 -05001753#endif
John Bauman89401822014-05-06 15:04:28 -04001754
1755 void Surface::decodeATI1(Buffer &internal, const Buffer &external)
1756 {
1757 byte *destSlice = (byte*)internal.buffer;
Nicolas Capens22658242014-11-29 00:31:41 -05001758 const ATI1 *source = (const ATI1*)external.buffer;
John Bauman89401822014-05-06 15:04:28 -04001759
1760 for(int z = 0; z < external.depth; z++)
1761 {
1762 byte *dest = destSlice;
1763
1764 for(int y = 0; y < external.height; y += 4)
1765 {
1766 for(int x = 0; x < external.width; x += 4)
1767 {
1768 byte r[8];
1769
1770 r[0] = source->r0;
1771 r[1] = source->r1;
1772
1773 if(r[0] > r[1])
1774 {
1775 r[2] = (byte)((6 * (word)r[0] + 1 * (word)r[1] + 3) / 7);
1776 r[3] = (byte)((5 * (word)r[0] + 2 * (word)r[1] + 3) / 7);
1777 r[4] = (byte)((4 * (word)r[0] + 3 * (word)r[1] + 3) / 7);
1778 r[5] = (byte)((3 * (word)r[0] + 4 * (word)r[1] + 3) / 7);
1779 r[6] = (byte)((2 * (word)r[0] + 5 * (word)r[1] + 3) / 7);
1780 r[7] = (byte)((1 * (word)r[0] + 6 * (word)r[1] + 3) / 7);
1781 }
1782 else
1783 {
1784 r[2] = (byte)((4 * (word)r[0] + 1 * (word)r[1] + 2) / 5);
1785 r[3] = (byte)((3 * (word)r[0] + 2 * (word)r[1] + 2) / 5);
1786 r[4] = (byte)((2 * (word)r[0] + 3 * (word)r[1] + 2) / 5);
1787 r[5] = (byte)((1 * (word)r[0] + 4 * (word)r[1] + 2) / 5);
1788 r[6] = 0;
1789 r[7] = 0xFF;
1790 }
1791
1792 for(int j = 0; j < 4 && (y + j) < internal.height; j++)
1793 {
1794 for(int i = 0; i < 4 && (x + i) < internal.width; i++)
1795 {
1796 dest[(x + i) + (y + j) * internal.width] = r[(unsigned int)(source->rlut >> (16 + 3 * (i + j * 4))) % 8];
1797 }
1798 }
1799
1800 source++;
1801 }
1802 }
1803
1804 destSlice += internal.sliceB;
1805 }
1806 }
1807
1808 void Surface::decodeATI2(Buffer &internal, const Buffer &external)
1809 {
1810 word *destSlice = (word*)internal.buffer;
Nicolas Capens22658242014-11-29 00:31:41 -05001811 const ATI2 *source = (const ATI2*)external.buffer;
John Bauman89401822014-05-06 15:04:28 -04001812
1813 for(int z = 0; z < external.depth; z++)
1814 {
1815 word *dest = destSlice;
1816
1817 for(int y = 0; y < external.height; y += 4)
1818 {
1819 for(int x = 0; x < external.width; x += 4)
1820 {
1821 byte X[8];
1822
1823 X[0] = source->x0;
1824 X[1] = source->x1;
1825
1826 if(X[0] > X[1])
1827 {
1828 X[2] = (byte)((6 * (word)X[0] + 1 * (word)X[1] + 3) / 7);
1829 X[3] = (byte)((5 * (word)X[0] + 2 * (word)X[1] + 3) / 7);
1830 X[4] = (byte)((4 * (word)X[0] + 3 * (word)X[1] + 3) / 7);
1831 X[5] = (byte)((3 * (word)X[0] + 4 * (word)X[1] + 3) / 7);
1832 X[6] = (byte)((2 * (word)X[0] + 5 * (word)X[1] + 3) / 7);
1833 X[7] = (byte)((1 * (word)X[0] + 6 * (word)X[1] + 3) / 7);
1834 }
1835 else
1836 {
1837 X[2] = (byte)((4 * (word)X[0] + 1 * (word)X[1] + 2) / 5);
1838 X[3] = (byte)((3 * (word)X[0] + 2 * (word)X[1] + 2) / 5);
1839 X[4] = (byte)((2 * (word)X[0] + 3 * (word)X[1] + 2) / 5);
1840 X[5] = (byte)((1 * (word)X[0] + 4 * (word)X[1] + 2) / 5);
1841 X[6] = 0;
1842 X[7] = 0xFF;
1843 }
1844
1845 byte Y[8];
1846
1847 Y[0] = source->y0;
1848 Y[1] = source->y1;
1849
1850 if(Y[0] > Y[1])
1851 {
1852 Y[2] = (byte)((6 * (word)Y[0] + 1 * (word)Y[1] + 3) / 7);
1853 Y[3] = (byte)((5 * (word)Y[0] + 2 * (word)Y[1] + 3) / 7);
1854 Y[4] = (byte)((4 * (word)Y[0] + 3 * (word)Y[1] + 3) / 7);
1855 Y[5] = (byte)((3 * (word)Y[0] + 4 * (word)Y[1] + 3) / 7);
1856 Y[6] = (byte)((2 * (word)Y[0] + 5 * (word)Y[1] + 3) / 7);
1857 Y[7] = (byte)((1 * (word)Y[0] + 6 * (word)Y[1] + 3) / 7);
1858 }
1859 else
1860 {
1861 Y[2] = (byte)((4 * (word)Y[0] + 1 * (word)Y[1] + 2) / 5);
1862 Y[3] = (byte)((3 * (word)Y[0] + 2 * (word)Y[1] + 2) / 5);
1863 Y[4] = (byte)((2 * (word)Y[0] + 3 * (word)Y[1] + 2) / 5);
1864 Y[5] = (byte)((1 * (word)Y[0] + 4 * (word)Y[1] + 2) / 5);
1865 Y[6] = 0;
1866 Y[7] = 0xFF;
1867 }
1868
1869 for(int j = 0; j < 4 && (y + j) < internal.height; j++)
1870 {
1871 for(int i = 0; i < 4 && (x + i) < internal.width; i++)
1872 {
1873 word r = X[(unsigned int)(source->xlut >> (16 + 3 * (i + j * 4))) % 8];
1874 word g = Y[(unsigned int)(source->ylut >> (16 + 3 * (i + j * 4))) % 8];
1875
1876 dest[(x + i) + (y + j) * internal.width] = (g << 8) + r;
1877 }
1878 }
1879
1880 source++;
1881 }
1882 }
1883
1884 (byte*&)destSlice += internal.sliceB;
1885 }
1886 }
Nicolas Capens22658242014-11-29 00:31:41 -05001887
1888 struct bgrx8
1889 {
1890 byte b;
1891 byte g;
1892 byte r;
1893 byte x;
1894
1895 inline bgrx8()
1896 {
1897 }
1898
1899 inline void set(int red, int green, int blue)
1900 {
1901 r = static_cast<byte>(clamp(red, 0, 255));
1902 g = static_cast<byte>(clamp(green, 0, 255));
1903 b = static_cast<byte>(clamp(blue, 0, 255));
1904 x = 255;
1905 }
1906 };
1907
1908 struct ETC1
1909 {
1910 struct
1911 {
1912 union
1913 {
1914 struct // Individual colors
1915 {
1916 byte R2 : 4;
1917 byte R1 : 4;
1918 byte G2 : 4;
1919 byte G1 : 4;
1920 byte B2 : 4;
1921 byte B1 : 4;
1922 };
1923
1924 struct // Differential colors
1925 {
1926 sbyte dR : 3;
1927 byte R : 5;
1928 sbyte dG : 3;
1929 byte G : 5;
1930 sbyte dB : 3;
1931 byte B : 5;
1932 };
1933 };
1934
1935 bool flipbit : 1;
1936 bool diffbit : 1;
1937 byte cw2 : 3;
1938 byte cw1 : 3;
1939
1940 byte pixelIndexMSB[2];
1941 byte pixelIndexLSB[2];
1942 };
1943
1944 inline int getIndex(int x, int y) const
1945 {
1946 int bitIndex = x * 4 + y;
1947 int bitOffset = bitIndex & 7;
1948 int lsb = (pixelIndexLSB[1 - (bitIndex >> 3)] >> bitOffset) & 1;
1949 int msb = (pixelIndexMSB[1 - (bitIndex >> 3)] >> bitOffset) & 1;
1950
1951 return (msb << 1) | lsb;
1952 }
1953 };
1954
// Expands a 4-bit value to 8 bits by repeating it in both nibbles (e.g. 0xA becomes 0xAA).
inline int extend_4to8bits(int x)
{
	const int highNibble = x << 4;

	return highNibble | x;
}
1959
// Expands a 5-bit value to 8 bits: the value fills the top five bits and its
// three most significant bits are repeated in the low three (31 becomes 255).
inline int extend_5to8bits(int x)
{
	const int upper = x << 3;
	const int lower = x >> 2;

	return upper | lower;
}
1964
1965 void Surface::decodeETC1(Buffer &internal, const Buffer &external)
1966 {
1967 unsigned int *destSlice = (unsigned int*)internal.buffer;
1968 const ETC1 *source = (const ETC1*)external.buffer;
1969
1970 for(int z = 0; z < external.depth; z++)
1971 {
1972 unsigned int *dest = destSlice;
1973
1974 for(int y = 0; y < external.height; y += 4)
1975 {
1976 for(int x = 0; x < external.width; x += 4)
1977 {
Alexis Hetu56dd42f2015-08-28 11:16:13 -04001978 bgrx8 *color = reinterpret_cast<bgrx8*>(&dest[x + y * internal.pitchP]);
Nicolas Capens22658242014-11-29 00:31:41 -05001979
1980 int r1, g1, b1;
1981 int r2, g2, b2;
1982
1983 if(source->diffbit)
1984 {
1985 b1 = extend_5to8bits(source->B);
1986 g1 = extend_5to8bits(source->G);
1987 r1 = extend_5to8bits(source->R);
1988
1989 r2 = extend_5to8bits(source->R + source->dR);
1990 g2 = extend_5to8bits(source->G + source->dG);
1991 b2 = extend_5to8bits(source->B + source->dB);
1992 }
1993 else
1994 {
1995 r1 = extend_4to8bits(source->R1);
1996 g1 = extend_4to8bits(source->G1);
1997 b1 = extend_4to8bits(source->B1);
1998
1999 r2 = extend_4to8bits(source->R2);
2000 g2 = extend_4to8bits(source->G2);
2001 b2 = extend_4to8bits(source->B2);
2002 }
2003
2004 bgrx8 subblockColors0[4];
2005 bgrx8 subblockColors1[4];
2006
2007 // Table 3.17.2 sorted according to table 3.17.3
2008 static const int intensityModifier[8][4] =
2009 {
2010 {2, 8, -2, -8},
2011 {5, 17, -5, -17},
2012 {9, 29, -9, -29},
2013 {13, 42, -13, -42},
2014 {18, 60, -18, -60},
2015 {24, 80, -24, -80},
2016 {33, 106, -33, -106},
2017 {47, 183, -47, -183}
2018 };
2019
2020 const int i10 = intensityModifier[source->cw1][0];
2021 const int i11 = intensityModifier[source->cw1][1];
2022 const int i12 = intensityModifier[source->cw1][2];
2023 const int i13 = intensityModifier[source->cw1][3];
2024
2025 subblockColors0[0].set(r1 + i10, g1 + i10, b1 + i10);
2026 subblockColors0[1].set(r1 + i11, g1 + i11, b1 + i11);
2027 subblockColors0[2].set(r1 + i12, g1 + i12, b1 + i12);
2028 subblockColors0[3].set(r1 + i13, g1 + i13, b1 + i13);
2029
2030 const int i20 = intensityModifier[source->cw2][0];
2031 const int i21 = intensityModifier[source->cw2][1];
2032 const int i22 = intensityModifier[source->cw2][2];
2033 const int i23 = intensityModifier[source->cw2][3];
2034
2035 subblockColors1[0].set(r2 + i20, g2 + i20, b2 + i20);
2036 subblockColors1[1].set(r2 + i21, g2 + i21, b2 + i21);
2037 subblockColors1[2].set(r2 + i22, g2 + i22, b2 + i22);
2038 subblockColors1[3].set(r2 + i23, g2 + i23, b2 + i23);
2039
2040 if(source->flipbit)
2041 {
Nicolas Capens00555c42015-07-21 15:15:30 -04002042 for(int j = 0; j < 2 && (y + j) < internal.height; j++)
Nicolas Capens22658242014-11-29 00:31:41 -05002043 {
Nicolas Capens00555c42015-07-21 15:15:30 -04002044 if((x + 0) < internal.width) color[0] = subblockColors0[source->getIndex(0, j)];
2045 if((x + 1) < internal.width) color[1] = subblockColors0[source->getIndex(1, j)];
2046 if((x + 2) < internal.width) color[2] = subblockColors0[source->getIndex(2, j)];
2047 if((x + 3) < internal.width) color[3] = subblockColors0[source->getIndex(3, j)];
Alexis Hetu56dd42f2015-08-28 11:16:13 -04002048 color += internal.pitchP;
Nicolas Capens22658242014-11-29 00:31:41 -05002049 }
2050
Nicolas Capens00555c42015-07-21 15:15:30 -04002051 for(int j = 2; j < 4 && (y + j) < internal.height; j++)
Nicolas Capens22658242014-11-29 00:31:41 -05002052 {
Nicolas Capens00555c42015-07-21 15:15:30 -04002053 if((x + 0) < internal.width) color[0] = subblockColors1[source->getIndex(0, j)];
2054 if((x + 1) < internal.width) color[1] = subblockColors1[source->getIndex(1, j)];
2055 if((x + 2) < internal.width) color[2] = subblockColors1[source->getIndex(2, j)];
2056 if((x + 3) < internal.width) color[3] = subblockColors1[source->getIndex(3, j)];
Alexis Hetu56dd42f2015-08-28 11:16:13 -04002057 color += internal.pitchP;
Nicolas Capens22658242014-11-29 00:31:41 -05002058 }
2059 }
2060 else
2061 {
Nicolas Capens00555c42015-07-21 15:15:30 -04002062 for(int j = 0; j < 4 && (y + j) < internal.height; j++)
Nicolas Capens22658242014-11-29 00:31:41 -05002063 {
Nicolas Capens00555c42015-07-21 15:15:30 -04002064 if((x + 0) < internal.width) color[0] = subblockColors0[source->getIndex(0, j)];
2065 if((x + 1) < internal.width) color[1] = subblockColors0[source->getIndex(1, j)];
2066 if((x + 2) < internal.width) color[2] = subblockColors1[source->getIndex(2, j)];
2067 if((x + 3) < internal.width) color[3] = subblockColors1[source->getIndex(3, j)];
Alexis Hetu56dd42f2015-08-28 11:16:13 -04002068 color += internal.pitchP;
Nicolas Capens22658242014-11-29 00:31:41 -05002069 }
2070 }
2071
2072 source++;
2073 }
2074 }
2075
2076 (byte*&)destSlice += internal.sliceB;
2077 }
2078 }
John Bauman89401822014-05-06 15:04:28 -04002079
2080 unsigned int Surface::size(int width, int height, int depth, Format format)
2081 {
Nicolas Capens00555c42015-07-21 15:15:30 -04002082 // Dimensions rounded up to multiples of 4, used for compressed formats
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04002083 int width4 = align(width, 4);
2084 int height4 = align(height, 4);
John Bauman89401822014-05-06 15:04:28 -04002085
2086 switch(format)
2087 {
2088 #if S3TC_SUPPORT
2089 case FORMAT_DXT1:
John Bauman66b8ab22014-05-06 15:57:45 -04002090 #endif
John Bauman89401822014-05-06 15:04:28 -04002091 case FORMAT_ATI1:
Nicolas Capens22658242014-11-29 00:31:41 -05002092 case FORMAT_ETC1:
John Bauman89401822014-05-06 15:04:28 -04002093 return width4 * height4 * depth / 2;
John Bauman66b8ab22014-05-06 15:57:45 -04002094 #if S3TC_SUPPORT
John Bauman89401822014-05-06 15:04:28 -04002095 case FORMAT_DXT3:
2096 case FORMAT_DXT5:
John Bauman66b8ab22014-05-06 15:57:45 -04002097 #endif
John Bauman89401822014-05-06 15:04:28 -04002098 case FORMAT_ATI2:
2099 return width4 * height4 * depth;
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04002100 case FORMAT_YV12_BT601:
2101 case FORMAT_YV12_BT709:
2102 case FORMAT_YV12_JFIF:
2103 {
2104 unsigned int YStride = align(width, 16);
2105 unsigned int YSize = YStride * height;
2106 unsigned int CStride = align(YStride / 2, 16);
2107 unsigned int CSize = CStride * height / 2;
2108
2109 return YSize + 2 * CSize;
2110 }
John Bauman89401822014-05-06 15:04:28 -04002111 default:
2112 return bytes(format) * width * height * depth;
2113 }
2114
2115 return 0;
2116 }
2117
2118 bool Surface::isStencil(Format format)
2119 {
2120 switch(format)
2121 {
2122 case FORMAT_D32:
2123 case FORMAT_D16:
2124 case FORMAT_D24X8:
2125 case FORMAT_D32F:
2126 case FORMAT_D32F_COMPLEMENTARY:
2127 case FORMAT_D32F_LOCKABLE:
2128 return false;
2129 case FORMAT_D24S8:
2130 case FORMAT_D24FS8:
2131 case FORMAT_S8:
John Bauman66b8ab22014-05-06 15:57:45 -04002132 case FORMAT_DF24S8:
2133 case FORMAT_DF16S8:
2134 case FORMAT_D32FS8_TEXTURE:
2135 case FORMAT_D32FS8_SHADOW:
John Bauman89401822014-05-06 15:04:28 -04002136 case FORMAT_INTZ:
2137 return true;
2138 default:
2139 return false;
2140 }
2141 }
2142
2143 bool Surface::isDepth(Format format)
2144 {
2145 switch(format)
2146 {
2147 case FORMAT_D32:
2148 case FORMAT_D16:
2149 case FORMAT_D24X8:
2150 case FORMAT_D24S8:
2151 case FORMAT_D24FS8:
2152 case FORMAT_D32F:
2153 case FORMAT_D32F_COMPLEMENTARY:
2154 case FORMAT_D32F_LOCKABLE:
John Bauman66b8ab22014-05-06 15:57:45 -04002155 case FORMAT_DF24S8:
2156 case FORMAT_DF16S8:
2157 case FORMAT_D32FS8_TEXTURE:
2158 case FORMAT_D32FS8_SHADOW:
John Bauman89401822014-05-06 15:04:28 -04002159 case FORMAT_INTZ:
2160 return true;
2161 case FORMAT_S8:
2162 return false;
2163 default:
2164 return false;
2165 }
2166 }
2167
2168 bool Surface::isPalette(Format format)
2169 {
2170 switch(format)
2171 {
2172 case FORMAT_P8:
2173 case FORMAT_A8P8:
2174 return true;
2175 default:
2176 return false;
2177 }
2178 }
2179
2180 bool Surface::isFloatFormat(Format format)
2181 {
2182 switch(format)
2183 {
2184 case FORMAT_X8R8G8B8:
Nicolas Capensef77ac12015-03-28 21:48:51 -04002185 case FORMAT_X8B8G8R8:
John Bauman89401822014-05-06 15:04:28 -04002186 case FORMAT_A8R8G8B8:
Nicolas Capensef77ac12015-03-28 21:48:51 -04002187 case FORMAT_A8B8G8R8:
John Bauman89401822014-05-06 15:04:28 -04002188 case FORMAT_G8R8:
2189 case FORMAT_G16R16:
2190 case FORMAT_A16B16G16R16:
2191 case FORMAT_V8U8:
2192 case FORMAT_Q8W8V8U8:
2193 case FORMAT_X8L8V8U8:
2194 case FORMAT_V16U16:
2195 case FORMAT_A16W16V16U16:
2196 case FORMAT_Q16W16V16U16:
2197 case FORMAT_A8:
2198 case FORMAT_R8:
2199 case FORMAT_L8:
2200 case FORMAT_L16:
2201 case FORMAT_A8L8:
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04002202 case FORMAT_YV12_BT601:
2203 case FORMAT_YV12_BT709:
2204 case FORMAT_YV12_JFIF:
John Bauman89401822014-05-06 15:04:28 -04002205 return false;
2206 case FORMAT_R32F:
2207 case FORMAT_G32R32F:
2208 case FORMAT_A32B32G32R32F:
2209 case FORMAT_D32F:
2210 case FORMAT_D32F_COMPLEMENTARY:
2211 case FORMAT_D32F_LOCKABLE:
John Bauman66b8ab22014-05-06 15:57:45 -04002212 case FORMAT_D32FS8_TEXTURE:
2213 case FORMAT_D32FS8_SHADOW:
Nicolas Capens80594422015-06-09 16:42:56 -04002214 case FORMAT_L16F:
2215 case FORMAT_A16L16F:
2216 case FORMAT_L32F:
2217 case FORMAT_A32L32F:
John Bauman89401822014-05-06 15:04:28 -04002218 return true;
2219 default:
2220 ASSERT(false);
2221 }
2222
2223 return false;
2224 }
2225
2226 bool Surface::isUnsignedComponent(Format format, int component)
2227 {
2228 switch(format)
2229 {
2230 case FORMAT_NULL:
2231 case FORMAT_X8R8G8B8:
Nicolas Capensef77ac12015-03-28 21:48:51 -04002232 case FORMAT_X8B8G8R8:
John Bauman89401822014-05-06 15:04:28 -04002233 case FORMAT_A8R8G8B8:
Nicolas Capensef77ac12015-03-28 21:48:51 -04002234 case FORMAT_A8B8G8R8:
John Bauman89401822014-05-06 15:04:28 -04002235 case FORMAT_G8R8:
2236 case FORMAT_G16R16:
2237 case FORMAT_A16B16G16R16:
2238 case FORMAT_D32F:
2239 case FORMAT_D32F_COMPLEMENTARY:
2240 case FORMAT_D32F_LOCKABLE:
John Bauman66b8ab22014-05-06 15:57:45 -04002241 case FORMAT_D32FS8_TEXTURE:
2242 case FORMAT_D32FS8_SHADOW:
John Bauman89401822014-05-06 15:04:28 -04002243 case FORMAT_A8:
2244 case FORMAT_R8:
2245 case FORMAT_L8:
2246 case FORMAT_L16:
2247 case FORMAT_A8L8:
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04002248 case FORMAT_YV12_BT601:
2249 case FORMAT_YV12_BT709:
2250 case FORMAT_YV12_JFIF:
John Bauman89401822014-05-06 15:04:28 -04002251 return true;
2252 case FORMAT_V8U8:
2253 case FORMAT_X8L8V8U8:
2254 case FORMAT_V16U16:
2255 if(component < 2)
2256 {
2257 return false;
2258 }
2259 else
2260 {
2261 return true;
2262 }
2263 case FORMAT_A16W16V16U16:
2264 if(component < 3)
2265 {
2266 return false;
2267 }
2268 else
2269 {
2270 return true;
2271 }
2272 case FORMAT_Q8W8V8U8:
2273 case FORMAT_Q16W16V16U16:
2274 return false;
2275 case FORMAT_R32F:
2276 if(component < 1)
2277 {
2278 return false;
2279 }
2280 else
2281 {
2282 return true;
2283 }
2284 case FORMAT_G32R32F:
2285 if(component < 2)
2286 {
2287 return false;
2288 }
2289 else
2290 {
2291 return true;
2292 }
2293 case FORMAT_A32B32G32R32F:
2294 return false;
2295 default:
2296 ASSERT(false);
2297 }
2298
2299 return false;
2300 }
2301
2302 bool Surface::isSRGBreadable(Format format)
2303 {
2304 // Keep in sync with Capabilities::isSRGBreadable
2305 switch(format)
2306 {
2307 case FORMAT_L8:
2308 case FORMAT_A8L8:
2309 case FORMAT_R8G8B8:
2310 case FORMAT_A8R8G8B8:
2311 case FORMAT_X8R8G8B8:
2312 case FORMAT_A8B8G8R8:
2313 case FORMAT_X8B8G8R8:
2314 case FORMAT_R5G6B5:
2315 case FORMAT_X1R5G5B5:
2316 case FORMAT_A1R5G5B5:
2317 case FORMAT_A4R4G4B4:
2318 #if S3TC_SUPPORT
2319 case FORMAT_DXT1:
2320 case FORMAT_DXT3:
2321 case FORMAT_DXT5:
John Bauman66b8ab22014-05-06 15:57:45 -04002322 #endif
John Bauman89401822014-05-06 15:04:28 -04002323 case FORMAT_ATI1:
2324 case FORMAT_ATI2:
John Bauman89401822014-05-06 15:04:28 -04002325 return true;
2326 default:
2327 return false;
2328 }
2329
2330 return false;
2331 }
2332
2333 bool Surface::isSRGBwritable(Format format)
2334 {
2335 // Keep in sync with Capabilities::isSRGBwritable
2336 switch(format)
2337 {
2338 case FORMAT_NULL:
2339 case FORMAT_A8R8G8B8:
2340 case FORMAT_X8R8G8B8:
2341 case FORMAT_A8B8G8R8:
2342 case FORMAT_X8B8G8R8:
2343 case FORMAT_R5G6B5:
2344 return true;
2345 default:
2346 return false;
2347 }
2348 }
2349
2350 bool Surface::isCompressed(Format format)
2351 {
2352 switch(format)
2353 {
2354 #if S3TC_SUPPORT
2355 case FORMAT_DXT1:
2356 case FORMAT_DXT3:
2357 case FORMAT_DXT5:
John Bauman66b8ab22014-05-06 15:57:45 -04002358 #endif
John Bauman89401822014-05-06 15:04:28 -04002359 case FORMAT_ATI1:
2360 case FORMAT_ATI2:
Nicolas Capens22658242014-11-29 00:31:41 -05002361 case FORMAT_ETC1:
John Bauman89401822014-05-06 15:04:28 -04002362 return true;
John Bauman89401822014-05-06 15:04:28 -04002363 default:
2364 return false;
2365 }
2366 }
2367
// Returns the number of components of the given format.
// Formats not listed below trigger an assert and report a single component.
bool isUnusedPlaceholderForDocumentation_doNotEmit;
2405
2406 void *Surface::allocateBuffer(int width, int height, int depth, Format format)
2407 {
Nicolas Capensdb17b5d2015-06-26 11:15:58 -04002408 // Render targets require 2x2 quads
2409 int width2 = (width + 1) & ~1;
2410 int height2 = (height + 1) & ~1;
John Bauman89401822014-05-06 15:04:28 -04002411
Nicolas Capens6ea71872015-06-26 13:00:48 -04002412 // FIXME: Unpacking byte4 to short4 in the sampler currently involves reading 8 bytes,
2413 // so we have to allocate 4 extra bytes to avoid buffer overruns.
2414 return allocateZero(size(width2, height2, depth, format) + 4);
John Bauman89401822014-05-06 15:04:28 -04002415 }
2416
Nicolas Capens5ba566b2015-05-25 17:11:04 -04002417 void Surface::memfill4(void *buffer, int pattern, int bytes)
John Bauman89401822014-05-06 15:04:28 -04002418 {
2419 while((size_t)buffer & 0x1 && bytes >= 1)
2420 {
2421 *(char*)buffer = (char)pattern;
2422 (char*&)buffer += 1;
2423 bytes -= 1;
2424 }
2425
2426 while((size_t)buffer & 0x3 && bytes >= 2)
2427 {
2428 *(short*)buffer = (short)pattern;
2429 (short*&)buffer += 1;
2430 bytes -= 2;
2431 }
2432
2433 if(CPUID::supportsSSE())
2434 {
2435 while((size_t)buffer & 0xF && bytes >= 4)
2436 {
2437 *(int*)buffer = pattern;
2438 (int*&)buffer += 1;
2439 bytes -= 4;
2440 }
2441
2442 __m128 quad = _mm_set_ps1((float&)pattern);
2443
2444 float *pointer = (float*)buffer;
2445 int qxwords = bytes / 64;
2446 bytes -= qxwords * 64;
2447
2448 while(qxwords--)
2449 {
2450 _mm_stream_ps(pointer + 0, quad);
2451 _mm_stream_ps(pointer + 4, quad);
2452 _mm_stream_ps(pointer + 8, quad);
2453 _mm_stream_ps(pointer + 12, quad);
2454
2455 pointer += 16;
2456 }
2457
2458 buffer = pointer;
2459 }
2460
2461 while(bytes >= 4)
2462 {
2463 *(int*)buffer = (int)pattern;
2464 (int*&)buffer += 1;
2465 bytes -= 4;
2466 }
2467
2468 while(bytes >= 2)
2469 {
2470 *(short*)buffer = (short)pattern;
2471 (short*&)buffer += 1;
2472 bytes -= 2;
2473 }
2474
2475 while(bytes >= 1)
2476 {
2477 *(char*)buffer = (char)pattern;
2478 (char*&)buffer += 1;
2479 bytes -= 1;
2480 }
2481 }
2482
Nicolas Capensef77ac12015-03-28 21:48:51 -04002483 void Surface::clearColorBuffer(unsigned int colorARGB, unsigned int rgbaMask, int x0, int y0, int width, int height)
John Bauman89401822014-05-06 15:04:28 -04002484 {
2485 // FIXME: Also clear buffers in other formats?
2486
2487 // Not overlapping
2488 if(x0 > internal.width) return;
2489 if(y0 > internal.height) return;
2490 if(x0 + width < 0) return;
2491 if(y0 + height < 0) return;
2492
2493 // Clip against dimensions
2494 if(x0 < 0) {width += x0; x0 = 0;}
2495 if(x0 + width > internal.width) width = internal.width - x0;
2496 if(y0 < 0) {height += y0; y0 = 0;}
2497 if(y0 + height > internal.height) height = internal.height - y0;
2498
2499 const bool entire = x0 == 0 && y0 == 0 && width == internal.width && height == internal.height;
2500 const Lock lock = entire ? LOCK_DISCARD : LOCK_WRITEONLY;
2501
John Bauman89401822014-05-06 15:04:28 -04002502 int x1 = x0 + width;
2503 int y1 = y0 + height;
2504
John Bauman89401822014-05-06 15:04:28 -04002505 // if(lockable || !quadLayoutEnabled)
2506 {
Nicolas Capens5ba566b2015-05-25 17:11:04 -04002507 unsigned char *buffer = (unsigned char*)lockInternal(x0, y0, 0, lock, PUBLIC);
John Bauman89401822014-05-06 15:04:28 -04002508
2509 for(int z = 0; z < internal.depth; z++)
2510 {
2511 unsigned char *target = buffer;
2512
2513 for(int y = y0; y < y1; y++)
2514 {
2515 switch(internal.format)
2516 {
2517 case FORMAT_NULL:
2518 break;
2519 case FORMAT_X8R8G8B8:
2520 case FORMAT_A8R8G8B8:
2521 // case FORMAT_X8G8R8B8Q: // FIXME
2522 // case FORMAT_A8G8R8B8Q: // FIXME
John Bauman19bac1e2014-05-06 15:23:49 -04002523 if(rgbaMask == 0xF || (internal.format == FORMAT_X8R8G8B8 && rgbaMask == 0x7))
John Bauman89401822014-05-06 15:04:28 -04002524 {
Nicolas Capens5ba566b2015-05-25 17:11:04 -04002525 memfill4(target, colorARGB, 4 * (x1 - x0));
John Bauman89401822014-05-06 15:04:28 -04002526 }
2527 else
2528 {
2529 unsigned int bgraMask = (rgbaMask & 0x1 ? 0x00FF0000 : 0) | (rgbaMask & 0x2 ? 0x0000FF00 : 0) | (rgbaMask & 0x4 ? 0x000000FF : 0) | (rgbaMask & 0x8 ? 0xFF000000 : 0);
2530 unsigned int invMask = ~bgraMask;
Nicolas Capensef77ac12015-03-28 21:48:51 -04002531 unsigned int maskedColor = colorARGB & bgraMask;
John Bauman89401822014-05-06 15:04:28 -04002532 unsigned int *target32 = (unsigned int*)target;
2533
2534 for(int x = 0; x < width; x++)
2535 {
2536 target32[x] = maskedColor | (target32[x] & invMask);
2537 }
2538 }
2539 break;
Nicolas Capensef77ac12015-03-28 21:48:51 -04002540 case FORMAT_X8B8G8R8:
2541 case FORMAT_A8B8G8R8:
Nicolas Capensef77ac12015-03-28 21:48:51 -04002542 {
Nicolas Capensd61d3a72015-05-26 10:56:05 -04002543 unsigned char r8 = (colorARGB & 0x00FF0000) >> 16;
2544 unsigned char g8 = (colorARGB & 0x0000FF00) >> 8;
2545 unsigned char b8 = (colorARGB & 0x000000FF) >> 0;
2546 unsigned char a8 = (colorARGB & 0xFF000000) >> 24;
2547 unsigned char a8b8g8r8[4] = {r8, g8, b8, a8};
2548 unsigned int colorABGR = (unsigned int&)a8b8g8r8;
Nicolas Capensef77ac12015-03-28 21:48:51 -04002549
Nicolas Capensd61d3a72015-05-26 10:56:05 -04002550 if(rgbaMask == 0xF || (internal.format == FORMAT_X8B8G8R8 && rgbaMask == 0x7))
Nicolas Capensef77ac12015-03-28 21:48:51 -04002551 {
Nicolas Capensd61d3a72015-05-26 10:56:05 -04002552 memfill4(target, colorABGR, 4 * (x1 - x0));
2553 }
2554 else
2555 {
2556 unsigned int rgbaMask32 = (rgbaMask & 0x1 ? 0x000000FF : 0) | (rgbaMask & 0x2 ? 0x0000FF00 : 0) | (rgbaMask & 0x4 ? 0x00FF0000 : 0) | (rgbaMask & 0x8 ? 0xFF000000 : 0);
2557 unsigned int invMask = ~rgbaMask32;
2558 unsigned int maskedColor = colorABGR & rgbaMask32;
2559 unsigned int *target32 = (unsigned int*)target;
2560
2561 for(int x = 0; x < width; x++)
2562 {
2563 target32[x] = maskedColor | (target32[x] & invMask);
2564 }
Nicolas Capensef77ac12015-03-28 21:48:51 -04002565 }
2566 }
2567 break;
John Bauman89401822014-05-06 15:04:28 -04002568 case FORMAT_G8R8:
John Bauman89401822014-05-06 15:04:28 -04002569 {
Nicolas Capensd61d3a72015-05-26 10:56:05 -04002570 unsigned char r8 = (colorARGB & 0x00FF0000) >> 16;
2571 unsigned char g8 = (colorARGB & 0x0000FF00) >> 8;
2572 unsigned char g8r8[4] = {r8, g8, r8, g8};
John Bauman89401822014-05-06 15:04:28 -04002573
Nicolas Capensd61d3a72015-05-26 10:56:05 -04002574 if((rgbaMask & 0x3) == 0x3)
John Bauman89401822014-05-06 15:04:28 -04002575 {
Nicolas Capensd61d3a72015-05-26 10:56:05 -04002576 memfill4(target, (int&)g8r8, 2 * (x1 - x0));
2577 }
2578 else
2579 {
2580 unsigned short rgMask = (rgbaMask & 0x1 ? 0x000000FF : 0) | (rgbaMask & 0x2 ? 0x0000FF00 : 0);
2581 unsigned short invMask = ~rgMask;
2582 unsigned short maskedColor = (unsigned short&)g8r8 & rgMask;
2583 unsigned short *target16 = (unsigned short*)target;
2584
2585 for(int x = 0; x < width; x++)
2586 {
2587 target16[x] = maskedColor | (target16[x] & invMask);
2588 }
John Bauman89401822014-05-06 15:04:28 -04002589 }
2590 }
2591 break;
2592 case FORMAT_G16R16:
John Bauman89401822014-05-06 15:04:28 -04002593 {
Nicolas Capensd61d3a72015-05-26 10:56:05 -04002594 unsigned char r8 = (colorARGB & 0x00FF0000) >> 16;
2595 unsigned char g8 = (colorARGB & 0x0000FF00) >> 8;
2596 unsigned short r16 = (r8 << 8) | r8;
2597 unsigned short g16 = (g8 << 8) | g8;
2598 unsigned short g16r16[2] = {r16, g16};
John Bauman89401822014-05-06 15:04:28 -04002599
Nicolas Capensd61d3a72015-05-26 10:56:05 -04002600 if((rgbaMask & 0x3) == 0x3)
John Bauman89401822014-05-06 15:04:28 -04002601 {
Nicolas Capensd61d3a72015-05-26 10:56:05 -04002602 memfill4(target, (int&)g16r16, 4 * (x1 - x0));
2603 }
2604 else
2605 {
2606 unsigned int rgMask = (rgbaMask & 0x1 ? 0x0000FFFF : 0) | (rgbaMask & 0x2 ? 0xFFFF0000 : 0);
2607 unsigned int invMask = ~rgMask;
2608 unsigned int maskedColor = (unsigned int&)g16r16 & rgMask;
2609 unsigned int *target32 = (unsigned int*)target;
2610
2611 for(int x = 0; x < width; x++)
2612 {
2613 target32[x] = maskedColor | (target32[x] & invMask);
2614 }
John Bauman89401822014-05-06 15:04:28 -04002615 }
2616 }
2617 break;
2618 case FORMAT_A16B16G16R16:
John Bauman89401822014-05-06 15:04:28 -04002619 {
Nicolas Capensd61d3a72015-05-26 10:56:05 -04002620 unsigned char r8 = (colorARGB & 0x00FF0000) >> 16;
2621 unsigned char g8 = (colorARGB & 0x0000FF00) >> 8;
2622 unsigned char b8 = (colorARGB & 0x000000FF) >> 0;
2623 unsigned char a8 = (colorARGB & 0xFF000000) >> 24;
2624 unsigned short r16 = (r8 << 8) | r8;
2625 unsigned short g16 = (g8 << 8) | g8;
2626 unsigned short b16 = (b8 << 8) | b8;
2627 unsigned short a16 = (a8 << 8) | a8;
2628
2629 if(rgbaMask == 0xF)
John Bauman89401822014-05-06 15:04:28 -04002630 {
Nicolas Capensd61d3a72015-05-26 10:56:05 -04002631 for(int x = 0; x < width; x++)
2632 {
2633 ((unsigned short*)target)[4 * x + 0] = r16;
2634 ((unsigned short*)target)[4 * x + 1] = g16;
2635 ((unsigned short*)target)[4 * x + 2] = b16;
2636 ((unsigned short*)target)[4 * x + 3] = a16;
2637 }
John Bauman89401822014-05-06 15:04:28 -04002638 }
Nicolas Capensd61d3a72015-05-26 10:56:05 -04002639 else
2640 {
2641 if(rgbaMask & 0x1) for(int x = 0; x < width; x++) ((unsigned short*)target)[4 * x + 0] = r16;
2642 if(rgbaMask & 0x2) for(int x = 0; x < width; x++) ((unsigned short*)target)[4 * x + 1] = g16;
2643 if(rgbaMask & 0x4) for(int x = 0; x < width; x++) ((unsigned short*)target)[4 * x + 2] = b16;
2644 if(rgbaMask & 0x8) for(int x = 0; x < width; x++) ((unsigned short*)target)[4 * x + 3] = a16;
2645 }
John Bauman89401822014-05-06 15:04:28 -04002646 }
2647 break;
2648 case FORMAT_R32F:
2649 if(rgbaMask & 0x1)
2650 {
Nicolas Capensd61d3a72015-05-26 10:56:05 -04002651 float r32f = (float)(colorARGB & 0x00FF0000) / 0x00FF0000;
2652
John Bauman89401822014-05-06 15:04:28 -04002653 for(int x = 0; x < width; x++)
2654 {
2655 ((float*)target)[x] = r32f;
2656 }
2657 }
2658 break;
2659 case FORMAT_G32R32F:
John Bauman89401822014-05-06 15:04:28 -04002660 {
Nicolas Capensd61d3a72015-05-26 10:56:05 -04002661 float r32f = (float)(colorARGB & 0x00FF0000) / 0x00FF0000;
2662 float g32f = (float)(colorARGB & 0x0000FF00) / 0x0000FF00;
2663
2664 if((rgbaMask & 0x3) == 0x3)
John Bauman89401822014-05-06 15:04:28 -04002665 {
Nicolas Capensd61d3a72015-05-26 10:56:05 -04002666 for(int x = 0; x < width; x++)
2667 {
2668 ((float*)target)[2 * x + 0] = r32f;
2669 ((float*)target)[2 * x + 1] = g32f;
2670 }
John Bauman89401822014-05-06 15:04:28 -04002671 }
Nicolas Capensd61d3a72015-05-26 10:56:05 -04002672 else
2673 {
2674 if(rgbaMask & 0x1) for(int x = 0; x < width; x++) ((float*)target)[2 * x + 0] = r32f;
2675 if(rgbaMask & 0x2) for(int x = 0; x < width; x++) ((float*)target)[2 * x + 1] = g32f;
2676 }
John Bauman89401822014-05-06 15:04:28 -04002677 }
2678 break;
2679 case FORMAT_A32B32G32R32F:
John Bauman89401822014-05-06 15:04:28 -04002680 {
Nicolas Capensd61d3a72015-05-26 10:56:05 -04002681 float r32f = (float)(colorARGB & 0x00FF0000) / 0x00FF0000;
2682 float g32f = (float)(colorARGB & 0x0000FF00) / 0x0000FF00;
2683 float b32f = (float)(colorARGB & 0x000000FF) / 0x000000FF;
2684 float a32f = (float)(colorARGB & 0xFF000000) / 0xFF000000;
2685
2686 if(rgbaMask == 0xF)
John Bauman89401822014-05-06 15:04:28 -04002687 {
Nicolas Capensd61d3a72015-05-26 10:56:05 -04002688 for(int x = 0; x < width; x++)
2689 {
2690 ((float*)target)[4 * x + 0] = r32f;
2691 ((float*)target)[4 * x + 1] = g32f;
2692 ((float*)target)[4 * x + 2] = b32f;
2693 ((float*)target)[4 * x + 3] = a32f;
2694 }
John Bauman89401822014-05-06 15:04:28 -04002695 }
Nicolas Capensd61d3a72015-05-26 10:56:05 -04002696 else
2697 {
2698 if(rgbaMask & 0x1) for(int x = 0; x < width; x++) ((float*)target)[4 * x + 0] = r32f;
2699 if(rgbaMask & 0x2) for(int x = 0; x < width; x++) ((float*)target)[4 * x + 1] = g32f;
2700 if(rgbaMask & 0x4) for(int x = 0; x < width; x++) ((float*)target)[4 * x + 2] = b32f;
2701 if(rgbaMask & 0x8) for(int x = 0; x < width; x++) ((float*)target)[4 * x + 3] = a32f;
2702 }
John Bauman89401822014-05-06 15:04:28 -04002703 }
2704 break;
Nicolas Capens5ba566b2015-05-25 17:11:04 -04002705 case FORMAT_R5G6B5:
Nicolas Capens5ba566b2015-05-25 17:11:04 -04002706 {
Nicolas Capensd61d3a72015-05-26 10:56:05 -04002707 unsigned int r5g6b5 = ((colorARGB >> 8) & 0xF800) | ((colorARGB >> 5) & 0x07E0) | ((colorARGB >> 3) & 0x001F);
Nicolas Capens5ba566b2015-05-25 17:11:04 -04002708
Nicolas Capensd61d3a72015-05-26 10:56:05 -04002709 if((rgbaMask & 0x7) == 0x7)
Nicolas Capens5ba566b2015-05-25 17:11:04 -04002710 {
Nicolas Capensd61d3a72015-05-26 10:56:05 -04002711 unsigned int r5g6b5r5g6b5 = r5g6b5 | (r5g6b5 << 16);
2712 memfill4(target, r5g6b5r5g6b5, 2 * (x1 - x0));
2713 }
2714 else
2715 {
2716 unsigned short rgbMask = (rgbaMask & 0x1 ? 0xF800 : 0) | (rgbaMask & 0x2 ? 0x07E0 : 0) | (rgbaMask & 0x3 ? 0x001F : 0);
2717 unsigned short invMask = ~rgbMask;
2718 unsigned short maskedColor = r5g6b5 & rgbMask;
2719 unsigned short *target16 = (unsigned short*)target;
2720
2721 for(int x = 0; x < width; x++)
2722 {
2723 target16[x] = maskedColor | (target16[x] & invMask);
2724 }
Nicolas Capens5ba566b2015-05-25 17:11:04 -04002725 }
2726 }
2727 break;
John Bauman89401822014-05-06 15:04:28 -04002728 default:
2729 ASSERT(false);
2730 }
2731
2732 target += internal.pitchB;
2733 }
2734
2735 buffer += internal.sliceB;
2736 }
2737
2738 unlockInternal();
2739 }
2740 /* else
2741 {
Alexis Hetu0085c442015-06-12 15:19:20 -04002742 int width2 = (internal.width + 1) & ~1;
2743
John Bauman89401822014-05-06 15:04:28 -04002744 // unsigned char *target = (unsigned char*&)buffer;
2745 //
2746 // for(int y = y0; y < y1; y++)
2747 // {
2748 // for(int x = x0; x < x1; x++)
2749 // {
2750 // target[width2 * 4 * (y & ~1) + 2 * (y & 1) + 8 * (x & ~1) + (x & 1) + 0] = (color & 0x000000FF) >> 0;
2751 // target[width2 * 4 * (y & ~1) + 2 * (y & 1) + 8 * (x & ~1) + (x & 1) + 4] = (color & 0x00FF0000) >> 16;
2752 // target[width2 * 4 * (y & ~1) + 2 * (y & 1) + 8 * (x & ~1) + (x & 1) + 8] = (color & 0x0000FF00) >> 8;
2753 // target[width2 * 4 * (y & ~1) + 2 * (y & 1) + 8 * (x & ~1) + (x & 1) + 12] = (color & 0xFF000000) >> 24;
2754 // }
2755 // }
2756
2757 unsigned char colorQ[16];
2758
2759 colorQ[0] = (color & 0x000000FF) >> 0;
2760 colorQ[1] = (color & 0x000000FF) >> 0;
2761 colorQ[2] = (color & 0x000000FF) >> 0;
2762 colorQ[3] = (color & 0x000000FF) >> 0;
2763 colorQ[4] = (color & 0x00FF0000) >> 16;
2764 colorQ[5] = (color & 0x00FF0000) >> 16;
2765 colorQ[6] = (color & 0x00FF0000) >> 16;
2766 colorQ[7] = (color & 0x00FF0000) >> 16;
2767 colorQ[8] = (color & 0x0000FF00) >> 8;
2768 colorQ[9] = (color & 0x0000FF00) >> 8;
2769 colorQ[10] = (color & 0x0000FF00) >> 8;
2770 colorQ[11] = (color & 0x0000FF00) >> 8;
2771 colorQ[12] = (color & 0xFF000000) >> 24;
2772 colorQ[13] = (color & 0xFF000000) >> 24;
2773 colorQ[14] = (color & 0xFF000000) >> 24;
2774 colorQ[15] = (color & 0xFF000000) >> 24;
2775
2776 for(int y = y0; y < y1; y++)
2777 {
2778 unsigned char *target = (unsigned char*)lockInternal(0, 0, 0, lock) + width2 * 4 * (y & ~1) + 2 * (y & 1); // FIXME: Unlock
2779
2780 if((y & 1) == 0 && y + 1 < y1) // Fill quad line at once
2781 {
2782 if((x0 & 1) != 0)
2783 {
2784 target[8 * (x0 & ~1) + 1 + 0] = (color & 0x000000FF) >> 0;
2785 target[8 * (x0 & ~1) + 1 + 4] = (color & 0x00FF0000) >> 16;
2786 target[8 * (x0 & ~1) + 1 + 8] = (color & 0x0000FF00) >> 8;
2787 target[8 * (x0 & ~1) + 1 + 12] = (color & 0xFF000000) >> 24;
2788
2789 target[8 * (x0 & ~1) + 3 + 0] = (color & 0x000000FF) >> 0;
2790 target[8 * (x0 & ~1) + 3 + 4] = (color & 0x00FF0000) >> 16;
2791 target[8 * (x0 & ~1) + 3 + 8] = (color & 0x0000FF00) >> 8;
2792 target[8 * (x0 & ~1) + 3 + 12] = (color & 0xFF000000) >> 24;
2793 }
2794
2795 __asm
2796 {
2797 movq mm0, colorQ+0
2798 movq mm1, colorQ+8
2799
2800 mov eax, x0
2801 add eax, 1
2802 and eax, 0xFFFFFFFE
2803 cmp eax, x1
2804 jge qEnd
2805
2806 mov edi, target
2807
2808 qLoop:
2809 movntq [edi+8*eax+0], mm0
2810 movntq [edi+8*eax+8], mm1
2811
2812 add eax, 2
2813 cmp eax, x1
2814 jl qLoop
2815 qEnd:
2816 emms
2817 }
2818
2819 if((x1 & 1) != 0)
2820 {
2821 target[8 * (x1 & ~1) + 0 + 0] = (color & 0x000000FF) >> 0;
2822 target[8 * (x1 & ~1) + 0 + 4] = (color & 0x00FF0000) >> 16;
2823 target[8 * (x1 & ~1) + 0 + 8] = (color & 0x0000FF00) >> 8;
2824 target[8 * (x1 & ~1) + 0 + 12] = (color & 0xFF000000) >> 24;
2825
2826 target[8 * (x1 & ~1) + 2 + 0] = (color & 0x000000FF) >> 0;
2827 target[8 * (x1 & ~1) + 2 + 4] = (color & 0x00FF0000) >> 16;
2828 target[8 * (x1 & ~1) + 2 + 8] = (color & 0x0000FF00) >> 8;
2829 target[8 * (x1 & ~1) + 2 + 12] = (color & 0xFF000000) >> 24;
2830 }
2831
2832 y++;
2833 }
2834 else
2835 {
2836 for(int x = x0; x < x1; x++)
2837 {
2838 target[8 * (x & ~1) + (x & 1) + 0] = (color & 0x000000FF) >> 0;
2839 target[8 * (x & ~1) + (x & 1) + 4] = (color & 0x00FF0000) >> 16;
2840 target[8 * (x & ~1) + (x & 1) + 8] = (color & 0x0000FF00) >> 8;
2841 target[8 * (x & ~1) + (x & 1) + 12] = (color & 0xFF000000) >> 24;
2842 }
2843 }
2844 }
2845 }*/
2846 }
2847
2848 void Surface::clearDepthBuffer(float depth, int x0, int y0, int width, int height)
2849 {
2850 // Not overlapping
2851 if(x0 > internal.width) return;
2852 if(y0 > internal.height) return;
2853 if(x0 + width < 0) return;
2854 if(y0 + height < 0) return;
2855
2856 // Clip against dimensions
2857 if(x0 < 0) {width += x0; x0 = 0;}
2858 if(x0 + width > internal.width) width = internal.width - x0;
2859 if(y0 < 0) {height += y0; y0 = 0;}
2860 if(y0 + height > internal.height) height = internal.height - y0;
2861
2862 const bool entire = x0 == 0 && y0 == 0 && width == internal.width && height == internal.height;
2863 const Lock lock = entire ? LOCK_DISCARD : LOCK_WRITEONLY;
2864
2865 int width2 = (internal.width + 1) & ~1;
2866
2867 int x1 = x0 + width;
2868 int y1 = y0 + height;
2869
2870 if(internal.format == FORMAT_D32F_LOCKABLE ||
John Bauman66b8ab22014-05-06 15:57:45 -04002871 internal.format == FORMAT_D32FS8_TEXTURE ||
2872 internal.format == FORMAT_D32FS8_SHADOW)
John Bauman89401822014-05-06 15:04:28 -04002873 {
2874 float *target = (float*)lockInternal(0, 0, 0, lock, PUBLIC) + x0 + width2 * y0;
2875
2876 for(int z = 0; z < internal.depth; z++)
2877 {
2878 for(int y = y0; y < y1; y++)
2879 {
Nicolas Capens5ba566b2015-05-25 17:11:04 -04002880 memfill4(target, (int&)depth, 4 * width);
John Bauman89401822014-05-06 15:04:28 -04002881 target += width2;
2882 }
2883 }
2884
2885 unlockInternal();
2886 }
2887 else // Quad layout
2888 {
2889 if(complementaryDepthBuffer)
2890 {
2891 depth = 1 - depth;
2892 }
2893
2894 float *buffer = (float*)lockInternal(0, 0, 0, lock, PUBLIC);
2895
2896 for(int z = 0; z < internal.depth; z++)
2897 {
2898 for(int y = y0; y < y1; y++)
2899 {
2900 float *target = buffer + (y & ~1) * width2 + (y & 1) * 2;
2901
2902 if((y & 1) == 0 && y + 1 < y1) // Fill quad line at once
2903 {
2904 if((x0 & 1) != 0)
2905 {
2906 target[(x0 & ~1) * 2 + 1] = depth;
2907 target[(x0 & ~1) * 2 + 3] = depth;
2908 }
2909
2910 // for(int x2 = ((x0 + 1) & ~1) * 2; x2 < x1 * 2; x2 += 4)
2911 // {
2912 // target[x2 + 0] = depth;
2913 // target[x2 + 1] = depth;
2914 // target[x2 + 2] = depth;
2915 // target[x2 + 3] = depth;
2916 // }
2917
2918 // __asm
2919 // {
2920 // movss xmm0, depth
2921 // shufps xmm0, xmm0, 0x00
2922 //
2923 // mov eax, x0
2924 // add eax, 1
2925 // and eax, 0xFFFFFFFE
2926 // cmp eax, x1
2927 // jge qEnd
2928 //
2929 // mov edi, target
2930 //
2931 // qLoop:
2932 // movntps [edi+8*eax], xmm0
2933 //
2934 // add eax, 2
2935 // cmp eax, x1
2936 // jl qLoop
2937 // qEnd:
2938 // }
2939
Nicolas Capens5ba566b2015-05-25 17:11:04 -04002940 memfill4(&target[((x0 + 1) & ~1) * 2], (int&)depth, 8 * ((x1 & ~1) - ((x0 + 1) & ~1)));
John Bauman89401822014-05-06 15:04:28 -04002941
2942 if((x1 & 1) != 0)
2943 {
2944 target[(x1 & ~1) * 2 + 0] = depth;
2945 target[(x1 & ~1) * 2 + 2] = depth;
2946 }
2947
2948 y++;
2949 }
2950 else
2951 {
2952 for(int x = x0; x < x1; x++)
2953 {
2954 target[(x & ~1) * 2 + (x & 1)] = depth;
2955 }
2956 }
2957 }
2958
2959 buffer += internal.sliceP;
2960 }
2961
2962 unlockInternal();
2963 }
2964 }
2965
2966 void Surface::clearStencilBuffer(unsigned char s, unsigned char mask, int x0, int y0, int width, int height)
2967 {
2968 // Not overlapping
2969 if(x0 > internal.width) return;
2970 if(y0 > internal.height) return;
2971 if(x0 + width < 0) return;
2972 if(y0 + height < 0) return;
2973
2974 // Clip against dimensions
2975 if(x0 < 0) {width += x0; x0 = 0;}
2976 if(x0 + width > internal.width) width = internal.width - x0;
2977 if(y0 < 0) {height += y0; y0 = 0;}
2978 if(y0 + height > internal.height) height = internal.height - y0;
2979
2980 int width2 = (internal.width + 1) & ~1;
2981
2982 int x1 = x0 + width;
2983 int y1 = y0 + height;
2984
2985 unsigned char maskedS = s & mask;
2986 unsigned char invMask = ~mask;
2987 unsigned int fill = maskedS;
2988 fill = fill | (fill << 8) | (fill << 16) + (fill << 24);
2989
2990 if(false)
2991 {
2992 char *target = (char*)lockStencil(0, PUBLIC) + x0 + width2 * y0;
2993
2994 for(int z = 0; z < stencil.depth; z++)
2995 {
2996 for(int y = y0; y < y0 + height; y++)
2997 {
2998 if(mask == 0xFF)
2999 {
Nicolas Capens5ba566b2015-05-25 17:11:04 -04003000 memfill4(target, fill, width);
John Bauman89401822014-05-06 15:04:28 -04003001 }
3002 else
3003 {
3004 for(int x = 0; x < width; x++)
3005 {
3006 target[x] = maskedS | (target[x] & invMask);
3007 }
3008 }
3009
3010 target += width2;
3011 }
3012 }
3013
3014 unlockStencil();
3015 }
3016 else // Quad layout
3017 {
3018 char *buffer = (char*)lockStencil(0, PUBLIC);
3019
3020 if(mask == 0xFF)
3021 {
3022 for(int z = 0; z < stencil.depth; z++)
3023 {
3024 for(int y = y0; y < y1; y++)
3025 {
3026 char *target = buffer + (y & ~1) * width2 + (y & 1) * 2;
3027
3028 if((y & 1) == 0 && y + 1 < y1 && mask == 0xFF) // Fill quad line at once
3029 {
3030 if((x0 & 1) != 0)
3031 {
3032 target[(x0 & ~1) * 2 + 1] = fill;
3033 target[(x0 & ~1) * 2 + 3] = fill;
3034 }
3035
Nicolas Capens5ba566b2015-05-25 17:11:04 -04003036 memfill4(&target[((x0 + 1) & ~1) * 2], fill, ((x1 + 1) & ~1) * 2 - ((x0 + 1) & ~1) * 2);
John Bauman89401822014-05-06 15:04:28 -04003037
3038 if((x1 & 1) != 0)
3039 {
3040 target[(x1 & ~1) * 2 + 0] = fill;
3041 target[(x1 & ~1) * 2 + 2] = fill;
3042 }
3043
3044 y++;
3045 }
3046 else
3047 {
3048 for(int x = x0; x < x1; x++)
3049 {
3050 target[(x & ~1) * 2 + (x & 1)] = maskedS | (target[x] & invMask);
3051 }
3052 }
3053 }
3054
3055 buffer += stencil.sliceP;
3056 }
3057 }
3058
3059 unlockStencil();
3060 }
3061 }
3062
3063 void Surface::fill(const Color<float> &color, int x0, int y0, int width, int height)
3064 {
3065 unsigned char *row;
3066 Buffer *buffer;
3067
3068 if(internal.dirty)
3069 {
3070 row = (unsigned char*)lockInternal(x0, y0, 0, LOCK_WRITEONLY, PUBLIC);
3071 buffer = &internal;
3072 }
3073 else
3074 {
3075 row = (unsigned char*)lockExternal(x0, y0, 0, LOCK_WRITEONLY, PUBLIC);
3076 buffer = &external;
3077 }
3078
3079 if(buffer->bytes <= 4)
3080 {
3081 int c;
3082 buffer->write(&c, color);
3083
3084 if(buffer->bytes <= 1) c = (c << 8) | c;
3085 if(buffer->bytes <= 2) c = (c << 16) | c;
3086
3087 for(int y = 0; y < height; y++)
3088 {
Nicolas Capens5ba566b2015-05-25 17:11:04 -04003089 memfill4(row, c, width * buffer->bytes);
John Bauman89401822014-05-06 15:04:28 -04003090
3091 row += buffer->pitchB;
3092 }
3093 }
3094 else // Generic
3095 {
3096 for(int y = 0; y < height; y++)
3097 {
3098 unsigned char *element = row;
3099
3100 for(int x = 0; x < width; x++)
3101 {
3102 buffer->write(element, color);
3103
3104 element += buffer->bytes;
3105 }
3106
3107 row += buffer->pitchB;
3108 }
3109 }
3110
3111 if(buffer == &internal)
3112 {
3113 unlockInternal();
3114 }
3115 else
3116 {
3117 unlockExternal();
3118 }
3119 }
3120
3121 Color<float> Surface::readExternal(int x, int y, int z) const
3122 {
3123 ASSERT(external.lock != LOCK_UNLOCKED);
3124
3125 return external.read(x, y, z);
3126 }
3127
3128 Color<float> Surface::readExternal(int x, int y) const
3129 {
3130 ASSERT(external.lock != LOCK_UNLOCKED);
3131
3132 return external.read(x, y);
3133 }
3134
3135 Color<float> Surface::sampleExternal(float x, float y, float z) const
3136 {
3137 ASSERT(external.lock != LOCK_UNLOCKED);
3138
3139 return external.sample(x, y, z);
3140 }
3141
3142 Color<float> Surface::sampleExternal(float x, float y) const
3143 {
3144 ASSERT(external.lock != LOCK_UNLOCKED);
3145
3146 return external.sample(x, y);
3147 }
3148
3149 void Surface::writeExternal(int x, int y, int z, const Color<float> &color)
3150 {
3151 ASSERT(external.lock != LOCK_UNLOCKED);
3152
3153 external.write(x, y, z, color);
3154 }
3155
3156 void Surface::writeExternal(int x, int y, const Color<float> &color)
3157 {
3158 ASSERT(external.lock != LOCK_UNLOCKED);
3159
3160 external.write(x, y, color);
3161 }
3162
3163 Color<float> Surface::readInternal(int x, int y, int z) const
3164 {
3165 ASSERT(internal.lock != LOCK_UNLOCKED);
3166
3167 return internal.read(x, y, z);
3168 }
3169
3170 Color<float> Surface::readInternal(int x, int y) const
3171 {
3172 ASSERT(internal.lock != LOCK_UNLOCKED);
3173
3174 return internal.read(x, y);
3175 }
3176
3177 Color<float> Surface::sampleInternal(float x, float y, float z) const
3178 {
3179 ASSERT(internal.lock != LOCK_UNLOCKED);
3180
3181 return internal.sample(x, y, z);
3182 }
3183
3184 Color<float> Surface::sampleInternal(float x, float y) const
3185 {
3186 ASSERT(internal.lock != LOCK_UNLOCKED);
3187
3188 return internal.sample(x, y);
3189 }
3190
3191 void Surface::writeInternal(int x, int y, int z, const Color<float> &color)
3192 {
3193 ASSERT(internal.lock != LOCK_UNLOCKED);
3194
3195 internal.write(x, y, z, color);
3196 }
3197
3198 void Surface::writeInternal(int x, int y, const Color<float> &color)
3199 {
3200 ASSERT(internal.lock != LOCK_UNLOCKED);
3201
3202 internal.write(x, y, color);
3203 }
3204
3205 bool Surface::hasStencil() const
3206 {
3207 return isStencil(external.format);
3208 }
3209
3210 bool Surface::hasDepth() const
3211 {
3212 return isDepth(external.format);
3213 }
3214
3215 bool Surface::hasPalette() const
3216 {
3217 return isPalette(external.format);
3218 }
3219
3220 bool Surface::isRenderTarget() const
3221 {
3222 return renderTarget;
3223 }
3224
3225 bool Surface::hasDirtyMipmaps() const
3226 {
3227 return dirtyMipmaps;
3228 }
3229
3230 void Surface::cleanMipmaps()
3231 {
3232 dirtyMipmaps = false;
3233 }
3234
3235 Resource *Surface::getResource()
3236 {
3237 return resource;
3238 }
3239
3240 bool Surface::identicalFormats() const
3241 {
John Bauman66b8ab22014-05-06 15:57:45 -04003242 return external.format == internal.format &&
3243 external.width == internal.width &&
Nicolas Capens22658242014-11-29 00:31:41 -05003244 external.height == internal.height &&
3245 external.depth == internal.depth &&
3246 external.pitchB == internal.pitchB &&
3247 external.sliceB == internal.sliceB;
John Bauman89401822014-05-06 15:04:28 -04003248 }
3249
3250 Format Surface::selectInternalFormat(Format format) const
3251 {
3252 switch(format)
3253 {
3254 case FORMAT_NULL:
3255 return FORMAT_NULL;
3256 case FORMAT_P8:
3257 case FORMAT_A8P8:
3258 case FORMAT_A4R4G4B4:
3259 case FORMAT_A1R5G5B5:
3260 case FORMAT_A8R3G3B2:
3261 return FORMAT_A8R8G8B8;
3262 case FORMAT_A8:
3263 return FORMAT_A8;
3264 case FORMAT_R8:
3265 return FORMAT_R8;
3266 case FORMAT_A2R10G10B10:
3267 case FORMAT_A2B10G10R10:
3268 case FORMAT_A16B16G16R16:
3269 return FORMAT_A16B16G16R16;
3270 case FORMAT_G8R8:
3271 return FORMAT_G8R8;
3272 case FORMAT_G16R16:
3273 return FORMAT_G16R16;
3274 case FORMAT_A8R8G8B8:
John Bauman89401822014-05-06 15:04:28 -04003275 if(lockable || !quadLayoutEnabled)
3276 {
3277 return FORMAT_A8R8G8B8;
3278 }
3279 else
3280 {
3281 return FORMAT_A8G8R8B8Q;
3282 }
Nicolas Capens80594422015-06-09 16:42:56 -04003283 case FORMAT_R5G5B5A1:
3284 case FORMAT_R4G4B4A4:
Nicolas Capensef77ac12015-03-28 21:48:51 -04003285 case FORMAT_A8B8G8R8:
3286 return FORMAT_A8B8G8R8;
John Bauman89401822014-05-06 15:04:28 -04003287 case FORMAT_R3G3B2:
3288 case FORMAT_R5G6B5:
3289 case FORMAT_R8G8B8:
3290 case FORMAT_X4R4G4B4:
3291 case FORMAT_X1R5G5B5:
3292 case FORMAT_X8R8G8B8:
John Bauman89401822014-05-06 15:04:28 -04003293 if(lockable || !quadLayoutEnabled)
3294 {
3295 return FORMAT_X8R8G8B8;
3296 }
3297 else
3298 {
3299 return FORMAT_X8G8R8B8Q;
3300 }
Nicolas Capens80594422015-06-09 16:42:56 -04003301 case FORMAT_B8G8R8:
Nicolas Capensef77ac12015-03-28 21:48:51 -04003302 case FORMAT_X8B8G8R8:
3303 return FORMAT_X8B8G8R8;
John Bauman89401822014-05-06 15:04:28 -04003304 // Compressed formats
3305 #if S3TC_SUPPORT
3306 case FORMAT_DXT1:
3307 case FORMAT_DXT3:
3308 case FORMAT_DXT5:
3309 return FORMAT_A8R8G8B8;
John Bauman66b8ab22014-05-06 15:57:45 -04003310 #endif
John Bauman89401822014-05-06 15:04:28 -04003311 case FORMAT_ATI1:
3312 return FORMAT_R8;
3313 case FORMAT_ATI2:
3314 return FORMAT_G8R8;
Nicolas Capens22658242014-11-29 00:31:41 -05003315 case FORMAT_ETC1:
3316 return FORMAT_X8R8G8B8;
John Bauman89401822014-05-06 15:04:28 -04003317 // Bumpmap formats
3318 case FORMAT_V8U8: return FORMAT_V8U8;
3319 case FORMAT_L6V5U5: return FORMAT_X8L8V8U8;
3320 case FORMAT_Q8W8V8U8: return FORMAT_Q8W8V8U8;
3321 case FORMAT_X8L8V8U8: return FORMAT_X8L8V8U8;
3322 case FORMAT_V16U16: return FORMAT_V16U16;
3323 case FORMAT_A2W10V10U10: return FORMAT_A16W16V16U16;
3324 case FORMAT_Q16W16V16U16: return FORMAT_Q16W16V16U16;
3325 // Floating-point formats
Nicolas Capens80594422015-06-09 16:42:56 -04003326 case FORMAT_A16F: return FORMAT_A32B32G32R32F;
John Bauman89401822014-05-06 15:04:28 -04003327 case FORMAT_R16F: return FORMAT_R32F;
3328 case FORMAT_G16R16F: return FORMAT_G32R32F;
Nicolas Capens80594422015-06-09 16:42:56 -04003329 case FORMAT_B16G16R16F: return FORMAT_A32B32G32R32F;
John Bauman89401822014-05-06 15:04:28 -04003330 case FORMAT_A16B16G16R16F: return FORMAT_A32B32G32R32F;
Nicolas Capens80594422015-06-09 16:42:56 -04003331 case FORMAT_A32F: return FORMAT_A32B32G32R32F;
John Bauman89401822014-05-06 15:04:28 -04003332 case FORMAT_R32F: return FORMAT_R32F;
3333 case FORMAT_G32R32F: return FORMAT_G32R32F;
Nicolas Capens80594422015-06-09 16:42:56 -04003334 case FORMAT_B32G32R32F: return FORMAT_A32B32G32R32F;
John Bauman89401822014-05-06 15:04:28 -04003335 case FORMAT_A32B32G32R32F: return FORMAT_A32B32G32R32F;
3336 // Luminance formats
3337 case FORMAT_L8: return FORMAT_L8;
3338 case FORMAT_A4L4: return FORMAT_A8L8;
3339 case FORMAT_L16: return FORMAT_L16;
3340 case FORMAT_A8L8: return FORMAT_A8L8;
Nicolas Capens80594422015-06-09 16:42:56 -04003341 case FORMAT_L16F: return FORMAT_A32B32G32R32F;
3342 case FORMAT_A16L16F: return FORMAT_A32B32G32R32F;
3343 case FORMAT_L32F: return FORMAT_A32B32G32R32F;
3344 case FORMAT_A32L32F: return FORMAT_A32B32G32R32F;
John Bauman89401822014-05-06 15:04:28 -04003345 // Depth/stencil formats
3346 case FORMAT_D16:
3347 case FORMAT_D32:
3348 case FORMAT_D24X8:
3349 case FORMAT_D24S8:
3350 case FORMAT_D24FS8:
3351 if(hasParent) // Texture
3352 {
John Bauman66b8ab22014-05-06 15:57:45 -04003353 return FORMAT_D32FS8_SHADOW;
John Bauman89401822014-05-06 15:04:28 -04003354 }
3355 else if(complementaryDepthBuffer)
3356 {
3357 return FORMAT_D32F_COMPLEMENTARY;
3358 }
3359 else
3360 {
3361 return FORMAT_D32F;
3362 }
John Bauman66b8ab22014-05-06 15:57:45 -04003363 case FORMAT_D32F_LOCKABLE: return FORMAT_D32F_LOCKABLE;
3364 case FORMAT_D32FS8_TEXTURE: return FORMAT_D32FS8_TEXTURE;
3365 case FORMAT_INTZ: return FORMAT_D32FS8_TEXTURE;
3366 case FORMAT_DF24S8: return FORMAT_D32FS8_SHADOW;
3367 case FORMAT_DF16S8: return FORMAT_D32FS8_SHADOW;
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04003368 case FORMAT_YV12_BT601: return FORMAT_YV12_BT601;
3369 case FORMAT_YV12_BT709: return FORMAT_YV12_BT709;
3370 case FORMAT_YV12_JFIF: return FORMAT_YV12_JFIF;
John Bauman89401822014-05-06 15:04:28 -04003371 default:
3372 ASSERT(false);
3373 }
3374
3375 return FORMAT_NULL;
3376 }
3377
3378 void Surface::setTexturePalette(unsigned int *palette)
3379 {
3380 Surface::palette = palette;
3381 Surface::paletteID++;
3382 }
3383
3384 void Surface::resolve()
3385 {
3386 if(internal.depth <= 1 || !internal.dirty || !renderTarget || internal.format == FORMAT_NULL)
3387 {
3388 return;
3389 }
3390
3391 void *source = internal.lockRect(0, 0, 0, LOCK_READWRITE);
3392
3393 int quality = internal.depth;
3394 int width = internal.width;
3395 int height = internal.height;
3396 int pitch = internal.pitchB;
3397 int slice = internal.sliceB;
3398
3399 unsigned char *source0 = (unsigned char*)source;
3400 unsigned char *source1 = source0 + slice;
3401 unsigned char *source2 = source1 + slice;
3402 unsigned char *source3 = source2 + slice;
3403 unsigned char *source4 = source3 + slice;
3404 unsigned char *source5 = source4 + slice;
3405 unsigned char *source6 = source5 + slice;
3406 unsigned char *source7 = source6 + slice;
3407 unsigned char *source8 = source7 + slice;
3408 unsigned char *source9 = source8 + slice;
3409 unsigned char *sourceA = source9 + slice;
3410 unsigned char *sourceB = sourceA + slice;
3411 unsigned char *sourceC = sourceB + slice;
3412 unsigned char *sourceD = sourceC + slice;
3413 unsigned char *sourceE = sourceD + slice;
3414 unsigned char *sourceF = sourceE + slice;
3415
Nicolas Capensef77ac12015-03-28 21:48:51 -04003416 if(internal.format == FORMAT_X8R8G8B8 || internal.format == FORMAT_A8R8G8B8 || internal.format == FORMAT_X8B8G8R8 || internal.format == FORMAT_A8B8G8R8)
John Bauman89401822014-05-06 15:04:28 -04003417 {
3418 if(CPUID::supportsSSE2() && (width % 4) == 0)
3419 {
3420 if(internal.depth == 2)
3421 {
3422 for(int y = 0; y < height; y++)
3423 {
3424 for(int x = 0; x < width; x += 4)
3425 {
3426 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
3427 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
3428
3429 c0 = _mm_avg_epu8(c0, c1);
3430
3431 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
3432 }
3433
3434 source0 += pitch;
3435 source1 += pitch;
3436 }
3437 }
3438 else if(internal.depth == 4)
3439 {
3440 for(int y = 0; y < height; y++)
3441 {
3442 for(int x = 0; x < width; x += 4)
3443 {
3444 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
3445 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
3446 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x));
3447 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x));
3448
3449 c0 = _mm_avg_epu8(c0, c1);
3450 c2 = _mm_avg_epu8(c2, c3);
3451 c0 = _mm_avg_epu8(c0, c2);
3452
3453 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
3454 }
3455
3456 source0 += pitch;
3457 source1 += pitch;
3458 source2 += pitch;
3459 source3 += pitch;
3460 }
3461 }
3462 else if(internal.depth == 8)
3463 {
3464 for(int y = 0; y < height; y++)
3465 {
3466 for(int x = 0; x < width; x += 4)
3467 {
3468 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
3469 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
3470 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x));
3471 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x));
3472 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x));
3473 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x));
3474 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x));
3475 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x));
3476
3477 c0 = _mm_avg_epu8(c0, c1);
3478 c2 = _mm_avg_epu8(c2, c3);
3479 c4 = _mm_avg_epu8(c4, c5);
3480 c6 = _mm_avg_epu8(c6, c7);
3481 c0 = _mm_avg_epu8(c0, c2);
3482 c4 = _mm_avg_epu8(c4, c6);
3483 c0 = _mm_avg_epu8(c0, c4);
3484
3485 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
3486 }
3487
3488 source0 += pitch;
3489 source1 += pitch;
3490 source2 += pitch;
3491 source3 += pitch;
3492 source4 += pitch;
3493 source5 += pitch;
3494 source6 += pitch;
3495 source7 += pitch;
3496 }
3497 }
3498 else if(internal.depth == 16)
3499 {
3500 for(int y = 0; y < height; y++)
3501 {
3502 for(int x = 0; x < width; x += 4)
3503 {
3504 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
3505 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
3506 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x));
3507 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x));
3508 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x));
3509 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x));
3510 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x));
3511 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x));
3512 __m128i c8 = _mm_load_si128((__m128i*)(source8 + 4 * x));
3513 __m128i c9 = _mm_load_si128((__m128i*)(source9 + 4 * x));
3514 __m128i cA = _mm_load_si128((__m128i*)(sourceA + 4 * x));
3515 __m128i cB = _mm_load_si128((__m128i*)(sourceB + 4 * x));
3516 __m128i cC = _mm_load_si128((__m128i*)(sourceC + 4 * x));
3517 __m128i cD = _mm_load_si128((__m128i*)(sourceD + 4 * x));
3518 __m128i cE = _mm_load_si128((__m128i*)(sourceE + 4 * x));
3519 __m128i cF = _mm_load_si128((__m128i*)(sourceF + 4 * x));
3520
3521 c0 = _mm_avg_epu8(c0, c1);
3522 c2 = _mm_avg_epu8(c2, c3);
3523 c4 = _mm_avg_epu8(c4, c5);
3524 c6 = _mm_avg_epu8(c6, c7);
3525 c8 = _mm_avg_epu8(c8, c9);
3526 cA = _mm_avg_epu8(cA, cB);
3527 cC = _mm_avg_epu8(cC, cD);
3528 cE = _mm_avg_epu8(cE, cF);
3529 c0 = _mm_avg_epu8(c0, c2);
3530 c4 = _mm_avg_epu8(c4, c6);
3531 c8 = _mm_avg_epu8(c8, cA);
3532 cC = _mm_avg_epu8(cC, cE);
3533 c0 = _mm_avg_epu8(c0, c4);
3534 c8 = _mm_avg_epu8(c8, cC);
3535 c0 = _mm_avg_epu8(c0, c8);
3536
3537 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
3538 }
3539
3540 source0 += pitch;
3541 source1 += pitch;
3542 source2 += pitch;
3543 source3 += pitch;
3544 source4 += pitch;
3545 source5 += pitch;
3546 source6 += pitch;
3547 source7 += pitch;
3548 source8 += pitch;
3549 source9 += pitch;
3550 sourceA += pitch;
3551 sourceB += pitch;
3552 sourceC += pitch;
3553 sourceD += pitch;
3554 sourceE += pitch;
3555 sourceF += pitch;
3556 }
3557 }
3558 else ASSERT(false);
3559 }
3560 else
3561 {
3562 #define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7F7F7F7F) + (((x) ^ (y)) & 0x01010101))
3563
3564 if(internal.depth == 2)
3565 {
3566 for(int y = 0; y < height; y++)
3567 {
3568 for(int x = 0; x < width; x++)
3569 {
3570 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
3571 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
3572
3573 c0 = AVERAGE(c0, c1);
3574
3575 *(unsigned int*)(source0 + 4 * x) = c0;
3576 }
3577
3578 source0 += pitch;
3579 source1 += pitch;
3580 }
3581 }
3582 else if(internal.depth == 4)
3583 {
3584 for(int y = 0; y < height; y++)
3585 {
3586 for(int x = 0; x < width; x++)
3587 {
3588 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
3589 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
3590 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
3591 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
3592
3593 c0 = AVERAGE(c0, c1);
3594 c2 = AVERAGE(c2, c3);
3595 c0 = AVERAGE(c0, c2);
3596
3597 *(unsigned int*)(source0 + 4 * x) = c0;
3598 }
3599
3600 source0 += pitch;
3601 source1 += pitch;
3602 source2 += pitch;
3603 source3 += pitch;
3604 }
3605 }
3606 else if(internal.depth == 8)
3607 {
3608 for(int y = 0; y < height; y++)
3609 {
3610 for(int x = 0; x < width; x++)
3611 {
3612 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
3613 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
3614 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
3615 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
3616 unsigned int c4 = *(unsigned int*)(source4 + 4 * x);
3617 unsigned int c5 = *(unsigned int*)(source5 + 4 * x);
3618 unsigned int c6 = *(unsigned int*)(source6 + 4 * x);
3619 unsigned int c7 = *(unsigned int*)(source7 + 4 * x);
3620
3621 c0 = AVERAGE(c0, c1);
3622 c2 = AVERAGE(c2, c3);
3623 c4 = AVERAGE(c4, c5);
3624 c6 = AVERAGE(c6, c7);
3625 c0 = AVERAGE(c0, c2);
3626 c4 = AVERAGE(c4, c6);
3627 c0 = AVERAGE(c0, c4);
3628
3629 *(unsigned int*)(source0 + 4 * x) = c0;
3630 }
3631
3632 source0 += pitch;
3633 source1 += pitch;
3634 source2 += pitch;
3635 source3 += pitch;
3636 source4 += pitch;
3637 source5 += pitch;
3638 source6 += pitch;
3639 source7 += pitch;
3640 }
3641 }
3642 else if(internal.depth == 16)
3643 {
3644 for(int y = 0; y < height; y++)
3645 {
3646 for(int x = 0; x < width; x++)
3647 {
3648 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
3649 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
3650 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
3651 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
3652 unsigned int c4 = *(unsigned int*)(source4 + 4 * x);
3653 unsigned int c5 = *(unsigned int*)(source5 + 4 * x);
3654 unsigned int c6 = *(unsigned int*)(source6 + 4 * x);
3655 unsigned int c7 = *(unsigned int*)(source7 + 4 * x);
3656 unsigned int c8 = *(unsigned int*)(source8 + 4 * x);
3657 unsigned int c9 = *(unsigned int*)(source9 + 4 * x);
3658 unsigned int cA = *(unsigned int*)(sourceA + 4 * x);
3659 unsigned int cB = *(unsigned int*)(sourceB + 4 * x);
3660 unsigned int cC = *(unsigned int*)(sourceC + 4 * x);
3661 unsigned int cD = *(unsigned int*)(sourceD + 4 * x);
3662 unsigned int cE = *(unsigned int*)(sourceE + 4 * x);
3663 unsigned int cF = *(unsigned int*)(sourceF + 4 * x);
3664
3665 c0 = AVERAGE(c0, c1);
3666 c2 = AVERAGE(c2, c3);
3667 c4 = AVERAGE(c4, c5);
3668 c6 = AVERAGE(c6, c7);
3669 c8 = AVERAGE(c8, c9);
3670 cA = AVERAGE(cA, cB);
3671 cC = AVERAGE(cC, cD);
3672 cE = AVERAGE(cE, cF);
3673 c0 = AVERAGE(c0, c2);
3674 c4 = AVERAGE(c4, c6);
3675 c8 = AVERAGE(c8, cA);
3676 cC = AVERAGE(cC, cE);
3677 c0 = AVERAGE(c0, c4);
3678 c8 = AVERAGE(c8, cC);
3679 c0 = AVERAGE(c0, c8);
3680
3681 *(unsigned int*)(source0 + 4 * x) = c0;
3682 }
3683
3684 source0 += pitch;
3685 source1 += pitch;
3686 source2 += pitch;
3687 source3 += pitch;
3688 source4 += pitch;
3689 source5 += pitch;
3690 source6 += pitch;
3691 source7 += pitch;
3692 source8 += pitch;
3693 source9 += pitch;
3694 sourceA += pitch;
3695 sourceB += pitch;
3696 sourceC += pitch;
3697 sourceD += pitch;
3698 sourceE += pitch;
3699 sourceF += pitch;
3700 }
3701 }
3702 else ASSERT(false);
3703
3704 #undef AVERAGE
3705 }
3706 }
3707 else if(internal.format == FORMAT_G16R16)
3708 {
3709 if(CPUID::supportsSSE2() && (width % 4) == 0)
3710 {
3711 if(internal.depth == 2)
3712 {
3713 for(int y = 0; y < height; y++)
3714 {
3715 for(int x = 0; x < width; x += 4)
3716 {
3717 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
3718 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
3719
3720 c0 = _mm_avg_epu16(c0, c1);
3721
3722 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
3723 }
3724
3725 source0 += pitch;
3726 source1 += pitch;
3727 }
3728 }
3729 else if(internal.depth == 4)
3730 {
3731 for(int y = 0; y < height; y++)
3732 {
3733 for(int x = 0; x < width; x += 4)
3734 {
3735 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
3736 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
3737 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x));
3738 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x));
3739
3740 c0 = _mm_avg_epu16(c0, c1);
3741 c2 = _mm_avg_epu16(c2, c3);
3742 c0 = _mm_avg_epu16(c0, c2);
3743
3744 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
3745 }
3746
3747 source0 += pitch;
3748 source1 += pitch;
3749 source2 += pitch;
3750 source3 += pitch;
3751 }
3752 }
3753 else if(internal.depth == 8)
3754 {
3755 for(int y = 0; y < height; y++)
3756 {
3757 for(int x = 0; x < width; x += 4)
3758 {
3759 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
3760 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
3761 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x));
3762 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x));
3763 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x));
3764 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x));
3765 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x));
3766 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x));
3767
3768 c0 = _mm_avg_epu16(c0, c1);
3769 c2 = _mm_avg_epu16(c2, c3);
3770 c4 = _mm_avg_epu16(c4, c5);
3771 c6 = _mm_avg_epu16(c6, c7);
3772 c0 = _mm_avg_epu16(c0, c2);
3773 c4 = _mm_avg_epu16(c4, c6);
3774 c0 = _mm_avg_epu16(c0, c4);
3775
3776 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
3777 }
3778
3779 source0 += pitch;
3780 source1 += pitch;
3781 source2 += pitch;
3782 source3 += pitch;
3783 source4 += pitch;
3784 source5 += pitch;
3785 source6 += pitch;
3786 source7 += pitch;
3787 }
3788 }
3789 else if(internal.depth == 16)
3790 {
3791 for(int y = 0; y < height; y++)
3792 {
3793 for(int x = 0; x < width; x += 4)
3794 {
3795 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
3796 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
3797 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x));
3798 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x));
3799 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x));
3800 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x));
3801 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x));
3802 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x));
3803 __m128i c8 = _mm_load_si128((__m128i*)(source8 + 4 * x));
3804 __m128i c9 = _mm_load_si128((__m128i*)(source9 + 4 * x));
3805 __m128i cA = _mm_load_si128((__m128i*)(sourceA + 4 * x));
3806 __m128i cB = _mm_load_si128((__m128i*)(sourceB + 4 * x));
3807 __m128i cC = _mm_load_si128((__m128i*)(sourceC + 4 * x));
3808 __m128i cD = _mm_load_si128((__m128i*)(sourceD + 4 * x));
3809 __m128i cE = _mm_load_si128((__m128i*)(sourceE + 4 * x));
3810 __m128i cF = _mm_load_si128((__m128i*)(sourceF + 4 * x));
3811
3812 c0 = _mm_avg_epu16(c0, c1);
3813 c2 = _mm_avg_epu16(c2, c3);
3814 c4 = _mm_avg_epu16(c4, c5);
3815 c6 = _mm_avg_epu16(c6, c7);
3816 c8 = _mm_avg_epu16(c8, c9);
3817 cA = _mm_avg_epu16(cA, cB);
3818 cC = _mm_avg_epu16(cC, cD);
3819 cE = _mm_avg_epu16(cE, cF);
3820 c0 = _mm_avg_epu16(c0, c2);
3821 c4 = _mm_avg_epu16(c4, c6);
3822 c8 = _mm_avg_epu16(c8, cA);
3823 cC = _mm_avg_epu16(cC, cE);
3824 c0 = _mm_avg_epu16(c0, c4);
3825 c8 = _mm_avg_epu16(c8, cC);
3826 c0 = _mm_avg_epu16(c0, c8);
3827
3828 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
3829 }
3830
3831 source0 += pitch;
3832 source1 += pitch;
3833 source2 += pitch;
3834 source3 += pitch;
3835 source4 += pitch;
3836 source5 += pitch;
3837 source6 += pitch;
3838 source7 += pitch;
3839 source8 += pitch;
3840 source9 += pitch;
3841 sourceA += pitch;
3842 sourceB += pitch;
3843 sourceC += pitch;
3844 sourceD += pitch;
3845 sourceE += pitch;
3846 sourceF += pitch;
3847 }
3848 }
3849 else ASSERT(false);
3850 }
3851 else
3852 {
3853 #define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7FFF7FFF) + (((x) ^ (y)) & 0x00010001))
3854
3855 if(internal.depth == 2)
3856 {
3857 for(int y = 0; y < height; y++)
3858 {
3859 for(int x = 0; x < width; x++)
3860 {
3861 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
3862 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
3863
3864 c0 = AVERAGE(c0, c1);
3865
3866 *(unsigned int*)(source0 + 4 * x) = c0;
3867 }
3868
3869 source0 += pitch;
3870 source1 += pitch;
3871 }
3872 }
3873 else if(internal.depth == 4)
3874 {
3875 for(int y = 0; y < height; y++)
3876 {
3877 for(int x = 0; x < width; x++)
3878 {
3879 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
3880 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
3881 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
3882 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
3883
3884 c0 = AVERAGE(c0, c1);
3885 c2 = AVERAGE(c2, c3);
3886 c0 = AVERAGE(c0, c2);
3887
3888 *(unsigned int*)(source0 + 4 * x) = c0;
3889 }
3890
3891 source0 += pitch;
3892 source1 += pitch;
3893 source2 += pitch;
3894 source3 += pitch;
3895 }
3896 }
3897 else if(internal.depth == 8)
3898 {
3899 for(int y = 0; y < height; y++)
3900 {
3901 for(int x = 0; x < width; x++)
3902 {
3903 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
3904 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
3905 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
3906 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
3907 unsigned int c4 = *(unsigned int*)(source4 + 4 * x);
3908 unsigned int c5 = *(unsigned int*)(source5 + 4 * x);
3909 unsigned int c6 = *(unsigned int*)(source6 + 4 * x);
3910 unsigned int c7 = *(unsigned int*)(source7 + 4 * x);
3911
3912 c0 = AVERAGE(c0, c1);
3913 c2 = AVERAGE(c2, c3);
3914 c4 = AVERAGE(c4, c5);
3915 c6 = AVERAGE(c6, c7);
3916 c0 = AVERAGE(c0, c2);
3917 c4 = AVERAGE(c4, c6);
3918 c0 = AVERAGE(c0, c4);
3919
3920 *(unsigned int*)(source0 + 4 * x) = c0;
3921 }
3922
3923 source0 += pitch;
3924 source1 += pitch;
3925 source2 += pitch;
3926 source3 += pitch;
3927 source4 += pitch;
3928 source5 += pitch;
3929 source6 += pitch;
3930 source7 += pitch;
3931 }
3932 }
3933 else if(internal.depth == 16)
3934 {
3935 for(int y = 0; y < height; y++)
3936 {
3937 for(int x = 0; x < width; x++)
3938 {
3939 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
3940 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
3941 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
3942 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
3943 unsigned int c4 = *(unsigned int*)(source4 + 4 * x);
3944 unsigned int c5 = *(unsigned int*)(source5 + 4 * x);
3945 unsigned int c6 = *(unsigned int*)(source6 + 4 * x);
3946 unsigned int c7 = *(unsigned int*)(source7 + 4 * x);
3947 unsigned int c8 = *(unsigned int*)(source8 + 4 * x);
3948 unsigned int c9 = *(unsigned int*)(source9 + 4 * x);
3949 unsigned int cA = *(unsigned int*)(sourceA + 4 * x);
3950 unsigned int cB = *(unsigned int*)(sourceB + 4 * x);
3951 unsigned int cC = *(unsigned int*)(sourceC + 4 * x);
3952 unsigned int cD = *(unsigned int*)(sourceD + 4 * x);
3953 unsigned int cE = *(unsigned int*)(sourceE + 4 * x);
3954 unsigned int cF = *(unsigned int*)(sourceF + 4 * x);
3955
3956 c0 = AVERAGE(c0, c1);
3957 c2 = AVERAGE(c2, c3);
3958 c4 = AVERAGE(c4, c5);
3959 c6 = AVERAGE(c6, c7);
3960 c8 = AVERAGE(c8, c9);
3961 cA = AVERAGE(cA, cB);
3962 cC = AVERAGE(cC, cD);
3963 cE = AVERAGE(cE, cF);
3964 c0 = AVERAGE(c0, c2);
3965 c4 = AVERAGE(c4, c6);
3966 c8 = AVERAGE(c8, cA);
3967 cC = AVERAGE(cC, cE);
3968 c0 = AVERAGE(c0, c4);
3969 c8 = AVERAGE(c8, cC);
3970 c0 = AVERAGE(c0, c8);
3971
3972 *(unsigned int*)(source0 + 4 * x) = c0;
3973 }
3974
3975 source0 += pitch;
3976 source1 += pitch;
3977 source2 += pitch;
3978 source3 += pitch;
3979 source4 += pitch;
3980 source5 += pitch;
3981 source6 += pitch;
3982 source7 += pitch;
3983 source8 += pitch;
3984 source9 += pitch;
3985 sourceA += pitch;
3986 sourceB += pitch;
3987 sourceC += pitch;
3988 sourceD += pitch;
3989 sourceE += pitch;
3990 sourceF += pitch;
3991 }
3992 }
3993 else ASSERT(false);
3994
3995 #undef AVERAGE
3996 }
3997 }
3998 else if(internal.format == FORMAT_A16B16G16R16)
3999 {
4000 if(CPUID::supportsSSE2() && (width % 2) == 0)
4001 {
4002 if(internal.depth == 2)
4003 {
4004 for(int y = 0; y < height; y++)
4005 {
4006 for(int x = 0; x < width; x += 2)
4007 {
4008 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x));
4009 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x));
4010
4011 c0 = _mm_avg_epu16(c0, c1);
4012
4013 _mm_store_si128((__m128i*)(source0 + 8 * x), c0);
4014 }
4015
4016 source0 += pitch;
4017 source1 += pitch;
4018 }
4019 }
4020 else if(internal.depth == 4)
4021 {
4022 for(int y = 0; y < height; y++)
4023 {
4024 for(int x = 0; x < width; x += 2)
4025 {
4026 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x));
4027 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x));
4028 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 8 * x));
4029 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 8 * x));
4030
4031 c0 = _mm_avg_epu16(c0, c1);
4032 c2 = _mm_avg_epu16(c2, c3);
4033 c0 = _mm_avg_epu16(c0, c2);
4034
4035 _mm_store_si128((__m128i*)(source0 + 8 * x), c0);
4036 }
4037
4038 source0 += pitch;
4039 source1 += pitch;
4040 source2 += pitch;
4041 source3 += pitch;
4042 }
4043 }
4044 else if(internal.depth == 8)
4045 {
4046 for(int y = 0; y < height; y++)
4047 {
4048 for(int x = 0; x < width; x += 2)
4049 {
4050 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x));
4051 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x));
4052 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 8 * x));
4053 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 8 * x));
4054 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 8 * x));
4055 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 8 * x));
4056 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 8 * x));
4057 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 8 * x));
4058
4059 c0 = _mm_avg_epu16(c0, c1);
4060 c2 = _mm_avg_epu16(c2, c3);
4061 c4 = _mm_avg_epu16(c4, c5);
4062 c6 = _mm_avg_epu16(c6, c7);
4063 c0 = _mm_avg_epu16(c0, c2);
4064 c4 = _mm_avg_epu16(c4, c6);
4065 c0 = _mm_avg_epu16(c0, c4);
4066
4067 _mm_store_si128((__m128i*)(source0 + 8 * x), c0);
4068 }
4069
4070 source0 += pitch;
4071 source1 += pitch;
4072 source2 += pitch;
4073 source3 += pitch;
4074 source4 += pitch;
4075 source5 += pitch;
4076 source6 += pitch;
4077 source7 += pitch;
4078 }
4079 }
4080 else if(internal.depth == 16)
4081 {
4082 for(int y = 0; y < height; y++)
4083 {
4084 for(int x = 0; x < width; x += 2)
4085 {
4086 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x));
4087 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x));
4088 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 8 * x));
4089 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 8 * x));
4090 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 8 * x));
4091 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 8 * x));
4092 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 8 * x));
4093 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 8 * x));
4094 __m128i c8 = _mm_load_si128((__m128i*)(source8 + 8 * x));
4095 __m128i c9 = _mm_load_si128((__m128i*)(source9 + 8 * x));
4096 __m128i cA = _mm_load_si128((__m128i*)(sourceA + 8 * x));
4097 __m128i cB = _mm_load_si128((__m128i*)(sourceB + 8 * x));
4098 __m128i cC = _mm_load_si128((__m128i*)(sourceC + 8 * x));
4099 __m128i cD = _mm_load_si128((__m128i*)(sourceD + 8 * x));
4100 __m128i cE = _mm_load_si128((__m128i*)(sourceE + 8 * x));
4101 __m128i cF = _mm_load_si128((__m128i*)(sourceF + 8 * x));
4102
4103 c0 = _mm_avg_epu16(c0, c1);
4104 c2 = _mm_avg_epu16(c2, c3);
4105 c4 = _mm_avg_epu16(c4, c5);
4106 c6 = _mm_avg_epu16(c6, c7);
4107 c8 = _mm_avg_epu16(c8, c9);
4108 cA = _mm_avg_epu16(cA, cB);
4109 cC = _mm_avg_epu16(cC, cD);
4110 cE = _mm_avg_epu16(cE, cF);
4111 c0 = _mm_avg_epu16(c0, c2);
4112 c4 = _mm_avg_epu16(c4, c6);
4113 c8 = _mm_avg_epu16(c8, cA);
4114 cC = _mm_avg_epu16(cC, cE);
4115 c0 = _mm_avg_epu16(c0, c4);
4116 c8 = _mm_avg_epu16(c8, cC);
4117 c0 = _mm_avg_epu16(c0, c8);
4118
4119 _mm_store_si128((__m128i*)(source0 + 8 * x), c0);
4120 }
4121
4122 source0 += pitch;
4123 source1 += pitch;
4124 source2 += pitch;
4125 source3 += pitch;
4126 source4 += pitch;
4127 source5 += pitch;
4128 source6 += pitch;
4129 source7 += pitch;
4130 source8 += pitch;
4131 source9 += pitch;
4132 sourceA += pitch;
4133 sourceB += pitch;
4134 sourceC += pitch;
4135 sourceD += pitch;
4136 sourceE += pitch;
4137 sourceF += pitch;
4138 }
4139 }
4140 else ASSERT(false);
4141 }
4142 else
4143 {
4144 #define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7FFF7FFF) + (((x) ^ (y)) & 0x00010001))
4145
4146 if(internal.depth == 2)
4147 {
4148 for(int y = 0; y < height; y++)
4149 {
4150 for(int x = 0; x < 2 * width; x++)
4151 {
4152 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4153 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4154
4155 c0 = AVERAGE(c0, c1);
4156
4157 *(unsigned int*)(source0 + 4 * x) = c0;
4158 }
4159
4160 source0 += pitch;
4161 source1 += pitch;
4162 }
4163 }
4164 else if(internal.depth == 4)
4165 {
4166 for(int y = 0; y < height; y++)
4167 {
4168 for(int x = 0; x < 2 * width; x++)
4169 {
4170 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4171 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4172 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4173 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4174
4175 c0 = AVERAGE(c0, c1);
4176 c2 = AVERAGE(c2, c3);
4177 c0 = AVERAGE(c0, c2);
4178
4179 *(unsigned int*)(source0 + 4 * x) = c0;
4180 }
4181
4182 source0 += pitch;
4183 source1 += pitch;
4184 source2 += pitch;
4185 source3 += pitch;
4186 }
4187 }
4188 else if(internal.depth == 8)
4189 {
4190 for(int y = 0; y < height; y++)
4191 {
4192 for(int x = 0; x < 2 * width; x++)
4193 {
4194 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4195 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4196 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4197 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4198 unsigned int c4 = *(unsigned int*)(source4 + 4 * x);
4199 unsigned int c5 = *(unsigned int*)(source5 + 4 * x);
4200 unsigned int c6 = *(unsigned int*)(source6 + 4 * x);
4201 unsigned int c7 = *(unsigned int*)(source7 + 4 * x);
4202
4203 c0 = AVERAGE(c0, c1);
4204 c2 = AVERAGE(c2, c3);
4205 c4 = AVERAGE(c4, c5);
4206 c6 = AVERAGE(c6, c7);
4207 c0 = AVERAGE(c0, c2);
4208 c4 = AVERAGE(c4, c6);
4209 c0 = AVERAGE(c0, c4);
4210
4211 *(unsigned int*)(source0 + 4 * x) = c0;
4212 }
4213
4214 source0 += pitch;
4215 source1 += pitch;
4216 source2 += pitch;
4217 source3 += pitch;
4218 source4 += pitch;
4219 source5 += pitch;
4220 source6 += pitch;
4221 source7 += pitch;
4222 }
4223 }
4224 else if(internal.depth == 16)
4225 {
4226 for(int y = 0; y < height; y++)
4227 {
4228 for(int x = 0; x < 2 * width; x++)
4229 {
4230 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4231 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4232 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4233 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4234 unsigned int c4 = *(unsigned int*)(source4 + 4 * x);
4235 unsigned int c5 = *(unsigned int*)(source5 + 4 * x);
4236 unsigned int c6 = *(unsigned int*)(source6 + 4 * x);
4237 unsigned int c7 = *(unsigned int*)(source7 + 4 * x);
4238 unsigned int c8 = *(unsigned int*)(source8 + 4 * x);
4239 unsigned int c9 = *(unsigned int*)(source9 + 4 * x);
4240 unsigned int cA = *(unsigned int*)(sourceA + 4 * x);
4241 unsigned int cB = *(unsigned int*)(sourceB + 4 * x);
4242 unsigned int cC = *(unsigned int*)(sourceC + 4 * x);
4243 unsigned int cD = *(unsigned int*)(sourceD + 4 * x);
4244 unsigned int cE = *(unsigned int*)(sourceE + 4 * x);
4245 unsigned int cF = *(unsigned int*)(sourceF + 4 * x);
4246
4247 c0 = AVERAGE(c0, c1);
4248 c2 = AVERAGE(c2, c3);
4249 c4 = AVERAGE(c4, c5);
4250 c6 = AVERAGE(c6, c7);
4251 c8 = AVERAGE(c8, c9);
4252 cA = AVERAGE(cA, cB);
4253 cC = AVERAGE(cC, cD);
4254 cE = AVERAGE(cE, cF);
4255 c0 = AVERAGE(c0, c2);
4256 c4 = AVERAGE(c4, c6);
4257 c8 = AVERAGE(c8, cA);
4258 cC = AVERAGE(cC, cE);
4259 c0 = AVERAGE(c0, c4);
4260 c8 = AVERAGE(c8, cC);
4261 c0 = AVERAGE(c0, c8);
4262
4263 *(unsigned int*)(source0 + 4 * x) = c0;
4264 }
4265
4266 source0 += pitch;
4267 source1 += pitch;
4268 source2 += pitch;
4269 source3 += pitch;
4270 source4 += pitch;
4271 source5 += pitch;
4272 source6 += pitch;
4273 source7 += pitch;
4274 source8 += pitch;
4275 source9 += pitch;
4276 sourceA += pitch;
4277 sourceB += pitch;
4278 sourceC += pitch;
4279 sourceD += pitch;
4280 sourceE += pitch;
4281 sourceF += pitch;
4282 }
4283 }
4284 else ASSERT(false);
4285
4286 #undef AVERAGE
4287 }
4288 }
4289 else if(internal.format == FORMAT_R32F)
4290 {
4291 if(CPUID::supportsSSE() && (width % 4) == 0)
4292 {
4293 if(internal.depth == 2)
4294 {
4295 for(int y = 0; y < height; y++)
4296 {
4297 for(int x = 0; x < width; x += 4)
4298 {
4299 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x));
4300 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x));
4301
4302 c0 = _mm_add_ps(c0, c1);
4303 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 2.0f));
4304
4305 _mm_store_ps((float*)(source0 + 4 * x), c0);
4306 }
4307
4308 source0 += pitch;
4309 source1 += pitch;
4310 }
4311 }
4312 else if(internal.depth == 4)
4313 {
4314 for(int y = 0; y < height; y++)
4315 {
4316 for(int x = 0; x < width; x += 4)
4317 {
4318 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x));
4319 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x));
4320 __m128 c2 = _mm_load_ps((float*)(source2 + 4 * x));
4321 __m128 c3 = _mm_load_ps((float*)(source3 + 4 * x));
4322
4323 c0 = _mm_add_ps(c0, c1);
4324 c2 = _mm_add_ps(c2, c3);
4325 c0 = _mm_add_ps(c0, c2);
4326 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 4.0f));
4327
4328 _mm_store_ps((float*)(source0 + 4 * x), c0);
4329 }
4330
4331 source0 += pitch;
4332 source1 += pitch;
4333 source2 += pitch;
4334 source3 += pitch;
4335 }
4336 }
4337 else if(internal.depth == 8)
4338 {
4339 for(int y = 0; y < height; y++)
4340 {
4341 for(int x = 0; x < width; x += 4)
4342 {
4343 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x));
4344 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x));
4345 __m128 c2 = _mm_load_ps((float*)(source2 + 4 * x));
4346 __m128 c3 = _mm_load_ps((float*)(source3 + 4 * x));
4347 __m128 c4 = _mm_load_ps((float*)(source4 + 4 * x));
4348 __m128 c5 = _mm_load_ps((float*)(source5 + 4 * x));
4349 __m128 c6 = _mm_load_ps((float*)(source6 + 4 * x));
4350 __m128 c7 = _mm_load_ps((float*)(source7 + 4 * x));
4351
4352 c0 = _mm_add_ps(c0, c1);
4353 c2 = _mm_add_ps(c2, c3);
4354 c4 = _mm_add_ps(c4, c5);
4355 c6 = _mm_add_ps(c6, c7);
4356 c0 = _mm_add_ps(c0, c2);
4357 c4 = _mm_add_ps(c4, c6);
4358 c0 = _mm_add_ps(c0, c4);
4359 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 8.0f));
4360
4361 _mm_store_ps((float*)(source0 + 4 * x), c0);
4362 }
4363
4364 source0 += pitch;
4365 source1 += pitch;
4366 source2 += pitch;
4367 source3 += pitch;
4368 source4 += pitch;
4369 source5 += pitch;
4370 source6 += pitch;
4371 source7 += pitch;
4372 }
4373 }
4374 else if(internal.depth == 16)
4375 {
4376 for(int y = 0; y < height; y++)
4377 {
4378 for(int x = 0; x < width; x += 4)
4379 {
4380 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x));
4381 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x));
4382 __m128 c2 = _mm_load_ps((float*)(source2 + 4 * x));
4383 __m128 c3 = _mm_load_ps((float*)(source3 + 4 * x));
4384 __m128 c4 = _mm_load_ps((float*)(source4 + 4 * x));
4385 __m128 c5 = _mm_load_ps((float*)(source5 + 4 * x));
4386 __m128 c6 = _mm_load_ps((float*)(source6 + 4 * x));
4387 __m128 c7 = _mm_load_ps((float*)(source7 + 4 * x));
4388 __m128 c8 = _mm_load_ps((float*)(source8 + 4 * x));
4389 __m128 c9 = _mm_load_ps((float*)(source9 + 4 * x));
4390 __m128 cA = _mm_load_ps((float*)(sourceA + 4 * x));
4391 __m128 cB = _mm_load_ps((float*)(sourceB + 4 * x));
4392 __m128 cC = _mm_load_ps((float*)(sourceC + 4 * x));
4393 __m128 cD = _mm_load_ps((float*)(sourceD + 4 * x));
4394 __m128 cE = _mm_load_ps((float*)(sourceE + 4 * x));
4395 __m128 cF = _mm_load_ps((float*)(sourceF + 4 * x));
4396
4397 c0 = _mm_add_ps(c0, c1);
4398 c2 = _mm_add_ps(c2, c3);
4399 c4 = _mm_add_ps(c4, c5);
4400 c6 = _mm_add_ps(c6, c7);
4401 c8 = _mm_add_ps(c8, c9);
4402 cA = _mm_add_ps(cA, cB);
4403 cC = _mm_add_ps(cC, cD);
4404 cE = _mm_add_ps(cE, cF);
4405 c0 = _mm_add_ps(c0, c2);
4406 c4 = _mm_add_ps(c4, c6);
4407 c8 = _mm_add_ps(c8, cA);
4408 cC = _mm_add_ps(cC, cE);
4409 c0 = _mm_add_ps(c0, c4);
4410 c8 = _mm_add_ps(c8, cC);
4411 c0 = _mm_add_ps(c0, c8);
4412 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 16.0f));
4413
4414 _mm_store_ps((float*)(source0 + 4 * x), c0);
4415 }
4416
4417 source0 += pitch;
4418 source1 += pitch;
4419 source2 += pitch;
4420 source3 += pitch;
4421 source4 += pitch;
4422 source5 += pitch;
4423 source6 += pitch;
4424 source7 += pitch;
4425 source8 += pitch;
4426 source9 += pitch;
4427 sourceA += pitch;
4428 sourceB += pitch;
4429 sourceC += pitch;
4430 sourceD += pitch;
4431 sourceE += pitch;
4432 sourceF += pitch;
4433 }
4434 }
4435 else ASSERT(false);
4436 }
4437 else
4438 {
4439 if(internal.depth == 2)
4440 {
4441 for(int y = 0; y < height; y++)
4442 {
4443 for(int x = 0; x < width; x++)
4444 {
4445 float c0 = *(float*)(source0 + 4 * x);
4446 float c1 = *(float*)(source1 + 4 * x);
4447
4448 c0 = c0 + c1;
4449 c0 *= 1.0f / 2.0f;
4450
4451 *(float*)(source0 + 4 * x) = c0;
4452 }
4453
4454 source0 += pitch;
4455 source1 += pitch;
4456 }
4457 }
4458 else if(internal.depth == 4)
4459 {
4460 for(int y = 0; y < height; y++)
4461 {
4462 for(int x = 0; x < width; x++)
4463 {
4464 float c0 = *(float*)(source0 + 4 * x);
4465 float c1 = *(float*)(source1 + 4 * x);
4466 float c2 = *(float*)(source2 + 4 * x);
4467 float c3 = *(float*)(source3 + 4 * x);
4468
4469 c0 = c0 + c1;
4470 c2 = c2 + c3;
4471 c0 = c0 + c2;
4472 c0 *= 1.0f / 4.0f;
4473
4474 *(float*)(source0 + 4 * x) = c0;
4475 }
4476
4477 source0 += pitch;
4478 source1 += pitch;
4479 source2 += pitch;
4480 source3 += pitch;
4481 }
4482 }
4483 else if(internal.depth == 8)
4484 {
4485 for(int y = 0; y < height; y++)
4486 {
4487 for(int x = 0; x < width; x++)
4488 {
4489 float c0 = *(float*)(source0 + 4 * x);
4490 float c1 = *(float*)(source1 + 4 * x);
4491 float c2 = *(float*)(source2 + 4 * x);
4492 float c3 = *(float*)(source3 + 4 * x);
4493 float c4 = *(float*)(source4 + 4 * x);
4494 float c5 = *(float*)(source5 + 4 * x);
4495 float c6 = *(float*)(source6 + 4 * x);
4496 float c7 = *(float*)(source7 + 4 * x);
4497
4498 c0 = c0 + c1;
4499 c2 = c2 + c3;
4500 c4 = c4 + c5;
4501 c6 = c6 + c7;
4502 c0 = c0 + c2;
4503 c4 = c4 + c6;
4504 c0 = c0 + c4;
4505 c0 *= 1.0f / 8.0f;
4506
4507 *(float*)(source0 + 4 * x) = c0;
4508 }
4509
4510 source0 += pitch;
4511 source1 += pitch;
4512 source2 += pitch;
4513 source3 += pitch;
4514 source4 += pitch;
4515 source5 += pitch;
4516 source6 += pitch;
4517 source7 += pitch;
4518 }
4519 }
4520 else if(internal.depth == 16)
4521 {
4522 for(int y = 0; y < height; y++)
4523 {
4524 for(int x = 0; x < width; x++)
4525 {
4526 float c0 = *(float*)(source0 + 4 * x);
4527 float c1 = *(float*)(source1 + 4 * x);
4528 float c2 = *(float*)(source2 + 4 * x);
4529 float c3 = *(float*)(source3 + 4 * x);
4530 float c4 = *(float*)(source4 + 4 * x);
4531 float c5 = *(float*)(source5 + 4 * x);
4532 float c6 = *(float*)(source6 + 4 * x);
4533 float c7 = *(float*)(source7 + 4 * x);
4534 float c8 = *(float*)(source8 + 4 * x);
4535 float c9 = *(float*)(source9 + 4 * x);
4536 float cA = *(float*)(sourceA + 4 * x);
4537 float cB = *(float*)(sourceB + 4 * x);
4538 float cC = *(float*)(sourceC + 4 * x);
4539 float cD = *(float*)(sourceD + 4 * x);
4540 float cE = *(float*)(sourceE + 4 * x);
4541 float cF = *(float*)(sourceF + 4 * x);
4542
4543 c0 = c0 + c1;
4544 c2 = c2 + c3;
4545 c4 = c4 + c5;
4546 c6 = c6 + c7;
4547 c8 = c8 + c9;
4548 cA = cA + cB;
4549 cC = cC + cD;
4550 cE = cE + cF;
4551 c0 = c0 + c2;
4552 c4 = c4 + c6;
4553 c8 = c8 + cA;
4554 cC = cC + cE;
4555 c0 = c0 + c4;
4556 c8 = c8 + cC;
4557 c0 = c0 + c8;
4558 c0 *= 1.0f / 16.0f;
4559
4560 *(float*)(source0 + 4 * x) = c0;
4561 }
4562
4563 source0 += pitch;
4564 source1 += pitch;
4565 source2 += pitch;
4566 source3 += pitch;
4567 source4 += pitch;
4568 source5 += pitch;
4569 source6 += pitch;
4570 source7 += pitch;
4571 source8 += pitch;
4572 source9 += pitch;
4573 sourceA += pitch;
4574 sourceB += pitch;
4575 sourceC += pitch;
4576 sourceD += pitch;
4577 sourceE += pitch;
4578 sourceF += pitch;
4579 }
4580 }
4581 else ASSERT(false);
4582 }
4583 }
4584 else if(internal.format == FORMAT_G32R32F)
4585 {
4586 if(CPUID::supportsSSE() && (width % 2) == 0)
4587 {
4588 if(internal.depth == 2)
4589 {
4590 for(int y = 0; y < height; y++)
4591 {
4592 for(int x = 0; x < width; x += 2)
4593 {
4594 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x));
4595 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x));
4596
4597 c0 = _mm_add_ps(c0, c1);
4598 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 2.0f));
4599
4600 _mm_store_ps((float*)(source0 + 8 * x), c0);
4601 }
4602
4603 source0 += pitch;
4604 source1 += pitch;
4605 }
4606 }
4607 else if(internal.depth == 4)
4608 {
4609 for(int y = 0; y < height; y++)
4610 {
4611 for(int x = 0; x < width; x += 2)
4612 {
4613 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x));
4614 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x));
4615 __m128 c2 = _mm_load_ps((float*)(source2 + 8 * x));
4616 __m128 c3 = _mm_load_ps((float*)(source3 + 8 * x));
4617
4618 c0 = _mm_add_ps(c0, c1);
4619 c2 = _mm_add_ps(c2, c3);
4620 c0 = _mm_add_ps(c0, c2);
4621 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 4.0f));
4622
4623 _mm_store_ps((float*)(source0 + 8 * x), c0);
4624 }
4625
4626 source0 += pitch;
4627 source1 += pitch;
4628 source2 += pitch;
4629 source3 += pitch;
4630 }
4631 }
4632 else if(internal.depth == 8)
4633 {
4634 for(int y = 0; y < height; y++)
4635 {
4636 for(int x = 0; x < width; x += 2)
4637 {
4638 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x));
4639 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x));
4640 __m128 c2 = _mm_load_ps((float*)(source2 + 8 * x));
4641 __m128 c3 = _mm_load_ps((float*)(source3 + 8 * x));
4642 __m128 c4 = _mm_load_ps((float*)(source4 + 8 * x));
4643 __m128 c5 = _mm_load_ps((float*)(source5 + 8 * x));
4644 __m128 c6 = _mm_load_ps((float*)(source6 + 8 * x));
4645 __m128 c7 = _mm_load_ps((float*)(source7 + 8 * x));
4646
4647 c0 = _mm_add_ps(c0, c1);
4648 c2 = _mm_add_ps(c2, c3);
4649 c4 = _mm_add_ps(c4, c5);
4650 c6 = _mm_add_ps(c6, c7);
4651 c0 = _mm_add_ps(c0, c2);
4652 c4 = _mm_add_ps(c4, c6);
4653 c0 = _mm_add_ps(c0, c4);
4654 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 8.0f));
4655
4656 _mm_store_ps((float*)(source0 + 8 * x), c0);
4657 }
4658
4659 source0 += pitch;
4660 source1 += pitch;
4661 source2 += pitch;
4662 source3 += pitch;
4663 source4 += pitch;
4664 source5 += pitch;
4665 source6 += pitch;
4666 source7 += pitch;
4667 }
4668 }
4669 else if(internal.depth == 16)
4670 {
4671 for(int y = 0; y < height; y++)
4672 {
4673 for(int x = 0; x < width; x += 2)
4674 {
4675 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x));
4676 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x));
4677 __m128 c2 = _mm_load_ps((float*)(source2 + 8 * x));
4678 __m128 c3 = _mm_load_ps((float*)(source3 + 8 * x));
4679 __m128 c4 = _mm_load_ps((float*)(source4 + 8 * x));
4680 __m128 c5 = _mm_load_ps((float*)(source5 + 8 * x));
4681 __m128 c6 = _mm_load_ps((float*)(source6 + 8 * x));
4682 __m128 c7 = _mm_load_ps((float*)(source7 + 8 * x));
4683 __m128 c8 = _mm_load_ps((float*)(source8 + 8 * x));
4684 __m128 c9 = _mm_load_ps((float*)(source9 + 8 * x));
4685 __m128 cA = _mm_load_ps((float*)(sourceA + 8 * x));
4686 __m128 cB = _mm_load_ps((float*)(sourceB + 8 * x));
4687 __m128 cC = _mm_load_ps((float*)(sourceC + 8 * x));
4688 __m128 cD = _mm_load_ps((float*)(sourceD + 8 * x));
4689 __m128 cE = _mm_load_ps((float*)(sourceE + 8 * x));
4690 __m128 cF = _mm_load_ps((float*)(sourceF + 8 * x));
4691
4692 c0 = _mm_add_ps(c0, c1);
4693 c2 = _mm_add_ps(c2, c3);
4694 c4 = _mm_add_ps(c4, c5);
4695 c6 = _mm_add_ps(c6, c7);
4696 c8 = _mm_add_ps(c8, c9);
4697 cA = _mm_add_ps(cA, cB);
4698 cC = _mm_add_ps(cC, cD);
4699 cE = _mm_add_ps(cE, cF);
4700 c0 = _mm_add_ps(c0, c2);
4701 c4 = _mm_add_ps(c4, c6);
4702 c8 = _mm_add_ps(c8, cA);
4703 cC = _mm_add_ps(cC, cE);
4704 c0 = _mm_add_ps(c0, c4);
4705 c8 = _mm_add_ps(c8, cC);
4706 c0 = _mm_add_ps(c0, c8);
4707 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 16.0f));
4708
4709 _mm_store_ps((float*)(source0 + 8 * x), c0);
4710 }
4711
4712 source0 += pitch;
4713 source1 += pitch;
4714 source2 += pitch;
4715 source3 += pitch;
4716 source4 += pitch;
4717 source5 += pitch;
4718 source6 += pitch;
4719 source7 += pitch;
4720 source8 += pitch;
4721 source9 += pitch;
4722 sourceA += pitch;
4723 sourceB += pitch;
4724 sourceC += pitch;
4725 sourceD += pitch;
4726 sourceE += pitch;
4727 sourceF += pitch;
4728 }
4729 }
4730 else ASSERT(false);
4731 }
4732 else
4733 {
4734 if(internal.depth == 2)
4735 {
4736 for(int y = 0; y < height; y++)
4737 {
4738 for(int x = 0; x < 2 * width; x++)
4739 {
4740 float c0 = *(float*)(source0 + 4 * x);
4741 float c1 = *(float*)(source1 + 4 * x);
4742
4743 c0 = c0 + c1;
4744 c0 *= 1.0f / 2.0f;
4745
4746 *(float*)(source0 + 4 * x) = c0;
4747 }
4748
4749 source0 += pitch;
4750 source1 += pitch;
4751 }
4752 }
4753 else if(internal.depth == 4)
4754 {
4755 for(int y = 0; y < height; y++)
4756 {
4757 for(int x = 0; x < 2 * width; x++)
4758 {
4759 float c0 = *(float*)(source0 + 4 * x);
4760 float c1 = *(float*)(source1 + 4 * x);
4761 float c2 = *(float*)(source2 + 4 * x);
4762 float c3 = *(float*)(source3 + 4 * x);
4763
4764 c0 = c0 + c1;
4765 c2 = c2 + c3;
4766 c0 = c0 + c2;
4767 c0 *= 1.0f / 4.0f;
4768
4769 *(float*)(source0 + 4 * x) = c0;
4770 }
4771
4772 source0 += pitch;
4773 source1 += pitch;
4774 source2 += pitch;
4775 source3 += pitch;
4776 }
4777 }
4778 else if(internal.depth == 8)
4779 {
4780 for(int y = 0; y < height; y++)
4781 {
4782 for(int x = 0; x < 2 * width; x++)
4783 {
4784 float c0 = *(float*)(source0 + 4 * x);
4785 float c1 = *(float*)(source1 + 4 * x);
4786 float c2 = *(float*)(source2 + 4 * x);
4787 float c3 = *(float*)(source3 + 4 * x);
4788 float c4 = *(float*)(source4 + 4 * x);
4789 float c5 = *(float*)(source5 + 4 * x);
4790 float c6 = *(float*)(source6 + 4 * x);
4791 float c7 = *(float*)(source7 + 4 * x);
4792
4793 c0 = c0 + c1;
4794 c2 = c2 + c3;
4795 c4 = c4 + c5;
4796 c6 = c6 + c7;
4797 c0 = c0 + c2;
4798 c4 = c4 + c6;
4799 c0 = c0 + c4;
4800 c0 *= 1.0f / 8.0f;
4801
4802 *(float*)(source0 + 4 * x) = c0;
4803 }
4804
4805 source0 += pitch;
4806 source1 += pitch;
4807 source2 += pitch;
4808 source3 += pitch;
4809 source4 += pitch;
4810 source5 += pitch;
4811 source6 += pitch;
4812 source7 += pitch;
4813 }
4814 }
4815 else if(internal.depth == 16)
4816 {
4817 for(int y = 0; y < height; y++)
4818 {
4819 for(int x = 0; x < 2 * width; x++)
4820 {
4821 float c0 = *(float*)(source0 + 4 * x);
4822 float c1 = *(float*)(source1 + 4 * x);
4823 float c2 = *(float*)(source2 + 4 * x);
4824 float c3 = *(float*)(source3 + 4 * x);
4825 float c4 = *(float*)(source4 + 4 * x);
4826 float c5 = *(float*)(source5 + 4 * x);
4827 float c6 = *(float*)(source6 + 4 * x);
4828 float c7 = *(float*)(source7 + 4 * x);
4829 float c8 = *(float*)(source8 + 4 * x);
4830 float c9 = *(float*)(source9 + 4 * x);
4831 float cA = *(float*)(sourceA + 4 * x);
4832 float cB = *(float*)(sourceB + 4 * x);
4833 float cC = *(float*)(sourceC + 4 * x);
4834 float cD = *(float*)(sourceD + 4 * x);
4835 float cE = *(float*)(sourceE + 4 * x);
4836 float cF = *(float*)(sourceF + 4 * x);
4837
4838 c0 = c0 + c1;
4839 c2 = c2 + c3;
4840 c4 = c4 + c5;
4841 c6 = c6 + c7;
4842 c8 = c8 + c9;
4843 cA = cA + cB;
4844 cC = cC + cD;
4845 cE = cE + cF;
4846 c0 = c0 + c2;
4847 c4 = c4 + c6;
4848 c8 = c8 + cA;
4849 cC = cC + cE;
4850 c0 = c0 + c4;
4851 c8 = c8 + cC;
4852 c0 = c0 + c8;
4853 c0 *= 1.0f / 16.0f;
4854
4855 *(float*)(source0 + 4 * x) = c0;
4856 }
4857
4858 source0 += pitch;
4859 source1 += pitch;
4860 source2 += pitch;
4861 source3 += pitch;
4862 source4 += pitch;
4863 source5 += pitch;
4864 source6 += pitch;
4865 source7 += pitch;
4866 source8 += pitch;
4867 source9 += pitch;
4868 sourceA += pitch;
4869 sourceB += pitch;
4870 sourceC += pitch;
4871 sourceD += pitch;
4872 sourceE += pitch;
4873 sourceF += pitch;
4874 }
4875 }
4876 else ASSERT(false);
4877 }
4878 }
4879 else if(internal.format == FORMAT_A32B32G32R32F)
4880 {
4881 if(CPUID::supportsSSE())
4882 {
4883 if(internal.depth == 2)
4884 {
4885 for(int y = 0; y < height; y++)
4886 {
4887 for(int x = 0; x < width; x++)
4888 {
4889 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x));
4890 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x));
4891
4892 c0 = _mm_add_ps(c0, c1);
4893 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 2.0f));
4894
4895 _mm_store_ps((float*)(source0 + 16 * x), c0);
4896 }
4897
4898 source0 += pitch;
4899 source1 += pitch;
4900 }
4901 }
4902 else if(internal.depth == 4)
4903 {
4904 for(int y = 0; y < height; y++)
4905 {
4906 for(int x = 0; x < width; x++)
4907 {
4908 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x));
4909 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x));
4910 __m128 c2 = _mm_load_ps((float*)(source2 + 16 * x));
4911 __m128 c3 = _mm_load_ps((float*)(source3 + 16 * x));
4912
4913 c0 = _mm_add_ps(c0, c1);
4914 c2 = _mm_add_ps(c2, c3);
4915 c0 = _mm_add_ps(c0, c2);
4916 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 4.0f));
4917
4918 _mm_store_ps((float*)(source0 + 16 * x), c0);
4919 }
4920
4921 source0 += pitch;
4922 source1 += pitch;
4923 source2 += pitch;
4924 source3 += pitch;
4925 }
4926 }
4927 else if(internal.depth == 8)
4928 {
4929 for(int y = 0; y < height; y++)
4930 {
4931 for(int x = 0; x < width; x++)
4932 {
4933 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x));
4934 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x));
4935 __m128 c2 = _mm_load_ps((float*)(source2 + 16 * x));
4936 __m128 c3 = _mm_load_ps((float*)(source3 + 16 * x));
4937 __m128 c4 = _mm_load_ps((float*)(source4 + 16 * x));
4938 __m128 c5 = _mm_load_ps((float*)(source5 + 16 * x));
4939 __m128 c6 = _mm_load_ps((float*)(source6 + 16 * x));
4940 __m128 c7 = _mm_load_ps((float*)(source7 + 16 * x));
4941
4942 c0 = _mm_add_ps(c0, c1);
4943 c2 = _mm_add_ps(c2, c3);
4944 c4 = _mm_add_ps(c4, c5);
4945 c6 = _mm_add_ps(c6, c7);
4946 c0 = _mm_add_ps(c0, c2);
4947 c4 = _mm_add_ps(c4, c6);
4948 c0 = _mm_add_ps(c0, c4);
4949 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 8.0f));
4950
4951 _mm_store_ps((float*)(source0 + 16 * x), c0);
4952 }
4953
4954 source0 += pitch;
4955 source1 += pitch;
4956 source2 += pitch;
4957 source3 += pitch;
4958 source4 += pitch;
4959 source5 += pitch;
4960 source6 += pitch;
4961 source7 += pitch;
4962 }
4963 }
4964 else if(internal.depth == 16)
4965 {
4966 for(int y = 0; y < height; y++)
4967 {
4968 for(int x = 0; x < width; x++)
4969 {
4970 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x));
4971 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x));
4972 __m128 c2 = _mm_load_ps((float*)(source2 + 16 * x));
4973 __m128 c3 = _mm_load_ps((float*)(source3 + 16 * x));
4974 __m128 c4 = _mm_load_ps((float*)(source4 + 16 * x));
4975 __m128 c5 = _mm_load_ps((float*)(source5 + 16 * x));
4976 __m128 c6 = _mm_load_ps((float*)(source6 + 16 * x));
4977 __m128 c7 = _mm_load_ps((float*)(source7 + 16 * x));
4978 __m128 c8 = _mm_load_ps((float*)(source8 + 16 * x));
4979 __m128 c9 = _mm_load_ps((float*)(source9 + 16 * x));
4980 __m128 cA = _mm_load_ps((float*)(sourceA + 16 * x));
4981 __m128 cB = _mm_load_ps((float*)(sourceB + 16 * x));
4982 __m128 cC = _mm_load_ps((float*)(sourceC + 16 * x));
4983 __m128 cD = _mm_load_ps((float*)(sourceD + 16 * x));
4984 __m128 cE = _mm_load_ps((float*)(sourceE + 16 * x));
4985 __m128 cF = _mm_load_ps((float*)(sourceF + 16 * x));
4986
4987 c0 = _mm_add_ps(c0, c1);
4988 c2 = _mm_add_ps(c2, c3);
4989 c4 = _mm_add_ps(c4, c5);
4990 c6 = _mm_add_ps(c6, c7);
4991 c8 = _mm_add_ps(c8, c9);
4992 cA = _mm_add_ps(cA, cB);
4993 cC = _mm_add_ps(cC, cD);
4994 cE = _mm_add_ps(cE, cF);
4995 c0 = _mm_add_ps(c0, c2);
4996 c4 = _mm_add_ps(c4, c6);
4997 c8 = _mm_add_ps(c8, cA);
4998 cC = _mm_add_ps(cC, cE);
4999 c0 = _mm_add_ps(c0, c4);
5000 c8 = _mm_add_ps(c8, cC);
5001 c0 = _mm_add_ps(c0, c8);
5002 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 16.0f));
5003
5004 _mm_store_ps((float*)(source0 + 16 * x), c0);
5005 }
5006
5007 source0 += pitch;
5008 source1 += pitch;
5009 source2 += pitch;
5010 source3 += pitch;
5011 source4 += pitch;
5012 source5 += pitch;
5013 source6 += pitch;
5014 source7 += pitch;
5015 source8 += pitch;
5016 source9 += pitch;
5017 sourceA += pitch;
5018 sourceB += pitch;
5019 sourceC += pitch;
5020 sourceD += pitch;
5021 sourceE += pitch;
5022 sourceF += pitch;
5023 }
5024 }
5025 else ASSERT(false);
5026 }
5027 else
5028 {
5029 if(internal.depth == 2)
5030 {
5031 for(int y = 0; y < height; y++)
5032 {
5033 for(int x = 0; x < 4 * width; x++)
5034 {
5035 float c0 = *(float*)(source0 + 4 * x);
5036 float c1 = *(float*)(source1 + 4 * x);
5037
5038 c0 = c0 + c1;
5039 c0 *= 1.0f / 2.0f;
5040
5041 *(float*)(source0 + 4 * x) = c0;
5042 }
5043
5044 source0 += pitch;
5045 source1 += pitch;
5046 }
5047 }
5048 else if(internal.depth == 4)
5049 {
5050 for(int y = 0; y < height; y++)
5051 {
5052 for(int x = 0; x < 4 * width; x++)
5053 {
5054 float c0 = *(float*)(source0 + 4 * x);
5055 float c1 = *(float*)(source1 + 4 * x);
5056 float c2 = *(float*)(source2 + 4 * x);
5057 float c3 = *(float*)(source3 + 4 * x);
5058
5059 c0 = c0 + c1;
5060 c2 = c2 + c3;
5061 c0 = c0 + c2;
5062 c0 *= 1.0f / 4.0f;
5063
5064 *(float*)(source0 + 4 * x) = c0;
5065 }
5066
5067 source0 += pitch;
5068 source1 += pitch;
5069 source2 += pitch;
5070 source3 += pitch;
5071 }
5072 }
5073 else if(internal.depth == 8)
5074 {
5075 for(int y = 0; y < height; y++)
5076 {
5077 for(int x = 0; x < 4 * width; x++)
5078 {
5079 float c0 = *(float*)(source0 + 4 * x);
5080 float c1 = *(float*)(source1 + 4 * x);
5081 float c2 = *(float*)(source2 + 4 * x);
5082 float c3 = *(float*)(source3 + 4 * x);
5083 float c4 = *(float*)(source4 + 4 * x);
5084 float c5 = *(float*)(source5 + 4 * x);
5085 float c6 = *(float*)(source6 + 4 * x);
5086 float c7 = *(float*)(source7 + 4 * x);
5087
5088 c0 = c0 + c1;
5089 c2 = c2 + c3;
5090 c4 = c4 + c5;
5091 c6 = c6 + c7;
5092 c0 = c0 + c2;
5093 c4 = c4 + c6;
5094 c0 = c0 + c4;
5095 c0 *= 1.0f / 8.0f;
5096
5097 *(float*)(source0 + 4 * x) = c0;
5098 }
5099
5100 source0 += pitch;
5101 source1 += pitch;
5102 source2 += pitch;
5103 source3 += pitch;
5104 source4 += pitch;
5105 source5 += pitch;
5106 source6 += pitch;
5107 source7 += pitch;
5108 }
5109 }
5110 else if(internal.depth == 16)
5111 {
5112 for(int y = 0; y < height; y++)
5113 {
5114 for(int x = 0; x < 4 * width; x++)
5115 {
5116 float c0 = *(float*)(source0 + 4 * x);
5117 float c1 = *(float*)(source1 + 4 * x);
5118 float c2 = *(float*)(source2 + 4 * x);
5119 float c3 = *(float*)(source3 + 4 * x);
5120 float c4 = *(float*)(source4 + 4 * x);
5121 float c5 = *(float*)(source5 + 4 * x);
5122 float c6 = *(float*)(source6 + 4 * x);
5123 float c7 = *(float*)(source7 + 4 * x);
5124 float c8 = *(float*)(source8 + 4 * x);
5125 float c9 = *(float*)(source9 + 4 * x);
5126 float cA = *(float*)(sourceA + 4 * x);
5127 float cB = *(float*)(sourceB + 4 * x);
5128 float cC = *(float*)(sourceC + 4 * x);
5129 float cD = *(float*)(sourceD + 4 * x);
5130 float cE = *(float*)(sourceE + 4 * x);
5131 float cF = *(float*)(sourceF + 4 * x);
5132
5133 c0 = c0 + c1;
5134 c2 = c2 + c3;
5135 c4 = c4 + c5;
5136 c6 = c6 + c7;
5137 c8 = c8 + c9;
5138 cA = cA + cB;
5139 cC = cC + cD;
5140 cE = cE + cF;
5141 c0 = c0 + c2;
5142 c4 = c4 + c6;
5143 c8 = c8 + cA;
5144 cC = cC + cE;
5145 c0 = c0 + c4;
5146 c8 = c8 + cC;
5147 c0 = c0 + c8;
5148 c0 *= 1.0f / 16.0f;
5149
5150 *(float*)(source0 + 4 * x) = c0;
5151 }
5152
5153 source0 += pitch;
5154 source1 += pitch;
5155 source2 += pitch;
5156 source3 += pitch;
5157 source4 += pitch;
5158 source5 += pitch;
5159 source6 += pitch;
5160 source7 += pitch;
5161 source8 += pitch;
5162 source9 += pitch;
5163 sourceA += pitch;
5164 sourceB += pitch;
5165 sourceC += pitch;
5166 sourceD += pitch;
5167 sourceE += pitch;
5168 sourceF += pitch;
5169 }
5170 }
5171 else ASSERT(false);
5172 }
5173 }
5174 else
5175 {
5176 // UNIMPLEMENTED();
5177 }
5178 }
5179}