blob: 74213f95bf2b186d3507befb5972941a7f61755d [file] [log] [blame]
// SwiftShader Software Renderer
//
// Copyright(c) 2005-2013 TransGaming Inc.
//
// All rights reserved. No part of this software may be copied, distributed, transmitted,
// transcribed, stored in a retrieval system, translated into any human or computer
// language by any means, or disclosed to third parties without the explicit written
// agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express
// or implied, including but not limited to any patent rights, are granted to you.
//
11
12#include "Surface.hpp"
13
14#include "Color.hpp"
15#include "Context.hpp"
Alexis Hetu0de50d42015-09-09 13:56:41 -040016#include "ETC_Decoder.hpp"
John Bauman19bac1e2014-05-06 15:23:49 -040017#include "Renderer.hpp"
John Bauman89401822014-05-06 15:04:28 -040018#include "Common/Half.hpp"
19#include "Common/Memory.hpp"
20#include "Common/CPUID.hpp"
21#include "Common/Resource.hpp"
22#include "Common/Debug.hpp"
John Bauman19bac1e2014-05-06 15:23:49 -040023#include "Reactor/Reactor.hpp"
John Bauman89401822014-05-06 15:04:28 -040024
25#include <xmmintrin.h>
26#include <emmintrin.h>
27
28#undef min
29#undef max
30
31namespace sw
32{
// Settings declared here but defined in another translation unit.
33 extern bool quadLayoutEnabled;
34 extern bool complementaryDepthBuffer;
35 extern TranscendentalPrecision logPrecision;
36
// Storage for Surface's static members. 'palette' starts out null; the
// palettized read paths (FORMAT_P8 / FORMAT_A8P8) assert that it has been
// set before indexing it.
37 unsigned int *Surface::palette = 0;
38 unsigned int Surface::paletteID = 0;
39
John Bauman19bac1e2014-05-06 15:23:49 -040040 void Rect::clip(int minX, int minY, int maxX, int maxY)
41 {
Nicolas Capens22658242014-11-29 00:31:41 -050042 x0 = clamp(x0, minX, maxX);
43 y0 = clamp(y0, minY, maxY);
44 x1 = clamp(x1, minX, maxX);
45 y1 = clamp(y1, minY, maxY);
John Bauman19bac1e2014-05-06 15:23:49 -040046 }
47
John Bauman89401822014-05-06 15:04:28 -040048 void Surface::Buffer::write(int x, int y, int z, const Color<float> &color)
49 {
50 void *element = (unsigned char*)buffer + x * bytes + y * pitchB + z * sliceB;
51
52 write(element, color);
53 }
54
55 void Surface::Buffer::write(int x, int y, const Color<float> &color)
56 {
57 void *element = (unsigned char*)buffer + x * bytes + y * pitchB;
58
59 write(element, color);
60 }
61
62 inline void Surface::Buffer::write(void *element, const Color<float> &color)
63 {
64 switch(format)
65 {
66 case FORMAT_A8:
67 *(unsigned char*)element = unorm<8>(color.a);
68 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -040069 case FORMAT_R8I_SNORM:
70 *(char*)element = snorm<8>(color.r);
71 break;
John Bauman89401822014-05-06 15:04:28 -040072 case FORMAT_R8:
73 *(unsigned char*)element = unorm<8>(color.r);
74 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -040075 case FORMAT_R8I:
76 *(char*)element = scast<8>(color.r);
77 break;
78 case FORMAT_R8UI:
79 *(unsigned char*)element = ucast<8>(color.r);
80 break;
81 case FORMAT_R16I:
82 *(short*)element = scast<16>(color.r);
83 break;
84 case FORMAT_R16UI:
85 *(unsigned short*)element = ucast<16>(color.r);
86 break;
87 case FORMAT_R32I:
88 *(int*)element = static_cast<int>(color.r);
89 break;
90 case FORMAT_R32UI:
91 *(unsigned int*)element = static_cast<unsigned int>(color.r);
92 break;
John Bauman89401822014-05-06 15:04:28 -040093 case FORMAT_R3G3B2:
94 *(unsigned char*)element = (unorm<3>(color.r) << 5) | (unorm<3>(color.g) << 2) | (unorm<2>(color.b) << 0);
95 break;
96 case FORMAT_A8R3G3B2:
97 *(unsigned short*)element = (unorm<8>(color.a) << 8) | (unorm<3>(color.r) << 5) | (unorm<3>(color.g) << 2) | (unorm<2>(color.b) << 0);
98 break;
99 case FORMAT_X4R4G4B4:
100 *(unsigned short*)element = 0xF000 | (unorm<4>(color.r) << 8) | (unorm<4>(color.g) << 4) | (unorm<4>(color.b) << 0);
101 break;
102 case FORMAT_A4R4G4B4:
103 *(unsigned short*)element = (unorm<4>(color.a) << 12) | (unorm<4>(color.r) << 8) | (unorm<4>(color.g) << 4) | (unorm<4>(color.b) << 0);
104 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400105 case FORMAT_R4G4B4A4:
106 *(unsigned short*)element = (unorm<4>(color.r) << 12) | (unorm<4>(color.g) << 8) | (unorm<4>(color.b) << 4) | (unorm<4>(color.a) << 0);
107 break;
John Bauman89401822014-05-06 15:04:28 -0400108 case FORMAT_R5G6B5:
109 *(unsigned short*)element = (unorm<5>(color.r) << 11) | (unorm<6>(color.g) << 5) | (unorm<5>(color.b) << 0);
110 break;
111 case FORMAT_A1R5G5B5:
112 *(unsigned short*)element = (unorm<1>(color.a) << 15) | (unorm<5>(color.r) << 10) | (unorm<5>(color.g) << 5) | (unorm<5>(color.b) << 0);
113 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400114 case FORMAT_R5G5B5A1:
115 *(unsigned short*)element = (unorm<5>(color.r) << 11) | (unorm<5>(color.g) << 6) | (unorm<5>(color.b) << 1) | (unorm<5>(color.a) << 0);
116 break;
John Bauman89401822014-05-06 15:04:28 -0400117 case FORMAT_X1R5G5B5:
118 *(unsigned short*)element = 0x8000 | (unorm<5>(color.r) << 10) | (unorm<5>(color.g) << 5) | (unorm<5>(color.b) << 0);
119 break;
120 case FORMAT_A8R8G8B8:
121 *(unsigned int*)element = (unorm<8>(color.a) << 24) | (unorm<8>(color.r) << 16) | (unorm<8>(color.g) << 8) | (unorm<8>(color.b) << 0);
122 break;
123 case FORMAT_X8R8G8B8:
124 *(unsigned int*)element = 0xFF000000 | (unorm<8>(color.r) << 16) | (unorm<8>(color.g) << 8) | (unorm<8>(color.b) << 0);
125 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400126 case FORMAT_A8B8G8R8I_SNORM:
127 *(unsigned int*)element = (static_cast<unsigned int>(snorm<8>(color.a)) << 24) |
128 (static_cast<unsigned int>(snorm<8>(color.b)) << 16) |
129 (static_cast<unsigned int>(snorm<8>(color.g)) << 8) |
130 (static_cast<unsigned int>(snorm<8>(color.r)) << 0);
131 break;
John Bauman89401822014-05-06 15:04:28 -0400132 case FORMAT_A8B8G8R8:
133 *(unsigned int*)element = (unorm<8>(color.a) << 24) | (unorm<8>(color.b) << 16) | (unorm<8>(color.g) << 8) | (unorm<8>(color.r) << 0);
134 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400135 case FORMAT_A8B8G8R8I:
136 *(unsigned int*)element = (static_cast<unsigned int>(scast<8>(color.a)) << 24) |
137 (static_cast<unsigned int>(scast<8>(color.b)) << 16) |
138 (static_cast<unsigned int>(scast<8>(color.g)) << 8) |
139 (static_cast<unsigned int>(scast<8>(color.r)) << 0);
140 break;
141 case FORMAT_A8B8G8R8UI:
142 *(unsigned int*)element = (ucast<8>(color.a) << 24) | (ucast<8>(color.b) << 16) | (ucast<8>(color.g) << 8) | (ucast<8>(color.r) << 0);
143 break;
144 case FORMAT_X8B8G8R8I_SNORM:
145 *(unsigned int*)element = 0x7F000000 |
146 (static_cast<unsigned int>(snorm<8>(color.b)) << 16) |
147 (static_cast<unsigned int>(snorm<8>(color.g)) << 8) |
148 (static_cast<unsigned int>(snorm<8>(color.r)) << 0);
149 break;
John Bauman89401822014-05-06 15:04:28 -0400150 case FORMAT_X8B8G8R8:
151 *(unsigned int*)element = 0xFF000000 | (unorm<8>(color.b) << 16) | (unorm<8>(color.g) << 8) | (unorm<8>(color.r) << 0);
152 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400153 case FORMAT_X8B8G8R8I:
154 *(unsigned int*)element = 0x7F000000 |
155 (static_cast<unsigned int>(scast<8>(color.b)) << 16) |
156 (static_cast<unsigned int>(scast<8>(color.g)) << 8) |
157 (static_cast<unsigned int>(scast<8>(color.r)) << 0);
158 case FORMAT_X8B8G8R8UI:
159 *(unsigned int*)element = 0xFF000000 | (ucast<8>(color.b) << 16) | (ucast<8>(color.g) << 8) | (ucast<8>(color.r) << 0);
160 break;
John Bauman89401822014-05-06 15:04:28 -0400161 case FORMAT_A2R10G10B10:
162 *(unsigned int*)element = (unorm<2>(color.a) << 30) | (unorm<10>(color.r) << 20) | (unorm<10>(color.g) << 10) | (unorm<10>(color.b) << 0);
163 break;
164 case FORMAT_A2B10G10R10:
165 *(unsigned int*)element = (unorm<2>(color.a) << 30) | (unorm<10>(color.b) << 20) | (unorm<10>(color.g) << 10) | (unorm<10>(color.r) << 0);
166 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400167 case FORMAT_G8R8I_SNORM:
168 *(unsigned short*)element = (static_cast<unsigned short>(snorm<8>(color.g)) << 8) |
169 (static_cast<unsigned short>(snorm<8>(color.r)) << 0);
170 break;
John Bauman89401822014-05-06 15:04:28 -0400171 case FORMAT_G8R8:
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400172 *(unsigned short*)element = (unorm<8>(color.g) << 8) | (unorm<8>(color.r) << 0);
173 break;
174 case FORMAT_G8R8I:
175 *(unsigned short*)element = (static_cast<unsigned short>(scast<8>(color.g)) << 8) |
176 (static_cast<unsigned short>(scast<8>(color.r)) << 0);
177 break;
178 case FORMAT_G8R8UI:
179 *(unsigned short*)element = (ucast<8>(color.g) << 8) | (ucast<8>(color.r) << 0);
John Bauman89401822014-05-06 15:04:28 -0400180 break;
181 case FORMAT_G16R16:
182 *(unsigned int*)element = (unorm<16>(color.g) << 16) | (unorm<16>(color.r) << 0);
183 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400184 case FORMAT_G16R16I:
185 *(unsigned int*)element = (static_cast<unsigned int>(scast<16>(color.g)) << 16) |
186 (static_cast<unsigned int>(scast<16>(color.r)) << 0);
187 break;
188 case FORMAT_G16R16UI:
189 *(unsigned int*)element = (ucast<16>(color.g) << 16) | (ucast<16>(color.r) << 0);
190 break;
191 case FORMAT_G32R32I:
192 case FORMAT_G32R32UI:
193 ((unsigned int*)element)[0] = static_cast<unsigned int>(color.r);
194 ((unsigned int*)element)[1] = static_cast<unsigned int>(color.g);
195 break;
John Bauman89401822014-05-06 15:04:28 -0400196 case FORMAT_A16B16G16R16:
197 ((unsigned short*)element)[0] = unorm<16>(color.r);
198 ((unsigned short*)element)[1] = unorm<16>(color.g);
199 ((unsigned short*)element)[2] = unorm<16>(color.b);
200 ((unsigned short*)element)[3] = unorm<16>(color.a);
201 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400202 case FORMAT_A16B16G16R16I:
203 ((unsigned short*)element)[0] = static_cast<unsigned short>(scast<16>(color.r));
204 ((unsigned short*)element)[1] = static_cast<unsigned short>(scast<16>(color.g));
205 ((unsigned short*)element)[2] = static_cast<unsigned short>(scast<16>(color.b));
206 ((unsigned short*)element)[3] = static_cast<unsigned short>(scast<16>(color.a));
207 break;
208 case FORMAT_A16B16G16R16UI:
209 ((unsigned short*)element)[0] = static_cast<unsigned short>(ucast<16>(color.r));
210 ((unsigned short*)element)[1] = static_cast<unsigned short>(ucast<16>(color.g));
211 ((unsigned short*)element)[2] = static_cast<unsigned short>(ucast<16>(color.b));
212 ((unsigned short*)element)[3] = static_cast<unsigned short>(ucast<16>(color.a));
213 break;
214 case FORMAT_X16B16G16R16I:
215 ((unsigned short*)element)[0] = static_cast<unsigned short>(scast<16>(color.r));
216 ((unsigned short*)element)[1] = static_cast<unsigned short>(scast<16>(color.g));
217 ((unsigned short*)element)[2] = static_cast<unsigned short>(scast<16>(color.b));
218 break;
219 case FORMAT_X16B16G16R16UI:
220 ((unsigned short*)element)[0] = static_cast<unsigned short>(ucast<16>(color.r));
221 ((unsigned short*)element)[1] = static_cast<unsigned short>(ucast<16>(color.g));
222 ((unsigned short*)element)[2] = static_cast<unsigned short>(ucast<16>(color.b));
223 break;
224 case FORMAT_A32B32G32R32I:
225 case FORMAT_A32B32G32R32UI:
226 ((unsigned int*)element)[0] = static_cast<unsigned int>(color.r);
227 ((unsigned int*)element)[1] = static_cast<unsigned int>(color.g);
228 ((unsigned int*)element)[2] = static_cast<unsigned int>(color.b);
229 ((unsigned int*)element)[3] = static_cast<unsigned int>(color.a);
230 break;
231 case FORMAT_X32B32G32R32I:
232 case FORMAT_X32B32G32R32UI:
233 ((unsigned int*)element)[0] = static_cast<unsigned int>(color.r);
234 ((unsigned int*)element)[1] = static_cast<unsigned int>(color.g);
235 ((unsigned int*)element)[2] = static_cast<unsigned int>(color.b);
236 break;
John Bauman89401822014-05-06 15:04:28 -0400237 case FORMAT_V8U8:
238 *(unsigned short*)element = (snorm<8>(color.g) << 8) | (snorm<8>(color.r) << 0);
239 break;
240 case FORMAT_L6V5U5:
241 *(unsigned short*)element = (unorm<6>(color.b) << 10) | (snorm<5>(color.g) << 5) | (snorm<5>(color.r) << 0);
242 break;
243 case FORMAT_Q8W8V8U8:
244 *(unsigned int*)element = (snorm<8>(color.a) << 24) | (snorm<8>(color.b) << 16) | (snorm<8>(color.g) << 8) | (snorm<8>(color.r) << 0);
245 break;
246 case FORMAT_X8L8V8U8:
247 *(unsigned int*)element = 0xFF000000 | (unorm<8>(color.b) << 16) | (snorm<8>(color.g) << 8) | (snorm<8>(color.r) << 0);
248 break;
249 case FORMAT_V16U16:
250 *(unsigned int*)element = (snorm<16>(color.g) << 16) | (snorm<16>(color.r) << 0);
251 break;
252 case FORMAT_A2W10V10U10:
253 *(unsigned int*)element = (unorm<2>(color.a) << 30) | (snorm<10>(color.b) << 20) | (snorm<10>(color.g) << 10) | (snorm<10>(color.r) << 0);
254 break;
255 case FORMAT_A16W16V16U16:
256 ((unsigned short*)element)[0] = snorm<16>(color.r);
257 ((unsigned short*)element)[1] = snorm<16>(color.g);
258 ((unsigned short*)element)[2] = snorm<16>(color.b);
259 ((unsigned short*)element)[3] = unorm<16>(color.a);
260 break;
261 case FORMAT_Q16W16V16U16:
262 ((unsigned short*)element)[0] = snorm<16>(color.r);
263 ((unsigned short*)element)[1] = snorm<16>(color.g);
264 ((unsigned short*)element)[2] = snorm<16>(color.b);
265 ((unsigned short*)element)[3] = snorm<16>(color.a);
266 break;
267 case FORMAT_R8G8B8:
268 ((unsigned char*)element)[0] = unorm<8>(color.b);
269 ((unsigned char*)element)[1] = unorm<8>(color.g);
270 ((unsigned char*)element)[2] = unorm<8>(color.r);
271 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400272 case FORMAT_B8G8R8:
273 ((unsigned char*)element)[0] = unorm<8>(color.r);
274 ((unsigned char*)element)[1] = unorm<8>(color.g);
275 ((unsigned char*)element)[2] = unorm<8>(color.b);
276 break;
John Bauman89401822014-05-06 15:04:28 -0400277 case FORMAT_R16F:
278 *(half*)element = (half)color.r;
279 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400280 case FORMAT_A16F:
281 *(half*)element = (half)color.a;
282 break;
John Bauman89401822014-05-06 15:04:28 -0400283 case FORMAT_G16R16F:
284 ((half*)element)[0] = (half)color.r;
285 ((half*)element)[1] = (half)color.g;
286 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400287 case FORMAT_B16G16R16F:
288 ((half*)element)[0] = (half)color.r;
289 ((half*)element)[1] = (half)color.g;
290 ((half*)element)[2] = (half)color.b;
291 break;
John Bauman89401822014-05-06 15:04:28 -0400292 case FORMAT_A16B16G16R16F:
293 ((half*)element)[0] = (half)color.r;
294 ((half*)element)[1] = (half)color.g;
295 ((half*)element)[2] = (half)color.b;
296 ((half*)element)[3] = (half)color.a;
297 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400298 case FORMAT_A32F:
299 *(float*)element = color.a;
300 break;
John Bauman89401822014-05-06 15:04:28 -0400301 case FORMAT_R32F:
302 *(float*)element = color.r;
303 break;
304 case FORMAT_G32R32F:
305 ((float*)element)[0] = color.r;
306 ((float*)element)[1] = color.g;
307 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400308 case FORMAT_B32G32R32F:
309 ((float*)element)[0] = color.r;
310 ((float*)element)[1] = color.g;
311 ((float*)element)[2] = color.b;
312 break;
John Bauman89401822014-05-06 15:04:28 -0400313 case FORMAT_A32B32G32R32F:
314 ((float*)element)[0] = color.r;
315 ((float*)element)[1] = color.g;
316 ((float*)element)[2] = color.b;
317 ((float*)element)[3] = color.a;
318 break;
319 case FORMAT_D32F:
320 case FORMAT_D32F_LOCKABLE:
John Bauman66b8ab22014-05-06 15:57:45 -0400321 case FORMAT_D32FS8_TEXTURE:
322 case FORMAT_D32FS8_SHADOW:
John Bauman89401822014-05-06 15:04:28 -0400323 *((float*)element) = color.r;
324 break;
325 case FORMAT_D32F_COMPLEMENTARY:
326 *((float*)element) = 1 - color.r;
327 break;
328 case FORMAT_S8:
329 *((unsigned char*)element) = unorm<8>(color.r);
330 break;
331 case FORMAT_L8:
332 *(unsigned char*)element = unorm<8>(color.r);
333 break;
334 case FORMAT_A4L4:
335 *(unsigned char*)element = (unorm<4>(color.a) << 4) | (unorm<4>(color.r) << 0);
336 break;
337 case FORMAT_L16:
338 *(unsigned short*)element = unorm<16>(color.r);
339 break;
340 case FORMAT_A8L8:
341 *(unsigned short*)element = (unorm<8>(color.a) << 8) | (unorm<8>(color.r) << 0);
342 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400343 case FORMAT_L16F:
344 *(half*)element = (half)color.r;
345 break;
346 case FORMAT_A16L16F:
347 ((half*)element)[0] = (half)color.r;
348 ((half*)element)[1] = (half)color.a;
349 break;
350 case FORMAT_L32F:
351 *(float*)element = color.r;
352 break;
353 case FORMAT_A32L32F:
354 ((float*)element)[0] = color.r;
355 ((float*)element)[1] = color.a;
356 break;
John Bauman89401822014-05-06 15:04:28 -0400357 default:
358 ASSERT(false);
359 }
360 }
361
362 Color<float> Surface::Buffer::read(int x, int y, int z) const
363 {
364 void *element = (unsigned char*)buffer + x * bytes + y * pitchB + z * sliceB;
365
366 return read(element);
367 }
368
369 Color<float> Surface::Buffer::read(int x, int y) const
370 {
371 void *element = (unsigned char*)buffer + x * bytes + y * pitchB;
372
373 return read(element);
374 }
375
376 inline Color<float> Surface::Buffer::read(void *element) const
377 {
Nicolas Capens3f439242015-06-09 16:33:50 -0400378 float r = 0.0f;
379 float g = 0.0f;
380 float b = 0.0f;
381 float a = 1.0f;
John Bauman89401822014-05-06 15:04:28 -0400382
383 switch(format)
384 {
385 case FORMAT_P8:
386 {
387 ASSERT(palette);
388
389 unsigned int abgr = palette[*(unsigned char*)element];
390
391 r = (abgr & 0x000000FF) * (1.0f / 0x000000FF);
392 g = (abgr & 0x0000FF00) * (1.0f / 0x0000FF00);
393 b = (abgr & 0x00FF0000) * (1.0f / 0x00FF0000);
394 a = (abgr & 0xFF000000) * (1.0f / 0xFF000000);
395 }
396 break;
397 case FORMAT_A8P8:
398 {
399 ASSERT(palette);
400
401 unsigned int bgr = palette[((unsigned char*)element)[0]];
402
403 r = (bgr & 0x000000FF) * (1.0f / 0x000000FF);
404 g = (bgr & 0x0000FF00) * (1.0f / 0x0000FF00);
405 b = (bgr & 0x00FF0000) * (1.0f / 0x00FF0000);
406 a = ((unsigned char*)element)[1] * (1.0f / 0xFF);
407 }
408 break;
409 case FORMAT_A8:
410 r = 0;
411 g = 0;
412 b = 0;
413 a = *(unsigned char*)element * (1.0f / 0xFF);
414 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400415 case FORMAT_R8I_SNORM:
416 r = max((*(signed char*)element) * (1.0f / 0x7F), -1.0f);
417 break;
John Bauman89401822014-05-06 15:04:28 -0400418 case FORMAT_R8:
419 r = *(unsigned char*)element * (1.0f / 0xFF);
420 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400421 case FORMAT_R8I:
422 r = *(signed char*)element;
423 break;
424 case FORMAT_R8UI:
425 r = *(unsigned char*)element;
426 break;
John Bauman89401822014-05-06 15:04:28 -0400427 case FORMAT_R3G3B2:
428 {
429 unsigned char rgb = *(unsigned char*)element;
430
431 r = (rgb & 0xE0) * (1.0f / 0xE0);
432 g = (rgb & 0x1C) * (1.0f / 0x1C);
433 b = (rgb & 0x03) * (1.0f / 0x03);
434 }
435 break;
436 case FORMAT_A8R3G3B2:
437 {
438 unsigned short argb = *(unsigned short*)element;
439
440 a = (argb & 0xFF00) * (1.0f / 0xFF00);
441 r = (argb & 0x00E0) * (1.0f / 0x00E0);
442 g = (argb & 0x001C) * (1.0f / 0x001C);
443 b = (argb & 0x0003) * (1.0f / 0x0003);
444 }
445 break;
446 case FORMAT_X4R4G4B4:
447 {
448 unsigned short rgb = *(unsigned short*)element;
449
450 r = (rgb & 0x0F00) * (1.0f / 0x0F00);
451 g = (rgb & 0x00F0) * (1.0f / 0x00F0);
452 b = (rgb & 0x000F) * (1.0f / 0x000F);
453 }
454 break;
455 case FORMAT_A4R4G4B4:
456 {
457 unsigned short argb = *(unsigned short*)element;
458
459 a = (argb & 0xF000) * (1.0f / 0xF000);
460 r = (argb & 0x0F00) * (1.0f / 0x0F00);
461 g = (argb & 0x00F0) * (1.0f / 0x00F0);
462 b = (argb & 0x000F) * (1.0f / 0x000F);
463 }
464 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400465 case FORMAT_R4G4B4A4:
466 {
467 unsigned short rgba = *(unsigned short*)element;
468
469 r = (rgba & 0xF000) * (1.0f / 0xF000);
470 g = (rgba & 0x0F00) * (1.0f / 0x0F00);
471 b = (rgba & 0x00F0) * (1.0f / 0x00F0);
472 a = (rgba & 0x000F) * (1.0f / 0x000F);
473 }
474 break;
John Bauman89401822014-05-06 15:04:28 -0400475 case FORMAT_R5G6B5:
476 {
477 unsigned short rgb = *(unsigned short*)element;
478
479 r = (rgb & 0xF800) * (1.0f / 0xF800);
480 g = (rgb & 0x07E0) * (1.0f / 0x07E0);
481 b = (rgb & 0x001F) * (1.0f / 0x001F);
482 }
483 break;
484 case FORMAT_A1R5G5B5:
485 {
486 unsigned short argb = *(unsigned short*)element;
487
488 a = (argb & 0x8000) * (1.0f / 0x8000);
489 r = (argb & 0x7C00) * (1.0f / 0x7C00);
490 g = (argb & 0x03E0) * (1.0f / 0x03E0);
491 b = (argb & 0x001F) * (1.0f / 0x001F);
492 }
493 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400494 case FORMAT_R5G5B5A1:
495 {
496 unsigned short rgba = *(unsigned short*)element;
497
498 r = (rgba & 0xF800) * (1.0f / 0xF800);
499 g = (rgba & 0x07C0) * (1.0f / 0x07C0);
500 b = (rgba & 0x003E) * (1.0f / 0x003E);
501 a = (rgba & 0x0001) * (1.0f / 0x0001);
502 }
503 break;
John Bauman89401822014-05-06 15:04:28 -0400504 case FORMAT_X1R5G5B5:
505 {
506 unsigned short xrgb = *(unsigned short*)element;
507
508 r = (xrgb & 0x7C00) * (1.0f / 0x7C00);
509 g = (xrgb & 0x03E0) * (1.0f / 0x03E0);
510 b = (xrgb & 0x001F) * (1.0f / 0x001F);
511 }
512 break;
513 case FORMAT_A8R8G8B8:
514 {
515 unsigned int argb = *(unsigned int*)element;
516
517 a = (argb & 0xFF000000) * (1.0f / 0xFF000000);
518 r = (argb & 0x00FF0000) * (1.0f / 0x00FF0000);
519 g = (argb & 0x0000FF00) * (1.0f / 0x0000FF00);
520 b = (argb & 0x000000FF) * (1.0f / 0x000000FF);
521 }
522 break;
523 case FORMAT_X8R8G8B8:
524 {
525 unsigned int xrgb = *(unsigned int*)element;
526
527 r = (xrgb & 0x00FF0000) * (1.0f / 0x00FF0000);
528 g = (xrgb & 0x0000FF00) * (1.0f / 0x0000FF00);
529 b = (xrgb & 0x000000FF) * (1.0f / 0x000000FF);
530 }
531 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400532 case FORMAT_A8B8G8R8I_SNORM:
533 {
534 signed char* abgr = (signed char*)element;
535
536 r = max(abgr[0] * (1.0f / 0x7F), -1.0f);
537 g = max(abgr[1] * (1.0f / 0x7F), -1.0f);
538 b = max(abgr[2] * (1.0f / 0x7F), -1.0f);
539 a = max(abgr[3] * (1.0f / 0x7F), -1.0f);
540 }
541 break;
John Bauman89401822014-05-06 15:04:28 -0400542 case FORMAT_A8B8G8R8:
543 {
544 unsigned int abgr = *(unsigned int*)element;
545
546 a = (abgr & 0xFF000000) * (1.0f / 0xFF000000);
547 b = (abgr & 0x00FF0000) * (1.0f / 0x00FF0000);
548 g = (abgr & 0x0000FF00) * (1.0f / 0x0000FF00);
549 r = (abgr & 0x000000FF) * (1.0f / 0x000000FF);
550 }
551 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400552 case FORMAT_A8B8G8R8I:
553 {
554 signed char* abgr = (signed char*)element;
555
556 r = abgr[0];
557 g = abgr[1];
558 b = abgr[2];
559 a = abgr[3];
560 }
561 break;
562 case FORMAT_A8B8G8R8UI:
563 {
564 unsigned char* abgr = (unsigned char*)element;
565
566 r = abgr[0];
567 g = abgr[1];
568 b = abgr[2];
569 a = abgr[3];
570 }
571 break;
572 case FORMAT_X8B8G8R8I_SNORM:
573 {
574 signed char* bgr = (signed char*)element;
575
576 r = max(bgr[0] * (1.0f / 0x7F), -1.0f);
577 g = max(bgr[1] * (1.0f / 0x7F), -1.0f);
578 b = max(bgr[2] * (1.0f / 0x7F), -1.0f);
579 }
580 break;
John Bauman89401822014-05-06 15:04:28 -0400581 case FORMAT_X8B8G8R8:
582 {
583 unsigned int xbgr = *(unsigned int*)element;
584
585 b = (xbgr & 0x00FF0000) * (1.0f / 0x00FF0000);
586 g = (xbgr & 0x0000FF00) * (1.0f / 0x0000FF00);
587 r = (xbgr & 0x000000FF) * (1.0f / 0x000000FF);
588 }
589 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400590 case FORMAT_X8B8G8R8I:
591 {
592 signed char* bgr = (signed char*)element;
593
594 r = bgr[0];
595 g = bgr[1];
596 b = bgr[2];
597 }
598 break;
599 case FORMAT_X8B8G8R8UI:
600 {
601 unsigned char* bgr = (unsigned char*)element;
602
603 r = bgr[0];
604 g = bgr[1];
605 b = bgr[2];
606 }
607 break;
608 case FORMAT_G8R8I_SNORM:
609 {
610 signed char* gr = (signed char*)element;
611
612 r = (gr[0] & 0xFF00) * (1.0f / 0xFF00);
613 g = (gr[1] & 0x00FF) * (1.0f / 0x00FF);
614 }
615 break;
John Bauman89401822014-05-06 15:04:28 -0400616 case FORMAT_G8R8:
617 {
618 unsigned short gr = *(unsigned short*)element;
619
620 g = (gr & 0xFF00) * (1.0f / 0xFF00);
621 r = (gr & 0x00FF) * (1.0f / 0x00FF);
622 }
623 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400624 case FORMAT_G8R8I:
625 {
626 signed char* gr = (signed char*)element;
627
628 r = gr[0];
629 g = gr[1];
630 }
631 break;
632 case FORMAT_G8R8UI:
633 {
634 unsigned char* gr = (unsigned char*)element;
635
636 r = gr[0];
637 g = gr[1];
638 }
639 break;
640 case FORMAT_R16I:
641 r = *((short*)element);
642 break;
643 case FORMAT_R16UI:
644 r = *((unsigned short*)element);
645 break;
646 case FORMAT_G16R16I:
647 {
648 short* gr = (short*)element;
649
650 r = gr[0];
651 g = gr[1];
652 }
653 break;
John Bauman89401822014-05-06 15:04:28 -0400654 case FORMAT_G16R16:
655 {
656 unsigned int gr = *(unsigned int*)element;
657
658 g = (gr & 0xFFFF0000) * (1.0f / 0xFFFF0000);
659 r = (gr & 0x0000FFFF) * (1.0f / 0x0000FFFF);
660 }
661 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400662 case FORMAT_G16R16UI:
663 {
664 unsigned short* gr = (unsigned short*)element;
665
666 r = gr[0];
667 g = gr[1];
668 }
669 break;
John Bauman89401822014-05-06 15:04:28 -0400670 case FORMAT_A2R10G10B10:
671 {
672 unsigned int argb = *(unsigned int*)element;
673
674 a = (argb & 0xC0000000) * (1.0f / 0xC0000000);
675 r = (argb & 0x3FF00000) * (1.0f / 0x3FF00000);
676 g = (argb & 0x000FFC00) * (1.0f / 0x000FFC00);
677 b = (argb & 0x000003FF) * (1.0f / 0x000003FF);
678 }
679 break;
680 case FORMAT_A2B10G10R10:
681 {
682 unsigned int abgr = *(unsigned int*)element;
683
684 a = (abgr & 0xC0000000) * (1.0f / 0xC0000000);
685 b = (abgr & 0x3FF00000) * (1.0f / 0x3FF00000);
686 g = (abgr & 0x000FFC00) * (1.0f / 0x000FFC00);
687 r = (abgr & 0x000003FF) * (1.0f / 0x000003FF);
688 }
689 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400690 case FORMAT_A16B16G16R16I:
691 {
692 short* abgr = (short*)element;
693
694 r = abgr[0];
695 g = abgr[1];
696 b = abgr[2];
697 a = abgr[3];
698 }
699 break;
John Bauman89401822014-05-06 15:04:28 -0400700 case FORMAT_A16B16G16R16:
701 r = ((unsigned short*)element)[0] * (1.0f / 0xFFFF);
702 g = ((unsigned short*)element)[1] * (1.0f / 0xFFFF);
703 b = ((unsigned short*)element)[2] * (1.0f / 0xFFFF);
704 a = ((unsigned short*)element)[3] * (1.0f / 0xFFFF);
705 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400706 case FORMAT_A16B16G16R16UI:
707 {
708 unsigned short* abgr = (unsigned short*)element;
709
710 r = abgr[0];
711 g = abgr[1];
712 b = abgr[2];
713 a = abgr[3];
714 }
715 break;
716 case FORMAT_X16B16G16R16I:
717 {
718 short* bgr = (short*)element;
719
720 r = bgr[0];
721 g = bgr[1];
722 b = bgr[2];
723 }
724 break;
725 case FORMAT_X16B16G16R16UI:
726 {
727 unsigned short* bgr = (unsigned short*)element;
728
729 r = bgr[0];
730 g = bgr[1];
731 b = bgr[2];
732 }
733 break;
734 case FORMAT_A32B32G32R32I:
735 {
736 int* abgr = (int*)element;
737
738 r = static_cast<float>(abgr[0]);
739 g = static_cast<float>(abgr[1]);
740 b = static_cast<float>(abgr[2]);
741 a = static_cast<float>(abgr[3]);
742 }
743 break;
744 case FORMAT_A32B32G32R32UI:
745 {
746 unsigned int* abgr = (unsigned int*)element;
747
748 r = static_cast<float>(abgr[0]);
749 g = static_cast<float>(abgr[1]);
750 b = static_cast<float>(abgr[2]);
751 a = static_cast<float>(abgr[3]);
752 }
753 break;
754 case FORMAT_X32B32G32R32I:
755 {
756 int* bgr = (int*)element;
757
758 r = static_cast<float>(bgr[0]);
759 g = static_cast<float>(bgr[1]);
760 b = static_cast<float>(bgr[2]);
761 }
762 break;
763 case FORMAT_X32B32G32R32UI:
764 {
765 unsigned int* bgr = (unsigned int*)element;
766
767 r = static_cast<float>(bgr[0]);
768 g = static_cast<float>(bgr[1]);
769 b = static_cast<float>(bgr[2]);
770 }
771 break;
772 case FORMAT_G32R32I:
773 {
774 int* gr = (int*)element;
775
776 r = static_cast<float>(gr[0]);
777 g = static_cast<float>(gr[1]);
778 }
779 break;
780 case FORMAT_G32R32UI:
781 {
782 unsigned int* gr = (unsigned int*)element;
783
784 r = static_cast<float>(gr[0]);
785 g = static_cast<float>(gr[1]);
786 }
787 break;
788 case FORMAT_R32I:
789 r = static_cast<float>(*((int*)element));
790 break;
791 case FORMAT_R32UI:
792 r = static_cast<float>(*((unsigned int*)element));
793 break;
John Bauman89401822014-05-06 15:04:28 -0400794 case FORMAT_V8U8:
795 {
796 unsigned short vu = *(unsigned short*)element;
797
798 r = ((int)(vu & 0x00FF) << 24) * (1.0f / 0x7F000000);
799 g = ((int)(vu & 0xFF00) << 16) * (1.0f / 0x7F000000);
800 }
801 break;
802 case FORMAT_L6V5U5:
803 {
804 unsigned short lvu = *(unsigned short*)element;
805
806 r = ((int)(lvu & 0x001F) << 27) * (1.0f / 0x78000000);
807 g = ((int)(lvu & 0x03E0) << 22) * (1.0f / 0x78000000);
808 b = (lvu & 0xFC00) * (1.0f / 0xFC00);
809 }
810 break;
811 case FORMAT_Q8W8V8U8:
812 {
813 unsigned int qwvu = *(unsigned int*)element;
814
815 r = ((int)(qwvu & 0x000000FF) << 24) * (1.0f / 0x7F000000);
816 g = ((int)(qwvu & 0x0000FF00) << 16) * (1.0f / 0x7F000000);
817 b = ((int)(qwvu & 0x00FF0000) << 8) * (1.0f / 0x7F000000);
818 a = ((int)(qwvu & 0xFF000000) << 0) * (1.0f / 0x7F000000);
819 }
820 break;
821 case FORMAT_X8L8V8U8:
822 {
823 unsigned int xlvu = *(unsigned int*)element;
824
825 r = ((int)(xlvu & 0x000000FF) << 24) * (1.0f / 0x7F000000);
826 g = ((int)(xlvu & 0x0000FF00) << 16) * (1.0f / 0x7F000000);
827 b = (xlvu & 0x00FF0000) * (1.0f / 0x00FF0000);
828 }
829 break;
830 case FORMAT_R8G8B8:
831 r = ((unsigned char*)element)[2] * (1.0f / 0xFF);
832 g = ((unsigned char*)element)[1] * (1.0f / 0xFF);
833 b = ((unsigned char*)element)[0] * (1.0f / 0xFF);
834 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400835 case FORMAT_B8G8R8:
836 r = ((unsigned char*)element)[0] * (1.0f / 0xFF);
837 g = ((unsigned char*)element)[1] * (1.0f / 0xFF);
838 b = ((unsigned char*)element)[2] * (1.0f / 0xFF);
839 break;
John Bauman89401822014-05-06 15:04:28 -0400840 case FORMAT_V16U16:
841 {
842 unsigned int vu = *(unsigned int*)element;
843
844 r = ((int)(vu & 0x0000FFFF) << 16) * (1.0f / 0x7FFF0000);
845 g = ((int)(vu & 0xFFFF0000) << 0) * (1.0f / 0x7FFF0000);
846 }
847 break;
848 case FORMAT_A2W10V10U10:
849 {
850 unsigned int awvu = *(unsigned int*)element;
851
852 r = ((int)(awvu & 0x000003FF) << 22) * (1.0f / 0x7FC00000);
853 g = ((int)(awvu & 0x000FFC00) << 12) * (1.0f / 0x7FC00000);
854 b = ((int)(awvu & 0x3FF00000) << 2) * (1.0f / 0x7FC00000);
855 a = (awvu & 0xC0000000) * (1.0f / 0xC0000000);
856 }
857 break;
858 case FORMAT_A16W16V16U16:
859 r = ((signed short*)element)[0] * (1.0f / 0x7FFF);
860 g = ((signed short*)element)[1] * (1.0f / 0x7FFF);
861 b = ((signed short*)element)[2] * (1.0f / 0x7FFF);
862 a = ((unsigned short*)element)[3] * (1.0f / 0xFFFF);
863 break;
864 case FORMAT_Q16W16V16U16:
865 r = ((signed short*)element)[0] * (1.0f / 0x7FFF);
866 g = ((signed short*)element)[1] * (1.0f / 0x7FFF);
867 b = ((signed short*)element)[2] * (1.0f / 0x7FFF);
868 a = ((signed short*)element)[3] * (1.0f / 0x7FFF);
869 break;
870 case FORMAT_L8:
871 r =
872 g =
873 b = *(unsigned char*)element * (1.0f / 0xFF);
874 break;
875 case FORMAT_A4L4:
876 {
877 unsigned char al = *(unsigned char*)element;
878
879 r =
880 g =
881 b = (al & 0x0F) * (1.0f / 0x0F);
882 a = (al & 0xF0) * (1.0f / 0xF0);
883 }
884 break;
885 case FORMAT_L16:
886 r =
887 g =
888 b = *(unsigned short*)element * (1.0f / 0xFFFF);
889 break;
890 case FORMAT_A8L8:
891 r =
892 g =
893 b = ((unsigned char*)element)[0] * (1.0f / 0xFF);
894 a = ((unsigned char*)element)[1] * (1.0f / 0xFF);
895 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400896 case FORMAT_L16F:
897 r =
898 g =
899 b = *(half*)element;
900 break;
901 case FORMAT_A16L16F:
902 r =
903 g =
904 b = ((half*)element)[0];
905 a = ((half*)element)[1];
906 break;
907 case FORMAT_L32F:
908 r =
909 g =
910 b = *(float*)element;
911 break;
912 case FORMAT_A32L32F:
913 r =
914 g =
915 b = ((float*)element)[0];
916 a = ((float*)element)[1];
917 break;
918 case FORMAT_A16F:
919 a = *(half*)element;
920 break;
John Bauman89401822014-05-06 15:04:28 -0400921 case FORMAT_R16F:
922 r = *(half*)element;
923 break;
924 case FORMAT_G16R16F:
925 r = ((half*)element)[0];
926 g = ((half*)element)[1];
927 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400928 case FORMAT_B16G16R16F:
929 r = ((half*)element)[0];
930 g = ((half*)element)[1];
931 b = ((half*)element)[2];
932 break;
John Bauman89401822014-05-06 15:04:28 -0400933 case FORMAT_A16B16G16R16F:
934 r = ((half*)element)[0];
935 g = ((half*)element)[1];
936 b = ((half*)element)[2];
937 a = ((half*)element)[3];
938 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400939 case FORMAT_A32F:
940 a = *(float*)element;
941 break;
John Bauman89401822014-05-06 15:04:28 -0400942 case FORMAT_R32F:
943 r = *(float*)element;
944 break;
945 case FORMAT_G32R32F:
946 r = ((float*)element)[0];
947 g = ((float*)element)[1];
948 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400949 case FORMAT_B32G32R32F:
950 r = ((float*)element)[0];
951 g = ((float*)element)[1];
952 b = ((float*)element)[2];
953 break;
John Bauman89401822014-05-06 15:04:28 -0400954 case FORMAT_A32B32G32R32F:
955 r = ((float*)element)[0];
956 g = ((float*)element)[1];
957 b = ((float*)element)[2];
958 a = ((float*)element)[3];
959 break;
960 case FORMAT_D32F:
961 case FORMAT_D32F_LOCKABLE:
John Bauman66b8ab22014-05-06 15:57:45 -0400962 case FORMAT_D32FS8_TEXTURE:
963 case FORMAT_D32FS8_SHADOW:
John Bauman89401822014-05-06 15:04:28 -0400964 r = *(float*)element;
965 g = r;
966 b = r;
967 a = r;
968 break;
969 case FORMAT_D32F_COMPLEMENTARY:
John Bauman66b8ab22014-05-06 15:57:45 -0400970 r = 1.0f - *(float*)element;
John Bauman89401822014-05-06 15:04:28 -0400971 g = r;
972 b = r;
973 a = r;
974 break;
975 case FORMAT_S8:
976 r = *(unsigned char*)element * (1.0f / 0xFF);
977 break;
978 default:
979 ASSERT(false);
980 }
981
982 // if(sRGB)
983 // {
984 // r = sRGBtoLinear(r);
985 // g = sRGBtoLinear(g);
986 // b = sRGBtoLinear(b);
987 // }
988
989 return Color<float>(r, g, b, a);
990 }
991
992 Color<float> Surface::Buffer::sample(float x, float y, float z) const
993 {
994 x -= 0.5f;
995 y -= 0.5f;
996 z -= 0.5f;
997
998 int x0 = clamp((int)x, 0, width - 1);
999 int x1 = (x0 + 1 >= width) ? x0 : x0 + 1;
1000
1001 int y0 = clamp((int)y, 0, height - 1);
1002 int y1 = (y0 + 1 >= height) ? y0 : y0 + 1;
1003
1004 int z0 = clamp((int)z, 0, depth - 1);
1005 int z1 = (z0 + 1 >= depth) ? z0 : z0 + 1;
1006
1007 Color<float> c000 = read(x0, y0, z0);
1008 Color<float> c100 = read(x1, y0, z0);
1009 Color<float> c010 = read(x0, y1, z0);
1010 Color<float> c110 = read(x1, y1, z0);
1011 Color<float> c001 = read(x0, y0, z1);
1012 Color<float> c101 = read(x1, y0, z1);
1013 Color<float> c011 = read(x0, y1, z1);
1014 Color<float> c111 = read(x1, y1, z1);
1015
1016 float fx = x - x0;
1017 float fy = y - y0;
1018 float fz = z - z0;
1019
1020 c000 *= (1 - fx) * (1 - fy) * (1 - fz);
1021 c100 *= fx * (1 - fy) * (1 - fz);
1022 c010 *= (1 - fx) * fy * (1 - fz);
1023 c110 *= fx * fy * (1 - fz);
1024 c001 *= (1 - fx) * (1 - fy) * fz;
1025 c101 *= fx * (1 - fy) * fz;
1026 c011 *= (1 - fx) * fy * fz;
1027 c111 *= fx * fy * fz;
1028
1029 return c000 + c100 + c010 + c110 + c001 + c101 + c011 + c111;
1030 }
1031
1032 Color<float> Surface::Buffer::sample(float x, float y) const
1033 {
1034 x -= 0.5f;
1035 y -= 0.5f;
1036
1037 int x0 = clamp((int)x, 0, width - 1);
1038 int x1 = (x0 + 1 >= width) ? x0 : x0 + 1;
1039
1040 int y0 = clamp((int)y, 0, height - 1);
1041 int y1 = (y0 + 1 >= height) ? y0 : y0 + 1;
1042
1043 Color<float> c00 = read(x0, y0);
1044 Color<float> c10 = read(x1, y0);
1045 Color<float> c01 = read(x0, y1);
1046 Color<float> c11 = read(x1, y1);
1047
1048 float fx = x - x0;
1049 float fy = y - y0;
1050
1051 c00 *= (1 - fx) * (1 - fy);
1052 c10 *= fx * (1 - fy);
1053 c01 *= (1 - fx) * fy;
1054 c11 *= fx * fy;
1055
1056 return c00 + c10 + c01 + c11;
1057 }
1058
John Bauman19bac1e2014-05-06 15:23:49 -04001059 void *Surface::Buffer::lockRect(int x, int y, int z, Lock lock)
John Bauman89401822014-05-06 15:04:28 -04001060 {
1061 this->lock = lock;
1062
1063 switch(lock)
1064 {
1065 case LOCK_UNLOCKED:
1066 case LOCK_READONLY:
1067 break;
1068 case LOCK_WRITEONLY:
1069 case LOCK_READWRITE:
1070 case LOCK_DISCARD:
1071 dirty = true;
1072 break;
1073 default:
1074 ASSERT(false);
1075 }
1076
John Baumand4ae8632014-05-06 16:18:33 -04001077 if(buffer)
John Bauman89401822014-05-06 15:04:28 -04001078 {
John Baumand4ae8632014-05-06 16:18:33 -04001079 switch(format)
1080 {
1081 #if S3TC_SUPPORT
1082 case FORMAT_DXT1:
1083 #endif
1084 case FORMAT_ATI1:
Nicolas Capens22658242014-11-29 00:31:41 -05001085 case FORMAT_ETC1:
Alexis Hetu460e41f2015-09-01 10:58:37 -04001086 case FORMAT_R11_EAC:
1087 case FORMAT_SIGNED_R11_EAC:
1088 case FORMAT_RGB8_ETC2:
1089 case FORMAT_SRGB8_ETC2:
1090 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
1091 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
John Baumand4ae8632014-05-06 16:18:33 -04001092 return (unsigned char*)buffer + 8 * (x / 4) + (y / 4) * pitchB + z * sliceB;
Alexis Hetu460e41f2015-09-01 10:58:37 -04001093 case FORMAT_RG11_EAC:
1094 case FORMAT_SIGNED_RG11_EAC:
1095 case FORMAT_RGBA8_ETC2_EAC:
1096 case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
1097 case FORMAT_RGBA_ASTC_4x4_KHR:
1098 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
1099 return (unsigned char*)buffer + 16 * (x / 4) + (y / 4) * pitchB + z * sliceB;
1100 case FORMAT_RGBA_ASTC_5x4_KHR:
1101 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
1102 return (unsigned char*)buffer + 16 * (x / 5) + (y / 4) * pitchB + z * sliceB;
1103 case FORMAT_RGBA_ASTC_5x5_KHR:
1104 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
1105 return (unsigned char*)buffer + 16 * (x / 5) + (y / 5) * pitchB + z * sliceB;
1106 case FORMAT_RGBA_ASTC_6x5_KHR:
1107 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
1108 return (unsigned char*)buffer + 16 * (x / 6) + (y / 5) * pitchB + z * sliceB;
1109 case FORMAT_RGBA_ASTC_6x6_KHR:
1110 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
1111 return (unsigned char*)buffer + 16 * (x / 6) + (y / 6) * pitchB + z * sliceB;
1112 case FORMAT_RGBA_ASTC_8x5_KHR:
1113 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
1114 return (unsigned char*)buffer + 16 * (x / 8) + (y / 5) * pitchB + z * sliceB;
1115 case FORMAT_RGBA_ASTC_8x6_KHR:
1116 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
1117 return (unsigned char*)buffer + 16 * (x / 8) + (y / 6) * pitchB + z * sliceB;
1118 case FORMAT_RGBA_ASTC_8x8_KHR:
1119 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
1120 return (unsigned char*)buffer + 16 * (x / 8) + (y / 8) * pitchB + z * sliceB;
1121 case FORMAT_RGBA_ASTC_10x5_KHR:
1122 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
1123 return (unsigned char*)buffer + 16 * (x / 10) + (y / 5) * pitchB + z * sliceB;
1124 case FORMAT_RGBA_ASTC_10x6_KHR:
1125 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
1126 return (unsigned char*)buffer + 16 * (x / 10) + (y / 6) * pitchB + z * sliceB;
1127 case FORMAT_RGBA_ASTC_10x8_KHR:
1128 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
1129 return (unsigned char*)buffer + 16 * (x / 10) + (y / 8) * pitchB + z * sliceB;
1130 case FORMAT_RGBA_ASTC_10x10_KHR:
1131 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
1132 return (unsigned char*)buffer + 16 * (x / 10) + (y / 10) * pitchB + z * sliceB;
1133 case FORMAT_RGBA_ASTC_12x10_KHR:
1134 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
1135 return (unsigned char*)buffer + 16 * (x / 12) + (y / 10) * pitchB + z * sliceB;
1136 case FORMAT_RGBA_ASTC_12x12_KHR:
1137 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:
1138 return (unsigned char*)buffer + 16 * (x / 12) + (y / 12) * pitchB + z * sliceB;
John Baumand4ae8632014-05-06 16:18:33 -04001139 #if S3TC_SUPPORT
1140 case FORMAT_DXT3:
1141 case FORMAT_DXT5:
1142 #endif
1143 case FORMAT_ATI2:
1144 return (unsigned char*)buffer + 16 * (x / 4) + (y / 4) * pitchB + z * sliceB;
1145 default:
1146 return (unsigned char*)buffer + x * bytes + y * pitchB + z * sliceB;
1147 }
John Bauman89401822014-05-06 15:04:28 -04001148 }
1149
1150 return 0;
1151 }
1152
1153 void Surface::Buffer::unlockRect()
1154 {
1155 lock = LOCK_UNLOCKED;
1156 }
1157
Nicolas Capens477314b2015-06-09 16:47:29 -04001158 Surface::Surface(int width, int height, int depth, Format format, void *pixels, int pitch, int slice) : lockable(true), renderTarget(false)
1159 {
1160 resource = new Resource(0);
1161 hasParent = false;
1162 ownExternal = false;
1163 depth = max(1, depth);
1164
1165 external.buffer = pixels;
1166 external.width = width;
1167 external.height = height;
1168 external.depth = depth;
1169 external.format = format;
1170 external.bytes = bytes(external.format);
1171 external.pitchB = pitch;
1172 external.pitchP = pitch / external.bytes;
1173 external.sliceB = slice;
1174 external.sliceP = slice / external.bytes;
1175 external.lock = LOCK_UNLOCKED;
1176 external.dirty = true;
1177
1178 internal.buffer = 0;
1179 internal.width = width;
1180 internal.height = height;
1181 internal.depth = depth;
1182 internal.format = selectInternalFormat(format);
1183 internal.bytes = bytes(internal.format);
1184 internal.pitchB = pitchB(internal.width, internal.format, false);
1185 internal.pitchP = pitchP(internal.width, internal.format, false);
1186 internal.sliceB = sliceB(internal.width, internal.height, internal.format, false);
1187 internal.sliceP = sliceP(internal.width, internal.height, internal.format, false);
1188 internal.lock = LOCK_UNLOCKED;
1189 internal.dirty = false;
1190
1191 stencil.buffer = 0;
1192 stencil.width = width;
1193 stencil.height = height;
1194 stencil.depth = depth;
1195 stencil.format = FORMAT_S8;
1196 stencil.bytes = bytes(stencil.format);
1197 stencil.pitchB = pitchB(stencil.width, stencil.format, false);
1198 stencil.pitchP = pitchP(stencil.width, stencil.format, false);
1199 stencil.sliceB = sliceB(stencil.width, stencil.height, stencil.format, false);
1200 stencil.sliceP = sliceP(stencil.width, stencil.height, stencil.format, false);
1201 stencil.lock = LOCK_UNLOCKED;
1202 stencil.dirty = false;
1203
1204 dirtyMipmaps = true;
1205 paletteUsed = 0;
1206 }
1207
John Bauman89401822014-05-06 15:04:28 -04001208 Surface::Surface(Resource *texture, int width, int height, int depth, Format format, bool lockable, bool renderTarget) : lockable(lockable), renderTarget(renderTarget)
1209 {
1210 resource = texture ? texture : new Resource(0);
John Bauman19bac1e2014-05-06 15:23:49 -04001211 hasParent = texture != 0;
Nicolas Capens477314b2015-06-09 16:47:29 -04001212 ownExternal = true;
John Bauman89401822014-05-06 15:04:28 -04001213 depth = max(1, depth);
1214
1215 external.buffer = 0;
1216 external.width = width;
1217 external.height = height;
1218 external.depth = depth;
1219 external.format = format;
1220 external.bytes = bytes(external.format);
1221 external.pitchB = pitchB(external.width, external.format, renderTarget && !texture);
1222 external.pitchP = pitchP(external.width, external.format, renderTarget && !texture);
1223 external.sliceB = sliceB(external.width, external.height, external.format, renderTarget && !texture);
1224 external.sliceP = sliceP(external.width, external.height, external.format, renderTarget && !texture);
1225 external.lock = LOCK_UNLOCKED;
1226 external.dirty = false;
John Bauman89401822014-05-06 15:04:28 -04001227
1228 internal.buffer = 0;
1229 internal.width = width;
1230 internal.height = height;
1231 internal.depth = depth;
1232 internal.format = selectInternalFormat(format);
1233 internal.bytes = bytes(internal.format);
1234 internal.pitchB = pitchB(internal.width, internal.format, renderTarget);
1235 internal.pitchP = pitchP(internal.width, internal.format, renderTarget);
1236 internal.sliceB = sliceB(internal.width, internal.height, internal.format, renderTarget);
1237 internal.sliceP = sliceP(internal.width, internal.height, internal.format, renderTarget);
1238 internal.lock = LOCK_UNLOCKED;
1239 internal.dirty = false;
John Bauman89401822014-05-06 15:04:28 -04001240
1241 stencil.buffer = 0;
1242 stencil.width = width;
1243 stencil.height = height;
1244 stencil.depth = depth;
1245 stencil.format = FORMAT_S8;
1246 stencil.bytes = bytes(stencil.format);
1247 stencil.pitchB = pitchB(stencil.width, stencil.format, renderTarget);
1248 stencil.pitchP = pitchP(stencil.width, stencil.format, renderTarget);
1249 stencil.sliceB = sliceB(stencil.width, stencil.height, stencil.format, renderTarget);
1250 stencil.sliceP = sliceP(stencil.width, stencil.height, stencil.format, renderTarget);
1251 stencil.lock = LOCK_UNLOCKED;
1252 stencil.dirty = false;
John Bauman89401822014-05-06 15:04:28 -04001253
1254 dirtyMipmaps = true;
John Bauman66b8ab22014-05-06 15:57:45 -04001255 paletteUsed = 0;
John Bauman89401822014-05-06 15:04:28 -04001256 }
1257
1258 Surface::~Surface()
1259 {
John Bauman8a4f6fc2014-05-06 15:26:18 -04001260 // Synchronize so we can deallocate the buffers below
1261 resource->lock(DESTRUCT);
1262 resource->unlock();
1263
John Bauman89401822014-05-06 15:04:28 -04001264 if(!hasParent)
1265 {
1266 resource->destruct();
1267 }
1268
Nicolas Capens477314b2015-06-09 16:47:29 -04001269 if(ownExternal)
1270 {
1271 deallocate(external.buffer);
1272 }
John Bauman89401822014-05-06 15:04:28 -04001273
1274 if(internal.buffer != external.buffer)
1275 {
1276 deallocate(internal.buffer);
1277 }
1278
1279 deallocate(stencil.buffer);
1280
1281 external.buffer = 0;
1282 internal.buffer = 0;
1283 stencil.buffer = 0;
1284 }
1285
John Bauman19bac1e2014-05-06 15:23:49 -04001286 void *Surface::lockExternal(int x, int y, int z, Lock lock, Accessor client)
John Bauman89401822014-05-06 15:04:28 -04001287 {
1288 resource->lock(client);
1289
1290 if(!external.buffer)
1291 {
1292 if(internal.buffer && identicalFormats())
1293 {
1294 external.buffer = internal.buffer;
1295 }
1296 else
1297 {
1298 external.buffer = allocateBuffer(external.width, external.height, external.depth, external.format);
1299 }
1300 }
1301
1302 if(internal.dirty)
1303 {
1304 if(lock != LOCK_DISCARD)
1305 {
1306 update(external, internal);
1307 }
John Bauman66b8ab22014-05-06 15:57:45 -04001308
1309 internal.dirty = false;
John Bauman89401822014-05-06 15:04:28 -04001310 }
1311
1312 switch(lock)
1313 {
1314 case LOCK_READONLY:
1315 break;
1316 case LOCK_WRITEONLY:
1317 case LOCK_READWRITE:
1318 case LOCK_DISCARD:
1319 dirtyMipmaps = true;
1320 break;
1321 default:
1322 ASSERT(false);
1323 }
1324
John Bauman19bac1e2014-05-06 15:23:49 -04001325 return external.lockRect(x, y, z, lock);
John Bauman89401822014-05-06 15:04:28 -04001326 }
1327
1328 void Surface::unlockExternal()
1329 {
1330 resource->unlock();
1331
1332 external.unlockRect();
1333 }
1334
John Bauman19bac1e2014-05-06 15:23:49 -04001335 void *Surface::lockInternal(int x, int y, int z, Lock lock, Accessor client)
John Bauman89401822014-05-06 15:04:28 -04001336 {
1337 if(lock != LOCK_UNLOCKED)
1338 {
1339 resource->lock(client);
1340 }
1341
1342 if(!internal.buffer)
1343 {
1344 if(external.buffer && identicalFormats())
1345 {
1346 internal.buffer = external.buffer;
1347 }
1348 else
1349 {
1350 internal.buffer = allocateBuffer(internal.width, internal.height, internal.depth, internal.format);
1351 }
1352 }
1353
1354 // FIXME: WHQL requires conversion to lower external precision and back
1355 if(logPrecision >= WHQL)
1356 {
1357 if(internal.dirty && renderTarget && internal.format != external.format)
1358 {
1359 if(lock != LOCK_DISCARD)
1360 {
1361 switch(external.format)
1362 {
1363 case FORMAT_R3G3B2:
1364 case FORMAT_A8R3G3B2:
1365 case FORMAT_A1R5G5B5:
1366 case FORMAT_A2R10G10B10:
1367 case FORMAT_A2B10G10R10:
1368 lockExternal(0, 0, 0, LOCK_READWRITE, client);
1369 unlockExternal();
1370 break;
1371 default:
1372 // Difference passes WHQL
1373 break;
1374 }
1375 }
1376 }
1377 }
1378
John Bauman66b8ab22014-05-06 15:57:45 -04001379 if(external.dirty || (isPalette(external.format) && paletteUsed != Surface::paletteID))
John Bauman89401822014-05-06 15:04:28 -04001380 {
1381 if(lock != LOCK_DISCARD)
1382 {
1383 update(internal, external);
1384 }
John Bauman89401822014-05-06 15:04:28 -04001385
John Bauman66b8ab22014-05-06 15:57:45 -04001386 external.dirty = false;
1387 paletteUsed = Surface::paletteID;
John Bauman89401822014-05-06 15:04:28 -04001388 }
1389
1390 switch(lock)
1391 {
1392 case LOCK_UNLOCKED:
1393 case LOCK_READONLY:
1394 break;
1395 case LOCK_WRITEONLY:
1396 case LOCK_READWRITE:
1397 case LOCK_DISCARD:
1398 dirtyMipmaps = true;
1399 break;
1400 default:
1401 ASSERT(false);
1402 }
1403
1404 if(lock == LOCK_READONLY && client == PUBLIC)
1405 {
1406 resolve();
1407 }
1408
John Bauman19bac1e2014-05-06 15:23:49 -04001409 return internal.lockRect(x, y, z, lock);
John Bauman89401822014-05-06 15:04:28 -04001410 }
1411
1412 void Surface::unlockInternal()
1413 {
1414 resource->unlock();
1415
1416 internal.unlockRect();
1417 }
1418
1419 void *Surface::lockStencil(int front, Accessor client)
1420 {
1421 resource->lock(client);
1422
1423 if(!stencil.buffer)
1424 {
1425 stencil.buffer = allocateBuffer(stencil.width, stencil.height, stencil.depth, stencil.format);
1426 }
1427
John Bauman89401822014-05-06 15:04:28 -04001428 return stencil.lockRect(0, 0, front, LOCK_READWRITE); // FIXME
1429 }
1430
1431 void Surface::unlockStencil()
1432 {
1433 resource->unlock();
1434
1435 stencil.unlockRect();
1436 }
1437
1438 int Surface::bytes(Format format)
1439 {
1440 switch(format)
1441 {
1442 case FORMAT_NULL: return 0;
1443 case FORMAT_P8: return 1;
1444 case FORMAT_A8P8: return 2;
1445 case FORMAT_A8: return 1;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001446 case FORMAT_R8I: return 1;
John Bauman89401822014-05-06 15:04:28 -04001447 case FORMAT_R8: return 1;
1448 case FORMAT_R3G3B2: return 1;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001449 case FORMAT_R16I: return 2;
1450 case FORMAT_R16UI: return 2;
John Bauman89401822014-05-06 15:04:28 -04001451 case FORMAT_A8R3G3B2: return 2;
1452 case FORMAT_R5G6B5: return 2;
1453 case FORMAT_A1R5G5B5: return 2;
1454 case FORMAT_X1R5G5B5: return 2;
Nicolas Capens80594422015-06-09 16:42:56 -04001455 case FORMAT_R5G5B5A1: return 2;
John Bauman89401822014-05-06 15:04:28 -04001456 case FORMAT_X4R4G4B4: return 2;
1457 case FORMAT_A4R4G4B4: return 2;
Nicolas Capens80594422015-06-09 16:42:56 -04001458 case FORMAT_R4G4B4A4: return 2;
John Bauman89401822014-05-06 15:04:28 -04001459 case FORMAT_R8G8B8: return 3;
Nicolas Capens80594422015-06-09 16:42:56 -04001460 case FORMAT_B8G8R8: return 3;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001461 case FORMAT_R32I: return 4;
1462 case FORMAT_R32UI: return 4;
John Bauman89401822014-05-06 15:04:28 -04001463 case FORMAT_X8R8G8B8: return 4;
1464 // case FORMAT_X8G8R8B8Q: return 4;
1465 case FORMAT_A8R8G8B8: return 4;
1466 // case FORMAT_A8G8R8B8Q: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001467 case FORMAT_X8B8G8R8I: return 4;
John Bauman89401822014-05-06 15:04:28 -04001468 case FORMAT_X8B8G8R8: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001469 case FORMAT_A8B8G8R8I: return 4;
1470 case FORMAT_R8UI: return 1;
1471 case FORMAT_G8R8UI: return 2;
1472 case FORMAT_X8B8G8R8UI: return 4;
1473 case FORMAT_A8B8G8R8UI: return 4;
John Bauman89401822014-05-06 15:04:28 -04001474 case FORMAT_A8B8G8R8: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001475 case FORMAT_R8I_SNORM: return 1;
1476 case FORMAT_G8R8I_SNORM: return 2;
1477 case FORMAT_X8B8G8R8I_SNORM: return 4;
1478 case FORMAT_A8B8G8R8I_SNORM: return 4;
John Bauman89401822014-05-06 15:04:28 -04001479 case FORMAT_A2R10G10B10: return 4;
1480 case FORMAT_A2B10G10R10: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001481 case FORMAT_G8R8I: return 2;
John Bauman89401822014-05-06 15:04:28 -04001482 case FORMAT_G8R8: return 2;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001483 case FORMAT_G16R16I: return 4;
1484 case FORMAT_G16R16UI: return 4;
John Bauman89401822014-05-06 15:04:28 -04001485 case FORMAT_G16R16: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001486 case FORMAT_G32R32I: return 8;
1487 case FORMAT_G32R32UI: return 8;
1488 case FORMAT_X16B16G16R16I: return 8;
1489 case FORMAT_X16B16G16R16UI: return 8;
1490 case FORMAT_A16B16G16R16I: return 8;
1491 case FORMAT_A16B16G16R16UI: return 8;
John Bauman89401822014-05-06 15:04:28 -04001492 case FORMAT_A16B16G16R16: return 8;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001493 case FORMAT_X32B32G32R32I: return 16;
1494 case FORMAT_X32B32G32R32UI: return 16;
1495 case FORMAT_A32B32G32R32I: return 16;
1496 case FORMAT_A32B32G32R32UI: return 16;
John Bauman89401822014-05-06 15:04:28 -04001497 // Compressed formats
1498 #if S3TC_SUPPORT
1499 case FORMAT_DXT1: return 2; // Column of four pixels
1500 case FORMAT_DXT3: return 4; // Column of four pixels
1501 case FORMAT_DXT5: return 4; // Column of four pixels
John Bauman66b8ab22014-05-06 15:57:45 -04001502 #endif
John Bauman89401822014-05-06 15:04:28 -04001503 case FORMAT_ATI1: return 2; // Column of four pixels
1504 case FORMAT_ATI2: return 4; // Column of four pixels
Nicolas Capens22658242014-11-29 00:31:41 -05001505 case FORMAT_ETC1: return 2; // Column of four pixels
Alexis Hetu460e41f2015-09-01 10:58:37 -04001506 case FORMAT_R11_EAC: return 2;
1507 case FORMAT_SIGNED_R11_EAC: return 2;
1508 case FORMAT_RG11_EAC: return 4;
1509 case FORMAT_SIGNED_RG11_EAC: return 4;
1510 case FORMAT_RGB8_ETC2: return 2;
1511 case FORMAT_SRGB8_ETC2: return 2;
1512 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2: return 2;
1513 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2: return 2;
1514 case FORMAT_RGBA8_ETC2_EAC: return 4;
1515 case FORMAT_SRGB8_ALPHA8_ETC2_EAC: return 4;
1516 case FORMAT_RGBA_ASTC_4x4_KHR:
1517 case FORMAT_RGBA_ASTC_5x4_KHR:
1518 case FORMAT_RGBA_ASTC_5x5_KHR:
1519 case FORMAT_RGBA_ASTC_6x5_KHR:
1520 case FORMAT_RGBA_ASTC_6x6_KHR:
1521 case FORMAT_RGBA_ASTC_8x5_KHR:
1522 case FORMAT_RGBA_ASTC_8x6_KHR:
1523 case FORMAT_RGBA_ASTC_8x8_KHR:
1524 case FORMAT_RGBA_ASTC_10x5_KHR:
1525 case FORMAT_RGBA_ASTC_10x6_KHR:
1526 case FORMAT_RGBA_ASTC_10x8_KHR:
1527 case FORMAT_RGBA_ASTC_10x10_KHR:
1528 case FORMAT_RGBA_ASTC_12x10_KHR:
1529 case FORMAT_RGBA_ASTC_12x12_KHR:
1530 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
1531 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
1532 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
1533 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
1534 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
1535 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
1536 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
1537 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
1538 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
1539 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
1540 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
1541 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
1542 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
1543 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR: return 0; // FIXME
John Bauman89401822014-05-06 15:04:28 -04001544 // Bumpmap formats
1545 case FORMAT_V8U8: return 2;
1546 case FORMAT_L6V5U5: return 2;
1547 case FORMAT_Q8W8V8U8: return 4;
1548 case FORMAT_X8L8V8U8: return 4;
1549 case FORMAT_A2W10V10U10: return 4;
1550 case FORMAT_V16U16: return 4;
1551 case FORMAT_A16W16V16U16: return 8;
1552 case FORMAT_Q16W16V16U16: return 8;
1553 // Luminance formats
1554 case FORMAT_L8: return 1;
1555 case FORMAT_A4L4: return 1;
1556 case FORMAT_L16: return 2;
1557 case FORMAT_A8L8: return 2;
Nicolas Capens80594422015-06-09 16:42:56 -04001558 case FORMAT_L16F: return 2;
1559 case FORMAT_A16L16F: return 4;
1560 case FORMAT_L32F: return 4;
1561 case FORMAT_A32L32F: return 8;
John Bauman89401822014-05-06 15:04:28 -04001562 // Floating-point formats
Nicolas Capens80594422015-06-09 16:42:56 -04001563 case FORMAT_A16F: return 2;
John Bauman89401822014-05-06 15:04:28 -04001564 case FORMAT_R16F: return 2;
1565 case FORMAT_G16R16F: return 4;
Nicolas Capens80594422015-06-09 16:42:56 -04001566 case FORMAT_B16G16R16F: return 6;
John Bauman89401822014-05-06 15:04:28 -04001567 case FORMAT_A16B16G16R16F: return 8;
Nicolas Capens80594422015-06-09 16:42:56 -04001568 case FORMAT_A32F: return 4;
John Bauman89401822014-05-06 15:04:28 -04001569 case FORMAT_R32F: return 4;
1570 case FORMAT_G32R32F: return 8;
Nicolas Capens80594422015-06-09 16:42:56 -04001571 case FORMAT_B32G32R32F: return 12;
John Bauman89401822014-05-06 15:04:28 -04001572 case FORMAT_A32B32G32R32F: return 16;
1573 // Depth/stencil formats
1574 case FORMAT_D16: return 2;
1575 case FORMAT_D32: return 4;
1576 case FORMAT_D24X8: return 4;
1577 case FORMAT_D24S8: return 4;
1578 case FORMAT_D24FS8: return 4;
1579 case FORMAT_D32F: return 4;
1580 case FORMAT_D32F_COMPLEMENTARY: return 4;
1581 case FORMAT_D32F_LOCKABLE: return 4;
John Bauman66b8ab22014-05-06 15:57:45 -04001582 case FORMAT_D32FS8_TEXTURE: return 4;
1583 case FORMAT_D32FS8_SHADOW: return 4;
1584 case FORMAT_DF24S8: return 4;
1585 case FORMAT_DF16S8: return 2;
John Bauman89401822014-05-06 15:04:28 -04001586 case FORMAT_INTZ: return 4;
1587 case FORMAT_S8: return 1;
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04001588 case FORMAT_YV12_BT601: return 1; // Y plane only
1589 case FORMAT_YV12_BT709: return 1; // Y plane only
1590 case FORMAT_YV12_JFIF: return 1; // Y plane only
John Bauman89401822014-05-06 15:04:28 -04001591 default:
1592 ASSERT(false);
1593 }
1594
1595 return 0;
1596 }
1597
1598 int Surface::pitchB(int width, Format format, bool target)
1599 {
1600 if(target || isDepth(format) || isStencil(format))
1601 {
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04001602 width = align(width, 2);
John Bauman89401822014-05-06 15:04:28 -04001603 }
1604
1605 switch(format)
1606 {
1607 #if S3TC_SUPPORT
1608 case FORMAT_DXT1:
Nicolas Capens22658242014-11-29 00:31:41 -05001609 #endif
1610 case FORMAT_ETC1:
Alexis Hetu460e41f2015-09-01 10:58:37 -04001611 case FORMAT_R11_EAC:
1612 case FORMAT_SIGNED_R11_EAC:
1613 case FORMAT_RGB8_ETC2:
1614 case FORMAT_SRGB8_ETC2:
1615 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
1616 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
John Bauman89401822014-05-06 15:04:28 -04001617 return 8 * ((width + 3) / 4); // 64 bit per 4x4 block, computed per 4 rows
Alexis Hetu460e41f2015-09-01 10:58:37 -04001618 case FORMAT_RG11_EAC:
1619 case FORMAT_SIGNED_RG11_EAC:
1620 case FORMAT_RGBA8_ETC2_EAC:
1621 case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
1622 case FORMAT_RGBA_ASTC_4x4_KHR:
1623 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
1624 return 16 * ((width + 3) / 4); // 128 bit per 4x4 block, computed per 4 rows
1625 case FORMAT_RGBA_ASTC_5x4_KHR:
1626 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
1627 case FORMAT_RGBA_ASTC_5x5_KHR:
1628 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
1629 return 16 * ((width + 4) / 5);
1630 case FORMAT_RGBA_ASTC_6x5_KHR:
1631 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
1632 case FORMAT_RGBA_ASTC_6x6_KHR:
1633 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
1634 return 16 * ((width + 5) / 6);
1635 case FORMAT_RGBA_ASTC_8x5_KHR:
1636 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
1637 case FORMAT_RGBA_ASTC_8x6_KHR:
1638 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
1639 case FORMAT_RGBA_ASTC_8x8_KHR:
1640 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
1641 return 16 * ((width + 7) / 8);
1642 case FORMAT_RGBA_ASTC_10x5_KHR:
1643 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
1644 case FORMAT_RGBA_ASTC_10x6_KHR:
1645 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
1646 case FORMAT_RGBA_ASTC_10x8_KHR:
1647 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
1648 case FORMAT_RGBA_ASTC_10x10_KHR:
1649 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
1650 return 16 * ((width + 9) / 10);
1651 case FORMAT_RGBA_ASTC_12x10_KHR:
1652 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
1653 case FORMAT_RGBA_ASTC_12x12_KHR:
1654 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:
1655 return 16 * ((width + 11) / 12);
Nicolas Capens22658242014-11-29 00:31:41 -05001656 #if S3TC_SUPPORT
John Bauman89401822014-05-06 15:04:28 -04001657 case FORMAT_DXT3:
1658 case FORMAT_DXT5:
1659 return 16 * ((width + 3) / 4); // 128 bit per 4x4 block, computed per 4 rows
John Bauman66b8ab22014-05-06 15:57:45 -04001660 #endif
John Bauman89401822014-05-06 15:04:28 -04001661 case FORMAT_ATI1:
1662 return 2 * ((width + 3) / 4); // 64 bit per 4x4 block, computed per row
1663 case FORMAT_ATI2:
1664 return 4 * ((width + 3) / 4); // 128 bit per 4x4 block, computed per row
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04001665 case FORMAT_YV12_BT601:
1666 case FORMAT_YV12_BT709:
1667 case FORMAT_YV12_JFIF:
1668 return align(width, 16);
John Bauman89401822014-05-06 15:04:28 -04001669 default:
1670 return bytes(format) * width;
1671 }
1672 }
1673
1674 int Surface::pitchP(int width, Format format, bool target)
1675 {
1676 int B = bytes(format);
1677
1678 return B > 0 ? pitchB(width, format, target) / B : 0;
1679 }
1680
1681 int Surface::sliceB(int width, int height, Format format, bool target)
1682 {
1683 if(target || isDepth(format) || isStencil(format))
1684 {
1685 height = ((height + 1) & ~1);
1686 }
1687
1688 switch(format)
1689 {
1690 #if S3TC_SUPPORT
1691 case FORMAT_DXT1:
1692 case FORMAT_DXT3:
1693 case FORMAT_DXT5:
John Bauman66b8ab22014-05-06 15:57:45 -04001694 #endif
Nicolas Capens22658242014-11-29 00:31:41 -05001695 case FORMAT_ETC1:
Alexis Hetu460e41f2015-09-01 10:58:37 -04001696 case FORMAT_R11_EAC:
1697 case FORMAT_SIGNED_R11_EAC:
1698 case FORMAT_RG11_EAC:
1699 case FORMAT_SIGNED_RG11_EAC:
1700 case FORMAT_RGB8_ETC2:
1701 case FORMAT_SRGB8_ETC2:
1702 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
1703 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
1704 case FORMAT_RGBA8_ETC2_EAC:
1705 case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
1706 case FORMAT_RGBA_ASTC_4x4_KHR:
1707 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
1708 case FORMAT_RGBA_ASTC_5x4_KHR:
1709 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
Nicolas Capens22658242014-11-29 00:31:41 -05001710 return pitchB(width, format, target) * ((height + 3) / 4); // Pitch computed per 4 rows
Alexis Hetu460e41f2015-09-01 10:58:37 -04001711 case FORMAT_RGBA_ASTC_5x5_KHR:
1712 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
1713 case FORMAT_RGBA_ASTC_6x5_KHR:
1714 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
1715 case FORMAT_RGBA_ASTC_8x5_KHR:
1716 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
1717 case FORMAT_RGBA_ASTC_10x5_KHR:
1718 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
1719 return pitchB(width, format, target) * ((height + 4) / 5); // Pitch computed per 5 rows
1720 case FORMAT_RGBA_ASTC_6x6_KHR:
1721 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
1722 case FORMAT_RGBA_ASTC_8x6_KHR:
1723 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
1724 case FORMAT_RGBA_ASTC_10x6_KHR:
1725 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
1726 return pitchB(width, format, target) * ((height + 5) / 6); // Pitch computed per 6 rows
1727 case FORMAT_RGBA_ASTC_8x8_KHR:
1728 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
1729 case FORMAT_RGBA_ASTC_10x8_KHR:
1730 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
1731 return pitchB(width, format, target) * ((height + 7) / 8); // Pitch computed per 8 rows
1732 case FORMAT_RGBA_ASTC_10x10_KHR:
1733 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
1734 case FORMAT_RGBA_ASTC_12x10_KHR:
1735 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
1736 return pitchB(width, format, target) * ((height + 9) / 10); // Pitch computed per 10 rows
1737 case FORMAT_RGBA_ASTC_12x12_KHR:
1738 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:
1739 return pitchB(width, format, target) * ((height + 11) / 12); // Pitch computed per 12 rows
Nicolas Capens22658242014-11-29 00:31:41 -05001740 case FORMAT_ATI1:
1741 case FORMAT_ATI2:
John Bauman89401822014-05-06 15:04:28 -04001742 default:
Nicolas Capens22658242014-11-29 00:31:41 -05001743 return pitchB(width, format, target) * height; // Pitch computed per row
John Bauman89401822014-05-06 15:04:28 -04001744 }
1745 }
1746
1747 int Surface::sliceP(int width, int height, Format format, bool target)
1748 {
1749 int B = bytes(format);
1750
1751 return B > 0 ? sliceB(width, height, format, target) / B : 0;
1752 }
1753
1754 void Surface::update(Buffer &destination, Buffer &source)
1755 {
1756 // ASSERT(source.lock != LOCK_UNLOCKED);
1757 // ASSERT(destination.lock != LOCK_UNLOCKED);
1758
1759 if(destination.buffer != source.buffer)
1760 {
1761 ASSERT(source.dirty && !destination.dirty);
1762
1763 switch(source.format)
1764 {
1765 case FORMAT_R8G8B8: decodeR8G8B8(destination, source); break; // FIXME: Check destination format
John Bauman89401822014-05-06 15:04:28 -04001766 case FORMAT_X1R5G5B5: decodeX1R5G5B5(destination, source); break; // FIXME: Check destination format
1767 case FORMAT_A1R5G5B5: decodeA1R5G5B5(destination, source); break; // FIXME: Check destination format
1768 case FORMAT_X4R4G4B4: decodeX4R4G4B4(destination, source); break; // FIXME: Check destination format
1769 case FORMAT_A4R4G4B4: decodeA4R4G4B4(destination, source); break; // FIXME: Check destination format
1770 case FORMAT_P8: decodeP8(destination, source); break; // FIXME: Check destination format
1771 #if S3TC_SUPPORT
1772 case FORMAT_DXT1: decodeDXT1(destination, source); break; // FIXME: Check destination format
1773 case FORMAT_DXT3: decodeDXT3(destination, source); break; // FIXME: Check destination format
1774 case FORMAT_DXT5: decodeDXT5(destination, source); break; // FIXME: Check destination format
Nicolas Capens22658242014-11-29 00:31:41 -05001775 #endif
John Bauman89401822014-05-06 15:04:28 -04001776 case FORMAT_ATI1: decodeATI1(destination, source); break; // FIXME: Check destination format
1777 case FORMAT_ATI2: decodeATI2(destination, source); break; // FIXME: Check destination format
Alexis Hetu460e41f2015-09-01 10:58:37 -04001778 case FORMAT_R11_EAC: decodeEAC(destination, source, 1, false); break; // FIXME: Check destination format
1779 case FORMAT_SIGNED_R11_EAC: decodeEAC(destination, source, 1, true); break; // FIXME: Check destination format
1780 case FORMAT_RG11_EAC: decodeEAC(destination, source, 2, false); break; // FIXME: Check destination format
1781 case FORMAT_SIGNED_RG11_EAC: decodeEAC(destination, source, 2, true); break; // FIXME: Check destination format
Alexis Hetu0de50d42015-09-09 13:56:41 -04001782 case FORMAT_ETC1:
Alexis Hetu460e41f2015-09-01 10:58:37 -04001783 case FORMAT_RGB8_ETC2: decodeETC2(destination, source, 0, false); break; // FIXME: Check destination format
1784 case FORMAT_SRGB8_ETC2: decodeETC2(destination, source, 0, true); break; // FIXME: Check destination format
1785 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2: decodeETC2(destination, source, 1, false); break; // FIXME: Check destination format
1786 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2: decodeETC2(destination, source, 1, true); break; // FIXME: Check destination format
1787 case FORMAT_RGBA8_ETC2_EAC: decodeETC2(destination, source, 8, false); break; // FIXME: Check destination format
1788 case FORMAT_SRGB8_ALPHA8_ETC2_EAC: decodeETC2(destination, source, 8, true); break; // FIXME: Check destination format
1789 case FORMAT_RGBA_ASTC_4x4_KHR: decodeASTC(destination, source, 4, 4, 1, false); break; // FIXME: Check destination format
1790 case FORMAT_RGBA_ASTC_5x4_KHR: decodeASTC(destination, source, 5, 4, 1, false); break; // FIXME: Check destination format
1791 case FORMAT_RGBA_ASTC_5x5_KHR: decodeASTC(destination, source, 5, 5, 1, false); break; // FIXME: Check destination format
1792 case FORMAT_RGBA_ASTC_6x5_KHR: decodeASTC(destination, source, 6, 5, 1, false); break; // FIXME: Check destination format
1793 case FORMAT_RGBA_ASTC_6x6_KHR: decodeASTC(destination, source, 6, 6, 1, false); break; // FIXME: Check destination format
1794 case FORMAT_RGBA_ASTC_8x5_KHR: decodeASTC(destination, source, 8, 5, 1, false); break; // FIXME: Check destination format
1795 case FORMAT_RGBA_ASTC_8x6_KHR: decodeASTC(destination, source, 8, 6, 1, false); break; // FIXME: Check destination format
1796 case FORMAT_RGBA_ASTC_8x8_KHR: decodeASTC(destination, source, 8, 8, 1, false); break; // FIXME: Check destination format
1797 case FORMAT_RGBA_ASTC_10x5_KHR: decodeASTC(destination, source, 10, 5, 1, false); break; // FIXME: Check destination format
1798 case FORMAT_RGBA_ASTC_10x6_KHR: decodeASTC(destination, source, 10, 6, 1, false); break; // FIXME: Check destination format
1799 case FORMAT_RGBA_ASTC_10x8_KHR: decodeASTC(destination, source, 10, 8, 1, false); break; // FIXME: Check destination format
1800 case FORMAT_RGBA_ASTC_10x10_KHR: decodeASTC(destination, source, 10, 10, 1, false); break; // FIXME: Check destination format
1801 case FORMAT_RGBA_ASTC_12x10_KHR: decodeASTC(destination, source, 12, 10, 1, false); break; // FIXME: Check destination format
1802 case FORMAT_RGBA_ASTC_12x12_KHR: decodeASTC(destination, source, 12, 12, 1, false); break; // FIXME: Check destination format
1803 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR: decodeASTC(destination, source, 4, 4, 1, true); break; // FIXME: Check destination format
1804 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR: decodeASTC(destination, source, 5, 4, 1, true); break; // FIXME: Check destination format
1805 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR: decodeASTC(destination, source, 5, 5, 1, true); break; // FIXME: Check destination format
1806 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR: decodeASTC(destination, source, 6, 5, 1, true); break; // FIXME: Check destination format
1807 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR: decodeASTC(destination, source, 6, 6, 1, true); break; // FIXME: Check destination format
1808 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR: decodeASTC(destination, source, 8, 5, 1, true); break; // FIXME: Check destination format
1809 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR: decodeASTC(destination, source, 8, 6, 1, true); break; // FIXME: Check destination format
1810 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR: decodeASTC(destination, source, 8, 8, 1, true); break; // FIXME: Check destination format
1811 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR: decodeASTC(destination, source, 10, 5, 1, true); break; // FIXME: Check destination format
1812 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR: decodeASTC(destination, source, 10, 6, 1, true); break; // FIXME: Check destination format
1813 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR: decodeASTC(destination, source, 10, 8, 1, true); break; // FIXME: Check destination format
1814 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR: decodeASTC(destination, source, 10, 10, 1, true); break; // FIXME: Check destination format
1815 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR: decodeASTC(destination, source, 12, 10, 1, true); break; // FIXME: Check destination format
1816 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR: decodeASTC(destination, source, 12, 12, 1, true); break; // FIXME: Check destination format
John Bauman89401822014-05-06 15:04:28 -04001817 default: genericUpdate(destination, source); break;
1818 }
1819 }
John Bauman89401822014-05-06 15:04:28 -04001820 }
1821
1822 void Surface::genericUpdate(Buffer &destination, Buffer &source)
1823 {
1824 unsigned char *sourceSlice = (unsigned char*)source.buffer;
1825 unsigned char *destinationSlice = (unsigned char*)destination.buffer;
1826
1827 int depth = min(destination.depth, source.depth);
1828 int height = min(destination.height, source.height);
1829 int width = min(destination.width, source.width);
1830 int rowBytes = width * source.bytes;
1831
1832 for(int z = 0; z < depth; z++)
1833 {
1834 unsigned char *sourceRow = sourceSlice;
1835 unsigned char *destinationRow = destinationSlice;
1836
1837 for(int y = 0; y < height; y++)
1838 {
1839 if(source.format == destination.format)
1840 {
1841 memcpy(destinationRow, sourceRow, rowBytes);
1842 }
1843 else
1844 {
1845 unsigned char *sourceElement = sourceRow;
1846 unsigned char *destinationElement = destinationRow;
1847
1848 for(int x = 0; x < width; x++)
1849 {
1850 Color<float> color = source.read(sourceElement);
1851 destination.write(destinationElement, color);
1852
1853 sourceElement += source.bytes;
1854 destinationElement += destination.bytes;
1855 }
1856 }
1857
1858 sourceRow += source.pitchB;
1859 destinationRow += destination.pitchB;
1860 }
1861
1862 sourceSlice += source.sliceB;
1863 destinationSlice += destination.sliceB;
1864 }
1865 }
1866
1867 void Surface::decodeR8G8B8(Buffer &destination, const Buffer &source)
1868 {
1869 unsigned char *sourceSlice = (unsigned char*)source.buffer;
1870 unsigned char *destinationSlice = (unsigned char*)destination.buffer;
1871
1872 for(int z = 0; z < destination.depth && z < source.depth; z++)
1873 {
1874 unsigned char *sourceRow = sourceSlice;
1875 unsigned char *destinationRow = destinationSlice;
1876
1877 for(int y = 0; y < destination.height && y < source.height; y++)
1878 {
1879 unsigned char *sourceElement = sourceRow;
1880 unsigned char *destinationElement = destinationRow;
1881
1882 for(int x = 0; x < destination.width && x < source.width; x++)
1883 {
1884 unsigned int b = sourceElement[0];
1885 unsigned int g = sourceElement[1];
1886 unsigned int r = sourceElement[2];
1887
1888 *(unsigned int*)destinationElement = 0xFF000000 | (r << 16) | (g << 8) | (b << 0);
1889
1890 sourceElement += source.bytes;
1891 destinationElement += destination.bytes;
1892 }
1893
1894 sourceRow += source.pitchB;
1895 destinationRow += destination.pitchB;
1896 }
1897
1898 sourceSlice += source.sliceB;
1899 destinationSlice += destination.sliceB;
1900 }
1901 }
1902
John Bauman89401822014-05-06 15:04:28 -04001903 void Surface::decodeX1R5G5B5(Buffer &destination, const Buffer &source)
1904 {
1905 unsigned char *sourceSlice = (unsigned char*)source.buffer;
1906 unsigned char *destinationSlice = (unsigned char*)destination.buffer;
1907
1908 for(int z = 0; z < destination.depth && z < source.depth; z++)
1909 {
1910 unsigned char *sourceRow = sourceSlice;
1911 unsigned char *destinationRow = destinationSlice;
1912
1913 for(int y = 0; y < destination.height && y < source.height; y++)
1914 {
1915 unsigned char *sourceElement = sourceRow;
1916 unsigned char *destinationElement = destinationRow;
1917
1918 for(int x = 0; x < destination.width && x < source.width; x++)
1919 {
1920 unsigned int xrgb = *(unsigned short*)sourceElement;
1921
1922 unsigned int r = (((xrgb & 0x7C00) * 134771 + 0x800000) >> 8) & 0x00FF0000;
1923 unsigned int g = (((xrgb & 0x03E0) * 16846 + 0x8000) >> 8) & 0x0000FF00;
1924 unsigned int b = (((xrgb & 0x001F) * 2106 + 0x80) >> 8);
1925
1926 *(unsigned int*)destinationElement = 0xFF000000 | r | g | b;
1927
1928 sourceElement += source.bytes;
1929 destinationElement += destination.bytes;
1930 }
1931
1932 sourceRow += source.pitchB;
1933 destinationRow += destination.pitchB;
1934 }
1935
1936 sourceSlice += source.sliceB;
1937 destinationSlice += destination.sliceB;
1938 }
1939 }
1940
1941 void Surface::decodeA1R5G5B5(Buffer &destination, const Buffer &source)
1942 {
1943 unsigned char *sourceSlice = (unsigned char*)source.buffer;
1944 unsigned char *destinationSlice = (unsigned char*)destination.buffer;
1945
1946 for(int z = 0; z < destination.depth && z < source.depth; z++)
1947 {
1948 unsigned char *sourceRow = sourceSlice;
1949 unsigned char *destinationRow = destinationSlice;
1950
1951 for(int y = 0; y < destination.height && y < source.height; y++)
1952 {
1953 unsigned char *sourceElement = sourceRow;
1954 unsigned char *destinationElement = destinationRow;
1955
1956 for(int x = 0; x < destination.width && x < source.width; x++)
1957 {
1958 unsigned int argb = *(unsigned short*)sourceElement;
1959
1960 unsigned int a = (argb & 0x8000) * 130560;
1961 unsigned int r = (((argb & 0x7C00) * 134771 + 0x800000) >> 8) & 0x00FF0000;
1962 unsigned int g = (((argb & 0x03E0) * 16846 + 0x8000) >> 8) & 0x0000FF00;
1963 unsigned int b = (((argb & 0x001F) * 2106 + 0x80) >> 8);
1964
1965 *(unsigned int*)destinationElement = a | r | g | b;
1966
1967 sourceElement += source.bytes;
1968 destinationElement += destination.bytes;
1969 }
1970
1971 sourceRow += source.pitchB;
1972 destinationRow += destination.pitchB;
1973 }
1974
1975 sourceSlice += source.sliceB;
1976 destinationSlice += destination.sliceB;
1977 }
1978 }
1979
1980 void Surface::decodeX4R4G4B4(Buffer &destination, const Buffer &source)
1981 {
1982 unsigned char *sourceSlice = (unsigned char*)source.buffer;
1983 unsigned char *destinationSlice = (unsigned char*)destination.buffer;
1984
1985 for(int z = 0; z < destination.depth && z < source.depth; z++)
1986 {
1987 unsigned char *sourceRow = sourceSlice;
1988 unsigned char *destinationRow = destinationSlice;
1989
1990 for(int y = 0; y < destination.height && y < source.height; y++)
1991 {
1992 unsigned char *sourceElement = sourceRow;
1993 unsigned char *destinationElement = destinationRow;
1994
1995 for(int x = 0; x < destination.width && x < source.width; x++)
1996 {
1997 unsigned int xrgb = *(unsigned short*)sourceElement;
1998
1999 unsigned int r = ((xrgb & 0x0F00) * 0x00001100) & 0x00FF0000;
2000 unsigned int g = ((xrgb & 0x00F0) * 0x00000110) & 0x0000FF00;
2001 unsigned int b = (xrgb & 0x000F) * 0x00000011;
2002
2003 *(unsigned int*)destinationElement = 0xFF000000 | r | g | b;
2004
2005 sourceElement += source.bytes;
2006 destinationElement += destination.bytes;
2007 }
2008
2009 sourceRow += source.pitchB;
2010 destinationRow += destination.pitchB;
2011 }
2012
2013 sourceSlice += source.sliceB;
2014 destinationSlice += destination.sliceB;
2015 }
2016 }
2017
2018 void Surface::decodeA4R4G4B4(Buffer &destination, const Buffer &source)
2019 {
2020 unsigned char *sourceSlice = (unsigned char*)source.buffer;
2021 unsigned char *destinationSlice = (unsigned char*)destination.buffer;
2022
2023 for(int z = 0; z < destination.depth && z < source.depth; z++)
2024 {
2025 unsigned char *sourceRow = sourceSlice;
2026 unsigned char *destinationRow = destinationSlice;
2027
2028 for(int y = 0; y < destination.height && y < source.height; y++)
2029 {
2030 unsigned char *sourceElement = sourceRow;
2031 unsigned char *destinationElement = destinationRow;
2032
2033 for(int x = 0; x < destination.width && x < source.width; x++)
2034 {
2035 unsigned int argb = *(unsigned short*)sourceElement;
2036
2037 unsigned int a = ((argb & 0xF000) * 0x00011000) & 0xFF000000;
2038 unsigned int r = ((argb & 0x0F00) * 0x00001100) & 0x00FF0000;
2039 unsigned int g = ((argb & 0x00F0) * 0x00000110) & 0x0000FF00;
2040 unsigned int b = (argb & 0x000F) * 0x00000011;
2041
2042 *(unsigned int*)destinationElement = a | r | g | b;
2043
2044 sourceElement += source.bytes;
2045 destinationElement += destination.bytes;
2046 }
2047
2048 sourceRow += source.pitchB;
2049 destinationRow += destination.pitchB;
2050 }
2051
2052 sourceSlice += source.sliceB;
2053 destinationSlice += destination.sliceB;
2054 }
2055 }
2056
2057 void Surface::decodeP8(Buffer &destination, const Buffer &source)
2058 {
2059 unsigned char *sourceSlice = (unsigned char*)source.buffer;
2060 unsigned char *destinationSlice = (unsigned char*)destination.buffer;
2061
2062 for(int z = 0; z < destination.depth && z < source.depth; z++)
2063 {
2064 unsigned char *sourceRow = sourceSlice;
2065 unsigned char *destinationRow = destinationSlice;
2066
2067 for(int y = 0; y < destination.height && y < source.height; y++)
2068 {
2069 unsigned char *sourceElement = sourceRow;
2070 unsigned char *destinationElement = destinationRow;
2071
2072 for(int x = 0; x < destination.width && x < source.width; x++)
2073 {
2074 unsigned int abgr = palette[*(unsigned char*)sourceElement];
2075
2076 unsigned int r = (abgr & 0x000000FF) << 16;
2077 unsigned int g = (abgr & 0x0000FF00) << 0;
2078 unsigned int b = (abgr & 0x00FF0000) >> 16;
2079 unsigned int a = (abgr & 0xFF000000) >> 0;
2080
2081 *(unsigned int*)destinationElement = a | r | g | b;
2082
2083 sourceElement += source.bytes;
2084 destinationElement += destination.bytes;
2085 }
2086
2087 sourceRow += source.pitchB;
2088 destinationRow += destination.pitchB;
2089 }
2090
2091 sourceSlice += source.sliceB;
2092 destinationSlice += destination.sliceB;
2093 }
2094 }
2095
2096#if S3TC_SUPPORT
2097 void Surface::decodeDXT1(Buffer &internal, const Buffer &external)
2098 {
2099 unsigned int *destSlice = (unsigned int*)internal.buffer;
Nicolas Capens22658242014-11-29 00:31:41 -05002100 const DXT1 *source = (const DXT1*)external.buffer;
John Bauman89401822014-05-06 15:04:28 -04002101
2102 for(int z = 0; z < external.depth; z++)
2103 {
2104 unsigned int *dest = destSlice;
2105
2106 for(int y = 0; y < external.height; y += 4)
2107 {
2108 for(int x = 0; x < external.width; x += 4)
2109 {
2110 Color<byte> c[4];
2111
2112 c[0] = source->c0;
2113 c[1] = source->c1;
2114
2115 if(source->c0 > source->c1) // No transparency
2116 {
2117 // c2 = 2 / 3 * c0 + 1 / 3 * c1
2118 c[2].r = (byte)((2 * (word)c[0].r + (word)c[1].r + 1) / 3);
2119 c[2].g = (byte)((2 * (word)c[0].g + (word)c[1].g + 1) / 3);
2120 c[2].b = (byte)((2 * (word)c[0].b + (word)c[1].b + 1) / 3);
2121 c[2].a = 0xFF;
2122
2123 // c3 = 1 / 3 * c0 + 2 / 3 * c1
2124 c[3].r = (byte)(((word)c[0].r + 2 * (word)c[1].r + 1) / 3);
2125 c[3].g = (byte)(((word)c[0].g + 2 * (word)c[1].g + 1) / 3);
2126 c[3].b = (byte)(((word)c[0].b + 2 * (word)c[1].b + 1) / 3);
2127 c[3].a = 0xFF;
2128 }
2129 else // c3 transparent
2130 {
2131 // c2 = 1 / 2 * c0 + 1 / 2 * c1
2132 c[2].r = (byte)(((word)c[0].r + (word)c[1].r) / 2);
2133 c[2].g = (byte)(((word)c[0].g + (word)c[1].g) / 2);
2134 c[2].b = (byte)(((word)c[0].b + (word)c[1].b) / 2);
2135 c[2].a = 0xFF;
2136
2137 c[3].r = 0;
2138 c[3].g = 0;
2139 c[3].b = 0;
2140 c[3].a = 0;
2141 }
2142
2143 for(int j = 0; j < 4 && (y + j) < internal.height; j++)
2144 {
2145 for(int i = 0; i < 4 && (x + i) < internal.width; i++)
2146 {
2147 dest[(x + i) + (y + j) * internal.width] = c[(unsigned int)(source->lut >> 2 * (i + j * 4)) % 4];
2148 }
2149 }
2150
2151 source++;
2152 }
2153 }
2154
2155 (byte*&)destSlice += internal.sliceB;
2156 }
2157 }
2158
2159 void Surface::decodeDXT3(Buffer &internal, const Buffer &external)
2160 {
2161 unsigned int *destSlice = (unsigned int*)internal.buffer;
Nicolas Capens22658242014-11-29 00:31:41 -05002162 const DXT3 *source = (const DXT3*)external.buffer;
John Bauman89401822014-05-06 15:04:28 -04002163
2164 for(int z = 0; z < external.depth; z++)
2165 {
2166 unsigned int *dest = destSlice;
2167
2168 for(int y = 0; y < external.height; y += 4)
2169 {
2170 for(int x = 0; x < external.width; x += 4)
2171 {
2172 Color<byte> c[4];
2173
2174 c[0] = source->c0;
2175 c[1] = source->c1;
2176
2177 // c2 = 2 / 3 * c0 + 1 / 3 * c1
2178 c[2].r = (byte)((2 * (word)c[0].r + (word)c[1].r + 1) / 3);
2179 c[2].g = (byte)((2 * (word)c[0].g + (word)c[1].g + 1) / 3);
2180 c[2].b = (byte)((2 * (word)c[0].b + (word)c[1].b + 1) / 3);
2181
2182 // c3 = 1 / 3 * c0 + 2 / 3 * c1
2183 c[3].r = (byte)(((word)c[0].r + 2 * (word)c[1].r + 1) / 3);
2184 c[3].g = (byte)(((word)c[0].g + 2 * (word)c[1].g + 1) / 3);
2185 c[3].b = (byte)(((word)c[0].b + 2 * (word)c[1].b + 1) / 3);
2186
2187 for(int j = 0; j < 4 && (y + j) < internal.height; j++)
2188 {
2189 for(int i = 0; i < 4 && (x + i) < internal.width; i++)
2190 {
2191 unsigned int a = (unsigned int)(source->a >> 4 * (i + j * 4)) & 0x0F;
2192 unsigned int color = (c[(unsigned int)(source->lut >> 2 * (i + j * 4)) % 4] & 0x00FFFFFF) | ((a << 28) + (a << 24));
2193
2194 dest[(x + i) + (y + j) * internal.width] = color;
2195 }
2196 }
2197
2198 source++;
2199 }
2200 }
2201
2202 (byte*&)destSlice += internal.sliceB;
2203 }
2204 }
2205
2206 void Surface::decodeDXT5(Buffer &internal, const Buffer &external)
2207 {
2208 unsigned int *destSlice = (unsigned int*)internal.buffer;
Nicolas Capens22658242014-11-29 00:31:41 -05002209 const DXT5 *source = (const DXT5*)external.buffer;
John Bauman89401822014-05-06 15:04:28 -04002210
2211 for(int z = 0; z < external.depth; z++)
2212 {
2213 unsigned int *dest = destSlice;
2214
2215 for(int y = 0; y < external.height; y += 4)
2216 {
2217 for(int x = 0; x < external.width; x += 4)
2218 {
2219 Color<byte> c[4];
2220
2221 c[0] = source->c0;
2222 c[1] = source->c1;
2223
2224 // c2 = 2 / 3 * c0 + 1 / 3 * c1
2225 c[2].r = (byte)((2 * (word)c[0].r + (word)c[1].r + 1) / 3);
2226 c[2].g = (byte)((2 * (word)c[0].g + (word)c[1].g + 1) / 3);
2227 c[2].b = (byte)((2 * (word)c[0].b + (word)c[1].b + 1) / 3);
2228
2229 // c3 = 1 / 3 * c0 + 2 / 3 * c1
2230 c[3].r = (byte)(((word)c[0].r + 2 * (word)c[1].r + 1) / 3);
2231 c[3].g = (byte)(((word)c[0].g + 2 * (word)c[1].g + 1) / 3);
2232 c[3].b = (byte)(((word)c[0].b + 2 * (word)c[1].b + 1) / 3);
2233
2234 byte a[8];
2235
2236 a[0] = source->a0;
2237 a[1] = source->a1;
2238
2239 if(a[0] > a[1])
2240 {
2241 a[2] = (byte)((6 * (word)a[0] + 1 * (word)a[1] + 3) / 7);
2242 a[3] = (byte)((5 * (word)a[0] + 2 * (word)a[1] + 3) / 7);
2243 a[4] = (byte)((4 * (word)a[0] + 3 * (word)a[1] + 3) / 7);
2244 a[5] = (byte)((3 * (word)a[0] + 4 * (word)a[1] + 3) / 7);
2245 a[6] = (byte)((2 * (word)a[0] + 5 * (word)a[1] + 3) / 7);
2246 a[7] = (byte)((1 * (word)a[0] + 6 * (word)a[1] + 3) / 7);
2247 }
2248 else
2249 {
2250 a[2] = (byte)((4 * (word)a[0] + 1 * (word)a[1] + 2) / 5);
2251 a[3] = (byte)((3 * (word)a[0] + 2 * (word)a[1] + 2) / 5);
2252 a[4] = (byte)((2 * (word)a[0] + 3 * (word)a[1] + 2) / 5);
2253 a[5] = (byte)((1 * (word)a[0] + 4 * (word)a[1] + 2) / 5);
2254 a[6] = 0;
2255 a[7] = 0xFF;
2256 }
2257
2258 for(int j = 0; j < 4 && (y + j) < internal.height; j++)
2259 {
2260 for(int i = 0; i < 4 && (x + i) < internal.width; i++)
2261 {
2262 unsigned int alpha = (unsigned int)a[(unsigned int)(source->alut >> (16 + 3 * (i + j * 4))) % 8] << 24;
2263 unsigned int color = (c[(source->clut >> 2 * (i + j * 4)) % 4] & 0x00FFFFFF) | alpha;
2264
2265 dest[(x + i) + (y + j) * internal.width] = color;
2266 }
2267 }
2268
2269 source++;
2270 }
2271 }
2272
2273 (byte*&)destSlice += internal.sliceB;
2274 }
2275 }
Nicolas Capens22658242014-11-29 00:31:41 -05002276#endif
John Bauman89401822014-05-06 15:04:28 -04002277
2278 void Surface::decodeATI1(Buffer &internal, const Buffer &external)
2279 {
2280 byte *destSlice = (byte*)internal.buffer;
Nicolas Capens22658242014-11-29 00:31:41 -05002281 const ATI1 *source = (const ATI1*)external.buffer;
John Bauman89401822014-05-06 15:04:28 -04002282
2283 for(int z = 0; z < external.depth; z++)
2284 {
2285 byte *dest = destSlice;
2286
2287 for(int y = 0; y < external.height; y += 4)
2288 {
2289 for(int x = 0; x < external.width; x += 4)
2290 {
2291 byte r[8];
2292
2293 r[0] = source->r0;
2294 r[1] = source->r1;
2295
2296 if(r[0] > r[1])
2297 {
2298 r[2] = (byte)((6 * (word)r[0] + 1 * (word)r[1] + 3) / 7);
2299 r[3] = (byte)((5 * (word)r[0] + 2 * (word)r[1] + 3) / 7);
2300 r[4] = (byte)((4 * (word)r[0] + 3 * (word)r[1] + 3) / 7);
2301 r[5] = (byte)((3 * (word)r[0] + 4 * (word)r[1] + 3) / 7);
2302 r[6] = (byte)((2 * (word)r[0] + 5 * (word)r[1] + 3) / 7);
2303 r[7] = (byte)((1 * (word)r[0] + 6 * (word)r[1] + 3) / 7);
2304 }
2305 else
2306 {
2307 r[2] = (byte)((4 * (word)r[0] + 1 * (word)r[1] + 2) / 5);
2308 r[3] = (byte)((3 * (word)r[0] + 2 * (word)r[1] + 2) / 5);
2309 r[4] = (byte)((2 * (word)r[0] + 3 * (word)r[1] + 2) / 5);
2310 r[5] = (byte)((1 * (word)r[0] + 4 * (word)r[1] + 2) / 5);
2311 r[6] = 0;
2312 r[7] = 0xFF;
2313 }
2314
2315 for(int j = 0; j < 4 && (y + j) < internal.height; j++)
2316 {
2317 for(int i = 0; i < 4 && (x + i) < internal.width; i++)
2318 {
2319 dest[(x + i) + (y + j) * internal.width] = r[(unsigned int)(source->rlut >> (16 + 3 * (i + j * 4))) % 8];
2320 }
2321 }
2322
2323 source++;
2324 }
2325 }
2326
2327 destSlice += internal.sliceB;
2328 }
2329 }
2330
2331 void Surface::decodeATI2(Buffer &internal, const Buffer &external)
2332 {
2333 word *destSlice = (word*)internal.buffer;
Nicolas Capens22658242014-11-29 00:31:41 -05002334 const ATI2 *source = (const ATI2*)external.buffer;
John Bauman89401822014-05-06 15:04:28 -04002335
2336 for(int z = 0; z < external.depth; z++)
2337 {
2338 word *dest = destSlice;
2339
2340 for(int y = 0; y < external.height; y += 4)
2341 {
2342 for(int x = 0; x < external.width; x += 4)
2343 {
2344 byte X[8];
2345
2346 X[0] = source->x0;
2347 X[1] = source->x1;
2348
2349 if(X[0] > X[1])
2350 {
2351 X[2] = (byte)((6 * (word)X[0] + 1 * (word)X[1] + 3) / 7);
2352 X[3] = (byte)((5 * (word)X[0] + 2 * (word)X[1] + 3) / 7);
2353 X[4] = (byte)((4 * (word)X[0] + 3 * (word)X[1] + 3) / 7);
2354 X[5] = (byte)((3 * (word)X[0] + 4 * (word)X[1] + 3) / 7);
2355 X[6] = (byte)((2 * (word)X[0] + 5 * (word)X[1] + 3) / 7);
2356 X[7] = (byte)((1 * (word)X[0] + 6 * (word)X[1] + 3) / 7);
2357 }
2358 else
2359 {
2360 X[2] = (byte)((4 * (word)X[0] + 1 * (word)X[1] + 2) / 5);
2361 X[3] = (byte)((3 * (word)X[0] + 2 * (word)X[1] + 2) / 5);
2362 X[4] = (byte)((2 * (word)X[0] + 3 * (word)X[1] + 2) / 5);
2363 X[5] = (byte)((1 * (word)X[0] + 4 * (word)X[1] + 2) / 5);
2364 X[6] = 0;
2365 X[7] = 0xFF;
2366 }
2367
2368 byte Y[8];
2369
2370 Y[0] = source->y0;
2371 Y[1] = source->y1;
2372
2373 if(Y[0] > Y[1])
2374 {
2375 Y[2] = (byte)((6 * (word)Y[0] + 1 * (word)Y[1] + 3) / 7);
2376 Y[3] = (byte)((5 * (word)Y[0] + 2 * (word)Y[1] + 3) / 7);
2377 Y[4] = (byte)((4 * (word)Y[0] + 3 * (word)Y[1] + 3) / 7);
2378 Y[5] = (byte)((3 * (word)Y[0] + 4 * (word)Y[1] + 3) / 7);
2379 Y[6] = (byte)((2 * (word)Y[0] + 5 * (word)Y[1] + 3) / 7);
2380 Y[7] = (byte)((1 * (word)Y[0] + 6 * (word)Y[1] + 3) / 7);
2381 }
2382 else
2383 {
2384 Y[2] = (byte)((4 * (word)Y[0] + 1 * (word)Y[1] + 2) / 5);
2385 Y[3] = (byte)((3 * (word)Y[0] + 2 * (word)Y[1] + 2) / 5);
2386 Y[4] = (byte)((2 * (word)Y[0] + 3 * (word)Y[1] + 2) / 5);
2387 Y[5] = (byte)((1 * (word)Y[0] + 4 * (word)Y[1] + 2) / 5);
2388 Y[6] = 0;
2389 Y[7] = 0xFF;
2390 }
2391
2392 for(int j = 0; j < 4 && (y + j) < internal.height; j++)
2393 {
2394 for(int i = 0; i < 4 && (x + i) < internal.width; i++)
2395 {
2396 word r = X[(unsigned int)(source->xlut >> (16 + 3 * (i + j * 4))) % 8];
2397 word g = Y[(unsigned int)(source->ylut >> (16 + 3 * (i + j * 4))) % 8];
2398
2399 dest[(x + i) + (y + j) * internal.width] = (g << 8) + r;
2400 }
2401 }
2402
2403 source++;
2404 }
2405 }
2406
2407 (byte*&)destSlice += internal.sliceB;
2408 }
2409 }
Nicolas Capens22658242014-11-29 00:31:41 -05002410
Alexis Hetu0de50d42015-09-09 13:56:41 -04002411 void Surface::decodeETC2(Buffer &internal, const Buffer &external, int nbAlphaBits, bool isSRGB)
Nicolas Capens22658242014-11-29 00:31:41 -05002412 {
Alexis Hetu0de50d42015-09-09 13:56:41 -04002413 ETC_Decoder::Decode((const byte*)external.buffer, (byte*)internal.buffer, external.width, external.height, internal.width, internal.height, internal.pitchB, internal.bytes,
2414 (nbAlphaBits == 8) ? ETC_Decoder::ETC_RGBA : ((nbAlphaBits == 1) ? ETC_Decoder::ETC_RGB_PUNCHTHROUGH_ALPHA : ETC_Decoder::ETC_RGB));
Nicolas Capens22658242014-11-29 00:31:41 -05002415
Alexis Hetu0de50d42015-09-09 13:56:41 -04002416 if(isSRGB)
Nicolas Capens22658242014-11-29 00:31:41 -05002417 {
Alexis Hetu0de50d42015-09-09 13:56:41 -04002418 static byte sRGBtoLinearTable[256];
2419 static bool sRGBtoLinearTableDirty = true;
2420 if(sRGBtoLinearTableDirty)
Nicolas Capens22658242014-11-29 00:31:41 -05002421 {
Alexis Hetu0de50d42015-09-09 13:56:41 -04002422 for(int i = 0; i < 256; i++)
Nicolas Capens22658242014-11-29 00:31:41 -05002423 {
Alexis Hetu0de50d42015-09-09 13:56:41 -04002424 sRGBtoLinearTable[i] = static_cast<byte>(sRGBtoLinear(static_cast<float>(i) / 255.0f) * 255.0f + 0.5f);
Nicolas Capens22658242014-11-29 00:31:41 -05002425 }
Alexis Hetu0de50d42015-09-09 13:56:41 -04002426 sRGBtoLinearTableDirty = false;
Nicolas Capens22658242014-11-29 00:31:41 -05002427 }
2428
Alexis Hetu0de50d42015-09-09 13:56:41 -04002429 // Perform sRGB conversion in place after decoding
2430 byte* src = (byte*)internal.buffer;
2431 for(int y = 0; y < internal.height; y++)
2432 {
2433 byte* srcRow = src + y * internal.pitchB;
2434 for(int x = 0; x < internal.width; x++)
2435 {
2436 byte* srcPix = srcRow + x * internal.bytes;
2437 for(int i = 0; i < 3; i++)
2438 {
2439 srcPix[i] = sRGBtoLinearTable[srcPix[i]];
2440 }
2441 }
2442 }
Nicolas Capens22658242014-11-29 00:31:41 -05002443 }
2444 }
John Bauman89401822014-05-06 15:04:28 -04002445
// Decodes EAC-compressed one- or two-channel data from 'external' into 'internal'
// via ETC_Decoder. nbChannels must be 1 or 2 (asserted); together with isSigned it
// selects the decoder input type (ETC_R_* or ETC_RG_*, signed or unsigned).
// For signed data the decoded bytes are afterwards expanded, in place, to floats
// clamped to [-1, 1].
void Surface::decodeEAC(Buffer &internal, const Buffer &external, int nbChannels, bool isSigned)
{
	ASSERT(nbChannels == 1 || nbChannels == 2);

	ETC_Decoder::Decode((const byte*)external.buffer, (byte*)internal.buffer, external.width, external.height, internal.width, internal.height, internal.pitchB, internal.bytes,
	                    (nbChannels == 1) ? (isSigned ? ETC_Decoder::ETC_R_SIGNED : ETC_Decoder::ETC_R_UNSIGNED) : (isSigned ? ETC_Decoder::ETC_RG_SIGNED : ETC_Decoder::ETC_RG_UNSIGNED));

	// FIXME: We convert signed data to float, until signed integer internal formats are supported
	//        This code can be removed if signed ETC2 images are decoded to internal 8 bit signed R/RG formats
	if(isSigned)
	{
		sbyte* src = (sbyte*)internal.buffer;

		for(int y = 0; y < internal.height; y++)
		{
			sbyte* srcRow = src + y * internal.pitchB;
			// Texels and channels are visited from last to first. The float results are
			// written into the same row the signed bytes are read from.
			// NOTE(review): presumably this order keeps a write from clobbering bytes
			// that have not been read yet - confirm against ETC_Decoder's output layout.
			for(int x = internal.width - 1; x >= 0; x--)
			{
				int dx = x & 0xFFFFFFFC;   // x rounded down to a multiple of 4
				int mx = x - dx;           // Position (0..3) of x within that group of four
				// Signed bytes are read at nbChannels-byte spacing from the start of the
				// group of four; floats are written at the texel's internal.bytes offset
				sbyte* srcPix = srcRow + dx * internal.bytes + mx * nbChannels;
				float* dstPix = (float*)(srcRow + x * internal.bytes);
				for(int c = nbChannels - 1; c >= 0; c--)
				{
					static const float normalization = 1.0f / 127.875f;
					dstPix[c] = clamp(static_cast<float>(srcPix[c]) * normalization, -1.0f, 1.0f);
				}
			}
		}
	}
}
2477
// Placeholder for ASTC decoding: the body is empty, so none of the parameters
// are used and 'internal' is left untouched.
void Surface::decodeASTC(Buffer &internal, const Buffer &external, int xBlockSize, int yBlockSize, int zBlockSize, bool isSRGB)
{
	// Intentionally empty - no decoding is performed here
}
2481
John Bauman89401822014-05-06 15:04:28 -04002482 unsigned int Surface::size(int width, int height, int depth, Format format)
2483 {
Nicolas Capens00555c42015-07-21 15:15:30 -04002484 // Dimensions rounded up to multiples of 4, used for compressed formats
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04002485 int width4 = align(width, 4);
2486 int height4 = align(height, 4);
John Bauman89401822014-05-06 15:04:28 -04002487
2488 switch(format)
2489 {
2490 #if S3TC_SUPPORT
2491 case FORMAT_DXT1:
John Bauman66b8ab22014-05-06 15:57:45 -04002492 #endif
John Bauman89401822014-05-06 15:04:28 -04002493 case FORMAT_ATI1:
Nicolas Capens22658242014-11-29 00:31:41 -05002494 case FORMAT_ETC1:
Alexis Hetu460e41f2015-09-01 10:58:37 -04002495 case FORMAT_R11_EAC:
2496 case FORMAT_SIGNED_R11_EAC:
2497 case FORMAT_RGB8_ETC2:
2498 case FORMAT_SRGB8_ETC2:
2499 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
2500 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
John Bauman89401822014-05-06 15:04:28 -04002501 return width4 * height4 * depth / 2;
John Bauman66b8ab22014-05-06 15:57:45 -04002502 #if S3TC_SUPPORT
John Bauman89401822014-05-06 15:04:28 -04002503 case FORMAT_DXT3:
2504 case FORMAT_DXT5:
John Bauman66b8ab22014-05-06 15:57:45 -04002505 #endif
John Bauman89401822014-05-06 15:04:28 -04002506 case FORMAT_ATI2:
Alexis Hetu460e41f2015-09-01 10:58:37 -04002507 case FORMAT_RG11_EAC:
2508 case FORMAT_SIGNED_RG11_EAC:
2509 case FORMAT_RGBA8_ETC2_EAC:
2510 case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
2511 case FORMAT_RGBA_ASTC_4x4_KHR:
2512 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
John Bauman89401822014-05-06 15:04:28 -04002513 return width4 * height4 * depth;
Alexis Hetu460e41f2015-09-01 10:58:37 -04002514 case FORMAT_RGBA_ASTC_5x4_KHR:
2515 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
2516 return align(width, 5) * height4 * depth;
2517 case FORMAT_RGBA_ASTC_5x5_KHR:
2518 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
2519 return align(width, 5) * align(height, 5) * depth;
2520 case FORMAT_RGBA_ASTC_6x5_KHR:
2521 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
2522 return align(width, 6) * align(height, 5) * depth;
2523 case FORMAT_RGBA_ASTC_6x6_KHR:
2524 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
2525 return align(width, 6) * align(height, 6) * depth;
2526 case FORMAT_RGBA_ASTC_8x5_KHR:
2527 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
2528 return align(width, 8) * align(height, 5) * depth;
2529 case FORMAT_RGBA_ASTC_8x6_KHR:
2530 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
2531 return align(width, 8) * align(height, 6) * depth;
2532 case FORMAT_RGBA_ASTC_8x8_KHR:
2533 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
2534 return align(width, 8) * align(height, 8) * depth;
2535 case FORMAT_RGBA_ASTC_10x5_KHR:
2536 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
2537 return align(width, 10) * align(height, 5) * depth;
2538 case FORMAT_RGBA_ASTC_10x6_KHR:
2539 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
2540 return align(width, 10) * align(height, 6) * depth;
2541 case FORMAT_RGBA_ASTC_10x8_KHR:
2542 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
2543 return align(width, 10) * align(height, 8) * depth;
2544 case FORMAT_RGBA_ASTC_10x10_KHR:
2545 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
2546 return align(width, 10) * align(height, 10) * depth;
2547 case FORMAT_RGBA_ASTC_12x10_KHR:
2548 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
2549 return align(width, 12) * align(height, 10) * depth;
2550 case FORMAT_RGBA_ASTC_12x12_KHR:
2551 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:
2552 return align(width, 12) * align(height, 12) * depth;
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04002553 case FORMAT_YV12_BT601:
2554 case FORMAT_YV12_BT709:
2555 case FORMAT_YV12_JFIF:
2556 {
2557 unsigned int YStride = align(width, 16);
2558 unsigned int YSize = YStride * height;
2559 unsigned int CStride = align(YStride / 2, 16);
2560 unsigned int CSize = CStride * height / 2;
2561
2562 return YSize + 2 * CSize;
2563 }
John Bauman89401822014-05-06 15:04:28 -04002564 default:
2565 return bytes(format) * width * height * depth;
2566 }
2567
2568 return 0;
2569 }
2570
2571 bool Surface::isStencil(Format format)
2572 {
2573 switch(format)
2574 {
2575 case FORMAT_D32:
2576 case FORMAT_D16:
2577 case FORMAT_D24X8:
2578 case FORMAT_D32F:
2579 case FORMAT_D32F_COMPLEMENTARY:
2580 case FORMAT_D32F_LOCKABLE:
2581 return false;
2582 case FORMAT_D24S8:
2583 case FORMAT_D24FS8:
2584 case FORMAT_S8:
John Bauman66b8ab22014-05-06 15:57:45 -04002585 case FORMAT_DF24S8:
2586 case FORMAT_DF16S8:
2587 case FORMAT_D32FS8_TEXTURE:
2588 case FORMAT_D32FS8_SHADOW:
John Bauman89401822014-05-06 15:04:28 -04002589 case FORMAT_INTZ:
2590 return true;
2591 default:
2592 return false;
2593 }
2594 }
2595
2596 bool Surface::isDepth(Format format)
2597 {
2598 switch(format)
2599 {
2600 case FORMAT_D32:
2601 case FORMAT_D16:
2602 case FORMAT_D24X8:
2603 case FORMAT_D24S8:
2604 case FORMAT_D24FS8:
2605 case FORMAT_D32F:
2606 case FORMAT_D32F_COMPLEMENTARY:
2607 case FORMAT_D32F_LOCKABLE:
John Bauman66b8ab22014-05-06 15:57:45 -04002608 case FORMAT_DF24S8:
2609 case FORMAT_DF16S8:
2610 case FORMAT_D32FS8_TEXTURE:
2611 case FORMAT_D32FS8_SHADOW:
John Bauman89401822014-05-06 15:04:28 -04002612 case FORMAT_INTZ:
2613 return true;
2614 case FORMAT_S8:
2615 return false;
2616 default:
2617 return false;
2618 }
2619 }
2620
2621 bool Surface::isPalette(Format format)
2622 {
2623 switch(format)
2624 {
2625 case FORMAT_P8:
2626 case FORMAT_A8P8:
2627 return true;
2628 default:
2629 return false;
2630 }
2631 }
2632
2633 bool Surface::isFloatFormat(Format format)
2634 {
2635 switch(format)
2636 {
Nicolas Capens5a86ee92015-09-04 10:45:43 -04002637 case FORMAT_R5G6B5:
John Bauman89401822014-05-06 15:04:28 -04002638 case FORMAT_X8R8G8B8:
Alexis Hetu43577b82015-10-21 15:32:16 -04002639 case FORMAT_X8B8G8R8I:
Nicolas Capensef77ac12015-03-28 21:48:51 -04002640 case FORMAT_X8B8G8R8:
John Bauman89401822014-05-06 15:04:28 -04002641 case FORMAT_A8R8G8B8:
Alexis Hetu43577b82015-10-21 15:32:16 -04002642 case FORMAT_A8B8G8R8I:
2643 case FORMAT_R8UI:
2644 case FORMAT_G8R8UI:
2645 case FORMAT_X8B8G8R8UI:
2646 case FORMAT_A8B8G8R8UI:
Nicolas Capensef77ac12015-03-28 21:48:51 -04002647 case FORMAT_A8B8G8R8:
Alexis Hetu43577b82015-10-21 15:32:16 -04002648 case FORMAT_G8R8I:
John Bauman89401822014-05-06 15:04:28 -04002649 case FORMAT_G8R8:
Alexis Hetu43577b82015-10-21 15:32:16 -04002650 case FORMAT_R8I_SNORM:
2651 case FORMAT_G8R8I_SNORM:
2652 case FORMAT_X8B8G8R8I_SNORM:
2653 case FORMAT_A8B8G8R8I_SNORM:
2654 case FORMAT_R16I:
2655 case FORMAT_R16UI:
2656 case FORMAT_G16R16I:
2657 case FORMAT_G16R16UI:
John Bauman89401822014-05-06 15:04:28 -04002658 case FORMAT_G16R16:
Alexis Hetu43577b82015-10-21 15:32:16 -04002659 case FORMAT_X16B16G16R16I:
2660 case FORMAT_X16B16G16R16UI:
2661 case FORMAT_A16B16G16R16I:
2662 case FORMAT_A16B16G16R16UI:
John Bauman89401822014-05-06 15:04:28 -04002663 case FORMAT_A16B16G16R16:
2664 case FORMAT_V8U8:
2665 case FORMAT_Q8W8V8U8:
2666 case FORMAT_X8L8V8U8:
2667 case FORMAT_V16U16:
2668 case FORMAT_A16W16V16U16:
2669 case FORMAT_Q16W16V16U16:
2670 case FORMAT_A8:
Alexis Hetu43577b82015-10-21 15:32:16 -04002671 case FORMAT_R8I:
John Bauman89401822014-05-06 15:04:28 -04002672 case FORMAT_R8:
2673 case FORMAT_L8:
2674 case FORMAT_L16:
2675 case FORMAT_A8L8:
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04002676 case FORMAT_YV12_BT601:
2677 case FORMAT_YV12_BT709:
2678 case FORMAT_YV12_JFIF:
Alexis Hetu43577b82015-10-21 15:32:16 -04002679 case FORMAT_R32I:
2680 case FORMAT_R32UI:
2681 case FORMAT_G32R32I:
2682 case FORMAT_G32R32UI:
2683 case FORMAT_X32B32G32R32I:
2684 case FORMAT_X32B32G32R32UI:
2685 case FORMAT_A32B32G32R32I:
2686 case FORMAT_A32B32G32R32UI:
John Bauman89401822014-05-06 15:04:28 -04002687 return false;
2688 case FORMAT_R32F:
2689 case FORMAT_G32R32F:
2690 case FORMAT_A32B32G32R32F:
2691 case FORMAT_D32F:
2692 case FORMAT_D32F_COMPLEMENTARY:
2693 case FORMAT_D32F_LOCKABLE:
John Bauman66b8ab22014-05-06 15:57:45 -04002694 case FORMAT_D32FS8_TEXTURE:
2695 case FORMAT_D32FS8_SHADOW:
Nicolas Capens80594422015-06-09 16:42:56 -04002696 case FORMAT_L16F:
2697 case FORMAT_A16L16F:
2698 case FORMAT_L32F:
2699 case FORMAT_A32L32F:
John Bauman89401822014-05-06 15:04:28 -04002700 return true;
2701 default:
2702 ASSERT(false);
2703 }
2704
2705 return false;
2706 }
2707
2708 bool Surface::isUnsignedComponent(Format format, int component)
2709 {
2710 switch(format)
2711 {
2712 case FORMAT_NULL:
Nicolas Capens5a86ee92015-09-04 10:45:43 -04002713 case FORMAT_R5G6B5:
John Bauman89401822014-05-06 15:04:28 -04002714 case FORMAT_X8R8G8B8:
Nicolas Capensef77ac12015-03-28 21:48:51 -04002715 case FORMAT_X8B8G8R8:
John Bauman89401822014-05-06 15:04:28 -04002716 case FORMAT_A8R8G8B8:
Nicolas Capensef77ac12015-03-28 21:48:51 -04002717 case FORMAT_A8B8G8R8:
John Bauman89401822014-05-06 15:04:28 -04002718 case FORMAT_G8R8:
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002719 case FORMAT_R16UI:
John Bauman89401822014-05-06 15:04:28 -04002720 case FORMAT_G16R16:
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002721 case FORMAT_G16R16UI:
2722 case FORMAT_X16B16G16R16UI:
John Bauman89401822014-05-06 15:04:28 -04002723 case FORMAT_A16B16G16R16:
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002724 case FORMAT_A16B16G16R16UI:
2725 case FORMAT_R32UI:
2726 case FORMAT_G32R32UI:
2727 case FORMAT_X32B32G32R32UI:
2728 case FORMAT_A32B32G32R32UI:
2729 case FORMAT_R8UI:
2730 case FORMAT_G8R8UI:
2731 case FORMAT_X8B8G8R8UI:
2732 case FORMAT_A8B8G8R8UI:
John Bauman89401822014-05-06 15:04:28 -04002733 case FORMAT_D32F:
2734 case FORMAT_D32F_COMPLEMENTARY:
2735 case FORMAT_D32F_LOCKABLE:
John Bauman66b8ab22014-05-06 15:57:45 -04002736 case FORMAT_D32FS8_TEXTURE:
2737 case FORMAT_D32FS8_SHADOW:
John Bauman89401822014-05-06 15:04:28 -04002738 case FORMAT_A8:
2739 case FORMAT_R8:
2740 case FORMAT_L8:
2741 case FORMAT_L16:
2742 case FORMAT_A8L8:
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04002743 case FORMAT_YV12_BT601:
2744 case FORMAT_YV12_BT709:
2745 case FORMAT_YV12_JFIF:
John Bauman89401822014-05-06 15:04:28 -04002746 return true;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002747 case FORMAT_A8B8G8R8I:
2748 case FORMAT_A16B16G16R16I:
2749 case FORMAT_A32B32G32R32I:
2750 case FORMAT_A8B8G8R8I_SNORM:
2751 case FORMAT_Q8W8V8U8:
2752 case FORMAT_Q16W16V16U16:
2753 case FORMAT_A32B32G32R32F:
2754 return false;
2755 case FORMAT_R32F:
2756 case FORMAT_R8I:
2757 case FORMAT_R16I:
2758 case FORMAT_R32I:
2759 case FORMAT_R8I_SNORM:
2760 return component >= 1;
John Bauman89401822014-05-06 15:04:28 -04002761 case FORMAT_V8U8:
2762 case FORMAT_X8L8V8U8:
2763 case FORMAT_V16U16:
John Bauman89401822014-05-06 15:04:28 -04002764 case FORMAT_G32R32F:
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002765 case FORMAT_G8R8I:
2766 case FORMAT_G16R16I:
2767 case FORMAT_G32R32I:
2768 case FORMAT_G8R8I_SNORM:
2769 return component >= 2;
2770 case FORMAT_A16W16V16U16:
2771 case FORMAT_X8B8G8R8I:
2772 case FORMAT_X16B16G16R16I:
2773 case FORMAT_X32B32G32R32I:
2774 case FORMAT_X8B8G8R8I_SNORM:
2775 return component >= 3;
John Bauman89401822014-05-06 15:04:28 -04002776 default:
2777 ASSERT(false);
2778 }
2779
2780 return false;
2781 }
2782
2783 bool Surface::isSRGBreadable(Format format)
2784 {
2785 // Keep in sync with Capabilities::isSRGBreadable
2786 switch(format)
2787 {
2788 case FORMAT_L8:
2789 case FORMAT_A8L8:
2790 case FORMAT_R8G8B8:
2791 case FORMAT_A8R8G8B8:
2792 case FORMAT_X8R8G8B8:
2793 case FORMAT_A8B8G8R8:
2794 case FORMAT_X8B8G8R8:
2795 case FORMAT_R5G6B5:
2796 case FORMAT_X1R5G5B5:
2797 case FORMAT_A1R5G5B5:
2798 case FORMAT_A4R4G4B4:
2799 #if S3TC_SUPPORT
2800 case FORMAT_DXT1:
2801 case FORMAT_DXT3:
2802 case FORMAT_DXT5:
John Bauman66b8ab22014-05-06 15:57:45 -04002803 #endif
John Bauman89401822014-05-06 15:04:28 -04002804 case FORMAT_ATI1:
2805 case FORMAT_ATI2:
John Bauman89401822014-05-06 15:04:28 -04002806 return true;
2807 default:
2808 return false;
2809 }
2810
2811 return false;
2812 }
2813
2814 bool Surface::isSRGBwritable(Format format)
2815 {
2816 // Keep in sync with Capabilities::isSRGBwritable
2817 switch(format)
2818 {
2819 case FORMAT_NULL:
2820 case FORMAT_A8R8G8B8:
2821 case FORMAT_X8R8G8B8:
2822 case FORMAT_A8B8G8R8:
2823 case FORMAT_X8B8G8R8:
2824 case FORMAT_R5G6B5:
2825 return true;
2826 default:
2827 return false;
2828 }
2829 }
2830
2831 bool Surface::isCompressed(Format format)
2832 {
2833 switch(format)
2834 {
2835 #if S3TC_SUPPORT
2836 case FORMAT_DXT1:
2837 case FORMAT_DXT3:
2838 case FORMAT_DXT5:
John Bauman66b8ab22014-05-06 15:57:45 -04002839 #endif
John Bauman89401822014-05-06 15:04:28 -04002840 case FORMAT_ATI1:
2841 case FORMAT_ATI2:
Nicolas Capens22658242014-11-29 00:31:41 -05002842 case FORMAT_ETC1:
Alexis Hetu460e41f2015-09-01 10:58:37 -04002843 case FORMAT_R11_EAC:
2844 case FORMAT_SIGNED_R11_EAC:
2845 case FORMAT_RG11_EAC:
2846 case FORMAT_SIGNED_RG11_EAC:
2847 case FORMAT_RGB8_ETC2:
2848 case FORMAT_SRGB8_ETC2:
2849 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
2850 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
2851 case FORMAT_RGBA8_ETC2_EAC:
2852 case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
2853 case FORMAT_RGBA_ASTC_4x4_KHR:
2854 case FORMAT_RGBA_ASTC_5x4_KHR:
2855 case FORMAT_RGBA_ASTC_5x5_KHR:
2856 case FORMAT_RGBA_ASTC_6x5_KHR:
2857 case FORMAT_RGBA_ASTC_6x6_KHR:
2858 case FORMAT_RGBA_ASTC_8x5_KHR:
2859 case FORMAT_RGBA_ASTC_8x6_KHR:
2860 case FORMAT_RGBA_ASTC_8x8_KHR:
2861 case FORMAT_RGBA_ASTC_10x5_KHR:
2862 case FORMAT_RGBA_ASTC_10x6_KHR:
2863 case FORMAT_RGBA_ASTC_10x8_KHR:
2864 case FORMAT_RGBA_ASTC_10x10_KHR:
2865 case FORMAT_RGBA_ASTC_12x10_KHR:
2866 case FORMAT_RGBA_ASTC_12x12_KHR:
2867 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
2868 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
2869 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
2870 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
2871 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
2872 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
2873 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
2874 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
2875 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
2876 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
2877 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
2878 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
2879 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
2880 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:
John Bauman89401822014-05-06 15:04:28 -04002881 return true;
John Bauman89401822014-05-06 15:04:28 -04002882 default:
2883 return false;
2884 }
2885 }
2886
Alexis Hetu43577b82015-10-21 15:32:16 -04002887 bool Surface::isNonNormalizedInteger(Format format)
2888 {
2889 switch(format)
2890 {
2891 case FORMAT_A8B8G8R8I:
2892 case FORMAT_X8B8G8R8I:
2893 case FORMAT_G8R8I:
2894 case FORMAT_R8I:
2895 case FORMAT_A8B8G8R8UI:
2896 case FORMAT_X8B8G8R8UI:
2897 case FORMAT_G8R8UI:
2898 case FORMAT_R8UI:
2899 case FORMAT_A16B16G16R16I:
2900 case FORMAT_X16B16G16R16I:
2901 case FORMAT_G16R16I:
2902 case FORMAT_R16I:
2903 case FORMAT_A16B16G16R16UI:
2904 case FORMAT_X16B16G16R16UI:
2905 case FORMAT_G16R16UI:
2906 case FORMAT_R16UI:
2907 case FORMAT_A32B32G32R32I:
2908 case FORMAT_X32B32G32R32I:
2909 case FORMAT_G32R32I:
2910 case FORMAT_R32I:
2911 case FORMAT_A32B32G32R32UI:
2912 case FORMAT_X32B32G32R32UI:
2913 case FORMAT_G32R32UI:
2914 case FORMAT_R32UI:
2915 return true;
2916 default:
2917 return false;
2918 }
2919 }
2920
John Bauman89401822014-05-06 15:04:28 -04002921 int Surface::componentCount(Format format)
2922 {
2923 switch(format)
2924 {
Nicolas Capens5a86ee92015-09-04 10:45:43 -04002925 case FORMAT_R5G6B5: return 3;
2926 case FORMAT_X8R8G8B8: return 3;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002927 case FORMAT_X8B8G8R8I: return 3;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04002928 case FORMAT_X8B8G8R8: return 3;
2929 case FORMAT_A8R8G8B8: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002930 case FORMAT_A8B8G8R8I: return 4;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04002931 case FORMAT_A8B8G8R8: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002932 case FORMAT_G8R8I: return 2;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04002933 case FORMAT_G8R8: return 2;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002934 case FORMAT_R8I_SNORM: return 1;
2935 case FORMAT_G8R8I_SNORM: return 2;
2936 case FORMAT_X8B8G8R8I_SNORM:return 3;
2937 case FORMAT_A8B8G8R8I_SNORM:return 4;
2938 case FORMAT_R8UI: return 1;
2939 case FORMAT_G8R8UI: return 2;
2940 case FORMAT_X8B8G8R8UI: return 3;
2941 case FORMAT_A8B8G8R8UI: return 4;
2942 case FORMAT_G16R16I: return 2;
2943 case FORMAT_G16R16UI: return 2;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04002944 case FORMAT_G16R16: return 2;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002945 case FORMAT_G32R32I: return 2;
2946 case FORMAT_G32R32UI: return 2;
2947 case FORMAT_X16B16G16R16I: return 3;
2948 case FORMAT_X16B16G16R16UI: return 3;
2949 case FORMAT_A16B16G16R16I: return 4;
2950 case FORMAT_A16B16G16R16UI: return 4;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04002951 case FORMAT_A16B16G16R16: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002952 case FORMAT_X32B32G32R32I: return 3;
2953 case FORMAT_X32B32G32R32UI: return 3;
2954 case FORMAT_A32B32G32R32I: return 4;
2955 case FORMAT_A32B32G32R32UI: return 4;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04002956 case FORMAT_V8U8: return 2;
2957 case FORMAT_Q8W8V8U8: return 4;
2958 case FORMAT_X8L8V8U8: return 3;
2959 case FORMAT_V16U16: return 2;
2960 case FORMAT_A16W16V16U16: return 4;
2961 case FORMAT_Q16W16V16U16: return 4;
2962 case FORMAT_R32F: return 1;
2963 case FORMAT_G32R32F: return 2;
2964 case FORMAT_A32B32G32R32F: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002965 case FORMAT_D32F: return 1;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04002966 case FORMAT_D32F_LOCKABLE: return 1;
2967 case FORMAT_D32FS8_TEXTURE: return 1;
2968 case FORMAT_D32FS8_SHADOW: return 1;
2969 case FORMAT_A8: return 1;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002970 case FORMAT_R8I: return 1;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04002971 case FORMAT_R8: return 1;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002972 case FORMAT_R16I: return 1;
2973 case FORMAT_R16UI: return 1;
2974 case FORMAT_R32I: return 1;
2975 case FORMAT_R32UI: return 1;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04002976 case FORMAT_L8: return 1;
2977 case FORMAT_L16: return 1;
2978 case FORMAT_A8L8: return 2;
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04002979 case FORMAT_YV12_BT601: return 3;
2980 case FORMAT_YV12_BT709: return 3;
2981 case FORMAT_YV12_JFIF: return 3;
John Bauman89401822014-05-06 15:04:28 -04002982 default:
2983 ASSERT(false);
2984 }
2985
2986 return 1;
2987 }
2988
2989 void *Surface::allocateBuffer(int width, int height, int depth, Format format)
2990 {
Nicolas Capensdb17b5d2015-06-26 11:15:58 -04002991 // Render targets require 2x2 quads
2992 int width2 = (width + 1) & ~1;
2993 int height2 = (height + 1) & ~1;
John Bauman89401822014-05-06 15:04:28 -04002994
Nicolas Capens6ea71872015-06-26 13:00:48 -04002995 // FIXME: Unpacking byte4 to short4 in the sampler currently involves reading 8 bytes,
2996 // so we have to allocate 4 extra bytes to avoid buffer overruns.
2997 return allocateZero(size(width2, height2, depth, format) + 4);
John Bauman89401822014-05-06 15:04:28 -04002998 }
2999
Nicolas Capens5ba566b2015-05-25 17:11:04 -04003000 void Surface::memfill4(void *buffer, int pattern, int bytes)
John Bauman89401822014-05-06 15:04:28 -04003001 {
3002 while((size_t)buffer & 0x1 && bytes >= 1)
3003 {
3004 *(char*)buffer = (char)pattern;
3005 (char*&)buffer += 1;
3006 bytes -= 1;
3007 }
3008
3009 while((size_t)buffer & 0x3 && bytes >= 2)
3010 {
3011 *(short*)buffer = (short)pattern;
3012 (short*&)buffer += 1;
3013 bytes -= 2;
3014 }
3015
3016 if(CPUID::supportsSSE())
3017 {
3018 while((size_t)buffer & 0xF && bytes >= 4)
3019 {
3020 *(int*)buffer = pattern;
3021 (int*&)buffer += 1;
3022 bytes -= 4;
3023 }
3024
3025 __m128 quad = _mm_set_ps1((float&)pattern);
3026
3027 float *pointer = (float*)buffer;
3028 int qxwords = bytes / 64;
3029 bytes -= qxwords * 64;
3030
3031 while(qxwords--)
3032 {
3033 _mm_stream_ps(pointer + 0, quad);
3034 _mm_stream_ps(pointer + 4, quad);
3035 _mm_stream_ps(pointer + 8, quad);
3036 _mm_stream_ps(pointer + 12, quad);
3037
3038 pointer += 16;
3039 }
3040
3041 buffer = pointer;
3042 }
3043
3044 while(bytes >= 4)
3045 {
3046 *(int*)buffer = (int)pattern;
3047 (int*&)buffer += 1;
3048 bytes -= 4;
3049 }
3050
3051 while(bytes >= 2)
3052 {
3053 *(short*)buffer = (short)pattern;
3054 (short*&)buffer += 1;
3055 bytes -= 2;
3056 }
3057
3058 while(bytes >= 1)
3059 {
3060 *(char*)buffer = (char)pattern;
3061 (char*&)buffer += 1;
3062 bytes -= 1;
3063 }
3064 }
3065
Nicolas Capens66747262015-09-22 12:26:30 -04003066 void Surface::clearColorBuffer(float red, float green, float blue, float alpha, unsigned int rgbaMask, int x0, int y0, int width, int height)
John Bauman89401822014-05-06 15:04:28 -04003067 {
3068 // FIXME: Also clear buffers in other formats?
3069
3070 // Not overlapping
3071 if(x0 > internal.width) return;
3072 if(y0 > internal.height) return;
3073 if(x0 + width < 0) return;
3074 if(y0 + height < 0) return;
3075
3076 // Clip against dimensions
3077 if(x0 < 0) {width += x0; x0 = 0;}
3078 if(x0 + width > internal.width) width = internal.width - x0;
3079 if(y0 < 0) {height += y0; y0 = 0;}
3080 if(y0 + height > internal.height) height = internal.height - y0;
3081
3082 const bool entire = x0 == 0 && y0 == 0 && width == internal.width && height == internal.height;
3083 const Lock lock = entire ? LOCK_DISCARD : LOCK_WRITEONLY;
3084
John Bauman89401822014-05-06 15:04:28 -04003085 int x1 = x0 + width;
3086 int y1 = y0 + height;
3087
John Bauman89401822014-05-06 15:04:28 -04003088 // if(lockable || !quadLayoutEnabled)
3089 {
Nicolas Capens5ba566b2015-05-25 17:11:04 -04003090 unsigned char *buffer = (unsigned char*)lockInternal(x0, y0, 0, lock, PUBLIC);
John Bauman89401822014-05-06 15:04:28 -04003091
3092 for(int z = 0; z < internal.depth; z++)
3093 {
3094 unsigned char *target = buffer;
3095
3096 for(int y = y0; y < y1; y++)
3097 {
3098 switch(internal.format)
3099 {
3100 case FORMAT_NULL:
3101 break;
3102 case FORMAT_X8R8G8B8:
3103 case FORMAT_A8R8G8B8:
3104 // case FORMAT_X8G8R8B8Q: // FIXME
3105 // case FORMAT_A8G8R8B8Q: // FIXME
John Bauman89401822014-05-06 15:04:28 -04003106 {
Nicolas Capens66747262015-09-22 12:26:30 -04003107 unsigned char r8 = iround(red * 0xFF);
3108 unsigned char g8 = iround(green * 0xFF);
3109 unsigned char b8 = iround(blue * 0xFF);
3110 unsigned char a8 = iround(alpha * 0xFF);
3111 unsigned char a8r8g8b8[4] = {b8, g8, r8, a8};
3112 unsigned int colorARGB = (unsigned int&)a8r8g8b8;
John Bauman89401822014-05-06 15:04:28 -04003113
Nicolas Capens66747262015-09-22 12:26:30 -04003114 if(rgbaMask == 0xF || (internal.format == FORMAT_X8R8G8B8 && rgbaMask == 0x7))
John Bauman89401822014-05-06 15:04:28 -04003115 {
Nicolas Capens66747262015-09-22 12:26:30 -04003116 memfill4(target, colorARGB, 4 * (x1 - x0));
3117 }
3118 else
3119 {
3120 unsigned int bgraMask = (rgbaMask & 0x1 ? 0x00FF0000 : 0) | (rgbaMask & 0x2 ? 0x0000FF00 : 0) | (rgbaMask & 0x4 ? 0x000000FF : 0) | (rgbaMask & 0x8 ? 0xFF000000 : 0);
3121 unsigned int invMask = ~bgraMask;
3122 unsigned int maskedColor = colorARGB & bgraMask;
3123 unsigned int *target32 = (unsigned int*)target;
3124
3125 for(int x = 0; x < width; x++)
3126 {
3127 target32[x] = maskedColor | (target32[x] & invMask);
3128 }
John Bauman89401822014-05-06 15:04:28 -04003129 }
3130 }
3131 break;
Nicolas Capensef77ac12015-03-28 21:48:51 -04003132 case FORMAT_X8B8G8R8:
3133 case FORMAT_A8B8G8R8:
Nicolas Capensef77ac12015-03-28 21:48:51 -04003134 {
Nicolas Capens66747262015-09-22 12:26:30 -04003135 unsigned char r8 = iround(red * 0xFF);
3136 unsigned char g8 = iround(green * 0xFF);
3137 unsigned char b8 = iround(blue * 0xFF);
3138 unsigned char a8 = iround(alpha * 0xFF);
Nicolas Capensd61d3a72015-05-26 10:56:05 -04003139 unsigned char a8b8g8r8[4] = {r8, g8, b8, a8};
3140 unsigned int colorABGR = (unsigned int&)a8b8g8r8;
Nicolas Capensef77ac12015-03-28 21:48:51 -04003141
Nicolas Capensd61d3a72015-05-26 10:56:05 -04003142 if(rgbaMask == 0xF || (internal.format == FORMAT_X8B8G8R8 && rgbaMask == 0x7))
Nicolas Capensef77ac12015-03-28 21:48:51 -04003143 {
Nicolas Capensd61d3a72015-05-26 10:56:05 -04003144 memfill4(target, colorABGR, 4 * (x1 - x0));
3145 }
3146 else
3147 {
3148 unsigned int rgbaMask32 = (rgbaMask & 0x1 ? 0x000000FF : 0) | (rgbaMask & 0x2 ? 0x0000FF00 : 0) | (rgbaMask & 0x4 ? 0x00FF0000 : 0) | (rgbaMask & 0x8 ? 0xFF000000 : 0);
3149 unsigned int invMask = ~rgbaMask32;
3150 unsigned int maskedColor = colorABGR & rgbaMask32;
3151 unsigned int *target32 = (unsigned int*)target;
3152
3153 for(int x = 0; x < width; x++)
3154 {
3155 target32[x] = maskedColor | (target32[x] & invMask);
3156 }
Nicolas Capensef77ac12015-03-28 21:48:51 -04003157 }
3158 }
3159 break;
John Bauman89401822014-05-06 15:04:28 -04003160 case FORMAT_G8R8:
John Bauman89401822014-05-06 15:04:28 -04003161 {
Nicolas Capens66747262015-09-22 12:26:30 -04003162 unsigned char r8 = iround(red * 0xFF);
3163 unsigned char g8 = iround(green * 0xFF);
Nicolas Capensd61d3a72015-05-26 10:56:05 -04003164 unsigned char g8r8[4] = {r8, g8, r8, g8};
John Bauman89401822014-05-06 15:04:28 -04003165
Nicolas Capensd61d3a72015-05-26 10:56:05 -04003166 if((rgbaMask & 0x3) == 0x3)
John Bauman89401822014-05-06 15:04:28 -04003167 {
Nicolas Capensd61d3a72015-05-26 10:56:05 -04003168 memfill4(target, (int&)g8r8, 2 * (x1 - x0));
3169 }
3170 else
3171 {
3172 unsigned short rgMask = (rgbaMask & 0x1 ? 0x000000FF : 0) | (rgbaMask & 0x2 ? 0x0000FF00 : 0);
3173 unsigned short invMask = ~rgMask;
3174 unsigned short maskedColor = (unsigned short&)g8r8 & rgMask;
3175 unsigned short *target16 = (unsigned short*)target;
3176
3177 for(int x = 0; x < width; x++)
3178 {
3179 target16[x] = maskedColor | (target16[x] & invMask);
3180 }
John Bauman89401822014-05-06 15:04:28 -04003181 }
3182 }
3183 break;
3184 case FORMAT_G16R16:
John Bauman89401822014-05-06 15:04:28 -04003185 {
Nicolas Capens66747262015-09-22 12:26:30 -04003186 unsigned char r16 = iround(red * 0xFFFF);
3187 unsigned char g16 = iround(green * 0xFFFF);
Nicolas Capensd61d3a72015-05-26 10:56:05 -04003188 unsigned short g16r16[2] = {r16, g16};
John Bauman89401822014-05-06 15:04:28 -04003189
Nicolas Capensd61d3a72015-05-26 10:56:05 -04003190 if((rgbaMask & 0x3) == 0x3)
John Bauman89401822014-05-06 15:04:28 -04003191 {
Nicolas Capensd61d3a72015-05-26 10:56:05 -04003192 memfill4(target, (int&)g16r16, 4 * (x1 - x0));
3193 }
3194 else
3195 {
3196 unsigned int rgMask = (rgbaMask & 0x1 ? 0x0000FFFF : 0) | (rgbaMask & 0x2 ? 0xFFFF0000 : 0);
3197 unsigned int invMask = ~rgMask;
3198 unsigned int maskedColor = (unsigned int&)g16r16 & rgMask;
3199 unsigned int *target32 = (unsigned int*)target;
3200
3201 for(int x = 0; x < width; x++)
3202 {
3203 target32[x] = maskedColor | (target32[x] & invMask);
3204 }
John Bauman89401822014-05-06 15:04:28 -04003205 }
3206 }
3207 break;
3208 case FORMAT_A16B16G16R16:
John Bauman89401822014-05-06 15:04:28 -04003209 {
Nicolas Capens66747262015-09-22 12:26:30 -04003210 unsigned char r16 = iround(red * 0xFFFF);
3211 unsigned char g16 = iround(green * 0xFFFF);
3212 unsigned char b16 = iround(blue * 0xFFFF);
3213 unsigned char a16 = iround(alpha * 0xFFFF);
Nicolas Capensd61d3a72015-05-26 10:56:05 -04003214
3215 if(rgbaMask == 0xF)
John Bauman89401822014-05-06 15:04:28 -04003216 {
Nicolas Capensd61d3a72015-05-26 10:56:05 -04003217 for(int x = 0; x < width; x++)
3218 {
3219 ((unsigned short*)target)[4 * x + 0] = r16;
3220 ((unsigned short*)target)[4 * x + 1] = g16;
3221 ((unsigned short*)target)[4 * x + 2] = b16;
3222 ((unsigned short*)target)[4 * x + 3] = a16;
3223 }
John Bauman89401822014-05-06 15:04:28 -04003224 }
Nicolas Capensd61d3a72015-05-26 10:56:05 -04003225 else
3226 {
3227 if(rgbaMask & 0x1) for(int x = 0; x < width; x++) ((unsigned short*)target)[4 * x + 0] = r16;
3228 if(rgbaMask & 0x2) for(int x = 0; x < width; x++) ((unsigned short*)target)[4 * x + 1] = g16;
3229 if(rgbaMask & 0x4) for(int x = 0; x < width; x++) ((unsigned short*)target)[4 * x + 2] = b16;
3230 if(rgbaMask & 0x8) for(int x = 0; x < width; x++) ((unsigned short*)target)[4 * x + 3] = a16;
3231 }
John Bauman89401822014-05-06 15:04:28 -04003232 }
3233 break;
3234 case FORMAT_R32F:
3235 if(rgbaMask & 0x1)
3236 {
3237 for(int x = 0; x < width; x++)
3238 {
Nicolas Capens66747262015-09-22 12:26:30 -04003239 ((float*)target)[x] = red;
John Bauman89401822014-05-06 15:04:28 -04003240 }
3241 }
3242 break;
3243 case FORMAT_G32R32F:
Nicolas Capens66747262015-09-22 12:26:30 -04003244 if((rgbaMask & 0x3) == 0x3)
John Bauman89401822014-05-06 15:04:28 -04003245 {
Nicolas Capens66747262015-09-22 12:26:30 -04003246 for(int x = 0; x < width; x++)
John Bauman89401822014-05-06 15:04:28 -04003247 {
Nicolas Capens66747262015-09-22 12:26:30 -04003248 ((float*)target)[2 * x + 0] = red;
3249 ((float*)target)[2 * x + 1] = green;
John Bauman89401822014-05-06 15:04:28 -04003250 }
Nicolas Capens66747262015-09-22 12:26:30 -04003251 }
3252 else
3253 {
3254 if(rgbaMask & 0x1) for(int x = 0; x < width; x++) ((float*)target)[2 * x + 0] = red;
3255 if(rgbaMask & 0x2) for(int x = 0; x < width; x++) ((float*)target)[2 * x + 1] = green;
John Bauman89401822014-05-06 15:04:28 -04003256 }
3257 break;
3258 case FORMAT_A32B32G32R32F:
Nicolas Capens66747262015-09-22 12:26:30 -04003259 if(rgbaMask == 0xF)
John Bauman89401822014-05-06 15:04:28 -04003260 {
Nicolas Capens66747262015-09-22 12:26:30 -04003261 for(int x = 0; x < width; x++)
John Bauman89401822014-05-06 15:04:28 -04003262 {
Nicolas Capens66747262015-09-22 12:26:30 -04003263 ((float*)target)[4 * x + 0] = red;
3264 ((float*)target)[4 * x + 1] = green;
3265 ((float*)target)[4 * x + 2] = blue;
3266 ((float*)target)[4 * x + 3] = alpha;
John Bauman89401822014-05-06 15:04:28 -04003267 }
Nicolas Capens66747262015-09-22 12:26:30 -04003268 }
3269 else
3270 {
3271 if(rgbaMask & 0x1) for(int x = 0; x < width; x++) ((float*)target)[4 * x + 0] = red;
3272 if(rgbaMask & 0x2) for(int x = 0; x < width; x++) ((float*)target)[4 * x + 1] = green;
3273 if(rgbaMask & 0x4) for(int x = 0; x < width; x++) ((float*)target)[4 * x + 2] = blue;
3274 if(rgbaMask & 0x8) for(int x = 0; x < width; x++) ((float*)target)[4 * x + 3] = alpha;
John Bauman89401822014-05-06 15:04:28 -04003275 }
3276 break;
Nicolas Capens5ba566b2015-05-25 17:11:04 -04003277 case FORMAT_R5G6B5:
Nicolas Capens5ba566b2015-05-25 17:11:04 -04003278 {
Nicolas Capens66747262015-09-22 12:26:30 -04003279 unsigned int r5 = iround(red * 0x1F);
3280 unsigned int g6 = iround(green * 0x3F);
3281 unsigned int b5 = iround(blue * 0x1F);
3282 unsigned int r5g6b5 = (r5 << 11) | (g6 << 5) | b5;
Nicolas Capens5ba566b2015-05-25 17:11:04 -04003283
Nicolas Capensd61d3a72015-05-26 10:56:05 -04003284 if((rgbaMask & 0x7) == 0x7)
Nicolas Capens5ba566b2015-05-25 17:11:04 -04003285 {
Nicolas Capensd61d3a72015-05-26 10:56:05 -04003286 unsigned int r5g6b5r5g6b5 = r5g6b5 | (r5g6b5 << 16);
3287 memfill4(target, r5g6b5r5g6b5, 2 * (x1 - x0));
3288 }
3289 else
3290 {
3291 unsigned short rgbMask = (rgbaMask & 0x1 ? 0xF800 : 0) | (rgbaMask & 0x2 ? 0x07E0 : 0) | (rgbaMask & 0x3 ? 0x001F : 0);
3292 unsigned short invMask = ~rgbMask;
3293 unsigned short maskedColor = r5g6b5 & rgbMask;
3294 unsigned short *target16 = (unsigned short*)target;
3295
3296 for(int x = 0; x < width; x++)
3297 {
3298 target16[x] = maskedColor | (target16[x] & invMask);
3299 }
Nicolas Capens5ba566b2015-05-25 17:11:04 -04003300 }
3301 }
3302 break;
John Bauman89401822014-05-06 15:04:28 -04003303 default:
3304 ASSERT(false);
3305 }
3306
3307 target += internal.pitchB;
3308 }
3309
3310 buffer += internal.sliceB;
3311 }
3312
3313 unlockInternal();
3314 }
3315 /* else
3316 {
Alexis Hetu0085c442015-06-12 15:19:20 -04003317 int width2 = (internal.width + 1) & ~1;
3318
John Bauman89401822014-05-06 15:04:28 -04003319 // unsigned char *target = (unsigned char*&)buffer;
3320 //
3321 // for(int y = y0; y < y1; y++)
3322 // {
3323 // for(int x = x0; x < x1; x++)
3324 // {
3325 // target[width2 * 4 * (y & ~1) + 2 * (y & 1) + 8 * (x & ~1) + (x & 1) + 0] = (color & 0x000000FF) >> 0;
3326 // target[width2 * 4 * (y & ~1) + 2 * (y & 1) + 8 * (x & ~1) + (x & 1) + 4] = (color & 0x00FF0000) >> 16;
3327 // target[width2 * 4 * (y & ~1) + 2 * (y & 1) + 8 * (x & ~1) + (x & 1) + 8] = (color & 0x0000FF00) >> 8;
3328 // target[width2 * 4 * (y & ~1) + 2 * (y & 1) + 8 * (x & ~1) + (x & 1) + 12] = (color & 0xFF000000) >> 24;
3329 // }
3330 // }
3331
3332 unsigned char colorQ[16];
3333
3334 colorQ[0] = (color & 0x000000FF) >> 0;
3335 colorQ[1] = (color & 0x000000FF) >> 0;
3336 colorQ[2] = (color & 0x000000FF) >> 0;
3337 colorQ[3] = (color & 0x000000FF) >> 0;
3338 colorQ[4] = (color & 0x00FF0000) >> 16;
3339 colorQ[5] = (color & 0x00FF0000) >> 16;
3340 colorQ[6] = (color & 0x00FF0000) >> 16;
3341 colorQ[7] = (color & 0x00FF0000) >> 16;
3342 colorQ[8] = (color & 0x0000FF00) >> 8;
3343 colorQ[9] = (color & 0x0000FF00) >> 8;
3344 colorQ[10] = (color & 0x0000FF00) >> 8;
3345 colorQ[11] = (color & 0x0000FF00) >> 8;
3346 colorQ[12] = (color & 0xFF000000) >> 24;
3347 colorQ[13] = (color & 0xFF000000) >> 24;
3348 colorQ[14] = (color & 0xFF000000) >> 24;
3349 colorQ[15] = (color & 0xFF000000) >> 24;
3350
3351 for(int y = y0; y < y1; y++)
3352 {
3353 unsigned char *target = (unsigned char*)lockInternal(0, 0, 0, lock) + width2 * 4 * (y & ~1) + 2 * (y & 1); // FIXME: Unlock
3354
3355 if((y & 1) == 0 && y + 1 < y1) // Fill quad line at once
3356 {
3357 if((x0 & 1) != 0)
3358 {
3359 target[8 * (x0 & ~1) + 1 + 0] = (color & 0x000000FF) >> 0;
3360 target[8 * (x0 & ~1) + 1 + 4] = (color & 0x00FF0000) >> 16;
3361 target[8 * (x0 & ~1) + 1 + 8] = (color & 0x0000FF00) >> 8;
3362 target[8 * (x0 & ~1) + 1 + 12] = (color & 0xFF000000) >> 24;
3363
3364 target[8 * (x0 & ~1) + 3 + 0] = (color & 0x000000FF) >> 0;
3365 target[8 * (x0 & ~1) + 3 + 4] = (color & 0x00FF0000) >> 16;
3366 target[8 * (x0 & ~1) + 3 + 8] = (color & 0x0000FF00) >> 8;
3367 target[8 * (x0 & ~1) + 3 + 12] = (color & 0xFF000000) >> 24;
3368 }
3369
3370 __asm
3371 {
3372 movq mm0, colorQ+0
3373 movq mm1, colorQ+8
3374
3375 mov eax, x0
3376 add eax, 1
3377 and eax, 0xFFFFFFFE
3378 cmp eax, x1
3379 jge qEnd
3380
3381 mov edi, target
3382
3383 qLoop:
3384 movntq [edi+8*eax+0], mm0
3385 movntq [edi+8*eax+8], mm1
3386
3387 add eax, 2
3388 cmp eax, x1
3389 jl qLoop
3390 qEnd:
3391 emms
3392 }
3393
3394 if((x1 & 1) != 0)
3395 {
3396 target[8 * (x1 & ~1) + 0 + 0] = (color & 0x000000FF) >> 0;
3397 target[8 * (x1 & ~1) + 0 + 4] = (color & 0x00FF0000) >> 16;
3398 target[8 * (x1 & ~1) + 0 + 8] = (color & 0x0000FF00) >> 8;
3399 target[8 * (x1 & ~1) + 0 + 12] = (color & 0xFF000000) >> 24;
3400
3401 target[8 * (x1 & ~1) + 2 + 0] = (color & 0x000000FF) >> 0;
3402 target[8 * (x1 & ~1) + 2 + 4] = (color & 0x00FF0000) >> 16;
3403 target[8 * (x1 & ~1) + 2 + 8] = (color & 0x0000FF00) >> 8;
3404 target[8 * (x1 & ~1) + 2 + 12] = (color & 0xFF000000) >> 24;
3405 }
3406
3407 y++;
3408 }
3409 else
3410 {
3411 for(int x = x0; x < x1; x++)
3412 {
3413 target[8 * (x & ~1) + (x & 1) + 0] = (color & 0x000000FF) >> 0;
3414 target[8 * (x & ~1) + (x & 1) + 4] = (color & 0x00FF0000) >> 16;
3415 target[8 * (x & ~1) + (x & 1) + 8] = (color & 0x0000FF00) >> 8;
3416 target[8 * (x & ~1) + (x & 1) + 12] = (color & 0xFF000000) >> 24;
3417 }
3418 }
3419 }
3420 }*/
3421 }
3422
3423 void Surface::clearDepthBuffer(float depth, int x0, int y0, int width, int height)
3424 {
3425 // Not overlapping
3426 if(x0 > internal.width) return;
3427 if(y0 > internal.height) return;
3428 if(x0 + width < 0) return;
3429 if(y0 + height < 0) return;
3430
3431 // Clip against dimensions
3432 if(x0 < 0) {width += x0; x0 = 0;}
3433 if(x0 + width > internal.width) width = internal.width - x0;
3434 if(y0 < 0) {height += y0; y0 = 0;}
3435 if(y0 + height > internal.height) height = internal.height - y0;
3436
3437 const bool entire = x0 == 0 && y0 == 0 && width == internal.width && height == internal.height;
3438 const Lock lock = entire ? LOCK_DISCARD : LOCK_WRITEONLY;
3439
3440 int width2 = (internal.width + 1) & ~1;
3441
3442 int x1 = x0 + width;
3443 int y1 = y0 + height;
3444
3445 if(internal.format == FORMAT_D32F_LOCKABLE ||
John Bauman66b8ab22014-05-06 15:57:45 -04003446 internal.format == FORMAT_D32FS8_TEXTURE ||
3447 internal.format == FORMAT_D32FS8_SHADOW)
John Bauman89401822014-05-06 15:04:28 -04003448 {
3449 float *target = (float*)lockInternal(0, 0, 0, lock, PUBLIC) + x0 + width2 * y0;
3450
3451 for(int z = 0; z < internal.depth; z++)
3452 {
3453 for(int y = y0; y < y1; y++)
3454 {
Nicolas Capens5ba566b2015-05-25 17:11:04 -04003455 memfill4(target, (int&)depth, 4 * width);
John Bauman89401822014-05-06 15:04:28 -04003456 target += width2;
3457 }
3458 }
3459
3460 unlockInternal();
3461 }
3462 else // Quad layout
3463 {
3464 if(complementaryDepthBuffer)
3465 {
3466 depth = 1 - depth;
3467 }
3468
3469 float *buffer = (float*)lockInternal(0, 0, 0, lock, PUBLIC);
3470
3471 for(int z = 0; z < internal.depth; z++)
3472 {
3473 for(int y = y0; y < y1; y++)
3474 {
3475 float *target = buffer + (y & ~1) * width2 + (y & 1) * 2;
3476
3477 if((y & 1) == 0 && y + 1 < y1) // Fill quad line at once
3478 {
3479 if((x0 & 1) != 0)
3480 {
3481 target[(x0 & ~1) * 2 + 1] = depth;
3482 target[(x0 & ~1) * 2 + 3] = depth;
3483 }
3484
3485 // for(int x2 = ((x0 + 1) & ~1) * 2; x2 < x1 * 2; x2 += 4)
3486 // {
3487 // target[x2 + 0] = depth;
3488 // target[x2 + 1] = depth;
3489 // target[x2 + 2] = depth;
3490 // target[x2 + 3] = depth;
3491 // }
3492
3493 // __asm
3494 // {
3495 // movss xmm0, depth
3496 // shufps xmm0, xmm0, 0x00
3497 //
3498 // mov eax, x0
3499 // add eax, 1
3500 // and eax, 0xFFFFFFFE
3501 // cmp eax, x1
3502 // jge qEnd
3503 //
3504 // mov edi, target
3505 //
3506 // qLoop:
3507 // movntps [edi+8*eax], xmm0
3508 //
3509 // add eax, 2
3510 // cmp eax, x1
3511 // jl qLoop
3512 // qEnd:
3513 // }
3514
Nicolas Capens5ba566b2015-05-25 17:11:04 -04003515 memfill4(&target[((x0 + 1) & ~1) * 2], (int&)depth, 8 * ((x1 & ~1) - ((x0 + 1) & ~1)));
John Bauman89401822014-05-06 15:04:28 -04003516
3517 if((x1 & 1) != 0)
3518 {
3519 target[(x1 & ~1) * 2 + 0] = depth;
3520 target[(x1 & ~1) * 2 + 2] = depth;
3521 }
3522
3523 y++;
3524 }
3525 else
3526 {
3527 for(int x = x0; x < x1; x++)
3528 {
3529 target[(x & ~1) * 2 + (x & 1)] = depth;
3530 }
3531 }
3532 }
3533
3534 buffer += internal.sliceP;
3535 }
3536
3537 unlockInternal();
3538 }
3539 }
3540
3541 void Surface::clearStencilBuffer(unsigned char s, unsigned char mask, int x0, int y0, int width, int height)
3542 {
3543 // Not overlapping
3544 if(x0 > internal.width) return;
3545 if(y0 > internal.height) return;
3546 if(x0 + width < 0) return;
3547 if(y0 + height < 0) return;
3548
3549 // Clip against dimensions
3550 if(x0 < 0) {width += x0; x0 = 0;}
3551 if(x0 + width > internal.width) width = internal.width - x0;
3552 if(y0 < 0) {height += y0; y0 = 0;}
3553 if(y0 + height > internal.height) height = internal.height - y0;
3554
3555 int width2 = (internal.width + 1) & ~1;
3556
3557 int x1 = x0 + width;
3558 int y1 = y0 + height;
3559
3560 unsigned char maskedS = s & mask;
3561 unsigned char invMask = ~mask;
3562 unsigned int fill = maskedS;
3563 fill = fill | (fill << 8) | (fill << 16) + (fill << 24);
3564
3565 if(false)
3566 {
3567 char *target = (char*)lockStencil(0, PUBLIC) + x0 + width2 * y0;
3568
3569 for(int z = 0; z < stencil.depth; z++)
3570 {
3571 for(int y = y0; y < y0 + height; y++)
3572 {
3573 if(mask == 0xFF)
3574 {
Nicolas Capens5ba566b2015-05-25 17:11:04 -04003575 memfill4(target, fill, width);
John Bauman89401822014-05-06 15:04:28 -04003576 }
3577 else
3578 {
3579 for(int x = 0; x < width; x++)
3580 {
3581 target[x] = maskedS | (target[x] & invMask);
3582 }
3583 }
3584
3585 target += width2;
3586 }
3587 }
3588
3589 unlockStencil();
3590 }
3591 else // Quad layout
3592 {
3593 char *buffer = (char*)lockStencil(0, PUBLIC);
3594
3595 if(mask == 0xFF)
3596 {
3597 for(int z = 0; z < stencil.depth; z++)
3598 {
3599 for(int y = y0; y < y1; y++)
3600 {
3601 char *target = buffer + (y & ~1) * width2 + (y & 1) * 2;
3602
3603 if((y & 1) == 0 && y + 1 < y1 && mask == 0xFF) // Fill quad line at once
3604 {
3605 if((x0 & 1) != 0)
3606 {
3607 target[(x0 & ~1) * 2 + 1] = fill;
3608 target[(x0 & ~1) * 2 + 3] = fill;
3609 }
3610
Nicolas Capens5ba566b2015-05-25 17:11:04 -04003611 memfill4(&target[((x0 + 1) & ~1) * 2], fill, ((x1 + 1) & ~1) * 2 - ((x0 + 1) & ~1) * 2);
John Bauman89401822014-05-06 15:04:28 -04003612
3613 if((x1 & 1) != 0)
3614 {
3615 target[(x1 & ~1) * 2 + 0] = fill;
3616 target[(x1 & ~1) * 2 + 2] = fill;
3617 }
3618
3619 y++;
3620 }
3621 else
3622 {
3623 for(int x = x0; x < x1; x++)
3624 {
3625 target[(x & ~1) * 2 + (x & 1)] = maskedS | (target[x] & invMask);
3626 }
3627 }
3628 }
3629
3630 buffer += stencil.sliceP;
3631 }
3632 }
3633
3634 unlockStencil();
3635 }
3636 }
3637
3638 void Surface::fill(const Color<float> &color, int x0, int y0, int width, int height)
3639 {
3640 unsigned char *row;
3641 Buffer *buffer;
3642
3643 if(internal.dirty)
3644 {
3645 row = (unsigned char*)lockInternal(x0, y0, 0, LOCK_WRITEONLY, PUBLIC);
3646 buffer = &internal;
3647 }
3648 else
3649 {
3650 row = (unsigned char*)lockExternal(x0, y0, 0, LOCK_WRITEONLY, PUBLIC);
3651 buffer = &external;
3652 }
3653
3654 if(buffer->bytes <= 4)
3655 {
3656 int c;
3657 buffer->write(&c, color);
3658
3659 if(buffer->bytes <= 1) c = (c << 8) | c;
3660 if(buffer->bytes <= 2) c = (c << 16) | c;
3661
3662 for(int y = 0; y < height; y++)
3663 {
Nicolas Capens5ba566b2015-05-25 17:11:04 -04003664 memfill4(row, c, width * buffer->bytes);
John Bauman89401822014-05-06 15:04:28 -04003665
3666 row += buffer->pitchB;
3667 }
3668 }
3669 else // Generic
3670 {
3671 for(int y = 0; y < height; y++)
3672 {
3673 unsigned char *element = row;
3674
3675 for(int x = 0; x < width; x++)
3676 {
3677 buffer->write(element, color);
3678
3679 element += buffer->bytes;
3680 }
3681
3682 row += buffer->pitchB;
3683 }
3684 }
3685
3686 if(buffer == &internal)
3687 {
3688 unlockInternal();
3689 }
3690 else
3691 {
3692 unlockExternal();
3693 }
3694 }
3695
3696 Color<float> Surface::readExternal(int x, int y, int z) const
3697 {
3698 ASSERT(external.lock != LOCK_UNLOCKED);
3699
3700 return external.read(x, y, z);
3701 }
3702
3703 Color<float> Surface::readExternal(int x, int y) const
3704 {
3705 ASSERT(external.lock != LOCK_UNLOCKED);
3706
3707 return external.read(x, y);
3708 }
3709
3710 Color<float> Surface::sampleExternal(float x, float y, float z) const
3711 {
3712 ASSERT(external.lock != LOCK_UNLOCKED);
3713
3714 return external.sample(x, y, z);
3715 }
3716
3717 Color<float> Surface::sampleExternal(float x, float y) const
3718 {
3719 ASSERT(external.lock != LOCK_UNLOCKED);
3720
3721 return external.sample(x, y);
3722 }
3723
3724 void Surface::writeExternal(int x, int y, int z, const Color<float> &color)
3725 {
3726 ASSERT(external.lock != LOCK_UNLOCKED);
3727
3728 external.write(x, y, z, color);
3729 }
3730
3731 void Surface::writeExternal(int x, int y, const Color<float> &color)
3732 {
3733 ASSERT(external.lock != LOCK_UNLOCKED);
3734
3735 external.write(x, y, color);
3736 }
3737
Alexis Hetu43577b82015-10-21 15:32:16 -04003738 void Surface::copyInternal(const Surface* source, int x, int y, float srcX, float srcY, bool filter)
John Bauman89401822014-05-06 15:04:28 -04003739 {
Alexis Hetu43577b82015-10-21 15:32:16 -04003740 ASSERT(internal.lock != LOCK_UNLOCKED && source && source->internal.lock != LOCK_UNLOCKED);
John Bauman89401822014-05-06 15:04:28 -04003741
Alexis Hetu43577b82015-10-21 15:32:16 -04003742 sw::Color<float> color;
John Bauman89401822014-05-06 15:04:28 -04003743
Alexis Hetu43577b82015-10-21 15:32:16 -04003744 if(!filter)
3745 {
3746 color = source->internal.read((int)srcX, (int)srcY);
3747 }
3748 else // Bilinear filtering
3749 {
3750 color = source->internal.sample(srcX, srcY);
3751 }
John Bauman89401822014-05-06 15:04:28 -04003752
3753 internal.write(x, y, color);
3754 }
3755
Alexis Hetu43577b82015-10-21 15:32:16 -04003756 void Surface::copyInternal(const Surface* source, int x, int y, int z, float srcX, float srcY, float srcZ, bool filter)
3757 {
3758 ASSERT(internal.lock != LOCK_UNLOCKED && source && source->internal.lock != LOCK_UNLOCKED);
3759
3760 sw::Color<float> color;
3761
3762 if(!filter)
3763 {
3764 color = source->internal.read((int)srcX, (int)srcY, int(srcZ));
3765 }
3766 else // Bilinear filtering
3767 {
3768 color = source->internal.sample(srcX, srcY, srcZ);
3769 }
3770
3771 internal.write(x, y, z, color);
3772 }
3773
John Bauman89401822014-05-06 15:04:28 -04003774 bool Surface::hasStencil() const
3775 {
3776 return isStencil(external.format);
3777 }
3778
3779 bool Surface::hasDepth() const
3780 {
3781 return isDepth(external.format);
3782 }
3783
3784 bool Surface::hasPalette() const
3785 {
3786 return isPalette(external.format);
3787 }
3788
3789 bool Surface::isRenderTarget() const
3790 {
3791 return renderTarget;
3792 }
3793
3794 bool Surface::hasDirtyMipmaps() const
3795 {
3796 return dirtyMipmaps;
3797 }
3798
3799 void Surface::cleanMipmaps()
3800 {
3801 dirtyMipmaps = false;
3802 }
3803
3804 Resource *Surface::getResource()
3805 {
3806 return resource;
3807 }
3808
3809 bool Surface::identicalFormats() const
3810 {
John Bauman66b8ab22014-05-06 15:57:45 -04003811 return external.format == internal.format &&
3812 external.width == internal.width &&
Nicolas Capens22658242014-11-29 00:31:41 -05003813 external.height == internal.height &&
3814 external.depth == internal.depth &&
3815 external.pitchB == internal.pitchB &&
3816 external.sliceB == internal.sliceB;
John Bauman89401822014-05-06 15:04:28 -04003817 }
3818
3819 Format Surface::selectInternalFormat(Format format) const
3820 {
3821 switch(format)
3822 {
3823 case FORMAT_NULL:
3824 return FORMAT_NULL;
3825 case FORMAT_P8:
3826 case FORMAT_A8P8:
3827 case FORMAT_A4R4G4B4:
3828 case FORMAT_A1R5G5B5:
3829 case FORMAT_A8R3G3B2:
3830 return FORMAT_A8R8G8B8;
3831 case FORMAT_A8:
3832 return FORMAT_A8;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003833 case FORMAT_R8I:
3834 return FORMAT_R8I;
3835 case FORMAT_R8UI:
3836 return FORMAT_R8UI;
3837 case FORMAT_R8I_SNORM:
3838 return FORMAT_R8I_SNORM;
John Bauman89401822014-05-06 15:04:28 -04003839 case FORMAT_R8:
3840 return FORMAT_R8;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003841 case FORMAT_R16I:
3842 return FORMAT_R16I;
3843 case FORMAT_R16UI:
3844 return FORMAT_R16UI;
3845 case FORMAT_R32I:
3846 return FORMAT_R32I;
3847 case FORMAT_R32UI:
3848 return FORMAT_R32UI;
John Bauman89401822014-05-06 15:04:28 -04003849 case FORMAT_A2R10G10B10:
3850 case FORMAT_A2B10G10R10:
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003851 case FORMAT_X16B16G16R16I:
3852 case FORMAT_A16B16G16R16I:
3853 return FORMAT_A16B16G16R16I;
3854 case FORMAT_X16B16G16R16UI:
3855 case FORMAT_A16B16G16R16UI:
3856 return FORMAT_A16B16G16R16UI;
John Bauman89401822014-05-06 15:04:28 -04003857 case FORMAT_A16B16G16R16:
3858 return FORMAT_A16B16G16R16;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003859 case FORMAT_X32B32G32R32I:
3860 case FORMAT_A32B32G32R32I:
3861 return FORMAT_A32B32G32R32I;
3862 case FORMAT_X32B32G32R32UI:
3863 case FORMAT_A32B32G32R32UI:
3864 return FORMAT_A32B32G32R32UI;
3865 case FORMAT_G8R8I:
3866 return FORMAT_G8R8I;
3867 case FORMAT_G8R8UI:
3868 return FORMAT_G8R8UI;
3869 case FORMAT_G8R8I_SNORM:
3870 return FORMAT_G8R8I_SNORM;
John Bauman89401822014-05-06 15:04:28 -04003871 case FORMAT_G8R8:
3872 return FORMAT_G8R8;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003873 case FORMAT_G16R16I:
3874 return FORMAT_G16R16I;
3875 case FORMAT_G16R16UI:
3876 return FORMAT_G16R16UI;
John Bauman89401822014-05-06 15:04:28 -04003877 case FORMAT_G16R16:
3878 return FORMAT_G16R16;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003879 case FORMAT_G32R32I:
3880 return FORMAT_G32R32I;
3881 case FORMAT_G32R32UI:
3882 return FORMAT_G32R32UI;
John Bauman89401822014-05-06 15:04:28 -04003883 case FORMAT_A8R8G8B8:
John Bauman89401822014-05-06 15:04:28 -04003884 if(lockable || !quadLayoutEnabled)
3885 {
3886 return FORMAT_A8R8G8B8;
3887 }
3888 else
3889 {
3890 return FORMAT_A8G8R8B8Q;
3891 }
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003892 case FORMAT_A8B8G8R8I:
3893 return FORMAT_A8B8G8R8I;
3894 case FORMAT_A8B8G8R8UI:
3895 return FORMAT_A8B8G8R8UI;
3896 case FORMAT_A8B8G8R8I_SNORM:
3897 return FORMAT_A8B8G8R8I_SNORM;
Nicolas Capens80594422015-06-09 16:42:56 -04003898 case FORMAT_R5G5B5A1:
3899 case FORMAT_R4G4B4A4:
Nicolas Capensef77ac12015-03-28 21:48:51 -04003900 case FORMAT_A8B8G8R8:
3901 return FORMAT_A8B8G8R8;
John Bauman89401822014-05-06 15:04:28 -04003902 case FORMAT_R5G6B5:
Nicolas Capens5a86ee92015-09-04 10:45:43 -04003903 return FORMAT_R5G6B5;
3904 case FORMAT_R3G3B2:
John Bauman89401822014-05-06 15:04:28 -04003905 case FORMAT_R8G8B8:
3906 case FORMAT_X4R4G4B4:
3907 case FORMAT_X1R5G5B5:
3908 case FORMAT_X8R8G8B8:
John Bauman89401822014-05-06 15:04:28 -04003909 if(lockable || !quadLayoutEnabled)
3910 {
3911 return FORMAT_X8R8G8B8;
3912 }
3913 else
3914 {
3915 return FORMAT_X8G8R8B8Q;
3916 }
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003917 case FORMAT_X8B8G8R8I:
3918 return FORMAT_X8B8G8R8I;
3919 case FORMAT_X8B8G8R8UI:
3920 return FORMAT_X8B8G8R8UI;
3921 case FORMAT_X8B8G8R8I_SNORM:
3922 return FORMAT_X8B8G8R8I_SNORM;
Nicolas Capens80594422015-06-09 16:42:56 -04003923 case FORMAT_B8G8R8:
Nicolas Capensef77ac12015-03-28 21:48:51 -04003924 case FORMAT_X8B8G8R8:
3925 return FORMAT_X8B8G8R8;
John Bauman89401822014-05-06 15:04:28 -04003926 // Compressed formats
3927 #if S3TC_SUPPORT
3928 case FORMAT_DXT1:
3929 case FORMAT_DXT3:
3930 case FORMAT_DXT5:
John Bauman66b8ab22014-05-06 15:57:45 -04003931 #endif
Alexis Hetu460e41f2015-09-01 10:58:37 -04003932 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
3933 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
3934 case FORMAT_RGBA8_ETC2_EAC:
3935 case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
3936 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
3937 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
3938 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
3939 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
3940 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
3941 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
3942 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
3943 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
3944 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
3945 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
3946 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
3947 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
3948 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
3949 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:
3950 return FORMAT_A8R8G8B8;
3951 case FORMAT_RGBA_ASTC_4x4_KHR:
3952 case FORMAT_RGBA_ASTC_5x4_KHR:
3953 case FORMAT_RGBA_ASTC_5x5_KHR:
3954 case FORMAT_RGBA_ASTC_6x5_KHR:
3955 case FORMAT_RGBA_ASTC_6x6_KHR:
3956 case FORMAT_RGBA_ASTC_8x5_KHR:
3957 case FORMAT_RGBA_ASTC_8x6_KHR:
3958 case FORMAT_RGBA_ASTC_8x8_KHR:
3959 case FORMAT_RGBA_ASTC_10x5_KHR:
3960 case FORMAT_RGBA_ASTC_10x6_KHR:
3961 case FORMAT_RGBA_ASTC_10x8_KHR:
3962 case FORMAT_RGBA_ASTC_10x10_KHR:
3963 case FORMAT_RGBA_ASTC_12x10_KHR:
3964 case FORMAT_RGBA_ASTC_12x12_KHR:
3965 // ASTC supports HDR, so a floating point format is required to represent it properly
3966 return FORMAT_A32B32G32R32F; // FIXME: 16FP is probably sufficient, but it's currently unsupported
John Bauman89401822014-05-06 15:04:28 -04003967 case FORMAT_ATI1:
Alexis Hetu460e41f2015-09-01 10:58:37 -04003968 case FORMAT_R11_EAC:
John Bauman89401822014-05-06 15:04:28 -04003969 return FORMAT_R8;
Alexis Hetu0de50d42015-09-09 13:56:41 -04003970 case FORMAT_SIGNED_R11_EAC:
3971 return FORMAT_R32F; // FIXME: Signed 8bit format would be sufficient
John Bauman89401822014-05-06 15:04:28 -04003972 case FORMAT_ATI2:
Alexis Hetu460e41f2015-09-01 10:58:37 -04003973 case FORMAT_RG11_EAC:
John Bauman89401822014-05-06 15:04:28 -04003974 return FORMAT_G8R8;
Alexis Hetu0de50d42015-09-09 13:56:41 -04003975 case FORMAT_SIGNED_RG11_EAC:
3976 return FORMAT_G32R32F; // FIXME: Signed 8bit format would be sufficient
Nicolas Capens22658242014-11-29 00:31:41 -05003977 case FORMAT_ETC1:
Alexis Hetu460e41f2015-09-01 10:58:37 -04003978 case FORMAT_RGB8_ETC2:
3979 case FORMAT_SRGB8_ETC2:
Nicolas Capens22658242014-11-29 00:31:41 -05003980 return FORMAT_X8R8G8B8;
John Bauman89401822014-05-06 15:04:28 -04003981 // Bumpmap formats
3982 case FORMAT_V8U8: return FORMAT_V8U8;
3983 case FORMAT_L6V5U5: return FORMAT_X8L8V8U8;
3984 case FORMAT_Q8W8V8U8: return FORMAT_Q8W8V8U8;
3985 case FORMAT_X8L8V8U8: return FORMAT_X8L8V8U8;
3986 case FORMAT_V16U16: return FORMAT_V16U16;
3987 case FORMAT_A2W10V10U10: return FORMAT_A16W16V16U16;
3988 case FORMAT_Q16W16V16U16: return FORMAT_Q16W16V16U16;
3989 // Floating-point formats
Nicolas Capens80594422015-06-09 16:42:56 -04003990 case FORMAT_A16F: return FORMAT_A32B32G32R32F;
John Bauman89401822014-05-06 15:04:28 -04003991 case FORMAT_R16F: return FORMAT_R32F;
3992 case FORMAT_G16R16F: return FORMAT_G32R32F;
Nicolas Capens80594422015-06-09 16:42:56 -04003993 case FORMAT_B16G16R16F: return FORMAT_A32B32G32R32F;
John Bauman89401822014-05-06 15:04:28 -04003994 case FORMAT_A16B16G16R16F: return FORMAT_A32B32G32R32F;
Nicolas Capens80594422015-06-09 16:42:56 -04003995 case FORMAT_A32F: return FORMAT_A32B32G32R32F;
John Bauman89401822014-05-06 15:04:28 -04003996 case FORMAT_R32F: return FORMAT_R32F;
3997 case FORMAT_G32R32F: return FORMAT_G32R32F;
Nicolas Capens80594422015-06-09 16:42:56 -04003998 case FORMAT_B32G32R32F: return FORMAT_A32B32G32R32F;
John Bauman89401822014-05-06 15:04:28 -04003999 case FORMAT_A32B32G32R32F: return FORMAT_A32B32G32R32F;
4000 // Luminance formats
4001 case FORMAT_L8: return FORMAT_L8;
4002 case FORMAT_A4L4: return FORMAT_A8L8;
4003 case FORMAT_L16: return FORMAT_L16;
4004 case FORMAT_A8L8: return FORMAT_A8L8;
Nicolas Capens80594422015-06-09 16:42:56 -04004005 case FORMAT_L16F: return FORMAT_A32B32G32R32F;
4006 case FORMAT_A16L16F: return FORMAT_A32B32G32R32F;
4007 case FORMAT_L32F: return FORMAT_A32B32G32R32F;
4008 case FORMAT_A32L32F: return FORMAT_A32B32G32R32F;
John Bauman89401822014-05-06 15:04:28 -04004009 // Depth/stencil formats
4010 case FORMAT_D16:
4011 case FORMAT_D32:
4012 case FORMAT_D24X8:
4013 case FORMAT_D24S8:
4014 case FORMAT_D24FS8:
4015 if(hasParent) // Texture
4016 {
John Bauman66b8ab22014-05-06 15:57:45 -04004017 return FORMAT_D32FS8_SHADOW;
John Bauman89401822014-05-06 15:04:28 -04004018 }
4019 else if(complementaryDepthBuffer)
4020 {
4021 return FORMAT_D32F_COMPLEMENTARY;
4022 }
4023 else
4024 {
4025 return FORMAT_D32F;
4026 }
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04004027 case FORMAT_D32F: return FORMAT_D32F;
John Bauman66b8ab22014-05-06 15:57:45 -04004028 case FORMAT_D32F_LOCKABLE: return FORMAT_D32F_LOCKABLE;
4029 case FORMAT_D32FS8_TEXTURE: return FORMAT_D32FS8_TEXTURE;
4030 case FORMAT_INTZ: return FORMAT_D32FS8_TEXTURE;
4031 case FORMAT_DF24S8: return FORMAT_D32FS8_SHADOW;
4032 case FORMAT_DF16S8: return FORMAT_D32FS8_SHADOW;
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04004033 case FORMAT_YV12_BT601: return FORMAT_YV12_BT601;
4034 case FORMAT_YV12_BT709: return FORMAT_YV12_BT709;
4035 case FORMAT_YV12_JFIF: return FORMAT_YV12_JFIF;
John Bauman89401822014-05-06 15:04:28 -04004036 default:
4037 ASSERT(false);
4038 }
4039
4040 return FORMAT_NULL;
4041 }
4042
4043 void Surface::setTexturePalette(unsigned int *palette)
4044 {
4045 Surface::palette = palette;
4046 Surface::paletteID++;
4047 }
4048
4049 void Surface::resolve()
4050 {
4051 if(internal.depth <= 1 || !internal.dirty || !renderTarget || internal.format == FORMAT_NULL)
4052 {
4053 return;
4054 }
4055
4056 void *source = internal.lockRect(0, 0, 0, LOCK_READWRITE);
4057
4058 int quality = internal.depth;
4059 int width = internal.width;
4060 int height = internal.height;
4061 int pitch = internal.pitchB;
4062 int slice = internal.sliceB;
4063
4064 unsigned char *source0 = (unsigned char*)source;
4065 unsigned char *source1 = source0 + slice;
4066 unsigned char *source2 = source1 + slice;
4067 unsigned char *source3 = source2 + slice;
4068 unsigned char *source4 = source3 + slice;
4069 unsigned char *source5 = source4 + slice;
4070 unsigned char *source6 = source5 + slice;
4071 unsigned char *source7 = source6 + slice;
4072 unsigned char *source8 = source7 + slice;
4073 unsigned char *source9 = source8 + slice;
4074 unsigned char *sourceA = source9 + slice;
4075 unsigned char *sourceB = sourceA + slice;
4076 unsigned char *sourceC = sourceB + slice;
4077 unsigned char *sourceD = sourceC + slice;
4078 unsigned char *sourceE = sourceD + slice;
4079 unsigned char *sourceF = sourceE + slice;
4080
Nicolas Capensef77ac12015-03-28 21:48:51 -04004081 if(internal.format == FORMAT_X8R8G8B8 || internal.format == FORMAT_A8R8G8B8 || internal.format == FORMAT_X8B8G8R8 || internal.format == FORMAT_A8B8G8R8)
John Bauman89401822014-05-06 15:04:28 -04004082 {
4083 if(CPUID::supportsSSE2() && (width % 4) == 0)
4084 {
4085 if(internal.depth == 2)
4086 {
4087 for(int y = 0; y < height; y++)
4088 {
4089 for(int x = 0; x < width; x += 4)
4090 {
4091 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
4092 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
4093
4094 c0 = _mm_avg_epu8(c0, c1);
4095
4096 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
4097 }
4098
4099 source0 += pitch;
4100 source1 += pitch;
4101 }
4102 }
4103 else if(internal.depth == 4)
4104 {
4105 for(int y = 0; y < height; y++)
4106 {
4107 for(int x = 0; x < width; x += 4)
4108 {
4109 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
4110 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
4111 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x));
4112 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x));
4113
4114 c0 = _mm_avg_epu8(c0, c1);
4115 c2 = _mm_avg_epu8(c2, c3);
4116 c0 = _mm_avg_epu8(c0, c2);
4117
4118 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
4119 }
4120
4121 source0 += pitch;
4122 source1 += pitch;
4123 source2 += pitch;
4124 source3 += pitch;
4125 }
4126 }
4127 else if(internal.depth == 8)
4128 {
4129 for(int y = 0; y < height; y++)
4130 {
4131 for(int x = 0; x < width; x += 4)
4132 {
4133 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
4134 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
4135 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x));
4136 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x));
4137 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x));
4138 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x));
4139 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x));
4140 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x));
4141
4142 c0 = _mm_avg_epu8(c0, c1);
4143 c2 = _mm_avg_epu8(c2, c3);
4144 c4 = _mm_avg_epu8(c4, c5);
4145 c6 = _mm_avg_epu8(c6, c7);
4146 c0 = _mm_avg_epu8(c0, c2);
4147 c4 = _mm_avg_epu8(c4, c6);
4148 c0 = _mm_avg_epu8(c0, c4);
4149
4150 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
4151 }
4152
4153 source0 += pitch;
4154 source1 += pitch;
4155 source2 += pitch;
4156 source3 += pitch;
4157 source4 += pitch;
4158 source5 += pitch;
4159 source6 += pitch;
4160 source7 += pitch;
4161 }
4162 }
4163 else if(internal.depth == 16)
4164 {
4165 for(int y = 0; y < height; y++)
4166 {
4167 for(int x = 0; x < width; x += 4)
4168 {
4169 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
4170 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
4171 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x));
4172 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x));
4173 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x));
4174 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x));
4175 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x));
4176 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x));
4177 __m128i c8 = _mm_load_si128((__m128i*)(source8 + 4 * x));
4178 __m128i c9 = _mm_load_si128((__m128i*)(source9 + 4 * x));
4179 __m128i cA = _mm_load_si128((__m128i*)(sourceA + 4 * x));
4180 __m128i cB = _mm_load_si128((__m128i*)(sourceB + 4 * x));
4181 __m128i cC = _mm_load_si128((__m128i*)(sourceC + 4 * x));
4182 __m128i cD = _mm_load_si128((__m128i*)(sourceD + 4 * x));
4183 __m128i cE = _mm_load_si128((__m128i*)(sourceE + 4 * x));
4184 __m128i cF = _mm_load_si128((__m128i*)(sourceF + 4 * x));
4185
4186 c0 = _mm_avg_epu8(c0, c1);
4187 c2 = _mm_avg_epu8(c2, c3);
4188 c4 = _mm_avg_epu8(c4, c5);
4189 c6 = _mm_avg_epu8(c6, c7);
4190 c8 = _mm_avg_epu8(c8, c9);
4191 cA = _mm_avg_epu8(cA, cB);
4192 cC = _mm_avg_epu8(cC, cD);
4193 cE = _mm_avg_epu8(cE, cF);
4194 c0 = _mm_avg_epu8(c0, c2);
4195 c4 = _mm_avg_epu8(c4, c6);
4196 c8 = _mm_avg_epu8(c8, cA);
4197 cC = _mm_avg_epu8(cC, cE);
4198 c0 = _mm_avg_epu8(c0, c4);
4199 c8 = _mm_avg_epu8(c8, cC);
4200 c0 = _mm_avg_epu8(c0, c8);
4201
4202 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
4203 }
4204
4205 source0 += pitch;
4206 source1 += pitch;
4207 source2 += pitch;
4208 source3 += pitch;
4209 source4 += pitch;
4210 source5 += pitch;
4211 source6 += pitch;
4212 source7 += pitch;
4213 source8 += pitch;
4214 source9 += pitch;
4215 sourceA += pitch;
4216 sourceB += pitch;
4217 sourceC += pitch;
4218 sourceD += pitch;
4219 sourceE += pitch;
4220 sourceF += pitch;
4221 }
4222 }
4223 else ASSERT(false);
4224 }
4225 else
4226 {
4227 #define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7F7F7F7F) + (((x) ^ (y)) & 0x01010101))
4228
4229 if(internal.depth == 2)
4230 {
4231 for(int y = 0; y < height; y++)
4232 {
4233 for(int x = 0; x < width; x++)
4234 {
4235 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4236 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4237
4238 c0 = AVERAGE(c0, c1);
4239
4240 *(unsigned int*)(source0 + 4 * x) = c0;
4241 }
4242
4243 source0 += pitch;
4244 source1 += pitch;
4245 }
4246 }
4247 else if(internal.depth == 4)
4248 {
4249 for(int y = 0; y < height; y++)
4250 {
4251 for(int x = 0; x < width; x++)
4252 {
4253 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4254 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4255 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4256 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4257
4258 c0 = AVERAGE(c0, c1);
4259 c2 = AVERAGE(c2, c3);
4260 c0 = AVERAGE(c0, c2);
4261
4262 *(unsigned int*)(source0 + 4 * x) = c0;
4263 }
4264
4265 source0 += pitch;
4266 source1 += pitch;
4267 source2 += pitch;
4268 source3 += pitch;
4269 }
4270 }
4271 else if(internal.depth == 8)
4272 {
4273 for(int y = 0; y < height; y++)
4274 {
4275 for(int x = 0; x < width; x++)
4276 {
4277 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4278 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4279 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4280 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4281 unsigned int c4 = *(unsigned int*)(source4 + 4 * x);
4282 unsigned int c5 = *(unsigned int*)(source5 + 4 * x);
4283 unsigned int c6 = *(unsigned int*)(source6 + 4 * x);
4284 unsigned int c7 = *(unsigned int*)(source7 + 4 * x);
4285
4286 c0 = AVERAGE(c0, c1);
4287 c2 = AVERAGE(c2, c3);
4288 c4 = AVERAGE(c4, c5);
4289 c6 = AVERAGE(c6, c7);
4290 c0 = AVERAGE(c0, c2);
4291 c4 = AVERAGE(c4, c6);
4292 c0 = AVERAGE(c0, c4);
4293
4294 *(unsigned int*)(source0 + 4 * x) = c0;
4295 }
4296
4297 source0 += pitch;
4298 source1 += pitch;
4299 source2 += pitch;
4300 source3 += pitch;
4301 source4 += pitch;
4302 source5 += pitch;
4303 source6 += pitch;
4304 source7 += pitch;
4305 }
4306 }
4307 else if(internal.depth == 16)
4308 {
4309 for(int y = 0; y < height; y++)
4310 {
4311 for(int x = 0; x < width; x++)
4312 {
4313 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4314 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4315 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4316 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4317 unsigned int c4 = *(unsigned int*)(source4 + 4 * x);
4318 unsigned int c5 = *(unsigned int*)(source5 + 4 * x);
4319 unsigned int c6 = *(unsigned int*)(source6 + 4 * x);
4320 unsigned int c7 = *(unsigned int*)(source7 + 4 * x);
4321 unsigned int c8 = *(unsigned int*)(source8 + 4 * x);
4322 unsigned int c9 = *(unsigned int*)(source9 + 4 * x);
4323 unsigned int cA = *(unsigned int*)(sourceA + 4 * x);
4324 unsigned int cB = *(unsigned int*)(sourceB + 4 * x);
4325 unsigned int cC = *(unsigned int*)(sourceC + 4 * x);
4326 unsigned int cD = *(unsigned int*)(sourceD + 4 * x);
4327 unsigned int cE = *(unsigned int*)(sourceE + 4 * x);
4328 unsigned int cF = *(unsigned int*)(sourceF + 4 * x);
4329
4330 c0 = AVERAGE(c0, c1);
4331 c2 = AVERAGE(c2, c3);
4332 c4 = AVERAGE(c4, c5);
4333 c6 = AVERAGE(c6, c7);
4334 c8 = AVERAGE(c8, c9);
4335 cA = AVERAGE(cA, cB);
4336 cC = AVERAGE(cC, cD);
4337 cE = AVERAGE(cE, cF);
4338 c0 = AVERAGE(c0, c2);
4339 c4 = AVERAGE(c4, c6);
4340 c8 = AVERAGE(c8, cA);
4341 cC = AVERAGE(cC, cE);
4342 c0 = AVERAGE(c0, c4);
4343 c8 = AVERAGE(c8, cC);
4344 c0 = AVERAGE(c0, c8);
4345
4346 *(unsigned int*)(source0 + 4 * x) = c0;
4347 }
4348
4349 source0 += pitch;
4350 source1 += pitch;
4351 source2 += pitch;
4352 source3 += pitch;
4353 source4 += pitch;
4354 source5 += pitch;
4355 source6 += pitch;
4356 source7 += pitch;
4357 source8 += pitch;
4358 source9 += pitch;
4359 sourceA += pitch;
4360 sourceB += pitch;
4361 sourceC += pitch;
4362 sourceD += pitch;
4363 sourceE += pitch;
4364 sourceF += pitch;
4365 }
4366 }
4367 else ASSERT(false);
4368
4369 #undef AVERAGE
4370 }
4371 }
4372 else if(internal.format == FORMAT_G16R16)
4373 {
4374 if(CPUID::supportsSSE2() && (width % 4) == 0)
4375 {
4376 if(internal.depth == 2)
4377 {
4378 for(int y = 0; y < height; y++)
4379 {
4380 for(int x = 0; x < width; x += 4)
4381 {
4382 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
4383 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
4384
4385 c0 = _mm_avg_epu16(c0, c1);
4386
4387 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
4388 }
4389
4390 source0 += pitch;
4391 source1 += pitch;
4392 }
4393 }
4394 else if(internal.depth == 4)
4395 {
4396 for(int y = 0; y < height; y++)
4397 {
4398 for(int x = 0; x < width; x += 4)
4399 {
4400 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
4401 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
4402 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x));
4403 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x));
4404
4405 c0 = _mm_avg_epu16(c0, c1);
4406 c2 = _mm_avg_epu16(c2, c3);
4407 c0 = _mm_avg_epu16(c0, c2);
4408
4409 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
4410 }
4411
4412 source0 += pitch;
4413 source1 += pitch;
4414 source2 += pitch;
4415 source3 += pitch;
4416 }
4417 }
4418 else if(internal.depth == 8)
4419 {
4420 for(int y = 0; y < height; y++)
4421 {
4422 for(int x = 0; x < width; x += 4)
4423 {
4424 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
4425 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
4426 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x));
4427 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x));
4428 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x));
4429 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x));
4430 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x));
4431 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x));
4432
4433 c0 = _mm_avg_epu16(c0, c1);
4434 c2 = _mm_avg_epu16(c2, c3);
4435 c4 = _mm_avg_epu16(c4, c5);
4436 c6 = _mm_avg_epu16(c6, c7);
4437 c0 = _mm_avg_epu16(c0, c2);
4438 c4 = _mm_avg_epu16(c4, c6);
4439 c0 = _mm_avg_epu16(c0, c4);
4440
4441 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
4442 }
4443
4444 source0 += pitch;
4445 source1 += pitch;
4446 source2 += pitch;
4447 source3 += pitch;
4448 source4 += pitch;
4449 source5 += pitch;
4450 source6 += pitch;
4451 source7 += pitch;
4452 }
4453 }
4454 else if(internal.depth == 16)
4455 {
4456 for(int y = 0; y < height; y++)
4457 {
4458 for(int x = 0; x < width; x += 4)
4459 {
4460 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
4461 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
4462 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x));
4463 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x));
4464 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x));
4465 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x));
4466 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x));
4467 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x));
4468 __m128i c8 = _mm_load_si128((__m128i*)(source8 + 4 * x));
4469 __m128i c9 = _mm_load_si128((__m128i*)(source9 + 4 * x));
4470 __m128i cA = _mm_load_si128((__m128i*)(sourceA + 4 * x));
4471 __m128i cB = _mm_load_si128((__m128i*)(sourceB + 4 * x));
4472 __m128i cC = _mm_load_si128((__m128i*)(sourceC + 4 * x));
4473 __m128i cD = _mm_load_si128((__m128i*)(sourceD + 4 * x));
4474 __m128i cE = _mm_load_si128((__m128i*)(sourceE + 4 * x));
4475 __m128i cF = _mm_load_si128((__m128i*)(sourceF + 4 * x));
4476
4477 c0 = _mm_avg_epu16(c0, c1);
4478 c2 = _mm_avg_epu16(c2, c3);
4479 c4 = _mm_avg_epu16(c4, c5);
4480 c6 = _mm_avg_epu16(c6, c7);
4481 c8 = _mm_avg_epu16(c8, c9);
4482 cA = _mm_avg_epu16(cA, cB);
4483 cC = _mm_avg_epu16(cC, cD);
4484 cE = _mm_avg_epu16(cE, cF);
4485 c0 = _mm_avg_epu16(c0, c2);
4486 c4 = _mm_avg_epu16(c4, c6);
4487 c8 = _mm_avg_epu16(c8, cA);
4488 cC = _mm_avg_epu16(cC, cE);
4489 c0 = _mm_avg_epu16(c0, c4);
4490 c8 = _mm_avg_epu16(c8, cC);
4491 c0 = _mm_avg_epu16(c0, c8);
4492
4493 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
4494 }
4495
4496 source0 += pitch;
4497 source1 += pitch;
4498 source2 += pitch;
4499 source3 += pitch;
4500 source4 += pitch;
4501 source5 += pitch;
4502 source6 += pitch;
4503 source7 += pitch;
4504 source8 += pitch;
4505 source9 += pitch;
4506 sourceA += pitch;
4507 sourceB += pitch;
4508 sourceC += pitch;
4509 sourceD += pitch;
4510 sourceE += pitch;
4511 sourceF += pitch;
4512 }
4513 }
4514 else ASSERT(false);
4515 }
4516 else
4517 {
4518 #define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7FFF7FFF) + (((x) ^ (y)) & 0x00010001))
4519
4520 if(internal.depth == 2)
4521 {
4522 for(int y = 0; y < height; y++)
4523 {
4524 for(int x = 0; x < width; x++)
4525 {
4526 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4527 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4528
4529 c0 = AVERAGE(c0, c1);
4530
4531 *(unsigned int*)(source0 + 4 * x) = c0;
4532 }
4533
4534 source0 += pitch;
4535 source1 += pitch;
4536 }
4537 }
4538 else if(internal.depth == 4)
4539 {
4540 for(int y = 0; y < height; y++)
4541 {
4542 for(int x = 0; x < width; x++)
4543 {
4544 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4545 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4546 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4547 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4548
4549 c0 = AVERAGE(c0, c1);
4550 c2 = AVERAGE(c2, c3);
4551 c0 = AVERAGE(c0, c2);
4552
4553 *(unsigned int*)(source0 + 4 * x) = c0;
4554 }
4555
4556 source0 += pitch;
4557 source1 += pitch;
4558 source2 += pitch;
4559 source3 += pitch;
4560 }
4561 }
4562 else if(internal.depth == 8)
4563 {
4564 for(int y = 0; y < height; y++)
4565 {
4566 for(int x = 0; x < width; x++)
4567 {
4568 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4569 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4570 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4571 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4572 unsigned int c4 = *(unsigned int*)(source4 + 4 * x);
4573 unsigned int c5 = *(unsigned int*)(source5 + 4 * x);
4574 unsigned int c6 = *(unsigned int*)(source6 + 4 * x);
4575 unsigned int c7 = *(unsigned int*)(source7 + 4 * x);
4576
4577 c0 = AVERAGE(c0, c1);
4578 c2 = AVERAGE(c2, c3);
4579 c4 = AVERAGE(c4, c5);
4580 c6 = AVERAGE(c6, c7);
4581 c0 = AVERAGE(c0, c2);
4582 c4 = AVERAGE(c4, c6);
4583 c0 = AVERAGE(c0, c4);
4584
4585 *(unsigned int*)(source0 + 4 * x) = c0;
4586 }
4587
4588 source0 += pitch;
4589 source1 += pitch;
4590 source2 += pitch;
4591 source3 += pitch;
4592 source4 += pitch;
4593 source5 += pitch;
4594 source6 += pitch;
4595 source7 += pitch;
4596 }
4597 }
4598 else if(internal.depth == 16)
4599 {
4600 for(int y = 0; y < height; y++)
4601 {
4602 for(int x = 0; x < width; x++)
4603 {
4604 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4605 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4606 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4607 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4608 unsigned int c4 = *(unsigned int*)(source4 + 4 * x);
4609 unsigned int c5 = *(unsigned int*)(source5 + 4 * x);
4610 unsigned int c6 = *(unsigned int*)(source6 + 4 * x);
4611 unsigned int c7 = *(unsigned int*)(source7 + 4 * x);
4612 unsigned int c8 = *(unsigned int*)(source8 + 4 * x);
4613 unsigned int c9 = *(unsigned int*)(source9 + 4 * x);
4614 unsigned int cA = *(unsigned int*)(sourceA + 4 * x);
4615 unsigned int cB = *(unsigned int*)(sourceB + 4 * x);
4616 unsigned int cC = *(unsigned int*)(sourceC + 4 * x);
4617 unsigned int cD = *(unsigned int*)(sourceD + 4 * x);
4618 unsigned int cE = *(unsigned int*)(sourceE + 4 * x);
4619 unsigned int cF = *(unsigned int*)(sourceF + 4 * x);
4620
4621 c0 = AVERAGE(c0, c1);
4622 c2 = AVERAGE(c2, c3);
4623 c4 = AVERAGE(c4, c5);
4624 c6 = AVERAGE(c6, c7);
4625 c8 = AVERAGE(c8, c9);
4626 cA = AVERAGE(cA, cB);
4627 cC = AVERAGE(cC, cD);
4628 cE = AVERAGE(cE, cF);
4629 c0 = AVERAGE(c0, c2);
4630 c4 = AVERAGE(c4, c6);
4631 c8 = AVERAGE(c8, cA);
4632 cC = AVERAGE(cC, cE);
4633 c0 = AVERAGE(c0, c4);
4634 c8 = AVERAGE(c8, cC);
4635 c0 = AVERAGE(c0, c8);
4636
4637 *(unsigned int*)(source0 + 4 * x) = c0;
4638 }
4639
4640 source0 += pitch;
4641 source1 += pitch;
4642 source2 += pitch;
4643 source3 += pitch;
4644 source4 += pitch;
4645 source5 += pitch;
4646 source6 += pitch;
4647 source7 += pitch;
4648 source8 += pitch;
4649 source9 += pitch;
4650 sourceA += pitch;
4651 sourceB += pitch;
4652 sourceC += pitch;
4653 sourceD += pitch;
4654 sourceE += pitch;
4655 sourceF += pitch;
4656 }
4657 }
4658 else ASSERT(false);
4659
4660 #undef AVERAGE
4661 }
4662 }
4663 else if(internal.format == FORMAT_A16B16G16R16)
4664 {
4665 if(CPUID::supportsSSE2() && (width % 2) == 0)
4666 {
4667 if(internal.depth == 2)
4668 {
4669 for(int y = 0; y < height; y++)
4670 {
4671 for(int x = 0; x < width; x += 2)
4672 {
4673 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x));
4674 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x));
4675
4676 c0 = _mm_avg_epu16(c0, c1);
4677
4678 _mm_store_si128((__m128i*)(source0 + 8 * x), c0);
4679 }
4680
4681 source0 += pitch;
4682 source1 += pitch;
4683 }
4684 }
4685 else if(internal.depth == 4)
4686 {
4687 for(int y = 0; y < height; y++)
4688 {
4689 for(int x = 0; x < width; x += 2)
4690 {
4691 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x));
4692 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x));
4693 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 8 * x));
4694 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 8 * x));
4695
4696 c0 = _mm_avg_epu16(c0, c1);
4697 c2 = _mm_avg_epu16(c2, c3);
4698 c0 = _mm_avg_epu16(c0, c2);
4699
4700 _mm_store_si128((__m128i*)(source0 + 8 * x), c0);
4701 }
4702
4703 source0 += pitch;
4704 source1 += pitch;
4705 source2 += pitch;
4706 source3 += pitch;
4707 }
4708 }
4709 else if(internal.depth == 8)
4710 {
4711 for(int y = 0; y < height; y++)
4712 {
4713 for(int x = 0; x < width; x += 2)
4714 {
4715 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x));
4716 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x));
4717 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 8 * x));
4718 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 8 * x));
4719 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 8 * x));
4720 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 8 * x));
4721 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 8 * x));
4722 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 8 * x));
4723
4724 c0 = _mm_avg_epu16(c0, c1);
4725 c2 = _mm_avg_epu16(c2, c3);
4726 c4 = _mm_avg_epu16(c4, c5);
4727 c6 = _mm_avg_epu16(c6, c7);
4728 c0 = _mm_avg_epu16(c0, c2);
4729 c4 = _mm_avg_epu16(c4, c6);
4730 c0 = _mm_avg_epu16(c0, c4);
4731
4732 _mm_store_si128((__m128i*)(source0 + 8 * x), c0);
4733 }
4734
4735 source0 += pitch;
4736 source1 += pitch;
4737 source2 += pitch;
4738 source3 += pitch;
4739 source4 += pitch;
4740 source5 += pitch;
4741 source6 += pitch;
4742 source7 += pitch;
4743 }
4744 }
4745 else if(internal.depth == 16)
4746 {
4747 for(int y = 0; y < height; y++)
4748 {
4749 for(int x = 0; x < width; x += 2)
4750 {
4751 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x));
4752 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x));
4753 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 8 * x));
4754 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 8 * x));
4755 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 8 * x));
4756 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 8 * x));
4757 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 8 * x));
4758 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 8 * x));
4759 __m128i c8 = _mm_load_si128((__m128i*)(source8 + 8 * x));
4760 __m128i c9 = _mm_load_si128((__m128i*)(source9 + 8 * x));
4761 __m128i cA = _mm_load_si128((__m128i*)(sourceA + 8 * x));
4762 __m128i cB = _mm_load_si128((__m128i*)(sourceB + 8 * x));
4763 __m128i cC = _mm_load_si128((__m128i*)(sourceC + 8 * x));
4764 __m128i cD = _mm_load_si128((__m128i*)(sourceD + 8 * x));
4765 __m128i cE = _mm_load_si128((__m128i*)(sourceE + 8 * x));
4766 __m128i cF = _mm_load_si128((__m128i*)(sourceF + 8 * x));
4767
4768 c0 = _mm_avg_epu16(c0, c1);
4769 c2 = _mm_avg_epu16(c2, c3);
4770 c4 = _mm_avg_epu16(c4, c5);
4771 c6 = _mm_avg_epu16(c6, c7);
4772 c8 = _mm_avg_epu16(c8, c9);
4773 cA = _mm_avg_epu16(cA, cB);
4774 cC = _mm_avg_epu16(cC, cD);
4775 cE = _mm_avg_epu16(cE, cF);
4776 c0 = _mm_avg_epu16(c0, c2);
4777 c4 = _mm_avg_epu16(c4, c6);
4778 c8 = _mm_avg_epu16(c8, cA);
4779 cC = _mm_avg_epu16(cC, cE);
4780 c0 = _mm_avg_epu16(c0, c4);
4781 c8 = _mm_avg_epu16(c8, cC);
4782 c0 = _mm_avg_epu16(c0, c8);
4783
4784 _mm_store_si128((__m128i*)(source0 + 8 * x), c0);
4785 }
4786
4787 source0 += pitch;
4788 source1 += pitch;
4789 source2 += pitch;
4790 source3 += pitch;
4791 source4 += pitch;
4792 source5 += pitch;
4793 source6 += pitch;
4794 source7 += pitch;
4795 source8 += pitch;
4796 source9 += pitch;
4797 sourceA += pitch;
4798 sourceB += pitch;
4799 sourceC += pitch;
4800 sourceD += pitch;
4801 sourceE += pitch;
4802 sourceF += pitch;
4803 }
4804 }
4805 else ASSERT(false);
4806 }
4807 else
4808 {
4809 #define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7FFF7FFF) + (((x) ^ (y)) & 0x00010001))
4810
4811 if(internal.depth == 2)
4812 {
4813 for(int y = 0; y < height; y++)
4814 {
4815 for(int x = 0; x < 2 * width; x++)
4816 {
4817 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4818 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4819
4820 c0 = AVERAGE(c0, c1);
4821
4822 *(unsigned int*)(source0 + 4 * x) = c0;
4823 }
4824
4825 source0 += pitch;
4826 source1 += pitch;
4827 }
4828 }
4829 else if(internal.depth == 4)
4830 {
4831 for(int y = 0; y < height; y++)
4832 {
4833 for(int x = 0; x < 2 * width; x++)
4834 {
4835 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4836 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4837 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4838 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4839
4840 c0 = AVERAGE(c0, c1);
4841 c2 = AVERAGE(c2, c3);
4842 c0 = AVERAGE(c0, c2);
4843
4844 *(unsigned int*)(source0 + 4 * x) = c0;
4845 }
4846
4847 source0 += pitch;
4848 source1 += pitch;
4849 source2 += pitch;
4850 source3 += pitch;
4851 }
4852 }
4853 else if(internal.depth == 8)
4854 {
4855 for(int y = 0; y < height; y++)
4856 {
4857 for(int x = 0; x < 2 * width; x++)
4858 {
4859 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4860 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4861 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4862 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4863 unsigned int c4 = *(unsigned int*)(source4 + 4 * x);
4864 unsigned int c5 = *(unsigned int*)(source5 + 4 * x);
4865 unsigned int c6 = *(unsigned int*)(source6 + 4 * x);
4866 unsigned int c7 = *(unsigned int*)(source7 + 4 * x);
4867
4868 c0 = AVERAGE(c0, c1);
4869 c2 = AVERAGE(c2, c3);
4870 c4 = AVERAGE(c4, c5);
4871 c6 = AVERAGE(c6, c7);
4872 c0 = AVERAGE(c0, c2);
4873 c4 = AVERAGE(c4, c6);
4874 c0 = AVERAGE(c0, c4);
4875
4876 *(unsigned int*)(source0 + 4 * x) = c0;
4877 }
4878
4879 source0 += pitch;
4880 source1 += pitch;
4881 source2 += pitch;
4882 source3 += pitch;
4883 source4 += pitch;
4884 source5 += pitch;
4885 source6 += pitch;
4886 source7 += pitch;
4887 }
4888 }
4889 else if(internal.depth == 16)
4890 {
4891 for(int y = 0; y < height; y++)
4892 {
4893 for(int x = 0; x < 2 * width; x++)
4894 {
4895 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4896 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4897 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4898 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4899 unsigned int c4 = *(unsigned int*)(source4 + 4 * x);
4900 unsigned int c5 = *(unsigned int*)(source5 + 4 * x);
4901 unsigned int c6 = *(unsigned int*)(source6 + 4 * x);
4902 unsigned int c7 = *(unsigned int*)(source7 + 4 * x);
4903 unsigned int c8 = *(unsigned int*)(source8 + 4 * x);
4904 unsigned int c9 = *(unsigned int*)(source9 + 4 * x);
4905 unsigned int cA = *(unsigned int*)(sourceA + 4 * x);
4906 unsigned int cB = *(unsigned int*)(sourceB + 4 * x);
4907 unsigned int cC = *(unsigned int*)(sourceC + 4 * x);
4908 unsigned int cD = *(unsigned int*)(sourceD + 4 * x);
4909 unsigned int cE = *(unsigned int*)(sourceE + 4 * x);
4910 unsigned int cF = *(unsigned int*)(sourceF + 4 * x);
4911
4912 c0 = AVERAGE(c0, c1);
4913 c2 = AVERAGE(c2, c3);
4914 c4 = AVERAGE(c4, c5);
4915 c6 = AVERAGE(c6, c7);
4916 c8 = AVERAGE(c8, c9);
4917 cA = AVERAGE(cA, cB);
4918 cC = AVERAGE(cC, cD);
4919 cE = AVERAGE(cE, cF);
4920 c0 = AVERAGE(c0, c2);
4921 c4 = AVERAGE(c4, c6);
4922 c8 = AVERAGE(c8, cA);
4923 cC = AVERAGE(cC, cE);
4924 c0 = AVERAGE(c0, c4);
4925 c8 = AVERAGE(c8, cC);
4926 c0 = AVERAGE(c0, c8);
4927
4928 *(unsigned int*)(source0 + 4 * x) = c0;
4929 }
4930
4931 source0 += pitch;
4932 source1 += pitch;
4933 source2 += pitch;
4934 source3 += pitch;
4935 source4 += pitch;
4936 source5 += pitch;
4937 source6 += pitch;
4938 source7 += pitch;
4939 source8 += pitch;
4940 source9 += pitch;
4941 sourceA += pitch;
4942 sourceB += pitch;
4943 sourceC += pitch;
4944 sourceD += pitch;
4945 sourceE += pitch;
4946 sourceF += pitch;
4947 }
4948 }
4949 else ASSERT(false);
4950
4951 #undef AVERAGE
4952 }
4953 }
4954 else if(internal.format == FORMAT_R32F)
4955 {
4956 if(CPUID::supportsSSE() && (width % 4) == 0)
4957 {
4958 if(internal.depth == 2)
4959 {
4960 for(int y = 0; y < height; y++)
4961 {
4962 for(int x = 0; x < width; x += 4)
4963 {
4964 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x));
4965 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x));
4966
4967 c0 = _mm_add_ps(c0, c1);
4968 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 2.0f));
4969
4970 _mm_store_ps((float*)(source0 + 4 * x), c0);
4971 }
4972
4973 source0 += pitch;
4974 source1 += pitch;
4975 }
4976 }
4977 else if(internal.depth == 4)
4978 {
4979 for(int y = 0; y < height; y++)
4980 {
4981 for(int x = 0; x < width; x += 4)
4982 {
4983 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x));
4984 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x));
4985 __m128 c2 = _mm_load_ps((float*)(source2 + 4 * x));
4986 __m128 c3 = _mm_load_ps((float*)(source3 + 4 * x));
4987
4988 c0 = _mm_add_ps(c0, c1);
4989 c2 = _mm_add_ps(c2, c3);
4990 c0 = _mm_add_ps(c0, c2);
4991 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 4.0f));
4992
4993 _mm_store_ps((float*)(source0 + 4 * x), c0);
4994 }
4995
4996 source0 += pitch;
4997 source1 += pitch;
4998 source2 += pitch;
4999 source3 += pitch;
5000 }
5001 }
5002 else if(internal.depth == 8)
5003 {
5004 for(int y = 0; y < height; y++)
5005 {
5006 for(int x = 0; x < width; x += 4)
5007 {
5008 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x));
5009 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x));
5010 __m128 c2 = _mm_load_ps((float*)(source2 + 4 * x));
5011 __m128 c3 = _mm_load_ps((float*)(source3 + 4 * x));
5012 __m128 c4 = _mm_load_ps((float*)(source4 + 4 * x));
5013 __m128 c5 = _mm_load_ps((float*)(source5 + 4 * x));
5014 __m128 c6 = _mm_load_ps((float*)(source6 + 4 * x));
5015 __m128 c7 = _mm_load_ps((float*)(source7 + 4 * x));
5016
5017 c0 = _mm_add_ps(c0, c1);
5018 c2 = _mm_add_ps(c2, c3);
5019 c4 = _mm_add_ps(c4, c5);
5020 c6 = _mm_add_ps(c6, c7);
5021 c0 = _mm_add_ps(c0, c2);
5022 c4 = _mm_add_ps(c4, c6);
5023 c0 = _mm_add_ps(c0, c4);
5024 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 8.0f));
5025
5026 _mm_store_ps((float*)(source0 + 4 * x), c0);
5027 }
5028
5029 source0 += pitch;
5030 source1 += pitch;
5031 source2 += pitch;
5032 source3 += pitch;
5033 source4 += pitch;
5034 source5 += pitch;
5035 source6 += pitch;
5036 source7 += pitch;
5037 }
5038 }
5039 else if(internal.depth == 16)
5040 {
5041 for(int y = 0; y < height; y++)
5042 {
5043 for(int x = 0; x < width; x += 4)
5044 {
5045 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x));
5046 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x));
5047 __m128 c2 = _mm_load_ps((float*)(source2 + 4 * x));
5048 __m128 c3 = _mm_load_ps((float*)(source3 + 4 * x));
5049 __m128 c4 = _mm_load_ps((float*)(source4 + 4 * x));
5050 __m128 c5 = _mm_load_ps((float*)(source5 + 4 * x));
5051 __m128 c6 = _mm_load_ps((float*)(source6 + 4 * x));
5052 __m128 c7 = _mm_load_ps((float*)(source7 + 4 * x));
5053 __m128 c8 = _mm_load_ps((float*)(source8 + 4 * x));
5054 __m128 c9 = _mm_load_ps((float*)(source9 + 4 * x));
5055 __m128 cA = _mm_load_ps((float*)(sourceA + 4 * x));
5056 __m128 cB = _mm_load_ps((float*)(sourceB + 4 * x));
5057 __m128 cC = _mm_load_ps((float*)(sourceC + 4 * x));
5058 __m128 cD = _mm_load_ps((float*)(sourceD + 4 * x));
5059 __m128 cE = _mm_load_ps((float*)(sourceE + 4 * x));
5060 __m128 cF = _mm_load_ps((float*)(sourceF + 4 * x));
5061
5062 c0 = _mm_add_ps(c0, c1);
5063 c2 = _mm_add_ps(c2, c3);
5064 c4 = _mm_add_ps(c4, c5);
5065 c6 = _mm_add_ps(c6, c7);
5066 c8 = _mm_add_ps(c8, c9);
5067 cA = _mm_add_ps(cA, cB);
5068 cC = _mm_add_ps(cC, cD);
5069 cE = _mm_add_ps(cE, cF);
5070 c0 = _mm_add_ps(c0, c2);
5071 c4 = _mm_add_ps(c4, c6);
5072 c8 = _mm_add_ps(c8, cA);
5073 cC = _mm_add_ps(cC, cE);
5074 c0 = _mm_add_ps(c0, c4);
5075 c8 = _mm_add_ps(c8, cC);
5076 c0 = _mm_add_ps(c0, c8);
5077 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 16.0f));
5078
5079 _mm_store_ps((float*)(source0 + 4 * x), c0);
5080 }
5081
5082 source0 += pitch;
5083 source1 += pitch;
5084 source2 += pitch;
5085 source3 += pitch;
5086 source4 += pitch;
5087 source5 += pitch;
5088 source6 += pitch;
5089 source7 += pitch;
5090 source8 += pitch;
5091 source9 += pitch;
5092 sourceA += pitch;
5093 sourceB += pitch;
5094 sourceC += pitch;
5095 sourceD += pitch;
5096 sourceE += pitch;
5097 sourceF += pitch;
5098 }
5099 }
5100 else ASSERT(false);
5101 }
5102 else
5103 {
5104 if(internal.depth == 2)
5105 {
5106 for(int y = 0; y < height; y++)
5107 {
5108 for(int x = 0; x < width; x++)
5109 {
5110 float c0 = *(float*)(source0 + 4 * x);
5111 float c1 = *(float*)(source1 + 4 * x);
5112
5113 c0 = c0 + c1;
5114 c0 *= 1.0f / 2.0f;
5115
5116 *(float*)(source0 + 4 * x) = c0;
5117 }
5118
5119 source0 += pitch;
5120 source1 += pitch;
5121 }
5122 }
5123 else if(internal.depth == 4)
5124 {
5125 for(int y = 0; y < height; y++)
5126 {
5127 for(int x = 0; x < width; x++)
5128 {
5129 float c0 = *(float*)(source0 + 4 * x);
5130 float c1 = *(float*)(source1 + 4 * x);
5131 float c2 = *(float*)(source2 + 4 * x);
5132 float c3 = *(float*)(source3 + 4 * x);
5133
5134 c0 = c0 + c1;
5135 c2 = c2 + c3;
5136 c0 = c0 + c2;
5137 c0 *= 1.0f / 4.0f;
5138
5139 *(float*)(source0 + 4 * x) = c0;
5140 }
5141
5142 source0 += pitch;
5143 source1 += pitch;
5144 source2 += pitch;
5145 source3 += pitch;
5146 }
5147 }
5148 else if(internal.depth == 8)
5149 {
5150 for(int y = 0; y < height; y++)
5151 {
5152 for(int x = 0; x < width; x++)
5153 {
5154 float c0 = *(float*)(source0 + 4 * x);
5155 float c1 = *(float*)(source1 + 4 * x);
5156 float c2 = *(float*)(source2 + 4 * x);
5157 float c3 = *(float*)(source3 + 4 * x);
5158 float c4 = *(float*)(source4 + 4 * x);
5159 float c5 = *(float*)(source5 + 4 * x);
5160 float c6 = *(float*)(source6 + 4 * x);
5161 float c7 = *(float*)(source7 + 4 * x);
5162
5163 c0 = c0 + c1;
5164 c2 = c2 + c3;
5165 c4 = c4 + c5;
5166 c6 = c6 + c7;
5167 c0 = c0 + c2;
5168 c4 = c4 + c6;
5169 c0 = c0 + c4;
5170 c0 *= 1.0f / 8.0f;
5171
5172 *(float*)(source0 + 4 * x) = c0;
5173 }
5174
5175 source0 += pitch;
5176 source1 += pitch;
5177 source2 += pitch;
5178 source3 += pitch;
5179 source4 += pitch;
5180 source5 += pitch;
5181 source6 += pitch;
5182 source7 += pitch;
5183 }
5184 }
5185 else if(internal.depth == 16)
5186 {
5187 for(int y = 0; y < height; y++)
5188 {
5189 for(int x = 0; x < width; x++)
5190 {
5191 float c0 = *(float*)(source0 + 4 * x);
5192 float c1 = *(float*)(source1 + 4 * x);
5193 float c2 = *(float*)(source2 + 4 * x);
5194 float c3 = *(float*)(source3 + 4 * x);
5195 float c4 = *(float*)(source4 + 4 * x);
5196 float c5 = *(float*)(source5 + 4 * x);
5197 float c6 = *(float*)(source6 + 4 * x);
5198 float c7 = *(float*)(source7 + 4 * x);
5199 float c8 = *(float*)(source8 + 4 * x);
5200 float c9 = *(float*)(source9 + 4 * x);
5201 float cA = *(float*)(sourceA + 4 * x);
5202 float cB = *(float*)(sourceB + 4 * x);
5203 float cC = *(float*)(sourceC + 4 * x);
5204 float cD = *(float*)(sourceD + 4 * x);
5205 float cE = *(float*)(sourceE + 4 * x);
5206 float cF = *(float*)(sourceF + 4 * x);
5207
5208 c0 = c0 + c1;
5209 c2 = c2 + c3;
5210 c4 = c4 + c5;
5211 c6 = c6 + c7;
5212 c8 = c8 + c9;
5213 cA = cA + cB;
5214 cC = cC + cD;
5215 cE = cE + cF;
5216 c0 = c0 + c2;
5217 c4 = c4 + c6;
5218 c8 = c8 + cA;
5219 cC = cC + cE;
5220 c0 = c0 + c4;
5221 c8 = c8 + cC;
5222 c0 = c0 + c8;
5223 c0 *= 1.0f / 16.0f;
5224
5225 *(float*)(source0 + 4 * x) = c0;
5226 }
5227
5228 source0 += pitch;
5229 source1 += pitch;
5230 source2 += pitch;
5231 source3 += pitch;
5232 source4 += pitch;
5233 source5 += pitch;
5234 source6 += pitch;
5235 source7 += pitch;
5236 source8 += pitch;
5237 source9 += pitch;
5238 sourceA += pitch;
5239 sourceB += pitch;
5240 sourceC += pitch;
5241 sourceD += pitch;
5242 sourceE += pitch;
5243 sourceF += pitch;
5244 }
5245 }
5246 else ASSERT(false);
5247 }
5248 }
5249 else if(internal.format == FORMAT_G32R32F)
5250 {
5251 if(CPUID::supportsSSE() && (width % 2) == 0)
5252 {
5253 if(internal.depth == 2)
5254 {
5255 for(int y = 0; y < height; y++)
5256 {
5257 for(int x = 0; x < width; x += 2)
5258 {
5259 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x));
5260 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x));
5261
5262 c0 = _mm_add_ps(c0, c1);
5263 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 2.0f));
5264
5265 _mm_store_ps((float*)(source0 + 8 * x), c0);
5266 }
5267
5268 source0 += pitch;
5269 source1 += pitch;
5270 }
5271 }
5272 else if(internal.depth == 4)
5273 {
5274 for(int y = 0; y < height; y++)
5275 {
5276 for(int x = 0; x < width; x += 2)
5277 {
5278 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x));
5279 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x));
5280 __m128 c2 = _mm_load_ps((float*)(source2 + 8 * x));
5281 __m128 c3 = _mm_load_ps((float*)(source3 + 8 * x));
5282
5283 c0 = _mm_add_ps(c0, c1);
5284 c2 = _mm_add_ps(c2, c3);
5285 c0 = _mm_add_ps(c0, c2);
5286 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 4.0f));
5287
5288 _mm_store_ps((float*)(source0 + 8 * x), c0);
5289 }
5290
5291 source0 += pitch;
5292 source1 += pitch;
5293 source2 += pitch;
5294 source3 += pitch;
5295 }
5296 }
5297 else if(internal.depth == 8)
5298 {
5299 for(int y = 0; y < height; y++)
5300 {
5301 for(int x = 0; x < width; x += 2)
5302 {
5303 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x));
5304 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x));
5305 __m128 c2 = _mm_load_ps((float*)(source2 + 8 * x));
5306 __m128 c3 = _mm_load_ps((float*)(source3 + 8 * x));
5307 __m128 c4 = _mm_load_ps((float*)(source4 + 8 * x));
5308 __m128 c5 = _mm_load_ps((float*)(source5 + 8 * x));
5309 __m128 c6 = _mm_load_ps((float*)(source6 + 8 * x));
5310 __m128 c7 = _mm_load_ps((float*)(source7 + 8 * x));
5311
5312 c0 = _mm_add_ps(c0, c1);
5313 c2 = _mm_add_ps(c2, c3);
5314 c4 = _mm_add_ps(c4, c5);
5315 c6 = _mm_add_ps(c6, c7);
5316 c0 = _mm_add_ps(c0, c2);
5317 c4 = _mm_add_ps(c4, c6);
5318 c0 = _mm_add_ps(c0, c4);
5319 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 8.0f));
5320
5321 _mm_store_ps((float*)(source0 + 8 * x), c0);
5322 }
5323
5324 source0 += pitch;
5325 source1 += pitch;
5326 source2 += pitch;
5327 source3 += pitch;
5328 source4 += pitch;
5329 source5 += pitch;
5330 source6 += pitch;
5331 source7 += pitch;
5332 }
5333 }
5334 else if(internal.depth == 16)
5335 {
5336 for(int y = 0; y < height; y++)
5337 {
5338 for(int x = 0; x < width; x += 2)
5339 {
5340 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x));
5341 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x));
5342 __m128 c2 = _mm_load_ps((float*)(source2 + 8 * x));
5343 __m128 c3 = _mm_load_ps((float*)(source3 + 8 * x));
5344 __m128 c4 = _mm_load_ps((float*)(source4 + 8 * x));
5345 __m128 c5 = _mm_load_ps((float*)(source5 + 8 * x));
5346 __m128 c6 = _mm_load_ps((float*)(source6 + 8 * x));
5347 __m128 c7 = _mm_load_ps((float*)(source7 + 8 * x));
5348 __m128 c8 = _mm_load_ps((float*)(source8 + 8 * x));
5349 __m128 c9 = _mm_load_ps((float*)(source9 + 8 * x));
5350 __m128 cA = _mm_load_ps((float*)(sourceA + 8 * x));
5351 __m128 cB = _mm_load_ps((float*)(sourceB + 8 * x));
5352 __m128 cC = _mm_load_ps((float*)(sourceC + 8 * x));
5353 __m128 cD = _mm_load_ps((float*)(sourceD + 8 * x));
5354 __m128 cE = _mm_load_ps((float*)(sourceE + 8 * x));
5355 __m128 cF = _mm_load_ps((float*)(sourceF + 8 * x));
5356
5357 c0 = _mm_add_ps(c0, c1);
5358 c2 = _mm_add_ps(c2, c3);
5359 c4 = _mm_add_ps(c4, c5);
5360 c6 = _mm_add_ps(c6, c7);
5361 c8 = _mm_add_ps(c8, c9);
5362 cA = _mm_add_ps(cA, cB);
5363 cC = _mm_add_ps(cC, cD);
5364 cE = _mm_add_ps(cE, cF);
5365 c0 = _mm_add_ps(c0, c2);
5366 c4 = _mm_add_ps(c4, c6);
5367 c8 = _mm_add_ps(c8, cA);
5368 cC = _mm_add_ps(cC, cE);
5369 c0 = _mm_add_ps(c0, c4);
5370 c8 = _mm_add_ps(c8, cC);
5371 c0 = _mm_add_ps(c0, c8);
5372 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 16.0f));
5373
5374 _mm_store_ps((float*)(source0 + 8 * x), c0);
5375 }
5376
5377 source0 += pitch;
5378 source1 += pitch;
5379 source2 += pitch;
5380 source3 += pitch;
5381 source4 += pitch;
5382 source5 += pitch;
5383 source6 += pitch;
5384 source7 += pitch;
5385 source8 += pitch;
5386 source9 += pitch;
5387 sourceA += pitch;
5388 sourceB += pitch;
5389 sourceC += pitch;
5390 sourceD += pitch;
5391 sourceE += pitch;
5392 sourceF += pitch;
5393 }
5394 }
5395 else ASSERT(false);
5396 }
5397 else
5398 {
5399 if(internal.depth == 2)
5400 {
5401 for(int y = 0; y < height; y++)
5402 {
5403 for(int x = 0; x < 2 * width; x++)
5404 {
5405 float c0 = *(float*)(source0 + 4 * x);
5406 float c1 = *(float*)(source1 + 4 * x);
5407
5408 c0 = c0 + c1;
5409 c0 *= 1.0f / 2.0f;
5410
5411 *(float*)(source0 + 4 * x) = c0;
5412 }
5413
5414 source0 += pitch;
5415 source1 += pitch;
5416 }
5417 }
5418 else if(internal.depth == 4)
5419 {
5420 for(int y = 0; y < height; y++)
5421 {
5422 for(int x = 0; x < 2 * width; x++)
5423 {
5424 float c0 = *(float*)(source0 + 4 * x);
5425 float c1 = *(float*)(source1 + 4 * x);
5426 float c2 = *(float*)(source2 + 4 * x);
5427 float c3 = *(float*)(source3 + 4 * x);
5428
5429 c0 = c0 + c1;
5430 c2 = c2 + c3;
5431 c0 = c0 + c2;
5432 c0 *= 1.0f / 4.0f;
5433
5434 *(float*)(source0 + 4 * x) = c0;
5435 }
5436
5437 source0 += pitch;
5438 source1 += pitch;
5439 source2 += pitch;
5440 source3 += pitch;
5441 }
5442 }
5443 else if(internal.depth == 8)
5444 {
5445 for(int y = 0; y < height; y++)
5446 {
5447 for(int x = 0; x < 2 * width; x++)
5448 {
5449 float c0 = *(float*)(source0 + 4 * x);
5450 float c1 = *(float*)(source1 + 4 * x);
5451 float c2 = *(float*)(source2 + 4 * x);
5452 float c3 = *(float*)(source3 + 4 * x);
5453 float c4 = *(float*)(source4 + 4 * x);
5454 float c5 = *(float*)(source5 + 4 * x);
5455 float c6 = *(float*)(source6 + 4 * x);
5456 float c7 = *(float*)(source7 + 4 * x);
5457
5458 c0 = c0 + c1;
5459 c2 = c2 + c3;
5460 c4 = c4 + c5;
5461 c6 = c6 + c7;
5462 c0 = c0 + c2;
5463 c4 = c4 + c6;
5464 c0 = c0 + c4;
5465 c0 *= 1.0f / 8.0f;
5466
5467 *(float*)(source0 + 4 * x) = c0;
5468 }
5469
5470 source0 += pitch;
5471 source1 += pitch;
5472 source2 += pitch;
5473 source3 += pitch;
5474 source4 += pitch;
5475 source5 += pitch;
5476 source6 += pitch;
5477 source7 += pitch;
5478 }
5479 }
5480 else if(internal.depth == 16)
5481 {
5482 for(int y = 0; y < height; y++)
5483 {
5484 for(int x = 0; x < 2 * width; x++)
5485 {
5486 float c0 = *(float*)(source0 + 4 * x);
5487 float c1 = *(float*)(source1 + 4 * x);
5488 float c2 = *(float*)(source2 + 4 * x);
5489 float c3 = *(float*)(source3 + 4 * x);
5490 float c4 = *(float*)(source4 + 4 * x);
5491 float c5 = *(float*)(source5 + 4 * x);
5492 float c6 = *(float*)(source6 + 4 * x);
5493 float c7 = *(float*)(source7 + 4 * x);
5494 float c8 = *(float*)(source8 + 4 * x);
5495 float c9 = *(float*)(source9 + 4 * x);
5496 float cA = *(float*)(sourceA + 4 * x);
5497 float cB = *(float*)(sourceB + 4 * x);
5498 float cC = *(float*)(sourceC + 4 * x);
5499 float cD = *(float*)(sourceD + 4 * x);
5500 float cE = *(float*)(sourceE + 4 * x);
5501 float cF = *(float*)(sourceF + 4 * x);
5502
5503 c0 = c0 + c1;
5504 c2 = c2 + c3;
5505 c4 = c4 + c5;
5506 c6 = c6 + c7;
5507 c8 = c8 + c9;
5508 cA = cA + cB;
5509 cC = cC + cD;
5510 cE = cE + cF;
5511 c0 = c0 + c2;
5512 c4 = c4 + c6;
5513 c8 = c8 + cA;
5514 cC = cC + cE;
5515 c0 = c0 + c4;
5516 c8 = c8 + cC;
5517 c0 = c0 + c8;
5518 c0 *= 1.0f / 16.0f;
5519
5520 *(float*)(source0 + 4 * x) = c0;
5521 }
5522
5523 source0 += pitch;
5524 source1 += pitch;
5525 source2 += pitch;
5526 source3 += pitch;
5527 source4 += pitch;
5528 source5 += pitch;
5529 source6 += pitch;
5530 source7 += pitch;
5531 source8 += pitch;
5532 source9 += pitch;
5533 sourceA += pitch;
5534 sourceB += pitch;
5535 sourceC += pitch;
5536 sourceD += pitch;
5537 sourceE += pitch;
5538 sourceF += pitch;
5539 }
5540 }
5541 else ASSERT(false);
5542 }
5543 }
5544 else if(internal.format == FORMAT_A32B32G32R32F)
5545 {
5546 if(CPUID::supportsSSE())
5547 {
5548 if(internal.depth == 2)
5549 {
5550 for(int y = 0; y < height; y++)
5551 {
5552 for(int x = 0; x < width; x++)
5553 {
5554 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x));
5555 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x));
5556
5557 c0 = _mm_add_ps(c0, c1);
5558 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 2.0f));
5559
5560 _mm_store_ps((float*)(source0 + 16 * x), c0);
5561 }
5562
5563 source0 += pitch;
5564 source1 += pitch;
5565 }
5566 }
5567 else if(internal.depth == 4)
5568 {
5569 for(int y = 0; y < height; y++)
5570 {
5571 for(int x = 0; x < width; x++)
5572 {
5573 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x));
5574 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x));
5575 __m128 c2 = _mm_load_ps((float*)(source2 + 16 * x));
5576 __m128 c3 = _mm_load_ps((float*)(source3 + 16 * x));
5577
5578 c0 = _mm_add_ps(c0, c1);
5579 c2 = _mm_add_ps(c2, c3);
5580 c0 = _mm_add_ps(c0, c2);
5581 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 4.0f));
5582
5583 _mm_store_ps((float*)(source0 + 16 * x), c0);
5584 }
5585
5586 source0 += pitch;
5587 source1 += pitch;
5588 source2 += pitch;
5589 source3 += pitch;
5590 }
5591 }
5592 else if(internal.depth == 8)
5593 {
5594 for(int y = 0; y < height; y++)
5595 {
5596 for(int x = 0; x < width; x++)
5597 {
5598 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x));
5599 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x));
5600 __m128 c2 = _mm_load_ps((float*)(source2 + 16 * x));
5601 __m128 c3 = _mm_load_ps((float*)(source3 + 16 * x));
5602 __m128 c4 = _mm_load_ps((float*)(source4 + 16 * x));
5603 __m128 c5 = _mm_load_ps((float*)(source5 + 16 * x));
5604 __m128 c6 = _mm_load_ps((float*)(source6 + 16 * x));
5605 __m128 c7 = _mm_load_ps((float*)(source7 + 16 * x));
5606
5607 c0 = _mm_add_ps(c0, c1);
5608 c2 = _mm_add_ps(c2, c3);
5609 c4 = _mm_add_ps(c4, c5);
5610 c6 = _mm_add_ps(c6, c7);
5611 c0 = _mm_add_ps(c0, c2);
5612 c4 = _mm_add_ps(c4, c6);
5613 c0 = _mm_add_ps(c0, c4);
5614 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 8.0f));
5615
5616 _mm_store_ps((float*)(source0 + 16 * x), c0);
5617 }
5618
5619 source0 += pitch;
5620 source1 += pitch;
5621 source2 += pitch;
5622 source3 += pitch;
5623 source4 += pitch;
5624 source5 += pitch;
5625 source6 += pitch;
5626 source7 += pitch;
5627 }
5628 }
5629 else if(internal.depth == 16)
5630 {
5631 for(int y = 0; y < height; y++)
5632 {
5633 for(int x = 0; x < width; x++)
5634 {
5635 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x));
5636 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x));
5637 __m128 c2 = _mm_load_ps((float*)(source2 + 16 * x));
5638 __m128 c3 = _mm_load_ps((float*)(source3 + 16 * x));
5639 __m128 c4 = _mm_load_ps((float*)(source4 + 16 * x));
5640 __m128 c5 = _mm_load_ps((float*)(source5 + 16 * x));
5641 __m128 c6 = _mm_load_ps((float*)(source6 + 16 * x));
5642 __m128 c7 = _mm_load_ps((float*)(source7 + 16 * x));
5643 __m128 c8 = _mm_load_ps((float*)(source8 + 16 * x));
5644 __m128 c9 = _mm_load_ps((float*)(source9 + 16 * x));
5645 __m128 cA = _mm_load_ps((float*)(sourceA + 16 * x));
5646 __m128 cB = _mm_load_ps((float*)(sourceB + 16 * x));
5647 __m128 cC = _mm_load_ps((float*)(sourceC + 16 * x));
5648 __m128 cD = _mm_load_ps((float*)(sourceD + 16 * x));
5649 __m128 cE = _mm_load_ps((float*)(sourceE + 16 * x));
5650 __m128 cF = _mm_load_ps((float*)(sourceF + 16 * x));
5651
5652 c0 = _mm_add_ps(c0, c1);
5653 c2 = _mm_add_ps(c2, c3);
5654 c4 = _mm_add_ps(c4, c5);
5655 c6 = _mm_add_ps(c6, c7);
5656 c8 = _mm_add_ps(c8, c9);
5657 cA = _mm_add_ps(cA, cB);
5658 cC = _mm_add_ps(cC, cD);
5659 cE = _mm_add_ps(cE, cF);
5660 c0 = _mm_add_ps(c0, c2);
5661 c4 = _mm_add_ps(c4, c6);
5662 c8 = _mm_add_ps(c8, cA);
5663 cC = _mm_add_ps(cC, cE);
5664 c0 = _mm_add_ps(c0, c4);
5665 c8 = _mm_add_ps(c8, cC);
5666 c0 = _mm_add_ps(c0, c8);
5667 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 16.0f));
5668
5669 _mm_store_ps((float*)(source0 + 16 * x), c0);
5670 }
5671
5672 source0 += pitch;
5673 source1 += pitch;
5674 source2 += pitch;
5675 source3 += pitch;
5676 source4 += pitch;
5677 source5 += pitch;
5678 source6 += pitch;
5679 source7 += pitch;
5680 source8 += pitch;
5681 source9 += pitch;
5682 sourceA += pitch;
5683 sourceB += pitch;
5684 sourceC += pitch;
5685 sourceD += pitch;
5686 sourceE += pitch;
5687 sourceF += pitch;
5688 }
5689 }
5690 else ASSERT(false);
5691 }
5692 else
5693 {
5694 if(internal.depth == 2)
5695 {
5696 for(int y = 0; y < height; y++)
5697 {
5698 for(int x = 0; x < 4 * width; x++)
5699 {
5700 float c0 = *(float*)(source0 + 4 * x);
5701 float c1 = *(float*)(source1 + 4 * x);
5702
5703 c0 = c0 + c1;
5704 c0 *= 1.0f / 2.0f;
5705
5706 *(float*)(source0 + 4 * x) = c0;
5707 }
5708
5709 source0 += pitch;
5710 source1 += pitch;
5711 }
5712 }
5713 else if(internal.depth == 4)
5714 {
5715 for(int y = 0; y < height; y++)
5716 {
5717 for(int x = 0; x < 4 * width; x++)
5718 {
5719 float c0 = *(float*)(source0 + 4 * x);
5720 float c1 = *(float*)(source1 + 4 * x);
5721 float c2 = *(float*)(source2 + 4 * x);
5722 float c3 = *(float*)(source3 + 4 * x);
5723
5724 c0 = c0 + c1;
5725 c2 = c2 + c3;
5726 c0 = c0 + c2;
5727 c0 *= 1.0f / 4.0f;
5728
5729 *(float*)(source0 + 4 * x) = c0;
5730 }
5731
5732 source0 += pitch;
5733 source1 += pitch;
5734 source2 += pitch;
5735 source3 += pitch;
5736 }
5737 }
5738 else if(internal.depth == 8)
5739 {
5740 for(int y = 0; y < height; y++)
5741 {
5742 for(int x = 0; x < 4 * width; x++)
5743 {
5744 float c0 = *(float*)(source0 + 4 * x);
5745 float c1 = *(float*)(source1 + 4 * x);
5746 float c2 = *(float*)(source2 + 4 * x);
5747 float c3 = *(float*)(source3 + 4 * x);
5748 float c4 = *(float*)(source4 + 4 * x);
5749 float c5 = *(float*)(source5 + 4 * x);
5750 float c6 = *(float*)(source6 + 4 * x);
5751 float c7 = *(float*)(source7 + 4 * x);
5752
5753 c0 = c0 + c1;
5754 c2 = c2 + c3;
5755 c4 = c4 + c5;
5756 c6 = c6 + c7;
5757 c0 = c0 + c2;
5758 c4 = c4 + c6;
5759 c0 = c0 + c4;
5760 c0 *= 1.0f / 8.0f;
5761
5762 *(float*)(source0 + 4 * x) = c0;
5763 }
5764
5765 source0 += pitch;
5766 source1 += pitch;
5767 source2 += pitch;
5768 source3 += pitch;
5769 source4 += pitch;
5770 source5 += pitch;
5771 source6 += pitch;
5772 source7 += pitch;
5773 }
5774 }
5775 else if(internal.depth == 16)
5776 {
5777 for(int y = 0; y < height; y++)
5778 {
5779 for(int x = 0; x < 4 * width; x++)
5780 {
5781 float c0 = *(float*)(source0 + 4 * x);
5782 float c1 = *(float*)(source1 + 4 * x);
5783 float c2 = *(float*)(source2 + 4 * x);
5784 float c3 = *(float*)(source3 + 4 * x);
5785 float c4 = *(float*)(source4 + 4 * x);
5786 float c5 = *(float*)(source5 + 4 * x);
5787 float c6 = *(float*)(source6 + 4 * x);
5788 float c7 = *(float*)(source7 + 4 * x);
5789 float c8 = *(float*)(source8 + 4 * x);
5790 float c9 = *(float*)(source9 + 4 * x);
5791 float cA = *(float*)(sourceA + 4 * x);
5792 float cB = *(float*)(sourceB + 4 * x);
5793 float cC = *(float*)(sourceC + 4 * x);
5794 float cD = *(float*)(sourceD + 4 * x);
5795 float cE = *(float*)(sourceE + 4 * x);
5796 float cF = *(float*)(sourceF + 4 * x);
5797
5798 c0 = c0 + c1;
5799 c2 = c2 + c3;
5800 c4 = c4 + c5;
5801 c6 = c6 + c7;
5802 c8 = c8 + c9;
5803 cA = cA + cB;
5804 cC = cC + cD;
5805 cE = cE + cF;
5806 c0 = c0 + c2;
5807 c4 = c4 + c6;
5808 c8 = c8 + cA;
5809 cC = cC + cE;
5810 c0 = c0 + c4;
5811 c8 = c8 + cC;
5812 c0 = c0 + c8;
5813 c0 *= 1.0f / 16.0f;
5814
5815 *(float*)(source0 + 4 * x) = c0;
5816 }
5817
5818 source0 += pitch;
5819 source1 += pitch;
5820 source2 += pitch;
5821 source3 += pitch;
5822 source4 += pitch;
5823 source5 += pitch;
5824 source6 += pitch;
5825 source7 += pitch;
5826 source8 += pitch;
5827 source9 += pitch;
5828 sourceA += pitch;
5829 sourceB += pitch;
5830 sourceC += pitch;
5831 sourceD += pitch;
5832 sourceE += pitch;
5833 sourceF += pitch;
5834 }
5835 }
5836 else ASSERT(false);
5837 }
5838 }
Nicolas Capens0e12a922015-09-04 09:18:15 -04005839 else if(internal.format == FORMAT_R5G6B5)
5840 {
5841 if(CPUID::supportsSSE2() && (width % 8) == 0)
5842 {
5843 if(internal.depth == 2)
5844 {
5845 for(int y = 0; y < height; y++)
5846 {
5847 for(int x = 0; x < width; x += 8)
5848 {
5849 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x));
5850 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x));
5851
5852 static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F};
5853 static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0};
5854 __m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
5855 __m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(_g_));
5856 __m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b));
5857 __m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
5858
5859 c0 = _mm_avg_epu8(c0_r_b, c1_r_b);
5860 c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
5861 c1 = _mm_avg_epu16(c0__g_, c1__g_);
5862 c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
5863 c0 = _mm_or_si128(c0, c1);
5864
5865 _mm_store_si128((__m128i*)(source0 + 2 * x), c0);
5866 }
5867
5868 source0 += pitch;
5869 source1 += pitch;
5870 }
5871 }
5872 else if(internal.depth == 4)
5873 {
5874 for(int y = 0; y < height; y++)
5875 {
5876 for(int x = 0; x < width; x += 8)
5877 {
5878 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x));
5879 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x));
5880 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 2 * x));
5881 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 2 * x));
5882
5883 static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F};
5884 static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0};
5885 __m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
5886 __m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(_g_));
5887 __m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b));
5888 __m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
5889 __m128i c2_r_b = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(r_b));
5890 __m128i c2__g_ = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(_g_));
5891 __m128i c3_r_b = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(r_b));
5892 __m128i c3__g_ = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(_g_));
5893
5894 c0 = _mm_avg_epu8(c0_r_b, c1_r_b);
5895 c2 = _mm_avg_epu8(c2_r_b, c3_r_b);
5896 c0 = _mm_avg_epu8(c0, c2);
5897 c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
5898 c1 = _mm_avg_epu16(c0__g_, c1__g_);
5899 c3 = _mm_avg_epu16(c2__g_, c3__g_);
5900 c1 = _mm_avg_epu16(c1, c3);
5901 c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
5902 c0 = _mm_or_si128(c0, c1);
5903
5904 _mm_store_si128((__m128i*)(source0 + 2 * x), c0);
5905 }
5906
5907 source0 += pitch;
5908 source1 += pitch;
5909 source2 += pitch;
5910 source3 += pitch;
5911 }
5912 }
5913 else if(internal.depth == 8)
5914 {
5915 for(int y = 0; y < height; y++)
5916 {
5917 for(int x = 0; x < width; x += 8)
5918 {
5919 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x));
5920 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x));
5921 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 2 * x));
5922 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 2 * x));
5923 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 2 * x));
5924 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 2 * x));
5925 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 2 * x));
5926 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 2 * x));
5927
5928 static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F};
5929 static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0};
5930 __m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
5931 __m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(_g_));
5932 __m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b));
5933 __m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
5934 __m128i c2_r_b = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(r_b));
5935 __m128i c2__g_ = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(_g_));
5936 __m128i c3_r_b = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(r_b));
5937 __m128i c3__g_ = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(_g_));
5938 __m128i c4_r_b = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(r_b));
5939 __m128i c4__g_ = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(_g_));
5940 __m128i c5_r_b = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(r_b));
5941 __m128i c5__g_ = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(_g_));
5942 __m128i c6_r_b = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(r_b));
5943 __m128i c6__g_ = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(_g_));
5944 __m128i c7_r_b = _mm_and_si128(c7, reinterpret_cast<const __m128i&>(r_b));
5945 __m128i c7__g_ = _mm_and_si128(c7, reinterpret_cast<const __m128i&>(_g_));
5946
5947 c0 = _mm_avg_epu8(c0_r_b, c1_r_b);
5948 c2 = _mm_avg_epu8(c2_r_b, c3_r_b);
5949 c4 = _mm_avg_epu8(c4_r_b, c5_r_b);
5950 c6 = _mm_avg_epu8(c6_r_b, c7_r_b);
5951 c0 = _mm_avg_epu8(c0, c2);
5952 c4 = _mm_avg_epu8(c4, c6);
5953 c0 = _mm_avg_epu8(c0, c4);
5954 c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
5955 c1 = _mm_avg_epu16(c0__g_, c1__g_);
5956 c3 = _mm_avg_epu16(c2__g_, c3__g_);
5957 c5 = _mm_avg_epu16(c4__g_, c5__g_);
5958 c7 = _mm_avg_epu16(c6__g_, c7__g_);
5959 c1 = _mm_avg_epu16(c1, c3);
5960 c5 = _mm_avg_epu16(c5, c7);
5961 c1 = _mm_avg_epu16(c1, c5);
5962 c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
5963 c0 = _mm_or_si128(c0, c1);
5964
5965 _mm_store_si128((__m128i*)(source0 + 2 * x), c0);
5966 }
5967
5968 source0 += pitch;
5969 source1 += pitch;
5970 source2 += pitch;
5971 source3 += pitch;
5972 source4 += pitch;
5973 source5 += pitch;
5974 source6 += pitch;
5975 source7 += pitch;
5976 }
5977 }
5978 else if(internal.depth == 16)
5979 {
5980 for(int y = 0; y < height; y++)
5981 {
5982 for(int x = 0; x < width; x += 8)
5983 {
5984 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x));
5985 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x));
5986 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 2 * x));
5987 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 2 * x));
5988 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 2 * x));
5989 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 2 * x));
5990 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 2 * x));
5991 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 2 * x));
5992 __m128i c8 = _mm_load_si128((__m128i*)(source8 + 2 * x));
5993 __m128i c9 = _mm_load_si128((__m128i*)(source9 + 2 * x));
5994 __m128i cA = _mm_load_si128((__m128i*)(sourceA + 2 * x));
5995 __m128i cB = _mm_load_si128((__m128i*)(sourceB + 2 * x));
5996 __m128i cC = _mm_load_si128((__m128i*)(sourceC + 2 * x));
5997 __m128i cD = _mm_load_si128((__m128i*)(sourceD + 2 * x));
5998 __m128i cE = _mm_load_si128((__m128i*)(sourceE + 2 * x));
5999 __m128i cF = _mm_load_si128((__m128i*)(sourceF + 2 * x));
6000
6001 static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F};
6002 static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0};
6003 __m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
6004 __m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(_g_));
6005 __m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b));
6006 __m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
6007 __m128i c2_r_b = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(r_b));
6008 __m128i c2__g_ = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(_g_));
6009 __m128i c3_r_b = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(r_b));
6010 __m128i c3__g_ = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(_g_));
6011 __m128i c4_r_b = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(r_b));
6012 __m128i c4__g_ = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(_g_));
6013 __m128i c5_r_b = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(r_b));
6014 __m128i c5__g_ = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(_g_));
6015 __m128i c6_r_b = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(r_b));
6016 __m128i c6__g_ = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(_g_));
6017 __m128i c7_r_b = _mm_and_si128(c7, reinterpret_cast<const __m128i&>(r_b));
6018 __m128i c7__g_ = _mm_and_si128(c7, reinterpret_cast<const __m128i&>(_g_));
6019 __m128i c8_r_b = _mm_and_si128(c8, reinterpret_cast<const __m128i&>(r_b));
6020 __m128i c8__g_ = _mm_and_si128(c8, reinterpret_cast<const __m128i&>(_g_));
6021 __m128i c9_r_b = _mm_and_si128(c9, reinterpret_cast<const __m128i&>(r_b));
6022 __m128i c9__g_ = _mm_and_si128(c9, reinterpret_cast<const __m128i&>(_g_));
6023 __m128i cA_r_b = _mm_and_si128(cA, reinterpret_cast<const __m128i&>(r_b));
6024 __m128i cA__g_ = _mm_and_si128(cA, reinterpret_cast<const __m128i&>(_g_));
6025 __m128i cB_r_b = _mm_and_si128(cB, reinterpret_cast<const __m128i&>(r_b));
6026 __m128i cB__g_ = _mm_and_si128(cB, reinterpret_cast<const __m128i&>(_g_));
6027 __m128i cC_r_b = _mm_and_si128(cC, reinterpret_cast<const __m128i&>(r_b));
6028 __m128i cC__g_ = _mm_and_si128(cC, reinterpret_cast<const __m128i&>(_g_));
6029 __m128i cD_r_b = _mm_and_si128(cD, reinterpret_cast<const __m128i&>(r_b));
6030 __m128i cD__g_ = _mm_and_si128(cD, reinterpret_cast<const __m128i&>(_g_));
6031 __m128i cE_r_b = _mm_and_si128(cE, reinterpret_cast<const __m128i&>(r_b));
6032 __m128i cE__g_ = _mm_and_si128(cE, reinterpret_cast<const __m128i&>(_g_));
6033 __m128i cF_r_b = _mm_and_si128(cF, reinterpret_cast<const __m128i&>(r_b));
6034 __m128i cF__g_ = _mm_and_si128(cF, reinterpret_cast<const __m128i&>(_g_));
6035
6036 c0 = _mm_avg_epu8(c0_r_b, c1_r_b);
6037 c2 = _mm_avg_epu8(c2_r_b, c3_r_b);
6038 c4 = _mm_avg_epu8(c4_r_b, c5_r_b);
6039 c6 = _mm_avg_epu8(c6_r_b, c7_r_b);
6040 c8 = _mm_avg_epu8(c8_r_b, c9_r_b);
6041 cA = _mm_avg_epu8(cA_r_b, cB_r_b);
6042 cC = _mm_avg_epu8(cC_r_b, cD_r_b);
6043 cE = _mm_avg_epu8(cE_r_b, cF_r_b);
6044 c0 = _mm_avg_epu8(c0, c2);
6045 c4 = _mm_avg_epu8(c4, c6);
6046 c8 = _mm_avg_epu8(c8, cA);
6047 cC = _mm_avg_epu8(cC, cE);
6048 c0 = _mm_avg_epu8(c0, c4);
6049 c8 = _mm_avg_epu8(c8, cC);
6050 c0 = _mm_avg_epu8(c0, c8);
6051 c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
6052 c1 = _mm_avg_epu16(c0__g_, c1__g_);
6053 c3 = _mm_avg_epu16(c2__g_, c3__g_);
6054 c5 = _mm_avg_epu16(c4__g_, c5__g_);
6055 c7 = _mm_avg_epu16(c6__g_, c7__g_);
6056 c9 = _mm_avg_epu16(c8__g_, c9__g_);
6057 cB = _mm_avg_epu16(cA__g_, cB__g_);
6058 cD = _mm_avg_epu16(cC__g_, cD__g_);
6059 cF = _mm_avg_epu16(cE__g_, cF__g_);
6060 c1 = _mm_avg_epu8(c1, c3);
6061 c5 = _mm_avg_epu8(c5, c7);
6062 c9 = _mm_avg_epu8(c9, cB);
6063 cD = _mm_avg_epu8(cD, cF);
6064 c1 = _mm_avg_epu8(c1, c5);
6065 c9 = _mm_avg_epu8(c9, cD);
6066 c1 = _mm_avg_epu8(c1, c9);
6067 c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
6068 c0 = _mm_or_si128(c0, c1);
6069
6070 _mm_store_si128((__m128i*)(source0 + 2 * x), c0);
6071 }
6072
6073 source0 += pitch;
6074 source1 += pitch;
6075 source2 += pitch;
6076 source3 += pitch;
6077 source4 += pitch;
6078 source5 += pitch;
6079 source6 += pitch;
6080 source7 += pitch;
6081 source8 += pitch;
6082 source9 += pitch;
6083 sourceA += pitch;
6084 sourceB += pitch;
6085 sourceC += pitch;
6086 sourceD += pitch;
6087 sourceE += pitch;
6088 sourceF += pitch;
6089 }
6090 }
6091 else ASSERT(false);
6092 }
6093 else
6094 {
// Per-channel average of two packed R5G6B5 pixels, rounded up.
// Uses the identity avg(a, b) = (a & b) + ((a ^ b) >> 1) + ((a ^ b) & 1) on all
// three fields at once: 0x0821 selects the lowest bit of each field (bits 0, 5
// and 11), and 0x7BEF clears bits 4, 10 and 15 so that the shifted difference
// of one field cannot leak into the field below it.
// Note: both arguments are evaluated more than once.
#define AVERAGE(x, y) ((((x) ^ (y)) & 0x0821) + ((((x) ^ (y)) >> 1) & 0x7BEF) + ((x) & (y)))
6096
6097 if(internal.depth == 2)
6098 {
6099 for(int y = 0; y < height; y++)
6100 {
6101 for(int x = 0; x < width; x++)
6102 {
6103 unsigned short c0 = *(unsigned short*)(source0 + 2 * x);
6104 unsigned short c1 = *(unsigned short*)(source1 + 2 * x);
6105
6106 c0 = AVERAGE(c0, c1);
6107
6108 *(unsigned short*)(source0 + 2 * x) = c0;
6109 }
6110
6111 source0 += pitch;
6112 source1 += pitch;
6113 }
6114 }
6115 else if(internal.depth == 4)
6116 {
6117 for(int y = 0; y < height; y++)
6118 {
6119 for(int x = 0; x < width; x++)
6120 {
6121 unsigned short c0 = *(unsigned short*)(source0 + 2 * x);
6122 unsigned short c1 = *(unsigned short*)(source1 + 2 * x);
6123 unsigned short c2 = *(unsigned short*)(source2 + 2 * x);
6124 unsigned short c3 = *(unsigned short*)(source3 + 2 * x);
6125
6126 c0 = AVERAGE(c0, c1);
6127 c2 = AVERAGE(c2, c3);
6128 c0 = AVERAGE(c0, c2);
6129
6130 *(unsigned short*)(source0 + 2 * x) = c0;
6131 }
6132
6133 source0 += pitch;
6134 source1 += pitch;
6135 source2 += pitch;
6136 source3 += pitch;
6137 }
6138 }
6139 else if(internal.depth == 8)
6140 {
6141 for(int y = 0; y < height; y++)
6142 {
6143 for(int x = 0; x < width; x++)
6144 {
6145 unsigned short c0 = *(unsigned short*)(source0 + 2 * x);
6146 unsigned short c1 = *(unsigned short*)(source1 + 2 * x);
6147 unsigned short c2 = *(unsigned short*)(source2 + 2 * x);
6148 unsigned short c3 = *(unsigned short*)(source3 + 2 * x);
6149 unsigned short c4 = *(unsigned short*)(source4 + 2 * x);
6150 unsigned short c5 = *(unsigned short*)(source5 + 2 * x);
6151 unsigned short c6 = *(unsigned short*)(source6 + 2 * x);
6152 unsigned short c7 = *(unsigned short*)(source7 + 2 * x);
6153
6154 c0 = AVERAGE(c0, c1);
6155 c2 = AVERAGE(c2, c3);
6156 c4 = AVERAGE(c4, c5);
6157 c6 = AVERAGE(c6, c7);
6158 c0 = AVERAGE(c0, c2);
6159 c4 = AVERAGE(c4, c6);
6160 c0 = AVERAGE(c0, c4);
6161
6162 *(unsigned short*)(source0 + 2 * x) = c0;
6163 }
6164
6165 source0 += pitch;
6166 source1 += pitch;
6167 source2 += pitch;
6168 source3 += pitch;
6169 source4 += pitch;
6170 source5 += pitch;
6171 source6 += pitch;
6172 source7 += pitch;
6173 }
6174 }
6175 else if(internal.depth == 16)
6176 {
6177 for(int y = 0; y < height; y++)
6178 {
6179 for(int x = 0; x < width; x++)
6180 {
6181 unsigned short c0 = *(unsigned short*)(source0 + 2 * x);
6182 unsigned short c1 = *(unsigned short*)(source1 + 2 * x);
6183 unsigned short c2 = *(unsigned short*)(source2 + 2 * x);
6184 unsigned short c3 = *(unsigned short*)(source3 + 2 * x);
6185 unsigned short c4 = *(unsigned short*)(source4 + 2 * x);
6186 unsigned short c5 = *(unsigned short*)(source5 + 2 * x);
6187 unsigned short c6 = *(unsigned short*)(source6 + 2 * x);
6188 unsigned short c7 = *(unsigned short*)(source7 + 2 * x);
6189 unsigned short c8 = *(unsigned short*)(source8 + 2 * x);
6190 unsigned short c9 = *(unsigned short*)(source9 + 2 * x);
6191 unsigned short cA = *(unsigned short*)(sourceA + 2 * x);
6192 unsigned short cB = *(unsigned short*)(sourceB + 2 * x);
6193 unsigned short cC = *(unsigned short*)(sourceC + 2 * x);
6194 unsigned short cD = *(unsigned short*)(sourceD + 2 * x);
6195 unsigned short cE = *(unsigned short*)(sourceE + 2 * x);
6196 unsigned short cF = *(unsigned short*)(sourceF + 2 * x);
6197
6198 c0 = AVERAGE(c0, c1);
6199 c2 = AVERAGE(c2, c3);
6200 c4 = AVERAGE(c4, c5);
6201 c6 = AVERAGE(c6, c7);
6202 c8 = AVERAGE(c8, c9);
6203 cA = AVERAGE(cA, cB);
6204 cC = AVERAGE(cC, cD);
6205 cE = AVERAGE(cE, cF);
6206 c0 = AVERAGE(c0, c2);
6207 c4 = AVERAGE(c4, c6);
6208 c8 = AVERAGE(c8, cA);
6209 cC = AVERAGE(cC, cE);
6210 c0 = AVERAGE(c0, c4);
6211 c8 = AVERAGE(c8, cC);
6212 c0 = AVERAGE(c0, c8);
6213
6214 *(unsigned short*)(source0 + 2 * x) = c0;
6215 }
6216
6217 source0 += pitch;
6218 source1 += pitch;
6219 source2 += pitch;
6220 source3 += pitch;
6221 source4 += pitch;
6222 source5 += pitch;
6223 source6 += pitch;
6224 source7 += pitch;
6225 source8 += pitch;
6226 source9 += pitch;
6227 sourceA += pitch;
6228 sourceB += pitch;
6229 sourceC += pitch;
6230 sourceD += pitch;
6231 sourceE += pitch;
6232 sourceF += pitch;
6233 }
6234 }
6235 else ASSERT(false);
6236
6237 #undef AVERAGE
6238 }
6239 }
John Bauman89401822014-05-06 15:04:28 -04006240 else
6241 {
6242 // UNIMPLEMENTED();
6243 }
6244 }
6245}