blob: af2c7c99bb59f9df5136ebec5d8223b8712a1a5e [file] [log] [blame]
// SwiftShader Software Renderer
//
// Copyright(c) 2005-2013 TransGaming Inc.
//
// All rights reserved. No part of this software may be copied, distributed, transmitted,
// transcribed, stored in a retrieval system, translated into any human or computer
// language by any means, or disclosed to third parties without the explicit written
// agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express
// or implied, including but not limited to any patent rights, are granted to you.
//
11
12#include "Surface.hpp"
13
14#include "Color.hpp"
15#include "Context.hpp"
Alexis Hetu0de50d42015-09-09 13:56:41 -040016#include "ETC_Decoder.hpp"
John Bauman19bac1e2014-05-06 15:23:49 -040017#include "Renderer.hpp"
John Bauman89401822014-05-06 15:04:28 -040018#include "Common/Half.hpp"
19#include "Common/Memory.hpp"
20#include "Common/CPUID.hpp"
21#include "Common/Resource.hpp"
22#include "Common/Debug.hpp"
John Bauman19bac1e2014-05-06 15:23:49 -040023#include "Reactor/Reactor.hpp"
John Bauman89401822014-05-06 15:04:28 -040024
25#include <xmmintrin.h>
26#include <emmintrin.h>
27
28#undef min
29#undef max
30
31namespace sw
32{
// Settings that are only declared here; their definitions are not part of this view.
33 extern bool quadLayoutEnabled;
34 extern bool complementaryDepthBuffer;
35 extern TranscendentalPrecision logPrecision;
36
// Storage for Surface's static palette members. The palette table is indexed by the
// stored byte when FORMAT_P8 / FORMAT_A8P8 elements are read; it starts out null.
// NOTE(review): paletteID presumably identifies the currently installed palette - confirm.
37 unsigned int *Surface::palette = 0;
38 unsigned int Surface::paletteID = 0;
39
John Bauman19bac1e2014-05-06 15:23:49 -040040 void Rect::clip(int minX, int minY, int maxX, int maxY)
41 {
Nicolas Capens22658242014-11-29 00:31:41 -050042 x0 = clamp(x0, minX, maxX);
43 y0 = clamp(y0, minY, maxY);
44 x1 = clamp(x1, minX, maxX);
45 y1 = clamp(y1, minY, maxY);
John Bauman19bac1e2014-05-06 15:23:49 -040046 }
47
John Bauman89401822014-05-06 15:04:28 -040048 void Surface::Buffer::write(int x, int y, int z, const Color<float> &color)
49 {
50 void *element = (unsigned char*)buffer + x * bytes + y * pitchB + z * sliceB;
51
52 write(element, color);
53 }
54
55 void Surface::Buffer::write(int x, int y, const Color<float> &color)
56 {
57 void *element = (unsigned char*)buffer + x * bytes + y * pitchB;
58
59 write(element, color);
60 }
61
62 inline void Surface::Buffer::write(void *element, const Color<float> &color)
63 {
64 switch(format)
65 {
66 case FORMAT_A8:
67 *(unsigned char*)element = unorm<8>(color.a);
68 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -040069 case FORMAT_R8I_SNORM:
70 *(char*)element = snorm<8>(color.r);
71 break;
John Bauman89401822014-05-06 15:04:28 -040072 case FORMAT_R8:
73 *(unsigned char*)element = unorm<8>(color.r);
74 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -040075 case FORMAT_R8I:
76 *(char*)element = scast<8>(color.r);
77 break;
78 case FORMAT_R8UI:
79 *(unsigned char*)element = ucast<8>(color.r);
80 break;
81 case FORMAT_R16I:
82 *(short*)element = scast<16>(color.r);
83 break;
84 case FORMAT_R16UI:
85 *(unsigned short*)element = ucast<16>(color.r);
86 break;
87 case FORMAT_R32I:
88 *(int*)element = static_cast<int>(color.r);
89 break;
90 case FORMAT_R32UI:
91 *(unsigned int*)element = static_cast<unsigned int>(color.r);
92 break;
John Bauman89401822014-05-06 15:04:28 -040093 case FORMAT_R3G3B2:
94 *(unsigned char*)element = (unorm<3>(color.r) << 5) | (unorm<3>(color.g) << 2) | (unorm<2>(color.b) << 0);
95 break;
96 case FORMAT_A8R3G3B2:
97 *(unsigned short*)element = (unorm<8>(color.a) << 8) | (unorm<3>(color.r) << 5) | (unorm<3>(color.g) << 2) | (unorm<2>(color.b) << 0);
98 break;
99 case FORMAT_X4R4G4B4:
100 *(unsigned short*)element = 0xF000 | (unorm<4>(color.r) << 8) | (unorm<4>(color.g) << 4) | (unorm<4>(color.b) << 0);
101 break;
102 case FORMAT_A4R4G4B4:
103 *(unsigned short*)element = (unorm<4>(color.a) << 12) | (unorm<4>(color.r) << 8) | (unorm<4>(color.g) << 4) | (unorm<4>(color.b) << 0);
104 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400105 case FORMAT_R4G4B4A4:
106 *(unsigned short*)element = (unorm<4>(color.r) << 12) | (unorm<4>(color.g) << 8) | (unorm<4>(color.b) << 4) | (unorm<4>(color.a) << 0);
107 break;
John Bauman89401822014-05-06 15:04:28 -0400108 case FORMAT_R5G6B5:
109 *(unsigned short*)element = (unorm<5>(color.r) << 11) | (unorm<6>(color.g) << 5) | (unorm<5>(color.b) << 0);
110 break;
111 case FORMAT_A1R5G5B5:
112 *(unsigned short*)element = (unorm<1>(color.a) << 15) | (unorm<5>(color.r) << 10) | (unorm<5>(color.g) << 5) | (unorm<5>(color.b) << 0);
113 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400114 case FORMAT_R5G5B5A1:
115 *(unsigned short*)element = (unorm<5>(color.r) << 11) | (unorm<5>(color.g) << 6) | (unorm<5>(color.b) << 1) | (unorm<5>(color.a) << 0);
116 break;
John Bauman89401822014-05-06 15:04:28 -0400117 case FORMAT_X1R5G5B5:
118 *(unsigned short*)element = 0x8000 | (unorm<5>(color.r) << 10) | (unorm<5>(color.g) << 5) | (unorm<5>(color.b) << 0);
119 break;
120 case FORMAT_A8R8G8B8:
121 *(unsigned int*)element = (unorm<8>(color.a) << 24) | (unorm<8>(color.r) << 16) | (unorm<8>(color.g) << 8) | (unorm<8>(color.b) << 0);
122 break;
123 case FORMAT_X8R8G8B8:
124 *(unsigned int*)element = 0xFF000000 | (unorm<8>(color.r) << 16) | (unorm<8>(color.g) << 8) | (unorm<8>(color.b) << 0);
125 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400126 case FORMAT_A8B8G8R8I_SNORM:
127 *(unsigned int*)element = (static_cast<unsigned int>(snorm<8>(color.a)) << 24) |
128 (static_cast<unsigned int>(snorm<8>(color.b)) << 16) |
129 (static_cast<unsigned int>(snorm<8>(color.g)) << 8) |
130 (static_cast<unsigned int>(snorm<8>(color.r)) << 0);
131 break;
John Bauman89401822014-05-06 15:04:28 -0400132 case FORMAT_A8B8G8R8:
133 *(unsigned int*)element = (unorm<8>(color.a) << 24) | (unorm<8>(color.b) << 16) | (unorm<8>(color.g) << 8) | (unorm<8>(color.r) << 0);
134 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400135 case FORMAT_A8B8G8R8I:
136 *(unsigned int*)element = (static_cast<unsigned int>(scast<8>(color.a)) << 24) |
137 (static_cast<unsigned int>(scast<8>(color.b)) << 16) |
138 (static_cast<unsigned int>(scast<8>(color.g)) << 8) |
139 (static_cast<unsigned int>(scast<8>(color.r)) << 0);
140 break;
141 case FORMAT_A8B8G8R8UI:
142 *(unsigned int*)element = (ucast<8>(color.a) << 24) | (ucast<8>(color.b) << 16) | (ucast<8>(color.g) << 8) | (ucast<8>(color.r) << 0);
143 break;
144 case FORMAT_X8B8G8R8I_SNORM:
145 *(unsigned int*)element = 0x7F000000 |
146 (static_cast<unsigned int>(snorm<8>(color.b)) << 16) |
147 (static_cast<unsigned int>(snorm<8>(color.g)) << 8) |
148 (static_cast<unsigned int>(snorm<8>(color.r)) << 0);
149 break;
John Bauman89401822014-05-06 15:04:28 -0400150 case FORMAT_X8B8G8R8:
151 *(unsigned int*)element = 0xFF000000 | (unorm<8>(color.b) << 16) | (unorm<8>(color.g) << 8) | (unorm<8>(color.r) << 0);
152 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400153 case FORMAT_X8B8G8R8I:
154 *(unsigned int*)element = 0x7F000000 |
155 (static_cast<unsigned int>(scast<8>(color.b)) << 16) |
156 (static_cast<unsigned int>(scast<8>(color.g)) << 8) |
157 (static_cast<unsigned int>(scast<8>(color.r)) << 0);
158 case FORMAT_X8B8G8R8UI:
159 *(unsigned int*)element = 0xFF000000 | (ucast<8>(color.b) << 16) | (ucast<8>(color.g) << 8) | (ucast<8>(color.r) << 0);
160 break;
John Bauman89401822014-05-06 15:04:28 -0400161 case FORMAT_A2R10G10B10:
162 *(unsigned int*)element = (unorm<2>(color.a) << 30) | (unorm<10>(color.r) << 20) | (unorm<10>(color.g) << 10) | (unorm<10>(color.b) << 0);
163 break;
164 case FORMAT_A2B10G10R10:
165 *(unsigned int*)element = (unorm<2>(color.a) << 30) | (unorm<10>(color.b) << 20) | (unorm<10>(color.g) << 10) | (unorm<10>(color.r) << 0);
166 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400167 case FORMAT_G8R8I_SNORM:
168 *(unsigned short*)element = (static_cast<unsigned short>(snorm<8>(color.g)) << 8) |
169 (static_cast<unsigned short>(snorm<8>(color.r)) << 0);
170 break;
John Bauman89401822014-05-06 15:04:28 -0400171 case FORMAT_G8R8:
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400172 *(unsigned short*)element = (unorm<8>(color.g) << 8) | (unorm<8>(color.r) << 0);
173 break;
174 case FORMAT_G8R8I:
175 *(unsigned short*)element = (static_cast<unsigned short>(scast<8>(color.g)) << 8) |
176 (static_cast<unsigned short>(scast<8>(color.r)) << 0);
177 break;
178 case FORMAT_G8R8UI:
179 *(unsigned short*)element = (ucast<8>(color.g) << 8) | (ucast<8>(color.r) << 0);
John Bauman89401822014-05-06 15:04:28 -0400180 break;
181 case FORMAT_G16R16:
182 *(unsigned int*)element = (unorm<16>(color.g) << 16) | (unorm<16>(color.r) << 0);
183 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400184 case FORMAT_G16R16I:
185 *(unsigned int*)element = (static_cast<unsigned int>(scast<16>(color.g)) << 16) |
186 (static_cast<unsigned int>(scast<16>(color.r)) << 0);
187 break;
188 case FORMAT_G16R16UI:
189 *(unsigned int*)element = (ucast<16>(color.g) << 16) | (ucast<16>(color.r) << 0);
190 break;
191 case FORMAT_G32R32I:
192 case FORMAT_G32R32UI:
193 ((unsigned int*)element)[0] = static_cast<unsigned int>(color.r);
194 ((unsigned int*)element)[1] = static_cast<unsigned int>(color.g);
195 break;
John Bauman89401822014-05-06 15:04:28 -0400196 case FORMAT_A16B16G16R16:
197 ((unsigned short*)element)[0] = unorm<16>(color.r);
198 ((unsigned short*)element)[1] = unorm<16>(color.g);
199 ((unsigned short*)element)[2] = unorm<16>(color.b);
200 ((unsigned short*)element)[3] = unorm<16>(color.a);
201 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400202 case FORMAT_A16B16G16R16I:
203 ((unsigned short*)element)[0] = static_cast<unsigned short>(scast<16>(color.r));
204 ((unsigned short*)element)[1] = static_cast<unsigned short>(scast<16>(color.g));
205 ((unsigned short*)element)[2] = static_cast<unsigned short>(scast<16>(color.b));
206 ((unsigned short*)element)[3] = static_cast<unsigned short>(scast<16>(color.a));
207 break;
208 case FORMAT_A16B16G16R16UI:
209 ((unsigned short*)element)[0] = static_cast<unsigned short>(ucast<16>(color.r));
210 ((unsigned short*)element)[1] = static_cast<unsigned short>(ucast<16>(color.g));
211 ((unsigned short*)element)[2] = static_cast<unsigned short>(ucast<16>(color.b));
212 ((unsigned short*)element)[3] = static_cast<unsigned short>(ucast<16>(color.a));
213 break;
214 case FORMAT_X16B16G16R16I:
215 ((unsigned short*)element)[0] = static_cast<unsigned short>(scast<16>(color.r));
216 ((unsigned short*)element)[1] = static_cast<unsigned short>(scast<16>(color.g));
217 ((unsigned short*)element)[2] = static_cast<unsigned short>(scast<16>(color.b));
218 break;
219 case FORMAT_X16B16G16R16UI:
220 ((unsigned short*)element)[0] = static_cast<unsigned short>(ucast<16>(color.r));
221 ((unsigned short*)element)[1] = static_cast<unsigned short>(ucast<16>(color.g));
222 ((unsigned short*)element)[2] = static_cast<unsigned short>(ucast<16>(color.b));
223 break;
224 case FORMAT_A32B32G32R32I:
225 case FORMAT_A32B32G32R32UI:
226 ((unsigned int*)element)[0] = static_cast<unsigned int>(color.r);
227 ((unsigned int*)element)[1] = static_cast<unsigned int>(color.g);
228 ((unsigned int*)element)[2] = static_cast<unsigned int>(color.b);
229 ((unsigned int*)element)[3] = static_cast<unsigned int>(color.a);
230 break;
231 case FORMAT_X32B32G32R32I:
232 case FORMAT_X32B32G32R32UI:
233 ((unsigned int*)element)[0] = static_cast<unsigned int>(color.r);
234 ((unsigned int*)element)[1] = static_cast<unsigned int>(color.g);
235 ((unsigned int*)element)[2] = static_cast<unsigned int>(color.b);
236 break;
John Bauman89401822014-05-06 15:04:28 -0400237 case FORMAT_V8U8:
238 *(unsigned short*)element = (snorm<8>(color.g) << 8) | (snorm<8>(color.r) << 0);
239 break;
240 case FORMAT_L6V5U5:
241 *(unsigned short*)element = (unorm<6>(color.b) << 10) | (snorm<5>(color.g) << 5) | (snorm<5>(color.r) << 0);
242 break;
243 case FORMAT_Q8W8V8U8:
244 *(unsigned int*)element = (snorm<8>(color.a) << 24) | (snorm<8>(color.b) << 16) | (snorm<8>(color.g) << 8) | (snorm<8>(color.r) << 0);
245 break;
246 case FORMAT_X8L8V8U8:
247 *(unsigned int*)element = 0xFF000000 | (unorm<8>(color.b) << 16) | (snorm<8>(color.g) << 8) | (snorm<8>(color.r) << 0);
248 break;
249 case FORMAT_V16U16:
250 *(unsigned int*)element = (snorm<16>(color.g) << 16) | (snorm<16>(color.r) << 0);
251 break;
252 case FORMAT_A2W10V10U10:
253 *(unsigned int*)element = (unorm<2>(color.a) << 30) | (snorm<10>(color.b) << 20) | (snorm<10>(color.g) << 10) | (snorm<10>(color.r) << 0);
254 break;
255 case FORMAT_A16W16V16U16:
256 ((unsigned short*)element)[0] = snorm<16>(color.r);
257 ((unsigned short*)element)[1] = snorm<16>(color.g);
258 ((unsigned short*)element)[2] = snorm<16>(color.b);
259 ((unsigned short*)element)[3] = unorm<16>(color.a);
260 break;
261 case FORMAT_Q16W16V16U16:
262 ((unsigned short*)element)[0] = snorm<16>(color.r);
263 ((unsigned short*)element)[1] = snorm<16>(color.g);
264 ((unsigned short*)element)[2] = snorm<16>(color.b);
265 ((unsigned short*)element)[3] = snorm<16>(color.a);
266 break;
267 case FORMAT_R8G8B8:
268 ((unsigned char*)element)[0] = unorm<8>(color.b);
269 ((unsigned char*)element)[1] = unorm<8>(color.g);
270 ((unsigned char*)element)[2] = unorm<8>(color.r);
271 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400272 case FORMAT_B8G8R8:
273 ((unsigned char*)element)[0] = unorm<8>(color.r);
274 ((unsigned char*)element)[1] = unorm<8>(color.g);
275 ((unsigned char*)element)[2] = unorm<8>(color.b);
276 break;
John Bauman89401822014-05-06 15:04:28 -0400277 case FORMAT_R16F:
278 *(half*)element = (half)color.r;
279 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400280 case FORMAT_A16F:
281 *(half*)element = (half)color.a;
282 break;
John Bauman89401822014-05-06 15:04:28 -0400283 case FORMAT_G16R16F:
284 ((half*)element)[0] = (half)color.r;
285 ((half*)element)[1] = (half)color.g;
286 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400287 case FORMAT_B16G16R16F:
288 ((half*)element)[0] = (half)color.r;
289 ((half*)element)[1] = (half)color.g;
290 ((half*)element)[2] = (half)color.b;
291 break;
John Bauman89401822014-05-06 15:04:28 -0400292 case FORMAT_A16B16G16R16F:
293 ((half*)element)[0] = (half)color.r;
294 ((half*)element)[1] = (half)color.g;
295 ((half*)element)[2] = (half)color.b;
296 ((half*)element)[3] = (half)color.a;
297 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400298 case FORMAT_A32F:
299 *(float*)element = color.a;
300 break;
John Bauman89401822014-05-06 15:04:28 -0400301 case FORMAT_R32F:
302 *(float*)element = color.r;
303 break;
304 case FORMAT_G32R32F:
305 ((float*)element)[0] = color.r;
306 ((float*)element)[1] = color.g;
307 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400308 case FORMAT_B32G32R32F:
309 ((float*)element)[0] = color.r;
310 ((float*)element)[1] = color.g;
311 ((float*)element)[2] = color.b;
312 break;
John Bauman89401822014-05-06 15:04:28 -0400313 case FORMAT_A32B32G32R32F:
314 ((float*)element)[0] = color.r;
315 ((float*)element)[1] = color.g;
316 ((float*)element)[2] = color.b;
317 ((float*)element)[3] = color.a;
318 break;
319 case FORMAT_D32F:
320 case FORMAT_D32F_LOCKABLE:
John Bauman66b8ab22014-05-06 15:57:45 -0400321 case FORMAT_D32FS8_TEXTURE:
322 case FORMAT_D32FS8_SHADOW:
John Bauman89401822014-05-06 15:04:28 -0400323 *((float*)element) = color.r;
324 break;
325 case FORMAT_D32F_COMPLEMENTARY:
326 *((float*)element) = 1 - color.r;
327 break;
328 case FORMAT_S8:
329 *((unsigned char*)element) = unorm<8>(color.r);
330 break;
331 case FORMAT_L8:
332 *(unsigned char*)element = unorm<8>(color.r);
333 break;
334 case FORMAT_A4L4:
335 *(unsigned char*)element = (unorm<4>(color.a) << 4) | (unorm<4>(color.r) << 0);
336 break;
337 case FORMAT_L16:
338 *(unsigned short*)element = unorm<16>(color.r);
339 break;
340 case FORMAT_A8L8:
341 *(unsigned short*)element = (unorm<8>(color.a) << 8) | (unorm<8>(color.r) << 0);
342 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400343 case FORMAT_L16F:
344 *(half*)element = (half)color.r;
345 break;
346 case FORMAT_A16L16F:
347 ((half*)element)[0] = (half)color.r;
348 ((half*)element)[1] = (half)color.a;
349 break;
350 case FORMAT_L32F:
351 *(float*)element = color.r;
352 break;
353 case FORMAT_A32L32F:
354 ((float*)element)[0] = color.r;
355 ((float*)element)[1] = color.a;
356 break;
John Bauman89401822014-05-06 15:04:28 -0400357 default:
358 ASSERT(false);
359 }
360 }
361
362 Color<float> Surface::Buffer::read(int x, int y, int z) const
363 {
364 void *element = (unsigned char*)buffer + x * bytes + y * pitchB + z * sliceB;
365
366 return read(element);
367 }
368
369 Color<float> Surface::Buffer::read(int x, int y) const
370 {
371 void *element = (unsigned char*)buffer + x * bytes + y * pitchB;
372
373 return read(element);
374 }
375
376 inline Color<float> Surface::Buffer::read(void *element) const
377 {
Nicolas Capens3f439242015-06-09 16:33:50 -0400378 float r = 0.0f;
379 float g = 0.0f;
380 float b = 0.0f;
381 float a = 1.0f;
John Bauman89401822014-05-06 15:04:28 -0400382
383 switch(format)
384 {
385 case FORMAT_P8:
386 {
387 ASSERT(palette);
388
389 unsigned int abgr = palette[*(unsigned char*)element];
390
391 r = (abgr & 0x000000FF) * (1.0f / 0x000000FF);
392 g = (abgr & 0x0000FF00) * (1.0f / 0x0000FF00);
393 b = (abgr & 0x00FF0000) * (1.0f / 0x00FF0000);
394 a = (abgr & 0xFF000000) * (1.0f / 0xFF000000);
395 }
396 break;
397 case FORMAT_A8P8:
398 {
399 ASSERT(palette);
400
401 unsigned int bgr = palette[((unsigned char*)element)[0]];
402
403 r = (bgr & 0x000000FF) * (1.0f / 0x000000FF);
404 g = (bgr & 0x0000FF00) * (1.0f / 0x0000FF00);
405 b = (bgr & 0x00FF0000) * (1.0f / 0x00FF0000);
406 a = ((unsigned char*)element)[1] * (1.0f / 0xFF);
407 }
408 break;
409 case FORMAT_A8:
410 r = 0;
411 g = 0;
412 b = 0;
413 a = *(unsigned char*)element * (1.0f / 0xFF);
414 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400415 case FORMAT_R8I_SNORM:
416 r = max((*(signed char*)element) * (1.0f / 0x7F), -1.0f);
417 break;
John Bauman89401822014-05-06 15:04:28 -0400418 case FORMAT_R8:
419 r = *(unsigned char*)element * (1.0f / 0xFF);
420 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400421 case FORMAT_R8I:
422 r = *(signed char*)element;
423 break;
424 case FORMAT_R8UI:
425 r = *(unsigned char*)element;
426 break;
John Bauman89401822014-05-06 15:04:28 -0400427 case FORMAT_R3G3B2:
428 {
429 unsigned char rgb = *(unsigned char*)element;
430
431 r = (rgb & 0xE0) * (1.0f / 0xE0);
432 g = (rgb & 0x1C) * (1.0f / 0x1C);
433 b = (rgb & 0x03) * (1.0f / 0x03);
434 }
435 break;
436 case FORMAT_A8R3G3B2:
437 {
438 unsigned short argb = *(unsigned short*)element;
439
440 a = (argb & 0xFF00) * (1.0f / 0xFF00);
441 r = (argb & 0x00E0) * (1.0f / 0x00E0);
442 g = (argb & 0x001C) * (1.0f / 0x001C);
443 b = (argb & 0x0003) * (1.0f / 0x0003);
444 }
445 break;
446 case FORMAT_X4R4G4B4:
447 {
448 unsigned short rgb = *(unsigned short*)element;
449
450 r = (rgb & 0x0F00) * (1.0f / 0x0F00);
451 g = (rgb & 0x00F0) * (1.0f / 0x00F0);
452 b = (rgb & 0x000F) * (1.0f / 0x000F);
453 }
454 break;
455 case FORMAT_A4R4G4B4:
456 {
457 unsigned short argb = *(unsigned short*)element;
458
459 a = (argb & 0xF000) * (1.0f / 0xF000);
460 r = (argb & 0x0F00) * (1.0f / 0x0F00);
461 g = (argb & 0x00F0) * (1.0f / 0x00F0);
462 b = (argb & 0x000F) * (1.0f / 0x000F);
463 }
464 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400465 case FORMAT_R4G4B4A4:
466 {
467 unsigned short rgba = *(unsigned short*)element;
468
469 r = (rgba & 0xF000) * (1.0f / 0xF000);
470 g = (rgba & 0x0F00) * (1.0f / 0x0F00);
471 b = (rgba & 0x00F0) * (1.0f / 0x00F0);
472 a = (rgba & 0x000F) * (1.0f / 0x000F);
473 }
474 break;
John Bauman89401822014-05-06 15:04:28 -0400475 case FORMAT_R5G6B5:
476 {
477 unsigned short rgb = *(unsigned short*)element;
478
479 r = (rgb & 0xF800) * (1.0f / 0xF800);
480 g = (rgb & 0x07E0) * (1.0f / 0x07E0);
481 b = (rgb & 0x001F) * (1.0f / 0x001F);
482 }
483 break;
484 case FORMAT_A1R5G5B5:
485 {
486 unsigned short argb = *(unsigned short*)element;
487
488 a = (argb & 0x8000) * (1.0f / 0x8000);
489 r = (argb & 0x7C00) * (1.0f / 0x7C00);
490 g = (argb & 0x03E0) * (1.0f / 0x03E0);
491 b = (argb & 0x001F) * (1.0f / 0x001F);
492 }
493 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400494 case FORMAT_R5G5B5A1:
495 {
496 unsigned short rgba = *(unsigned short*)element;
497
498 r = (rgba & 0xF800) * (1.0f / 0xF800);
499 g = (rgba & 0x07C0) * (1.0f / 0x07C0);
500 b = (rgba & 0x003E) * (1.0f / 0x003E);
501 a = (rgba & 0x0001) * (1.0f / 0x0001);
502 }
503 break;
John Bauman89401822014-05-06 15:04:28 -0400504 case FORMAT_X1R5G5B5:
505 {
506 unsigned short xrgb = *(unsigned short*)element;
507
508 r = (xrgb & 0x7C00) * (1.0f / 0x7C00);
509 g = (xrgb & 0x03E0) * (1.0f / 0x03E0);
510 b = (xrgb & 0x001F) * (1.0f / 0x001F);
511 }
512 break;
513 case FORMAT_A8R8G8B8:
514 {
515 unsigned int argb = *(unsigned int*)element;
516
517 a = (argb & 0xFF000000) * (1.0f / 0xFF000000);
518 r = (argb & 0x00FF0000) * (1.0f / 0x00FF0000);
519 g = (argb & 0x0000FF00) * (1.0f / 0x0000FF00);
520 b = (argb & 0x000000FF) * (1.0f / 0x000000FF);
521 }
522 break;
523 case FORMAT_X8R8G8B8:
524 {
525 unsigned int xrgb = *(unsigned int*)element;
526
527 r = (xrgb & 0x00FF0000) * (1.0f / 0x00FF0000);
528 g = (xrgb & 0x0000FF00) * (1.0f / 0x0000FF00);
529 b = (xrgb & 0x000000FF) * (1.0f / 0x000000FF);
530 }
531 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400532 case FORMAT_A8B8G8R8I_SNORM:
533 {
534 signed char* abgr = (signed char*)element;
535
536 r = max(abgr[0] * (1.0f / 0x7F), -1.0f);
537 g = max(abgr[1] * (1.0f / 0x7F), -1.0f);
538 b = max(abgr[2] * (1.0f / 0x7F), -1.0f);
539 a = max(abgr[3] * (1.0f / 0x7F), -1.0f);
540 }
541 break;
John Bauman89401822014-05-06 15:04:28 -0400542 case FORMAT_A8B8G8R8:
543 {
544 unsigned int abgr = *(unsigned int*)element;
545
546 a = (abgr & 0xFF000000) * (1.0f / 0xFF000000);
547 b = (abgr & 0x00FF0000) * (1.0f / 0x00FF0000);
548 g = (abgr & 0x0000FF00) * (1.0f / 0x0000FF00);
549 r = (abgr & 0x000000FF) * (1.0f / 0x000000FF);
550 }
551 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400552 case FORMAT_A8B8G8R8I:
553 {
554 signed char* abgr = (signed char*)element;
555
556 r = abgr[0];
557 g = abgr[1];
558 b = abgr[2];
559 a = abgr[3];
560 }
561 break;
562 case FORMAT_A8B8G8R8UI:
563 {
564 unsigned char* abgr = (unsigned char*)element;
565
566 r = abgr[0];
567 g = abgr[1];
568 b = abgr[2];
569 a = abgr[3];
570 }
571 break;
572 case FORMAT_X8B8G8R8I_SNORM:
573 {
574 signed char* bgr = (signed char*)element;
575
576 r = max(bgr[0] * (1.0f / 0x7F), -1.0f);
577 g = max(bgr[1] * (1.0f / 0x7F), -1.0f);
578 b = max(bgr[2] * (1.0f / 0x7F), -1.0f);
579 }
580 break;
John Bauman89401822014-05-06 15:04:28 -0400581 case FORMAT_X8B8G8R8:
582 {
583 unsigned int xbgr = *(unsigned int*)element;
584
585 b = (xbgr & 0x00FF0000) * (1.0f / 0x00FF0000);
586 g = (xbgr & 0x0000FF00) * (1.0f / 0x0000FF00);
587 r = (xbgr & 0x000000FF) * (1.0f / 0x000000FF);
588 }
589 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400590 case FORMAT_X8B8G8R8I:
591 {
592 signed char* bgr = (signed char*)element;
593
594 r = bgr[0];
595 g = bgr[1];
596 b = bgr[2];
597 }
598 break;
599 case FORMAT_X8B8G8R8UI:
600 {
601 unsigned char* bgr = (unsigned char*)element;
602
603 r = bgr[0];
604 g = bgr[1];
605 b = bgr[2];
606 }
607 break;
608 case FORMAT_G8R8I_SNORM:
609 {
610 signed char* gr = (signed char*)element;
611
612 r = (gr[0] & 0xFF00) * (1.0f / 0xFF00);
613 g = (gr[1] & 0x00FF) * (1.0f / 0x00FF);
614 }
615 break;
John Bauman89401822014-05-06 15:04:28 -0400616 case FORMAT_G8R8:
617 {
618 unsigned short gr = *(unsigned short*)element;
619
620 g = (gr & 0xFF00) * (1.0f / 0xFF00);
621 r = (gr & 0x00FF) * (1.0f / 0x00FF);
622 }
623 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400624 case FORMAT_G8R8I:
625 {
626 signed char* gr = (signed char*)element;
627
628 r = gr[0];
629 g = gr[1];
630 }
631 break;
632 case FORMAT_G8R8UI:
633 {
634 unsigned char* gr = (unsigned char*)element;
635
636 r = gr[0];
637 g = gr[1];
638 }
639 break;
640 case FORMAT_R16I:
641 r = *((short*)element);
642 break;
643 case FORMAT_R16UI:
644 r = *((unsigned short*)element);
645 break;
646 case FORMAT_G16R16I:
647 {
648 short* gr = (short*)element;
649
650 r = gr[0];
651 g = gr[1];
652 }
653 break;
John Bauman89401822014-05-06 15:04:28 -0400654 case FORMAT_G16R16:
655 {
656 unsigned int gr = *(unsigned int*)element;
657
658 g = (gr & 0xFFFF0000) * (1.0f / 0xFFFF0000);
659 r = (gr & 0x0000FFFF) * (1.0f / 0x0000FFFF);
660 }
661 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400662 case FORMAT_G16R16UI:
663 {
664 unsigned short* gr = (unsigned short*)element;
665
666 r = gr[0];
667 g = gr[1];
668 }
669 break;
John Bauman89401822014-05-06 15:04:28 -0400670 case FORMAT_A2R10G10B10:
671 {
672 unsigned int argb = *(unsigned int*)element;
673
674 a = (argb & 0xC0000000) * (1.0f / 0xC0000000);
675 r = (argb & 0x3FF00000) * (1.0f / 0x3FF00000);
676 g = (argb & 0x000FFC00) * (1.0f / 0x000FFC00);
677 b = (argb & 0x000003FF) * (1.0f / 0x000003FF);
678 }
679 break;
680 case FORMAT_A2B10G10R10:
681 {
682 unsigned int abgr = *(unsigned int*)element;
683
684 a = (abgr & 0xC0000000) * (1.0f / 0xC0000000);
685 b = (abgr & 0x3FF00000) * (1.0f / 0x3FF00000);
686 g = (abgr & 0x000FFC00) * (1.0f / 0x000FFC00);
687 r = (abgr & 0x000003FF) * (1.0f / 0x000003FF);
688 }
689 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400690 case FORMAT_A16B16G16R16I:
691 {
692 short* abgr = (short*)element;
693
694 r = abgr[0];
695 g = abgr[1];
696 b = abgr[2];
697 a = abgr[3];
698 }
699 break;
John Bauman89401822014-05-06 15:04:28 -0400700 case FORMAT_A16B16G16R16:
701 r = ((unsigned short*)element)[0] * (1.0f / 0xFFFF);
702 g = ((unsigned short*)element)[1] * (1.0f / 0xFFFF);
703 b = ((unsigned short*)element)[2] * (1.0f / 0xFFFF);
704 a = ((unsigned short*)element)[3] * (1.0f / 0xFFFF);
705 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400706 case FORMAT_A16B16G16R16UI:
707 {
708 unsigned short* abgr = (unsigned short*)element;
709
710 r = abgr[0];
711 g = abgr[1];
712 b = abgr[2];
713 a = abgr[3];
714 }
715 break;
716 case FORMAT_X16B16G16R16I:
717 {
718 short* bgr = (short*)element;
719
720 r = bgr[0];
721 g = bgr[1];
722 b = bgr[2];
723 }
724 break;
725 case FORMAT_X16B16G16R16UI:
726 {
727 unsigned short* bgr = (unsigned short*)element;
728
729 r = bgr[0];
730 g = bgr[1];
731 b = bgr[2];
732 }
733 break;
734 case FORMAT_A32B32G32R32I:
735 {
736 int* abgr = (int*)element;
737
738 r = static_cast<float>(abgr[0]);
739 g = static_cast<float>(abgr[1]);
740 b = static_cast<float>(abgr[2]);
741 a = static_cast<float>(abgr[3]);
742 }
743 break;
744 case FORMAT_A32B32G32R32UI:
745 {
746 unsigned int* abgr = (unsigned int*)element;
747
748 r = static_cast<float>(abgr[0]);
749 g = static_cast<float>(abgr[1]);
750 b = static_cast<float>(abgr[2]);
751 a = static_cast<float>(abgr[3]);
752 }
753 break;
754 case FORMAT_X32B32G32R32I:
755 {
756 int* bgr = (int*)element;
757
758 r = static_cast<float>(bgr[0]);
759 g = static_cast<float>(bgr[1]);
760 b = static_cast<float>(bgr[2]);
761 }
762 break;
763 case FORMAT_X32B32G32R32UI:
764 {
765 unsigned int* bgr = (unsigned int*)element;
766
767 r = static_cast<float>(bgr[0]);
768 g = static_cast<float>(bgr[1]);
769 b = static_cast<float>(bgr[2]);
770 }
771 break;
772 case FORMAT_G32R32I:
773 {
774 int* gr = (int*)element;
775
776 r = static_cast<float>(gr[0]);
777 g = static_cast<float>(gr[1]);
778 }
779 break;
780 case FORMAT_G32R32UI:
781 {
782 unsigned int* gr = (unsigned int*)element;
783
784 r = static_cast<float>(gr[0]);
785 g = static_cast<float>(gr[1]);
786 }
787 break;
788 case FORMAT_R32I:
789 r = static_cast<float>(*((int*)element));
790 break;
791 case FORMAT_R32UI:
792 r = static_cast<float>(*((unsigned int*)element));
793 break;
John Bauman89401822014-05-06 15:04:28 -0400794 case FORMAT_V8U8:
795 {
796 unsigned short vu = *(unsigned short*)element;
797
798 r = ((int)(vu & 0x00FF) << 24) * (1.0f / 0x7F000000);
799 g = ((int)(vu & 0xFF00) << 16) * (1.0f / 0x7F000000);
800 }
801 break;
802 case FORMAT_L6V5U5:
803 {
804 unsigned short lvu = *(unsigned short*)element;
805
806 r = ((int)(lvu & 0x001F) << 27) * (1.0f / 0x78000000);
807 g = ((int)(lvu & 0x03E0) << 22) * (1.0f / 0x78000000);
808 b = (lvu & 0xFC00) * (1.0f / 0xFC00);
809 }
810 break;
811 case FORMAT_Q8W8V8U8:
812 {
813 unsigned int qwvu = *(unsigned int*)element;
814
815 r = ((int)(qwvu & 0x000000FF) << 24) * (1.0f / 0x7F000000);
816 g = ((int)(qwvu & 0x0000FF00) << 16) * (1.0f / 0x7F000000);
817 b = ((int)(qwvu & 0x00FF0000) << 8) * (1.0f / 0x7F000000);
818 a = ((int)(qwvu & 0xFF000000) << 0) * (1.0f / 0x7F000000);
819 }
820 break;
821 case FORMAT_X8L8V8U8:
822 {
823 unsigned int xlvu = *(unsigned int*)element;
824
825 r = ((int)(xlvu & 0x000000FF) << 24) * (1.0f / 0x7F000000);
826 g = ((int)(xlvu & 0x0000FF00) << 16) * (1.0f / 0x7F000000);
827 b = (xlvu & 0x00FF0000) * (1.0f / 0x00FF0000);
828 }
829 break;
830 case FORMAT_R8G8B8:
831 r = ((unsigned char*)element)[2] * (1.0f / 0xFF);
832 g = ((unsigned char*)element)[1] * (1.0f / 0xFF);
833 b = ((unsigned char*)element)[0] * (1.0f / 0xFF);
834 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400835 case FORMAT_B8G8R8:
836 r = ((unsigned char*)element)[0] * (1.0f / 0xFF);
837 g = ((unsigned char*)element)[1] * (1.0f / 0xFF);
838 b = ((unsigned char*)element)[2] * (1.0f / 0xFF);
839 break;
John Bauman89401822014-05-06 15:04:28 -0400840 case FORMAT_V16U16:
841 {
842 unsigned int vu = *(unsigned int*)element;
843
844 r = ((int)(vu & 0x0000FFFF) << 16) * (1.0f / 0x7FFF0000);
845 g = ((int)(vu & 0xFFFF0000) << 0) * (1.0f / 0x7FFF0000);
846 }
847 break;
848 case FORMAT_A2W10V10U10:
849 {
850 unsigned int awvu = *(unsigned int*)element;
851
852 r = ((int)(awvu & 0x000003FF) << 22) * (1.0f / 0x7FC00000);
853 g = ((int)(awvu & 0x000FFC00) << 12) * (1.0f / 0x7FC00000);
854 b = ((int)(awvu & 0x3FF00000) << 2) * (1.0f / 0x7FC00000);
855 a = (awvu & 0xC0000000) * (1.0f / 0xC0000000);
856 }
857 break;
858 case FORMAT_A16W16V16U16:
859 r = ((signed short*)element)[0] * (1.0f / 0x7FFF);
860 g = ((signed short*)element)[1] * (1.0f / 0x7FFF);
861 b = ((signed short*)element)[2] * (1.0f / 0x7FFF);
862 a = ((unsigned short*)element)[3] * (1.0f / 0xFFFF);
863 break;
864 case FORMAT_Q16W16V16U16:
865 r = ((signed short*)element)[0] * (1.0f / 0x7FFF);
866 g = ((signed short*)element)[1] * (1.0f / 0x7FFF);
867 b = ((signed short*)element)[2] * (1.0f / 0x7FFF);
868 a = ((signed short*)element)[3] * (1.0f / 0x7FFF);
869 break;
870 case FORMAT_L8:
871 r =
872 g =
873 b = *(unsigned char*)element * (1.0f / 0xFF);
874 break;
875 case FORMAT_A4L4:
876 {
877 unsigned char al = *(unsigned char*)element;
878
879 r =
880 g =
881 b = (al & 0x0F) * (1.0f / 0x0F);
882 a = (al & 0xF0) * (1.0f / 0xF0);
883 }
884 break;
885 case FORMAT_L16:
886 r =
887 g =
888 b = *(unsigned short*)element * (1.0f / 0xFFFF);
889 break;
890 case FORMAT_A8L8:
891 r =
892 g =
893 b = ((unsigned char*)element)[0] * (1.0f / 0xFF);
894 a = ((unsigned char*)element)[1] * (1.0f / 0xFF);
895 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400896 case FORMAT_L16F:
897 r =
898 g =
899 b = *(half*)element;
900 break;
901 case FORMAT_A16L16F:
902 r =
903 g =
904 b = ((half*)element)[0];
905 a = ((half*)element)[1];
906 break;
907 case FORMAT_L32F:
908 r =
909 g =
910 b = *(float*)element;
911 break;
912 case FORMAT_A32L32F:
913 r =
914 g =
915 b = ((float*)element)[0];
916 a = ((float*)element)[1];
917 break;
918 case FORMAT_A16F:
919 a = *(half*)element;
920 break;
John Bauman89401822014-05-06 15:04:28 -0400921 case FORMAT_R16F:
922 r = *(half*)element;
923 break;
924 case FORMAT_G16R16F:
925 r = ((half*)element)[0];
926 g = ((half*)element)[1];
927 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400928 case FORMAT_B16G16R16F:
929 r = ((half*)element)[0];
930 g = ((half*)element)[1];
931 b = ((half*)element)[2];
932 break;
John Bauman89401822014-05-06 15:04:28 -0400933 case FORMAT_A16B16G16R16F:
934 r = ((half*)element)[0];
935 g = ((half*)element)[1];
936 b = ((half*)element)[2];
937 a = ((half*)element)[3];
938 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400939 case FORMAT_A32F:
940 a = *(float*)element;
941 break;
John Bauman89401822014-05-06 15:04:28 -0400942 case FORMAT_R32F:
943 r = *(float*)element;
944 break;
945 case FORMAT_G32R32F:
946 r = ((float*)element)[0];
947 g = ((float*)element)[1];
948 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400949 case FORMAT_B32G32R32F:
950 r = ((float*)element)[0];
951 g = ((float*)element)[1];
952 b = ((float*)element)[2];
953 break;
John Bauman89401822014-05-06 15:04:28 -0400954 case FORMAT_A32B32G32R32F:
955 r = ((float*)element)[0];
956 g = ((float*)element)[1];
957 b = ((float*)element)[2];
958 a = ((float*)element)[3];
959 break;
960 case FORMAT_D32F:
961 case FORMAT_D32F_LOCKABLE:
John Bauman66b8ab22014-05-06 15:57:45 -0400962 case FORMAT_D32FS8_TEXTURE:
963 case FORMAT_D32FS8_SHADOW:
John Bauman89401822014-05-06 15:04:28 -0400964 r = *(float*)element;
965 g = r;
966 b = r;
967 a = r;
968 break;
969 case FORMAT_D32F_COMPLEMENTARY:
John Bauman66b8ab22014-05-06 15:57:45 -0400970 r = 1.0f - *(float*)element;
John Bauman89401822014-05-06 15:04:28 -0400971 g = r;
972 b = r;
973 a = r;
974 break;
975 case FORMAT_S8:
976 r = *(unsigned char*)element * (1.0f / 0xFF);
977 break;
978 default:
979 ASSERT(false);
980 }
981
982 // if(sRGB)
983 // {
984 // r = sRGBtoLinear(r);
985 // g = sRGBtoLinear(g);
986 // b = sRGBtoLinear(b);
987 // }
988
989 return Color<float>(r, g, b, a);
990 }
991
992 Color<float> Surface::Buffer::sample(float x, float y, float z) const
993 {
994 x -= 0.5f;
995 y -= 0.5f;
996 z -= 0.5f;
997
998 int x0 = clamp((int)x, 0, width - 1);
999 int x1 = (x0 + 1 >= width) ? x0 : x0 + 1;
1000
1001 int y0 = clamp((int)y, 0, height - 1);
1002 int y1 = (y0 + 1 >= height) ? y0 : y0 + 1;
1003
1004 int z0 = clamp((int)z, 0, depth - 1);
1005 int z1 = (z0 + 1 >= depth) ? z0 : z0 + 1;
1006
1007 Color<float> c000 = read(x0, y0, z0);
1008 Color<float> c100 = read(x1, y0, z0);
1009 Color<float> c010 = read(x0, y1, z0);
1010 Color<float> c110 = read(x1, y1, z0);
1011 Color<float> c001 = read(x0, y0, z1);
1012 Color<float> c101 = read(x1, y0, z1);
1013 Color<float> c011 = read(x0, y1, z1);
1014 Color<float> c111 = read(x1, y1, z1);
1015
1016 float fx = x - x0;
1017 float fy = y - y0;
1018 float fz = z - z0;
1019
1020 c000 *= (1 - fx) * (1 - fy) * (1 - fz);
1021 c100 *= fx * (1 - fy) * (1 - fz);
1022 c010 *= (1 - fx) * fy * (1 - fz);
1023 c110 *= fx * fy * (1 - fz);
1024 c001 *= (1 - fx) * (1 - fy) * fz;
1025 c101 *= fx * (1 - fy) * fz;
1026 c011 *= (1 - fx) * fy * fz;
1027 c111 *= fx * fy * fz;
1028
1029 return c000 + c100 + c010 + c110 + c001 + c101 + c011 + c111;
1030 }
1031
1032 Color<float> Surface::Buffer::sample(float x, float y) const
1033 {
1034 x -= 0.5f;
1035 y -= 0.5f;
1036
1037 int x0 = clamp((int)x, 0, width - 1);
1038 int x1 = (x0 + 1 >= width) ? x0 : x0 + 1;
1039
1040 int y0 = clamp((int)y, 0, height - 1);
1041 int y1 = (y0 + 1 >= height) ? y0 : y0 + 1;
1042
1043 Color<float> c00 = read(x0, y0);
1044 Color<float> c10 = read(x1, y0);
1045 Color<float> c01 = read(x0, y1);
1046 Color<float> c11 = read(x1, y1);
1047
1048 float fx = x - x0;
1049 float fy = y - y0;
1050
1051 c00 *= (1 - fx) * (1 - fy);
1052 c10 *= fx * (1 - fy);
1053 c01 *= (1 - fx) * fy;
1054 c11 *= fx * fy;
1055
1056 return c00 + c10 + c01 + c11;
1057 }
1058
John Bauman19bac1e2014-05-06 15:23:49 -04001059 void *Surface::Buffer::lockRect(int x, int y, int z, Lock lock)
John Bauman89401822014-05-06 15:04:28 -04001060 {
1061 this->lock = lock;
1062
1063 switch(lock)
1064 {
1065 case LOCK_UNLOCKED:
1066 case LOCK_READONLY:
1067 break;
1068 case LOCK_WRITEONLY:
1069 case LOCK_READWRITE:
1070 case LOCK_DISCARD:
1071 dirty = true;
1072 break;
1073 default:
1074 ASSERT(false);
1075 }
1076
John Baumand4ae8632014-05-06 16:18:33 -04001077 if(buffer)
John Bauman89401822014-05-06 15:04:28 -04001078 {
John Baumand4ae8632014-05-06 16:18:33 -04001079 switch(format)
1080 {
1081 #if S3TC_SUPPORT
1082 case FORMAT_DXT1:
1083 #endif
1084 case FORMAT_ATI1:
Nicolas Capens22658242014-11-29 00:31:41 -05001085 case FORMAT_ETC1:
Alexis Hetu460e41f2015-09-01 10:58:37 -04001086 case FORMAT_R11_EAC:
1087 case FORMAT_SIGNED_R11_EAC:
1088 case FORMAT_RGB8_ETC2:
1089 case FORMAT_SRGB8_ETC2:
1090 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
1091 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
John Baumand4ae8632014-05-06 16:18:33 -04001092 return (unsigned char*)buffer + 8 * (x / 4) + (y / 4) * pitchB + z * sliceB;
Alexis Hetu460e41f2015-09-01 10:58:37 -04001093 case FORMAT_RG11_EAC:
1094 case FORMAT_SIGNED_RG11_EAC:
1095 case FORMAT_RGBA8_ETC2_EAC:
1096 case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
1097 case FORMAT_RGBA_ASTC_4x4_KHR:
1098 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
1099 return (unsigned char*)buffer + 16 * (x / 4) + (y / 4) * pitchB + z * sliceB;
1100 case FORMAT_RGBA_ASTC_5x4_KHR:
1101 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
1102 return (unsigned char*)buffer + 16 * (x / 5) + (y / 4) * pitchB + z * sliceB;
1103 case FORMAT_RGBA_ASTC_5x5_KHR:
1104 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
1105 return (unsigned char*)buffer + 16 * (x / 5) + (y / 5) * pitchB + z * sliceB;
1106 case FORMAT_RGBA_ASTC_6x5_KHR:
1107 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
1108 return (unsigned char*)buffer + 16 * (x / 6) + (y / 5) * pitchB + z * sliceB;
1109 case FORMAT_RGBA_ASTC_6x6_KHR:
1110 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
1111 return (unsigned char*)buffer + 16 * (x / 6) + (y / 6) * pitchB + z * sliceB;
1112 case FORMAT_RGBA_ASTC_8x5_KHR:
1113 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
1114 return (unsigned char*)buffer + 16 * (x / 8) + (y / 5) * pitchB + z * sliceB;
1115 case FORMAT_RGBA_ASTC_8x6_KHR:
1116 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
1117 return (unsigned char*)buffer + 16 * (x / 8) + (y / 6) * pitchB + z * sliceB;
1118 case FORMAT_RGBA_ASTC_8x8_KHR:
1119 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
1120 return (unsigned char*)buffer + 16 * (x / 8) + (y / 8) * pitchB + z * sliceB;
1121 case FORMAT_RGBA_ASTC_10x5_KHR:
1122 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
1123 return (unsigned char*)buffer + 16 * (x / 10) + (y / 5) * pitchB + z * sliceB;
1124 case FORMAT_RGBA_ASTC_10x6_KHR:
1125 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
1126 return (unsigned char*)buffer + 16 * (x / 10) + (y / 6) * pitchB + z * sliceB;
1127 case FORMAT_RGBA_ASTC_10x8_KHR:
1128 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
1129 return (unsigned char*)buffer + 16 * (x / 10) + (y / 8) * pitchB + z * sliceB;
1130 case FORMAT_RGBA_ASTC_10x10_KHR:
1131 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
1132 return (unsigned char*)buffer + 16 * (x / 10) + (y / 10) * pitchB + z * sliceB;
1133 case FORMAT_RGBA_ASTC_12x10_KHR:
1134 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
1135 return (unsigned char*)buffer + 16 * (x / 12) + (y / 10) * pitchB + z * sliceB;
1136 case FORMAT_RGBA_ASTC_12x12_KHR:
1137 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:
1138 return (unsigned char*)buffer + 16 * (x / 12) + (y / 12) * pitchB + z * sliceB;
John Baumand4ae8632014-05-06 16:18:33 -04001139 #if S3TC_SUPPORT
1140 case FORMAT_DXT3:
1141 case FORMAT_DXT5:
1142 #endif
1143 case FORMAT_ATI2:
1144 return (unsigned char*)buffer + 16 * (x / 4) + (y / 4) * pitchB + z * sliceB;
1145 default:
1146 return (unsigned char*)buffer + x * bytes + y * pitchB + z * sliceB;
1147 }
John Bauman89401822014-05-06 15:04:28 -04001148 }
1149
1150 return 0;
1151 }
1152
1153 void Surface::Buffer::unlockRect()
1154 {
1155 lock = LOCK_UNLOCKED;
1156 }
1157
Nicolas Capens477314b2015-06-09 16:47:29 -04001158 Surface::Surface(int width, int height, int depth, Format format, void *pixels, int pitch, int slice) : lockable(true), renderTarget(false)
1159 {
1160 resource = new Resource(0);
1161 hasParent = false;
1162 ownExternal = false;
1163 depth = max(1, depth);
1164
1165 external.buffer = pixels;
1166 external.width = width;
1167 external.height = height;
1168 external.depth = depth;
1169 external.format = format;
1170 external.bytes = bytes(external.format);
1171 external.pitchB = pitch;
1172 external.pitchP = pitch / external.bytes;
1173 external.sliceB = slice;
1174 external.sliceP = slice / external.bytes;
1175 external.lock = LOCK_UNLOCKED;
1176 external.dirty = true;
1177
1178 internal.buffer = 0;
1179 internal.width = width;
1180 internal.height = height;
1181 internal.depth = depth;
1182 internal.format = selectInternalFormat(format);
1183 internal.bytes = bytes(internal.format);
1184 internal.pitchB = pitchB(internal.width, internal.format, false);
1185 internal.pitchP = pitchP(internal.width, internal.format, false);
1186 internal.sliceB = sliceB(internal.width, internal.height, internal.format, false);
1187 internal.sliceP = sliceP(internal.width, internal.height, internal.format, false);
1188 internal.lock = LOCK_UNLOCKED;
1189 internal.dirty = false;
1190
1191 stencil.buffer = 0;
1192 stencil.width = width;
1193 stencil.height = height;
1194 stencil.depth = depth;
1195 stencil.format = FORMAT_S8;
1196 stencil.bytes = bytes(stencil.format);
1197 stencil.pitchB = pitchB(stencil.width, stencil.format, false);
1198 stencil.pitchP = pitchP(stencil.width, stencil.format, false);
1199 stencil.sliceB = sliceB(stencil.width, stencil.height, stencil.format, false);
1200 stencil.sliceP = sliceP(stencil.width, stencil.height, stencil.format, false);
1201 stencil.lock = LOCK_UNLOCKED;
1202 stencil.dirty = false;
1203
1204 dirtyMipmaps = true;
1205 paletteUsed = 0;
1206 }
1207
Nicolas Capensf3898612015-11-24 15:33:31 -05001208 Surface::Surface(Resource *texture, int width, int height, int depth, Format format, bool lockable, bool renderTarget, int pitchPprovided) : lockable(lockable), renderTarget(renderTarget)
John Bauman89401822014-05-06 15:04:28 -04001209 {
1210 resource = texture ? texture : new Resource(0);
John Bauman19bac1e2014-05-06 15:23:49 -04001211 hasParent = texture != 0;
Nicolas Capens477314b2015-06-09 16:47:29 -04001212 ownExternal = true;
John Bauman89401822014-05-06 15:04:28 -04001213 depth = max(1, depth);
1214
1215 external.buffer = 0;
1216 external.width = width;
1217 external.height = height;
1218 external.depth = depth;
1219 external.format = format;
1220 external.bytes = bytes(external.format);
1221 external.pitchB = pitchB(external.width, external.format, renderTarget && !texture);
1222 external.pitchP = pitchP(external.width, external.format, renderTarget && !texture);
1223 external.sliceB = sliceB(external.width, external.height, external.format, renderTarget && !texture);
1224 external.sliceP = sliceP(external.width, external.height, external.format, renderTarget && !texture);
1225 external.lock = LOCK_UNLOCKED;
1226 external.dirty = false;
John Bauman89401822014-05-06 15:04:28 -04001227
1228 internal.buffer = 0;
1229 internal.width = width;
1230 internal.height = height;
1231 internal.depth = depth;
1232 internal.format = selectInternalFormat(format);
1233 internal.bytes = bytes(internal.format);
Nicolas Capensf3898612015-11-24 15:33:31 -05001234 internal.pitchB = !pitchPprovided ? pitchB(internal.width, internal.format, renderTarget) : pitchPprovided * internal.bytes;
1235 internal.pitchP = !pitchPprovided ? pitchP(internal.width, internal.format, renderTarget) : pitchPprovided;
John Bauman89401822014-05-06 15:04:28 -04001236 internal.sliceB = sliceB(internal.width, internal.height, internal.format, renderTarget);
1237 internal.sliceP = sliceP(internal.width, internal.height, internal.format, renderTarget);
1238 internal.lock = LOCK_UNLOCKED;
1239 internal.dirty = false;
John Bauman89401822014-05-06 15:04:28 -04001240
1241 stencil.buffer = 0;
1242 stencil.width = width;
1243 stencil.height = height;
1244 stencil.depth = depth;
1245 stencil.format = FORMAT_S8;
1246 stencil.bytes = bytes(stencil.format);
1247 stencil.pitchB = pitchB(stencil.width, stencil.format, renderTarget);
1248 stencil.pitchP = pitchP(stencil.width, stencil.format, renderTarget);
1249 stencil.sliceB = sliceB(stencil.width, stencil.height, stencil.format, renderTarget);
1250 stencil.sliceP = sliceP(stencil.width, stencil.height, stencil.format, renderTarget);
1251 stencil.lock = LOCK_UNLOCKED;
1252 stencil.dirty = false;
John Bauman89401822014-05-06 15:04:28 -04001253
1254 dirtyMipmaps = true;
John Bauman66b8ab22014-05-06 15:57:45 -04001255 paletteUsed = 0;
John Bauman89401822014-05-06 15:04:28 -04001256 }
1257
1258 Surface::~Surface()
1259 {
John Bauman8a4f6fc2014-05-06 15:26:18 -04001260 // Synchronize so we can deallocate the buffers below
1261 resource->lock(DESTRUCT);
1262 resource->unlock();
1263
John Bauman89401822014-05-06 15:04:28 -04001264 if(!hasParent)
1265 {
1266 resource->destruct();
1267 }
1268
Nicolas Capens477314b2015-06-09 16:47:29 -04001269 if(ownExternal)
1270 {
1271 deallocate(external.buffer);
1272 }
John Bauman89401822014-05-06 15:04:28 -04001273
1274 if(internal.buffer != external.buffer)
1275 {
1276 deallocate(internal.buffer);
1277 }
1278
1279 deallocate(stencil.buffer);
1280
1281 external.buffer = 0;
1282 internal.buffer = 0;
1283 stencil.buffer = 0;
1284 }
1285
John Bauman19bac1e2014-05-06 15:23:49 -04001286 void *Surface::lockExternal(int x, int y, int z, Lock lock, Accessor client)
John Bauman89401822014-05-06 15:04:28 -04001287 {
1288 resource->lock(client);
1289
1290 if(!external.buffer)
1291 {
1292 if(internal.buffer && identicalFormats())
1293 {
1294 external.buffer = internal.buffer;
1295 }
1296 else
1297 {
1298 external.buffer = allocateBuffer(external.width, external.height, external.depth, external.format);
1299 }
1300 }
1301
1302 if(internal.dirty)
1303 {
1304 if(lock != LOCK_DISCARD)
1305 {
1306 update(external, internal);
1307 }
John Bauman66b8ab22014-05-06 15:57:45 -04001308
1309 internal.dirty = false;
John Bauman89401822014-05-06 15:04:28 -04001310 }
1311
1312 switch(lock)
1313 {
1314 case LOCK_READONLY:
1315 break;
1316 case LOCK_WRITEONLY:
1317 case LOCK_READWRITE:
1318 case LOCK_DISCARD:
1319 dirtyMipmaps = true;
1320 break;
1321 default:
1322 ASSERT(false);
1323 }
1324
John Bauman19bac1e2014-05-06 15:23:49 -04001325 return external.lockRect(x, y, z, lock);
John Bauman89401822014-05-06 15:04:28 -04001326 }
1327
1328 void Surface::unlockExternal()
1329 {
1330 resource->unlock();
1331
1332 external.unlockRect();
1333 }
1334
John Bauman19bac1e2014-05-06 15:23:49 -04001335 void *Surface::lockInternal(int x, int y, int z, Lock lock, Accessor client)
John Bauman89401822014-05-06 15:04:28 -04001336 {
1337 if(lock != LOCK_UNLOCKED)
1338 {
1339 resource->lock(client);
1340 }
1341
1342 if(!internal.buffer)
1343 {
1344 if(external.buffer && identicalFormats())
1345 {
1346 internal.buffer = external.buffer;
1347 }
1348 else
1349 {
1350 internal.buffer = allocateBuffer(internal.width, internal.height, internal.depth, internal.format);
1351 }
1352 }
1353
1354 // FIXME: WHQL requires conversion to lower external precision and back
1355 if(logPrecision >= WHQL)
1356 {
1357 if(internal.dirty && renderTarget && internal.format != external.format)
1358 {
1359 if(lock != LOCK_DISCARD)
1360 {
1361 switch(external.format)
1362 {
1363 case FORMAT_R3G3B2:
1364 case FORMAT_A8R3G3B2:
1365 case FORMAT_A1R5G5B5:
1366 case FORMAT_A2R10G10B10:
1367 case FORMAT_A2B10G10R10:
1368 lockExternal(0, 0, 0, LOCK_READWRITE, client);
1369 unlockExternal();
1370 break;
1371 default:
1372 // Difference passes WHQL
1373 break;
1374 }
1375 }
1376 }
1377 }
1378
John Bauman66b8ab22014-05-06 15:57:45 -04001379 if(external.dirty || (isPalette(external.format) && paletteUsed != Surface::paletteID))
John Bauman89401822014-05-06 15:04:28 -04001380 {
1381 if(lock != LOCK_DISCARD)
1382 {
1383 update(internal, external);
1384 }
John Bauman89401822014-05-06 15:04:28 -04001385
John Bauman66b8ab22014-05-06 15:57:45 -04001386 external.dirty = false;
1387 paletteUsed = Surface::paletteID;
John Bauman89401822014-05-06 15:04:28 -04001388 }
1389
1390 switch(lock)
1391 {
1392 case LOCK_UNLOCKED:
1393 case LOCK_READONLY:
1394 break;
1395 case LOCK_WRITEONLY:
1396 case LOCK_READWRITE:
1397 case LOCK_DISCARD:
1398 dirtyMipmaps = true;
1399 break;
1400 default:
1401 ASSERT(false);
1402 }
1403
1404 if(lock == LOCK_READONLY && client == PUBLIC)
1405 {
1406 resolve();
1407 }
1408
John Bauman19bac1e2014-05-06 15:23:49 -04001409 return internal.lockRect(x, y, z, lock);
John Bauman89401822014-05-06 15:04:28 -04001410 }
1411
1412 void Surface::unlockInternal()
1413 {
1414 resource->unlock();
1415
1416 internal.unlockRect();
1417 }
1418
1419 void *Surface::lockStencil(int front, Accessor client)
1420 {
1421 resource->lock(client);
1422
1423 if(!stencil.buffer)
1424 {
1425 stencil.buffer = allocateBuffer(stencil.width, stencil.height, stencil.depth, stencil.format);
1426 }
1427
John Bauman89401822014-05-06 15:04:28 -04001428 return stencil.lockRect(0, 0, front, LOCK_READWRITE); // FIXME
1429 }
1430
1431 void Surface::unlockStencil()
1432 {
1433 resource->unlock();
1434
1435 stencil.unlockRect();
1436 }
1437
1438 int Surface::bytes(Format format)
1439 {
1440 switch(format)
1441 {
1442 case FORMAT_NULL: return 0;
1443 case FORMAT_P8: return 1;
1444 case FORMAT_A8P8: return 2;
1445 case FORMAT_A8: return 1;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001446 case FORMAT_R8I: return 1;
John Bauman89401822014-05-06 15:04:28 -04001447 case FORMAT_R8: return 1;
1448 case FORMAT_R3G3B2: return 1;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001449 case FORMAT_R16I: return 2;
1450 case FORMAT_R16UI: return 2;
John Bauman89401822014-05-06 15:04:28 -04001451 case FORMAT_A8R3G3B2: return 2;
1452 case FORMAT_R5G6B5: return 2;
1453 case FORMAT_A1R5G5B5: return 2;
1454 case FORMAT_X1R5G5B5: return 2;
Nicolas Capens80594422015-06-09 16:42:56 -04001455 case FORMAT_R5G5B5A1: return 2;
John Bauman89401822014-05-06 15:04:28 -04001456 case FORMAT_X4R4G4B4: return 2;
1457 case FORMAT_A4R4G4B4: return 2;
Nicolas Capens80594422015-06-09 16:42:56 -04001458 case FORMAT_R4G4B4A4: return 2;
John Bauman89401822014-05-06 15:04:28 -04001459 case FORMAT_R8G8B8: return 3;
Nicolas Capens80594422015-06-09 16:42:56 -04001460 case FORMAT_B8G8R8: return 3;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001461 case FORMAT_R32I: return 4;
1462 case FORMAT_R32UI: return 4;
John Bauman89401822014-05-06 15:04:28 -04001463 case FORMAT_X8R8G8B8: return 4;
1464 // case FORMAT_X8G8R8B8Q: return 4;
1465 case FORMAT_A8R8G8B8: return 4;
1466 // case FORMAT_A8G8R8B8Q: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001467 case FORMAT_X8B8G8R8I: return 4;
John Bauman89401822014-05-06 15:04:28 -04001468 case FORMAT_X8B8G8R8: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001469 case FORMAT_A8B8G8R8I: return 4;
1470 case FORMAT_R8UI: return 1;
1471 case FORMAT_G8R8UI: return 2;
1472 case FORMAT_X8B8G8R8UI: return 4;
1473 case FORMAT_A8B8G8R8UI: return 4;
John Bauman89401822014-05-06 15:04:28 -04001474 case FORMAT_A8B8G8R8: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001475 case FORMAT_R8I_SNORM: return 1;
1476 case FORMAT_G8R8I_SNORM: return 2;
1477 case FORMAT_X8B8G8R8I_SNORM: return 4;
1478 case FORMAT_A8B8G8R8I_SNORM: return 4;
John Bauman89401822014-05-06 15:04:28 -04001479 case FORMAT_A2R10G10B10: return 4;
1480 case FORMAT_A2B10G10R10: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001481 case FORMAT_G8R8I: return 2;
John Bauman89401822014-05-06 15:04:28 -04001482 case FORMAT_G8R8: return 2;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001483 case FORMAT_G16R16I: return 4;
1484 case FORMAT_G16R16UI: return 4;
John Bauman89401822014-05-06 15:04:28 -04001485 case FORMAT_G16R16: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001486 case FORMAT_G32R32I: return 8;
1487 case FORMAT_G32R32UI: return 8;
1488 case FORMAT_X16B16G16R16I: return 8;
1489 case FORMAT_X16B16G16R16UI: return 8;
1490 case FORMAT_A16B16G16R16I: return 8;
1491 case FORMAT_A16B16G16R16UI: return 8;
John Bauman89401822014-05-06 15:04:28 -04001492 case FORMAT_A16B16G16R16: return 8;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001493 case FORMAT_X32B32G32R32I: return 16;
1494 case FORMAT_X32B32G32R32UI: return 16;
1495 case FORMAT_A32B32G32R32I: return 16;
1496 case FORMAT_A32B32G32R32UI: return 16;
John Bauman89401822014-05-06 15:04:28 -04001497 // Compressed formats
1498 #if S3TC_SUPPORT
1499 case FORMAT_DXT1: return 2; // Column of four pixels
1500 case FORMAT_DXT3: return 4; // Column of four pixels
1501 case FORMAT_DXT5: return 4; // Column of four pixels
John Bauman66b8ab22014-05-06 15:57:45 -04001502 #endif
John Bauman89401822014-05-06 15:04:28 -04001503 case FORMAT_ATI1: return 2; // Column of four pixels
1504 case FORMAT_ATI2: return 4; // Column of four pixels
Nicolas Capens22658242014-11-29 00:31:41 -05001505 case FORMAT_ETC1: return 2; // Column of four pixels
Alexis Hetu460e41f2015-09-01 10:58:37 -04001506 case FORMAT_R11_EAC: return 2;
1507 case FORMAT_SIGNED_R11_EAC: return 2;
1508 case FORMAT_RG11_EAC: return 4;
1509 case FORMAT_SIGNED_RG11_EAC: return 4;
1510 case FORMAT_RGB8_ETC2: return 2;
1511 case FORMAT_SRGB8_ETC2: return 2;
1512 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2: return 2;
1513 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2: return 2;
1514 case FORMAT_RGBA8_ETC2_EAC: return 4;
1515 case FORMAT_SRGB8_ALPHA8_ETC2_EAC: return 4;
1516 case FORMAT_RGBA_ASTC_4x4_KHR:
1517 case FORMAT_RGBA_ASTC_5x4_KHR:
1518 case FORMAT_RGBA_ASTC_5x5_KHR:
1519 case FORMAT_RGBA_ASTC_6x5_KHR:
1520 case FORMAT_RGBA_ASTC_6x6_KHR:
1521 case FORMAT_RGBA_ASTC_8x5_KHR:
1522 case FORMAT_RGBA_ASTC_8x6_KHR:
1523 case FORMAT_RGBA_ASTC_8x8_KHR:
1524 case FORMAT_RGBA_ASTC_10x5_KHR:
1525 case FORMAT_RGBA_ASTC_10x6_KHR:
1526 case FORMAT_RGBA_ASTC_10x8_KHR:
1527 case FORMAT_RGBA_ASTC_10x10_KHR:
1528 case FORMAT_RGBA_ASTC_12x10_KHR:
1529 case FORMAT_RGBA_ASTC_12x12_KHR:
1530 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
1531 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
1532 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
1533 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
1534 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
1535 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
1536 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
1537 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
1538 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
1539 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
1540 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
1541 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
1542 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
1543 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR: return 0; // FIXME
John Bauman89401822014-05-06 15:04:28 -04001544 // Bumpmap formats
1545 case FORMAT_V8U8: return 2;
1546 case FORMAT_L6V5U5: return 2;
1547 case FORMAT_Q8W8V8U8: return 4;
1548 case FORMAT_X8L8V8U8: return 4;
1549 case FORMAT_A2W10V10U10: return 4;
1550 case FORMAT_V16U16: return 4;
1551 case FORMAT_A16W16V16U16: return 8;
1552 case FORMAT_Q16W16V16U16: return 8;
1553 // Luminance formats
1554 case FORMAT_L8: return 1;
1555 case FORMAT_A4L4: return 1;
1556 case FORMAT_L16: return 2;
1557 case FORMAT_A8L8: return 2;
Nicolas Capens80594422015-06-09 16:42:56 -04001558 case FORMAT_L16F: return 2;
1559 case FORMAT_A16L16F: return 4;
1560 case FORMAT_L32F: return 4;
1561 case FORMAT_A32L32F: return 8;
John Bauman89401822014-05-06 15:04:28 -04001562 // Floating-point formats
Nicolas Capens80594422015-06-09 16:42:56 -04001563 case FORMAT_A16F: return 2;
John Bauman89401822014-05-06 15:04:28 -04001564 case FORMAT_R16F: return 2;
1565 case FORMAT_G16R16F: return 4;
Nicolas Capens80594422015-06-09 16:42:56 -04001566 case FORMAT_B16G16R16F: return 6;
John Bauman89401822014-05-06 15:04:28 -04001567 case FORMAT_A16B16G16R16F: return 8;
Nicolas Capens80594422015-06-09 16:42:56 -04001568 case FORMAT_A32F: return 4;
John Bauman89401822014-05-06 15:04:28 -04001569 case FORMAT_R32F: return 4;
1570 case FORMAT_G32R32F: return 8;
Nicolas Capens80594422015-06-09 16:42:56 -04001571 case FORMAT_B32G32R32F: return 12;
John Bauman89401822014-05-06 15:04:28 -04001572 case FORMAT_A32B32G32R32F: return 16;
1573 // Depth/stencil formats
1574 case FORMAT_D16: return 2;
1575 case FORMAT_D32: return 4;
1576 case FORMAT_D24X8: return 4;
1577 case FORMAT_D24S8: return 4;
1578 case FORMAT_D24FS8: return 4;
1579 case FORMAT_D32F: return 4;
1580 case FORMAT_D32F_COMPLEMENTARY: return 4;
1581 case FORMAT_D32F_LOCKABLE: return 4;
John Bauman66b8ab22014-05-06 15:57:45 -04001582 case FORMAT_D32FS8_TEXTURE: return 4;
1583 case FORMAT_D32FS8_SHADOW: return 4;
1584 case FORMAT_DF24S8: return 4;
1585 case FORMAT_DF16S8: return 2;
John Bauman89401822014-05-06 15:04:28 -04001586 case FORMAT_INTZ: return 4;
1587 case FORMAT_S8: return 1;
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04001588 case FORMAT_YV12_BT601: return 1; // Y plane only
1589 case FORMAT_YV12_BT709: return 1; // Y plane only
1590 case FORMAT_YV12_JFIF: return 1; // Y plane only
John Bauman89401822014-05-06 15:04:28 -04001591 default:
1592 ASSERT(false);
1593 }
1594
1595 return 0;
1596 }
1597
1598 int Surface::pitchB(int width, Format format, bool target)
1599 {
1600 if(target || isDepth(format) || isStencil(format))
1601 {
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04001602 width = align(width, 2);
John Bauman89401822014-05-06 15:04:28 -04001603 }
1604
1605 switch(format)
1606 {
1607 #if S3TC_SUPPORT
1608 case FORMAT_DXT1:
Nicolas Capens22658242014-11-29 00:31:41 -05001609 #endif
1610 case FORMAT_ETC1:
Alexis Hetu460e41f2015-09-01 10:58:37 -04001611 case FORMAT_R11_EAC:
1612 case FORMAT_SIGNED_R11_EAC:
1613 case FORMAT_RGB8_ETC2:
1614 case FORMAT_SRGB8_ETC2:
1615 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
1616 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
John Bauman89401822014-05-06 15:04:28 -04001617 return 8 * ((width + 3) / 4); // 64 bit per 4x4 block, computed per 4 rows
Alexis Hetu460e41f2015-09-01 10:58:37 -04001618 case FORMAT_RG11_EAC:
1619 case FORMAT_SIGNED_RG11_EAC:
1620 case FORMAT_RGBA8_ETC2_EAC:
1621 case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
1622 case FORMAT_RGBA_ASTC_4x4_KHR:
1623 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
1624 return 16 * ((width + 3) / 4); // 128 bit per 4x4 block, computed per 4 rows
1625 case FORMAT_RGBA_ASTC_5x4_KHR:
1626 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
1627 case FORMAT_RGBA_ASTC_5x5_KHR:
1628 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
1629 return 16 * ((width + 4) / 5);
1630 case FORMAT_RGBA_ASTC_6x5_KHR:
1631 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
1632 case FORMAT_RGBA_ASTC_6x6_KHR:
1633 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
1634 return 16 * ((width + 5) / 6);
1635 case FORMAT_RGBA_ASTC_8x5_KHR:
1636 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
1637 case FORMAT_RGBA_ASTC_8x6_KHR:
1638 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
1639 case FORMAT_RGBA_ASTC_8x8_KHR:
1640 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
1641 return 16 * ((width + 7) / 8);
1642 case FORMAT_RGBA_ASTC_10x5_KHR:
1643 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
1644 case FORMAT_RGBA_ASTC_10x6_KHR:
1645 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
1646 case FORMAT_RGBA_ASTC_10x8_KHR:
1647 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
1648 case FORMAT_RGBA_ASTC_10x10_KHR:
1649 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
1650 return 16 * ((width + 9) / 10);
1651 case FORMAT_RGBA_ASTC_12x10_KHR:
1652 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
1653 case FORMAT_RGBA_ASTC_12x12_KHR:
1654 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:
1655 return 16 * ((width + 11) / 12);
Nicolas Capens22658242014-11-29 00:31:41 -05001656 #if S3TC_SUPPORT
John Bauman89401822014-05-06 15:04:28 -04001657 case FORMAT_DXT3:
1658 case FORMAT_DXT5:
1659 return 16 * ((width + 3) / 4); // 128 bit per 4x4 block, computed per 4 rows
John Bauman66b8ab22014-05-06 15:57:45 -04001660 #endif
John Bauman89401822014-05-06 15:04:28 -04001661 case FORMAT_ATI1:
1662 return 2 * ((width + 3) / 4); // 64 bit per 4x4 block, computed per row
1663 case FORMAT_ATI2:
1664 return 4 * ((width + 3) / 4); // 128 bit per 4x4 block, computed per row
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04001665 case FORMAT_YV12_BT601:
1666 case FORMAT_YV12_BT709:
1667 case FORMAT_YV12_JFIF:
1668 return align(width, 16);
John Bauman89401822014-05-06 15:04:28 -04001669 default:
1670 return bytes(format) * width;
1671 }
1672 }
1673
1674 int Surface::pitchP(int width, Format format, bool target)
1675 {
1676 int B = bytes(format);
1677
1678 return B > 0 ? pitchB(width, format, target) / B : 0;
1679 }
1680
1681 int Surface::sliceB(int width, int height, Format format, bool target)
1682 {
1683 if(target || isDepth(format) || isStencil(format))
1684 {
1685 height = ((height + 1) & ~1);
1686 }
1687
1688 switch(format)
1689 {
1690 #if S3TC_SUPPORT
1691 case FORMAT_DXT1:
1692 case FORMAT_DXT3:
1693 case FORMAT_DXT5:
John Bauman66b8ab22014-05-06 15:57:45 -04001694 #endif
Nicolas Capens22658242014-11-29 00:31:41 -05001695 case FORMAT_ETC1:
Alexis Hetu460e41f2015-09-01 10:58:37 -04001696 case FORMAT_R11_EAC:
1697 case FORMAT_SIGNED_R11_EAC:
1698 case FORMAT_RG11_EAC:
1699 case FORMAT_SIGNED_RG11_EAC:
1700 case FORMAT_RGB8_ETC2:
1701 case FORMAT_SRGB8_ETC2:
1702 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
1703 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
1704 case FORMAT_RGBA8_ETC2_EAC:
1705 case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
1706 case FORMAT_RGBA_ASTC_4x4_KHR:
1707 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
1708 case FORMAT_RGBA_ASTC_5x4_KHR:
1709 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
Nicolas Capens22658242014-11-29 00:31:41 -05001710 return pitchB(width, format, target) * ((height + 3) / 4); // Pitch computed per 4 rows
Alexis Hetu460e41f2015-09-01 10:58:37 -04001711 case FORMAT_RGBA_ASTC_5x5_KHR:
1712 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
1713 case FORMAT_RGBA_ASTC_6x5_KHR:
1714 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
1715 case FORMAT_RGBA_ASTC_8x5_KHR:
1716 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
1717 case FORMAT_RGBA_ASTC_10x5_KHR:
1718 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
1719 return pitchB(width, format, target) * ((height + 4) / 5); // Pitch computed per 5 rows
1720 case FORMAT_RGBA_ASTC_6x6_KHR:
1721 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
1722 case FORMAT_RGBA_ASTC_8x6_KHR:
1723 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
1724 case FORMAT_RGBA_ASTC_10x6_KHR:
1725 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
1726 return pitchB(width, format, target) * ((height + 5) / 6); // Pitch computed per 6 rows
1727 case FORMAT_RGBA_ASTC_8x8_KHR:
1728 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
1729 case FORMAT_RGBA_ASTC_10x8_KHR:
1730 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
1731 return pitchB(width, format, target) * ((height + 7) / 8); // Pitch computed per 8 rows
1732 case FORMAT_RGBA_ASTC_10x10_KHR:
1733 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
1734 case FORMAT_RGBA_ASTC_12x10_KHR:
1735 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
1736 return pitchB(width, format, target) * ((height + 9) / 10); // Pitch computed per 10 rows
1737 case FORMAT_RGBA_ASTC_12x12_KHR:
1738 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:
1739 return pitchB(width, format, target) * ((height + 11) / 12); // Pitch computed per 12 rows
Nicolas Capens22658242014-11-29 00:31:41 -05001740 case FORMAT_ATI1:
1741 case FORMAT_ATI2:
John Bauman89401822014-05-06 15:04:28 -04001742 default:
Nicolas Capens22658242014-11-29 00:31:41 -05001743 return pitchB(width, format, target) * height; // Pitch computed per row
John Bauman89401822014-05-06 15:04:28 -04001744 }
1745 }
1746
1747 int Surface::sliceP(int width, int height, Format format, bool target)
1748 {
1749 int B = bytes(format);
1750
1751 return B > 0 ? sliceB(width, height, format, target) / B : 0;
1752 }
1753
1754 void Surface::update(Buffer &destination, Buffer &source)
1755 {
1756 // ASSERT(source.lock != LOCK_UNLOCKED);
1757 // ASSERT(destination.lock != LOCK_UNLOCKED);
1758
1759 if(destination.buffer != source.buffer)
1760 {
1761 ASSERT(source.dirty && !destination.dirty);
1762
1763 switch(source.format)
1764 {
1765 case FORMAT_R8G8B8: decodeR8G8B8(destination, source); break; // FIXME: Check destination format
John Bauman89401822014-05-06 15:04:28 -04001766 case FORMAT_X1R5G5B5: decodeX1R5G5B5(destination, source); break; // FIXME: Check destination format
1767 case FORMAT_A1R5G5B5: decodeA1R5G5B5(destination, source); break; // FIXME: Check destination format
1768 case FORMAT_X4R4G4B4: decodeX4R4G4B4(destination, source); break; // FIXME: Check destination format
1769 case FORMAT_A4R4G4B4: decodeA4R4G4B4(destination, source); break; // FIXME: Check destination format
1770 case FORMAT_P8: decodeP8(destination, source); break; // FIXME: Check destination format
1771 #if S3TC_SUPPORT
1772 case FORMAT_DXT1: decodeDXT1(destination, source); break; // FIXME: Check destination format
1773 case FORMAT_DXT3: decodeDXT3(destination, source); break; // FIXME: Check destination format
1774 case FORMAT_DXT5: decodeDXT5(destination, source); break; // FIXME: Check destination format
Nicolas Capens22658242014-11-29 00:31:41 -05001775 #endif
John Bauman89401822014-05-06 15:04:28 -04001776 case FORMAT_ATI1: decodeATI1(destination, source); break; // FIXME: Check destination format
1777 case FORMAT_ATI2: decodeATI2(destination, source); break; // FIXME: Check destination format
Alexis Hetu460e41f2015-09-01 10:58:37 -04001778 case FORMAT_R11_EAC: decodeEAC(destination, source, 1, false); break; // FIXME: Check destination format
1779 case FORMAT_SIGNED_R11_EAC: decodeEAC(destination, source, 1, true); break; // FIXME: Check destination format
1780 case FORMAT_RG11_EAC: decodeEAC(destination, source, 2, false); break; // FIXME: Check destination format
1781 case FORMAT_SIGNED_RG11_EAC: decodeEAC(destination, source, 2, true); break; // FIXME: Check destination format
Alexis Hetu0de50d42015-09-09 13:56:41 -04001782 case FORMAT_ETC1:
Alexis Hetu460e41f2015-09-01 10:58:37 -04001783 case FORMAT_RGB8_ETC2: decodeETC2(destination, source, 0, false); break; // FIXME: Check destination format
1784 case FORMAT_SRGB8_ETC2: decodeETC2(destination, source, 0, true); break; // FIXME: Check destination format
1785 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2: decodeETC2(destination, source, 1, false); break; // FIXME: Check destination format
1786 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2: decodeETC2(destination, source, 1, true); break; // FIXME: Check destination format
1787 case FORMAT_RGBA8_ETC2_EAC: decodeETC2(destination, source, 8, false); break; // FIXME: Check destination format
1788 case FORMAT_SRGB8_ALPHA8_ETC2_EAC: decodeETC2(destination, source, 8, true); break; // FIXME: Check destination format
1789 case FORMAT_RGBA_ASTC_4x4_KHR: decodeASTC(destination, source, 4, 4, 1, false); break; // FIXME: Check destination format
1790 case FORMAT_RGBA_ASTC_5x4_KHR: decodeASTC(destination, source, 5, 4, 1, false); break; // FIXME: Check destination format
1791 case FORMAT_RGBA_ASTC_5x5_KHR: decodeASTC(destination, source, 5, 5, 1, false); break; // FIXME: Check destination format
1792 case FORMAT_RGBA_ASTC_6x5_KHR: decodeASTC(destination, source, 6, 5, 1, false); break; // FIXME: Check destination format
1793 case FORMAT_RGBA_ASTC_6x6_KHR: decodeASTC(destination, source, 6, 6, 1, false); break; // FIXME: Check destination format
1794 case FORMAT_RGBA_ASTC_8x5_KHR: decodeASTC(destination, source, 8, 5, 1, false); break; // FIXME: Check destination format
1795 case FORMAT_RGBA_ASTC_8x6_KHR: decodeASTC(destination, source, 8, 6, 1, false); break; // FIXME: Check destination format
1796 case FORMAT_RGBA_ASTC_8x8_KHR: decodeASTC(destination, source, 8, 8, 1, false); break; // FIXME: Check destination format
1797 case FORMAT_RGBA_ASTC_10x5_KHR: decodeASTC(destination, source, 10, 5, 1, false); break; // FIXME: Check destination format
1798 case FORMAT_RGBA_ASTC_10x6_KHR: decodeASTC(destination, source, 10, 6, 1, false); break; // FIXME: Check destination format
1799 case FORMAT_RGBA_ASTC_10x8_KHR: decodeASTC(destination, source, 10, 8, 1, false); break; // FIXME: Check destination format
1800 case FORMAT_RGBA_ASTC_10x10_KHR: decodeASTC(destination, source, 10, 10, 1, false); break; // FIXME: Check destination format
1801 case FORMAT_RGBA_ASTC_12x10_KHR: decodeASTC(destination, source, 12, 10, 1, false); break; // FIXME: Check destination format
1802 case FORMAT_RGBA_ASTC_12x12_KHR: decodeASTC(destination, source, 12, 12, 1, false); break; // FIXME: Check destination format
1803 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR: decodeASTC(destination, source, 4, 4, 1, true); break; // FIXME: Check destination format
1804 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR: decodeASTC(destination, source, 5, 4, 1, true); break; // FIXME: Check destination format
1805 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR: decodeASTC(destination, source, 5, 5, 1, true); break; // FIXME: Check destination format
1806 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR: decodeASTC(destination, source, 6, 5, 1, true); break; // FIXME: Check destination format
1807 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR: decodeASTC(destination, source, 6, 6, 1, true); break; // FIXME: Check destination format
1808 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR: decodeASTC(destination, source, 8, 5, 1, true); break; // FIXME: Check destination format
1809 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR: decodeASTC(destination, source, 8, 6, 1, true); break; // FIXME: Check destination format
1810 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR: decodeASTC(destination, source, 8, 8, 1, true); break; // FIXME: Check destination format
1811 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR: decodeASTC(destination, source, 10, 5, 1, true); break; // FIXME: Check destination format
1812 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR: decodeASTC(destination, source, 10, 6, 1, true); break; // FIXME: Check destination format
1813 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR: decodeASTC(destination, source, 10, 8, 1, true); break; // FIXME: Check destination format
1814 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR: decodeASTC(destination, source, 10, 10, 1, true); break; // FIXME: Check destination format
1815 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR: decodeASTC(destination, source, 12, 10, 1, true); break; // FIXME: Check destination format
1816 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR: decodeASTC(destination, source, 12, 12, 1, true); break; // FIXME: Check destination format
John Bauman89401822014-05-06 15:04:28 -04001817 default: genericUpdate(destination, source); break;
1818 }
1819 }
John Bauman89401822014-05-06 15:04:28 -04001820 }
1821
1822 void Surface::genericUpdate(Buffer &destination, Buffer &source)
1823 {
1824 unsigned char *sourceSlice = (unsigned char*)source.buffer;
1825 unsigned char *destinationSlice = (unsigned char*)destination.buffer;
1826
1827 int depth = min(destination.depth, source.depth);
1828 int height = min(destination.height, source.height);
1829 int width = min(destination.width, source.width);
1830 int rowBytes = width * source.bytes;
1831
1832 for(int z = 0; z < depth; z++)
1833 {
1834 unsigned char *sourceRow = sourceSlice;
1835 unsigned char *destinationRow = destinationSlice;
1836
1837 for(int y = 0; y < height; y++)
1838 {
1839 if(source.format == destination.format)
1840 {
1841 memcpy(destinationRow, sourceRow, rowBytes);
1842 }
1843 else
1844 {
1845 unsigned char *sourceElement = sourceRow;
1846 unsigned char *destinationElement = destinationRow;
1847
1848 for(int x = 0; x < width; x++)
1849 {
1850 Color<float> color = source.read(sourceElement);
1851 destination.write(destinationElement, color);
1852
1853 sourceElement += source.bytes;
1854 destinationElement += destination.bytes;
1855 }
1856 }
1857
1858 sourceRow += source.pitchB;
1859 destinationRow += destination.pitchB;
1860 }
1861
1862 sourceSlice += source.sliceB;
1863 destinationSlice += destination.sliceB;
1864 }
1865 }
1866
1867 void Surface::decodeR8G8B8(Buffer &destination, const Buffer &source)
1868 {
1869 unsigned char *sourceSlice = (unsigned char*)source.buffer;
1870 unsigned char *destinationSlice = (unsigned char*)destination.buffer;
1871
1872 for(int z = 0; z < destination.depth && z < source.depth; z++)
1873 {
1874 unsigned char *sourceRow = sourceSlice;
1875 unsigned char *destinationRow = destinationSlice;
1876
1877 for(int y = 0; y < destination.height && y < source.height; y++)
1878 {
1879 unsigned char *sourceElement = sourceRow;
1880 unsigned char *destinationElement = destinationRow;
1881
1882 for(int x = 0; x < destination.width && x < source.width; x++)
1883 {
1884 unsigned int b = sourceElement[0];
1885 unsigned int g = sourceElement[1];
1886 unsigned int r = sourceElement[2];
1887
1888 *(unsigned int*)destinationElement = 0xFF000000 | (r << 16) | (g << 8) | (b << 0);
1889
1890 sourceElement += source.bytes;
1891 destinationElement += destination.bytes;
1892 }
1893
1894 sourceRow += source.pitchB;
1895 destinationRow += destination.pitchB;
1896 }
1897
1898 sourceSlice += source.sliceB;
1899 destinationSlice += destination.sliceB;
1900 }
1901 }
1902
John Bauman89401822014-05-06 15:04:28 -04001903 void Surface::decodeX1R5G5B5(Buffer &destination, const Buffer &source)
1904 {
1905 unsigned char *sourceSlice = (unsigned char*)source.buffer;
1906 unsigned char *destinationSlice = (unsigned char*)destination.buffer;
1907
1908 for(int z = 0; z < destination.depth && z < source.depth; z++)
1909 {
1910 unsigned char *sourceRow = sourceSlice;
1911 unsigned char *destinationRow = destinationSlice;
1912
1913 for(int y = 0; y < destination.height && y < source.height; y++)
1914 {
1915 unsigned char *sourceElement = sourceRow;
1916 unsigned char *destinationElement = destinationRow;
1917
1918 for(int x = 0; x < destination.width && x < source.width; x++)
1919 {
1920 unsigned int xrgb = *(unsigned short*)sourceElement;
1921
1922 unsigned int r = (((xrgb & 0x7C00) * 134771 + 0x800000) >> 8) & 0x00FF0000;
1923 unsigned int g = (((xrgb & 0x03E0) * 16846 + 0x8000) >> 8) & 0x0000FF00;
1924 unsigned int b = (((xrgb & 0x001F) * 2106 + 0x80) >> 8);
1925
1926 *(unsigned int*)destinationElement = 0xFF000000 | r | g | b;
1927
1928 sourceElement += source.bytes;
1929 destinationElement += destination.bytes;
1930 }
1931
1932 sourceRow += source.pitchB;
1933 destinationRow += destination.pitchB;
1934 }
1935
1936 sourceSlice += source.sliceB;
1937 destinationSlice += destination.sliceB;
1938 }
1939 }
1940
1941 void Surface::decodeA1R5G5B5(Buffer &destination, const Buffer &source)
1942 {
1943 unsigned char *sourceSlice = (unsigned char*)source.buffer;
1944 unsigned char *destinationSlice = (unsigned char*)destination.buffer;
1945
1946 for(int z = 0; z < destination.depth && z < source.depth; z++)
1947 {
1948 unsigned char *sourceRow = sourceSlice;
1949 unsigned char *destinationRow = destinationSlice;
1950
1951 for(int y = 0; y < destination.height && y < source.height; y++)
1952 {
1953 unsigned char *sourceElement = sourceRow;
1954 unsigned char *destinationElement = destinationRow;
1955
1956 for(int x = 0; x < destination.width && x < source.width; x++)
1957 {
1958 unsigned int argb = *(unsigned short*)sourceElement;
1959
1960 unsigned int a = (argb & 0x8000) * 130560;
1961 unsigned int r = (((argb & 0x7C00) * 134771 + 0x800000) >> 8) & 0x00FF0000;
1962 unsigned int g = (((argb & 0x03E0) * 16846 + 0x8000) >> 8) & 0x0000FF00;
1963 unsigned int b = (((argb & 0x001F) * 2106 + 0x80) >> 8);
1964
1965 *(unsigned int*)destinationElement = a | r | g | b;
1966
1967 sourceElement += source.bytes;
1968 destinationElement += destination.bytes;
1969 }
1970
1971 sourceRow += source.pitchB;
1972 destinationRow += destination.pitchB;
1973 }
1974
1975 sourceSlice += source.sliceB;
1976 destinationSlice += destination.sliceB;
1977 }
1978 }
1979
1980 void Surface::decodeX4R4G4B4(Buffer &destination, const Buffer &source)
1981 {
1982 unsigned char *sourceSlice = (unsigned char*)source.buffer;
1983 unsigned char *destinationSlice = (unsigned char*)destination.buffer;
1984
1985 for(int z = 0; z < destination.depth && z < source.depth; z++)
1986 {
1987 unsigned char *sourceRow = sourceSlice;
1988 unsigned char *destinationRow = destinationSlice;
1989
1990 for(int y = 0; y < destination.height && y < source.height; y++)
1991 {
1992 unsigned char *sourceElement = sourceRow;
1993 unsigned char *destinationElement = destinationRow;
1994
1995 for(int x = 0; x < destination.width && x < source.width; x++)
1996 {
1997 unsigned int xrgb = *(unsigned short*)sourceElement;
1998
1999 unsigned int r = ((xrgb & 0x0F00) * 0x00001100) & 0x00FF0000;
2000 unsigned int g = ((xrgb & 0x00F0) * 0x00000110) & 0x0000FF00;
2001 unsigned int b = (xrgb & 0x000F) * 0x00000011;
2002
2003 *(unsigned int*)destinationElement = 0xFF000000 | r | g | b;
2004
2005 sourceElement += source.bytes;
2006 destinationElement += destination.bytes;
2007 }
2008
2009 sourceRow += source.pitchB;
2010 destinationRow += destination.pitchB;
2011 }
2012
2013 sourceSlice += source.sliceB;
2014 destinationSlice += destination.sliceB;
2015 }
2016 }
2017
2018 void Surface::decodeA4R4G4B4(Buffer &destination, const Buffer &source)
2019 {
2020 unsigned char *sourceSlice = (unsigned char*)source.buffer;
2021 unsigned char *destinationSlice = (unsigned char*)destination.buffer;
2022
2023 for(int z = 0; z < destination.depth && z < source.depth; z++)
2024 {
2025 unsigned char *sourceRow = sourceSlice;
2026 unsigned char *destinationRow = destinationSlice;
2027
2028 for(int y = 0; y < destination.height && y < source.height; y++)
2029 {
2030 unsigned char *sourceElement = sourceRow;
2031 unsigned char *destinationElement = destinationRow;
2032
2033 for(int x = 0; x < destination.width && x < source.width; x++)
2034 {
2035 unsigned int argb = *(unsigned short*)sourceElement;
2036
2037 unsigned int a = ((argb & 0xF000) * 0x00011000) & 0xFF000000;
2038 unsigned int r = ((argb & 0x0F00) * 0x00001100) & 0x00FF0000;
2039 unsigned int g = ((argb & 0x00F0) * 0x00000110) & 0x0000FF00;
2040 unsigned int b = (argb & 0x000F) * 0x00000011;
2041
2042 *(unsigned int*)destinationElement = a | r | g | b;
2043
2044 sourceElement += source.bytes;
2045 destinationElement += destination.bytes;
2046 }
2047
2048 sourceRow += source.pitchB;
2049 destinationRow += destination.pitchB;
2050 }
2051
2052 sourceSlice += source.sliceB;
2053 destinationSlice += destination.sliceB;
2054 }
2055 }
2056
2057 void Surface::decodeP8(Buffer &destination, const Buffer &source)
2058 {
2059 unsigned char *sourceSlice = (unsigned char*)source.buffer;
2060 unsigned char *destinationSlice = (unsigned char*)destination.buffer;
2061
2062 for(int z = 0; z < destination.depth && z < source.depth; z++)
2063 {
2064 unsigned char *sourceRow = sourceSlice;
2065 unsigned char *destinationRow = destinationSlice;
2066
2067 for(int y = 0; y < destination.height && y < source.height; y++)
2068 {
2069 unsigned char *sourceElement = sourceRow;
2070 unsigned char *destinationElement = destinationRow;
2071
2072 for(int x = 0; x < destination.width && x < source.width; x++)
2073 {
2074 unsigned int abgr = palette[*(unsigned char*)sourceElement];
2075
2076 unsigned int r = (abgr & 0x000000FF) << 16;
2077 unsigned int g = (abgr & 0x0000FF00) << 0;
2078 unsigned int b = (abgr & 0x00FF0000) >> 16;
2079 unsigned int a = (abgr & 0xFF000000) >> 0;
2080
2081 *(unsigned int*)destinationElement = a | r | g | b;
2082
2083 sourceElement += source.bytes;
2084 destinationElement += destination.bytes;
2085 }
2086
2087 sourceRow += source.pitchB;
2088 destinationRow += destination.pitchB;
2089 }
2090
2091 sourceSlice += source.sliceB;
2092 destinationSlice += destination.sliceB;
2093 }
2094 }
2095
2096#if S3TC_SUPPORT
2097 void Surface::decodeDXT1(Buffer &internal, const Buffer &external)
2098 {
2099 unsigned int *destSlice = (unsigned int*)internal.buffer;
Nicolas Capens22658242014-11-29 00:31:41 -05002100 const DXT1 *source = (const DXT1*)external.buffer;
John Bauman89401822014-05-06 15:04:28 -04002101
2102 for(int z = 0; z < external.depth; z++)
2103 {
2104 unsigned int *dest = destSlice;
2105
2106 for(int y = 0; y < external.height; y += 4)
2107 {
2108 for(int x = 0; x < external.width; x += 4)
2109 {
2110 Color<byte> c[4];
2111
2112 c[0] = source->c0;
2113 c[1] = source->c1;
2114
2115 if(source->c0 > source->c1) // No transparency
2116 {
2117 // c2 = 2 / 3 * c0 + 1 / 3 * c1
2118 c[2].r = (byte)((2 * (word)c[0].r + (word)c[1].r + 1) / 3);
2119 c[2].g = (byte)((2 * (word)c[0].g + (word)c[1].g + 1) / 3);
2120 c[2].b = (byte)((2 * (word)c[0].b + (word)c[1].b + 1) / 3);
2121 c[2].a = 0xFF;
2122
2123 // c3 = 1 / 3 * c0 + 2 / 3 * c1
2124 c[3].r = (byte)(((word)c[0].r + 2 * (word)c[1].r + 1) / 3);
2125 c[3].g = (byte)(((word)c[0].g + 2 * (word)c[1].g + 1) / 3);
2126 c[3].b = (byte)(((word)c[0].b + 2 * (word)c[1].b + 1) / 3);
2127 c[3].a = 0xFF;
2128 }
2129 else // c3 transparent
2130 {
2131 // c2 = 1 / 2 * c0 + 1 / 2 * c1
2132 c[2].r = (byte)(((word)c[0].r + (word)c[1].r) / 2);
2133 c[2].g = (byte)(((word)c[0].g + (word)c[1].g) / 2);
2134 c[2].b = (byte)(((word)c[0].b + (word)c[1].b) / 2);
2135 c[2].a = 0xFF;
2136
2137 c[3].r = 0;
2138 c[3].g = 0;
2139 c[3].b = 0;
2140 c[3].a = 0;
2141 }
2142
2143 for(int j = 0; j < 4 && (y + j) < internal.height; j++)
2144 {
2145 for(int i = 0; i < 4 && (x + i) < internal.width; i++)
2146 {
2147 dest[(x + i) + (y + j) * internal.width] = c[(unsigned int)(source->lut >> 2 * (i + j * 4)) % 4];
2148 }
2149 }
2150
2151 source++;
2152 }
2153 }
2154
2155 (byte*&)destSlice += internal.sliceB;
2156 }
2157 }
2158
2159 void Surface::decodeDXT3(Buffer &internal, const Buffer &external)
2160 {
2161 unsigned int *destSlice = (unsigned int*)internal.buffer;
Nicolas Capens22658242014-11-29 00:31:41 -05002162 const DXT3 *source = (const DXT3*)external.buffer;
John Bauman89401822014-05-06 15:04:28 -04002163
2164 for(int z = 0; z < external.depth; z++)
2165 {
2166 unsigned int *dest = destSlice;
2167
2168 for(int y = 0; y < external.height; y += 4)
2169 {
2170 for(int x = 0; x < external.width; x += 4)
2171 {
2172 Color<byte> c[4];
2173
2174 c[0] = source->c0;
2175 c[1] = source->c1;
2176
2177 // c2 = 2 / 3 * c0 + 1 / 3 * c1
2178 c[2].r = (byte)((2 * (word)c[0].r + (word)c[1].r + 1) / 3);
2179 c[2].g = (byte)((2 * (word)c[0].g + (word)c[1].g + 1) / 3);
2180 c[2].b = (byte)((2 * (word)c[0].b + (word)c[1].b + 1) / 3);
2181
2182 // c3 = 1 / 3 * c0 + 2 / 3 * c1
2183 c[3].r = (byte)(((word)c[0].r + 2 * (word)c[1].r + 1) / 3);
2184 c[3].g = (byte)(((word)c[0].g + 2 * (word)c[1].g + 1) / 3);
2185 c[3].b = (byte)(((word)c[0].b + 2 * (word)c[1].b + 1) / 3);
2186
2187 for(int j = 0; j < 4 && (y + j) < internal.height; j++)
2188 {
2189 for(int i = 0; i < 4 && (x + i) < internal.width; i++)
2190 {
2191 unsigned int a = (unsigned int)(source->a >> 4 * (i + j * 4)) & 0x0F;
2192 unsigned int color = (c[(unsigned int)(source->lut >> 2 * (i + j * 4)) % 4] & 0x00FFFFFF) | ((a << 28) + (a << 24));
2193
2194 dest[(x + i) + (y + j) * internal.width] = color;
2195 }
2196 }
2197
2198 source++;
2199 }
2200 }
2201
2202 (byte*&)destSlice += internal.sliceB;
2203 }
2204 }
2205
2206 void Surface::decodeDXT5(Buffer &internal, const Buffer &external)
2207 {
2208 unsigned int *destSlice = (unsigned int*)internal.buffer;
Nicolas Capens22658242014-11-29 00:31:41 -05002209 const DXT5 *source = (const DXT5*)external.buffer;
John Bauman89401822014-05-06 15:04:28 -04002210
2211 for(int z = 0; z < external.depth; z++)
2212 {
2213 unsigned int *dest = destSlice;
2214
2215 for(int y = 0; y < external.height; y += 4)
2216 {
2217 for(int x = 0; x < external.width; x += 4)
2218 {
2219 Color<byte> c[4];
2220
2221 c[0] = source->c0;
2222 c[1] = source->c1;
2223
2224 // c2 = 2 / 3 * c0 + 1 / 3 * c1
2225 c[2].r = (byte)((2 * (word)c[0].r + (word)c[1].r + 1) / 3);
2226 c[2].g = (byte)((2 * (word)c[0].g + (word)c[1].g + 1) / 3);
2227 c[2].b = (byte)((2 * (word)c[0].b + (word)c[1].b + 1) / 3);
2228
2229 // c3 = 1 / 3 * c0 + 2 / 3 * c1
2230 c[3].r = (byte)(((word)c[0].r + 2 * (word)c[1].r + 1) / 3);
2231 c[3].g = (byte)(((word)c[0].g + 2 * (word)c[1].g + 1) / 3);
2232 c[3].b = (byte)(((word)c[0].b + 2 * (word)c[1].b + 1) / 3);
2233
2234 byte a[8];
2235
2236 a[0] = source->a0;
2237 a[1] = source->a1;
2238
2239 if(a[0] > a[1])
2240 {
2241 a[2] = (byte)((6 * (word)a[0] + 1 * (word)a[1] + 3) / 7);
2242 a[3] = (byte)((5 * (word)a[0] + 2 * (word)a[1] + 3) / 7);
2243 a[4] = (byte)((4 * (word)a[0] + 3 * (word)a[1] + 3) / 7);
2244 a[5] = (byte)((3 * (word)a[0] + 4 * (word)a[1] + 3) / 7);
2245 a[6] = (byte)((2 * (word)a[0] + 5 * (word)a[1] + 3) / 7);
2246 a[7] = (byte)((1 * (word)a[0] + 6 * (word)a[1] + 3) / 7);
2247 }
2248 else
2249 {
2250 a[2] = (byte)((4 * (word)a[0] + 1 * (word)a[1] + 2) / 5);
2251 a[3] = (byte)((3 * (word)a[0] + 2 * (word)a[1] + 2) / 5);
2252 a[4] = (byte)((2 * (word)a[0] + 3 * (word)a[1] + 2) / 5);
2253 a[5] = (byte)((1 * (word)a[0] + 4 * (word)a[1] + 2) / 5);
2254 a[6] = 0;
2255 a[7] = 0xFF;
2256 }
2257
2258 for(int j = 0; j < 4 && (y + j) < internal.height; j++)
2259 {
2260 for(int i = 0; i < 4 && (x + i) < internal.width; i++)
2261 {
2262 unsigned int alpha = (unsigned int)a[(unsigned int)(source->alut >> (16 + 3 * (i + j * 4))) % 8] << 24;
2263 unsigned int color = (c[(source->clut >> 2 * (i + j * 4)) % 4] & 0x00FFFFFF) | alpha;
2264
2265 dest[(x + i) + (y + j) * internal.width] = color;
2266 }
2267 }
2268
2269 source++;
2270 }
2271 }
2272
2273 (byte*&)destSlice += internal.sliceB;
2274 }
2275 }
Nicolas Capens22658242014-11-29 00:31:41 -05002276#endif
John Bauman89401822014-05-06 15:04:28 -04002277
2278 void Surface::decodeATI1(Buffer &internal, const Buffer &external)
2279 {
2280 byte *destSlice = (byte*)internal.buffer;
Nicolas Capens22658242014-11-29 00:31:41 -05002281 const ATI1 *source = (const ATI1*)external.buffer;
John Bauman89401822014-05-06 15:04:28 -04002282
2283 for(int z = 0; z < external.depth; z++)
2284 {
2285 byte *dest = destSlice;
2286
2287 for(int y = 0; y < external.height; y += 4)
2288 {
2289 for(int x = 0; x < external.width; x += 4)
2290 {
2291 byte r[8];
2292
2293 r[0] = source->r0;
2294 r[1] = source->r1;
2295
2296 if(r[0] > r[1])
2297 {
2298 r[2] = (byte)((6 * (word)r[0] + 1 * (word)r[1] + 3) / 7);
2299 r[3] = (byte)((5 * (word)r[0] + 2 * (word)r[1] + 3) / 7);
2300 r[4] = (byte)((4 * (word)r[0] + 3 * (word)r[1] + 3) / 7);
2301 r[5] = (byte)((3 * (word)r[0] + 4 * (word)r[1] + 3) / 7);
2302 r[6] = (byte)((2 * (word)r[0] + 5 * (word)r[1] + 3) / 7);
2303 r[7] = (byte)((1 * (word)r[0] + 6 * (word)r[1] + 3) / 7);
2304 }
2305 else
2306 {
2307 r[2] = (byte)((4 * (word)r[0] + 1 * (word)r[1] + 2) / 5);
2308 r[3] = (byte)((3 * (word)r[0] + 2 * (word)r[1] + 2) / 5);
2309 r[4] = (byte)((2 * (word)r[0] + 3 * (word)r[1] + 2) / 5);
2310 r[5] = (byte)((1 * (word)r[0] + 4 * (word)r[1] + 2) / 5);
2311 r[6] = 0;
2312 r[7] = 0xFF;
2313 }
2314
2315 for(int j = 0; j < 4 && (y + j) < internal.height; j++)
2316 {
2317 for(int i = 0; i < 4 && (x + i) < internal.width; i++)
2318 {
2319 dest[(x + i) + (y + j) * internal.width] = r[(unsigned int)(source->rlut >> (16 + 3 * (i + j * 4))) % 8];
2320 }
2321 }
2322
2323 source++;
2324 }
2325 }
2326
2327 destSlice += internal.sliceB;
2328 }
2329 }
2330
2331 void Surface::decodeATI2(Buffer &internal, const Buffer &external)
2332 {
2333 word *destSlice = (word*)internal.buffer;
Nicolas Capens22658242014-11-29 00:31:41 -05002334 const ATI2 *source = (const ATI2*)external.buffer;
John Bauman89401822014-05-06 15:04:28 -04002335
2336 for(int z = 0; z < external.depth; z++)
2337 {
2338 word *dest = destSlice;
2339
2340 for(int y = 0; y < external.height; y += 4)
2341 {
2342 for(int x = 0; x < external.width; x += 4)
2343 {
2344 byte X[8];
2345
2346 X[0] = source->x0;
2347 X[1] = source->x1;
2348
2349 if(X[0] > X[1])
2350 {
2351 X[2] = (byte)((6 * (word)X[0] + 1 * (word)X[1] + 3) / 7);
2352 X[3] = (byte)((5 * (word)X[0] + 2 * (word)X[1] + 3) / 7);
2353 X[4] = (byte)((4 * (word)X[0] + 3 * (word)X[1] + 3) / 7);
2354 X[5] = (byte)((3 * (word)X[0] + 4 * (word)X[1] + 3) / 7);
2355 X[6] = (byte)((2 * (word)X[0] + 5 * (word)X[1] + 3) / 7);
2356 X[7] = (byte)((1 * (word)X[0] + 6 * (word)X[1] + 3) / 7);
2357 }
2358 else
2359 {
2360 X[2] = (byte)((4 * (word)X[0] + 1 * (word)X[1] + 2) / 5);
2361 X[3] = (byte)((3 * (word)X[0] + 2 * (word)X[1] + 2) / 5);
2362 X[4] = (byte)((2 * (word)X[0] + 3 * (word)X[1] + 2) / 5);
2363 X[5] = (byte)((1 * (word)X[0] + 4 * (word)X[1] + 2) / 5);
2364 X[6] = 0;
2365 X[7] = 0xFF;
2366 }
2367
2368 byte Y[8];
2369
2370 Y[0] = source->y0;
2371 Y[1] = source->y1;
2372
2373 if(Y[0] > Y[1])
2374 {
2375 Y[2] = (byte)((6 * (word)Y[0] + 1 * (word)Y[1] + 3) / 7);
2376 Y[3] = (byte)((5 * (word)Y[0] + 2 * (word)Y[1] + 3) / 7);
2377 Y[4] = (byte)((4 * (word)Y[0] + 3 * (word)Y[1] + 3) / 7);
2378 Y[5] = (byte)((3 * (word)Y[0] + 4 * (word)Y[1] + 3) / 7);
2379 Y[6] = (byte)((2 * (word)Y[0] + 5 * (word)Y[1] + 3) / 7);
2380 Y[7] = (byte)((1 * (word)Y[0] + 6 * (word)Y[1] + 3) / 7);
2381 }
2382 else
2383 {
2384 Y[2] = (byte)((4 * (word)Y[0] + 1 * (word)Y[1] + 2) / 5);
2385 Y[3] = (byte)((3 * (word)Y[0] + 2 * (word)Y[1] + 2) / 5);
2386 Y[4] = (byte)((2 * (word)Y[0] + 3 * (word)Y[1] + 2) / 5);
2387 Y[5] = (byte)((1 * (word)Y[0] + 4 * (word)Y[1] + 2) / 5);
2388 Y[6] = 0;
2389 Y[7] = 0xFF;
2390 }
2391
2392 for(int j = 0; j < 4 && (y + j) < internal.height; j++)
2393 {
2394 for(int i = 0; i < 4 && (x + i) < internal.width; i++)
2395 {
2396 word r = X[(unsigned int)(source->xlut >> (16 + 3 * (i + j * 4))) % 8];
2397 word g = Y[(unsigned int)(source->ylut >> (16 + 3 * (i + j * 4))) % 8];
2398
2399 dest[(x + i) + (y + j) * internal.width] = (g << 8) + r;
2400 }
2401 }
2402
2403 source++;
2404 }
2405 }
2406
2407 (byte*&)destSlice += internal.sliceB;
2408 }
2409 }
Nicolas Capens22658242014-11-29 00:31:41 -05002410
Alexis Hetu0de50d42015-09-09 13:56:41 -04002411 void Surface::decodeETC2(Buffer &internal, const Buffer &external, int nbAlphaBits, bool isSRGB)
Nicolas Capens22658242014-11-29 00:31:41 -05002412 {
Alexis Hetu0de50d42015-09-09 13:56:41 -04002413 ETC_Decoder::Decode((const byte*)external.buffer, (byte*)internal.buffer, external.width, external.height, internal.width, internal.height, internal.pitchB, internal.bytes,
2414 (nbAlphaBits == 8) ? ETC_Decoder::ETC_RGBA : ((nbAlphaBits == 1) ? ETC_Decoder::ETC_RGB_PUNCHTHROUGH_ALPHA : ETC_Decoder::ETC_RGB));
Nicolas Capens22658242014-11-29 00:31:41 -05002415
Alexis Hetu0de50d42015-09-09 13:56:41 -04002416 if(isSRGB)
Nicolas Capens22658242014-11-29 00:31:41 -05002417 {
Alexis Hetu0de50d42015-09-09 13:56:41 -04002418 static byte sRGBtoLinearTable[256];
2419 static bool sRGBtoLinearTableDirty = true;
2420 if(sRGBtoLinearTableDirty)
Nicolas Capens22658242014-11-29 00:31:41 -05002421 {
Alexis Hetu0de50d42015-09-09 13:56:41 -04002422 for(int i = 0; i < 256; i++)
Nicolas Capens22658242014-11-29 00:31:41 -05002423 {
Alexis Hetu0de50d42015-09-09 13:56:41 -04002424 sRGBtoLinearTable[i] = static_cast<byte>(sRGBtoLinear(static_cast<float>(i) / 255.0f) * 255.0f + 0.5f);
Nicolas Capens22658242014-11-29 00:31:41 -05002425 }
Alexis Hetu0de50d42015-09-09 13:56:41 -04002426 sRGBtoLinearTableDirty = false;
Nicolas Capens22658242014-11-29 00:31:41 -05002427 }
2428
Alexis Hetu0de50d42015-09-09 13:56:41 -04002429 // Perform sRGB conversion in place after decoding
2430 byte* src = (byte*)internal.buffer;
2431 for(int y = 0; y < internal.height; y++)
2432 {
2433 byte* srcRow = src + y * internal.pitchB;
2434 for(int x = 0; x < internal.width; x++)
2435 {
2436 byte* srcPix = srcRow + x * internal.bytes;
2437 for(int i = 0; i < 3; i++)
2438 {
2439 srcPix[i] = sRGBtoLinearTable[srcPix[i]];
2440 }
2441 }
2442 }
Nicolas Capens22658242014-11-29 00:31:41 -05002443 }
2444 }
John Bauman89401822014-05-06 15:04:28 -04002445
Alexis Hetu460e41f2015-09-01 10:58:37 -04002446 void Surface::decodeEAC(Buffer &internal, const Buffer &external, int nbChannels, bool isSigned)
2447 {
Alexis Hetu0de50d42015-09-09 13:56:41 -04002448 ASSERT(nbChannels == 1 || nbChannels == 2);
Alexis Hetu460e41f2015-09-01 10:58:37 -04002449
Alexis Hetu0de50d42015-09-09 13:56:41 -04002450 ETC_Decoder::Decode((const byte*)external.buffer, (byte*)internal.buffer, external.width, external.height, internal.width, internal.height, internal.pitchB, internal.bytes,
2451 (nbChannels == 1) ? (isSigned ? ETC_Decoder::ETC_R_SIGNED : ETC_Decoder::ETC_R_UNSIGNED) : (isSigned ? ETC_Decoder::ETC_RG_SIGNED : ETC_Decoder::ETC_RG_UNSIGNED));
2452
2453 // FIXME: We convert signed data to float, until signed integer internal formats are supported
2454 // This code can be removed if signed ETC2 images are decoded to internal 8 bit signed R/RG formats
2455 if(isSigned)
2456 {
2457 sbyte* src = (sbyte*)internal.buffer;
2458
2459 for(int y = 0; y < internal.height; y++)
2460 {
2461 sbyte* srcRow = src + y * internal.pitchB;
2462 for(int x = internal.width - 1; x >= 0; x--)
2463 {
2464 int dx = x & 0xFFFFFFFC;
2465 int mx = x - dx;
2466 sbyte* srcPix = srcRow + dx * internal.bytes + mx * nbChannels;
2467 float* dstPix = (float*)(srcRow + x * internal.bytes);
2468 for(int c = nbChannels - 1; c >= 0; c--)
2469 {
2470 static const float normalization = 1.0f / 127.875f;
2471 dstPix[c] = clamp(static_cast<float>(srcPix[c]) * normalization, -1.0f, 1.0f);
2472 }
2473 }
2474 }
2475 }
Alexis Hetu460e41f2015-09-01 10:58:37 -04002476 }
2477
2478 void Surface::decodeASTC(Buffer &internal, const Buffer &external, int xBlockSize, int yBlockSize, int zBlockSize, bool isSRGB)
2479 {
2480 }
2481
John Bauman89401822014-05-06 15:04:28 -04002482 unsigned int Surface::size(int width, int height, int depth, Format format)
2483 {
Nicolas Capens00555c42015-07-21 15:15:30 -04002484 // Dimensions rounded up to multiples of 4, used for compressed formats
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04002485 int width4 = align(width, 4);
2486 int height4 = align(height, 4);
John Bauman89401822014-05-06 15:04:28 -04002487
2488 switch(format)
2489 {
2490 #if S3TC_SUPPORT
2491 case FORMAT_DXT1:
John Bauman66b8ab22014-05-06 15:57:45 -04002492 #endif
John Bauman89401822014-05-06 15:04:28 -04002493 case FORMAT_ATI1:
Nicolas Capens22658242014-11-29 00:31:41 -05002494 case FORMAT_ETC1:
Alexis Hetu460e41f2015-09-01 10:58:37 -04002495 case FORMAT_R11_EAC:
2496 case FORMAT_SIGNED_R11_EAC:
2497 case FORMAT_RGB8_ETC2:
2498 case FORMAT_SRGB8_ETC2:
2499 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
2500 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
John Bauman89401822014-05-06 15:04:28 -04002501 return width4 * height4 * depth / 2;
John Bauman66b8ab22014-05-06 15:57:45 -04002502 #if S3TC_SUPPORT
John Bauman89401822014-05-06 15:04:28 -04002503 case FORMAT_DXT3:
2504 case FORMAT_DXT5:
John Bauman66b8ab22014-05-06 15:57:45 -04002505 #endif
John Bauman89401822014-05-06 15:04:28 -04002506 case FORMAT_ATI2:
Alexis Hetu460e41f2015-09-01 10:58:37 -04002507 case FORMAT_RG11_EAC:
2508 case FORMAT_SIGNED_RG11_EAC:
2509 case FORMAT_RGBA8_ETC2_EAC:
2510 case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
2511 case FORMAT_RGBA_ASTC_4x4_KHR:
2512 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
John Bauman89401822014-05-06 15:04:28 -04002513 return width4 * height4 * depth;
Alexis Hetu460e41f2015-09-01 10:58:37 -04002514 case FORMAT_RGBA_ASTC_5x4_KHR:
2515 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
2516 return align(width, 5) * height4 * depth;
2517 case FORMAT_RGBA_ASTC_5x5_KHR:
2518 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
2519 return align(width, 5) * align(height, 5) * depth;
2520 case FORMAT_RGBA_ASTC_6x5_KHR:
2521 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
2522 return align(width, 6) * align(height, 5) * depth;
2523 case FORMAT_RGBA_ASTC_6x6_KHR:
2524 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
2525 return align(width, 6) * align(height, 6) * depth;
2526 case FORMAT_RGBA_ASTC_8x5_KHR:
2527 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
2528 return align(width, 8) * align(height, 5) * depth;
2529 case FORMAT_RGBA_ASTC_8x6_KHR:
2530 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
2531 return align(width, 8) * align(height, 6) * depth;
2532 case FORMAT_RGBA_ASTC_8x8_KHR:
2533 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
2534 return align(width, 8) * align(height, 8) * depth;
2535 case FORMAT_RGBA_ASTC_10x5_KHR:
2536 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
2537 return align(width, 10) * align(height, 5) * depth;
2538 case FORMAT_RGBA_ASTC_10x6_KHR:
2539 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
2540 return align(width, 10) * align(height, 6) * depth;
2541 case FORMAT_RGBA_ASTC_10x8_KHR:
2542 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
2543 return align(width, 10) * align(height, 8) * depth;
2544 case FORMAT_RGBA_ASTC_10x10_KHR:
2545 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
2546 return align(width, 10) * align(height, 10) * depth;
2547 case FORMAT_RGBA_ASTC_12x10_KHR:
2548 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
2549 return align(width, 12) * align(height, 10) * depth;
2550 case FORMAT_RGBA_ASTC_12x12_KHR:
2551 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:
2552 return align(width, 12) * align(height, 12) * depth;
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04002553 case FORMAT_YV12_BT601:
2554 case FORMAT_YV12_BT709:
2555 case FORMAT_YV12_JFIF:
2556 {
2557 unsigned int YStride = align(width, 16);
2558 unsigned int YSize = YStride * height;
2559 unsigned int CStride = align(YStride / 2, 16);
2560 unsigned int CSize = CStride * height / 2;
2561
2562 return YSize + 2 * CSize;
2563 }
John Bauman89401822014-05-06 15:04:28 -04002564 default:
2565 return bytes(format) * width * height * depth;
2566 }
2567
2568 return 0;
2569 }
2570
2571 bool Surface::isStencil(Format format)
2572 {
2573 switch(format)
2574 {
2575 case FORMAT_D32:
2576 case FORMAT_D16:
2577 case FORMAT_D24X8:
2578 case FORMAT_D32F:
2579 case FORMAT_D32F_COMPLEMENTARY:
2580 case FORMAT_D32F_LOCKABLE:
2581 return false;
2582 case FORMAT_D24S8:
2583 case FORMAT_D24FS8:
2584 case FORMAT_S8:
John Bauman66b8ab22014-05-06 15:57:45 -04002585 case FORMAT_DF24S8:
2586 case FORMAT_DF16S8:
2587 case FORMAT_D32FS8_TEXTURE:
2588 case FORMAT_D32FS8_SHADOW:
John Bauman89401822014-05-06 15:04:28 -04002589 case FORMAT_INTZ:
2590 return true;
2591 default:
2592 return false;
2593 }
2594 }
2595
2596 bool Surface::isDepth(Format format)
2597 {
2598 switch(format)
2599 {
2600 case FORMAT_D32:
2601 case FORMAT_D16:
2602 case FORMAT_D24X8:
2603 case FORMAT_D24S8:
2604 case FORMAT_D24FS8:
2605 case FORMAT_D32F:
2606 case FORMAT_D32F_COMPLEMENTARY:
2607 case FORMAT_D32F_LOCKABLE:
John Bauman66b8ab22014-05-06 15:57:45 -04002608 case FORMAT_DF24S8:
2609 case FORMAT_DF16S8:
2610 case FORMAT_D32FS8_TEXTURE:
2611 case FORMAT_D32FS8_SHADOW:
John Bauman89401822014-05-06 15:04:28 -04002612 case FORMAT_INTZ:
2613 return true;
2614 case FORMAT_S8:
2615 return false;
2616 default:
2617 return false;
2618 }
2619 }
2620
2621 bool Surface::isPalette(Format format)
2622 {
2623 switch(format)
2624 {
2625 case FORMAT_P8:
2626 case FORMAT_A8P8:
2627 return true;
2628 default:
2629 return false;
2630 }
2631 }
2632
2633 bool Surface::isFloatFormat(Format format)
2634 {
2635 switch(format)
2636 {
Nicolas Capens5a86ee92015-09-04 10:45:43 -04002637 case FORMAT_R5G6B5:
Alexis Hetu925c2822015-11-24 14:09:34 -05002638 case FORMAT_R8G8B8:
2639 case FORMAT_B8G8R8:
John Bauman89401822014-05-06 15:04:28 -04002640 case FORMAT_X8R8G8B8:
Alexis Hetu43577b82015-10-21 15:32:16 -04002641 case FORMAT_X8B8G8R8I:
Nicolas Capensef77ac12015-03-28 21:48:51 -04002642 case FORMAT_X8B8G8R8:
John Bauman89401822014-05-06 15:04:28 -04002643 case FORMAT_A8R8G8B8:
Alexis Hetu43577b82015-10-21 15:32:16 -04002644 case FORMAT_A8B8G8R8I:
2645 case FORMAT_R8UI:
2646 case FORMAT_G8R8UI:
2647 case FORMAT_X8B8G8R8UI:
2648 case FORMAT_A8B8G8R8UI:
Nicolas Capensef77ac12015-03-28 21:48:51 -04002649 case FORMAT_A8B8G8R8:
Alexis Hetu43577b82015-10-21 15:32:16 -04002650 case FORMAT_G8R8I:
John Bauman89401822014-05-06 15:04:28 -04002651 case FORMAT_G8R8:
Alexis Hetuf999a002015-12-17 11:09:36 -05002652 case FORMAT_A2B10G10R10:
Alexis Hetu43577b82015-10-21 15:32:16 -04002653 case FORMAT_R8I_SNORM:
2654 case FORMAT_G8R8I_SNORM:
2655 case FORMAT_X8B8G8R8I_SNORM:
2656 case FORMAT_A8B8G8R8I_SNORM:
2657 case FORMAT_R16I:
2658 case FORMAT_R16UI:
2659 case FORMAT_G16R16I:
2660 case FORMAT_G16R16UI:
John Bauman89401822014-05-06 15:04:28 -04002661 case FORMAT_G16R16:
Alexis Hetu43577b82015-10-21 15:32:16 -04002662 case FORMAT_X16B16G16R16I:
2663 case FORMAT_X16B16G16R16UI:
2664 case FORMAT_A16B16G16R16I:
2665 case FORMAT_A16B16G16R16UI:
John Bauman89401822014-05-06 15:04:28 -04002666 case FORMAT_A16B16G16R16:
2667 case FORMAT_V8U8:
2668 case FORMAT_Q8W8V8U8:
2669 case FORMAT_X8L8V8U8:
2670 case FORMAT_V16U16:
2671 case FORMAT_A16W16V16U16:
2672 case FORMAT_Q16W16V16U16:
2673 case FORMAT_A8:
Alexis Hetu43577b82015-10-21 15:32:16 -04002674 case FORMAT_R8I:
John Bauman89401822014-05-06 15:04:28 -04002675 case FORMAT_R8:
2676 case FORMAT_L8:
2677 case FORMAT_L16:
2678 case FORMAT_A8L8:
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04002679 case FORMAT_YV12_BT601:
2680 case FORMAT_YV12_BT709:
2681 case FORMAT_YV12_JFIF:
Alexis Hetu43577b82015-10-21 15:32:16 -04002682 case FORMAT_R32I:
2683 case FORMAT_R32UI:
2684 case FORMAT_G32R32I:
2685 case FORMAT_G32R32UI:
2686 case FORMAT_X32B32G32R32I:
2687 case FORMAT_X32B32G32R32UI:
2688 case FORMAT_A32B32G32R32I:
2689 case FORMAT_A32B32G32R32UI:
John Bauman89401822014-05-06 15:04:28 -04002690 return false;
2691 case FORMAT_R32F:
2692 case FORMAT_G32R32F:
2693 case FORMAT_A32B32G32R32F:
2694 case FORMAT_D32F:
2695 case FORMAT_D32F_COMPLEMENTARY:
2696 case FORMAT_D32F_LOCKABLE:
John Bauman66b8ab22014-05-06 15:57:45 -04002697 case FORMAT_D32FS8_TEXTURE:
2698 case FORMAT_D32FS8_SHADOW:
Nicolas Capens80594422015-06-09 16:42:56 -04002699 case FORMAT_L16F:
2700 case FORMAT_A16L16F:
2701 case FORMAT_L32F:
2702 case FORMAT_A32L32F:
John Bauman89401822014-05-06 15:04:28 -04002703 return true;
2704 default:
2705 ASSERT(false);
2706 }
2707
2708 return false;
2709 }
2710
2711 bool Surface::isUnsignedComponent(Format format, int component)
2712 {
2713 switch(format)
2714 {
2715 case FORMAT_NULL:
Nicolas Capens5a86ee92015-09-04 10:45:43 -04002716 case FORMAT_R5G6B5:
Alexis Hetu925c2822015-11-24 14:09:34 -05002717 case FORMAT_R8G8B8:
2718 case FORMAT_B8G8R8:
John Bauman89401822014-05-06 15:04:28 -04002719 case FORMAT_X8R8G8B8:
Nicolas Capensef77ac12015-03-28 21:48:51 -04002720 case FORMAT_X8B8G8R8:
John Bauman89401822014-05-06 15:04:28 -04002721 case FORMAT_A8R8G8B8:
Nicolas Capensef77ac12015-03-28 21:48:51 -04002722 case FORMAT_A8B8G8R8:
John Bauman89401822014-05-06 15:04:28 -04002723 case FORMAT_G8R8:
Alexis Hetuf999a002015-12-17 11:09:36 -05002724 case FORMAT_A2B10G10R10:
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002725 case FORMAT_R16UI:
John Bauman89401822014-05-06 15:04:28 -04002726 case FORMAT_G16R16:
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002727 case FORMAT_G16R16UI:
2728 case FORMAT_X16B16G16R16UI:
John Bauman89401822014-05-06 15:04:28 -04002729 case FORMAT_A16B16G16R16:
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002730 case FORMAT_A16B16G16R16UI:
2731 case FORMAT_R32UI:
2732 case FORMAT_G32R32UI:
2733 case FORMAT_X32B32G32R32UI:
2734 case FORMAT_A32B32G32R32UI:
2735 case FORMAT_R8UI:
2736 case FORMAT_G8R8UI:
2737 case FORMAT_X8B8G8R8UI:
2738 case FORMAT_A8B8G8R8UI:
John Bauman89401822014-05-06 15:04:28 -04002739 case FORMAT_D32F:
2740 case FORMAT_D32F_COMPLEMENTARY:
2741 case FORMAT_D32F_LOCKABLE:
John Bauman66b8ab22014-05-06 15:57:45 -04002742 case FORMAT_D32FS8_TEXTURE:
2743 case FORMAT_D32FS8_SHADOW:
John Bauman89401822014-05-06 15:04:28 -04002744 case FORMAT_A8:
2745 case FORMAT_R8:
2746 case FORMAT_L8:
2747 case FORMAT_L16:
2748 case FORMAT_A8L8:
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04002749 case FORMAT_YV12_BT601:
2750 case FORMAT_YV12_BT709:
2751 case FORMAT_YV12_JFIF:
John Bauman89401822014-05-06 15:04:28 -04002752 return true;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002753 case FORMAT_A8B8G8R8I:
2754 case FORMAT_A16B16G16R16I:
2755 case FORMAT_A32B32G32R32I:
2756 case FORMAT_A8B8G8R8I_SNORM:
2757 case FORMAT_Q8W8V8U8:
2758 case FORMAT_Q16W16V16U16:
2759 case FORMAT_A32B32G32R32F:
2760 return false;
2761 case FORMAT_R32F:
2762 case FORMAT_R8I:
2763 case FORMAT_R16I:
2764 case FORMAT_R32I:
2765 case FORMAT_R8I_SNORM:
2766 return component >= 1;
John Bauman89401822014-05-06 15:04:28 -04002767 case FORMAT_V8U8:
2768 case FORMAT_X8L8V8U8:
2769 case FORMAT_V16U16:
John Bauman89401822014-05-06 15:04:28 -04002770 case FORMAT_G32R32F:
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002771 case FORMAT_G8R8I:
2772 case FORMAT_G16R16I:
2773 case FORMAT_G32R32I:
2774 case FORMAT_G8R8I_SNORM:
2775 return component >= 2;
2776 case FORMAT_A16W16V16U16:
2777 case FORMAT_X8B8G8R8I:
2778 case FORMAT_X16B16G16R16I:
2779 case FORMAT_X32B32G32R32I:
2780 case FORMAT_X8B8G8R8I_SNORM:
2781 return component >= 3;
John Bauman89401822014-05-06 15:04:28 -04002782 default:
2783 ASSERT(false);
2784 }
2785
2786 return false;
2787 }
2788
2789 bool Surface::isSRGBreadable(Format format)
2790 {
2791 // Keep in sync with Capabilities::isSRGBreadable
2792 switch(format)
2793 {
2794 case FORMAT_L8:
2795 case FORMAT_A8L8:
2796 case FORMAT_R8G8B8:
2797 case FORMAT_A8R8G8B8:
2798 case FORMAT_X8R8G8B8:
2799 case FORMAT_A8B8G8R8:
2800 case FORMAT_X8B8G8R8:
2801 case FORMAT_R5G6B5:
2802 case FORMAT_X1R5G5B5:
2803 case FORMAT_A1R5G5B5:
2804 case FORMAT_A4R4G4B4:
2805 #if S3TC_SUPPORT
2806 case FORMAT_DXT1:
2807 case FORMAT_DXT3:
2808 case FORMAT_DXT5:
John Bauman66b8ab22014-05-06 15:57:45 -04002809 #endif
John Bauman89401822014-05-06 15:04:28 -04002810 case FORMAT_ATI1:
2811 case FORMAT_ATI2:
John Bauman89401822014-05-06 15:04:28 -04002812 return true;
2813 default:
2814 return false;
2815 }
2816
2817 return false;
2818 }
2819
2820 bool Surface::isSRGBwritable(Format format)
2821 {
2822 // Keep in sync with Capabilities::isSRGBwritable
2823 switch(format)
2824 {
2825 case FORMAT_NULL:
2826 case FORMAT_A8R8G8B8:
2827 case FORMAT_X8R8G8B8:
2828 case FORMAT_A8B8G8R8:
2829 case FORMAT_X8B8G8R8:
2830 case FORMAT_R5G6B5:
2831 return true;
2832 default:
2833 return false;
2834 }
2835 }
2836
2837 bool Surface::isCompressed(Format format)
2838 {
2839 switch(format)
2840 {
2841 #if S3TC_SUPPORT
2842 case FORMAT_DXT1:
2843 case FORMAT_DXT3:
2844 case FORMAT_DXT5:
John Bauman66b8ab22014-05-06 15:57:45 -04002845 #endif
John Bauman89401822014-05-06 15:04:28 -04002846 case FORMAT_ATI1:
2847 case FORMAT_ATI2:
Nicolas Capens22658242014-11-29 00:31:41 -05002848 case FORMAT_ETC1:
Alexis Hetu460e41f2015-09-01 10:58:37 -04002849 case FORMAT_R11_EAC:
2850 case FORMAT_SIGNED_R11_EAC:
2851 case FORMAT_RG11_EAC:
2852 case FORMAT_SIGNED_RG11_EAC:
2853 case FORMAT_RGB8_ETC2:
2854 case FORMAT_SRGB8_ETC2:
2855 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
2856 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
2857 case FORMAT_RGBA8_ETC2_EAC:
2858 case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
2859 case FORMAT_RGBA_ASTC_4x4_KHR:
2860 case FORMAT_RGBA_ASTC_5x4_KHR:
2861 case FORMAT_RGBA_ASTC_5x5_KHR:
2862 case FORMAT_RGBA_ASTC_6x5_KHR:
2863 case FORMAT_RGBA_ASTC_6x6_KHR:
2864 case FORMAT_RGBA_ASTC_8x5_KHR:
2865 case FORMAT_RGBA_ASTC_8x6_KHR:
2866 case FORMAT_RGBA_ASTC_8x8_KHR:
2867 case FORMAT_RGBA_ASTC_10x5_KHR:
2868 case FORMAT_RGBA_ASTC_10x6_KHR:
2869 case FORMAT_RGBA_ASTC_10x8_KHR:
2870 case FORMAT_RGBA_ASTC_10x10_KHR:
2871 case FORMAT_RGBA_ASTC_12x10_KHR:
2872 case FORMAT_RGBA_ASTC_12x12_KHR:
2873 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
2874 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
2875 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
2876 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
2877 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
2878 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
2879 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
2880 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
2881 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
2882 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
2883 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
2884 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
2885 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
2886 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:
John Bauman89401822014-05-06 15:04:28 -04002887 return true;
John Bauman89401822014-05-06 15:04:28 -04002888 default:
2889 return false;
2890 }
2891 }
2892
Alexis Hetu43577b82015-10-21 15:32:16 -04002893 bool Surface::isNonNormalizedInteger(Format format)
2894 {
2895 switch(format)
2896 {
2897 case FORMAT_A8B8G8R8I:
2898 case FORMAT_X8B8G8R8I:
2899 case FORMAT_G8R8I:
2900 case FORMAT_R8I:
2901 case FORMAT_A8B8G8R8UI:
2902 case FORMAT_X8B8G8R8UI:
2903 case FORMAT_G8R8UI:
2904 case FORMAT_R8UI:
2905 case FORMAT_A16B16G16R16I:
2906 case FORMAT_X16B16G16R16I:
2907 case FORMAT_G16R16I:
2908 case FORMAT_R16I:
2909 case FORMAT_A16B16G16R16UI:
2910 case FORMAT_X16B16G16R16UI:
2911 case FORMAT_G16R16UI:
2912 case FORMAT_R16UI:
2913 case FORMAT_A32B32G32R32I:
2914 case FORMAT_X32B32G32R32I:
2915 case FORMAT_G32R32I:
2916 case FORMAT_R32I:
2917 case FORMAT_A32B32G32R32UI:
2918 case FORMAT_X32B32G32R32UI:
2919 case FORMAT_G32R32UI:
2920 case FORMAT_R32UI:
2921 return true;
2922 default:
2923 return false;
2924 }
2925 }
2926
John Bauman89401822014-05-06 15:04:28 -04002927 int Surface::componentCount(Format format)
2928 {
2929 switch(format)
2930 {
Nicolas Capens5a86ee92015-09-04 10:45:43 -04002931 case FORMAT_R5G6B5: return 3;
2932 case FORMAT_X8R8G8B8: return 3;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002933 case FORMAT_X8B8G8R8I: return 3;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04002934 case FORMAT_X8B8G8R8: return 3;
2935 case FORMAT_A8R8G8B8: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002936 case FORMAT_A8B8G8R8I: return 4;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04002937 case FORMAT_A8B8G8R8: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002938 case FORMAT_G8R8I: return 2;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04002939 case FORMAT_G8R8: return 2;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002940 case FORMAT_R8I_SNORM: return 1;
2941 case FORMAT_G8R8I_SNORM: return 2;
2942 case FORMAT_X8B8G8R8I_SNORM:return 3;
2943 case FORMAT_A8B8G8R8I_SNORM:return 4;
2944 case FORMAT_R8UI: return 1;
2945 case FORMAT_G8R8UI: return 2;
2946 case FORMAT_X8B8G8R8UI: return 3;
2947 case FORMAT_A8B8G8R8UI: return 4;
Alexis Hetuf999a002015-12-17 11:09:36 -05002948 case FORMAT_A2B10G10R10: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002949 case FORMAT_G16R16I: return 2;
2950 case FORMAT_G16R16UI: return 2;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04002951 case FORMAT_G16R16: return 2;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002952 case FORMAT_G32R32I: return 2;
2953 case FORMAT_G32R32UI: return 2;
2954 case FORMAT_X16B16G16R16I: return 3;
2955 case FORMAT_X16B16G16R16UI: return 3;
2956 case FORMAT_A16B16G16R16I: return 4;
2957 case FORMAT_A16B16G16R16UI: return 4;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04002958 case FORMAT_A16B16G16R16: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002959 case FORMAT_X32B32G32R32I: return 3;
2960 case FORMAT_X32B32G32R32UI: return 3;
2961 case FORMAT_A32B32G32R32I: return 4;
2962 case FORMAT_A32B32G32R32UI: return 4;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04002963 case FORMAT_V8U8: return 2;
2964 case FORMAT_Q8W8V8U8: return 4;
2965 case FORMAT_X8L8V8U8: return 3;
2966 case FORMAT_V16U16: return 2;
2967 case FORMAT_A16W16V16U16: return 4;
2968 case FORMAT_Q16W16V16U16: return 4;
2969 case FORMAT_R32F: return 1;
2970 case FORMAT_G32R32F: return 2;
2971 case FORMAT_A32B32G32R32F: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002972 case FORMAT_D32F: return 1;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04002973 case FORMAT_D32F_LOCKABLE: return 1;
2974 case FORMAT_D32FS8_TEXTURE: return 1;
2975 case FORMAT_D32FS8_SHADOW: return 1;
2976 case FORMAT_A8: return 1;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002977 case FORMAT_R8I: return 1;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04002978 case FORMAT_R8: return 1;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002979 case FORMAT_R16I: return 1;
2980 case FORMAT_R16UI: return 1;
2981 case FORMAT_R32I: return 1;
2982 case FORMAT_R32UI: return 1;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04002983 case FORMAT_L8: return 1;
2984 case FORMAT_L16: return 1;
2985 case FORMAT_A8L8: return 2;
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04002986 case FORMAT_YV12_BT601: return 3;
2987 case FORMAT_YV12_BT709: return 3;
2988 case FORMAT_YV12_JFIF: return 3;
John Bauman89401822014-05-06 15:04:28 -04002989 default:
2990 ASSERT(false);
2991 }
2992
2993 return 1;
2994 }
2995
2996 void *Surface::allocateBuffer(int width, int height, int depth, Format format)
2997 {
Nicolas Capensdb17b5d2015-06-26 11:15:58 -04002998 // Render targets require 2x2 quads
2999 int width2 = (width + 1) & ~1;
3000 int height2 = (height + 1) & ~1;
John Bauman89401822014-05-06 15:04:28 -04003001
Nicolas Capens6ea71872015-06-26 13:00:48 -04003002 // FIXME: Unpacking byte4 to short4 in the sampler currently involves reading 8 bytes,
3003 // so we have to allocate 4 extra bytes to avoid buffer overruns.
3004 return allocateZero(size(width2, height2, depth, format) + 4);
John Bauman89401822014-05-06 15:04:28 -04003005 }
3006
Nicolas Capens5ba566b2015-05-25 17:11:04 -04003007 void Surface::memfill4(void *buffer, int pattern, int bytes)
John Bauman89401822014-05-06 15:04:28 -04003008 {
3009 while((size_t)buffer & 0x1 && bytes >= 1)
3010 {
3011 *(char*)buffer = (char)pattern;
3012 (char*&)buffer += 1;
3013 bytes -= 1;
3014 }
3015
3016 while((size_t)buffer & 0x3 && bytes >= 2)
3017 {
3018 *(short*)buffer = (short)pattern;
3019 (short*&)buffer += 1;
3020 bytes -= 2;
3021 }
3022
3023 if(CPUID::supportsSSE())
3024 {
3025 while((size_t)buffer & 0xF && bytes >= 4)
3026 {
3027 *(int*)buffer = pattern;
3028 (int*&)buffer += 1;
3029 bytes -= 4;
3030 }
3031
3032 __m128 quad = _mm_set_ps1((float&)pattern);
3033
3034 float *pointer = (float*)buffer;
3035 int qxwords = bytes / 64;
3036 bytes -= qxwords * 64;
3037
3038 while(qxwords--)
3039 {
3040 _mm_stream_ps(pointer + 0, quad);
3041 _mm_stream_ps(pointer + 4, quad);
3042 _mm_stream_ps(pointer + 8, quad);
3043 _mm_stream_ps(pointer + 12, quad);
3044
3045 pointer += 16;
3046 }
3047
3048 buffer = pointer;
3049 }
3050
3051 while(bytes >= 4)
3052 {
3053 *(int*)buffer = (int)pattern;
3054 (int*&)buffer += 1;
3055 bytes -= 4;
3056 }
3057
3058 while(bytes >= 2)
3059 {
3060 *(short*)buffer = (short)pattern;
3061 (short*&)buffer += 1;
3062 bytes -= 2;
3063 }
3064
3065 while(bytes >= 1)
3066 {
3067 *(char*)buffer = (char)pattern;
3068 (char*&)buffer += 1;
3069 bytes -= 1;
3070 }
3071 }
3072
Alexis Hetu75b650f2015-11-19 17:40:15 -05003073 bool Surface::isEntire(const SliceRect& rect) const
John Bauman89401822014-05-06 15:04:28 -04003074 {
Alexis Hetu75b650f2015-11-19 17:40:15 -05003075 return (rect.x0 == 0 && rect.y0 == 0 && rect.x1 == internal.width && rect.y1 == internal.height && internal.depth == 1);
3076 }
John Bauman89401822014-05-06 15:04:28 -04003077
Alexis Hetu75b650f2015-11-19 17:40:15 -05003078 bool Surface::getClearRect(int x0, int y0, int width, int height, SliceRect& rect) const
3079 {
John Bauman89401822014-05-06 15:04:28 -04003080 // Not overlapping
Alexis Hetu75b650f2015-11-19 17:40:15 -05003081 if(x0 > internal.width) return false;
3082 if(y0 > internal.height) return false;
3083 if(x0 + width < 0) return false;
3084 if(y0 + height < 0) return false;
John Bauman89401822014-05-06 15:04:28 -04003085
3086 // Clip against dimensions
Alexis Hetu75b650f2015-11-19 17:40:15 -05003087 if(x0 < 0) { width += x0; x0 = 0; }
John Bauman89401822014-05-06 15:04:28 -04003088 if(x0 + width > internal.width) width = internal.width - x0;
Alexis Hetu75b650f2015-11-19 17:40:15 -05003089 if(y0 < 0) { height += y0; y0 = 0; }
John Bauman89401822014-05-06 15:04:28 -04003090 if(y0 + height > internal.height) height = internal.height - y0;
3091
Alexis Hetu75b650f2015-11-19 17:40:15 -05003092 rect.x0 = x0;
3093 rect.x1 = x0 + width;
3094 rect.y0 = y0;
3095 rect.y1 = y0 + height;
John Bauman89401822014-05-06 15:04:28 -04003096
Alexis Hetu75b650f2015-11-19 17:40:15 -05003097 return true;
John Bauman89401822014-05-06 15:04:28 -04003098 }
3099
3100 void Surface::clearDepthBuffer(float depth, int x0, int y0, int width, int height)
3101 {
Alexis Hetu358a1442015-12-03 14:23:10 -05003102 if(width == 0 || height == 0) return;
3103
John Bauman89401822014-05-06 15:04:28 -04003104 // Not overlapping
3105 if(x0 > internal.width) return;
3106 if(y0 > internal.height) return;
3107 if(x0 + width < 0) return;
3108 if(y0 + height < 0) return;
3109
3110 // Clip against dimensions
3111 if(x0 < 0) {width += x0; x0 = 0;}
3112 if(x0 + width > internal.width) width = internal.width - x0;
3113 if(y0 < 0) {height += y0; y0 = 0;}
3114 if(y0 + height > internal.height) height = internal.height - y0;
3115
3116 const bool entire = x0 == 0 && y0 == 0 && width == internal.width && height == internal.height;
3117 const Lock lock = entire ? LOCK_DISCARD : LOCK_WRITEONLY;
3118
3119 int width2 = (internal.width + 1) & ~1;
3120
3121 int x1 = x0 + width;
3122 int y1 = y0 + height;
3123
3124 if(internal.format == FORMAT_D32F_LOCKABLE ||
John Bauman66b8ab22014-05-06 15:57:45 -04003125 internal.format == FORMAT_D32FS8_TEXTURE ||
3126 internal.format == FORMAT_D32FS8_SHADOW)
John Bauman89401822014-05-06 15:04:28 -04003127 {
3128 float *target = (float*)lockInternal(0, 0, 0, lock, PUBLIC) + x0 + width2 * y0;
3129
3130 for(int z = 0; z < internal.depth; z++)
3131 {
3132 for(int y = y0; y < y1; y++)
3133 {
Nicolas Capens5ba566b2015-05-25 17:11:04 -04003134 memfill4(target, (int&)depth, 4 * width);
John Bauman89401822014-05-06 15:04:28 -04003135 target += width2;
3136 }
3137 }
3138
3139 unlockInternal();
3140 }
3141 else // Quad layout
3142 {
3143 if(complementaryDepthBuffer)
3144 {
3145 depth = 1 - depth;
3146 }
3147
3148 float *buffer = (float*)lockInternal(0, 0, 0, lock, PUBLIC);
3149
Alexis Hetu358a1442015-12-03 14:23:10 -05003150 int oddX0 = (x0 & ~1) * 2 + (x0 & 1);
3151 int oddX1 = (x1 & ~1) * 2;
3152 int evenX0 = ((x0 + 1) & ~1) * 2;
3153 int evenBytes = (oddX1 - evenX0) * sizeof(float);
3154
John Bauman89401822014-05-06 15:04:28 -04003155 for(int z = 0; z < internal.depth; z++)
3156 {
3157 for(int y = y0; y < y1; y++)
3158 {
3159 float *target = buffer + (y & ~1) * width2 + (y & 1) * 2;
3160
3161 if((y & 1) == 0 && y + 1 < y1) // Fill quad line at once
3162 {
3163 if((x0 & 1) != 0)
3164 {
Alexis Hetu358a1442015-12-03 14:23:10 -05003165 target[oddX0 + 0] = depth;
3166 target[oddX0 + 2] = depth;
John Bauman89401822014-05-06 15:04:28 -04003167 }
3168
Alexis Hetu358a1442015-12-03 14:23:10 -05003169 // for(int x2 = evenX0; x2 < x1 * 2; x2 += 4)
John Bauman89401822014-05-06 15:04:28 -04003170 // {
3171 // target[x2 + 0] = depth;
3172 // target[x2 + 1] = depth;
3173 // target[x2 + 2] = depth;
3174 // target[x2 + 3] = depth;
3175 // }
3176
3177 // __asm
3178 // {
3179 // movss xmm0, depth
3180 // shufps xmm0, xmm0, 0x00
3181 //
3182 // mov eax, x0
3183 // add eax, 1
3184 // and eax, 0xFFFFFFFE
3185 // cmp eax, x1
3186 // jge qEnd
3187 //
3188 // mov edi, target
3189 //
3190 // qLoop:
3191 // movntps [edi+8*eax], xmm0
3192 //
3193 // add eax, 2
3194 // cmp eax, x1
3195 // jl qLoop
3196 // qEnd:
3197 // }
3198
Alexis Hetu358a1442015-12-03 14:23:10 -05003199 memfill4(&target[evenX0], (int&)depth, evenBytes);
John Bauman89401822014-05-06 15:04:28 -04003200
3201 if((x1 & 1) != 0)
3202 {
Alexis Hetu358a1442015-12-03 14:23:10 -05003203 target[oddX1 + 0] = depth;
3204 target[oddX1 + 2] = depth;
John Bauman89401822014-05-06 15:04:28 -04003205 }
3206
3207 y++;
3208 }
3209 else
3210 {
Alexis Hetu358a1442015-12-03 14:23:10 -05003211 for(int x = x0, i = oddX0; x < x1; x++, i = (x & ~1) * 2 + (x & 1))
John Bauman89401822014-05-06 15:04:28 -04003212 {
Alexis Hetu358a1442015-12-03 14:23:10 -05003213 target[i] = depth;
John Bauman89401822014-05-06 15:04:28 -04003214 }
3215 }
3216 }
3217
3218 buffer += internal.sliceP;
3219 }
3220
3221 unlockInternal();
3222 }
3223 }
3224
3225 void Surface::clearStencilBuffer(unsigned char s, unsigned char mask, int x0, int y0, int width, int height)
3226 {
Alexis Hetu358a1442015-12-03 14:23:10 -05003227 if(mask == 0 || width == 0 || height == 0) return;
Alexis Hetu2b052f82015-11-25 13:57:28 -05003228
John Bauman89401822014-05-06 15:04:28 -04003229 // Not overlapping
3230 if(x0 > internal.width) return;
3231 if(y0 > internal.height) return;
3232 if(x0 + width < 0) return;
3233 if(y0 + height < 0) return;
3234
3235 // Clip against dimensions
3236 if(x0 < 0) {width += x0; x0 = 0;}
3237 if(x0 + width > internal.width) width = internal.width - x0;
3238 if(y0 < 0) {height += y0; y0 = 0;}
3239 if(y0 + height > internal.height) height = internal.height - y0;
3240
3241 int width2 = (internal.width + 1) & ~1;
3242
3243 int x1 = x0 + width;
3244 int y1 = y0 + height;
3245
Alexis Hetu358a1442015-12-03 14:23:10 -05003246 int oddX0 = (x0 & ~1) * 2 + (x0 & 1);
3247 int oddX1 = (x1 & ~1) * 2;
3248 int evenX0 = ((x0 + 1) & ~1) * 2;
3249 int evenBytes = oddX1 - evenX0;
3250
John Bauman89401822014-05-06 15:04:28 -04003251 unsigned char maskedS = s & mask;
3252 unsigned char invMask = ~mask;
3253 unsigned int fill = maskedS;
3254 fill = fill | (fill << 8) | (fill << 16) + (fill << 24);
3255
Alexis Hetu2b052f82015-11-25 13:57:28 -05003256 char *buffer = (char*)lockStencil(0, PUBLIC);
3257
3258 // Stencil buffers are assumed to use quad layout
3259 for(int z = 0; z < stencil.depth; z++)
John Bauman89401822014-05-06 15:04:28 -04003260 {
Alexis Hetu2b052f82015-11-25 13:57:28 -05003261 for(int y = y0; y < y1; y++)
John Bauman89401822014-05-06 15:04:28 -04003262 {
Alexis Hetu2b052f82015-11-25 13:57:28 -05003263 char *target = buffer + (y & ~1) * width2 + (y & 1) * 2;
3264
3265 if((y & 1) == 0 && y + 1 < y1 && mask == 0xFF) // Fill quad line at once
John Bauman89401822014-05-06 15:04:28 -04003266 {
Alexis Hetu2b052f82015-11-25 13:57:28 -05003267 if((x0 & 1) != 0)
John Bauman89401822014-05-06 15:04:28 -04003268 {
Alexis Hetu358a1442015-12-03 14:23:10 -05003269 target[oddX0 + 0] = fill;
3270 target[oddX0 + 2] = fill;
John Bauman89401822014-05-06 15:04:28 -04003271 }
3272
Alexis Hetu358a1442015-12-03 14:23:10 -05003273 memfill4(&target[evenX0], fill, evenBytes);
Alexis Hetu2b052f82015-11-25 13:57:28 -05003274
3275 if((x1 & 1) != 0)
3276 {
Alexis Hetu358a1442015-12-03 14:23:10 -05003277 target[oddX1 + 0] = fill;
3278 target[oddX1 + 2] = fill;
Alexis Hetu2b052f82015-11-25 13:57:28 -05003279 }
3280
3281 y++;
3282 }
3283 else
3284 {
Alexis Hetu358a1442015-12-03 14:23:10 -05003285 for(int x = x0, i = oddX0; x < x1; x++, i = (x & ~1) * 2 + (x & 1))
Alexis Hetu2b052f82015-11-25 13:57:28 -05003286 {
Alexis Hetu358a1442015-12-03 14:23:10 -05003287 target[i] = maskedS | (target[i] & invMask);
Alexis Hetu2b052f82015-11-25 13:57:28 -05003288 }
John Bauman89401822014-05-06 15:04:28 -04003289 }
3290 }
3291
Alexis Hetu2b052f82015-11-25 13:57:28 -05003292 buffer += stencil.sliceP;
John Bauman89401822014-05-06 15:04:28 -04003293 }
John Bauman89401822014-05-06 15:04:28 -04003294
Alexis Hetu2b052f82015-11-25 13:57:28 -05003295 unlockStencil();
John Bauman89401822014-05-06 15:04:28 -04003296 }
3297
3298 void Surface::fill(const Color<float> &color, int x0, int y0, int width, int height)
3299 {
3300 unsigned char *row;
3301 Buffer *buffer;
3302
3303 if(internal.dirty)
3304 {
3305 row = (unsigned char*)lockInternal(x0, y0, 0, LOCK_WRITEONLY, PUBLIC);
3306 buffer = &internal;
3307 }
3308 else
3309 {
3310 row = (unsigned char*)lockExternal(x0, y0, 0, LOCK_WRITEONLY, PUBLIC);
3311 buffer = &external;
3312 }
3313
3314 if(buffer->bytes <= 4)
3315 {
3316 int c;
3317 buffer->write(&c, color);
3318
3319 if(buffer->bytes <= 1) c = (c << 8) | c;
3320 if(buffer->bytes <= 2) c = (c << 16) | c;
3321
3322 for(int y = 0; y < height; y++)
3323 {
Nicolas Capens5ba566b2015-05-25 17:11:04 -04003324 memfill4(row, c, width * buffer->bytes);
John Bauman89401822014-05-06 15:04:28 -04003325
3326 row += buffer->pitchB;
3327 }
3328 }
3329 else // Generic
3330 {
3331 for(int y = 0; y < height; y++)
3332 {
3333 unsigned char *element = row;
3334
3335 for(int x = 0; x < width; x++)
3336 {
3337 buffer->write(element, color);
3338
3339 element += buffer->bytes;
3340 }
3341
3342 row += buffer->pitchB;
3343 }
3344 }
3345
3346 if(buffer == &internal)
3347 {
3348 unlockInternal();
3349 }
3350 else
3351 {
3352 unlockExternal();
3353 }
3354 }
3355
Alexis Hetu43577b82015-10-21 15:32:16 -04003356 void Surface::copyInternal(const Surface* source, int x, int y, float srcX, float srcY, bool filter)
John Bauman89401822014-05-06 15:04:28 -04003357 {
Alexis Hetu43577b82015-10-21 15:32:16 -04003358 ASSERT(internal.lock != LOCK_UNLOCKED && source && source->internal.lock != LOCK_UNLOCKED);
John Bauman89401822014-05-06 15:04:28 -04003359
Alexis Hetu43577b82015-10-21 15:32:16 -04003360 sw::Color<float> color;
John Bauman89401822014-05-06 15:04:28 -04003361
Alexis Hetu43577b82015-10-21 15:32:16 -04003362 if(!filter)
3363 {
3364 color = source->internal.read((int)srcX, (int)srcY);
3365 }
3366 else // Bilinear filtering
3367 {
3368 color = source->internal.sample(srcX, srcY);
3369 }
John Bauman89401822014-05-06 15:04:28 -04003370
3371 internal.write(x, y, color);
3372 }
3373
Alexis Hetu43577b82015-10-21 15:32:16 -04003374 void Surface::copyInternal(const Surface* source, int x, int y, int z, float srcX, float srcY, float srcZ, bool filter)
3375 {
3376 ASSERT(internal.lock != LOCK_UNLOCKED && source && source->internal.lock != LOCK_UNLOCKED);
3377
3378 sw::Color<float> color;
3379
3380 if(!filter)
3381 {
3382 color = source->internal.read((int)srcX, (int)srcY, int(srcZ));
3383 }
3384 else // Bilinear filtering
3385 {
3386 color = source->internal.sample(srcX, srcY, srcZ);
3387 }
3388
3389 internal.write(x, y, z, color);
3390 }
3391
John Bauman89401822014-05-06 15:04:28 -04003392 bool Surface::hasStencil() const
3393 {
3394 return isStencil(external.format);
3395 }
3396
3397 bool Surface::hasDepth() const
3398 {
3399 return isDepth(external.format);
3400 }
3401
3402 bool Surface::hasPalette() const
3403 {
3404 return isPalette(external.format);
3405 }
3406
3407 bool Surface::isRenderTarget() const
3408 {
3409 return renderTarget;
3410 }
3411
3412 bool Surface::hasDirtyMipmaps() const
3413 {
3414 return dirtyMipmaps;
3415 }
3416
3417 void Surface::cleanMipmaps()
3418 {
3419 dirtyMipmaps = false;
3420 }
3421
3422 Resource *Surface::getResource()
3423 {
3424 return resource;
3425 }
3426
3427 bool Surface::identicalFormats() const
3428 {
John Bauman66b8ab22014-05-06 15:57:45 -04003429 return external.format == internal.format &&
3430 external.width == internal.width &&
Nicolas Capens22658242014-11-29 00:31:41 -05003431 external.height == internal.height &&
3432 external.depth == internal.depth &&
3433 external.pitchB == internal.pitchB &&
3434 external.sliceB == internal.sliceB;
John Bauman89401822014-05-06 15:04:28 -04003435 }
3436
3437 Format Surface::selectInternalFormat(Format format) const
3438 {
3439 switch(format)
3440 {
3441 case FORMAT_NULL:
3442 return FORMAT_NULL;
3443 case FORMAT_P8:
3444 case FORMAT_A8P8:
3445 case FORMAT_A4R4G4B4:
3446 case FORMAT_A1R5G5B5:
3447 case FORMAT_A8R3G3B2:
3448 return FORMAT_A8R8G8B8;
3449 case FORMAT_A8:
3450 return FORMAT_A8;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003451 case FORMAT_R8I:
3452 return FORMAT_R8I;
3453 case FORMAT_R8UI:
3454 return FORMAT_R8UI;
3455 case FORMAT_R8I_SNORM:
3456 return FORMAT_R8I_SNORM;
John Bauman89401822014-05-06 15:04:28 -04003457 case FORMAT_R8:
3458 return FORMAT_R8;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003459 case FORMAT_R16I:
3460 return FORMAT_R16I;
3461 case FORMAT_R16UI:
3462 return FORMAT_R16UI;
3463 case FORMAT_R32I:
3464 return FORMAT_R32I;
3465 case FORMAT_R32UI:
3466 return FORMAT_R32UI;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003467 case FORMAT_X16B16G16R16I:
3468 case FORMAT_A16B16G16R16I:
3469 return FORMAT_A16B16G16R16I;
3470 case FORMAT_X16B16G16R16UI:
3471 case FORMAT_A16B16G16R16UI:
3472 return FORMAT_A16B16G16R16UI;
Alexis Hetuf999a002015-12-17 11:09:36 -05003473 case FORMAT_A2R10G10B10:
3474 case FORMAT_A2B10G10R10:
John Bauman89401822014-05-06 15:04:28 -04003475 case FORMAT_A16B16G16R16:
3476 return FORMAT_A16B16G16R16;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003477 case FORMAT_X32B32G32R32I:
3478 case FORMAT_A32B32G32R32I:
3479 return FORMAT_A32B32G32R32I;
3480 case FORMAT_X32B32G32R32UI:
3481 case FORMAT_A32B32G32R32UI:
3482 return FORMAT_A32B32G32R32UI;
3483 case FORMAT_G8R8I:
3484 return FORMAT_G8R8I;
3485 case FORMAT_G8R8UI:
3486 return FORMAT_G8R8UI;
3487 case FORMAT_G8R8I_SNORM:
3488 return FORMAT_G8R8I_SNORM;
John Bauman89401822014-05-06 15:04:28 -04003489 case FORMAT_G8R8:
3490 return FORMAT_G8R8;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003491 case FORMAT_G16R16I:
3492 return FORMAT_G16R16I;
3493 case FORMAT_G16R16UI:
3494 return FORMAT_G16R16UI;
John Bauman89401822014-05-06 15:04:28 -04003495 case FORMAT_G16R16:
3496 return FORMAT_G16R16;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003497 case FORMAT_G32R32I:
3498 return FORMAT_G32R32I;
3499 case FORMAT_G32R32UI:
3500 return FORMAT_G32R32UI;
John Bauman89401822014-05-06 15:04:28 -04003501 case FORMAT_A8R8G8B8:
John Bauman89401822014-05-06 15:04:28 -04003502 if(lockable || !quadLayoutEnabled)
3503 {
3504 return FORMAT_A8R8G8B8;
3505 }
3506 else
3507 {
3508 return FORMAT_A8G8R8B8Q;
3509 }
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003510 case FORMAT_A8B8G8R8I:
3511 return FORMAT_A8B8G8R8I;
3512 case FORMAT_A8B8G8R8UI:
3513 return FORMAT_A8B8G8R8UI;
3514 case FORMAT_A8B8G8R8I_SNORM:
3515 return FORMAT_A8B8G8R8I_SNORM;
Nicolas Capens80594422015-06-09 16:42:56 -04003516 case FORMAT_R5G5B5A1:
3517 case FORMAT_R4G4B4A4:
Nicolas Capensef77ac12015-03-28 21:48:51 -04003518 case FORMAT_A8B8G8R8:
3519 return FORMAT_A8B8G8R8;
John Bauman89401822014-05-06 15:04:28 -04003520 case FORMAT_R5G6B5:
Nicolas Capens5a86ee92015-09-04 10:45:43 -04003521 return FORMAT_R5G6B5;
3522 case FORMAT_R3G3B2:
John Bauman89401822014-05-06 15:04:28 -04003523 case FORMAT_R8G8B8:
3524 case FORMAT_X4R4G4B4:
3525 case FORMAT_X1R5G5B5:
3526 case FORMAT_X8R8G8B8:
John Bauman89401822014-05-06 15:04:28 -04003527 if(lockable || !quadLayoutEnabled)
3528 {
3529 return FORMAT_X8R8G8B8;
3530 }
3531 else
3532 {
3533 return FORMAT_X8G8R8B8Q;
3534 }
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003535 case FORMAT_X8B8G8R8I:
3536 return FORMAT_X8B8G8R8I;
3537 case FORMAT_X8B8G8R8UI:
3538 return FORMAT_X8B8G8R8UI;
3539 case FORMAT_X8B8G8R8I_SNORM:
3540 return FORMAT_X8B8G8R8I_SNORM;
Nicolas Capens80594422015-06-09 16:42:56 -04003541 case FORMAT_B8G8R8:
Nicolas Capensef77ac12015-03-28 21:48:51 -04003542 case FORMAT_X8B8G8R8:
3543 return FORMAT_X8B8G8R8;
John Bauman89401822014-05-06 15:04:28 -04003544 // Compressed formats
3545 #if S3TC_SUPPORT
3546 case FORMAT_DXT1:
3547 case FORMAT_DXT3:
3548 case FORMAT_DXT5:
John Bauman66b8ab22014-05-06 15:57:45 -04003549 #endif
Alexis Hetu460e41f2015-09-01 10:58:37 -04003550 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
3551 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
3552 case FORMAT_RGBA8_ETC2_EAC:
3553 case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
3554 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
3555 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
3556 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
3557 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
3558 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
3559 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
3560 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
3561 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
3562 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
3563 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
3564 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
3565 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
3566 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
3567 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:
3568 return FORMAT_A8R8G8B8;
3569 case FORMAT_RGBA_ASTC_4x4_KHR:
3570 case FORMAT_RGBA_ASTC_5x4_KHR:
3571 case FORMAT_RGBA_ASTC_5x5_KHR:
3572 case FORMAT_RGBA_ASTC_6x5_KHR:
3573 case FORMAT_RGBA_ASTC_6x6_KHR:
3574 case FORMAT_RGBA_ASTC_8x5_KHR:
3575 case FORMAT_RGBA_ASTC_8x6_KHR:
3576 case FORMAT_RGBA_ASTC_8x8_KHR:
3577 case FORMAT_RGBA_ASTC_10x5_KHR:
3578 case FORMAT_RGBA_ASTC_10x6_KHR:
3579 case FORMAT_RGBA_ASTC_10x8_KHR:
3580 case FORMAT_RGBA_ASTC_10x10_KHR:
3581 case FORMAT_RGBA_ASTC_12x10_KHR:
3582 case FORMAT_RGBA_ASTC_12x12_KHR:
3583 // ASTC supports HDR, so a floating point format is required to represent it properly
3584 return FORMAT_A32B32G32R32F; // FIXME: 16FP is probably sufficient, but it's currently unsupported
John Bauman89401822014-05-06 15:04:28 -04003585 case FORMAT_ATI1:
Alexis Hetu460e41f2015-09-01 10:58:37 -04003586 case FORMAT_R11_EAC:
John Bauman89401822014-05-06 15:04:28 -04003587 return FORMAT_R8;
Alexis Hetu0de50d42015-09-09 13:56:41 -04003588 case FORMAT_SIGNED_R11_EAC:
3589 return FORMAT_R32F; // FIXME: Signed 8bit format would be sufficient
John Bauman89401822014-05-06 15:04:28 -04003590 case FORMAT_ATI2:
Alexis Hetu460e41f2015-09-01 10:58:37 -04003591 case FORMAT_RG11_EAC:
John Bauman89401822014-05-06 15:04:28 -04003592 return FORMAT_G8R8;
Alexis Hetu0de50d42015-09-09 13:56:41 -04003593 case FORMAT_SIGNED_RG11_EAC:
3594 return FORMAT_G32R32F; // FIXME: Signed 8bit format would be sufficient
Nicolas Capens22658242014-11-29 00:31:41 -05003595 case FORMAT_ETC1:
Alexis Hetu460e41f2015-09-01 10:58:37 -04003596 case FORMAT_RGB8_ETC2:
3597 case FORMAT_SRGB8_ETC2:
Nicolas Capens22658242014-11-29 00:31:41 -05003598 return FORMAT_X8R8G8B8;
John Bauman89401822014-05-06 15:04:28 -04003599 // Bumpmap formats
3600 case FORMAT_V8U8: return FORMAT_V8U8;
3601 case FORMAT_L6V5U5: return FORMAT_X8L8V8U8;
3602 case FORMAT_Q8W8V8U8: return FORMAT_Q8W8V8U8;
3603 case FORMAT_X8L8V8U8: return FORMAT_X8L8V8U8;
3604 case FORMAT_V16U16: return FORMAT_V16U16;
3605 case FORMAT_A2W10V10U10: return FORMAT_A16W16V16U16;
3606 case FORMAT_Q16W16V16U16: return FORMAT_Q16W16V16U16;
3607 // Floating-point formats
Nicolas Capens80594422015-06-09 16:42:56 -04003608 case FORMAT_A16F: return FORMAT_A32B32G32R32F;
John Bauman89401822014-05-06 15:04:28 -04003609 case FORMAT_R16F: return FORMAT_R32F;
3610 case FORMAT_G16R16F: return FORMAT_G32R32F;
Nicolas Capens80594422015-06-09 16:42:56 -04003611 case FORMAT_B16G16R16F: return FORMAT_A32B32G32R32F;
John Bauman89401822014-05-06 15:04:28 -04003612 case FORMAT_A16B16G16R16F: return FORMAT_A32B32G32R32F;
Nicolas Capens80594422015-06-09 16:42:56 -04003613 case FORMAT_A32F: return FORMAT_A32B32G32R32F;
John Bauman89401822014-05-06 15:04:28 -04003614 case FORMAT_R32F: return FORMAT_R32F;
3615 case FORMAT_G32R32F: return FORMAT_G32R32F;
Nicolas Capens80594422015-06-09 16:42:56 -04003616 case FORMAT_B32G32R32F: return FORMAT_A32B32G32R32F;
John Bauman89401822014-05-06 15:04:28 -04003617 case FORMAT_A32B32G32R32F: return FORMAT_A32B32G32R32F;
3618 // Luminance formats
3619 case FORMAT_L8: return FORMAT_L8;
3620 case FORMAT_A4L4: return FORMAT_A8L8;
3621 case FORMAT_L16: return FORMAT_L16;
3622 case FORMAT_A8L8: return FORMAT_A8L8;
Nicolas Capens80594422015-06-09 16:42:56 -04003623 case FORMAT_L16F: return FORMAT_A32B32G32R32F;
3624 case FORMAT_A16L16F: return FORMAT_A32B32G32R32F;
3625 case FORMAT_L32F: return FORMAT_A32B32G32R32F;
3626 case FORMAT_A32L32F: return FORMAT_A32B32G32R32F;
John Bauman89401822014-05-06 15:04:28 -04003627 // Depth/stencil formats
3628 case FORMAT_D16:
3629 case FORMAT_D32:
3630 case FORMAT_D24X8:
3631 case FORMAT_D24S8:
3632 case FORMAT_D24FS8:
3633 if(hasParent) // Texture
3634 {
John Bauman66b8ab22014-05-06 15:57:45 -04003635 return FORMAT_D32FS8_SHADOW;
John Bauman89401822014-05-06 15:04:28 -04003636 }
3637 else if(complementaryDepthBuffer)
3638 {
3639 return FORMAT_D32F_COMPLEMENTARY;
3640 }
3641 else
3642 {
3643 return FORMAT_D32F;
3644 }
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003645 case FORMAT_D32F: return FORMAT_D32F;
John Bauman66b8ab22014-05-06 15:57:45 -04003646 case FORMAT_D32F_LOCKABLE: return FORMAT_D32F_LOCKABLE;
3647 case FORMAT_D32FS8_TEXTURE: return FORMAT_D32FS8_TEXTURE;
3648 case FORMAT_INTZ: return FORMAT_D32FS8_TEXTURE;
3649 case FORMAT_DF24S8: return FORMAT_D32FS8_SHADOW;
3650 case FORMAT_DF16S8: return FORMAT_D32FS8_SHADOW;
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04003651 case FORMAT_YV12_BT601: return FORMAT_YV12_BT601;
3652 case FORMAT_YV12_BT709: return FORMAT_YV12_BT709;
3653 case FORMAT_YV12_JFIF: return FORMAT_YV12_JFIF;
John Bauman89401822014-05-06 15:04:28 -04003654 default:
3655 ASSERT(false);
3656 }
3657
3658 return FORMAT_NULL;
3659 }
3660
3661 void Surface::setTexturePalette(unsigned int *palette)
3662 {
3663 Surface::palette = palette;
3664 Surface::paletteID++;
3665 }
3666
3667 void Surface::resolve()
3668 {
3669 if(internal.depth <= 1 || !internal.dirty || !renderTarget || internal.format == FORMAT_NULL)
3670 {
3671 return;
3672 }
3673
3674 void *source = internal.lockRect(0, 0, 0, LOCK_READWRITE);
3675
3676 int quality = internal.depth;
3677 int width = internal.width;
3678 int height = internal.height;
3679 int pitch = internal.pitchB;
3680 int slice = internal.sliceB;
3681
3682 unsigned char *source0 = (unsigned char*)source;
3683 unsigned char *source1 = source0 + slice;
3684 unsigned char *source2 = source1 + slice;
3685 unsigned char *source3 = source2 + slice;
3686 unsigned char *source4 = source3 + slice;
3687 unsigned char *source5 = source4 + slice;
3688 unsigned char *source6 = source5 + slice;
3689 unsigned char *source7 = source6 + slice;
3690 unsigned char *source8 = source7 + slice;
3691 unsigned char *source9 = source8 + slice;
3692 unsigned char *sourceA = source9 + slice;
3693 unsigned char *sourceB = sourceA + slice;
3694 unsigned char *sourceC = sourceB + slice;
3695 unsigned char *sourceD = sourceC + slice;
3696 unsigned char *sourceE = sourceD + slice;
3697 unsigned char *sourceF = sourceE + slice;
3698
Nicolas Capensef77ac12015-03-28 21:48:51 -04003699 if(internal.format == FORMAT_X8R8G8B8 || internal.format == FORMAT_A8R8G8B8 || internal.format == FORMAT_X8B8G8R8 || internal.format == FORMAT_A8B8G8R8)
John Bauman89401822014-05-06 15:04:28 -04003700 {
3701 if(CPUID::supportsSSE2() && (width % 4) == 0)
3702 {
3703 if(internal.depth == 2)
3704 {
3705 for(int y = 0; y < height; y++)
3706 {
3707 for(int x = 0; x < width; x += 4)
3708 {
3709 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
3710 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
3711
3712 c0 = _mm_avg_epu8(c0, c1);
3713
3714 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
3715 }
3716
3717 source0 += pitch;
3718 source1 += pitch;
3719 }
3720 }
3721 else if(internal.depth == 4)
3722 {
3723 for(int y = 0; y < height; y++)
3724 {
3725 for(int x = 0; x < width; x += 4)
3726 {
3727 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
3728 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
3729 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x));
3730 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x));
3731
3732 c0 = _mm_avg_epu8(c0, c1);
3733 c2 = _mm_avg_epu8(c2, c3);
3734 c0 = _mm_avg_epu8(c0, c2);
3735
3736 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
3737 }
3738
3739 source0 += pitch;
3740 source1 += pitch;
3741 source2 += pitch;
3742 source3 += pitch;
3743 }
3744 }
3745 else if(internal.depth == 8)
3746 {
3747 for(int y = 0; y < height; y++)
3748 {
3749 for(int x = 0; x < width; x += 4)
3750 {
3751 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
3752 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
3753 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x));
3754 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x));
3755 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x));
3756 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x));
3757 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x));
3758 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x));
3759
3760 c0 = _mm_avg_epu8(c0, c1);
3761 c2 = _mm_avg_epu8(c2, c3);
3762 c4 = _mm_avg_epu8(c4, c5);
3763 c6 = _mm_avg_epu8(c6, c7);
3764 c0 = _mm_avg_epu8(c0, c2);
3765 c4 = _mm_avg_epu8(c4, c6);
3766 c0 = _mm_avg_epu8(c0, c4);
3767
3768 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
3769 }
3770
3771 source0 += pitch;
3772 source1 += pitch;
3773 source2 += pitch;
3774 source3 += pitch;
3775 source4 += pitch;
3776 source5 += pitch;
3777 source6 += pitch;
3778 source7 += pitch;
3779 }
3780 }
3781 else if(internal.depth == 16)
3782 {
3783 for(int y = 0; y < height; y++)
3784 {
3785 for(int x = 0; x < width; x += 4)
3786 {
3787 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
3788 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
3789 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x));
3790 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x));
3791 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x));
3792 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x));
3793 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x));
3794 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x));
3795 __m128i c8 = _mm_load_si128((__m128i*)(source8 + 4 * x));
3796 __m128i c9 = _mm_load_si128((__m128i*)(source9 + 4 * x));
3797 __m128i cA = _mm_load_si128((__m128i*)(sourceA + 4 * x));
3798 __m128i cB = _mm_load_si128((__m128i*)(sourceB + 4 * x));
3799 __m128i cC = _mm_load_si128((__m128i*)(sourceC + 4 * x));
3800 __m128i cD = _mm_load_si128((__m128i*)(sourceD + 4 * x));
3801 __m128i cE = _mm_load_si128((__m128i*)(sourceE + 4 * x));
3802 __m128i cF = _mm_load_si128((__m128i*)(sourceF + 4 * x));
3803
3804 c0 = _mm_avg_epu8(c0, c1);
3805 c2 = _mm_avg_epu8(c2, c3);
3806 c4 = _mm_avg_epu8(c4, c5);
3807 c6 = _mm_avg_epu8(c6, c7);
3808 c8 = _mm_avg_epu8(c8, c9);
3809 cA = _mm_avg_epu8(cA, cB);
3810 cC = _mm_avg_epu8(cC, cD);
3811 cE = _mm_avg_epu8(cE, cF);
3812 c0 = _mm_avg_epu8(c0, c2);
3813 c4 = _mm_avg_epu8(c4, c6);
3814 c8 = _mm_avg_epu8(c8, cA);
3815 cC = _mm_avg_epu8(cC, cE);
3816 c0 = _mm_avg_epu8(c0, c4);
3817 c8 = _mm_avg_epu8(c8, cC);
3818 c0 = _mm_avg_epu8(c0, c8);
3819
3820 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
3821 }
3822
3823 source0 += pitch;
3824 source1 += pitch;
3825 source2 += pitch;
3826 source3 += pitch;
3827 source4 += pitch;
3828 source5 += pitch;
3829 source6 += pitch;
3830 source7 += pitch;
3831 source8 += pitch;
3832 source9 += pitch;
3833 sourceA += pitch;
3834 sourceB += pitch;
3835 sourceC += pitch;
3836 sourceD += pitch;
3837 sourceE += pitch;
3838 sourceF += pitch;
3839 }
3840 }
3841 else ASSERT(false);
3842 }
3843 else
3844 {
3845 #define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7F7F7F7F) + (((x) ^ (y)) & 0x01010101))
3846
3847 if(internal.depth == 2)
3848 {
3849 for(int y = 0; y < height; y++)
3850 {
3851 for(int x = 0; x < width; x++)
3852 {
3853 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
3854 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
3855
3856 c0 = AVERAGE(c0, c1);
3857
3858 *(unsigned int*)(source0 + 4 * x) = c0;
3859 }
3860
3861 source0 += pitch;
3862 source1 += pitch;
3863 }
3864 }
3865 else if(internal.depth == 4)
3866 {
3867 for(int y = 0; y < height; y++)
3868 {
3869 for(int x = 0; x < width; x++)
3870 {
3871 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
3872 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
3873 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
3874 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
3875
3876 c0 = AVERAGE(c0, c1);
3877 c2 = AVERAGE(c2, c3);
3878 c0 = AVERAGE(c0, c2);
3879
3880 *(unsigned int*)(source0 + 4 * x) = c0;
3881 }
3882
3883 source0 += pitch;
3884 source1 += pitch;
3885 source2 += pitch;
3886 source3 += pitch;
3887 }
3888 }
3889 else if(internal.depth == 8)
3890 {
3891 for(int y = 0; y < height; y++)
3892 {
3893 for(int x = 0; x < width; x++)
3894 {
3895 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
3896 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
3897 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
3898 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
3899 unsigned int c4 = *(unsigned int*)(source4 + 4 * x);
3900 unsigned int c5 = *(unsigned int*)(source5 + 4 * x);
3901 unsigned int c6 = *(unsigned int*)(source6 + 4 * x);
3902 unsigned int c7 = *(unsigned int*)(source7 + 4 * x);
3903
3904 c0 = AVERAGE(c0, c1);
3905 c2 = AVERAGE(c2, c3);
3906 c4 = AVERAGE(c4, c5);
3907 c6 = AVERAGE(c6, c7);
3908 c0 = AVERAGE(c0, c2);
3909 c4 = AVERAGE(c4, c6);
3910 c0 = AVERAGE(c0, c4);
3911
3912 *(unsigned int*)(source0 + 4 * x) = c0;
3913 }
3914
3915 source0 += pitch;
3916 source1 += pitch;
3917 source2 += pitch;
3918 source3 += pitch;
3919 source4 += pitch;
3920 source5 += pitch;
3921 source6 += pitch;
3922 source7 += pitch;
3923 }
3924 }
3925 else if(internal.depth == 16)
3926 {
3927 for(int y = 0; y < height; y++)
3928 {
3929 for(int x = 0; x < width; x++)
3930 {
3931 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
3932 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
3933 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
3934 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
3935 unsigned int c4 = *(unsigned int*)(source4 + 4 * x);
3936 unsigned int c5 = *(unsigned int*)(source5 + 4 * x);
3937 unsigned int c6 = *(unsigned int*)(source6 + 4 * x);
3938 unsigned int c7 = *(unsigned int*)(source7 + 4 * x);
3939 unsigned int c8 = *(unsigned int*)(source8 + 4 * x);
3940 unsigned int c9 = *(unsigned int*)(source9 + 4 * x);
3941 unsigned int cA = *(unsigned int*)(sourceA + 4 * x);
3942 unsigned int cB = *(unsigned int*)(sourceB + 4 * x);
3943 unsigned int cC = *(unsigned int*)(sourceC + 4 * x);
3944 unsigned int cD = *(unsigned int*)(sourceD + 4 * x);
3945 unsigned int cE = *(unsigned int*)(sourceE + 4 * x);
3946 unsigned int cF = *(unsigned int*)(sourceF + 4 * x);
3947
3948 c0 = AVERAGE(c0, c1);
3949 c2 = AVERAGE(c2, c3);
3950 c4 = AVERAGE(c4, c5);
3951 c6 = AVERAGE(c6, c7);
3952 c8 = AVERAGE(c8, c9);
3953 cA = AVERAGE(cA, cB);
3954 cC = AVERAGE(cC, cD);
3955 cE = AVERAGE(cE, cF);
3956 c0 = AVERAGE(c0, c2);
3957 c4 = AVERAGE(c4, c6);
3958 c8 = AVERAGE(c8, cA);
3959 cC = AVERAGE(cC, cE);
3960 c0 = AVERAGE(c0, c4);
3961 c8 = AVERAGE(c8, cC);
3962 c0 = AVERAGE(c0, c8);
3963
3964 *(unsigned int*)(source0 + 4 * x) = c0;
3965 }
3966
3967 source0 += pitch;
3968 source1 += pitch;
3969 source2 += pitch;
3970 source3 += pitch;
3971 source4 += pitch;
3972 source5 += pitch;
3973 source6 += pitch;
3974 source7 += pitch;
3975 source8 += pitch;
3976 source9 += pitch;
3977 sourceA += pitch;
3978 sourceB += pitch;
3979 sourceC += pitch;
3980 sourceD += pitch;
3981 sourceE += pitch;
3982 sourceF += pitch;
3983 }
3984 }
3985 else ASSERT(false);
3986
3987 #undef AVERAGE
3988 }
3989 }
3990 else if(internal.format == FORMAT_G16R16)
3991 {
3992 if(CPUID::supportsSSE2() && (width % 4) == 0)
3993 {
3994 if(internal.depth == 2)
3995 {
3996 for(int y = 0; y < height; y++)
3997 {
3998 for(int x = 0; x < width; x += 4)
3999 {
4000 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
4001 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
4002
4003 c0 = _mm_avg_epu16(c0, c1);
4004
4005 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
4006 }
4007
4008 source0 += pitch;
4009 source1 += pitch;
4010 }
4011 }
4012 else if(internal.depth == 4)
4013 {
4014 for(int y = 0; y < height; y++)
4015 {
4016 for(int x = 0; x < width; x += 4)
4017 {
4018 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
4019 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
4020 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x));
4021 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x));
4022
4023 c0 = _mm_avg_epu16(c0, c1);
4024 c2 = _mm_avg_epu16(c2, c3);
4025 c0 = _mm_avg_epu16(c0, c2);
4026
4027 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
4028 }
4029
4030 source0 += pitch;
4031 source1 += pitch;
4032 source2 += pitch;
4033 source3 += pitch;
4034 }
4035 }
4036 else if(internal.depth == 8)
4037 {
4038 for(int y = 0; y < height; y++)
4039 {
4040 for(int x = 0; x < width; x += 4)
4041 {
4042 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
4043 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
4044 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x));
4045 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x));
4046 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x));
4047 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x));
4048 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x));
4049 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x));
4050
4051 c0 = _mm_avg_epu16(c0, c1);
4052 c2 = _mm_avg_epu16(c2, c3);
4053 c4 = _mm_avg_epu16(c4, c5);
4054 c6 = _mm_avg_epu16(c6, c7);
4055 c0 = _mm_avg_epu16(c0, c2);
4056 c4 = _mm_avg_epu16(c4, c6);
4057 c0 = _mm_avg_epu16(c0, c4);
4058
4059 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
4060 }
4061
4062 source0 += pitch;
4063 source1 += pitch;
4064 source2 += pitch;
4065 source3 += pitch;
4066 source4 += pitch;
4067 source5 += pitch;
4068 source6 += pitch;
4069 source7 += pitch;
4070 }
4071 }
4072 else if(internal.depth == 16)
4073 {
4074 for(int y = 0; y < height; y++)
4075 {
4076 for(int x = 0; x < width; x += 4)
4077 {
4078 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
4079 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
4080 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x));
4081 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x));
4082 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x));
4083 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x));
4084 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x));
4085 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x));
4086 __m128i c8 = _mm_load_si128((__m128i*)(source8 + 4 * x));
4087 __m128i c9 = _mm_load_si128((__m128i*)(source9 + 4 * x));
4088 __m128i cA = _mm_load_si128((__m128i*)(sourceA + 4 * x));
4089 __m128i cB = _mm_load_si128((__m128i*)(sourceB + 4 * x));
4090 __m128i cC = _mm_load_si128((__m128i*)(sourceC + 4 * x));
4091 __m128i cD = _mm_load_si128((__m128i*)(sourceD + 4 * x));
4092 __m128i cE = _mm_load_si128((__m128i*)(sourceE + 4 * x));
4093 __m128i cF = _mm_load_si128((__m128i*)(sourceF + 4 * x));
4094
4095 c0 = _mm_avg_epu16(c0, c1);
4096 c2 = _mm_avg_epu16(c2, c3);
4097 c4 = _mm_avg_epu16(c4, c5);
4098 c6 = _mm_avg_epu16(c6, c7);
4099 c8 = _mm_avg_epu16(c8, c9);
4100 cA = _mm_avg_epu16(cA, cB);
4101 cC = _mm_avg_epu16(cC, cD);
4102 cE = _mm_avg_epu16(cE, cF);
4103 c0 = _mm_avg_epu16(c0, c2);
4104 c4 = _mm_avg_epu16(c4, c6);
4105 c8 = _mm_avg_epu16(c8, cA);
4106 cC = _mm_avg_epu16(cC, cE);
4107 c0 = _mm_avg_epu16(c0, c4);
4108 c8 = _mm_avg_epu16(c8, cC);
4109 c0 = _mm_avg_epu16(c0, c8);
4110
4111 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
4112 }
4113
4114 source0 += pitch;
4115 source1 += pitch;
4116 source2 += pitch;
4117 source3 += pitch;
4118 source4 += pitch;
4119 source5 += pitch;
4120 source6 += pitch;
4121 source7 += pitch;
4122 source8 += pitch;
4123 source9 += pitch;
4124 sourceA += pitch;
4125 sourceB += pitch;
4126 sourceC += pitch;
4127 sourceD += pitch;
4128 sourceE += pitch;
4129 sourceF += pitch;
4130 }
4131 }
4132 else ASSERT(false);
4133 }
4134 else
4135 {
4136 #define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7FFF7FFF) + (((x) ^ (y)) & 0x00010001))
4137
4138 if(internal.depth == 2)
4139 {
4140 for(int y = 0; y < height; y++)
4141 {
4142 for(int x = 0; x < width; x++)
4143 {
4144 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4145 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4146
4147 c0 = AVERAGE(c0, c1);
4148
4149 *(unsigned int*)(source0 + 4 * x) = c0;
4150 }
4151
4152 source0 += pitch;
4153 source1 += pitch;
4154 }
4155 }
4156 else if(internal.depth == 4)
4157 {
4158 for(int y = 0; y < height; y++)
4159 {
4160 for(int x = 0; x < width; x++)
4161 {
4162 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4163 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4164 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4165 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4166
4167 c0 = AVERAGE(c0, c1);
4168 c2 = AVERAGE(c2, c3);
4169 c0 = AVERAGE(c0, c2);
4170
4171 *(unsigned int*)(source0 + 4 * x) = c0;
4172 }
4173
4174 source0 += pitch;
4175 source1 += pitch;
4176 source2 += pitch;
4177 source3 += pitch;
4178 }
4179 }
4180 else if(internal.depth == 8)
4181 {
4182 for(int y = 0; y < height; y++)
4183 {
4184 for(int x = 0; x < width; x++)
4185 {
4186 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4187 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4188 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4189 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4190 unsigned int c4 = *(unsigned int*)(source4 + 4 * x);
4191 unsigned int c5 = *(unsigned int*)(source5 + 4 * x);
4192 unsigned int c6 = *(unsigned int*)(source6 + 4 * x);
4193 unsigned int c7 = *(unsigned int*)(source7 + 4 * x);
4194
4195 c0 = AVERAGE(c0, c1);
4196 c2 = AVERAGE(c2, c3);
4197 c4 = AVERAGE(c4, c5);
4198 c6 = AVERAGE(c6, c7);
4199 c0 = AVERAGE(c0, c2);
4200 c4 = AVERAGE(c4, c6);
4201 c0 = AVERAGE(c0, c4);
4202
4203 *(unsigned int*)(source0 + 4 * x) = c0;
4204 }
4205
4206 source0 += pitch;
4207 source1 += pitch;
4208 source2 += pitch;
4209 source3 += pitch;
4210 source4 += pitch;
4211 source5 += pitch;
4212 source6 += pitch;
4213 source7 += pitch;
4214 }
4215 }
4216 else if(internal.depth == 16)
4217 {
4218 for(int y = 0; y < height; y++)
4219 {
4220 for(int x = 0; x < width; x++)
4221 {
4222 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4223 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4224 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4225 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4226 unsigned int c4 = *(unsigned int*)(source4 + 4 * x);
4227 unsigned int c5 = *(unsigned int*)(source5 + 4 * x);
4228 unsigned int c6 = *(unsigned int*)(source6 + 4 * x);
4229 unsigned int c7 = *(unsigned int*)(source7 + 4 * x);
4230 unsigned int c8 = *(unsigned int*)(source8 + 4 * x);
4231 unsigned int c9 = *(unsigned int*)(source9 + 4 * x);
4232 unsigned int cA = *(unsigned int*)(sourceA + 4 * x);
4233 unsigned int cB = *(unsigned int*)(sourceB + 4 * x);
4234 unsigned int cC = *(unsigned int*)(sourceC + 4 * x);
4235 unsigned int cD = *(unsigned int*)(sourceD + 4 * x);
4236 unsigned int cE = *(unsigned int*)(sourceE + 4 * x);
4237 unsigned int cF = *(unsigned int*)(sourceF + 4 * x);
4238
4239 c0 = AVERAGE(c0, c1);
4240 c2 = AVERAGE(c2, c3);
4241 c4 = AVERAGE(c4, c5);
4242 c6 = AVERAGE(c6, c7);
4243 c8 = AVERAGE(c8, c9);
4244 cA = AVERAGE(cA, cB);
4245 cC = AVERAGE(cC, cD);
4246 cE = AVERAGE(cE, cF);
4247 c0 = AVERAGE(c0, c2);
4248 c4 = AVERAGE(c4, c6);
4249 c8 = AVERAGE(c8, cA);
4250 cC = AVERAGE(cC, cE);
4251 c0 = AVERAGE(c0, c4);
4252 c8 = AVERAGE(c8, cC);
4253 c0 = AVERAGE(c0, c8);
4254
4255 *(unsigned int*)(source0 + 4 * x) = c0;
4256 }
4257
4258 source0 += pitch;
4259 source1 += pitch;
4260 source2 += pitch;
4261 source3 += pitch;
4262 source4 += pitch;
4263 source5 += pitch;
4264 source6 += pitch;
4265 source7 += pitch;
4266 source8 += pitch;
4267 source9 += pitch;
4268 sourceA += pitch;
4269 sourceB += pitch;
4270 sourceC += pitch;
4271 sourceD += pitch;
4272 sourceE += pitch;
4273 sourceF += pitch;
4274 }
4275 }
4276 else ASSERT(false);
4277
4278 #undef AVERAGE
4279 }
4280 }
4281 else if(internal.format == FORMAT_A16B16G16R16)
4282 {
4283 if(CPUID::supportsSSE2() && (width % 2) == 0)
4284 {
4285 if(internal.depth == 2)
4286 {
4287 for(int y = 0; y < height; y++)
4288 {
4289 for(int x = 0; x < width; x += 2)
4290 {
4291 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x));
4292 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x));
4293
4294 c0 = _mm_avg_epu16(c0, c1);
4295
4296 _mm_store_si128((__m128i*)(source0 + 8 * x), c0);
4297 }
4298
4299 source0 += pitch;
4300 source1 += pitch;
4301 }
4302 }
4303 else if(internal.depth == 4)
4304 {
4305 for(int y = 0; y < height; y++)
4306 {
4307 for(int x = 0; x < width; x += 2)
4308 {
4309 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x));
4310 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x));
4311 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 8 * x));
4312 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 8 * x));
4313
4314 c0 = _mm_avg_epu16(c0, c1);
4315 c2 = _mm_avg_epu16(c2, c3);
4316 c0 = _mm_avg_epu16(c0, c2);
4317
4318 _mm_store_si128((__m128i*)(source0 + 8 * x), c0);
4319 }
4320
4321 source0 += pitch;
4322 source1 += pitch;
4323 source2 += pitch;
4324 source3 += pitch;
4325 }
4326 }
4327 else if(internal.depth == 8)
4328 {
4329 for(int y = 0; y < height; y++)
4330 {
4331 for(int x = 0; x < width; x += 2)
4332 {
4333 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x));
4334 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x));
4335 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 8 * x));
4336 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 8 * x));
4337 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 8 * x));
4338 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 8 * x));
4339 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 8 * x));
4340 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 8 * x));
4341
4342 c0 = _mm_avg_epu16(c0, c1);
4343 c2 = _mm_avg_epu16(c2, c3);
4344 c4 = _mm_avg_epu16(c4, c5);
4345 c6 = _mm_avg_epu16(c6, c7);
4346 c0 = _mm_avg_epu16(c0, c2);
4347 c4 = _mm_avg_epu16(c4, c6);
4348 c0 = _mm_avg_epu16(c0, c4);
4349
4350 _mm_store_si128((__m128i*)(source0 + 8 * x), c0);
4351 }
4352
4353 source0 += pitch;
4354 source1 += pitch;
4355 source2 += pitch;
4356 source3 += pitch;
4357 source4 += pitch;
4358 source5 += pitch;
4359 source6 += pitch;
4360 source7 += pitch;
4361 }
4362 }
4363 else if(internal.depth == 16)
4364 {
4365 for(int y = 0; y < height; y++)
4366 {
4367 for(int x = 0; x < width; x += 2)
4368 {
4369 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x));
4370 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x));
4371 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 8 * x));
4372 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 8 * x));
4373 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 8 * x));
4374 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 8 * x));
4375 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 8 * x));
4376 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 8 * x));
4377 __m128i c8 = _mm_load_si128((__m128i*)(source8 + 8 * x));
4378 __m128i c9 = _mm_load_si128((__m128i*)(source9 + 8 * x));
4379 __m128i cA = _mm_load_si128((__m128i*)(sourceA + 8 * x));
4380 __m128i cB = _mm_load_si128((__m128i*)(sourceB + 8 * x));
4381 __m128i cC = _mm_load_si128((__m128i*)(sourceC + 8 * x));
4382 __m128i cD = _mm_load_si128((__m128i*)(sourceD + 8 * x));
4383 __m128i cE = _mm_load_si128((__m128i*)(sourceE + 8 * x));
4384 __m128i cF = _mm_load_si128((__m128i*)(sourceF + 8 * x));
4385
4386 c0 = _mm_avg_epu16(c0, c1);
4387 c2 = _mm_avg_epu16(c2, c3);
4388 c4 = _mm_avg_epu16(c4, c5);
4389 c6 = _mm_avg_epu16(c6, c7);
4390 c8 = _mm_avg_epu16(c8, c9);
4391 cA = _mm_avg_epu16(cA, cB);
4392 cC = _mm_avg_epu16(cC, cD);
4393 cE = _mm_avg_epu16(cE, cF);
4394 c0 = _mm_avg_epu16(c0, c2);
4395 c4 = _mm_avg_epu16(c4, c6);
4396 c8 = _mm_avg_epu16(c8, cA);
4397 cC = _mm_avg_epu16(cC, cE);
4398 c0 = _mm_avg_epu16(c0, c4);
4399 c8 = _mm_avg_epu16(c8, cC);
4400 c0 = _mm_avg_epu16(c0, c8);
4401
4402 _mm_store_si128((__m128i*)(source0 + 8 * x), c0);
4403 }
4404
4405 source0 += pitch;
4406 source1 += pitch;
4407 source2 += pitch;
4408 source3 += pitch;
4409 source4 += pitch;
4410 source5 += pitch;
4411 source6 += pitch;
4412 source7 += pitch;
4413 source8 += pitch;
4414 source9 += pitch;
4415 sourceA += pitch;
4416 sourceB += pitch;
4417 sourceC += pitch;
4418 sourceD += pitch;
4419 sourceE += pitch;
4420 sourceF += pitch;
4421 }
4422 }
4423 else ASSERT(false);
4424 }
4425 else
4426 {
4427 #define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7FFF7FFF) + (((x) ^ (y)) & 0x00010001))
4428
4429 if(internal.depth == 2)
4430 {
4431 for(int y = 0; y < height; y++)
4432 {
4433 for(int x = 0; x < 2 * width; x++)
4434 {
4435 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4436 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4437
4438 c0 = AVERAGE(c0, c1);
4439
4440 *(unsigned int*)(source0 + 4 * x) = c0;
4441 }
4442
4443 source0 += pitch;
4444 source1 += pitch;
4445 }
4446 }
4447 else if(internal.depth == 4)
4448 {
4449 for(int y = 0; y < height; y++)
4450 {
4451 for(int x = 0; x < 2 * width; x++)
4452 {
4453 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4454 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4455 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4456 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4457
4458 c0 = AVERAGE(c0, c1);
4459 c2 = AVERAGE(c2, c3);
4460 c0 = AVERAGE(c0, c2);
4461
4462 *(unsigned int*)(source0 + 4 * x) = c0;
4463 }
4464
4465 source0 += pitch;
4466 source1 += pitch;
4467 source2 += pitch;
4468 source3 += pitch;
4469 }
4470 }
4471 else if(internal.depth == 8)
4472 {
4473 for(int y = 0; y < height; y++)
4474 {
4475 for(int x = 0; x < 2 * width; x++)
4476 {
4477 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4478 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4479 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4480 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4481 unsigned int c4 = *(unsigned int*)(source4 + 4 * x);
4482 unsigned int c5 = *(unsigned int*)(source5 + 4 * x);
4483 unsigned int c6 = *(unsigned int*)(source6 + 4 * x);
4484 unsigned int c7 = *(unsigned int*)(source7 + 4 * x);
4485
4486 c0 = AVERAGE(c0, c1);
4487 c2 = AVERAGE(c2, c3);
4488 c4 = AVERAGE(c4, c5);
4489 c6 = AVERAGE(c6, c7);
4490 c0 = AVERAGE(c0, c2);
4491 c4 = AVERAGE(c4, c6);
4492 c0 = AVERAGE(c0, c4);
4493
4494 *(unsigned int*)(source0 + 4 * x) = c0;
4495 }
4496
4497 source0 += pitch;
4498 source1 += pitch;
4499 source2 += pitch;
4500 source3 += pitch;
4501 source4 += pitch;
4502 source5 += pitch;
4503 source6 += pitch;
4504 source7 += pitch;
4505 }
4506 }
4507 else if(internal.depth == 16)
4508 {
4509 for(int y = 0; y < height; y++)
4510 {
4511 for(int x = 0; x < 2 * width; x++)
4512 {
4513 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4514 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4515 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4516 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4517 unsigned int c4 = *(unsigned int*)(source4 + 4 * x);
4518 unsigned int c5 = *(unsigned int*)(source5 + 4 * x);
4519 unsigned int c6 = *(unsigned int*)(source6 + 4 * x);
4520 unsigned int c7 = *(unsigned int*)(source7 + 4 * x);
4521 unsigned int c8 = *(unsigned int*)(source8 + 4 * x);
4522 unsigned int c9 = *(unsigned int*)(source9 + 4 * x);
4523 unsigned int cA = *(unsigned int*)(sourceA + 4 * x);
4524 unsigned int cB = *(unsigned int*)(sourceB + 4 * x);
4525 unsigned int cC = *(unsigned int*)(sourceC + 4 * x);
4526 unsigned int cD = *(unsigned int*)(sourceD + 4 * x);
4527 unsigned int cE = *(unsigned int*)(sourceE + 4 * x);
4528 unsigned int cF = *(unsigned int*)(sourceF + 4 * x);
4529
4530 c0 = AVERAGE(c0, c1);
4531 c2 = AVERAGE(c2, c3);
4532 c4 = AVERAGE(c4, c5);
4533 c6 = AVERAGE(c6, c7);
4534 c8 = AVERAGE(c8, c9);
4535 cA = AVERAGE(cA, cB);
4536 cC = AVERAGE(cC, cD);
4537 cE = AVERAGE(cE, cF);
4538 c0 = AVERAGE(c0, c2);
4539 c4 = AVERAGE(c4, c6);
4540 c8 = AVERAGE(c8, cA);
4541 cC = AVERAGE(cC, cE);
4542 c0 = AVERAGE(c0, c4);
4543 c8 = AVERAGE(c8, cC);
4544 c0 = AVERAGE(c0, c8);
4545
4546 *(unsigned int*)(source0 + 4 * x) = c0;
4547 }
4548
4549 source0 += pitch;
4550 source1 += pitch;
4551 source2 += pitch;
4552 source3 += pitch;
4553 source4 += pitch;
4554 source5 += pitch;
4555 source6 += pitch;
4556 source7 += pitch;
4557 source8 += pitch;
4558 source9 += pitch;
4559 sourceA += pitch;
4560 sourceB += pitch;
4561 sourceC += pitch;
4562 sourceD += pitch;
4563 sourceE += pitch;
4564 sourceF += pitch;
4565 }
4566 }
4567 else ASSERT(false);
4568
4569 #undef AVERAGE
4570 }
4571 }
4572 else if(internal.format == FORMAT_R32F)
4573 {
4574 if(CPUID::supportsSSE() && (width % 4) == 0)
4575 {
4576 if(internal.depth == 2)
4577 {
4578 for(int y = 0; y < height; y++)
4579 {
4580 for(int x = 0; x < width; x += 4)
4581 {
4582 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x));
4583 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x));
4584
4585 c0 = _mm_add_ps(c0, c1);
4586 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 2.0f));
4587
4588 _mm_store_ps((float*)(source0 + 4 * x), c0);
4589 }
4590
4591 source0 += pitch;
4592 source1 += pitch;
4593 }
4594 }
4595 else if(internal.depth == 4)
4596 {
4597 for(int y = 0; y < height; y++)
4598 {
4599 for(int x = 0; x < width; x += 4)
4600 {
4601 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x));
4602 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x));
4603 __m128 c2 = _mm_load_ps((float*)(source2 + 4 * x));
4604 __m128 c3 = _mm_load_ps((float*)(source3 + 4 * x));
4605
4606 c0 = _mm_add_ps(c0, c1);
4607 c2 = _mm_add_ps(c2, c3);
4608 c0 = _mm_add_ps(c0, c2);
4609 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 4.0f));
4610
4611 _mm_store_ps((float*)(source0 + 4 * x), c0);
4612 }
4613
4614 source0 += pitch;
4615 source1 += pitch;
4616 source2 += pitch;
4617 source3 += pitch;
4618 }
4619 }
4620 else if(internal.depth == 8)
4621 {
4622 for(int y = 0; y < height; y++)
4623 {
4624 for(int x = 0; x < width; x += 4)
4625 {
4626 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x));
4627 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x));
4628 __m128 c2 = _mm_load_ps((float*)(source2 + 4 * x));
4629 __m128 c3 = _mm_load_ps((float*)(source3 + 4 * x));
4630 __m128 c4 = _mm_load_ps((float*)(source4 + 4 * x));
4631 __m128 c5 = _mm_load_ps((float*)(source5 + 4 * x));
4632 __m128 c6 = _mm_load_ps((float*)(source6 + 4 * x));
4633 __m128 c7 = _mm_load_ps((float*)(source7 + 4 * x));
4634
4635 c0 = _mm_add_ps(c0, c1);
4636 c2 = _mm_add_ps(c2, c3);
4637 c4 = _mm_add_ps(c4, c5);
4638 c6 = _mm_add_ps(c6, c7);
4639 c0 = _mm_add_ps(c0, c2);
4640 c4 = _mm_add_ps(c4, c6);
4641 c0 = _mm_add_ps(c0, c4);
4642 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 8.0f));
4643
4644 _mm_store_ps((float*)(source0 + 4 * x), c0);
4645 }
4646
4647 source0 += pitch;
4648 source1 += pitch;
4649 source2 += pitch;
4650 source3 += pitch;
4651 source4 += pitch;
4652 source5 += pitch;
4653 source6 += pitch;
4654 source7 += pitch;
4655 }
4656 }
4657 else if(internal.depth == 16)
4658 {
4659 for(int y = 0; y < height; y++)
4660 {
4661 for(int x = 0; x < width; x += 4)
4662 {
4663 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x));
4664 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x));
4665 __m128 c2 = _mm_load_ps((float*)(source2 + 4 * x));
4666 __m128 c3 = _mm_load_ps((float*)(source3 + 4 * x));
4667 __m128 c4 = _mm_load_ps((float*)(source4 + 4 * x));
4668 __m128 c5 = _mm_load_ps((float*)(source5 + 4 * x));
4669 __m128 c6 = _mm_load_ps((float*)(source6 + 4 * x));
4670 __m128 c7 = _mm_load_ps((float*)(source7 + 4 * x));
4671 __m128 c8 = _mm_load_ps((float*)(source8 + 4 * x));
4672 __m128 c9 = _mm_load_ps((float*)(source9 + 4 * x));
4673 __m128 cA = _mm_load_ps((float*)(sourceA + 4 * x));
4674 __m128 cB = _mm_load_ps((float*)(sourceB + 4 * x));
4675 __m128 cC = _mm_load_ps((float*)(sourceC + 4 * x));
4676 __m128 cD = _mm_load_ps((float*)(sourceD + 4 * x));
4677 __m128 cE = _mm_load_ps((float*)(sourceE + 4 * x));
4678 __m128 cF = _mm_load_ps((float*)(sourceF + 4 * x));
4679
4680 c0 = _mm_add_ps(c0, c1);
4681 c2 = _mm_add_ps(c2, c3);
4682 c4 = _mm_add_ps(c4, c5);
4683 c6 = _mm_add_ps(c6, c7);
4684 c8 = _mm_add_ps(c8, c9);
4685 cA = _mm_add_ps(cA, cB);
4686 cC = _mm_add_ps(cC, cD);
4687 cE = _mm_add_ps(cE, cF);
4688 c0 = _mm_add_ps(c0, c2);
4689 c4 = _mm_add_ps(c4, c6);
4690 c8 = _mm_add_ps(c8, cA);
4691 cC = _mm_add_ps(cC, cE);
4692 c0 = _mm_add_ps(c0, c4);
4693 c8 = _mm_add_ps(c8, cC);
4694 c0 = _mm_add_ps(c0, c8);
4695 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 16.0f));
4696
4697 _mm_store_ps((float*)(source0 + 4 * x), c0);
4698 }
4699
4700 source0 += pitch;
4701 source1 += pitch;
4702 source2 += pitch;
4703 source3 += pitch;
4704 source4 += pitch;
4705 source5 += pitch;
4706 source6 += pitch;
4707 source7 += pitch;
4708 source8 += pitch;
4709 source9 += pitch;
4710 sourceA += pitch;
4711 sourceB += pitch;
4712 sourceC += pitch;
4713 sourceD += pitch;
4714 sourceE += pitch;
4715 sourceF += pitch;
4716 }
4717 }
4718 else ASSERT(false);
4719 }
4720 else
4721 {
4722 if(internal.depth == 2)
4723 {
4724 for(int y = 0; y < height; y++)
4725 {
4726 for(int x = 0; x < width; x++)
4727 {
4728 float c0 = *(float*)(source0 + 4 * x);
4729 float c1 = *(float*)(source1 + 4 * x);
4730
4731 c0 = c0 + c1;
4732 c0 *= 1.0f / 2.0f;
4733
4734 *(float*)(source0 + 4 * x) = c0;
4735 }
4736
4737 source0 += pitch;
4738 source1 += pitch;
4739 }
4740 }
4741 else if(internal.depth == 4)
4742 {
4743 for(int y = 0; y < height; y++)
4744 {
4745 for(int x = 0; x < width; x++)
4746 {
4747 float c0 = *(float*)(source0 + 4 * x);
4748 float c1 = *(float*)(source1 + 4 * x);
4749 float c2 = *(float*)(source2 + 4 * x);
4750 float c3 = *(float*)(source3 + 4 * x);
4751
4752 c0 = c0 + c1;
4753 c2 = c2 + c3;
4754 c0 = c0 + c2;
4755 c0 *= 1.0f / 4.0f;
4756
4757 *(float*)(source0 + 4 * x) = c0;
4758 }
4759
4760 source0 += pitch;
4761 source1 += pitch;
4762 source2 += pitch;
4763 source3 += pitch;
4764 }
4765 }
4766 else if(internal.depth == 8)
4767 {
4768 for(int y = 0; y < height; y++)
4769 {
4770 for(int x = 0; x < width; x++)
4771 {
4772 float c0 = *(float*)(source0 + 4 * x);
4773 float c1 = *(float*)(source1 + 4 * x);
4774 float c2 = *(float*)(source2 + 4 * x);
4775 float c3 = *(float*)(source3 + 4 * x);
4776 float c4 = *(float*)(source4 + 4 * x);
4777 float c5 = *(float*)(source5 + 4 * x);
4778 float c6 = *(float*)(source6 + 4 * x);
4779 float c7 = *(float*)(source7 + 4 * x);
4780
4781 c0 = c0 + c1;
4782 c2 = c2 + c3;
4783 c4 = c4 + c5;
4784 c6 = c6 + c7;
4785 c0 = c0 + c2;
4786 c4 = c4 + c6;
4787 c0 = c0 + c4;
4788 c0 *= 1.0f / 8.0f;
4789
4790 *(float*)(source0 + 4 * x) = c0;
4791 }
4792
4793 source0 += pitch;
4794 source1 += pitch;
4795 source2 += pitch;
4796 source3 += pitch;
4797 source4 += pitch;
4798 source5 += pitch;
4799 source6 += pitch;
4800 source7 += pitch;
4801 }
4802 }
4803 else if(internal.depth == 16)
4804 {
4805 for(int y = 0; y < height; y++)
4806 {
4807 for(int x = 0; x < width; x++)
4808 {
4809 float c0 = *(float*)(source0 + 4 * x);
4810 float c1 = *(float*)(source1 + 4 * x);
4811 float c2 = *(float*)(source2 + 4 * x);
4812 float c3 = *(float*)(source3 + 4 * x);
4813 float c4 = *(float*)(source4 + 4 * x);
4814 float c5 = *(float*)(source5 + 4 * x);
4815 float c6 = *(float*)(source6 + 4 * x);
4816 float c7 = *(float*)(source7 + 4 * x);
4817 float c8 = *(float*)(source8 + 4 * x);
4818 float c9 = *(float*)(source9 + 4 * x);
4819 float cA = *(float*)(sourceA + 4 * x);
4820 float cB = *(float*)(sourceB + 4 * x);
4821 float cC = *(float*)(sourceC + 4 * x);
4822 float cD = *(float*)(sourceD + 4 * x);
4823 float cE = *(float*)(sourceE + 4 * x);
4824 float cF = *(float*)(sourceF + 4 * x);
4825
4826 c0 = c0 + c1;
4827 c2 = c2 + c3;
4828 c4 = c4 + c5;
4829 c6 = c6 + c7;
4830 c8 = c8 + c9;
4831 cA = cA + cB;
4832 cC = cC + cD;
4833 cE = cE + cF;
4834 c0 = c0 + c2;
4835 c4 = c4 + c6;
4836 c8 = c8 + cA;
4837 cC = cC + cE;
4838 c0 = c0 + c4;
4839 c8 = c8 + cC;
4840 c0 = c0 + c8;
4841 c0 *= 1.0f / 16.0f;
4842
4843 *(float*)(source0 + 4 * x) = c0;
4844 }
4845
4846 source0 += pitch;
4847 source1 += pitch;
4848 source2 += pitch;
4849 source3 += pitch;
4850 source4 += pitch;
4851 source5 += pitch;
4852 source6 += pitch;
4853 source7 += pitch;
4854 source8 += pitch;
4855 source9 += pitch;
4856 sourceA += pitch;
4857 sourceB += pitch;
4858 sourceC += pitch;
4859 sourceD += pitch;
4860 sourceE += pitch;
4861 sourceF += pitch;
4862 }
4863 }
4864 else ASSERT(false);
4865 }
4866 }
4867 else if(internal.format == FORMAT_G32R32F)
4868 {
4869 if(CPUID::supportsSSE() && (width % 2) == 0)
4870 {
4871 if(internal.depth == 2)
4872 {
4873 for(int y = 0; y < height; y++)
4874 {
4875 for(int x = 0; x < width; x += 2)
4876 {
4877 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x));
4878 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x));
4879
4880 c0 = _mm_add_ps(c0, c1);
4881 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 2.0f));
4882
4883 _mm_store_ps((float*)(source0 + 8 * x), c0);
4884 }
4885
4886 source0 += pitch;
4887 source1 += pitch;
4888 }
4889 }
4890 else if(internal.depth == 4)
4891 {
4892 for(int y = 0; y < height; y++)
4893 {
4894 for(int x = 0; x < width; x += 2)
4895 {
4896 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x));
4897 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x));
4898 __m128 c2 = _mm_load_ps((float*)(source2 + 8 * x));
4899 __m128 c3 = _mm_load_ps((float*)(source3 + 8 * x));
4900
4901 c0 = _mm_add_ps(c0, c1);
4902 c2 = _mm_add_ps(c2, c3);
4903 c0 = _mm_add_ps(c0, c2);
4904 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 4.0f));
4905
4906 _mm_store_ps((float*)(source0 + 8 * x), c0);
4907 }
4908
4909 source0 += pitch;
4910 source1 += pitch;
4911 source2 += pitch;
4912 source3 += pitch;
4913 }
4914 }
4915 else if(internal.depth == 8)
4916 {
4917 for(int y = 0; y < height; y++)
4918 {
4919 for(int x = 0; x < width; x += 2)
4920 {
4921 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x));
4922 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x));
4923 __m128 c2 = _mm_load_ps((float*)(source2 + 8 * x));
4924 __m128 c3 = _mm_load_ps((float*)(source3 + 8 * x));
4925 __m128 c4 = _mm_load_ps((float*)(source4 + 8 * x));
4926 __m128 c5 = _mm_load_ps((float*)(source5 + 8 * x));
4927 __m128 c6 = _mm_load_ps((float*)(source6 + 8 * x));
4928 __m128 c7 = _mm_load_ps((float*)(source7 + 8 * x));
4929
4930 c0 = _mm_add_ps(c0, c1);
4931 c2 = _mm_add_ps(c2, c3);
4932 c4 = _mm_add_ps(c4, c5);
4933 c6 = _mm_add_ps(c6, c7);
4934 c0 = _mm_add_ps(c0, c2);
4935 c4 = _mm_add_ps(c4, c6);
4936 c0 = _mm_add_ps(c0, c4);
4937 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 8.0f));
4938
4939 _mm_store_ps((float*)(source0 + 8 * x), c0);
4940 }
4941
4942 source0 += pitch;
4943 source1 += pitch;
4944 source2 += pitch;
4945 source3 += pitch;
4946 source4 += pitch;
4947 source5 += pitch;
4948 source6 += pitch;
4949 source7 += pitch;
4950 }
4951 }
4952 else if(internal.depth == 16)
4953 {
4954 for(int y = 0; y < height; y++)
4955 {
4956 for(int x = 0; x < width; x += 2)
4957 {
4958 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x));
4959 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x));
4960 __m128 c2 = _mm_load_ps((float*)(source2 + 8 * x));
4961 __m128 c3 = _mm_load_ps((float*)(source3 + 8 * x));
4962 __m128 c4 = _mm_load_ps((float*)(source4 + 8 * x));
4963 __m128 c5 = _mm_load_ps((float*)(source5 + 8 * x));
4964 __m128 c6 = _mm_load_ps((float*)(source6 + 8 * x));
4965 __m128 c7 = _mm_load_ps((float*)(source7 + 8 * x));
4966 __m128 c8 = _mm_load_ps((float*)(source8 + 8 * x));
4967 __m128 c9 = _mm_load_ps((float*)(source9 + 8 * x));
4968 __m128 cA = _mm_load_ps((float*)(sourceA + 8 * x));
4969 __m128 cB = _mm_load_ps((float*)(sourceB + 8 * x));
4970 __m128 cC = _mm_load_ps((float*)(sourceC + 8 * x));
4971 __m128 cD = _mm_load_ps((float*)(sourceD + 8 * x));
4972 __m128 cE = _mm_load_ps((float*)(sourceE + 8 * x));
4973 __m128 cF = _mm_load_ps((float*)(sourceF + 8 * x));
4974
4975 c0 = _mm_add_ps(c0, c1);
4976 c2 = _mm_add_ps(c2, c3);
4977 c4 = _mm_add_ps(c4, c5);
4978 c6 = _mm_add_ps(c6, c7);
4979 c8 = _mm_add_ps(c8, c9);
4980 cA = _mm_add_ps(cA, cB);
4981 cC = _mm_add_ps(cC, cD);
4982 cE = _mm_add_ps(cE, cF);
4983 c0 = _mm_add_ps(c0, c2);
4984 c4 = _mm_add_ps(c4, c6);
4985 c8 = _mm_add_ps(c8, cA);
4986 cC = _mm_add_ps(cC, cE);
4987 c0 = _mm_add_ps(c0, c4);
4988 c8 = _mm_add_ps(c8, cC);
4989 c0 = _mm_add_ps(c0, c8);
4990 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 16.0f));
4991
4992 _mm_store_ps((float*)(source0 + 8 * x), c0);
4993 }
4994
4995 source0 += pitch;
4996 source1 += pitch;
4997 source2 += pitch;
4998 source3 += pitch;
4999 source4 += pitch;
5000 source5 += pitch;
5001 source6 += pitch;
5002 source7 += pitch;
5003 source8 += pitch;
5004 source9 += pitch;
5005 sourceA += pitch;
5006 sourceB += pitch;
5007 sourceC += pitch;
5008 sourceD += pitch;
5009 sourceE += pitch;
5010 sourceF += pitch;
5011 }
5012 }
5013 else ASSERT(false);
5014 }
5015 else
5016 {
5017 if(internal.depth == 2)
5018 {
5019 for(int y = 0; y < height; y++)
5020 {
5021 for(int x = 0; x < 2 * width; x++)
5022 {
5023 float c0 = *(float*)(source0 + 4 * x);
5024 float c1 = *(float*)(source1 + 4 * x);
5025
5026 c0 = c0 + c1;
5027 c0 *= 1.0f / 2.0f;
5028
5029 *(float*)(source0 + 4 * x) = c0;
5030 }
5031
5032 source0 += pitch;
5033 source1 += pitch;
5034 }
5035 }
5036 else if(internal.depth == 4)
5037 {
5038 for(int y = 0; y < height; y++)
5039 {
5040 for(int x = 0; x < 2 * width; x++)
5041 {
5042 float c0 = *(float*)(source0 + 4 * x);
5043 float c1 = *(float*)(source1 + 4 * x);
5044 float c2 = *(float*)(source2 + 4 * x);
5045 float c3 = *(float*)(source3 + 4 * x);
5046
5047 c0 = c0 + c1;
5048 c2 = c2 + c3;
5049 c0 = c0 + c2;
5050 c0 *= 1.0f / 4.0f;
5051
5052 *(float*)(source0 + 4 * x) = c0;
5053 }
5054
5055 source0 += pitch;
5056 source1 += pitch;
5057 source2 += pitch;
5058 source3 += pitch;
5059 }
5060 }
5061 else if(internal.depth == 8)
5062 {
5063 for(int y = 0; y < height; y++)
5064 {
5065 for(int x = 0; x < 2 * width; x++)
5066 {
5067 float c0 = *(float*)(source0 + 4 * x);
5068 float c1 = *(float*)(source1 + 4 * x);
5069 float c2 = *(float*)(source2 + 4 * x);
5070 float c3 = *(float*)(source3 + 4 * x);
5071 float c4 = *(float*)(source4 + 4 * x);
5072 float c5 = *(float*)(source5 + 4 * x);
5073 float c6 = *(float*)(source6 + 4 * x);
5074 float c7 = *(float*)(source7 + 4 * x);
5075
5076 c0 = c0 + c1;
5077 c2 = c2 + c3;
5078 c4 = c4 + c5;
5079 c6 = c6 + c7;
5080 c0 = c0 + c2;
5081 c4 = c4 + c6;
5082 c0 = c0 + c4;
5083 c0 *= 1.0f / 8.0f;
5084
5085 *(float*)(source0 + 4 * x) = c0;
5086 }
5087
5088 source0 += pitch;
5089 source1 += pitch;
5090 source2 += pitch;
5091 source3 += pitch;
5092 source4 += pitch;
5093 source5 += pitch;
5094 source6 += pitch;
5095 source7 += pitch;
5096 }
5097 }
5098 else if(internal.depth == 16)
5099 {
5100 for(int y = 0; y < height; y++)
5101 {
5102 for(int x = 0; x < 2 * width; x++)
5103 {
5104 float c0 = *(float*)(source0 + 4 * x);
5105 float c1 = *(float*)(source1 + 4 * x);
5106 float c2 = *(float*)(source2 + 4 * x);
5107 float c3 = *(float*)(source3 + 4 * x);
5108 float c4 = *(float*)(source4 + 4 * x);
5109 float c5 = *(float*)(source5 + 4 * x);
5110 float c6 = *(float*)(source6 + 4 * x);
5111 float c7 = *(float*)(source7 + 4 * x);
5112 float c8 = *(float*)(source8 + 4 * x);
5113 float c9 = *(float*)(source9 + 4 * x);
5114 float cA = *(float*)(sourceA + 4 * x);
5115 float cB = *(float*)(sourceB + 4 * x);
5116 float cC = *(float*)(sourceC + 4 * x);
5117 float cD = *(float*)(sourceD + 4 * x);
5118 float cE = *(float*)(sourceE + 4 * x);
5119 float cF = *(float*)(sourceF + 4 * x);
5120
5121 c0 = c0 + c1;
5122 c2 = c2 + c3;
5123 c4 = c4 + c5;
5124 c6 = c6 + c7;
5125 c8 = c8 + c9;
5126 cA = cA + cB;
5127 cC = cC + cD;
5128 cE = cE + cF;
5129 c0 = c0 + c2;
5130 c4 = c4 + c6;
5131 c8 = c8 + cA;
5132 cC = cC + cE;
5133 c0 = c0 + c4;
5134 c8 = c8 + cC;
5135 c0 = c0 + c8;
5136 c0 *= 1.0f / 16.0f;
5137
5138 *(float*)(source0 + 4 * x) = c0;
5139 }
5140
5141 source0 += pitch;
5142 source1 += pitch;
5143 source2 += pitch;
5144 source3 += pitch;
5145 source4 += pitch;
5146 source5 += pitch;
5147 source6 += pitch;
5148 source7 += pitch;
5149 source8 += pitch;
5150 source9 += pitch;
5151 sourceA += pitch;
5152 sourceB += pitch;
5153 sourceC += pitch;
5154 sourceD += pitch;
5155 sourceE += pitch;
5156 sourceF += pitch;
5157 }
5158 }
5159 else ASSERT(false);
5160 }
5161 }
5162 else if(internal.format == FORMAT_A32B32G32R32F)
5163 {
5164 if(CPUID::supportsSSE())
5165 {
5166 if(internal.depth == 2)
5167 {
5168 for(int y = 0; y < height; y++)
5169 {
5170 for(int x = 0; x < width; x++)
5171 {
5172 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x));
5173 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x));
5174
5175 c0 = _mm_add_ps(c0, c1);
5176 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 2.0f));
5177
5178 _mm_store_ps((float*)(source0 + 16 * x), c0);
5179 }
5180
5181 source0 += pitch;
5182 source1 += pitch;
5183 }
5184 }
5185 else if(internal.depth == 4)
5186 {
5187 for(int y = 0; y < height; y++)
5188 {
5189 for(int x = 0; x < width; x++)
5190 {
5191 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x));
5192 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x));
5193 __m128 c2 = _mm_load_ps((float*)(source2 + 16 * x));
5194 __m128 c3 = _mm_load_ps((float*)(source3 + 16 * x));
5195
5196 c0 = _mm_add_ps(c0, c1);
5197 c2 = _mm_add_ps(c2, c3);
5198 c0 = _mm_add_ps(c0, c2);
5199 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 4.0f));
5200
5201 _mm_store_ps((float*)(source0 + 16 * x), c0);
5202 }
5203
5204 source0 += pitch;
5205 source1 += pitch;
5206 source2 += pitch;
5207 source3 += pitch;
5208 }
5209 }
5210 else if(internal.depth == 8)
5211 {
5212 for(int y = 0; y < height; y++)
5213 {
5214 for(int x = 0; x < width; x++)
5215 {
5216 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x));
5217 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x));
5218 __m128 c2 = _mm_load_ps((float*)(source2 + 16 * x));
5219 __m128 c3 = _mm_load_ps((float*)(source3 + 16 * x));
5220 __m128 c4 = _mm_load_ps((float*)(source4 + 16 * x));
5221 __m128 c5 = _mm_load_ps((float*)(source5 + 16 * x));
5222 __m128 c6 = _mm_load_ps((float*)(source6 + 16 * x));
5223 __m128 c7 = _mm_load_ps((float*)(source7 + 16 * x));
5224
5225 c0 = _mm_add_ps(c0, c1);
5226 c2 = _mm_add_ps(c2, c3);
5227 c4 = _mm_add_ps(c4, c5);
5228 c6 = _mm_add_ps(c6, c7);
5229 c0 = _mm_add_ps(c0, c2);
5230 c4 = _mm_add_ps(c4, c6);
5231 c0 = _mm_add_ps(c0, c4);
5232 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 8.0f));
5233
5234 _mm_store_ps((float*)(source0 + 16 * x), c0);
5235 }
5236
5237 source0 += pitch;
5238 source1 += pitch;
5239 source2 += pitch;
5240 source3 += pitch;
5241 source4 += pitch;
5242 source5 += pitch;
5243 source6 += pitch;
5244 source7 += pitch;
5245 }
5246 }
5247 else if(internal.depth == 16)
5248 {
5249 for(int y = 0; y < height; y++)
5250 {
5251 for(int x = 0; x < width; x++)
5252 {
5253 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x));
5254 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x));
5255 __m128 c2 = _mm_load_ps((float*)(source2 + 16 * x));
5256 __m128 c3 = _mm_load_ps((float*)(source3 + 16 * x));
5257 __m128 c4 = _mm_load_ps((float*)(source4 + 16 * x));
5258 __m128 c5 = _mm_load_ps((float*)(source5 + 16 * x));
5259 __m128 c6 = _mm_load_ps((float*)(source6 + 16 * x));
5260 __m128 c7 = _mm_load_ps((float*)(source7 + 16 * x));
5261 __m128 c8 = _mm_load_ps((float*)(source8 + 16 * x));
5262 __m128 c9 = _mm_load_ps((float*)(source9 + 16 * x));
5263 __m128 cA = _mm_load_ps((float*)(sourceA + 16 * x));
5264 __m128 cB = _mm_load_ps((float*)(sourceB + 16 * x));
5265 __m128 cC = _mm_load_ps((float*)(sourceC + 16 * x));
5266 __m128 cD = _mm_load_ps((float*)(sourceD + 16 * x));
5267 __m128 cE = _mm_load_ps((float*)(sourceE + 16 * x));
5268 __m128 cF = _mm_load_ps((float*)(sourceF + 16 * x));
5269
5270 c0 = _mm_add_ps(c0, c1);
5271 c2 = _mm_add_ps(c2, c3);
5272 c4 = _mm_add_ps(c4, c5);
5273 c6 = _mm_add_ps(c6, c7);
5274 c8 = _mm_add_ps(c8, c9);
5275 cA = _mm_add_ps(cA, cB);
5276 cC = _mm_add_ps(cC, cD);
5277 cE = _mm_add_ps(cE, cF);
5278 c0 = _mm_add_ps(c0, c2);
5279 c4 = _mm_add_ps(c4, c6);
5280 c8 = _mm_add_ps(c8, cA);
5281 cC = _mm_add_ps(cC, cE);
5282 c0 = _mm_add_ps(c0, c4);
5283 c8 = _mm_add_ps(c8, cC);
5284 c0 = _mm_add_ps(c0, c8);
5285 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 16.0f));
5286
5287 _mm_store_ps((float*)(source0 + 16 * x), c0);
5288 }
5289
5290 source0 += pitch;
5291 source1 += pitch;
5292 source2 += pitch;
5293 source3 += pitch;
5294 source4 += pitch;
5295 source5 += pitch;
5296 source6 += pitch;
5297 source7 += pitch;
5298 source8 += pitch;
5299 source9 += pitch;
5300 sourceA += pitch;
5301 sourceB += pitch;
5302 sourceC += pitch;
5303 sourceD += pitch;
5304 sourceE += pitch;
5305 sourceF += pitch;
5306 }
5307 }
5308 else ASSERT(false);
5309 }
5310 else
5311 {
5312 if(internal.depth == 2)
5313 {
5314 for(int y = 0; y < height; y++)
5315 {
5316 for(int x = 0; x < 4 * width; x++)
5317 {
5318 float c0 = *(float*)(source0 + 4 * x);
5319 float c1 = *(float*)(source1 + 4 * x);
5320
5321 c0 = c0 + c1;
5322 c0 *= 1.0f / 2.0f;
5323
5324 *(float*)(source0 + 4 * x) = c0;
5325 }
5326
5327 source0 += pitch;
5328 source1 += pitch;
5329 }
5330 }
5331 else if(internal.depth == 4)
5332 {
5333 for(int y = 0; y < height; y++)
5334 {
5335 for(int x = 0; x < 4 * width; x++)
5336 {
5337 float c0 = *(float*)(source0 + 4 * x);
5338 float c1 = *(float*)(source1 + 4 * x);
5339 float c2 = *(float*)(source2 + 4 * x);
5340 float c3 = *(float*)(source3 + 4 * x);
5341
5342 c0 = c0 + c1;
5343 c2 = c2 + c3;
5344 c0 = c0 + c2;
5345 c0 *= 1.0f / 4.0f;
5346
5347 *(float*)(source0 + 4 * x) = c0;
5348 }
5349
5350 source0 += pitch;
5351 source1 += pitch;
5352 source2 += pitch;
5353 source3 += pitch;
5354 }
5355 }
5356 else if(internal.depth == 8)
5357 {
5358 for(int y = 0; y < height; y++)
5359 {
5360 for(int x = 0; x < 4 * width; x++)
5361 {
5362 float c0 = *(float*)(source0 + 4 * x);
5363 float c1 = *(float*)(source1 + 4 * x);
5364 float c2 = *(float*)(source2 + 4 * x);
5365 float c3 = *(float*)(source3 + 4 * x);
5366 float c4 = *(float*)(source4 + 4 * x);
5367 float c5 = *(float*)(source5 + 4 * x);
5368 float c6 = *(float*)(source6 + 4 * x);
5369 float c7 = *(float*)(source7 + 4 * x);
5370
5371 c0 = c0 + c1;
5372 c2 = c2 + c3;
5373 c4 = c4 + c5;
5374 c6 = c6 + c7;
5375 c0 = c0 + c2;
5376 c4 = c4 + c6;
5377 c0 = c0 + c4;
5378 c0 *= 1.0f / 8.0f;
5379
5380 *(float*)(source0 + 4 * x) = c0;
5381 }
5382
5383 source0 += pitch;
5384 source1 += pitch;
5385 source2 += pitch;
5386 source3 += pitch;
5387 source4 += pitch;
5388 source5 += pitch;
5389 source6 += pitch;
5390 source7 += pitch;
5391 }
5392 }
5393 else if(internal.depth == 16)
5394 {
5395 for(int y = 0; y < height; y++)
5396 {
5397 for(int x = 0; x < 4 * width; x++)
5398 {
5399 float c0 = *(float*)(source0 + 4 * x);
5400 float c1 = *(float*)(source1 + 4 * x);
5401 float c2 = *(float*)(source2 + 4 * x);
5402 float c3 = *(float*)(source3 + 4 * x);
5403 float c4 = *(float*)(source4 + 4 * x);
5404 float c5 = *(float*)(source5 + 4 * x);
5405 float c6 = *(float*)(source6 + 4 * x);
5406 float c7 = *(float*)(source7 + 4 * x);
5407 float c8 = *(float*)(source8 + 4 * x);
5408 float c9 = *(float*)(source9 + 4 * x);
5409 float cA = *(float*)(sourceA + 4 * x);
5410 float cB = *(float*)(sourceB + 4 * x);
5411 float cC = *(float*)(sourceC + 4 * x);
5412 float cD = *(float*)(sourceD + 4 * x);
5413 float cE = *(float*)(sourceE + 4 * x);
5414 float cF = *(float*)(sourceF + 4 * x);
5415
5416 c0 = c0 + c1;
5417 c2 = c2 + c3;
5418 c4 = c4 + c5;
5419 c6 = c6 + c7;
5420 c8 = c8 + c9;
5421 cA = cA + cB;
5422 cC = cC + cD;
5423 cE = cE + cF;
5424 c0 = c0 + c2;
5425 c4 = c4 + c6;
5426 c8 = c8 + cA;
5427 cC = cC + cE;
5428 c0 = c0 + c4;
5429 c8 = c8 + cC;
5430 c0 = c0 + c8;
5431 c0 *= 1.0f / 16.0f;
5432
5433 *(float*)(source0 + 4 * x) = c0;
5434 }
5435
5436 source0 += pitch;
5437 source1 += pitch;
5438 source2 += pitch;
5439 source3 += pitch;
5440 source4 += pitch;
5441 source5 += pitch;
5442 source6 += pitch;
5443 source7 += pitch;
5444 source8 += pitch;
5445 source9 += pitch;
5446 sourceA += pitch;
5447 sourceB += pitch;
5448 sourceC += pitch;
5449 sourceD += pitch;
5450 sourceE += pitch;
5451 sourceF += pitch;
5452 }
5453 }
5454 else ASSERT(false);
5455 }
5456 }
Nicolas Capens0e12a922015-09-04 09:18:15 -04005457 else if(internal.format == FORMAT_R5G6B5)
5458 {
5459 if(CPUID::supportsSSE2() && (width % 8) == 0)
5460 {
5461 if(internal.depth == 2)
5462 {
5463 for(int y = 0; y < height; y++)
5464 {
5465 for(int x = 0; x < width; x += 8)
5466 {
5467 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x));
5468 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x));
5469
5470 static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F};
5471 static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0};
5472 __m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
5473 __m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(_g_));
5474 __m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b));
5475 __m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
5476
5477 c0 = _mm_avg_epu8(c0_r_b, c1_r_b);
5478 c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
5479 c1 = _mm_avg_epu16(c0__g_, c1__g_);
5480 c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
5481 c0 = _mm_or_si128(c0, c1);
5482
5483 _mm_store_si128((__m128i*)(source0 + 2 * x), c0);
5484 }
5485
5486 source0 += pitch;
5487 source1 += pitch;
5488 }
5489 }
5490 else if(internal.depth == 4)
5491 {
5492 for(int y = 0; y < height; y++)
5493 {
5494 for(int x = 0; x < width; x += 8)
5495 {
5496 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x));
5497 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x));
5498 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 2 * x));
5499 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 2 * x));
5500
5501 static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F};
5502 static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0};
5503 __m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
5504 __m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(_g_));
5505 __m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b));
5506 __m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
5507 __m128i c2_r_b = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(r_b));
5508 __m128i c2__g_ = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(_g_));
5509 __m128i c3_r_b = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(r_b));
5510 __m128i c3__g_ = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(_g_));
5511
5512 c0 = _mm_avg_epu8(c0_r_b, c1_r_b);
5513 c2 = _mm_avg_epu8(c2_r_b, c3_r_b);
5514 c0 = _mm_avg_epu8(c0, c2);
5515 c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
5516 c1 = _mm_avg_epu16(c0__g_, c1__g_);
5517 c3 = _mm_avg_epu16(c2__g_, c3__g_);
5518 c1 = _mm_avg_epu16(c1, c3);
5519 c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
5520 c0 = _mm_or_si128(c0, c1);
5521
5522 _mm_store_si128((__m128i*)(source0 + 2 * x), c0);
5523 }
5524
5525 source0 += pitch;
5526 source1 += pitch;
5527 source2 += pitch;
5528 source3 += pitch;
5529 }
5530 }
5531 else if(internal.depth == 8)
5532 {
5533 for(int y = 0; y < height; y++)
5534 {
5535 for(int x = 0; x < width; x += 8)
5536 {
5537 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x));
5538 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x));
5539 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 2 * x));
5540 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 2 * x));
5541 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 2 * x));
5542 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 2 * x));
5543 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 2 * x));
5544 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 2 * x));
5545
5546 static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F};
5547 static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0};
5548 __m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
5549 __m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(_g_));
5550 __m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b));
5551 __m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
5552 __m128i c2_r_b = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(r_b));
5553 __m128i c2__g_ = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(_g_));
5554 __m128i c3_r_b = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(r_b));
5555 __m128i c3__g_ = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(_g_));
5556 __m128i c4_r_b = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(r_b));
5557 __m128i c4__g_ = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(_g_));
5558 __m128i c5_r_b = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(r_b));
5559 __m128i c5__g_ = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(_g_));
5560 __m128i c6_r_b = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(r_b));
5561 __m128i c6__g_ = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(_g_));
5562 __m128i c7_r_b = _mm_and_si128(c7, reinterpret_cast<const __m128i&>(r_b));
5563 __m128i c7__g_ = _mm_and_si128(c7, reinterpret_cast<const __m128i&>(_g_));
5564
5565 c0 = _mm_avg_epu8(c0_r_b, c1_r_b);
5566 c2 = _mm_avg_epu8(c2_r_b, c3_r_b);
5567 c4 = _mm_avg_epu8(c4_r_b, c5_r_b);
5568 c6 = _mm_avg_epu8(c6_r_b, c7_r_b);
5569 c0 = _mm_avg_epu8(c0, c2);
5570 c4 = _mm_avg_epu8(c4, c6);
5571 c0 = _mm_avg_epu8(c0, c4);
5572 c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
5573 c1 = _mm_avg_epu16(c0__g_, c1__g_);
5574 c3 = _mm_avg_epu16(c2__g_, c3__g_);
5575 c5 = _mm_avg_epu16(c4__g_, c5__g_);
5576 c7 = _mm_avg_epu16(c6__g_, c7__g_);
5577 c1 = _mm_avg_epu16(c1, c3);
5578 c5 = _mm_avg_epu16(c5, c7);
5579 c1 = _mm_avg_epu16(c1, c5);
5580 c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
5581 c0 = _mm_or_si128(c0, c1);
5582
5583 _mm_store_si128((__m128i*)(source0 + 2 * x), c0);
5584 }
5585
5586 source0 += pitch;
5587 source1 += pitch;
5588 source2 += pitch;
5589 source3 += pitch;
5590 source4 += pitch;
5591 source5 += pitch;
5592 source6 += pitch;
5593 source7 += pitch;
5594 }
5595 }
5596 else if(internal.depth == 16)
5597 {
5598 for(int y = 0; y < height; y++)
5599 {
5600 for(int x = 0; x < width; x += 8)
5601 {
5602 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x));
5603 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x));
5604 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 2 * x));
5605 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 2 * x));
5606 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 2 * x));
5607 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 2 * x));
5608 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 2 * x));
5609 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 2 * x));
5610 __m128i c8 = _mm_load_si128((__m128i*)(source8 + 2 * x));
5611 __m128i c9 = _mm_load_si128((__m128i*)(source9 + 2 * x));
5612 __m128i cA = _mm_load_si128((__m128i*)(sourceA + 2 * x));
5613 __m128i cB = _mm_load_si128((__m128i*)(sourceB + 2 * x));
5614 __m128i cC = _mm_load_si128((__m128i*)(sourceC + 2 * x));
5615 __m128i cD = _mm_load_si128((__m128i*)(sourceD + 2 * x));
5616 __m128i cE = _mm_load_si128((__m128i*)(sourceE + 2 * x));
5617 __m128i cF = _mm_load_si128((__m128i*)(sourceF + 2 * x));
5618
5619 static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F};
5620 static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0};
5621 __m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
5622 __m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(_g_));
5623 __m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b));
5624 __m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
5625 __m128i c2_r_b = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(r_b));
5626 __m128i c2__g_ = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(_g_));
5627 __m128i c3_r_b = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(r_b));
5628 __m128i c3__g_ = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(_g_));
5629 __m128i c4_r_b = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(r_b));
5630 __m128i c4__g_ = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(_g_));
5631 __m128i c5_r_b = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(r_b));
5632 __m128i c5__g_ = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(_g_));
5633 __m128i c6_r_b = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(r_b));
5634 __m128i c6__g_ = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(_g_));
5635 __m128i c7_r_b = _mm_and_si128(c7, reinterpret_cast<const __m128i&>(r_b));
5636 __m128i c7__g_ = _mm_and_si128(c7, reinterpret_cast<const __m128i&>(_g_));
5637 __m128i c8_r_b = _mm_and_si128(c8, reinterpret_cast<const __m128i&>(r_b));
5638 __m128i c8__g_ = _mm_and_si128(c8, reinterpret_cast<const __m128i&>(_g_));
5639 __m128i c9_r_b = _mm_and_si128(c9, reinterpret_cast<const __m128i&>(r_b));
5640 __m128i c9__g_ = _mm_and_si128(c9, reinterpret_cast<const __m128i&>(_g_));
5641 __m128i cA_r_b = _mm_and_si128(cA, reinterpret_cast<const __m128i&>(r_b));
5642 __m128i cA__g_ = _mm_and_si128(cA, reinterpret_cast<const __m128i&>(_g_));
5643 __m128i cB_r_b = _mm_and_si128(cB, reinterpret_cast<const __m128i&>(r_b));
5644 __m128i cB__g_ = _mm_and_si128(cB, reinterpret_cast<const __m128i&>(_g_));
5645 __m128i cC_r_b = _mm_and_si128(cC, reinterpret_cast<const __m128i&>(r_b));
5646 __m128i cC__g_ = _mm_and_si128(cC, reinterpret_cast<const __m128i&>(_g_));
5647 __m128i cD_r_b = _mm_and_si128(cD, reinterpret_cast<const __m128i&>(r_b));
5648 __m128i cD__g_ = _mm_and_si128(cD, reinterpret_cast<const __m128i&>(_g_));
5649 __m128i cE_r_b = _mm_and_si128(cE, reinterpret_cast<const __m128i&>(r_b));
5650 __m128i cE__g_ = _mm_and_si128(cE, reinterpret_cast<const __m128i&>(_g_));
5651 __m128i cF_r_b = _mm_and_si128(cF, reinterpret_cast<const __m128i&>(r_b));
5652 __m128i cF__g_ = _mm_and_si128(cF, reinterpret_cast<const __m128i&>(_g_));
5653
5654 c0 = _mm_avg_epu8(c0_r_b, c1_r_b);
5655 c2 = _mm_avg_epu8(c2_r_b, c3_r_b);
5656 c4 = _mm_avg_epu8(c4_r_b, c5_r_b);
5657 c6 = _mm_avg_epu8(c6_r_b, c7_r_b);
5658 c8 = _mm_avg_epu8(c8_r_b, c9_r_b);
5659 cA = _mm_avg_epu8(cA_r_b, cB_r_b);
5660 cC = _mm_avg_epu8(cC_r_b, cD_r_b);
5661 cE = _mm_avg_epu8(cE_r_b, cF_r_b);
5662 c0 = _mm_avg_epu8(c0, c2);
5663 c4 = _mm_avg_epu8(c4, c6);
5664 c8 = _mm_avg_epu8(c8, cA);
5665 cC = _mm_avg_epu8(cC, cE);
5666 c0 = _mm_avg_epu8(c0, c4);
5667 c8 = _mm_avg_epu8(c8, cC);
5668 c0 = _mm_avg_epu8(c0, c8);
5669 c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
5670 c1 = _mm_avg_epu16(c0__g_, c1__g_);
5671 c3 = _mm_avg_epu16(c2__g_, c3__g_);
5672 c5 = _mm_avg_epu16(c4__g_, c5__g_);
5673 c7 = _mm_avg_epu16(c6__g_, c7__g_);
5674 c9 = _mm_avg_epu16(c8__g_, c9__g_);
5675 cB = _mm_avg_epu16(cA__g_, cB__g_);
5676 cD = _mm_avg_epu16(cC__g_, cD__g_);
5677 cF = _mm_avg_epu16(cE__g_, cF__g_);
5678 c1 = _mm_avg_epu8(c1, c3);
5679 c5 = _mm_avg_epu8(c5, c7);
5680 c9 = _mm_avg_epu8(c9, cB);
5681 cD = _mm_avg_epu8(cD, cF);
5682 c1 = _mm_avg_epu8(c1, c5);
5683 c9 = _mm_avg_epu8(c9, cD);
5684 c1 = _mm_avg_epu8(c1, c9);
5685 c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
5686 c0 = _mm_or_si128(c0, c1);
5687
5688 _mm_store_si128((__m128i*)(source0 + 2 * x), c0);
5689 }
5690
5691 source0 += pitch;
5692 source1 += pitch;
5693 source2 += pitch;
5694 source3 += pitch;
5695 source4 += pitch;
5696 source5 += pitch;
5697 source6 += pitch;
5698 source7 += pitch;
5699 source8 += pitch;
5700 source9 += pitch;
5701 sourceA += pitch;
5702 sourceB += pitch;
5703 sourceC += pitch;
5704 sourceD += pitch;
5705 sourceE += pitch;
5706 sourceF += pitch;
5707 }
5708 }
5709 else ASSERT(false);
5710 }
5711 else
5712 {
5713 #define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7BEF) + (((x) ^ (y)) & 0x0821))
5714
5715 if(internal.depth == 2)
5716 {
5717 for(int y = 0; y < height; y++)
5718 {
5719 for(int x = 0; x < width; x++)
5720 {
5721 unsigned short c0 = *(unsigned short*)(source0 + 2 * x);
5722 unsigned short c1 = *(unsigned short*)(source1 + 2 * x);
5723
5724 c0 = AVERAGE(c0, c1);
5725
5726 *(unsigned short*)(source0 + 2 * x) = c0;
5727 }
5728
5729 source0 += pitch;
5730 source1 += pitch;
5731 }
5732 }
5733 else if(internal.depth == 4)
5734 {
5735 for(int y = 0; y < height; y++)
5736 {
5737 for(int x = 0; x < width; x++)
5738 {
5739 unsigned short c0 = *(unsigned short*)(source0 + 2 * x);
5740 unsigned short c1 = *(unsigned short*)(source1 + 2 * x);
5741 unsigned short c2 = *(unsigned short*)(source2 + 2 * x);
5742 unsigned short c3 = *(unsigned short*)(source3 + 2 * x);
5743
5744 c0 = AVERAGE(c0, c1);
5745 c2 = AVERAGE(c2, c3);
5746 c0 = AVERAGE(c0, c2);
5747
5748 *(unsigned short*)(source0 + 2 * x) = c0;
5749 }
5750
5751 source0 += pitch;
5752 source1 += pitch;
5753 source2 += pitch;
5754 source3 += pitch;
5755 }
5756 }
5757 else if(internal.depth == 8)
5758 {
5759 for(int y = 0; y < height; y++)
5760 {
5761 for(int x = 0; x < width; x++)
5762 {
5763 unsigned short c0 = *(unsigned short*)(source0 + 2 * x);
5764 unsigned short c1 = *(unsigned short*)(source1 + 2 * x);
5765 unsigned short c2 = *(unsigned short*)(source2 + 2 * x);
5766 unsigned short c3 = *(unsigned short*)(source3 + 2 * x);
5767 unsigned short c4 = *(unsigned short*)(source4 + 2 * x);
5768 unsigned short c5 = *(unsigned short*)(source5 + 2 * x);
5769 unsigned short c6 = *(unsigned short*)(source6 + 2 * x);
5770 unsigned short c7 = *(unsigned short*)(source7 + 2 * x);
5771
5772 c0 = AVERAGE(c0, c1);
5773 c2 = AVERAGE(c2, c3);
5774 c4 = AVERAGE(c4, c5);
5775 c6 = AVERAGE(c6, c7);
5776 c0 = AVERAGE(c0, c2);
5777 c4 = AVERAGE(c4, c6);
5778 c0 = AVERAGE(c0, c4);
5779
5780 *(unsigned short*)(source0 + 2 * x) = c0;
5781 }
5782
5783 source0 += pitch;
5784 source1 += pitch;
5785 source2 += pitch;
5786 source3 += pitch;
5787 source4 += pitch;
5788 source5 += pitch;
5789 source6 += pitch;
5790 source7 += pitch;
5791 }
5792 }
5793 else if(internal.depth == 16)
5794 {
5795 for(int y = 0; y < height; y++)
5796 {
5797 for(int x = 0; x < width; x++)
5798 {
5799 unsigned short c0 = *(unsigned short*)(source0 + 2 * x);
5800 unsigned short c1 = *(unsigned short*)(source1 + 2 * x);
5801 unsigned short c2 = *(unsigned short*)(source2 + 2 * x);
5802 unsigned short c3 = *(unsigned short*)(source3 + 2 * x);
5803 unsigned short c4 = *(unsigned short*)(source4 + 2 * x);
5804 unsigned short c5 = *(unsigned short*)(source5 + 2 * x);
5805 unsigned short c6 = *(unsigned short*)(source6 + 2 * x);
5806 unsigned short c7 = *(unsigned short*)(source7 + 2 * x);
5807 unsigned short c8 = *(unsigned short*)(source8 + 2 * x);
5808 unsigned short c9 = *(unsigned short*)(source9 + 2 * x);
5809 unsigned short cA = *(unsigned short*)(sourceA + 2 * x);
5810 unsigned short cB = *(unsigned short*)(sourceB + 2 * x);
5811 unsigned short cC = *(unsigned short*)(sourceC + 2 * x);
5812 unsigned short cD = *(unsigned short*)(sourceD + 2 * x);
5813 unsigned short cE = *(unsigned short*)(sourceE + 2 * x);
5814 unsigned short cF = *(unsigned short*)(sourceF + 2 * x);
5815
5816 c0 = AVERAGE(c0, c1);
5817 c2 = AVERAGE(c2, c3);
5818 c4 = AVERAGE(c4, c5);
5819 c6 = AVERAGE(c6, c7);
5820 c8 = AVERAGE(c8, c9);
5821 cA = AVERAGE(cA, cB);
5822 cC = AVERAGE(cC, cD);
5823 cE = AVERAGE(cE, cF);
5824 c0 = AVERAGE(c0, c2);
5825 c4 = AVERAGE(c4, c6);
5826 c8 = AVERAGE(c8, cA);
5827 cC = AVERAGE(cC, cE);
5828 c0 = AVERAGE(c0, c4);
5829 c8 = AVERAGE(c8, cC);
5830 c0 = AVERAGE(c0, c8);
5831
5832 *(unsigned short*)(source0 + 2 * x) = c0;
5833 }
5834
5835 source0 += pitch;
5836 source1 += pitch;
5837 source2 += pitch;
5838 source3 += pitch;
5839 source4 += pitch;
5840 source5 += pitch;
5841 source6 += pitch;
5842 source7 += pitch;
5843 source8 += pitch;
5844 source9 += pitch;
5845 sourceA += pitch;
5846 sourceB += pitch;
5847 sourceC += pitch;
5848 sourceD += pitch;
5849 sourceE += pitch;
5850 sourceF += pitch;
5851 }
5852 }
5853 else ASSERT(false);
5854
5855 #undef AVERAGE
5856 }
5857 }
John Bauman89401822014-05-06 15:04:28 -04005858 else
5859 {
5860 // UNIMPLEMENTED();
5861 }
5862 }
5863}