// SwiftShader Software Renderer
//
// Copyright(c) 2005-2013 TransGaming Inc.
//
// All rights reserved. No part of this software may be copied, distributed, transmitted,
// transcribed, stored in a retrieval system, translated into any human or computer
// language by any means, or disclosed to third parties without the explicit written
// agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express
// or implied, including but not limited to any patent rights, are granted to you.
//
11
12#include "Surface.hpp"
13
14#include "Color.hpp"
15#include "Context.hpp"
Alexis Hetu0de50d42015-09-09 13:56:41 -040016#include "ETC_Decoder.hpp"
John Bauman19bac1e2014-05-06 15:23:49 -040017#include "Renderer.hpp"
John Bauman89401822014-05-06 15:04:28 -040018#include "Common/Half.hpp"
19#include "Common/Memory.hpp"
20#include "Common/CPUID.hpp"
21#include "Common/Resource.hpp"
22#include "Common/Debug.hpp"
John Bauman19bac1e2014-05-06 15:23:49 -040023#include "Reactor/Reactor.hpp"
John Bauman89401822014-05-06 15:04:28 -040024
25#include <xmmintrin.h>
26#include <emmintrin.h>
27
28#undef min
29#undef max
30
31namespace sw
32{
33 extern bool quadLayoutEnabled;
34 extern bool complementaryDepthBuffer;
35 extern TranscendentalPrecision logPrecision;
36
37 unsigned int *Surface::palette = 0;
38 unsigned int Surface::paletteID = 0;
39
John Bauman19bac1e2014-05-06 15:23:49 -040040 void Rect::clip(int minX, int minY, int maxX, int maxY)
41 {
Nicolas Capens22658242014-11-29 00:31:41 -050042 x0 = clamp(x0, minX, maxX);
43 y0 = clamp(y0, minY, maxY);
44 x1 = clamp(x1, minX, maxX);
45 y1 = clamp(y1, minY, maxY);
John Bauman19bac1e2014-05-06 15:23:49 -040046 }
47
John Bauman89401822014-05-06 15:04:28 -040048 void Surface::Buffer::write(int x, int y, int z, const Color<float> &color)
49 {
50 void *element = (unsigned char*)buffer + x * bytes + y * pitchB + z * sliceB;
51
52 write(element, color);
53 }
54
55 void Surface::Buffer::write(int x, int y, const Color<float> &color)
56 {
57 void *element = (unsigned char*)buffer + x * bytes + y * pitchB;
58
59 write(element, color);
60 }
61
62 inline void Surface::Buffer::write(void *element, const Color<float> &color)
63 {
64 switch(format)
65 {
66 case FORMAT_A8:
67 *(unsigned char*)element = unorm<8>(color.a);
68 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -040069 case FORMAT_R8I_SNORM:
70 *(char*)element = snorm<8>(color.r);
71 break;
John Bauman89401822014-05-06 15:04:28 -040072 case FORMAT_R8:
73 *(unsigned char*)element = unorm<8>(color.r);
74 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -040075 case FORMAT_R8I:
76 *(char*)element = scast<8>(color.r);
77 break;
78 case FORMAT_R8UI:
79 *(unsigned char*)element = ucast<8>(color.r);
80 break;
81 case FORMAT_R16I:
82 *(short*)element = scast<16>(color.r);
83 break;
84 case FORMAT_R16UI:
85 *(unsigned short*)element = ucast<16>(color.r);
86 break;
87 case FORMAT_R32I:
88 *(int*)element = static_cast<int>(color.r);
89 break;
90 case FORMAT_R32UI:
91 *(unsigned int*)element = static_cast<unsigned int>(color.r);
92 break;
John Bauman89401822014-05-06 15:04:28 -040093 case FORMAT_R3G3B2:
94 *(unsigned char*)element = (unorm<3>(color.r) << 5) | (unorm<3>(color.g) << 2) | (unorm<2>(color.b) << 0);
95 break;
96 case FORMAT_A8R3G3B2:
97 *(unsigned short*)element = (unorm<8>(color.a) << 8) | (unorm<3>(color.r) << 5) | (unorm<3>(color.g) << 2) | (unorm<2>(color.b) << 0);
98 break;
99 case FORMAT_X4R4G4B4:
100 *(unsigned short*)element = 0xF000 | (unorm<4>(color.r) << 8) | (unorm<4>(color.g) << 4) | (unorm<4>(color.b) << 0);
101 break;
102 case FORMAT_A4R4G4B4:
103 *(unsigned short*)element = (unorm<4>(color.a) << 12) | (unorm<4>(color.r) << 8) | (unorm<4>(color.g) << 4) | (unorm<4>(color.b) << 0);
104 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400105 case FORMAT_R4G4B4A4:
106 *(unsigned short*)element = (unorm<4>(color.r) << 12) | (unorm<4>(color.g) << 8) | (unorm<4>(color.b) << 4) | (unorm<4>(color.a) << 0);
107 break;
John Bauman89401822014-05-06 15:04:28 -0400108 case FORMAT_R5G6B5:
109 *(unsigned short*)element = (unorm<5>(color.r) << 11) | (unorm<6>(color.g) << 5) | (unorm<5>(color.b) << 0);
110 break;
111 case FORMAT_A1R5G5B5:
112 *(unsigned short*)element = (unorm<1>(color.a) << 15) | (unorm<5>(color.r) << 10) | (unorm<5>(color.g) << 5) | (unorm<5>(color.b) << 0);
113 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400114 case FORMAT_R5G5B5A1:
115 *(unsigned short*)element = (unorm<5>(color.r) << 11) | (unorm<5>(color.g) << 6) | (unorm<5>(color.b) << 1) | (unorm<5>(color.a) << 0);
116 break;
John Bauman89401822014-05-06 15:04:28 -0400117 case FORMAT_X1R5G5B5:
118 *(unsigned short*)element = 0x8000 | (unorm<5>(color.r) << 10) | (unorm<5>(color.g) << 5) | (unorm<5>(color.b) << 0);
119 break;
120 case FORMAT_A8R8G8B8:
121 *(unsigned int*)element = (unorm<8>(color.a) << 24) | (unorm<8>(color.r) << 16) | (unorm<8>(color.g) << 8) | (unorm<8>(color.b) << 0);
122 break;
123 case FORMAT_X8R8G8B8:
124 *(unsigned int*)element = 0xFF000000 | (unorm<8>(color.r) << 16) | (unorm<8>(color.g) << 8) | (unorm<8>(color.b) << 0);
125 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400126 case FORMAT_A8B8G8R8I_SNORM:
127 *(unsigned int*)element = (static_cast<unsigned int>(snorm<8>(color.a)) << 24) |
128 (static_cast<unsigned int>(snorm<8>(color.b)) << 16) |
129 (static_cast<unsigned int>(snorm<8>(color.g)) << 8) |
130 (static_cast<unsigned int>(snorm<8>(color.r)) << 0);
131 break;
John Bauman89401822014-05-06 15:04:28 -0400132 case FORMAT_A8B8G8R8:
Alexis Hetu049a1872016-04-25 16:59:58 -0400133 case FORMAT_SRGB8_A8:
John Bauman89401822014-05-06 15:04:28 -0400134 *(unsigned int*)element = (unorm<8>(color.a) << 24) | (unorm<8>(color.b) << 16) | (unorm<8>(color.g) << 8) | (unorm<8>(color.r) << 0);
135 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400136 case FORMAT_A8B8G8R8I:
137 *(unsigned int*)element = (static_cast<unsigned int>(scast<8>(color.a)) << 24) |
138 (static_cast<unsigned int>(scast<8>(color.b)) << 16) |
139 (static_cast<unsigned int>(scast<8>(color.g)) << 8) |
140 (static_cast<unsigned int>(scast<8>(color.r)) << 0);
141 break;
142 case FORMAT_A8B8G8R8UI:
143 *(unsigned int*)element = (ucast<8>(color.a) << 24) | (ucast<8>(color.b) << 16) | (ucast<8>(color.g) << 8) | (ucast<8>(color.r) << 0);
144 break;
145 case FORMAT_X8B8G8R8I_SNORM:
146 *(unsigned int*)element = 0x7F000000 |
147 (static_cast<unsigned int>(snorm<8>(color.b)) << 16) |
148 (static_cast<unsigned int>(snorm<8>(color.g)) << 8) |
149 (static_cast<unsigned int>(snorm<8>(color.r)) << 0);
150 break;
John Bauman89401822014-05-06 15:04:28 -0400151 case FORMAT_X8B8G8R8:
Alexis Hetu049a1872016-04-25 16:59:58 -0400152 case FORMAT_SRGB8_X8:
John Bauman89401822014-05-06 15:04:28 -0400153 *(unsigned int*)element = 0xFF000000 | (unorm<8>(color.b) << 16) | (unorm<8>(color.g) << 8) | (unorm<8>(color.r) << 0);
154 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400155 case FORMAT_X8B8G8R8I:
156 *(unsigned int*)element = 0x7F000000 |
157 (static_cast<unsigned int>(scast<8>(color.b)) << 16) |
158 (static_cast<unsigned int>(scast<8>(color.g)) << 8) |
159 (static_cast<unsigned int>(scast<8>(color.r)) << 0);
160 case FORMAT_X8B8G8R8UI:
161 *(unsigned int*)element = 0xFF000000 | (ucast<8>(color.b) << 16) | (ucast<8>(color.g) << 8) | (ucast<8>(color.r) << 0);
162 break;
John Bauman89401822014-05-06 15:04:28 -0400163 case FORMAT_A2R10G10B10:
164 *(unsigned int*)element = (unorm<2>(color.a) << 30) | (unorm<10>(color.r) << 20) | (unorm<10>(color.g) << 10) | (unorm<10>(color.b) << 0);
165 break;
166 case FORMAT_A2B10G10R10:
167 *(unsigned int*)element = (unorm<2>(color.a) << 30) | (unorm<10>(color.b) << 20) | (unorm<10>(color.g) << 10) | (unorm<10>(color.r) << 0);
168 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400169 case FORMAT_G8R8I_SNORM:
170 *(unsigned short*)element = (static_cast<unsigned short>(snorm<8>(color.g)) << 8) |
171 (static_cast<unsigned short>(snorm<8>(color.r)) << 0);
172 break;
John Bauman89401822014-05-06 15:04:28 -0400173 case FORMAT_G8R8:
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400174 *(unsigned short*)element = (unorm<8>(color.g) << 8) | (unorm<8>(color.r) << 0);
175 break;
176 case FORMAT_G8R8I:
177 *(unsigned short*)element = (static_cast<unsigned short>(scast<8>(color.g)) << 8) |
178 (static_cast<unsigned short>(scast<8>(color.r)) << 0);
179 break;
180 case FORMAT_G8R8UI:
181 *(unsigned short*)element = (ucast<8>(color.g) << 8) | (ucast<8>(color.r) << 0);
John Bauman89401822014-05-06 15:04:28 -0400182 break;
183 case FORMAT_G16R16:
184 *(unsigned int*)element = (unorm<16>(color.g) << 16) | (unorm<16>(color.r) << 0);
185 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400186 case FORMAT_G16R16I:
187 *(unsigned int*)element = (static_cast<unsigned int>(scast<16>(color.g)) << 16) |
188 (static_cast<unsigned int>(scast<16>(color.r)) << 0);
189 break;
190 case FORMAT_G16R16UI:
191 *(unsigned int*)element = (ucast<16>(color.g) << 16) | (ucast<16>(color.r) << 0);
192 break;
193 case FORMAT_G32R32I:
194 case FORMAT_G32R32UI:
195 ((unsigned int*)element)[0] = static_cast<unsigned int>(color.r);
196 ((unsigned int*)element)[1] = static_cast<unsigned int>(color.g);
197 break;
John Bauman89401822014-05-06 15:04:28 -0400198 case FORMAT_A16B16G16R16:
199 ((unsigned short*)element)[0] = unorm<16>(color.r);
200 ((unsigned short*)element)[1] = unorm<16>(color.g);
201 ((unsigned short*)element)[2] = unorm<16>(color.b);
202 ((unsigned short*)element)[3] = unorm<16>(color.a);
203 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400204 case FORMAT_A16B16G16R16I:
205 ((unsigned short*)element)[0] = static_cast<unsigned short>(scast<16>(color.r));
206 ((unsigned short*)element)[1] = static_cast<unsigned short>(scast<16>(color.g));
207 ((unsigned short*)element)[2] = static_cast<unsigned short>(scast<16>(color.b));
208 ((unsigned short*)element)[3] = static_cast<unsigned short>(scast<16>(color.a));
209 break;
210 case FORMAT_A16B16G16R16UI:
211 ((unsigned short*)element)[0] = static_cast<unsigned short>(ucast<16>(color.r));
212 ((unsigned short*)element)[1] = static_cast<unsigned short>(ucast<16>(color.g));
213 ((unsigned short*)element)[2] = static_cast<unsigned short>(ucast<16>(color.b));
214 ((unsigned short*)element)[3] = static_cast<unsigned short>(ucast<16>(color.a));
215 break;
216 case FORMAT_X16B16G16R16I:
217 ((unsigned short*)element)[0] = static_cast<unsigned short>(scast<16>(color.r));
218 ((unsigned short*)element)[1] = static_cast<unsigned short>(scast<16>(color.g));
219 ((unsigned short*)element)[2] = static_cast<unsigned short>(scast<16>(color.b));
220 break;
221 case FORMAT_X16B16G16R16UI:
222 ((unsigned short*)element)[0] = static_cast<unsigned short>(ucast<16>(color.r));
223 ((unsigned short*)element)[1] = static_cast<unsigned short>(ucast<16>(color.g));
224 ((unsigned short*)element)[2] = static_cast<unsigned short>(ucast<16>(color.b));
225 break;
226 case FORMAT_A32B32G32R32I:
227 case FORMAT_A32B32G32R32UI:
228 ((unsigned int*)element)[0] = static_cast<unsigned int>(color.r);
229 ((unsigned int*)element)[1] = static_cast<unsigned int>(color.g);
230 ((unsigned int*)element)[2] = static_cast<unsigned int>(color.b);
231 ((unsigned int*)element)[3] = static_cast<unsigned int>(color.a);
232 break;
233 case FORMAT_X32B32G32R32I:
234 case FORMAT_X32B32G32R32UI:
235 ((unsigned int*)element)[0] = static_cast<unsigned int>(color.r);
236 ((unsigned int*)element)[1] = static_cast<unsigned int>(color.g);
237 ((unsigned int*)element)[2] = static_cast<unsigned int>(color.b);
238 break;
John Bauman89401822014-05-06 15:04:28 -0400239 case FORMAT_V8U8:
240 *(unsigned short*)element = (snorm<8>(color.g) << 8) | (snorm<8>(color.r) << 0);
241 break;
242 case FORMAT_L6V5U5:
243 *(unsigned short*)element = (unorm<6>(color.b) << 10) | (snorm<5>(color.g) << 5) | (snorm<5>(color.r) << 0);
244 break;
245 case FORMAT_Q8W8V8U8:
246 *(unsigned int*)element = (snorm<8>(color.a) << 24) | (snorm<8>(color.b) << 16) | (snorm<8>(color.g) << 8) | (snorm<8>(color.r) << 0);
247 break;
248 case FORMAT_X8L8V8U8:
249 *(unsigned int*)element = 0xFF000000 | (unorm<8>(color.b) << 16) | (snorm<8>(color.g) << 8) | (snorm<8>(color.r) << 0);
250 break;
251 case FORMAT_V16U16:
252 *(unsigned int*)element = (snorm<16>(color.g) << 16) | (snorm<16>(color.r) << 0);
253 break;
254 case FORMAT_A2W10V10U10:
255 *(unsigned int*)element = (unorm<2>(color.a) << 30) | (snorm<10>(color.b) << 20) | (snorm<10>(color.g) << 10) | (snorm<10>(color.r) << 0);
256 break;
257 case FORMAT_A16W16V16U16:
258 ((unsigned short*)element)[0] = snorm<16>(color.r);
259 ((unsigned short*)element)[1] = snorm<16>(color.g);
260 ((unsigned short*)element)[2] = snorm<16>(color.b);
261 ((unsigned short*)element)[3] = unorm<16>(color.a);
262 break;
263 case FORMAT_Q16W16V16U16:
264 ((unsigned short*)element)[0] = snorm<16>(color.r);
265 ((unsigned short*)element)[1] = snorm<16>(color.g);
266 ((unsigned short*)element)[2] = snorm<16>(color.b);
267 ((unsigned short*)element)[3] = snorm<16>(color.a);
268 break;
269 case FORMAT_R8G8B8:
270 ((unsigned char*)element)[0] = unorm<8>(color.b);
271 ((unsigned char*)element)[1] = unorm<8>(color.g);
272 ((unsigned char*)element)[2] = unorm<8>(color.r);
273 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400274 case FORMAT_B8G8R8:
275 ((unsigned char*)element)[0] = unorm<8>(color.r);
276 ((unsigned char*)element)[1] = unorm<8>(color.g);
277 ((unsigned char*)element)[2] = unorm<8>(color.b);
278 break;
John Bauman89401822014-05-06 15:04:28 -0400279 case FORMAT_R16F:
280 *(half*)element = (half)color.r;
281 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400282 case FORMAT_A16F:
283 *(half*)element = (half)color.a;
284 break;
John Bauman89401822014-05-06 15:04:28 -0400285 case FORMAT_G16R16F:
286 ((half*)element)[0] = (half)color.r;
287 ((half*)element)[1] = (half)color.g;
288 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400289 case FORMAT_B16G16R16F:
290 ((half*)element)[0] = (half)color.r;
291 ((half*)element)[1] = (half)color.g;
292 ((half*)element)[2] = (half)color.b;
293 break;
John Bauman89401822014-05-06 15:04:28 -0400294 case FORMAT_A16B16G16R16F:
295 ((half*)element)[0] = (half)color.r;
296 ((half*)element)[1] = (half)color.g;
297 ((half*)element)[2] = (half)color.b;
298 ((half*)element)[3] = (half)color.a;
299 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400300 case FORMAT_A32F:
301 *(float*)element = color.a;
302 break;
John Bauman89401822014-05-06 15:04:28 -0400303 case FORMAT_R32F:
304 *(float*)element = color.r;
305 break;
306 case FORMAT_G32R32F:
307 ((float*)element)[0] = color.r;
308 ((float*)element)[1] = color.g;
309 break;
Alexis Hetudbd1a8e2016-04-13 11:40:30 -0400310 case FORMAT_X32B32G32R32F:
311 ((float*)element)[3] = 1.0f;
Nicolas Capens80594422015-06-09 16:42:56 -0400312 case FORMAT_B32G32R32F:
313 ((float*)element)[0] = color.r;
314 ((float*)element)[1] = color.g;
315 ((float*)element)[2] = color.b;
316 break;
John Bauman89401822014-05-06 15:04:28 -0400317 case FORMAT_A32B32G32R32F:
318 ((float*)element)[0] = color.r;
319 ((float*)element)[1] = color.g;
320 ((float*)element)[2] = color.b;
321 ((float*)element)[3] = color.a;
322 break;
323 case FORMAT_D32F:
324 case FORMAT_D32F_LOCKABLE:
John Bauman66b8ab22014-05-06 15:57:45 -0400325 case FORMAT_D32FS8_TEXTURE:
326 case FORMAT_D32FS8_SHADOW:
John Bauman89401822014-05-06 15:04:28 -0400327 *((float*)element) = color.r;
328 break;
329 case FORMAT_D32F_COMPLEMENTARY:
330 *((float*)element) = 1 - color.r;
331 break;
332 case FORMAT_S8:
333 *((unsigned char*)element) = unorm<8>(color.r);
334 break;
335 case FORMAT_L8:
336 *(unsigned char*)element = unorm<8>(color.r);
337 break;
338 case FORMAT_A4L4:
339 *(unsigned char*)element = (unorm<4>(color.a) << 4) | (unorm<4>(color.r) << 0);
340 break;
341 case FORMAT_L16:
342 *(unsigned short*)element = unorm<16>(color.r);
343 break;
344 case FORMAT_A8L8:
345 *(unsigned short*)element = (unorm<8>(color.a) << 8) | (unorm<8>(color.r) << 0);
346 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400347 case FORMAT_L16F:
348 *(half*)element = (half)color.r;
349 break;
350 case FORMAT_A16L16F:
351 ((half*)element)[0] = (half)color.r;
352 ((half*)element)[1] = (half)color.a;
353 break;
354 case FORMAT_L32F:
355 *(float*)element = color.r;
356 break;
357 case FORMAT_A32L32F:
358 ((float*)element)[0] = color.r;
359 ((float*)element)[1] = color.a;
360 break;
John Bauman89401822014-05-06 15:04:28 -0400361 default:
362 ASSERT(false);
363 }
364 }
365
366 Color<float> Surface::Buffer::read(int x, int y, int z) const
367 {
368 void *element = (unsigned char*)buffer + x * bytes + y * pitchB + z * sliceB;
369
370 return read(element);
371 }
372
373 Color<float> Surface::Buffer::read(int x, int y) const
374 {
375 void *element = (unsigned char*)buffer + x * bytes + y * pitchB;
376
377 return read(element);
378 }
379
380 inline Color<float> Surface::Buffer::read(void *element) const
381 {
Nicolas Capens3f439242015-06-09 16:33:50 -0400382 float r = 0.0f;
383 float g = 0.0f;
384 float b = 0.0f;
385 float a = 1.0f;
John Bauman89401822014-05-06 15:04:28 -0400386
387 switch(format)
388 {
389 case FORMAT_P8:
390 {
391 ASSERT(palette);
392
393 unsigned int abgr = palette[*(unsigned char*)element];
Nicolas Capensc39901e2016-03-21 16:37:44 -0400394
John Bauman89401822014-05-06 15:04:28 -0400395 r = (abgr & 0x000000FF) * (1.0f / 0x000000FF);
396 g = (abgr & 0x0000FF00) * (1.0f / 0x0000FF00);
397 b = (abgr & 0x00FF0000) * (1.0f / 0x00FF0000);
398 a = (abgr & 0xFF000000) * (1.0f / 0xFF000000);
399 }
400 break;
401 case FORMAT_A8P8:
402 {
403 ASSERT(palette);
404
405 unsigned int bgr = palette[((unsigned char*)element)[0]];
Nicolas Capensc39901e2016-03-21 16:37:44 -0400406
John Bauman89401822014-05-06 15:04:28 -0400407 r = (bgr & 0x000000FF) * (1.0f / 0x000000FF);
408 g = (bgr & 0x0000FF00) * (1.0f / 0x0000FF00);
409 b = (bgr & 0x00FF0000) * (1.0f / 0x00FF0000);
410 a = ((unsigned char*)element)[1] * (1.0f / 0xFF);
411 }
412 break;
413 case FORMAT_A8:
414 r = 0;
415 g = 0;
416 b = 0;
417 a = *(unsigned char*)element * (1.0f / 0xFF);
418 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400419 case FORMAT_R8I_SNORM:
420 r = max((*(signed char*)element) * (1.0f / 0x7F), -1.0f);
421 break;
John Bauman89401822014-05-06 15:04:28 -0400422 case FORMAT_R8:
423 r = *(unsigned char*)element * (1.0f / 0xFF);
424 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400425 case FORMAT_R8I:
426 r = *(signed char*)element;
427 break;
428 case FORMAT_R8UI:
429 r = *(unsigned char*)element;
430 break;
John Bauman89401822014-05-06 15:04:28 -0400431 case FORMAT_R3G3B2:
432 {
433 unsigned char rgb = *(unsigned char*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400434
John Bauman89401822014-05-06 15:04:28 -0400435 r = (rgb & 0xE0) * (1.0f / 0xE0);
436 g = (rgb & 0x1C) * (1.0f / 0x1C);
437 b = (rgb & 0x03) * (1.0f / 0x03);
438 }
439 break;
440 case FORMAT_A8R3G3B2:
441 {
442 unsigned short argb = *(unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400443
John Bauman89401822014-05-06 15:04:28 -0400444 a = (argb & 0xFF00) * (1.0f / 0xFF00);
445 r = (argb & 0x00E0) * (1.0f / 0x00E0);
446 g = (argb & 0x001C) * (1.0f / 0x001C);
447 b = (argb & 0x0003) * (1.0f / 0x0003);
448 }
449 break;
450 case FORMAT_X4R4G4B4:
451 {
452 unsigned short rgb = *(unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400453
John Bauman89401822014-05-06 15:04:28 -0400454 r = (rgb & 0x0F00) * (1.0f / 0x0F00);
455 g = (rgb & 0x00F0) * (1.0f / 0x00F0);
456 b = (rgb & 0x000F) * (1.0f / 0x000F);
457 }
458 break;
459 case FORMAT_A4R4G4B4:
460 {
461 unsigned short argb = *(unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400462
John Bauman89401822014-05-06 15:04:28 -0400463 a = (argb & 0xF000) * (1.0f / 0xF000);
464 r = (argb & 0x0F00) * (1.0f / 0x0F00);
465 g = (argb & 0x00F0) * (1.0f / 0x00F0);
466 b = (argb & 0x000F) * (1.0f / 0x000F);
467 }
468 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400469 case FORMAT_R4G4B4A4:
470 {
471 unsigned short rgba = *(unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400472
Nicolas Capens80594422015-06-09 16:42:56 -0400473 r = (rgba & 0xF000) * (1.0f / 0xF000);
474 g = (rgba & 0x0F00) * (1.0f / 0x0F00);
475 b = (rgba & 0x00F0) * (1.0f / 0x00F0);
476 a = (rgba & 0x000F) * (1.0f / 0x000F);
477 }
478 break;
John Bauman89401822014-05-06 15:04:28 -0400479 case FORMAT_R5G6B5:
480 {
481 unsigned short rgb = *(unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400482
John Bauman89401822014-05-06 15:04:28 -0400483 r = (rgb & 0xF800) * (1.0f / 0xF800);
484 g = (rgb & 0x07E0) * (1.0f / 0x07E0);
485 b = (rgb & 0x001F) * (1.0f / 0x001F);
486 }
487 break;
488 case FORMAT_A1R5G5B5:
489 {
490 unsigned short argb = *(unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400491
John Bauman89401822014-05-06 15:04:28 -0400492 a = (argb & 0x8000) * (1.0f / 0x8000);
493 r = (argb & 0x7C00) * (1.0f / 0x7C00);
494 g = (argb & 0x03E0) * (1.0f / 0x03E0);
495 b = (argb & 0x001F) * (1.0f / 0x001F);
496 }
497 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400498 case FORMAT_R5G5B5A1:
499 {
500 unsigned short rgba = *(unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400501
Nicolas Capens80594422015-06-09 16:42:56 -0400502 r = (rgba & 0xF800) * (1.0f / 0xF800);
503 g = (rgba & 0x07C0) * (1.0f / 0x07C0);
504 b = (rgba & 0x003E) * (1.0f / 0x003E);
505 a = (rgba & 0x0001) * (1.0f / 0x0001);
506 }
507 break;
John Bauman89401822014-05-06 15:04:28 -0400508 case FORMAT_X1R5G5B5:
509 {
510 unsigned short xrgb = *(unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400511
John Bauman89401822014-05-06 15:04:28 -0400512 r = (xrgb & 0x7C00) * (1.0f / 0x7C00);
513 g = (xrgb & 0x03E0) * (1.0f / 0x03E0);
514 b = (xrgb & 0x001F) * (1.0f / 0x001F);
515 }
516 break;
517 case FORMAT_A8R8G8B8:
518 {
519 unsigned int argb = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400520
John Bauman89401822014-05-06 15:04:28 -0400521 a = (argb & 0xFF000000) * (1.0f / 0xFF000000);
522 r = (argb & 0x00FF0000) * (1.0f / 0x00FF0000);
523 g = (argb & 0x0000FF00) * (1.0f / 0x0000FF00);
524 b = (argb & 0x000000FF) * (1.0f / 0x000000FF);
525 }
526 break;
527 case FORMAT_X8R8G8B8:
528 {
529 unsigned int xrgb = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400530
John Bauman89401822014-05-06 15:04:28 -0400531 r = (xrgb & 0x00FF0000) * (1.0f / 0x00FF0000);
532 g = (xrgb & 0x0000FF00) * (1.0f / 0x0000FF00);
533 b = (xrgb & 0x000000FF) * (1.0f / 0x000000FF);
534 }
535 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400536 case FORMAT_A8B8G8R8I_SNORM:
537 {
538 signed char* abgr = (signed char*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400539
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400540 r = max(abgr[0] * (1.0f / 0x7F), -1.0f);
541 g = max(abgr[1] * (1.0f / 0x7F), -1.0f);
542 b = max(abgr[2] * (1.0f / 0x7F), -1.0f);
543 a = max(abgr[3] * (1.0f / 0x7F), -1.0f);
544 }
545 break;
John Bauman89401822014-05-06 15:04:28 -0400546 case FORMAT_A8B8G8R8:
Alexis Hetu049a1872016-04-25 16:59:58 -0400547 case FORMAT_SRGB8_A8:
John Bauman89401822014-05-06 15:04:28 -0400548 {
549 unsigned int abgr = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400550
John Bauman89401822014-05-06 15:04:28 -0400551 a = (abgr & 0xFF000000) * (1.0f / 0xFF000000);
552 b = (abgr & 0x00FF0000) * (1.0f / 0x00FF0000);
553 g = (abgr & 0x0000FF00) * (1.0f / 0x0000FF00);
554 r = (abgr & 0x000000FF) * (1.0f / 0x000000FF);
555 }
556 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400557 case FORMAT_A8B8G8R8I:
558 {
559 signed char* abgr = (signed char*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400560
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400561 r = abgr[0];
562 g = abgr[1];
563 b = abgr[2];
564 a = abgr[3];
565 }
566 break;
567 case FORMAT_A8B8G8R8UI:
568 {
569 unsigned char* abgr = (unsigned char*)element;
570
571 r = abgr[0];
572 g = abgr[1];
573 b = abgr[2];
574 a = abgr[3];
575 }
576 break;
577 case FORMAT_X8B8G8R8I_SNORM:
578 {
579 signed char* bgr = (signed char*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400580
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400581 r = max(bgr[0] * (1.0f / 0x7F), -1.0f);
582 g = max(bgr[1] * (1.0f / 0x7F), -1.0f);
583 b = max(bgr[2] * (1.0f / 0x7F), -1.0f);
584 }
585 break;
John Bauman89401822014-05-06 15:04:28 -0400586 case FORMAT_X8B8G8R8:
Alexis Hetu049a1872016-04-25 16:59:58 -0400587 case FORMAT_SRGB8_X8:
John Bauman89401822014-05-06 15:04:28 -0400588 {
589 unsigned int xbgr = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400590
John Bauman89401822014-05-06 15:04:28 -0400591 b = (xbgr & 0x00FF0000) * (1.0f / 0x00FF0000);
592 g = (xbgr & 0x0000FF00) * (1.0f / 0x0000FF00);
593 r = (xbgr & 0x000000FF) * (1.0f / 0x000000FF);
594 }
595 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400596 case FORMAT_X8B8G8R8I:
597 {
598 signed char* bgr = (signed char*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400599
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400600 r = bgr[0];
601 g = bgr[1];
602 b = bgr[2];
603 }
604 break;
605 case FORMAT_X8B8G8R8UI:
606 {
607 unsigned char* bgr = (unsigned char*)element;
608
609 r = bgr[0];
610 g = bgr[1];
611 b = bgr[2];
612 }
613 break;
614 case FORMAT_G8R8I_SNORM:
615 {
616 signed char* gr = (signed char*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400617
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400618 r = (gr[0] & 0xFF00) * (1.0f / 0xFF00);
619 g = (gr[1] & 0x00FF) * (1.0f / 0x00FF);
620 }
621 break;
John Bauman89401822014-05-06 15:04:28 -0400622 case FORMAT_G8R8:
623 {
624 unsigned short gr = *(unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400625
John Bauman89401822014-05-06 15:04:28 -0400626 g = (gr & 0xFF00) * (1.0f / 0xFF00);
627 r = (gr & 0x00FF) * (1.0f / 0x00FF);
628 }
629 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400630 case FORMAT_G8R8I:
631 {
632 signed char* gr = (signed char*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400633
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400634 r = gr[0];
635 g = gr[1];
636 }
637 break;
638 case FORMAT_G8R8UI:
639 {
640 unsigned char* gr = (unsigned char*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400641
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400642 r = gr[0];
643 g = gr[1];
644 }
645 break;
646 case FORMAT_R16I:
647 r = *((short*)element);
648 break;
649 case FORMAT_R16UI:
650 r = *((unsigned short*)element);
651 break;
652 case FORMAT_G16R16I:
653 {
654 short* gr = (short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400655
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400656 r = gr[0];
657 g = gr[1];
658 }
659 break;
John Bauman89401822014-05-06 15:04:28 -0400660 case FORMAT_G16R16:
661 {
662 unsigned int gr = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400663
John Bauman89401822014-05-06 15:04:28 -0400664 g = (gr & 0xFFFF0000) * (1.0f / 0xFFFF0000);
665 r = (gr & 0x0000FFFF) * (1.0f / 0x0000FFFF);
666 }
667 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400668 case FORMAT_G16R16UI:
669 {
670 unsigned short* gr = (unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400671
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400672 r = gr[0];
673 g = gr[1];
674 }
675 break;
John Bauman89401822014-05-06 15:04:28 -0400676 case FORMAT_A2R10G10B10:
677 {
678 unsigned int argb = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400679
John Bauman89401822014-05-06 15:04:28 -0400680 a = (argb & 0xC0000000) * (1.0f / 0xC0000000);
681 r = (argb & 0x3FF00000) * (1.0f / 0x3FF00000);
682 g = (argb & 0x000FFC00) * (1.0f / 0x000FFC00);
683 b = (argb & 0x000003FF) * (1.0f / 0x000003FF);
684 }
685 break;
686 case FORMAT_A2B10G10R10:
687 {
688 unsigned int abgr = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400689
John Bauman89401822014-05-06 15:04:28 -0400690 a = (abgr & 0xC0000000) * (1.0f / 0xC0000000);
691 b = (abgr & 0x3FF00000) * (1.0f / 0x3FF00000);
692 g = (abgr & 0x000FFC00) * (1.0f / 0x000FFC00);
693 r = (abgr & 0x000003FF) * (1.0f / 0x000003FF);
694 }
695 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400696 case FORMAT_A16B16G16R16I:
697 {
698 short* abgr = (short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400699
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400700 r = abgr[0];
701 g = abgr[1];
702 b = abgr[2];
703 a = abgr[3];
704 }
705 break;
John Bauman89401822014-05-06 15:04:28 -0400706 case FORMAT_A16B16G16R16:
707 r = ((unsigned short*)element)[0] * (1.0f / 0xFFFF);
708 g = ((unsigned short*)element)[1] * (1.0f / 0xFFFF);
709 b = ((unsigned short*)element)[2] * (1.0f / 0xFFFF);
710 a = ((unsigned short*)element)[3] * (1.0f / 0xFFFF);
711 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400712 case FORMAT_A16B16G16R16UI:
713 {
714 unsigned short* abgr = (unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400715
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400716 r = abgr[0];
717 g = abgr[1];
718 b = abgr[2];
719 a = abgr[3];
720 }
721 break;
722 case FORMAT_X16B16G16R16I:
723 {
724 short* bgr = (short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400725
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400726 r = bgr[0];
727 g = bgr[1];
728 b = bgr[2];
729 }
730 break;
731 case FORMAT_X16B16G16R16UI:
732 {
733 unsigned short* bgr = (unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400734
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400735 r = bgr[0];
736 g = bgr[1];
737 b = bgr[2];
738 }
739 break;
740 case FORMAT_A32B32G32R32I:
741 {
742 int* abgr = (int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400743
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400744 r = static_cast<float>(abgr[0]);
745 g = static_cast<float>(abgr[1]);
746 b = static_cast<float>(abgr[2]);
747 a = static_cast<float>(abgr[3]);
748 }
749 break;
750 case FORMAT_A32B32G32R32UI:
751 {
752 unsigned int* abgr = (unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400753
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400754 r = static_cast<float>(abgr[0]);
755 g = static_cast<float>(abgr[1]);
756 b = static_cast<float>(abgr[2]);
757 a = static_cast<float>(abgr[3]);
758 }
759 break;
760 case FORMAT_X32B32G32R32I:
761 {
762 int* bgr = (int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400763
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400764 r = static_cast<float>(bgr[0]);
765 g = static_cast<float>(bgr[1]);
766 b = static_cast<float>(bgr[2]);
767 }
768 break;
769 case FORMAT_X32B32G32R32UI:
770 {
771 unsigned int* bgr = (unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400772
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400773 r = static_cast<float>(bgr[0]);
774 g = static_cast<float>(bgr[1]);
775 b = static_cast<float>(bgr[2]);
776 }
777 break;
778 case FORMAT_G32R32I:
779 {
780 int* gr = (int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400781
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400782 r = static_cast<float>(gr[0]);
783 g = static_cast<float>(gr[1]);
784 }
785 break;
786 case FORMAT_G32R32UI:
787 {
788 unsigned int* gr = (unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400789
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400790 r = static_cast<float>(gr[0]);
791 g = static_cast<float>(gr[1]);
792 }
793 break;
794 case FORMAT_R32I:
795 r = static_cast<float>(*((int*)element));
796 break;
797 case FORMAT_R32UI:
798 r = static_cast<float>(*((unsigned int*)element));
799 break;
John Bauman89401822014-05-06 15:04:28 -0400800 case FORMAT_V8U8:
801 {
802 unsigned short vu = *(unsigned short*)element;
803
804 r = ((int)(vu & 0x00FF) << 24) * (1.0f / 0x7F000000);
805 g = ((int)(vu & 0xFF00) << 16) * (1.0f / 0x7F000000);
806 }
807 break;
808 case FORMAT_L6V5U5:
809 {
810 unsigned short lvu = *(unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400811
John Bauman89401822014-05-06 15:04:28 -0400812 r = ((int)(lvu & 0x001F) << 27) * (1.0f / 0x78000000);
813 g = ((int)(lvu & 0x03E0) << 22) * (1.0f / 0x78000000);
814 b = (lvu & 0xFC00) * (1.0f / 0xFC00);
815 }
816 break;
817 case FORMAT_Q8W8V8U8:
818 {
819 unsigned int qwvu = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400820
John Bauman89401822014-05-06 15:04:28 -0400821 r = ((int)(qwvu & 0x000000FF) << 24) * (1.0f / 0x7F000000);
822 g = ((int)(qwvu & 0x0000FF00) << 16) * (1.0f / 0x7F000000);
823 b = ((int)(qwvu & 0x00FF0000) << 8) * (1.0f / 0x7F000000);
824 a = ((int)(qwvu & 0xFF000000) << 0) * (1.0f / 0x7F000000);
825 }
826 break;
827 case FORMAT_X8L8V8U8:
828 {
829 unsigned int xlvu = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400830
John Bauman89401822014-05-06 15:04:28 -0400831 r = ((int)(xlvu & 0x000000FF) << 24) * (1.0f / 0x7F000000);
832 g = ((int)(xlvu & 0x0000FF00) << 16) * (1.0f / 0x7F000000);
833 b = (xlvu & 0x00FF0000) * (1.0f / 0x00FF0000);
834 }
835 break;
836 case FORMAT_R8G8B8:
837 r = ((unsigned char*)element)[2] * (1.0f / 0xFF);
838 g = ((unsigned char*)element)[1] * (1.0f / 0xFF);
839 b = ((unsigned char*)element)[0] * (1.0f / 0xFF);
840 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400841 case FORMAT_B8G8R8:
842 r = ((unsigned char*)element)[0] * (1.0f / 0xFF);
843 g = ((unsigned char*)element)[1] * (1.0f / 0xFF);
844 b = ((unsigned char*)element)[2] * (1.0f / 0xFF);
845 break;
John Bauman89401822014-05-06 15:04:28 -0400846 case FORMAT_V16U16:
847 {
848 unsigned int vu = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400849
John Bauman89401822014-05-06 15:04:28 -0400850 r = ((int)(vu & 0x0000FFFF) << 16) * (1.0f / 0x7FFF0000);
851 g = ((int)(vu & 0xFFFF0000) << 0) * (1.0f / 0x7FFF0000);
852 }
853 break;
854 case FORMAT_A2W10V10U10:
855 {
856 unsigned int awvu = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400857
John Bauman89401822014-05-06 15:04:28 -0400858 r = ((int)(awvu & 0x000003FF) << 22) * (1.0f / 0x7FC00000);
859 g = ((int)(awvu & 0x000FFC00) << 12) * (1.0f / 0x7FC00000);
860 b = ((int)(awvu & 0x3FF00000) << 2) * (1.0f / 0x7FC00000);
861 a = (awvu & 0xC0000000) * (1.0f / 0xC0000000);
862 }
863 break;
864 case FORMAT_A16W16V16U16:
865 r = ((signed short*)element)[0] * (1.0f / 0x7FFF);
866 g = ((signed short*)element)[1] * (1.0f / 0x7FFF);
867 b = ((signed short*)element)[2] * (1.0f / 0x7FFF);
868 a = ((unsigned short*)element)[3] * (1.0f / 0xFFFF);
869 break;
870 case FORMAT_Q16W16V16U16:
871 r = ((signed short*)element)[0] * (1.0f / 0x7FFF);
872 g = ((signed short*)element)[1] * (1.0f / 0x7FFF);
873 b = ((signed short*)element)[2] * (1.0f / 0x7FFF);
874 a = ((signed short*)element)[3] * (1.0f / 0x7FFF);
875 break;
876 case FORMAT_L8:
877 r =
878 g =
879 b = *(unsigned char*)element * (1.0f / 0xFF);
880 break;
881 case FORMAT_A4L4:
882 {
883 unsigned char al = *(unsigned char*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400884
John Bauman89401822014-05-06 15:04:28 -0400885 r =
886 g =
887 b = (al & 0x0F) * (1.0f / 0x0F);
888 a = (al & 0xF0) * (1.0f / 0xF0);
889 }
890 break;
891 case FORMAT_L16:
892 r =
893 g =
894 b = *(unsigned short*)element * (1.0f / 0xFFFF);
895 break;
896 case FORMAT_A8L8:
897 r =
898 g =
899 b = ((unsigned char*)element)[0] * (1.0f / 0xFF);
900 a = ((unsigned char*)element)[1] * (1.0f / 0xFF);
901 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400902 case FORMAT_L16F:
903 r =
904 g =
905 b = *(half*)element;
906 break;
907 case FORMAT_A16L16F:
908 r =
909 g =
910 b = ((half*)element)[0];
911 a = ((half*)element)[1];
912 break;
913 case FORMAT_L32F:
914 r =
915 g =
916 b = *(float*)element;
917 break;
918 case FORMAT_A32L32F:
919 r =
920 g =
921 b = ((float*)element)[0];
922 a = ((float*)element)[1];
923 break;
924 case FORMAT_A16F:
925 a = *(half*)element;
926 break;
John Bauman89401822014-05-06 15:04:28 -0400927 case FORMAT_R16F:
928 r = *(half*)element;
929 break;
930 case FORMAT_G16R16F:
931 r = ((half*)element)[0];
932 g = ((half*)element)[1];
933 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400934 case FORMAT_B16G16R16F:
935 r = ((half*)element)[0];
936 g = ((half*)element)[1];
937 b = ((half*)element)[2];
938 break;
John Bauman89401822014-05-06 15:04:28 -0400939 case FORMAT_A16B16G16R16F:
940 r = ((half*)element)[0];
941 g = ((half*)element)[1];
942 b = ((half*)element)[2];
943 a = ((half*)element)[3];
944 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400945 case FORMAT_A32F:
946 a = *(float*)element;
947 break;
John Bauman89401822014-05-06 15:04:28 -0400948 case FORMAT_R32F:
949 r = *(float*)element;
950 break;
951 case FORMAT_G32R32F:
952 r = ((float*)element)[0];
953 g = ((float*)element)[1];
954 break;
Alexis Hetudbd1a8e2016-04-13 11:40:30 -0400955 case FORMAT_X32B32G32R32F:
Nicolas Capens80594422015-06-09 16:42:56 -0400956 case FORMAT_B32G32R32F:
957 r = ((float*)element)[0];
958 g = ((float*)element)[1];
959 b = ((float*)element)[2];
960 break;
John Bauman89401822014-05-06 15:04:28 -0400961 case FORMAT_A32B32G32R32F:
962 r = ((float*)element)[0];
963 g = ((float*)element)[1];
964 b = ((float*)element)[2];
965 a = ((float*)element)[3];
966 break;
967 case FORMAT_D32F:
968 case FORMAT_D32F_LOCKABLE:
John Bauman66b8ab22014-05-06 15:57:45 -0400969 case FORMAT_D32FS8_TEXTURE:
970 case FORMAT_D32FS8_SHADOW:
John Bauman89401822014-05-06 15:04:28 -0400971 r = *(float*)element;
972 g = r;
973 b = r;
974 a = r;
975 break;
976 case FORMAT_D32F_COMPLEMENTARY:
John Bauman66b8ab22014-05-06 15:57:45 -0400977 r = 1.0f - *(float*)element;
John Bauman89401822014-05-06 15:04:28 -0400978 g = r;
979 b = r;
980 a = r;
981 break;
982 case FORMAT_S8:
983 r = *(unsigned char*)element * (1.0f / 0xFF);
984 break;
985 default:
986 ASSERT(false);
987 }
988
989 // if(sRGB)
990 // {
991 // r = sRGBtoLinear(r);
992 // g = sRGBtoLinear(g);
993 // b = sRGBtoLinear(b);
994 // }
995
996 return Color<float>(r, g, b, a);
997 }
998
999 Color<float> Surface::Buffer::sample(float x, float y, float z) const
1000 {
1001 x -= 0.5f;
1002 y -= 0.5f;
1003 z -= 0.5f;
1004
1005 int x0 = clamp((int)x, 0, width - 1);
1006 int x1 = (x0 + 1 >= width) ? x0 : x0 + 1;
1007
1008 int y0 = clamp((int)y, 0, height - 1);
1009 int y1 = (y0 + 1 >= height) ? y0 : y0 + 1;
1010
1011 int z0 = clamp((int)z, 0, depth - 1);
1012 int z1 = (z0 + 1 >= depth) ? z0 : z0 + 1;
1013
1014 Color<float> c000 = read(x0, y0, z0);
1015 Color<float> c100 = read(x1, y0, z0);
1016 Color<float> c010 = read(x0, y1, z0);
1017 Color<float> c110 = read(x1, y1, z0);
1018 Color<float> c001 = read(x0, y0, z1);
1019 Color<float> c101 = read(x1, y0, z1);
1020 Color<float> c011 = read(x0, y1, z1);
1021 Color<float> c111 = read(x1, y1, z1);
1022
1023 float fx = x - x0;
1024 float fy = y - y0;
1025 float fz = z - z0;
1026
1027 c000 *= (1 - fx) * (1 - fy) * (1 - fz);
1028 c100 *= fx * (1 - fy) * (1 - fz);
1029 c010 *= (1 - fx) * fy * (1 - fz);
1030 c110 *= fx * fy * (1 - fz);
1031 c001 *= (1 - fx) * (1 - fy) * fz;
1032 c101 *= fx * (1 - fy) * fz;
1033 c011 *= (1 - fx) * fy * fz;
1034 c111 *= fx * fy * fz;
1035
1036 return c000 + c100 + c010 + c110 + c001 + c101 + c011 + c111;
1037 }
1038
1039 Color<float> Surface::Buffer::sample(float x, float y) const
1040 {
1041 x -= 0.5f;
1042 y -= 0.5f;
1043
1044 int x0 = clamp((int)x, 0, width - 1);
1045 int x1 = (x0 + 1 >= width) ? x0 : x0 + 1;
1046
1047 int y0 = clamp((int)y, 0, height - 1);
1048 int y1 = (y0 + 1 >= height) ? y0 : y0 + 1;
1049
1050 Color<float> c00 = read(x0, y0);
1051 Color<float> c10 = read(x1, y0);
1052 Color<float> c01 = read(x0, y1);
1053 Color<float> c11 = read(x1, y1);
1054
1055 float fx = x - x0;
1056 float fy = y - y0;
1057
1058 c00 *= (1 - fx) * (1 - fy);
1059 c10 *= fx * (1 - fy);
1060 c01 *= (1 - fx) * fy;
1061 c11 *= fx * fy;
1062
1063 return c00 + c10 + c01 + c11;
1064 }
1065
John Bauman19bac1e2014-05-06 15:23:49 -04001066 void *Surface::Buffer::lockRect(int x, int y, int z, Lock lock)
John Bauman89401822014-05-06 15:04:28 -04001067 {
1068 this->lock = lock;
1069
1070 switch(lock)
1071 {
1072 case LOCK_UNLOCKED:
1073 case LOCK_READONLY:
1074 break;
1075 case LOCK_WRITEONLY:
1076 case LOCK_READWRITE:
1077 case LOCK_DISCARD:
1078 dirty = true;
1079 break;
1080 default:
1081 ASSERT(false);
1082 }
1083
John Baumand4ae8632014-05-06 16:18:33 -04001084 if(buffer)
John Bauman89401822014-05-06 15:04:28 -04001085 {
John Baumand4ae8632014-05-06 16:18:33 -04001086 switch(format)
1087 {
1088 #if S3TC_SUPPORT
1089 case FORMAT_DXT1:
1090 #endif
1091 case FORMAT_ATI1:
Nicolas Capens22658242014-11-29 00:31:41 -05001092 case FORMAT_ETC1:
Alexis Hetu460e41f2015-09-01 10:58:37 -04001093 case FORMAT_R11_EAC:
1094 case FORMAT_SIGNED_R11_EAC:
1095 case FORMAT_RGB8_ETC2:
1096 case FORMAT_SRGB8_ETC2:
1097 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
1098 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
John Baumand4ae8632014-05-06 16:18:33 -04001099 return (unsigned char*)buffer + 8 * (x / 4) + (y / 4) * pitchB + z * sliceB;
Alexis Hetu460e41f2015-09-01 10:58:37 -04001100 case FORMAT_RG11_EAC:
1101 case FORMAT_SIGNED_RG11_EAC:
1102 case FORMAT_RGBA8_ETC2_EAC:
1103 case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
1104 case FORMAT_RGBA_ASTC_4x4_KHR:
1105 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
1106 return (unsigned char*)buffer + 16 * (x / 4) + (y / 4) * pitchB + z * sliceB;
1107 case FORMAT_RGBA_ASTC_5x4_KHR:
1108 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
1109 return (unsigned char*)buffer + 16 * (x / 5) + (y / 4) * pitchB + z * sliceB;
1110 case FORMAT_RGBA_ASTC_5x5_KHR:
1111 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
1112 return (unsigned char*)buffer + 16 * (x / 5) + (y / 5) * pitchB + z * sliceB;
1113 case FORMAT_RGBA_ASTC_6x5_KHR:
1114 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
1115 return (unsigned char*)buffer + 16 * (x / 6) + (y / 5) * pitchB + z * sliceB;
1116 case FORMAT_RGBA_ASTC_6x6_KHR:
1117 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
1118 return (unsigned char*)buffer + 16 * (x / 6) + (y / 6) * pitchB + z * sliceB;
1119 case FORMAT_RGBA_ASTC_8x5_KHR:
1120 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
1121 return (unsigned char*)buffer + 16 * (x / 8) + (y / 5) * pitchB + z * sliceB;
1122 case FORMAT_RGBA_ASTC_8x6_KHR:
1123 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
1124 return (unsigned char*)buffer + 16 * (x / 8) + (y / 6) * pitchB + z * sliceB;
1125 case FORMAT_RGBA_ASTC_8x8_KHR:
1126 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
1127 return (unsigned char*)buffer + 16 * (x / 8) + (y / 8) * pitchB + z * sliceB;
1128 case FORMAT_RGBA_ASTC_10x5_KHR:
1129 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
1130 return (unsigned char*)buffer + 16 * (x / 10) + (y / 5) * pitchB + z * sliceB;
1131 case FORMAT_RGBA_ASTC_10x6_KHR:
1132 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
1133 return (unsigned char*)buffer + 16 * (x / 10) + (y / 6) * pitchB + z * sliceB;
1134 case FORMAT_RGBA_ASTC_10x8_KHR:
1135 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
1136 return (unsigned char*)buffer + 16 * (x / 10) + (y / 8) * pitchB + z * sliceB;
1137 case FORMAT_RGBA_ASTC_10x10_KHR:
1138 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
1139 return (unsigned char*)buffer + 16 * (x / 10) + (y / 10) * pitchB + z * sliceB;
1140 case FORMAT_RGBA_ASTC_12x10_KHR:
1141 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
1142 return (unsigned char*)buffer + 16 * (x / 12) + (y / 10) * pitchB + z * sliceB;
1143 case FORMAT_RGBA_ASTC_12x12_KHR:
1144 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:
1145 return (unsigned char*)buffer + 16 * (x / 12) + (y / 12) * pitchB + z * sliceB;
John Baumand4ae8632014-05-06 16:18:33 -04001146 #if S3TC_SUPPORT
1147 case FORMAT_DXT3:
1148 case FORMAT_DXT5:
1149 #endif
1150 case FORMAT_ATI2:
1151 return (unsigned char*)buffer + 16 * (x / 4) + (y / 4) * pitchB + z * sliceB;
1152 default:
1153 return (unsigned char*)buffer + x * bytes + y * pitchB + z * sliceB;
1154 }
John Bauman89401822014-05-06 15:04:28 -04001155 }
1156
1157 return 0;
1158 }
1159
1160 void Surface::Buffer::unlockRect()
1161 {
1162 lock = LOCK_UNLOCKED;
1163 }
1164
Nicolas Capens477314b2015-06-09 16:47:29 -04001165 Surface::Surface(int width, int height, int depth, Format format, void *pixels, int pitch, int slice) : lockable(true), renderTarget(false)
1166 {
1167 resource = new Resource(0);
1168 hasParent = false;
1169 ownExternal = false;
1170 depth = max(1, depth);
1171
1172 external.buffer = pixels;
1173 external.width = width;
1174 external.height = height;
1175 external.depth = depth;
1176 external.format = format;
1177 external.bytes = bytes(external.format);
1178 external.pitchB = pitch;
Nicolas Capens0a8d3d12016-02-12 17:10:58 -05001179 external.pitchP = external.bytes ? pitch / external.bytes : 0;
Nicolas Capens477314b2015-06-09 16:47:29 -04001180 external.sliceB = slice;
Nicolas Capens0a8d3d12016-02-12 17:10:58 -05001181 external.sliceP = external.bytes ? slice / external.bytes : 0;
Nicolas Capens477314b2015-06-09 16:47:29 -04001182 external.lock = LOCK_UNLOCKED;
1183 external.dirty = true;
1184
1185 internal.buffer = 0;
1186 internal.width = width;
1187 internal.height = height;
1188 internal.depth = depth;
1189 internal.format = selectInternalFormat(format);
1190 internal.bytes = bytes(internal.format);
1191 internal.pitchB = pitchB(internal.width, internal.format, false);
1192 internal.pitchP = pitchP(internal.width, internal.format, false);
1193 internal.sliceB = sliceB(internal.width, internal.height, internal.format, false);
1194 internal.sliceP = sliceP(internal.width, internal.height, internal.format, false);
1195 internal.lock = LOCK_UNLOCKED;
1196 internal.dirty = false;
1197
1198 stencil.buffer = 0;
1199 stencil.width = width;
1200 stencil.height = height;
1201 stencil.depth = depth;
1202 stencil.format = FORMAT_S8;
1203 stencil.bytes = bytes(stencil.format);
1204 stencil.pitchB = pitchB(stencil.width, stencil.format, false);
1205 stencil.pitchP = pitchP(stencil.width, stencil.format, false);
1206 stencil.sliceB = sliceB(stencil.width, stencil.height, stencil.format, false);
1207 stencil.sliceP = sliceP(stencil.width, stencil.height, stencil.format, false);
1208 stencil.lock = LOCK_UNLOCKED;
1209 stencil.dirty = false;
1210
1211 dirtyMipmaps = true;
1212 paletteUsed = 0;
1213 }
1214
Nicolas Capensf3898612015-11-24 15:33:31 -05001215 Surface::Surface(Resource *texture, int width, int height, int depth, Format format, bool lockable, bool renderTarget, int pitchPprovided) : lockable(lockable), renderTarget(renderTarget)
John Bauman89401822014-05-06 15:04:28 -04001216 {
1217 resource = texture ? texture : new Resource(0);
John Bauman19bac1e2014-05-06 15:23:49 -04001218 hasParent = texture != 0;
Nicolas Capens477314b2015-06-09 16:47:29 -04001219 ownExternal = true;
John Bauman89401822014-05-06 15:04:28 -04001220 depth = max(1, depth);
1221
1222 external.buffer = 0;
1223 external.width = width;
1224 external.height = height;
1225 external.depth = depth;
1226 external.format = format;
1227 external.bytes = bytes(external.format);
1228 external.pitchB = pitchB(external.width, external.format, renderTarget && !texture);
1229 external.pitchP = pitchP(external.width, external.format, renderTarget && !texture);
1230 external.sliceB = sliceB(external.width, external.height, external.format, renderTarget && !texture);
1231 external.sliceP = sliceP(external.width, external.height, external.format, renderTarget && !texture);
1232 external.lock = LOCK_UNLOCKED;
1233 external.dirty = false;
John Bauman89401822014-05-06 15:04:28 -04001234
1235 internal.buffer = 0;
1236 internal.width = width;
1237 internal.height = height;
1238 internal.depth = depth;
1239 internal.format = selectInternalFormat(format);
1240 internal.bytes = bytes(internal.format);
Nicolas Capensf3898612015-11-24 15:33:31 -05001241 internal.pitchB = !pitchPprovided ? pitchB(internal.width, internal.format, renderTarget) : pitchPprovided * internal.bytes;
1242 internal.pitchP = !pitchPprovided ? pitchP(internal.width, internal.format, renderTarget) : pitchPprovided;
John Bauman89401822014-05-06 15:04:28 -04001243 internal.sliceB = sliceB(internal.width, internal.height, internal.format, renderTarget);
1244 internal.sliceP = sliceP(internal.width, internal.height, internal.format, renderTarget);
1245 internal.lock = LOCK_UNLOCKED;
1246 internal.dirty = false;
John Bauman89401822014-05-06 15:04:28 -04001247
1248 stencil.buffer = 0;
1249 stencil.width = width;
1250 stencil.height = height;
1251 stencil.depth = depth;
1252 stencil.format = FORMAT_S8;
1253 stencil.bytes = bytes(stencil.format);
1254 stencil.pitchB = pitchB(stencil.width, stencil.format, renderTarget);
1255 stencil.pitchP = pitchP(stencil.width, stencil.format, renderTarget);
1256 stencil.sliceB = sliceB(stencil.width, stencil.height, stencil.format, renderTarget);
1257 stencil.sliceP = sliceP(stencil.width, stencil.height, stencil.format, renderTarget);
1258 stencil.lock = LOCK_UNLOCKED;
1259 stencil.dirty = false;
John Bauman89401822014-05-06 15:04:28 -04001260
1261 dirtyMipmaps = true;
John Bauman66b8ab22014-05-06 15:57:45 -04001262 paletteUsed = 0;
John Bauman89401822014-05-06 15:04:28 -04001263 }
1264
1265 Surface::~Surface()
1266 {
John Bauman8a4f6fc2014-05-06 15:26:18 -04001267 // Synchronize so we can deallocate the buffers below
1268 resource->lock(DESTRUCT);
1269 resource->unlock();
1270
John Bauman89401822014-05-06 15:04:28 -04001271 if(!hasParent)
1272 {
1273 resource->destruct();
1274 }
1275
Nicolas Capens477314b2015-06-09 16:47:29 -04001276 if(ownExternal)
1277 {
1278 deallocate(external.buffer);
1279 }
John Bauman89401822014-05-06 15:04:28 -04001280
1281 if(internal.buffer != external.buffer)
1282 {
1283 deallocate(internal.buffer);
1284 }
1285
1286 deallocate(stencil.buffer);
1287
1288 external.buffer = 0;
1289 internal.buffer = 0;
1290 stencil.buffer = 0;
1291 }
1292
John Bauman19bac1e2014-05-06 15:23:49 -04001293 void *Surface::lockExternal(int x, int y, int z, Lock lock, Accessor client)
John Bauman89401822014-05-06 15:04:28 -04001294 {
1295 resource->lock(client);
1296
1297 if(!external.buffer)
1298 {
1299 if(internal.buffer && identicalFormats())
1300 {
1301 external.buffer = internal.buffer;
1302 }
1303 else
1304 {
1305 external.buffer = allocateBuffer(external.width, external.height, external.depth, external.format);
1306 }
1307 }
1308
1309 if(internal.dirty)
1310 {
1311 if(lock != LOCK_DISCARD)
1312 {
1313 update(external, internal);
1314 }
John Bauman66b8ab22014-05-06 15:57:45 -04001315
1316 internal.dirty = false;
John Bauman89401822014-05-06 15:04:28 -04001317 }
1318
1319 switch(lock)
1320 {
1321 case LOCK_READONLY:
1322 break;
1323 case LOCK_WRITEONLY:
1324 case LOCK_READWRITE:
1325 case LOCK_DISCARD:
1326 dirtyMipmaps = true;
1327 break;
1328 default:
1329 ASSERT(false);
1330 }
1331
John Bauman19bac1e2014-05-06 15:23:49 -04001332 return external.lockRect(x, y, z, lock);
John Bauman89401822014-05-06 15:04:28 -04001333 }
1334
1335 void Surface::unlockExternal()
1336 {
1337 resource->unlock();
1338
1339 external.unlockRect();
1340 }
1341
John Bauman19bac1e2014-05-06 15:23:49 -04001342 void *Surface::lockInternal(int x, int y, int z, Lock lock, Accessor client)
John Bauman89401822014-05-06 15:04:28 -04001343 {
1344 if(lock != LOCK_UNLOCKED)
1345 {
1346 resource->lock(client);
1347 }
1348
1349 if(!internal.buffer)
1350 {
1351 if(external.buffer && identicalFormats())
1352 {
1353 internal.buffer = external.buffer;
1354 }
1355 else
1356 {
1357 internal.buffer = allocateBuffer(internal.width, internal.height, internal.depth, internal.format);
1358 }
1359 }
1360
1361 // FIXME: WHQL requires conversion to lower external precision and back
1362 if(logPrecision >= WHQL)
1363 {
1364 if(internal.dirty && renderTarget && internal.format != external.format)
1365 {
1366 if(lock != LOCK_DISCARD)
1367 {
1368 switch(external.format)
1369 {
1370 case FORMAT_R3G3B2:
1371 case FORMAT_A8R3G3B2:
1372 case FORMAT_A1R5G5B5:
1373 case FORMAT_A2R10G10B10:
1374 case FORMAT_A2B10G10R10:
1375 lockExternal(0, 0, 0, LOCK_READWRITE, client);
1376 unlockExternal();
1377 break;
1378 default:
1379 // Difference passes WHQL
1380 break;
1381 }
1382 }
1383 }
1384 }
1385
John Bauman66b8ab22014-05-06 15:57:45 -04001386 if(external.dirty || (isPalette(external.format) && paletteUsed != Surface::paletteID))
John Bauman89401822014-05-06 15:04:28 -04001387 {
1388 if(lock != LOCK_DISCARD)
1389 {
1390 update(internal, external);
1391 }
John Bauman89401822014-05-06 15:04:28 -04001392
John Bauman66b8ab22014-05-06 15:57:45 -04001393 external.dirty = false;
1394 paletteUsed = Surface::paletteID;
John Bauman89401822014-05-06 15:04:28 -04001395 }
1396
1397 switch(lock)
1398 {
1399 case LOCK_UNLOCKED:
1400 case LOCK_READONLY:
1401 break;
1402 case LOCK_WRITEONLY:
1403 case LOCK_READWRITE:
1404 case LOCK_DISCARD:
1405 dirtyMipmaps = true;
1406 break;
1407 default:
1408 ASSERT(false);
1409 }
1410
1411 if(lock == LOCK_READONLY && client == PUBLIC)
1412 {
1413 resolve();
1414 }
1415
John Bauman19bac1e2014-05-06 15:23:49 -04001416 return internal.lockRect(x, y, z, lock);
John Bauman89401822014-05-06 15:04:28 -04001417 }
1418
1419 void Surface::unlockInternal()
1420 {
1421 resource->unlock();
1422
1423 internal.unlockRect();
1424 }
1425
1426 void *Surface::lockStencil(int front, Accessor client)
1427 {
1428 resource->lock(client);
1429
1430 if(!stencil.buffer)
1431 {
1432 stencil.buffer = allocateBuffer(stencil.width, stencil.height, stencil.depth, stencil.format);
1433 }
1434
John Bauman89401822014-05-06 15:04:28 -04001435 return stencil.lockRect(0, 0, front, LOCK_READWRITE); // FIXME
1436 }
1437
1438 void Surface::unlockStencil()
1439 {
1440 resource->unlock();
1441
1442 stencil.unlockRect();
1443 }
1444
1445 int Surface::bytes(Format format)
1446 {
1447 switch(format)
1448 {
1449 case FORMAT_NULL: return 0;
1450 case FORMAT_P8: return 1;
1451 case FORMAT_A8P8: return 2;
1452 case FORMAT_A8: return 1;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001453 case FORMAT_R8I: return 1;
John Bauman89401822014-05-06 15:04:28 -04001454 case FORMAT_R8: return 1;
1455 case FORMAT_R3G3B2: return 1;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001456 case FORMAT_R16I: return 2;
1457 case FORMAT_R16UI: return 2;
John Bauman89401822014-05-06 15:04:28 -04001458 case FORMAT_A8R3G3B2: return 2;
1459 case FORMAT_R5G6B5: return 2;
1460 case FORMAT_A1R5G5B5: return 2;
1461 case FORMAT_X1R5G5B5: return 2;
Nicolas Capens80594422015-06-09 16:42:56 -04001462 case FORMAT_R5G5B5A1: return 2;
John Bauman89401822014-05-06 15:04:28 -04001463 case FORMAT_X4R4G4B4: return 2;
1464 case FORMAT_A4R4G4B4: return 2;
Nicolas Capens80594422015-06-09 16:42:56 -04001465 case FORMAT_R4G4B4A4: return 2;
John Bauman89401822014-05-06 15:04:28 -04001466 case FORMAT_R8G8B8: return 3;
Nicolas Capens80594422015-06-09 16:42:56 -04001467 case FORMAT_B8G8R8: return 3;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001468 case FORMAT_R32I: return 4;
1469 case FORMAT_R32UI: return 4;
John Bauman89401822014-05-06 15:04:28 -04001470 case FORMAT_X8R8G8B8: return 4;
1471 // case FORMAT_X8G8R8B8Q: return 4;
1472 case FORMAT_A8R8G8B8: return 4;
1473 // case FORMAT_A8G8R8B8Q: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001474 case FORMAT_X8B8G8R8I: return 4;
John Bauman89401822014-05-06 15:04:28 -04001475 case FORMAT_X8B8G8R8: return 4;
Alexis Hetu049a1872016-04-25 16:59:58 -04001476 case FORMAT_SRGB8_X8: return 4;
1477 case FORMAT_SRGB8_A8: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001478 case FORMAT_A8B8G8R8I: return 4;
1479 case FORMAT_R8UI: return 1;
1480 case FORMAT_G8R8UI: return 2;
1481 case FORMAT_X8B8G8R8UI: return 4;
1482 case FORMAT_A8B8G8R8UI: return 4;
John Bauman89401822014-05-06 15:04:28 -04001483 case FORMAT_A8B8G8R8: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001484 case FORMAT_R8I_SNORM: return 1;
1485 case FORMAT_G8R8I_SNORM: return 2;
1486 case FORMAT_X8B8G8R8I_SNORM: return 4;
1487 case FORMAT_A8B8G8R8I_SNORM: return 4;
John Bauman89401822014-05-06 15:04:28 -04001488 case FORMAT_A2R10G10B10: return 4;
1489 case FORMAT_A2B10G10R10: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001490 case FORMAT_G8R8I: return 2;
John Bauman89401822014-05-06 15:04:28 -04001491 case FORMAT_G8R8: return 2;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001492 case FORMAT_G16R16I: return 4;
1493 case FORMAT_G16R16UI: return 4;
John Bauman89401822014-05-06 15:04:28 -04001494 case FORMAT_G16R16: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001495 case FORMAT_G32R32I: return 8;
1496 case FORMAT_G32R32UI: return 8;
1497 case FORMAT_X16B16G16R16I: return 8;
1498 case FORMAT_X16B16G16R16UI: return 8;
1499 case FORMAT_A16B16G16R16I: return 8;
1500 case FORMAT_A16B16G16R16UI: return 8;
John Bauman89401822014-05-06 15:04:28 -04001501 case FORMAT_A16B16G16R16: return 8;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001502 case FORMAT_X32B32G32R32I: return 16;
1503 case FORMAT_X32B32G32R32UI: return 16;
1504 case FORMAT_A32B32G32R32I: return 16;
1505 case FORMAT_A32B32G32R32UI: return 16;
John Bauman89401822014-05-06 15:04:28 -04001506 // Compressed formats
1507 #if S3TC_SUPPORT
1508 case FORMAT_DXT1: return 2; // Column of four pixels
1509 case FORMAT_DXT3: return 4; // Column of four pixels
1510 case FORMAT_DXT5: return 4; // Column of four pixels
John Bauman66b8ab22014-05-06 15:57:45 -04001511 #endif
John Bauman89401822014-05-06 15:04:28 -04001512 case FORMAT_ATI1: return 2; // Column of four pixels
1513 case FORMAT_ATI2: return 4; // Column of four pixels
Nicolas Capens22658242014-11-29 00:31:41 -05001514 case FORMAT_ETC1: return 2; // Column of four pixels
Alexis Hetu460e41f2015-09-01 10:58:37 -04001515 case FORMAT_R11_EAC: return 2;
1516 case FORMAT_SIGNED_R11_EAC: return 2;
1517 case FORMAT_RG11_EAC: return 4;
1518 case FORMAT_SIGNED_RG11_EAC: return 4;
1519 case FORMAT_RGB8_ETC2: return 2;
1520 case FORMAT_SRGB8_ETC2: return 2;
1521 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2: return 2;
1522 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2: return 2;
1523 case FORMAT_RGBA8_ETC2_EAC: return 4;
1524 case FORMAT_SRGB8_ALPHA8_ETC2_EAC: return 4;
1525 case FORMAT_RGBA_ASTC_4x4_KHR:
1526 case FORMAT_RGBA_ASTC_5x4_KHR:
1527 case FORMAT_RGBA_ASTC_5x5_KHR:
1528 case FORMAT_RGBA_ASTC_6x5_KHR:
1529 case FORMAT_RGBA_ASTC_6x6_KHR:
1530 case FORMAT_RGBA_ASTC_8x5_KHR:
1531 case FORMAT_RGBA_ASTC_8x6_KHR:
1532 case FORMAT_RGBA_ASTC_8x8_KHR:
1533 case FORMAT_RGBA_ASTC_10x5_KHR:
1534 case FORMAT_RGBA_ASTC_10x6_KHR:
1535 case FORMAT_RGBA_ASTC_10x8_KHR:
1536 case FORMAT_RGBA_ASTC_10x10_KHR:
1537 case FORMAT_RGBA_ASTC_12x10_KHR:
1538 case FORMAT_RGBA_ASTC_12x12_KHR:
1539 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
1540 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
1541 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
1542 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
1543 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
1544 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
1545 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
1546 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
1547 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
1548 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
1549 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
1550 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
1551 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
1552 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR: return 0; // FIXME
John Bauman89401822014-05-06 15:04:28 -04001553 // Bumpmap formats
1554 case FORMAT_V8U8: return 2;
1555 case FORMAT_L6V5U5: return 2;
1556 case FORMAT_Q8W8V8U8: return 4;
1557 case FORMAT_X8L8V8U8: return 4;
1558 case FORMAT_A2W10V10U10: return 4;
1559 case FORMAT_V16U16: return 4;
1560 case FORMAT_A16W16V16U16: return 8;
1561 case FORMAT_Q16W16V16U16: return 8;
1562 // Luminance formats
1563 case FORMAT_L8: return 1;
1564 case FORMAT_A4L4: return 1;
1565 case FORMAT_L16: return 2;
1566 case FORMAT_A8L8: return 2;
Nicolas Capens80594422015-06-09 16:42:56 -04001567 case FORMAT_L16F: return 2;
1568 case FORMAT_A16L16F: return 4;
1569 case FORMAT_L32F: return 4;
1570 case FORMAT_A32L32F: return 8;
John Bauman89401822014-05-06 15:04:28 -04001571 // Floating-point formats
Nicolas Capens80594422015-06-09 16:42:56 -04001572 case FORMAT_A16F: return 2;
John Bauman89401822014-05-06 15:04:28 -04001573 case FORMAT_R16F: return 2;
1574 case FORMAT_G16R16F: return 4;
Nicolas Capens80594422015-06-09 16:42:56 -04001575 case FORMAT_B16G16R16F: return 6;
John Bauman89401822014-05-06 15:04:28 -04001576 case FORMAT_A16B16G16R16F: return 8;
Nicolas Capens80594422015-06-09 16:42:56 -04001577 case FORMAT_A32F: return 4;
John Bauman89401822014-05-06 15:04:28 -04001578 case FORMAT_R32F: return 4;
1579 case FORMAT_G32R32F: return 8;
Nicolas Capens80594422015-06-09 16:42:56 -04001580 case FORMAT_B32G32R32F: return 12;
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04001581 case FORMAT_X32B32G32R32F: return 16;
John Bauman89401822014-05-06 15:04:28 -04001582 case FORMAT_A32B32G32R32F: return 16;
1583 // Depth/stencil formats
1584 case FORMAT_D16: return 2;
1585 case FORMAT_D32: return 4;
1586 case FORMAT_D24X8: return 4;
1587 case FORMAT_D24S8: return 4;
1588 case FORMAT_D24FS8: return 4;
1589 case FORMAT_D32F: return 4;
1590 case FORMAT_D32F_COMPLEMENTARY: return 4;
1591 case FORMAT_D32F_LOCKABLE: return 4;
John Bauman66b8ab22014-05-06 15:57:45 -04001592 case FORMAT_D32FS8_TEXTURE: return 4;
1593 case FORMAT_D32FS8_SHADOW: return 4;
1594 case FORMAT_DF24S8: return 4;
1595 case FORMAT_DF16S8: return 2;
John Bauman89401822014-05-06 15:04:28 -04001596 case FORMAT_INTZ: return 4;
1597 case FORMAT_S8: return 1;
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04001598 case FORMAT_YV12_BT601: return 1; // Y plane only
1599 case FORMAT_YV12_BT709: return 1; // Y plane only
1600 case FORMAT_YV12_JFIF: return 1; // Y plane only
John Bauman89401822014-05-06 15:04:28 -04001601 default:
1602 ASSERT(false);
1603 }
1604
1605 return 0;
1606 }
1607
1608 int Surface::pitchB(int width, Format format, bool target)
1609 {
1610 if(target || isDepth(format) || isStencil(format))
1611 {
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04001612 width = align(width, 2);
John Bauman89401822014-05-06 15:04:28 -04001613 }
1614
1615 switch(format)
1616 {
1617 #if S3TC_SUPPORT
1618 case FORMAT_DXT1:
Nicolas Capens22658242014-11-29 00:31:41 -05001619 #endif
1620 case FORMAT_ETC1:
Alexis Hetu460e41f2015-09-01 10:58:37 -04001621 case FORMAT_R11_EAC:
1622 case FORMAT_SIGNED_R11_EAC:
1623 case FORMAT_RGB8_ETC2:
1624 case FORMAT_SRGB8_ETC2:
1625 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
1626 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
John Bauman89401822014-05-06 15:04:28 -04001627 return 8 * ((width + 3) / 4); // 64 bit per 4x4 block, computed per 4 rows
Alexis Hetu460e41f2015-09-01 10:58:37 -04001628 case FORMAT_RG11_EAC:
1629 case FORMAT_SIGNED_RG11_EAC:
1630 case FORMAT_RGBA8_ETC2_EAC:
1631 case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
1632 case FORMAT_RGBA_ASTC_4x4_KHR:
1633 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
1634 return 16 * ((width + 3) / 4); // 128 bit per 4x4 block, computed per 4 rows
1635 case FORMAT_RGBA_ASTC_5x4_KHR:
1636 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
1637 case FORMAT_RGBA_ASTC_5x5_KHR:
1638 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
1639 return 16 * ((width + 4) / 5);
1640 case FORMAT_RGBA_ASTC_6x5_KHR:
1641 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
1642 case FORMAT_RGBA_ASTC_6x6_KHR:
1643 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
1644 return 16 * ((width + 5) / 6);
1645 case FORMAT_RGBA_ASTC_8x5_KHR:
1646 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
1647 case FORMAT_RGBA_ASTC_8x6_KHR:
1648 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
1649 case FORMAT_RGBA_ASTC_8x8_KHR:
1650 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
1651 return 16 * ((width + 7) / 8);
1652 case FORMAT_RGBA_ASTC_10x5_KHR:
1653 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
1654 case FORMAT_RGBA_ASTC_10x6_KHR:
1655 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
1656 case FORMAT_RGBA_ASTC_10x8_KHR:
1657 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
1658 case FORMAT_RGBA_ASTC_10x10_KHR:
1659 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
1660 return 16 * ((width + 9) / 10);
1661 case FORMAT_RGBA_ASTC_12x10_KHR:
1662 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
1663 case FORMAT_RGBA_ASTC_12x12_KHR:
1664 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:
1665 return 16 * ((width + 11) / 12);
Nicolas Capens22658242014-11-29 00:31:41 -05001666 #if S3TC_SUPPORT
John Bauman89401822014-05-06 15:04:28 -04001667 case FORMAT_DXT3:
1668 case FORMAT_DXT5:
1669 return 16 * ((width + 3) / 4); // 128 bit per 4x4 block, computed per 4 rows
John Bauman66b8ab22014-05-06 15:57:45 -04001670 #endif
John Bauman89401822014-05-06 15:04:28 -04001671 case FORMAT_ATI1:
1672 return 2 * ((width + 3) / 4); // 64 bit per 4x4 block, computed per row
1673 case FORMAT_ATI2:
1674 return 4 * ((width + 3) / 4); // 128 bit per 4x4 block, computed per row
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04001675 case FORMAT_YV12_BT601:
1676 case FORMAT_YV12_BT709:
1677 case FORMAT_YV12_JFIF:
1678 return align(width, 16);
John Bauman89401822014-05-06 15:04:28 -04001679 default:
1680 return bytes(format) * width;
1681 }
1682 }
1683
1684 int Surface::pitchP(int width, Format format, bool target)
1685 {
1686 int B = bytes(format);
1687
1688 return B > 0 ? pitchB(width, format, target) / B : 0;
1689 }
1690
1691 int Surface::sliceB(int width, int height, Format format, bool target)
1692 {
1693 if(target || isDepth(format) || isStencil(format))
1694 {
1695 height = ((height + 1) & ~1);
1696 }
1697
1698 switch(format)
1699 {
1700 #if S3TC_SUPPORT
1701 case FORMAT_DXT1:
1702 case FORMAT_DXT3:
1703 case FORMAT_DXT5:
John Bauman66b8ab22014-05-06 15:57:45 -04001704 #endif
Nicolas Capens22658242014-11-29 00:31:41 -05001705 case FORMAT_ETC1:
Alexis Hetu460e41f2015-09-01 10:58:37 -04001706 case FORMAT_R11_EAC:
1707 case FORMAT_SIGNED_R11_EAC:
1708 case FORMAT_RG11_EAC:
1709 case FORMAT_SIGNED_RG11_EAC:
1710 case FORMAT_RGB8_ETC2:
1711 case FORMAT_SRGB8_ETC2:
1712 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
1713 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
1714 case FORMAT_RGBA8_ETC2_EAC:
1715 case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
1716 case FORMAT_RGBA_ASTC_4x4_KHR:
1717 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
1718 case FORMAT_RGBA_ASTC_5x4_KHR:
1719 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
Nicolas Capens22658242014-11-29 00:31:41 -05001720 return pitchB(width, format, target) * ((height + 3) / 4); // Pitch computed per 4 rows
Alexis Hetu460e41f2015-09-01 10:58:37 -04001721 case FORMAT_RGBA_ASTC_5x5_KHR:
1722 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
1723 case FORMAT_RGBA_ASTC_6x5_KHR:
1724 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
1725 case FORMAT_RGBA_ASTC_8x5_KHR:
1726 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
1727 case FORMAT_RGBA_ASTC_10x5_KHR:
1728 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
1729 return pitchB(width, format, target) * ((height + 4) / 5); // Pitch computed per 5 rows
1730 case FORMAT_RGBA_ASTC_6x6_KHR:
1731 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
1732 case FORMAT_RGBA_ASTC_8x6_KHR:
1733 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
1734 case FORMAT_RGBA_ASTC_10x6_KHR:
1735 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
1736 return pitchB(width, format, target) * ((height + 5) / 6); // Pitch computed per 6 rows
1737 case FORMAT_RGBA_ASTC_8x8_KHR:
1738 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
1739 case FORMAT_RGBA_ASTC_10x8_KHR:
1740 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
1741 return pitchB(width, format, target) * ((height + 7) / 8); // Pitch computed per 8 rows
1742 case FORMAT_RGBA_ASTC_10x10_KHR:
1743 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
1744 case FORMAT_RGBA_ASTC_12x10_KHR:
1745 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
1746 return pitchB(width, format, target) * ((height + 9) / 10); // Pitch computed per 10 rows
1747 case FORMAT_RGBA_ASTC_12x12_KHR:
1748 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:
1749 return pitchB(width, format, target) * ((height + 11) / 12); // Pitch computed per 12 rows
Nicolas Capens22658242014-11-29 00:31:41 -05001750 case FORMAT_ATI1:
1751 case FORMAT_ATI2:
John Bauman89401822014-05-06 15:04:28 -04001752 default:
Nicolas Capens22658242014-11-29 00:31:41 -05001753 return pitchB(width, format, target) * height; // Pitch computed per row
John Bauman89401822014-05-06 15:04:28 -04001754 }
1755 }
1756
1757 int Surface::sliceP(int width, int height, Format format, bool target)
1758 {
1759 int B = bytes(format);
1760
1761 return B > 0 ? sliceB(width, height, format, target) / B : 0;
1762 }
1763
1764 void Surface::update(Buffer &destination, Buffer &source)
1765 {
1766 // ASSERT(source.lock != LOCK_UNLOCKED);
1767 // ASSERT(destination.lock != LOCK_UNLOCKED);
Nicolas Capensc39901e2016-03-21 16:37:44 -04001768
John Bauman89401822014-05-06 15:04:28 -04001769 if(destination.buffer != source.buffer)
1770 {
1771 ASSERT(source.dirty && !destination.dirty);
1772
1773 switch(source.format)
1774 {
1775 case FORMAT_R8G8B8: decodeR8G8B8(destination, source); break; // FIXME: Check destination format
John Bauman89401822014-05-06 15:04:28 -04001776 case FORMAT_X1R5G5B5: decodeX1R5G5B5(destination, source); break; // FIXME: Check destination format
1777 case FORMAT_A1R5G5B5: decodeA1R5G5B5(destination, source); break; // FIXME: Check destination format
1778 case FORMAT_X4R4G4B4: decodeX4R4G4B4(destination, source); break; // FIXME: Check destination format
1779 case FORMAT_A4R4G4B4: decodeA4R4G4B4(destination, source); break; // FIXME: Check destination format
1780 case FORMAT_P8: decodeP8(destination, source); break; // FIXME: Check destination format
1781 #if S3TC_SUPPORT
1782 case FORMAT_DXT1: decodeDXT1(destination, source); break; // FIXME: Check destination format
1783 case FORMAT_DXT3: decodeDXT3(destination, source); break; // FIXME: Check destination format
1784 case FORMAT_DXT5: decodeDXT5(destination, source); break; // FIXME: Check destination format
Nicolas Capens22658242014-11-29 00:31:41 -05001785 #endif
John Bauman89401822014-05-06 15:04:28 -04001786 case FORMAT_ATI1: decodeATI1(destination, source); break; // FIXME: Check destination format
1787 case FORMAT_ATI2: decodeATI2(destination, source); break; // FIXME: Check destination format
Alexis Hetu460e41f2015-09-01 10:58:37 -04001788 case FORMAT_R11_EAC: decodeEAC(destination, source, 1, false); break; // FIXME: Check destination format
1789 case FORMAT_SIGNED_R11_EAC: decodeEAC(destination, source, 1, true); break; // FIXME: Check destination format
1790 case FORMAT_RG11_EAC: decodeEAC(destination, source, 2, false); break; // FIXME: Check destination format
1791 case FORMAT_SIGNED_RG11_EAC: decodeEAC(destination, source, 2, true); break; // FIXME: Check destination format
Alexis Hetu0de50d42015-09-09 13:56:41 -04001792 case FORMAT_ETC1:
Alexis Hetu460e41f2015-09-01 10:58:37 -04001793 case FORMAT_RGB8_ETC2: decodeETC2(destination, source, 0, false); break; // FIXME: Check destination format
1794 case FORMAT_SRGB8_ETC2: decodeETC2(destination, source, 0, true); break; // FIXME: Check destination format
1795 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2: decodeETC2(destination, source, 1, false); break; // FIXME: Check destination format
1796 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2: decodeETC2(destination, source, 1, true); break; // FIXME: Check destination format
1797 case FORMAT_RGBA8_ETC2_EAC: decodeETC2(destination, source, 8, false); break; // FIXME: Check destination format
1798 case FORMAT_SRGB8_ALPHA8_ETC2_EAC: decodeETC2(destination, source, 8, true); break; // FIXME: Check destination format
1799 case FORMAT_RGBA_ASTC_4x4_KHR: decodeASTC(destination, source, 4, 4, 1, false); break; // FIXME: Check destination format
1800 case FORMAT_RGBA_ASTC_5x4_KHR: decodeASTC(destination, source, 5, 4, 1, false); break; // FIXME: Check destination format
1801 case FORMAT_RGBA_ASTC_5x5_KHR: decodeASTC(destination, source, 5, 5, 1, false); break; // FIXME: Check destination format
1802 case FORMAT_RGBA_ASTC_6x5_KHR: decodeASTC(destination, source, 6, 5, 1, false); break; // FIXME: Check destination format
1803 case FORMAT_RGBA_ASTC_6x6_KHR: decodeASTC(destination, source, 6, 6, 1, false); break; // FIXME: Check destination format
1804 case FORMAT_RGBA_ASTC_8x5_KHR: decodeASTC(destination, source, 8, 5, 1, false); break; // FIXME: Check destination format
1805 case FORMAT_RGBA_ASTC_8x6_KHR: decodeASTC(destination, source, 8, 6, 1, false); break; // FIXME: Check destination format
1806 case FORMAT_RGBA_ASTC_8x8_KHR: decodeASTC(destination, source, 8, 8, 1, false); break; // FIXME: Check destination format
1807 case FORMAT_RGBA_ASTC_10x5_KHR: decodeASTC(destination, source, 10, 5, 1, false); break; // FIXME: Check destination format
1808 case FORMAT_RGBA_ASTC_10x6_KHR: decodeASTC(destination, source, 10, 6, 1, false); break; // FIXME: Check destination format
1809 case FORMAT_RGBA_ASTC_10x8_KHR: decodeASTC(destination, source, 10, 8, 1, false); break; // FIXME: Check destination format
1810 case FORMAT_RGBA_ASTC_10x10_KHR: decodeASTC(destination, source, 10, 10, 1, false); break; // FIXME: Check destination format
1811 case FORMAT_RGBA_ASTC_12x10_KHR: decodeASTC(destination, source, 12, 10, 1, false); break; // FIXME: Check destination format
1812 case FORMAT_RGBA_ASTC_12x12_KHR: decodeASTC(destination, source, 12, 12, 1, false); break; // FIXME: Check destination format
1813 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR: decodeASTC(destination, source, 4, 4, 1, true); break; // FIXME: Check destination format
1814 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR: decodeASTC(destination, source, 5, 4, 1, true); break; // FIXME: Check destination format
1815 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR: decodeASTC(destination, source, 5, 5, 1, true); break; // FIXME: Check destination format
1816 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR: decodeASTC(destination, source, 6, 5, 1, true); break; // FIXME: Check destination format
1817 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR: decodeASTC(destination, source, 6, 6, 1, true); break; // FIXME: Check destination format
1818 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR: decodeASTC(destination, source, 8, 5, 1, true); break; // FIXME: Check destination format
1819 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR: decodeASTC(destination, source, 8, 6, 1, true); break; // FIXME: Check destination format
1820 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR: decodeASTC(destination, source, 8, 8, 1, true); break; // FIXME: Check destination format
1821 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR: decodeASTC(destination, source, 10, 5, 1, true); break; // FIXME: Check destination format
1822 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR: decodeASTC(destination, source, 10, 6, 1, true); break; // FIXME: Check destination format
1823 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR: decodeASTC(destination, source, 10, 8, 1, true); break; // FIXME: Check destination format
1824 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR: decodeASTC(destination, source, 10, 10, 1, true); break; // FIXME: Check destination format
1825 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR: decodeASTC(destination, source, 12, 10, 1, true); break; // FIXME: Check destination format
1826 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR: decodeASTC(destination, source, 12, 12, 1, true); break; // FIXME: Check destination format
John Bauman89401822014-05-06 15:04:28 -04001827 default: genericUpdate(destination, source); break;
1828 }
1829 }
John Bauman89401822014-05-06 15:04:28 -04001830 }
1831
1832 void Surface::genericUpdate(Buffer &destination, Buffer &source)
1833 {
1834 unsigned char *sourceSlice = (unsigned char*)source.buffer;
1835 unsigned char *destinationSlice = (unsigned char*)destination.buffer;
1836
1837 int depth = min(destination.depth, source.depth);
1838 int height = min(destination.height, source.height);
1839 int width = min(destination.width, source.width);
1840 int rowBytes = width * source.bytes;
1841
1842 for(int z = 0; z < depth; z++)
1843 {
1844 unsigned char *sourceRow = sourceSlice;
1845 unsigned char *destinationRow = destinationSlice;
1846
1847 for(int y = 0; y < height; y++)
1848 {
1849 if(source.format == destination.format)
1850 {
1851 memcpy(destinationRow, sourceRow, rowBytes);
1852 }
1853 else
1854 {
1855 unsigned char *sourceElement = sourceRow;
1856 unsigned char *destinationElement = destinationRow;
1857
1858 for(int x = 0; x < width; x++)
1859 {
1860 Color<float> color = source.read(sourceElement);
1861 destination.write(destinationElement, color);
1862
1863 sourceElement += source.bytes;
1864 destinationElement += destination.bytes;
1865 }
1866 }
1867
1868 sourceRow += source.pitchB;
1869 destinationRow += destination.pitchB;
1870 }
1871
1872 sourceSlice += source.sliceB;
1873 destinationSlice += destination.sliceB;
1874 }
1875 }
1876
1877 void Surface::decodeR8G8B8(Buffer &destination, const Buffer &source)
1878 {
1879 unsigned char *sourceSlice = (unsigned char*)source.buffer;
1880 unsigned char *destinationSlice = (unsigned char*)destination.buffer;
1881
1882 for(int z = 0; z < destination.depth && z < source.depth; z++)
1883 {
1884 unsigned char *sourceRow = sourceSlice;
1885 unsigned char *destinationRow = destinationSlice;
1886
1887 for(int y = 0; y < destination.height && y < source.height; y++)
1888 {
1889 unsigned char *sourceElement = sourceRow;
1890 unsigned char *destinationElement = destinationRow;
1891
1892 for(int x = 0; x < destination.width && x < source.width; x++)
1893 {
1894 unsigned int b = sourceElement[0];
1895 unsigned int g = sourceElement[1];
1896 unsigned int r = sourceElement[2];
1897
1898 *(unsigned int*)destinationElement = 0xFF000000 | (r << 16) | (g << 8) | (b << 0);
1899
1900 sourceElement += source.bytes;
1901 destinationElement += destination.bytes;
1902 }
1903
1904 sourceRow += source.pitchB;
1905 destinationRow += destination.pitchB;
1906 }
1907
1908 sourceSlice += source.sliceB;
1909 destinationSlice += destination.sliceB;
1910 }
1911 }
1912
John Bauman89401822014-05-06 15:04:28 -04001913 void Surface::decodeX1R5G5B5(Buffer &destination, const Buffer &source)
1914 {
1915 unsigned char *sourceSlice = (unsigned char*)source.buffer;
1916 unsigned char *destinationSlice = (unsigned char*)destination.buffer;
1917
1918 for(int z = 0; z < destination.depth && z < source.depth; z++)
1919 {
1920 unsigned char *sourceRow = sourceSlice;
1921 unsigned char *destinationRow = destinationSlice;
1922
1923 for(int y = 0; y < destination.height && y < source.height; y++)
1924 {
1925 unsigned char *sourceElement = sourceRow;
1926 unsigned char *destinationElement = destinationRow;
1927
1928 for(int x = 0; x < destination.width && x < source.width; x++)
1929 {
1930 unsigned int xrgb = *(unsigned short*)sourceElement;
Nicolas Capensc39901e2016-03-21 16:37:44 -04001931
John Bauman89401822014-05-06 15:04:28 -04001932 unsigned int r = (((xrgb & 0x7C00) * 134771 + 0x800000) >> 8) & 0x00FF0000;
1933 unsigned int g = (((xrgb & 0x03E0) * 16846 + 0x8000) >> 8) & 0x0000FF00;
1934 unsigned int b = (((xrgb & 0x001F) * 2106 + 0x80) >> 8);
1935
1936 *(unsigned int*)destinationElement = 0xFF000000 | r | g | b;
1937
1938 sourceElement += source.bytes;
1939 destinationElement += destination.bytes;
1940 }
1941
1942 sourceRow += source.pitchB;
1943 destinationRow += destination.pitchB;
1944 }
1945
1946 sourceSlice += source.sliceB;
1947 destinationSlice += destination.sliceB;
1948 }
1949 }
1950
1951 void Surface::decodeA1R5G5B5(Buffer &destination, const Buffer &source)
1952 {
1953 unsigned char *sourceSlice = (unsigned char*)source.buffer;
1954 unsigned char *destinationSlice = (unsigned char*)destination.buffer;
1955
1956 for(int z = 0; z < destination.depth && z < source.depth; z++)
1957 {
1958 unsigned char *sourceRow = sourceSlice;
1959 unsigned char *destinationRow = destinationSlice;
1960
1961 for(int y = 0; y < destination.height && y < source.height; y++)
1962 {
1963 unsigned char *sourceElement = sourceRow;
1964 unsigned char *destinationElement = destinationRow;
1965
1966 for(int x = 0; x < destination.width && x < source.width; x++)
1967 {
1968 unsigned int argb = *(unsigned short*)sourceElement;
Nicolas Capensc39901e2016-03-21 16:37:44 -04001969
John Bauman89401822014-05-06 15:04:28 -04001970 unsigned int a = (argb & 0x8000) * 130560;
1971 unsigned int r = (((argb & 0x7C00) * 134771 + 0x800000) >> 8) & 0x00FF0000;
1972 unsigned int g = (((argb & 0x03E0) * 16846 + 0x8000) >> 8) & 0x0000FF00;
1973 unsigned int b = (((argb & 0x001F) * 2106 + 0x80) >> 8);
1974
1975 *(unsigned int*)destinationElement = a | r | g | b;
1976
1977 sourceElement += source.bytes;
1978 destinationElement += destination.bytes;
1979 }
1980
1981 sourceRow += source.pitchB;
1982 destinationRow += destination.pitchB;
1983 }
1984
1985 sourceSlice += source.sliceB;
1986 destinationSlice += destination.sliceB;
1987 }
1988 }
1989
1990 void Surface::decodeX4R4G4B4(Buffer &destination, const Buffer &source)
1991 {
1992 unsigned char *sourceSlice = (unsigned char*)source.buffer;
1993 unsigned char *destinationSlice = (unsigned char*)destination.buffer;
1994
1995 for(int z = 0; z < destination.depth && z < source.depth; z++)
1996 {
1997 unsigned char *sourceRow = sourceSlice;
1998 unsigned char *destinationRow = destinationSlice;
1999
2000 for(int y = 0; y < destination.height && y < source.height; y++)
2001 {
2002 unsigned char *sourceElement = sourceRow;
2003 unsigned char *destinationElement = destinationRow;
2004
2005 for(int x = 0; x < destination.width && x < source.width; x++)
2006 {
2007 unsigned int xrgb = *(unsigned short*)sourceElement;
Nicolas Capensc39901e2016-03-21 16:37:44 -04002008
John Bauman89401822014-05-06 15:04:28 -04002009 unsigned int r = ((xrgb & 0x0F00) * 0x00001100) & 0x00FF0000;
2010 unsigned int g = ((xrgb & 0x00F0) * 0x00000110) & 0x0000FF00;
2011 unsigned int b = (xrgb & 0x000F) * 0x00000011;
2012
2013 *(unsigned int*)destinationElement = 0xFF000000 | r | g | b;
2014
2015 sourceElement += source.bytes;
2016 destinationElement += destination.bytes;
2017 }
2018
2019 sourceRow += source.pitchB;
2020 destinationRow += destination.pitchB;
2021 }
2022
2023 sourceSlice += source.sliceB;
2024 destinationSlice += destination.sliceB;
2025 }
2026 }
2027
2028 void Surface::decodeA4R4G4B4(Buffer &destination, const Buffer &source)
2029 {
2030 unsigned char *sourceSlice = (unsigned char*)source.buffer;
2031 unsigned char *destinationSlice = (unsigned char*)destination.buffer;
2032
2033 for(int z = 0; z < destination.depth && z < source.depth; z++)
2034 {
2035 unsigned char *sourceRow = sourceSlice;
2036 unsigned char *destinationRow = destinationSlice;
2037
2038 for(int y = 0; y < destination.height && y < source.height; y++)
2039 {
2040 unsigned char *sourceElement = sourceRow;
2041 unsigned char *destinationElement = destinationRow;
2042
2043 for(int x = 0; x < destination.width && x < source.width; x++)
2044 {
2045 unsigned int argb = *(unsigned short*)sourceElement;
Nicolas Capensc39901e2016-03-21 16:37:44 -04002046
John Bauman89401822014-05-06 15:04:28 -04002047 unsigned int a = ((argb & 0xF000) * 0x00011000) & 0xFF000000;
2048 unsigned int r = ((argb & 0x0F00) * 0x00001100) & 0x00FF0000;
2049 unsigned int g = ((argb & 0x00F0) * 0x00000110) & 0x0000FF00;
2050 unsigned int b = (argb & 0x000F) * 0x00000011;
2051
2052 *(unsigned int*)destinationElement = a | r | g | b;
2053
2054 sourceElement += source.bytes;
2055 destinationElement += destination.bytes;
2056 }
2057
2058 sourceRow += source.pitchB;
2059 destinationRow += destination.pitchB;
2060 }
2061
2062 sourceSlice += source.sliceB;
2063 destinationSlice += destination.sliceB;
2064 }
2065 }
2066
2067 void Surface::decodeP8(Buffer &destination, const Buffer &source)
2068 {
2069 unsigned char *sourceSlice = (unsigned char*)source.buffer;
2070 unsigned char *destinationSlice = (unsigned char*)destination.buffer;
2071
2072 for(int z = 0; z < destination.depth && z < source.depth; z++)
2073 {
2074 unsigned char *sourceRow = sourceSlice;
2075 unsigned char *destinationRow = destinationSlice;
2076
2077 for(int y = 0; y < destination.height && y < source.height; y++)
2078 {
2079 unsigned char *sourceElement = sourceRow;
2080 unsigned char *destinationElement = destinationRow;
2081
2082 for(int x = 0; x < destination.width && x < source.width; x++)
2083 {
2084 unsigned int abgr = palette[*(unsigned char*)sourceElement];
2085
2086 unsigned int r = (abgr & 0x000000FF) << 16;
2087 unsigned int g = (abgr & 0x0000FF00) << 0;
2088 unsigned int b = (abgr & 0x00FF0000) >> 16;
2089 unsigned int a = (abgr & 0xFF000000) >> 0;
2090
2091 *(unsigned int*)destinationElement = a | r | g | b;
2092
2093 sourceElement += source.bytes;
2094 destinationElement += destination.bytes;
2095 }
2096
2097 sourceRow += source.pitchB;
2098 destinationRow += destination.pitchB;
2099 }
2100
2101 sourceSlice += source.sliceB;
2102 destinationSlice += destination.sliceB;
2103 }
2104 }
2105
2106#if S3TC_SUPPORT
2107 void Surface::decodeDXT1(Buffer &internal, const Buffer &external)
2108 {
2109 unsigned int *destSlice = (unsigned int*)internal.buffer;
Nicolas Capens22658242014-11-29 00:31:41 -05002110 const DXT1 *source = (const DXT1*)external.buffer;
John Bauman89401822014-05-06 15:04:28 -04002111
2112 for(int z = 0; z < external.depth; z++)
2113 {
2114 unsigned int *dest = destSlice;
2115
2116 for(int y = 0; y < external.height; y += 4)
2117 {
2118 for(int x = 0; x < external.width; x += 4)
2119 {
2120 Color<byte> c[4];
2121
2122 c[0] = source->c0;
2123 c[1] = source->c1;
2124
2125 if(source->c0 > source->c1) // No transparency
2126 {
2127 // c2 = 2 / 3 * c0 + 1 / 3 * c1
2128 c[2].r = (byte)((2 * (word)c[0].r + (word)c[1].r + 1) / 3);
2129 c[2].g = (byte)((2 * (word)c[0].g + (word)c[1].g + 1) / 3);
2130 c[2].b = (byte)((2 * (word)c[0].b + (word)c[1].b + 1) / 3);
2131 c[2].a = 0xFF;
2132
2133 // c3 = 1 / 3 * c0 + 2 / 3 * c1
2134 c[3].r = (byte)(((word)c[0].r + 2 * (word)c[1].r + 1) / 3);
2135 c[3].g = (byte)(((word)c[0].g + 2 * (word)c[1].g + 1) / 3);
2136 c[3].b = (byte)(((word)c[0].b + 2 * (word)c[1].b + 1) / 3);
2137 c[3].a = 0xFF;
2138 }
2139 else // c3 transparent
2140 {
2141 // c2 = 1 / 2 * c0 + 1 / 2 * c1
2142 c[2].r = (byte)(((word)c[0].r + (word)c[1].r) / 2);
2143 c[2].g = (byte)(((word)c[0].g + (word)c[1].g) / 2);
2144 c[2].b = (byte)(((word)c[0].b + (word)c[1].b) / 2);
2145 c[2].a = 0xFF;
2146
2147 c[3].r = 0;
2148 c[3].g = 0;
2149 c[3].b = 0;
2150 c[3].a = 0;
2151 }
2152
2153 for(int j = 0; j < 4 && (y + j) < internal.height; j++)
2154 {
2155 for(int i = 0; i < 4 && (x + i) < internal.width; i++)
2156 {
2157 dest[(x + i) + (y + j) * internal.width] = c[(unsigned int)(source->lut >> 2 * (i + j * 4)) % 4];
2158 }
2159 }
2160
2161 source++;
2162 }
2163 }
2164
2165 (byte*&)destSlice += internal.sliceB;
2166 }
2167 }
2168
2169 void Surface::decodeDXT3(Buffer &internal, const Buffer &external)
2170 {
2171 unsigned int *destSlice = (unsigned int*)internal.buffer;
Nicolas Capens22658242014-11-29 00:31:41 -05002172 const DXT3 *source = (const DXT3*)external.buffer;
John Bauman89401822014-05-06 15:04:28 -04002173
2174 for(int z = 0; z < external.depth; z++)
2175 {
2176 unsigned int *dest = destSlice;
2177
2178 for(int y = 0; y < external.height; y += 4)
2179 {
2180 for(int x = 0; x < external.width; x += 4)
2181 {
2182 Color<byte> c[4];
2183
2184 c[0] = source->c0;
2185 c[1] = source->c1;
2186
2187 // c2 = 2 / 3 * c0 + 1 / 3 * c1
2188 c[2].r = (byte)((2 * (word)c[0].r + (word)c[1].r + 1) / 3);
2189 c[2].g = (byte)((2 * (word)c[0].g + (word)c[1].g + 1) / 3);
2190 c[2].b = (byte)((2 * (word)c[0].b + (word)c[1].b + 1) / 3);
2191
2192 // c3 = 1 / 3 * c0 + 2 / 3 * c1
2193 c[3].r = (byte)(((word)c[0].r + 2 * (word)c[1].r + 1) / 3);
2194 c[3].g = (byte)(((word)c[0].g + 2 * (word)c[1].g + 1) / 3);
2195 c[3].b = (byte)(((word)c[0].b + 2 * (word)c[1].b + 1) / 3);
2196
2197 for(int j = 0; j < 4 && (y + j) < internal.height; j++)
2198 {
2199 for(int i = 0; i < 4 && (x + i) < internal.width; i++)
2200 {
2201 unsigned int a = (unsigned int)(source->a >> 4 * (i + j * 4)) & 0x0F;
2202 unsigned int color = (c[(unsigned int)(source->lut >> 2 * (i + j * 4)) % 4] & 0x00FFFFFF) | ((a << 28) + (a << 24));
2203
2204 dest[(x + i) + (y + j) * internal.width] = color;
2205 }
2206 }
2207
2208 source++;
2209 }
2210 }
2211
2212 (byte*&)destSlice += internal.sliceB;
2213 }
2214 }
2215
2216 void Surface::decodeDXT5(Buffer &internal, const Buffer &external)
2217 {
2218 unsigned int *destSlice = (unsigned int*)internal.buffer;
Nicolas Capens22658242014-11-29 00:31:41 -05002219 const DXT5 *source = (const DXT5*)external.buffer;
John Bauman89401822014-05-06 15:04:28 -04002220
2221 for(int z = 0; z < external.depth; z++)
2222 {
2223 unsigned int *dest = destSlice;
2224
2225 for(int y = 0; y < external.height; y += 4)
2226 {
2227 for(int x = 0; x < external.width; x += 4)
2228 {
2229 Color<byte> c[4];
2230
2231 c[0] = source->c0;
2232 c[1] = source->c1;
2233
2234 // c2 = 2 / 3 * c0 + 1 / 3 * c1
2235 c[2].r = (byte)((2 * (word)c[0].r + (word)c[1].r + 1) / 3);
2236 c[2].g = (byte)((2 * (word)c[0].g + (word)c[1].g + 1) / 3);
2237 c[2].b = (byte)((2 * (word)c[0].b + (word)c[1].b + 1) / 3);
2238
2239 // c3 = 1 / 3 * c0 + 2 / 3 * c1
2240 c[3].r = (byte)(((word)c[0].r + 2 * (word)c[1].r + 1) / 3);
2241 c[3].g = (byte)(((word)c[0].g + 2 * (word)c[1].g + 1) / 3);
2242 c[3].b = (byte)(((word)c[0].b + 2 * (word)c[1].b + 1) / 3);
2243
2244 byte a[8];
2245
2246 a[0] = source->a0;
2247 a[1] = source->a1;
Nicolas Capensc39901e2016-03-21 16:37:44 -04002248
John Bauman89401822014-05-06 15:04:28 -04002249 if(a[0] > a[1])
2250 {
2251 a[2] = (byte)((6 * (word)a[0] + 1 * (word)a[1] + 3) / 7);
2252 a[3] = (byte)((5 * (word)a[0] + 2 * (word)a[1] + 3) / 7);
2253 a[4] = (byte)((4 * (word)a[0] + 3 * (word)a[1] + 3) / 7);
2254 a[5] = (byte)((3 * (word)a[0] + 4 * (word)a[1] + 3) / 7);
2255 a[6] = (byte)((2 * (word)a[0] + 5 * (word)a[1] + 3) / 7);
2256 a[7] = (byte)((1 * (word)a[0] + 6 * (word)a[1] + 3) / 7);
2257 }
2258 else
2259 {
2260 a[2] = (byte)((4 * (word)a[0] + 1 * (word)a[1] + 2) / 5);
2261 a[3] = (byte)((3 * (word)a[0] + 2 * (word)a[1] + 2) / 5);
2262 a[4] = (byte)((2 * (word)a[0] + 3 * (word)a[1] + 2) / 5);
2263 a[5] = (byte)((1 * (word)a[0] + 4 * (word)a[1] + 2) / 5);
2264 a[6] = 0;
2265 a[7] = 0xFF;
2266 }
2267
2268 for(int j = 0; j < 4 && (y + j) < internal.height; j++)
2269 {
2270 for(int i = 0; i < 4 && (x + i) < internal.width; i++)
2271 {
2272 unsigned int alpha = (unsigned int)a[(unsigned int)(source->alut >> (16 + 3 * (i + j * 4))) % 8] << 24;
2273 unsigned int color = (c[(source->clut >> 2 * (i + j * 4)) % 4] & 0x00FFFFFF) | alpha;
Nicolas Capensc39901e2016-03-21 16:37:44 -04002274
John Bauman89401822014-05-06 15:04:28 -04002275 dest[(x + i) + (y + j) * internal.width] = color;
2276 }
2277 }
2278
2279 source++;
2280 }
2281 }
2282
2283 (byte*&)destSlice += internal.sliceB;
2284 }
2285 }
Nicolas Capens22658242014-11-29 00:31:41 -05002286#endif
John Bauman89401822014-05-06 15:04:28 -04002287
2288 void Surface::decodeATI1(Buffer &internal, const Buffer &external)
2289 {
2290 byte *destSlice = (byte*)internal.buffer;
Nicolas Capens22658242014-11-29 00:31:41 -05002291 const ATI1 *source = (const ATI1*)external.buffer;
John Bauman89401822014-05-06 15:04:28 -04002292
2293 for(int z = 0; z < external.depth; z++)
2294 {
2295 byte *dest = destSlice;
2296
2297 for(int y = 0; y < external.height; y += 4)
2298 {
2299 for(int x = 0; x < external.width; x += 4)
2300 {
2301 byte r[8];
2302
2303 r[0] = source->r0;
2304 r[1] = source->r1;
Nicolas Capensc39901e2016-03-21 16:37:44 -04002305
John Bauman89401822014-05-06 15:04:28 -04002306 if(r[0] > r[1])
2307 {
2308 r[2] = (byte)((6 * (word)r[0] + 1 * (word)r[1] + 3) / 7);
2309 r[3] = (byte)((5 * (word)r[0] + 2 * (word)r[1] + 3) / 7);
2310 r[4] = (byte)((4 * (word)r[0] + 3 * (word)r[1] + 3) / 7);
2311 r[5] = (byte)((3 * (word)r[0] + 4 * (word)r[1] + 3) / 7);
2312 r[6] = (byte)((2 * (word)r[0] + 5 * (word)r[1] + 3) / 7);
2313 r[7] = (byte)((1 * (word)r[0] + 6 * (word)r[1] + 3) / 7);
2314 }
2315 else
2316 {
2317 r[2] = (byte)((4 * (word)r[0] + 1 * (word)r[1] + 2) / 5);
2318 r[3] = (byte)((3 * (word)r[0] + 2 * (word)r[1] + 2) / 5);
2319 r[4] = (byte)((2 * (word)r[0] + 3 * (word)r[1] + 2) / 5);
2320 r[5] = (byte)((1 * (word)r[0] + 4 * (word)r[1] + 2) / 5);
2321 r[6] = 0;
2322 r[7] = 0xFF;
2323 }
2324
2325 for(int j = 0; j < 4 && (y + j) < internal.height; j++)
2326 {
2327 for(int i = 0; i < 4 && (x + i) < internal.width; i++)
2328 {
2329 dest[(x + i) + (y + j) * internal.width] = r[(unsigned int)(source->rlut >> (16 + 3 * (i + j * 4))) % 8];
2330 }
2331 }
2332
2333 source++;
2334 }
2335 }
2336
2337 destSlice += internal.sliceB;
2338 }
2339 }
2340
2341 void Surface::decodeATI2(Buffer &internal, const Buffer &external)
2342 {
2343 word *destSlice = (word*)internal.buffer;
Nicolas Capens22658242014-11-29 00:31:41 -05002344 const ATI2 *source = (const ATI2*)external.buffer;
John Bauman89401822014-05-06 15:04:28 -04002345
2346 for(int z = 0; z < external.depth; z++)
2347 {
2348 word *dest = destSlice;
2349
2350 for(int y = 0; y < external.height; y += 4)
2351 {
2352 for(int x = 0; x < external.width; x += 4)
2353 {
2354 byte X[8];
2355
2356 X[0] = source->x0;
2357 X[1] = source->x1;
Nicolas Capensc39901e2016-03-21 16:37:44 -04002358
John Bauman89401822014-05-06 15:04:28 -04002359 if(X[0] > X[1])
2360 {
2361 X[2] = (byte)((6 * (word)X[0] + 1 * (word)X[1] + 3) / 7);
2362 X[3] = (byte)((5 * (word)X[0] + 2 * (word)X[1] + 3) / 7);
2363 X[4] = (byte)((4 * (word)X[0] + 3 * (word)X[1] + 3) / 7);
2364 X[5] = (byte)((3 * (word)X[0] + 4 * (word)X[1] + 3) / 7);
2365 X[6] = (byte)((2 * (word)X[0] + 5 * (word)X[1] + 3) / 7);
2366 X[7] = (byte)((1 * (word)X[0] + 6 * (word)X[1] + 3) / 7);
2367 }
2368 else
2369 {
2370 X[2] = (byte)((4 * (word)X[0] + 1 * (word)X[1] + 2) / 5);
2371 X[3] = (byte)((3 * (word)X[0] + 2 * (word)X[1] + 2) / 5);
2372 X[4] = (byte)((2 * (word)X[0] + 3 * (word)X[1] + 2) / 5);
2373 X[5] = (byte)((1 * (word)X[0] + 4 * (word)X[1] + 2) / 5);
2374 X[6] = 0;
2375 X[7] = 0xFF;
2376 }
2377
2378 byte Y[8];
2379
2380 Y[0] = source->y0;
2381 Y[1] = source->y1;
Nicolas Capensc39901e2016-03-21 16:37:44 -04002382
John Bauman89401822014-05-06 15:04:28 -04002383 if(Y[0] > Y[1])
2384 {
2385 Y[2] = (byte)((6 * (word)Y[0] + 1 * (word)Y[1] + 3) / 7);
2386 Y[3] = (byte)((5 * (word)Y[0] + 2 * (word)Y[1] + 3) / 7);
2387 Y[4] = (byte)((4 * (word)Y[0] + 3 * (word)Y[1] + 3) / 7);
2388 Y[5] = (byte)((3 * (word)Y[0] + 4 * (word)Y[1] + 3) / 7);
2389 Y[6] = (byte)((2 * (word)Y[0] + 5 * (word)Y[1] + 3) / 7);
2390 Y[7] = (byte)((1 * (word)Y[0] + 6 * (word)Y[1] + 3) / 7);
2391 }
2392 else
2393 {
2394 Y[2] = (byte)((4 * (word)Y[0] + 1 * (word)Y[1] + 2) / 5);
2395 Y[3] = (byte)((3 * (word)Y[0] + 2 * (word)Y[1] + 2) / 5);
2396 Y[4] = (byte)((2 * (word)Y[0] + 3 * (word)Y[1] + 2) / 5);
2397 Y[5] = (byte)((1 * (word)Y[0] + 4 * (word)Y[1] + 2) / 5);
2398 Y[6] = 0;
2399 Y[7] = 0xFF;
2400 }
2401
2402 for(int j = 0; j < 4 && (y + j) < internal.height; j++)
2403 {
2404 for(int i = 0; i < 4 && (x + i) < internal.width; i++)
2405 {
2406 word r = X[(unsigned int)(source->xlut >> (16 + 3 * (i + j * 4))) % 8];
2407 word g = Y[(unsigned int)(source->ylut >> (16 + 3 * (i + j * 4))) % 8];
2408
2409 dest[(x + i) + (y + j) * internal.width] = (g << 8) + r;
2410 }
2411 }
2412
2413 source++;
2414 }
2415 }
2416
2417 (byte*&)destSlice += internal.sliceB;
2418 }
2419 }
Nicolas Capens22658242014-11-29 00:31:41 -05002420
Alexis Hetu0de50d42015-09-09 13:56:41 -04002421 void Surface::decodeETC2(Buffer &internal, const Buffer &external, int nbAlphaBits, bool isSRGB)
Nicolas Capens22658242014-11-29 00:31:41 -05002422 {
Alexis Hetu0de50d42015-09-09 13:56:41 -04002423 ETC_Decoder::Decode((const byte*)external.buffer, (byte*)internal.buffer, external.width, external.height, internal.width, internal.height, internal.pitchB, internal.bytes,
2424 (nbAlphaBits == 8) ? ETC_Decoder::ETC_RGBA : ((nbAlphaBits == 1) ? ETC_Decoder::ETC_RGB_PUNCHTHROUGH_ALPHA : ETC_Decoder::ETC_RGB));
Nicolas Capens22658242014-11-29 00:31:41 -05002425
Alexis Hetu0de50d42015-09-09 13:56:41 -04002426 if(isSRGB)
Nicolas Capens22658242014-11-29 00:31:41 -05002427 {
Alexis Hetu0de50d42015-09-09 13:56:41 -04002428 static byte sRGBtoLinearTable[256];
2429 static bool sRGBtoLinearTableDirty = true;
2430 if(sRGBtoLinearTableDirty)
Nicolas Capens22658242014-11-29 00:31:41 -05002431 {
Alexis Hetu0de50d42015-09-09 13:56:41 -04002432 for(int i = 0; i < 256; i++)
Nicolas Capens22658242014-11-29 00:31:41 -05002433 {
Alexis Hetu0de50d42015-09-09 13:56:41 -04002434 sRGBtoLinearTable[i] = static_cast<byte>(sRGBtoLinear(static_cast<float>(i) / 255.0f) * 255.0f + 0.5f);
Nicolas Capens22658242014-11-29 00:31:41 -05002435 }
Alexis Hetu0de50d42015-09-09 13:56:41 -04002436 sRGBtoLinearTableDirty = false;
Nicolas Capens22658242014-11-29 00:31:41 -05002437 }
2438
Alexis Hetu0de50d42015-09-09 13:56:41 -04002439 // Perform sRGB conversion in place after decoding
2440 byte* src = (byte*)internal.buffer;
2441 for(int y = 0; y < internal.height; y++)
2442 {
2443 byte* srcRow = src + y * internal.pitchB;
2444 for(int x = 0; x < internal.width; x++)
2445 {
2446 byte* srcPix = srcRow + x * internal.bytes;
2447 for(int i = 0; i < 3; i++)
2448 {
2449 srcPix[i] = sRGBtoLinearTable[srcPix[i]];
2450 }
2451 }
2452 }
Nicolas Capens22658242014-11-29 00:31:41 -05002453 }
2454 }
John Bauman89401822014-05-06 15:04:28 -04002455
Alexis Hetu460e41f2015-09-01 10:58:37 -04002456 void Surface::decodeEAC(Buffer &internal, const Buffer &external, int nbChannels, bool isSigned)
2457 {
Alexis Hetu0de50d42015-09-09 13:56:41 -04002458 ASSERT(nbChannels == 1 || nbChannels == 2);
Alexis Hetu460e41f2015-09-01 10:58:37 -04002459
Alexis Hetu0de50d42015-09-09 13:56:41 -04002460 ETC_Decoder::Decode((const byte*)external.buffer, (byte*)internal.buffer, external.width, external.height, internal.width, internal.height, internal.pitchB, internal.bytes,
2461 (nbChannels == 1) ? (isSigned ? ETC_Decoder::ETC_R_SIGNED : ETC_Decoder::ETC_R_UNSIGNED) : (isSigned ? ETC_Decoder::ETC_RG_SIGNED : ETC_Decoder::ETC_RG_UNSIGNED));
2462
2463 // FIXME: We convert signed data to float, until signed integer internal formats are supported
2464 // This code can be removed if signed ETC2 images are decoded to internal 8 bit signed R/RG formats
2465 if(isSigned)
2466 {
2467 sbyte* src = (sbyte*)internal.buffer;
2468
2469 for(int y = 0; y < internal.height; y++)
2470 {
2471 sbyte* srcRow = src + y * internal.pitchB;
2472 for(int x = internal.width - 1; x >= 0; x--)
2473 {
2474 int dx = x & 0xFFFFFFFC;
2475 int mx = x - dx;
2476 sbyte* srcPix = srcRow + dx * internal.bytes + mx * nbChannels;
2477 float* dstPix = (float*)(srcRow + x * internal.bytes);
2478 for(int c = nbChannels - 1; c >= 0; c--)
2479 {
2480 static const float normalization = 1.0f / 127.875f;
2481 dstPix[c] = clamp(static_cast<float>(srcPix[c]) * normalization, -1.0f, 1.0f);
2482 }
2483 }
2484 }
2485 }
Alexis Hetu460e41f2015-09-01 10:58:37 -04002486 }
2487
// ASTC decoding entry point. The body is empty: no data is read from 'external' and
// nothing is written to 'internal', whatever block size or sRGB flag is passed.
// NOTE(review): presumably a placeholder until an ASTC decoder is added - confirm.
2488 void Surface::decodeASTC(Buffer &internal, const Buffer &external, int xBlockSize, int yBlockSize, int zBlockSize, bool isSRGB)
2489 {
2490 }
2491
John Bauman89401822014-05-06 15:04:28 -04002492 unsigned int Surface::size(int width, int height, int depth, Format format)
2493 {
Nicolas Capens00555c42015-07-21 15:15:30 -04002494 // Dimensions rounded up to multiples of 4, used for compressed formats
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04002495 int width4 = align(width, 4);
2496 int height4 = align(height, 4);
John Bauman89401822014-05-06 15:04:28 -04002497
2498 switch(format)
2499 {
2500 #if S3TC_SUPPORT
2501 case FORMAT_DXT1:
John Bauman66b8ab22014-05-06 15:57:45 -04002502 #endif
John Bauman89401822014-05-06 15:04:28 -04002503 case FORMAT_ATI1:
Nicolas Capens22658242014-11-29 00:31:41 -05002504 case FORMAT_ETC1:
Alexis Hetu460e41f2015-09-01 10:58:37 -04002505 case FORMAT_R11_EAC:
2506 case FORMAT_SIGNED_R11_EAC:
2507 case FORMAT_RGB8_ETC2:
2508 case FORMAT_SRGB8_ETC2:
2509 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
2510 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
John Bauman89401822014-05-06 15:04:28 -04002511 return width4 * height4 * depth / 2;
John Bauman66b8ab22014-05-06 15:57:45 -04002512 #if S3TC_SUPPORT
John Bauman89401822014-05-06 15:04:28 -04002513 case FORMAT_DXT3:
2514 case FORMAT_DXT5:
John Bauman66b8ab22014-05-06 15:57:45 -04002515 #endif
John Bauman89401822014-05-06 15:04:28 -04002516 case FORMAT_ATI2:
Alexis Hetu460e41f2015-09-01 10:58:37 -04002517 case FORMAT_RG11_EAC:
2518 case FORMAT_SIGNED_RG11_EAC:
2519 case FORMAT_RGBA8_ETC2_EAC:
2520 case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
2521 case FORMAT_RGBA_ASTC_4x4_KHR:
2522 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
John Bauman89401822014-05-06 15:04:28 -04002523 return width4 * height4 * depth;
Alexis Hetu460e41f2015-09-01 10:58:37 -04002524 case FORMAT_RGBA_ASTC_5x4_KHR:
2525 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
2526 return align(width, 5) * height4 * depth;
2527 case FORMAT_RGBA_ASTC_5x5_KHR:
2528 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
2529 return align(width, 5) * align(height, 5) * depth;
2530 case FORMAT_RGBA_ASTC_6x5_KHR:
2531 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
2532 return align(width, 6) * align(height, 5) * depth;
2533 case FORMAT_RGBA_ASTC_6x6_KHR:
2534 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
2535 return align(width, 6) * align(height, 6) * depth;
2536 case FORMAT_RGBA_ASTC_8x5_KHR:
2537 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
2538 return align(width, 8) * align(height, 5) * depth;
2539 case FORMAT_RGBA_ASTC_8x6_KHR:
2540 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
2541 return align(width, 8) * align(height, 6) * depth;
2542 case FORMAT_RGBA_ASTC_8x8_KHR:
2543 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
2544 return align(width, 8) * align(height, 8) * depth;
2545 case FORMAT_RGBA_ASTC_10x5_KHR:
2546 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
2547 return align(width, 10) * align(height, 5) * depth;
2548 case FORMAT_RGBA_ASTC_10x6_KHR:
2549 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
2550 return align(width, 10) * align(height, 6) * depth;
2551 case FORMAT_RGBA_ASTC_10x8_KHR:
2552 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
2553 return align(width, 10) * align(height, 8) * depth;
2554 case FORMAT_RGBA_ASTC_10x10_KHR:
2555 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
2556 return align(width, 10) * align(height, 10) * depth;
2557 case FORMAT_RGBA_ASTC_12x10_KHR:
2558 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
2559 return align(width, 12) * align(height, 10) * depth;
2560 case FORMAT_RGBA_ASTC_12x12_KHR:
2561 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:
2562 return align(width, 12) * align(height, 12) * depth;
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04002563 case FORMAT_YV12_BT601:
2564 case FORMAT_YV12_BT709:
2565 case FORMAT_YV12_JFIF:
2566 {
2567 unsigned int YStride = align(width, 16);
2568 unsigned int YSize = YStride * height;
2569 unsigned int CStride = align(YStride / 2, 16);
2570 unsigned int CSize = CStride * height / 2;
2571
2572 return YSize + 2 * CSize;
2573 }
John Bauman89401822014-05-06 15:04:28 -04002574 default:
2575 return bytes(format) * width * height * depth;
2576 }
2577
2578 return 0;
2579 }
2580
2581 bool Surface::isStencil(Format format)
2582 {
2583 switch(format)
2584 {
2585 case FORMAT_D32:
2586 case FORMAT_D16:
2587 case FORMAT_D24X8:
2588 case FORMAT_D32F:
2589 case FORMAT_D32F_COMPLEMENTARY:
2590 case FORMAT_D32F_LOCKABLE:
2591 return false;
2592 case FORMAT_D24S8:
2593 case FORMAT_D24FS8:
2594 case FORMAT_S8:
John Bauman66b8ab22014-05-06 15:57:45 -04002595 case FORMAT_DF24S8:
2596 case FORMAT_DF16S8:
2597 case FORMAT_D32FS8_TEXTURE:
2598 case FORMAT_D32FS8_SHADOW:
John Bauman89401822014-05-06 15:04:28 -04002599 case FORMAT_INTZ:
2600 return true;
2601 default:
2602 return false;
2603 }
2604 }
2605
2606 bool Surface::isDepth(Format format)
2607 {
2608 switch(format)
2609 {
2610 case FORMAT_D32:
2611 case FORMAT_D16:
2612 case FORMAT_D24X8:
2613 case FORMAT_D24S8:
2614 case FORMAT_D24FS8:
2615 case FORMAT_D32F:
2616 case FORMAT_D32F_COMPLEMENTARY:
2617 case FORMAT_D32F_LOCKABLE:
John Bauman66b8ab22014-05-06 15:57:45 -04002618 case FORMAT_DF24S8:
2619 case FORMAT_DF16S8:
2620 case FORMAT_D32FS8_TEXTURE:
2621 case FORMAT_D32FS8_SHADOW:
John Bauman89401822014-05-06 15:04:28 -04002622 case FORMAT_INTZ:
2623 return true;
2624 case FORMAT_S8:
2625 return false;
2626 default:
2627 return false;
2628 }
2629 }
2630
2631 bool Surface::isPalette(Format format)
2632 {
2633 switch(format)
2634 {
2635 case FORMAT_P8:
2636 case FORMAT_A8P8:
2637 return true;
2638 default:
2639 return false;
2640 }
2641 }
2642
2643 bool Surface::isFloatFormat(Format format)
2644 {
2645 switch(format)
2646 {
Nicolas Capens5a86ee92015-09-04 10:45:43 -04002647 case FORMAT_R5G6B5:
Alexis Hetu925c2822015-11-24 14:09:34 -05002648 case FORMAT_R8G8B8:
2649 case FORMAT_B8G8R8:
John Bauman89401822014-05-06 15:04:28 -04002650 case FORMAT_X8R8G8B8:
Alexis Hetu43577b82015-10-21 15:32:16 -04002651 case FORMAT_X8B8G8R8I:
Nicolas Capensef77ac12015-03-28 21:48:51 -04002652 case FORMAT_X8B8G8R8:
John Bauman89401822014-05-06 15:04:28 -04002653 case FORMAT_A8R8G8B8:
Alexis Hetu049a1872016-04-25 16:59:58 -04002654 case FORMAT_SRGB8_X8:
2655 case FORMAT_SRGB8_A8:
Alexis Hetu43577b82015-10-21 15:32:16 -04002656 case FORMAT_A8B8G8R8I:
2657 case FORMAT_R8UI:
2658 case FORMAT_G8R8UI:
2659 case FORMAT_X8B8G8R8UI:
2660 case FORMAT_A8B8G8R8UI:
Nicolas Capensef77ac12015-03-28 21:48:51 -04002661 case FORMAT_A8B8G8R8:
Alexis Hetu43577b82015-10-21 15:32:16 -04002662 case FORMAT_G8R8I:
John Bauman89401822014-05-06 15:04:28 -04002663 case FORMAT_G8R8:
Alexis Hetuf999a002015-12-17 11:09:36 -05002664 case FORMAT_A2B10G10R10:
Alexis Hetu43577b82015-10-21 15:32:16 -04002665 case FORMAT_R8I_SNORM:
2666 case FORMAT_G8R8I_SNORM:
2667 case FORMAT_X8B8G8R8I_SNORM:
2668 case FORMAT_A8B8G8R8I_SNORM:
2669 case FORMAT_R16I:
2670 case FORMAT_R16UI:
2671 case FORMAT_G16R16I:
2672 case FORMAT_G16R16UI:
John Bauman89401822014-05-06 15:04:28 -04002673 case FORMAT_G16R16:
Alexis Hetu43577b82015-10-21 15:32:16 -04002674 case FORMAT_X16B16G16R16I:
2675 case FORMAT_X16B16G16R16UI:
2676 case FORMAT_A16B16G16R16I:
2677 case FORMAT_A16B16G16R16UI:
John Bauman89401822014-05-06 15:04:28 -04002678 case FORMAT_A16B16G16R16:
2679 case FORMAT_V8U8:
2680 case FORMAT_Q8W8V8U8:
2681 case FORMAT_X8L8V8U8:
2682 case FORMAT_V16U16:
2683 case FORMAT_A16W16V16U16:
2684 case FORMAT_Q16W16V16U16:
2685 case FORMAT_A8:
Alexis Hetu43577b82015-10-21 15:32:16 -04002686 case FORMAT_R8I:
John Bauman89401822014-05-06 15:04:28 -04002687 case FORMAT_R8:
2688 case FORMAT_L8:
2689 case FORMAT_L16:
2690 case FORMAT_A8L8:
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04002691 case FORMAT_YV12_BT601:
2692 case FORMAT_YV12_BT709:
2693 case FORMAT_YV12_JFIF:
Alexis Hetu43577b82015-10-21 15:32:16 -04002694 case FORMAT_R32I:
2695 case FORMAT_R32UI:
2696 case FORMAT_G32R32I:
2697 case FORMAT_G32R32UI:
2698 case FORMAT_X32B32G32R32I:
2699 case FORMAT_X32B32G32R32UI:
2700 case FORMAT_A32B32G32R32I:
2701 case FORMAT_A32B32G32R32UI:
John Bauman89401822014-05-06 15:04:28 -04002702 return false;
2703 case FORMAT_R32F:
2704 case FORMAT_G32R32F:
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04002705 case FORMAT_X32B32G32R32F:
John Bauman89401822014-05-06 15:04:28 -04002706 case FORMAT_A32B32G32R32F:
2707 case FORMAT_D32F:
2708 case FORMAT_D32F_COMPLEMENTARY:
2709 case FORMAT_D32F_LOCKABLE:
John Bauman66b8ab22014-05-06 15:57:45 -04002710 case FORMAT_D32FS8_TEXTURE:
2711 case FORMAT_D32FS8_SHADOW:
Nicolas Capens80594422015-06-09 16:42:56 -04002712 case FORMAT_L16F:
2713 case FORMAT_A16L16F:
2714 case FORMAT_L32F:
2715 case FORMAT_A32L32F:
John Bauman89401822014-05-06 15:04:28 -04002716 return true;
2717 default:
2718 ASSERT(false);
2719 }
Nicolas Capensc39901e2016-03-21 16:37:44 -04002720
John Bauman89401822014-05-06 15:04:28 -04002721 return false;
2722 }
2723
2724 bool Surface::isUnsignedComponent(Format format, int component)
2725 {
2726 switch(format)
2727 {
2728 case FORMAT_NULL:
Nicolas Capens5a86ee92015-09-04 10:45:43 -04002729 case FORMAT_R5G6B5:
Alexis Hetu925c2822015-11-24 14:09:34 -05002730 case FORMAT_R8G8B8:
2731 case FORMAT_B8G8R8:
John Bauman89401822014-05-06 15:04:28 -04002732 case FORMAT_X8R8G8B8:
Nicolas Capensef77ac12015-03-28 21:48:51 -04002733 case FORMAT_X8B8G8R8:
John Bauman89401822014-05-06 15:04:28 -04002734 case FORMAT_A8R8G8B8:
Nicolas Capensef77ac12015-03-28 21:48:51 -04002735 case FORMAT_A8B8G8R8:
Alexis Hetu049a1872016-04-25 16:59:58 -04002736 case FORMAT_SRGB8_X8:
2737 case FORMAT_SRGB8_A8:
John Bauman89401822014-05-06 15:04:28 -04002738 case FORMAT_G8R8:
Alexis Hetuf999a002015-12-17 11:09:36 -05002739 case FORMAT_A2B10G10R10:
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002740 case FORMAT_R16UI:
John Bauman89401822014-05-06 15:04:28 -04002741 case FORMAT_G16R16:
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002742 case FORMAT_G16R16UI:
2743 case FORMAT_X16B16G16R16UI:
John Bauman89401822014-05-06 15:04:28 -04002744 case FORMAT_A16B16G16R16:
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002745 case FORMAT_A16B16G16R16UI:
2746 case FORMAT_R32UI:
2747 case FORMAT_G32R32UI:
2748 case FORMAT_X32B32G32R32UI:
2749 case FORMAT_A32B32G32R32UI:
2750 case FORMAT_R8UI:
2751 case FORMAT_G8R8UI:
2752 case FORMAT_X8B8G8R8UI:
2753 case FORMAT_A8B8G8R8UI:
John Bauman89401822014-05-06 15:04:28 -04002754 case FORMAT_D32F:
2755 case FORMAT_D32F_COMPLEMENTARY:
2756 case FORMAT_D32F_LOCKABLE:
John Bauman66b8ab22014-05-06 15:57:45 -04002757 case FORMAT_D32FS8_TEXTURE:
2758 case FORMAT_D32FS8_SHADOW:
John Bauman89401822014-05-06 15:04:28 -04002759 case FORMAT_A8:
2760 case FORMAT_R8:
2761 case FORMAT_L8:
2762 case FORMAT_L16:
2763 case FORMAT_A8L8:
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04002764 case FORMAT_YV12_BT601:
2765 case FORMAT_YV12_BT709:
2766 case FORMAT_YV12_JFIF:
John Bauman89401822014-05-06 15:04:28 -04002767 return true;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002768 case FORMAT_A8B8G8R8I:
2769 case FORMAT_A16B16G16R16I:
2770 case FORMAT_A32B32G32R32I:
2771 case FORMAT_A8B8G8R8I_SNORM:
2772 case FORMAT_Q8W8V8U8:
2773 case FORMAT_Q16W16V16U16:
2774 case FORMAT_A32B32G32R32F:
2775 return false;
2776 case FORMAT_R32F:
2777 case FORMAT_R8I:
2778 case FORMAT_R16I:
2779 case FORMAT_R32I:
2780 case FORMAT_R8I_SNORM:
2781 return component >= 1;
John Bauman89401822014-05-06 15:04:28 -04002782 case FORMAT_V8U8:
2783 case FORMAT_X8L8V8U8:
2784 case FORMAT_V16U16:
John Bauman89401822014-05-06 15:04:28 -04002785 case FORMAT_G32R32F:
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002786 case FORMAT_G8R8I:
2787 case FORMAT_G16R16I:
2788 case FORMAT_G32R32I:
2789 case FORMAT_G8R8I_SNORM:
2790 return component >= 2;
2791 case FORMAT_A16W16V16U16:
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04002792 case FORMAT_X32B32G32R32F:
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002793 case FORMAT_X8B8G8R8I:
2794 case FORMAT_X16B16G16R16I:
2795 case FORMAT_X32B32G32R32I:
2796 case FORMAT_X8B8G8R8I_SNORM:
2797 return component >= 3;
John Bauman89401822014-05-06 15:04:28 -04002798 default:
2799 ASSERT(false);
2800 }
Nicolas Capensc39901e2016-03-21 16:37:44 -04002801
John Bauman89401822014-05-06 15:04:28 -04002802 return false;
2803 }
2804
2805 bool Surface::isSRGBreadable(Format format)
2806 {
2807 // Keep in sync with Capabilities::isSRGBreadable
2808 switch(format)
2809 {
2810 case FORMAT_L8:
2811 case FORMAT_A8L8:
2812 case FORMAT_R8G8B8:
2813 case FORMAT_A8R8G8B8:
2814 case FORMAT_X8R8G8B8:
2815 case FORMAT_A8B8G8R8:
2816 case FORMAT_X8B8G8R8:
Alexis Hetu049a1872016-04-25 16:59:58 -04002817 case FORMAT_SRGB8_X8:
2818 case FORMAT_SRGB8_A8:
John Bauman89401822014-05-06 15:04:28 -04002819 case FORMAT_R5G6B5:
2820 case FORMAT_X1R5G5B5:
2821 case FORMAT_A1R5G5B5:
2822 case FORMAT_A4R4G4B4:
2823 #if S3TC_SUPPORT
2824 case FORMAT_DXT1:
2825 case FORMAT_DXT3:
2826 case FORMAT_DXT5:
John Bauman66b8ab22014-05-06 15:57:45 -04002827 #endif
John Bauman89401822014-05-06 15:04:28 -04002828 case FORMAT_ATI1:
2829 case FORMAT_ATI2:
John Bauman89401822014-05-06 15:04:28 -04002830 return true;
2831 default:
2832 return false;
2833 }
2834
2835 return false;
2836 }
2837
2838 bool Surface::isSRGBwritable(Format format)
2839 {
2840 // Keep in sync with Capabilities::isSRGBwritable
2841 switch(format)
2842 {
2843 case FORMAT_NULL:
2844 case FORMAT_A8R8G8B8:
2845 case FORMAT_X8R8G8B8:
2846 case FORMAT_A8B8G8R8:
2847 case FORMAT_X8B8G8R8:
Alexis Hetu049a1872016-04-25 16:59:58 -04002848 case FORMAT_SRGB8_X8:
2849 case FORMAT_SRGB8_A8:
John Bauman89401822014-05-06 15:04:28 -04002850 case FORMAT_R5G6B5:
2851 return true;
2852 default:
2853 return false;
2854 }
2855 }
2856
2857 bool Surface::isCompressed(Format format)
2858 {
2859 switch(format)
2860 {
2861 #if S3TC_SUPPORT
2862 case FORMAT_DXT1:
2863 case FORMAT_DXT3:
2864 case FORMAT_DXT5:
John Bauman66b8ab22014-05-06 15:57:45 -04002865 #endif
John Bauman89401822014-05-06 15:04:28 -04002866 case FORMAT_ATI1:
2867 case FORMAT_ATI2:
Nicolas Capens22658242014-11-29 00:31:41 -05002868 case FORMAT_ETC1:
Alexis Hetu460e41f2015-09-01 10:58:37 -04002869 case FORMAT_R11_EAC:
2870 case FORMAT_SIGNED_R11_EAC:
2871 case FORMAT_RG11_EAC:
2872 case FORMAT_SIGNED_RG11_EAC:
2873 case FORMAT_RGB8_ETC2:
2874 case FORMAT_SRGB8_ETC2:
2875 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
2876 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
2877 case FORMAT_RGBA8_ETC2_EAC:
2878 case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
2879 case FORMAT_RGBA_ASTC_4x4_KHR:
2880 case FORMAT_RGBA_ASTC_5x4_KHR:
2881 case FORMAT_RGBA_ASTC_5x5_KHR:
2882 case FORMAT_RGBA_ASTC_6x5_KHR:
2883 case FORMAT_RGBA_ASTC_6x6_KHR:
2884 case FORMAT_RGBA_ASTC_8x5_KHR:
2885 case FORMAT_RGBA_ASTC_8x6_KHR:
2886 case FORMAT_RGBA_ASTC_8x8_KHR:
2887 case FORMAT_RGBA_ASTC_10x5_KHR:
2888 case FORMAT_RGBA_ASTC_10x6_KHR:
2889 case FORMAT_RGBA_ASTC_10x8_KHR:
2890 case FORMAT_RGBA_ASTC_10x10_KHR:
2891 case FORMAT_RGBA_ASTC_12x10_KHR:
2892 case FORMAT_RGBA_ASTC_12x12_KHR:
2893 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
2894 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
2895 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
2896 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
2897 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
2898 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
2899 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
2900 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
2901 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
2902 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
2903 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
2904 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
2905 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
2906 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:
John Bauman89401822014-05-06 15:04:28 -04002907 return true;
John Bauman89401822014-05-06 15:04:28 -04002908 default:
2909 return false;
2910 }
2911 }
2912
Alexis Hetu43577b82015-10-21 15:32:16 -04002913 bool Surface::isNonNormalizedInteger(Format format)
2914 {
2915 switch(format)
2916 {
2917 case FORMAT_A8B8G8R8I:
2918 case FORMAT_X8B8G8R8I:
2919 case FORMAT_G8R8I:
2920 case FORMAT_R8I:
2921 case FORMAT_A8B8G8R8UI:
2922 case FORMAT_X8B8G8R8UI:
2923 case FORMAT_G8R8UI:
2924 case FORMAT_R8UI:
2925 case FORMAT_A16B16G16R16I:
2926 case FORMAT_X16B16G16R16I:
2927 case FORMAT_G16R16I:
2928 case FORMAT_R16I:
2929 case FORMAT_A16B16G16R16UI:
2930 case FORMAT_X16B16G16R16UI:
2931 case FORMAT_G16R16UI:
2932 case FORMAT_R16UI:
2933 case FORMAT_A32B32G32R32I:
2934 case FORMAT_X32B32G32R32I:
2935 case FORMAT_G32R32I:
2936 case FORMAT_R32I:
2937 case FORMAT_A32B32G32R32UI:
2938 case FORMAT_X32B32G32R32UI:
2939 case FORMAT_G32R32UI:
2940 case FORMAT_R32UI:
2941 return true;
2942 default:
2943 return false;
2944 }
2945 }
2946
John Bauman89401822014-05-06 15:04:28 -04002947 int Surface::componentCount(Format format)
2948 {
2949 switch(format)
2950 {
Nicolas Capens5a86ee92015-09-04 10:45:43 -04002951 case FORMAT_R5G6B5: return 3;
2952 case FORMAT_X8R8G8B8: return 3;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002953 case FORMAT_X8B8G8R8I: return 3;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04002954 case FORMAT_X8B8G8R8: return 3;
2955 case FORMAT_A8R8G8B8: return 4;
Alexis Hetu049a1872016-04-25 16:59:58 -04002956 case FORMAT_SRGB8_X8: return 3;
2957 case FORMAT_SRGB8_A8: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002958 case FORMAT_A8B8G8R8I: return 4;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04002959 case FORMAT_A8B8G8R8: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002960 case FORMAT_G8R8I: return 2;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04002961 case FORMAT_G8R8: return 2;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002962 case FORMAT_R8I_SNORM: return 1;
2963 case FORMAT_G8R8I_SNORM: return 2;
2964 case FORMAT_X8B8G8R8I_SNORM:return 3;
2965 case FORMAT_A8B8G8R8I_SNORM:return 4;
2966 case FORMAT_R8UI: return 1;
2967 case FORMAT_G8R8UI: return 2;
2968 case FORMAT_X8B8G8R8UI: return 3;
2969 case FORMAT_A8B8G8R8UI: return 4;
Alexis Hetuf999a002015-12-17 11:09:36 -05002970 case FORMAT_A2B10G10R10: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002971 case FORMAT_G16R16I: return 2;
2972 case FORMAT_G16R16UI: return 2;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04002973 case FORMAT_G16R16: return 2;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002974 case FORMAT_G32R32I: return 2;
2975 case FORMAT_G32R32UI: return 2;
2976 case FORMAT_X16B16G16R16I: return 3;
2977 case FORMAT_X16B16G16R16UI: return 3;
2978 case FORMAT_A16B16G16R16I: return 4;
2979 case FORMAT_A16B16G16R16UI: return 4;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04002980 case FORMAT_A16B16G16R16: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002981 case FORMAT_X32B32G32R32I: return 3;
2982 case FORMAT_X32B32G32R32UI: return 3;
2983 case FORMAT_A32B32G32R32I: return 4;
2984 case FORMAT_A32B32G32R32UI: return 4;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04002985 case FORMAT_V8U8: return 2;
2986 case FORMAT_Q8W8V8U8: return 4;
2987 case FORMAT_X8L8V8U8: return 3;
2988 case FORMAT_V16U16: return 2;
2989 case FORMAT_A16W16V16U16: return 4;
2990 case FORMAT_Q16W16V16U16: return 4;
2991 case FORMAT_R32F: return 1;
2992 case FORMAT_G32R32F: return 2;
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04002993 case FORMAT_X32B32G32R32F: return 3;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04002994 case FORMAT_A32B32G32R32F: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002995 case FORMAT_D32F: return 1;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04002996 case FORMAT_D32F_LOCKABLE: return 1;
2997 case FORMAT_D32FS8_TEXTURE: return 1;
2998 case FORMAT_D32FS8_SHADOW: return 1;
2999 case FORMAT_A8: return 1;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003000 case FORMAT_R8I: return 1;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04003001 case FORMAT_R8: return 1;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003002 case FORMAT_R16I: return 1;
3003 case FORMAT_R16UI: return 1;
3004 case FORMAT_R32I: return 1;
3005 case FORMAT_R32UI: return 1;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04003006 case FORMAT_L8: return 1;
3007 case FORMAT_L16: return 1;
3008 case FORMAT_A8L8: return 2;
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04003009 case FORMAT_YV12_BT601: return 3;
3010 case FORMAT_YV12_BT709: return 3;
3011 case FORMAT_YV12_JFIF: return 3;
John Bauman89401822014-05-06 15:04:28 -04003012 default:
3013 ASSERT(false);
3014 }
3015
3016 return 1;
3017 }
3018
3019 void *Surface::allocateBuffer(int width, int height, int depth, Format format)
3020 {
Nicolas Capensdb17b5d2015-06-26 11:15:58 -04003021 // Render targets require 2x2 quads
3022 int width2 = (width + 1) & ~1;
3023 int height2 = (height + 1) & ~1;
John Bauman89401822014-05-06 15:04:28 -04003024
Nicolas Capens6ea71872015-06-26 13:00:48 -04003025 // FIXME: Unpacking byte4 to short4 in the sampler currently involves reading 8 bytes,
3026 // so we have to allocate 4 extra bytes to avoid buffer overruns.
3027 return allocateZero(size(width2, height2, depth, format) + 4);
John Bauman89401822014-05-06 15:04:28 -04003028 }
3029
Nicolas Capens5ba566b2015-05-25 17:11:04 -04003030 void Surface::memfill4(void *buffer, int pattern, int bytes)
John Bauman89401822014-05-06 15:04:28 -04003031 {
3032 while((size_t)buffer & 0x1 && bytes >= 1)
3033 {
3034 *(char*)buffer = (char)pattern;
3035 (char*&)buffer += 1;
3036 bytes -= 1;
3037 }
3038
3039 while((size_t)buffer & 0x3 && bytes >= 2)
3040 {
3041 *(short*)buffer = (short)pattern;
3042 (short*&)buffer += 1;
3043 bytes -= 2;
3044 }
3045
3046 if(CPUID::supportsSSE())
3047 {
3048 while((size_t)buffer & 0xF && bytes >= 4)
3049 {
3050 *(int*)buffer = pattern;
3051 (int*&)buffer += 1;
3052 bytes -= 4;
3053 }
3054
3055 __m128 quad = _mm_set_ps1((float&)pattern);
Nicolas Capensc39901e2016-03-21 16:37:44 -04003056
John Bauman89401822014-05-06 15:04:28 -04003057 float *pointer = (float*)buffer;
3058 int qxwords = bytes / 64;
3059 bytes -= qxwords * 64;
3060
3061 while(qxwords--)
3062 {
3063 _mm_stream_ps(pointer + 0, quad);
3064 _mm_stream_ps(pointer + 4, quad);
3065 _mm_stream_ps(pointer + 8, quad);
3066 _mm_stream_ps(pointer + 12, quad);
3067
3068 pointer += 16;
3069 }
3070
3071 buffer = pointer;
3072 }
3073
3074 while(bytes >= 4)
3075 {
3076 *(int*)buffer = (int)pattern;
3077 (int*&)buffer += 1;
3078 bytes -= 4;
3079 }
3080
3081 while(bytes >= 2)
3082 {
3083 *(short*)buffer = (short)pattern;
3084 (short*&)buffer += 1;
3085 bytes -= 2;
3086 }
3087
3088 while(bytes >= 1)
3089 {
3090 *(char*)buffer = (char)pattern;
3091 (char*&)buffer += 1;
3092 bytes -= 1;
3093 }
3094 }
3095
Alexis Hetu75b650f2015-11-19 17:40:15 -05003096 bool Surface::isEntire(const SliceRect& rect) const
John Bauman89401822014-05-06 15:04:28 -04003097 {
Alexis Hetu75b650f2015-11-19 17:40:15 -05003098 return (rect.x0 == 0 && rect.y0 == 0 && rect.x1 == internal.width && rect.y1 == internal.height && internal.depth == 1);
3099 }
John Bauman89401822014-05-06 15:04:28 -04003100
Nicolas Capensc39901e2016-03-21 16:37:44 -04003101 SliceRect Surface::getRect() const
Alexis Hetu75b650f2015-11-19 17:40:15 -05003102 {
Nicolas Capensc39901e2016-03-21 16:37:44 -04003103 return SliceRect(0, 0, internal.width, internal.height, 0);
John Bauman89401822014-05-06 15:04:28 -04003104 }
3105
Nicolas Capensc39901e2016-03-21 16:37:44 -04003106 void Surface::clearDepth(float depth, int x0, int y0, int width, int height)
John Bauman89401822014-05-06 15:04:28 -04003107 {
Alexis Hetu358a1442015-12-03 14:23:10 -05003108 if(width == 0 || height == 0) return;
3109
John Bauman89401822014-05-06 15:04:28 -04003110 // Not overlapping
3111 if(x0 > internal.width) return;
3112 if(y0 > internal.height) return;
3113 if(x0 + width < 0) return;
3114 if(y0 + height < 0) return;
3115
3116 // Clip against dimensions
3117 if(x0 < 0) {width += x0; x0 = 0;}
3118 if(x0 + width > internal.width) width = internal.width - x0;
3119 if(y0 < 0) {height += y0; y0 = 0;}
3120 if(y0 + height > internal.height) height = internal.height - y0;
3121
3122 const bool entire = x0 == 0 && y0 == 0 && width == internal.width && height == internal.height;
3123 const Lock lock = entire ? LOCK_DISCARD : LOCK_WRITEONLY;
3124
3125 int width2 = (internal.width + 1) & ~1;
3126
3127 int x1 = x0 + width;
3128 int y1 = y0 + height;
3129
3130 if(internal.format == FORMAT_D32F_LOCKABLE ||
John Bauman66b8ab22014-05-06 15:57:45 -04003131 internal.format == FORMAT_D32FS8_TEXTURE ||
3132 internal.format == FORMAT_D32FS8_SHADOW)
John Bauman89401822014-05-06 15:04:28 -04003133 {
3134 float *target = (float*)lockInternal(0, 0, 0, lock, PUBLIC) + x0 + width2 * y0;
3135
3136 for(int z = 0; z < internal.depth; z++)
3137 {
3138 for(int y = y0; y < y1; y++)
3139 {
Nicolas Capens5ba566b2015-05-25 17:11:04 -04003140 memfill4(target, (int&)depth, 4 * width);
John Bauman89401822014-05-06 15:04:28 -04003141 target += width2;
3142 }
3143 }
3144
3145 unlockInternal();
3146 }
3147 else // Quad layout
3148 {
3149 if(complementaryDepthBuffer)
3150 {
3151 depth = 1 - depth;
3152 }
3153
3154 float *buffer = (float*)lockInternal(0, 0, 0, lock, PUBLIC);
3155
Alexis Hetu358a1442015-12-03 14:23:10 -05003156 int oddX0 = (x0 & ~1) * 2 + (x0 & 1);
3157 int oddX1 = (x1 & ~1) * 2;
3158 int evenX0 = ((x0 + 1) & ~1) * 2;
3159 int evenBytes = (oddX1 - evenX0) * sizeof(float);
3160
John Bauman89401822014-05-06 15:04:28 -04003161 for(int z = 0; z < internal.depth; z++)
3162 {
3163 for(int y = y0; y < y1; y++)
3164 {
3165 float *target = buffer + (y & ~1) * width2 + (y & 1) * 2;
Nicolas Capensc39901e2016-03-21 16:37:44 -04003166
John Bauman89401822014-05-06 15:04:28 -04003167 if((y & 1) == 0 && y + 1 < y1) // Fill quad line at once
3168 {
3169 if((x0 & 1) != 0)
3170 {
Alexis Hetu358a1442015-12-03 14:23:10 -05003171 target[oddX0 + 0] = depth;
3172 target[oddX0 + 2] = depth;
John Bauman89401822014-05-06 15:04:28 -04003173 }
3174
Alexis Hetu358a1442015-12-03 14:23:10 -05003175 // for(int x2 = evenX0; x2 < x1 * 2; x2 += 4)
John Bauman89401822014-05-06 15:04:28 -04003176 // {
3177 // target[x2 + 0] = depth;
3178 // target[x2 + 1] = depth;
3179 // target[x2 + 2] = depth;
3180 // target[x2 + 3] = depth;
3181 // }
3182
3183 // __asm
3184 // {
3185 // movss xmm0, depth
3186 // shufps xmm0, xmm0, 0x00
3187 //
3188 // mov eax, x0
3189 // add eax, 1
3190 // and eax, 0xFFFFFFFE
3191 // cmp eax, x1
3192 // jge qEnd
3193 //
3194 // mov edi, target
3195 //
3196 // qLoop:
3197 // movntps [edi+8*eax], xmm0
3198 //
3199 // add eax, 2
3200 // cmp eax, x1
3201 // jl qLoop
3202 // qEnd:
3203 // }
3204
Alexis Hetu358a1442015-12-03 14:23:10 -05003205 memfill4(&target[evenX0], (int&)depth, evenBytes);
John Bauman89401822014-05-06 15:04:28 -04003206
3207 if((x1 & 1) != 0)
3208 {
Alexis Hetu358a1442015-12-03 14:23:10 -05003209 target[oddX1 + 0] = depth;
3210 target[oddX1 + 2] = depth;
John Bauman89401822014-05-06 15:04:28 -04003211 }
3212
3213 y++;
3214 }
3215 else
3216 {
Alexis Hetu358a1442015-12-03 14:23:10 -05003217 for(int x = x0, i = oddX0; x < x1; x++, i = (x & ~1) * 2 + (x & 1))
John Bauman89401822014-05-06 15:04:28 -04003218 {
Alexis Hetu358a1442015-12-03 14:23:10 -05003219 target[i] = depth;
John Bauman89401822014-05-06 15:04:28 -04003220 }
3221 }
3222 }
3223
3224 buffer += internal.sliceP;
3225 }
3226
3227 unlockInternal();
3228 }
3229 }
3230
Nicolas Capensc39901e2016-03-21 16:37:44 -04003231 void Surface::clearStencil(unsigned char s, unsigned char mask, int x0, int y0, int width, int height)
John Bauman89401822014-05-06 15:04:28 -04003232 {
Alexis Hetu358a1442015-12-03 14:23:10 -05003233 if(mask == 0 || width == 0 || height == 0) return;
Alexis Hetu2b052f82015-11-25 13:57:28 -05003234
John Bauman89401822014-05-06 15:04:28 -04003235 // Not overlapping
3236 if(x0 > internal.width) return;
3237 if(y0 > internal.height) return;
3238 if(x0 + width < 0) return;
3239 if(y0 + height < 0) return;
3240
3241 // Clip against dimensions
3242 if(x0 < 0) {width += x0; x0 = 0;}
3243 if(x0 + width > internal.width) width = internal.width - x0;
3244 if(y0 < 0) {height += y0; y0 = 0;}
3245 if(y0 + height > internal.height) height = internal.height - y0;
3246
3247 int width2 = (internal.width + 1) & ~1;
3248
3249 int x1 = x0 + width;
3250 int y1 = y0 + height;
3251
Alexis Hetu358a1442015-12-03 14:23:10 -05003252 int oddX0 = (x0 & ~1) * 2 + (x0 & 1);
3253 int oddX1 = (x1 & ~1) * 2;
3254 int evenX0 = ((x0 + 1) & ~1) * 2;
3255 int evenBytes = oddX1 - evenX0;
3256
John Bauman89401822014-05-06 15:04:28 -04003257 unsigned char maskedS = s & mask;
3258 unsigned char invMask = ~mask;
3259 unsigned int fill = maskedS;
3260 fill = fill | (fill << 8) | (fill << 16) + (fill << 24);
3261
Alexis Hetu2b052f82015-11-25 13:57:28 -05003262 char *buffer = (char*)lockStencil(0, PUBLIC);
3263
3264 // Stencil buffers are assumed to use quad layout
3265 for(int z = 0; z < stencil.depth; z++)
John Bauman89401822014-05-06 15:04:28 -04003266 {
Alexis Hetu2b052f82015-11-25 13:57:28 -05003267 for(int y = y0; y < y1; y++)
John Bauman89401822014-05-06 15:04:28 -04003268 {
Alexis Hetu2b052f82015-11-25 13:57:28 -05003269 char *target = buffer + (y & ~1) * width2 + (y & 1) * 2;
3270
3271 if((y & 1) == 0 && y + 1 < y1 && mask == 0xFF) // Fill quad line at once
John Bauman89401822014-05-06 15:04:28 -04003272 {
Alexis Hetu2b052f82015-11-25 13:57:28 -05003273 if((x0 & 1) != 0)
John Bauman89401822014-05-06 15:04:28 -04003274 {
Alexis Hetu358a1442015-12-03 14:23:10 -05003275 target[oddX0 + 0] = fill;
3276 target[oddX0 + 2] = fill;
John Bauman89401822014-05-06 15:04:28 -04003277 }
3278
Alexis Hetu358a1442015-12-03 14:23:10 -05003279 memfill4(&target[evenX0], fill, evenBytes);
Alexis Hetu2b052f82015-11-25 13:57:28 -05003280
3281 if((x1 & 1) != 0)
3282 {
Alexis Hetu358a1442015-12-03 14:23:10 -05003283 target[oddX1 + 0] = fill;
3284 target[oddX1 + 2] = fill;
Alexis Hetu2b052f82015-11-25 13:57:28 -05003285 }
3286
3287 y++;
3288 }
3289 else
3290 {
Alexis Hetu358a1442015-12-03 14:23:10 -05003291 for(int x = x0, i = oddX0; x < x1; x++, i = (x & ~1) * 2 + (x & 1))
Alexis Hetu2b052f82015-11-25 13:57:28 -05003292 {
Alexis Hetu358a1442015-12-03 14:23:10 -05003293 target[i] = maskedS | (target[i] & invMask);
Alexis Hetu2b052f82015-11-25 13:57:28 -05003294 }
John Bauman89401822014-05-06 15:04:28 -04003295 }
3296 }
3297
Alexis Hetu2b052f82015-11-25 13:57:28 -05003298 buffer += stencil.sliceP;
John Bauman89401822014-05-06 15:04:28 -04003299 }
John Bauman89401822014-05-06 15:04:28 -04003300
Alexis Hetu2b052f82015-11-25 13:57:28 -05003301 unlockStencil();
John Bauman89401822014-05-06 15:04:28 -04003302 }
3303
3304 void Surface::fill(const Color<float> &color, int x0, int y0, int width, int height)
3305 {
3306 unsigned char *row;
3307 Buffer *buffer;
Nicolas Capensc39901e2016-03-21 16:37:44 -04003308
John Bauman89401822014-05-06 15:04:28 -04003309 if(internal.dirty)
3310 {
3311 row = (unsigned char*)lockInternal(x0, y0, 0, LOCK_WRITEONLY, PUBLIC);
3312 buffer = &internal;
3313 }
3314 else
3315 {
3316 row = (unsigned char*)lockExternal(x0, y0, 0, LOCK_WRITEONLY, PUBLIC);
3317 buffer = &external;
3318 }
3319
3320 if(buffer->bytes <= 4)
3321 {
3322 int c;
3323 buffer->write(&c, color);
3324
3325 if(buffer->bytes <= 1) c = (c << 8) | c;
3326 if(buffer->bytes <= 2) c = (c << 16) | c;
3327
3328 for(int y = 0; y < height; y++)
3329 {
Nicolas Capens5ba566b2015-05-25 17:11:04 -04003330 memfill4(row, c, width * buffer->bytes);
John Bauman89401822014-05-06 15:04:28 -04003331
3332 row += buffer->pitchB;
3333 }
3334 }
3335 else // Generic
3336 {
3337 for(int y = 0; y < height; y++)
3338 {
3339 unsigned char *element = row;
3340
3341 for(int x = 0; x < width; x++)
3342 {
3343 buffer->write(element, color);
3344
3345 element += buffer->bytes;
3346 }
3347
3348 row += buffer->pitchB;
3349 }
3350 }
3351
3352 if(buffer == &internal)
3353 {
3354 unlockInternal();
3355 }
3356 else
3357 {
3358 unlockExternal();
3359 }
3360 }
3361
Alexis Hetu43577b82015-10-21 15:32:16 -04003362 void Surface::copyInternal(const Surface* source, int x, int y, float srcX, float srcY, bool filter)
John Bauman89401822014-05-06 15:04:28 -04003363 {
Alexis Hetu43577b82015-10-21 15:32:16 -04003364 ASSERT(internal.lock != LOCK_UNLOCKED && source && source->internal.lock != LOCK_UNLOCKED);
John Bauman89401822014-05-06 15:04:28 -04003365
Alexis Hetu43577b82015-10-21 15:32:16 -04003366 sw::Color<float> color;
John Bauman89401822014-05-06 15:04:28 -04003367
Alexis Hetu43577b82015-10-21 15:32:16 -04003368 if(!filter)
3369 {
3370 color = source->internal.read((int)srcX, (int)srcY);
3371 }
3372 else // Bilinear filtering
3373 {
3374 color = source->internal.sample(srcX, srcY);
3375 }
John Bauman89401822014-05-06 15:04:28 -04003376
3377 internal.write(x, y, color);
3378 }
3379
Alexis Hetu43577b82015-10-21 15:32:16 -04003380 void Surface::copyInternal(const Surface* source, int x, int y, int z, float srcX, float srcY, float srcZ, bool filter)
3381 {
3382 ASSERT(internal.lock != LOCK_UNLOCKED && source && source->internal.lock != LOCK_UNLOCKED);
3383
3384 sw::Color<float> color;
3385
3386 if(!filter)
3387 {
3388 color = source->internal.read((int)srcX, (int)srcY, int(srcZ));
3389 }
3390 else // Bilinear filtering
3391 {
3392 color = source->internal.sample(srcX, srcY, srcZ);
3393 }
3394
3395 internal.write(x, y, z, color);
3396 }
3397
John Bauman89401822014-05-06 15:04:28 -04003398 bool Surface::hasStencil() const
3399 {
3400 return isStencil(external.format);
3401 }
Nicolas Capensc39901e2016-03-21 16:37:44 -04003402
John Bauman89401822014-05-06 15:04:28 -04003403 bool Surface::hasDepth() const
3404 {
3405 return isDepth(external.format);
3406 }
3407
3408 bool Surface::hasPalette() const
3409 {
3410 return isPalette(external.format);
3411 }
3412
3413 bool Surface::isRenderTarget() const
3414 {
3415 return renderTarget;
3416 }
3417
3418 bool Surface::hasDirtyMipmaps() const
3419 {
3420 return dirtyMipmaps;
3421 }
3422
3423 void Surface::cleanMipmaps()
3424 {
3425 dirtyMipmaps = false;
3426 }
3427
3428 Resource *Surface::getResource()
3429 {
3430 return resource;
3431 }
3432
3433 bool Surface::identicalFormats() const
3434 {
John Bauman66b8ab22014-05-06 15:57:45 -04003435 return external.format == internal.format &&
3436 external.width == internal.width &&
Nicolas Capens22658242014-11-29 00:31:41 -05003437 external.height == internal.height &&
3438 external.depth == internal.depth &&
3439 external.pitchB == internal.pitchB &&
3440 external.sliceB == internal.sliceB;
John Bauman89401822014-05-06 15:04:28 -04003441 }
3442
3443 Format Surface::selectInternalFormat(Format format) const
3444 {
3445 switch(format)
3446 {
3447 case FORMAT_NULL:
3448 return FORMAT_NULL;
3449 case FORMAT_P8:
3450 case FORMAT_A8P8:
3451 case FORMAT_A4R4G4B4:
3452 case FORMAT_A1R5G5B5:
3453 case FORMAT_A8R3G3B2:
3454 return FORMAT_A8R8G8B8;
3455 case FORMAT_A8:
3456 return FORMAT_A8;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003457 case FORMAT_R8I:
3458 return FORMAT_R8I;
3459 case FORMAT_R8UI:
3460 return FORMAT_R8UI;
3461 case FORMAT_R8I_SNORM:
3462 return FORMAT_R8I_SNORM;
John Bauman89401822014-05-06 15:04:28 -04003463 case FORMAT_R8:
3464 return FORMAT_R8;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003465 case FORMAT_R16I:
3466 return FORMAT_R16I;
3467 case FORMAT_R16UI:
3468 return FORMAT_R16UI;
3469 case FORMAT_R32I:
3470 return FORMAT_R32I;
3471 case FORMAT_R32UI:
3472 return FORMAT_R32UI;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003473 case FORMAT_X16B16G16R16I:
3474 case FORMAT_A16B16G16R16I:
3475 return FORMAT_A16B16G16R16I;
3476 case FORMAT_X16B16G16R16UI:
3477 case FORMAT_A16B16G16R16UI:
3478 return FORMAT_A16B16G16R16UI;
Alexis Hetuf999a002015-12-17 11:09:36 -05003479 case FORMAT_A2R10G10B10:
3480 case FORMAT_A2B10G10R10:
John Bauman89401822014-05-06 15:04:28 -04003481 case FORMAT_A16B16G16R16:
3482 return FORMAT_A16B16G16R16;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003483 case FORMAT_X32B32G32R32I:
3484 case FORMAT_A32B32G32R32I:
3485 return FORMAT_A32B32G32R32I;
3486 case FORMAT_X32B32G32R32UI:
3487 case FORMAT_A32B32G32R32UI:
3488 return FORMAT_A32B32G32R32UI;
3489 case FORMAT_G8R8I:
3490 return FORMAT_G8R8I;
3491 case FORMAT_G8R8UI:
3492 return FORMAT_G8R8UI;
3493 case FORMAT_G8R8I_SNORM:
3494 return FORMAT_G8R8I_SNORM;
John Bauman89401822014-05-06 15:04:28 -04003495 case FORMAT_G8R8:
3496 return FORMAT_G8R8;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003497 case FORMAT_G16R16I:
3498 return FORMAT_G16R16I;
3499 case FORMAT_G16R16UI:
3500 return FORMAT_G16R16UI;
John Bauman89401822014-05-06 15:04:28 -04003501 case FORMAT_G16R16:
3502 return FORMAT_G16R16;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003503 case FORMAT_G32R32I:
3504 return FORMAT_G32R32I;
3505 case FORMAT_G32R32UI:
3506 return FORMAT_G32R32UI;
John Bauman89401822014-05-06 15:04:28 -04003507 case FORMAT_A8R8G8B8:
John Bauman89401822014-05-06 15:04:28 -04003508 if(lockable || !quadLayoutEnabled)
3509 {
3510 return FORMAT_A8R8G8B8;
3511 }
3512 else
3513 {
3514 return FORMAT_A8G8R8B8Q;
3515 }
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003516 case FORMAT_A8B8G8R8I:
3517 return FORMAT_A8B8G8R8I;
3518 case FORMAT_A8B8G8R8UI:
3519 return FORMAT_A8B8G8R8UI;
3520 case FORMAT_A8B8G8R8I_SNORM:
3521 return FORMAT_A8B8G8R8I_SNORM;
Nicolas Capens80594422015-06-09 16:42:56 -04003522 case FORMAT_R5G5B5A1:
3523 case FORMAT_R4G4B4A4:
Nicolas Capensef77ac12015-03-28 21:48:51 -04003524 case FORMAT_A8B8G8R8:
3525 return FORMAT_A8B8G8R8;
John Bauman89401822014-05-06 15:04:28 -04003526 case FORMAT_R5G6B5:
Nicolas Capens5a86ee92015-09-04 10:45:43 -04003527 return FORMAT_R5G6B5;
3528 case FORMAT_R3G3B2:
John Bauman89401822014-05-06 15:04:28 -04003529 case FORMAT_R8G8B8:
3530 case FORMAT_X4R4G4B4:
3531 case FORMAT_X1R5G5B5:
3532 case FORMAT_X8R8G8B8:
John Bauman89401822014-05-06 15:04:28 -04003533 if(lockable || !quadLayoutEnabled)
3534 {
3535 return FORMAT_X8R8G8B8;
3536 }
3537 else
3538 {
3539 return FORMAT_X8G8R8B8Q;
3540 }
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003541 case FORMAT_X8B8G8R8I:
3542 return FORMAT_X8B8G8R8I;
3543 case FORMAT_X8B8G8R8UI:
3544 return FORMAT_X8B8G8R8UI;
3545 case FORMAT_X8B8G8R8I_SNORM:
3546 return FORMAT_X8B8G8R8I_SNORM;
Nicolas Capens80594422015-06-09 16:42:56 -04003547 case FORMAT_B8G8R8:
Nicolas Capensef77ac12015-03-28 21:48:51 -04003548 case FORMAT_X8B8G8R8:
3549 return FORMAT_X8B8G8R8;
Alexis Hetu049a1872016-04-25 16:59:58 -04003550 case FORMAT_SRGB8_X8:
3551 return FORMAT_SRGB8_X8;
3552 case FORMAT_SRGB8_A8:
3553 return FORMAT_SRGB8_A8;
John Bauman89401822014-05-06 15:04:28 -04003554 // Compressed formats
3555 #if S3TC_SUPPORT
3556 case FORMAT_DXT1:
3557 case FORMAT_DXT3:
3558 case FORMAT_DXT5:
John Bauman66b8ab22014-05-06 15:57:45 -04003559 #endif
Alexis Hetu460e41f2015-09-01 10:58:37 -04003560 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
3561 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
3562 case FORMAT_RGBA8_ETC2_EAC:
3563 case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
3564 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
3565 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
3566 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
3567 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
3568 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
3569 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
3570 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
3571 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
3572 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
3573 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
3574 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
3575 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
3576 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
3577 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:
3578 return FORMAT_A8R8G8B8;
3579 case FORMAT_RGBA_ASTC_4x4_KHR:
3580 case FORMAT_RGBA_ASTC_5x4_KHR:
3581 case FORMAT_RGBA_ASTC_5x5_KHR:
3582 case FORMAT_RGBA_ASTC_6x5_KHR:
3583 case FORMAT_RGBA_ASTC_6x6_KHR:
3584 case FORMAT_RGBA_ASTC_8x5_KHR:
3585 case FORMAT_RGBA_ASTC_8x6_KHR:
3586 case FORMAT_RGBA_ASTC_8x8_KHR:
3587 case FORMAT_RGBA_ASTC_10x5_KHR:
3588 case FORMAT_RGBA_ASTC_10x6_KHR:
3589 case FORMAT_RGBA_ASTC_10x8_KHR:
3590 case FORMAT_RGBA_ASTC_10x10_KHR:
3591 case FORMAT_RGBA_ASTC_12x10_KHR:
3592 case FORMAT_RGBA_ASTC_12x12_KHR:
3593 // ASTC supports HDR, so a floating point format is required to represent it properly
3594 return FORMAT_A32B32G32R32F; // FIXME: 16FP is probably sufficient, but it's currently unsupported
John Bauman89401822014-05-06 15:04:28 -04003595 case FORMAT_ATI1:
Alexis Hetu460e41f2015-09-01 10:58:37 -04003596 case FORMAT_R11_EAC:
John Bauman89401822014-05-06 15:04:28 -04003597 return FORMAT_R8;
Alexis Hetu0de50d42015-09-09 13:56:41 -04003598 case FORMAT_SIGNED_R11_EAC:
3599 return FORMAT_R32F; // FIXME: Signed 8bit format would be sufficient
John Bauman89401822014-05-06 15:04:28 -04003600 case FORMAT_ATI2:
Alexis Hetu460e41f2015-09-01 10:58:37 -04003601 case FORMAT_RG11_EAC:
John Bauman89401822014-05-06 15:04:28 -04003602 return FORMAT_G8R8;
Alexis Hetu0de50d42015-09-09 13:56:41 -04003603 case FORMAT_SIGNED_RG11_EAC:
3604 return FORMAT_G32R32F; // FIXME: Signed 8bit format would be sufficient
Nicolas Capens22658242014-11-29 00:31:41 -05003605 case FORMAT_ETC1:
Alexis Hetu460e41f2015-09-01 10:58:37 -04003606 case FORMAT_RGB8_ETC2:
3607 case FORMAT_SRGB8_ETC2:
Nicolas Capens22658242014-11-29 00:31:41 -05003608 return FORMAT_X8R8G8B8;
John Bauman89401822014-05-06 15:04:28 -04003609 // Bumpmap formats
3610 case FORMAT_V8U8: return FORMAT_V8U8;
3611 case FORMAT_L6V5U5: return FORMAT_X8L8V8U8;
3612 case FORMAT_Q8W8V8U8: return FORMAT_Q8W8V8U8;
3613 case FORMAT_X8L8V8U8: return FORMAT_X8L8V8U8;
3614 case FORMAT_V16U16: return FORMAT_V16U16;
3615 case FORMAT_A2W10V10U10: return FORMAT_A16W16V16U16;
3616 case FORMAT_Q16W16V16U16: return FORMAT_Q16W16V16U16;
3617 // Floating-point formats
Nicolas Capens80594422015-06-09 16:42:56 -04003618 case FORMAT_A16F: return FORMAT_A32B32G32R32F;
John Bauman89401822014-05-06 15:04:28 -04003619 case FORMAT_R16F: return FORMAT_R32F;
3620 case FORMAT_G16R16F: return FORMAT_G32R32F;
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04003621 case FORMAT_B16G16R16F: return FORMAT_X32B32G32R32F;
John Bauman89401822014-05-06 15:04:28 -04003622 case FORMAT_A16B16G16R16F: return FORMAT_A32B32G32R32F;
Nicolas Capens80594422015-06-09 16:42:56 -04003623 case FORMAT_A32F: return FORMAT_A32B32G32R32F;
John Bauman89401822014-05-06 15:04:28 -04003624 case FORMAT_R32F: return FORMAT_R32F;
3625 case FORMAT_G32R32F: return FORMAT_G32R32F;
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04003626 case FORMAT_B32G32R32F: return FORMAT_X32B32G32R32F;
3627 case FORMAT_X32B32G32R32F: return FORMAT_X32B32G32R32F;
John Bauman89401822014-05-06 15:04:28 -04003628 case FORMAT_A32B32G32R32F: return FORMAT_A32B32G32R32F;
3629 // Luminance formats
3630 case FORMAT_L8: return FORMAT_L8;
3631 case FORMAT_A4L4: return FORMAT_A8L8;
3632 case FORMAT_L16: return FORMAT_L16;
3633 case FORMAT_A8L8: return FORMAT_A8L8;
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04003634 case FORMAT_L16F: return FORMAT_X32B32G32R32F;
Nicolas Capens80594422015-06-09 16:42:56 -04003635 case FORMAT_A16L16F: return FORMAT_A32B32G32R32F;
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04003636 case FORMAT_L32F: return FORMAT_X32B32G32R32F;
Nicolas Capens80594422015-06-09 16:42:56 -04003637 case FORMAT_A32L32F: return FORMAT_A32B32G32R32F;
John Bauman89401822014-05-06 15:04:28 -04003638 // Depth/stencil formats
3639 case FORMAT_D16:
3640 case FORMAT_D32:
3641 case FORMAT_D24X8:
3642 case FORMAT_D24S8:
3643 case FORMAT_D24FS8:
3644 if(hasParent) // Texture
3645 {
John Bauman66b8ab22014-05-06 15:57:45 -04003646 return FORMAT_D32FS8_SHADOW;
John Bauman89401822014-05-06 15:04:28 -04003647 }
3648 else if(complementaryDepthBuffer)
3649 {
3650 return FORMAT_D32F_COMPLEMENTARY;
3651 }
3652 else
3653 {
3654 return FORMAT_D32F;
3655 }
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003656 case FORMAT_D32F: return FORMAT_D32F;
John Bauman66b8ab22014-05-06 15:57:45 -04003657 case FORMAT_D32F_LOCKABLE: return FORMAT_D32F_LOCKABLE;
3658 case FORMAT_D32FS8_TEXTURE: return FORMAT_D32FS8_TEXTURE;
3659 case FORMAT_INTZ: return FORMAT_D32FS8_TEXTURE;
3660 case FORMAT_DF24S8: return FORMAT_D32FS8_SHADOW;
3661 case FORMAT_DF16S8: return FORMAT_D32FS8_SHADOW;
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04003662 case FORMAT_YV12_BT601: return FORMAT_YV12_BT601;
3663 case FORMAT_YV12_BT709: return FORMAT_YV12_BT709;
3664 case FORMAT_YV12_JFIF: return FORMAT_YV12_JFIF;
John Bauman89401822014-05-06 15:04:28 -04003665 default:
3666 ASSERT(false);
3667 }
3668
3669 return FORMAT_NULL;
3670 }
3671
3672 void Surface::setTexturePalette(unsigned int *palette)
3673 {
3674 Surface::palette = palette;
3675 Surface::paletteID++;
3676 }
3677
3678 void Surface::resolve()
3679 {
3680 if(internal.depth <= 1 || !internal.dirty || !renderTarget || internal.format == FORMAT_NULL)
3681 {
3682 return;
3683 }
3684
3685 void *source = internal.lockRect(0, 0, 0, LOCK_READWRITE);
3686
3687 int quality = internal.depth;
3688 int width = internal.width;
3689 int height = internal.height;
3690 int pitch = internal.pitchB;
3691 int slice = internal.sliceB;
3692
3693 unsigned char *source0 = (unsigned char*)source;
3694 unsigned char *source1 = source0 + slice;
3695 unsigned char *source2 = source1 + slice;
3696 unsigned char *source3 = source2 + slice;
3697 unsigned char *source4 = source3 + slice;
3698 unsigned char *source5 = source4 + slice;
3699 unsigned char *source6 = source5 + slice;
3700 unsigned char *source7 = source6 + slice;
3701 unsigned char *source8 = source7 + slice;
3702 unsigned char *source9 = source8 + slice;
3703 unsigned char *sourceA = source9 + slice;
3704 unsigned char *sourceB = sourceA + slice;
3705 unsigned char *sourceC = sourceB + slice;
3706 unsigned char *sourceD = sourceC + slice;
3707 unsigned char *sourceE = sourceD + slice;
3708 unsigned char *sourceF = sourceE + slice;
3709
Alexis Hetu049a1872016-04-25 16:59:58 -04003710 if(internal.format == FORMAT_X8R8G8B8 || internal.format == FORMAT_A8R8G8B8 ||
3711 internal.format == FORMAT_X8B8G8R8 || internal.format == FORMAT_A8B8G8R8 ||
3712 internal.format == FORMAT_SRGB8_X8 || internal.format == FORMAT_SRGB8_A8)
John Bauman89401822014-05-06 15:04:28 -04003713 {
3714 if(CPUID::supportsSSE2() && (width % 4) == 0)
3715 {
3716 if(internal.depth == 2)
3717 {
3718 for(int y = 0; y < height; y++)
3719 {
3720 for(int x = 0; x < width; x += 4)
3721 {
3722 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
3723 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04003724
John Bauman89401822014-05-06 15:04:28 -04003725 c0 = _mm_avg_epu8(c0, c1);
3726
3727 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
3728 }
3729
3730 source0 += pitch;
3731 source1 += pitch;
3732 }
3733 }
3734 else if(internal.depth == 4)
3735 {
3736 for(int y = 0; y < height; y++)
3737 {
3738 for(int x = 0; x < width; x += 4)
3739 {
3740 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
3741 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
3742 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x));
3743 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04003744
John Bauman89401822014-05-06 15:04:28 -04003745 c0 = _mm_avg_epu8(c0, c1);
3746 c2 = _mm_avg_epu8(c2, c3);
3747 c0 = _mm_avg_epu8(c0, c2);
3748
3749 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
3750 }
3751
3752 source0 += pitch;
3753 source1 += pitch;
3754 source2 += pitch;
3755 source3 += pitch;
3756 }
3757 }
3758 else if(internal.depth == 8)
3759 {
3760 for(int y = 0; y < height; y++)
3761 {
3762 for(int x = 0; x < width; x += 4)
3763 {
3764 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
3765 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
3766 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x));
3767 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x));
3768 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x));
3769 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x));
3770 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x));
3771 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04003772
John Bauman89401822014-05-06 15:04:28 -04003773 c0 = _mm_avg_epu8(c0, c1);
3774 c2 = _mm_avg_epu8(c2, c3);
3775 c4 = _mm_avg_epu8(c4, c5);
3776 c6 = _mm_avg_epu8(c6, c7);
3777 c0 = _mm_avg_epu8(c0, c2);
3778 c4 = _mm_avg_epu8(c4, c6);
3779 c0 = _mm_avg_epu8(c0, c4);
3780
3781 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
3782 }
3783
3784 source0 += pitch;
3785 source1 += pitch;
3786 source2 += pitch;
3787 source3 += pitch;
3788 source4 += pitch;
3789 source5 += pitch;
3790 source6 += pitch;
3791 source7 += pitch;
3792 }
3793 }
3794 else if(internal.depth == 16)
3795 {
3796 for(int y = 0; y < height; y++)
3797 {
3798 for(int x = 0; x < width; x += 4)
3799 {
3800 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
3801 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
3802 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x));
3803 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x));
3804 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x));
3805 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x));
3806 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x));
3807 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x));
3808 __m128i c8 = _mm_load_si128((__m128i*)(source8 + 4 * x));
3809 __m128i c9 = _mm_load_si128((__m128i*)(source9 + 4 * x));
3810 __m128i cA = _mm_load_si128((__m128i*)(sourceA + 4 * x));
3811 __m128i cB = _mm_load_si128((__m128i*)(sourceB + 4 * x));
3812 __m128i cC = _mm_load_si128((__m128i*)(sourceC + 4 * x));
3813 __m128i cD = _mm_load_si128((__m128i*)(sourceD + 4 * x));
3814 __m128i cE = _mm_load_si128((__m128i*)(sourceE + 4 * x));
3815 __m128i cF = _mm_load_si128((__m128i*)(sourceF + 4 * x));
3816
3817 c0 = _mm_avg_epu8(c0, c1);
3818 c2 = _mm_avg_epu8(c2, c3);
3819 c4 = _mm_avg_epu8(c4, c5);
3820 c6 = _mm_avg_epu8(c6, c7);
3821 c8 = _mm_avg_epu8(c8, c9);
3822 cA = _mm_avg_epu8(cA, cB);
3823 cC = _mm_avg_epu8(cC, cD);
3824 cE = _mm_avg_epu8(cE, cF);
3825 c0 = _mm_avg_epu8(c0, c2);
3826 c4 = _mm_avg_epu8(c4, c6);
3827 c8 = _mm_avg_epu8(c8, cA);
3828 cC = _mm_avg_epu8(cC, cE);
3829 c0 = _mm_avg_epu8(c0, c4);
3830 c8 = _mm_avg_epu8(c8, cC);
3831 c0 = _mm_avg_epu8(c0, c8);
3832
3833 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
3834 }
3835
3836 source0 += pitch;
3837 source1 += pitch;
3838 source2 += pitch;
3839 source3 += pitch;
3840 source4 += pitch;
3841 source5 += pitch;
3842 source6 += pitch;
3843 source7 += pitch;
3844 source8 += pitch;
3845 source9 += pitch;
3846 sourceA += pitch;
3847 sourceB += pitch;
3848 sourceC += pitch;
3849 sourceD += pitch;
3850 sourceE += pitch;
3851 sourceF += pitch;
3852 }
3853 }
3854 else ASSERT(false);
3855 }
3856 else
3857 {
3858 #define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7F7F7F7F) + (((x) ^ (y)) & 0x01010101))
3859
3860 if(internal.depth == 2)
3861 {
3862 for(int y = 0; y < height; y++)
3863 {
3864 for(int x = 0; x < width; x++)
3865 {
3866 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
3867 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
3868
3869 c0 = AVERAGE(c0, c1);
3870
3871 *(unsigned int*)(source0 + 4 * x) = c0;
3872 }
3873
3874 source0 += pitch;
3875 source1 += pitch;
3876 }
3877 }
3878 else if(internal.depth == 4)
3879 {
3880 for(int y = 0; y < height; y++)
3881 {
3882 for(int x = 0; x < width; x++)
3883 {
3884 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
3885 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
3886 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
3887 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
3888
3889 c0 = AVERAGE(c0, c1);
3890 c2 = AVERAGE(c2, c3);
3891 c0 = AVERAGE(c0, c2);
3892
3893 *(unsigned int*)(source0 + 4 * x) = c0;
3894 }
3895
3896 source0 += pitch;
3897 source1 += pitch;
3898 source2 += pitch;
3899 source3 += pitch;
3900 }
3901 }
3902 else if(internal.depth == 8)
3903 {
3904 for(int y = 0; y < height; y++)
3905 {
3906 for(int x = 0; x < width; x++)
3907 {
3908 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
3909 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
3910 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
3911 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
3912 unsigned int c4 = *(unsigned int*)(source4 + 4 * x);
3913 unsigned int c5 = *(unsigned int*)(source5 + 4 * x);
3914 unsigned int c6 = *(unsigned int*)(source6 + 4 * x);
3915 unsigned int c7 = *(unsigned int*)(source7 + 4 * x);
3916
3917 c0 = AVERAGE(c0, c1);
3918 c2 = AVERAGE(c2, c3);
3919 c4 = AVERAGE(c4, c5);
3920 c6 = AVERAGE(c6, c7);
3921 c0 = AVERAGE(c0, c2);
3922 c4 = AVERAGE(c4, c6);
3923 c0 = AVERAGE(c0, c4);
3924
3925 *(unsigned int*)(source0 + 4 * x) = c0;
3926 }
3927
3928 source0 += pitch;
3929 source1 += pitch;
3930 source2 += pitch;
3931 source3 += pitch;
3932 source4 += pitch;
3933 source5 += pitch;
3934 source6 += pitch;
3935 source7 += pitch;
3936 }
3937 }
3938 else if(internal.depth == 16)
3939 {
3940 for(int y = 0; y < height; y++)
3941 {
3942 for(int x = 0; x < width; x++)
3943 {
3944 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
3945 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
3946 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
3947 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
3948 unsigned int c4 = *(unsigned int*)(source4 + 4 * x);
3949 unsigned int c5 = *(unsigned int*)(source5 + 4 * x);
3950 unsigned int c6 = *(unsigned int*)(source6 + 4 * x);
3951 unsigned int c7 = *(unsigned int*)(source7 + 4 * x);
3952 unsigned int c8 = *(unsigned int*)(source8 + 4 * x);
3953 unsigned int c9 = *(unsigned int*)(source9 + 4 * x);
3954 unsigned int cA = *(unsigned int*)(sourceA + 4 * x);
3955 unsigned int cB = *(unsigned int*)(sourceB + 4 * x);
3956 unsigned int cC = *(unsigned int*)(sourceC + 4 * x);
3957 unsigned int cD = *(unsigned int*)(sourceD + 4 * x);
3958 unsigned int cE = *(unsigned int*)(sourceE + 4 * x);
3959 unsigned int cF = *(unsigned int*)(sourceF + 4 * x);
3960
3961 c0 = AVERAGE(c0, c1);
3962 c2 = AVERAGE(c2, c3);
3963 c4 = AVERAGE(c4, c5);
3964 c6 = AVERAGE(c6, c7);
3965 c8 = AVERAGE(c8, c9);
3966 cA = AVERAGE(cA, cB);
3967 cC = AVERAGE(cC, cD);
3968 cE = AVERAGE(cE, cF);
3969 c0 = AVERAGE(c0, c2);
3970 c4 = AVERAGE(c4, c6);
3971 c8 = AVERAGE(c8, cA);
3972 cC = AVERAGE(cC, cE);
3973 c0 = AVERAGE(c0, c4);
3974 c8 = AVERAGE(c8, cC);
3975 c0 = AVERAGE(c0, c8);
3976
3977 *(unsigned int*)(source0 + 4 * x) = c0;
3978 }
3979
3980 source0 += pitch;
3981 source1 += pitch;
3982 source2 += pitch;
3983 source3 += pitch;
3984 source4 += pitch;
3985 source5 += pitch;
3986 source6 += pitch;
3987 source7 += pitch;
3988 source8 += pitch;
3989 source9 += pitch;
3990 sourceA += pitch;
3991 sourceB += pitch;
3992 sourceC += pitch;
3993 sourceD += pitch;
3994 sourceE += pitch;
3995 sourceF += pitch;
3996 }
3997 }
3998 else ASSERT(false);
3999
4000 #undef AVERAGE
4001 }
4002 }
4003 else if(internal.format == FORMAT_G16R16)
4004 {
4005 if(CPUID::supportsSSE2() && (width % 4) == 0)
4006 {
4007 if(internal.depth == 2)
4008 {
4009 for(int y = 0; y < height; y++)
4010 {
4011 for(int x = 0; x < width; x += 4)
4012 {
4013 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
4014 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004015
John Bauman89401822014-05-06 15:04:28 -04004016 c0 = _mm_avg_epu16(c0, c1);
4017
4018 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
4019 }
4020
4021 source0 += pitch;
4022 source1 += pitch;
4023 }
4024 }
4025 else if(internal.depth == 4)
4026 {
4027 for(int y = 0; y < height; y++)
4028 {
4029 for(int x = 0; x < width; x += 4)
4030 {
4031 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
4032 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
4033 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x));
4034 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004035
John Bauman89401822014-05-06 15:04:28 -04004036 c0 = _mm_avg_epu16(c0, c1);
4037 c2 = _mm_avg_epu16(c2, c3);
4038 c0 = _mm_avg_epu16(c0, c2);
4039
4040 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
4041 }
4042
4043 source0 += pitch;
4044 source1 += pitch;
4045 source2 += pitch;
4046 source3 += pitch;
4047 }
4048 }
4049 else if(internal.depth == 8)
4050 {
4051 for(int y = 0; y < height; y++)
4052 {
4053 for(int x = 0; x < width; x += 4)
4054 {
4055 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
4056 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
4057 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x));
4058 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x));
4059 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x));
4060 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x));
4061 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x));
4062 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004063
John Bauman89401822014-05-06 15:04:28 -04004064 c0 = _mm_avg_epu16(c0, c1);
4065 c2 = _mm_avg_epu16(c2, c3);
4066 c4 = _mm_avg_epu16(c4, c5);
4067 c6 = _mm_avg_epu16(c6, c7);
4068 c0 = _mm_avg_epu16(c0, c2);
4069 c4 = _mm_avg_epu16(c4, c6);
4070 c0 = _mm_avg_epu16(c0, c4);
4071
4072 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
4073 }
4074
4075 source0 += pitch;
4076 source1 += pitch;
4077 source2 += pitch;
4078 source3 += pitch;
4079 source4 += pitch;
4080 source5 += pitch;
4081 source6 += pitch;
4082 source7 += pitch;
4083 }
4084 }
4085 else if(internal.depth == 16)
4086 {
4087 for(int y = 0; y < height; y++)
4088 {
4089 for(int x = 0; x < width; x += 4)
4090 {
4091 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
4092 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
4093 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x));
4094 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x));
4095 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x));
4096 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x));
4097 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x));
4098 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x));
4099 __m128i c8 = _mm_load_si128((__m128i*)(source8 + 4 * x));
4100 __m128i c9 = _mm_load_si128((__m128i*)(source9 + 4 * x));
4101 __m128i cA = _mm_load_si128((__m128i*)(sourceA + 4 * x));
4102 __m128i cB = _mm_load_si128((__m128i*)(sourceB + 4 * x));
4103 __m128i cC = _mm_load_si128((__m128i*)(sourceC + 4 * x));
4104 __m128i cD = _mm_load_si128((__m128i*)(sourceD + 4 * x));
4105 __m128i cE = _mm_load_si128((__m128i*)(sourceE + 4 * x));
4106 __m128i cF = _mm_load_si128((__m128i*)(sourceF + 4 * x));
4107
4108 c0 = _mm_avg_epu16(c0, c1);
4109 c2 = _mm_avg_epu16(c2, c3);
4110 c4 = _mm_avg_epu16(c4, c5);
4111 c6 = _mm_avg_epu16(c6, c7);
4112 c8 = _mm_avg_epu16(c8, c9);
4113 cA = _mm_avg_epu16(cA, cB);
4114 cC = _mm_avg_epu16(cC, cD);
4115 cE = _mm_avg_epu16(cE, cF);
4116 c0 = _mm_avg_epu16(c0, c2);
4117 c4 = _mm_avg_epu16(c4, c6);
4118 c8 = _mm_avg_epu16(c8, cA);
4119 cC = _mm_avg_epu16(cC, cE);
4120 c0 = _mm_avg_epu16(c0, c4);
4121 c8 = _mm_avg_epu16(c8, cC);
4122 c0 = _mm_avg_epu16(c0, c8);
4123
4124 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
4125 }
4126
4127 source0 += pitch;
4128 source1 += pitch;
4129 source2 += pitch;
4130 source3 += pitch;
4131 source4 += pitch;
4132 source5 += pitch;
4133 source6 += pitch;
4134 source7 += pitch;
4135 source8 += pitch;
4136 source9 += pitch;
4137 sourceA += pitch;
4138 sourceB += pitch;
4139 sourceC += pitch;
4140 sourceD += pitch;
4141 sourceE += pitch;
4142 sourceF += pitch;
4143 }
4144 }
4145 else ASSERT(false);
4146 }
4147 else
4148 {
4149 #define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7FFF7FFF) + (((x) ^ (y)) & 0x00010001))
4150
4151 if(internal.depth == 2)
4152 {
4153 for(int y = 0; y < height; y++)
4154 {
4155 for(int x = 0; x < width; x++)
4156 {
4157 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4158 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4159
4160 c0 = AVERAGE(c0, c1);
4161
4162 *(unsigned int*)(source0 + 4 * x) = c0;
4163 }
4164
4165 source0 += pitch;
4166 source1 += pitch;
4167 }
4168 }
4169 else if(internal.depth == 4)
4170 {
4171 for(int y = 0; y < height; y++)
4172 {
4173 for(int x = 0; x < width; x++)
4174 {
4175 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4176 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4177 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4178 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4179
4180 c0 = AVERAGE(c0, c1);
4181 c2 = AVERAGE(c2, c3);
4182 c0 = AVERAGE(c0, c2);
4183
4184 *(unsigned int*)(source0 + 4 * x) = c0;
4185 }
4186
4187 source0 += pitch;
4188 source1 += pitch;
4189 source2 += pitch;
4190 source3 += pitch;
4191 }
4192 }
4193 else if(internal.depth == 8)
4194 {
4195 for(int y = 0; y < height; y++)
4196 {
4197 for(int x = 0; x < width; x++)
4198 {
4199 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4200 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4201 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4202 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4203 unsigned int c4 = *(unsigned int*)(source4 + 4 * x);
4204 unsigned int c5 = *(unsigned int*)(source5 + 4 * x);
4205 unsigned int c6 = *(unsigned int*)(source6 + 4 * x);
4206 unsigned int c7 = *(unsigned int*)(source7 + 4 * x);
4207
4208 c0 = AVERAGE(c0, c1);
4209 c2 = AVERAGE(c2, c3);
4210 c4 = AVERAGE(c4, c5);
4211 c6 = AVERAGE(c6, c7);
4212 c0 = AVERAGE(c0, c2);
4213 c4 = AVERAGE(c4, c6);
4214 c0 = AVERAGE(c0, c4);
4215
4216 *(unsigned int*)(source0 + 4 * x) = c0;
4217 }
4218
4219 source0 += pitch;
4220 source1 += pitch;
4221 source2 += pitch;
4222 source3 += pitch;
4223 source4 += pitch;
4224 source5 += pitch;
4225 source6 += pitch;
4226 source7 += pitch;
4227 }
4228 }
4229 else if(internal.depth == 16)
4230 {
4231 for(int y = 0; y < height; y++)
4232 {
4233 for(int x = 0; x < width; x++)
4234 {
4235 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4236 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4237 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4238 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4239 unsigned int c4 = *(unsigned int*)(source4 + 4 * x);
4240 unsigned int c5 = *(unsigned int*)(source5 + 4 * x);
4241 unsigned int c6 = *(unsigned int*)(source6 + 4 * x);
4242 unsigned int c7 = *(unsigned int*)(source7 + 4 * x);
4243 unsigned int c8 = *(unsigned int*)(source8 + 4 * x);
4244 unsigned int c9 = *(unsigned int*)(source9 + 4 * x);
4245 unsigned int cA = *(unsigned int*)(sourceA + 4 * x);
4246 unsigned int cB = *(unsigned int*)(sourceB + 4 * x);
4247 unsigned int cC = *(unsigned int*)(sourceC + 4 * x);
4248 unsigned int cD = *(unsigned int*)(sourceD + 4 * x);
4249 unsigned int cE = *(unsigned int*)(sourceE + 4 * x);
4250 unsigned int cF = *(unsigned int*)(sourceF + 4 * x);
4251
4252 c0 = AVERAGE(c0, c1);
4253 c2 = AVERAGE(c2, c3);
4254 c4 = AVERAGE(c4, c5);
4255 c6 = AVERAGE(c6, c7);
4256 c8 = AVERAGE(c8, c9);
4257 cA = AVERAGE(cA, cB);
4258 cC = AVERAGE(cC, cD);
4259 cE = AVERAGE(cE, cF);
4260 c0 = AVERAGE(c0, c2);
4261 c4 = AVERAGE(c4, c6);
4262 c8 = AVERAGE(c8, cA);
4263 cC = AVERAGE(cC, cE);
4264 c0 = AVERAGE(c0, c4);
4265 c8 = AVERAGE(c8, cC);
4266 c0 = AVERAGE(c0, c8);
4267
4268 *(unsigned int*)(source0 + 4 * x) = c0;
4269 }
4270
4271 source0 += pitch;
4272 source1 += pitch;
4273 source2 += pitch;
4274 source3 += pitch;
4275 source4 += pitch;
4276 source5 += pitch;
4277 source6 += pitch;
4278 source7 += pitch;
4279 source8 += pitch;
4280 source9 += pitch;
4281 sourceA += pitch;
4282 sourceB += pitch;
4283 sourceC += pitch;
4284 sourceD += pitch;
4285 sourceE += pitch;
4286 sourceF += pitch;
4287 }
4288 }
4289 else ASSERT(false);
4290
4291 #undef AVERAGE
4292 }
4293 }
4294 else if(internal.format == FORMAT_A16B16G16R16)
4295 {
4296 if(CPUID::supportsSSE2() && (width % 2) == 0)
4297 {
4298 if(internal.depth == 2)
4299 {
4300 for(int y = 0; y < height; y++)
4301 {
4302 for(int x = 0; x < width; x += 2)
4303 {
4304 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x));
4305 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004306
John Bauman89401822014-05-06 15:04:28 -04004307 c0 = _mm_avg_epu16(c0, c1);
4308
4309 _mm_store_si128((__m128i*)(source0 + 8 * x), c0);
4310 }
4311
4312 source0 += pitch;
4313 source1 += pitch;
4314 }
4315 }
4316 else if(internal.depth == 4)
4317 {
4318 for(int y = 0; y < height; y++)
4319 {
4320 for(int x = 0; x < width; x += 2)
4321 {
4322 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x));
4323 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x));
4324 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 8 * x));
4325 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 8 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004326
John Bauman89401822014-05-06 15:04:28 -04004327 c0 = _mm_avg_epu16(c0, c1);
4328 c2 = _mm_avg_epu16(c2, c3);
4329 c0 = _mm_avg_epu16(c0, c2);
4330
4331 _mm_store_si128((__m128i*)(source0 + 8 * x), c0);
4332 }
4333
4334 source0 += pitch;
4335 source1 += pitch;
4336 source2 += pitch;
4337 source3 += pitch;
4338 }
4339 }
4340 else if(internal.depth == 8)
4341 {
4342 for(int y = 0; y < height; y++)
4343 {
4344 for(int x = 0; x < width; x += 2)
4345 {
4346 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x));
4347 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x));
4348 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 8 * x));
4349 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 8 * x));
4350 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 8 * x));
4351 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 8 * x));
4352 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 8 * x));
4353 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 8 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004354
John Bauman89401822014-05-06 15:04:28 -04004355 c0 = _mm_avg_epu16(c0, c1);
4356 c2 = _mm_avg_epu16(c2, c3);
4357 c4 = _mm_avg_epu16(c4, c5);
4358 c6 = _mm_avg_epu16(c6, c7);
4359 c0 = _mm_avg_epu16(c0, c2);
4360 c4 = _mm_avg_epu16(c4, c6);
4361 c0 = _mm_avg_epu16(c0, c4);
4362
4363 _mm_store_si128((__m128i*)(source0 + 8 * x), c0);
4364 }
4365
4366 source0 += pitch;
4367 source1 += pitch;
4368 source2 += pitch;
4369 source3 += pitch;
4370 source4 += pitch;
4371 source5 += pitch;
4372 source6 += pitch;
4373 source7 += pitch;
4374 }
4375 }
4376 else if(internal.depth == 16)
4377 {
4378 for(int y = 0; y < height; y++)
4379 {
4380 for(int x = 0; x < width; x += 2)
4381 {
4382 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x));
4383 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x));
4384 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 8 * x));
4385 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 8 * x));
4386 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 8 * x));
4387 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 8 * x));
4388 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 8 * x));
4389 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 8 * x));
4390 __m128i c8 = _mm_load_si128((__m128i*)(source8 + 8 * x));
4391 __m128i c9 = _mm_load_si128((__m128i*)(source9 + 8 * x));
4392 __m128i cA = _mm_load_si128((__m128i*)(sourceA + 8 * x));
4393 __m128i cB = _mm_load_si128((__m128i*)(sourceB + 8 * x));
4394 __m128i cC = _mm_load_si128((__m128i*)(sourceC + 8 * x));
4395 __m128i cD = _mm_load_si128((__m128i*)(sourceD + 8 * x));
4396 __m128i cE = _mm_load_si128((__m128i*)(sourceE + 8 * x));
4397 __m128i cF = _mm_load_si128((__m128i*)(sourceF + 8 * x));
4398
4399 c0 = _mm_avg_epu16(c0, c1);
4400 c2 = _mm_avg_epu16(c2, c3);
4401 c4 = _mm_avg_epu16(c4, c5);
4402 c6 = _mm_avg_epu16(c6, c7);
4403 c8 = _mm_avg_epu16(c8, c9);
4404 cA = _mm_avg_epu16(cA, cB);
4405 cC = _mm_avg_epu16(cC, cD);
4406 cE = _mm_avg_epu16(cE, cF);
4407 c0 = _mm_avg_epu16(c0, c2);
4408 c4 = _mm_avg_epu16(c4, c6);
4409 c8 = _mm_avg_epu16(c8, cA);
4410 cC = _mm_avg_epu16(cC, cE);
4411 c0 = _mm_avg_epu16(c0, c4);
4412 c8 = _mm_avg_epu16(c8, cC);
4413 c0 = _mm_avg_epu16(c0, c8);
4414
4415 _mm_store_si128((__m128i*)(source0 + 8 * x), c0);
4416 }
4417
4418 source0 += pitch;
4419 source1 += pitch;
4420 source2 += pitch;
4421 source3 += pitch;
4422 source4 += pitch;
4423 source5 += pitch;
4424 source6 += pitch;
4425 source7 += pitch;
4426 source8 += pitch;
4427 source9 += pitch;
4428 sourceA += pitch;
4429 sourceB += pitch;
4430 sourceC += pitch;
4431 sourceD += pitch;
4432 sourceE += pitch;
4433 sourceF += pitch;
4434 }
4435 }
4436 else ASSERT(false);
4437 }
4438 else
4439 {
4440 #define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7FFF7FFF) + (((x) ^ (y)) & 0x00010001))
4441
4442 if(internal.depth == 2)
4443 {
4444 for(int y = 0; y < height; y++)
4445 {
4446 for(int x = 0; x < 2 * width; x++)
4447 {
4448 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4449 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4450
4451 c0 = AVERAGE(c0, c1);
4452
4453 *(unsigned int*)(source0 + 4 * x) = c0;
4454 }
4455
4456 source0 += pitch;
4457 source1 += pitch;
4458 }
4459 }
4460 else if(internal.depth == 4)
4461 {
4462 for(int y = 0; y < height; y++)
4463 {
4464 for(int x = 0; x < 2 * width; x++)
4465 {
4466 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4467 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4468 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4469 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4470
4471 c0 = AVERAGE(c0, c1);
4472 c2 = AVERAGE(c2, c3);
4473 c0 = AVERAGE(c0, c2);
4474
4475 *(unsigned int*)(source0 + 4 * x) = c0;
4476 }
4477
4478 source0 += pitch;
4479 source1 += pitch;
4480 source2 += pitch;
4481 source3 += pitch;
4482 }
4483 }
4484 else if(internal.depth == 8)
4485 {
4486 for(int y = 0; y < height; y++)
4487 {
4488 for(int x = 0; x < 2 * width; x++)
4489 {
4490 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4491 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4492 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4493 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4494 unsigned int c4 = *(unsigned int*)(source4 + 4 * x);
4495 unsigned int c5 = *(unsigned int*)(source5 + 4 * x);
4496 unsigned int c6 = *(unsigned int*)(source6 + 4 * x);
4497 unsigned int c7 = *(unsigned int*)(source7 + 4 * x);
4498
4499 c0 = AVERAGE(c0, c1);
4500 c2 = AVERAGE(c2, c3);
4501 c4 = AVERAGE(c4, c5);
4502 c6 = AVERAGE(c6, c7);
4503 c0 = AVERAGE(c0, c2);
4504 c4 = AVERAGE(c4, c6);
4505 c0 = AVERAGE(c0, c4);
4506
4507 *(unsigned int*)(source0 + 4 * x) = c0;
4508 }
4509
4510 source0 += pitch;
4511 source1 += pitch;
4512 source2 += pitch;
4513 source3 += pitch;
4514 source4 += pitch;
4515 source5 += pitch;
4516 source6 += pitch;
4517 source7 += pitch;
4518 }
4519 }
4520 else if(internal.depth == 16)
4521 {
4522 for(int y = 0; y < height; y++)
4523 {
4524 for(int x = 0; x < 2 * width; x++)
4525 {
4526 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4527 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4528 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4529 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4530 unsigned int c4 = *(unsigned int*)(source4 + 4 * x);
4531 unsigned int c5 = *(unsigned int*)(source5 + 4 * x);
4532 unsigned int c6 = *(unsigned int*)(source6 + 4 * x);
4533 unsigned int c7 = *(unsigned int*)(source7 + 4 * x);
4534 unsigned int c8 = *(unsigned int*)(source8 + 4 * x);
4535 unsigned int c9 = *(unsigned int*)(source9 + 4 * x);
4536 unsigned int cA = *(unsigned int*)(sourceA + 4 * x);
4537 unsigned int cB = *(unsigned int*)(sourceB + 4 * x);
4538 unsigned int cC = *(unsigned int*)(sourceC + 4 * x);
4539 unsigned int cD = *(unsigned int*)(sourceD + 4 * x);
4540 unsigned int cE = *(unsigned int*)(sourceE + 4 * x);
4541 unsigned int cF = *(unsigned int*)(sourceF + 4 * x);
4542
4543 c0 = AVERAGE(c0, c1);
4544 c2 = AVERAGE(c2, c3);
4545 c4 = AVERAGE(c4, c5);
4546 c6 = AVERAGE(c6, c7);
4547 c8 = AVERAGE(c8, c9);
4548 cA = AVERAGE(cA, cB);
4549 cC = AVERAGE(cC, cD);
4550 cE = AVERAGE(cE, cF);
4551 c0 = AVERAGE(c0, c2);
4552 c4 = AVERAGE(c4, c6);
4553 c8 = AVERAGE(c8, cA);
4554 cC = AVERAGE(cC, cE);
4555 c0 = AVERAGE(c0, c4);
4556 c8 = AVERAGE(c8, cC);
4557 c0 = AVERAGE(c0, c8);
4558
4559 *(unsigned int*)(source0 + 4 * x) = c0;
4560 }
4561
4562 source0 += pitch;
4563 source1 += pitch;
4564 source2 += pitch;
4565 source3 += pitch;
4566 source4 += pitch;
4567 source5 += pitch;
4568 source6 += pitch;
4569 source7 += pitch;
4570 source8 += pitch;
4571 source9 += pitch;
4572 sourceA += pitch;
4573 sourceB += pitch;
4574 sourceC += pitch;
4575 sourceD += pitch;
4576 sourceE += pitch;
4577 sourceF += pitch;
4578 }
4579 }
4580 else ASSERT(false);
4581
4582 #undef AVERAGE
4583 }
4584 }
4585 else if(internal.format == FORMAT_R32F)
4586 {
4587 if(CPUID::supportsSSE() && (width % 4) == 0)
4588 {
4589 if(internal.depth == 2)
4590 {
4591 for(int y = 0; y < height; y++)
4592 {
4593 for(int x = 0; x < width; x += 4)
4594 {
4595 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x));
4596 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004597
John Bauman89401822014-05-06 15:04:28 -04004598 c0 = _mm_add_ps(c0, c1);
Nicolas Capensc39901e2016-03-21 16:37:44 -04004599 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 2.0f));
John Bauman89401822014-05-06 15:04:28 -04004600
4601 _mm_store_ps((float*)(source0 + 4 * x), c0);
4602 }
4603
4604 source0 += pitch;
4605 source1 += pitch;
4606 }
4607 }
4608 else if(internal.depth == 4)
4609 {
4610 for(int y = 0; y < height; y++)
4611 {
4612 for(int x = 0; x < width; x += 4)
4613 {
4614 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x));
4615 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x));
4616 __m128 c2 = _mm_load_ps((float*)(source2 + 4 * x));
4617 __m128 c3 = _mm_load_ps((float*)(source3 + 4 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004618
John Bauman89401822014-05-06 15:04:28 -04004619 c0 = _mm_add_ps(c0, c1);
4620 c2 = _mm_add_ps(c2, c3);
4621 c0 = _mm_add_ps(c0, c2);
Nicolas Capensc39901e2016-03-21 16:37:44 -04004622 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 4.0f));
John Bauman89401822014-05-06 15:04:28 -04004623
4624 _mm_store_ps((float*)(source0 + 4 * x), c0);
4625 }
4626
4627 source0 += pitch;
4628 source1 += pitch;
4629 source2 += pitch;
4630 source3 += pitch;
4631 }
4632 }
4633 else if(internal.depth == 8)
4634 {
4635 for(int y = 0; y < height; y++)
4636 {
4637 for(int x = 0; x < width; x += 4)
4638 {
4639 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x));
4640 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x));
4641 __m128 c2 = _mm_load_ps((float*)(source2 + 4 * x));
4642 __m128 c3 = _mm_load_ps((float*)(source3 + 4 * x));
4643 __m128 c4 = _mm_load_ps((float*)(source4 + 4 * x));
4644 __m128 c5 = _mm_load_ps((float*)(source5 + 4 * x));
4645 __m128 c6 = _mm_load_ps((float*)(source6 + 4 * x));
4646 __m128 c7 = _mm_load_ps((float*)(source7 + 4 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004647
John Bauman89401822014-05-06 15:04:28 -04004648 c0 = _mm_add_ps(c0, c1);
4649 c2 = _mm_add_ps(c2, c3);
4650 c4 = _mm_add_ps(c4, c5);
4651 c6 = _mm_add_ps(c6, c7);
4652 c0 = _mm_add_ps(c0, c2);
4653 c4 = _mm_add_ps(c4, c6);
4654 c0 = _mm_add_ps(c0, c4);
Nicolas Capensc39901e2016-03-21 16:37:44 -04004655 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 8.0f));
John Bauman89401822014-05-06 15:04:28 -04004656
4657 _mm_store_ps((float*)(source0 + 4 * x), c0);
4658 }
4659
4660 source0 += pitch;
4661 source1 += pitch;
4662 source2 += pitch;
4663 source3 += pitch;
4664 source4 += pitch;
4665 source5 += pitch;
4666 source6 += pitch;
4667 source7 += pitch;
4668 }
4669 }
4670 else if(internal.depth == 16)
4671 {
4672 for(int y = 0; y < height; y++)
4673 {
4674 for(int x = 0; x < width; x += 4)
4675 {
4676 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x));
4677 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x));
4678 __m128 c2 = _mm_load_ps((float*)(source2 + 4 * x));
4679 __m128 c3 = _mm_load_ps((float*)(source3 + 4 * x));
4680 __m128 c4 = _mm_load_ps((float*)(source4 + 4 * x));
4681 __m128 c5 = _mm_load_ps((float*)(source5 + 4 * x));
4682 __m128 c6 = _mm_load_ps((float*)(source6 + 4 * x));
4683 __m128 c7 = _mm_load_ps((float*)(source7 + 4 * x));
4684 __m128 c8 = _mm_load_ps((float*)(source8 + 4 * x));
4685 __m128 c9 = _mm_load_ps((float*)(source9 + 4 * x));
4686 __m128 cA = _mm_load_ps((float*)(sourceA + 4 * x));
4687 __m128 cB = _mm_load_ps((float*)(sourceB + 4 * x));
4688 __m128 cC = _mm_load_ps((float*)(sourceC + 4 * x));
4689 __m128 cD = _mm_load_ps((float*)(sourceD + 4 * x));
4690 __m128 cE = _mm_load_ps((float*)(sourceE + 4 * x));
4691 __m128 cF = _mm_load_ps((float*)(sourceF + 4 * x));
4692
4693 c0 = _mm_add_ps(c0, c1);
4694 c2 = _mm_add_ps(c2, c3);
4695 c4 = _mm_add_ps(c4, c5);
4696 c6 = _mm_add_ps(c6, c7);
4697 c8 = _mm_add_ps(c8, c9);
4698 cA = _mm_add_ps(cA, cB);
4699 cC = _mm_add_ps(cC, cD);
4700 cE = _mm_add_ps(cE, cF);
4701 c0 = _mm_add_ps(c0, c2);
4702 c4 = _mm_add_ps(c4, c6);
4703 c8 = _mm_add_ps(c8, cA);
4704 cC = _mm_add_ps(cC, cE);
4705 c0 = _mm_add_ps(c0, c4);
4706 c8 = _mm_add_ps(c8, cC);
4707 c0 = _mm_add_ps(c0, c8);
Nicolas Capensc39901e2016-03-21 16:37:44 -04004708 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 16.0f));
John Bauman89401822014-05-06 15:04:28 -04004709
4710 _mm_store_ps((float*)(source0 + 4 * x), c0);
4711 }
4712
4713 source0 += pitch;
4714 source1 += pitch;
4715 source2 += pitch;
4716 source3 += pitch;
4717 source4 += pitch;
4718 source5 += pitch;
4719 source6 += pitch;
4720 source7 += pitch;
4721 source8 += pitch;
4722 source9 += pitch;
4723 sourceA += pitch;
4724 sourceB += pitch;
4725 sourceC += pitch;
4726 sourceD += pitch;
4727 sourceE += pitch;
4728 sourceF += pitch;
4729 }
4730 }
4731 else ASSERT(false);
4732 }
4733 else
4734 {
4735 if(internal.depth == 2)
4736 {
4737 for(int y = 0; y < height; y++)
4738 {
4739 for(int x = 0; x < width; x++)
4740 {
4741 float c0 = *(float*)(source0 + 4 * x);
4742 float c1 = *(float*)(source1 + 4 * x);
4743
4744 c0 = c0 + c1;
4745 c0 *= 1.0f / 2.0f;
4746
4747 *(float*)(source0 + 4 * x) = c0;
4748 }
4749
4750 source0 += pitch;
4751 source1 += pitch;
4752 }
4753 }
4754 else if(internal.depth == 4)
4755 {
4756 for(int y = 0; y < height; y++)
4757 {
4758 for(int x = 0; x < width; x++)
4759 {
4760 float c0 = *(float*)(source0 + 4 * x);
4761 float c1 = *(float*)(source1 + 4 * x);
4762 float c2 = *(float*)(source2 + 4 * x);
4763 float c3 = *(float*)(source3 + 4 * x);
4764
4765 c0 = c0 + c1;
4766 c2 = c2 + c3;
4767 c0 = c0 + c2;
4768 c0 *= 1.0f / 4.0f;
4769
4770 *(float*)(source0 + 4 * x) = c0;
4771 }
4772
4773 source0 += pitch;
4774 source1 += pitch;
4775 source2 += pitch;
4776 source3 += pitch;
4777 }
4778 }
4779 else if(internal.depth == 8)
4780 {
4781 for(int y = 0; y < height; y++)
4782 {
4783 for(int x = 0; x < width; x++)
4784 {
4785 float c0 = *(float*)(source0 + 4 * x);
4786 float c1 = *(float*)(source1 + 4 * x);
4787 float c2 = *(float*)(source2 + 4 * x);
4788 float c3 = *(float*)(source3 + 4 * x);
4789 float c4 = *(float*)(source4 + 4 * x);
4790 float c5 = *(float*)(source5 + 4 * x);
4791 float c6 = *(float*)(source6 + 4 * x);
4792 float c7 = *(float*)(source7 + 4 * x);
4793
4794 c0 = c0 + c1;
4795 c2 = c2 + c3;
4796 c4 = c4 + c5;
4797 c6 = c6 + c7;
4798 c0 = c0 + c2;
4799 c4 = c4 + c6;
4800 c0 = c0 + c4;
4801 c0 *= 1.0f / 8.0f;
4802
4803 *(float*)(source0 + 4 * x) = c0;
4804 }
4805
4806 source0 += pitch;
4807 source1 += pitch;
4808 source2 += pitch;
4809 source3 += pitch;
4810 source4 += pitch;
4811 source5 += pitch;
4812 source6 += pitch;
4813 source7 += pitch;
4814 }
4815 }
4816 else if(internal.depth == 16)
4817 {
4818 for(int y = 0; y < height; y++)
4819 {
4820 for(int x = 0; x < width; x++)
4821 {
4822 float c0 = *(float*)(source0 + 4 * x);
4823 float c1 = *(float*)(source1 + 4 * x);
4824 float c2 = *(float*)(source2 + 4 * x);
4825 float c3 = *(float*)(source3 + 4 * x);
4826 float c4 = *(float*)(source4 + 4 * x);
4827 float c5 = *(float*)(source5 + 4 * x);
4828 float c6 = *(float*)(source6 + 4 * x);
4829 float c7 = *(float*)(source7 + 4 * x);
4830 float c8 = *(float*)(source8 + 4 * x);
4831 float c9 = *(float*)(source9 + 4 * x);
4832 float cA = *(float*)(sourceA + 4 * x);
4833 float cB = *(float*)(sourceB + 4 * x);
4834 float cC = *(float*)(sourceC + 4 * x);
4835 float cD = *(float*)(sourceD + 4 * x);
4836 float cE = *(float*)(sourceE + 4 * x);
4837 float cF = *(float*)(sourceF + 4 * x);
4838
4839 c0 = c0 + c1;
4840 c2 = c2 + c3;
4841 c4 = c4 + c5;
4842 c6 = c6 + c7;
4843 c8 = c8 + c9;
4844 cA = cA + cB;
4845 cC = cC + cD;
4846 cE = cE + cF;
4847 c0 = c0 + c2;
4848 c4 = c4 + c6;
4849 c8 = c8 + cA;
4850 cC = cC + cE;
4851 c0 = c0 + c4;
4852 c8 = c8 + cC;
4853 c0 = c0 + c8;
4854 c0 *= 1.0f / 16.0f;
4855
4856 *(float*)(source0 + 4 * x) = c0;
4857 }
4858
4859 source0 += pitch;
4860 source1 += pitch;
4861 source2 += pitch;
4862 source3 += pitch;
4863 source4 += pitch;
4864 source5 += pitch;
4865 source6 += pitch;
4866 source7 += pitch;
4867 source8 += pitch;
4868 source9 += pitch;
4869 sourceA += pitch;
4870 sourceB += pitch;
4871 sourceC += pitch;
4872 sourceD += pitch;
4873 sourceE += pitch;
4874 sourceF += pitch;
4875 }
4876 }
4877 else ASSERT(false);
4878 }
4879 }
4880 else if(internal.format == FORMAT_G32R32F)
4881 {
4882 if(CPUID::supportsSSE() && (width % 2) == 0)
4883 {
4884 if(internal.depth == 2)
4885 {
4886 for(int y = 0; y < height; y++)
4887 {
4888 for(int x = 0; x < width; x += 2)
4889 {
4890 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x));
4891 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004892
John Bauman89401822014-05-06 15:04:28 -04004893 c0 = _mm_add_ps(c0, c1);
Nicolas Capensc39901e2016-03-21 16:37:44 -04004894 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 2.0f));
John Bauman89401822014-05-06 15:04:28 -04004895
4896 _mm_store_ps((float*)(source0 + 8 * x), c0);
4897 }
4898
4899 source0 += pitch;
4900 source1 += pitch;
4901 }
4902 }
4903 else if(internal.depth == 4)
4904 {
4905 for(int y = 0; y < height; y++)
4906 {
4907 for(int x = 0; x < width; x += 2)
4908 {
4909 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x));
4910 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x));
4911 __m128 c2 = _mm_load_ps((float*)(source2 + 8 * x));
4912 __m128 c3 = _mm_load_ps((float*)(source3 + 8 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004913
John Bauman89401822014-05-06 15:04:28 -04004914 c0 = _mm_add_ps(c0, c1);
4915 c2 = _mm_add_ps(c2, c3);
4916 c0 = _mm_add_ps(c0, c2);
Nicolas Capensc39901e2016-03-21 16:37:44 -04004917 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 4.0f));
John Bauman89401822014-05-06 15:04:28 -04004918
4919 _mm_store_ps((float*)(source0 + 8 * x), c0);
4920 }
4921
4922 source0 += pitch;
4923 source1 += pitch;
4924 source2 += pitch;
4925 source3 += pitch;
4926 }
4927 }
4928 else if(internal.depth == 8)
4929 {
4930 for(int y = 0; y < height; y++)
4931 {
4932 for(int x = 0; x < width; x += 2)
4933 {
4934 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x));
4935 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x));
4936 __m128 c2 = _mm_load_ps((float*)(source2 + 8 * x));
4937 __m128 c3 = _mm_load_ps((float*)(source3 + 8 * x));
4938 __m128 c4 = _mm_load_ps((float*)(source4 + 8 * x));
4939 __m128 c5 = _mm_load_ps((float*)(source5 + 8 * x));
4940 __m128 c6 = _mm_load_ps((float*)(source6 + 8 * x));
4941 __m128 c7 = _mm_load_ps((float*)(source7 + 8 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004942
John Bauman89401822014-05-06 15:04:28 -04004943 c0 = _mm_add_ps(c0, c1);
4944 c2 = _mm_add_ps(c2, c3);
4945 c4 = _mm_add_ps(c4, c5);
4946 c6 = _mm_add_ps(c6, c7);
4947 c0 = _mm_add_ps(c0, c2);
4948 c4 = _mm_add_ps(c4, c6);
4949 c0 = _mm_add_ps(c0, c4);
Nicolas Capensc39901e2016-03-21 16:37:44 -04004950 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 8.0f));
John Bauman89401822014-05-06 15:04:28 -04004951
4952 _mm_store_ps((float*)(source0 + 8 * x), c0);
4953 }
4954
4955 source0 += pitch;
4956 source1 += pitch;
4957 source2 += pitch;
4958 source3 += pitch;
4959 source4 += pitch;
4960 source5 += pitch;
4961 source6 += pitch;
4962 source7 += pitch;
4963 }
4964 }
4965 else if(internal.depth == 16)
4966 {
4967 for(int y = 0; y < height; y++)
4968 {
4969 for(int x = 0; x < width; x += 2)
4970 {
4971 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x));
4972 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x));
4973 __m128 c2 = _mm_load_ps((float*)(source2 + 8 * x));
4974 __m128 c3 = _mm_load_ps((float*)(source3 + 8 * x));
4975 __m128 c4 = _mm_load_ps((float*)(source4 + 8 * x));
4976 __m128 c5 = _mm_load_ps((float*)(source5 + 8 * x));
4977 __m128 c6 = _mm_load_ps((float*)(source6 + 8 * x));
4978 __m128 c7 = _mm_load_ps((float*)(source7 + 8 * x));
4979 __m128 c8 = _mm_load_ps((float*)(source8 + 8 * x));
4980 __m128 c9 = _mm_load_ps((float*)(source9 + 8 * x));
4981 __m128 cA = _mm_load_ps((float*)(sourceA + 8 * x));
4982 __m128 cB = _mm_load_ps((float*)(sourceB + 8 * x));
4983 __m128 cC = _mm_load_ps((float*)(sourceC + 8 * x));
4984 __m128 cD = _mm_load_ps((float*)(sourceD + 8 * x));
4985 __m128 cE = _mm_load_ps((float*)(sourceE + 8 * x));
4986 __m128 cF = _mm_load_ps((float*)(sourceF + 8 * x));
4987
4988 c0 = _mm_add_ps(c0, c1);
4989 c2 = _mm_add_ps(c2, c3);
4990 c4 = _mm_add_ps(c4, c5);
4991 c6 = _mm_add_ps(c6, c7);
4992 c8 = _mm_add_ps(c8, c9);
4993 cA = _mm_add_ps(cA, cB);
4994 cC = _mm_add_ps(cC, cD);
4995 cE = _mm_add_ps(cE, cF);
4996 c0 = _mm_add_ps(c0, c2);
4997 c4 = _mm_add_ps(c4, c6);
4998 c8 = _mm_add_ps(c8, cA);
4999 cC = _mm_add_ps(cC, cE);
5000 c0 = _mm_add_ps(c0, c4);
5001 c8 = _mm_add_ps(c8, cC);
5002 c0 = _mm_add_ps(c0, c8);
Nicolas Capensc39901e2016-03-21 16:37:44 -04005003 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 16.0f));
John Bauman89401822014-05-06 15:04:28 -04005004
5005 _mm_store_ps((float*)(source0 + 8 * x), c0);
5006 }
5007
5008 source0 += pitch;
5009 source1 += pitch;
5010 source2 += pitch;
5011 source3 += pitch;
5012 source4 += pitch;
5013 source5 += pitch;
5014 source6 += pitch;
5015 source7 += pitch;
5016 source8 += pitch;
5017 source9 += pitch;
5018 sourceA += pitch;
5019 sourceB += pitch;
5020 sourceC += pitch;
5021 sourceD += pitch;
5022 sourceE += pitch;
5023 sourceF += pitch;
5024 }
5025 }
5026 else ASSERT(false);
5027 }
5028 else
5029 {
5030 if(internal.depth == 2)
5031 {
5032 for(int y = 0; y < height; y++)
5033 {
5034 for(int x = 0; x < 2 * width; x++)
5035 {
5036 float c0 = *(float*)(source0 + 4 * x);
5037 float c1 = *(float*)(source1 + 4 * x);
5038
5039 c0 = c0 + c1;
5040 c0 *= 1.0f / 2.0f;
5041
5042 *(float*)(source0 + 4 * x) = c0;
5043 }
5044
5045 source0 += pitch;
5046 source1 += pitch;
5047 }
5048 }
5049 else if(internal.depth == 4)
5050 {
5051 for(int y = 0; y < height; y++)
5052 {
5053 for(int x = 0; x < 2 * width; x++)
5054 {
5055 float c0 = *(float*)(source0 + 4 * x);
5056 float c1 = *(float*)(source1 + 4 * x);
5057 float c2 = *(float*)(source2 + 4 * x);
5058 float c3 = *(float*)(source3 + 4 * x);
5059
5060 c0 = c0 + c1;
5061 c2 = c2 + c3;
5062 c0 = c0 + c2;
5063 c0 *= 1.0f / 4.0f;
5064
5065 *(float*)(source0 + 4 * x) = c0;
5066 }
5067
5068 source0 += pitch;
5069 source1 += pitch;
5070 source2 += pitch;
5071 source3 += pitch;
5072 }
5073 }
5074 else if(internal.depth == 8)
5075 {
5076 for(int y = 0; y < height; y++)
5077 {
5078 for(int x = 0; x < 2 * width; x++)
5079 {
5080 float c0 = *(float*)(source0 + 4 * x);
5081 float c1 = *(float*)(source1 + 4 * x);
5082 float c2 = *(float*)(source2 + 4 * x);
5083 float c3 = *(float*)(source3 + 4 * x);
5084 float c4 = *(float*)(source4 + 4 * x);
5085 float c5 = *(float*)(source5 + 4 * x);
5086 float c6 = *(float*)(source6 + 4 * x);
5087 float c7 = *(float*)(source7 + 4 * x);
5088
5089 c0 = c0 + c1;
5090 c2 = c2 + c3;
5091 c4 = c4 + c5;
5092 c6 = c6 + c7;
5093 c0 = c0 + c2;
5094 c4 = c4 + c6;
5095 c0 = c0 + c4;
5096 c0 *= 1.0f / 8.0f;
5097
5098 *(float*)(source0 + 4 * x) = c0;
5099 }
5100
5101 source0 += pitch;
5102 source1 += pitch;
5103 source2 += pitch;
5104 source3 += pitch;
5105 source4 += pitch;
5106 source5 += pitch;
5107 source6 += pitch;
5108 source7 += pitch;
5109 }
5110 }
5111 else if(internal.depth == 16)
5112 {
5113 for(int y = 0; y < height; y++)
5114 {
5115 for(int x = 0; x < 2 * width; x++)
5116 {
5117 float c0 = *(float*)(source0 + 4 * x);
5118 float c1 = *(float*)(source1 + 4 * x);
5119 float c2 = *(float*)(source2 + 4 * x);
5120 float c3 = *(float*)(source3 + 4 * x);
5121 float c4 = *(float*)(source4 + 4 * x);
5122 float c5 = *(float*)(source5 + 4 * x);
5123 float c6 = *(float*)(source6 + 4 * x);
5124 float c7 = *(float*)(source7 + 4 * x);
5125 float c8 = *(float*)(source8 + 4 * x);
5126 float c9 = *(float*)(source9 + 4 * x);
5127 float cA = *(float*)(sourceA + 4 * x);
5128 float cB = *(float*)(sourceB + 4 * x);
5129 float cC = *(float*)(sourceC + 4 * x);
5130 float cD = *(float*)(sourceD + 4 * x);
5131 float cE = *(float*)(sourceE + 4 * x);
5132 float cF = *(float*)(sourceF + 4 * x);
5133
5134 c0 = c0 + c1;
5135 c2 = c2 + c3;
5136 c4 = c4 + c5;
5137 c6 = c6 + c7;
5138 c8 = c8 + c9;
5139 cA = cA + cB;
5140 cC = cC + cD;
5141 cE = cE + cF;
5142 c0 = c0 + c2;
5143 c4 = c4 + c6;
5144 c8 = c8 + cA;
5145 cC = cC + cE;
5146 c0 = c0 + c4;
5147 c8 = c8 + cC;
5148 c0 = c0 + c8;
5149 c0 *= 1.0f / 16.0f;
5150
5151 *(float*)(source0 + 4 * x) = c0;
5152 }
5153
5154 source0 += pitch;
5155 source1 += pitch;
5156 source2 += pitch;
5157 source3 += pitch;
5158 source4 += pitch;
5159 source5 += pitch;
5160 source6 += pitch;
5161 source7 += pitch;
5162 source8 += pitch;
5163 source9 += pitch;
5164 sourceA += pitch;
5165 sourceB += pitch;
5166 sourceC += pitch;
5167 sourceD += pitch;
5168 sourceE += pitch;
5169 sourceF += pitch;
5170 }
5171 }
5172 else ASSERT(false);
5173 }
5174 }
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04005175 else if(internal.format == FORMAT_A32B32G32R32F || internal.format == FORMAT_X32B32G32R32F)
John Bauman89401822014-05-06 15:04:28 -04005176 {
5177 if(CPUID::supportsSSE())
5178 {
5179 if(internal.depth == 2)
5180 {
5181 for(int y = 0; y < height; y++)
5182 {
5183 for(int x = 0; x < width; x++)
5184 {
5185 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x));
5186 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04005187
John Bauman89401822014-05-06 15:04:28 -04005188 c0 = _mm_add_ps(c0, c1);
Nicolas Capensc39901e2016-03-21 16:37:44 -04005189 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 2.0f));
John Bauman89401822014-05-06 15:04:28 -04005190
5191 _mm_store_ps((float*)(source0 + 16 * x), c0);
5192 }
5193
5194 source0 += pitch;
5195 source1 += pitch;
5196 }
5197 }
5198 else if(internal.depth == 4)
5199 {
5200 for(int y = 0; y < height; y++)
5201 {
5202 for(int x = 0; x < width; x++)
5203 {
5204 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x));
5205 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x));
5206 __m128 c2 = _mm_load_ps((float*)(source2 + 16 * x));
5207 __m128 c3 = _mm_load_ps((float*)(source3 + 16 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04005208
John Bauman89401822014-05-06 15:04:28 -04005209 c0 = _mm_add_ps(c0, c1);
5210 c2 = _mm_add_ps(c2, c3);
5211 c0 = _mm_add_ps(c0, c2);
Nicolas Capensc39901e2016-03-21 16:37:44 -04005212 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 4.0f));
John Bauman89401822014-05-06 15:04:28 -04005213
5214 _mm_store_ps((float*)(source0 + 16 * x), c0);
5215 }
5216
5217 source0 += pitch;
5218 source1 += pitch;
5219 source2 += pitch;
5220 source3 += pitch;
5221 }
5222 }
5223 else if(internal.depth == 8)
5224 {
5225 for(int y = 0; y < height; y++)
5226 {
5227 for(int x = 0; x < width; x++)
5228 {
5229 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x));
5230 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x));
5231 __m128 c2 = _mm_load_ps((float*)(source2 + 16 * x));
5232 __m128 c3 = _mm_load_ps((float*)(source3 + 16 * x));
5233 __m128 c4 = _mm_load_ps((float*)(source4 + 16 * x));
5234 __m128 c5 = _mm_load_ps((float*)(source5 + 16 * x));
5235 __m128 c6 = _mm_load_ps((float*)(source6 + 16 * x));
5236 __m128 c7 = _mm_load_ps((float*)(source7 + 16 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04005237
John Bauman89401822014-05-06 15:04:28 -04005238 c0 = _mm_add_ps(c0, c1);
5239 c2 = _mm_add_ps(c2, c3);
5240 c4 = _mm_add_ps(c4, c5);
5241 c6 = _mm_add_ps(c6, c7);
5242 c0 = _mm_add_ps(c0, c2);
5243 c4 = _mm_add_ps(c4, c6);
5244 c0 = _mm_add_ps(c0, c4);
Nicolas Capensc39901e2016-03-21 16:37:44 -04005245 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 8.0f));
John Bauman89401822014-05-06 15:04:28 -04005246
5247 _mm_store_ps((float*)(source0 + 16 * x), c0);
5248 }
5249
5250 source0 += pitch;
5251 source1 += pitch;
5252 source2 += pitch;
5253 source3 += pitch;
5254 source4 += pitch;
5255 source5 += pitch;
5256 source6 += pitch;
5257 source7 += pitch;
5258 }
5259 }
5260 else if(internal.depth == 16)
5261 {
5262 for(int y = 0; y < height; y++)
5263 {
5264 for(int x = 0; x < width; x++)
5265 {
5266 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x));
5267 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x));
5268 __m128 c2 = _mm_load_ps((float*)(source2 + 16 * x));
5269 __m128 c3 = _mm_load_ps((float*)(source3 + 16 * x));
5270 __m128 c4 = _mm_load_ps((float*)(source4 + 16 * x));
5271 __m128 c5 = _mm_load_ps((float*)(source5 + 16 * x));
5272 __m128 c6 = _mm_load_ps((float*)(source6 + 16 * x));
5273 __m128 c7 = _mm_load_ps((float*)(source7 + 16 * x));
5274 __m128 c8 = _mm_load_ps((float*)(source8 + 16 * x));
5275 __m128 c9 = _mm_load_ps((float*)(source9 + 16 * x));
5276 __m128 cA = _mm_load_ps((float*)(sourceA + 16 * x));
5277 __m128 cB = _mm_load_ps((float*)(sourceB + 16 * x));
5278 __m128 cC = _mm_load_ps((float*)(sourceC + 16 * x));
5279 __m128 cD = _mm_load_ps((float*)(sourceD + 16 * x));
5280 __m128 cE = _mm_load_ps((float*)(sourceE + 16 * x));
5281 __m128 cF = _mm_load_ps((float*)(sourceF + 16 * x));
5282
5283 c0 = _mm_add_ps(c0, c1);
5284 c2 = _mm_add_ps(c2, c3);
5285 c4 = _mm_add_ps(c4, c5);
5286 c6 = _mm_add_ps(c6, c7);
5287 c8 = _mm_add_ps(c8, c9);
5288 cA = _mm_add_ps(cA, cB);
5289 cC = _mm_add_ps(cC, cD);
5290 cE = _mm_add_ps(cE, cF);
5291 c0 = _mm_add_ps(c0, c2);
5292 c4 = _mm_add_ps(c4, c6);
5293 c8 = _mm_add_ps(c8, cA);
5294 cC = _mm_add_ps(cC, cE);
5295 c0 = _mm_add_ps(c0, c4);
5296 c8 = _mm_add_ps(c8, cC);
5297 c0 = _mm_add_ps(c0, c8);
Nicolas Capensc39901e2016-03-21 16:37:44 -04005298 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 16.0f));
John Bauman89401822014-05-06 15:04:28 -04005299
5300 _mm_store_ps((float*)(source0 + 16 * x), c0);
5301 }
5302
5303 source0 += pitch;
5304 source1 += pitch;
5305 source2 += pitch;
5306 source3 += pitch;
5307 source4 += pitch;
5308 source5 += pitch;
5309 source6 += pitch;
5310 source7 += pitch;
5311 source8 += pitch;
5312 source9 += pitch;
5313 sourceA += pitch;
5314 sourceB += pitch;
5315 sourceC += pitch;
5316 sourceD += pitch;
5317 sourceE += pitch;
5318 sourceF += pitch;
5319 }
5320 }
5321 else ASSERT(false);
5322 }
5323 else
5324 {
5325 if(internal.depth == 2)
5326 {
5327 for(int y = 0; y < height; y++)
5328 {
5329 for(int x = 0; x < 4 * width; x++)
5330 {
5331 float c0 = *(float*)(source0 + 4 * x);
5332 float c1 = *(float*)(source1 + 4 * x);
5333
5334 c0 = c0 + c1;
5335 c0 *= 1.0f / 2.0f;
5336
5337 *(float*)(source0 + 4 * x) = c0;
5338 }
5339
5340 source0 += pitch;
5341 source1 += pitch;
5342 }
5343 }
5344 else if(internal.depth == 4)
5345 {
5346 for(int y = 0; y < height; y++)
5347 {
5348 for(int x = 0; x < 4 * width; x++)
5349 {
5350 float c0 = *(float*)(source0 + 4 * x);
5351 float c1 = *(float*)(source1 + 4 * x);
5352 float c2 = *(float*)(source2 + 4 * x);
5353 float c3 = *(float*)(source3 + 4 * x);
5354
5355 c0 = c0 + c1;
5356 c2 = c2 + c3;
5357 c0 = c0 + c2;
5358 c0 *= 1.0f / 4.0f;
5359
5360 *(float*)(source0 + 4 * x) = c0;
5361 }
5362
5363 source0 += pitch;
5364 source1 += pitch;
5365 source2 += pitch;
5366 source3 += pitch;
5367 }
5368 }
5369 else if(internal.depth == 8)
5370 {
5371 for(int y = 0; y < height; y++)
5372 {
5373 for(int x = 0; x < 4 * width; x++)
5374 {
5375 float c0 = *(float*)(source0 + 4 * x);
5376 float c1 = *(float*)(source1 + 4 * x);
5377 float c2 = *(float*)(source2 + 4 * x);
5378 float c3 = *(float*)(source3 + 4 * x);
5379 float c4 = *(float*)(source4 + 4 * x);
5380 float c5 = *(float*)(source5 + 4 * x);
5381 float c6 = *(float*)(source6 + 4 * x);
5382 float c7 = *(float*)(source7 + 4 * x);
5383
5384 c0 = c0 + c1;
5385 c2 = c2 + c3;
5386 c4 = c4 + c5;
5387 c6 = c6 + c7;
5388 c0 = c0 + c2;
5389 c4 = c4 + c6;
5390 c0 = c0 + c4;
5391 c0 *= 1.0f / 8.0f;
5392
5393 *(float*)(source0 + 4 * x) = c0;
5394 }
5395
5396 source0 += pitch;
5397 source1 += pitch;
5398 source2 += pitch;
5399 source3 += pitch;
5400 source4 += pitch;
5401 source5 += pitch;
5402 source6 += pitch;
5403 source7 += pitch;
5404 }
5405 }
5406 else if(internal.depth == 16)
5407 {
5408 for(int y = 0; y < height; y++)
5409 {
5410 for(int x = 0; x < 4 * width; x++)
5411 {
5412 float c0 = *(float*)(source0 + 4 * x);
5413 float c1 = *(float*)(source1 + 4 * x);
5414 float c2 = *(float*)(source2 + 4 * x);
5415 float c3 = *(float*)(source3 + 4 * x);
5416 float c4 = *(float*)(source4 + 4 * x);
5417 float c5 = *(float*)(source5 + 4 * x);
5418 float c6 = *(float*)(source6 + 4 * x);
5419 float c7 = *(float*)(source7 + 4 * x);
5420 float c8 = *(float*)(source8 + 4 * x);
5421 float c9 = *(float*)(source9 + 4 * x);
5422 float cA = *(float*)(sourceA + 4 * x);
5423 float cB = *(float*)(sourceB + 4 * x);
5424 float cC = *(float*)(sourceC + 4 * x);
5425 float cD = *(float*)(sourceD + 4 * x);
5426 float cE = *(float*)(sourceE + 4 * x);
5427 float cF = *(float*)(sourceF + 4 * x);
5428
5429 c0 = c0 + c1;
5430 c2 = c2 + c3;
5431 c4 = c4 + c5;
5432 c6 = c6 + c7;
5433 c8 = c8 + c9;
5434 cA = cA + cB;
5435 cC = cC + cD;
5436 cE = cE + cF;
5437 c0 = c0 + c2;
5438 c4 = c4 + c6;
5439 c8 = c8 + cA;
5440 cC = cC + cE;
5441 c0 = c0 + c4;
5442 c8 = c8 + cC;
5443 c0 = c0 + c8;
5444 c0 *= 1.0f / 16.0f;
5445
5446 *(float*)(source0 + 4 * x) = c0;
5447 }
5448
5449 source0 += pitch;
5450 source1 += pitch;
5451 source2 += pitch;
5452 source3 += pitch;
5453 source4 += pitch;
5454 source5 += pitch;
5455 source6 += pitch;
5456 source7 += pitch;
5457 source8 += pitch;
5458 source9 += pitch;
5459 sourceA += pitch;
5460 sourceB += pitch;
5461 sourceC += pitch;
5462 sourceD += pitch;
5463 sourceE += pitch;
5464 sourceF += pitch;
5465 }
5466 }
5467 else ASSERT(false);
5468 }
5469 }
Nicolas Capens0e12a922015-09-04 09:18:15 -04005470 else if(internal.format == FORMAT_R5G6B5)
5471 {
5472 if(CPUID::supportsSSE2() && (width % 8) == 0)
5473 {
5474 if(internal.depth == 2)
5475 {
5476 for(int y = 0; y < height; y++)
5477 {
5478 for(int x = 0; x < width; x += 8)
5479 {
5480 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x));
5481 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04005482
Nicolas Capens0e12a922015-09-04 09:18:15 -04005483 static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F};
5484 static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0};
5485 __m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
5486 __m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(_g_));
5487 __m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b));
5488 __m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
5489
5490 c0 = _mm_avg_epu8(c0_r_b, c1_r_b);
5491 c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
5492 c1 = _mm_avg_epu16(c0__g_, c1__g_);
5493 c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
5494 c0 = _mm_or_si128(c0, c1);
5495
5496 _mm_store_si128((__m128i*)(source0 + 2 * x), c0);
5497 }
5498
5499 source0 += pitch;
5500 source1 += pitch;
5501 }
5502 }
5503 else if(internal.depth == 4)
5504 {
5505 for(int y = 0; y < height; y++)
5506 {
5507 for(int x = 0; x < width; x += 8)
5508 {
5509 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x));
5510 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x));
5511 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 2 * x));
5512 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 2 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04005513
Nicolas Capens0e12a922015-09-04 09:18:15 -04005514 static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F};
5515 static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0};
5516 __m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
5517 __m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(_g_));
5518 __m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b));
5519 __m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
5520 __m128i c2_r_b = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(r_b));
5521 __m128i c2__g_ = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(_g_));
5522 __m128i c3_r_b = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(r_b));
5523 __m128i c3__g_ = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(_g_));
5524
5525 c0 = _mm_avg_epu8(c0_r_b, c1_r_b);
5526 c2 = _mm_avg_epu8(c2_r_b, c3_r_b);
5527 c0 = _mm_avg_epu8(c0, c2);
5528 c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
5529 c1 = _mm_avg_epu16(c0__g_, c1__g_);
5530 c3 = _mm_avg_epu16(c2__g_, c3__g_);
5531 c1 = _mm_avg_epu16(c1, c3);
5532 c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
5533 c0 = _mm_or_si128(c0, c1);
5534
5535 _mm_store_si128((__m128i*)(source0 + 2 * x), c0);
5536 }
5537
5538 source0 += pitch;
5539 source1 += pitch;
5540 source2 += pitch;
5541 source3 += pitch;
5542 }
5543 }
5544 else if(internal.depth == 8)
5545 {
5546 for(int y = 0; y < height; y++)
5547 {
5548 for(int x = 0; x < width; x += 8)
5549 {
5550 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x));
5551 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x));
5552 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 2 * x));
5553 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 2 * x));
5554 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 2 * x));
5555 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 2 * x));
5556 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 2 * x));
5557 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 2 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04005558
Nicolas Capens0e12a922015-09-04 09:18:15 -04005559 static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F};
5560 static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0};
5561 __m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
5562 __m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(_g_));
5563 __m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b));
5564 __m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
5565 __m128i c2_r_b = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(r_b));
5566 __m128i c2__g_ = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(_g_));
5567 __m128i c3_r_b = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(r_b));
5568 __m128i c3__g_ = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(_g_));
5569 __m128i c4_r_b = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(r_b));
5570 __m128i c4__g_ = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(_g_));
5571 __m128i c5_r_b = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(r_b));
5572 __m128i c5__g_ = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(_g_));
5573 __m128i c6_r_b = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(r_b));
5574 __m128i c6__g_ = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(_g_));
5575 __m128i c7_r_b = _mm_and_si128(c7, reinterpret_cast<const __m128i&>(r_b));
5576 __m128i c7__g_ = _mm_and_si128(c7, reinterpret_cast<const __m128i&>(_g_));
5577
5578 c0 = _mm_avg_epu8(c0_r_b, c1_r_b);
5579 c2 = _mm_avg_epu8(c2_r_b, c3_r_b);
5580 c4 = _mm_avg_epu8(c4_r_b, c5_r_b);
5581 c6 = _mm_avg_epu8(c6_r_b, c7_r_b);
5582 c0 = _mm_avg_epu8(c0, c2);
5583 c4 = _mm_avg_epu8(c4, c6);
5584 c0 = _mm_avg_epu8(c0, c4);
5585 c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
5586 c1 = _mm_avg_epu16(c0__g_, c1__g_);
5587 c3 = _mm_avg_epu16(c2__g_, c3__g_);
5588 c5 = _mm_avg_epu16(c4__g_, c5__g_);
5589 c7 = _mm_avg_epu16(c6__g_, c7__g_);
5590 c1 = _mm_avg_epu16(c1, c3);
5591 c5 = _mm_avg_epu16(c5, c7);
5592 c1 = _mm_avg_epu16(c1, c5);
5593 c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
5594 c0 = _mm_or_si128(c0, c1);
5595
5596 _mm_store_si128((__m128i*)(source0 + 2 * x), c0);
5597 }
5598
5599 source0 += pitch;
5600 source1 += pitch;
5601 source2 += pitch;
5602 source3 += pitch;
5603 source4 += pitch;
5604 source5 += pitch;
5605 source6 += pitch;
5606 source7 += pitch;
5607 }
5608 }
5609 else if(internal.depth == 16)
5610 {
5611 for(int y = 0; y < height; y++)
5612 {
5613 for(int x = 0; x < width; x += 8)
5614 {
5615 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x));
5616 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x));
5617 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 2 * x));
5618 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 2 * x));
5619 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 2 * x));
5620 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 2 * x));
5621 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 2 * x));
5622 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 2 * x));
5623 __m128i c8 = _mm_load_si128((__m128i*)(source8 + 2 * x));
5624 __m128i c9 = _mm_load_si128((__m128i*)(source9 + 2 * x));
5625 __m128i cA = _mm_load_si128((__m128i*)(sourceA + 2 * x));
5626 __m128i cB = _mm_load_si128((__m128i*)(sourceB + 2 * x));
5627 __m128i cC = _mm_load_si128((__m128i*)(sourceC + 2 * x));
5628 __m128i cD = _mm_load_si128((__m128i*)(sourceD + 2 * x));
5629 __m128i cE = _mm_load_si128((__m128i*)(sourceE + 2 * x));
5630 __m128i cF = _mm_load_si128((__m128i*)(sourceF + 2 * x));
5631
5632 static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F};
5633 static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0};
5634 __m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
5635 __m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(_g_));
5636 __m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b));
5637 __m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
5638 __m128i c2_r_b = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(r_b));
5639 __m128i c2__g_ = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(_g_));
5640 __m128i c3_r_b = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(r_b));
5641 __m128i c3__g_ = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(_g_));
5642 __m128i c4_r_b = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(r_b));
5643 __m128i c4__g_ = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(_g_));
5644 __m128i c5_r_b = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(r_b));
5645 __m128i c5__g_ = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(_g_));
5646 __m128i c6_r_b = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(r_b));
5647 __m128i c6__g_ = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(_g_));
5648 __m128i c7_r_b = _mm_and_si128(c7, reinterpret_cast<const __m128i&>(r_b));
5649 __m128i c7__g_ = _mm_and_si128(c7, reinterpret_cast<const __m128i&>(_g_));
5650 __m128i c8_r_b = _mm_and_si128(c8, reinterpret_cast<const __m128i&>(r_b));
5651 __m128i c8__g_ = _mm_and_si128(c8, reinterpret_cast<const __m128i&>(_g_));
5652 __m128i c9_r_b = _mm_and_si128(c9, reinterpret_cast<const __m128i&>(r_b));
5653 __m128i c9__g_ = _mm_and_si128(c9, reinterpret_cast<const __m128i&>(_g_));
5654 __m128i cA_r_b = _mm_and_si128(cA, reinterpret_cast<const __m128i&>(r_b));
5655 __m128i cA__g_ = _mm_and_si128(cA, reinterpret_cast<const __m128i&>(_g_));
5656 __m128i cB_r_b = _mm_and_si128(cB, reinterpret_cast<const __m128i&>(r_b));
5657 __m128i cB__g_ = _mm_and_si128(cB, reinterpret_cast<const __m128i&>(_g_));
5658 __m128i cC_r_b = _mm_and_si128(cC, reinterpret_cast<const __m128i&>(r_b));
5659 __m128i cC__g_ = _mm_and_si128(cC, reinterpret_cast<const __m128i&>(_g_));
5660 __m128i cD_r_b = _mm_and_si128(cD, reinterpret_cast<const __m128i&>(r_b));
5661 __m128i cD__g_ = _mm_and_si128(cD, reinterpret_cast<const __m128i&>(_g_));
5662 __m128i cE_r_b = _mm_and_si128(cE, reinterpret_cast<const __m128i&>(r_b));
5663 __m128i cE__g_ = _mm_and_si128(cE, reinterpret_cast<const __m128i&>(_g_));
5664 __m128i cF_r_b = _mm_and_si128(cF, reinterpret_cast<const __m128i&>(r_b));
5665 __m128i cF__g_ = _mm_and_si128(cF, reinterpret_cast<const __m128i&>(_g_));
5666
5667 c0 = _mm_avg_epu8(c0_r_b, c1_r_b);
5668 c2 = _mm_avg_epu8(c2_r_b, c3_r_b);
5669 c4 = _mm_avg_epu8(c4_r_b, c5_r_b);
5670 c6 = _mm_avg_epu8(c6_r_b, c7_r_b);
5671 c8 = _mm_avg_epu8(c8_r_b, c9_r_b);
5672 cA = _mm_avg_epu8(cA_r_b, cB_r_b);
5673 cC = _mm_avg_epu8(cC_r_b, cD_r_b);
5674 cE = _mm_avg_epu8(cE_r_b, cF_r_b);
5675 c0 = _mm_avg_epu8(c0, c2);
5676 c4 = _mm_avg_epu8(c4, c6);
5677 c8 = _mm_avg_epu8(c8, cA);
5678 cC = _mm_avg_epu8(cC, cE);
5679 c0 = _mm_avg_epu8(c0, c4);
5680 c8 = _mm_avg_epu8(c8, cC);
5681 c0 = _mm_avg_epu8(c0, c8);
5682 c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
5683 c1 = _mm_avg_epu16(c0__g_, c1__g_);
5684 c3 = _mm_avg_epu16(c2__g_, c3__g_);
5685 c5 = _mm_avg_epu16(c4__g_, c5__g_);
5686 c7 = _mm_avg_epu16(c6__g_, c7__g_);
5687 c9 = _mm_avg_epu16(c8__g_, c9__g_);
5688 cB = _mm_avg_epu16(cA__g_, cB__g_);
5689 cD = _mm_avg_epu16(cC__g_, cD__g_);
5690 cF = _mm_avg_epu16(cE__g_, cF__g_);
5691 c1 = _mm_avg_epu8(c1, c3);
5692 c5 = _mm_avg_epu8(c5, c7);
5693 c9 = _mm_avg_epu8(c9, cB);
5694 cD = _mm_avg_epu8(cD, cF);
5695 c1 = _mm_avg_epu8(c1, c5);
5696 c9 = _mm_avg_epu8(c9, cD);
5697 c1 = _mm_avg_epu8(c1, c9);
5698 c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
5699 c0 = _mm_or_si128(c0, c1);
5700
5701 _mm_store_si128((__m128i*)(source0 + 2 * x), c0);
5702 }
5703
5704 source0 += pitch;
5705 source1 += pitch;
5706 source2 += pitch;
5707 source3 += pitch;
5708 source4 += pitch;
5709 source5 += pitch;
5710 source6 += pitch;
5711 source7 += pitch;
5712 source8 += pitch;
5713 source9 += pitch;
5714 sourceA += pitch;
5715 sourceB += pitch;
5716 sourceC += pitch;
5717 sourceD += pitch;
5718 sourceE += pitch;
5719 sourceF += pitch;
5720 }
5721 }
5722 else ASSERT(false);
5723 }
5724 else
5725 {
5726 #define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7BEF) + (((x) ^ (y)) & 0x0821))
5727
5728 if(internal.depth == 2)
5729 {
5730 for(int y = 0; y < height; y++)
5731 {
5732 for(int x = 0; x < width; x++)
5733 {
5734 unsigned short c0 = *(unsigned short*)(source0 + 2 * x);
5735 unsigned short c1 = *(unsigned short*)(source1 + 2 * x);
5736
5737 c0 = AVERAGE(c0, c1);
5738
5739 *(unsigned short*)(source0 + 2 * x) = c0;
5740 }
5741
5742 source0 += pitch;
5743 source1 += pitch;
5744 }
5745 }
5746 else if(internal.depth == 4)
5747 {
5748 for(int y = 0; y < height; y++)
5749 {
5750 for(int x = 0; x < width; x++)
5751 {
5752 unsigned short c0 = *(unsigned short*)(source0 + 2 * x);
5753 unsigned short c1 = *(unsigned short*)(source1 + 2 * x);
5754 unsigned short c2 = *(unsigned short*)(source2 + 2 * x);
5755 unsigned short c3 = *(unsigned short*)(source3 + 2 * x);
5756
5757 c0 = AVERAGE(c0, c1);
5758 c2 = AVERAGE(c2, c3);
5759 c0 = AVERAGE(c0, c2);
5760
5761 *(unsigned short*)(source0 + 2 * x) = c0;
5762 }
5763
5764 source0 += pitch;
5765 source1 += pitch;
5766 source2 += pitch;
5767 source3 += pitch;
5768 }
5769 }
5770 else if(internal.depth == 8)
5771 {
5772 for(int y = 0; y < height; y++)
5773 {
5774 for(int x = 0; x < width; x++)
5775 {
5776 unsigned short c0 = *(unsigned short*)(source0 + 2 * x);
5777 unsigned short c1 = *(unsigned short*)(source1 + 2 * x);
5778 unsigned short c2 = *(unsigned short*)(source2 + 2 * x);
5779 unsigned short c3 = *(unsigned short*)(source3 + 2 * x);
5780 unsigned short c4 = *(unsigned short*)(source4 + 2 * x);
5781 unsigned short c5 = *(unsigned short*)(source5 + 2 * x);
5782 unsigned short c6 = *(unsigned short*)(source6 + 2 * x);
5783 unsigned short c7 = *(unsigned short*)(source7 + 2 * x);
5784
5785 c0 = AVERAGE(c0, c1);
5786 c2 = AVERAGE(c2, c3);
5787 c4 = AVERAGE(c4, c5);
5788 c6 = AVERAGE(c6, c7);
5789 c0 = AVERAGE(c0, c2);
5790 c4 = AVERAGE(c4, c6);
5791 c0 = AVERAGE(c0, c4);
5792
5793 *(unsigned short*)(source0 + 2 * x) = c0;
5794 }
5795
5796 source0 += pitch;
5797 source1 += pitch;
5798 source2 += pitch;
5799 source3 += pitch;
5800 source4 += pitch;
5801 source5 += pitch;
5802 source6 += pitch;
5803 source7 += pitch;
5804 }
5805 }
5806 else if(internal.depth == 16)
5807 {
5808 for(int y = 0; y < height; y++)
5809 {
5810 for(int x = 0; x < width; x++)
5811 {
5812 unsigned short c0 = *(unsigned short*)(source0 + 2 * x);
5813 unsigned short c1 = *(unsigned short*)(source1 + 2 * x);
5814 unsigned short c2 = *(unsigned short*)(source2 + 2 * x);
5815 unsigned short c3 = *(unsigned short*)(source3 + 2 * x);
5816 unsigned short c4 = *(unsigned short*)(source4 + 2 * x);
5817 unsigned short c5 = *(unsigned short*)(source5 + 2 * x);
5818 unsigned short c6 = *(unsigned short*)(source6 + 2 * x);
5819 unsigned short c7 = *(unsigned short*)(source7 + 2 * x);
5820 unsigned short c8 = *(unsigned short*)(source8 + 2 * x);
5821 unsigned short c9 = *(unsigned short*)(source9 + 2 * x);
5822 unsigned short cA = *(unsigned short*)(sourceA + 2 * x);
5823 unsigned short cB = *(unsigned short*)(sourceB + 2 * x);
5824 unsigned short cC = *(unsigned short*)(sourceC + 2 * x);
5825 unsigned short cD = *(unsigned short*)(sourceD + 2 * x);
5826 unsigned short cE = *(unsigned short*)(sourceE + 2 * x);
5827 unsigned short cF = *(unsigned short*)(sourceF + 2 * x);
5828
5829 c0 = AVERAGE(c0, c1);
5830 c2 = AVERAGE(c2, c3);
5831 c4 = AVERAGE(c4, c5);
5832 c6 = AVERAGE(c6, c7);
5833 c8 = AVERAGE(c8, c9);
5834 cA = AVERAGE(cA, cB);
5835 cC = AVERAGE(cC, cD);
5836 cE = AVERAGE(cE, cF);
5837 c0 = AVERAGE(c0, c2);
5838 c4 = AVERAGE(c4, c6);
5839 c8 = AVERAGE(c8, cA);
5840 cC = AVERAGE(cC, cE);
5841 c0 = AVERAGE(c0, c4);
5842 c8 = AVERAGE(c8, cC);
5843 c0 = AVERAGE(c0, c8);
5844
5845 *(unsigned short*)(source0 + 2 * x) = c0;
5846 }
5847
5848 source0 += pitch;
5849 source1 += pitch;
5850 source2 += pitch;
5851 source3 += pitch;
5852 source4 += pitch;
5853 source5 += pitch;
5854 source6 += pitch;
5855 source7 += pitch;
5856 source8 += pitch;
5857 source9 += pitch;
5858 sourceA += pitch;
5859 sourceB += pitch;
5860 sourceC += pitch;
5861 sourceD += pitch;
5862 sourceE += pitch;
5863 sourceF += pitch;
5864 }
5865 }
5866 else ASSERT(false);
5867
5868 #undef AVERAGE
5869 }
5870 }
John Bauman89401822014-05-06 15:04:28 -04005871 else
5872 {
5873 // UNIMPLEMENTED();
5874 }
5875 }
5876}