blob: 4eb77ca1ba1201c6d864336ef8030066be279cd2 [file] [log] [blame]
Nicolas Capens0bac2852016-05-07 06:09:58 -04001// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
John Bauman89401822014-05-06 15:04:28 -04002//
Nicolas Capens0bac2852016-05-07 06:09:58 -04003// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
John Bauman89401822014-05-06 15:04:28 -04006//
Nicolas Capens0bac2852016-05-07 06:09:58 -04007// http://www.apache.org/licenses/LICENSE-2.0
John Bauman89401822014-05-06 15:04:28 -04008//
Nicolas Capens0bac2852016-05-07 06:09:58 -04009// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
John Bauman89401822014-05-06 15:04:28 -040014
15#include "Surface.hpp"
16
17#include "Color.hpp"
18#include "Context.hpp"
Alexis Hetu0de50d42015-09-09 13:56:41 -040019#include "ETC_Decoder.hpp"
John Bauman19bac1e2014-05-06 15:23:49 -040020#include "Renderer.hpp"
John Bauman89401822014-05-06 15:04:28 -040021#include "Common/Half.hpp"
22#include "Common/Memory.hpp"
23#include "Common/CPUID.hpp"
24#include "Common/Resource.hpp"
25#include "Common/Debug.hpp"
John Bauman19bac1e2014-05-06 15:23:49 -040026#include "Reactor/Reactor.hpp"
John Bauman89401822014-05-06 15:04:28 -040027
Nicolas Capens47dc8672017-04-25 12:54:39 -040028#if defined(__i386__) || defined(__x86_64__)
29 #include <xmmintrin.h>
30 #include <emmintrin.h>
31#endif
John Bauman89401822014-05-06 15:04:28 -040032
33#undef min
34#undef max
35
36namespace sw
37{
38 extern bool quadLayoutEnabled;
39 extern bool complementaryDepthBuffer;
40 extern TranscendentalPrecision logPrecision;
41
42 unsigned int *Surface::palette = 0;
43 unsigned int Surface::paletteID = 0;
Alexis Hetu147f6682017-02-09 17:14:34 -050044 void Surface::typeinfo() {}
John Bauman89401822014-05-06 15:04:28 -040045
John Bauman19bac1e2014-05-06 15:23:49 -040046 void Rect::clip(int minX, int minY, int maxX, int maxY)
47 {
Nicolas Capens22658242014-11-29 00:31:41 -050048 x0 = clamp(x0, minX, maxX);
49 y0 = clamp(y0, minY, maxY);
50 x1 = clamp(x1, minX, maxX);
51 y1 = clamp(y1, minY, maxY);
John Bauman19bac1e2014-05-06 15:23:49 -040052 }
53
John Bauman89401822014-05-06 15:04:28 -040054 void Surface::Buffer::write(int x, int y, int z, const Color<float> &color)
55 {
56 void *element = (unsigned char*)buffer + x * bytes + y * pitchB + z * sliceB;
57
58 write(element, color);
59 }
60
61 void Surface::Buffer::write(int x, int y, const Color<float> &color)
62 {
63 void *element = (unsigned char*)buffer + x * bytes + y * pitchB;
64
65 write(element, color);
66 }
67
68 inline void Surface::Buffer::write(void *element, const Color<float> &color)
69 {
70 switch(format)
71 {
72 case FORMAT_A8:
73 *(unsigned char*)element = unorm<8>(color.a);
74 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -040075 case FORMAT_R8I_SNORM:
76 *(char*)element = snorm<8>(color.r);
77 break;
John Bauman89401822014-05-06 15:04:28 -040078 case FORMAT_R8:
79 *(unsigned char*)element = unorm<8>(color.r);
80 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -040081 case FORMAT_R8I:
82 *(char*)element = scast<8>(color.r);
83 break;
84 case FORMAT_R8UI:
85 *(unsigned char*)element = ucast<8>(color.r);
86 break;
87 case FORMAT_R16I:
88 *(short*)element = scast<16>(color.r);
89 break;
90 case FORMAT_R16UI:
91 *(unsigned short*)element = ucast<16>(color.r);
92 break;
93 case FORMAT_R32I:
94 *(int*)element = static_cast<int>(color.r);
95 break;
96 case FORMAT_R32UI:
97 *(unsigned int*)element = static_cast<unsigned int>(color.r);
98 break;
John Bauman89401822014-05-06 15:04:28 -040099 case FORMAT_R3G3B2:
100 *(unsigned char*)element = (unorm<3>(color.r) << 5) | (unorm<3>(color.g) << 2) | (unorm<2>(color.b) << 0);
101 break;
102 case FORMAT_A8R3G3B2:
103 *(unsigned short*)element = (unorm<8>(color.a) << 8) | (unorm<3>(color.r) << 5) | (unorm<3>(color.g) << 2) | (unorm<2>(color.b) << 0);
104 break;
105 case FORMAT_X4R4G4B4:
106 *(unsigned short*)element = 0xF000 | (unorm<4>(color.r) << 8) | (unorm<4>(color.g) << 4) | (unorm<4>(color.b) << 0);
107 break;
108 case FORMAT_A4R4G4B4:
109 *(unsigned short*)element = (unorm<4>(color.a) << 12) | (unorm<4>(color.r) << 8) | (unorm<4>(color.g) << 4) | (unorm<4>(color.b) << 0);
110 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400111 case FORMAT_R4G4B4A4:
112 *(unsigned short*)element = (unorm<4>(color.r) << 12) | (unorm<4>(color.g) << 8) | (unorm<4>(color.b) << 4) | (unorm<4>(color.a) << 0);
113 break;
John Bauman89401822014-05-06 15:04:28 -0400114 case FORMAT_R5G6B5:
115 *(unsigned short*)element = (unorm<5>(color.r) << 11) | (unorm<6>(color.g) << 5) | (unorm<5>(color.b) << 0);
116 break;
117 case FORMAT_A1R5G5B5:
118 *(unsigned short*)element = (unorm<1>(color.a) << 15) | (unorm<5>(color.r) << 10) | (unorm<5>(color.g) << 5) | (unorm<5>(color.b) << 0);
119 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400120 case FORMAT_R5G5B5A1:
121 *(unsigned short*)element = (unorm<5>(color.r) << 11) | (unorm<5>(color.g) << 6) | (unorm<5>(color.b) << 1) | (unorm<5>(color.a) << 0);
122 break;
John Bauman89401822014-05-06 15:04:28 -0400123 case FORMAT_X1R5G5B5:
124 *(unsigned short*)element = 0x8000 | (unorm<5>(color.r) << 10) | (unorm<5>(color.g) << 5) | (unorm<5>(color.b) << 0);
125 break;
126 case FORMAT_A8R8G8B8:
127 *(unsigned int*)element = (unorm<8>(color.a) << 24) | (unorm<8>(color.r) << 16) | (unorm<8>(color.g) << 8) | (unorm<8>(color.b) << 0);
128 break;
129 case FORMAT_X8R8G8B8:
130 *(unsigned int*)element = 0xFF000000 | (unorm<8>(color.r) << 16) | (unorm<8>(color.g) << 8) | (unorm<8>(color.b) << 0);
131 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400132 case FORMAT_A8B8G8R8I_SNORM:
133 *(unsigned int*)element = (static_cast<unsigned int>(snorm<8>(color.a)) << 24) |
134 (static_cast<unsigned int>(snorm<8>(color.b)) << 16) |
135 (static_cast<unsigned int>(snorm<8>(color.g)) << 8) |
136 (static_cast<unsigned int>(snorm<8>(color.r)) << 0);
137 break;
John Bauman89401822014-05-06 15:04:28 -0400138 case FORMAT_A8B8G8R8:
Alexis Hetu049a1872016-04-25 16:59:58 -0400139 case FORMAT_SRGB8_A8:
John Bauman89401822014-05-06 15:04:28 -0400140 *(unsigned int*)element = (unorm<8>(color.a) << 24) | (unorm<8>(color.b) << 16) | (unorm<8>(color.g) << 8) | (unorm<8>(color.r) << 0);
141 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400142 case FORMAT_A8B8G8R8I:
143 *(unsigned int*)element = (static_cast<unsigned int>(scast<8>(color.a)) << 24) |
144 (static_cast<unsigned int>(scast<8>(color.b)) << 16) |
145 (static_cast<unsigned int>(scast<8>(color.g)) << 8) |
146 (static_cast<unsigned int>(scast<8>(color.r)) << 0);
147 break;
148 case FORMAT_A8B8G8R8UI:
149 *(unsigned int*)element = (ucast<8>(color.a) << 24) | (ucast<8>(color.b) << 16) | (ucast<8>(color.g) << 8) | (ucast<8>(color.r) << 0);
150 break;
151 case FORMAT_X8B8G8R8I_SNORM:
152 *(unsigned int*)element = 0x7F000000 |
153 (static_cast<unsigned int>(snorm<8>(color.b)) << 16) |
154 (static_cast<unsigned int>(snorm<8>(color.g)) << 8) |
155 (static_cast<unsigned int>(snorm<8>(color.r)) << 0);
156 break;
John Bauman89401822014-05-06 15:04:28 -0400157 case FORMAT_X8B8G8R8:
Alexis Hetu049a1872016-04-25 16:59:58 -0400158 case FORMAT_SRGB8_X8:
John Bauman89401822014-05-06 15:04:28 -0400159 *(unsigned int*)element = 0xFF000000 | (unorm<8>(color.b) << 16) | (unorm<8>(color.g) << 8) | (unorm<8>(color.r) << 0);
160 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400161 case FORMAT_X8B8G8R8I:
162 *(unsigned int*)element = 0x7F000000 |
163 (static_cast<unsigned int>(scast<8>(color.b)) << 16) |
164 (static_cast<unsigned int>(scast<8>(color.g)) << 8) |
165 (static_cast<unsigned int>(scast<8>(color.r)) << 0);
166 case FORMAT_X8B8G8R8UI:
167 *(unsigned int*)element = 0xFF000000 | (ucast<8>(color.b) << 16) | (ucast<8>(color.g) << 8) | (ucast<8>(color.r) << 0);
168 break;
John Bauman89401822014-05-06 15:04:28 -0400169 case FORMAT_A2R10G10B10:
170 *(unsigned int*)element = (unorm<2>(color.a) << 30) | (unorm<10>(color.r) << 20) | (unorm<10>(color.g) << 10) | (unorm<10>(color.b) << 0);
171 break;
172 case FORMAT_A2B10G10R10:
173 *(unsigned int*)element = (unorm<2>(color.a) << 30) | (unorm<10>(color.b) << 20) | (unorm<10>(color.g) << 10) | (unorm<10>(color.r) << 0);
174 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400175 case FORMAT_G8R8I_SNORM:
176 *(unsigned short*)element = (static_cast<unsigned short>(snorm<8>(color.g)) << 8) |
177 (static_cast<unsigned short>(snorm<8>(color.r)) << 0);
178 break;
John Bauman89401822014-05-06 15:04:28 -0400179 case FORMAT_G8R8:
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400180 *(unsigned short*)element = (unorm<8>(color.g) << 8) | (unorm<8>(color.r) << 0);
181 break;
182 case FORMAT_G8R8I:
183 *(unsigned short*)element = (static_cast<unsigned short>(scast<8>(color.g)) << 8) |
184 (static_cast<unsigned short>(scast<8>(color.r)) << 0);
185 break;
186 case FORMAT_G8R8UI:
187 *(unsigned short*)element = (ucast<8>(color.g) << 8) | (ucast<8>(color.r) << 0);
John Bauman89401822014-05-06 15:04:28 -0400188 break;
189 case FORMAT_G16R16:
190 *(unsigned int*)element = (unorm<16>(color.g) << 16) | (unorm<16>(color.r) << 0);
191 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400192 case FORMAT_G16R16I:
193 *(unsigned int*)element = (static_cast<unsigned int>(scast<16>(color.g)) << 16) |
194 (static_cast<unsigned int>(scast<16>(color.r)) << 0);
195 break;
196 case FORMAT_G16R16UI:
197 *(unsigned int*)element = (ucast<16>(color.g) << 16) | (ucast<16>(color.r) << 0);
198 break;
199 case FORMAT_G32R32I:
200 case FORMAT_G32R32UI:
201 ((unsigned int*)element)[0] = static_cast<unsigned int>(color.r);
202 ((unsigned int*)element)[1] = static_cast<unsigned int>(color.g);
203 break;
John Bauman89401822014-05-06 15:04:28 -0400204 case FORMAT_A16B16G16R16:
205 ((unsigned short*)element)[0] = unorm<16>(color.r);
206 ((unsigned short*)element)[1] = unorm<16>(color.g);
207 ((unsigned short*)element)[2] = unorm<16>(color.b);
208 ((unsigned short*)element)[3] = unorm<16>(color.a);
209 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400210 case FORMAT_A16B16G16R16I:
211 ((unsigned short*)element)[0] = static_cast<unsigned short>(scast<16>(color.r));
212 ((unsigned short*)element)[1] = static_cast<unsigned short>(scast<16>(color.g));
213 ((unsigned short*)element)[2] = static_cast<unsigned short>(scast<16>(color.b));
214 ((unsigned short*)element)[3] = static_cast<unsigned short>(scast<16>(color.a));
215 break;
216 case FORMAT_A16B16G16R16UI:
217 ((unsigned short*)element)[0] = static_cast<unsigned short>(ucast<16>(color.r));
218 ((unsigned short*)element)[1] = static_cast<unsigned short>(ucast<16>(color.g));
219 ((unsigned short*)element)[2] = static_cast<unsigned short>(ucast<16>(color.b));
220 ((unsigned short*)element)[3] = static_cast<unsigned short>(ucast<16>(color.a));
221 break;
222 case FORMAT_X16B16G16R16I:
223 ((unsigned short*)element)[0] = static_cast<unsigned short>(scast<16>(color.r));
224 ((unsigned short*)element)[1] = static_cast<unsigned short>(scast<16>(color.g));
225 ((unsigned short*)element)[2] = static_cast<unsigned short>(scast<16>(color.b));
226 break;
227 case FORMAT_X16B16G16R16UI:
228 ((unsigned short*)element)[0] = static_cast<unsigned short>(ucast<16>(color.r));
229 ((unsigned short*)element)[1] = static_cast<unsigned short>(ucast<16>(color.g));
230 ((unsigned short*)element)[2] = static_cast<unsigned short>(ucast<16>(color.b));
231 break;
232 case FORMAT_A32B32G32R32I:
233 case FORMAT_A32B32G32R32UI:
234 ((unsigned int*)element)[0] = static_cast<unsigned int>(color.r);
235 ((unsigned int*)element)[1] = static_cast<unsigned int>(color.g);
236 ((unsigned int*)element)[2] = static_cast<unsigned int>(color.b);
237 ((unsigned int*)element)[3] = static_cast<unsigned int>(color.a);
238 break;
239 case FORMAT_X32B32G32R32I:
240 case FORMAT_X32B32G32R32UI:
241 ((unsigned int*)element)[0] = static_cast<unsigned int>(color.r);
242 ((unsigned int*)element)[1] = static_cast<unsigned int>(color.g);
243 ((unsigned int*)element)[2] = static_cast<unsigned int>(color.b);
244 break;
John Bauman89401822014-05-06 15:04:28 -0400245 case FORMAT_V8U8:
246 *(unsigned short*)element = (snorm<8>(color.g) << 8) | (snorm<8>(color.r) << 0);
247 break;
248 case FORMAT_L6V5U5:
249 *(unsigned short*)element = (unorm<6>(color.b) << 10) | (snorm<5>(color.g) << 5) | (snorm<5>(color.r) << 0);
250 break;
251 case FORMAT_Q8W8V8U8:
252 *(unsigned int*)element = (snorm<8>(color.a) << 24) | (snorm<8>(color.b) << 16) | (snorm<8>(color.g) << 8) | (snorm<8>(color.r) << 0);
253 break;
254 case FORMAT_X8L8V8U8:
255 *(unsigned int*)element = 0xFF000000 | (unorm<8>(color.b) << 16) | (snorm<8>(color.g) << 8) | (snorm<8>(color.r) << 0);
256 break;
257 case FORMAT_V16U16:
258 *(unsigned int*)element = (snorm<16>(color.g) << 16) | (snorm<16>(color.r) << 0);
259 break;
260 case FORMAT_A2W10V10U10:
261 *(unsigned int*)element = (unorm<2>(color.a) << 30) | (snorm<10>(color.b) << 20) | (snorm<10>(color.g) << 10) | (snorm<10>(color.r) << 0);
262 break;
263 case FORMAT_A16W16V16U16:
264 ((unsigned short*)element)[0] = snorm<16>(color.r);
265 ((unsigned short*)element)[1] = snorm<16>(color.g);
266 ((unsigned short*)element)[2] = snorm<16>(color.b);
267 ((unsigned short*)element)[3] = unorm<16>(color.a);
268 break;
269 case FORMAT_Q16W16V16U16:
270 ((unsigned short*)element)[0] = snorm<16>(color.r);
271 ((unsigned short*)element)[1] = snorm<16>(color.g);
272 ((unsigned short*)element)[2] = snorm<16>(color.b);
273 ((unsigned short*)element)[3] = snorm<16>(color.a);
274 break;
275 case FORMAT_R8G8B8:
276 ((unsigned char*)element)[0] = unorm<8>(color.b);
277 ((unsigned char*)element)[1] = unorm<8>(color.g);
278 ((unsigned char*)element)[2] = unorm<8>(color.r);
279 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400280 case FORMAT_B8G8R8:
281 ((unsigned char*)element)[0] = unorm<8>(color.r);
282 ((unsigned char*)element)[1] = unorm<8>(color.g);
283 ((unsigned char*)element)[2] = unorm<8>(color.b);
284 break;
John Bauman89401822014-05-06 15:04:28 -0400285 case FORMAT_R16F:
286 *(half*)element = (half)color.r;
287 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400288 case FORMAT_A16F:
289 *(half*)element = (half)color.a;
290 break;
John Bauman89401822014-05-06 15:04:28 -0400291 case FORMAT_G16R16F:
292 ((half*)element)[0] = (half)color.r;
293 ((half*)element)[1] = (half)color.g;
294 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400295 case FORMAT_B16G16R16F:
296 ((half*)element)[0] = (half)color.r;
297 ((half*)element)[1] = (half)color.g;
298 ((half*)element)[2] = (half)color.b;
299 break;
John Bauman89401822014-05-06 15:04:28 -0400300 case FORMAT_A16B16G16R16F:
301 ((half*)element)[0] = (half)color.r;
302 ((half*)element)[1] = (half)color.g;
303 ((half*)element)[2] = (half)color.b;
304 ((half*)element)[3] = (half)color.a;
305 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400306 case FORMAT_A32F:
307 *(float*)element = color.a;
308 break;
John Bauman89401822014-05-06 15:04:28 -0400309 case FORMAT_R32F:
310 *(float*)element = color.r;
311 break;
312 case FORMAT_G32R32F:
313 ((float*)element)[0] = color.r;
314 ((float*)element)[1] = color.g;
315 break;
Alexis Hetudbd1a8e2016-04-13 11:40:30 -0400316 case FORMAT_X32B32G32R32F:
317 ((float*)element)[3] = 1.0f;
Nicolas Capens80594422015-06-09 16:42:56 -0400318 case FORMAT_B32G32R32F:
319 ((float*)element)[0] = color.r;
320 ((float*)element)[1] = color.g;
321 ((float*)element)[2] = color.b;
322 break;
John Bauman89401822014-05-06 15:04:28 -0400323 case FORMAT_A32B32G32R32F:
324 ((float*)element)[0] = color.r;
325 ((float*)element)[1] = color.g;
326 ((float*)element)[2] = color.b;
327 ((float*)element)[3] = color.a;
328 break;
329 case FORMAT_D32F:
330 case FORMAT_D32F_LOCKABLE:
John Bauman66b8ab22014-05-06 15:57:45 -0400331 case FORMAT_D32FS8_TEXTURE:
332 case FORMAT_D32FS8_SHADOW:
John Bauman89401822014-05-06 15:04:28 -0400333 *((float*)element) = color.r;
334 break;
335 case FORMAT_D32F_COMPLEMENTARY:
336 *((float*)element) = 1 - color.r;
337 break;
338 case FORMAT_S8:
339 *((unsigned char*)element) = unorm<8>(color.r);
340 break;
341 case FORMAT_L8:
342 *(unsigned char*)element = unorm<8>(color.r);
343 break;
344 case FORMAT_A4L4:
345 *(unsigned char*)element = (unorm<4>(color.a) << 4) | (unorm<4>(color.r) << 0);
346 break;
347 case FORMAT_L16:
348 *(unsigned short*)element = unorm<16>(color.r);
349 break;
350 case FORMAT_A8L8:
351 *(unsigned short*)element = (unorm<8>(color.a) << 8) | (unorm<8>(color.r) << 0);
352 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400353 case FORMAT_L16F:
354 *(half*)element = (half)color.r;
355 break;
356 case FORMAT_A16L16F:
357 ((half*)element)[0] = (half)color.r;
358 ((half*)element)[1] = (half)color.a;
359 break;
360 case FORMAT_L32F:
361 *(float*)element = color.r;
362 break;
363 case FORMAT_A32L32F:
364 ((float*)element)[0] = color.r;
365 ((float*)element)[1] = color.a;
366 break;
John Bauman89401822014-05-06 15:04:28 -0400367 default:
368 ASSERT(false);
369 }
370 }
371
372 Color<float> Surface::Buffer::read(int x, int y, int z) const
373 {
374 void *element = (unsigned char*)buffer + x * bytes + y * pitchB + z * sliceB;
375
376 return read(element);
377 }
378
379 Color<float> Surface::Buffer::read(int x, int y) const
380 {
381 void *element = (unsigned char*)buffer + x * bytes + y * pitchB;
382
383 return read(element);
384 }
385
386 inline Color<float> Surface::Buffer::read(void *element) const
387 {
Nicolas Capens3f439242015-06-09 16:33:50 -0400388 float r = 0.0f;
389 float g = 0.0f;
390 float b = 0.0f;
391 float a = 1.0f;
John Bauman89401822014-05-06 15:04:28 -0400392
393 switch(format)
394 {
395 case FORMAT_P8:
396 {
397 ASSERT(palette);
398
399 unsigned int abgr = palette[*(unsigned char*)element];
Nicolas Capensc39901e2016-03-21 16:37:44 -0400400
John Bauman89401822014-05-06 15:04:28 -0400401 r = (abgr & 0x000000FF) * (1.0f / 0x000000FF);
402 g = (abgr & 0x0000FF00) * (1.0f / 0x0000FF00);
403 b = (abgr & 0x00FF0000) * (1.0f / 0x00FF0000);
404 a = (abgr & 0xFF000000) * (1.0f / 0xFF000000);
405 }
406 break;
407 case FORMAT_A8P8:
408 {
409 ASSERT(palette);
410
411 unsigned int bgr = palette[((unsigned char*)element)[0]];
Nicolas Capensc39901e2016-03-21 16:37:44 -0400412
John Bauman89401822014-05-06 15:04:28 -0400413 r = (bgr & 0x000000FF) * (1.0f / 0x000000FF);
414 g = (bgr & 0x0000FF00) * (1.0f / 0x0000FF00);
415 b = (bgr & 0x00FF0000) * (1.0f / 0x00FF0000);
416 a = ((unsigned char*)element)[1] * (1.0f / 0xFF);
417 }
418 break;
419 case FORMAT_A8:
420 r = 0;
421 g = 0;
422 b = 0;
423 a = *(unsigned char*)element * (1.0f / 0xFF);
424 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400425 case FORMAT_R8I_SNORM:
426 r = max((*(signed char*)element) * (1.0f / 0x7F), -1.0f);
427 break;
John Bauman89401822014-05-06 15:04:28 -0400428 case FORMAT_R8:
429 r = *(unsigned char*)element * (1.0f / 0xFF);
430 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400431 case FORMAT_R8I:
432 r = *(signed char*)element;
433 break;
434 case FORMAT_R8UI:
435 r = *(unsigned char*)element;
436 break;
John Bauman89401822014-05-06 15:04:28 -0400437 case FORMAT_R3G3B2:
438 {
439 unsigned char rgb = *(unsigned char*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400440
John Bauman89401822014-05-06 15:04:28 -0400441 r = (rgb & 0xE0) * (1.0f / 0xE0);
442 g = (rgb & 0x1C) * (1.0f / 0x1C);
443 b = (rgb & 0x03) * (1.0f / 0x03);
444 }
445 break;
446 case FORMAT_A8R3G3B2:
447 {
448 unsigned short argb = *(unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400449
John Bauman89401822014-05-06 15:04:28 -0400450 a = (argb & 0xFF00) * (1.0f / 0xFF00);
451 r = (argb & 0x00E0) * (1.0f / 0x00E0);
452 g = (argb & 0x001C) * (1.0f / 0x001C);
453 b = (argb & 0x0003) * (1.0f / 0x0003);
454 }
455 break;
456 case FORMAT_X4R4G4B4:
457 {
458 unsigned short rgb = *(unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400459
John Bauman89401822014-05-06 15:04:28 -0400460 r = (rgb & 0x0F00) * (1.0f / 0x0F00);
461 g = (rgb & 0x00F0) * (1.0f / 0x00F0);
462 b = (rgb & 0x000F) * (1.0f / 0x000F);
463 }
464 break;
465 case FORMAT_A4R4G4B4:
466 {
467 unsigned short argb = *(unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400468
John Bauman89401822014-05-06 15:04:28 -0400469 a = (argb & 0xF000) * (1.0f / 0xF000);
470 r = (argb & 0x0F00) * (1.0f / 0x0F00);
471 g = (argb & 0x00F0) * (1.0f / 0x00F0);
472 b = (argb & 0x000F) * (1.0f / 0x000F);
473 }
474 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400475 case FORMAT_R4G4B4A4:
476 {
477 unsigned short rgba = *(unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400478
Nicolas Capens80594422015-06-09 16:42:56 -0400479 r = (rgba & 0xF000) * (1.0f / 0xF000);
480 g = (rgba & 0x0F00) * (1.0f / 0x0F00);
481 b = (rgba & 0x00F0) * (1.0f / 0x00F0);
482 a = (rgba & 0x000F) * (1.0f / 0x000F);
483 }
484 break;
John Bauman89401822014-05-06 15:04:28 -0400485 case FORMAT_R5G6B5:
486 {
487 unsigned short rgb = *(unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400488
John Bauman89401822014-05-06 15:04:28 -0400489 r = (rgb & 0xF800) * (1.0f / 0xF800);
490 g = (rgb & 0x07E0) * (1.0f / 0x07E0);
491 b = (rgb & 0x001F) * (1.0f / 0x001F);
492 }
493 break;
494 case FORMAT_A1R5G5B5:
495 {
496 unsigned short argb = *(unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400497
John Bauman89401822014-05-06 15:04:28 -0400498 a = (argb & 0x8000) * (1.0f / 0x8000);
499 r = (argb & 0x7C00) * (1.0f / 0x7C00);
500 g = (argb & 0x03E0) * (1.0f / 0x03E0);
501 b = (argb & 0x001F) * (1.0f / 0x001F);
502 }
503 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400504 case FORMAT_R5G5B5A1:
505 {
506 unsigned short rgba = *(unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400507
Nicolas Capens80594422015-06-09 16:42:56 -0400508 r = (rgba & 0xF800) * (1.0f / 0xF800);
509 g = (rgba & 0x07C0) * (1.0f / 0x07C0);
510 b = (rgba & 0x003E) * (1.0f / 0x003E);
511 a = (rgba & 0x0001) * (1.0f / 0x0001);
512 }
513 break;
John Bauman89401822014-05-06 15:04:28 -0400514 case FORMAT_X1R5G5B5:
515 {
516 unsigned short xrgb = *(unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400517
John Bauman89401822014-05-06 15:04:28 -0400518 r = (xrgb & 0x7C00) * (1.0f / 0x7C00);
519 g = (xrgb & 0x03E0) * (1.0f / 0x03E0);
520 b = (xrgb & 0x001F) * (1.0f / 0x001F);
521 }
522 break;
523 case FORMAT_A8R8G8B8:
524 {
525 unsigned int argb = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400526
John Bauman89401822014-05-06 15:04:28 -0400527 a = (argb & 0xFF000000) * (1.0f / 0xFF000000);
528 r = (argb & 0x00FF0000) * (1.0f / 0x00FF0000);
529 g = (argb & 0x0000FF00) * (1.0f / 0x0000FF00);
530 b = (argb & 0x000000FF) * (1.0f / 0x000000FF);
531 }
532 break;
533 case FORMAT_X8R8G8B8:
534 {
535 unsigned int xrgb = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400536
John Bauman89401822014-05-06 15:04:28 -0400537 r = (xrgb & 0x00FF0000) * (1.0f / 0x00FF0000);
538 g = (xrgb & 0x0000FF00) * (1.0f / 0x0000FF00);
539 b = (xrgb & 0x000000FF) * (1.0f / 0x000000FF);
540 }
541 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400542 case FORMAT_A8B8G8R8I_SNORM:
543 {
544 signed char* abgr = (signed char*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400545
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400546 r = max(abgr[0] * (1.0f / 0x7F), -1.0f);
547 g = max(abgr[1] * (1.0f / 0x7F), -1.0f);
548 b = max(abgr[2] * (1.0f / 0x7F), -1.0f);
549 a = max(abgr[3] * (1.0f / 0x7F), -1.0f);
550 }
551 break;
John Bauman89401822014-05-06 15:04:28 -0400552 case FORMAT_A8B8G8R8:
Alexis Hetu049a1872016-04-25 16:59:58 -0400553 case FORMAT_SRGB8_A8:
John Bauman89401822014-05-06 15:04:28 -0400554 {
555 unsigned int abgr = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400556
John Bauman89401822014-05-06 15:04:28 -0400557 a = (abgr & 0xFF000000) * (1.0f / 0xFF000000);
558 b = (abgr & 0x00FF0000) * (1.0f / 0x00FF0000);
559 g = (abgr & 0x0000FF00) * (1.0f / 0x0000FF00);
560 r = (abgr & 0x000000FF) * (1.0f / 0x000000FF);
561 }
562 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400563 case FORMAT_A8B8G8R8I:
564 {
565 signed char* abgr = (signed char*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400566
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400567 r = abgr[0];
568 g = abgr[1];
569 b = abgr[2];
570 a = abgr[3];
571 }
572 break;
573 case FORMAT_A8B8G8R8UI:
574 {
575 unsigned char* abgr = (unsigned char*)element;
576
577 r = abgr[0];
578 g = abgr[1];
579 b = abgr[2];
580 a = abgr[3];
581 }
582 break;
583 case FORMAT_X8B8G8R8I_SNORM:
584 {
585 signed char* bgr = (signed char*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400586
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400587 r = max(bgr[0] * (1.0f / 0x7F), -1.0f);
588 g = max(bgr[1] * (1.0f / 0x7F), -1.0f);
589 b = max(bgr[2] * (1.0f / 0x7F), -1.0f);
590 }
591 break;
John Bauman89401822014-05-06 15:04:28 -0400592 case FORMAT_X8B8G8R8:
Alexis Hetu049a1872016-04-25 16:59:58 -0400593 case FORMAT_SRGB8_X8:
John Bauman89401822014-05-06 15:04:28 -0400594 {
595 unsigned int xbgr = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400596
John Bauman89401822014-05-06 15:04:28 -0400597 b = (xbgr & 0x00FF0000) * (1.0f / 0x00FF0000);
598 g = (xbgr & 0x0000FF00) * (1.0f / 0x0000FF00);
599 r = (xbgr & 0x000000FF) * (1.0f / 0x000000FF);
600 }
601 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400602 case FORMAT_X8B8G8R8I:
603 {
604 signed char* bgr = (signed char*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400605
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400606 r = bgr[0];
607 g = bgr[1];
608 b = bgr[2];
609 }
610 break;
611 case FORMAT_X8B8G8R8UI:
612 {
613 unsigned char* bgr = (unsigned char*)element;
614
615 r = bgr[0];
616 g = bgr[1];
617 b = bgr[2];
618 }
619 break;
620 case FORMAT_G8R8I_SNORM:
621 {
622 signed char* gr = (signed char*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400623
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400624 r = (gr[0] & 0xFF00) * (1.0f / 0xFF00);
625 g = (gr[1] & 0x00FF) * (1.0f / 0x00FF);
626 }
627 break;
John Bauman89401822014-05-06 15:04:28 -0400628 case FORMAT_G8R8:
629 {
630 unsigned short gr = *(unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400631
John Bauman89401822014-05-06 15:04:28 -0400632 g = (gr & 0xFF00) * (1.0f / 0xFF00);
633 r = (gr & 0x00FF) * (1.0f / 0x00FF);
634 }
635 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400636 case FORMAT_G8R8I:
637 {
638 signed char* gr = (signed char*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400639
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400640 r = gr[0];
641 g = gr[1];
642 }
643 break;
644 case FORMAT_G8R8UI:
645 {
646 unsigned char* gr = (unsigned char*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400647
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400648 r = gr[0];
649 g = gr[1];
650 }
651 break;
652 case FORMAT_R16I:
653 r = *((short*)element);
654 break;
655 case FORMAT_R16UI:
656 r = *((unsigned short*)element);
657 break;
658 case FORMAT_G16R16I:
659 {
660 short* gr = (short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400661
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400662 r = gr[0];
663 g = gr[1];
664 }
665 break;
John Bauman89401822014-05-06 15:04:28 -0400666 case FORMAT_G16R16:
667 {
668 unsigned int gr = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400669
John Bauman89401822014-05-06 15:04:28 -0400670 g = (gr & 0xFFFF0000) * (1.0f / 0xFFFF0000);
671 r = (gr & 0x0000FFFF) * (1.0f / 0x0000FFFF);
672 }
673 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400674 case FORMAT_G16R16UI:
675 {
676 unsigned short* gr = (unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400677
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400678 r = gr[0];
679 g = gr[1];
680 }
681 break;
John Bauman89401822014-05-06 15:04:28 -0400682 case FORMAT_A2R10G10B10:
683 {
684 unsigned int argb = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400685
John Bauman89401822014-05-06 15:04:28 -0400686 a = (argb & 0xC0000000) * (1.0f / 0xC0000000);
687 r = (argb & 0x3FF00000) * (1.0f / 0x3FF00000);
688 g = (argb & 0x000FFC00) * (1.0f / 0x000FFC00);
689 b = (argb & 0x000003FF) * (1.0f / 0x000003FF);
690 }
691 break;
692 case FORMAT_A2B10G10R10:
693 {
694 unsigned int abgr = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400695
John Bauman89401822014-05-06 15:04:28 -0400696 a = (abgr & 0xC0000000) * (1.0f / 0xC0000000);
697 b = (abgr & 0x3FF00000) * (1.0f / 0x3FF00000);
698 g = (abgr & 0x000FFC00) * (1.0f / 0x000FFC00);
699 r = (abgr & 0x000003FF) * (1.0f / 0x000003FF);
700 }
701 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400702 case FORMAT_A16B16G16R16I:
703 {
704 short* abgr = (short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400705
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400706 r = abgr[0];
707 g = abgr[1];
708 b = abgr[2];
709 a = abgr[3];
710 }
711 break;
John Bauman89401822014-05-06 15:04:28 -0400712 case FORMAT_A16B16G16R16:
713 r = ((unsigned short*)element)[0] * (1.0f / 0xFFFF);
714 g = ((unsigned short*)element)[1] * (1.0f / 0xFFFF);
715 b = ((unsigned short*)element)[2] * (1.0f / 0xFFFF);
716 a = ((unsigned short*)element)[3] * (1.0f / 0xFFFF);
717 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400718 case FORMAT_A16B16G16R16UI:
719 {
720 unsigned short* abgr = (unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400721
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400722 r = abgr[0];
723 g = abgr[1];
724 b = abgr[2];
725 a = abgr[3];
726 }
727 break;
728 case FORMAT_X16B16G16R16I:
729 {
730 short* bgr = (short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400731
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400732 r = bgr[0];
733 g = bgr[1];
734 b = bgr[2];
735 }
736 break;
737 case FORMAT_X16B16G16R16UI:
738 {
739 unsigned short* bgr = (unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400740
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400741 r = bgr[0];
742 g = bgr[1];
743 b = bgr[2];
744 }
745 break;
746 case FORMAT_A32B32G32R32I:
747 {
748 int* abgr = (int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400749
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400750 r = static_cast<float>(abgr[0]);
751 g = static_cast<float>(abgr[1]);
752 b = static_cast<float>(abgr[2]);
753 a = static_cast<float>(abgr[3]);
754 }
755 break;
756 case FORMAT_A32B32G32R32UI:
757 {
758 unsigned int* abgr = (unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400759
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400760 r = static_cast<float>(abgr[0]);
761 g = static_cast<float>(abgr[1]);
762 b = static_cast<float>(abgr[2]);
763 a = static_cast<float>(abgr[3]);
764 }
765 break;
766 case FORMAT_X32B32G32R32I:
767 {
768 int* bgr = (int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400769
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400770 r = static_cast<float>(bgr[0]);
771 g = static_cast<float>(bgr[1]);
772 b = static_cast<float>(bgr[2]);
773 }
774 break;
775 case FORMAT_X32B32G32R32UI:
776 {
777 unsigned int* bgr = (unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400778
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400779 r = static_cast<float>(bgr[0]);
780 g = static_cast<float>(bgr[1]);
781 b = static_cast<float>(bgr[2]);
782 }
783 break;
784 case FORMAT_G32R32I:
785 {
786 int* gr = (int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400787
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400788 r = static_cast<float>(gr[0]);
789 g = static_cast<float>(gr[1]);
790 }
791 break;
792 case FORMAT_G32R32UI:
793 {
794 unsigned int* gr = (unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400795
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400796 r = static_cast<float>(gr[0]);
797 g = static_cast<float>(gr[1]);
798 }
799 break;
800 case FORMAT_R32I:
801 r = static_cast<float>(*((int*)element));
802 break;
803 case FORMAT_R32UI:
804 r = static_cast<float>(*((unsigned int*)element));
805 break;
John Bauman89401822014-05-06 15:04:28 -0400806 case FORMAT_V8U8:
807 {
808 unsigned short vu = *(unsigned short*)element;
809
810 r = ((int)(vu & 0x00FF) << 24) * (1.0f / 0x7F000000);
811 g = ((int)(vu & 0xFF00) << 16) * (1.0f / 0x7F000000);
812 }
813 break;
814 case FORMAT_L6V5U5:
815 {
816 unsigned short lvu = *(unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400817
John Bauman89401822014-05-06 15:04:28 -0400818 r = ((int)(lvu & 0x001F) << 27) * (1.0f / 0x78000000);
819 g = ((int)(lvu & 0x03E0) << 22) * (1.0f / 0x78000000);
820 b = (lvu & 0xFC00) * (1.0f / 0xFC00);
821 }
822 break;
823 case FORMAT_Q8W8V8U8:
824 {
825 unsigned int qwvu = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400826
John Bauman89401822014-05-06 15:04:28 -0400827 r = ((int)(qwvu & 0x000000FF) << 24) * (1.0f / 0x7F000000);
828 g = ((int)(qwvu & 0x0000FF00) << 16) * (1.0f / 0x7F000000);
829 b = ((int)(qwvu & 0x00FF0000) << 8) * (1.0f / 0x7F000000);
830 a = ((int)(qwvu & 0xFF000000) << 0) * (1.0f / 0x7F000000);
831 }
832 break;
833 case FORMAT_X8L8V8U8:
834 {
835 unsigned int xlvu = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400836
John Bauman89401822014-05-06 15:04:28 -0400837 r = ((int)(xlvu & 0x000000FF) << 24) * (1.0f / 0x7F000000);
838 g = ((int)(xlvu & 0x0000FF00) << 16) * (1.0f / 0x7F000000);
839 b = (xlvu & 0x00FF0000) * (1.0f / 0x00FF0000);
840 }
841 break;
842 case FORMAT_R8G8B8:
843 r = ((unsigned char*)element)[2] * (1.0f / 0xFF);
844 g = ((unsigned char*)element)[1] * (1.0f / 0xFF);
845 b = ((unsigned char*)element)[0] * (1.0f / 0xFF);
846 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400847 case FORMAT_B8G8R8:
848 r = ((unsigned char*)element)[0] * (1.0f / 0xFF);
849 g = ((unsigned char*)element)[1] * (1.0f / 0xFF);
850 b = ((unsigned char*)element)[2] * (1.0f / 0xFF);
851 break;
John Bauman89401822014-05-06 15:04:28 -0400852 case FORMAT_V16U16:
853 {
854 unsigned int vu = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400855
John Bauman89401822014-05-06 15:04:28 -0400856 r = ((int)(vu & 0x0000FFFF) << 16) * (1.0f / 0x7FFF0000);
857 g = ((int)(vu & 0xFFFF0000) << 0) * (1.0f / 0x7FFF0000);
858 }
859 break;
860 case FORMAT_A2W10V10U10:
861 {
862 unsigned int awvu = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400863
John Bauman89401822014-05-06 15:04:28 -0400864 r = ((int)(awvu & 0x000003FF) << 22) * (1.0f / 0x7FC00000);
865 g = ((int)(awvu & 0x000FFC00) << 12) * (1.0f / 0x7FC00000);
866 b = ((int)(awvu & 0x3FF00000) << 2) * (1.0f / 0x7FC00000);
867 a = (awvu & 0xC0000000) * (1.0f / 0xC0000000);
868 }
869 break;
870 case FORMAT_A16W16V16U16:
871 r = ((signed short*)element)[0] * (1.0f / 0x7FFF);
872 g = ((signed short*)element)[1] * (1.0f / 0x7FFF);
873 b = ((signed short*)element)[2] * (1.0f / 0x7FFF);
874 a = ((unsigned short*)element)[3] * (1.0f / 0xFFFF);
875 break;
876 case FORMAT_Q16W16V16U16:
877 r = ((signed short*)element)[0] * (1.0f / 0x7FFF);
878 g = ((signed short*)element)[1] * (1.0f / 0x7FFF);
879 b = ((signed short*)element)[2] * (1.0f / 0x7FFF);
880 a = ((signed short*)element)[3] * (1.0f / 0x7FFF);
881 break;
882 case FORMAT_L8:
883 r =
884 g =
885 b = *(unsigned char*)element * (1.0f / 0xFF);
886 break;
887 case FORMAT_A4L4:
888 {
889 unsigned char al = *(unsigned char*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400890
John Bauman89401822014-05-06 15:04:28 -0400891 r =
892 g =
893 b = (al & 0x0F) * (1.0f / 0x0F);
894 a = (al & 0xF0) * (1.0f / 0xF0);
895 }
896 break;
897 case FORMAT_L16:
898 r =
899 g =
900 b = *(unsigned short*)element * (1.0f / 0xFFFF);
901 break;
902 case FORMAT_A8L8:
903 r =
904 g =
905 b = ((unsigned char*)element)[0] * (1.0f / 0xFF);
906 a = ((unsigned char*)element)[1] * (1.0f / 0xFF);
907 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400908 case FORMAT_L16F:
909 r =
910 g =
911 b = *(half*)element;
912 break;
913 case FORMAT_A16L16F:
914 r =
915 g =
916 b = ((half*)element)[0];
917 a = ((half*)element)[1];
918 break;
919 case FORMAT_L32F:
920 r =
921 g =
922 b = *(float*)element;
923 break;
924 case FORMAT_A32L32F:
925 r =
926 g =
927 b = ((float*)element)[0];
928 a = ((float*)element)[1];
929 break;
930 case FORMAT_A16F:
931 a = *(half*)element;
932 break;
John Bauman89401822014-05-06 15:04:28 -0400933 case FORMAT_R16F:
934 r = *(half*)element;
935 break;
936 case FORMAT_G16R16F:
937 r = ((half*)element)[0];
938 g = ((half*)element)[1];
939 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400940 case FORMAT_B16G16R16F:
941 r = ((half*)element)[0];
942 g = ((half*)element)[1];
943 b = ((half*)element)[2];
944 break;
John Bauman89401822014-05-06 15:04:28 -0400945 case FORMAT_A16B16G16R16F:
946 r = ((half*)element)[0];
947 g = ((half*)element)[1];
948 b = ((half*)element)[2];
949 a = ((half*)element)[3];
950 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400951 case FORMAT_A32F:
952 a = *(float*)element;
953 break;
John Bauman89401822014-05-06 15:04:28 -0400954 case FORMAT_R32F:
955 r = *(float*)element;
956 break;
957 case FORMAT_G32R32F:
958 r = ((float*)element)[0];
959 g = ((float*)element)[1];
960 break;
Alexis Hetudbd1a8e2016-04-13 11:40:30 -0400961 case FORMAT_X32B32G32R32F:
Nicolas Capens80594422015-06-09 16:42:56 -0400962 case FORMAT_B32G32R32F:
963 r = ((float*)element)[0];
964 g = ((float*)element)[1];
965 b = ((float*)element)[2];
966 break;
John Bauman89401822014-05-06 15:04:28 -0400967 case FORMAT_A32B32G32R32F:
968 r = ((float*)element)[0];
969 g = ((float*)element)[1];
970 b = ((float*)element)[2];
971 a = ((float*)element)[3];
972 break;
973 case FORMAT_D32F:
974 case FORMAT_D32F_LOCKABLE:
John Bauman66b8ab22014-05-06 15:57:45 -0400975 case FORMAT_D32FS8_TEXTURE:
976 case FORMAT_D32FS8_SHADOW:
John Bauman89401822014-05-06 15:04:28 -0400977 r = *(float*)element;
978 g = r;
979 b = r;
980 a = r;
981 break;
982 case FORMAT_D32F_COMPLEMENTARY:
John Bauman66b8ab22014-05-06 15:57:45 -0400983 r = 1.0f - *(float*)element;
John Bauman89401822014-05-06 15:04:28 -0400984 g = r;
985 b = r;
986 a = r;
987 break;
988 case FORMAT_S8:
989 r = *(unsigned char*)element * (1.0f / 0xFF);
990 break;
991 default:
992 ASSERT(false);
993 }
994
995 // if(sRGB)
996 // {
997 // r = sRGBtoLinear(r);
998 // g = sRGBtoLinear(g);
999 // b = sRGBtoLinear(b);
1000 // }
1001
1002 return Color<float>(r, g, b, a);
1003 }
1004
1005 Color<float> Surface::Buffer::sample(float x, float y, float z) const
1006 {
1007 x -= 0.5f;
1008 y -= 0.5f;
1009 z -= 0.5f;
1010
1011 int x0 = clamp((int)x, 0, width - 1);
1012 int x1 = (x0 + 1 >= width) ? x0 : x0 + 1;
1013
1014 int y0 = clamp((int)y, 0, height - 1);
1015 int y1 = (y0 + 1 >= height) ? y0 : y0 + 1;
1016
1017 int z0 = clamp((int)z, 0, depth - 1);
1018 int z1 = (z0 + 1 >= depth) ? z0 : z0 + 1;
1019
1020 Color<float> c000 = read(x0, y0, z0);
1021 Color<float> c100 = read(x1, y0, z0);
1022 Color<float> c010 = read(x0, y1, z0);
1023 Color<float> c110 = read(x1, y1, z0);
1024 Color<float> c001 = read(x0, y0, z1);
1025 Color<float> c101 = read(x1, y0, z1);
1026 Color<float> c011 = read(x0, y1, z1);
1027 Color<float> c111 = read(x1, y1, z1);
1028
1029 float fx = x - x0;
1030 float fy = y - y0;
1031 float fz = z - z0;
1032
1033 c000 *= (1 - fx) * (1 - fy) * (1 - fz);
1034 c100 *= fx * (1 - fy) * (1 - fz);
1035 c010 *= (1 - fx) * fy * (1 - fz);
1036 c110 *= fx * fy * (1 - fz);
1037 c001 *= (1 - fx) * (1 - fy) * fz;
1038 c101 *= fx * (1 - fy) * fz;
1039 c011 *= (1 - fx) * fy * fz;
1040 c111 *= fx * fy * fz;
1041
1042 return c000 + c100 + c010 + c110 + c001 + c101 + c011 + c111;
1043 }
1044
1045 Color<float> Surface::Buffer::sample(float x, float y) const
1046 {
1047 x -= 0.5f;
1048 y -= 0.5f;
1049
1050 int x0 = clamp((int)x, 0, width - 1);
1051 int x1 = (x0 + 1 >= width) ? x0 : x0 + 1;
1052
1053 int y0 = clamp((int)y, 0, height - 1);
1054 int y1 = (y0 + 1 >= height) ? y0 : y0 + 1;
1055
1056 Color<float> c00 = read(x0, y0);
1057 Color<float> c10 = read(x1, y0);
1058 Color<float> c01 = read(x0, y1);
1059 Color<float> c11 = read(x1, y1);
1060
1061 float fx = x - x0;
1062 float fy = y - y0;
1063
1064 c00 *= (1 - fx) * (1 - fy);
1065 c10 *= fx * (1 - fy);
1066 c01 *= (1 - fx) * fy;
1067 c11 *= fx * fy;
1068
1069 return c00 + c10 + c01 + c11;
1070 }
1071
John Bauman19bac1e2014-05-06 15:23:49 -04001072 void *Surface::Buffer::lockRect(int x, int y, int z, Lock lock)
John Bauman89401822014-05-06 15:04:28 -04001073 {
1074 this->lock = lock;
1075
1076 switch(lock)
1077 {
1078 case LOCK_UNLOCKED:
1079 case LOCK_READONLY:
1080 break;
1081 case LOCK_WRITEONLY:
1082 case LOCK_READWRITE:
1083 case LOCK_DISCARD:
1084 dirty = true;
1085 break;
1086 default:
1087 ASSERT(false);
1088 }
1089
John Baumand4ae8632014-05-06 16:18:33 -04001090 if(buffer)
John Bauman89401822014-05-06 15:04:28 -04001091 {
John Baumand4ae8632014-05-06 16:18:33 -04001092 switch(format)
1093 {
1094 #if S3TC_SUPPORT
1095 case FORMAT_DXT1:
1096 #endif
1097 case FORMAT_ATI1:
Nicolas Capens22658242014-11-29 00:31:41 -05001098 case FORMAT_ETC1:
Alexis Hetu460e41f2015-09-01 10:58:37 -04001099 case FORMAT_R11_EAC:
1100 case FORMAT_SIGNED_R11_EAC:
1101 case FORMAT_RGB8_ETC2:
1102 case FORMAT_SRGB8_ETC2:
1103 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
1104 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
John Baumand4ae8632014-05-06 16:18:33 -04001105 return (unsigned char*)buffer + 8 * (x / 4) + (y / 4) * pitchB + z * sliceB;
Alexis Hetu460e41f2015-09-01 10:58:37 -04001106 case FORMAT_RG11_EAC:
1107 case FORMAT_SIGNED_RG11_EAC:
1108 case FORMAT_RGBA8_ETC2_EAC:
1109 case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
1110 case FORMAT_RGBA_ASTC_4x4_KHR:
1111 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
1112 return (unsigned char*)buffer + 16 * (x / 4) + (y / 4) * pitchB + z * sliceB;
1113 case FORMAT_RGBA_ASTC_5x4_KHR:
1114 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
1115 return (unsigned char*)buffer + 16 * (x / 5) + (y / 4) * pitchB + z * sliceB;
1116 case FORMAT_RGBA_ASTC_5x5_KHR:
1117 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
1118 return (unsigned char*)buffer + 16 * (x / 5) + (y / 5) * pitchB + z * sliceB;
1119 case FORMAT_RGBA_ASTC_6x5_KHR:
1120 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
1121 return (unsigned char*)buffer + 16 * (x / 6) + (y / 5) * pitchB + z * sliceB;
1122 case FORMAT_RGBA_ASTC_6x6_KHR:
1123 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
1124 return (unsigned char*)buffer + 16 * (x / 6) + (y / 6) * pitchB + z * sliceB;
1125 case FORMAT_RGBA_ASTC_8x5_KHR:
1126 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
1127 return (unsigned char*)buffer + 16 * (x / 8) + (y / 5) * pitchB + z * sliceB;
1128 case FORMAT_RGBA_ASTC_8x6_KHR:
1129 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
1130 return (unsigned char*)buffer + 16 * (x / 8) + (y / 6) * pitchB + z * sliceB;
1131 case FORMAT_RGBA_ASTC_8x8_KHR:
1132 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
1133 return (unsigned char*)buffer + 16 * (x / 8) + (y / 8) * pitchB + z * sliceB;
1134 case FORMAT_RGBA_ASTC_10x5_KHR:
1135 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
1136 return (unsigned char*)buffer + 16 * (x / 10) + (y / 5) * pitchB + z * sliceB;
1137 case FORMAT_RGBA_ASTC_10x6_KHR:
1138 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
1139 return (unsigned char*)buffer + 16 * (x / 10) + (y / 6) * pitchB + z * sliceB;
1140 case FORMAT_RGBA_ASTC_10x8_KHR:
1141 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
1142 return (unsigned char*)buffer + 16 * (x / 10) + (y / 8) * pitchB + z * sliceB;
1143 case FORMAT_RGBA_ASTC_10x10_KHR:
1144 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
1145 return (unsigned char*)buffer + 16 * (x / 10) + (y / 10) * pitchB + z * sliceB;
1146 case FORMAT_RGBA_ASTC_12x10_KHR:
1147 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
1148 return (unsigned char*)buffer + 16 * (x / 12) + (y / 10) * pitchB + z * sliceB;
1149 case FORMAT_RGBA_ASTC_12x12_KHR:
1150 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:
1151 return (unsigned char*)buffer + 16 * (x / 12) + (y / 12) * pitchB + z * sliceB;
John Baumand4ae8632014-05-06 16:18:33 -04001152 #if S3TC_SUPPORT
1153 case FORMAT_DXT3:
1154 case FORMAT_DXT5:
1155 #endif
1156 case FORMAT_ATI2:
1157 return (unsigned char*)buffer + 16 * (x / 4) + (y / 4) * pitchB + z * sliceB;
1158 default:
1159 return (unsigned char*)buffer + x * bytes + y * pitchB + z * sliceB;
1160 }
John Bauman89401822014-05-06 15:04:28 -04001161 }
1162
1163 return 0;
1164 }
1165
1166 void Surface::Buffer::unlockRect()
1167 {
1168 lock = LOCK_UNLOCKED;
1169 }
1170
Nicolas Capens477314b2015-06-09 16:47:29 -04001171 Surface::Surface(int width, int height, int depth, Format format, void *pixels, int pitch, int slice) : lockable(true), renderTarget(false)
1172 {
1173 resource = new Resource(0);
1174 hasParent = false;
1175 ownExternal = false;
1176 depth = max(1, depth);
1177
1178 external.buffer = pixels;
1179 external.width = width;
1180 external.height = height;
1181 external.depth = depth;
1182 external.format = format;
1183 external.bytes = bytes(external.format);
1184 external.pitchB = pitch;
Nicolas Capens0a8d3d12016-02-12 17:10:58 -05001185 external.pitchP = external.bytes ? pitch / external.bytes : 0;
Nicolas Capens477314b2015-06-09 16:47:29 -04001186 external.sliceB = slice;
Nicolas Capens0a8d3d12016-02-12 17:10:58 -05001187 external.sliceP = external.bytes ? slice / external.bytes : 0;
Nicolas Capens477314b2015-06-09 16:47:29 -04001188 external.lock = LOCK_UNLOCKED;
1189 external.dirty = true;
1190
1191 internal.buffer = 0;
1192 internal.width = width;
1193 internal.height = height;
1194 internal.depth = depth;
1195 internal.format = selectInternalFormat(format);
1196 internal.bytes = bytes(internal.format);
1197 internal.pitchB = pitchB(internal.width, internal.format, false);
1198 internal.pitchP = pitchP(internal.width, internal.format, false);
1199 internal.sliceB = sliceB(internal.width, internal.height, internal.format, false);
1200 internal.sliceP = sliceP(internal.width, internal.height, internal.format, false);
1201 internal.lock = LOCK_UNLOCKED;
1202 internal.dirty = false;
1203
1204 stencil.buffer = 0;
1205 stencil.width = width;
1206 stencil.height = height;
1207 stencil.depth = depth;
1208 stencil.format = FORMAT_S8;
1209 stencil.bytes = bytes(stencil.format);
1210 stencil.pitchB = pitchB(stencil.width, stencil.format, false);
1211 stencil.pitchP = pitchP(stencil.width, stencil.format, false);
1212 stencil.sliceB = sliceB(stencil.width, stencil.height, stencil.format, false);
1213 stencil.sliceP = sliceP(stencil.width, stencil.height, stencil.format, false);
1214 stencil.lock = LOCK_UNLOCKED;
1215 stencil.dirty = false;
1216
1217 dirtyMipmaps = true;
1218 paletteUsed = 0;
1219 }
1220
Nicolas Capensf3898612015-11-24 15:33:31 -05001221 Surface::Surface(Resource *texture, int width, int height, int depth, Format format, bool lockable, bool renderTarget, int pitchPprovided) : lockable(lockable), renderTarget(renderTarget)
John Bauman89401822014-05-06 15:04:28 -04001222 {
1223 resource = texture ? texture : new Resource(0);
John Bauman19bac1e2014-05-06 15:23:49 -04001224 hasParent = texture != 0;
Nicolas Capens477314b2015-06-09 16:47:29 -04001225 ownExternal = true;
John Bauman89401822014-05-06 15:04:28 -04001226 depth = max(1, depth);
1227
1228 external.buffer = 0;
1229 external.width = width;
1230 external.height = height;
1231 external.depth = depth;
1232 external.format = format;
1233 external.bytes = bytes(external.format);
1234 external.pitchB = pitchB(external.width, external.format, renderTarget && !texture);
1235 external.pitchP = pitchP(external.width, external.format, renderTarget && !texture);
1236 external.sliceB = sliceB(external.width, external.height, external.format, renderTarget && !texture);
1237 external.sliceP = sliceP(external.width, external.height, external.format, renderTarget && !texture);
1238 external.lock = LOCK_UNLOCKED;
1239 external.dirty = false;
John Bauman89401822014-05-06 15:04:28 -04001240
1241 internal.buffer = 0;
1242 internal.width = width;
1243 internal.height = height;
1244 internal.depth = depth;
1245 internal.format = selectInternalFormat(format);
1246 internal.bytes = bytes(internal.format);
Nicolas Capensf3898612015-11-24 15:33:31 -05001247 internal.pitchB = !pitchPprovided ? pitchB(internal.width, internal.format, renderTarget) : pitchPprovided * internal.bytes;
1248 internal.pitchP = !pitchPprovided ? pitchP(internal.width, internal.format, renderTarget) : pitchPprovided;
John Bauman89401822014-05-06 15:04:28 -04001249 internal.sliceB = sliceB(internal.width, internal.height, internal.format, renderTarget);
1250 internal.sliceP = sliceP(internal.width, internal.height, internal.format, renderTarget);
1251 internal.lock = LOCK_UNLOCKED;
1252 internal.dirty = false;
John Bauman89401822014-05-06 15:04:28 -04001253
1254 stencil.buffer = 0;
1255 stencil.width = width;
1256 stencil.height = height;
1257 stencil.depth = depth;
1258 stencil.format = FORMAT_S8;
1259 stencil.bytes = bytes(stencil.format);
1260 stencil.pitchB = pitchB(stencil.width, stencil.format, renderTarget);
1261 stencil.pitchP = pitchP(stencil.width, stencil.format, renderTarget);
1262 stencil.sliceB = sliceB(stencil.width, stencil.height, stencil.format, renderTarget);
1263 stencil.sliceP = sliceP(stencil.width, stencil.height, stencil.format, renderTarget);
1264 stencil.lock = LOCK_UNLOCKED;
1265 stencil.dirty = false;
John Bauman89401822014-05-06 15:04:28 -04001266
1267 dirtyMipmaps = true;
John Bauman66b8ab22014-05-06 15:57:45 -04001268 paletteUsed = 0;
John Bauman89401822014-05-06 15:04:28 -04001269 }
1270
1271 Surface::~Surface()
1272 {
John Bauman8a4f6fc2014-05-06 15:26:18 -04001273 // Synchronize so we can deallocate the buffers below
1274 resource->lock(DESTRUCT);
1275 resource->unlock();
1276
John Bauman89401822014-05-06 15:04:28 -04001277 if(!hasParent)
1278 {
1279 resource->destruct();
1280 }
1281
Nicolas Capens477314b2015-06-09 16:47:29 -04001282 if(ownExternal)
1283 {
1284 deallocate(external.buffer);
1285 }
John Bauman89401822014-05-06 15:04:28 -04001286
1287 if(internal.buffer != external.buffer)
1288 {
1289 deallocate(internal.buffer);
1290 }
1291
1292 deallocate(stencil.buffer);
1293
1294 external.buffer = 0;
1295 internal.buffer = 0;
1296 stencil.buffer = 0;
1297 }
1298
John Bauman19bac1e2014-05-06 15:23:49 -04001299 void *Surface::lockExternal(int x, int y, int z, Lock lock, Accessor client)
John Bauman89401822014-05-06 15:04:28 -04001300 {
1301 resource->lock(client);
1302
1303 if(!external.buffer)
1304 {
1305 if(internal.buffer && identicalFormats())
1306 {
1307 external.buffer = internal.buffer;
1308 }
1309 else
1310 {
1311 external.buffer = allocateBuffer(external.width, external.height, external.depth, external.format);
1312 }
1313 }
1314
1315 if(internal.dirty)
1316 {
1317 if(lock != LOCK_DISCARD)
1318 {
1319 update(external, internal);
1320 }
John Bauman66b8ab22014-05-06 15:57:45 -04001321
1322 internal.dirty = false;
John Bauman89401822014-05-06 15:04:28 -04001323 }
1324
1325 switch(lock)
1326 {
1327 case LOCK_READONLY:
1328 break;
1329 case LOCK_WRITEONLY:
1330 case LOCK_READWRITE:
1331 case LOCK_DISCARD:
1332 dirtyMipmaps = true;
1333 break;
1334 default:
1335 ASSERT(false);
1336 }
1337
John Bauman19bac1e2014-05-06 15:23:49 -04001338 return external.lockRect(x, y, z, lock);
John Bauman89401822014-05-06 15:04:28 -04001339 }
1340
1341 void Surface::unlockExternal()
1342 {
1343 resource->unlock();
1344
1345 external.unlockRect();
1346 }
1347
John Bauman19bac1e2014-05-06 15:23:49 -04001348 void *Surface::lockInternal(int x, int y, int z, Lock lock, Accessor client)
John Bauman89401822014-05-06 15:04:28 -04001349 {
1350 if(lock != LOCK_UNLOCKED)
1351 {
1352 resource->lock(client);
1353 }
1354
1355 if(!internal.buffer)
1356 {
1357 if(external.buffer && identicalFormats())
1358 {
1359 internal.buffer = external.buffer;
1360 }
1361 else
1362 {
1363 internal.buffer = allocateBuffer(internal.width, internal.height, internal.depth, internal.format);
1364 }
1365 }
1366
1367 // FIXME: WHQL requires conversion to lower external precision and back
1368 if(logPrecision >= WHQL)
1369 {
1370 if(internal.dirty && renderTarget && internal.format != external.format)
1371 {
1372 if(lock != LOCK_DISCARD)
1373 {
1374 switch(external.format)
1375 {
1376 case FORMAT_R3G3B2:
1377 case FORMAT_A8R3G3B2:
1378 case FORMAT_A1R5G5B5:
1379 case FORMAT_A2R10G10B10:
1380 case FORMAT_A2B10G10R10:
1381 lockExternal(0, 0, 0, LOCK_READWRITE, client);
1382 unlockExternal();
1383 break;
1384 default:
1385 // Difference passes WHQL
1386 break;
1387 }
1388 }
1389 }
1390 }
1391
John Bauman66b8ab22014-05-06 15:57:45 -04001392 if(external.dirty || (isPalette(external.format) && paletteUsed != Surface::paletteID))
John Bauman89401822014-05-06 15:04:28 -04001393 {
1394 if(lock != LOCK_DISCARD)
1395 {
1396 update(internal, external);
1397 }
John Bauman89401822014-05-06 15:04:28 -04001398
John Bauman66b8ab22014-05-06 15:57:45 -04001399 external.dirty = false;
1400 paletteUsed = Surface::paletteID;
John Bauman89401822014-05-06 15:04:28 -04001401 }
1402
1403 switch(lock)
1404 {
1405 case LOCK_UNLOCKED:
1406 case LOCK_READONLY:
1407 break;
1408 case LOCK_WRITEONLY:
1409 case LOCK_READWRITE:
1410 case LOCK_DISCARD:
1411 dirtyMipmaps = true;
1412 break;
1413 default:
1414 ASSERT(false);
1415 }
1416
1417 if(lock == LOCK_READONLY && client == PUBLIC)
1418 {
1419 resolve();
1420 }
1421
John Bauman19bac1e2014-05-06 15:23:49 -04001422 return internal.lockRect(x, y, z, lock);
John Bauman89401822014-05-06 15:04:28 -04001423 }
1424
1425 void Surface::unlockInternal()
1426 {
1427 resource->unlock();
1428
1429 internal.unlockRect();
1430 }
1431
Alexis Hetua52dfbd2016-10-05 17:03:30 -04001432 void *Surface::lockStencil(int x, int y, int front, Accessor client)
John Bauman89401822014-05-06 15:04:28 -04001433 {
1434 resource->lock(client);
1435
1436 if(!stencil.buffer)
1437 {
1438 stencil.buffer = allocateBuffer(stencil.width, stencil.height, stencil.depth, stencil.format);
1439 }
1440
Alexis Hetua52dfbd2016-10-05 17:03:30 -04001441 return stencil.lockRect(x, y, front, LOCK_READWRITE); // FIXME
John Bauman89401822014-05-06 15:04:28 -04001442 }
1443
1444 void Surface::unlockStencil()
1445 {
1446 resource->unlock();
1447
1448 stencil.unlockRect();
1449 }
1450
1451 int Surface::bytes(Format format)
1452 {
1453 switch(format)
1454 {
1455 case FORMAT_NULL: return 0;
1456 case FORMAT_P8: return 1;
1457 case FORMAT_A8P8: return 2;
1458 case FORMAT_A8: return 1;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001459 case FORMAT_R8I: return 1;
John Bauman89401822014-05-06 15:04:28 -04001460 case FORMAT_R8: return 1;
1461 case FORMAT_R3G3B2: return 1;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001462 case FORMAT_R16I: return 2;
1463 case FORMAT_R16UI: return 2;
John Bauman89401822014-05-06 15:04:28 -04001464 case FORMAT_A8R3G3B2: return 2;
1465 case FORMAT_R5G6B5: return 2;
1466 case FORMAT_A1R5G5B5: return 2;
1467 case FORMAT_X1R5G5B5: return 2;
Nicolas Capens80594422015-06-09 16:42:56 -04001468 case FORMAT_R5G5B5A1: return 2;
John Bauman89401822014-05-06 15:04:28 -04001469 case FORMAT_X4R4G4B4: return 2;
1470 case FORMAT_A4R4G4B4: return 2;
Nicolas Capens80594422015-06-09 16:42:56 -04001471 case FORMAT_R4G4B4A4: return 2;
John Bauman89401822014-05-06 15:04:28 -04001472 case FORMAT_R8G8B8: return 3;
Nicolas Capens80594422015-06-09 16:42:56 -04001473 case FORMAT_B8G8R8: return 3;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001474 case FORMAT_R32I: return 4;
1475 case FORMAT_R32UI: return 4;
John Bauman89401822014-05-06 15:04:28 -04001476 case FORMAT_X8R8G8B8: return 4;
1477 // case FORMAT_X8G8R8B8Q: return 4;
1478 case FORMAT_A8R8G8B8: return 4;
1479 // case FORMAT_A8G8R8B8Q: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001480 case FORMAT_X8B8G8R8I: return 4;
John Bauman89401822014-05-06 15:04:28 -04001481 case FORMAT_X8B8G8R8: return 4;
Alexis Hetu049a1872016-04-25 16:59:58 -04001482 case FORMAT_SRGB8_X8: return 4;
1483 case FORMAT_SRGB8_A8: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001484 case FORMAT_A8B8G8R8I: return 4;
1485 case FORMAT_R8UI: return 1;
1486 case FORMAT_G8R8UI: return 2;
1487 case FORMAT_X8B8G8R8UI: return 4;
1488 case FORMAT_A8B8G8R8UI: return 4;
John Bauman89401822014-05-06 15:04:28 -04001489 case FORMAT_A8B8G8R8: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001490 case FORMAT_R8I_SNORM: return 1;
1491 case FORMAT_G8R8I_SNORM: return 2;
1492 case FORMAT_X8B8G8R8I_SNORM: return 4;
1493 case FORMAT_A8B8G8R8I_SNORM: return 4;
John Bauman89401822014-05-06 15:04:28 -04001494 case FORMAT_A2R10G10B10: return 4;
1495 case FORMAT_A2B10G10R10: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001496 case FORMAT_G8R8I: return 2;
John Bauman89401822014-05-06 15:04:28 -04001497 case FORMAT_G8R8: return 2;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001498 case FORMAT_G16R16I: return 4;
1499 case FORMAT_G16R16UI: return 4;
John Bauman89401822014-05-06 15:04:28 -04001500 case FORMAT_G16R16: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001501 case FORMAT_G32R32I: return 8;
1502 case FORMAT_G32R32UI: return 8;
1503 case FORMAT_X16B16G16R16I: return 8;
1504 case FORMAT_X16B16G16R16UI: return 8;
1505 case FORMAT_A16B16G16R16I: return 8;
1506 case FORMAT_A16B16G16R16UI: return 8;
John Bauman89401822014-05-06 15:04:28 -04001507 case FORMAT_A16B16G16R16: return 8;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001508 case FORMAT_X32B32G32R32I: return 16;
1509 case FORMAT_X32B32G32R32UI: return 16;
1510 case FORMAT_A32B32G32R32I: return 16;
1511 case FORMAT_A32B32G32R32UI: return 16;
John Bauman89401822014-05-06 15:04:28 -04001512 // Compressed formats
1513 #if S3TC_SUPPORT
1514 case FORMAT_DXT1: return 2; // Column of four pixels
1515 case FORMAT_DXT3: return 4; // Column of four pixels
1516 case FORMAT_DXT5: return 4; // Column of four pixels
John Bauman66b8ab22014-05-06 15:57:45 -04001517 #endif
John Bauman89401822014-05-06 15:04:28 -04001518 case FORMAT_ATI1: return 2; // Column of four pixels
1519 case FORMAT_ATI2: return 4; // Column of four pixels
Nicolas Capens22658242014-11-29 00:31:41 -05001520 case FORMAT_ETC1: return 2; // Column of four pixels
Alexis Hetu460e41f2015-09-01 10:58:37 -04001521 case FORMAT_R11_EAC: return 2;
1522 case FORMAT_SIGNED_R11_EAC: return 2;
1523 case FORMAT_RG11_EAC: return 4;
1524 case FORMAT_SIGNED_RG11_EAC: return 4;
1525 case FORMAT_RGB8_ETC2: return 2;
1526 case FORMAT_SRGB8_ETC2: return 2;
1527 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2: return 2;
1528 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2: return 2;
1529 case FORMAT_RGBA8_ETC2_EAC: return 4;
1530 case FORMAT_SRGB8_ALPHA8_ETC2_EAC: return 4;
1531 case FORMAT_RGBA_ASTC_4x4_KHR:
1532 case FORMAT_RGBA_ASTC_5x4_KHR:
1533 case FORMAT_RGBA_ASTC_5x5_KHR:
1534 case FORMAT_RGBA_ASTC_6x5_KHR:
1535 case FORMAT_RGBA_ASTC_6x6_KHR:
1536 case FORMAT_RGBA_ASTC_8x5_KHR:
1537 case FORMAT_RGBA_ASTC_8x6_KHR:
1538 case FORMAT_RGBA_ASTC_8x8_KHR:
1539 case FORMAT_RGBA_ASTC_10x5_KHR:
1540 case FORMAT_RGBA_ASTC_10x6_KHR:
1541 case FORMAT_RGBA_ASTC_10x8_KHR:
1542 case FORMAT_RGBA_ASTC_10x10_KHR:
1543 case FORMAT_RGBA_ASTC_12x10_KHR:
1544 case FORMAT_RGBA_ASTC_12x12_KHR:
1545 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
1546 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
1547 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
1548 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
1549 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
1550 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
1551 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
1552 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
1553 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
1554 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
1555 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
1556 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
1557 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
1558 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR: return 0; // FIXME
John Bauman89401822014-05-06 15:04:28 -04001559 // Bumpmap formats
1560 case FORMAT_V8U8: return 2;
1561 case FORMAT_L6V5U5: return 2;
1562 case FORMAT_Q8W8V8U8: return 4;
1563 case FORMAT_X8L8V8U8: return 4;
1564 case FORMAT_A2W10V10U10: return 4;
1565 case FORMAT_V16U16: return 4;
1566 case FORMAT_A16W16V16U16: return 8;
1567 case FORMAT_Q16W16V16U16: return 8;
1568 // Luminance formats
1569 case FORMAT_L8: return 1;
1570 case FORMAT_A4L4: return 1;
1571 case FORMAT_L16: return 2;
1572 case FORMAT_A8L8: return 2;
Nicolas Capens80594422015-06-09 16:42:56 -04001573 case FORMAT_L16F: return 2;
1574 case FORMAT_A16L16F: return 4;
1575 case FORMAT_L32F: return 4;
1576 case FORMAT_A32L32F: return 8;
John Bauman89401822014-05-06 15:04:28 -04001577 // Floating-point formats
Nicolas Capens80594422015-06-09 16:42:56 -04001578 case FORMAT_A16F: return 2;
John Bauman89401822014-05-06 15:04:28 -04001579 case FORMAT_R16F: return 2;
1580 case FORMAT_G16R16F: return 4;
Nicolas Capens80594422015-06-09 16:42:56 -04001581 case FORMAT_B16G16R16F: return 6;
John Bauman89401822014-05-06 15:04:28 -04001582 case FORMAT_A16B16G16R16F: return 8;
Nicolas Capens80594422015-06-09 16:42:56 -04001583 case FORMAT_A32F: return 4;
John Bauman89401822014-05-06 15:04:28 -04001584 case FORMAT_R32F: return 4;
1585 case FORMAT_G32R32F: return 8;
Nicolas Capens80594422015-06-09 16:42:56 -04001586 case FORMAT_B32G32R32F: return 12;
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04001587 case FORMAT_X32B32G32R32F: return 16;
John Bauman89401822014-05-06 15:04:28 -04001588 case FORMAT_A32B32G32R32F: return 16;
1589 // Depth/stencil formats
1590 case FORMAT_D16: return 2;
1591 case FORMAT_D32: return 4;
1592 case FORMAT_D24X8: return 4;
1593 case FORMAT_D24S8: return 4;
1594 case FORMAT_D24FS8: return 4;
1595 case FORMAT_D32F: return 4;
1596 case FORMAT_D32F_COMPLEMENTARY: return 4;
1597 case FORMAT_D32F_LOCKABLE: return 4;
John Bauman66b8ab22014-05-06 15:57:45 -04001598 case FORMAT_D32FS8_TEXTURE: return 4;
1599 case FORMAT_D32FS8_SHADOW: return 4;
1600 case FORMAT_DF24S8: return 4;
1601 case FORMAT_DF16S8: return 2;
John Bauman89401822014-05-06 15:04:28 -04001602 case FORMAT_INTZ: return 4;
1603 case FORMAT_S8: return 1;
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04001604 case FORMAT_YV12_BT601: return 1; // Y plane only
1605 case FORMAT_YV12_BT709: return 1; // Y plane only
1606 case FORMAT_YV12_JFIF: return 1; // Y plane only
John Bauman89401822014-05-06 15:04:28 -04001607 default:
1608 ASSERT(false);
1609 }
1610
1611 return 0;
1612 }
1613
1614 int Surface::pitchB(int width, Format format, bool target)
1615 {
1616 if(target || isDepth(format) || isStencil(format))
1617 {
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04001618 width = align(width, 2);
John Bauman89401822014-05-06 15:04:28 -04001619 }
1620
1621 switch(format)
1622 {
1623 #if S3TC_SUPPORT
1624 case FORMAT_DXT1:
Nicolas Capens22658242014-11-29 00:31:41 -05001625 #endif
1626 case FORMAT_ETC1:
Alexis Hetu460e41f2015-09-01 10:58:37 -04001627 case FORMAT_R11_EAC:
1628 case FORMAT_SIGNED_R11_EAC:
1629 case FORMAT_RGB8_ETC2:
1630 case FORMAT_SRGB8_ETC2:
1631 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
1632 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
John Bauman89401822014-05-06 15:04:28 -04001633 return 8 * ((width + 3) / 4); // 64 bit per 4x4 block, computed per 4 rows
Alexis Hetu460e41f2015-09-01 10:58:37 -04001634 case FORMAT_RG11_EAC:
1635 case FORMAT_SIGNED_RG11_EAC:
1636 case FORMAT_RGBA8_ETC2_EAC:
1637 case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
1638 case FORMAT_RGBA_ASTC_4x4_KHR:
1639 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
1640 return 16 * ((width + 3) / 4); // 128 bit per 4x4 block, computed per 4 rows
1641 case FORMAT_RGBA_ASTC_5x4_KHR:
1642 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
1643 case FORMAT_RGBA_ASTC_5x5_KHR:
1644 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
1645 return 16 * ((width + 4) / 5);
1646 case FORMAT_RGBA_ASTC_6x5_KHR:
1647 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
1648 case FORMAT_RGBA_ASTC_6x6_KHR:
1649 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
1650 return 16 * ((width + 5) / 6);
1651 case FORMAT_RGBA_ASTC_8x5_KHR:
1652 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
1653 case FORMAT_RGBA_ASTC_8x6_KHR:
1654 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
1655 case FORMAT_RGBA_ASTC_8x8_KHR:
1656 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
1657 return 16 * ((width + 7) / 8);
1658 case FORMAT_RGBA_ASTC_10x5_KHR:
1659 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
1660 case FORMAT_RGBA_ASTC_10x6_KHR:
1661 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
1662 case FORMAT_RGBA_ASTC_10x8_KHR:
1663 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
1664 case FORMAT_RGBA_ASTC_10x10_KHR:
1665 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
1666 return 16 * ((width + 9) / 10);
1667 case FORMAT_RGBA_ASTC_12x10_KHR:
1668 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
1669 case FORMAT_RGBA_ASTC_12x12_KHR:
1670 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:
1671 return 16 * ((width + 11) / 12);
Nicolas Capens22658242014-11-29 00:31:41 -05001672 #if S3TC_SUPPORT
John Bauman89401822014-05-06 15:04:28 -04001673 case FORMAT_DXT3:
1674 case FORMAT_DXT5:
1675 return 16 * ((width + 3) / 4); // 128 bit per 4x4 block, computed per 4 rows
John Bauman66b8ab22014-05-06 15:57:45 -04001676 #endif
John Bauman89401822014-05-06 15:04:28 -04001677 case FORMAT_ATI1:
1678 return 2 * ((width + 3) / 4); // 64 bit per 4x4 block, computed per row
1679 case FORMAT_ATI2:
1680 return 4 * ((width + 3) / 4); // 128 bit per 4x4 block, computed per row
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04001681 case FORMAT_YV12_BT601:
1682 case FORMAT_YV12_BT709:
1683 case FORMAT_YV12_JFIF:
1684 return align(width, 16);
John Bauman89401822014-05-06 15:04:28 -04001685 default:
1686 return bytes(format) * width;
1687 }
1688 }
1689
1690 int Surface::pitchP(int width, Format format, bool target)
1691 {
1692 int B = bytes(format);
1693
1694 return B > 0 ? pitchB(width, format, target) / B : 0;
1695 }
1696
1697 int Surface::sliceB(int width, int height, Format format, bool target)
1698 {
1699 if(target || isDepth(format) || isStencil(format))
1700 {
1701 height = ((height + 1) & ~1);
1702 }
1703
1704 switch(format)
1705 {
1706 #if S3TC_SUPPORT
1707 case FORMAT_DXT1:
1708 case FORMAT_DXT3:
1709 case FORMAT_DXT5:
John Bauman66b8ab22014-05-06 15:57:45 -04001710 #endif
Nicolas Capens22658242014-11-29 00:31:41 -05001711 case FORMAT_ETC1:
Alexis Hetu460e41f2015-09-01 10:58:37 -04001712 case FORMAT_R11_EAC:
1713 case FORMAT_SIGNED_R11_EAC:
1714 case FORMAT_RG11_EAC:
1715 case FORMAT_SIGNED_RG11_EAC:
1716 case FORMAT_RGB8_ETC2:
1717 case FORMAT_SRGB8_ETC2:
1718 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
1719 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
1720 case FORMAT_RGBA8_ETC2_EAC:
1721 case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
1722 case FORMAT_RGBA_ASTC_4x4_KHR:
1723 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
1724 case FORMAT_RGBA_ASTC_5x4_KHR:
1725 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
Nicolas Capens22658242014-11-29 00:31:41 -05001726 return pitchB(width, format, target) * ((height + 3) / 4); // Pitch computed per 4 rows
Alexis Hetu460e41f2015-09-01 10:58:37 -04001727 case FORMAT_RGBA_ASTC_5x5_KHR:
1728 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
1729 case FORMAT_RGBA_ASTC_6x5_KHR:
1730 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
1731 case FORMAT_RGBA_ASTC_8x5_KHR:
1732 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
1733 case FORMAT_RGBA_ASTC_10x5_KHR:
1734 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
1735 return pitchB(width, format, target) * ((height + 4) / 5); // Pitch computed per 5 rows
1736 case FORMAT_RGBA_ASTC_6x6_KHR:
1737 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
1738 case FORMAT_RGBA_ASTC_8x6_KHR:
1739 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
1740 case FORMAT_RGBA_ASTC_10x6_KHR:
1741 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
1742 return pitchB(width, format, target) * ((height + 5) / 6); // Pitch computed per 6 rows
1743 case FORMAT_RGBA_ASTC_8x8_KHR:
1744 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
1745 case FORMAT_RGBA_ASTC_10x8_KHR:
1746 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
1747 return pitchB(width, format, target) * ((height + 7) / 8); // Pitch computed per 8 rows
1748 case FORMAT_RGBA_ASTC_10x10_KHR:
1749 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
1750 case FORMAT_RGBA_ASTC_12x10_KHR:
1751 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
1752 return pitchB(width, format, target) * ((height + 9) / 10); // Pitch computed per 10 rows
1753 case FORMAT_RGBA_ASTC_12x12_KHR:
1754 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:
1755 return pitchB(width, format, target) * ((height + 11) / 12); // Pitch computed per 12 rows
Nicolas Capens22658242014-11-29 00:31:41 -05001756 case FORMAT_ATI1:
1757 case FORMAT_ATI2:
John Bauman89401822014-05-06 15:04:28 -04001758 default:
Nicolas Capens22658242014-11-29 00:31:41 -05001759 return pitchB(width, format, target) * height; // Pitch computed per row
John Bauman89401822014-05-06 15:04:28 -04001760 }
1761 }
1762
1763 int Surface::sliceP(int width, int height, Format format, bool target)
1764 {
1765 int B = bytes(format);
1766
1767 return B > 0 ? sliceB(width, height, format, target) / B : 0;
1768 }
1769
1770 void Surface::update(Buffer &destination, Buffer &source)
1771 {
1772 // ASSERT(source.lock != LOCK_UNLOCKED);
1773 // ASSERT(destination.lock != LOCK_UNLOCKED);
Nicolas Capensc39901e2016-03-21 16:37:44 -04001774
John Bauman89401822014-05-06 15:04:28 -04001775 if(destination.buffer != source.buffer)
1776 {
1777 ASSERT(source.dirty && !destination.dirty);
1778
1779 switch(source.format)
1780 {
1781 case FORMAT_R8G8B8: decodeR8G8B8(destination, source); break; // FIXME: Check destination format
John Bauman89401822014-05-06 15:04:28 -04001782 case FORMAT_X1R5G5B5: decodeX1R5G5B5(destination, source); break; // FIXME: Check destination format
1783 case FORMAT_A1R5G5B5: decodeA1R5G5B5(destination, source); break; // FIXME: Check destination format
1784 case FORMAT_X4R4G4B4: decodeX4R4G4B4(destination, source); break; // FIXME: Check destination format
1785 case FORMAT_A4R4G4B4: decodeA4R4G4B4(destination, source); break; // FIXME: Check destination format
1786 case FORMAT_P8: decodeP8(destination, source); break; // FIXME: Check destination format
1787 #if S3TC_SUPPORT
1788 case FORMAT_DXT1: decodeDXT1(destination, source); break; // FIXME: Check destination format
1789 case FORMAT_DXT3: decodeDXT3(destination, source); break; // FIXME: Check destination format
1790 case FORMAT_DXT5: decodeDXT5(destination, source); break; // FIXME: Check destination format
Nicolas Capens22658242014-11-29 00:31:41 -05001791 #endif
John Bauman89401822014-05-06 15:04:28 -04001792 case FORMAT_ATI1: decodeATI1(destination, source); break; // FIXME: Check destination format
1793 case FORMAT_ATI2: decodeATI2(destination, source); break; // FIXME: Check destination format
Alexis Hetu460e41f2015-09-01 10:58:37 -04001794 case FORMAT_R11_EAC: decodeEAC(destination, source, 1, false); break; // FIXME: Check destination format
1795 case FORMAT_SIGNED_R11_EAC: decodeEAC(destination, source, 1, true); break; // FIXME: Check destination format
1796 case FORMAT_RG11_EAC: decodeEAC(destination, source, 2, false); break; // FIXME: Check destination format
1797 case FORMAT_SIGNED_RG11_EAC: decodeEAC(destination, source, 2, true); break; // FIXME: Check destination format
Alexis Hetu0de50d42015-09-09 13:56:41 -04001798 case FORMAT_ETC1:
Alexis Hetu460e41f2015-09-01 10:58:37 -04001799 case FORMAT_RGB8_ETC2: decodeETC2(destination, source, 0, false); break; // FIXME: Check destination format
1800 case FORMAT_SRGB8_ETC2: decodeETC2(destination, source, 0, true); break; // FIXME: Check destination format
1801 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2: decodeETC2(destination, source, 1, false); break; // FIXME: Check destination format
1802 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2: decodeETC2(destination, source, 1, true); break; // FIXME: Check destination format
1803 case FORMAT_RGBA8_ETC2_EAC: decodeETC2(destination, source, 8, false); break; // FIXME: Check destination format
1804 case FORMAT_SRGB8_ALPHA8_ETC2_EAC: decodeETC2(destination, source, 8, true); break; // FIXME: Check destination format
1805 case FORMAT_RGBA_ASTC_4x4_KHR: decodeASTC(destination, source, 4, 4, 1, false); break; // FIXME: Check destination format
1806 case FORMAT_RGBA_ASTC_5x4_KHR: decodeASTC(destination, source, 5, 4, 1, false); break; // FIXME: Check destination format
1807 case FORMAT_RGBA_ASTC_5x5_KHR: decodeASTC(destination, source, 5, 5, 1, false); break; // FIXME: Check destination format
1808 case FORMAT_RGBA_ASTC_6x5_KHR: decodeASTC(destination, source, 6, 5, 1, false); break; // FIXME: Check destination format
1809 case FORMAT_RGBA_ASTC_6x6_KHR: decodeASTC(destination, source, 6, 6, 1, false); break; // FIXME: Check destination format
1810 case FORMAT_RGBA_ASTC_8x5_KHR: decodeASTC(destination, source, 8, 5, 1, false); break; // FIXME: Check destination format
1811 case FORMAT_RGBA_ASTC_8x6_KHR: decodeASTC(destination, source, 8, 6, 1, false); break; // FIXME: Check destination format
1812 case FORMAT_RGBA_ASTC_8x8_KHR: decodeASTC(destination, source, 8, 8, 1, false); break; // FIXME: Check destination format
1813 case FORMAT_RGBA_ASTC_10x5_KHR: decodeASTC(destination, source, 10, 5, 1, false); break; // FIXME: Check destination format
1814 case FORMAT_RGBA_ASTC_10x6_KHR: decodeASTC(destination, source, 10, 6, 1, false); break; // FIXME: Check destination format
1815 case FORMAT_RGBA_ASTC_10x8_KHR: decodeASTC(destination, source, 10, 8, 1, false); break; // FIXME: Check destination format
1816 case FORMAT_RGBA_ASTC_10x10_KHR: decodeASTC(destination, source, 10, 10, 1, false); break; // FIXME: Check destination format
1817 case FORMAT_RGBA_ASTC_12x10_KHR: decodeASTC(destination, source, 12, 10, 1, false); break; // FIXME: Check destination format
1818 case FORMAT_RGBA_ASTC_12x12_KHR: decodeASTC(destination, source, 12, 12, 1, false); break; // FIXME: Check destination format
1819 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR: decodeASTC(destination, source, 4, 4, 1, true); break; // FIXME: Check destination format
1820 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR: decodeASTC(destination, source, 5, 4, 1, true); break; // FIXME: Check destination format
1821 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR: decodeASTC(destination, source, 5, 5, 1, true); break; // FIXME: Check destination format
1822 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR: decodeASTC(destination, source, 6, 5, 1, true); break; // FIXME: Check destination format
1823 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR: decodeASTC(destination, source, 6, 6, 1, true); break; // FIXME: Check destination format
1824 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR: decodeASTC(destination, source, 8, 5, 1, true); break; // FIXME: Check destination format
1825 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR: decodeASTC(destination, source, 8, 6, 1, true); break; // FIXME: Check destination format
1826 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR: decodeASTC(destination, source, 8, 8, 1, true); break; // FIXME: Check destination format
1827 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR: decodeASTC(destination, source, 10, 5, 1, true); break; // FIXME: Check destination format
1828 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR: decodeASTC(destination, source, 10, 6, 1, true); break; // FIXME: Check destination format
1829 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR: decodeASTC(destination, source, 10, 8, 1, true); break; // FIXME: Check destination format
1830 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR: decodeASTC(destination, source, 10, 10, 1, true); break; // FIXME: Check destination format
1831 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR: decodeASTC(destination, source, 12, 10, 1, true); break; // FIXME: Check destination format
1832 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR: decodeASTC(destination, source, 12, 12, 1, true); break; // FIXME: Check destination format
John Bauman89401822014-05-06 15:04:28 -04001833 default: genericUpdate(destination, source); break;
1834 }
1835 }
John Bauman89401822014-05-06 15:04:28 -04001836 }
1837
1838 void Surface::genericUpdate(Buffer &destination, Buffer &source)
1839 {
1840 unsigned char *sourceSlice = (unsigned char*)source.buffer;
1841 unsigned char *destinationSlice = (unsigned char*)destination.buffer;
1842
1843 int depth = min(destination.depth, source.depth);
1844 int height = min(destination.height, source.height);
1845 int width = min(destination.width, source.width);
1846 int rowBytes = width * source.bytes;
1847
1848 for(int z = 0; z < depth; z++)
1849 {
1850 unsigned char *sourceRow = sourceSlice;
1851 unsigned char *destinationRow = destinationSlice;
1852
1853 for(int y = 0; y < height; y++)
1854 {
1855 if(source.format == destination.format)
1856 {
1857 memcpy(destinationRow, sourceRow, rowBytes);
1858 }
1859 else
1860 {
1861 unsigned char *sourceElement = sourceRow;
1862 unsigned char *destinationElement = destinationRow;
1863
1864 for(int x = 0; x < width; x++)
1865 {
1866 Color<float> color = source.read(sourceElement);
1867 destination.write(destinationElement, color);
1868
1869 sourceElement += source.bytes;
1870 destinationElement += destination.bytes;
1871 }
1872 }
1873
1874 sourceRow += source.pitchB;
1875 destinationRow += destination.pitchB;
1876 }
1877
1878 sourceSlice += source.sliceB;
1879 destinationSlice += destination.sliceB;
1880 }
1881 }
1882
1883 void Surface::decodeR8G8B8(Buffer &destination, const Buffer &source)
1884 {
1885 unsigned char *sourceSlice = (unsigned char*)source.buffer;
1886 unsigned char *destinationSlice = (unsigned char*)destination.buffer;
1887
1888 for(int z = 0; z < destination.depth && z < source.depth; z++)
1889 {
1890 unsigned char *sourceRow = sourceSlice;
1891 unsigned char *destinationRow = destinationSlice;
1892
1893 for(int y = 0; y < destination.height && y < source.height; y++)
1894 {
1895 unsigned char *sourceElement = sourceRow;
1896 unsigned char *destinationElement = destinationRow;
1897
1898 for(int x = 0; x < destination.width && x < source.width; x++)
1899 {
1900 unsigned int b = sourceElement[0];
1901 unsigned int g = sourceElement[1];
1902 unsigned int r = sourceElement[2];
1903
1904 *(unsigned int*)destinationElement = 0xFF000000 | (r << 16) | (g << 8) | (b << 0);
1905
1906 sourceElement += source.bytes;
1907 destinationElement += destination.bytes;
1908 }
1909
1910 sourceRow += source.pitchB;
1911 destinationRow += destination.pitchB;
1912 }
1913
1914 sourceSlice += source.sliceB;
1915 destinationSlice += destination.sliceB;
1916 }
1917 }
1918
John Bauman89401822014-05-06 15:04:28 -04001919 void Surface::decodeX1R5G5B5(Buffer &destination, const Buffer &source)
1920 {
1921 unsigned char *sourceSlice = (unsigned char*)source.buffer;
1922 unsigned char *destinationSlice = (unsigned char*)destination.buffer;
1923
1924 for(int z = 0; z < destination.depth && z < source.depth; z++)
1925 {
1926 unsigned char *sourceRow = sourceSlice;
1927 unsigned char *destinationRow = destinationSlice;
1928
1929 for(int y = 0; y < destination.height && y < source.height; y++)
1930 {
1931 unsigned char *sourceElement = sourceRow;
1932 unsigned char *destinationElement = destinationRow;
1933
1934 for(int x = 0; x < destination.width && x < source.width; x++)
1935 {
1936 unsigned int xrgb = *(unsigned short*)sourceElement;
Nicolas Capensc39901e2016-03-21 16:37:44 -04001937
John Bauman89401822014-05-06 15:04:28 -04001938 unsigned int r = (((xrgb & 0x7C00) * 134771 + 0x800000) >> 8) & 0x00FF0000;
1939 unsigned int g = (((xrgb & 0x03E0) * 16846 + 0x8000) >> 8) & 0x0000FF00;
1940 unsigned int b = (((xrgb & 0x001F) * 2106 + 0x80) >> 8);
1941
1942 *(unsigned int*)destinationElement = 0xFF000000 | r | g | b;
1943
1944 sourceElement += source.bytes;
1945 destinationElement += destination.bytes;
1946 }
1947
1948 sourceRow += source.pitchB;
1949 destinationRow += destination.pitchB;
1950 }
1951
1952 sourceSlice += source.sliceB;
1953 destinationSlice += destination.sliceB;
1954 }
1955 }
1956
1957 void Surface::decodeA1R5G5B5(Buffer &destination, const Buffer &source)
1958 {
1959 unsigned char *sourceSlice = (unsigned char*)source.buffer;
1960 unsigned char *destinationSlice = (unsigned char*)destination.buffer;
1961
1962 for(int z = 0; z < destination.depth && z < source.depth; z++)
1963 {
1964 unsigned char *sourceRow = sourceSlice;
1965 unsigned char *destinationRow = destinationSlice;
1966
1967 for(int y = 0; y < destination.height && y < source.height; y++)
1968 {
1969 unsigned char *sourceElement = sourceRow;
1970 unsigned char *destinationElement = destinationRow;
1971
1972 for(int x = 0; x < destination.width && x < source.width; x++)
1973 {
1974 unsigned int argb = *(unsigned short*)sourceElement;
Nicolas Capensc39901e2016-03-21 16:37:44 -04001975
John Bauman89401822014-05-06 15:04:28 -04001976 unsigned int a = (argb & 0x8000) * 130560;
1977 unsigned int r = (((argb & 0x7C00) * 134771 + 0x800000) >> 8) & 0x00FF0000;
1978 unsigned int g = (((argb & 0x03E0) * 16846 + 0x8000) >> 8) & 0x0000FF00;
1979 unsigned int b = (((argb & 0x001F) * 2106 + 0x80) >> 8);
1980
1981 *(unsigned int*)destinationElement = a | r | g | b;
1982
1983 sourceElement += source.bytes;
1984 destinationElement += destination.bytes;
1985 }
1986
1987 sourceRow += source.pitchB;
1988 destinationRow += destination.pitchB;
1989 }
1990
1991 sourceSlice += source.sliceB;
1992 destinationSlice += destination.sliceB;
1993 }
1994 }
1995
1996 void Surface::decodeX4R4G4B4(Buffer &destination, const Buffer &source)
1997 {
1998 unsigned char *sourceSlice = (unsigned char*)source.buffer;
1999 unsigned char *destinationSlice = (unsigned char*)destination.buffer;
2000
2001 for(int z = 0; z < destination.depth && z < source.depth; z++)
2002 {
2003 unsigned char *sourceRow = sourceSlice;
2004 unsigned char *destinationRow = destinationSlice;
2005
2006 for(int y = 0; y < destination.height && y < source.height; y++)
2007 {
2008 unsigned char *sourceElement = sourceRow;
2009 unsigned char *destinationElement = destinationRow;
2010
2011 for(int x = 0; x < destination.width && x < source.width; x++)
2012 {
2013 unsigned int xrgb = *(unsigned short*)sourceElement;
Nicolas Capensc39901e2016-03-21 16:37:44 -04002014
John Bauman89401822014-05-06 15:04:28 -04002015 unsigned int r = ((xrgb & 0x0F00) * 0x00001100) & 0x00FF0000;
2016 unsigned int g = ((xrgb & 0x00F0) * 0x00000110) & 0x0000FF00;
2017 unsigned int b = (xrgb & 0x000F) * 0x00000011;
2018
2019 *(unsigned int*)destinationElement = 0xFF000000 | r | g | b;
2020
2021 sourceElement += source.bytes;
2022 destinationElement += destination.bytes;
2023 }
2024
2025 sourceRow += source.pitchB;
2026 destinationRow += destination.pitchB;
2027 }
2028
2029 sourceSlice += source.sliceB;
2030 destinationSlice += destination.sliceB;
2031 }
2032 }
2033
2034 void Surface::decodeA4R4G4B4(Buffer &destination, const Buffer &source)
2035 {
2036 unsigned char *sourceSlice = (unsigned char*)source.buffer;
2037 unsigned char *destinationSlice = (unsigned char*)destination.buffer;
2038
2039 for(int z = 0; z < destination.depth && z < source.depth; z++)
2040 {
2041 unsigned char *sourceRow = sourceSlice;
2042 unsigned char *destinationRow = destinationSlice;
2043
2044 for(int y = 0; y < destination.height && y < source.height; y++)
2045 {
2046 unsigned char *sourceElement = sourceRow;
2047 unsigned char *destinationElement = destinationRow;
2048
2049 for(int x = 0; x < destination.width && x < source.width; x++)
2050 {
2051 unsigned int argb = *(unsigned short*)sourceElement;
Nicolas Capensc39901e2016-03-21 16:37:44 -04002052
John Bauman89401822014-05-06 15:04:28 -04002053 unsigned int a = ((argb & 0xF000) * 0x00011000) & 0xFF000000;
2054 unsigned int r = ((argb & 0x0F00) * 0x00001100) & 0x00FF0000;
2055 unsigned int g = ((argb & 0x00F0) * 0x00000110) & 0x0000FF00;
2056 unsigned int b = (argb & 0x000F) * 0x00000011;
2057
2058 *(unsigned int*)destinationElement = a | r | g | b;
2059
2060 sourceElement += source.bytes;
2061 destinationElement += destination.bytes;
2062 }
2063
2064 sourceRow += source.pitchB;
2065 destinationRow += destination.pitchB;
2066 }
2067
2068 sourceSlice += source.sliceB;
2069 destinationSlice += destination.sliceB;
2070 }
2071 }
2072
2073 void Surface::decodeP8(Buffer &destination, const Buffer &source)
2074 {
2075 unsigned char *sourceSlice = (unsigned char*)source.buffer;
2076 unsigned char *destinationSlice = (unsigned char*)destination.buffer;
2077
2078 for(int z = 0; z < destination.depth && z < source.depth; z++)
2079 {
2080 unsigned char *sourceRow = sourceSlice;
2081 unsigned char *destinationRow = destinationSlice;
2082
2083 for(int y = 0; y < destination.height && y < source.height; y++)
2084 {
2085 unsigned char *sourceElement = sourceRow;
2086 unsigned char *destinationElement = destinationRow;
2087
2088 for(int x = 0; x < destination.width && x < source.width; x++)
2089 {
2090 unsigned int abgr = palette[*(unsigned char*)sourceElement];
2091
2092 unsigned int r = (abgr & 0x000000FF) << 16;
2093 unsigned int g = (abgr & 0x0000FF00) << 0;
2094 unsigned int b = (abgr & 0x00FF0000) >> 16;
2095 unsigned int a = (abgr & 0xFF000000) >> 0;
2096
2097 *(unsigned int*)destinationElement = a | r | g | b;
2098
2099 sourceElement += source.bytes;
2100 destinationElement += destination.bytes;
2101 }
2102
2103 sourceRow += source.pitchB;
2104 destinationRow += destination.pitchB;
2105 }
2106
2107 sourceSlice += source.sliceB;
2108 destinationSlice += destination.sliceB;
2109 }
2110 }
2111
2112#if S3TC_SUPPORT
2113 void Surface::decodeDXT1(Buffer &internal, const Buffer &external)
2114 {
2115 unsigned int *destSlice = (unsigned int*)internal.buffer;
Nicolas Capens22658242014-11-29 00:31:41 -05002116 const DXT1 *source = (const DXT1*)external.buffer;
John Bauman89401822014-05-06 15:04:28 -04002117
2118 for(int z = 0; z < external.depth; z++)
2119 {
2120 unsigned int *dest = destSlice;
2121
2122 for(int y = 0; y < external.height; y += 4)
2123 {
2124 for(int x = 0; x < external.width; x += 4)
2125 {
2126 Color<byte> c[4];
2127
2128 c[0] = source->c0;
2129 c[1] = source->c1;
2130
2131 if(source->c0 > source->c1) // No transparency
2132 {
2133 // c2 = 2 / 3 * c0 + 1 / 3 * c1
2134 c[2].r = (byte)((2 * (word)c[0].r + (word)c[1].r + 1) / 3);
2135 c[2].g = (byte)((2 * (word)c[0].g + (word)c[1].g + 1) / 3);
2136 c[2].b = (byte)((2 * (word)c[0].b + (word)c[1].b + 1) / 3);
2137 c[2].a = 0xFF;
2138
2139 // c3 = 1 / 3 * c0 + 2 / 3 * c1
2140 c[3].r = (byte)(((word)c[0].r + 2 * (word)c[1].r + 1) / 3);
2141 c[3].g = (byte)(((word)c[0].g + 2 * (word)c[1].g + 1) / 3);
2142 c[3].b = (byte)(((word)c[0].b + 2 * (word)c[1].b + 1) / 3);
2143 c[3].a = 0xFF;
2144 }
2145 else // c3 transparent
2146 {
2147 // c2 = 1 / 2 * c0 + 1 / 2 * c1
2148 c[2].r = (byte)(((word)c[0].r + (word)c[1].r) / 2);
2149 c[2].g = (byte)(((word)c[0].g + (word)c[1].g) / 2);
2150 c[2].b = (byte)(((word)c[0].b + (word)c[1].b) / 2);
2151 c[2].a = 0xFF;
2152
2153 c[3].r = 0;
2154 c[3].g = 0;
2155 c[3].b = 0;
2156 c[3].a = 0;
2157 }
2158
2159 for(int j = 0; j < 4 && (y + j) < internal.height; j++)
2160 {
2161 for(int i = 0; i < 4 && (x + i) < internal.width; i++)
2162 {
2163 dest[(x + i) + (y + j) * internal.width] = c[(unsigned int)(source->lut >> 2 * (i + j * 4)) % 4];
2164 }
2165 }
2166
2167 source++;
2168 }
2169 }
2170
2171 (byte*&)destSlice += internal.sliceB;
2172 }
2173 }
2174
2175 void Surface::decodeDXT3(Buffer &internal, const Buffer &external)
2176 {
2177 unsigned int *destSlice = (unsigned int*)internal.buffer;
Nicolas Capens22658242014-11-29 00:31:41 -05002178 const DXT3 *source = (const DXT3*)external.buffer;
John Bauman89401822014-05-06 15:04:28 -04002179
2180 for(int z = 0; z < external.depth; z++)
2181 {
2182 unsigned int *dest = destSlice;
2183
2184 for(int y = 0; y < external.height; y += 4)
2185 {
2186 for(int x = 0; x < external.width; x += 4)
2187 {
2188 Color<byte> c[4];
2189
2190 c[0] = source->c0;
2191 c[1] = source->c1;
2192
2193 // c2 = 2 / 3 * c0 + 1 / 3 * c1
2194 c[2].r = (byte)((2 * (word)c[0].r + (word)c[1].r + 1) / 3);
2195 c[2].g = (byte)((2 * (word)c[0].g + (word)c[1].g + 1) / 3);
2196 c[2].b = (byte)((2 * (word)c[0].b + (word)c[1].b + 1) / 3);
2197
2198 // c3 = 1 / 3 * c0 + 2 / 3 * c1
2199 c[3].r = (byte)(((word)c[0].r + 2 * (word)c[1].r + 1) / 3);
2200 c[3].g = (byte)(((word)c[0].g + 2 * (word)c[1].g + 1) / 3);
2201 c[3].b = (byte)(((word)c[0].b + 2 * (word)c[1].b + 1) / 3);
2202
2203 for(int j = 0; j < 4 && (y + j) < internal.height; j++)
2204 {
2205 for(int i = 0; i < 4 && (x + i) < internal.width; i++)
2206 {
2207 unsigned int a = (unsigned int)(source->a >> 4 * (i + j * 4)) & 0x0F;
2208 unsigned int color = (c[(unsigned int)(source->lut >> 2 * (i + j * 4)) % 4] & 0x00FFFFFF) | ((a << 28) + (a << 24));
2209
2210 dest[(x + i) + (y + j) * internal.width] = color;
2211 }
2212 }
2213
2214 source++;
2215 }
2216 }
2217
2218 (byte*&)destSlice += internal.sliceB;
2219 }
2220 }
2221
2222 void Surface::decodeDXT5(Buffer &internal, const Buffer &external)
2223 {
2224 unsigned int *destSlice = (unsigned int*)internal.buffer;
Nicolas Capens22658242014-11-29 00:31:41 -05002225 const DXT5 *source = (const DXT5*)external.buffer;
John Bauman89401822014-05-06 15:04:28 -04002226
2227 for(int z = 0; z < external.depth; z++)
2228 {
2229 unsigned int *dest = destSlice;
2230
2231 for(int y = 0; y < external.height; y += 4)
2232 {
2233 for(int x = 0; x < external.width; x += 4)
2234 {
2235 Color<byte> c[4];
2236
2237 c[0] = source->c0;
2238 c[1] = source->c1;
2239
2240 // c2 = 2 / 3 * c0 + 1 / 3 * c1
2241 c[2].r = (byte)((2 * (word)c[0].r + (word)c[1].r + 1) / 3);
2242 c[2].g = (byte)((2 * (word)c[0].g + (word)c[1].g + 1) / 3);
2243 c[2].b = (byte)((2 * (word)c[0].b + (word)c[1].b + 1) / 3);
2244
2245 // c3 = 1 / 3 * c0 + 2 / 3 * c1
2246 c[3].r = (byte)(((word)c[0].r + 2 * (word)c[1].r + 1) / 3);
2247 c[3].g = (byte)(((word)c[0].g + 2 * (word)c[1].g + 1) / 3);
2248 c[3].b = (byte)(((word)c[0].b + 2 * (word)c[1].b + 1) / 3);
2249
2250 byte a[8];
2251
2252 a[0] = source->a0;
2253 a[1] = source->a1;
Nicolas Capensc39901e2016-03-21 16:37:44 -04002254
John Bauman89401822014-05-06 15:04:28 -04002255 if(a[0] > a[1])
2256 {
2257 a[2] = (byte)((6 * (word)a[0] + 1 * (word)a[1] + 3) / 7);
2258 a[3] = (byte)((5 * (word)a[0] + 2 * (word)a[1] + 3) / 7);
2259 a[4] = (byte)((4 * (word)a[0] + 3 * (word)a[1] + 3) / 7);
2260 a[5] = (byte)((3 * (word)a[0] + 4 * (word)a[1] + 3) / 7);
2261 a[6] = (byte)((2 * (word)a[0] + 5 * (word)a[1] + 3) / 7);
2262 a[7] = (byte)((1 * (word)a[0] + 6 * (word)a[1] + 3) / 7);
2263 }
2264 else
2265 {
2266 a[2] = (byte)((4 * (word)a[0] + 1 * (word)a[1] + 2) / 5);
2267 a[3] = (byte)((3 * (word)a[0] + 2 * (word)a[1] + 2) / 5);
2268 a[4] = (byte)((2 * (word)a[0] + 3 * (word)a[1] + 2) / 5);
2269 a[5] = (byte)((1 * (word)a[0] + 4 * (word)a[1] + 2) / 5);
2270 a[6] = 0;
2271 a[7] = 0xFF;
2272 }
2273
2274 for(int j = 0; j < 4 && (y + j) < internal.height; j++)
2275 {
2276 for(int i = 0; i < 4 && (x + i) < internal.width; i++)
2277 {
2278 unsigned int alpha = (unsigned int)a[(unsigned int)(source->alut >> (16 + 3 * (i + j * 4))) % 8] << 24;
2279 unsigned int color = (c[(source->clut >> 2 * (i + j * 4)) % 4] & 0x00FFFFFF) | alpha;
Nicolas Capensc39901e2016-03-21 16:37:44 -04002280
John Bauman89401822014-05-06 15:04:28 -04002281 dest[(x + i) + (y + j) * internal.width] = color;
2282 }
2283 }
2284
2285 source++;
2286 }
2287 }
2288
2289 (byte*&)destSlice += internal.sliceB;
2290 }
2291 }
Nicolas Capens22658242014-11-29 00:31:41 -05002292#endif
John Bauman89401822014-05-06 15:04:28 -04002293
2294 void Surface::decodeATI1(Buffer &internal, const Buffer &external)
2295 {
2296 byte *destSlice = (byte*)internal.buffer;
Nicolas Capens22658242014-11-29 00:31:41 -05002297 const ATI1 *source = (const ATI1*)external.buffer;
John Bauman89401822014-05-06 15:04:28 -04002298
2299 for(int z = 0; z < external.depth; z++)
2300 {
2301 byte *dest = destSlice;
2302
2303 for(int y = 0; y < external.height; y += 4)
2304 {
2305 for(int x = 0; x < external.width; x += 4)
2306 {
2307 byte r[8];
2308
2309 r[0] = source->r0;
2310 r[1] = source->r1;
Nicolas Capensc39901e2016-03-21 16:37:44 -04002311
John Bauman89401822014-05-06 15:04:28 -04002312 if(r[0] > r[1])
2313 {
2314 r[2] = (byte)((6 * (word)r[0] + 1 * (word)r[1] + 3) / 7);
2315 r[3] = (byte)((5 * (word)r[0] + 2 * (word)r[1] + 3) / 7);
2316 r[4] = (byte)((4 * (word)r[0] + 3 * (word)r[1] + 3) / 7);
2317 r[5] = (byte)((3 * (word)r[0] + 4 * (word)r[1] + 3) / 7);
2318 r[6] = (byte)((2 * (word)r[0] + 5 * (word)r[1] + 3) / 7);
2319 r[7] = (byte)((1 * (word)r[0] + 6 * (word)r[1] + 3) / 7);
2320 }
2321 else
2322 {
2323 r[2] = (byte)((4 * (word)r[0] + 1 * (word)r[1] + 2) / 5);
2324 r[3] = (byte)((3 * (word)r[0] + 2 * (word)r[1] + 2) / 5);
2325 r[4] = (byte)((2 * (word)r[0] + 3 * (word)r[1] + 2) / 5);
2326 r[5] = (byte)((1 * (word)r[0] + 4 * (word)r[1] + 2) / 5);
2327 r[6] = 0;
2328 r[7] = 0xFF;
2329 }
2330
2331 for(int j = 0; j < 4 && (y + j) < internal.height; j++)
2332 {
2333 for(int i = 0; i < 4 && (x + i) < internal.width; i++)
2334 {
2335 dest[(x + i) + (y + j) * internal.width] = r[(unsigned int)(source->rlut >> (16 + 3 * (i + j * 4))) % 8];
2336 }
2337 }
2338
2339 source++;
2340 }
2341 }
2342
2343 destSlice += internal.sliceB;
2344 }
2345 }
2346
2347 void Surface::decodeATI2(Buffer &internal, const Buffer &external)
2348 {
2349 word *destSlice = (word*)internal.buffer;
Nicolas Capens22658242014-11-29 00:31:41 -05002350 const ATI2 *source = (const ATI2*)external.buffer;
John Bauman89401822014-05-06 15:04:28 -04002351
2352 for(int z = 0; z < external.depth; z++)
2353 {
2354 word *dest = destSlice;
2355
2356 for(int y = 0; y < external.height; y += 4)
2357 {
2358 for(int x = 0; x < external.width; x += 4)
2359 {
2360 byte X[8];
2361
2362 X[0] = source->x0;
2363 X[1] = source->x1;
Nicolas Capensc39901e2016-03-21 16:37:44 -04002364
John Bauman89401822014-05-06 15:04:28 -04002365 if(X[0] > X[1])
2366 {
2367 X[2] = (byte)((6 * (word)X[0] + 1 * (word)X[1] + 3) / 7);
2368 X[3] = (byte)((5 * (word)X[0] + 2 * (word)X[1] + 3) / 7);
2369 X[4] = (byte)((4 * (word)X[0] + 3 * (word)X[1] + 3) / 7);
2370 X[5] = (byte)((3 * (word)X[0] + 4 * (word)X[1] + 3) / 7);
2371 X[6] = (byte)((2 * (word)X[0] + 5 * (word)X[1] + 3) / 7);
2372 X[7] = (byte)((1 * (word)X[0] + 6 * (word)X[1] + 3) / 7);
2373 }
2374 else
2375 {
2376 X[2] = (byte)((4 * (word)X[0] + 1 * (word)X[1] + 2) / 5);
2377 X[3] = (byte)((3 * (word)X[0] + 2 * (word)X[1] + 2) / 5);
2378 X[4] = (byte)((2 * (word)X[0] + 3 * (word)X[1] + 2) / 5);
2379 X[5] = (byte)((1 * (word)X[0] + 4 * (word)X[1] + 2) / 5);
2380 X[6] = 0;
2381 X[7] = 0xFF;
2382 }
2383
2384 byte Y[8];
2385
2386 Y[0] = source->y0;
2387 Y[1] = source->y1;
Nicolas Capensc39901e2016-03-21 16:37:44 -04002388
John Bauman89401822014-05-06 15:04:28 -04002389 if(Y[0] > Y[1])
2390 {
2391 Y[2] = (byte)((6 * (word)Y[0] + 1 * (word)Y[1] + 3) / 7);
2392 Y[3] = (byte)((5 * (word)Y[0] + 2 * (word)Y[1] + 3) / 7);
2393 Y[4] = (byte)((4 * (word)Y[0] + 3 * (word)Y[1] + 3) / 7);
2394 Y[5] = (byte)((3 * (word)Y[0] + 4 * (word)Y[1] + 3) / 7);
2395 Y[6] = (byte)((2 * (word)Y[0] + 5 * (word)Y[1] + 3) / 7);
2396 Y[7] = (byte)((1 * (word)Y[0] + 6 * (word)Y[1] + 3) / 7);
2397 }
2398 else
2399 {
2400 Y[2] = (byte)((4 * (word)Y[0] + 1 * (word)Y[1] + 2) / 5);
2401 Y[3] = (byte)((3 * (word)Y[0] + 2 * (word)Y[1] + 2) / 5);
2402 Y[4] = (byte)((2 * (word)Y[0] + 3 * (word)Y[1] + 2) / 5);
2403 Y[5] = (byte)((1 * (word)Y[0] + 4 * (word)Y[1] + 2) / 5);
2404 Y[6] = 0;
2405 Y[7] = 0xFF;
2406 }
2407
2408 for(int j = 0; j < 4 && (y + j) < internal.height; j++)
2409 {
2410 for(int i = 0; i < 4 && (x + i) < internal.width; i++)
2411 {
2412 word r = X[(unsigned int)(source->xlut >> (16 + 3 * (i + j * 4))) % 8];
2413 word g = Y[(unsigned int)(source->ylut >> (16 + 3 * (i + j * 4))) % 8];
2414
2415 dest[(x + i) + (y + j) * internal.width] = (g << 8) + r;
2416 }
2417 }
2418
2419 source++;
2420 }
2421 }
2422
2423 (byte*&)destSlice += internal.sliceB;
2424 }
2425 }
Nicolas Capens22658242014-11-29 00:31:41 -05002426
Alexis Hetu0de50d42015-09-09 13:56:41 -04002427 void Surface::decodeETC2(Buffer &internal, const Buffer &external, int nbAlphaBits, bool isSRGB)
Nicolas Capens22658242014-11-29 00:31:41 -05002428 {
Alexis Hetu0de50d42015-09-09 13:56:41 -04002429 ETC_Decoder::Decode((const byte*)external.buffer, (byte*)internal.buffer, external.width, external.height, internal.width, internal.height, internal.pitchB, internal.bytes,
2430 (nbAlphaBits == 8) ? ETC_Decoder::ETC_RGBA : ((nbAlphaBits == 1) ? ETC_Decoder::ETC_RGB_PUNCHTHROUGH_ALPHA : ETC_Decoder::ETC_RGB));
Nicolas Capens22658242014-11-29 00:31:41 -05002431
Alexis Hetu0de50d42015-09-09 13:56:41 -04002432 if(isSRGB)
Nicolas Capens22658242014-11-29 00:31:41 -05002433 {
Alexis Hetu0de50d42015-09-09 13:56:41 -04002434 static byte sRGBtoLinearTable[256];
2435 static bool sRGBtoLinearTableDirty = true;
2436 if(sRGBtoLinearTableDirty)
Nicolas Capens22658242014-11-29 00:31:41 -05002437 {
Alexis Hetu0de50d42015-09-09 13:56:41 -04002438 for(int i = 0; i < 256; i++)
Nicolas Capens22658242014-11-29 00:31:41 -05002439 {
Alexis Hetu0de50d42015-09-09 13:56:41 -04002440 sRGBtoLinearTable[i] = static_cast<byte>(sRGBtoLinear(static_cast<float>(i) / 255.0f) * 255.0f + 0.5f);
Nicolas Capens22658242014-11-29 00:31:41 -05002441 }
Alexis Hetu0de50d42015-09-09 13:56:41 -04002442 sRGBtoLinearTableDirty = false;
Nicolas Capens22658242014-11-29 00:31:41 -05002443 }
2444
Alexis Hetu0de50d42015-09-09 13:56:41 -04002445 // Perform sRGB conversion in place after decoding
2446 byte* src = (byte*)internal.buffer;
2447 for(int y = 0; y < internal.height; y++)
2448 {
2449 byte* srcRow = src + y * internal.pitchB;
2450 for(int x = 0; x < internal.width; x++)
2451 {
2452 byte* srcPix = srcRow + x * internal.bytes;
2453 for(int i = 0; i < 3; i++)
2454 {
2455 srcPix[i] = sRGBtoLinearTable[srcPix[i]];
2456 }
2457 }
2458 }
Nicolas Capens22658242014-11-29 00:31:41 -05002459 }
2460 }
John Bauman89401822014-05-06 15:04:28 -04002461
Alexis Hetu460e41f2015-09-01 10:58:37 -04002462 void Surface::decodeEAC(Buffer &internal, const Buffer &external, int nbChannels, bool isSigned)
2463 {
Alexis Hetu0de50d42015-09-09 13:56:41 -04002464 ASSERT(nbChannels == 1 || nbChannels == 2);
Alexis Hetu460e41f2015-09-01 10:58:37 -04002465
Alexis Hetu0de50d42015-09-09 13:56:41 -04002466 ETC_Decoder::Decode((const byte*)external.buffer, (byte*)internal.buffer, external.width, external.height, internal.width, internal.height, internal.pitchB, internal.bytes,
2467 (nbChannels == 1) ? (isSigned ? ETC_Decoder::ETC_R_SIGNED : ETC_Decoder::ETC_R_UNSIGNED) : (isSigned ? ETC_Decoder::ETC_RG_SIGNED : ETC_Decoder::ETC_RG_UNSIGNED));
2468
2469 // FIXME: We convert signed data to float, until signed integer internal formats are supported
2470 // This code can be removed if signed ETC2 images are decoded to internal 8 bit signed R/RG formats
2471 if(isSigned)
2472 {
2473 sbyte* src = (sbyte*)internal.buffer;
2474
2475 for(int y = 0; y < internal.height; y++)
2476 {
2477 sbyte* srcRow = src + y * internal.pitchB;
2478 for(int x = internal.width - 1; x >= 0; x--)
2479 {
2480 int dx = x & 0xFFFFFFFC;
2481 int mx = x - dx;
2482 sbyte* srcPix = srcRow + dx * internal.bytes + mx * nbChannels;
2483 float* dstPix = (float*)(srcRow + x * internal.bytes);
2484 for(int c = nbChannels - 1; c >= 0; c--)
2485 {
2486 static const float normalization = 1.0f / 127.875f;
2487 dstPix[c] = clamp(static_cast<float>(srcPix[c]) * normalization, -1.0f, 1.0f);
2488 }
2489 }
2490 }
2491 }
Alexis Hetu460e41f2015-09-01 10:58:37 -04002492 }
2493
2494 void Surface::decodeASTC(Buffer &internal, const Buffer &external, int xBlockSize, int yBlockSize, int zBlockSize, bool isSRGB)
2495 {
2496 }
2497
John Bauman89401822014-05-06 15:04:28 -04002498 unsigned int Surface::size(int width, int height, int depth, Format format)
2499 {
Nicolas Capens00555c42015-07-21 15:15:30 -04002500 // Dimensions rounded up to multiples of 4, used for compressed formats
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04002501 int width4 = align(width, 4);
2502 int height4 = align(height, 4);
John Bauman89401822014-05-06 15:04:28 -04002503
2504 switch(format)
2505 {
2506 #if S3TC_SUPPORT
2507 case FORMAT_DXT1:
John Bauman66b8ab22014-05-06 15:57:45 -04002508 #endif
John Bauman89401822014-05-06 15:04:28 -04002509 case FORMAT_ATI1:
Nicolas Capens22658242014-11-29 00:31:41 -05002510 case FORMAT_ETC1:
Alexis Hetu460e41f2015-09-01 10:58:37 -04002511 case FORMAT_R11_EAC:
2512 case FORMAT_SIGNED_R11_EAC:
2513 case FORMAT_RGB8_ETC2:
2514 case FORMAT_SRGB8_ETC2:
2515 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
2516 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
John Bauman89401822014-05-06 15:04:28 -04002517 return width4 * height4 * depth / 2;
John Bauman66b8ab22014-05-06 15:57:45 -04002518 #if S3TC_SUPPORT
John Bauman89401822014-05-06 15:04:28 -04002519 case FORMAT_DXT3:
2520 case FORMAT_DXT5:
John Bauman66b8ab22014-05-06 15:57:45 -04002521 #endif
John Bauman89401822014-05-06 15:04:28 -04002522 case FORMAT_ATI2:
Alexis Hetu460e41f2015-09-01 10:58:37 -04002523 case FORMAT_RG11_EAC:
2524 case FORMAT_SIGNED_RG11_EAC:
2525 case FORMAT_RGBA8_ETC2_EAC:
2526 case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
2527 case FORMAT_RGBA_ASTC_4x4_KHR:
2528 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
John Bauman89401822014-05-06 15:04:28 -04002529 return width4 * height4 * depth;
Alexis Hetu460e41f2015-09-01 10:58:37 -04002530 case FORMAT_RGBA_ASTC_5x4_KHR:
2531 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
2532 return align(width, 5) * height4 * depth;
2533 case FORMAT_RGBA_ASTC_5x5_KHR:
2534 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
2535 return align(width, 5) * align(height, 5) * depth;
2536 case FORMAT_RGBA_ASTC_6x5_KHR:
2537 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
2538 return align(width, 6) * align(height, 5) * depth;
2539 case FORMAT_RGBA_ASTC_6x6_KHR:
2540 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
2541 return align(width, 6) * align(height, 6) * depth;
2542 case FORMAT_RGBA_ASTC_8x5_KHR:
2543 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
2544 return align(width, 8) * align(height, 5) * depth;
2545 case FORMAT_RGBA_ASTC_8x6_KHR:
2546 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
2547 return align(width, 8) * align(height, 6) * depth;
2548 case FORMAT_RGBA_ASTC_8x8_KHR:
2549 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
2550 return align(width, 8) * align(height, 8) * depth;
2551 case FORMAT_RGBA_ASTC_10x5_KHR:
2552 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
2553 return align(width, 10) * align(height, 5) * depth;
2554 case FORMAT_RGBA_ASTC_10x6_KHR:
2555 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
2556 return align(width, 10) * align(height, 6) * depth;
2557 case FORMAT_RGBA_ASTC_10x8_KHR:
2558 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
2559 return align(width, 10) * align(height, 8) * depth;
2560 case FORMAT_RGBA_ASTC_10x10_KHR:
2561 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
2562 return align(width, 10) * align(height, 10) * depth;
2563 case FORMAT_RGBA_ASTC_12x10_KHR:
2564 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
2565 return align(width, 12) * align(height, 10) * depth;
2566 case FORMAT_RGBA_ASTC_12x12_KHR:
2567 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:
2568 return align(width, 12) * align(height, 12) * depth;
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04002569 case FORMAT_YV12_BT601:
2570 case FORMAT_YV12_BT709:
2571 case FORMAT_YV12_JFIF:
2572 {
2573 unsigned int YStride = align(width, 16);
2574 unsigned int YSize = YStride * height;
2575 unsigned int CStride = align(YStride / 2, 16);
Nicolas Capens0bac2852016-05-07 06:09:58 -04002576 unsigned int CSize = CStride * height / 2;
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04002577
2578 return YSize + 2 * CSize;
2579 }
John Bauman89401822014-05-06 15:04:28 -04002580 default:
2581 return bytes(format) * width * height * depth;
2582 }
John Bauman89401822014-05-06 15:04:28 -04002583 }
2584
2585 bool Surface::isStencil(Format format)
2586 {
2587 switch(format)
2588 {
2589 case FORMAT_D32:
2590 case FORMAT_D16:
2591 case FORMAT_D24X8:
2592 case FORMAT_D32F:
2593 case FORMAT_D32F_COMPLEMENTARY:
2594 case FORMAT_D32F_LOCKABLE:
2595 return false;
2596 case FORMAT_D24S8:
2597 case FORMAT_D24FS8:
2598 case FORMAT_S8:
John Bauman66b8ab22014-05-06 15:57:45 -04002599 case FORMAT_DF24S8:
2600 case FORMAT_DF16S8:
2601 case FORMAT_D32FS8_TEXTURE:
2602 case FORMAT_D32FS8_SHADOW:
John Bauman89401822014-05-06 15:04:28 -04002603 case FORMAT_INTZ:
2604 return true;
2605 default:
2606 return false;
2607 }
2608 }
2609
2610 bool Surface::isDepth(Format format)
2611 {
2612 switch(format)
2613 {
2614 case FORMAT_D32:
2615 case FORMAT_D16:
2616 case FORMAT_D24X8:
2617 case FORMAT_D24S8:
2618 case FORMAT_D24FS8:
2619 case FORMAT_D32F:
2620 case FORMAT_D32F_COMPLEMENTARY:
2621 case FORMAT_D32F_LOCKABLE:
John Bauman66b8ab22014-05-06 15:57:45 -04002622 case FORMAT_DF24S8:
2623 case FORMAT_DF16S8:
2624 case FORMAT_D32FS8_TEXTURE:
2625 case FORMAT_D32FS8_SHADOW:
John Bauman89401822014-05-06 15:04:28 -04002626 case FORMAT_INTZ:
2627 return true;
2628 case FORMAT_S8:
2629 return false;
2630 default:
2631 return false;
2632 }
2633 }
2634
Alexis Hetub9dda642016-10-06 11:25:32 -04002635 bool Surface::hasQuadLayout(Format format)
2636 {
2637 switch(format)
2638 {
2639 case FORMAT_D32:
2640 case FORMAT_D16:
2641 case FORMAT_D24X8:
2642 case FORMAT_D24S8:
2643 case FORMAT_D24FS8:
2644 case FORMAT_D32F:
2645 case FORMAT_D32F_COMPLEMENTARY:
2646 case FORMAT_DF24S8:
2647 case FORMAT_DF16S8:
2648 case FORMAT_INTZ:
2649 case FORMAT_S8:
2650 case FORMAT_A8G8R8B8Q:
2651 case FORMAT_X8G8R8B8Q:
2652 return true;
2653 case FORMAT_D32F_LOCKABLE:
2654 case FORMAT_D32FS8_TEXTURE:
2655 case FORMAT_D32FS8_SHADOW:
2656 default:
2657 break;
2658 }
2659
2660 return false;
2661 }
2662
John Bauman89401822014-05-06 15:04:28 -04002663 bool Surface::isPalette(Format format)
2664 {
2665 switch(format)
2666 {
2667 case FORMAT_P8:
2668 case FORMAT_A8P8:
2669 return true;
2670 default:
2671 return false;
2672 }
2673 }
2674
2675 bool Surface::isFloatFormat(Format format)
2676 {
2677 switch(format)
2678 {
Nicolas Capens5a86ee92015-09-04 10:45:43 -04002679 case FORMAT_R5G6B5:
Alexis Hetu925c2822015-11-24 14:09:34 -05002680 case FORMAT_R8G8B8:
2681 case FORMAT_B8G8R8:
John Bauman89401822014-05-06 15:04:28 -04002682 case FORMAT_X8R8G8B8:
Alexis Hetu43577b82015-10-21 15:32:16 -04002683 case FORMAT_X8B8G8R8I:
Nicolas Capensef77ac12015-03-28 21:48:51 -04002684 case FORMAT_X8B8G8R8:
John Bauman89401822014-05-06 15:04:28 -04002685 case FORMAT_A8R8G8B8:
Alexis Hetu049a1872016-04-25 16:59:58 -04002686 case FORMAT_SRGB8_X8:
2687 case FORMAT_SRGB8_A8:
Alexis Hetu43577b82015-10-21 15:32:16 -04002688 case FORMAT_A8B8G8R8I:
2689 case FORMAT_R8UI:
2690 case FORMAT_G8R8UI:
2691 case FORMAT_X8B8G8R8UI:
2692 case FORMAT_A8B8G8R8UI:
Nicolas Capensef77ac12015-03-28 21:48:51 -04002693 case FORMAT_A8B8G8R8:
Alexis Hetu43577b82015-10-21 15:32:16 -04002694 case FORMAT_G8R8I:
John Bauman89401822014-05-06 15:04:28 -04002695 case FORMAT_G8R8:
Alexis Hetuf999a002015-12-17 11:09:36 -05002696 case FORMAT_A2B10G10R10:
Alexis Hetu43577b82015-10-21 15:32:16 -04002697 case FORMAT_R8I_SNORM:
2698 case FORMAT_G8R8I_SNORM:
2699 case FORMAT_X8B8G8R8I_SNORM:
2700 case FORMAT_A8B8G8R8I_SNORM:
2701 case FORMAT_R16I:
2702 case FORMAT_R16UI:
2703 case FORMAT_G16R16I:
2704 case FORMAT_G16R16UI:
John Bauman89401822014-05-06 15:04:28 -04002705 case FORMAT_G16R16:
Alexis Hetu43577b82015-10-21 15:32:16 -04002706 case FORMAT_X16B16G16R16I:
2707 case FORMAT_X16B16G16R16UI:
2708 case FORMAT_A16B16G16R16I:
2709 case FORMAT_A16B16G16R16UI:
John Bauman89401822014-05-06 15:04:28 -04002710 case FORMAT_A16B16G16R16:
2711 case FORMAT_V8U8:
2712 case FORMAT_Q8W8V8U8:
2713 case FORMAT_X8L8V8U8:
2714 case FORMAT_V16U16:
2715 case FORMAT_A16W16V16U16:
2716 case FORMAT_Q16W16V16U16:
2717 case FORMAT_A8:
Alexis Hetu43577b82015-10-21 15:32:16 -04002718 case FORMAT_R8I:
John Bauman89401822014-05-06 15:04:28 -04002719 case FORMAT_R8:
Alexis Hetub9dda642016-10-06 11:25:32 -04002720 case FORMAT_S8:
John Bauman89401822014-05-06 15:04:28 -04002721 case FORMAT_L8:
2722 case FORMAT_L16:
2723 case FORMAT_A8L8:
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04002724 case FORMAT_YV12_BT601:
2725 case FORMAT_YV12_BT709:
2726 case FORMAT_YV12_JFIF:
Alexis Hetu43577b82015-10-21 15:32:16 -04002727 case FORMAT_R32I:
2728 case FORMAT_R32UI:
2729 case FORMAT_G32R32I:
2730 case FORMAT_G32R32UI:
2731 case FORMAT_X32B32G32R32I:
2732 case FORMAT_X32B32G32R32UI:
2733 case FORMAT_A32B32G32R32I:
2734 case FORMAT_A32B32G32R32UI:
John Bauman89401822014-05-06 15:04:28 -04002735 return false;
Nicolas Capens400667e2017-03-29 14:40:14 -04002736 case FORMAT_R16F:
2737 case FORMAT_G16R16F:
2738 case FORMAT_B16G16R16F:
2739 case FORMAT_A16B16G16R16F:
John Bauman89401822014-05-06 15:04:28 -04002740 case FORMAT_R32F:
2741 case FORMAT_G32R32F:
Nicolas Capensc018e082016-12-13 10:19:33 -05002742 case FORMAT_B32G32R32F:
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04002743 case FORMAT_X32B32G32R32F:
John Bauman89401822014-05-06 15:04:28 -04002744 case FORMAT_A32B32G32R32F:
2745 case FORMAT_D32F:
2746 case FORMAT_D32F_COMPLEMENTARY:
2747 case FORMAT_D32F_LOCKABLE:
John Bauman66b8ab22014-05-06 15:57:45 -04002748 case FORMAT_D32FS8_TEXTURE:
2749 case FORMAT_D32FS8_SHADOW:
Nicolas Capens80594422015-06-09 16:42:56 -04002750 case FORMAT_L16F:
2751 case FORMAT_A16L16F:
2752 case FORMAT_L32F:
2753 case FORMAT_A32L32F:
John Bauman89401822014-05-06 15:04:28 -04002754 return true;
2755 default:
2756 ASSERT(false);
2757 }
Nicolas Capensc39901e2016-03-21 16:37:44 -04002758
John Bauman89401822014-05-06 15:04:28 -04002759 return false;
2760 }
2761
2762 bool Surface::isUnsignedComponent(Format format, int component)
2763 {
2764 switch(format)
2765 {
2766 case FORMAT_NULL:
Nicolas Capens5a86ee92015-09-04 10:45:43 -04002767 case FORMAT_R5G6B5:
Alexis Hetu925c2822015-11-24 14:09:34 -05002768 case FORMAT_R8G8B8:
2769 case FORMAT_B8G8R8:
John Bauman89401822014-05-06 15:04:28 -04002770 case FORMAT_X8R8G8B8:
Nicolas Capensef77ac12015-03-28 21:48:51 -04002771 case FORMAT_X8B8G8R8:
John Bauman89401822014-05-06 15:04:28 -04002772 case FORMAT_A8R8G8B8:
Nicolas Capensef77ac12015-03-28 21:48:51 -04002773 case FORMAT_A8B8G8R8:
Alexis Hetu049a1872016-04-25 16:59:58 -04002774 case FORMAT_SRGB8_X8:
2775 case FORMAT_SRGB8_A8:
John Bauman89401822014-05-06 15:04:28 -04002776 case FORMAT_G8R8:
Alexis Hetuf999a002015-12-17 11:09:36 -05002777 case FORMAT_A2B10G10R10:
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002778 case FORMAT_R16UI:
John Bauman89401822014-05-06 15:04:28 -04002779 case FORMAT_G16R16:
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002780 case FORMAT_G16R16UI:
2781 case FORMAT_X16B16G16R16UI:
John Bauman89401822014-05-06 15:04:28 -04002782 case FORMAT_A16B16G16R16:
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002783 case FORMAT_A16B16G16R16UI:
2784 case FORMAT_R32UI:
2785 case FORMAT_G32R32UI:
2786 case FORMAT_X32B32G32R32UI:
2787 case FORMAT_A32B32G32R32UI:
2788 case FORMAT_R8UI:
2789 case FORMAT_G8R8UI:
2790 case FORMAT_X8B8G8R8UI:
2791 case FORMAT_A8B8G8R8UI:
John Bauman89401822014-05-06 15:04:28 -04002792 case FORMAT_D32F:
2793 case FORMAT_D32F_COMPLEMENTARY:
2794 case FORMAT_D32F_LOCKABLE:
John Bauman66b8ab22014-05-06 15:57:45 -04002795 case FORMAT_D32FS8_TEXTURE:
2796 case FORMAT_D32FS8_SHADOW:
John Bauman89401822014-05-06 15:04:28 -04002797 case FORMAT_A8:
2798 case FORMAT_R8:
2799 case FORMAT_L8:
2800 case FORMAT_L16:
2801 case FORMAT_A8L8:
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04002802 case FORMAT_YV12_BT601:
2803 case FORMAT_YV12_BT709:
2804 case FORMAT_YV12_JFIF:
John Bauman89401822014-05-06 15:04:28 -04002805 return true;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002806 case FORMAT_A8B8G8R8I:
2807 case FORMAT_A16B16G16R16I:
2808 case FORMAT_A32B32G32R32I:
2809 case FORMAT_A8B8G8R8I_SNORM:
2810 case FORMAT_Q8W8V8U8:
2811 case FORMAT_Q16W16V16U16:
2812 case FORMAT_A32B32G32R32F:
2813 return false;
2814 case FORMAT_R32F:
2815 case FORMAT_R8I:
2816 case FORMAT_R16I:
2817 case FORMAT_R32I:
2818 case FORMAT_R8I_SNORM:
2819 return component >= 1;
John Bauman89401822014-05-06 15:04:28 -04002820 case FORMAT_V8U8:
2821 case FORMAT_X8L8V8U8:
2822 case FORMAT_V16U16:
John Bauman89401822014-05-06 15:04:28 -04002823 case FORMAT_G32R32F:
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002824 case FORMAT_G8R8I:
2825 case FORMAT_G16R16I:
2826 case FORMAT_G32R32I:
2827 case FORMAT_G8R8I_SNORM:
2828 return component >= 2;
2829 case FORMAT_A16W16V16U16:
Nicolas Capens2e363b02016-12-14 10:32:36 -05002830 case FORMAT_B32G32R32F:
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04002831 case FORMAT_X32B32G32R32F:
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002832 case FORMAT_X8B8G8R8I:
2833 case FORMAT_X16B16G16R16I:
2834 case FORMAT_X32B32G32R32I:
2835 case FORMAT_X8B8G8R8I_SNORM:
2836 return component >= 3;
John Bauman89401822014-05-06 15:04:28 -04002837 default:
2838 ASSERT(false);
2839 }
Nicolas Capensc39901e2016-03-21 16:37:44 -04002840
John Bauman89401822014-05-06 15:04:28 -04002841 return false;
2842 }
2843
2844 bool Surface::isSRGBreadable(Format format)
2845 {
2846 // Keep in sync with Capabilities::isSRGBreadable
2847 switch(format)
2848 {
2849 case FORMAT_L8:
2850 case FORMAT_A8L8:
2851 case FORMAT_R8G8B8:
2852 case FORMAT_A8R8G8B8:
2853 case FORMAT_X8R8G8B8:
2854 case FORMAT_A8B8G8R8:
2855 case FORMAT_X8B8G8R8:
Alexis Hetu049a1872016-04-25 16:59:58 -04002856 case FORMAT_SRGB8_X8:
2857 case FORMAT_SRGB8_A8:
John Bauman89401822014-05-06 15:04:28 -04002858 case FORMAT_R5G6B5:
2859 case FORMAT_X1R5G5B5:
2860 case FORMAT_A1R5G5B5:
2861 case FORMAT_A4R4G4B4:
2862 #if S3TC_SUPPORT
2863 case FORMAT_DXT1:
2864 case FORMAT_DXT3:
2865 case FORMAT_DXT5:
John Bauman66b8ab22014-05-06 15:57:45 -04002866 #endif
John Bauman89401822014-05-06 15:04:28 -04002867 case FORMAT_ATI1:
2868 case FORMAT_ATI2:
John Bauman89401822014-05-06 15:04:28 -04002869 return true;
2870 default:
2871 return false;
2872 }
John Bauman89401822014-05-06 15:04:28 -04002873 }
2874
2875 bool Surface::isSRGBwritable(Format format)
2876 {
2877 // Keep in sync with Capabilities::isSRGBwritable
2878 switch(format)
2879 {
2880 case FORMAT_NULL:
2881 case FORMAT_A8R8G8B8:
2882 case FORMAT_X8R8G8B8:
2883 case FORMAT_A8B8G8R8:
2884 case FORMAT_X8B8G8R8:
Alexis Hetu049a1872016-04-25 16:59:58 -04002885 case FORMAT_SRGB8_X8:
2886 case FORMAT_SRGB8_A8:
John Bauman89401822014-05-06 15:04:28 -04002887 case FORMAT_R5G6B5:
2888 return true;
2889 default:
2890 return false;
2891 }
2892 }
2893
2894 bool Surface::isCompressed(Format format)
2895 {
2896 switch(format)
2897 {
2898 #if S3TC_SUPPORT
2899 case FORMAT_DXT1:
2900 case FORMAT_DXT3:
2901 case FORMAT_DXT5:
John Bauman66b8ab22014-05-06 15:57:45 -04002902 #endif
John Bauman89401822014-05-06 15:04:28 -04002903 case FORMAT_ATI1:
2904 case FORMAT_ATI2:
Nicolas Capens22658242014-11-29 00:31:41 -05002905 case FORMAT_ETC1:
Alexis Hetu460e41f2015-09-01 10:58:37 -04002906 case FORMAT_R11_EAC:
2907 case FORMAT_SIGNED_R11_EAC:
2908 case FORMAT_RG11_EAC:
2909 case FORMAT_SIGNED_RG11_EAC:
2910 case FORMAT_RGB8_ETC2:
2911 case FORMAT_SRGB8_ETC2:
2912 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
2913 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
2914 case FORMAT_RGBA8_ETC2_EAC:
2915 case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
2916 case FORMAT_RGBA_ASTC_4x4_KHR:
2917 case FORMAT_RGBA_ASTC_5x4_KHR:
2918 case FORMAT_RGBA_ASTC_5x5_KHR:
2919 case FORMAT_RGBA_ASTC_6x5_KHR:
2920 case FORMAT_RGBA_ASTC_6x6_KHR:
2921 case FORMAT_RGBA_ASTC_8x5_KHR:
2922 case FORMAT_RGBA_ASTC_8x6_KHR:
2923 case FORMAT_RGBA_ASTC_8x8_KHR:
2924 case FORMAT_RGBA_ASTC_10x5_KHR:
2925 case FORMAT_RGBA_ASTC_10x6_KHR:
2926 case FORMAT_RGBA_ASTC_10x8_KHR:
2927 case FORMAT_RGBA_ASTC_10x10_KHR:
2928 case FORMAT_RGBA_ASTC_12x10_KHR:
2929 case FORMAT_RGBA_ASTC_12x12_KHR:
2930 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
2931 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
2932 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
2933 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
2934 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
2935 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
2936 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
2937 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
2938 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
2939 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
2940 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
2941 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
2942 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
2943 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:
John Bauman89401822014-05-06 15:04:28 -04002944 return true;
John Bauman89401822014-05-06 15:04:28 -04002945 default:
2946 return false;
2947 }
2948 }
2949
Nicolas Capens492887a2017-03-27 14:50:51 -04002950 bool Surface::isSignedNonNormalizedInteger(Format format)
Alexis Hetu43577b82015-10-21 15:32:16 -04002951 {
2952 switch(format)
2953 {
2954 case FORMAT_A8B8G8R8I:
2955 case FORMAT_X8B8G8R8I:
2956 case FORMAT_G8R8I:
2957 case FORMAT_R8I:
2958 case FORMAT_A8B8G8R8UI:
2959 case FORMAT_X8B8G8R8UI:
2960 case FORMAT_G8R8UI:
2961 case FORMAT_R8UI:
2962 case FORMAT_A16B16G16R16I:
2963 case FORMAT_X16B16G16R16I:
2964 case FORMAT_G16R16I:
2965 case FORMAT_R16I:
Nicolas Capens492887a2017-03-27 14:50:51 -04002966 return true;
2967 default:
2968 return false;
2969 }
2970 }
2971
2972 bool Surface::isUnsignedNonNormalizedInteger(Format format)
2973 {
2974 switch(format)
2975 {
Alexis Hetu43577b82015-10-21 15:32:16 -04002976 case FORMAT_A16B16G16R16UI:
2977 case FORMAT_X16B16G16R16UI:
2978 case FORMAT_G16R16UI:
2979 case FORMAT_R16UI:
2980 case FORMAT_A32B32G32R32I:
2981 case FORMAT_X32B32G32R32I:
2982 case FORMAT_G32R32I:
2983 case FORMAT_R32I:
2984 case FORMAT_A32B32G32R32UI:
2985 case FORMAT_X32B32G32R32UI:
2986 case FORMAT_G32R32UI:
2987 case FORMAT_R32UI:
2988 return true;
2989 default:
2990 return false;
2991 }
2992 }
2993
Nicolas Capens492887a2017-03-27 14:50:51 -04002994 bool Surface::isNonNormalizedInteger(Format format)
2995 {
2996 return isSignedNonNormalizedInteger(format) ||
2997 isUnsignedNonNormalizedInteger(format);
2998 }
2999
3000 bool Surface::isNormalizedInteger(Format format)
3001 {
3002 return !isFloatFormat(format) &&
3003 !isNonNormalizedInteger(format) &&
3004 !isCompressed(format) &&
3005 !isDepth(format) &&
3006 !isStencil(format);
3007 }
3008
John Bauman89401822014-05-06 15:04:28 -04003009 int Surface::componentCount(Format format)
3010 {
3011 switch(format)
3012 {
Nicolas Capens5a86ee92015-09-04 10:45:43 -04003013 case FORMAT_R5G6B5: return 3;
3014 case FORMAT_X8R8G8B8: return 3;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003015 case FORMAT_X8B8G8R8I: return 3;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04003016 case FORMAT_X8B8G8R8: return 3;
3017 case FORMAT_A8R8G8B8: return 4;
Alexis Hetu049a1872016-04-25 16:59:58 -04003018 case FORMAT_SRGB8_X8: return 3;
3019 case FORMAT_SRGB8_A8: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003020 case FORMAT_A8B8G8R8I: return 4;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04003021 case FORMAT_A8B8G8R8: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003022 case FORMAT_G8R8I: return 2;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04003023 case FORMAT_G8R8: return 2;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003024 case FORMAT_R8I_SNORM: return 1;
3025 case FORMAT_G8R8I_SNORM: return 2;
3026 case FORMAT_X8B8G8R8I_SNORM:return 3;
3027 case FORMAT_A8B8G8R8I_SNORM:return 4;
3028 case FORMAT_R8UI: return 1;
3029 case FORMAT_G8R8UI: return 2;
3030 case FORMAT_X8B8G8R8UI: return 3;
3031 case FORMAT_A8B8G8R8UI: return 4;
Alexis Hetuf999a002015-12-17 11:09:36 -05003032 case FORMAT_A2B10G10R10: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003033 case FORMAT_G16R16I: return 2;
3034 case FORMAT_G16R16UI: return 2;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04003035 case FORMAT_G16R16: return 2;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003036 case FORMAT_G32R32I: return 2;
3037 case FORMAT_G32R32UI: return 2;
3038 case FORMAT_X16B16G16R16I: return 3;
3039 case FORMAT_X16B16G16R16UI: return 3;
3040 case FORMAT_A16B16G16R16I: return 4;
3041 case FORMAT_A16B16G16R16UI: return 4;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04003042 case FORMAT_A16B16G16R16: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003043 case FORMAT_X32B32G32R32I: return 3;
3044 case FORMAT_X32B32G32R32UI: return 3;
3045 case FORMAT_A32B32G32R32I: return 4;
3046 case FORMAT_A32B32G32R32UI: return 4;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04003047 case FORMAT_V8U8: return 2;
3048 case FORMAT_Q8W8V8U8: return 4;
3049 case FORMAT_X8L8V8U8: return 3;
3050 case FORMAT_V16U16: return 2;
3051 case FORMAT_A16W16V16U16: return 4;
3052 case FORMAT_Q16W16V16U16: return 4;
3053 case FORMAT_R32F: return 1;
3054 case FORMAT_G32R32F: return 2;
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04003055 case FORMAT_X32B32G32R32F: return 3;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04003056 case FORMAT_A32B32G32R32F: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003057 case FORMAT_D32F: return 1;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04003058 case FORMAT_D32F_LOCKABLE: return 1;
3059 case FORMAT_D32FS8_TEXTURE: return 1;
3060 case FORMAT_D32FS8_SHADOW: return 1;
3061 case FORMAT_A8: return 1;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003062 case FORMAT_R8I: return 1;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04003063 case FORMAT_R8: return 1;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003064 case FORMAT_R16I: return 1;
3065 case FORMAT_R16UI: return 1;
3066 case FORMAT_R32I: return 1;
3067 case FORMAT_R32UI: return 1;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04003068 case FORMAT_L8: return 1;
3069 case FORMAT_L16: return 1;
3070 case FORMAT_A8L8: return 2;
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04003071 case FORMAT_YV12_BT601: return 3;
3072 case FORMAT_YV12_BT709: return 3;
3073 case FORMAT_YV12_JFIF: return 3;
John Bauman89401822014-05-06 15:04:28 -04003074 default:
3075 ASSERT(false);
3076 }
3077
3078 return 1;
3079 }
3080
3081 void *Surface::allocateBuffer(int width, int height, int depth, Format format)
3082 {
Nicolas Capensdb17b5d2015-06-26 11:15:58 -04003083 // Render targets require 2x2 quads
3084 int width2 = (width + 1) & ~1;
3085 int height2 = (height + 1) & ~1;
John Bauman89401822014-05-06 15:04:28 -04003086
Nicolas Capens6ea71872015-06-26 13:00:48 -04003087 // FIXME: Unpacking byte4 to short4 in the sampler currently involves reading 8 bytes,
Nicolas Capens48ef1252016-11-07 15:30:33 -05003088 // and stencil operations also read 8 bytes per four 8-bit stencil values,
Nicolas Capens6ea71872015-06-26 13:00:48 -04003089 // so we have to allocate 4 extra bytes to avoid buffer overruns.
3090 return allocateZero(size(width2, height2, depth, format) + 4);
John Bauman89401822014-05-06 15:04:28 -04003091 }
3092
Nicolas Capens5ba566b2015-05-25 17:11:04 -04003093 void Surface::memfill4(void *buffer, int pattern, int bytes)
John Bauman89401822014-05-06 15:04:28 -04003094 {
3095 while((size_t)buffer & 0x1 && bytes >= 1)
3096 {
3097 *(char*)buffer = (char)pattern;
3098 (char*&)buffer += 1;
3099 bytes -= 1;
3100 }
3101
3102 while((size_t)buffer & 0x3 && bytes >= 2)
3103 {
3104 *(short*)buffer = (short)pattern;
3105 (short*&)buffer += 1;
3106 bytes -= 2;
3107 }
3108
Nicolas Capens47dc8672017-04-25 12:54:39 -04003109 #if defined(__i386__) || defined(__x86_64__)
3110 if(CPUID::supportsSSE())
John Bauman89401822014-05-06 15:04:28 -04003111 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04003112 while((size_t)buffer & 0xF && bytes >= 4)
3113 {
3114 *(int*)buffer = pattern;
3115 (int*&)buffer += 1;
3116 bytes -= 4;
3117 }
3118
3119 __m128 quad = _mm_set_ps1((float&)pattern);
3120
3121 float *pointer = (float*)buffer;
3122 int qxwords = bytes / 64;
3123 bytes -= qxwords * 64;
3124
3125 while(qxwords--)
3126 {
3127 _mm_stream_ps(pointer + 0, quad);
3128 _mm_stream_ps(pointer + 4, quad);
3129 _mm_stream_ps(pointer + 8, quad);
3130 _mm_stream_ps(pointer + 12, quad);
3131
3132 pointer += 16;
3133 }
3134
3135 buffer = pointer;
John Bauman89401822014-05-06 15:04:28 -04003136 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04003137 #endif
John Bauman89401822014-05-06 15:04:28 -04003138
3139 while(bytes >= 4)
3140 {
3141 *(int*)buffer = (int)pattern;
3142 (int*&)buffer += 1;
3143 bytes -= 4;
3144 }
3145
3146 while(bytes >= 2)
3147 {
3148 *(short*)buffer = (short)pattern;
3149 (short*&)buffer += 1;
3150 bytes -= 2;
3151 }
3152
3153 while(bytes >= 1)
3154 {
3155 *(char*)buffer = (char)pattern;
3156 (char*&)buffer += 1;
3157 bytes -= 1;
3158 }
3159 }
3160
Alexis Hetu75b650f2015-11-19 17:40:15 -05003161 bool Surface::isEntire(const SliceRect& rect) const
John Bauman89401822014-05-06 15:04:28 -04003162 {
Alexis Hetu75b650f2015-11-19 17:40:15 -05003163 return (rect.x0 == 0 && rect.y0 == 0 && rect.x1 == internal.width && rect.y1 == internal.height && internal.depth == 1);
3164 }
John Bauman89401822014-05-06 15:04:28 -04003165
Nicolas Capensc39901e2016-03-21 16:37:44 -04003166 SliceRect Surface::getRect() const
Alexis Hetu75b650f2015-11-19 17:40:15 -05003167 {
Nicolas Capensc39901e2016-03-21 16:37:44 -04003168 return SliceRect(0, 0, internal.width, internal.height, 0);
John Bauman89401822014-05-06 15:04:28 -04003169 }
3170
Nicolas Capensc39901e2016-03-21 16:37:44 -04003171 void Surface::clearDepth(float depth, int x0, int y0, int width, int height)
John Bauman89401822014-05-06 15:04:28 -04003172 {
Alexis Hetu358a1442015-12-03 14:23:10 -05003173 if(width == 0 || height == 0) return;
3174
John Bauman89401822014-05-06 15:04:28 -04003175 // Not overlapping
3176 if(x0 > internal.width) return;
3177 if(y0 > internal.height) return;
3178 if(x0 + width < 0) return;
3179 if(y0 + height < 0) return;
3180
3181 // Clip against dimensions
3182 if(x0 < 0) {width += x0; x0 = 0;}
3183 if(x0 + width > internal.width) width = internal.width - x0;
3184 if(y0 < 0) {height += y0; y0 = 0;}
3185 if(y0 + height > internal.height) height = internal.height - y0;
3186
3187 const bool entire = x0 == 0 && y0 == 0 && width == internal.width && height == internal.height;
3188 const Lock lock = entire ? LOCK_DISCARD : LOCK_WRITEONLY;
3189
3190 int width2 = (internal.width + 1) & ~1;
3191
3192 int x1 = x0 + width;
3193 int y1 = y0 + height;
3194
3195 if(internal.format == FORMAT_D32F_LOCKABLE ||
John Bauman66b8ab22014-05-06 15:57:45 -04003196 internal.format == FORMAT_D32FS8_TEXTURE ||
3197 internal.format == FORMAT_D32FS8_SHADOW)
John Bauman89401822014-05-06 15:04:28 -04003198 {
3199 float *target = (float*)lockInternal(0, 0, 0, lock, PUBLIC) + x0 + width2 * y0;
3200
3201 for(int z = 0; z < internal.depth; z++)
3202 {
3203 for(int y = y0; y < y1; y++)
3204 {
Nicolas Capens5ba566b2015-05-25 17:11:04 -04003205 memfill4(target, (int&)depth, 4 * width);
John Bauman89401822014-05-06 15:04:28 -04003206 target += width2;
3207 }
3208 }
3209
3210 unlockInternal();
3211 }
3212 else // Quad layout
3213 {
3214 if(complementaryDepthBuffer)
3215 {
3216 depth = 1 - depth;
3217 }
3218
3219 float *buffer = (float*)lockInternal(0, 0, 0, lock, PUBLIC);
3220
Alexis Hetu358a1442015-12-03 14:23:10 -05003221 int oddX0 = (x0 & ~1) * 2 + (x0 & 1);
3222 int oddX1 = (x1 & ~1) * 2;
3223 int evenX0 = ((x0 + 1) & ~1) * 2;
3224 int evenBytes = (oddX1 - evenX0) * sizeof(float);
3225
John Bauman89401822014-05-06 15:04:28 -04003226 for(int z = 0; z < internal.depth; z++)
3227 {
3228 for(int y = y0; y < y1; y++)
3229 {
3230 float *target = buffer + (y & ~1) * width2 + (y & 1) * 2;
Nicolas Capensc39901e2016-03-21 16:37:44 -04003231
John Bauman89401822014-05-06 15:04:28 -04003232 if((y & 1) == 0 && y + 1 < y1) // Fill quad line at once
3233 {
3234 if((x0 & 1) != 0)
3235 {
Alexis Hetu358a1442015-12-03 14:23:10 -05003236 target[oddX0 + 0] = depth;
3237 target[oddX0 + 2] = depth;
John Bauman89401822014-05-06 15:04:28 -04003238 }
3239
Alexis Hetu358a1442015-12-03 14:23:10 -05003240 // for(int x2 = evenX0; x2 < x1 * 2; x2 += 4)
John Bauman89401822014-05-06 15:04:28 -04003241 // {
3242 // target[x2 + 0] = depth;
3243 // target[x2 + 1] = depth;
3244 // target[x2 + 2] = depth;
3245 // target[x2 + 3] = depth;
3246 // }
3247
3248 // __asm
3249 // {
3250 // movss xmm0, depth
3251 // shufps xmm0, xmm0, 0x00
3252 //
3253 // mov eax, x0
3254 // add eax, 1
3255 // and eax, 0xFFFFFFFE
3256 // cmp eax, x1
3257 // jge qEnd
3258 //
3259 // mov edi, target
3260 //
3261 // qLoop:
3262 // movntps [edi+8*eax], xmm0
3263 //
3264 // add eax, 2
3265 // cmp eax, x1
3266 // jl qLoop
3267 // qEnd:
3268 // }
3269
Alexis Hetu358a1442015-12-03 14:23:10 -05003270 memfill4(&target[evenX0], (int&)depth, evenBytes);
John Bauman89401822014-05-06 15:04:28 -04003271
3272 if((x1 & 1) != 0)
3273 {
Alexis Hetu358a1442015-12-03 14:23:10 -05003274 target[oddX1 + 0] = depth;
3275 target[oddX1 + 2] = depth;
John Bauman89401822014-05-06 15:04:28 -04003276 }
3277
3278 y++;
3279 }
3280 else
3281 {
Alexis Hetu358a1442015-12-03 14:23:10 -05003282 for(int x = x0, i = oddX0; x < x1; x++, i = (x & ~1) * 2 + (x & 1))
John Bauman89401822014-05-06 15:04:28 -04003283 {
Alexis Hetu358a1442015-12-03 14:23:10 -05003284 target[i] = depth;
John Bauman89401822014-05-06 15:04:28 -04003285 }
3286 }
3287 }
3288
3289 buffer += internal.sliceP;
3290 }
3291
3292 unlockInternal();
3293 }
3294 }
3295
Nicolas Capensc39901e2016-03-21 16:37:44 -04003296 void Surface::clearStencil(unsigned char s, unsigned char mask, int x0, int y0, int width, int height)
John Bauman89401822014-05-06 15:04:28 -04003297 {
Alexis Hetu358a1442015-12-03 14:23:10 -05003298 if(mask == 0 || width == 0 || height == 0) return;
Alexis Hetu2b052f82015-11-25 13:57:28 -05003299
John Bauman89401822014-05-06 15:04:28 -04003300 // Not overlapping
3301 if(x0 > internal.width) return;
3302 if(y0 > internal.height) return;
3303 if(x0 + width < 0) return;
3304 if(y0 + height < 0) return;
3305
3306 // Clip against dimensions
3307 if(x0 < 0) {width += x0; x0 = 0;}
3308 if(x0 + width > internal.width) width = internal.width - x0;
3309 if(y0 < 0) {height += y0; y0 = 0;}
3310 if(y0 + height > internal.height) height = internal.height - y0;
3311
3312 int width2 = (internal.width + 1) & ~1;
3313
3314 int x1 = x0 + width;
3315 int y1 = y0 + height;
3316
Alexis Hetu358a1442015-12-03 14:23:10 -05003317 int oddX0 = (x0 & ~1) * 2 + (x0 & 1);
3318 int oddX1 = (x1 & ~1) * 2;
3319 int evenX0 = ((x0 + 1) & ~1) * 2;
3320 int evenBytes = oddX1 - evenX0;
3321
John Bauman89401822014-05-06 15:04:28 -04003322 unsigned char maskedS = s & mask;
3323 unsigned char invMask = ~mask;
3324 unsigned int fill = maskedS;
Tom Anderson69bc6e82017-03-20 11:54:29 -07003325 fill = fill | (fill << 8) | (fill << 16) | (fill << 24);
John Bauman89401822014-05-06 15:04:28 -04003326
Alexis Hetua52dfbd2016-10-05 17:03:30 -04003327 char *buffer = (char*)lockStencil(0, 0, 0, PUBLIC);
Alexis Hetu2b052f82015-11-25 13:57:28 -05003328
3329 // Stencil buffers are assumed to use quad layout
3330 for(int z = 0; z < stencil.depth; z++)
John Bauman89401822014-05-06 15:04:28 -04003331 {
Alexis Hetu2b052f82015-11-25 13:57:28 -05003332 for(int y = y0; y < y1; y++)
John Bauman89401822014-05-06 15:04:28 -04003333 {
Alexis Hetu2b052f82015-11-25 13:57:28 -05003334 char *target = buffer + (y & ~1) * width2 + (y & 1) * 2;
3335
3336 if((y & 1) == 0 && y + 1 < y1 && mask == 0xFF) // Fill quad line at once
John Bauman89401822014-05-06 15:04:28 -04003337 {
Alexis Hetu2b052f82015-11-25 13:57:28 -05003338 if((x0 & 1) != 0)
John Bauman89401822014-05-06 15:04:28 -04003339 {
Alexis Hetu358a1442015-12-03 14:23:10 -05003340 target[oddX0 + 0] = fill;
3341 target[oddX0 + 2] = fill;
John Bauman89401822014-05-06 15:04:28 -04003342 }
3343
Alexis Hetu358a1442015-12-03 14:23:10 -05003344 memfill4(&target[evenX0], fill, evenBytes);
Alexis Hetu2b052f82015-11-25 13:57:28 -05003345
3346 if((x1 & 1) != 0)
3347 {
Alexis Hetu358a1442015-12-03 14:23:10 -05003348 target[oddX1 + 0] = fill;
3349 target[oddX1 + 2] = fill;
Alexis Hetu2b052f82015-11-25 13:57:28 -05003350 }
3351
3352 y++;
3353 }
3354 else
3355 {
Alexis Hetu358a1442015-12-03 14:23:10 -05003356 for(int x = x0, i = oddX0; x < x1; x++, i = (x & ~1) * 2 + (x & 1))
Alexis Hetu2b052f82015-11-25 13:57:28 -05003357 {
Alexis Hetu358a1442015-12-03 14:23:10 -05003358 target[i] = maskedS | (target[i] & invMask);
Alexis Hetu2b052f82015-11-25 13:57:28 -05003359 }
John Bauman89401822014-05-06 15:04:28 -04003360 }
3361 }
3362
Alexis Hetu2b052f82015-11-25 13:57:28 -05003363 buffer += stencil.sliceP;
John Bauman89401822014-05-06 15:04:28 -04003364 }
John Bauman89401822014-05-06 15:04:28 -04003365
Alexis Hetu2b052f82015-11-25 13:57:28 -05003366 unlockStencil();
John Bauman89401822014-05-06 15:04:28 -04003367 }
3368
3369 void Surface::fill(const Color<float> &color, int x0, int y0, int width, int height)
3370 {
3371 unsigned char *row;
3372 Buffer *buffer;
Nicolas Capensc39901e2016-03-21 16:37:44 -04003373
John Bauman89401822014-05-06 15:04:28 -04003374 if(internal.dirty)
3375 {
3376 row = (unsigned char*)lockInternal(x0, y0, 0, LOCK_WRITEONLY, PUBLIC);
3377 buffer = &internal;
3378 }
3379 else
3380 {
3381 row = (unsigned char*)lockExternal(x0, y0, 0, LOCK_WRITEONLY, PUBLIC);
3382 buffer = &external;
3383 }
3384
3385 if(buffer->bytes <= 4)
3386 {
3387 int c;
3388 buffer->write(&c, color);
3389
3390 if(buffer->bytes <= 1) c = (c << 8) | c;
3391 if(buffer->bytes <= 2) c = (c << 16) | c;
3392
3393 for(int y = 0; y < height; y++)
3394 {
Nicolas Capens5ba566b2015-05-25 17:11:04 -04003395 memfill4(row, c, width * buffer->bytes);
John Bauman89401822014-05-06 15:04:28 -04003396
3397 row += buffer->pitchB;
3398 }
3399 }
3400 else // Generic
3401 {
3402 for(int y = 0; y < height; y++)
3403 {
3404 unsigned char *element = row;
3405
3406 for(int x = 0; x < width; x++)
3407 {
3408 buffer->write(element, color);
3409
3410 element += buffer->bytes;
3411 }
3412
3413 row += buffer->pitchB;
3414 }
3415 }
3416
3417 if(buffer == &internal)
3418 {
3419 unlockInternal();
3420 }
3421 else
3422 {
3423 unlockExternal();
3424 }
3425 }
3426
Alexis Hetu43577b82015-10-21 15:32:16 -04003427 void Surface::copyInternal(const Surface* source, int x, int y, float srcX, float srcY, bool filter)
John Bauman89401822014-05-06 15:04:28 -04003428 {
Alexis Hetu43577b82015-10-21 15:32:16 -04003429 ASSERT(internal.lock != LOCK_UNLOCKED && source && source->internal.lock != LOCK_UNLOCKED);
John Bauman89401822014-05-06 15:04:28 -04003430
Alexis Hetu43577b82015-10-21 15:32:16 -04003431 sw::Color<float> color;
John Bauman89401822014-05-06 15:04:28 -04003432
Alexis Hetu43577b82015-10-21 15:32:16 -04003433 if(!filter)
3434 {
3435 color = source->internal.read((int)srcX, (int)srcY);
3436 }
3437 else // Bilinear filtering
3438 {
3439 color = source->internal.sample(srcX, srcY);
3440 }
John Bauman89401822014-05-06 15:04:28 -04003441
3442 internal.write(x, y, color);
3443 }
3444
Alexis Hetu43577b82015-10-21 15:32:16 -04003445 void Surface::copyInternal(const Surface* source, int x, int y, int z, float srcX, float srcY, float srcZ, bool filter)
3446 {
3447 ASSERT(internal.lock != LOCK_UNLOCKED && source && source->internal.lock != LOCK_UNLOCKED);
3448
3449 sw::Color<float> color;
3450
3451 if(!filter)
3452 {
3453 color = source->internal.read((int)srcX, (int)srcY, int(srcZ));
3454 }
3455 else // Bilinear filtering
3456 {
3457 color = source->internal.sample(srcX, srcY, srcZ);
3458 }
3459
3460 internal.write(x, y, z, color);
3461 }
3462
John Bauman89401822014-05-06 15:04:28 -04003463 bool Surface::hasStencil() const
3464 {
3465 return isStencil(external.format);
3466 }
Nicolas Capensc39901e2016-03-21 16:37:44 -04003467
John Bauman89401822014-05-06 15:04:28 -04003468 bool Surface::hasDepth() const
3469 {
3470 return isDepth(external.format);
3471 }
3472
3473 bool Surface::hasPalette() const
3474 {
3475 return isPalette(external.format);
3476 }
3477
3478 bool Surface::isRenderTarget() const
3479 {
3480 return renderTarget;
3481 }
3482
3483 bool Surface::hasDirtyMipmaps() const
3484 {
3485 return dirtyMipmaps;
3486 }
3487
3488 void Surface::cleanMipmaps()
3489 {
3490 dirtyMipmaps = false;
3491 }
3492
3493 Resource *Surface::getResource()
3494 {
3495 return resource;
3496 }
3497
3498 bool Surface::identicalFormats() const
3499 {
John Bauman66b8ab22014-05-06 15:57:45 -04003500 return external.format == internal.format &&
3501 external.width == internal.width &&
Nicolas Capens22658242014-11-29 00:31:41 -05003502 external.height == internal.height &&
3503 external.depth == internal.depth &&
3504 external.pitchB == internal.pitchB &&
3505 external.sliceB == internal.sliceB;
John Bauman89401822014-05-06 15:04:28 -04003506 }
3507
3508 Format Surface::selectInternalFormat(Format format) const
3509 {
3510 switch(format)
3511 {
3512 case FORMAT_NULL:
3513 return FORMAT_NULL;
3514 case FORMAT_P8:
3515 case FORMAT_A8P8:
3516 case FORMAT_A4R4G4B4:
3517 case FORMAT_A1R5G5B5:
3518 case FORMAT_A8R3G3B2:
3519 return FORMAT_A8R8G8B8;
3520 case FORMAT_A8:
3521 return FORMAT_A8;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003522 case FORMAT_R8I:
3523 return FORMAT_R8I;
3524 case FORMAT_R8UI:
3525 return FORMAT_R8UI;
3526 case FORMAT_R8I_SNORM:
3527 return FORMAT_R8I_SNORM;
John Bauman89401822014-05-06 15:04:28 -04003528 case FORMAT_R8:
3529 return FORMAT_R8;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003530 case FORMAT_R16I:
3531 return FORMAT_R16I;
3532 case FORMAT_R16UI:
3533 return FORMAT_R16UI;
3534 case FORMAT_R32I:
3535 return FORMAT_R32I;
3536 case FORMAT_R32UI:
3537 return FORMAT_R32UI;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003538 case FORMAT_X16B16G16R16I:
3539 case FORMAT_A16B16G16R16I:
3540 return FORMAT_A16B16G16R16I;
3541 case FORMAT_X16B16G16R16UI:
3542 case FORMAT_A16B16G16R16UI:
3543 return FORMAT_A16B16G16R16UI;
Alexis Hetuf999a002015-12-17 11:09:36 -05003544 case FORMAT_A2R10G10B10:
3545 case FORMAT_A2B10G10R10:
John Bauman89401822014-05-06 15:04:28 -04003546 case FORMAT_A16B16G16R16:
3547 return FORMAT_A16B16G16R16;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003548 case FORMAT_X32B32G32R32I:
3549 case FORMAT_A32B32G32R32I:
3550 return FORMAT_A32B32G32R32I;
3551 case FORMAT_X32B32G32R32UI:
3552 case FORMAT_A32B32G32R32UI:
3553 return FORMAT_A32B32G32R32UI;
3554 case FORMAT_G8R8I:
3555 return FORMAT_G8R8I;
3556 case FORMAT_G8R8UI:
3557 return FORMAT_G8R8UI;
3558 case FORMAT_G8R8I_SNORM:
3559 return FORMAT_G8R8I_SNORM;
John Bauman89401822014-05-06 15:04:28 -04003560 case FORMAT_G8R8:
3561 return FORMAT_G8R8;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003562 case FORMAT_G16R16I:
3563 return FORMAT_G16R16I;
3564 case FORMAT_G16R16UI:
3565 return FORMAT_G16R16UI;
John Bauman89401822014-05-06 15:04:28 -04003566 case FORMAT_G16R16:
3567 return FORMAT_G16R16;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003568 case FORMAT_G32R32I:
3569 return FORMAT_G32R32I;
3570 case FORMAT_G32R32UI:
3571 return FORMAT_G32R32UI;
John Bauman89401822014-05-06 15:04:28 -04003572 case FORMAT_A8R8G8B8:
John Bauman89401822014-05-06 15:04:28 -04003573 if(lockable || !quadLayoutEnabled)
3574 {
3575 return FORMAT_A8R8G8B8;
3576 }
3577 else
3578 {
3579 return FORMAT_A8G8R8B8Q;
3580 }
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003581 case FORMAT_A8B8G8R8I:
3582 return FORMAT_A8B8G8R8I;
3583 case FORMAT_A8B8G8R8UI:
3584 return FORMAT_A8B8G8R8UI;
3585 case FORMAT_A8B8G8R8I_SNORM:
3586 return FORMAT_A8B8G8R8I_SNORM;
Nicolas Capens80594422015-06-09 16:42:56 -04003587 case FORMAT_R5G5B5A1:
3588 case FORMAT_R4G4B4A4:
Nicolas Capensef77ac12015-03-28 21:48:51 -04003589 case FORMAT_A8B8G8R8:
3590 return FORMAT_A8B8G8R8;
John Bauman89401822014-05-06 15:04:28 -04003591 case FORMAT_R5G6B5:
Nicolas Capens5a86ee92015-09-04 10:45:43 -04003592 return FORMAT_R5G6B5;
3593 case FORMAT_R3G3B2:
John Bauman89401822014-05-06 15:04:28 -04003594 case FORMAT_R8G8B8:
3595 case FORMAT_X4R4G4B4:
3596 case FORMAT_X1R5G5B5:
3597 case FORMAT_X8R8G8B8:
John Bauman89401822014-05-06 15:04:28 -04003598 if(lockable || !quadLayoutEnabled)
3599 {
3600 return FORMAT_X8R8G8B8;
3601 }
3602 else
3603 {
3604 return FORMAT_X8G8R8B8Q;
3605 }
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003606 case FORMAT_X8B8G8R8I:
3607 return FORMAT_X8B8G8R8I;
3608 case FORMAT_X8B8G8R8UI:
3609 return FORMAT_X8B8G8R8UI;
3610 case FORMAT_X8B8G8R8I_SNORM:
3611 return FORMAT_X8B8G8R8I_SNORM;
Nicolas Capens80594422015-06-09 16:42:56 -04003612 case FORMAT_B8G8R8:
Nicolas Capensef77ac12015-03-28 21:48:51 -04003613 case FORMAT_X8B8G8R8:
3614 return FORMAT_X8B8G8R8;
Alexis Hetu049a1872016-04-25 16:59:58 -04003615 case FORMAT_SRGB8_X8:
3616 return FORMAT_SRGB8_X8;
3617 case FORMAT_SRGB8_A8:
3618 return FORMAT_SRGB8_A8;
John Bauman89401822014-05-06 15:04:28 -04003619 // Compressed formats
3620 #if S3TC_SUPPORT
3621 case FORMAT_DXT1:
3622 case FORMAT_DXT3:
3623 case FORMAT_DXT5:
John Bauman66b8ab22014-05-06 15:57:45 -04003624 #endif
Alexis Hetu460e41f2015-09-01 10:58:37 -04003625 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
3626 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
3627 case FORMAT_RGBA8_ETC2_EAC:
3628 case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
3629 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
3630 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
3631 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
3632 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
3633 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
3634 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
3635 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
3636 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
3637 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
3638 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
3639 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
3640 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
3641 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
3642 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:
3643 return FORMAT_A8R8G8B8;
3644 case FORMAT_RGBA_ASTC_4x4_KHR:
3645 case FORMAT_RGBA_ASTC_5x4_KHR:
3646 case FORMAT_RGBA_ASTC_5x5_KHR:
3647 case FORMAT_RGBA_ASTC_6x5_KHR:
3648 case FORMAT_RGBA_ASTC_6x6_KHR:
3649 case FORMAT_RGBA_ASTC_8x5_KHR:
3650 case FORMAT_RGBA_ASTC_8x6_KHR:
3651 case FORMAT_RGBA_ASTC_8x8_KHR:
3652 case FORMAT_RGBA_ASTC_10x5_KHR:
3653 case FORMAT_RGBA_ASTC_10x6_KHR:
3654 case FORMAT_RGBA_ASTC_10x8_KHR:
3655 case FORMAT_RGBA_ASTC_10x10_KHR:
3656 case FORMAT_RGBA_ASTC_12x10_KHR:
3657 case FORMAT_RGBA_ASTC_12x12_KHR:
3658 // ASTC supports HDR, so a floating point format is required to represent it properly
3659 return FORMAT_A32B32G32R32F; // FIXME: 16FP is probably sufficient, but it's currently unsupported
John Bauman89401822014-05-06 15:04:28 -04003660 case FORMAT_ATI1:
Alexis Hetu460e41f2015-09-01 10:58:37 -04003661 case FORMAT_R11_EAC:
John Bauman89401822014-05-06 15:04:28 -04003662 return FORMAT_R8;
Alexis Hetu0de50d42015-09-09 13:56:41 -04003663 case FORMAT_SIGNED_R11_EAC:
3664 return FORMAT_R32F; // FIXME: Signed 8bit format would be sufficient
John Bauman89401822014-05-06 15:04:28 -04003665 case FORMAT_ATI2:
Alexis Hetu460e41f2015-09-01 10:58:37 -04003666 case FORMAT_RG11_EAC:
John Bauman89401822014-05-06 15:04:28 -04003667 return FORMAT_G8R8;
Alexis Hetu0de50d42015-09-09 13:56:41 -04003668 case FORMAT_SIGNED_RG11_EAC:
3669 return FORMAT_G32R32F; // FIXME: Signed 8bit format would be sufficient
Nicolas Capens22658242014-11-29 00:31:41 -05003670 case FORMAT_ETC1:
Alexis Hetu460e41f2015-09-01 10:58:37 -04003671 case FORMAT_RGB8_ETC2:
3672 case FORMAT_SRGB8_ETC2:
Nicolas Capens22658242014-11-29 00:31:41 -05003673 return FORMAT_X8R8G8B8;
John Bauman89401822014-05-06 15:04:28 -04003674 // Bumpmap formats
3675 case FORMAT_V8U8: return FORMAT_V8U8;
3676 case FORMAT_L6V5U5: return FORMAT_X8L8V8U8;
3677 case FORMAT_Q8W8V8U8: return FORMAT_Q8W8V8U8;
3678 case FORMAT_X8L8V8U8: return FORMAT_X8L8V8U8;
3679 case FORMAT_V16U16: return FORMAT_V16U16;
3680 case FORMAT_A2W10V10U10: return FORMAT_A16W16V16U16;
3681 case FORMAT_Q16W16V16U16: return FORMAT_Q16W16V16U16;
3682 // Floating-point formats
Nicolas Capens80594422015-06-09 16:42:56 -04003683 case FORMAT_A16F: return FORMAT_A32B32G32R32F;
John Bauman89401822014-05-06 15:04:28 -04003684 case FORMAT_R16F: return FORMAT_R32F;
3685 case FORMAT_G16R16F: return FORMAT_G32R32F;
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04003686 case FORMAT_B16G16R16F: return FORMAT_X32B32G32R32F;
John Bauman89401822014-05-06 15:04:28 -04003687 case FORMAT_A16B16G16R16F: return FORMAT_A32B32G32R32F;
Nicolas Capens80594422015-06-09 16:42:56 -04003688 case FORMAT_A32F: return FORMAT_A32B32G32R32F;
John Bauman89401822014-05-06 15:04:28 -04003689 case FORMAT_R32F: return FORMAT_R32F;
3690 case FORMAT_G32R32F: return FORMAT_G32R32F;
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04003691 case FORMAT_B32G32R32F: return FORMAT_X32B32G32R32F;
3692 case FORMAT_X32B32G32R32F: return FORMAT_X32B32G32R32F;
John Bauman89401822014-05-06 15:04:28 -04003693 case FORMAT_A32B32G32R32F: return FORMAT_A32B32G32R32F;
3694 // Luminance formats
3695 case FORMAT_L8: return FORMAT_L8;
3696 case FORMAT_A4L4: return FORMAT_A8L8;
3697 case FORMAT_L16: return FORMAT_L16;
3698 case FORMAT_A8L8: return FORMAT_A8L8;
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04003699 case FORMAT_L16F: return FORMAT_X32B32G32R32F;
Nicolas Capens80594422015-06-09 16:42:56 -04003700 case FORMAT_A16L16F: return FORMAT_A32B32G32R32F;
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04003701 case FORMAT_L32F: return FORMAT_X32B32G32R32F;
Nicolas Capens80594422015-06-09 16:42:56 -04003702 case FORMAT_A32L32F: return FORMAT_A32B32G32R32F;
John Bauman89401822014-05-06 15:04:28 -04003703 // Depth/stencil formats
3704 case FORMAT_D16:
3705 case FORMAT_D32:
3706 case FORMAT_D24X8:
3707 case FORMAT_D24S8:
3708 case FORMAT_D24FS8:
3709 if(hasParent) // Texture
3710 {
John Bauman66b8ab22014-05-06 15:57:45 -04003711 return FORMAT_D32FS8_SHADOW;
John Bauman89401822014-05-06 15:04:28 -04003712 }
3713 else if(complementaryDepthBuffer)
3714 {
3715 return FORMAT_D32F_COMPLEMENTARY;
3716 }
3717 else
3718 {
3719 return FORMAT_D32F;
3720 }
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003721 case FORMAT_D32F: return FORMAT_D32F;
John Bauman66b8ab22014-05-06 15:57:45 -04003722 case FORMAT_D32F_LOCKABLE: return FORMAT_D32F_LOCKABLE;
3723 case FORMAT_D32FS8_TEXTURE: return FORMAT_D32FS8_TEXTURE;
3724 case FORMAT_INTZ: return FORMAT_D32FS8_TEXTURE;
3725 case FORMAT_DF24S8: return FORMAT_D32FS8_SHADOW;
3726 case FORMAT_DF16S8: return FORMAT_D32FS8_SHADOW;
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04003727 case FORMAT_YV12_BT601: return FORMAT_YV12_BT601;
3728 case FORMAT_YV12_BT709: return FORMAT_YV12_BT709;
3729 case FORMAT_YV12_JFIF: return FORMAT_YV12_JFIF;
John Bauman89401822014-05-06 15:04:28 -04003730 default:
3731 ASSERT(false);
3732 }
3733
3734 return FORMAT_NULL;
3735 }
3736
3737 void Surface::setTexturePalette(unsigned int *palette)
3738 {
3739 Surface::palette = palette;
3740 Surface::paletteID++;
3741 }
3742
3743 void Surface::resolve()
3744 {
3745 if(internal.depth <= 1 || !internal.dirty || !renderTarget || internal.format == FORMAT_NULL)
3746 {
3747 return;
3748 }
3749
3750 void *source = internal.lockRect(0, 0, 0, LOCK_READWRITE);
3751
John Bauman89401822014-05-06 15:04:28 -04003752 int width = internal.width;
3753 int height = internal.height;
3754 int pitch = internal.pitchB;
3755 int slice = internal.sliceB;
3756
3757 unsigned char *source0 = (unsigned char*)source;
3758 unsigned char *source1 = source0 + slice;
3759 unsigned char *source2 = source1 + slice;
3760 unsigned char *source3 = source2 + slice;
3761 unsigned char *source4 = source3 + slice;
3762 unsigned char *source5 = source4 + slice;
3763 unsigned char *source6 = source5 + slice;
3764 unsigned char *source7 = source6 + slice;
3765 unsigned char *source8 = source7 + slice;
3766 unsigned char *source9 = source8 + slice;
3767 unsigned char *sourceA = source9 + slice;
3768 unsigned char *sourceB = sourceA + slice;
3769 unsigned char *sourceC = sourceB + slice;
3770 unsigned char *sourceD = sourceC + slice;
3771 unsigned char *sourceE = sourceD + slice;
3772 unsigned char *sourceF = sourceE + slice;
3773
Alexis Hetu049a1872016-04-25 16:59:58 -04003774 if(internal.format == FORMAT_X8R8G8B8 || internal.format == FORMAT_A8R8G8B8 ||
3775 internal.format == FORMAT_X8B8G8R8 || internal.format == FORMAT_A8B8G8R8 ||
3776 internal.format == FORMAT_SRGB8_X8 || internal.format == FORMAT_SRGB8_A8)
John Bauman89401822014-05-06 15:04:28 -04003777 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04003778 #if defined(__i386__) || defined(__x86_64__)
3779 if(CPUID::supportsSSE2() && (width % 4) == 0)
John Bauman89401822014-05-06 15:04:28 -04003780 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04003781 if(internal.depth == 2)
John Bauman89401822014-05-06 15:04:28 -04003782 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04003783 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04003784 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04003785 for(int x = 0; x < width; x += 4)
3786 {
3787 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
3788 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04003789
Nicolas Capens47dc8672017-04-25 12:54:39 -04003790 c0 = _mm_avg_epu8(c0, c1);
John Bauman89401822014-05-06 15:04:28 -04003791
Nicolas Capens47dc8672017-04-25 12:54:39 -04003792 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
3793 }
3794
3795 source0 += pitch;
3796 source1 += pitch;
John Bauman89401822014-05-06 15:04:28 -04003797 }
John Bauman89401822014-05-06 15:04:28 -04003798 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04003799 else if(internal.depth == 4)
John Bauman89401822014-05-06 15:04:28 -04003800 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04003801 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04003802 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04003803 for(int x = 0; x < width; x += 4)
3804 {
3805 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
3806 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
3807 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x));
3808 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04003809
Nicolas Capens47dc8672017-04-25 12:54:39 -04003810 c0 = _mm_avg_epu8(c0, c1);
3811 c2 = _mm_avg_epu8(c2, c3);
3812 c0 = _mm_avg_epu8(c0, c2);
John Bauman89401822014-05-06 15:04:28 -04003813
Nicolas Capens47dc8672017-04-25 12:54:39 -04003814 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
3815 }
3816
3817 source0 += pitch;
3818 source1 += pitch;
3819 source2 += pitch;
3820 source3 += pitch;
John Bauman89401822014-05-06 15:04:28 -04003821 }
John Bauman89401822014-05-06 15:04:28 -04003822 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04003823 else if(internal.depth == 8)
John Bauman89401822014-05-06 15:04:28 -04003824 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04003825 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04003826 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04003827 for(int x = 0; x < width; x += 4)
3828 {
3829 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
3830 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
3831 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x));
3832 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x));
3833 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x));
3834 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x));
3835 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x));
3836 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04003837
Nicolas Capens47dc8672017-04-25 12:54:39 -04003838 c0 = _mm_avg_epu8(c0, c1);
3839 c2 = _mm_avg_epu8(c2, c3);
3840 c4 = _mm_avg_epu8(c4, c5);
3841 c6 = _mm_avg_epu8(c6, c7);
3842 c0 = _mm_avg_epu8(c0, c2);
3843 c4 = _mm_avg_epu8(c4, c6);
3844 c0 = _mm_avg_epu8(c0, c4);
John Bauman89401822014-05-06 15:04:28 -04003845
Nicolas Capens47dc8672017-04-25 12:54:39 -04003846 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
3847 }
3848
3849 source0 += pitch;
3850 source1 += pitch;
3851 source2 += pitch;
3852 source3 += pitch;
3853 source4 += pitch;
3854 source5 += pitch;
3855 source6 += pitch;
3856 source7 += pitch;
John Bauman89401822014-05-06 15:04:28 -04003857 }
John Bauman89401822014-05-06 15:04:28 -04003858 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04003859 else if(internal.depth == 16)
John Bauman89401822014-05-06 15:04:28 -04003860 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04003861 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04003862 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04003863 for(int x = 0; x < width; x += 4)
3864 {
3865 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
3866 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
3867 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x));
3868 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x));
3869 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x));
3870 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x));
3871 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x));
3872 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x));
3873 __m128i c8 = _mm_load_si128((__m128i*)(source8 + 4 * x));
3874 __m128i c9 = _mm_load_si128((__m128i*)(source9 + 4 * x));
3875 __m128i cA = _mm_load_si128((__m128i*)(sourceA + 4 * x));
3876 __m128i cB = _mm_load_si128((__m128i*)(sourceB + 4 * x));
3877 __m128i cC = _mm_load_si128((__m128i*)(sourceC + 4 * x));
3878 __m128i cD = _mm_load_si128((__m128i*)(sourceD + 4 * x));
3879 __m128i cE = _mm_load_si128((__m128i*)(sourceE + 4 * x));
3880 __m128i cF = _mm_load_si128((__m128i*)(sourceF + 4 * x));
John Bauman89401822014-05-06 15:04:28 -04003881
Nicolas Capens47dc8672017-04-25 12:54:39 -04003882 c0 = _mm_avg_epu8(c0, c1);
3883 c2 = _mm_avg_epu8(c2, c3);
3884 c4 = _mm_avg_epu8(c4, c5);
3885 c6 = _mm_avg_epu8(c6, c7);
3886 c8 = _mm_avg_epu8(c8, c9);
3887 cA = _mm_avg_epu8(cA, cB);
3888 cC = _mm_avg_epu8(cC, cD);
3889 cE = _mm_avg_epu8(cE, cF);
3890 c0 = _mm_avg_epu8(c0, c2);
3891 c4 = _mm_avg_epu8(c4, c6);
3892 c8 = _mm_avg_epu8(c8, cA);
3893 cC = _mm_avg_epu8(cC, cE);
3894 c0 = _mm_avg_epu8(c0, c4);
3895 c8 = _mm_avg_epu8(c8, cC);
3896 c0 = _mm_avg_epu8(c0, c8);
John Bauman89401822014-05-06 15:04:28 -04003897
Nicolas Capens47dc8672017-04-25 12:54:39 -04003898 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
3899 }
3900
3901 source0 += pitch;
3902 source1 += pitch;
3903 source2 += pitch;
3904 source3 += pitch;
3905 source4 += pitch;
3906 source5 += pitch;
3907 source6 += pitch;
3908 source7 += pitch;
3909 source8 += pitch;
3910 source9 += pitch;
3911 sourceA += pitch;
3912 sourceB += pitch;
3913 sourceC += pitch;
3914 sourceD += pitch;
3915 sourceE += pitch;
3916 sourceF += pitch;
John Bauman89401822014-05-06 15:04:28 -04003917 }
John Bauman89401822014-05-06 15:04:28 -04003918 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04003919 else ASSERT(false);
John Bauman89401822014-05-06 15:04:28 -04003920 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04003921 else
3922 #endif
John Bauman89401822014-05-06 15:04:28 -04003923 {
3924 #define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7F7F7F7F) + (((x) ^ (y)) & 0x01010101))
3925
3926 if(internal.depth == 2)
3927 {
3928 for(int y = 0; y < height; y++)
3929 {
3930 for(int x = 0; x < width; x++)
3931 {
3932 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
3933 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
3934
3935 c0 = AVERAGE(c0, c1);
3936
3937 *(unsigned int*)(source0 + 4 * x) = c0;
3938 }
3939
3940 source0 += pitch;
3941 source1 += pitch;
3942 }
3943 }
3944 else if(internal.depth == 4)
3945 {
3946 for(int y = 0; y < height; y++)
3947 {
3948 for(int x = 0; x < width; x++)
3949 {
3950 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
3951 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
3952 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
3953 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
3954
3955 c0 = AVERAGE(c0, c1);
3956 c2 = AVERAGE(c2, c3);
3957 c0 = AVERAGE(c0, c2);
3958
3959 *(unsigned int*)(source0 + 4 * x) = c0;
3960 }
3961
3962 source0 += pitch;
3963 source1 += pitch;
3964 source2 += pitch;
3965 source3 += pitch;
3966 }
3967 }
3968 else if(internal.depth == 8)
3969 {
3970 for(int y = 0; y < height; y++)
3971 {
3972 for(int x = 0; x < width; x++)
3973 {
3974 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
3975 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
3976 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
3977 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
3978 unsigned int c4 = *(unsigned int*)(source4 + 4 * x);
3979 unsigned int c5 = *(unsigned int*)(source5 + 4 * x);
3980 unsigned int c6 = *(unsigned int*)(source6 + 4 * x);
3981 unsigned int c7 = *(unsigned int*)(source7 + 4 * x);
3982
3983 c0 = AVERAGE(c0, c1);
3984 c2 = AVERAGE(c2, c3);
3985 c4 = AVERAGE(c4, c5);
3986 c6 = AVERAGE(c6, c7);
3987 c0 = AVERAGE(c0, c2);
3988 c4 = AVERAGE(c4, c6);
3989 c0 = AVERAGE(c0, c4);
3990
3991 *(unsigned int*)(source0 + 4 * x) = c0;
3992 }
3993
3994 source0 += pitch;
3995 source1 += pitch;
3996 source2 += pitch;
3997 source3 += pitch;
3998 source4 += pitch;
3999 source5 += pitch;
4000 source6 += pitch;
4001 source7 += pitch;
4002 }
4003 }
4004 else if(internal.depth == 16)
4005 {
4006 for(int y = 0; y < height; y++)
4007 {
4008 for(int x = 0; x < width; x++)
4009 {
4010 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4011 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4012 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4013 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4014 unsigned int c4 = *(unsigned int*)(source4 + 4 * x);
4015 unsigned int c5 = *(unsigned int*)(source5 + 4 * x);
4016 unsigned int c6 = *(unsigned int*)(source6 + 4 * x);
4017 unsigned int c7 = *(unsigned int*)(source7 + 4 * x);
4018 unsigned int c8 = *(unsigned int*)(source8 + 4 * x);
4019 unsigned int c9 = *(unsigned int*)(source9 + 4 * x);
4020 unsigned int cA = *(unsigned int*)(sourceA + 4 * x);
4021 unsigned int cB = *(unsigned int*)(sourceB + 4 * x);
4022 unsigned int cC = *(unsigned int*)(sourceC + 4 * x);
4023 unsigned int cD = *(unsigned int*)(sourceD + 4 * x);
4024 unsigned int cE = *(unsigned int*)(sourceE + 4 * x);
4025 unsigned int cF = *(unsigned int*)(sourceF + 4 * x);
4026
4027 c0 = AVERAGE(c0, c1);
4028 c2 = AVERAGE(c2, c3);
4029 c4 = AVERAGE(c4, c5);
4030 c6 = AVERAGE(c6, c7);
4031 c8 = AVERAGE(c8, c9);
4032 cA = AVERAGE(cA, cB);
4033 cC = AVERAGE(cC, cD);
4034 cE = AVERAGE(cE, cF);
4035 c0 = AVERAGE(c0, c2);
4036 c4 = AVERAGE(c4, c6);
4037 c8 = AVERAGE(c8, cA);
4038 cC = AVERAGE(cC, cE);
4039 c0 = AVERAGE(c0, c4);
4040 c8 = AVERAGE(c8, cC);
4041 c0 = AVERAGE(c0, c8);
4042
4043 *(unsigned int*)(source0 + 4 * x) = c0;
4044 }
4045
4046 source0 += pitch;
4047 source1 += pitch;
4048 source2 += pitch;
4049 source3 += pitch;
4050 source4 += pitch;
4051 source5 += pitch;
4052 source6 += pitch;
4053 source7 += pitch;
4054 source8 += pitch;
4055 source9 += pitch;
4056 sourceA += pitch;
4057 sourceB += pitch;
4058 sourceC += pitch;
4059 sourceD += pitch;
4060 sourceE += pitch;
4061 sourceF += pitch;
4062 }
4063 }
4064 else ASSERT(false);
4065
4066 #undef AVERAGE
4067 }
4068 }
4069 else if(internal.format == FORMAT_G16R16)
4070 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004071
4072 #if defined(__i386__) || defined(__x86_64__)
4073 if(CPUID::supportsSSE2() && (width % 4) == 0)
John Bauman89401822014-05-06 15:04:28 -04004074 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004075 if(internal.depth == 2)
John Bauman89401822014-05-06 15:04:28 -04004076 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004077 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004078 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004079 for(int x = 0; x < width; x += 4)
4080 {
4081 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
4082 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004083
Nicolas Capens47dc8672017-04-25 12:54:39 -04004084 c0 = _mm_avg_epu16(c0, c1);
John Bauman89401822014-05-06 15:04:28 -04004085
Nicolas Capens47dc8672017-04-25 12:54:39 -04004086 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
4087 }
4088
4089 source0 += pitch;
4090 source1 += pitch;
John Bauman89401822014-05-06 15:04:28 -04004091 }
John Bauman89401822014-05-06 15:04:28 -04004092 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04004093 else if(internal.depth == 4)
John Bauman89401822014-05-06 15:04:28 -04004094 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004095 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004096 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004097 for(int x = 0; x < width; x += 4)
4098 {
4099 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
4100 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
4101 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x));
4102 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004103
Nicolas Capens47dc8672017-04-25 12:54:39 -04004104 c0 = _mm_avg_epu16(c0, c1);
4105 c2 = _mm_avg_epu16(c2, c3);
4106 c0 = _mm_avg_epu16(c0, c2);
John Bauman89401822014-05-06 15:04:28 -04004107
Nicolas Capens47dc8672017-04-25 12:54:39 -04004108 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
4109 }
4110
4111 source0 += pitch;
4112 source1 += pitch;
4113 source2 += pitch;
4114 source3 += pitch;
John Bauman89401822014-05-06 15:04:28 -04004115 }
John Bauman89401822014-05-06 15:04:28 -04004116 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04004117 else if(internal.depth == 8)
John Bauman89401822014-05-06 15:04:28 -04004118 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004119 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004120 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004121 for(int x = 0; x < width; x += 4)
4122 {
4123 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
4124 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
4125 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x));
4126 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x));
4127 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x));
4128 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x));
4129 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x));
4130 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004131
Nicolas Capens47dc8672017-04-25 12:54:39 -04004132 c0 = _mm_avg_epu16(c0, c1);
4133 c2 = _mm_avg_epu16(c2, c3);
4134 c4 = _mm_avg_epu16(c4, c5);
4135 c6 = _mm_avg_epu16(c6, c7);
4136 c0 = _mm_avg_epu16(c0, c2);
4137 c4 = _mm_avg_epu16(c4, c6);
4138 c0 = _mm_avg_epu16(c0, c4);
John Bauman89401822014-05-06 15:04:28 -04004139
Nicolas Capens47dc8672017-04-25 12:54:39 -04004140 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
4141 }
4142
4143 source0 += pitch;
4144 source1 += pitch;
4145 source2 += pitch;
4146 source3 += pitch;
4147 source4 += pitch;
4148 source5 += pitch;
4149 source6 += pitch;
4150 source7 += pitch;
John Bauman89401822014-05-06 15:04:28 -04004151 }
John Bauman89401822014-05-06 15:04:28 -04004152 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04004153 else if(internal.depth == 16)
John Bauman89401822014-05-06 15:04:28 -04004154 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004155 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004156 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004157 for(int x = 0; x < width; x += 4)
4158 {
4159 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
4160 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
4161 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x));
4162 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x));
4163 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x));
4164 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x));
4165 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x));
4166 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x));
4167 __m128i c8 = _mm_load_si128((__m128i*)(source8 + 4 * x));
4168 __m128i c9 = _mm_load_si128((__m128i*)(source9 + 4 * x));
4169 __m128i cA = _mm_load_si128((__m128i*)(sourceA + 4 * x));
4170 __m128i cB = _mm_load_si128((__m128i*)(sourceB + 4 * x));
4171 __m128i cC = _mm_load_si128((__m128i*)(sourceC + 4 * x));
4172 __m128i cD = _mm_load_si128((__m128i*)(sourceD + 4 * x));
4173 __m128i cE = _mm_load_si128((__m128i*)(sourceE + 4 * x));
4174 __m128i cF = _mm_load_si128((__m128i*)(sourceF + 4 * x));
John Bauman89401822014-05-06 15:04:28 -04004175
Nicolas Capens47dc8672017-04-25 12:54:39 -04004176 c0 = _mm_avg_epu16(c0, c1);
4177 c2 = _mm_avg_epu16(c2, c3);
4178 c4 = _mm_avg_epu16(c4, c5);
4179 c6 = _mm_avg_epu16(c6, c7);
4180 c8 = _mm_avg_epu16(c8, c9);
4181 cA = _mm_avg_epu16(cA, cB);
4182 cC = _mm_avg_epu16(cC, cD);
4183 cE = _mm_avg_epu16(cE, cF);
4184 c0 = _mm_avg_epu16(c0, c2);
4185 c4 = _mm_avg_epu16(c4, c6);
4186 c8 = _mm_avg_epu16(c8, cA);
4187 cC = _mm_avg_epu16(cC, cE);
4188 c0 = _mm_avg_epu16(c0, c4);
4189 c8 = _mm_avg_epu16(c8, cC);
4190 c0 = _mm_avg_epu16(c0, c8);
John Bauman89401822014-05-06 15:04:28 -04004191
Nicolas Capens47dc8672017-04-25 12:54:39 -04004192 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
4193 }
4194
4195 source0 += pitch;
4196 source1 += pitch;
4197 source2 += pitch;
4198 source3 += pitch;
4199 source4 += pitch;
4200 source5 += pitch;
4201 source6 += pitch;
4202 source7 += pitch;
4203 source8 += pitch;
4204 source9 += pitch;
4205 sourceA += pitch;
4206 sourceB += pitch;
4207 sourceC += pitch;
4208 sourceD += pitch;
4209 sourceE += pitch;
4210 sourceF += pitch;
John Bauman89401822014-05-06 15:04:28 -04004211 }
John Bauman89401822014-05-06 15:04:28 -04004212 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04004213 else ASSERT(false);
John Bauman89401822014-05-06 15:04:28 -04004214 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04004215 else
4216 #endif
John Bauman89401822014-05-06 15:04:28 -04004217 {
4218 #define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7FFF7FFF) + (((x) ^ (y)) & 0x00010001))
4219
4220 if(internal.depth == 2)
4221 {
4222 for(int y = 0; y < height; y++)
4223 {
4224 for(int x = 0; x < width; x++)
4225 {
4226 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4227 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4228
4229 c0 = AVERAGE(c0, c1);
4230
4231 *(unsigned int*)(source0 + 4 * x) = c0;
4232 }
4233
4234 source0 += pitch;
4235 source1 += pitch;
4236 }
4237 }
4238 else if(internal.depth == 4)
4239 {
4240 for(int y = 0; y < height; y++)
4241 {
4242 for(int x = 0; x < width; x++)
4243 {
4244 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4245 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4246 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4247 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4248
4249 c0 = AVERAGE(c0, c1);
4250 c2 = AVERAGE(c2, c3);
4251 c0 = AVERAGE(c0, c2);
4252
4253 *(unsigned int*)(source0 + 4 * x) = c0;
4254 }
4255
4256 source0 += pitch;
4257 source1 += pitch;
4258 source2 += pitch;
4259 source3 += pitch;
4260 }
4261 }
4262 else if(internal.depth == 8)
4263 {
4264 for(int y = 0; y < height; y++)
4265 {
4266 for(int x = 0; x < width; x++)
4267 {
4268 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4269 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4270 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4271 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4272 unsigned int c4 = *(unsigned int*)(source4 + 4 * x);
4273 unsigned int c5 = *(unsigned int*)(source5 + 4 * x);
4274 unsigned int c6 = *(unsigned int*)(source6 + 4 * x);
4275 unsigned int c7 = *(unsigned int*)(source7 + 4 * x);
4276
4277 c0 = AVERAGE(c0, c1);
4278 c2 = AVERAGE(c2, c3);
4279 c4 = AVERAGE(c4, c5);
4280 c6 = AVERAGE(c6, c7);
4281 c0 = AVERAGE(c0, c2);
4282 c4 = AVERAGE(c4, c6);
4283 c0 = AVERAGE(c0, c4);
4284
4285 *(unsigned int*)(source0 + 4 * x) = c0;
4286 }
4287
4288 source0 += pitch;
4289 source1 += pitch;
4290 source2 += pitch;
4291 source3 += pitch;
4292 source4 += pitch;
4293 source5 += pitch;
4294 source6 += pitch;
4295 source7 += pitch;
4296 }
4297 }
4298 else if(internal.depth == 16)
4299 {
4300 for(int y = 0; y < height; y++)
4301 {
4302 for(int x = 0; x < width; x++)
4303 {
4304 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4305 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4306 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4307 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4308 unsigned int c4 = *(unsigned int*)(source4 + 4 * x);
4309 unsigned int c5 = *(unsigned int*)(source5 + 4 * x);
4310 unsigned int c6 = *(unsigned int*)(source6 + 4 * x);
4311 unsigned int c7 = *(unsigned int*)(source7 + 4 * x);
4312 unsigned int c8 = *(unsigned int*)(source8 + 4 * x);
4313 unsigned int c9 = *(unsigned int*)(source9 + 4 * x);
4314 unsigned int cA = *(unsigned int*)(sourceA + 4 * x);
4315 unsigned int cB = *(unsigned int*)(sourceB + 4 * x);
4316 unsigned int cC = *(unsigned int*)(sourceC + 4 * x);
4317 unsigned int cD = *(unsigned int*)(sourceD + 4 * x);
4318 unsigned int cE = *(unsigned int*)(sourceE + 4 * x);
4319 unsigned int cF = *(unsigned int*)(sourceF + 4 * x);
4320
4321 c0 = AVERAGE(c0, c1);
4322 c2 = AVERAGE(c2, c3);
4323 c4 = AVERAGE(c4, c5);
4324 c6 = AVERAGE(c6, c7);
4325 c8 = AVERAGE(c8, c9);
4326 cA = AVERAGE(cA, cB);
4327 cC = AVERAGE(cC, cD);
4328 cE = AVERAGE(cE, cF);
4329 c0 = AVERAGE(c0, c2);
4330 c4 = AVERAGE(c4, c6);
4331 c8 = AVERAGE(c8, cA);
4332 cC = AVERAGE(cC, cE);
4333 c0 = AVERAGE(c0, c4);
4334 c8 = AVERAGE(c8, cC);
4335 c0 = AVERAGE(c0, c8);
4336
4337 *(unsigned int*)(source0 + 4 * x) = c0;
4338 }
4339
4340 source0 += pitch;
4341 source1 += pitch;
4342 source2 += pitch;
4343 source3 += pitch;
4344 source4 += pitch;
4345 source5 += pitch;
4346 source6 += pitch;
4347 source7 += pitch;
4348 source8 += pitch;
4349 source9 += pitch;
4350 sourceA += pitch;
4351 sourceB += pitch;
4352 sourceC += pitch;
4353 sourceD += pitch;
4354 sourceE += pitch;
4355 sourceF += pitch;
4356 }
4357 }
4358 else ASSERT(false);
4359
4360 #undef AVERAGE
4361 }
4362 }
4363 else if(internal.format == FORMAT_A16B16G16R16)
4364 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004365 #if defined(__i386__) || defined(__x86_64__)
4366 if(CPUID::supportsSSE2() && (width % 2) == 0)
John Bauman89401822014-05-06 15:04:28 -04004367 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004368 if(internal.depth == 2)
John Bauman89401822014-05-06 15:04:28 -04004369 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004370 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004371 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004372 for(int x = 0; x < width; x += 2)
4373 {
4374 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x));
4375 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004376
Nicolas Capens47dc8672017-04-25 12:54:39 -04004377 c0 = _mm_avg_epu16(c0, c1);
John Bauman89401822014-05-06 15:04:28 -04004378
Nicolas Capens47dc8672017-04-25 12:54:39 -04004379 _mm_store_si128((__m128i*)(source0 + 8 * x), c0);
4380 }
4381
4382 source0 += pitch;
4383 source1 += pitch;
John Bauman89401822014-05-06 15:04:28 -04004384 }
John Bauman89401822014-05-06 15:04:28 -04004385 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04004386 else if(internal.depth == 4)
John Bauman89401822014-05-06 15:04:28 -04004387 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004388 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004389 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004390 for(int x = 0; x < width; x += 2)
4391 {
4392 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x));
4393 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x));
4394 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 8 * x));
4395 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 8 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004396
Nicolas Capens47dc8672017-04-25 12:54:39 -04004397 c0 = _mm_avg_epu16(c0, c1);
4398 c2 = _mm_avg_epu16(c2, c3);
4399 c0 = _mm_avg_epu16(c0, c2);
John Bauman89401822014-05-06 15:04:28 -04004400
Nicolas Capens47dc8672017-04-25 12:54:39 -04004401 _mm_store_si128((__m128i*)(source0 + 8 * x), c0);
4402 }
4403
4404 source0 += pitch;
4405 source1 += pitch;
4406 source2 += pitch;
4407 source3 += pitch;
John Bauman89401822014-05-06 15:04:28 -04004408 }
John Bauman89401822014-05-06 15:04:28 -04004409 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04004410 else if(internal.depth == 8)
John Bauman89401822014-05-06 15:04:28 -04004411 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004412 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004413 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004414 for(int x = 0; x < width; x += 2)
4415 {
4416 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x));
4417 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x));
4418 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 8 * x));
4419 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 8 * x));
4420 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 8 * x));
4421 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 8 * x));
4422 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 8 * x));
4423 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 8 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004424
Nicolas Capens47dc8672017-04-25 12:54:39 -04004425 c0 = _mm_avg_epu16(c0, c1);
4426 c2 = _mm_avg_epu16(c2, c3);
4427 c4 = _mm_avg_epu16(c4, c5);
4428 c6 = _mm_avg_epu16(c6, c7);
4429 c0 = _mm_avg_epu16(c0, c2);
4430 c4 = _mm_avg_epu16(c4, c6);
4431 c0 = _mm_avg_epu16(c0, c4);
John Bauman89401822014-05-06 15:04:28 -04004432
Nicolas Capens47dc8672017-04-25 12:54:39 -04004433 _mm_store_si128((__m128i*)(source0 + 8 * x), c0);
4434 }
4435
4436 source0 += pitch;
4437 source1 += pitch;
4438 source2 += pitch;
4439 source3 += pitch;
4440 source4 += pitch;
4441 source5 += pitch;
4442 source6 += pitch;
4443 source7 += pitch;
John Bauman89401822014-05-06 15:04:28 -04004444 }
John Bauman89401822014-05-06 15:04:28 -04004445 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04004446 else if(internal.depth == 16)
John Bauman89401822014-05-06 15:04:28 -04004447 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004448 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004449 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004450 for(int x = 0; x < width; x += 2)
4451 {
4452 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x));
4453 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x));
4454 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 8 * x));
4455 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 8 * x));
4456 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 8 * x));
4457 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 8 * x));
4458 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 8 * x));
4459 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 8 * x));
4460 __m128i c8 = _mm_load_si128((__m128i*)(source8 + 8 * x));
4461 __m128i c9 = _mm_load_si128((__m128i*)(source9 + 8 * x));
4462 __m128i cA = _mm_load_si128((__m128i*)(sourceA + 8 * x));
4463 __m128i cB = _mm_load_si128((__m128i*)(sourceB + 8 * x));
4464 __m128i cC = _mm_load_si128((__m128i*)(sourceC + 8 * x));
4465 __m128i cD = _mm_load_si128((__m128i*)(sourceD + 8 * x));
4466 __m128i cE = _mm_load_si128((__m128i*)(sourceE + 8 * x));
4467 __m128i cF = _mm_load_si128((__m128i*)(sourceF + 8 * x));
John Bauman89401822014-05-06 15:04:28 -04004468
Nicolas Capens47dc8672017-04-25 12:54:39 -04004469 c0 = _mm_avg_epu16(c0, c1);
4470 c2 = _mm_avg_epu16(c2, c3);
4471 c4 = _mm_avg_epu16(c4, c5);
4472 c6 = _mm_avg_epu16(c6, c7);
4473 c8 = _mm_avg_epu16(c8, c9);
4474 cA = _mm_avg_epu16(cA, cB);
4475 cC = _mm_avg_epu16(cC, cD);
4476 cE = _mm_avg_epu16(cE, cF);
4477 c0 = _mm_avg_epu16(c0, c2);
4478 c4 = _mm_avg_epu16(c4, c6);
4479 c8 = _mm_avg_epu16(c8, cA);
4480 cC = _mm_avg_epu16(cC, cE);
4481 c0 = _mm_avg_epu16(c0, c4);
4482 c8 = _mm_avg_epu16(c8, cC);
4483 c0 = _mm_avg_epu16(c0, c8);
John Bauman89401822014-05-06 15:04:28 -04004484
Nicolas Capens47dc8672017-04-25 12:54:39 -04004485 _mm_store_si128((__m128i*)(source0 + 8 * x), c0);
4486 }
4487
4488 source0 += pitch;
4489 source1 += pitch;
4490 source2 += pitch;
4491 source3 += pitch;
4492 source4 += pitch;
4493 source5 += pitch;
4494 source6 += pitch;
4495 source7 += pitch;
4496 source8 += pitch;
4497 source9 += pitch;
4498 sourceA += pitch;
4499 sourceB += pitch;
4500 sourceC += pitch;
4501 sourceD += pitch;
4502 sourceE += pitch;
4503 sourceF += pitch;
John Bauman89401822014-05-06 15:04:28 -04004504 }
John Bauman89401822014-05-06 15:04:28 -04004505 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04004506 else ASSERT(false);
John Bauman89401822014-05-06 15:04:28 -04004507 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04004508 else
4509 #endif
John Bauman89401822014-05-06 15:04:28 -04004510 {
4511 #define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7FFF7FFF) + (((x) ^ (y)) & 0x00010001))
4512
4513 if(internal.depth == 2)
4514 {
4515 for(int y = 0; y < height; y++)
4516 {
4517 for(int x = 0; x < 2 * width; x++)
4518 {
4519 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4520 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4521
4522 c0 = AVERAGE(c0, c1);
4523
4524 *(unsigned int*)(source0 + 4 * x) = c0;
4525 }
4526
4527 source0 += pitch;
4528 source1 += pitch;
4529 }
4530 }
4531 else if(internal.depth == 4)
4532 {
4533 for(int y = 0; y < height; y++)
4534 {
4535 for(int x = 0; x < 2 * width; x++)
4536 {
4537 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4538 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4539 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4540 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4541
4542 c0 = AVERAGE(c0, c1);
4543 c2 = AVERAGE(c2, c3);
4544 c0 = AVERAGE(c0, c2);
4545
4546 *(unsigned int*)(source0 + 4 * x) = c0;
4547 }
4548
4549 source0 += pitch;
4550 source1 += pitch;
4551 source2 += pitch;
4552 source3 += pitch;
4553 }
4554 }
4555 else if(internal.depth == 8)
4556 {
4557 for(int y = 0; y < height; y++)
4558 {
4559 for(int x = 0; x < 2 * width; x++)
4560 {
4561 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4562 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4563 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4564 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4565 unsigned int c4 = *(unsigned int*)(source4 + 4 * x);
4566 unsigned int c5 = *(unsigned int*)(source5 + 4 * x);
4567 unsigned int c6 = *(unsigned int*)(source6 + 4 * x);
4568 unsigned int c7 = *(unsigned int*)(source7 + 4 * x);
4569
4570 c0 = AVERAGE(c0, c1);
4571 c2 = AVERAGE(c2, c3);
4572 c4 = AVERAGE(c4, c5);
4573 c6 = AVERAGE(c6, c7);
4574 c0 = AVERAGE(c0, c2);
4575 c4 = AVERAGE(c4, c6);
4576 c0 = AVERAGE(c0, c4);
4577
4578 *(unsigned int*)(source0 + 4 * x) = c0;
4579 }
4580
4581 source0 += pitch;
4582 source1 += pitch;
4583 source2 += pitch;
4584 source3 += pitch;
4585 source4 += pitch;
4586 source5 += pitch;
4587 source6 += pitch;
4588 source7 += pitch;
4589 }
4590 }
4591 else if(internal.depth == 16)
4592 {
4593 for(int y = 0; y < height; y++)
4594 {
4595 for(int x = 0; x < 2 * width; x++)
4596 {
4597 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4598 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4599 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4600 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4601 unsigned int c4 = *(unsigned int*)(source4 + 4 * x);
4602 unsigned int c5 = *(unsigned int*)(source5 + 4 * x);
4603 unsigned int c6 = *(unsigned int*)(source6 + 4 * x);
4604 unsigned int c7 = *(unsigned int*)(source7 + 4 * x);
4605 unsigned int c8 = *(unsigned int*)(source8 + 4 * x);
4606 unsigned int c9 = *(unsigned int*)(source9 + 4 * x);
4607 unsigned int cA = *(unsigned int*)(sourceA + 4 * x);
4608 unsigned int cB = *(unsigned int*)(sourceB + 4 * x);
4609 unsigned int cC = *(unsigned int*)(sourceC + 4 * x);
4610 unsigned int cD = *(unsigned int*)(sourceD + 4 * x);
4611 unsigned int cE = *(unsigned int*)(sourceE + 4 * x);
4612 unsigned int cF = *(unsigned int*)(sourceF + 4 * x);
4613
4614 c0 = AVERAGE(c0, c1);
4615 c2 = AVERAGE(c2, c3);
4616 c4 = AVERAGE(c4, c5);
4617 c6 = AVERAGE(c6, c7);
4618 c8 = AVERAGE(c8, c9);
4619 cA = AVERAGE(cA, cB);
4620 cC = AVERAGE(cC, cD);
4621 cE = AVERAGE(cE, cF);
4622 c0 = AVERAGE(c0, c2);
4623 c4 = AVERAGE(c4, c6);
4624 c8 = AVERAGE(c8, cA);
4625 cC = AVERAGE(cC, cE);
4626 c0 = AVERAGE(c0, c4);
4627 c8 = AVERAGE(c8, cC);
4628 c0 = AVERAGE(c0, c8);
4629
4630 *(unsigned int*)(source0 + 4 * x) = c0;
4631 }
4632
4633 source0 += pitch;
4634 source1 += pitch;
4635 source2 += pitch;
4636 source3 += pitch;
4637 source4 += pitch;
4638 source5 += pitch;
4639 source6 += pitch;
4640 source7 += pitch;
4641 source8 += pitch;
4642 source9 += pitch;
4643 sourceA += pitch;
4644 sourceB += pitch;
4645 sourceC += pitch;
4646 sourceD += pitch;
4647 sourceE += pitch;
4648 sourceF += pitch;
4649 }
4650 }
4651 else ASSERT(false);
4652
4653 #undef AVERAGE
4654 }
4655 }
4656 else if(internal.format == FORMAT_R32F)
4657 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004658 #if defined(__i386__) || defined(__x86_64__)
4659 if(CPUID::supportsSSE() && (width % 4) == 0)
John Bauman89401822014-05-06 15:04:28 -04004660 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004661 if(internal.depth == 2)
John Bauman89401822014-05-06 15:04:28 -04004662 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004663 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004664 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004665 for(int x = 0; x < width; x += 4)
4666 {
4667 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x));
4668 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004669
Nicolas Capens47dc8672017-04-25 12:54:39 -04004670 c0 = _mm_add_ps(c0, c1);
4671 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 2.0f));
John Bauman89401822014-05-06 15:04:28 -04004672
Nicolas Capens47dc8672017-04-25 12:54:39 -04004673 _mm_store_ps((float*)(source0 + 4 * x), c0);
4674 }
4675
4676 source0 += pitch;
4677 source1 += pitch;
John Bauman89401822014-05-06 15:04:28 -04004678 }
John Bauman89401822014-05-06 15:04:28 -04004679 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04004680 else if(internal.depth == 4)
John Bauman89401822014-05-06 15:04:28 -04004681 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004682 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004683 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004684 for(int x = 0; x < width; x += 4)
4685 {
4686 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x));
4687 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x));
4688 __m128 c2 = _mm_load_ps((float*)(source2 + 4 * x));
4689 __m128 c3 = _mm_load_ps((float*)(source3 + 4 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004690
Nicolas Capens47dc8672017-04-25 12:54:39 -04004691 c0 = _mm_add_ps(c0, c1);
4692 c2 = _mm_add_ps(c2, c3);
4693 c0 = _mm_add_ps(c0, c2);
4694 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 4.0f));
John Bauman89401822014-05-06 15:04:28 -04004695
Nicolas Capens47dc8672017-04-25 12:54:39 -04004696 _mm_store_ps((float*)(source0 + 4 * x), c0);
4697 }
4698
4699 source0 += pitch;
4700 source1 += pitch;
4701 source2 += pitch;
4702 source3 += pitch;
John Bauman89401822014-05-06 15:04:28 -04004703 }
John Bauman89401822014-05-06 15:04:28 -04004704 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04004705 else if(internal.depth == 8)
John Bauman89401822014-05-06 15:04:28 -04004706 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004707 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004708 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004709 for(int x = 0; x < width; x += 4)
4710 {
4711 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x));
4712 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x));
4713 __m128 c2 = _mm_load_ps((float*)(source2 + 4 * x));
4714 __m128 c3 = _mm_load_ps((float*)(source3 + 4 * x));
4715 __m128 c4 = _mm_load_ps((float*)(source4 + 4 * x));
4716 __m128 c5 = _mm_load_ps((float*)(source5 + 4 * x));
4717 __m128 c6 = _mm_load_ps((float*)(source6 + 4 * x));
4718 __m128 c7 = _mm_load_ps((float*)(source7 + 4 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004719
Nicolas Capens47dc8672017-04-25 12:54:39 -04004720 c0 = _mm_add_ps(c0, c1);
4721 c2 = _mm_add_ps(c2, c3);
4722 c4 = _mm_add_ps(c4, c5);
4723 c6 = _mm_add_ps(c6, c7);
4724 c0 = _mm_add_ps(c0, c2);
4725 c4 = _mm_add_ps(c4, c6);
4726 c0 = _mm_add_ps(c0, c4);
4727 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 8.0f));
John Bauman89401822014-05-06 15:04:28 -04004728
Nicolas Capens47dc8672017-04-25 12:54:39 -04004729 _mm_store_ps((float*)(source0 + 4 * x), c0);
4730 }
4731
4732 source0 += pitch;
4733 source1 += pitch;
4734 source2 += pitch;
4735 source3 += pitch;
4736 source4 += pitch;
4737 source5 += pitch;
4738 source6 += pitch;
4739 source7 += pitch;
John Bauman89401822014-05-06 15:04:28 -04004740 }
John Bauman89401822014-05-06 15:04:28 -04004741 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04004742 else if(internal.depth == 16)
John Bauman89401822014-05-06 15:04:28 -04004743 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004744 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004745 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004746 for(int x = 0; x < width; x += 4)
4747 {
4748 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x));
4749 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x));
4750 __m128 c2 = _mm_load_ps((float*)(source2 + 4 * x));
4751 __m128 c3 = _mm_load_ps((float*)(source3 + 4 * x));
4752 __m128 c4 = _mm_load_ps((float*)(source4 + 4 * x));
4753 __m128 c5 = _mm_load_ps((float*)(source5 + 4 * x));
4754 __m128 c6 = _mm_load_ps((float*)(source6 + 4 * x));
4755 __m128 c7 = _mm_load_ps((float*)(source7 + 4 * x));
4756 __m128 c8 = _mm_load_ps((float*)(source8 + 4 * x));
4757 __m128 c9 = _mm_load_ps((float*)(source9 + 4 * x));
4758 __m128 cA = _mm_load_ps((float*)(sourceA + 4 * x));
4759 __m128 cB = _mm_load_ps((float*)(sourceB + 4 * x));
4760 __m128 cC = _mm_load_ps((float*)(sourceC + 4 * x));
4761 __m128 cD = _mm_load_ps((float*)(sourceD + 4 * x));
4762 __m128 cE = _mm_load_ps((float*)(sourceE + 4 * x));
4763 __m128 cF = _mm_load_ps((float*)(sourceF + 4 * x));
John Bauman89401822014-05-06 15:04:28 -04004764
Nicolas Capens47dc8672017-04-25 12:54:39 -04004765 c0 = _mm_add_ps(c0, c1);
4766 c2 = _mm_add_ps(c2, c3);
4767 c4 = _mm_add_ps(c4, c5);
4768 c6 = _mm_add_ps(c6, c7);
4769 c8 = _mm_add_ps(c8, c9);
4770 cA = _mm_add_ps(cA, cB);
4771 cC = _mm_add_ps(cC, cD);
4772 cE = _mm_add_ps(cE, cF);
4773 c0 = _mm_add_ps(c0, c2);
4774 c4 = _mm_add_ps(c4, c6);
4775 c8 = _mm_add_ps(c8, cA);
4776 cC = _mm_add_ps(cC, cE);
4777 c0 = _mm_add_ps(c0, c4);
4778 c8 = _mm_add_ps(c8, cC);
4779 c0 = _mm_add_ps(c0, c8);
4780 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 16.0f));
John Bauman89401822014-05-06 15:04:28 -04004781
Nicolas Capens47dc8672017-04-25 12:54:39 -04004782 _mm_store_ps((float*)(source0 + 4 * x), c0);
4783 }
4784
4785 source0 += pitch;
4786 source1 += pitch;
4787 source2 += pitch;
4788 source3 += pitch;
4789 source4 += pitch;
4790 source5 += pitch;
4791 source6 += pitch;
4792 source7 += pitch;
4793 source8 += pitch;
4794 source9 += pitch;
4795 sourceA += pitch;
4796 sourceB += pitch;
4797 sourceC += pitch;
4798 sourceD += pitch;
4799 sourceE += pitch;
4800 sourceF += pitch;
John Bauman89401822014-05-06 15:04:28 -04004801 }
John Bauman89401822014-05-06 15:04:28 -04004802 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04004803 else ASSERT(false);
John Bauman89401822014-05-06 15:04:28 -04004804 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04004805 else
4806 #endif
John Bauman89401822014-05-06 15:04:28 -04004807 {
4808 if(internal.depth == 2)
4809 {
4810 for(int y = 0; y < height; y++)
4811 {
4812 for(int x = 0; x < width; x++)
4813 {
4814 float c0 = *(float*)(source0 + 4 * x);
4815 float c1 = *(float*)(source1 + 4 * x);
4816
4817 c0 = c0 + c1;
4818 c0 *= 1.0f / 2.0f;
4819
4820 *(float*)(source0 + 4 * x) = c0;
4821 }
4822
4823 source0 += pitch;
4824 source1 += pitch;
4825 }
4826 }
4827 else if(internal.depth == 4)
4828 {
4829 for(int y = 0; y < height; y++)
4830 {
4831 for(int x = 0; x < width; x++)
4832 {
4833 float c0 = *(float*)(source0 + 4 * x);
4834 float c1 = *(float*)(source1 + 4 * x);
4835 float c2 = *(float*)(source2 + 4 * x);
4836 float c3 = *(float*)(source3 + 4 * x);
4837
4838 c0 = c0 + c1;
4839 c2 = c2 + c3;
4840 c0 = c0 + c2;
4841 c0 *= 1.0f / 4.0f;
4842
4843 *(float*)(source0 + 4 * x) = c0;
4844 }
4845
4846 source0 += pitch;
4847 source1 += pitch;
4848 source2 += pitch;
4849 source3 += pitch;
4850 }
4851 }
4852 else if(internal.depth == 8)
4853 {
4854 for(int y = 0; y < height; y++)
4855 {
4856 for(int x = 0; x < width; x++)
4857 {
4858 float c0 = *(float*)(source0 + 4 * x);
4859 float c1 = *(float*)(source1 + 4 * x);
4860 float c2 = *(float*)(source2 + 4 * x);
4861 float c3 = *(float*)(source3 + 4 * x);
4862 float c4 = *(float*)(source4 + 4 * x);
4863 float c5 = *(float*)(source5 + 4 * x);
4864 float c6 = *(float*)(source6 + 4 * x);
4865 float c7 = *(float*)(source7 + 4 * x);
4866
4867 c0 = c0 + c1;
4868 c2 = c2 + c3;
4869 c4 = c4 + c5;
4870 c6 = c6 + c7;
4871 c0 = c0 + c2;
4872 c4 = c4 + c6;
4873 c0 = c0 + c4;
4874 c0 *= 1.0f / 8.0f;
4875
4876 *(float*)(source0 + 4 * x) = c0;
4877 }
4878
4879 source0 += pitch;
4880 source1 += pitch;
4881 source2 += pitch;
4882 source3 += pitch;
4883 source4 += pitch;
4884 source5 += pitch;
4885 source6 += pitch;
4886 source7 += pitch;
4887 }
4888 }
4889 else if(internal.depth == 16)
4890 {
4891 for(int y = 0; y < height; y++)
4892 {
4893 for(int x = 0; x < width; x++)
4894 {
4895 float c0 = *(float*)(source0 + 4 * x);
4896 float c1 = *(float*)(source1 + 4 * x);
4897 float c2 = *(float*)(source2 + 4 * x);
4898 float c3 = *(float*)(source3 + 4 * x);
4899 float c4 = *(float*)(source4 + 4 * x);
4900 float c5 = *(float*)(source5 + 4 * x);
4901 float c6 = *(float*)(source6 + 4 * x);
4902 float c7 = *(float*)(source7 + 4 * x);
4903 float c8 = *(float*)(source8 + 4 * x);
4904 float c9 = *(float*)(source9 + 4 * x);
4905 float cA = *(float*)(sourceA + 4 * x);
4906 float cB = *(float*)(sourceB + 4 * x);
4907 float cC = *(float*)(sourceC + 4 * x);
4908 float cD = *(float*)(sourceD + 4 * x);
4909 float cE = *(float*)(sourceE + 4 * x);
4910 float cF = *(float*)(sourceF + 4 * x);
4911
4912 c0 = c0 + c1;
4913 c2 = c2 + c3;
4914 c4 = c4 + c5;
4915 c6 = c6 + c7;
4916 c8 = c8 + c9;
4917 cA = cA + cB;
4918 cC = cC + cD;
4919 cE = cE + cF;
4920 c0 = c0 + c2;
4921 c4 = c4 + c6;
4922 c8 = c8 + cA;
4923 cC = cC + cE;
4924 c0 = c0 + c4;
4925 c8 = c8 + cC;
4926 c0 = c0 + c8;
4927 c0 *= 1.0f / 16.0f;
4928
4929 *(float*)(source0 + 4 * x) = c0;
4930 }
4931
4932 source0 += pitch;
4933 source1 += pitch;
4934 source2 += pitch;
4935 source3 += pitch;
4936 source4 += pitch;
4937 source5 += pitch;
4938 source6 += pitch;
4939 source7 += pitch;
4940 source8 += pitch;
4941 source9 += pitch;
4942 sourceA += pitch;
4943 sourceB += pitch;
4944 sourceC += pitch;
4945 sourceD += pitch;
4946 sourceE += pitch;
4947 sourceF += pitch;
4948 }
4949 }
4950 else ASSERT(false);
4951 }
4952 }
4953 else if(internal.format == FORMAT_G32R32F)
4954 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004955 #if defined(__i386__) || defined(__x86_64__)
4956 if(CPUID::supportsSSE() && (width % 2) == 0)
John Bauman89401822014-05-06 15:04:28 -04004957 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004958 if(internal.depth == 2)
John Bauman89401822014-05-06 15:04:28 -04004959 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004960 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004961 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004962 for(int x = 0; x < width; x += 2)
4963 {
4964 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x));
4965 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004966
Nicolas Capens47dc8672017-04-25 12:54:39 -04004967 c0 = _mm_add_ps(c0, c1);
4968 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 2.0f));
John Bauman89401822014-05-06 15:04:28 -04004969
Nicolas Capens47dc8672017-04-25 12:54:39 -04004970 _mm_store_ps((float*)(source0 + 8 * x), c0);
4971 }
4972
4973 source0 += pitch;
4974 source1 += pitch;
John Bauman89401822014-05-06 15:04:28 -04004975 }
John Bauman89401822014-05-06 15:04:28 -04004976 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04004977 else if(internal.depth == 4)
John Bauman89401822014-05-06 15:04:28 -04004978 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004979 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004980 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004981 for(int x = 0; x < width; x += 2)
4982 {
4983 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x));
4984 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x));
4985 __m128 c2 = _mm_load_ps((float*)(source2 + 8 * x));
4986 __m128 c3 = _mm_load_ps((float*)(source3 + 8 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004987
Nicolas Capens47dc8672017-04-25 12:54:39 -04004988 c0 = _mm_add_ps(c0, c1);
4989 c2 = _mm_add_ps(c2, c3);
4990 c0 = _mm_add_ps(c0, c2);
4991 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 4.0f));
John Bauman89401822014-05-06 15:04:28 -04004992
Nicolas Capens47dc8672017-04-25 12:54:39 -04004993 _mm_store_ps((float*)(source0 + 8 * x), c0);
4994 }
4995
4996 source0 += pitch;
4997 source1 += pitch;
4998 source2 += pitch;
4999 source3 += pitch;
John Bauman89401822014-05-06 15:04:28 -04005000 }
John Bauman89401822014-05-06 15:04:28 -04005001 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04005002 else if(internal.depth == 8)
John Bauman89401822014-05-06 15:04:28 -04005003 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005004 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04005005 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005006 for(int x = 0; x < width; x += 2)
5007 {
5008 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x));
5009 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x));
5010 __m128 c2 = _mm_load_ps((float*)(source2 + 8 * x));
5011 __m128 c3 = _mm_load_ps((float*)(source3 + 8 * x));
5012 __m128 c4 = _mm_load_ps((float*)(source4 + 8 * x));
5013 __m128 c5 = _mm_load_ps((float*)(source5 + 8 * x));
5014 __m128 c6 = _mm_load_ps((float*)(source6 + 8 * x));
5015 __m128 c7 = _mm_load_ps((float*)(source7 + 8 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04005016
Nicolas Capens47dc8672017-04-25 12:54:39 -04005017 c0 = _mm_add_ps(c0, c1);
5018 c2 = _mm_add_ps(c2, c3);
5019 c4 = _mm_add_ps(c4, c5);
5020 c6 = _mm_add_ps(c6, c7);
5021 c0 = _mm_add_ps(c0, c2);
5022 c4 = _mm_add_ps(c4, c6);
5023 c0 = _mm_add_ps(c0, c4);
5024 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 8.0f));
John Bauman89401822014-05-06 15:04:28 -04005025
Nicolas Capens47dc8672017-04-25 12:54:39 -04005026 _mm_store_ps((float*)(source0 + 8 * x), c0);
5027 }
5028
5029 source0 += pitch;
5030 source1 += pitch;
5031 source2 += pitch;
5032 source3 += pitch;
5033 source4 += pitch;
5034 source5 += pitch;
5035 source6 += pitch;
5036 source7 += pitch;
John Bauman89401822014-05-06 15:04:28 -04005037 }
John Bauman89401822014-05-06 15:04:28 -04005038 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04005039 else if(internal.depth == 16)
John Bauman89401822014-05-06 15:04:28 -04005040 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005041 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04005042 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005043 for(int x = 0; x < width; x += 2)
5044 {
5045 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x));
5046 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x));
5047 __m128 c2 = _mm_load_ps((float*)(source2 + 8 * x));
5048 __m128 c3 = _mm_load_ps((float*)(source3 + 8 * x));
5049 __m128 c4 = _mm_load_ps((float*)(source4 + 8 * x));
5050 __m128 c5 = _mm_load_ps((float*)(source5 + 8 * x));
5051 __m128 c6 = _mm_load_ps((float*)(source6 + 8 * x));
5052 __m128 c7 = _mm_load_ps((float*)(source7 + 8 * x));
5053 __m128 c8 = _mm_load_ps((float*)(source8 + 8 * x));
5054 __m128 c9 = _mm_load_ps((float*)(source9 + 8 * x));
5055 __m128 cA = _mm_load_ps((float*)(sourceA + 8 * x));
5056 __m128 cB = _mm_load_ps((float*)(sourceB + 8 * x));
5057 __m128 cC = _mm_load_ps((float*)(sourceC + 8 * x));
5058 __m128 cD = _mm_load_ps((float*)(sourceD + 8 * x));
5059 __m128 cE = _mm_load_ps((float*)(sourceE + 8 * x));
5060 __m128 cF = _mm_load_ps((float*)(sourceF + 8 * x));
John Bauman89401822014-05-06 15:04:28 -04005061
Nicolas Capens47dc8672017-04-25 12:54:39 -04005062 c0 = _mm_add_ps(c0, c1);
5063 c2 = _mm_add_ps(c2, c3);
5064 c4 = _mm_add_ps(c4, c5);
5065 c6 = _mm_add_ps(c6, c7);
5066 c8 = _mm_add_ps(c8, c9);
5067 cA = _mm_add_ps(cA, cB);
5068 cC = _mm_add_ps(cC, cD);
5069 cE = _mm_add_ps(cE, cF);
5070 c0 = _mm_add_ps(c0, c2);
5071 c4 = _mm_add_ps(c4, c6);
5072 c8 = _mm_add_ps(c8, cA);
5073 cC = _mm_add_ps(cC, cE);
5074 c0 = _mm_add_ps(c0, c4);
5075 c8 = _mm_add_ps(c8, cC);
5076 c0 = _mm_add_ps(c0, c8);
5077 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 16.0f));
John Bauman89401822014-05-06 15:04:28 -04005078
Nicolas Capens47dc8672017-04-25 12:54:39 -04005079 _mm_store_ps((float*)(source0 + 8 * x), c0);
5080 }
5081
5082 source0 += pitch;
5083 source1 += pitch;
5084 source2 += pitch;
5085 source3 += pitch;
5086 source4 += pitch;
5087 source5 += pitch;
5088 source6 += pitch;
5089 source7 += pitch;
5090 source8 += pitch;
5091 source9 += pitch;
5092 sourceA += pitch;
5093 sourceB += pitch;
5094 sourceC += pitch;
5095 sourceD += pitch;
5096 sourceE += pitch;
5097 sourceF += pitch;
John Bauman89401822014-05-06 15:04:28 -04005098 }
John Bauman89401822014-05-06 15:04:28 -04005099 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04005100 else ASSERT(false);
John Bauman89401822014-05-06 15:04:28 -04005101 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04005102 else
5103 #endif
John Bauman89401822014-05-06 15:04:28 -04005104 {
5105 if(internal.depth == 2)
5106 {
5107 for(int y = 0; y < height; y++)
5108 {
5109 for(int x = 0; x < 2 * width; x++)
5110 {
5111 float c0 = *(float*)(source0 + 4 * x);
5112 float c1 = *(float*)(source1 + 4 * x);
5113
5114 c0 = c0 + c1;
5115 c0 *= 1.0f / 2.0f;
5116
5117 *(float*)(source0 + 4 * x) = c0;
5118 }
5119
5120 source0 += pitch;
5121 source1 += pitch;
5122 }
5123 }
5124 else if(internal.depth == 4)
5125 {
5126 for(int y = 0; y < height; y++)
5127 {
5128 for(int x = 0; x < 2 * width; x++)
5129 {
5130 float c0 = *(float*)(source0 + 4 * x);
5131 float c1 = *(float*)(source1 + 4 * x);
5132 float c2 = *(float*)(source2 + 4 * x);
5133 float c3 = *(float*)(source3 + 4 * x);
5134
5135 c0 = c0 + c1;
5136 c2 = c2 + c3;
5137 c0 = c0 + c2;
5138 c0 *= 1.0f / 4.0f;
5139
5140 *(float*)(source0 + 4 * x) = c0;
5141 }
5142
5143 source0 += pitch;
5144 source1 += pitch;
5145 source2 += pitch;
5146 source3 += pitch;
5147 }
5148 }
5149 else if(internal.depth == 8)
5150 {
5151 for(int y = 0; y < height; y++)
5152 {
5153 for(int x = 0; x < 2 * width; x++)
5154 {
5155 float c0 = *(float*)(source0 + 4 * x);
5156 float c1 = *(float*)(source1 + 4 * x);
5157 float c2 = *(float*)(source2 + 4 * x);
5158 float c3 = *(float*)(source3 + 4 * x);
5159 float c4 = *(float*)(source4 + 4 * x);
5160 float c5 = *(float*)(source5 + 4 * x);
5161 float c6 = *(float*)(source6 + 4 * x);
5162 float c7 = *(float*)(source7 + 4 * x);
5163
5164 c0 = c0 + c1;
5165 c2 = c2 + c3;
5166 c4 = c4 + c5;
5167 c6 = c6 + c7;
5168 c0 = c0 + c2;
5169 c4 = c4 + c6;
5170 c0 = c0 + c4;
5171 c0 *= 1.0f / 8.0f;
5172
5173 *(float*)(source0 + 4 * x) = c0;
5174 }
5175
5176 source0 += pitch;
5177 source1 += pitch;
5178 source2 += pitch;
5179 source3 += pitch;
5180 source4 += pitch;
5181 source5 += pitch;
5182 source6 += pitch;
5183 source7 += pitch;
5184 }
5185 }
5186 else if(internal.depth == 16)
5187 {
5188 for(int y = 0; y < height; y++)
5189 {
5190 for(int x = 0; x < 2 * width; x++)
5191 {
5192 float c0 = *(float*)(source0 + 4 * x);
5193 float c1 = *(float*)(source1 + 4 * x);
5194 float c2 = *(float*)(source2 + 4 * x);
5195 float c3 = *(float*)(source3 + 4 * x);
5196 float c4 = *(float*)(source4 + 4 * x);
5197 float c5 = *(float*)(source5 + 4 * x);
5198 float c6 = *(float*)(source6 + 4 * x);
5199 float c7 = *(float*)(source7 + 4 * x);
5200 float c8 = *(float*)(source8 + 4 * x);
5201 float c9 = *(float*)(source9 + 4 * x);
5202 float cA = *(float*)(sourceA + 4 * x);
5203 float cB = *(float*)(sourceB + 4 * x);
5204 float cC = *(float*)(sourceC + 4 * x);
5205 float cD = *(float*)(sourceD + 4 * x);
5206 float cE = *(float*)(sourceE + 4 * x);
5207 float cF = *(float*)(sourceF + 4 * x);
5208
5209 c0 = c0 + c1;
5210 c2 = c2 + c3;
5211 c4 = c4 + c5;
5212 c6 = c6 + c7;
5213 c8 = c8 + c9;
5214 cA = cA + cB;
5215 cC = cC + cD;
5216 cE = cE + cF;
5217 c0 = c0 + c2;
5218 c4 = c4 + c6;
5219 c8 = c8 + cA;
5220 cC = cC + cE;
5221 c0 = c0 + c4;
5222 c8 = c8 + cC;
5223 c0 = c0 + c8;
5224 c0 *= 1.0f / 16.0f;
5225
5226 *(float*)(source0 + 4 * x) = c0;
5227 }
5228
5229 source0 += pitch;
5230 source1 += pitch;
5231 source2 += pitch;
5232 source3 += pitch;
5233 source4 += pitch;
5234 source5 += pitch;
5235 source6 += pitch;
5236 source7 += pitch;
5237 source8 += pitch;
5238 source9 += pitch;
5239 sourceA += pitch;
5240 sourceB += pitch;
5241 sourceC += pitch;
5242 sourceD += pitch;
5243 sourceE += pitch;
5244 sourceF += pitch;
5245 }
5246 }
5247 else ASSERT(false);
5248 }
5249 }
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04005250 else if(internal.format == FORMAT_A32B32G32R32F || internal.format == FORMAT_X32B32G32R32F)
John Bauman89401822014-05-06 15:04:28 -04005251 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005252 #if defined(__i386__) || defined(__x86_64__)
5253 if(CPUID::supportsSSE())
John Bauman89401822014-05-06 15:04:28 -04005254 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005255 if(internal.depth == 2)
John Bauman89401822014-05-06 15:04:28 -04005256 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005257 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04005258 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005259 for(int x = 0; x < width; x++)
5260 {
5261 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x));
5262 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04005263
Nicolas Capens47dc8672017-04-25 12:54:39 -04005264 c0 = _mm_add_ps(c0, c1);
5265 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 2.0f));
John Bauman89401822014-05-06 15:04:28 -04005266
Nicolas Capens47dc8672017-04-25 12:54:39 -04005267 _mm_store_ps((float*)(source0 + 16 * x), c0);
5268 }
5269
5270 source0 += pitch;
5271 source1 += pitch;
John Bauman89401822014-05-06 15:04:28 -04005272 }
John Bauman89401822014-05-06 15:04:28 -04005273 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04005274 else if(internal.depth == 4)
John Bauman89401822014-05-06 15:04:28 -04005275 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005276 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04005277 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005278 for(int x = 0; x < width; x++)
5279 {
5280 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x));
5281 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x));
5282 __m128 c2 = _mm_load_ps((float*)(source2 + 16 * x));
5283 __m128 c3 = _mm_load_ps((float*)(source3 + 16 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04005284
Nicolas Capens47dc8672017-04-25 12:54:39 -04005285 c0 = _mm_add_ps(c0, c1);
5286 c2 = _mm_add_ps(c2, c3);
5287 c0 = _mm_add_ps(c0, c2);
5288 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 4.0f));
John Bauman89401822014-05-06 15:04:28 -04005289
Nicolas Capens47dc8672017-04-25 12:54:39 -04005290 _mm_store_ps((float*)(source0 + 16 * x), c0);
5291 }
5292
5293 source0 += pitch;
5294 source1 += pitch;
5295 source2 += pitch;
5296 source3 += pitch;
John Bauman89401822014-05-06 15:04:28 -04005297 }
John Bauman89401822014-05-06 15:04:28 -04005298 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04005299 else if(internal.depth == 8)
John Bauman89401822014-05-06 15:04:28 -04005300 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005301 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04005302 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005303 for(int x = 0; x < width; x++)
5304 {
5305 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x));
5306 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x));
5307 __m128 c2 = _mm_load_ps((float*)(source2 + 16 * x));
5308 __m128 c3 = _mm_load_ps((float*)(source3 + 16 * x));
5309 __m128 c4 = _mm_load_ps((float*)(source4 + 16 * x));
5310 __m128 c5 = _mm_load_ps((float*)(source5 + 16 * x));
5311 __m128 c6 = _mm_load_ps((float*)(source6 + 16 * x));
5312 __m128 c7 = _mm_load_ps((float*)(source7 + 16 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04005313
Nicolas Capens47dc8672017-04-25 12:54:39 -04005314 c0 = _mm_add_ps(c0, c1);
5315 c2 = _mm_add_ps(c2, c3);
5316 c4 = _mm_add_ps(c4, c5);
5317 c6 = _mm_add_ps(c6, c7);
5318 c0 = _mm_add_ps(c0, c2);
5319 c4 = _mm_add_ps(c4, c6);
5320 c0 = _mm_add_ps(c0, c4);
5321 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 8.0f));
John Bauman89401822014-05-06 15:04:28 -04005322
Nicolas Capens47dc8672017-04-25 12:54:39 -04005323 _mm_store_ps((float*)(source0 + 16 * x), c0);
5324 }
5325
5326 source0 += pitch;
5327 source1 += pitch;
5328 source2 += pitch;
5329 source3 += pitch;
5330 source4 += pitch;
5331 source5 += pitch;
5332 source6 += pitch;
5333 source7 += pitch;
John Bauman89401822014-05-06 15:04:28 -04005334 }
John Bauman89401822014-05-06 15:04:28 -04005335 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04005336 else if(internal.depth == 16)
John Bauman89401822014-05-06 15:04:28 -04005337 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005338 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04005339 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005340 for(int x = 0; x < width; x++)
5341 {
5342 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x));
5343 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x));
5344 __m128 c2 = _mm_load_ps((float*)(source2 + 16 * x));
5345 __m128 c3 = _mm_load_ps((float*)(source3 + 16 * x));
5346 __m128 c4 = _mm_load_ps((float*)(source4 + 16 * x));
5347 __m128 c5 = _mm_load_ps((float*)(source5 + 16 * x));
5348 __m128 c6 = _mm_load_ps((float*)(source6 + 16 * x));
5349 __m128 c7 = _mm_load_ps((float*)(source7 + 16 * x));
5350 __m128 c8 = _mm_load_ps((float*)(source8 + 16 * x));
5351 __m128 c9 = _mm_load_ps((float*)(source9 + 16 * x));
5352 __m128 cA = _mm_load_ps((float*)(sourceA + 16 * x));
5353 __m128 cB = _mm_load_ps((float*)(sourceB + 16 * x));
5354 __m128 cC = _mm_load_ps((float*)(sourceC + 16 * x));
5355 __m128 cD = _mm_load_ps((float*)(sourceD + 16 * x));
5356 __m128 cE = _mm_load_ps((float*)(sourceE + 16 * x));
5357 __m128 cF = _mm_load_ps((float*)(sourceF + 16 * x));
John Bauman89401822014-05-06 15:04:28 -04005358
Nicolas Capens47dc8672017-04-25 12:54:39 -04005359 c0 = _mm_add_ps(c0, c1);
5360 c2 = _mm_add_ps(c2, c3);
5361 c4 = _mm_add_ps(c4, c5);
5362 c6 = _mm_add_ps(c6, c7);
5363 c8 = _mm_add_ps(c8, c9);
5364 cA = _mm_add_ps(cA, cB);
5365 cC = _mm_add_ps(cC, cD);
5366 cE = _mm_add_ps(cE, cF);
5367 c0 = _mm_add_ps(c0, c2);
5368 c4 = _mm_add_ps(c4, c6);
5369 c8 = _mm_add_ps(c8, cA);
5370 cC = _mm_add_ps(cC, cE);
5371 c0 = _mm_add_ps(c0, c4);
5372 c8 = _mm_add_ps(c8, cC);
5373 c0 = _mm_add_ps(c0, c8);
5374 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 16.0f));
John Bauman89401822014-05-06 15:04:28 -04005375
Nicolas Capens47dc8672017-04-25 12:54:39 -04005376 _mm_store_ps((float*)(source0 + 16 * x), c0);
5377 }
5378
5379 source0 += pitch;
5380 source1 += pitch;
5381 source2 += pitch;
5382 source3 += pitch;
5383 source4 += pitch;
5384 source5 += pitch;
5385 source6 += pitch;
5386 source7 += pitch;
5387 source8 += pitch;
5388 source9 += pitch;
5389 sourceA += pitch;
5390 sourceB += pitch;
5391 sourceC += pitch;
5392 sourceD += pitch;
5393 sourceE += pitch;
5394 sourceF += pitch;
John Bauman89401822014-05-06 15:04:28 -04005395 }
John Bauman89401822014-05-06 15:04:28 -04005396 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04005397 else ASSERT(false);
John Bauman89401822014-05-06 15:04:28 -04005398 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04005399 else
5400 #endif
John Bauman89401822014-05-06 15:04:28 -04005401 {
5402 if(internal.depth == 2)
5403 {
5404 for(int y = 0; y < height; y++)
5405 {
5406 for(int x = 0; x < 4 * width; x++)
5407 {
5408 float c0 = *(float*)(source0 + 4 * x);
5409 float c1 = *(float*)(source1 + 4 * x);
5410
5411 c0 = c0 + c1;
5412 c0 *= 1.0f / 2.0f;
5413
5414 *(float*)(source0 + 4 * x) = c0;
5415 }
5416
5417 source0 += pitch;
5418 source1 += pitch;
5419 }
5420 }
5421 else if(internal.depth == 4)
5422 {
5423 for(int y = 0; y < height; y++)
5424 {
5425 for(int x = 0; x < 4 * width; x++)
5426 {
5427 float c0 = *(float*)(source0 + 4 * x);
5428 float c1 = *(float*)(source1 + 4 * x);
5429 float c2 = *(float*)(source2 + 4 * x);
5430 float c3 = *(float*)(source3 + 4 * x);
5431
5432 c0 = c0 + c1;
5433 c2 = c2 + c3;
5434 c0 = c0 + c2;
5435 c0 *= 1.0f / 4.0f;
5436
5437 *(float*)(source0 + 4 * x) = c0;
5438 }
5439
5440 source0 += pitch;
5441 source1 += pitch;
5442 source2 += pitch;
5443 source3 += pitch;
5444 }
5445 }
5446 else if(internal.depth == 8)
5447 {
5448 for(int y = 0; y < height; y++)
5449 {
5450 for(int x = 0; x < 4 * width; x++)
5451 {
5452 float c0 = *(float*)(source0 + 4 * x);
5453 float c1 = *(float*)(source1 + 4 * x);
5454 float c2 = *(float*)(source2 + 4 * x);
5455 float c3 = *(float*)(source3 + 4 * x);
5456 float c4 = *(float*)(source4 + 4 * x);
5457 float c5 = *(float*)(source5 + 4 * x);
5458 float c6 = *(float*)(source6 + 4 * x);
5459 float c7 = *(float*)(source7 + 4 * x);
5460
5461 c0 = c0 + c1;
5462 c2 = c2 + c3;
5463 c4 = c4 + c5;
5464 c6 = c6 + c7;
5465 c0 = c0 + c2;
5466 c4 = c4 + c6;
5467 c0 = c0 + c4;
5468 c0 *= 1.0f / 8.0f;
5469
5470 *(float*)(source0 + 4 * x) = c0;
5471 }
5472
5473 source0 += pitch;
5474 source1 += pitch;
5475 source2 += pitch;
5476 source3 += pitch;
5477 source4 += pitch;
5478 source5 += pitch;
5479 source6 += pitch;
5480 source7 += pitch;
5481 }
5482 }
5483 else if(internal.depth == 16)
5484 {
5485 for(int y = 0; y < height; y++)
5486 {
5487 for(int x = 0; x < 4 * width; x++)
5488 {
5489 float c0 = *(float*)(source0 + 4 * x);
5490 float c1 = *(float*)(source1 + 4 * x);
5491 float c2 = *(float*)(source2 + 4 * x);
5492 float c3 = *(float*)(source3 + 4 * x);
5493 float c4 = *(float*)(source4 + 4 * x);
5494 float c5 = *(float*)(source5 + 4 * x);
5495 float c6 = *(float*)(source6 + 4 * x);
5496 float c7 = *(float*)(source7 + 4 * x);
5497 float c8 = *(float*)(source8 + 4 * x);
5498 float c9 = *(float*)(source9 + 4 * x);
5499 float cA = *(float*)(sourceA + 4 * x);
5500 float cB = *(float*)(sourceB + 4 * x);
5501 float cC = *(float*)(sourceC + 4 * x);
5502 float cD = *(float*)(sourceD + 4 * x);
5503 float cE = *(float*)(sourceE + 4 * x);
5504 float cF = *(float*)(sourceF + 4 * x);
5505
5506 c0 = c0 + c1;
5507 c2 = c2 + c3;
5508 c4 = c4 + c5;
5509 c6 = c6 + c7;
5510 c8 = c8 + c9;
5511 cA = cA + cB;
5512 cC = cC + cD;
5513 cE = cE + cF;
5514 c0 = c0 + c2;
5515 c4 = c4 + c6;
5516 c8 = c8 + cA;
5517 cC = cC + cE;
5518 c0 = c0 + c4;
5519 c8 = c8 + cC;
5520 c0 = c0 + c8;
5521 c0 *= 1.0f / 16.0f;
5522
5523 *(float*)(source0 + 4 * x) = c0;
5524 }
5525
5526 source0 += pitch;
5527 source1 += pitch;
5528 source2 += pitch;
5529 source3 += pitch;
5530 source4 += pitch;
5531 source5 += pitch;
5532 source6 += pitch;
5533 source7 += pitch;
5534 source8 += pitch;
5535 source9 += pitch;
5536 sourceA += pitch;
5537 sourceB += pitch;
5538 sourceC += pitch;
5539 sourceD += pitch;
5540 sourceE += pitch;
5541 sourceF += pitch;
5542 }
5543 }
5544 else ASSERT(false);
5545 }
5546 }
Nicolas Capens0e12a922015-09-04 09:18:15 -04005547 else if(internal.format == FORMAT_R5G6B5)
5548 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005549 #if defined(__i386__) || defined(__x86_64__)
5550 if(CPUID::supportsSSE2() && (width % 8) == 0)
Nicolas Capens0e12a922015-09-04 09:18:15 -04005551 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005552 if(internal.depth == 2)
Nicolas Capens0e12a922015-09-04 09:18:15 -04005553 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005554 for(int y = 0; y < height; y++)
Nicolas Capens0e12a922015-09-04 09:18:15 -04005555 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005556 for(int x = 0; x < width; x += 8)
5557 {
5558 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x));
5559 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04005560
Nicolas Capens47dc8672017-04-25 12:54:39 -04005561 static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F};
5562 static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0};
5563 __m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
5564 __m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(_g_));
5565 __m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b));
5566 __m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
Nicolas Capens0e12a922015-09-04 09:18:15 -04005567
Nicolas Capens47dc8672017-04-25 12:54:39 -04005568 c0 = _mm_avg_epu8(c0_r_b, c1_r_b);
5569 c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
5570 c1 = _mm_avg_epu16(c0__g_, c1__g_);
5571 c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
5572 c0 = _mm_or_si128(c0, c1);
Nicolas Capens0e12a922015-09-04 09:18:15 -04005573
Nicolas Capens47dc8672017-04-25 12:54:39 -04005574 _mm_store_si128((__m128i*)(source0 + 2 * x), c0);
5575 }
5576
5577 source0 += pitch;
5578 source1 += pitch;
Nicolas Capens0e12a922015-09-04 09:18:15 -04005579 }
Nicolas Capens0e12a922015-09-04 09:18:15 -04005580 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04005581 else if(internal.depth == 4)
Nicolas Capens0e12a922015-09-04 09:18:15 -04005582 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005583 for(int y = 0; y < height; y++)
Nicolas Capens0e12a922015-09-04 09:18:15 -04005584 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005585 for(int x = 0; x < width; x += 8)
5586 {
5587 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x));
5588 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x));
5589 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 2 * x));
5590 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 2 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04005591
Nicolas Capens47dc8672017-04-25 12:54:39 -04005592 static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F};
5593 static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0};
5594 __m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
5595 __m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(_g_));
5596 __m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b));
5597 __m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
5598 __m128i c2_r_b = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(r_b));
5599 __m128i c2__g_ = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(_g_));
5600 __m128i c3_r_b = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(r_b));
5601 __m128i c3__g_ = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(_g_));
Nicolas Capens0e12a922015-09-04 09:18:15 -04005602
Nicolas Capens47dc8672017-04-25 12:54:39 -04005603 c0 = _mm_avg_epu8(c0_r_b, c1_r_b);
5604 c2 = _mm_avg_epu8(c2_r_b, c3_r_b);
5605 c0 = _mm_avg_epu8(c0, c2);
5606 c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
5607 c1 = _mm_avg_epu16(c0__g_, c1__g_);
5608 c3 = _mm_avg_epu16(c2__g_, c3__g_);
5609 c1 = _mm_avg_epu16(c1, c3);
5610 c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
5611 c0 = _mm_or_si128(c0, c1);
Nicolas Capens0e12a922015-09-04 09:18:15 -04005612
Nicolas Capens47dc8672017-04-25 12:54:39 -04005613 _mm_store_si128((__m128i*)(source0 + 2 * x), c0);
5614 }
5615
5616 source0 += pitch;
5617 source1 += pitch;
5618 source2 += pitch;
5619 source3 += pitch;
Nicolas Capens0e12a922015-09-04 09:18:15 -04005620 }
Nicolas Capens0e12a922015-09-04 09:18:15 -04005621 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04005622 else if(internal.depth == 8)
Nicolas Capens0e12a922015-09-04 09:18:15 -04005623 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005624 for(int y = 0; y < height; y++)
Nicolas Capens0e12a922015-09-04 09:18:15 -04005625 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005626 for(int x = 0; x < width; x += 8)
5627 {
5628 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x));
5629 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x));
5630 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 2 * x));
5631 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 2 * x));
5632 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 2 * x));
5633 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 2 * x));
5634 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 2 * x));
5635 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 2 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04005636
Nicolas Capens47dc8672017-04-25 12:54:39 -04005637 static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F};
5638 static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0};
5639 __m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
5640 __m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(_g_));
5641 __m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b));
5642 __m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
5643 __m128i c2_r_b = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(r_b));
5644 __m128i c2__g_ = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(_g_));
5645 __m128i c3_r_b = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(r_b));
5646 __m128i c3__g_ = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(_g_));
5647 __m128i c4_r_b = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(r_b));
5648 __m128i c4__g_ = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(_g_));
5649 __m128i c5_r_b = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(r_b));
5650 __m128i c5__g_ = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(_g_));
5651 __m128i c6_r_b = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(r_b));
5652 __m128i c6__g_ = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(_g_));
5653 __m128i c7_r_b = _mm_and_si128(c7, reinterpret_cast<const __m128i&>(r_b));
5654 __m128i c7__g_ = _mm_and_si128(c7, reinterpret_cast<const __m128i&>(_g_));
Nicolas Capens0e12a922015-09-04 09:18:15 -04005655
Nicolas Capens47dc8672017-04-25 12:54:39 -04005656 c0 = _mm_avg_epu8(c0_r_b, c1_r_b);
5657 c2 = _mm_avg_epu8(c2_r_b, c3_r_b);
5658 c4 = _mm_avg_epu8(c4_r_b, c5_r_b);
5659 c6 = _mm_avg_epu8(c6_r_b, c7_r_b);
5660 c0 = _mm_avg_epu8(c0, c2);
5661 c4 = _mm_avg_epu8(c4, c6);
5662 c0 = _mm_avg_epu8(c0, c4);
5663 c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
5664 c1 = _mm_avg_epu16(c0__g_, c1__g_);
5665 c3 = _mm_avg_epu16(c2__g_, c3__g_);
5666 c5 = _mm_avg_epu16(c4__g_, c5__g_);
5667 c7 = _mm_avg_epu16(c6__g_, c7__g_);
5668 c1 = _mm_avg_epu16(c1, c3);
5669 c5 = _mm_avg_epu16(c5, c7);
5670 c1 = _mm_avg_epu16(c1, c5);
5671 c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
5672 c0 = _mm_or_si128(c0, c1);
Nicolas Capens0e12a922015-09-04 09:18:15 -04005673
Nicolas Capens47dc8672017-04-25 12:54:39 -04005674 _mm_store_si128((__m128i*)(source0 + 2 * x), c0);
5675 }
5676
5677 source0 += pitch;
5678 source1 += pitch;
5679 source2 += pitch;
5680 source3 += pitch;
5681 source4 += pitch;
5682 source5 += pitch;
5683 source6 += pitch;
5684 source7 += pitch;
Nicolas Capens0e12a922015-09-04 09:18:15 -04005685 }
Nicolas Capens0e12a922015-09-04 09:18:15 -04005686 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04005687 else if(internal.depth == 16)
Nicolas Capens0e12a922015-09-04 09:18:15 -04005688 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005689 for(int y = 0; y < height; y++)
Nicolas Capens0e12a922015-09-04 09:18:15 -04005690 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005691 for(int x = 0; x < width; x += 8)
5692 {
5693 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x));
5694 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x));
5695 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 2 * x));
5696 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 2 * x));
5697 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 2 * x));
5698 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 2 * x));
5699 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 2 * x));
5700 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 2 * x));
5701 __m128i c8 = _mm_load_si128((__m128i*)(source8 + 2 * x));
5702 __m128i c9 = _mm_load_si128((__m128i*)(source9 + 2 * x));
5703 __m128i cA = _mm_load_si128((__m128i*)(sourceA + 2 * x));
5704 __m128i cB = _mm_load_si128((__m128i*)(sourceB + 2 * x));
5705 __m128i cC = _mm_load_si128((__m128i*)(sourceC + 2 * x));
5706 __m128i cD = _mm_load_si128((__m128i*)(sourceD + 2 * x));
5707 __m128i cE = _mm_load_si128((__m128i*)(sourceE + 2 * x));
5708 __m128i cF = _mm_load_si128((__m128i*)(sourceF + 2 * x));
Nicolas Capens0e12a922015-09-04 09:18:15 -04005709
Nicolas Capens47dc8672017-04-25 12:54:39 -04005710 static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F};
5711 static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0};
5712 __m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
5713 __m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(_g_));
5714 __m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b));
5715 __m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
5716 __m128i c2_r_b = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(r_b));
5717 __m128i c2__g_ = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(_g_));
5718 __m128i c3_r_b = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(r_b));
5719 __m128i c3__g_ = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(_g_));
5720 __m128i c4_r_b = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(r_b));
5721 __m128i c4__g_ = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(_g_));
5722 __m128i c5_r_b = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(r_b));
5723 __m128i c5__g_ = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(_g_));
5724 __m128i c6_r_b = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(r_b));
5725 __m128i c6__g_ = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(_g_));
5726 __m128i c7_r_b = _mm_and_si128(c7, reinterpret_cast<const __m128i&>(r_b));
5727 __m128i c7__g_ = _mm_and_si128(c7, reinterpret_cast<const __m128i&>(_g_));
5728 __m128i c8_r_b = _mm_and_si128(c8, reinterpret_cast<const __m128i&>(r_b));
5729 __m128i c8__g_ = _mm_and_si128(c8, reinterpret_cast<const __m128i&>(_g_));
5730 __m128i c9_r_b = _mm_and_si128(c9, reinterpret_cast<const __m128i&>(r_b));
5731 __m128i c9__g_ = _mm_and_si128(c9, reinterpret_cast<const __m128i&>(_g_));
5732 __m128i cA_r_b = _mm_and_si128(cA, reinterpret_cast<const __m128i&>(r_b));
5733 __m128i cA__g_ = _mm_and_si128(cA, reinterpret_cast<const __m128i&>(_g_));
5734 __m128i cB_r_b = _mm_and_si128(cB, reinterpret_cast<const __m128i&>(r_b));
5735 __m128i cB__g_ = _mm_and_si128(cB, reinterpret_cast<const __m128i&>(_g_));
5736 __m128i cC_r_b = _mm_and_si128(cC, reinterpret_cast<const __m128i&>(r_b));
5737 __m128i cC__g_ = _mm_and_si128(cC, reinterpret_cast<const __m128i&>(_g_));
5738 __m128i cD_r_b = _mm_and_si128(cD, reinterpret_cast<const __m128i&>(r_b));
5739 __m128i cD__g_ = _mm_and_si128(cD, reinterpret_cast<const __m128i&>(_g_));
5740 __m128i cE_r_b = _mm_and_si128(cE, reinterpret_cast<const __m128i&>(r_b));
5741 __m128i cE__g_ = _mm_and_si128(cE, reinterpret_cast<const __m128i&>(_g_));
5742 __m128i cF_r_b = _mm_and_si128(cF, reinterpret_cast<const __m128i&>(r_b));
5743 __m128i cF__g_ = _mm_and_si128(cF, reinterpret_cast<const __m128i&>(_g_));
Nicolas Capens0e12a922015-09-04 09:18:15 -04005744
Nicolas Capens47dc8672017-04-25 12:54:39 -04005745 c0 = _mm_avg_epu8(c0_r_b, c1_r_b);
5746 c2 = _mm_avg_epu8(c2_r_b, c3_r_b);
5747 c4 = _mm_avg_epu8(c4_r_b, c5_r_b);
5748 c6 = _mm_avg_epu8(c6_r_b, c7_r_b);
5749 c8 = _mm_avg_epu8(c8_r_b, c9_r_b);
5750 cA = _mm_avg_epu8(cA_r_b, cB_r_b);
5751 cC = _mm_avg_epu8(cC_r_b, cD_r_b);
5752 cE = _mm_avg_epu8(cE_r_b, cF_r_b);
5753 c0 = _mm_avg_epu8(c0, c2);
5754 c4 = _mm_avg_epu8(c4, c6);
5755 c8 = _mm_avg_epu8(c8, cA);
5756 cC = _mm_avg_epu8(cC, cE);
5757 c0 = _mm_avg_epu8(c0, c4);
5758 c8 = _mm_avg_epu8(c8, cC);
5759 c0 = _mm_avg_epu8(c0, c8);
5760 c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
5761 c1 = _mm_avg_epu16(c0__g_, c1__g_);
5762 c3 = _mm_avg_epu16(c2__g_, c3__g_);
5763 c5 = _mm_avg_epu16(c4__g_, c5__g_);
5764 c7 = _mm_avg_epu16(c6__g_, c7__g_);
5765 c9 = _mm_avg_epu16(c8__g_, c9__g_);
5766 cB = _mm_avg_epu16(cA__g_, cB__g_);
5767 cD = _mm_avg_epu16(cC__g_, cD__g_);
5768 cF = _mm_avg_epu16(cE__g_, cF__g_);
5769 c1 = _mm_avg_epu8(c1, c3);
5770 c5 = _mm_avg_epu8(c5, c7);
5771 c9 = _mm_avg_epu8(c9, cB);
5772 cD = _mm_avg_epu8(cD, cF);
5773 c1 = _mm_avg_epu8(c1, c5);
5774 c9 = _mm_avg_epu8(c9, cD);
5775 c1 = _mm_avg_epu8(c1, c9);
5776 c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
5777 c0 = _mm_or_si128(c0, c1);
Nicolas Capens0e12a922015-09-04 09:18:15 -04005778
Nicolas Capens47dc8672017-04-25 12:54:39 -04005779 _mm_store_si128((__m128i*)(source0 + 2 * x), c0);
5780 }
5781
5782 source0 += pitch;
5783 source1 += pitch;
5784 source2 += pitch;
5785 source3 += pitch;
5786 source4 += pitch;
5787 source5 += pitch;
5788 source6 += pitch;
5789 source7 += pitch;
5790 source8 += pitch;
5791 source9 += pitch;
5792 sourceA += pitch;
5793 sourceB += pitch;
5794 sourceC += pitch;
5795 sourceD += pitch;
5796 sourceE += pitch;
5797 sourceF += pitch;
Nicolas Capens0e12a922015-09-04 09:18:15 -04005798 }
Nicolas Capens0e12a922015-09-04 09:18:15 -04005799 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04005800 else ASSERT(false);
Nicolas Capens0e12a922015-09-04 09:18:15 -04005801 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04005802 else
5803 #endif
Nicolas Capens0e12a922015-09-04 09:18:15 -04005804 {
5805 #define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7BEF) + (((x) ^ (y)) & 0x0821))
5806
5807 if(internal.depth == 2)
5808 {
5809 for(int y = 0; y < height; y++)
5810 {
5811 for(int x = 0; x < width; x++)
5812 {
5813 unsigned short c0 = *(unsigned short*)(source0 + 2 * x);
5814 unsigned short c1 = *(unsigned short*)(source1 + 2 * x);
5815
5816 c0 = AVERAGE(c0, c1);
5817
5818 *(unsigned short*)(source0 + 2 * x) = c0;
5819 }
5820
5821 source0 += pitch;
5822 source1 += pitch;
5823 }
5824 }
5825 else if(internal.depth == 4)
5826 {
5827 for(int y = 0; y < height; y++)
5828 {
5829 for(int x = 0; x < width; x++)
5830 {
5831 unsigned short c0 = *(unsigned short*)(source0 + 2 * x);
5832 unsigned short c1 = *(unsigned short*)(source1 + 2 * x);
5833 unsigned short c2 = *(unsigned short*)(source2 + 2 * x);
5834 unsigned short c3 = *(unsigned short*)(source3 + 2 * x);
5835
5836 c0 = AVERAGE(c0, c1);
5837 c2 = AVERAGE(c2, c3);
5838 c0 = AVERAGE(c0, c2);
5839
5840 *(unsigned short*)(source0 + 2 * x) = c0;
5841 }
5842
5843 source0 += pitch;
5844 source1 += pitch;
5845 source2 += pitch;
5846 source3 += pitch;
5847 }
5848 }
5849 else if(internal.depth == 8)
5850 {
5851 for(int y = 0; y < height; y++)
5852 {
5853 for(int x = 0; x < width; x++)
5854 {
5855 unsigned short c0 = *(unsigned short*)(source0 + 2 * x);
5856 unsigned short c1 = *(unsigned short*)(source1 + 2 * x);
5857 unsigned short c2 = *(unsigned short*)(source2 + 2 * x);
5858 unsigned short c3 = *(unsigned short*)(source3 + 2 * x);
5859 unsigned short c4 = *(unsigned short*)(source4 + 2 * x);
5860 unsigned short c5 = *(unsigned short*)(source5 + 2 * x);
5861 unsigned short c6 = *(unsigned short*)(source6 + 2 * x);
5862 unsigned short c7 = *(unsigned short*)(source7 + 2 * x);
5863
5864 c0 = AVERAGE(c0, c1);
5865 c2 = AVERAGE(c2, c3);
5866 c4 = AVERAGE(c4, c5);
5867 c6 = AVERAGE(c6, c7);
5868 c0 = AVERAGE(c0, c2);
5869 c4 = AVERAGE(c4, c6);
5870 c0 = AVERAGE(c0, c4);
5871
5872 *(unsigned short*)(source0 + 2 * x) = c0;
5873 }
5874
5875 source0 += pitch;
5876 source1 += pitch;
5877 source2 += pitch;
5878 source3 += pitch;
5879 source4 += pitch;
5880 source5 += pitch;
5881 source6 += pitch;
5882 source7 += pitch;
5883 }
5884 }
5885 else if(internal.depth == 16)
5886 {
5887 for(int y = 0; y < height; y++)
5888 {
5889 for(int x = 0; x < width; x++)
5890 {
5891 unsigned short c0 = *(unsigned short*)(source0 + 2 * x);
5892 unsigned short c1 = *(unsigned short*)(source1 + 2 * x);
5893 unsigned short c2 = *(unsigned short*)(source2 + 2 * x);
5894 unsigned short c3 = *(unsigned short*)(source3 + 2 * x);
5895 unsigned short c4 = *(unsigned short*)(source4 + 2 * x);
5896 unsigned short c5 = *(unsigned short*)(source5 + 2 * x);
5897 unsigned short c6 = *(unsigned short*)(source6 + 2 * x);
5898 unsigned short c7 = *(unsigned short*)(source7 + 2 * x);
5899 unsigned short c8 = *(unsigned short*)(source8 + 2 * x);
5900 unsigned short c9 = *(unsigned short*)(source9 + 2 * x);
5901 unsigned short cA = *(unsigned short*)(sourceA + 2 * x);
5902 unsigned short cB = *(unsigned short*)(sourceB + 2 * x);
5903 unsigned short cC = *(unsigned short*)(sourceC + 2 * x);
5904 unsigned short cD = *(unsigned short*)(sourceD + 2 * x);
5905 unsigned short cE = *(unsigned short*)(sourceE + 2 * x);
5906 unsigned short cF = *(unsigned short*)(sourceF + 2 * x);
5907
5908 c0 = AVERAGE(c0, c1);
5909 c2 = AVERAGE(c2, c3);
5910 c4 = AVERAGE(c4, c5);
5911 c6 = AVERAGE(c6, c7);
5912 c8 = AVERAGE(c8, c9);
5913 cA = AVERAGE(cA, cB);
5914 cC = AVERAGE(cC, cD);
5915 cE = AVERAGE(cE, cF);
5916 c0 = AVERAGE(c0, c2);
5917 c4 = AVERAGE(c4, c6);
5918 c8 = AVERAGE(c8, cA);
5919 cC = AVERAGE(cC, cE);
5920 c0 = AVERAGE(c0, c4);
5921 c8 = AVERAGE(c8, cC);
5922 c0 = AVERAGE(c0, c8);
5923
5924 *(unsigned short*)(source0 + 2 * x) = c0;
5925 }
5926
5927 source0 += pitch;
5928 source1 += pitch;
5929 source2 += pitch;
5930 source3 += pitch;
5931 source4 += pitch;
5932 source5 += pitch;
5933 source6 += pitch;
5934 source7 += pitch;
5935 source8 += pitch;
5936 source9 += pitch;
5937 sourceA += pitch;
5938 sourceB += pitch;
5939 sourceC += pitch;
5940 sourceD += pitch;
5941 sourceE += pitch;
5942 sourceF += pitch;
5943 }
5944 }
5945 else ASSERT(false);
5946
5947 #undef AVERAGE
5948 }
5949 }
John Bauman89401822014-05-06 15:04:28 -04005950 else
5951 {
5952 // UNIMPLEMENTED();
5953 }
5954 }
5955}