blob: b15102d55b8d6b077cd0b57453b7b7ad916e3b72 [file] [log] [blame]
// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
John Bauman89401822014-05-06 15:04:28 -040014
15#include "Surface.hpp"
16
17#include "Color.hpp"
18#include "Context.hpp"
Alexis Hetu0de50d42015-09-09 13:56:41 -040019#include "ETC_Decoder.hpp"
John Bauman19bac1e2014-05-06 15:23:49 -040020#include "Renderer.hpp"
John Bauman89401822014-05-06 15:04:28 -040021#include "Common/Half.hpp"
22#include "Common/Memory.hpp"
23#include "Common/CPUID.hpp"
24#include "Common/Resource.hpp"
25#include "Common/Debug.hpp"
John Bauman19bac1e2014-05-06 15:23:49 -040026#include "Reactor/Reactor.hpp"
John Bauman89401822014-05-06 15:04:28 -040027
Nicolas Capens47dc8672017-04-25 12:54:39 -040028#if defined(__i386__) || defined(__x86_64__)
29 #include <xmmintrin.h>
30 #include <emmintrin.h>
31#endif
John Bauman89401822014-05-06 15:04:28 -040032
33#undef min
34#undef max
35
36namespace sw
37{
38 extern bool quadLayoutEnabled;
39 extern bool complementaryDepthBuffer;
40 extern TranscendentalPrecision logPrecision;
41
42 unsigned int *Surface::palette = 0;
43 unsigned int Surface::paletteID = 0;
44
45 void Surface::Buffer::write(int x, int y, int z, const Color<float> &color)
46 {
Alexis Hetu9c6d5222016-11-29 17:02:14 -050047 void *element = (unsigned char*)buffer + (x + border) * bytes + (y + border) * pitchB + z * sliceB;
John Bauman89401822014-05-06 15:04:28 -040048
49 write(element, color);
50 }
51
52 void Surface::Buffer::write(int x, int y, const Color<float> &color)
53 {
Alexis Hetu9c6d5222016-11-29 17:02:14 -050054 void *element = (unsigned char*)buffer + (x + border) * bytes + (y + border) * pitchB;
John Bauman89401822014-05-06 15:04:28 -040055
56 write(element, color);
57 }
58
59 inline void Surface::Buffer::write(void *element, const Color<float> &color)
60 {
61 switch(format)
62 {
63 case FORMAT_A8:
64 *(unsigned char*)element = unorm<8>(color.a);
65 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -040066 case FORMAT_R8I_SNORM:
67 *(char*)element = snorm<8>(color.r);
68 break;
John Bauman89401822014-05-06 15:04:28 -040069 case FORMAT_R8:
70 *(unsigned char*)element = unorm<8>(color.r);
71 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -040072 case FORMAT_R8I:
73 *(char*)element = scast<8>(color.r);
74 break;
75 case FORMAT_R8UI:
76 *(unsigned char*)element = ucast<8>(color.r);
77 break;
78 case FORMAT_R16I:
79 *(short*)element = scast<16>(color.r);
80 break;
81 case FORMAT_R16UI:
82 *(unsigned short*)element = ucast<16>(color.r);
83 break;
84 case FORMAT_R32I:
85 *(int*)element = static_cast<int>(color.r);
86 break;
87 case FORMAT_R32UI:
88 *(unsigned int*)element = static_cast<unsigned int>(color.r);
89 break;
John Bauman89401822014-05-06 15:04:28 -040090 case FORMAT_R3G3B2:
91 *(unsigned char*)element = (unorm<3>(color.r) << 5) | (unorm<3>(color.g) << 2) | (unorm<2>(color.b) << 0);
92 break;
93 case FORMAT_A8R3G3B2:
94 *(unsigned short*)element = (unorm<8>(color.a) << 8) | (unorm<3>(color.r) << 5) | (unorm<3>(color.g) << 2) | (unorm<2>(color.b) << 0);
95 break;
96 case FORMAT_X4R4G4B4:
97 *(unsigned short*)element = 0xF000 | (unorm<4>(color.r) << 8) | (unorm<4>(color.g) << 4) | (unorm<4>(color.b) << 0);
98 break;
99 case FORMAT_A4R4G4B4:
100 *(unsigned short*)element = (unorm<4>(color.a) << 12) | (unorm<4>(color.r) << 8) | (unorm<4>(color.g) << 4) | (unorm<4>(color.b) << 0);
101 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400102 case FORMAT_R4G4B4A4:
103 *(unsigned short*)element = (unorm<4>(color.r) << 12) | (unorm<4>(color.g) << 8) | (unorm<4>(color.b) << 4) | (unorm<4>(color.a) << 0);
104 break;
John Bauman89401822014-05-06 15:04:28 -0400105 case FORMAT_R5G6B5:
106 *(unsigned short*)element = (unorm<5>(color.r) << 11) | (unorm<6>(color.g) << 5) | (unorm<5>(color.b) << 0);
107 break;
108 case FORMAT_A1R5G5B5:
109 *(unsigned short*)element = (unorm<1>(color.a) << 15) | (unorm<5>(color.r) << 10) | (unorm<5>(color.g) << 5) | (unorm<5>(color.b) << 0);
110 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400111 case FORMAT_R5G5B5A1:
112 *(unsigned short*)element = (unorm<5>(color.r) << 11) | (unorm<5>(color.g) << 6) | (unorm<5>(color.b) << 1) | (unorm<5>(color.a) << 0);
113 break;
John Bauman89401822014-05-06 15:04:28 -0400114 case FORMAT_X1R5G5B5:
115 *(unsigned short*)element = 0x8000 | (unorm<5>(color.r) << 10) | (unorm<5>(color.g) << 5) | (unorm<5>(color.b) << 0);
116 break;
117 case FORMAT_A8R8G8B8:
118 *(unsigned int*)element = (unorm<8>(color.a) << 24) | (unorm<8>(color.r) << 16) | (unorm<8>(color.g) << 8) | (unorm<8>(color.b) << 0);
119 break;
120 case FORMAT_X8R8G8B8:
121 *(unsigned int*)element = 0xFF000000 | (unorm<8>(color.r) << 16) | (unorm<8>(color.g) << 8) | (unorm<8>(color.b) << 0);
122 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400123 case FORMAT_A8B8G8R8I_SNORM:
124 *(unsigned int*)element = (static_cast<unsigned int>(snorm<8>(color.a)) << 24) |
125 (static_cast<unsigned int>(snorm<8>(color.b)) << 16) |
126 (static_cast<unsigned int>(snorm<8>(color.g)) << 8) |
127 (static_cast<unsigned int>(snorm<8>(color.r)) << 0);
128 break;
John Bauman89401822014-05-06 15:04:28 -0400129 case FORMAT_A8B8G8R8:
Alexis Hetu049a1872016-04-25 16:59:58 -0400130 case FORMAT_SRGB8_A8:
John Bauman89401822014-05-06 15:04:28 -0400131 *(unsigned int*)element = (unorm<8>(color.a) << 24) | (unorm<8>(color.b) << 16) | (unorm<8>(color.g) << 8) | (unorm<8>(color.r) << 0);
132 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400133 case FORMAT_A8B8G8R8I:
134 *(unsigned int*)element = (static_cast<unsigned int>(scast<8>(color.a)) << 24) |
135 (static_cast<unsigned int>(scast<8>(color.b)) << 16) |
136 (static_cast<unsigned int>(scast<8>(color.g)) << 8) |
137 (static_cast<unsigned int>(scast<8>(color.r)) << 0);
138 break;
139 case FORMAT_A8B8G8R8UI:
140 *(unsigned int*)element = (ucast<8>(color.a) << 24) | (ucast<8>(color.b) << 16) | (ucast<8>(color.g) << 8) | (ucast<8>(color.r) << 0);
141 break;
142 case FORMAT_X8B8G8R8I_SNORM:
143 *(unsigned int*)element = 0x7F000000 |
144 (static_cast<unsigned int>(snorm<8>(color.b)) << 16) |
145 (static_cast<unsigned int>(snorm<8>(color.g)) << 8) |
146 (static_cast<unsigned int>(snorm<8>(color.r)) << 0);
147 break;
John Bauman89401822014-05-06 15:04:28 -0400148 case FORMAT_X8B8G8R8:
Alexis Hetu049a1872016-04-25 16:59:58 -0400149 case FORMAT_SRGB8_X8:
John Bauman89401822014-05-06 15:04:28 -0400150 *(unsigned int*)element = 0xFF000000 | (unorm<8>(color.b) << 16) | (unorm<8>(color.g) << 8) | (unorm<8>(color.r) << 0);
151 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400152 case FORMAT_X8B8G8R8I:
153 *(unsigned int*)element = 0x7F000000 |
154 (static_cast<unsigned int>(scast<8>(color.b)) << 16) |
155 (static_cast<unsigned int>(scast<8>(color.g)) << 8) |
156 (static_cast<unsigned int>(scast<8>(color.r)) << 0);
157 case FORMAT_X8B8G8R8UI:
158 *(unsigned int*)element = 0xFF000000 | (ucast<8>(color.b) << 16) | (ucast<8>(color.g) << 8) | (ucast<8>(color.r) << 0);
159 break;
John Bauman89401822014-05-06 15:04:28 -0400160 case FORMAT_A2R10G10B10:
161 *(unsigned int*)element = (unorm<2>(color.a) << 30) | (unorm<10>(color.r) << 20) | (unorm<10>(color.g) << 10) | (unorm<10>(color.b) << 0);
162 break;
163 case FORMAT_A2B10G10R10:
164 *(unsigned int*)element = (unorm<2>(color.a) << 30) | (unorm<10>(color.b) << 20) | (unorm<10>(color.g) << 10) | (unorm<10>(color.r) << 0);
165 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400166 case FORMAT_G8R8I_SNORM:
167 *(unsigned short*)element = (static_cast<unsigned short>(snorm<8>(color.g)) << 8) |
168 (static_cast<unsigned short>(snorm<8>(color.r)) << 0);
169 break;
John Bauman89401822014-05-06 15:04:28 -0400170 case FORMAT_G8R8:
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400171 *(unsigned short*)element = (unorm<8>(color.g) << 8) | (unorm<8>(color.r) << 0);
172 break;
173 case FORMAT_G8R8I:
174 *(unsigned short*)element = (static_cast<unsigned short>(scast<8>(color.g)) << 8) |
175 (static_cast<unsigned short>(scast<8>(color.r)) << 0);
176 break;
177 case FORMAT_G8R8UI:
178 *(unsigned short*)element = (ucast<8>(color.g) << 8) | (ucast<8>(color.r) << 0);
John Bauman89401822014-05-06 15:04:28 -0400179 break;
180 case FORMAT_G16R16:
181 *(unsigned int*)element = (unorm<16>(color.g) << 16) | (unorm<16>(color.r) << 0);
182 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400183 case FORMAT_G16R16I:
184 *(unsigned int*)element = (static_cast<unsigned int>(scast<16>(color.g)) << 16) |
185 (static_cast<unsigned int>(scast<16>(color.r)) << 0);
186 break;
187 case FORMAT_G16R16UI:
188 *(unsigned int*)element = (ucast<16>(color.g) << 16) | (ucast<16>(color.r) << 0);
189 break;
190 case FORMAT_G32R32I:
191 case FORMAT_G32R32UI:
192 ((unsigned int*)element)[0] = static_cast<unsigned int>(color.r);
193 ((unsigned int*)element)[1] = static_cast<unsigned int>(color.g);
194 break;
John Bauman89401822014-05-06 15:04:28 -0400195 case FORMAT_A16B16G16R16:
196 ((unsigned short*)element)[0] = unorm<16>(color.r);
197 ((unsigned short*)element)[1] = unorm<16>(color.g);
198 ((unsigned short*)element)[2] = unorm<16>(color.b);
199 ((unsigned short*)element)[3] = unorm<16>(color.a);
200 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400201 case FORMAT_A16B16G16R16I:
202 ((unsigned short*)element)[0] = static_cast<unsigned short>(scast<16>(color.r));
203 ((unsigned short*)element)[1] = static_cast<unsigned short>(scast<16>(color.g));
204 ((unsigned short*)element)[2] = static_cast<unsigned short>(scast<16>(color.b));
205 ((unsigned short*)element)[3] = static_cast<unsigned short>(scast<16>(color.a));
206 break;
207 case FORMAT_A16B16G16R16UI:
208 ((unsigned short*)element)[0] = static_cast<unsigned short>(ucast<16>(color.r));
209 ((unsigned short*)element)[1] = static_cast<unsigned short>(ucast<16>(color.g));
210 ((unsigned short*)element)[2] = static_cast<unsigned short>(ucast<16>(color.b));
211 ((unsigned short*)element)[3] = static_cast<unsigned short>(ucast<16>(color.a));
212 break;
213 case FORMAT_X16B16G16R16I:
214 ((unsigned short*)element)[0] = static_cast<unsigned short>(scast<16>(color.r));
215 ((unsigned short*)element)[1] = static_cast<unsigned short>(scast<16>(color.g));
216 ((unsigned short*)element)[2] = static_cast<unsigned short>(scast<16>(color.b));
217 break;
218 case FORMAT_X16B16G16R16UI:
219 ((unsigned short*)element)[0] = static_cast<unsigned short>(ucast<16>(color.r));
220 ((unsigned short*)element)[1] = static_cast<unsigned short>(ucast<16>(color.g));
221 ((unsigned short*)element)[2] = static_cast<unsigned short>(ucast<16>(color.b));
222 break;
223 case FORMAT_A32B32G32R32I:
224 case FORMAT_A32B32G32R32UI:
225 ((unsigned int*)element)[0] = static_cast<unsigned int>(color.r);
226 ((unsigned int*)element)[1] = static_cast<unsigned int>(color.g);
227 ((unsigned int*)element)[2] = static_cast<unsigned int>(color.b);
228 ((unsigned int*)element)[3] = static_cast<unsigned int>(color.a);
229 break;
230 case FORMAT_X32B32G32R32I:
231 case FORMAT_X32B32G32R32UI:
232 ((unsigned int*)element)[0] = static_cast<unsigned int>(color.r);
233 ((unsigned int*)element)[1] = static_cast<unsigned int>(color.g);
234 ((unsigned int*)element)[2] = static_cast<unsigned int>(color.b);
235 break;
John Bauman89401822014-05-06 15:04:28 -0400236 case FORMAT_V8U8:
237 *(unsigned short*)element = (snorm<8>(color.g) << 8) | (snorm<8>(color.r) << 0);
238 break;
239 case FORMAT_L6V5U5:
240 *(unsigned short*)element = (unorm<6>(color.b) << 10) | (snorm<5>(color.g) << 5) | (snorm<5>(color.r) << 0);
241 break;
242 case FORMAT_Q8W8V8U8:
243 *(unsigned int*)element = (snorm<8>(color.a) << 24) | (snorm<8>(color.b) << 16) | (snorm<8>(color.g) << 8) | (snorm<8>(color.r) << 0);
244 break;
245 case FORMAT_X8L8V8U8:
246 *(unsigned int*)element = 0xFF000000 | (unorm<8>(color.b) << 16) | (snorm<8>(color.g) << 8) | (snorm<8>(color.r) << 0);
247 break;
248 case FORMAT_V16U16:
249 *(unsigned int*)element = (snorm<16>(color.g) << 16) | (snorm<16>(color.r) << 0);
250 break;
251 case FORMAT_A2W10V10U10:
252 *(unsigned int*)element = (unorm<2>(color.a) << 30) | (snorm<10>(color.b) << 20) | (snorm<10>(color.g) << 10) | (snorm<10>(color.r) << 0);
253 break;
254 case FORMAT_A16W16V16U16:
255 ((unsigned short*)element)[0] = snorm<16>(color.r);
256 ((unsigned short*)element)[1] = snorm<16>(color.g);
257 ((unsigned short*)element)[2] = snorm<16>(color.b);
258 ((unsigned short*)element)[3] = unorm<16>(color.a);
259 break;
260 case FORMAT_Q16W16V16U16:
261 ((unsigned short*)element)[0] = snorm<16>(color.r);
262 ((unsigned short*)element)[1] = snorm<16>(color.g);
263 ((unsigned short*)element)[2] = snorm<16>(color.b);
264 ((unsigned short*)element)[3] = snorm<16>(color.a);
265 break;
266 case FORMAT_R8G8B8:
267 ((unsigned char*)element)[0] = unorm<8>(color.b);
268 ((unsigned char*)element)[1] = unorm<8>(color.g);
269 ((unsigned char*)element)[2] = unorm<8>(color.r);
270 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400271 case FORMAT_B8G8R8:
272 ((unsigned char*)element)[0] = unorm<8>(color.r);
273 ((unsigned char*)element)[1] = unorm<8>(color.g);
274 ((unsigned char*)element)[2] = unorm<8>(color.b);
275 break;
John Bauman89401822014-05-06 15:04:28 -0400276 case FORMAT_R16F:
277 *(half*)element = (half)color.r;
278 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400279 case FORMAT_A16F:
280 *(half*)element = (half)color.a;
281 break;
John Bauman89401822014-05-06 15:04:28 -0400282 case FORMAT_G16R16F:
283 ((half*)element)[0] = (half)color.r;
284 ((half*)element)[1] = (half)color.g;
285 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400286 case FORMAT_B16G16R16F:
287 ((half*)element)[0] = (half)color.r;
288 ((half*)element)[1] = (half)color.g;
289 ((half*)element)[2] = (half)color.b;
290 break;
John Bauman89401822014-05-06 15:04:28 -0400291 case FORMAT_A16B16G16R16F:
292 ((half*)element)[0] = (half)color.r;
293 ((half*)element)[1] = (half)color.g;
294 ((half*)element)[2] = (half)color.b;
295 ((half*)element)[3] = (half)color.a;
296 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400297 case FORMAT_A32F:
298 *(float*)element = color.a;
299 break;
John Bauman89401822014-05-06 15:04:28 -0400300 case FORMAT_R32F:
301 *(float*)element = color.r;
302 break;
303 case FORMAT_G32R32F:
304 ((float*)element)[0] = color.r;
305 ((float*)element)[1] = color.g;
306 break;
Alexis Hetudbd1a8e2016-04-13 11:40:30 -0400307 case FORMAT_X32B32G32R32F:
308 ((float*)element)[3] = 1.0f;
Nicolas Capens80594422015-06-09 16:42:56 -0400309 case FORMAT_B32G32R32F:
310 ((float*)element)[0] = color.r;
311 ((float*)element)[1] = color.g;
312 ((float*)element)[2] = color.b;
313 break;
John Bauman89401822014-05-06 15:04:28 -0400314 case FORMAT_A32B32G32R32F:
315 ((float*)element)[0] = color.r;
316 ((float*)element)[1] = color.g;
317 ((float*)element)[2] = color.b;
318 ((float*)element)[3] = color.a;
319 break;
320 case FORMAT_D32F:
Nicolas Capens57e7cea2017-12-13 22:25:04 -0500321 case FORMAT_D32FS8:
John Bauman89401822014-05-06 15:04:28 -0400322 case FORMAT_D32F_LOCKABLE:
John Bauman66b8ab22014-05-06 15:57:45 -0400323 case FORMAT_D32FS8_TEXTURE:
Nicolas Capens57e7cea2017-12-13 22:25:04 -0500324 case FORMAT_D32F_SHADOW:
John Bauman66b8ab22014-05-06 15:57:45 -0400325 case FORMAT_D32FS8_SHADOW:
John Bauman89401822014-05-06 15:04:28 -0400326 *((float*)element) = color.r;
327 break;
328 case FORMAT_D32F_COMPLEMENTARY:
Nicolas Capens57e7cea2017-12-13 22:25:04 -0500329 case FORMAT_D32FS8_COMPLEMENTARY:
John Bauman89401822014-05-06 15:04:28 -0400330 *((float*)element) = 1 - color.r;
331 break;
332 case FORMAT_S8:
333 *((unsigned char*)element) = unorm<8>(color.r);
334 break;
335 case FORMAT_L8:
336 *(unsigned char*)element = unorm<8>(color.r);
337 break;
338 case FORMAT_A4L4:
339 *(unsigned char*)element = (unorm<4>(color.a) << 4) | (unorm<4>(color.r) << 0);
340 break;
341 case FORMAT_L16:
342 *(unsigned short*)element = unorm<16>(color.r);
343 break;
344 case FORMAT_A8L8:
345 *(unsigned short*)element = (unorm<8>(color.a) << 8) | (unorm<8>(color.r) << 0);
346 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400347 case FORMAT_L16F:
348 *(half*)element = (half)color.r;
349 break;
350 case FORMAT_A16L16F:
351 ((half*)element)[0] = (half)color.r;
352 ((half*)element)[1] = (half)color.a;
353 break;
354 case FORMAT_L32F:
355 *(float*)element = color.r;
356 break;
357 case FORMAT_A32L32F:
358 ((float*)element)[0] = color.r;
359 ((float*)element)[1] = color.a;
360 break;
John Bauman89401822014-05-06 15:04:28 -0400361 default:
362 ASSERT(false);
363 }
364 }
365
366 Color<float> Surface::Buffer::read(int x, int y, int z) const
367 {
Alexis Hetu9c6d5222016-11-29 17:02:14 -0500368 void *element = (unsigned char*)buffer + (x + border) * bytes + (y + border) * pitchB + z * sliceB;
John Bauman89401822014-05-06 15:04:28 -0400369
370 return read(element);
371 }
372
373 Color<float> Surface::Buffer::read(int x, int y) const
374 {
Alexis Hetu9c6d5222016-11-29 17:02:14 -0500375 void *element = (unsigned char*)buffer + (x + border) * bytes + (y + border) * pitchB;
John Bauman89401822014-05-06 15:04:28 -0400376
377 return read(element);
378 }
379
380 inline Color<float> Surface::Buffer::read(void *element) const
381 {
Nicolas Capens3f439242015-06-09 16:33:50 -0400382 float r = 0.0f;
383 float g = 0.0f;
384 float b = 0.0f;
385 float a = 1.0f;
John Bauman89401822014-05-06 15:04:28 -0400386
387 switch(format)
388 {
389 case FORMAT_P8:
390 {
391 ASSERT(palette);
392
393 unsigned int abgr = palette[*(unsigned char*)element];
Nicolas Capensc39901e2016-03-21 16:37:44 -0400394
John Bauman89401822014-05-06 15:04:28 -0400395 r = (abgr & 0x000000FF) * (1.0f / 0x000000FF);
396 g = (abgr & 0x0000FF00) * (1.0f / 0x0000FF00);
397 b = (abgr & 0x00FF0000) * (1.0f / 0x00FF0000);
398 a = (abgr & 0xFF000000) * (1.0f / 0xFF000000);
399 }
400 break;
401 case FORMAT_A8P8:
402 {
403 ASSERT(palette);
404
405 unsigned int bgr = palette[((unsigned char*)element)[0]];
Nicolas Capensc39901e2016-03-21 16:37:44 -0400406
John Bauman89401822014-05-06 15:04:28 -0400407 r = (bgr & 0x000000FF) * (1.0f / 0x000000FF);
408 g = (bgr & 0x0000FF00) * (1.0f / 0x0000FF00);
409 b = (bgr & 0x00FF0000) * (1.0f / 0x00FF0000);
410 a = ((unsigned char*)element)[1] * (1.0f / 0xFF);
411 }
412 break;
413 case FORMAT_A8:
414 r = 0;
415 g = 0;
416 b = 0;
417 a = *(unsigned char*)element * (1.0f / 0xFF);
418 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400419 case FORMAT_R8I_SNORM:
420 r = max((*(signed char*)element) * (1.0f / 0x7F), -1.0f);
421 break;
John Bauman89401822014-05-06 15:04:28 -0400422 case FORMAT_R8:
423 r = *(unsigned char*)element * (1.0f / 0xFF);
424 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400425 case FORMAT_R8I:
426 r = *(signed char*)element;
427 break;
428 case FORMAT_R8UI:
429 r = *(unsigned char*)element;
430 break;
John Bauman89401822014-05-06 15:04:28 -0400431 case FORMAT_R3G3B2:
432 {
433 unsigned char rgb = *(unsigned char*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400434
John Bauman89401822014-05-06 15:04:28 -0400435 r = (rgb & 0xE0) * (1.0f / 0xE0);
436 g = (rgb & 0x1C) * (1.0f / 0x1C);
437 b = (rgb & 0x03) * (1.0f / 0x03);
438 }
439 break;
440 case FORMAT_A8R3G3B2:
441 {
442 unsigned short argb = *(unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400443
John Bauman89401822014-05-06 15:04:28 -0400444 a = (argb & 0xFF00) * (1.0f / 0xFF00);
445 r = (argb & 0x00E0) * (1.0f / 0x00E0);
446 g = (argb & 0x001C) * (1.0f / 0x001C);
447 b = (argb & 0x0003) * (1.0f / 0x0003);
448 }
449 break;
450 case FORMAT_X4R4G4B4:
451 {
452 unsigned short rgb = *(unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400453
John Bauman89401822014-05-06 15:04:28 -0400454 r = (rgb & 0x0F00) * (1.0f / 0x0F00);
455 g = (rgb & 0x00F0) * (1.0f / 0x00F0);
456 b = (rgb & 0x000F) * (1.0f / 0x000F);
457 }
458 break;
459 case FORMAT_A4R4G4B4:
460 {
461 unsigned short argb = *(unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400462
John Bauman89401822014-05-06 15:04:28 -0400463 a = (argb & 0xF000) * (1.0f / 0xF000);
464 r = (argb & 0x0F00) * (1.0f / 0x0F00);
465 g = (argb & 0x00F0) * (1.0f / 0x00F0);
466 b = (argb & 0x000F) * (1.0f / 0x000F);
467 }
468 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400469 case FORMAT_R4G4B4A4:
470 {
471 unsigned short rgba = *(unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400472
Nicolas Capens80594422015-06-09 16:42:56 -0400473 r = (rgba & 0xF000) * (1.0f / 0xF000);
474 g = (rgba & 0x0F00) * (1.0f / 0x0F00);
475 b = (rgba & 0x00F0) * (1.0f / 0x00F0);
476 a = (rgba & 0x000F) * (1.0f / 0x000F);
477 }
478 break;
John Bauman89401822014-05-06 15:04:28 -0400479 case FORMAT_R5G6B5:
480 {
481 unsigned short rgb = *(unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400482
John Bauman89401822014-05-06 15:04:28 -0400483 r = (rgb & 0xF800) * (1.0f / 0xF800);
484 g = (rgb & 0x07E0) * (1.0f / 0x07E0);
485 b = (rgb & 0x001F) * (1.0f / 0x001F);
486 }
487 break;
488 case FORMAT_A1R5G5B5:
489 {
490 unsigned short argb = *(unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400491
John Bauman89401822014-05-06 15:04:28 -0400492 a = (argb & 0x8000) * (1.0f / 0x8000);
493 r = (argb & 0x7C00) * (1.0f / 0x7C00);
494 g = (argb & 0x03E0) * (1.0f / 0x03E0);
495 b = (argb & 0x001F) * (1.0f / 0x001F);
496 }
497 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400498 case FORMAT_R5G5B5A1:
499 {
500 unsigned short rgba = *(unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400501
Nicolas Capens80594422015-06-09 16:42:56 -0400502 r = (rgba & 0xF800) * (1.0f / 0xF800);
503 g = (rgba & 0x07C0) * (1.0f / 0x07C0);
504 b = (rgba & 0x003E) * (1.0f / 0x003E);
505 a = (rgba & 0x0001) * (1.0f / 0x0001);
506 }
507 break;
John Bauman89401822014-05-06 15:04:28 -0400508 case FORMAT_X1R5G5B5:
509 {
510 unsigned short xrgb = *(unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400511
John Bauman89401822014-05-06 15:04:28 -0400512 r = (xrgb & 0x7C00) * (1.0f / 0x7C00);
513 g = (xrgb & 0x03E0) * (1.0f / 0x03E0);
514 b = (xrgb & 0x001F) * (1.0f / 0x001F);
515 }
516 break;
517 case FORMAT_A8R8G8B8:
518 {
519 unsigned int argb = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400520
John Bauman89401822014-05-06 15:04:28 -0400521 a = (argb & 0xFF000000) * (1.0f / 0xFF000000);
522 r = (argb & 0x00FF0000) * (1.0f / 0x00FF0000);
523 g = (argb & 0x0000FF00) * (1.0f / 0x0000FF00);
524 b = (argb & 0x000000FF) * (1.0f / 0x000000FF);
525 }
526 break;
527 case FORMAT_X8R8G8B8:
528 {
529 unsigned int xrgb = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400530
John Bauman89401822014-05-06 15:04:28 -0400531 r = (xrgb & 0x00FF0000) * (1.0f / 0x00FF0000);
532 g = (xrgb & 0x0000FF00) * (1.0f / 0x0000FF00);
533 b = (xrgb & 0x000000FF) * (1.0f / 0x000000FF);
534 }
535 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400536 case FORMAT_A8B8G8R8I_SNORM:
537 {
538 signed char* abgr = (signed char*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400539
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400540 r = max(abgr[0] * (1.0f / 0x7F), -1.0f);
541 g = max(abgr[1] * (1.0f / 0x7F), -1.0f);
542 b = max(abgr[2] * (1.0f / 0x7F), -1.0f);
543 a = max(abgr[3] * (1.0f / 0x7F), -1.0f);
544 }
545 break;
John Bauman89401822014-05-06 15:04:28 -0400546 case FORMAT_A8B8G8R8:
Alexis Hetu049a1872016-04-25 16:59:58 -0400547 case FORMAT_SRGB8_A8:
John Bauman89401822014-05-06 15:04:28 -0400548 {
549 unsigned int abgr = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400550
John Bauman89401822014-05-06 15:04:28 -0400551 a = (abgr & 0xFF000000) * (1.0f / 0xFF000000);
552 b = (abgr & 0x00FF0000) * (1.0f / 0x00FF0000);
553 g = (abgr & 0x0000FF00) * (1.0f / 0x0000FF00);
554 r = (abgr & 0x000000FF) * (1.0f / 0x000000FF);
555 }
556 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400557 case FORMAT_A8B8G8R8I:
558 {
559 signed char* abgr = (signed char*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400560
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400561 r = abgr[0];
562 g = abgr[1];
563 b = abgr[2];
564 a = abgr[3];
565 }
566 break;
567 case FORMAT_A8B8G8R8UI:
568 {
569 unsigned char* abgr = (unsigned char*)element;
570
571 r = abgr[0];
572 g = abgr[1];
573 b = abgr[2];
574 a = abgr[3];
575 }
576 break;
577 case FORMAT_X8B8G8R8I_SNORM:
578 {
579 signed char* bgr = (signed char*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400580
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400581 r = max(bgr[0] * (1.0f / 0x7F), -1.0f);
582 g = max(bgr[1] * (1.0f / 0x7F), -1.0f);
583 b = max(bgr[2] * (1.0f / 0x7F), -1.0f);
584 }
585 break;
John Bauman89401822014-05-06 15:04:28 -0400586 case FORMAT_X8B8G8R8:
Alexis Hetu049a1872016-04-25 16:59:58 -0400587 case FORMAT_SRGB8_X8:
John Bauman89401822014-05-06 15:04:28 -0400588 {
589 unsigned int xbgr = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400590
John Bauman89401822014-05-06 15:04:28 -0400591 b = (xbgr & 0x00FF0000) * (1.0f / 0x00FF0000);
592 g = (xbgr & 0x0000FF00) * (1.0f / 0x0000FF00);
593 r = (xbgr & 0x000000FF) * (1.0f / 0x000000FF);
594 }
595 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400596 case FORMAT_X8B8G8R8I:
597 {
598 signed char* bgr = (signed char*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400599
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400600 r = bgr[0];
601 g = bgr[1];
602 b = bgr[2];
603 }
604 break;
605 case FORMAT_X8B8G8R8UI:
606 {
607 unsigned char* bgr = (unsigned char*)element;
608
609 r = bgr[0];
610 g = bgr[1];
611 b = bgr[2];
612 }
613 break;
614 case FORMAT_G8R8I_SNORM:
615 {
616 signed char* gr = (signed char*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400617
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400618 r = (gr[0] & 0xFF00) * (1.0f / 0xFF00);
619 g = (gr[1] & 0x00FF) * (1.0f / 0x00FF);
620 }
621 break;
John Bauman89401822014-05-06 15:04:28 -0400622 case FORMAT_G8R8:
623 {
624 unsigned short gr = *(unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400625
John Bauman89401822014-05-06 15:04:28 -0400626 g = (gr & 0xFF00) * (1.0f / 0xFF00);
627 r = (gr & 0x00FF) * (1.0f / 0x00FF);
628 }
629 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400630 case FORMAT_G8R8I:
631 {
632 signed char* gr = (signed char*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400633
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400634 r = gr[0];
635 g = gr[1];
636 }
637 break;
638 case FORMAT_G8R8UI:
639 {
640 unsigned char* gr = (unsigned char*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400641
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400642 r = gr[0];
643 g = gr[1];
644 }
645 break;
646 case FORMAT_R16I:
647 r = *((short*)element);
648 break;
649 case FORMAT_R16UI:
650 r = *((unsigned short*)element);
651 break;
652 case FORMAT_G16R16I:
653 {
654 short* gr = (short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400655
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400656 r = gr[0];
657 g = gr[1];
658 }
659 break;
John Bauman89401822014-05-06 15:04:28 -0400660 case FORMAT_G16R16:
661 {
662 unsigned int gr = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400663
John Bauman89401822014-05-06 15:04:28 -0400664 g = (gr & 0xFFFF0000) * (1.0f / 0xFFFF0000);
665 r = (gr & 0x0000FFFF) * (1.0f / 0x0000FFFF);
666 }
667 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400668 case FORMAT_G16R16UI:
669 {
670 unsigned short* gr = (unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400671
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400672 r = gr[0];
673 g = gr[1];
674 }
675 break;
John Bauman89401822014-05-06 15:04:28 -0400676 case FORMAT_A2R10G10B10:
677 {
678 unsigned int argb = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400679
John Bauman89401822014-05-06 15:04:28 -0400680 a = (argb & 0xC0000000) * (1.0f / 0xC0000000);
681 r = (argb & 0x3FF00000) * (1.0f / 0x3FF00000);
682 g = (argb & 0x000FFC00) * (1.0f / 0x000FFC00);
683 b = (argb & 0x000003FF) * (1.0f / 0x000003FF);
684 }
685 break;
686 case FORMAT_A2B10G10R10:
687 {
688 unsigned int abgr = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400689
John Bauman89401822014-05-06 15:04:28 -0400690 a = (abgr & 0xC0000000) * (1.0f / 0xC0000000);
691 b = (abgr & 0x3FF00000) * (1.0f / 0x3FF00000);
692 g = (abgr & 0x000FFC00) * (1.0f / 0x000FFC00);
693 r = (abgr & 0x000003FF) * (1.0f / 0x000003FF);
694 }
695 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400696 case FORMAT_A16B16G16R16I:
697 {
698 short* abgr = (short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400699
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400700 r = abgr[0];
701 g = abgr[1];
702 b = abgr[2];
703 a = abgr[3];
704 }
705 break;
John Bauman89401822014-05-06 15:04:28 -0400706 case FORMAT_A16B16G16R16:
707 r = ((unsigned short*)element)[0] * (1.0f / 0xFFFF);
708 g = ((unsigned short*)element)[1] * (1.0f / 0xFFFF);
709 b = ((unsigned short*)element)[2] * (1.0f / 0xFFFF);
710 a = ((unsigned short*)element)[3] * (1.0f / 0xFFFF);
711 break;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400712 case FORMAT_A16B16G16R16UI:
713 {
714 unsigned short* abgr = (unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400715
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400716 r = abgr[0];
717 g = abgr[1];
718 b = abgr[2];
719 a = abgr[3];
720 }
721 break;
722 case FORMAT_X16B16G16R16I:
723 {
724 short* bgr = (short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400725
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400726 r = bgr[0];
727 g = bgr[1];
728 b = bgr[2];
729 }
730 break;
731 case FORMAT_X16B16G16R16UI:
732 {
733 unsigned short* bgr = (unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400734
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400735 r = bgr[0];
736 g = bgr[1];
737 b = bgr[2];
738 }
739 break;
740 case FORMAT_A32B32G32R32I:
741 {
742 int* abgr = (int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400743
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400744 r = static_cast<float>(abgr[0]);
745 g = static_cast<float>(abgr[1]);
746 b = static_cast<float>(abgr[2]);
747 a = static_cast<float>(abgr[3]);
748 }
749 break;
750 case FORMAT_A32B32G32R32UI:
751 {
752 unsigned int* abgr = (unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400753
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400754 r = static_cast<float>(abgr[0]);
755 g = static_cast<float>(abgr[1]);
756 b = static_cast<float>(abgr[2]);
757 a = static_cast<float>(abgr[3]);
758 }
759 break;
760 case FORMAT_X32B32G32R32I:
761 {
762 int* bgr = (int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400763
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400764 r = static_cast<float>(bgr[0]);
765 g = static_cast<float>(bgr[1]);
766 b = static_cast<float>(bgr[2]);
767 }
768 break;
769 case FORMAT_X32B32G32R32UI:
770 {
771 unsigned int* bgr = (unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400772
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400773 r = static_cast<float>(bgr[0]);
774 g = static_cast<float>(bgr[1]);
775 b = static_cast<float>(bgr[2]);
776 }
777 break;
778 case FORMAT_G32R32I:
779 {
780 int* gr = (int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400781
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400782 r = static_cast<float>(gr[0]);
783 g = static_cast<float>(gr[1]);
784 }
785 break;
786 case FORMAT_G32R32UI:
787 {
788 unsigned int* gr = (unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400789
Alexis Hetud3a2d3d2015-10-22 10:57:58 -0400790 r = static_cast<float>(gr[0]);
791 g = static_cast<float>(gr[1]);
792 }
793 break;
794 case FORMAT_R32I:
795 r = static_cast<float>(*((int*)element));
796 break;
797 case FORMAT_R32UI:
798 r = static_cast<float>(*((unsigned int*)element));
799 break;
John Bauman89401822014-05-06 15:04:28 -0400800 case FORMAT_V8U8:
801 {
802 unsigned short vu = *(unsigned short*)element;
803
804 r = ((int)(vu & 0x00FF) << 24) * (1.0f / 0x7F000000);
805 g = ((int)(vu & 0xFF00) << 16) * (1.0f / 0x7F000000);
806 }
807 break;
808 case FORMAT_L6V5U5:
809 {
810 unsigned short lvu = *(unsigned short*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400811
John Bauman89401822014-05-06 15:04:28 -0400812 r = ((int)(lvu & 0x001F) << 27) * (1.0f / 0x78000000);
813 g = ((int)(lvu & 0x03E0) << 22) * (1.0f / 0x78000000);
814 b = (lvu & 0xFC00) * (1.0f / 0xFC00);
815 }
816 break;
817 case FORMAT_Q8W8V8U8:
818 {
819 unsigned int qwvu = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400820
John Bauman89401822014-05-06 15:04:28 -0400821 r = ((int)(qwvu & 0x000000FF) << 24) * (1.0f / 0x7F000000);
822 g = ((int)(qwvu & 0x0000FF00) << 16) * (1.0f / 0x7F000000);
823 b = ((int)(qwvu & 0x00FF0000) << 8) * (1.0f / 0x7F000000);
824 a = ((int)(qwvu & 0xFF000000) << 0) * (1.0f / 0x7F000000);
825 }
826 break;
827 case FORMAT_X8L8V8U8:
828 {
829 unsigned int xlvu = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400830
John Bauman89401822014-05-06 15:04:28 -0400831 r = ((int)(xlvu & 0x000000FF) << 24) * (1.0f / 0x7F000000);
832 g = ((int)(xlvu & 0x0000FF00) << 16) * (1.0f / 0x7F000000);
833 b = (xlvu & 0x00FF0000) * (1.0f / 0x00FF0000);
834 }
835 break;
836 case FORMAT_R8G8B8:
837 r = ((unsigned char*)element)[2] * (1.0f / 0xFF);
838 g = ((unsigned char*)element)[1] * (1.0f / 0xFF);
839 b = ((unsigned char*)element)[0] * (1.0f / 0xFF);
840 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400841 case FORMAT_B8G8R8:
842 r = ((unsigned char*)element)[0] * (1.0f / 0xFF);
843 g = ((unsigned char*)element)[1] * (1.0f / 0xFF);
844 b = ((unsigned char*)element)[2] * (1.0f / 0xFF);
845 break;
John Bauman89401822014-05-06 15:04:28 -0400846 case FORMAT_V16U16:
847 {
848 unsigned int vu = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400849
John Bauman89401822014-05-06 15:04:28 -0400850 r = ((int)(vu & 0x0000FFFF) << 16) * (1.0f / 0x7FFF0000);
851 g = ((int)(vu & 0xFFFF0000) << 0) * (1.0f / 0x7FFF0000);
852 }
853 break;
854 case FORMAT_A2W10V10U10:
855 {
856 unsigned int awvu = *(unsigned int*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400857
John Bauman89401822014-05-06 15:04:28 -0400858 r = ((int)(awvu & 0x000003FF) << 22) * (1.0f / 0x7FC00000);
859 g = ((int)(awvu & 0x000FFC00) << 12) * (1.0f / 0x7FC00000);
860 b = ((int)(awvu & 0x3FF00000) << 2) * (1.0f / 0x7FC00000);
861 a = (awvu & 0xC0000000) * (1.0f / 0xC0000000);
862 }
863 break;
864 case FORMAT_A16W16V16U16:
865 r = ((signed short*)element)[0] * (1.0f / 0x7FFF);
866 g = ((signed short*)element)[1] * (1.0f / 0x7FFF);
867 b = ((signed short*)element)[2] * (1.0f / 0x7FFF);
868 a = ((unsigned short*)element)[3] * (1.0f / 0xFFFF);
869 break;
870 case FORMAT_Q16W16V16U16:
871 r = ((signed short*)element)[0] * (1.0f / 0x7FFF);
872 g = ((signed short*)element)[1] * (1.0f / 0x7FFF);
873 b = ((signed short*)element)[2] * (1.0f / 0x7FFF);
874 a = ((signed short*)element)[3] * (1.0f / 0x7FFF);
875 break;
876 case FORMAT_L8:
877 r =
878 g =
879 b = *(unsigned char*)element * (1.0f / 0xFF);
880 break;
881 case FORMAT_A4L4:
882 {
883 unsigned char al = *(unsigned char*)element;
Nicolas Capensc39901e2016-03-21 16:37:44 -0400884
John Bauman89401822014-05-06 15:04:28 -0400885 r =
886 g =
887 b = (al & 0x0F) * (1.0f / 0x0F);
888 a = (al & 0xF0) * (1.0f / 0xF0);
889 }
890 break;
891 case FORMAT_L16:
892 r =
893 g =
894 b = *(unsigned short*)element * (1.0f / 0xFFFF);
895 break;
896 case FORMAT_A8L8:
897 r =
898 g =
899 b = ((unsigned char*)element)[0] * (1.0f / 0xFF);
900 a = ((unsigned char*)element)[1] * (1.0f / 0xFF);
901 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400902 case FORMAT_L16F:
903 r =
904 g =
905 b = *(half*)element;
906 break;
907 case FORMAT_A16L16F:
908 r =
909 g =
910 b = ((half*)element)[0];
911 a = ((half*)element)[1];
912 break;
913 case FORMAT_L32F:
914 r =
915 g =
916 b = *(float*)element;
917 break;
918 case FORMAT_A32L32F:
919 r =
920 g =
921 b = ((float*)element)[0];
922 a = ((float*)element)[1];
923 break;
924 case FORMAT_A16F:
925 a = *(half*)element;
926 break;
John Bauman89401822014-05-06 15:04:28 -0400927 case FORMAT_R16F:
928 r = *(half*)element;
929 break;
930 case FORMAT_G16R16F:
931 r = ((half*)element)[0];
932 g = ((half*)element)[1];
933 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400934 case FORMAT_B16G16R16F:
935 r = ((half*)element)[0];
936 g = ((half*)element)[1];
937 b = ((half*)element)[2];
938 break;
John Bauman89401822014-05-06 15:04:28 -0400939 case FORMAT_A16B16G16R16F:
940 r = ((half*)element)[0];
941 g = ((half*)element)[1];
942 b = ((half*)element)[2];
943 a = ((half*)element)[3];
944 break;
Nicolas Capens80594422015-06-09 16:42:56 -0400945 case FORMAT_A32F:
946 a = *(float*)element;
947 break;
John Bauman89401822014-05-06 15:04:28 -0400948 case FORMAT_R32F:
949 r = *(float*)element;
950 break;
951 case FORMAT_G32R32F:
952 r = ((float*)element)[0];
953 g = ((float*)element)[1];
954 break;
Alexis Hetudbd1a8e2016-04-13 11:40:30 -0400955 case FORMAT_X32B32G32R32F:
Nicolas Capens80594422015-06-09 16:42:56 -0400956 case FORMAT_B32G32R32F:
957 r = ((float*)element)[0];
958 g = ((float*)element)[1];
959 b = ((float*)element)[2];
960 break;
John Bauman89401822014-05-06 15:04:28 -0400961 case FORMAT_A32B32G32R32F:
962 r = ((float*)element)[0];
963 g = ((float*)element)[1];
964 b = ((float*)element)[2];
965 a = ((float*)element)[3];
966 break;
967 case FORMAT_D32F:
Nicolas Capens57e7cea2017-12-13 22:25:04 -0500968 case FORMAT_D32FS8:
John Bauman89401822014-05-06 15:04:28 -0400969 case FORMAT_D32F_LOCKABLE:
John Bauman66b8ab22014-05-06 15:57:45 -0400970 case FORMAT_D32FS8_TEXTURE:
Nicolas Capens57e7cea2017-12-13 22:25:04 -0500971 case FORMAT_D32F_SHADOW:
John Bauman66b8ab22014-05-06 15:57:45 -0400972 case FORMAT_D32FS8_SHADOW:
John Bauman89401822014-05-06 15:04:28 -0400973 r = *(float*)element;
974 g = r;
975 b = r;
976 a = r;
977 break;
978 case FORMAT_D32F_COMPLEMENTARY:
Nicolas Capens57e7cea2017-12-13 22:25:04 -0500979 case FORMAT_D32FS8_COMPLEMENTARY:
John Bauman66b8ab22014-05-06 15:57:45 -0400980 r = 1.0f - *(float*)element;
John Bauman89401822014-05-06 15:04:28 -0400981 g = r;
982 b = r;
983 a = r;
984 break;
985 case FORMAT_S8:
986 r = *(unsigned char*)element * (1.0f / 0xFF);
987 break;
988 default:
989 ASSERT(false);
990 }
991
992 // if(sRGB)
993 // {
994 // r = sRGBtoLinear(r);
995 // g = sRGBtoLinear(g);
996 // b = sRGBtoLinear(b);
997 // }
998
999 return Color<float>(r, g, b, a);
1000 }
1001
1002 Color<float> Surface::Buffer::sample(float x, float y, float z) const
1003 {
1004 x -= 0.5f;
1005 y -= 0.5f;
1006 z -= 0.5f;
1007
1008 int x0 = clamp((int)x, 0, width - 1);
1009 int x1 = (x0 + 1 >= width) ? x0 : x0 + 1;
1010
1011 int y0 = clamp((int)y, 0, height - 1);
1012 int y1 = (y0 + 1 >= height) ? y0 : y0 + 1;
1013
1014 int z0 = clamp((int)z, 0, depth - 1);
1015 int z1 = (z0 + 1 >= depth) ? z0 : z0 + 1;
1016
1017 Color<float> c000 = read(x0, y0, z0);
1018 Color<float> c100 = read(x1, y0, z0);
1019 Color<float> c010 = read(x0, y1, z0);
1020 Color<float> c110 = read(x1, y1, z0);
1021 Color<float> c001 = read(x0, y0, z1);
1022 Color<float> c101 = read(x1, y0, z1);
1023 Color<float> c011 = read(x0, y1, z1);
1024 Color<float> c111 = read(x1, y1, z1);
1025
1026 float fx = x - x0;
1027 float fy = y - y0;
1028 float fz = z - z0;
1029
1030 c000 *= (1 - fx) * (1 - fy) * (1 - fz);
1031 c100 *= fx * (1 - fy) * (1 - fz);
1032 c010 *= (1 - fx) * fy * (1 - fz);
1033 c110 *= fx * fy * (1 - fz);
1034 c001 *= (1 - fx) * (1 - fy) * fz;
1035 c101 *= fx * (1 - fy) * fz;
1036 c011 *= (1 - fx) * fy * fz;
1037 c111 *= fx * fy * fz;
1038
1039 return c000 + c100 + c010 + c110 + c001 + c101 + c011 + c111;
1040 }
1041
1042 Color<float> Surface::Buffer::sample(float x, float y) const
1043 {
1044 x -= 0.5f;
1045 y -= 0.5f;
1046
1047 int x0 = clamp((int)x, 0, width - 1);
1048 int x1 = (x0 + 1 >= width) ? x0 : x0 + 1;
1049
1050 int y0 = clamp((int)y, 0, height - 1);
1051 int y1 = (y0 + 1 >= height) ? y0 : y0 + 1;
1052
1053 Color<float> c00 = read(x0, y0);
1054 Color<float> c10 = read(x1, y0);
1055 Color<float> c01 = read(x0, y1);
1056 Color<float> c11 = read(x1, y1);
1057
1058 float fx = x - x0;
1059 float fy = y - y0;
1060
1061 c00 *= (1 - fx) * (1 - fy);
1062 c10 *= fx * (1 - fy);
1063 c01 *= (1 - fx) * fy;
1064 c11 *= fx * fy;
1065
1066 return c00 + c10 + c01 + c11;
1067 }
1068
John Bauman19bac1e2014-05-06 15:23:49 -04001069 void *Surface::Buffer::lockRect(int x, int y, int z, Lock lock)
John Bauman89401822014-05-06 15:04:28 -04001070 {
1071 this->lock = lock;
1072
1073 switch(lock)
1074 {
1075 case LOCK_UNLOCKED:
1076 case LOCK_READONLY:
1077 break;
1078 case LOCK_WRITEONLY:
1079 case LOCK_READWRITE:
1080 case LOCK_DISCARD:
1081 dirty = true;
1082 break;
1083 default:
1084 ASSERT(false);
1085 }
1086
John Baumand4ae8632014-05-06 16:18:33 -04001087 if(buffer)
John Bauman89401822014-05-06 15:04:28 -04001088 {
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001089 x += border;
1090 y += border;
1091
John Baumand4ae8632014-05-06 16:18:33 -04001092 switch(format)
1093 {
1094 #if S3TC_SUPPORT
1095 case FORMAT_DXT1:
1096 #endif
1097 case FORMAT_ATI1:
Nicolas Capens22658242014-11-29 00:31:41 -05001098 case FORMAT_ETC1:
Alexis Hetu460e41f2015-09-01 10:58:37 -04001099 case FORMAT_R11_EAC:
1100 case FORMAT_SIGNED_R11_EAC:
1101 case FORMAT_RGB8_ETC2:
1102 case FORMAT_SRGB8_ETC2:
1103 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
1104 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
John Baumand4ae8632014-05-06 16:18:33 -04001105 return (unsigned char*)buffer + 8 * (x / 4) + (y / 4) * pitchB + z * sliceB;
Alexis Hetu460e41f2015-09-01 10:58:37 -04001106 case FORMAT_RG11_EAC:
1107 case FORMAT_SIGNED_RG11_EAC:
1108 case FORMAT_RGBA8_ETC2_EAC:
1109 case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
1110 case FORMAT_RGBA_ASTC_4x4_KHR:
1111 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
1112 return (unsigned char*)buffer + 16 * (x / 4) + (y / 4) * pitchB + z * sliceB;
1113 case FORMAT_RGBA_ASTC_5x4_KHR:
1114 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
1115 return (unsigned char*)buffer + 16 * (x / 5) + (y / 4) * pitchB + z * sliceB;
1116 case FORMAT_RGBA_ASTC_5x5_KHR:
1117 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
1118 return (unsigned char*)buffer + 16 * (x / 5) + (y / 5) * pitchB + z * sliceB;
1119 case FORMAT_RGBA_ASTC_6x5_KHR:
1120 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
1121 return (unsigned char*)buffer + 16 * (x / 6) + (y / 5) * pitchB + z * sliceB;
1122 case FORMAT_RGBA_ASTC_6x6_KHR:
1123 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
1124 return (unsigned char*)buffer + 16 * (x / 6) + (y / 6) * pitchB + z * sliceB;
1125 case FORMAT_RGBA_ASTC_8x5_KHR:
1126 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
1127 return (unsigned char*)buffer + 16 * (x / 8) + (y / 5) * pitchB + z * sliceB;
1128 case FORMAT_RGBA_ASTC_8x6_KHR:
1129 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
1130 return (unsigned char*)buffer + 16 * (x / 8) + (y / 6) * pitchB + z * sliceB;
1131 case FORMAT_RGBA_ASTC_8x8_KHR:
1132 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
1133 return (unsigned char*)buffer + 16 * (x / 8) + (y / 8) * pitchB + z * sliceB;
1134 case FORMAT_RGBA_ASTC_10x5_KHR:
1135 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
1136 return (unsigned char*)buffer + 16 * (x / 10) + (y / 5) * pitchB + z * sliceB;
1137 case FORMAT_RGBA_ASTC_10x6_KHR:
1138 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
1139 return (unsigned char*)buffer + 16 * (x / 10) + (y / 6) * pitchB + z * sliceB;
1140 case FORMAT_RGBA_ASTC_10x8_KHR:
1141 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
1142 return (unsigned char*)buffer + 16 * (x / 10) + (y / 8) * pitchB + z * sliceB;
1143 case FORMAT_RGBA_ASTC_10x10_KHR:
1144 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
1145 return (unsigned char*)buffer + 16 * (x / 10) + (y / 10) * pitchB + z * sliceB;
1146 case FORMAT_RGBA_ASTC_12x10_KHR:
1147 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
1148 return (unsigned char*)buffer + 16 * (x / 12) + (y / 10) * pitchB + z * sliceB;
1149 case FORMAT_RGBA_ASTC_12x12_KHR:
1150 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:
1151 return (unsigned char*)buffer + 16 * (x / 12) + (y / 12) * pitchB + z * sliceB;
John Baumand4ae8632014-05-06 16:18:33 -04001152 #if S3TC_SUPPORT
1153 case FORMAT_DXT3:
1154 case FORMAT_DXT5:
1155 #endif
1156 case FORMAT_ATI2:
1157 return (unsigned char*)buffer + 16 * (x / 4) + (y / 4) * pitchB + z * sliceB;
1158 default:
1159 return (unsigned char*)buffer + x * bytes + y * pitchB + z * sliceB;
1160 }
John Bauman89401822014-05-06 15:04:28 -04001161 }
1162
1163 return 0;
1164 }
1165
1166 void Surface::Buffer::unlockRect()
1167 {
1168 lock = LOCK_UNLOCKED;
1169 }
1170
Nicolas Capensf41f0332017-05-30 15:25:50 -04001171 class SurfaceImplementation : public Surface
1172 {
1173 public:
1174 SurfaceImplementation(int width, int height, int depth, Format format, void *pixels, int pitch, int slice)
1175 : Surface(width, height, depth, format, pixels, pitch, slice) {}
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001176 SurfaceImplementation(Resource *texture, int width, int height, int depth, int border, Format format, bool lockable, bool renderTarget, int pitchP = 0)
1177 : Surface(texture, width, height, depth, border, format, lockable, renderTarget, pitchP) {}
Nicolas Capensf41f0332017-05-30 15:25:50 -04001178 ~SurfaceImplementation() override {};
1179
1180 void *lockInternal(int x, int y, int z, Lock lock, Accessor client) override
1181 {
1182 return Surface::lockInternal(x, y, z, lock, client);
1183 }
1184
1185 void unlockInternal() override
1186 {
1187 Surface::unlockInternal();
1188 }
1189 };
1190
1191 Surface *Surface::create(int width, int height, int depth, Format format, void *pixels, int pitch, int slice)
1192 {
1193 return new SurfaceImplementation(width, height, depth, format, pixels, pitch, slice);
1194 }
1195
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001196 Surface *Surface::create(Resource *texture, int width, int height, int depth, int border, Format format, bool lockable, bool renderTarget, int pitchPprovided)
Nicolas Capensf41f0332017-05-30 15:25:50 -04001197 {
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001198 return new SurfaceImplementation(texture, width, height, depth, border, format, lockable, renderTarget, pitchPprovided);
Nicolas Capensf41f0332017-05-30 15:25:50 -04001199 }
1200
Nicolas Capens477314b2015-06-09 16:47:29 -04001201 Surface::Surface(int width, int height, int depth, Format format, void *pixels, int pitch, int slice) : lockable(true), renderTarget(false)
1202 {
1203 resource = new Resource(0);
1204 hasParent = false;
1205 ownExternal = false;
1206 depth = max(1, depth);
1207
1208 external.buffer = pixels;
1209 external.width = width;
1210 external.height = height;
1211 external.depth = depth;
1212 external.format = format;
1213 external.bytes = bytes(external.format);
1214 external.pitchB = pitch;
Nicolas Capens0a8d3d12016-02-12 17:10:58 -05001215 external.pitchP = external.bytes ? pitch / external.bytes : 0;
Nicolas Capens477314b2015-06-09 16:47:29 -04001216 external.sliceB = slice;
Nicolas Capens0a8d3d12016-02-12 17:10:58 -05001217 external.sliceP = external.bytes ? slice / external.bytes : 0;
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001218 external.border = 0;
Nicolas Capens477314b2015-06-09 16:47:29 -04001219 external.lock = LOCK_UNLOCKED;
1220 external.dirty = true;
1221
Nicolas Capens57e7cea2017-12-13 22:25:04 -05001222 internal.buffer = nullptr;
Nicolas Capens477314b2015-06-09 16:47:29 -04001223 internal.width = width;
1224 internal.height = height;
1225 internal.depth = depth;
1226 internal.format = selectInternalFormat(format);
1227 internal.bytes = bytes(internal.format);
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001228 internal.pitchB = pitchB(internal.width, 0, internal.format, false);
1229 internal.pitchP = pitchP(internal.width, 0, internal.format, false);
1230 internal.sliceB = sliceB(internal.width, internal.height, 0, internal.format, false);
1231 internal.sliceP = sliceP(internal.width, internal.height, 0, internal.format, false);
1232 internal.border = 0;
Nicolas Capens477314b2015-06-09 16:47:29 -04001233 internal.lock = LOCK_UNLOCKED;
1234 internal.dirty = false;
1235
Nicolas Capens57e7cea2017-12-13 22:25:04 -05001236 stencil.buffer = nullptr;
Nicolas Capens477314b2015-06-09 16:47:29 -04001237 stencil.width = width;
1238 stencil.height = height;
1239 stencil.depth = depth;
Nicolas Capens57e7cea2017-12-13 22:25:04 -05001240 stencil.format = isStencil(format) ? FORMAT_S8 : FORMAT_NULL;
Nicolas Capens477314b2015-06-09 16:47:29 -04001241 stencil.bytes = bytes(stencil.format);
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001242 stencil.pitchB = pitchB(stencil.width, 0, stencil.format, false);
1243 stencil.pitchP = pitchP(stencil.width, 0, stencil.format, false);
1244 stencil.sliceB = sliceB(stencil.width, stencil.height, 0, stencil.format, false);
1245 stencil.sliceP = sliceP(stencil.width, stencil.height, 0, stencil.format, false);
1246 stencil.border = 0;
Nicolas Capens477314b2015-06-09 16:47:29 -04001247 stencil.lock = LOCK_UNLOCKED;
1248 stencil.dirty = false;
1249
Nicolas Capens73e18c12017-11-28 13:31:35 -05001250 dirtyContents = true;
Nicolas Capens477314b2015-06-09 16:47:29 -04001251 paletteUsed = 0;
1252 }
1253
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001254 Surface::Surface(Resource *texture, int width, int height, int depth, int border, Format format, bool lockable, bool renderTarget, int pitchPprovided) : lockable(lockable), renderTarget(renderTarget)
John Bauman89401822014-05-06 15:04:28 -04001255 {
1256 resource = texture ? texture : new Resource(0);
Nicolas Capens57e7cea2017-12-13 22:25:04 -05001257 hasParent = texture != nullptr;
Nicolas Capens477314b2015-06-09 16:47:29 -04001258 ownExternal = true;
John Bauman89401822014-05-06 15:04:28 -04001259 depth = max(1, depth);
1260
Nicolas Capens57e7cea2017-12-13 22:25:04 -05001261 external.buffer = nullptr;
John Bauman89401822014-05-06 15:04:28 -04001262 external.width = width;
1263 external.height = height;
1264 external.depth = depth;
1265 external.format = format;
1266 external.bytes = bytes(external.format);
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001267 external.pitchB = pitchB(external.width, 0, external.format, renderTarget && !texture);
1268 external.pitchP = pitchP(external.width, 0, external.format, renderTarget && !texture);
1269 external.sliceB = sliceB(external.width, external.height, 0, external.format, renderTarget && !texture);
1270 external.sliceP = sliceP(external.width, external.height, 0, external.format, renderTarget && !texture);
1271 external.border = 0;
John Bauman89401822014-05-06 15:04:28 -04001272 external.lock = LOCK_UNLOCKED;
1273 external.dirty = false;
John Bauman89401822014-05-06 15:04:28 -04001274
Nicolas Capens57e7cea2017-12-13 22:25:04 -05001275 internal.buffer = nullptr;
John Bauman89401822014-05-06 15:04:28 -04001276 internal.width = width;
1277 internal.height = height;
1278 internal.depth = depth;
1279 internal.format = selectInternalFormat(format);
1280 internal.bytes = bytes(internal.format);
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001281 internal.pitchB = !pitchPprovided ? pitchB(internal.width, border, internal.format, renderTarget) : pitchPprovided * internal.bytes;
1282 internal.pitchP = !pitchPprovided ? pitchP(internal.width, border, internal.format, renderTarget) : pitchPprovided;
1283 internal.sliceB = sliceB(internal.width, internal.height, border, internal.format, renderTarget);
1284 internal.sliceP = sliceP(internal.width, internal.height, border, internal.format, renderTarget);
1285 internal.border = border;
John Bauman89401822014-05-06 15:04:28 -04001286 internal.lock = LOCK_UNLOCKED;
1287 internal.dirty = false;
John Bauman89401822014-05-06 15:04:28 -04001288
Nicolas Capens57e7cea2017-12-13 22:25:04 -05001289 stencil.buffer = nullptr;
John Bauman89401822014-05-06 15:04:28 -04001290 stencil.width = width;
1291 stencil.height = height;
1292 stencil.depth = depth;
Nicolas Capens57e7cea2017-12-13 22:25:04 -05001293 stencil.format = isStencil(format) ? FORMAT_S8 : FORMAT_NULL;
John Bauman89401822014-05-06 15:04:28 -04001294 stencil.bytes = bytes(stencil.format);
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001295 stencil.pitchB = pitchB(stencil.width, 0, stencil.format, renderTarget);
1296 stencil.pitchP = pitchP(stencil.width, 0, stencil.format, renderTarget);
1297 stencil.sliceB = sliceB(stencil.width, stencil.height, 0, stencil.format, renderTarget);
1298 stencil.sliceP = sliceP(stencil.width, stencil.height, 0, stencil.format, renderTarget);
1299 stencil.border = 0;
John Bauman89401822014-05-06 15:04:28 -04001300 stencil.lock = LOCK_UNLOCKED;
1301 stencil.dirty = false;
John Bauman89401822014-05-06 15:04:28 -04001302
Nicolas Capens73e18c12017-11-28 13:31:35 -05001303 dirtyContents = true;
John Bauman66b8ab22014-05-06 15:57:45 -04001304 paletteUsed = 0;
John Bauman89401822014-05-06 15:04:28 -04001305 }
1306
1307 Surface::~Surface()
1308 {
Nicolas Capensbf7a8142017-05-19 10:57:28 -04001309 // sync() must be called before this destructor to ensure all locks have been released.
1310 // We can't call it here because the parent resource may already have been destroyed.
Antoine Labourfc2b84d2017-06-09 18:14:05 -07001311 ASSERT(isUnlocked());
John Bauman8a4f6fc2014-05-06 15:26:18 -04001312
John Bauman89401822014-05-06 15:04:28 -04001313 if(!hasParent)
1314 {
1315 resource->destruct();
1316 }
1317
Nicolas Capens477314b2015-06-09 16:47:29 -04001318 if(ownExternal)
1319 {
1320 deallocate(external.buffer);
1321 }
John Bauman89401822014-05-06 15:04:28 -04001322
1323 if(internal.buffer != external.buffer)
1324 {
1325 deallocate(internal.buffer);
1326 }
1327
1328 deallocate(stencil.buffer);
1329
1330 external.buffer = 0;
1331 internal.buffer = 0;
1332 stencil.buffer = 0;
1333 }
1334
John Bauman19bac1e2014-05-06 15:23:49 -04001335 void *Surface::lockExternal(int x, int y, int z, Lock lock, Accessor client)
John Bauman89401822014-05-06 15:04:28 -04001336 {
1337 resource->lock(client);
1338
1339 if(!external.buffer)
1340 {
1341 if(internal.buffer && identicalFormats())
1342 {
1343 external.buffer = internal.buffer;
1344 }
1345 else
1346 {
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001347 external.buffer = allocateBuffer(external.width, external.height, external.depth, external.border, external.format);
John Bauman89401822014-05-06 15:04:28 -04001348 }
1349 }
1350
1351 if(internal.dirty)
1352 {
1353 if(lock != LOCK_DISCARD)
1354 {
1355 update(external, internal);
1356 }
John Bauman66b8ab22014-05-06 15:57:45 -04001357
1358 internal.dirty = false;
John Bauman89401822014-05-06 15:04:28 -04001359 }
1360
1361 switch(lock)
1362 {
1363 case LOCK_READONLY:
1364 break;
1365 case LOCK_WRITEONLY:
1366 case LOCK_READWRITE:
1367 case LOCK_DISCARD:
Nicolas Capens73e18c12017-11-28 13:31:35 -05001368 dirtyContents = true;
John Bauman89401822014-05-06 15:04:28 -04001369 break;
1370 default:
1371 ASSERT(false);
1372 }
1373
John Bauman19bac1e2014-05-06 15:23:49 -04001374 return external.lockRect(x, y, z, lock);
John Bauman89401822014-05-06 15:04:28 -04001375 }
1376
1377 void Surface::unlockExternal()
1378 {
John Bauman89401822014-05-06 15:04:28 -04001379 external.unlockRect();
Antoine Labourfc2b84d2017-06-09 18:14:05 -07001380
1381 resource->unlock();
John Bauman89401822014-05-06 15:04:28 -04001382 }
1383
John Bauman19bac1e2014-05-06 15:23:49 -04001384 void *Surface::lockInternal(int x, int y, int z, Lock lock, Accessor client)
John Bauman89401822014-05-06 15:04:28 -04001385 {
1386 if(lock != LOCK_UNLOCKED)
1387 {
1388 resource->lock(client);
1389 }
1390
1391 if(!internal.buffer)
1392 {
1393 if(external.buffer && identicalFormats())
1394 {
1395 internal.buffer = external.buffer;
1396 }
1397 else
1398 {
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001399 internal.buffer = allocateBuffer(internal.width, internal.height, internal.depth, internal.border, internal.format);
John Bauman89401822014-05-06 15:04:28 -04001400 }
1401 }
1402
1403 // FIXME: WHQL requires conversion to lower external precision and back
1404 if(logPrecision >= WHQL)
1405 {
1406 if(internal.dirty && renderTarget && internal.format != external.format)
1407 {
1408 if(lock != LOCK_DISCARD)
1409 {
1410 switch(external.format)
1411 {
1412 case FORMAT_R3G3B2:
1413 case FORMAT_A8R3G3B2:
1414 case FORMAT_A1R5G5B5:
1415 case FORMAT_A2R10G10B10:
1416 case FORMAT_A2B10G10R10:
1417 lockExternal(0, 0, 0, LOCK_READWRITE, client);
1418 unlockExternal();
1419 break;
1420 default:
1421 // Difference passes WHQL
1422 break;
1423 }
1424 }
1425 }
1426 }
1427
John Bauman66b8ab22014-05-06 15:57:45 -04001428 if(external.dirty || (isPalette(external.format) && paletteUsed != Surface::paletteID))
John Bauman89401822014-05-06 15:04:28 -04001429 {
1430 if(lock != LOCK_DISCARD)
1431 {
1432 update(internal, external);
1433 }
John Bauman89401822014-05-06 15:04:28 -04001434
John Bauman66b8ab22014-05-06 15:57:45 -04001435 external.dirty = false;
1436 paletteUsed = Surface::paletteID;
John Bauman89401822014-05-06 15:04:28 -04001437 }
1438
1439 switch(lock)
1440 {
1441 case LOCK_UNLOCKED:
1442 case LOCK_READONLY:
1443 break;
1444 case LOCK_WRITEONLY:
1445 case LOCK_READWRITE:
1446 case LOCK_DISCARD:
Nicolas Capens73e18c12017-11-28 13:31:35 -05001447 dirtyContents = true;
John Bauman89401822014-05-06 15:04:28 -04001448 break;
1449 default:
1450 ASSERT(false);
1451 }
1452
1453 if(lock == LOCK_READONLY && client == PUBLIC)
1454 {
1455 resolve();
1456 }
1457
John Bauman19bac1e2014-05-06 15:23:49 -04001458 return internal.lockRect(x, y, z, lock);
John Bauman89401822014-05-06 15:04:28 -04001459 }
1460
1461 void Surface::unlockInternal()
1462 {
John Bauman89401822014-05-06 15:04:28 -04001463 internal.unlockRect();
Antoine Labourfc2b84d2017-06-09 18:14:05 -07001464
1465 resource->unlock();
John Bauman89401822014-05-06 15:04:28 -04001466 }
1467
Alexis Hetua52dfbd2016-10-05 17:03:30 -04001468 void *Surface::lockStencil(int x, int y, int front, Accessor client)
John Bauman89401822014-05-06 15:04:28 -04001469 {
1470 resource->lock(client);
1471
1472 if(!stencil.buffer)
1473 {
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001474 stencil.buffer = allocateBuffer(stencil.width, stencil.height, stencil.depth, stencil.border, stencil.format);
John Bauman89401822014-05-06 15:04:28 -04001475 }
1476
Alexis Hetua52dfbd2016-10-05 17:03:30 -04001477 return stencil.lockRect(x, y, front, LOCK_READWRITE); // FIXME
John Bauman89401822014-05-06 15:04:28 -04001478 }
1479
1480 void Surface::unlockStencil()
1481 {
John Bauman89401822014-05-06 15:04:28 -04001482 stencil.unlockRect();
Antoine Labourfc2b84d2017-06-09 18:14:05 -07001483
1484 resource->unlock();
John Bauman89401822014-05-06 15:04:28 -04001485 }
1486
1487 int Surface::bytes(Format format)
1488 {
1489 switch(format)
1490 {
1491 case FORMAT_NULL: return 0;
1492 case FORMAT_P8: return 1;
1493 case FORMAT_A8P8: return 2;
1494 case FORMAT_A8: return 1;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001495 case FORMAT_R8I: return 1;
John Bauman89401822014-05-06 15:04:28 -04001496 case FORMAT_R8: return 1;
1497 case FORMAT_R3G3B2: return 1;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001498 case FORMAT_R16I: return 2;
1499 case FORMAT_R16UI: return 2;
John Bauman89401822014-05-06 15:04:28 -04001500 case FORMAT_A8R3G3B2: return 2;
1501 case FORMAT_R5G6B5: return 2;
1502 case FORMAT_A1R5G5B5: return 2;
1503 case FORMAT_X1R5G5B5: return 2;
Nicolas Capens80594422015-06-09 16:42:56 -04001504 case FORMAT_R5G5B5A1: return 2;
John Bauman89401822014-05-06 15:04:28 -04001505 case FORMAT_X4R4G4B4: return 2;
1506 case FORMAT_A4R4G4B4: return 2;
Nicolas Capens80594422015-06-09 16:42:56 -04001507 case FORMAT_R4G4B4A4: return 2;
John Bauman89401822014-05-06 15:04:28 -04001508 case FORMAT_R8G8B8: return 3;
Nicolas Capens80594422015-06-09 16:42:56 -04001509 case FORMAT_B8G8R8: return 3;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001510 case FORMAT_R32I: return 4;
1511 case FORMAT_R32UI: return 4;
John Bauman89401822014-05-06 15:04:28 -04001512 case FORMAT_X8R8G8B8: return 4;
1513 // case FORMAT_X8G8R8B8Q: return 4;
1514 case FORMAT_A8R8G8B8: return 4;
1515 // case FORMAT_A8G8R8B8Q: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001516 case FORMAT_X8B8G8R8I: return 4;
John Bauman89401822014-05-06 15:04:28 -04001517 case FORMAT_X8B8G8R8: return 4;
Alexis Hetu049a1872016-04-25 16:59:58 -04001518 case FORMAT_SRGB8_X8: return 4;
1519 case FORMAT_SRGB8_A8: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001520 case FORMAT_A8B8G8R8I: return 4;
1521 case FORMAT_R8UI: return 1;
1522 case FORMAT_G8R8UI: return 2;
1523 case FORMAT_X8B8G8R8UI: return 4;
1524 case FORMAT_A8B8G8R8UI: return 4;
John Bauman89401822014-05-06 15:04:28 -04001525 case FORMAT_A8B8G8R8: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001526 case FORMAT_R8I_SNORM: return 1;
1527 case FORMAT_G8R8I_SNORM: return 2;
1528 case FORMAT_X8B8G8R8I_SNORM: return 4;
1529 case FORMAT_A8B8G8R8I_SNORM: return 4;
John Bauman89401822014-05-06 15:04:28 -04001530 case FORMAT_A2R10G10B10: return 4;
1531 case FORMAT_A2B10G10R10: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001532 case FORMAT_G8R8I: return 2;
John Bauman89401822014-05-06 15:04:28 -04001533 case FORMAT_G8R8: return 2;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001534 case FORMAT_G16R16I: return 4;
1535 case FORMAT_G16R16UI: return 4;
John Bauman89401822014-05-06 15:04:28 -04001536 case FORMAT_G16R16: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001537 case FORMAT_G32R32I: return 8;
1538 case FORMAT_G32R32UI: return 8;
1539 case FORMAT_X16B16G16R16I: return 8;
1540 case FORMAT_X16B16G16R16UI: return 8;
1541 case FORMAT_A16B16G16R16I: return 8;
1542 case FORMAT_A16B16G16R16UI: return 8;
John Bauman89401822014-05-06 15:04:28 -04001543 case FORMAT_A16B16G16R16: return 8;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04001544 case FORMAT_X32B32G32R32I: return 16;
1545 case FORMAT_X32B32G32R32UI: return 16;
1546 case FORMAT_A32B32G32R32I: return 16;
1547 case FORMAT_A32B32G32R32UI: return 16;
John Bauman89401822014-05-06 15:04:28 -04001548 // Compressed formats
1549 #if S3TC_SUPPORT
1550 case FORMAT_DXT1: return 2; // Column of four pixels
1551 case FORMAT_DXT3: return 4; // Column of four pixels
1552 case FORMAT_DXT5: return 4; // Column of four pixels
John Bauman66b8ab22014-05-06 15:57:45 -04001553 #endif
John Bauman89401822014-05-06 15:04:28 -04001554 case FORMAT_ATI1: return 2; // Column of four pixels
1555 case FORMAT_ATI2: return 4; // Column of four pixels
Nicolas Capens22658242014-11-29 00:31:41 -05001556 case FORMAT_ETC1: return 2; // Column of four pixels
Alexis Hetu460e41f2015-09-01 10:58:37 -04001557 case FORMAT_R11_EAC: return 2;
1558 case FORMAT_SIGNED_R11_EAC: return 2;
1559 case FORMAT_RG11_EAC: return 4;
1560 case FORMAT_SIGNED_RG11_EAC: return 4;
1561 case FORMAT_RGB8_ETC2: return 2;
1562 case FORMAT_SRGB8_ETC2: return 2;
1563 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2: return 2;
1564 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2: return 2;
1565 case FORMAT_RGBA8_ETC2_EAC: return 4;
1566 case FORMAT_SRGB8_ALPHA8_ETC2_EAC: return 4;
1567 case FORMAT_RGBA_ASTC_4x4_KHR:
1568 case FORMAT_RGBA_ASTC_5x4_KHR:
1569 case FORMAT_RGBA_ASTC_5x5_KHR:
1570 case FORMAT_RGBA_ASTC_6x5_KHR:
1571 case FORMAT_RGBA_ASTC_6x6_KHR:
1572 case FORMAT_RGBA_ASTC_8x5_KHR:
1573 case FORMAT_RGBA_ASTC_8x6_KHR:
1574 case FORMAT_RGBA_ASTC_8x8_KHR:
1575 case FORMAT_RGBA_ASTC_10x5_KHR:
1576 case FORMAT_RGBA_ASTC_10x6_KHR:
1577 case FORMAT_RGBA_ASTC_10x8_KHR:
1578 case FORMAT_RGBA_ASTC_10x10_KHR:
1579 case FORMAT_RGBA_ASTC_12x10_KHR:
1580 case FORMAT_RGBA_ASTC_12x12_KHR:
1581 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
1582 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
1583 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
1584 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
1585 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
1586 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
1587 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
1588 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
1589 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
1590 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
1591 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
1592 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
1593 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
1594 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR: return 0; // FIXME
John Bauman89401822014-05-06 15:04:28 -04001595 // Bumpmap formats
1596 case FORMAT_V8U8: return 2;
1597 case FORMAT_L6V5U5: return 2;
1598 case FORMAT_Q8W8V8U8: return 4;
1599 case FORMAT_X8L8V8U8: return 4;
1600 case FORMAT_A2W10V10U10: return 4;
1601 case FORMAT_V16U16: return 4;
1602 case FORMAT_A16W16V16U16: return 8;
1603 case FORMAT_Q16W16V16U16: return 8;
1604 // Luminance formats
1605 case FORMAT_L8: return 1;
1606 case FORMAT_A4L4: return 1;
1607 case FORMAT_L16: return 2;
1608 case FORMAT_A8L8: return 2;
Nicolas Capens80594422015-06-09 16:42:56 -04001609 case FORMAT_L16F: return 2;
1610 case FORMAT_A16L16F: return 4;
1611 case FORMAT_L32F: return 4;
1612 case FORMAT_A32L32F: return 8;
John Bauman89401822014-05-06 15:04:28 -04001613 // Floating-point formats
Nicolas Capens80594422015-06-09 16:42:56 -04001614 case FORMAT_A16F: return 2;
John Bauman89401822014-05-06 15:04:28 -04001615 case FORMAT_R16F: return 2;
1616 case FORMAT_G16R16F: return 4;
Nicolas Capens80594422015-06-09 16:42:56 -04001617 case FORMAT_B16G16R16F: return 6;
John Bauman89401822014-05-06 15:04:28 -04001618 case FORMAT_A16B16G16R16F: return 8;
Nicolas Capens80594422015-06-09 16:42:56 -04001619 case FORMAT_A32F: return 4;
John Bauman89401822014-05-06 15:04:28 -04001620 case FORMAT_R32F: return 4;
1621 case FORMAT_G32R32F: return 8;
Nicolas Capens80594422015-06-09 16:42:56 -04001622 case FORMAT_B32G32R32F: return 12;
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04001623 case FORMAT_X32B32G32R32F: return 16;
John Bauman89401822014-05-06 15:04:28 -04001624 case FORMAT_A32B32G32R32F: return 16;
1625 // Depth/stencil formats
1626 case FORMAT_D16: return 2;
1627 case FORMAT_D32: return 4;
1628 case FORMAT_D24X8: return 4;
1629 case FORMAT_D24S8: return 4;
1630 case FORMAT_D24FS8: return 4;
1631 case FORMAT_D32F: return 4;
Nicolas Capens57e7cea2017-12-13 22:25:04 -05001632 case FORMAT_D32FS8: return 4;
John Bauman89401822014-05-06 15:04:28 -04001633 case FORMAT_D32F_COMPLEMENTARY: return 4;
Nicolas Capens57e7cea2017-12-13 22:25:04 -05001634 case FORMAT_D32FS8_COMPLEMENTARY: return 4;
John Bauman89401822014-05-06 15:04:28 -04001635 case FORMAT_D32F_LOCKABLE: return 4;
John Bauman66b8ab22014-05-06 15:57:45 -04001636 case FORMAT_D32FS8_TEXTURE: return 4;
Nicolas Capens57e7cea2017-12-13 22:25:04 -05001637 case FORMAT_D32F_SHADOW: return 4;
John Bauman66b8ab22014-05-06 15:57:45 -04001638 case FORMAT_D32FS8_SHADOW: return 4;
1639 case FORMAT_DF24S8: return 4;
1640 case FORMAT_DF16S8: return 2;
John Bauman89401822014-05-06 15:04:28 -04001641 case FORMAT_INTZ: return 4;
1642 case FORMAT_S8: return 1;
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04001643 case FORMAT_YV12_BT601: return 1; // Y plane only
1644 case FORMAT_YV12_BT709: return 1; // Y plane only
1645 case FORMAT_YV12_JFIF: return 1; // Y plane only
John Bauman89401822014-05-06 15:04:28 -04001646 default:
1647 ASSERT(false);
1648 }
1649
1650 return 0;
1651 }
1652
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001653 int Surface::pitchB(int width, int border, Format format, bool target)
John Bauman89401822014-05-06 15:04:28 -04001654 {
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001655 width += 2 * border;
1656
John Bauman89401822014-05-06 15:04:28 -04001657 if(target || isDepth(format) || isStencil(format))
1658 {
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04001659 width = align(width, 2);
John Bauman89401822014-05-06 15:04:28 -04001660 }
1661
1662 switch(format)
1663 {
1664 #if S3TC_SUPPORT
1665 case FORMAT_DXT1:
Nicolas Capens22658242014-11-29 00:31:41 -05001666 #endif
1667 case FORMAT_ETC1:
Alexis Hetu460e41f2015-09-01 10:58:37 -04001668 case FORMAT_R11_EAC:
1669 case FORMAT_SIGNED_R11_EAC:
1670 case FORMAT_RGB8_ETC2:
1671 case FORMAT_SRGB8_ETC2:
1672 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
1673 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
John Bauman89401822014-05-06 15:04:28 -04001674 return 8 * ((width + 3) / 4); // 64 bit per 4x4 block, computed per 4 rows
Alexis Hetu460e41f2015-09-01 10:58:37 -04001675 case FORMAT_RG11_EAC:
1676 case FORMAT_SIGNED_RG11_EAC:
1677 case FORMAT_RGBA8_ETC2_EAC:
1678 case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
1679 case FORMAT_RGBA_ASTC_4x4_KHR:
1680 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
1681 return 16 * ((width + 3) / 4); // 128 bit per 4x4 block, computed per 4 rows
1682 case FORMAT_RGBA_ASTC_5x4_KHR:
1683 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
1684 case FORMAT_RGBA_ASTC_5x5_KHR:
1685 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
1686 return 16 * ((width + 4) / 5);
1687 case FORMAT_RGBA_ASTC_6x5_KHR:
1688 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
1689 case FORMAT_RGBA_ASTC_6x6_KHR:
1690 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
1691 return 16 * ((width + 5) / 6);
1692 case FORMAT_RGBA_ASTC_8x5_KHR:
1693 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
1694 case FORMAT_RGBA_ASTC_8x6_KHR:
1695 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
1696 case FORMAT_RGBA_ASTC_8x8_KHR:
1697 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
1698 return 16 * ((width + 7) / 8);
1699 case FORMAT_RGBA_ASTC_10x5_KHR:
1700 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
1701 case FORMAT_RGBA_ASTC_10x6_KHR:
1702 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
1703 case FORMAT_RGBA_ASTC_10x8_KHR:
1704 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
1705 case FORMAT_RGBA_ASTC_10x10_KHR:
1706 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
1707 return 16 * ((width + 9) / 10);
1708 case FORMAT_RGBA_ASTC_12x10_KHR:
1709 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
1710 case FORMAT_RGBA_ASTC_12x12_KHR:
1711 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:
1712 return 16 * ((width + 11) / 12);
Nicolas Capens22658242014-11-29 00:31:41 -05001713 #if S3TC_SUPPORT
John Bauman89401822014-05-06 15:04:28 -04001714 case FORMAT_DXT3:
1715 case FORMAT_DXT5:
1716 return 16 * ((width + 3) / 4); // 128 bit per 4x4 block, computed per 4 rows
John Bauman66b8ab22014-05-06 15:57:45 -04001717 #endif
John Bauman89401822014-05-06 15:04:28 -04001718 case FORMAT_ATI1:
1719 return 2 * ((width + 3) / 4); // 64 bit per 4x4 block, computed per row
1720 case FORMAT_ATI2:
1721 return 4 * ((width + 3) / 4); // 128 bit per 4x4 block, computed per row
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04001722 case FORMAT_YV12_BT601:
1723 case FORMAT_YV12_BT709:
1724 case FORMAT_YV12_JFIF:
1725 return align(width, 16);
John Bauman89401822014-05-06 15:04:28 -04001726 default:
1727 return bytes(format) * width;
1728 }
1729 }
1730
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001731 int Surface::pitchP(int width, int border, Format format, bool target)
John Bauman89401822014-05-06 15:04:28 -04001732 {
1733 int B = bytes(format);
1734
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001735 return B > 0 ? pitchB(width, border, format, target) / B : 0;
John Bauman89401822014-05-06 15:04:28 -04001736 }
1737
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001738 int Surface::sliceB(int width, int height, int border, Format format, bool target)
John Bauman89401822014-05-06 15:04:28 -04001739 {
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001740 height += 2 * border;
1741
John Bauman89401822014-05-06 15:04:28 -04001742 if(target || isDepth(format) || isStencil(format))
1743 {
1744 height = ((height + 1) & ~1);
1745 }
1746
1747 switch(format)
1748 {
1749 #if S3TC_SUPPORT
1750 case FORMAT_DXT1:
1751 case FORMAT_DXT3:
1752 case FORMAT_DXT5:
John Bauman66b8ab22014-05-06 15:57:45 -04001753 #endif
Nicolas Capens22658242014-11-29 00:31:41 -05001754 case FORMAT_ETC1:
Alexis Hetu460e41f2015-09-01 10:58:37 -04001755 case FORMAT_R11_EAC:
1756 case FORMAT_SIGNED_R11_EAC:
1757 case FORMAT_RG11_EAC:
1758 case FORMAT_SIGNED_RG11_EAC:
1759 case FORMAT_RGB8_ETC2:
1760 case FORMAT_SRGB8_ETC2:
1761 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
1762 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
1763 case FORMAT_RGBA8_ETC2_EAC:
1764 case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
1765 case FORMAT_RGBA_ASTC_4x4_KHR:
1766 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
1767 case FORMAT_RGBA_ASTC_5x4_KHR:
1768 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001769 return pitchB(width, border, format, target) * ((height + 3) / 4); // Pitch computed per 4 rows
Alexis Hetu460e41f2015-09-01 10:58:37 -04001770 case FORMAT_RGBA_ASTC_5x5_KHR:
1771 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
1772 case FORMAT_RGBA_ASTC_6x5_KHR:
1773 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
1774 case FORMAT_RGBA_ASTC_8x5_KHR:
1775 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
1776 case FORMAT_RGBA_ASTC_10x5_KHR:
1777 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001778 return pitchB(width, border, format, target) * ((height + 4) / 5); // Pitch computed per 5 rows
Alexis Hetu460e41f2015-09-01 10:58:37 -04001779 case FORMAT_RGBA_ASTC_6x6_KHR:
1780 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
1781 case FORMAT_RGBA_ASTC_8x6_KHR:
1782 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
1783 case FORMAT_RGBA_ASTC_10x6_KHR:
1784 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001785 return pitchB(width, border, format, target) * ((height + 5) / 6); // Pitch computed per 6 rows
Alexis Hetu460e41f2015-09-01 10:58:37 -04001786 case FORMAT_RGBA_ASTC_8x8_KHR:
1787 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
1788 case FORMAT_RGBA_ASTC_10x8_KHR:
1789 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001790 return pitchB(width, border, format, target) * ((height + 7) / 8); // Pitch computed per 8 rows
Alexis Hetu460e41f2015-09-01 10:58:37 -04001791 case FORMAT_RGBA_ASTC_10x10_KHR:
1792 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
1793 case FORMAT_RGBA_ASTC_12x10_KHR:
1794 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001795 return pitchB(width, border, format, target) * ((height + 9) / 10); // Pitch computed per 10 rows
Alexis Hetu460e41f2015-09-01 10:58:37 -04001796 case FORMAT_RGBA_ASTC_12x12_KHR:
1797 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001798 return pitchB(width, border, format, target) * ((height + 11) / 12); // Pitch computed per 12 rows
Nicolas Capens22658242014-11-29 00:31:41 -05001799 case FORMAT_ATI1:
1800 case FORMAT_ATI2:
John Bauman89401822014-05-06 15:04:28 -04001801 default:
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001802 return pitchB(width, border, format, target) * height; // Pitch computed per row
John Bauman89401822014-05-06 15:04:28 -04001803 }
1804 }
1805
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001806 int Surface::sliceP(int width, int height, int border, Format format, bool target)
John Bauman89401822014-05-06 15:04:28 -04001807 {
1808 int B = bytes(format);
1809
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001810 return B > 0 ? sliceB(width, height, border, format, target) / B : 0;
John Bauman89401822014-05-06 15:04:28 -04001811 }
1812
1813 void Surface::update(Buffer &destination, Buffer &source)
1814 {
1815 // ASSERT(source.lock != LOCK_UNLOCKED);
1816 // ASSERT(destination.lock != LOCK_UNLOCKED);
Nicolas Capensc39901e2016-03-21 16:37:44 -04001817
John Bauman89401822014-05-06 15:04:28 -04001818 if(destination.buffer != source.buffer)
1819 {
1820 ASSERT(source.dirty && !destination.dirty);
1821
1822 switch(source.format)
1823 {
1824 case FORMAT_R8G8B8: decodeR8G8B8(destination, source); break; // FIXME: Check destination format
John Bauman89401822014-05-06 15:04:28 -04001825 case FORMAT_X1R5G5B5: decodeX1R5G5B5(destination, source); break; // FIXME: Check destination format
1826 case FORMAT_A1R5G5B5: decodeA1R5G5B5(destination, source); break; // FIXME: Check destination format
1827 case FORMAT_X4R4G4B4: decodeX4R4G4B4(destination, source); break; // FIXME: Check destination format
1828 case FORMAT_A4R4G4B4: decodeA4R4G4B4(destination, source); break; // FIXME: Check destination format
1829 case FORMAT_P8: decodeP8(destination, source); break; // FIXME: Check destination format
1830 #if S3TC_SUPPORT
1831 case FORMAT_DXT1: decodeDXT1(destination, source); break; // FIXME: Check destination format
1832 case FORMAT_DXT3: decodeDXT3(destination, source); break; // FIXME: Check destination format
1833 case FORMAT_DXT5: decodeDXT5(destination, source); break; // FIXME: Check destination format
Nicolas Capens22658242014-11-29 00:31:41 -05001834 #endif
John Bauman89401822014-05-06 15:04:28 -04001835 case FORMAT_ATI1: decodeATI1(destination, source); break; // FIXME: Check destination format
1836 case FORMAT_ATI2: decodeATI2(destination, source); break; // FIXME: Check destination format
Alexis Hetu460e41f2015-09-01 10:58:37 -04001837 case FORMAT_R11_EAC: decodeEAC(destination, source, 1, false); break; // FIXME: Check destination format
1838 case FORMAT_SIGNED_R11_EAC: decodeEAC(destination, source, 1, true); break; // FIXME: Check destination format
1839 case FORMAT_RG11_EAC: decodeEAC(destination, source, 2, false); break; // FIXME: Check destination format
1840 case FORMAT_SIGNED_RG11_EAC: decodeEAC(destination, source, 2, true); break; // FIXME: Check destination format
Alexis Hetu0de50d42015-09-09 13:56:41 -04001841 case FORMAT_ETC1:
Alexis Hetu460e41f2015-09-01 10:58:37 -04001842 case FORMAT_RGB8_ETC2: decodeETC2(destination, source, 0, false); break; // FIXME: Check destination format
1843 case FORMAT_SRGB8_ETC2: decodeETC2(destination, source, 0, true); break; // FIXME: Check destination format
1844 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2: decodeETC2(destination, source, 1, false); break; // FIXME: Check destination format
1845 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2: decodeETC2(destination, source, 1, true); break; // FIXME: Check destination format
1846 case FORMAT_RGBA8_ETC2_EAC: decodeETC2(destination, source, 8, false); break; // FIXME: Check destination format
1847 case FORMAT_SRGB8_ALPHA8_ETC2_EAC: decodeETC2(destination, source, 8, true); break; // FIXME: Check destination format
1848 case FORMAT_RGBA_ASTC_4x4_KHR: decodeASTC(destination, source, 4, 4, 1, false); break; // FIXME: Check destination format
1849 case FORMAT_RGBA_ASTC_5x4_KHR: decodeASTC(destination, source, 5, 4, 1, false); break; // FIXME: Check destination format
1850 case FORMAT_RGBA_ASTC_5x5_KHR: decodeASTC(destination, source, 5, 5, 1, false); break; // FIXME: Check destination format
1851 case FORMAT_RGBA_ASTC_6x5_KHR: decodeASTC(destination, source, 6, 5, 1, false); break; // FIXME: Check destination format
1852 case FORMAT_RGBA_ASTC_6x6_KHR: decodeASTC(destination, source, 6, 6, 1, false); break; // FIXME: Check destination format
1853 case FORMAT_RGBA_ASTC_8x5_KHR: decodeASTC(destination, source, 8, 5, 1, false); break; // FIXME: Check destination format
1854 case FORMAT_RGBA_ASTC_8x6_KHR: decodeASTC(destination, source, 8, 6, 1, false); break; // FIXME: Check destination format
1855 case FORMAT_RGBA_ASTC_8x8_KHR: decodeASTC(destination, source, 8, 8, 1, false); break; // FIXME: Check destination format
1856 case FORMAT_RGBA_ASTC_10x5_KHR: decodeASTC(destination, source, 10, 5, 1, false); break; // FIXME: Check destination format
1857 case FORMAT_RGBA_ASTC_10x6_KHR: decodeASTC(destination, source, 10, 6, 1, false); break; // FIXME: Check destination format
1858 case FORMAT_RGBA_ASTC_10x8_KHR: decodeASTC(destination, source, 10, 8, 1, false); break; // FIXME: Check destination format
1859 case FORMAT_RGBA_ASTC_10x10_KHR: decodeASTC(destination, source, 10, 10, 1, false); break; // FIXME: Check destination format
1860 case FORMAT_RGBA_ASTC_12x10_KHR: decodeASTC(destination, source, 12, 10, 1, false); break; // FIXME: Check destination format
1861 case FORMAT_RGBA_ASTC_12x12_KHR: decodeASTC(destination, source, 12, 12, 1, false); break; // FIXME: Check destination format
1862 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR: decodeASTC(destination, source, 4, 4, 1, true); break; // FIXME: Check destination format
1863 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR: decodeASTC(destination, source, 5, 4, 1, true); break; // FIXME: Check destination format
1864 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR: decodeASTC(destination, source, 5, 5, 1, true); break; // FIXME: Check destination format
1865 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR: decodeASTC(destination, source, 6, 5, 1, true); break; // FIXME: Check destination format
1866 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR: decodeASTC(destination, source, 6, 6, 1, true); break; // FIXME: Check destination format
1867 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR: decodeASTC(destination, source, 8, 5, 1, true); break; // FIXME: Check destination format
1868 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR: decodeASTC(destination, source, 8, 6, 1, true); break; // FIXME: Check destination format
1869 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR: decodeASTC(destination, source, 8, 8, 1, true); break; // FIXME: Check destination format
1870 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR: decodeASTC(destination, source, 10, 5, 1, true); break; // FIXME: Check destination format
1871 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR: decodeASTC(destination, source, 10, 6, 1, true); break; // FIXME: Check destination format
1872 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR: decodeASTC(destination, source, 10, 8, 1, true); break; // FIXME: Check destination format
1873 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR: decodeASTC(destination, source, 10, 10, 1, true); break; // FIXME: Check destination format
1874 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR: decodeASTC(destination, source, 12, 10, 1, true); break; // FIXME: Check destination format
1875 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR: decodeASTC(destination, source, 12, 12, 1, true); break; // FIXME: Check destination format
John Bauman89401822014-05-06 15:04:28 -04001876 default: genericUpdate(destination, source); break;
1877 }
1878 }
John Bauman89401822014-05-06 15:04:28 -04001879 }
1880
1881 void Surface::genericUpdate(Buffer &destination, Buffer &source)
1882 {
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001883 unsigned char *sourceSlice = (unsigned char*)source.lockRect(0, 0, 0, sw::LOCK_READONLY);
1884 unsigned char *destinationSlice = (unsigned char*)destination.lockRect(0, 0, 0, sw::LOCK_WRITEONLY);
John Bauman89401822014-05-06 15:04:28 -04001885
1886 int depth = min(destination.depth, source.depth);
1887 int height = min(destination.height, source.height);
1888 int width = min(destination.width, source.width);
1889 int rowBytes = width * source.bytes;
1890
1891 for(int z = 0; z < depth; z++)
1892 {
1893 unsigned char *sourceRow = sourceSlice;
1894 unsigned char *destinationRow = destinationSlice;
1895
1896 for(int y = 0; y < height; y++)
1897 {
1898 if(source.format == destination.format)
1899 {
1900 memcpy(destinationRow, sourceRow, rowBytes);
1901 }
1902 else
1903 {
1904 unsigned char *sourceElement = sourceRow;
1905 unsigned char *destinationElement = destinationRow;
1906
1907 for(int x = 0; x < width; x++)
1908 {
1909 Color<float> color = source.read(sourceElement);
1910 destination.write(destinationElement, color);
1911
1912 sourceElement += source.bytes;
1913 destinationElement += destination.bytes;
1914 }
1915 }
1916
1917 sourceRow += source.pitchB;
1918 destinationRow += destination.pitchB;
1919 }
1920
1921 sourceSlice += source.sliceB;
1922 destinationSlice += destination.sliceB;
1923 }
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001924
1925 source.unlockRect();
1926 destination.unlockRect();
John Bauman89401822014-05-06 15:04:28 -04001927 }
1928
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001929 void Surface::decodeR8G8B8(Buffer &destination, Buffer &source)
John Bauman89401822014-05-06 15:04:28 -04001930 {
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001931 unsigned char *sourceSlice = (unsigned char*)source.lockRect(0, 0, 0, sw::LOCK_READONLY);
1932 unsigned char *destinationSlice = (unsigned char*)destination.lockRect(0, 0, 0, sw::LOCK_WRITEONLY);
John Bauman89401822014-05-06 15:04:28 -04001933
1934 for(int z = 0; z < destination.depth && z < source.depth; z++)
1935 {
1936 unsigned char *sourceRow = sourceSlice;
1937 unsigned char *destinationRow = destinationSlice;
1938
1939 for(int y = 0; y < destination.height && y < source.height; y++)
1940 {
1941 unsigned char *sourceElement = sourceRow;
1942 unsigned char *destinationElement = destinationRow;
1943
1944 for(int x = 0; x < destination.width && x < source.width; x++)
1945 {
1946 unsigned int b = sourceElement[0];
1947 unsigned int g = sourceElement[1];
1948 unsigned int r = sourceElement[2];
1949
1950 *(unsigned int*)destinationElement = 0xFF000000 | (r << 16) | (g << 8) | (b << 0);
1951
1952 sourceElement += source.bytes;
1953 destinationElement += destination.bytes;
1954 }
1955
1956 sourceRow += source.pitchB;
1957 destinationRow += destination.pitchB;
1958 }
1959
1960 sourceSlice += source.sliceB;
1961 destinationSlice += destination.sliceB;
1962 }
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001963
1964 source.unlockRect();
1965 destination.unlockRect();
John Bauman89401822014-05-06 15:04:28 -04001966 }
1967
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001968 void Surface::decodeX1R5G5B5(Buffer &destination, Buffer &source)
John Bauman89401822014-05-06 15:04:28 -04001969 {
Alexis Hetu9c6d5222016-11-29 17:02:14 -05001970 unsigned char *sourceSlice = (unsigned char*)source.lockRect(0, 0, 0, sw::LOCK_READONLY);
1971 unsigned char *destinationSlice = (unsigned char*)destination.lockRect(0, 0, 0, sw::LOCK_WRITEONLY);
John Bauman89401822014-05-06 15:04:28 -04001972
1973 for(int z = 0; z < destination.depth && z < source.depth; z++)
1974 {
1975 unsigned char *sourceRow = sourceSlice;
1976 unsigned char *destinationRow = destinationSlice;
1977
1978 for(int y = 0; y < destination.height && y < source.height; y++)
1979 {
1980 unsigned char *sourceElement = sourceRow;
1981 unsigned char *destinationElement = destinationRow;
1982
1983 for(int x = 0; x < destination.width && x < source.width; x++)
1984 {
1985 unsigned int xrgb = *(unsigned short*)sourceElement;
Nicolas Capensc39901e2016-03-21 16:37:44 -04001986
John Bauman89401822014-05-06 15:04:28 -04001987 unsigned int r = (((xrgb & 0x7C00) * 134771 + 0x800000) >> 8) & 0x00FF0000;
1988 unsigned int g = (((xrgb & 0x03E0) * 16846 + 0x8000) >> 8) & 0x0000FF00;
1989 unsigned int b = (((xrgb & 0x001F) * 2106 + 0x80) >> 8);
1990
1991 *(unsigned int*)destinationElement = 0xFF000000 | r | g | b;
1992
1993 sourceElement += source.bytes;
1994 destinationElement += destination.bytes;
1995 }
1996
1997 sourceRow += source.pitchB;
1998 destinationRow += destination.pitchB;
1999 }
2000
2001 sourceSlice += source.sliceB;
2002 destinationSlice += destination.sliceB;
2003 }
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002004
2005 source.unlockRect();
2006 destination.unlockRect();
John Bauman89401822014-05-06 15:04:28 -04002007 }
2008
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002009 void Surface::decodeA1R5G5B5(Buffer &destination, Buffer &source)
John Bauman89401822014-05-06 15:04:28 -04002010 {
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002011 unsigned char *sourceSlice = (unsigned char*)source.lockRect(0, 0, 0, sw::LOCK_READONLY);
2012 unsigned char *destinationSlice = (unsigned char*)destination.lockRect(0, 0, 0, sw::LOCK_WRITEONLY);
John Bauman89401822014-05-06 15:04:28 -04002013
2014 for(int z = 0; z < destination.depth && z < source.depth; z++)
2015 {
2016 unsigned char *sourceRow = sourceSlice;
2017 unsigned char *destinationRow = destinationSlice;
2018
2019 for(int y = 0; y < destination.height && y < source.height; y++)
2020 {
2021 unsigned char *sourceElement = sourceRow;
2022 unsigned char *destinationElement = destinationRow;
2023
2024 for(int x = 0; x < destination.width && x < source.width; x++)
2025 {
2026 unsigned int argb = *(unsigned short*)sourceElement;
Nicolas Capensc39901e2016-03-21 16:37:44 -04002027
John Bauman89401822014-05-06 15:04:28 -04002028 unsigned int a = (argb & 0x8000) * 130560;
2029 unsigned int r = (((argb & 0x7C00) * 134771 + 0x800000) >> 8) & 0x00FF0000;
2030 unsigned int g = (((argb & 0x03E0) * 16846 + 0x8000) >> 8) & 0x0000FF00;
2031 unsigned int b = (((argb & 0x001F) * 2106 + 0x80) >> 8);
2032
2033 *(unsigned int*)destinationElement = a | r | g | b;
2034
2035 sourceElement += source.bytes;
2036 destinationElement += destination.bytes;
2037 }
2038
2039 sourceRow += source.pitchB;
2040 destinationRow += destination.pitchB;
2041 }
2042
2043 sourceSlice += source.sliceB;
2044 destinationSlice += destination.sliceB;
2045 }
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002046
2047 source.unlockRect();
2048 destination.unlockRect();
John Bauman89401822014-05-06 15:04:28 -04002049 }
2050
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002051 void Surface::decodeX4R4G4B4(Buffer &destination, Buffer &source)
John Bauman89401822014-05-06 15:04:28 -04002052 {
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002053 unsigned char *sourceSlice = (unsigned char*)source.lockRect(0, 0, 0, sw::LOCK_READONLY);
2054 unsigned char *destinationSlice = (unsigned char*)destination.lockRect(0, 0, 0, sw::LOCK_WRITEONLY);
John Bauman89401822014-05-06 15:04:28 -04002055
2056 for(int z = 0; z < destination.depth && z < source.depth; z++)
2057 {
2058 unsigned char *sourceRow = sourceSlice;
2059 unsigned char *destinationRow = destinationSlice;
2060
2061 for(int y = 0; y < destination.height && y < source.height; y++)
2062 {
2063 unsigned char *sourceElement = sourceRow;
2064 unsigned char *destinationElement = destinationRow;
2065
2066 for(int x = 0; x < destination.width && x < source.width; x++)
2067 {
2068 unsigned int xrgb = *(unsigned short*)sourceElement;
Nicolas Capensc39901e2016-03-21 16:37:44 -04002069
John Bauman89401822014-05-06 15:04:28 -04002070 unsigned int r = ((xrgb & 0x0F00) * 0x00001100) & 0x00FF0000;
2071 unsigned int g = ((xrgb & 0x00F0) * 0x00000110) & 0x0000FF00;
2072 unsigned int b = (xrgb & 0x000F) * 0x00000011;
2073
2074 *(unsigned int*)destinationElement = 0xFF000000 | r | g | b;
2075
2076 sourceElement += source.bytes;
2077 destinationElement += destination.bytes;
2078 }
2079
2080 sourceRow += source.pitchB;
2081 destinationRow += destination.pitchB;
2082 }
2083
2084 sourceSlice += source.sliceB;
2085 destinationSlice += destination.sliceB;
2086 }
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002087
2088 source.unlockRect();
2089 destination.unlockRect();
John Bauman89401822014-05-06 15:04:28 -04002090 }
2091
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002092 void Surface::decodeA4R4G4B4(Buffer &destination, Buffer &source)
John Bauman89401822014-05-06 15:04:28 -04002093 {
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002094 unsigned char *sourceSlice = (unsigned char*)source.lockRect(0, 0, 0, sw::LOCK_READONLY);
2095 unsigned char *destinationSlice = (unsigned char*)destination.lockRect(0, 0, 0, sw::LOCK_WRITEONLY);
John Bauman89401822014-05-06 15:04:28 -04002096
2097 for(int z = 0; z < destination.depth && z < source.depth; z++)
2098 {
2099 unsigned char *sourceRow = sourceSlice;
2100 unsigned char *destinationRow = destinationSlice;
2101
2102 for(int y = 0; y < destination.height && y < source.height; y++)
2103 {
2104 unsigned char *sourceElement = sourceRow;
2105 unsigned char *destinationElement = destinationRow;
2106
2107 for(int x = 0; x < destination.width && x < source.width; x++)
2108 {
2109 unsigned int argb = *(unsigned short*)sourceElement;
Nicolas Capensc39901e2016-03-21 16:37:44 -04002110
John Bauman89401822014-05-06 15:04:28 -04002111 unsigned int a = ((argb & 0xF000) * 0x00011000) & 0xFF000000;
2112 unsigned int r = ((argb & 0x0F00) * 0x00001100) & 0x00FF0000;
2113 unsigned int g = ((argb & 0x00F0) * 0x00000110) & 0x0000FF00;
2114 unsigned int b = (argb & 0x000F) * 0x00000011;
2115
2116 *(unsigned int*)destinationElement = a | r | g | b;
2117
2118 sourceElement += source.bytes;
2119 destinationElement += destination.bytes;
2120 }
2121
2122 sourceRow += source.pitchB;
2123 destinationRow += destination.pitchB;
2124 }
2125
2126 sourceSlice += source.sliceB;
2127 destinationSlice += destination.sliceB;
2128 }
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002129
2130 source.unlockRect();
2131 destination.unlockRect();
John Bauman89401822014-05-06 15:04:28 -04002132 }
2133
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002134 void Surface::decodeP8(Buffer &destination, Buffer &source)
John Bauman89401822014-05-06 15:04:28 -04002135 {
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002136 unsigned char *sourceSlice = (unsigned char*)source.lockRect(0, 0, 0, sw::LOCK_READONLY);
2137 unsigned char *destinationSlice = (unsigned char*)destination.lockRect(0, 0, 0, sw::LOCK_WRITEONLY);
John Bauman89401822014-05-06 15:04:28 -04002138
2139 for(int z = 0; z < destination.depth && z < source.depth; z++)
2140 {
2141 unsigned char *sourceRow = sourceSlice;
2142 unsigned char *destinationRow = destinationSlice;
2143
2144 for(int y = 0; y < destination.height && y < source.height; y++)
2145 {
2146 unsigned char *sourceElement = sourceRow;
2147 unsigned char *destinationElement = destinationRow;
2148
2149 for(int x = 0; x < destination.width && x < source.width; x++)
2150 {
2151 unsigned int abgr = palette[*(unsigned char*)sourceElement];
2152
2153 unsigned int r = (abgr & 0x000000FF) << 16;
2154 unsigned int g = (abgr & 0x0000FF00) << 0;
2155 unsigned int b = (abgr & 0x00FF0000) >> 16;
2156 unsigned int a = (abgr & 0xFF000000) >> 0;
2157
2158 *(unsigned int*)destinationElement = a | r | g | b;
2159
2160 sourceElement += source.bytes;
2161 destinationElement += destination.bytes;
2162 }
2163
2164 sourceRow += source.pitchB;
2165 destinationRow += destination.pitchB;
2166 }
2167
2168 sourceSlice += source.sliceB;
2169 destinationSlice += destination.sliceB;
2170 }
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002171
2172 source.unlockRect();
2173 destination.unlockRect();
John Bauman89401822014-05-06 15:04:28 -04002174 }
2175
2176#if S3TC_SUPPORT
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002177 void Surface::decodeDXT1(Buffer &internal, Buffer &external)
John Bauman89401822014-05-06 15:04:28 -04002178 {
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002179 unsigned int *destSlice = (unsigned int*)internal.lockRect(0, 0, 0, LOCK_WRITEONLY);
2180 const DXT1 *source = (const DXT1*)external.lockRect(0, 0, 0, LOCK_READONLY);
John Bauman89401822014-05-06 15:04:28 -04002181
2182 for(int z = 0; z < external.depth; z++)
2183 {
2184 unsigned int *dest = destSlice;
2185
2186 for(int y = 0; y < external.height; y += 4)
2187 {
2188 for(int x = 0; x < external.width; x += 4)
2189 {
2190 Color<byte> c[4];
2191
2192 c[0] = source->c0;
2193 c[1] = source->c1;
2194
2195 if(source->c0 > source->c1) // No transparency
2196 {
2197 // c2 = 2 / 3 * c0 + 1 / 3 * c1
2198 c[2].r = (byte)((2 * (word)c[0].r + (word)c[1].r + 1) / 3);
2199 c[2].g = (byte)((2 * (word)c[0].g + (word)c[1].g + 1) / 3);
2200 c[2].b = (byte)((2 * (word)c[0].b + (word)c[1].b + 1) / 3);
2201 c[2].a = 0xFF;
2202
2203 // c3 = 1 / 3 * c0 + 2 / 3 * c1
2204 c[3].r = (byte)(((word)c[0].r + 2 * (word)c[1].r + 1) / 3);
2205 c[3].g = (byte)(((word)c[0].g + 2 * (word)c[1].g + 1) / 3);
2206 c[3].b = (byte)(((word)c[0].b + 2 * (word)c[1].b + 1) / 3);
2207 c[3].a = 0xFF;
2208 }
2209 else // c3 transparent
2210 {
2211 // c2 = 1 / 2 * c0 + 1 / 2 * c1
2212 c[2].r = (byte)(((word)c[0].r + (word)c[1].r) / 2);
2213 c[2].g = (byte)(((word)c[0].g + (word)c[1].g) / 2);
2214 c[2].b = (byte)(((word)c[0].b + (word)c[1].b) / 2);
2215 c[2].a = 0xFF;
2216
2217 c[3].r = 0;
2218 c[3].g = 0;
2219 c[3].b = 0;
2220 c[3].a = 0;
2221 }
2222
2223 for(int j = 0; j < 4 && (y + j) < internal.height; j++)
2224 {
2225 for(int i = 0; i < 4 && (x + i) < internal.width; i++)
2226 {
2227 dest[(x + i) + (y + j) * internal.width] = c[(unsigned int)(source->lut >> 2 * (i + j * 4)) % 4];
2228 }
2229 }
2230
2231 source++;
2232 }
2233 }
2234
2235 (byte*&)destSlice += internal.sliceB;
2236 }
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002237
2238 external.unlockRect();
2239 internal.unlockRect();
John Bauman89401822014-05-06 15:04:28 -04002240 }
2241
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002242 void Surface::decodeDXT3(Buffer &internal, Buffer &external)
John Bauman89401822014-05-06 15:04:28 -04002243 {
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002244 unsigned int *destSlice = (unsigned int*)internal.lockRect(0, 0, 0, LOCK_WRITEONLY);
2245 const DXT3 *source = (const DXT3*)external.lockRect(0, 0, 0, LOCK_READONLY);
John Bauman89401822014-05-06 15:04:28 -04002246
2247 for(int z = 0; z < external.depth; z++)
2248 {
2249 unsigned int *dest = destSlice;
2250
2251 for(int y = 0; y < external.height; y += 4)
2252 {
2253 for(int x = 0; x < external.width; x += 4)
2254 {
2255 Color<byte> c[4];
2256
2257 c[0] = source->c0;
2258 c[1] = source->c1;
2259
2260 // c2 = 2 / 3 * c0 + 1 / 3 * c1
2261 c[2].r = (byte)((2 * (word)c[0].r + (word)c[1].r + 1) / 3);
2262 c[2].g = (byte)((2 * (word)c[0].g + (word)c[1].g + 1) / 3);
2263 c[2].b = (byte)((2 * (word)c[0].b + (word)c[1].b + 1) / 3);
2264
2265 // c3 = 1 / 3 * c0 + 2 / 3 * c1
2266 c[3].r = (byte)(((word)c[0].r + 2 * (word)c[1].r + 1) / 3);
2267 c[3].g = (byte)(((word)c[0].g + 2 * (word)c[1].g + 1) / 3);
2268 c[3].b = (byte)(((word)c[0].b + 2 * (word)c[1].b + 1) / 3);
2269
2270 for(int j = 0; j < 4 && (y + j) < internal.height; j++)
2271 {
2272 for(int i = 0; i < 4 && (x + i) < internal.width; i++)
2273 {
2274 unsigned int a = (unsigned int)(source->a >> 4 * (i + j * 4)) & 0x0F;
2275 unsigned int color = (c[(unsigned int)(source->lut >> 2 * (i + j * 4)) % 4] & 0x00FFFFFF) | ((a << 28) + (a << 24));
2276
2277 dest[(x + i) + (y + j) * internal.width] = color;
2278 }
2279 }
2280
2281 source++;
2282 }
2283 }
2284
2285 (byte*&)destSlice += internal.sliceB;
2286 }
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002287
2288 external.unlockRect();
2289 internal.unlockRect();
John Bauman89401822014-05-06 15:04:28 -04002290 }
2291
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002292 void Surface::decodeDXT5(Buffer &internal, Buffer &external)
John Bauman89401822014-05-06 15:04:28 -04002293 {
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002294 unsigned int *destSlice = (unsigned int*)internal.lockRect(0, 0, 0, LOCK_WRITEONLY);
2295 const DXT5 *source = (const DXT5*)external.lockRect(0, 0, 0, LOCK_READONLY);
John Bauman89401822014-05-06 15:04:28 -04002296
2297 for(int z = 0; z < external.depth; z++)
2298 {
2299 unsigned int *dest = destSlice;
2300
2301 for(int y = 0; y < external.height; y += 4)
2302 {
2303 for(int x = 0; x < external.width; x += 4)
2304 {
2305 Color<byte> c[4];
2306
2307 c[0] = source->c0;
2308 c[1] = source->c1;
2309
2310 // c2 = 2 / 3 * c0 + 1 / 3 * c1
2311 c[2].r = (byte)((2 * (word)c[0].r + (word)c[1].r + 1) / 3);
2312 c[2].g = (byte)((2 * (word)c[0].g + (word)c[1].g + 1) / 3);
2313 c[2].b = (byte)((2 * (word)c[0].b + (word)c[1].b + 1) / 3);
2314
2315 // c3 = 1 / 3 * c0 + 2 / 3 * c1
2316 c[3].r = (byte)(((word)c[0].r + 2 * (word)c[1].r + 1) / 3);
2317 c[3].g = (byte)(((word)c[0].g + 2 * (word)c[1].g + 1) / 3);
2318 c[3].b = (byte)(((word)c[0].b + 2 * (word)c[1].b + 1) / 3);
2319
2320 byte a[8];
2321
2322 a[0] = source->a0;
2323 a[1] = source->a1;
Nicolas Capensc39901e2016-03-21 16:37:44 -04002324
John Bauman89401822014-05-06 15:04:28 -04002325 if(a[0] > a[1])
2326 {
2327 a[2] = (byte)((6 * (word)a[0] + 1 * (word)a[1] + 3) / 7);
2328 a[3] = (byte)((5 * (word)a[0] + 2 * (word)a[1] + 3) / 7);
2329 a[4] = (byte)((4 * (word)a[0] + 3 * (word)a[1] + 3) / 7);
2330 a[5] = (byte)((3 * (word)a[0] + 4 * (word)a[1] + 3) / 7);
2331 a[6] = (byte)((2 * (word)a[0] + 5 * (word)a[1] + 3) / 7);
2332 a[7] = (byte)((1 * (word)a[0] + 6 * (word)a[1] + 3) / 7);
2333 }
2334 else
2335 {
2336 a[2] = (byte)((4 * (word)a[0] + 1 * (word)a[1] + 2) / 5);
2337 a[3] = (byte)((3 * (word)a[0] + 2 * (word)a[1] + 2) / 5);
2338 a[4] = (byte)((2 * (word)a[0] + 3 * (word)a[1] + 2) / 5);
2339 a[5] = (byte)((1 * (word)a[0] + 4 * (word)a[1] + 2) / 5);
2340 a[6] = 0;
2341 a[7] = 0xFF;
2342 }
2343
2344 for(int j = 0; j < 4 && (y + j) < internal.height; j++)
2345 {
2346 for(int i = 0; i < 4 && (x + i) < internal.width; i++)
2347 {
2348 unsigned int alpha = (unsigned int)a[(unsigned int)(source->alut >> (16 + 3 * (i + j * 4))) % 8] << 24;
2349 unsigned int color = (c[(source->clut >> 2 * (i + j * 4)) % 4] & 0x00FFFFFF) | alpha;
Nicolas Capensc39901e2016-03-21 16:37:44 -04002350
John Bauman89401822014-05-06 15:04:28 -04002351 dest[(x + i) + (y + j) * internal.width] = color;
2352 }
2353 }
2354
2355 source++;
2356 }
2357 }
2358
2359 (byte*&)destSlice += internal.sliceB;
2360 }
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002361
2362 external.unlockRect();
2363 internal.unlockRect();
John Bauman89401822014-05-06 15:04:28 -04002364 }
Nicolas Capens22658242014-11-29 00:31:41 -05002365#endif
John Bauman89401822014-05-06 15:04:28 -04002366
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002367 void Surface::decodeATI1(Buffer &internal, Buffer &external)
John Bauman89401822014-05-06 15:04:28 -04002368 {
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002369 byte *destSlice = (byte*)internal.lockRect(0, 0, 0, LOCK_WRITEONLY);
2370 const ATI1 *source = (const ATI1*)external.lockRect(0, 0, 0, LOCK_READONLY);
John Bauman89401822014-05-06 15:04:28 -04002371
2372 for(int z = 0; z < external.depth; z++)
2373 {
2374 byte *dest = destSlice;
2375
2376 for(int y = 0; y < external.height; y += 4)
2377 {
2378 for(int x = 0; x < external.width; x += 4)
2379 {
2380 byte r[8];
2381
2382 r[0] = source->r0;
2383 r[1] = source->r1;
Nicolas Capensc39901e2016-03-21 16:37:44 -04002384
John Bauman89401822014-05-06 15:04:28 -04002385 if(r[0] > r[1])
2386 {
2387 r[2] = (byte)((6 * (word)r[0] + 1 * (word)r[1] + 3) / 7);
2388 r[3] = (byte)((5 * (word)r[0] + 2 * (word)r[1] + 3) / 7);
2389 r[4] = (byte)((4 * (word)r[0] + 3 * (word)r[1] + 3) / 7);
2390 r[5] = (byte)((3 * (word)r[0] + 4 * (word)r[1] + 3) / 7);
2391 r[6] = (byte)((2 * (word)r[0] + 5 * (word)r[1] + 3) / 7);
2392 r[7] = (byte)((1 * (word)r[0] + 6 * (word)r[1] + 3) / 7);
2393 }
2394 else
2395 {
2396 r[2] = (byte)((4 * (word)r[0] + 1 * (word)r[1] + 2) / 5);
2397 r[3] = (byte)((3 * (word)r[0] + 2 * (word)r[1] + 2) / 5);
2398 r[4] = (byte)((2 * (word)r[0] + 3 * (word)r[1] + 2) / 5);
2399 r[5] = (byte)((1 * (word)r[0] + 4 * (word)r[1] + 2) / 5);
2400 r[6] = 0;
2401 r[7] = 0xFF;
2402 }
2403
2404 for(int j = 0; j < 4 && (y + j) < internal.height; j++)
2405 {
2406 for(int i = 0; i < 4 && (x + i) < internal.width; i++)
2407 {
2408 dest[(x + i) + (y + j) * internal.width] = r[(unsigned int)(source->rlut >> (16 + 3 * (i + j * 4))) % 8];
2409 }
2410 }
2411
2412 source++;
2413 }
2414 }
2415
2416 destSlice += internal.sliceB;
2417 }
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002418
2419 external.unlockRect();
2420 internal.unlockRect();
John Bauman89401822014-05-06 15:04:28 -04002421 }
2422
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002423 void Surface::decodeATI2(Buffer &internal, Buffer &external)
John Bauman89401822014-05-06 15:04:28 -04002424 {
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002425 word *destSlice = (word*)internal.lockRect(0, 0, 0, LOCK_WRITEONLY);
2426 const ATI2 *source = (const ATI2*)external.lockRect(0, 0, 0, LOCK_READONLY);
John Bauman89401822014-05-06 15:04:28 -04002427
2428 for(int z = 0; z < external.depth; z++)
2429 {
2430 word *dest = destSlice;
2431
2432 for(int y = 0; y < external.height; y += 4)
2433 {
2434 for(int x = 0; x < external.width; x += 4)
2435 {
2436 byte X[8];
2437
2438 X[0] = source->x0;
2439 X[1] = source->x1;
Nicolas Capensc39901e2016-03-21 16:37:44 -04002440
John Bauman89401822014-05-06 15:04:28 -04002441 if(X[0] > X[1])
2442 {
2443 X[2] = (byte)((6 * (word)X[0] + 1 * (word)X[1] + 3) / 7);
2444 X[3] = (byte)((5 * (word)X[0] + 2 * (word)X[1] + 3) / 7);
2445 X[4] = (byte)((4 * (word)X[0] + 3 * (word)X[1] + 3) / 7);
2446 X[5] = (byte)((3 * (word)X[0] + 4 * (word)X[1] + 3) / 7);
2447 X[6] = (byte)((2 * (word)X[0] + 5 * (word)X[1] + 3) / 7);
2448 X[7] = (byte)((1 * (word)X[0] + 6 * (word)X[1] + 3) / 7);
2449 }
2450 else
2451 {
2452 X[2] = (byte)((4 * (word)X[0] + 1 * (word)X[1] + 2) / 5);
2453 X[3] = (byte)((3 * (word)X[0] + 2 * (word)X[1] + 2) / 5);
2454 X[4] = (byte)((2 * (word)X[0] + 3 * (word)X[1] + 2) / 5);
2455 X[5] = (byte)((1 * (word)X[0] + 4 * (word)X[1] + 2) / 5);
2456 X[6] = 0;
2457 X[7] = 0xFF;
2458 }
2459
2460 byte Y[8];
2461
2462 Y[0] = source->y0;
2463 Y[1] = source->y1;
Nicolas Capensc39901e2016-03-21 16:37:44 -04002464
John Bauman89401822014-05-06 15:04:28 -04002465 if(Y[0] > Y[1])
2466 {
2467 Y[2] = (byte)((6 * (word)Y[0] + 1 * (word)Y[1] + 3) / 7);
2468 Y[3] = (byte)((5 * (word)Y[0] + 2 * (word)Y[1] + 3) / 7);
2469 Y[4] = (byte)((4 * (word)Y[0] + 3 * (word)Y[1] + 3) / 7);
2470 Y[5] = (byte)((3 * (word)Y[0] + 4 * (word)Y[1] + 3) / 7);
2471 Y[6] = (byte)((2 * (word)Y[0] + 5 * (word)Y[1] + 3) / 7);
2472 Y[7] = (byte)((1 * (word)Y[0] + 6 * (word)Y[1] + 3) / 7);
2473 }
2474 else
2475 {
2476 Y[2] = (byte)((4 * (word)Y[0] + 1 * (word)Y[1] + 2) / 5);
2477 Y[3] = (byte)((3 * (word)Y[0] + 2 * (word)Y[1] + 2) / 5);
2478 Y[4] = (byte)((2 * (word)Y[0] + 3 * (word)Y[1] + 2) / 5);
2479 Y[5] = (byte)((1 * (word)Y[0] + 4 * (word)Y[1] + 2) / 5);
2480 Y[6] = 0;
2481 Y[7] = 0xFF;
2482 }
2483
2484 for(int j = 0; j < 4 && (y + j) < internal.height; j++)
2485 {
2486 for(int i = 0; i < 4 && (x + i) < internal.width; i++)
2487 {
2488 word r = X[(unsigned int)(source->xlut >> (16 + 3 * (i + j * 4))) % 8];
2489 word g = Y[(unsigned int)(source->ylut >> (16 + 3 * (i + j * 4))) % 8];
2490
2491 dest[(x + i) + (y + j) * internal.width] = (g << 8) + r;
2492 }
2493 }
2494
2495 source++;
2496 }
2497 }
2498
2499 (byte*&)destSlice += internal.sliceB;
2500 }
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002501
2502 external.unlockRect();
2503 internal.unlockRect();
John Bauman89401822014-05-06 15:04:28 -04002504 }
Nicolas Capens22658242014-11-29 00:31:41 -05002505
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002506 void Surface::decodeETC2(Buffer &internal, Buffer &external, int nbAlphaBits, bool isSRGB)
Nicolas Capens22658242014-11-29 00:31:41 -05002507 {
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002508 ETC_Decoder::Decode((const byte*)external.lockRect(0, 0, 0, LOCK_READONLY), (byte*)internal.lockRect(0, 0, 0, LOCK_WRITEONLY), external.width, external.height, internal.width, internal.height, internal.pitchB, internal.bytes,
Alexis Hetu0de50d42015-09-09 13:56:41 -04002509 (nbAlphaBits == 8) ? ETC_Decoder::ETC_RGBA : ((nbAlphaBits == 1) ? ETC_Decoder::ETC_RGB_PUNCHTHROUGH_ALPHA : ETC_Decoder::ETC_RGB));
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002510 external.unlockRect();
2511 internal.unlockRect();
Nicolas Capens22658242014-11-29 00:31:41 -05002512
Alexis Hetu0de50d42015-09-09 13:56:41 -04002513 if(isSRGB)
Nicolas Capens22658242014-11-29 00:31:41 -05002514 {
Alexis Hetu0de50d42015-09-09 13:56:41 -04002515 static byte sRGBtoLinearTable[256];
2516 static bool sRGBtoLinearTableDirty = true;
2517 if(sRGBtoLinearTableDirty)
Nicolas Capens22658242014-11-29 00:31:41 -05002518 {
Alexis Hetu0de50d42015-09-09 13:56:41 -04002519 for(int i = 0; i < 256; i++)
Nicolas Capens22658242014-11-29 00:31:41 -05002520 {
Alexis Hetu0de50d42015-09-09 13:56:41 -04002521 sRGBtoLinearTable[i] = static_cast<byte>(sRGBtoLinear(static_cast<float>(i) / 255.0f) * 255.0f + 0.5f);
Nicolas Capens22658242014-11-29 00:31:41 -05002522 }
Alexis Hetu0de50d42015-09-09 13:56:41 -04002523 sRGBtoLinearTableDirty = false;
Nicolas Capens22658242014-11-29 00:31:41 -05002524 }
2525
Alexis Hetu0de50d42015-09-09 13:56:41 -04002526 // Perform sRGB conversion in place after decoding
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002527 byte *src = (byte*)internal.lockRect(0, 0, 0, LOCK_READWRITE);
Alexis Hetu0de50d42015-09-09 13:56:41 -04002528 for(int y = 0; y < internal.height; y++)
2529 {
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002530 byte *srcRow = src + y * internal.pitchB;
Alexis Hetu0de50d42015-09-09 13:56:41 -04002531 for(int x = 0; x < internal.width; x++)
2532 {
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002533 byte *srcPix = srcRow + x * internal.bytes;
Alexis Hetu0de50d42015-09-09 13:56:41 -04002534 for(int i = 0; i < 3; i++)
2535 {
2536 srcPix[i] = sRGBtoLinearTable[srcPix[i]];
2537 }
2538 }
2539 }
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002540 internal.unlockRect();
Nicolas Capens22658242014-11-29 00:31:41 -05002541 }
2542 }
John Bauman89401822014-05-06 15:04:28 -04002543
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002544 void Surface::decodeEAC(Buffer &internal, Buffer &external, int nbChannels, bool isSigned)
Alexis Hetu460e41f2015-09-01 10:58:37 -04002545 {
Alexis Hetu0de50d42015-09-09 13:56:41 -04002546 ASSERT(nbChannels == 1 || nbChannels == 2);
Alexis Hetu460e41f2015-09-01 10:58:37 -04002547
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002548 ETC_Decoder::Decode((const byte*)external.lockRect(0, 0, 0, LOCK_READONLY), (byte*)internal.lockRect(0, 0, 0, LOCK_WRITEONLY), external.width, external.height, internal.width, internal.height, internal.pitchB, internal.bytes,
Alexis Hetu0de50d42015-09-09 13:56:41 -04002549 (nbChannels == 1) ? (isSigned ? ETC_Decoder::ETC_R_SIGNED : ETC_Decoder::ETC_R_UNSIGNED) : (isSigned ? ETC_Decoder::ETC_RG_SIGNED : ETC_Decoder::ETC_RG_UNSIGNED));
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002550 external.unlockRect();
2551 internal.unlockRect();
Alexis Hetu0de50d42015-09-09 13:56:41 -04002552
2553 // FIXME: We convert signed data to float, until signed integer internal formats are supported
2554 // This code can be removed if signed ETC2 images are decoded to internal 8 bit signed R/RG formats
2555 if(isSigned)
2556 {
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002557 sbyte *src = (sbyte*)internal.lockRect(0, 0, 0, LOCK_READWRITE);
Alexis Hetu0de50d42015-09-09 13:56:41 -04002558
2559 for(int y = 0; y < internal.height; y++)
2560 {
2561 sbyte* srcRow = src + y * internal.pitchB;
2562 for(int x = internal.width - 1; x >= 0; x--)
2563 {
2564 int dx = x & 0xFFFFFFFC;
2565 int mx = x - dx;
2566 sbyte* srcPix = srcRow + dx * internal.bytes + mx * nbChannels;
2567 float* dstPix = (float*)(srcRow + x * internal.bytes);
2568 for(int c = nbChannels - 1; c >= 0; c--)
2569 {
2570 static const float normalization = 1.0f / 127.875f;
2571 dstPix[c] = clamp(static_cast<float>(srcPix[c]) * normalization, -1.0f, 1.0f);
2572 }
2573 }
2574 }
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002575
2576 internal.unlockRect();
Alexis Hetu0de50d42015-09-09 13:56:41 -04002577 }
Alexis Hetu460e41f2015-09-01 10:58:37 -04002578 }
2579
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002580 void Surface::decodeASTC(Buffer &internal, Buffer &external, int xBlockSize, int yBlockSize, int zBlockSize, bool isSRGB)
Alexis Hetu460e41f2015-09-01 10:58:37 -04002581 {
2582 }
2583
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002584 unsigned int Surface::size(int width, int height, int depth, int border, Format format)
John Bauman89401822014-05-06 15:04:28 -04002585 {
Alexis Hetu9c6d5222016-11-29 17:02:14 -05002586 width += 2 * border;
2587 height += 2 * border;
2588
Nicolas Capens00555c42015-07-21 15:15:30 -04002589 // Dimensions rounded up to multiples of 4, used for compressed formats
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04002590 int width4 = align(width, 4);
2591 int height4 = align(height, 4);
John Bauman89401822014-05-06 15:04:28 -04002592
2593 switch(format)
2594 {
2595 #if S3TC_SUPPORT
2596 case FORMAT_DXT1:
John Bauman66b8ab22014-05-06 15:57:45 -04002597 #endif
John Bauman89401822014-05-06 15:04:28 -04002598 case FORMAT_ATI1:
Nicolas Capens22658242014-11-29 00:31:41 -05002599 case FORMAT_ETC1:
Alexis Hetu460e41f2015-09-01 10:58:37 -04002600 case FORMAT_R11_EAC:
2601 case FORMAT_SIGNED_R11_EAC:
2602 case FORMAT_RGB8_ETC2:
2603 case FORMAT_SRGB8_ETC2:
2604 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
2605 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
John Bauman89401822014-05-06 15:04:28 -04002606 return width4 * height4 * depth / 2;
John Bauman66b8ab22014-05-06 15:57:45 -04002607 #if S3TC_SUPPORT
John Bauman89401822014-05-06 15:04:28 -04002608 case FORMAT_DXT3:
2609 case FORMAT_DXT5:
John Bauman66b8ab22014-05-06 15:57:45 -04002610 #endif
John Bauman89401822014-05-06 15:04:28 -04002611 case FORMAT_ATI2:
Alexis Hetu460e41f2015-09-01 10:58:37 -04002612 case FORMAT_RG11_EAC:
2613 case FORMAT_SIGNED_RG11_EAC:
2614 case FORMAT_RGBA8_ETC2_EAC:
2615 case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
2616 case FORMAT_RGBA_ASTC_4x4_KHR:
2617 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
John Bauman89401822014-05-06 15:04:28 -04002618 return width4 * height4 * depth;
Alexis Hetu460e41f2015-09-01 10:58:37 -04002619 case FORMAT_RGBA_ASTC_5x4_KHR:
2620 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
2621 return align(width, 5) * height4 * depth;
2622 case FORMAT_RGBA_ASTC_5x5_KHR:
2623 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
2624 return align(width, 5) * align(height, 5) * depth;
2625 case FORMAT_RGBA_ASTC_6x5_KHR:
2626 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
2627 return align(width, 6) * align(height, 5) * depth;
2628 case FORMAT_RGBA_ASTC_6x6_KHR:
2629 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
2630 return align(width, 6) * align(height, 6) * depth;
2631 case FORMAT_RGBA_ASTC_8x5_KHR:
2632 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
2633 return align(width, 8) * align(height, 5) * depth;
2634 case FORMAT_RGBA_ASTC_8x6_KHR:
2635 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
2636 return align(width, 8) * align(height, 6) * depth;
2637 case FORMAT_RGBA_ASTC_8x8_KHR:
2638 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
2639 return align(width, 8) * align(height, 8) * depth;
2640 case FORMAT_RGBA_ASTC_10x5_KHR:
2641 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
2642 return align(width, 10) * align(height, 5) * depth;
2643 case FORMAT_RGBA_ASTC_10x6_KHR:
2644 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
2645 return align(width, 10) * align(height, 6) * depth;
2646 case FORMAT_RGBA_ASTC_10x8_KHR:
2647 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
2648 return align(width, 10) * align(height, 8) * depth;
2649 case FORMAT_RGBA_ASTC_10x10_KHR:
2650 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
2651 return align(width, 10) * align(height, 10) * depth;
2652 case FORMAT_RGBA_ASTC_12x10_KHR:
2653 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
2654 return align(width, 12) * align(height, 10) * depth;
2655 case FORMAT_RGBA_ASTC_12x12_KHR:
2656 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:
2657 return align(width, 12) * align(height, 12) * depth;
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04002658 case FORMAT_YV12_BT601:
2659 case FORMAT_YV12_BT709:
2660 case FORMAT_YV12_JFIF:
2661 {
2662 unsigned int YStride = align(width, 16);
2663 unsigned int YSize = YStride * height;
2664 unsigned int CStride = align(YStride / 2, 16);
Nicolas Capens0bac2852016-05-07 06:09:58 -04002665 unsigned int CSize = CStride * height / 2;
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04002666
2667 return YSize + 2 * CSize;
2668 }
John Bauman89401822014-05-06 15:04:28 -04002669 default:
2670 return bytes(format) * width * height * depth;
2671 }
John Bauman89401822014-05-06 15:04:28 -04002672 }
2673
2674 bool Surface::isStencil(Format format)
2675 {
2676 switch(format)
2677 {
2678 case FORMAT_D32:
2679 case FORMAT_D16:
2680 case FORMAT_D24X8:
2681 case FORMAT_D32F:
2682 case FORMAT_D32F_COMPLEMENTARY:
2683 case FORMAT_D32F_LOCKABLE:
Nicolas Capens57e7cea2017-12-13 22:25:04 -05002684 case FORMAT_D32F_SHADOW:
John Bauman89401822014-05-06 15:04:28 -04002685 return false;
2686 case FORMAT_D24S8:
2687 case FORMAT_D24FS8:
2688 case FORMAT_S8:
John Bauman66b8ab22014-05-06 15:57:45 -04002689 case FORMAT_DF24S8:
2690 case FORMAT_DF16S8:
2691 case FORMAT_D32FS8_TEXTURE:
2692 case FORMAT_D32FS8_SHADOW:
Nicolas Capens57e7cea2017-12-13 22:25:04 -05002693 case FORMAT_D32FS8:
2694 case FORMAT_D32FS8_COMPLEMENTARY:
John Bauman89401822014-05-06 15:04:28 -04002695 case FORMAT_INTZ:
2696 return true;
2697 default:
2698 return false;
2699 }
2700 }
2701
2702 bool Surface::isDepth(Format format)
2703 {
2704 switch(format)
2705 {
2706 case FORMAT_D32:
2707 case FORMAT_D16:
2708 case FORMAT_D24X8:
2709 case FORMAT_D24S8:
2710 case FORMAT_D24FS8:
2711 case FORMAT_D32F:
Nicolas Capens57e7cea2017-12-13 22:25:04 -05002712 case FORMAT_D32FS8:
John Bauman89401822014-05-06 15:04:28 -04002713 case FORMAT_D32F_COMPLEMENTARY:
Nicolas Capens57e7cea2017-12-13 22:25:04 -05002714 case FORMAT_D32FS8_COMPLEMENTARY:
John Bauman89401822014-05-06 15:04:28 -04002715 case FORMAT_D32F_LOCKABLE:
John Bauman66b8ab22014-05-06 15:57:45 -04002716 case FORMAT_DF24S8:
2717 case FORMAT_DF16S8:
2718 case FORMAT_D32FS8_TEXTURE:
Nicolas Capens57e7cea2017-12-13 22:25:04 -05002719 case FORMAT_D32F_SHADOW:
John Bauman66b8ab22014-05-06 15:57:45 -04002720 case FORMAT_D32FS8_SHADOW:
John Bauman89401822014-05-06 15:04:28 -04002721 case FORMAT_INTZ:
2722 return true;
2723 case FORMAT_S8:
2724 return false;
2725 default:
2726 return false;
2727 }
2728 }
2729
Alexis Hetub9dda642016-10-06 11:25:32 -04002730 bool Surface::hasQuadLayout(Format format)
2731 {
2732 switch(format)
2733 {
2734 case FORMAT_D32:
2735 case FORMAT_D16:
2736 case FORMAT_D24X8:
2737 case FORMAT_D24S8:
2738 case FORMAT_D24FS8:
2739 case FORMAT_D32F:
Nicolas Capens57e7cea2017-12-13 22:25:04 -05002740 case FORMAT_D32FS8:
Alexis Hetub9dda642016-10-06 11:25:32 -04002741 case FORMAT_D32F_COMPLEMENTARY:
Nicolas Capens57e7cea2017-12-13 22:25:04 -05002742 case FORMAT_D32FS8_COMPLEMENTARY:
Alexis Hetub9dda642016-10-06 11:25:32 -04002743 case FORMAT_DF24S8:
2744 case FORMAT_DF16S8:
2745 case FORMAT_INTZ:
2746 case FORMAT_S8:
2747 case FORMAT_A8G8R8B8Q:
2748 case FORMAT_X8G8R8B8Q:
2749 return true;
2750 case FORMAT_D32F_LOCKABLE:
2751 case FORMAT_D32FS8_TEXTURE:
Nicolas Capens57e7cea2017-12-13 22:25:04 -05002752 case FORMAT_D32F_SHADOW:
Alexis Hetub9dda642016-10-06 11:25:32 -04002753 case FORMAT_D32FS8_SHADOW:
2754 default:
2755 break;
2756 }
2757
2758 return false;
2759 }
2760
John Bauman89401822014-05-06 15:04:28 -04002761 bool Surface::isPalette(Format format)
2762 {
2763 switch(format)
2764 {
2765 case FORMAT_P8:
2766 case FORMAT_A8P8:
2767 return true;
2768 default:
2769 return false;
2770 }
2771 }
2772
2773 bool Surface::isFloatFormat(Format format)
2774 {
2775 switch(format)
2776 {
Nicolas Capens5a86ee92015-09-04 10:45:43 -04002777 case FORMAT_R5G6B5:
Alexis Hetu925c2822015-11-24 14:09:34 -05002778 case FORMAT_R8G8B8:
2779 case FORMAT_B8G8R8:
John Bauman89401822014-05-06 15:04:28 -04002780 case FORMAT_X8R8G8B8:
Alexis Hetu43577b82015-10-21 15:32:16 -04002781 case FORMAT_X8B8G8R8I:
Nicolas Capensef77ac12015-03-28 21:48:51 -04002782 case FORMAT_X8B8G8R8:
John Bauman89401822014-05-06 15:04:28 -04002783 case FORMAT_A8R8G8B8:
Alexis Hetu049a1872016-04-25 16:59:58 -04002784 case FORMAT_SRGB8_X8:
2785 case FORMAT_SRGB8_A8:
Alexis Hetu43577b82015-10-21 15:32:16 -04002786 case FORMAT_A8B8G8R8I:
2787 case FORMAT_R8UI:
2788 case FORMAT_G8R8UI:
2789 case FORMAT_X8B8G8R8UI:
2790 case FORMAT_A8B8G8R8UI:
Nicolas Capensef77ac12015-03-28 21:48:51 -04002791 case FORMAT_A8B8G8R8:
Alexis Hetu43577b82015-10-21 15:32:16 -04002792 case FORMAT_G8R8I:
John Bauman89401822014-05-06 15:04:28 -04002793 case FORMAT_G8R8:
Alexis Hetuf999a002015-12-17 11:09:36 -05002794 case FORMAT_A2B10G10R10:
Alexis Hetu43577b82015-10-21 15:32:16 -04002795 case FORMAT_R8I_SNORM:
2796 case FORMAT_G8R8I_SNORM:
2797 case FORMAT_X8B8G8R8I_SNORM:
2798 case FORMAT_A8B8G8R8I_SNORM:
2799 case FORMAT_R16I:
2800 case FORMAT_R16UI:
2801 case FORMAT_G16R16I:
2802 case FORMAT_G16R16UI:
John Bauman89401822014-05-06 15:04:28 -04002803 case FORMAT_G16R16:
Alexis Hetu43577b82015-10-21 15:32:16 -04002804 case FORMAT_X16B16G16R16I:
2805 case FORMAT_X16B16G16R16UI:
2806 case FORMAT_A16B16G16R16I:
2807 case FORMAT_A16B16G16R16UI:
John Bauman89401822014-05-06 15:04:28 -04002808 case FORMAT_A16B16G16R16:
2809 case FORMAT_V8U8:
2810 case FORMAT_Q8W8V8U8:
2811 case FORMAT_X8L8V8U8:
2812 case FORMAT_V16U16:
2813 case FORMAT_A16W16V16U16:
2814 case FORMAT_Q16W16V16U16:
2815 case FORMAT_A8:
Alexis Hetu43577b82015-10-21 15:32:16 -04002816 case FORMAT_R8I:
John Bauman89401822014-05-06 15:04:28 -04002817 case FORMAT_R8:
Alexis Hetub9dda642016-10-06 11:25:32 -04002818 case FORMAT_S8:
John Bauman89401822014-05-06 15:04:28 -04002819 case FORMAT_L8:
2820 case FORMAT_L16:
2821 case FORMAT_A8L8:
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04002822 case FORMAT_YV12_BT601:
2823 case FORMAT_YV12_BT709:
2824 case FORMAT_YV12_JFIF:
Alexis Hetu43577b82015-10-21 15:32:16 -04002825 case FORMAT_R32I:
2826 case FORMAT_R32UI:
2827 case FORMAT_G32R32I:
2828 case FORMAT_G32R32UI:
2829 case FORMAT_X32B32G32R32I:
2830 case FORMAT_X32B32G32R32UI:
2831 case FORMAT_A32B32G32R32I:
2832 case FORMAT_A32B32G32R32UI:
John Bauman89401822014-05-06 15:04:28 -04002833 return false;
Nicolas Capens400667e2017-03-29 14:40:14 -04002834 case FORMAT_R16F:
2835 case FORMAT_G16R16F:
2836 case FORMAT_B16G16R16F:
2837 case FORMAT_A16B16G16R16F:
John Bauman89401822014-05-06 15:04:28 -04002838 case FORMAT_R32F:
2839 case FORMAT_G32R32F:
Nicolas Capensc018e082016-12-13 10:19:33 -05002840 case FORMAT_B32G32R32F:
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04002841 case FORMAT_X32B32G32R32F:
John Bauman89401822014-05-06 15:04:28 -04002842 case FORMAT_A32B32G32R32F:
2843 case FORMAT_D32F:
Nicolas Capens57e7cea2017-12-13 22:25:04 -05002844 case FORMAT_D32FS8:
John Bauman89401822014-05-06 15:04:28 -04002845 case FORMAT_D32F_COMPLEMENTARY:
Nicolas Capens57e7cea2017-12-13 22:25:04 -05002846 case FORMAT_D32FS8_COMPLEMENTARY:
John Bauman89401822014-05-06 15:04:28 -04002847 case FORMAT_D32F_LOCKABLE:
John Bauman66b8ab22014-05-06 15:57:45 -04002848 case FORMAT_D32FS8_TEXTURE:
Nicolas Capens57e7cea2017-12-13 22:25:04 -05002849 case FORMAT_D32F_SHADOW:
John Bauman66b8ab22014-05-06 15:57:45 -04002850 case FORMAT_D32FS8_SHADOW:
Nicolas Capens80594422015-06-09 16:42:56 -04002851 case FORMAT_L16F:
2852 case FORMAT_A16L16F:
2853 case FORMAT_L32F:
2854 case FORMAT_A32L32F:
John Bauman89401822014-05-06 15:04:28 -04002855 return true;
2856 default:
2857 ASSERT(false);
2858 }
Nicolas Capensc39901e2016-03-21 16:37:44 -04002859
John Bauman89401822014-05-06 15:04:28 -04002860 return false;
2861 }
2862
2863 bool Surface::isUnsignedComponent(Format format, int component)
2864 {
2865 switch(format)
2866 {
2867 case FORMAT_NULL:
Nicolas Capens5a86ee92015-09-04 10:45:43 -04002868 case FORMAT_R5G6B5:
Alexis Hetu925c2822015-11-24 14:09:34 -05002869 case FORMAT_R8G8B8:
2870 case FORMAT_B8G8R8:
John Bauman89401822014-05-06 15:04:28 -04002871 case FORMAT_X8R8G8B8:
Nicolas Capensef77ac12015-03-28 21:48:51 -04002872 case FORMAT_X8B8G8R8:
John Bauman89401822014-05-06 15:04:28 -04002873 case FORMAT_A8R8G8B8:
Nicolas Capensef77ac12015-03-28 21:48:51 -04002874 case FORMAT_A8B8G8R8:
Alexis Hetu049a1872016-04-25 16:59:58 -04002875 case FORMAT_SRGB8_X8:
2876 case FORMAT_SRGB8_A8:
John Bauman89401822014-05-06 15:04:28 -04002877 case FORMAT_G8R8:
Alexis Hetuf999a002015-12-17 11:09:36 -05002878 case FORMAT_A2B10G10R10:
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002879 case FORMAT_R16UI:
John Bauman89401822014-05-06 15:04:28 -04002880 case FORMAT_G16R16:
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002881 case FORMAT_G16R16UI:
2882 case FORMAT_X16B16G16R16UI:
John Bauman89401822014-05-06 15:04:28 -04002883 case FORMAT_A16B16G16R16:
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002884 case FORMAT_A16B16G16R16UI:
2885 case FORMAT_R32UI:
2886 case FORMAT_G32R32UI:
2887 case FORMAT_X32B32G32R32UI:
2888 case FORMAT_A32B32G32R32UI:
2889 case FORMAT_R8UI:
2890 case FORMAT_G8R8UI:
2891 case FORMAT_X8B8G8R8UI:
2892 case FORMAT_A8B8G8R8UI:
John Bauman89401822014-05-06 15:04:28 -04002893 case FORMAT_D32F:
Nicolas Capens57e7cea2017-12-13 22:25:04 -05002894 case FORMAT_D32FS8:
John Bauman89401822014-05-06 15:04:28 -04002895 case FORMAT_D32F_COMPLEMENTARY:
Nicolas Capens57e7cea2017-12-13 22:25:04 -05002896 case FORMAT_D32FS8_COMPLEMENTARY:
John Bauman89401822014-05-06 15:04:28 -04002897 case FORMAT_D32F_LOCKABLE:
John Bauman66b8ab22014-05-06 15:57:45 -04002898 case FORMAT_D32FS8_TEXTURE:
Nicolas Capens57e7cea2017-12-13 22:25:04 -05002899 case FORMAT_D32F_SHADOW:
John Bauman66b8ab22014-05-06 15:57:45 -04002900 case FORMAT_D32FS8_SHADOW:
John Bauman89401822014-05-06 15:04:28 -04002901 case FORMAT_A8:
2902 case FORMAT_R8:
2903 case FORMAT_L8:
2904 case FORMAT_L16:
2905 case FORMAT_A8L8:
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04002906 case FORMAT_YV12_BT601:
2907 case FORMAT_YV12_BT709:
2908 case FORMAT_YV12_JFIF:
John Bauman89401822014-05-06 15:04:28 -04002909 return true;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002910 case FORMAT_A8B8G8R8I:
2911 case FORMAT_A16B16G16R16I:
2912 case FORMAT_A32B32G32R32I:
2913 case FORMAT_A8B8G8R8I_SNORM:
2914 case FORMAT_Q8W8V8U8:
2915 case FORMAT_Q16W16V16U16:
2916 case FORMAT_A32B32G32R32F:
2917 return false;
2918 case FORMAT_R32F:
2919 case FORMAT_R8I:
2920 case FORMAT_R16I:
2921 case FORMAT_R32I:
2922 case FORMAT_R8I_SNORM:
2923 return component >= 1;
John Bauman89401822014-05-06 15:04:28 -04002924 case FORMAT_V8U8:
2925 case FORMAT_X8L8V8U8:
2926 case FORMAT_V16U16:
John Bauman89401822014-05-06 15:04:28 -04002927 case FORMAT_G32R32F:
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002928 case FORMAT_G8R8I:
2929 case FORMAT_G16R16I:
2930 case FORMAT_G32R32I:
2931 case FORMAT_G8R8I_SNORM:
2932 return component >= 2;
2933 case FORMAT_A16W16V16U16:
Nicolas Capens2e363b02016-12-14 10:32:36 -05002934 case FORMAT_B32G32R32F:
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04002935 case FORMAT_X32B32G32R32F:
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04002936 case FORMAT_X8B8G8R8I:
2937 case FORMAT_X16B16G16R16I:
2938 case FORMAT_X32B32G32R32I:
2939 case FORMAT_X8B8G8R8I_SNORM:
2940 return component >= 3;
John Bauman89401822014-05-06 15:04:28 -04002941 default:
2942 ASSERT(false);
2943 }
Nicolas Capensc39901e2016-03-21 16:37:44 -04002944
John Bauman89401822014-05-06 15:04:28 -04002945 return false;
2946 }
2947
2948 bool Surface::isSRGBreadable(Format format)
2949 {
2950 // Keep in sync with Capabilities::isSRGBreadable
2951 switch(format)
2952 {
2953 case FORMAT_L8:
2954 case FORMAT_A8L8:
2955 case FORMAT_R8G8B8:
2956 case FORMAT_A8R8G8B8:
2957 case FORMAT_X8R8G8B8:
2958 case FORMAT_A8B8G8R8:
2959 case FORMAT_X8B8G8R8:
Alexis Hetu049a1872016-04-25 16:59:58 -04002960 case FORMAT_SRGB8_X8:
2961 case FORMAT_SRGB8_A8:
John Bauman89401822014-05-06 15:04:28 -04002962 case FORMAT_R5G6B5:
2963 case FORMAT_X1R5G5B5:
2964 case FORMAT_A1R5G5B5:
2965 case FORMAT_A4R4G4B4:
2966 #if S3TC_SUPPORT
2967 case FORMAT_DXT1:
2968 case FORMAT_DXT3:
2969 case FORMAT_DXT5:
John Bauman66b8ab22014-05-06 15:57:45 -04002970 #endif
John Bauman89401822014-05-06 15:04:28 -04002971 case FORMAT_ATI1:
2972 case FORMAT_ATI2:
John Bauman89401822014-05-06 15:04:28 -04002973 return true;
2974 default:
2975 return false;
2976 }
John Bauman89401822014-05-06 15:04:28 -04002977 }
2978
2979 bool Surface::isSRGBwritable(Format format)
2980 {
2981 // Keep in sync with Capabilities::isSRGBwritable
2982 switch(format)
2983 {
2984 case FORMAT_NULL:
2985 case FORMAT_A8R8G8B8:
2986 case FORMAT_X8R8G8B8:
2987 case FORMAT_A8B8G8R8:
2988 case FORMAT_X8B8G8R8:
Alexis Hetu049a1872016-04-25 16:59:58 -04002989 case FORMAT_SRGB8_X8:
2990 case FORMAT_SRGB8_A8:
John Bauman89401822014-05-06 15:04:28 -04002991 case FORMAT_R5G6B5:
2992 return true;
2993 default:
2994 return false;
2995 }
2996 }
2997
2998 bool Surface::isCompressed(Format format)
2999 {
3000 switch(format)
3001 {
3002 #if S3TC_SUPPORT
3003 case FORMAT_DXT1:
3004 case FORMAT_DXT3:
3005 case FORMAT_DXT5:
John Bauman66b8ab22014-05-06 15:57:45 -04003006 #endif
John Bauman89401822014-05-06 15:04:28 -04003007 case FORMAT_ATI1:
3008 case FORMAT_ATI2:
Nicolas Capens22658242014-11-29 00:31:41 -05003009 case FORMAT_ETC1:
Alexis Hetu460e41f2015-09-01 10:58:37 -04003010 case FORMAT_R11_EAC:
3011 case FORMAT_SIGNED_R11_EAC:
3012 case FORMAT_RG11_EAC:
3013 case FORMAT_SIGNED_RG11_EAC:
3014 case FORMAT_RGB8_ETC2:
3015 case FORMAT_SRGB8_ETC2:
3016 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
3017 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
3018 case FORMAT_RGBA8_ETC2_EAC:
3019 case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
3020 case FORMAT_RGBA_ASTC_4x4_KHR:
3021 case FORMAT_RGBA_ASTC_5x4_KHR:
3022 case FORMAT_RGBA_ASTC_5x5_KHR:
3023 case FORMAT_RGBA_ASTC_6x5_KHR:
3024 case FORMAT_RGBA_ASTC_6x6_KHR:
3025 case FORMAT_RGBA_ASTC_8x5_KHR:
3026 case FORMAT_RGBA_ASTC_8x6_KHR:
3027 case FORMAT_RGBA_ASTC_8x8_KHR:
3028 case FORMAT_RGBA_ASTC_10x5_KHR:
3029 case FORMAT_RGBA_ASTC_10x6_KHR:
3030 case FORMAT_RGBA_ASTC_10x8_KHR:
3031 case FORMAT_RGBA_ASTC_10x10_KHR:
3032 case FORMAT_RGBA_ASTC_12x10_KHR:
3033 case FORMAT_RGBA_ASTC_12x12_KHR:
3034 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
3035 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
3036 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
3037 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
3038 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
3039 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
3040 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
3041 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
3042 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
3043 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
3044 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
3045 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
3046 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
3047 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:
John Bauman89401822014-05-06 15:04:28 -04003048 return true;
John Bauman89401822014-05-06 15:04:28 -04003049 default:
3050 return false;
3051 }
3052 }
3053
Nicolas Capens492887a2017-03-27 14:50:51 -04003054 bool Surface::isSignedNonNormalizedInteger(Format format)
Alexis Hetu43577b82015-10-21 15:32:16 -04003055 {
3056 switch(format)
3057 {
3058 case FORMAT_A8B8G8R8I:
3059 case FORMAT_X8B8G8R8I:
3060 case FORMAT_G8R8I:
3061 case FORMAT_R8I:
Alexis Hetu43577b82015-10-21 15:32:16 -04003062 case FORMAT_A16B16G16R16I:
3063 case FORMAT_X16B16G16R16I:
3064 case FORMAT_G16R16I:
3065 case FORMAT_R16I:
Alexis Hetu91dd1c42017-07-18 13:03:42 -04003066 case FORMAT_A32B32G32R32I:
3067 case FORMAT_X32B32G32R32I:
3068 case FORMAT_G32R32I:
3069 case FORMAT_R32I:
Nicolas Capens492887a2017-03-27 14:50:51 -04003070 return true;
3071 default:
3072 return false;
3073 }
3074 }
3075
3076 bool Surface::isUnsignedNonNormalizedInteger(Format format)
3077 {
3078 switch(format)
3079 {
Alexis Hetu91dd1c42017-07-18 13:03:42 -04003080 case FORMAT_A8B8G8R8UI:
3081 case FORMAT_X8B8G8R8UI:
3082 case FORMAT_G8R8UI:
3083 case FORMAT_R8UI:
Alexis Hetu43577b82015-10-21 15:32:16 -04003084 case FORMAT_A16B16G16R16UI:
3085 case FORMAT_X16B16G16R16UI:
3086 case FORMAT_G16R16UI:
3087 case FORMAT_R16UI:
Alexis Hetu43577b82015-10-21 15:32:16 -04003088 case FORMAT_A32B32G32R32UI:
3089 case FORMAT_X32B32G32R32UI:
3090 case FORMAT_G32R32UI:
3091 case FORMAT_R32UI:
3092 return true;
3093 default:
3094 return false;
3095 }
3096 }
3097
Nicolas Capens492887a2017-03-27 14:50:51 -04003098 bool Surface::isNonNormalizedInteger(Format format)
3099 {
3100 return isSignedNonNormalizedInteger(format) ||
3101 isUnsignedNonNormalizedInteger(format);
3102 }
3103
3104 bool Surface::isNormalizedInteger(Format format)
3105 {
3106 return !isFloatFormat(format) &&
3107 !isNonNormalizedInteger(format) &&
3108 !isCompressed(format) &&
3109 !isDepth(format) &&
3110 !isStencil(format);
3111 }
3112
John Bauman89401822014-05-06 15:04:28 -04003113 int Surface::componentCount(Format format)
3114 {
3115 switch(format)
3116 {
Nicolas Capens5a86ee92015-09-04 10:45:43 -04003117 case FORMAT_R5G6B5: return 3;
3118 case FORMAT_X8R8G8B8: return 3;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003119 case FORMAT_X8B8G8R8I: return 3;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04003120 case FORMAT_X8B8G8R8: return 3;
3121 case FORMAT_A8R8G8B8: return 4;
Alexis Hetu049a1872016-04-25 16:59:58 -04003122 case FORMAT_SRGB8_X8: return 3;
3123 case FORMAT_SRGB8_A8: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003124 case FORMAT_A8B8G8R8I: return 4;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04003125 case FORMAT_A8B8G8R8: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003126 case FORMAT_G8R8I: return 2;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04003127 case FORMAT_G8R8: return 2;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003128 case FORMAT_R8I_SNORM: return 1;
3129 case FORMAT_G8R8I_SNORM: return 2;
3130 case FORMAT_X8B8G8R8I_SNORM:return 3;
3131 case FORMAT_A8B8G8R8I_SNORM:return 4;
3132 case FORMAT_R8UI: return 1;
3133 case FORMAT_G8R8UI: return 2;
3134 case FORMAT_X8B8G8R8UI: return 3;
3135 case FORMAT_A8B8G8R8UI: return 4;
Alexis Hetuf999a002015-12-17 11:09:36 -05003136 case FORMAT_A2B10G10R10: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003137 case FORMAT_G16R16I: return 2;
3138 case FORMAT_G16R16UI: return 2;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04003139 case FORMAT_G16R16: return 2;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003140 case FORMAT_G32R32I: return 2;
3141 case FORMAT_G32R32UI: return 2;
3142 case FORMAT_X16B16G16R16I: return 3;
3143 case FORMAT_X16B16G16R16UI: return 3;
3144 case FORMAT_A16B16G16R16I: return 4;
3145 case FORMAT_A16B16G16R16UI: return 4;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04003146 case FORMAT_A16B16G16R16: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003147 case FORMAT_X32B32G32R32I: return 3;
3148 case FORMAT_X32B32G32R32UI: return 3;
3149 case FORMAT_A32B32G32R32I: return 4;
3150 case FORMAT_A32B32G32R32UI: return 4;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04003151 case FORMAT_V8U8: return 2;
3152 case FORMAT_Q8W8V8U8: return 4;
3153 case FORMAT_X8L8V8U8: return 3;
3154 case FORMAT_V16U16: return 2;
3155 case FORMAT_A16W16V16U16: return 4;
3156 case FORMAT_Q16W16V16U16: return 4;
3157 case FORMAT_R32F: return 1;
3158 case FORMAT_G32R32F: return 2;
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04003159 case FORMAT_X32B32G32R32F: return 3;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04003160 case FORMAT_A32B32G32R32F: return 4;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003161 case FORMAT_D32F: return 1;
Nicolas Capens57e7cea2017-12-13 22:25:04 -05003162 case FORMAT_D32FS8: return 1;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04003163 case FORMAT_D32F_LOCKABLE: return 1;
3164 case FORMAT_D32FS8_TEXTURE: return 1;
Nicolas Capens57e7cea2017-12-13 22:25:04 -05003165 case FORMAT_D32F_SHADOW: return 1;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04003166 case FORMAT_D32FS8_SHADOW: return 1;
3167 case FORMAT_A8: return 1;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003168 case FORMAT_R8I: return 1;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04003169 case FORMAT_R8: return 1;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003170 case FORMAT_R16I: return 1;
3171 case FORMAT_R16UI: return 1;
3172 case FORMAT_R32I: return 1;
3173 case FORMAT_R32UI: return 1;
Nicolas Capens5a86ee92015-09-04 10:45:43 -04003174 case FORMAT_L8: return 1;
3175 case FORMAT_L16: return 1;
3176 case FORMAT_A8L8: return 2;
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04003177 case FORMAT_YV12_BT601: return 3;
3178 case FORMAT_YV12_BT709: return 3;
3179 case FORMAT_YV12_JFIF: return 3;
John Bauman89401822014-05-06 15:04:28 -04003180 default:
3181 ASSERT(false);
3182 }
3183
3184 return 1;
3185 }
3186
Alexis Hetu9c6d5222016-11-29 17:02:14 -05003187 void *Surface::allocateBuffer(int width, int height, int depth, int border, Format format)
John Bauman89401822014-05-06 15:04:28 -04003188 {
Nicolas Capensdb17b5d2015-06-26 11:15:58 -04003189 // Render targets require 2x2 quads
3190 int width2 = (width + 1) & ~1;
3191 int height2 = (height + 1) & ~1;
John Bauman89401822014-05-06 15:04:28 -04003192
Nicolas Capens6ea71872015-06-26 13:00:48 -04003193 // FIXME: Unpacking byte4 to short4 in the sampler currently involves reading 8 bytes,
Nicolas Capens48ef1252016-11-07 15:30:33 -05003194 // and stencil operations also read 8 bytes per four 8-bit stencil values,
Nicolas Capens6ea71872015-06-26 13:00:48 -04003195 // so we have to allocate 4 extra bytes to avoid buffer overruns.
Alexis Hetu9c6d5222016-11-29 17:02:14 -05003196 return allocate(size(width2, height2, depth, border, format) + 4);
John Bauman89401822014-05-06 15:04:28 -04003197 }
3198
Nicolas Capens5ba566b2015-05-25 17:11:04 -04003199 void Surface::memfill4(void *buffer, int pattern, int bytes)
John Bauman89401822014-05-06 15:04:28 -04003200 {
3201 while((size_t)buffer & 0x1 && bytes >= 1)
3202 {
3203 *(char*)buffer = (char)pattern;
3204 (char*&)buffer += 1;
3205 bytes -= 1;
3206 }
3207
3208 while((size_t)buffer & 0x3 && bytes >= 2)
3209 {
3210 *(short*)buffer = (short)pattern;
3211 (short*&)buffer += 1;
3212 bytes -= 2;
3213 }
3214
Nicolas Capens47dc8672017-04-25 12:54:39 -04003215 #if defined(__i386__) || defined(__x86_64__)
3216 if(CPUID::supportsSSE())
John Bauman89401822014-05-06 15:04:28 -04003217 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04003218 while((size_t)buffer & 0xF && bytes >= 4)
3219 {
3220 *(int*)buffer = pattern;
3221 (int*&)buffer += 1;
3222 bytes -= 4;
3223 }
3224
3225 __m128 quad = _mm_set_ps1((float&)pattern);
3226
3227 float *pointer = (float*)buffer;
3228 int qxwords = bytes / 64;
3229 bytes -= qxwords * 64;
3230
3231 while(qxwords--)
3232 {
3233 _mm_stream_ps(pointer + 0, quad);
3234 _mm_stream_ps(pointer + 4, quad);
3235 _mm_stream_ps(pointer + 8, quad);
3236 _mm_stream_ps(pointer + 12, quad);
3237
3238 pointer += 16;
3239 }
3240
3241 buffer = pointer;
John Bauman89401822014-05-06 15:04:28 -04003242 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04003243 #endif
John Bauman89401822014-05-06 15:04:28 -04003244
3245 while(bytes >= 4)
3246 {
3247 *(int*)buffer = (int)pattern;
3248 (int*&)buffer += 1;
3249 bytes -= 4;
3250 }
3251
3252 while(bytes >= 2)
3253 {
3254 *(short*)buffer = (short)pattern;
3255 (short*&)buffer += 1;
3256 bytes -= 2;
3257 }
3258
3259 while(bytes >= 1)
3260 {
3261 *(char*)buffer = (char)pattern;
3262 (char*&)buffer += 1;
3263 bytes -= 1;
3264 }
3265 }
3266
Nicolas Capensbf7a8142017-05-19 10:57:28 -04003267 void Surface::sync()
3268 {
3269 resource->lock(EXCLUSIVE);
3270 resource->unlock();
3271 }
3272
Nicolas Capens426cb5e2017-07-20 14:14:09 -04003273 bool Surface::isEntire(const Rect& rect) const
John Bauman89401822014-05-06 15:04:28 -04003274 {
Alexis Hetu75b650f2015-11-19 17:40:15 -05003275 return (rect.x0 == 0 && rect.y0 == 0 && rect.x1 == internal.width && rect.y1 == internal.height && internal.depth == 1);
3276 }
John Bauman89401822014-05-06 15:04:28 -04003277
Nicolas Capens426cb5e2017-07-20 14:14:09 -04003278 Rect Surface::getRect() const
Alexis Hetu75b650f2015-11-19 17:40:15 -05003279 {
Nicolas Capens426cb5e2017-07-20 14:14:09 -04003280 return Rect(0, 0, internal.width, internal.height);
John Bauman89401822014-05-06 15:04:28 -04003281 }
3282
Nicolas Capensc39901e2016-03-21 16:37:44 -04003283 void Surface::clearDepth(float depth, int x0, int y0, int width, int height)
John Bauman89401822014-05-06 15:04:28 -04003284 {
Alexis Hetu358a1442015-12-03 14:23:10 -05003285 if(width == 0 || height == 0) return;
3286
John Bauman89401822014-05-06 15:04:28 -04003287 // Not overlapping
3288 if(x0 > internal.width) return;
3289 if(y0 > internal.height) return;
3290 if(x0 + width < 0) return;
3291 if(y0 + height < 0) return;
3292
3293 // Clip against dimensions
3294 if(x0 < 0) {width += x0; x0 = 0;}
3295 if(x0 + width > internal.width) width = internal.width - x0;
3296 if(y0 < 0) {height += y0; y0 = 0;}
3297 if(y0 + height > internal.height) height = internal.height - y0;
3298
3299 const bool entire = x0 == 0 && y0 == 0 && width == internal.width && height == internal.height;
3300 const Lock lock = entire ? LOCK_DISCARD : LOCK_WRITEONLY;
3301
3302 int width2 = (internal.width + 1) & ~1;
3303
3304 int x1 = x0 + width;
3305 int y1 = y0 + height;
3306
Nicolas Capens57e7cea2017-12-13 22:25:04 -05003307 if(!hasQuadLayout(internal.format))
John Bauman89401822014-05-06 15:04:28 -04003308 {
3309 float *target = (float*)lockInternal(0, 0, 0, lock, PUBLIC) + x0 + width2 * y0;
3310
3311 for(int z = 0; z < internal.depth; z++)
3312 {
3313 for(int y = y0; y < y1; y++)
3314 {
Nicolas Capens5ba566b2015-05-25 17:11:04 -04003315 memfill4(target, (int&)depth, 4 * width);
John Bauman89401822014-05-06 15:04:28 -04003316 target += width2;
3317 }
3318 }
3319
3320 unlockInternal();
3321 }
3322 else // Quad layout
3323 {
3324 if(complementaryDepthBuffer)
3325 {
3326 depth = 1 - depth;
3327 }
3328
3329 float *buffer = (float*)lockInternal(0, 0, 0, lock, PUBLIC);
3330
Alexis Hetu358a1442015-12-03 14:23:10 -05003331 int oddX0 = (x0 & ~1) * 2 + (x0 & 1);
3332 int oddX1 = (x1 & ~1) * 2;
3333 int evenX0 = ((x0 + 1) & ~1) * 2;
3334 int evenBytes = (oddX1 - evenX0) * sizeof(float);
3335
John Bauman89401822014-05-06 15:04:28 -04003336 for(int z = 0; z < internal.depth; z++)
3337 {
3338 for(int y = y0; y < y1; y++)
3339 {
3340 float *target = buffer + (y & ~1) * width2 + (y & 1) * 2;
Nicolas Capensc39901e2016-03-21 16:37:44 -04003341
John Bauman89401822014-05-06 15:04:28 -04003342 if((y & 1) == 0 && y + 1 < y1) // Fill quad line at once
3343 {
3344 if((x0 & 1) != 0)
3345 {
Alexis Hetu358a1442015-12-03 14:23:10 -05003346 target[oddX0 + 0] = depth;
3347 target[oddX0 + 2] = depth;
John Bauman89401822014-05-06 15:04:28 -04003348 }
3349
Alexis Hetu358a1442015-12-03 14:23:10 -05003350 // for(int x2 = evenX0; x2 < x1 * 2; x2 += 4)
John Bauman89401822014-05-06 15:04:28 -04003351 // {
3352 // target[x2 + 0] = depth;
3353 // target[x2 + 1] = depth;
3354 // target[x2 + 2] = depth;
3355 // target[x2 + 3] = depth;
3356 // }
3357
3358 // __asm
3359 // {
3360 // movss xmm0, depth
3361 // shufps xmm0, xmm0, 0x00
3362 //
3363 // mov eax, x0
3364 // add eax, 1
3365 // and eax, 0xFFFFFFFE
3366 // cmp eax, x1
3367 // jge qEnd
3368 //
3369 // mov edi, target
3370 //
3371 // qLoop:
3372 // movntps [edi+8*eax], xmm0
3373 //
3374 // add eax, 2
3375 // cmp eax, x1
3376 // jl qLoop
3377 // qEnd:
3378 // }
3379
Alexis Hetu358a1442015-12-03 14:23:10 -05003380 memfill4(&target[evenX0], (int&)depth, evenBytes);
John Bauman89401822014-05-06 15:04:28 -04003381
3382 if((x1 & 1) != 0)
3383 {
Alexis Hetu358a1442015-12-03 14:23:10 -05003384 target[oddX1 + 0] = depth;
3385 target[oddX1 + 2] = depth;
John Bauman89401822014-05-06 15:04:28 -04003386 }
3387
3388 y++;
3389 }
3390 else
3391 {
Alexis Hetu358a1442015-12-03 14:23:10 -05003392 for(int x = x0, i = oddX0; x < x1; x++, i = (x & ~1) * 2 + (x & 1))
John Bauman89401822014-05-06 15:04:28 -04003393 {
Alexis Hetu358a1442015-12-03 14:23:10 -05003394 target[i] = depth;
John Bauman89401822014-05-06 15:04:28 -04003395 }
3396 }
3397 }
3398
3399 buffer += internal.sliceP;
3400 }
3401
3402 unlockInternal();
3403 }
3404 }
3405
Nicolas Capensc39901e2016-03-21 16:37:44 -04003406 void Surface::clearStencil(unsigned char s, unsigned char mask, int x0, int y0, int width, int height)
John Bauman89401822014-05-06 15:04:28 -04003407 {
Alexis Hetu358a1442015-12-03 14:23:10 -05003408 if(mask == 0 || width == 0 || height == 0) return;
Alexis Hetu2b052f82015-11-25 13:57:28 -05003409
John Bauman89401822014-05-06 15:04:28 -04003410 // Not overlapping
3411 if(x0 > internal.width) return;
3412 if(y0 > internal.height) return;
3413 if(x0 + width < 0) return;
3414 if(y0 + height < 0) return;
3415
3416 // Clip against dimensions
3417 if(x0 < 0) {width += x0; x0 = 0;}
3418 if(x0 + width > internal.width) width = internal.width - x0;
3419 if(y0 < 0) {height += y0; y0 = 0;}
3420 if(y0 + height > internal.height) height = internal.height - y0;
3421
3422 int width2 = (internal.width + 1) & ~1;
3423
3424 int x1 = x0 + width;
3425 int y1 = y0 + height;
3426
Alexis Hetu358a1442015-12-03 14:23:10 -05003427 int oddX0 = (x0 & ~1) * 2 + (x0 & 1);
3428 int oddX1 = (x1 & ~1) * 2;
3429 int evenX0 = ((x0 + 1) & ~1) * 2;
3430 int evenBytes = oddX1 - evenX0;
3431
John Bauman89401822014-05-06 15:04:28 -04003432 unsigned char maskedS = s & mask;
3433 unsigned char invMask = ~mask;
3434 unsigned int fill = maskedS;
Tom Anderson69bc6e82017-03-20 11:54:29 -07003435 fill = fill | (fill << 8) | (fill << 16) | (fill << 24);
John Bauman89401822014-05-06 15:04:28 -04003436
Alexis Hetua52dfbd2016-10-05 17:03:30 -04003437 char *buffer = (char*)lockStencil(0, 0, 0, PUBLIC);
Alexis Hetu2b052f82015-11-25 13:57:28 -05003438
3439 // Stencil buffers are assumed to use quad layout
3440 for(int z = 0; z < stencil.depth; z++)
John Bauman89401822014-05-06 15:04:28 -04003441 {
Alexis Hetu2b052f82015-11-25 13:57:28 -05003442 for(int y = y0; y < y1; y++)
John Bauman89401822014-05-06 15:04:28 -04003443 {
Alexis Hetu2b052f82015-11-25 13:57:28 -05003444 char *target = buffer + (y & ~1) * width2 + (y & 1) * 2;
3445
3446 if((y & 1) == 0 && y + 1 < y1 && mask == 0xFF) // Fill quad line at once
John Bauman89401822014-05-06 15:04:28 -04003447 {
Alexis Hetu2b052f82015-11-25 13:57:28 -05003448 if((x0 & 1) != 0)
John Bauman89401822014-05-06 15:04:28 -04003449 {
Alexis Hetu358a1442015-12-03 14:23:10 -05003450 target[oddX0 + 0] = fill;
3451 target[oddX0 + 2] = fill;
John Bauman89401822014-05-06 15:04:28 -04003452 }
3453
Alexis Hetu358a1442015-12-03 14:23:10 -05003454 memfill4(&target[evenX0], fill, evenBytes);
Alexis Hetu2b052f82015-11-25 13:57:28 -05003455
3456 if((x1 & 1) != 0)
3457 {
Alexis Hetu358a1442015-12-03 14:23:10 -05003458 target[oddX1 + 0] = fill;
3459 target[oddX1 + 2] = fill;
Alexis Hetu2b052f82015-11-25 13:57:28 -05003460 }
3461
3462 y++;
3463 }
3464 else
3465 {
Alexis Hetu358a1442015-12-03 14:23:10 -05003466 for(int x = x0, i = oddX0; x < x1; x++, i = (x & ~1) * 2 + (x & 1))
Alexis Hetu2b052f82015-11-25 13:57:28 -05003467 {
Alexis Hetu358a1442015-12-03 14:23:10 -05003468 target[i] = maskedS | (target[i] & invMask);
Alexis Hetu2b052f82015-11-25 13:57:28 -05003469 }
John Bauman89401822014-05-06 15:04:28 -04003470 }
3471 }
3472
Alexis Hetu2b052f82015-11-25 13:57:28 -05003473 buffer += stencil.sliceP;
John Bauman89401822014-05-06 15:04:28 -04003474 }
John Bauman89401822014-05-06 15:04:28 -04003475
Alexis Hetu2b052f82015-11-25 13:57:28 -05003476 unlockStencil();
John Bauman89401822014-05-06 15:04:28 -04003477 }
3478
3479 void Surface::fill(const Color<float> &color, int x0, int y0, int width, int height)
3480 {
3481 unsigned char *row;
3482 Buffer *buffer;
Nicolas Capensc39901e2016-03-21 16:37:44 -04003483
John Bauman89401822014-05-06 15:04:28 -04003484 if(internal.dirty)
3485 {
3486 row = (unsigned char*)lockInternal(x0, y0, 0, LOCK_WRITEONLY, PUBLIC);
3487 buffer = &internal;
3488 }
3489 else
3490 {
3491 row = (unsigned char*)lockExternal(x0, y0, 0, LOCK_WRITEONLY, PUBLIC);
3492 buffer = &external;
3493 }
3494
3495 if(buffer->bytes <= 4)
3496 {
3497 int c;
3498 buffer->write(&c, color);
3499
3500 if(buffer->bytes <= 1) c = (c << 8) | c;
3501 if(buffer->bytes <= 2) c = (c << 16) | c;
3502
3503 for(int y = 0; y < height; y++)
3504 {
Nicolas Capens5ba566b2015-05-25 17:11:04 -04003505 memfill4(row, c, width * buffer->bytes);
John Bauman89401822014-05-06 15:04:28 -04003506
3507 row += buffer->pitchB;
3508 }
3509 }
3510 else // Generic
3511 {
3512 for(int y = 0; y < height; y++)
3513 {
3514 unsigned char *element = row;
3515
3516 for(int x = 0; x < width; x++)
3517 {
3518 buffer->write(element, color);
3519
3520 element += buffer->bytes;
3521 }
3522
3523 row += buffer->pitchB;
3524 }
3525 }
3526
3527 if(buffer == &internal)
3528 {
3529 unlockInternal();
3530 }
3531 else
3532 {
3533 unlockExternal();
3534 }
3535 }
3536
Alexis Hetu43577b82015-10-21 15:32:16 -04003537 void Surface::copyInternal(const Surface* source, int x, int y, float srcX, float srcY, bool filter)
John Bauman89401822014-05-06 15:04:28 -04003538 {
Alexis Hetu43577b82015-10-21 15:32:16 -04003539 ASSERT(internal.lock != LOCK_UNLOCKED && source && source->internal.lock != LOCK_UNLOCKED);
John Bauman89401822014-05-06 15:04:28 -04003540
Alexis Hetu43577b82015-10-21 15:32:16 -04003541 sw::Color<float> color;
John Bauman89401822014-05-06 15:04:28 -04003542
Alexis Hetu43577b82015-10-21 15:32:16 -04003543 if(!filter)
3544 {
3545 color = source->internal.read((int)srcX, (int)srcY);
3546 }
3547 else // Bilinear filtering
3548 {
3549 color = source->internal.sample(srcX, srcY);
3550 }
John Bauman89401822014-05-06 15:04:28 -04003551
3552 internal.write(x, y, color);
3553 }
3554
Alexis Hetu43577b82015-10-21 15:32:16 -04003555 void Surface::copyInternal(const Surface* source, int x, int y, int z, float srcX, float srcY, float srcZ, bool filter)
3556 {
3557 ASSERT(internal.lock != LOCK_UNLOCKED && source && source->internal.lock != LOCK_UNLOCKED);
3558
3559 sw::Color<float> color;
3560
3561 if(!filter)
3562 {
3563 color = source->internal.read((int)srcX, (int)srcY, int(srcZ));
3564 }
3565 else // Bilinear filtering
3566 {
3567 color = source->internal.sample(srcX, srcY, srcZ);
3568 }
3569
3570 internal.write(x, y, z, color);
3571 }
3572
Alexis Hetua76a1bf2016-11-29 17:17:26 -05003573 void Surface::copyCubeEdge(Edge dstEdge, Surface *src, Edge srcEdge)
3574 {
3575 Surface *dst = this;
3576
3577 // Figure out if the edges to be copied in reverse order respectively from one another
3578 // The copy should be reversed whenever the same edges are contiguous or if we're
3579 // copying top <-> right or bottom <-> left. This is explained by the layout, which is:
3580 //
3581 // | +y |
3582 // | -x | +z | +x | -z |
3583 // | -y |
3584
3585 bool reverse = (srcEdge == dstEdge) ||
3586 ((srcEdge == TOP) && (dstEdge == RIGHT)) ||
3587 ((srcEdge == RIGHT) && (dstEdge == TOP)) ||
3588 ((srcEdge == BOTTOM) && (dstEdge == LEFT)) ||
3589 ((srcEdge == LEFT) && (dstEdge == BOTTOM));
3590
3591 int srcBytes = src->bytes(src->Surface::getInternalFormat());
3592 int srcPitch = src->getInternalPitchB();
3593 int dstBytes = dst->bytes(dst->Surface::getInternalFormat());
3594 int dstPitch = dst->getInternalPitchB();
3595
3596 int srcW = src->getWidth();
3597 int srcH = src->getHeight();
3598 int dstW = dst->getWidth();
3599 int dstH = dst->getHeight();
3600
3601 ASSERT(srcW == srcH && dstW == dstH && srcW == dstW && srcBytes == dstBytes);
3602
3603 // Src is expressed in the regular [0, width-1], [0, height-1] space
3604 int srcDelta = ((srcEdge == TOP) || (srcEdge == BOTTOM)) ? srcBytes : srcPitch;
3605 int srcStart = ((srcEdge == BOTTOM) ? srcPitch * (srcH - 1) : ((srcEdge == RIGHT) ? srcBytes * (srcW - 1) : 0));
3606
3607 // Dst contains borders, so it is expressed in the [-1, width+1], [-1, height+1] space
3608 int dstDelta = (((dstEdge == TOP) || (dstEdge == BOTTOM)) ? dstBytes : dstPitch) * (reverse ? -1 : 1);
3609 int dstStart = ((dstEdge == BOTTOM) ? dstPitch * (dstH + 1) : ((dstEdge == RIGHT) ? dstBytes * (dstW + 1) : 0)) + (reverse ? dstW * -dstDelta : dstDelta);
3610
3611 char *srcBuf = (char*)src->lockInternal(0, 0, 0, sw::LOCK_READONLY, sw::PRIVATE) + srcStart;
3612 char *dstBuf = (char*)dst->lockInternal(-1, -1, 0, sw::LOCK_READWRITE, sw::PRIVATE) + dstStart;
3613
3614 for(int i = 0; i < srcW; ++i, dstBuf += dstDelta, srcBuf += srcDelta)
3615 {
3616 memcpy(dstBuf, srcBuf, srcBytes);
3617 }
3618
3619 if(dstEdge == LEFT || dstEdge == RIGHT)
3620 {
3621 // TOP and BOTTOM are already set, let's average out the corners
3622 int x0 = (dstEdge == RIGHT) ? dstW : -1;
3623 int y0 = -1;
3624 int x1 = (dstEdge == RIGHT) ? dstW - 1 : 0;
3625 int y1 = 0;
3626 dst->computeCubeCorner(x0, y0, x1, y1);
3627 y0 = dstH;
3628 y1 = dstH - 1;
3629 dst->computeCubeCorner(x0, y0, x1, y1);
3630 }
3631
3632 src->unlockInternal();
3633 dst->unlockInternal();
3634 }
3635
3636 void Surface::computeCubeCorner(int x0, int y0, int x1, int y1)
3637 {
3638 ASSERT(internal.lock != LOCK_UNLOCKED);
3639
3640 sw::Color<float> color = internal.read(x0, y1);
3641 color += internal.read(x1, y0);
3642 color += internal.read(x1, y1);
3643 color *= (1.0f / 3.0f);
3644
3645 internal.write(x0, y0, color);
3646 }
3647
John Bauman89401822014-05-06 15:04:28 -04003648 bool Surface::hasStencil() const
3649 {
3650 return isStencil(external.format);
3651 }
Nicolas Capensc39901e2016-03-21 16:37:44 -04003652
John Bauman89401822014-05-06 15:04:28 -04003653 bool Surface::hasDepth() const
3654 {
3655 return isDepth(external.format);
3656 }
3657
3658 bool Surface::hasPalette() const
3659 {
3660 return isPalette(external.format);
3661 }
3662
3663 bool Surface::isRenderTarget() const
3664 {
3665 return renderTarget;
3666 }
3667
Nicolas Capens73e18c12017-11-28 13:31:35 -05003668 bool Surface::hasDirtyContents() const
John Bauman89401822014-05-06 15:04:28 -04003669 {
Nicolas Capens73e18c12017-11-28 13:31:35 -05003670 return dirtyContents;
John Bauman89401822014-05-06 15:04:28 -04003671 }
3672
Nicolas Capens73e18c12017-11-28 13:31:35 -05003673 void Surface::markContentsClean()
John Bauman89401822014-05-06 15:04:28 -04003674 {
Nicolas Capens73e18c12017-11-28 13:31:35 -05003675 dirtyContents = false;
John Bauman89401822014-05-06 15:04:28 -04003676 }
3677
3678 Resource *Surface::getResource()
3679 {
3680 return resource;
3681 }
3682
3683 bool Surface::identicalFormats() const
3684 {
John Bauman66b8ab22014-05-06 15:57:45 -04003685 return external.format == internal.format &&
3686 external.width == internal.width &&
Nicolas Capens22658242014-11-29 00:31:41 -05003687 external.height == internal.height &&
3688 external.depth == internal.depth &&
3689 external.pitchB == internal.pitchB &&
Alexis Hetu9c6d5222016-11-29 17:02:14 -05003690 external.sliceB == internal.sliceB &&
3691 external.border == internal.border;
John Bauman89401822014-05-06 15:04:28 -04003692 }
3693
3694 Format Surface::selectInternalFormat(Format format) const
3695 {
3696 switch(format)
3697 {
3698 case FORMAT_NULL:
3699 return FORMAT_NULL;
3700 case FORMAT_P8:
3701 case FORMAT_A8P8:
3702 case FORMAT_A4R4G4B4:
3703 case FORMAT_A1R5G5B5:
3704 case FORMAT_A8R3G3B2:
3705 return FORMAT_A8R8G8B8;
3706 case FORMAT_A8:
3707 return FORMAT_A8;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003708 case FORMAT_R8I:
3709 return FORMAT_R8I;
3710 case FORMAT_R8UI:
3711 return FORMAT_R8UI;
3712 case FORMAT_R8I_SNORM:
3713 return FORMAT_R8I_SNORM;
John Bauman89401822014-05-06 15:04:28 -04003714 case FORMAT_R8:
3715 return FORMAT_R8;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003716 case FORMAT_R16I:
3717 return FORMAT_R16I;
3718 case FORMAT_R16UI:
3719 return FORMAT_R16UI;
3720 case FORMAT_R32I:
3721 return FORMAT_R32I;
3722 case FORMAT_R32UI:
3723 return FORMAT_R32UI;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003724 case FORMAT_X16B16G16R16I:
Nicolas Capense4a88b92017-11-30 00:14:57 -05003725 return FORMAT_X16B16G16R16I;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003726 case FORMAT_A16B16G16R16I:
3727 return FORMAT_A16B16G16R16I;
3728 case FORMAT_X16B16G16R16UI:
Nicolas Capense4a88b92017-11-30 00:14:57 -05003729 return FORMAT_X16B16G16R16UI;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003730 case FORMAT_A16B16G16R16UI:
3731 return FORMAT_A16B16G16R16UI;
Alexis Hetuf999a002015-12-17 11:09:36 -05003732 case FORMAT_A2R10G10B10:
3733 case FORMAT_A2B10G10R10:
John Bauman89401822014-05-06 15:04:28 -04003734 case FORMAT_A16B16G16R16:
3735 return FORMAT_A16B16G16R16;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003736 case FORMAT_X32B32G32R32I:
Nicolas Capense4a88b92017-11-30 00:14:57 -05003737 return FORMAT_X32B32G32R32I;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003738 case FORMAT_A32B32G32R32I:
3739 return FORMAT_A32B32G32R32I;
3740 case FORMAT_X32B32G32R32UI:
Nicolas Capense4a88b92017-11-30 00:14:57 -05003741 return FORMAT_X32B32G32R32UI;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003742 case FORMAT_A32B32G32R32UI:
3743 return FORMAT_A32B32G32R32UI;
3744 case FORMAT_G8R8I:
3745 return FORMAT_G8R8I;
3746 case FORMAT_G8R8UI:
3747 return FORMAT_G8R8UI;
3748 case FORMAT_G8R8I_SNORM:
3749 return FORMAT_G8R8I_SNORM;
John Bauman89401822014-05-06 15:04:28 -04003750 case FORMAT_G8R8:
3751 return FORMAT_G8R8;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003752 case FORMAT_G16R16I:
3753 return FORMAT_G16R16I;
3754 case FORMAT_G16R16UI:
3755 return FORMAT_G16R16UI;
John Bauman89401822014-05-06 15:04:28 -04003756 case FORMAT_G16R16:
3757 return FORMAT_G16R16;
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003758 case FORMAT_G32R32I:
3759 return FORMAT_G32R32I;
3760 case FORMAT_G32R32UI:
3761 return FORMAT_G32R32UI;
John Bauman89401822014-05-06 15:04:28 -04003762 case FORMAT_A8R8G8B8:
John Bauman89401822014-05-06 15:04:28 -04003763 if(lockable || !quadLayoutEnabled)
3764 {
3765 return FORMAT_A8R8G8B8;
3766 }
3767 else
3768 {
3769 return FORMAT_A8G8R8B8Q;
3770 }
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003771 case FORMAT_A8B8G8R8I:
3772 return FORMAT_A8B8G8R8I;
3773 case FORMAT_A8B8G8R8UI:
3774 return FORMAT_A8B8G8R8UI;
3775 case FORMAT_A8B8G8R8I_SNORM:
3776 return FORMAT_A8B8G8R8I_SNORM;
Nicolas Capens80594422015-06-09 16:42:56 -04003777 case FORMAT_R5G5B5A1:
3778 case FORMAT_R4G4B4A4:
Nicolas Capensef77ac12015-03-28 21:48:51 -04003779 case FORMAT_A8B8G8R8:
3780 return FORMAT_A8B8G8R8;
John Bauman89401822014-05-06 15:04:28 -04003781 case FORMAT_R5G6B5:
Nicolas Capens5a86ee92015-09-04 10:45:43 -04003782 return FORMAT_R5G6B5;
3783 case FORMAT_R3G3B2:
John Bauman89401822014-05-06 15:04:28 -04003784 case FORMAT_R8G8B8:
3785 case FORMAT_X4R4G4B4:
3786 case FORMAT_X1R5G5B5:
3787 case FORMAT_X8R8G8B8:
John Bauman89401822014-05-06 15:04:28 -04003788 if(lockable || !quadLayoutEnabled)
3789 {
3790 return FORMAT_X8R8G8B8;
3791 }
3792 else
3793 {
3794 return FORMAT_X8G8R8B8Q;
3795 }
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003796 case FORMAT_X8B8G8R8I:
3797 return FORMAT_X8B8G8R8I;
3798 case FORMAT_X8B8G8R8UI:
3799 return FORMAT_X8B8G8R8UI;
3800 case FORMAT_X8B8G8R8I_SNORM:
3801 return FORMAT_X8B8G8R8I_SNORM;
Nicolas Capens80594422015-06-09 16:42:56 -04003802 case FORMAT_B8G8R8:
Nicolas Capensef77ac12015-03-28 21:48:51 -04003803 case FORMAT_X8B8G8R8:
3804 return FORMAT_X8B8G8R8;
Alexis Hetu049a1872016-04-25 16:59:58 -04003805 case FORMAT_SRGB8_X8:
3806 return FORMAT_SRGB8_X8;
3807 case FORMAT_SRGB8_A8:
3808 return FORMAT_SRGB8_A8;
John Bauman89401822014-05-06 15:04:28 -04003809 // Compressed formats
3810 #if S3TC_SUPPORT
3811 case FORMAT_DXT1:
3812 case FORMAT_DXT3:
3813 case FORMAT_DXT5:
John Bauman66b8ab22014-05-06 15:57:45 -04003814 #endif
Alexis Hetu460e41f2015-09-01 10:58:37 -04003815 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
3816 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
3817 case FORMAT_RGBA8_ETC2_EAC:
3818 case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
3819 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
3820 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
3821 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
3822 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
3823 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
3824 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
3825 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
3826 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
3827 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
3828 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
3829 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
3830 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
3831 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
3832 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:
3833 return FORMAT_A8R8G8B8;
3834 case FORMAT_RGBA_ASTC_4x4_KHR:
3835 case FORMAT_RGBA_ASTC_5x4_KHR:
3836 case FORMAT_RGBA_ASTC_5x5_KHR:
3837 case FORMAT_RGBA_ASTC_6x5_KHR:
3838 case FORMAT_RGBA_ASTC_6x6_KHR:
3839 case FORMAT_RGBA_ASTC_8x5_KHR:
3840 case FORMAT_RGBA_ASTC_8x6_KHR:
3841 case FORMAT_RGBA_ASTC_8x8_KHR:
3842 case FORMAT_RGBA_ASTC_10x5_KHR:
3843 case FORMAT_RGBA_ASTC_10x6_KHR:
3844 case FORMAT_RGBA_ASTC_10x8_KHR:
3845 case FORMAT_RGBA_ASTC_10x10_KHR:
3846 case FORMAT_RGBA_ASTC_12x10_KHR:
3847 case FORMAT_RGBA_ASTC_12x12_KHR:
3848 // ASTC supports HDR, so a floating point format is required to represent it properly
3849 return FORMAT_A32B32G32R32F; // FIXME: 16FP is probably sufficient, but it's currently unsupported
John Bauman89401822014-05-06 15:04:28 -04003850 case FORMAT_ATI1:
Alexis Hetu460e41f2015-09-01 10:58:37 -04003851 case FORMAT_R11_EAC:
John Bauman89401822014-05-06 15:04:28 -04003852 return FORMAT_R8;
Alexis Hetu0de50d42015-09-09 13:56:41 -04003853 case FORMAT_SIGNED_R11_EAC:
3854 return FORMAT_R32F; // FIXME: Signed 8bit format would be sufficient
John Bauman89401822014-05-06 15:04:28 -04003855 case FORMAT_ATI2:
Alexis Hetu460e41f2015-09-01 10:58:37 -04003856 case FORMAT_RG11_EAC:
John Bauman89401822014-05-06 15:04:28 -04003857 return FORMAT_G8R8;
Alexis Hetu0de50d42015-09-09 13:56:41 -04003858 case FORMAT_SIGNED_RG11_EAC:
3859 return FORMAT_G32R32F; // FIXME: Signed 8bit format would be sufficient
Nicolas Capens22658242014-11-29 00:31:41 -05003860 case FORMAT_ETC1:
Alexis Hetu460e41f2015-09-01 10:58:37 -04003861 case FORMAT_RGB8_ETC2:
3862 case FORMAT_SRGB8_ETC2:
Nicolas Capens22658242014-11-29 00:31:41 -05003863 return FORMAT_X8R8G8B8;
John Bauman89401822014-05-06 15:04:28 -04003864 // Bumpmap formats
3865 case FORMAT_V8U8: return FORMAT_V8U8;
3866 case FORMAT_L6V5U5: return FORMAT_X8L8V8U8;
3867 case FORMAT_Q8W8V8U8: return FORMAT_Q8W8V8U8;
3868 case FORMAT_X8L8V8U8: return FORMAT_X8L8V8U8;
3869 case FORMAT_V16U16: return FORMAT_V16U16;
3870 case FORMAT_A2W10V10U10: return FORMAT_A16W16V16U16;
3871 case FORMAT_Q16W16V16U16: return FORMAT_Q16W16V16U16;
3872 // Floating-point formats
Nicolas Capens80594422015-06-09 16:42:56 -04003873 case FORMAT_A16F: return FORMAT_A32B32G32R32F;
John Bauman89401822014-05-06 15:04:28 -04003874 case FORMAT_R16F: return FORMAT_R32F;
3875 case FORMAT_G16R16F: return FORMAT_G32R32F;
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04003876 case FORMAT_B16G16R16F: return FORMAT_X32B32G32R32F;
John Bauman89401822014-05-06 15:04:28 -04003877 case FORMAT_A16B16G16R16F: return FORMAT_A32B32G32R32F;
Nicolas Capens80594422015-06-09 16:42:56 -04003878 case FORMAT_A32F: return FORMAT_A32B32G32R32F;
John Bauman89401822014-05-06 15:04:28 -04003879 case FORMAT_R32F: return FORMAT_R32F;
3880 case FORMAT_G32R32F: return FORMAT_G32R32F;
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04003881 case FORMAT_B32G32R32F: return FORMAT_X32B32G32R32F;
3882 case FORMAT_X32B32G32R32F: return FORMAT_X32B32G32R32F;
John Bauman89401822014-05-06 15:04:28 -04003883 case FORMAT_A32B32G32R32F: return FORMAT_A32B32G32R32F;
3884 // Luminance formats
3885 case FORMAT_L8: return FORMAT_L8;
3886 case FORMAT_A4L4: return FORMAT_A8L8;
3887 case FORMAT_L16: return FORMAT_L16;
3888 case FORMAT_A8L8: return FORMAT_A8L8;
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04003889 case FORMAT_L16F: return FORMAT_X32B32G32R32F;
Nicolas Capens80594422015-06-09 16:42:56 -04003890 case FORMAT_A16L16F: return FORMAT_A32B32G32R32F;
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04003891 case FORMAT_L32F: return FORMAT_X32B32G32R32F;
Nicolas Capens80594422015-06-09 16:42:56 -04003892 case FORMAT_A32L32F: return FORMAT_A32B32G32R32F;
John Bauman89401822014-05-06 15:04:28 -04003893 // Depth/stencil formats
3894 case FORMAT_D16:
3895 case FORMAT_D32:
3896 case FORMAT_D24X8:
John Bauman89401822014-05-06 15:04:28 -04003897 if(hasParent) // Texture
3898 {
Nicolas Capens57e7cea2017-12-13 22:25:04 -05003899 return FORMAT_D32F_SHADOW;
John Bauman89401822014-05-06 15:04:28 -04003900 }
3901 else if(complementaryDepthBuffer)
3902 {
3903 return FORMAT_D32F_COMPLEMENTARY;
3904 }
3905 else
3906 {
3907 return FORMAT_D32F;
3908 }
Nicolas Capens57e7cea2017-12-13 22:25:04 -05003909 case FORMAT_D24S8:
3910 case FORMAT_D24FS8:
3911 if(hasParent) // Texture
3912 {
3913 return FORMAT_D32FS8_SHADOW;
3914 }
3915 else if(complementaryDepthBuffer)
3916 {
3917 return FORMAT_D32FS8_COMPLEMENTARY;
3918 }
3919 else
3920 {
3921 return FORMAT_D32FS8;
3922 }
Alexis Hetud3a2d3d2015-10-22 10:57:58 -04003923 case FORMAT_D32F: return FORMAT_D32F;
Nicolas Capens57e7cea2017-12-13 22:25:04 -05003924 case FORMAT_D32FS8: return FORMAT_D32FS8;
John Bauman66b8ab22014-05-06 15:57:45 -04003925 case FORMAT_D32F_LOCKABLE: return FORMAT_D32F_LOCKABLE;
3926 case FORMAT_D32FS8_TEXTURE: return FORMAT_D32FS8_TEXTURE;
3927 case FORMAT_INTZ: return FORMAT_D32FS8_TEXTURE;
3928 case FORMAT_DF24S8: return FORMAT_D32FS8_SHADOW;
3929 case FORMAT_DF16S8: return FORMAT_D32FS8_SHADOW;
Nicolas Capens57e7cea2017-12-13 22:25:04 -05003930 case FORMAT_S8: return FORMAT_S8;
3931 // YUV formats
Nicolas Capens8e8a7e82015-09-01 14:39:57 -04003932 case FORMAT_YV12_BT601: return FORMAT_YV12_BT601;
3933 case FORMAT_YV12_BT709: return FORMAT_YV12_BT709;
3934 case FORMAT_YV12_JFIF: return FORMAT_YV12_JFIF;
John Bauman89401822014-05-06 15:04:28 -04003935 default:
3936 ASSERT(false);
3937 }
3938
3939 return FORMAT_NULL;
3940 }
3941
3942 void Surface::setTexturePalette(unsigned int *palette)
3943 {
3944 Surface::palette = palette;
3945 Surface::paletteID++;
3946 }
3947
3948 void Surface::resolve()
3949 {
3950 if(internal.depth <= 1 || !internal.dirty || !renderTarget || internal.format == FORMAT_NULL)
3951 {
3952 return;
3953 }
3954
3955 void *source = internal.lockRect(0, 0, 0, LOCK_READWRITE);
3956
John Bauman89401822014-05-06 15:04:28 -04003957 int width = internal.width;
3958 int height = internal.height;
3959 int pitch = internal.pitchB;
3960 int slice = internal.sliceB;
3961
3962 unsigned char *source0 = (unsigned char*)source;
3963 unsigned char *source1 = source0 + slice;
3964 unsigned char *source2 = source1 + slice;
3965 unsigned char *source3 = source2 + slice;
3966 unsigned char *source4 = source3 + slice;
3967 unsigned char *source5 = source4 + slice;
3968 unsigned char *source6 = source5 + slice;
3969 unsigned char *source7 = source6 + slice;
3970 unsigned char *source8 = source7 + slice;
3971 unsigned char *source9 = source8 + slice;
3972 unsigned char *sourceA = source9 + slice;
3973 unsigned char *sourceB = sourceA + slice;
3974 unsigned char *sourceC = sourceB + slice;
3975 unsigned char *sourceD = sourceC + slice;
3976 unsigned char *sourceE = sourceD + slice;
3977 unsigned char *sourceF = sourceE + slice;
3978
Alexis Hetu049a1872016-04-25 16:59:58 -04003979 if(internal.format == FORMAT_X8R8G8B8 || internal.format == FORMAT_A8R8G8B8 ||
3980 internal.format == FORMAT_X8B8G8R8 || internal.format == FORMAT_A8B8G8R8 ||
3981 internal.format == FORMAT_SRGB8_X8 || internal.format == FORMAT_SRGB8_A8)
John Bauman89401822014-05-06 15:04:28 -04003982 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04003983 #if defined(__i386__) || defined(__x86_64__)
3984 if(CPUID::supportsSSE2() && (width % 4) == 0)
John Bauman89401822014-05-06 15:04:28 -04003985 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04003986 if(internal.depth == 2)
John Bauman89401822014-05-06 15:04:28 -04003987 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04003988 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04003989 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04003990 for(int x = 0; x < width; x += 4)
3991 {
3992 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
3993 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04003994
Nicolas Capens47dc8672017-04-25 12:54:39 -04003995 c0 = _mm_avg_epu8(c0, c1);
John Bauman89401822014-05-06 15:04:28 -04003996
Nicolas Capens47dc8672017-04-25 12:54:39 -04003997 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
3998 }
3999
4000 source0 += pitch;
4001 source1 += pitch;
John Bauman89401822014-05-06 15:04:28 -04004002 }
John Bauman89401822014-05-06 15:04:28 -04004003 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04004004 else if(internal.depth == 4)
John Bauman89401822014-05-06 15:04:28 -04004005 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004006 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004007 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004008 for(int x = 0; x < width; x += 4)
4009 {
4010 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
4011 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
4012 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x));
4013 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004014
Nicolas Capens47dc8672017-04-25 12:54:39 -04004015 c0 = _mm_avg_epu8(c0, c1);
4016 c2 = _mm_avg_epu8(c2, c3);
4017 c0 = _mm_avg_epu8(c0, c2);
John Bauman89401822014-05-06 15:04:28 -04004018
Nicolas Capens47dc8672017-04-25 12:54:39 -04004019 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
4020 }
4021
4022 source0 += pitch;
4023 source1 += pitch;
4024 source2 += pitch;
4025 source3 += pitch;
John Bauman89401822014-05-06 15:04:28 -04004026 }
John Bauman89401822014-05-06 15:04:28 -04004027 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04004028 else if(internal.depth == 8)
John Bauman89401822014-05-06 15:04:28 -04004029 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004030 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004031 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004032 for(int x = 0; x < width; x += 4)
4033 {
4034 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
4035 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
4036 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x));
4037 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x));
4038 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x));
4039 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x));
4040 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x));
4041 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004042
Nicolas Capens47dc8672017-04-25 12:54:39 -04004043 c0 = _mm_avg_epu8(c0, c1);
4044 c2 = _mm_avg_epu8(c2, c3);
4045 c4 = _mm_avg_epu8(c4, c5);
4046 c6 = _mm_avg_epu8(c6, c7);
4047 c0 = _mm_avg_epu8(c0, c2);
4048 c4 = _mm_avg_epu8(c4, c6);
4049 c0 = _mm_avg_epu8(c0, c4);
John Bauman89401822014-05-06 15:04:28 -04004050
Nicolas Capens47dc8672017-04-25 12:54:39 -04004051 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
4052 }
4053
4054 source0 += pitch;
4055 source1 += pitch;
4056 source2 += pitch;
4057 source3 += pitch;
4058 source4 += pitch;
4059 source5 += pitch;
4060 source6 += pitch;
4061 source7 += pitch;
John Bauman89401822014-05-06 15:04:28 -04004062 }
John Bauman89401822014-05-06 15:04:28 -04004063 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04004064 else if(internal.depth == 16)
John Bauman89401822014-05-06 15:04:28 -04004065 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004066 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004067 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004068 for(int x = 0; x < width; x += 4)
4069 {
4070 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
4071 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
4072 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x));
4073 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x));
4074 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x));
4075 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x));
4076 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x));
4077 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x));
4078 __m128i c8 = _mm_load_si128((__m128i*)(source8 + 4 * x));
4079 __m128i c9 = _mm_load_si128((__m128i*)(source9 + 4 * x));
4080 __m128i cA = _mm_load_si128((__m128i*)(sourceA + 4 * x));
4081 __m128i cB = _mm_load_si128((__m128i*)(sourceB + 4 * x));
4082 __m128i cC = _mm_load_si128((__m128i*)(sourceC + 4 * x));
4083 __m128i cD = _mm_load_si128((__m128i*)(sourceD + 4 * x));
4084 __m128i cE = _mm_load_si128((__m128i*)(sourceE + 4 * x));
4085 __m128i cF = _mm_load_si128((__m128i*)(sourceF + 4 * x));
John Bauman89401822014-05-06 15:04:28 -04004086
Nicolas Capens47dc8672017-04-25 12:54:39 -04004087 c0 = _mm_avg_epu8(c0, c1);
4088 c2 = _mm_avg_epu8(c2, c3);
4089 c4 = _mm_avg_epu8(c4, c5);
4090 c6 = _mm_avg_epu8(c6, c7);
4091 c8 = _mm_avg_epu8(c8, c9);
4092 cA = _mm_avg_epu8(cA, cB);
4093 cC = _mm_avg_epu8(cC, cD);
4094 cE = _mm_avg_epu8(cE, cF);
4095 c0 = _mm_avg_epu8(c0, c2);
4096 c4 = _mm_avg_epu8(c4, c6);
4097 c8 = _mm_avg_epu8(c8, cA);
4098 cC = _mm_avg_epu8(cC, cE);
4099 c0 = _mm_avg_epu8(c0, c4);
4100 c8 = _mm_avg_epu8(c8, cC);
4101 c0 = _mm_avg_epu8(c0, c8);
John Bauman89401822014-05-06 15:04:28 -04004102
Nicolas Capens47dc8672017-04-25 12:54:39 -04004103 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
4104 }
4105
4106 source0 += pitch;
4107 source1 += pitch;
4108 source2 += pitch;
4109 source3 += pitch;
4110 source4 += pitch;
4111 source5 += pitch;
4112 source6 += pitch;
4113 source7 += pitch;
4114 source8 += pitch;
4115 source9 += pitch;
4116 sourceA += pitch;
4117 sourceB += pitch;
4118 sourceC += pitch;
4119 sourceD += pitch;
4120 sourceE += pitch;
4121 sourceF += pitch;
John Bauman89401822014-05-06 15:04:28 -04004122 }
John Bauman89401822014-05-06 15:04:28 -04004123 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04004124 else ASSERT(false);
John Bauman89401822014-05-06 15:04:28 -04004125 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04004126 else
4127 #endif
John Bauman89401822014-05-06 15:04:28 -04004128 {
4129 #define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7F7F7F7F) + (((x) ^ (y)) & 0x01010101))
4130
4131 if(internal.depth == 2)
4132 {
4133 for(int y = 0; y < height; y++)
4134 {
4135 for(int x = 0; x < width; x++)
4136 {
4137 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4138 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4139
4140 c0 = AVERAGE(c0, c1);
4141
4142 *(unsigned int*)(source0 + 4 * x) = c0;
4143 }
4144
4145 source0 += pitch;
4146 source1 += pitch;
4147 }
4148 }
4149 else if(internal.depth == 4)
4150 {
4151 for(int y = 0; y < height; y++)
4152 {
4153 for(int x = 0; x < width; x++)
4154 {
4155 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4156 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4157 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4158 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4159
4160 c0 = AVERAGE(c0, c1);
4161 c2 = AVERAGE(c2, c3);
4162 c0 = AVERAGE(c0, c2);
4163
4164 *(unsigned int*)(source0 + 4 * x) = c0;
4165 }
4166
4167 source0 += pitch;
4168 source1 += pitch;
4169 source2 += pitch;
4170 source3 += pitch;
4171 }
4172 }
4173 else if(internal.depth == 8)
4174 {
4175 for(int y = 0; y < height; y++)
4176 {
4177 for(int x = 0; x < width; x++)
4178 {
4179 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4180 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4181 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4182 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4183 unsigned int c4 = *(unsigned int*)(source4 + 4 * x);
4184 unsigned int c5 = *(unsigned int*)(source5 + 4 * x);
4185 unsigned int c6 = *(unsigned int*)(source6 + 4 * x);
4186 unsigned int c7 = *(unsigned int*)(source7 + 4 * x);
4187
4188 c0 = AVERAGE(c0, c1);
4189 c2 = AVERAGE(c2, c3);
4190 c4 = AVERAGE(c4, c5);
4191 c6 = AVERAGE(c6, c7);
4192 c0 = AVERAGE(c0, c2);
4193 c4 = AVERAGE(c4, c6);
4194 c0 = AVERAGE(c0, c4);
4195
4196 *(unsigned int*)(source0 + 4 * x) = c0;
4197 }
4198
4199 source0 += pitch;
4200 source1 += pitch;
4201 source2 += pitch;
4202 source3 += pitch;
4203 source4 += pitch;
4204 source5 += pitch;
4205 source6 += pitch;
4206 source7 += pitch;
4207 }
4208 }
4209 else if(internal.depth == 16)
4210 {
4211 for(int y = 0; y < height; y++)
4212 {
4213 for(int x = 0; x < width; x++)
4214 {
4215 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4216 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4217 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4218 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4219 unsigned int c4 = *(unsigned int*)(source4 + 4 * x);
4220 unsigned int c5 = *(unsigned int*)(source5 + 4 * x);
4221 unsigned int c6 = *(unsigned int*)(source6 + 4 * x);
4222 unsigned int c7 = *(unsigned int*)(source7 + 4 * x);
4223 unsigned int c8 = *(unsigned int*)(source8 + 4 * x);
4224 unsigned int c9 = *(unsigned int*)(source9 + 4 * x);
4225 unsigned int cA = *(unsigned int*)(sourceA + 4 * x);
4226 unsigned int cB = *(unsigned int*)(sourceB + 4 * x);
4227 unsigned int cC = *(unsigned int*)(sourceC + 4 * x);
4228 unsigned int cD = *(unsigned int*)(sourceD + 4 * x);
4229 unsigned int cE = *(unsigned int*)(sourceE + 4 * x);
4230 unsigned int cF = *(unsigned int*)(sourceF + 4 * x);
4231
4232 c0 = AVERAGE(c0, c1);
4233 c2 = AVERAGE(c2, c3);
4234 c4 = AVERAGE(c4, c5);
4235 c6 = AVERAGE(c6, c7);
4236 c8 = AVERAGE(c8, c9);
4237 cA = AVERAGE(cA, cB);
4238 cC = AVERAGE(cC, cD);
4239 cE = AVERAGE(cE, cF);
4240 c0 = AVERAGE(c0, c2);
4241 c4 = AVERAGE(c4, c6);
4242 c8 = AVERAGE(c8, cA);
4243 cC = AVERAGE(cC, cE);
4244 c0 = AVERAGE(c0, c4);
4245 c8 = AVERAGE(c8, cC);
4246 c0 = AVERAGE(c0, c8);
4247
4248 *(unsigned int*)(source0 + 4 * x) = c0;
4249 }
4250
4251 source0 += pitch;
4252 source1 += pitch;
4253 source2 += pitch;
4254 source3 += pitch;
4255 source4 += pitch;
4256 source5 += pitch;
4257 source6 += pitch;
4258 source7 += pitch;
4259 source8 += pitch;
4260 source9 += pitch;
4261 sourceA += pitch;
4262 sourceB += pitch;
4263 sourceC += pitch;
4264 sourceD += pitch;
4265 sourceE += pitch;
4266 sourceF += pitch;
4267 }
4268 }
4269 else ASSERT(false);
4270
4271 #undef AVERAGE
4272 }
4273 }
4274 else if(internal.format == FORMAT_G16R16)
4275 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004276
4277 #if defined(__i386__) || defined(__x86_64__)
4278 if(CPUID::supportsSSE2() && (width % 4) == 0)
John Bauman89401822014-05-06 15:04:28 -04004279 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004280 if(internal.depth == 2)
John Bauman89401822014-05-06 15:04:28 -04004281 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004282 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004283 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004284 for(int x = 0; x < width; x += 4)
4285 {
4286 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
4287 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004288
Nicolas Capens47dc8672017-04-25 12:54:39 -04004289 c0 = _mm_avg_epu16(c0, c1);
John Bauman89401822014-05-06 15:04:28 -04004290
Nicolas Capens47dc8672017-04-25 12:54:39 -04004291 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
4292 }
4293
4294 source0 += pitch;
4295 source1 += pitch;
John Bauman89401822014-05-06 15:04:28 -04004296 }
John Bauman89401822014-05-06 15:04:28 -04004297 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04004298 else if(internal.depth == 4)
John Bauman89401822014-05-06 15:04:28 -04004299 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004300 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004301 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004302 for(int x = 0; x < width; x += 4)
4303 {
4304 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
4305 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
4306 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x));
4307 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004308
Nicolas Capens47dc8672017-04-25 12:54:39 -04004309 c0 = _mm_avg_epu16(c0, c1);
4310 c2 = _mm_avg_epu16(c2, c3);
4311 c0 = _mm_avg_epu16(c0, c2);
John Bauman89401822014-05-06 15:04:28 -04004312
Nicolas Capens47dc8672017-04-25 12:54:39 -04004313 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
4314 }
4315
4316 source0 += pitch;
4317 source1 += pitch;
4318 source2 += pitch;
4319 source3 += pitch;
John Bauman89401822014-05-06 15:04:28 -04004320 }
John Bauman89401822014-05-06 15:04:28 -04004321 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04004322 else if(internal.depth == 8)
John Bauman89401822014-05-06 15:04:28 -04004323 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004324 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004325 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004326 for(int x = 0; x < width; x += 4)
4327 {
4328 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
4329 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
4330 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x));
4331 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x));
4332 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x));
4333 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x));
4334 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x));
4335 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004336
Nicolas Capens47dc8672017-04-25 12:54:39 -04004337 c0 = _mm_avg_epu16(c0, c1);
4338 c2 = _mm_avg_epu16(c2, c3);
4339 c4 = _mm_avg_epu16(c4, c5);
4340 c6 = _mm_avg_epu16(c6, c7);
4341 c0 = _mm_avg_epu16(c0, c2);
4342 c4 = _mm_avg_epu16(c4, c6);
4343 c0 = _mm_avg_epu16(c0, c4);
John Bauman89401822014-05-06 15:04:28 -04004344
Nicolas Capens47dc8672017-04-25 12:54:39 -04004345 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
4346 }
4347
4348 source0 += pitch;
4349 source1 += pitch;
4350 source2 += pitch;
4351 source3 += pitch;
4352 source4 += pitch;
4353 source5 += pitch;
4354 source6 += pitch;
4355 source7 += pitch;
John Bauman89401822014-05-06 15:04:28 -04004356 }
John Bauman89401822014-05-06 15:04:28 -04004357 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04004358 else if(internal.depth == 16)
John Bauman89401822014-05-06 15:04:28 -04004359 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004360 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004361 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004362 for(int x = 0; x < width; x += 4)
4363 {
4364 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x));
4365 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x));
4366 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x));
4367 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x));
4368 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x));
4369 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x));
4370 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x));
4371 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x));
4372 __m128i c8 = _mm_load_si128((__m128i*)(source8 + 4 * x));
4373 __m128i c9 = _mm_load_si128((__m128i*)(source9 + 4 * x));
4374 __m128i cA = _mm_load_si128((__m128i*)(sourceA + 4 * x));
4375 __m128i cB = _mm_load_si128((__m128i*)(sourceB + 4 * x));
4376 __m128i cC = _mm_load_si128((__m128i*)(sourceC + 4 * x));
4377 __m128i cD = _mm_load_si128((__m128i*)(sourceD + 4 * x));
4378 __m128i cE = _mm_load_si128((__m128i*)(sourceE + 4 * x));
4379 __m128i cF = _mm_load_si128((__m128i*)(sourceF + 4 * x));
John Bauman89401822014-05-06 15:04:28 -04004380
Nicolas Capens47dc8672017-04-25 12:54:39 -04004381 c0 = _mm_avg_epu16(c0, c1);
4382 c2 = _mm_avg_epu16(c2, c3);
4383 c4 = _mm_avg_epu16(c4, c5);
4384 c6 = _mm_avg_epu16(c6, c7);
4385 c8 = _mm_avg_epu16(c8, c9);
4386 cA = _mm_avg_epu16(cA, cB);
4387 cC = _mm_avg_epu16(cC, cD);
4388 cE = _mm_avg_epu16(cE, cF);
4389 c0 = _mm_avg_epu16(c0, c2);
4390 c4 = _mm_avg_epu16(c4, c6);
4391 c8 = _mm_avg_epu16(c8, cA);
4392 cC = _mm_avg_epu16(cC, cE);
4393 c0 = _mm_avg_epu16(c0, c4);
4394 c8 = _mm_avg_epu16(c8, cC);
4395 c0 = _mm_avg_epu16(c0, c8);
John Bauman89401822014-05-06 15:04:28 -04004396
Nicolas Capens47dc8672017-04-25 12:54:39 -04004397 _mm_store_si128((__m128i*)(source0 + 4 * x), c0);
4398 }
4399
4400 source0 += pitch;
4401 source1 += pitch;
4402 source2 += pitch;
4403 source3 += pitch;
4404 source4 += pitch;
4405 source5 += pitch;
4406 source6 += pitch;
4407 source7 += pitch;
4408 source8 += pitch;
4409 source9 += pitch;
4410 sourceA += pitch;
4411 sourceB += pitch;
4412 sourceC += pitch;
4413 sourceD += pitch;
4414 sourceE += pitch;
4415 sourceF += pitch;
John Bauman89401822014-05-06 15:04:28 -04004416 }
John Bauman89401822014-05-06 15:04:28 -04004417 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04004418 else ASSERT(false);
John Bauman89401822014-05-06 15:04:28 -04004419 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04004420 else
4421 #endif
John Bauman89401822014-05-06 15:04:28 -04004422 {
4423 #define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7FFF7FFF) + (((x) ^ (y)) & 0x00010001))
4424
4425 if(internal.depth == 2)
4426 {
4427 for(int y = 0; y < height; y++)
4428 {
4429 for(int x = 0; x < width; x++)
4430 {
4431 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4432 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4433
4434 c0 = AVERAGE(c0, c1);
4435
4436 *(unsigned int*)(source0 + 4 * x) = c0;
4437 }
4438
4439 source0 += pitch;
4440 source1 += pitch;
4441 }
4442 }
4443 else if(internal.depth == 4)
4444 {
4445 for(int y = 0; y < height; y++)
4446 {
4447 for(int x = 0; x < width; x++)
4448 {
4449 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4450 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4451 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4452 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4453
4454 c0 = AVERAGE(c0, c1);
4455 c2 = AVERAGE(c2, c3);
4456 c0 = AVERAGE(c0, c2);
4457
4458 *(unsigned int*)(source0 + 4 * x) = c0;
4459 }
4460
4461 source0 += pitch;
4462 source1 += pitch;
4463 source2 += pitch;
4464 source3 += pitch;
4465 }
4466 }
4467 else if(internal.depth == 8)
4468 {
4469 for(int y = 0; y < height; y++)
4470 {
4471 for(int x = 0; x < width; x++)
4472 {
4473 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4474 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4475 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4476 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4477 unsigned int c4 = *(unsigned int*)(source4 + 4 * x);
4478 unsigned int c5 = *(unsigned int*)(source5 + 4 * x);
4479 unsigned int c6 = *(unsigned int*)(source6 + 4 * x);
4480 unsigned int c7 = *(unsigned int*)(source7 + 4 * x);
4481
4482 c0 = AVERAGE(c0, c1);
4483 c2 = AVERAGE(c2, c3);
4484 c4 = AVERAGE(c4, c5);
4485 c6 = AVERAGE(c6, c7);
4486 c0 = AVERAGE(c0, c2);
4487 c4 = AVERAGE(c4, c6);
4488 c0 = AVERAGE(c0, c4);
4489
4490 *(unsigned int*)(source0 + 4 * x) = c0;
4491 }
4492
4493 source0 += pitch;
4494 source1 += pitch;
4495 source2 += pitch;
4496 source3 += pitch;
4497 source4 += pitch;
4498 source5 += pitch;
4499 source6 += pitch;
4500 source7 += pitch;
4501 }
4502 }
4503 else if(internal.depth == 16)
4504 {
4505 for(int y = 0; y < height; y++)
4506 {
4507 for(int x = 0; x < width; x++)
4508 {
4509 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4510 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4511 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4512 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4513 unsigned int c4 = *(unsigned int*)(source4 + 4 * x);
4514 unsigned int c5 = *(unsigned int*)(source5 + 4 * x);
4515 unsigned int c6 = *(unsigned int*)(source6 + 4 * x);
4516 unsigned int c7 = *(unsigned int*)(source7 + 4 * x);
4517 unsigned int c8 = *(unsigned int*)(source8 + 4 * x);
4518 unsigned int c9 = *(unsigned int*)(source9 + 4 * x);
4519 unsigned int cA = *(unsigned int*)(sourceA + 4 * x);
4520 unsigned int cB = *(unsigned int*)(sourceB + 4 * x);
4521 unsigned int cC = *(unsigned int*)(sourceC + 4 * x);
4522 unsigned int cD = *(unsigned int*)(sourceD + 4 * x);
4523 unsigned int cE = *(unsigned int*)(sourceE + 4 * x);
4524 unsigned int cF = *(unsigned int*)(sourceF + 4 * x);
4525
4526 c0 = AVERAGE(c0, c1);
4527 c2 = AVERAGE(c2, c3);
4528 c4 = AVERAGE(c4, c5);
4529 c6 = AVERAGE(c6, c7);
4530 c8 = AVERAGE(c8, c9);
4531 cA = AVERAGE(cA, cB);
4532 cC = AVERAGE(cC, cD);
4533 cE = AVERAGE(cE, cF);
4534 c0 = AVERAGE(c0, c2);
4535 c4 = AVERAGE(c4, c6);
4536 c8 = AVERAGE(c8, cA);
4537 cC = AVERAGE(cC, cE);
4538 c0 = AVERAGE(c0, c4);
4539 c8 = AVERAGE(c8, cC);
4540 c0 = AVERAGE(c0, c8);
4541
4542 *(unsigned int*)(source0 + 4 * x) = c0;
4543 }
4544
4545 source0 += pitch;
4546 source1 += pitch;
4547 source2 += pitch;
4548 source3 += pitch;
4549 source4 += pitch;
4550 source5 += pitch;
4551 source6 += pitch;
4552 source7 += pitch;
4553 source8 += pitch;
4554 source9 += pitch;
4555 sourceA += pitch;
4556 sourceB += pitch;
4557 sourceC += pitch;
4558 sourceD += pitch;
4559 sourceE += pitch;
4560 sourceF += pitch;
4561 }
4562 }
4563 else ASSERT(false);
4564
4565 #undef AVERAGE
4566 }
4567 }
4568 else if(internal.format == FORMAT_A16B16G16R16)
4569 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004570 #if defined(__i386__) || defined(__x86_64__)
4571 if(CPUID::supportsSSE2() && (width % 2) == 0)
John Bauman89401822014-05-06 15:04:28 -04004572 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004573 if(internal.depth == 2)
John Bauman89401822014-05-06 15:04:28 -04004574 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004575 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004576 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004577 for(int x = 0; x < width; x += 2)
4578 {
4579 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x));
4580 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004581
Nicolas Capens47dc8672017-04-25 12:54:39 -04004582 c0 = _mm_avg_epu16(c0, c1);
John Bauman89401822014-05-06 15:04:28 -04004583
Nicolas Capens47dc8672017-04-25 12:54:39 -04004584 _mm_store_si128((__m128i*)(source0 + 8 * x), c0);
4585 }
4586
4587 source0 += pitch;
4588 source1 += pitch;
John Bauman89401822014-05-06 15:04:28 -04004589 }
John Bauman89401822014-05-06 15:04:28 -04004590 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04004591 else if(internal.depth == 4)
John Bauman89401822014-05-06 15:04:28 -04004592 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004593 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004594 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004595 for(int x = 0; x < width; x += 2)
4596 {
4597 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x));
4598 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x));
4599 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 8 * x));
4600 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 8 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004601
Nicolas Capens47dc8672017-04-25 12:54:39 -04004602 c0 = _mm_avg_epu16(c0, c1);
4603 c2 = _mm_avg_epu16(c2, c3);
4604 c0 = _mm_avg_epu16(c0, c2);
John Bauman89401822014-05-06 15:04:28 -04004605
Nicolas Capens47dc8672017-04-25 12:54:39 -04004606 _mm_store_si128((__m128i*)(source0 + 8 * x), c0);
4607 }
4608
4609 source0 += pitch;
4610 source1 += pitch;
4611 source2 += pitch;
4612 source3 += pitch;
John Bauman89401822014-05-06 15:04:28 -04004613 }
John Bauman89401822014-05-06 15:04:28 -04004614 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04004615 else if(internal.depth == 8)
John Bauman89401822014-05-06 15:04:28 -04004616 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004617 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004618 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004619 for(int x = 0; x < width; x += 2)
4620 {
4621 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x));
4622 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x));
4623 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 8 * x));
4624 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 8 * x));
4625 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 8 * x));
4626 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 8 * x));
4627 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 8 * x));
4628 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 8 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004629
Nicolas Capens47dc8672017-04-25 12:54:39 -04004630 c0 = _mm_avg_epu16(c0, c1);
4631 c2 = _mm_avg_epu16(c2, c3);
4632 c4 = _mm_avg_epu16(c4, c5);
4633 c6 = _mm_avg_epu16(c6, c7);
4634 c0 = _mm_avg_epu16(c0, c2);
4635 c4 = _mm_avg_epu16(c4, c6);
4636 c0 = _mm_avg_epu16(c0, c4);
John Bauman89401822014-05-06 15:04:28 -04004637
Nicolas Capens47dc8672017-04-25 12:54:39 -04004638 _mm_store_si128((__m128i*)(source0 + 8 * x), c0);
4639 }
4640
4641 source0 += pitch;
4642 source1 += pitch;
4643 source2 += pitch;
4644 source3 += pitch;
4645 source4 += pitch;
4646 source5 += pitch;
4647 source6 += pitch;
4648 source7 += pitch;
John Bauman89401822014-05-06 15:04:28 -04004649 }
John Bauman89401822014-05-06 15:04:28 -04004650 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04004651 else if(internal.depth == 16)
John Bauman89401822014-05-06 15:04:28 -04004652 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004653 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004654 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004655 for(int x = 0; x < width; x += 2)
4656 {
4657 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x));
4658 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x));
4659 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 8 * x));
4660 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 8 * x));
4661 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 8 * x));
4662 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 8 * x));
4663 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 8 * x));
4664 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 8 * x));
4665 __m128i c8 = _mm_load_si128((__m128i*)(source8 + 8 * x));
4666 __m128i c9 = _mm_load_si128((__m128i*)(source9 + 8 * x));
4667 __m128i cA = _mm_load_si128((__m128i*)(sourceA + 8 * x));
4668 __m128i cB = _mm_load_si128((__m128i*)(sourceB + 8 * x));
4669 __m128i cC = _mm_load_si128((__m128i*)(sourceC + 8 * x));
4670 __m128i cD = _mm_load_si128((__m128i*)(sourceD + 8 * x));
4671 __m128i cE = _mm_load_si128((__m128i*)(sourceE + 8 * x));
4672 __m128i cF = _mm_load_si128((__m128i*)(sourceF + 8 * x));
John Bauman89401822014-05-06 15:04:28 -04004673
Nicolas Capens47dc8672017-04-25 12:54:39 -04004674 c0 = _mm_avg_epu16(c0, c1);
4675 c2 = _mm_avg_epu16(c2, c3);
4676 c4 = _mm_avg_epu16(c4, c5);
4677 c6 = _mm_avg_epu16(c6, c7);
4678 c8 = _mm_avg_epu16(c8, c9);
4679 cA = _mm_avg_epu16(cA, cB);
4680 cC = _mm_avg_epu16(cC, cD);
4681 cE = _mm_avg_epu16(cE, cF);
4682 c0 = _mm_avg_epu16(c0, c2);
4683 c4 = _mm_avg_epu16(c4, c6);
4684 c8 = _mm_avg_epu16(c8, cA);
4685 cC = _mm_avg_epu16(cC, cE);
4686 c0 = _mm_avg_epu16(c0, c4);
4687 c8 = _mm_avg_epu16(c8, cC);
4688 c0 = _mm_avg_epu16(c0, c8);
John Bauman89401822014-05-06 15:04:28 -04004689
Nicolas Capens47dc8672017-04-25 12:54:39 -04004690 _mm_store_si128((__m128i*)(source0 + 8 * x), c0);
4691 }
4692
4693 source0 += pitch;
4694 source1 += pitch;
4695 source2 += pitch;
4696 source3 += pitch;
4697 source4 += pitch;
4698 source5 += pitch;
4699 source6 += pitch;
4700 source7 += pitch;
4701 source8 += pitch;
4702 source9 += pitch;
4703 sourceA += pitch;
4704 sourceB += pitch;
4705 sourceC += pitch;
4706 sourceD += pitch;
4707 sourceE += pitch;
4708 sourceF += pitch;
John Bauman89401822014-05-06 15:04:28 -04004709 }
John Bauman89401822014-05-06 15:04:28 -04004710 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04004711 else ASSERT(false);
John Bauman89401822014-05-06 15:04:28 -04004712 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04004713 else
4714 #endif
John Bauman89401822014-05-06 15:04:28 -04004715 {
4716 #define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7FFF7FFF) + (((x) ^ (y)) & 0x00010001))
4717
4718 if(internal.depth == 2)
4719 {
4720 for(int y = 0; y < height; y++)
4721 {
4722 for(int x = 0; x < 2 * width; x++)
4723 {
4724 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4725 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4726
4727 c0 = AVERAGE(c0, c1);
4728
4729 *(unsigned int*)(source0 + 4 * x) = c0;
4730 }
4731
4732 source0 += pitch;
4733 source1 += pitch;
4734 }
4735 }
4736 else if(internal.depth == 4)
4737 {
4738 for(int y = 0; y < height; y++)
4739 {
4740 for(int x = 0; x < 2 * width; x++)
4741 {
4742 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4743 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4744 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4745 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4746
4747 c0 = AVERAGE(c0, c1);
4748 c2 = AVERAGE(c2, c3);
4749 c0 = AVERAGE(c0, c2);
4750
4751 *(unsigned int*)(source0 + 4 * x) = c0;
4752 }
4753
4754 source0 += pitch;
4755 source1 += pitch;
4756 source2 += pitch;
4757 source3 += pitch;
4758 }
4759 }
4760 else if(internal.depth == 8)
4761 {
4762 for(int y = 0; y < height; y++)
4763 {
4764 for(int x = 0; x < 2 * width; x++)
4765 {
4766 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4767 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4768 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4769 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4770 unsigned int c4 = *(unsigned int*)(source4 + 4 * x);
4771 unsigned int c5 = *(unsigned int*)(source5 + 4 * x);
4772 unsigned int c6 = *(unsigned int*)(source6 + 4 * x);
4773 unsigned int c7 = *(unsigned int*)(source7 + 4 * x);
4774
4775 c0 = AVERAGE(c0, c1);
4776 c2 = AVERAGE(c2, c3);
4777 c4 = AVERAGE(c4, c5);
4778 c6 = AVERAGE(c6, c7);
4779 c0 = AVERAGE(c0, c2);
4780 c4 = AVERAGE(c4, c6);
4781 c0 = AVERAGE(c0, c4);
4782
4783 *(unsigned int*)(source0 + 4 * x) = c0;
4784 }
4785
4786 source0 += pitch;
4787 source1 += pitch;
4788 source2 += pitch;
4789 source3 += pitch;
4790 source4 += pitch;
4791 source5 += pitch;
4792 source6 += pitch;
4793 source7 += pitch;
4794 }
4795 }
4796 else if(internal.depth == 16)
4797 {
4798 for(int y = 0; y < height; y++)
4799 {
4800 for(int x = 0; x < 2 * width; x++)
4801 {
4802 unsigned int c0 = *(unsigned int*)(source0 + 4 * x);
4803 unsigned int c1 = *(unsigned int*)(source1 + 4 * x);
4804 unsigned int c2 = *(unsigned int*)(source2 + 4 * x);
4805 unsigned int c3 = *(unsigned int*)(source3 + 4 * x);
4806 unsigned int c4 = *(unsigned int*)(source4 + 4 * x);
4807 unsigned int c5 = *(unsigned int*)(source5 + 4 * x);
4808 unsigned int c6 = *(unsigned int*)(source6 + 4 * x);
4809 unsigned int c7 = *(unsigned int*)(source7 + 4 * x);
4810 unsigned int c8 = *(unsigned int*)(source8 + 4 * x);
4811 unsigned int c9 = *(unsigned int*)(source9 + 4 * x);
4812 unsigned int cA = *(unsigned int*)(sourceA + 4 * x);
4813 unsigned int cB = *(unsigned int*)(sourceB + 4 * x);
4814 unsigned int cC = *(unsigned int*)(sourceC + 4 * x);
4815 unsigned int cD = *(unsigned int*)(sourceD + 4 * x);
4816 unsigned int cE = *(unsigned int*)(sourceE + 4 * x);
4817 unsigned int cF = *(unsigned int*)(sourceF + 4 * x);
4818
4819 c0 = AVERAGE(c0, c1);
4820 c2 = AVERAGE(c2, c3);
4821 c4 = AVERAGE(c4, c5);
4822 c6 = AVERAGE(c6, c7);
4823 c8 = AVERAGE(c8, c9);
4824 cA = AVERAGE(cA, cB);
4825 cC = AVERAGE(cC, cD);
4826 cE = AVERAGE(cE, cF);
4827 c0 = AVERAGE(c0, c2);
4828 c4 = AVERAGE(c4, c6);
4829 c8 = AVERAGE(c8, cA);
4830 cC = AVERAGE(cC, cE);
4831 c0 = AVERAGE(c0, c4);
4832 c8 = AVERAGE(c8, cC);
4833 c0 = AVERAGE(c0, c8);
4834
4835 *(unsigned int*)(source0 + 4 * x) = c0;
4836 }
4837
4838 source0 += pitch;
4839 source1 += pitch;
4840 source2 += pitch;
4841 source3 += pitch;
4842 source4 += pitch;
4843 source5 += pitch;
4844 source6 += pitch;
4845 source7 += pitch;
4846 source8 += pitch;
4847 source9 += pitch;
4848 sourceA += pitch;
4849 sourceB += pitch;
4850 sourceC += pitch;
4851 sourceD += pitch;
4852 sourceE += pitch;
4853 sourceF += pitch;
4854 }
4855 }
4856 else ASSERT(false);
4857
4858 #undef AVERAGE
4859 }
4860 }
4861 else if(internal.format == FORMAT_R32F)
4862 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004863 #if defined(__i386__) || defined(__x86_64__)
4864 if(CPUID::supportsSSE() && (width % 4) == 0)
John Bauman89401822014-05-06 15:04:28 -04004865 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004866 if(internal.depth == 2)
John Bauman89401822014-05-06 15:04:28 -04004867 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004868 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004869 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004870 for(int x = 0; x < width; x += 4)
4871 {
4872 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x));
4873 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004874
Nicolas Capens47dc8672017-04-25 12:54:39 -04004875 c0 = _mm_add_ps(c0, c1);
4876 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 2.0f));
John Bauman89401822014-05-06 15:04:28 -04004877
Nicolas Capens47dc8672017-04-25 12:54:39 -04004878 _mm_store_ps((float*)(source0 + 4 * x), c0);
4879 }
4880
4881 source0 += pitch;
4882 source1 += pitch;
John Bauman89401822014-05-06 15:04:28 -04004883 }
John Bauman89401822014-05-06 15:04:28 -04004884 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04004885 else if(internal.depth == 4)
John Bauman89401822014-05-06 15:04:28 -04004886 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004887 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004888 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004889 for(int x = 0; x < width; x += 4)
4890 {
4891 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x));
4892 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x));
4893 __m128 c2 = _mm_load_ps((float*)(source2 + 4 * x));
4894 __m128 c3 = _mm_load_ps((float*)(source3 + 4 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004895
Nicolas Capens47dc8672017-04-25 12:54:39 -04004896 c0 = _mm_add_ps(c0, c1);
4897 c2 = _mm_add_ps(c2, c3);
4898 c0 = _mm_add_ps(c0, c2);
4899 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 4.0f));
John Bauman89401822014-05-06 15:04:28 -04004900
Nicolas Capens47dc8672017-04-25 12:54:39 -04004901 _mm_store_ps((float*)(source0 + 4 * x), c0);
4902 }
4903
4904 source0 += pitch;
4905 source1 += pitch;
4906 source2 += pitch;
4907 source3 += pitch;
John Bauman89401822014-05-06 15:04:28 -04004908 }
John Bauman89401822014-05-06 15:04:28 -04004909 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04004910 else if(internal.depth == 8)
John Bauman89401822014-05-06 15:04:28 -04004911 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004912 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004913 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004914 for(int x = 0; x < width; x += 4)
4915 {
4916 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x));
4917 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x));
4918 __m128 c2 = _mm_load_ps((float*)(source2 + 4 * x));
4919 __m128 c3 = _mm_load_ps((float*)(source3 + 4 * x));
4920 __m128 c4 = _mm_load_ps((float*)(source4 + 4 * x));
4921 __m128 c5 = _mm_load_ps((float*)(source5 + 4 * x));
4922 __m128 c6 = _mm_load_ps((float*)(source6 + 4 * x));
4923 __m128 c7 = _mm_load_ps((float*)(source7 + 4 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04004924
Nicolas Capens47dc8672017-04-25 12:54:39 -04004925 c0 = _mm_add_ps(c0, c1);
4926 c2 = _mm_add_ps(c2, c3);
4927 c4 = _mm_add_ps(c4, c5);
4928 c6 = _mm_add_ps(c6, c7);
4929 c0 = _mm_add_ps(c0, c2);
4930 c4 = _mm_add_ps(c4, c6);
4931 c0 = _mm_add_ps(c0, c4);
4932 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 8.0f));
John Bauman89401822014-05-06 15:04:28 -04004933
Nicolas Capens47dc8672017-04-25 12:54:39 -04004934 _mm_store_ps((float*)(source0 + 4 * x), c0);
4935 }
4936
4937 source0 += pitch;
4938 source1 += pitch;
4939 source2 += pitch;
4940 source3 += pitch;
4941 source4 += pitch;
4942 source5 += pitch;
4943 source6 += pitch;
4944 source7 += pitch;
John Bauman89401822014-05-06 15:04:28 -04004945 }
John Bauman89401822014-05-06 15:04:28 -04004946 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04004947 else if(internal.depth == 16)
John Bauman89401822014-05-06 15:04:28 -04004948 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004949 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04004950 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04004951 for(int x = 0; x < width; x += 4)
4952 {
4953 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x));
4954 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x));
4955 __m128 c2 = _mm_load_ps((float*)(source2 + 4 * x));
4956 __m128 c3 = _mm_load_ps((float*)(source3 + 4 * x));
4957 __m128 c4 = _mm_load_ps((float*)(source4 + 4 * x));
4958 __m128 c5 = _mm_load_ps((float*)(source5 + 4 * x));
4959 __m128 c6 = _mm_load_ps((float*)(source6 + 4 * x));
4960 __m128 c7 = _mm_load_ps((float*)(source7 + 4 * x));
4961 __m128 c8 = _mm_load_ps((float*)(source8 + 4 * x));
4962 __m128 c9 = _mm_load_ps((float*)(source9 + 4 * x));
4963 __m128 cA = _mm_load_ps((float*)(sourceA + 4 * x));
4964 __m128 cB = _mm_load_ps((float*)(sourceB + 4 * x));
4965 __m128 cC = _mm_load_ps((float*)(sourceC + 4 * x));
4966 __m128 cD = _mm_load_ps((float*)(sourceD + 4 * x));
4967 __m128 cE = _mm_load_ps((float*)(sourceE + 4 * x));
4968 __m128 cF = _mm_load_ps((float*)(sourceF + 4 * x));
John Bauman89401822014-05-06 15:04:28 -04004969
Nicolas Capens47dc8672017-04-25 12:54:39 -04004970 c0 = _mm_add_ps(c0, c1);
4971 c2 = _mm_add_ps(c2, c3);
4972 c4 = _mm_add_ps(c4, c5);
4973 c6 = _mm_add_ps(c6, c7);
4974 c8 = _mm_add_ps(c8, c9);
4975 cA = _mm_add_ps(cA, cB);
4976 cC = _mm_add_ps(cC, cD);
4977 cE = _mm_add_ps(cE, cF);
4978 c0 = _mm_add_ps(c0, c2);
4979 c4 = _mm_add_ps(c4, c6);
4980 c8 = _mm_add_ps(c8, cA);
4981 cC = _mm_add_ps(cC, cE);
4982 c0 = _mm_add_ps(c0, c4);
4983 c8 = _mm_add_ps(c8, cC);
4984 c0 = _mm_add_ps(c0, c8);
4985 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 16.0f));
John Bauman89401822014-05-06 15:04:28 -04004986
Nicolas Capens47dc8672017-04-25 12:54:39 -04004987 _mm_store_ps((float*)(source0 + 4 * x), c0);
4988 }
4989
4990 source0 += pitch;
4991 source1 += pitch;
4992 source2 += pitch;
4993 source3 += pitch;
4994 source4 += pitch;
4995 source5 += pitch;
4996 source6 += pitch;
4997 source7 += pitch;
4998 source8 += pitch;
4999 source9 += pitch;
5000 sourceA += pitch;
5001 sourceB += pitch;
5002 sourceC += pitch;
5003 sourceD += pitch;
5004 sourceE += pitch;
5005 sourceF += pitch;
John Bauman89401822014-05-06 15:04:28 -04005006 }
John Bauman89401822014-05-06 15:04:28 -04005007 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04005008 else ASSERT(false);
John Bauman89401822014-05-06 15:04:28 -04005009 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04005010 else
5011 #endif
John Bauman89401822014-05-06 15:04:28 -04005012 {
5013 if(internal.depth == 2)
5014 {
5015 for(int y = 0; y < height; y++)
5016 {
5017 for(int x = 0; x < width; x++)
5018 {
5019 float c0 = *(float*)(source0 + 4 * x);
5020 float c1 = *(float*)(source1 + 4 * x);
5021
5022 c0 = c0 + c1;
5023 c0 *= 1.0f / 2.0f;
5024
5025 *(float*)(source0 + 4 * x) = c0;
5026 }
5027
5028 source0 += pitch;
5029 source1 += pitch;
5030 }
5031 }
5032 else if(internal.depth == 4)
5033 {
5034 for(int y = 0; y < height; y++)
5035 {
5036 for(int x = 0; x < width; x++)
5037 {
5038 float c0 = *(float*)(source0 + 4 * x);
5039 float c1 = *(float*)(source1 + 4 * x);
5040 float c2 = *(float*)(source2 + 4 * x);
5041 float c3 = *(float*)(source3 + 4 * x);
5042
5043 c0 = c0 + c1;
5044 c2 = c2 + c3;
5045 c0 = c0 + c2;
5046 c0 *= 1.0f / 4.0f;
5047
5048 *(float*)(source0 + 4 * x) = c0;
5049 }
5050
5051 source0 += pitch;
5052 source1 += pitch;
5053 source2 += pitch;
5054 source3 += pitch;
5055 }
5056 }
5057 else if(internal.depth == 8)
5058 {
5059 for(int y = 0; y < height; y++)
5060 {
5061 for(int x = 0; x < width; x++)
5062 {
5063 float c0 = *(float*)(source0 + 4 * x);
5064 float c1 = *(float*)(source1 + 4 * x);
5065 float c2 = *(float*)(source2 + 4 * x);
5066 float c3 = *(float*)(source3 + 4 * x);
5067 float c4 = *(float*)(source4 + 4 * x);
5068 float c5 = *(float*)(source5 + 4 * x);
5069 float c6 = *(float*)(source6 + 4 * x);
5070 float c7 = *(float*)(source7 + 4 * x);
5071
5072 c0 = c0 + c1;
5073 c2 = c2 + c3;
5074 c4 = c4 + c5;
5075 c6 = c6 + c7;
5076 c0 = c0 + c2;
5077 c4 = c4 + c6;
5078 c0 = c0 + c4;
5079 c0 *= 1.0f / 8.0f;
5080
5081 *(float*)(source0 + 4 * x) = c0;
5082 }
5083
5084 source0 += pitch;
5085 source1 += pitch;
5086 source2 += pitch;
5087 source3 += pitch;
5088 source4 += pitch;
5089 source5 += pitch;
5090 source6 += pitch;
5091 source7 += pitch;
5092 }
5093 }
5094 else if(internal.depth == 16)
5095 {
5096 for(int y = 0; y < height; y++)
5097 {
5098 for(int x = 0; x < width; x++)
5099 {
5100 float c0 = *(float*)(source0 + 4 * x);
5101 float c1 = *(float*)(source1 + 4 * x);
5102 float c2 = *(float*)(source2 + 4 * x);
5103 float c3 = *(float*)(source3 + 4 * x);
5104 float c4 = *(float*)(source4 + 4 * x);
5105 float c5 = *(float*)(source5 + 4 * x);
5106 float c6 = *(float*)(source6 + 4 * x);
5107 float c7 = *(float*)(source7 + 4 * x);
5108 float c8 = *(float*)(source8 + 4 * x);
5109 float c9 = *(float*)(source9 + 4 * x);
5110 float cA = *(float*)(sourceA + 4 * x);
5111 float cB = *(float*)(sourceB + 4 * x);
5112 float cC = *(float*)(sourceC + 4 * x);
5113 float cD = *(float*)(sourceD + 4 * x);
5114 float cE = *(float*)(sourceE + 4 * x);
5115 float cF = *(float*)(sourceF + 4 * x);
5116
5117 c0 = c0 + c1;
5118 c2 = c2 + c3;
5119 c4 = c4 + c5;
5120 c6 = c6 + c7;
5121 c8 = c8 + c9;
5122 cA = cA + cB;
5123 cC = cC + cD;
5124 cE = cE + cF;
5125 c0 = c0 + c2;
5126 c4 = c4 + c6;
5127 c8 = c8 + cA;
5128 cC = cC + cE;
5129 c0 = c0 + c4;
5130 c8 = c8 + cC;
5131 c0 = c0 + c8;
5132 c0 *= 1.0f / 16.0f;
5133
5134 *(float*)(source0 + 4 * x) = c0;
5135 }
5136
5137 source0 += pitch;
5138 source1 += pitch;
5139 source2 += pitch;
5140 source3 += pitch;
5141 source4 += pitch;
5142 source5 += pitch;
5143 source6 += pitch;
5144 source7 += pitch;
5145 source8 += pitch;
5146 source9 += pitch;
5147 sourceA += pitch;
5148 sourceB += pitch;
5149 sourceC += pitch;
5150 sourceD += pitch;
5151 sourceE += pitch;
5152 sourceF += pitch;
5153 }
5154 }
5155 else ASSERT(false);
5156 }
5157 }
5158 else if(internal.format == FORMAT_G32R32F)
5159 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005160 #if defined(__i386__) || defined(__x86_64__)
5161 if(CPUID::supportsSSE() && (width % 2) == 0)
John Bauman89401822014-05-06 15:04:28 -04005162 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005163 if(internal.depth == 2)
John Bauman89401822014-05-06 15:04:28 -04005164 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005165 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04005166 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005167 for(int x = 0; x < width; x += 2)
5168 {
5169 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x));
5170 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04005171
Nicolas Capens47dc8672017-04-25 12:54:39 -04005172 c0 = _mm_add_ps(c0, c1);
5173 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 2.0f));
John Bauman89401822014-05-06 15:04:28 -04005174
Nicolas Capens47dc8672017-04-25 12:54:39 -04005175 _mm_store_ps((float*)(source0 + 8 * x), c0);
5176 }
5177
5178 source0 += pitch;
5179 source1 += pitch;
John Bauman89401822014-05-06 15:04:28 -04005180 }
John Bauman89401822014-05-06 15:04:28 -04005181 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04005182 else if(internal.depth == 4)
John Bauman89401822014-05-06 15:04:28 -04005183 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005184 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04005185 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005186 for(int x = 0; x < width; x += 2)
5187 {
5188 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x));
5189 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x));
5190 __m128 c2 = _mm_load_ps((float*)(source2 + 8 * x));
5191 __m128 c3 = _mm_load_ps((float*)(source3 + 8 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04005192
Nicolas Capens47dc8672017-04-25 12:54:39 -04005193 c0 = _mm_add_ps(c0, c1);
5194 c2 = _mm_add_ps(c2, c3);
5195 c0 = _mm_add_ps(c0, c2);
5196 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 4.0f));
John Bauman89401822014-05-06 15:04:28 -04005197
Nicolas Capens47dc8672017-04-25 12:54:39 -04005198 _mm_store_ps((float*)(source0 + 8 * x), c0);
5199 }
5200
5201 source0 += pitch;
5202 source1 += pitch;
5203 source2 += pitch;
5204 source3 += pitch;
John Bauman89401822014-05-06 15:04:28 -04005205 }
John Bauman89401822014-05-06 15:04:28 -04005206 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04005207 else if(internal.depth == 8)
John Bauman89401822014-05-06 15:04:28 -04005208 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005209 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04005210 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005211 for(int x = 0; x < width; x += 2)
5212 {
5213 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x));
5214 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x));
5215 __m128 c2 = _mm_load_ps((float*)(source2 + 8 * x));
5216 __m128 c3 = _mm_load_ps((float*)(source3 + 8 * x));
5217 __m128 c4 = _mm_load_ps((float*)(source4 + 8 * x));
5218 __m128 c5 = _mm_load_ps((float*)(source5 + 8 * x));
5219 __m128 c6 = _mm_load_ps((float*)(source6 + 8 * x));
5220 __m128 c7 = _mm_load_ps((float*)(source7 + 8 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04005221
Nicolas Capens47dc8672017-04-25 12:54:39 -04005222 c0 = _mm_add_ps(c0, c1);
5223 c2 = _mm_add_ps(c2, c3);
5224 c4 = _mm_add_ps(c4, c5);
5225 c6 = _mm_add_ps(c6, c7);
5226 c0 = _mm_add_ps(c0, c2);
5227 c4 = _mm_add_ps(c4, c6);
5228 c0 = _mm_add_ps(c0, c4);
5229 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 8.0f));
John Bauman89401822014-05-06 15:04:28 -04005230
Nicolas Capens47dc8672017-04-25 12:54:39 -04005231 _mm_store_ps((float*)(source0 + 8 * x), c0);
5232 }
5233
5234 source0 += pitch;
5235 source1 += pitch;
5236 source2 += pitch;
5237 source3 += pitch;
5238 source4 += pitch;
5239 source5 += pitch;
5240 source6 += pitch;
5241 source7 += pitch;
John Bauman89401822014-05-06 15:04:28 -04005242 }
John Bauman89401822014-05-06 15:04:28 -04005243 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04005244 else if(internal.depth == 16)
John Bauman89401822014-05-06 15:04:28 -04005245 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005246 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04005247 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005248 for(int x = 0; x < width; x += 2)
5249 {
5250 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x));
5251 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x));
5252 __m128 c2 = _mm_load_ps((float*)(source2 + 8 * x));
5253 __m128 c3 = _mm_load_ps((float*)(source3 + 8 * x));
5254 __m128 c4 = _mm_load_ps((float*)(source4 + 8 * x));
5255 __m128 c5 = _mm_load_ps((float*)(source5 + 8 * x));
5256 __m128 c6 = _mm_load_ps((float*)(source6 + 8 * x));
5257 __m128 c7 = _mm_load_ps((float*)(source7 + 8 * x));
5258 __m128 c8 = _mm_load_ps((float*)(source8 + 8 * x));
5259 __m128 c9 = _mm_load_ps((float*)(source9 + 8 * x));
5260 __m128 cA = _mm_load_ps((float*)(sourceA + 8 * x));
5261 __m128 cB = _mm_load_ps((float*)(sourceB + 8 * x));
5262 __m128 cC = _mm_load_ps((float*)(sourceC + 8 * x));
5263 __m128 cD = _mm_load_ps((float*)(sourceD + 8 * x));
5264 __m128 cE = _mm_load_ps((float*)(sourceE + 8 * x));
5265 __m128 cF = _mm_load_ps((float*)(sourceF + 8 * x));
John Bauman89401822014-05-06 15:04:28 -04005266
Nicolas Capens47dc8672017-04-25 12:54:39 -04005267 c0 = _mm_add_ps(c0, c1);
5268 c2 = _mm_add_ps(c2, c3);
5269 c4 = _mm_add_ps(c4, c5);
5270 c6 = _mm_add_ps(c6, c7);
5271 c8 = _mm_add_ps(c8, c9);
5272 cA = _mm_add_ps(cA, cB);
5273 cC = _mm_add_ps(cC, cD);
5274 cE = _mm_add_ps(cE, cF);
5275 c0 = _mm_add_ps(c0, c2);
5276 c4 = _mm_add_ps(c4, c6);
5277 c8 = _mm_add_ps(c8, cA);
5278 cC = _mm_add_ps(cC, cE);
5279 c0 = _mm_add_ps(c0, c4);
5280 c8 = _mm_add_ps(c8, cC);
5281 c0 = _mm_add_ps(c0, c8);
5282 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 16.0f));
John Bauman89401822014-05-06 15:04:28 -04005283
Nicolas Capens47dc8672017-04-25 12:54:39 -04005284 _mm_store_ps((float*)(source0 + 8 * x), c0);
5285 }
5286
5287 source0 += pitch;
5288 source1 += pitch;
5289 source2 += pitch;
5290 source3 += pitch;
5291 source4 += pitch;
5292 source5 += pitch;
5293 source6 += pitch;
5294 source7 += pitch;
5295 source8 += pitch;
5296 source9 += pitch;
5297 sourceA += pitch;
5298 sourceB += pitch;
5299 sourceC += pitch;
5300 sourceD += pitch;
5301 sourceE += pitch;
5302 sourceF += pitch;
John Bauman89401822014-05-06 15:04:28 -04005303 }
John Bauman89401822014-05-06 15:04:28 -04005304 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04005305 else ASSERT(false);
John Bauman89401822014-05-06 15:04:28 -04005306 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04005307 else
5308 #endif
John Bauman89401822014-05-06 15:04:28 -04005309 {
5310 if(internal.depth == 2)
5311 {
5312 for(int y = 0; y < height; y++)
5313 {
5314 for(int x = 0; x < 2 * width; x++)
5315 {
5316 float c0 = *(float*)(source0 + 4 * x);
5317 float c1 = *(float*)(source1 + 4 * x);
5318
5319 c0 = c0 + c1;
5320 c0 *= 1.0f / 2.0f;
5321
5322 *(float*)(source0 + 4 * x) = c0;
5323 }
5324
5325 source0 += pitch;
5326 source1 += pitch;
5327 }
5328 }
5329 else if(internal.depth == 4)
5330 {
5331 for(int y = 0; y < height; y++)
5332 {
5333 for(int x = 0; x < 2 * width; x++)
5334 {
5335 float c0 = *(float*)(source0 + 4 * x);
5336 float c1 = *(float*)(source1 + 4 * x);
5337 float c2 = *(float*)(source2 + 4 * x);
5338 float c3 = *(float*)(source3 + 4 * x);
5339
5340 c0 = c0 + c1;
5341 c2 = c2 + c3;
5342 c0 = c0 + c2;
5343 c0 *= 1.0f / 4.0f;
5344
5345 *(float*)(source0 + 4 * x) = c0;
5346 }
5347
5348 source0 += pitch;
5349 source1 += pitch;
5350 source2 += pitch;
5351 source3 += pitch;
5352 }
5353 }
5354 else if(internal.depth == 8)
5355 {
5356 for(int y = 0; y < height; y++)
5357 {
5358 for(int x = 0; x < 2 * width; x++)
5359 {
5360 float c0 = *(float*)(source0 + 4 * x);
5361 float c1 = *(float*)(source1 + 4 * x);
5362 float c2 = *(float*)(source2 + 4 * x);
5363 float c3 = *(float*)(source3 + 4 * x);
5364 float c4 = *(float*)(source4 + 4 * x);
5365 float c5 = *(float*)(source5 + 4 * x);
5366 float c6 = *(float*)(source6 + 4 * x);
5367 float c7 = *(float*)(source7 + 4 * x);
5368
5369 c0 = c0 + c1;
5370 c2 = c2 + c3;
5371 c4 = c4 + c5;
5372 c6 = c6 + c7;
5373 c0 = c0 + c2;
5374 c4 = c4 + c6;
5375 c0 = c0 + c4;
5376 c0 *= 1.0f / 8.0f;
5377
5378 *(float*)(source0 + 4 * x) = c0;
5379 }
5380
5381 source0 += pitch;
5382 source1 += pitch;
5383 source2 += pitch;
5384 source3 += pitch;
5385 source4 += pitch;
5386 source5 += pitch;
5387 source6 += pitch;
5388 source7 += pitch;
5389 }
5390 }
5391 else if(internal.depth == 16)
5392 {
5393 for(int y = 0; y < height; y++)
5394 {
5395 for(int x = 0; x < 2 * width; x++)
5396 {
5397 float c0 = *(float*)(source0 + 4 * x);
5398 float c1 = *(float*)(source1 + 4 * x);
5399 float c2 = *(float*)(source2 + 4 * x);
5400 float c3 = *(float*)(source3 + 4 * x);
5401 float c4 = *(float*)(source4 + 4 * x);
5402 float c5 = *(float*)(source5 + 4 * x);
5403 float c6 = *(float*)(source6 + 4 * x);
5404 float c7 = *(float*)(source7 + 4 * x);
5405 float c8 = *(float*)(source8 + 4 * x);
5406 float c9 = *(float*)(source9 + 4 * x);
5407 float cA = *(float*)(sourceA + 4 * x);
5408 float cB = *(float*)(sourceB + 4 * x);
5409 float cC = *(float*)(sourceC + 4 * x);
5410 float cD = *(float*)(sourceD + 4 * x);
5411 float cE = *(float*)(sourceE + 4 * x);
5412 float cF = *(float*)(sourceF + 4 * x);
5413
5414 c0 = c0 + c1;
5415 c2 = c2 + c3;
5416 c4 = c4 + c5;
5417 c6 = c6 + c7;
5418 c8 = c8 + c9;
5419 cA = cA + cB;
5420 cC = cC + cD;
5421 cE = cE + cF;
5422 c0 = c0 + c2;
5423 c4 = c4 + c6;
5424 c8 = c8 + cA;
5425 cC = cC + cE;
5426 c0 = c0 + c4;
5427 c8 = c8 + cC;
5428 c0 = c0 + c8;
5429 c0 *= 1.0f / 16.0f;
5430
5431 *(float*)(source0 + 4 * x) = c0;
5432 }
5433
5434 source0 += pitch;
5435 source1 += pitch;
5436 source2 += pitch;
5437 source3 += pitch;
5438 source4 += pitch;
5439 source5 += pitch;
5440 source6 += pitch;
5441 source7 += pitch;
5442 source8 += pitch;
5443 source9 += pitch;
5444 sourceA += pitch;
5445 sourceB += pitch;
5446 sourceC += pitch;
5447 sourceD += pitch;
5448 sourceE += pitch;
5449 sourceF += pitch;
5450 }
5451 }
5452 else ASSERT(false);
5453 }
5454 }
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04005455 else if(internal.format == FORMAT_A32B32G32R32F || internal.format == FORMAT_X32B32G32R32F)
John Bauman89401822014-05-06 15:04:28 -04005456 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005457 #if defined(__i386__) || defined(__x86_64__)
5458 if(CPUID::supportsSSE())
John Bauman89401822014-05-06 15:04:28 -04005459 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005460 if(internal.depth == 2)
John Bauman89401822014-05-06 15:04:28 -04005461 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005462 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04005463 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005464 for(int x = 0; x < width; x++)
5465 {
5466 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x));
5467 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04005468
Nicolas Capens47dc8672017-04-25 12:54:39 -04005469 c0 = _mm_add_ps(c0, c1);
5470 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 2.0f));
John Bauman89401822014-05-06 15:04:28 -04005471
Nicolas Capens47dc8672017-04-25 12:54:39 -04005472 _mm_store_ps((float*)(source0 + 16 * x), c0);
5473 }
5474
5475 source0 += pitch;
5476 source1 += pitch;
John Bauman89401822014-05-06 15:04:28 -04005477 }
John Bauman89401822014-05-06 15:04:28 -04005478 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04005479 else if(internal.depth == 4)
John Bauman89401822014-05-06 15:04:28 -04005480 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005481 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04005482 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005483 for(int x = 0; x < width; x++)
5484 {
5485 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x));
5486 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x));
5487 __m128 c2 = _mm_load_ps((float*)(source2 + 16 * x));
5488 __m128 c3 = _mm_load_ps((float*)(source3 + 16 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04005489
Nicolas Capens47dc8672017-04-25 12:54:39 -04005490 c0 = _mm_add_ps(c0, c1);
5491 c2 = _mm_add_ps(c2, c3);
5492 c0 = _mm_add_ps(c0, c2);
5493 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 4.0f));
John Bauman89401822014-05-06 15:04:28 -04005494
Nicolas Capens47dc8672017-04-25 12:54:39 -04005495 _mm_store_ps((float*)(source0 + 16 * x), c0);
5496 }
5497
5498 source0 += pitch;
5499 source1 += pitch;
5500 source2 += pitch;
5501 source3 += pitch;
John Bauman89401822014-05-06 15:04:28 -04005502 }
John Bauman89401822014-05-06 15:04:28 -04005503 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04005504 else if(internal.depth == 8)
John Bauman89401822014-05-06 15:04:28 -04005505 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005506 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04005507 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005508 for(int x = 0; x < width; x++)
5509 {
5510 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x));
5511 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x));
5512 __m128 c2 = _mm_load_ps((float*)(source2 + 16 * x));
5513 __m128 c3 = _mm_load_ps((float*)(source3 + 16 * x));
5514 __m128 c4 = _mm_load_ps((float*)(source4 + 16 * x));
5515 __m128 c5 = _mm_load_ps((float*)(source5 + 16 * x));
5516 __m128 c6 = _mm_load_ps((float*)(source6 + 16 * x));
5517 __m128 c7 = _mm_load_ps((float*)(source7 + 16 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04005518
Nicolas Capens47dc8672017-04-25 12:54:39 -04005519 c0 = _mm_add_ps(c0, c1);
5520 c2 = _mm_add_ps(c2, c3);
5521 c4 = _mm_add_ps(c4, c5);
5522 c6 = _mm_add_ps(c6, c7);
5523 c0 = _mm_add_ps(c0, c2);
5524 c4 = _mm_add_ps(c4, c6);
5525 c0 = _mm_add_ps(c0, c4);
5526 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 8.0f));
John Bauman89401822014-05-06 15:04:28 -04005527
Nicolas Capens47dc8672017-04-25 12:54:39 -04005528 _mm_store_ps((float*)(source0 + 16 * x), c0);
5529 }
5530
5531 source0 += pitch;
5532 source1 += pitch;
5533 source2 += pitch;
5534 source3 += pitch;
5535 source4 += pitch;
5536 source5 += pitch;
5537 source6 += pitch;
5538 source7 += pitch;
John Bauman89401822014-05-06 15:04:28 -04005539 }
John Bauman89401822014-05-06 15:04:28 -04005540 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04005541 else if(internal.depth == 16)
John Bauman89401822014-05-06 15:04:28 -04005542 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005543 for(int y = 0; y < height; y++)
John Bauman89401822014-05-06 15:04:28 -04005544 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005545 for(int x = 0; x < width; x++)
5546 {
5547 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x));
5548 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x));
5549 __m128 c2 = _mm_load_ps((float*)(source2 + 16 * x));
5550 __m128 c3 = _mm_load_ps((float*)(source3 + 16 * x));
5551 __m128 c4 = _mm_load_ps((float*)(source4 + 16 * x));
5552 __m128 c5 = _mm_load_ps((float*)(source5 + 16 * x));
5553 __m128 c6 = _mm_load_ps((float*)(source6 + 16 * x));
5554 __m128 c7 = _mm_load_ps((float*)(source7 + 16 * x));
5555 __m128 c8 = _mm_load_ps((float*)(source8 + 16 * x));
5556 __m128 c9 = _mm_load_ps((float*)(source9 + 16 * x));
5557 __m128 cA = _mm_load_ps((float*)(sourceA + 16 * x));
5558 __m128 cB = _mm_load_ps((float*)(sourceB + 16 * x));
5559 __m128 cC = _mm_load_ps((float*)(sourceC + 16 * x));
5560 __m128 cD = _mm_load_ps((float*)(sourceD + 16 * x));
5561 __m128 cE = _mm_load_ps((float*)(sourceE + 16 * x));
5562 __m128 cF = _mm_load_ps((float*)(sourceF + 16 * x));
John Bauman89401822014-05-06 15:04:28 -04005563
Nicolas Capens47dc8672017-04-25 12:54:39 -04005564 c0 = _mm_add_ps(c0, c1);
5565 c2 = _mm_add_ps(c2, c3);
5566 c4 = _mm_add_ps(c4, c5);
5567 c6 = _mm_add_ps(c6, c7);
5568 c8 = _mm_add_ps(c8, c9);
5569 cA = _mm_add_ps(cA, cB);
5570 cC = _mm_add_ps(cC, cD);
5571 cE = _mm_add_ps(cE, cF);
5572 c0 = _mm_add_ps(c0, c2);
5573 c4 = _mm_add_ps(c4, c6);
5574 c8 = _mm_add_ps(c8, cA);
5575 cC = _mm_add_ps(cC, cE);
5576 c0 = _mm_add_ps(c0, c4);
5577 c8 = _mm_add_ps(c8, cC);
5578 c0 = _mm_add_ps(c0, c8);
5579 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 16.0f));
John Bauman89401822014-05-06 15:04:28 -04005580
Nicolas Capens47dc8672017-04-25 12:54:39 -04005581 _mm_store_ps((float*)(source0 + 16 * x), c0);
5582 }
5583
5584 source0 += pitch;
5585 source1 += pitch;
5586 source2 += pitch;
5587 source3 += pitch;
5588 source4 += pitch;
5589 source5 += pitch;
5590 source6 += pitch;
5591 source7 += pitch;
5592 source8 += pitch;
5593 source9 += pitch;
5594 sourceA += pitch;
5595 sourceB += pitch;
5596 sourceC += pitch;
5597 sourceD += pitch;
5598 sourceE += pitch;
5599 sourceF += pitch;
John Bauman89401822014-05-06 15:04:28 -04005600 }
John Bauman89401822014-05-06 15:04:28 -04005601 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04005602 else ASSERT(false);
John Bauman89401822014-05-06 15:04:28 -04005603 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04005604 else
5605 #endif
John Bauman89401822014-05-06 15:04:28 -04005606 {
5607 if(internal.depth == 2)
5608 {
5609 for(int y = 0; y < height; y++)
5610 {
5611 for(int x = 0; x < 4 * width; x++)
5612 {
5613 float c0 = *(float*)(source0 + 4 * x);
5614 float c1 = *(float*)(source1 + 4 * x);
5615
5616 c0 = c0 + c1;
5617 c0 *= 1.0f / 2.0f;
5618
5619 *(float*)(source0 + 4 * x) = c0;
5620 }
5621
5622 source0 += pitch;
5623 source1 += pitch;
5624 }
5625 }
5626 else if(internal.depth == 4)
5627 {
5628 for(int y = 0; y < height; y++)
5629 {
5630 for(int x = 0; x < 4 * width; x++)
5631 {
5632 float c0 = *(float*)(source0 + 4 * x);
5633 float c1 = *(float*)(source1 + 4 * x);
5634 float c2 = *(float*)(source2 + 4 * x);
5635 float c3 = *(float*)(source3 + 4 * x);
5636
5637 c0 = c0 + c1;
5638 c2 = c2 + c3;
5639 c0 = c0 + c2;
5640 c0 *= 1.0f / 4.0f;
5641
5642 *(float*)(source0 + 4 * x) = c0;
5643 }
5644
5645 source0 += pitch;
5646 source1 += pitch;
5647 source2 += pitch;
5648 source3 += pitch;
5649 }
5650 }
5651 else if(internal.depth == 8)
5652 {
5653 for(int y = 0; y < height; y++)
5654 {
5655 for(int x = 0; x < 4 * width; x++)
5656 {
5657 float c0 = *(float*)(source0 + 4 * x);
5658 float c1 = *(float*)(source1 + 4 * x);
5659 float c2 = *(float*)(source2 + 4 * x);
5660 float c3 = *(float*)(source3 + 4 * x);
5661 float c4 = *(float*)(source4 + 4 * x);
5662 float c5 = *(float*)(source5 + 4 * x);
5663 float c6 = *(float*)(source6 + 4 * x);
5664 float c7 = *(float*)(source7 + 4 * x);
5665
5666 c0 = c0 + c1;
5667 c2 = c2 + c3;
5668 c4 = c4 + c5;
5669 c6 = c6 + c7;
5670 c0 = c0 + c2;
5671 c4 = c4 + c6;
5672 c0 = c0 + c4;
5673 c0 *= 1.0f / 8.0f;
5674
5675 *(float*)(source0 + 4 * x) = c0;
5676 }
5677
5678 source0 += pitch;
5679 source1 += pitch;
5680 source2 += pitch;
5681 source3 += pitch;
5682 source4 += pitch;
5683 source5 += pitch;
5684 source6 += pitch;
5685 source7 += pitch;
5686 }
5687 }
5688 else if(internal.depth == 16)
5689 {
5690 for(int y = 0; y < height; y++)
5691 {
5692 for(int x = 0; x < 4 * width; x++)
5693 {
5694 float c0 = *(float*)(source0 + 4 * x);
5695 float c1 = *(float*)(source1 + 4 * x);
5696 float c2 = *(float*)(source2 + 4 * x);
5697 float c3 = *(float*)(source3 + 4 * x);
5698 float c4 = *(float*)(source4 + 4 * x);
5699 float c5 = *(float*)(source5 + 4 * x);
5700 float c6 = *(float*)(source6 + 4 * x);
5701 float c7 = *(float*)(source7 + 4 * x);
5702 float c8 = *(float*)(source8 + 4 * x);
5703 float c9 = *(float*)(source9 + 4 * x);
5704 float cA = *(float*)(sourceA + 4 * x);
5705 float cB = *(float*)(sourceB + 4 * x);
5706 float cC = *(float*)(sourceC + 4 * x);
5707 float cD = *(float*)(sourceD + 4 * x);
5708 float cE = *(float*)(sourceE + 4 * x);
5709 float cF = *(float*)(sourceF + 4 * x);
5710
5711 c0 = c0 + c1;
5712 c2 = c2 + c3;
5713 c4 = c4 + c5;
5714 c6 = c6 + c7;
5715 c8 = c8 + c9;
5716 cA = cA + cB;
5717 cC = cC + cD;
5718 cE = cE + cF;
5719 c0 = c0 + c2;
5720 c4 = c4 + c6;
5721 c8 = c8 + cA;
5722 cC = cC + cE;
5723 c0 = c0 + c4;
5724 c8 = c8 + cC;
5725 c0 = c0 + c8;
5726 c0 *= 1.0f / 16.0f;
5727
5728 *(float*)(source0 + 4 * x) = c0;
5729 }
5730
5731 source0 += pitch;
5732 source1 += pitch;
5733 source2 += pitch;
5734 source3 += pitch;
5735 source4 += pitch;
5736 source5 += pitch;
5737 source6 += pitch;
5738 source7 += pitch;
5739 source8 += pitch;
5740 source9 += pitch;
5741 sourceA += pitch;
5742 sourceB += pitch;
5743 sourceC += pitch;
5744 sourceD += pitch;
5745 sourceE += pitch;
5746 sourceF += pitch;
5747 }
5748 }
5749 else ASSERT(false);
5750 }
5751 }
Nicolas Capens0e12a922015-09-04 09:18:15 -04005752 else if(internal.format == FORMAT_R5G6B5)
5753 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005754 #if defined(__i386__) || defined(__x86_64__)
5755 if(CPUID::supportsSSE2() && (width % 8) == 0)
Nicolas Capens0e12a922015-09-04 09:18:15 -04005756 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005757 if(internal.depth == 2)
Nicolas Capens0e12a922015-09-04 09:18:15 -04005758 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005759 for(int y = 0; y < height; y++)
Nicolas Capens0e12a922015-09-04 09:18:15 -04005760 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005761 for(int x = 0; x < width; x += 8)
5762 {
5763 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x));
5764 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04005765
Nicolas Capens47dc8672017-04-25 12:54:39 -04005766 static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F};
5767 static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0};
5768 __m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
5769 __m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(_g_));
5770 __m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b));
5771 __m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
Nicolas Capens0e12a922015-09-04 09:18:15 -04005772
Nicolas Capens47dc8672017-04-25 12:54:39 -04005773 c0 = _mm_avg_epu8(c0_r_b, c1_r_b);
5774 c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
5775 c1 = _mm_avg_epu16(c0__g_, c1__g_);
5776 c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
5777 c0 = _mm_or_si128(c0, c1);
Nicolas Capens0e12a922015-09-04 09:18:15 -04005778
Nicolas Capens47dc8672017-04-25 12:54:39 -04005779 _mm_store_si128((__m128i*)(source0 + 2 * x), c0);
5780 }
5781
5782 source0 += pitch;
5783 source1 += pitch;
Nicolas Capens0e12a922015-09-04 09:18:15 -04005784 }
Nicolas Capens0e12a922015-09-04 09:18:15 -04005785 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04005786 else if(internal.depth == 4)
Nicolas Capens0e12a922015-09-04 09:18:15 -04005787 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005788 for(int y = 0; y < height; y++)
Nicolas Capens0e12a922015-09-04 09:18:15 -04005789 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005790 for(int x = 0; x < width; x += 8)
5791 {
5792 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x));
5793 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x));
5794 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 2 * x));
5795 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 2 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04005796
Nicolas Capens47dc8672017-04-25 12:54:39 -04005797 static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F};
5798 static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0};
5799 __m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
5800 __m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(_g_));
5801 __m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b));
5802 __m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
5803 __m128i c2_r_b = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(r_b));
5804 __m128i c2__g_ = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(_g_));
5805 __m128i c3_r_b = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(r_b));
5806 __m128i c3__g_ = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(_g_));
Nicolas Capens0e12a922015-09-04 09:18:15 -04005807
Nicolas Capens47dc8672017-04-25 12:54:39 -04005808 c0 = _mm_avg_epu8(c0_r_b, c1_r_b);
5809 c2 = _mm_avg_epu8(c2_r_b, c3_r_b);
5810 c0 = _mm_avg_epu8(c0, c2);
5811 c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
5812 c1 = _mm_avg_epu16(c0__g_, c1__g_);
5813 c3 = _mm_avg_epu16(c2__g_, c3__g_);
5814 c1 = _mm_avg_epu16(c1, c3);
5815 c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
5816 c0 = _mm_or_si128(c0, c1);
Nicolas Capens0e12a922015-09-04 09:18:15 -04005817
Nicolas Capens47dc8672017-04-25 12:54:39 -04005818 _mm_store_si128((__m128i*)(source0 + 2 * x), c0);
5819 }
5820
5821 source0 += pitch;
5822 source1 += pitch;
5823 source2 += pitch;
5824 source3 += pitch;
Nicolas Capens0e12a922015-09-04 09:18:15 -04005825 }
Nicolas Capens0e12a922015-09-04 09:18:15 -04005826 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04005827 else if(internal.depth == 8)
Nicolas Capens0e12a922015-09-04 09:18:15 -04005828 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005829 for(int y = 0; y < height; y++)
Nicolas Capens0e12a922015-09-04 09:18:15 -04005830 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005831 for(int x = 0; x < width; x += 8)
5832 {
5833 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x));
5834 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x));
5835 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 2 * x));
5836 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 2 * x));
5837 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 2 * x));
5838 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 2 * x));
5839 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 2 * x));
5840 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 2 * x));
Nicolas Capensc39901e2016-03-21 16:37:44 -04005841
Nicolas Capens47dc8672017-04-25 12:54:39 -04005842 static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F};
5843 static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0};
5844 __m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
5845 __m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(_g_));
5846 __m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b));
5847 __m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
5848 __m128i c2_r_b = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(r_b));
5849 __m128i c2__g_ = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(_g_));
5850 __m128i c3_r_b = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(r_b));
5851 __m128i c3__g_ = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(_g_));
5852 __m128i c4_r_b = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(r_b));
5853 __m128i c4__g_ = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(_g_));
5854 __m128i c5_r_b = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(r_b));
5855 __m128i c5__g_ = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(_g_));
5856 __m128i c6_r_b = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(r_b));
5857 __m128i c6__g_ = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(_g_));
5858 __m128i c7_r_b = _mm_and_si128(c7, reinterpret_cast<const __m128i&>(r_b));
5859 __m128i c7__g_ = _mm_and_si128(c7, reinterpret_cast<const __m128i&>(_g_));
Nicolas Capens0e12a922015-09-04 09:18:15 -04005860
Nicolas Capens47dc8672017-04-25 12:54:39 -04005861 c0 = _mm_avg_epu8(c0_r_b, c1_r_b);
5862 c2 = _mm_avg_epu8(c2_r_b, c3_r_b);
5863 c4 = _mm_avg_epu8(c4_r_b, c5_r_b);
5864 c6 = _mm_avg_epu8(c6_r_b, c7_r_b);
5865 c0 = _mm_avg_epu8(c0, c2);
5866 c4 = _mm_avg_epu8(c4, c6);
5867 c0 = _mm_avg_epu8(c0, c4);
5868 c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
5869 c1 = _mm_avg_epu16(c0__g_, c1__g_);
5870 c3 = _mm_avg_epu16(c2__g_, c3__g_);
5871 c5 = _mm_avg_epu16(c4__g_, c5__g_);
5872 c7 = _mm_avg_epu16(c6__g_, c7__g_);
5873 c1 = _mm_avg_epu16(c1, c3);
5874 c5 = _mm_avg_epu16(c5, c7);
5875 c1 = _mm_avg_epu16(c1, c5);
5876 c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
5877 c0 = _mm_or_si128(c0, c1);
Nicolas Capens0e12a922015-09-04 09:18:15 -04005878
Nicolas Capens47dc8672017-04-25 12:54:39 -04005879 _mm_store_si128((__m128i*)(source0 + 2 * x), c0);
5880 }
5881
5882 source0 += pitch;
5883 source1 += pitch;
5884 source2 += pitch;
5885 source3 += pitch;
5886 source4 += pitch;
5887 source5 += pitch;
5888 source6 += pitch;
5889 source7 += pitch;
Nicolas Capens0e12a922015-09-04 09:18:15 -04005890 }
Nicolas Capens0e12a922015-09-04 09:18:15 -04005891 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04005892 else if(internal.depth == 16)
Nicolas Capens0e12a922015-09-04 09:18:15 -04005893 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005894 for(int y = 0; y < height; y++)
Nicolas Capens0e12a922015-09-04 09:18:15 -04005895 {
Nicolas Capens47dc8672017-04-25 12:54:39 -04005896 for(int x = 0; x < width; x += 8)
5897 {
5898 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x));
5899 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x));
5900 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 2 * x));
5901 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 2 * x));
5902 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 2 * x));
5903 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 2 * x));
5904 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 2 * x));
5905 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 2 * x));
5906 __m128i c8 = _mm_load_si128((__m128i*)(source8 + 2 * x));
5907 __m128i c9 = _mm_load_si128((__m128i*)(source9 + 2 * x));
5908 __m128i cA = _mm_load_si128((__m128i*)(sourceA + 2 * x));
5909 __m128i cB = _mm_load_si128((__m128i*)(sourceB + 2 * x));
5910 __m128i cC = _mm_load_si128((__m128i*)(sourceC + 2 * x));
5911 __m128i cD = _mm_load_si128((__m128i*)(sourceD + 2 * x));
5912 __m128i cE = _mm_load_si128((__m128i*)(sourceE + 2 * x));
5913 __m128i cF = _mm_load_si128((__m128i*)(sourceF + 2 * x));
Nicolas Capens0e12a922015-09-04 09:18:15 -04005914
Nicolas Capens47dc8672017-04-25 12:54:39 -04005915 static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F};
5916 static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0};
5917 __m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
5918 __m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(_g_));
5919 __m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b));
5920 __m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
5921 __m128i c2_r_b = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(r_b));
5922 __m128i c2__g_ = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(_g_));
5923 __m128i c3_r_b = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(r_b));
5924 __m128i c3__g_ = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(_g_));
5925 __m128i c4_r_b = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(r_b));
5926 __m128i c4__g_ = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(_g_));
5927 __m128i c5_r_b = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(r_b));
5928 __m128i c5__g_ = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(_g_));
5929 __m128i c6_r_b = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(r_b));
5930 __m128i c6__g_ = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(_g_));
5931 __m128i c7_r_b = _mm_and_si128(c7, reinterpret_cast<const __m128i&>(r_b));
5932 __m128i c7__g_ = _mm_and_si128(c7, reinterpret_cast<const __m128i&>(_g_));
5933 __m128i c8_r_b = _mm_and_si128(c8, reinterpret_cast<const __m128i&>(r_b));
5934 __m128i c8__g_ = _mm_and_si128(c8, reinterpret_cast<const __m128i&>(_g_));
5935 __m128i c9_r_b = _mm_and_si128(c9, reinterpret_cast<const __m128i&>(r_b));
5936 __m128i c9__g_ = _mm_and_si128(c9, reinterpret_cast<const __m128i&>(_g_));
5937 __m128i cA_r_b = _mm_and_si128(cA, reinterpret_cast<const __m128i&>(r_b));
5938 __m128i cA__g_ = _mm_and_si128(cA, reinterpret_cast<const __m128i&>(_g_));
5939 __m128i cB_r_b = _mm_and_si128(cB, reinterpret_cast<const __m128i&>(r_b));
5940 __m128i cB__g_ = _mm_and_si128(cB, reinterpret_cast<const __m128i&>(_g_));
5941 __m128i cC_r_b = _mm_and_si128(cC, reinterpret_cast<const __m128i&>(r_b));
5942 __m128i cC__g_ = _mm_and_si128(cC, reinterpret_cast<const __m128i&>(_g_));
5943 __m128i cD_r_b = _mm_and_si128(cD, reinterpret_cast<const __m128i&>(r_b));
5944 __m128i cD__g_ = _mm_and_si128(cD, reinterpret_cast<const __m128i&>(_g_));
5945 __m128i cE_r_b = _mm_and_si128(cE, reinterpret_cast<const __m128i&>(r_b));
5946 __m128i cE__g_ = _mm_and_si128(cE, reinterpret_cast<const __m128i&>(_g_));
5947 __m128i cF_r_b = _mm_and_si128(cF, reinterpret_cast<const __m128i&>(r_b));
5948 __m128i cF__g_ = _mm_and_si128(cF, reinterpret_cast<const __m128i&>(_g_));
Nicolas Capens0e12a922015-09-04 09:18:15 -04005949
Nicolas Capens47dc8672017-04-25 12:54:39 -04005950 c0 = _mm_avg_epu8(c0_r_b, c1_r_b);
5951 c2 = _mm_avg_epu8(c2_r_b, c3_r_b);
5952 c4 = _mm_avg_epu8(c4_r_b, c5_r_b);
5953 c6 = _mm_avg_epu8(c6_r_b, c7_r_b);
5954 c8 = _mm_avg_epu8(c8_r_b, c9_r_b);
5955 cA = _mm_avg_epu8(cA_r_b, cB_r_b);
5956 cC = _mm_avg_epu8(cC_r_b, cD_r_b);
5957 cE = _mm_avg_epu8(cE_r_b, cF_r_b);
5958 c0 = _mm_avg_epu8(c0, c2);
5959 c4 = _mm_avg_epu8(c4, c6);
5960 c8 = _mm_avg_epu8(c8, cA);
5961 cC = _mm_avg_epu8(cC, cE);
5962 c0 = _mm_avg_epu8(c0, c4);
5963 c8 = _mm_avg_epu8(c8, cC);
5964 c0 = _mm_avg_epu8(c0, c8);
5965 c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
5966 c1 = _mm_avg_epu16(c0__g_, c1__g_);
5967 c3 = _mm_avg_epu16(c2__g_, c3__g_);
5968 c5 = _mm_avg_epu16(c4__g_, c5__g_);
5969 c7 = _mm_avg_epu16(c6__g_, c7__g_);
5970 c9 = _mm_avg_epu16(c8__g_, c9__g_);
5971 cB = _mm_avg_epu16(cA__g_, cB__g_);
5972 cD = _mm_avg_epu16(cC__g_, cD__g_);
5973 cF = _mm_avg_epu16(cE__g_, cF__g_);
5974 c1 = _mm_avg_epu8(c1, c3);
5975 c5 = _mm_avg_epu8(c5, c7);
5976 c9 = _mm_avg_epu8(c9, cB);
5977 cD = _mm_avg_epu8(cD, cF);
5978 c1 = _mm_avg_epu8(c1, c5);
5979 c9 = _mm_avg_epu8(c9, cD);
5980 c1 = _mm_avg_epu8(c1, c9);
5981 c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
5982 c0 = _mm_or_si128(c0, c1);
Nicolas Capens0e12a922015-09-04 09:18:15 -04005983
Nicolas Capens47dc8672017-04-25 12:54:39 -04005984 _mm_store_si128((__m128i*)(source0 + 2 * x), c0);
5985 }
5986
5987 source0 += pitch;
5988 source1 += pitch;
5989 source2 += pitch;
5990 source3 += pitch;
5991 source4 += pitch;
5992 source5 += pitch;
5993 source6 += pitch;
5994 source7 += pitch;
5995 source8 += pitch;
5996 source9 += pitch;
5997 sourceA += pitch;
5998 sourceB += pitch;
5999 sourceC += pitch;
6000 sourceD += pitch;
6001 sourceE += pitch;
6002 sourceF += pitch;
Nicolas Capens0e12a922015-09-04 09:18:15 -04006003 }
Nicolas Capens0e12a922015-09-04 09:18:15 -04006004 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04006005 else ASSERT(false);
Nicolas Capens0e12a922015-09-04 09:18:15 -04006006 }
Nicolas Capens47dc8672017-04-25 12:54:39 -04006007 else
6008 #endif
Nicolas Capens0e12a922015-09-04 09:18:15 -04006009 {
6010 #define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7BEF) + (((x) ^ (y)) & 0x0821))
6011
6012 if(internal.depth == 2)
6013 {
6014 for(int y = 0; y < height; y++)
6015 {
6016 for(int x = 0; x < width; x++)
6017 {
6018 unsigned short c0 = *(unsigned short*)(source0 + 2 * x);
6019 unsigned short c1 = *(unsigned short*)(source1 + 2 * x);
6020
6021 c0 = AVERAGE(c0, c1);
6022
6023 *(unsigned short*)(source0 + 2 * x) = c0;
6024 }
6025
6026 source0 += pitch;
6027 source1 += pitch;
6028 }
6029 }
6030 else if(internal.depth == 4)
6031 {
6032 for(int y = 0; y < height; y++)
6033 {
6034 for(int x = 0; x < width; x++)
6035 {
6036 unsigned short c0 = *(unsigned short*)(source0 + 2 * x);
6037 unsigned short c1 = *(unsigned short*)(source1 + 2 * x);
6038 unsigned short c2 = *(unsigned short*)(source2 + 2 * x);
6039 unsigned short c3 = *(unsigned short*)(source3 + 2 * x);
6040
6041 c0 = AVERAGE(c0, c1);
6042 c2 = AVERAGE(c2, c3);
6043 c0 = AVERAGE(c0, c2);
6044
6045 *(unsigned short*)(source0 + 2 * x) = c0;
6046 }
6047
6048 source0 += pitch;
6049 source1 += pitch;
6050 source2 += pitch;
6051 source3 += pitch;
6052 }
6053 }
6054 else if(internal.depth == 8)
6055 {
6056 for(int y = 0; y < height; y++)
6057 {
6058 for(int x = 0; x < width; x++)
6059 {
6060 unsigned short c0 = *(unsigned short*)(source0 + 2 * x);
6061 unsigned short c1 = *(unsigned short*)(source1 + 2 * x);
6062 unsigned short c2 = *(unsigned short*)(source2 + 2 * x);
6063 unsigned short c3 = *(unsigned short*)(source3 + 2 * x);
6064 unsigned short c4 = *(unsigned short*)(source4 + 2 * x);
6065 unsigned short c5 = *(unsigned short*)(source5 + 2 * x);
6066 unsigned short c6 = *(unsigned short*)(source6 + 2 * x);
6067 unsigned short c7 = *(unsigned short*)(source7 + 2 * x);
6068
6069 c0 = AVERAGE(c0, c1);
6070 c2 = AVERAGE(c2, c3);
6071 c4 = AVERAGE(c4, c5);
6072 c6 = AVERAGE(c6, c7);
6073 c0 = AVERAGE(c0, c2);
6074 c4 = AVERAGE(c4, c6);
6075 c0 = AVERAGE(c0, c4);
6076
6077 *(unsigned short*)(source0 + 2 * x) = c0;
6078 }
6079
6080 source0 += pitch;
6081 source1 += pitch;
6082 source2 += pitch;
6083 source3 += pitch;
6084 source4 += pitch;
6085 source5 += pitch;
6086 source6 += pitch;
6087 source7 += pitch;
6088 }
6089 }
6090 else if(internal.depth == 16)
6091 {
6092 for(int y = 0; y < height; y++)
6093 {
6094 for(int x = 0; x < width; x++)
6095 {
6096 unsigned short c0 = *(unsigned short*)(source0 + 2 * x);
6097 unsigned short c1 = *(unsigned short*)(source1 + 2 * x);
6098 unsigned short c2 = *(unsigned short*)(source2 + 2 * x);
6099 unsigned short c3 = *(unsigned short*)(source3 + 2 * x);
6100 unsigned short c4 = *(unsigned short*)(source4 + 2 * x);
6101 unsigned short c5 = *(unsigned short*)(source5 + 2 * x);
6102 unsigned short c6 = *(unsigned short*)(source6 + 2 * x);
6103 unsigned short c7 = *(unsigned short*)(source7 + 2 * x);
6104 unsigned short c8 = *(unsigned short*)(source8 + 2 * x);
6105 unsigned short c9 = *(unsigned short*)(source9 + 2 * x);
6106 unsigned short cA = *(unsigned short*)(sourceA + 2 * x);
6107 unsigned short cB = *(unsigned short*)(sourceB + 2 * x);
6108 unsigned short cC = *(unsigned short*)(sourceC + 2 * x);
6109 unsigned short cD = *(unsigned short*)(sourceD + 2 * x);
6110 unsigned short cE = *(unsigned short*)(sourceE + 2 * x);
6111 unsigned short cF = *(unsigned short*)(sourceF + 2 * x);
6112
6113 c0 = AVERAGE(c0, c1);
6114 c2 = AVERAGE(c2, c3);
6115 c4 = AVERAGE(c4, c5);
6116 c6 = AVERAGE(c6, c7);
6117 c8 = AVERAGE(c8, c9);
6118 cA = AVERAGE(cA, cB);
6119 cC = AVERAGE(cC, cD);
6120 cE = AVERAGE(cE, cF);
6121 c0 = AVERAGE(c0, c2);
6122 c4 = AVERAGE(c4, c6);
6123 c8 = AVERAGE(c8, cA);
6124 cC = AVERAGE(cC, cE);
6125 c0 = AVERAGE(c0, c4);
6126 c8 = AVERAGE(c8, cC);
6127 c0 = AVERAGE(c0, c8);
6128
6129 *(unsigned short*)(source0 + 2 * x) = c0;
6130 }
6131
6132 source0 += pitch;
6133 source1 += pitch;
6134 source2 += pitch;
6135 source3 += pitch;
6136 source4 += pitch;
6137 source5 += pitch;
6138 source6 += pitch;
6139 source7 += pitch;
6140 source8 += pitch;
6141 source9 += pitch;
6142 sourceA += pitch;
6143 sourceB += pitch;
6144 sourceC += pitch;
6145 sourceD += pitch;
6146 sourceE += pitch;
6147 sourceF += pitch;
6148 }
6149 }
6150 else ASSERT(false);
6151
6152 #undef AVERAGE
6153 }
6154 }
John Bauman89401822014-05-06 15:04:28 -04006155 else
6156 {
6157 // UNIMPLEMENTED();
6158 }
6159 }
6160}