blob: 6522a13c55966e4dfcad9f4e4721df5cb5447780 [file] [log] [blame]
Nicolas Capens68a82382018-10-02 13:16:55 -04001// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#include "Blitter.hpp"
16
17#include "Shader/ShaderCore.hpp"
18#include "Reactor/Reactor.hpp"
19#include "Common/Memory.hpp"
20#include "Common/Debug.hpp"
21
22namespace sw
23{
24 Blitter::Blitter()
25 {
26 blitCache = new RoutineCache<State>(1024);
27 }
28
29 Blitter::~Blitter()
30 {
31 delete blitCache;
32 }
33
34 void Blitter::clear(void *pixel, sw::Format format, Surface *dest, const SliceRect &dRect, unsigned int rgbaMask)
35 {
36 if(fastClear(pixel, format, dest, dRect, rgbaMask))
37 {
38 return;
39 }
40
41 sw::Surface *color = sw::Surface::create(1, 1, 1, format, pixel, sw::Surface::bytes(format), sw::Surface::bytes(format));
42 SliceRectF sRect(0.5f, 0.5f, 0.5f, 0.5f, 0); // Sample from the middle.
43 blit(color, sRect, dest, dRect, {rgbaMask});
44 delete color;
45 }
46
47 bool Blitter::fastClear(void *pixel, sw::Format format, Surface *dest, const SliceRect &dRect, unsigned int rgbaMask)
48 {
49 if(format != FORMAT_A32B32G32R32F)
50 {
51 return false;
52 }
53
54 float *color = (float*)pixel;
55 float r = color[0];
56 float g = color[1];
57 float b = color[2];
58 float a = color[3];
59
60 uint32_t packed;
61
62 switch(dest->getFormat())
63 {
64 case FORMAT_R5G6B5:
65 if((rgbaMask & 0x7) != 0x7) return false;
66 packed = ((uint16_t)(31 * b + 0.5f) << 0) |
67 ((uint16_t)(63 * g + 0.5f) << 5) |
68 ((uint16_t)(31 * r + 0.5f) << 11);
69 break;
70 case FORMAT_X8B8G8R8:
71 if((rgbaMask & 0x7) != 0x7) return false;
72 packed = ((uint32_t)(255) << 24) |
73 ((uint32_t)(255 * b + 0.5f) << 16) |
74 ((uint32_t)(255 * g + 0.5f) << 8) |
75 ((uint32_t)(255 * r + 0.5f) << 0);
76 break;
77 case FORMAT_A8B8G8R8:
78 if((rgbaMask & 0xF) != 0xF) return false;
79 packed = ((uint32_t)(255 * a + 0.5f) << 24) |
80 ((uint32_t)(255 * b + 0.5f) << 16) |
81 ((uint32_t)(255 * g + 0.5f) << 8) |
82 ((uint32_t)(255 * r + 0.5f) << 0);
83 break;
84 case FORMAT_X8R8G8B8:
85 if((rgbaMask & 0x7) != 0x7) return false;
86 packed = ((uint32_t)(255) << 24) |
87 ((uint32_t)(255 * r + 0.5f) << 16) |
88 ((uint32_t)(255 * g + 0.5f) << 8) |
89 ((uint32_t)(255 * b + 0.5f) << 0);
90 break;
91 case FORMAT_A8R8G8B8:
92 if((rgbaMask & 0xF) != 0xF) return false;
93 packed = ((uint32_t)(255 * a + 0.5f) << 24) |
94 ((uint32_t)(255 * r + 0.5f) << 16) |
95 ((uint32_t)(255 * g + 0.5f) << 8) |
96 ((uint32_t)(255 * b + 0.5f) << 0);
97 break;
98 default:
99 return false;
100 }
101
102 bool useDestInternal = !dest->isExternalDirty();
103 uint8_t *slice = (uint8_t*)dest->lock(dRect.x0, dRect.y0, dRect.slice, sw::LOCK_WRITEONLY, sw::PUBLIC, useDestInternal);
104
105 for(int j = 0; j < dest->getSamples(); j++)
106 {
107 uint8_t *d = slice;
108
109 switch(Surface::bytes(dest->getFormat()))
110 {
111 case 2:
112 for(int i = dRect.y0; i < dRect.y1; i++)
113 {
114 sw::clear((uint16_t*)d, packed, dRect.x1 - dRect.x0);
115 d += dest->getPitchB(useDestInternal);
116 }
117 break;
118 case 4:
119 for(int i = dRect.y0; i < dRect.y1; i++)
120 {
121 sw::clear((uint32_t*)d, packed, dRect.x1 - dRect.x0);
122 d += dest->getPitchB(useDestInternal);
123 }
124 break;
125 default:
126 assert(false);
127 }
128
129 slice += dest->getSliceB(useDestInternal);
130 }
131
132 dest->unlock(useDestInternal);
133
134 return true;
135 }
136
137 void Blitter::blit(Surface *source, const SliceRectF &sourceRect, Surface *dest, const SliceRect &destRect, const Blitter::Options& options)
138 {
139 if(dest->getInternalFormat() == FORMAT_NULL)
140 {
141 return;
142 }
143
144 if(blitReactor(source, sourceRect, dest, destRect, options))
145 {
146 return;
147 }
148
149 SliceRectF sRect = sourceRect;
150 SliceRect dRect = destRect;
151
152 bool flipX = destRect.x0 > destRect.x1;
153 bool flipY = destRect.y0 > destRect.y1;
154
155 if(flipX)
156 {
157 swap(dRect.x0, dRect.x1);
158 swap(sRect.x0, sRect.x1);
159 }
160 if(flipY)
161 {
162 swap(dRect.y0, dRect.y1);
163 swap(sRect.y0, sRect.y1);
164 }
165
166 source->lockInternal(0, 0, sRect.slice, sw::LOCK_READONLY, sw::PUBLIC);
167 dest->lockInternal(0, 0, dRect.slice, sw::LOCK_WRITEONLY, sw::PUBLIC);
168
169 float w = sRect.width() / dRect.width();
170 float h = sRect.height() / dRect.height();
171
172 float xStart = sRect.x0 + (0.5f - dRect.x0) * w;
173 float yStart = sRect.y0 + (0.5f - dRect.y0) * h;
174
175 for(int j = dRect.y0; j < dRect.y1; j++)
176 {
177 float y = yStart + j * h;
178
179 for(int i = dRect.x0; i < dRect.x1; i++)
180 {
181 float x = xStart + i * w;
182
183 // FIXME: Support RGBA mask
184 dest->copyInternal(source, i, j, x, y, options.filter);
185 }
186 }
187
188 source->unlockInternal();
189 dest->unlockInternal();
190 }
191
192 void Blitter::blit3D(Surface *source, Surface *dest)
193 {
194 source->lockInternal(0, 0, 0, sw::LOCK_READONLY, sw::PUBLIC);
195 dest->lockInternal(0, 0, 0, sw::LOCK_WRITEONLY, sw::PUBLIC);
196
197 float w = static_cast<float>(source->getWidth()) / static_cast<float>(dest->getWidth());
198 float h = static_cast<float>(source->getHeight()) / static_cast<float>(dest->getHeight());
199 float d = static_cast<float>(source->getDepth()) / static_cast<float>(dest->getDepth());
200
201 for(int k = 0; k < dest->getDepth(); k++)
202 {
203 float z = (k + 0.5f) * d;
204
205 for(int j = 0; j < dest->getHeight(); j++)
206 {
207 float y = (j + 0.5f) * h;
208
209 for(int i = 0; i < dest->getWidth(); i++)
210 {
211 float x = (i + 0.5f) * w;
212
213 dest->copyInternal(source, i, j, k, x, y, z, true);
214 }
215 }
216 }
217
218 source->unlockInternal();
219 dest->unlockInternal();
220 }
221
222 bool Blitter::read(Float4 &c, Pointer<Byte> element, const State &state)
223 {
224 c = Float4(0.0f, 0.0f, 0.0f, 1.0f);
225
226 switch(state.sourceFormat)
227 {
228 case FORMAT_L8:
229 c.xyz = Float(Int(*Pointer<Byte>(element)));
230 c.w = float(0xFF);
231 break;
232 case FORMAT_A8:
233 c.w = Float(Int(*Pointer<Byte>(element)));
234 break;
235 case FORMAT_R8I:
236 case FORMAT_R8_SNORM:
237 c.x = Float(Int(*Pointer<SByte>(element)));
238 c.w = float(0x7F);
239 break;
240 case FORMAT_R8:
241 case FORMAT_R8UI:
242 c.x = Float(Int(*Pointer<Byte>(element)));
243 c.w = float(0xFF);
244 break;
245 case FORMAT_R16I:
246 c.x = Float(Int(*Pointer<Short>(element)));
247 c.w = float(0x7FFF);
248 break;
249 case FORMAT_R16UI:
250 c.x = Float(Int(*Pointer<UShort>(element)));
251 c.w = float(0xFFFF);
252 break;
253 case FORMAT_R32I:
254 c.x = Float(*Pointer<Int>(element));
255 c.w = float(0x7FFFFFFF);
256 break;
257 case FORMAT_R32UI:
258 c.x = Float(*Pointer<UInt>(element));
259 c.w = float(0xFFFFFFFF);
260 break;
261 case FORMAT_A8R8G8B8:
262 c = Float4(*Pointer<Byte4>(element)).zyxw;
263 break;
264 case FORMAT_A8B8G8R8I:
265 case FORMAT_A8B8G8R8_SNORM:
266 c = Float4(*Pointer<SByte4>(element));
267 break;
268 case FORMAT_A8B8G8R8:
269 case FORMAT_A8B8G8R8UI:
270 case FORMAT_SRGB8_A8:
271 c = Float4(*Pointer<Byte4>(element));
272 break;
273 case FORMAT_X8R8G8B8:
274 c = Float4(*Pointer<Byte4>(element)).zyxw;
275 c.w = float(0xFF);
276 break;
277 case FORMAT_R8G8B8:
278 c.z = Float(Int(*Pointer<Byte>(element + 0)));
279 c.y = Float(Int(*Pointer<Byte>(element + 1)));
280 c.x = Float(Int(*Pointer<Byte>(element + 2)));
281 c.w = float(0xFF);
282 break;
283 case FORMAT_B8G8R8:
284 c.x = Float(Int(*Pointer<Byte>(element + 0)));
285 c.y = Float(Int(*Pointer<Byte>(element + 1)));
286 c.z = Float(Int(*Pointer<Byte>(element + 2)));
287 c.w = float(0xFF);
288 break;
289 case FORMAT_X8B8G8R8I:
290 case FORMAT_X8B8G8R8_SNORM:
291 c = Float4(*Pointer<SByte4>(element));
292 c.w = float(0x7F);
293 break;
294 case FORMAT_X8B8G8R8:
295 case FORMAT_X8B8G8R8UI:
296 case FORMAT_SRGB8_X8:
297 c = Float4(*Pointer<Byte4>(element));
298 c.w = float(0xFF);
299 break;
300 case FORMAT_A16B16G16R16I:
301 c = Float4(*Pointer<Short4>(element));
302 break;
303 case FORMAT_A16B16G16R16:
304 case FORMAT_A16B16G16R16UI:
305 c = Float4(*Pointer<UShort4>(element));
306 break;
307 case FORMAT_X16B16G16R16I:
308 c = Float4(*Pointer<Short4>(element));
309 c.w = float(0x7FFF);
310 break;
311 case FORMAT_X16B16G16R16UI:
312 c = Float4(*Pointer<UShort4>(element));
313 c.w = float(0xFFFF);
314 break;
315 case FORMAT_A32B32G32R32I:
316 c = Float4(*Pointer<Int4>(element));
317 break;
318 case FORMAT_A32B32G32R32UI:
319 c = Float4(*Pointer<UInt4>(element));
320 break;
321 case FORMAT_X32B32G32R32I:
322 c = Float4(*Pointer<Int4>(element));
323 c.w = float(0x7FFFFFFF);
324 break;
325 case FORMAT_X32B32G32R32UI:
326 c = Float4(*Pointer<UInt4>(element));
327 c.w = float(0xFFFFFFFF);
328 break;
329 case FORMAT_G8R8I:
330 case FORMAT_G8R8_SNORM:
331 c.x = Float(Int(*Pointer<SByte>(element + 0)));
332 c.y = Float(Int(*Pointer<SByte>(element + 1)));
333 c.w = float(0x7F);
334 break;
335 case FORMAT_G8R8:
336 case FORMAT_G8R8UI:
337 c.x = Float(Int(*Pointer<Byte>(element + 0)));
338 c.y = Float(Int(*Pointer<Byte>(element + 1)));
339 c.w = float(0xFF);
340 break;
341 case FORMAT_G16R16I:
342 c.x = Float(Int(*Pointer<Short>(element + 0)));
343 c.y = Float(Int(*Pointer<Short>(element + 2)));
344 c.w = float(0x7FFF);
345 break;
346 case FORMAT_G16R16:
347 case FORMAT_G16R16UI:
348 c.x = Float(Int(*Pointer<UShort>(element + 0)));
349 c.y = Float(Int(*Pointer<UShort>(element + 2)));
350 c.w = float(0xFFFF);
351 break;
352 case FORMAT_G32R32I:
353 c.x = Float(*Pointer<Int>(element + 0));
354 c.y = Float(*Pointer<Int>(element + 4));
355 c.w = float(0x7FFFFFFF);
356 break;
357 case FORMAT_G32R32UI:
358 c.x = Float(*Pointer<UInt>(element + 0));
359 c.y = Float(*Pointer<UInt>(element + 4));
360 c.w = float(0xFFFFFFFF);
361 break;
362 case FORMAT_A32B32G32R32F:
363 c = *Pointer<Float4>(element);
364 break;
365 case FORMAT_X32B32G32R32F:
366 case FORMAT_X32B32G32R32F_UNSIGNED:
367 case FORMAT_B32G32R32F:
368 c.z = *Pointer<Float>(element + 8);
369 case FORMAT_G32R32F:
370 c.x = *Pointer<Float>(element + 0);
371 c.y = *Pointer<Float>(element + 4);
372 break;
373 case FORMAT_R32F:
374 c.x = *Pointer<Float>(element);
375 break;
376 case FORMAT_R5G6B5:
377 c.x = Float(Int((*Pointer<UShort>(element) & UShort(0xF800)) >> UShort(11)));
378 c.y = Float(Int((*Pointer<UShort>(element) & UShort(0x07E0)) >> UShort(5)));
379 c.z = Float(Int(*Pointer<UShort>(element) & UShort(0x001F)));
380 break;
381 case FORMAT_A2B10G10R10:
382 case FORMAT_A2B10G10R10UI:
383 c.x = Float(Int((*Pointer<UInt>(element) & UInt(0x000003FF))));
384 c.y = Float(Int((*Pointer<UInt>(element) & UInt(0x000FFC00)) >> 10));
385 c.z = Float(Int((*Pointer<UInt>(element) & UInt(0x3FF00000)) >> 20));
386 c.w = Float(Int((*Pointer<UInt>(element) & UInt(0xC0000000)) >> 30));
387 break;
388 case FORMAT_D16:
389 c.x = Float(Int((*Pointer<UShort>(element))));
390 break;
391 case FORMAT_D24S8:
392 case FORMAT_D24X8:
393 c.x = Float(Int((*Pointer<UInt>(element) & UInt(0xFFFFFF00)) >> 8));
394 break;
395 case FORMAT_D32:
396 c.x = Float(Int((*Pointer<UInt>(element))));
397 break;
398 case FORMAT_D32F_COMPLEMENTARY:
399 case FORMAT_D32FS8_COMPLEMENTARY:
400 c.x = 1.0f - *Pointer<Float>(element);
401 break;
402 case FORMAT_D32F:
403 case FORMAT_D32FS8:
404 case FORMAT_D32F_LOCKABLE:
405 case FORMAT_D32FS8_TEXTURE:
406 case FORMAT_D32F_SHADOW:
407 case FORMAT_D32FS8_SHADOW:
408 c.x = *Pointer<Float>(element);
409 break;
410 case FORMAT_S8:
411 c.x = Float(Int(*Pointer<Byte>(element)));
412 break;
413 default:
414 return false;
415 }
416
417 return true;
418 }
419
420 bool Blitter::write(Float4 &c, Pointer<Byte> element, const State &state)
421 {
422 bool writeR = state.writeRed;
423 bool writeG = state.writeGreen;
424 bool writeB = state.writeBlue;
425 bool writeA = state.writeAlpha;
426 bool writeRGBA = writeR && writeG && writeB && writeA;
427
428 switch(state.destFormat)
429 {
430 case FORMAT_L8:
431 *Pointer<Byte>(element) = Byte(RoundInt(Float(c.x)));
432 break;
433 case FORMAT_A8:
434 if(writeA) { *Pointer<Byte>(element) = Byte(RoundInt(Float(c.w))); }
435 break;
436 case FORMAT_A8R8G8B8:
437 if(writeRGBA)
438 {
439 Short4 c0 = RoundShort4(c.zyxw);
440 *Pointer<Byte4>(element) = Byte4(PackUnsigned(c0, c0));
441 }
442 else
443 {
444 if(writeB) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.z))); }
445 if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
446 if(writeR) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.x))); }
447 if(writeA) { *Pointer<Byte>(element + 3) = Byte(RoundInt(Float(c.w))); }
448 }
449 break;
450 case FORMAT_A8B8G8R8:
451 case FORMAT_SRGB8_A8:
452 if(writeRGBA)
453 {
454 Short4 c0 = RoundShort4(c);
455 *Pointer<Byte4>(element) = Byte4(PackUnsigned(c0, c0));
456 }
457 else
458 {
459 if(writeR) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.x))); }
460 if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
461 if(writeB) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.z))); }
462 if(writeA) { *Pointer<Byte>(element + 3) = Byte(RoundInt(Float(c.w))); }
463 }
464 break;
465 case FORMAT_X8R8G8B8:
466 if(writeRGBA)
467 {
468 Short4 c0 = RoundShort4(c.zyxw) | Short4(0x0000, 0x0000, 0x0000, 0x00FF);
469 *Pointer<Byte4>(element) = Byte4(PackUnsigned(c0, c0));
470 }
471 else
472 {
473 if(writeB) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.z))); }
474 if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
475 if(writeR) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.x))); }
476 if(writeA) { *Pointer<Byte>(element + 3) = Byte(0xFF); }
477 }
478 break;
479 case FORMAT_X8B8G8R8:
480 case FORMAT_SRGB8_X8:
481 if(writeRGBA)
482 {
483 Short4 c0 = RoundShort4(c) | Short4(0x0000, 0x0000, 0x0000, 0x00FF);
484 *Pointer<Byte4>(element) = Byte4(PackUnsigned(c0, c0));
485 }
486 else
487 {
488 if(writeR) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.x))); }
489 if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
490 if(writeB) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.z))); }
491 if(writeA) { *Pointer<Byte>(element + 3) = Byte(0xFF); }
492 }
493 break;
494 case FORMAT_R8G8B8:
495 if(writeR) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.x))); }
496 if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
497 if(writeB) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.z))); }
498 break;
499 case FORMAT_B8G8R8:
500 if(writeR) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.x))); }
501 if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
502 if(writeB) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.z))); }
503 break;
504 case FORMAT_A32B32G32R32F:
505 if(writeRGBA)
506 {
507 *Pointer<Float4>(element) = c;
508 }
509 else
510 {
511 if(writeR) { *Pointer<Float>(element) = c.x; }
512 if(writeG) { *Pointer<Float>(element + 4) = c.y; }
513 if(writeB) { *Pointer<Float>(element + 8) = c.z; }
514 if(writeA) { *Pointer<Float>(element + 12) = c.w; }
515 }
516 break;
517 case FORMAT_X32B32G32R32F:
518 case FORMAT_X32B32G32R32F_UNSIGNED:
519 if(writeA) { *Pointer<Float>(element + 12) = 1.0f; }
520 case FORMAT_B32G32R32F:
521 if(writeR) { *Pointer<Float>(element) = c.x; }
522 if(writeG) { *Pointer<Float>(element + 4) = c.y; }
523 if(writeB) { *Pointer<Float>(element + 8) = c.z; }
524 break;
525 case FORMAT_G32R32F:
526 if(writeR && writeG)
527 {
528 *Pointer<Float2>(element) = Float2(c);
529 }
530 else
531 {
532 if(writeR) { *Pointer<Float>(element) = c.x; }
533 if(writeG) { *Pointer<Float>(element + 4) = c.y; }
534 }
535 break;
536 case FORMAT_R32F:
537 if(writeR) { *Pointer<Float>(element) = c.x; }
538 break;
539 case FORMAT_A8B8G8R8I:
540 case FORMAT_A8B8G8R8_SNORM:
541 if(writeA) { *Pointer<SByte>(element + 3) = SByte(RoundInt(Float(c.w))); }
542 case FORMAT_X8B8G8R8I:
543 case FORMAT_X8B8G8R8_SNORM:
544 if(writeA && (state.destFormat == FORMAT_X8B8G8R8I || state.destFormat == FORMAT_X8B8G8R8_SNORM))
545 {
546 *Pointer<SByte>(element + 3) = SByte(0x7F);
547 }
548 if(writeB) { *Pointer<SByte>(element + 2) = SByte(RoundInt(Float(c.z))); }
549 case FORMAT_G8R8I:
550 case FORMAT_G8R8_SNORM:
551 if(writeG) { *Pointer<SByte>(element + 1) = SByte(RoundInt(Float(c.y))); }
552 case FORMAT_R8I:
553 case FORMAT_R8_SNORM:
554 if(writeR) { *Pointer<SByte>(element) = SByte(RoundInt(Float(c.x))); }
555 break;
556 case FORMAT_A8B8G8R8UI:
557 if(writeA) { *Pointer<Byte>(element + 3) = Byte(RoundInt(Float(c.w))); }
558 case FORMAT_X8B8G8R8UI:
559 if(writeA && (state.destFormat == FORMAT_X8B8G8R8UI))
560 {
561 *Pointer<Byte>(element + 3) = Byte(0xFF);
562 }
563 if(writeB) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.z))); }
564 case FORMAT_G8R8UI:
565 case FORMAT_G8R8:
566 if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
567 case FORMAT_R8UI:
568 case FORMAT_R8:
569 if(writeR) { *Pointer<Byte>(element) = Byte(RoundInt(Float(c.x))); }
570 break;
571 case FORMAT_A16B16G16R16I:
572 if(writeRGBA)
573 {
574 *Pointer<Short4>(element) = Short4(RoundInt(c));
575 }
576 else
577 {
578 if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); }
579 if(writeG) { *Pointer<Short>(element + 2) = Short(RoundInt(Float(c.y))); }
580 if(writeB) { *Pointer<Short>(element + 4) = Short(RoundInt(Float(c.z))); }
581 if(writeA) { *Pointer<Short>(element + 6) = Short(RoundInt(Float(c.w))); }
582 }
583 break;
584 case FORMAT_X16B16G16R16I:
585 if(writeRGBA)
586 {
587 *Pointer<Short4>(element) = Short4(RoundInt(c));
588 }
589 else
590 {
591 if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); }
592 if(writeG) { *Pointer<Short>(element + 2) = Short(RoundInt(Float(c.y))); }
593 if(writeB) { *Pointer<Short>(element + 4) = Short(RoundInt(Float(c.z))); }
594 }
595 if(writeA) { *Pointer<Short>(element + 6) = Short(0x7F); }
596 break;
597 case FORMAT_G16R16I:
598 if(writeR && writeG)
599 {
600 *Pointer<Short2>(element) = Short2(Short4(RoundInt(c)));
601 }
602 else
603 {
604 if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); }
605 if(writeG) { *Pointer<Short>(element + 2) = Short(RoundInt(Float(c.y))); }
606 }
607 break;
608 case FORMAT_R16I:
609 if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); }
610 break;
611 case FORMAT_A16B16G16R16UI:
612 case FORMAT_A16B16G16R16:
613 if(writeRGBA)
614 {
615 *Pointer<UShort4>(element) = UShort4(RoundInt(c));
616 }
617 else
618 {
619 if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); }
620 if(writeG) { *Pointer<UShort>(element + 2) = UShort(RoundInt(Float(c.y))); }
621 if(writeB) { *Pointer<UShort>(element + 4) = UShort(RoundInt(Float(c.z))); }
622 if(writeA) { *Pointer<UShort>(element + 6) = UShort(RoundInt(Float(c.w))); }
623 }
624 break;
625 case FORMAT_X16B16G16R16UI:
626 if(writeRGBA)
627 {
628 *Pointer<UShort4>(element) = UShort4(RoundInt(c));
629 }
630 else
631 {
632 if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); }
633 if(writeG) { *Pointer<UShort>(element + 2) = UShort(RoundInt(Float(c.y))); }
634 if(writeB) { *Pointer<UShort>(element + 4) = UShort(RoundInt(Float(c.z))); }
635 }
636 if(writeA) { *Pointer<UShort>(element + 6) = UShort(0xFF); }
637 break;
638 case FORMAT_G16R16UI:
639 case FORMAT_G16R16:
640 if(writeR && writeG)
641 {
642 *Pointer<UShort2>(element) = UShort2(UShort4(RoundInt(c)));
643 }
644 else
645 {
646 if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); }
647 if(writeG) { *Pointer<UShort>(element + 2) = UShort(RoundInt(Float(c.y))); }
648 }
649 break;
650 case FORMAT_R16UI:
651 if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); }
652 break;
653 case FORMAT_A32B32G32R32I:
654 if(writeRGBA)
655 {
656 *Pointer<Int4>(element) = RoundInt(c);
657 }
658 else
659 {
660 if(writeR) { *Pointer<Int>(element) = RoundInt(Float(c.x)); }
661 if(writeG) { *Pointer<Int>(element + 4) = RoundInt(Float(c.y)); }
662 if(writeB) { *Pointer<Int>(element + 8) = RoundInt(Float(c.z)); }
663 if(writeA) { *Pointer<Int>(element + 12) = RoundInt(Float(c.w)); }
664 }
665 break;
666 case FORMAT_X32B32G32R32I:
667 if(writeRGBA)
668 {
669 *Pointer<Int4>(element) = RoundInt(c);
670 }
671 else
672 {
673 if(writeR) { *Pointer<Int>(element) = RoundInt(Float(c.x)); }
674 if(writeG) { *Pointer<Int>(element + 4) = RoundInt(Float(c.y)); }
675 if(writeB) { *Pointer<Int>(element + 8) = RoundInt(Float(c.z)); }
676 }
677 if(writeA) { *Pointer<Int>(element + 12) = Int(0x7FFFFFFF); }
678 break;
679 case FORMAT_G32R32I:
680 if(writeG) { *Pointer<Int>(element + 4) = RoundInt(Float(c.y)); }
681 case FORMAT_R32I:
682 if(writeR) { *Pointer<Int>(element) = RoundInt(Float(c.x)); }
683 break;
684 case FORMAT_A32B32G32R32UI:
685 if(writeRGBA)
686 {
687 *Pointer<UInt4>(element) = UInt4(RoundInt(c));
688 }
689 else
690 {
691 if(writeR) { *Pointer<UInt>(element) = As<UInt>(RoundInt(Float(c.x))); }
692 if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(RoundInt(Float(c.y))); }
693 if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(RoundInt(Float(c.z))); }
694 if(writeA) { *Pointer<UInt>(element + 12) = As<UInt>(RoundInt(Float(c.w))); }
695 }
696 break;
697 case FORMAT_X32B32G32R32UI:
698 if(writeRGBA)
699 {
700 *Pointer<UInt4>(element) = UInt4(RoundInt(c));
701 }
702 else
703 {
704 if(writeR) { *Pointer<UInt>(element) = As<UInt>(RoundInt(Float(c.x))); }
705 if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(RoundInt(Float(c.y))); }
706 if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(RoundInt(Float(c.z))); }
707 }
708 if(writeA) { *Pointer<UInt4>(element + 12) = UInt4(0xFFFFFFFF); }
709 break;
710 case FORMAT_G32R32UI:
711 if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(RoundInt(Float(c.y))); }
712 case FORMAT_R32UI:
713 if(writeR) { *Pointer<UInt>(element) = As<UInt>(RoundInt(Float(c.x))); }
714 break;
715 case FORMAT_R5G6B5:
716 if(writeR && writeG && writeB)
717 {
718 *Pointer<UShort>(element) = UShort(RoundInt(Float(c.z)) |
719 (RoundInt(Float(c.y)) << Int(5)) |
720 (RoundInt(Float(c.x)) << Int(11)));
721 }
722 else
723 {
724 unsigned short mask = (writeB ? 0x001F : 0x0000) | (writeG ? 0x07E0 : 0x0000) | (writeR ? 0xF800 : 0x0000);
725 unsigned short unmask = ~mask;
726 *Pointer<UShort>(element) = (*Pointer<UShort>(element) & UShort(unmask)) |
727 (UShort(RoundInt(Float(c.z)) |
728 (RoundInt(Float(c.y)) << Int(5)) |
729 (RoundInt(Float(c.x)) << Int(11))) & UShort(mask));
730 }
731 break;
732 case FORMAT_A2B10G10R10:
733 case FORMAT_A2B10G10R10UI:
734 if(writeRGBA)
735 {
736 *Pointer<UInt>(element) = UInt(RoundInt(Float(c.x)) |
737 (RoundInt(Float(c.y)) << 10) |
738 (RoundInt(Float(c.z)) << 20) |
739 (RoundInt(Float(c.w)) << 30));
740 }
741 else
742 {
743 unsigned int mask = (writeA ? 0xC0000000 : 0x0000) |
744 (writeB ? 0x3FF00000 : 0x0000) |
745 (writeG ? 0x000FFC00 : 0x0000) |
746 (writeR ? 0x000003FF : 0x0000);
747 unsigned int unmask = ~mask;
748 *Pointer<UInt>(element) = (*Pointer<UInt>(element) & UInt(unmask)) |
749 (UInt(RoundInt(Float(c.x)) |
750 (RoundInt(Float(c.y)) << 10) |
751 (RoundInt(Float(c.z)) << 20) |
752 (RoundInt(Float(c.w)) << 30)) & UInt(mask));
753 }
754 break;
755 case FORMAT_D16:
756 *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x)));
757 break;
758 case FORMAT_D24S8:
759 case FORMAT_D24X8:
760 *Pointer<UInt>(element) = UInt(RoundInt(Float(c.x)) << 8);
761 break;
762 case FORMAT_D32:
763 *Pointer<UInt>(element) = UInt(RoundInt(Float(c.x)));
764 break;
765 case FORMAT_D32F_COMPLEMENTARY:
766 case FORMAT_D32FS8_COMPLEMENTARY:
767 *Pointer<Float>(element) = 1.0f - c.x;
768 break;
769 case FORMAT_D32F:
770 case FORMAT_D32FS8:
771 case FORMAT_D32F_LOCKABLE:
772 case FORMAT_D32FS8_TEXTURE:
773 case FORMAT_D32F_SHADOW:
774 case FORMAT_D32FS8_SHADOW:
775 *Pointer<Float>(element) = c.x;
776 break;
777 case FORMAT_S8:
778 *Pointer<Byte>(element) = Byte(RoundInt(Float(c.x)));
779 break;
780 default:
781 return false;
782 }
783 return true;
784 }
785
786 bool Blitter::read(Int4 &c, Pointer<Byte> element, const State &state)
787 {
788 c = Int4(0, 0, 0, 1);
789
790 switch(state.sourceFormat)
791 {
792 case FORMAT_A8B8G8R8I:
793 c = Insert(c, Int(*Pointer<SByte>(element + 3)), 3);
794 case FORMAT_X8B8G8R8I:
795 c = Insert(c, Int(*Pointer<SByte>(element + 2)), 2);
796 case FORMAT_G8R8I:
797 c = Insert(c, Int(*Pointer<SByte>(element + 1)), 1);
798 case FORMAT_R8I:
799 c = Insert(c, Int(*Pointer<SByte>(element)), 0);
800 break;
801 case FORMAT_A8B8G8R8UI:
802 c = Insert(c, Int(*Pointer<Byte>(element + 3)), 3);
803 case FORMAT_X8B8G8R8UI:
804 c = Insert(c, Int(*Pointer<Byte>(element + 2)), 2);
805 case FORMAT_G8R8UI:
806 c = Insert(c, Int(*Pointer<Byte>(element + 1)), 1);
807 case FORMAT_R8UI:
808 c = Insert(c, Int(*Pointer<Byte>(element)), 0);
809 break;
810 case FORMAT_A16B16G16R16I:
811 c = Insert(c, Int(*Pointer<Short>(element + 6)), 3);
812 case FORMAT_X16B16G16R16I:
813 c = Insert(c, Int(*Pointer<Short>(element + 4)), 2);
814 case FORMAT_G16R16I:
815 c = Insert(c, Int(*Pointer<Short>(element + 2)), 1);
816 case FORMAT_R16I:
817 c = Insert(c, Int(*Pointer<Short>(element)), 0);
818 break;
819 case FORMAT_A16B16G16R16UI:
820 c = Insert(c, Int(*Pointer<UShort>(element + 6)), 3);
821 case FORMAT_X16B16G16R16UI:
822 c = Insert(c, Int(*Pointer<UShort>(element + 4)), 2);
823 case FORMAT_G16R16UI:
824 c = Insert(c, Int(*Pointer<UShort>(element + 2)), 1);
825 case FORMAT_R16UI:
826 c = Insert(c, Int(*Pointer<UShort>(element)), 0);
827 break;
828 case FORMAT_A32B32G32R32I:
829 case FORMAT_A32B32G32R32UI:
830 c = *Pointer<Int4>(element);
831 break;
832 case FORMAT_X32B32G32R32I:
833 case FORMAT_X32B32G32R32UI:
834 c = Insert(c, *Pointer<Int>(element + 8), 2);
835 case FORMAT_G32R32I:
836 case FORMAT_G32R32UI:
837 c = Insert(c, *Pointer<Int>(element + 4), 1);
838 case FORMAT_R32I:
839 case FORMAT_R32UI:
840 c = Insert(c, *Pointer<Int>(element), 0);
841 break;
842 default:
843 return false;
844 }
845
846 return true;
847 }
848
849 bool Blitter::write(Int4 &c, Pointer<Byte> element, const State &state)
850 {
851 bool writeR = state.writeRed;
852 bool writeG = state.writeGreen;
853 bool writeB = state.writeBlue;
854 bool writeA = state.writeAlpha;
855 bool writeRGBA = writeR && writeG && writeB && writeA;
856
857 switch(state.destFormat)
858 {
859 case FORMAT_A8B8G8R8I:
860 if(writeA) { *Pointer<SByte>(element + 3) = SByte(Extract(c, 3)); }
861 case FORMAT_X8B8G8R8I:
862 if(writeA && (state.destFormat != FORMAT_A8B8G8R8I))
863 {
864 *Pointer<SByte>(element + 3) = SByte(0x7F);
865 }
866 if(writeB) { *Pointer<SByte>(element + 2) = SByte(Extract(c, 2)); }
867 case FORMAT_G8R8I:
868 if(writeG) { *Pointer<SByte>(element + 1) = SByte(Extract(c, 1)); }
869 case FORMAT_R8I:
870 if(writeR) { *Pointer<SByte>(element) = SByte(Extract(c, 0)); }
871 break;
872 case FORMAT_A8B8G8R8UI:
873 if(writeA) { *Pointer<Byte>(element + 3) = Byte(Extract(c, 3)); }
874 case FORMAT_X8B8G8R8UI:
875 if(writeA && (state.destFormat != FORMAT_A8B8G8R8UI))
876 {
877 *Pointer<Byte>(element + 3) = Byte(0xFF);
878 }
879 if(writeB) { *Pointer<Byte>(element + 2) = Byte(Extract(c, 2)); }
880 case FORMAT_G8R8UI:
881 if(writeG) { *Pointer<Byte>(element + 1) = Byte(Extract(c, 1)); }
882 case FORMAT_R8UI:
883 if(writeR) { *Pointer<Byte>(element) = Byte(Extract(c, 0)); }
884 break;
885 case FORMAT_A16B16G16R16I:
886 if(writeA) { *Pointer<Short>(element + 6) = Short(Extract(c, 3)); }
887 case FORMAT_X16B16G16R16I:
888 if(writeA && (state.destFormat != FORMAT_A16B16G16R16I))
889 {
890 *Pointer<Short>(element + 6) = Short(0x7FFF);
891 }
892 if(writeB) { *Pointer<Short>(element + 4) = Short(Extract(c, 2)); }
893 case FORMAT_G16R16I:
894 if(writeG) { *Pointer<Short>(element + 2) = Short(Extract(c, 1)); }
895 case FORMAT_R16I:
896 if(writeR) { *Pointer<Short>(element) = Short(Extract(c, 0)); }
897 break;
898 case FORMAT_A16B16G16R16UI:
899 if(writeA) { *Pointer<UShort>(element + 6) = UShort(Extract(c, 3)); }
900 case FORMAT_X16B16G16R16UI:
901 if(writeA && (state.destFormat != FORMAT_A16B16G16R16UI))
902 {
903 *Pointer<UShort>(element + 6) = UShort(0xFFFF);
904 }
905 if(writeB) { *Pointer<UShort>(element + 4) = UShort(Extract(c, 2)); }
906 case FORMAT_G16R16UI:
907 if(writeG) { *Pointer<UShort>(element + 2) = UShort(Extract(c, 1)); }
908 case FORMAT_R16UI:
909 if(writeR) { *Pointer<UShort>(element) = UShort(Extract(c, 0)); }
910 break;
911 case FORMAT_A32B32G32R32I:
912 if(writeRGBA)
913 {
914 *Pointer<Int4>(element) = c;
915 }
916 else
917 {
918 if(writeR) { *Pointer<Int>(element) = Extract(c, 0); }
919 if(writeG) { *Pointer<Int>(element + 4) = Extract(c, 1); }
920 if(writeB) { *Pointer<Int>(element + 8) = Extract(c, 2); }
921 if(writeA) { *Pointer<Int>(element + 12) = Extract(c, 3); }
922 }
923 break;
924 case FORMAT_X32B32G32R32I:
925 if(writeRGBA)
926 {
927 *Pointer<Int4>(element) = c;
928 }
929 else
930 {
931 if(writeR) { *Pointer<Int>(element) = Extract(c, 0); }
932 if(writeG) { *Pointer<Int>(element + 4) = Extract(c, 1); }
933 if(writeB) { *Pointer<Int>(element + 8) = Extract(c, 2); }
934 }
935 if(writeA) { *Pointer<Int>(element + 12) = Int(0x7FFFFFFF); }
936 break;
937 case FORMAT_G32R32I:
938 if(writeR) { *Pointer<Int>(element) = Extract(c, 0); }
939 if(writeG) { *Pointer<Int>(element + 4) = Extract(c, 1); }
940 break;
941 case FORMAT_R32I:
942 if(writeR) { *Pointer<Int>(element) = Extract(c, 0); }
943 break;
944 case FORMAT_A32B32G32R32UI:
945 if(writeRGBA)
946 {
947 *Pointer<UInt4>(element) = As<UInt4>(c);
948 }
949 else
950 {
951 if(writeR) { *Pointer<UInt>(element) = As<UInt>(Extract(c, 0)); }
952 if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(Extract(c, 1)); }
953 if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(Extract(c, 2)); }
954 if(writeA) { *Pointer<UInt>(element + 12) = As<UInt>(Extract(c, 3)); }
955 }
956 break;
957 case FORMAT_X32B32G32R32UI:
958 if(writeRGBA)
959 {
960 *Pointer<UInt4>(element) = As<UInt4>(c);
961 }
962 else
963 {
964 if(writeR) { *Pointer<UInt>(element) = As<UInt>(Extract(c, 0)); }
965 if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(Extract(c, 1)); }
966 if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(Extract(c, 2)); }
967 }
968 if(writeA) { *Pointer<UInt>(element + 3) = UInt(0xFFFFFFFF); }
969 break;
970 case FORMAT_G32R32UI:
971 if(writeR) { *Pointer<UInt>(element) = As<UInt>(Extract(c, 0)); }
972 if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(Extract(c, 1)); }
973 break;
974 case FORMAT_R32UI:
975 if(writeR) { *Pointer<UInt>(element) = As<UInt>(Extract(c, 0)); }
976 break;
977 default:
978 return false;
979 }
980
981 return true;
982 }
983
984 bool Blitter::GetScale(float4 &scale, Format format)
985 {
986 switch(format)
987 {
988 case FORMAT_L8:
989 case FORMAT_A8:
990 case FORMAT_A8R8G8B8:
991 case FORMAT_X8R8G8B8:
992 case FORMAT_R8:
993 case FORMAT_G8R8:
994 case FORMAT_R8G8B8:
995 case FORMAT_B8G8R8:
996 case FORMAT_X8B8G8R8:
997 case FORMAT_A8B8G8R8:
998 case FORMAT_SRGB8_X8:
999 case FORMAT_SRGB8_A8:
1000 scale = vector(0xFF, 0xFF, 0xFF, 0xFF);
1001 break;
1002 case FORMAT_R8_SNORM:
1003 case FORMAT_G8R8_SNORM:
1004 case FORMAT_X8B8G8R8_SNORM:
1005 case FORMAT_A8B8G8R8_SNORM:
1006 scale = vector(0x7F, 0x7F, 0x7F, 0x7F);
1007 break;
1008 case FORMAT_A16B16G16R16:
1009 scale = vector(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF);
1010 break;
1011 case FORMAT_R8I:
1012 case FORMAT_R8UI:
1013 case FORMAT_G8R8I:
1014 case FORMAT_G8R8UI:
1015 case FORMAT_X8B8G8R8I:
1016 case FORMAT_X8B8G8R8UI:
1017 case FORMAT_A8B8G8R8I:
1018 case FORMAT_A8B8G8R8UI:
1019 case FORMAT_R16I:
1020 case FORMAT_R16UI:
1021 case FORMAT_G16R16:
1022 case FORMAT_G16R16I:
1023 case FORMAT_G16R16UI:
1024 case FORMAT_X16B16G16R16I:
1025 case FORMAT_X16B16G16R16UI:
1026 case FORMAT_A16B16G16R16I:
1027 case FORMAT_A16B16G16R16UI:
1028 case FORMAT_R32I:
1029 case FORMAT_R32UI:
1030 case FORMAT_G32R32I:
1031 case FORMAT_G32R32UI:
1032 case FORMAT_X32B32G32R32I:
1033 case FORMAT_X32B32G32R32UI:
1034 case FORMAT_A32B32G32R32I:
1035 case FORMAT_A32B32G32R32UI:
1036 case FORMAT_A32B32G32R32F:
1037 case FORMAT_X32B32G32R32F:
1038 case FORMAT_X32B32G32R32F_UNSIGNED:
1039 case FORMAT_B32G32R32F:
1040 case FORMAT_G32R32F:
1041 case FORMAT_R32F:
1042 case FORMAT_A2B10G10R10UI:
1043 scale = vector(1.0f, 1.0f, 1.0f, 1.0f);
1044 break;
1045 case FORMAT_R5G6B5:
1046 scale = vector(0x1F, 0x3F, 0x1F, 1.0f);
1047 break;
1048 case FORMAT_A2B10G10R10:
1049 scale = vector(0x3FF, 0x3FF, 0x3FF, 0x03);
1050 break;
1051 case FORMAT_D16:
1052 scale = vector(0xFFFF, 0.0f, 0.0f, 0.0f);
1053 break;
1054 case FORMAT_D24S8:
1055 case FORMAT_D24X8:
1056 scale = vector(0xFFFFFF, 0.0f, 0.0f, 0.0f);
1057 break;
1058 case FORMAT_D32:
1059 scale = vector(static_cast<float>(0xFFFFFFFF), 0.0f, 0.0f, 0.0f);
1060 break;
1061 case FORMAT_D32F:
1062 case FORMAT_D32FS8:
1063 case FORMAT_D32F_COMPLEMENTARY:
1064 case FORMAT_D32FS8_COMPLEMENTARY:
1065 case FORMAT_D32F_LOCKABLE:
1066 case FORMAT_D32FS8_TEXTURE:
1067 case FORMAT_D32F_SHADOW:
1068 case FORMAT_D32FS8_SHADOW:
1069 case FORMAT_S8:
1070 scale = vector(1.0f, 1.0f, 1.0f, 1.0f);
1071 break;
1072 default:
1073 return false;
1074 }
1075
1076 return true;
1077 }
1078
1079 bool Blitter::ApplyScaleAndClamp(Float4 &value, const State &state, bool preScaled)
1080 {
1081 float4 scale, unscale;
1082 if(state.clearOperation &&
1083 Surface::isNonNormalizedInteger(state.sourceFormat) &&
1084 !Surface::isNonNormalizedInteger(state.destFormat))
1085 {
1086 // If we're clearing a buffer from an int or uint color into a normalized color,
1087 // then the whole range of the int or uint color must be scaled between 0 and 1.
1088 switch(state.sourceFormat)
1089 {
1090 case FORMAT_A32B32G32R32I:
1091 unscale = replicate(static_cast<float>(0x7FFFFFFF));
1092 break;
1093 case FORMAT_A32B32G32R32UI:
1094 unscale = replicate(static_cast<float>(0xFFFFFFFF));
1095 break;
1096 default:
1097 return false;
1098 }
1099 }
1100 else if(!GetScale(unscale, state.sourceFormat))
1101 {
1102 return false;
1103 }
1104
1105 if(!GetScale(scale, state.destFormat))
1106 {
1107 return false;
1108 }
1109
1110 bool srcSRGB = Surface::isSRGBformat(state.sourceFormat);
1111 bool dstSRGB = Surface::isSRGBformat(state.destFormat);
1112
1113 if(state.convertSRGB && ((srcSRGB && !preScaled) || dstSRGB)) // One of the formats is sRGB encoded.
1114 {
1115 value *= preScaled ? Float4(1.0f / scale.x, 1.0f / scale.y, 1.0f / scale.z, 1.0f / scale.w) : // Unapply scale
1116 Float4(1.0f / unscale.x, 1.0f / unscale.y, 1.0f / unscale.z, 1.0f / unscale.w); // Apply unscale
1117 value = (srcSRGB && !preScaled) ? sRGBtoLinear(value) : LinearToSRGB(value);
1118 value *= Float4(scale.x, scale.y, scale.z, scale.w); // Apply scale
1119 }
1120 else if(unscale != scale)
1121 {
1122 value *= Float4(scale.x / unscale.x, scale.y / unscale.y, scale.z / unscale.z, scale.w / unscale.w);
1123 }
1124
1125 if(state.destFormat == FORMAT_X32B32G32R32F_UNSIGNED)
1126 {
1127 value = Max(value, Float4(0.0f)); // TODO: Only necessary if source is signed.
1128 }
1129 else if(Surface::isFloatFormat(state.sourceFormat) && !Surface::isFloatFormat(state.destFormat))
1130 {
1131 value = Min(value, Float4(scale.x, scale.y, scale.z, scale.w));
1132
1133 value = Max(value, Float4(Surface::isUnsignedComponent(state.destFormat, 0) ? 0.0f : -scale.x,
1134 Surface::isUnsignedComponent(state.destFormat, 1) ? 0.0f : -scale.y,
1135 Surface::isUnsignedComponent(state.destFormat, 2) ? 0.0f : -scale.z,
1136 Surface::isUnsignedComponent(state.destFormat, 3) ? 0.0f : -scale.w));
1137 }
1138
1139 return true;
1140 }
1141
1142 Int Blitter::ComputeOffset(Int &x, Int &y, Int &pitchB, int bytes, bool quadLayout)
1143 {
1144 if(!quadLayout)
1145 {
1146 return y * pitchB + x * bytes;
1147 }
1148 else
1149 {
1150 // (x & ~1) * 2 + (x & 1) == (x - (x & 1)) * 2 + (x & 1) == x * 2 - (x & 1) * 2 + (x & 1) == x * 2 - (x & 1)
1151 return (y & Int(~1)) * pitchB +
1152 ((y & Int(1)) * 2 + x * 2 - (x & Int(1))) * bytes;
1153 }
1154 }
1155
1156 Float4 Blitter::LinearToSRGB(Float4 &c)
1157 {
1158 Float4 lc = Min(c, Float4(0.0031308f)) * Float4(12.92f);
1159 Float4 ec = Float4(1.055f) * power(c, Float4(1.0f / 2.4f)) - Float4(0.055f);
1160
1161 Float4 s = c;
1162 s.xyz = Max(lc, ec);
1163
1164 return s;
1165 }
1166
1167 Float4 Blitter::sRGBtoLinear(Float4 &c)
1168 {
1169 Float4 lc = c * Float4(1.0f / 12.92f);
1170 Float4 ec = power((c + Float4(0.055f)) * Float4(1.0f / 1.055f), Float4(2.4f));
1171
1172 Int4 linear = CmpLT(c, Float4(0.04045f));
1173
1174 Float4 s = c;
1175 s.xyz = As<Float4>((linear & As<Int4>(lc)) | (~linear & As<Int4>(ec))); // FIXME: IfThenElse()
1176
1177 return s;
1178 }
1179
1180 Routine *Blitter::generate(const State &state)
1181 {
1182 Function<Void(Pointer<Byte>)> function;
1183 {
1184 Pointer<Byte> blit(function.Arg<0>());
1185
1186 Pointer<Byte> source = *Pointer<Pointer<Byte>>(blit + OFFSET(BlitData,source));
1187 Pointer<Byte> dest = *Pointer<Pointer<Byte>>(blit + OFFSET(BlitData,dest));
1188 Int sPitchB = *Pointer<Int>(blit + OFFSET(BlitData,sPitchB));
1189 Int dPitchB = *Pointer<Int>(blit + OFFSET(BlitData,dPitchB));
1190
1191 Float x0 = *Pointer<Float>(blit + OFFSET(BlitData,x0));
1192 Float y0 = *Pointer<Float>(blit + OFFSET(BlitData,y0));
1193 Float w = *Pointer<Float>(blit + OFFSET(BlitData,w));
1194 Float h = *Pointer<Float>(blit + OFFSET(BlitData,h));
1195
1196 Int x0d = *Pointer<Int>(blit + OFFSET(BlitData,x0d));
1197 Int x1d = *Pointer<Int>(blit + OFFSET(BlitData,x1d));
1198 Int y0d = *Pointer<Int>(blit + OFFSET(BlitData,y0d));
1199 Int y1d = *Pointer<Int>(blit + OFFSET(BlitData,y1d));
1200
1201 Int sWidth = *Pointer<Int>(blit + OFFSET(BlitData,sWidth));
1202 Int sHeight = *Pointer<Int>(blit + OFFSET(BlitData,sHeight));
1203
1204 bool intSrc = Surface::isNonNormalizedInteger(state.sourceFormat);
1205 bool intDst = Surface::isNonNormalizedInteger(state.destFormat);
1206 bool intBoth = intSrc && intDst;
1207 bool srcQuadLayout = Surface::hasQuadLayout(state.sourceFormat);
1208 bool dstQuadLayout = Surface::hasQuadLayout(state.destFormat);
1209 int srcBytes = Surface::bytes(state.sourceFormat);
1210 int dstBytes = Surface::bytes(state.destFormat);
1211
1212 bool hasConstantColorI = false;
1213 Int4 constantColorI;
1214 bool hasConstantColorF = false;
1215 Float4 constantColorF;
1216 if(state.clearOperation)
1217 {
1218 if(intBoth) // Integer types
1219 {
1220 if(!read(constantColorI, source, state))
1221 {
1222 return nullptr;
1223 }
1224 hasConstantColorI = true;
1225 }
1226 else
1227 {
1228 if(!read(constantColorF, source, state))
1229 {
1230 return nullptr;
1231 }
1232 hasConstantColorF = true;
1233
1234 if(!ApplyScaleAndClamp(constantColorF, state))
1235 {
1236 return nullptr;
1237 }
1238 }
1239 }
1240
1241 For(Int j = y0d, j < y1d, j++)
1242 {
1243 Float y = state.clearOperation ? RValue<Float>(y0) : y0 + Float(j) * h;
1244 Pointer<Byte> destLine = dest + (dstQuadLayout ? j & Int(~1) : RValue<Int>(j)) * dPitchB;
1245
1246 For(Int i = x0d, i < x1d, i++)
1247 {
1248 Float x = state.clearOperation ? RValue<Float>(x0) : x0 + Float(i) * w;
1249 Pointer<Byte> d = destLine + (dstQuadLayout ? (((j & Int(1)) << 1) + (i * 2) - (i & Int(1))) : RValue<Int>(i)) * dstBytes;
1250
1251 if(hasConstantColorI)
1252 {
1253 if(!write(constantColorI, d, state))
1254 {
1255 return nullptr;
1256 }
1257 }
1258 else if(hasConstantColorF)
1259 {
1260 for(int s = 0; s < state.destSamples; s++)
1261 {
1262 if(!write(constantColorF, d, state))
1263 {
1264 return nullptr;
1265 }
1266
1267 d += *Pointer<Int>(blit + OFFSET(BlitData, dSliceB));
1268 }
1269 }
1270 else if(intBoth) // Integer types do not support filtering
1271 {
1272 Int4 color; // When both formats are true integer types, we don't go to float to avoid losing precision
1273 Int X = Int(x);
1274 Int Y = Int(y);
1275
1276 if(state.clampToEdge)
1277 {
1278 X = Clamp(X, 0, sWidth - 1);
1279 Y = Clamp(Y, 0, sHeight - 1);
1280 }
1281
1282 Pointer<Byte> s = source + ComputeOffset(X, Y, sPitchB, srcBytes, srcQuadLayout);
1283
1284 if(!read(color, s, state))
1285 {
1286 return nullptr;
1287 }
1288
1289 if(!write(color, d, state))
1290 {
1291 return nullptr;
1292 }
1293 }
1294 else
1295 {
1296 Float4 color;
1297
1298 bool preScaled = false;
1299 if(!state.filter || intSrc)
1300 {
1301 Int X = Int(x);
1302 Int Y = Int(y);
1303
1304 if(state.clampToEdge)
1305 {
1306 X = Clamp(X, 0, sWidth - 1);
1307 Y = Clamp(Y, 0, sHeight - 1);
1308 }
1309
1310 Pointer<Byte> s = source + ComputeOffset(X, Y, sPitchB, srcBytes, srcQuadLayout);
1311
1312 if(!read(color, s, state))
1313 {
1314 return nullptr;
1315 }
1316 }
1317 else // Bilinear filtering
1318 {
1319 Float X = x;
1320 Float Y = y;
1321
1322 if(state.clampToEdge)
1323 {
1324 X = Min(Max(x, 0.5f), Float(sWidth) - 0.5f);
1325 Y = Min(Max(y, 0.5f), Float(sHeight) - 0.5f);
1326 }
1327
1328 Float x0 = X - 0.5f;
1329 Float y0 = Y - 0.5f;
1330
1331 Int X0 = Max(Int(x0), 0);
1332 Int Y0 = Max(Int(y0), 0);
1333
1334 Int X1 = X0 + 1;
1335 Int Y1 = Y0 + 1;
1336 X1 = IfThenElse(X1 >= sWidth, X0, X1);
1337 Y1 = IfThenElse(Y1 >= sHeight, Y0, Y1);
1338
1339 Pointer<Byte> s00 = source + ComputeOffset(X0, Y0, sPitchB, srcBytes, srcQuadLayout);
1340 Pointer<Byte> s01 = source + ComputeOffset(X1, Y0, sPitchB, srcBytes, srcQuadLayout);
1341 Pointer<Byte> s10 = source + ComputeOffset(X0, Y1, sPitchB, srcBytes, srcQuadLayout);
1342 Pointer<Byte> s11 = source + ComputeOffset(X1, Y1, sPitchB, srcBytes, srcQuadLayout);
1343
1344 Float4 c00; if(!read(c00, s00, state)) return nullptr;
1345 Float4 c01; if(!read(c01, s01, state)) return nullptr;
1346 Float4 c10; if(!read(c10, s10, state)) return nullptr;
1347 Float4 c11; if(!read(c11, s11, state)) return nullptr;
1348
1349 if(state.convertSRGB && Surface::isSRGBformat(state.sourceFormat)) // sRGB -> RGB
1350 {
1351 if(!ApplyScaleAndClamp(c00, state)) return nullptr;
1352 if(!ApplyScaleAndClamp(c01, state)) return nullptr;
1353 if(!ApplyScaleAndClamp(c10, state)) return nullptr;
1354 if(!ApplyScaleAndClamp(c11, state)) return nullptr;
1355 preScaled = true;
1356 }
1357
1358 Float4 fx = Float4(x0 - Float(X0));
1359 Float4 fy = Float4(y0 - Float(Y0));
1360 Float4 ix = Float4(1.0f) - fx;
1361 Float4 iy = Float4(1.0f) - fy;
1362
1363 color = (c00 * ix + c01 * fx) * iy +
1364 (c10 * ix + c11 * fx) * fy;
1365 }
1366
1367 if(!ApplyScaleAndClamp(color, state, preScaled))
1368 {
1369 return nullptr;
1370 }
1371
1372 for(int s = 0; s < state.destSamples; s++)
1373 {
1374 if(!write(color, d, state))
1375 {
1376 return nullptr;
1377 }
1378
1379 d += *Pointer<Int>(blit + OFFSET(BlitData,dSliceB));
1380 }
1381 }
1382 }
1383 }
1384 }
1385
1386 return function(L"BlitRoutine");
1387 }
1388
1389 bool Blitter::blitReactor(Surface *source, const SliceRectF &sourceRect, Surface *dest, const SliceRect &destRect, const Blitter::Options &options)
1390 {
1391 ASSERT(!options.clearOperation || ((source->getWidth() == 1) && (source->getHeight() == 1) && (source->getDepth() == 1)));
1392
1393 Rect dRect = destRect;
1394 RectF sRect = sourceRect;
1395 if(destRect.x0 > destRect.x1)
1396 {
1397 swap(dRect.x0, dRect.x1);
1398 swap(sRect.x0, sRect.x1);
1399 }
1400 if(destRect.y0 > destRect.y1)
1401 {
1402 swap(dRect.y0, dRect.y1);
1403 swap(sRect.y0, sRect.y1);
1404 }
1405
1406 State state(options);
1407 state.clampToEdge = (sourceRect.x0 < 0.0f) ||
1408 (sourceRect.y0 < 0.0f) ||
1409 (sourceRect.x1 > (float)source->getWidth()) ||
1410 (sourceRect.y1 > (float)source->getHeight());
1411
1412 bool useSourceInternal = !source->isExternalDirty();
1413 bool useDestInternal = !dest->isExternalDirty();
1414 bool isStencil = options.useStencil;
1415
1416 state.sourceFormat = isStencil ? source->getStencilFormat() : source->getFormat(useSourceInternal);
1417 state.destFormat = isStencil ? dest->getStencilFormat() : dest->getFormat(useDestInternal);
1418 state.destSamples = dest->getSamples();
1419
1420 criticalSection.lock();
1421 Routine *blitRoutine = blitCache->query(state);
1422
1423 if(!blitRoutine)
1424 {
1425 blitRoutine = generate(state);
1426
1427 if(!blitRoutine)
1428 {
1429 criticalSection.unlock();
1430 return false;
1431 }
1432
1433 blitCache->add(state, blitRoutine);
1434 }
1435
1436 criticalSection.unlock();
1437
1438 void (*blitFunction)(const BlitData *data) = (void(*)(const BlitData*))blitRoutine->getEntry();
1439
1440 BlitData data;
1441
1442 bool isRGBA = options.writeMask == 0xF;
1443 bool isEntireDest = dest->isEntire(destRect);
1444
1445 data.source = isStencil ? source->lockStencil(0, 0, 0, sw::PUBLIC) :
1446 source->lock(0, 0, sourceRect.slice, sw::LOCK_READONLY, sw::PUBLIC, useSourceInternal);
1447 data.dest = isStencil ? dest->lockStencil(0, 0, 0, sw::PUBLIC) :
1448 dest->lock(0, 0, destRect.slice, isRGBA ? (isEntireDest ? sw::LOCK_DISCARD : sw::LOCK_WRITEONLY) : sw::LOCK_READWRITE, sw::PUBLIC, useDestInternal);
1449 data.sPitchB = isStencil ? source->getStencilPitchB() : source->getPitchB(useSourceInternal);
1450 data.dPitchB = isStencil ? dest->getStencilPitchB() : dest->getPitchB(useDestInternal);
1451 data.dSliceB = isStencil ? dest->getStencilSliceB() : dest->getSliceB(useDestInternal);
1452
1453 data.w = sRect.width() / dRect.width();
1454 data.h = sRect.height() / dRect.height();
1455 data.x0 = sRect.x0 + (0.5f - dRect.x0) * data.w;
1456 data.y0 = sRect.y0 + (0.5f - dRect.y0) * data.h;
1457
1458 data.x0d = dRect.x0;
1459 data.x1d = dRect.x1;
1460 data.y0d = dRect.y0;
1461 data.y1d = dRect.y1;
1462
1463 data.sWidth = source->getWidth();
1464 data.sHeight = source->getHeight();
1465
1466 blitFunction(&data);
1467
1468 if(isStencil)
1469 {
1470 source->unlockStencil();
1471 dest->unlockStencil();
1472 }
1473 else
1474 {
1475 source->unlock(useSourceInternal);
1476 dest->unlock(useDestInternal);
1477 }
1478
1479 return true;
1480 }
1481}