blob: 0b4bdab0c12e33cbb73cca154728074b5e4cb8a0 [file] [log] [blame]
Nicolas Capens68a82382018-10-02 13:16:55 -04001// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#include "Blitter.hpp"
16
Nicolas Capens1d8c8db2018-11-05 16:30:42 -050017#include "Pipeline/ShaderCore.hpp"
Nicolas Capens68a82382018-10-02 13:16:55 -040018#include "Reactor/Reactor.hpp"
Nicolas Capens02cbe8e2019-08-05 15:10:05 -040019#include "System/Half.hpp"
Nicolas Capens1d8c8db2018-11-05 16:30:42 -050020#include "System/Memory.hpp"
Chris Forbesebe5f7f2019-01-16 10:38:34 -080021#include "Vulkan/VkDebug.hpp"
Alexis Hetu33642272019-03-01 11:55:59 -050022#include "Vulkan/VkImage.hpp"
Chris Forbes529eda32019-05-08 10:27:05 -070023#include "Vulkan/VkBuffer.hpp"
Nicolas Capens68a82382018-10-02 13:16:55 -040024
Nicolas Capensb8c63932019-03-19 01:52:40 -040025#include <utility>
26
Nicolas Capens157ba262019-12-10 17:49:14 -050027namespace sw {
28
29Blitter::Blitter() :
30 blitMutex(),
31 blitCache(1024),
32 cornerUpdateMutex(),
33 cornerUpdateCache(64) // We only need one of these per format
Nicolas Capens68a82382018-10-02 13:16:55 -040034{
Nicolas Capens157ba262019-12-10 17:49:14 -050035}
36
37Blitter::~Blitter()
38{
39}
40
// Clears a subresource range of 'dest' to the value stored at 'pixel'.
// 'format' describes how the value at 'pixel' is encoded, while 'viewFormat' is the
// format of the image view being cleared; the aspect-specific destination format is
// derived from it. If 'renderArea' is null, the full extent of each mip level in the
// range is cleared; otherwise only the given rectangle is, and the range must then
// cover a single mip level (asserted below).
// NOTE(review): the clamping below reads 'pixel' as four floats whenever the view
// format is normalized — assumes normalized clears always arrive as float data;
// confirm against callers.
void Blitter::clear(void *pixel, vk::Format format, vk::Image *dest, const vk::Format& viewFormat, const VkImageSubresourceRange& subresourceRange, const VkRect2D* renderArea)
{
	// Color, depth and stencil are cleared one aspect at a time.
	VkImageAspectFlagBits aspect = static_cast<VkImageAspectFlagBits>(subresourceRange.aspectMask);
	vk::Format dstFormat = viewFormat.getAspectFormat(aspect);
	if(dstFormat == VK_FORMAT_UNDEFINED)
	{
		// Nothing to clear for this aspect.
		return;
	}

	// Normalized destination formats can only represent [0,1] (unsigned) or
	// [-1,1] (signed), so clamp the clear color to the representable range first.
	float *pPixel = static_cast<float *>(pixel);
	if (viewFormat.isUnsignedNormalized())
	{
		pPixel[0] = sw::clamp(pPixel[0], 0.0f, 1.0f);
		pPixel[1] = sw::clamp(pPixel[1], 0.0f, 1.0f);
		pPixel[2] = sw::clamp(pPixel[2], 0.0f, 1.0f);
		pPixel[3] = sw::clamp(pPixel[3], 0.0f, 1.0f);
	}
	else if (viewFormat.isSignedNormalized())
	{
		pPixel[0] = sw::clamp(pPixel[0], -1.0f, 1.0f);
		pPixel[1] = sw::clamp(pPixel[1], -1.0f, 1.0f);
		pPixel[2] = sw::clamp(pPixel[2], -1.0f, 1.0f);
		pPixel[3] = sw::clamp(pPixel[3], -1.0f, 1.0f);
	}

	// Try a direct memory-fill first; falls back to a generated blit routine below.
	if(fastClear(pixel, format, dest, dstFormat, subresourceRange, renderArea))
	{
		return;
	}

	// Build (or fetch from cache) a routine that converts the single source pixel
	// into the destination format. Options{ 0xF } writes all four channels.
	State state(format, dstFormat, 1, dest->getSampleCountFlagBits(), Options{ 0xF });
	auto blitRoutine = getBlitRoutine(state);
	if(!blitRoutine)
	{
		return;
	}

	VkImageSubresourceLayers subresLayers =
	{
		subresourceRange.aspectMask,
		subresourceRange.baseMipLevel,
		subresourceRange.baseArrayLayer,
		1
	};

	uint32_t lastMipLevel = dest->getLastMipLevel(subresourceRange);
	uint32_t lastLayer = dest->getLastLayerIndex(subresourceRange);

	VkRect2D area = { { 0, 0 }, { 0, 0 } };
	if(renderArea)
	{
		// A render-area clear only makes sense for a single mip level.
		ASSERT(subresourceRange.levelCount == 1);
		area = *renderArea;
	}

	for(; subresLayers.mipLevel <= lastMipLevel; subresLayers.mipLevel++)
	{
		VkExtent3D extent = dest->getMipLevelExtent(aspect, subresLayers.mipLevel);
		if(!renderArea)
		{
			// No render area given: clear the whole mip level.
			area.extent.width = extent.width;
			area.extent.height = extent.height;
		}

		// The source is the single clear-value pixel; w/h of 0 means the routine
		// samples the same source texel for every destination texel.
		BlitData data =
		{
			pixel, nullptr, // source, dest

			format.bytes(), // sPitchB
			dest->rowPitchBytes(aspect, subresLayers.mipLevel), // dPitchB
			0, // sSliceB (unused in clear operations)
			dest->slicePitchBytes(aspect, subresLayers.mipLevel), // dSliceB

			0.5f, 0.5f, 0.0f, 0.0f, // x0, y0, w, h

			area.offset.y, static_cast<int>(area.offset.y + area.extent.height), // y0d, y1d
			area.offset.x, static_cast<int>(area.offset.x + area.extent.width), // x0d, x1d

			0, 0, // sWidth, sHeight
		};

		if (renderArea && dest->is3DSlice())
		{
			// Reinterpret layers as depth slices
			subresLayers.baseArrayLayer = 0;
			subresLayers.layerCount = 1;
			for (uint32_t depth = subresourceRange.baseArrayLayer; depth <= lastLayer; depth++)
			{
				data.dest = dest->getTexelPointer({0, 0, static_cast<int32_t>(depth)}, subresLayers);
				blitRoutine(&data);
			}
		}
		else
		{
			// Clear every requested array layer, and every depth slice of 3D images.
			for(subresLayers.baseArrayLayer = subresourceRange.baseArrayLayer; subresLayers.baseArrayLayer <= lastLayer; subresLayers.baseArrayLayer++)
			{
				for(uint32_t depth = 0; depth < extent.depth; depth++)
				{
					data.dest = dest->getTexelPointer({ 0, 0, static_cast<int32_t>(depth) }, subresLayers);

					blitRoutine(&data);
				}
			}
		}
	}
}
147
// Fast-path clear: packs the clear color once into a 16- or 32-bit texel value and
// fills destination memory directly, bypassing the generated blit routine.
// Returns false when the fast path doesn't apply (clear value not given as four
// 32-bit floats, or the view format has no packing case below), in which case the
// caller falls back to the generic path.
bool Blitter::fastClear(void *pixel, vk::Format format, vk::Image *dest, const vk::Format& viewFormat, const VkImageSubresourceRange& subresourceRange, const VkRect2D* renderArea)
{
	// Only float-encoded clear values are handled here.
	if(format != VK_FORMAT_R32G32B32A32_SFLOAT)
	{
		return false;
	}

	float *color = (float*)pixel;
	float r = color[0];
	float g = color[1];
	float b = color[2];
	float a = color[3];

	// The packed texel value replicated across the destination. For 16-bit
	// formats only the low 16 bits are used.
	uint32_t packed;

	VkImageAspectFlagBits aspect = static_cast<VkImageAspectFlagBits>(subresourceRange.aspectMask);
	switch(viewFormat)
	{
	// UNORM packings: scale to the channel's max value and round to nearest
	// by adding 0.5 before truncation. (The color was clamped to [0,1] by clear().)
	case VK_FORMAT_R5G6B5_UNORM_PACK16:
		packed = ((uint16_t)(31 * b + 0.5f) << 0) |
		         ((uint16_t)(63 * g + 0.5f) << 5) |
		         ((uint16_t)(31 * r + 0.5f) << 11);
		break;
	case VK_FORMAT_B5G6R5_UNORM_PACK16:
		packed = ((uint16_t)(31 * r + 0.5f) << 0) |
		         ((uint16_t)(63 * g + 0.5f) << 5) |
		         ((uint16_t)(31 * b + 0.5f) << 11);
		break;
	case VK_FORMAT_A8B8G8R8_UINT_PACK32:
	case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
	case VK_FORMAT_R8G8B8A8_UNORM:
		packed = ((uint32_t)(255 * a + 0.5f) << 24) |
		         ((uint32_t)(255 * b + 0.5f) << 16) |
		         ((uint32_t)(255 * g + 0.5f) << 8) |
		         ((uint32_t)(255 * r + 0.5f) << 0);
		break;
	case VK_FORMAT_B8G8R8A8_UNORM:
		packed = ((uint32_t)(255 * a + 0.5f) << 24) |
		         ((uint32_t)(255 * r + 0.5f) << 16) |
		         ((uint32_t)(255 * g + 0.5f) << 8) |
		         ((uint32_t)(255 * b + 0.5f) << 0);
		break;
	// Packed small-float formats use dedicated conversion helpers.
	case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
		packed = R11G11B10F(color);
		break;
	case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32:
		packed = RGB9E5(color);
		break;
	default:
		// No packing case for this format: take the generic path instead.
		return false;
	}

	VkImageSubresourceLayers subresLayers =
	{
		subresourceRange.aspectMask,
		subresourceRange.baseMipLevel,
		subresourceRange.baseArrayLayer,
		1
	};
	uint32_t lastMipLevel = dest->getLastMipLevel(subresourceRange);
	uint32_t lastLayer = dest->getLastLayerIndex(subresourceRange);

	VkRect2D area = { { 0, 0 }, { 0, 0 } };
	if(renderArea)
	{
		// A render-area clear only makes sense for a single mip level.
		ASSERT(subresourceRange.levelCount == 1);
		area = *renderArea;
	}

	for(; subresLayers.mipLevel <= lastMipLevel; subresLayers.mipLevel++)
	{
		int rowPitchBytes = dest->rowPitchBytes(aspect, subresLayers.mipLevel);
		int slicePitchBytes = dest->slicePitchBytes(aspect, subresLayers.mipLevel);
		VkExtent3D extent = dest->getMipLevelExtent(aspect, subresLayers.mipLevel);
		if(!renderArea)
		{
			// No render area given: clear the whole mip level.
			area.extent.width = extent.width;
			area.extent.height = extent.height;
		}
		if(dest->is3DSlice())
		{
			extent.depth = 1; // The 3D image is instead interpreted as a 2D image with layers
		}

		for(subresLayers.baseArrayLayer = subresourceRange.baseArrayLayer; subresLayers.baseArrayLayer <= lastLayer; subresLayers.baseArrayLayer++)
		{
			for(uint32_t depth = 0; depth < extent.depth; depth++)
			{
				uint8_t *slice = (uint8_t*)dest->getTexelPointer(
					{ area.offset.x, area.offset.y, static_cast<int32_t>(depth) }, subresLayers);

				// Fill every sample of a multisampled image; samples are laid
				// out slicePitchBytes apart.
				for(int j = 0; j < dest->getSampleCountFlagBits(); j++)
				{
					uint8_t *d = slice;

					// Fill row by row with the packed value.
					switch(viewFormat.bytes())
					{
					case 2:
						for(uint32_t i = 0; i < area.extent.height; i++)
						{
							ASSERT(d < dest->end());  // bounds sanity check
							sw::clear((uint16_t*)d, static_cast<uint16_t>(packed), area.extent.width);
							d += rowPitchBytes;
						}
						break;
					case 4:
						for(uint32_t i = 0; i < area.extent.height; i++)
						{
							ASSERT(d < dest->end());  // bounds sanity check
							sw::clear((uint32_t*)d, packed, area.extent.width);
							d += rowPitchBytes;
						}
						break;
					default:
						// Unreachable: the packing switch above only admits
						// 2- and 4-byte formats.
						assert(false);
					}

					slice += slicePitchBytes;
				}
			}
		}
	}

	return true;
}
273
// Emits Reactor code that reads one texel at 'element' (encoded as
// state.sourceFormat) and widens it into a Float4. Integer channels are converted
// to float with their raw (unnormalized) values. Channels absent from the format
// keep the default (0, 0, 0, 1); for integer formats c.w is instead set to the
// channel type's maximum value — presumably so later normalization yields
// alpha = 1.0; confirm against the scaling code in the caller.
Float4 Blitter::readFloat4(Pointer<Byte> element, const State &state)
{
	Float4 c(0.0f, 0.0f, 0.0f, 1.0f);

	switch(state.sourceFormat)
	{
	case VK_FORMAT_B4G4R4A4_UNORM_PACK16:
		// Unpack two 4-bit nibbles per byte: A|B in byte 0, G|R in byte 1.
		c.w = Float(Int(*Pointer<Byte>(element)) & Int(0xF));
		c.x = Float((Int(*Pointer<Byte>(element)) >> 4) & Int(0xF));
		c.y = Float(Int(*Pointer<Byte>(element + 1)) & Int(0xF));
		c.z = Float((Int(*Pointer<Byte>(element + 1)) >> 4) & Int(0xF));
		break;
	case VK_FORMAT_R8_SINT:
	case VK_FORMAT_R8_SNORM:
		c.x = Float(Int(*Pointer<SByte>(element)));
		c.w = float(0x7F);
		break;
	case VK_FORMAT_R8_UNORM:
	case VK_FORMAT_R8_UINT:
	case VK_FORMAT_R8_SRGB:
		c.x = Float(Int(*Pointer<Byte>(element)));
		c.w = float(0xFF);
		break;
	case VK_FORMAT_R16_SINT:
	case VK_FORMAT_R16_SNORM:
		c.x = Float(Int(*Pointer<Short>(element)));
		c.w = float(0x7FFF);
		break;
	case VK_FORMAT_R16_UNORM:
	case VK_FORMAT_R16_UINT:
		c.x = Float(Int(*Pointer<UShort>(element)));
		c.w = float(0xFFFF);
		break;
	case VK_FORMAT_R32_SINT:
		c.x = Float(*Pointer<Int>(element));
		c.w = float(0x7FFFFFFF);
		break;
	case VK_FORMAT_R32_UINT:
		c.x = Float(*Pointer<UInt>(element));
		c.w = float(0xFFFFFFFF);
		break;
	case VK_FORMAT_B8G8R8A8_SRGB:
	case VK_FORMAT_B8G8R8A8_UNORM:
		// Swizzle BGRA -> RGBA while widening.
		c = Float4(*Pointer<Byte4>(element)).zyxw;
		break;
	case VK_FORMAT_A8B8G8R8_SINT_PACK32:
	case VK_FORMAT_R8G8B8A8_SINT:
	case VK_FORMAT_A8B8G8R8_SNORM_PACK32:
	case VK_FORMAT_R8G8B8A8_SNORM:
		c = Float4(*Pointer<SByte4>(element));
		break;
	case VK_FORMAT_A8B8G8R8_UINT_PACK32:
	case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
	case VK_FORMAT_R8G8B8A8_UNORM:
	case VK_FORMAT_R8G8B8A8_UINT:
	case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
	case VK_FORMAT_R8G8B8A8_SRGB:
		c = Float4(*Pointer<Byte4>(element));
		break;
	case VK_FORMAT_R16G16B16A16_SINT:
		c = Float4(*Pointer<Short4>(element));
		break;
	case VK_FORMAT_R16G16B16A16_UNORM:
	case VK_FORMAT_R16G16B16A16_UINT:
		c = Float4(*Pointer<UShort4>(element));
		break;
	case VK_FORMAT_R32G32B32A32_SINT:
		c = Float4(*Pointer<Int4>(element));
		break;
	case VK_FORMAT_R32G32B32A32_UINT:
		c = Float4(*Pointer<UInt4>(element));
		break;
	case VK_FORMAT_R8G8_SINT:
	case VK_FORMAT_R8G8_SNORM:
		c.x = Float(Int(*Pointer<SByte>(element + 0)));
		c.y = Float(Int(*Pointer<SByte>(element + 1)));
		c.w = float(0x7F);
		break;
	case VK_FORMAT_R8G8_UNORM:
	case VK_FORMAT_R8G8_UINT:
	case VK_FORMAT_R8G8_SRGB:
		c.x = Float(Int(*Pointer<Byte>(element + 0)));
		c.y = Float(Int(*Pointer<Byte>(element + 1)));
		c.w = float(0xFF);
		break;
	case VK_FORMAT_R16G16_SINT:
	case VK_FORMAT_R16G16_SNORM:
		c.x = Float(Int(*Pointer<Short>(element + 0)));
		c.y = Float(Int(*Pointer<Short>(element + 2)));
		c.w = float(0x7FFF);
		break;
	case VK_FORMAT_R16G16_UNORM:
	case VK_FORMAT_R16G16_UINT:
		c.x = Float(Int(*Pointer<UShort>(element + 0)));
		c.y = Float(Int(*Pointer<UShort>(element + 2)));
		c.w = float(0xFFFF);
		break;
	case VK_FORMAT_R32G32_SINT:
		c.x = Float(*Pointer<Int>(element + 0));
		c.y = Float(*Pointer<Int>(element + 4));
		c.w = float(0x7FFFFFFF);
		break;
	case VK_FORMAT_R32G32_UINT:
		c.x = Float(*Pointer<UInt>(element + 0));
		c.y = Float(*Pointer<UInt>(element + 4));
		c.w = float(0xFFFFFFFF);
		break;
	case VK_FORMAT_R32G32B32A32_SFLOAT:
		c = *Pointer<Float4>(element);
		break;
	case VK_FORMAT_R32G32_SFLOAT:
		c.x = *Pointer<Float>(element + 0);
		c.y = *Pointer<Float>(element + 4);
		break;
	case VK_FORMAT_R32_SFLOAT:
		c.x = *Pointer<Float>(element);
		break;
	// The half-float cases deliberately fall through: each reads its highest
	// channel, then falls into the narrower format's cases for the rest.
	case VK_FORMAT_R16G16B16A16_SFLOAT:
		c.w = Float(*Pointer<Half>(element + 6));
		// fall through
	case VK_FORMAT_R16G16B16_SFLOAT:
		c.z = Float(*Pointer<Half>(element + 4));
		// fall through
	case VK_FORMAT_R16G16_SFLOAT:
		c.y = Float(*Pointer<Half>(element + 2));
		// fall through
	case VK_FORMAT_R16_SFLOAT:
		c.x = Float(*Pointer<Half>(element));
		break;
	case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
		// 10 (or 11) bit float formats are unsigned formats with a 5 bit exponent and a 5 (or 6) bit mantissa.
		// Since the Half float format also has a 5 bit exponent, we can convert these formats to half by
		// copy/pasting the bits so the exponent bits and top mantissa bits are aligned to the half format.
		// In this case, we have:
		// B B B B B B B B B B G G G G G G G G G G G R R R R R R R R R R R
		// 1st Short: |xxxxxxxxxx---------------------|
		// 2nd Short: |xxxx---------------------xxxxxx|
		// 3rd Short: |--------------------xxxxxxxxxxxx|
		// These memory reads overlap, but each of them contains an entire channel, so we can read this without
		// any int -> short conversion.
		c.x = Float(As<Half>((*Pointer<UShort>(element + 0) & UShort(0x07FF)) << UShort(4)));
		c.y = Float(As<Half>((*Pointer<UShort>(element + 1) & UShort(0x3FF8)) << UShort(1)));
		c.z = Float(As<Half>((*Pointer<UShort>(element + 2) & UShort(0xFFC0)) >> UShort(1)));
		break;
	case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32:
		// This type contains a common 5 bit exponent (E) and a 9 bit mantissa for R, G and B.
		c.x = Float(*Pointer<UInt>(element) & UInt(0x000001FF));         // R's mantissa (bits 0-8)
		c.y = Float((*Pointer<UInt>(element) & UInt(0x0003FE00)) >> 9);  // G's mantissa (bits 9-17)
		c.z = Float((*Pointer<UInt>(element) & UInt(0x07FC0000)) >> 18); // B's mantissa (bits 18-26)
		c *= Float4(
			// 2^E, using the exponent (bits 27-31) and treating it as an unsigned integer value
			Float(UInt(1) << ((*Pointer<UInt>(element) & UInt(0xF8000000)) >> 27)) *
			// Since the 9 bit mantissa values currently stored in RGB were converted straight
			// from int to float (in the [0, 1<<9] range instead of the [0, 1] range), they
			// are (1 << 9) times too high.
			// Also, the exponent has 5 bits and we compute the exponent bias of floating point
			// formats using "2^(k-1) - 1", so, in this case, the exponent bias is 2^(5-1)-1 = 15
			// Exponent bias (15) + number of mantissa bits per component (9) = 24
			Float(1.0f / (1 << 24)));
		c.w = 1.0f;
		break;
	case VK_FORMAT_R5G6B5_UNORM_PACK16:
		c.x = Float(Int((*Pointer<UShort>(element) & UShort(0xF800)) >> UShort(11)));
		c.y = Float(Int((*Pointer<UShort>(element) & UShort(0x07E0)) >> UShort(5)));
		c.z = Float(Int(*Pointer<UShort>(element) & UShort(0x001F)));
		break;
	case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
		c.w = Float(Int((*Pointer<UShort>(element) & UShort(0x8000)) >> UShort(15)));
		c.x = Float(Int((*Pointer<UShort>(element) & UShort(0x7C00)) >> UShort(10)));
		c.y = Float(Int((*Pointer<UShort>(element) & UShort(0x03E0)) >> UShort(5)));
		c.z = Float(Int(*Pointer<UShort>(element) & UShort(0x001F)));
		break;
	case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
	case VK_FORMAT_A2B10G10R10_UINT_PACK32:
		c.x = Float(Int((*Pointer<UInt>(element) & UInt(0x000003FF))));
		c.y = Float(Int((*Pointer<UInt>(element) & UInt(0x000FFC00)) >> 10));
		c.z = Float(Int((*Pointer<UInt>(element) & UInt(0x3FF00000)) >> 20));
		c.w = Float(Int((*Pointer<UInt>(element) & UInt(0xC0000000)) >> 30));
		break;
	// Depth/stencil aspects: the value goes into c.x.
	case VK_FORMAT_D16_UNORM:
		c.x = Float(Int((*Pointer<UShort>(element))));
		break;
	case VK_FORMAT_X8_D24_UNORM_PACK32:
		// Depth occupies the upper 24 bits; the low 8 are unused padding.
		c.x = Float(Int((*Pointer<UInt>(element) & UInt(0xFFFFFF00)) >> 8));
		break;
	case VK_FORMAT_D32_SFLOAT:
		c.x = *Pointer<Float>(element);
		break;
	case VK_FORMAT_S8_UINT:
		c.x = Float(Int(*Pointer<Byte>(element)));
		break;
	default:
		UNSUPPORTED("Blitter source format %d", (int)state.sourceFormat);
	}

	return c;
}
468
469void Blitter::write(Float4 &c, Pointer<Byte> element, const State &state)
470{
471 bool writeR = state.writeRed;
472 bool writeG = state.writeGreen;
473 bool writeB = state.writeBlue;
474 bool writeA = state.writeAlpha;
475 bool writeRGBA = writeR && writeG && writeB && writeA;
476
477 switch(state.destFormat)
478 {
479 case VK_FORMAT_R4G4_UNORM_PACK8:
480 if(writeR | writeG)
481 {
482 if(!writeR)
483 {
484 *Pointer<Byte>(element) = (Byte(RoundInt(Float(c.y))) & Byte(0xF)) |
485 (*Pointer<Byte>(element) & Byte(0xF0));
486 }
487 else if(!writeG)
488 {
489 *Pointer<Byte>(element) = (*Pointer<Byte>(element) & Byte(0xF)) |
490 (Byte(RoundInt(Float(c.x))) << Byte(4));
491 }
492 else
493 {
494 *Pointer<Byte>(element) = (Byte(RoundInt(Float(c.y))) & Byte(0xF)) |
495 (Byte(RoundInt(Float(c.x))) << Byte(4));
496 }
497 }
498 break;
499 case VK_FORMAT_R4G4B4A4_UNORM_PACK16:
500 if(writeR || writeG || writeB || writeA)
501 {
502 *Pointer<UShort>(element) = (writeR ? ((UShort(RoundInt(Float(c.x))) & UShort(0xF)) << UShort(12)) :
503 (*Pointer<UShort>(element) & UShort(0x000F))) |
504 (writeG ? ((UShort(RoundInt(Float(c.y))) & UShort(0xF)) << UShort(8)) :
505 (*Pointer<UShort>(element) & UShort(0x00F0))) |
506 (writeB ? ((UShort(RoundInt(Float(c.z))) & UShort(0xF)) << UShort(4)) :
507 (*Pointer<UShort>(element) & UShort(0x0F00))) |
508 (writeA ? (UShort(RoundInt(Float(c.w))) & UShort(0xF)) :
509 (*Pointer<UShort>(element) & UShort(0xF000)));
510 }
511 break;
512 case VK_FORMAT_B4G4R4A4_UNORM_PACK16:
513 if(writeRGBA)
514 {
515 *Pointer<UShort>(element) = UShort(RoundInt(Float(c.w)) & Int(0xF)) |
516 UShort((RoundInt(Float(c.x)) & Int(0xF)) << 4) |
517 UShort((RoundInt(Float(c.y)) & Int(0xF)) << 8) |
518 UShort((RoundInt(Float(c.z)) & Int(0xF)) << 12);
519 }
520 else
521 {
522 unsigned short mask = (writeA ? 0x000F : 0x0000) |
523 (writeR ? 0x00F0 : 0x0000) |
524 (writeG ? 0x0F00 : 0x0000) |
525 (writeB ? 0xF000 : 0x0000);
526 unsigned short unmask = ~mask;
527 *Pointer<UShort>(element) = (*Pointer<UShort>(element) & UShort(unmask)) |
528 ((UShort(RoundInt(Float(c.w)) & Int(0xF)) |
529 UShort((RoundInt(Float(c.x)) & Int(0xF)) << 4) |
530 UShort((RoundInt(Float(c.y)) & Int(0xF)) << 8) |
531 UShort((RoundInt(Float(c.z)) & Int(0xF)) << 12)) & UShort(mask));
532 }
533 break;
534 case VK_FORMAT_B8G8R8A8_SRGB:
535 case VK_FORMAT_B8G8R8A8_UNORM:
536 if(writeRGBA)
537 {
538 Short4 c0 = RoundShort4(c.zyxw);
539 *Pointer<Byte4>(element) = Byte4(PackUnsigned(c0, c0));
540 }
541 else
542 {
543 if(writeB) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.z))); }
544 if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
545 if(writeR) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.x))); }
546 if(writeA) { *Pointer<Byte>(element + 3) = Byte(RoundInt(Float(c.w))); }
547 }
548 break;
549 case VK_FORMAT_B8G8R8_SNORM:
550 if(writeB) { *Pointer<SByte>(element + 0) = SByte(RoundInt(Float(c.z))); }
551 if(writeG) { *Pointer<SByte>(element + 1) = SByte(RoundInt(Float(c.y))); }
552 if(writeR) { *Pointer<SByte>(element + 2) = SByte(RoundInt(Float(c.x))); }
553 break;
554 case VK_FORMAT_B8G8R8_UNORM:
555 case VK_FORMAT_B8G8R8_SRGB:
556 if(writeB) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.z))); }
557 if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
558 if(writeR) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.x))); }
559 break;
560 case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
561 case VK_FORMAT_R8G8B8A8_UNORM:
562 case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
563 case VK_FORMAT_R8G8B8A8_SRGB:
564 case VK_FORMAT_A8B8G8R8_UINT_PACK32:
565 case VK_FORMAT_R8G8B8A8_UINT:
566 case VK_FORMAT_R8G8B8A8_USCALED:
567 case VK_FORMAT_A8B8G8R8_USCALED_PACK32:
568 if(writeRGBA)
569 {
570 Short4 c0 = RoundShort4(c);
571 *Pointer<Byte4>(element) = Byte4(PackUnsigned(c0, c0));
572 }
573 else
574 {
575 if(writeR) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.x))); }
576 if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
577 if(writeB) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.z))); }
578 if(writeA) { *Pointer<Byte>(element + 3) = Byte(RoundInt(Float(c.w))); }
579 }
580 break;
581 case VK_FORMAT_R32G32B32A32_SFLOAT:
582 if(writeRGBA)
583 {
584 *Pointer<Float4>(element) = c;
585 }
586 else
587 {
588 if(writeR) { *Pointer<Float>(element) = c.x; }
589 if(writeG) { *Pointer<Float>(element + 4) = c.y; }
590 if(writeB) { *Pointer<Float>(element + 8) = c.z; }
591 if(writeA) { *Pointer<Float>(element + 12) = c.w; }
592 }
593 break;
594 case VK_FORMAT_R32G32B32_SFLOAT:
595 if(writeR) { *Pointer<Float>(element) = c.x; }
596 if(writeG) { *Pointer<Float>(element + 4) = c.y; }
597 if(writeB) { *Pointer<Float>(element + 8) = c.z; }
598 break;
599 case VK_FORMAT_R32G32_SFLOAT:
600 if(writeR && writeG)
601 {
602 *Pointer<Float2>(element) = Float2(c);
603 }
604 else
605 {
606 if(writeR) { *Pointer<Float>(element) = c.x; }
607 if(writeG) { *Pointer<Float>(element + 4) = c.y; }
608 }
609 break;
610 case VK_FORMAT_R32_SFLOAT:
611 if(writeR) { *Pointer<Float>(element) = c.x; }
612 break;
613 case VK_FORMAT_R16G16B16A16_SFLOAT:
614 if(writeA) { *Pointer<Half>(element + 6) = Half(c.w); }
615 case VK_FORMAT_R16G16B16_SFLOAT:
616 if(writeB) { *Pointer<Half>(element + 4) = Half(c.z); }
617 case VK_FORMAT_R16G16_SFLOAT:
618 if(writeG) { *Pointer<Half>(element + 2) = Half(c.y); }
619 case VK_FORMAT_R16_SFLOAT:
620 if(writeR) { *Pointer<Half>(element) = Half(c.x); }
621 break;
622 case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
623 {
624 // 10 (or 11) bit float formats are unsigned formats with a 5 bit exponent and a 5 (or 6) bit mantissa.
625 // Since the 16-bit half-precision float format also has a 5 bit exponent, we can extract these minifloats from them.
626
627 // FIXME(b/138944025): Handle negative values, Inf, and NaN.
628 // FIXME(b/138944025): Perform rounding before truncating the mantissa.
629 UInt r = (UInt(As<UShort>(Half(c.x))) & 0x00007FF0) >> 4;
630 UInt g = (UInt(As<UShort>(Half(c.y))) & 0x00007FF0) << 7;
631 UInt b = (UInt(As<UShort>(Half(c.z))) & 0x00007FE0) << 17;
632
633 UInt rgb = r | g | b;
634
635 UInt old = *Pointer<UInt>(element);
636
637 unsigned int mask = (writeR ? 0x000007FF : 0) |
638 (writeG ? 0x003FF800 : 0) |
639 (writeB ? 0xFFC00000 : 0);
640
641 *Pointer<UInt>(element) = (rgb & mask) | (old & ~mask);
642 }
643 break;
644 case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32:
645 {
646 ASSERT(writeRGBA); // Can't sensibly write just part of this format.
647
648 // Vulkan 1.1.117 section 15.2.1 RGB to Shared Exponent Conversion
649
650 constexpr int N = 9; // number of mantissa bits per component
651 constexpr int B = 15; // exponent bias
652 constexpr int E_max = 31; // maximum possible biased exponent value
653
654 // Maximum representable value.
655 constexpr float sharedexp_max = ((static_cast<float>(1 << N) - 1) / static_cast<float>(1 << N)) * static_cast<float>(1 << (E_max - B));
656
657 // Clamp components to valid range. NaN becomes 0.
658 Float red_c = Min(IfThenElse(!(c.x > 0), Float(0), Float(c.x)), sharedexp_max);
659 Float green_c = Min(IfThenElse(!(c.y > 0), Float(0), Float(c.y)), sharedexp_max);
660 Float blue_c = Min(IfThenElse(!(c.z > 0), Float(0), Float(c.z)), sharedexp_max);
661
662 // We're reducing the mantissa to 9 bits, so we must round up if the next
663 // bit is 1. In other words add 0.5 to the new mantissa's position and
664 // allow overflow into the exponent so we can scale correctly.
665 constexpr int half = 1 << (23 - N);
666 Float red_r = As<Float>(As<Int>(red_c) + half);
667 Float green_r = As<Float>(As<Int>(green_c) + half);
668 Float blue_r = As<Float>(As<Int>(blue_c) + half);
669
670 // The largest component determines the shared exponent. It can't be lower
671 // than 0 (after bias subtraction) so also limit to the mimimum representable.
672 constexpr float min_s = 0.5f / (1 << B);
673 Float max_s = Max(Max(red_r, green_r), Max(blue_r, min_s));
674
675 // Obtain the reciprocal of the shared exponent by inverting the bits,
676 // and scale by the new mantissa's size. Note that the IEEE-754 single-precision
677 // format has an implicit leading 1, but this shared component format does not.
678 Float scale = As<Float>((As<Int>(max_s) & 0x7F800000) ^ 0x7F800000) * (1 << (N - 2));
679
680 UInt R9 = RoundInt(red_c * scale);
681 UInt G9 = UInt(RoundInt(green_c * scale));
682 UInt B9 = UInt(RoundInt(blue_c * scale));
683 UInt E5 = (As<UInt>(max_s) >> 23) - 127 + 15 + 1;
684
685 UInt E5B9G9R9 = (E5 << 27) | (B9 << 18) | (G9 << 9) | R9;
686
687 *Pointer<UInt>(element) = E5B9G9R9;
688 }
689 break;
690 case VK_FORMAT_B8G8R8A8_SNORM:
691 if(writeB) { *Pointer<SByte>(element) = SByte(RoundInt(Float(c.z))); }
692 if(writeG) { *Pointer<SByte>(element + 1) = SByte(RoundInt(Float(c.y))); }
693 if(writeR) { *Pointer<SByte>(element + 2) = SByte(RoundInt(Float(c.x))); }
694 if(writeA) { *Pointer<SByte>(element + 3) = SByte(RoundInt(Float(c.w))); }
695 break;
696 case VK_FORMAT_A8B8G8R8_SINT_PACK32:
697 case VK_FORMAT_R8G8B8A8_SINT:
698 case VK_FORMAT_A8B8G8R8_SNORM_PACK32:
699 case VK_FORMAT_R8G8B8A8_SNORM:
700 case VK_FORMAT_R8G8B8A8_SSCALED:
701 case VK_FORMAT_A8B8G8R8_SSCALED_PACK32:
702 if(writeA) { *Pointer<SByte>(element + 3) = SByte(RoundInt(Float(c.w))); }
703 case VK_FORMAT_R8G8B8_SINT:
704 case VK_FORMAT_R8G8B8_SNORM:
705 case VK_FORMAT_R8G8B8_SSCALED:
706 if(writeB) { *Pointer<SByte>(element + 2) = SByte(RoundInt(Float(c.z))); }
707 case VK_FORMAT_R8G8_SINT:
708 case VK_FORMAT_R8G8_SNORM:
709 case VK_FORMAT_R8G8_SSCALED:
710 if(writeG) { *Pointer<SByte>(element + 1) = SByte(RoundInt(Float(c.y))); }
711 case VK_FORMAT_R8_SINT:
712 case VK_FORMAT_R8_SNORM:
713 case VK_FORMAT_R8_SSCALED:
714 if(writeR) { *Pointer<SByte>(element) = SByte(RoundInt(Float(c.x))); }
715 break;
716 case VK_FORMAT_R8G8B8_UINT:
717 case VK_FORMAT_R8G8B8_UNORM:
718 case VK_FORMAT_R8G8B8_USCALED:
719 case VK_FORMAT_R8G8B8_SRGB:
720 if(writeB) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.z))); }
721 case VK_FORMAT_R8G8_UINT:
722 case VK_FORMAT_R8G8_UNORM:
723 case VK_FORMAT_R8G8_USCALED:
724 case VK_FORMAT_R8G8_SRGB:
725 if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
726 case VK_FORMAT_R8_UINT:
727 case VK_FORMAT_R8_UNORM:
728 case VK_FORMAT_R8_USCALED:
729 case VK_FORMAT_R8_SRGB:
730 if(writeR) { *Pointer<Byte>(element) = Byte(RoundInt(Float(c.x))); }
731 break;
732 case VK_FORMAT_R16G16B16A16_SINT:
733 case VK_FORMAT_R16G16B16A16_SNORM:
734 case VK_FORMAT_R16G16B16A16_SSCALED:
735 if(writeRGBA)
736 {
737 *Pointer<Short4>(element) = Short4(RoundInt(c));
738 }
739 else
740 {
741 if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); }
742 if(writeG) { *Pointer<Short>(element + 2) = Short(RoundInt(Float(c.y))); }
743 if(writeB) { *Pointer<Short>(element + 4) = Short(RoundInt(Float(c.z))); }
744 if(writeA) { *Pointer<Short>(element + 6) = Short(RoundInt(Float(c.w))); }
745 }
746 break;
747 case VK_FORMAT_R16G16B16_SINT:
748 case VK_FORMAT_R16G16B16_SNORM:
749 case VK_FORMAT_R16G16B16_SSCALED:
750 if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); }
751 if(writeG) { *Pointer<Short>(element + 2) = Short(RoundInt(Float(c.y))); }
752 if(writeB) { *Pointer<Short>(element + 4) = Short(RoundInt(Float(c.z))); }
753 break;
754 case VK_FORMAT_R16G16_SINT:
755 case VK_FORMAT_R16G16_SNORM:
756 case VK_FORMAT_R16G16_SSCALED:
757 if(writeR && writeG)
758 {
759 *Pointer<Short2>(element) = Short2(Short4(RoundInt(c)));
760 }
761 else
762 {
763 if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); }
764 if(writeG) { *Pointer<Short>(element + 2) = Short(RoundInt(Float(c.y))); }
765 }
766 break;
767 case VK_FORMAT_R16_SINT:
768 case VK_FORMAT_R16_SNORM:
769 case VK_FORMAT_R16_SSCALED:
770 if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); }
771 break;
772 case VK_FORMAT_R16G16B16A16_UINT:
773 case VK_FORMAT_R16G16B16A16_UNORM:
774 case VK_FORMAT_R16G16B16A16_USCALED:
775 if(writeRGBA)
776 {
777 *Pointer<UShort4>(element) = UShort4(RoundInt(c));
778 }
779 else
780 {
781 if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); }
782 if(writeG) { *Pointer<UShort>(element + 2) = UShort(RoundInt(Float(c.y))); }
783 if(writeB) { *Pointer<UShort>(element + 4) = UShort(RoundInt(Float(c.z))); }
784 if(writeA) { *Pointer<UShort>(element + 6) = UShort(RoundInt(Float(c.w))); }
785 }
786 break;
787 case VK_FORMAT_R16G16B16_UINT:
788 case VK_FORMAT_R16G16B16_UNORM:
789 case VK_FORMAT_R16G16B16_USCALED:
790 if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); }
791 if(writeG) { *Pointer<UShort>(element + 2) = UShort(RoundInt(Float(c.y))); }
792 if(writeB) { *Pointer<UShort>(element + 4) = UShort(RoundInt(Float(c.z))); }
793 break;
794 case VK_FORMAT_R16G16_UINT:
795 case VK_FORMAT_R16G16_UNORM:
796 case VK_FORMAT_R16G16_USCALED:
797 if(writeR && writeG)
798 {
799 *Pointer<UShort2>(element) = UShort2(UShort4(RoundInt(c)));
800 }
801 else
802 {
803 if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); }
804 if(writeG) { *Pointer<UShort>(element + 2) = UShort(RoundInt(Float(c.y))); }
805 }
806 break;
807 case VK_FORMAT_R16_UINT:
808 case VK_FORMAT_R16_UNORM:
809 case VK_FORMAT_R16_USCALED:
810 if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); }
811 break;
812 case VK_FORMAT_R32G32B32A32_SINT:
813 if(writeRGBA)
814 {
815 *Pointer<Int4>(element) = RoundInt(c);
816 }
817 else
818 {
819 if(writeR) { *Pointer<Int>(element) = RoundInt(Float(c.x)); }
820 if(writeG) { *Pointer<Int>(element + 4) = RoundInt(Float(c.y)); }
821 if(writeB) { *Pointer<Int>(element + 8) = RoundInt(Float(c.z)); }
822 if(writeA) { *Pointer<Int>(element + 12) = RoundInt(Float(c.w)); }
823 }
824 break;
825 case VK_FORMAT_R32G32B32_SINT:
826 if(writeB) { *Pointer<Int>(element + 8) = RoundInt(Float(c.z)); }
827 case VK_FORMAT_R32G32_SINT:
828 if(writeG) { *Pointer<Int>(element + 4) = RoundInt(Float(c.y)); }
829 case VK_FORMAT_R32_SINT:
830 if(writeR) { *Pointer<Int>(element) = RoundInt(Float(c.x)); }
831 break;
832 case VK_FORMAT_R32G32B32A32_UINT:
833 if(writeRGBA)
834 {
835 *Pointer<UInt4>(element) = UInt4(RoundInt(c));
836 }
837 else
838 {
839 if(writeR) { *Pointer<UInt>(element) = As<UInt>(RoundInt(Float(c.x))); }
840 if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(RoundInt(Float(c.y))); }
841 if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(RoundInt(Float(c.z))); }
842 if(writeA) { *Pointer<UInt>(element + 12) = As<UInt>(RoundInt(Float(c.w))); }
843 }
844 break;
845 case VK_FORMAT_R32G32B32_UINT:
846 if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(RoundInt(Float(c.z))); }
847 case VK_FORMAT_R32G32_UINT:
848 if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(RoundInt(Float(c.y))); }
849 case VK_FORMAT_R32_UINT:
850 if(writeR) { *Pointer<UInt>(element) = As<UInt>(RoundInt(Float(c.x))); }
851 break;
852 case VK_FORMAT_R5G6B5_UNORM_PACK16:
853 if(writeR && writeG && writeB)
854 {
855 *Pointer<UShort>(element) = UShort(RoundInt(Float(c.z)) |
856 (RoundInt(Float(c.y)) << Int(5)) |
857 (RoundInt(Float(c.x)) << Int(11)));
858 }
859 else
860 {
861 unsigned short mask = (writeB ? 0x001F : 0x0000) | (writeG ? 0x07E0 : 0x0000) | (writeR ? 0xF800 : 0x0000);
862 unsigned short unmask = ~mask;
863 *Pointer<UShort>(element) = (*Pointer<UShort>(element) & UShort(unmask)) |
864 (UShort(RoundInt(Float(c.z)) |
865 (RoundInt(Float(c.y)) << Int(5)) |
866 (RoundInt(Float(c.x)) << Int(11))) & UShort(mask));
867 }
868 break;
869 case VK_FORMAT_R5G5B5A1_UNORM_PACK16:
870 if(writeRGBA)
871 {
872 *Pointer<UShort>(element) = UShort(RoundInt(Float(c.w)) |
873 (RoundInt(Float(c.z)) << Int(1)) |
874 (RoundInt(Float(c.y)) << Int(6)) |
875 (RoundInt(Float(c.x)) << Int(11)));
876 }
877 else
878 {
879 unsigned short mask = (writeA ? 0x8000 : 0x0000) |
880 (writeR ? 0x7C00 : 0x0000) |
881 (writeG ? 0x03E0 : 0x0000) |
882 (writeB ? 0x001F : 0x0000);
883 unsigned short unmask = ~mask;
884 *Pointer<UShort>(element) = (*Pointer<UShort>(element) & UShort(unmask)) |
885 (UShort(RoundInt(Float(c.w)) |
886 (RoundInt(Float(c.z)) << Int(1)) |
887 (RoundInt(Float(c.y)) << Int(6)) |
888 (RoundInt(Float(c.x)) << Int(11))) & UShort(mask));
889 }
890 break;
891 case VK_FORMAT_B5G5R5A1_UNORM_PACK16:
892 if(writeRGBA)
893 {
894 *Pointer<UShort>(element) = UShort(RoundInt(Float(c.w)) |
895 (RoundInt(Float(c.x)) << Int(1)) |
896 (RoundInt(Float(c.y)) << Int(6)) |
897 (RoundInt(Float(c.z)) << Int(11)));
898 }
899 else
900 {
901 unsigned short mask = (writeA ? 0x8000 : 0x0000) |
902 (writeR ? 0x7C00 : 0x0000) |
903 (writeG ? 0x03E0 : 0x0000) |
904 (writeB ? 0x001F : 0x0000);
905 unsigned short unmask = ~mask;
906 *Pointer<UShort>(element) = (*Pointer<UShort>(element) & UShort(unmask)) |
907 (UShort(RoundInt(Float(c.w)) |
908 (RoundInt(Float(c.x)) << Int(1)) |
909 (RoundInt(Float(c.y)) << Int(6)) |
910 (RoundInt(Float(c.z)) << Int(11))) & UShort(mask));
911 }
912 break;
913 case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
914 if(writeRGBA)
915 {
916 *Pointer<UShort>(element) = UShort(RoundInt(Float(c.z)) |
917 (RoundInt(Float(c.y)) << Int(5)) |
918 (RoundInt(Float(c.x)) << Int(10)) |
919 (RoundInt(Float(c.w)) << Int(15)));
920 }
921 else
922 {
923 unsigned short mask = (writeA ? 0x8000 : 0x0000) |
924 (writeR ? 0x7C00 : 0x0000) |
925 (writeG ? 0x03E0 : 0x0000) |
926 (writeB ? 0x001F : 0x0000);
927 unsigned short unmask = ~mask;
928 *Pointer<UShort>(element) = (*Pointer<UShort>(element) & UShort(unmask)) |
929 (UShort(RoundInt(Float(c.z)) |
930 (RoundInt(Float(c.y)) << Int(5)) |
931 (RoundInt(Float(c.x)) << Int(10)) |
932 (RoundInt(Float(c.w)) << Int(15))) & UShort(mask));
933 }
934 break;
935 case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
936 case VK_FORMAT_A2B10G10R10_UINT_PACK32:
937 case VK_FORMAT_A2B10G10R10_SNORM_PACK32:
938 if(writeRGBA)
939 {
940 *Pointer<UInt>(element) = UInt(RoundInt(Float(c.x)) |
941 (RoundInt(Float(c.y)) << 10) |
942 (RoundInt(Float(c.z)) << 20) |
943 (RoundInt(Float(c.w)) << 30));
944 }
945 else
946 {
947 unsigned int mask = (writeA ? 0xC0000000 : 0x0000) |
948 (writeB ? 0x3FF00000 : 0x0000) |
949 (writeG ? 0x000FFC00 : 0x0000) |
950 (writeR ? 0x000003FF : 0x0000);
951 unsigned int unmask = ~mask;
952 *Pointer<UInt>(element) = (*Pointer<UInt>(element) & UInt(unmask)) |
953 (UInt(RoundInt(Float(c.x)) |
954 (RoundInt(Float(c.y)) << 10) |
955 (RoundInt(Float(c.z)) << 20) |
956 (RoundInt(Float(c.w)) << 30)) & UInt(mask));
957 }
958 break;
959 case VK_FORMAT_A2R10G10B10_UNORM_PACK32:
960 case VK_FORMAT_A2R10G10B10_UINT_PACK32:
961 case VK_FORMAT_A2R10G10B10_SNORM_PACK32:
962 if(writeRGBA)
963 {
964 *Pointer<UInt>(element) = UInt(RoundInt(Float(c.z)) |
965 (RoundInt(Float(c.y)) << 10) |
966 (RoundInt(Float(c.x)) << 20) |
967 (RoundInt(Float(c.w)) << 30));
968 }
969 else
970 {
971 unsigned int mask = (writeA ? 0xC0000000 : 0x0000) |
972 (writeR ? 0x3FF00000 : 0x0000) |
973 (writeG ? 0x000FFC00 : 0x0000) |
974 (writeB ? 0x000003FF : 0x0000);
975 unsigned int unmask = ~mask;
976 *Pointer<UInt>(element) = (*Pointer<UInt>(element) & UInt(unmask)) |
977 (UInt(RoundInt(Float(c.z)) |
978 (RoundInt(Float(c.y)) << 10) |
979 (RoundInt(Float(c.x)) << 20) |
980 (RoundInt(Float(c.w)) << 30)) & UInt(mask));
981 }
982 break;
983 case VK_FORMAT_D16_UNORM:
984 *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x)));
985 break;
986 case VK_FORMAT_X8_D24_UNORM_PACK32:
987 *Pointer<UInt>(element) = UInt(RoundInt(Float(c.x)) << 8);
988 break;
989 case VK_FORMAT_D32_SFLOAT:
990 *Pointer<Float>(element) = c.x;
991 break;
992 case VK_FORMAT_S8_UINT:
993 *Pointer<Byte>(element) = Byte(RoundInt(Float(c.x)));
994 break;
995 default:
996 UNSUPPORTED("Blitter destination format %d", (int)state.destFormat);
997 break;
998 }
999}
1000
// Reads one texel of a non-normalized integer source format at 'element' and
// returns it widened to an Int4. Channels the format does not provide keep the
// default color (0, 0, 0, 1). Emits Reactor code; 'element' is a pointer value
// inside the routine being generated.
Int4 Blitter::readInt4(Pointer<Byte> element, const State &state)
{
	Int4 c(0, 0, 0, 1);

	switch(state.sourceFormat)
	{
	case VK_FORMAT_A8B8G8R8_SINT_PACK32:
	case VK_FORMAT_R8G8B8A8_SINT:
		c = Insert(c, Int(*Pointer<SByte>(element + 3)), 3);
		c = Insert(c, Int(*Pointer<SByte>(element + 2)), 2);
		// Intentional fall-through: narrower formats share the remaining channels.
	case VK_FORMAT_R8G8_SINT:
		c = Insert(c, Int(*Pointer<SByte>(element + 1)), 1);
		// fall through
	case VK_FORMAT_R8_SINT:
		c = Insert(c, Int(*Pointer<SByte>(element)), 0);
		break;
	case VK_FORMAT_A2B10G10R10_UINT_PACK32:
		// Unpack the 10:10:10:2 bit fields from a single 32-bit word.
		c = Insert(c, Int((*Pointer<UInt>(element) & UInt(0x000003FF))), 0);
		c = Insert(c, Int((*Pointer<UInt>(element) & UInt(0x000FFC00)) >> 10), 1);
		c = Insert(c, Int((*Pointer<UInt>(element) & UInt(0x3FF00000)) >> 20), 2);
		c = Insert(c, Int((*Pointer<UInt>(element) & UInt(0xC0000000)) >> 30), 3);
		break;
	case VK_FORMAT_A8B8G8R8_UINT_PACK32:
	case VK_FORMAT_R8G8B8A8_UINT:
		c = Insert(c, Int(*Pointer<Byte>(element + 3)), 3);
		c = Insert(c, Int(*Pointer<Byte>(element + 2)), 2);
		// fall through
	case VK_FORMAT_R8G8_UINT:
		c = Insert(c, Int(*Pointer<Byte>(element + 1)), 1);
		// fall through
	case VK_FORMAT_R8_UINT:
	case VK_FORMAT_S8_UINT:
		c = Insert(c, Int(*Pointer<Byte>(element)), 0);
		break;
	case VK_FORMAT_R16G16B16A16_SINT:
		c = Insert(c, Int(*Pointer<Short>(element + 6)), 3);
		c = Insert(c, Int(*Pointer<Short>(element + 4)), 2);
		// fall through
	case VK_FORMAT_R16G16_SINT:
		c = Insert(c, Int(*Pointer<Short>(element + 2)), 1);
		// fall through
	case VK_FORMAT_R16_SINT:
		c = Insert(c, Int(*Pointer<Short>(element)), 0);
		break;
	case VK_FORMAT_R16G16B16A16_UINT:
		c = Insert(c, Int(*Pointer<UShort>(element + 6)), 3);
		c = Insert(c, Int(*Pointer<UShort>(element + 4)), 2);
		// fall through
	case VK_FORMAT_R16G16_UINT:
		c = Insert(c, Int(*Pointer<UShort>(element + 2)), 1);
		// fall through
	case VK_FORMAT_R16_UINT:
		c = Insert(c, Int(*Pointer<UShort>(element)), 0);
		break;
	case VK_FORMAT_R32G32B32A32_SINT:
	case VK_FORMAT_R32G32B32A32_UINT:
		// Four 32-bit channels can be loaded as one vector.
		c = *Pointer<Int4>(element);
		break;
	case VK_FORMAT_R32G32_SINT:
	case VK_FORMAT_R32G32_UINT:
		c = Insert(c, *Pointer<Int>(element + 4), 1);
		// fall through
	case VK_FORMAT_R32_SINT:
	case VK_FORMAT_R32_UINT:
		c = Insert(c, *Pointer<Int>(element), 0);
		break;
	default:
		UNSUPPORTED("Blitter source format %d", (int)state.sourceFormat);
	}

	return c;
}
1065
1066void Blitter::write(Int4 &c, Pointer<Byte> element, const State &state)
1067{
1068 bool writeR = state.writeRed;
1069 bool writeG = state.writeGreen;
1070 bool writeB = state.writeBlue;
1071 bool writeA = state.writeAlpha;
1072 bool writeRGBA = writeR && writeG && writeB && writeA;
1073
1074 switch(state.destFormat)
1075 {
1076 case VK_FORMAT_A2B10G10R10_UINT_PACK32:
1077 c = Min(As<UInt4>(c), UInt4(0x03FF, 0x03FF, 0x03FF, 0x0003));
1078 break;
1079 case VK_FORMAT_A8B8G8R8_UINT_PACK32:
1080 case VK_FORMAT_R8G8B8A8_UINT:
1081 case VK_FORMAT_R8G8B8_UINT:
1082 case VK_FORMAT_R8G8_UINT:
1083 case VK_FORMAT_R8_UINT:
1084 case VK_FORMAT_R8G8B8A8_USCALED:
1085 case VK_FORMAT_R8G8B8_USCALED:
1086 case VK_FORMAT_R8G8_USCALED:
1087 case VK_FORMAT_R8_USCALED:
1088 case VK_FORMAT_S8_UINT:
1089 c = Min(As<UInt4>(c), UInt4(0xFF));
1090 break;
1091 case VK_FORMAT_R16G16B16A16_UINT:
1092 case VK_FORMAT_R16G16B16_UINT:
1093 case VK_FORMAT_R16G16_UINT:
1094 case VK_FORMAT_R16_UINT:
1095 case VK_FORMAT_R16G16B16A16_USCALED:
1096 case VK_FORMAT_R16G16B16_USCALED:
1097 case VK_FORMAT_R16G16_USCALED:
1098 case VK_FORMAT_R16_USCALED:
1099 c = Min(As<UInt4>(c), UInt4(0xFFFF));
1100 break;
1101 case VK_FORMAT_A8B8G8R8_SINT_PACK32:
1102 case VK_FORMAT_R8G8B8A8_SINT:
1103 case VK_FORMAT_R8G8_SINT:
1104 case VK_FORMAT_R8_SINT:
1105 case VK_FORMAT_R8G8B8A8_SSCALED:
1106 case VK_FORMAT_R8G8B8_SSCALED:
1107 case VK_FORMAT_R8G8_SSCALED:
1108 case VK_FORMAT_R8_SSCALED:
1109 c = Min(Max(c, Int4(-0x80)), Int4(0x7F));
1110 break;
1111 case VK_FORMAT_R16G16B16A16_SINT:
1112 case VK_FORMAT_R16G16B16_SINT:
1113 case VK_FORMAT_R16G16_SINT:
1114 case VK_FORMAT_R16_SINT:
1115 case VK_FORMAT_R16G16B16A16_SSCALED:
1116 case VK_FORMAT_R16G16B16_SSCALED:
1117 case VK_FORMAT_R16G16_SSCALED:
1118 case VK_FORMAT_R16_SSCALED:
1119 c = Min(Max(c, Int4(-0x8000)), Int4(0x7FFF));
1120 break;
1121 default:
1122 break;
1123 }
1124
1125 switch(state.destFormat)
1126 {
1127 case VK_FORMAT_B8G8R8A8_SINT:
1128 case VK_FORMAT_B8G8R8A8_SSCALED:
1129 if(writeA) { *Pointer<SByte>(element + 3) = SByte(Extract(c, 3)); }
1130 case VK_FORMAT_B8G8R8_SINT:
1131 case VK_FORMAT_B8G8R8_SSCALED:
1132 if(writeB) { *Pointer<SByte>(element) = SByte(Extract(c, 2)); }
1133 if(writeG) { *Pointer<SByte>(element + 1) = SByte(Extract(c, 1)); }
1134 if(writeR) { *Pointer<SByte>(element + 2) = SByte(Extract(c, 0)); }
1135 break;
1136 case VK_FORMAT_A8B8G8R8_SINT_PACK32:
1137 case VK_FORMAT_R8G8B8A8_SINT:
1138 case VK_FORMAT_R8G8B8A8_SSCALED:
1139 case VK_FORMAT_A8B8G8R8_SSCALED_PACK32:
1140 if(writeA) { *Pointer<SByte>(element + 3) = SByte(Extract(c, 3)); }
1141 case VK_FORMAT_R8G8B8_SINT:
1142 case VK_FORMAT_R8G8B8_SSCALED:
1143 if(writeB) { *Pointer<SByte>(element + 2) = SByte(Extract(c, 2)); }
1144 case VK_FORMAT_R8G8_SINT:
1145 case VK_FORMAT_R8G8_SSCALED:
1146 if(writeG) { *Pointer<SByte>(element + 1) = SByte(Extract(c, 1)); }
1147 case VK_FORMAT_R8_SINT:
1148 case VK_FORMAT_R8_SSCALED:
1149 if(writeR) { *Pointer<SByte>(element) = SByte(Extract(c, 0)); }
1150 break;
1151 case VK_FORMAT_A2B10G10R10_UINT_PACK32:
1152 case VK_FORMAT_A2B10G10R10_SINT_PACK32:
1153 case VK_FORMAT_A2B10G10R10_USCALED_PACK32:
1154 case VK_FORMAT_A2B10G10R10_SSCALED_PACK32:
1155 if(writeRGBA)
1156 {
1157 *Pointer<UInt>(element) =
1158 UInt((Extract(c, 0)) | (Extract(c, 1) << 10) | (Extract(c, 2) << 20) | (Extract(c, 3) << 30));
1159 }
1160 else
1161 {
1162 unsigned int mask = (writeA ? 0xC0000000 : 0x0000) |
1163 (writeB ? 0x3FF00000 : 0x0000) |
1164 (writeG ? 0x000FFC00 : 0x0000) |
1165 (writeR ? 0x000003FF : 0x0000);
1166 unsigned int unmask = ~mask;
1167 *Pointer<UInt>(element) = (*Pointer<UInt>(element) & UInt(unmask)) |
1168 (UInt(Extract(c, 0) | (Extract(c, 1) << 10) | (Extract(c, 2) << 20) | (Extract(c, 3) << 30)) & UInt(mask));
1169 }
1170 break;
1171 case VK_FORMAT_A2R10G10B10_UINT_PACK32:
1172 case VK_FORMAT_A2R10G10B10_SINT_PACK32:
1173 case VK_FORMAT_A2R10G10B10_USCALED_PACK32:
1174 case VK_FORMAT_A2R10G10B10_SSCALED_PACK32:
1175 if(writeRGBA)
1176 {
1177 *Pointer<UInt>(element) =
1178 UInt((Extract(c, 2)) | (Extract(c, 1) << 10) | (Extract(c, 0) << 20) | (Extract(c, 3) << 30));
1179 }
1180 else
1181 {
1182 unsigned int mask = (writeA ? 0xC0000000 : 0x0000) |
1183 (writeR ? 0x3FF00000 : 0x0000) |
1184 (writeG ? 0x000FFC00 : 0x0000) |
1185 (writeB ? 0x000003FF : 0x0000);
1186 unsigned int unmask = ~mask;
1187 *Pointer<UInt>(element) = (*Pointer<UInt>(element) & UInt(unmask)) |
1188 (UInt(Extract(c, 2) | (Extract(c, 1) << 10) | (Extract(c, 0) << 20) | (Extract(c, 3) << 30)) & UInt(mask));
1189 }
1190 break;
1191 case VK_FORMAT_B8G8R8A8_UINT:
1192 case VK_FORMAT_B8G8R8A8_USCALED:
1193 if(writeA) { *Pointer<Byte>(element + 3) = Byte(Extract(c, 3)); }
1194 case VK_FORMAT_B8G8R8_UINT:
1195 case VK_FORMAT_B8G8R8_USCALED:
1196 case VK_FORMAT_B8G8R8_SRGB:
1197 if(writeB) { *Pointer<Byte>(element) = Byte(Extract(c, 2)); }
1198 if(writeG) { *Pointer<Byte>(element + 1) = Byte(Extract(c, 1)); }
1199 if(writeR) { *Pointer<Byte>(element + 2) = Byte(Extract(c, 0)); }
1200 break;
1201 case VK_FORMAT_A8B8G8R8_UINT_PACK32:
1202 case VK_FORMAT_R8G8B8A8_UINT:
1203 case VK_FORMAT_R8G8B8A8_USCALED:
1204 case VK_FORMAT_A8B8G8R8_USCALED_PACK32:
1205 if(writeA) { *Pointer<Byte>(element + 3) = Byte(Extract(c, 3)); }
1206 case VK_FORMAT_R8G8B8_UINT:
1207 case VK_FORMAT_R8G8B8_USCALED:
1208 if(writeB) { *Pointer<Byte>(element + 2) = Byte(Extract(c, 2)); }
1209 case VK_FORMAT_R8G8_UINT:
1210 case VK_FORMAT_R8G8_USCALED:
1211 if(writeG) { *Pointer<Byte>(element + 1) = Byte(Extract(c, 1)); }
1212 case VK_FORMAT_R8_UINT:
1213 case VK_FORMAT_R8_USCALED:
1214 case VK_FORMAT_S8_UINT:
1215 if(writeR) { *Pointer<Byte>(element) = Byte(Extract(c, 0)); }
1216 break;
1217 case VK_FORMAT_R16G16B16A16_SINT:
1218 case VK_FORMAT_R16G16B16A16_SSCALED:
1219 if(writeA) { *Pointer<Short>(element + 6) = Short(Extract(c, 3)); }
1220 case VK_FORMAT_R16G16B16_SINT:
1221 case VK_FORMAT_R16G16B16_SSCALED:
1222 if(writeB) { *Pointer<Short>(element + 4) = Short(Extract(c, 2)); }
1223 case VK_FORMAT_R16G16_SINT:
1224 case VK_FORMAT_R16G16_SSCALED:
1225 if(writeG) { *Pointer<Short>(element + 2) = Short(Extract(c, 1)); }
1226 case VK_FORMAT_R16_SINT:
1227 case VK_FORMAT_R16_SSCALED:
1228 if(writeR) { *Pointer<Short>(element) = Short(Extract(c, 0)); }
1229 break;
1230 case VK_FORMAT_R16G16B16A16_UINT:
1231 case VK_FORMAT_R16G16B16A16_USCALED:
1232 if(writeA) { *Pointer<UShort>(element + 6) = UShort(Extract(c, 3)); }
1233 case VK_FORMAT_R16G16B16_UINT:
1234 case VK_FORMAT_R16G16B16_USCALED:
1235 if(writeB) { *Pointer<UShort>(element + 4) = UShort(Extract(c, 2)); }
1236 case VK_FORMAT_R16G16_UINT:
1237 case VK_FORMAT_R16G16_USCALED:
1238 if(writeG) { *Pointer<UShort>(element + 2) = UShort(Extract(c, 1)); }
1239 case VK_FORMAT_R16_UINT:
1240 case VK_FORMAT_R16_USCALED:
1241 if(writeR) { *Pointer<UShort>(element) = UShort(Extract(c, 0)); }
1242 break;
1243 case VK_FORMAT_R32G32B32A32_SINT:
1244 if(writeRGBA)
1245 {
1246 *Pointer<Int4>(element) = c;
1247 }
1248 else
1249 {
1250 if(writeR) { *Pointer<Int>(element) = Extract(c, 0); }
1251 if(writeG) { *Pointer<Int>(element + 4) = Extract(c, 1); }
1252 if(writeB) { *Pointer<Int>(element + 8) = Extract(c, 2); }
1253 if(writeA) { *Pointer<Int>(element + 12) = Extract(c, 3); }
1254 }
1255 break;
1256 case VK_FORMAT_R32G32B32_SINT:
1257 if(writeR) { *Pointer<Int>(element) = Extract(c, 0); }
1258 if(writeG) { *Pointer<Int>(element + 4) = Extract(c, 1); }
1259 if(writeB) { *Pointer<Int>(element + 8) = Extract(c, 2); }
1260 break;
1261 case VK_FORMAT_R32G32_SINT:
1262 if(writeR) { *Pointer<Int>(element) = Extract(c, 0); }
1263 if(writeG) { *Pointer<Int>(element + 4) = Extract(c, 1); }
1264 break;
1265 case VK_FORMAT_R32_SINT:
1266 if(writeR) { *Pointer<Int>(element) = Extract(c, 0); }
1267 break;
1268 case VK_FORMAT_R32G32B32A32_UINT:
1269 if(writeRGBA)
1270 {
1271 *Pointer<UInt4>(element) = As<UInt4>(c);
1272 }
1273 else
1274 {
1275 if(writeR) { *Pointer<UInt>(element) = As<UInt>(Extract(c, 0)); }
1276 if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(Extract(c, 1)); }
1277 if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(Extract(c, 2)); }
1278 if(writeA) { *Pointer<UInt>(element + 12) = As<UInt>(Extract(c, 3)); }
1279 }
1280 break;
1281 case VK_FORMAT_R32G32B32_UINT:
1282 if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(Extract(c, 2)); }
1283 case VK_FORMAT_R32G32_UINT:
1284 if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(Extract(c, 1)); }
1285 case VK_FORMAT_R32_UINT:
1286 if(writeR) { *Pointer<UInt>(element) = As<UInt>(Extract(c, 0)); }
1287 break;
1288 default:
1289 UNSUPPORTED("Blitter destination format %d", (int)state.destFormat);
1290 }
1291}
1292
// Rescales 'value' from the source format's numeric range to the destination
// format's range, performing sRGB encode/decode when one of the formats is
// sRGB (and conversion is allowed). When the source is a float format and the
// destination is not, the result is also clamped to the destination's range.
// 'preScaled' indicates 'value' was already brought into the destination's
// scale by an earlier call (set by generate() on the multisample-resolve and
// bilinear paths for sRGB sources), so only that scale is undone here.
// Emits Reactor code.
void Blitter::ApplyScaleAndClamp(Float4 &value, const State &state, bool preScaled)
{
	float4 scale{}, unscale{};

	if(state.clearOperation &&
	   state.sourceFormat.isNonNormalizedInteger() &&
	   !state.destFormat.isNonNormalizedInteger())
	{
		// If we're clearing a buffer from an int or uint color into a normalized color,
		// then the whole range of the int or uint color must be scaled between 0 and 1.
		switch(state.sourceFormat)
		{
		case VK_FORMAT_R32G32B32A32_SINT:
			unscale = replicate(static_cast<float>(0x7FFFFFFF));
			break;
		case VK_FORMAT_R32G32B32A32_UINT:
			unscale = replicate(static_cast<float>(0xFFFFFFFF));
			break;
		default:
			UNSUPPORTED("Blitter source format %d", (int)state.sourceFormat);
		}
	}
	else
	{
		unscale = state.sourceFormat.getScale();
	}

	scale = state.destFormat.getScale();

	bool srcSRGB = state.sourceFormat.isSRGBformat();
	bool dstSRGB = state.destFormat.isSRGBformat();

	if(state.allowSRGBConversion && ((srcSRGB && !preScaled) || dstSRGB))   // One of the formats is sRGB encoded.
	{
		// Normalize to [0..1] before the (non-linear) sRGB conversion, then
		// re-apply the destination scale afterwards.
		value *= preScaled ? Float4(1.0f / scale.x, 1.0f / scale.y, 1.0f / scale.z, 1.0f / scale.w) : // Unapply scale
		                     Float4(1.0f / unscale.x, 1.0f / unscale.y, 1.0f / unscale.z, 1.0f / unscale.w); // Apply unscale
		value = (srcSRGB && !preScaled) ? sRGBtoLinear(value) : LinearToSRGB(value);
		value *= Float4(scale.x, scale.y, scale.z, scale.w); // Apply scale
	}
	else if(unscale != scale)
	{
		// Linear rescale: combine unscale and scale into a single multiply.
		value *= Float4(scale.x / unscale.x, scale.y / unscale.y, scale.z / unscale.z, scale.w / unscale.w);
	}

	if(state.sourceFormat.isFloatFormat() && !state.destFormat.isFloatFormat())
	{
		// Clamp to the destination range; unsigned components clamp at zero below.
		value = Min(value, Float4(scale.x, scale.y, scale.z, scale.w));

		value = Max(value, Float4(state.destFormat.isUnsignedComponent(0) ? 0.0f : -scale.x,
		                          state.destFormat.isUnsignedComponent(1) ? 0.0f : -scale.y,
		                          state.destFormat.isUnsignedComponent(2) ? 0.0f : -scale.z,
		                          state.destFormat.isUnsignedComponent(3) ? 0.0f : -scale.w));
	}
}
1347
1348Int Blitter::ComputeOffset(Int &x, Int &y, Int &pitchB, int bytes)
1349{
1350 return y * pitchB + x * bytes;
1351}
1352
// Encodes linear RGB as sRGB; the w (alpha) component is passed through.
// Branchless piecewise evaluation: lc is the linear segment 12.92 * c (with c
// capped at the 0.0031308 cutoff) and ec the exponential segment
// 1.055 * c^(1/2.4) - 0.055; the componentwise Max selects whichever segment
// applies. Emits Reactor code.
Float4 Blitter::LinearToSRGB(Float4 &c)
{
	Float4 lc = Min(c, Float4(0.0031308f)) * Float4(12.92f);
	Float4 ec = Float4(1.055f) * power(c, Float4(1.0f / 2.4f)) - Float4(0.055f);

	Float4 s = c;
	s.xyz = Max(lc, ec);

	return s;
}
1363
// Decodes sRGB to linear RGB; the w (alpha) component is passed through.
// Both segments are computed — lc = c / 12.92 and ec = ((c + 0.055) / 1.055)^2.4 —
// and the comparison mask against the 0.04045 cutoff selects between them per
// component with bitwise blending. Emits Reactor code.
Float4 Blitter::sRGBtoLinear(Float4 &c)
{
	Float4 lc = c * Float4(1.0f / 12.92f);
	Float4 ec = power((c + Float4(0.055f)) * Float4(1.0f / 1.055f), Float4(2.4f));

	Int4 linear = CmpLT(c, Float4(0.04045f));

	Float4 s = c;
	s.xyz = As<Float4>((linear & As<Int4>(lc)) | (~linear & As<Int4>(ec))); // TODO: IfThenElse()

	return s;
}
1376
// Builds and JIT-compiles the blit routine for 'state'. The generated function
// takes a BlitData* and fills the destination rectangle [x0d,x1d)x[y0d,y1d),
// either with a constant color (clear) or by sampling the source (copy), with
// optional clamping, bilinear filtering, multisample resolve and sRGB handling.
// Note: Reactor For()/If() constructs execute inside the generated routine,
// while plain C++ for/if statements run here at build time and specialize or
// unroll the emitted code.
Blitter::BlitRoutineType Blitter::generate(const State &state)
{
	BlitFunction function;
	{
		Pointer<Byte> blit(function.Arg<0>());

		// Load the per-invocation parameters from the BlitData argument.
		Pointer<Byte> source = *Pointer<Pointer<Byte>>(blit + OFFSET(BlitData,source));
		Pointer<Byte> dest = *Pointer<Pointer<Byte>>(blit + OFFSET(BlitData,dest));
		Int sPitchB = *Pointer<Int>(blit + OFFSET(BlitData,sPitchB));
		Int dPitchB = *Pointer<Int>(blit + OFFSET(BlitData,dPitchB));

		Float x0 = *Pointer<Float>(blit + OFFSET(BlitData,x0));
		Float y0 = *Pointer<Float>(blit + OFFSET(BlitData,y0));
		Float w = *Pointer<Float>(blit + OFFSET(BlitData,w));
		Float h = *Pointer<Float>(blit + OFFSET(BlitData,h));

		Int x0d = *Pointer<Int>(blit + OFFSET(BlitData,x0d));
		Int x1d = *Pointer<Int>(blit + OFFSET(BlitData,x1d));
		Int y0d = *Pointer<Int>(blit + OFFSET(BlitData,y0d));
		Int y1d = *Pointer<Int>(blit + OFFSET(BlitData,y1d));

		Int sWidth = *Pointer<Int>(blit + OFFSET(BlitData,sWidth));
		Int sHeight = *Pointer<Int>(blit + OFFSET(BlitData,sHeight));

		bool intSrc = state.sourceFormat.isNonNormalizedInteger();
		bool intDst = state.destFormat.isNonNormalizedInteger();
		bool intBoth = intSrc && intDst;
		int srcBytes = state.sourceFormat.bytes();
		int dstBytes = state.destFormat.bytes();

		// For clears, the constant color is read and converted once, outside
		// the per-pixel loops.
		bool hasConstantColorI = false;
		Int4 constantColorI;
		bool hasConstantColorF = false;
		Float4 constantColorF;
		if(state.clearOperation)
		{
			if(intBoth) // Integer types
			{
				constantColorI = readInt4(source, state);
				hasConstantColorI = true;
			}
			else
			{
				constantColorF = readFloat4(source, state);
				hasConstantColorF = true;

				ApplyScaleAndClamp(constantColorF, state);
			}
		}

		// Iterate over the destination rectangle; (x, y) is the corresponding
		// source sample position.
		For(Int j = y0d, j < y1d, j++)
		{
			Float y = state.clearOperation ? RValue<Float>(y0) : y0 + Float(j) * h;
			Pointer<Byte> destLine = dest + j * dPitchB;

			For(Int i = x0d, i < x1d, i++)
			{
				Float x = state.clearOperation ? RValue<Float>(x0) : x0 + Float(i) * w;
				Pointer<Byte> d = destLine + i * dstBytes;

				if(hasConstantColorI)
				{
					// Replicate the clear color into every destination sample.
					for(int s = 0; s < state.destSamples; s++)
					{
						write(constantColorI, d, state);

						d += *Pointer<Int>(blit + OFFSET(BlitData, dSliceB));
					}
				}
				else if(hasConstantColorF)
				{
					for(int s = 0; s < state.destSamples; s++)
					{
						write(constantColorF, d, state);

						d += *Pointer<Int>(blit + OFFSET(BlitData, dSliceB));
					}
				}
				else if(intBoth) // Integer types do not support filtering
				{
					Int X = Int(x);
					Int Y = Int(y);

					if(state.clampToEdge)
					{
						X = Clamp(X, 0, sWidth - 1);
						Y = Clamp(Y, 0, sHeight - 1);
					}

					Pointer<Byte> s = source + ComputeOffset(X, Y, sPitchB, srcBytes);

					// When both formats are true integer types, we don't go to float to avoid losing precision
					Int4 color = readInt4(s, state);
					// NOTE(review): 'int s' below shadows the Pointer<Byte> s above;
					// harmless since the source pointer is no longer used, but worth renaming.
					for(int s = 0; s < state.destSamples; s++)
					{
						write(color, d, state);

						d += *Pointer<Int>(blit + OFFSET(BlitData,dSliceB));
					}
				}
				else
				{
					Float4 color;

					bool preScaled = false;
					if(!state.filter || intSrc)
					{
						// Nearest sampling (also used for integer sources).
						Int X = Int(x);
						Int Y = Int(y);

						if(state.clampToEdge)
						{
							X = Clamp(X, 0, sWidth - 1);
							Y = Clamp(Y, 0, sHeight - 1);
						}

						Pointer<Byte> s = source + ComputeOffset(X, Y, sPitchB, srcBytes);

						color = readFloat4(s, state);

						if(state.srcSamples > 1) // Resolve multisampled source
						{
							// sRGB samples are converted to linear before averaging.
							if(state.allowSRGBConversion && state.sourceFormat.isSRGBformat()) // sRGB -> RGB
							{
								ApplyScaleAndClamp(color, state);
								preScaled = true;
							}
							Float4 accum = color;
							for(int sample = 1; sample < state.srcSamples; sample++)
							{
								s += *Pointer<Int>(blit + OFFSET(BlitData, sSliceB));
								color = readFloat4(s, state);

								if(state.allowSRGBConversion && state.sourceFormat.isSRGBformat()) // sRGB -> RGB
								{
									ApplyScaleAndClamp(color, state);
									preScaled = true;
								}
								accum += color;
							}
							color = accum * Float4(1.0f / static_cast<float>(state.srcSamples));
						}
					}
					else // Bilinear filtering
					{
						Float X = x;
						Float Y = y;

						if(state.clampToEdge)
						{
							X = Min(Max(x, 0.5f), Float(sWidth) - 0.5f);
							Y = Min(Max(y, 0.5f), Float(sHeight) - 0.5f);
						}

						// Texel centers are at half-integer coordinates.
						Float x0 = X - 0.5f;
						Float y0 = Y - 0.5f;

						Int X0 = Max(Int(x0), 0);
						Int Y0 = Max(Int(y0), 0);

						// Neighboring texel, clamped to the source bounds.
						Int X1 = X0 + 1;
						Int Y1 = Y0 + 1;
						X1 = IfThenElse(X1 >= sWidth, X0, X1);
						Y1 = IfThenElse(Y1 >= sHeight, Y0, Y1);

						Pointer<Byte> s00 = source + ComputeOffset(X0, Y0, sPitchB, srcBytes);
						Pointer<Byte> s01 = source + ComputeOffset(X1, Y0, sPitchB, srcBytes);
						Pointer<Byte> s10 = source + ComputeOffset(X0, Y1, sPitchB, srcBytes);
						Pointer<Byte> s11 = source + ComputeOffset(X1, Y1, sPitchB, srcBytes);

						Float4 c00 = readFloat4(s00, state);
						Float4 c01 = readFloat4(s01, state);
						Float4 c10 = readFloat4(s10, state);
						Float4 c11 = readFloat4(s11, state);

						// sRGB texels must be linearized before interpolation.
						if(state.allowSRGBConversion && state.sourceFormat.isSRGBformat()) // sRGB -> RGB
						{
							ApplyScaleAndClamp(c00, state);
							ApplyScaleAndClamp(c01, state);
							ApplyScaleAndClamp(c10, state);
							ApplyScaleAndClamp(c11, state);
							preScaled = true;
						}

						// Weighted average of the four texels by fractional position.
						Float4 fx = Float4(x0 - Float(X0));
						Float4 fy = Float4(y0 - Float(Y0));
						Float4 ix = Float4(1.0f) - fx;
						Float4 iy = Float4(1.0f) - fy;

						color = (c00 * ix + c01 * fx) * iy +
						        (c10 * ix + c11 * fx) * fy;
					}

					ApplyScaleAndClamp(color, state, preScaled);

					for(int s = 0; s < state.destSamples; s++)
					{
						write(color, d, state);

						d += *Pointer<Int>(blit + OFFSET(BlitData,dSliceB));
					}
				}
			}
		}
	}

	// Compile the emitted code into a callable routine.
	return function("BlitRoutine");
}
1585
1586Blitter::BlitRoutineType Blitter::getBlitRoutine(const State &state)
1587{
1588 std::unique_lock<std::mutex> lock(blitMutex);
1589 auto blitRoutine = blitCache.query(state);
1590
1591 if(!blitRoutine)
Alexis Hetu33642272019-03-01 11:55:59 -05001592 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001593 blitRoutine = generate(state);
1594 blitCache.add(state, blitRoutine);
Alexis Hetu33642272019-03-01 11:55:59 -05001595 }
1596
Nicolas Capens157ba262019-12-10 17:49:14 -05001597 return blitRoutine;
1598}
1599
1600Blitter::CornerUpdateRoutineType Blitter::getCornerUpdateRoutine(const State &state)
1601{
1602 std::unique_lock<std::mutex> lock(cornerUpdateMutex);
1603 auto cornerUpdateRoutine = cornerUpdateCache.query(state);
1604
1605 if(!cornerUpdateRoutine)
Alexis Hetuf60a2d52019-05-09 14:16:05 -04001606 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001607 cornerUpdateRoutine = generateCornerUpdate(state);
1608 cornerUpdateCache.add(state, cornerUpdateRoutine);
Alexis Hetuf60a2d52019-05-09 14:16:05 -04001609 }
1610
Nicolas Capens157ba262019-12-10 17:49:14 -05001611 return cornerUpdateRoutine;
1612}
1613
// Copies one subresource region of 'src' into the linear buffer at 'dst',
// using a same-format, unscaled blit routine for the per-slice copy. Walks all
// array layers of the subresource and all depth slices of 'extent', advancing
// the buffer by bufferSlicePitch after each slice. Pitches are in bytes
// (they feed the *PitchB/*SliceB fields of BlitData).
void Blitter::blitToBuffer(const vk::Image *src, VkImageSubresourceLayers subresource, VkOffset3D offset, VkExtent3D extent, uint8_t *dst, int bufferRowPitch, int bufferSlicePitch)
{
	auto aspect = static_cast<VkImageAspectFlagBits>(subresource.aspectMask);
	auto format = src->getFormat(aspect);
	// Same source and destination format, single-sampled, no filter/sRGB options.
	State state(format, format, VK_SAMPLE_COUNT_1_BIT, VK_SAMPLE_COUNT_1_BIT, Options{false, false});

	auto blitRoutine = getBlitRoutine(state);
	if(!blitRoutine)
	{
		return;
	}

	BlitData data =
	{
		nullptr, // source — filled in per slice below
		dst, // dest
		src->rowPitchBytes(aspect, subresource.mipLevel), // sPitchB
		bufferRowPitch, // dPitchB
		src->slicePitchBytes(aspect, subresource.mipLevel), // sSliceB
		bufferSlicePitch, // dSliceB

		0, 0, 1, 1, // x0, y0, w, h — identity mapping, no scaling

		0, // y0d
		static_cast<int>(extent.height), // y1d
		0, // x0d
		static_cast<int>(extent.width), // x1d

		static_cast<int>(extent.width), // sWidth
		static_cast<int>(extent.height) // sHeight;
	};

	VkOffset3D srcOffset = { 0, 0, offset.z };

	// Copy one layer at a time.
	VkImageSubresourceLayers srcSubresLayers = subresource;
	srcSubresLayers.layerCount = 1;

	VkImageSubresourceRange srcSubresRange =
	{
		subresource.aspectMask,
		subresource.mipLevel,
		1,
		subresource.baseArrayLayer,
		subresource.layerCount
	};

	uint32_t lastLayer = src->getLastLayerIndex(srcSubresRange);

	for(; srcSubresLayers.baseArrayLayer <= lastLayer; srcSubresLayers.baseArrayLayer++)
	{
		// Restart at the requested base depth for each layer.
		srcOffset.z = offset.z;

		for(auto i = 0u; i < extent.depth; i++)
		{
			data.source = src->getTexelPointer(srcOffset, srcSubresLayers);
			ASSERT(data.source < src->end());
			blitRoutine(&data);
			srcOffset.z++;
			// The buffer is densely packed: slices (and layers) follow each
			// other at bufferSlicePitch intervals.
			data.dest = (dst += bufferSlicePitch);
		}
	}
}
Nicolas Capens157ba262019-12-10 17:49:14 -05001676
1677void Blitter::blitFromBuffer(const vk::Image *dst, VkImageSubresourceLayers subresource, VkOffset3D offset, VkExtent3D extent, uint8_t *src, int bufferRowPitch, int bufferSlicePitch)
1678{
1679 auto aspect = static_cast<VkImageAspectFlagBits>(subresource.aspectMask);
1680 auto format = dst->getFormat(aspect);
1681 State state(format, format, VK_SAMPLE_COUNT_1_BIT, VK_SAMPLE_COUNT_1_BIT, Options{false, false});
1682
1683 auto blitRoutine = getBlitRoutine(state);
1684 if(!blitRoutine)
1685 {
1686 return;
1687 }
1688
1689 BlitData data =
1690 {
1691 src, // source
1692 nullptr, // dest
1693 bufferRowPitch, // sPitchB
1694 dst->rowPitchBytes(aspect, subresource.mipLevel), // dPitchB
1695 bufferSlicePitch, // sSliceB
1696 dst->slicePitchBytes(aspect, subresource.mipLevel), // dSliceB
1697
1698 static_cast<float>(-offset.x), // x0
1699 static_cast<float>(-offset.y), // y0
1700 1.0f, // w
1701 1.0f, // h
1702
1703 offset.y, // y0d
1704 static_cast<int>(offset.y + extent.height), // y1d
1705 offset.x, // x0d
1706 static_cast<int>(offset.x + extent.width), // x1d
1707
1708 static_cast<int>(extent.width), // sWidth
1709 static_cast<int>(extent.height) // sHeight;
1710 };
1711
1712 VkOffset3D dstOffset = { 0, 0, offset.z };
1713
1714 VkImageSubresourceLayers dstSubresLayers = subresource;
1715 dstSubresLayers.layerCount = 1;
1716
1717 VkImageSubresourceRange dstSubresRange =
1718 {
1719 subresource.aspectMask,
1720 subresource.mipLevel,
1721 1,
1722 subresource.baseArrayLayer,
1723 subresource.layerCount
1724 };
1725
1726 uint32_t lastLayer = dst->getLastLayerIndex(dstSubresRange);
1727
1728 for(; dstSubresLayers.baseArrayLayer <= lastLayer; dstSubresLayers.baseArrayLayer++)
1729 {
1730 dstOffset.z = offset.z;
1731
1732 for(auto i = 0u; i < extent.depth; i++)
1733 {
1734 data.dest = dst->getTexelPointer(dstOffset, dstSubresLayers);
1735 ASSERT(data.dest < dst->end());
1736 blitRoutine(&data);
1737 dstOffset.z++;
1738 data.source = (src += bufferSlicePitch);
1739 }
1740 }
1741}
1742
// Performs a scaled image-to-image blit (vkCmdBlitImage): copies the region
// described by srcOffsets to the region described by dstOffsets, with optional
// linear filtering, per-axis mirroring, multiple array layers and 3D slices.
void Blitter::blit(const vk::Image *src, vk::Image *dst, VkImageBlit region, VkFilter filter)
{
	if(dst->getFormat() == VK_FORMAT_UNDEFINED)
	{
		return;
	}

	if((region.srcSubresource.layerCount != region.dstSubresource.layerCount) ||
	   (region.srcSubresource.aspectMask != region.dstSubresource.aspectMask))
	{
		UNIMPLEMENTED("region");
	}

	// Normalize the destination rectangle so x0d <= x1d and y0d <= y1d,
	// mirroring the source offsets along with it. A flipped blit is thereby
	// expressed as a negative width/height ratio below.
	if(region.dstOffsets[0].x > region.dstOffsets[1].x)
	{
		std::swap(region.srcOffsets[0].x, region.srcOffsets[1].x);
		std::swap(region.dstOffsets[0].x, region.dstOffsets[1].x);
	}

	if(region.dstOffsets[0].y > region.dstOffsets[1].y)
	{
		std::swap(region.srcOffsets[0].y, region.srcOffsets[1].y);
		std::swap(region.dstOffsets[0].y, region.dstOffsets[1].y);
	}

	VkImageAspectFlagBits srcAspect = static_cast<VkImageAspectFlagBits>(region.srcSubresource.aspectMask);
	VkImageAspectFlagBits dstAspect = static_cast<VkImageAspectFlagBits>(region.dstSubresource.aspectMask);
	VkExtent3D srcExtent = src->getMipLevelExtent(srcAspect, region.srcSubresource.mipLevel);

	int32_t numSlices = (region.srcOffsets[1].z - region.srcOffsets[0].z);
	ASSERT(numSlices == (region.dstOffsets[1].z - region.dstOffsets[0].z));

	// Source texels advanced per destination texel, in each axis. May be
	// negative (mirrored blit) or fractional (scaling).
	float widthRatio = static_cast<float>(region.srcOffsets[1].x - region.srcOffsets[0].x) /
	                   static_cast<float>(region.dstOffsets[1].x - region.dstOffsets[0].x);
	float heightRatio = static_cast<float>(region.srcOffsets[1].y - region.srcOffsets[0].y) /
	                    static_cast<float>(region.dstOffsets[1].y - region.dstOffsets[0].y);
	// Source coordinate corresponding to the center (+0.5) of the first
	// destination texel.
	float x0 = region.srcOffsets[0].x + (0.5f - region.dstOffsets[0].x) * widthRatio;
	float y0 = region.srcOffsets[0].y + (0.5f - region.dstOffsets[0].y) * heightRatio;

	auto srcFormat = src->getFormat(srcAspect);
	auto dstFormat = dst->getFormat(dstAspect);

	bool doFilter = (filter != VK_FILTER_NEAREST);
	// sRGB decode/encode is only needed when filtering, resolving a
	// multisampled source, or converting between sRGB and non-sRGB formats.
	bool allowSRGBConversion =
		doFilter ||
		(src->getSampleCountFlagBits() > 1) ||
		(srcFormat.isSRGBformat() != dstFormat.isSRGBformat());

	State state(src->getFormat(srcAspect), dst->getFormat(dstAspect), src->getSampleCountFlagBits(), dst->getSampleCountFlagBits(),
	            Options{ doFilter, allowSRGBConversion });
	// Clamp sampling coordinates when any part of the (possibly filtered)
	// source footprint can fall outside the source mip level.
	state.clampToEdge = (region.srcOffsets[0].x < 0) ||
	                    (region.srcOffsets[0].y < 0) ||
	                    (static_cast<uint32_t>(region.srcOffsets[1].x) > srcExtent.width) ||
	                    (static_cast<uint32_t>(region.srcOffsets[1].y) > srcExtent.height) ||
	                    (doFilter && ((x0 < 0.5f) || (y0 < 0.5f)));

	auto blitRoutine = getBlitRoutine(state);
	if(!blitRoutine)
	{
		return;
	}

	BlitData data =
	{
		nullptr, // source
		nullptr, // dest
		src->rowPitchBytes(srcAspect, region.srcSubresource.mipLevel), // sPitchB
		dst->rowPitchBytes(dstAspect, region.dstSubresource.mipLevel), // dPitchB
		src->slicePitchBytes(srcAspect, region.srcSubresource.mipLevel), // sSliceB
		dst->slicePitchBytes(dstAspect, region.dstSubresource.mipLevel), // dSliceB

		x0,
		y0,
		widthRatio,
		heightRatio,

		region.dstOffsets[0].y, // y0d
		region.dstOffsets[1].y, // y1d
		region.dstOffsets[0].x, // x0d
		region.dstOffsets[1].x, // x1d

		static_cast<int>(srcExtent.width), // sWidth
		static_cast<int>(srcExtent.height) // sHeight;
	};

	VkOffset3D srcOffset = { 0, 0, region.srcOffsets[0].z };
	VkOffset3D dstOffset = { 0, 0, region.dstOffsets[0].z };

	// Process one layer at a time; the routine itself handles a single 2D slice.
	VkImageSubresourceLayers srcSubresLayers =
	{
		region.srcSubresource.aspectMask,
		region.srcSubresource.mipLevel,
		region.srcSubresource.baseArrayLayer,
		1
	};

	VkImageSubresourceLayers dstSubresLayers =
	{
		region.dstSubresource.aspectMask,
		region.dstSubresource.mipLevel,
		region.dstSubresource.baseArrayLayer,
		1
	};

	VkImageSubresourceRange srcSubresRange =
	{
		region.srcSubresource.aspectMask,
		region.srcSubresource.mipLevel,
		1,
		region.srcSubresource.baseArrayLayer,
		region.srcSubresource.layerCount
	};

	// Resolves VK_REMAINING_ARRAY_LAYERS into a concrete last index.
	uint32_t lastLayer = src->getLastLayerIndex(srcSubresRange);

	for(; srcSubresLayers.baseArrayLayer <= lastLayer; srcSubresLayers.baseArrayLayer++, dstSubresLayers.baseArrayLayer++)
	{
		srcOffset.z = region.srcOffsets[0].z;
		dstOffset.z = region.dstOffsets[0].z;

		for(int i = 0; i < numSlices; i++)
		{
			data.source = src->getTexelPointer(srcOffset, srcSubresLayers);
			data.dest = dst->getTexelPointer(dstOffset, dstSubresLayers);

			ASSERT(data.source < src->end());
			ASSERT(data.dest < dst->end());

			blitRoutine(&data);
			srcOffset.z++;
			dstOffset.z++;
		}
	}
}
1877
1878void Blitter::computeCubeCorner(Pointer<Byte>& layer, Int& x0, Int& x1, Int& y0, Int& y1, Int& pitchB, const State& state)
1879{
1880 int bytes = state.sourceFormat.bytes();
1881
1882 Float4 c = readFloat4(layer + ComputeOffset(x0, y1, pitchB, bytes), state) +
1883 readFloat4(layer + ComputeOffset(x1, y0, pitchB, bytes), state) +
1884 readFloat4(layer + ComputeOffset(x1, y1, pitchB, bytes), state);
1885
1886 c *= Float4(1.0f / 3.0f);
1887
1888 write(c, layer + ComputeOffset(x0, y0, pitchB, bytes), state);
1889}
1890
1891Blitter::CornerUpdateRoutineType Blitter::generateCornerUpdate(const State& state)
1892{
1893 // Reading and writing from/to the same image
1894 ASSERT(state.sourceFormat == state.destFormat);
1895 ASSERT(state.srcSamples == state.destSamples);
1896
1897 if(state.srcSamples != 1)
1898 {
1899 UNIMPLEMENTED("state.srcSamples %d", state.srcSamples);
1900 }
1901
1902 CornerUpdateFunction function;
1903 {
1904 Pointer<Byte> blit(function.Arg<0>());
1905
1906 Pointer<Byte> layers = *Pointer<Pointer<Byte>>(blit + OFFSET(CubeBorderData, layers));
1907 Int pitchB = *Pointer<Int>(blit + OFFSET(CubeBorderData, pitchB));
1908 UInt layerSize = *Pointer<Int>(blit + OFFSET(CubeBorderData, layerSize));
1909 UInt dim = *Pointer<Int>(blit + OFFSET(CubeBorderData, dim));
1910
1911 // Low Border, Low Pixel, High Border, High Pixel
1912 Int LB(-1), LP(0), HB(dim), HP(dim-1);
1913
1914 for(int face = 0; face < 6; face++)
1915 {
1916 computeCubeCorner(layers, LB, LP, LB, LP, pitchB, state);
1917 computeCubeCorner(layers, LB, LP, HB, HP, pitchB, state);
1918 computeCubeCorner(layers, HB, HP, LB, LP, pitchB, state);
1919 computeCubeCorner(layers, HB, HP, HB, HP, pitchB, state);
1920 layers = layers + layerSize;
1921 }
1922 }
1923
1924 return function("BlitRoutine");
1925}
1926
// Updates the one-texel border around each face of a seamless cube map:
// first copies each face's edges from the adjacent faces' edge texels, then
// runs a generated routine to fill the four corner texels of every face.
void Blitter::updateBorders(vk::Image* image, const VkImageSubresourceLayers& subresourceLayers)
{
	// A cube requires six consecutive array layers starting at baseArrayLayer.
	if(image->getArrayLayers() < (subresourceLayers.baseArrayLayer + 6))
	{
		UNIMPLEMENTED("image->getArrayLayers() %d, baseArrayLayer %d",
		              image->getArrayLayers(), subresourceLayers.baseArrayLayer);
	}

	// From Vulkan 1.1 spec, section 11.5. Image Views:
	// "For cube and cube array image views, the layers of the image view starting
	// at baseArrayLayer correspond to faces in the order +X, -X, +Y, -Y, +Z, -Z."
	VkImageSubresourceLayers posX = subresourceLayers;
	posX.layerCount = 1;
	VkImageSubresourceLayers negX = posX;
	negX.baseArrayLayer++;
	VkImageSubresourceLayers posY = negX;
	posY.baseArrayLayer++;
	VkImageSubresourceLayers negY = posY;
	negY.baseArrayLayer++;
	VkImageSubresourceLayers posZ = negY;
	posZ.baseArrayLayer++;
	VkImageSubresourceLayers negZ = posZ;
	negZ.baseArrayLayer++;

	// The 24 copies below encode the cube-face adjacency table: for every
	// (face, edge) pair, which neighboring face's edge provides its border.
	// Copy top / bottom
	copyCubeEdge(image, posX, BOTTOM, negY, RIGHT);
	copyCubeEdge(image, posY, BOTTOM, posZ, TOP);
	copyCubeEdge(image, posZ, BOTTOM, negY, TOP);
	copyCubeEdge(image, negX, BOTTOM, negY, LEFT);
	copyCubeEdge(image, negY, BOTTOM, negZ, BOTTOM);
	copyCubeEdge(image, negZ, BOTTOM, negY, BOTTOM);

	copyCubeEdge(image, posX, TOP, posY, RIGHT);
	copyCubeEdge(image, posY, TOP, negZ, TOP);
	copyCubeEdge(image, posZ, TOP, posY, BOTTOM);
	copyCubeEdge(image, negX, TOP, posY, LEFT);
	copyCubeEdge(image, negY, TOP, posZ, BOTTOM);
	copyCubeEdge(image, negZ, TOP, posY, TOP);

	// Copy left / right
	copyCubeEdge(image, posX, RIGHT, negZ, LEFT);
	copyCubeEdge(image, posY, RIGHT, posX, TOP);
	copyCubeEdge(image, posZ, RIGHT, posX, LEFT);
	copyCubeEdge(image, negX, RIGHT, posZ, LEFT);
	copyCubeEdge(image, negY, RIGHT, posX, BOTTOM);
	copyCubeEdge(image, negZ, RIGHT, negX, LEFT);

	copyCubeEdge(image, posX, LEFT, posZ, RIGHT);
	copyCubeEdge(image, posY, LEFT, negX, TOP);
	copyCubeEdge(image, posZ, LEFT, negX, RIGHT);
	copyCubeEdge(image, negX, LEFT, negZ, RIGHT);
	copyCubeEdge(image, negY, LEFT, negX, BOTTOM);
	copyCubeEdge(image, negZ, LEFT, posX, RIGHT);

	// Compute corner colors
	VkImageAspectFlagBits aspect = static_cast<VkImageAspectFlagBits>(subresourceLayers.aspectMask);
	vk::Format format = image->getFormat(aspect);
	VkSampleCountFlagBits samples = image->getSampleCountFlagBits();
	// NOTE(review): Options{ 0xF } selects a different Options constructor
	// than the { filter, allowSRGBConversion } form used elsewhere in this
	// file — presumably a full RGBA write mask; confirm against Options.
	State state(format, format, samples, samples, Options{ 0xF });

	if(samples != VK_SAMPLE_COUNT_1_BIT)
	{
		UNIMPLEMENTED("Multi-sampled cube: %d samples", static_cast<int>(samples));
	}

	auto cornerUpdateRoutine = getCornerUpdateRoutine(state);
	if(!cornerUpdateRoutine)
	{
		return;
	}

	VkExtent3D extent = image->getMipLevelExtent(aspect, subresourceLayers.mipLevel);
	CubeBorderData data =
	{
		image->getTexelPointer({ 0, 0, 0 }, posX), // first face; routine strides by layerSize
		image->rowPitchBytes(aspect, subresourceLayers.mipLevel),
		static_cast<uint32_t>(image->getLayerSize(aspect)),
		extent.width
	};
	cornerUpdateRoutine(&data);
}
2008
2009void Blitter::copyCubeEdge(vk::Image* image,
2010 const VkImageSubresourceLayers& dstSubresourceLayers, Edge dstEdge,
2011 const VkImageSubresourceLayers& srcSubresourceLayers, Edge srcEdge)
2012{
2013 ASSERT(srcSubresourceLayers.aspectMask == dstSubresourceLayers.aspectMask);
2014 ASSERT(srcSubresourceLayers.mipLevel == dstSubresourceLayers.mipLevel);
2015 ASSERT(srcSubresourceLayers.baseArrayLayer != dstSubresourceLayers.baseArrayLayer);
2016 ASSERT(srcSubresourceLayers.layerCount == 1);
2017 ASSERT(dstSubresourceLayers.layerCount == 1);
2018
2019 // Figure out if the edges to be copied in reverse order respectively from one another
2020 // The copy should be reversed whenever the same edges are contiguous or if we're
2021 // copying top <-> right or bottom <-> left. This is explained by the layout, which is:
2022 //
2023 // | +y |
2024 // | -x | +z | +x | -z |
2025 // | -y |
2026
2027 bool reverse = (srcEdge == dstEdge) ||
2028 ((srcEdge == TOP) && (dstEdge == RIGHT)) ||
2029 ((srcEdge == RIGHT) && (dstEdge == TOP)) ||
2030 ((srcEdge == BOTTOM) && (dstEdge == LEFT)) ||
2031 ((srcEdge == LEFT) && (dstEdge == BOTTOM));
2032
2033 VkImageAspectFlagBits aspect = static_cast<VkImageAspectFlagBits>(srcSubresourceLayers.aspectMask);
2034 int bytes = image->getFormat(aspect).bytes();
2035 int pitchB = image->rowPitchBytes(aspect, srcSubresourceLayers.mipLevel);
2036
2037 VkExtent3D extent = image->getMipLevelExtent(aspect, srcSubresourceLayers.mipLevel);
2038 int w = extent.width;
2039 int h = extent.height;
2040 if(w != h)
2041 {
2042 UNSUPPORTED("Cube doesn't have square faces : (%d, %d)", w, h);
2043 }
2044
2045 // Src is expressed in the regular [0, width-1], [0, height-1] space
2046 bool srcHorizontal = ((srcEdge == TOP) || (srcEdge == BOTTOM));
2047 int srcDelta = srcHorizontal ? bytes : pitchB;
2048 VkOffset3D srcOffset = { (srcEdge == RIGHT) ? (w - 1) : 0, (srcEdge == BOTTOM) ? (h - 1) : 0, 0 };
2049
2050 // Dst contains borders, so it is expressed in the [-1, width], [-1, height] space
2051 bool dstHorizontal = ((dstEdge == TOP) || (dstEdge == BOTTOM));
2052 int dstDelta = (dstHorizontal ? bytes : pitchB) * (reverse ? -1 : 1);
2053 VkOffset3D dstOffset = { (dstEdge == RIGHT) ? w : -1, (dstEdge == BOTTOM) ? h : -1, 0 };
2054
2055 // Don't write in the corners
2056 if(dstHorizontal)
2057 {
2058 dstOffset.x += reverse ? w : 1;
2059 }
2060 else
2061 {
2062 dstOffset.y += reverse ? h : 1;
2063 }
2064
2065 const uint8_t* src = static_cast<const uint8_t*>(image->getTexelPointer(srcOffset, srcSubresourceLayers));
2066 uint8_t *dst = static_cast<uint8_t*>(image->getTexelPointer(dstOffset, dstSubresourceLayers));
2067 ASSERT((src < image->end()) && ((src + (w * srcDelta)) < image->end()));
2068 ASSERT((dst < image->end()) && ((dst + (w * dstDelta)) < image->end()));
2069
2070 for(int i = 0; i < w; ++i, dst += dstDelta, src += srcDelta)
2071 {
2072 memcpy(dst, src, bytes);
2073 }
2074}
2075
}  // namespace sw