blob: 2394fb24f52629dc9e48e31c66caebc5fd488bb0 [file] [log] [blame]
// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
15#include "Blitter.hpp"
16
Nicolas Capens1d8c8db2018-11-05 16:30:42 -050017#include "Pipeline/ShaderCore.hpp"
Nicolas Capens68a82382018-10-02 13:16:55 -040018#include "Reactor/Reactor.hpp"
Nicolas Capens02cbe8e2019-08-05 15:10:05 -040019#include "System/Half.hpp"
Nicolas Capens1d8c8db2018-11-05 16:30:42 -050020#include "System/Memory.hpp"
Ben Claytonfccfc562019-12-17 20:37:31 +000021#include "Vulkan/VkBuffer.hpp"
Chris Forbesebe5f7f2019-01-16 10:38:34 -080022#include "Vulkan/VkDebug.hpp"
Alexis Hetu33642272019-03-01 11:55:59 -050023#include "Vulkan/VkImage.hpp"
Nicolas Capens68a82382018-10-02 13:16:55 -040024
Nicolas Capensb8c63932019-03-19 01:52:40 -040025#include <utility>
26
Nicolas Capens157ba262019-12-10 17:49:14 -050027namespace sw {
28
Ben Claytonfccfc562019-12-17 20:37:31 +000029Blitter::Blitter()
30 : blitMutex()
31 , blitCache(1024)
32 , cornerUpdateMutex()
33 , cornerUpdateCache(64) // We only need one of these per format
Nicolas Capens68a82382018-10-02 13:16:55 -040034{
Nicolas Capens157ba262019-12-10 17:49:14 -050035}
36
37Blitter::~Blitter()
38{
39}
40
Ben Claytonfccfc562019-12-17 20:37:31 +000041void Blitter::clear(void *pixel, vk::Format format, vk::Image *dest, const vk::Format &viewFormat, const VkImageSubresourceRange &subresourceRange, const VkRect2D *renderArea)
Nicolas Capens157ba262019-12-10 17:49:14 -050042{
43 VkImageAspectFlagBits aspect = static_cast<VkImageAspectFlagBits>(subresourceRange.aspectMask);
44 vk::Format dstFormat = viewFormat.getAspectFormat(aspect);
45 if(dstFormat == VK_FORMAT_UNDEFINED)
Nicolas Capens68a82382018-10-02 13:16:55 -040046 {
Nicolas Capens157ba262019-12-10 17:49:14 -050047 return;
Nicolas Capens68a82382018-10-02 13:16:55 -040048 }
49
Nicolas Capens157ba262019-12-10 17:49:14 -050050 float *pPixel = static_cast<float *>(pixel);
Nicolas Capens81bc9d92019-12-16 15:05:57 -050051 if(viewFormat.isUnsignedNormalized())
Nicolas Capens68a82382018-10-02 13:16:55 -040052 {
Nicolas Capens157ba262019-12-10 17:49:14 -050053 pPixel[0] = sw::clamp(pPixel[0], 0.0f, 1.0f);
54 pPixel[1] = sw::clamp(pPixel[1], 0.0f, 1.0f);
55 pPixel[2] = sw::clamp(pPixel[2], 0.0f, 1.0f);
56 pPixel[3] = sw::clamp(pPixel[3], 0.0f, 1.0f);
57 }
Nicolas Capens81bc9d92019-12-16 15:05:57 -050058 else if(viewFormat.isSignedNormalized())
Nicolas Capens157ba262019-12-10 17:49:14 -050059 {
60 pPixel[0] = sw::clamp(pPixel[0], -1.0f, 1.0f);
61 pPixel[1] = sw::clamp(pPixel[1], -1.0f, 1.0f);
62 pPixel[2] = sw::clamp(pPixel[2], -1.0f, 1.0f);
63 pPixel[3] = sw::clamp(pPixel[3], -1.0f, 1.0f);
Nicolas Capens68a82382018-10-02 13:16:55 -040064 }
65
Nicolas Capens157ba262019-12-10 17:49:14 -050066 if(fastClear(pixel, format, dest, dstFormat, subresourceRange, renderArea))
Alexis Hetu33642272019-03-01 11:55:59 -050067 {
Nicolas Capens157ba262019-12-10 17:49:14 -050068 return;
69 }
70
71 State state(format, dstFormat, 1, dest->getSampleCountFlagBits(), Options{ 0xF });
72 auto blitRoutine = getBlitRoutine(state);
73 if(!blitRoutine)
74 {
75 return;
76 }
77
Ben Claytonfccfc562019-12-17 20:37:31 +000078 VkImageSubresourceLayers subresLayers = {
Nicolas Capens157ba262019-12-10 17:49:14 -050079 subresourceRange.aspectMask,
80 subresourceRange.baseMipLevel,
81 subresourceRange.baseArrayLayer,
82 1
83 };
84
85 uint32_t lastMipLevel = dest->getLastMipLevel(subresourceRange);
86 uint32_t lastLayer = dest->getLastLayerIndex(subresourceRange);
87
88 VkRect2D area = { { 0, 0 }, { 0, 0 } };
89 if(renderArea)
90 {
91 ASSERT(subresourceRange.levelCount == 1);
92 area = *renderArea;
93 }
94
95 for(; subresLayers.mipLevel <= lastMipLevel; subresLayers.mipLevel++)
96 {
97 VkExtent3D extent = dest->getMipLevelExtent(aspect, subresLayers.mipLevel);
98 if(!renderArea)
Alexis Hetu33642272019-03-01 11:55:59 -050099 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500100 area.extent.width = extent.width;
101 area.extent.height = extent.height;
Alexis Hetu33642272019-03-01 11:55:59 -0500102 }
103
Ben Claytonfccfc562019-12-17 20:37:31 +0000104 BlitData data = {
105 pixel, nullptr, // source, dest
Chris Forbes88289192019-08-28 16:49:36 -0700106
Ben Claytonfccfc562019-12-17 20:37:31 +0000107 format.bytes(), // sPitchB
108 dest->rowPitchBytes(aspect, subresLayers.mipLevel), // dPitchB
109 0, // sSliceB (unused in clear operations)
110 dest->slicePitchBytes(aspect, subresLayers.mipLevel), // dSliceB
Alexis Hetu33642272019-03-01 11:55:59 -0500111
Ben Claytonfccfc562019-12-17 20:37:31 +0000112 0.5f, 0.5f, 0.0f, 0.0f, // x0, y0, w, h
Alexis Hetu33642272019-03-01 11:55:59 -0500113
Ben Claytonfccfc562019-12-17 20:37:31 +0000114 area.offset.y, static_cast<int>(area.offset.y + area.extent.height), // y0d, y1d
115 area.offset.x, static_cast<int>(area.offset.x + area.extent.width), // x0d, x1d
Nicolas Capens157ba262019-12-10 17:49:14 -0500116
Ben Claytonfccfc562019-12-17 20:37:31 +0000117 0, 0, // sWidth, sHeight
Alexis Hetu33642272019-03-01 11:55:59 -0500118 };
119
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500120 if(renderArea && dest->is3DSlice())
Alexis Hetu33642272019-03-01 11:55:59 -0500121 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500122 // Reinterpret layers as depth slices
123 subresLayers.baseArrayLayer = 0;
124 subresLayers.layerCount = 1;
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500125 for(uint32_t depth = subresourceRange.baseArrayLayer; depth <= lastLayer; depth++)
Alexis Hetu33642272019-03-01 11:55:59 -0500126 {
Ben Claytonfccfc562019-12-17 20:37:31 +0000127 data.dest = dest->getTexelPointer({ 0, 0, static_cast<int32_t>(depth) }, subresLayers);
Nicolas Capens157ba262019-12-10 17:49:14 -0500128 blitRoutine(&data);
Nicolas Capens68a82382018-10-02 13:16:55 -0400129 }
130 }
Nicolas Capens88ac3672019-08-01 13:22:34 -0400131 else
Nicolas Capens68a82382018-10-02 13:16:55 -0400132 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500133 for(subresLayers.baseArrayLayer = subresourceRange.baseArrayLayer; subresLayers.baseArrayLayer <= lastLayer; subresLayers.baseArrayLayer++)
Nicolas Capens68a82382018-10-02 13:16:55 -0400134 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500135 for(uint32_t depth = 0; depth < extent.depth; depth++)
Nicolas Capens68a82382018-10-02 13:16:55 -0400136 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500137 data.dest = dest->getTexelPointer({ 0, 0, static_cast<int32_t>(depth) }, subresLayers);
138
139 blitRoutine(&data);
140 }
141 }
142 }
143 }
144}
145
Ben Claytonfccfc562019-12-17 20:37:31 +0000146bool Blitter::fastClear(void *pixel, vk::Format format, vk::Image *dest, const vk::Format &viewFormat, const VkImageSubresourceRange &subresourceRange, const VkRect2D *renderArea)
Nicolas Capens157ba262019-12-10 17:49:14 -0500147{
148 if(format != VK_FORMAT_R32G32B32A32_SFLOAT)
149 {
150 return false;
151 }
152
Ben Claytonfccfc562019-12-17 20:37:31 +0000153 float *color = (float *)pixel;
Nicolas Capens157ba262019-12-10 17:49:14 -0500154 float r = color[0];
155 float g = color[1];
156 float b = color[2];
157 float a = color[3];
158
159 uint32_t packed;
160
161 VkImageAspectFlagBits aspect = static_cast<VkImageAspectFlagBits>(subresourceRange.aspectMask);
162 switch(viewFormat)
163 {
Ben Claytonfccfc562019-12-17 20:37:31 +0000164 case VK_FORMAT_R5G6B5_UNORM_PACK16:
165 packed = ((uint16_t)(31 * b + 0.5f) << 0) |
166 ((uint16_t)(63 * g + 0.5f) << 5) |
167 ((uint16_t)(31 * r + 0.5f) << 11);
168 break;
169 case VK_FORMAT_B5G6R5_UNORM_PACK16:
170 packed = ((uint16_t)(31 * r + 0.5f) << 0) |
171 ((uint16_t)(63 * g + 0.5f) << 5) |
172 ((uint16_t)(31 * b + 0.5f) << 11);
173 break;
174 case VK_FORMAT_A8B8G8R8_UINT_PACK32:
175 case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
176 case VK_FORMAT_R8G8B8A8_UNORM:
177 packed = ((uint32_t)(255 * a + 0.5f) << 24) |
178 ((uint32_t)(255 * b + 0.5f) << 16) |
179 ((uint32_t)(255 * g + 0.5f) << 8) |
180 ((uint32_t)(255 * r + 0.5f) << 0);
181 break;
182 case VK_FORMAT_B8G8R8A8_UNORM:
183 packed = ((uint32_t)(255 * a + 0.5f) << 24) |
184 ((uint32_t)(255 * r + 0.5f) << 16) |
185 ((uint32_t)(255 * g + 0.5f) << 8) |
186 ((uint32_t)(255 * b + 0.5f) << 0);
187 break;
188 case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
189 packed = R11G11B10F(color);
190 break;
191 case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32:
192 packed = RGB9E5(color);
193 break;
194 default:
195 return false;
Nicolas Capens157ba262019-12-10 17:49:14 -0500196 }
197
Ben Claytonfccfc562019-12-17 20:37:31 +0000198 VkImageSubresourceLayers subresLayers = {
Nicolas Capens157ba262019-12-10 17:49:14 -0500199 subresourceRange.aspectMask,
200 subresourceRange.baseMipLevel,
201 subresourceRange.baseArrayLayer,
202 1
203 };
204 uint32_t lastMipLevel = dest->getLastMipLevel(subresourceRange);
205 uint32_t lastLayer = dest->getLastLayerIndex(subresourceRange);
206
207 VkRect2D area = { { 0, 0 }, { 0, 0 } };
208 if(renderArea)
209 {
210 ASSERT(subresourceRange.levelCount == 1);
211 area = *renderArea;
212 }
213
214 for(; subresLayers.mipLevel <= lastMipLevel; subresLayers.mipLevel++)
215 {
216 int rowPitchBytes = dest->rowPitchBytes(aspect, subresLayers.mipLevel);
217 int slicePitchBytes = dest->slicePitchBytes(aspect, subresLayers.mipLevel);
218 VkExtent3D extent = dest->getMipLevelExtent(aspect, subresLayers.mipLevel);
219 if(!renderArea)
220 {
221 area.extent.width = extent.width;
222 area.extent.height = extent.height;
223 }
224 if(dest->is3DSlice())
225 {
Ben Claytonfccfc562019-12-17 20:37:31 +0000226 extent.depth = 1; // The 3D image is instead interpreted as a 2D image with layers
Nicolas Capens157ba262019-12-10 17:49:14 -0500227 }
228
229 for(subresLayers.baseArrayLayer = subresourceRange.baseArrayLayer; subresLayers.baseArrayLayer <= lastLayer; subresLayers.baseArrayLayer++)
230 {
231 for(uint32_t depth = 0; depth < extent.depth; depth++)
232 {
Ben Claytonfccfc562019-12-17 20:37:31 +0000233 uint8_t *slice = (uint8_t *)dest->getTexelPointer(
234 { area.offset.x, area.offset.y, static_cast<int32_t>(depth) }, subresLayers);
Nicolas Capens157ba262019-12-10 17:49:14 -0500235
236 for(int j = 0; j < dest->getSampleCountFlagBits(); j++)
237 {
238 uint8_t *d = slice;
239
240 switch(viewFormat.bytes())
241 {
Ben Claytonfccfc562019-12-17 20:37:31 +0000242 case 2:
243 for(uint32_t i = 0; i < area.extent.height; i++)
244 {
245 ASSERT(d < dest->end());
246 sw::clear((uint16_t *)d, static_cast<uint16_t>(packed), area.extent.width);
247 d += rowPitchBytes;
248 }
249 break;
250 case 4:
251 for(uint32_t i = 0; i < area.extent.height; i++)
252 {
253 ASSERT(d < dest->end());
254 sw::clear((uint32_t *)d, packed, area.extent.width);
255 d += rowPitchBytes;
256 }
257 break;
258 default:
259 assert(false);
Nicolas Capens157ba262019-12-10 17:49:14 -0500260 }
261
262 slice += slicePitchBytes;
263 }
264 }
265 }
266 }
267
268 return true;
269}
270
271Float4 Blitter::readFloat4(Pointer<Byte> element, const State &state)
272{
273 Float4 c(0.0f, 0.0f, 0.0f, 1.0f);
274
275 switch(state.sourceFormat)
276 {
Ben Claytonfccfc562019-12-17 20:37:31 +0000277 case VK_FORMAT_B4G4R4A4_UNORM_PACK16:
278 c.w = Float(Int(*Pointer<Byte>(element)) & Int(0xF));
279 c.x = Float((Int(*Pointer<Byte>(element)) >> 4) & Int(0xF));
280 c.y = Float(Int(*Pointer<Byte>(element + 1)) & Int(0xF));
281 c.z = Float((Int(*Pointer<Byte>(element + 1)) >> 4) & Int(0xF));
282 break;
283 case VK_FORMAT_R8_SINT:
284 case VK_FORMAT_R8_SNORM:
285 c.x = Float(Int(*Pointer<SByte>(element)));
286 c.w = float(0x7F);
287 break;
288 case VK_FORMAT_R8_UNORM:
289 case VK_FORMAT_R8_UINT:
290 case VK_FORMAT_R8_SRGB:
291 c.x = Float(Int(*Pointer<Byte>(element)));
292 c.w = float(0xFF);
293 break;
294 case VK_FORMAT_R16_SINT:
295 case VK_FORMAT_R16_SNORM:
296 c.x = Float(Int(*Pointer<Short>(element)));
297 c.w = float(0x7FFF);
298 break;
299 case VK_FORMAT_R16_UNORM:
300 case VK_FORMAT_R16_UINT:
301 c.x = Float(Int(*Pointer<UShort>(element)));
302 c.w = float(0xFFFF);
303 break;
304 case VK_FORMAT_R32_SINT:
305 c.x = Float(*Pointer<Int>(element));
306 c.w = float(0x7FFFFFFF);
307 break;
308 case VK_FORMAT_R32_UINT:
309 c.x = Float(*Pointer<UInt>(element));
310 c.w = float(0xFFFFFFFF);
311 break;
312 case VK_FORMAT_B8G8R8A8_SRGB:
313 case VK_FORMAT_B8G8R8A8_UNORM:
314 c = Float4(*Pointer<Byte4>(element)).zyxw;
315 break;
316 case VK_FORMAT_A8B8G8R8_SINT_PACK32:
317 case VK_FORMAT_R8G8B8A8_SINT:
318 case VK_FORMAT_A8B8G8R8_SNORM_PACK32:
319 case VK_FORMAT_R8G8B8A8_SNORM:
320 c = Float4(*Pointer<SByte4>(element));
321 break;
322 case VK_FORMAT_A8B8G8R8_UINT_PACK32:
323 case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
324 case VK_FORMAT_R8G8B8A8_UNORM:
325 case VK_FORMAT_R8G8B8A8_UINT:
326 case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
327 case VK_FORMAT_R8G8B8A8_SRGB:
328 c = Float4(*Pointer<Byte4>(element));
329 break;
330 case VK_FORMAT_R16G16B16A16_SINT:
331 c = Float4(*Pointer<Short4>(element));
332 break;
333 case VK_FORMAT_R16G16B16A16_UNORM:
334 case VK_FORMAT_R16G16B16A16_UINT:
335 c = Float4(*Pointer<UShort4>(element));
336 break;
337 case VK_FORMAT_R32G32B32A32_SINT:
338 c = Float4(*Pointer<Int4>(element));
339 break;
340 case VK_FORMAT_R32G32B32A32_UINT:
341 c = Float4(*Pointer<UInt4>(element));
342 break;
343 case VK_FORMAT_R8G8_SINT:
344 case VK_FORMAT_R8G8_SNORM:
345 c.x = Float(Int(*Pointer<SByte>(element + 0)));
346 c.y = Float(Int(*Pointer<SByte>(element + 1)));
347 c.w = float(0x7F);
348 break;
349 case VK_FORMAT_R8G8_UNORM:
350 case VK_FORMAT_R8G8_UINT:
351 case VK_FORMAT_R8G8_SRGB:
352 c.x = Float(Int(*Pointer<Byte>(element + 0)));
353 c.y = Float(Int(*Pointer<Byte>(element + 1)));
354 c.w = float(0xFF);
355 break;
356 case VK_FORMAT_R16G16_SINT:
357 case VK_FORMAT_R16G16_SNORM:
358 c.x = Float(Int(*Pointer<Short>(element + 0)));
359 c.y = Float(Int(*Pointer<Short>(element + 2)));
360 c.w = float(0x7FFF);
361 break;
362 case VK_FORMAT_R16G16_UNORM:
363 case VK_FORMAT_R16G16_UINT:
364 c.x = Float(Int(*Pointer<UShort>(element + 0)));
365 c.y = Float(Int(*Pointer<UShort>(element + 2)));
366 c.w = float(0xFFFF);
367 break;
368 case VK_FORMAT_R32G32_SINT:
369 c.x = Float(*Pointer<Int>(element + 0));
370 c.y = Float(*Pointer<Int>(element + 4));
371 c.w = float(0x7FFFFFFF);
372 break;
373 case VK_FORMAT_R32G32_UINT:
374 c.x = Float(*Pointer<UInt>(element + 0));
375 c.y = Float(*Pointer<UInt>(element + 4));
376 c.w = float(0xFFFFFFFF);
377 break;
378 case VK_FORMAT_R32G32B32A32_SFLOAT:
379 c = *Pointer<Float4>(element);
380 break;
381 case VK_FORMAT_R32G32_SFLOAT:
382 c.x = *Pointer<Float>(element + 0);
383 c.y = *Pointer<Float>(element + 4);
384 break;
385 case VK_FORMAT_R32_SFLOAT:
386 c.x = *Pointer<Float>(element);
387 break;
388 case VK_FORMAT_R16G16B16A16_SFLOAT:
389 c.w = Float(*Pointer<Half>(element + 6));
390 case VK_FORMAT_R16G16B16_SFLOAT:
391 c.z = Float(*Pointer<Half>(element + 4));
392 case VK_FORMAT_R16G16_SFLOAT:
393 c.y = Float(*Pointer<Half>(element + 2));
394 case VK_FORMAT_R16_SFLOAT:
395 c.x = Float(*Pointer<Half>(element));
396 break;
397 case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
398 c = r11g11b10Unpack(*Pointer<UInt>(element));
399 break;
400 case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32:
401 // This type contains a common 5 bit exponent (E) and a 9 bit the mantissa for R, G and B.
402 c.x = Float(*Pointer<UInt>(element) & UInt(0x000001FF)); // R's mantissa (bits 0-8)
403 c.y = Float((*Pointer<UInt>(element) & UInt(0x0003FE00)) >> 9); // G's mantissa (bits 9-17)
404 c.z = Float((*Pointer<UInt>(element) & UInt(0x07FC0000)) >> 18); // B's mantissa (bits 18-26)
405 c *= Float4(
406 // 2^E, using the exponent (bits 27-31) and treating it as an unsigned integer value
407 Float(UInt(1) << ((*Pointer<UInt>(element) & UInt(0xF8000000)) >> 27)) *
408 // Since the 9 bit mantissa values currently stored in RGB were converted straight
409 // from int to float (in the [0, 1<<9] range instead of the [0, 1] range), they
410 // are (1 << 9) times too high.
411 // Also, the exponent has 5 bits and we compute the exponent bias of floating point
412 // formats using "2^(k-1) - 1", so, in this case, the exponent bias is 2^(5-1)-1 = 15
413 // Exponent bias (15) + number of mantissa bits per component (9) = 24
414 Float(1.0f / (1 << 24)));
415 c.w = 1.0f;
416 break;
417 case VK_FORMAT_R5G6B5_UNORM_PACK16:
418 c.x = Float(Int((*Pointer<UShort>(element) & UShort(0xF800)) >> UShort(11)));
419 c.y = Float(Int((*Pointer<UShort>(element) & UShort(0x07E0)) >> UShort(5)));
420 c.z = Float(Int(*Pointer<UShort>(element) & UShort(0x001F)));
421 break;
422 case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
423 c.w = Float(Int((*Pointer<UShort>(element) & UShort(0x8000)) >> UShort(15)));
424 c.x = Float(Int((*Pointer<UShort>(element) & UShort(0x7C00)) >> UShort(10)));
425 c.y = Float(Int((*Pointer<UShort>(element) & UShort(0x03E0)) >> UShort(5)));
426 c.z = Float(Int(*Pointer<UShort>(element) & UShort(0x001F)));
427 break;
428 case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
429 case VK_FORMAT_A2B10G10R10_UINT_PACK32:
430 c.x = Float(Int((*Pointer<UInt>(element) & UInt(0x000003FF))));
431 c.y = Float(Int((*Pointer<UInt>(element) & UInt(0x000FFC00)) >> 10));
432 c.z = Float(Int((*Pointer<UInt>(element) & UInt(0x3FF00000)) >> 20));
433 c.w = Float(Int((*Pointer<UInt>(element) & UInt(0xC0000000)) >> 30));
434 break;
435 case VK_FORMAT_D16_UNORM:
436 c.x = Float(Int((*Pointer<UShort>(element))));
437 break;
438 case VK_FORMAT_X8_D24_UNORM_PACK32:
439 c.x = Float(Int((*Pointer<UInt>(element) & UInt(0xFFFFFF00)) >> 8));
440 break;
441 case VK_FORMAT_D32_SFLOAT:
442 c.x = *Pointer<Float>(element);
443 break;
444 case VK_FORMAT_S8_UINT:
445 c.x = Float(Int(*Pointer<Byte>(element)));
446 break;
447 default:
448 UNSUPPORTED("Blitter source format %d", (int)state.sourceFormat);
Nicolas Capens157ba262019-12-10 17:49:14 -0500449 }
450
451 return c;
452}
453
454void Blitter::write(Float4 &c, Pointer<Byte> element, const State &state)
455{
456 bool writeR = state.writeRed;
457 bool writeG = state.writeGreen;
458 bool writeB = state.writeBlue;
459 bool writeA = state.writeAlpha;
460 bool writeRGBA = writeR && writeG && writeB && writeA;
461
462 switch(state.destFormat)
463 {
Ben Claytonfccfc562019-12-17 20:37:31 +0000464 case VK_FORMAT_R4G4_UNORM_PACK8:
465 if(writeR | writeG)
Nicolas Capens157ba262019-12-10 17:49:14 -0500466 {
Ben Claytonfccfc562019-12-17 20:37:31 +0000467 if(!writeR)
468 {
469 *Pointer<Byte>(element) = (Byte(RoundInt(Float(c.y))) & Byte(0xF)) |
470 (*Pointer<Byte>(element) & Byte(0xF0));
471 }
472 else if(!writeG)
473 {
474 *Pointer<Byte>(element) = (*Pointer<Byte>(element) & Byte(0xF)) |
475 (Byte(RoundInt(Float(c.x))) << Byte(4));
476 }
477 else
478 {
479 *Pointer<Byte>(element) = (Byte(RoundInt(Float(c.y))) & Byte(0xF)) |
480 (Byte(RoundInt(Float(c.x))) << Byte(4));
481 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500482 }
Ben Claytonfccfc562019-12-17 20:37:31 +0000483 break;
484 case VK_FORMAT_R4G4B4A4_UNORM_PACK16:
485 if(writeR || writeG || writeB || writeA)
Nicolas Capens157ba262019-12-10 17:49:14 -0500486 {
Ben Claytonfccfc562019-12-17 20:37:31 +0000487 *Pointer<UShort>(element) = (writeR ? ((UShort(RoundInt(Float(c.x))) & UShort(0xF)) << UShort(12)) : (*Pointer<UShort>(element) & UShort(0x000F))) |
488 (writeG ? ((UShort(RoundInt(Float(c.y))) & UShort(0xF)) << UShort(8)) : (*Pointer<UShort>(element) & UShort(0x00F0))) |
489 (writeB ? ((UShort(RoundInt(Float(c.z))) & UShort(0xF)) << UShort(4)) : (*Pointer<UShort>(element) & UShort(0x0F00))) |
490 (writeA ? (UShort(RoundInt(Float(c.w))) & UShort(0xF)) : (*Pointer<UShort>(element) & UShort(0xF000)));
491 }
492 break;
493 case VK_FORMAT_B4G4R4A4_UNORM_PACK16:
494 if(writeRGBA)
495 {
496 *Pointer<UShort>(element) = UShort(RoundInt(Float(c.w)) & Int(0xF)) |
497 UShort((RoundInt(Float(c.x)) & Int(0xF)) << 4) |
498 UShort((RoundInt(Float(c.y)) & Int(0xF)) << 8) |
499 UShort((RoundInt(Float(c.z)) & Int(0xF)) << 12);
Nicolas Capens157ba262019-12-10 17:49:14 -0500500 }
501 else
502 {
Ben Claytonfccfc562019-12-17 20:37:31 +0000503 unsigned short mask = (writeA ? 0x000F : 0x0000) |
504 (writeR ? 0x00F0 : 0x0000) |
505 (writeG ? 0x0F00 : 0x0000) |
506 (writeB ? 0xF000 : 0x0000);
507 unsigned short unmask = ~mask;
508 *Pointer<UShort>(element) = (*Pointer<UShort>(element) & UShort(unmask)) |
509 ((UShort(RoundInt(Float(c.w)) & Int(0xF)) |
510 UShort((RoundInt(Float(c.x)) & Int(0xF)) << 4) |
511 UShort((RoundInt(Float(c.y)) & Int(0xF)) << 8) |
512 UShort((RoundInt(Float(c.z)) & Int(0xF)) << 12)) &
513 UShort(mask));
Nicolas Capens157ba262019-12-10 17:49:14 -0500514 }
Ben Claytonfccfc562019-12-17 20:37:31 +0000515 break;
516 case VK_FORMAT_B8G8R8A8_SRGB:
517 case VK_FORMAT_B8G8R8A8_UNORM:
518 if(writeRGBA)
519 {
520 Short4 c0 = RoundShort4(c.zyxw);
521 *Pointer<Byte4>(element) = Byte4(PackUnsigned(c0, c0));
522 }
523 else
524 {
525 if(writeB) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.z))); }
526 if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
527 if(writeR) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.x))); }
528 if(writeA) { *Pointer<Byte>(element + 3) = Byte(RoundInt(Float(c.w))); }
529 }
530 break;
531 case VK_FORMAT_B8G8R8_SNORM:
532 if(writeB) { *Pointer<SByte>(element + 0) = SByte(RoundInt(Float(c.z))); }
533 if(writeG) { *Pointer<SByte>(element + 1) = SByte(RoundInt(Float(c.y))); }
534 if(writeR) { *Pointer<SByte>(element + 2) = SByte(RoundInt(Float(c.x))); }
535 break;
536 case VK_FORMAT_B8G8R8_UNORM:
537 case VK_FORMAT_B8G8R8_SRGB:
Nicolas Capens157ba262019-12-10 17:49:14 -0500538 if(writeB) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.z))); }
539 if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
540 if(writeR) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.x))); }
Ben Claytonfccfc562019-12-17 20:37:31 +0000541 break;
542 case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
543 case VK_FORMAT_R8G8B8A8_UNORM:
544 case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
545 case VK_FORMAT_R8G8B8A8_SRGB:
546 case VK_FORMAT_A8B8G8R8_UINT_PACK32:
547 case VK_FORMAT_R8G8B8A8_UINT:
548 case VK_FORMAT_R8G8B8A8_USCALED:
549 case VK_FORMAT_A8B8G8R8_USCALED_PACK32:
550 if(writeRGBA)
551 {
552 Short4 c0 = RoundShort4(c);
553 *Pointer<Byte4>(element) = Byte4(PackUnsigned(c0, c0));
554 }
555 else
556 {
557 if(writeR) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.x))); }
558 if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
559 if(writeB) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.z))); }
560 if(writeA) { *Pointer<Byte>(element + 3) = Byte(RoundInt(Float(c.w))); }
561 }
562 break;
563 case VK_FORMAT_R32G32B32A32_SFLOAT:
564 if(writeRGBA)
565 {
566 *Pointer<Float4>(element) = c;
567 }
568 else
569 {
570 if(writeR) { *Pointer<Float>(element) = c.x; }
571 if(writeG) { *Pointer<Float>(element + 4) = c.y; }
572 if(writeB) { *Pointer<Float>(element + 8) = c.z; }
573 if(writeA) { *Pointer<Float>(element + 12) = c.w; }
574 }
575 break;
576 case VK_FORMAT_R32G32B32_SFLOAT:
Nicolas Capens157ba262019-12-10 17:49:14 -0500577 if(writeR) { *Pointer<Float>(element) = c.x; }
578 if(writeG) { *Pointer<Float>(element + 4) = c.y; }
579 if(writeB) { *Pointer<Float>(element + 8) = c.z; }
Ben Claytonfccfc562019-12-17 20:37:31 +0000580 break;
581 case VK_FORMAT_R32G32_SFLOAT:
582 if(writeR && writeG)
583 {
584 *Pointer<Float2>(element) = Float2(c);
585 }
586 else
587 {
588 if(writeR) { *Pointer<Float>(element) = c.x; }
589 if(writeG) { *Pointer<Float>(element + 4) = c.y; }
590 }
591 break;
592 case VK_FORMAT_R32_SFLOAT:
Nicolas Capens157ba262019-12-10 17:49:14 -0500593 if(writeR) { *Pointer<Float>(element) = c.x; }
Ben Claytonfccfc562019-12-17 20:37:31 +0000594 break;
595 case VK_FORMAT_R16G16B16A16_SFLOAT:
596 if(writeA) { *Pointer<Half>(element + 6) = Half(c.w); }
597 case VK_FORMAT_R16G16B16_SFLOAT:
598 if(writeB) { *Pointer<Half>(element + 4) = Half(c.z); }
599 case VK_FORMAT_R16G16_SFLOAT:
600 if(writeG) { *Pointer<Half>(element + 2) = Half(c.y); }
601 case VK_FORMAT_R16_SFLOAT:
602 if(writeR) { *Pointer<Half>(element) = Half(c.x); }
603 break;
604 case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
Nicolas Capens157ba262019-12-10 17:49:14 -0500605 {
Alexis Hetu24c49dd2019-12-13 16:32:43 -0500606 UInt rgb = r11g11b10Pack(c);
Nicolas Capens157ba262019-12-10 17:49:14 -0500607
608 UInt old = *Pointer<UInt>(element);
609
610 unsigned int mask = (writeR ? 0x000007FF : 0) |
611 (writeG ? 0x003FF800 : 0) |
612 (writeB ? 0xFFC00000 : 0);
613
614 *Pointer<UInt>(element) = (rgb & mask) | (old & ~mask);
615 }
616 break;
Ben Claytonfccfc562019-12-17 20:37:31 +0000617 case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32:
Nicolas Capens157ba262019-12-10 17:49:14 -0500618 {
619 ASSERT(writeRGBA); // Can't sensibly write just part of this format.
620
621 // Vulkan 1.1.117 section 15.2.1 RGB to Shared Exponent Conversion
622
623 constexpr int N = 9; // number of mantissa bits per component
624 constexpr int B = 15; // exponent bias
625 constexpr int E_max = 31; // maximum possible biased exponent value
626
627 // Maximum representable value.
628 constexpr float sharedexp_max = ((static_cast<float>(1 << N) - 1) / static_cast<float>(1 << N)) * static_cast<float>(1 << (E_max - B));
629
630 // Clamp components to valid range. NaN becomes 0.
Ben Claytonfccfc562019-12-17 20:37:31 +0000631 Float red_c = Min(IfThenElse(!(c.x > 0), Float(0), Float(c.x)), sharedexp_max);
Nicolas Capens157ba262019-12-10 17:49:14 -0500632 Float green_c = Min(IfThenElse(!(c.y > 0), Float(0), Float(c.y)), sharedexp_max);
Ben Claytonfccfc562019-12-17 20:37:31 +0000633 Float blue_c = Min(IfThenElse(!(c.z > 0), Float(0), Float(c.z)), sharedexp_max);
Nicolas Capens157ba262019-12-10 17:49:14 -0500634
635 // We're reducing the mantissa to 9 bits, so we must round up if the next
636 // bit is 1. In other words add 0.5 to the new mantissa's position and
637 // allow overflow into the exponent so we can scale correctly.
638 constexpr int half = 1 << (23 - N);
639 Float red_r = As<Float>(As<Int>(red_c) + half);
640 Float green_r = As<Float>(As<Int>(green_c) + half);
641 Float blue_r = As<Float>(As<Int>(blue_c) + half);
642
643 // The largest component determines the shared exponent. It can't be lower
644 // than 0 (after bias subtraction) so also limit to the mimimum representable.
645 constexpr float min_s = 0.5f / (1 << B);
646 Float max_s = Max(Max(red_r, green_r), Max(blue_r, min_s));
647
648 // Obtain the reciprocal of the shared exponent by inverting the bits,
649 // and scale by the new mantissa's size. Note that the IEEE-754 single-precision
650 // format has an implicit leading 1, but this shared component format does not.
651 Float scale = As<Float>((As<Int>(max_s) & 0x7F800000) ^ 0x7F800000) * (1 << (N - 2));
652
653 UInt R9 = RoundInt(red_c * scale);
654 UInt G9 = UInt(RoundInt(green_c * scale));
655 UInt B9 = UInt(RoundInt(blue_c * scale));
656 UInt E5 = (As<UInt>(max_s) >> 23) - 127 + 15 + 1;
657
658 UInt E5B9G9R9 = (E5 << 27) | (B9 << 18) | (G9 << 9) | R9;
659
660 *Pointer<UInt>(element) = E5B9G9R9;
661 }
662 break;
Ben Claytonfccfc562019-12-17 20:37:31 +0000663 case VK_FORMAT_B8G8R8A8_SNORM:
664 if(writeB) { *Pointer<SByte>(element) = SByte(RoundInt(Float(c.z))); }
665 if(writeG) { *Pointer<SByte>(element + 1) = SByte(RoundInt(Float(c.y))); }
666 if(writeR) { *Pointer<SByte>(element + 2) = SByte(RoundInt(Float(c.x))); }
667 if(writeA) { *Pointer<SByte>(element + 3) = SByte(RoundInt(Float(c.w))); }
668 break;
669 case VK_FORMAT_A8B8G8R8_SINT_PACK32:
670 case VK_FORMAT_R8G8B8A8_SINT:
671 case VK_FORMAT_A8B8G8R8_SNORM_PACK32:
672 case VK_FORMAT_R8G8B8A8_SNORM:
673 case VK_FORMAT_R8G8B8A8_SSCALED:
674 case VK_FORMAT_A8B8G8R8_SSCALED_PACK32:
675 if(writeA) { *Pointer<SByte>(element + 3) = SByte(RoundInt(Float(c.w))); }
676 case VK_FORMAT_R8G8B8_SINT:
677 case VK_FORMAT_R8G8B8_SNORM:
678 case VK_FORMAT_R8G8B8_SSCALED:
679 if(writeB) { *Pointer<SByte>(element + 2) = SByte(RoundInt(Float(c.z))); }
680 case VK_FORMAT_R8G8_SINT:
681 case VK_FORMAT_R8G8_SNORM:
682 case VK_FORMAT_R8G8_SSCALED:
683 if(writeG) { *Pointer<SByte>(element + 1) = SByte(RoundInt(Float(c.y))); }
684 case VK_FORMAT_R8_SINT:
685 case VK_FORMAT_R8_SNORM:
686 case VK_FORMAT_R8_SSCALED:
687 if(writeR) { *Pointer<SByte>(element) = SByte(RoundInt(Float(c.x))); }
688 break;
689 case VK_FORMAT_R8G8B8_UINT:
690 case VK_FORMAT_R8G8B8_UNORM:
691 case VK_FORMAT_R8G8B8_USCALED:
692 case VK_FORMAT_R8G8B8_SRGB:
693 if(writeB) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.z))); }
694 case VK_FORMAT_R8G8_UINT:
695 case VK_FORMAT_R8G8_UNORM:
696 case VK_FORMAT_R8G8_USCALED:
697 case VK_FORMAT_R8G8_SRGB:
698 if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
699 case VK_FORMAT_R8_UINT:
700 case VK_FORMAT_R8_UNORM:
701 case VK_FORMAT_R8_USCALED:
702 case VK_FORMAT_R8_SRGB:
703 if(writeR) { *Pointer<Byte>(element) = Byte(RoundInt(Float(c.x))); }
704 break;
705 case VK_FORMAT_R16G16B16A16_SINT:
706 case VK_FORMAT_R16G16B16A16_SNORM:
707 case VK_FORMAT_R16G16B16A16_SSCALED:
708 if(writeRGBA)
709 {
710 *Pointer<Short4>(element) = Short4(RoundInt(c));
711 }
712 else
713 {
714 if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); }
715 if(writeG) { *Pointer<Short>(element + 2) = Short(RoundInt(Float(c.y))); }
716 if(writeB) { *Pointer<Short>(element + 4) = Short(RoundInt(Float(c.z))); }
717 if(writeA) { *Pointer<Short>(element + 6) = Short(RoundInt(Float(c.w))); }
718 }
719 break;
720 case VK_FORMAT_R16G16B16_SINT:
721 case VK_FORMAT_R16G16B16_SNORM:
722 case VK_FORMAT_R16G16B16_SSCALED:
Nicolas Capens157ba262019-12-10 17:49:14 -0500723 if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); }
724 if(writeG) { *Pointer<Short>(element + 2) = Short(RoundInt(Float(c.y))); }
725 if(writeB) { *Pointer<Short>(element + 4) = Short(RoundInt(Float(c.z))); }
Ben Claytonfccfc562019-12-17 20:37:31 +0000726 break;
727 case VK_FORMAT_R16G16_SINT:
728 case VK_FORMAT_R16G16_SNORM:
729 case VK_FORMAT_R16G16_SSCALED:
730 if(writeR && writeG)
731 {
732 *Pointer<Short2>(element) = Short2(Short4(RoundInt(c)));
733 }
734 else
735 {
736 if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); }
737 if(writeG) { *Pointer<Short>(element + 2) = Short(RoundInt(Float(c.y))); }
738 }
739 break;
740 case VK_FORMAT_R16_SINT:
741 case VK_FORMAT_R16_SNORM:
742 case VK_FORMAT_R16_SSCALED:
Nicolas Capens157ba262019-12-10 17:49:14 -0500743 if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); }
Ben Claytonfccfc562019-12-17 20:37:31 +0000744 break;
745 case VK_FORMAT_R16G16B16A16_UINT:
746 case VK_FORMAT_R16G16B16A16_UNORM:
747 case VK_FORMAT_R16G16B16A16_USCALED:
748 if(writeRGBA)
749 {
750 *Pointer<UShort4>(element) = UShort4(RoundInt(c));
751 }
752 else
753 {
754 if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); }
755 if(writeG) { *Pointer<UShort>(element + 2) = UShort(RoundInt(Float(c.y))); }
756 if(writeB) { *Pointer<UShort>(element + 4) = UShort(RoundInt(Float(c.z))); }
757 if(writeA) { *Pointer<UShort>(element + 6) = UShort(RoundInt(Float(c.w))); }
758 }
759 break;
760 case VK_FORMAT_R16G16B16_UINT:
761 case VK_FORMAT_R16G16B16_UNORM:
762 case VK_FORMAT_R16G16B16_USCALED:
Nicolas Capens157ba262019-12-10 17:49:14 -0500763 if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); }
764 if(writeG) { *Pointer<UShort>(element + 2) = UShort(RoundInt(Float(c.y))); }
765 if(writeB) { *Pointer<UShort>(element + 4) = UShort(RoundInt(Float(c.z))); }
Ben Claytonfccfc562019-12-17 20:37:31 +0000766 break;
767 case VK_FORMAT_R16G16_UINT:
768 case VK_FORMAT_R16G16_UNORM:
769 case VK_FORMAT_R16G16_USCALED:
770 if(writeR && writeG)
771 {
772 *Pointer<UShort2>(element) = UShort2(UShort4(RoundInt(c)));
773 }
774 else
775 {
776 if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); }
777 if(writeG) { *Pointer<UShort>(element + 2) = UShort(RoundInt(Float(c.y))); }
778 }
779 break;
780 case VK_FORMAT_R16_UINT:
781 case VK_FORMAT_R16_UNORM:
782 case VK_FORMAT_R16_USCALED:
Nicolas Capens157ba262019-12-10 17:49:14 -0500783 if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); }
Ben Claytonfccfc562019-12-17 20:37:31 +0000784 break;
785 case VK_FORMAT_R32G32B32A32_SINT:
786 if(writeRGBA)
787 {
788 *Pointer<Int4>(element) = RoundInt(c);
789 }
790 else
791 {
792 if(writeR) { *Pointer<Int>(element) = RoundInt(Float(c.x)); }
793 if(writeG) { *Pointer<Int>(element + 4) = RoundInt(Float(c.y)); }
794 if(writeB) { *Pointer<Int>(element + 8) = RoundInt(Float(c.z)); }
795 if(writeA) { *Pointer<Int>(element + 12) = RoundInt(Float(c.w)); }
796 }
797 break;
798 case VK_FORMAT_R32G32B32_SINT:
Nicolas Capens157ba262019-12-10 17:49:14 -0500799 if(writeB) { *Pointer<Int>(element + 8) = RoundInt(Float(c.z)); }
Ben Claytonfccfc562019-12-17 20:37:31 +0000800 case VK_FORMAT_R32G32_SINT:
801 if(writeG) { *Pointer<Int>(element + 4) = RoundInt(Float(c.y)); }
802 case VK_FORMAT_R32_SINT:
803 if(writeR) { *Pointer<Int>(element) = RoundInt(Float(c.x)); }
804 break;
805 case VK_FORMAT_R32G32B32A32_UINT:
806 if(writeRGBA)
807 {
808 *Pointer<UInt4>(element) = UInt4(RoundInt(c));
809 }
810 else
811 {
812 if(writeR) { *Pointer<UInt>(element) = As<UInt>(RoundInt(Float(c.x))); }
813 if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(RoundInt(Float(c.y))); }
814 if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(RoundInt(Float(c.z))); }
815 if(writeA) { *Pointer<UInt>(element + 12) = As<UInt>(RoundInt(Float(c.w))); }
816 }
817 break;
818 case VK_FORMAT_R32G32B32_UINT:
Nicolas Capens157ba262019-12-10 17:49:14 -0500819 if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(RoundInt(Float(c.z))); }
Ben Claytonfccfc562019-12-17 20:37:31 +0000820 case VK_FORMAT_R32G32_UINT:
821 if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(RoundInt(Float(c.y))); }
822 case VK_FORMAT_R32_UINT:
823 if(writeR) { *Pointer<UInt>(element) = As<UInt>(RoundInt(Float(c.x))); }
824 break;
825 case VK_FORMAT_R5G6B5_UNORM_PACK16:
826 if(writeR && writeG && writeB)
827 {
828 *Pointer<UShort>(element) = UShort(RoundInt(Float(c.z)) |
829 (RoundInt(Float(c.y)) << Int(5)) |
830 (RoundInt(Float(c.x)) << Int(11)));
831 }
832 else
833 {
834 unsigned short mask = (writeB ? 0x001F : 0x0000) | (writeG ? 0x07E0 : 0x0000) | (writeR ? 0xF800 : 0x0000);
835 unsigned short unmask = ~mask;
836 *Pointer<UShort>(element) = (*Pointer<UShort>(element) & UShort(unmask)) |
837 (UShort(RoundInt(Float(c.z)) |
838 (RoundInt(Float(c.y)) << Int(5)) |
839 (RoundInt(Float(c.x)) << Int(11))) &
840 UShort(mask));
841 }
842 break;
843 case VK_FORMAT_R5G5B5A1_UNORM_PACK16:
844 if(writeRGBA)
845 {
846 *Pointer<UShort>(element) = UShort(RoundInt(Float(c.w)) |
847 (RoundInt(Float(c.z)) << Int(1)) |
848 (RoundInt(Float(c.y)) << Int(6)) |
849 (RoundInt(Float(c.x)) << Int(11)));
850 }
851 else
852 {
853 unsigned short mask = (writeA ? 0x8000 : 0x0000) |
854 (writeR ? 0x7C00 : 0x0000) |
855 (writeG ? 0x03E0 : 0x0000) |
856 (writeB ? 0x001F : 0x0000);
857 unsigned short unmask = ~mask;
858 *Pointer<UShort>(element) = (*Pointer<UShort>(element) & UShort(unmask)) |
859 (UShort(RoundInt(Float(c.w)) |
860 (RoundInt(Float(c.z)) << Int(1)) |
861 (RoundInt(Float(c.y)) << Int(6)) |
862 (RoundInt(Float(c.x)) << Int(11))) &
863 UShort(mask));
864 }
865 break;
866 case VK_FORMAT_B5G5R5A1_UNORM_PACK16:
867 if(writeRGBA)
868 {
869 *Pointer<UShort>(element) = UShort(RoundInt(Float(c.w)) |
870 (RoundInt(Float(c.x)) << Int(1)) |
871 (RoundInt(Float(c.y)) << Int(6)) |
872 (RoundInt(Float(c.z)) << Int(11)));
873 }
874 else
875 {
876 unsigned short mask = (writeA ? 0x8000 : 0x0000) |
877 (writeR ? 0x7C00 : 0x0000) |
878 (writeG ? 0x03E0 : 0x0000) |
879 (writeB ? 0x001F : 0x0000);
880 unsigned short unmask = ~mask;
881 *Pointer<UShort>(element) = (*Pointer<UShort>(element) & UShort(unmask)) |
882 (UShort(RoundInt(Float(c.w)) |
883 (RoundInt(Float(c.x)) << Int(1)) |
884 (RoundInt(Float(c.y)) << Int(6)) |
885 (RoundInt(Float(c.z)) << Int(11))) &
886 UShort(mask));
887 }
888 break;
889 case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
890 if(writeRGBA)
891 {
892 *Pointer<UShort>(element) = UShort(RoundInt(Float(c.z)) |
893 (RoundInt(Float(c.y)) << Int(5)) |
894 (RoundInt(Float(c.x)) << Int(10)) |
895 (RoundInt(Float(c.w)) << Int(15)));
896 }
897 else
898 {
899 unsigned short mask = (writeA ? 0x8000 : 0x0000) |
900 (writeR ? 0x7C00 : 0x0000) |
901 (writeG ? 0x03E0 : 0x0000) |
902 (writeB ? 0x001F : 0x0000);
903 unsigned short unmask = ~mask;
904 *Pointer<UShort>(element) = (*Pointer<UShort>(element) & UShort(unmask)) |
905 (UShort(RoundInt(Float(c.z)) |
906 (RoundInt(Float(c.y)) << Int(5)) |
907 (RoundInt(Float(c.x)) << Int(10)) |
908 (RoundInt(Float(c.w)) << Int(15))) &
909 UShort(mask));
910 }
911 break;
912 case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
913 case VK_FORMAT_A2B10G10R10_UINT_PACK32:
914 case VK_FORMAT_A2B10G10R10_SNORM_PACK32:
915 if(writeRGBA)
916 {
917 *Pointer<UInt>(element) = UInt(RoundInt(Float(c.x)) |
918 (RoundInt(Float(c.y)) << 10) |
919 (RoundInt(Float(c.z)) << 20) |
920 (RoundInt(Float(c.w)) << 30));
921 }
922 else
923 {
924 unsigned int mask = (writeA ? 0xC0000000 : 0x0000) |
925 (writeB ? 0x3FF00000 : 0x0000) |
926 (writeG ? 0x000FFC00 : 0x0000) |
927 (writeR ? 0x000003FF : 0x0000);
928 unsigned int unmask = ~mask;
929 *Pointer<UInt>(element) = (*Pointer<UInt>(element) & UInt(unmask)) |
930 (UInt(RoundInt(Float(c.x)) |
931 (RoundInt(Float(c.y)) << 10) |
932 (RoundInt(Float(c.z)) << 20) |
933 (RoundInt(Float(c.w)) << 30)) &
934 UInt(mask));
935 }
936 break;
937 case VK_FORMAT_A2R10G10B10_UNORM_PACK32:
938 case VK_FORMAT_A2R10G10B10_UINT_PACK32:
939 case VK_FORMAT_A2R10G10B10_SNORM_PACK32:
940 if(writeRGBA)
941 {
942 *Pointer<UInt>(element) = UInt(RoundInt(Float(c.z)) |
943 (RoundInt(Float(c.y)) << 10) |
944 (RoundInt(Float(c.x)) << 20) |
945 (RoundInt(Float(c.w)) << 30));
946 }
947 else
948 {
949 unsigned int mask = (writeA ? 0xC0000000 : 0x0000) |
950 (writeR ? 0x3FF00000 : 0x0000) |
951 (writeG ? 0x000FFC00 : 0x0000) |
952 (writeB ? 0x000003FF : 0x0000);
953 unsigned int unmask = ~mask;
954 *Pointer<UInt>(element) = (*Pointer<UInt>(element) & UInt(unmask)) |
955 (UInt(RoundInt(Float(c.z)) |
956 (RoundInt(Float(c.y)) << 10) |
957 (RoundInt(Float(c.x)) << 20) |
958 (RoundInt(Float(c.w)) << 30)) &
959 UInt(mask));
960 }
961 break;
962 case VK_FORMAT_D16_UNORM:
963 *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x)));
964 break;
965 case VK_FORMAT_X8_D24_UNORM_PACK32:
966 *Pointer<UInt>(element) = UInt(RoundInt(Float(c.x)) << 8);
967 break;
968 case VK_FORMAT_D32_SFLOAT:
969 *Pointer<Float>(element) = c.x;
970 break;
971 case VK_FORMAT_S8_UINT:
972 *Pointer<Byte>(element) = Byte(RoundInt(Float(c.x)));
973 break;
974 default:
975 UNSUPPORTED("Blitter destination format %d", (int)state.destFormat);
976 break;
Nicolas Capens157ba262019-12-10 17:49:14 -0500977 }
978}
979
980Int4 Blitter::readInt4(Pointer<Byte> element, const State &state)
981{
982 Int4 c(0, 0, 0, 1);
983
984 switch(state.sourceFormat)
985 {
Ben Claytonfccfc562019-12-17 20:37:31 +0000986 case VK_FORMAT_A8B8G8R8_SINT_PACK32:
987 case VK_FORMAT_R8G8B8A8_SINT:
988 c = Insert(c, Int(*Pointer<SByte>(element + 3)), 3);
989 c = Insert(c, Int(*Pointer<SByte>(element + 2)), 2);
990 case VK_FORMAT_R8G8_SINT:
991 c = Insert(c, Int(*Pointer<SByte>(element + 1)), 1);
992 case VK_FORMAT_R8_SINT:
993 c = Insert(c, Int(*Pointer<SByte>(element)), 0);
994 break;
995 case VK_FORMAT_A2B10G10R10_UINT_PACK32:
996 c = Insert(c, Int((*Pointer<UInt>(element) & UInt(0x000003FF))), 0);
997 c = Insert(c, Int((*Pointer<UInt>(element) & UInt(0x000FFC00)) >> 10), 1);
998 c = Insert(c, Int((*Pointer<UInt>(element) & UInt(0x3FF00000)) >> 20), 2);
999 c = Insert(c, Int((*Pointer<UInt>(element) & UInt(0xC0000000)) >> 30), 3);
1000 break;
1001 case VK_FORMAT_A8B8G8R8_UINT_PACK32:
1002 case VK_FORMAT_R8G8B8A8_UINT:
1003 c = Insert(c, Int(*Pointer<Byte>(element + 3)), 3);
1004 c = Insert(c, Int(*Pointer<Byte>(element + 2)), 2);
1005 case VK_FORMAT_R8G8_UINT:
1006 c = Insert(c, Int(*Pointer<Byte>(element + 1)), 1);
1007 case VK_FORMAT_R8_UINT:
1008 case VK_FORMAT_S8_UINT:
1009 c = Insert(c, Int(*Pointer<Byte>(element)), 0);
1010 break;
1011 case VK_FORMAT_R16G16B16A16_SINT:
1012 c = Insert(c, Int(*Pointer<Short>(element + 6)), 3);
1013 c = Insert(c, Int(*Pointer<Short>(element + 4)), 2);
1014 case VK_FORMAT_R16G16_SINT:
1015 c = Insert(c, Int(*Pointer<Short>(element + 2)), 1);
1016 case VK_FORMAT_R16_SINT:
1017 c = Insert(c, Int(*Pointer<Short>(element)), 0);
1018 break;
1019 case VK_FORMAT_R16G16B16A16_UINT:
1020 c = Insert(c, Int(*Pointer<UShort>(element + 6)), 3);
1021 c = Insert(c, Int(*Pointer<UShort>(element + 4)), 2);
1022 case VK_FORMAT_R16G16_UINT:
1023 c = Insert(c, Int(*Pointer<UShort>(element + 2)), 1);
1024 case VK_FORMAT_R16_UINT:
1025 c = Insert(c, Int(*Pointer<UShort>(element)), 0);
1026 break;
1027 case VK_FORMAT_R32G32B32A32_SINT:
1028 case VK_FORMAT_R32G32B32A32_UINT:
1029 c = *Pointer<Int4>(element);
1030 break;
1031 case VK_FORMAT_R32G32_SINT:
1032 case VK_FORMAT_R32G32_UINT:
1033 c = Insert(c, *Pointer<Int>(element + 4), 1);
1034 case VK_FORMAT_R32_SINT:
1035 case VK_FORMAT_R32_UINT:
1036 c = Insert(c, *Pointer<Int>(element), 0);
1037 break;
1038 default:
1039 UNSUPPORTED("Blitter source format %d", (int)state.sourceFormat);
Nicolas Capens157ba262019-12-10 17:49:14 -05001040 }
1041
1042 return c;
1043}
1044
1045void Blitter::write(Int4 &c, Pointer<Byte> element, const State &state)
1046{
1047 bool writeR = state.writeRed;
1048 bool writeG = state.writeGreen;
1049 bool writeB = state.writeBlue;
1050 bool writeA = state.writeAlpha;
1051 bool writeRGBA = writeR && writeG && writeB && writeA;
1052
1053 switch(state.destFormat)
1054 {
Ben Claytonfccfc562019-12-17 20:37:31 +00001055 case VK_FORMAT_A2B10G10R10_UINT_PACK32:
1056 c = Min(As<UInt4>(c), UInt4(0x03FF, 0x03FF, 0x03FF, 0x0003));
1057 break;
1058 case VK_FORMAT_A8B8G8R8_UINT_PACK32:
1059 case VK_FORMAT_R8G8B8A8_UINT:
1060 case VK_FORMAT_R8G8B8_UINT:
1061 case VK_FORMAT_R8G8_UINT:
1062 case VK_FORMAT_R8_UINT:
1063 case VK_FORMAT_R8G8B8A8_USCALED:
1064 case VK_FORMAT_R8G8B8_USCALED:
1065 case VK_FORMAT_R8G8_USCALED:
1066 case VK_FORMAT_R8_USCALED:
1067 case VK_FORMAT_S8_UINT:
1068 c = Min(As<UInt4>(c), UInt4(0xFF));
1069 break;
1070 case VK_FORMAT_R16G16B16A16_UINT:
1071 case VK_FORMAT_R16G16B16_UINT:
1072 case VK_FORMAT_R16G16_UINT:
1073 case VK_FORMAT_R16_UINT:
1074 case VK_FORMAT_R16G16B16A16_USCALED:
1075 case VK_FORMAT_R16G16B16_USCALED:
1076 case VK_FORMAT_R16G16_USCALED:
1077 case VK_FORMAT_R16_USCALED:
1078 c = Min(As<UInt4>(c), UInt4(0xFFFF));
1079 break;
1080 case VK_FORMAT_A8B8G8R8_SINT_PACK32:
1081 case VK_FORMAT_R8G8B8A8_SINT:
1082 case VK_FORMAT_R8G8_SINT:
1083 case VK_FORMAT_R8_SINT:
1084 case VK_FORMAT_R8G8B8A8_SSCALED:
1085 case VK_FORMAT_R8G8B8_SSCALED:
1086 case VK_FORMAT_R8G8_SSCALED:
1087 case VK_FORMAT_R8_SSCALED:
1088 c = Min(Max(c, Int4(-0x80)), Int4(0x7F));
1089 break;
1090 case VK_FORMAT_R16G16B16A16_SINT:
1091 case VK_FORMAT_R16G16B16_SINT:
1092 case VK_FORMAT_R16G16_SINT:
1093 case VK_FORMAT_R16_SINT:
1094 case VK_FORMAT_R16G16B16A16_SSCALED:
1095 case VK_FORMAT_R16G16B16_SSCALED:
1096 case VK_FORMAT_R16G16_SSCALED:
1097 case VK_FORMAT_R16_SSCALED:
1098 c = Min(Max(c, Int4(-0x8000)), Int4(0x7FFF));
1099 break;
1100 default:
1101 break;
Nicolas Capens157ba262019-12-10 17:49:14 -05001102 }
1103
1104 switch(state.destFormat)
1105 {
Ben Claytonfccfc562019-12-17 20:37:31 +00001106 case VK_FORMAT_B8G8R8A8_SINT:
1107 case VK_FORMAT_B8G8R8A8_SSCALED:
1108 if(writeA) { *Pointer<SByte>(element + 3) = SByte(Extract(c, 3)); }
1109 case VK_FORMAT_B8G8R8_SINT:
1110 case VK_FORMAT_B8G8R8_SSCALED:
1111 if(writeB) { *Pointer<SByte>(element) = SByte(Extract(c, 2)); }
1112 if(writeG) { *Pointer<SByte>(element + 1) = SByte(Extract(c, 1)); }
1113 if(writeR) { *Pointer<SByte>(element + 2) = SByte(Extract(c, 0)); }
1114 break;
1115 case VK_FORMAT_A8B8G8R8_SINT_PACK32:
1116 case VK_FORMAT_R8G8B8A8_SINT:
1117 case VK_FORMAT_R8G8B8A8_SSCALED:
1118 case VK_FORMAT_A8B8G8R8_SSCALED_PACK32:
1119 if(writeA) { *Pointer<SByte>(element + 3) = SByte(Extract(c, 3)); }
1120 case VK_FORMAT_R8G8B8_SINT:
1121 case VK_FORMAT_R8G8B8_SSCALED:
1122 if(writeB) { *Pointer<SByte>(element + 2) = SByte(Extract(c, 2)); }
1123 case VK_FORMAT_R8G8_SINT:
1124 case VK_FORMAT_R8G8_SSCALED:
1125 if(writeG) { *Pointer<SByte>(element + 1) = SByte(Extract(c, 1)); }
1126 case VK_FORMAT_R8_SINT:
1127 case VK_FORMAT_R8_SSCALED:
1128 if(writeR) { *Pointer<SByte>(element) = SByte(Extract(c, 0)); }
1129 break;
1130 case VK_FORMAT_A2B10G10R10_UINT_PACK32:
1131 case VK_FORMAT_A2B10G10R10_SINT_PACK32:
1132 case VK_FORMAT_A2B10G10R10_USCALED_PACK32:
1133 case VK_FORMAT_A2B10G10R10_SSCALED_PACK32:
1134 if(writeRGBA)
1135 {
1136 *Pointer<UInt>(element) =
1137 UInt((Extract(c, 0)) | (Extract(c, 1) << 10) | (Extract(c, 2) << 20) | (Extract(c, 3) << 30));
1138 }
1139 else
1140 {
1141 unsigned int mask = (writeA ? 0xC0000000 : 0x0000) |
1142 (writeB ? 0x3FF00000 : 0x0000) |
1143 (writeG ? 0x000FFC00 : 0x0000) |
1144 (writeR ? 0x000003FF : 0x0000);
1145 unsigned int unmask = ~mask;
1146 *Pointer<UInt>(element) = (*Pointer<UInt>(element) & UInt(unmask)) |
1147 (UInt(Extract(c, 0) | (Extract(c, 1) << 10) | (Extract(c, 2) << 20) | (Extract(c, 3) << 30)) & UInt(mask));
1148 }
1149 break;
1150 case VK_FORMAT_A2R10G10B10_UINT_PACK32:
1151 case VK_FORMAT_A2R10G10B10_SINT_PACK32:
1152 case VK_FORMAT_A2R10G10B10_USCALED_PACK32:
1153 case VK_FORMAT_A2R10G10B10_SSCALED_PACK32:
1154 if(writeRGBA)
1155 {
1156 *Pointer<UInt>(element) =
1157 UInt((Extract(c, 2)) | (Extract(c, 1) << 10) | (Extract(c, 0) << 20) | (Extract(c, 3) << 30));
1158 }
1159 else
1160 {
1161 unsigned int mask = (writeA ? 0xC0000000 : 0x0000) |
1162 (writeR ? 0x3FF00000 : 0x0000) |
1163 (writeG ? 0x000FFC00 : 0x0000) |
1164 (writeB ? 0x000003FF : 0x0000);
1165 unsigned int unmask = ~mask;
1166 *Pointer<UInt>(element) = (*Pointer<UInt>(element) & UInt(unmask)) |
1167 (UInt(Extract(c, 2) | (Extract(c, 1) << 10) | (Extract(c, 0) << 20) | (Extract(c, 3) << 30)) & UInt(mask));
1168 }
1169 break;
1170 case VK_FORMAT_B8G8R8A8_UINT:
1171 case VK_FORMAT_B8G8R8A8_USCALED:
1172 if(writeA) { *Pointer<Byte>(element + 3) = Byte(Extract(c, 3)); }
1173 case VK_FORMAT_B8G8R8_UINT:
1174 case VK_FORMAT_B8G8R8_USCALED:
1175 case VK_FORMAT_B8G8R8_SRGB:
1176 if(writeB) { *Pointer<Byte>(element) = Byte(Extract(c, 2)); }
1177 if(writeG) { *Pointer<Byte>(element + 1) = Byte(Extract(c, 1)); }
1178 if(writeR) { *Pointer<Byte>(element + 2) = Byte(Extract(c, 0)); }
1179 break;
1180 case VK_FORMAT_A8B8G8R8_UINT_PACK32:
1181 case VK_FORMAT_R8G8B8A8_UINT:
1182 case VK_FORMAT_R8G8B8A8_USCALED:
1183 case VK_FORMAT_A8B8G8R8_USCALED_PACK32:
1184 if(writeA) { *Pointer<Byte>(element + 3) = Byte(Extract(c, 3)); }
1185 case VK_FORMAT_R8G8B8_UINT:
1186 case VK_FORMAT_R8G8B8_USCALED:
1187 if(writeB) { *Pointer<Byte>(element + 2) = Byte(Extract(c, 2)); }
1188 case VK_FORMAT_R8G8_UINT:
1189 case VK_FORMAT_R8G8_USCALED:
1190 if(writeG) { *Pointer<Byte>(element + 1) = Byte(Extract(c, 1)); }
1191 case VK_FORMAT_R8_UINT:
1192 case VK_FORMAT_R8_USCALED:
1193 case VK_FORMAT_S8_UINT:
1194 if(writeR) { *Pointer<Byte>(element) = Byte(Extract(c, 0)); }
1195 break;
1196 case VK_FORMAT_R16G16B16A16_SINT:
1197 case VK_FORMAT_R16G16B16A16_SSCALED:
1198 if(writeA) { *Pointer<Short>(element + 6) = Short(Extract(c, 3)); }
1199 case VK_FORMAT_R16G16B16_SINT:
1200 case VK_FORMAT_R16G16B16_SSCALED:
1201 if(writeB) { *Pointer<Short>(element + 4) = Short(Extract(c, 2)); }
1202 case VK_FORMAT_R16G16_SINT:
1203 case VK_FORMAT_R16G16_SSCALED:
1204 if(writeG) { *Pointer<Short>(element + 2) = Short(Extract(c, 1)); }
1205 case VK_FORMAT_R16_SINT:
1206 case VK_FORMAT_R16_SSCALED:
1207 if(writeR) { *Pointer<Short>(element) = Short(Extract(c, 0)); }
1208 break;
1209 case VK_FORMAT_R16G16B16A16_UINT:
1210 case VK_FORMAT_R16G16B16A16_USCALED:
1211 if(writeA) { *Pointer<UShort>(element + 6) = UShort(Extract(c, 3)); }
1212 case VK_FORMAT_R16G16B16_UINT:
1213 case VK_FORMAT_R16G16B16_USCALED:
1214 if(writeB) { *Pointer<UShort>(element + 4) = UShort(Extract(c, 2)); }
1215 case VK_FORMAT_R16G16_UINT:
1216 case VK_FORMAT_R16G16_USCALED:
1217 if(writeG) { *Pointer<UShort>(element + 2) = UShort(Extract(c, 1)); }
1218 case VK_FORMAT_R16_UINT:
1219 case VK_FORMAT_R16_USCALED:
1220 if(writeR) { *Pointer<UShort>(element) = UShort(Extract(c, 0)); }
1221 break;
1222 case VK_FORMAT_R32G32B32A32_SINT:
1223 if(writeRGBA)
1224 {
1225 *Pointer<Int4>(element) = c;
1226 }
1227 else
1228 {
1229 if(writeR) { *Pointer<Int>(element) = Extract(c, 0); }
1230 if(writeG) { *Pointer<Int>(element + 4) = Extract(c, 1); }
1231 if(writeB) { *Pointer<Int>(element + 8) = Extract(c, 2); }
1232 if(writeA) { *Pointer<Int>(element + 12) = Extract(c, 3); }
1233 }
1234 break;
1235 case VK_FORMAT_R32G32B32_SINT:
Nicolas Capens157ba262019-12-10 17:49:14 -05001236 if(writeR) { *Pointer<Int>(element) = Extract(c, 0); }
1237 if(writeG) { *Pointer<Int>(element + 4) = Extract(c, 1); }
1238 if(writeB) { *Pointer<Int>(element + 8) = Extract(c, 2); }
Ben Claytonfccfc562019-12-17 20:37:31 +00001239 break;
1240 case VK_FORMAT_R32G32_SINT:
1241 if(writeR) { *Pointer<Int>(element) = Extract(c, 0); }
1242 if(writeG) { *Pointer<Int>(element + 4) = Extract(c, 1); }
1243 break;
1244 case VK_FORMAT_R32_SINT:
1245 if(writeR) { *Pointer<Int>(element) = Extract(c, 0); }
1246 break;
1247 case VK_FORMAT_R32G32B32A32_UINT:
1248 if(writeRGBA)
1249 {
1250 *Pointer<UInt4>(element) = As<UInt4>(c);
1251 }
1252 else
1253 {
1254 if(writeR) { *Pointer<UInt>(element) = As<UInt>(Extract(c, 0)); }
1255 if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(Extract(c, 1)); }
1256 if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(Extract(c, 2)); }
1257 if(writeA) { *Pointer<UInt>(element + 12) = As<UInt>(Extract(c, 3)); }
1258 }
1259 break;
1260 case VK_FORMAT_R32G32B32_UINT:
Nicolas Capens157ba262019-12-10 17:49:14 -05001261 if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(Extract(c, 2)); }
Ben Claytonfccfc562019-12-17 20:37:31 +00001262 case VK_FORMAT_R32G32_UINT:
1263 if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(Extract(c, 1)); }
1264 case VK_FORMAT_R32_UINT:
1265 if(writeR) { *Pointer<UInt>(element) = As<UInt>(Extract(c, 0)); }
1266 break;
1267 default:
1268 UNSUPPORTED("Blitter destination format %d", (int)state.destFormat);
Nicolas Capens157ba262019-12-10 17:49:14 -05001269 }
1270}
1271
1272void Blitter::ApplyScaleAndClamp(Float4 &value, const State &state, bool preScaled)
1273{
1274 float4 scale{}, unscale{};
1275
1276 if(state.clearOperation &&
1277 state.sourceFormat.isNonNormalizedInteger() &&
1278 !state.destFormat.isNonNormalizedInteger())
1279 {
1280 // If we're clearing a buffer from an int or uint color into a normalized color,
1281 // then the whole range of the int or uint color must be scaled between 0 and 1.
1282 switch(state.sourceFormat)
1283 {
Ben Claytonfccfc562019-12-17 20:37:31 +00001284 case VK_FORMAT_R32G32B32A32_SINT:
1285 unscale = float4(static_cast<float>(0x7FFFFFFF));
1286 break;
1287 case VK_FORMAT_R32G32B32A32_UINT:
1288 unscale = float4(static_cast<float>(0xFFFFFFFF));
1289 break;
1290 default:
1291 UNSUPPORTED("Blitter source format %d", (int)state.sourceFormat);
Nicolas Capens157ba262019-12-10 17:49:14 -05001292 }
1293 }
1294 else
1295 {
1296 unscale = state.sourceFormat.getScale();
1297 }
1298
1299 scale = state.destFormat.getScale();
1300
1301 bool srcSRGB = state.sourceFormat.isSRGBformat();
1302 bool dstSRGB = state.destFormat.isSRGBformat();
1303
Ben Claytonfccfc562019-12-17 20:37:31 +00001304 if(state.allowSRGBConversion && ((srcSRGB && !preScaled) || dstSRGB)) // One of the formats is sRGB encoded.
Nicolas Capens157ba262019-12-10 17:49:14 -05001305 {
Ben Claytonfccfc562019-12-17 20:37:31 +00001306 value *= preScaled ? Float4(1.0f / scale.x, 1.0f / scale.y, 1.0f / scale.z, 1.0f / scale.w) : // Unapply scale
1307 Float4(1.0f / unscale.x, 1.0f / unscale.y, 1.0f / unscale.z, 1.0f / unscale.w); // Apply unscale
Nicolas Capens157ba262019-12-10 17:49:14 -05001308 value = (srcSRGB && !preScaled) ? sRGBtoLinear(value) : LinearToSRGB(value);
Ben Claytonfccfc562019-12-17 20:37:31 +00001309 value *= Float4(scale.x, scale.y, scale.z, scale.w); // Apply scale
Nicolas Capens157ba262019-12-10 17:49:14 -05001310 }
1311 else if(unscale != scale)
1312 {
1313 value *= Float4(scale.x / unscale.x, scale.y / unscale.y, scale.z / unscale.z, scale.w / unscale.w);
1314 }
1315
1316 if(state.sourceFormat.isFloatFormat() && !state.destFormat.isFloatFormat())
1317 {
1318 value = Min(value, Float4(scale.x, scale.y, scale.z, scale.w));
1319
1320 value = Max(value, Float4(state.destFormat.isUnsignedComponent(0) ? 0.0f : -scale.x,
1321 state.destFormat.isUnsignedComponent(1) ? 0.0f : -scale.y,
1322 state.destFormat.isUnsignedComponent(2) ? 0.0f : -scale.z,
1323 state.destFormat.isUnsignedComponent(3) ? 0.0f : -scale.w));
1324 }
1325}
1326
1327Int Blitter::ComputeOffset(Int &x, Int &y, Int &pitchB, int bytes)
1328{
1329 return y * pitchB + x * bytes;
1330}
1331
1332Float4 Blitter::LinearToSRGB(Float4 &c)
1333{
1334 Float4 lc = Min(c, Float4(0.0031308f)) * Float4(12.92f);
1335 Float4 ec = Float4(1.055f) * power(c, Float4(1.0f / 2.4f)) - Float4(0.055f);
1336
1337 Float4 s = c;
1338 s.xyz = Max(lc, ec);
1339
1340 return s;
1341}
1342
1343Float4 Blitter::sRGBtoLinear(Float4 &c)
1344{
1345 Float4 lc = c * Float4(1.0f / 12.92f);
1346 Float4 ec = power((c + Float4(0.055f)) * Float4(1.0f / 1.055f), Float4(2.4f));
1347
1348 Int4 linear = CmpLT(c, Float4(0.04045f));
1349
1350 Float4 s = c;
Ben Claytonfccfc562019-12-17 20:37:31 +00001351 s.xyz = As<Float4>((linear & As<Int4>(lc)) | (~linear & As<Int4>(ec))); // TODO: IfThenElse()
Nicolas Capens157ba262019-12-10 17:49:14 -05001352
1353 return s;
1354}
1355
1356Blitter::BlitRoutineType Blitter::generate(const State &state)
1357{
1358 BlitFunction function;
1359 {
1360 Pointer<Byte> blit(function.Arg<0>());
1361
Ben Claytonfccfc562019-12-17 20:37:31 +00001362 Pointer<Byte> source = *Pointer<Pointer<Byte>>(blit + OFFSET(BlitData, source));
1363 Pointer<Byte> dest = *Pointer<Pointer<Byte>>(blit + OFFSET(BlitData, dest));
1364 Int sPitchB = *Pointer<Int>(blit + OFFSET(BlitData, sPitchB));
1365 Int dPitchB = *Pointer<Int>(blit + OFFSET(BlitData, dPitchB));
Nicolas Capens157ba262019-12-10 17:49:14 -05001366
Ben Claytonfccfc562019-12-17 20:37:31 +00001367 Float x0 = *Pointer<Float>(blit + OFFSET(BlitData, x0));
1368 Float y0 = *Pointer<Float>(blit + OFFSET(BlitData, y0));
1369 Float w = *Pointer<Float>(blit + OFFSET(BlitData, w));
1370 Float h = *Pointer<Float>(blit + OFFSET(BlitData, h));
Nicolas Capens157ba262019-12-10 17:49:14 -05001371
Ben Claytonfccfc562019-12-17 20:37:31 +00001372 Int x0d = *Pointer<Int>(blit + OFFSET(BlitData, x0d));
1373 Int x1d = *Pointer<Int>(blit + OFFSET(BlitData, x1d));
1374 Int y0d = *Pointer<Int>(blit + OFFSET(BlitData, y0d));
1375 Int y1d = *Pointer<Int>(blit + OFFSET(BlitData, y1d));
Nicolas Capens157ba262019-12-10 17:49:14 -05001376
Ben Claytonfccfc562019-12-17 20:37:31 +00001377 Int sWidth = *Pointer<Int>(blit + OFFSET(BlitData, sWidth));
1378 Int sHeight = *Pointer<Int>(blit + OFFSET(BlitData, sHeight));
Nicolas Capens157ba262019-12-10 17:49:14 -05001379
1380 bool intSrc = state.sourceFormat.isNonNormalizedInteger();
1381 bool intDst = state.destFormat.isNonNormalizedInteger();
1382 bool intBoth = intSrc && intDst;
1383 int srcBytes = state.sourceFormat.bytes();
1384 int dstBytes = state.destFormat.bytes();
1385
1386 bool hasConstantColorI = false;
1387 Int4 constantColorI;
1388 bool hasConstantColorF = false;
1389 Float4 constantColorF;
1390 if(state.clearOperation)
1391 {
Ben Claytonfccfc562019-12-17 20:37:31 +00001392 if(intBoth) // Integer types
Nicolas Capens157ba262019-12-10 17:49:14 -05001393 {
1394 constantColorI = readInt4(source, state);
1395 hasConstantColorI = true;
1396 }
1397 else
1398 {
1399 constantColorF = readFloat4(source, state);
1400 hasConstantColorF = true;
1401
1402 ApplyScaleAndClamp(constantColorF, state);
1403 }
1404 }
1405
1406 For(Int j = y0d, j < y1d, j++)
1407 {
1408 Float y = state.clearOperation ? RValue<Float>(y0) : y0 + Float(j) * h;
1409 Pointer<Byte> destLine = dest + j * dPitchB;
1410
1411 For(Int i = x0d, i < x1d, i++)
1412 {
1413 Float x = state.clearOperation ? RValue<Float>(x0) : x0 + Float(i) * w;
1414 Pointer<Byte> d = destLine + i * dstBytes;
1415
1416 if(hasConstantColorI)
1417 {
1418 for(int s = 0; s < state.destSamples; s++)
1419 {
1420 write(constantColorI, d, state);
1421
1422 d += *Pointer<Int>(blit + OFFSET(BlitData, dSliceB));
1423 }
1424 }
1425 else if(hasConstantColorF)
1426 {
1427 for(int s = 0; s < state.destSamples; s++)
1428 {
1429 write(constantColorF, d, state);
1430
1431 d += *Pointer<Int>(blit + OFFSET(BlitData, dSliceB));
1432 }
1433 }
Ben Claytonfccfc562019-12-17 20:37:31 +00001434 else if(intBoth) // Integer types do not support filtering
Nicolas Capens157ba262019-12-10 17:49:14 -05001435 {
1436 Int X = Int(x);
1437 Int Y = Int(y);
1438
1439 if(state.clampToEdge)
1440 {
1441 X = Clamp(X, 0, sWidth - 1);
1442 Y = Clamp(Y, 0, sHeight - 1);
1443 }
1444
1445 Pointer<Byte> s = source + ComputeOffset(X, Y, sPitchB, srcBytes);
1446
1447 // When both formats are true integer types, we don't go to float to avoid losing precision
1448 Int4 color = readInt4(s, state);
1449 for(int s = 0; s < state.destSamples; s++)
1450 {
1451 write(color, d, state);
1452
Ben Claytonfccfc562019-12-17 20:37:31 +00001453 d += *Pointer<Int>(blit + OFFSET(BlitData, dSliceB));
Nicolas Capens157ba262019-12-10 17:49:14 -05001454 }
Nicolas Capens68a82382018-10-02 13:16:55 -04001455 }
1456 else
1457 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001458 Float4 color;
Nicolas Capens68a82382018-10-02 13:16:55 -04001459
Nicolas Capens157ba262019-12-10 17:49:14 -05001460 bool preScaled = false;
1461 if(!state.filter || intSrc)
Nicolas Capens68a82382018-10-02 13:16:55 -04001462 {
Nicolas Capens68a82382018-10-02 13:16:55 -04001463 Int X = Int(x);
1464 Int Y = Int(y);
1465
1466 if(state.clampToEdge)
1467 {
1468 X = Clamp(X, 0, sWidth - 1);
1469 Y = Clamp(Y, 0, sHeight - 1);
1470 }
1471
Alexis Hetud34bb292019-11-13 17:18:02 -05001472 Pointer<Byte> s = source + ComputeOffset(X, Y, sPitchB, srcBytes);
Nicolas Capens68a82382018-10-02 13:16:55 -04001473
Nicolas Capens157ba262019-12-10 17:49:14 -05001474 color = readFloat4(s, state);
1475
Ben Claytonfccfc562019-12-17 20:37:31 +00001476 if(state.srcSamples > 1) // Resolve multisampled source
Alexis Hetuf8df30f2019-10-23 18:03:21 -04001477 {
Ben Claytonfccfc562019-12-17 20:37:31 +00001478 if(state.allowSRGBConversion && state.sourceFormat.isSRGBformat()) // sRGB -> RGB
Nicolas Capens68a82382018-10-02 13:16:55 -04001479 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001480 ApplyScaleAndClamp(color, state);
1481 preScaled = true;
Nicolas Capens68a82382018-10-02 13:16:55 -04001482 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001483 Float4 accum = color;
1484 for(int sample = 1; sample < state.srcSamples; sample++)
Alexis Hetu54ec7592019-03-20 14:37:16 -04001485 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001486 s += *Pointer<Int>(blit + OFFSET(BlitData, sSliceB));
1487 color = readFloat4(s, state);
1488
Ben Claytonfccfc562019-12-17 20:37:31 +00001489 if(state.allowSRGBConversion && state.sourceFormat.isSRGBformat()) // sRGB -> RGB
Alexis Hetua4308132019-06-13 09:55:26 -04001490 {
Nicolas Capens88ac3672019-08-01 13:22:34 -04001491 ApplyScaleAndClamp(color, state);
Alexis Hetua4308132019-06-13 09:55:26 -04001492 preScaled = true;
1493 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001494 accum += color;
Alexis Hetu54ec7592019-03-20 14:37:16 -04001495 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001496 color = accum * Float4(1.0f / static_cast<float>(state.srcSamples));
Nicolas Capens68a82382018-10-02 13:16:55 -04001497 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001498 }
Ben Claytonfccfc562019-12-17 20:37:31 +00001499 else // Bilinear filtering
Nicolas Capens157ba262019-12-10 17:49:14 -05001500 {
1501 Float X = x;
1502 Float Y = y;
1503
1504 if(state.clampToEdge)
Nicolas Capens68a82382018-10-02 13:16:55 -04001505 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001506 X = Min(Max(x, 0.5f), Float(sWidth) - 0.5f);
1507 Y = Min(Max(y, 0.5f), Float(sHeight) - 0.5f);
Nicolas Capens68a82382018-10-02 13:16:55 -04001508 }
1509
Nicolas Capens157ba262019-12-10 17:49:14 -05001510 Float x0 = X - 0.5f;
1511 Float y0 = Y - 0.5f;
Nicolas Capens68a82382018-10-02 13:16:55 -04001512
Nicolas Capens157ba262019-12-10 17:49:14 -05001513 Int X0 = Max(Int(x0), 0);
1514 Int Y0 = Max(Int(y0), 0);
1515
1516 Int X1 = X0 + 1;
1517 Int Y1 = Y0 + 1;
1518 X1 = IfThenElse(X1 >= sWidth, X0, X1);
1519 Y1 = IfThenElse(Y1 >= sHeight, Y0, Y1);
1520
1521 Pointer<Byte> s00 = source + ComputeOffset(X0, Y0, sPitchB, srcBytes);
1522 Pointer<Byte> s01 = source + ComputeOffset(X1, Y0, sPitchB, srcBytes);
1523 Pointer<Byte> s10 = source + ComputeOffset(X0, Y1, sPitchB, srcBytes);
1524 Pointer<Byte> s11 = source + ComputeOffset(X1, Y1, sPitchB, srcBytes);
1525
1526 Float4 c00 = readFloat4(s00, state);
1527 Float4 c01 = readFloat4(s01, state);
1528 Float4 c10 = readFloat4(s10, state);
1529 Float4 c11 = readFloat4(s11, state);
1530
Ben Claytonfccfc562019-12-17 20:37:31 +00001531 if(state.allowSRGBConversion && state.sourceFormat.isSRGBformat()) // sRGB -> RGB
Nicolas Capens68a82382018-10-02 13:16:55 -04001532 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001533 ApplyScaleAndClamp(c00, state);
1534 ApplyScaleAndClamp(c01, state);
1535 ApplyScaleAndClamp(c10, state);
1536 ApplyScaleAndClamp(c11, state);
1537 preScaled = true;
Nicolas Capens68a82382018-10-02 13:16:55 -04001538 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001539
1540 Float4 fx = Float4(x0 - Float(X0));
1541 Float4 fy = Float4(y0 - Float(Y0));
1542 Float4 ix = Float4(1.0f) - fx;
1543 Float4 iy = Float4(1.0f) - fy;
1544
1545 color = (c00 * ix + c01 * fx) * iy +
1546 (c10 * ix + c11 * fx) * fy;
1547 }
1548
1549 ApplyScaleAndClamp(color, state, preScaled);
1550
1551 for(int s = 0; s < state.destSamples; s++)
1552 {
1553 write(color, d, state);
1554
Ben Claytonfccfc562019-12-17 20:37:31 +00001555 d += *Pointer<Int>(blit + OFFSET(BlitData, dSliceB));
Nicolas Capens68a82382018-10-02 13:16:55 -04001556 }
1557 }
1558 }
1559 }
Nicolas Capens68a82382018-10-02 13:16:55 -04001560 }
1561
Nicolas Capens157ba262019-12-10 17:49:14 -05001562 return function("BlitRoutine");
1563}
1564
1565Blitter::BlitRoutineType Blitter::getBlitRoutine(const State &state)
1566{
1567 std::unique_lock<std::mutex> lock(blitMutex);
1568 auto blitRoutine = blitCache.query(state);
1569
1570 if(!blitRoutine)
Alexis Hetu33642272019-03-01 11:55:59 -05001571 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001572 blitRoutine = generate(state);
1573 blitCache.add(state, blitRoutine);
Alexis Hetu33642272019-03-01 11:55:59 -05001574 }
1575
Nicolas Capens157ba262019-12-10 17:49:14 -05001576 return blitRoutine;
1577}
1578
1579Blitter::CornerUpdateRoutineType Blitter::getCornerUpdateRoutine(const State &state)
1580{
1581 std::unique_lock<std::mutex> lock(cornerUpdateMutex);
1582 auto cornerUpdateRoutine = cornerUpdateCache.query(state);
1583
1584 if(!cornerUpdateRoutine)
Alexis Hetuf60a2d52019-05-09 14:16:05 -04001585 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001586 cornerUpdateRoutine = generateCornerUpdate(state);
1587 cornerUpdateCache.add(state, cornerUpdateRoutine);
Alexis Hetuf60a2d52019-05-09 14:16:05 -04001588 }
1589
Nicolas Capens157ba262019-12-10 17:49:14 -05001590 return cornerUpdateRoutine;
1591}
1592
1593void Blitter::blitToBuffer(const vk::Image *src, VkImageSubresourceLayers subresource, VkOffset3D offset, VkExtent3D extent, uint8_t *dst, int bufferRowPitch, int bufferSlicePitch)
1594{
1595 auto aspect = static_cast<VkImageAspectFlagBits>(subresource.aspectMask);
1596 auto format = src->getFormat(aspect);
Ben Claytonfccfc562019-12-17 20:37:31 +00001597 State state(format, format, VK_SAMPLE_COUNT_1_BIT, VK_SAMPLE_COUNT_1_BIT, Options{ false, false });
Nicolas Capens157ba262019-12-10 17:49:14 -05001598
1599 auto blitRoutine = getBlitRoutine(state);
1600 if(!blitRoutine)
Chris Forbes529eda32019-05-08 10:27:05 -07001601 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001602 return;
Chris Forbes529eda32019-05-08 10:27:05 -07001603 }
1604
Ben Claytonfccfc562019-12-17 20:37:31 +00001605 BlitData data = {
1606 nullptr, // source
1607 dst, // dest
1608 src->rowPitchBytes(aspect, subresource.mipLevel), // sPitchB
1609 bufferRowPitch, // dPitchB
1610 src->slicePitchBytes(aspect, subresource.mipLevel), // sSliceB
1611 bufferSlicePitch, // dSliceB
Chris Forbes529eda32019-05-08 10:27:05 -07001612
Nicolas Capens157ba262019-12-10 17:49:14 -05001613 0, 0, 1, 1,
Chris Forbes529eda32019-05-08 10:27:05 -07001614
Ben Claytonfccfc562019-12-17 20:37:31 +00001615 0, // y0d
1616 static_cast<int>(extent.height), // y1d
1617 0, // x0d
1618 static_cast<int>(extent.width), // x1d
Chris Forbes529eda32019-05-08 10:27:05 -07001619
Ben Claytonfccfc562019-12-17 20:37:31 +00001620 static_cast<int>(extent.width), // sWidth
1621 static_cast<int>(extent.height) // sHeight;
Nicolas Capens157ba262019-12-10 17:49:14 -05001622 };
Chris Forbes529eda32019-05-08 10:27:05 -07001623
Nicolas Capens157ba262019-12-10 17:49:14 -05001624 VkOffset3D srcOffset = { 0, 0, offset.z };
Chris Forbes529eda32019-05-08 10:27:05 -07001625
Nicolas Capens157ba262019-12-10 17:49:14 -05001626 VkImageSubresourceLayers srcSubresLayers = subresource;
1627 srcSubresLayers.layerCount = 1;
Chris Forbes529eda32019-05-08 10:27:05 -07001628
Ben Claytonfccfc562019-12-17 20:37:31 +00001629 VkImageSubresourceRange srcSubresRange = {
Nicolas Capens157ba262019-12-10 17:49:14 -05001630 subresource.aspectMask,
1631 subresource.mipLevel,
1632 1,
1633 subresource.baseArrayLayer,
1634 subresource.layerCount
1635 };
Alexis Hetu33642272019-03-01 11:55:59 -05001636
Nicolas Capens157ba262019-12-10 17:49:14 -05001637 uint32_t lastLayer = src->getLastLayerIndex(srcSubresRange);
Alexis Hetu33642272019-03-01 11:55:59 -05001638
Nicolas Capens157ba262019-12-10 17:49:14 -05001639 for(; srcSubresLayers.baseArrayLayer <= lastLayer; srcSubresLayers.baseArrayLayer++)
Alexis Hetub317d962019-04-29 14:07:31 -04001640 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001641 srcOffset.z = offset.z;
Alexis Hetub317d962019-04-29 14:07:31 -04001642
Nicolas Capens157ba262019-12-10 17:49:14 -05001643 for(auto i = 0u; i < extent.depth; i++)
Alexis Hetub317d962019-04-29 14:07:31 -04001644 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001645 data.source = src->getTexelPointer(srcOffset, srcSubresLayers);
1646 ASSERT(data.source < src->end());
1647 blitRoutine(&data);
1648 srcOffset.z++;
1649 data.dest = (dst += bufferSlicePitch);
Alexis Hetub317d962019-04-29 14:07:31 -04001650 }
1651 }
Nicolas Capens68a82382018-10-02 13:16:55 -04001652}
Nicolas Capens157ba262019-12-10 17:49:14 -05001653
1654void Blitter::blitFromBuffer(const vk::Image *dst, VkImageSubresourceLayers subresource, VkOffset3D offset, VkExtent3D extent, uint8_t *src, int bufferRowPitch, int bufferSlicePitch)
1655{
1656 auto aspect = static_cast<VkImageAspectFlagBits>(subresource.aspectMask);
1657 auto format = dst->getFormat(aspect);
Ben Claytonfccfc562019-12-17 20:37:31 +00001658 State state(format, format, VK_SAMPLE_COUNT_1_BIT, VK_SAMPLE_COUNT_1_BIT, Options{ false, false });
Nicolas Capens157ba262019-12-10 17:49:14 -05001659
1660 auto blitRoutine = getBlitRoutine(state);
1661 if(!blitRoutine)
1662 {
1663 return;
1664 }
1665
Ben Claytonfccfc562019-12-17 20:37:31 +00001666 BlitData data = {
1667 src, // source
1668 nullptr, // dest
1669 bufferRowPitch, // sPitchB
1670 dst->rowPitchBytes(aspect, subresource.mipLevel), // dPitchB
1671 bufferSlicePitch, // sSliceB
1672 dst->slicePitchBytes(aspect, subresource.mipLevel), // dSliceB
Nicolas Capens157ba262019-12-10 17:49:14 -05001673
Ben Claytonfccfc562019-12-17 20:37:31 +00001674 static_cast<float>(-offset.x), // x0
1675 static_cast<float>(-offset.y), // y0
1676 1.0f, // w
1677 1.0f, // h
Nicolas Capens157ba262019-12-10 17:49:14 -05001678
Ben Claytonfccfc562019-12-17 20:37:31 +00001679 offset.y, // y0d
1680 static_cast<int>(offset.y + extent.height), // y1d
1681 offset.x, // x0d
1682 static_cast<int>(offset.x + extent.width), // x1d
Nicolas Capens157ba262019-12-10 17:49:14 -05001683
Ben Claytonfccfc562019-12-17 20:37:31 +00001684 static_cast<int>(extent.width), // sWidth
1685 static_cast<int>(extent.height) // sHeight;
Nicolas Capens157ba262019-12-10 17:49:14 -05001686 };
1687
1688 VkOffset3D dstOffset = { 0, 0, offset.z };
1689
1690 VkImageSubresourceLayers dstSubresLayers = subresource;
1691 dstSubresLayers.layerCount = 1;
1692
Ben Claytonfccfc562019-12-17 20:37:31 +00001693 VkImageSubresourceRange dstSubresRange = {
Nicolas Capens157ba262019-12-10 17:49:14 -05001694 subresource.aspectMask,
1695 subresource.mipLevel,
1696 1,
1697 subresource.baseArrayLayer,
1698 subresource.layerCount
1699 };
1700
1701 uint32_t lastLayer = dst->getLastLayerIndex(dstSubresRange);
1702
1703 for(; dstSubresLayers.baseArrayLayer <= lastLayer; dstSubresLayers.baseArrayLayer++)
1704 {
1705 dstOffset.z = offset.z;
1706
1707 for(auto i = 0u; i < extent.depth; i++)
1708 {
1709 data.dest = dst->getTexelPointer(dstOffset, dstSubresLayers);
1710 ASSERT(data.dest < dst->end());
1711 blitRoutine(&data);
1712 dstOffset.z++;
1713 data.source = (src += bufferSlicePitch);
1714 }
1715 }
1716}
1717
1718void Blitter::blit(const vk::Image *src, vk::Image *dst, VkImageBlit region, VkFilter filter)
1719{
1720 if(dst->getFormat() == VK_FORMAT_UNDEFINED)
1721 {
1722 return;
1723 }
1724
1725 if((region.srcSubresource.layerCount != region.dstSubresource.layerCount) ||
1726 (region.srcSubresource.aspectMask != region.dstSubresource.aspectMask))
1727 {
1728 UNIMPLEMENTED("region");
1729 }
1730
1731 if(region.dstOffsets[0].x > region.dstOffsets[1].x)
1732 {
1733 std::swap(region.srcOffsets[0].x, region.srcOffsets[1].x);
1734 std::swap(region.dstOffsets[0].x, region.dstOffsets[1].x);
1735 }
1736
1737 if(region.dstOffsets[0].y > region.dstOffsets[1].y)
1738 {
1739 std::swap(region.srcOffsets[0].y, region.srcOffsets[1].y);
1740 std::swap(region.dstOffsets[0].y, region.dstOffsets[1].y);
1741 }
1742
1743 VkImageAspectFlagBits srcAspect = static_cast<VkImageAspectFlagBits>(region.srcSubresource.aspectMask);
1744 VkImageAspectFlagBits dstAspect = static_cast<VkImageAspectFlagBits>(region.dstSubresource.aspectMask);
1745 VkExtent3D srcExtent = src->getMipLevelExtent(srcAspect, region.srcSubresource.mipLevel);
1746
1747 int32_t numSlices = (region.srcOffsets[1].z - region.srcOffsets[0].z);
1748 ASSERT(numSlices == (region.dstOffsets[1].z - region.dstOffsets[0].z));
1749
1750 float widthRatio = static_cast<float>(region.srcOffsets[1].x - region.srcOffsets[0].x) /
1751 static_cast<float>(region.dstOffsets[1].x - region.dstOffsets[0].x);
1752 float heightRatio = static_cast<float>(region.srcOffsets[1].y - region.srcOffsets[0].y) /
1753 static_cast<float>(region.dstOffsets[1].y - region.dstOffsets[0].y);
1754 float x0 = region.srcOffsets[0].x + (0.5f - region.dstOffsets[0].x) * widthRatio;
1755 float y0 = region.srcOffsets[0].y + (0.5f - region.dstOffsets[0].y) * heightRatio;
1756
1757 auto srcFormat = src->getFormat(srcAspect);
1758 auto dstFormat = dst->getFormat(dstAspect);
1759
1760 bool doFilter = (filter != VK_FILTER_NEAREST);
1761 bool allowSRGBConversion =
Ben Claytonfccfc562019-12-17 20:37:31 +00001762 doFilter ||
1763 (src->getSampleCountFlagBits() > 1) ||
1764 (srcFormat.isSRGBformat() != dstFormat.isSRGBformat());
Nicolas Capens157ba262019-12-10 17:49:14 -05001765
1766 State state(src->getFormat(srcAspect), dst->getFormat(dstAspect), src->getSampleCountFlagBits(), dst->getSampleCountFlagBits(),
1767 Options{ doFilter, allowSRGBConversion });
1768 state.clampToEdge = (region.srcOffsets[0].x < 0) ||
1769 (region.srcOffsets[0].y < 0) ||
1770 (static_cast<uint32_t>(region.srcOffsets[1].x) > srcExtent.width) ||
1771 (static_cast<uint32_t>(region.srcOffsets[1].y) > srcExtent.height) ||
1772 (doFilter && ((x0 < 0.5f) || (y0 < 0.5f)));
1773
1774 auto blitRoutine = getBlitRoutine(state);
1775 if(!blitRoutine)
1776 {
1777 return;
1778 }
1779
Ben Claytonfccfc562019-12-17 20:37:31 +00001780 BlitData data = {
1781 nullptr, // source
1782 nullptr, // dest
1783 src->rowPitchBytes(srcAspect, region.srcSubresource.mipLevel), // sPitchB
1784 dst->rowPitchBytes(dstAspect, region.dstSubresource.mipLevel), // dPitchB
1785 src->slicePitchBytes(srcAspect, region.srcSubresource.mipLevel), // sSliceB
1786 dst->slicePitchBytes(dstAspect, region.dstSubresource.mipLevel), // dSliceB
Nicolas Capens157ba262019-12-10 17:49:14 -05001787
1788 x0,
1789 y0,
1790 widthRatio,
1791 heightRatio,
1792
Ben Claytonfccfc562019-12-17 20:37:31 +00001793 region.dstOffsets[0].y, // y0d
1794 region.dstOffsets[1].y, // y1d
1795 region.dstOffsets[0].x, // x0d
1796 region.dstOffsets[1].x, // x1d
Nicolas Capens157ba262019-12-10 17:49:14 -05001797
Ben Claytonfccfc562019-12-17 20:37:31 +00001798 static_cast<int>(srcExtent.width), // sWidth
1799 static_cast<int>(srcExtent.height) // sHeight;
Nicolas Capens157ba262019-12-10 17:49:14 -05001800 };
1801
1802 VkOffset3D srcOffset = { 0, 0, region.srcOffsets[0].z };
1803 VkOffset3D dstOffset = { 0, 0, region.dstOffsets[0].z };
1804
Ben Claytonfccfc562019-12-17 20:37:31 +00001805 VkImageSubresourceLayers srcSubresLayers = {
Nicolas Capens157ba262019-12-10 17:49:14 -05001806 region.srcSubresource.aspectMask,
1807 region.srcSubresource.mipLevel,
1808 region.srcSubresource.baseArrayLayer,
1809 1
1810 };
1811
Ben Claytonfccfc562019-12-17 20:37:31 +00001812 VkImageSubresourceLayers dstSubresLayers = {
Nicolas Capens157ba262019-12-10 17:49:14 -05001813 region.dstSubresource.aspectMask,
1814 region.dstSubresource.mipLevel,
1815 region.dstSubresource.baseArrayLayer,
1816 1
1817 };
1818
Ben Claytonfccfc562019-12-17 20:37:31 +00001819 VkImageSubresourceRange srcSubresRange = {
Nicolas Capens157ba262019-12-10 17:49:14 -05001820 region.srcSubresource.aspectMask,
1821 region.srcSubresource.mipLevel,
1822 1,
1823 region.srcSubresource.baseArrayLayer,
1824 region.srcSubresource.layerCount
1825 };
1826
1827 uint32_t lastLayer = src->getLastLayerIndex(srcSubresRange);
1828
1829 for(; srcSubresLayers.baseArrayLayer <= lastLayer; srcSubresLayers.baseArrayLayer++, dstSubresLayers.baseArrayLayer++)
1830 {
1831 srcOffset.z = region.srcOffsets[0].z;
1832 dstOffset.z = region.dstOffsets[0].z;
1833
1834 for(int i = 0; i < numSlices; i++)
1835 {
1836 data.source = src->getTexelPointer(srcOffset, srcSubresLayers);
1837 data.dest = dst->getTexelPointer(dstOffset, dstSubresLayers);
1838
1839 ASSERT(data.source < src->end());
1840 ASSERT(data.dest < dst->end());
1841
1842 blitRoutine(&data);
1843 srcOffset.z++;
1844 dstOffset.z++;
1845 }
1846 }
1847}
1848
Ben Claytonfccfc562019-12-17 20:37:31 +00001849void Blitter::computeCubeCorner(Pointer<Byte> &layer, Int &x0, Int &x1, Int &y0, Int &y1, Int &pitchB, const State &state)
Nicolas Capens157ba262019-12-10 17:49:14 -05001850{
1851 int bytes = state.sourceFormat.bytes();
1852
1853 Float4 c = readFloat4(layer + ComputeOffset(x0, y1, pitchB, bytes), state) +
1854 readFloat4(layer + ComputeOffset(x1, y0, pitchB, bytes), state) +
1855 readFloat4(layer + ComputeOffset(x1, y1, pitchB, bytes), state);
1856
1857 c *= Float4(1.0f / 3.0f);
1858
1859 write(c, layer + ComputeOffset(x0, y0, pitchB, bytes), state);
1860}
1861
Ben Claytonfccfc562019-12-17 20:37:31 +00001862Blitter::CornerUpdateRoutineType Blitter::generateCornerUpdate(const State &state)
Nicolas Capens157ba262019-12-10 17:49:14 -05001863{
1864 // Reading and writing from/to the same image
1865 ASSERT(state.sourceFormat == state.destFormat);
1866 ASSERT(state.srcSamples == state.destSamples);
1867
1868 if(state.srcSamples != 1)
1869 {
1870 UNIMPLEMENTED("state.srcSamples %d", state.srcSamples);
1871 }
1872
1873 CornerUpdateFunction function;
1874 {
1875 Pointer<Byte> blit(function.Arg<0>());
1876
1877 Pointer<Byte> layers = *Pointer<Pointer<Byte>>(blit + OFFSET(CubeBorderData, layers));
1878 Int pitchB = *Pointer<Int>(blit + OFFSET(CubeBorderData, pitchB));
1879 UInt layerSize = *Pointer<Int>(blit + OFFSET(CubeBorderData, layerSize));
1880 UInt dim = *Pointer<Int>(blit + OFFSET(CubeBorderData, dim));
1881
1882 // Low Border, Low Pixel, High Border, High Pixel
Ben Claytonfccfc562019-12-17 20:37:31 +00001883 Int LB(-1), LP(0), HB(dim), HP(dim - 1);
Nicolas Capens157ba262019-12-10 17:49:14 -05001884
1885 for(int face = 0; face < 6; face++)
1886 {
1887 computeCubeCorner(layers, LB, LP, LB, LP, pitchB, state);
1888 computeCubeCorner(layers, LB, LP, HB, HP, pitchB, state);
1889 computeCubeCorner(layers, HB, HP, LB, LP, pitchB, state);
1890 computeCubeCorner(layers, HB, HP, HB, HP, pitchB, state);
1891 layers = layers + layerSize;
1892 }
1893 }
1894
1895 return function("BlitRoutine");
1896}
1897
Ben Claytonfccfc562019-12-17 20:37:31 +00001898void Blitter::updateBorders(vk::Image *image, const VkImageSubresourceLayers &subresourceLayers)
Nicolas Capens157ba262019-12-10 17:49:14 -05001899{
1900 if(image->getArrayLayers() < (subresourceLayers.baseArrayLayer + 6))
1901 {
1902 UNIMPLEMENTED("image->getArrayLayers() %d, baseArrayLayer %d",
1903 image->getArrayLayers(), subresourceLayers.baseArrayLayer);
1904 }
1905
1906 // From Vulkan 1.1 spec, section 11.5. Image Views:
1907 // "For cube and cube array image views, the layers of the image view starting
1908 // at baseArrayLayer correspond to faces in the order +X, -X, +Y, -Y, +Z, -Z."
1909 VkImageSubresourceLayers posX = subresourceLayers;
1910 posX.layerCount = 1;
1911 VkImageSubresourceLayers negX = posX;
1912 negX.baseArrayLayer++;
1913 VkImageSubresourceLayers posY = negX;
1914 posY.baseArrayLayer++;
1915 VkImageSubresourceLayers negY = posY;
1916 negY.baseArrayLayer++;
1917 VkImageSubresourceLayers posZ = negY;
1918 posZ.baseArrayLayer++;
1919 VkImageSubresourceLayers negZ = posZ;
1920 negZ.baseArrayLayer++;
1921
1922 // Copy top / bottom
1923 copyCubeEdge(image, posX, BOTTOM, negY, RIGHT);
1924 copyCubeEdge(image, posY, BOTTOM, posZ, TOP);
1925 copyCubeEdge(image, posZ, BOTTOM, negY, TOP);
1926 copyCubeEdge(image, negX, BOTTOM, negY, LEFT);
1927 copyCubeEdge(image, negY, BOTTOM, negZ, BOTTOM);
1928 copyCubeEdge(image, negZ, BOTTOM, negY, BOTTOM);
1929
1930 copyCubeEdge(image, posX, TOP, posY, RIGHT);
1931 copyCubeEdge(image, posY, TOP, negZ, TOP);
1932 copyCubeEdge(image, posZ, TOP, posY, BOTTOM);
1933 copyCubeEdge(image, negX, TOP, posY, LEFT);
1934 copyCubeEdge(image, negY, TOP, posZ, BOTTOM);
1935 copyCubeEdge(image, negZ, TOP, posY, TOP);
1936
1937 // Copy left / right
1938 copyCubeEdge(image, posX, RIGHT, negZ, LEFT);
1939 copyCubeEdge(image, posY, RIGHT, posX, TOP);
1940 copyCubeEdge(image, posZ, RIGHT, posX, LEFT);
1941 copyCubeEdge(image, negX, RIGHT, posZ, LEFT);
1942 copyCubeEdge(image, negY, RIGHT, posX, BOTTOM);
1943 copyCubeEdge(image, negZ, RIGHT, negX, LEFT);
1944
1945 copyCubeEdge(image, posX, LEFT, posZ, RIGHT);
1946 copyCubeEdge(image, posY, LEFT, negX, TOP);
1947 copyCubeEdge(image, posZ, LEFT, negX, RIGHT);
1948 copyCubeEdge(image, negX, LEFT, negZ, RIGHT);
1949 copyCubeEdge(image, negY, LEFT, negX, BOTTOM);
1950 copyCubeEdge(image, negZ, LEFT, posX, RIGHT);
1951
1952 // Compute corner colors
1953 VkImageAspectFlagBits aspect = static_cast<VkImageAspectFlagBits>(subresourceLayers.aspectMask);
1954 vk::Format format = image->getFormat(aspect);
1955 VkSampleCountFlagBits samples = image->getSampleCountFlagBits();
1956 State state(format, format, samples, samples, Options{ 0xF });
1957
1958 if(samples != VK_SAMPLE_COUNT_1_BIT)
1959 {
1960 UNIMPLEMENTED("Multi-sampled cube: %d samples", static_cast<int>(samples));
1961 }
1962
1963 auto cornerUpdateRoutine = getCornerUpdateRoutine(state);
1964 if(!cornerUpdateRoutine)
1965 {
1966 return;
1967 }
1968
1969 VkExtent3D extent = image->getMipLevelExtent(aspect, subresourceLayers.mipLevel);
Ben Claytonfccfc562019-12-17 20:37:31 +00001970 CubeBorderData data = {
Nicolas Capens157ba262019-12-10 17:49:14 -05001971 image->getTexelPointer({ 0, 0, 0 }, posX),
1972 image->rowPitchBytes(aspect, subresourceLayers.mipLevel),
1973 static_cast<uint32_t>(image->getLayerSize(aspect)),
1974 extent.width
1975 };
1976 cornerUpdateRoutine(&data);
1977}
1978
Ben Claytonfccfc562019-12-17 20:37:31 +00001979void Blitter::copyCubeEdge(vk::Image *image,
1980 const VkImageSubresourceLayers &dstSubresourceLayers, Edge dstEdge,
1981 const VkImageSubresourceLayers &srcSubresourceLayers, Edge srcEdge)
Nicolas Capens157ba262019-12-10 17:49:14 -05001982{
1983 ASSERT(srcSubresourceLayers.aspectMask == dstSubresourceLayers.aspectMask);
1984 ASSERT(srcSubresourceLayers.mipLevel == dstSubresourceLayers.mipLevel);
1985 ASSERT(srcSubresourceLayers.baseArrayLayer != dstSubresourceLayers.baseArrayLayer);
1986 ASSERT(srcSubresourceLayers.layerCount == 1);
1987 ASSERT(dstSubresourceLayers.layerCount == 1);
1988
1989 // Figure out if the edges to be copied in reverse order respectively from one another
1990 // The copy should be reversed whenever the same edges are contiguous or if we're
1991 // copying top <-> right or bottom <-> left. This is explained by the layout, which is:
1992 //
1993 // | +y |
1994 // | -x | +z | +x | -z |
1995 // | -y |
1996
1997 bool reverse = (srcEdge == dstEdge) ||
1998 ((srcEdge == TOP) && (dstEdge == RIGHT)) ||
1999 ((srcEdge == RIGHT) && (dstEdge == TOP)) ||
2000 ((srcEdge == BOTTOM) && (dstEdge == LEFT)) ||
2001 ((srcEdge == LEFT) && (dstEdge == BOTTOM));
2002
2003 VkImageAspectFlagBits aspect = static_cast<VkImageAspectFlagBits>(srcSubresourceLayers.aspectMask);
2004 int bytes = image->getFormat(aspect).bytes();
2005 int pitchB = image->rowPitchBytes(aspect, srcSubresourceLayers.mipLevel);
2006
2007 VkExtent3D extent = image->getMipLevelExtent(aspect, srcSubresourceLayers.mipLevel);
2008 int w = extent.width;
2009 int h = extent.height;
2010 if(w != h)
2011 {
2012 UNSUPPORTED("Cube doesn't have square faces : (%d, %d)", w, h);
2013 }
2014
2015 // Src is expressed in the regular [0, width-1], [0, height-1] space
2016 bool srcHorizontal = ((srcEdge == TOP) || (srcEdge == BOTTOM));
2017 int srcDelta = srcHorizontal ? bytes : pitchB;
2018 VkOffset3D srcOffset = { (srcEdge == RIGHT) ? (w - 1) : 0, (srcEdge == BOTTOM) ? (h - 1) : 0, 0 };
2019
2020 // Dst contains borders, so it is expressed in the [-1, width], [-1, height] space
2021 bool dstHorizontal = ((dstEdge == TOP) || (dstEdge == BOTTOM));
2022 int dstDelta = (dstHorizontal ? bytes : pitchB) * (reverse ? -1 : 1);
2023 VkOffset3D dstOffset = { (dstEdge == RIGHT) ? w : -1, (dstEdge == BOTTOM) ? h : -1, 0 };
2024
2025 // Don't write in the corners
2026 if(dstHorizontal)
2027 {
2028 dstOffset.x += reverse ? w : 1;
2029 }
2030 else
2031 {
2032 dstOffset.y += reverse ? h : 1;
2033 }
2034
Ben Claytonfccfc562019-12-17 20:37:31 +00002035 const uint8_t *src = static_cast<const uint8_t *>(image->getTexelPointer(srcOffset, srcSubresourceLayers));
2036 uint8_t *dst = static_cast<uint8_t *>(image->getTexelPointer(dstOffset, dstSubresourceLayers));
Nicolas Capens157ba262019-12-10 17:49:14 -05002037 ASSERT((src < image->end()) && ((src + (w * srcDelta)) < image->end()));
2038 ASSERT((dst < image->end()) && ((dst + (w * dstDelta)) < image->end()));
2039
2040 for(int i = 0; i < w; ++i, dst += dstDelta, src += srcDelta)
2041 {
2042 memcpy(dst, src, bytes);
2043 }
2044}
2045
Ben Claytonfccfc562019-12-17 20:37:31 +00002046} // namespace sw