blob: 958da630b0bcade5c58048e0c22f7a475e1291a5 [file] [log] [blame]
// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
15#include "Blitter.hpp"
16
Nicolas Capens1d8c8db2018-11-05 16:30:42 -050017#include "Pipeline/ShaderCore.hpp"
Nicolas Capens68a82382018-10-02 13:16:55 -040018#include "Reactor/Reactor.hpp"
Nicolas Capens02cbe8e2019-08-05 15:10:05 -040019#include "System/Half.hpp"
Nicolas Capens1d8c8db2018-11-05 16:30:42 -050020#include "System/Memory.hpp"
Ben Claytonfccfc562019-12-17 20:37:31 +000021#include "Vulkan/VkBuffer.hpp"
Chris Forbesebe5f7f2019-01-16 10:38:34 -080022#include "Vulkan/VkDebug.hpp"
Alexis Hetu33642272019-03-01 11:55:59 -050023#include "Vulkan/VkImage.hpp"
Nicolas Capens68a82382018-10-02 13:16:55 -040024
Nicolas Capensb8c63932019-03-19 01:52:40 -040025#include <utility>
26
Alexis Hetu3716c202019-12-19 17:09:08 -050027namespace {
28rr::RValue<rr::Int> PackFields(rr::Int4 const &ints, const sw::int4 shifts)
29{
30 return (rr::Int(ints.x) << shifts[0]) |
31 (rr::Int(ints.y) << shifts[1]) |
32 (rr::Int(ints.z) << shifts[2]) |
33 (rr::Int(ints.w) << shifts[3]);
34}
35} // namespace
36
Nicolas Capens157ba262019-12-10 17:49:14 -050037namespace sw {
38
Ben Claytonfccfc562019-12-17 20:37:31 +000039Blitter::Blitter()
40 : blitMutex()
41 , blitCache(1024)
42 , cornerUpdateMutex()
43 , cornerUpdateCache(64) // We only need one of these per format
Nicolas Capens68a82382018-10-02 13:16:55 -040044{
Nicolas Capens157ba262019-12-10 17:49:14 -050045}
46
47Blitter::~Blitter()
48{
49}
50
Ben Claytonfccfc562019-12-17 20:37:31 +000051void Blitter::clear(void *pixel, vk::Format format, vk::Image *dest, const vk::Format &viewFormat, const VkImageSubresourceRange &subresourceRange, const VkRect2D *renderArea)
Nicolas Capens157ba262019-12-10 17:49:14 -050052{
53 VkImageAspectFlagBits aspect = static_cast<VkImageAspectFlagBits>(subresourceRange.aspectMask);
54 vk::Format dstFormat = viewFormat.getAspectFormat(aspect);
55 if(dstFormat == VK_FORMAT_UNDEFINED)
Nicolas Capens68a82382018-10-02 13:16:55 -040056 {
Nicolas Capens157ba262019-12-10 17:49:14 -050057 return;
Nicolas Capens68a82382018-10-02 13:16:55 -040058 }
59
Nicolas Capens157ba262019-12-10 17:49:14 -050060 float *pPixel = static_cast<float *>(pixel);
Nicolas Capens81bc9d92019-12-16 15:05:57 -050061 if(viewFormat.isUnsignedNormalized())
Nicolas Capens68a82382018-10-02 13:16:55 -040062 {
Nicolas Capens157ba262019-12-10 17:49:14 -050063 pPixel[0] = sw::clamp(pPixel[0], 0.0f, 1.0f);
64 pPixel[1] = sw::clamp(pPixel[1], 0.0f, 1.0f);
65 pPixel[2] = sw::clamp(pPixel[2], 0.0f, 1.0f);
66 pPixel[3] = sw::clamp(pPixel[3], 0.0f, 1.0f);
67 }
Nicolas Capens81bc9d92019-12-16 15:05:57 -050068 else if(viewFormat.isSignedNormalized())
Nicolas Capens157ba262019-12-10 17:49:14 -050069 {
70 pPixel[0] = sw::clamp(pPixel[0], -1.0f, 1.0f);
71 pPixel[1] = sw::clamp(pPixel[1], -1.0f, 1.0f);
72 pPixel[2] = sw::clamp(pPixel[2], -1.0f, 1.0f);
73 pPixel[3] = sw::clamp(pPixel[3], -1.0f, 1.0f);
Nicolas Capens68a82382018-10-02 13:16:55 -040074 }
75
Nicolas Capens157ba262019-12-10 17:49:14 -050076 if(fastClear(pixel, format, dest, dstFormat, subresourceRange, renderArea))
Alexis Hetu33642272019-03-01 11:55:59 -050077 {
Nicolas Capens157ba262019-12-10 17:49:14 -050078 return;
79 }
80
81 State state(format, dstFormat, 1, dest->getSampleCountFlagBits(), Options{ 0xF });
82 auto blitRoutine = getBlitRoutine(state);
83 if(!blitRoutine)
84 {
85 return;
86 }
87
Ben Claytonfccfc562019-12-17 20:37:31 +000088 VkImageSubresourceLayers subresLayers = {
Nicolas Capens157ba262019-12-10 17:49:14 -050089 subresourceRange.aspectMask,
90 subresourceRange.baseMipLevel,
91 subresourceRange.baseArrayLayer,
92 1
93 };
94
95 uint32_t lastMipLevel = dest->getLastMipLevel(subresourceRange);
96 uint32_t lastLayer = dest->getLastLayerIndex(subresourceRange);
97
98 VkRect2D area = { { 0, 0 }, { 0, 0 } };
99 if(renderArea)
100 {
101 ASSERT(subresourceRange.levelCount == 1);
102 area = *renderArea;
103 }
104
105 for(; subresLayers.mipLevel <= lastMipLevel; subresLayers.mipLevel++)
106 {
107 VkExtent3D extent = dest->getMipLevelExtent(aspect, subresLayers.mipLevel);
108 if(!renderArea)
Alexis Hetu33642272019-03-01 11:55:59 -0500109 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500110 area.extent.width = extent.width;
111 area.extent.height = extent.height;
Alexis Hetu33642272019-03-01 11:55:59 -0500112 }
113
Ben Claytonfccfc562019-12-17 20:37:31 +0000114 BlitData data = {
115 pixel, nullptr, // source, dest
Chris Forbes88289192019-08-28 16:49:36 -0700116
Ben Claytonfccfc562019-12-17 20:37:31 +0000117 format.bytes(), // sPitchB
118 dest->rowPitchBytes(aspect, subresLayers.mipLevel), // dPitchB
119 0, // sSliceB (unused in clear operations)
120 dest->slicePitchBytes(aspect, subresLayers.mipLevel), // dSliceB
Alexis Hetu33642272019-03-01 11:55:59 -0500121
Ben Claytonfccfc562019-12-17 20:37:31 +0000122 0.5f, 0.5f, 0.0f, 0.0f, // x0, y0, w, h
Alexis Hetu33642272019-03-01 11:55:59 -0500123
Ben Claytonfccfc562019-12-17 20:37:31 +0000124 area.offset.y, static_cast<int>(area.offset.y + area.extent.height), // y0d, y1d
125 area.offset.x, static_cast<int>(area.offset.x + area.extent.width), // x0d, x1d
Nicolas Capens157ba262019-12-10 17:49:14 -0500126
Ben Claytonfccfc562019-12-17 20:37:31 +0000127 0, 0, // sWidth, sHeight
Alexis Hetu33642272019-03-01 11:55:59 -0500128 };
129
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500130 if(renderArea && dest->is3DSlice())
Alexis Hetu33642272019-03-01 11:55:59 -0500131 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500132 // Reinterpret layers as depth slices
133 subresLayers.baseArrayLayer = 0;
134 subresLayers.layerCount = 1;
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500135 for(uint32_t depth = subresourceRange.baseArrayLayer; depth <= lastLayer; depth++)
Alexis Hetu33642272019-03-01 11:55:59 -0500136 {
Ben Claytonfccfc562019-12-17 20:37:31 +0000137 data.dest = dest->getTexelPointer({ 0, 0, static_cast<int32_t>(depth) }, subresLayers);
Nicolas Capens157ba262019-12-10 17:49:14 -0500138 blitRoutine(&data);
Nicolas Capens68a82382018-10-02 13:16:55 -0400139 }
140 }
Nicolas Capens88ac3672019-08-01 13:22:34 -0400141 else
Nicolas Capens68a82382018-10-02 13:16:55 -0400142 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500143 for(subresLayers.baseArrayLayer = subresourceRange.baseArrayLayer; subresLayers.baseArrayLayer <= lastLayer; subresLayers.baseArrayLayer++)
Nicolas Capens68a82382018-10-02 13:16:55 -0400144 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500145 for(uint32_t depth = 0; depth < extent.depth; depth++)
Nicolas Capens68a82382018-10-02 13:16:55 -0400146 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500147 data.dest = dest->getTexelPointer({ 0, 0, static_cast<int32_t>(depth) }, subresLayers);
148
149 blitRoutine(&data);
150 }
151 }
152 }
153 }
154}
155
Ben Claytonfccfc562019-12-17 20:37:31 +0000156bool Blitter::fastClear(void *pixel, vk::Format format, vk::Image *dest, const vk::Format &viewFormat, const VkImageSubresourceRange &subresourceRange, const VkRect2D *renderArea)
Nicolas Capens157ba262019-12-10 17:49:14 -0500157{
158 if(format != VK_FORMAT_R32G32B32A32_SFLOAT)
159 {
160 return false;
161 }
162
Ben Claytonfccfc562019-12-17 20:37:31 +0000163 float *color = (float *)pixel;
Nicolas Capens157ba262019-12-10 17:49:14 -0500164 float r = color[0];
165 float g = color[1];
166 float b = color[2];
167 float a = color[3];
168
169 uint32_t packed;
170
171 VkImageAspectFlagBits aspect = static_cast<VkImageAspectFlagBits>(subresourceRange.aspectMask);
172 switch(viewFormat)
173 {
Ben Claytonfccfc562019-12-17 20:37:31 +0000174 case VK_FORMAT_R5G6B5_UNORM_PACK16:
175 packed = ((uint16_t)(31 * b + 0.5f) << 0) |
176 ((uint16_t)(63 * g + 0.5f) << 5) |
177 ((uint16_t)(31 * r + 0.5f) << 11);
178 break;
179 case VK_FORMAT_B5G6R5_UNORM_PACK16:
180 packed = ((uint16_t)(31 * r + 0.5f) << 0) |
181 ((uint16_t)(63 * g + 0.5f) << 5) |
182 ((uint16_t)(31 * b + 0.5f) << 11);
183 break;
184 case VK_FORMAT_A8B8G8R8_UINT_PACK32:
185 case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
186 case VK_FORMAT_R8G8B8A8_UNORM:
187 packed = ((uint32_t)(255 * a + 0.5f) << 24) |
188 ((uint32_t)(255 * b + 0.5f) << 16) |
189 ((uint32_t)(255 * g + 0.5f) << 8) |
190 ((uint32_t)(255 * r + 0.5f) << 0);
191 break;
192 case VK_FORMAT_B8G8R8A8_UNORM:
193 packed = ((uint32_t)(255 * a + 0.5f) << 24) |
194 ((uint32_t)(255 * r + 0.5f) << 16) |
195 ((uint32_t)(255 * g + 0.5f) << 8) |
196 ((uint32_t)(255 * b + 0.5f) << 0);
197 break;
198 case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
199 packed = R11G11B10F(color);
200 break;
201 case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32:
202 packed = RGB9E5(color);
203 break;
204 default:
205 return false;
Nicolas Capens157ba262019-12-10 17:49:14 -0500206 }
207
Ben Claytonfccfc562019-12-17 20:37:31 +0000208 VkImageSubresourceLayers subresLayers = {
Nicolas Capens157ba262019-12-10 17:49:14 -0500209 subresourceRange.aspectMask,
210 subresourceRange.baseMipLevel,
211 subresourceRange.baseArrayLayer,
212 1
213 };
214 uint32_t lastMipLevel = dest->getLastMipLevel(subresourceRange);
215 uint32_t lastLayer = dest->getLastLayerIndex(subresourceRange);
216
217 VkRect2D area = { { 0, 0 }, { 0, 0 } };
218 if(renderArea)
219 {
220 ASSERT(subresourceRange.levelCount == 1);
221 area = *renderArea;
222 }
223
224 for(; subresLayers.mipLevel <= lastMipLevel; subresLayers.mipLevel++)
225 {
226 int rowPitchBytes = dest->rowPitchBytes(aspect, subresLayers.mipLevel);
227 int slicePitchBytes = dest->slicePitchBytes(aspect, subresLayers.mipLevel);
228 VkExtent3D extent = dest->getMipLevelExtent(aspect, subresLayers.mipLevel);
229 if(!renderArea)
230 {
231 area.extent.width = extent.width;
232 area.extent.height = extent.height;
233 }
234 if(dest->is3DSlice())
235 {
Ben Claytonfccfc562019-12-17 20:37:31 +0000236 extent.depth = 1; // The 3D image is instead interpreted as a 2D image with layers
Nicolas Capens157ba262019-12-10 17:49:14 -0500237 }
238
239 for(subresLayers.baseArrayLayer = subresourceRange.baseArrayLayer; subresLayers.baseArrayLayer <= lastLayer; subresLayers.baseArrayLayer++)
240 {
241 for(uint32_t depth = 0; depth < extent.depth; depth++)
242 {
Ben Claytonfccfc562019-12-17 20:37:31 +0000243 uint8_t *slice = (uint8_t *)dest->getTexelPointer(
244 { area.offset.x, area.offset.y, static_cast<int32_t>(depth) }, subresLayers);
Nicolas Capens157ba262019-12-10 17:49:14 -0500245
246 for(int j = 0; j < dest->getSampleCountFlagBits(); j++)
247 {
248 uint8_t *d = slice;
249
250 switch(viewFormat.bytes())
251 {
Ben Claytonfccfc562019-12-17 20:37:31 +0000252 case 2:
253 for(uint32_t i = 0; i < area.extent.height; i++)
254 {
255 ASSERT(d < dest->end());
256 sw::clear((uint16_t *)d, static_cast<uint16_t>(packed), area.extent.width);
257 d += rowPitchBytes;
258 }
259 break;
260 case 4:
261 for(uint32_t i = 0; i < area.extent.height; i++)
262 {
263 ASSERT(d < dest->end());
264 sw::clear((uint32_t *)d, packed, area.extent.width);
265 d += rowPitchBytes;
266 }
267 break;
268 default:
269 assert(false);
Nicolas Capens157ba262019-12-10 17:49:14 -0500270 }
271
272 slice += slicePitchBytes;
273 }
274 }
275 }
276 }
277
278 return true;
279}
280
281Float4 Blitter::readFloat4(Pointer<Byte> element, const State &state)
282{
283 Float4 c(0.0f, 0.0f, 0.0f, 1.0f);
284
285 switch(state.sourceFormat)
286 {
Ben Claytonfccfc562019-12-17 20:37:31 +0000287 case VK_FORMAT_B4G4R4A4_UNORM_PACK16:
288 c.w = Float(Int(*Pointer<Byte>(element)) & Int(0xF));
289 c.x = Float((Int(*Pointer<Byte>(element)) >> 4) & Int(0xF));
290 c.y = Float(Int(*Pointer<Byte>(element + 1)) & Int(0xF));
291 c.z = Float((Int(*Pointer<Byte>(element + 1)) >> 4) & Int(0xF));
292 break;
293 case VK_FORMAT_R8_SINT:
294 case VK_FORMAT_R8_SNORM:
295 c.x = Float(Int(*Pointer<SByte>(element)));
296 c.w = float(0x7F);
297 break;
298 case VK_FORMAT_R8_UNORM:
299 case VK_FORMAT_R8_UINT:
300 case VK_FORMAT_R8_SRGB:
301 c.x = Float(Int(*Pointer<Byte>(element)));
302 c.w = float(0xFF);
303 break;
304 case VK_FORMAT_R16_SINT:
305 case VK_FORMAT_R16_SNORM:
306 c.x = Float(Int(*Pointer<Short>(element)));
307 c.w = float(0x7FFF);
308 break;
309 case VK_FORMAT_R16_UNORM:
310 case VK_FORMAT_R16_UINT:
311 c.x = Float(Int(*Pointer<UShort>(element)));
312 c.w = float(0xFFFF);
313 break;
314 case VK_FORMAT_R32_SINT:
315 c.x = Float(*Pointer<Int>(element));
316 c.w = float(0x7FFFFFFF);
317 break;
318 case VK_FORMAT_R32_UINT:
319 c.x = Float(*Pointer<UInt>(element));
320 c.w = float(0xFFFFFFFF);
321 break;
322 case VK_FORMAT_B8G8R8A8_SRGB:
323 case VK_FORMAT_B8G8R8A8_UNORM:
324 c = Float4(*Pointer<Byte4>(element)).zyxw;
325 break;
326 case VK_FORMAT_A8B8G8R8_SINT_PACK32:
327 case VK_FORMAT_R8G8B8A8_SINT:
328 case VK_FORMAT_A8B8G8R8_SNORM_PACK32:
329 case VK_FORMAT_R8G8B8A8_SNORM:
330 c = Float4(*Pointer<SByte4>(element));
331 break;
332 case VK_FORMAT_A8B8G8R8_UINT_PACK32:
333 case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
334 case VK_FORMAT_R8G8B8A8_UNORM:
335 case VK_FORMAT_R8G8B8A8_UINT:
336 case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
337 case VK_FORMAT_R8G8B8A8_SRGB:
338 c = Float4(*Pointer<Byte4>(element));
339 break;
340 case VK_FORMAT_R16G16B16A16_SINT:
341 c = Float4(*Pointer<Short4>(element));
342 break;
343 case VK_FORMAT_R16G16B16A16_UNORM:
344 case VK_FORMAT_R16G16B16A16_UINT:
345 c = Float4(*Pointer<UShort4>(element));
346 break;
347 case VK_FORMAT_R32G32B32A32_SINT:
348 c = Float4(*Pointer<Int4>(element));
349 break;
350 case VK_FORMAT_R32G32B32A32_UINT:
351 c = Float4(*Pointer<UInt4>(element));
352 break;
353 case VK_FORMAT_R8G8_SINT:
354 case VK_FORMAT_R8G8_SNORM:
355 c.x = Float(Int(*Pointer<SByte>(element + 0)));
356 c.y = Float(Int(*Pointer<SByte>(element + 1)));
357 c.w = float(0x7F);
358 break;
359 case VK_FORMAT_R8G8_UNORM:
360 case VK_FORMAT_R8G8_UINT:
361 case VK_FORMAT_R8G8_SRGB:
362 c.x = Float(Int(*Pointer<Byte>(element + 0)));
363 c.y = Float(Int(*Pointer<Byte>(element + 1)));
364 c.w = float(0xFF);
365 break;
366 case VK_FORMAT_R16G16_SINT:
367 case VK_FORMAT_R16G16_SNORM:
368 c.x = Float(Int(*Pointer<Short>(element + 0)));
369 c.y = Float(Int(*Pointer<Short>(element + 2)));
370 c.w = float(0x7FFF);
371 break;
372 case VK_FORMAT_R16G16_UNORM:
373 case VK_FORMAT_R16G16_UINT:
374 c.x = Float(Int(*Pointer<UShort>(element + 0)));
375 c.y = Float(Int(*Pointer<UShort>(element + 2)));
376 c.w = float(0xFFFF);
377 break;
378 case VK_FORMAT_R32G32_SINT:
379 c.x = Float(*Pointer<Int>(element + 0));
380 c.y = Float(*Pointer<Int>(element + 4));
381 c.w = float(0x7FFFFFFF);
382 break;
383 case VK_FORMAT_R32G32_UINT:
384 c.x = Float(*Pointer<UInt>(element + 0));
385 c.y = Float(*Pointer<UInt>(element + 4));
386 c.w = float(0xFFFFFFFF);
387 break;
388 case VK_FORMAT_R32G32B32A32_SFLOAT:
389 c = *Pointer<Float4>(element);
390 break;
391 case VK_FORMAT_R32G32_SFLOAT:
392 c.x = *Pointer<Float>(element + 0);
393 c.y = *Pointer<Float>(element + 4);
394 break;
395 case VK_FORMAT_R32_SFLOAT:
396 c.x = *Pointer<Float>(element);
397 break;
398 case VK_FORMAT_R16G16B16A16_SFLOAT:
399 c.w = Float(*Pointer<Half>(element + 6));
400 case VK_FORMAT_R16G16B16_SFLOAT:
401 c.z = Float(*Pointer<Half>(element + 4));
402 case VK_FORMAT_R16G16_SFLOAT:
403 c.y = Float(*Pointer<Half>(element + 2));
404 case VK_FORMAT_R16_SFLOAT:
405 c.x = Float(*Pointer<Half>(element));
406 break;
407 case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
408 c = r11g11b10Unpack(*Pointer<UInt>(element));
409 break;
410 case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32:
411 // This type contains a common 5 bit exponent (E) and a 9 bit the mantissa for R, G and B.
412 c.x = Float(*Pointer<UInt>(element) & UInt(0x000001FF)); // R's mantissa (bits 0-8)
413 c.y = Float((*Pointer<UInt>(element) & UInt(0x0003FE00)) >> 9); // G's mantissa (bits 9-17)
414 c.z = Float((*Pointer<UInt>(element) & UInt(0x07FC0000)) >> 18); // B's mantissa (bits 18-26)
415 c *= Float4(
416 // 2^E, using the exponent (bits 27-31) and treating it as an unsigned integer value
417 Float(UInt(1) << ((*Pointer<UInt>(element) & UInt(0xF8000000)) >> 27)) *
418 // Since the 9 bit mantissa values currently stored in RGB were converted straight
419 // from int to float (in the [0, 1<<9] range instead of the [0, 1] range), they
420 // are (1 << 9) times too high.
421 // Also, the exponent has 5 bits and we compute the exponent bias of floating point
422 // formats using "2^(k-1) - 1", so, in this case, the exponent bias is 2^(5-1)-1 = 15
423 // Exponent bias (15) + number of mantissa bits per component (9) = 24
424 Float(1.0f / (1 << 24)));
425 c.w = 1.0f;
426 break;
427 case VK_FORMAT_R5G6B5_UNORM_PACK16:
428 c.x = Float(Int((*Pointer<UShort>(element) & UShort(0xF800)) >> UShort(11)));
429 c.y = Float(Int((*Pointer<UShort>(element) & UShort(0x07E0)) >> UShort(5)));
430 c.z = Float(Int(*Pointer<UShort>(element) & UShort(0x001F)));
431 break;
432 case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
433 c.w = Float(Int((*Pointer<UShort>(element) & UShort(0x8000)) >> UShort(15)));
434 c.x = Float(Int((*Pointer<UShort>(element) & UShort(0x7C00)) >> UShort(10)));
435 c.y = Float(Int((*Pointer<UShort>(element) & UShort(0x03E0)) >> UShort(5)));
436 c.z = Float(Int(*Pointer<UShort>(element) & UShort(0x001F)));
437 break;
438 case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
439 case VK_FORMAT_A2B10G10R10_UINT_PACK32:
440 c.x = Float(Int((*Pointer<UInt>(element) & UInt(0x000003FF))));
441 c.y = Float(Int((*Pointer<UInt>(element) & UInt(0x000FFC00)) >> 10));
442 c.z = Float(Int((*Pointer<UInt>(element) & UInt(0x3FF00000)) >> 20));
443 c.w = Float(Int((*Pointer<UInt>(element) & UInt(0xC0000000)) >> 30));
444 break;
445 case VK_FORMAT_D16_UNORM:
446 c.x = Float(Int((*Pointer<UShort>(element))));
447 break;
448 case VK_FORMAT_X8_D24_UNORM_PACK32:
449 c.x = Float(Int((*Pointer<UInt>(element) & UInt(0xFFFFFF00)) >> 8));
450 break;
451 case VK_FORMAT_D32_SFLOAT:
452 c.x = *Pointer<Float>(element);
453 break;
454 case VK_FORMAT_S8_UINT:
455 c.x = Float(Int(*Pointer<Byte>(element)));
456 break;
457 default:
458 UNSUPPORTED("Blitter source format %d", (int)state.sourceFormat);
Nicolas Capens157ba262019-12-10 17:49:14 -0500459 }
460
461 return c;
462}
463
464void Blitter::write(Float4 &c, Pointer<Byte> element, const State &state)
465{
466 bool writeR = state.writeRed;
467 bool writeG = state.writeGreen;
468 bool writeB = state.writeBlue;
469 bool writeA = state.writeAlpha;
470 bool writeRGBA = writeR && writeG && writeB && writeA;
471
472 switch(state.destFormat)
473 {
Ben Claytonfccfc562019-12-17 20:37:31 +0000474 case VK_FORMAT_R4G4_UNORM_PACK8:
475 if(writeR | writeG)
Nicolas Capens157ba262019-12-10 17:49:14 -0500476 {
Ben Claytonfccfc562019-12-17 20:37:31 +0000477 if(!writeR)
478 {
479 *Pointer<Byte>(element) = (Byte(RoundInt(Float(c.y))) & Byte(0xF)) |
480 (*Pointer<Byte>(element) & Byte(0xF0));
481 }
482 else if(!writeG)
483 {
484 *Pointer<Byte>(element) = (*Pointer<Byte>(element) & Byte(0xF)) |
485 (Byte(RoundInt(Float(c.x))) << Byte(4));
486 }
487 else
488 {
489 *Pointer<Byte>(element) = (Byte(RoundInt(Float(c.y))) & Byte(0xF)) |
490 (Byte(RoundInt(Float(c.x))) << Byte(4));
491 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500492 }
Ben Claytonfccfc562019-12-17 20:37:31 +0000493 break;
494 case VK_FORMAT_R4G4B4A4_UNORM_PACK16:
495 if(writeR || writeG || writeB || writeA)
Nicolas Capens157ba262019-12-10 17:49:14 -0500496 {
Ben Claytonfccfc562019-12-17 20:37:31 +0000497 *Pointer<UShort>(element) = (writeR ? ((UShort(RoundInt(Float(c.x))) & UShort(0xF)) << UShort(12)) : (*Pointer<UShort>(element) & UShort(0x000F))) |
498 (writeG ? ((UShort(RoundInt(Float(c.y))) & UShort(0xF)) << UShort(8)) : (*Pointer<UShort>(element) & UShort(0x00F0))) |
499 (writeB ? ((UShort(RoundInt(Float(c.z))) & UShort(0xF)) << UShort(4)) : (*Pointer<UShort>(element) & UShort(0x0F00))) |
500 (writeA ? (UShort(RoundInt(Float(c.w))) & UShort(0xF)) : (*Pointer<UShort>(element) & UShort(0xF000)));
501 }
502 break;
503 case VK_FORMAT_B4G4R4A4_UNORM_PACK16:
504 if(writeRGBA)
505 {
506 *Pointer<UShort>(element) = UShort(RoundInt(Float(c.w)) & Int(0xF)) |
507 UShort((RoundInt(Float(c.x)) & Int(0xF)) << 4) |
508 UShort((RoundInt(Float(c.y)) & Int(0xF)) << 8) |
509 UShort((RoundInt(Float(c.z)) & Int(0xF)) << 12);
Nicolas Capens157ba262019-12-10 17:49:14 -0500510 }
511 else
512 {
Ben Claytonfccfc562019-12-17 20:37:31 +0000513 unsigned short mask = (writeA ? 0x000F : 0x0000) |
514 (writeR ? 0x00F0 : 0x0000) |
515 (writeG ? 0x0F00 : 0x0000) |
516 (writeB ? 0xF000 : 0x0000);
517 unsigned short unmask = ~mask;
518 *Pointer<UShort>(element) = (*Pointer<UShort>(element) & UShort(unmask)) |
519 ((UShort(RoundInt(Float(c.w)) & Int(0xF)) |
520 UShort((RoundInt(Float(c.x)) & Int(0xF)) << 4) |
521 UShort((RoundInt(Float(c.y)) & Int(0xF)) << 8) |
522 UShort((RoundInt(Float(c.z)) & Int(0xF)) << 12)) &
523 UShort(mask));
Nicolas Capens157ba262019-12-10 17:49:14 -0500524 }
Ben Claytonfccfc562019-12-17 20:37:31 +0000525 break;
526 case VK_FORMAT_B8G8R8A8_SRGB:
527 case VK_FORMAT_B8G8R8A8_UNORM:
528 if(writeRGBA)
529 {
530 Short4 c0 = RoundShort4(c.zyxw);
531 *Pointer<Byte4>(element) = Byte4(PackUnsigned(c0, c0));
532 }
533 else
534 {
535 if(writeB) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.z))); }
536 if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
537 if(writeR) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.x))); }
538 if(writeA) { *Pointer<Byte>(element + 3) = Byte(RoundInt(Float(c.w))); }
539 }
540 break;
541 case VK_FORMAT_B8G8R8_SNORM:
542 if(writeB) { *Pointer<SByte>(element + 0) = SByte(RoundInt(Float(c.z))); }
543 if(writeG) { *Pointer<SByte>(element + 1) = SByte(RoundInt(Float(c.y))); }
544 if(writeR) { *Pointer<SByte>(element + 2) = SByte(RoundInt(Float(c.x))); }
545 break;
546 case VK_FORMAT_B8G8R8_UNORM:
547 case VK_FORMAT_B8G8R8_SRGB:
Nicolas Capens157ba262019-12-10 17:49:14 -0500548 if(writeB) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.z))); }
549 if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
550 if(writeR) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.x))); }
Ben Claytonfccfc562019-12-17 20:37:31 +0000551 break;
552 case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
553 case VK_FORMAT_R8G8B8A8_UNORM:
554 case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
555 case VK_FORMAT_R8G8B8A8_SRGB:
556 case VK_FORMAT_A8B8G8R8_UINT_PACK32:
557 case VK_FORMAT_R8G8B8A8_UINT:
558 case VK_FORMAT_R8G8B8A8_USCALED:
559 case VK_FORMAT_A8B8G8R8_USCALED_PACK32:
560 if(writeRGBA)
561 {
562 Short4 c0 = RoundShort4(c);
563 *Pointer<Byte4>(element) = Byte4(PackUnsigned(c0, c0));
564 }
565 else
566 {
567 if(writeR) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.x))); }
568 if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
569 if(writeB) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.z))); }
570 if(writeA) { *Pointer<Byte>(element + 3) = Byte(RoundInt(Float(c.w))); }
571 }
572 break;
573 case VK_FORMAT_R32G32B32A32_SFLOAT:
574 if(writeRGBA)
575 {
576 *Pointer<Float4>(element) = c;
577 }
578 else
579 {
580 if(writeR) { *Pointer<Float>(element) = c.x; }
581 if(writeG) { *Pointer<Float>(element + 4) = c.y; }
582 if(writeB) { *Pointer<Float>(element + 8) = c.z; }
583 if(writeA) { *Pointer<Float>(element + 12) = c.w; }
584 }
585 break;
586 case VK_FORMAT_R32G32B32_SFLOAT:
Nicolas Capens157ba262019-12-10 17:49:14 -0500587 if(writeR) { *Pointer<Float>(element) = c.x; }
588 if(writeG) { *Pointer<Float>(element + 4) = c.y; }
589 if(writeB) { *Pointer<Float>(element + 8) = c.z; }
Ben Claytonfccfc562019-12-17 20:37:31 +0000590 break;
591 case VK_FORMAT_R32G32_SFLOAT:
592 if(writeR && writeG)
593 {
594 *Pointer<Float2>(element) = Float2(c);
595 }
596 else
597 {
598 if(writeR) { *Pointer<Float>(element) = c.x; }
599 if(writeG) { *Pointer<Float>(element + 4) = c.y; }
600 }
601 break;
602 case VK_FORMAT_R32_SFLOAT:
Nicolas Capens157ba262019-12-10 17:49:14 -0500603 if(writeR) { *Pointer<Float>(element) = c.x; }
Ben Claytonfccfc562019-12-17 20:37:31 +0000604 break;
605 case VK_FORMAT_R16G16B16A16_SFLOAT:
606 if(writeA) { *Pointer<Half>(element + 6) = Half(c.w); }
607 case VK_FORMAT_R16G16B16_SFLOAT:
608 if(writeB) { *Pointer<Half>(element + 4) = Half(c.z); }
609 case VK_FORMAT_R16G16_SFLOAT:
610 if(writeG) { *Pointer<Half>(element + 2) = Half(c.y); }
611 case VK_FORMAT_R16_SFLOAT:
612 if(writeR) { *Pointer<Half>(element) = Half(c.x); }
613 break;
614 case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
Nicolas Capens157ba262019-12-10 17:49:14 -0500615 {
Alexis Hetu24c49dd2019-12-13 16:32:43 -0500616 UInt rgb = r11g11b10Pack(c);
Nicolas Capens157ba262019-12-10 17:49:14 -0500617
618 UInt old = *Pointer<UInt>(element);
619
620 unsigned int mask = (writeR ? 0x000007FF : 0) |
621 (writeG ? 0x003FF800 : 0) |
622 (writeB ? 0xFFC00000 : 0);
623
624 *Pointer<UInt>(element) = (rgb & mask) | (old & ~mask);
625 }
626 break;
Ben Claytonfccfc562019-12-17 20:37:31 +0000627 case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32:
Nicolas Capens157ba262019-12-10 17:49:14 -0500628 {
629 ASSERT(writeRGBA); // Can't sensibly write just part of this format.
630
631 // Vulkan 1.1.117 section 15.2.1 RGB to Shared Exponent Conversion
632
633 constexpr int N = 9; // number of mantissa bits per component
634 constexpr int B = 15; // exponent bias
635 constexpr int E_max = 31; // maximum possible biased exponent value
636
637 // Maximum representable value.
638 constexpr float sharedexp_max = ((static_cast<float>(1 << N) - 1) / static_cast<float>(1 << N)) * static_cast<float>(1 << (E_max - B));
639
640 // Clamp components to valid range. NaN becomes 0.
Ben Claytonfccfc562019-12-17 20:37:31 +0000641 Float red_c = Min(IfThenElse(!(c.x > 0), Float(0), Float(c.x)), sharedexp_max);
Nicolas Capens157ba262019-12-10 17:49:14 -0500642 Float green_c = Min(IfThenElse(!(c.y > 0), Float(0), Float(c.y)), sharedexp_max);
Ben Claytonfccfc562019-12-17 20:37:31 +0000643 Float blue_c = Min(IfThenElse(!(c.z > 0), Float(0), Float(c.z)), sharedexp_max);
Nicolas Capens157ba262019-12-10 17:49:14 -0500644
645 // We're reducing the mantissa to 9 bits, so we must round up if the next
646 // bit is 1. In other words add 0.5 to the new mantissa's position and
647 // allow overflow into the exponent so we can scale correctly.
648 constexpr int half = 1 << (23 - N);
649 Float red_r = As<Float>(As<Int>(red_c) + half);
650 Float green_r = As<Float>(As<Int>(green_c) + half);
651 Float blue_r = As<Float>(As<Int>(blue_c) + half);
652
653 // The largest component determines the shared exponent. It can't be lower
654 // than 0 (after bias subtraction) so also limit to the mimimum representable.
655 constexpr float min_s = 0.5f / (1 << B);
656 Float max_s = Max(Max(red_r, green_r), Max(blue_r, min_s));
657
658 // Obtain the reciprocal of the shared exponent by inverting the bits,
659 // and scale by the new mantissa's size. Note that the IEEE-754 single-precision
660 // format has an implicit leading 1, but this shared component format does not.
661 Float scale = As<Float>((As<Int>(max_s) & 0x7F800000) ^ 0x7F800000) * (1 << (N - 2));
662
663 UInt R9 = RoundInt(red_c * scale);
664 UInt G9 = UInt(RoundInt(green_c * scale));
665 UInt B9 = UInt(RoundInt(blue_c * scale));
666 UInt E5 = (As<UInt>(max_s) >> 23) - 127 + 15 + 1;
667
668 UInt E5B9G9R9 = (E5 << 27) | (B9 << 18) | (G9 << 9) | R9;
669
670 *Pointer<UInt>(element) = E5B9G9R9;
671 }
672 break;
Ben Claytonfccfc562019-12-17 20:37:31 +0000673 case VK_FORMAT_B8G8R8A8_SNORM:
674 if(writeB) { *Pointer<SByte>(element) = SByte(RoundInt(Float(c.z))); }
675 if(writeG) { *Pointer<SByte>(element + 1) = SByte(RoundInt(Float(c.y))); }
676 if(writeR) { *Pointer<SByte>(element + 2) = SByte(RoundInt(Float(c.x))); }
677 if(writeA) { *Pointer<SByte>(element + 3) = SByte(RoundInt(Float(c.w))); }
678 break;
679 case VK_FORMAT_A8B8G8R8_SINT_PACK32:
680 case VK_FORMAT_R8G8B8A8_SINT:
681 case VK_FORMAT_A8B8G8R8_SNORM_PACK32:
682 case VK_FORMAT_R8G8B8A8_SNORM:
683 case VK_FORMAT_R8G8B8A8_SSCALED:
684 case VK_FORMAT_A8B8G8R8_SSCALED_PACK32:
685 if(writeA) { *Pointer<SByte>(element + 3) = SByte(RoundInt(Float(c.w))); }
686 case VK_FORMAT_R8G8B8_SINT:
687 case VK_FORMAT_R8G8B8_SNORM:
688 case VK_FORMAT_R8G8B8_SSCALED:
689 if(writeB) { *Pointer<SByte>(element + 2) = SByte(RoundInt(Float(c.z))); }
690 case VK_FORMAT_R8G8_SINT:
691 case VK_FORMAT_R8G8_SNORM:
692 case VK_FORMAT_R8G8_SSCALED:
693 if(writeG) { *Pointer<SByte>(element + 1) = SByte(RoundInt(Float(c.y))); }
694 case VK_FORMAT_R8_SINT:
695 case VK_FORMAT_R8_SNORM:
696 case VK_FORMAT_R8_SSCALED:
697 if(writeR) { *Pointer<SByte>(element) = SByte(RoundInt(Float(c.x))); }
698 break;
699 case VK_FORMAT_R8G8B8_UINT:
700 case VK_FORMAT_R8G8B8_UNORM:
701 case VK_FORMAT_R8G8B8_USCALED:
702 case VK_FORMAT_R8G8B8_SRGB:
703 if(writeB) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.z))); }
704 case VK_FORMAT_R8G8_UINT:
705 case VK_FORMAT_R8G8_UNORM:
706 case VK_FORMAT_R8G8_USCALED:
707 case VK_FORMAT_R8G8_SRGB:
708 if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
709 case VK_FORMAT_R8_UINT:
710 case VK_FORMAT_R8_UNORM:
711 case VK_FORMAT_R8_USCALED:
712 case VK_FORMAT_R8_SRGB:
713 if(writeR) { *Pointer<Byte>(element) = Byte(RoundInt(Float(c.x))); }
714 break;
715 case VK_FORMAT_R16G16B16A16_SINT:
716 case VK_FORMAT_R16G16B16A16_SNORM:
717 case VK_FORMAT_R16G16B16A16_SSCALED:
718 if(writeRGBA)
719 {
720 *Pointer<Short4>(element) = Short4(RoundInt(c));
721 }
722 else
723 {
724 if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); }
725 if(writeG) { *Pointer<Short>(element + 2) = Short(RoundInt(Float(c.y))); }
726 if(writeB) { *Pointer<Short>(element + 4) = Short(RoundInt(Float(c.z))); }
727 if(writeA) { *Pointer<Short>(element + 6) = Short(RoundInt(Float(c.w))); }
728 }
729 break;
730 case VK_FORMAT_R16G16B16_SINT:
731 case VK_FORMAT_R16G16B16_SNORM:
732 case VK_FORMAT_R16G16B16_SSCALED:
Nicolas Capens157ba262019-12-10 17:49:14 -0500733 if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); }
734 if(writeG) { *Pointer<Short>(element + 2) = Short(RoundInt(Float(c.y))); }
735 if(writeB) { *Pointer<Short>(element + 4) = Short(RoundInt(Float(c.z))); }
Ben Claytonfccfc562019-12-17 20:37:31 +0000736 break;
737 case VK_FORMAT_R16G16_SINT:
738 case VK_FORMAT_R16G16_SNORM:
739 case VK_FORMAT_R16G16_SSCALED:
740 if(writeR && writeG)
741 {
742 *Pointer<Short2>(element) = Short2(Short4(RoundInt(c)));
743 }
744 else
745 {
746 if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); }
747 if(writeG) { *Pointer<Short>(element + 2) = Short(RoundInt(Float(c.y))); }
748 }
749 break;
750 case VK_FORMAT_R16_SINT:
751 case VK_FORMAT_R16_SNORM:
752 case VK_FORMAT_R16_SSCALED:
Nicolas Capens157ba262019-12-10 17:49:14 -0500753 if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); }
Ben Claytonfccfc562019-12-17 20:37:31 +0000754 break;
755 case VK_FORMAT_R16G16B16A16_UINT:
756 case VK_FORMAT_R16G16B16A16_UNORM:
757 case VK_FORMAT_R16G16B16A16_USCALED:
758 if(writeRGBA)
759 {
760 *Pointer<UShort4>(element) = UShort4(RoundInt(c));
761 }
762 else
763 {
764 if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); }
765 if(writeG) { *Pointer<UShort>(element + 2) = UShort(RoundInt(Float(c.y))); }
766 if(writeB) { *Pointer<UShort>(element + 4) = UShort(RoundInt(Float(c.z))); }
767 if(writeA) { *Pointer<UShort>(element + 6) = UShort(RoundInt(Float(c.w))); }
768 }
769 break;
770 case VK_FORMAT_R16G16B16_UINT:
771 case VK_FORMAT_R16G16B16_UNORM:
772 case VK_FORMAT_R16G16B16_USCALED:
Nicolas Capens157ba262019-12-10 17:49:14 -0500773 if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); }
774 if(writeG) { *Pointer<UShort>(element + 2) = UShort(RoundInt(Float(c.y))); }
775 if(writeB) { *Pointer<UShort>(element + 4) = UShort(RoundInt(Float(c.z))); }
Ben Claytonfccfc562019-12-17 20:37:31 +0000776 break;
777 case VK_FORMAT_R16G16_UINT:
778 case VK_FORMAT_R16G16_UNORM:
779 case VK_FORMAT_R16G16_USCALED:
780 if(writeR && writeG)
781 {
782 *Pointer<UShort2>(element) = UShort2(UShort4(RoundInt(c)));
783 }
784 else
785 {
786 if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); }
787 if(writeG) { *Pointer<UShort>(element + 2) = UShort(RoundInt(Float(c.y))); }
788 }
789 break;
790 case VK_FORMAT_R16_UINT:
791 case VK_FORMAT_R16_UNORM:
792 case VK_FORMAT_R16_USCALED:
Nicolas Capens157ba262019-12-10 17:49:14 -0500793 if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); }
Ben Claytonfccfc562019-12-17 20:37:31 +0000794 break;
795 case VK_FORMAT_R32G32B32A32_SINT:
796 if(writeRGBA)
797 {
798 *Pointer<Int4>(element) = RoundInt(c);
799 }
800 else
801 {
802 if(writeR) { *Pointer<Int>(element) = RoundInt(Float(c.x)); }
803 if(writeG) { *Pointer<Int>(element + 4) = RoundInt(Float(c.y)); }
804 if(writeB) { *Pointer<Int>(element + 8) = RoundInt(Float(c.z)); }
805 if(writeA) { *Pointer<Int>(element + 12) = RoundInt(Float(c.w)); }
806 }
807 break;
808 case VK_FORMAT_R32G32B32_SINT:
Nicolas Capens157ba262019-12-10 17:49:14 -0500809 if(writeB) { *Pointer<Int>(element + 8) = RoundInt(Float(c.z)); }
Ben Claytonfccfc562019-12-17 20:37:31 +0000810 case VK_FORMAT_R32G32_SINT:
811 if(writeG) { *Pointer<Int>(element + 4) = RoundInt(Float(c.y)); }
812 case VK_FORMAT_R32_SINT:
813 if(writeR) { *Pointer<Int>(element) = RoundInt(Float(c.x)); }
814 break;
815 case VK_FORMAT_R32G32B32A32_UINT:
816 if(writeRGBA)
817 {
818 *Pointer<UInt4>(element) = UInt4(RoundInt(c));
819 }
820 else
821 {
822 if(writeR) { *Pointer<UInt>(element) = As<UInt>(RoundInt(Float(c.x))); }
823 if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(RoundInt(Float(c.y))); }
824 if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(RoundInt(Float(c.z))); }
825 if(writeA) { *Pointer<UInt>(element + 12) = As<UInt>(RoundInt(Float(c.w))); }
826 }
827 break;
828 case VK_FORMAT_R32G32B32_UINT:
Nicolas Capens157ba262019-12-10 17:49:14 -0500829 if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(RoundInt(Float(c.z))); }
Ben Claytonfccfc562019-12-17 20:37:31 +0000830 case VK_FORMAT_R32G32_UINT:
831 if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(RoundInt(Float(c.y))); }
832 case VK_FORMAT_R32_UINT:
833 if(writeR) { *Pointer<UInt>(element) = As<UInt>(RoundInt(Float(c.x))); }
834 break;
835 case VK_FORMAT_R5G6B5_UNORM_PACK16:
836 if(writeR && writeG && writeB)
837 {
Alexis Hetu3716c202019-12-19 17:09:08 -0500838 *Pointer<UShort>(element) = UShort(PackFields(RoundInt(c.xyzz), { 11, 5, 0, 0 }));
Ben Claytonfccfc562019-12-17 20:37:31 +0000839 }
840 else
841 {
842 unsigned short mask = (writeB ? 0x001F : 0x0000) | (writeG ? 0x07E0 : 0x0000) | (writeR ? 0xF800 : 0x0000);
843 unsigned short unmask = ~mask;
844 *Pointer<UShort>(element) = (*Pointer<UShort>(element) & UShort(unmask)) |
Alexis Hetu3716c202019-12-19 17:09:08 -0500845 (UShort(PackFields(RoundInt(c.xyzz), { 11, 5, 0, 0 })) &
Ben Claytonfccfc562019-12-17 20:37:31 +0000846 UShort(mask));
847 }
848 break;
849 case VK_FORMAT_R5G5B5A1_UNORM_PACK16:
850 if(writeRGBA)
851 {
Alexis Hetu3716c202019-12-19 17:09:08 -0500852 *Pointer<UShort>(element) = UShort(PackFields(RoundInt(c), { 11, 6, 1, 0 }));
Ben Claytonfccfc562019-12-17 20:37:31 +0000853 }
854 else
855 {
856 unsigned short mask = (writeA ? 0x8000 : 0x0000) |
857 (writeR ? 0x7C00 : 0x0000) |
858 (writeG ? 0x03E0 : 0x0000) |
859 (writeB ? 0x001F : 0x0000);
860 unsigned short unmask = ~mask;
861 *Pointer<UShort>(element) = (*Pointer<UShort>(element) & UShort(unmask)) |
Alexis Hetu3716c202019-12-19 17:09:08 -0500862 (UShort(PackFields(RoundInt(c), { 11, 6, 1, 0 })) &
Ben Claytonfccfc562019-12-17 20:37:31 +0000863 UShort(mask));
864 }
865 break;
866 case VK_FORMAT_B5G5R5A1_UNORM_PACK16:
867 if(writeRGBA)
868 {
Alexis Hetu3716c202019-12-19 17:09:08 -0500869 *Pointer<UShort>(element) = UShort(PackFields(RoundInt(c), { 1, 6, 11, 0 }));
Ben Claytonfccfc562019-12-17 20:37:31 +0000870 }
871 else
872 {
873 unsigned short mask = (writeA ? 0x8000 : 0x0000) |
874 (writeR ? 0x7C00 : 0x0000) |
875 (writeG ? 0x03E0 : 0x0000) |
876 (writeB ? 0x001F : 0x0000);
877 unsigned short unmask = ~mask;
878 *Pointer<UShort>(element) = (*Pointer<UShort>(element) & UShort(unmask)) |
Alexis Hetu3716c202019-12-19 17:09:08 -0500879 (UShort(PackFields(RoundInt(c), { 1, 6, 11, 0 })) &
Ben Claytonfccfc562019-12-17 20:37:31 +0000880 UShort(mask));
881 }
882 break;
883 case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
884 if(writeRGBA)
885 {
Alexis Hetu3716c202019-12-19 17:09:08 -0500886 *Pointer<UShort>(element) = UShort(PackFields(RoundInt(c), { 10, 5, 0, 15 }));
Ben Claytonfccfc562019-12-17 20:37:31 +0000887 }
888 else
889 {
890 unsigned short mask = (writeA ? 0x8000 : 0x0000) |
891 (writeR ? 0x7C00 : 0x0000) |
892 (writeG ? 0x03E0 : 0x0000) |
893 (writeB ? 0x001F : 0x0000);
894 unsigned short unmask = ~mask;
895 *Pointer<UShort>(element) = (*Pointer<UShort>(element) & UShort(unmask)) |
Alexis Hetu3716c202019-12-19 17:09:08 -0500896 (UShort(PackFields(RoundInt(c), { 10, 5, 0, 15 })) &
Ben Claytonfccfc562019-12-17 20:37:31 +0000897 UShort(mask));
898 }
899 break;
900 case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
901 case VK_FORMAT_A2B10G10R10_UINT_PACK32:
902 case VK_FORMAT_A2B10G10R10_SNORM_PACK32:
903 if(writeRGBA)
904 {
Alexis Hetu3716c202019-12-19 17:09:08 -0500905 *Pointer<UInt>(element) = As<UInt>(PackFields(RoundInt(c), { 0, 10, 20, 30 }));
Ben Claytonfccfc562019-12-17 20:37:31 +0000906 }
907 else
908 {
909 unsigned int mask = (writeA ? 0xC0000000 : 0x0000) |
910 (writeB ? 0x3FF00000 : 0x0000) |
911 (writeG ? 0x000FFC00 : 0x0000) |
912 (writeR ? 0x000003FF : 0x0000);
913 unsigned int unmask = ~mask;
914 *Pointer<UInt>(element) = (*Pointer<UInt>(element) & UInt(unmask)) |
Alexis Hetu3716c202019-12-19 17:09:08 -0500915 (As<UInt>(PackFields(RoundInt(c), { 0, 10, 20, 30 })) &
Ben Claytonfccfc562019-12-17 20:37:31 +0000916 UInt(mask));
917 }
918 break;
919 case VK_FORMAT_A2R10G10B10_UNORM_PACK32:
920 case VK_FORMAT_A2R10G10B10_UINT_PACK32:
921 case VK_FORMAT_A2R10G10B10_SNORM_PACK32:
922 if(writeRGBA)
923 {
Alexis Hetu3716c202019-12-19 17:09:08 -0500924 *Pointer<UInt>(element) = As<UInt>(PackFields(RoundInt(c), { 20, 10, 0, 30 }));
Ben Claytonfccfc562019-12-17 20:37:31 +0000925 }
926 else
927 {
928 unsigned int mask = (writeA ? 0xC0000000 : 0x0000) |
929 (writeR ? 0x3FF00000 : 0x0000) |
930 (writeG ? 0x000FFC00 : 0x0000) |
931 (writeB ? 0x000003FF : 0x0000);
932 unsigned int unmask = ~mask;
933 *Pointer<UInt>(element) = (*Pointer<UInt>(element) & UInt(unmask)) |
Alexis Hetu3716c202019-12-19 17:09:08 -0500934 (As<UInt>(PackFields(RoundInt(c), { 20, 10, 0, 30 })) &
Ben Claytonfccfc562019-12-17 20:37:31 +0000935 UInt(mask));
936 }
937 break;
938 case VK_FORMAT_D16_UNORM:
939 *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x)));
940 break;
941 case VK_FORMAT_X8_D24_UNORM_PACK32:
942 *Pointer<UInt>(element) = UInt(RoundInt(Float(c.x)) << 8);
943 break;
944 case VK_FORMAT_D32_SFLOAT:
945 *Pointer<Float>(element) = c.x;
946 break;
947 case VK_FORMAT_S8_UINT:
948 *Pointer<Byte>(element) = Byte(RoundInt(Float(c.x)));
949 break;
950 default:
951 UNSUPPORTED("Blitter destination format %d", (int)state.destFormat);
952 break;
Nicolas Capens157ba262019-12-10 17:49:14 -0500953 }
954}
955
956Int4 Blitter::readInt4(Pointer<Byte> element, const State &state)
957{
958 Int4 c(0, 0, 0, 1);
959
960 switch(state.sourceFormat)
961 {
Ben Claytonfccfc562019-12-17 20:37:31 +0000962 case VK_FORMAT_A8B8G8R8_SINT_PACK32:
963 case VK_FORMAT_R8G8B8A8_SINT:
964 c = Insert(c, Int(*Pointer<SByte>(element + 3)), 3);
965 c = Insert(c, Int(*Pointer<SByte>(element + 2)), 2);
966 case VK_FORMAT_R8G8_SINT:
967 c = Insert(c, Int(*Pointer<SByte>(element + 1)), 1);
968 case VK_FORMAT_R8_SINT:
969 c = Insert(c, Int(*Pointer<SByte>(element)), 0);
970 break;
971 case VK_FORMAT_A2B10G10R10_UINT_PACK32:
972 c = Insert(c, Int((*Pointer<UInt>(element) & UInt(0x000003FF))), 0);
973 c = Insert(c, Int((*Pointer<UInt>(element) & UInt(0x000FFC00)) >> 10), 1);
974 c = Insert(c, Int((*Pointer<UInt>(element) & UInt(0x3FF00000)) >> 20), 2);
975 c = Insert(c, Int((*Pointer<UInt>(element) & UInt(0xC0000000)) >> 30), 3);
976 break;
977 case VK_FORMAT_A8B8G8R8_UINT_PACK32:
978 case VK_FORMAT_R8G8B8A8_UINT:
979 c = Insert(c, Int(*Pointer<Byte>(element + 3)), 3);
980 c = Insert(c, Int(*Pointer<Byte>(element + 2)), 2);
981 case VK_FORMAT_R8G8_UINT:
982 c = Insert(c, Int(*Pointer<Byte>(element + 1)), 1);
983 case VK_FORMAT_R8_UINT:
984 case VK_FORMAT_S8_UINT:
985 c = Insert(c, Int(*Pointer<Byte>(element)), 0);
986 break;
987 case VK_FORMAT_R16G16B16A16_SINT:
988 c = Insert(c, Int(*Pointer<Short>(element + 6)), 3);
989 c = Insert(c, Int(*Pointer<Short>(element + 4)), 2);
990 case VK_FORMAT_R16G16_SINT:
991 c = Insert(c, Int(*Pointer<Short>(element + 2)), 1);
992 case VK_FORMAT_R16_SINT:
993 c = Insert(c, Int(*Pointer<Short>(element)), 0);
994 break;
995 case VK_FORMAT_R16G16B16A16_UINT:
996 c = Insert(c, Int(*Pointer<UShort>(element + 6)), 3);
997 c = Insert(c, Int(*Pointer<UShort>(element + 4)), 2);
998 case VK_FORMAT_R16G16_UINT:
999 c = Insert(c, Int(*Pointer<UShort>(element + 2)), 1);
1000 case VK_FORMAT_R16_UINT:
1001 c = Insert(c, Int(*Pointer<UShort>(element)), 0);
1002 break;
1003 case VK_FORMAT_R32G32B32A32_SINT:
1004 case VK_FORMAT_R32G32B32A32_UINT:
1005 c = *Pointer<Int4>(element);
1006 break;
1007 case VK_FORMAT_R32G32_SINT:
1008 case VK_FORMAT_R32G32_UINT:
1009 c = Insert(c, *Pointer<Int>(element + 4), 1);
1010 case VK_FORMAT_R32_SINT:
1011 case VK_FORMAT_R32_UINT:
1012 c = Insert(c, *Pointer<Int>(element), 0);
1013 break;
1014 default:
1015 UNSUPPORTED("Blitter source format %d", (int)state.sourceFormat);
Nicolas Capens157ba262019-12-10 17:49:14 -05001016 }
1017
1018 return c;
1019}
1020
1021void Blitter::write(Int4 &c, Pointer<Byte> element, const State &state)
1022{
1023 bool writeR = state.writeRed;
1024 bool writeG = state.writeGreen;
1025 bool writeB = state.writeBlue;
1026 bool writeA = state.writeAlpha;
1027 bool writeRGBA = writeR && writeG && writeB && writeA;
1028
1029 switch(state.destFormat)
1030 {
Ben Claytonfccfc562019-12-17 20:37:31 +00001031 case VK_FORMAT_A2B10G10R10_UINT_PACK32:
1032 c = Min(As<UInt4>(c), UInt4(0x03FF, 0x03FF, 0x03FF, 0x0003));
1033 break;
1034 case VK_FORMAT_A8B8G8R8_UINT_PACK32:
1035 case VK_FORMAT_R8G8B8A8_UINT:
1036 case VK_FORMAT_R8G8B8_UINT:
1037 case VK_FORMAT_R8G8_UINT:
1038 case VK_FORMAT_R8_UINT:
1039 case VK_FORMAT_R8G8B8A8_USCALED:
1040 case VK_FORMAT_R8G8B8_USCALED:
1041 case VK_FORMAT_R8G8_USCALED:
1042 case VK_FORMAT_R8_USCALED:
1043 case VK_FORMAT_S8_UINT:
1044 c = Min(As<UInt4>(c), UInt4(0xFF));
1045 break;
1046 case VK_FORMAT_R16G16B16A16_UINT:
1047 case VK_FORMAT_R16G16B16_UINT:
1048 case VK_FORMAT_R16G16_UINT:
1049 case VK_FORMAT_R16_UINT:
1050 case VK_FORMAT_R16G16B16A16_USCALED:
1051 case VK_FORMAT_R16G16B16_USCALED:
1052 case VK_FORMAT_R16G16_USCALED:
1053 case VK_FORMAT_R16_USCALED:
1054 c = Min(As<UInt4>(c), UInt4(0xFFFF));
1055 break;
1056 case VK_FORMAT_A8B8G8R8_SINT_PACK32:
1057 case VK_FORMAT_R8G8B8A8_SINT:
1058 case VK_FORMAT_R8G8_SINT:
1059 case VK_FORMAT_R8_SINT:
1060 case VK_FORMAT_R8G8B8A8_SSCALED:
1061 case VK_FORMAT_R8G8B8_SSCALED:
1062 case VK_FORMAT_R8G8_SSCALED:
1063 case VK_FORMAT_R8_SSCALED:
1064 c = Min(Max(c, Int4(-0x80)), Int4(0x7F));
1065 break;
1066 case VK_FORMAT_R16G16B16A16_SINT:
1067 case VK_FORMAT_R16G16B16_SINT:
1068 case VK_FORMAT_R16G16_SINT:
1069 case VK_FORMAT_R16_SINT:
1070 case VK_FORMAT_R16G16B16A16_SSCALED:
1071 case VK_FORMAT_R16G16B16_SSCALED:
1072 case VK_FORMAT_R16G16_SSCALED:
1073 case VK_FORMAT_R16_SSCALED:
1074 c = Min(Max(c, Int4(-0x8000)), Int4(0x7FFF));
1075 break;
1076 default:
1077 break;
Nicolas Capens157ba262019-12-10 17:49:14 -05001078 }
1079
1080 switch(state.destFormat)
1081 {
Ben Claytonfccfc562019-12-17 20:37:31 +00001082 case VK_FORMAT_B8G8R8A8_SINT:
1083 case VK_FORMAT_B8G8R8A8_SSCALED:
1084 if(writeA) { *Pointer<SByte>(element + 3) = SByte(Extract(c, 3)); }
1085 case VK_FORMAT_B8G8R8_SINT:
1086 case VK_FORMAT_B8G8R8_SSCALED:
1087 if(writeB) { *Pointer<SByte>(element) = SByte(Extract(c, 2)); }
1088 if(writeG) { *Pointer<SByte>(element + 1) = SByte(Extract(c, 1)); }
1089 if(writeR) { *Pointer<SByte>(element + 2) = SByte(Extract(c, 0)); }
1090 break;
1091 case VK_FORMAT_A8B8G8R8_SINT_PACK32:
1092 case VK_FORMAT_R8G8B8A8_SINT:
1093 case VK_FORMAT_R8G8B8A8_SSCALED:
1094 case VK_FORMAT_A8B8G8R8_SSCALED_PACK32:
1095 if(writeA) { *Pointer<SByte>(element + 3) = SByte(Extract(c, 3)); }
1096 case VK_FORMAT_R8G8B8_SINT:
1097 case VK_FORMAT_R8G8B8_SSCALED:
1098 if(writeB) { *Pointer<SByte>(element + 2) = SByte(Extract(c, 2)); }
1099 case VK_FORMAT_R8G8_SINT:
1100 case VK_FORMAT_R8G8_SSCALED:
1101 if(writeG) { *Pointer<SByte>(element + 1) = SByte(Extract(c, 1)); }
1102 case VK_FORMAT_R8_SINT:
1103 case VK_FORMAT_R8_SSCALED:
1104 if(writeR) { *Pointer<SByte>(element) = SByte(Extract(c, 0)); }
1105 break;
1106 case VK_FORMAT_A2B10G10R10_UINT_PACK32:
1107 case VK_FORMAT_A2B10G10R10_SINT_PACK32:
1108 case VK_FORMAT_A2B10G10R10_USCALED_PACK32:
1109 case VK_FORMAT_A2B10G10R10_SSCALED_PACK32:
1110 if(writeRGBA)
1111 {
Alexis Hetu3716c202019-12-19 17:09:08 -05001112 *Pointer<UInt>(element) = As<UInt>(PackFields(c, { 0, 10, 20, 30 }));
Ben Claytonfccfc562019-12-17 20:37:31 +00001113 }
1114 else
1115 {
1116 unsigned int mask = (writeA ? 0xC0000000 : 0x0000) |
1117 (writeB ? 0x3FF00000 : 0x0000) |
1118 (writeG ? 0x000FFC00 : 0x0000) |
1119 (writeR ? 0x000003FF : 0x0000);
1120 unsigned int unmask = ~mask;
1121 *Pointer<UInt>(element) = (*Pointer<UInt>(element) & UInt(unmask)) |
Alexis Hetu3716c202019-12-19 17:09:08 -05001122 (As<UInt>(PackFields(c, { 0, 10, 20, 30 })) & UInt(mask));
Ben Claytonfccfc562019-12-17 20:37:31 +00001123 }
1124 break;
1125 case VK_FORMAT_A2R10G10B10_UINT_PACK32:
1126 case VK_FORMAT_A2R10G10B10_SINT_PACK32:
1127 case VK_FORMAT_A2R10G10B10_USCALED_PACK32:
1128 case VK_FORMAT_A2R10G10B10_SSCALED_PACK32:
1129 if(writeRGBA)
1130 {
Alexis Hetu3716c202019-12-19 17:09:08 -05001131 *Pointer<UInt>(element) = As<UInt>(PackFields(c, { 20, 10, 0, 30 }));
Ben Claytonfccfc562019-12-17 20:37:31 +00001132 }
1133 else
1134 {
1135 unsigned int mask = (writeA ? 0xC0000000 : 0x0000) |
1136 (writeR ? 0x3FF00000 : 0x0000) |
1137 (writeG ? 0x000FFC00 : 0x0000) |
1138 (writeB ? 0x000003FF : 0x0000);
1139 unsigned int unmask = ~mask;
1140 *Pointer<UInt>(element) = (*Pointer<UInt>(element) & UInt(unmask)) |
Alexis Hetu3716c202019-12-19 17:09:08 -05001141 (As<UInt>(PackFields(c, { 20, 10, 0, 30 })) & UInt(mask));
Ben Claytonfccfc562019-12-17 20:37:31 +00001142 }
1143 break;
1144 case VK_FORMAT_B8G8R8A8_UINT:
1145 case VK_FORMAT_B8G8R8A8_USCALED:
1146 if(writeA) { *Pointer<Byte>(element + 3) = Byte(Extract(c, 3)); }
1147 case VK_FORMAT_B8G8R8_UINT:
1148 case VK_FORMAT_B8G8R8_USCALED:
1149 case VK_FORMAT_B8G8R8_SRGB:
1150 if(writeB) { *Pointer<Byte>(element) = Byte(Extract(c, 2)); }
1151 if(writeG) { *Pointer<Byte>(element + 1) = Byte(Extract(c, 1)); }
1152 if(writeR) { *Pointer<Byte>(element + 2) = Byte(Extract(c, 0)); }
1153 break;
1154 case VK_FORMAT_A8B8G8R8_UINT_PACK32:
1155 case VK_FORMAT_R8G8B8A8_UINT:
1156 case VK_FORMAT_R8G8B8A8_USCALED:
1157 case VK_FORMAT_A8B8G8R8_USCALED_PACK32:
1158 if(writeA) { *Pointer<Byte>(element + 3) = Byte(Extract(c, 3)); }
1159 case VK_FORMAT_R8G8B8_UINT:
1160 case VK_FORMAT_R8G8B8_USCALED:
1161 if(writeB) { *Pointer<Byte>(element + 2) = Byte(Extract(c, 2)); }
1162 case VK_FORMAT_R8G8_UINT:
1163 case VK_FORMAT_R8G8_USCALED:
1164 if(writeG) { *Pointer<Byte>(element + 1) = Byte(Extract(c, 1)); }
1165 case VK_FORMAT_R8_UINT:
1166 case VK_FORMAT_R8_USCALED:
1167 case VK_FORMAT_S8_UINT:
1168 if(writeR) { *Pointer<Byte>(element) = Byte(Extract(c, 0)); }
1169 break;
1170 case VK_FORMAT_R16G16B16A16_SINT:
1171 case VK_FORMAT_R16G16B16A16_SSCALED:
1172 if(writeA) { *Pointer<Short>(element + 6) = Short(Extract(c, 3)); }
1173 case VK_FORMAT_R16G16B16_SINT:
1174 case VK_FORMAT_R16G16B16_SSCALED:
1175 if(writeB) { *Pointer<Short>(element + 4) = Short(Extract(c, 2)); }
1176 case VK_FORMAT_R16G16_SINT:
1177 case VK_FORMAT_R16G16_SSCALED:
1178 if(writeG) { *Pointer<Short>(element + 2) = Short(Extract(c, 1)); }
1179 case VK_FORMAT_R16_SINT:
1180 case VK_FORMAT_R16_SSCALED:
1181 if(writeR) { *Pointer<Short>(element) = Short(Extract(c, 0)); }
1182 break;
1183 case VK_FORMAT_R16G16B16A16_UINT:
1184 case VK_FORMAT_R16G16B16A16_USCALED:
1185 if(writeA) { *Pointer<UShort>(element + 6) = UShort(Extract(c, 3)); }
1186 case VK_FORMAT_R16G16B16_UINT:
1187 case VK_FORMAT_R16G16B16_USCALED:
1188 if(writeB) { *Pointer<UShort>(element + 4) = UShort(Extract(c, 2)); }
1189 case VK_FORMAT_R16G16_UINT:
1190 case VK_FORMAT_R16G16_USCALED:
1191 if(writeG) { *Pointer<UShort>(element + 2) = UShort(Extract(c, 1)); }
1192 case VK_FORMAT_R16_UINT:
1193 case VK_FORMAT_R16_USCALED:
1194 if(writeR) { *Pointer<UShort>(element) = UShort(Extract(c, 0)); }
1195 break;
1196 case VK_FORMAT_R32G32B32A32_SINT:
1197 if(writeRGBA)
1198 {
1199 *Pointer<Int4>(element) = c;
1200 }
1201 else
1202 {
1203 if(writeR) { *Pointer<Int>(element) = Extract(c, 0); }
1204 if(writeG) { *Pointer<Int>(element + 4) = Extract(c, 1); }
1205 if(writeB) { *Pointer<Int>(element + 8) = Extract(c, 2); }
1206 if(writeA) { *Pointer<Int>(element + 12) = Extract(c, 3); }
1207 }
1208 break;
1209 case VK_FORMAT_R32G32B32_SINT:
Nicolas Capens157ba262019-12-10 17:49:14 -05001210 if(writeR) { *Pointer<Int>(element) = Extract(c, 0); }
1211 if(writeG) { *Pointer<Int>(element + 4) = Extract(c, 1); }
1212 if(writeB) { *Pointer<Int>(element + 8) = Extract(c, 2); }
Ben Claytonfccfc562019-12-17 20:37:31 +00001213 break;
1214 case VK_FORMAT_R32G32_SINT:
1215 if(writeR) { *Pointer<Int>(element) = Extract(c, 0); }
1216 if(writeG) { *Pointer<Int>(element + 4) = Extract(c, 1); }
1217 break;
1218 case VK_FORMAT_R32_SINT:
1219 if(writeR) { *Pointer<Int>(element) = Extract(c, 0); }
1220 break;
1221 case VK_FORMAT_R32G32B32A32_UINT:
1222 if(writeRGBA)
1223 {
1224 *Pointer<UInt4>(element) = As<UInt4>(c);
1225 }
1226 else
1227 {
1228 if(writeR) { *Pointer<UInt>(element) = As<UInt>(Extract(c, 0)); }
1229 if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(Extract(c, 1)); }
1230 if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(Extract(c, 2)); }
1231 if(writeA) { *Pointer<UInt>(element + 12) = As<UInt>(Extract(c, 3)); }
1232 }
1233 break;
1234 case VK_FORMAT_R32G32B32_UINT:
Nicolas Capens157ba262019-12-10 17:49:14 -05001235 if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(Extract(c, 2)); }
Ben Claytonfccfc562019-12-17 20:37:31 +00001236 case VK_FORMAT_R32G32_UINT:
1237 if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(Extract(c, 1)); }
1238 case VK_FORMAT_R32_UINT:
1239 if(writeR) { *Pointer<UInt>(element) = As<UInt>(Extract(c, 0)); }
1240 break;
1241 default:
1242 UNSUPPORTED("Blitter destination format %d", (int)state.destFormat);
Nicolas Capens157ba262019-12-10 17:49:14 -05001243 }
1244}
1245
1246void Blitter::ApplyScaleAndClamp(Float4 &value, const State &state, bool preScaled)
1247{
1248 float4 scale{}, unscale{};
1249
1250 if(state.clearOperation &&
1251 state.sourceFormat.isNonNormalizedInteger() &&
1252 !state.destFormat.isNonNormalizedInteger())
1253 {
1254 // If we're clearing a buffer from an int or uint color into a normalized color,
1255 // then the whole range of the int or uint color must be scaled between 0 and 1.
1256 switch(state.sourceFormat)
1257 {
Ben Claytonfccfc562019-12-17 20:37:31 +00001258 case VK_FORMAT_R32G32B32A32_SINT:
1259 unscale = float4(static_cast<float>(0x7FFFFFFF));
1260 break;
1261 case VK_FORMAT_R32G32B32A32_UINT:
1262 unscale = float4(static_cast<float>(0xFFFFFFFF));
1263 break;
1264 default:
1265 UNSUPPORTED("Blitter source format %d", (int)state.sourceFormat);
Nicolas Capens157ba262019-12-10 17:49:14 -05001266 }
1267 }
1268 else
1269 {
1270 unscale = state.sourceFormat.getScale();
1271 }
1272
1273 scale = state.destFormat.getScale();
1274
1275 bool srcSRGB = state.sourceFormat.isSRGBformat();
1276 bool dstSRGB = state.destFormat.isSRGBformat();
1277
Ben Claytonfccfc562019-12-17 20:37:31 +00001278 if(state.allowSRGBConversion && ((srcSRGB && !preScaled) || dstSRGB)) // One of the formats is sRGB encoded.
Nicolas Capens157ba262019-12-10 17:49:14 -05001279 {
Ben Claytonfccfc562019-12-17 20:37:31 +00001280 value *= preScaled ? Float4(1.0f / scale.x, 1.0f / scale.y, 1.0f / scale.z, 1.0f / scale.w) : // Unapply scale
1281 Float4(1.0f / unscale.x, 1.0f / unscale.y, 1.0f / unscale.z, 1.0f / unscale.w); // Apply unscale
Nicolas Capens157ba262019-12-10 17:49:14 -05001282 value = (srcSRGB && !preScaled) ? sRGBtoLinear(value) : LinearToSRGB(value);
Ben Claytonfccfc562019-12-17 20:37:31 +00001283 value *= Float4(scale.x, scale.y, scale.z, scale.w); // Apply scale
Nicolas Capens157ba262019-12-10 17:49:14 -05001284 }
1285 else if(unscale != scale)
1286 {
1287 value *= Float4(scale.x / unscale.x, scale.y / unscale.y, scale.z / unscale.z, scale.w / unscale.w);
1288 }
1289
1290 if(state.sourceFormat.isFloatFormat() && !state.destFormat.isFloatFormat())
1291 {
1292 value = Min(value, Float4(scale.x, scale.y, scale.z, scale.w));
1293
1294 value = Max(value, Float4(state.destFormat.isUnsignedComponent(0) ? 0.0f : -scale.x,
1295 state.destFormat.isUnsignedComponent(1) ? 0.0f : -scale.y,
1296 state.destFormat.isUnsignedComponent(2) ? 0.0f : -scale.z,
1297 state.destFormat.isUnsignedComponent(3) ? 0.0f : -scale.w));
1298 }
1299}
1300
1301Int Blitter::ComputeOffset(Int &x, Int &y, Int &pitchB, int bytes)
1302{
1303 return y * pitchB + x * bytes;
1304}
1305
1306Float4 Blitter::LinearToSRGB(Float4 &c)
1307{
1308 Float4 lc = Min(c, Float4(0.0031308f)) * Float4(12.92f);
1309 Float4 ec = Float4(1.055f) * power(c, Float4(1.0f / 2.4f)) - Float4(0.055f);
1310
1311 Float4 s = c;
1312 s.xyz = Max(lc, ec);
1313
1314 return s;
1315}
1316
1317Float4 Blitter::sRGBtoLinear(Float4 &c)
1318{
1319 Float4 lc = c * Float4(1.0f / 12.92f);
1320 Float4 ec = power((c + Float4(0.055f)) * Float4(1.0f / 1.055f), Float4(2.4f));
1321
1322 Int4 linear = CmpLT(c, Float4(0.04045f));
1323
1324 Float4 s = c;
Ben Claytonfccfc562019-12-17 20:37:31 +00001325 s.xyz = As<Float4>((linear & As<Int4>(lc)) | (~linear & As<Int4>(ec))); // TODO: IfThenElse()
Nicolas Capens157ba262019-12-10 17:49:14 -05001326
1327 return s;
1328}
1329
1330Blitter::BlitRoutineType Blitter::generate(const State &state)
1331{
1332 BlitFunction function;
1333 {
1334 Pointer<Byte> blit(function.Arg<0>());
1335
Ben Claytonfccfc562019-12-17 20:37:31 +00001336 Pointer<Byte> source = *Pointer<Pointer<Byte>>(blit + OFFSET(BlitData, source));
1337 Pointer<Byte> dest = *Pointer<Pointer<Byte>>(blit + OFFSET(BlitData, dest));
1338 Int sPitchB = *Pointer<Int>(blit + OFFSET(BlitData, sPitchB));
1339 Int dPitchB = *Pointer<Int>(blit + OFFSET(BlitData, dPitchB));
Nicolas Capens157ba262019-12-10 17:49:14 -05001340
Ben Claytonfccfc562019-12-17 20:37:31 +00001341 Float x0 = *Pointer<Float>(blit + OFFSET(BlitData, x0));
1342 Float y0 = *Pointer<Float>(blit + OFFSET(BlitData, y0));
1343 Float w = *Pointer<Float>(blit + OFFSET(BlitData, w));
1344 Float h = *Pointer<Float>(blit + OFFSET(BlitData, h));
Nicolas Capens157ba262019-12-10 17:49:14 -05001345
Ben Claytonfccfc562019-12-17 20:37:31 +00001346 Int x0d = *Pointer<Int>(blit + OFFSET(BlitData, x0d));
1347 Int x1d = *Pointer<Int>(blit + OFFSET(BlitData, x1d));
1348 Int y0d = *Pointer<Int>(blit + OFFSET(BlitData, y0d));
1349 Int y1d = *Pointer<Int>(blit + OFFSET(BlitData, y1d));
Nicolas Capens157ba262019-12-10 17:49:14 -05001350
Ben Claytonfccfc562019-12-17 20:37:31 +00001351 Int sWidth = *Pointer<Int>(blit + OFFSET(BlitData, sWidth));
1352 Int sHeight = *Pointer<Int>(blit + OFFSET(BlitData, sHeight));
Nicolas Capens157ba262019-12-10 17:49:14 -05001353
1354 bool intSrc = state.sourceFormat.isNonNormalizedInteger();
1355 bool intDst = state.destFormat.isNonNormalizedInteger();
1356 bool intBoth = intSrc && intDst;
1357 int srcBytes = state.sourceFormat.bytes();
1358 int dstBytes = state.destFormat.bytes();
1359
1360 bool hasConstantColorI = false;
1361 Int4 constantColorI;
1362 bool hasConstantColorF = false;
1363 Float4 constantColorF;
1364 if(state.clearOperation)
1365 {
Ben Claytonfccfc562019-12-17 20:37:31 +00001366 if(intBoth) // Integer types
Nicolas Capens157ba262019-12-10 17:49:14 -05001367 {
1368 constantColorI = readInt4(source, state);
1369 hasConstantColorI = true;
1370 }
1371 else
1372 {
1373 constantColorF = readFloat4(source, state);
1374 hasConstantColorF = true;
1375
1376 ApplyScaleAndClamp(constantColorF, state);
1377 }
1378 }
1379
1380 For(Int j = y0d, j < y1d, j++)
1381 {
1382 Float y = state.clearOperation ? RValue<Float>(y0) : y0 + Float(j) * h;
1383 Pointer<Byte> destLine = dest + j * dPitchB;
1384
1385 For(Int i = x0d, i < x1d, i++)
1386 {
1387 Float x = state.clearOperation ? RValue<Float>(x0) : x0 + Float(i) * w;
1388 Pointer<Byte> d = destLine + i * dstBytes;
1389
1390 if(hasConstantColorI)
1391 {
1392 for(int s = 0; s < state.destSamples; s++)
1393 {
1394 write(constantColorI, d, state);
1395
1396 d += *Pointer<Int>(blit + OFFSET(BlitData, dSliceB));
1397 }
1398 }
1399 else if(hasConstantColorF)
1400 {
1401 for(int s = 0; s < state.destSamples; s++)
1402 {
1403 write(constantColorF, d, state);
1404
1405 d += *Pointer<Int>(blit + OFFSET(BlitData, dSliceB));
1406 }
1407 }
Ben Claytonfccfc562019-12-17 20:37:31 +00001408 else if(intBoth) // Integer types do not support filtering
Nicolas Capens157ba262019-12-10 17:49:14 -05001409 {
1410 Int X = Int(x);
1411 Int Y = Int(y);
1412
1413 if(state.clampToEdge)
1414 {
1415 X = Clamp(X, 0, sWidth - 1);
1416 Y = Clamp(Y, 0, sHeight - 1);
1417 }
1418
1419 Pointer<Byte> s = source + ComputeOffset(X, Y, sPitchB, srcBytes);
1420
1421 // When both formats are true integer types, we don't go to float to avoid losing precision
1422 Int4 color = readInt4(s, state);
1423 for(int s = 0; s < state.destSamples; s++)
1424 {
1425 write(color, d, state);
1426
Ben Claytonfccfc562019-12-17 20:37:31 +00001427 d += *Pointer<Int>(blit + OFFSET(BlitData, dSliceB));
Nicolas Capens157ba262019-12-10 17:49:14 -05001428 }
Nicolas Capens68a82382018-10-02 13:16:55 -04001429 }
1430 else
1431 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001432 Float4 color;
Nicolas Capens68a82382018-10-02 13:16:55 -04001433
Nicolas Capens157ba262019-12-10 17:49:14 -05001434 bool preScaled = false;
1435 if(!state.filter || intSrc)
Nicolas Capens68a82382018-10-02 13:16:55 -04001436 {
Nicolas Capens68a82382018-10-02 13:16:55 -04001437 Int X = Int(x);
1438 Int Y = Int(y);
1439
1440 if(state.clampToEdge)
1441 {
1442 X = Clamp(X, 0, sWidth - 1);
1443 Y = Clamp(Y, 0, sHeight - 1);
1444 }
1445
Alexis Hetud34bb292019-11-13 17:18:02 -05001446 Pointer<Byte> s = source + ComputeOffset(X, Y, sPitchB, srcBytes);
Nicolas Capens68a82382018-10-02 13:16:55 -04001447
Nicolas Capens157ba262019-12-10 17:49:14 -05001448 color = readFloat4(s, state);
1449
Ben Claytonfccfc562019-12-17 20:37:31 +00001450 if(state.srcSamples > 1) // Resolve multisampled source
Alexis Hetuf8df30f2019-10-23 18:03:21 -04001451 {
Ben Claytonfccfc562019-12-17 20:37:31 +00001452 if(state.allowSRGBConversion && state.sourceFormat.isSRGBformat()) // sRGB -> RGB
Nicolas Capens68a82382018-10-02 13:16:55 -04001453 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001454 ApplyScaleAndClamp(color, state);
1455 preScaled = true;
Nicolas Capens68a82382018-10-02 13:16:55 -04001456 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001457 Float4 accum = color;
1458 for(int sample = 1; sample < state.srcSamples; sample++)
Alexis Hetu54ec7592019-03-20 14:37:16 -04001459 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001460 s += *Pointer<Int>(blit + OFFSET(BlitData, sSliceB));
1461 color = readFloat4(s, state);
1462
Ben Claytonfccfc562019-12-17 20:37:31 +00001463 if(state.allowSRGBConversion && state.sourceFormat.isSRGBformat()) // sRGB -> RGB
Alexis Hetua4308132019-06-13 09:55:26 -04001464 {
Nicolas Capens88ac3672019-08-01 13:22:34 -04001465 ApplyScaleAndClamp(color, state);
Alexis Hetua4308132019-06-13 09:55:26 -04001466 preScaled = true;
1467 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001468 accum += color;
Alexis Hetu54ec7592019-03-20 14:37:16 -04001469 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001470 color = accum * Float4(1.0f / static_cast<float>(state.srcSamples));
Nicolas Capens68a82382018-10-02 13:16:55 -04001471 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001472 }
Ben Claytonfccfc562019-12-17 20:37:31 +00001473 else // Bilinear filtering
Nicolas Capens157ba262019-12-10 17:49:14 -05001474 {
1475 Float X = x;
1476 Float Y = y;
1477
1478 if(state.clampToEdge)
Nicolas Capens68a82382018-10-02 13:16:55 -04001479 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001480 X = Min(Max(x, 0.5f), Float(sWidth) - 0.5f);
1481 Y = Min(Max(y, 0.5f), Float(sHeight) - 0.5f);
Nicolas Capens68a82382018-10-02 13:16:55 -04001482 }
1483
Nicolas Capens157ba262019-12-10 17:49:14 -05001484 Float x0 = X - 0.5f;
1485 Float y0 = Y - 0.5f;
Nicolas Capens68a82382018-10-02 13:16:55 -04001486
Nicolas Capens157ba262019-12-10 17:49:14 -05001487 Int X0 = Max(Int(x0), 0);
1488 Int Y0 = Max(Int(y0), 0);
1489
1490 Int X1 = X0 + 1;
1491 Int Y1 = Y0 + 1;
1492 X1 = IfThenElse(X1 >= sWidth, X0, X1);
1493 Y1 = IfThenElse(Y1 >= sHeight, Y0, Y1);
1494
1495 Pointer<Byte> s00 = source + ComputeOffset(X0, Y0, sPitchB, srcBytes);
1496 Pointer<Byte> s01 = source + ComputeOffset(X1, Y0, sPitchB, srcBytes);
1497 Pointer<Byte> s10 = source + ComputeOffset(X0, Y1, sPitchB, srcBytes);
1498 Pointer<Byte> s11 = source + ComputeOffset(X1, Y1, sPitchB, srcBytes);
1499
1500 Float4 c00 = readFloat4(s00, state);
1501 Float4 c01 = readFloat4(s01, state);
1502 Float4 c10 = readFloat4(s10, state);
1503 Float4 c11 = readFloat4(s11, state);
1504
Ben Claytonfccfc562019-12-17 20:37:31 +00001505 if(state.allowSRGBConversion && state.sourceFormat.isSRGBformat()) // sRGB -> RGB
Nicolas Capens68a82382018-10-02 13:16:55 -04001506 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001507 ApplyScaleAndClamp(c00, state);
1508 ApplyScaleAndClamp(c01, state);
1509 ApplyScaleAndClamp(c10, state);
1510 ApplyScaleAndClamp(c11, state);
1511 preScaled = true;
Nicolas Capens68a82382018-10-02 13:16:55 -04001512 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001513
1514 Float4 fx = Float4(x0 - Float(X0));
1515 Float4 fy = Float4(y0 - Float(Y0));
1516 Float4 ix = Float4(1.0f) - fx;
1517 Float4 iy = Float4(1.0f) - fy;
1518
1519 color = (c00 * ix + c01 * fx) * iy +
1520 (c10 * ix + c11 * fx) * fy;
1521 }
1522
1523 ApplyScaleAndClamp(color, state, preScaled);
1524
1525 for(int s = 0; s < state.destSamples; s++)
1526 {
1527 write(color, d, state);
1528
Ben Claytonfccfc562019-12-17 20:37:31 +00001529 d += *Pointer<Int>(blit + OFFSET(BlitData, dSliceB));
Nicolas Capens68a82382018-10-02 13:16:55 -04001530 }
1531 }
1532 }
1533 }
Nicolas Capens68a82382018-10-02 13:16:55 -04001534 }
1535
Nicolas Capens157ba262019-12-10 17:49:14 -05001536 return function("BlitRoutine");
1537}
1538
1539Blitter::BlitRoutineType Blitter::getBlitRoutine(const State &state)
1540{
1541 std::unique_lock<std::mutex> lock(blitMutex);
1542 auto blitRoutine = blitCache.query(state);
1543
1544 if(!blitRoutine)
Alexis Hetu33642272019-03-01 11:55:59 -05001545 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001546 blitRoutine = generate(state);
1547 blitCache.add(state, blitRoutine);
Alexis Hetu33642272019-03-01 11:55:59 -05001548 }
1549
Nicolas Capens157ba262019-12-10 17:49:14 -05001550 return blitRoutine;
1551}
1552
1553Blitter::CornerUpdateRoutineType Blitter::getCornerUpdateRoutine(const State &state)
1554{
1555 std::unique_lock<std::mutex> lock(cornerUpdateMutex);
1556 auto cornerUpdateRoutine = cornerUpdateCache.query(state);
1557
1558 if(!cornerUpdateRoutine)
Alexis Hetuf60a2d52019-05-09 14:16:05 -04001559 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001560 cornerUpdateRoutine = generateCornerUpdate(state);
1561 cornerUpdateCache.add(state, cornerUpdateRoutine);
Alexis Hetuf60a2d52019-05-09 14:16:05 -04001562 }
1563
Nicolas Capens157ba262019-12-10 17:49:14 -05001564 return cornerUpdateRoutine;
1565}
1566
1567void Blitter::blitToBuffer(const vk::Image *src, VkImageSubresourceLayers subresource, VkOffset3D offset, VkExtent3D extent, uint8_t *dst, int bufferRowPitch, int bufferSlicePitch)
1568{
1569 auto aspect = static_cast<VkImageAspectFlagBits>(subresource.aspectMask);
1570 auto format = src->getFormat(aspect);
Ben Claytonfccfc562019-12-17 20:37:31 +00001571 State state(format, format, VK_SAMPLE_COUNT_1_BIT, VK_SAMPLE_COUNT_1_BIT, Options{ false, false });
Nicolas Capens157ba262019-12-10 17:49:14 -05001572
1573 auto blitRoutine = getBlitRoutine(state);
1574 if(!blitRoutine)
Chris Forbes529eda32019-05-08 10:27:05 -07001575 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001576 return;
Chris Forbes529eda32019-05-08 10:27:05 -07001577 }
1578
Ben Claytonfccfc562019-12-17 20:37:31 +00001579 BlitData data = {
1580 nullptr, // source
1581 dst, // dest
1582 src->rowPitchBytes(aspect, subresource.mipLevel), // sPitchB
1583 bufferRowPitch, // dPitchB
1584 src->slicePitchBytes(aspect, subresource.mipLevel), // sSliceB
1585 bufferSlicePitch, // dSliceB
Chris Forbes529eda32019-05-08 10:27:05 -07001586
Nicolas Capens157ba262019-12-10 17:49:14 -05001587 0, 0, 1, 1,
Chris Forbes529eda32019-05-08 10:27:05 -07001588
Ben Claytonfccfc562019-12-17 20:37:31 +00001589 0, // y0d
1590 static_cast<int>(extent.height), // y1d
1591 0, // x0d
1592 static_cast<int>(extent.width), // x1d
Chris Forbes529eda32019-05-08 10:27:05 -07001593
Ben Claytonfccfc562019-12-17 20:37:31 +00001594 static_cast<int>(extent.width), // sWidth
1595 static_cast<int>(extent.height) // sHeight;
Nicolas Capens157ba262019-12-10 17:49:14 -05001596 };
Chris Forbes529eda32019-05-08 10:27:05 -07001597
Nicolas Capens157ba262019-12-10 17:49:14 -05001598 VkOffset3D srcOffset = { 0, 0, offset.z };
Chris Forbes529eda32019-05-08 10:27:05 -07001599
Nicolas Capens157ba262019-12-10 17:49:14 -05001600 VkImageSubresourceLayers srcSubresLayers = subresource;
1601 srcSubresLayers.layerCount = 1;
Chris Forbes529eda32019-05-08 10:27:05 -07001602
Ben Claytonfccfc562019-12-17 20:37:31 +00001603 VkImageSubresourceRange srcSubresRange = {
Nicolas Capens157ba262019-12-10 17:49:14 -05001604 subresource.aspectMask,
1605 subresource.mipLevel,
1606 1,
1607 subresource.baseArrayLayer,
1608 subresource.layerCount
1609 };
Alexis Hetu33642272019-03-01 11:55:59 -05001610
Nicolas Capens157ba262019-12-10 17:49:14 -05001611 uint32_t lastLayer = src->getLastLayerIndex(srcSubresRange);
Alexis Hetu33642272019-03-01 11:55:59 -05001612
Nicolas Capens157ba262019-12-10 17:49:14 -05001613 for(; srcSubresLayers.baseArrayLayer <= lastLayer; srcSubresLayers.baseArrayLayer++)
Alexis Hetub317d962019-04-29 14:07:31 -04001614 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001615 srcOffset.z = offset.z;
Alexis Hetub317d962019-04-29 14:07:31 -04001616
Nicolas Capens157ba262019-12-10 17:49:14 -05001617 for(auto i = 0u; i < extent.depth; i++)
Alexis Hetub317d962019-04-29 14:07:31 -04001618 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001619 data.source = src->getTexelPointer(srcOffset, srcSubresLayers);
1620 ASSERT(data.source < src->end());
1621 blitRoutine(&data);
1622 srcOffset.z++;
1623 data.dest = (dst += bufferSlicePitch);
Alexis Hetub317d962019-04-29 14:07:31 -04001624 }
1625 }
Nicolas Capens68a82382018-10-02 13:16:55 -04001626}
Nicolas Capens157ba262019-12-10 17:49:14 -05001627
1628void Blitter::blitFromBuffer(const vk::Image *dst, VkImageSubresourceLayers subresource, VkOffset3D offset, VkExtent3D extent, uint8_t *src, int bufferRowPitch, int bufferSlicePitch)
1629{
1630 auto aspect = static_cast<VkImageAspectFlagBits>(subresource.aspectMask);
1631 auto format = dst->getFormat(aspect);
Ben Claytonfccfc562019-12-17 20:37:31 +00001632 State state(format, format, VK_SAMPLE_COUNT_1_BIT, VK_SAMPLE_COUNT_1_BIT, Options{ false, false });
Nicolas Capens157ba262019-12-10 17:49:14 -05001633
1634 auto blitRoutine = getBlitRoutine(state);
1635 if(!blitRoutine)
1636 {
1637 return;
1638 }
1639
Ben Claytonfccfc562019-12-17 20:37:31 +00001640 BlitData data = {
1641 src, // source
1642 nullptr, // dest
1643 bufferRowPitch, // sPitchB
1644 dst->rowPitchBytes(aspect, subresource.mipLevel), // dPitchB
1645 bufferSlicePitch, // sSliceB
1646 dst->slicePitchBytes(aspect, subresource.mipLevel), // dSliceB
Nicolas Capens157ba262019-12-10 17:49:14 -05001647
Ben Claytonfccfc562019-12-17 20:37:31 +00001648 static_cast<float>(-offset.x), // x0
1649 static_cast<float>(-offset.y), // y0
1650 1.0f, // w
1651 1.0f, // h
Nicolas Capens157ba262019-12-10 17:49:14 -05001652
Ben Claytonfccfc562019-12-17 20:37:31 +00001653 offset.y, // y0d
1654 static_cast<int>(offset.y + extent.height), // y1d
1655 offset.x, // x0d
1656 static_cast<int>(offset.x + extent.width), // x1d
Nicolas Capens157ba262019-12-10 17:49:14 -05001657
Ben Claytonfccfc562019-12-17 20:37:31 +00001658 static_cast<int>(extent.width), // sWidth
1659 static_cast<int>(extent.height) // sHeight;
Nicolas Capens157ba262019-12-10 17:49:14 -05001660 };
1661
1662 VkOffset3D dstOffset = { 0, 0, offset.z };
1663
1664 VkImageSubresourceLayers dstSubresLayers = subresource;
1665 dstSubresLayers.layerCount = 1;
1666
Ben Claytonfccfc562019-12-17 20:37:31 +00001667 VkImageSubresourceRange dstSubresRange = {
Nicolas Capens157ba262019-12-10 17:49:14 -05001668 subresource.aspectMask,
1669 subresource.mipLevel,
1670 1,
1671 subresource.baseArrayLayer,
1672 subresource.layerCount
1673 };
1674
1675 uint32_t lastLayer = dst->getLastLayerIndex(dstSubresRange);
1676
1677 for(; dstSubresLayers.baseArrayLayer <= lastLayer; dstSubresLayers.baseArrayLayer++)
1678 {
1679 dstOffset.z = offset.z;
1680
1681 for(auto i = 0u; i < extent.depth; i++)
1682 {
1683 data.dest = dst->getTexelPointer(dstOffset, dstSubresLayers);
1684 ASSERT(data.dest < dst->end());
1685 blitRoutine(&data);
1686 dstOffset.z++;
1687 data.source = (src += bufferSlicePitch);
1688 }
1689 }
1690}
1691
1692void Blitter::blit(const vk::Image *src, vk::Image *dst, VkImageBlit region, VkFilter filter)
1693{
1694 if(dst->getFormat() == VK_FORMAT_UNDEFINED)
1695 {
1696 return;
1697 }
1698
1699 if((region.srcSubresource.layerCount != region.dstSubresource.layerCount) ||
1700 (region.srcSubresource.aspectMask != region.dstSubresource.aspectMask))
1701 {
1702 UNIMPLEMENTED("region");
1703 }
1704
1705 if(region.dstOffsets[0].x > region.dstOffsets[1].x)
1706 {
1707 std::swap(region.srcOffsets[0].x, region.srcOffsets[1].x);
1708 std::swap(region.dstOffsets[0].x, region.dstOffsets[1].x);
1709 }
1710
1711 if(region.dstOffsets[0].y > region.dstOffsets[1].y)
1712 {
1713 std::swap(region.srcOffsets[0].y, region.srcOffsets[1].y);
1714 std::swap(region.dstOffsets[0].y, region.dstOffsets[1].y);
1715 }
1716
1717 VkImageAspectFlagBits srcAspect = static_cast<VkImageAspectFlagBits>(region.srcSubresource.aspectMask);
1718 VkImageAspectFlagBits dstAspect = static_cast<VkImageAspectFlagBits>(region.dstSubresource.aspectMask);
1719 VkExtent3D srcExtent = src->getMipLevelExtent(srcAspect, region.srcSubresource.mipLevel);
1720
1721 int32_t numSlices = (region.srcOffsets[1].z - region.srcOffsets[0].z);
1722 ASSERT(numSlices == (region.dstOffsets[1].z - region.dstOffsets[0].z));
1723
1724 float widthRatio = static_cast<float>(region.srcOffsets[1].x - region.srcOffsets[0].x) /
1725 static_cast<float>(region.dstOffsets[1].x - region.dstOffsets[0].x);
1726 float heightRatio = static_cast<float>(region.srcOffsets[1].y - region.srcOffsets[0].y) /
1727 static_cast<float>(region.dstOffsets[1].y - region.dstOffsets[0].y);
1728 float x0 = region.srcOffsets[0].x + (0.5f - region.dstOffsets[0].x) * widthRatio;
1729 float y0 = region.srcOffsets[0].y + (0.5f - region.dstOffsets[0].y) * heightRatio;
1730
1731 auto srcFormat = src->getFormat(srcAspect);
1732 auto dstFormat = dst->getFormat(dstAspect);
1733
1734 bool doFilter = (filter != VK_FILTER_NEAREST);
1735 bool allowSRGBConversion =
Ben Claytonfccfc562019-12-17 20:37:31 +00001736 doFilter ||
1737 (src->getSampleCountFlagBits() > 1) ||
1738 (srcFormat.isSRGBformat() != dstFormat.isSRGBformat());
Nicolas Capens157ba262019-12-10 17:49:14 -05001739
1740 State state(src->getFormat(srcAspect), dst->getFormat(dstAspect), src->getSampleCountFlagBits(), dst->getSampleCountFlagBits(),
1741 Options{ doFilter, allowSRGBConversion });
1742 state.clampToEdge = (region.srcOffsets[0].x < 0) ||
1743 (region.srcOffsets[0].y < 0) ||
1744 (static_cast<uint32_t>(region.srcOffsets[1].x) > srcExtent.width) ||
1745 (static_cast<uint32_t>(region.srcOffsets[1].y) > srcExtent.height) ||
1746 (doFilter && ((x0 < 0.5f) || (y0 < 0.5f)));
1747
1748 auto blitRoutine = getBlitRoutine(state);
1749 if(!blitRoutine)
1750 {
1751 return;
1752 }
1753
Ben Claytonfccfc562019-12-17 20:37:31 +00001754 BlitData data = {
1755 nullptr, // source
1756 nullptr, // dest
1757 src->rowPitchBytes(srcAspect, region.srcSubresource.mipLevel), // sPitchB
1758 dst->rowPitchBytes(dstAspect, region.dstSubresource.mipLevel), // dPitchB
1759 src->slicePitchBytes(srcAspect, region.srcSubresource.mipLevel), // sSliceB
1760 dst->slicePitchBytes(dstAspect, region.dstSubresource.mipLevel), // dSliceB
Nicolas Capens157ba262019-12-10 17:49:14 -05001761
1762 x0,
1763 y0,
1764 widthRatio,
1765 heightRatio,
1766
Ben Claytonfccfc562019-12-17 20:37:31 +00001767 region.dstOffsets[0].y, // y0d
1768 region.dstOffsets[1].y, // y1d
1769 region.dstOffsets[0].x, // x0d
1770 region.dstOffsets[1].x, // x1d
Nicolas Capens157ba262019-12-10 17:49:14 -05001771
Ben Claytonfccfc562019-12-17 20:37:31 +00001772 static_cast<int>(srcExtent.width), // sWidth
1773 static_cast<int>(srcExtent.height) // sHeight;
Nicolas Capens157ba262019-12-10 17:49:14 -05001774 };
1775
1776 VkOffset3D srcOffset = { 0, 0, region.srcOffsets[0].z };
1777 VkOffset3D dstOffset = { 0, 0, region.dstOffsets[0].z };
1778
Ben Claytonfccfc562019-12-17 20:37:31 +00001779 VkImageSubresourceLayers srcSubresLayers = {
Nicolas Capens157ba262019-12-10 17:49:14 -05001780 region.srcSubresource.aspectMask,
1781 region.srcSubresource.mipLevel,
1782 region.srcSubresource.baseArrayLayer,
1783 1
1784 };
1785
Ben Claytonfccfc562019-12-17 20:37:31 +00001786 VkImageSubresourceLayers dstSubresLayers = {
Nicolas Capens157ba262019-12-10 17:49:14 -05001787 region.dstSubresource.aspectMask,
1788 region.dstSubresource.mipLevel,
1789 region.dstSubresource.baseArrayLayer,
1790 1
1791 };
1792
Ben Claytonfccfc562019-12-17 20:37:31 +00001793 VkImageSubresourceRange srcSubresRange = {
Nicolas Capens157ba262019-12-10 17:49:14 -05001794 region.srcSubresource.aspectMask,
1795 region.srcSubresource.mipLevel,
1796 1,
1797 region.srcSubresource.baseArrayLayer,
1798 region.srcSubresource.layerCount
1799 };
1800
1801 uint32_t lastLayer = src->getLastLayerIndex(srcSubresRange);
1802
1803 for(; srcSubresLayers.baseArrayLayer <= lastLayer; srcSubresLayers.baseArrayLayer++, dstSubresLayers.baseArrayLayer++)
1804 {
1805 srcOffset.z = region.srcOffsets[0].z;
1806 dstOffset.z = region.dstOffsets[0].z;
1807
1808 for(int i = 0; i < numSlices; i++)
1809 {
1810 data.source = src->getTexelPointer(srcOffset, srcSubresLayers);
1811 data.dest = dst->getTexelPointer(dstOffset, dstSubresLayers);
1812
1813 ASSERT(data.source < src->end());
1814 ASSERT(data.dest < dst->end());
1815
1816 blitRoutine(&data);
1817 srcOffset.z++;
1818 dstOffset.z++;
1819 }
1820 }
1821}
1822
Ben Claytonfccfc562019-12-17 20:37:31 +00001823void Blitter::computeCubeCorner(Pointer<Byte> &layer, Int &x0, Int &x1, Int &y0, Int &y1, Int &pitchB, const State &state)
Nicolas Capens157ba262019-12-10 17:49:14 -05001824{
1825 int bytes = state.sourceFormat.bytes();
1826
1827 Float4 c = readFloat4(layer + ComputeOffset(x0, y1, pitchB, bytes), state) +
1828 readFloat4(layer + ComputeOffset(x1, y0, pitchB, bytes), state) +
1829 readFloat4(layer + ComputeOffset(x1, y1, pitchB, bytes), state);
1830
1831 c *= Float4(1.0f / 3.0f);
1832
1833 write(c, layer + ComputeOffset(x0, y0, pitchB, bytes), state);
1834}
1835
Ben Claytonfccfc562019-12-17 20:37:31 +00001836Blitter::CornerUpdateRoutineType Blitter::generateCornerUpdate(const State &state)
Nicolas Capens157ba262019-12-10 17:49:14 -05001837{
1838 // Reading and writing from/to the same image
1839 ASSERT(state.sourceFormat == state.destFormat);
1840 ASSERT(state.srcSamples == state.destSamples);
1841
1842 if(state.srcSamples != 1)
1843 {
1844 UNIMPLEMENTED("state.srcSamples %d", state.srcSamples);
1845 }
1846
1847 CornerUpdateFunction function;
1848 {
1849 Pointer<Byte> blit(function.Arg<0>());
1850
1851 Pointer<Byte> layers = *Pointer<Pointer<Byte>>(blit + OFFSET(CubeBorderData, layers));
1852 Int pitchB = *Pointer<Int>(blit + OFFSET(CubeBorderData, pitchB));
1853 UInt layerSize = *Pointer<Int>(blit + OFFSET(CubeBorderData, layerSize));
1854 UInt dim = *Pointer<Int>(blit + OFFSET(CubeBorderData, dim));
1855
1856 // Low Border, Low Pixel, High Border, High Pixel
Ben Claytonfccfc562019-12-17 20:37:31 +00001857 Int LB(-1), LP(0), HB(dim), HP(dim - 1);
Nicolas Capens157ba262019-12-10 17:49:14 -05001858
1859 for(int face = 0; face < 6; face++)
1860 {
1861 computeCubeCorner(layers, LB, LP, LB, LP, pitchB, state);
1862 computeCubeCorner(layers, LB, LP, HB, HP, pitchB, state);
1863 computeCubeCorner(layers, HB, HP, LB, LP, pitchB, state);
1864 computeCubeCorner(layers, HB, HP, HB, HP, pitchB, state);
1865 layers = layers + layerSize;
1866 }
1867 }
1868
1869 return function("BlitRoutine");
1870}
1871
Ben Claytonfccfc562019-12-17 20:37:31 +00001872void Blitter::updateBorders(vk::Image *image, const VkImageSubresourceLayers &subresourceLayers)
Nicolas Capens157ba262019-12-10 17:49:14 -05001873{
1874 if(image->getArrayLayers() < (subresourceLayers.baseArrayLayer + 6))
1875 {
1876 UNIMPLEMENTED("image->getArrayLayers() %d, baseArrayLayer %d",
1877 image->getArrayLayers(), subresourceLayers.baseArrayLayer);
1878 }
1879
1880 // From Vulkan 1.1 spec, section 11.5. Image Views:
1881 // "For cube and cube array image views, the layers of the image view starting
1882 // at baseArrayLayer correspond to faces in the order +X, -X, +Y, -Y, +Z, -Z."
1883 VkImageSubresourceLayers posX = subresourceLayers;
1884 posX.layerCount = 1;
1885 VkImageSubresourceLayers negX = posX;
1886 negX.baseArrayLayer++;
1887 VkImageSubresourceLayers posY = negX;
1888 posY.baseArrayLayer++;
1889 VkImageSubresourceLayers negY = posY;
1890 negY.baseArrayLayer++;
1891 VkImageSubresourceLayers posZ = negY;
1892 posZ.baseArrayLayer++;
1893 VkImageSubresourceLayers negZ = posZ;
1894 negZ.baseArrayLayer++;
1895
1896 // Copy top / bottom
1897 copyCubeEdge(image, posX, BOTTOM, negY, RIGHT);
1898 copyCubeEdge(image, posY, BOTTOM, posZ, TOP);
1899 copyCubeEdge(image, posZ, BOTTOM, negY, TOP);
1900 copyCubeEdge(image, negX, BOTTOM, negY, LEFT);
1901 copyCubeEdge(image, negY, BOTTOM, negZ, BOTTOM);
1902 copyCubeEdge(image, negZ, BOTTOM, negY, BOTTOM);
1903
1904 copyCubeEdge(image, posX, TOP, posY, RIGHT);
1905 copyCubeEdge(image, posY, TOP, negZ, TOP);
1906 copyCubeEdge(image, posZ, TOP, posY, BOTTOM);
1907 copyCubeEdge(image, negX, TOP, posY, LEFT);
1908 copyCubeEdge(image, negY, TOP, posZ, BOTTOM);
1909 copyCubeEdge(image, negZ, TOP, posY, TOP);
1910
1911 // Copy left / right
1912 copyCubeEdge(image, posX, RIGHT, negZ, LEFT);
1913 copyCubeEdge(image, posY, RIGHT, posX, TOP);
1914 copyCubeEdge(image, posZ, RIGHT, posX, LEFT);
1915 copyCubeEdge(image, negX, RIGHT, posZ, LEFT);
1916 copyCubeEdge(image, negY, RIGHT, posX, BOTTOM);
1917 copyCubeEdge(image, negZ, RIGHT, negX, LEFT);
1918
1919 copyCubeEdge(image, posX, LEFT, posZ, RIGHT);
1920 copyCubeEdge(image, posY, LEFT, negX, TOP);
1921 copyCubeEdge(image, posZ, LEFT, negX, RIGHT);
1922 copyCubeEdge(image, negX, LEFT, negZ, RIGHT);
1923 copyCubeEdge(image, negY, LEFT, negX, BOTTOM);
1924 copyCubeEdge(image, negZ, LEFT, posX, RIGHT);
1925
1926 // Compute corner colors
1927 VkImageAspectFlagBits aspect = static_cast<VkImageAspectFlagBits>(subresourceLayers.aspectMask);
1928 vk::Format format = image->getFormat(aspect);
1929 VkSampleCountFlagBits samples = image->getSampleCountFlagBits();
1930 State state(format, format, samples, samples, Options{ 0xF });
1931
1932 if(samples != VK_SAMPLE_COUNT_1_BIT)
1933 {
1934 UNIMPLEMENTED("Multi-sampled cube: %d samples", static_cast<int>(samples));
1935 }
1936
1937 auto cornerUpdateRoutine = getCornerUpdateRoutine(state);
1938 if(!cornerUpdateRoutine)
1939 {
1940 return;
1941 }
1942
1943 VkExtent3D extent = image->getMipLevelExtent(aspect, subresourceLayers.mipLevel);
Ben Claytonfccfc562019-12-17 20:37:31 +00001944 CubeBorderData data = {
Nicolas Capens157ba262019-12-10 17:49:14 -05001945 image->getTexelPointer({ 0, 0, 0 }, posX),
1946 image->rowPitchBytes(aspect, subresourceLayers.mipLevel),
1947 static_cast<uint32_t>(image->getLayerSize(aspect)),
1948 extent.width
1949 };
1950 cornerUpdateRoutine(&data);
1951}
1952
Ben Claytonfccfc562019-12-17 20:37:31 +00001953void Blitter::copyCubeEdge(vk::Image *image,
1954 const VkImageSubresourceLayers &dstSubresourceLayers, Edge dstEdge,
1955 const VkImageSubresourceLayers &srcSubresourceLayers, Edge srcEdge)
Nicolas Capens157ba262019-12-10 17:49:14 -05001956{
1957 ASSERT(srcSubresourceLayers.aspectMask == dstSubresourceLayers.aspectMask);
1958 ASSERT(srcSubresourceLayers.mipLevel == dstSubresourceLayers.mipLevel);
1959 ASSERT(srcSubresourceLayers.baseArrayLayer != dstSubresourceLayers.baseArrayLayer);
1960 ASSERT(srcSubresourceLayers.layerCount == 1);
1961 ASSERT(dstSubresourceLayers.layerCount == 1);
1962
1963 // Figure out if the edges to be copied in reverse order respectively from one another
1964 // The copy should be reversed whenever the same edges are contiguous or if we're
1965 // copying top <-> right or bottom <-> left. This is explained by the layout, which is:
1966 //
1967 // | +y |
1968 // | -x | +z | +x | -z |
1969 // | -y |
1970
1971 bool reverse = (srcEdge == dstEdge) ||
1972 ((srcEdge == TOP) && (dstEdge == RIGHT)) ||
1973 ((srcEdge == RIGHT) && (dstEdge == TOP)) ||
1974 ((srcEdge == BOTTOM) && (dstEdge == LEFT)) ||
1975 ((srcEdge == LEFT) && (dstEdge == BOTTOM));
1976
1977 VkImageAspectFlagBits aspect = static_cast<VkImageAspectFlagBits>(srcSubresourceLayers.aspectMask);
1978 int bytes = image->getFormat(aspect).bytes();
1979 int pitchB = image->rowPitchBytes(aspect, srcSubresourceLayers.mipLevel);
1980
1981 VkExtent3D extent = image->getMipLevelExtent(aspect, srcSubresourceLayers.mipLevel);
1982 int w = extent.width;
1983 int h = extent.height;
1984 if(w != h)
1985 {
1986 UNSUPPORTED("Cube doesn't have square faces : (%d, %d)", w, h);
1987 }
1988
1989 // Src is expressed in the regular [0, width-1], [0, height-1] space
1990 bool srcHorizontal = ((srcEdge == TOP) || (srcEdge == BOTTOM));
1991 int srcDelta = srcHorizontal ? bytes : pitchB;
1992 VkOffset3D srcOffset = { (srcEdge == RIGHT) ? (w - 1) : 0, (srcEdge == BOTTOM) ? (h - 1) : 0, 0 };
1993
1994 // Dst contains borders, so it is expressed in the [-1, width], [-1, height] space
1995 bool dstHorizontal = ((dstEdge == TOP) || (dstEdge == BOTTOM));
1996 int dstDelta = (dstHorizontal ? bytes : pitchB) * (reverse ? -1 : 1);
1997 VkOffset3D dstOffset = { (dstEdge == RIGHT) ? w : -1, (dstEdge == BOTTOM) ? h : -1, 0 };
1998
1999 // Don't write in the corners
2000 if(dstHorizontal)
2001 {
2002 dstOffset.x += reverse ? w : 1;
2003 }
2004 else
2005 {
2006 dstOffset.y += reverse ? h : 1;
2007 }
2008
Ben Claytonfccfc562019-12-17 20:37:31 +00002009 const uint8_t *src = static_cast<const uint8_t *>(image->getTexelPointer(srcOffset, srcSubresourceLayers));
2010 uint8_t *dst = static_cast<uint8_t *>(image->getTexelPointer(dstOffset, dstSubresourceLayers));
Nicolas Capens157ba262019-12-10 17:49:14 -05002011 ASSERT((src < image->end()) && ((src + (w * srcDelta)) < image->end()));
2012 ASSERT((dst < image->end()) && ((dst + (w * dstDelta)) < image->end()));
2013
2014 for(int i = 0; i < w; ++i, dst += dstDelta, src += srcDelta)
2015 {
2016 memcpy(dst, src, bytes);
2017 }
2018}
2019
Ben Claytonfccfc562019-12-17 20:37:31 +00002020} // namespace sw