blob: fca6cbd4cbe9e888d5ab24a33004e30b28751797 [file] [log] [blame]
// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
15#include "Blitter.hpp"
16
Nicolas Capens1d8c8db2018-11-05 16:30:42 -050017#include "Pipeline/ShaderCore.hpp"
Nicolas Capens68a82382018-10-02 13:16:55 -040018#include "Reactor/Reactor.hpp"
Ben Clayton25e06e02020-02-07 11:19:08 +000019#include "System/Debug.hpp"
Nicolas Capens02cbe8e2019-08-05 15:10:05 -040020#include "System/Half.hpp"
Nicolas Capens1d8c8db2018-11-05 16:30:42 -050021#include "System/Memory.hpp"
Ben Claytonfccfc562019-12-17 20:37:31 +000022#include "Vulkan/VkBuffer.hpp"
Alexis Hetu33642272019-03-01 11:55:59 -050023#include "Vulkan/VkImage.hpp"
Nicolas Capens68a82382018-10-02 13:16:55 -040024
Nicolas Capensb8c63932019-03-19 01:52:40 -040025#include <utility>
26
Alexis Hetu3716c202019-12-19 17:09:08 -050027namespace {
28rr::RValue<rr::Int> PackFields(rr::Int4 const &ints, const sw::int4 shifts)
29{
30 return (rr::Int(ints.x) << shifts[0]) |
31 (rr::Int(ints.y) << shifts[1]) |
32 (rr::Int(ints.z) << shifts[2]) |
33 (rr::Int(ints.w) << shifts[3]);
34}
35} // namespace
36
Nicolas Capens157ba262019-12-10 17:49:14 -050037namespace sw {
38
Ben Claytonfccfc562019-12-17 20:37:31 +000039Blitter::Blitter()
40 : blitMutex()
41 , blitCache(1024)
42 , cornerUpdateMutex()
43 , cornerUpdateCache(64) // We only need one of these per format
Nicolas Capens68a82382018-10-02 13:16:55 -040044{
Nicolas Capens157ba262019-12-10 17:49:14 -050045}
46
47Blitter::~Blitter()
48{
49}
50
Ben Claytonfccfc562019-12-17 20:37:31 +000051void Blitter::clear(void *pixel, vk::Format format, vk::Image *dest, const vk::Format &viewFormat, const VkImageSubresourceRange &subresourceRange, const VkRect2D *renderArea)
Nicolas Capens157ba262019-12-10 17:49:14 -050052{
53 VkImageAspectFlagBits aspect = static_cast<VkImageAspectFlagBits>(subresourceRange.aspectMask);
54 vk::Format dstFormat = viewFormat.getAspectFormat(aspect);
55 if(dstFormat == VK_FORMAT_UNDEFINED)
Nicolas Capens68a82382018-10-02 13:16:55 -040056 {
Nicolas Capens157ba262019-12-10 17:49:14 -050057 return;
Nicolas Capens68a82382018-10-02 13:16:55 -040058 }
59
Nicolas Capens157ba262019-12-10 17:49:14 -050060 float *pPixel = static_cast<float *>(pixel);
Alexis Hetu64da65b2020-05-12 16:38:35 -040061 if(viewFormat.isUnsignedNormalized() || viewFormat.isSRGBformat())
Nicolas Capens68a82382018-10-02 13:16:55 -040062 {
Nicolas Capens157ba262019-12-10 17:49:14 -050063 pPixel[0] = sw::clamp(pPixel[0], 0.0f, 1.0f);
64 pPixel[1] = sw::clamp(pPixel[1], 0.0f, 1.0f);
65 pPixel[2] = sw::clamp(pPixel[2], 0.0f, 1.0f);
66 pPixel[3] = sw::clamp(pPixel[3], 0.0f, 1.0f);
67 }
Nicolas Capens81bc9d92019-12-16 15:05:57 -050068 else if(viewFormat.isSignedNormalized())
Nicolas Capens157ba262019-12-10 17:49:14 -050069 {
70 pPixel[0] = sw::clamp(pPixel[0], -1.0f, 1.0f);
71 pPixel[1] = sw::clamp(pPixel[1], -1.0f, 1.0f);
72 pPixel[2] = sw::clamp(pPixel[2], -1.0f, 1.0f);
73 pPixel[3] = sw::clamp(pPixel[3], -1.0f, 1.0f);
Nicolas Capens68a82382018-10-02 13:16:55 -040074 }
75
Nicolas Capens157ba262019-12-10 17:49:14 -050076 if(fastClear(pixel, format, dest, dstFormat, subresourceRange, renderArea))
Alexis Hetu33642272019-03-01 11:55:59 -050077 {
Nicolas Capens157ba262019-12-10 17:49:14 -050078 return;
79 }
80
81 State state(format, dstFormat, 1, dest->getSampleCountFlagBits(), Options{ 0xF });
82 auto blitRoutine = getBlitRoutine(state);
83 if(!blitRoutine)
84 {
85 return;
86 }
87
Alexis Hetu46159712020-06-15 16:13:51 -040088 VkImageSubresource subres = {
Nicolas Capens157ba262019-12-10 17:49:14 -050089 subresourceRange.aspectMask,
90 subresourceRange.baseMipLevel,
Alexis Hetu46159712020-06-15 16:13:51 -040091 subresourceRange.baseArrayLayer
Nicolas Capens157ba262019-12-10 17:49:14 -050092 };
93
94 uint32_t lastMipLevel = dest->getLastMipLevel(subresourceRange);
95 uint32_t lastLayer = dest->getLastLayerIndex(subresourceRange);
96
97 VkRect2D area = { { 0, 0 }, { 0, 0 } };
98 if(renderArea)
99 {
100 ASSERT(subresourceRange.levelCount == 1);
101 area = *renderArea;
102 }
103
Alexis Hetu46159712020-06-15 16:13:51 -0400104 for(; subres.mipLevel <= lastMipLevel; subres.mipLevel++)
Nicolas Capens157ba262019-12-10 17:49:14 -0500105 {
Alexis Hetu46159712020-06-15 16:13:51 -0400106 VkExtent3D extent = dest->getMipLevelExtent(aspect, subres.mipLevel);
Nicolas Capens157ba262019-12-10 17:49:14 -0500107 if(!renderArea)
Alexis Hetu33642272019-03-01 11:55:59 -0500108 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500109 area.extent.width = extent.width;
110 area.extent.height = extent.height;
Alexis Hetu33642272019-03-01 11:55:59 -0500111 }
112
Ben Claytonfccfc562019-12-17 20:37:31 +0000113 BlitData data = {
114 pixel, nullptr, // source, dest
Chris Forbes88289192019-08-28 16:49:36 -0700115
Alexis Hetu46159712020-06-15 16:13:51 -0400116 format.bytes(), // sPitchB
117 dest->rowPitchBytes(aspect, subres.mipLevel), // dPitchB
118 0, // sSliceB (unused in clear operations)
119 dest->slicePitchBytes(aspect, subres.mipLevel), // dSliceB
Alexis Hetu33642272019-03-01 11:55:59 -0500120
Alexis Hetu18daa812020-03-11 17:06:53 -0400121 0.5f, 0.5f, 0.5f, 0.0f, 0.0f, 0.0f, // x0, y0, z0, w, h, d
Alexis Hetu33642272019-03-01 11:55:59 -0500122
Ben Claytonfccfc562019-12-17 20:37:31 +0000123 area.offset.x, static_cast<int>(area.offset.x + area.extent.width), // x0d, x1d
Alexis Hetu18daa812020-03-11 17:06:53 -0400124 area.offset.y, static_cast<int>(area.offset.y + area.extent.height), // y0d, y1d
125 0, 1, // z0d, z1d
Nicolas Capens157ba262019-12-10 17:49:14 -0500126
Alexis Hetu18daa812020-03-11 17:06:53 -0400127 0, 0, 0, // sWidth, sHeight, sDepth
Ben Clayton21fb75f2020-04-16 10:36:55 +0100128
129 false, // filter3D
Alexis Hetu33642272019-03-01 11:55:59 -0500130 };
131
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500132 if(renderArea && dest->is3DSlice())
Alexis Hetu33642272019-03-01 11:55:59 -0500133 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500134 // Reinterpret layers as depth slices
Alexis Hetu46159712020-06-15 16:13:51 -0400135 subres.arrayLayer = 0;
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500136 for(uint32_t depth = subresourceRange.baseArrayLayer; depth <= lastLayer; depth++)
Alexis Hetu33642272019-03-01 11:55:59 -0500137 {
Alexis Hetu46159712020-06-15 16:13:51 -0400138 data.dest = dest->getTexelPointer({ 0, 0, static_cast<int32_t>(depth) }, subres);
Nicolas Capens157ba262019-12-10 17:49:14 -0500139 blitRoutine(&data);
Nicolas Capens68a82382018-10-02 13:16:55 -0400140 }
141 }
Nicolas Capens88ac3672019-08-01 13:22:34 -0400142 else
Nicolas Capens68a82382018-10-02 13:16:55 -0400143 {
Alexis Hetu46159712020-06-15 16:13:51 -0400144 for(subres.arrayLayer = subresourceRange.baseArrayLayer; subres.arrayLayer <= lastLayer; subres.arrayLayer++)
Nicolas Capens68a82382018-10-02 13:16:55 -0400145 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500146 for(uint32_t depth = 0; depth < extent.depth; depth++)
Nicolas Capens68a82382018-10-02 13:16:55 -0400147 {
Alexis Hetu46159712020-06-15 16:13:51 -0400148 data.dest = dest->getTexelPointer({ 0, 0, static_cast<int32_t>(depth) }, subres);
Nicolas Capens157ba262019-12-10 17:49:14 -0500149
150 blitRoutine(&data);
151 }
152 }
153 }
154 }
Alexis Hetu4f438a52020-06-15 16:13:51 -0400155 dest->contentsChanged(subresourceRange);
Nicolas Capens157ba262019-12-10 17:49:14 -0500156}
157
Nicolas Capensb3240d02020-06-10 22:40:19 -0400158bool Blitter::fastClear(void *clearValue, vk::Format clearFormat, vk::Image *dest, const vk::Format &viewFormat, const VkImageSubresourceRange &subresourceRange, const VkRect2D *renderArea)
Nicolas Capens157ba262019-12-10 17:49:14 -0500159{
Nicolas Capensb3240d02020-06-10 22:40:19 -0400160 if(clearFormat != VK_FORMAT_R32G32B32A32_SFLOAT &&
161 clearFormat != VK_FORMAT_D32_SFLOAT &&
162 clearFormat != VK_FORMAT_S8_UINT)
Nicolas Capens157ba262019-12-10 17:49:14 -0500163 {
164 return false;
165 }
166
Nicolas Capensb3240d02020-06-10 22:40:19 -0400167 union ClearValue
168 {
169 struct
170 {
171 float r;
172 float g;
173 float b;
174 float a;
175 };
Nicolas Capens157ba262019-12-10 17:49:14 -0500176
Nicolas Capensb3240d02020-06-10 22:40:19 -0400177 float rgb[3];
178
179 float d;
180 uint32_t d_as_u32;
181
182 uint32_t s;
183 };
184
185 ClearValue &c = *reinterpret_cast<ClearValue *>(clearValue);
186
187 uint32_t packed = 0;
Nicolas Capens157ba262019-12-10 17:49:14 -0500188
189 VkImageAspectFlagBits aspect = static_cast<VkImageAspectFlagBits>(subresourceRange.aspectMask);
190 switch(viewFormat)
191 {
Ben Claytonfccfc562019-12-17 20:37:31 +0000192 case VK_FORMAT_R5G6B5_UNORM_PACK16:
Nicolas Capensb3240d02020-06-10 22:40:19 -0400193 packed = ((uint16_t)(31 * c.b + 0.5f) << 0) |
194 ((uint16_t)(63 * c.g + 0.5f) << 5) |
195 ((uint16_t)(31 * c.r + 0.5f) << 11);
Ben Claytonfccfc562019-12-17 20:37:31 +0000196 break;
197 case VK_FORMAT_B5G6R5_UNORM_PACK16:
Nicolas Capensb3240d02020-06-10 22:40:19 -0400198 packed = ((uint16_t)(31 * c.r + 0.5f) << 0) |
199 ((uint16_t)(63 * c.g + 0.5f) << 5) |
200 ((uint16_t)(31 * c.b + 0.5f) << 11);
Ben Claytonfccfc562019-12-17 20:37:31 +0000201 break;
202 case VK_FORMAT_A8B8G8R8_UINT_PACK32:
203 case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
204 case VK_FORMAT_R8G8B8A8_UNORM:
Nicolas Capensb3240d02020-06-10 22:40:19 -0400205 packed = ((uint32_t)(255 * c.a + 0.5f) << 24) |
206 ((uint32_t)(255 * c.b + 0.5f) << 16) |
207 ((uint32_t)(255 * c.g + 0.5f) << 8) |
208 ((uint32_t)(255 * c.r + 0.5f) << 0);
Ben Claytonfccfc562019-12-17 20:37:31 +0000209 break;
210 case VK_FORMAT_B8G8R8A8_UNORM:
Nicolas Capensb3240d02020-06-10 22:40:19 -0400211 packed = ((uint32_t)(255 * c.a + 0.5f) << 24) |
212 ((uint32_t)(255 * c.r + 0.5f) << 16) |
213 ((uint32_t)(255 * c.g + 0.5f) << 8) |
214 ((uint32_t)(255 * c.b + 0.5f) << 0);
Ben Claytonfccfc562019-12-17 20:37:31 +0000215 break;
216 case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
Nicolas Capensb3240d02020-06-10 22:40:19 -0400217 packed = R11G11B10F(c.rgb);
Ben Claytonfccfc562019-12-17 20:37:31 +0000218 break;
219 case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32:
Nicolas Capensb3240d02020-06-10 22:40:19 -0400220 packed = RGB9E5(c.rgb);
221 break;
222 case VK_FORMAT_D32_SFLOAT:
223 ASSERT(clearFormat == VK_FORMAT_D32_SFLOAT);
224 packed = c.d_as_u32; // float reinterpreted as uint32
225 break;
226 case VK_FORMAT_S8_UINT:
227 ASSERT(clearFormat == VK_FORMAT_S8_UINT);
228 packed = static_cast<uint8_t>(c.s);
Ben Claytonfccfc562019-12-17 20:37:31 +0000229 break;
230 default:
231 return false;
Nicolas Capens157ba262019-12-10 17:49:14 -0500232 }
233
Alexis Hetu46159712020-06-15 16:13:51 -0400234 VkImageSubresource subres = {
Nicolas Capens157ba262019-12-10 17:49:14 -0500235 subresourceRange.aspectMask,
236 subresourceRange.baseMipLevel,
Alexis Hetu46159712020-06-15 16:13:51 -0400237 subresourceRange.baseArrayLayer
Nicolas Capens157ba262019-12-10 17:49:14 -0500238 };
239 uint32_t lastMipLevel = dest->getLastMipLevel(subresourceRange);
240 uint32_t lastLayer = dest->getLastLayerIndex(subresourceRange);
241
242 VkRect2D area = { { 0, 0 }, { 0, 0 } };
243 if(renderArea)
244 {
245 ASSERT(subresourceRange.levelCount == 1);
246 area = *renderArea;
247 }
248
Alexis Hetu46159712020-06-15 16:13:51 -0400249 for(; subres.mipLevel <= lastMipLevel; subres.mipLevel++)
Nicolas Capens157ba262019-12-10 17:49:14 -0500250 {
Alexis Hetu46159712020-06-15 16:13:51 -0400251 int rowPitchBytes = dest->rowPitchBytes(aspect, subres.mipLevel);
252 int slicePitchBytes = dest->slicePitchBytes(aspect, subres.mipLevel);
253 VkExtent3D extent = dest->getMipLevelExtent(aspect, subres.mipLevel);
Nicolas Capens157ba262019-12-10 17:49:14 -0500254 if(!renderArea)
255 {
256 area.extent.width = extent.width;
257 area.extent.height = extent.height;
258 }
259 if(dest->is3DSlice())
260 {
Ben Claytonfccfc562019-12-17 20:37:31 +0000261 extent.depth = 1; // The 3D image is instead interpreted as a 2D image with layers
Nicolas Capens157ba262019-12-10 17:49:14 -0500262 }
263
Alexis Hetu46159712020-06-15 16:13:51 -0400264 for(subres.arrayLayer = subresourceRange.baseArrayLayer; subres.arrayLayer <= lastLayer; subres.arrayLayer++)
Nicolas Capens157ba262019-12-10 17:49:14 -0500265 {
266 for(uint32_t depth = 0; depth < extent.depth; depth++)
267 {
Ben Claytonfccfc562019-12-17 20:37:31 +0000268 uint8_t *slice = (uint8_t *)dest->getTexelPointer(
Alexis Hetu46159712020-06-15 16:13:51 -0400269 { area.offset.x, area.offset.y, static_cast<int32_t>(depth) }, subres);
Nicolas Capens157ba262019-12-10 17:49:14 -0500270
271 for(int j = 0; j < dest->getSampleCountFlagBits(); j++)
272 {
273 uint8_t *d = slice;
274
275 switch(viewFormat.bytes())
276 {
Nicolas Capensb3240d02020-06-10 22:40:19 -0400277 case 4:
278 for(uint32_t i = 0; i < area.extent.height; i++)
279 {
280 ASSERT(d < dest->end());
281 sw::clear((uint32_t *)d, packed, area.extent.width);
282 d += rowPitchBytes;
283 }
284 break;
Ben Claytonfccfc562019-12-17 20:37:31 +0000285 case 2:
286 for(uint32_t i = 0; i < area.extent.height; i++)
287 {
288 ASSERT(d < dest->end());
289 sw::clear((uint16_t *)d, static_cast<uint16_t>(packed), area.extent.width);
290 d += rowPitchBytes;
291 }
292 break;
Nicolas Capensb3240d02020-06-10 22:40:19 -0400293 case 1:
Ben Claytonfccfc562019-12-17 20:37:31 +0000294 for(uint32_t i = 0; i < area.extent.height; i++)
295 {
296 ASSERT(d < dest->end());
Nicolas Capensb3240d02020-06-10 22:40:19 -0400297 memset(d, packed, area.extent.width);
Ben Claytonfccfc562019-12-17 20:37:31 +0000298 d += rowPitchBytes;
299 }
300 break;
301 default:
302 assert(false);
Nicolas Capens157ba262019-12-10 17:49:14 -0500303 }
304
305 slice += slicePitchBytes;
306 }
307 }
308 }
309 }
Alexis Hetu4f438a52020-06-15 16:13:51 -0400310 dest->contentsChanged(subresourceRange);
Nicolas Capens157ba262019-12-10 17:49:14 -0500311
312 return true;
313}
314
315Float4 Blitter::readFloat4(Pointer<Byte> element, const State &state)
316{
317 Float4 c(0.0f, 0.0f, 0.0f, 1.0f);
318
319 switch(state.sourceFormat)
320 {
Ben Claytonfccfc562019-12-17 20:37:31 +0000321 case VK_FORMAT_B4G4R4A4_UNORM_PACK16:
322 c.w = Float(Int(*Pointer<Byte>(element)) & Int(0xF));
323 c.x = Float((Int(*Pointer<Byte>(element)) >> 4) & Int(0xF));
324 c.y = Float(Int(*Pointer<Byte>(element + 1)) & Int(0xF));
325 c.z = Float((Int(*Pointer<Byte>(element + 1)) >> 4) & Int(0xF));
326 break;
327 case VK_FORMAT_R8_SINT:
328 case VK_FORMAT_R8_SNORM:
329 c.x = Float(Int(*Pointer<SByte>(element)));
330 c.w = float(0x7F);
331 break;
332 case VK_FORMAT_R8_UNORM:
333 case VK_FORMAT_R8_UINT:
334 case VK_FORMAT_R8_SRGB:
335 c.x = Float(Int(*Pointer<Byte>(element)));
336 c.w = float(0xFF);
337 break;
338 case VK_FORMAT_R16_SINT:
339 case VK_FORMAT_R16_SNORM:
340 c.x = Float(Int(*Pointer<Short>(element)));
341 c.w = float(0x7FFF);
342 break;
343 case VK_FORMAT_R16_UNORM:
344 case VK_FORMAT_R16_UINT:
345 c.x = Float(Int(*Pointer<UShort>(element)));
346 c.w = float(0xFFFF);
347 break;
348 case VK_FORMAT_R32_SINT:
349 c.x = Float(*Pointer<Int>(element));
350 c.w = float(0x7FFFFFFF);
351 break;
352 case VK_FORMAT_R32_UINT:
353 c.x = Float(*Pointer<UInt>(element));
354 c.w = float(0xFFFFFFFF);
355 break;
356 case VK_FORMAT_B8G8R8A8_SRGB:
357 case VK_FORMAT_B8G8R8A8_UNORM:
358 c = Float4(*Pointer<Byte4>(element)).zyxw;
359 break;
360 case VK_FORMAT_A8B8G8R8_SINT_PACK32:
361 case VK_FORMAT_R8G8B8A8_SINT:
362 case VK_FORMAT_A8B8G8R8_SNORM_PACK32:
363 case VK_FORMAT_R8G8B8A8_SNORM:
364 c = Float4(*Pointer<SByte4>(element));
365 break;
366 case VK_FORMAT_A8B8G8R8_UINT_PACK32:
367 case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
368 case VK_FORMAT_R8G8B8A8_UNORM:
369 case VK_FORMAT_R8G8B8A8_UINT:
370 case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
371 case VK_FORMAT_R8G8B8A8_SRGB:
372 c = Float4(*Pointer<Byte4>(element));
373 break;
374 case VK_FORMAT_R16G16B16A16_SINT:
Nicolas Capensf6f11212020-07-01 00:27:23 -0400375 case VK_FORMAT_R16G16B16A16_SNORM:
Ben Claytonfccfc562019-12-17 20:37:31 +0000376 c = Float4(*Pointer<Short4>(element));
377 break;
378 case VK_FORMAT_R16G16B16A16_UNORM:
379 case VK_FORMAT_R16G16B16A16_UINT:
380 c = Float4(*Pointer<UShort4>(element));
381 break;
382 case VK_FORMAT_R32G32B32A32_SINT:
383 c = Float4(*Pointer<Int4>(element));
384 break;
385 case VK_FORMAT_R32G32B32A32_UINT:
386 c = Float4(*Pointer<UInt4>(element));
387 break;
388 case VK_FORMAT_R8G8_SINT:
389 case VK_FORMAT_R8G8_SNORM:
390 c.x = Float(Int(*Pointer<SByte>(element + 0)));
391 c.y = Float(Int(*Pointer<SByte>(element + 1)));
392 c.w = float(0x7F);
393 break;
394 case VK_FORMAT_R8G8_UNORM:
395 case VK_FORMAT_R8G8_UINT:
396 case VK_FORMAT_R8G8_SRGB:
397 c.x = Float(Int(*Pointer<Byte>(element + 0)));
398 c.y = Float(Int(*Pointer<Byte>(element + 1)));
399 c.w = float(0xFF);
400 break;
401 case VK_FORMAT_R16G16_SINT:
402 case VK_FORMAT_R16G16_SNORM:
403 c.x = Float(Int(*Pointer<Short>(element + 0)));
404 c.y = Float(Int(*Pointer<Short>(element + 2)));
405 c.w = float(0x7FFF);
406 break;
407 case VK_FORMAT_R16G16_UNORM:
408 case VK_FORMAT_R16G16_UINT:
409 c.x = Float(Int(*Pointer<UShort>(element + 0)));
410 c.y = Float(Int(*Pointer<UShort>(element + 2)));
411 c.w = float(0xFFFF);
412 break;
413 case VK_FORMAT_R32G32_SINT:
414 c.x = Float(*Pointer<Int>(element + 0));
415 c.y = Float(*Pointer<Int>(element + 4));
416 c.w = float(0x7FFFFFFF);
417 break;
418 case VK_FORMAT_R32G32_UINT:
419 c.x = Float(*Pointer<UInt>(element + 0));
420 c.y = Float(*Pointer<UInt>(element + 4));
421 c.w = float(0xFFFFFFFF);
422 break;
423 case VK_FORMAT_R32G32B32A32_SFLOAT:
424 c = *Pointer<Float4>(element);
425 break;
426 case VK_FORMAT_R32G32_SFLOAT:
427 c.x = *Pointer<Float>(element + 0);
428 c.y = *Pointer<Float>(element + 4);
429 break;
430 case VK_FORMAT_R32_SFLOAT:
431 c.x = *Pointer<Float>(element);
432 break;
433 case VK_FORMAT_R16G16B16A16_SFLOAT:
434 c.w = Float(*Pointer<Half>(element + 6));
435 case VK_FORMAT_R16G16B16_SFLOAT:
436 c.z = Float(*Pointer<Half>(element + 4));
437 case VK_FORMAT_R16G16_SFLOAT:
438 c.y = Float(*Pointer<Half>(element + 2));
439 case VK_FORMAT_R16_SFLOAT:
440 c.x = Float(*Pointer<Half>(element));
441 break;
442 case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
443 c = r11g11b10Unpack(*Pointer<UInt>(element));
444 break;
445 case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32:
446 // This type contains a common 5 bit exponent (E) and a 9 bit the mantissa for R, G and B.
447 c.x = Float(*Pointer<UInt>(element) & UInt(0x000001FF)); // R's mantissa (bits 0-8)
448 c.y = Float((*Pointer<UInt>(element) & UInt(0x0003FE00)) >> 9); // G's mantissa (bits 9-17)
449 c.z = Float((*Pointer<UInt>(element) & UInt(0x07FC0000)) >> 18); // B's mantissa (bits 18-26)
450 c *= Float4(
451 // 2^E, using the exponent (bits 27-31) and treating it as an unsigned integer value
452 Float(UInt(1) << ((*Pointer<UInt>(element) & UInt(0xF8000000)) >> 27)) *
453 // Since the 9 bit mantissa values currently stored in RGB were converted straight
454 // from int to float (in the [0, 1<<9] range instead of the [0, 1] range), they
455 // are (1 << 9) times too high.
456 // Also, the exponent has 5 bits and we compute the exponent bias of floating point
457 // formats using "2^(k-1) - 1", so, in this case, the exponent bias is 2^(5-1)-1 = 15
458 // Exponent bias (15) + number of mantissa bits per component (9) = 24
459 Float(1.0f / (1 << 24)));
460 c.w = 1.0f;
461 break;
462 case VK_FORMAT_R5G6B5_UNORM_PACK16:
463 c.x = Float(Int((*Pointer<UShort>(element) & UShort(0xF800)) >> UShort(11)));
464 c.y = Float(Int((*Pointer<UShort>(element) & UShort(0x07E0)) >> UShort(5)));
465 c.z = Float(Int(*Pointer<UShort>(element) & UShort(0x001F)));
466 break;
467 case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
468 c.w = Float(Int((*Pointer<UShort>(element) & UShort(0x8000)) >> UShort(15)));
469 c.x = Float(Int((*Pointer<UShort>(element) & UShort(0x7C00)) >> UShort(10)));
470 c.y = Float(Int((*Pointer<UShort>(element) & UShort(0x03E0)) >> UShort(5)));
471 c.z = Float(Int(*Pointer<UShort>(element) & UShort(0x001F)));
472 break;
473 case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
474 case VK_FORMAT_A2B10G10R10_UINT_PACK32:
475 c.x = Float(Int((*Pointer<UInt>(element) & UInt(0x000003FF))));
476 c.y = Float(Int((*Pointer<UInt>(element) & UInt(0x000FFC00)) >> 10));
477 c.z = Float(Int((*Pointer<UInt>(element) & UInt(0x3FF00000)) >> 20));
478 c.w = Float(Int((*Pointer<UInt>(element) & UInt(0xC0000000)) >> 30));
479 break;
Alexis Hetub8a61bf2020-01-09 15:26:34 -0500480 case VK_FORMAT_A2R10G10B10_UNORM_PACK32:
481 case VK_FORMAT_A2R10G10B10_UINT_PACK32:
482 c.z = Float(Int((*Pointer<UInt>(element) & UInt(0x000003FF))));
483 c.y = Float(Int((*Pointer<UInt>(element) & UInt(0x000FFC00)) >> 10));
484 c.x = Float(Int((*Pointer<UInt>(element) & UInt(0x3FF00000)) >> 20));
485 c.w = Float(Int((*Pointer<UInt>(element) & UInt(0xC0000000)) >> 30));
486 break;
Ben Claytonfccfc562019-12-17 20:37:31 +0000487 case VK_FORMAT_D16_UNORM:
488 c.x = Float(Int((*Pointer<UShort>(element))));
489 break;
490 case VK_FORMAT_X8_D24_UNORM_PACK32:
491 c.x = Float(Int((*Pointer<UInt>(element) & UInt(0xFFFFFF00)) >> 8));
492 break;
493 case VK_FORMAT_D32_SFLOAT:
494 c.x = *Pointer<Float>(element);
495 break;
496 case VK_FORMAT_S8_UINT:
497 c.x = Float(Int(*Pointer<Byte>(element)));
498 break;
499 default:
500 UNSUPPORTED("Blitter source format %d", (int)state.sourceFormat);
Nicolas Capens157ba262019-12-10 17:49:14 -0500501 }
502
503 return c;
504}
505
506void Blitter::write(Float4 &c, Pointer<Byte> element, const State &state)
507{
508 bool writeR = state.writeRed;
509 bool writeG = state.writeGreen;
510 bool writeB = state.writeBlue;
511 bool writeA = state.writeAlpha;
512 bool writeRGBA = writeR && writeG && writeB && writeA;
513
514 switch(state.destFormat)
515 {
Ben Claytonfccfc562019-12-17 20:37:31 +0000516 case VK_FORMAT_R4G4_UNORM_PACK8:
517 if(writeR | writeG)
Nicolas Capens157ba262019-12-10 17:49:14 -0500518 {
Ben Claytonfccfc562019-12-17 20:37:31 +0000519 if(!writeR)
520 {
521 *Pointer<Byte>(element) = (Byte(RoundInt(Float(c.y))) & Byte(0xF)) |
522 (*Pointer<Byte>(element) & Byte(0xF0));
523 }
524 else if(!writeG)
525 {
526 *Pointer<Byte>(element) = (*Pointer<Byte>(element) & Byte(0xF)) |
527 (Byte(RoundInt(Float(c.x))) << Byte(4));
528 }
529 else
530 {
531 *Pointer<Byte>(element) = (Byte(RoundInt(Float(c.y))) & Byte(0xF)) |
532 (Byte(RoundInt(Float(c.x))) << Byte(4));
533 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500534 }
Ben Claytonfccfc562019-12-17 20:37:31 +0000535 break;
536 case VK_FORMAT_R4G4B4A4_UNORM_PACK16:
537 if(writeR || writeG || writeB || writeA)
Nicolas Capens157ba262019-12-10 17:49:14 -0500538 {
Ben Claytonfccfc562019-12-17 20:37:31 +0000539 *Pointer<UShort>(element) = (writeR ? ((UShort(RoundInt(Float(c.x))) & UShort(0xF)) << UShort(12)) : (*Pointer<UShort>(element) & UShort(0x000F))) |
540 (writeG ? ((UShort(RoundInt(Float(c.y))) & UShort(0xF)) << UShort(8)) : (*Pointer<UShort>(element) & UShort(0x00F0))) |
541 (writeB ? ((UShort(RoundInt(Float(c.z))) & UShort(0xF)) << UShort(4)) : (*Pointer<UShort>(element) & UShort(0x0F00))) |
542 (writeA ? (UShort(RoundInt(Float(c.w))) & UShort(0xF)) : (*Pointer<UShort>(element) & UShort(0xF000)));
543 }
544 break;
545 case VK_FORMAT_B4G4R4A4_UNORM_PACK16:
546 if(writeRGBA)
547 {
548 *Pointer<UShort>(element) = UShort(RoundInt(Float(c.w)) & Int(0xF)) |
549 UShort((RoundInt(Float(c.x)) & Int(0xF)) << 4) |
550 UShort((RoundInt(Float(c.y)) & Int(0xF)) << 8) |
551 UShort((RoundInt(Float(c.z)) & Int(0xF)) << 12);
Nicolas Capens157ba262019-12-10 17:49:14 -0500552 }
553 else
554 {
Ben Claytonfccfc562019-12-17 20:37:31 +0000555 unsigned short mask = (writeA ? 0x000F : 0x0000) |
556 (writeR ? 0x00F0 : 0x0000) |
557 (writeG ? 0x0F00 : 0x0000) |
558 (writeB ? 0xF000 : 0x0000);
559 unsigned short unmask = ~mask;
560 *Pointer<UShort>(element) = (*Pointer<UShort>(element) & UShort(unmask)) |
561 ((UShort(RoundInt(Float(c.w)) & Int(0xF)) |
562 UShort((RoundInt(Float(c.x)) & Int(0xF)) << 4) |
563 UShort((RoundInt(Float(c.y)) & Int(0xF)) << 8) |
564 UShort((RoundInt(Float(c.z)) & Int(0xF)) << 12)) &
565 UShort(mask));
Nicolas Capens157ba262019-12-10 17:49:14 -0500566 }
Ben Claytonfccfc562019-12-17 20:37:31 +0000567 break;
568 case VK_FORMAT_B8G8R8A8_SRGB:
569 case VK_FORMAT_B8G8R8A8_UNORM:
570 if(writeRGBA)
571 {
572 Short4 c0 = RoundShort4(c.zyxw);
573 *Pointer<Byte4>(element) = Byte4(PackUnsigned(c0, c0));
574 }
575 else
576 {
577 if(writeB) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.z))); }
578 if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
579 if(writeR) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.x))); }
580 if(writeA) { *Pointer<Byte>(element + 3) = Byte(RoundInt(Float(c.w))); }
581 }
582 break;
583 case VK_FORMAT_B8G8R8_SNORM:
584 if(writeB) { *Pointer<SByte>(element + 0) = SByte(RoundInt(Float(c.z))); }
585 if(writeG) { *Pointer<SByte>(element + 1) = SByte(RoundInt(Float(c.y))); }
586 if(writeR) { *Pointer<SByte>(element + 2) = SByte(RoundInt(Float(c.x))); }
587 break;
588 case VK_FORMAT_B8G8R8_UNORM:
589 case VK_FORMAT_B8G8R8_SRGB:
Nicolas Capens157ba262019-12-10 17:49:14 -0500590 if(writeB) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.z))); }
591 if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
592 if(writeR) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.x))); }
Ben Claytonfccfc562019-12-17 20:37:31 +0000593 break;
594 case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
595 case VK_FORMAT_R8G8B8A8_UNORM:
596 case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
597 case VK_FORMAT_R8G8B8A8_SRGB:
598 case VK_FORMAT_A8B8G8R8_UINT_PACK32:
599 case VK_FORMAT_R8G8B8A8_UINT:
600 case VK_FORMAT_R8G8B8A8_USCALED:
601 case VK_FORMAT_A8B8G8R8_USCALED_PACK32:
602 if(writeRGBA)
603 {
604 Short4 c0 = RoundShort4(c);
605 *Pointer<Byte4>(element) = Byte4(PackUnsigned(c0, c0));
606 }
607 else
608 {
609 if(writeR) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.x))); }
610 if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
611 if(writeB) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.z))); }
612 if(writeA) { *Pointer<Byte>(element + 3) = Byte(RoundInt(Float(c.w))); }
613 }
614 break;
615 case VK_FORMAT_R32G32B32A32_SFLOAT:
616 if(writeRGBA)
617 {
618 *Pointer<Float4>(element) = c;
619 }
620 else
621 {
622 if(writeR) { *Pointer<Float>(element) = c.x; }
623 if(writeG) { *Pointer<Float>(element + 4) = c.y; }
624 if(writeB) { *Pointer<Float>(element + 8) = c.z; }
625 if(writeA) { *Pointer<Float>(element + 12) = c.w; }
626 }
627 break;
628 case VK_FORMAT_R32G32B32_SFLOAT:
Nicolas Capens157ba262019-12-10 17:49:14 -0500629 if(writeR) { *Pointer<Float>(element) = c.x; }
630 if(writeG) { *Pointer<Float>(element + 4) = c.y; }
631 if(writeB) { *Pointer<Float>(element + 8) = c.z; }
Ben Claytonfccfc562019-12-17 20:37:31 +0000632 break;
633 case VK_FORMAT_R32G32_SFLOAT:
634 if(writeR && writeG)
635 {
636 *Pointer<Float2>(element) = Float2(c);
637 }
638 else
639 {
640 if(writeR) { *Pointer<Float>(element) = c.x; }
641 if(writeG) { *Pointer<Float>(element + 4) = c.y; }
642 }
643 break;
644 case VK_FORMAT_R32_SFLOAT:
Nicolas Capens157ba262019-12-10 17:49:14 -0500645 if(writeR) { *Pointer<Float>(element) = c.x; }
Ben Claytonfccfc562019-12-17 20:37:31 +0000646 break;
647 case VK_FORMAT_R16G16B16A16_SFLOAT:
648 if(writeA) { *Pointer<Half>(element + 6) = Half(c.w); }
Nicolas Capens0405ba02020-01-16 01:19:21 -0500649 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +0000650 case VK_FORMAT_R16G16B16_SFLOAT:
651 if(writeB) { *Pointer<Half>(element + 4) = Half(c.z); }
Nicolas Capens0405ba02020-01-16 01:19:21 -0500652 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +0000653 case VK_FORMAT_R16G16_SFLOAT:
654 if(writeG) { *Pointer<Half>(element + 2) = Half(c.y); }
Nicolas Capens0405ba02020-01-16 01:19:21 -0500655 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +0000656 case VK_FORMAT_R16_SFLOAT:
657 if(writeR) { *Pointer<Half>(element) = Half(c.x); }
658 break;
659 case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
Nicolas Capens157ba262019-12-10 17:49:14 -0500660 {
Alexis Hetu24c49dd2019-12-13 16:32:43 -0500661 UInt rgb = r11g11b10Pack(c);
Nicolas Capens157ba262019-12-10 17:49:14 -0500662
663 UInt old = *Pointer<UInt>(element);
664
665 unsigned int mask = (writeR ? 0x000007FF : 0) |
666 (writeG ? 0x003FF800 : 0) |
667 (writeB ? 0xFFC00000 : 0);
668
669 *Pointer<UInt>(element) = (rgb & mask) | (old & ~mask);
670 }
671 break;
Ben Claytonfccfc562019-12-17 20:37:31 +0000672 case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32:
Nicolas Capens157ba262019-12-10 17:49:14 -0500673 {
674 ASSERT(writeRGBA); // Can't sensibly write just part of this format.
675
676 // Vulkan 1.1.117 section 15.2.1 RGB to Shared Exponent Conversion
677
678 constexpr int N = 9; // number of mantissa bits per component
679 constexpr int B = 15; // exponent bias
680 constexpr int E_max = 31; // maximum possible biased exponent value
681
682 // Maximum representable value.
683 constexpr float sharedexp_max = ((static_cast<float>(1 << N) - 1) / static_cast<float>(1 << N)) * static_cast<float>(1 << (E_max - B));
684
685 // Clamp components to valid range. NaN becomes 0.
Ben Claytonfccfc562019-12-17 20:37:31 +0000686 Float red_c = Min(IfThenElse(!(c.x > 0), Float(0), Float(c.x)), sharedexp_max);
Nicolas Capens157ba262019-12-10 17:49:14 -0500687 Float green_c = Min(IfThenElse(!(c.y > 0), Float(0), Float(c.y)), sharedexp_max);
Ben Claytonfccfc562019-12-17 20:37:31 +0000688 Float blue_c = Min(IfThenElse(!(c.z > 0), Float(0), Float(c.z)), sharedexp_max);
Nicolas Capens157ba262019-12-10 17:49:14 -0500689
690 // We're reducing the mantissa to 9 bits, so we must round up if the next
691 // bit is 1. In other words add 0.5 to the new mantissa's position and
692 // allow overflow into the exponent so we can scale correctly.
693 constexpr int half = 1 << (23 - N);
694 Float red_r = As<Float>(As<Int>(red_c) + half);
695 Float green_r = As<Float>(As<Int>(green_c) + half);
696 Float blue_r = As<Float>(As<Int>(blue_c) + half);
697
698 // The largest component determines the shared exponent. It can't be lower
699 // than 0 (after bias subtraction) so also limit to the mimimum representable.
700 constexpr float min_s = 0.5f / (1 << B);
701 Float max_s = Max(Max(red_r, green_r), Max(blue_r, min_s));
702
703 // Obtain the reciprocal of the shared exponent by inverting the bits,
704 // and scale by the new mantissa's size. Note that the IEEE-754 single-precision
705 // format has an implicit leading 1, but this shared component format does not.
706 Float scale = As<Float>((As<Int>(max_s) & 0x7F800000) ^ 0x7F800000) * (1 << (N - 2));
707
708 UInt R9 = RoundInt(red_c * scale);
709 UInt G9 = UInt(RoundInt(green_c * scale));
710 UInt B9 = UInt(RoundInt(blue_c * scale));
711 UInt E5 = (As<UInt>(max_s) >> 23) - 127 + 15 + 1;
712
713 UInt E5B9G9R9 = (E5 << 27) | (B9 << 18) | (G9 << 9) | R9;
714
715 *Pointer<UInt>(element) = E5B9G9R9;
716 }
717 break;
Ben Claytonfccfc562019-12-17 20:37:31 +0000718 case VK_FORMAT_B8G8R8A8_SNORM:
719 if(writeB) { *Pointer<SByte>(element) = SByte(RoundInt(Float(c.z))); }
720 if(writeG) { *Pointer<SByte>(element + 1) = SByte(RoundInt(Float(c.y))); }
721 if(writeR) { *Pointer<SByte>(element + 2) = SByte(RoundInt(Float(c.x))); }
722 if(writeA) { *Pointer<SByte>(element + 3) = SByte(RoundInt(Float(c.w))); }
723 break;
724 case VK_FORMAT_A8B8G8R8_SINT_PACK32:
725 case VK_FORMAT_R8G8B8A8_SINT:
726 case VK_FORMAT_A8B8G8R8_SNORM_PACK32:
727 case VK_FORMAT_R8G8B8A8_SNORM:
728 case VK_FORMAT_R8G8B8A8_SSCALED:
729 case VK_FORMAT_A8B8G8R8_SSCALED_PACK32:
730 if(writeA) { *Pointer<SByte>(element + 3) = SByte(RoundInt(Float(c.w))); }
Nicolas Capens0405ba02020-01-16 01:19:21 -0500731 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +0000732 case VK_FORMAT_R8G8B8_SINT:
733 case VK_FORMAT_R8G8B8_SNORM:
734 case VK_FORMAT_R8G8B8_SSCALED:
735 if(writeB) { *Pointer<SByte>(element + 2) = SByte(RoundInt(Float(c.z))); }
Nicolas Capens0405ba02020-01-16 01:19:21 -0500736 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +0000737 case VK_FORMAT_R8G8_SINT:
738 case VK_FORMAT_R8G8_SNORM:
739 case VK_FORMAT_R8G8_SSCALED:
740 if(writeG) { *Pointer<SByte>(element + 1) = SByte(RoundInt(Float(c.y))); }
Nicolas Capens0405ba02020-01-16 01:19:21 -0500741 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +0000742 case VK_FORMAT_R8_SINT:
743 case VK_FORMAT_R8_SNORM:
744 case VK_FORMAT_R8_SSCALED:
745 if(writeR) { *Pointer<SByte>(element) = SByte(RoundInt(Float(c.x))); }
746 break;
747 case VK_FORMAT_R8G8B8_UINT:
748 case VK_FORMAT_R8G8B8_UNORM:
749 case VK_FORMAT_R8G8B8_USCALED:
750 case VK_FORMAT_R8G8B8_SRGB:
751 if(writeB) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.z))); }
Nicolas Capens0405ba02020-01-16 01:19:21 -0500752 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +0000753 case VK_FORMAT_R8G8_UINT:
754 case VK_FORMAT_R8G8_UNORM:
755 case VK_FORMAT_R8G8_USCALED:
756 case VK_FORMAT_R8G8_SRGB:
757 if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
Nicolas Capens0405ba02020-01-16 01:19:21 -0500758 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +0000759 case VK_FORMAT_R8_UINT:
760 case VK_FORMAT_R8_UNORM:
761 case VK_FORMAT_R8_USCALED:
762 case VK_FORMAT_R8_SRGB:
763 if(writeR) { *Pointer<Byte>(element) = Byte(RoundInt(Float(c.x))); }
764 break;
765 case VK_FORMAT_R16G16B16A16_SINT:
766 case VK_FORMAT_R16G16B16A16_SNORM:
767 case VK_FORMAT_R16G16B16A16_SSCALED:
768 if(writeRGBA)
769 {
770 *Pointer<Short4>(element) = Short4(RoundInt(c));
771 }
772 else
773 {
774 if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); }
775 if(writeG) { *Pointer<Short>(element + 2) = Short(RoundInt(Float(c.y))); }
776 if(writeB) { *Pointer<Short>(element + 4) = Short(RoundInt(Float(c.z))); }
777 if(writeA) { *Pointer<Short>(element + 6) = Short(RoundInt(Float(c.w))); }
778 }
779 break;
780 case VK_FORMAT_R16G16B16_SINT:
781 case VK_FORMAT_R16G16B16_SNORM:
782 case VK_FORMAT_R16G16B16_SSCALED:
Nicolas Capens157ba262019-12-10 17:49:14 -0500783 if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); }
784 if(writeG) { *Pointer<Short>(element + 2) = Short(RoundInt(Float(c.y))); }
785 if(writeB) { *Pointer<Short>(element + 4) = Short(RoundInt(Float(c.z))); }
Ben Claytonfccfc562019-12-17 20:37:31 +0000786 break;
787 case VK_FORMAT_R16G16_SINT:
788 case VK_FORMAT_R16G16_SNORM:
789 case VK_FORMAT_R16G16_SSCALED:
790 if(writeR && writeG)
791 {
792 *Pointer<Short2>(element) = Short2(Short4(RoundInt(c)));
793 }
794 else
795 {
796 if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); }
797 if(writeG) { *Pointer<Short>(element + 2) = Short(RoundInt(Float(c.y))); }
798 }
799 break;
800 case VK_FORMAT_R16_SINT:
801 case VK_FORMAT_R16_SNORM:
802 case VK_FORMAT_R16_SSCALED:
Nicolas Capens157ba262019-12-10 17:49:14 -0500803 if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); }
Ben Claytonfccfc562019-12-17 20:37:31 +0000804 break;
805 case VK_FORMAT_R16G16B16A16_UINT:
806 case VK_FORMAT_R16G16B16A16_UNORM:
807 case VK_FORMAT_R16G16B16A16_USCALED:
808 if(writeRGBA)
809 {
810 *Pointer<UShort4>(element) = UShort4(RoundInt(c));
811 }
812 else
813 {
814 if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); }
815 if(writeG) { *Pointer<UShort>(element + 2) = UShort(RoundInt(Float(c.y))); }
816 if(writeB) { *Pointer<UShort>(element + 4) = UShort(RoundInt(Float(c.z))); }
817 if(writeA) { *Pointer<UShort>(element + 6) = UShort(RoundInt(Float(c.w))); }
818 }
819 break;
820 case VK_FORMAT_R16G16B16_UINT:
821 case VK_FORMAT_R16G16B16_UNORM:
822 case VK_FORMAT_R16G16B16_USCALED:
Nicolas Capens157ba262019-12-10 17:49:14 -0500823 if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); }
824 if(writeG) { *Pointer<UShort>(element + 2) = UShort(RoundInt(Float(c.y))); }
825 if(writeB) { *Pointer<UShort>(element + 4) = UShort(RoundInt(Float(c.z))); }
Ben Claytonfccfc562019-12-17 20:37:31 +0000826 break;
827 case VK_FORMAT_R16G16_UINT:
828 case VK_FORMAT_R16G16_UNORM:
829 case VK_FORMAT_R16G16_USCALED:
830 if(writeR && writeG)
831 {
832 *Pointer<UShort2>(element) = UShort2(UShort4(RoundInt(c)));
833 }
834 else
835 {
836 if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); }
837 if(writeG) { *Pointer<UShort>(element + 2) = UShort(RoundInt(Float(c.y))); }
838 }
839 break;
840 case VK_FORMAT_R16_UINT:
841 case VK_FORMAT_R16_UNORM:
842 case VK_FORMAT_R16_USCALED:
Nicolas Capens157ba262019-12-10 17:49:14 -0500843 if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); }
Ben Claytonfccfc562019-12-17 20:37:31 +0000844 break;
845 case VK_FORMAT_R32G32B32A32_SINT:
846 if(writeRGBA)
847 {
848 *Pointer<Int4>(element) = RoundInt(c);
849 }
850 else
851 {
852 if(writeR) { *Pointer<Int>(element) = RoundInt(Float(c.x)); }
853 if(writeG) { *Pointer<Int>(element + 4) = RoundInt(Float(c.y)); }
854 if(writeB) { *Pointer<Int>(element + 8) = RoundInt(Float(c.z)); }
855 if(writeA) { *Pointer<Int>(element + 12) = RoundInt(Float(c.w)); }
856 }
857 break;
858 case VK_FORMAT_R32G32B32_SINT:
Nicolas Capens157ba262019-12-10 17:49:14 -0500859 if(writeB) { *Pointer<Int>(element + 8) = RoundInt(Float(c.z)); }
Nicolas Capens0405ba02020-01-16 01:19:21 -0500860 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +0000861 case VK_FORMAT_R32G32_SINT:
862 if(writeG) { *Pointer<Int>(element + 4) = RoundInt(Float(c.y)); }
Nicolas Capens0405ba02020-01-16 01:19:21 -0500863 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +0000864 case VK_FORMAT_R32_SINT:
865 if(writeR) { *Pointer<Int>(element) = RoundInt(Float(c.x)); }
866 break;
867 case VK_FORMAT_R32G32B32A32_UINT:
868 if(writeRGBA)
869 {
870 *Pointer<UInt4>(element) = UInt4(RoundInt(c));
871 }
872 else
873 {
874 if(writeR) { *Pointer<UInt>(element) = As<UInt>(RoundInt(Float(c.x))); }
875 if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(RoundInt(Float(c.y))); }
876 if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(RoundInt(Float(c.z))); }
877 if(writeA) { *Pointer<UInt>(element + 12) = As<UInt>(RoundInt(Float(c.w))); }
878 }
879 break;
880 case VK_FORMAT_R32G32B32_UINT:
Nicolas Capens157ba262019-12-10 17:49:14 -0500881 if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(RoundInt(Float(c.z))); }
Nicolas Capens0405ba02020-01-16 01:19:21 -0500882 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +0000883 case VK_FORMAT_R32G32_UINT:
884 if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(RoundInt(Float(c.y))); }
Nicolas Capens0405ba02020-01-16 01:19:21 -0500885 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +0000886 case VK_FORMAT_R32_UINT:
887 if(writeR) { *Pointer<UInt>(element) = As<UInt>(RoundInt(Float(c.x))); }
888 break;
889 case VK_FORMAT_R5G6B5_UNORM_PACK16:
890 if(writeR && writeG && writeB)
891 {
Alexis Hetu3716c202019-12-19 17:09:08 -0500892 *Pointer<UShort>(element) = UShort(PackFields(RoundInt(c.xyzz), { 11, 5, 0, 0 }));
Ben Claytonfccfc562019-12-17 20:37:31 +0000893 }
894 else
895 {
896 unsigned short mask = (writeB ? 0x001F : 0x0000) | (writeG ? 0x07E0 : 0x0000) | (writeR ? 0xF800 : 0x0000);
897 unsigned short unmask = ~mask;
898 *Pointer<UShort>(element) = (*Pointer<UShort>(element) & UShort(unmask)) |
Alexis Hetu3716c202019-12-19 17:09:08 -0500899 (UShort(PackFields(RoundInt(c.xyzz), { 11, 5, 0, 0 })) &
Ben Claytonfccfc562019-12-17 20:37:31 +0000900 UShort(mask));
901 }
902 break;
903 case VK_FORMAT_R5G5B5A1_UNORM_PACK16:
904 if(writeRGBA)
905 {
Alexis Hetu3716c202019-12-19 17:09:08 -0500906 *Pointer<UShort>(element) = UShort(PackFields(RoundInt(c), { 11, 6, 1, 0 }));
Ben Claytonfccfc562019-12-17 20:37:31 +0000907 }
908 else
909 {
910 unsigned short mask = (writeA ? 0x8000 : 0x0000) |
911 (writeR ? 0x7C00 : 0x0000) |
912 (writeG ? 0x03E0 : 0x0000) |
913 (writeB ? 0x001F : 0x0000);
914 unsigned short unmask = ~mask;
915 *Pointer<UShort>(element) = (*Pointer<UShort>(element) & UShort(unmask)) |
Alexis Hetu3716c202019-12-19 17:09:08 -0500916 (UShort(PackFields(RoundInt(c), { 11, 6, 1, 0 })) &
Ben Claytonfccfc562019-12-17 20:37:31 +0000917 UShort(mask));
918 }
919 break;
920 case VK_FORMAT_B5G5R5A1_UNORM_PACK16:
921 if(writeRGBA)
922 {
Alexis Hetu3716c202019-12-19 17:09:08 -0500923 *Pointer<UShort>(element) = UShort(PackFields(RoundInt(c), { 1, 6, 11, 0 }));
Ben Claytonfccfc562019-12-17 20:37:31 +0000924 }
925 else
926 {
927 unsigned short mask = (writeA ? 0x8000 : 0x0000) |
928 (writeR ? 0x7C00 : 0x0000) |
929 (writeG ? 0x03E0 : 0x0000) |
930 (writeB ? 0x001F : 0x0000);
931 unsigned short unmask = ~mask;
932 *Pointer<UShort>(element) = (*Pointer<UShort>(element) & UShort(unmask)) |
Alexis Hetu3716c202019-12-19 17:09:08 -0500933 (UShort(PackFields(RoundInt(c), { 1, 6, 11, 0 })) &
Ben Claytonfccfc562019-12-17 20:37:31 +0000934 UShort(mask));
935 }
936 break;
937 case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
938 if(writeRGBA)
939 {
Alexis Hetu3716c202019-12-19 17:09:08 -0500940 *Pointer<UShort>(element) = UShort(PackFields(RoundInt(c), { 10, 5, 0, 15 }));
Ben Claytonfccfc562019-12-17 20:37:31 +0000941 }
942 else
943 {
944 unsigned short mask = (writeA ? 0x8000 : 0x0000) |
945 (writeR ? 0x7C00 : 0x0000) |
946 (writeG ? 0x03E0 : 0x0000) |
947 (writeB ? 0x001F : 0x0000);
948 unsigned short unmask = ~mask;
949 *Pointer<UShort>(element) = (*Pointer<UShort>(element) & UShort(unmask)) |
Alexis Hetu3716c202019-12-19 17:09:08 -0500950 (UShort(PackFields(RoundInt(c), { 10, 5, 0, 15 })) &
Ben Claytonfccfc562019-12-17 20:37:31 +0000951 UShort(mask));
952 }
953 break;
954 case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
955 case VK_FORMAT_A2B10G10R10_UINT_PACK32:
956 case VK_FORMAT_A2B10G10R10_SNORM_PACK32:
957 if(writeRGBA)
958 {
Alexis Hetu3716c202019-12-19 17:09:08 -0500959 *Pointer<UInt>(element) = As<UInt>(PackFields(RoundInt(c), { 0, 10, 20, 30 }));
Ben Claytonfccfc562019-12-17 20:37:31 +0000960 }
961 else
962 {
963 unsigned int mask = (writeA ? 0xC0000000 : 0x0000) |
964 (writeB ? 0x3FF00000 : 0x0000) |
965 (writeG ? 0x000FFC00 : 0x0000) |
966 (writeR ? 0x000003FF : 0x0000);
967 unsigned int unmask = ~mask;
968 *Pointer<UInt>(element) = (*Pointer<UInt>(element) & UInt(unmask)) |
Alexis Hetu3716c202019-12-19 17:09:08 -0500969 (As<UInt>(PackFields(RoundInt(c), { 0, 10, 20, 30 })) &
Ben Claytonfccfc562019-12-17 20:37:31 +0000970 UInt(mask));
971 }
972 break;
973 case VK_FORMAT_A2R10G10B10_UNORM_PACK32:
974 case VK_FORMAT_A2R10G10B10_UINT_PACK32:
975 case VK_FORMAT_A2R10G10B10_SNORM_PACK32:
976 if(writeRGBA)
977 {
Alexis Hetu3716c202019-12-19 17:09:08 -0500978 *Pointer<UInt>(element) = As<UInt>(PackFields(RoundInt(c), { 20, 10, 0, 30 }));
Ben Claytonfccfc562019-12-17 20:37:31 +0000979 }
980 else
981 {
982 unsigned int mask = (writeA ? 0xC0000000 : 0x0000) |
983 (writeR ? 0x3FF00000 : 0x0000) |
984 (writeG ? 0x000FFC00 : 0x0000) |
985 (writeB ? 0x000003FF : 0x0000);
986 unsigned int unmask = ~mask;
987 *Pointer<UInt>(element) = (*Pointer<UInt>(element) & UInt(unmask)) |
Alexis Hetu3716c202019-12-19 17:09:08 -0500988 (As<UInt>(PackFields(RoundInt(c), { 20, 10, 0, 30 })) &
Ben Claytonfccfc562019-12-17 20:37:31 +0000989 UInt(mask));
990 }
991 break;
992 case VK_FORMAT_D16_UNORM:
993 *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x)));
994 break;
995 case VK_FORMAT_X8_D24_UNORM_PACK32:
996 *Pointer<UInt>(element) = UInt(RoundInt(Float(c.x)) << 8);
997 break;
998 case VK_FORMAT_D32_SFLOAT:
999 *Pointer<Float>(element) = c.x;
1000 break;
1001 case VK_FORMAT_S8_UINT:
1002 *Pointer<Byte>(element) = Byte(RoundInt(Float(c.x)));
1003 break;
1004 default:
1005 UNSUPPORTED("Blitter destination format %d", (int)state.destFormat);
1006 break;
Nicolas Capens157ba262019-12-10 17:49:14 -05001007 }
1008}
1009
1010Int4 Blitter::readInt4(Pointer<Byte> element, const State &state)
1011{
1012 Int4 c(0, 0, 0, 1);
1013
1014 switch(state.sourceFormat)
1015 {
Ben Claytonfccfc562019-12-17 20:37:31 +00001016 case VK_FORMAT_A8B8G8R8_SINT_PACK32:
1017 case VK_FORMAT_R8G8B8A8_SINT:
1018 c = Insert(c, Int(*Pointer<SByte>(element + 3)), 3);
1019 c = Insert(c, Int(*Pointer<SByte>(element + 2)), 2);
Nicolas Capens0405ba02020-01-16 01:19:21 -05001020 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +00001021 case VK_FORMAT_R8G8_SINT:
1022 c = Insert(c, Int(*Pointer<SByte>(element + 1)), 1);
Nicolas Capens0405ba02020-01-16 01:19:21 -05001023 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +00001024 case VK_FORMAT_R8_SINT:
1025 c = Insert(c, Int(*Pointer<SByte>(element)), 0);
1026 break;
1027 case VK_FORMAT_A2B10G10R10_UINT_PACK32:
1028 c = Insert(c, Int((*Pointer<UInt>(element) & UInt(0x000003FF))), 0);
1029 c = Insert(c, Int((*Pointer<UInt>(element) & UInt(0x000FFC00)) >> 10), 1);
1030 c = Insert(c, Int((*Pointer<UInt>(element) & UInt(0x3FF00000)) >> 20), 2);
1031 c = Insert(c, Int((*Pointer<UInt>(element) & UInt(0xC0000000)) >> 30), 3);
1032 break;
Alexis Hetub8a61bf2020-01-09 15:26:34 -05001033 case VK_FORMAT_A2R10G10B10_UINT_PACK32:
1034 c = Insert(c, Int((*Pointer<UInt>(element) & UInt(0x000003FF))), 2);
1035 c = Insert(c, Int((*Pointer<UInt>(element) & UInt(0x000FFC00)) >> 10), 1);
1036 c = Insert(c, Int((*Pointer<UInt>(element) & UInt(0x3FF00000)) >> 20), 0);
1037 c = Insert(c, Int((*Pointer<UInt>(element) & UInt(0xC0000000)) >> 30), 3);
1038 break;
Ben Claytonfccfc562019-12-17 20:37:31 +00001039 case VK_FORMAT_A8B8G8R8_UINT_PACK32:
1040 case VK_FORMAT_R8G8B8A8_UINT:
1041 c = Insert(c, Int(*Pointer<Byte>(element + 3)), 3);
1042 c = Insert(c, Int(*Pointer<Byte>(element + 2)), 2);
Nicolas Capens0405ba02020-01-16 01:19:21 -05001043 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +00001044 case VK_FORMAT_R8G8_UINT:
1045 c = Insert(c, Int(*Pointer<Byte>(element + 1)), 1);
Nicolas Capens0405ba02020-01-16 01:19:21 -05001046 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +00001047 case VK_FORMAT_R8_UINT:
1048 case VK_FORMAT_S8_UINT:
1049 c = Insert(c, Int(*Pointer<Byte>(element)), 0);
1050 break;
1051 case VK_FORMAT_R16G16B16A16_SINT:
1052 c = Insert(c, Int(*Pointer<Short>(element + 6)), 3);
1053 c = Insert(c, Int(*Pointer<Short>(element + 4)), 2);
Nicolas Capens0405ba02020-01-16 01:19:21 -05001054 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +00001055 case VK_FORMAT_R16G16_SINT:
1056 c = Insert(c, Int(*Pointer<Short>(element + 2)), 1);
Nicolas Capens0405ba02020-01-16 01:19:21 -05001057 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +00001058 case VK_FORMAT_R16_SINT:
1059 c = Insert(c, Int(*Pointer<Short>(element)), 0);
1060 break;
1061 case VK_FORMAT_R16G16B16A16_UINT:
1062 c = Insert(c, Int(*Pointer<UShort>(element + 6)), 3);
1063 c = Insert(c, Int(*Pointer<UShort>(element + 4)), 2);
Nicolas Capens0405ba02020-01-16 01:19:21 -05001064 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +00001065 case VK_FORMAT_R16G16_UINT:
1066 c = Insert(c, Int(*Pointer<UShort>(element + 2)), 1);
Nicolas Capens0405ba02020-01-16 01:19:21 -05001067 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +00001068 case VK_FORMAT_R16_UINT:
1069 c = Insert(c, Int(*Pointer<UShort>(element)), 0);
1070 break;
1071 case VK_FORMAT_R32G32B32A32_SINT:
1072 case VK_FORMAT_R32G32B32A32_UINT:
1073 c = *Pointer<Int4>(element);
1074 break;
1075 case VK_FORMAT_R32G32_SINT:
1076 case VK_FORMAT_R32G32_UINT:
1077 c = Insert(c, *Pointer<Int>(element + 4), 1);
Nicolas Capens0405ba02020-01-16 01:19:21 -05001078 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +00001079 case VK_FORMAT_R32_SINT:
1080 case VK_FORMAT_R32_UINT:
1081 c = Insert(c, *Pointer<Int>(element), 0);
1082 break;
1083 default:
1084 UNSUPPORTED("Blitter source format %d", (int)state.sourceFormat);
Nicolas Capens157ba262019-12-10 17:49:14 -05001085 }
1086
1087 return c;
1088}
1089
1090void Blitter::write(Int4 &c, Pointer<Byte> element, const State &state)
1091{
1092 bool writeR = state.writeRed;
1093 bool writeG = state.writeGreen;
1094 bool writeB = state.writeBlue;
1095 bool writeA = state.writeAlpha;
1096 bool writeRGBA = writeR && writeG && writeB && writeA;
1097
1098 switch(state.destFormat)
1099 {
Ben Claytonfccfc562019-12-17 20:37:31 +00001100 case VK_FORMAT_A2B10G10R10_UINT_PACK32:
Alexis Hetub8a61bf2020-01-09 15:26:34 -05001101 case VK_FORMAT_A2R10G10B10_UINT_PACK32:
Ben Claytonfccfc562019-12-17 20:37:31 +00001102 c = Min(As<UInt4>(c), UInt4(0x03FF, 0x03FF, 0x03FF, 0x0003));
1103 break;
1104 case VK_FORMAT_A8B8G8R8_UINT_PACK32:
1105 case VK_FORMAT_R8G8B8A8_UINT:
1106 case VK_FORMAT_R8G8B8_UINT:
1107 case VK_FORMAT_R8G8_UINT:
1108 case VK_FORMAT_R8_UINT:
1109 case VK_FORMAT_R8G8B8A8_USCALED:
1110 case VK_FORMAT_R8G8B8_USCALED:
1111 case VK_FORMAT_R8G8_USCALED:
1112 case VK_FORMAT_R8_USCALED:
1113 case VK_FORMAT_S8_UINT:
1114 c = Min(As<UInt4>(c), UInt4(0xFF));
1115 break;
1116 case VK_FORMAT_R16G16B16A16_UINT:
1117 case VK_FORMAT_R16G16B16_UINT:
1118 case VK_FORMAT_R16G16_UINT:
1119 case VK_FORMAT_R16_UINT:
1120 case VK_FORMAT_R16G16B16A16_USCALED:
1121 case VK_FORMAT_R16G16B16_USCALED:
1122 case VK_FORMAT_R16G16_USCALED:
1123 case VK_FORMAT_R16_USCALED:
1124 c = Min(As<UInt4>(c), UInt4(0xFFFF));
1125 break;
1126 case VK_FORMAT_A8B8G8R8_SINT_PACK32:
1127 case VK_FORMAT_R8G8B8A8_SINT:
1128 case VK_FORMAT_R8G8_SINT:
1129 case VK_FORMAT_R8_SINT:
1130 case VK_FORMAT_R8G8B8A8_SSCALED:
1131 case VK_FORMAT_R8G8B8_SSCALED:
1132 case VK_FORMAT_R8G8_SSCALED:
1133 case VK_FORMAT_R8_SSCALED:
1134 c = Min(Max(c, Int4(-0x80)), Int4(0x7F));
1135 break;
1136 case VK_FORMAT_R16G16B16A16_SINT:
1137 case VK_FORMAT_R16G16B16_SINT:
1138 case VK_FORMAT_R16G16_SINT:
1139 case VK_FORMAT_R16_SINT:
1140 case VK_FORMAT_R16G16B16A16_SSCALED:
1141 case VK_FORMAT_R16G16B16_SSCALED:
1142 case VK_FORMAT_R16G16_SSCALED:
1143 case VK_FORMAT_R16_SSCALED:
1144 c = Min(Max(c, Int4(-0x8000)), Int4(0x7FFF));
1145 break;
1146 default:
1147 break;
Nicolas Capens157ba262019-12-10 17:49:14 -05001148 }
1149
1150 switch(state.destFormat)
1151 {
Ben Claytonfccfc562019-12-17 20:37:31 +00001152 case VK_FORMAT_B8G8R8A8_SINT:
1153 case VK_FORMAT_B8G8R8A8_SSCALED:
1154 if(writeA) { *Pointer<SByte>(element + 3) = SByte(Extract(c, 3)); }
Nicolas Capens0405ba02020-01-16 01:19:21 -05001155 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +00001156 case VK_FORMAT_B8G8R8_SINT:
1157 case VK_FORMAT_B8G8R8_SSCALED:
1158 if(writeB) { *Pointer<SByte>(element) = SByte(Extract(c, 2)); }
1159 if(writeG) { *Pointer<SByte>(element + 1) = SByte(Extract(c, 1)); }
1160 if(writeR) { *Pointer<SByte>(element + 2) = SByte(Extract(c, 0)); }
1161 break;
1162 case VK_FORMAT_A8B8G8R8_SINT_PACK32:
1163 case VK_FORMAT_R8G8B8A8_SINT:
1164 case VK_FORMAT_R8G8B8A8_SSCALED:
1165 case VK_FORMAT_A8B8G8R8_SSCALED_PACK32:
1166 if(writeA) { *Pointer<SByte>(element + 3) = SByte(Extract(c, 3)); }
Nicolas Capens0405ba02020-01-16 01:19:21 -05001167 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +00001168 case VK_FORMAT_R8G8B8_SINT:
1169 case VK_FORMAT_R8G8B8_SSCALED:
1170 if(writeB) { *Pointer<SByte>(element + 2) = SByte(Extract(c, 2)); }
Nicolas Capens0405ba02020-01-16 01:19:21 -05001171 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +00001172 case VK_FORMAT_R8G8_SINT:
1173 case VK_FORMAT_R8G8_SSCALED:
1174 if(writeG) { *Pointer<SByte>(element + 1) = SByte(Extract(c, 1)); }
Nicolas Capens0405ba02020-01-16 01:19:21 -05001175 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +00001176 case VK_FORMAT_R8_SINT:
1177 case VK_FORMAT_R8_SSCALED:
1178 if(writeR) { *Pointer<SByte>(element) = SByte(Extract(c, 0)); }
1179 break;
1180 case VK_FORMAT_A2B10G10R10_UINT_PACK32:
1181 case VK_FORMAT_A2B10G10R10_SINT_PACK32:
1182 case VK_FORMAT_A2B10G10R10_USCALED_PACK32:
1183 case VK_FORMAT_A2B10G10R10_SSCALED_PACK32:
1184 if(writeRGBA)
1185 {
Alexis Hetu3716c202019-12-19 17:09:08 -05001186 *Pointer<UInt>(element) = As<UInt>(PackFields(c, { 0, 10, 20, 30 }));
Ben Claytonfccfc562019-12-17 20:37:31 +00001187 }
1188 else
1189 {
1190 unsigned int mask = (writeA ? 0xC0000000 : 0x0000) |
1191 (writeB ? 0x3FF00000 : 0x0000) |
1192 (writeG ? 0x000FFC00 : 0x0000) |
1193 (writeR ? 0x000003FF : 0x0000);
1194 unsigned int unmask = ~mask;
1195 *Pointer<UInt>(element) = (*Pointer<UInt>(element) & UInt(unmask)) |
Alexis Hetu3716c202019-12-19 17:09:08 -05001196 (As<UInt>(PackFields(c, { 0, 10, 20, 30 })) & UInt(mask));
Ben Claytonfccfc562019-12-17 20:37:31 +00001197 }
1198 break;
1199 case VK_FORMAT_A2R10G10B10_UINT_PACK32:
1200 case VK_FORMAT_A2R10G10B10_SINT_PACK32:
1201 case VK_FORMAT_A2R10G10B10_USCALED_PACK32:
1202 case VK_FORMAT_A2R10G10B10_SSCALED_PACK32:
1203 if(writeRGBA)
1204 {
Alexis Hetu3716c202019-12-19 17:09:08 -05001205 *Pointer<UInt>(element) = As<UInt>(PackFields(c, { 20, 10, 0, 30 }));
Ben Claytonfccfc562019-12-17 20:37:31 +00001206 }
1207 else
1208 {
1209 unsigned int mask = (writeA ? 0xC0000000 : 0x0000) |
1210 (writeR ? 0x3FF00000 : 0x0000) |
1211 (writeG ? 0x000FFC00 : 0x0000) |
1212 (writeB ? 0x000003FF : 0x0000);
1213 unsigned int unmask = ~mask;
1214 *Pointer<UInt>(element) = (*Pointer<UInt>(element) & UInt(unmask)) |
Alexis Hetu3716c202019-12-19 17:09:08 -05001215 (As<UInt>(PackFields(c, { 20, 10, 0, 30 })) & UInt(mask));
Ben Claytonfccfc562019-12-17 20:37:31 +00001216 }
1217 break;
1218 case VK_FORMAT_B8G8R8A8_UINT:
1219 case VK_FORMAT_B8G8R8A8_USCALED:
1220 if(writeA) { *Pointer<Byte>(element + 3) = Byte(Extract(c, 3)); }
Nicolas Capens0405ba02020-01-16 01:19:21 -05001221 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +00001222 case VK_FORMAT_B8G8R8_UINT:
1223 case VK_FORMAT_B8G8R8_USCALED:
1224 case VK_FORMAT_B8G8R8_SRGB:
1225 if(writeB) { *Pointer<Byte>(element) = Byte(Extract(c, 2)); }
1226 if(writeG) { *Pointer<Byte>(element + 1) = Byte(Extract(c, 1)); }
1227 if(writeR) { *Pointer<Byte>(element + 2) = Byte(Extract(c, 0)); }
1228 break;
1229 case VK_FORMAT_A8B8G8R8_UINT_PACK32:
1230 case VK_FORMAT_R8G8B8A8_UINT:
1231 case VK_FORMAT_R8G8B8A8_USCALED:
1232 case VK_FORMAT_A8B8G8R8_USCALED_PACK32:
1233 if(writeA) { *Pointer<Byte>(element + 3) = Byte(Extract(c, 3)); }
Nicolas Capens0405ba02020-01-16 01:19:21 -05001234 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +00001235 case VK_FORMAT_R8G8B8_UINT:
1236 case VK_FORMAT_R8G8B8_USCALED:
1237 if(writeB) { *Pointer<Byte>(element + 2) = Byte(Extract(c, 2)); }
Nicolas Capens0405ba02020-01-16 01:19:21 -05001238 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +00001239 case VK_FORMAT_R8G8_UINT:
1240 case VK_FORMAT_R8G8_USCALED:
1241 if(writeG) { *Pointer<Byte>(element + 1) = Byte(Extract(c, 1)); }
Nicolas Capens0405ba02020-01-16 01:19:21 -05001242 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +00001243 case VK_FORMAT_R8_UINT:
1244 case VK_FORMAT_R8_USCALED:
1245 case VK_FORMAT_S8_UINT:
1246 if(writeR) { *Pointer<Byte>(element) = Byte(Extract(c, 0)); }
1247 break;
1248 case VK_FORMAT_R16G16B16A16_SINT:
1249 case VK_FORMAT_R16G16B16A16_SSCALED:
1250 if(writeA) { *Pointer<Short>(element + 6) = Short(Extract(c, 3)); }
Nicolas Capens0405ba02020-01-16 01:19:21 -05001251 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +00001252 case VK_FORMAT_R16G16B16_SINT:
1253 case VK_FORMAT_R16G16B16_SSCALED:
1254 if(writeB) { *Pointer<Short>(element + 4) = Short(Extract(c, 2)); }
Nicolas Capens0405ba02020-01-16 01:19:21 -05001255 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +00001256 case VK_FORMAT_R16G16_SINT:
1257 case VK_FORMAT_R16G16_SSCALED:
1258 if(writeG) { *Pointer<Short>(element + 2) = Short(Extract(c, 1)); }
Nicolas Capens0405ba02020-01-16 01:19:21 -05001259 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +00001260 case VK_FORMAT_R16_SINT:
1261 case VK_FORMAT_R16_SSCALED:
1262 if(writeR) { *Pointer<Short>(element) = Short(Extract(c, 0)); }
1263 break;
1264 case VK_FORMAT_R16G16B16A16_UINT:
1265 case VK_FORMAT_R16G16B16A16_USCALED:
1266 if(writeA) { *Pointer<UShort>(element + 6) = UShort(Extract(c, 3)); }
Nicolas Capens0405ba02020-01-16 01:19:21 -05001267 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +00001268 case VK_FORMAT_R16G16B16_UINT:
1269 case VK_FORMAT_R16G16B16_USCALED:
1270 if(writeB) { *Pointer<UShort>(element + 4) = UShort(Extract(c, 2)); }
Nicolas Capens0405ba02020-01-16 01:19:21 -05001271 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +00001272 case VK_FORMAT_R16G16_UINT:
1273 case VK_FORMAT_R16G16_USCALED:
1274 if(writeG) { *Pointer<UShort>(element + 2) = UShort(Extract(c, 1)); }
Nicolas Capens0405ba02020-01-16 01:19:21 -05001275 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +00001276 case VK_FORMAT_R16_UINT:
1277 case VK_FORMAT_R16_USCALED:
1278 if(writeR) { *Pointer<UShort>(element) = UShort(Extract(c, 0)); }
1279 break;
1280 case VK_FORMAT_R32G32B32A32_SINT:
1281 if(writeRGBA)
1282 {
1283 *Pointer<Int4>(element) = c;
1284 }
1285 else
1286 {
1287 if(writeR) { *Pointer<Int>(element) = Extract(c, 0); }
1288 if(writeG) { *Pointer<Int>(element + 4) = Extract(c, 1); }
1289 if(writeB) { *Pointer<Int>(element + 8) = Extract(c, 2); }
1290 if(writeA) { *Pointer<Int>(element + 12) = Extract(c, 3); }
1291 }
1292 break;
1293 case VK_FORMAT_R32G32B32_SINT:
Nicolas Capens157ba262019-12-10 17:49:14 -05001294 if(writeR) { *Pointer<Int>(element) = Extract(c, 0); }
1295 if(writeG) { *Pointer<Int>(element + 4) = Extract(c, 1); }
1296 if(writeB) { *Pointer<Int>(element + 8) = Extract(c, 2); }
Ben Claytonfccfc562019-12-17 20:37:31 +00001297 break;
1298 case VK_FORMAT_R32G32_SINT:
1299 if(writeR) { *Pointer<Int>(element) = Extract(c, 0); }
1300 if(writeG) { *Pointer<Int>(element + 4) = Extract(c, 1); }
1301 break;
1302 case VK_FORMAT_R32_SINT:
1303 if(writeR) { *Pointer<Int>(element) = Extract(c, 0); }
1304 break;
1305 case VK_FORMAT_R32G32B32A32_UINT:
1306 if(writeRGBA)
1307 {
1308 *Pointer<UInt4>(element) = As<UInt4>(c);
1309 }
1310 else
1311 {
1312 if(writeR) { *Pointer<UInt>(element) = As<UInt>(Extract(c, 0)); }
1313 if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(Extract(c, 1)); }
1314 if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(Extract(c, 2)); }
1315 if(writeA) { *Pointer<UInt>(element + 12) = As<UInt>(Extract(c, 3)); }
1316 }
1317 break;
1318 case VK_FORMAT_R32G32B32_UINT:
Nicolas Capens157ba262019-12-10 17:49:14 -05001319 if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(Extract(c, 2)); }
Nicolas Capens0405ba02020-01-16 01:19:21 -05001320 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +00001321 case VK_FORMAT_R32G32_UINT:
1322 if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(Extract(c, 1)); }
Nicolas Capens0405ba02020-01-16 01:19:21 -05001323 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +00001324 case VK_FORMAT_R32_UINT:
1325 if(writeR) { *Pointer<UInt>(element) = As<UInt>(Extract(c, 0)); }
1326 break;
1327 default:
1328 UNSUPPORTED("Blitter destination format %d", (int)state.destFormat);
Nicolas Capens157ba262019-12-10 17:49:14 -05001329 }
1330}
1331
1332void Blitter::ApplyScaleAndClamp(Float4 &value, const State &state, bool preScaled)
1333{
1334 float4 scale{}, unscale{};
1335
1336 if(state.clearOperation &&
Nicolas Capens9d9f30d2020-01-12 03:26:18 -05001337 state.sourceFormat.isUnnormalizedInteger() &&
1338 !state.destFormat.isUnnormalizedInteger())
Nicolas Capens157ba262019-12-10 17:49:14 -05001339 {
1340 // If we're clearing a buffer from an int or uint color into a normalized color,
1341 // then the whole range of the int or uint color must be scaled between 0 and 1.
1342 switch(state.sourceFormat)
1343 {
Ben Claytonfccfc562019-12-17 20:37:31 +00001344 case VK_FORMAT_R32G32B32A32_SINT:
1345 unscale = float4(static_cast<float>(0x7FFFFFFF));
1346 break;
1347 case VK_FORMAT_R32G32B32A32_UINT:
1348 unscale = float4(static_cast<float>(0xFFFFFFFF));
1349 break;
1350 default:
1351 UNSUPPORTED("Blitter source format %d", (int)state.sourceFormat);
Nicolas Capens157ba262019-12-10 17:49:14 -05001352 }
1353 }
1354 else
1355 {
1356 unscale = state.sourceFormat.getScale();
1357 }
1358
1359 scale = state.destFormat.getScale();
1360
1361 bool srcSRGB = state.sourceFormat.isSRGBformat();
1362 bool dstSRGB = state.destFormat.isSRGBformat();
1363
Ben Claytonfccfc562019-12-17 20:37:31 +00001364 if(state.allowSRGBConversion && ((srcSRGB && !preScaled) || dstSRGB)) // One of the formats is sRGB encoded.
Nicolas Capens157ba262019-12-10 17:49:14 -05001365 {
Ben Claytonfccfc562019-12-17 20:37:31 +00001366 value *= preScaled ? Float4(1.0f / scale.x, 1.0f / scale.y, 1.0f / scale.z, 1.0f / scale.w) : // Unapply scale
1367 Float4(1.0f / unscale.x, 1.0f / unscale.y, 1.0f / unscale.z, 1.0f / unscale.w); // Apply unscale
Nicolas Capens157ba262019-12-10 17:49:14 -05001368 value = (srcSRGB && !preScaled) ? sRGBtoLinear(value) : LinearToSRGB(value);
Ben Claytonfccfc562019-12-17 20:37:31 +00001369 value *= Float4(scale.x, scale.y, scale.z, scale.w); // Apply scale
Nicolas Capens157ba262019-12-10 17:49:14 -05001370 }
1371 else if(unscale != scale)
1372 {
1373 value *= Float4(scale.x / unscale.x, scale.y / unscale.y, scale.z / unscale.z, scale.w / unscale.w);
1374 }
1375
1376 if(state.sourceFormat.isFloatFormat() && !state.destFormat.isFloatFormat())
1377 {
1378 value = Min(value, Float4(scale.x, scale.y, scale.z, scale.w));
1379
1380 value = Max(value, Float4(state.destFormat.isUnsignedComponent(0) ? 0.0f : -scale.x,
1381 state.destFormat.isUnsignedComponent(1) ? 0.0f : -scale.y,
1382 state.destFormat.isUnsignedComponent(2) ? 0.0f : -scale.z,
1383 state.destFormat.isUnsignedComponent(3) ? 0.0f : -scale.w));
1384 }
1385}
1386
1387Int Blitter::ComputeOffset(Int &x, Int &y, Int &pitchB, int bytes)
1388{
1389 return y * pitchB + x * bytes;
1390}
1391
Alexis Hetu18daa812020-03-11 17:06:53 -04001392Int Blitter::ComputeOffset(Int &x, Int &y, Int &z, Int &sliceB, Int &pitchB, int bytes)
1393{
1394 return z * sliceB + y * pitchB + x * bytes;
1395}
1396
Nicolas Capens2883de92020-01-27 14:58:14 -05001397Float4 Blitter::LinearToSRGB(const Float4 &c)
Nicolas Capens157ba262019-12-10 17:49:14 -05001398{
1399 Float4 lc = Min(c, Float4(0.0031308f)) * Float4(12.92f);
1400 Float4 ec = Float4(1.055f) * power(c, Float4(1.0f / 2.4f)) - Float4(0.055f);
1401
1402 Float4 s = c;
1403 s.xyz = Max(lc, ec);
1404
1405 return s;
1406}
1407
Nicolas Capens2883de92020-01-27 14:58:14 -05001408Float4 Blitter::sRGBtoLinear(const Float4 &c)
Nicolas Capens157ba262019-12-10 17:49:14 -05001409{
1410 Float4 lc = c * Float4(1.0f / 12.92f);
1411 Float4 ec = power((c + Float4(0.055f)) * Float4(1.0f / 1.055f), Float4(2.4f));
1412
1413 Int4 linear = CmpLT(c, Float4(0.04045f));
1414
1415 Float4 s = c;
Ben Claytonfccfc562019-12-17 20:37:31 +00001416 s.xyz = As<Float4>((linear & As<Int4>(lc)) | (~linear & As<Int4>(ec))); // TODO: IfThenElse()
Nicolas Capens157ba262019-12-10 17:49:14 -05001417
1418 return s;
1419}
1420
Alexis Hetu18daa812020-03-11 17:06:53 -04001421Float4 Blitter::sample(Pointer<Byte> &source, Float &x, Float &y, Float &z,
1422 Int &sWidth, Int &sHeight, Int &sDepth,
1423 Int &sSliceB, Int &sPitchB, const State &state)
1424{
1425 bool intSrc = state.sourceFormat.isUnnormalizedInteger();
1426 int srcBytes = state.sourceFormat.bytes();
1427
1428 Float4 color;
1429
1430 bool preScaled = false;
1431 if(!state.filter || intSrc)
1432 {
1433 Int X = Int(x);
1434 Int Y = Int(y);
1435 Int Z = Int(z);
1436
1437 if(state.clampToEdge)
1438 {
1439 X = Clamp(X, 0, sWidth - 1);
1440 Y = Clamp(Y, 0, sHeight - 1);
1441 Z = Clamp(Z, 0, sDepth - 1);
1442 }
1443
1444 Pointer<Byte> s = source + ComputeOffset(X, Y, Z, sSliceB, sPitchB, srcBytes);
1445
1446 color = readFloat4(s, state);
1447
1448 if(state.srcSamples > 1) // Resolve multisampled source
1449 {
1450 if(state.allowSRGBConversion && state.sourceFormat.isSRGBformat()) // sRGB -> RGB
1451 {
1452 ApplyScaleAndClamp(color, state);
1453 preScaled = true;
1454 }
1455 Float4 accum = color;
1456 for(int sample = 1; sample < state.srcSamples; sample++)
1457 {
1458 s += sSliceB;
1459 color = readFloat4(s, state);
1460
1461 if(state.allowSRGBConversion && state.sourceFormat.isSRGBformat()) // sRGB -> RGB
1462 {
1463 ApplyScaleAndClamp(color, state);
1464 preScaled = true;
1465 }
1466 accum += color;
1467 }
1468 color = accum * Float4(1.0f / static_cast<float>(state.srcSamples));
1469 }
1470 }
1471 else // Bilinear filtering
1472 {
1473 Float X = x;
1474 Float Y = y;
1475 Float Z = z;
1476
1477 if(state.clampToEdge)
1478 {
1479 X = Min(Max(x, 0.5f), Float(sWidth) - 0.5f);
1480 Y = Min(Max(y, 0.5f), Float(sHeight) - 0.5f);
1481 Z = Min(Max(z, 0.5f), Float(sDepth) - 0.5f);
1482 }
1483
1484 Float x0 = X - 0.5f;
1485 Float y0 = Y - 0.5f;
1486 Float z0 = Z - 0.5f;
1487
1488 Int X0 = Max(Int(x0), 0);
1489 Int Y0 = Max(Int(y0), 0);
1490 Int Z0 = Max(Int(z0), 0);
1491
1492 Int X1 = X0 + 1;
1493 Int Y1 = Y0 + 1;
1494 X1 = IfThenElse(X1 >= sWidth, X0, X1);
1495 Y1 = IfThenElse(Y1 >= sHeight, Y0, Y1);
1496
1497 if(state.filter3D)
1498 {
1499 Int Z1 = Z0 + 1;
1500 Z1 = IfThenElse(Z1 >= sHeight, Z0, Z1);
1501
1502 Pointer<Byte> s000 = source + ComputeOffset(X0, Y0, Z0, sSliceB, sPitchB, srcBytes);
1503 Pointer<Byte> s010 = source + ComputeOffset(X1, Y0, Z0, sSliceB, sPitchB, srcBytes);
1504 Pointer<Byte> s100 = source + ComputeOffset(X0, Y1, Z0, sSliceB, sPitchB, srcBytes);
1505 Pointer<Byte> s110 = source + ComputeOffset(X1, Y1, Z0, sSliceB, sPitchB, srcBytes);
1506 Pointer<Byte> s001 = source + ComputeOffset(X0, Y0, Z1, sSliceB, sPitchB, srcBytes);
1507 Pointer<Byte> s011 = source + ComputeOffset(X1, Y0, Z1, sSliceB, sPitchB, srcBytes);
1508 Pointer<Byte> s101 = source + ComputeOffset(X0, Y1, Z1, sSliceB, sPitchB, srcBytes);
1509 Pointer<Byte> s111 = source + ComputeOffset(X1, Y1, Z1, sSliceB, sPitchB, srcBytes);
1510
1511 Float4 c000 = readFloat4(s000, state);
1512 Float4 c010 = readFloat4(s010, state);
1513 Float4 c100 = readFloat4(s100, state);
1514 Float4 c110 = readFloat4(s110, state);
1515 Float4 c001 = readFloat4(s001, state);
1516 Float4 c011 = readFloat4(s011, state);
1517 Float4 c101 = readFloat4(s101, state);
1518 Float4 c111 = readFloat4(s111, state);
1519
1520 if(state.allowSRGBConversion && state.sourceFormat.isSRGBformat()) // sRGB -> RGB
1521 {
1522 ApplyScaleAndClamp(c000, state);
1523 ApplyScaleAndClamp(c010, state);
1524 ApplyScaleAndClamp(c100, state);
1525 ApplyScaleAndClamp(c110, state);
1526 ApplyScaleAndClamp(c001, state);
1527 ApplyScaleAndClamp(c011, state);
1528 ApplyScaleAndClamp(c101, state);
1529 ApplyScaleAndClamp(c111, state);
1530 preScaled = true;
1531 }
1532
1533 Float4 fx = Float4(x0 - Float(X0));
1534 Float4 fy = Float4(y0 - Float(Y0));
1535 Float4 fz = Float4(z0 - Float(Z0));
1536 Float4 ix = Float4(1.0f) - fx;
1537 Float4 iy = Float4(1.0f) - fy;
1538 Float4 iz = Float4(1.0f) - fz;
1539
1540 color = ((c000 * ix + c010 * fx) * iy +
1541 (c100 * ix + c110 * fx) * fy) *
1542 iz +
1543 ((c001 * ix + c011 * fx) * iy +
1544 (c101 * ix + c111 * fx) * fy) *
1545 fz;
1546 }
1547 else
1548 {
1549 Pointer<Byte> s00 = source + ComputeOffset(X0, Y0, Z0, sSliceB, sPitchB, srcBytes);
1550 Pointer<Byte> s01 = source + ComputeOffset(X1, Y0, Z0, sSliceB, sPitchB, srcBytes);
1551 Pointer<Byte> s10 = source + ComputeOffset(X0, Y1, Z0, sSliceB, sPitchB, srcBytes);
1552 Pointer<Byte> s11 = source + ComputeOffset(X1, Y1, Z0, sSliceB, sPitchB, srcBytes);
1553
1554 Float4 c00 = readFloat4(s00, state);
1555 Float4 c01 = readFloat4(s01, state);
1556 Float4 c10 = readFloat4(s10, state);
1557 Float4 c11 = readFloat4(s11, state);
1558
1559 if(state.allowSRGBConversion && state.sourceFormat.isSRGBformat()) // sRGB -> RGB
1560 {
1561 ApplyScaleAndClamp(c00, state);
1562 ApplyScaleAndClamp(c01, state);
1563 ApplyScaleAndClamp(c10, state);
1564 ApplyScaleAndClamp(c11, state);
1565 preScaled = true;
1566 }
1567
1568 Float4 fx = Float4(x0 - Float(X0));
1569 Float4 fy = Float4(y0 - Float(Y0));
1570 Float4 ix = Float4(1.0f) - fx;
1571 Float4 iy = Float4(1.0f) - fy;
1572
1573 color = (c00 * ix + c01 * fx) * iy +
1574 (c10 * ix + c11 * fx) * fy;
1575 }
1576 }
1577
1578 ApplyScaleAndClamp(color, state, preScaled);
1579
1580 return color;
1581}
1582
Nicolas Capens157ba262019-12-10 17:49:14 -05001583Blitter::BlitRoutineType Blitter::generate(const State &state)
1584{
1585 BlitFunction function;
1586 {
1587 Pointer<Byte> blit(function.Arg<0>());
1588
Ben Claytonfccfc562019-12-17 20:37:31 +00001589 Pointer<Byte> source = *Pointer<Pointer<Byte>>(blit + OFFSET(BlitData, source));
1590 Pointer<Byte> dest = *Pointer<Pointer<Byte>>(blit + OFFSET(BlitData, dest));
1591 Int sPitchB = *Pointer<Int>(blit + OFFSET(BlitData, sPitchB));
1592 Int dPitchB = *Pointer<Int>(blit + OFFSET(BlitData, dPitchB));
Alexis Hetu18daa812020-03-11 17:06:53 -04001593 Int sSliceB = *Pointer<Int>(blit + OFFSET(BlitData, sSliceB));
1594 Int dSliceB = *Pointer<Int>(blit + OFFSET(BlitData, dSliceB));
Nicolas Capens157ba262019-12-10 17:49:14 -05001595
Ben Claytonfccfc562019-12-17 20:37:31 +00001596 Float x0 = *Pointer<Float>(blit + OFFSET(BlitData, x0));
1597 Float y0 = *Pointer<Float>(blit + OFFSET(BlitData, y0));
Alexis Hetu18daa812020-03-11 17:06:53 -04001598 Float z0 = *Pointer<Float>(blit + OFFSET(BlitData, z0));
Ben Claytonfccfc562019-12-17 20:37:31 +00001599 Float w = *Pointer<Float>(blit + OFFSET(BlitData, w));
1600 Float h = *Pointer<Float>(blit + OFFSET(BlitData, h));
Alexis Hetu18daa812020-03-11 17:06:53 -04001601 Float d = *Pointer<Float>(blit + OFFSET(BlitData, d));
Nicolas Capens157ba262019-12-10 17:49:14 -05001602
Ben Claytonfccfc562019-12-17 20:37:31 +00001603 Int x0d = *Pointer<Int>(blit + OFFSET(BlitData, x0d));
1604 Int x1d = *Pointer<Int>(blit + OFFSET(BlitData, x1d));
1605 Int y0d = *Pointer<Int>(blit + OFFSET(BlitData, y0d));
1606 Int y1d = *Pointer<Int>(blit + OFFSET(BlitData, y1d));
Alexis Hetu18daa812020-03-11 17:06:53 -04001607 Int z0d = *Pointer<Int>(blit + OFFSET(BlitData, z0d));
1608 Int z1d = *Pointer<Int>(blit + OFFSET(BlitData, z1d));
Nicolas Capens157ba262019-12-10 17:49:14 -05001609
Ben Claytonfccfc562019-12-17 20:37:31 +00001610 Int sWidth = *Pointer<Int>(blit + OFFSET(BlitData, sWidth));
1611 Int sHeight = *Pointer<Int>(blit + OFFSET(BlitData, sHeight));
Alexis Hetu18daa812020-03-11 17:06:53 -04001612 Int sDepth = *Pointer<Int>(blit + OFFSET(BlitData, sDepth));
Nicolas Capens157ba262019-12-10 17:49:14 -05001613
Nicolas Capens9d9f30d2020-01-12 03:26:18 -05001614 bool intSrc = state.sourceFormat.isUnnormalizedInteger();
1615 bool intDst = state.destFormat.isUnnormalizedInteger();
Nicolas Capens157ba262019-12-10 17:49:14 -05001616 bool intBoth = intSrc && intDst;
1617 int srcBytes = state.sourceFormat.bytes();
1618 int dstBytes = state.destFormat.bytes();
1619
1620 bool hasConstantColorI = false;
1621 Int4 constantColorI;
1622 bool hasConstantColorF = false;
1623 Float4 constantColorF;
1624 if(state.clearOperation)
1625 {
Ben Claytonfccfc562019-12-17 20:37:31 +00001626 if(intBoth) // Integer types
Nicolas Capens157ba262019-12-10 17:49:14 -05001627 {
1628 constantColorI = readInt4(source, state);
1629 hasConstantColorI = true;
1630 }
1631 else
1632 {
1633 constantColorF = readFloat4(source, state);
1634 hasConstantColorF = true;
1635
1636 ApplyScaleAndClamp(constantColorF, state);
1637 }
1638 }
1639
Alexis Hetu18daa812020-03-11 17:06:53 -04001640 For(Int k = z0d, k < z1d, k++)
Nicolas Capens157ba262019-12-10 17:49:14 -05001641 {
Alexis Hetu18daa812020-03-11 17:06:53 -04001642 Float z = state.clearOperation ? RValue<Float>(z0) : z0 + Float(k) * d;
1643 Pointer<Byte> destSlice = dest + k * dSliceB;
Nicolas Capens157ba262019-12-10 17:49:14 -05001644
Alexis Hetu18daa812020-03-11 17:06:53 -04001645 For(Int j = y0d, j < y1d, j++)
Nicolas Capens157ba262019-12-10 17:49:14 -05001646 {
Alexis Hetu18daa812020-03-11 17:06:53 -04001647 Float y = state.clearOperation ? RValue<Float>(y0) : y0 + Float(j) * h;
1648 Pointer<Byte> destLine = destSlice + j * dPitchB;
Nicolas Capens157ba262019-12-10 17:49:14 -05001649
Alexis Hetu18daa812020-03-11 17:06:53 -04001650 For(Int i = x0d, i < x1d, i++)
Nicolas Capens157ba262019-12-10 17:49:14 -05001651 {
Alexis Hetu18daa812020-03-11 17:06:53 -04001652 Float x = state.clearOperation ? RValue<Float>(x0) : x0 + Float(i) * w;
1653 Pointer<Byte> d = destLine + i * dstBytes;
1654
1655 if(hasConstantColorI)
Nicolas Capens157ba262019-12-10 17:49:14 -05001656 {
Alexis Hetu18daa812020-03-11 17:06:53 -04001657 for(int s = 0; s < state.destSamples; s++)
1658 {
1659 write(constantColorI, d, state);
Nicolas Capens157ba262019-12-10 17:49:14 -05001660
Alexis Hetu18daa812020-03-11 17:06:53 -04001661 d += dSliceB;
1662 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001663 }
Alexis Hetu18daa812020-03-11 17:06:53 -04001664 else if(hasConstantColorF)
Nicolas Capens157ba262019-12-10 17:49:14 -05001665 {
Alexis Hetu18daa812020-03-11 17:06:53 -04001666 for(int s = 0; s < state.destSamples; s++)
1667 {
1668 write(constantColorF, d, state);
Nicolas Capens157ba262019-12-10 17:49:14 -05001669
Alexis Hetu18daa812020-03-11 17:06:53 -04001670 d += dSliceB;
1671 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001672 }
Alexis Hetu18daa812020-03-11 17:06:53 -04001673 else if(intBoth) // Integer types do not support filtering
Nicolas Capens68a82382018-10-02 13:16:55 -04001674 {
Nicolas Capens68a82382018-10-02 13:16:55 -04001675 Int X = Int(x);
1676 Int Y = Int(y);
Alexis Hetu18daa812020-03-11 17:06:53 -04001677 Int Z = Int(z);
Nicolas Capens68a82382018-10-02 13:16:55 -04001678
1679 if(state.clampToEdge)
1680 {
1681 X = Clamp(X, 0, sWidth - 1);
1682 Y = Clamp(Y, 0, sHeight - 1);
Alexis Hetu18daa812020-03-11 17:06:53 -04001683 Z = Clamp(Z, 0, sDepth - 1);
Nicolas Capens68a82382018-10-02 13:16:55 -04001684 }
1685
Alexis Hetu18daa812020-03-11 17:06:53 -04001686 Pointer<Byte> s = source + ComputeOffset(X, Y, Z, sSliceB, sPitchB, srcBytes);
Nicolas Capens68a82382018-10-02 13:16:55 -04001687
Alexis Hetu18daa812020-03-11 17:06:53 -04001688 // When both formats are true integer types, we don't go to float to avoid losing precision
1689 Int4 color = readInt4(s, state);
1690 for(int s = 0; s < state.destSamples; s++)
Alexis Hetuf8df30f2019-10-23 18:03:21 -04001691 {
Alexis Hetu18daa812020-03-11 17:06:53 -04001692 write(color, d, state);
Nicolas Capens157ba262019-12-10 17:49:14 -05001693
Alexis Hetu18daa812020-03-11 17:06:53 -04001694 d += dSliceB;
Nicolas Capens68a82382018-10-02 13:16:55 -04001695 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001696 }
Alexis Hetu18daa812020-03-11 17:06:53 -04001697 else
Nicolas Capens157ba262019-12-10 17:49:14 -05001698 {
Alexis Hetu18daa812020-03-11 17:06:53 -04001699 Float4 color = sample(source, x, y, z, sWidth, sHeight, sDepth, sSliceB, sPitchB, state);
Nicolas Capens157ba262019-12-10 17:49:14 -05001700
Alexis Hetu18daa812020-03-11 17:06:53 -04001701 for(int s = 0; s < state.destSamples; s++)
Nicolas Capens68a82382018-10-02 13:16:55 -04001702 {
Alexis Hetu18daa812020-03-11 17:06:53 -04001703 write(color, d, state);
1704
1705 d += dSliceB;
Nicolas Capens68a82382018-10-02 13:16:55 -04001706 }
Nicolas Capens68a82382018-10-02 13:16:55 -04001707 }
1708 }
1709 }
1710 }
Nicolas Capens68a82382018-10-02 13:16:55 -04001711 }
1712
Nicolas Capens157ba262019-12-10 17:49:14 -05001713 return function("BlitRoutine");
1714}
1715
1716Blitter::BlitRoutineType Blitter::getBlitRoutine(const State &state)
1717{
Ben Clayton377573c2020-04-03 20:36:40 +01001718 marl::lock lock(blitMutex);
Ben Claytonac43aa72020-04-04 00:48:13 +01001719 auto blitRoutine = blitCache.lookup(state);
Nicolas Capens157ba262019-12-10 17:49:14 -05001720
1721 if(!blitRoutine)
Alexis Hetu33642272019-03-01 11:55:59 -05001722 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001723 blitRoutine = generate(state);
1724 blitCache.add(state, blitRoutine);
Alexis Hetu33642272019-03-01 11:55:59 -05001725 }
1726
Nicolas Capens157ba262019-12-10 17:49:14 -05001727 return blitRoutine;
1728}
1729
1730Blitter::CornerUpdateRoutineType Blitter::getCornerUpdateRoutine(const State &state)
1731{
Ben Clayton377573c2020-04-03 20:36:40 +01001732 marl::lock lock(cornerUpdateMutex);
Ben Claytonac43aa72020-04-04 00:48:13 +01001733 auto cornerUpdateRoutine = cornerUpdateCache.lookup(state);
Nicolas Capens157ba262019-12-10 17:49:14 -05001734
1735 if(!cornerUpdateRoutine)
Alexis Hetuf60a2d52019-05-09 14:16:05 -04001736 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001737 cornerUpdateRoutine = generateCornerUpdate(state);
1738 cornerUpdateCache.add(state, cornerUpdateRoutine);
Alexis Hetuf60a2d52019-05-09 14:16:05 -04001739 }
1740
Nicolas Capens157ba262019-12-10 17:49:14 -05001741 return cornerUpdateRoutine;
1742}
1743
Nicolas Capens157ba262019-12-10 17:49:14 -05001744void Blitter::blit(const vk::Image *src, vk::Image *dst, VkImageBlit region, VkFilter filter)
1745{
Nicolas Capens64ed1212020-08-26 14:05:28 -04001746 ASSERT(src->getFormat() != VK_FORMAT_UNDEFINED);
1747 ASSERT(dst->getFormat() != VK_FORMAT_UNDEFINED);
Nicolas Capens157ba262019-12-10 17:49:14 -05001748
Nicolas Capensdd0e6002020-01-24 01:21:47 -05001749 // Vulkan 1.2 section 18.5. Image Copies with Scaling:
1750 // "The layerCount member of srcSubresource and dstSubresource must match"
1751 // "The aspectMask member of srcSubresource and dstSubresource must match"
1752 ASSERT(region.srcSubresource.layerCount == region.dstSubresource.layerCount);
1753 ASSERT(region.srcSubresource.aspectMask == region.dstSubresource.aspectMask);
Nicolas Capens157ba262019-12-10 17:49:14 -05001754
1755 if(region.dstOffsets[0].x > region.dstOffsets[1].x)
1756 {
1757 std::swap(region.srcOffsets[0].x, region.srcOffsets[1].x);
1758 std::swap(region.dstOffsets[0].x, region.dstOffsets[1].x);
1759 }
1760
1761 if(region.dstOffsets[0].y > region.dstOffsets[1].y)
1762 {
1763 std::swap(region.srcOffsets[0].y, region.srcOffsets[1].y);
1764 std::swap(region.dstOffsets[0].y, region.dstOffsets[1].y);
1765 }
1766
Ari Suonpaa07118692020-06-02 12:17:00 +03001767 if(region.dstOffsets[0].z > region.dstOffsets[1].z)
1768 {
1769 std::swap(region.srcOffsets[0].z, region.srcOffsets[1].z);
1770 std::swap(region.dstOffsets[0].z, region.dstOffsets[1].z);
1771 }
1772
Nicolas Capens157ba262019-12-10 17:49:14 -05001773 VkImageAspectFlagBits srcAspect = static_cast<VkImageAspectFlagBits>(region.srcSubresource.aspectMask);
1774 VkImageAspectFlagBits dstAspect = static_cast<VkImageAspectFlagBits>(region.dstSubresource.aspectMask);
1775 VkExtent3D srcExtent = src->getMipLevelExtent(srcAspect, region.srcSubresource.mipLevel);
1776
Nicolas Capens157ba262019-12-10 17:49:14 -05001777 float widthRatio = static_cast<float>(region.srcOffsets[1].x - region.srcOffsets[0].x) /
1778 static_cast<float>(region.dstOffsets[1].x - region.dstOffsets[0].x);
1779 float heightRatio = static_cast<float>(region.srcOffsets[1].y - region.srcOffsets[0].y) /
1780 static_cast<float>(region.dstOffsets[1].y - region.dstOffsets[0].y);
Alexis Hetu18daa812020-03-11 17:06:53 -04001781 float depthRatio = static_cast<float>(region.srcOffsets[1].z - region.srcOffsets[0].z) /
1782 static_cast<float>(region.dstOffsets[1].z - region.dstOffsets[0].z);
Nicolas Capens157ba262019-12-10 17:49:14 -05001783 float x0 = region.srcOffsets[0].x + (0.5f - region.dstOffsets[0].x) * widthRatio;
1784 float y0 = region.srcOffsets[0].y + (0.5f - region.dstOffsets[0].y) * heightRatio;
Alexis Hetu18daa812020-03-11 17:06:53 -04001785 float z0 = region.srcOffsets[0].z + (0.5f - region.dstOffsets[0].z) * depthRatio;
Nicolas Capens157ba262019-12-10 17:49:14 -05001786
1787 auto srcFormat = src->getFormat(srcAspect);
1788 auto dstFormat = dst->getFormat(dstAspect);
1789
1790 bool doFilter = (filter != VK_FILTER_NEAREST);
1791 bool allowSRGBConversion =
Ben Claytonfccfc562019-12-17 20:37:31 +00001792 doFilter ||
1793 (src->getSampleCountFlagBits() > 1) ||
1794 (srcFormat.isSRGBformat() != dstFormat.isSRGBformat());
Nicolas Capens157ba262019-12-10 17:49:14 -05001795
1796 State state(src->getFormat(srcAspect), dst->getFormat(dstAspect), src->getSampleCountFlagBits(), dst->getSampleCountFlagBits(),
1797 Options{ doFilter, allowSRGBConversion });
1798 state.clampToEdge = (region.srcOffsets[0].x < 0) ||
1799 (region.srcOffsets[0].y < 0) ||
1800 (static_cast<uint32_t>(region.srcOffsets[1].x) > srcExtent.width) ||
1801 (static_cast<uint32_t>(region.srcOffsets[1].y) > srcExtent.height) ||
1802 (doFilter && ((x0 < 0.5f) || (y0 < 0.5f)));
Alexis Hetu18daa812020-03-11 17:06:53 -04001803 state.filter3D = (region.srcOffsets[1].z - region.srcOffsets[0].z) !=
1804 (region.dstOffsets[1].z - region.dstOffsets[0].z);
Nicolas Capens157ba262019-12-10 17:49:14 -05001805
1806 auto blitRoutine = getBlitRoutine(state);
1807 if(!blitRoutine)
1808 {
1809 return;
1810 }
1811
Ben Claytonfccfc562019-12-17 20:37:31 +00001812 BlitData data = {
1813 nullptr, // source
1814 nullptr, // dest
1815 src->rowPitchBytes(srcAspect, region.srcSubresource.mipLevel), // sPitchB
1816 dst->rowPitchBytes(dstAspect, region.dstSubresource.mipLevel), // dPitchB
1817 src->slicePitchBytes(srcAspect, region.srcSubresource.mipLevel), // sSliceB
1818 dst->slicePitchBytes(dstAspect, region.dstSubresource.mipLevel), // dSliceB
Nicolas Capens157ba262019-12-10 17:49:14 -05001819
1820 x0,
1821 y0,
Alexis Hetu18daa812020-03-11 17:06:53 -04001822 z0,
Nicolas Capens157ba262019-12-10 17:49:14 -05001823 widthRatio,
1824 heightRatio,
Alexis Hetu18daa812020-03-11 17:06:53 -04001825 depthRatio,
Nicolas Capens157ba262019-12-10 17:49:14 -05001826
Ben Claytonfccfc562019-12-17 20:37:31 +00001827 region.dstOffsets[0].x, // x0d
1828 region.dstOffsets[1].x, // x1d
Alexis Hetu18daa812020-03-11 17:06:53 -04001829 region.dstOffsets[0].y, // y0d
1830 region.dstOffsets[1].y, // y1d
1831 region.dstOffsets[0].z, // z0d
1832 region.dstOffsets[1].z, // z1d
Nicolas Capens157ba262019-12-10 17:49:14 -05001833
Alexis Hetu18daa812020-03-11 17:06:53 -04001834 static_cast<int>(srcExtent.width), // sWidth
1835 static_cast<int>(srcExtent.height), // sHeight
1836 static_cast<int>(srcExtent.depth), // sDepth
Ben Clayton21fb75f2020-04-16 10:36:55 +01001837
1838 false, // filter3D
Nicolas Capens157ba262019-12-10 17:49:14 -05001839 };
1840
Alexis Hetu46159712020-06-15 16:13:51 -04001841 VkImageSubresource srcSubres = {
Nicolas Capens157ba262019-12-10 17:49:14 -05001842 region.srcSubresource.aspectMask,
1843 region.srcSubresource.mipLevel,
Alexis Hetu46159712020-06-15 16:13:51 -04001844 region.srcSubresource.baseArrayLayer
Nicolas Capens157ba262019-12-10 17:49:14 -05001845 };
1846
Alexis Hetu46159712020-06-15 16:13:51 -04001847 VkImageSubresource dstSubres = {
Nicolas Capens157ba262019-12-10 17:49:14 -05001848 region.dstSubresource.aspectMask,
1849 region.dstSubresource.mipLevel,
Alexis Hetu46159712020-06-15 16:13:51 -04001850 region.dstSubresource.baseArrayLayer
Nicolas Capens157ba262019-12-10 17:49:14 -05001851 };
1852
Alexis Hetu46159712020-06-15 16:13:51 -04001853 VkImageSubresourceRange dstSubresRange = {
1854 region.dstSubresource.aspectMask,
1855 region.dstSubresource.mipLevel,
Nicolas Capens64ed1212020-08-26 14:05:28 -04001856 1, // levelCount
Alexis Hetu46159712020-06-15 16:13:51 -04001857 region.dstSubresource.baseArrayLayer,
1858 region.dstSubresource.layerCount
Nicolas Capens157ba262019-12-10 17:49:14 -05001859 };
1860
Alexis Hetu46159712020-06-15 16:13:51 -04001861 uint32_t lastLayer = src->getLastLayerIndex(dstSubresRange);
Nicolas Capens157ba262019-12-10 17:49:14 -05001862
Alexis Hetu46159712020-06-15 16:13:51 -04001863 for(; dstSubres.arrayLayer <= lastLayer; srcSubres.arrayLayer++, dstSubres.arrayLayer++)
Nicolas Capens157ba262019-12-10 17:49:14 -05001864 {
Alexis Hetu46159712020-06-15 16:13:51 -04001865 data.source = src->getTexelPointer({ 0, 0, 0 }, srcSubres);
1866 data.dest = dst->getTexelPointer({ 0, 0, 0 }, dstSubres);
Nicolas Capens157ba262019-12-10 17:49:14 -05001867
Alexis Hetu18daa812020-03-11 17:06:53 -04001868 ASSERT(data.source < src->end());
1869 ASSERT(data.dest < dst->end());
Nicolas Capens157ba262019-12-10 17:49:14 -05001870
Alexis Hetu18daa812020-03-11 17:06:53 -04001871 blitRoutine(&data);
Nicolas Capens157ba262019-12-10 17:49:14 -05001872 }
Alexis Hetu4f438a52020-06-15 16:13:51 -04001873
1874 dst->contentsChanged(dstSubresRange);
Nicolas Capens157ba262019-12-10 17:49:14 -05001875}
1876
Nicolas Capens64ed1212020-08-26 14:05:28 -04001877void Blitter::resolve(const vk::Image *src, vk::Image *dst, VkImageResolve region)
1878{
Nicolas Capens4487e582020-08-26 15:43:22 -04001879 if(fastResolve(src, dst, region))
1880 {
1881 return;
1882 }
1883
1884 // Fall back to a generic blit which performs the resolve.
Nicolas Capens64ed1212020-08-26 14:05:28 -04001885 VkImageBlit blitRegion;
1886
1887 blitRegion.srcOffsets[0] = blitRegion.srcOffsets[1] = region.srcOffset;
1888 blitRegion.srcOffsets[1].x += region.extent.width;
1889 blitRegion.srcOffsets[1].y += region.extent.height;
1890 blitRegion.srcOffsets[1].z += region.extent.depth;
1891
1892 blitRegion.dstOffsets[0] = blitRegion.dstOffsets[1] = region.dstOffset;
1893 blitRegion.dstOffsets[1].x += region.extent.width;
1894 blitRegion.dstOffsets[1].y += region.extent.height;
1895 blitRegion.dstOffsets[1].z += region.extent.depth;
1896
1897 blitRegion.srcSubresource = region.srcSubresource;
1898 blitRegion.dstSubresource = region.dstSubresource;
1899
1900 blit(src, dst, blitRegion, VK_FILTER_NEAREST);
1901}
1902
// Averages the four bytes packed in 'x' with the corresponding bytes packed in
// 'y', rounding halves up, without letting carries cross byte boundaries.
static inline uint32_t averageByte4(uint32_t x, uint32_t y)
{
	const uint32_t common = x & y;     // Bits set in both inputs count fully
	const uint32_t differing = x ^ y;  // Bits set in only one input count for half

	// Halve the differing bits; the mask drops bits shifted in from the byte above.
	const uint32_t half = (differing >> 1) & 0x7F7F7F7F;

	// A differing lowest bit in a byte means the exact average ends in .5: round up.
	const uint32_t roundUp = differing & 0x01010101;

	return common + half + roundUp;
}
1907
1908bool Blitter::fastResolve(const vk::Image *src, vk::Image *dst, VkImageResolve region)
1909{
1910 // "The aspectMask member of srcSubresource and dstSubresource must only contain VK_IMAGE_ASPECT_COLOR_BIT"
1911 ASSERT(region.srcSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
1912 ASSERT(region.dstSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
1913 ASSERT(region.srcSubresource.layerCount == region.dstSubresource.layerCount);
1914
1915 if(region.dstOffset != VkOffset3D{ 0, 0, 0 })
1916 {
1917 return false;
1918 }
1919
1920 if(region.srcOffset != VkOffset3D{ 0, 0, 0 })
1921 {
1922 return false;
1923 }
1924
1925 if(region.srcSubresource.layerCount != 1)
1926 {
1927 return false;
1928 }
1929
1930 if(region.extent != src->getExtent() ||
1931 region.extent != dst->getExtent() ||
1932 region.extent.depth != 1)
1933 {
1934 return false;
1935 }
1936
1937 VkImageSubresource srcSubresource = {
1938 region.srcSubresource.aspectMask,
1939 region.srcSubresource.mipLevel,
1940 region.srcSubresource.baseArrayLayer
1941 };
1942
1943 VkImageSubresource dstSubresource = {
1944 region.dstSubresource.aspectMask,
1945 region.dstSubresource.mipLevel,
1946 region.dstSubresource.baseArrayLayer
1947 };
1948
1949 VkImageSubresourceRange dstSubresourceRange = {
1950 region.dstSubresource.aspectMask,
1951 region.dstSubresource.mipLevel,
1952 1, // levelCount
1953 region.dstSubresource.baseArrayLayer,
1954 region.dstSubresource.layerCount
1955 };
1956
1957 void *source = src->getTexelPointer({ 0, 0, 0 }, srcSubresource);
1958 uint8_t *dest = reinterpret_cast<uint8_t *>(dst->getTexelPointer({ 0, 0, 0 }, dstSubresource));
1959
1960 auto format = src->getFormat();
1961 auto samples = src->getSampleCountFlagBits();
1962 auto extent = src->getExtent();
1963
1964 int width = extent.width;
1965 int height = extent.height;
1966 int pitch = src->rowPitchBytes(VK_IMAGE_ASPECT_COLOR_BIT, region.srcSubresource.mipLevel);
1967 int slice = src->slicePitchBytes(VK_IMAGE_ASPECT_COLOR_BIT, region.srcSubresource.mipLevel);
1968
1969 uint8_t *source0 = (uint8_t *)source;
1970 uint8_t *source1 = source0 + slice;
1971 uint8_t *source2 = source1 + slice;
1972 uint8_t *source3 = source2 + slice;
1973
1974 if(format == VK_FORMAT_R8G8B8A8_UNORM || format == VK_FORMAT_B8G8R8A8_UNORM || format == VK_FORMAT_A8B8G8R8_UNORM_PACK32)
1975 {
1976 if(samples == 4)
1977 {
1978 for(int y = 0; y < height; y++)
1979 {
1980 for(int x = 0; x < width; x++)
1981 {
1982 uint32_t c0 = *(uint32_t *)(source0 + 4 * x);
1983 uint32_t c1 = *(uint32_t *)(source1 + 4 * x);
1984 uint32_t c2 = *(uint32_t *)(source2 + 4 * x);
1985 uint32_t c3 = *(uint32_t *)(source3 + 4 * x);
1986
1987 uint32_t c01 = averageByte4(c0, c1);
1988 uint32_t c23 = averageByte4(c2, c3);
1989 uint32_t c03 = averageByte4(c01, c23);
1990
1991 *(uint32_t *)(dest + 4 * x) = c03;
1992 }
1993
1994 source0 += pitch;
1995 source1 += pitch;
1996 source2 += pitch;
1997 source3 += pitch;
1998 dest += pitch;
1999 }
2000 }
2001 else
2002 UNSUPPORTED("Samples: %d", samples);
2003 }
2004 else
2005 {
2006 return false;
2007 }
2008
2009 dst->contentsChanged(dstSubresourceRange);
2010
2011 return true;
2012}
2013
Nicolas Capens64ed1212020-08-26 14:05:28 -04002014void Blitter::copy(const vk::Image *src, uint8_t *dst, unsigned int dstPitch)
2015{
2016 VkExtent3D extent = src->getExtent();
2017 size_t rowBytes = src->getFormat(VK_IMAGE_ASPECT_COLOR_BIT).bytes() * extent.width;
2018 unsigned int srcPitch = src->rowPitchBytes(VK_IMAGE_ASPECT_COLOR_BIT, 0);
2019 ASSERT(dstPitch >= rowBytes && srcPitch >= rowBytes && src->getMipLevelExtent(VK_IMAGE_ASPECT_COLOR_BIT, 0).height >= extent.height);
2020
2021 const uint8_t *s = (uint8_t *)src->getTexelPointer({ 0, 0, 0 }, { VK_IMAGE_ASPECT_COLOR_BIT, 0, 0 });
2022 uint8_t *d = dst;
2023
2024 for(uint32_t y = 0; y < extent.height; y++)
2025 {
2026 memcpy(d, s, rowBytes);
2027
2028 s += srcPitch;
2029 d += dstPitch;
2030 }
2031}
2032
Ben Claytonfccfc562019-12-17 20:37:31 +00002033void Blitter::computeCubeCorner(Pointer<Byte> &layer, Int &x0, Int &x1, Int &y0, Int &y1, Int &pitchB, const State &state)
Nicolas Capens157ba262019-12-10 17:49:14 -05002034{
2035 int bytes = state.sourceFormat.bytes();
2036
2037 Float4 c = readFloat4(layer + ComputeOffset(x0, y1, pitchB, bytes), state) +
2038 readFloat4(layer + ComputeOffset(x1, y0, pitchB, bytes), state) +
2039 readFloat4(layer + ComputeOffset(x1, y1, pitchB, bytes), state);
2040
2041 c *= Float4(1.0f / 3.0f);
2042
2043 write(c, layer + ComputeOffset(x0, y0, pitchB, bytes), state);
2044}
2045
Ben Claytonfccfc562019-12-17 20:37:31 +00002046Blitter::CornerUpdateRoutineType Blitter::generateCornerUpdate(const State &state)
Nicolas Capens157ba262019-12-10 17:49:14 -05002047{
2048 // Reading and writing from/to the same image
2049 ASSERT(state.sourceFormat == state.destFormat);
2050 ASSERT(state.srcSamples == state.destSamples);
2051
Nicolas Capensdd0e6002020-01-24 01:21:47 -05002052 // Vulkan 1.2: "If samples is not VK_SAMPLE_COUNT_1_BIT, then imageType must be
2053 // VK_IMAGE_TYPE_2D, flags must not contain VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT"
2054 ASSERT(state.srcSamples == 1);
Nicolas Capens157ba262019-12-10 17:49:14 -05002055
2056 CornerUpdateFunction function;
2057 {
2058 Pointer<Byte> blit(function.Arg<0>());
2059
2060 Pointer<Byte> layers = *Pointer<Pointer<Byte>>(blit + OFFSET(CubeBorderData, layers));
2061 Int pitchB = *Pointer<Int>(blit + OFFSET(CubeBorderData, pitchB));
2062 UInt layerSize = *Pointer<Int>(blit + OFFSET(CubeBorderData, layerSize));
2063 UInt dim = *Pointer<Int>(blit + OFFSET(CubeBorderData, dim));
2064
2065 // Low Border, Low Pixel, High Border, High Pixel
Ben Claytonfccfc562019-12-17 20:37:31 +00002066 Int LB(-1), LP(0), HB(dim), HP(dim - 1);
Nicolas Capens157ba262019-12-10 17:49:14 -05002067
2068 for(int face = 0; face < 6; face++)
2069 {
2070 computeCubeCorner(layers, LB, LP, LB, LP, pitchB, state);
2071 computeCubeCorner(layers, LB, LP, HB, HP, pitchB, state);
2072 computeCubeCorner(layers, HB, HP, LB, LP, pitchB, state);
2073 computeCubeCorner(layers, HB, HP, HB, HP, pitchB, state);
2074 layers = layers + layerSize;
2075 }
2076 }
2077
2078 return function("BlitRoutine");
2079}
2080
Alexis Hetu46159712020-06-15 16:13:51 -04002081void Blitter::updateBorders(vk::Image *image, const VkImageSubresource &subresource)
Nicolas Capens157ba262019-12-10 17:49:14 -05002082{
Alexis Hetu46159712020-06-15 16:13:51 -04002083 ASSERT(image->getArrayLayers() >= (subresource.arrayLayer + 6));
Nicolas Capens157ba262019-12-10 17:49:14 -05002084
2085 // From Vulkan 1.1 spec, section 11.5. Image Views:
2086 // "For cube and cube array image views, the layers of the image view starting
2087 // at baseArrayLayer correspond to faces in the order +X, -X, +Y, -Y, +Z, -Z."
Alexis Hetu46159712020-06-15 16:13:51 -04002088 VkImageSubresource posX = subresource;
2089 VkImageSubresource negX = posX;
2090 negX.arrayLayer++;
2091 VkImageSubresource posY = negX;
2092 posY.arrayLayer++;
2093 VkImageSubresource negY = posY;
2094 negY.arrayLayer++;
2095 VkImageSubresource posZ = negY;
2096 posZ.arrayLayer++;
2097 VkImageSubresource negZ = posZ;
2098 negZ.arrayLayer++;
Nicolas Capens157ba262019-12-10 17:49:14 -05002099
2100 // Copy top / bottom
2101 copyCubeEdge(image, posX, BOTTOM, negY, RIGHT);
2102 copyCubeEdge(image, posY, BOTTOM, posZ, TOP);
2103 copyCubeEdge(image, posZ, BOTTOM, negY, TOP);
2104 copyCubeEdge(image, negX, BOTTOM, negY, LEFT);
2105 copyCubeEdge(image, negY, BOTTOM, negZ, BOTTOM);
2106 copyCubeEdge(image, negZ, BOTTOM, negY, BOTTOM);
2107
2108 copyCubeEdge(image, posX, TOP, posY, RIGHT);
2109 copyCubeEdge(image, posY, TOP, negZ, TOP);
2110 copyCubeEdge(image, posZ, TOP, posY, BOTTOM);
2111 copyCubeEdge(image, negX, TOP, posY, LEFT);
2112 copyCubeEdge(image, negY, TOP, posZ, BOTTOM);
2113 copyCubeEdge(image, negZ, TOP, posY, TOP);
2114
2115 // Copy left / right
2116 copyCubeEdge(image, posX, RIGHT, negZ, LEFT);
2117 copyCubeEdge(image, posY, RIGHT, posX, TOP);
2118 copyCubeEdge(image, posZ, RIGHT, posX, LEFT);
2119 copyCubeEdge(image, negX, RIGHT, posZ, LEFT);
2120 copyCubeEdge(image, negY, RIGHT, posX, BOTTOM);
2121 copyCubeEdge(image, negZ, RIGHT, negX, LEFT);
2122
2123 copyCubeEdge(image, posX, LEFT, posZ, RIGHT);
2124 copyCubeEdge(image, posY, LEFT, negX, TOP);
2125 copyCubeEdge(image, posZ, LEFT, negX, RIGHT);
2126 copyCubeEdge(image, negX, LEFT, negZ, RIGHT);
2127 copyCubeEdge(image, negY, LEFT, negX, BOTTOM);
2128 copyCubeEdge(image, negZ, LEFT, posX, RIGHT);
2129
2130 // Compute corner colors
Alexis Hetu46159712020-06-15 16:13:51 -04002131 VkImageAspectFlagBits aspect = static_cast<VkImageAspectFlagBits>(subresource.aspectMask);
Nicolas Capens157ba262019-12-10 17:49:14 -05002132 vk::Format format = image->getFormat(aspect);
2133 VkSampleCountFlagBits samples = image->getSampleCountFlagBits();
2134 State state(format, format, samples, samples, Options{ 0xF });
2135
Nicolas Capensdd0e6002020-01-24 01:21:47 -05002136 // Vulkan 1.2: "If samples is not VK_SAMPLE_COUNT_1_BIT, then imageType must be
2137 // VK_IMAGE_TYPE_2D, flags must not contain VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT"
2138 ASSERT(samples == VK_SAMPLE_COUNT_1_BIT);
Nicolas Capens157ba262019-12-10 17:49:14 -05002139
2140 auto cornerUpdateRoutine = getCornerUpdateRoutine(state);
2141 if(!cornerUpdateRoutine)
2142 {
2143 return;
2144 }
2145
Alexis Hetu46159712020-06-15 16:13:51 -04002146 VkExtent3D extent = image->getMipLevelExtent(aspect, subresource.mipLevel);
Ben Claytonfccfc562019-12-17 20:37:31 +00002147 CubeBorderData data = {
Nicolas Capens157ba262019-12-10 17:49:14 -05002148 image->getTexelPointer({ 0, 0, 0 }, posX),
Alexis Hetu46159712020-06-15 16:13:51 -04002149 image->rowPitchBytes(aspect, subresource.mipLevel),
Nicolas Capens157ba262019-12-10 17:49:14 -05002150 static_cast<uint32_t>(image->getLayerSize(aspect)),
2151 extent.width
2152 };
2153 cornerUpdateRoutine(&data);
2154}
2155
Ben Claytonfccfc562019-12-17 20:37:31 +00002156void Blitter::copyCubeEdge(vk::Image *image,
Alexis Hetu46159712020-06-15 16:13:51 -04002157 const VkImageSubresource &dstSubresource, Edge dstEdge,
2158 const VkImageSubresource &srcSubresource, Edge srcEdge)
Nicolas Capens157ba262019-12-10 17:49:14 -05002159{
Alexis Hetu46159712020-06-15 16:13:51 -04002160 ASSERT(srcSubresource.aspectMask == dstSubresource.aspectMask);
2161 ASSERT(srcSubresource.mipLevel == dstSubresource.mipLevel);
2162 ASSERT(srcSubresource.arrayLayer != dstSubresource.arrayLayer);
Nicolas Capens157ba262019-12-10 17:49:14 -05002163
2164 // Figure out if the edges to be copied in reverse order respectively from one another
2165 // The copy should be reversed whenever the same edges are contiguous or if we're
2166 // copying top <-> right or bottom <-> left. This is explained by the layout, which is:
2167 //
2168 // | +y |
2169 // | -x | +z | +x | -z |
2170 // | -y |
2171
2172 bool reverse = (srcEdge == dstEdge) ||
2173 ((srcEdge == TOP) && (dstEdge == RIGHT)) ||
2174 ((srcEdge == RIGHT) && (dstEdge == TOP)) ||
2175 ((srcEdge == BOTTOM) && (dstEdge == LEFT)) ||
2176 ((srcEdge == LEFT) && (dstEdge == BOTTOM));
2177
Alexis Hetu46159712020-06-15 16:13:51 -04002178 VkImageAspectFlagBits aspect = static_cast<VkImageAspectFlagBits>(srcSubresource.aspectMask);
Nicolas Capens157ba262019-12-10 17:49:14 -05002179 int bytes = image->getFormat(aspect).bytes();
Alexis Hetu46159712020-06-15 16:13:51 -04002180 int pitchB = image->rowPitchBytes(aspect, srcSubresource.mipLevel);
Nicolas Capens157ba262019-12-10 17:49:14 -05002181
Alexis Hetu46159712020-06-15 16:13:51 -04002182 VkExtent3D extent = image->getMipLevelExtent(aspect, srcSubresource.mipLevel);
Nicolas Capens157ba262019-12-10 17:49:14 -05002183 int w = extent.width;
2184 int h = extent.height;
2185 if(w != h)
2186 {
2187 UNSUPPORTED("Cube doesn't have square faces : (%d, %d)", w, h);
2188 }
2189
2190 // Src is expressed in the regular [0, width-1], [0, height-1] space
2191 bool srcHorizontal = ((srcEdge == TOP) || (srcEdge == BOTTOM));
2192 int srcDelta = srcHorizontal ? bytes : pitchB;
2193 VkOffset3D srcOffset = { (srcEdge == RIGHT) ? (w - 1) : 0, (srcEdge == BOTTOM) ? (h - 1) : 0, 0 };
2194
2195 // Dst contains borders, so it is expressed in the [-1, width], [-1, height] space
2196 bool dstHorizontal = ((dstEdge == TOP) || (dstEdge == BOTTOM));
2197 int dstDelta = (dstHorizontal ? bytes : pitchB) * (reverse ? -1 : 1);
2198 VkOffset3D dstOffset = { (dstEdge == RIGHT) ? w : -1, (dstEdge == BOTTOM) ? h : -1, 0 };
2199
2200 // Don't write in the corners
2201 if(dstHorizontal)
2202 {
2203 dstOffset.x += reverse ? w : 1;
2204 }
2205 else
2206 {
2207 dstOffset.y += reverse ? h : 1;
2208 }
2209
Alexis Hetu46159712020-06-15 16:13:51 -04002210 const uint8_t *src = static_cast<const uint8_t *>(image->getTexelPointer(srcOffset, srcSubresource));
2211 uint8_t *dst = static_cast<uint8_t *>(image->getTexelPointer(dstOffset, dstSubresource));
Nicolas Capens157ba262019-12-10 17:49:14 -05002212 ASSERT((src < image->end()) && ((src + (w * srcDelta)) < image->end()));
2213 ASSERT((dst < image->end()) && ((dst + (w * dstDelta)) < image->end()));
2214
2215 for(int i = 0; i < w; ++i, dst += dstDelta, src += srcDelta)
2216 {
2217 memcpy(dst, src, bytes);
2218 }
2219}
2220
Ben Claytonfccfc562019-12-17 20:37:31 +00002221} // namespace sw