blob: a7d125685631b2f2c184db7ca486361e0c8ff978 [file] [log] [blame]
// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
15#include "Blitter.hpp"
16
Nicolas Capens1d8c8db2018-11-05 16:30:42 -050017#include "Pipeline/ShaderCore.hpp"
Nicolas Capens68a82382018-10-02 13:16:55 -040018#include "Reactor/Reactor.hpp"
Ben Clayton25e06e02020-02-07 11:19:08 +000019#include "System/Debug.hpp"
Nicolas Capens02cbe8e2019-08-05 15:10:05 -040020#include "System/Half.hpp"
Nicolas Capens1d8c8db2018-11-05 16:30:42 -050021#include "System/Memory.hpp"
Ben Claytonfccfc562019-12-17 20:37:31 +000022#include "Vulkan/VkBuffer.hpp"
Alexis Hetu33642272019-03-01 11:55:59 -050023#include "Vulkan/VkImage.hpp"
Nicolas Capens68a82382018-10-02 13:16:55 -040024
Nicolas Capensb8c63932019-03-19 01:52:40 -040025#include <utility>
26
Alexis Hetu3716c202019-12-19 17:09:08 -050027namespace {
28rr::RValue<rr::Int> PackFields(rr::Int4 const &ints, const sw::int4 shifts)
29{
30 return (rr::Int(ints.x) << shifts[0]) |
31 (rr::Int(ints.y) << shifts[1]) |
32 (rr::Int(ints.z) << shifts[2]) |
33 (rr::Int(ints.w) << shifts[3]);
34}
35} // namespace
36
Nicolas Capens157ba262019-12-10 17:49:14 -050037namespace sw {
38
Ben Claytonfccfc562019-12-17 20:37:31 +000039Blitter::Blitter()
40 : blitMutex()
41 , blitCache(1024)
42 , cornerUpdateMutex()
43 , cornerUpdateCache(64) // We only need one of these per format
Nicolas Capens68a82382018-10-02 13:16:55 -040044{
Nicolas Capens157ba262019-12-10 17:49:14 -050045}
46
47Blitter::~Blitter()
48{
49}
50
Ben Claytonfccfc562019-12-17 20:37:31 +000051void Blitter::clear(void *pixel, vk::Format format, vk::Image *dest, const vk::Format &viewFormat, const VkImageSubresourceRange &subresourceRange, const VkRect2D *renderArea)
Nicolas Capens157ba262019-12-10 17:49:14 -050052{
53 VkImageAspectFlagBits aspect = static_cast<VkImageAspectFlagBits>(subresourceRange.aspectMask);
54 vk::Format dstFormat = viewFormat.getAspectFormat(aspect);
55 if(dstFormat == VK_FORMAT_UNDEFINED)
Nicolas Capens68a82382018-10-02 13:16:55 -040056 {
Nicolas Capens157ba262019-12-10 17:49:14 -050057 return;
Nicolas Capens68a82382018-10-02 13:16:55 -040058 }
59
Nicolas Capens157ba262019-12-10 17:49:14 -050060 float *pPixel = static_cast<float *>(pixel);
Nicolas Capens81bc9d92019-12-16 15:05:57 -050061 if(viewFormat.isUnsignedNormalized())
Nicolas Capens68a82382018-10-02 13:16:55 -040062 {
Nicolas Capens157ba262019-12-10 17:49:14 -050063 pPixel[0] = sw::clamp(pPixel[0], 0.0f, 1.0f);
64 pPixel[1] = sw::clamp(pPixel[1], 0.0f, 1.0f);
65 pPixel[2] = sw::clamp(pPixel[2], 0.0f, 1.0f);
66 pPixel[3] = sw::clamp(pPixel[3], 0.0f, 1.0f);
67 }
Nicolas Capens81bc9d92019-12-16 15:05:57 -050068 else if(viewFormat.isSignedNormalized())
Nicolas Capens157ba262019-12-10 17:49:14 -050069 {
70 pPixel[0] = sw::clamp(pPixel[0], -1.0f, 1.0f);
71 pPixel[1] = sw::clamp(pPixel[1], -1.0f, 1.0f);
72 pPixel[2] = sw::clamp(pPixel[2], -1.0f, 1.0f);
73 pPixel[3] = sw::clamp(pPixel[3], -1.0f, 1.0f);
Nicolas Capens68a82382018-10-02 13:16:55 -040074 }
75
Nicolas Capens157ba262019-12-10 17:49:14 -050076 if(fastClear(pixel, format, dest, dstFormat, subresourceRange, renderArea))
Alexis Hetu33642272019-03-01 11:55:59 -050077 {
Nicolas Capens157ba262019-12-10 17:49:14 -050078 return;
79 }
80
81 State state(format, dstFormat, 1, dest->getSampleCountFlagBits(), Options{ 0xF });
82 auto blitRoutine = getBlitRoutine(state);
83 if(!blitRoutine)
84 {
85 return;
86 }
87
Ben Claytonfccfc562019-12-17 20:37:31 +000088 VkImageSubresourceLayers subresLayers = {
Nicolas Capens157ba262019-12-10 17:49:14 -050089 subresourceRange.aspectMask,
90 subresourceRange.baseMipLevel,
91 subresourceRange.baseArrayLayer,
92 1
93 };
94
95 uint32_t lastMipLevel = dest->getLastMipLevel(subresourceRange);
96 uint32_t lastLayer = dest->getLastLayerIndex(subresourceRange);
97
98 VkRect2D area = { { 0, 0 }, { 0, 0 } };
99 if(renderArea)
100 {
101 ASSERT(subresourceRange.levelCount == 1);
102 area = *renderArea;
103 }
104
105 for(; subresLayers.mipLevel <= lastMipLevel; subresLayers.mipLevel++)
106 {
107 VkExtent3D extent = dest->getMipLevelExtent(aspect, subresLayers.mipLevel);
108 if(!renderArea)
Alexis Hetu33642272019-03-01 11:55:59 -0500109 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500110 area.extent.width = extent.width;
111 area.extent.height = extent.height;
Alexis Hetu33642272019-03-01 11:55:59 -0500112 }
113
Ben Claytonfccfc562019-12-17 20:37:31 +0000114 BlitData data = {
115 pixel, nullptr, // source, dest
Chris Forbes88289192019-08-28 16:49:36 -0700116
Ben Claytonfccfc562019-12-17 20:37:31 +0000117 format.bytes(), // sPitchB
118 dest->rowPitchBytes(aspect, subresLayers.mipLevel), // dPitchB
119 0, // sSliceB (unused in clear operations)
120 dest->slicePitchBytes(aspect, subresLayers.mipLevel), // dSliceB
Alexis Hetu33642272019-03-01 11:55:59 -0500121
Alexis Hetu18daa812020-03-11 17:06:53 -0400122 0.5f, 0.5f, 0.5f, 0.0f, 0.0f, 0.0f, // x0, y0, z0, w, h, d
Alexis Hetu33642272019-03-01 11:55:59 -0500123
Ben Claytonfccfc562019-12-17 20:37:31 +0000124 area.offset.x, static_cast<int>(area.offset.x + area.extent.width), // x0d, x1d
Alexis Hetu18daa812020-03-11 17:06:53 -0400125 area.offset.y, static_cast<int>(area.offset.y + area.extent.height), // y0d, y1d
126 0, 1, // z0d, z1d
Nicolas Capens157ba262019-12-10 17:49:14 -0500127
Alexis Hetu18daa812020-03-11 17:06:53 -0400128 0, 0, 0, // sWidth, sHeight, sDepth
Ben Clayton21fb75f2020-04-16 10:36:55 +0100129
130 false, // filter3D
Alexis Hetu33642272019-03-01 11:55:59 -0500131 };
132
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500133 if(renderArea && dest->is3DSlice())
Alexis Hetu33642272019-03-01 11:55:59 -0500134 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500135 // Reinterpret layers as depth slices
136 subresLayers.baseArrayLayer = 0;
137 subresLayers.layerCount = 1;
Nicolas Capens81bc9d92019-12-16 15:05:57 -0500138 for(uint32_t depth = subresourceRange.baseArrayLayer; depth <= lastLayer; depth++)
Alexis Hetu33642272019-03-01 11:55:59 -0500139 {
Ben Claytonfccfc562019-12-17 20:37:31 +0000140 data.dest = dest->getTexelPointer({ 0, 0, static_cast<int32_t>(depth) }, subresLayers);
Nicolas Capens157ba262019-12-10 17:49:14 -0500141 blitRoutine(&data);
Nicolas Capens68a82382018-10-02 13:16:55 -0400142 }
143 }
Nicolas Capens88ac3672019-08-01 13:22:34 -0400144 else
Nicolas Capens68a82382018-10-02 13:16:55 -0400145 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500146 for(subresLayers.baseArrayLayer = subresourceRange.baseArrayLayer; subresLayers.baseArrayLayer <= lastLayer; subresLayers.baseArrayLayer++)
Nicolas Capens68a82382018-10-02 13:16:55 -0400147 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500148 for(uint32_t depth = 0; depth < extent.depth; depth++)
Nicolas Capens68a82382018-10-02 13:16:55 -0400149 {
Nicolas Capens157ba262019-12-10 17:49:14 -0500150 data.dest = dest->getTexelPointer({ 0, 0, static_cast<int32_t>(depth) }, subresLayers);
151
152 blitRoutine(&data);
153 }
154 }
155 }
156 }
157}
158
Ben Claytonfccfc562019-12-17 20:37:31 +0000159bool Blitter::fastClear(void *pixel, vk::Format format, vk::Image *dest, const vk::Format &viewFormat, const VkImageSubresourceRange &subresourceRange, const VkRect2D *renderArea)
Nicolas Capens157ba262019-12-10 17:49:14 -0500160{
161 if(format != VK_FORMAT_R32G32B32A32_SFLOAT)
162 {
163 return false;
164 }
165
Ben Claytonfccfc562019-12-17 20:37:31 +0000166 float *color = (float *)pixel;
Nicolas Capens157ba262019-12-10 17:49:14 -0500167 float r = color[0];
168 float g = color[1];
169 float b = color[2];
170 float a = color[3];
171
172 uint32_t packed;
173
174 VkImageAspectFlagBits aspect = static_cast<VkImageAspectFlagBits>(subresourceRange.aspectMask);
175 switch(viewFormat)
176 {
Ben Claytonfccfc562019-12-17 20:37:31 +0000177 case VK_FORMAT_R5G6B5_UNORM_PACK16:
178 packed = ((uint16_t)(31 * b + 0.5f) << 0) |
179 ((uint16_t)(63 * g + 0.5f) << 5) |
180 ((uint16_t)(31 * r + 0.5f) << 11);
181 break;
182 case VK_FORMAT_B5G6R5_UNORM_PACK16:
183 packed = ((uint16_t)(31 * r + 0.5f) << 0) |
184 ((uint16_t)(63 * g + 0.5f) << 5) |
185 ((uint16_t)(31 * b + 0.5f) << 11);
186 break;
187 case VK_FORMAT_A8B8G8R8_UINT_PACK32:
188 case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
189 case VK_FORMAT_R8G8B8A8_UNORM:
190 packed = ((uint32_t)(255 * a + 0.5f) << 24) |
191 ((uint32_t)(255 * b + 0.5f) << 16) |
192 ((uint32_t)(255 * g + 0.5f) << 8) |
193 ((uint32_t)(255 * r + 0.5f) << 0);
194 break;
195 case VK_FORMAT_B8G8R8A8_UNORM:
196 packed = ((uint32_t)(255 * a + 0.5f) << 24) |
197 ((uint32_t)(255 * r + 0.5f) << 16) |
198 ((uint32_t)(255 * g + 0.5f) << 8) |
199 ((uint32_t)(255 * b + 0.5f) << 0);
200 break;
201 case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
202 packed = R11G11B10F(color);
203 break;
204 case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32:
205 packed = RGB9E5(color);
206 break;
207 default:
208 return false;
Nicolas Capens157ba262019-12-10 17:49:14 -0500209 }
210
Ben Claytonfccfc562019-12-17 20:37:31 +0000211 VkImageSubresourceLayers subresLayers = {
Nicolas Capens157ba262019-12-10 17:49:14 -0500212 subresourceRange.aspectMask,
213 subresourceRange.baseMipLevel,
214 subresourceRange.baseArrayLayer,
215 1
216 };
217 uint32_t lastMipLevel = dest->getLastMipLevel(subresourceRange);
218 uint32_t lastLayer = dest->getLastLayerIndex(subresourceRange);
219
220 VkRect2D area = { { 0, 0 }, { 0, 0 } };
221 if(renderArea)
222 {
223 ASSERT(subresourceRange.levelCount == 1);
224 area = *renderArea;
225 }
226
227 for(; subresLayers.mipLevel <= lastMipLevel; subresLayers.mipLevel++)
228 {
229 int rowPitchBytes = dest->rowPitchBytes(aspect, subresLayers.mipLevel);
230 int slicePitchBytes = dest->slicePitchBytes(aspect, subresLayers.mipLevel);
231 VkExtent3D extent = dest->getMipLevelExtent(aspect, subresLayers.mipLevel);
232 if(!renderArea)
233 {
234 area.extent.width = extent.width;
235 area.extent.height = extent.height;
236 }
237 if(dest->is3DSlice())
238 {
Ben Claytonfccfc562019-12-17 20:37:31 +0000239 extent.depth = 1; // The 3D image is instead interpreted as a 2D image with layers
Nicolas Capens157ba262019-12-10 17:49:14 -0500240 }
241
242 for(subresLayers.baseArrayLayer = subresourceRange.baseArrayLayer; subresLayers.baseArrayLayer <= lastLayer; subresLayers.baseArrayLayer++)
243 {
244 for(uint32_t depth = 0; depth < extent.depth; depth++)
245 {
Ben Claytonfccfc562019-12-17 20:37:31 +0000246 uint8_t *slice = (uint8_t *)dest->getTexelPointer(
247 { area.offset.x, area.offset.y, static_cast<int32_t>(depth) }, subresLayers);
Nicolas Capens157ba262019-12-10 17:49:14 -0500248
249 for(int j = 0; j < dest->getSampleCountFlagBits(); j++)
250 {
251 uint8_t *d = slice;
252
253 switch(viewFormat.bytes())
254 {
Ben Claytonfccfc562019-12-17 20:37:31 +0000255 case 2:
256 for(uint32_t i = 0; i < area.extent.height; i++)
257 {
258 ASSERT(d < dest->end());
259 sw::clear((uint16_t *)d, static_cast<uint16_t>(packed), area.extent.width);
260 d += rowPitchBytes;
261 }
262 break;
263 case 4:
264 for(uint32_t i = 0; i < area.extent.height; i++)
265 {
266 ASSERT(d < dest->end());
267 sw::clear((uint32_t *)d, packed, area.extent.width);
268 d += rowPitchBytes;
269 }
270 break;
271 default:
272 assert(false);
Nicolas Capens157ba262019-12-10 17:49:14 -0500273 }
274
275 slice += slicePitchBytes;
276 }
277 }
278 }
279 }
280
281 return true;
282}
283
284Float4 Blitter::readFloat4(Pointer<Byte> element, const State &state)
285{
286 Float4 c(0.0f, 0.0f, 0.0f, 1.0f);
287
288 switch(state.sourceFormat)
289 {
Ben Claytonfccfc562019-12-17 20:37:31 +0000290 case VK_FORMAT_B4G4R4A4_UNORM_PACK16:
291 c.w = Float(Int(*Pointer<Byte>(element)) & Int(0xF));
292 c.x = Float((Int(*Pointer<Byte>(element)) >> 4) & Int(0xF));
293 c.y = Float(Int(*Pointer<Byte>(element + 1)) & Int(0xF));
294 c.z = Float((Int(*Pointer<Byte>(element + 1)) >> 4) & Int(0xF));
295 break;
296 case VK_FORMAT_R8_SINT:
297 case VK_FORMAT_R8_SNORM:
298 c.x = Float(Int(*Pointer<SByte>(element)));
299 c.w = float(0x7F);
300 break;
301 case VK_FORMAT_R8_UNORM:
302 case VK_FORMAT_R8_UINT:
303 case VK_FORMAT_R8_SRGB:
304 c.x = Float(Int(*Pointer<Byte>(element)));
305 c.w = float(0xFF);
306 break;
307 case VK_FORMAT_R16_SINT:
308 case VK_FORMAT_R16_SNORM:
309 c.x = Float(Int(*Pointer<Short>(element)));
310 c.w = float(0x7FFF);
311 break;
312 case VK_FORMAT_R16_UNORM:
313 case VK_FORMAT_R16_UINT:
314 c.x = Float(Int(*Pointer<UShort>(element)));
315 c.w = float(0xFFFF);
316 break;
317 case VK_FORMAT_R32_SINT:
318 c.x = Float(*Pointer<Int>(element));
319 c.w = float(0x7FFFFFFF);
320 break;
321 case VK_FORMAT_R32_UINT:
322 c.x = Float(*Pointer<UInt>(element));
323 c.w = float(0xFFFFFFFF);
324 break;
325 case VK_FORMAT_B8G8R8A8_SRGB:
326 case VK_FORMAT_B8G8R8A8_UNORM:
327 c = Float4(*Pointer<Byte4>(element)).zyxw;
328 break;
329 case VK_FORMAT_A8B8G8R8_SINT_PACK32:
330 case VK_FORMAT_R8G8B8A8_SINT:
331 case VK_FORMAT_A8B8G8R8_SNORM_PACK32:
332 case VK_FORMAT_R8G8B8A8_SNORM:
333 c = Float4(*Pointer<SByte4>(element));
334 break;
335 case VK_FORMAT_A8B8G8R8_UINT_PACK32:
336 case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
337 case VK_FORMAT_R8G8B8A8_UNORM:
338 case VK_FORMAT_R8G8B8A8_UINT:
339 case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
340 case VK_FORMAT_R8G8B8A8_SRGB:
341 c = Float4(*Pointer<Byte4>(element));
342 break;
343 case VK_FORMAT_R16G16B16A16_SINT:
344 c = Float4(*Pointer<Short4>(element));
345 break;
346 case VK_FORMAT_R16G16B16A16_UNORM:
347 case VK_FORMAT_R16G16B16A16_UINT:
348 c = Float4(*Pointer<UShort4>(element));
349 break;
350 case VK_FORMAT_R32G32B32A32_SINT:
351 c = Float4(*Pointer<Int4>(element));
352 break;
353 case VK_FORMAT_R32G32B32A32_UINT:
354 c = Float4(*Pointer<UInt4>(element));
355 break;
356 case VK_FORMAT_R8G8_SINT:
357 case VK_FORMAT_R8G8_SNORM:
358 c.x = Float(Int(*Pointer<SByte>(element + 0)));
359 c.y = Float(Int(*Pointer<SByte>(element + 1)));
360 c.w = float(0x7F);
361 break;
362 case VK_FORMAT_R8G8_UNORM:
363 case VK_FORMAT_R8G8_UINT:
364 case VK_FORMAT_R8G8_SRGB:
365 c.x = Float(Int(*Pointer<Byte>(element + 0)));
366 c.y = Float(Int(*Pointer<Byte>(element + 1)));
367 c.w = float(0xFF);
368 break;
369 case VK_FORMAT_R16G16_SINT:
370 case VK_FORMAT_R16G16_SNORM:
371 c.x = Float(Int(*Pointer<Short>(element + 0)));
372 c.y = Float(Int(*Pointer<Short>(element + 2)));
373 c.w = float(0x7FFF);
374 break;
375 case VK_FORMAT_R16G16_UNORM:
376 case VK_FORMAT_R16G16_UINT:
377 c.x = Float(Int(*Pointer<UShort>(element + 0)));
378 c.y = Float(Int(*Pointer<UShort>(element + 2)));
379 c.w = float(0xFFFF);
380 break;
381 case VK_FORMAT_R32G32_SINT:
382 c.x = Float(*Pointer<Int>(element + 0));
383 c.y = Float(*Pointer<Int>(element + 4));
384 c.w = float(0x7FFFFFFF);
385 break;
386 case VK_FORMAT_R32G32_UINT:
387 c.x = Float(*Pointer<UInt>(element + 0));
388 c.y = Float(*Pointer<UInt>(element + 4));
389 c.w = float(0xFFFFFFFF);
390 break;
391 case VK_FORMAT_R32G32B32A32_SFLOAT:
392 c = *Pointer<Float4>(element);
393 break;
394 case VK_FORMAT_R32G32_SFLOAT:
395 c.x = *Pointer<Float>(element + 0);
396 c.y = *Pointer<Float>(element + 4);
397 break;
398 case VK_FORMAT_R32_SFLOAT:
399 c.x = *Pointer<Float>(element);
400 break;
401 case VK_FORMAT_R16G16B16A16_SFLOAT:
402 c.w = Float(*Pointer<Half>(element + 6));
403 case VK_FORMAT_R16G16B16_SFLOAT:
404 c.z = Float(*Pointer<Half>(element + 4));
405 case VK_FORMAT_R16G16_SFLOAT:
406 c.y = Float(*Pointer<Half>(element + 2));
407 case VK_FORMAT_R16_SFLOAT:
408 c.x = Float(*Pointer<Half>(element));
409 break;
410 case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
411 c = r11g11b10Unpack(*Pointer<UInt>(element));
412 break;
413 case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32:
414 // This type contains a common 5 bit exponent (E) and a 9 bit the mantissa for R, G and B.
415 c.x = Float(*Pointer<UInt>(element) & UInt(0x000001FF)); // R's mantissa (bits 0-8)
416 c.y = Float((*Pointer<UInt>(element) & UInt(0x0003FE00)) >> 9); // G's mantissa (bits 9-17)
417 c.z = Float((*Pointer<UInt>(element) & UInt(0x07FC0000)) >> 18); // B's mantissa (bits 18-26)
418 c *= Float4(
419 // 2^E, using the exponent (bits 27-31) and treating it as an unsigned integer value
420 Float(UInt(1) << ((*Pointer<UInt>(element) & UInt(0xF8000000)) >> 27)) *
421 // Since the 9 bit mantissa values currently stored in RGB were converted straight
422 // from int to float (in the [0, 1<<9] range instead of the [0, 1] range), they
423 // are (1 << 9) times too high.
424 // Also, the exponent has 5 bits and we compute the exponent bias of floating point
425 // formats using "2^(k-1) - 1", so, in this case, the exponent bias is 2^(5-1)-1 = 15
426 // Exponent bias (15) + number of mantissa bits per component (9) = 24
427 Float(1.0f / (1 << 24)));
428 c.w = 1.0f;
429 break;
430 case VK_FORMAT_R5G6B5_UNORM_PACK16:
431 c.x = Float(Int((*Pointer<UShort>(element) & UShort(0xF800)) >> UShort(11)));
432 c.y = Float(Int((*Pointer<UShort>(element) & UShort(0x07E0)) >> UShort(5)));
433 c.z = Float(Int(*Pointer<UShort>(element) & UShort(0x001F)));
434 break;
435 case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
436 c.w = Float(Int((*Pointer<UShort>(element) & UShort(0x8000)) >> UShort(15)));
437 c.x = Float(Int((*Pointer<UShort>(element) & UShort(0x7C00)) >> UShort(10)));
438 c.y = Float(Int((*Pointer<UShort>(element) & UShort(0x03E0)) >> UShort(5)));
439 c.z = Float(Int(*Pointer<UShort>(element) & UShort(0x001F)));
440 break;
441 case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
442 case VK_FORMAT_A2B10G10R10_UINT_PACK32:
443 c.x = Float(Int((*Pointer<UInt>(element) & UInt(0x000003FF))));
444 c.y = Float(Int((*Pointer<UInt>(element) & UInt(0x000FFC00)) >> 10));
445 c.z = Float(Int((*Pointer<UInt>(element) & UInt(0x3FF00000)) >> 20));
446 c.w = Float(Int((*Pointer<UInt>(element) & UInt(0xC0000000)) >> 30));
447 break;
Alexis Hetub8a61bf2020-01-09 15:26:34 -0500448 case VK_FORMAT_A2R10G10B10_UNORM_PACK32:
449 case VK_FORMAT_A2R10G10B10_UINT_PACK32:
450 c.z = Float(Int((*Pointer<UInt>(element) & UInt(0x000003FF))));
451 c.y = Float(Int((*Pointer<UInt>(element) & UInt(0x000FFC00)) >> 10));
452 c.x = Float(Int((*Pointer<UInt>(element) & UInt(0x3FF00000)) >> 20));
453 c.w = Float(Int((*Pointer<UInt>(element) & UInt(0xC0000000)) >> 30));
454 break;
Ben Claytonfccfc562019-12-17 20:37:31 +0000455 case VK_FORMAT_D16_UNORM:
456 c.x = Float(Int((*Pointer<UShort>(element))));
457 break;
458 case VK_FORMAT_X8_D24_UNORM_PACK32:
459 c.x = Float(Int((*Pointer<UInt>(element) & UInt(0xFFFFFF00)) >> 8));
460 break;
461 case VK_FORMAT_D32_SFLOAT:
462 c.x = *Pointer<Float>(element);
463 break;
464 case VK_FORMAT_S8_UINT:
465 c.x = Float(Int(*Pointer<Byte>(element)));
466 break;
467 default:
468 UNSUPPORTED("Blitter source format %d", (int)state.sourceFormat);
Nicolas Capens157ba262019-12-10 17:49:14 -0500469 }
470
471 return c;
472}
473
474void Blitter::write(Float4 &c, Pointer<Byte> element, const State &state)
475{
476 bool writeR = state.writeRed;
477 bool writeG = state.writeGreen;
478 bool writeB = state.writeBlue;
479 bool writeA = state.writeAlpha;
480 bool writeRGBA = writeR && writeG && writeB && writeA;
481
482 switch(state.destFormat)
483 {
Ben Claytonfccfc562019-12-17 20:37:31 +0000484 case VK_FORMAT_R4G4_UNORM_PACK8:
485 if(writeR | writeG)
Nicolas Capens157ba262019-12-10 17:49:14 -0500486 {
Ben Claytonfccfc562019-12-17 20:37:31 +0000487 if(!writeR)
488 {
489 *Pointer<Byte>(element) = (Byte(RoundInt(Float(c.y))) & Byte(0xF)) |
490 (*Pointer<Byte>(element) & Byte(0xF0));
491 }
492 else if(!writeG)
493 {
494 *Pointer<Byte>(element) = (*Pointer<Byte>(element) & Byte(0xF)) |
495 (Byte(RoundInt(Float(c.x))) << Byte(4));
496 }
497 else
498 {
499 *Pointer<Byte>(element) = (Byte(RoundInt(Float(c.y))) & Byte(0xF)) |
500 (Byte(RoundInt(Float(c.x))) << Byte(4));
501 }
Nicolas Capens157ba262019-12-10 17:49:14 -0500502 }
Ben Claytonfccfc562019-12-17 20:37:31 +0000503 break;
504 case VK_FORMAT_R4G4B4A4_UNORM_PACK16:
505 if(writeR || writeG || writeB || writeA)
Nicolas Capens157ba262019-12-10 17:49:14 -0500506 {
Ben Claytonfccfc562019-12-17 20:37:31 +0000507 *Pointer<UShort>(element) = (writeR ? ((UShort(RoundInt(Float(c.x))) & UShort(0xF)) << UShort(12)) : (*Pointer<UShort>(element) & UShort(0x000F))) |
508 (writeG ? ((UShort(RoundInt(Float(c.y))) & UShort(0xF)) << UShort(8)) : (*Pointer<UShort>(element) & UShort(0x00F0))) |
509 (writeB ? ((UShort(RoundInt(Float(c.z))) & UShort(0xF)) << UShort(4)) : (*Pointer<UShort>(element) & UShort(0x0F00))) |
510 (writeA ? (UShort(RoundInt(Float(c.w))) & UShort(0xF)) : (*Pointer<UShort>(element) & UShort(0xF000)));
511 }
512 break;
513 case VK_FORMAT_B4G4R4A4_UNORM_PACK16:
514 if(writeRGBA)
515 {
516 *Pointer<UShort>(element) = UShort(RoundInt(Float(c.w)) & Int(0xF)) |
517 UShort((RoundInt(Float(c.x)) & Int(0xF)) << 4) |
518 UShort((RoundInt(Float(c.y)) & Int(0xF)) << 8) |
519 UShort((RoundInt(Float(c.z)) & Int(0xF)) << 12);
Nicolas Capens157ba262019-12-10 17:49:14 -0500520 }
521 else
522 {
Ben Claytonfccfc562019-12-17 20:37:31 +0000523 unsigned short mask = (writeA ? 0x000F : 0x0000) |
524 (writeR ? 0x00F0 : 0x0000) |
525 (writeG ? 0x0F00 : 0x0000) |
526 (writeB ? 0xF000 : 0x0000);
527 unsigned short unmask = ~mask;
528 *Pointer<UShort>(element) = (*Pointer<UShort>(element) & UShort(unmask)) |
529 ((UShort(RoundInt(Float(c.w)) & Int(0xF)) |
530 UShort((RoundInt(Float(c.x)) & Int(0xF)) << 4) |
531 UShort((RoundInt(Float(c.y)) & Int(0xF)) << 8) |
532 UShort((RoundInt(Float(c.z)) & Int(0xF)) << 12)) &
533 UShort(mask));
Nicolas Capens157ba262019-12-10 17:49:14 -0500534 }
Ben Claytonfccfc562019-12-17 20:37:31 +0000535 break;
536 case VK_FORMAT_B8G8R8A8_SRGB:
537 case VK_FORMAT_B8G8R8A8_UNORM:
538 if(writeRGBA)
539 {
540 Short4 c0 = RoundShort4(c.zyxw);
541 *Pointer<Byte4>(element) = Byte4(PackUnsigned(c0, c0));
542 }
543 else
544 {
545 if(writeB) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.z))); }
546 if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
547 if(writeR) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.x))); }
548 if(writeA) { *Pointer<Byte>(element + 3) = Byte(RoundInt(Float(c.w))); }
549 }
550 break;
551 case VK_FORMAT_B8G8R8_SNORM:
552 if(writeB) { *Pointer<SByte>(element + 0) = SByte(RoundInt(Float(c.z))); }
553 if(writeG) { *Pointer<SByte>(element + 1) = SByte(RoundInt(Float(c.y))); }
554 if(writeR) { *Pointer<SByte>(element + 2) = SByte(RoundInt(Float(c.x))); }
555 break;
556 case VK_FORMAT_B8G8R8_UNORM:
557 case VK_FORMAT_B8G8R8_SRGB:
Nicolas Capens157ba262019-12-10 17:49:14 -0500558 if(writeB) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.z))); }
559 if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
560 if(writeR) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.x))); }
Ben Claytonfccfc562019-12-17 20:37:31 +0000561 break;
562 case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
563 case VK_FORMAT_R8G8B8A8_UNORM:
564 case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
565 case VK_FORMAT_R8G8B8A8_SRGB:
566 case VK_FORMAT_A8B8G8R8_UINT_PACK32:
567 case VK_FORMAT_R8G8B8A8_UINT:
568 case VK_FORMAT_R8G8B8A8_USCALED:
569 case VK_FORMAT_A8B8G8R8_USCALED_PACK32:
570 if(writeRGBA)
571 {
572 Short4 c0 = RoundShort4(c);
573 *Pointer<Byte4>(element) = Byte4(PackUnsigned(c0, c0));
574 }
575 else
576 {
577 if(writeR) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.x))); }
578 if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
579 if(writeB) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.z))); }
580 if(writeA) { *Pointer<Byte>(element + 3) = Byte(RoundInt(Float(c.w))); }
581 }
582 break;
583 case VK_FORMAT_R32G32B32A32_SFLOAT:
584 if(writeRGBA)
585 {
586 *Pointer<Float4>(element) = c;
587 }
588 else
589 {
590 if(writeR) { *Pointer<Float>(element) = c.x; }
591 if(writeG) { *Pointer<Float>(element + 4) = c.y; }
592 if(writeB) { *Pointer<Float>(element + 8) = c.z; }
593 if(writeA) { *Pointer<Float>(element + 12) = c.w; }
594 }
595 break;
596 case VK_FORMAT_R32G32B32_SFLOAT:
Nicolas Capens157ba262019-12-10 17:49:14 -0500597 if(writeR) { *Pointer<Float>(element) = c.x; }
598 if(writeG) { *Pointer<Float>(element + 4) = c.y; }
599 if(writeB) { *Pointer<Float>(element + 8) = c.z; }
Ben Claytonfccfc562019-12-17 20:37:31 +0000600 break;
601 case VK_FORMAT_R32G32_SFLOAT:
602 if(writeR && writeG)
603 {
604 *Pointer<Float2>(element) = Float2(c);
605 }
606 else
607 {
608 if(writeR) { *Pointer<Float>(element) = c.x; }
609 if(writeG) { *Pointer<Float>(element + 4) = c.y; }
610 }
611 break;
612 case VK_FORMAT_R32_SFLOAT:
Nicolas Capens157ba262019-12-10 17:49:14 -0500613 if(writeR) { *Pointer<Float>(element) = c.x; }
Ben Claytonfccfc562019-12-17 20:37:31 +0000614 break;
615 case VK_FORMAT_R16G16B16A16_SFLOAT:
616 if(writeA) { *Pointer<Half>(element + 6) = Half(c.w); }
Nicolas Capens0405ba02020-01-16 01:19:21 -0500617 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +0000618 case VK_FORMAT_R16G16B16_SFLOAT:
619 if(writeB) { *Pointer<Half>(element + 4) = Half(c.z); }
Nicolas Capens0405ba02020-01-16 01:19:21 -0500620 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +0000621 case VK_FORMAT_R16G16_SFLOAT:
622 if(writeG) { *Pointer<Half>(element + 2) = Half(c.y); }
Nicolas Capens0405ba02020-01-16 01:19:21 -0500623 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +0000624 case VK_FORMAT_R16_SFLOAT:
625 if(writeR) { *Pointer<Half>(element) = Half(c.x); }
626 break;
627 case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
Nicolas Capens157ba262019-12-10 17:49:14 -0500628 {
Alexis Hetu24c49dd2019-12-13 16:32:43 -0500629 UInt rgb = r11g11b10Pack(c);
Nicolas Capens157ba262019-12-10 17:49:14 -0500630
631 UInt old = *Pointer<UInt>(element);
632
633 unsigned int mask = (writeR ? 0x000007FF : 0) |
634 (writeG ? 0x003FF800 : 0) |
635 (writeB ? 0xFFC00000 : 0);
636
637 *Pointer<UInt>(element) = (rgb & mask) | (old & ~mask);
638 }
639 break;
Ben Claytonfccfc562019-12-17 20:37:31 +0000640 case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32:
Nicolas Capens157ba262019-12-10 17:49:14 -0500641 {
642 ASSERT(writeRGBA); // Can't sensibly write just part of this format.
643
644 // Vulkan 1.1.117 section 15.2.1 RGB to Shared Exponent Conversion
645
646 constexpr int N = 9; // number of mantissa bits per component
647 constexpr int B = 15; // exponent bias
648 constexpr int E_max = 31; // maximum possible biased exponent value
649
650 // Maximum representable value.
651 constexpr float sharedexp_max = ((static_cast<float>(1 << N) - 1) / static_cast<float>(1 << N)) * static_cast<float>(1 << (E_max - B));
652
653 // Clamp components to valid range. NaN becomes 0.
Ben Claytonfccfc562019-12-17 20:37:31 +0000654 Float red_c = Min(IfThenElse(!(c.x > 0), Float(0), Float(c.x)), sharedexp_max);
Nicolas Capens157ba262019-12-10 17:49:14 -0500655 Float green_c = Min(IfThenElse(!(c.y > 0), Float(0), Float(c.y)), sharedexp_max);
Ben Claytonfccfc562019-12-17 20:37:31 +0000656 Float blue_c = Min(IfThenElse(!(c.z > 0), Float(0), Float(c.z)), sharedexp_max);
Nicolas Capens157ba262019-12-10 17:49:14 -0500657
658 // We're reducing the mantissa to 9 bits, so we must round up if the next
659 // bit is 1. In other words add 0.5 to the new mantissa's position and
660 // allow overflow into the exponent so we can scale correctly.
661 constexpr int half = 1 << (23 - N);
662 Float red_r = As<Float>(As<Int>(red_c) + half);
663 Float green_r = As<Float>(As<Int>(green_c) + half);
664 Float blue_r = As<Float>(As<Int>(blue_c) + half);
665
666 // The largest component determines the shared exponent. It can't be lower
667 // than 0 (after bias subtraction) so also limit to the mimimum representable.
668 constexpr float min_s = 0.5f / (1 << B);
669 Float max_s = Max(Max(red_r, green_r), Max(blue_r, min_s));
670
671 // Obtain the reciprocal of the shared exponent by inverting the bits,
672 // and scale by the new mantissa's size. Note that the IEEE-754 single-precision
673 // format has an implicit leading 1, but this shared component format does not.
674 Float scale = As<Float>((As<Int>(max_s) & 0x7F800000) ^ 0x7F800000) * (1 << (N - 2));
675
676 UInt R9 = RoundInt(red_c * scale);
677 UInt G9 = UInt(RoundInt(green_c * scale));
678 UInt B9 = UInt(RoundInt(blue_c * scale));
679 UInt E5 = (As<UInt>(max_s) >> 23) - 127 + 15 + 1;
680
681 UInt E5B9G9R9 = (E5 << 27) | (B9 << 18) | (G9 << 9) | R9;
682
683 *Pointer<UInt>(element) = E5B9G9R9;
684 }
685 break;
Ben Claytonfccfc562019-12-17 20:37:31 +0000686 case VK_FORMAT_B8G8R8A8_SNORM:
687 if(writeB) { *Pointer<SByte>(element) = SByte(RoundInt(Float(c.z))); }
688 if(writeG) { *Pointer<SByte>(element + 1) = SByte(RoundInt(Float(c.y))); }
689 if(writeR) { *Pointer<SByte>(element + 2) = SByte(RoundInt(Float(c.x))); }
690 if(writeA) { *Pointer<SByte>(element + 3) = SByte(RoundInt(Float(c.w))); }
691 break;
692 case VK_FORMAT_A8B8G8R8_SINT_PACK32:
693 case VK_FORMAT_R8G8B8A8_SINT:
694 case VK_FORMAT_A8B8G8R8_SNORM_PACK32:
695 case VK_FORMAT_R8G8B8A8_SNORM:
696 case VK_FORMAT_R8G8B8A8_SSCALED:
697 case VK_FORMAT_A8B8G8R8_SSCALED_PACK32:
698 if(writeA) { *Pointer<SByte>(element + 3) = SByte(RoundInt(Float(c.w))); }
Nicolas Capens0405ba02020-01-16 01:19:21 -0500699 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +0000700 case VK_FORMAT_R8G8B8_SINT:
701 case VK_FORMAT_R8G8B8_SNORM:
702 case VK_FORMAT_R8G8B8_SSCALED:
703 if(writeB) { *Pointer<SByte>(element + 2) = SByte(RoundInt(Float(c.z))); }
Nicolas Capens0405ba02020-01-16 01:19:21 -0500704 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +0000705 case VK_FORMAT_R8G8_SINT:
706 case VK_FORMAT_R8G8_SNORM:
707 case VK_FORMAT_R8G8_SSCALED:
708 if(writeG) { *Pointer<SByte>(element + 1) = SByte(RoundInt(Float(c.y))); }
Nicolas Capens0405ba02020-01-16 01:19:21 -0500709 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +0000710 case VK_FORMAT_R8_SINT:
711 case VK_FORMAT_R8_SNORM:
712 case VK_FORMAT_R8_SSCALED:
713 if(writeR) { *Pointer<SByte>(element) = SByte(RoundInt(Float(c.x))); }
714 break;
715 case VK_FORMAT_R8G8B8_UINT:
716 case VK_FORMAT_R8G8B8_UNORM:
717 case VK_FORMAT_R8G8B8_USCALED:
718 case VK_FORMAT_R8G8B8_SRGB:
719 if(writeB) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.z))); }
Nicolas Capens0405ba02020-01-16 01:19:21 -0500720 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +0000721 case VK_FORMAT_R8G8_UINT:
722 case VK_FORMAT_R8G8_UNORM:
723 case VK_FORMAT_R8G8_USCALED:
724 case VK_FORMAT_R8G8_SRGB:
725 if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
Nicolas Capens0405ba02020-01-16 01:19:21 -0500726 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +0000727 case VK_FORMAT_R8_UINT:
728 case VK_FORMAT_R8_UNORM:
729 case VK_FORMAT_R8_USCALED:
730 case VK_FORMAT_R8_SRGB:
731 if(writeR) { *Pointer<Byte>(element) = Byte(RoundInt(Float(c.x))); }
732 break;
733 case VK_FORMAT_R16G16B16A16_SINT:
734 case VK_FORMAT_R16G16B16A16_SNORM:
735 case VK_FORMAT_R16G16B16A16_SSCALED:
736 if(writeRGBA)
737 {
738 *Pointer<Short4>(element) = Short4(RoundInt(c));
739 }
740 else
741 {
742 if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); }
743 if(writeG) { *Pointer<Short>(element + 2) = Short(RoundInt(Float(c.y))); }
744 if(writeB) { *Pointer<Short>(element + 4) = Short(RoundInt(Float(c.z))); }
745 if(writeA) { *Pointer<Short>(element + 6) = Short(RoundInt(Float(c.w))); }
746 }
747 break;
748 case VK_FORMAT_R16G16B16_SINT:
749 case VK_FORMAT_R16G16B16_SNORM:
750 case VK_FORMAT_R16G16B16_SSCALED:
Nicolas Capens157ba262019-12-10 17:49:14 -0500751 if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); }
752 if(writeG) { *Pointer<Short>(element + 2) = Short(RoundInt(Float(c.y))); }
753 if(writeB) { *Pointer<Short>(element + 4) = Short(RoundInt(Float(c.z))); }
Ben Claytonfccfc562019-12-17 20:37:31 +0000754 break;
755 case VK_FORMAT_R16G16_SINT:
756 case VK_FORMAT_R16G16_SNORM:
757 case VK_FORMAT_R16G16_SSCALED:
758 if(writeR && writeG)
759 {
760 *Pointer<Short2>(element) = Short2(Short4(RoundInt(c)));
761 }
762 else
763 {
764 if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); }
765 if(writeG) { *Pointer<Short>(element + 2) = Short(RoundInt(Float(c.y))); }
766 }
767 break;
768 case VK_FORMAT_R16_SINT:
769 case VK_FORMAT_R16_SNORM:
770 case VK_FORMAT_R16_SSCALED:
Nicolas Capens157ba262019-12-10 17:49:14 -0500771 if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); }
Ben Claytonfccfc562019-12-17 20:37:31 +0000772 break;
773 case VK_FORMAT_R16G16B16A16_UINT:
774 case VK_FORMAT_R16G16B16A16_UNORM:
775 case VK_FORMAT_R16G16B16A16_USCALED:
776 if(writeRGBA)
777 {
778 *Pointer<UShort4>(element) = UShort4(RoundInt(c));
779 }
780 else
781 {
782 if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); }
783 if(writeG) { *Pointer<UShort>(element + 2) = UShort(RoundInt(Float(c.y))); }
784 if(writeB) { *Pointer<UShort>(element + 4) = UShort(RoundInt(Float(c.z))); }
785 if(writeA) { *Pointer<UShort>(element + 6) = UShort(RoundInt(Float(c.w))); }
786 }
787 break;
788 case VK_FORMAT_R16G16B16_UINT:
789 case VK_FORMAT_R16G16B16_UNORM:
790 case VK_FORMAT_R16G16B16_USCALED:
Nicolas Capens157ba262019-12-10 17:49:14 -0500791 if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); }
792 if(writeG) { *Pointer<UShort>(element + 2) = UShort(RoundInt(Float(c.y))); }
793 if(writeB) { *Pointer<UShort>(element + 4) = UShort(RoundInt(Float(c.z))); }
Ben Claytonfccfc562019-12-17 20:37:31 +0000794 break;
795 case VK_FORMAT_R16G16_UINT:
796 case VK_FORMAT_R16G16_UNORM:
797 case VK_FORMAT_R16G16_USCALED:
798 if(writeR && writeG)
799 {
800 *Pointer<UShort2>(element) = UShort2(UShort4(RoundInt(c)));
801 }
802 else
803 {
804 if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); }
805 if(writeG) { *Pointer<UShort>(element + 2) = UShort(RoundInt(Float(c.y))); }
806 }
807 break;
808 case VK_FORMAT_R16_UINT:
809 case VK_FORMAT_R16_UNORM:
810 case VK_FORMAT_R16_USCALED:
Nicolas Capens157ba262019-12-10 17:49:14 -0500811 if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); }
Ben Claytonfccfc562019-12-17 20:37:31 +0000812 break;
813 case VK_FORMAT_R32G32B32A32_SINT:
814 if(writeRGBA)
815 {
816 *Pointer<Int4>(element) = RoundInt(c);
817 }
818 else
819 {
820 if(writeR) { *Pointer<Int>(element) = RoundInt(Float(c.x)); }
821 if(writeG) { *Pointer<Int>(element + 4) = RoundInt(Float(c.y)); }
822 if(writeB) { *Pointer<Int>(element + 8) = RoundInt(Float(c.z)); }
823 if(writeA) { *Pointer<Int>(element + 12) = RoundInt(Float(c.w)); }
824 }
825 break;
826 case VK_FORMAT_R32G32B32_SINT:
Nicolas Capens157ba262019-12-10 17:49:14 -0500827 if(writeB) { *Pointer<Int>(element + 8) = RoundInt(Float(c.z)); }
Nicolas Capens0405ba02020-01-16 01:19:21 -0500828 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +0000829 case VK_FORMAT_R32G32_SINT:
830 if(writeG) { *Pointer<Int>(element + 4) = RoundInt(Float(c.y)); }
Nicolas Capens0405ba02020-01-16 01:19:21 -0500831 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +0000832 case VK_FORMAT_R32_SINT:
833 if(writeR) { *Pointer<Int>(element) = RoundInt(Float(c.x)); }
834 break;
835 case VK_FORMAT_R32G32B32A32_UINT:
836 if(writeRGBA)
837 {
838 *Pointer<UInt4>(element) = UInt4(RoundInt(c));
839 }
840 else
841 {
842 if(writeR) { *Pointer<UInt>(element) = As<UInt>(RoundInt(Float(c.x))); }
843 if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(RoundInt(Float(c.y))); }
844 if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(RoundInt(Float(c.z))); }
845 if(writeA) { *Pointer<UInt>(element + 12) = As<UInt>(RoundInt(Float(c.w))); }
846 }
847 break;
848 case VK_FORMAT_R32G32B32_UINT:
Nicolas Capens157ba262019-12-10 17:49:14 -0500849 if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(RoundInt(Float(c.z))); }
Nicolas Capens0405ba02020-01-16 01:19:21 -0500850 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +0000851 case VK_FORMAT_R32G32_UINT:
852 if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(RoundInt(Float(c.y))); }
Nicolas Capens0405ba02020-01-16 01:19:21 -0500853 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +0000854 case VK_FORMAT_R32_UINT:
855 if(writeR) { *Pointer<UInt>(element) = As<UInt>(RoundInt(Float(c.x))); }
856 break;
857 case VK_FORMAT_R5G6B5_UNORM_PACK16:
858 if(writeR && writeG && writeB)
859 {
Alexis Hetu3716c202019-12-19 17:09:08 -0500860 *Pointer<UShort>(element) = UShort(PackFields(RoundInt(c.xyzz), { 11, 5, 0, 0 }));
Ben Claytonfccfc562019-12-17 20:37:31 +0000861 }
862 else
863 {
864 unsigned short mask = (writeB ? 0x001F : 0x0000) | (writeG ? 0x07E0 : 0x0000) | (writeR ? 0xF800 : 0x0000);
865 unsigned short unmask = ~mask;
866 *Pointer<UShort>(element) = (*Pointer<UShort>(element) & UShort(unmask)) |
Alexis Hetu3716c202019-12-19 17:09:08 -0500867 (UShort(PackFields(RoundInt(c.xyzz), { 11, 5, 0, 0 })) &
Ben Claytonfccfc562019-12-17 20:37:31 +0000868 UShort(mask));
869 }
870 break;
871 case VK_FORMAT_R5G5B5A1_UNORM_PACK16:
872 if(writeRGBA)
873 {
Alexis Hetu3716c202019-12-19 17:09:08 -0500874 *Pointer<UShort>(element) = UShort(PackFields(RoundInt(c), { 11, 6, 1, 0 }));
Ben Claytonfccfc562019-12-17 20:37:31 +0000875 }
876 else
877 {
878 unsigned short mask = (writeA ? 0x8000 : 0x0000) |
879 (writeR ? 0x7C00 : 0x0000) |
880 (writeG ? 0x03E0 : 0x0000) |
881 (writeB ? 0x001F : 0x0000);
882 unsigned short unmask = ~mask;
883 *Pointer<UShort>(element) = (*Pointer<UShort>(element) & UShort(unmask)) |
Alexis Hetu3716c202019-12-19 17:09:08 -0500884 (UShort(PackFields(RoundInt(c), { 11, 6, 1, 0 })) &
Ben Claytonfccfc562019-12-17 20:37:31 +0000885 UShort(mask));
886 }
887 break;
888 case VK_FORMAT_B5G5R5A1_UNORM_PACK16:
889 if(writeRGBA)
890 {
Alexis Hetu3716c202019-12-19 17:09:08 -0500891 *Pointer<UShort>(element) = UShort(PackFields(RoundInt(c), { 1, 6, 11, 0 }));
Ben Claytonfccfc562019-12-17 20:37:31 +0000892 }
893 else
894 {
895 unsigned short mask = (writeA ? 0x8000 : 0x0000) |
896 (writeR ? 0x7C00 : 0x0000) |
897 (writeG ? 0x03E0 : 0x0000) |
898 (writeB ? 0x001F : 0x0000);
899 unsigned short unmask = ~mask;
900 *Pointer<UShort>(element) = (*Pointer<UShort>(element) & UShort(unmask)) |
Alexis Hetu3716c202019-12-19 17:09:08 -0500901 (UShort(PackFields(RoundInt(c), { 1, 6, 11, 0 })) &
Ben Claytonfccfc562019-12-17 20:37:31 +0000902 UShort(mask));
903 }
904 break;
905 case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
906 if(writeRGBA)
907 {
Alexis Hetu3716c202019-12-19 17:09:08 -0500908 *Pointer<UShort>(element) = UShort(PackFields(RoundInt(c), { 10, 5, 0, 15 }));
Ben Claytonfccfc562019-12-17 20:37:31 +0000909 }
910 else
911 {
912 unsigned short mask = (writeA ? 0x8000 : 0x0000) |
913 (writeR ? 0x7C00 : 0x0000) |
914 (writeG ? 0x03E0 : 0x0000) |
915 (writeB ? 0x001F : 0x0000);
916 unsigned short unmask = ~mask;
917 *Pointer<UShort>(element) = (*Pointer<UShort>(element) & UShort(unmask)) |
Alexis Hetu3716c202019-12-19 17:09:08 -0500918 (UShort(PackFields(RoundInt(c), { 10, 5, 0, 15 })) &
Ben Claytonfccfc562019-12-17 20:37:31 +0000919 UShort(mask));
920 }
921 break;
922 case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
923 case VK_FORMAT_A2B10G10R10_UINT_PACK32:
924 case VK_FORMAT_A2B10G10R10_SNORM_PACK32:
925 if(writeRGBA)
926 {
Alexis Hetu3716c202019-12-19 17:09:08 -0500927 *Pointer<UInt>(element) = As<UInt>(PackFields(RoundInt(c), { 0, 10, 20, 30 }));
Ben Claytonfccfc562019-12-17 20:37:31 +0000928 }
929 else
930 {
931 unsigned int mask = (writeA ? 0xC0000000 : 0x0000) |
932 (writeB ? 0x3FF00000 : 0x0000) |
933 (writeG ? 0x000FFC00 : 0x0000) |
934 (writeR ? 0x000003FF : 0x0000);
935 unsigned int unmask = ~mask;
936 *Pointer<UInt>(element) = (*Pointer<UInt>(element) & UInt(unmask)) |
Alexis Hetu3716c202019-12-19 17:09:08 -0500937 (As<UInt>(PackFields(RoundInt(c), { 0, 10, 20, 30 })) &
Ben Claytonfccfc562019-12-17 20:37:31 +0000938 UInt(mask));
939 }
940 break;
941 case VK_FORMAT_A2R10G10B10_UNORM_PACK32:
942 case VK_FORMAT_A2R10G10B10_UINT_PACK32:
943 case VK_FORMAT_A2R10G10B10_SNORM_PACK32:
944 if(writeRGBA)
945 {
Alexis Hetu3716c202019-12-19 17:09:08 -0500946 *Pointer<UInt>(element) = As<UInt>(PackFields(RoundInt(c), { 20, 10, 0, 30 }));
Ben Claytonfccfc562019-12-17 20:37:31 +0000947 }
948 else
949 {
950 unsigned int mask = (writeA ? 0xC0000000 : 0x0000) |
951 (writeR ? 0x3FF00000 : 0x0000) |
952 (writeG ? 0x000FFC00 : 0x0000) |
953 (writeB ? 0x000003FF : 0x0000);
954 unsigned int unmask = ~mask;
955 *Pointer<UInt>(element) = (*Pointer<UInt>(element) & UInt(unmask)) |
Alexis Hetu3716c202019-12-19 17:09:08 -0500956 (As<UInt>(PackFields(RoundInt(c), { 20, 10, 0, 30 })) &
Ben Claytonfccfc562019-12-17 20:37:31 +0000957 UInt(mask));
958 }
959 break;
960 case VK_FORMAT_D16_UNORM:
961 *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x)));
962 break;
963 case VK_FORMAT_X8_D24_UNORM_PACK32:
964 *Pointer<UInt>(element) = UInt(RoundInt(Float(c.x)) << 8);
965 break;
966 case VK_FORMAT_D32_SFLOAT:
967 *Pointer<Float>(element) = c.x;
968 break;
969 case VK_FORMAT_S8_UINT:
970 *Pointer<Byte>(element) = Byte(RoundInt(Float(c.x)));
971 break;
972 default:
973 UNSUPPORTED("Blitter destination format %d", (int)state.destFormat);
974 break;
Nicolas Capens157ba262019-12-10 17:49:14 -0500975 }
976}
977
978Int4 Blitter::readInt4(Pointer<Byte> element, const State &state)
979{
980 Int4 c(0, 0, 0, 1);
981
982 switch(state.sourceFormat)
983 {
Ben Claytonfccfc562019-12-17 20:37:31 +0000984 case VK_FORMAT_A8B8G8R8_SINT_PACK32:
985 case VK_FORMAT_R8G8B8A8_SINT:
986 c = Insert(c, Int(*Pointer<SByte>(element + 3)), 3);
987 c = Insert(c, Int(*Pointer<SByte>(element + 2)), 2);
Nicolas Capens0405ba02020-01-16 01:19:21 -0500988 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +0000989 case VK_FORMAT_R8G8_SINT:
990 c = Insert(c, Int(*Pointer<SByte>(element + 1)), 1);
Nicolas Capens0405ba02020-01-16 01:19:21 -0500991 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +0000992 case VK_FORMAT_R8_SINT:
993 c = Insert(c, Int(*Pointer<SByte>(element)), 0);
994 break;
995 case VK_FORMAT_A2B10G10R10_UINT_PACK32:
996 c = Insert(c, Int((*Pointer<UInt>(element) & UInt(0x000003FF))), 0);
997 c = Insert(c, Int((*Pointer<UInt>(element) & UInt(0x000FFC00)) >> 10), 1);
998 c = Insert(c, Int((*Pointer<UInt>(element) & UInt(0x3FF00000)) >> 20), 2);
999 c = Insert(c, Int((*Pointer<UInt>(element) & UInt(0xC0000000)) >> 30), 3);
1000 break;
Alexis Hetub8a61bf2020-01-09 15:26:34 -05001001 case VK_FORMAT_A2R10G10B10_UINT_PACK32:
1002 c = Insert(c, Int((*Pointer<UInt>(element) & UInt(0x000003FF))), 2);
1003 c = Insert(c, Int((*Pointer<UInt>(element) & UInt(0x000FFC00)) >> 10), 1);
1004 c = Insert(c, Int((*Pointer<UInt>(element) & UInt(0x3FF00000)) >> 20), 0);
1005 c = Insert(c, Int((*Pointer<UInt>(element) & UInt(0xC0000000)) >> 30), 3);
1006 break;
Ben Claytonfccfc562019-12-17 20:37:31 +00001007 case VK_FORMAT_A8B8G8R8_UINT_PACK32:
1008 case VK_FORMAT_R8G8B8A8_UINT:
1009 c = Insert(c, Int(*Pointer<Byte>(element + 3)), 3);
1010 c = Insert(c, Int(*Pointer<Byte>(element + 2)), 2);
Nicolas Capens0405ba02020-01-16 01:19:21 -05001011 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +00001012 case VK_FORMAT_R8G8_UINT:
1013 c = Insert(c, Int(*Pointer<Byte>(element + 1)), 1);
Nicolas Capens0405ba02020-01-16 01:19:21 -05001014 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +00001015 case VK_FORMAT_R8_UINT:
1016 case VK_FORMAT_S8_UINT:
1017 c = Insert(c, Int(*Pointer<Byte>(element)), 0);
1018 break;
1019 case VK_FORMAT_R16G16B16A16_SINT:
1020 c = Insert(c, Int(*Pointer<Short>(element + 6)), 3);
1021 c = Insert(c, Int(*Pointer<Short>(element + 4)), 2);
Nicolas Capens0405ba02020-01-16 01:19:21 -05001022 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +00001023 case VK_FORMAT_R16G16_SINT:
1024 c = Insert(c, Int(*Pointer<Short>(element + 2)), 1);
Nicolas Capens0405ba02020-01-16 01:19:21 -05001025 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +00001026 case VK_FORMAT_R16_SINT:
1027 c = Insert(c, Int(*Pointer<Short>(element)), 0);
1028 break;
1029 case VK_FORMAT_R16G16B16A16_UINT:
1030 c = Insert(c, Int(*Pointer<UShort>(element + 6)), 3);
1031 c = Insert(c, Int(*Pointer<UShort>(element + 4)), 2);
Nicolas Capens0405ba02020-01-16 01:19:21 -05001032 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +00001033 case VK_FORMAT_R16G16_UINT:
1034 c = Insert(c, Int(*Pointer<UShort>(element + 2)), 1);
Nicolas Capens0405ba02020-01-16 01:19:21 -05001035 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +00001036 case VK_FORMAT_R16_UINT:
1037 c = Insert(c, Int(*Pointer<UShort>(element)), 0);
1038 break;
1039 case VK_FORMAT_R32G32B32A32_SINT:
1040 case VK_FORMAT_R32G32B32A32_UINT:
1041 c = *Pointer<Int4>(element);
1042 break;
1043 case VK_FORMAT_R32G32_SINT:
1044 case VK_FORMAT_R32G32_UINT:
1045 c = Insert(c, *Pointer<Int>(element + 4), 1);
Nicolas Capens0405ba02020-01-16 01:19:21 -05001046 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +00001047 case VK_FORMAT_R32_SINT:
1048 case VK_FORMAT_R32_UINT:
1049 c = Insert(c, *Pointer<Int>(element), 0);
1050 break;
1051 default:
1052 UNSUPPORTED("Blitter source format %d", (int)state.sourceFormat);
Nicolas Capens157ba262019-12-10 17:49:14 -05001053 }
1054
1055 return c;
1056}
1057
1058void Blitter::write(Int4 &c, Pointer<Byte> element, const State &state)
1059{
1060 bool writeR = state.writeRed;
1061 bool writeG = state.writeGreen;
1062 bool writeB = state.writeBlue;
1063 bool writeA = state.writeAlpha;
1064 bool writeRGBA = writeR && writeG && writeB && writeA;
1065
1066 switch(state.destFormat)
1067 {
Ben Claytonfccfc562019-12-17 20:37:31 +00001068 case VK_FORMAT_A2B10G10R10_UINT_PACK32:
Alexis Hetub8a61bf2020-01-09 15:26:34 -05001069 case VK_FORMAT_A2R10G10B10_UINT_PACK32:
Ben Claytonfccfc562019-12-17 20:37:31 +00001070 c = Min(As<UInt4>(c), UInt4(0x03FF, 0x03FF, 0x03FF, 0x0003));
1071 break;
1072 case VK_FORMAT_A8B8G8R8_UINT_PACK32:
1073 case VK_FORMAT_R8G8B8A8_UINT:
1074 case VK_FORMAT_R8G8B8_UINT:
1075 case VK_FORMAT_R8G8_UINT:
1076 case VK_FORMAT_R8_UINT:
1077 case VK_FORMAT_R8G8B8A8_USCALED:
1078 case VK_FORMAT_R8G8B8_USCALED:
1079 case VK_FORMAT_R8G8_USCALED:
1080 case VK_FORMAT_R8_USCALED:
1081 case VK_FORMAT_S8_UINT:
1082 c = Min(As<UInt4>(c), UInt4(0xFF));
1083 break;
1084 case VK_FORMAT_R16G16B16A16_UINT:
1085 case VK_FORMAT_R16G16B16_UINT:
1086 case VK_FORMAT_R16G16_UINT:
1087 case VK_FORMAT_R16_UINT:
1088 case VK_FORMAT_R16G16B16A16_USCALED:
1089 case VK_FORMAT_R16G16B16_USCALED:
1090 case VK_FORMAT_R16G16_USCALED:
1091 case VK_FORMAT_R16_USCALED:
1092 c = Min(As<UInt4>(c), UInt4(0xFFFF));
1093 break;
1094 case VK_FORMAT_A8B8G8R8_SINT_PACK32:
1095 case VK_FORMAT_R8G8B8A8_SINT:
1096 case VK_FORMAT_R8G8_SINT:
1097 case VK_FORMAT_R8_SINT:
1098 case VK_FORMAT_R8G8B8A8_SSCALED:
1099 case VK_FORMAT_R8G8B8_SSCALED:
1100 case VK_FORMAT_R8G8_SSCALED:
1101 case VK_FORMAT_R8_SSCALED:
1102 c = Min(Max(c, Int4(-0x80)), Int4(0x7F));
1103 break;
1104 case VK_FORMAT_R16G16B16A16_SINT:
1105 case VK_FORMAT_R16G16B16_SINT:
1106 case VK_FORMAT_R16G16_SINT:
1107 case VK_FORMAT_R16_SINT:
1108 case VK_FORMAT_R16G16B16A16_SSCALED:
1109 case VK_FORMAT_R16G16B16_SSCALED:
1110 case VK_FORMAT_R16G16_SSCALED:
1111 case VK_FORMAT_R16_SSCALED:
1112 c = Min(Max(c, Int4(-0x8000)), Int4(0x7FFF));
1113 break;
1114 default:
1115 break;
Nicolas Capens157ba262019-12-10 17:49:14 -05001116 }
1117
1118 switch(state.destFormat)
1119 {
Ben Claytonfccfc562019-12-17 20:37:31 +00001120 case VK_FORMAT_B8G8R8A8_SINT:
1121 case VK_FORMAT_B8G8R8A8_SSCALED:
1122 if(writeA) { *Pointer<SByte>(element + 3) = SByte(Extract(c, 3)); }
Nicolas Capens0405ba02020-01-16 01:19:21 -05001123 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +00001124 case VK_FORMAT_B8G8R8_SINT:
1125 case VK_FORMAT_B8G8R8_SSCALED:
1126 if(writeB) { *Pointer<SByte>(element) = SByte(Extract(c, 2)); }
1127 if(writeG) { *Pointer<SByte>(element + 1) = SByte(Extract(c, 1)); }
1128 if(writeR) { *Pointer<SByte>(element + 2) = SByte(Extract(c, 0)); }
1129 break;
1130 case VK_FORMAT_A8B8G8R8_SINT_PACK32:
1131 case VK_FORMAT_R8G8B8A8_SINT:
1132 case VK_FORMAT_R8G8B8A8_SSCALED:
1133 case VK_FORMAT_A8B8G8R8_SSCALED_PACK32:
1134 if(writeA) { *Pointer<SByte>(element + 3) = SByte(Extract(c, 3)); }
Nicolas Capens0405ba02020-01-16 01:19:21 -05001135 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +00001136 case VK_FORMAT_R8G8B8_SINT:
1137 case VK_FORMAT_R8G8B8_SSCALED:
1138 if(writeB) { *Pointer<SByte>(element + 2) = SByte(Extract(c, 2)); }
Nicolas Capens0405ba02020-01-16 01:19:21 -05001139 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +00001140 case VK_FORMAT_R8G8_SINT:
1141 case VK_FORMAT_R8G8_SSCALED:
1142 if(writeG) { *Pointer<SByte>(element + 1) = SByte(Extract(c, 1)); }
Nicolas Capens0405ba02020-01-16 01:19:21 -05001143 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +00001144 case VK_FORMAT_R8_SINT:
1145 case VK_FORMAT_R8_SSCALED:
1146 if(writeR) { *Pointer<SByte>(element) = SByte(Extract(c, 0)); }
1147 break;
1148 case VK_FORMAT_A2B10G10R10_UINT_PACK32:
1149 case VK_FORMAT_A2B10G10R10_SINT_PACK32:
1150 case VK_FORMAT_A2B10G10R10_USCALED_PACK32:
1151 case VK_FORMAT_A2B10G10R10_SSCALED_PACK32:
1152 if(writeRGBA)
1153 {
Alexis Hetu3716c202019-12-19 17:09:08 -05001154 *Pointer<UInt>(element) = As<UInt>(PackFields(c, { 0, 10, 20, 30 }));
Ben Claytonfccfc562019-12-17 20:37:31 +00001155 }
1156 else
1157 {
1158 unsigned int mask = (writeA ? 0xC0000000 : 0x0000) |
1159 (writeB ? 0x3FF00000 : 0x0000) |
1160 (writeG ? 0x000FFC00 : 0x0000) |
1161 (writeR ? 0x000003FF : 0x0000);
1162 unsigned int unmask = ~mask;
1163 *Pointer<UInt>(element) = (*Pointer<UInt>(element) & UInt(unmask)) |
Alexis Hetu3716c202019-12-19 17:09:08 -05001164 (As<UInt>(PackFields(c, { 0, 10, 20, 30 })) & UInt(mask));
Ben Claytonfccfc562019-12-17 20:37:31 +00001165 }
1166 break;
1167 case VK_FORMAT_A2R10G10B10_UINT_PACK32:
1168 case VK_FORMAT_A2R10G10B10_SINT_PACK32:
1169 case VK_FORMAT_A2R10G10B10_USCALED_PACK32:
1170 case VK_FORMAT_A2R10G10B10_SSCALED_PACK32:
1171 if(writeRGBA)
1172 {
Alexis Hetu3716c202019-12-19 17:09:08 -05001173 *Pointer<UInt>(element) = As<UInt>(PackFields(c, { 20, 10, 0, 30 }));
Ben Claytonfccfc562019-12-17 20:37:31 +00001174 }
1175 else
1176 {
1177 unsigned int mask = (writeA ? 0xC0000000 : 0x0000) |
1178 (writeR ? 0x3FF00000 : 0x0000) |
1179 (writeG ? 0x000FFC00 : 0x0000) |
1180 (writeB ? 0x000003FF : 0x0000);
1181 unsigned int unmask = ~mask;
1182 *Pointer<UInt>(element) = (*Pointer<UInt>(element) & UInt(unmask)) |
Alexis Hetu3716c202019-12-19 17:09:08 -05001183 (As<UInt>(PackFields(c, { 20, 10, 0, 30 })) & UInt(mask));
Ben Claytonfccfc562019-12-17 20:37:31 +00001184 }
1185 break;
1186 case VK_FORMAT_B8G8R8A8_UINT:
1187 case VK_FORMAT_B8G8R8A8_USCALED:
1188 if(writeA) { *Pointer<Byte>(element + 3) = Byte(Extract(c, 3)); }
Nicolas Capens0405ba02020-01-16 01:19:21 -05001189 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +00001190 case VK_FORMAT_B8G8R8_UINT:
1191 case VK_FORMAT_B8G8R8_USCALED:
1192 case VK_FORMAT_B8G8R8_SRGB:
1193 if(writeB) { *Pointer<Byte>(element) = Byte(Extract(c, 2)); }
1194 if(writeG) { *Pointer<Byte>(element + 1) = Byte(Extract(c, 1)); }
1195 if(writeR) { *Pointer<Byte>(element + 2) = Byte(Extract(c, 0)); }
1196 break;
1197 case VK_FORMAT_A8B8G8R8_UINT_PACK32:
1198 case VK_FORMAT_R8G8B8A8_UINT:
1199 case VK_FORMAT_R8G8B8A8_USCALED:
1200 case VK_FORMAT_A8B8G8R8_USCALED_PACK32:
1201 if(writeA) { *Pointer<Byte>(element + 3) = Byte(Extract(c, 3)); }
Nicolas Capens0405ba02020-01-16 01:19:21 -05001202 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +00001203 case VK_FORMAT_R8G8B8_UINT:
1204 case VK_FORMAT_R8G8B8_USCALED:
1205 if(writeB) { *Pointer<Byte>(element + 2) = Byte(Extract(c, 2)); }
Nicolas Capens0405ba02020-01-16 01:19:21 -05001206 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +00001207 case VK_FORMAT_R8G8_UINT:
1208 case VK_FORMAT_R8G8_USCALED:
1209 if(writeG) { *Pointer<Byte>(element + 1) = Byte(Extract(c, 1)); }
Nicolas Capens0405ba02020-01-16 01:19:21 -05001210 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +00001211 case VK_FORMAT_R8_UINT:
1212 case VK_FORMAT_R8_USCALED:
1213 case VK_FORMAT_S8_UINT:
1214 if(writeR) { *Pointer<Byte>(element) = Byte(Extract(c, 0)); }
1215 break;
1216 case VK_FORMAT_R16G16B16A16_SINT:
1217 case VK_FORMAT_R16G16B16A16_SSCALED:
1218 if(writeA) { *Pointer<Short>(element + 6) = Short(Extract(c, 3)); }
Nicolas Capens0405ba02020-01-16 01:19:21 -05001219 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +00001220 case VK_FORMAT_R16G16B16_SINT:
1221 case VK_FORMAT_R16G16B16_SSCALED:
1222 if(writeB) { *Pointer<Short>(element + 4) = Short(Extract(c, 2)); }
Nicolas Capens0405ba02020-01-16 01:19:21 -05001223 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +00001224 case VK_FORMAT_R16G16_SINT:
1225 case VK_FORMAT_R16G16_SSCALED:
1226 if(writeG) { *Pointer<Short>(element + 2) = Short(Extract(c, 1)); }
Nicolas Capens0405ba02020-01-16 01:19:21 -05001227 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +00001228 case VK_FORMAT_R16_SINT:
1229 case VK_FORMAT_R16_SSCALED:
1230 if(writeR) { *Pointer<Short>(element) = Short(Extract(c, 0)); }
1231 break;
1232 case VK_FORMAT_R16G16B16A16_UINT:
1233 case VK_FORMAT_R16G16B16A16_USCALED:
1234 if(writeA) { *Pointer<UShort>(element + 6) = UShort(Extract(c, 3)); }
Nicolas Capens0405ba02020-01-16 01:19:21 -05001235 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +00001236 case VK_FORMAT_R16G16B16_UINT:
1237 case VK_FORMAT_R16G16B16_USCALED:
1238 if(writeB) { *Pointer<UShort>(element + 4) = UShort(Extract(c, 2)); }
Nicolas Capens0405ba02020-01-16 01:19:21 -05001239 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +00001240 case VK_FORMAT_R16G16_UINT:
1241 case VK_FORMAT_R16G16_USCALED:
1242 if(writeG) { *Pointer<UShort>(element + 2) = UShort(Extract(c, 1)); }
Nicolas Capens0405ba02020-01-16 01:19:21 -05001243 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +00001244 case VK_FORMAT_R16_UINT:
1245 case VK_FORMAT_R16_USCALED:
1246 if(writeR) { *Pointer<UShort>(element) = UShort(Extract(c, 0)); }
1247 break;
1248 case VK_FORMAT_R32G32B32A32_SINT:
1249 if(writeRGBA)
1250 {
1251 *Pointer<Int4>(element) = c;
1252 }
1253 else
1254 {
1255 if(writeR) { *Pointer<Int>(element) = Extract(c, 0); }
1256 if(writeG) { *Pointer<Int>(element + 4) = Extract(c, 1); }
1257 if(writeB) { *Pointer<Int>(element + 8) = Extract(c, 2); }
1258 if(writeA) { *Pointer<Int>(element + 12) = Extract(c, 3); }
1259 }
1260 break;
1261 case VK_FORMAT_R32G32B32_SINT:
Nicolas Capens157ba262019-12-10 17:49:14 -05001262 if(writeR) { *Pointer<Int>(element) = Extract(c, 0); }
1263 if(writeG) { *Pointer<Int>(element + 4) = Extract(c, 1); }
1264 if(writeB) { *Pointer<Int>(element + 8) = Extract(c, 2); }
Ben Claytonfccfc562019-12-17 20:37:31 +00001265 break;
1266 case VK_FORMAT_R32G32_SINT:
1267 if(writeR) { *Pointer<Int>(element) = Extract(c, 0); }
1268 if(writeG) { *Pointer<Int>(element + 4) = Extract(c, 1); }
1269 break;
1270 case VK_FORMAT_R32_SINT:
1271 if(writeR) { *Pointer<Int>(element) = Extract(c, 0); }
1272 break;
1273 case VK_FORMAT_R32G32B32A32_UINT:
1274 if(writeRGBA)
1275 {
1276 *Pointer<UInt4>(element) = As<UInt4>(c);
1277 }
1278 else
1279 {
1280 if(writeR) { *Pointer<UInt>(element) = As<UInt>(Extract(c, 0)); }
1281 if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(Extract(c, 1)); }
1282 if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(Extract(c, 2)); }
1283 if(writeA) { *Pointer<UInt>(element + 12) = As<UInt>(Extract(c, 3)); }
1284 }
1285 break;
1286 case VK_FORMAT_R32G32B32_UINT:
Nicolas Capens157ba262019-12-10 17:49:14 -05001287 if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(Extract(c, 2)); }
Nicolas Capens0405ba02020-01-16 01:19:21 -05001288 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +00001289 case VK_FORMAT_R32G32_UINT:
1290 if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(Extract(c, 1)); }
Nicolas Capens0405ba02020-01-16 01:19:21 -05001291 // [[fallthrough]]
Ben Claytonfccfc562019-12-17 20:37:31 +00001292 case VK_FORMAT_R32_UINT:
1293 if(writeR) { *Pointer<UInt>(element) = As<UInt>(Extract(c, 0)); }
1294 break;
1295 default:
1296 UNSUPPORTED("Blitter destination format %d", (int)state.destFormat);
Nicolas Capens157ba262019-12-10 17:49:14 -05001297 }
1298}
1299
1300void Blitter::ApplyScaleAndClamp(Float4 &value, const State &state, bool preScaled)
1301{
1302 float4 scale{}, unscale{};
1303
1304 if(state.clearOperation &&
Nicolas Capens9d9f30d2020-01-12 03:26:18 -05001305 state.sourceFormat.isUnnormalizedInteger() &&
1306 !state.destFormat.isUnnormalizedInteger())
Nicolas Capens157ba262019-12-10 17:49:14 -05001307 {
1308 // If we're clearing a buffer from an int or uint color into a normalized color,
1309 // then the whole range of the int or uint color must be scaled between 0 and 1.
1310 switch(state.sourceFormat)
1311 {
Ben Claytonfccfc562019-12-17 20:37:31 +00001312 case VK_FORMAT_R32G32B32A32_SINT:
1313 unscale = float4(static_cast<float>(0x7FFFFFFF));
1314 break;
1315 case VK_FORMAT_R32G32B32A32_UINT:
1316 unscale = float4(static_cast<float>(0xFFFFFFFF));
1317 break;
1318 default:
1319 UNSUPPORTED("Blitter source format %d", (int)state.sourceFormat);
Nicolas Capens157ba262019-12-10 17:49:14 -05001320 }
1321 }
1322 else
1323 {
1324 unscale = state.sourceFormat.getScale();
1325 }
1326
1327 scale = state.destFormat.getScale();
1328
1329 bool srcSRGB = state.sourceFormat.isSRGBformat();
1330 bool dstSRGB = state.destFormat.isSRGBformat();
1331
Ben Claytonfccfc562019-12-17 20:37:31 +00001332 if(state.allowSRGBConversion && ((srcSRGB && !preScaled) || dstSRGB)) // One of the formats is sRGB encoded.
Nicolas Capens157ba262019-12-10 17:49:14 -05001333 {
Ben Claytonfccfc562019-12-17 20:37:31 +00001334 value *= preScaled ? Float4(1.0f / scale.x, 1.0f / scale.y, 1.0f / scale.z, 1.0f / scale.w) : // Unapply scale
1335 Float4(1.0f / unscale.x, 1.0f / unscale.y, 1.0f / unscale.z, 1.0f / unscale.w); // Apply unscale
Nicolas Capens157ba262019-12-10 17:49:14 -05001336 value = (srcSRGB && !preScaled) ? sRGBtoLinear(value) : LinearToSRGB(value);
Ben Claytonfccfc562019-12-17 20:37:31 +00001337 value *= Float4(scale.x, scale.y, scale.z, scale.w); // Apply scale
Nicolas Capens157ba262019-12-10 17:49:14 -05001338 }
1339 else if(unscale != scale)
1340 {
1341 value *= Float4(scale.x / unscale.x, scale.y / unscale.y, scale.z / unscale.z, scale.w / unscale.w);
1342 }
1343
1344 if(state.sourceFormat.isFloatFormat() && !state.destFormat.isFloatFormat())
1345 {
1346 value = Min(value, Float4(scale.x, scale.y, scale.z, scale.w));
1347
1348 value = Max(value, Float4(state.destFormat.isUnsignedComponent(0) ? 0.0f : -scale.x,
1349 state.destFormat.isUnsignedComponent(1) ? 0.0f : -scale.y,
1350 state.destFormat.isUnsignedComponent(2) ? 0.0f : -scale.z,
1351 state.destFormat.isUnsignedComponent(3) ? 0.0f : -scale.w));
1352 }
1353}
1354
1355Int Blitter::ComputeOffset(Int &x, Int &y, Int &pitchB, int bytes)
1356{
1357 return y * pitchB + x * bytes;
1358}
1359
Alexis Hetu18daa812020-03-11 17:06:53 -04001360Int Blitter::ComputeOffset(Int &x, Int &y, Int &z, Int &sliceB, Int &pitchB, int bytes)
1361{
1362 return z * sliceB + y * pitchB + x * bytes;
1363}
1364
Nicolas Capens2883de92020-01-27 14:58:14 -05001365Float4 Blitter::LinearToSRGB(const Float4 &c)
Nicolas Capens157ba262019-12-10 17:49:14 -05001366{
1367 Float4 lc = Min(c, Float4(0.0031308f)) * Float4(12.92f);
1368 Float4 ec = Float4(1.055f) * power(c, Float4(1.0f / 2.4f)) - Float4(0.055f);
1369
1370 Float4 s = c;
1371 s.xyz = Max(lc, ec);
1372
1373 return s;
1374}
1375
Nicolas Capens2883de92020-01-27 14:58:14 -05001376Float4 Blitter::sRGBtoLinear(const Float4 &c)
Nicolas Capens157ba262019-12-10 17:49:14 -05001377{
1378 Float4 lc = c * Float4(1.0f / 12.92f);
1379 Float4 ec = power((c + Float4(0.055f)) * Float4(1.0f / 1.055f), Float4(2.4f));
1380
1381 Int4 linear = CmpLT(c, Float4(0.04045f));
1382
1383 Float4 s = c;
Ben Claytonfccfc562019-12-17 20:37:31 +00001384 s.xyz = As<Float4>((linear & As<Int4>(lc)) | (~linear & As<Int4>(ec))); // TODO: IfThenElse()
Nicolas Capens157ba262019-12-10 17:49:14 -05001385
1386 return s;
1387}
1388
Alexis Hetu18daa812020-03-11 17:06:53 -04001389Float4 Blitter::sample(Pointer<Byte> &source, Float &x, Float &y, Float &z,
1390 Int &sWidth, Int &sHeight, Int &sDepth,
1391 Int &sSliceB, Int &sPitchB, const State &state)
1392{
1393 bool intSrc = state.sourceFormat.isUnnormalizedInteger();
1394 int srcBytes = state.sourceFormat.bytes();
1395
1396 Float4 color;
1397
1398 bool preScaled = false;
1399 if(!state.filter || intSrc)
1400 {
1401 Int X = Int(x);
1402 Int Y = Int(y);
1403 Int Z = Int(z);
1404
1405 if(state.clampToEdge)
1406 {
1407 X = Clamp(X, 0, sWidth - 1);
1408 Y = Clamp(Y, 0, sHeight - 1);
1409 Z = Clamp(Z, 0, sDepth - 1);
1410 }
1411
1412 Pointer<Byte> s = source + ComputeOffset(X, Y, Z, sSliceB, sPitchB, srcBytes);
1413
1414 color = readFloat4(s, state);
1415
1416 if(state.srcSamples > 1) // Resolve multisampled source
1417 {
1418 if(state.allowSRGBConversion && state.sourceFormat.isSRGBformat()) // sRGB -> RGB
1419 {
1420 ApplyScaleAndClamp(color, state);
1421 preScaled = true;
1422 }
1423 Float4 accum = color;
1424 for(int sample = 1; sample < state.srcSamples; sample++)
1425 {
1426 s += sSliceB;
1427 color = readFloat4(s, state);
1428
1429 if(state.allowSRGBConversion && state.sourceFormat.isSRGBformat()) // sRGB -> RGB
1430 {
1431 ApplyScaleAndClamp(color, state);
1432 preScaled = true;
1433 }
1434 accum += color;
1435 }
1436 color = accum * Float4(1.0f / static_cast<float>(state.srcSamples));
1437 }
1438 }
1439 else // Bilinear filtering
1440 {
1441 Float X = x;
1442 Float Y = y;
1443 Float Z = z;
1444
1445 if(state.clampToEdge)
1446 {
1447 X = Min(Max(x, 0.5f), Float(sWidth) - 0.5f);
1448 Y = Min(Max(y, 0.5f), Float(sHeight) - 0.5f);
1449 Z = Min(Max(z, 0.5f), Float(sDepth) - 0.5f);
1450 }
1451
1452 Float x0 = X - 0.5f;
1453 Float y0 = Y - 0.5f;
1454 Float z0 = Z - 0.5f;
1455
1456 Int X0 = Max(Int(x0), 0);
1457 Int Y0 = Max(Int(y0), 0);
1458 Int Z0 = Max(Int(z0), 0);
1459
1460 Int X1 = X0 + 1;
1461 Int Y1 = Y0 + 1;
1462 X1 = IfThenElse(X1 >= sWidth, X0, X1);
1463 Y1 = IfThenElse(Y1 >= sHeight, Y0, Y1);
1464
1465 if(state.filter3D)
1466 {
1467 Int Z1 = Z0 + 1;
1468 Z1 = IfThenElse(Z1 >= sHeight, Z0, Z1);
1469
1470 Pointer<Byte> s000 = source + ComputeOffset(X0, Y0, Z0, sSliceB, sPitchB, srcBytes);
1471 Pointer<Byte> s010 = source + ComputeOffset(X1, Y0, Z0, sSliceB, sPitchB, srcBytes);
1472 Pointer<Byte> s100 = source + ComputeOffset(X0, Y1, Z0, sSliceB, sPitchB, srcBytes);
1473 Pointer<Byte> s110 = source + ComputeOffset(X1, Y1, Z0, sSliceB, sPitchB, srcBytes);
1474 Pointer<Byte> s001 = source + ComputeOffset(X0, Y0, Z1, sSliceB, sPitchB, srcBytes);
1475 Pointer<Byte> s011 = source + ComputeOffset(X1, Y0, Z1, sSliceB, sPitchB, srcBytes);
1476 Pointer<Byte> s101 = source + ComputeOffset(X0, Y1, Z1, sSliceB, sPitchB, srcBytes);
1477 Pointer<Byte> s111 = source + ComputeOffset(X1, Y1, Z1, sSliceB, sPitchB, srcBytes);
1478
1479 Float4 c000 = readFloat4(s000, state);
1480 Float4 c010 = readFloat4(s010, state);
1481 Float4 c100 = readFloat4(s100, state);
1482 Float4 c110 = readFloat4(s110, state);
1483 Float4 c001 = readFloat4(s001, state);
1484 Float4 c011 = readFloat4(s011, state);
1485 Float4 c101 = readFloat4(s101, state);
1486 Float4 c111 = readFloat4(s111, state);
1487
1488 if(state.allowSRGBConversion && state.sourceFormat.isSRGBformat()) // sRGB -> RGB
1489 {
1490 ApplyScaleAndClamp(c000, state);
1491 ApplyScaleAndClamp(c010, state);
1492 ApplyScaleAndClamp(c100, state);
1493 ApplyScaleAndClamp(c110, state);
1494 ApplyScaleAndClamp(c001, state);
1495 ApplyScaleAndClamp(c011, state);
1496 ApplyScaleAndClamp(c101, state);
1497 ApplyScaleAndClamp(c111, state);
1498 preScaled = true;
1499 }
1500
1501 Float4 fx = Float4(x0 - Float(X0));
1502 Float4 fy = Float4(y0 - Float(Y0));
1503 Float4 fz = Float4(z0 - Float(Z0));
1504 Float4 ix = Float4(1.0f) - fx;
1505 Float4 iy = Float4(1.0f) - fy;
1506 Float4 iz = Float4(1.0f) - fz;
1507
1508 color = ((c000 * ix + c010 * fx) * iy +
1509 (c100 * ix + c110 * fx) * fy) *
1510 iz +
1511 ((c001 * ix + c011 * fx) * iy +
1512 (c101 * ix + c111 * fx) * fy) *
1513 fz;
1514 }
1515 else
1516 {
1517 Pointer<Byte> s00 = source + ComputeOffset(X0, Y0, Z0, sSliceB, sPitchB, srcBytes);
1518 Pointer<Byte> s01 = source + ComputeOffset(X1, Y0, Z0, sSliceB, sPitchB, srcBytes);
1519 Pointer<Byte> s10 = source + ComputeOffset(X0, Y1, Z0, sSliceB, sPitchB, srcBytes);
1520 Pointer<Byte> s11 = source + ComputeOffset(X1, Y1, Z0, sSliceB, sPitchB, srcBytes);
1521
1522 Float4 c00 = readFloat4(s00, state);
1523 Float4 c01 = readFloat4(s01, state);
1524 Float4 c10 = readFloat4(s10, state);
1525 Float4 c11 = readFloat4(s11, state);
1526
1527 if(state.allowSRGBConversion && state.sourceFormat.isSRGBformat()) // sRGB -> RGB
1528 {
1529 ApplyScaleAndClamp(c00, state);
1530 ApplyScaleAndClamp(c01, state);
1531 ApplyScaleAndClamp(c10, state);
1532 ApplyScaleAndClamp(c11, state);
1533 preScaled = true;
1534 }
1535
1536 Float4 fx = Float4(x0 - Float(X0));
1537 Float4 fy = Float4(y0 - Float(Y0));
1538 Float4 ix = Float4(1.0f) - fx;
1539 Float4 iy = Float4(1.0f) - fy;
1540
1541 color = (c00 * ix + c01 * fx) * iy +
1542 (c10 * ix + c11 * fx) * fy;
1543 }
1544 }
1545
1546 ApplyScaleAndClamp(color, state, preScaled);
1547
1548 return color;
1549}
1550
Nicolas Capens157ba262019-12-10 17:49:14 -05001551Blitter::BlitRoutineType Blitter::generate(const State &state)
1552{
1553 BlitFunction function;
1554 {
1555 Pointer<Byte> blit(function.Arg<0>());
1556
Ben Claytonfccfc562019-12-17 20:37:31 +00001557 Pointer<Byte> source = *Pointer<Pointer<Byte>>(blit + OFFSET(BlitData, source));
1558 Pointer<Byte> dest = *Pointer<Pointer<Byte>>(blit + OFFSET(BlitData, dest));
1559 Int sPitchB = *Pointer<Int>(blit + OFFSET(BlitData, sPitchB));
1560 Int dPitchB = *Pointer<Int>(blit + OFFSET(BlitData, dPitchB));
Alexis Hetu18daa812020-03-11 17:06:53 -04001561 Int sSliceB = *Pointer<Int>(blit + OFFSET(BlitData, sSliceB));
1562 Int dSliceB = *Pointer<Int>(blit + OFFSET(BlitData, dSliceB));
Nicolas Capens157ba262019-12-10 17:49:14 -05001563
Ben Claytonfccfc562019-12-17 20:37:31 +00001564 Float x0 = *Pointer<Float>(blit + OFFSET(BlitData, x0));
1565 Float y0 = *Pointer<Float>(blit + OFFSET(BlitData, y0));
Alexis Hetu18daa812020-03-11 17:06:53 -04001566 Float z0 = *Pointer<Float>(blit + OFFSET(BlitData, z0));
Ben Claytonfccfc562019-12-17 20:37:31 +00001567 Float w = *Pointer<Float>(blit + OFFSET(BlitData, w));
1568 Float h = *Pointer<Float>(blit + OFFSET(BlitData, h));
Alexis Hetu18daa812020-03-11 17:06:53 -04001569 Float d = *Pointer<Float>(blit + OFFSET(BlitData, d));
Nicolas Capens157ba262019-12-10 17:49:14 -05001570
Ben Claytonfccfc562019-12-17 20:37:31 +00001571 Int x0d = *Pointer<Int>(blit + OFFSET(BlitData, x0d));
1572 Int x1d = *Pointer<Int>(blit + OFFSET(BlitData, x1d));
1573 Int y0d = *Pointer<Int>(blit + OFFSET(BlitData, y0d));
1574 Int y1d = *Pointer<Int>(blit + OFFSET(BlitData, y1d));
Alexis Hetu18daa812020-03-11 17:06:53 -04001575 Int z0d = *Pointer<Int>(blit + OFFSET(BlitData, z0d));
1576 Int z1d = *Pointer<Int>(blit + OFFSET(BlitData, z1d));
Nicolas Capens157ba262019-12-10 17:49:14 -05001577
Ben Claytonfccfc562019-12-17 20:37:31 +00001578 Int sWidth = *Pointer<Int>(blit + OFFSET(BlitData, sWidth));
1579 Int sHeight = *Pointer<Int>(blit + OFFSET(BlitData, sHeight));
Alexis Hetu18daa812020-03-11 17:06:53 -04001580 Int sDepth = *Pointer<Int>(blit + OFFSET(BlitData, sDepth));
Nicolas Capens157ba262019-12-10 17:49:14 -05001581
Nicolas Capens9d9f30d2020-01-12 03:26:18 -05001582 bool intSrc = state.sourceFormat.isUnnormalizedInteger();
1583 bool intDst = state.destFormat.isUnnormalizedInteger();
Nicolas Capens157ba262019-12-10 17:49:14 -05001584 bool intBoth = intSrc && intDst;
1585 int srcBytes = state.sourceFormat.bytes();
1586 int dstBytes = state.destFormat.bytes();
1587
1588 bool hasConstantColorI = false;
1589 Int4 constantColorI;
1590 bool hasConstantColorF = false;
1591 Float4 constantColorF;
1592 if(state.clearOperation)
1593 {
Ben Claytonfccfc562019-12-17 20:37:31 +00001594 if(intBoth) // Integer types
Nicolas Capens157ba262019-12-10 17:49:14 -05001595 {
1596 constantColorI = readInt4(source, state);
1597 hasConstantColorI = true;
1598 }
1599 else
1600 {
1601 constantColorF = readFloat4(source, state);
1602 hasConstantColorF = true;
1603
1604 ApplyScaleAndClamp(constantColorF, state);
1605 }
1606 }
1607
Alexis Hetu18daa812020-03-11 17:06:53 -04001608 For(Int k = z0d, k < z1d, k++)
Nicolas Capens157ba262019-12-10 17:49:14 -05001609 {
Alexis Hetu18daa812020-03-11 17:06:53 -04001610 Float z = state.clearOperation ? RValue<Float>(z0) : z0 + Float(k) * d;
1611 Pointer<Byte> destSlice = dest + k * dSliceB;
Nicolas Capens157ba262019-12-10 17:49:14 -05001612
Alexis Hetu18daa812020-03-11 17:06:53 -04001613 For(Int j = y0d, j < y1d, j++)
Nicolas Capens157ba262019-12-10 17:49:14 -05001614 {
Alexis Hetu18daa812020-03-11 17:06:53 -04001615 Float y = state.clearOperation ? RValue<Float>(y0) : y0 + Float(j) * h;
1616 Pointer<Byte> destLine = destSlice + j * dPitchB;
Nicolas Capens157ba262019-12-10 17:49:14 -05001617
Alexis Hetu18daa812020-03-11 17:06:53 -04001618 For(Int i = x0d, i < x1d, i++)
Nicolas Capens157ba262019-12-10 17:49:14 -05001619 {
Alexis Hetu18daa812020-03-11 17:06:53 -04001620 Float x = state.clearOperation ? RValue<Float>(x0) : x0 + Float(i) * w;
1621 Pointer<Byte> d = destLine + i * dstBytes;
1622
1623 if(hasConstantColorI)
Nicolas Capens157ba262019-12-10 17:49:14 -05001624 {
Alexis Hetu18daa812020-03-11 17:06:53 -04001625 for(int s = 0; s < state.destSamples; s++)
1626 {
1627 write(constantColorI, d, state);
Nicolas Capens157ba262019-12-10 17:49:14 -05001628
Alexis Hetu18daa812020-03-11 17:06:53 -04001629 d += dSliceB;
1630 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001631 }
Alexis Hetu18daa812020-03-11 17:06:53 -04001632 else if(hasConstantColorF)
Nicolas Capens157ba262019-12-10 17:49:14 -05001633 {
Alexis Hetu18daa812020-03-11 17:06:53 -04001634 for(int s = 0; s < state.destSamples; s++)
1635 {
1636 write(constantColorF, d, state);
Nicolas Capens157ba262019-12-10 17:49:14 -05001637
Alexis Hetu18daa812020-03-11 17:06:53 -04001638 d += dSliceB;
1639 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001640 }
Alexis Hetu18daa812020-03-11 17:06:53 -04001641 else if(intBoth) // Integer types do not support filtering
Nicolas Capens68a82382018-10-02 13:16:55 -04001642 {
Nicolas Capens68a82382018-10-02 13:16:55 -04001643 Int X = Int(x);
1644 Int Y = Int(y);
Alexis Hetu18daa812020-03-11 17:06:53 -04001645 Int Z = Int(z);
Nicolas Capens68a82382018-10-02 13:16:55 -04001646
1647 if(state.clampToEdge)
1648 {
1649 X = Clamp(X, 0, sWidth - 1);
1650 Y = Clamp(Y, 0, sHeight - 1);
Alexis Hetu18daa812020-03-11 17:06:53 -04001651 Z = Clamp(Z, 0, sDepth - 1);
Nicolas Capens68a82382018-10-02 13:16:55 -04001652 }
1653
Alexis Hetu18daa812020-03-11 17:06:53 -04001654 Pointer<Byte> s = source + ComputeOffset(X, Y, Z, sSliceB, sPitchB, srcBytes);
Nicolas Capens68a82382018-10-02 13:16:55 -04001655
Alexis Hetu18daa812020-03-11 17:06:53 -04001656 // When both formats are true integer types, we don't go to float to avoid losing precision
1657 Int4 color = readInt4(s, state);
1658 for(int s = 0; s < state.destSamples; s++)
Alexis Hetuf8df30f2019-10-23 18:03:21 -04001659 {
Alexis Hetu18daa812020-03-11 17:06:53 -04001660 write(color, d, state);
Nicolas Capens157ba262019-12-10 17:49:14 -05001661
Alexis Hetu18daa812020-03-11 17:06:53 -04001662 d += dSliceB;
Nicolas Capens68a82382018-10-02 13:16:55 -04001663 }
Nicolas Capens157ba262019-12-10 17:49:14 -05001664 }
Alexis Hetu18daa812020-03-11 17:06:53 -04001665 else
Nicolas Capens157ba262019-12-10 17:49:14 -05001666 {
Alexis Hetu18daa812020-03-11 17:06:53 -04001667 Float4 color = sample(source, x, y, z, sWidth, sHeight, sDepth, sSliceB, sPitchB, state);
Nicolas Capens157ba262019-12-10 17:49:14 -05001668
Alexis Hetu18daa812020-03-11 17:06:53 -04001669 for(int s = 0; s < state.destSamples; s++)
Nicolas Capens68a82382018-10-02 13:16:55 -04001670 {
Alexis Hetu18daa812020-03-11 17:06:53 -04001671 write(color, d, state);
1672
1673 d += dSliceB;
Nicolas Capens68a82382018-10-02 13:16:55 -04001674 }
Nicolas Capens68a82382018-10-02 13:16:55 -04001675 }
1676 }
1677 }
1678 }
Nicolas Capens68a82382018-10-02 13:16:55 -04001679 }
1680
Nicolas Capens157ba262019-12-10 17:49:14 -05001681 return function("BlitRoutine");
1682}
1683
1684Blitter::BlitRoutineType Blitter::getBlitRoutine(const State &state)
1685{
Ben Clayton377573c2020-04-03 20:36:40 +01001686 marl::lock lock(blitMutex);
Ben Claytonac43aa72020-04-04 00:48:13 +01001687 auto blitRoutine = blitCache.lookup(state);
Nicolas Capens157ba262019-12-10 17:49:14 -05001688
1689 if(!blitRoutine)
Alexis Hetu33642272019-03-01 11:55:59 -05001690 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001691 blitRoutine = generate(state);
1692 blitCache.add(state, blitRoutine);
Alexis Hetu33642272019-03-01 11:55:59 -05001693 }
1694
Nicolas Capens157ba262019-12-10 17:49:14 -05001695 return blitRoutine;
1696}
1697
1698Blitter::CornerUpdateRoutineType Blitter::getCornerUpdateRoutine(const State &state)
1699{
Ben Clayton377573c2020-04-03 20:36:40 +01001700 marl::lock lock(cornerUpdateMutex);
Ben Claytonac43aa72020-04-04 00:48:13 +01001701 auto cornerUpdateRoutine = cornerUpdateCache.lookup(state);
Nicolas Capens157ba262019-12-10 17:49:14 -05001702
1703 if(!cornerUpdateRoutine)
Alexis Hetuf60a2d52019-05-09 14:16:05 -04001704 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001705 cornerUpdateRoutine = generateCornerUpdate(state);
1706 cornerUpdateCache.add(state, cornerUpdateRoutine);
Alexis Hetuf60a2d52019-05-09 14:16:05 -04001707 }
1708
Nicolas Capens157ba262019-12-10 17:49:14 -05001709 return cornerUpdateRoutine;
1710}
1711
1712void Blitter::blitToBuffer(const vk::Image *src, VkImageSubresourceLayers subresource, VkOffset3D offset, VkExtent3D extent, uint8_t *dst, int bufferRowPitch, int bufferSlicePitch)
1713{
1714 auto aspect = static_cast<VkImageAspectFlagBits>(subresource.aspectMask);
1715 auto format = src->getFormat(aspect);
Ben Claytonfccfc562019-12-17 20:37:31 +00001716 State state(format, format, VK_SAMPLE_COUNT_1_BIT, VK_SAMPLE_COUNT_1_BIT, Options{ false, false });
Nicolas Capens157ba262019-12-10 17:49:14 -05001717
1718 auto blitRoutine = getBlitRoutine(state);
1719 if(!blitRoutine)
Chris Forbes529eda32019-05-08 10:27:05 -07001720 {
Nicolas Capens157ba262019-12-10 17:49:14 -05001721 return;
Chris Forbes529eda32019-05-08 10:27:05 -07001722 }
1723
Ben Claytonfccfc562019-12-17 20:37:31 +00001724 BlitData data = {
1725 nullptr, // source
1726 dst, // dest
1727 src->rowPitchBytes(aspect, subresource.mipLevel), // sPitchB
1728 bufferRowPitch, // dPitchB
1729 src->slicePitchBytes(aspect, subresource.mipLevel), // sSliceB
1730 bufferSlicePitch, // dSliceB
Chris Forbes529eda32019-05-08 10:27:05 -07001731
Alexis Hetu18daa812020-03-11 17:06:53 -04001732 0.0f, 0.0f, 0.0f, 1.0f, 1.0f, 1.0f,
Chris Forbes529eda32019-05-08 10:27:05 -07001733
Ben Claytonfccfc562019-12-17 20:37:31 +00001734 0, // x0d
1735 static_cast<int>(extent.width), // x1d
Alexis Hetu18daa812020-03-11 17:06:53 -04001736 0, // y0d
1737 static_cast<int>(extent.height), // y1d
1738 0, // z0d
1739 static_cast<int>(extent.depth), // z1d
Chris Forbes529eda32019-05-08 10:27:05 -07001740
Alexis Hetu18daa812020-03-11 17:06:53 -04001741 static_cast<int>(extent.width), // sWidth
1742 static_cast<int>(extent.height), // sHeight
1743 static_cast<int>(extent.depth), // sDepth
Ben Clayton21fb75f2020-04-16 10:36:55 +01001744
1745 false, // filter3D
Nicolas Capens157ba262019-12-10 17:49:14 -05001746 };
Chris Forbes529eda32019-05-08 10:27:05 -07001747
Nicolas Capens157ba262019-12-10 17:49:14 -05001748 VkImageSubresourceLayers srcSubresLayers = subresource;
1749 srcSubresLayers.layerCount = 1;
Chris Forbes529eda32019-05-08 10:27:05 -07001750
Ben Claytonfccfc562019-12-17 20:37:31 +00001751 VkImageSubresourceRange srcSubresRange = {
Nicolas Capens157ba262019-12-10 17:49:14 -05001752 subresource.aspectMask,
1753 subresource.mipLevel,
1754 1,
1755 subresource.baseArrayLayer,
1756 subresource.layerCount
1757 };
Alexis Hetu33642272019-03-01 11:55:59 -05001758
Nicolas Capens157ba262019-12-10 17:49:14 -05001759 uint32_t lastLayer = src->getLastLayerIndex(srcSubresRange);
Alexis Hetu33642272019-03-01 11:55:59 -05001760
Nicolas Capens157ba262019-12-10 17:49:14 -05001761 for(; srcSubresLayers.baseArrayLayer <= lastLayer; srcSubresLayers.baseArrayLayer++)
Alexis Hetub317d962019-04-29 14:07:31 -04001762 {
Alexis Hetu18daa812020-03-11 17:06:53 -04001763 data.source = src->getTexelPointer({ 0, 0, 0 }, srcSubresLayers);
1764 ASSERT(data.source < src->end());
1765 blitRoutine(&data);
Alexis Hetub317d962019-04-29 14:07:31 -04001766 }
Nicolas Capens68a82382018-10-02 13:16:55 -04001767}
Nicolas Capens157ba262019-12-10 17:49:14 -05001768
1769void Blitter::blitFromBuffer(const vk::Image *dst, VkImageSubresourceLayers subresource, VkOffset3D offset, VkExtent3D extent, uint8_t *src, int bufferRowPitch, int bufferSlicePitch)
1770{
1771 auto aspect = static_cast<VkImageAspectFlagBits>(subresource.aspectMask);
1772 auto format = dst->getFormat(aspect);
Ben Claytonfccfc562019-12-17 20:37:31 +00001773 State state(format, format, VK_SAMPLE_COUNT_1_BIT, VK_SAMPLE_COUNT_1_BIT, Options{ false, false });
Nicolas Capens157ba262019-12-10 17:49:14 -05001774
1775 auto blitRoutine = getBlitRoutine(state);
1776 if(!blitRoutine)
1777 {
1778 return;
1779 }
1780
Ben Claytonfccfc562019-12-17 20:37:31 +00001781 BlitData data = {
1782 src, // source
1783 nullptr, // dest
1784 bufferRowPitch, // sPitchB
1785 dst->rowPitchBytes(aspect, subresource.mipLevel), // dPitchB
1786 bufferSlicePitch, // sSliceB
1787 dst->slicePitchBytes(aspect, subresource.mipLevel), // dSliceB
Nicolas Capens157ba262019-12-10 17:49:14 -05001788
Ben Claytonfccfc562019-12-17 20:37:31 +00001789 static_cast<float>(-offset.x), // x0
1790 static_cast<float>(-offset.y), // y0
Alexis Hetu18daa812020-03-11 17:06:53 -04001791 static_cast<float>(-offset.z), // z0
Ben Claytonfccfc562019-12-17 20:37:31 +00001792 1.0f, // w
1793 1.0f, // h
Alexis Hetu18daa812020-03-11 17:06:53 -04001794 1.0f, // d
Nicolas Capens157ba262019-12-10 17:49:14 -05001795
Ben Claytonfccfc562019-12-17 20:37:31 +00001796 offset.x, // x0d
1797 static_cast<int>(offset.x + extent.width), // x1d
Alexis Hetu18daa812020-03-11 17:06:53 -04001798 offset.y, // y0d
1799 static_cast<int>(offset.y + extent.height), // y1d
1800 offset.z, // z0d
1801 static_cast<int>(offset.z + extent.depth), // z1d
Nicolas Capens157ba262019-12-10 17:49:14 -05001802
Alexis Hetu18daa812020-03-11 17:06:53 -04001803 static_cast<int>(extent.width), // sWidth
1804 static_cast<int>(extent.height), // sHeight;
1805 static_cast<int>(extent.depth), // sDepth;
Ben Clayton21fb75f2020-04-16 10:36:55 +01001806
1807 false, // filter3D
Nicolas Capens157ba262019-12-10 17:49:14 -05001808 };
1809
Nicolas Capens157ba262019-12-10 17:49:14 -05001810 VkImageSubresourceLayers dstSubresLayers = subresource;
1811 dstSubresLayers.layerCount = 1;
1812
Ben Claytonfccfc562019-12-17 20:37:31 +00001813 VkImageSubresourceRange dstSubresRange = {
Nicolas Capens157ba262019-12-10 17:49:14 -05001814 subresource.aspectMask,
1815 subresource.mipLevel,
1816 1,
1817 subresource.baseArrayLayer,
1818 subresource.layerCount
1819 };
1820
1821 uint32_t lastLayer = dst->getLastLayerIndex(dstSubresRange);
1822
1823 for(; dstSubresLayers.baseArrayLayer <= lastLayer; dstSubresLayers.baseArrayLayer++)
1824 {
Alexis Hetu18daa812020-03-11 17:06:53 -04001825 data.dest = dst->getTexelPointer({ 0, 0, 0 }, dstSubresLayers);
1826 ASSERT(data.dest < dst->end());
1827 blitRoutine(&data);
Nicolas Capens157ba262019-12-10 17:49:14 -05001828 }
1829}
1830
1831void Blitter::blit(const vk::Image *src, vk::Image *dst, VkImageBlit region, VkFilter filter)
1832{
1833 if(dst->getFormat() == VK_FORMAT_UNDEFINED)
1834 {
1835 return;
1836 }
1837
Nicolas Capensdd0e6002020-01-24 01:21:47 -05001838 // Vulkan 1.2 section 18.5. Image Copies with Scaling:
1839 // "The layerCount member of srcSubresource and dstSubresource must match"
1840 // "The aspectMask member of srcSubresource and dstSubresource must match"
1841 ASSERT(region.srcSubresource.layerCount == region.dstSubresource.layerCount);
1842 ASSERT(region.srcSubresource.aspectMask == region.dstSubresource.aspectMask);
Nicolas Capens157ba262019-12-10 17:49:14 -05001843
1844 if(region.dstOffsets[0].x > region.dstOffsets[1].x)
1845 {
1846 std::swap(region.srcOffsets[0].x, region.srcOffsets[1].x);
1847 std::swap(region.dstOffsets[0].x, region.dstOffsets[1].x);
1848 }
1849
1850 if(region.dstOffsets[0].y > region.dstOffsets[1].y)
1851 {
1852 std::swap(region.srcOffsets[0].y, region.srcOffsets[1].y);
1853 std::swap(region.dstOffsets[0].y, region.dstOffsets[1].y);
1854 }
1855
1856 VkImageAspectFlagBits srcAspect = static_cast<VkImageAspectFlagBits>(region.srcSubresource.aspectMask);
1857 VkImageAspectFlagBits dstAspect = static_cast<VkImageAspectFlagBits>(region.dstSubresource.aspectMask);
1858 VkExtent3D srcExtent = src->getMipLevelExtent(srcAspect, region.srcSubresource.mipLevel);
1859
Nicolas Capens157ba262019-12-10 17:49:14 -05001860 float widthRatio = static_cast<float>(region.srcOffsets[1].x - region.srcOffsets[0].x) /
1861 static_cast<float>(region.dstOffsets[1].x - region.dstOffsets[0].x);
1862 float heightRatio = static_cast<float>(region.srcOffsets[1].y - region.srcOffsets[0].y) /
1863 static_cast<float>(region.dstOffsets[1].y - region.dstOffsets[0].y);
Alexis Hetu18daa812020-03-11 17:06:53 -04001864 float depthRatio = static_cast<float>(region.srcOffsets[1].z - region.srcOffsets[0].z) /
1865 static_cast<float>(region.dstOffsets[1].z - region.dstOffsets[0].z);
Nicolas Capens157ba262019-12-10 17:49:14 -05001866 float x0 = region.srcOffsets[0].x + (0.5f - region.dstOffsets[0].x) * widthRatio;
1867 float y0 = region.srcOffsets[0].y + (0.5f - region.dstOffsets[0].y) * heightRatio;
Alexis Hetu18daa812020-03-11 17:06:53 -04001868 float z0 = region.srcOffsets[0].z + (0.5f - region.dstOffsets[0].z) * depthRatio;
Nicolas Capens157ba262019-12-10 17:49:14 -05001869
1870 auto srcFormat = src->getFormat(srcAspect);
1871 auto dstFormat = dst->getFormat(dstAspect);
1872
1873 bool doFilter = (filter != VK_FILTER_NEAREST);
1874 bool allowSRGBConversion =
Ben Claytonfccfc562019-12-17 20:37:31 +00001875 doFilter ||
1876 (src->getSampleCountFlagBits() > 1) ||
1877 (srcFormat.isSRGBformat() != dstFormat.isSRGBformat());
Nicolas Capens157ba262019-12-10 17:49:14 -05001878
1879 State state(src->getFormat(srcAspect), dst->getFormat(dstAspect), src->getSampleCountFlagBits(), dst->getSampleCountFlagBits(),
1880 Options{ doFilter, allowSRGBConversion });
1881 state.clampToEdge = (region.srcOffsets[0].x < 0) ||
1882 (region.srcOffsets[0].y < 0) ||
1883 (static_cast<uint32_t>(region.srcOffsets[1].x) > srcExtent.width) ||
1884 (static_cast<uint32_t>(region.srcOffsets[1].y) > srcExtent.height) ||
1885 (doFilter && ((x0 < 0.5f) || (y0 < 0.5f)));
Alexis Hetu18daa812020-03-11 17:06:53 -04001886 state.filter3D = (region.srcOffsets[1].z - region.srcOffsets[0].z) !=
1887 (region.dstOffsets[1].z - region.dstOffsets[0].z);
Nicolas Capens157ba262019-12-10 17:49:14 -05001888
1889 auto blitRoutine = getBlitRoutine(state);
1890 if(!blitRoutine)
1891 {
1892 return;
1893 }
1894
Ben Claytonfccfc562019-12-17 20:37:31 +00001895 BlitData data = {
1896 nullptr, // source
1897 nullptr, // dest
1898 src->rowPitchBytes(srcAspect, region.srcSubresource.mipLevel), // sPitchB
1899 dst->rowPitchBytes(dstAspect, region.dstSubresource.mipLevel), // dPitchB
1900 src->slicePitchBytes(srcAspect, region.srcSubresource.mipLevel), // sSliceB
1901 dst->slicePitchBytes(dstAspect, region.dstSubresource.mipLevel), // dSliceB
Nicolas Capens157ba262019-12-10 17:49:14 -05001902
1903 x0,
1904 y0,
Alexis Hetu18daa812020-03-11 17:06:53 -04001905 z0,
Nicolas Capens157ba262019-12-10 17:49:14 -05001906 widthRatio,
1907 heightRatio,
Alexis Hetu18daa812020-03-11 17:06:53 -04001908 depthRatio,
Nicolas Capens157ba262019-12-10 17:49:14 -05001909
Ben Claytonfccfc562019-12-17 20:37:31 +00001910 region.dstOffsets[0].x, // x0d
1911 region.dstOffsets[1].x, // x1d
Alexis Hetu18daa812020-03-11 17:06:53 -04001912 region.dstOffsets[0].y, // y0d
1913 region.dstOffsets[1].y, // y1d
1914 region.dstOffsets[0].z, // z0d
1915 region.dstOffsets[1].z, // z1d
Nicolas Capens157ba262019-12-10 17:49:14 -05001916
Alexis Hetu18daa812020-03-11 17:06:53 -04001917 static_cast<int>(srcExtent.width), // sWidth
1918 static_cast<int>(srcExtent.height), // sHeight
1919 static_cast<int>(srcExtent.depth), // sDepth
Ben Clayton21fb75f2020-04-16 10:36:55 +01001920
1921 false, // filter3D
Nicolas Capens157ba262019-12-10 17:49:14 -05001922 };
1923
Ben Claytonfccfc562019-12-17 20:37:31 +00001924 VkImageSubresourceLayers srcSubresLayers = {
Nicolas Capens157ba262019-12-10 17:49:14 -05001925 region.srcSubresource.aspectMask,
1926 region.srcSubresource.mipLevel,
1927 region.srcSubresource.baseArrayLayer,
1928 1
1929 };
1930
Ben Claytonfccfc562019-12-17 20:37:31 +00001931 VkImageSubresourceLayers dstSubresLayers = {
Nicolas Capens157ba262019-12-10 17:49:14 -05001932 region.dstSubresource.aspectMask,
1933 region.dstSubresource.mipLevel,
1934 region.dstSubresource.baseArrayLayer,
1935 1
1936 };
1937
Ben Claytonfccfc562019-12-17 20:37:31 +00001938 VkImageSubresourceRange srcSubresRange = {
Nicolas Capens157ba262019-12-10 17:49:14 -05001939 region.srcSubresource.aspectMask,
1940 region.srcSubresource.mipLevel,
1941 1,
1942 region.srcSubresource.baseArrayLayer,
1943 region.srcSubresource.layerCount
1944 };
1945
1946 uint32_t lastLayer = src->getLastLayerIndex(srcSubresRange);
1947
1948 for(; srcSubresLayers.baseArrayLayer <= lastLayer; srcSubresLayers.baseArrayLayer++, dstSubresLayers.baseArrayLayer++)
1949 {
Alexis Hetu18daa812020-03-11 17:06:53 -04001950 data.source = src->getTexelPointer({ 0, 0, 0 }, srcSubresLayers);
1951 data.dest = dst->getTexelPointer({ 0, 0, 0 }, dstSubresLayers);
Nicolas Capens157ba262019-12-10 17:49:14 -05001952
Alexis Hetu18daa812020-03-11 17:06:53 -04001953 ASSERT(data.source < src->end());
1954 ASSERT(data.dest < dst->end());
Nicolas Capens157ba262019-12-10 17:49:14 -05001955
Alexis Hetu18daa812020-03-11 17:06:53 -04001956 blitRoutine(&data);
Nicolas Capens157ba262019-12-10 17:49:14 -05001957 }
1958}
1959
Ben Claytonfccfc562019-12-17 20:37:31 +00001960void Blitter::computeCubeCorner(Pointer<Byte> &layer, Int &x0, Int &x1, Int &y0, Int &y1, Int &pitchB, const State &state)
Nicolas Capens157ba262019-12-10 17:49:14 -05001961{
1962 int bytes = state.sourceFormat.bytes();
1963
1964 Float4 c = readFloat4(layer + ComputeOffset(x0, y1, pitchB, bytes), state) +
1965 readFloat4(layer + ComputeOffset(x1, y0, pitchB, bytes), state) +
1966 readFloat4(layer + ComputeOffset(x1, y1, pitchB, bytes), state);
1967
1968 c *= Float4(1.0f / 3.0f);
1969
1970 write(c, layer + ComputeOffset(x0, y0, pitchB, bytes), state);
1971}
1972
Ben Claytonfccfc562019-12-17 20:37:31 +00001973Blitter::CornerUpdateRoutineType Blitter::generateCornerUpdate(const State &state)
Nicolas Capens157ba262019-12-10 17:49:14 -05001974{
1975 // Reading and writing from/to the same image
1976 ASSERT(state.sourceFormat == state.destFormat);
1977 ASSERT(state.srcSamples == state.destSamples);
1978
Nicolas Capensdd0e6002020-01-24 01:21:47 -05001979 // Vulkan 1.2: "If samples is not VK_SAMPLE_COUNT_1_BIT, then imageType must be
1980 // VK_IMAGE_TYPE_2D, flags must not contain VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT"
1981 ASSERT(state.srcSamples == 1);
Nicolas Capens157ba262019-12-10 17:49:14 -05001982
1983 CornerUpdateFunction function;
1984 {
1985 Pointer<Byte> blit(function.Arg<0>());
1986
1987 Pointer<Byte> layers = *Pointer<Pointer<Byte>>(blit + OFFSET(CubeBorderData, layers));
1988 Int pitchB = *Pointer<Int>(blit + OFFSET(CubeBorderData, pitchB));
1989 UInt layerSize = *Pointer<Int>(blit + OFFSET(CubeBorderData, layerSize));
1990 UInt dim = *Pointer<Int>(blit + OFFSET(CubeBorderData, dim));
1991
1992 // Low Border, Low Pixel, High Border, High Pixel
Ben Claytonfccfc562019-12-17 20:37:31 +00001993 Int LB(-1), LP(0), HB(dim), HP(dim - 1);
Nicolas Capens157ba262019-12-10 17:49:14 -05001994
1995 for(int face = 0; face < 6; face++)
1996 {
1997 computeCubeCorner(layers, LB, LP, LB, LP, pitchB, state);
1998 computeCubeCorner(layers, LB, LP, HB, HP, pitchB, state);
1999 computeCubeCorner(layers, HB, HP, LB, LP, pitchB, state);
2000 computeCubeCorner(layers, HB, HP, HB, HP, pitchB, state);
2001 layers = layers + layerSize;
2002 }
2003 }
2004
2005 return function("BlitRoutine");
2006}
2007
Ben Claytonfccfc562019-12-17 20:37:31 +00002008void Blitter::updateBorders(vk::Image *image, const VkImageSubresourceLayers &subresourceLayers)
Nicolas Capens157ba262019-12-10 17:49:14 -05002009{
Nicolas Capensdd0e6002020-01-24 01:21:47 -05002010 ASSERT(image->getArrayLayers() >= (subresourceLayers.baseArrayLayer + 6));
Nicolas Capens157ba262019-12-10 17:49:14 -05002011
2012 // From Vulkan 1.1 spec, section 11.5. Image Views:
2013 // "For cube and cube array image views, the layers of the image view starting
2014 // at baseArrayLayer correspond to faces in the order +X, -X, +Y, -Y, +Z, -Z."
2015 VkImageSubresourceLayers posX = subresourceLayers;
2016 posX.layerCount = 1;
2017 VkImageSubresourceLayers negX = posX;
2018 negX.baseArrayLayer++;
2019 VkImageSubresourceLayers posY = negX;
2020 posY.baseArrayLayer++;
2021 VkImageSubresourceLayers negY = posY;
2022 negY.baseArrayLayer++;
2023 VkImageSubresourceLayers posZ = negY;
2024 posZ.baseArrayLayer++;
2025 VkImageSubresourceLayers negZ = posZ;
2026 negZ.baseArrayLayer++;
2027
2028 // Copy top / bottom
2029 copyCubeEdge(image, posX, BOTTOM, negY, RIGHT);
2030 copyCubeEdge(image, posY, BOTTOM, posZ, TOP);
2031 copyCubeEdge(image, posZ, BOTTOM, negY, TOP);
2032 copyCubeEdge(image, negX, BOTTOM, negY, LEFT);
2033 copyCubeEdge(image, negY, BOTTOM, negZ, BOTTOM);
2034 copyCubeEdge(image, negZ, BOTTOM, negY, BOTTOM);
2035
2036 copyCubeEdge(image, posX, TOP, posY, RIGHT);
2037 copyCubeEdge(image, posY, TOP, negZ, TOP);
2038 copyCubeEdge(image, posZ, TOP, posY, BOTTOM);
2039 copyCubeEdge(image, negX, TOP, posY, LEFT);
2040 copyCubeEdge(image, negY, TOP, posZ, BOTTOM);
2041 copyCubeEdge(image, negZ, TOP, posY, TOP);
2042
2043 // Copy left / right
2044 copyCubeEdge(image, posX, RIGHT, negZ, LEFT);
2045 copyCubeEdge(image, posY, RIGHT, posX, TOP);
2046 copyCubeEdge(image, posZ, RIGHT, posX, LEFT);
2047 copyCubeEdge(image, negX, RIGHT, posZ, LEFT);
2048 copyCubeEdge(image, negY, RIGHT, posX, BOTTOM);
2049 copyCubeEdge(image, negZ, RIGHT, negX, LEFT);
2050
2051 copyCubeEdge(image, posX, LEFT, posZ, RIGHT);
2052 copyCubeEdge(image, posY, LEFT, negX, TOP);
2053 copyCubeEdge(image, posZ, LEFT, negX, RIGHT);
2054 copyCubeEdge(image, negX, LEFT, negZ, RIGHT);
2055 copyCubeEdge(image, negY, LEFT, negX, BOTTOM);
2056 copyCubeEdge(image, negZ, LEFT, posX, RIGHT);
2057
2058 // Compute corner colors
2059 VkImageAspectFlagBits aspect = static_cast<VkImageAspectFlagBits>(subresourceLayers.aspectMask);
2060 vk::Format format = image->getFormat(aspect);
2061 VkSampleCountFlagBits samples = image->getSampleCountFlagBits();
2062 State state(format, format, samples, samples, Options{ 0xF });
2063
Nicolas Capensdd0e6002020-01-24 01:21:47 -05002064 // Vulkan 1.2: "If samples is not VK_SAMPLE_COUNT_1_BIT, then imageType must be
2065 // VK_IMAGE_TYPE_2D, flags must not contain VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT"
2066 ASSERT(samples == VK_SAMPLE_COUNT_1_BIT);
Nicolas Capens157ba262019-12-10 17:49:14 -05002067
2068 auto cornerUpdateRoutine = getCornerUpdateRoutine(state);
2069 if(!cornerUpdateRoutine)
2070 {
2071 return;
2072 }
2073
2074 VkExtent3D extent = image->getMipLevelExtent(aspect, subresourceLayers.mipLevel);
Ben Claytonfccfc562019-12-17 20:37:31 +00002075 CubeBorderData data = {
Nicolas Capens157ba262019-12-10 17:49:14 -05002076 image->getTexelPointer({ 0, 0, 0 }, posX),
2077 image->rowPitchBytes(aspect, subresourceLayers.mipLevel),
2078 static_cast<uint32_t>(image->getLayerSize(aspect)),
2079 extent.width
2080 };
2081 cornerUpdateRoutine(&data);
2082}
2083
Ben Claytonfccfc562019-12-17 20:37:31 +00002084void Blitter::copyCubeEdge(vk::Image *image,
2085 const VkImageSubresourceLayers &dstSubresourceLayers, Edge dstEdge,
2086 const VkImageSubresourceLayers &srcSubresourceLayers, Edge srcEdge)
Nicolas Capens157ba262019-12-10 17:49:14 -05002087{
2088 ASSERT(srcSubresourceLayers.aspectMask == dstSubresourceLayers.aspectMask);
2089 ASSERT(srcSubresourceLayers.mipLevel == dstSubresourceLayers.mipLevel);
2090 ASSERT(srcSubresourceLayers.baseArrayLayer != dstSubresourceLayers.baseArrayLayer);
2091 ASSERT(srcSubresourceLayers.layerCount == 1);
2092 ASSERT(dstSubresourceLayers.layerCount == 1);
2093
2094 // Figure out if the edges to be copied in reverse order respectively from one another
2095 // The copy should be reversed whenever the same edges are contiguous or if we're
2096 // copying top <-> right or bottom <-> left. This is explained by the layout, which is:
2097 //
2098 // | +y |
2099 // | -x | +z | +x | -z |
2100 // | -y |
2101
2102 bool reverse = (srcEdge == dstEdge) ||
2103 ((srcEdge == TOP) && (dstEdge == RIGHT)) ||
2104 ((srcEdge == RIGHT) && (dstEdge == TOP)) ||
2105 ((srcEdge == BOTTOM) && (dstEdge == LEFT)) ||
2106 ((srcEdge == LEFT) && (dstEdge == BOTTOM));
2107
2108 VkImageAspectFlagBits aspect = static_cast<VkImageAspectFlagBits>(srcSubresourceLayers.aspectMask);
2109 int bytes = image->getFormat(aspect).bytes();
2110 int pitchB = image->rowPitchBytes(aspect, srcSubresourceLayers.mipLevel);
2111
2112 VkExtent3D extent = image->getMipLevelExtent(aspect, srcSubresourceLayers.mipLevel);
2113 int w = extent.width;
2114 int h = extent.height;
2115 if(w != h)
2116 {
2117 UNSUPPORTED("Cube doesn't have square faces : (%d, %d)", w, h);
2118 }
2119
2120 // Src is expressed in the regular [0, width-1], [0, height-1] space
2121 bool srcHorizontal = ((srcEdge == TOP) || (srcEdge == BOTTOM));
2122 int srcDelta = srcHorizontal ? bytes : pitchB;
2123 VkOffset3D srcOffset = { (srcEdge == RIGHT) ? (w - 1) : 0, (srcEdge == BOTTOM) ? (h - 1) : 0, 0 };
2124
2125 // Dst contains borders, so it is expressed in the [-1, width], [-1, height] space
2126 bool dstHorizontal = ((dstEdge == TOP) || (dstEdge == BOTTOM));
2127 int dstDelta = (dstHorizontal ? bytes : pitchB) * (reverse ? -1 : 1);
2128 VkOffset3D dstOffset = { (dstEdge == RIGHT) ? w : -1, (dstEdge == BOTTOM) ? h : -1, 0 };
2129
2130 // Don't write in the corners
2131 if(dstHorizontal)
2132 {
2133 dstOffset.x += reverse ? w : 1;
2134 }
2135 else
2136 {
2137 dstOffset.y += reverse ? h : 1;
2138 }
2139
Ben Claytonfccfc562019-12-17 20:37:31 +00002140 const uint8_t *src = static_cast<const uint8_t *>(image->getTexelPointer(srcOffset, srcSubresourceLayers));
2141 uint8_t *dst = static_cast<uint8_t *>(image->getTexelPointer(dstOffset, dstSubresourceLayers));
Nicolas Capens157ba262019-12-10 17:49:14 -05002142 ASSERT((src < image->end()) && ((src + (w * srcDelta)) < image->end()));
2143 ASSERT((dst < image->end()) && ((dst + (w * dstDelta)) < image->end()));
2144
2145 for(int i = 0; i < w; ++i, dst += dstDelta, src += srcDelta)
2146 {
2147 memcpy(dst, src, bytes);
2148 }
2149}
2150
Ben Claytonfccfc562019-12-17 20:37:31 +00002151} // namespace sw