blob: 6f0b77f4fb58e99c666b06d17e2314f89b1e4f5c [file] [log] [blame]
Nicolas Capens68a82382018-10-02 13:16:55 -04001// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#include "Blitter.hpp"
16
Nicolas Capens1d8c8db2018-11-05 16:30:42 -050017#include "Pipeline/ShaderCore.hpp"
Nicolas Capens68a82382018-10-02 13:16:55 -040018#include "Reactor/Reactor.hpp"
Nicolas Capens02cbe8e2019-08-05 15:10:05 -040019#include "System/Half.hpp"
Nicolas Capens1d8c8db2018-11-05 16:30:42 -050020#include "System/Memory.hpp"
Chris Forbesebe5f7f2019-01-16 10:38:34 -080021#include "Vulkan/VkDebug.hpp"
Alexis Hetu33642272019-03-01 11:55:59 -050022#include "Vulkan/VkImage.hpp"
Chris Forbes529eda32019-05-08 10:27:05 -070023#include "Vulkan/VkBuffer.hpp"
Nicolas Capens68a82382018-10-02 13:16:55 -040024
Nicolas Capensb8c63932019-03-19 01:52:40 -040025#include <utility>
26
Nicolas Capens68a82382018-10-02 13:16:55 -040027namespace sw
28{
Alexis Hetuf60a2d52019-05-09 14:16:05 -040029 Blitter::Blitter() :
30 blitMutex(),
31 blitCache(1024),
32 cornerUpdateMutex(),
33 cornerUpdateCache(64) // We only need one of these per format
Nicolas Capens68a82382018-10-02 13:16:55 -040034 {
Nicolas Capens68a82382018-10-02 13:16:55 -040035 }
36
37 Blitter::~Blitter()
38 {
Nicolas Capens68a82382018-10-02 13:16:55 -040039 }
40
Alexis Hetu04dae5e2019-04-08 13:41:50 -040041 void Blitter::clear(void *pixel, vk::Format format, vk::Image *dest, const vk::Format& viewFormat, const VkImageSubresourceRange& subresourceRange, const VkRect2D* renderArea)
Alexis Hetu33642272019-03-01 11:55:59 -050042 {
43 VkImageAspectFlagBits aspect = static_cast<VkImageAspectFlagBits>(subresourceRange.aspectMask);
Nicolas Capenscfe11c72019-05-16 09:58:26 -040044 vk::Format dstFormat = viewFormat.getAspectFormat(aspect);
Alexis Hetu04dae5e2019-04-08 13:41:50 -040045 if(dstFormat == VK_FORMAT_UNDEFINED)
Alexis Hetu33642272019-03-01 11:55:59 -050046 {
47 return;
48 }
49
Chris Forbes88289192019-08-28 16:49:36 -070050 float *pPixel = static_cast<float *>(pixel);
51 if (viewFormat.isUnsignedNormalized())
52 {
53 pPixel[0] = sw::clamp(pPixel[0], 0.0f, 1.0f);
54 pPixel[1] = sw::clamp(pPixel[1], 0.0f, 1.0f);
55 pPixel[2] = sw::clamp(pPixel[2], 0.0f, 1.0f);
56 pPixel[3] = sw::clamp(pPixel[3], 0.0f, 1.0f);
57 }
58 else if (viewFormat.isSignedNormalized())
59 {
60 pPixel[0] = sw::clamp(pPixel[0], -1.0f, 1.0f);
61 pPixel[1] = sw::clamp(pPixel[1], -1.0f, 1.0f);
62 pPixel[2] = sw::clamp(pPixel[2], -1.0f, 1.0f);
63 pPixel[3] = sw::clamp(pPixel[3], -1.0f, 1.0f);
64 }
65
Alexis Hetu04dae5e2019-04-08 13:41:50 -040066 if(fastClear(pixel, format, dest, dstFormat, subresourceRange, renderArea))
Alexis Hetu33642272019-03-01 11:55:59 -050067 {
68 return;
69 }
70
Antonio Maiorano7738ed72019-10-21 11:29:41 -040071 State state(format, dstFormat, 1, dest->getSampleCountFlagBits(), Options{ 0xF });
Ben Clayton6897e9b2019-07-16 17:27:27 +010072 auto blitRoutine = getBlitRoutine(state);
Alexis Hetu33642272019-03-01 11:55:59 -050073 if(!blitRoutine)
74 {
75 return;
76 }
77
78 void(*blitFunction)(const BlitData *data) = (void(*)(const BlitData*))blitRoutine->getEntry();
79
80 VkImageSubresourceLayers subresLayers =
81 {
82 subresourceRange.aspectMask,
83 subresourceRange.baseMipLevel,
84 subresourceRange.baseArrayLayer,
85 1
86 };
87
88 uint32_t lastMipLevel = dest->getLastMipLevel(subresourceRange);
89 uint32_t lastLayer = dest->getLastLayerIndex(subresourceRange);
90
91 VkRect2D area = { { 0, 0 }, { 0, 0 } };
92 if(renderArea)
93 {
94 ASSERT(subresourceRange.levelCount == 1);
95 area = *renderArea;
96 }
97
98 for(; subresLayers.mipLevel <= lastMipLevel; subresLayers.mipLevel++)
99 {
Nicolas Capensba873302019-05-16 11:25:27 -0400100 VkExtent3D extent = dest->getMipLevelExtent(aspect, subresLayers.mipLevel);
Alexis Hetu33642272019-03-01 11:55:59 -0500101 if(!renderArea)
102 {
103 area.extent.width = extent.width;
104 area.extent.height = extent.height;
105 }
106
107 BlitData data =
108 {
109 pixel, nullptr, // source, dest
110
Alexis Hetu25ec7b02019-03-12 14:19:22 -0400111 format.bytes(), // sPitchB
Alexis Hetu33642272019-03-01 11:55:59 -0500112 dest->rowPitchBytes(aspect, subresLayers.mipLevel), // dPitchB
Alexis Hetu54ec7592019-03-20 14:37:16 -0400113 0, // sSliceB (unused in clear operations)
Alexis Hetu33642272019-03-01 11:55:59 -0500114 dest->slicePitchBytes(aspect, subresLayers.mipLevel), // dSliceB
115
116 0.5f, 0.5f, 0.0f, 0.0f, // x0, y0, w, h
117
118 area.offset.y, static_cast<int>(area.offset.y + area.extent.height), // y0d, y1d
119 area.offset.x, static_cast<int>(area.offset.x + area.extent.width), // x0d, x1d
120
121 0, 0, // sWidth, sHeight
122 };
123
Chris Forbes15bf1ef2019-05-02 17:19:12 -0700124 if (renderArea && dest->is3DSlice())
Alexis Hetu33642272019-03-01 11:55:59 -0500125 {
Chris Forbes15bf1ef2019-05-02 17:19:12 -0700126 // Reinterpret layers as depth slices
127 subresLayers.baseArrayLayer = 0;
128 subresLayers.layerCount = 1;
129 for (uint32_t depth = subresourceRange.baseArrayLayer; depth <= lastLayer; depth++)
Alexis Hetu33642272019-03-01 11:55:59 -0500130 {
Chris Forbes15bf1ef2019-05-02 17:19:12 -0700131 data.dest = dest->getTexelPointer({0, 0, static_cast<int32_t>(depth)}, subresLayers);
Alexis Hetu33642272019-03-01 11:55:59 -0500132 blitFunction(&data);
133 }
134 }
Chris Forbes15bf1ef2019-05-02 17:19:12 -0700135 else
136 {
137 for(subresLayers.baseArrayLayer = subresourceRange.baseArrayLayer; subresLayers.baseArrayLayer <= lastLayer; subresLayers.baseArrayLayer++)
138 {
139 for(uint32_t depth = 0; depth < extent.depth; depth++)
140 {
141 data.dest = dest->getTexelPointer({ 0, 0, static_cast<int32_t>(depth) }, subresLayers);
142
143 blitFunction(&data);
144 }
145 }
146 }
Alexis Hetu33642272019-03-01 11:55:59 -0500147 }
148 }
149
Alexis Hetu04dae5e2019-04-08 13:41:50 -0400150 bool Blitter::fastClear(void *pixel, vk::Format format, vk::Image *dest, const vk::Format& viewFormat, const VkImageSubresourceRange& subresourceRange, const VkRect2D* renderArea)
Alexis Hetu33642272019-03-01 11:55:59 -0500151 {
152 if(format != VK_FORMAT_R32G32B32A32_SFLOAT)
153 {
154 return false;
155 }
156
157 float *color = (float*)pixel;
158 float r = color[0];
159 float g = color[1];
160 float b = color[2];
161 float a = color[3];
162
163 uint32_t packed;
164
165 VkImageAspectFlagBits aspect = static_cast<VkImageAspectFlagBits>(subresourceRange.aspectMask);
Alexis Hetu04dae5e2019-04-08 13:41:50 -0400166 switch(viewFormat)
Alexis Hetu33642272019-03-01 11:55:59 -0500167 {
168 case VK_FORMAT_R5G6B5_UNORM_PACK16:
169 packed = ((uint16_t)(31 * b + 0.5f) << 0) |
170 ((uint16_t)(63 * g + 0.5f) << 5) |
171 ((uint16_t)(31 * r + 0.5f) << 11);
172 break;
173 case VK_FORMAT_B5G6R5_UNORM_PACK16:
174 packed = ((uint16_t)(31 * r + 0.5f) << 0) |
175 ((uint16_t)(63 * g + 0.5f) << 5) |
176 ((uint16_t)(31 * b + 0.5f) << 11);
177 break;
178 case VK_FORMAT_A8B8G8R8_UINT_PACK32:
179 case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
180 case VK_FORMAT_R8G8B8A8_UNORM:
181 packed = ((uint32_t)(255 * a + 0.5f) << 24) |
182 ((uint32_t)(255 * b + 0.5f) << 16) |
183 ((uint32_t)(255 * g + 0.5f) << 8) |
184 ((uint32_t)(255 * r + 0.5f) << 0);
185 break;
186 case VK_FORMAT_B8G8R8A8_UNORM:
187 packed = ((uint32_t)(255 * a + 0.5f) << 24) |
188 ((uint32_t)(255 * r + 0.5f) << 16) |
189 ((uint32_t)(255 * g + 0.5f) << 8) |
190 ((uint32_t)(255 * b + 0.5f) << 0);
191 break;
192 case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
193 packed = R11G11B10F(color);
194 break;
195 case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32:
196 packed = RGB9E5(color);
197 break;
198 default:
199 return false;
200 }
201
Ben Clayton00424c12019-03-17 17:29:30 +0000202 VkImageSubresourceLayers subresLayers =
Alexis Hetu33642272019-03-01 11:55:59 -0500203 {
204 subresourceRange.aspectMask,
205 subresourceRange.baseMipLevel,
206 subresourceRange.baseArrayLayer,
207 1
208 };
209 uint32_t lastMipLevel = dest->getLastMipLevel(subresourceRange);
210 uint32_t lastLayer = dest->getLastLayerIndex(subresourceRange);
211
212 VkRect2D area = { { 0, 0 }, { 0, 0 } };
213 if(renderArea)
214 {
215 ASSERT(subresourceRange.levelCount == 1);
216 area = *renderArea;
217 }
218
219 for(; subresLayers.mipLevel <= lastMipLevel; subresLayers.mipLevel++)
220 {
221 int rowPitchBytes = dest->rowPitchBytes(aspect, subresLayers.mipLevel);
222 int slicePitchBytes = dest->slicePitchBytes(aspect, subresLayers.mipLevel);
Nicolas Capensba873302019-05-16 11:25:27 -0400223 VkExtent3D extent = dest->getMipLevelExtent(aspect, subresLayers.mipLevel);
Alexis Hetu33642272019-03-01 11:55:59 -0500224 if(!renderArea)
225 {
226 area.extent.width = extent.width;
227 area.extent.height = extent.height;
228 }
Alexis Hetu32ac8312019-04-15 17:20:29 -0400229 if(dest->is3DSlice())
230 {
231 extent.depth = 1; // The 3D image is instead interpreted as a 2D image with layers
232 }
Alexis Hetu33642272019-03-01 11:55:59 -0500233
234 for(subresLayers.baseArrayLayer = subresourceRange.baseArrayLayer; subresLayers.baseArrayLayer <= lastLayer; subresLayers.baseArrayLayer++)
235 {
236 for(uint32_t depth = 0; depth < extent.depth; depth++)
237 {
238 uint8_t *slice = (uint8_t*)dest->getTexelPointer(
239 { area.offset.x, area.offset.y, static_cast<int32_t>(depth) }, subresLayers);
240
241 for(int j = 0; j < dest->getSampleCountFlagBits(); j++)
242 {
243 uint8_t *d = slice;
244
Alexis Hetu04dae5e2019-04-08 13:41:50 -0400245 switch(viewFormat.bytes())
Alexis Hetu33642272019-03-01 11:55:59 -0500246 {
247 case 2:
248 for(uint32_t i = 0; i < area.extent.height; i++)
249 {
Alexis Hetu32ac8312019-04-15 17:20:29 -0400250 ASSERT(d < dest->end());
Alexis Hetu126bd7a2019-05-10 17:07:42 -0400251 sw::clear((uint16_t*)d, static_cast<uint16_t>(packed), area.extent.width);
Alexis Hetu33642272019-03-01 11:55:59 -0500252 d += rowPitchBytes;
253 }
254 break;
255 case 4:
256 for(uint32_t i = 0; i < area.extent.height; i++)
257 {
Alexis Hetu32ac8312019-04-15 17:20:29 -0400258 ASSERT(d < dest->end());
Alexis Hetu33642272019-03-01 11:55:59 -0500259 sw::clear((uint32_t*)d, packed, area.extent.width);
260 d += rowPitchBytes;
261 }
262 break;
263 default:
264 assert(false);
265 }
266
267 slice += slicePitchBytes;
268 }
269 }
270 }
271 }
272
273 return true;
274 }
275
Nicolas Capens88ac3672019-08-01 13:22:34 -0400276 Float4 Blitter::readFloat4(Pointer<Byte> element, const State &state)
Nicolas Capens68a82382018-10-02 13:16:55 -0400277 {
Nicolas Capens88ac3672019-08-01 13:22:34 -0400278 Float4 c(0.0f, 0.0f, 0.0f, 1.0f);
Nicolas Capens68a82382018-10-02 13:16:55 -0400279
280 switch(state.sourceFormat)
281 {
Alexis Hetua28671d2018-12-19 17:23:26 -0500282 case VK_FORMAT_B4G4R4A4_UNORM_PACK16:
283 c.w = Float(Int(*Pointer<Byte>(element)) & Int(0xF));
284 c.x = Float((Int(*Pointer<Byte>(element)) >> 4) & Int(0xF));
285 c.y = Float(Int(*Pointer<Byte>(element + 1)) & Int(0xF));
286 c.z = Float((Int(*Pointer<Byte>(element + 1)) >> 4) & Int(0xF));
287 break;
Alexis Hetudd152e12018-11-14 13:39:28 -0500288 case VK_FORMAT_R8_SINT:
289 case VK_FORMAT_R8_SNORM:
Nicolas Capens68a82382018-10-02 13:16:55 -0400290 c.x = Float(Int(*Pointer<SByte>(element)));
291 c.w = float(0x7F);
292 break;
Alexis Hetudd152e12018-11-14 13:39:28 -0500293 case VK_FORMAT_R8_UNORM:
294 case VK_FORMAT_R8_UINT:
Hernan Liatis762741b2019-06-26 17:57:55 -0700295 case VK_FORMAT_R8_SRGB:
Nicolas Capens68a82382018-10-02 13:16:55 -0400296 c.x = Float(Int(*Pointer<Byte>(element)));
297 c.w = float(0xFF);
298 break;
Alexis Hetudd152e12018-11-14 13:39:28 -0500299 case VK_FORMAT_R16_SINT:
Nicolas Capens30857ca2019-08-07 14:05:08 -0400300 case VK_FORMAT_R16_SNORM:
Nicolas Capens68a82382018-10-02 13:16:55 -0400301 c.x = Float(Int(*Pointer<Short>(element)));
302 c.w = float(0x7FFF);
303 break;
Nicolas Capens30857ca2019-08-07 14:05:08 -0400304 case VK_FORMAT_R16_UNORM:
Alexis Hetudd152e12018-11-14 13:39:28 -0500305 case VK_FORMAT_R16_UINT:
Nicolas Capens68a82382018-10-02 13:16:55 -0400306 c.x = Float(Int(*Pointer<UShort>(element)));
307 c.w = float(0xFFFF);
308 break;
Alexis Hetudd152e12018-11-14 13:39:28 -0500309 case VK_FORMAT_R32_SINT:
Nicolas Capens68a82382018-10-02 13:16:55 -0400310 c.x = Float(*Pointer<Int>(element));
311 c.w = float(0x7FFFFFFF);
312 break;
Alexis Hetudd152e12018-11-14 13:39:28 -0500313 case VK_FORMAT_R32_UINT:
Nicolas Capens68a82382018-10-02 13:16:55 -0400314 c.x = Float(*Pointer<UInt>(element));
315 c.w = float(0xFFFFFFFF);
316 break;
Alexis Hetua28671d2018-12-19 17:23:26 -0500317 case VK_FORMAT_B8G8R8A8_SRGB:
Alexis Hetudd152e12018-11-14 13:39:28 -0500318 case VK_FORMAT_B8G8R8A8_UNORM:
Nicolas Capens68a82382018-10-02 13:16:55 -0400319 c = Float4(*Pointer<Byte4>(element)).zyxw;
320 break;
Alexis Hetua28671d2018-12-19 17:23:26 -0500321 case VK_FORMAT_A8B8G8R8_SINT_PACK32:
Alexis Hetudd152e12018-11-14 13:39:28 -0500322 case VK_FORMAT_R8G8B8A8_SINT:
Alexis Hetua28671d2018-12-19 17:23:26 -0500323 case VK_FORMAT_A8B8G8R8_SNORM_PACK32:
Alexis Hetudd152e12018-11-14 13:39:28 -0500324 case VK_FORMAT_R8G8B8A8_SNORM:
Nicolas Capens68a82382018-10-02 13:16:55 -0400325 c = Float4(*Pointer<SByte4>(element));
326 break;
Alexis Hetua28671d2018-12-19 17:23:26 -0500327 case VK_FORMAT_A8B8G8R8_UINT_PACK32:
328 case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
Alexis Hetudd152e12018-11-14 13:39:28 -0500329 case VK_FORMAT_R8G8B8A8_UNORM:
330 case VK_FORMAT_R8G8B8A8_UINT:
Alexis Hetua28671d2018-12-19 17:23:26 -0500331 case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
Alexis Hetudd152e12018-11-14 13:39:28 -0500332 case VK_FORMAT_R8G8B8A8_SRGB:
Nicolas Capens68a82382018-10-02 13:16:55 -0400333 c = Float4(*Pointer<Byte4>(element));
334 break;
Alexis Hetudd152e12018-11-14 13:39:28 -0500335 case VK_FORMAT_R16G16B16A16_SINT:
Nicolas Capens68a82382018-10-02 13:16:55 -0400336 c = Float4(*Pointer<Short4>(element));
337 break;
Alexis Hetudd152e12018-11-14 13:39:28 -0500338 case VK_FORMAT_R16G16B16A16_UNORM:
339 case VK_FORMAT_R16G16B16A16_UINT:
Nicolas Capens68a82382018-10-02 13:16:55 -0400340 c = Float4(*Pointer<UShort4>(element));
341 break;
Alexis Hetudd152e12018-11-14 13:39:28 -0500342 case VK_FORMAT_R32G32B32A32_SINT:
Nicolas Capens68a82382018-10-02 13:16:55 -0400343 c = Float4(*Pointer<Int4>(element));
344 break;
Alexis Hetudd152e12018-11-14 13:39:28 -0500345 case VK_FORMAT_R32G32B32A32_UINT:
Nicolas Capens68a82382018-10-02 13:16:55 -0400346 c = Float4(*Pointer<UInt4>(element));
347 break;
Alexis Hetudd152e12018-11-14 13:39:28 -0500348 case VK_FORMAT_R8G8_SINT:
349 case VK_FORMAT_R8G8_SNORM:
Nicolas Capens68a82382018-10-02 13:16:55 -0400350 c.x = Float(Int(*Pointer<SByte>(element + 0)));
351 c.y = Float(Int(*Pointer<SByte>(element + 1)));
352 c.w = float(0x7F);
353 break;
Alexis Hetudd152e12018-11-14 13:39:28 -0500354 case VK_FORMAT_R8G8_UNORM:
355 case VK_FORMAT_R8G8_UINT:
Alexis Hetu45d34d22019-06-28 15:58:54 -0400356 case VK_FORMAT_R8G8_SRGB:
Nicolas Capens68a82382018-10-02 13:16:55 -0400357 c.x = Float(Int(*Pointer<Byte>(element + 0)));
358 c.y = Float(Int(*Pointer<Byte>(element + 1)));
359 c.w = float(0xFF);
360 break;
Alexis Hetudd152e12018-11-14 13:39:28 -0500361 case VK_FORMAT_R16G16_SINT:
Nicolas Capens30857ca2019-08-07 14:05:08 -0400362 case VK_FORMAT_R16G16_SNORM:
Nicolas Capens68a82382018-10-02 13:16:55 -0400363 c.x = Float(Int(*Pointer<Short>(element + 0)));
364 c.y = Float(Int(*Pointer<Short>(element + 2)));
365 c.w = float(0x7FFF);
366 break;
Alexis Hetudd152e12018-11-14 13:39:28 -0500367 case VK_FORMAT_R16G16_UNORM:
368 case VK_FORMAT_R16G16_UINT:
Nicolas Capens68a82382018-10-02 13:16:55 -0400369 c.x = Float(Int(*Pointer<UShort>(element + 0)));
370 c.y = Float(Int(*Pointer<UShort>(element + 2)));
371 c.w = float(0xFFFF);
372 break;
Alexis Hetudd152e12018-11-14 13:39:28 -0500373 case VK_FORMAT_R32G32_SINT:
Nicolas Capens68a82382018-10-02 13:16:55 -0400374 c.x = Float(*Pointer<Int>(element + 0));
375 c.y = Float(*Pointer<Int>(element + 4));
376 c.w = float(0x7FFFFFFF);
377 break;
Alexis Hetudd152e12018-11-14 13:39:28 -0500378 case VK_FORMAT_R32G32_UINT:
Nicolas Capens68a82382018-10-02 13:16:55 -0400379 c.x = Float(*Pointer<UInt>(element + 0));
380 c.y = Float(*Pointer<UInt>(element + 4));
381 c.w = float(0xFFFFFFFF);
382 break;
Alexis Hetudd152e12018-11-14 13:39:28 -0500383 case VK_FORMAT_R32G32B32A32_SFLOAT:
Nicolas Capens68a82382018-10-02 13:16:55 -0400384 c = *Pointer<Float4>(element);
385 break;
Alexis Hetudd152e12018-11-14 13:39:28 -0500386 case VK_FORMAT_R32G32_SFLOAT:
Nicolas Capens68a82382018-10-02 13:16:55 -0400387 c.x = *Pointer<Float>(element + 0);
388 c.y = *Pointer<Float>(element + 4);
389 break;
Alexis Hetudd152e12018-11-14 13:39:28 -0500390 case VK_FORMAT_R32_SFLOAT:
Nicolas Capens68a82382018-10-02 13:16:55 -0400391 c.x = *Pointer<Float>(element);
392 break;
Ben Clayton00424c12019-03-17 17:29:30 +0000393 case VK_FORMAT_R16G16B16A16_SFLOAT:
394 c.w = Float(*Pointer<Half>(element + 6));
395 case VK_FORMAT_R16G16B16_SFLOAT:
396 c.z = Float(*Pointer<Half>(element + 4));
397 case VK_FORMAT_R16G16_SFLOAT:
398 c.y = Float(*Pointer<Half>(element + 2));
399 case VK_FORMAT_R16_SFLOAT:
400 c.x = Float(*Pointer<Half>(element));
Alexis Hetu734e2572018-12-20 14:00:49 -0500401 break;
Alexis Hetu5131ec92018-12-20 16:14:21 -0500402 case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
403 // 10 (or 11) bit float formats are unsigned formats with a 5 bit exponent and a 5 (or 6) bit mantissa.
404 // Since the Half float format also has a 5 bit exponent, we can convert these formats to half by
405 // copy/pasting the bits so the the exponent bits and top mantissa bits are aligned to the half format.
406 // In this case, we have:
407 // B B B B B B B B B B G G G G G G G G G G G R R R R R R R R R R R
408 // 1st Short: |xxxxxxxxxx---------------------|
409 // 2nd Short: |xxxx---------------------xxxxxx|
410 // 3rd Short: |--------------------xxxxxxxxxxxx|
411 // These memory reads overlap, but each of them contains an entire channel, so we can read this without
412 // any int -> short conversion.
413 c.x = Float(As<Half>((*Pointer<UShort>(element + 0) & UShort(0x07FF)) << UShort(4)));
414 c.y = Float(As<Half>((*Pointer<UShort>(element + 1) & UShort(0x3FF8)) << UShort(1)));
415 c.z = Float(As<Half>((*Pointer<UShort>(element + 2) & UShort(0xFFC0)) >> UShort(1)));
416 break;
417 case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32:
418 // This type contains a common 5 bit exponent (E) and a 9 bit the mantissa for R, G and B.
Ben Clayton00424c12019-03-17 17:29:30 +0000419 c.x = Float(*Pointer<UInt>(element) & UInt(0x000001FF)); // R's mantissa (bits 0-8)
420 c.y = Float((*Pointer<UInt>(element) & UInt(0x0003FE00)) >> 9); // G's mantissa (bits 9-17)
Alexis Hetu5131ec92018-12-20 16:14:21 -0500421 c.z = Float((*Pointer<UInt>(element) & UInt(0x07FC0000)) >> 18); // B's mantissa (bits 18-26)
422 c *= Float4(
423 // 2^E, using the exponent (bits 27-31) and treating it as an unsigned integer value
424 Float(UInt(1) << ((*Pointer<UInt>(element) & UInt(0xF8000000)) >> 27)) *
425 // Since the 9 bit mantissa values currently stored in RGB were converted straight
426 // from int to float (in the [0, 1<<9] range instead of the [0, 1] range), they
427 // are (1 << 9) times too high.
428 // Also, the exponent has 5 bits and we compute the exponent bias of floating point
429 // formats using "2^(k-1) - 1", so, in this case, the exponent bias is 2^(5-1)-1 = 15
430 // Exponent bias (15) + number of mantissa bits per component (9) = 24
431 Float(1.0f / (1 << 24)));
432 c.w = 1.0f;
433 break;
Alexis Hetudd152e12018-11-14 13:39:28 -0500434 case VK_FORMAT_R5G6B5_UNORM_PACK16:
Nicolas Capens68a82382018-10-02 13:16:55 -0400435 c.x = Float(Int((*Pointer<UShort>(element) & UShort(0xF800)) >> UShort(11)));
436 c.y = Float(Int((*Pointer<UShort>(element) & UShort(0x07E0)) >> UShort(5)));
437 c.z = Float(Int(*Pointer<UShort>(element) & UShort(0x001F)));
438 break;
Alexis Hetu457bd9b2018-12-20 13:18:18 -0500439 case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
440 c.w = Float(Int((*Pointer<UShort>(element) & UShort(0x8000)) >> UShort(15)));
441 c.x = Float(Int((*Pointer<UShort>(element) & UShort(0x7C00)) >> UShort(10)));
442 c.y = Float(Int((*Pointer<UShort>(element) & UShort(0x03E0)) >> UShort(5)));
443 c.z = Float(Int(*Pointer<UShort>(element) & UShort(0x001F)));
444 break;
Alexis Hetudd152e12018-11-14 13:39:28 -0500445 case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
446 case VK_FORMAT_A2B10G10R10_UINT_PACK32:
Nicolas Capens68a82382018-10-02 13:16:55 -0400447 c.x = Float(Int((*Pointer<UInt>(element) & UInt(0x000003FF))));
448 c.y = Float(Int((*Pointer<UInt>(element) & UInt(0x000FFC00)) >> 10));
449 c.z = Float(Int((*Pointer<UInt>(element) & UInt(0x3FF00000)) >> 20));
450 c.w = Float(Int((*Pointer<UInt>(element) & UInt(0xC0000000)) >> 30));
451 break;
Alexis Hetudd152e12018-11-14 13:39:28 -0500452 case VK_FORMAT_D16_UNORM:
Nicolas Capens68a82382018-10-02 13:16:55 -0400453 c.x = Float(Int((*Pointer<UShort>(element))));
454 break;
Alexis Hetudd152e12018-11-14 13:39:28 -0500455 case VK_FORMAT_D24_UNORM_S8_UINT:
456 case VK_FORMAT_X8_D24_UNORM_PACK32:
Nicolas Capens68a82382018-10-02 13:16:55 -0400457 c.x = Float(Int((*Pointer<UInt>(element) & UInt(0xFFFFFF00)) >> 8));
458 break;
Alexis Hetudd152e12018-11-14 13:39:28 -0500459 case VK_FORMAT_D32_SFLOAT:
460 case VK_FORMAT_D32_SFLOAT_S8_UINT:
Nicolas Capens68a82382018-10-02 13:16:55 -0400461 c.x = *Pointer<Float>(element);
462 break;
Alexis Hetudd152e12018-11-14 13:39:28 -0500463 case VK_FORMAT_S8_UINT:
Nicolas Capens68a82382018-10-02 13:16:55 -0400464 c.x = Float(Int(*Pointer<Byte>(element)));
465 break;
466 default:
Nicolas Capens88ac3672019-08-01 13:22:34 -0400467 UNSUPPORTED("Blitter source format %d", (int)state.sourceFormat);
Nicolas Capens68a82382018-10-02 13:16:55 -0400468 }
469
Nicolas Capens88ac3672019-08-01 13:22:34 -0400470 return c;
Nicolas Capens68a82382018-10-02 13:16:55 -0400471 }
472
Nicolas Capens88ac3672019-08-01 13:22:34 -0400473 void Blitter::write(Float4 &c, Pointer<Byte> element, const State &state)
Nicolas Capens68a82382018-10-02 13:16:55 -0400474 {
475 bool writeR = state.writeRed;
476 bool writeG = state.writeGreen;
477 bool writeB = state.writeBlue;
478 bool writeA = state.writeAlpha;
479 bool writeRGBA = writeR && writeG && writeB && writeA;
480
481 switch(state.destFormat)
482 {
Alexis Hetue04d9b02019-01-16 14:42:24 -0500483 case VK_FORMAT_R4G4_UNORM_PACK8:
484 if(writeR | writeG)
485 {
486 if(!writeR)
487 {
488 *Pointer<Byte>(element) = (Byte(RoundInt(Float(c.y))) & Byte(0xF)) |
489 (*Pointer<Byte>(element) & Byte(0xF0));
490 }
491 else if(!writeG)
492 {
493 *Pointer<Byte>(element) = (*Pointer<Byte>(element) & Byte(0xF)) |
494 (Byte(RoundInt(Float(c.x))) << Byte(4));
495 }
496 else
497 {
498 *Pointer<Byte>(element) = (Byte(RoundInt(Float(c.y))) & Byte(0xF)) |
499 (Byte(RoundInt(Float(c.x))) << Byte(4));
500 }
501 }
502 break;
503 case VK_FORMAT_R4G4B4A4_UNORM_PACK16:
504 if(writeR || writeG || writeB || writeA)
505 {
506 *Pointer<UShort>(element) = (writeR ? ((UShort(RoundInt(Float(c.x))) & UShort(0xF)) << UShort(12)) :
507 (*Pointer<UShort>(element) & UShort(0x000F))) |
508 (writeG ? ((UShort(RoundInt(Float(c.y))) & UShort(0xF)) << UShort(8)) :
509 (*Pointer<UShort>(element) & UShort(0x00F0))) |
510 (writeB ? ((UShort(RoundInt(Float(c.z))) & UShort(0xF)) << UShort(4)) :
511 (*Pointer<UShort>(element) & UShort(0x0F00))) |
512 (writeA ? (UShort(RoundInt(Float(c.w))) & UShort(0xF)) :
513 (*Pointer<UShort>(element) & UShort(0xF000)));
514 }
515 break;
Alexis Hetua28671d2018-12-19 17:23:26 -0500516 case VK_FORMAT_B4G4R4A4_UNORM_PACK16:
517 if(writeRGBA)
518 {
Ben Clayton00424c12019-03-17 17:29:30 +0000519 *Pointer<UShort>(element) = UShort(RoundInt(Float(c.w)) & Int(0xF)) |
520 UShort((RoundInt(Float(c.x)) & Int(0xF)) << 4) |
521 UShort((RoundInt(Float(c.y)) & Int(0xF)) << 8) |
522 UShort((RoundInt(Float(c.z)) & Int(0xF)) << 12);
Alexis Hetua28671d2018-12-19 17:23:26 -0500523 }
524 else
525 {
526 unsigned short mask = (writeA ? 0x000F : 0x0000) |
527 (writeR ? 0x00F0 : 0x0000) |
528 (writeG ? 0x0F00 : 0x0000) |
529 (writeB ? 0xF000 : 0x0000);
530 unsigned short unmask = ~mask;
531 *Pointer<UShort>(element) = (*Pointer<UShort>(element) & UShort(unmask)) |
Ben Clayton00424c12019-03-17 17:29:30 +0000532 ((UShort(RoundInt(Float(c.w)) & Int(0xF)) |
533 UShort((RoundInt(Float(c.x)) & Int(0xF)) << 4) |
534 UShort((RoundInt(Float(c.y)) & Int(0xF)) << 8) |
Alexis Hetua28671d2018-12-19 17:23:26 -0500535 UShort((RoundInt(Float(c.z)) & Int(0xF)) << 12)) & UShort(mask));
536 }
537 break;
538 case VK_FORMAT_B8G8R8A8_SRGB:
Alexis Hetudd152e12018-11-14 13:39:28 -0500539 case VK_FORMAT_B8G8R8A8_UNORM:
Nicolas Capens68a82382018-10-02 13:16:55 -0400540 if(writeRGBA)
541 {
542 Short4 c0 = RoundShort4(c.zyxw);
543 *Pointer<Byte4>(element) = Byte4(PackUnsigned(c0, c0));
544 }
545 else
546 {
547 if(writeB) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.z))); }
548 if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
549 if(writeR) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.x))); }
550 if(writeA) { *Pointer<Byte>(element + 3) = Byte(RoundInt(Float(c.w))); }
551 }
552 break;
Alexis Hetue04d9b02019-01-16 14:42:24 -0500553 case VK_FORMAT_B8G8R8_SNORM:
554 if(writeB) { *Pointer<SByte>(element + 0) = SByte(RoundInt(Float(c.z))); }
555 if(writeG) { *Pointer<SByte>(element + 1) = SByte(RoundInt(Float(c.y))); }
556 if(writeR) { *Pointer<SByte>(element + 2) = SByte(RoundInt(Float(c.x))); }
557 break;
558 case VK_FORMAT_B8G8R8_UNORM:
559 case VK_FORMAT_B8G8R8_SRGB:
560 if(writeB) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.z))); }
561 if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
562 if(writeR) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.x))); }
563 break;
Alexis Hetua28671d2018-12-19 17:23:26 -0500564 case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
Alexis Hetudd152e12018-11-14 13:39:28 -0500565 case VK_FORMAT_R8G8B8A8_UNORM:
Alexis Hetua28671d2018-12-19 17:23:26 -0500566 case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
Alexis Hetudd152e12018-11-14 13:39:28 -0500567 case VK_FORMAT_R8G8B8A8_SRGB:
Alexis Hetue04d9b02019-01-16 14:42:24 -0500568 case VK_FORMAT_A8B8G8R8_UINT_PACK32:
569 case VK_FORMAT_R8G8B8A8_UINT:
570 case VK_FORMAT_R8G8B8A8_USCALED:
571 case VK_FORMAT_A8B8G8R8_USCALED_PACK32:
Nicolas Capens68a82382018-10-02 13:16:55 -0400572 if(writeRGBA)
573 {
574 Short4 c0 = RoundShort4(c);
575 *Pointer<Byte4>(element) = Byte4(PackUnsigned(c0, c0));
576 }
577 else
578 {
579 if(writeR) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.x))); }
580 if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
581 if(writeB) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.z))); }
582 if(writeA) { *Pointer<Byte>(element + 3) = Byte(RoundInt(Float(c.w))); }
583 }
584 break;
Alexis Hetudd152e12018-11-14 13:39:28 -0500585 case VK_FORMAT_R32G32B32A32_SFLOAT:
Nicolas Capens68a82382018-10-02 13:16:55 -0400586 if(writeRGBA)
587 {
588 *Pointer<Float4>(element) = c;
589 }
590 else
591 {
592 if(writeR) { *Pointer<Float>(element) = c.x; }
593 if(writeG) { *Pointer<Float>(element + 4) = c.y; }
594 if(writeB) { *Pointer<Float>(element + 8) = c.z; }
595 if(writeA) { *Pointer<Float>(element + 12) = c.w; }
596 }
597 break;
Alexis Hetue04d9b02019-01-16 14:42:24 -0500598 case VK_FORMAT_R32G32B32_SFLOAT:
599 if(writeR) { *Pointer<Float>(element) = c.x; }
600 if(writeG) { *Pointer<Float>(element + 4) = c.y; }
601 if(writeB) { *Pointer<Float>(element + 8) = c.z; }
602 break;
Alexis Hetudd152e12018-11-14 13:39:28 -0500603 case VK_FORMAT_R32G32_SFLOAT:
Nicolas Capens68a82382018-10-02 13:16:55 -0400604 if(writeR && writeG)
605 {
606 *Pointer<Float2>(element) = Float2(c);
607 }
608 else
609 {
610 if(writeR) { *Pointer<Float>(element) = c.x; }
611 if(writeG) { *Pointer<Float>(element + 4) = c.y; }
612 }
613 break;
Alexis Hetudd152e12018-11-14 13:39:28 -0500614 case VK_FORMAT_R32_SFLOAT:
Nicolas Capens68a82382018-10-02 13:16:55 -0400615 if(writeR) { *Pointer<Float>(element) = c.x; }
616 break;
Ben Clayton00424c12019-03-17 17:29:30 +0000617 case VK_FORMAT_R16G16B16A16_SFLOAT:
618 if(writeA) { *Pointer<Half>(element + 6) = Half(c.w); }
619 case VK_FORMAT_R16G16B16_SFLOAT:
620 if(writeB) { *Pointer<Half>(element + 4) = Half(c.z); }
621 case VK_FORMAT_R16G16_SFLOAT:
622 if(writeG) { *Pointer<Half>(element + 2) = Half(c.y); }
623 case VK_FORMAT_R16_SFLOAT:
624 if(writeR) { *Pointer<Half>(element) = Half(c.x); }
Alexis Hetu734e2572018-12-20 14:00:49 -0500625 break;
Nicolas Capensb121b742019-08-01 13:22:34 -0400626 case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
627 {
628 // 10 (or 11) bit float formats are unsigned formats with a 5 bit exponent and a 5 (or 6) bit mantissa.
629 // Since the 16-bit half-precision float format also has a 5 bit exponent, we can extract these minifloats from them.
630
631 // FIXME(b/138944025): Handle negative values, Inf, and NaN.
632 // FIXME(b/138944025): Perform rounding before truncating the mantissa.
633 UInt r = (UInt(As<UShort>(Half(c.x))) & 0x00007FF0) >> 4;
634 UInt g = (UInt(As<UShort>(Half(c.y))) & 0x00007FF0) << 7;
635 UInt b = (UInt(As<UShort>(Half(c.z))) & 0x00007FE0) << 17;
636
637 UInt rgb = r | g | b;
638
639 UInt old = *Pointer<UInt>(element);
640
641 unsigned int mask = (writeR ? 0x000007FF : 0) |
642 (writeG ? 0x003FF800 : 0) |
643 (writeB ? 0xFFC00000 : 0);
644
645 *Pointer<UInt>(element) = (rgb & mask) | (old & ~mask);
646 }
647 break;
Nicolas Capens20572b42019-08-06 21:56:50 -0400648 case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32:
649 {
650 ASSERT(writeRGBA); // Can't sensibly write just part of this format.
651
652 // Vulkan 1.1.117 section 15.2.1 RGB to Shared Exponent Conversion
653
654 constexpr int N = 9; // number of mantissa bits per component
655 constexpr int B = 15; // exponent bias
656 constexpr int E_max = 31; // maximum possible biased exponent value
657
658 // Maximum representable value.
659 constexpr float sharedexp_max = ((static_cast<float>(1 << N) - 1) / static_cast<float>(1 << N)) * static_cast<float>(1 << (E_max - B));
660
Nicolas Capense7151f72019-08-22 10:57:36 -0400661 // Clamp components to valid range. NaN becomes 0.
662 Float red_c = Min(IfThenElse(!(c.x > 0), Float(0), Float(c.x)), sharedexp_max);
663 Float green_c = Min(IfThenElse(!(c.y > 0), Float(0), Float(c.y)), sharedexp_max);
664 Float blue_c = Min(IfThenElse(!(c.z > 0), Float(0), Float(c.z)), sharedexp_max);
Nicolas Capens20572b42019-08-06 21:56:50 -0400665
666 // We're reducing the mantissa to 9 bits, so we must round up if the next
667 // bit is 1. In other words add 0.5 to the new mantissa's position and
668 // allow overflow into the exponent so we can scale correctly.
669 constexpr int half = 1 << (23 - N);
670 Float red_r = As<Float>(As<Int>(red_c) + half);
671 Float green_r = As<Float>(As<Int>(green_c) + half);
672 Float blue_r = As<Float>(As<Int>(blue_c) + half);
673
674 // The largest component determines the shared exponent. It can't be lower
675 // than 0 (after bias subtraction) so also limit to the mimimum representable.
676 constexpr float min_s = 0.5f / (1 << B);
677 Float max_s = Max(Max(red_r, green_r), Max(blue_r, min_s));
678
679 // Obtain the reciprocal of the shared exponent by inverting the bits,
680 // and scale by the new mantissa's size. Note that the IEEE-754 single-precision
681 // format has an implicit leading 1, but this shared component format does not.
682 Float scale = As<Float>((As<Int>(max_s) & 0x7F800000) ^ 0x7F800000) * (1 << (N - 2));
683
684 UInt R9 = RoundInt(red_c * scale);
685 UInt G9 = UInt(RoundInt(green_c * scale));
686 UInt B9 = UInt(RoundInt(blue_c * scale));
687 UInt E5 = (As<UInt>(max_s) >> 23) - 127 + 15 + 1;
688
689 UInt E5B9G9R9 = (E5 << 27) | (B9 << 18) | (G9 << 9) | R9;
690
691 *Pointer<UInt>(element) = E5B9G9R9;
692 }
693 break;
Alexis Hetue04d9b02019-01-16 14:42:24 -0500694 case VK_FORMAT_B8G8R8A8_SNORM:
695 if(writeB) { *Pointer<SByte>(element) = SByte(RoundInt(Float(c.z))); }
696 if(writeG) { *Pointer<SByte>(element + 1) = SByte(RoundInt(Float(c.y))); }
697 if(writeR) { *Pointer<SByte>(element + 2) = SByte(RoundInt(Float(c.x))); }
698 if(writeA) { *Pointer<SByte>(element + 3) = SByte(RoundInt(Float(c.w))); }
699 break;
Alexis Hetua28671d2018-12-19 17:23:26 -0500700 case VK_FORMAT_A8B8G8R8_SINT_PACK32:
Alexis Hetudd152e12018-11-14 13:39:28 -0500701 case VK_FORMAT_R8G8B8A8_SINT:
Alexis Hetua28671d2018-12-19 17:23:26 -0500702 case VK_FORMAT_A8B8G8R8_SNORM_PACK32:
Alexis Hetudd152e12018-11-14 13:39:28 -0500703 case VK_FORMAT_R8G8B8A8_SNORM:
Alexis Hetue04d9b02019-01-16 14:42:24 -0500704 case VK_FORMAT_R8G8B8A8_SSCALED:
705 case VK_FORMAT_A8B8G8R8_SSCALED_PACK32:
Nicolas Capens68a82382018-10-02 13:16:55 -0400706 if(writeA) { *Pointer<SByte>(element + 3) = SByte(RoundInt(Float(c.w))); }
Alexis Hetue04d9b02019-01-16 14:42:24 -0500707 case VK_FORMAT_R8G8B8_SINT:
708 case VK_FORMAT_R8G8B8_SNORM:
709 case VK_FORMAT_R8G8B8_SSCALED:
Nicolas Capens68a82382018-10-02 13:16:55 -0400710 if(writeB) { *Pointer<SByte>(element + 2) = SByte(RoundInt(Float(c.z))); }
Alexis Hetudd152e12018-11-14 13:39:28 -0500711 case VK_FORMAT_R8G8_SINT:
712 case VK_FORMAT_R8G8_SNORM:
Alexis Hetue04d9b02019-01-16 14:42:24 -0500713 case VK_FORMAT_R8G8_SSCALED:
Nicolas Capens68a82382018-10-02 13:16:55 -0400714 if(writeG) { *Pointer<SByte>(element + 1) = SByte(RoundInt(Float(c.y))); }
Alexis Hetudd152e12018-11-14 13:39:28 -0500715 case VK_FORMAT_R8_SINT:
716 case VK_FORMAT_R8_SNORM:
Alexis Hetue04d9b02019-01-16 14:42:24 -0500717 case VK_FORMAT_R8_SSCALED:
Nicolas Capens68a82382018-10-02 13:16:55 -0400718 if(writeR) { *Pointer<SByte>(element) = SByte(RoundInt(Float(c.x))); }
719 break;
Alexis Hetue04d9b02019-01-16 14:42:24 -0500720 case VK_FORMAT_R8G8B8_UINT:
721 case VK_FORMAT_R8G8B8_UNORM:
722 case VK_FORMAT_R8G8B8_USCALED:
Alexis Hetu45d34d22019-06-28 15:58:54 -0400723 case VK_FORMAT_R8G8B8_SRGB:
Nicolas Capens68a82382018-10-02 13:16:55 -0400724 if(writeB) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.z))); }
Alexis Hetudd152e12018-11-14 13:39:28 -0500725 case VK_FORMAT_R8G8_UINT:
726 case VK_FORMAT_R8G8_UNORM:
Alexis Hetue04d9b02019-01-16 14:42:24 -0500727 case VK_FORMAT_R8G8_USCALED:
Alexis Hetu45d34d22019-06-28 15:58:54 -0400728 case VK_FORMAT_R8G8_SRGB:
Nicolas Capens68a82382018-10-02 13:16:55 -0400729 if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
Alexis Hetudd152e12018-11-14 13:39:28 -0500730 case VK_FORMAT_R8_UINT:
731 case VK_FORMAT_R8_UNORM:
Alexis Hetue04d9b02019-01-16 14:42:24 -0500732 case VK_FORMAT_R8_USCALED:
Hernan Liatis762741b2019-06-26 17:57:55 -0700733 case VK_FORMAT_R8_SRGB:
Nicolas Capens68a82382018-10-02 13:16:55 -0400734 if(writeR) { *Pointer<Byte>(element) = Byte(RoundInt(Float(c.x))); }
735 break;
Alexis Hetudd152e12018-11-14 13:39:28 -0500736 case VK_FORMAT_R16G16B16A16_SINT:
Alexis Hetue04d9b02019-01-16 14:42:24 -0500737 case VK_FORMAT_R16G16B16A16_SNORM:
738 case VK_FORMAT_R16G16B16A16_SSCALED:
Nicolas Capens68a82382018-10-02 13:16:55 -0400739 if(writeRGBA)
740 {
741 *Pointer<Short4>(element) = Short4(RoundInt(c));
742 }
743 else
744 {
745 if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); }
746 if(writeG) { *Pointer<Short>(element + 2) = Short(RoundInt(Float(c.y))); }
747 if(writeB) { *Pointer<Short>(element + 4) = Short(RoundInt(Float(c.z))); }
748 if(writeA) { *Pointer<Short>(element + 6) = Short(RoundInt(Float(c.w))); }
749 }
750 break;
Alexis Hetue04d9b02019-01-16 14:42:24 -0500751 case VK_FORMAT_R16G16B16_SINT:
752 case VK_FORMAT_R16G16B16_SNORM:
753 case VK_FORMAT_R16G16B16_SSCALED:
754 if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); }
755 if(writeG) { *Pointer<Short>(element + 2) = Short(RoundInt(Float(c.y))); }
756 if(writeB) { *Pointer<Short>(element + 4) = Short(RoundInt(Float(c.z))); }
757 break;
Alexis Hetudd152e12018-11-14 13:39:28 -0500758 case VK_FORMAT_R16G16_SINT:
Alexis Hetue04d9b02019-01-16 14:42:24 -0500759 case VK_FORMAT_R16G16_SNORM:
760 case VK_FORMAT_R16G16_SSCALED:
Nicolas Capens68a82382018-10-02 13:16:55 -0400761 if(writeR && writeG)
762 {
763 *Pointer<Short2>(element) = Short2(Short4(RoundInt(c)));
764 }
765 else
766 {
767 if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); }
768 if(writeG) { *Pointer<Short>(element + 2) = Short(RoundInt(Float(c.y))); }
769 }
770 break;
Alexis Hetudd152e12018-11-14 13:39:28 -0500771 case VK_FORMAT_R16_SINT:
Alexis Hetue04d9b02019-01-16 14:42:24 -0500772 case VK_FORMAT_R16_SNORM:
773 case VK_FORMAT_R16_SSCALED:
Nicolas Capens68a82382018-10-02 13:16:55 -0400774 if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); }
775 break;
Alexis Hetudd152e12018-11-14 13:39:28 -0500776 case VK_FORMAT_R16G16B16A16_UINT:
777 case VK_FORMAT_R16G16B16A16_UNORM:
Alexis Hetue04d9b02019-01-16 14:42:24 -0500778 case VK_FORMAT_R16G16B16A16_USCALED:
Nicolas Capens68a82382018-10-02 13:16:55 -0400779 if(writeRGBA)
780 {
781 *Pointer<UShort4>(element) = UShort4(RoundInt(c));
782 }
783 else
784 {
785 if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); }
786 if(writeG) { *Pointer<UShort>(element + 2) = UShort(RoundInt(Float(c.y))); }
787 if(writeB) { *Pointer<UShort>(element + 4) = UShort(RoundInt(Float(c.z))); }
788 if(writeA) { *Pointer<UShort>(element + 6) = UShort(RoundInt(Float(c.w))); }
789 }
790 break;
Alexis Hetue04d9b02019-01-16 14:42:24 -0500791 case VK_FORMAT_R16G16B16_UINT:
792 case VK_FORMAT_R16G16B16_UNORM:
793 case VK_FORMAT_R16G16B16_USCALED:
794 if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); }
795 if(writeG) { *Pointer<UShort>(element + 2) = UShort(RoundInt(Float(c.y))); }
796 if(writeB) { *Pointer<UShort>(element + 4) = UShort(RoundInt(Float(c.z))); }
797 break;
Alexis Hetudd152e12018-11-14 13:39:28 -0500798 case VK_FORMAT_R16G16_UINT:
799 case VK_FORMAT_R16G16_UNORM:
Alexis Hetue04d9b02019-01-16 14:42:24 -0500800 case VK_FORMAT_R16G16_USCALED:
Nicolas Capens68a82382018-10-02 13:16:55 -0400801 if(writeR && writeG)
802 {
803 *Pointer<UShort2>(element) = UShort2(UShort4(RoundInt(c)));
804 }
805 else
806 {
807 if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); }
808 if(writeG) { *Pointer<UShort>(element + 2) = UShort(RoundInt(Float(c.y))); }
809 }
810 break;
Alexis Hetudd152e12018-11-14 13:39:28 -0500811 case VK_FORMAT_R16_UINT:
Alexis Hetue04d9b02019-01-16 14:42:24 -0500812 case VK_FORMAT_R16_UNORM:
813 case VK_FORMAT_R16_USCALED:
Nicolas Capens68a82382018-10-02 13:16:55 -0400814 if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); }
815 break;
Alexis Hetudd152e12018-11-14 13:39:28 -0500816 case VK_FORMAT_R32G32B32A32_SINT:
Nicolas Capens68a82382018-10-02 13:16:55 -0400817 if(writeRGBA)
818 {
819 *Pointer<Int4>(element) = RoundInt(c);
820 }
821 else
822 {
823 if(writeR) { *Pointer<Int>(element) = RoundInt(Float(c.x)); }
824 if(writeG) { *Pointer<Int>(element + 4) = RoundInt(Float(c.y)); }
825 if(writeB) { *Pointer<Int>(element + 8) = RoundInt(Float(c.z)); }
826 if(writeA) { *Pointer<Int>(element + 12) = RoundInt(Float(c.w)); }
827 }
828 break;
Alexis Hetue04d9b02019-01-16 14:42:24 -0500829 case VK_FORMAT_R32G32B32_SINT:
830 if(writeB) { *Pointer<Int>(element + 8) = RoundInt(Float(c.z)); }
Alexis Hetudd152e12018-11-14 13:39:28 -0500831 case VK_FORMAT_R32G32_SINT:
Nicolas Capens68a82382018-10-02 13:16:55 -0400832 if(writeG) { *Pointer<Int>(element + 4) = RoundInt(Float(c.y)); }
Alexis Hetudd152e12018-11-14 13:39:28 -0500833 case VK_FORMAT_R32_SINT:
Nicolas Capens68a82382018-10-02 13:16:55 -0400834 if(writeR) { *Pointer<Int>(element) = RoundInt(Float(c.x)); }
835 break;
Alexis Hetudd152e12018-11-14 13:39:28 -0500836 case VK_FORMAT_R32G32B32A32_UINT:
Nicolas Capens68a82382018-10-02 13:16:55 -0400837 if(writeRGBA)
838 {
839 *Pointer<UInt4>(element) = UInt4(RoundInt(c));
840 }
841 else
842 {
843 if(writeR) { *Pointer<UInt>(element) = As<UInt>(RoundInt(Float(c.x))); }
844 if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(RoundInt(Float(c.y))); }
845 if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(RoundInt(Float(c.z))); }
846 if(writeA) { *Pointer<UInt>(element + 12) = As<UInt>(RoundInt(Float(c.w))); }
847 }
848 break;
Alexis Hetue04d9b02019-01-16 14:42:24 -0500849 case VK_FORMAT_R32G32B32_UINT:
850 if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(RoundInt(Float(c.z))); }
Alexis Hetudd152e12018-11-14 13:39:28 -0500851 case VK_FORMAT_R32G32_UINT:
Nicolas Capens68a82382018-10-02 13:16:55 -0400852 if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(RoundInt(Float(c.y))); }
Alexis Hetudd152e12018-11-14 13:39:28 -0500853 case VK_FORMAT_R32_UINT:
Nicolas Capens68a82382018-10-02 13:16:55 -0400854 if(writeR) { *Pointer<UInt>(element) = As<UInt>(RoundInt(Float(c.x))); }
855 break;
Alexis Hetudd152e12018-11-14 13:39:28 -0500856 case VK_FORMAT_R5G6B5_UNORM_PACK16:
Nicolas Capens68a82382018-10-02 13:16:55 -0400857 if(writeR && writeG && writeB)
858 {
859 *Pointer<UShort>(element) = UShort(RoundInt(Float(c.z)) |
860 (RoundInt(Float(c.y)) << Int(5)) |
861 (RoundInt(Float(c.x)) << Int(11)));
862 }
863 else
864 {
865 unsigned short mask = (writeB ? 0x001F : 0x0000) | (writeG ? 0x07E0 : 0x0000) | (writeR ? 0xF800 : 0x0000);
866 unsigned short unmask = ~mask;
867 *Pointer<UShort>(element) = (*Pointer<UShort>(element) & UShort(unmask)) |
868 (UShort(RoundInt(Float(c.z)) |
869 (RoundInt(Float(c.y)) << Int(5)) |
870 (RoundInt(Float(c.x)) << Int(11))) & UShort(mask));
871 }
872 break;
Alexis Hetue04d9b02019-01-16 14:42:24 -0500873 case VK_FORMAT_R5G5B5A1_UNORM_PACK16:
874 if(writeRGBA)
875 {
876 *Pointer<UShort>(element) = UShort(RoundInt(Float(c.w)) |
877 (RoundInt(Float(c.z)) << Int(1)) |
878 (RoundInt(Float(c.y)) << Int(6)) |
879 (RoundInt(Float(c.x)) << Int(11)));
880 }
881 else
882 {
883 unsigned short mask = (writeA ? 0x8000 : 0x0000) |
884 (writeR ? 0x7C00 : 0x0000) |
885 (writeG ? 0x03E0 : 0x0000) |
886 (writeB ? 0x001F : 0x0000);
887 unsigned short unmask = ~mask;
888 *Pointer<UShort>(element) = (*Pointer<UShort>(element) & UShort(unmask)) |
889 (UShort(RoundInt(Float(c.w)) |
890 (RoundInt(Float(c.z)) << Int(1)) |
891 (RoundInt(Float(c.y)) << Int(6)) |
892 (RoundInt(Float(c.x)) << Int(11))) & UShort(mask));
893 }
894 break;
895 case VK_FORMAT_B5G5R5A1_UNORM_PACK16:
896 if(writeRGBA)
897 {
898 *Pointer<UShort>(element) = UShort(RoundInt(Float(c.w)) |
899 (RoundInt(Float(c.x)) << Int(1)) |
900 (RoundInt(Float(c.y)) << Int(6)) |
901 (RoundInt(Float(c.z)) << Int(11)));
902 }
903 else
904 {
905 unsigned short mask = (writeA ? 0x8000 : 0x0000) |
906 (writeR ? 0x7C00 : 0x0000) |
907 (writeG ? 0x03E0 : 0x0000) |
908 (writeB ? 0x001F : 0x0000);
909 unsigned short unmask = ~mask;
910 *Pointer<UShort>(element) = (*Pointer<UShort>(element) & UShort(unmask)) |
911 (UShort(RoundInt(Float(c.w)) |
912 (RoundInt(Float(c.x)) << Int(1)) |
913 (RoundInt(Float(c.y)) << Int(6)) |
914 (RoundInt(Float(c.z)) << Int(11))) & UShort(mask));
915 }
916 break;
Alexis Hetu457bd9b2018-12-20 13:18:18 -0500917 case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
918 if(writeRGBA)
919 {
920 *Pointer<UShort>(element) = UShort(RoundInt(Float(c.z)) |
921 (RoundInt(Float(c.y)) << Int(5)) |
922 (RoundInt(Float(c.x)) << Int(10)) |
923 (RoundInt(Float(c.w)) << Int(15)));
924 }
925 else
926 {
927 unsigned short mask = (writeA ? 0x8000 : 0x0000) |
928 (writeR ? 0x7C00 : 0x0000) |
929 (writeG ? 0x03E0 : 0x0000) |
930 (writeB ? 0x001F : 0x0000);
931 unsigned short unmask = ~mask;
932 *Pointer<UShort>(element) = (*Pointer<UShort>(element) & UShort(unmask)) |
933 (UShort(RoundInt(Float(c.z)) |
934 (RoundInt(Float(c.y)) << Int(5)) |
935 (RoundInt(Float(c.x)) << Int(10)) |
936 (RoundInt(Float(c.w)) << Int(15))) & UShort(mask));
937 }
938 break;
Alexis Hetudd152e12018-11-14 13:39:28 -0500939 case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
940 case VK_FORMAT_A2B10G10R10_UINT_PACK32:
Alexis Hetue04d9b02019-01-16 14:42:24 -0500941 case VK_FORMAT_A2B10G10R10_SNORM_PACK32:
Nicolas Capens68a82382018-10-02 13:16:55 -0400942 if(writeRGBA)
943 {
944 *Pointer<UInt>(element) = UInt(RoundInt(Float(c.x)) |
945 (RoundInt(Float(c.y)) << 10) |
946 (RoundInt(Float(c.z)) << 20) |
947 (RoundInt(Float(c.w)) << 30));
948 }
949 else
950 {
951 unsigned int mask = (writeA ? 0xC0000000 : 0x0000) |
952 (writeB ? 0x3FF00000 : 0x0000) |
953 (writeG ? 0x000FFC00 : 0x0000) |
954 (writeR ? 0x000003FF : 0x0000);
955 unsigned int unmask = ~mask;
956 *Pointer<UInt>(element) = (*Pointer<UInt>(element) & UInt(unmask)) |
957 (UInt(RoundInt(Float(c.x)) |
Alexis Hetua28671d2018-12-19 17:23:26 -0500958 (RoundInt(Float(c.y)) << 10) |
959 (RoundInt(Float(c.z)) << 20) |
960 (RoundInt(Float(c.w)) << 30)) & UInt(mask));
Nicolas Capens68a82382018-10-02 13:16:55 -0400961 }
962 break;
Alexis Hetue04d9b02019-01-16 14:42:24 -0500963 case VK_FORMAT_A2R10G10B10_UNORM_PACK32:
964 case VK_FORMAT_A2R10G10B10_UINT_PACK32:
965 case VK_FORMAT_A2R10G10B10_SNORM_PACK32:
966 if(writeRGBA)
967 {
968 *Pointer<UInt>(element) = UInt(RoundInt(Float(c.z)) |
969 (RoundInt(Float(c.y)) << 10) |
970 (RoundInt(Float(c.x)) << 20) |
971 (RoundInt(Float(c.w)) << 30));
972 }
973 else
974 {
975 unsigned int mask = (writeA ? 0xC0000000 : 0x0000) |
976 (writeR ? 0x3FF00000 : 0x0000) |
977 (writeG ? 0x000FFC00 : 0x0000) |
978 (writeB ? 0x000003FF : 0x0000);
979 unsigned int unmask = ~mask;
980 *Pointer<UInt>(element) = (*Pointer<UInt>(element) & UInt(unmask)) |
981 (UInt(RoundInt(Float(c.z)) |
982 (RoundInt(Float(c.y)) << 10) |
983 (RoundInt(Float(c.x)) << 20) |
984 (RoundInt(Float(c.w)) << 30)) & UInt(mask));
985 }
986 break;
Alexis Hetudd152e12018-11-14 13:39:28 -0500987 case VK_FORMAT_D16_UNORM:
Nicolas Capens68a82382018-10-02 13:16:55 -0400988 *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x)));
989 break;
Alexis Hetudd152e12018-11-14 13:39:28 -0500990 case VK_FORMAT_D24_UNORM_S8_UINT:
991 case VK_FORMAT_X8_D24_UNORM_PACK32:
Nicolas Capens68a82382018-10-02 13:16:55 -0400992 *Pointer<UInt>(element) = UInt(RoundInt(Float(c.x)) << 8);
993 break;
Alexis Hetudd152e12018-11-14 13:39:28 -0500994 case VK_FORMAT_D32_SFLOAT:
995 case VK_FORMAT_D32_SFLOAT_S8_UINT:
Nicolas Capens68a82382018-10-02 13:16:55 -0400996 *Pointer<Float>(element) = c.x;
997 break;
Alexis Hetudd152e12018-11-14 13:39:28 -0500998 case VK_FORMAT_S8_UINT:
Nicolas Capens68a82382018-10-02 13:16:55 -0400999 *Pointer<Byte>(element) = Byte(RoundInt(Float(c.x)));
1000 break;
1001 default:
Nicolas Capens88ac3672019-08-01 13:22:34 -04001002 UNSUPPORTED("Blitter destination format %d", (int)state.destFormat);
1003 break;
Nicolas Capens68a82382018-10-02 13:16:55 -04001004 }
Nicolas Capens68a82382018-10-02 13:16:55 -04001005 }
1006
Nicolas Capens88ac3672019-08-01 13:22:34 -04001007 Int4 Blitter::readInt4(Pointer<Byte> element, const State &state)
Nicolas Capens68a82382018-10-02 13:16:55 -04001008 {
Nicolas Capens88ac3672019-08-01 13:22:34 -04001009 Int4 c(0, 0, 0, 1);
Nicolas Capens68a82382018-10-02 13:16:55 -04001010
1011 switch(state.sourceFormat)
1012 {
Alexis Hetua28671d2018-12-19 17:23:26 -05001013 case VK_FORMAT_A8B8G8R8_SINT_PACK32:
Alexis Hetudd152e12018-11-14 13:39:28 -05001014 case VK_FORMAT_R8G8B8A8_SINT:
Nicolas Capens68a82382018-10-02 13:16:55 -04001015 c = Insert(c, Int(*Pointer<SByte>(element + 3)), 3);
Alexis Hetua28671d2018-12-19 17:23:26 -05001016 c = Insert(c, Int(*Pointer<SByte>(element + 2)), 2);
Alexis Hetudd152e12018-11-14 13:39:28 -05001017 case VK_FORMAT_R8G8_SINT:
Nicolas Capens68a82382018-10-02 13:16:55 -04001018 c = Insert(c, Int(*Pointer<SByte>(element + 1)), 1);
Alexis Hetudd152e12018-11-14 13:39:28 -05001019 case VK_FORMAT_R8_SINT:
Nicolas Capens68a82382018-10-02 13:16:55 -04001020 c = Insert(c, Int(*Pointer<SByte>(element)), 0);
1021 break;
Alexis Hetua28671d2018-12-19 17:23:26 -05001022 case VK_FORMAT_A2B10G10R10_UINT_PACK32:
1023 c = Insert(c, Int((*Pointer<UInt>(element) & UInt(0x000003FF))), 0);
1024 c = Insert(c, Int((*Pointer<UInt>(element) & UInt(0x000FFC00)) >> 10), 1);
1025 c = Insert(c, Int((*Pointer<UInt>(element) & UInt(0x3FF00000)) >> 20), 2);
1026 c = Insert(c, Int((*Pointer<UInt>(element) & UInt(0xC0000000)) >> 30), 3);
1027 break;
1028 case VK_FORMAT_A8B8G8R8_UINT_PACK32:
Alexis Hetudd152e12018-11-14 13:39:28 -05001029 case VK_FORMAT_R8G8B8A8_UINT:
Nicolas Capens68a82382018-10-02 13:16:55 -04001030 c = Insert(c, Int(*Pointer<Byte>(element + 3)), 3);
Alexis Hetua28671d2018-12-19 17:23:26 -05001031 c = Insert(c, Int(*Pointer<Byte>(element + 2)), 2);
Alexis Hetudd152e12018-11-14 13:39:28 -05001032 case VK_FORMAT_R8G8_UINT:
Nicolas Capens68a82382018-10-02 13:16:55 -04001033 c = Insert(c, Int(*Pointer<Byte>(element + 1)), 1);
Alexis Hetudd152e12018-11-14 13:39:28 -05001034 case VK_FORMAT_R8_UINT:
Alexis Hetu1c462eb2019-10-02 10:11:53 -04001035 case VK_FORMAT_S8_UINT:
Nicolas Capens68a82382018-10-02 13:16:55 -04001036 c = Insert(c, Int(*Pointer<Byte>(element)), 0);
1037 break;
Alexis Hetudd152e12018-11-14 13:39:28 -05001038 case VK_FORMAT_R16G16B16A16_SINT:
Nicolas Capens68a82382018-10-02 13:16:55 -04001039 c = Insert(c, Int(*Pointer<Short>(element + 6)), 3);
Alexis Hetua28671d2018-12-19 17:23:26 -05001040 c = Insert(c, Int(*Pointer<Short>(element + 4)), 2);
Alexis Hetudd152e12018-11-14 13:39:28 -05001041 case VK_FORMAT_R16G16_SINT:
Nicolas Capens68a82382018-10-02 13:16:55 -04001042 c = Insert(c, Int(*Pointer<Short>(element + 2)), 1);
Alexis Hetudd152e12018-11-14 13:39:28 -05001043 case VK_FORMAT_R16_SINT:
Nicolas Capens68a82382018-10-02 13:16:55 -04001044 c = Insert(c, Int(*Pointer<Short>(element)), 0);
1045 break;
Alexis Hetudd152e12018-11-14 13:39:28 -05001046 case VK_FORMAT_R16G16B16A16_UINT:
Nicolas Capens68a82382018-10-02 13:16:55 -04001047 c = Insert(c, Int(*Pointer<UShort>(element + 6)), 3);
Alexis Hetua28671d2018-12-19 17:23:26 -05001048 c = Insert(c, Int(*Pointer<UShort>(element + 4)), 2);
Alexis Hetudd152e12018-11-14 13:39:28 -05001049 case VK_FORMAT_R16G16_UINT:
Nicolas Capens68a82382018-10-02 13:16:55 -04001050 c = Insert(c, Int(*Pointer<UShort>(element + 2)), 1);
Alexis Hetudd152e12018-11-14 13:39:28 -05001051 case VK_FORMAT_R16_UINT:
Nicolas Capens68a82382018-10-02 13:16:55 -04001052 c = Insert(c, Int(*Pointer<UShort>(element)), 0);
1053 break;
Alexis Hetudd152e12018-11-14 13:39:28 -05001054 case VK_FORMAT_R32G32B32A32_SINT:
1055 case VK_FORMAT_R32G32B32A32_UINT:
Nicolas Capens68a82382018-10-02 13:16:55 -04001056 c = *Pointer<Int4>(element);
1057 break;
Alexis Hetudd152e12018-11-14 13:39:28 -05001058 case VK_FORMAT_R32G32_SINT:
1059 case VK_FORMAT_R32G32_UINT:
Nicolas Capens68a82382018-10-02 13:16:55 -04001060 c = Insert(c, *Pointer<Int>(element + 4), 1);
Alexis Hetudd152e12018-11-14 13:39:28 -05001061 case VK_FORMAT_R32_SINT:
1062 case VK_FORMAT_R32_UINT:
Nicolas Capens68a82382018-10-02 13:16:55 -04001063 c = Insert(c, *Pointer<Int>(element), 0);
1064 break;
1065 default:
Nicolas Capens88ac3672019-08-01 13:22:34 -04001066 UNSUPPORTED("Blitter source format %d", (int)state.sourceFormat);
Nicolas Capens68a82382018-10-02 13:16:55 -04001067 }
1068
Nicolas Capens88ac3672019-08-01 13:22:34 -04001069 return c;
Nicolas Capens68a82382018-10-02 13:16:55 -04001070 }
1071
Nicolas Capens88ac3672019-08-01 13:22:34 -04001072 void Blitter::write(Int4 &c, Pointer<Byte> element, const State &state)
Nicolas Capens68a82382018-10-02 13:16:55 -04001073 {
1074 bool writeR = state.writeRed;
1075 bool writeG = state.writeGreen;
1076 bool writeB = state.writeBlue;
1077 bool writeA = state.writeAlpha;
1078 bool writeRGBA = writeR && writeG && writeB && writeA;
1079
1080 switch(state.destFormat)
1081 {
Alexis Hetu6d8d3c32018-12-21 12:03:31 -05001082 case VK_FORMAT_A2B10G10R10_UINT_PACK32:
1083 c = Min(As<UInt4>(c), UInt4(0x03FF, 0x03FF, 0x03FF, 0x0003));
1084 break;
1085 case VK_FORMAT_A8B8G8R8_UINT_PACK32:
1086 case VK_FORMAT_R8G8B8A8_UINT:
Alexis Hetue04d9b02019-01-16 14:42:24 -05001087 case VK_FORMAT_R8G8B8_UINT:
Alexis Hetu6d8d3c32018-12-21 12:03:31 -05001088 case VK_FORMAT_R8G8_UINT:
1089 case VK_FORMAT_R8_UINT:
Alexis Hetue04d9b02019-01-16 14:42:24 -05001090 case VK_FORMAT_R8G8B8A8_USCALED:
1091 case VK_FORMAT_R8G8B8_USCALED:
1092 case VK_FORMAT_R8G8_USCALED:
1093 case VK_FORMAT_R8_USCALED:
Alexis Hetu1c462eb2019-10-02 10:11:53 -04001094 case VK_FORMAT_S8_UINT:
Alexis Hetu6d8d3c32018-12-21 12:03:31 -05001095 c = Min(As<UInt4>(c), UInt4(0xFF));
1096 break;
1097 case VK_FORMAT_R16G16B16A16_UINT:
Alexis Hetue04d9b02019-01-16 14:42:24 -05001098 case VK_FORMAT_R16G16B16_UINT:
Alexis Hetu6d8d3c32018-12-21 12:03:31 -05001099 case VK_FORMAT_R16G16_UINT:
1100 case VK_FORMAT_R16_UINT:
Alexis Hetue04d9b02019-01-16 14:42:24 -05001101 case VK_FORMAT_R16G16B16A16_USCALED:
1102 case VK_FORMAT_R16G16B16_USCALED:
1103 case VK_FORMAT_R16G16_USCALED:
1104 case VK_FORMAT_R16_USCALED:
Alexis Hetu6d8d3c32018-12-21 12:03:31 -05001105 c = Min(As<UInt4>(c), UInt4(0xFFFF));
1106 break;
1107 case VK_FORMAT_A8B8G8R8_SINT_PACK32:
1108 case VK_FORMAT_R8G8B8A8_SINT:
1109 case VK_FORMAT_R8G8_SINT:
1110 case VK_FORMAT_R8_SINT:
Alexis Hetue04d9b02019-01-16 14:42:24 -05001111 case VK_FORMAT_R8G8B8A8_SSCALED:
1112 case VK_FORMAT_R8G8B8_SSCALED:
1113 case VK_FORMAT_R8G8_SSCALED:
1114 case VK_FORMAT_R8_SSCALED:
Alexis Hetu6d8d3c32018-12-21 12:03:31 -05001115 c = Min(Max(c, Int4(-0x80)), Int4(0x7F));
1116 break;
1117 case VK_FORMAT_R16G16B16A16_SINT:
Alexis Hetue04d9b02019-01-16 14:42:24 -05001118 case VK_FORMAT_R16G16B16_SINT:
Alexis Hetu6d8d3c32018-12-21 12:03:31 -05001119 case VK_FORMAT_R16G16_SINT:
1120 case VK_FORMAT_R16_SINT:
Alexis Hetue04d9b02019-01-16 14:42:24 -05001121 case VK_FORMAT_R16G16B16A16_SSCALED:
1122 case VK_FORMAT_R16G16B16_SSCALED:
1123 case VK_FORMAT_R16G16_SSCALED:
1124 case VK_FORMAT_R16_SSCALED:
Alexis Hetu6d8d3c32018-12-21 12:03:31 -05001125 c = Min(Max(c, Int4(-0x8000)), Int4(0x7FFF));
1126 break;
1127 default:
1128 break;
1129 }
1130
1131 switch(state.destFormat)
1132 {
Alexis Hetue04d9b02019-01-16 14:42:24 -05001133 case VK_FORMAT_B8G8R8A8_SINT:
1134 case VK_FORMAT_B8G8R8A8_SSCALED:
1135 if(writeA) { *Pointer<SByte>(element + 3) = SByte(Extract(c, 3)); }
1136 case VK_FORMAT_B8G8R8_SINT:
Alexis Hetue04d9b02019-01-16 14:42:24 -05001137 case VK_FORMAT_B8G8R8_SSCALED:
1138 if(writeB) { *Pointer<SByte>(element) = SByte(Extract(c, 2)); }
1139 if(writeG) { *Pointer<SByte>(element + 1) = SByte(Extract(c, 1)); }
1140 if(writeR) { *Pointer<SByte>(element + 2) = SByte(Extract(c, 0)); }
1141 break;
Alexis Hetua28671d2018-12-19 17:23:26 -05001142 case VK_FORMAT_A8B8G8R8_SINT_PACK32:
Alexis Hetudd152e12018-11-14 13:39:28 -05001143 case VK_FORMAT_R8G8B8A8_SINT:
Alexis Hetue04d9b02019-01-16 14:42:24 -05001144 case VK_FORMAT_R8G8B8A8_SSCALED:
1145 case VK_FORMAT_A8B8G8R8_SSCALED_PACK32:
Nicolas Capens68a82382018-10-02 13:16:55 -04001146 if(writeA) { *Pointer<SByte>(element + 3) = SByte(Extract(c, 3)); }
Alexis Hetue04d9b02019-01-16 14:42:24 -05001147 case VK_FORMAT_R8G8B8_SINT:
1148 case VK_FORMAT_R8G8B8_SSCALED:
Nicolas Capens68a82382018-10-02 13:16:55 -04001149 if(writeB) { *Pointer<SByte>(element + 2) = SByte(Extract(c, 2)); }
Alexis Hetudd152e12018-11-14 13:39:28 -05001150 case VK_FORMAT_R8G8_SINT:
Alexis Hetue04d9b02019-01-16 14:42:24 -05001151 case VK_FORMAT_R8G8_SSCALED:
Nicolas Capens68a82382018-10-02 13:16:55 -04001152 if(writeG) { *Pointer<SByte>(element + 1) = SByte(Extract(c, 1)); }
Alexis Hetudd152e12018-11-14 13:39:28 -05001153 case VK_FORMAT_R8_SINT:
Alexis Hetue04d9b02019-01-16 14:42:24 -05001154 case VK_FORMAT_R8_SSCALED:
Nicolas Capens68a82382018-10-02 13:16:55 -04001155 if(writeR) { *Pointer<SByte>(element) = SByte(Extract(c, 0)); }
1156 break;
Alexis Hetua28671d2018-12-19 17:23:26 -05001157 case VK_FORMAT_A2B10G10R10_UINT_PACK32:
Alexis Hetue04d9b02019-01-16 14:42:24 -05001158 case VK_FORMAT_A2B10G10R10_SINT_PACK32:
1159 case VK_FORMAT_A2B10G10R10_USCALED_PACK32:
1160 case VK_FORMAT_A2B10G10R10_SSCALED_PACK32:
Alexis Hetua28671d2018-12-19 17:23:26 -05001161 if(writeRGBA)
1162 {
1163 *Pointer<UInt>(element) =
1164 UInt((Extract(c, 0)) | (Extract(c, 1) << 10) | (Extract(c, 2) << 20) | (Extract(c, 3) << 30));
1165 }
1166 else
1167 {
1168 unsigned int mask = (writeA ? 0xC0000000 : 0x0000) |
1169 (writeB ? 0x3FF00000 : 0x0000) |
1170 (writeG ? 0x000FFC00 : 0x0000) |
1171 (writeR ? 0x000003FF : 0x0000);
1172 unsigned int unmask = ~mask;
1173 *Pointer<UInt>(element) = (*Pointer<UInt>(element) & UInt(unmask)) |
1174 (UInt(Extract(c, 0) | (Extract(c, 1) << 10) | (Extract(c, 2) << 20) | (Extract(c, 3) << 30)) & UInt(mask));
1175 }
1176 break;
Alexis Hetue04d9b02019-01-16 14:42:24 -05001177 case VK_FORMAT_A2R10G10B10_UINT_PACK32:
1178 case VK_FORMAT_A2R10G10B10_SINT_PACK32:
1179 case VK_FORMAT_A2R10G10B10_USCALED_PACK32:
1180 case VK_FORMAT_A2R10G10B10_SSCALED_PACK32:
1181 if(writeRGBA)
1182 {
1183 *Pointer<UInt>(element) =
1184 UInt((Extract(c, 2)) | (Extract(c, 1) << 10) | (Extract(c, 0) << 20) | (Extract(c, 3) << 30));
1185 }
1186 else
1187 {
1188 unsigned int mask = (writeA ? 0xC0000000 : 0x0000) |
1189 (writeR ? 0x3FF00000 : 0x0000) |
1190 (writeG ? 0x000FFC00 : 0x0000) |
1191 (writeB ? 0x000003FF : 0x0000);
1192 unsigned int unmask = ~mask;
1193 *Pointer<UInt>(element) = (*Pointer<UInt>(element) & UInt(unmask)) |
1194 (UInt(Extract(c, 2) | (Extract(c, 1) << 10) | (Extract(c, 0) << 20) | (Extract(c, 3) << 30)) & UInt(mask));
1195 }
1196 break;
1197 case VK_FORMAT_B8G8R8A8_UINT:
1198 case VK_FORMAT_B8G8R8A8_USCALED:
1199 if(writeA) { *Pointer<Byte>(element + 3) = Byte(Extract(c, 3)); }
1200 case VK_FORMAT_B8G8R8_UINT:
1201 case VK_FORMAT_B8G8R8_USCALED:
Alexis Hetu45d34d22019-06-28 15:58:54 -04001202 case VK_FORMAT_B8G8R8_SRGB:
Alexis Hetue04d9b02019-01-16 14:42:24 -05001203 if(writeB) { *Pointer<Byte>(element) = Byte(Extract(c, 2)); }
1204 if(writeG) { *Pointer<Byte>(element + 1) = Byte(Extract(c, 1)); }
1205 if(writeR) { *Pointer<Byte>(element + 2) = Byte(Extract(c, 0)); }
1206 break;
Alexis Hetua28671d2018-12-19 17:23:26 -05001207 case VK_FORMAT_A8B8G8R8_UINT_PACK32:
Alexis Hetudd152e12018-11-14 13:39:28 -05001208 case VK_FORMAT_R8G8B8A8_UINT:
Alexis Hetue04d9b02019-01-16 14:42:24 -05001209 case VK_FORMAT_R8G8B8A8_USCALED:
1210 case VK_FORMAT_A8B8G8R8_USCALED_PACK32:
Nicolas Capens68a82382018-10-02 13:16:55 -04001211 if(writeA) { *Pointer<Byte>(element + 3) = Byte(Extract(c, 3)); }
Alexis Hetue04d9b02019-01-16 14:42:24 -05001212 case VK_FORMAT_R8G8B8_UINT:
1213 case VK_FORMAT_R8G8B8_USCALED:
Nicolas Capens68a82382018-10-02 13:16:55 -04001214 if(writeB) { *Pointer<Byte>(element + 2) = Byte(Extract(c, 2)); }
Alexis Hetudd152e12018-11-14 13:39:28 -05001215 case VK_FORMAT_R8G8_UINT:
Alexis Hetue04d9b02019-01-16 14:42:24 -05001216 case VK_FORMAT_R8G8_USCALED:
Nicolas Capens68a82382018-10-02 13:16:55 -04001217 if(writeG) { *Pointer<Byte>(element + 1) = Byte(Extract(c, 1)); }
Alexis Hetudd152e12018-11-14 13:39:28 -05001218 case VK_FORMAT_R8_UINT:
Alexis Hetue04d9b02019-01-16 14:42:24 -05001219 case VK_FORMAT_R8_USCALED:
Alexis Hetu1c462eb2019-10-02 10:11:53 -04001220 case VK_FORMAT_S8_UINT:
Nicolas Capens68a82382018-10-02 13:16:55 -04001221 if(writeR) { *Pointer<Byte>(element) = Byte(Extract(c, 0)); }
1222 break;
Alexis Hetudd152e12018-11-14 13:39:28 -05001223 case VK_FORMAT_R16G16B16A16_SINT:
Alexis Hetue04d9b02019-01-16 14:42:24 -05001224 case VK_FORMAT_R16G16B16A16_SSCALED:
Nicolas Capens68a82382018-10-02 13:16:55 -04001225 if(writeA) { *Pointer<Short>(element + 6) = Short(Extract(c, 3)); }
Alexis Hetue04d9b02019-01-16 14:42:24 -05001226 case VK_FORMAT_R16G16B16_SINT:
1227 case VK_FORMAT_R16G16B16_SSCALED:
Nicolas Capens68a82382018-10-02 13:16:55 -04001228 if(writeB) { *Pointer<Short>(element + 4) = Short(Extract(c, 2)); }
Alexis Hetudd152e12018-11-14 13:39:28 -05001229 case VK_FORMAT_R16G16_SINT:
Alexis Hetue04d9b02019-01-16 14:42:24 -05001230 case VK_FORMAT_R16G16_SSCALED:
Nicolas Capens68a82382018-10-02 13:16:55 -04001231 if(writeG) { *Pointer<Short>(element + 2) = Short(Extract(c, 1)); }
Alexis Hetudd152e12018-11-14 13:39:28 -05001232 case VK_FORMAT_R16_SINT:
Alexis Hetue04d9b02019-01-16 14:42:24 -05001233 case VK_FORMAT_R16_SSCALED:
Nicolas Capens68a82382018-10-02 13:16:55 -04001234 if(writeR) { *Pointer<Short>(element) = Short(Extract(c, 0)); }
1235 break;
Alexis Hetudd152e12018-11-14 13:39:28 -05001236 case VK_FORMAT_R16G16B16A16_UINT:
Alexis Hetue04d9b02019-01-16 14:42:24 -05001237 case VK_FORMAT_R16G16B16A16_USCALED:
Nicolas Capens68a82382018-10-02 13:16:55 -04001238 if(writeA) { *Pointer<UShort>(element + 6) = UShort(Extract(c, 3)); }
Alexis Hetue04d9b02019-01-16 14:42:24 -05001239 case VK_FORMAT_R16G16B16_UINT:
1240 case VK_FORMAT_R16G16B16_USCALED:
Nicolas Capens68a82382018-10-02 13:16:55 -04001241 if(writeB) { *Pointer<UShort>(element + 4) = UShort(Extract(c, 2)); }
Alexis Hetudd152e12018-11-14 13:39:28 -05001242 case VK_FORMAT_R16G16_UINT:
Alexis Hetue04d9b02019-01-16 14:42:24 -05001243 case VK_FORMAT_R16G16_USCALED:
Nicolas Capens68a82382018-10-02 13:16:55 -04001244 if(writeG) { *Pointer<UShort>(element + 2) = UShort(Extract(c, 1)); }
Alexis Hetudd152e12018-11-14 13:39:28 -05001245 case VK_FORMAT_R16_UINT:
Alexis Hetue04d9b02019-01-16 14:42:24 -05001246 case VK_FORMAT_R16_USCALED:
Nicolas Capens68a82382018-10-02 13:16:55 -04001247 if(writeR) { *Pointer<UShort>(element) = UShort(Extract(c, 0)); }
1248 break;
Alexis Hetudd152e12018-11-14 13:39:28 -05001249 case VK_FORMAT_R32G32B32A32_SINT:
Nicolas Capens68a82382018-10-02 13:16:55 -04001250 if(writeRGBA)
1251 {
1252 *Pointer<Int4>(element) = c;
1253 }
1254 else
1255 {
1256 if(writeR) { *Pointer<Int>(element) = Extract(c, 0); }
1257 if(writeG) { *Pointer<Int>(element + 4) = Extract(c, 1); }
1258 if(writeB) { *Pointer<Int>(element + 8) = Extract(c, 2); }
1259 if(writeA) { *Pointer<Int>(element + 12) = Extract(c, 3); }
1260 }
1261 break;
Alexis Hetue04d9b02019-01-16 14:42:24 -05001262 case VK_FORMAT_R32G32B32_SINT:
1263 if(writeR) { *Pointer<Int>(element) = Extract(c, 0); }
1264 if(writeG) { *Pointer<Int>(element + 4) = Extract(c, 1); }
1265 if(writeB) { *Pointer<Int>(element + 8) = Extract(c, 2); }
1266 break;
Alexis Hetudd152e12018-11-14 13:39:28 -05001267 case VK_FORMAT_R32G32_SINT:
Nicolas Capens68a82382018-10-02 13:16:55 -04001268 if(writeR) { *Pointer<Int>(element) = Extract(c, 0); }
1269 if(writeG) { *Pointer<Int>(element + 4) = Extract(c, 1); }
1270 break;
Alexis Hetudd152e12018-11-14 13:39:28 -05001271 case VK_FORMAT_R32_SINT:
Nicolas Capens68a82382018-10-02 13:16:55 -04001272 if(writeR) { *Pointer<Int>(element) = Extract(c, 0); }
1273 break;
Alexis Hetudd152e12018-11-14 13:39:28 -05001274 case VK_FORMAT_R32G32B32A32_UINT:
Nicolas Capens68a82382018-10-02 13:16:55 -04001275 if(writeRGBA)
1276 {
1277 *Pointer<UInt4>(element) = As<UInt4>(c);
1278 }
1279 else
1280 {
1281 if(writeR) { *Pointer<UInt>(element) = As<UInt>(Extract(c, 0)); }
1282 if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(Extract(c, 1)); }
1283 if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(Extract(c, 2)); }
1284 if(writeA) { *Pointer<UInt>(element + 12) = As<UInt>(Extract(c, 3)); }
1285 }
1286 break;
Alexis Hetue04d9b02019-01-16 14:42:24 -05001287 case VK_FORMAT_R32G32B32_UINT:
1288 if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(Extract(c, 2)); }
Alexis Hetudd152e12018-11-14 13:39:28 -05001289 case VK_FORMAT_R32G32_UINT:
Nicolas Capens68a82382018-10-02 13:16:55 -04001290 if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(Extract(c, 1)); }
Alexis Hetudd152e12018-11-14 13:39:28 -05001291 case VK_FORMAT_R32_UINT:
Nicolas Capens68a82382018-10-02 13:16:55 -04001292 if(writeR) { *Pointer<UInt>(element) = As<UInt>(Extract(c, 0)); }
1293 break;
1294 default:
Nicolas Capens88ac3672019-08-01 13:22:34 -04001295 UNSUPPORTED("Blitter destination format %d", (int)state.destFormat);
Nicolas Capens68a82382018-10-02 13:16:55 -04001296 }
Nicolas Capens68a82382018-10-02 13:16:55 -04001297 }
1298
Nicolas Capens88ac3672019-08-01 13:22:34 -04001299 void Blitter::ApplyScaleAndClamp(Float4 &value, const State &state, bool preScaled)
Nicolas Capens68a82382018-10-02 13:16:55 -04001300 {
Antonio Maiorano53096e42019-09-24 13:00:22 -04001301 float4 scale{}, unscale{};
Nicolas Capens88ac3672019-08-01 13:22:34 -04001302
Nicolas Capens68a82382018-10-02 13:16:55 -04001303 if(state.clearOperation &&
Alexis Hetu25ec7b02019-03-12 14:19:22 -04001304 state.sourceFormat.isNonNormalizedInteger() &&
1305 !state.destFormat.isNonNormalizedInteger())
Nicolas Capens68a82382018-10-02 13:16:55 -04001306 {
1307 // If we're clearing a buffer from an int or uint color into a normalized color,
1308 // then the whole range of the int or uint color must be scaled between 0 and 1.
1309 switch(state.sourceFormat)
1310 {
Alexis Hetudd152e12018-11-14 13:39:28 -05001311 case VK_FORMAT_R32G32B32A32_SINT:
Nicolas Capens68a82382018-10-02 13:16:55 -04001312 unscale = replicate(static_cast<float>(0x7FFFFFFF));
1313 break;
Alexis Hetudd152e12018-11-14 13:39:28 -05001314 case VK_FORMAT_R32G32B32A32_UINT:
Nicolas Capens68a82382018-10-02 13:16:55 -04001315 unscale = replicate(static_cast<float>(0xFFFFFFFF));
1316 break;
1317 default:
Nicolas Capens88ac3672019-08-01 13:22:34 -04001318 UNSUPPORTED("Blitter source format %d", (int)state.sourceFormat);
Nicolas Capens68a82382018-10-02 13:16:55 -04001319 }
1320 }
Nicolas Capens88ac3672019-08-01 13:22:34 -04001321 else
Nicolas Capens68a82382018-10-02 13:16:55 -04001322 {
Nicolas Capens88ac3672019-08-01 13:22:34 -04001323 unscale = state.sourceFormat.getScale();
Nicolas Capens68a82382018-10-02 13:16:55 -04001324 }
1325
Nicolas Capens88ac3672019-08-01 13:22:34 -04001326 scale = state.destFormat.getScale();
Nicolas Capens68a82382018-10-02 13:16:55 -04001327
Alexis Hetu25ec7b02019-03-12 14:19:22 -04001328 bool srcSRGB = state.sourceFormat.isSRGBformat();
1329 bool dstSRGB = state.destFormat.isSRGBformat();
Nicolas Capens68a82382018-10-02 13:16:55 -04001330
Antonio Maiorano7738ed72019-10-21 11:29:41 -04001331 if(state.allowSRGBConversion && ((srcSRGB && !preScaled) || dstSRGB)) // One of the formats is sRGB encoded.
Nicolas Capens68a82382018-10-02 13:16:55 -04001332 {
1333 value *= preScaled ? Float4(1.0f / scale.x, 1.0f / scale.y, 1.0f / scale.z, 1.0f / scale.w) : // Unapply scale
1334 Float4(1.0f / unscale.x, 1.0f / unscale.y, 1.0f / unscale.z, 1.0f / unscale.w); // Apply unscale
1335 value = (srcSRGB && !preScaled) ? sRGBtoLinear(value) : LinearToSRGB(value);
1336 value *= Float4(scale.x, scale.y, scale.z, scale.w); // Apply scale
1337 }
1338 else if(unscale != scale)
1339 {
1340 value *= Float4(scale.x / unscale.x, scale.y / unscale.y, scale.z / unscale.z, scale.w / unscale.w);
1341 }
1342
Alexis Hetu25ec7b02019-03-12 14:19:22 -04001343 if(state.sourceFormat.isFloatFormat() && !state.destFormat.isFloatFormat())
Nicolas Capens68a82382018-10-02 13:16:55 -04001344 {
1345 value = Min(value, Float4(scale.x, scale.y, scale.z, scale.w));
1346
Alexis Hetu25ec7b02019-03-12 14:19:22 -04001347 value = Max(value, Float4(state.destFormat.isUnsignedComponent(0) ? 0.0f : -scale.x,
1348 state.destFormat.isUnsignedComponent(1) ? 0.0f : -scale.y,
1349 state.destFormat.isUnsignedComponent(2) ? 0.0f : -scale.z,
1350 state.destFormat.isUnsignedComponent(3) ? 0.0f : -scale.w));
Nicolas Capens68a82382018-10-02 13:16:55 -04001351 }
Nicolas Capens68a82382018-10-02 13:16:55 -04001352 }
1353
1354 Int Blitter::ComputeOffset(Int &x, Int &y, Int &pitchB, int bytes, bool quadLayout)
1355 {
1356 if(!quadLayout)
1357 {
1358 return y * pitchB + x * bytes;
1359 }
1360 else
1361 {
1362 // (x & ~1) * 2 + (x & 1) == (x - (x & 1)) * 2 + (x & 1) == x * 2 - (x & 1) * 2 + (x & 1) == x * 2 - (x & 1)
1363 return (y & Int(~1)) * pitchB +
Alexis Hetu1c462eb2019-10-02 10:11:53 -04001364 ((((y & Int(1)) + x) << 1) - (x & Int(1))) * bytes;
Nicolas Capens68a82382018-10-02 13:16:55 -04001365 }
1366 }
1367
1368 Float4 Blitter::LinearToSRGB(Float4 &c)
1369 {
1370 Float4 lc = Min(c, Float4(0.0031308f)) * Float4(12.92f);
1371 Float4 ec = Float4(1.055f) * power(c, Float4(1.0f / 2.4f)) - Float4(0.055f);
1372
1373 Float4 s = c;
1374 s.xyz = Max(lc, ec);
1375
1376 return s;
1377 }
1378
1379 Float4 Blitter::sRGBtoLinear(Float4 &c)
1380 {
1381 Float4 lc = c * Float4(1.0f / 12.92f);
1382 Float4 ec = power((c + Float4(0.055f)) * Float4(1.0f / 1.055f), Float4(2.4f));
1383
1384 Int4 linear = CmpLT(c, Float4(0.04045f));
1385
1386 Float4 s = c;
Ben Clayton5e9441a2019-05-24 07:43:42 +01001387 s.xyz = As<Float4>((linear & As<Int4>(lc)) | (~linear & As<Int4>(ec))); // TODO: IfThenElse()
Nicolas Capens68a82382018-10-02 13:16:55 -04001388
1389 return s;
1390 }
1391
Ben Clayton6897e9b2019-07-16 17:27:27 +01001392 std::shared_ptr<Routine> Blitter::generate(const State &state)
Nicolas Capens68a82382018-10-02 13:16:55 -04001393 {
1394 Function<Void(Pointer<Byte>)> function;
1395 {
1396 Pointer<Byte> blit(function.Arg<0>());
1397
1398 Pointer<Byte> source = *Pointer<Pointer<Byte>>(blit + OFFSET(BlitData,source));
1399 Pointer<Byte> dest = *Pointer<Pointer<Byte>>(blit + OFFSET(BlitData,dest));
1400 Int sPitchB = *Pointer<Int>(blit + OFFSET(BlitData,sPitchB));
1401 Int dPitchB = *Pointer<Int>(blit + OFFSET(BlitData,dPitchB));
1402
1403 Float x0 = *Pointer<Float>(blit + OFFSET(BlitData,x0));
1404 Float y0 = *Pointer<Float>(blit + OFFSET(BlitData,y0));
1405 Float w = *Pointer<Float>(blit + OFFSET(BlitData,w));
1406 Float h = *Pointer<Float>(blit + OFFSET(BlitData,h));
1407
1408 Int x0d = *Pointer<Int>(blit + OFFSET(BlitData,x0d));
1409 Int x1d = *Pointer<Int>(blit + OFFSET(BlitData,x1d));
1410 Int y0d = *Pointer<Int>(blit + OFFSET(BlitData,y0d));
1411 Int y1d = *Pointer<Int>(blit + OFFSET(BlitData,y1d));
1412
1413 Int sWidth = *Pointer<Int>(blit + OFFSET(BlitData,sWidth));
1414 Int sHeight = *Pointer<Int>(blit + OFFSET(BlitData,sHeight));
1415
Alexis Hetu25ec7b02019-03-12 14:19:22 -04001416 bool intSrc = state.sourceFormat.isNonNormalizedInteger();
1417 bool intDst = state.destFormat.isNonNormalizedInteger();
Nicolas Capens68a82382018-10-02 13:16:55 -04001418 bool intBoth = intSrc && intDst;
Alexis Hetu25ec7b02019-03-12 14:19:22 -04001419 bool srcQuadLayout = state.sourceFormat.hasQuadLayout();
1420 bool dstQuadLayout = state.destFormat.hasQuadLayout();
1421 int srcBytes = state.sourceFormat.bytes();
1422 int dstBytes = state.destFormat.bytes();
Nicolas Capens68a82382018-10-02 13:16:55 -04001423
1424 bool hasConstantColorI = false;
1425 Int4 constantColorI;
1426 bool hasConstantColorF = false;
1427 Float4 constantColorF;
1428 if(state.clearOperation)
1429 {
1430 if(intBoth) // Integer types
1431 {
Nicolas Capens88ac3672019-08-01 13:22:34 -04001432 constantColorI = readInt4(source, state);
Nicolas Capens68a82382018-10-02 13:16:55 -04001433 hasConstantColorI = true;
1434 }
1435 else
1436 {
Nicolas Capens88ac3672019-08-01 13:22:34 -04001437 constantColorF = readFloat4(source, state);
Nicolas Capens68a82382018-10-02 13:16:55 -04001438 hasConstantColorF = true;
1439
Nicolas Capens88ac3672019-08-01 13:22:34 -04001440 ApplyScaleAndClamp(constantColorF, state);
Nicolas Capens68a82382018-10-02 13:16:55 -04001441 }
1442 }
1443
1444 For(Int j = y0d, j < y1d, j++)
1445 {
1446 Float y = state.clearOperation ? RValue<Float>(y0) : y0 + Float(j) * h;
1447 Pointer<Byte> destLine = dest + (dstQuadLayout ? j & Int(~1) : RValue<Int>(j)) * dPitchB;
1448
1449 For(Int i = x0d, i < x1d, i++)
1450 {
1451 Float x = state.clearOperation ? RValue<Float>(x0) : x0 + Float(i) * w;
1452 Pointer<Byte> d = destLine + (dstQuadLayout ? (((j & Int(1)) << 1) + (i * 2) - (i & Int(1))) : RValue<Int>(i)) * dstBytes;
1453
1454 if(hasConstantColorI)
1455 {
Nicolas Capens88ac3672019-08-01 13:22:34 -04001456 write(constantColorI, d, state);
Nicolas Capens68a82382018-10-02 13:16:55 -04001457 }
1458 else if(hasConstantColorF)
1459 {
1460 for(int s = 0; s < state.destSamples; s++)
1461 {
Nicolas Capens88ac3672019-08-01 13:22:34 -04001462 write(constantColorF, d, state);
Nicolas Capens68a82382018-10-02 13:16:55 -04001463
1464 d += *Pointer<Int>(blit + OFFSET(BlitData, dSliceB));
1465 }
1466 }
1467 else if(intBoth) // Integer types do not support filtering
1468 {
Nicolas Capens68a82382018-10-02 13:16:55 -04001469 Int X = Int(x);
1470 Int Y = Int(y);
1471
1472 if(state.clampToEdge)
1473 {
1474 X = Clamp(X, 0, sWidth - 1);
1475 Y = Clamp(Y, 0, sHeight - 1);
1476 }
1477
1478 Pointer<Byte> s = source + ComputeOffset(X, Y, sPitchB, srcBytes, srcQuadLayout);
1479
Nicolas Capens88ac3672019-08-01 13:22:34 -04001480 // When both formats are true integer types, we don't go to float to avoid losing precision
1481 Int4 color = readInt4(s, state);
1482 write(color, d, state);
Nicolas Capens68a82382018-10-02 13:16:55 -04001483 }
1484 else
1485 {
1486 Float4 color;
1487
1488 bool preScaled = false;
1489 if(!state.filter || intSrc)
1490 {
1491 Int X = Int(x);
1492 Int Y = Int(y);
1493
1494 if(state.clampToEdge)
1495 {
1496 X = Clamp(X, 0, sWidth - 1);
1497 Y = Clamp(Y, 0, sHeight - 1);
1498 }
1499
1500 Pointer<Byte> s = source + ComputeOffset(X, Y, sPitchB, srcBytes, srcQuadLayout);
1501
Nicolas Capens88ac3672019-08-01 13:22:34 -04001502 color = readFloat4(s, state);
Alexis Hetu54ec7592019-03-20 14:37:16 -04001503
1504 if(state.srcSamples > 1) // Resolve multisampled source
1505 {
Antonio Maiorano7738ed72019-10-21 11:29:41 -04001506 if(state.allowSRGBConversion && state.sourceFormat.isSRGBformat()) // sRGB -> RGB
Alexis Hetua4308132019-06-13 09:55:26 -04001507 {
Nicolas Capens88ac3672019-08-01 13:22:34 -04001508 ApplyScaleAndClamp(color, state);
Alexis Hetua4308132019-06-13 09:55:26 -04001509 preScaled = true;
1510 }
Alexis Hetu54ec7592019-03-20 14:37:16 -04001511 Float4 accum = color;
Alexis Hetu126bd7a2019-05-10 17:07:42 -04001512 for(int sample = 1; sample < state.srcSamples; sample++)
Alexis Hetu54ec7592019-03-20 14:37:16 -04001513 {
1514 s += *Pointer<Int>(blit + OFFSET(BlitData, sSliceB));
Nicolas Capens88ac3672019-08-01 13:22:34 -04001515 color = readFloat4(s, state);
1516
Antonio Maiorano7738ed72019-10-21 11:29:41 -04001517 if(state.allowSRGBConversion && state.sourceFormat.isSRGBformat()) // sRGB -> RGB
Alexis Hetu3f80dad2019-05-22 18:23:47 -04001518 {
Nicolas Capens88ac3672019-08-01 13:22:34 -04001519 ApplyScaleAndClamp(color, state);
Alexis Hetu3f80dad2019-05-22 18:23:47 -04001520 preScaled = true;
1521 }
Alexis Hetu54ec7592019-03-20 14:37:16 -04001522 accum += color;
1523 }
1524 color = accum * Float4(1.0f / static_cast<float>(state.srcSamples));
1525 }
Nicolas Capens68a82382018-10-02 13:16:55 -04001526 }
1527 else // Bilinear filtering
1528 {
1529 Float X = x;
1530 Float Y = y;
1531
1532 if(state.clampToEdge)
1533 {
1534 X = Min(Max(x, 0.5f), Float(sWidth) - 0.5f);
1535 Y = Min(Max(y, 0.5f), Float(sHeight) - 0.5f);
1536 }
1537
1538 Float x0 = X - 0.5f;
1539 Float y0 = Y - 0.5f;
1540
1541 Int X0 = Max(Int(x0), 0);
1542 Int Y0 = Max(Int(y0), 0);
1543
1544 Int X1 = X0 + 1;
1545 Int Y1 = Y0 + 1;
1546 X1 = IfThenElse(X1 >= sWidth, X0, X1);
1547 Y1 = IfThenElse(Y1 >= sHeight, Y0, Y1);
1548
1549 Pointer<Byte> s00 = source + ComputeOffset(X0, Y0, sPitchB, srcBytes, srcQuadLayout);
1550 Pointer<Byte> s01 = source + ComputeOffset(X1, Y0, sPitchB, srcBytes, srcQuadLayout);
1551 Pointer<Byte> s10 = source + ComputeOffset(X0, Y1, sPitchB, srcBytes, srcQuadLayout);
1552 Pointer<Byte> s11 = source + ComputeOffset(X1, Y1, sPitchB, srcBytes, srcQuadLayout);
1553
Nicolas Capens88ac3672019-08-01 13:22:34 -04001554 Float4 c00 = readFloat4(s00, state);
1555 Float4 c01 = readFloat4(s01, state);
1556 Float4 c10 = readFloat4(s10, state);
1557 Float4 c11 = readFloat4(s11, state);
Nicolas Capens68a82382018-10-02 13:16:55 -04001558
Antonio Maiorano7738ed72019-10-21 11:29:41 -04001559 if(state.allowSRGBConversion && state.sourceFormat.isSRGBformat()) // sRGB -> RGB
Nicolas Capens68a82382018-10-02 13:16:55 -04001560 {
Nicolas Capens88ac3672019-08-01 13:22:34 -04001561 ApplyScaleAndClamp(c00, state);
1562 ApplyScaleAndClamp(c01, state);
1563 ApplyScaleAndClamp(c10, state);
1564 ApplyScaleAndClamp(c11, state);
Nicolas Capens68a82382018-10-02 13:16:55 -04001565 preScaled = true;
1566 }
1567
1568 Float4 fx = Float4(x0 - Float(X0));
1569 Float4 fy = Float4(y0 - Float(Y0));
1570 Float4 ix = Float4(1.0f) - fx;
1571 Float4 iy = Float4(1.0f) - fy;
1572
1573 color = (c00 * ix + c01 * fx) * iy +
1574 (c10 * ix + c11 * fx) * fy;
1575 }
1576
Nicolas Capens88ac3672019-08-01 13:22:34 -04001577 ApplyScaleAndClamp(color, state, preScaled);
Nicolas Capens68a82382018-10-02 13:16:55 -04001578
1579 for(int s = 0; s < state.destSamples; s++)
1580 {
Nicolas Capens88ac3672019-08-01 13:22:34 -04001581 write(color, d, state);
Nicolas Capens68a82382018-10-02 13:16:55 -04001582
1583 d += *Pointer<Int>(blit + OFFSET(BlitData,dSliceB));
1584 }
1585 }
1586 }
1587 }
1588 }
1589
Ben Clayton056d6922019-07-04 12:41:13 +01001590 return function("BlitRoutine");
Nicolas Capens68a82382018-10-02 13:16:55 -04001591 }
1592
Ben Clayton6897e9b2019-07-16 17:27:27 +01001593 std::shared_ptr<Routine> Blitter::getBlitRoutine(const State &state)
Alexis Hetu33642272019-03-01 11:55:59 -05001594 {
Alexis Hetuf60a2d52019-05-09 14:16:05 -04001595 std::unique_lock<std::mutex> lock(blitMutex);
Ben Clayton6897e9b2019-07-16 17:27:27 +01001596 auto blitRoutine = blitCache.query(state);
Alexis Hetu33642272019-03-01 11:55:59 -05001597
1598 if(!blitRoutine)
1599 {
1600 blitRoutine = generate(state);
Alexis Hetuf60a2d52019-05-09 14:16:05 -04001601 blitCache.add(state, blitRoutine);
Alexis Hetu33642272019-03-01 11:55:59 -05001602 }
1603
Alexis Hetu33642272019-03-01 11:55:59 -05001604 return blitRoutine;
1605 }
1606
Ben Clayton6897e9b2019-07-16 17:27:27 +01001607 std::shared_ptr<Routine> Blitter::getCornerUpdateRoutine(const State &state)
Alexis Hetuf60a2d52019-05-09 14:16:05 -04001608 {
1609 std::unique_lock<std::mutex> lock(cornerUpdateMutex);
Ben Clayton6897e9b2019-07-16 17:27:27 +01001610 auto cornerUpdateRoutine = cornerUpdateCache.query(state);
Alexis Hetuf60a2d52019-05-09 14:16:05 -04001611
1612 if(!cornerUpdateRoutine)
1613 {
1614 cornerUpdateRoutine = generateCornerUpdate(state);
Alexis Hetuf60a2d52019-05-09 14:16:05 -04001615 cornerUpdateCache.add(state, cornerUpdateRoutine);
1616 }
1617
1618 return cornerUpdateRoutine;
1619 }
1620
Chris Forbes529eda32019-05-08 10:27:05 -07001621 void Blitter::blitToBuffer(const vk::Image *src, VkImageSubresourceLayers subresource, VkOffset3D offset, VkExtent3D extent, uint8_t *dst, int bufferRowPitch, int bufferSlicePitch)
1622 {
1623 auto aspect = static_cast<VkImageAspectFlagBits>(subresource.aspectMask);
1624 auto format = src->getFormat(aspect);
1625 State state(format, format.getNonQuadLayoutFormat(), VK_SAMPLE_COUNT_1_BIT, VK_SAMPLE_COUNT_1_BIT,
Antonio Maiorano7738ed72019-10-21 11:29:41 -04001626 Options{false, false});
Chris Forbes529eda32019-05-08 10:27:05 -07001627
Ben Clayton6897e9b2019-07-16 17:27:27 +01001628 auto blitRoutine = getBlitRoutine(state);
Chris Forbes529eda32019-05-08 10:27:05 -07001629 if(!blitRoutine)
1630 {
1631 return;
1632 }
1633
1634 void(*blitFunction)(const BlitData *data) = (void(*)(const BlitData*))blitRoutine->getEntry();
1635
1636 BlitData data =
1637 {
1638 nullptr, // source
1639 dst, // dest
1640 src->rowPitchBytes(aspect, subresource.mipLevel), // sPitchB
1641 bufferRowPitch, // dPitchB
1642 src->slicePitchBytes(aspect, subresource.mipLevel), // sSliceB
1643 bufferSlicePitch, // dSliceB
1644
1645 0, 0, 1, 1,
1646
1647 0, // y0d
1648 static_cast<int>(extent.height), // y1d
1649 0, // x0d
1650 static_cast<int>(extent.width), // x1d
1651
1652 static_cast<int>(extent.width), // sWidth
1653 static_cast<int>(extent.height) // sHeight;
1654 };
1655
1656 VkOffset3D srcOffset = { 0, 0, offset.z };
1657
1658 VkImageSubresourceLayers srcSubresLayers = subresource;
1659 srcSubresLayers.layerCount = 1;
1660
1661 VkImageSubresourceRange srcSubresRange =
1662 {
1663 subresource.aspectMask,
1664 subresource.mipLevel,
1665 1,
1666 subresource.baseArrayLayer,
1667 subresource.layerCount
1668 };
1669
1670 uint32_t lastLayer = src->getLastLayerIndex(srcSubresRange);
1671
1672 for(; srcSubresLayers.baseArrayLayer <= lastLayer; srcSubresLayers.baseArrayLayer++)
1673 {
1674 srcOffset.z = offset.z;
1675
1676 for(auto i = 0u; i < extent.depth; i++)
1677 {
1678 data.source = src->getTexelPointer(srcOffset, srcSubresLayers);
1679 ASSERT(data.source < src->end());
1680 blitFunction(&data);
1681 srcOffset.z++;
1682 data.dest = (dst += bufferSlicePitch);
1683 }
1684 }
1685 }
1686
1687 void Blitter::blitFromBuffer(const vk::Image *dst, VkImageSubresourceLayers subresource, VkOffset3D offset, VkExtent3D extent, uint8_t *src, int bufferRowPitch, int bufferSlicePitch)
1688 {
1689 auto aspect = static_cast<VkImageAspectFlagBits>(subresource.aspectMask);
1690 auto format = dst->getFormat(aspect);
1691 State state(format.getNonQuadLayoutFormat(), format, VK_SAMPLE_COUNT_1_BIT, VK_SAMPLE_COUNT_1_BIT,
Antonio Maiorano7738ed72019-10-21 11:29:41 -04001692 Options{false, false});
Chris Forbes529eda32019-05-08 10:27:05 -07001693
Ben Clayton6897e9b2019-07-16 17:27:27 +01001694 auto blitRoutine = getBlitRoutine(state);
Chris Forbes529eda32019-05-08 10:27:05 -07001695 if(!blitRoutine)
1696 {
1697 return;
1698 }
1699
1700 void(*blitFunction)(const BlitData *data) = (void(*)(const BlitData*))blitRoutine->getEntry();
1701
1702 BlitData data =
1703 {
1704 src, // source
1705 nullptr, // dest
1706 bufferRowPitch, // sPitchB
1707 dst->rowPitchBytes(aspect, subresource.mipLevel), // dPitchB
1708 bufferSlicePitch, // sSliceB
1709 dst->slicePitchBytes(aspect, subresource.mipLevel), // dSliceB
1710
1711 0, 0, 1, 1,
1712
1713 offset.y, // y0d
1714 static_cast<int>(offset.y + extent.height), // y1d
1715 offset.x, // x0d
1716 static_cast<int>(offset.x + extent.width), // x1d
1717
1718 static_cast<int>(extent.width), // sWidth
1719 static_cast<int>(extent.height) // sHeight;
1720 };
1721
1722 VkOffset3D dstOffset = { 0, 0, offset.z };
1723
1724 VkImageSubresourceLayers dstSubresLayers = subresource;
1725 dstSubresLayers.layerCount = 1;
1726
1727 VkImageSubresourceRange dstSubresRange =
1728 {
1729 subresource.aspectMask,
1730 subresource.mipLevel,
1731 1,
1732 subresource.baseArrayLayer,
1733 subresource.layerCount
1734 };
1735
1736 uint32_t lastLayer = dst->getLastLayerIndex(dstSubresRange);
1737
1738 for(; dstSubresLayers.baseArrayLayer <= lastLayer; dstSubresLayers.baseArrayLayer++)
1739 {
1740 dstOffset.z = offset.z;
1741
1742 for(auto i = 0u; i < extent.depth; i++)
1743 {
1744 data.dest = dst->getTexelPointer(dstOffset, dstSubresLayers);
1745 ASSERT(data.dest < dst->end());
1746 blitFunction(&data);
1747 dstOffset.z++;
1748 data.source = (src += bufferSlicePitch);
1749 }
1750 }
1751 }
1752
Alexis Hetuac873342019-04-17 15:59:03 -04001753 void Blitter::blit(const vk::Image *src, vk::Image *dst, VkImageBlit region, VkFilter filter)
Alexis Hetu33642272019-03-01 11:55:59 -05001754 {
1755 if(dst->getFormat() == VK_FORMAT_UNDEFINED)
1756 {
1757 return;
1758 }
1759
Alexis Hetu377077a2019-03-14 15:10:51 -04001760 if((region.srcSubresource.layerCount != region.dstSubresource.layerCount) ||
Alexis Hetu33642272019-03-01 11:55:59 -05001761 (region.srcSubresource.aspectMask != region.dstSubresource.aspectMask))
1762 {
Ben Clayton00424c12019-03-17 17:29:30 +00001763 UNIMPLEMENTED("region");
Alexis Hetu33642272019-03-01 11:55:59 -05001764 }
1765
1766 if(region.dstOffsets[0].x > region.dstOffsets[1].x)
1767 {
Nicolas Capensb8c63932019-03-19 01:52:40 -04001768 std::swap(region.srcOffsets[0].x, region.srcOffsets[1].x);
1769 std::swap(region.dstOffsets[0].x, region.dstOffsets[1].x);
Alexis Hetu33642272019-03-01 11:55:59 -05001770 }
1771
1772 if(region.dstOffsets[0].y > region.dstOffsets[1].y)
1773 {
Nicolas Capensb8c63932019-03-19 01:52:40 -04001774 std::swap(region.srcOffsets[0].y, region.srcOffsets[1].y);
1775 std::swap(region.dstOffsets[0].y, region.dstOffsets[1].y);
Alexis Hetu33642272019-03-01 11:55:59 -05001776 }
1777
Nicolas Capensba873302019-05-16 11:25:27 -04001778 VkImageAspectFlagBits srcAspect = static_cast<VkImageAspectFlagBits>(region.srcSubresource.aspectMask);
1779 VkImageAspectFlagBits dstAspect = static_cast<VkImageAspectFlagBits>(region.dstSubresource.aspectMask);
1780 VkExtent3D srcExtent = src->getMipLevelExtent(srcAspect, region.srcSubresource.mipLevel);
Alexis Hetu33642272019-03-01 11:55:59 -05001781
1782 int32_t numSlices = (region.srcOffsets[1].z - region.srcOffsets[0].z);
1783 ASSERT(numSlices == (region.dstOffsets[1].z - region.dstOffsets[0].z));
1784
Alexis Hetue24bc662019-03-21 18:04:29 -04001785 float widthRatio = static_cast<float>(region.srcOffsets[1].x - region.srcOffsets[0].x) /
1786 static_cast<float>(region.dstOffsets[1].x - region.dstOffsets[0].x);
1787 float heightRatio = static_cast<float>(region.srcOffsets[1].y - region.srcOffsets[0].y) /
1788 static_cast<float>(region.dstOffsets[1].y - region.dstOffsets[0].y);
1789 float x0 = region.srcOffsets[0].x + (0.5f - region.dstOffsets[0].x) * widthRatio;
1790 float y0 = region.srcOffsets[0].y + (0.5f - region.dstOffsets[0].y) * heightRatio;
1791
Antonio Maiorano7738ed72019-10-21 11:29:41 -04001792 auto srcFormat = src->getFormat(srcAspect);
1793 auto dstFormat = dst->getFormat(dstAspect);
1794
Alexis Hetue24bc662019-03-21 18:04:29 -04001795 bool doFilter = (filter != VK_FILTER_NEAREST);
Antonio Maiorano7738ed72019-10-21 11:29:41 -04001796 bool allowSRGBConversion =
1797 doFilter ||
1798 (src->getSampleCountFlagBits() > 1) ||
1799 (srcFormat.isSRGBformat() != dstFormat.isSRGBformat());
1800
Alexis Hetu54ec7592019-03-20 14:37:16 -04001801 State state(src->getFormat(srcAspect), dst->getFormat(dstAspect), src->getSampleCountFlagBits(), dst->getSampleCountFlagBits(),
Antonio Maiorano7738ed72019-10-21 11:29:41 -04001802 Options{ doFilter, allowSRGBConversion });
Alexis Hetu33642272019-03-01 11:55:59 -05001803 state.clampToEdge = (region.srcOffsets[0].x < 0) ||
1804 (region.srcOffsets[0].y < 0) ||
1805 (static_cast<uint32_t>(region.srcOffsets[1].x) > srcExtent.width) ||
Alexis Hetue24bc662019-03-21 18:04:29 -04001806 (static_cast<uint32_t>(region.srcOffsets[1].y) > srcExtent.height) ||
1807 (doFilter && ((x0 < 0.5f) || (y0 < 0.5f)));
Alexis Hetu33642272019-03-01 11:55:59 -05001808
Ben Clayton6897e9b2019-07-16 17:27:27 +01001809 auto blitRoutine = getBlitRoutine(state);
Alexis Hetu33642272019-03-01 11:55:59 -05001810 if(!blitRoutine)
1811 {
1812 return;
1813 }
1814
1815 void(*blitFunction)(const BlitData *data) = (void(*)(const BlitData*))blitRoutine->getEntry();
1816
Alexis Hetue24bc662019-03-21 18:04:29 -04001817 BlitData data =
1818 {
1819 nullptr, // source
1820 nullptr, // dest
1821 src->rowPitchBytes(srcAspect, region.srcSubresource.mipLevel), // sPitchB
1822 dst->rowPitchBytes(dstAspect, region.dstSubresource.mipLevel), // dPitchB
Alexis Hetu54ec7592019-03-20 14:37:16 -04001823 src->slicePitchBytes(srcAspect, region.srcSubresource.mipLevel), // sSliceB
Alexis Hetue24bc662019-03-21 18:04:29 -04001824 dst->slicePitchBytes(dstAspect, region.dstSubresource.mipLevel), // dSliceB
Alexis Hetu33642272019-03-01 11:55:59 -05001825
Alexis Hetue24bc662019-03-21 18:04:29 -04001826 x0,
1827 y0,
1828 widthRatio,
1829 heightRatio,
Alexis Hetu33642272019-03-01 11:55:59 -05001830
Alexis Hetue24bc662019-03-21 18:04:29 -04001831 region.dstOffsets[0].y, // y0d
1832 region.dstOffsets[1].y, // y1d
1833 region.dstOffsets[0].x, // x0d
1834 region.dstOffsets[1].x, // x1d
Alexis Hetu33642272019-03-01 11:55:59 -05001835
Alexis Hetue24bc662019-03-21 18:04:29 -04001836 static_cast<int>(srcExtent.width), // sWidth
1837 static_cast<int>(srcExtent.height) // sHeight;
1838 };
Alexis Hetu33642272019-03-01 11:55:59 -05001839
1840 VkOffset3D srcOffset = { 0, 0, region.srcOffsets[0].z };
1841 VkOffset3D dstOffset = { 0, 0, region.dstOffsets[0].z };
1842
Alexis Hetu377077a2019-03-14 15:10:51 -04001843 VkImageSubresourceLayers srcSubresLayers =
Alexis Hetu33642272019-03-01 11:55:59 -05001844 {
Alexis Hetu377077a2019-03-14 15:10:51 -04001845 region.srcSubresource.aspectMask,
1846 region.srcSubresource.mipLevel,
1847 region.srcSubresource.baseArrayLayer,
1848 1
1849 };
1850
1851 VkImageSubresourceLayers dstSubresLayers =
1852 {
1853 region.dstSubresource.aspectMask,
1854 region.dstSubresource.mipLevel,
1855 region.dstSubresource.baseArrayLayer,
1856 1
1857 };
1858
1859 VkImageSubresourceRange srcSubresRange =
1860 {
1861 region.srcSubresource.aspectMask,
1862 region.srcSubresource.mipLevel,
1863 1,
1864 region.srcSubresource.baseArrayLayer,
1865 region.srcSubresource.layerCount
1866 };
1867
1868 uint32_t lastLayer = src->getLastLayerIndex(srcSubresRange);
1869
1870 for(; srcSubresLayers.baseArrayLayer <= lastLayer; srcSubresLayers.baseArrayLayer++, dstSubresLayers.baseArrayLayer++)
1871 {
1872 srcOffset.z = region.srcOffsets[0].z;
1873 dstOffset.z = region.dstOffsets[0].z;
1874
1875 for(int i = 0; i < numSlices; i++)
1876 {
1877 data.source = src->getTexelPointer(srcOffset, srcSubresLayers);
1878 data.dest = dst->getTexelPointer(dstOffset, dstSubresLayers);
1879
1880 ASSERT(data.source < src->end());
1881 ASSERT(data.dest < dst->end());
1882
1883 blitFunction(&data);
1884 srcOffset.z++;
1885 dstOffset.z++;
1886 }
Alexis Hetu33642272019-03-01 11:55:59 -05001887 }
1888 }
Alexis Hetub317d962019-04-29 14:07:31 -04001889
1890 void Blitter::computeCubeCorner(Pointer<Byte>& layer, Int& x0, Int& x1, Int& y0, Int& y1, Int& pitchB, const State& state)
1891 {
1892 int bytes = state.sourceFormat.bytes();
1893 bool quadLayout = state.sourceFormat.hasQuadLayout();
1894
Nicolas Capens88ac3672019-08-01 13:22:34 -04001895 Float4 c = readFloat4(layer + ComputeOffset(x0, y1, pitchB, bytes, quadLayout), state) +
1896 readFloat4(layer + ComputeOffset(x1, y0, pitchB, bytes, quadLayout), state) +
1897 readFloat4(layer + ComputeOffset(x1, y1, pitchB, bytes, quadLayout), state);
1898
1899 c *= Float4(1.0f / 3.0f);
1900
1901 write(c, layer + ComputeOffset(x0, y0, pitchB, bytes, quadLayout), state);
Alexis Hetub317d962019-04-29 14:07:31 -04001902 }
1903
Ben Clayton6897e9b2019-07-16 17:27:27 +01001904 std::shared_ptr<Routine> Blitter::generateCornerUpdate(const State& state)
Alexis Hetub317d962019-04-29 14:07:31 -04001905 {
1906 // Reading and writing from/to the same image
1907 ASSERT(state.sourceFormat == state.destFormat);
1908 ASSERT(state.srcSamples == state.destSamples);
1909
1910 if(state.srcSamples != 1)
1911 {
1912 UNIMPLEMENTED("state.srcSamples %d", state.srcSamples);
1913 }
1914
1915 Function<Void(Pointer<Byte>)> function;
1916 {
1917 Pointer<Byte> blit(function.Arg<0>());
1918
1919 Pointer<Byte> layers = *Pointer<Pointer<Byte>>(blit + OFFSET(CubeBorderData, layers));
1920 Int pitchB = *Pointer<Int>(blit + OFFSET(CubeBorderData, pitchB));
1921 UInt layerSize = *Pointer<Int>(blit + OFFSET(CubeBorderData, layerSize));
1922 UInt dim = *Pointer<Int>(blit + OFFSET(CubeBorderData, dim));
1923
1924 // Low Border, Low Pixel, High Border, High Pixel
1925 Int LB(-1), LP(0), HB(dim), HP(dim-1);
1926
Nicolas Capensbb575d42019-05-31 15:36:59 -04001927 for(int face = 0; face < 6; face++)
Alexis Hetub317d962019-04-29 14:07:31 -04001928 {
1929 computeCubeCorner(layers, LB, LP, LB, LP, pitchB, state);
1930 computeCubeCorner(layers, LB, LP, HB, HP, pitchB, state);
1931 computeCubeCorner(layers, HB, HP, LB, LP, pitchB, state);
1932 computeCubeCorner(layers, HB, HP, HB, HP, pitchB, state);
1933 layers = layers + layerSize;
1934 }
1935 }
1936
Ben Clayton056d6922019-07-04 12:41:13 +01001937 return function("BlitRoutine");
Alexis Hetub317d962019-04-29 14:07:31 -04001938 }
1939
1940 void Blitter::updateBorders(vk::Image* image, const VkImageSubresourceLayers& subresourceLayers)
1941 {
1942 if(image->getArrayLayers() < (subresourceLayers.baseArrayLayer + 6))
1943 {
1944 UNIMPLEMENTED("image->getArrayLayers() %d, baseArrayLayer %d",
1945 image->getArrayLayers(), subresourceLayers.baseArrayLayer);
1946 }
1947
1948 // From Vulkan 1.1 spec, section 11.5. Image Views:
1949 // "For cube and cube array image views, the layers of the image view starting
1950 // at baseArrayLayer correspond to faces in the order +X, -X, +Y, -Y, +Z, -Z."
1951 VkImageSubresourceLayers posX = subresourceLayers;
1952 posX.layerCount = 1;
1953 VkImageSubresourceLayers negX = posX;
1954 negX.baseArrayLayer++;
1955 VkImageSubresourceLayers posY = negX;
1956 posY.baseArrayLayer++;
1957 VkImageSubresourceLayers negY = posY;
1958 negY.baseArrayLayer++;
1959 VkImageSubresourceLayers posZ = negY;
1960 posZ.baseArrayLayer++;
1961 VkImageSubresourceLayers negZ = posZ;
1962 negZ.baseArrayLayer++;
1963
1964 // Copy top / bottom
1965 copyCubeEdge(image, posX, BOTTOM, negY, RIGHT);
1966 copyCubeEdge(image, posY, BOTTOM, posZ, TOP);
1967 copyCubeEdge(image, posZ, BOTTOM, negY, TOP);
1968 copyCubeEdge(image, negX, BOTTOM, negY, LEFT);
1969 copyCubeEdge(image, negY, BOTTOM, negZ, BOTTOM);
1970 copyCubeEdge(image, negZ, BOTTOM, negY, BOTTOM);
1971
1972 copyCubeEdge(image, posX, TOP, posY, RIGHT);
1973 copyCubeEdge(image, posY, TOP, negZ, TOP);
1974 copyCubeEdge(image, posZ, TOP, posY, BOTTOM);
1975 copyCubeEdge(image, negX, TOP, posY, LEFT);
1976 copyCubeEdge(image, negY, TOP, posZ, BOTTOM);
1977 copyCubeEdge(image, negZ, TOP, posY, TOP);
1978
1979 // Copy left / right
1980 copyCubeEdge(image, posX, RIGHT, negZ, LEFT);
1981 copyCubeEdge(image, posY, RIGHT, posX, TOP);
1982 copyCubeEdge(image, posZ, RIGHT, posX, LEFT);
1983 copyCubeEdge(image, negX, RIGHT, posZ, LEFT);
1984 copyCubeEdge(image, negY, RIGHT, posX, BOTTOM);
1985 copyCubeEdge(image, negZ, RIGHT, negX, LEFT);
1986
1987 copyCubeEdge(image, posX, LEFT, posZ, RIGHT);
1988 copyCubeEdge(image, posY, LEFT, negX, TOP);
1989 copyCubeEdge(image, posZ, LEFT, negX, RIGHT);
1990 copyCubeEdge(image, negX, LEFT, negZ, RIGHT);
1991 copyCubeEdge(image, negY, LEFT, negX, BOTTOM);
1992 copyCubeEdge(image, negZ, LEFT, posX, RIGHT);
1993
1994 // Compute corner colors
1995 VkImageAspectFlagBits aspect = static_cast<VkImageAspectFlagBits>(subresourceLayers.aspectMask);
1996 vk::Format format = image->getFormat(aspect);
1997 VkSampleCountFlagBits samples = image->getSampleCountFlagBits();
Antonio Maiorano7738ed72019-10-21 11:29:41 -04001998 State state(format, format, samples, samples, Options{ 0xF });
Alexis Hetub317d962019-04-29 14:07:31 -04001999
2000 if(samples != VK_SAMPLE_COUNT_1_BIT)
2001 {
2002 UNIMPLEMENTED("Multi-sampled cube: %d samples", static_cast<int>(samples));
2003 }
2004
Ben Clayton6897e9b2019-07-16 17:27:27 +01002005 auto cornerUpdateRoutine = getCornerUpdateRoutine(state);
Alexis Hetub317d962019-04-29 14:07:31 -04002006 if(!cornerUpdateRoutine)
2007 {
Alexis Hetuf60a2d52019-05-09 14:16:05 -04002008 return;
Alexis Hetub317d962019-04-29 14:07:31 -04002009 }
2010
Alexis Hetub317d962019-04-29 14:07:31 -04002011 void(*cornerUpdateFunction)(const CubeBorderData *data) = (void(*)(const CubeBorderData*))cornerUpdateRoutine->getEntry();
2012
Nicolas Capensba873302019-05-16 11:25:27 -04002013 VkExtent3D extent = image->getMipLevelExtent(aspect, subresourceLayers.mipLevel);
Alexis Hetub317d962019-04-29 14:07:31 -04002014 CubeBorderData data =
2015 {
2016 image->getTexelPointer({ 0, 0, 0 }, posX),
2017 image->rowPitchBytes(aspect, subresourceLayers.mipLevel),
2018 static_cast<uint32_t>(image->getLayerSize(aspect)),
2019 extent.width
2020 };
2021 cornerUpdateFunction(&data);
2022 }
2023
2024 void Blitter::copyCubeEdge(vk::Image* image,
2025 const VkImageSubresourceLayers& dstSubresourceLayers, Edge dstEdge,
2026 const VkImageSubresourceLayers& srcSubresourceLayers, Edge srcEdge)
2027 {
2028 ASSERT(srcSubresourceLayers.aspectMask == dstSubresourceLayers.aspectMask);
2029 ASSERT(srcSubresourceLayers.mipLevel == dstSubresourceLayers.mipLevel);
2030 ASSERT(srcSubresourceLayers.baseArrayLayer != dstSubresourceLayers.baseArrayLayer);
2031 ASSERT(srcSubresourceLayers.layerCount == 1);
2032 ASSERT(dstSubresourceLayers.layerCount == 1);
2033
2034 // Figure out if the edges to be copied in reverse order respectively from one another
2035 // The copy should be reversed whenever the same edges are contiguous or if we're
2036 // copying top <-> right or bottom <-> left. This is explained by the layout, which is:
2037 //
2038 // | +y |
2039 // | -x | +z | +x | -z |
2040 // | -y |
2041
2042 bool reverse = (srcEdge == dstEdge) ||
2043 ((srcEdge == TOP) && (dstEdge == RIGHT)) ||
2044 ((srcEdge == RIGHT) && (dstEdge == TOP)) ||
2045 ((srcEdge == BOTTOM) && (dstEdge == LEFT)) ||
2046 ((srcEdge == LEFT) && (dstEdge == BOTTOM));
2047
2048 VkImageAspectFlagBits aspect = static_cast<VkImageAspectFlagBits>(srcSubresourceLayers.aspectMask);
2049 int bytes = image->getFormat(aspect).bytes();
2050 int pitchB = image->rowPitchBytes(aspect, srcSubresourceLayers.mipLevel);
2051
Nicolas Capensba873302019-05-16 11:25:27 -04002052 VkExtent3D extent = image->getMipLevelExtent(aspect, srcSubresourceLayers.mipLevel);
Alexis Hetub317d962019-04-29 14:07:31 -04002053 int w = extent.width;
2054 int h = extent.height;
2055 if(w != h)
2056 {
Nicolas Capens88ac3672019-08-01 13:22:34 -04002057 UNSUPPORTED("Cube doesn't have square faces : (%d, %d)", w, h);
Alexis Hetub317d962019-04-29 14:07:31 -04002058 }
2059
2060 // Src is expressed in the regular [0, width-1], [0, height-1] space
2061 bool srcHorizontal = ((srcEdge == TOP) || (srcEdge == BOTTOM));
2062 int srcDelta = srcHorizontal ? bytes : pitchB;
2063 VkOffset3D srcOffset = { (srcEdge == RIGHT) ? (w - 1) : 0, (srcEdge == BOTTOM) ? (h - 1) : 0, 0 };
2064
2065 // Dst contains borders, so it is expressed in the [-1, width], [-1, height] space
2066 bool dstHorizontal = ((dstEdge == TOP) || (dstEdge == BOTTOM));
2067 int dstDelta = (dstHorizontal ? bytes : pitchB) * (reverse ? -1 : 1);
2068 VkOffset3D dstOffset = { (dstEdge == RIGHT) ? w : -1, (dstEdge == BOTTOM) ? h : -1, 0 };
2069
2070 // Don't write in the corners
2071 if(dstHorizontal)
2072 {
2073 dstOffset.x += reverse ? w : 1;
2074 }
2075 else
2076 {
2077 dstOffset.y += reverse ? h : 1;
2078 }
2079
2080 const uint8_t* src = static_cast<const uint8_t*>(image->getTexelPointer(srcOffset, srcSubresourceLayers));
2081 uint8_t *dst = static_cast<uint8_t*>(image->getTexelPointer(dstOffset, dstSubresourceLayers));
2082 ASSERT((src < image->end()) && ((src + (w * srcDelta)) < image->end()));
2083 ASSERT((dst < image->end()) && ((dst + (w * dstDelta)) < image->end()));
2084
2085 for(int i = 0; i < w; ++i, dst += dstDelta, src += srcDelta)
2086 {
2087 memcpy(dst, src, bytes);
2088 }
2089 }
Nicolas Capens68a82382018-10-02 13:16:55 -04002090}