layers: Fix a bug in depth/stencil packing

Fix incorrect size calculations for image<->buffer copies caused by
the special packing rules of depth and stencil formats.
Add test code to the ImageBufferCopyTests case in layer_validation_tests
to exercise these checks.

Change-Id: I90446379ba8888bf91eda6567e18fa1aae9593b1
diff --git a/tests/layer_validation_tests.cpp b/tests/layer_validation_tests.cpp
index d293fbf..2ad0737 100644
--- a/tests/layer_validation_tests.cpp
+++ b/tests/layer_validation_tests.cpp
@@ -9050,8 +9050,6 @@
     // Now test stencil-only
     vkGetPhysicalDeviceFormatProperties(m_device->phy().handle(), VK_FORMAT_S8_UINT, &format_props);
     if (format_props.optimalTilingFeatures & VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT) {
-        m_errorMonitor->SetDesiredFailureMsg(VK_DEBUG_REPORT_ERROR_BIT_EXT,
-                                             "Stencil-only image formats must have the VK_IMAGE_ASPECT_STENCIL_BIT set.");
         VkDepthStencilObj s_image(m_device);
         s_image.Init(m_device, 128, 128, VK_FORMAT_S8_UINT);
         ASSERT_TRUE(s_image.initialized());
@@ -9059,6 +9057,8 @@
         img_barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL;
         img_barrier.image = s_image.handle();
         // Use of COLOR aspect on depth image is error
+        m_errorMonitor->SetDesiredFailureMsg(VK_DEBUG_REPORT_ERROR_BIT_EXT,
+                                             "Stencil-only image formats must have the VK_IMAGE_ASPECT_STENCIL_BIT set.");
         img_barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
         vkCmdPipelineBarrier(m_commandBuffer->GetBufferHandle(), VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT, 0,
                              0, nullptr, 0, nullptr, 1, &img_barrier);
@@ -15899,6 +15899,11 @@
     VkImageObj image_64k(m_device);        // 128^2 texels, 64k
     VkImageObj image_16k(m_device);        // 64^2 texels, 16k
     VkImageObj image_16k_depth(m_device);  // 64^2 texels, depth, 16k
+    VkImageObj ds_image_4D_1S(m_device);   // 256^2 texels, 512kb (256k depth, 64k stencil, 192k pack)
+    VkImageObj ds_image_3D_1S(m_device);   // 256^2 texels, 256kb (192k depth, 64k stencil)
+    VkImageObj ds_image_2D(m_device);      // 256^2 texels, 128k (128k depth)
+    VkImageObj ds_image_1S(m_device);      // 256^2 texels, 64k (64k stencil)
+
     image_64k.init(128, 128, VK_FORMAT_R8G8B8A8_UINT,
                    VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT,
                    VK_IMAGE_TILING_OPTIMAL, 0);
@@ -15911,10 +15916,51 @@
     ASSERT_TRUE(image_16k.initialized());
     ASSERT_TRUE(image_16k_depth.initialized());
 
-    vk_testing::Buffer buffer_64k, buffer_16k;
+    // Verify all needed Depth/Stencil formats are supported
+    bool missing_ds_support = false;
+    VkFormatProperties props = {0, 0, 0};
+    vkGetPhysicalDeviceFormatProperties(m_device->phy().handle(), VK_FORMAT_D32_SFLOAT_S8_UINT, &props);
+    missing_ds_support |= (props.bufferFeatures == 0 && props.linearTilingFeatures == 0 && props.optimalTilingFeatures == 0);
+    vkGetPhysicalDeviceFormatProperties(m_device->phy().handle(), VK_FORMAT_D24_UNORM_S8_UINT, &props);
+    missing_ds_support |= (props.bufferFeatures == 0 && props.linearTilingFeatures == 0 && props.optimalTilingFeatures == 0);
+    vkGetPhysicalDeviceFormatProperties(m_device->phy().handle(), VK_FORMAT_D16_UNORM, &props);
+    missing_ds_support |= (props.bufferFeatures == 0 && props.linearTilingFeatures == 0 && props.optimalTilingFeatures == 0);
+    vkGetPhysicalDeviceFormatProperties(m_device->phy().handle(), VK_FORMAT_S8_UINT, &props);
+    missing_ds_support |= (props.bufferFeatures == 0 && props.linearTilingFeatures == 0 && props.optimalTilingFeatures == 0);
+
+    if (!missing_ds_support) {
+        ds_image_4D_1S.init(
+            256, 256, VK_FORMAT_D32_SFLOAT_S8_UINT,
+            VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
+            VK_IMAGE_TILING_OPTIMAL, 0);
+        ASSERT_TRUE(ds_image_4D_1S.initialized());
+
+        ds_image_3D_1S.init(
+            256, 256, VK_FORMAT_D24_UNORM_S8_UINT,
+            VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
+            VK_IMAGE_TILING_OPTIMAL, 0);
+        ASSERT_TRUE(ds_image_3D_1S.initialized());
+
+        ds_image_2D.init(
+            256, 256, VK_FORMAT_D16_UNORM,
+            VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
+            VK_IMAGE_TILING_OPTIMAL, 0);
+        ASSERT_TRUE(ds_image_2D.initialized());
+
+        ds_image_1S.init(
+            256, 256, VK_FORMAT_S8_UINT,
+            VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
+            VK_IMAGE_TILING_OPTIMAL, 0);
+        ASSERT_TRUE(ds_image_1S.initialized());
+    }
+
+    // Allocate buffers
+    vk_testing::Buffer buffer_256k, buffer_128k, buffer_64k, buffer_16k;
     VkMemoryPropertyFlags reqs = 0;
-    buffer_64k.init_as_src_and_dst(*m_device, 128 * 128 * 4, reqs);  // 64k
-    buffer_16k.init_as_src_and_dst(*m_device, 64 * 64 * 4, reqs);    // 16k
+    buffer_256k.init_as_src_and_dst(*m_device, 262144, reqs);  // 256k
+    buffer_128k.init_as_src_and_dst(*m_device, 131072, reqs);  // 128k
+    buffer_64k.init_as_src_and_dst(*m_device, 65536, reqs);    // 64k
+    buffer_16k.init_as_src_and_dst(*m_device, 16384, reqs);    // 16k
 
     VkBufferImageCopy region = {};
     region.bufferRowLength = 0;
@@ -16029,11 +16075,105 @@
                            buffer_16k.handle(), 1, &region);
     m_errorMonitor->VerifyFound();
 
+    // Test Depth/Stencil copies
+    if (missing_ds_support) {
+        printf("             Depth / Stencil formats unsupported - skipping D/S tests.\n");
+    } else {
+        VkBufferImageCopy ds_region = {};
+        ds_region.bufferOffset = 0;
+        ds_region.bufferRowLength = 0;
+        ds_region.bufferImageHeight = 0;
+        ds_region.imageSubresource.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
+        ds_region.imageSubresource.mipLevel = 0;
+        ds_region.imageSubresource.baseArrayLayer = 0;
+        ds_region.imageSubresource.layerCount = 1;
+        ds_region.imageOffset = {0, 0, 0};
+        ds_region.imageExtent = {256, 256, 1};
+
+        // Depth copies that should succeed
+        m_errorMonitor->ExpectSuccess();  // Extract 4b depth per texel, pack into 256k buffer
+        vkCmdCopyImageToBuffer(m_commandBuffer->GetBufferHandle(), ds_image_4D_1S.handle(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
+                               buffer_256k.handle(), 1, &ds_region);
+        m_errorMonitor->VerifyNotFound();
+
+        m_errorMonitor->ExpectSuccess();  // Extract 3b depth per texel, pack (loose) into 256k buffer
+        vkCmdCopyImageToBuffer(m_commandBuffer->GetBufferHandle(), ds_image_3D_1S.handle(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
+                               buffer_256k.handle(), 1, &ds_region);
+        m_errorMonitor->VerifyNotFound();
+
+        m_errorMonitor->ExpectSuccess();  // Copy 2b depth per texel, into 128k buffer
+        vkCmdCopyImageToBuffer(m_commandBuffer->GetBufferHandle(), ds_image_2D.handle(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
+                               buffer_128k.handle(), 1, &ds_region);
+        m_errorMonitor->VerifyNotFound();
+
+        // Depth copies that should fail
+        ds_region.bufferOffset = 4;
+        m_errorMonitor->SetDesiredFailureMsg(VK_DEBUG_REPORT_ERROR_BIT_EXT,
+                                             VALIDATION_ERROR_01246);  // Extract 4b depth per texel, pack into 256k buffer
+        vkCmdCopyImageToBuffer(m_commandBuffer->GetBufferHandle(), ds_image_4D_1S.handle(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
+                               buffer_256k.handle(), 1, &ds_region);
+        m_errorMonitor->VerifyFound();
+
+        m_errorMonitor->SetDesiredFailureMsg(VK_DEBUG_REPORT_ERROR_BIT_EXT,
+                                             VALIDATION_ERROR_01246);  // Extract 3b depth per texel, pack (loose) into 256k buffer
+        vkCmdCopyImageToBuffer(m_commandBuffer->GetBufferHandle(), ds_image_3D_1S.handle(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
+                               buffer_256k.handle(), 1, &ds_region);
+        m_errorMonitor->VerifyFound();
+
+        m_errorMonitor->SetDesiredFailureMsg(VK_DEBUG_REPORT_ERROR_BIT_EXT,
+                                             VALIDATION_ERROR_01246);  // Copy 2b depth per texel, into 128k buffer
+        vkCmdCopyImageToBuffer(m_commandBuffer->GetBufferHandle(), ds_image_2D.handle(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
+                               buffer_128k.handle(), 1, &ds_region);
+        m_errorMonitor->VerifyFound();
+
+        // Stencil copies that should succeed
+        ds_region.bufferOffset = 0;
+        ds_region.imageSubresource.aspectMask = VK_IMAGE_ASPECT_STENCIL_BIT;
+        m_errorMonitor->ExpectSuccess();  // Extract 1b stencil per texel, pack into 64k buffer
+        vkCmdCopyImageToBuffer(m_commandBuffer->GetBufferHandle(), ds_image_4D_1S.handle(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
+                               buffer_64k.handle(), 1, &ds_region);
+        m_errorMonitor->VerifyNotFound();
+
+        m_errorMonitor->ExpectSuccess();  // Extract 1b stencil per texel, pack into 64k buffer
+        vkCmdCopyImageToBuffer(m_commandBuffer->GetBufferHandle(), ds_image_3D_1S.handle(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
+                               buffer_64k.handle(), 1, &ds_region);
+        m_errorMonitor->VerifyNotFound();
+
+        m_errorMonitor->ExpectSuccess();  // Copy 1b stencil per texel, into 64k buffer
+        vkCmdCopyImageToBuffer(m_commandBuffer->GetBufferHandle(), ds_image_1S.handle(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
+                               buffer_64k.handle(), 1, &ds_region);
+        m_errorMonitor->VerifyNotFound();
+
+        // Stencil copies that should fail
+        m_errorMonitor->SetDesiredFailureMsg(VK_DEBUG_REPORT_ERROR_BIT_EXT,
+                                             VALIDATION_ERROR_01246);  // Extract 1b stencil per texel, pack into 16k buffer
+        vkCmdCopyImageToBuffer(m_commandBuffer->GetBufferHandle(), ds_image_4D_1S.handle(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
+                               buffer_16k.handle(), 1, &ds_region);
+        m_errorMonitor->VerifyFound();
+
+        m_errorMonitor->SetDesiredFailureMsg(VK_DEBUG_REPORT_ERROR_BIT_EXT,
+                                             VALIDATION_ERROR_01246);  // Extract 1b stencil per texel, pack into 64k buffer
+        ds_region.bufferRowLength = 260;
+        vkCmdCopyImageToBuffer(m_commandBuffer->GetBufferHandle(), ds_image_3D_1S.handle(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
+                               buffer_64k.handle(), 1, &ds_region);
+        m_errorMonitor->VerifyFound();
+
+        ds_region.bufferRowLength = 0;
+        ds_region.bufferOffset = 4;
+        m_errorMonitor->SetDesiredFailureMsg(VK_DEBUG_REPORT_ERROR_BIT_EXT,
+                                             VALIDATION_ERROR_01246);  // Copy 1b stencil per texel, into 64k buffer
+        vkCmdCopyImageToBuffer(m_commandBuffer->GetBufferHandle(), ds_image_1S.handle(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
+                               buffer_64k.handle(), 1, &ds_region);
+        m_errorMonitor->VerifyFound();
+    }
+
     // Test compressed formats, if supported
     VkPhysicalDeviceFeatures device_features;
     ASSERT_NO_FATAL_FAILURE(GetPhysicalDeviceFeatures(&device_features));
-    if (device_features.textureCompressionBC || device_features.textureCompressionETC2 ||
-        device_features.textureCompressionASTC_LDR) {
+    if (!(device_features.textureCompressionBC || device_features.textureCompressionETC2 ||
+          device_features.textureCompressionASTC_LDR)) {
+        printf("             No compressed formats supported - block compression tests skipped.\n");
+    } else {
         VkImageObj image_16k_4x4comp(m_device);  // 128^2 texels as 32^2 compressed (4x4) blocks, 16k
         if (device_features.textureCompressionBC) {
             image_16k_4x4comp.init(32, 32, VK_FORMAT_BC5_UNORM_BLOCK, VK_IMAGE_USAGE_TRANSFER_SRC_BIT, VK_IMAGE_TILING_OPTIMAL, 0);
@@ -18253,7 +18393,7 @@
     alloc_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
     alloc_info.pNext = NULL;
     alloc_info.memoryTypeIndex = 0;
-
+    
     // Ensure memory is big enough for both bindings
     alloc_info.allocationSize = 0x10000;