bug-14715: DrawIndirect fix

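This patch fixes the member order of the indirect draw structures
VkDrawIndirectCmd and VkDrawIndexedIndirectCmd. These structures are
accessed directly by HW, so their members must be in a specific order.
The parameters of vkCmdDraw and vkCmdDrawIndexed are reordered to match
the new structure layout, and the demos, ICDs, layers, loader, tests,
and vktrace are updated for the new parameter order. The definition of
VkClearValue is also moved below VkImageResolve in vulkan.h.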
diff --git a/demos/cube.c b/demos/cube.c
index aba5348..35a2865 100644
--- a/demos/cube.c
+++ b/demos/cube.c
@@ -572,7 +572,7 @@
     vkCmdSetStencilWriteMask(cmd_buf, VK_STENCIL_FACE_FRONT_BIT | VK_STENCIL_FACE_BACK_BIT, 0xff);
     vkCmdSetStencilReference(cmd_buf, VK_STENCIL_FACE_FRONT_BIT | VK_STENCIL_FACE_BACK_BIT, 0);
 
-    vkCmdDraw(cmd_buf, 0, 12 * 3, 0, 1);
+    vkCmdDraw(cmd_buf, 12 * 3, 1, 0, 0);
     vkCmdEndRenderPass(cmd_buf);
 
     err = vkEndCommandBuffer(cmd_buf);
diff --git a/demos/tri.c b/demos/tri.c
index f64c824..f3749cb 100644
--- a/demos/tri.c
+++ b/demos/tri.c
@@ -418,7 +418,7 @@
     VkDeviceSize offsets[1] = {0};
     vkCmdBindVertexBuffers(demo->draw_cmd, VERTEX_BUFFER_BIND_ID, 1, &demo->vertices.buf, offsets);
 
-    vkCmdDraw(demo->draw_cmd, 0, 3, 0, 1);
+    vkCmdDraw(demo->draw_cmd, 3, 1, 0, 0);
     vkCmdEndRenderPass(demo->draw_cmd);
 
     err = vkEndCommandBuffer(demo->draw_cmd);
diff --git a/icd/intel/cmd_pipeline.c b/icd/intel/cmd_pipeline.c
index 05ddd42..239ca52 100644
--- a/icd/intel/cmd_pipeline.c
+++ b/icd/intel/cmd_pipeline.c
@@ -3727,11 +3727,11 @@
 }
 
 ICD_EXPORT void VKAPI vkCmdDraw(
-    VkCmdBuffer                              cmdBuffer,
-    uint32_t                                    firstVertex,
+    VkCmdBuffer                                 cmdBuffer,
     uint32_t                                    vertexCount,
-    uint32_t                                    firstInstance,
-    uint32_t                                    instanceCount)
+    uint32_t                                    instanceCount,
+    uint32_t                                    firstVertex,
+    uint32_t                                    firstInstance)
 {
     struct intel_cmd *cmd = intel_cmd(cmdBuffer);
 
@@ -3740,12 +3740,12 @@
 }
 
 ICD_EXPORT void VKAPI vkCmdDrawIndexed(
-    VkCmdBuffer                              cmdBuffer,
-    uint32_t                                    firstIndex,
+    VkCmdBuffer                                 cmdBuffer,
     uint32_t                                    indexCount,
+    uint32_t                                    instanceCount,
+    uint32_t                                    firstIndex,
     int32_t                                     vertexOffset,
-    uint32_t                                    firstInstance,
-    uint32_t                                    instanceCount)
+    uint32_t                                    firstInstance)
 {
     struct intel_cmd *cmd = intel_cmd(cmdBuffer);
 
diff --git a/icd/nulldrv/nulldrv.c b/icd/nulldrv/nulldrv.c
index 828c427..5b71cfa 100644
--- a/icd/nulldrv/nulldrv.c
+++ b/icd/nulldrv/nulldrv.c
@@ -1155,22 +1155,22 @@
 }
 
 ICD_EXPORT void VKAPI vkCmdDraw(
-    VkCmdBuffer                              cmdBuffer,
-    uint32_t                                    firstVertex,
+    VkCmdBuffer                                 cmdBuffer,
     uint32_t                                    vertexCount,
-    uint32_t                                    firstInstance,
-    uint32_t                                    instanceCount)
+    uint32_t                                    instanceCount,
+    uint32_t                                    firstVertex,
+    uint32_t                                    firstInstance)
 {
     NULLDRV_LOG_FUNC;
 }
 
 ICD_EXPORT void VKAPI vkCmdDrawIndexed(
     VkCmdBuffer                              cmdBuffer,
-    uint32_t                                    firstIndex,
     uint32_t                                    indexCount,
+    uint32_t                                    instanceCount,
+    uint32_t                                    firstIndex,
     int32_t                                     vertexOffset,
-    uint32_t                                    firstInstance,
-    uint32_t                                    instanceCount)
+    uint32_t                                    firstInstance)
 {
     NULLDRV_LOG_FUNC;
 }
diff --git a/include/vulkan.h b/include/vulkan.h
index 8be2b4f..03bcefe 100644
--- a/include/vulkan.h
+++ b/include/vulkan.h
@@ -2013,11 +2013,6 @@
     uint32_t                                    stencil;
 } VkClearDepthStencilValue;
 
-typedef union {
-    VkClearColorValue                           color;
-    VkClearDepthStencilValue                    depthStencil;
-} VkClearValue;
-
 typedef struct {
     VkOffset3D                                  offset;
     VkExtent3D                                  extent;
@@ -2031,6 +2026,11 @@
     VkExtent3D                                  extent;
 } VkImageResolve;
 
+typedef union {
+    VkClearColorValue                           color;
+    VkClearDepthStencilValue                    depthStencil;
+} VkClearValue;
+
 typedef struct {
     VkStructureType                             sType;
     const void*                                 pNext;
@@ -2060,18 +2060,18 @@
 } VkDispatchIndirectCmd;
 
 typedef struct {
-    uint32_t                                    firstIndex;
     uint32_t                                    indexCount;
+    uint32_t                                    instanceCount;
+    uint32_t                                    firstIndex;
     int32_t                                     vertexOffset;
     uint32_t                                    firstInstance;
-    uint32_t                                    instanceCount;
 } VkDrawIndexedIndirectCmd;
 
 typedef struct {
-    uint32_t                                    firstVertex;
     uint32_t                                    vertexCount;
-    uint32_t                                    firstInstance;
     uint32_t                                    instanceCount;
+    uint32_t                                    firstVertex;
+    uint32_t                                    firstInstance;
 } VkDrawIndirectCmd;
 
 typedef struct {
@@ -2208,8 +2208,8 @@
 typedef void (VKAPI *PFN_vkCmdBindDescriptorSets)(VkCmdBuffer cmdBuffer, VkPipelineBindPoint pipelineBindPoint, VkPipelineLayout layout, uint32_t firstSet, uint32_t setCount, const VkDescriptorSet* pDescriptorSets, uint32_t dynamicOffsetCount, const uint32_t* pDynamicOffsets);
 typedef void (VKAPI *PFN_vkCmdBindIndexBuffer)(VkCmdBuffer cmdBuffer, VkBuffer buffer, VkDeviceSize offset, VkIndexType indexType);
 typedef void (VKAPI *PFN_vkCmdBindVertexBuffers)(VkCmdBuffer cmdBuffer, uint32_t startBinding, uint32_t bindingCount, const VkBuffer* pBuffers, const VkDeviceSize* pOffsets);
-typedef void (VKAPI *PFN_vkCmdDraw)(VkCmdBuffer cmdBuffer, uint32_t firstVertex, uint32_t vertexCount, uint32_t firstInstance, uint32_t instanceCount);
-typedef void (VKAPI *PFN_vkCmdDrawIndexed)(VkCmdBuffer cmdBuffer, uint32_t firstIndex, uint32_t indexCount, int32_t vertexOffset, uint32_t firstInstance, uint32_t instanceCount);
+typedef void (VKAPI *PFN_vkCmdDraw)(VkCmdBuffer cmdBuffer, uint32_t vertexCount, uint32_t instanceCount, uint32_t firstVertex, uint32_t firstInstance);
+typedef void (VKAPI *PFN_vkCmdDrawIndexed)(VkCmdBuffer cmdBuffer, uint32_t indexCount, uint32_t instanceCount, uint32_t firstIndex, int32_t vertexOffset, uint32_t firstInstance);
 typedef void (VKAPI *PFN_vkCmdDrawIndirect)(VkCmdBuffer cmdBuffer, VkBuffer buffer, VkDeviceSize offset, uint32_t count, uint32_t stride);
 typedef void (VKAPI *PFN_vkCmdDrawIndexedIndirect)(VkCmdBuffer cmdBuffer, VkBuffer buffer, VkDeviceSize offset, uint32_t count, uint32_t stride);
 typedef void (VKAPI *PFN_vkCmdDispatch)(VkCmdBuffer cmdBuffer, uint32_t x, uint32_t y, uint32_t z);
@@ -2806,18 +2806,18 @@
 
 void VKAPI vkCmdDraw(
     VkCmdBuffer                                 cmdBuffer,
-    uint32_t                                    firstVertex,
     uint32_t                                    vertexCount,
-    uint32_t                                    firstInstance,
-    uint32_t                                    instanceCount);
+    uint32_t                                    instanceCount,
+    uint32_t                                    firstVertex,
+    uint32_t                                    firstInstance);
 
 void VKAPI vkCmdDrawIndexed(
     VkCmdBuffer                                 cmdBuffer,
-    uint32_t                                    firstIndex,
     uint32_t                                    indexCount,
+    uint32_t                                    instanceCount,
+    uint32_t                                    firstIndex,
     int32_t                                     vertexOffset,
-    uint32_t                                    firstInstance,
-    uint32_t                                    instanceCount);
+    uint32_t                                    firstInstance);
 
 void VKAPI vkCmdDrawIndirect(
     VkCmdBuffer                                 cmdBuffer,
diff --git a/layers/draw_state.cpp b/layers/draw_state.cpp
index 10fa70c..35f6035 100644
--- a/layers/draw_state.cpp
+++ b/layers/draw_state.cpp
@@ -2325,7 +2325,7 @@
         get_dispatch_table(draw_state_device_table_map, cmdBuffer)->CmdBindVertexBuffers(cmdBuffer, startBinding, bindingCount, pBuffers, pOffsets);
 }
 
-VK_LAYER_EXPORT void VKAPI vkCmdDraw(VkCmdBuffer cmdBuffer, uint32_t firstVertex, uint32_t vertexCount, uint32_t firstInstance, uint32_t instanceCount)
+VK_LAYER_EXPORT void VKAPI vkCmdDraw(VkCmdBuffer cmdBuffer, uint32_t vertexCount, uint32_t instanceCount, uint32_t firstVertex, uint32_t firstInstance)
 {
     VkBool32 skipCall = VK_FALSE;
     GLOBAL_CB_NODE* pCB = getCBNode(cmdBuffer);
@@ -2352,10 +2352,10 @@
         }
     }
     if (VK_FALSE == skipCall)
-        get_dispatch_table(draw_state_device_table_map, cmdBuffer)->CmdDraw(cmdBuffer, firstVertex, vertexCount, firstInstance, instanceCount);
+        get_dispatch_table(draw_state_device_table_map, cmdBuffer)->CmdDraw(cmdBuffer, vertexCount, instanceCount, firstVertex, firstInstance);
 }
 
-VK_LAYER_EXPORT void VKAPI vkCmdDrawIndexed(VkCmdBuffer cmdBuffer, uint32_t firstIndex, uint32_t indexCount, int32_t vertexOffset, uint32_t firstInstance, uint32_t instanceCount)
+VK_LAYER_EXPORT void VKAPI vkCmdDrawIndexed(VkCmdBuffer cmdBuffer, uint32_t indexCount, uint32_t instanceCount, uint32_t firstIndex, int32_t vertexOffset, uint32_t firstInstance)
 {
     GLOBAL_CB_NODE* pCB = getCBNode(cmdBuffer);
     VkBool32 skipCall = VK_FALSE;
@@ -2376,7 +2376,7 @@
         }
     }
     if (VK_FALSE == skipCall)
-        get_dispatch_table(draw_state_device_table_map, cmdBuffer)->CmdDrawIndexed(cmdBuffer, firstIndex, indexCount, vertexOffset, firstInstance, instanceCount);
+        get_dispatch_table(draw_state_device_table_map, cmdBuffer)->CmdDrawIndexed(cmdBuffer, indexCount, instanceCount, firstIndex, vertexOffset, firstInstance);
 }
 
 VK_LAYER_EXPORT void VKAPI vkCmdDrawIndirect(VkCmdBuffer cmdBuffer, VkBuffer buffer, VkDeviceSize offset, uint32_t count, uint32_t stride)
diff --git a/layers/param_checker.cpp b/layers/param_checker.cpp
index 2ffeefa..0b6909d 100644
--- a/layers/param_checker.cpp
+++ b/layers/param_checker.cpp
@@ -5688,12 +5688,12 @@
 
 VK_LAYER_EXPORT void VKAPI vkCmdDraw(
     VkCmdBuffer cmdBuffer,
-    uint32_t firstVertex,
     uint32_t vertexCount,
-    uint32_t firstInstance,
-    uint32_t instanceCount)
+    uint32_t instanceCount,
+    uint32_t firstVertex,
+    uint32_t firstInstance)
 {
-    get_dispatch_table(pc_device_table_map, cmdBuffer)->CmdDraw(cmdBuffer, firstVertex, vertexCount, firstInstance, instanceCount);
+    get_dispatch_table(pc_device_table_map, cmdBuffer)->CmdDraw(cmdBuffer, vertexCount, instanceCount, firstVertex, firstInstance);
 
     PostCmdDraw(cmdBuffer, firstVertex, vertexCount, firstInstance, instanceCount);
 }
@@ -5717,13 +5717,13 @@
 
 VK_LAYER_EXPORT void VKAPI vkCmdDrawIndexed(
     VkCmdBuffer cmdBuffer,
-    uint32_t firstIndex,
     uint32_t indexCount,
+    uint32_t instanceCount,
+    uint32_t firstIndex,
     int32_t vertexOffset,
-    uint32_t firstInstance,
-    uint32_t instanceCount)
+    uint32_t firstInstance)
 {
-    get_dispatch_table(pc_device_table_map, cmdBuffer)->CmdDrawIndexed(cmdBuffer, firstIndex, indexCount, vertexOffset, firstInstance, instanceCount);
+    get_dispatch_table(pc_device_table_map, cmdBuffer)->CmdDrawIndexed(cmdBuffer, indexCount, instanceCount, firstIndex, vertexOffset, firstInstance);
 
     PostCmdDrawIndexed(cmdBuffer, firstIndex, indexCount, vertexOffset, firstInstance, instanceCount);
 }
diff --git a/loader/trampoline.c b/loader/trampoline.c
index 5074f3b..8ae8135 100644
--- a/loader/trampoline.c
+++ b/loader/trampoline.c
@@ -1220,22 +1220,22 @@
     disp->CmdBindVertexBuffers(cmdBuffer, startBinding, bindingCount, pBuffers, pOffsets);
 }
 
-LOADER_EXPORT void VKAPI vkCmdDraw(VkCmdBuffer cmdBuffer, uint32_t firstVertex, uint32_t vertexCount, uint32_t firstInstance, uint32_t instanceCount)
+LOADER_EXPORT void VKAPI vkCmdDraw(VkCmdBuffer cmdBuffer, uint32_t vertexCount, uint32_t instanceCount, uint32_t firstVertex, uint32_t firstInstance)
 {
     const VkLayerDispatchTable *disp;
 
     disp = loader_get_dispatch(cmdBuffer);
 
-    disp->CmdDraw(cmdBuffer, firstVertex, vertexCount, firstInstance, instanceCount);
+    disp->CmdDraw(cmdBuffer, vertexCount, instanceCount, firstVertex, firstInstance);
 }
 
-LOADER_EXPORT void VKAPI vkCmdDrawIndexed(VkCmdBuffer cmdBuffer, uint32_t firstIndex, uint32_t indexCount, int32_t vertexOffset, uint32_t firstInstance, uint32_t instanceCount)
+LOADER_EXPORT void VKAPI vkCmdDrawIndexed(VkCmdBuffer cmdBuffer, uint32_t indexCount, uint32_t instanceCount, uint32_t firstIndex, int32_t vertexOffset, uint32_t firstInstance)
 {
     const VkLayerDispatchTable *disp;
 
     disp = loader_get_dispatch(cmdBuffer);
 
-    disp->CmdDrawIndexed(cmdBuffer, firstIndex, indexCount, vertexOffset, firstInstance, instanceCount);
+    disp->CmdDrawIndexed(cmdBuffer, indexCount, instanceCount, firstIndex, vertexOffset, firstInstance);
 }
 
 LOADER_EXPORT void VKAPI vkCmdDrawIndirect(VkCmdBuffer cmdBuffer, VkBuffer buffer, VkDeviceSize offset, uint32_t count, uint32_t stride)
diff --git a/tests/layer_validation_tests.cpp b/tests/layer_validation_tests.cpp
index ef178ae..b3aceb4 100644
--- a/tests/layer_validation_tests.cpp
+++ b/tests/layer_validation_tests.cpp
@@ -155,10 +155,10 @@
     /* Convenience functions that use built-in command buffer */
     VkResult BeginCommandBuffer() { return BeginCommandBuffer(*m_cmdBuffer); }
     VkResult EndCommandBuffer() { return EndCommandBuffer(*m_cmdBuffer); }
-    void Draw(uint32_t firstVertex, uint32_t vertexCount, uint32_t firstInstance, uint32_t instanceCount)
-        { m_cmdBuffer->Draw(firstVertex, vertexCount, firstInstance, instanceCount); }
-    void DrawIndexed(uint32_t firstVertex, uint32_t vertexCount, int32_t vertexOffset, uint32_t firstInstance, uint32_t instanceCount)
-        { m_cmdBuffer->DrawIndexed(firstVertex, vertexCount, vertexOffset,firstInstance, instanceCount); }
+    void Draw(uint32_t vertexCount, uint32_t instanceCount, uint32_t firstVertex, uint32_t firstInstance)
+        { m_cmdBuffer->Draw(vertexCount, instanceCount, firstVertex, firstInstance); }
+    void DrawIndexed(uint32_t indexCount, uint32_t instanceCount, uint32_t firstIndex, int32_t vertexOffset, uint32_t firstInstance)
+        { m_cmdBuffer->DrawIndexed(indexCount, instanceCount, firstIndex, vertexOffset, firstInstance); }
     void QueueCommandBuffer() { m_cmdBuffer->QueueCommandBuffer(); }
     void QueueCommandBuffer(const VkFence& fence) { m_cmdBuffer->QueueCommandBuffer(fence); }
     void BindVertexBuffer(VkConstantBufferObj *vertexBuffer, VkDeviceSize offset, uint32_t binding)
@@ -301,7 +301,7 @@
     GenericDrawPreparation(pipelineobj, descriptorSet, failMask);
 
     // render triangle
-    Draw(0, 3, 0, 1);
+    Draw(3, 1, 0, 0);
 
     // finalize recording of the command buffer
     EndCommandBuffer();
@@ -2584,7 +2584,7 @@
     static const float vbo_data[3] = {1.f, 0.f, 1.f};
     VkConstantBufferObj vbo(m_device, sizeof(vbo_data), sizeof(float), (const void*) &vbo_data);
     BindVertexBuffer(&vbo, (VkDeviceSize)0, 1); // VBO idx 1, but no VBO in PSO
-    Draw(0, 1, 0, 0);
+    Draw(1, 0, 0, 0);
 
     msgFlags = m_errorMonitor->GetState(&msgString);
     ASSERT_TRUE(0 != (msgFlags & VK_DBG_REPORT_ERROR_BIT)) << "Did not receive error after binding Vtx Buffer w/o VBO attached to PSO.";
diff --git a/tests/render_tests.cpp b/tests/render_tests.cpp
index 95f0358..9db980f 100644
--- a/tests/render_tests.cpp
+++ b/tests/render_tests.cpp
@@ -280,10 +280,10 @@
     VkResult BeginCommandBuffer() { return BeginCommandBuffer(*m_cmdBuffer); }
     VkResult BeginCommandBuffer(VkCmdBufferBeginInfo *beginInfo) { return BeginCommandBuffer(*m_cmdBuffer, beginInfo); }
     VkResult EndCommandBuffer() { return EndCommandBuffer(*m_cmdBuffer); }
-    void Draw(uint32_t firstVertex, uint32_t vertexCount, uint32_t firstInstance, uint32_t instanceCount)
-        { m_cmdBuffer->Draw(firstVertex, vertexCount, firstInstance, instanceCount); }
-    void DrawIndexed(uint32_t firstVertex, uint32_t vertexCount, int32_t vertexOffset, uint32_t firstInstance, uint32_t instanceCount)
-        { m_cmdBuffer->DrawIndexed(firstVertex, vertexCount, vertexOffset,firstInstance, instanceCount); }
+    void Draw(uint32_t vertexCount, uint32_t instanceCount, uint32_t firstVertex, uint32_t firstInstance)
+        { m_cmdBuffer->Draw(vertexCount, instanceCount, firstVertex, firstInstance); }
+    void DrawIndexed(uint32_t indexCount, uint32_t instanceCount, uint32_t firstIndex, int32_t vertexOffset, uint32_t firstInstance)
+        { m_cmdBuffer->DrawIndexed(indexCount, instanceCount, firstIndex, vertexOffset, firstInstance); }
     void QueueCommandBuffer() { m_cmdBuffer->QueueCommandBuffer(); }
     void RotateTriangleVSUniform(glm::mat4 Projection, glm::mat4 View, glm::mat4 Model,
                                  VkConstantBufferObj *constantBuffer)
@@ -526,7 +526,7 @@
 #endif
 
     // render triangle
-    Draw(0, 3, 0, 1);
+    Draw(3, 1, 0, 0);
 
     // finalize recording of the command buffer
     ASSERT_VK_SUCCESS(EndCommandBuffer());
@@ -797,7 +797,7 @@
     BindVertexBuffer(&meshBuffer, 0, 0);
 
     // render two triangles
-    Draw(0, 6, 0, 1);
+    Draw(6, 1, 0, 0);
 
     // finalize recording of the command buffer
     ASSERT_VK_SUCCESS(EndCommandBuffer());
@@ -884,7 +884,7 @@
 #endif
 
     // render triangle
-    Draw(0, 3, 0, 1);
+    Draw(3, 1, 0, 0);
 
     // finalize recording of the command buffer
     ASSERT_VK_SUCCESS(EndCommandBuffer());
@@ -991,7 +991,7 @@
     BindIndexBuffer(&indexBuffer, 0);
 
     // render two triangles
-    DrawIndexed(0, 6, 0, 0, 1);
+    DrawIndexed(6, 1, 0, 0, 0);
 
     // finalize recording of the command buffer
     ASSERT_VK_SUCCESS(EndCommandBuffer());
@@ -1079,7 +1079,7 @@
 #endif
 
     // render triangle
-    Draw(0, 6, 0, 1);
+    Draw(6, 1, 0, 0);
 
     // finalize recording of the command buffer
     EndCommandBuffer();
@@ -1165,7 +1165,7 @@
     pDSDumpDot((char*)"triTest2.dot");
 #endif
     // render two triangles
-    Draw(0, 6, 0, 1);
+    Draw(6, 1, 0, 0);
 
     // finalize recording of the command buffer
     EndCommandBuffer();
@@ -1265,7 +1265,7 @@
 #endif
 
     // render two triangles
-    Draw(0, 6, 0, 1);
+    Draw(6, 1, 0, 0);
 
     // finalize recording of the command buffer
     EndCommandBuffer();
@@ -1354,7 +1354,7 @@
 #endif
 
     // render two triangles
-    Draw(0, 6, 0, 1);
+    Draw(6, 1, 0, 0);
 
     // finalize recording of the command buffer
     EndCommandBuffer();
@@ -1430,7 +1430,7 @@
 #endif
 
     // render two triangles
-    Draw(0, 6, 0, 1);
+    Draw(6, 1, 0, 0);
 
     // finalize recording of the command buffer
     EndCommandBuffer();
@@ -1506,7 +1506,7 @@
 #endif
 
     // render triangle
-    Draw(0, 3, 0, 1);
+    Draw(3, 1, 0, 0);
 
     // finalize recording of the command buffer
     EndCommandBuffer();
@@ -1595,7 +1595,7 @@
 #endif
 
     // render two triangles
-    Draw(0, 6, 0, 1);
+    Draw(6, 1, 0, 0);
 
     // finalize recording of the command buffer
     EndCommandBuffer();
@@ -1698,7 +1698,7 @@
 #endif
 
     // render two triangles
-    Draw(0, 6, 0, 1);
+    Draw(6, 1, 0, 0);
 
     // finalize recording of the command buffer
     EndCommandBuffer();
@@ -1791,7 +1791,7 @@
 #endif
 
     // render two triangles
-    Draw(0, 6, 0, 1);
+    Draw(6, 1, 0, 0);
 
     // finalize recording of the command buffer
     EndCommandBuffer();
@@ -1907,7 +1907,7 @@
 #endif
 
     // render triangles
-    Draw(0, 36, 0, 1);
+    Draw(36, 1, 0, 0);
 
 
     // finalize recording of the command buffer
@@ -1978,7 +1978,7 @@
 #endif
 
     // render triangle
-    Draw(0, 3, 0, 1);
+    Draw(3, 1, 0, 0);
 
     // finalize recording of the command buffer
     EndCommandBuffer();
@@ -2048,7 +2048,7 @@
     pDSDumpDot((char*)"triTest2.dot");
 #endif
     // render triangle
-    Draw(0, 3, 0, 1);
+    Draw(3, 1, 0, 0);
 
     // finalize recording of the command buffer
     EndCommandBuffer();
@@ -2126,7 +2126,7 @@
     pDSDumpDot((char*)"triTest2.dot");
 #endif
     // render triangle
-    Draw(0, 3, 0, 1);
+    Draw(3, 1, 0, 0);
 
     // finalize recording of the command buffer
     EndCommandBuffer();
@@ -2194,7 +2194,7 @@
     pDSDumpDot((char*)"triTest2.dot");
 #endif
     // render triangle
-    Draw(0, 3, 0, 1);
+    Draw(3, 1, 0, 0);
 
     // finalize recording of the command buffer
     EndCommandBuffer();
@@ -2281,7 +2281,7 @@
     pDSDumpDot((char*)"triTest2.dot");
 #endif
     // render triangle
-    Draw(0, 3, 0, 1);
+    Draw(3, 1, 0, 0);
 
     // finalize recording of the command buffer
     EndCommandBuffer();
@@ -2361,7 +2361,7 @@
     pDSDumpDot((char*)"triTest2.dot");
 #endif
     // render triangle
-    Draw(0, 3, 0, 1);
+    Draw(3, 1, 0, 0);
 
     // finalize recording of the command buffer
     EndCommandBuffer();
@@ -2460,7 +2460,7 @@
     pDSDumpDot((char*)"triTest2.dot");
 #endif
     // render triangle
-    Draw(0, 3, 0, 1);
+    Draw(3, 1, 0, 0);
 
     // finalize recording of the command buffer
     EndCommandBuffer();
@@ -2554,7 +2554,7 @@
     pDSDumpDot((char*)"triTest2.dot");
 #endif
     // render triangle
-    Draw(0, 3, 0, 1);
+    Draw(3, 1, 0, 0);
 
     // finalize recording of the command buffer
     EndCommandBuffer();
@@ -2672,7 +2672,7 @@
     pDSDumpDot((char*)"triTest2.dot");
 #endif
     // render triangle
-    Draw(0, 3, 0, 1);
+    Draw(3, 1, 0, 0);
 
     // finalize recording of the command buffer
     EndCommandBuffer();
@@ -2799,7 +2799,7 @@
     pDSDumpDot((char*)"triTest2.dot");
 #endif
     // render triangle
-    Draw(0, num_verts, 0, 1);
+    Draw(num_verts, 1, 0, 0);
 
     // finalize recording of the command buffer
     EndCommandBuffer();
@@ -2912,7 +2912,7 @@
     pDSDumpDot((char*)"triTest2.dot");
 #endif
     // render triangle
-    Draw(0, 3, 0, 1);
+    Draw(3, 1, 0, 0);
 
     // finalize recording of the command buffer
     EndCommandBuffer();
@@ -3018,7 +3018,7 @@
     pDSDumpDot((char*)"triTest2.dot");
 #endif
     // render triangle
-    Draw(0, 3, 0, 1);
+    Draw(3, 1, 0, 0);
 
     // finalize recording of the command buffer
     EndCommandBuffer();
@@ -3267,7 +3267,7 @@
     pDSDumpDot((char*)"triTest2.dot");
 #endif
     // render triangle
-    Draw(0, 3, 0, 1);
+    Draw(3, 1, 0, 0);
 
     // finalize recording of the command buffer
     EndCommandBuffer();
@@ -3374,7 +3374,7 @@
     GenericDrawPreparation(pipelineobj, descriptorSet);
 
     // render triangle
-    Draw(0, 3, 0, 1);
+    Draw(3, 1, 0, 0);
 
     // finalize recording of the command buffer
     EndCommandBuffer();
@@ -3478,7 +3478,7 @@
     GenericDrawPreparation(pipelineobj, descriptorSet);
 
     // render triangle
-    Draw(0, 3, 0, 1);
+    Draw(3, 1, 0, 0);
 
     // finalize recording of the command buffer
     EndCommandBuffer();
@@ -3829,7 +3829,7 @@
     GenericDrawPreparation(pipelineobj, descriptorSet);
 
     // render triangle
-    Draw(0, 3, 0, 1);
+    Draw(3, 1, 0, 0);
 
     // finalize recording of the command buffer
     EndCommandBuffer();
@@ -3954,7 +3954,7 @@
     GenericDrawPreparation(pipelineobj, descriptorSet);
 
     // render triangle
-    Draw(0, 3, 0, 1);
+    Draw(3, 1, 0, 0);
 
     // finalize recording of the command buffer
     EndCommandBuffer();
@@ -4089,7 +4089,7 @@
     GenericDrawPreparation(pipelineobj, descriptorSet);
 
     // render triangle
-    Draw(0, 3, 0, 1);
+    Draw(3, 1, 0, 0);
 
     // finalize recording of the command buffer
     EndCommandBuffer();
diff --git a/tests/vkrenderframework.cpp b/tests/vkrenderframework.cpp
index 6e7df3d..d6cf64b 100644
--- a/tests/vkrenderframework.cpp
+++ b/tests/vkrenderframework.cpp
@@ -1505,14 +1505,14 @@
     m_renderTargets.push_back(renderTarget);
 }
 
-void VkCommandBufferObj::DrawIndexed(uint32_t firstIndex, uint32_t indexCount, int32_t vertexOffset, uint32_t firstInstance, uint32_t instanceCount)
+void VkCommandBufferObj::DrawIndexed(uint32_t indexCount, uint32_t instanceCount, uint32_t firstIndex, int32_t vertexOffset, uint32_t firstInstance)
 {
-    vkCmdDrawIndexed(handle(), firstIndex, indexCount, vertexOffset, firstInstance, instanceCount);
+    vkCmdDrawIndexed(handle(), indexCount, instanceCount, firstIndex, vertexOffset, firstInstance);
 }
 
-void VkCommandBufferObj::Draw(uint32_t firstVertex, uint32_t vertexCount, uint32_t firstInstance, uint32_t instanceCount)
+void VkCommandBufferObj::Draw(uint32_t vertexCount, uint32_t instanceCount, uint32_t firstVertex, uint32_t firstInstance)
 {
-    vkCmdDraw(handle(), firstVertex, vertexCount, firstInstance, instanceCount);
+    vkCmdDraw(handle(), vertexCount, instanceCount, firstVertex, firstInstance);
 }
 
 void VkCommandBufferObj::QueueCommandBuffer()
diff --git a/tests/vkrenderframework.h b/tests/vkrenderframework.h
index d3136f2..4bc6d42 100644
--- a/tests/vkrenderframework.h
+++ b/tests/vkrenderframework.h
@@ -185,8 +185,8 @@
     void BeginRenderPass(const VkRenderPassBeginInfo &info);
     void EndRenderPass();
     void FillBuffer(VkBuffer buffer, VkDeviceSize offset, VkDeviceSize fill_size, uint32_t data);
-    void Draw(uint32_t firstVertex, uint32_t vertexCount, uint32_t firstInstance, uint32_t instanceCount);
-    void DrawIndexed(uint32_t firstIndex, uint32_t indexCount, int32_t vertexOffset, uint32_t firstInstance, uint32_t instanceCount);
+    void Draw(uint32_t vertexCount, uint32_t instanceCount, uint32_t firstVertex, uint32_t firstInstance);
+    void DrawIndexed(uint32_t indexCount, uint32_t instanceCount, uint32_t firstIndex, int32_t vertexOffset, uint32_t firstInstance);
     void QueueCommandBuffer();
     void QueueCommandBuffer(VkFence fence);
     void SetViewport(uint32_t viewportCount, const VkViewport* pViewports);
diff --git a/vktrace/src/vktrace_extensions/vktracevulkan/layers/vktrace_snapshot.c b/vktrace/src/vktrace_extensions/vktracevulkan/layers/vktrace_snapshot.c
index 9a8bd19..a001193 100644
--- a/vktrace/src/vktrace_extensions/vktracevulkan/layers/vktrace_snapshot.c
+++ b/vktrace/src/vktrace_extensions/vktracevulkan/layers/vktrace_snapshot.c
@@ -1296,12 +1296,12 @@
     nextTable.CmdDraw(cmdBuffer, firstVertex, vertexCount, firstInstance, instanceCount);
 }
 
-VK_LAYER_EXPORT void VKAPI vkCmdDrawIndexed(VkCmdBuffer cmdBuffer, uint32_t firstIndex, uint32_t indexCount, int32_t vertexOffset, uint32_t firstInstance, uint32_t instanceCount)
+VK_LAYER_EXPORT void VKAPI vkCmdDrawIndexed(VkCmdBuffer cmdBuffer, uint32_t indexCount, uint32_t instanceCount, uint32_t firstIndex, int32_t vertexOffset, uint32_t firstInstance)
 {
     loader_platform_thread_lock_mutex(&objLock);
     ll_increment_use_count(cmdBuffer, VK_OBJECT_TYPE_COMMAND_BUFFER);
     loader_platform_thread_unlock_mutex(&objLock);
-    nextTable.CmdDrawIndexed(cmdBuffer, firstIndex, indexCount, vertexOffset, firstInstance, instanceCount);
+    nextTable.CmdDrawIndexed(cmdBuffer, indexCount, instanceCount, firstIndex, vertexOffset, firstInstance);
 }
 
 VK_LAYER_EXPORT void VKAPI vkCmdDrawIndirect(VkCmdBuffer cmdBuffer, VkBuffer buffer, VkDeviceSize offset, uint32_t count, uint32_t stride)
diff --git a/vulkan.py b/vulkan.py
index 880a107..5323a6a 100755
--- a/vulkan.py
+++ b/vulkan.py
@@ -708,18 +708,18 @@
 
         Proto("void", "CmdDraw",
             [Param("VkCmdBuffer", "cmdBuffer"),
-             Param("uint32_t", "firstVertex"),
              Param("uint32_t", "vertexCount"),
-             Param("uint32_t", "firstInstance"),
-             Param("uint32_t", "instanceCount")]),
+             Param("uint32_t", "instanceCount"),
+             Param("uint32_t", "firstVertex"),
+             Param("uint32_t", "firstInstance")]),
 
         Proto("void", "CmdDrawIndexed",
             [Param("VkCmdBuffer", "cmdBuffer"),
-             Param("uint32_t", "firstIndex"),
              Param("uint32_t", "indexCount"),
+             Param("uint32_t", "instanceCount"),
+             Param("uint32_t", "firstIndex"),
              Param("int32_t", "vertexOffset"),
-             Param("uint32_t", "firstInstance"),
-             Param("uint32_t", "instanceCount")]),
+             Param("uint32_t", "firstInstance")]),
 
         Proto("void", "CmdDrawIndirect",
             [Param("VkCmdBuffer", "cmdBuffer"),