/*
* Vulkan
*
* Copyright (C) 2014 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include "genhw/genhw.h"
#include "kmd/winsys.h"
#include "queue.h"
#include "gpu.h"
#include "instance.h"
#include "wsi.h"
#include "vk_debug_report_lunarg.h"
#include "vk_debug_marker_lunarg.h"
static int gpu_open_primary_node(struct intel_gpu *gpu)
{
if (gpu->primary_fd_internal < 0)
gpu->primary_fd_internal = open(gpu->primary_node, O_RDWR);
return gpu->primary_fd_internal;
}
static void gpu_close_primary_node(struct intel_gpu *gpu)
{
if (gpu->primary_fd_internal >= 0) {
close(gpu->primary_fd_internal);
gpu->primary_fd_internal = -1;
}
}
static int gpu_open_render_node(struct intel_gpu *gpu)
{
if (gpu->render_fd_internal < 0 && gpu->render_node) {
gpu->render_fd_internal = open(gpu->render_node, O_RDWR);
if (gpu->render_fd_internal < 0) {
intel_log(gpu, VK_DBG_REPORT_ERROR_BIT, 0, VK_NULL_HANDLE, 0,
0, "failed to open %s", gpu->render_node);
}
}
return gpu->render_fd_internal;
}
static void gpu_close_render_node(struct intel_gpu *gpu)
{
if (gpu->render_fd_internal >= 0) {
close(gpu->render_fd_internal);
gpu->render_fd_internal = -1;
}
}
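
/* map the PCI device id to a marketing name; the gen_is_*() classifiers
 * are presumably provided by genhw/genhw.h
 */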
static const char *gpu_get_name(const struct intel_gpu *gpu)
{
const char *name = NULL;
if (gen_is_hsw(gpu->devid)) {
if (gen_is_desktop(gpu->devid))
name = "Intel(R) Haswell Desktop";
else if (gen_is_mobile(gpu->devid))
name = "Intel(R) Haswell Mobile";
else if (gen_is_server(gpu->devid))
name = "Intel(R) Haswell Server";
}
else if (gen_is_ivb(gpu->devid)) {
if (gen_is_desktop(gpu->devid))
name = "Intel(R) Ivybridge Desktop";
else if (gen_is_mobile(gpu->devid))
name = "Intel(R) Ivybridge Mobile";
else if (gen_is_server(gpu->devid))
name = "Intel(R) Ivybridge Server";
}
else if (gen_is_snb(gpu->devid)) {
if (gen_is_desktop(gpu->devid))
name = "Intel(R) Sandybridge Desktop";
else if (gen_is_mobile(gpu->devid))
name = "Intel(R) Sandybridge Mobile";
else if (gen_is_server(gpu->devid))
name = "Intel(R) Sandybridge Server";
}
if (!name)
name = "Unknown Intel Chipset";
return name;
}
void intel_gpu_destroy(struct intel_gpu *gpu)
{
intel_wsi_gpu_cleanup(gpu);
intel_gpu_cleanup_winsys(gpu);
intel_free(gpu, gpu->primary_node);
intel_free(gpu, gpu);
}
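
/*
 * INTEL_GEN() encodes a hardware generation as an integer token that can
 * be compared and switched on (note INTEL_GEN(7.5) for Haswell below).
 * Returns -1 for device ids this driver does not support.
 */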
static int devid_to_gen(int devid)
{
int gen;
if (gen_is_hsw(devid))
gen = INTEL_GEN(7.5);
else if (gen_is_ivb(devid))
gen = INTEL_GEN(7);
else if (gen_is_snb(devid))
gen = INTEL_GEN(6);
else
gen = -1;
#ifdef INTEL_GEN_SPECIALIZED
if (gen != INTEL_GEN(INTEL_GEN_SPECIALIZED))
gen = -1;
#endif
return gen;
}
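
/*
 * Create an intel_gpu for the given device.  Both node paths are copied
 * into a single allocation: primary_node points at the start, and
 * render_node points either just past the primary path's terminating NUL
 * or, when no render node was given, back at primary_node itself.
 */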
VkResult intel_gpu_create(const struct intel_instance *instance, int devid,
const char *primary_node, const char *render_node,
struct intel_gpu **gpu_ret)
{
const int gen = devid_to_gen(devid);
size_t primary_len, render_len;
struct intel_gpu *gpu;
if (gen < 0) {
intel_log(instance, VK_DBG_REPORT_WARN_BIT, 0,
VK_NULL_HANDLE, 0, 0, "unsupported device id 0x%04x", devid);
return VK_ERROR_INITIALIZATION_FAILED;
}
gpu = intel_alloc(instance, sizeof(*gpu), 0, VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
if (!gpu)
return VK_ERROR_OUT_OF_HOST_MEMORY;
memset(gpu, 0, sizeof(*gpu));
/* there is no VK_DBG_OBJECT_GPU */
intel_handle_init(&gpu->handle, VK_OBJECT_TYPE_PHYSICAL_DEVICE, instance);
gpu->devid = devid;
primary_len = strlen(primary_node);
render_len = (render_node) ? strlen(render_node) : 0;
gpu->primary_node = intel_alloc(gpu, primary_len + 1 +
((render_len) ? (render_len + 1) : 0), 0, VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
if (!gpu->primary_node) {
intel_free(instance, gpu);
return VK_ERROR_OUT_OF_HOST_MEMORY;
}
memcpy(gpu->primary_node, primary_node, primary_len + 1);
if (render_node) {
gpu->render_node = gpu->primary_node + primary_len + 1;
memcpy(gpu->render_node, render_node, render_len + 1);
} else {
gpu->render_node = gpu->primary_node;
}
gpu->gen_opaque = gen;
switch (intel_gpu_gen(gpu)) {
case INTEL_GEN(7.5):
gpu->gt = gen_get_hsw_gt(devid);
break;
case INTEL_GEN(7):
gpu->gt = gen_get_ivb_gt(devid);
break;
case INTEL_GEN(6):
gpu->gt = gen_get_snb_gt(devid);
break;
}
/* 150K dwords */
gpu->max_batch_buffer_size = sizeof(uint32_t) * 150*1024;
    /* the winsys is sized for one reloc every two dwords, minus 2 */
gpu->batch_buffer_reloc_count =
gpu->max_batch_buffer_size / sizeof(uint32_t) / 2 - 2;
gpu->primary_fd_internal = -1;
gpu->render_fd_internal = -1;
*gpu_ret = gpu;
return VK_SUCCESS;
}
void intel_gpu_get_limits(VkPhysicalDeviceLimits *pLimits)
{
// TODO: fill out more limits
memset(pLimits, 0, sizeof(*pLimits));
    // no size limit, but no bound buffer can exceed 2GB
pLimits->maxBoundDescriptorSets = 1;
pLimits->maxComputeWorkGroupInvocations = 512;
    // the timestamp counter increments once every 80 ns
pLimits->timestampPeriod = 80.0f;
// hardware is limited to 16 viewports
pLimits->maxViewports = INTEL_MAX_VIEWPORTS;
pLimits->maxColorAttachments = INTEL_MAX_RENDER_TARGETS;
    // TODO: confirm these against actual hardware limits
pLimits->maxImageDimension1D = 8192;
pLimits->maxImageDimension2D = 8192;
pLimits->maxImageDimension3D = 8192;
pLimits->maxImageDimensionCube = 8192;
pLimits->maxImageArrayLayers = 2048;
pLimits->maxTexelBufferElements = 128 * 1024 * 1024; // 128M texels hard limit
pLimits->maxUniformBufferRange = 64 * 1024; // not hard limit
    /* HW has two per-stage resource tables:
     * - samplers: 16 per stage on IVB; addressable in blocks of 16 on HSW+
     *   via a shader workaround, since the table base pointer used by the
     *   sampler hardware is under shader software control.
     *
     * - binding table entries: 250 total on all gens, shared between
     *   textures, render targets, images, SSBOs, UBOs, ...
     *   The top few indices (250-255) are used for "stateless" access with
     *   various cache options, and for SLM access.
     */
    pLimits->maxPerStageDescriptorSamplers = 16; // technically more on HSW+
pLimits->maxDescriptorSetSamplers = 16;
pLimits->maxPerStageDescriptorUniformBuffers = 128;
pLimits->maxDescriptorSetUniformBuffers = 128;
pLimits->maxPerStageDescriptorSampledImages = 128;
pLimits->maxDescriptorSetSampledImages = 128;
// storage images and buffers not implemented; left at zero
// required to support at least two queue priorities
pLimits->discreteQueuePriorities = 2;
}
void intel_gpu_get_props(const struct intel_gpu *gpu,
VkPhysicalDeviceProperties *props)
{
const char *name;
size_t name_len;
props->apiVersion = INTEL_API_VERSION;
props->driverVersion = INTEL_DRIVER_VERSION;
props->vendorID = 0x8086;
props->deviceID = gpu->devid;
props->deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU;
/* copy GPU name */
name = gpu_get_name(gpu);
name_len = strlen(name);
if (name_len > sizeof(props->deviceName) - 1)
name_len = sizeof(props->deviceName) - 1;
memcpy(props->deviceName, name, name_len);
props->deviceName[name_len] = '\0';
intel_gpu_get_limits(&props->limits);
intel_gpu_get_sparse_properties(&props->sparseProperties);
}
void intel_gpu_get_queue_props(const struct intel_gpu *gpu,
enum intel_gpu_engine_type engine,
VkQueueFamilyProperties *props)
{
switch (engine) {
case INTEL_GPU_ENGINE_3D:
props->queueFlags = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT;
props->queueCount = 1;
props->timestampValidBits = 0;
props->minImageTransferGranularity.width = 1;
props->minImageTransferGranularity.height = 1;
props->minImageTransferGranularity.depth = 1;
break;
default:
assert(!"unknown engine type");
return;
}
}
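
/* integrated GPUs share system memory with the CPU, so a single heap
 * with one all-purpose memory type is advertised
 */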
void intel_gpu_get_memory_props(const struct intel_gpu *gpu,
VkPhysicalDeviceMemoryProperties *props)
{
memset(props, 0, sizeof(VkPhysicalDeviceMemoryProperties));
props->memoryTypeCount = INTEL_MEMORY_TYPE_COUNT;
props->memoryHeapCount = INTEL_MEMORY_HEAP_COUNT;
    // For now, Intel supports exactly one memory type
    for (uint32_t i = 0; i < props->memoryTypeCount; i++) {
        assert(props->memoryTypeCount == 1);
        props->memoryTypes[i].propertyFlags = INTEL_MEMORY_PROPERTY_ALL;
        props->memoryTypes[i].heapIndex = i;
    }

    // For now, Intel supports a single heap with all available memory
    for (uint32_t i = 0; i < props->memoryHeapCount; i++) {
        assert(props->memoryHeapCount == 1);
        props->memoryHeaps[i].size = INTEL_MEMORY_HEAP_SIZE;
    }
}
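
/*
 * Maximum thread counts by generation and GT level, as returned below:
 *
 *                  VS    GS    PS
 *   Gen7.5 GT3    280   256   408
 *   Gen7.5 GT2    280   256   204
 *   Gen7.5 GT1     70    70   102
 *   Gen7   GT2    128   128   172
 *   Gen7   GT1     36    36    48
 *   Gen6   GT2     60    28    80
 *   Gen6   GT1     24    21    40
 */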
int intel_gpu_get_max_threads(const struct intel_gpu *gpu,
VkShaderStageFlagBits stage)
{
switch (intel_gpu_gen(gpu)) {
case INTEL_GEN(7.5):
switch (stage) {
case VK_SHADER_STAGE_VERTEX_BIT:
return (gpu->gt >= 2) ? 280 : 70;
case VK_SHADER_STAGE_GEOMETRY_BIT:
/* values from ilo_gpe_init_gs_cso_gen7 */
return (gpu->gt >= 2) ? 256 : 70;
case VK_SHADER_STAGE_FRAGMENT_BIT:
return (gpu->gt == 3) ? 408 :
(gpu->gt == 2) ? 204 : 102;
default:
break;
}
break;
case INTEL_GEN(7):
switch (stage) {
case VK_SHADER_STAGE_VERTEX_BIT:
return (gpu->gt == 2) ? 128 : 36;
case VK_SHADER_STAGE_GEOMETRY_BIT:
/* values from ilo_gpe_init_gs_cso_gen7 */
return (gpu->gt == 2) ? 128 : 36;
case VK_SHADER_STAGE_FRAGMENT_BIT:
return (gpu->gt == 2) ? 172 : 48;
default:
break;
}
break;
case INTEL_GEN(6):
switch (stage) {
case VK_SHADER_STAGE_VERTEX_BIT:
return (gpu->gt == 2) ? 60 : 24;
case VK_SHADER_STAGE_GEOMETRY_BIT:
/* values from ilo_gpe_init_gs_cso_gen6 */
return (gpu->gt == 2) ? 28 : 21;
case VK_SHADER_STAGE_FRAGMENT_BIT:
return (gpu->gt == 2) ? 80 : 40;
default:
break;
}
break;
default:
break;
}
intel_log(gpu, VK_DBG_REPORT_ERROR_BIT, 0, VK_NULL_HANDLE,
0, 0, "unknown Gen or shader stage");
switch (stage) {
case VK_SHADER_STAGE_VERTEX_BIT:
return 1;
case VK_SHADER_STAGE_GEOMETRY_BIT:
return 1;
case VK_SHADER_STAGE_FRAGMENT_BIT:
return 4;
default:
return 1;
}
}
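
/* the primary node fd is exposed for display-related operations;
 * presumably consumed by the WSI code
 */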
int intel_gpu_get_primary_fd(struct intel_gpu *gpu)
{
return gpu_open_primary_node(gpu);
}
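
/* the winsys (buffer and execbuf layer) runs on the render node */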
VkResult intel_gpu_init_winsys(struct intel_gpu *gpu)
{
int fd;
assert(!gpu->winsys);
fd = gpu_open_render_node(gpu);
if (fd < 0)
return VK_ERROR_INITIALIZATION_FAILED;
gpu->winsys = intel_winsys_create_for_fd(gpu->handle.instance->icd, fd);
if (!gpu->winsys) {
intel_log(gpu, VK_DBG_REPORT_ERROR_BIT, 0,
VK_NULL_HANDLE, 0, 0, "failed to create GPU winsys");
gpu_close_render_node(gpu);
return VK_ERROR_INITIALIZATION_FAILED;
}
return VK_SUCCESS;
}
void intel_gpu_cleanup_winsys(struct intel_gpu *gpu)
{
if (gpu->winsys) {
intel_winsys_destroy(gpu->winsys);
gpu->winsys = NULL;
}
gpu_close_primary_node(gpu);
gpu_close_render_node(gpu);
}
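
/*
 * Return the type whose extension properties match `ext`.  When nothing
 * matches, the loop runs off the end of the table and returns the array
 * size, which the assert below expects to equal INTEL_PHY_DEV_EXT_INVALID.
 */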
enum intel_phy_dev_ext_type intel_gpu_lookup_phy_dev_extension(
const struct intel_gpu *gpu,
const char *ext)
{
uint32_t type;
uint32_t array_size = ARRAY_SIZE(intel_phy_dev_gpu_exts);
for (type = 0; type < array_size; type++) {
if (compare_vk_extension_properties(&intel_phy_dev_gpu_exts[type], ext))
break;
}
assert(type < array_size || type == INTEL_PHY_DEV_EXT_INVALID);
return type;
}
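
/* Vulkan physical-device entry points exported by the ICD */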
ICD_EXPORT void VKAPI vkGetPhysicalDeviceProperties(
VkPhysicalDevice gpu_,
VkPhysicalDeviceProperties* pProperties)
{
struct intel_gpu *gpu = intel_gpu(gpu_);
intel_gpu_get_props(gpu, pProperties);
}
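
/*
 * This follows Vulkan's two-call idiom: pass NULL for pProperties to
 * query the count, then call again with an array that large.  A
 * hypothetical caller-side sketch:
 *
 *   uint32_t count;
 *   vkGetPhysicalDeviceQueueFamilyProperties(gpu, &count, NULL);
 *   VkQueueFamilyProperties *props = malloc(count * sizeof(*props));
 *   vkGetPhysicalDeviceQueueFamilyProperties(gpu, &count, props);
 */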
ICD_EXPORT void VKAPI vkGetPhysicalDeviceQueueFamilyProperties(
VkPhysicalDevice gpu_,
uint32_t* pQueueFamilyPropertyCount,
VkQueueFamilyProperties* pProperties)
{
struct intel_gpu *gpu = intel_gpu(gpu_);
    int engine;

    if (pProperties == NULL) {
        *pQueueFamilyPropertyCount = INTEL_GPU_ENGINE_COUNT;
        return;
    }

    /* never write more entries than exist, and report how many were written */
    if (*pQueueFamilyPropertyCount > INTEL_GPU_ENGINE_COUNT)
        *pQueueFamilyPropertyCount = INTEL_GPU_ENGINE_COUNT;

    for (engine = 0; engine < (int) *pQueueFamilyPropertyCount; engine++) {
        intel_gpu_get_queue_props(gpu, engine, pProperties);
        pProperties++;
    }
}
ICD_EXPORT void VKAPI vkGetPhysicalDeviceMemoryProperties(
VkPhysicalDevice gpu_,
VkPhysicalDeviceMemoryProperties* pProperties)
{
struct intel_gpu *gpu = intel_gpu(gpu_);
intel_gpu_get_memory_props(gpu, pProperties);
}
ICD_EXPORT void VKAPI vkGetPhysicalDeviceFeatures(
VkPhysicalDevice physicalDevice,
VkPhysicalDeviceFeatures* pFeatures)
{
/* TODO: fill out features */
memset(pFeatures, 0, sizeof(*pFeatures));
pFeatures->occlusionQueryPrecise = 1;
}
void intel_gpu_get_sparse_properties(VkPhysicalDeviceSparseProperties *pProps)
{
memset(pProps, 0, sizeof(*pProps));
}
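
/*
 * Same two-call idiom as above; if the caller's array is smaller than the
 * extension table, only *pPropertyCount entries are copied and
 * VK_INCOMPLETE is returned.
 */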
ICD_EXPORT VkResult VKAPI vkEnumerateDeviceExtensionProperties(
VkPhysicalDevice physicalDevice,
const char* pLayerName,
uint32_t* pPropertyCount,
VkExtensionProperties* pProperties)
{
uint32_t copy_size;
uint32_t extension_count = ARRAY_SIZE(intel_phy_dev_gpu_exts);
    if (pProperties == NULL) {
        *pPropertyCount = extension_count;
        return VK_SUCCESS;
    }
copy_size = *pPropertyCount < extension_count ? *pPropertyCount : extension_count;
memcpy(pProperties, intel_phy_dev_gpu_exts, copy_size * sizeof(VkExtensionProperties));
*pPropertyCount = copy_size;
if (copy_size < extension_count) {
return VK_INCOMPLETE;
}
return VK_SUCCESS;
}
ICD_EXPORT VkResult VKAPI vkEnumerateDeviceLayerProperties(
VkPhysicalDevice physicalDevice,
uint32_t* pPropertyCount,
VkLayerProperties* pProperties)
{
*pPropertyCount = 0;
return VK_SUCCESS;
}
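
/* sparse images are not supported (see intel_gpu_get_sparse_properties),
 * so no format properties are reported
 */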
ICD_EXPORT void VKAPI vkGetPhysicalDeviceSparseImageFormatProperties(
VkPhysicalDevice physicalDevice,
VkFormat format,
VkImageType type,
uint32_t samples,
VkImageUsageFlags usage,
VkImageTiling tiling,
uint32_t* pPropertyCount,
VkSparseImageFormatProperties* pProperties)
{
*pPropertyCount = 0;
}