/*
* Vulkan
*
* Copyright (C) 2014 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include "genhw/genhw.h"
#include "kmd/winsys.h"
#include "queue.h"
#include "gpu.h"
#include "instance.h"
#include "wsi.h"
#include "vk_debug_report_lunarg.h"
#include "vk_debug_marker_lunarg.h"
static int gpu_open_primary_node(struct intel_gpu *gpu)
{
if (gpu->primary_fd_internal < 0)
gpu->primary_fd_internal = open(gpu->primary_node, O_RDWR);
return gpu->primary_fd_internal;
}
static void gpu_close_primary_node(struct intel_gpu *gpu)
{
if (gpu->primary_fd_internal >= 0) {
close(gpu->primary_fd_internal);
gpu->primary_fd_internal = -1;
}
}
static int gpu_open_render_node(struct intel_gpu *gpu)
{
if (gpu->render_fd_internal < 0 && gpu->render_node) {
gpu->render_fd_internal = open(gpu->render_node, O_RDWR);
if (gpu->render_fd_internal < 0) {
intel_log(gpu, VK_DBG_REPORT_ERROR_BIT, 0, VK_NULL_HANDLE, 0,
0, "failed to open %s", gpu->render_node);
}
}
return gpu->render_fd_internal;
}
static void gpu_close_render_node(struct intel_gpu *gpu)
{
if (gpu->render_fd_internal >= 0) {
close(gpu->render_fd_internal);
gpu->render_fd_internal = -1;
}
}
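
/* map the PCI device id to a marketing name; the gen_is_*() classifiers
 * are presumably provided by genhw/genhw.h
 */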
static const char *gpu_get_name(const struct intel_gpu *gpu)
{
const char *name = NULL;
if (gen_is_hsw(gpu->devid)) {
if (gen_is_desktop(gpu->devid))
name = "Intel(R) Haswell Desktop";
else if (gen_is_mobile(gpu->devid))
name = "Intel(R) Haswell Mobile";
else if (gen_is_server(gpu->devid))
name = "Intel(R) Haswell Server";
}
else if (gen_is_ivb(gpu->devid)) {
if (gen_is_desktop(gpu->devid))
name = "Intel(R) Ivybridge Desktop";
else if (gen_is_mobile(gpu->devid))
name = "Intel(R) Ivybridge Mobile";
else if (gen_is_server(gpu->devid))
name = "Intel(R) Ivybridge Server";
}
else if (gen_is_snb(gpu->devid)) {
if (gen_is_desktop(gpu->devid))
name = "Intel(R) Sandybridge Desktop";
else if (gen_is_mobile(gpu->devid))
name = "Intel(R) Sandybridge Mobile";
else if (gen_is_server(gpu->devid))
name = "Intel(R) Sandybridge Server";
}
if (!name)
name = "Unknown Intel Chipset";
return name;
}
void intel_gpu_destroy(struct intel_gpu *gpu)
{
intel_wsi_gpu_cleanup(gpu);
intel_gpu_cleanup_winsys(gpu);
intel_free(gpu, gpu->primary_node);
intel_free(gpu, gpu);
}
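
/*
 * INTEL_GEN() encodes a hardware generation as an integer token that can
 * be compared and switched on (note INTEL_GEN(7.5) for Haswell below).
 * Returns -1 for device ids this driver does not support.
 */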
static int devid_to_gen(int devid)
{
int gen;
if (gen_is_hsw(devid))
gen = INTEL_GEN(7.5);
else if (gen_is_ivb(devid))
gen = INTEL_GEN(7);
else if (gen_is_snb(devid))
gen = INTEL_GEN(6);
else
gen = -1;
#ifdef INTEL_GEN_SPECIALIZED
if (gen != INTEL_GEN(INTEL_GEN_SPECIALIZED))
gen = -1;
#endif
return gen;
}
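
/*
 * Create an intel_gpu for the given device.  Both node paths are copied
 * into a single allocation: primary_node points at the start, and
 * render_node points either just past the primary path's terminating NUL
 * or, when no render node was given, back at primary_node itself.
 */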
VkResult intel_gpu_create(const struct intel_instance *instance, int devid,
const char *primary_node, const char *render_node,
struct intel_gpu **gpu_ret)
{
const int gen = devid_to_gen(devid);
size_t primary_len, render_len;
struct intel_gpu *gpu;
if (gen < 0) {
intel_log(instance, VK_DBG_REPORT_WARN_BIT, 0,
VK_NULL_HANDLE, 0, 0, "unsupported device id 0x%04x", devid);
return VK_ERROR_INITIALIZATION_FAILED;
}
gpu = intel_alloc(instance, sizeof(*gpu), 0, VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
if (!gpu)
return VK_ERROR_OUT_OF_HOST_MEMORY;
memset(gpu, 0, sizeof(*gpu));
/* there is no VK_DBG_OBJECT_GPU */
intel_handle_init(&gpu->handle, VK_OBJECT_TYPE_PHYSICAL_DEVICE, instance);
gpu->devid = devid;
primary_len = strlen(primary_node);
render_len = (render_node) ? strlen(render_node) : 0;
gpu->primary_node = intel_alloc(gpu, primary_len + 1 +
((render_len) ? (render_len + 1) : 0), 0, VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
if (!gpu->primary_node) {
intel_free(instance, gpu);
return VK_ERROR_OUT_OF_HOST_MEMORY;
}
memcpy(gpu->primary_node, primary_node, primary_len + 1);
if (render_node) {
gpu->render_node = gpu->primary_node + primary_len + 1;
memcpy(gpu->render_node, render_node, render_len + 1);
} else {
gpu->render_node = gpu->primary_node;
}
gpu->gen_opaque = gen;
switch (intel_gpu_gen(gpu)) {
case INTEL_GEN(7.5):
gpu->gt = gen_get_hsw_gt(devid);
break;
case INTEL_GEN(7):
gpu->gt = gen_get_ivb_gt(devid);
break;
case INTEL_GEN(6):
gpu->gt = gen_get_snb_gt(devid);
break;
}
/* 150K dwords */
gpu->max_batch_buffer_size = sizeof(uint32_t) * 150*1024;
    /* the winsys is sized for one reloc every two dwords, minus 2 */
gpu->batch_buffer_reloc_count =
gpu->max_batch_buffer_size / sizeof(uint32_t) / 2 - 2;
gpu->primary_fd_internal = -1;
gpu->render_fd_internal = -1;
*gpu_ret = gpu;
return VK_SUCCESS;
}
void intel_gpu_get_limits(VkPhysicalDeviceLimits *pLimits)
{
// TODO: fill out more limits
memset(pLimits, 0, sizeof(*pLimits));
    // no size limit, but no bound buffer can exceed 2GB
pLimits->maxBoundDescriptorSets = 1;
pLimits->maxComputeWorkGroupInvocations = 512;
    // the timestamp counter increments once every 80 ns
pLimits->timestampPeriod = 80.0f;
// hardware is limited to 16 viewports
pLimits->maxViewports = INTEL_MAX_VIEWPORTS;
pLimits->maxColorAttachments = INTEL_MAX_RENDER_TARGETS;
    // TODO: confirm these against actual hardware limits
pLimits->maxImageDimension1D = 8192;
pLimits->maxImageDimension2D = 8192;
pLimits->maxImageDimension3D = 8192;
pLimits->maxImageDimensionCube = 8192;
pLimits->maxImageArrayLayers = 2048;
pLimits->maxTexelBufferElements = 128 * 1024 * 1024; // 128M texels hard limit
pLimits->maxUniformBufferRange = 64 * 1024; // not hard limit
    /* HW has two per-stage resource tables:
     * - samplers: 16 per stage on IVB; addressable in blocks of 16 on HSW+
     *   via a shader workaround, since the table base pointer used by the
     *   sampler hardware is under shader software control.
     *
     * - binding table entries: 250 total on all gens, shared between
     *   textures, render targets, images, SSBOs, UBOs, ...
     *   The top few indices (250-255) are used for "stateless" access with
     *   various cache options, and for SLM access.
     */
    pLimits->maxPerStageDescriptorSamplers = 16; // technically more on HSW+
pLimits->maxDescriptorSetSamplers = 16;
pLimits->maxPerStageDescriptorUniformBuffers = 128;
pLimits->maxDescriptorSetUniformBuffers = 128;
pLimits->maxPerStageDescriptorSampledImages = 128;
pLimits->maxDescriptorSetSampledImages = 128;
// storage images and buffers not implemented; left at zero
// required to support at least two queue priorities
pLimits->discreteQueuePriorities = 2;
}
void intel_gpu_get_props(const struct intel_gpu *gpu,
VkPhysicalDeviceProperties *props)
{
const char *name;
size_t name_len;
props->apiVersion = INTEL_API_VERSION;
props->driverVersion = INTEL_DRIVER_VERSION;
props->vendorID = 0x8086;
props->deviceID = gpu->devid;
props->deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU;
/* copy GPU name */
name = gpu_get_name(gpu);
name_len = strlen(name);
if (name_len > sizeof(props->deviceName) - 1)
name_len = sizeof(props->deviceName) - 1;
memcpy(props->deviceName, name, name_len);
props->deviceName[name_len] = '\0';
intel_gpu_get_limits(&props->limits);
intel_gpu_get_sparse_properties(&props->sparseProperties);
}
void intel_gpu_get_queue_props(const struct intel_gpu *gpu,
enum intel_gpu_engine_type engine,
VkQueueFamilyProperties *props)
{
switch (engine) {
case INTEL_GPU_ENGINE_3D:
props->queueFlags = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT;
props->queueCount = 1;
props->timestampValidBits = 0;
props->minImageTransferGranularity.width = 1;
props->minImageTransferGranularity.height = 1;
props->minImageTransferGranularity.depth = 1;
break;
default:
assert(!"unknown engine type");
return;
}
}
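
/* integrated GPUs share system memory with the CPU, so a single heap
 * with one all-purpose memory type is advertised
 */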
void intel_gpu_get_memory_props(const struct intel_gpu *gpu,
VkPhysicalDeviceMemoryProperties *props)
{
memset(props, 0, sizeof(VkPhysicalDeviceMemoryProperties));
props->memoryTypeCount = INTEL_MEMORY_TYPE_COUNT;
props->memoryHeapCount = INTEL_MEMORY_HEAP_COUNT;
    // For now, Intel supports exactly one memory type
    for (uint32_t i = 0; i < props->memoryTypeCount; i++) {
        assert(props->memoryTypeCount == 1);
        props->memoryTypes[i].propertyFlags = INTEL_MEMORY_PROPERTY_ALL;
        props->memoryTypes[i].heapIndex = i;
    }

    // For now, Intel supports a single heap with all available memory
    for (uint32_t i = 0; i < props->memoryHeapCount; i++) {
        assert(props->memoryHeapCount == 1);
        props->memoryHeaps[i].size = INTEL_MEMORY_HEAP_SIZE;
    }
}
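
/*
 * Maximum thread counts by generation and GT level, as returned below:
 *
 *                  VS    GS    PS
 *   Gen7.5 GT3    280   256   408
 *   Gen7.5 GT2    280   256   204
 *   Gen7.5 GT1     70    70   102
 *   Gen7   GT2    128   128   172
 *   Gen7   GT1     36    36    48
 *   Gen6   GT2     60    28    80
 *   Gen6   GT1     24    21    40
 */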
int intel_gpu_get_max_threads(const struct intel_gpu *gpu,
VkShaderStageFlagBits stage)
{
switch (intel_gpu_gen(gpu)) {
case INTEL_GEN(7.5):
switch (stage) {
case VK_SHADER_STAGE_VERTEX_BIT:
return (gpu->gt >= 2) ? 280 : 70;
case VK_SHADER_STAGE_GEOMETRY_BIT:
/* values from ilo_gpe_init_gs_cso_gen7 */
return (gpu->gt >= 2) ? 256 : 70;
case VK_SHADER_STAGE_FRAGMENT_BIT:
return (gpu->gt == 3) ? 408 :
(gpu->gt == 2) ? 204 : 102;
default:
break;
}
break;
case INTEL_GEN(7):
switch (stage) {
case VK_SHADER_STAGE_VERTEX_BIT:
return (gpu->gt == 2) ? 128 : 36;
case VK_SHADER_STAGE_GEOMETRY_BIT:
/* values from ilo_gpe_init_gs_cso_gen7 */
return (gpu->gt == 2) ? 128 : 36;
case VK_SHADER_STAGE_FRAGMENT_BIT:
return (gpu->gt == 2) ? 172 : 48;
default:
break;
}
break;
case INTEL_GEN(6):
switch (stage) {
case VK_SHADER_STAGE_VERTEX_BIT:
return (gpu->gt == 2) ? 60 : 24;
case VK_SHADER_STAGE_GEOMETRY_BIT:
/* values from ilo_gpe_init_gs_cso_gen6 */
return (gpu->gt == 2) ? 28 : 21;
case VK_SHADER_STAGE_FRAGMENT_BIT:
return (gpu->gt == 2) ? 80 : 40;
default:
break;
}
break;
default:
break;
}
intel_log(gpu, VK_DBG_REPORT_ERROR_BIT, 0, VK_NULL_HANDLE,
0, 0, "unknown Gen or shader stage");
switch (stage) {
case VK_SHADER_STAGE_VERTEX_BIT:
return 1;
case VK_SHADER_STAGE_GEOMETRY_BIT:
return 1;
case VK_SHADER_STAGE_FRAGMENT_BIT:
return 4;
default:
return 1;
}
}
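
/* the primary node fd is exposed for display-related operations;
 * presumably consumed by the WSI code
 */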
int intel_gpu_get_primary_fd(struct intel_gpu *gpu)
{
return gpu_open_primary_node(gpu);
}
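
/* the winsys (buffer and execbuf layer) runs on the render node */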
VkResult intel_gpu_init_winsys(struct intel_gpu *gpu)
{
int fd;
assert(!gpu->winsys);
fd = gpu_open_render_node(gpu);
if (fd < 0)
return VK_ERROR_INITIALIZATION_FAILED;
gpu->winsys = intel_winsys_create_for_fd(gpu->handle.instance->icd, fd);
if (!gpu->winsys) {
intel_log(gpu, VK_DBG_REPORT_ERROR_BIT, 0,
VK_NULL_HANDLE, 0, 0, "failed to create GPU winsys");
gpu_close_render_node(gpu);
return VK_ERROR_INITIALIZATION_FAILED;
}
return VK_SUCCESS;
}
void intel_gpu_cleanup_winsys(struct intel_gpu *gpu)
{
if (gpu->winsys) {
intel_winsys_destroy(gpu->winsys);
gpu->winsys = NULL;
}
gpu_close_primary_node(gpu);
gpu_close_render_node(gpu);
}
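
/*
 * Return the type whose extension properties match `ext`.  When nothing
 * matches, the loop runs off the end of the table and returns the array
 * size, which the assert below expects to equal INTEL_PHY_DEV_EXT_INVALID.
 */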
enum intel_phy_dev_ext_type intel_gpu_lookup_phy_dev_extension(
const struct intel_gpu *gpu,
const char *ext)
{
uint32_t type;
uint32_t array_size = ARRAY_SIZE(intel_phy_dev_gpu_exts);
for (type = 0; type < array_size; type++) {
if (compare_vk_extension_properties(&intel_phy_dev_gpu_exts[type], ext))
break;
}
assert(type < array_size || type == INTEL_PHY_DEV_EXT_INVALID);
return type;
}
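
/* Vulkan physical-device entry points exported by the ICD */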
ICD_EXPORT void VKAPI vkGetPhysicalDeviceProperties(
VkPhysicalDevice gpu_,
VkPhysicalDeviceProperties* pProperties)
{
struct intel_gpu *gpu = intel_gpu(gpu_);
intel_gpu_get_props(gpu, pProperties);
}
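
/*
 * This follows Vulkan's two-call idiom: pass NULL for pProperties to
 * query the count, then call again with an array that large.  A
 * hypothetical caller-side sketch:
 *
 *   uint32_t count;
 *   vkGetPhysicalDeviceQueueFamilyProperties(gpu, &count, NULL);
 *   VkQueueFamilyProperties *props = malloc(count * sizeof(*props));
 *   vkGetPhysicalDeviceQueueFamilyProperties(gpu, &count, props);
 */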
ICD_EXPORT void VKAPI vkGetPhysicalDeviceQueueFamilyProperties(
VkPhysicalDevice gpu_,
uint32_t* pQueueFamilyPropertyCount,
VkQueueFamilyProperties* pProperties)
{
struct intel_gpu *gpu = intel_gpu(gpu_);
    int engine;

    if (pProperties == NULL) {
        *pQueueFamilyPropertyCount = INTEL_GPU_ENGINE_COUNT;
        return;
    }

    /* never write more entries than exist, and report how many were written */
    if (*pQueueFamilyPropertyCount > INTEL_GPU_ENGINE_COUNT)
        *pQueueFamilyPropertyCount = INTEL_GPU_ENGINE_COUNT;

    for (engine = 0; engine < (int) *pQueueFamilyPropertyCount; engine++) {
        intel_gpu_get_queue_props(gpu, engine, pProperties);
        pProperties++;
    }
}
ICD_EXPORT void VKAPI vkGetPhysicalDeviceMemoryProperties(
VkPhysicalDevice gpu_,
VkPhysicalDeviceMemoryProperties* pProperties)
{
struct intel_gpu *gpu = intel_gpu(gpu_);
intel_gpu_get_memory_props(gpu, pProperties);
}
ICD_EXPORT void VKAPI vkGetPhysicalDeviceFeatures(
VkPhysicalDevice physicalDevice,
VkPhysicalDeviceFeatures* pFeatures)
{
/* TODO: fill out features */
memset(pFeatures, 0, sizeof(*pFeatures));
pFeatures->occlusionQueryPrecise = 1;
}
void intel_gpu_get_sparse_properties(VkPhysicalDeviceSparseProperties *pProps)
{
memset(pProps, 0, sizeof(*pProps));
}
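
/*
 * Same two-call idiom as above; if the caller's array is smaller than the
 * extension table, only *pPropertyCount entries are copied and
 * VK_INCOMPLETE is returned.
 */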
ICD_EXPORT VkResult VKAPI vkEnumerateDeviceExtensionProperties(
VkPhysicalDevice physicalDevice,
const char* pLayerName,
uint32_t* pPropertyCount,
VkExtensionProperties* pProperties)
{
uint32_t copy_size;
uint32_t extension_count = ARRAY_SIZE(intel_phy_dev_gpu_exts);
    if (pProperties == NULL) {
        *pPropertyCount = extension_count;
        return VK_SUCCESS;
    }
copy_size = *pPropertyCount < extension_count ? *pPropertyCount : extension_count;
memcpy(pProperties, intel_phy_dev_gpu_exts, copy_size * sizeof(VkExtensionProperties));
*pPropertyCount = copy_size;
if (copy_size < extension_count) {
return VK_INCOMPLETE;
}
return VK_SUCCESS;
}
ICD_EXPORT VkResult VKAPI vkEnumerateDeviceLayerProperties(
VkPhysicalDevice physicalDevice,
uint32_t* pPropertyCount,
VkLayerProperties* pProperties)
{
*pPropertyCount = 0;
return VK_SUCCESS;
}
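
/* sparse images are not supported (see intel_gpu_get_sparse_properties),
 * so no format properties are reported
 */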
ICD_EXPORT void VKAPI vkGetPhysicalDeviceSparseImageFormatProperties(
VkPhysicalDevice physicalDevice,
VkFormat format,
VkImageType type,
uint32_t samples,
VkImageUsageFlags usage,
VkImageTiling tiling,
uint32_t* pPropertyCount,
VkSparseImageFormatProperties* pProperties)
{
*pPropertyCount = 0;
}