| /* |
| * Copyright © 2009 Corbin Simpson |
| * Copyright © 2011 Marek Olšák <maraeo@gmail.com> |
| * All Rights Reserved. |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining |
| * a copy of this software and associated documentation files (the |
| * "Software"), to deal in the Software without restriction, including |
| * without limitation the rights to use, copy, modify, merge, publish, |
| * distribute, sub license, and/or sell copies of the Software, and to |
| * permit persons to whom the Software is furnished to do so, subject to |
| * the following conditions: |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
| * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES |
| * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
| * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS |
| * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
| * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
| * USE OR OTHER DEALINGS IN THE SOFTWARE. |
| * |
| * The above copyright notice and this permission notice (including the |
| * next paragraph) shall be included in all copies or substantial portions |
| * of the Software. |
| */ |
| |
| #include "radeon_drm_bo.h" |
| #include "radeon_drm_cs.h" |
| #include "radeon_drm_public.h" |
| |
| #include "util/os_file.h" |
| #include "util/u_cpu_detect.h" |
| #include "util/u_memory.h" |
| #include "util/u_hash_table.h" |
| #include "util/u_pointer.h" |
| |
| #include <xf86drm.h> |
| #include <stdio.h> |
| #include <sys/types.h> |
| #include <sys/stat.h> |
| #include <unistd.h> |
| #include <fcntl.h> |
| #include <radeon_surface.h> |
| |
| static struct hash_table *fd_tab = NULL; |
| static mtx_t fd_tab_mutex = _MTX_INITIALIZER_NP; |
| |
| /* Enable/disable feature access for one command stream. |
| * If enable == true, return true on success. |
| * Otherwise, return false. |
| * |
| * We basically do the same thing kernel does, because we have to deal |
| * with multiple contexts (here command streams) backed by one winsys. */ |
| static bool radeon_set_fd_access(struct radeon_drm_cs *applier, |
| struct radeon_drm_cs **owner, |
| mtx_t *mutex, |
| unsigned request, const char *request_name, |
| bool enable) |
| { |
| struct drm_radeon_info info; |
| unsigned value = enable ? 1 : 0; |
| |
| memset(&info, 0, sizeof(info)); |
| |
| mtx_lock(&*mutex); |
| |
| /* Early exit if we are sure the request will fail. */ |
| if (enable) { |
| if (*owner) { |
| mtx_unlock(&*mutex); |
| return false; |
| } |
| } else { |
| if (*owner != applier) { |
| mtx_unlock(&*mutex); |
| return false; |
| } |
| } |
| |
| /* Pass through the request to the kernel. */ |
| info.value = (unsigned long)&value; |
| info.request = request; |
| if (drmCommandWriteRead(applier->ws->fd, DRM_RADEON_INFO, |
| &info, sizeof(info)) != 0) { |
| mtx_unlock(&*mutex); |
| return false; |
| } |
| |
| /* Update the rights in the winsys. */ |
| if (enable) { |
| if (value) { |
| *owner = applier; |
| mtx_unlock(&*mutex); |
| return true; |
| } |
| } else { |
| *owner = NULL; |
| } |
| |
| mtx_unlock(&*mutex); |
| return false; |
| } |
| |
| static bool radeon_get_drm_value(int fd, unsigned request, |
| const char *errname, uint32_t *out) |
| { |
| struct drm_radeon_info info; |
| int retval; |
| |
| memset(&info, 0, sizeof(info)); |
| |
| info.value = (unsigned long)out; |
| info.request = request; |
| |
| retval = drmCommandWriteRead(fd, DRM_RADEON_INFO, &info, sizeof(info)); |
| if (retval) { |
| if (errname) { |
| fprintf(stderr, "radeon: Failed to get %s, error number %d\n", |
| errname, retval); |
| } |
| return false; |
| } |
| return true; |
| } |
| |
| /* Helper function to do the ioctls needed for setup and init. */ |
| static bool do_winsys_init(struct radeon_drm_winsys *ws) |
| { |
| struct drm_radeon_gem_info gem_info; |
| int retval; |
| drmVersionPtr version; |
| |
| memset(&gem_info, 0, sizeof(gem_info)); |
| |
| /* We do things in a specific order here. |
| * |
| * DRM version first. We need to be sure we're running on a KMS chipset. |
| * This is also for some features. |
| * |
| * Then, the PCI ID. This is essential and should return usable numbers |
| * for all Radeons. If this fails, we probably got handed an FD for some |
| * non-Radeon card. |
| * |
| * The GEM info is actually bogus on the kernel side, as well as our side |
| * (see radeon_gem_info_ioctl in radeon_gem.c) but that's alright because |
| * we don't actually use the info for anything yet. |
| * |
| * The GB and Z pipe requests should always succeed, but they might not |
| * return sensical values for all chipsets, but that's alright because |
| * the pipe drivers already know that. |
| */ |
| |
| /* Get DRM version. */ |
| version = drmGetVersion(ws->fd); |
| if (version->version_major != 2 || |
| version->version_minor < 12) { |
| fprintf(stderr, "%s: DRM version is %d.%d.%d but this driver is " |
| "only compatible with 2.12.0 (kernel 3.2) or later.\n", |
| __FUNCTION__, |
| version->version_major, |
| version->version_minor, |
| version->version_patchlevel); |
| drmFreeVersion(version); |
| return false; |
| } |
| |
| ws->info.drm_major = version->version_major; |
| ws->info.drm_minor = version->version_minor; |
| ws->info.drm_patchlevel = version->version_patchlevel; |
| ws->info.is_amdgpu = false; |
| drmFreeVersion(version); |
| |
| /* Get PCI ID. */ |
| if (!radeon_get_drm_value(ws->fd, RADEON_INFO_DEVICE_ID, "PCI ID", |
| &ws->info.pci_id)) |
| return false; |
| |
| /* Check PCI ID. */ |
| switch (ws->info.pci_id) { |
| #define CHIPSET(pci_id, name, cfamily) case pci_id: ws->info.family = CHIP_##cfamily; ws->gen = DRV_R300; break; |
| #include "pci_ids/r300_pci_ids.h" |
| #undef CHIPSET |
| |
| #define CHIPSET(pci_id, name, cfamily) case pci_id: ws->info.family = CHIP_##cfamily; ws->gen = DRV_R600; break; |
| #include "pci_ids/r600_pci_ids.h" |
| #undef CHIPSET |
| |
| #define CHIPSET(pci_id, cfamily) \ |
| case pci_id: \ |
| ws->info.family = CHIP_##cfamily; \ |
| ws->info.name = #cfamily; \ |
| ws->gen = DRV_SI; \ |
| break; |
| #include "pci_ids/radeonsi_pci_ids.h" |
| #undef CHIPSET |
| |
| default: |
| fprintf(stderr, "radeon: Invalid PCI ID.\n"); |
| return false; |
| } |
| |
| switch (ws->info.family) { |
| default: |
| case CHIP_UNKNOWN: |
| fprintf(stderr, "radeon: Unknown family.\n"); |
| return false; |
| case CHIP_R300: |
| case CHIP_R350: |
| case CHIP_RV350: |
| case CHIP_RV370: |
| case CHIP_RV380: |
| case CHIP_RS400: |
| case CHIP_RC410: |
| case CHIP_RS480: |
| ws->info.chip_class = R300; |
| break; |
| case CHIP_R420: /* R4xx-based cores. */ |
| case CHIP_R423: |
| case CHIP_R430: |
| case CHIP_R480: |
| case CHIP_R481: |
| case CHIP_RV410: |
| case CHIP_RS600: |
| case CHIP_RS690: |
| case CHIP_RS740: |
| ws->info.chip_class = R400; |
| break; |
| case CHIP_RV515: /* R5xx-based cores. */ |
| case CHIP_R520: |
| case CHIP_RV530: |
| case CHIP_R580: |
| case CHIP_RV560: |
| case CHIP_RV570: |
| ws->info.chip_class = R500; |
| break; |
| case CHIP_R600: |
| case CHIP_RV610: |
| case CHIP_RV630: |
| case CHIP_RV670: |
| case CHIP_RV620: |
| case CHIP_RV635: |
| case CHIP_RS780: |
| case CHIP_RS880: |
| ws->info.chip_class = R600; |
| break; |
| case CHIP_RV770: |
| case CHIP_RV730: |
| case CHIP_RV710: |
| case CHIP_RV740: |
| ws->info.chip_class = R700; |
| break; |
| case CHIP_CEDAR: |
| case CHIP_REDWOOD: |
| case CHIP_JUNIPER: |
| case CHIP_CYPRESS: |
| case CHIP_HEMLOCK: |
| case CHIP_PALM: |
| case CHIP_SUMO: |
| case CHIP_SUMO2: |
| case CHIP_BARTS: |
| case CHIP_TURKS: |
| case CHIP_CAICOS: |
| ws->info.chip_class = EVERGREEN; |
| break; |
| case CHIP_CAYMAN: |
| case CHIP_ARUBA: |
| ws->info.chip_class = CAYMAN; |
| break; |
| case CHIP_TAHITI: |
| case CHIP_PITCAIRN: |
| case CHIP_VERDE: |
| case CHIP_OLAND: |
| case CHIP_HAINAN: |
| ws->info.chip_class = GFX6; |
| break; |
| case CHIP_BONAIRE: |
| case CHIP_KAVERI: |
| case CHIP_KABINI: |
| case CHIP_HAWAII: |
| ws->info.chip_class = GFX7; |
| break; |
| } |
| |
| /* Set which chips don't have dedicated VRAM. */ |
| switch (ws->info.family) { |
| case CHIP_RS400: |
| case CHIP_RC410: |
| case CHIP_RS480: |
| case CHIP_RS600: |
| case CHIP_RS690: |
| case CHIP_RS740: |
| case CHIP_RS780: |
| case CHIP_RS880: |
| case CHIP_PALM: |
| case CHIP_SUMO: |
| case CHIP_SUMO2: |
| case CHIP_ARUBA: |
| case CHIP_KAVERI: |
| case CHIP_KABINI: |
| ws->info.has_dedicated_vram = false; |
| break; |
| |
| default: |
| ws->info.has_dedicated_vram = true; |
| } |
| |
| ws->info.num_rings[RING_GFX] = 1; |
| /* Check for dma */ |
| ws->info.num_rings[RING_DMA] = 0; |
| /* DMA is disabled on R700. There is IB corruption and hangs. */ |
| if (ws->info.chip_class >= EVERGREEN && ws->info.drm_minor >= 27) { |
| ws->info.num_rings[RING_DMA] = 1; |
| } |
| |
| /* Check for UVD and VCE */ |
| ws->info.has_hw_decode = false; |
| ws->info.vce_fw_version = 0x00000000; |
| if (ws->info.drm_minor >= 32) { |
| uint32_t value = RADEON_CS_RING_UVD; |
| if (radeon_get_drm_value(ws->fd, RADEON_INFO_RING_WORKING, |
| "UVD Ring working", &value)) { |
| ws->info.has_hw_decode = value; |
| ws->info.num_rings[RING_UVD] = 1; |
| } |
| |
| value = RADEON_CS_RING_VCE; |
| if (radeon_get_drm_value(ws->fd, RADEON_INFO_RING_WORKING, |
| NULL, &value) && value) { |
| |
| if (radeon_get_drm_value(ws->fd, RADEON_INFO_VCE_FW_VERSION, |
| "VCE FW version", &value)) { |
| ws->info.vce_fw_version = value; |
| ws->info.num_rings[RING_VCE] = 1; |
| } |
| } |
| } |
| |
| /* Check for userptr support. */ |
| { |
| struct drm_radeon_gem_userptr args = {0}; |
| |
| /* If the ioctl doesn't exist, -EINVAL is returned. |
| * |
| * If the ioctl exists, it should return -EACCES |
| * if RADEON_GEM_USERPTR_READONLY or RADEON_GEM_USERPTR_REGISTER |
| * aren't set. |
| */ |
| ws->info.has_userptr = |
| drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_USERPTR, |
| &args, sizeof(args)) == -EACCES; |
| } |
| |
| /* Get GEM info. */ |
| retval = drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_INFO, |
| &gem_info, sizeof(gem_info)); |
| if (retval) { |
| fprintf(stderr, "radeon: Failed to get MM info, error number %d\n", |
| retval); |
| return false; |
| } |
| ws->info.gart_size = gem_info.gart_size; |
| ws->info.vram_size = gem_info.vram_size; |
| ws->info.vram_vis_size = gem_info.vram_visible; |
| /* Older versions of the kernel driver reported incorrect values, and |
| * didn't support more than 256MB of visible VRAM anyway |
| */ |
| if (ws->info.drm_minor < 49) |
| ws->info.vram_vis_size = MIN2(ws->info.vram_vis_size, 256*1024*1024); |
| |
| /* Radeon allocates all buffers contiguously, which makes large allocations |
| * unlikely to succeed. */ |
| if (ws->info.has_dedicated_vram) |
| ws->info.max_alloc_size = ws->info.vram_size * 0.7; |
| else |
| ws->info.max_alloc_size = ws->info.gart_size * 0.7; |
| |
| if (ws->info.drm_minor < 40) |
| ws->info.max_alloc_size = MIN2(ws->info.max_alloc_size, 256*1024*1024); |
| /* Both 32-bit and 64-bit address spaces only have 4GB. */ |
| ws->info.max_alloc_size = MIN2(ws->info.max_alloc_size, 3ull*1024*1024*1024); |
| |
| /* Get max clock frequency info and convert it to MHz */ |
| radeon_get_drm_value(ws->fd, RADEON_INFO_MAX_SCLK, NULL, |
| &ws->info.max_shader_clock); |
| ws->info.max_shader_clock /= 1000; |
| |
| ws->num_cpus = sysconf(_SC_NPROCESSORS_ONLN); |
| |
| /* Generation-specific queries. */ |
| if (ws->gen == DRV_R300) { |
| if (!radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_GB_PIPES, |
| "GB pipe count", |
| &ws->info.r300_num_gb_pipes)) |
| return false; |
| |
| if (!radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_Z_PIPES, |
| "Z pipe count", |
| &ws->info.r300_num_z_pipes)) |
| return false; |
| } |
| else if (ws->gen >= DRV_R600) { |
| uint32_t tiling_config = 0; |
| |
| if (!radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_BACKENDS, |
| "num backends", |
| &ws->info.num_render_backends)) |
| return false; |
| |
| /* get the GPU counter frequency, failure is not fatal */ |
| radeon_get_drm_value(ws->fd, RADEON_INFO_CLOCK_CRYSTAL_FREQ, NULL, |
| &ws->info.clock_crystal_freq); |
| |
| radeon_get_drm_value(ws->fd, RADEON_INFO_TILING_CONFIG, NULL, |
| &tiling_config); |
| |
| ws->info.r600_num_banks = |
| ws->info.chip_class >= EVERGREEN ? |
| 4 << ((tiling_config & 0xf0) >> 4) : |
| 4 << ((tiling_config & 0x30) >> 4); |
| |
| ws->info.pipe_interleave_bytes = |
| ws->info.chip_class >= EVERGREEN ? |
| 256 << ((tiling_config & 0xf00) >> 8) : |
| 256 << ((tiling_config & 0xc0) >> 6); |
| |
| if (!ws->info.pipe_interleave_bytes) |
| ws->info.pipe_interleave_bytes = |
| ws->info.chip_class >= EVERGREEN ? 512 : 256; |
| |
| radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_TILE_PIPES, NULL, |
| &ws->info.num_tile_pipes); |
| |
| /* "num_tiles_pipes" must be equal to the number of pipes (Px) in the |
| * pipe config field of the GB_TILE_MODE array. Only one card (Tahiti) |
| * reports a different value (12). Fix it by setting what's in the |
| * GB_TILE_MODE array (8). |
| */ |
| if (ws->gen == DRV_SI && ws->info.num_tile_pipes == 12) |
| ws->info.num_tile_pipes = 8; |
| |
| if (radeon_get_drm_value(ws->fd, RADEON_INFO_BACKEND_MAP, NULL, |
| &ws->info.r600_gb_backend_map)) |
| ws->info.r600_gb_backend_map_valid = true; |
| |
| /* Default value. */ |
| ws->info.enabled_rb_mask = u_bit_consecutive(0, ws->info.num_render_backends); |
| /* |
| * This fails (silently) on non-GCN or older kernels, overwriting the |
| * default enabled_rb_mask with the result of the last query. |
| */ |
| if (ws->gen >= DRV_SI) |
| radeon_get_drm_value(ws->fd, RADEON_INFO_SI_BACKEND_ENABLED_MASK, NULL, |
| &ws->info.enabled_rb_mask); |
| |
| ws->info.r600_has_virtual_memory = false; |
| if (ws->info.drm_minor >= 13) { |
| uint32_t ib_vm_max_size; |
| |
| ws->info.r600_has_virtual_memory = true; |
| if (!radeon_get_drm_value(ws->fd, RADEON_INFO_VA_START, NULL, |
| &ws->va_start)) |
| ws->info.r600_has_virtual_memory = false; |
| if (!radeon_get_drm_value(ws->fd, RADEON_INFO_IB_VM_MAX_SIZE, NULL, |
| &ib_vm_max_size)) |
| ws->info.r600_has_virtual_memory = false; |
| radeon_get_drm_value(ws->fd, RADEON_INFO_VA_UNMAP_WORKING, NULL, |
| &ws->va_unmap_working); |
| } |
| if (ws->gen == DRV_R600 && !debug_get_bool_option("RADEON_VA", false)) |
| ws->info.r600_has_virtual_memory = false; |
| } |
| |
| /* Get max pipes, this is only needed for compute shaders. All evergreen+ |
| * chips have at least 2 pipes, so we use 2 as a default. */ |
| ws->info.r600_max_quad_pipes = 2; |
| radeon_get_drm_value(ws->fd, RADEON_INFO_MAX_PIPES, NULL, |
| &ws->info.r600_max_quad_pipes); |
| |
| /* All GPUs have at least one compute unit */ |
| ws->info.num_good_compute_units = 1; |
| radeon_get_drm_value(ws->fd, RADEON_INFO_ACTIVE_CU_COUNT, NULL, |
| &ws->info.num_good_compute_units); |
| |
| radeon_get_drm_value(ws->fd, RADEON_INFO_MAX_SE, NULL, |
| &ws->info.max_se); |
| |
| switch (ws->info.family) { |
| case CHIP_HAINAN: |
| case CHIP_KABINI: |
| ws->info.num_tcc_blocks = 2; |
| break; |
| case CHIP_VERDE: |
| case CHIP_OLAND: |
| case CHIP_BONAIRE: |
| case CHIP_KAVERI: |
| ws->info.num_tcc_blocks = 4; |
| break; |
| case CHIP_PITCAIRN: |
| ws->info.num_tcc_blocks = 8; |
| break; |
| case CHIP_TAHITI: |
| ws->info.num_tcc_blocks = 12; |
| break; |
| case CHIP_HAWAII: |
| ws->info.num_tcc_blocks = 16; |
| break; |
| default: |
| ws->info.num_tcc_blocks = 0; |
| break; |
| } |
| |
| if (!ws->info.max_se) { |
| switch (ws->info.family) { |
| default: |
| ws->info.max_se = 1; |
| break; |
| case CHIP_CYPRESS: |
| case CHIP_HEMLOCK: |
| case CHIP_BARTS: |
| case CHIP_CAYMAN: |
| case CHIP_TAHITI: |
| case CHIP_PITCAIRN: |
| case CHIP_BONAIRE: |
| ws->info.max_se = 2; |
| break; |
| case CHIP_HAWAII: |
| ws->info.max_se = 4; |
| break; |
| } |
| } |
| |
| radeon_get_drm_value(ws->fd, RADEON_INFO_MAX_SH_PER_SE, NULL, |
| &ws->info.max_sh_per_se); |
| if (ws->gen == DRV_SI) { |
| ws->info.max_good_cu_per_sa = |
| ws->info.min_good_cu_per_sa = ws->info.num_good_compute_units / |
| (ws->info.max_se * ws->info.max_sh_per_se); |
| } |
| |
| radeon_get_drm_value(ws->fd, RADEON_INFO_ACCEL_WORKING2, NULL, |
| &ws->accel_working2); |
| if (ws->info.family == CHIP_HAWAII && ws->accel_working2 < 2) { |
| fprintf(stderr, "radeon: GPU acceleration for Hawaii disabled, " |
| "returned accel_working2 value %u is smaller than 2. " |
| "Please install a newer kernel.\n", |
| ws->accel_working2); |
| return false; |
| } |
| |
| if (ws->info.chip_class == GFX7) { |
| if (!radeon_get_drm_value(ws->fd, RADEON_INFO_CIK_MACROTILE_MODE_ARRAY, NULL, |
| ws->info.cik_macrotile_mode_array)) { |
| fprintf(stderr, "radeon: Kernel 3.13 is required for Sea Islands support.\n"); |
| return false; |
| } |
| } |
| |
| if (ws->info.chip_class >= GFX6) { |
| if (!radeon_get_drm_value(ws->fd, RADEON_INFO_SI_TILE_MODE_ARRAY, NULL, |
| ws->info.si_tile_mode_array)) { |
| fprintf(stderr, "radeon: Kernel 3.10 is required for Southern Islands support.\n"); |
| return false; |
| } |
| } |
| |
| /* Hawaii with old firmware needs type2 nop packet. |
| * accel_working2 with value 3 indicates the new firmware. |
| */ |
| ws->info.gfx_ib_pad_with_type2 = ws->info.chip_class <= GFX6 || |
| (ws->info.family == CHIP_HAWAII && |
| ws->accel_working2 < 3); |
| ws->info.tcc_cache_line_size = 64; /* TC L2 line size on GCN */ |
| ws->info.ib_alignment = 4096; |
| ws->info.kernel_flushes_hdp_before_ib = ws->info.drm_minor >= 40; |
| /* HTILE is broken with 1D tiling on old kernels and GFX7. */ |
| ws->info.htile_cmask_support_1d_tiling = ws->info.chip_class != GFX7 || |
| ws->info.drm_minor >= 38; |
| ws->info.si_TA_CS_BC_BASE_ADDR_allowed = ws->info.drm_minor >= 48; |
| ws->info.has_bo_metadata = false; |
| ws->info.has_gpu_reset_status_query = ws->info.drm_minor >= 43; |
| ws->info.has_eqaa_surface_allocator = false; |
| ws->info.has_format_bc1_through_bc7 = ws->info.drm_minor >= 31; |
| ws->info.kernel_flushes_tc_l2_after_ib = true; |
| /* Old kernels disallowed register writes via COPY_DATA |
| * that are used for indirect compute dispatches. */ |
| ws->info.has_indirect_compute_dispatch = ws->info.chip_class == GFX7 || |
| (ws->info.chip_class == GFX6 && |
| ws->info.drm_minor >= 45); |
| /* GFX6 doesn't support unaligned loads. */ |
| ws->info.has_unaligned_shader_loads = ws->info.chip_class == GFX7 && |
| ws->info.drm_minor >= 50; |
| ws->info.has_sparse_vm_mappings = false; |
| /* 2D tiling on GFX7 is supported since DRM 2.35.0 */ |
| ws->info.has_2d_tiling = ws->info.chip_class <= GFX6 || ws->info.drm_minor >= 35; |
| ws->info.has_read_registers_query = ws->info.drm_minor >= 42; |
| ws->info.max_alignment = 1024*1024; |
| ws->info.has_graphics = true; |
| ws->info.cpdma_prefetch_writes_memory = true; |
| ws->info.max_wave64_per_simd = 10; |
| ws->info.num_physical_sgprs_per_simd = 512; |
| ws->info.num_physical_wave64_vgprs_per_simd = 256; |
| /* Potential hang on Kabini: */ |
| ws->info.use_late_alloc = ws->info.family != CHIP_KABINI; |
| |
| ws->check_vm = strstr(debug_get_option("R600_DEBUG", ""), "check_vm") != NULL || |
| strstr(debug_get_option("AMD_DEBUG", ""), "check_vm") != NULL; |
| |
| return true; |
| } |
| |
| static void radeon_winsys_destroy(struct radeon_winsys *rws) |
| { |
| struct radeon_drm_winsys *ws = (struct radeon_drm_winsys*)rws; |
| |
| if (util_queue_is_initialized(&ws->cs_queue)) |
| util_queue_destroy(&ws->cs_queue); |
| |
| mtx_destroy(&ws->hyperz_owner_mutex); |
| mtx_destroy(&ws->cmask_owner_mutex); |
| |
| if (ws->info.r600_has_virtual_memory) |
| pb_slabs_deinit(&ws->bo_slabs); |
| pb_cache_deinit(&ws->bo_cache); |
| |
| if (ws->gen >= DRV_R600) { |
| radeon_surface_manager_free(ws->surf_man); |
| } |
| |
| _mesa_hash_table_destroy(ws->bo_names, NULL); |
| _mesa_hash_table_destroy(ws->bo_handles, NULL); |
| _mesa_hash_table_u64_destroy(ws->bo_vas, NULL); |
| mtx_destroy(&ws->bo_handles_mutex); |
| mtx_destroy(&ws->vm32.mutex); |
| mtx_destroy(&ws->vm64.mutex); |
| mtx_destroy(&ws->bo_fence_lock); |
| |
| if (ws->fd >= 0) |
| close(ws->fd); |
| |
| FREE(rws); |
| } |
| |
| static void radeon_query_info(struct radeon_winsys *rws, |
| struct radeon_info *info) |
| { |
| *info = ((struct radeon_drm_winsys *)rws)->info; |
| } |
| |
| static bool radeon_cs_request_feature(struct radeon_cmdbuf *rcs, |
| enum radeon_feature_id fid, |
| bool enable) |
| { |
| struct radeon_drm_cs *cs = radeon_drm_cs(rcs); |
| |
| switch (fid) { |
| case RADEON_FID_R300_HYPERZ_ACCESS: |
| return radeon_set_fd_access(cs, &cs->ws->hyperz_owner, |
| &cs->ws->hyperz_owner_mutex, |
| RADEON_INFO_WANT_HYPERZ, "Hyper-Z", |
| enable); |
| |
| case RADEON_FID_R300_CMASK_ACCESS: |
| return radeon_set_fd_access(cs, &cs->ws->cmask_owner, |
| &cs->ws->cmask_owner_mutex, |
| RADEON_INFO_WANT_CMASK, "AA optimizations", |
| enable); |
| } |
| return false; |
| } |
| |
| uint32_t radeon_drm_get_gpu_reset_counter(struct radeon_drm_winsys *ws) |
| { |
| uint64_t retval = 0; |
| |
| if (!ws->info.has_gpu_reset_status_query) |
| return 0; |
| |
| radeon_get_drm_value(ws->fd, RADEON_INFO_GPU_RESET_COUNTER, |
| "gpu-reset-counter", (uint32_t*)&retval); |
| return retval; |
| } |
| |
| static uint64_t radeon_query_value(struct radeon_winsys *rws, |
| enum radeon_value_id value) |
| { |
| struct radeon_drm_winsys *ws = (struct radeon_drm_winsys*)rws; |
| uint64_t retval = 0; |
| |
| switch (value) { |
| case RADEON_REQUESTED_VRAM_MEMORY: |
| return ws->allocated_vram; |
| case RADEON_REQUESTED_GTT_MEMORY: |
| return ws->allocated_gtt; |
| case RADEON_MAPPED_VRAM: |
| return ws->mapped_vram; |
| case RADEON_MAPPED_GTT: |
| return ws->mapped_gtt; |
| case RADEON_BUFFER_WAIT_TIME_NS: |
| return ws->buffer_wait_time; |
| case RADEON_NUM_MAPPED_BUFFERS: |
| return ws->num_mapped_buffers; |
| case RADEON_TIMESTAMP: |
| if (ws->info.drm_minor < 20 || ws->gen < DRV_R600) { |
| assert(0); |
| return 0; |
| } |
| |
| radeon_get_drm_value(ws->fd, RADEON_INFO_TIMESTAMP, "timestamp", |
| (uint32_t*)&retval); |
| return retval; |
| case RADEON_NUM_GFX_IBS: |
| return ws->num_gfx_IBs; |
| case RADEON_NUM_SDMA_IBS: |
| return ws->num_sdma_IBs; |
| case RADEON_NUM_BYTES_MOVED: |
| radeon_get_drm_value(ws->fd, RADEON_INFO_NUM_BYTES_MOVED, |
| "num-bytes-moved", (uint32_t*)&retval); |
| return retval; |
| case RADEON_NUM_EVICTIONS: |
| case RADEON_NUM_VRAM_CPU_PAGE_FAULTS: |
| case RADEON_VRAM_VIS_USAGE: |
| case RADEON_GFX_BO_LIST_COUNTER: |
| case RADEON_GFX_IB_SIZE_COUNTER: |
| return 0; /* unimplemented */ |
| case RADEON_VRAM_USAGE: |
| radeon_get_drm_value(ws->fd, RADEON_INFO_VRAM_USAGE, |
| "vram-usage", (uint32_t*)&retval); |
| return retval; |
| case RADEON_GTT_USAGE: |
| radeon_get_drm_value(ws->fd, RADEON_INFO_GTT_USAGE, |
| "gtt-usage", (uint32_t*)&retval); |
| return retval; |
| case RADEON_GPU_TEMPERATURE: |
| radeon_get_drm_value(ws->fd, RADEON_INFO_CURRENT_GPU_TEMP, |
| "gpu-temp", (uint32_t*)&retval); |
| return retval; |
| case RADEON_CURRENT_SCLK: |
| radeon_get_drm_value(ws->fd, RADEON_INFO_CURRENT_GPU_SCLK, |
| "current-gpu-sclk", (uint32_t*)&retval); |
| return retval; |
| case RADEON_CURRENT_MCLK: |
| radeon_get_drm_value(ws->fd, RADEON_INFO_CURRENT_GPU_MCLK, |
| "current-gpu-mclk", (uint32_t*)&retval); |
| return retval; |
| case RADEON_CS_THREAD_TIME: |
| return util_queue_get_thread_time_nano(&ws->cs_queue, 0); |
| } |
| return 0; |
| } |
| |
| static bool radeon_read_registers(struct radeon_winsys *rws, |
| unsigned reg_offset, |
| unsigned num_registers, uint32_t *out) |
| { |
| struct radeon_drm_winsys *ws = (struct radeon_drm_winsys*)rws; |
| unsigned i; |
| |
| for (i = 0; i < num_registers; i++) { |
| uint32_t reg = reg_offset + i*4; |
| |
| if (!radeon_get_drm_value(ws->fd, RADEON_INFO_READ_REG, NULL, ®)) |
| return false; |
| out[i] = reg; |
| } |
| return true; |
| } |
| |
| DEBUG_GET_ONCE_BOOL_OPTION(thread, "RADEON_THREAD", true) |
| |
| static bool radeon_winsys_unref(struct radeon_winsys *ws) |
| { |
| struct radeon_drm_winsys *rws = (struct radeon_drm_winsys*)ws; |
| bool destroy; |
| |
| /* When the reference counter drops to zero, remove the fd from the table. |
| * This must happen while the mutex is locked, so that |
| * radeon_drm_winsys_create in another thread doesn't get the winsys |
| * from the table when the counter drops to 0. */ |
| mtx_lock(&fd_tab_mutex); |
| |
| destroy = pipe_reference(&rws->reference, NULL); |
| if (destroy && fd_tab) { |
| _mesa_hash_table_remove_key(fd_tab, intptr_to_pointer(rws->fd)); |
| if (_mesa_hash_table_num_entries(fd_tab) == 0) { |
| _mesa_hash_table_destroy(fd_tab, NULL); |
| fd_tab = NULL; |
| } |
| } |
| |
| mtx_unlock(&fd_tab_mutex); |
| return destroy; |
| } |
| |
| static void radeon_pin_threads_to_L3_cache(struct radeon_winsys *ws, |
| unsigned cache) |
| { |
| struct radeon_drm_winsys *rws = (struct radeon_drm_winsys*)ws; |
| |
| if (util_queue_is_initialized(&rws->cs_queue)) { |
| util_set_thread_affinity(rws->cs_queue.threads[0], |
| util_cpu_caps.L3_affinity_mask[cache], |
| NULL, UTIL_MAX_CPUS); |
| } |
| } |
| |
| static bool radeon_cs_is_secure(struct radeon_cmdbuf* cs) |
| { |
| return false; |
| } |
| |
| PUBLIC struct radeon_winsys * |
| radeon_drm_winsys_create(int fd, const struct pipe_screen_config *config, |
| radeon_screen_create_t screen_create) |
| { |
| struct radeon_drm_winsys *ws; |
| |
| mtx_lock(&fd_tab_mutex); |
| if (!fd_tab) { |
| fd_tab = util_hash_table_create_fd_keys(); |
| } |
| |
| ws = util_hash_table_get(fd_tab, intptr_to_pointer(fd)); |
| if (ws) { |
| pipe_reference(NULL, &ws->reference); |
| mtx_unlock(&fd_tab_mutex); |
| return &ws->base; |
| } |
| |
| ws = CALLOC_STRUCT(radeon_drm_winsys); |
| if (!ws) { |
| mtx_unlock(&fd_tab_mutex); |
| return NULL; |
| } |
| |
| ws->fd = os_dupfd_cloexec(fd); |
| |
| if (!do_winsys_init(ws)) |
| goto fail1; |
| |
| pb_cache_init(&ws->bo_cache, RADEON_MAX_CACHED_HEAPS, |
| 500000, ws->check_vm ? 1.0f : 2.0f, 0, |
| MIN2(ws->info.vram_size, ws->info.gart_size), |
| radeon_bo_destroy, |
| radeon_bo_can_reclaim); |
| |
| if (ws->info.r600_has_virtual_memory) { |
| /* There is no fundamental obstacle to using slab buffer allocation |
| * without GPUVM, but enabling it requires making sure that the drivers |
| * honor the address offset. |
| */ |
| if (!pb_slabs_init(&ws->bo_slabs, |
| RADEON_SLAB_MIN_SIZE_LOG2, RADEON_SLAB_MAX_SIZE_LOG2, |
| RADEON_MAX_SLAB_HEAPS, |
| ws, |
| radeon_bo_can_reclaim_slab, |
| radeon_bo_slab_alloc, |
| radeon_bo_slab_free)) |
| goto fail_cache; |
| |
| ws->info.min_alloc_size = 1 << RADEON_SLAB_MIN_SIZE_LOG2; |
| } else { |
| ws->info.min_alloc_size = ws->info.gart_page_size; |
| } |
| |
| if (ws->gen >= DRV_R600) { |
| ws->surf_man = radeon_surface_manager_new(ws->fd); |
| if (!ws->surf_man) |
| goto fail_slab; |
| } |
| |
| /* init reference */ |
| pipe_reference_init(&ws->reference, 1); |
| |
| /* Set functions. */ |
| ws->base.unref = radeon_winsys_unref; |
| ws->base.destroy = radeon_winsys_destroy; |
| ws->base.query_info = radeon_query_info; |
| ws->base.pin_threads_to_L3_cache = radeon_pin_threads_to_L3_cache; |
| ws->base.cs_request_feature = radeon_cs_request_feature; |
| ws->base.query_value = radeon_query_value; |
| ws->base.read_registers = radeon_read_registers; |
| ws->base.cs_is_secure = radeon_cs_is_secure; |
| |
| radeon_drm_bo_init_functions(ws); |
| radeon_drm_cs_init_functions(ws); |
| radeon_surface_init_functions(ws); |
| |
| (void) mtx_init(&ws->hyperz_owner_mutex, mtx_plain); |
| (void) mtx_init(&ws->cmask_owner_mutex, mtx_plain); |
| |
| ws->bo_names = util_hash_table_create_ptr_keys(); |
| ws->bo_handles = util_hash_table_create_ptr_keys(); |
| ws->bo_vas = _mesa_hash_table_u64_create(NULL); |
| (void) mtx_init(&ws->bo_handles_mutex, mtx_plain); |
| (void) mtx_init(&ws->vm32.mutex, mtx_plain); |
| (void) mtx_init(&ws->vm64.mutex, mtx_plain); |
| (void) mtx_init(&ws->bo_fence_lock, mtx_plain); |
| list_inithead(&ws->vm32.holes); |
| list_inithead(&ws->vm64.holes); |
| |
| /* The kernel currently returns 8MB. Make sure this doesn't change. */ |
| if (ws->va_start > 8 * 1024 * 1024) { |
| /* Not enough 32-bit address space. */ |
| radeon_winsys_destroy(&ws->base); |
| mtx_unlock(&fd_tab_mutex); |
| return NULL; |
| } |
| |
| ws->vm32.start = ws->va_start; |
| ws->vm32.end = 1ull << 32; |
| |
| /* The maximum is 8GB of virtual address space limited by the kernel. |
| * It's obviously not enough for bigger cards, like Hawaiis with 4GB |
| * and 8GB of physical memory and 4GB of GART. |
| * |
| * Older kernels set the limit to 4GB, which is even worse, so they only |
| * have 32-bit address space. |
| */ |
| if (ws->info.drm_minor >= 41) { |
| ws->vm64.start = 1ull << 32; |
| ws->vm64.end = 1ull << 33; |
| } |
| |
| /* TTM aligns the BO size to the CPU page size */ |
| ws->info.gart_page_size = sysconf(_SC_PAGESIZE); |
| ws->info.pte_fragment_size = 64 * 1024; /* GPUVM page size */ |
| |
| if (ws->num_cpus > 1 && debug_get_option_thread()) |
| util_queue_init(&ws->cs_queue, "rcs", 8, 1, 0); |
| |
| /* Create the screen at the end. The winsys must be initialized |
| * completely. |
| * |
| * Alternatively, we could create the screen based on "ws->gen" |
| * and link all drivers into one binary blob. */ |
| ws->base.screen = screen_create(&ws->base, config); |
| if (!ws->base.screen) { |
| radeon_winsys_destroy(&ws->base); |
| mtx_unlock(&fd_tab_mutex); |
| return NULL; |
| } |
| |
| _mesa_hash_table_insert(fd_tab, intptr_to_pointer(ws->fd), ws); |
| |
| /* We must unlock the mutex once the winsys is fully initialized, so that |
| * other threads attempting to create the winsys from the same fd will |
| * get a fully initialized winsys and not just half-way initialized. */ |
| mtx_unlock(&fd_tab_mutex); |
| |
| return &ws->base; |
| |
| fail_slab: |
| if (ws->info.r600_has_virtual_memory) |
| pb_slabs_deinit(&ws->bo_slabs); |
| fail_cache: |
| pb_cache_deinit(&ws->bo_cache); |
| fail1: |
| mtx_unlock(&fd_tab_mutex); |
| if (ws->surf_man) |
| radeon_surface_manager_free(ws->surf_man); |
| if (ws->fd >= 0) |
| close(ws->fd); |
| |
| FREE(ws); |
| return NULL; |
| } |