| /* |
| * Copyright © 2011 Daniel Vetter |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the "Software"), |
| * to deal in the Software without restriction, including without limitation |
| * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| * and/or sell copies of the Software, and to permit persons to whom the |
| * Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice (including the next |
| * paragraph) shall be included in all copies or substantial portions of the |
| * Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
| * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
| * IN THE SOFTWARE. |
| * |
| * Authors: |
| * Daniel Vetter <daniel.vetter@ffwll.ch> |
| * |
| * Partially based upon gem_tiled_fence_blits.c |
| */ |
| |
| /** @file gem_stress.c |
| * |
| * This is a general gem coherency test. It's designed to eventually replicate |
| * any possible sequence of access patterns. It works by copying a set of tiles |
| * between two sets of backing buffer objects, randomly permuting the assigned |
| * positions on each copy operation. |
| * |
| * The copy operations are done in tiny portions (to reduce any race window |
| * for corruption, hence increasing the chances of observing one) and are |
| * constantly switched between all means of copying stuff (fenced blitter, |
| * unfenced render, mmap, pwrite/read). |
| * |
| * After every complete move of a set, the tiling parameters of the buffers are |
| * randomly changed to simulate the effects of libdrm caching. |
| * |
| * Buffers are 1MB big to nicely fit into fences on gen2/3. A few are further |
| * split up to test relaxed fencing. Using this to push the average working set |
| * size over the available GTT space forces objects to be mapped as unfenceable |
| * (and as a side effect tests GTT map/unmap coherency). |
| * |
| * In short: designed for maximum evilness. |
| */ |
| |
| #include "rendercopy.h" |
| |
| #define CMD_POLY_STIPPLE_OFFSET 0x7906 |
| |
| /** TODO: |
| * - beat on relaxed fencing (i.e. mappable/fenceable tracking in the kernel) |
| * - render copy (to check fence tracking and cache coherency management by the |
| * kernel) |
| * - multi-threading: probably just a wrapper script to launch multiple |
| * instances + an option to accordingly reduce the working set |
| * - gen6 inter-ring coherency (needs render copy first) |
| * - variable buffer size |
| * - add an option to fork a second process that randomly sends signals to the |
| * first one (to check consistency of the kernel recovery paths) |
| */ |
| |
| drm_intel_bufmgr *bufmgr; |
| struct intel_batchbuffer *batch; |
| int drm_fd; |
| int devid; |
| int num_fences; |
| |
| drm_intel_bo *busy_bo; |
| |
| struct option_struct { |
| unsigned scratch_buf_size; |
| unsigned max_dimension; |
| unsigned num_buffers; |
| int trace_tile; |
| int no_hw; |
| int gpu_busy_load; |
| int use_render; |
| int use_blt; |
| int forced_tiling; |
| int use_cpu_maps; |
| int total_rounds; |
| int fail; |
| int tiles_per_buf; |
| int ducttape; |
| int tile_size; |
| int check_render_cpyfn; |
| int use_signal_helper; |
| }; |
| |
| struct option_struct options; |
| |
| #define MAX_BUFS 4096 |
| #define SCRATCH_BUF_SIZE (1024*1024) |
| #define BUSY_BUF_SIZE (256*4096) |
| #define TILE_BYTES(size) ((size)*(size)*sizeof(uint32_t)) |
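| /* Worked example with the defaults set up in parse_options(): tile_size = 16, |
| * so TILE_BYTES(16) = 16*16*4 = 1024 bytes per tile, and the default 256*4096 |
| * = 1MB scratch buffer holds at most 1024 such tiles. */ |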
| |
| static struct scratch_buf buffers[2][MAX_BUFS]; |
| /* logical tile i is at physical position tile_permutation[i] */ |
| static unsigned *tile_permutation; |
| static unsigned num_buffers = 0; |
| static unsigned current_set = 0; |
| static unsigned target_set = 0; |
| static unsigned num_total_tiles = 0; |
| |
| int fence_storm = 0; |
| static int gpu_busy_load = 10; |
| |
| struct { |
| unsigned num_failed; |
| unsigned max_failed_reads; |
| } stats; |
| |
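| /* Tiles are laid out row-major in units of tile_size. Worked example with the |
| * default tile_size = 16 and a 4096-byte stride (1024 dwords per row, i.e. 64 |
| * tiles per tile-row): tile 70 maps to x = (70*16) % 1024 = 96 and |
| * y = (70*16 / 1024) * 16 = 16, i.e. the 7th tile in the 2nd tile-row. */ |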
| static void tile2xy(struct scratch_buf *buf, unsigned tile, unsigned *x, unsigned *y) |
| { |
| igt_assert(tile < buf->num_tiles); |
| *x = (tile*options.tile_size) % (buf->stride/sizeof(uint32_t)); |
| *y = ((tile*options.tile_size) / (buf->stride/sizeof(uint32_t))) * options.tile_size; |
| } |
| |
| static void emit_blt(drm_intel_bo *src_bo, uint32_t src_tiling, unsigned src_pitch, |
| unsigned src_x, unsigned src_y, unsigned w, unsigned h, |
| drm_intel_bo *dst_bo, uint32_t dst_tiling, unsigned dst_pitch, |
| unsigned dst_x, unsigned dst_y) |
| { |
| uint32_t cmd_bits = 0; |
| |
| if (IS_965(devid) && src_tiling) { |
| src_pitch /= 4; |
| cmd_bits |= XY_SRC_COPY_BLT_SRC_TILED; |
| } |
| |
| if (IS_965(devid) && dst_tiling) { |
| dst_pitch /= 4; |
| cmd_bits |= XY_SRC_COPY_BLT_DST_TILED; |
| } |
| |
| /* emit the actual XY_SRC_COPY blit */ |
| BEGIN_BATCH(8); |
| OUT_BATCH(XY_SRC_COPY_BLT_CMD | |
| XY_SRC_COPY_BLT_WRITE_ALPHA | |
| XY_SRC_COPY_BLT_WRITE_RGB | |
| cmd_bits); |
| OUT_BATCH((3 << 24) | /* 32 bits */ |
| (0xcc << 16) | /* copy ROP */ |
| dst_pitch); |
| OUT_BATCH(dst_y << 16 | dst_x); |
| OUT_BATCH((dst_y+h) << 16 | (dst_x+w)); |
| OUT_RELOC_FENCED(dst_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0); |
| OUT_BATCH(src_y << 16 | src_x); |
| OUT_BATCH(src_pitch); |
| OUT_RELOC_FENCED(src_bo, I915_GEM_DOMAIN_RENDER, 0, 0); |
| ADVANCE_BATCH(); |
| |
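| /* dummy clip reset; presumably needed to keep the gen6+ blitter happy */ |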
| if (IS_GEN6(devid) || IS_GEN7(devid)) { |
| BEGIN_BATCH(3); |
| OUT_BATCH(XY_SETUP_CLIP_BLT_CMD); |
| OUT_BATCH(0); |
| OUT_BATCH(0); |
| ADVANCE_BATCH(); |
| } |
| } |
| |
| /* All this gem thrashing wastes too much cpu time, so give the gpu something |
| * to do to increase the chances for races. */ |
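| /* At the maximum load factor of 10 this blits a 1024x128-dword block, i.e. the |
| * entire lower half of busy_bo (1MB at a 4096-byte pitch = 1024x256 dwords), |
| * onto its upper half; each smaller factor halves the copied width. */ |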
| static void keep_gpu_busy(void) |
| { |
| int tmp; |
| |
| tmp = 1 << gpu_busy_load; |
| igt_assert(tmp <= 1024); |
| |
| emit_blt(busy_bo, 0, 4096, 0, 0, tmp, 128, |
| busy_bo, 0, 4096, 0, 128); |
| } |
| |
| static void set_to_cpu_domain(struct scratch_buf *buf, int writing) |
| { |
| gem_set_domain(drm_fd, buf->bo->handle, I915_GEM_DOMAIN_CPU, |
| writing ? I915_GEM_DOMAIN_CPU : 0); |
| } |
| |
| static unsigned int copyfunc_seq = 0; |
| static void (*copyfunc)(struct scratch_buf *src, unsigned src_x, unsigned src_y, |
| struct scratch_buf *dst, unsigned dst_x, unsigned dst_y, |
| unsigned logical_tile_no); |
| |
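| /* cpucpy2d doubles as the data checker: fan_out() fills logical tile n with |
| * the sequential values n*ts*ts .. (n+1)*ts*ts - 1, so with the default |
| * tile_size = 16, logical tile 2 holds 512..767 and the dword at (i = 1, |
| * j = 3) within it must read 2*256 + 1*16 + 3 = 531. */ |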
| /* stride, x, y in units of uint32_t! */ |
| static void cpucpy2d(uint32_t *src, unsigned src_stride, unsigned src_x, unsigned src_y, |
| uint32_t *dst, unsigned dst_stride, unsigned dst_x, unsigned dst_y, |
| unsigned logical_tile_no) |
| { |
| int i, j; |
| int failed = 0; |
| |
| for (i = 0; i < options.tile_size; i++) { |
| for (j = 0; j < options.tile_size; j++) { |
| unsigned dst_ofs = dst_x + j + dst_stride * (dst_y + i); |
| unsigned src_ofs = src_x + j + src_stride * (src_y + i); |
| unsigned expect = logical_tile_no*options.tile_size*options.tile_size |
| + i*options.tile_size + j; |
| uint32_t tmp = src[src_ofs]; |
| if (tmp != expect) { |
| printf("mismatch at tile %i pos %i, read %i, expected %i, diff %i\n", |
| logical_tile_no, i*options.tile_size + j, tmp, expect, (int) tmp - expect); |
| if (options.trace_tile >= 0 && options.fail) |
| igt_fail(1); |
| failed++; |
| } |
| /* when not aborting, correct any errors */ |
| dst[dst_ofs] = expect; |
| } |
| } |
| if (failed && options.fail) |
| igt_fail(1); |
| |
| if (failed > stats.max_failed_reads) |
| stats.max_failed_reads = failed; |
| if (failed) |
| stats.num_failed++; |
| } |
| |
| static void cpu_copyfunc(struct scratch_buf *src, unsigned src_x, unsigned src_y, |
| struct scratch_buf *dst, unsigned dst_x, unsigned dst_y, |
| unsigned logical_tile_no) |
| { |
| igt_assert(batch->ptr == batch->buffer); |
| |
| if (options.ducttape) |
| drm_intel_bo_wait_rendering(dst->bo); |
| |
| if (options.use_cpu_maps) { |
| set_to_cpu_domain(src, 0); |
| set_to_cpu_domain(dst, 1); |
| } |
| |
| cpucpy2d(src->data, src->stride/sizeof(uint32_t), src_x, src_y, |
| dst->data, dst->stride/sizeof(uint32_t), dst_x, dst_y, |
| logical_tile_no); |
| } |
| |
| static void prw_copyfunc(struct scratch_buf *src, unsigned src_x, unsigned src_y, |
| struct scratch_buf *dst, unsigned dst_x, unsigned dst_y, |
| unsigned logical_tile_no) |
| { |
| uint32_t tmp_tile[options.tile_size*options.tile_size]; |
| int i; |
| |
| igt_assert(batch->ptr == batch->buffer); |
| |
| if (options.ducttape) |
| drm_intel_bo_wait_rendering(dst->bo); |
| |
| if (src->tiling == I915_TILING_NONE) { |
| for (i = 0; i < options.tile_size; i++) { |
| unsigned ofs = src_x*sizeof(uint32_t) + src->stride*(src_y + i); |
| drm_intel_bo_get_subdata(src->bo, ofs, |
| options.tile_size*sizeof(uint32_t), |
| tmp_tile + options.tile_size*i); |
| } |
| } else { |
| if (options.use_cpu_maps) |
| set_to_cpu_domain(src, 0); |
| |
| cpucpy2d(src->data, src->stride/sizeof(uint32_t), src_x, src_y, |
| tmp_tile, options.tile_size, 0, 0, logical_tile_no); |
| } |
| |
| if (dst->tiling == I915_TILING_NONE) { |
| for (i = 0; i < options.tile_size; i++) { |
| unsigned ofs = dst_x*sizeof(uint32_t) + dst->stride*(dst_y + i); |
| drm_intel_bo_subdata(dst->bo, ofs, |
| options.tile_size*sizeof(uint32_t), |
| tmp_tile + options.tile_size*i); |
| } |
| } else { |
| if (options.use_cpu_maps) |
| set_to_cpu_domain(dst, 1); |
| |
| cpucpy2d(tmp_tile, options.tile_size, 0, 0, |
| dst->data, dst->stride/sizeof(uint32_t), dst_x, dst_y, |
| logical_tile_no); |
| } |
| } |
| |
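| /* A fence storm queues up tiled blits without flushing: every tiled src or |
| * dst decrements fence_storm, and only once the budget drops to one does the |
| * batch get flushed, so a single batch can demand almost every available |
| * fence register at once. */ |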
| static void blitter_copyfunc(struct scratch_buf *src, unsigned src_x, unsigned src_y, |
| struct scratch_buf *dst, unsigned dst_x, unsigned dst_y, |
| unsigned logical_tile_no) |
| { |
| static unsigned keep_gpu_busy_counter = 0; |
| |
| /* alternate the busy load before/after the copy to check both edges of the |
| * fence usage */ |
| if (keep_gpu_busy_counter & 1 && !fence_storm) |
| keep_gpu_busy(); |
| |
| emit_blt(src->bo, src->tiling, src->stride, src_x, src_y, |
| options.tile_size, options.tile_size, |
| dst->bo, dst->tiling, dst->stride, dst_x, dst_y); |
| |
| if (!(keep_gpu_busy_counter & 1) && !fence_storm) |
| keep_gpu_busy(); |
| |
| keep_gpu_busy_counter++; |
| |
| if (src->tiling) |
| fence_storm--; |
| if (dst->tiling) |
| fence_storm--; |
| |
| if (fence_storm <= 1) { |
| fence_storm = 0; |
| intel_batchbuffer_flush(batch); |
| } |
| } |
| |
| static void render_copyfunc(struct scratch_buf *src, unsigned src_x, unsigned src_y, |
| struct scratch_buf *dst, unsigned dst_x, unsigned dst_y, |
| unsigned logical_tile_no) |
| { |
| static unsigned keep_gpu_busy_counter = 0; |
| render_copyfunc_t rendercopy = get_render_copyfunc(devid); |
| |
| /* alternate the busy load before/after the copy to check both edges of the |
| * fence usage */ |
| if (keep_gpu_busy_counter & 1) |
| keep_gpu_busy(); |
| |
| if (rendercopy) |
| rendercopy(batch, src, src_x, src_y, |
| options.tile_size, options.tile_size, |
| dst, dst_x, dst_y); |
| else |
| blitter_copyfunc(src, src_x, src_y, |
| dst, dst_x, dst_y, |
| logical_tile_no); |
| if (!(keep_gpu_busy_counter & 1)) |
| keep_gpu_busy(); |
| |
| keep_gpu_busy_counter++; |
| intel_batchbuffer_flush(batch); |
| } |
| |
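| /* The moduli 61, 17, 19 and 3 below are pairwise coprime, so the selection |
| * pattern repeats only every lcm(61, 17, 19, 3) = 59109 calls, giving a long, |
| * varied interleaving of fence storms and cpu, prw, render and blitter |
| * copies. */ |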
| static void next_copyfunc(int tile) |
| { |
| if (fence_storm) { |
| if (tile == options.trace_tile) |
| printf(" using fence storm\n"); |
| return; |
| } |
| |
| if (copyfunc_seq % 61 == 0 |
| && options.forced_tiling != I915_TILING_NONE) { |
| if (tile == options.trace_tile) |
| printf(" using fence storm\n"); |
| fence_storm = num_fences; |
| copyfunc = blitter_copyfunc; |
| } else if (copyfunc_seq % 17 == 0) { |
| if (tile == options.trace_tile) |
| printf(" using cpu\n"); |
| copyfunc = cpu_copyfunc; |
| } else if (copyfunc_seq % 19 == 0) { |
| if (tile == options.trace_tile) |
| printf(" using prw\n"); |
| copyfunc = prw_copyfunc; |
| } else if (copyfunc_seq % 3 == 0 && options.use_render) { |
| if (tile == options.trace_tile) |
| printf(" using render\n"); |
| copyfunc = render_copyfunc; |
| } else if (options.use_blt){ |
| if (tile == options.trace_tile) |
| printf(" using blitter\n"); |
| copyfunc = blitter_copyfunc; |
| } else if (options.use_render){ |
| if (tile == options.trace_tile) |
| printf(" using render\n"); |
| copyfunc = render_copyfunc; |
| } else { |
| copyfunc = cpu_copyfunc; |
| } |
| |
| copyfunc_seq++; |
| } |
| |
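| /* fan_out() establishes the identity layout: logical tile i sits at physical |
| * position i and holds the sequential values i*ts*ts .. (i+1)*ts*ts - 1. */ |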
| static void fan_out(void) |
| { |
| uint32_t tmp_tile[options.tile_size*options.tile_size]; |
| uint32_t seq = 0; |
| int i, k; |
| unsigned tile, buf_idx, x, y; |
| |
| for (i = 0; i < num_total_tiles; i++) { |
| tile = i; |
| buf_idx = tile / options.tiles_per_buf; |
| tile %= options.tiles_per_buf; |
| |
| tile2xy(&buffers[current_set][buf_idx], tile, &x, &y); |
| |
| for (k = 0; k < options.tile_size*options.tile_size; k++) |
| tmp_tile[k] = seq++; |
| |
| if (options.use_cpu_maps) |
| set_to_cpu_domain(&buffers[current_set][buf_idx], 1); |
| |
| cpucpy2d(tmp_tile, options.tile_size, 0, 0, |
| buffers[current_set][buf_idx].data, |
| buffers[current_set][buf_idx].stride / sizeof(uint32_t), |
| x, y, i); |
| } |
| |
| for (i = 0; i < num_total_tiles; i++) |
| tile_permutation[i] = i; |
| } |
| |
| static void fan_in_and_check(void) |
| { |
| uint32_t tmp_tile[options.tile_size*options.tile_size]; |
| unsigned tile, buf_idx, x, y; |
| int i; |
| for (i = 0; i < num_total_tiles; i++) { |
| tile = tile_permutation[i]; |
| buf_idx = tile / options.tiles_per_buf; |
| tile %= options.tiles_per_buf; |
| |
| tile2xy(&buffers[current_set][buf_idx], tile, &x, &y); |
| |
| if (options.use_cpu_maps) |
| set_to_cpu_domain(&buffers[current_set][buf_idx], 0); |
| |
| cpucpy2d(buffers[current_set][buf_idx].data, |
| buffers[current_set][buf_idx].stride / sizeof(uint32_t), |
| x, y, |
| tmp_tile, options.tile_size, 0, 0, |
| i); |
| } |
| } |
| |
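| /* Worked example (buf_width = stride/4 and buf_height = size/stride, per |
| * rendercopy.h): a 1MB buffer handed a 64-byte stride would be 16384 rows |
| * tall; with max_dimension = 8192 the first clause raises the stride to |
| * 1048576/8192 = 128 bytes, i.e. a 32x8192-dword buffer. */ |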
| static void sanitize_stride(struct scratch_buf *buf) |
| { |
| |
| if (buf_height(buf) > options.max_dimension) |
| buf->stride = buf->size / options.max_dimension; |
| |
| if (buf_height(buf) < options.tile_size) |
| buf->stride = buf->size / options.tile_size; |
| |
| if (buf_width(buf) < options.tile_size) |
| buf->stride = options.tile_size * sizeof(uint32_t); |
| |
| igt_assert(buf->stride <= 8192); |
| igt_assert(buf_width(buf) <= options.max_dimension); |
| igt_assert(buf_height(buf) <= options.max_dimension); |
| |
| igt_assert(buf_width(buf) >= options.tile_size); |
| igt_assert(buf_height(buf) >= options.tile_size); |
| |
| } |
| |
| static void init_buffer(struct scratch_buf *buf, unsigned size) |
| { |
| buf->bo = drm_intel_bo_alloc(bufmgr, "tiled bo", size, 4096); |
| buf->size = size; |
| igt_assert(buf->bo); |
| buf->tiling = I915_TILING_NONE; |
| buf->stride = 4096; |
| |
| sanitize_stride(buf); |
| |
| if (options.no_hw) |
| buf->data = malloc(size); |
| else { |
| if (options.use_cpu_maps) |
| drm_intel_bo_map(buf->bo, 1); |
| else |
| drm_intel_gem_bo_map_gtt(buf->bo); |
| buf->data = buf->bo->virtual; |
| } |
| |
| buf->num_tiles = options.tiles_per_buf; |
| } |
| |
| static void exchange_buf(void *array, unsigned i, unsigned j) |
| { |
| struct scratch_buf *buf_arr, tmp; |
| buf_arr = array; |
| |
| memcpy(&tmp, &buf_arr[i], sizeof(struct scratch_buf)); |
| memcpy(&buf_arr[i], &buf_arr[j], sizeof(struct scratch_buf)); |
| memcpy(&buf_arr[j], &tmp, sizeof(struct scratch_buf)); |
| } |
| |
| |
| static void init_set(unsigned set) |
| { |
| long int r; |
| int i; |
| |
| igt_permute_array(buffers[set], num_buffers, exchange_buf); |
| |
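| /* With no fixed busy load requested, cycle the load factor through 6..10, |
| * bumping it on every other round to vary the gpu pressure. */ |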
| if (current_set == 1 && options.gpu_busy_load == 0) { |
| gpu_busy_load++; |
| if (gpu_busy_load > 10) |
| gpu_busy_load = 6; |
| } |
| |
| for (i = 0; i < num_buffers; i++) { |
| r = random(); |
| if ((r & 3) != 0) |
| continue; |
| r >>= 2; |
| |
| if ((r & 3) != 0) |
| buffers[set][i].tiling = I915_TILING_X; |
| else |
| buffers[set][i].tiling = I915_TILING_NONE; |
| r >>= 2; |
| if (options.forced_tiling >= 0) |
| buffers[set][i].tiling = options.forced_tiling; |
| |
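| /* Pick a power-of-two stride between the per-case minimum below and the |
| * 8192-byte maximum that sanitize_stride() enforces. */ |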
| if (buffers[set][i].tiling == I915_TILING_NONE) { |
| /* min 64 byte stride */ |
| r %= 8; |
| buffers[set][i].stride = 64 * (1 << r); |
| } else if (IS_GEN2(devid)) { |
| /* min 128 byte stride */ |
| r %= 7; |
| buffers[set][i].stride = 128 * (1 << r); |
| } else { |
| /* min 512 byte stride */ |
| r %= 5; |
| buffers[set][i].stride = 512 * (1 << r); |
| } |
| |
| sanitize_stride(&buffers[set][i]); |
| |
| gem_set_tiling(drm_fd, buffers[set][i].bo->handle, |
| buffers[set][i].tiling, |
| buffers[set][i].stride); |
| |
| if (options.trace_tile != -1 && i == options.trace_tile/options.tiles_per_buf) |
| printf("changing buffer %i containing tile %i: tiling %i, stride %i\n", i, |
| options.trace_tile, |
| buffers[set][i].tiling, buffers[set][i].stride); |
| } |
| } |
| |
| static void exchange_uint(void *array, unsigned i, unsigned j) |
| { |
| unsigned *i_arr = array; |
| unsigned i_tmp; |
| |
| i_tmp = i_arr[i]; |
| i_arr[i] = i_arr[j]; |
| i_arr[j] = i_tmp; |
| } |
| |
| static void copy_tiles(unsigned *permutation) |
| { |
| unsigned src_tile, src_buf_idx, src_x, src_y; |
| unsigned dst_tile, dst_buf_idx, dst_x, dst_y; |
| struct scratch_buf *src_buf, *dst_buf; |
| int i, idx; |
| for (i = 0; i < num_total_tiles; i++) { |
| /* tile_permutation is independent of current_permutation, so |
| * abuse it to randomize the order of the src bos */ |
| idx = tile_permutation[i]; |
| src_buf_idx = idx / options.tiles_per_buf; |
| src_tile = idx % options.tiles_per_buf; |
| src_buf = &buffers[current_set][src_buf_idx]; |
| |
| tile2xy(src_buf, src_tile, &src_x, &src_y); |
| |
| dst_buf_idx = permutation[idx] / options.tiles_per_buf; |
| dst_tile = permutation[idx] % options.tiles_per_buf; |
| dst_buf = &buffers[target_set][dst_buf_idx]; |
| |
| tile2xy(dst_buf, dst_tile, &dst_x, &dst_y); |
| |
| if (options.trace_tile == i) |
| printf("copying tile %i from %i (%i, %i) to %i (%i, %i)", i, |
| tile_permutation[i], src_buf_idx, src_tile, |
| permutation[idx], dst_buf_idx, dst_tile); |
| |
| if (options.no_hw) { |
| cpucpy2d(src_buf->data, |
| src_buf->stride / sizeof(uint32_t), |
| src_x, src_y, |
| dst_buf->data, |
| dst_buf->stride / sizeof(uint32_t), |
| dst_x, dst_y, |
| i); |
| } else { |
| next_copyfunc(i); |
| |
| copyfunc(src_buf, src_x, src_y, dst_buf, dst_x, dst_y, |
| i); |
| } |
| } |
| |
| intel_batchbuffer_flush(batch); |
| } |
| |
| static int get_num_fences(void) |
| { |
| int val; |
| |
| val = gem_available_fences(drm_fd); |
| |
| printf ("total %d fences\n", val); |
| igt_assert(val > 4); |
| |
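| /* keep two fences in reserve, presumably as headroom for other users */ |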
| return val - 2; |
| } |
| |
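| /* E.g. with the default 1MB scratch buffers and 16x16 tiles (1KB each), |
| * tiles_per_buf is capped at 1024. */ |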
| static void sanitize_tiles_per_buf(void) |
| { |
| if (options.tiles_per_buf > options.scratch_buf_size / TILE_BYTES(options.tile_size)) |
| options.tiles_per_buf = options.scratch_buf_size / TILE_BYTES(options.tile_size); |
| } |
| |
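| /* Illustrative invocations (names per long_options below), assuming the |
| * binary is called gem_stress: |
| * gem_stress --no-hw # pure cpu copies, checks the test logic itself |
| * gem_stress --use-cpu-maps --rounds 32 |
| * gem_stress --x-tiled --tile-size 32 --tiles-per-buf 256 |
| */ |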
| static void parse_options(int argc, char **argv) |
| { |
| int c, tmp; |
| int option_index = 0; |
| static struct option long_options[] = { |
| {"no-hw", 0, 0, 'd'}, |
| {"buf-size", 1, 0, 's'}, |
| {"gpu-busy-load", 1, 0, 'g'}, |
| {"no-signals", 0, 0, 'S'}, |
| {"buffer-count", 1, 0, 'c'}, |
| {"trace-tile", 1, 0, 't'}, |
| {"disable-blt", 0, 0, 'b'}, |
| {"disable-render", 0, 0, 'r'}, |
| {"untiled", 0, 0, 'u'}, |
| {"x-tiled", 0, 0, 'x'}, |
| {"use-cpu-maps", 0, 0, 'm'}, |
| {"rounds", 1, 0, 'o'}, |
| {"no-fail", 0, 0, 'f'}, |
| {"tiles-per-buf", 0, 0, 'p'}, |
| #define DUCTAPE 0xdead0001 |
| {"remove-duct-tape", 0, 0, DUCTAPE}, |
| #define TILESZ 0xdead0002 |
| {"tile-size", 1, 0, TILESZ}, |
| #define CHCK_RENDER 0xdead0003 |
| {"check-render-cpyfn", 0, 0, CHCK_RENDER}, |
| {NULL, 0, 0, 0}, |
| }; |
| |
| options.scratch_buf_size = 256*4096; |
| options.no_hw = 0; |
| options.use_signal_helper = 1; |
| options.gpu_busy_load = 0; |
| options.num_buffers = 0; |
| options.trace_tile = -1; |
| options.use_render = 1; |
| options.use_blt = 1; |
| options.forced_tiling = -1; |
| options.use_cpu_maps = 0; |
| options.total_rounds = 512; |
| options.fail = 1; |
| options.ducttape = 1; |
| options.tile_size = 16; |
| options.tiles_per_buf = options.scratch_buf_size / TILE_BYTES(options.tile_size); |
| options.check_render_cpyfn = 0; |
| |
| while((c = getopt_long(argc, argv, "ds:g:c:t:rbuxmo:fp:", |
| long_options, &option_index)) != -1) { |
| switch(c) { |
| case 'd': |
| options.no_hw = 1; |
| printf("no-hw debug mode\n"); |
| break; |
| case 'S': |
| options.use_signal_helper = 0; |
| printf("disabling that pesky nuisance who keeps interrupting us\n"); |
| break; |
| case 's': |
| tmp = atoi(optarg); |
| if (tmp < options.tile_size*8192) |
| printf("scratch buffer size needs to be at least %i\n", |
| options.tile_size*8192); |
| else if (tmp & (tmp - 1)) { |
| printf("scratch buffer size needs to be a power-of-two\n"); |
| } else { |
| printf("fixed scratch buffer size to %u\n", tmp); |
| options.scratch_buf_size = tmp; |
| sanitize_tiles_per_buf(); |
| } |
| break; |
| case 'g': |
| tmp = atoi(optarg); |
| if (tmp < 0 || tmp > 10) |
| printf("gpu busy load needs to be bigger than 0 and smaller than 10\n"); |
| else { |
| printf("gpu busy load factor set to %i\n", tmp); |
| gpu_busy_load = options.gpu_busy_load = tmp; |
| } |
| break; |
| case 'c': |
| options.num_buffers = atoi(optarg); |
| printf("buffer count set to %i\n", options.num_buffers); |
| break; |
| case 't': |
| options.trace_tile = atoi(optarg); |
| printf("tracing tile %i\n", options.trace_tile); |
| break; |
| case 'r': |
| options.use_render = 0; |
| printf("disabling render copy\n"); |
| break; |
| case 'b': |
| options.use_blt = 0; |
| printf("disabling blt copy\n"); |
| break; |
| case 'u': |
| options.forced_tiling = I915_TILING_NONE; |
| printf("disabling tiling\n"); |
| break; |
| case 'x': |
| if (options.use_cpu_maps) { |
| printf("tiling not possible with cpu maps\n"); |
| } else { |
| options.forced_tiling = I915_TILING_X; |
| printf("using only X-tiling\n"); |
| } |
| break; |
| case 'm': |
| options.use_cpu_maps = 1; |
| options.forced_tiling = I915_TILING_NONE; |
| printf("disabling tiling\n"); |
| break; |
| case 'o': |
| options.total_rounds = atoi(optarg); |
| printf("total rounds %i\n", options.total_rounds); |
| break; |
| case 'f': |
| options.fail = 0; |
| printf("not failing when detecting errors\n"); |
| break; |
| case 'p': |
| options.tiles_per_buf = atoi(optarg); |
| printf("tiles per buffer %i\n", options.tiles_per_buf); |
| break; |
| case DUCTAPE: |
| options.ducttape = 0; |
| printf("applying duct-tape\n"); |
| break; |
| case TILESZ: |
| options.tile_size = atoi(optarg); |
| sanitize_tiles_per_buf(); |
| printf("til size %i\n", options.tile_size); |
| break; |
| case CHCK_RENDER: |
| options.check_render_cpyfn = 1; |
| printf("checking render copy function\n"); |
| break; |
| default: |
| printf("unkown command options\n"); |
| break; |
| } |
| } |
| |
| if (optind < argc) |
| printf("unkown command options\n"); |
| |
| /* actually 32767, according to the docs, but that kills our nice |
| * power-of-two calculations. */ |
| options.max_dimension = 16*1024; |
| if (options.use_render) { |
| if (IS_GEN2(devid) || IS_GEN3(devid)) |
| options.max_dimension = 2048; |
| else |
| options.max_dimension = 8192; |
| } |
| printf("Limiting buffer to %dx%d\n", |
| options.max_dimension, options.max_dimension); |
| } |
| |
| static void init(void) |
| { |
| int i; |
| unsigned tmp; |
| |
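| /* Auto-sizing: use roughly a third of the (256MB-capped) aperture, split |
| * across the two buffer sets. */ |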
| if (options.num_buffers == 0) { |
| tmp = gem_aperture_size(drm_fd); |
| tmp = tmp > 256*(1024*1024) ? 256*(1024*1024) : tmp; |
| num_buffers = 2 * tmp / options.scratch_buf_size / 3; |
| num_buffers /= 2; |
| printf("using %u buffers\n", num_buffers); |
| } else |
| num_buffers = options.num_buffers; |
| |
| bufmgr = drm_intel_bufmgr_gem_init(drm_fd, 4096); |
| drm_intel_bufmgr_gem_enable_reuse(bufmgr); |
| drm_intel_bufmgr_gem_enable_fenced_relocs(bufmgr); |
| num_fences = get_num_fences(); |
| batch = intel_batchbuffer_alloc(bufmgr, devid); |
| |
| busy_bo = drm_intel_bo_alloc(bufmgr, "tiled bo", BUSY_BUF_SIZE, 4096); |
| if (options.forced_tiling >= 0) |
| gem_set_tiling(drm_fd, busy_bo->handle, options.forced_tiling, 4096); |
| |
| for (i = 0; i < num_buffers; i++) { |
| init_buffer(&buffers[0][i], options.scratch_buf_size); |
| init_buffer(&buffers[1][i], options.scratch_buf_size); |
| |
| num_total_tiles += buffers[0][i].num_tiles; |
| } |
| current_set = 0; |
| |
| /* just in case it helps reproducibility */ |
| srandom(0xdeadbeef); |
| } |
| |
| static void check_render_copyfunc(void) |
| { |
| struct scratch_buf src, dst; |
| uint32_t *ptr; |
| int i, j, pass; |
| |
| if (!options.check_render_cpyfn) |
| return; |
| |
| init_buffer(&src, options.scratch_buf_size); |
| init_buffer(&dst, options.scratch_buf_size); |
| |
| for (pass = 0; pass < 16; pass++) { |
| int sx = random() % (buf_width(&src)-options.tile_size); |
| int sy = random() % (buf_height(&src)-options.tile_size); |
| int dx = random() % (buf_width(&dst)-options.tile_size); |
| int dy = random() % (buf_height(&dst)-options.tile_size); |
| |
| if (options.use_cpu_maps) |
| set_to_cpu_domain(&src, 1); |
| |
| memset(src.data, 0xff, options.scratch_buf_size); |
| for (j = 0; j < options.tile_size; j++) { |
| ptr = (uint32_t*)((char *)src.data + sx*4 + (sy+j) * src.stride); |
| for (i = 0; i < options.tile_size; i++) |
| ptr[i] = j * options.tile_size + i; |
| } |
| |
| render_copyfunc(&src, sx, sy, &dst, dx, dy, 0); |
| |
| if (options.use_cpu_maps) |
| set_to_cpu_domain(&dst, 0); |
| |
| for (j = 0; j < options.tile_size; j++) { |
| ptr = (uint32_t*)((char *)dst.data + dx*4 + (dy+j) * dst.stride); |
| for (i = 0; i < options.tile_size; i++) |
| if (ptr[i] != j * options.tile_size + i) { |
| printf("render copyfunc mismatch at (%d, %d): found %d, expected %d\n", |
| i, j, ptr[i], j*options.tile_size + i); |
| } |
| } |
| } |
| } |
| |
| |
| int main(int argc, char **argv) |
| { |
| int i, j; |
| unsigned *current_permutation, *tmp_permutation; |
| |
| drm_fd = drm_open_any(); |
| devid = intel_get_drm_devid(drm_fd); |
| |
| parse_options(argc, argv); |
| |
| /* start our little helper early before too many allocations occur */ |
| if (options.use_signal_helper) |
| igt_fork_signal_helper(); |
| |
| init(); |
| |
| check_render_copyfunc(); |
| |
| tile_permutation = malloc(num_total_tiles*sizeof(uint32_t)); |
| current_permutation = malloc(num_total_tiles*sizeof(uint32_t)); |
| tmp_permutation = malloc(num_total_tiles*sizeof(uint32_t)); |
| igt_assert(tile_permutation); |
| igt_assert(current_permutation); |
| igt_assert(tmp_permutation); |
| |
| fan_out(); |
| |
| for (i = 0; i < options.total_rounds; i++) { |
| printf("round %i\n", i); |
| if (i % 64 == 63) { |
| fan_in_and_check(); |
| printf("everything correct after %i rounds\n", i + 1); |
| } |
| |
| target_set = (current_set + 1) & 1; |
| init_set(target_set); |
| |
| for (j = 0; j < num_total_tiles; j++) |
| current_permutation[j] = j; |
| igt_permute_array(current_permutation, num_total_tiles, exchange_uint); |
| |
| copy_tiles(current_permutation); |
| |
| memcpy(tmp_permutation, tile_permutation, sizeof(unsigned)*num_total_tiles); |
| |
| /* accumulate the permutations */ |
| for (j = 0; j < num_total_tiles; j++) |
| tile_permutation[j] = current_permutation[tmp_permutation[j]]; |
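| /* Worked example with three tiles: if tile_permutation was {1, 2, 0} and |
| * current_permutation is {2, 0, 1}, logical tile 0 moved from position 1 to |
| * current_permutation[1] = 0, giving the composed map {0, 1, 2}. */ |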
| |
| current_set = target_set; |
| } |
| |
| fan_in_and_check(); |
| |
| fprintf(stderr, "num failed tiles %u, max incoherent bytes %zu\n", |
| stats.num_failed, stats.max_failed_reads*sizeof(uint32_t)); |
| |
| intel_batchbuffer_free(batch); |
| drm_intel_bufmgr_destroy(bufmgr); |
| |
| close(drm_fd); |
| |
| igt_stop_signal_helper(); |
| |
| return 0; |
| } |