/*
 * Copyright © 2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Chris Wilson <chris@chris-wilson.co.uk>
 *
 */

/** @file gen3_linear_render_blits.c
 *
 * This is a test of doing many blits, with a working set
 * larger than the aperture size.
 *
 * The goal is to simply ensure the basics work.
 */

#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <inttypes.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <unistd.h>
#include "drm.h"
#include "i915_drm.h"
#include "drmtest.h"
#include "intel_gpu_tools.h"

#include "i915_reg.h"
#include "i915_3d.h"

/* Dimensions, in pixels, of every surface this test creates and copies. */
#define WIDTH 512
#define HEIGHT 512

/*
 * Scratch copy of one whole surface, one 32-bit value per pixel.  It is
 * the upload source in create_bo() and the readback target in check_bo().
 */
static uint32_t linear[WIDTH * HEIGHT];

/*
 * Reinterpret the storage of @f as a 32-bit dword, without any numeric
 * conversion, so that a float can be emitted into a dword command stream.
 *
 * Returns the bits of @f as a uint32_t.
 */
static inline uint32_t pack_float(float f)
{
	const union {
		float as_float;
		uint32_t as_dword;
	} pun = { .as_float = f };

	return pun.as_dword;
}

| 68 | static uint32_t gem_create(int fd, int size) |
| 69 | { |
| 70 | struct drm_i915_gem_create create; |
| 71 | |
| 72 | create.handle = 0; |
| 73 | create.size = size; |
| 74 | (void)drmIoctl(fd, DRM_IOCTL_I915_GEM_CREATE, &create); |
| 75 | assert(create.handle); |
| 76 | |
| 77 | return create.handle; |
| 78 | } |
| 79 | |
Chris Wilson | 20b6903 | 2011-06-05 11:20:34 +0100 | [diff] [blame] | 80 | static uint64_t |
| 81 | gem_aperture_size(int fd) |
| 82 | { |
| 83 | struct drm_i915_gem_get_aperture aperture; |
| 84 | |
| 85 | aperture.aper_size = 512*1024*1024; |
| 86 | (void)drmIoctl(fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture); |
| 87 | return aperture.aper_size; |
| 88 | } |
| 89 | |
| 90 | static void |
Chris Wilson | 20b6903 | 2011-06-05 11:20:34 +0100 | [diff] [blame] | 91 | gem_read(int fd, uint32_t handle, int offset, int size, void *buf) |
| 92 | { |
| 93 | struct drm_i915_gem_pread pread; |
| 94 | int ret; |
| 95 | |
| 96 | pread.handle = handle; |
| 97 | pread.offset = offset; |
| 98 | pread.size = size; |
| 99 | pread.data_ptr = (uintptr_t)buf; |
| 100 | ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_PREAD, &pread); |
| 101 | assert(ret == 0); |
| 102 | } |
| 103 | |
| 104 | static uint32_t fill_reloc(struct drm_i915_gem_relocation_entry *reloc, |
| 105 | uint32_t offset, |
| 106 | uint32_t handle, |
| 107 | uint32_t read_domain, |
| 108 | uint32_t write_domain) |
| 109 | { |
| 110 | reloc->target_handle = handle; |
| 111 | reloc->delta = 0; |
| 112 | reloc->offset = offset * sizeof(uint32_t); |
| 113 | reloc->presumed_offset = 0; |
| 114 | reloc->read_domains = read_domain; |
| 115 | reloc->write_domain = write_domain; |
| 116 | |
| 117 | return reloc->presumed_offset + reloc->delta; |
| 118 | } |
| 119 | |
| 120 | static void |
| 121 | copy(int fd, uint32_t dst, uint32_t src) |
| 122 | { |
| 123 | uint32_t batch[1024], *b = batch; |
| 124 | struct drm_i915_gem_relocation_entry reloc[2], *r = reloc; |
| 125 | struct drm_i915_gem_exec_object2 obj[3]; |
| 126 | struct drm_i915_gem_execbuffer2 exec; |
| 127 | uint32_t handle; |
| 128 | int ret; |
| 129 | |
| 130 | /* invariant state */ |
| 131 | *b++ = (_3DSTATE_AA_CMD | |
| 132 | AA_LINE_ECAAR_WIDTH_ENABLE | |
| 133 | AA_LINE_ECAAR_WIDTH_1_0 | |
| 134 | AA_LINE_REGION_WIDTH_ENABLE | AA_LINE_REGION_WIDTH_1_0); |
| 135 | *b++ = (_3DSTATE_INDEPENDENT_ALPHA_BLEND_CMD | |
| 136 | IAB_MODIFY_ENABLE | |
| 137 | IAB_MODIFY_FUNC | (BLENDFUNC_ADD << IAB_FUNC_SHIFT) | |
| 138 | IAB_MODIFY_SRC_FACTOR | (BLENDFACT_ONE << |
| 139 | IAB_SRC_FACTOR_SHIFT) | |
| 140 | IAB_MODIFY_DST_FACTOR | (BLENDFACT_ZERO << |
| 141 | IAB_DST_FACTOR_SHIFT)); |
| 142 | *b++ = (_3DSTATE_DFLT_DIFFUSE_CMD); |
| 143 | *b++ = (0); |
| 144 | *b++ = (_3DSTATE_DFLT_SPEC_CMD); |
| 145 | *b++ = (0); |
| 146 | *b++ = (_3DSTATE_DFLT_Z_CMD); |
| 147 | *b++ = (0); |
| 148 | *b++ = (_3DSTATE_COORD_SET_BINDINGS | |
| 149 | CSB_TCB(0, 0) | |
| 150 | CSB_TCB(1, 1) | |
| 151 | CSB_TCB(2, 2) | |
| 152 | CSB_TCB(3, 3) | |
| 153 | CSB_TCB(4, 4) | |
| 154 | CSB_TCB(5, 5) | CSB_TCB(6, 6) | CSB_TCB(7, 7)); |
| 155 | *b++ = (_3DSTATE_RASTER_RULES_CMD | |
| 156 | ENABLE_POINT_RASTER_RULE | |
| 157 | OGL_POINT_RASTER_RULE | |
| 158 | ENABLE_LINE_STRIP_PROVOKE_VRTX | |
| 159 | ENABLE_TRI_FAN_PROVOKE_VRTX | |
| 160 | LINE_STRIP_PROVOKE_VRTX(1) | |
| 161 | TRI_FAN_PROVOKE_VRTX(2) | ENABLE_TEXKILL_3D_4D | TEXKILL_4D); |
| 162 | *b++ = (_3DSTATE_MODES_4_CMD | |
| 163 | ENABLE_LOGIC_OP_FUNC | LOGIC_OP_FUNC(LOGICOP_COPY) | |
| 164 | ENABLE_STENCIL_WRITE_MASK | STENCIL_WRITE_MASK(0xff) | |
| 165 | ENABLE_STENCIL_TEST_MASK | STENCIL_TEST_MASK(0xff)); |
| 166 | *b++ = (_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(3) | I1_LOAD_S(4) | I1_LOAD_S(5) | 2); |
| 167 | *b++ = (0x00000000); /* Disable texture coordinate wrap-shortest */ |
| 168 | *b++ = ((1 << S4_POINT_WIDTH_SHIFT) | |
| 169 | S4_LINE_WIDTH_ONE | |
| 170 | S4_CULLMODE_NONE | |
| 171 | S4_VFMT_XY); |
| 172 | *b++ = (0x00000000); /* Stencil. */ |
| 173 | *b++ = (_3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT); |
| 174 | *b++ = (_3DSTATE_SCISSOR_RECT_0_CMD); |
| 175 | *b++ = (0); |
| 176 | *b++ = (0); |
| 177 | *b++ = (_3DSTATE_DEPTH_SUBRECT_DISABLE); |
| 178 | *b++ = (_3DSTATE_LOAD_INDIRECT | 0); /* disable indirect state */ |
| 179 | *b++ = (0); |
| 180 | *b++ = (_3DSTATE_STIPPLE); |
| 181 | *b++ = (0x00000000); |
| 182 | *b++ = (_3DSTATE_BACKFACE_STENCIL_OPS | BFO_ENABLE_STENCIL_TWO_SIDE | 0); |
| 183 | |
| 184 | /* samler state */ |
| 185 | #define TEX_COUNT 1 |
| 186 | *b++ = (_3DSTATE_MAP_STATE | (3 * TEX_COUNT)); |
| 187 | *b++ = ((1 << TEX_COUNT) - 1); |
| 188 | *b = fill_reloc(r++, b-batch, src, I915_GEM_DOMAIN_SAMPLER, 0); b++; |
| 189 | *b++ = (MAPSURF_32BIT | MT_32BIT_ARGB8888 | |
| 190 | (HEIGHT - 1) << MS3_HEIGHT_SHIFT | |
| 191 | (WIDTH - 1) << MS3_WIDTH_SHIFT); |
| 192 | *b++ = ((WIDTH-1) << MS4_PITCH_SHIFT); |
| 193 | |
| 194 | *b++ = (_3DSTATE_SAMPLER_STATE | (3 * TEX_COUNT)); |
| 195 | *b++ = ((1 << TEX_COUNT) - 1); |
| 196 | *b++ = (MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT | |
| 197 | FILTER_NEAREST << SS2_MAG_FILTER_SHIFT | |
| 198 | FILTER_NEAREST << SS2_MIN_FILTER_SHIFT); |
| 199 | *b++ = (TEXCOORDMODE_WRAP << SS3_TCX_ADDR_MODE_SHIFT | |
| 200 | TEXCOORDMODE_WRAP << SS3_TCY_ADDR_MODE_SHIFT | |
| 201 | 0 << SS3_TEXTUREMAP_INDEX_SHIFT); |
| 202 | *b++ = (0x00000000); |
| 203 | |
| 204 | /* render target state */ |
| 205 | *b++ = (_3DSTATE_BUF_INFO_CMD); |
| 206 | *b++ = (BUF_3D_ID_COLOR_BACK | WIDTH*4); |
| 207 | *b = fill_reloc(r++, b-batch, dst, |
| 208 | I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER); |
| 209 | b++; |
| 210 | |
| 211 | *b++ = (_3DSTATE_DST_BUF_VARS_CMD); |
| 212 | *b++ = (COLR_BUF_ARGB8888 | |
| 213 | DSTORG_HORT_BIAS(0x8) | |
| 214 | DSTORG_VERT_BIAS(0x8)); |
| 215 | |
| 216 | /* draw rect is unconditional */ |
| 217 | *b++ = (_3DSTATE_DRAW_RECT_CMD); |
| 218 | *b++ = (0x00000000); |
| 219 | *b++ = (0x00000000); /* ymin, xmin */ |
| 220 | *b++ = (DRAW_YMAX(HEIGHT - 1) | |
| 221 | DRAW_XMAX(WIDTH - 1)); |
| 222 | /* yorig, xorig (relate to color buffer?) */ |
| 223 | *b++ = (0x00000000); |
| 224 | |
| 225 | /* texfmt */ |
| 226 | *b++ = (_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(1) | I1_LOAD_S(2) | I1_LOAD_S(6) | 2); |
| 227 | *b++ = ((4 << S1_VERTEX_WIDTH_SHIFT) | (4 << S1_VERTEX_PITCH_SHIFT)); |
| 228 | *b++ = (~S2_TEXCOORD_FMT(0, TEXCOORDFMT_NOT_PRESENT) | |
| 229 | S2_TEXCOORD_FMT(0, TEXCOORDFMT_2D)); |
| 230 | *b++ = (S6_CBUF_BLEND_ENABLE | S6_COLOR_WRITE_ENABLE | |
| 231 | BLENDFUNC_ADD << S6_CBUF_BLEND_FUNC_SHIFT | |
| 232 | BLENDFACT_ONE << S6_CBUF_SRC_BLEND_FACT_SHIFT | |
| 233 | BLENDFACT_ZERO << S6_CBUF_DST_BLEND_FACT_SHIFT); |
| 234 | |
| 235 | /* pixel shader */ |
| 236 | *b++ = (_3DSTATE_PIXEL_SHADER_PROGRAM | (1 + 3*3 - 2)); |
| 237 | /* decl FS_T0 */ |
| 238 | *b++ = (D0_DCL | |
| 239 | REG_TYPE(FS_T0) << D0_TYPE_SHIFT | |
| 240 | REG_NR(FS_T0) << D0_NR_SHIFT | |
| 241 | ((REG_TYPE(FS_T0) != REG_TYPE_S) ? D0_CHANNEL_ALL : 0)); |
| 242 | *b++ = (0); |
| 243 | *b++ = (0); |
| 244 | /* decl FS_S0 */ |
| 245 | *b++ = (D0_DCL | |
| 246 | (REG_TYPE(FS_S0) << D0_TYPE_SHIFT) | |
| 247 | (REG_NR(FS_S0) << D0_NR_SHIFT) | |
| 248 | ((REG_TYPE(FS_S0) != REG_TYPE_S) ? D0_CHANNEL_ALL : 0)); |
| 249 | *b++ = (0); |
| 250 | *b++ = (0); |
| 251 | /* texld(FS_OC, FS_S0, FS_T0 */ |
| 252 | *b++ = (T0_TEXLD | |
| 253 | (REG_TYPE(FS_OC) << T0_DEST_TYPE_SHIFT) | |
| 254 | (REG_NR(FS_OC) << T0_DEST_NR_SHIFT) | |
| 255 | (REG_NR(FS_S0) << T0_SAMPLER_NR_SHIFT)); |
| 256 | *b++ = ((REG_TYPE(FS_T0) << T1_ADDRESS_REG_TYPE_SHIFT) | |
| 257 | (REG_NR(FS_T0) << T1_ADDRESS_REG_NR_SHIFT)); |
| 258 | *b++ = (0); |
| 259 | |
| 260 | *b++ = (PRIM3D_RECTLIST | (3*4 - 1)); |
| 261 | *b++ = pack_float(WIDTH); |
| 262 | *b++ = pack_float(HEIGHT); |
| 263 | *b++ = pack_float(WIDTH); |
| 264 | *b++ = pack_float(HEIGHT); |
| 265 | |
| 266 | *b++ = pack_float(0); |
| 267 | *b++ = pack_float(HEIGHT); |
| 268 | *b++ = pack_float(0); |
| 269 | *b++ = pack_float(HEIGHT); |
| 270 | |
| 271 | *b++ = pack_float(0); |
| 272 | *b++ = pack_float(0); |
| 273 | *b++ = pack_float(0); |
| 274 | *b++ = pack_float(0); |
| 275 | |
| 276 | *b++ = MI_BATCH_BUFFER_END; |
| 277 | if ((b - batch) & 1) |
| 278 | *b++ = 0; |
| 279 | |
| 280 | assert(b - batch <= 1024); |
| 281 | handle = gem_create(fd, 4096); |
Daniel Vetter | 319638b | 2012-01-10 15:31:11 +0100 | [diff] [blame^] | 282 | gem_write(fd, handle, 0, batch, (b-batch)*sizeof(batch[0])); |
Chris Wilson | 20b6903 | 2011-06-05 11:20:34 +0100 | [diff] [blame] | 283 | |
| 284 | assert(r-reloc == 2); |
| 285 | |
| 286 | obj[0].handle = dst; |
| 287 | obj[0].relocation_count = 0; |
| 288 | obj[0].relocs_ptr = 0; |
| 289 | obj[0].alignment = 0; |
| 290 | obj[0].offset = 0; |
| 291 | obj[0].flags = 0; |
| 292 | obj[0].rsvd1 = 0; |
| 293 | obj[0].rsvd2 = 0; |
| 294 | |
| 295 | obj[1].handle = src; |
| 296 | obj[1].relocation_count = 0; |
| 297 | obj[1].relocs_ptr = 0; |
| 298 | obj[1].alignment = 0; |
| 299 | obj[1].offset = 0; |
| 300 | obj[1].flags = 0; |
| 301 | obj[1].rsvd1 = 0; |
| 302 | obj[1].rsvd2 = 0; |
| 303 | |
| 304 | obj[2].handle = handle; |
| 305 | obj[2].relocation_count = 2; |
| 306 | obj[2].relocs_ptr = (uintptr_t)reloc; |
| 307 | obj[2].alignment = 0; |
| 308 | obj[2].offset = 0; |
| 309 | obj[2].flags = 0; |
| 310 | obj[2].rsvd1 = obj[2].rsvd2 = 0; |
| 311 | |
| 312 | exec.buffers_ptr = (uintptr_t)obj; |
| 313 | exec.buffer_count = 3; |
| 314 | exec.batch_start_offset = 0; |
| 315 | exec.batch_len = (b-batch)*sizeof(batch[0]); |
| 316 | exec.DR1 = exec.DR4 = 0; |
| 317 | exec.num_cliprects = 0; |
| 318 | exec.cliprects_ptr = 0; |
| 319 | exec.flags = 0; |
| 320 | exec.rsvd1 = exec.rsvd2 = 0; |
| 321 | |
| 322 | ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &exec); |
| 323 | while (ret && errno == EBUSY) { |
| 324 | drmCommandNone(fd, DRM_I915_GEM_THROTTLE); |
| 325 | ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &exec); |
| 326 | } |
| 327 | assert(ret == 0); |
| 328 | |
| 329 | gem_close(fd, handle); |
| 330 | } |
| 331 | |
| 332 | static uint32_t |
| 333 | create_bo(int fd, uint32_t val) |
| 334 | { |
| 335 | uint32_t handle; |
| 336 | int i; |
| 337 | |
| 338 | handle = gem_create(fd, sizeof(linear)); |
| 339 | |
| 340 | /* Fill the BO with dwords starting at val */ |
| 341 | for (i = 0; i < WIDTH*HEIGHT; i++) |
| 342 | linear[i] = val++; |
Daniel Vetter | 319638b | 2012-01-10 15:31:11 +0100 | [diff] [blame^] | 343 | gem_write(fd, handle, 0, linear, sizeof(linear)); |
Chris Wilson | 20b6903 | 2011-06-05 11:20:34 +0100 | [diff] [blame] | 344 | |
| 345 | return handle; |
| 346 | } |
| 347 | |
| 348 | static void |
| 349 | check_bo(int fd, uint32_t handle, uint32_t val) |
| 350 | { |
| 351 | int i; |
| 352 | |
| 353 | gem_read(fd, handle, 0, sizeof(linear), linear); |
| 354 | for (i = 0; i < WIDTH*HEIGHT; i++) { |
| 355 | if (linear[i] != val) { |
| 356 | fprintf(stderr, "Expected 0x%08x, found 0x%08x " |
| 357 | "at offset 0x%08x\n", |
| 358 | val, linear[i], i * 4); |
| 359 | abort(); |
| 360 | } |
| 361 | val++; |
| 362 | } |
| 363 | } |
| 364 | |
/*
 * Test entry point.
 *
 * Usage: <program> [count]
 *
 * @count is the number of 1MiB buffer objects to create.  When it is
 * omitted or 0, one and a half times the aperture size (in MiB) is used.
 *
 * Every buffer is filled with its own run of consecutive dwords and
 * verified.  The buffers are then copied onto one another with copy()
 * in three phases (cyclic forward, cyclic backward, random pairs) and
 * re-verified after each phase.
 *
 * Returns 0 on success, 77 when the device is not gen3, and 1 when the
 * buffer count is invalid or the bookkeeping array cannot be allocated.
 * Content mismatches abort inside check_bo().
 */
int main(int argc, char **argv)
{
	uint32_t *handle, *start_val;
	uint32_t start = 0;
	int i, fd, count;

	fd = drm_open_any();

	if (!IS_GEN3(intel_get_drm_devid(fd))) {
		printf("gen3-only test, doing nothing\n");
		close(fd);
		return 77;
	}

	count = 0;
	if (argc > 1) {
		char *end;
		long val;

		/*
		 * Reject garbage and negative values instead of letting them
		 * turn into a bogus allocation size.  The upper bound keeps
		 * the "count * 4" loop limits below within int range.
		 */
		errno = 0;
		val = strtol(argv[1], &end, 10);
		if (errno != 0 || end == argv[1] || *end != '\0' ||
		    val < 0 || val > INT_MAX / 4) {
			fprintf(stderr, "Invalid buffer count '%s'\n",
				argv[1]);
			close(fd);
			return 1;
		}
		count = (int)val;
	}
	if (count == 0)
		count = 3 * gem_aperture_size(fd) / (1024*1024) / 2;
	if (count < 1) {
		fprintf(stderr, "No buffers to test (count %d)\n", count);
		close(fd);
		return 1;
	}
	printf("Using %d 1MiB buffers\n", count);

	/* One allocation: handles in the first half, start values after. */
	handle = malloc(sizeof(*handle) * count * 2);
	if (handle == NULL) {
		fprintf(stderr, "Failed to allocate state for %d buffers\n",
			count);
		close(fd);
		return 1;
	}
	start_val = handle + count;

	for (i = 0; i < count; i++) {
		handle[i] = create_bo(fd, start);
		start_val[i] = start;
		start += 1024 * 1024 / 4;	/* dwords per 1MiB buffer */
	}

	printf("Verifying initialisation...\n");
	for (i = 0; i < count; i++)
		check_bo(fd, handle[i], start_val[i]);

	printf("Cyclic blits, forward...\n");
	for (i = 0; i < count * 4; i++) {
		int src = i % count;
		int dst = (i + 1) % count;

		copy(fd, handle[dst], handle[src]);
		start_val[dst] = start_val[src];
	}
	for (i = 0; i < count; i++)
		check_bo(fd, handle[i], start_val[i]);

	printf("Cyclic blits, backward...\n");
	for (i = 0; i < count * 4; i++) {
		int src = (i + 1) % count;
		int dst = i % count;

		copy(fd, handle[dst], handle[src]);
		start_val[dst] = start_val[src];
	}
	for (i = 0; i < count; i++)
		check_bo(fd, handle[i], start_val[i]);

	printf("Random blits...\n");
	for (i = 0; i < count * 4; i++) {
		int src = random() % count;
		int dst = random() % count;

		if (src == dst)
			continue;

		copy(fd, handle[dst], handle[src]);
		start_val[dst] = start_val[src];
	}
	for (i = 0; i < count; i++)
		check_bo(fd, handle[i], start_val[i]);

	/* Release everything that was acquired above. */
	for (i = 0; i < count; i++)
		gem_close(fd, handle[i]);
	free(handle);
	close(fd);

	return 0;
}