/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <string.h>
#include <signal.h>
#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>

#include "drmtest.h"
#include "igt_core.h"
#include "igt_gt.h"
#include "igt_debugfs.h"
#include "ioctl_wrappers.h"
#include "intel_reg.h"
#include "intel_chipset.h"

/**
 * SECTION:igt_gt
 * @short_description: GT support library
 * @title: i-g-t gt
 * @include: igt_gt.h
 *
 * This library provides various auxiliary helper functions to handle general
 * interactions with the GT like forcewake handling, injecting hangs or stopping
 * engines.
 */


/**
 * igt_require_hang_ring:
 * @fd: open i915 drm file descriptor
 * @ring: execbuf ring flag (not consulted by the checks performed here)
 *
 * Convenience helper to check whether advanced hang injection is supported by
 * the kernel. The test/subtest is skipped automatically when it is not.
 *
 * Two requirements are evaluated, in this order:
 * gem_context_require_ban_period() on @fd, then a hardware generation of at
 * least 5 for the device behind @fd.
 */
void igt_require_hang_ring(int fd, int ring)
{
	/* The ring is part of the interface but plays no role in the checks. */
	(void)ring;

	gem_context_require_ban_period(fd);
	igt_require(intel_gen(intel_get_drm_devid(fd)) >= 5);
}

Daniel Vetter | 3cd45de | 2015-02-10 17:46:43 +0100 | [diff] [blame] | 66 | /** |
| 67 | * igt_hang_ring: |
| 68 | * @fd: open i915 drm file descriptor |
| 69 | * @ring: execbuf ring flag |
| 70 | * |
| 71 | * This helper function injects a hanging batch into @ring. It returns a |
| 72 | * #igt_hang_ring_t structure which must be passed to igt_post_hang_ring() for |
| 73 | * hang post-processing (after the gpu hang interaction has been tested. |
| 74 | * |
| 75 | * Returns: |
| 76 | * Structure with helper internal state for igt_post_hang_ring(). |
| 77 | */ |
| 78 | igt_hang_ring_t igt_hang_ring(int fd, int ring) |
Chris Wilson | 16bafdf | 2014-09-04 09:26:24 +0100 | [diff] [blame] | 79 | { |
| 80 | struct drm_i915_gem_relocation_entry reloc; |
| 81 | struct drm_i915_gem_execbuffer2 execbuf; |
| 82 | struct drm_i915_gem_exec_object2 exec; |
| 83 | struct local_i915_gem_context_param param; |
| 84 | uint32_t b[8]; |
| 85 | unsigned ban; |
| 86 | unsigned len; |
| 87 | |
| 88 | param.context = 0; |
| 89 | param.size = 0; |
| 90 | param.param = LOCAL_CONTEXT_PARAM_BAN_PERIOD; |
| 91 | param.value = 0; |
| 92 | gem_context_get_param(fd, ¶m); |
| 93 | ban = param.value; |
| 94 | |
| 95 | param.value = 0; |
Daniel Vetter | 8d21b39 | 2015-02-06 11:10:25 +0100 | [diff] [blame] | 96 | gem_context_set_param(fd, ¶m); |
Chris Wilson | 16bafdf | 2014-09-04 09:26:24 +0100 | [diff] [blame] | 97 | |
| 98 | memset(&reloc, 0, sizeof(reloc)); |
| 99 | memset(&exec, 0, sizeof(exec)); |
| 100 | memset(&execbuf, 0, sizeof(execbuf)); |
| 101 | |
| 102 | exec.handle = gem_create(fd, 4096); |
| 103 | exec.relocation_count = 1; |
| 104 | exec.relocs_ptr = (uintptr_t)&reloc; |
| 105 | |
| 106 | len = 2; |
Daniel Vetter | 3cd45de | 2015-02-10 17:46:43 +0100 | [diff] [blame] | 107 | if (intel_gen(intel_get_drm_devid(fd)) >= 8) |
Chris Wilson | 16bafdf | 2014-09-04 09:26:24 +0100 | [diff] [blame] | 108 | len++; |
| 109 | b[0] = MI_BATCH_BUFFER_START | (len - 2); |
| 110 | b[len] = MI_BATCH_BUFFER_END; |
| 111 | b[len+1] = MI_NOOP; |
| 112 | gem_write(fd, exec.handle, 0, b, sizeof(b)); |
| 113 | |
| 114 | reloc.offset = 4; |
| 115 | reloc.target_handle = exec.handle; |
| 116 | reloc.read_domains = I915_GEM_DOMAIN_COMMAND; |
| 117 | |
| 118 | execbuf.buffers_ptr = (uintptr_t)&exec; |
| 119 | execbuf.buffer_count = 1; |
| 120 | execbuf.batch_len = sizeof(b); |
| 121 | execbuf.flags = ring; |
| 122 | gem_execbuf(fd, &execbuf); |
| 123 | |
| 124 | return (struct igt_hang_ring){ exec.handle, ban }; |
| 125 | } |
| 126 | |
Daniel Vetter | 3cd45de | 2015-02-10 17:46:43 +0100 | [diff] [blame] | 127 | /** |
Thomas Wood | 26f4081 | 2015-02-20 11:31:01 +0000 | [diff] [blame] | 128 | * igt_post_hang_ring: |
Daniel Vetter | 3cd45de | 2015-02-10 17:46:43 +0100 | [diff] [blame] | 129 | * @fd: open i915 drm file descriptor |
| 130 | * @arg: hang state from igt_hang_ring() |
| 131 | * |
| 132 | * This function does the necessary post-processing after a gpu hang injected |
| 133 | * with igt_hang_ring(). |
| 134 | */ |
Chris Wilson | 16bafdf | 2014-09-04 09:26:24 +0100 | [diff] [blame] | 135 | void igt_post_hang_ring(int fd, struct igt_hang_ring arg) |
| 136 | { |
| 137 | struct local_i915_gem_context_param param; |
| 138 | |
| 139 | if (arg.handle == 0) |
| 140 | return; |
| 141 | |
| 142 | gem_set_domain(fd, arg.handle, |
| 143 | I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT); |
| 144 | gem_close(fd, arg.handle); |
| 145 | |
| 146 | param.context = 0; |
| 147 | param.size = 0; |
| 148 | param.param = LOCAL_CONTEXT_PARAM_BAN_PERIOD; |
| 149 | param.value = arg.ban; |
| 150 | gem_context_set_param(fd, ¶m); |
| 151 | } |
Daniel Vetter | 3cd45de | 2015-02-10 17:46:43 +0100 | [diff] [blame] | 152 | |
| 153 | /* GPU abusers */ |
| 154 | static struct igt_helper_process hang_helper; |
| 155 | static void __attribute__((noreturn)) |
| 156 | hang_helper_process(pid_t pid, int fd) |
| 157 | { |
| 158 | while (1) { |
| 159 | if (kill(pid, 0)) /* Parent has died, so must we. */ |
| 160 | exit(0); |
| 161 | |
| 162 | igt_post_hang_ring(fd, |
| 163 | igt_hang_ring(fd, I915_EXEC_DEFAULT)); |
| 164 | |
| 165 | sleep(1); |
| 166 | } |
| 167 | } |
| 168 | |
| 169 | /** |
| 170 | * igt_fork_hang_helper: |
| 171 | * |
| 172 | * Fork a child process using #igt_fork_helper to hang the default engine |
| 173 | * of the GPU at regular intervals. |
| 174 | * |
| 175 | * This is useful to exercise slow running code (such as aperture placement) |
| 176 | * which needs to be robust against a GPU reset. |
| 177 | * |
| 178 | * In tests with subtests this function can be called outside of failure |
| 179 | * catching code blocks like #igt_fixture or #igt_subtest. |
| 180 | */ |
| 181 | int igt_fork_hang_helper(void) |
| 182 | { |
| 183 | int fd, gen; |
| 184 | |
| 185 | if (igt_only_list_subtests()) |
| 186 | return 1; |
| 187 | |
| 188 | fd = drm_open_any(); |
| 189 | if (fd == -1) |
| 190 | return 0; |
| 191 | |
| 192 | gen = intel_gen(intel_get_drm_devid(fd)); |
| 193 | if (gen < 5) { |
| 194 | close(fd); |
| 195 | return 0; |
| 196 | } |
| 197 | |
| 198 | igt_fork_helper(&hang_helper) |
| 199 | hang_helper_process(getppid(), fd); |
| 200 | |
| 201 | close(fd); |
| 202 | return 1; |
| 203 | } |
| 204 | |
| 205 | /** |
| 206 | * igt_stop_hang_helper: |
| 207 | * |
| 208 | * Stops the child process spawned with igt_fork_hang_helper(). |
| 209 | * |
| 210 | * In tests with subtests this function can be called outside of failure |
| 211 | * catching code blocks like #igt_fixture or #igt_subtest. |
| 212 | */ |
| 213 | void igt_stop_hang_helper(void) |
| 214 | { |
| 215 | if (igt_only_list_subtests()) |
| 216 | return; |
| 217 | |
| 218 | igt_stop_helper(&hang_helper); |
| 219 | } |
| 220 | |
/**
 * igt_open_forcewake_handle:
 *
 * This function opens the debugfs forcewake file and so prevents the GT from
 * suspending. The reference is automatically dropped when the handle is
 * closed.
 *
 * If the environment variable IGT_NO_FORCEWAKE is set, the file is not opened
 * at all and -1 is returned.
 *
 * Returns:
 * The file descriptor of the forcewake handle or -1 if that didn't work out.
 */
int igt_open_forcewake_handle(void)
{
	const char *opt_out = getenv("IGT_NO_FORCEWAKE");

	if (opt_out != NULL)
		return -1;

	return igt_debugfs_open("i915_forcewake_user", O_WRONLY);
}

| 237 | /** |
| 238 | * igt_to_stop_ring_flag: |
| 239 | * @ring: the specified ring flag from execbuf ioctl (I915_EXEC_*) |
| 240 | * |
| 241 | * This converts the specified ring to a ring flag to be used |
| 242 | * with igt_get_stop_rings() and igt_set_stop_rings(). |
| 243 | * |
| 244 | * Returns: |
| 245 | * Ring flag for the given ring. |
| 246 | */ |
| 247 | enum stop_ring_flags igt_to_stop_ring_flag(int ring) { |
| 248 | if (ring == I915_EXEC_DEFAULT) |
| 249 | return STOP_RING_RENDER; |
| 250 | |
| 251 | igt_assert(ring && ((ring & ~I915_EXEC_RING_MASK) == 0)); |
| 252 | return 1 << (ring - 1); |
| 253 | } |
| 254 | |
/*
 * Write @mask, formatted as "0x%08x", to the 'i915_ring_stop' debugfs entry.
 *
 * Asserts that the formatted string is exactly 10 characters long, that the
 * debugfs file could be opened and that the whole string was written. The
 * descriptor is closed before the write result is asserted so that a failing
 * assertion does not leak it.
 */
static void stop_rings_write(uint32_t mask)
{
	char buf[80];
	ssize_t written;
	int saved_errno;
	int len;
	int fd;

	len = snprintf(buf, sizeof(buf), "0x%08x", mask);
	igt_assert(len == 10);

	fd = igt_debugfs_open("i915_ring_stop", O_WRONLY);
	igt_assert(fd >= 0);

	written = write(fd, buf, len);

	/* Keep the errno of write() intact for any failure report below. */
	saved_errno = errno;
	close(fd);
	errno = saved_errno;

	igt_assert(written == len);
}

| 268 | /** |
| 269 | * igt_get_stop_rings: |
| 270 | * |
| 271 | * Read current ring flags from 'i915_ring_stop' debugfs entry. |
| 272 | * |
| 273 | * Returns: |
| 274 | * Current ring flags. |
| 275 | */ |
| 276 | enum stop_ring_flags igt_get_stop_rings(void) |
| 277 | { |
| 278 | int fd; |
| 279 | char buf[80]; |
| 280 | int l; |
| 281 | unsigned long long ring_mask; |
| 282 | |
| 283 | fd = igt_debugfs_open("i915_ring_stop", O_RDONLY); |
| 284 | igt_assert(fd >= 0); |
| 285 | l = read(fd, buf, sizeof(buf)-1); |
| 286 | igt_assert(l > 0); |
| 287 | igt_assert(l < sizeof(buf)); |
| 288 | |
| 289 | buf[l] = '\0'; |
| 290 | |
| 291 | close(fd); |
| 292 | |
| 293 | errno = 0; |
| 294 | ring_mask = strtoull(buf, NULL, 0); |
| 295 | igt_assert(errno == 0); |
| 296 | return ring_mask; |
| 297 | } |
| 298 | |
| 299 | /** |
| 300 | * igt_set_stop_rings: |
| 301 | * @flags: Ring flags to write |
| 302 | * |
| 303 | * This writes @flags to 'i915_ring_stop' debugfs entry. Driver will |
| 304 | * prevent the CPU from writing tail pointer for the ring that @flags |
| 305 | * specify. Note that the ring is not stopped right away. Instead any |
| 306 | * further command emissions won't be executed after the flag is set. |
| 307 | * |
| 308 | * This is the least invasive way to make the GPU stuck. Hence you must |
| 309 | * set this after a batch submission with it's own invalid or endless |
| 310 | * looping instructions. In this case it is merely for giving notification |
| 311 | * for the driver that this was simulated hang, as the batch would have |
| 312 | * caused hang in any case. On the other hand if you use a valid or noop |
| 313 | * batch and want to hang the ring (GPU), you must set corresponding flag |
| 314 | * before submitting the batch. |
| 315 | * |
| 316 | * Driver checks periodically if a ring is making any progress, and if |
| 317 | * it is not, it will declare the ring to be hung and will reset the GPU. |
| 318 | * After reset, the driver will clear flags in 'i915_ring_stop' |
| 319 | * |
| 320 | * Note: Always when hanging the GPU, use igt_set_stop_rings() to |
| 321 | * notify the driver. Driver controls hang log messaging based on |
| 322 | * these flags and thus prevents false positives on logs. |
| 323 | */ |
| 324 | void igt_set_stop_rings(enum stop_ring_flags flags) |
| 325 | { |
| 326 | enum stop_ring_flags current; |
| 327 | |
| 328 | igt_assert((flags & ~(STOP_RING_ALL | |
| 329 | STOP_RING_ALLOW_BAN | |
| 330 | STOP_RING_ALLOW_ERRORS)) == 0); |
| 331 | |
| 332 | current = igt_get_stop_rings(); |
| 333 | igt_assert_f(flags == 0 || current == 0, |
| 334 | "previous i915_ring_stop is still 0x%x\n", current); |
| 335 | |
| 336 | stop_rings_write(flags); |
| 337 | current = igt_get_stop_rings(); |
| 338 | igt_warn_on_f(current != flags, |
| 339 | "i915_ring_stop readback mismatch 0x%x vs 0x%x\n", |
| 340 | flags, current); |
| 341 | } |