Introduce a new debug option to wilfully cause the GPU to hang and for
the kernel to accuse us of being neglectful.
---
 src/intel/Makefile.sources                    |   2 +
 src/intel/common/gen_debug.c                  |   1 +
 src/intel/common/gen_debug.h                  |   1 +
 src/intel/common/gen_hang.c                   | 216 ++++++++++++++++++
 src/intel/common/gen_hang.h                   |  56 +++++
 src/intel/common/meson.build                  |   2 +
 src/mesa/drivers/dri/i965/intel_batchbuffer.c |  19 ++
 7 files changed, 297 insertions(+)
 create mode 100644 src/intel/common/gen_hang.c
 create mode 100644 src/intel/common/gen_hang.h

diff --git a/src/intel/Makefile.sources b/src/intel/Makefile.sources
index 94a28d370e8..9058633abfc 100644
--- a/src/intel/Makefile.sources
+++ b/src/intel/Makefile.sources
@@ -18,6 +18,8 @@ COMMON_FILES = \
 	common/gen_disasm.h \
 	common/gen_defines.h \
 	common/gen_gem.h \
+	common/gen_hang.c \
+	common/gen_hang.h \
 	common/gen_l3_config.c \
 	common/gen_l3_config.h \
 	common/gen_urb_config.c \
diff --git a/src/intel/common/gen_debug.c b/src/intel/common/gen_debug.c
index a978f2f5818..a4dd3965e13 100644
--- a/src/intel/common/gen_debug.c
+++ b/src/intel/common/gen_debug.c
@@ -85,6 +85,7 @@ static const struct debug_control debug_control[] = {
    { "nohiz", DEBUG_NO_HIZ },
    { "color", DEBUG_COLOR },
    { "reemit", DEBUG_REEMIT },
+   { "hang", DEBUG_HANG },
    { NULL, 0 }
 };
 
diff --git a/src/intel/common/gen_debug.h b/src/intel/common/gen_debug.h
index 72d7ca20a39..49a93b87ebc 100644
--- a/src/intel/common/gen_debug.h
+++ b/src/intel/common/gen_debug.h
@@ -83,6 +83,7 @@ extern uint64_t INTEL_DEBUG;
 #define DEBUG_NO_HIZ (1ull << 39)
 #define DEBUG_COLOR (1ull << 40)
 #define DEBUG_REEMIT (1ull << 41)
+#define DEBUG_HANG (1ull << 42)
 
 /* These flags are not compatible with the disk shader cache */
 #define DEBUG_DISK_CACHE_DISABLE_MASK DEBUG_SHADER_TIME
diff --git a/src/intel/common/gen_hang.c b/src/intel/common/gen_hang.c
new file mode 100644
index 00000000000..5f0dd4e0640
--- /dev/null
+++ b/src/intel/common/gen_hang.c
@@ -0,0 +1,216 @@
+/*
+ * Copyright © 2019 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * \file gen_hang.c
+ *
+ * Support for wilfully injecting GPU hangs.
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdint.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include <xf86drm.h>
+
+#include "drm-uapi/drm.h"
+#include "drm-uapi/i915_drm.h"
+
+#include "gen_hang.h"
+
+/**
+ * Create a GEM buffer object of \p size bytes on \p fd.
+ *
+ * \return the new handle, or 0 if the ioctl failed (errno is left as set by
+ * the ioctl for the caller to inspect).
+ */
+static uint32_t __gem_create(int fd, uint64_t size)
+{
+   struct drm_i915_gem_create arg = { .size = size };
+
+   if (drmIoctl(fd, DRM_IOCTL_I915_GEM_CREATE, &arg))
+      return 0;
+
+   return arg.handle;
+}
+
+/**
+ * Set the caching mode of \p handle.
+ *
+ * \return 0 on success, -errno on failure.
+ */
+static int __gem_set_caching(int fd, uint32_t handle, unsigned int caching)
+{
+   struct drm_i915_gem_caching arg = { .handle = handle, .caching = caching };
+   return drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING, &arg) ? -errno : 0;
+}
+
+/**
+ * Map \p size bytes of \p handle, starting at \p offset, for CPU access.
+ *
+ * \return the mapping. addr_ptr is preset to -1, so a failed ioctl is
+ * returned as (void *)-1, which the caller compares against MAP_FAILED.
+ */
+static void *
+__gem_mmap(int fd, uint32_t handle, uint64_t offset, uint64_t size)
+{
+   struct drm_i915_gem_mmap arg = {
+      .handle = handle,
+      .offset = offset,
+      .size = size,
+      .addr_ptr = -1,
+   };
+   drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP, &arg);
+   return (void *)(uintptr_t)arg.addr_ptr;
+}
+
+/**
+ * Close \p handle.
+ */
+static void __gem_close(int fd, uint32_t handle)
+{
+   /* The ioctl reads a whole struct drm_gem_close, not a bare handle. */
+   struct drm_gem_close arg = { .handle = handle };
+   drmIoctl(fd, DRM_IOCTL_GEM_CLOSE, &arg);
+}
+
+/**
+ * Submit a batch that never terminates.
+ *
+ * A 4096 byte batch is created which stores 1 into its own last dword and
+ * then, from dword 1000, branches back to dword 16 forever.
+ *
+ * \param fd     DRM file descriptor the ioctls are issued on
+ * \param ctx    context id, passed to execbuf in rsvd1
+ * \param flags  GPU generation (HANG_GENMASK) ORed with HANG_* flags
+ *
+ * \return 0 on success, -errno on failure.
+ */
+int
+gen_inject_hang(int fd, uint32_t ctx, unsigned int flags)
+{
+   struct drm_i915_gem_relocation_entry reloc[2] = {};
+   struct drm_i915_gem_exec_object2 obj = {
+      .handle = __gem_create(fd, 4096),
+      .relocation_count = 2,
+      .relocs_ptr = (uintptr_t)reloc,
+   };
+   struct drm_i915_gem_execbuffer2 eb = {
+      .buffers_ptr = (uintptr_t)&obj,
+      .buffer_count = 1,
+      .rsvd1 = ctx
+   };
+   const int gen = flags & HANG_GENMASK;
+   uint32_t *batch, *cs;
+   int err;
+
+   /* Report the real reason instead of tripping over handle 0 below. */
+   if (!obj.handle)
+      return -errno;
+
+   err = __gem_set_caching(fd, obj.handle, I915_CACHING_CACHED);
+   if (err)
+      goto out_close;
+
+   batch = __gem_mmap(fd, obj.handle, 0, 4096);
+   if (batch == MAP_FAILED) {
+      err = -errno;
+      goto out_close;
+   }
+
+   /* Store 1 into the last dword so the CPU can see the batch running. */
+   reloc[0].target_handle = obj.handle;
+   reloc[0].delta = 4096 - sizeof(*cs);
+   cs = batch;
+   *cs++ = 0x20 << 23 | (gen < 6 ? 1 << 22 : 0) | 2; /* MI_STORE_DWORD_IMM */
+   if (gen >= 8) {
+      reloc[0].offset = sizeof(*cs);
+      *cs++ = reloc[0].delta;
+      *cs++ = 0;
+   } else if (gen >= 4) {
+      reloc[0].offset = 2 * sizeof(*cs);
+      *cs++ = 0;
+      *cs++ = reloc[0].delta;
+   } else {
+      reloc[0].offset = sizeof(*cs);
+      cs[-1]--; /* one dword shorter here, so drop the length by one */
+      *cs++ = reloc[0].delta;
+   }
+   *cs++ = 1;
+
+   /* Dword 16 is where the loop below branches back to. */
+   if (flags & HANG_ALLOW_PREEMPTION)
+      batch[16] = 0x5 << 23; /* MI_ARB_CHECK */
+
+   /* Branch from dword 1000 back to byte offset 64 (dword 16), forever. */
+   cs = &batch[1000];
+   reloc[1].target_handle = obj.handle;
+   reloc[1].offset = 1001 * sizeof(*cs);
+   reloc[1].read_domains = I915_GEM_DOMAIN_COMMAND;
+   reloc[1].delta = 64;
+   if (gen >= 8) {
+      *cs++ = 0x31 << 23 | 1 << 8 | 1; /* MI_BATCH_BUFFER_START */
+      *cs++ = reloc[1].delta;
+      *cs++ = 0;
+   } else if (gen >= 6) {
+      *cs++ = 0x31 << 23 | 1 << 8; /* MI_BATCH_BUFFER_START */
+      *cs++ = reloc[1].delta;
+   } else {
+      *cs++ = 0x31 << 23 | 2 << 6; /* MI_BATCH_BUFFER_START */
+      if (gen < 4)
+         reloc[1].delta |= 1;
+      *cs++ = reloc[1].delta;
+   }
+   *cs++ = 0xa << 23; /* never reached! */
+
+   if (drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &eb)) {
+      err = -errno;
+      goto out;
+   }
+
+   if (flags & HANG_IMMEDIATE) {
+      /* Kept distinct from fd, which is still needed for the cleanup. */
+      int debugfs = open("/sys/kernel/debug/dri/0/i915_wedged", O_WRONLY);
+      if (debugfs < 0) {
+         err = -errno;
+         goto out;
+      }
+
+      /* Spin until the GPU has written the marker dword. */
+      while (!*(volatile uint32_t *)&batch[1023])
+         ;
+
+      /* NOTE: presumably tells the kernel to treat the GPU as hung now. */
+      if (write(debugfs, "-1\n", 3) < 0)
+         err = -errno;
+
+      close(debugfs);
+   }
+
+out:
+   munmap(batch, 4096);
+out_close:
+   __gem_close(fd, obj.handle);
+   return err;
+}
diff --git a/src/intel/common/gen_hang.h b/src/intel/common/gen_hang.h
new file mode 100644
index 00000000000..9efc6e1950d
--- /dev/null
+++ b/src/intel/common/gen_hang.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright © 2019 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef GEN_HANG_H
+#define GEN_HANG_H
+
+#include <stdint.h>
+#include "compiler/shader_enums.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+/**
+ * \file gen_hang.h
+ *
+ * Wilful GPU hang injection.
+ */
+
+/* Bits of the gen_flags argument of gen_inject_hang(). */
+#define HANG_GENMASK 0xff /* GPU generation */
+#define HANG_ALLOW_PREEMPTION (1 << 8)
+#define HANG_IMMEDIATE (1 << 9) /* requires debugfs access (root-only!) */
+
+/**
+ * Submit a never-ending batch to context \p ctx on the DRM device \p fd.
+ *
+ * \return 0 on success, -errno on failure.
+ */
+int gen_inject_hang(int fd, uint32_t ctx, unsigned int gen_flags);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* GEN_HANG_H */
diff --git a/src/intel/common/meson.build b/src/intel/common/meson.build
index 332e978b0ad..7fa1349e35f 100644
--- a/src/intel/common/meson.build
+++ b/src/intel/common/meson.build
@@ -30,6 +30,8 @@ files_libintel_common = files(
   'gen_disasm.c',
   'gen_disasm.h',
   'gen_gem.h',
+  'gen_hang.c',
+  'gen_hang.h',
   'gen_l3_config.c',
   'gen_l3_config.h',
   'gen_urb_config.c',
diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
index 8097392d22b..1cdf6fd65f5 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
@@ -33,6 +33,7 @@
 #include "brw_state.h"
 #include "common/gen_decoder.h"
 #include "common/gen_gem.h"
+#include "common/gen_hang.h"
 
 #include "util/hash_table.h"
 
@@ -897,6 +898,24 @@ _intel_batchbuffer_flush_fence(struct brw_context *brw,
       brw_bo_wait_rendering(brw->batch.batch.bo);
    }
 
+   if (unlikely(INTEL_DEBUG & DEBUG_HANG)) {
+      /* Let 100 flushes through between each injected hang. */
+      static int delay = 100;
+      if (--delay < 0) {
+         struct intel_screen *screen = brw->screen;
+         const struct gen_device_info *devinfo = &screen->devinfo;
+         __DRIscreen *dri_screen = screen->driScrnPriv;
+         int err;
+
+         fprintf(stderr, "injecting GPU hang\n");
+         err = gen_inject_hang(dri_screen->fd, brw->hw_ctx,
+                               devinfo->gen | HANG_ALLOW_PREEMPTION);
+         if (err)
+            fprintf(stderr, "failed to inject GPU hang: %d\n", err);
+         delay = 100;
+      }
+   }
+
    /* Start a new batch buffer. */
    brw_new_batch(brw);
 
-- 
2.20.1

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev