Combining the multiple access checks into a few batches and a single serialising read can reduce detection times from around 100us to 70us on a fast Haswell system.
Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> Cc: Kenneth Graunke <kenn...@whitecape.org> --- src/mesa/drivers/dri/i965/intel_screen.c | 165 +++++++++++++++++++------------ 1 file changed, 101 insertions(+), 64 deletions(-) diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c index 36c7bb2..0b60f13 100644 --- a/src/mesa/drivers/dri/i965/intel_screen.c +++ b/src/mesa/drivers/dri/i965/intel_screen.c @@ -1185,6 +1185,13 @@ intel_detect_timestamp(struct intel_screen *screen) return 0; } +struct detect_pipelined_register { + uint32_t reg; + uint32_t expected_value; + unsigned result; + bool reset; +}; + /** * Test if we can use MI_LOAD_REGISTER_MEM from an untrusted batchbuffer. * @@ -1192,102 +1199,134 @@ intel_detect_timestamp(struct intel_screen *screen) * while others don't. Instead of trying to enumerate every case, just * try and write a register and see if works. */ -static bool -intel_detect_pipelined_register(struct intel_screen *screen, - int reg, uint32_t expected_value, bool reset) +static void +__intel_detect_pipelined_registers(struct intel_screen *screen, + struct detect_pipelined_register *r, + int count) { - drm_intel_bo *results, *bo; - uint32_t *batch; - uint32_t offset = 0; - bool success = false; + drm_intel_bo *results; + int i; + + if (count == 0) + return; /* Create a zero'ed temporary buffer for reading our results */ results = drm_intel_bo_alloc(screen->bufmgr, "registers", 4096, 0); if (results == NULL) - goto err; - - bo = drm_intel_bo_alloc(screen->bufmgr, "batchbuffer", 4096, 0); - if (bo == NULL) - goto err_results; + return; - if (drm_intel_bo_map(bo, 1)) - goto err_batch; + /* Emit each access in a separate batch buffer so that if the kernel + * rejects an individual access attempt, we don't incorrectly assume + * all the register accesses are invalid. + */ + for (i = 0; i < count; i++) { + drm_intel_bo *bo; + uint32_t *batch; - batch = bo->virtual; + bo = drm_intel_bo_alloc(screen->bufmgr, "batchbuffer", 4096, 0); + if (bo == NULL) + continue; - /* Write the register. */ - *batch++ = MI_LOAD_REGISTER_IMM | (3 - 2); - *batch++ = reg; - *batch++ = expected_value; + if (drm_intel_bo_map(bo, 1)) + goto err_batch; - /* Save the register's value back to the buffer. */ - *batch++ = MI_STORE_REGISTER_MEM | (3 - 2); - *batch++ = reg; - drm_intel_bo_emit_reloc(bo, (char *)batch -(char *)bo->virtual, - results, offset*sizeof(uint32_t), - I915_GEM_DOMAIN_INSTRUCTION, - I915_GEM_DOMAIN_INSTRUCTION); - *batch++ = results->offset + offset*sizeof(uint32_t); + batch = bo->virtual; - /* And afterwards clear the register */ - if (reset) { + /* Write the register. */ *batch++ = MI_LOAD_REGISTER_IMM | (3 - 2); - *batch++ = reg; - *batch++ = 0; - } + *batch++ = r[i].reg; + *batch++ = r[i].expected_value; + + /* Save the register's value back to the buffer. */ + *batch++ = MI_STORE_REGISTER_MEM | (3 - 2); + *batch++ = r[i].reg; + drm_intel_bo_emit_reloc(bo, (char *)batch -(char *)bo->virtual, + results, i*sizeof(uint32_t), + I915_GEM_DOMAIN_INSTRUCTION, + I915_GEM_DOMAIN_INSTRUCTION); + *batch++ = results->offset + i*sizeof(uint32_t); + + /* And afterwards clear the register */ + if (r[i].reset) { + *batch++ = MI_LOAD_REGISTER_IMM | (3 - 2); + *batch++ = r[i].reg; + *batch++ = 0; + } - *batch++ = MI_BATCH_BUFFER_END; + *batch++ = MI_BATCH_BUFFER_END; - drm_intel_bo_mrb_exec(bo, ALIGN((char *)batch - (char *)bo->virtual, 8), - NULL, 0, 0, - I915_EXEC_RENDER); + drm_intel_bo_mrb_exec(bo, ALIGN((char *)batch - (char *)bo->virtual, 8), + NULL, 0, 0, + I915_EXEC_RENDER); - /* Check whether the value got written. */ +err_batch: + drm_intel_bo_unreference(bo); + } + + /* Check whether the values got written. */ if (drm_intel_bo_map(results, false) == 0) { - success = *((uint32_t *)results->virtual + offset) == expected_value; + uint32_t *data = results->virtual; + for (i = 0; i < count; i++) + if (data[i] == r[i].expected_value) + screen->hw_has_pipelined_register |= r[i].result; drm_intel_bo_unmap(results); } -err_batch: - drm_intel_bo_unreference(bo); -err_results: drm_intel_bo_unreference(results); -err: - return success; } static bool -intel_detect_pipelined_so(struct intel_screen *screen) +intel_detect_pipelined_so(struct intel_screen *screen, + struct detect_pipelined_register *detect) { - /* Supposedly, Broadwell just works. */ - if (screen->devinfo->gen >= 8) - return true; - if (screen->devinfo->gen <= 6) - return false; + return 0; + + /* Supposedly, Broadwell just works. */ + if (screen->devinfo->gen >= 8) { + screen->hw_has_pipelined_register |= HW_HAS_PIPELINED_SOL_OFFSET; + return 0; + } /* We use SO_WRITE_OFFSET0 since you're supposed to write it (unlike the * statistics registers), and we already reset it to zero before using it. */ - return intel_detect_pipelined_register(screen, - GEN7_SO_WRITE_OFFSET(0), - 0x1337d0d0, - false); + detect->reg = GEN7_SO_WRITE_OFFSET(0); + detect->expected_value = 0x1337d0d0; + detect->result = HW_HAS_PIPELINED_SOL_OFFSET; + detect->reset = false; + return 1; } -static bool -intel_detect_pipelined_oacontrol(struct intel_screen *screen) +static int +intel_detect_pipelined_oacontrol(struct intel_screen *screen, + struct detect_pipelined_register *detect) { if (screen->devinfo->gen < 6 || screen->devinfo->gen >= 8) - return false; + return 0; /* Set "Select Context ID" to a particular address (which is likely not a * context), but leave all counting disabled. This should be harmless. */ - return intel_detect_pipelined_register(screen, - OACONTROL, - 0x31337000, - true); + detect->reg = OACONTROL; + detect->expected_value = 0x31337000; + detect->result = HW_HAS_PIPELINED_OACONTROL; + detect->reset = true; + return 1; +} + +static void +intel_detect_pipelined_register_access(struct intel_screen *screen) +{ + struct detect_pipelined_register regs[2], *r =regs; + + /* Combine the multiple register access validation into a single + * round trip through the kernel + GPU. + */ + r += intel_detect_pipelined_so(screen, r); + r += intel_detect_pipelined_oacontrol(screen, r); + + __intel_detect_pipelined_registers(screen, regs, r-regs); } /** @@ -1549,10 +1588,8 @@ __DRIconfig **intelInitScreen2(__DRIscreen *psp) intelScreen->hw_has_swizzling = intel_detect_swizzling(intelScreen); intelScreen->hw_has_timestamp = intel_detect_timestamp(intelScreen); - if (intel_detect_pipelined_so(intelScreen)) - intelScreen->hw_has_pipelined_register |= HW_HAS_PIPELINED_SOL_OFFSET; - if (intel_detect_pipelined_oacontrol(intelScreen)) - intelScreen->hw_has_pipelined_register |= HW_HAS_PIPELINED_OACONTROL; + + intel_detect_pipelined_register_access(intelScreen); const char *force_msaa = getenv("INTEL_FORCE_MSAA"); if (force_msaa) { -- 2.5.0 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev