configure.ac | 17 include/drm/drm_fourcc.h | 6 intel/Makefile.am | 2 intel/intel_aub.h | 123 +++ intel/intel_bufmgr.h | 19 intel/intel_bufmgr_gem.c | 514 ++++++++++++++- intel/intel_decode.c | 253 +++++-- intel/tests/gen7-3d.batch |binary intel/tests/gen7-3d.batch-ref.txt | 1290 ++++---------------------------------- radeon/radeon_cs_space.c | 15 radeon/radeon_surface.c | 14 xf86drmMode.h | 5 12 files changed, 1025 insertions(+), 1233 deletions(-)
New commits: commit 51c3e7d7335ecdf572968db7d3eed661e8a61810 Author: Eric Anholt <e...@anholt.net> Date: Fri Mar 16 16:11:10 2012 -0700 configure: Bump version for 2.4.32. diff --git a/configure.ac b/configure.ac index 71a596c..ff2c840 100644 --- a/configure.ac +++ b/configure.ac @@ -20,7 +20,7 @@ AC_PREREQ([2.63]) AC_INIT([libdrm], - [2.4.31], + [2.4.32], [https://bugs.freedesktop.org/enter_bug.cgi?product=DRI], [libdrm]) commit 5de5b7484a3a41554e16c02a544a45db5516b031 Author: Eric Anholt <e...@anholt.net> Date: Tue Mar 13 16:49:53 2012 -0700 intel: Quiet two more valgrind complaints with recent changes. These are more cases where valgrind doesn't understand what gets read or written by our ioctls. diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c index 51b963f..3c91090 100644 --- a/intel/intel_bufmgr_gem.c +++ b/intel/intel_bufmgr_gem.c @@ -2647,6 +2647,7 @@ get_pci_device_id(drm_intel_bufmgr_gem *bufmgr_gem) } } + VG_CLEAR(devid); VG_CLEAR(gp); gp.param = I915_PARAM_CHIPSET_ID; gp.value = &devid; @@ -2790,6 +2791,7 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size) bufmgr_gem->gtt_size -= 256*1024*1024; } + VG_CLEAR(gp); gp.value = &tmp; gp.param = I915_PARAM_HAS_EXECBUF2; commit 9d18ad254afc2afc41a919b86cd51ea40cfd8f0b Author: Eric Anholt <e...@anholt.net> Date: Fri Mar 2 10:27:55 2012 -0800 intel: Add per-dword decode of gen7 3DPRIMITIVE. 
diff --git a/intel/intel_decode.c b/intel/intel_decode.c index af621d4..df9b704 100644 --- a/intel/intel_decode.c +++ b/intel/intel_decode.c @@ -2577,10 +2577,8 @@ static const char *get_965_element_component(uint32_t data, int component) } } -static const char *get_965_prim_type(uint32_t data) +static const char *get_965_prim_type(uint32_t primtype) { - uint32_t primtype = (data >> 10) & 0x1f; - switch (primtype) { case 0x01: return "point list"; @@ -3009,7 +3007,7 @@ gen4_3DPRIMITIVE(struct drm_intel_decode *ctx) { instr_out(ctx, 0, "3DPRIMITIVE: %s %s\n", - get_965_prim_type(ctx->data[0]), + get_965_prim_type((ctx->data[0] >> 10) & 0x1f), (ctx->data[0] & (1 << 15)) ? "random" : "sequential"); instr_out(ctx, 1, "vertex count\n"); instr_out(ctx, 2, "start vertex\n"); @@ -3021,6 +3019,27 @@ gen4_3DPRIMITIVE(struct drm_intel_decode *ctx) } static int +gen7_3DPRIMITIVE(struct drm_intel_decode *ctx) +{ + bool indirect = !!(ctx->data[0] & (1 << 10)); + + instr_out(ctx, 0, + "3DPRIMITIVE: %s%s\n", + indirect ? " indirect" : "", + (ctx->data[0] & (1 << 8)) ? " predicated" : ""); + instr_out(ctx, 1, "%s %s\n", + get_965_prim_type(ctx->data[1] & 0x3f), + (ctx->data[1] & (1 << 8)) ? "random" : "sequential"); + instr_out(ctx, 2, indirect ? "ignored" : "vertex count\n"); + instr_out(ctx, 3, indirect ? "ignored" : "start vertex\n"); + instr_out(ctx, 4, indirect ? "ignored" : "instance count\n"); + instr_out(ctx, 5, indirect ? "ignored" : "start instance\n"); + instr_out(ctx, 6, indirect ? 
"ignored" : "index bias\n"); + + return 7; +} + +static int decode_3d_965(struct drm_intel_decode *ctx) { uint32_t opcode; @@ -3120,7 +3139,7 @@ decode_3d_965(struct drm_intel_decode *ctx) { 0x7917, 0x00ff, 2, 2+128*2, "3DSTATE_SO_DECL_LIST" }, { 0x7918, 0x00ff, 4, 4, "3DSTATE_SO_BUFFER" }, { 0x7a00, 0x00ff, 4, 6, "PIPE_CONTROL" }, - { 0x7b00, 0x00ff, 7, 7, "3DPRIMITIVE", 7 }, + { 0x7b00, 0x00ff, 7, 7, NULL, 7, gen7_3DPRIMITIVE }, { 0x7b00, 0x00ff, 6, 6, NULL, 0, gen4_3DPRIMITIVE }, }, *opcode_3d = NULL; diff --git a/intel/tests/gen7-3d.batch-ref.txt b/intel/tests/gen7-3d.batch-ref.txt index 1488ca5..be3c85e 100644 --- a/intel/tests/gen7-3d.batch-ref.txt +++ b/intel/tests/gen7-3d.batch-ref.txt @@ -202,11 +202,11 @@ 0x12300324: 0x11230000: (X, Y, 0.0, 1.0), dst offset 0x00 bytes 0x12300328: 0x02400008: buffer 0: invalid, type 0x0040, src offset 0x0008 bytes 0x1230032c: 0x11130000: (X, Y, Z, 1.0), dst offset 0x00 bytes -0x12300330: 0x7b000005: 3DPRIMITIVE -0x12300334: 0x00000007: dword 1 -0x12300338: 0x00000004: dword 2 -0x1230033c: 0x00000000: dword 3 -0x12300340: 0x00000001: dword 4 -0x12300344: 0x00000000: dword 5 -0x12300348: 0x00000000: dword 6 +0x12300330: 0x7b000005: 3DPRIMITIVE: +0x12300334: 0x00000007: quad list sequential +0x12300338: 0x00000004: vertex count +0x1230033c: 0x00000000: start vertex +0x12300340: 0x00000001: instance count +0x12300344: 0x00000000: start instance +0x12300348: 0x00000000: index bias 0x1230034c: 0x05000000: MI_BATCH_BUFFER_END commit 9b87fd9a3df8c59461bc90b4620526d10f9b5771 Author: Eric Anholt <e...@anholt.net> Date: Fri Mar 2 10:18:51 2012 -0800 intel: Move the gen4-6 3DPRIMITIVE handling out of the switch statement. 
diff --git a/intel/intel_decode.c b/intel/intel_decode.c index 2ea8f67..af621d4 100644 --- a/intel/intel_decode.c +++ b/intel/intel_decode.c @@ -3005,6 +3005,22 @@ gen7_3DSTATE_WM(struct drm_intel_decode *ctx) } static int +gen4_3DPRIMITIVE(struct drm_intel_decode *ctx) +{ + instr_out(ctx, 0, + "3DPRIMITIVE: %s %s\n", + get_965_prim_type(ctx->data[0]), + (ctx->data[0] & (1 << 15)) ? "random" : "sequential"); + instr_out(ctx, 1, "vertex count\n"); + instr_out(ctx, 2, "start vertex\n"); + instr_out(ctx, 3, "instance count\n"); + instr_out(ctx, 4, "start instance\n"); + instr_out(ctx, 5, "index bias\n"); + + return 6; +} + +static int decode_3d_965(struct drm_intel_decode *ctx) { uint32_t opcode; @@ -3105,7 +3121,7 @@ decode_3d_965(struct drm_intel_decode *ctx) { 0x7918, 0x00ff, 4, 4, "3DSTATE_SO_BUFFER" }, { 0x7a00, 0x00ff, 4, 6, "PIPE_CONTROL" }, { 0x7b00, 0x00ff, 7, 7, "3DPRIMITIVE", 7 }, - { 0x7b00, 0x00ff, 6, 6, "3DPRIMITIVE" }, + { 0x7b00, 0x00ff, 6, 6, NULL, 0, gen4_3DPRIMITIVE }, }, *opcode_3d = NULL; opcode = (data[0] & 0xffff0000) >> 16; @@ -3593,20 +3609,6 @@ decode_3d_965(struct drm_intel_decode *ctx) instr_out(ctx, 3, "immediate dword high\n"); return len; } - case 0x7b00: - if (ctx->gen == 7) - break; - - instr_out(ctx, 0, - "3DPRIMITIVE: %s %s\n", - get_965_prim_type(data[0]), - (data[0] & (1 << 15)) ? "random" : "sequential"); - instr_out(ctx, 1, "vertex count\n"); - instr_out(ctx, 2, "start vertex\n"); - instr_out(ctx, 3, "instance count\n"); - instr_out(ctx, 4, "start instance\n"); - instr_out(ctx, 5, "index bias\n"); - return len; } if (opcode_3d) { commit 99c73378a1b440bcf594742445dfe14ab1e89128 Author: Eric Anholt <e...@anholt.net> Date: Fri Feb 10 04:12:15 2012 -0800 intel: Add support for (possibly) unsynchronized maps. This improves the performance of Mesa's GL_MAP_UNSYNCHRONIZED_BIT path in GL_ARB_map_buffer_range. Improves Unigine Tropics performance at 1024x768 by 2.30482% +/- 0.0492146% (n=61) v2: Fix comment grammar. 
Reviewed-by: Chris Wilson <ch...@chris-wilson.co.uk> diff --git a/intel/intel_bufmgr.h b/intel/intel_bufmgr.h index a8062c5..45389e1 100644 --- a/intel/intel_bufmgr.h +++ b/intel/intel_bufmgr.h @@ -156,8 +156,10 @@ void drm_intel_bufmgr_gem_enable_reuse(drm_intel_bufmgr *bufmgr); void drm_intel_bufmgr_gem_enable_fenced_relocs(drm_intel_bufmgr *bufmgr); void drm_intel_bufmgr_gem_set_vma_cache_size(drm_intel_bufmgr *bufmgr, int limit); +int drm_intel_gem_bo_map_unsynchronized(drm_intel_bo *bo); int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo); int drm_intel_gem_bo_unmap_gtt(drm_intel_bo *bo); + int drm_intel_gem_bo_get_reloc_count(drm_intel_bo *bo); void drm_intel_gem_bo_clear_relocs(drm_intel_bo *bo, int start); void drm_intel_gem_bo_start_gtt_access(drm_intel_bo *bo, int write_enable); diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c index 0eb57c4..51b963f 100644 --- a/intel/intel_bufmgr_gem.c +++ b/intel/intel_bufmgr_gem.c @@ -1189,15 +1189,13 @@ static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable) return 0; } -int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo) +static int +map_gtt(drm_intel_bo *bo) { drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; - struct drm_i915_gem_set_domain set_domain; int ret; - pthread_mutex_lock(&bufmgr_gem->lock); - if (bo_gem->map_count++ == 0) drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem); @@ -1223,7 +1221,6 @@ int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo) strerror(errno)); if (--bo_gem->map_count == 0) drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); - pthread_mutex_unlock(&bufmgr_gem->lock); return ret; } @@ -1240,7 +1237,6 @@ int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo) strerror(errno)); if (--bo_gem->map_count == 0) drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); - pthread_mutex_unlock(&bufmgr_gem->lock); return ret; } } @@ -1250,7 +1246,33 @@ int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo) DBG("bo_map_gtt: %d (%s) -> %p\n", 
bo_gem->gem_handle, bo_gem->name, bo_gem->gtt_virtual); - /* Now move it to the GTT domain so that the CPU caches are flushed */ + return 0; +} + +int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + struct drm_i915_gem_set_domain set_domain; + int ret; + + pthread_mutex_lock(&bufmgr_gem->lock); + + ret = map_gtt(bo); + if (ret) { + pthread_mutex_unlock(&bufmgr_gem->lock); + return ret; + } + + /* Now move it to the GTT domain so that the GPU and CPU + * caches are flushed and the GPU isn't actively using the + * buffer. + * + * The pagefault handler does this domain change for us when + * it has unbound the BO from the GTT, but it's up to us to + * tell it when we're about to use things if we had done + * rendering and it still happens to be bound to the GTT. + */ VG_CLEAR(set_domain); set_domain.handle = bo_gem->gem_handle; set_domain.read_domains = I915_GEM_DOMAIN_GTT; @@ -1271,6 +1293,42 @@ int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo) return 0; } +/** + * Performs a mapping of the buffer object like the normal GTT + * mapping, but avoids waiting for the GPU to be done reading from or + * rendering to the buffer. + * + * This is used in the implementation of GL_ARB_map_buffer_range: The + * user asks to create a buffer, then does a mapping, fills some + * space, runs a drawing command, then asks to map it again without + * synchronizing because it guarantees that it won't write over the + * data that the GPU is busy using (or, more specifically, that if it + * does write over the data, it acknowledges that rendering is + * undefined). + */ + +int drm_intel_gem_bo_map_unsynchronized(drm_intel_bo *bo) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + int ret; + + /* If the CPU cache isn't coherent with the GTT, then use a + * regular synchronized mapping. 
The problem is that we don't + * track where the buffer was last used on the CPU side in + * terms of drm_intel_bo_map vs drm_intel_gem_bo_map_gtt, so + * we would potentially corrupt the buffer even when the user + * does reasonable things. + */ + if (!bufmgr_gem->has_llc) + return drm_intel_gem_bo_map_gtt(bo); + + pthread_mutex_lock(&bufmgr_gem->lock); + ret = map_gtt(bo); + pthread_mutex_unlock(&bufmgr_gem->lock); + + return ret; +} + static int drm_intel_gem_bo_unmap(drm_intel_bo *bo) { drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; commit 3a8884851b72af012a8cb2beea320f094a58e5eb Author: Eric Anholt <e...@anholt.net> Date: Mon Feb 27 17:26:05 2012 -0800 intel: Fix error check for I915_PARAM_HAS_LLC. drmIoctl returns -1 on error with errno set to the error value. Other users of it in this file just check for != 0, and only use errno when they need to send an error value on to the caller of the API. diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c index e87690d..0eb57c4 100644 --- a/intel/intel_bufmgr_gem.c +++ b/intel/intel_bufmgr_gem.c @@ -2753,7 +2753,7 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size) gp.param = I915_PARAM_HAS_LLC; ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); - if (ret == -EINVAL) { + if (ret != 0) { /* Kernel does not supports HAS_LLC query, fallback to GPU * generation detection and assume that we have LLC on GEN6/7 */ commit c9ce2edfc8d33e760667529250e86e93ff656c3d Author: Eric Anholt <e...@anholt.net> Date: Fri Mar 9 16:08:23 2012 -0800 intel: Bump the copyright dates on the bufmgr files. We've been hacking these constantly. 
diff --git a/intel/intel_bufmgr.h b/intel/intel_bufmgr.h index fa6f2b8..a8062c5 100644 --- a/intel/intel_bufmgr.h +++ b/intel/intel_bufmgr.h @@ -1,5 +1,5 @@ /* - * Copyright © 2008 Intel Corporation + * Copyright © 2008-2012 Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c index d56593a..e87690d 100644 --- a/intel/intel_bufmgr_gem.c +++ b/intel/intel_bufmgr_gem.c @@ -1,7 +1,7 @@ /************************************************************************** * * Copyright © 2007 Red Hat Inc. - * Copyright © 2007 Intel Corporation + * Copyright © 2007-2012 Intel Corporation * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA * All Rights Reserved. * commit 4db16a9480af2c4f36eb8023193cd54545efbe54 Author: Eric Anholt <e...@anholt.net> Date: Tue Oct 11 15:59:03 2011 -0700 intel: Add .aub file output support. This will allow the driver to capture all of its execution state to a file for later debugging. intel_gpu_dump is limited in that it only captures batchbuffers, and Mesa's captures, while more complete, still capture only a portion of the state involved in execution. This is a squash commit of a long series of hacking as we tried to get the resulting traces to work in the internal simulator. It contains contributions by Yuanhan Liu and Kenneth Graunke. v2: Drop the MI_FLUSH_ENABLE setup.
Reviewed-by: Kenneth Graunke <kenn...@whitecape.org> Signed-off-by: Eric Anholt <e...@anholt.net> Signed-off-by: Yuanhan Liu <yuanhan....@linux.intel.com> Signed-off-by: Kenneth Graunke <kenn...@whitecape.org> diff --git a/intel/Makefile.am b/intel/Makefile.am index 06362b6..dc01a96 100644 --- a/intel/Makefile.am +++ b/intel/Makefile.am @@ -53,6 +53,7 @@ intel_bufmgr_gem_o_CFLAGS = $(AM_CFLAGS) -c99 libdrm_intelincludedir = ${includedir}/libdrm libdrm_intelinclude_HEADERS = intel_bufmgr.h \ + intel_aub.h \ intel_debug.h # This may be interesting even outside of "make check", due to the -dump option. diff --git a/intel/intel_aub.h b/intel/intel_aub.h new file mode 100644 index 0000000..a36fd53 --- /dev/null +++ b/intel/intel_aub.h @@ -0,0 +1,123 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + * Eric Anholt <e...@anholt.net> + * + */ + +/** @file intel_aub.h + * + * The AUB file is a file format used by Intel's internal simulation + * and other validation tools. It can be used at various levels by a + * driver to input state to the simulated hardware or a replaying + * debugger. + * + * We choose to dump AUB files using the trace block format for ease + * of implementation -- dump out the blocks of memory as plain blobs + * and insert ring commands to execute the batchbuffer blob. + */ + +#ifndef _INTEL_AUB_H +#define _INTEL_AUB_H + +#define AUB_MI_NOOP (0) +#define AUB_MI_BATCH_BUFFER_START (0x31 << 23) +#define AUB_PIPE_CONTROL (0x7a000002) + +/* DW0: instruction type. */ + +#define CMD_AUB (7 << 29) + +#define CMD_AUB_HEADER (CMD_AUB | (1 << 23) | (0x05 << 16)) +/* DW1 */ +# define AUB_HEADER_MAJOR_SHIFT 24 +# define AUB_HEADER_MINOR_SHIFT 16 + +#define CMD_AUB_TRACE_HEADER_BLOCK (CMD_AUB | (1 << 23) | (0x41 << 16)) +#define CMD_AUB_DUMP_BMP (CMD_AUB | (1 << 23) | (0x9e << 16)) + +/* DW1 */ +#define AUB_TRACE_OPERATION_MASK 0x000000ff +#define AUB_TRACE_OP_COMMENT 0x00000000 +#define AUB_TRACE_OP_DATA_WRITE 0x00000001 +#define AUB_TRACE_OP_COMMAND_WRITE 0x00000002 +#define AUB_TRACE_OP_MMIO_WRITE 0x00000003 +// operation = TRACE_DATA_WRITE, Type +#define AUB_TRACE_TYPE_MASK 0x0000ff00 +#define AUB_TRACE_TYPE_NOTYPE (0 << 8) +#define AUB_TRACE_TYPE_BATCH (1 << 8) +#define AUB_TRACE_TYPE_VERTEX_BUFFER (5 << 8) +#define AUB_TRACE_TYPE_2D_MAP (6 << 8) +#define AUB_TRACE_TYPE_CUBE_MAP (7 << 8) +#define AUB_TRACE_TYPE_VOLUME_MAP (9 << 8) +#define AUB_TRACE_TYPE_1D_MAP (10 << 8) +#define AUB_TRACE_TYPE_CONSTANT_BUFFER (11 << 8) +#define AUB_TRACE_TYPE_CONSTANT_URB (12 << 8) +#define AUB_TRACE_TYPE_INDEX_BUFFER (13 << 8) +#define AUB_TRACE_TYPE_GENERAL (14 << 8) +#define AUB_TRACE_TYPE_SURFACE (15 << 8) + + +// operation = TRACE_COMMAND_WRITE, Type = +#define AUB_TRACE_TYPE_RING_HWB (1 << 8) +#define AUB_TRACE_TYPE_RING_PRB0 (2 << 8) 
+#define AUB_TRACE_TYPE_RING_PRB1 (3 << 8) +#define AUB_TRACE_TYPE_RING_PRB2 (4 << 8) + +// Address space +#define AUB_TRACE_ADDRESS_SPACE_MASK 0x00ff0000 +#define AUB_TRACE_MEMTYPE_GTT (0 << 16) +#define AUB_TRACE_MEMTYPE_LOCAL (1 << 16) +#define AUB_TRACE_MEMTYPE_NONLOCAL (2 << 16) +#define AUB_TRACE_MEMTYPE_PCI (3 << 16) +#define AUB_TRACE_MEMTYPE_GTT_ENTRY (4 << 16) + +/* DW2 */ +// operation = TRACE_DATA_WRITE, Type = TRACE_DATA_WRITE_GENERAL_STATE +#define AUB_TRACE_GENERAL_STATE_MASK 0x000000ff + +#define AUB_TRACE_VS_STATE 0x00000001 +#define AUB_TRACE_GS_STATE 0x00000002 +#define AUB_TRACE_CL_STATE 0x00000003 +#define AUB_TRACE_SF_STATE 0x00000004 +#define AUB_TRACE_WM_STATE 0x00000005 +#define AUB_TRACE_CC_STATE 0x00000006 +#define AUB_TRACE_CL_VP 0x00000007 +#define AUB_TRACE_SF_VP 0x00000008 +#define AUB_TRACE_CC_VP 0x00000009 +#define AUB_TRACE_SAMPLER_STATE 0x0000000a +#define AUB_TRACE_KERNEL 0x0000000b +#define AUB_TRACE_SCRATCH 0x0000000c +#define AUB_TRACE_SDC 0x0000000d +#define AUB_TRACE_BLEND_STATE 0x00000016 +#define AUB_TRACE_DEPTH_STENCIL_STATE 0x00000017 + +// operation = TRACE_DATA_WRITE, Type = TRACE_DATA_WRITE_SURFACE_STATE +#define AUB_TRACE_SURFACE_STATE_MASK 0x00000ff00 +#define AUB_TRACE_BINDING_TABLE 0x000000100 +#define AUB_TRACE_SURFACE_STATE 0x000000200 + +/* DW3: address */ +/* DW4: len */ + +#endif /* _INTEL_AUB_H */ diff --git a/intel/intel_bufmgr.h b/intel/intel_bufmgr.h index 8036031..fa6f2b8 100644 --- a/intel/intel_bufmgr.h +++ b/intel/intel_bufmgr.h @@ -36,6 +36,7 @@ #include <stdio.h> #include <stdint.h> +#include <stdio.h> struct drm_clip_rect; @@ -84,6 +85,13 @@ struct _drm_intel_bo { int handle; }; +enum aub_dump_bmp_format { + AUB_DUMP_BMP_FORMAT_8BIT = 1, + AUB_DUMP_BMP_FORMAT_ARGB_4444 = 4, + AUB_DUMP_BMP_FORMAT_ARGB_0888 = 6, + AUB_DUMP_BMP_FORMAT_ARGB_8888 = 7, +}; + #define BO_ALLOC_FOR_RENDER (1<<0) drm_intel_bo *drm_intel_bo_alloc(drm_intel_bufmgr *bufmgr, const char *name, @@ -154,6 +162,12 @@ int 
drm_intel_gem_bo_get_reloc_count(drm_intel_bo *bo); void drm_intel_gem_bo_clear_relocs(drm_intel_bo *bo, int start); void drm_intel_gem_bo_start_gtt_access(drm_intel_bo *bo, int write_enable); +void drm_intel_bufmgr_gem_set_aub_dump(drm_intel_bufmgr *bufmgr, int enable); +void drm_intel_gem_bo_aub_dump_bmp(drm_intel_bo *bo, + int x1, int y1, int width, int height, + enum aub_dump_bmp_format format, + int pitch, int offset); + int drm_intel_get_pipe_from_crtc_id(drm_intel_bufmgr *bufmgr, int crtc_id); int drm_intel_get_aperture_sizes(int fd, size_t *mappable, size_t *total); diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c index ba38e50..d56593a 100644 --- a/intel/intel_bufmgr_gem.c +++ b/intel/intel_bufmgr_gem.c @@ -58,6 +58,7 @@ #include "intel_bufmgr.h" #include "intel_bufmgr_priv.h" #include "intel_chipset.h" +#include "intel_aub.h" #include "string.h" #include "i915_drm.h" @@ -121,6 +122,9 @@ typedef struct _drm_intel_bufmgr_gem { unsigned int bo_reuse : 1; unsigned int no_exec : 1; bool fenced_relocs; + + FILE *aub_file; + uint32_t aub_offset; } drm_intel_bufmgr_gem; #define DRM_INTEL_RELOC_FENCE (1<<0) @@ -215,6 +219,8 @@ struct _drm_intel_bo_gem { /** Flags that we may need to do the SW_FINSIH ioctl on unmap. 
*/ bool mapped_cpu_write; + + uint32_t aub_offset; }; static unsigned int @@ -1715,6 +1721,247 @@ drm_intel_update_buffer_offsets2 (drm_intel_bufmgr_gem *bufmgr_gem) } } +static void +aub_out(drm_intel_bufmgr_gem *bufmgr_gem, uint32_t data) +{ + fwrite(&data, 1, 4, bufmgr_gem->aub_file); +} + +static void +aub_out_data(drm_intel_bufmgr_gem *bufmgr_gem, void *data, size_t size) +{ + fwrite(data, 1, size, bufmgr_gem->aub_file); +} + +static void +aub_write_bo_data(drm_intel_bo *bo, uint32_t offset, uint32_t size) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + uint32_t *data; + unsigned int i; + + data = malloc(bo->size); + drm_intel_bo_get_subdata(bo, offset, size, data); + + /* Easy mode: write out bo with no relocations */ + if (!bo_gem->reloc_count) { + aub_out_data(bufmgr_gem, data, size); + free(data); + return; + } + + /* Otherwise, handle the relocations while writing. */ + for (i = 0; i < size / 4; i++) { + int r; + for (r = 0; r < bo_gem->reloc_count; r++) { + struct drm_i915_gem_relocation_entry *reloc; + drm_intel_reloc_target *info; + + reloc = &bo_gem->relocs[r]; + info = &bo_gem->reloc_target_info[r]; + + if (reloc->offset == offset + i * 4) { + drm_intel_bo_gem *target_gem; + uint32_t val; + + target_gem = (drm_intel_bo_gem *)info->bo; + + val = reloc->delta; + val += target_gem->aub_offset; + + aub_out(bufmgr_gem, val); + data[i] = val; + break; + } + } + if (r == bo_gem->reloc_count) { + /* no relocation, just the data */ + aub_out(bufmgr_gem, data[i]); + } + } + + free(data); +} + +static void +aub_bo_get_address(drm_intel_bo *bo) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + + /* Give the object a graphics address in the AUB file. 
We + * don't just use the GEM object address because we do AUB + * dumping before execution -- we want to successfully log + * when the hardware might hang, and we might even want to aub + * capture for a driver trying to execute on a different + * generation of hardware by disabling the actual kernel exec + * call. + */ + bo_gem->aub_offset = bufmgr_gem->aub_offset; + bufmgr_gem->aub_offset += bo->size; + /* XXX: Handle aperture overflow. */ + assert(bufmgr_gem->aub_offset < 256 * 1024 * 1024); +} + +static void +aub_write_trace_block(drm_intel_bo *bo, uint32_t type, uint32_t subtype, + uint32_t offset, uint32_t size) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + + aub_out(bufmgr_gem, + CMD_AUB_TRACE_HEADER_BLOCK | + (5 - 2)); + aub_out(bufmgr_gem, + AUB_TRACE_MEMTYPE_GTT | type | AUB_TRACE_OP_DATA_WRITE); + aub_out(bufmgr_gem, subtype); + aub_out(bufmgr_gem, bo_gem->aub_offset + offset); + aub_out(bufmgr_gem, size); + aub_write_bo_data(bo, offset, size); +} + +static void +aub_write_bo(drm_intel_bo *bo) +{ + uint32_t block_size; + uint32_t offset; + + aub_bo_get_address(bo); + + /* Break up large objects into multiple writes. Otherwise a + * 128kb VBO would overflow the 16 bits of size field in the + * packet header and everything goes badly after that. 
+ */ + for (offset = 0; offset < bo->size; offset += block_size) { + block_size = bo->size - offset; + + if (block_size > 8 * 4096) + block_size = 8 * 4096; + + aub_write_trace_block(bo, AUB_TRACE_TYPE_NOTYPE, 0, + offset, block_size); + } +} + +/* + * Make a ringbuffer on fly and dump it + */ +static void +aub_build_dump_ringbuffer(drm_intel_bufmgr_gem *bufmgr_gem, + uint32_t batch_buffer, int ring_flag) +{ + uint32_t ringbuffer[4096]; + int ring = AUB_TRACE_TYPE_RING_PRB0; /* The default ring */ + int ring_count = 0; + + if (ring_flag == I915_EXEC_BSD) + ring = AUB_TRACE_TYPE_RING_PRB1; + + /* Make a ring buffer to execute our batchbuffer. */ + memset(ringbuffer, 0, sizeof(ringbuffer)); + ringbuffer[ring_count++] = AUB_MI_BATCH_BUFFER_START; + ringbuffer[ring_count++] = batch_buffer; + + /* Write out the ring. This appears to trigger execution of + * the ring in the simulator. + */ + aub_out(bufmgr_gem, + CMD_AUB_TRACE_HEADER_BLOCK | + (5 - 2)); + aub_out(bufmgr_gem, + AUB_TRACE_MEMTYPE_GTT | ring | AUB_TRACE_OP_COMMAND_WRITE); + aub_out(bufmgr_gem, 0); /* general/surface subtype */ + aub_out(bufmgr_gem, bufmgr_gem->aub_offset); + aub_out(bufmgr_gem, ring_count * 4); + + /* FIXME: Need some flush operations here? 
*/ + aub_out_data(bufmgr_gem, ringbuffer, ring_count * 4); + + /* Update offset pointer */ + bufmgr_gem->aub_offset += 4096; +} + +void +drm_intel_gem_bo_aub_dump_bmp(drm_intel_bo *bo, + int x1, int y1, int width, int height, + enum aub_dump_bmp_format format, + int pitch, int offset) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; + uint32_t cpp; + + switch (format) { + case AUB_DUMP_BMP_FORMAT_8BIT: + cpp = 1; + break; + case AUB_DUMP_BMP_FORMAT_ARGB_4444: + cpp = 2; + break; + case AUB_DUMP_BMP_FORMAT_ARGB_0888: + case AUB_DUMP_BMP_FORMAT_ARGB_8888: + cpp = 4; + break; + default: + printf("Unknown AUB dump format %d\n", format); + return; + } + + if (!bufmgr_gem->aub_file) + return; + + aub_out(bufmgr_gem, CMD_AUB_DUMP_BMP | 4); + aub_out(bufmgr_gem, (y1 << 16) | x1); + aub_out(bufmgr_gem, + (format << 24) | + (cpp << 19) | + pitch / 4); + aub_out(bufmgr_gem, (height << 16) | width); + aub_out(bufmgr_gem, bo_gem->aub_offset + offset); + aub_out(bufmgr_gem, + ((bo_gem->tiling_mode != I915_TILING_NONE) ? (1 << 2) : 0) | + ((bo_gem->tiling_mode == I915_TILING_Y) ? (1 << 3) : 0)); +} + +static void +aub_exec(drm_intel_bo *bo, int ring_flag, int used) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + int i; + + if (!bufmgr_gem->aub_file) + return; + + /* Write out all but the batchbuffer to AUB memory */ + for (i = 0; i < bufmgr_gem->exec_count - 1; i++) { + if (bufmgr_gem->exec_bos[i] != bo) + aub_write_bo(bufmgr_gem->exec_bos[i]); + } + + aub_bo_get_address(bo); + + /* Dump the batchbuffer. */ + aub_write_trace_block(bo, AUB_TRACE_TYPE_BATCH, 0, + 0, used); + aub_write_trace_block(bo, AUB_TRACE_TYPE_NOTYPE, 0, + used, bo->size - used); + + /* Dump ring buffer */ + aub_build_dump_ringbuffer(bufmgr_gem, bo_gem->aub_offset, ring_flag); + + fflush(bufmgr_gem->aub_file); + + /* + * One frame has been dumped. 
So reset the aub_offset for the next frame. + * + * FIXME: Can we do this? + */ + bufmgr_gem->aub_offset = 0x10000; +} + static int drm_intel_gem_bo_exec(drm_intel_bo *bo, int used, drm_clip_rect_t * cliprects, int num_cliprects, int DR4) @@ -1830,6 +2077,8 @@ drm_intel_gem_bo_mrb_exec2(drm_intel_bo *bo, int used, execbuf.rsvd1 = 0; execbuf.rsvd2 = 0; + aub_exec(bo, flags, used); + if (bufmgr_gem->no_exec) goto skip_execution; @@ -2360,6 +2609,62 @@ drm_intel_bufmgr_gem_get_devid(drm_intel_bufmgr *bufmgr) } /** + * Sets up AUB dumping. + * + * This is a trace file format that can be used with the simulator. + * Packets are emitted in a format somewhat like GPU command packets. + * You can set up a GTT and upload your objects into the referenced + * space, then send off batchbuffers and get BMPs out the other end. + */ +void +drm_intel_bufmgr_gem_set_aub_dump(drm_intel_bufmgr *bufmgr, int enable) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; + int entry = 0x200003; + int i; + int gtt_size = 0x10000; + + if (!enable) { + if (bufmgr_gem->aub_file) { + fclose(bufmgr_gem->aub_file); + bufmgr_gem->aub_file = NULL; + } + } + + if (geteuid() != getuid()) + return; + + bufmgr_gem->aub_file = fopen("intel.aub", "w+"); + if (!bufmgr_gem->aub_file) + return; + + /* Start allocating objects from just after the GTT. */ + bufmgr_gem->aub_offset = gtt_size; + + /* Start with a (required) version packet. */ + aub_out(bufmgr_gem, CMD_AUB_HEADER | (13 - 2)); + aub_out(bufmgr_gem, + (4 << AUB_HEADER_MAJOR_SHIFT) | + (0 << AUB_HEADER_MINOR_SHIFT)); + for (i = 0; i < 8; i++) { + aub_out(bufmgr_gem, 0); /* app name */ + } + aub_out(bufmgr_gem, 0); /* timestamp */ + aub_out(bufmgr_gem, 0); /* timestamp */ + aub_out(bufmgr_gem, 0); /* comment len */ + + /* Set up the GTT. 
The max we can handle is 256M */ + aub_out(bufmgr_gem, CMD_AUB_TRACE_HEADER_BLOCK | (5 - 2)); + aub_out(bufmgr_gem, AUB_TRACE_MEMTYPE_NONLOCAL | 0 | AUB_TRACE_OP_DATA_WRITE); + aub_out(bufmgr_gem, 0); /* subtype */ + aub_out(bufmgr_gem, 0); /* offset */ + aub_out(bufmgr_gem, gtt_size); /* size */ + for (i = 0x000; i < gtt_size; i += 4, entry += 0x1000) { + aub_out(bufmgr_gem, entry); + } +} + +/** * Initializes the GEM buffer manager, which uses the kernel to allocate, map, * and manage map buffer objections. * commit 6e642db7f4a5628ed63ca3c479f06bd6f2ca3893 Author: Kenneth Graunke <kenn...@whitecape.org> Date: Tue Oct 11 14:38:34 2011 -0700 intel: Add support for overriding the PCI ID via an environment variable For example: export INTEL_DEVID_OVERRIDE=0x162 If this variable is set, don't actually submit the batchbuffer to the GPU, it probably contains commands for the wrong generation of hardware. v2: Introduce a getter for the overridden devid, and avoid getenv per exec. Reviewed-by: Yuanhan Liu <yuanhan....@linux.intel.com> Signed-off-by: Kenneth Graunke <kenn...@whitecape.org> Signed-off-by: Eric Anholt <e...@anholt.net> diff --git a/intel/intel_bufmgr.h b/intel/intel_bufmgr.h index 85da8b9..8036031 100644 --- a/intel/intel_bufmgr.h -- To UNSUBSCRIBE, email to debian-x-requ...@lists.debian.org with a subject of "unsubscribe". Trouble? Contact listmas...@lists.debian.org Archive: http://lists.debian.org/e1s9jyz-0002z1...@vasks.debian.org