On Wed, Mar 07, 2012 at 11:21:07AM -0800, Eric Anholt wrote:
> From: Kenneth Graunke <kenn...@whitecape.org>
> 
> This will allow the driver to capture all of its execution state to a
> file for later debugging.  intel_gpu_dump is limited in that it only
> captures batchbuffers, and Mesa's captures, while more complete, still
> capture only a portion of the state involved in execution.
> 
> It also enables us to load traces in our internal simulator.
> 
> Signed-off-by: Eric Anholt <e...@anholt.net>
> Signed-off-by: Yuanhan Liu <yuanhan....@linux.intel.com>
> Signed-off-by: Kenneth Graunke <kenn...@whitecape.org>
> ---
>  intel/Makefile.am        |    1 +
>  intel/intel_aub.h        |  123 ++++++++++++++++++
>  intel/intel_bufmgr.h     |   14 ++
>  intel/intel_bufmgr_gem.c |  315 
> ++++++++++++++++++++++++++++++++++++++++++++++
>  4 files changed, 453 insertions(+), 0 deletions(-)
>  create mode 100644 intel/intel_aub.h
> 
> diff --git a/intel/Makefile.am b/intel/Makefile.am
> index 06362b6..dc01a96 100644
> --- a/intel/Makefile.am
> +++ b/intel/Makefile.am
> @@ -53,6 +53,7 @@ intel_bufmgr_gem_o_CFLAGS = $(AM_CFLAGS) -c99
>  
>  libdrm_intelincludedir = ${includedir}/libdrm
>  libdrm_intelinclude_HEADERS = intel_bufmgr.h \
> +                           intel_aub.h \
>                             intel_debug.h
>  
>  # This may be interesting even outside of "make check", due to the -dump 
> option.
> diff --git a/intel/intel_aub.h b/intel/intel_aub.h
> new file mode 100644
> index 0000000..a36fd53
> --- /dev/null
> +++ b/intel/intel_aub.h
> @@ -0,0 +1,123 @@
> +/*
> + * Copyright © 2010 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
> DEALINGS
> + * IN THE SOFTWARE.
> + *
> + * Authors:
> + *    Eric Anholt <e...@anholt.net>
> + *
> + */
> +
> +/** @file intel_aub.h
> + *
> + * The AUB file is a file format used by Intel's internal simulation
> + * and other validation tools.  It can be used at various levels by a
> + * driver to input state to the simulated hardware or a replaying
> + * debugger.
> + *
> + * We choose to dump AUB files using the trace block format for ease
> + * of implementation -- dump out the blocks of memory as plain blobs
> + * and insert ring commands to execute the batchbuffer blob.
> + */
> +
> +#ifndef _INTEL_AUB_H
> +#define _INTEL_AUB_H
> +
> +#define AUB_MI_NOOP                  (0)
> +#define AUB_MI_BATCH_BUFFER_START    (0x31 << 23)
> +#define AUB_PIPE_CONTROL             (0x7a000002)
> +
> +/* DW0: instruction type. */
> +
> +#define CMD_AUB                      (7 << 29)
> +
> +#define CMD_AUB_HEADER               (CMD_AUB | (1 << 23) | (0x05 << 16))
> +/* DW1 */
> +# define AUB_HEADER_MAJOR_SHIFT              24
> +# define AUB_HEADER_MINOR_SHIFT              16
> +
> +#define CMD_AUB_TRACE_HEADER_BLOCK (CMD_AUB | (1 << 23) | (0x41 << 16))
> +#define CMD_AUB_DUMP_BMP           (CMD_AUB | (1 << 23) | (0x9e << 16))
> +
> +/* DW1 */
> +#define AUB_TRACE_OPERATION_MASK     0x000000ff
> +#define AUB_TRACE_OP_COMMENT         0x00000000
> +#define AUB_TRACE_OP_DATA_WRITE              0x00000001
> +#define AUB_TRACE_OP_COMMAND_WRITE   0x00000002
> +#define AUB_TRACE_OP_MMIO_WRITE              0x00000003
> +// operation = TRACE_DATA_WRITE, Type
> +#define AUB_TRACE_TYPE_MASK          0x0000ff00
> +#define AUB_TRACE_TYPE_NOTYPE                (0 << 8)
> +#define AUB_TRACE_TYPE_BATCH         (1 << 8)
> +#define AUB_TRACE_TYPE_VERTEX_BUFFER (5 << 8)
> +#define AUB_TRACE_TYPE_2D_MAP                (6 << 8)
> +#define AUB_TRACE_TYPE_CUBE_MAP              (7 << 8)
> +#define AUB_TRACE_TYPE_VOLUME_MAP    (9 << 8)
> +#define AUB_TRACE_TYPE_1D_MAP                (10 << 8)
> +#define AUB_TRACE_TYPE_CONSTANT_BUFFER       (11 << 8)
> +#define AUB_TRACE_TYPE_CONSTANT_URB  (12 << 8)
> +#define AUB_TRACE_TYPE_INDEX_BUFFER  (13 << 8)
> +#define AUB_TRACE_TYPE_GENERAL               (14 << 8)
> +#define AUB_TRACE_TYPE_SURFACE               (15 << 8)
> +
> +
> +// operation = TRACE_COMMAND_WRITE, Type =
> +#define AUB_TRACE_TYPE_RING_HWB              (1 << 8)
> +#define AUB_TRACE_TYPE_RING_PRB0     (2 << 8)
> +#define AUB_TRACE_TYPE_RING_PRB1     (3 << 8)
> +#define AUB_TRACE_TYPE_RING_PRB2     (4 << 8)
> +
> +// Address space
> +#define AUB_TRACE_ADDRESS_SPACE_MASK 0x00ff0000
> +#define AUB_TRACE_MEMTYPE_GTT                (0 << 16)
> +#define AUB_TRACE_MEMTYPE_LOCAL              (1 << 16)
> +#define AUB_TRACE_MEMTYPE_NONLOCAL   (2 << 16)
> +#define AUB_TRACE_MEMTYPE_PCI                (3 << 16)
> +#define AUB_TRACE_MEMTYPE_GTT_ENTRY     (4 << 16)
> +
> +/* DW2 */
> +// operation = TRACE_DATA_WRITE, Type = TRACE_DATA_WRITE_GENERAL_STATE
> +#define AUB_TRACE_GENERAL_STATE_MASK 0x000000ff
> +
> +#define AUB_TRACE_VS_STATE           0x00000001
> +#define AUB_TRACE_GS_STATE           0x00000002
> +#define AUB_TRACE_CL_STATE           0x00000003
> +#define AUB_TRACE_SF_STATE           0x00000004
> +#define AUB_TRACE_WM_STATE           0x00000005
> +#define AUB_TRACE_CC_STATE           0x00000006
> +#define AUB_TRACE_CL_VP                      0x00000007
> +#define AUB_TRACE_SF_VP                      0x00000008
> +#define AUB_TRACE_CC_VP                      0x00000009
> +#define AUB_TRACE_SAMPLER_STATE              0x0000000a
> +#define AUB_TRACE_KERNEL             0x0000000b
> +#define AUB_TRACE_SCRATCH            0x0000000c
> +#define AUB_TRACE_SDC                        0x0000000d
> +#define AUB_TRACE_BLEND_STATE                0x00000016
> +#define AUB_TRACE_DEPTH_STENCIL_STATE        0x00000017
> +
> +// operation = TRACE_DATA_WRITE, Type = TRACE_DATA_WRITE_SURFACE_STATE
> +#define AUB_TRACE_SURFACE_STATE_MASK 0x00000ff00
> +#define AUB_TRACE_BINDING_TABLE              0x000000100
> +#define AUB_TRACE_SURFACE_STATE              0x000000200
> +
> +/* DW3: address */
> +/* DW4: len */
> +
> +#endif /* _INTEL_AUB_H */
> diff --git a/intel/intel_bufmgr.h b/intel/intel_bufmgr.h
> index 8036031..fa6f2b8 100644
> --- a/intel/intel_bufmgr.h
> +++ b/intel/intel_bufmgr.h
> @@ -36,6 +36,7 @@
>  
>  #include <stdio.h>
>  #include <stdint.h>
> +#include <stdio.h>
>  
>  struct drm_clip_rect;
>  
> @@ -84,6 +85,13 @@ struct _drm_intel_bo {
>       int handle;
>  };
>  
> +enum aub_dump_bmp_format {
> +     AUB_DUMP_BMP_FORMAT_8BIT = 1,
> +     AUB_DUMP_BMP_FORMAT_ARGB_4444 = 4,
> +     AUB_DUMP_BMP_FORMAT_ARGB_0888 = 6,
> +     AUB_DUMP_BMP_FORMAT_ARGB_8888 = 7,
> +};
> +
>  #define BO_ALLOC_FOR_RENDER (1<<0)
>  
>  drm_intel_bo *drm_intel_bo_alloc(drm_intel_bufmgr *bufmgr, const char *name,
> @@ -154,6 +162,12 @@ int drm_intel_gem_bo_get_reloc_count(drm_intel_bo *bo);
>  void drm_intel_gem_bo_clear_relocs(drm_intel_bo *bo, int start);
>  void drm_intel_gem_bo_start_gtt_access(drm_intel_bo *bo, int write_enable);
>  
> +void drm_intel_bufmgr_gem_set_aub_dump(drm_intel_bufmgr *bufmgr, int enable);
> +void drm_intel_gem_bo_aub_dump_bmp(drm_intel_bo *bo,
> +                                int x1, int y1, int width, int height,
> +                                enum aub_dump_bmp_format format,
> +                                int pitch, int offset);
> +
>  int drm_intel_get_pipe_from_crtc_id(drm_intel_bufmgr *bufmgr, int crtc_id);
>  
>  int drm_intel_get_aperture_sizes(int fd, size_t *mappable, size_t *total);
> diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c
> index ba38e50..45585f7 100644
> --- a/intel/intel_bufmgr_gem.c
> +++ b/intel/intel_bufmgr_gem.c
> @@ -58,6 +58,7 @@
>  #include "intel_bufmgr.h"
>  #include "intel_bufmgr_priv.h"
>  #include "intel_chipset.h"
> +#include "intel_aub.h"
>  #include "string.h"
>  
>  #include "i915_drm.h"
> @@ -121,6 +122,9 @@ typedef struct _drm_intel_bufmgr_gem {
>       unsigned int bo_reuse : 1;
>       unsigned int no_exec : 1;
>       bool fenced_relocs;
> +
> +     FILE *aub_file;
> +     uint32_t aub_offset;
>  } drm_intel_bufmgr_gem;
>  
>  #define DRM_INTEL_RELOC_FENCE (1<<0)
> @@ -215,6 +219,8 @@ struct _drm_intel_bo_gem {
>  
>       /** Flags that we may need to do the SW_FINSIH ioctl on unmap. */
>       bool mapped_cpu_write;
> +
> +     uint32_t aub_offset;
>  };
>  
>  static unsigned int
> @@ -1715,6 +1721,247 @@ drm_intel_update_buffer_offsets2 
> (drm_intel_bufmgr_gem *bufmgr_gem)
>       }
>  }
>  
> +static void
> +aub_out(drm_intel_bufmgr_gem *bufmgr_gem, uint32_t data)
> +{
> +     fwrite(&data, 1, 4, bufmgr_gem->aub_file);
> +}
> +
> +static void
> +aub_out_data(drm_intel_bufmgr_gem *bufmgr_gem, void *data, size_t size)
> +{
> +     fwrite(data, 1, size, bufmgr_gem->aub_file);
> +}
> +
> +static void
> +aub_write_bo_data(drm_intel_bo *bo, uint32_t offset, uint32_t size)
> +{
> +     drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
> +     drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
> +     uint32_t *data;
> +     unsigned int i;
> +
> +     data = malloc(bo->size);
> +     drm_intel_bo_get_subdata(bo, offset, size, data);
> +
> +     /* Easy mode: write out bo with no relocations */
> +     if (!bo_gem->reloc_count) {
> +             aub_out_data(bufmgr_gem, data, size);
> +             free(data);
> +             return;
> +     }
> +
> +     /* Otherwise, handle the relocations while writing. */
> +     for (i = 0; i < size / 4; i++) {
> +             int r;
> +             for (r = 0; r < bo_gem->reloc_count; r++) {
> +                     struct drm_i915_gem_relocation_entry *reloc;
> +                     drm_intel_reloc_target *info;
> +
> +                     reloc = &bo_gem->relocs[r];
> +                     info = &bo_gem->reloc_target_info[r];
> +
> +                     if (reloc->offset == offset + i * 4) {
> +                             drm_intel_bo_gem *target_gem;
> +                             uint32_t val;
> +
> +                             target_gem = (drm_intel_bo_gem *)info->bo;
> +
> +                             val = reloc->delta;
> +                             val += target_gem->aub_offset;
> +
> +                             aub_out(bufmgr_gem, val);
> +                             data[i] = val;
> +                             break;
> +                     }
> +             }
> +             if (r == bo_gem->reloc_count) {
> +                     /* no relocation, just the data */
> +                     aub_out(bufmgr_gem, data[i]);
> +             }
> +     }
> +
> +     free(data);
> +}
> +
> +static void
> +aub_bo_get_address(drm_intel_bo *bo)
> +{
> +     drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
> +     drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
> +
> +     /* Give the object a graphics address in the AUB file.  We
> +      * don't just use the GEM object address because we do AUB
> +      * dumping before execution -- we want to successfully log
> +      * when the hardware might hang, and we might even want to aub
> +      * capture for a driver trying to execute on a different
> +      * generation of hardware by disabling the actual kernel exec
> +      * call.
> +      */
> +     bo_gem->aub_offset = bufmgr_gem->aub_offset;
> +     bufmgr_gem->aub_offset += bo->size;
> +     /* XXX: Handle aperture overflow. */
> +     assert(bufmgr_gem->aub_offset < 256 * 1024 * 1024);
> +}
> +
> +static void
> +aub_write_trace_block(drm_intel_bo *bo, uint32_t type, uint32_t subtype,
> +                   uint32_t offset, uint32_t size)
> +{
> +     drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
> +     drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
> +
> +     aub_out(bufmgr_gem,
> +             CMD_AUB_TRACE_HEADER_BLOCK |
> +             (5 - 2));
> +     aub_out(bufmgr_gem,
> +             AUB_TRACE_MEMTYPE_GTT | type | AUB_TRACE_OP_DATA_WRITE);
> +     aub_out(bufmgr_gem, subtype);
> +     aub_out(bufmgr_gem, bo_gem->aub_offset + offset);
> +     aub_out(bufmgr_gem, size);
> +     aub_write_bo_data(bo, offset, size);
> +}
> +
> +static void
> +aub_write_bo(drm_intel_bo *bo)
> +{
> +     uint32_t block_size;
> +     uint32_t offset;
> +
> +     aub_bo_get_address(bo);
> +
> +     /* Break up large objects into multiple writes.  Otherwise a
> +      * 128kb VBO would overflow the 16 bits of size field in the
> +      * packet header and everything goes badly after that.
> +      */
> +     for (offset = 0; offset < bo->size; offset += block_size) {
> +             block_size = bo->size - offset;
> +
> +             if (block_size > 8 * 4096)
> +                     block_size = 8 * 4096;
> +
> +             aub_write_trace_block(bo, AUB_TRACE_TYPE_NOTYPE, 0,
> +                                   offset, block_size);
> +     }
> +}
> +
> +/*
> + * Make a ringbuffer on fly and dump it
> + */
> +static void
> +aub_build_dump_ringbuffer(drm_intel_bufmgr_gem *bufmgr_gem,
> +                       uint32_t batch_buffer, int ring_flag)
> +{
> +     uint32_t ringbuffer[4096];
> +     int ring = AUB_TRACE_TYPE_RING_PRB0; /* The default ring */
> +     int ring_count = 0;
> +
> +     if (ring_flag == I915_EXEC_BSD)
> +             ring = AUB_TRACE_TYPE_RING_PRB1;
> +
> +     /* Make a ring buffer to execute our batchbuffer. */
> +     memset(ringbuffer, 0, sizeof(ringbuffer));
> +     ringbuffer[ring_count++] = AUB_MI_BATCH_BUFFER_START;
> +     ringbuffer[ring_count++] = batch_buffer;
> +
> +     /* Write out the ring.  This appears to trigger execution of
> +      * the ring in the simulator.
> +      */
> +     aub_out(bufmgr_gem,
> +             CMD_AUB_TRACE_HEADER_BLOCK |
> +             (5 - 2));
> +     aub_out(bufmgr_gem,
> +             AUB_TRACE_MEMTYPE_GTT | ring | AUB_TRACE_OP_COMMAND_WRITE);
> +     aub_out(bufmgr_gem, 0); /* general/surface subtype */
> +     aub_out(bufmgr_gem, bufmgr_gem->aub_offset);
> +     aub_out(bufmgr_gem, ring_count * 4);
> +
> +     /* FIXME: Need some flush operations here? */
> +     aub_out_data(bufmgr_gem, ringbuffer, ring_count * 4);
> +
> +     /* Update offset pointer */
> +     bufmgr_gem->aub_offset += 4096;
> +}
> +
> +void
> +drm_intel_gem_bo_aub_dump_bmp(drm_intel_bo *bo,
> +                           int x1, int y1, int width, int height,
> +                           enum aub_dump_bmp_format format,
> +                           int pitch, int offset)
> +{
> +     drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
> +     drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
> +     uint32_t cpp;
> +
> +     switch (format) {
> +     case AUB_DUMP_BMP_FORMAT_8BIT:
> +             cpp = 1;
> +             break;
> +     case AUB_DUMP_BMP_FORMAT_ARGB_4444:
> +             cpp = 2;
> +             break;
> +     case AUB_DUMP_BMP_FORMAT_ARGB_0888:
> +     case AUB_DUMP_BMP_FORMAT_ARGB_8888:
> +             cpp = 4;
> +             break;
> +     default:
> +             printf("Unknown AUB dump format %d\n", format);
> +             return;
> +     }
> +
> +     if (!bufmgr_gem->aub_file)
> +             return;
> +
> +     aub_out(bufmgr_gem, CMD_AUB_DUMP_BMP | 4);
> +     aub_out(bufmgr_gem, (y1 << 16) | x1);
> +     aub_out(bufmgr_gem,
> +             (format << 24) |
> +             (cpp << 19) |
> +             pitch / 4);
> +     aub_out(bufmgr_gem, (height << 16) | width);
> +     aub_out(bufmgr_gem, bo_gem->aub_offset + offset);
> +     aub_out(bufmgr_gem,
> +             ((bo_gem->tiling_mode != I915_TILING_NONE) ? (1 << 2) : 0) |
> +             ((bo_gem->tiling_mode == I915_TILING_Y) ? (1 << 3) : 0));
> +}
> +
> +static void
> +aub_exec(drm_intel_bo *bo, int ring_flag, int used)
> +{
> +     drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
> +     drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
> +     int i;
> +
> +     if (!bufmgr_gem->aub_file)
> +             return;
> +
> +     /* Write out all but the batchbuffer to AUB memory */
> +     for (i = 0; i < bufmgr_gem->exec_count - 1; i++) {
> +             if (bufmgr_gem->exec_bos[i] != bo)
> +                     aub_write_bo(bufmgr_gem->exec_bos[i]);
> +     }
> +
> +     aub_bo_get_address(bo);
> +
> +     /* Dump the batchbuffer. */
> +     aub_write_trace_block(bo, AUB_TRACE_TYPE_BATCH, 0,
> +                           0, used);
> +     aub_write_trace_block(bo, AUB_TRACE_TYPE_NOTYPE, 0,
> +                           used, bo->size - used);
> +
> +     /* Dump ring buffer */
> +     aub_build_dump_ringbuffer(bufmgr_gem, bo_gem->aub_offset, ring_flag);
> +
> +     fflush(bufmgr_gem->aub_file);
> +
> +     /*
> +      * One frame has been dumped. So reset the aub_offset for the next 
> frame.
> +      *
> +      * FIXME: Can we do this?
> +      */
> +     bufmgr_gem->aub_offset = 0x10000;
> +}
> +
>  static int
>  drm_intel_gem_bo_exec(drm_intel_bo *bo, int used,
>                     drm_clip_rect_t * cliprects, int num_cliprects, int DR4)
> @@ -1830,6 +2077,8 @@ drm_intel_gem_bo_mrb_exec2(drm_intel_bo *bo, int used,
>       execbuf.rsvd1 = 0;
>       execbuf.rsvd2 = 0;
>  
> +     aub_exec(bo, flags, used);
> +
>       if (bufmgr_gem->no_exec)
>               goto skip_execution;
>  
> @@ -2360,6 +2609,72 @@ drm_intel_bufmgr_gem_get_devid(drm_intel_bufmgr 
> *bufmgr)
>  }
>  
>  /**
> + * Sets up AUB dumping.
> + *
> + * This is a trace file format that can be used with the simulator.
> + * Packets are emitted in a format somewhat like GPU command packets.
> + * You can set up a GTT and upload your objects into the referenced
> + * space, then send off batchbuffers and get BMPs out the other end.
> + */
> +void
> +drm_intel_bufmgr_gem_set_aub_dump(drm_intel_bufmgr *bufmgr, int enable)
> +{
> +     drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
> +     int entry = 0x200003;
> +     int i;
> +     int gtt_size = 0x10000;
> +
> +     if (!enable) {
> +             if (bufmgr_gem->aub_file) {
> +                     fclose(bufmgr_gem->aub_file);
> +                     bufmgr_gem->aub_file = NULL;
> +             }
> +     }
> +
> +     if (geteuid() != getuid())
> +             return;
> +
> +     bufmgr_gem->aub_file = fopen("intel.aub", "w+");

I guess it would be better if we could name the AUB dump file
according to the program we are tracing. For example, if we run:
 $ INTEL_DEBUG=aub glxgears

it would be good to get glxgears.aub rather than intel.aub.
Otherwise, each run overwrites the file dumped by the previous one,
which is a little inconvenient.


> +     if (!bufmgr_gem->aub_file)
> +             return;
> +
> +     /* Start allocating objects from just after the GTT. */
> +     bufmgr_gem->aub_offset = gtt_size;
> +
> +     /* Start with a (required) version packet. */
> +     aub_out(bufmgr_gem, CMD_AUB_HEADER | (13 - 2));
> +     aub_out(bufmgr_gem,
> +             (4 << AUB_HEADER_MAJOR_SHIFT) |
> +             (0 << AUB_HEADER_MINOR_SHIFT));
> +     for (i = 0; i < 8; i++) {
> +             aub_out(bufmgr_gem, 0); /* app name */
> +     }
> +     aub_out(bufmgr_gem, 0); /* timestamp */
> +     aub_out(bufmgr_gem, 0); /* timestamp */
> +     aub_out(bufmgr_gem, 0); /* comment len */
> +
> +     /* Set up the GTT. The max we can handle is 256M */
> +     aub_out(bufmgr_gem, CMD_AUB_TRACE_HEADER_BLOCK | (5 - 2));
> +     aub_out(bufmgr_gem, AUB_TRACE_MEMTYPE_NONLOCAL | 0 | 
> AUB_TRACE_OP_DATA_WRITE);
> +     aub_out(bufmgr_gem, 0); /* subtype */
> +     aub_out(bufmgr_gem, 0); /* offset */
> +     aub_out(bufmgr_gem, gtt_size); /* size */
> +     for (i = 0x000; i < gtt_size; i += 4, entry += 0x1000) {
> +             aub_out(bufmgr_gem, entry);
> +     }
> +
> +     /* MI_FLUSH enable */
> +     if (bufmgr_gem->gen >= 6) {
> +             aub_out(bufmgr_gem, CMD_AUB_TRACE_HEADER_BLOCK | (5 - 2));
> +             aub_out(bufmgr_gem, AUB_TRACE_OP_MMIO_WRITE);
> +             aub_out(bufmgr_gem, 0);
> +             aub_out(bufmgr_gem, 0x209c);  /* reg addr */
> +             aub_out(bufmgr_gem, 0x04);    /* Length in byte */
> +             aub_out(bufmgr_gem, ((1 << 12) << 16) | (1 << 12));
> +     }

Zhenyu and I agreed that enabling MI_FLUSH should be done on the driver
side. MI_FLUSH is deprecated, so if you still use it, it is your job to
set the MI_FLUSH enable bit.

> +}
> +
> +/**
>   * Initializes the GEM buffer manager, which uses the kernel to allocate, 
> map,
>   * and manage map buffer objections.
>   *
> -- 
> 1.7.9.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to