The test description suggested that qword writes were being tested on all
platforms, while in fact they were only tested on gen4-gen5.

v2: Test dword/qword writes for all available platforms.
v3: Rewrite, drop libdrm/intel_batchbuffer dependencies,
    drop brw_emit_post_sync_nonzero_flush WA for gen6/gen7,
    drop WC_FLUSH/TC_FLUSH on gen4/gen5,
    drop preuse tests, use gem_wait instead of set_domain.
v4: Back to preuse, do not use gem_write.

Cc: Chris Wilson <ch...@chris-wilson.co.uk>
Signed-off-by: Michał Winiarski <michal.winiar...@intel.com>
---
 tests/gem_pipe_control_store_loop.c | 290 +++++++++++++++++++++---------------
 1 file changed, 173 insertions(+), 117 deletions(-)

diff --git a/tests/gem_pipe_control_store_loop.c b/tests/gem_pipe_control_store_loop.c
index a155ad1..3cf4b31 100644
--- a/tests/gem_pipe_control_store_loop.c
+++ b/tests/gem_pipe_control_store_loop.c
@@ -26,10 +26,10 @@
  */
 
 /*
- * Testcase: (TLB-)Coherency of pipe_control QW writes
+ * Testcase: (TLB-)Coherency of pipe_control writes
  *
- * Writes a counter-value into an always newly allocated target bo (by disabling
- * buffer reuse). Decently trashes on tlb inconsistencies, too.
+ * Writes a counter-value into target bo.
+ * Decently trashes on tlb inconsistencies, too.
  */
 #include "igt.h"
 #include <stdlib.h>
@@ -43,11 +43,11 @@
 #include "drm.h"
 #include "intel_bufmgr.h"
 
-IGT_TEST_DESCRIPTION("Test (TLB-)Coherency of pipe_control QW writes.");
+IGT_TEST_DESCRIPTION("Test (TLB-)Coherency of pipe_control writes.");
 
-static drm_intel_bufmgr *bufmgr;
-struct intel_batchbuffer *batch;
 uint32_t devid;
+int gen;
+int fd;
 
 #define GFX_OP_PIPE_CONTROL    ((0x3<<29)|(0x3<<27)|(0x2<<24)|2)
 #define   PIPE_CONTROL_WRITE_IMMEDIATE (1<<14)
@@ -60,134 +60,190 @@ uint32_t devid;
 #define   PIPE_CONTROL_CS_STALL        (1<<20)
 #define   PIPE_CONTROL_GLOBAL_GTT (1<<2) /* in addr dword */
 
-/* Like the store dword test, but we create new command buffers each time */
+#define TEST_STORE_LOOP_BUFFER_REUSED  (1 << 0)
+#define TEST_STORE_LOOP_QWORD_WRITE    (1 << 1)
+#define TEST_STORE_LOOP_ALL_FLAGS      (TEST_STORE_LOOP_BUFFER_REUSED | \
+                                        TEST_STORE_LOOP_QWORD_WRITE)
+
+static uint64_t
+preuse(uint32_t buf_handle)
+{
+       int i = 0;
+       uint32_t batch_handle;
+       uint32_t *batch;
+       struct drm_i915_gem_relocation_entry reloc;
+       struct drm_i915_gem_execbuffer2 execbuf;
+       struct drm_i915_gem_exec_object2 object[2];
+
+       batch_handle = gem_create(fd, 4096);
+       batch = gem_mmap__cpu(fd, batch_handle, 0, 4096, PROT_READ | PROT_WRITE);
+
+       batch[i++] = XY_COLOR_BLT_CMD_NOLEN |
+                 COLOR_BLT_WRITE_ALPHA |
+                 XY_SRC_COPY_BLT_WRITE_RGB;
+       if (gen >= 8)
+               batch[i - 1] |= 5;
+       else
+               batch[i - 1] |= 4;
+
+       batch[i++] = ((3 << 24) | (0xf0 << 16) | 64);
+       batch[i++] = 0; /* dst x1,y1 */
+       batch[i++] = (1 << 16 | 1); /* dst x2,y2 */
+       batch[i++] = 0; /* reloc */
+       if (gen >= 8)
+               batch[i++] = 0; /* reloc_high */
+       batch[i++] = 0xdeadbeef;
+       batch[i++] = MI_BATCH_BUFFER_END;
+
+       memset(&object, 0, sizeof(object));
+       memset(&reloc, 0, sizeof(reloc));
+       memset(&execbuf, 0, sizeof(execbuf));
+
+       reloc.target_handle = buf_handle;
+       reloc.delta = 0;
+       reloc.offset = 4 * sizeof(batch[0]);
+       reloc.presumed_offset = 0;
+       reloc.read_domains = I915_GEM_DOMAIN_RENDER;
+       reloc.write_domain = I915_GEM_DOMAIN_RENDER;
+
+       object[0].handle = buf_handle;
+       object[1].handle = batch_handle;
+       object[1].relocation_count = 1;
+       object[1].relocs_ptr = (uintptr_t)&reloc;
+
+       if (gen >= 6)
+               execbuf.flags = I915_EXEC_BLT;
+       execbuf.buffers_ptr = (uintptr_t)object;
+       execbuf.buffer_count = 2;
+       gem_execbuf(fd, &execbuf);
+
+       munmap(batch, 4096);
+       gem_close(fd, batch_handle);
+
+       return reloc.presumed_offset;
+}
+
 static void
-store_pipe_control_loop(bool preuse_buffer)
+store_pipe_control_loop(uint32_t flags)
 {
-       int i, val = 0;
+       const bool preuse_buffer = flags & TEST_STORE_LOOP_BUFFER_REUSED;
+       const bool qword_write = flags & TEST_STORE_LOOP_QWORD_WRITE;
+
+       int val, i;
+       uint32_t reloc_offset;
+       uint64_t presumed_offset;
+       uint32_t batch_handle;
+       uint32_t *batch;
+       uint32_t buf_handle;
        uint32_t *buf;
-       drm_intel_bo *target_bo;
-
-       for (i = 0; i < SLOW_QUICK(0x10000, 4); i++) {
-               /* we want to check tlb consistency of the pipe_control target,
-                * so get a new buffer every time around */
-               target_bo = drm_intel_bo_alloc(bufmgr, "target bo", 4096, 4096);
-               igt_assert(target_bo);
-
-               if (preuse_buffer) {
-                       COLOR_BLIT_COPY_BATCH_START(0);
-                       OUT_BATCH((3 << 24) | (0xf0 << 16) | 64);
-                       OUT_BATCH(0);
-                       OUT_BATCH(1 << 16 | 1);
-
-                       /*
-                        * IMPORTANT: We need to preuse the buffer in a
-                        * different domain than what the pipe control write
-                        * (and kernel wa) uses!
-                        */
-                       OUT_RELOC_FENCED(target_bo,
-                            I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
-                            0);
-                       OUT_BATCH(0xdeadbeef);
-                       ADVANCE_BATCH();
-
-                       intel_batchbuffer_flush(batch);
-               }
-
-               /* gem_storedw_batches_loop.c is a bit overenthusiastic with
-                * creating new batchbuffers - with buffer reuse disabled, the
-                * support code will do that for us. */
-               if (batch->gen >= 8) {
-                       BEGIN_BATCH(4, 1);
-                       OUT_BATCH(GFX_OP_PIPE_CONTROL + 1);
-                       OUT_BATCH(PIPE_CONTROL_WRITE_IMMEDIATE);
-                       OUT_RELOC_FENCED(target_bo,
-                            I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
-                            PIPE_CONTROL_GLOBAL_GTT);
-                       OUT_BATCH(val); /* write data */
-                       ADVANCE_BATCH();
-
-               } else if (batch->gen >= 6) {
-                       /* work-around hw issue, see intel_emit_post_sync_nonzero_flush
-                        * in mesa sources. */
-                       BEGIN_BATCH(4, 1);
-                       OUT_BATCH(GFX_OP_PIPE_CONTROL);
-                       OUT_BATCH(PIPE_CONTROL_CS_STALL |
-                            PIPE_CONTROL_STALL_AT_SCOREBOARD);
-                       OUT_BATCH(0); /* address */
-                       OUT_BATCH(0); /* write data */
-                       ADVANCE_BATCH();
-
-                       BEGIN_BATCH(4, 1);
-                       OUT_BATCH(GFX_OP_PIPE_CONTROL);
-                       OUT_BATCH(PIPE_CONTROL_WRITE_IMMEDIATE);
-                       OUT_RELOC(target_bo,
-                            I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 
-                            PIPE_CONTROL_GLOBAL_GTT);
-                       OUT_BATCH(val); /* write data */
-                       ADVANCE_BATCH();
-               } else if (batch->gen >= 4) {
-                       BEGIN_BATCH(4, 1);
-                       OUT_BATCH(GFX_OP_PIPE_CONTROL | PIPE_CONTROL_WC_FLUSH |
-                                       PIPE_CONTROL_TC_FLUSH |
-                                       PIPE_CONTROL_WRITE_IMMEDIATE | 2);
-                       OUT_RELOC(target_bo,
-                               I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
-                               PIPE_CONTROL_GLOBAL_GTT);
-                       OUT_BATCH(val);
-                       OUT_BATCH(0xdeadbeef);
-                       ADVANCE_BATCH();
+       struct drm_i915_gem_relocation_entry reloc;
+       struct drm_i915_gem_execbuffer2 execbuf;
+       struct drm_i915_gem_exec_object2 object[2];
+
+       /* no dword writes on gen4/gen5 and gen9+ */
+       if (!qword_write)
+               igt_skip_on(gen < 6 || gen > 8);
+
+       batch_handle = gem_create(fd, 4096);
+       batch = gem_mmap__cpu(fd, batch_handle, 0, 4096, PROT_READ | PROT_WRITE);
+
+       for (val = 0; val < SLOW_QUICK(0x10000, 4); val++) {
+               i = 0;
+               buf_handle = gem_create(fd, 4096);
+
+               buf = gem_mmap__cpu(fd, buf_handle, 0, 4096, PROT_READ | PROT_WRITE);
+               gem_set_domain(fd, buf_handle, I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
+               buf[0] = 0xdeadbeef;
+               buf[1] = 0xdeadbeef;
+               if (preuse_buffer)
+                       presumed_offset = preuse(buf_handle);
+               else
+                       presumed_offset = 0;
+
+               if (gen == 6)
+                       presumed_offset |= PIPE_CONTROL_GLOBAL_GTT;
+
+               if (gen >= 6) {
+                       batch[i++] = GFX_OP_PIPE_CONTROL + (gen >= 8) + qword_write;
+                       batch[i++] = PIPE_CONTROL_WRITE_IMMEDIATE;
+                       batch[i++] = (uint32_t)presumed_offset; /* reloc */
+                       reloc_offset = i - 1;
+                       if (gen >= 8)
+                               batch[i++] = (uint32_t)(presumed_offset >> 32); /* reloc_high */
+               } else {
+                       /* qword write */
+                       batch[i++] = (GFX_OP_PIPE_CONTROL |
+                                       PIPE_CONTROL_WRITE_IMMEDIATE);
+                       batch[i++] = (uint32_t)presumed_offset; /* reloc */
+                       reloc_offset = i - 1;
                }
 
-               intel_batchbuffer_flush_on_ring(batch, 0);
-
-               drm_intel_bo_map(target_bo, 1);
-
-               buf = target_bo->virtual;
-               igt_assert(buf[0] == val);
-
-               drm_intel_bo_unmap(target_bo);
-               /* Make doublesure that this buffer won't get reused. */
-               drm_intel_bo_disable_reuse(target_bo);
-               drm_intel_bo_unreference(target_bo);
-
-               val++;
+               batch[i++] = val; /* write data */
+               if (qword_write)
+                       batch[i++] = ~val; /* dword_high */
+               else
+                       batch[i++] = MI_NOOP | 0xabcd;
+               batch[i++] = MI_BATCH_BUFFER_END;
+
+               memset(object, 0, sizeof(object));
+               memset(&reloc, 0, sizeof(reloc));
+               memset(&execbuf, 0, sizeof(execbuf));
+
+               reloc.target_handle = buf_handle;
+               reloc.delta = 0;
+               reloc.offset = reloc_offset * sizeof(batch[0]);
+               reloc.presumed_offset = 0;
+               reloc.read_domains = I915_GEM_DOMAIN_RENDER;
+               reloc.write_domain = I915_GEM_DOMAIN_RENDER;
+
+               if (gen == 6)
+                       object[0].flags |= EXEC_OBJECT_NEEDS_GTT;
+               object[0].handle = buf_handle;
+               object[1].handle = batch_handle;
+               object[1].relocation_count = 1;
+               object[1].relocs_ptr = (uintptr_t)&reloc;
+
+               execbuf.buffers_ptr = (uintptr_t)object;
+               execbuf.buffer_count = 2;
+               gem_execbuf(fd, &execbuf);
+
+               gem_wait(fd, buf_handle, NULL);
+
+               igt_assert_eq_u32(buf[0], val);
+               if (qword_write)
+                       igt_assert_eq_u32(buf[1], ~val);
+               else
+                       igt_assert_eq_u32(buf[1], 0xdeadbeef);
+
+               munmap(buf, 4096);
+               gem_close(fd, buf_handle);
        }
-}
 
-int fd;
+       munmap(batch, 4096);
+       gem_close(fd, batch_handle);
+}
 
 igt_main
 {
        igt_fixture {
                fd = drm_open_driver(DRIVER_INTEL);
                devid = intel_get_drm_devid(fd);
+               gen = intel_gen(devid);
 
-               bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
-               igt_assert(bufmgr);
-
-               igt_skip_on(IS_GEN2(devid) || IS_GEN3(devid));
+               igt_skip_on(gen < 4);
                igt_skip_on(devid == PCI_CHIP_I965_G); /* has totally broken pipe control */
-
-               /* IMPORTANT: No call to
-                * drm_intel_bufmgr_gem_enable_reuse(bufmgr);
-                * here because we wan't to have fresh buffers (to trash the tlb)
-                * every time! */
-
-               batch = intel_batchbuffer_alloc(bufmgr, devid);
-               igt_assert(batch);
        }
 
-       igt_subtest("fresh-buffer")
-               store_pipe_control_loop(false);
-
-       igt_subtest("reused-buffer")
-               store_pipe_control_loop(true);
-
-       igt_fixture {
-               intel_batchbuffer_free(batch);
-               drm_intel_bufmgr_destroy(bufmgr);
+       for (uint32_t flags = 0; flags < TEST_STORE_LOOP_ALL_FLAGS + 1; flags++) {
+               igt_subtest_f("%sbuffer%s",
+                             flags & TEST_STORE_LOOP_BUFFER_REUSED ?
+                             "reused-" : "fresh-",
+                             flags & TEST_STORE_LOOP_QWORD_WRITE ?
+                             "-qword-write" : "") {
+                               store_pipe_control_loop(flags);
+               }
+       }
 
+       igt_fixture
                close(fd);
-       }
 }
-- 
2.8.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to