uxa: enable acceleration for uxa_copy and uxa_solid
     on gen6.

Signed-off-by: Zou Nan hai <nanhai....@intel.com>
---
 src/i830_reg.h          |    2 +
 src/intel_batchbuffer.c |   35 ++++++--
 src/intel_batchbuffer.h |   31 ++++++-
 src/intel_driver.c      |    3 +-
 src/intel_uxa.c         |  230 +++++++++++++++++++++++++++++++++++++++--------
 5 files changed, 248 insertions(+), 53 deletions(-)

diff --git a/src/i830_reg.h b/src/i830_reg.h
index 4080896..93d03cf 100644
--- a/src/i830_reg.h
+++ b/src/i830_reg.h
@@ -32,6 +32,8 @@
 
 /* Flush */
 #define MI_FLUSH                       (0x04<<23)
+#define MI_FLUSH_DW                    (0x26<<23)
+
 #define MI_WRITE_DIRTY_STATE           (1<<4)
 #define MI_END_SCENE                   (1<<3)
 #define MI_GLOBAL_SNAPSHOT_COUNT_RESET (1<<3)
diff --git a/src/intel_batchbuffer.c b/src/intel_batchbuffer.c
index 3b72ba1..cde086b 100644
--- a/src/intel_batchbuffer.c
+++ b/src/intel_batchbuffer.c
@@ -171,6 +171,12 @@ void intel_batch_emit_flush(ScrnInfoPtr scrn, int 
batch_idx)
 
                intel_batch_do_flush(scrn, batch_idx);
        } else {
+               BEGIN_BATCH_BLT(4);
+               OUT_BATCH_BLT(MI_FLUSH_DW | 2);
+               OUT_BATCH_BLT(0);
+               OUT_BATCH_BLT(0);
+               OUT_BATCH_BLT(0);
+               ADVANCE_BATCH_BLT();
        }
 }
 
@@ -193,13 +199,22 @@ void intel_batch_submit(ScrnInfoPtr scrn, int flush, int 
batch_idx)
 
        if (batch->batch_used == 0)
                return;
+       
+       if (batch_idx == RENDER_BATCH) {
+               /* Mark the end of the batchbuffer. */
+               OUT_BATCH(MI_BATCH_BUFFER_END);
+               /* Emit a padding dword if we aren't going to be quad-word 
aligned. */
+               if (batch->batch_used & 1)
+                       OUT_BATCH(MI_NOOP);
+       } else {
+               /* Mark the end of the batchbuffer. */
+               OUT_BATCH_BLT(MI_BATCH_BUFFER_END);
+               /* Emit a padding dword if we aren't going to be quad-word 
aligned. */
+               if (batch->batch_used & 1)
+                       OUT_BATCH_BLT(MI_NOOP);
+       }
 
-       /* Mark the end of the batchbuffer. */
-       OUT_BATCH(MI_BATCH_BUFFER_END);
-       /* Emit a padding dword if we aren't going to be quad-word aligned. */
-       if (batch->batch_used & 1)
-               OUT_BATCH(MI_NOOP);
-
+       
        if (DUMP_BATCHBUFFERS) {
            FILE *file = fopen(DUMP_BATCHBUFFERS, "a");
            if (file) {
@@ -211,9 +226,13 @@ void intel_batch_submit(ScrnInfoPtr scrn, int flush, int 
batch_idx)
        ret = dri_bo_subdata(batch->batch_bo, 0, batch->batch_used*4, 
batch->batch_ptr);
        if (ret == 0) {
                if (batch_idx == RENDER_BATCH) {
-                       ret = dri_bo_exec(batch->batch_bo, batch->batch_used*4,
-                                       NULL, 0, 0xffffffff);
+                        ret = drm_intel_bo_mrb_exec(batch->batch_bo, 
+                                        batch->batch_used*4,
+                                        NULL, 0, 0xffffffff, I915_EXEC_RENDER);
                } else {
+                        ret = drm_intel_bo_mrb_exec(batch->batch_bo, 
+                                        batch->batch_used*4,
+                                        NULL, 0, 0xffffffff, I915_EXEC_BLIT);
                }
        }
        if (ret != 0) {
diff --git a/src/intel_batchbuffer.h b/src/intel_batchbuffer.h
index 1ed3ad8..6d1ee15 100644
--- a/src/intel_batchbuffer.h
+++ b/src/intel_batchbuffer.h
@@ -156,20 +156,35 @@ intel_batch_emit_reloc_pixmap(intel_screen_private 
*intel, PixmapPtr pixmap,
 }
 
 #define ALIGN_BATCH(align) intel_batch_align(intel, align, RENDER_BATCH);
+#define ALIGN_BATCH_BLT(align) intel_batch_align(intel, align, BLT_BATCH);
+
 #define OUT_BATCH(dword) intel_batch_emit_dword(intel, dword, RENDER_BATCH)
+#define OUT_BATCH_BLT(dword) intel_batch_emit_dword(intel, dword, BLT_BATCH)
 
 #define OUT_RELOC(bo, read_domains, write_domains, delta) \
        intel_batch_emit_reloc(intel, bo, read_domains, write_domains, delta, 
0,RENDER_BATCH)
 
+#define OUT_RELOC_BLT(bo, read_domains, write_domains, delta) \
+       intel_batch_emit_reloc(intel, bo, read_domains, write_domains, delta, 
0,BLT_BATCH)
+
 #define OUT_RELOC_FENCED(bo, read_domains, write_domains, delta) \
        intel_batch_emit_reloc(intel, bo, read_domains, write_domains, delta, 
1,RENDER_BATCH)
 
+#define OUT_RELOC_FENCED_BLT(bo, read_domains, write_domains, delta) \
+       intel_batch_emit_reloc(intel, bo, read_domains, write_domains, delta, 
1,BLT_BATCH)
+
 #define OUT_RELOC_PIXMAP(pixmap, reads, write, delta)  \
        intel_batch_emit_reloc_pixmap(intel, pixmap, reads, write, delta, 0, 
RENDER_BATCH)
 
+#define OUT_RELOC_PIXMAP_BLT(pixmap, reads, write, delta)      \
+       intel_batch_emit_reloc_pixmap(intel, pixmap, reads, write, delta, 0, 
BLT_BATCH)
+
 #define OUT_RELOC_PIXMAP_FENCED(pixmap, reads, write, delta)   \
        intel_batch_emit_reloc_pixmap(intel, pixmap, reads, write, delta, 1, 
RENDER_BATCH)
 
+#define OUT_RELOC_PIXMAP_FENCED_BLT(pixmap, reads, write, delta)       \
+       intel_batch_emit_reloc_pixmap(intel, pixmap, reads, write, delta, 1, 
BLT_BATCH)
+
 union intfloat {
        float f;
        unsigned int ui;
@@ -181,20 +196,23 @@ union intfloat {
        OUT_BATCH(tmp.ui);                      \
 } while(0)
 
-#define BEGIN_BATCH(n)                                                 \
+#define __BEGIN_BATCH(n,batch_idx)                                     \
 do {                                                                   \
-       struct batch *batch = &intel->batch[RENDER_BATCH];              \
+       struct batch *batch = &intel->batch[batch_idx];                 \
        if (batch->batch_emitting != 0)                                 \
                FatalError("%s: BEGIN_BATCH called without closing "    \
                           "ADVANCE_BATCH\n", __FUNCTION__);            \
        assert(!batch->in_batch_atomic);                                \
-       intel_batch_require_space(scrn, intel, (n) * 4, RENDER_BATCH);  \
+       intel_batch_require_space(scrn, intel, (n) * 4, batch_idx);     \
        batch->batch_emitting = (n);                                    \
        batch->batch_emit_start = batch->batch_used;                    \
 } while (0)
 
-#define ADVANCE_BATCH() do {                                           \
-       struct batch *batch = &intel->batch[RENDER_BATCH];              \
+#define BEGIN_BATCH(n)         __BEGIN_BATCH(n,RENDER_BATCH)
+#define BEGIN_BATCH_BLT(n)     __BEGIN_BATCH(n,BLT_BATCH)
+
+#define __ADVANCE_BATCH(batch_idx) do {                                        
\
+       struct batch *batch = &intel->batch[batch_idx];                 \
        if (batch->batch_emitting == 0)                                 \
                FatalError("%s: ADVANCE_BATCH called with no matching " \
                           "BEGIN_BATCH\n", __FUNCTION__);              \
@@ -213,6 +231,9 @@ do {                                                        
                \
        batch->batch_emitting = 0;                                      \
 } while (0)
 
+#define ADVANCE_BATCH() __ADVANCE_BATCH(RENDER_BATCH)
+#define ADVANCE_BATCH_BLT() __ADVANCE_BATCH(BLT_BATCH)
+
 void intel_next_vertex(intel_screen_private *intel);
 static inline void intel_vertex_emit(intel_screen_private *intel, float v)
 {
diff --git a/src/intel_driver.c b/src/intel_driver.c
index b9fb69d..051497d 100644
--- a/src/intel_driver.c
+++ b/src/intel_driver.c
@@ -581,8 +581,6 @@ static Bool I830PreInit(ScrnInfoPtr scrn, int flags)
        }
 
        intel->use_shadow = FALSE;
-       if (IS_GEN6(intel))
-               intel->use_shadow = TRUE;
 
        if (xf86IsOptionSet(intel->Options, OPTION_SHADOW)) {
                intel->use_shadow =
@@ -809,6 +807,7 @@ intel_flush_callback(CallbackListPtr *list,
                intel_batch_submit(scrn,
                                   intel->batch[RENDER_BATCH].need_mi_flush 
                                   
||!list_is_empty(&intel->batch[RENDER_BATCH].flush_pixmaps), RENDER_BATCH);
+
        }
 }
 
diff --git a/src/intel_uxa.c b/src/intel_uxa.c
index 05ac3d2..cbd87ca 100644
--- a/src/intel_uxa.c
+++ b/src/intel_uxa.c
@@ -208,17 +208,9 @@ intel_uxa_pixmap_compute_size(PixmapPtr pixmap,
 }
 
 static Bool
-i830_uxa_check_solid(DrawablePtr drawable, int alu, Pixel planemask)
+generic_uxa_check_solid(DrawablePtr drawable, int alu, Pixel planemask)
 {
        ScrnInfoPtr scrn = xf86Screens[drawable->pScreen->myNum];
-       intel_screen_private *intel = intel_get_screen_private(scrn);
-
-       if (IS_GEN6(intel)) {
-               intel_debug_fallback(scrn,
-                                    "Sandybridge BLT engine not supported\n");
-               return FALSE;
-       }
-
        if (!UXA_PM_IS_SOLID(drawable, planemask)) {
                intel_debug_fallback(scrn, "planemask is not solid\n");
                return FALSE;
@@ -232,7 +224,6 @@ i830_uxa_check_solid(DrawablePtr drawable, int alu, Pixel 
planemask)
        default:
                return FALSE;
        }
-
        return TRUE;
 }
 
@@ -240,7 +231,7 @@ i830_uxa_check_solid(DrawablePtr drawable, int alu, Pixel 
planemask)
  * Sets up hardware state for a series of solid fills.
  */
 static Bool
-i830_uxa_prepare_solid(PixmapPtr pixmap, int alu, Pixel planemask, Pixel fg)
+generic_uxa_prepare_solid(PixmapPtr pixmap, int alu, Pixel planemask, Pixel fg)
 {
        ScrnInfoPtr scrn = xf86Screens[pixmap->drawable.pScreen->myNum];
        intel_screen_private *intel = intel_get_screen_private(scrn);
@@ -252,10 +243,17 @@ i830_uxa_prepare_solid(PixmapPtr pixmap, int alu, Pixel 
planemask, Pixel fg)
        if (!intel_check_pitch_2d(pixmap))
                return FALSE;
 
-       if (!intel_get_aperture_space(scrn, bo_table, ARRAY_SIZE(bo_table), 
-                               RENDER_BATCH))
-               return FALSE;
-
+       if (IS_GEN6(intel)) {
+               if (!intel_get_aperture_space(scrn, bo_table, 
+                                       ARRAY_SIZE(bo_table), 
+                                       BLT_BATCH))
+                       return FALSE;
+       } else {
+               if (!intel_get_aperture_space(scrn, bo_table, 
+                                       ARRAY_SIZE(bo_table), 
+                                       RENDER_BATCH))
+                       return FALSE;
+       }
        intel->BR[13] = (I830PatternROP[alu] & 0xff) << 16;
        switch (pixmap->drawable.bitsPerPixel) {
        case 8:
@@ -274,6 +272,52 @@ i830_uxa_prepare_solid(PixmapPtr pixmap, int alu, Pixel 
planemask, Pixel fg)
        return TRUE;
 }
 
+static void gen6_uxa_solid(PixmapPtr pixmap, int x1, int y1, int x2, int y2)
+{
+       ScrnInfoPtr scrn = xf86Screens[pixmap->drawable.pScreen->myNum];
+       intel_screen_private *intel = intel_get_screen_private(scrn);
+       unsigned long pitch;
+       uint32_t cmd;
+       if (x1 < 0)
+               x1 = 0;
+       if (y1 < 0)
+               y1 = 0;
+       if (x2 > pixmap->drawable.width)
+               x2 = pixmap->drawable.width;
+       if (y2 > pixmap->drawable.height)
+               y2 = pixmap->drawable.height;
+
+       if (x2 <= x1 || y2 <= y1)
+               return;
+
+       pitch = intel_pixmap_pitch(pixmap);
+       {
+               BEGIN_BATCH_BLT(6);
+
+               cmd = XY_COLOR_BLT_CMD;
+
+               if (pixmap->drawable.bitsPerPixel == 32)
+                       cmd |=
+                           XY_COLOR_BLT_WRITE_ALPHA | XY_COLOR_BLT_WRITE_RGB;
+
+               if (intel_pixmap_tiled(pixmap)) {
+                       assert((pitch % 512) == 0);
+                       pitch >>= 2;
+                       cmd |= XY_COLOR_BLT_TILED;
+               }
+
+               OUT_BATCH_BLT(cmd);
+
+               OUT_BATCH_BLT(intel->BR[13] | pitch);
+               OUT_BATCH_BLT((y1 << 16) | (x1 & 0xffff));
+               OUT_BATCH_BLT((y2 << 16) | (x2 & 0xffff));
+               OUT_RELOC_PIXMAP_FENCED_BLT(pixmap, I915_GEM_DOMAIN_RENDER,
+                                       I915_GEM_DOMAIN_RENDER, 0);
+               OUT_BATCH_BLT(intel->BR[16]);
+               ADVANCE_BATCH_BLT();
+       }
+}
+
 static void i830_uxa_solid(PixmapPtr pixmap, int x1, int y1, int x2, int y2)
 {
        ScrnInfoPtr scrn = xf86Screens[pixmap->drawable.pScreen->myNum];
@@ -324,10 +368,15 @@ static void i830_uxa_solid(PixmapPtr pixmap, int x1, int 
y1, int x2, int y2)
        ironlake_blt_workaround(scrn);
 }
 
-static void i830_uxa_done_solid(PixmapPtr pixmap)
+static void gen6_uxa_done_solid(PixmapPtr pixmap)
 {
        ScrnInfoPtr scrn = xf86Screens[pixmap->drawable.pScreen->myNum];
+       intel_batch_submit(scrn, FALSE, BLT_BATCH);
+}
 
+static void i830_uxa_done_solid(PixmapPtr pixmap)
+{
+       ScrnInfoPtr scrn = xf86Screens[pixmap->drawable.pScreen->myNum];
        intel_debug_flush(scrn, RENDER_BATCH);
 }
 
@@ -336,17 +385,10 @@ static void i830_uxa_done_solid(PixmapPtr pixmap)
  *   - support planemask using FULL_BLT_CMD?
  */
 static Bool
-i830_uxa_check_copy(PixmapPtr source, PixmapPtr dest,
+generic_uxa_check_copy(PixmapPtr source, PixmapPtr dest,
                    int alu, Pixel planemask)
 {
        ScrnInfoPtr scrn = xf86Screens[dest->drawable.pScreen->myNum];
-       intel_screen_private *intel = intel_get_screen_private(scrn);
-
-       if (IS_GEN6(intel)) {
-               intel_debug_fallback(scrn,
-                                    "Sandybridge BLT engine not supported\n");
-               return FALSE;
-       }
 
        if (!UXA_PM_IS_SOLID(&source->drawable, planemask)) {
                intel_debug_fallback(scrn, "planemask is not solid");
@@ -375,7 +417,7 @@ i830_uxa_check_copy(PixmapPtr source, PixmapPtr dest,
 }
 
 static Bool
-i830_uxa_prepare_copy(PixmapPtr source, PixmapPtr dest, int xdir,
+generic_uxa_prepare_copy(PixmapPtr source, PixmapPtr dest, int xdir,
                      int ydir, int alu, Pixel planemask)
 {
        ScrnInfoPtr scrn = xf86Screens[dest->drawable.pScreen->myNum];
@@ -386,9 +428,18 @@ i830_uxa_prepare_copy(PixmapPtr source, PixmapPtr dest, 
int xdir,
                intel_get_pixmap_bo(dest),
        };
 
-       if (!intel_get_aperture_space(scrn, bo_table, ARRAY_SIZE(bo_table), 
-               RENDER_BATCH))
-               return FALSE;
+
+       if (IS_GEN6(intel)) {
+               if (!intel_get_aperture_space(scrn, bo_table, 
+                                       ARRAY_SIZE(bo_table), 
+                                       BLT_BATCH))
+                       return FALSE;
+       } else {
+               if (!intel_get_aperture_space(scrn, bo_table, 
+                                       ARRAY_SIZE(bo_table), 
+                                       RENDER_BATCH))
+                       return FALSE;
+       }
 
        intel->render_source = source;
 
@@ -408,6 +459,90 @@ i830_uxa_prepare_copy(PixmapPtr source, PixmapPtr dest, 
int xdir,
 }
 
 static void
+gen6_uxa_copy(PixmapPtr dest, int src_x1, int src_y1, int dst_x1,
+             int dst_y1, int w, int h)
+{
+       ScrnInfoPtr scrn = xf86Screens[dest->drawable.pScreen->myNum];
+       intel_screen_private *intel = intel_get_screen_private(scrn);
+       uint32_t cmd;
+       int dst_x2, dst_y2, src_x2, src_y2;
+       unsigned int dst_pitch, src_pitch;
+
+       dst_x2 = dst_x1 + w;
+       dst_y2 = dst_y1 + h;
+
+       /* XXX Fixup extents as a lamentable workaround for missing
+        * source clipping in the upper layers.
+        */
+       if (dst_x1 < 0)
+               src_x1 -= dst_x1, dst_x1 = 0;
+       if (dst_y1 < 0)
+               src_y1 -= dst_y1, dst_y1 = 0;
+       if (dst_x2 > dest->drawable.width)
+               dst_x2 = dest->drawable.width;
+       if (dst_y2 > dest->drawable.height)
+               dst_y2 = dest->drawable.height;
+
+       src_x2 = src_x1 + (dst_x2 - dst_x1);
+       src_y2 = src_y1 + (dst_y2 - dst_y1);
+
+       if (src_x1 < 0)
+               dst_x1 -= src_x1, src_x1 = 0;
+       if (src_y1 < 0)
+               dst_y1 -= src_y1, src_y1 = 0;
+       if (src_x2 > intel->render_source->drawable.width)
+               dst_x2 -= src_x2 - intel->render_source->drawable.width;
+       if (src_y2 > intel->render_source->drawable.height)
+               dst_y2 -= src_y2 - intel->render_source->drawable.height;
+
+       if (dst_x2 <= dst_x1 || dst_y2 <= dst_y1)
+               return;
+
+       dst_pitch = intel_pixmap_pitch(dest);
+       src_pitch = intel_pixmap_pitch(intel->render_source);
+       {
+               BEGIN_BATCH_BLT(8);
+
+               cmd = XY_SRC_COPY_BLT_CMD;
+
+               if (dest->drawable.bitsPerPixel == 32)
+                       cmd |=
+                           XY_SRC_COPY_BLT_WRITE_ALPHA |
+                           XY_SRC_COPY_BLT_WRITE_RGB;
+
+               if (INTEL_INFO(intel)->gen >= 40) {
+                       if (intel_pixmap_tiled(dest)) {
+                               assert((dst_pitch % 512) == 0);
+                               dst_pitch >>= 2;
+                               cmd |= XY_SRC_COPY_BLT_DST_TILED;
+                       }
+
+                       if (intel_pixmap_tiled(intel->render_source)) {
+                               assert((src_pitch % 512) == 0);
+                               src_pitch >>= 2;
+                               cmd |= XY_SRC_COPY_BLT_SRC_TILED;
+                       }
+               }
+
+               OUT_BATCH_BLT(cmd);
+
+               OUT_BATCH_BLT(intel->BR[13] | dst_pitch);
+               OUT_BATCH_BLT((dst_y1 << 16) | (dst_x1 & 0xffff));
+               OUT_BATCH_BLT((dst_y2 << 16) | (dst_x2 & 0xffff));
+               OUT_RELOC_PIXMAP_FENCED_BLT(dest,
+                                       I915_GEM_DOMAIN_RENDER,
+                                       I915_GEM_DOMAIN_RENDER,
+                                       0);
+               OUT_BATCH_BLT((src_y1 << 16) | (src_x1 & 0xffff));
+               OUT_BATCH_BLT(src_pitch);
+               OUT_RELOC_PIXMAP_FENCED_BLT(intel->render_source,
+                                       I915_GEM_DOMAIN_RENDER, 0,
+                                       0);
+               ADVANCE_BATCH_BLT();
+       }
+}
+
+static void
 i830_uxa_copy(PixmapPtr dest, int src_x1, int src_y1, int dst_x1,
              int dst_y1, int w, int h)
 {
@@ -497,10 +632,16 @@ i830_uxa_copy(PixmapPtr dest, int src_x1, int src_y1, int 
dst_x1,
 static void i830_uxa_done_copy(PixmapPtr dest)
 {
        ScrnInfoPtr scrn = xf86Screens[dest->drawable.pScreen->myNum];
+       intel_debug_flush(scrn, RENDER_BATCH);
+}
 
-       intel_debug_flush(scrn, RENDER_BATCH);
+static void gen6_uxa_done_copy(PixmapPtr dest)
+{
+       ScrnInfoPtr scrn = xf86Screens[dest->drawable.pScreen->myNum];
+       intel_batch_submit(scrn, FALSE, BLT_BATCH);
 }
 
+
 /**
  * Do any cleanup from the Composite operation.
  *
@@ -1191,17 +1332,30 @@ Bool intel_uxa_init(ScreenPtr screen)
        intel->vertex_bo = NULL;
 
        /* Solid fill */
-       intel->uxa_driver->check_solid = i830_uxa_check_solid;
-       intel->uxa_driver->prepare_solid = i830_uxa_prepare_solid;
-       intel->uxa_driver->solid = i830_uxa_solid;
-       intel->uxa_driver->done_solid = i830_uxa_done_solid;
+       if (IS_GEN6(intel)) {
+               intel->uxa_driver->check_solid = generic_uxa_check_solid;
+               intel->uxa_driver->prepare_solid = generic_uxa_prepare_solid;
+               intel->uxa_driver->solid = gen6_uxa_solid;
+               intel->uxa_driver->done_solid = gen6_uxa_done_solid;
+       } else {
+               intel->uxa_driver->check_solid = generic_uxa_check_solid;
+               intel->uxa_driver->prepare_solid = generic_uxa_prepare_solid;
+               intel->uxa_driver->solid = i830_uxa_solid;
+               intel->uxa_driver->done_solid = i830_uxa_done_solid;
+       }
 
        /* Copy */
-       intel->uxa_driver->check_copy = i830_uxa_check_copy;
-       intel->uxa_driver->prepare_copy = i830_uxa_prepare_copy;
-       intel->uxa_driver->copy = i830_uxa_copy;
-       intel->uxa_driver->done_copy = i830_uxa_done_copy;
-
+       if (IS_GEN6(intel)) {
+               intel->uxa_driver->check_copy = generic_uxa_check_copy;
+               intel->uxa_driver->prepare_copy = generic_uxa_prepare_copy;
+               intel->uxa_driver->copy = gen6_uxa_copy;
+               intel->uxa_driver->done_copy = gen6_uxa_done_copy;
+       } else {
+               intel->uxa_driver->check_copy = generic_uxa_check_copy;
+               intel->uxa_driver->prepare_copy = generic_uxa_prepare_copy;
+               intel->uxa_driver->copy = i830_uxa_copy;
+               intel->uxa_driver->done_copy = i830_uxa_done_copy;
+       }
        /* Composite */
        if (IS_GEN2(intel)) {
                intel->uxa_driver->check_composite = i830_check_composite;
-- 
1.7.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to