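This reduces the SF URB entry size by having the WM unit do the
transpose from the SF-friendly coefficient-major layout to the
WM-friendly attribute-major layout when it reads the URB, rather than
having the SF unit do the transpose when it writes the URB.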
---
 src/mesa/drivers/dri/i965/brw_context.c  |    1 +
 src/mesa/drivers/dri/i965/brw_context.h  |    1 +
 src/mesa/drivers/dri/i965/brw_sf.c       |   29 ++++++++++-
 src/mesa/drivers/dri/i965/brw_sf_emit.c  |   87 ++++++++++++------------------
 src/mesa/drivers/dri/i965/brw_wm_state.c |    2 +
 5 files changed, 66 insertions(+), 54 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.c 
b/src/mesa/drivers/dri/i965/brw_context.c
index 9483ec6..8618781 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -175,6 +175,7 @@ GLboolean brwCreateContext( int api,
          brw->has_compr4 = GL_TRUE;
       brw->has_aa_line_parameters = GL_TRUE;
       brw->has_pln = GL_TRUE;
+      brw->has_transposed_read = GL_TRUE;
   } else {
       brw->CMD_VF_STATISTICS = CMD_VF_STATISTICS_965;
       brw->CMD_PIPELINE_SELECT = CMD_PIPELINE_SELECT_965;
diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index 8dfd152..a58ca9f 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -452,6 +452,7 @@ struct brw_context
    GLuint primitive;
 
    GLboolean emit_state_always;
+   GLboolean has_transposed_read;
    GLboolean has_surface_tile_offset;
    GLboolean has_compr4;
    GLboolean has_negative_rhw_bug;
diff --git a/src/mesa/drivers/dri/i965/brw_sf.c 
b/src/mesa/drivers/dri/i965/brw_sf.c
index 6da155b..b6c1923 100644
--- a/src/mesa/drivers/dri/i965/brw_sf.c
+++ b/src/mesa/drivers/dri/i965/brw_sf.c
@@ -65,7 +65,34 @@ static void compile_sf_prog( struct brw_context *brw,
    c.nr_setup_regs = (c.nr_setup_attrs+1)/2;
 
    c.prog_data.urb_read_length = c.nr_attr_regs;
-   c.prog_data.urb_entry_size = c.nr_setup_regs * 2;
+   /* Number of 512-bit URB rows produced. */
+   if (brw->has_transposed_read) {
+      /* Transposed reads: The 3 coefficients we produce are packed
+       * in the URB entry:
+       *
+       * row0: a0.x_x a0.y_x a0.z_x a0.w_x a1.x_x a1.y_x a1.z_x a1.w_x
+       *       a0.x_y a0.y_y a0.z_y a0.w_y a1.x_y a1.y_y a1.z_y a1.w_y
+       * row1: a0.x_c a0.y_c a0.z_c a0.w_c a1.x_c a1.y_c a1.z_c a1.w_c
+       *       a2.x_x a2.y_x a2.z_x a2.w_x a3.x_x a3.y_x a3.z_x a3.w_x
+       * row2: a2.x_y a2.y_y a2.z_y a2.w_y a3.x_y a3.y_y a3.z_y a3.w_y
+       *       a2.x_c a2.y_c a2.z_c a2.w_c a3.x_c a3.y_c a3.z_c a3.w_c
+       *
+       * The WM gets programmed as if it were reading the layout
+       * described in the else block below.
+       */
+      c.prog_data.urb_entry_size = ((c.nr_setup_attrs + 3) / 4) * 3;
+   } else {
+      /* Transposed writes into URB.  The rows look like:
+       *
+       * row0: a0.x_x a0.x_y null a0.x_c a0.y_x a0.y_y null a0.y_c
+       *       a0.z_x a0.z_y null a0.z_c a0.w_x a0.w_y null a0.w_c
+       *
+       * So we use a whole row per attribute (and since we write
+       * two-attribute groups to the URB, align the size in case the
+       * disabled last attribute actually gets written).
+       */
+      c.prog_data.urb_entry_size = ALIGN(c.nr_setup_attrs, 2);
+   }
 
    /* Construct map from attribute number to position in the vertex.
     */
diff --git a/src/mesa/drivers/dri/i965/brw_sf_emit.c 
b/src/mesa/drivers/dri/i965/brw_sf_emit.c
index d3c9756..b35e509 100644
--- a/src/mesa/drivers/dri/i965/brw_sf_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_sf_emit.c
@@ -42,6 +42,34 @@
 #include "brw_util.h"
 #include "brw_sf.h"
 
+static void
+do_urb_write(struct brw_sf_compile *c, int attr_pair, bool last)
+{
+   struct brw_compile *p = &c->func;
+   struct brw_context *brw = p->brw;
+   uint32_t offset, swizzle;
+   /* Pick the URB destination offset and swizzle for this attribute pair. */
+   if (brw->has_transposed_read) {
+      offset = attr_pair * 3;   /* packed layout; WM has transposed read enabled */
+      swizzle = BRW_URB_SWIZZLE_NONE;
+   } else {
+      offset = attr_pair * 4;   /* layout is transposed by the URB write itself */
+      swizzle = BRW_URB_SWIZZLE_TRANSPOSE;
+   }
+   /* `last` is passed as both the EOT and the writes-complete argument. */
+   brw_urb_WRITE(p,
+                brw_null_reg(),
+                0,
+                brw_vec8_grf(0, 0), /* r0, will be copied to m0 */
+                0,     /* allocate */
+                1,     /* used */
+                4,     /* msg len */
+                0,     /* response len */
+                last,  /* eot */
+                last,  /* writes complete */
+                offset, /* urb destination offset */
+                swizzle); /* swizzle control */
+}
 
 static struct brw_reg get_vert_attr(struct brw_sf_compile *c,
                                    struct brw_reg vert,
@@ -445,6 +473,7 @@ void brw_emit_tri_setup( struct brw_sf_compile *c, 
GLboolean allocate)
       }
 
       {
+
         brw_set_predicate_control_flag_value(p, pc); 
         /* start point for interpolation
          */
@@ -453,18 +482,7 @@ void brw_emit_tri_setup( struct brw_sf_compile *c, 
GLboolean allocate)
         /* Copy m0..m3 to URB.  m0 is implicitly copied from r0 in
          * the send instruction:
          */     
-        brw_urb_WRITE(p, 
-                      brw_null_reg(),
-                      0,
-                      brw_vec8_grf(0, 0), /* r0, will be copied to m0 */
-                      0,       /* allocate */
-                      1,       /* used */
-                      4,       /* msg len */
-                      0,       /* response len */
-                      last,    /* eot */
-                      last,    /* writes complete */
-                      i*4,     /* offset */
-                      BRW_URB_SWIZZLE_TRANSPOSE); /* XXX: Swizzle control "SF 
to windower" */
+        do_urb_write(c, i, last);
       }
    }
 }
@@ -525,20 +543,7 @@ void brw_emit_line_setup( struct brw_sf_compile *c, 
GLboolean allocate)
          */
         brw_MOV(p, c->m3C0, a0);
 
-        /* Copy m0..m3 to URB. 
-         */
-        brw_urb_WRITE(p, 
-                      brw_null_reg(),
-                      0,
-                      brw_vec8_grf(0, 0),
-                      0,       /* allocate */
-                      1,       /* used */
-                      4,       /* msg len */
-                      0,       /* response len */
-                      last,    /* eot */
-                      last,    /* writes complete */
-                      i*4,     /* urb destination offset */
-                      BRW_URB_SWIZZLE_TRANSPOSE); 
+        do_urb_write(c, i, last);
       }
    } 
 }
@@ -617,19 +622,8 @@ void brw_emit_point_sprite_setup( struct brw_sf_compile 
*c, GLboolean allocate)
 
 
       brw_set_predicate_control_flag_value(p, pc);
-      /* Copy m0..m3 to URB. */
-      brw_urb_WRITE(p,
-                   brw_null_reg(),
-                   0,
-                   brw_vec8_grf(0, 0),
-                   0,  /* allocate */
-                   1,  /* used */
-                   4,  /* msg len */
-                   0,  /* response len */
-                   last,       /* eot */
-                   last,       /* writes complete */
-                   i*4,        /* urb destination offset */
-                   BRW_URB_SWIZZLE_TRANSPOSE);
+
+      do_urb_write(c, i, last);
    }
 }
 
@@ -676,20 +670,7 @@ void brw_emit_point_setup( struct brw_sf_compile *c, 
GLboolean allocate)
 
         brw_MOV(p, c->m3C0, a0); /* constant value */
 
-        /* Copy m0..m3 to URB. 
-         */
-        brw_urb_WRITE(p, 
-                      brw_null_reg(),
-                      0,
-                      brw_vec8_grf(0, 0),
-                      0,       /* allocate */
-                      1,       /* used */
-                      4,       /* msg len */
-                      0,       /* response len */
-                      last,    /* eot */
-                      last,    /* writes complete */
-                      i*4,     /* urb destination offset */
-                      BRW_URB_SWIZZLE_TRANSPOSE);
+        do_urb_write(c, i, last);
       }
    }
 }
diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c 
b/src/mesa/drivers/dri/i965/brw_wm_state.c
index e9ef635..dc81fae 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_state.c
@@ -230,6 +230,8 @@ wm_unit_create_from_key(struct brw_context *brw, struct 
brw_wm_unit_key *key,
    wm.wm5.line_endcap_aa_region_width = 1;
 
    wm.wm5.polygon_stipple = key->polygon_stipple;
+   if (brw->has_transposed_read)
+      wm.wm5.transposed_urb_read_enable = 1;
 
    if (key->offset_enable) {
       wm.wm5.depth_offset = 1;
-- 
1.7.2.3

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to