On hardware that supports transposed URB reads (brw->has_transposed_read), this saves SF URB size by having the WM unit do the transpose from the SF-friendly coefficient-major layout to the WM-friendly attribute-major layout; other hardware keeps the existing transposed write from the SF. --- src/mesa/drivers/dri/i965/brw_context.c | 1 + src/mesa/drivers/dri/i965/brw_context.h | 1 + src/mesa/drivers/dri/i965/brw_sf.c | 29 ++++++++++- src/mesa/drivers/dri/i965/brw_sf_emit.c | 87 ++++++++++++------------------ src/mesa/drivers/dri/i965/brw_wm_state.c | 2 + 5 files changed, 66 insertions(+), 54 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 9483ec6..8618781 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -175,6 +175,7 @@ GLboolean brwCreateContext( int api, brw->has_compr4 = GL_TRUE; brw->has_aa_line_parameters = GL_TRUE; brw->has_pln = GL_TRUE; + brw->has_transposed_read = GL_TRUE; } else { brw->CMD_VF_STATISTICS = CMD_VF_STATISTICS_965; brw->CMD_PIPELINE_SELECT = CMD_PIPELINE_SELECT_965; diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 8dfd152..a58ca9f 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -452,6 +452,7 @@ struct brw_context GLuint primitive; GLboolean emit_state_always; + GLboolean has_transposed_read; GLboolean has_surface_tile_offset; GLboolean has_compr4; GLboolean has_negative_rhw_bug; diff --git a/src/mesa/drivers/dri/i965/brw_sf.c b/src/mesa/drivers/dri/i965/brw_sf.c index 6da155b..b6c1923 100644 --- a/src/mesa/drivers/dri/i965/brw_sf.c +++ b/src/mesa/drivers/dri/i965/brw_sf.c @@ -65,7 +65,34 @@ static void compile_sf_prog( struct brw_context *brw, c.nr_setup_regs = (c.nr_setup_attrs+1)/2; c.prog_data.urb_read_length = c.nr_attr_regs; - c.prog_data.urb_entry_size = c.nr_setup_regs * 2; + /* Number of 512-bit URB rows produced. */ + if (brw->has_transposed_read) { + /* Transposed reads: The 3 coefficients we produce are packed + * in the URB entry: + * + * row0: a0.x_x a0.y_x a0.z_x a0.w_x a1.x_x a1.y_x a1.z_x a1.w_x + * a0.x_y a0.y_y a0.z_y a0.w_y a1.x_y a1.y_y a1.z_y a1.w_y + * row1: a0.x_c a0.y_c a0.z_c a0.w_c a1.x_c a1.y_c a1.z_c a1.w_c + * a2.x_x a2.y_x a2.z_x a2.w_x a3.x_x a3.y_x a3.z_x a3.w_x + * row2: a2.x_y a2.y_y a2.z_y a2.w_y a3.x_y a3.y_y a3.z_y a3.w_y + * a2.x_c a2.y_c a2.z_c a2.w_c a3.x_c a3.y_c a3.z_c a3.w_c + * + * The WM gets programmed as if it were reading the layout shown + * in the else block below. 
+ */ + c.prog_data.urb_entry_size = ((c.nr_setup_attrs + 3) / 4) * 3; + } else { + /* Transposed writes into URB. The rows look like: + * + * row0: a0.x_x a0.x_y null a0.x_c a0.y_x a0.y_y null a0.y_c + * a0.z_x a0.z_y null a0.z_c a0.w_x a0.w_y null a0.w_c + * + * So we use a whole row per attribute (and since we write + * two-attribute groups to the URB, align the size in case the + * disabled last attribute actually gets written). + */ + c.prog_data.urb_entry_size = ALIGN(c.nr_setup_attrs, 2); + } /* Construct map from attribute number to position in the vertex. */ diff --git a/src/mesa/drivers/dri/i965/brw_sf_emit.c b/src/mesa/drivers/dri/i965/brw_sf_emit.c index d3c9756..b35e509 100644 --- a/src/mesa/drivers/dri/i965/brw_sf_emit.c +++ b/src/mesa/drivers/dri/i965/brw_sf_emit.c @@ -42,6 +42,34 @@ #include "brw_util.h" #include "brw_sf.h" +static void +do_urb_write(struct brw_sf_compile *c, int attr_pair, bool last) +{ + struct brw_compile *p = &c->func; + struct brw_context *brw = p->brw; + uint32_t offset, swizzle; + + if (brw->has_transposed_read) { + offset = attr_pair * 3; + swizzle = BRW_URB_SWIZZLE_NONE; + } else { + offset = attr_pair * 4; + swizzle = BRW_URB_SWIZZLE_TRANSPOSE; + } + + brw_urb_WRITE(p, + brw_null_reg(), + 0, + brw_vec8_grf(0, 0), /* r0, will be copied to m0 */ + 0, /* allocate */ + 1, /* used */ + 4, /* msg len */ + 0, /* response len */ + last, /* eot */ + last, /* writes complete */ + offset, + swizzle); +} static struct brw_reg get_vert_attr(struct brw_sf_compile *c, struct brw_reg vert, @@ -445,6 +473,7 @@ void brw_emit_tri_setup( struct brw_sf_compile *c, GLboolean allocate) } { + brw_set_predicate_control_flag_value(p, pc); /* start point for interpolation */ @@ -453,18 +482,7 @@ void brw_emit_tri_setup( struct brw_sf_compile *c, GLboolean allocate) /* Copy m0..m3 to URB. 
m0 is implicitly copied from r0 in * the send instruction: */ - brw_urb_WRITE(p, - brw_null_reg(), - 0, - brw_vec8_grf(0, 0), /* r0, will be copied to m0 */ - 0, /* allocate */ - 1, /* used */ - 4, /* msg len */ - 0, /* response len */ - last, /* eot */ - last, /* writes complete */ - i*4, /* offset */ - BRW_URB_SWIZZLE_TRANSPOSE); /* XXX: Swizzle control "SF to windower" */ + do_urb_write(c, i, last); } } } @@ -525,20 +543,7 @@ void brw_emit_line_setup( struct brw_sf_compile *c, GLboolean allocate) */ brw_MOV(p, c->m3C0, a0); - /* Copy m0..m3 to URB. - */ - brw_urb_WRITE(p, - brw_null_reg(), - 0, - brw_vec8_grf(0, 0), - 0, /* allocate */ - 1, /* used */ - 4, /* msg len */ - 0, /* response len */ - last, /* eot */ - last, /* writes complete */ - i*4, /* urb destination offset */ - BRW_URB_SWIZZLE_TRANSPOSE); + do_urb_write(c, i, last); } } } @@ -617,19 +622,8 @@ void brw_emit_point_sprite_setup( struct brw_sf_compile *c, GLboolean allocate) brw_set_predicate_control_flag_value(p, pc); - /* Copy m0..m3 to URB. */ - brw_urb_WRITE(p, - brw_null_reg(), - 0, - brw_vec8_grf(0, 0), - 0, /* allocate */ - 1, /* used */ - 4, /* msg len */ - 0, /* response len */ - last, /* eot */ - last, /* writes complete */ - i*4, /* urb destination offset */ - BRW_URB_SWIZZLE_TRANSPOSE); + + do_urb_write(c, i, last); } } @@ -676,20 +670,7 @@ void brw_emit_point_setup( struct brw_sf_compile *c, GLboolean allocate) brw_MOV(p, c->m3C0, a0); /* constant value */ - /* Copy m0..m3 to URB. 
- */ - brw_urb_WRITE(p, - brw_null_reg(), - 0, - brw_vec8_grf(0, 0), - 0, /* allocate */ - 1, /* used */ - 4, /* msg len */ - 0, /* response len */ - last, /* eot */ - last, /* writes complete */ - i*4, /* urb destination offset */ - BRW_URB_SWIZZLE_TRANSPOSE); + do_urb_write(c, i, last); } } } diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c index e9ef635..dc81fae 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_state.c @@ -230,6 +230,8 @@ wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, wm.wm5.line_endcap_aa_region_width = 1; wm.wm5.polygon_stipple = key->polygon_stipple; + if (brw->has_transposed_read) + wm.wm5.transposed_urb_read_enable = 1; if (key->offset_enable) { wm.wm5.depth_offset = 1; -- 1.7.2.3 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev