On Fri, Apr 17, 2015 at 11:56 AM, Matt Turner <matts...@gmail.com> wrote: > On Fri, Apr 17, 2015 at 11:45 AM, Jason Ekstrand <ja...@jlekstrand.net> wrote: >> On Tue, Apr 14, 2015 at 4:15 PM, Matt Turner <matts...@gmail.com> wrote: >>> This lets SIMD16 programs on G45 and Gen5 use the PLN instruction. >> >> This patch also changes the meaning of FS_OPCODE_LINTERP. Those >> chainges should be described in the commit message as well. >> >>> On Ironlake: >>> >>> total instructions in shared programs: 5634757 -> 5518055 (-2.07%) >>> instructions in affected programs: 1745837 -> 1629135 (-6.68%) >>> helped: 11439 >>> HURT: 4 >>> --- >>> src/mesa/drivers/dri/i965/brw_fs.cpp | 46 +++++++------------- >>> src/mesa/drivers/dri/i965/brw_fs.h | 3 +- >>> src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 5 +-- >>> src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 13 +++--- >>> src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 8 ++-- >>> src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 51 >>> +++++++++++------------ >>> src/mesa/drivers/dri/i965/brw_reg.h | 7 ++++ >>> 7 files changed, 59 insertions(+), 74 deletions(-) >>> >>> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp >>> b/src/mesa/drivers/dri/i965/brw_fs.cpp >>> index 5ab8701..6e55f67 100644 >>> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp >>> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp >>> @@ -1259,8 +1259,7 @@ fs_visitor::emit_fragcoord_interpolation(bool >>> pixel_center_integer, >>> emit(MOV(wpos, fs_reg(brw_vec8_grf(payload.source_depth_reg, 0)))); >>> } else { >>> emit(FS_OPCODE_LINTERP, wpos, >>> - this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC], >>> - this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC], >>> + this->delta_xy[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC], >>> interp_reg(VARYING_SLOT_POS, 2)); >>> } >>> wpos = offset(wpos, 1); >>> @@ -1302,8 +1301,7 @@ fs_visitor::emit_linterp(const fs_reg &attr, const >>> fs_reg &interp, >>> barycoord_mode = BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC; >>> } >>> return emit(FS_OPCODE_LINTERP, attr, >>> - this->delta_x[barycoord_mode], >>> - this->delta_y[barycoord_mode], interp); >>> + this->delta_xy[barycoord_mode], interp); >>> } >>> >>> void >>> @@ -1851,8 +1849,8 @@ fs_visitor::assign_urb_setup() >>> */ >>> foreach_block_and_inst(block, fs_inst, inst, cfg) { >>> if (inst->opcode == FS_OPCODE_LINTERP) { >>> - assert(inst->src[2].file == HW_REG); >>> - inst->src[2].fixed_hw_reg.nr += urb_start; >>> + assert(inst->src[1].file == HW_REG); >>> + inst->src[1].fixed_hw_reg.nr += urb_start; >>> } >>> >>> if (inst->opcode == FS_OPCODE_CINTERP) { >>> @@ -2106,25 +2104,16 @@ fs_visitor::compact_virtual_grfs() >>> } >>> } >>> >>> - /* Patch all the references to delta_x/delta_y, since they're used in >>> - * register allocation. If they're unused, switch them to BAD_FILE so >>> - * we don't think some random VGRF is delta_x/delta_y. >>> + /* Patch all the references to delta_xy, since they're used in register >>> + * allocation. If they're unused, switch them to BAD_FILE so we don't >>> + * think some random VGRF is delta_xy. >>> */ >>> - for (unsigned i = 0; i < ARRAY_SIZE(delta_x); i++) { >>> - if (delta_x[i].file == GRF) { >>> - if (remap_table[delta_x[i].reg] != -1) { >>> - delta_x[i].reg = remap_table[delta_x[i].reg]; >>> + for (unsigned i = 0; i < ARRAY_SIZE(delta_xy); i++) { >>> + if (delta_xy[i].file == GRF) { >>> + if (remap_table[delta_xy[i].reg] != -1) { >>> + delta_xy[i].reg = remap_table[delta_xy[i].reg]; >>> } else { >>> - delta_x[i].file = BAD_FILE; >>> - } >>> - } >>> - } >>> - for (unsigned i = 0; i < ARRAY_SIZE(delta_y); i++) { >>> - if (delta_y[i].file == GRF) { >>> - if (remap_table[delta_y[i].reg] != -1) { >>> - delta_y[i].reg = remap_table[delta_y[i].reg]; >>> - } else { >>> - delta_y[i].file = BAD_FILE; >>> + delta_xy[i].file = BAD_FILE; >>> } >>> } >>> } >>> @@ -2589,14 +2578,9 @@ fs_visitor::opt_register_renaming() >>> if (progress) { >>> invalidate_live_intervals(); >>> >>> - for (unsigned i = 0; i < ARRAY_SIZE(delta_x); i++) { >>> - if (delta_x[i].file == GRF && remap[delta_x[i].reg] != -1) { >>> - delta_x[i].reg = remap[delta_x[i].reg]; >>> - } >>> - } >>> - for (unsigned i = 0; i < ARRAY_SIZE(delta_y); i++) { >>> - if (delta_y[i].file == GRF && remap[delta_y[i].reg] != -1) { >>> - delta_y[i].reg = remap[delta_y[i].reg]; >>> + for (unsigned i = 0; i < ARRAY_SIZE(delta_xy); i++) { >>> + if (delta_xy[i].file == GRF && remap[delta_xy[i].reg] != -1) { >>> + delta_xy[i].reg = remap[delta_xy[i].reg]; >>> } >>> } >>> } >>> diff --git a/src/mesa/drivers/dri/i965/brw_fs.h >>> b/src/mesa/drivers/dri/i965/brw_fs.h >>> index b7c1c39..e04691c 100644 >>> --- a/src/mesa/drivers/dri/i965/brw_fs.h >>> +++ b/src/mesa/drivers/dri/i965/brw_fs.h >>> @@ -512,8 +512,7 @@ public: >>> fs_reg pixel_y; >>> fs_reg wpos_w; >>> fs_reg pixel_w; >>> - fs_reg delta_x[BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT]; >>> - fs_reg delta_y[BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT]; >>> + fs_reg delta_xy[BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT]; >>> fs_reg shader_start_time; >>> fs_reg userplane[MAX_CLIP_PLANES]; >>> >>> diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp >>> b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp >>> index fc9597e..dba3286 100644 >>> --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp >>> +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp >>> @@ -392,11 +392,10 @@ fs_generator::generate_linterp(fs_inst *inst, >>> struct brw_reg dst, struct brw_reg *src) >>> { >>> struct brw_reg delta_x = src[0]; >>> - struct brw_reg delta_y = src[1]; >>> - struct brw_reg interp = src[2]; >>> + struct brw_reg delta_y = offset(src[0], dispatch_width / 8); >> >> This is an awkward use of offset(). It's supposed to take the width >> into account for you. Why do you need to do it explicitly? If >> there's something funny going on here, it deserves a comment. > > I don't see any evidence of it considering width:
Right... Sorry, I was thinking about the fs_reg offset function. > static inline struct brw_reg > offset(struct brw_reg reg, unsigned delta) > { > reg.nr += delta; > return reg; > } > > Presumably you're thinking of offset(fs_reg, unsigned). > > The delta_y variable is only used explicitly on original Gen4 where we > have to use line+mac. The delta values are in consecutive registers > (x0, x1), (y0, y1), and extending it to SIMD16 you add an extra two > registers, giving you (x0, x1), (x2, x3), (y0, y1), (y2, y3). So you > offset 1 register in SIMD8 and two registers in SIMD16. > > Does that warrant a comment? Nope, it's fine. _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev