fs: Fix fs_visitor::VARYING_PULL_CONSTANT_LOAD for doubles

Kenneth Graunke Tue, 10 May 2016 12:37:11 -0700

On Tuesday, May 10, 2016 12:07:26 PM PDT Kenneth Graunke wrote:
> On Tuesday, May 3, 2016 2:22:00 PM PDT Samuel Iglesias Gonsálvez wrote:
> > From: Iago Toral Quiroga <ito...@igalia.com>
> > 
> > ---
> >  src/mesa/drivers/dri/i965/brw_fs.cpp | 49 +++++++++++++++++++++++++++++
+
> ++++--
> >  1 file changed, 47 insertions(+), 2 deletions(-)
> > 
> > diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/
> i965/brw_fs.cpp
> > index bc81a80..0e69be8 100644
> > --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
> > +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
> > @@ -193,8 +193,15 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(const 
fs_builder 
> &bld,
> >     else
> >        op = FS_OPCODE_VARYING_PULL_CONSTANT_LOAD;
> >  
> > +   /* The pull load message will load a vec4 (16 bytes). If we are 
loading
> > +    * a double this means we are only loading 2 elements worth of data.
> > +    * We also want to use a 32-bit data type for the dst of the load 
> operation
> > +    * so other parts of the driver don't get confused about the size of 
the
> > +    * result.
> > +    */
> >     int regs_written = 4 * (bld.dispatch_width() / 8) * scale;
> > -   fs_reg vec4_result = fs_reg(VGRF, alloc.allocate(regs_written), 
> dst.type);
> > +   fs_reg vec4_result = fs_reg(VGRF, alloc.allocate(regs_written),
> > +                               BRW_REGISTER_TYPE_F);
> >     fs_inst *inst = bld.emit(op, vec4_result, surf_index, vec4_offset);
> >     inst->regs_written = regs_written;
> >  
> > @@ -207,7 +214,45 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(const 
fs_builder 
> &bld,
> >           inst->mlen = 1 + bld.dispatch_width() / 8;
> >     }
> >  
> > -   bld.MOV(dst, offset(vec4_result, bld, ((const_offset & 0xf) / 4) * 
> scale));
> > +   vec4_result.type = dst.type;
> > +
> > +   /* Our VARYING_PULL_CONSTANT_LOAD reads a vector of 32-bit elements. 
If 
> we
> > +    * are reading doubles this means that we get this:
> > +    *
> > +    *  r0: x0 x0 x0 x0 x0 x0 x0 x0
> > +    *  r1: x1 x1 x1 x1 x1 x1 x1 x1
> > +    *  r2: y0 y0 y0 y0 y0 y0 y0 y0
> > +    *  r3: y1 y1 y1 y1 y1 y1 y1 y1
> > +    *
> > +    * Fix this up so we return valid double elements:
> > +    *
> > +    *  r0: x0 x1 x0 x1 x0 x1 x0 x1
> > +    *  r1: x0 x1 x0 x1 x0 x1 x0 x1
> > +    *  r2: y0 y1 y0 y1 y0 y1 y0 y1
> > +    *  r3: y0 y1 y0 y1 y0 y1 y0 y1
> > +    */
> 
> I think this could be simplified a little...
> 
> > +   if (type_sz(dst.type) == 8) {
> > +      int multiplier = bld.dispatch_width() / 8;
> > +      fs_reg fixed_res =
> > +         fs_reg(VGRF, alloc.allocate(2 * multiplier), 
BRW_REGISTER_TYPE_F);
> > +      /* We only have 2 doubles in a 32-bit vec4 */
> > +      for (int i = 0; i < 2; i++) {
> > +         fs_reg vec4_float =
> > +            horiz_offset(retype(vec4_result, BRW_REGISTER_TYPE_F),
> > +                         multiplier * 16 * i);
> > +
> > +         bld.MOV(stride(fixed_res, 2), vec4_float);
> 
>             ^^^ copies x0 or y0 into fixed_res
> 
> > +         bld.MOV(stride(horiz_offset(fixed_res, 1), 2),
> > +                 horiz_offset(vec4_float, 8 * multiplier));
> > +
>             ^^^ copies x1 or y1 into fixed_res
> 
> > +         bld.MOV(horiz_offset(vec4_result, multiplier * 8 * i),
> > +                 retype(fixed_res, BRW_REGISTER_TYPE_DF));
> 
> This just copies fixed_res back over vec4_result?  I don't think we need
> to do this - vec4_result is just a temporary.  We really want the final
> result in dst.
> 
> > +      }
> > +   }
> > +
> > +   int type_slots = MAX2(type_sz(dst.type) / 4, 1);
> > +   bld.MOV(dst, offset(vec4_result, bld,
> > +                       ((const_offset & 0xf) / (4 * type_slots)) * 
scale));
> >  }
> 
> How about we simplify this to:
> 
>    if (type_sz(dst.type) == 8) {
>       int multiplier = bld.dispatch_width() / 8;
>       fs_reg fixed_res =
>          fs_reg(VGRF, alloc.allocate(2 * multiplier), BRW_REGISTER_TYPE_F);
>       /* We only have 2 doubles in a 32-bit vec4 */
>       for (int i = 0; i < 2; i++) {
>          fs_reg vec4_float =
>             horiz_offset(retype(vec4_result, BRW_REGISTER_TYPE_F),
>                          multiplier * 16 * i);
> 
>          bld.MOV(stride(fixed_res, 2), vec4_float);
>          bld.MOV(stride(horiz_offset(fixed_res, 1), 2),
>                  horiz_offset(vec4_float, 8 * multiplier));
> 
>          // Differences start here: no extra MOV, simplify the end a bit
>       }
> 
>       assert(scale == 1);
>       bld.MOV(dst, offset(fixed_res, bld, (const_offset & 0xf) / 8));
>    } else {
>       bld.MOV(dst, offset(vec4_result, bld, ((const_offset & 0xf) / 4) * 
> scale));
>    }
> 
> With that change,
> Reviewed-by: Kenneth Graunke <kenn...@whitecape.org>


Eh...now that I've read a bit further...I suppose the equivalent change
would be to make SHUFFLE_32BIT_LOAD_RESULT_TO_64BIT return a register
rather than copying to a dst parameter.  And the other callers seem OK
as is.

We can always clean things up later, I'd rather not disrupt the middle
of your patch series for something like this.

With no changes,
Reviewed-by: Kenneth Graunke <kenn...@whitecape.org>

signature.asc
Description: This is a digitally signed message part.

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 11/23] i965/fs: Fix fs_visitor::VARYING_PULL_CONSTANT_LOAD for doubles

Reply via email to