int64: Implement lowering of shift operations

Ian Romanick Mon, 15 Oct 2018 11:46:47 -0700

On 10/14/2018 07:16 PM, Jason Ekstrand wrote:
> On Sun, Oct 14, 2018 at 5:12 PM Matt Turner <matts...@gmail.com
> <mailto:matts...@gmail.com>> wrote:
> 
>     ---
>      src/compiler/nir/nir.h             |   1 +
>      src/compiler/nir/nir_lower_int64.c | 142
>     +++++++++++++++++++++++++++++++++++++
>      2 files changed, 143 insertions(+)
> 
>     diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
>     index 12cbd030e21..2c477126acc 100644
>     --- a/src/compiler/nir/nir.h
>     +++ b/src/compiler/nir/nir.h
>     @@ -3001,6 +3001,7 @@ typedef enum {
>         nir_lower_ineg64    = (1 << 7),
>         nir_lower_logic64   = (1 << 8),
>         nir_lower_minmax64  = (1 << 9),
>     +   nir_lower_shift64   = (1 << 10),
>      } nir_lower_int64_options;
> 
>      bool nir_lower_int64(nir_shader *shader, nir_lower_int64_options
>     options);
>     diff --git a/src/compiler/nir/nir_lower_int64.c
>     b/src/compiler/nir/nir_lower_int64.c
>     index 9cdc8a9d592..25882d3a858 100644
>     --- a/src/compiler/nir/nir_lower_int64.c
>     +++ b/src/compiler/nir/nir_lower_int64.c
>     @@ -90,6 +90,138 @@ lower_ixor64(nir_builder *b, nir_ssa_def *x,
>     nir_ssa_def *y)
>                                          nir_ixor(b, x_hi, y_hi));
>      }
> 
>     +static nir_ssa_def *
>     +lower_ishl64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
>     +{
>     +   /* Implemented as
>     +    *
>     +    * uint64_t lshift(uint64_t x, int c)
>     +    * {
>     +    *    if (c == 0) return x;
>     +    *
>     +    *    uint32_t lo = LO(x), hi = HI(x);
>     +    *
>     +    *    if (c < 32) {
>     +    *       uint32_t lo_shifted = lo << (c & 0x1f);
>     +    *       uint32_t hi_shifted = hi << (c & 0x1f);
>     +    *       uint32_t lo_shifted_hi = lo >> (abs(32 - c) & 0x1f);
> 
> 
> Why the abs and the &?  it's already predicated on c < 32 and negative
> or OOB shifts already have undefined results.


I think the & is unnecessary, and I tend towards removing them.  The
abs() is there so that it's the same expression as the else case.  This
is useful because the NIR code he generates uses a bcsel instead.

Since the NIR code uses bcsel, I feel like the C pseudo-code should use ?:.

>     +    *       return pack_64(lo_shifted, hi_shifted | lo_shifted_hi);
>     +    *    } else {
>     +    *       uint32_t lo_shifted_hi = lo << (abs(32 - c) & 0x1f);
>     +    *       return pack_64(0, lo_shifted_hi);
>     +    *    }
>     +    * }
>     +    */
>     +   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
>     +   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
>     +
>     +   nir_ssa_def *reverse_count = nir_iabs(b, nir_iadd(b, y,
>     nir_imm_int(b, -32)));
> 
> 
> This is iabs(c - 32) (which yields the same result but isn't the same
> expression) and doesn't have the & 0x1f.
>  
> 
>     +   nir_ssa_def *lo_shifted = nir_ishl(b, x_lo, y);
>     +   nir_ssa_def *hi_shifted = nir_ishl(b, x_hi, y);
> 
> 
> In general, all of the 0x1f are missing.  While not having them works on
> i965, there's no guarantee it works in general.  Maybe we should add
> them in and have an i965-specific optimization to delete them again? 
> Maybe it's ok to just not have them.  In any case, the code down here
> should match the code above or there should be a very good comment
> saying why it doesn't.

As long as shifting with a >32 value doesn't make the GPU crash, it
should be fine.  The values produced from those shifts aren't used in
the final result.  Right?  Explaining that in the comment is a good idea.

>     +   nir_ssa_def *lo_shifted_hi = nir_ushr(b, x_lo, reverse_count);
>     +
>     +   nir_ssa_def *res_if_lt_32 =
>     +      nir_pack_64_2x32_split(b, lo_shifted,
>     +                                nir_ior(b, hi_shifted, lo_shifted_hi));
>     +   nir_ssa_def *res_if_ge_32 =
>     +      nir_pack_64_2x32_split(b, nir_imm_int(b, 0),
>     +                                nir_ishl(b, x_lo, reverse_count));
>     +
>     +   return nir_bcsel(b,
>     +                    nir_ieq(b, y, nir_imm_int(b, 0)), x,
>     +                    nir_bcsel(b, nir_uge(b, y, nir_imm_int(b, 32)),
>     +                                 res_if_ge_32, res_if_lt_32));
>     +}
>     +
>     +static nir_ssa_def *
>     +lower_ishr64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
>     +{
>     +   /* Implemented as
>     +    *
>     +    * uint64_t arshift(uint64_t x, int c)
>     +    * {
>     +    *    if (c == 0) return x;
>     +    *
>     +    *    uint32_t lo = LO(x);
>     +    *    int32_t  hi = HI(x);
>     +    *
>     +    *    if (c < 32) {
>     +    *       uint32_t lo_shifted = lo >> (c & 0x1f);
>     +    *       uint32_t hi_shifted = hi >> (c & 0x1f);
>     +    *       uint32_t hi_shifted_lo = hi << (abs(32 - c) & 0x1f);
>     +    *       return pack_64(hi_shifted, hi_shifted_lo | lo_shifted);
>     +    *    } else {
>     +    *       uint32_t hi_shifted = hi >> 31;
>     +    *       uint32_t hi_shifted_lo = hi >> (abs(32 - c) & 0x1f);
>     +    *       return pack_64(hi_shifted, hi_shifted_lo);
>     +    *    }
>     +    * }
>     +    */
>     +   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
>     +   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
>     +
>     +   nir_ssa_def *reverse_count = nir_iabs(b, nir_iadd(b, y,
>     nir_imm_int(b, -32)));
>     +   nir_ssa_def *lo_shifted = nir_ushr(b, x_lo, y);
>     +   nir_ssa_def *hi_shifted = nir_ishr(b, x_hi, y);
>     +   nir_ssa_def *hi_shifted_lo = nir_ishl(b, x_hi, reverse_count);
>     +
>     +   nir_ssa_def *res_if_lt_32 =
>     +      nir_pack_64_2x32_split(b, nir_ior(b, lo_shifted, hi_shifted_lo),
>     +                                hi_shifted);
>     +   nir_ssa_def *res_if_ge_32 =
>     +      nir_pack_64_2x32_split(b, nir_ishr(b, x_hi, reverse_count),
>     +                                nir_ishr(b, x_hi, nir_imm_int(b, 31)));
>     +
>     +   return nir_bcsel(b,
>     +                    nir_ieq(b, y, nir_imm_int(b, 0)), x,
>     +                    nir_bcsel(b, nir_uge(b, y, nir_imm_int(b, 32)),
>     +                                 res_if_ge_32, res_if_lt_32));
>     +}
>     +
>     +static nir_ssa_def *
>     +lower_ushr64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
>     +{
>     +   /* Implemented as
>     +    *
>     +    * uint64_t rshift(uint64_t x, int c)
>     +    * {
>     +    *    if (c == 0) return x;
>     +    *
>     +    *    uint32_t lo = LO(x), hi = HI(x);
>     +    *
>     +    *    if (c < 32) {
>     +    *       uint32_t lo_shifted = lo >> (c & 0x1f);
>     +    *       uint32_t hi_shifted = hi >> (c & 0x1f);
>     +    *       uint32_t hi_shifted_lo = hi << (abs(32 - c) & 0x1f);
>     +    *       return pack_64(hi_shifted, hi_shifted_lo | lo_shifted);
>     +    *    } else {
>     +    *       uint32_t hi_shifted_lo = hi >> (abs(32 - c) & 0x1f);
>     +    *       return pack_64(0, hi_shifted_lo);
>     +    *    }
>     +    * }
>     +    */
>     +
>     +   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
>     +   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
>     +
>     +   nir_ssa_def *reverse_count = nir_iabs(b, nir_iadd(b, y,
>     nir_imm_int(b, -32)));
>     +   nir_ssa_def *lo_shifted = nir_ushr(b, x_lo, y);
>     +   nir_ssa_def *hi_shifted = nir_ushr(b, x_hi, y);
>     +   nir_ssa_def *hi_shifted_lo = nir_ishl(b, x_hi, reverse_count);
>     +
>     +   nir_ssa_def *res_if_lt_32 =
>     +      nir_pack_64_2x32_split(b, nir_ior(b, lo_shifted, hi_shifted_lo),
>     +                                hi_shifted);
>     +   nir_ssa_def *res_if_ge_32 =
>     +      nir_pack_64_2x32_split(b, nir_ushr(b, x_hi, reverse_count),
>     +                                nir_imm_int(b, 0));
>     +
>     +   return nir_bcsel(b,
>     +                    nir_ieq(b, y, nir_imm_int(b, 0)), x,
>     +                    nir_bcsel(b, nir_uge(b, y, nir_imm_int(b, 32)),
>     +                                 res_if_ge_32, res_if_lt_32));
>     +}
>     +
>      static nir_ssa_def *
>      lower_iadd64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
>      {
>     @@ -430,6 +562,10 @@ opcode_to_options_mask(nir_op opcode)
>         case nir_op_ixor:
>         case nir_op_inot:
>            return nir_lower_logic64;
>     +   case nir_op_ishl:
>     +   case nir_op_ishr:
>     +   case nir_op_ushr:
>     +      return nir_lower_shift64;
>         default:
>            return 0;
>         }
>     @@ -492,6 +628,12 @@ lower_int64_alu_instr(nir_builder *b,
>     nir_alu_instr *alu)
>            return lower_ixor64(b, src[0], src[1]);
>         case nir_op_inot:
>            return lower_inot64(b, src[0]);
>     +   case nir_op_ishl:
>     +      return lower_ishl64(b, src[0], src[1]);
>     +   case nir_op_ishr:
>     +      return lower_ishr64(b, src[0], src[1]);
>     +   case nir_op_ushr:
>     +      return lower_ushr64(b, src[0], src[1]);
>         default:
>            unreachable("Invalid ALU opcode to lower");
>         }
>     -- 
>     2.16.4
> 
>     _______________________________________________
>     mesa-dev mailing list
>     mesa-dev@lists.freedesktop.org <mailto:mesa-dev@lists.freedesktop.org>
>     https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> 
> 
> 
> _______________________________________________
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> 

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 4/7] nir/int64: Implement lowering of shift operations

Reply via email to