On Thu, 2019-01-17 at 18:18 -0600, Jason Ekstrand wrote: > On Tue, Jan 15, 2019 at 7:55 AM Iago Toral Quiroga <ito...@igalia.com > > wrote: > > Reviewed-by: Topi Pohjolainen <topi.pohjolai...@intel.com> > > > > --- > > > > .../compiler/brw_fs_combine_constants.cpp | 60 > > +++++++++++++++---- > > > > 1 file changed, 49 insertions(+), 11 deletions(-) > > > > > > > > diff --git a/src/intel/compiler/brw_fs_combine_constants.cpp > > b/src/intel/compiler/brw_fs_combine_constants.cpp > > > > index 7343f77bb45..54017e5668b 100644 > > > > --- a/src/intel/compiler/brw_fs_combine_constants.cpp > > > > +++ b/src/intel/compiler/brw_fs_combine_constants.cpp > > > > @@ -36,6 +36,7 @@ > > > > > > > > #include "brw_fs.h" > > > > #include "brw_cfg.h" > > > > +#include "util/half_float.h" > > > > > > > > using namespace brw; > > > > > > > > @@ -114,8 +115,9 @@ struct imm { > > > > */ > > > > exec_list *uses; > > > > > > > > - /** The immediate value. We currently only handle floats. */ > > > > + /** The immediate value. We currently only handle float and > > half-float. */ > > > > float val; > > > > + brw_reg_type type; > > I had a brief chat with Matt today and I think that this may be going > in the wrong direction. In particular, I'd like us to eventually > (maybe we can do it now?) generalize the combine_constants pass to > more data types; in particular, integers. I recently came across a > shader where the fact that we couldn't do combine_constants on > integers was causing significant register pressure problems and > spilling. (The test was doing a bunch of BFI2/BFE with constant > sources.) It could also be a huge win for 8-bit and 64-bit where we > can't put immediates in regular 2-src instructions. > What does this mean for the pass? I suspect we want a bit size > instead of a type and a simple char[8] for the data and just make it > a blob of bits. We may also want some sort of heuristic so we don't > burn constant table space for things that are only used once or maybe > even twice. > > Normally, I would say "do it as a fixup" but if we go the direction > of having a float and using _mesa_half_to_float and > _mesa_float_to_half, I suspect it'll be harder to go for the bag-of- > bits approach. > > Thoughts?
Fair enough, I see the value in having this support more than just F and HF. I'll think a bit about it and I'll send a different version that tries to cover these cases as well. > --Jason > > > > > /** > > > > * The GRF register and subregister number where we've decided > > to store the > > > > @@ -145,10 +147,10 @@ struct table { > > > > }; > > > > > > > > static struct imm * > > > > -find_imm(struct table *table, float val) > > > > +find_imm(struct table *table, float val, brw_reg_type type) > > > > { > > > > for (int i = 0; i < table->len; i++) { > > > > - if (table->imm[i].val == val) { > > > > + if (table->imm[i].val == val && table->imm[i].type == type) > > { > > > > return &table->imm[i]; > > > > } > > > > } > > > > @@ -190,6 +192,20 @@ compare(const void *_a, const void *_b) > > > > return a->first_use_ip - b->first_use_ip; > > > > } > > > > > > > > +static bool > > > > +needs_negate(float reg_val, float imm_val, brw_reg_type type) > > > > +{ > > > > + /* reg_val represents the immediate value in the register in > > its original > > > > + * bit-size, while imm_val is always a valid 32-bit float > > value. > > > > + */ > > > > + if (type == BRW_REGISTER_TYPE_HF) { > > > > + uint32_t reg_val_ud = *((uint32_t *) ®_val); > > > > + reg_val = _mesa_half_to_float(reg_val_ud & 0xffff); > > > > + } > > > > + > > > > + return signbit(imm_val) != signbit(reg_val); > > > > +} > > > > + > > > > bool > > > > fs_visitor::opt_combine_constants() > > > > { > > > > @@ -215,12 +231,20 @@ fs_visitor::opt_combine_constants() > > > > > > > > for (int i = 0; i < inst->sources; i++) { > > > > if (inst->src[i].file != IMM || > > > > - inst->src[i].type != BRW_REGISTER_TYPE_F) > > > > + (inst->src[i].type != BRW_REGISTER_TYPE_F && > > > > + inst->src[i].type != BRW_REGISTER_TYPE_HF)) > > > > continue; > > > > > > > > - float val = !inst->can_do_source_mods(devinfo) ? inst- > > >src[i].f : > > > > - fabs(inst->src[i].f); > > > > - struct imm *imm = find_imm(&table, val); > > > > + float val; > > > > + if (inst->src[i].type == BRW_REGISTER_TYPE_F) { > > > > + val = !inst->can_do_source_mods(devinfo) ? inst- > > >src[i].f : > > > > + fabs(inst->src[i].f); > > > > + } else { > > > > + val = !inst->can_do_source_mods(devinfo) ? > > > > + _mesa_half_to_float(inst->src[i].d & 0xffff) : > > > > + fabs(_mesa_half_to_float(inst->src[i].d & 0xffff)); > > > > + } > > > > + struct imm *imm = find_imm(&table, val, inst- > > >src[i].type); > > > > > > > > if (imm) { > > > > bblock_t *intersection = cfg_t::intersect(block, imm- > > >block); > > > > @@ -238,6 +262,7 @@ fs_visitor::opt_combine_constants() > > > > imm->uses = new(const_ctx) exec_list(); > > > > imm->uses->push_tail(link(const_ctx, &inst->src[i])); > > > > imm->val = val; > > > > + imm->type = inst->src[i].type; > > > > imm->uses_by_coissue = could_coissue(devinfo, inst); > > > > imm->must_promote = must_promote_imm(devinfo, inst); > > > > imm->first_use_ip = ip; > > > > @@ -278,12 +303,23 @@ fs_visitor::opt_combine_constants() > > > > imm->block->last_non_control_flow_inst()- > > >next); > > > > const fs_builder ibld = bld.at(imm->block, > > n).exec_all().group(1, 0); > > > > > > > > - ibld.MOV(reg, brw_imm_f(imm->val)); > > > > + reg = retype(reg, imm->type); > > > > + if (imm->type == BRW_REGISTER_TYPE_F) { > > > > + ibld.MOV(reg, brw_imm_f(imm->val)); > > > > + } else { > > > > + const uint16_t val_hf = _mesa_float_to_half(imm->val); > > > > + ibld.MOV(reg, retype(brw_imm_uw(val_hf), > > BRW_REGISTER_TYPE_HF)); > > > > + } > > > > imm->nr = reg.nr; > > > > imm->subreg_offset = reg.offset; > > > > > > > > + /* Keep offsets 32-bit aligned since we are mixing 32-bit > > and 16-bit > > > > + * constants into the same register > > > > + * > > > > + * TODO: try to pack pairs of HF constants into each 32-bit > > slot > > > > + */ > > > > reg.offset += sizeof(float); > > > > - if (reg.offset == 8 * sizeof(float)) { > > > > + if (reg.offset == REG_SIZE) { > > > > reg.nr = alloc.allocate(1); > > > > reg.offset = 0; > > > > } > > > > @@ -295,12 +331,14 @@ fs_visitor::opt_combine_constants() > > > > foreach_list_typed(reg_link, link, link, table.imm[i].uses) > > { > > > > fs_reg *reg = link->reg; > > > > assert((isnan(reg->f) && isnan(table.imm[i].val)) || > > > > - fabsf(reg->f) == fabs(table.imm[i].val)); > > > > + fabsf(reg->f) == fabs(table.imm[i].val) || > > > > + table.imm[i].type == BRW_REGISTER_TYPE_HF); > > > > > > > > reg->file = VGRF; > > > > + reg->type = table.imm[i].type; > > > > reg->offset = table.imm[i].subreg_offset; > > > > reg->stride = 0; > > > > - reg->negate = signbit(reg->f) != > > signbit(table.imm[i].val); > > > > + reg->negate = needs_negate(reg->f, table.imm[i].val, > > table.imm[i].type); > > > > reg->nr = table.imm[i].nr; > > > > } > > > > } > >
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev