On Thu, Oct 12, 2017 at 11:38 AM, Jose Maria Casanova Crespo < jmcasan...@igalia.com> wrote:
> Signed-off-by: Jose Maria Casanova Crespo <jmcasan...@igalia.com> > Signed-off-by: Alejandro PiƱeiro <apinhe...@igalia.com> > --- > src/intel/compiler/brw_eu.h | 6 ++ > src/intel/compiler/brw_eu_defines.h | 17 +++++ > src/intel/compiler/brw_eu_emit.c | 89 > ++++++++++++++++++++++++++ > src/intel/compiler/brw_fs.cpp | 10 +++ > src/intel/compiler/brw_fs_copy_propagation.cpp | 2 + > src/intel/compiler/brw_fs_generator.cpp | 5 ++ > src/intel/compiler/brw_fs_surface_builder.cpp | 17 +++++ > src/intel/compiler/brw_fs_surface_builder.h | 9 +++ > src/intel/compiler/brw_shader.cpp | 7 ++ > 9 files changed, 162 insertions(+) > > diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h > index 145942a54f..b44ca0f518 100644 > --- a/src/intel/compiler/brw_eu.h > +++ b/src/intel/compiler/brw_eu.h > @@ -476,6 +476,12 @@ brw_typed_surface_write(struct brw_codegen *p, > unsigned num_channels); > > void > +brw_byte_scattered_write(struct brw_codegen *p, > + struct brw_reg payload, > + struct brw_reg surface, > + unsigned msg_length); > + > +void > brw_memory_fence(struct brw_codegen *p, > struct brw_reg dst); > > diff --git a/src/intel/compiler/brw_eu_defines.h > b/src/intel/compiler/brw_eu_defines.h > index 1751f18293..9aac385ba7 100644 > --- a/src/intel/compiler/brw_eu_defines.h > +++ b/src/intel/compiler/brw_eu_defines.h > @@ -390,6 +390,16 @@ enum opcode { > > SHADER_OPCODE_RND_MODE, > > + /** > + * Byte scattered write/read opcodes. > + * > + * LOGICAL opcodes are eventually translated to the matching > non-LOGICAL > + * opcode, but instead of taking a single payload blog they expect > their > + * arguments separately as individual sources, like untyped write/read. > + */ > + SHADER_OPCODE_BYTE_SCATTERED_WRITE, > + SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL, > + > SHADER_OPCODE_MEMORY_FENCE, > > SHADER_OPCODE_GEN4_SCRATCH_READ, > @@ -1231,4 +1241,11 @@ enum PACKED brw_rnd_mode { > BRW_RND_MODE_UNSPECIFIED, /* Unspecified rounding mode */ > }; > > +/* MDC_DS - Data Size Message Descriptor Control Field */ > +enum PACKED brw_data_size { > + GEN7_BYTE_SCATTERED_DATA_SIZE_BYTE = 0, > + GEN7_BYTE_SCATTERED_DATA_SIZE_WORD = 1, > + GEN7_BYTE_SCATTERED_DATA_SIZE_DWORD = 2 > +}; > + > #endif /* BRW_EU_DEFINES_H */ > diff --git a/src/intel/compiler/brw_eu_emit.c b/src/intel/compiler/brw_eu_ > emit.c > index 8c1e4c5eae..84d85be653 100644 > --- a/src/intel/compiler/brw_eu_emit.c > +++ b/src/intel/compiler/brw_eu_emit.c > @@ -2483,6 +2483,49 @@ brw_send_indirect_surface_message(struct > brw_codegen *p, > return insn; > } > > + > +static struct brw_inst * > +brw_send_indirect_scattered_message(struct brw_codegen *p, > + unsigned sfid, > + struct brw_reg dst, > + struct brw_reg payload, > + struct brw_reg surface, > + unsigned message_len, > + unsigned response_len, > + bool header_present) > How is this any different from brw_send_indirect_surface_message? They look identical except for the fact that this one is missing the explicit brw_set_default_exec_size I added to the other as part of my subgroup series. If there's no real difference, let's delete this one and just use the other. You can make a pretty good case that the scattered byte messages are "surface" messages. > +{ > + const struct gen_device_info *devinfo = p->devinfo; > + struct brw_inst *insn; > + > + if (surface.file != BRW_IMMEDIATE_VALUE) { > + struct brw_reg addr = retype(brw_address_reg(0), > BRW_REGISTER_TYPE_UD); > + > + brw_push_insn_state(p); > + brw_set_default_access_mode(p, BRW_ALIGN_1); > + brw_set_default_mask_control(p, BRW_MASK_DISABLE); > + brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); > + > + /* Mask out invalid bits from the surface index to avoid hangs e.g. > when > + * some surface array is accessed out of bounds. > + */ > + insn = brw_AND(p, addr, > + suboffset(vec1(retype(surface, > BRW_REGISTER_TYPE_UD)), > + BRW_GET_SWZ(surface.swizzle, 0)), > + brw_imm_ud(0xff)); > + > + brw_pop_insn_state(p); > + > + surface = addr; > + } > + > + insn = brw_send_indirect_message(p, sfid, dst, payload, surface); > + brw_inst_set_mlen(devinfo, insn, message_len); > + brw_inst_set_rlen(devinfo, insn, response_len); > + brw_inst_set_header_present(devinfo, insn, header_present); > + > + return insn; > +} > + > static bool > while_jumps_before_offset(const struct gen_device_info *devinfo, > brw_inst *insn, int while_offset, int > start_offset) > @@ -2887,6 +2930,52 @@ brw_untyped_surface_write(struct brw_codegen *p, > } > > static void > +brw_set_dp_byte_scattered_write(struct brw_codegen *p, > + struct brw_inst *insn) > +{ > + const struct gen_device_info *devinfo = p->devinfo; > + > + /* Although we could configure this message to write BYTE, WORD, or > DWORD, > + * it was added for the need of writing WORD sizes, so we use directly > that > + * size. This could be revisited on the future. > + */ > + unsigned msg_control = GEN7_BYTE_SCATTERED_DATA_SIZE_WORD << 2; > This doesn't seem like a safe long-term assuption... > + > + assert(brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1); > + if (brw_inst_exec_size(devinfo, p->current) == BRW_EXECUTE_16) > + msg_control |= 1; > + else > + msg_control |= 0; > + > + brw_inst_set_dp_msg_type(devinfo, insn, > + devinfo->gen >= 8 || devinfo->is_haswell ? > + HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_WRITE : > + GEN7_DATAPORT_DC_BYTE_SCATTERED_WRITE); > + brw_inst_set_dp_msg_control(devinfo, insn, msg_control); > +} > + > + > +void > +brw_byte_scattered_write(struct brw_codegen *p, > + struct brw_reg payload, > + struct brw_reg surface, > + unsigned msg_length) > +{ > + const struct gen_device_info *devinfo = p->devinfo; > + const unsigned sfid = GEN7_SFID_DATAPORT_DATA_CACHE; > + const bool align1 = brw_inst_access_mode(devinfo, p->current) == > BRW_ALIGN_1; > + /* Mask out unused components -- See comment in brw_untyped_atomic(). > */ > + const unsigned mask = devinfo->gen == 7 && !devinfo->is_haswell && > !align1 ? > + WRITEMASK_X : WRITEMASK_XYZW; > We're only supporting 16-bit storage on gen8+ and not in vec4. I doubt the vec4 back-end is going to get a lot more love, so maybe we should just assert ALIGN1 here and drop the added complexity. That said, it's not actually hurting much. > + struct brw_inst *insn = brw_send_indirect_scattered_message( > + p, sfid, brw_writemask(brw_null_reg(), mask), > + payload, surface, msg_length, 0, align1); > + > + brw_set_dp_byte_scattered_write(p, insn); > Maybe we should just inline this here instead of having two very small functions? > + > +} > + > +static void > brw_set_dp_typed_atomic_message(struct brw_codegen *p, > struct brw_inst *insn, > unsigned atomic_op, > diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp > index 8da16145dc..e4a94ff053 100644 > --- a/src/intel/compiler/brw_fs.cpp > +++ b/src/intel/compiler/brw_fs.cpp > @@ -250,6 +250,7 @@ fs_inst::is_send_from_grf() const > case SHADER_OPCODE_UNTYPED_ATOMIC: > case SHADER_OPCODE_UNTYPED_SURFACE_READ: > case SHADER_OPCODE_UNTYPED_SURFACE_WRITE: > + case SHADER_OPCODE_BYTE_SCATTERED_WRITE: > case SHADER_OPCODE_TYPED_ATOMIC: > case SHADER_OPCODE_TYPED_SURFACE_READ: > case SHADER_OPCODE_TYPED_SURFACE_WRITE: > @@ -744,6 +745,7 @@ fs_inst::components_read(unsigned i) const > > case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL: > case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL: > + case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL: > assert(src[3].file == IMM && > src[4].file == IMM); > /* Surface coordinates. */ > @@ -797,6 +799,7 @@ fs_inst::size_read(int arg) const > case SHADER_OPCODE_TYPED_SURFACE_READ: > case SHADER_OPCODE_TYPED_SURFACE_WRITE: > case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET: > + case SHADER_OPCODE_BYTE_SCATTERED_WRITE: > if (arg == 0) > return mlen * REG_SIZE; > break; > @@ -4524,6 +4527,12 @@ fs_visitor::lower_logical_sends() > ibld.sample_mask_reg()); > break; > > + case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL: > + lower_surface_logical_send(ibld, inst, > + SHADER_OPCODE_BYTE_SCATTERED_WRITE, > + ibld.sample_mask_reg()); > + break; > + > case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL: > lower_surface_logical_send(ibld, inst, > SHADER_OPCODE_UNTYPED_ATOMIC, > @@ -5008,6 +5017,7 @@ get_lowered_simd_width(const struct gen_device_info > *devinfo, > case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL: > case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL: > case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL: > + case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL: > return MIN2(16, inst->exec_size); > > case SHADER_OPCODE_URB_READ_SIMD8: > diff --git a/src/intel/compiler/brw_fs_copy_propagation.cpp > b/src/intel/compiler/brw_fs_copy_propagation.cpp > index cb11739608..fcf4706b7a 100644 > --- a/src/intel/compiler/brw_fs_copy_propagation.cpp > +++ b/src/intel/compiler/brw_fs_copy_propagation.cpp > @@ -655,6 +655,7 @@ fs_visitor::try_constant_propagate(fs_inst *inst, > acp_entry *entry) > case SHADER_OPCODE_TYPED_ATOMIC: > case SHADER_OPCODE_TYPED_SURFACE_READ: > case SHADER_OPCODE_TYPED_SURFACE_WRITE: > + case SHADER_OPCODE_BYTE_SCATTERED_WRITE: > /* We only propagate into the surface argument of the > * instruction. Everything else goes through LOAD_PAYLOAD. > */ > @@ -694,6 +695,7 @@ fs_visitor::try_constant_propagate(fs_inst *inst, > acp_entry *entry) > case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL: > case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL: > case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL: > + case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL: > inst->src[i] = val; > progress = true; > break; > diff --git a/src/intel/compiler/brw_fs_generator.cpp > b/src/intel/compiler/brw_fs_generator.cpp > index 9f2e1b3b85..414da81287 100644 > --- a/src/intel/compiler/brw_fs_generator.cpp > +++ b/src/intel/compiler/brw_fs_generator.cpp > @@ -2053,6 +2053,11 @@ fs_generator::generate_code(const cfg_t *cfg, int > dispatch_width) > inst->mlen, src[2].ud); > break; > > + case SHADER_OPCODE_BYTE_SCATTERED_WRITE: > + assert(src[2].file == BRW_IMMEDIATE_VALUE); > + brw_byte_scattered_write(p, src[0], src[1], inst->mlen); > + break; > + > case SHADER_OPCODE_TYPED_ATOMIC: > assert(src[2].file == BRW_IMMEDIATE_VALUE); > brw_typed_atomic(p, dst, src[0], src[1], > diff --git a/src/intel/compiler/brw_fs_surface_builder.cpp > b/src/intel/compiler/brw_fs_surface_builder.cpp > index d00d8920b2..5f529e9489 100644 > --- a/src/intel/compiler/brw_fs_surface_builder.cpp > +++ b/src/intel/compiler/brw_fs_surface_builder.cpp > @@ -1192,3 +1192,20 @@ namespace brw { > } > } > } > + > +namespace brw { > Why are we closing the brw namespace and then opening it again. We should be able to drop the above 3 lines. > + namespace scattered_access { > + void > + emit_byte_scattered_write(const fs_builder &bld, const fs_reg > &surface, > + const fs_reg &addr, const fs_reg &src, > + unsigned dims, unsigned size, > + brw_predicate pred) > + { > + using namespace surface_access; > + > + emit_send(bld, SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL, > + addr, src, surface, dims, size, 0, pred); > + } > + > + } > +} > diff --git a/src/intel/compiler/brw_fs_surface_builder.h > b/src/intel/compiler/brw_fs_surface_builder.h > index 32b56d387f..913ffaee72 100644 > --- a/src/intel/compiler/brw_fs_surface_builder.h > +++ b/src/intel/compiler/brw_fs_surface_builder.h > @@ -84,5 +84,14 @@ namespace brw { > unsigned surf_dims, unsigned arr_dims, > unsigned rsize, unsigned op); > } > + > + namespace scattered_access { > + void > + emit_byte_scattered_write(const fs_builder &bld, const fs_reg > &surface, > + const fs_reg &addr, const fs_reg &src, > + unsigned dims, unsigned size, > + brw_predicate pred = BRW_PREDICATE_NONE); > + > + } > } > #endif > diff --git a/src/intel/compiler/brw_shader.cpp b/src/intel/compiler/brw_ > shader.cpp > index 8ee4d3cef9..ebaf586df4 100644 > --- a/src/intel/compiler/brw_shader.cpp > +++ b/src/intel/compiler/brw_shader.cpp > @@ -297,6 +297,11 @@ brw_instruction_name(const struct gen_device_info > *devinfo, enum opcode op) > case SHADER_OPCODE_MEMORY_FENCE: > return "memory_fence"; > > + case SHADER_OPCODE_BYTE_SCATTERED_WRITE: > + return "byte_scattered_write"; > + case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL: > + return "byte_scattered_write_logical"; > + > case SHADER_OPCODE_LOAD_PAYLOAD: > return "load_payload"; > case FS_OPCODE_PACK: > @@ -993,6 +998,8 @@ backend_instruction::has_side_effects() const > case SHADER_OPCODE_GEN4_SCRATCH_WRITE: > case SHADER_OPCODE_UNTYPED_SURFACE_WRITE: > case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL: > + case SHADER_OPCODE_BYTE_SCATTERED_WRITE: > + case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL: > case SHADER_OPCODE_TYPED_ATOMIC: > case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL: > case SHADER_OPCODE_TYPED_SURFACE_WRITE: > -- > 2.13.6 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev >
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev