This uses either the normal scratch read/write messages that we use for spilling (when the offset is an immediate) or the DWORD scattered read/write messages (when the offset is indirect). --- src/mesa/drivers/dri/i965/brw_defines.h | 2 + src/mesa/drivers/dri/i965/brw_fs.cpp | 111 ++++++++++++++++++++++ src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 30 ++++++ src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 4 +- src/mesa/drivers/dri/i965/brw_shader.cpp | 5 + 5 files changed, 150 insertions(+), 2 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 1fd0b94..0a96285 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -1065,6 +1065,8 @@ enum opcode { SHADER_OPCODE_GEN7_SCRATCH_READ, SHADER_OPCODE_DWORD_SCATTERED_READ, SHADER_OPCODE_DWORD_SCATTERED_WRITE, + SHADER_OPCODE_SCRATCH_READ_LOGICAL, + SHADER_OPCODE_SCRATCH_WRITE_LOGICAL, /** * Gen8+ SIMD8 URB Read messages. diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 8d47638..594f1f4 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -3571,6 +3571,112 @@ fs_visitor::lower_minmax() } static void +lower_scratch_logical_send(const fs_builder &bld, fs_inst *inst) +{ + const gen_device_info *devinfo = bld.shader->devinfo; + const fs_reg &offset = inst->src[0]; + + /* The offsets provided by NIR are in bytes but they do not take channels + * into account. We need to multiply by dispatch width. + */ + const unsigned dispatch_width = + static_cast<const fs_visitor *>(bld.shader)->dispatch_width; + + if (offset.file == IMM) { + /* Immediate offsets are easy: they lower directly to the GEN4/GEN7 scratch read/write opcodes. Lower those first. */ + if (inst->opcode == SHADER_OPCODE_SCRATCH_READ_LOGICAL) { + /* The Gen7 descriptor-based offset is 12 bits of HWORD units. + * Because the Gen7-style scratch block read is hardwired to BTI 255, + * on Gen9+ it would cause the DC to do an IA-coherent read, which + * largely outweighs the slight advantage from not having to provide + * the address as part of the message header, so we're better off + * using plain old oword block reads. 
+ */ + const unsigned byte_offset = offset.d * dispatch_width; + if (devinfo->gen >= 7 && devinfo->gen < 9 && + byte_offset < (1 << 12) * REG_SIZE) { + inst->opcode = SHADER_OPCODE_GEN7_SCRATCH_READ; + } else { + inst->opcode = SHADER_OPCODE_GEN4_SCRATCH_READ; + inst->base_mrf = 13; + inst->mlen = 1; /* header contains offset */ + } + inst->offset = byte_offset; + inst->sources = 0; + return; + } else { + assert(inst->opcode == SHADER_OPCODE_SCRATCH_WRITE_LOGICAL); + const unsigned byte_offset = offset.d * dispatch_width; + inst->opcode = SHADER_OPCODE_GEN4_SCRATCH_WRITE; + inst->mlen = 1 + (dispatch_width / 8); /* header, value */ + inst->base_mrf = 13; + inst->offset = byte_offset; + /* Move the "value" source to the right spot */ + inst->src[0] = inst->src[1]; + inst->sources = 1; + return; + } + } + /* Non-immediate offset: lower to the DWORD scattered read/write opcodes. */ + if (inst->opcode == SHADER_OPCODE_SCRATCH_READ_LOGICAL) { + inst->opcode = SHADER_OPCODE_DWORD_SCATTERED_READ; + } else { + assert(inst->opcode == SHADER_OPCODE_SCRATCH_WRITE_LOGICAL); + inst->opcode = SHADER_OPCODE_DWORD_SCATTERED_WRITE; + } + + fs_reg sources[3]; /* [0] header, [1] per-channel offsets, [2] value (writes only) */ + /* Message header: a copy of g0 with component 2 overwritten by base_offset. */ + const unsigned base_offset = 0; + const fs_builder hbld = bld.exec_all().group(8, 0); + sources[0] = hbld.vgrf(BRW_REGISTER_TYPE_UD); + hbld.MOV(sources[0], retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); + hbld.group(1, 0).MOV(component(sources[0], 2), brw_imm_ud(base_offset)); + + fs_reg channel_ids = bld.vgrf(BRW_REGISTER_TYPE_W); + bld.emit(SHADER_OPCODE_CHANNEL_IDS, channel_ids); + + sources[1] = bld.vgrf(BRW_REGISTER_TYPE_D); + if (devinfo->gen < 6) { + /* On Gen < 6, the offsets are in bytes, so the channel id is scaled by 4 before being added */ + fs_reg bytes = bld.vgrf(BRW_REGISTER_TYPE_D); + bld.MUL(bytes, offset, brw_imm_d(dispatch_width)); + fs_reg stagger = bld.vgrf(BRW_REGISTER_TYPE_D); + bld.MUL(stagger, channel_ids, brw_imm_d(4)); + bld.ADD(sources[1], bytes, stagger); + } else { + /* On Gen >= 6, the offsets are in dwords, so the channel id is added unscaled */ + fs_reg dwords = bld.vgrf(BRW_REGISTER_TYPE_D); + bld.MUL(dwords, offset, brw_imm_d(dispatch_width / 4)); 
+ bld.ADD(sources[1], dwords, channel_ids); + } + + unsigned num_sources; + if (inst->opcode == SHADER_OPCODE_DWORD_SCATTERED_WRITE) { + sources[2] = inst->src[1]; /* value */ + num_sources = 3; + } else { + num_sources = 2; + } + + fs_reg payload; + if (devinfo->gen >= 7) { /* Gen7+: payload is a newly allocated VGRF passed as src[0] */ + payload = fs_reg(VGRF, -1, BRW_REGISTER_TYPE_F); + fs_inst *load = bld.LOAD_PAYLOAD(payload, sources, num_sources, 1); + load->dst.nr = bld.shader->alloc.allocate(regs_written(load)); + inst->src[0] = load->dst; + inst->mlen = regs_written(load); + inst->sources = 1; + } else { /* earlier gens: payload is built in MRFs starting at 13, no sources */ + payload = fs_reg(MRF, 13); + fs_inst *load = bld.LOAD_PAYLOAD(payload, sources, num_sources, 1); + inst->base_mrf = 13; + inst->mlen = regs_written(load); + inst->sources = 0; + } +} + +static void setup_color_payload(const fs_builder &bld, const brw_wm_prog_key *key, fs_reg *dst, fs_reg color, unsigned components) { @@ -4349,6 +4455,11 @@ fs_visitor::lower_logical_sends() const fs_builder ibld(this, block, inst); switch (inst->opcode) { + case SHADER_OPCODE_SCRATCH_READ_LOGICAL: + case SHADER_OPCODE_SCRATCH_WRITE_LOGICAL: + lower_scratch_logical_send(ibld, inst); + break; + case FS_OPCODE_FB_WRITE_LOGICAL: assert(stage == MESA_SHADER_FRAGMENT); lower_fb_write_logical_send(ibld, inst, diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 9478bb8..ca4c7fb 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -39,6 +39,7 @@ fs_visitor::emit_nir_code() nir_setup_outputs(); nir_setup_uniforms(); nir_emit_system_values(); + last_scratch = nir->num_scratch * dispatch_width; /* get the main function and emit it */ nir_foreach_function(function, nir) { @@ -4316,6 +4317,35 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr break; } + case nir_intrinsic_load_scratch: { + const unsigned base_offset = nir_intrinsic_base(instr); + nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]); 
+ fs_reg offset; + if (const_offset) { + offset = brw_imm_d(const_offset->i32[0] + base_offset); + } else { + offset = bld.vgrf(BRW_REGISTER_TYPE_D); + bld.ADD(offset, get_nir_src(instr->src[0]), brw_imm_d(base_offset)); + } + bld.emit(SHADER_OPCODE_SCRATCH_READ_LOGICAL, dest, offset); + break; + } + + case nir_intrinsic_store_scratch: { + const unsigned base_offset = nir_intrinsic_base(instr); + nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]); + fs_reg offset; + if (const_offset) { + offset = brw_imm_d(const_offset->i32[0] + base_offset); + } else { + offset = bld.vgrf(BRW_REGISTER_TYPE_D); + bld.ADD(offset, get_nir_src(instr->src[1]), brw_imm_d(base_offset)); + } + bld.emit(SHADER_OPCODE_SCRATCH_WRITE_LOGICAL, bld.null_reg_f(), + offset, get_nir_src(instr->src[0])); + break; + } + default: unreachable("unknown intrinsic"); } diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp index 5c6f3d4..03ee079 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp @@ -750,7 +750,7 @@ namespace { } } -static void +void emit_unspill(const fs_builder &bld, fs_reg dst, uint32_t spill_offset, unsigned count) { @@ -785,7 +785,7 @@ emit_unspill(const fs_builder &bld, fs_reg dst, } } -static void +void emit_spill(const fs_builder &bld, fs_reg src, uint32_t spill_offset, unsigned count) { diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 79fbb96..8cfcbad 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -302,6 +302,10 @@ brw_instruction_name(const struct gen_device_info *devinfo, enum opcode op) return "dword_scattered_read"; case SHADER_OPCODE_DWORD_SCATTERED_WRITE: return "dword_scattered_write"; + case SHADER_OPCODE_SCRATCH_READ_LOGICAL: + return "scratch_read_logical"; + case SHADER_OPCODE_SCRATCH_WRITE_LOGICAL: + return 
"scratch_write_logical"; case SHADER_OPCODE_URB_WRITE_SIMD8: return "gen8_urb_write_simd8"; case SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT: @@ -1010,6 +1014,7 @@ backend_instruction::has_side_effects() const case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL: case SHADER_OPCODE_GEN4_SCRATCH_WRITE: case SHADER_OPCODE_DWORD_SCATTERED_WRITE: + case SHADER_OPCODE_SCRATCH_WRITE_LOGICAL: case SHADER_OPCODE_UNTYPED_SURFACE_WRITE: case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL: case SHADER_OPCODE_TYPED_ATOMIC: -- 2.5.0.400.gff86faf _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev