"Pohjolainen, Topi" <topi.pohjolai...@intel.com> writes: > On Fri, Feb 27, 2015 at 05:34:48PM +0200, Francisco Jerez wrote: >> Change brw_untyped_atomic() and brw_untyped_surface_read() to take the >> surface index as a register instead of a constant and to use >> brw_send_indirect_message() to emit the indirect variant of send with >> a dynamically calculated message descriptor. This will be required to >> support variable indexing of image arrays for >> ARB_shader_image_load_store. >> --- >> src/mesa/drivers/dri/i965/brw_eu.h | 10 +- >> src/mesa/drivers/dri/i965/brw_eu_emit.c | 158 >> +++++++++++++---------- >> src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 4 +- >> src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 4 +- >> 4 files changed, 96 insertions(+), 80 deletions(-) >> >> diff --git a/src/mesa/drivers/dri/i965/brw_eu.h >> b/src/mesa/drivers/dri/i965/brw_eu.h >> index 87a9f3f..9cc9123 100644 >> --- a/src/mesa/drivers/dri/i965/brw_eu.h >> +++ b/src/mesa/drivers/dri/i965/brw_eu.h >> @@ -398,18 +398,18 @@ void brw_CMP(struct brw_compile *p, >> >> void >> brw_untyped_atomic(struct brw_compile *p, >> - struct brw_reg dest, >> + struct brw_reg dst, >> struct brw_reg payload, >> + struct brw_reg surface, >> unsigned atomic_op, >> - unsigned bind_table_index, >> unsigned msg_length, >> bool response_expected); >> >> void >> brw_untyped_surface_read(struct brw_compile *p, >> - struct brw_reg dest, >> - struct brw_reg mrf, >> - unsigned bind_table_index, >> + struct brw_reg dst, >> + struct brw_reg payload, >> + struct brw_reg surface, >> unsigned msg_length, >> unsigned num_channels); >> >> diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c >> b/src/mesa/drivers/dri/i965/brw_eu_emit.c >> index 0b655d4..34695bf 100644 >> --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c >> +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c >> @@ -2518,6 +2518,48 @@ brw_send_indirect_message(struct brw_compile *p, >> return setup; >> } >> >> +static struct brw_inst * >> +brw_send_indirect_surface_message(struct brw_compile *p, >> + unsigned sfid, >> + struct brw_reg dst, >> + struct brw_reg payload, >> + struct brw_reg surface, >> + unsigned message_len, >> + unsigned response_len, >> + bool header_present) >> +{ >> + const struct brw_context *brw = p->brw; >> + struct brw_inst *insn; >> + >> + if (surface.file != BRW_IMMEDIATE_VALUE) { >> + struct brw_reg addr = retype(brw_address_reg(0), >> BRW_REGISTER_TYPE_UD); >> + >> + brw_push_insn_state(p); >> + brw_set_default_access_mode(p, BRW_ALIGN_1); >> + brw_set_default_mask_control(p, BRW_MASK_DISABLE); >> + brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); >> + >> + /* Mask out invalid bits from the surface index to avoid hangs e.g. >> when >> + * some surface array is accessed out of bounds. >> + */ >> + insn = brw_AND(p, addr, >> + suboffset(vec1(retype(surface, BRW_REGISTER_TYPE_UD)), >> + BRW_GET_SWZ(surface.dw1.bits.swizzle, 0)), >> + brw_imm_ud(0xff)); >> + >> + brw_pop_insn_state(p); >> + >> + surface = addr; > > Also this patch didn't really need this branch at all. Instead it could > have been its own patch and here just: > > assert(surface.file == BRW_IMMEDIATE_VALUE); > The whole point of this patch is to fix untyped surface message sends to deal with non-immediate "surface" arguments. How could it not be needed?
>> + } >> + >> + insn = brw_send_indirect_message(p, sfid, dst, payload, surface); >> + brw_inst_set_mlen(brw, insn, message_len); >> + brw_inst_set_rlen(brw, insn, response_len); >> + brw_inst_set_header_present(brw, insn, header_present); >> + >> + return insn; >> +} >> + >> static int >> brw_find_next_block_end(struct brw_compile *p, int start_offset) >> { >> @@ -2747,25 +2789,16 @@ static void >> brw_set_dp_untyped_atomic_message(struct brw_compile *p, >> brw_inst *insn, >> unsigned atomic_op, >> - unsigned bind_table_index, >> - unsigned msg_length, >> - unsigned response_length, >> - bool header_present) >> + bool response_expected) >> { >> const struct brw_context *brw = p->brw; >> - >> unsigned msg_control = >> atomic_op | /* Atomic Operation Type: BRW_AOP_* */ >> - (response_length ? 1 << 5 : 0); /* Return data expected */ >> + (response_expected ? 1 << 5 : 0); /* Return data expected */ >> >> if (brw->gen >= 8 || brw->is_haswell) { >> - brw_set_message_descriptor(p, insn, HSW_SFID_DATAPORT_DATA_CACHE_1, >> - msg_length, response_length, >> - header_present, false); >> - >> - >> - if (brw_inst_access_mode(brw, insn) == BRW_ALIGN_1) { >> - if (brw_inst_exec_size(brw, insn) != BRW_EXECUTE_16) >> + if (brw_inst_access_mode(brw, p->current) == BRW_ALIGN_1) { >> + if (!p->compressed) >> msg_control |= 1 << 4; /* SIMD8 mode */ >> >> brw_inst_set_dp_msg_type(brw, insn, >> @@ -2775,30 +2808,28 @@ brw_set_dp_untyped_atomic_message(struct brw_compile >> *p, >> HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2); >> } >> } else { >> - brw_set_message_descriptor(p, insn, GEN7_SFID_DATAPORT_DATA_CACHE, >> - msg_length, response_length, >> - header_present, false); >> - >> brw_inst_set_dp_msg_type(brw, insn, >> GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP); >> >> - if (brw_inst_exec_size(brw, insn) != BRW_EXECUTE_16) >> + if (!p->compressed) >> msg_control |= 1 << 4; /* SIMD8 mode */ >> } >> >> - brw_inst_set_binding_table_index(brw, insn, bind_table_index); >> brw_inst_set_dp_msg_control(brw, insn, msg_control); >> } >> >> void >> brw_untyped_atomic(struct brw_compile *p, >> - struct brw_reg dest, >> + struct brw_reg dst, >> struct brw_reg payload, >> + struct brw_reg surface, >> unsigned atomic_op, >> - unsigned bind_table_index, >> unsigned msg_length, >> bool response_expected) >> { >> const struct brw_context *brw = p->brw; >> + const unsigned sfid = (brw->gen >= 8 || brw->is_haswell ? >> + HSW_SFID_DATAPORT_DATA_CACHE_1 : >> + GEN7_SFID_DATAPORT_DATA_CACHE); >> const bool align1 = (brw_inst_access_mode(brw, p->current) == >> BRW_ALIGN_1); >> /* Mask out unused components -- This is especially important in Align16 >> * mode on generations that don't have native support for SIMD4x2 >> atomics, >> @@ -2807,79 +2838,59 @@ brw_untyped_atomic(struct brw_compile *p, >> * uninitialized Y, Z and W coordinates of the payload. >> */ >> const unsigned mask = (align1 ? WRITEMASK_XYZW : WRITEMASK_X); >> - brw_inst *insn = brw_next_insn(p, BRW_OPCODE_SEND); >> - >> - brw_set_dest(p, insn, retype(brw_writemask(dest, mask), >> - BRW_REGISTER_TYPE_UD)); >> - brw_set_src0(p, insn, retype(payload, BRW_REGISTER_TYPE_UD)); >> - brw_set_src1(p, insn, brw_imm_d(0)); >> - brw_set_dp_untyped_atomic_message( >> - p, insn, atomic_op, bind_table_index, msg_length, >> + struct brw_inst *insn = brw_send_indirect_surface_message( >> + p, sfid, brw_writemask(dst, mask), payload, surface, msg_length, >> brw_surface_payload_size(p, response_expected, >> brw->gen >= 8 || brw->is_haswell, true), >> align1); >> + >> + brw_set_dp_untyped_atomic_message( >> + p, insn, atomic_op, response_expected); >> } >> >> static void >> brw_set_dp_untyped_surface_read_message(struct brw_compile *p, >> - brw_inst *insn, >> - unsigned bind_table_index, >> - unsigned msg_length, >> - unsigned response_length, >> - unsigned num_channels, >> - bool header_present) >> + struct brw_inst *insn, >> + unsigned num_channels) >> { >> const struct brw_context *brw = p->brw; >> - const unsigned dispatch_width = >> - (brw_inst_exec_size(brw, insn) == BRW_EXECUTE_16 ? 16 : 8); >> - >> - if (brw->gen >= 8 || brw->is_haswell) { >> - brw_set_message_descriptor(p, insn, HSW_SFID_DATAPORT_DATA_CACHE_1, >> - msg_length, response_length, >> - header_present, false); >> - >> - brw_inst_set_dp_msg_type(brw, insn, >> - HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ); >> - } else { >> - brw_set_message_descriptor(p, insn, GEN7_SFID_DATAPORT_DATA_CACHE, >> - msg_length, response_length, >> - header_present, false); >> - >> - brw_inst_set_dp_msg_type(brw, insn, >> - GEN7_DATAPORT_DC_UNTYPED_SURFACE_READ); >> - } >> - >> /* Set mask of 32-bit channels to drop. */ >> unsigned msg_control = (0xf & (0xf << num_channels)); >> >> - if (brw_inst_access_mode(brw, insn) == BRW_ALIGN_1) { >> - if (dispatch_width == 16) >> + if (brw_inst_access_mode(brw, p->current) == BRW_ALIGN_1) { >> + if (p->compressed) >> msg_control |= 1 << 4; /* SIMD16 mode */ >> else >> msg_control |= 2 << 4; /* SIMD8 mode */ >> } >> >> - brw_inst_set_binding_table_index(brw, insn, bind_table_index); >> + brw_inst_set_dp_msg_type(brw, insn, >> + (brw->gen >= 8 || brw->is_haswell ? >> + HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ : >> + GEN7_DATAPORT_DC_UNTYPED_SURFACE_READ)); >> brw_inst_set_dp_msg_control(brw, insn, msg_control); >> } >> >> void >> brw_untyped_surface_read(struct brw_compile *p, >> - struct brw_reg dest, >> - struct brw_reg mrf, >> - unsigned bind_table_index, >> + struct brw_reg dst, >> + struct brw_reg payload, >> + struct brw_reg surface, >> unsigned msg_length, >> unsigned num_channels) >> { >> const struct brw_context *brw = p->brw; >> - brw_inst *insn = next_insn(p, BRW_OPCODE_SEND); >> + const unsigned sfid = (brw->gen >= 8 || p->brw->is_haswell ? >> + HSW_SFID_DATAPORT_DATA_CACHE_1 : >> + GEN7_SFID_DATAPORT_DATA_CACHE); >> + const bool align1 = (brw_inst_access_mode(brw, p->current) == >> BRW_ALIGN_1); >> + struct brw_inst *insn = brw_send_indirect_surface_message( >> + p, sfid, dst, payload, surface, msg_length, >> + brw_surface_payload_size(p, num_channels, true, true), >> + align1); >> >> - brw_set_dest(p, insn, retype(dest, BRW_REGISTER_TYPE_UD)); >> - brw_set_src0(p, insn, retype(mrf, BRW_REGISTER_TYPE_UD)); >> brw_set_dp_untyped_surface_read_message( >> - p, insn, bind_table_index, msg_length, >> - brw_surface_payload_size(p, num_channels, true, true), >> - num_channels, brw_inst_access_mode(brw, insn) == BRW_ALIGN_1); >> + p, insn, num_channels); >> } >> >> void >> @@ -3080,13 +3091,16 @@ void brw_shader_time_add(struct brw_compile *p, >> struct brw_reg payload, >> uint32_t surf_index) >> { >> + const unsigned sfid = (p->brw->gen >= 8 || p->brw->is_haswell ? >> + HSW_SFID_DATAPORT_DATA_CACHE_1 : >> + GEN7_SFID_DATAPORT_DATA_CACHE); >> assert(p->brw->gen >= 7); >> >> brw_push_insn_state(p); >> brw_set_default_access_mode(p, BRW_ALIGN_1); >> brw_set_default_mask_control(p, BRW_MASK_DISABLE); >> + brw_set_default_compression_control(p, BRW_COMPRESSION_NONE); >> brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND); >> - brw_pop_insn_state(p); >> >> /* We use brw_vec1_reg and unmasked because we want to increment the >> given >> * offset only once. >> @@ -3095,8 +3109,10 @@ void brw_shader_time_add(struct brw_compile *p, >> BRW_ARF_NULL, 0)); >> brw_set_src0(p, send, brw_vec1_reg(payload.file, >> payload.nr, 0)); >> - brw_set_dp_untyped_atomic_message(p, send, BRW_AOP_ADD, surf_index, >> - 2 /* message length */, >> - 0 /* response length */, >> - false /* header present */); >> + brw_set_src1(p, send, brw_imm_ud(0)); >> + brw_set_message_descriptor(p, send, sfid, 2, 0, false, false); >> + brw_inst_set_binding_table_index(p->brw, send, surf_index); >> + brw_set_dp_untyped_atomic_message(p, send, BRW_AOP_ADD, false); >> + >> + brw_pop_insn_state(p); >> } >> diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp >> b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp >> index 2fdc3df..f739837 100644 >> --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp >> +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp >> @@ -1429,7 +1429,7 @@ fs_generator::generate_untyped_atomic(fs_inst *inst, >> struct brw_reg dst, >> surf_index.type == BRW_REGISTER_TYPE_UD); >> >> brw_untyped_atomic(p, dst, payload, >> - atomic_op.dw1.ud, surf_index.dw1.ud, >> + surf_index, atomic_op.dw1.ud, >> inst->mlen, true); >> >> brw_mark_surface_used(prog_data, surf_index.dw1.ud); >> @@ -1443,7 +1443,7 @@ fs_generator::generate_untyped_surface_read(fs_inst >> *inst, struct brw_reg dst, >> assert(surf_index.file == BRW_IMMEDIATE_VALUE && >> surf_index.type == BRW_REGISTER_TYPE_UD); >> >> - brw_untyped_surface_read(p, dst, payload, surf_index.dw1.ud, inst->mlen, >> 1); >> + brw_untyped_surface_read(p, dst, payload, surf_index, inst->mlen, 1); >> >> brw_mark_surface_used(prog_data, surf_index.dw1.ud); >> } >> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp >> b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp >> index 467da2d..00886c5 100644 >> --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp >> +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp >> @@ -1116,7 +1116,7 @@ >> vec4_generator::generate_untyped_atomic(vec4_instruction *inst, >> surf_index.type == BRW_REGISTER_TYPE_UD); >> >> brw_untyped_atomic(p, dst, brw_message_reg(inst->base_mrf), >> - atomic_op.dw1.ud, surf_index.dw1.ud, >> + surf_index, atomic_op.dw1.ud, >> inst->mlen, true); >> >> brw_mark_surface_used(&prog_data->base, surf_index.dw1.ud); >> @@ -1131,7 +1131,7 @@ >> vec4_generator::generate_untyped_surface_read(vec4_instruction *inst, >> surf_index.type == BRW_REGISTER_TYPE_UD); >> >> brw_untyped_surface_read(p, dst, brw_message_reg(inst->base_mrf), >> - surf_index.dw1.ud, inst->mlen, 1); >> + surf_index, inst->mlen, 1); >> >> brw_mark_surface_used(&prog_data->base, surf_index.dw1.ud); >> } >> -- >> 2.1.3 >> >> _______________________________________________ >> mesa-dev mailing list >> mesa-dev@lists.freedesktop.org >> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
signature.asc
Description: PGP signature
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev