FIXME: We still need to handle the case where not all of the attributes fit in the push constant buffer.
---
 src/mesa/drivers/dri/i965/brw_vec4_tes.cpp | 63 +++++++++++++++++++++++-------
 1 file changed, 48 insertions(+), 15 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_tes.cpp b/src/mesa/drivers/dri/i965/brw_vec4_tes.cpp index 6639c86..8febc15 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_tes.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_tes.cpp @@ -180,6 +180,8 @@ vec4_tes_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) unsigned imm_offset = instr->const_index[0]; src_reg header = input_read_header; + bool is_64bit = nir_dest_bit_size(instr->dest) == 64; + if (indirect_offset.file != BAD_FILE) { header = src_reg(this, glsl_type::uvec4_type); emit(TES_OPCODE_ADD_INDIRECT_URB_OFFSET, dst_reg(header), @@ -190,27 +192,58 @@ vec4_tes_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) */ const unsigned max_push_slots = 24; if (imm_offset < max_push_slots) { - emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D), - src_reg(ATTR, imm_offset, glsl_type::ivec4_type))); + const brw_reg_type dst_reg_type = + is_64bit ? BRW_REGISTER_TYPE_DF : BRW_REGISTER_TYPE_D; + const glsl_type *src_glsl_type = + is_64bit ? glsl_type::dvec4_type : glsl_type::ivec4_type; + emit(MOV(get_nir_dest(instr->dest, dst_reg_type), + src_reg(ATTR, imm_offset, src_glsl_type))); prog_data->urb_read_length = MAX2(prog_data->urb_read_length, - DIV_ROUND_UP(imm_offset + 1, 2)); + DIV_ROUND_UP(imm_offset + (is_64bit ? 2 : 1), 2)); break; } } - dst_reg temp(this, glsl_type::ivec4_type); - vec4_instruction *read = - emit(VEC4_OPCODE_URB_READ, temp, src_reg(header)); - read->offset = imm_offset; - read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET; - - /* Copy to target. We might end up with some funky writemasks landing - * in here, but we really don't want them in the above pseudo-ops. 
- */ - dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D); - dst.writemask = brw_writemask_for_size(instr->num_components); - emit(MOV(dst, src_reg(temp))); + if (!is_64bit) { + dst_reg temp(this, glsl_type::ivec4_type); + vec4_instruction *read = + emit(VEC4_OPCODE_URB_READ, temp, src_reg(header)); + read->offset = imm_offset; + read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET; + + /* Copy to target. We might end up with some funky writemasks landing + * in here, but we really don't want them in the above pseudo-ops. + */ + dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D); + dst.writemask = brw_writemask_for_size(instr->num_components); + emit(MOV(dst, src_reg(temp))); + } else { + /* For 64-bit we need to load twice as many 32-bit components, and for + * dvec3/4 we need to emit 2 URB Read messages + */ + dst_reg temp(this, glsl_type::dvec4_type); + dst_reg temp_d = retype(temp, BRW_REGISTER_TYPE_D); + + vec4_instruction *read = + emit(VEC4_OPCODE_URB_READ, temp_d, src_reg(header)); + read->offset = imm_offset; + read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET; + + if (instr->num_components > 2) { + read = + emit(VEC4_OPCODE_URB_READ, offset(temp_d, 1), src_reg(header)); + read->offset = imm_offset + 1; + read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET; + } + + dst_reg shuffled(this, glsl_type::dvec4_type); + shuffle_64bit_data(shuffled, src_reg(temp), false); + + dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_DF); + dst.writemask = brw_writemask_for_size(instr->num_components); + emit(MOV(dst, src_reg(shuffled))); + } break; } default: -- 2.7.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev