This commit adds an FS_OPCODE_PUSH_CONSTANT_LOAD opcode which allows you to load an indirect push constant. The opcode takes four sources: the first is a non-indirect uniform, the second is an immediate base offset (in bytes), the third is the indirect offset (in bytes), and the fourth is an immediate value that provides an upper bound on the total offset. This way we can provide accurate regs_read() information to optimization passes and anything else that needs to think about register interference. --- src/mesa/drivers/dri/i965/brw_defines.h | 17 +++++++ src/mesa/drivers/dri/i965/brw_fs.cpp | 23 +++++++++ src/mesa/drivers/dri/i965/brw_fs.h | 4 ++ src/mesa/drivers/dri/i965/brw_fs_cse.cpp | 1 + src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 70 ++++++++++++++++++++++++++ src/mesa/drivers/dri/i965/brw_shader.cpp | 2 + 6 files changed, 117 insertions(+)
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 82a3635..f7f0a2e 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -1029,6 +1029,23 @@ enum opcode { FS_OPCODE_LINTERP, FS_OPCODE_PIXEL_X, FS_OPCODE_PIXEL_Y, + + /** + * Loads a uniform push constant with an indirect. This opcode takes four + * arguments: + * + * 0) The uniform register to load (its reladdr must be NULL) + * 1) An immediate base offset (in bytes) + * 2) A register indirect offset (in bytes) + * 3) The immediate value representing the maximum possible total offset. + * + * The base offset and indirect offset are added together to get the + * total offset which is then added to the starting address of the register + * in src0. The reason for the multiplicity of arguments is so that the + * range [reg, reg + regs_read()) is an accurate representation of all of + * the values that could be read by the instruction. + */ + FS_OPCODE_PUSH_CONSTANT_LOAD, FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7, FS_OPCODE_VARYING_PULL_CONSTANT_LOAD, diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 3d55dc8..60c9a0f 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -796,6 +796,25 @@ fs_inst::regs_read(int arg) const case CS_OPCODE_CS_TERMINATE: return 1; + case FS_OPCODE_PUSH_CONSTANT_LOAD: + if (arg == 0) { + assert(src[3].file == IMM); + unsigned max_indirect = src[3].fixed_hw_reg.dw1.ud; + + if (src[0].file == UNIFORM) { + return (max_indirect / 4) + 1; + } else { + /* This is the case after assign_curb_setup() */ + assert(src[0].file == HW_REG); + + struct brw_reg reg = src[0].fixed_hw_reg; + unsigned base_offset = reg.nr * REG_SIZE + reg.subnr; + unsigned max_offset = base_offset + max_indirect; + return (max_offset / REG_SIZE) - (base_offset / REG_SIZE) + 1; + } + } + break; + default: 
if (is_tex() && arg == 0 && src[0].file == GRF) return mlen; @@ -4233,6 +4252,10 @@ get_lowered_simd_width(const struct brw_device_info *devinfo, case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL: return 8; + case FS_OPCODE_PUSH_CONSTANT_LOAD: + /* Prior to BDW, we only have 8 address registers */ + return devinfo->gen < 8 ? 8 : inst->exec_size; + default: return inst->exec_size; } diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 90c9756..6bc434a 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -444,6 +444,10 @@ private: void generate_scratch_write(fs_inst *inst, struct brw_reg src); void generate_scratch_read(fs_inst *inst, struct brw_reg dst); void generate_scratch_read_gen7(fs_inst *inst, struct brw_reg dst); + void generate_push_constant_load(fs_inst *inst, struct brw_reg dst, + struct brw_reg reg, + struct brw_reg base_offset, + struct brw_reg indirect); void generate_uniform_pull_constant_load(fs_inst *inst, struct brw_reg dst, struct brw_reg index, struct brw_reg offset); diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp index c7628dc..cdc6c10 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp @@ -71,6 +71,7 @@ is_expression(const fs_visitor *v, const fs_inst *const inst) case BRW_OPCODE_PLN: case BRW_OPCODE_MAD: case BRW_OPCODE_LRP: + case FS_OPCODE_PUSH_CONSTANT_LOAD: case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD: case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7: case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD: diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index c86ca04..956bfb8 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -1031,6 +1031,72 @@ fs_generator::generate_scratch_read_gen7(fs_inst *inst, struct brw_reg dst) } void 
+fs_generator::generate_push_constant_load(fs_inst *inst, struct brw_reg dst, + struct brw_reg reg, + struct brw_reg base_offset_reg, + struct brw_reg indirect) +{ + assert(base_offset_reg.file == BRW_IMMEDIATE_VALUE); + unsigned base_offset = base_offset_reg.dw1.ud; + + /* Add in the register position to get the absolute offset */ + base_offset += reg.nr * REG_SIZE + reg.subnr; + + assert(indirect.type == BRW_REGISTER_TYPE_D || + indirect.type == BRW_REGISTER_TYPE_UD); + + if (indirect.file == BRW_IMMEDIATE_VALUE) { + base_offset += indirect.dw1.d; + + reg.nr = base_offset / REG_SIZE; + reg.subnr = base_offset % REG_SIZE; + brw_MOV(p, dst, reg); + } else { + struct brw_reg addr = vec8(brw_address_reg(0)); + + /* The destination stride of an instruction (in bytes) must be greater + * than or equal to the size of the rest of the instruction. Since the + * address register is of type UW, we can't use a D-type instruction. + * In order to get around this, we re-type to UW and use a stride. + */ + indirect = spread(indirect, 2); + indirect.type = BRW_REGISTER_TYPE_UW; + + if (devinfo->gen < 8) { + /* Prior to Broadwell, there are a couple silly restrictions that + * we have to work around. First, we only have 8 address register + * entries so this is SIMD8-only. + */ + assert(inst->exec_size <= 8); + + /* Finally, the bottom 5 bits of the base offset and the bottom 5 + * bits of the indirect must add to less than 32. In other words, + * the hardware needs to be able to add the bottom five bits of the + * two to get the subnumber and add the next 7 bits of each to get + * the actual register number. Since uniforms frequently cross + * register boundaries, this makes it almost useless. We could try + * and do something clever where we use an actual base offset if + * base_offset % 32 == 0 but that would mean we were generating + * different code depending on the base offset. Instead, for the + * sake of consistency, we'll just do the add ourselves. 
+ */ + brw_ADD(p, addr, indirect, brw_imm_uw(base_offset)); + base_offset = 0; + } else { + /* On Broadwell and above, we have 16 address registers and + * everything seems to "just work". + */ + brw_MOV(p, addr, indirect); + } + + /* Get a VxH indirect for a0.0. */ + struct brw_reg src = brw_VxH_indirect(0, base_offset); + + brw_MOV(p, dst, retype(src, dst.type)); + } +} + +void fs_generator::generate_uniform_pull_constant_load(fs_inst *inst, struct brw_reg dst, struct brw_reg index, @@ -1951,6 +2017,10 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) generate_urb_write(inst, src[0]); break; + case FS_OPCODE_PUSH_CONSTANT_LOAD: + generate_push_constant_load(inst, dst, src[0], src[1], src[2]); + break; + case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD: generate_uniform_pull_constant_load(inst, dst, src[0], src[1]); break; diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index a7453fa..fdbcca5 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -695,6 +695,8 @@ brw_instruction_name(enum opcode op) case FS_OPCODE_PIXEL_Y: return "pixel_y"; + case FS_OPCODE_PUSH_CONSTANT_LOAD: + return "push_const"; case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD: return "uniform_pull_const"; case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7: -- 2.4.3 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev