v2: Remove lewd comment [for idr]. Signed-off-by: Chad Versace <chad.vers...@linux.intel.com> --- src/mesa/drivers/dri/i965/brw_defines.h | 1 + src/mesa/drivers/dri/i965/brw_fs.h | 7 ++ .../dri/i965/brw_fs_channel_expressions.cpp | 12 ++++ src/mesa/drivers/dri/i965/brw_fs_emit.cpp | 39 +++++++++- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 82 +++++++++++++++++++++- 5 files changed, 138 insertions(+), 3 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index e2f1e65..e3d297c 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -726,6 +726,7 @@ enum opcode { FS_OPCODE_MOV_DISPATCH_TO_FLAGS, FS_OPCODE_DISCARD_JUMP, FS_OPCODE_SET_GLOBAL_OFFSET, + FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y, VS_OPCODE_URB_WRITE, VS_OPCODE_SCRATCH_READ, diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index b47b0d0..49e2ed0 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -355,6 +355,10 @@ public: fs_reg fix_math_operand(fs_reg src); fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0); fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0, fs_reg src1); + void emit_pack_half_2x16_split(fs_reg dst, fs_reg x, fs_reg y); + void emit_unpack_half_2x16_split_x(fs_reg dst, fs_reg src0); + void emit_unpack_half_2x16_split_y(fs_reg dst, fs_reg src0); + void emit_minmax(uint32_t conditionalmod, fs_reg dst, fs_reg src0, fs_reg src1); bool try_emit_saturate(ir_expression *ir); @@ -541,6 +545,9 @@ private: struct brw_reg src, struct brw_reg offset); void generate_discard_jump(fs_inst *inst); + void generate_unpack_half_2x16_split_y(fs_inst *inst, + struct brw_reg dst, + struct brw_reg src); void patch_discard_jumps_to_fb_writes(); diff --git a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp index 58521ee..e19da51 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp @@ -342,9 +342,21 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir) assert(!"not yet supported"); break; + case ir_unop_pack_snorm_2x16: + case ir_unop_pack_unorm_2x16: + case ir_unop_pack_half_2x16: + case ir_unop_unpack_snorm_2x16: + case ir_unop_unpack_unorm_2x16: + case ir_unop_unpack_half_2x16: 
case ir_quadop_vector: assert(!"should have been lowered"); break; + + case ir_unop_unpack_half_2x16_split_x: + case ir_unop_unpack_half_2x16_split_y: + case ir_binop_pack_half_2x16_split: + assert(!"not reached: expression operates on scalars only"); + break; } ir->remove(); diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp index 324e665..0ff296c 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp @@ -923,6 +923,34 @@ fs_generator::generate_set_global_offset(fs_inst *inst, } void +fs_generator::generate_unpack_half_2x16_split_y(fs_inst *inst, + struct brw_reg dst, + struct brw_reg src) +{ + assert(intel->gen >= 7); + + /* src has the form of unpackHalf2x16's input: + * + * w z y x + * |undef|undef|undef|0xhhhhllll| + * + * We wish to access only the "hhhh" bits of the source register, and hence + * must access it with a 16 bit subregister offset. To do so, we must + * halve the size of the source data type from UD to UW and compensate by + * doubling the stride. 
+ */ + assert(src.type == BRW_REGISTER_TYPE_UD); + src.type = BRW_REGISTER_TYPE_UW; + if (src.vstride > 0) + ++src.vstride; + if (src.hstride > 0) + ++src.hstride; + src.subnr += 2; + + brw_F16TO32(p, dst, src); +} + +void fs_generator::generate_code(exec_list *instructions) { int last_native_insn_offset = p->next_insn_offset; @@ -1082,7 +1110,12 @@ fs_generator::generate_code(exec_list *instructions) case BRW_OPCODE_SHL: brw_SHL(p, dst, src[0], src[1]); break; - + case BRW_OPCODE_F32TO16: + brw_F32TO16(p, dst, src[0]); + break; + case BRW_OPCODE_F16TO32: + brw_F16TO32(p, dst, src[0]); + break; case BRW_OPCODE_CMP: brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]); break; @@ -1229,6 +1262,10 @@ fs_generator::generate_code(exec_list *instructions) generate_set_global_offset(inst, dst, src[0], src[1]); break; + case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y: + generate_unpack_half_2x16_split_y(inst, dst, src[0]); + break; + default: if (inst->opcode < (int) ARRAY_SIZE(opcode_descs)) { _mesa_problem(ctx, "Unsupported opcode `%s' in FS", diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 5885989..042ccca 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -538,7 +538,20 @@ fs_visitor::visit(ir_expression *ir) BRW_CONDITIONAL_L : BRW_CONDITIONAL_GE, this->result, op[0], op[1]); break; - + case ir_unop_pack_snorm_2x16: + case ir_unop_pack_unorm_2x16: + case ir_unop_unpack_snorm_2x16: + case ir_unop_unpack_unorm_2x16: + case ir_unop_unpack_half_2x16: + case ir_unop_pack_half_2x16: + assert(!"not reached: should be handled by lower_packing_builtins"); + break; + case ir_unop_unpack_half_2x16_split_x: + emit_unpack_half_2x16_split_x(this->result, op[0]); + break; + case ir_unop_unpack_half_2x16_split_y: + emit_unpack_half_2x16_split_y(this->result, op[0]); + break; case ir_binop_pow: emit_math(SHADER_OPCODE_POW, this->result, op[0], op[1]); break; @@ -566,7 
+579,9 @@ fs_visitor::visit(ir_expression *ir) else inst = emit(SHR(this->result, op[0], op[1])); break; - + case ir_binop_pack_half_2x16_split: + emit_pack_half_2x16_split(this->result, op[0], op[1]); + break; case ir_binop_ubo_load: /* This IR node takes a constant uniform block and a constant or * variable byte offset within the block and loads a vector from that. @@ -2261,6 +2276,69 @@ fs_visitor::emit_fb_writes() } void +fs_visitor::emit_pack_half_2x16_split(fs_reg dst, fs_reg x, fs_reg y) +{ + if (intel->gen < 7) + assert(!"packHalf2x16 should be lowered"); + + /* uint dst; */ + assert(dst.type == BRW_REGISTER_TYPE_UD); + + /* float x; */ + assert(x.type == BRW_REGISTER_TYPE_F); + + /* float y; */ + assert(y.type == BRW_REGISTER_TYPE_F); + + /* uint tmp; */ + fs_reg tmp(this, glsl_type::uint_type); + + /* dst = f32to16(x); */ + emit(BRW_OPCODE_F32TO16, dst, x); + + /* tmp = f32to16(y); */ + emit(BRW_OPCODE_F32TO16, tmp, y); + + /* tmp <<= 16; */ + emit(BRW_OPCODE_SHL, tmp, tmp, fs_reg(16u)); + + /* dst |= tmp; */ + emit(BRW_OPCODE_OR, dst, dst, tmp); +} + +void +fs_visitor::emit_unpack_half_2x16_split_x(fs_reg dst, fs_reg src0) +{ + if (intel->gen < 7) + assert(!"unpackHalf2x16 should be lowered"); + + /* float dst; */ + assert(dst.type == BRW_REGISTER_TYPE_F); + + /* uint src0; */ + assert(src0.type == BRW_REGISTER_TYPE_UD); + + /* dst = f16to32(src0); */ + emit(BRW_OPCODE_F16TO32, dst, src0); +} + +void +fs_visitor::emit_unpack_half_2x16_split_y(fs_reg dst, fs_reg src0) +{ + if (intel->gen < 7) + assert(!"unpackHalf2x16 should be lowered"); + + assert(dst.type == BRW_REGISTER_TYPE_F); + assert(src0.type == BRW_REGISTER_TYPE_UD); + + /* For the Y channel, we must emit an instruction with clever region + * addressing. It's only possible to choose such addressing at the + * fs_generator stage, so we postpone with a special FS opcode. 
+ */ + emit(FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y, dst, src0); +} + +void fs_visitor::resolve_ud_negate(fs_reg *reg) { if (reg->type != BRW_REGISTER_TYPE_UD || -- 1.8.1.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev