Signed-off-by: Chad Versace <chad.vers...@linux.intel.com>
---
 src/mesa/drivers/dri/i965/brw_defines.h            |  1 +
 src/mesa/drivers/dri/i965/brw_fs.h                 |  7 ++
 .../dri/i965/brw_fs_channel_expressions.cpp        | 29 +++++++-
 src/mesa/drivers/dri/i965/brw_fs_emit.cpp          | 39 ++++++++++-
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp       | 78 +++++++++++++++++++++-
 5 files changed, 149 insertions(+), 5 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 22d3e98..1c43d68 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -713,6 +713,7 @@ enum opcode {
    FS_OPCODE_MOV_DISPATCH_TO_FLAGS,
    FS_OPCODE_DISCARD_JUMP,
    FS_OPCODE_SET_GLOBAL_OFFSET,
+   FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y,
 
    VS_OPCODE_URB_WRITE,
    VS_OPCODE_SCRATCH_READ,
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index bcf38f3..59aa28d 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -355,6 +355,10 @@ public:
    fs_reg fix_math_operand(fs_reg src);
    fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0);
    fs_inst *emit_math(enum opcode op, fs_reg dst, fs_reg src0, fs_reg src1);
+   void emit_pack_half_2x16_split(fs_reg dst, fs_reg x, fs_reg y);
+   void emit_unpack_half_2x16_split_x(fs_reg dst, fs_reg src0);
+   void emit_unpack_half_2x16_split_y(fs_reg dst, fs_reg src0);
+
    void emit_minmax(uint32_t conditionalmod, fs_reg dst,
                     fs_reg src0, fs_reg src1);
    bool try_emit_saturate(ir_expression *ir);
@@ -541,6 +545,9 @@ private:
                                    struct brw_reg src,
                                    struct brw_reg offset);
    void generate_discard_jump(fs_inst *inst);
+   void generate_unpack_half_2x16_split_y(fs_inst *inst,
+                                          struct brw_reg dst,
+                                          struct brw_reg src);
 
    void patch_discard_jumps_to_fb_writes();
 
diff --git a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
index 58521ee..7081511 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
@@ -76,8 +76,21 @@ channel_expressions_predicate(ir_instruction *ir)
       return false;
 
    for (i = 0; i < expr->get_num_operands(); i++) {
-      if (expr->operands[i]->type->is_vector())
-         return true;
+      if (expr->operands[i]->type->is_vector()) {
+         switch (expr->operation) {
+         case ir_binop_pack_half_2x16_split:
+         case ir_unop_pack_half_2x16:
+         case ir_unop_unpack_half_2x16:
+         case ir_unop_unpack_half_2x16_split_x:
+         case ir_unop_unpack_half_2x16_split_y:
+            assert(!"WTF");
+            break;
+         default:
+            break;
+         }
+
+         return true;
+      }
    }
 
    return false;
@@ -342,9 +355,21 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir)
       assert(!"not yet supported");
       break;
 
+   case ir_unop_pack_snorm_2x16:
+   case ir_unop_pack_unorm_2x16:
+   case ir_unop_pack_half_2x16:
+   case ir_unop_unpack_snorm_2x16:
+   case ir_unop_unpack_unorm_2x16:
+   case ir_unop_unpack_half_2x16:
    case ir_quadop_vector:
       assert(!"should have been lowered");
       break;
+
+   case ir_unop_unpack_half_2x16_split_x:
+   case ir_unop_unpack_half_2x16_split_y:
+   case ir_binop_pack_half_2x16_split:
+      assert(!"not reached: expression operates on scalars only");
+      break;
    }
 
    ir->remove();
diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
index 63f09fe..46e2409 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
@@ -920,6 +920,34 @@ fs_generator::generate_set_global_offset(fs_inst *inst,
 }
 
 void
+fs_generator::generate_unpack_half_2x16_split_y(fs_inst *inst,
+                                                struct brw_reg dst,
+                                                struct brw_reg src)
+{
+   assert(intel->gen >= 7);
+
+   /* src has the form of unpackHalf2x16's input:
+    *
+    *       w     z     y        x
+    *    |undef|undef|undef|0xhhhhllll|
+    *
+    * We wish to access only the "hhhh" bits of the source register, and hence
+    * must access it with a 16 bit subregister offset. To do so, we must
+    * halve the size of the source data type from UD to UW and compensate by
+    * doubling the stride.
+    */
+   assert(src.type == BRW_REGISTER_TYPE_UD);
+   src.type = BRW_REGISTER_TYPE_UW;
+   if (src.vstride > 0)
+      ++src.vstride;
+   if (src.hstride > 0)
+      ++src.hstride;
+   src.subnr += 2;
+
+   brw_F16TO32(p, dst, src);
+}
+
+void
 fs_generator::generate_code(exec_list *instructions)
 {
    int last_native_insn_offset = p->next_insn_offset;
@@ -1079,7 +1107,12 @@ fs_generator::generate_code(exec_list *instructions)
       case BRW_OPCODE_SHL:
          brw_SHL(p, dst, src[0], src[1]);
          break;
-
+      case BRW_OPCODE_F32TO16:
+         brw_F32TO16(p, dst, src[0]);
+         break;
+      case BRW_OPCODE_F16TO32:
+         brw_F16TO32(p, dst, src[0]);
+         break;
       case BRW_OPCODE_CMP:
          brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]);
          break;
@@ -1226,6 +1259,10 @@ fs_generator::generate_code(exec_list *instructions)
          generate_set_global_offset(inst, dst, src[0], src[1]);
          break;
 
+      case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y:
+         generate_unpack_half_2x16_split_y(inst, dst, src[0]);
+         break;
+
       default:
          if (inst->opcode < (int) ARRAY_SIZE(opcode_descs)) {
             _mesa_problem(ctx, "Unsupported opcode `%s' in FS",
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index e70d6bf..563d1d5 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -536,7 +536,20 @@ fs_visitor::visit(ir_expression *ir)
                   BRW_CONDITIONAL_L : BRW_CONDITIONAL_GE,
                   this->result, op[0], op[1]);
       break;
-
+   case ir_unop_pack_snorm_2x16:
+   case ir_unop_pack_unorm_2x16:
+   case ir_unop_unpack_snorm_2x16:
+   case ir_unop_unpack_unorm_2x16:
+   case ir_unop_unpack_half_2x16:
+   case ir_unop_pack_half_2x16:
+      assert(!"not reached: should be handled by lower_packing_builtins");
+      break;
+   case ir_unop_unpack_half_2x16_split_x:
+      emit_unpack_half_2x16_split_x(this->result, op[0]);
+      break;
+   case ir_unop_unpack_half_2x16_split_y:
+      emit_unpack_half_2x16_split_y(this->result, op[0]);
+      break;
    case ir_binop_pow:
       emit_math(SHADER_OPCODE_POW, this->result, op[0], op[1]);
       break;
@@ -564,7 +577,9 @@ fs_visitor::visit(ir_expression *ir)
       else
          inst = emit(SHR(this->result, op[0], op[1]));
       break;
-
+   case ir_binop_pack_half_2x16_split:
+      emit_pack_half_2x16_split(this->result, op[0], op[1]);
+      break;
    case ir_binop_ubo_load:
       /* This IR node takes a constant uniform block and a constant or
        * variable byte offset within the block and loads a vector from that.
@@ -2259,6 +2274,65 @@ fs_visitor::emit_fb_writes()
 }
 
 void
+fs_visitor::emit_pack_half_2x16_split(fs_reg dst, fs_reg x, fs_reg y)
+{
+   if (intel->gen < 7)
+      assert(!"packHalf2x16 should be handled by lower_packing_builtins");
+
+   /* uint dst; */
+   assert(dst.type == BRW_REGISTER_TYPE_UD);
+
+   /* float x; */
+   assert(x.type == BRW_REGISTER_TYPE_F);
+
+   /* float y; */
+   assert(y.type == BRW_REGISTER_TYPE_F);
+
+   /* uint tmp; */
+   fs_reg tmp(this, glsl_type::uint_type);
+
+   /* dst = f32to16(x); */
+   emit(BRW_OPCODE_F32TO16, dst, x);
+
+   /* tmp = f32to16(y); */
+   emit(BRW_OPCODE_F32TO16, tmp, y);
+
+   /* tmp <<= 16; */
+   emit(BRW_OPCODE_SHL, tmp, tmp, fs_reg(16u));
+
+   /* dst |= tmp; */
+   emit(BRW_OPCODE_OR, dst, dst, tmp);
+}
+
+void
+fs_visitor::emit_unpack_half_2x16_split_x(fs_reg dst, fs_reg src0)
+{
+   if (intel->gen < 7)
+      assert(!"unpackHalf2x16 should be lowered");
+
+   /* float dst; */
+   assert(dst.type == BRW_REGISTER_TYPE_F);
+
+   /* uint src0; */
+   assert(src0.type == BRW_REGISTER_TYPE_UD);
+
+   /* dst = f16to32(src0); */
+   emit(BRW_OPCODE_F16TO32, dst, src0);
+}
+
+void
+fs_visitor::emit_unpack_half_2x16_split_y(fs_reg dst, fs_reg src0)
+{
+   if (intel->gen < 7)
+      assert(!"unpackHalf2x16 should be lowered");
+
+   assert(dst.type == BRW_REGISTER_TYPE_F);
+   assert(src0.type == BRW_REGISTER_TYPE_UD);
+
+   emit(FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y, dst, src0);
+}
+
+void
 fs_visitor::resolve_ud_negate(fs_reg *reg)
 {
    if (reg->type != BRW_REGISTER_TYPE_UD ||
-- 
1.8.1

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev