Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com>
On 23.06.2016 00:03, Marek Olšák wrote:
From: Marek Olšák <marek.ol...@amd.com>
---
 src/gallium/drivers/radeon/radeon_llvm.h           |  3 +++
 .../drivers/radeon/radeon_setup_tgsi_llvm.c        | 28 ++++++++++++++++++++--
 2 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeon/radeon_llvm.h b/src/gallium/drivers/radeon/radeon_llvm.h
index ec16def..61afa7a 100644
--- a/src/gallium/drivers/radeon/radeon_llvm.h
+++ b/src/gallium/drivers/radeon/radeon_llvm.h
@@ -101,6 +101,9 @@ struct radeon_llvm_context {
 	LLVMValueRef main_fn;
 	LLVMTypeRef return_type;
 
+	unsigned fpmath_md_kind;
+	LLVMValueRef fpmath_md_2p5_ulp;
+
 	struct gallivm_state gallivm;
 };
 
diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
index d395208..79ab4ef 100644
--- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
+++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
@@ -1523,19 +1523,34 @@ static void emit_up2h(const struct lp_build_tgsi_action *action,
 	}
 }
 
+static void emit_fdiv(const struct lp_build_tgsi_action *action,
+		      struct lp_build_tgsi_context *bld_base,
+		      struct lp_build_emit_data *emit_data)
+{
+	struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
+
+	emit_data->output[emit_data->chan] =
+		LLVMBuildFDiv(bld_base->base.gallivm->builder,
+			      emit_data->args[0], emit_data->args[1], "");
+
+	/* Use v_rcp_f32 instead of precise division. */
+	LLVMSetMetadata(emit_data->output[emit_data->chan],
+			ctx->fpmath_md_kind, ctx->fpmath_md_2p5_ulp);
+}
+
 /* 1/sqrt is translated to rsq for f32 if fp32 denormals are not enabled in
  * the target machine. f64 needs global unsafe math flags to get rsq. */
 static void emit_rsq(const struct lp_build_tgsi_action *action,
 		     struct lp_build_tgsi_context *bld_base,
 		     struct lp_build_emit_data *emit_data)
 {
-	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
 	LLVMValueRef sqrt =
 		lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_SQRT,
 					 emit_data->args[0]);
 
 	emit_data->output[emit_data->chan] =
-		LLVMBuildFDiv(builder, bld_base->base.one, sqrt, "");
+		lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_DIV,
+					  bld_base->base.one, sqrt);
 }
 
 void radeon_llvm_context_init(struct radeon_llvm_context * ctx, const char *triple)
@@ -1586,6 +1601,14 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx, const char *trip
 	bld_base->emit_fetch_funcs[TGSI_FILE_OUTPUT] = radeon_llvm_emit_fetch;
 	bld_base->emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = fetch_system_value;
 
+	/* metadata allowing 2.5 ULP */
+	ctx->fpmath_md_kind = LLVMGetMDKindIDInContext(ctx->gallivm.context,
+						       "fpmath", 6);
+	LLVMValueRef arg = LLVMMDStringInContext(ctx->gallivm.context,
+						 "float 2.5", 9);
+	ctx->fpmath_md_2p5_ulp = LLVMMDNodeInContext(ctx->gallivm.context,
+						     &arg, 1);
+
 	/* Allocate outputs */
 	ctx->soa.outputs = ctx->outputs;
 
@@ -1615,6 +1638,7 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx, const char *trip
 	bld_base->op_actions[TGSI_OPCODE_DFMA].emit = build_tgsi_intrinsic_nomem;
 	bld_base->op_actions[TGSI_OPCODE_DFMA].intr_name = "llvm.fma.f64";
 	bld_base->op_actions[TGSI_OPCODE_DFRAC].emit = emit_frac;
+	bld_base->op_actions[TGSI_OPCODE_DIV].emit = emit_fdiv;
 	bld_base->op_actions[TGSI_OPCODE_DNEG].emit = emit_dneg;
 	bld_base->op_actions[TGSI_OPCODE_DSEQ].emit = emit_dcmp;
 	bld_base->op_actions[TGSI_OPCODE_DSGE].emit = emit_dcmp;
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev