Both D3D10 and OpenCL say that if one of the inputs is NaN then the other should be returned. To preserve that behavior the patch fixes both the SSE and the non-SSE paths in both functions.
Signed-off-by: Zack Rusin <za...@vmware.com> --- src/gallium/auxiliary/gallivm/lp_bld_arit.c | 60 ++++++++++++++++++++++++--- 1 file changed, 54 insertions(+), 6 deletions(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c index e7955aa..7beb117 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -176,12 +176,36 @@ lp_build_min_simple(struct lp_build_context *bld, } if(intrinsic) { - return lp_build_intrinsic_binary_anylength(bld->gallivm, intrinsic, - type, - intr_size, a, b); + LLVMValueRef bmask, max; + /* We need to handle nan's for floating point numbers. If one of the + * inputs is nan the other should be returned (required by both D3D10+ + * and OpenCL) + */ + if (type.floating) { + bmask = LLVMBuildFCmp(bld->gallivm->builder, LLVMRealOEQ, b, b, ""); + max = lp_build_intrinsic_binary_anylength(bld->gallivm, intrinsic, + type, + intr_size, a, b); + return LLVMBuildSelect(bld->gallivm->builder, bmask, max, a, ""); + } else { + return lp_build_intrinsic_binary_anylength(bld->gallivm, intrinsic, + type, + intr_size, a, b); + } } cond = lp_build_cmp(bld, PIPE_FUNC_LESS, a, b); + /* We need to handle nan's for floating point numbers. 
If one of the + * inputs is nan the other should be returned (required by both D3D10+ + * and OpenCL) + */ + if (type.floating) { + LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->gallivm, type); + LLVMValueRef nanmask = LLVMBuildFCmp(bld->gallivm->builder, + LLVMRealOEQ, a, a, ""); + nanmask = LLVMBuildSExt(bld->gallivm->builder, nanmask, int_vec_type, ""); + cond = LLVMBuildAnd(bld->gallivm->builder, cond, nanmask, ""); + } return lp_build_select(bld, cond, a, b); } @@ -293,12 +317,36 @@ lp_build_max_simple(struct lp_build_context *bld, } if(intrinsic) { - return lp_build_intrinsic_binary_anylength(bld->gallivm, intrinsic, - type, - intr_size, a, b); + LLVMValueRef bmask, min; + /* We need to handle nan's for floating point numbers. If one of the + * inputs is nan the other should be returned (required by both D3D10+ + * and OpenCL) + */ + if (type.floating) { + bmask = LLVMBuildFCmp(bld->gallivm->builder, LLVMRealOEQ, b, b, ""); + min = lp_build_intrinsic_binary_anylength(bld->gallivm, intrinsic, + type, + intr_size, a, b); + return LLVMBuildSelect(bld->gallivm->builder, bmask, min, a, ""); + } else { + return lp_build_intrinsic_binary_anylength(bld->gallivm, intrinsic, + type, + intr_size, a, b); + } } cond = lp_build_cmp(bld, PIPE_FUNC_GREATER, a, b); + /* We need to handle nan's for floating point numbers. If one of the + * inputs is nan the other should be returned (required by both D3D10+ + * and OpenCL) + */ + if (type.floating) { + LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->gallivm, type); + LLVMValueRef nanmask = LLVMBuildFCmp(bld->gallivm->builder, + LLVMRealOEQ, a, a, ""); + nanmask = LLVMBuildSExt(bld->gallivm->builder, nanmask, int_vec_type, ""); + cond = LLVMBuildAnd(bld->gallivm->builder, cond, nanmask, ""); + } return lp_build_select(bld, cond, a, b); } -- 1.7.10.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev