From: Adhemerval Zanella <azane...@linux.vnet.ibm.com> This adds another rounding mode, LP_BUILD_IROUND, to the enum, whose existing values otherwise happen to match SSE4.1's rounding modes. This should be safe as long as the IROUND case never reaches the SSE4.1 path.
Reviewed-by: Adam Jackson <a...@redhat.com> Signed-off-by: Adhemerval Zanella <azane...@linux.vnet.ibm.com> --- src/gallium/auxiliary/gallivm/lp_bld_arit.c | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c index ec05026..021cd6e 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -1360,10 +1360,17 @@ lp_build_int_to_float(struct lp_build_context *bld, static boolean arch_rounding_available(const struct lp_type type) { + /* SSE4 vector rounding. */ if ((util_cpu_caps.has_sse4_1 && (type.length == 1 || type.width*type.length == 128)) || (util_cpu_caps.has_avx && type.width*type.length == 256)) return TRUE; + /* SSE2 vector to word. */ + else if ((util_cpu_caps.has_sse2 && + ((type.width == 32) && (type.length == 1 || type.length == 4))) || + (util_cpu_caps.has_avx && type.width == 32 && type.length == 8)) + return TRUE; + /* Altivec rounding and vector to word. 
*/ else if ((util_cpu_caps.has_altivec && (type.width == 32 && type.length == 4))) return TRUE; @@ -1376,7 +1383,8 @@ enum lp_build_round_mode LP_BUILD_ROUND_NEAREST = 0, LP_BUILD_ROUND_FLOOR = 1, LP_BUILD_ROUND_CEIL = 2, - LP_BUILD_ROUND_TRUNCATE = 3 + LP_BUILD_ROUND_TRUNCATE = 3, + LP_BUILD_IROUND = 4 }; /** @@ -1400,6 +1408,7 @@ lp_build_round_sse41(struct lp_build_context *bld, assert(lp_check_value(type, a)); assert(util_cpu_caps.has_sse4_1); + assert(mode != LP_BUILD_IROUND); if (type.length == 1) { LLVMTypeRef vec_type; @@ -1526,8 +1535,6 @@ lp_build_iround_nearest_sse2(struct lp_build_context *bld, } -/* - */ static INLINE LLVMValueRef lp_build_round_altivec(struct lp_build_context *bld, LLVMValueRef a, @@ -1536,8 +1543,10 @@ lp_build_round_altivec(struct lp_build_context *bld, LLVMBuilderRef builder = bld->gallivm->builder; const struct lp_type type = bld->type; const char *intrinsic = NULL; + LLVMTypeRef ret_type = bld->vec_type; assert(type.floating); + assert(type.width == 32); assert(lp_check_value(type, a)); assert(util_cpu_caps.has_altivec); @@ -1555,9 +1564,12 @@ lp_build_round_altivec(struct lp_build_context *bld, case LP_BUILD_ROUND_TRUNCATE: intrinsic = "llvm.ppc.altivec.vrfiz"; break; + case LP_BUILD_IROUND: + ret_type = lp_build_int_vec_type(bld->gallivm, bld->type); + intrinsic = "llvm.ppc.altivec.vctsxs"; } - return lp_build_intrinsic_unary(builder, intrinsic, bld->vec_type, a); + return lp_build_intrinsic_unary(builder, intrinsic, ret_type, a); } static INLINE LLVMValueRef @@ -1565,7 +1577,9 @@ lp_build_round_arch(struct lp_build_context *bld, LLVMValueRef a, enum lp_build_round_mode mode) { - if (util_cpu_caps.has_sse4_1) + if (util_cpu_caps.has_sse2 && (mode == LP_BUILD_IROUND)) + return lp_build_iround_nearest_sse2(bld, a); + else if (util_cpu_caps.has_sse4_1) return lp_build_round_sse41(bld, a, mode); else /* (util_cpu_caps.has_altivec) */ return lp_build_round_altivec(bld, a, mode); @@ -1893,11 +1907,6 @@ lp_build_iround(struct 
lp_build_context *bld, assert(lp_check_value(type, a)); - if ((util_cpu_caps.has_sse2 && - ((type.width == 32) && (type.length == 1 || type.length == 4))) || - (util_cpu_caps.has_avx && type.width == 32 && type.length == 8)) { - return lp_build_iround_nearest_sse2(bld, a); - } if (arch_rounding_available(type)) { res = lp_build_round_arch(bld, a, LP_BUILD_ROUND_NEAREST); } -- 1.7.11.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev