From: Marek Olšák <marek.ol...@amd.com>

---
 src/amd/common/ac_llvm_build.c                | 19 +++++++++++++++++--
 src/amd/common/ac_llvm_build.h                |  1 +
 src/amd/common/ac_nir_to_llvm.c               |  2 +-
 .../drivers/radeonsi/si_shader_tgsi_mem.c     |  4 +---
 4 files changed, 20 insertions(+), 6 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index fc6dc396d38..ed510a34d6f 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -551,20 +551,36 @@ LLVMValueRef ac_build_expand_to_vec4(struct ac_llvm_context *ctx,
                }
                elemtype = LLVMTypeOf(value);
        }
 
        while (num_channels < 4)
                chan[num_channels++] = LLVMGetUndef(elemtype);
 
        return ac_build_gather_values(ctx, chan, 4);
 }
 
+LLVMValueRef ac_build_round(struct ac_llvm_context *ctx, LLVMValueRef value)
+{
+       unsigned type_size = ac_get_type_size(LLVMTypeOf(value));
+       const char *name;
+
+       if (type_size == 2)
+               name = "llvm.rint.f16";
+       else if (type_size == 4)
+               name = "llvm.rint.f32";
+       else
+               name = "llvm.rint.f64";
+
+       return ac_build_intrinsic(ctx, name, LLVMTypeOf(value), &value, 1,
+                                 AC_FUNC_ATTR_READNONE);
+}
+
 LLVMValueRef
 ac_build_fdiv(struct ac_llvm_context *ctx,
              LLVMValueRef num,
              LLVMValueRef den)
 {
        /* If we do (num / den), LLVM >= 7.0 does:
         *    return num * v_rcp_f32(den * (fabs(den) > 0x1.0p+96f ? 0x1.0p-32f : 1.0f));
         *
         * If we do (num * (1 / den)), LLVM does:
         *    return num * v_rcp_f32(den);
@@ -729,22 +745,21 @@ ac_prepare_cube_coords(struct ac_llvm_context *ctx,
                       LLVMValueRef *coords_arg,
                       LLVMValueRef *derivs_arg)
 {
 
        LLVMBuilderRef builder = ctx->builder;
        struct cube_selection_coords selcoords;
        LLVMValueRef coords[3];
        LLVMValueRef invma;
 
        if (is_array && !is_lod) {
-               LLVMValueRef tmp = coords_arg[3];
-               tmp = ac_build_intrinsic(ctx, "llvm.rint.f32", ctx->f32, &tmp, 1, 0);
+               LLVMValueRef tmp = ac_build_round(ctx, coords_arg[3]);
 
                /* Section 8.9 (Texture Functions) of the GLSL 4.50 spec says:
                 *
                 *    "For Array forms, the array layer used will be
                 *
                 *       max(0, min(d−1, floor(layer+0.5)))
                 *
                 *     where d is the depth of the texture array and layer
                 *     comes from the component indicated in the tables below.
                 *     Workaroudn for an issue where the layer is taken from a
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index 83aad02183e..32d62450dfe 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -168,20 +168,21 @@ ac_build_gather_values_extended(struct ac_llvm_context *ctx,
                                unsigned value_stride,
                                bool load,
                                bool always_vector);
 LLVMValueRef
 ac_build_gather_values(struct ac_llvm_context *ctx,
                       LLVMValueRef *values,
                       unsigned value_count);
 LLVMValueRef ac_build_expand_to_vec4(struct ac_llvm_context *ctx,
                                     LLVMValueRef value,
                                     unsigned num_channels);
+LLVMValueRef ac_build_round(struct ac_llvm_context *ctx, LLVMValueRef value);
 
 LLVMValueRef
 ac_build_fdiv(struct ac_llvm_context *ctx,
              LLVMValueRef num,
              LLVMValueRef den);
 
 LLVMValueRef ac_build_fast_udiv(struct ac_llvm_context *ctx,
                                LLVMValueRef num,
                                LLVMValueRef multiplier,
                                LLVMValueRef pre_shift,
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 312383db36c..ffc64a79d95 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -3304,21 +3304,21 @@ static void tex_fetch_ptrs(struct ac_nir_context *ctx,
        }
        if (fmask_ptr && (instr->op == nir_texop_txf_ms ||
                          instr->op == nir_texop_samples_identical))
                *fmask_ptr = get_sampler_desc(ctx, texture_deref_instr, AC_DESC_FMASK, instr, false, false);
 }
 
 static LLVMValueRef apply_round_slice(struct ac_llvm_context *ctx,
                                      LLVMValueRef coord)
 {
        coord = ac_to_float(ctx, coord);
-       coord = ac_build_intrinsic(ctx, "llvm.rint.f32", ctx->f32, &coord, 1, 0);
+       coord = ac_build_round(ctx, coord);
        coord = ac_to_integer(ctx, coord);
        return coord;
 }
 
 static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr)
 {
        LLVMValueRef result = NULL;
        struct ac_image_args args = { 0 };
        LLVMValueRef fmask_ptr = NULL, sample_index = NULL;
        LLVMValueRef ddx = NULL, ddy = NULL;
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
index cabc448a082..8c44831bccb 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
@@ -1439,23 +1439,21 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action *action,
                                       opcode == TGSI_OPCODE_TXD,
                                       target == TGSI_TEXTURE_CUBE_ARRAY ||
                                       target == TGSI_TEXTURE_SHADOWCUBE_ARRAY,
                                       opcode == TGSI_OPCODE_LODQ,
                                       args.coords, args.derivs);
        } else if (tgsi_is_array_sampler(target) &&
                   opcode != TGSI_OPCODE_TXF &&
                   opcode != TGSI_OPCODE_TXF_LZ &&
                   ctx->screen->info.chip_class <= VI) {
                unsigned array_coord = target == TGSI_TEXTURE_1D_ARRAY ? 1 : 2;
-               args.coords[array_coord] =
-                       ac_build_intrinsic(&ctx->ac, "llvm.rint.f32", ctx->f32,
-                                          &args.coords[array_coord], 1, 0);
+               args.coords[array_coord] = ac_build_round(&ctx->ac, args.coords[array_coord]);
        }
 
        /* 1D textures are allocated and used as 2D on GFX9. */
        if (ctx->screen->info.chip_class >= GFX9) {
                LLVMValueRef filler;
 
                /* Use 0.5, so that we don't sample the border color. */
                if (opcode == TGSI_OPCODE_TXF ||
                    opcode == TGSI_OPCODE_TXF_LZ)
                        filler = ctx->i32_0;
-- 
2.17.1

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to