Re: [Mesa-dev] [PATCH 01/11] radeonsi: use ac helpers for bitcasts

Nicolai Hähnle Mon, 02 Oct 2017 02:59:23 -0700

On 29.09.2017 16:49, Marek Olšák wrote:

From: Marek Olšák <marek.ol...@amd.com>


---
  src/gallium/drivers/radeonsi/si_shader.c           | 147 ++++++++-------------
  src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c  |  14 +-
  src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c  |  38 +++---
  .../drivers/radeonsi/si_shader_tgsi_setup.c        |  14 +-
  4 files changed, 86 insertions(+), 127 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 71f0f4a..c420573 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -252,22 +252,21 @@ build_phi(struct ac_llvm_context *ctx, LLVMTypeRef type,
   */
  static LLVMValueRef unpack_param(struct si_shader_context *ctx,
                                 unsigned param, unsigned rshift,
                                 unsigned bitwidth)
  {
        struct gallivm_state *gallivm = &ctx->gallivm;
        LLVMValueRef value = LLVMGetParam(ctx->main_fn,
                                          param);

if (LLVMGetTypeKind(LLVMTypeOf(value)) == LLVMFloatTypeKind)

-               value = bitcast(&ctx->bld_base,
-                               TGSI_TYPE_UNSIGNED, value);
+               value = ac_to_integer(&ctx->ac, value);

if (rshift)

                value = LLVMBuildLShr(gallivm->builder, value,
                                      LLVMConstInt(ctx->i32, rshift, 0), "");

if (rshift + bitwidth < 32) {

                unsigned mask = (1 << bitwidth) - 1;
                value = LLVMBuildAnd(gallivm->builder, value,
                                     LLVMConstInt(ctx->i32, mask, 0), "");
        }
@@ -553,21 +552,21 @@ void si_llvm_load_input_vs(
                /* The hardware returns an unsigned value; convert it to a
                 * signed one.
                 */
                LLVMValueRef tmp = out[3];
                LLVMValueRef c30 = LLVMConstInt(ctx->i32, 30, 0);

/* First, recover the sign-extended signed integer value. */

                if (fix_fetch == SI_FIX_FETCH_A2_SSCALED)
                        tmp = LLVMBuildFPToUI(gallivm->builder, tmp, ctx->i32, 
"");
                else
-                       tmp = LLVMBuildBitCast(gallivm->builder, tmp, ctx->i32, 
"");
+                       tmp = ac_to_integer(&ctx->ac, tmp);

/* For the integer-like cases, do a natural sign extension.

                 *
                 * For the SNORM case, the values are 0.0, 0.333, 0.666, 1.0
                 * and happen to contain 0, 1, 2, 3 as the two LSBs of the
                 * exponent.
                 */
                tmp = LLVMBuildShl(gallivm->builder, tmp,
                                   fix_fetch == SI_FIX_FETCH_A2_SNORM ?
                                   LLVMConstInt(ctx->i32, 7, 0) : c30, "");
@@ -583,67 +582,63 @@ void si_llvm_load_input_vs(
                } else if (fix_fetch == SI_FIX_FETCH_A2_SSCALED) {
                        tmp = LLVMBuildSIToFP(gallivm->builder, tmp, ctx->f32, 
"");
                }

out[3] = tmp;

                break;
        }
        case SI_FIX_FETCH_RGBA_32_UNORM:
        case SI_FIX_FETCH_RGBX_32_UNORM:
                for (chan = 0; chan < 4; chan++) {
-                       out[chan] = LLVMBuildBitCast(gallivm->builder, 
out[chan],
-                                                    ctx->i32, "");
+                       out[chan] = ac_to_integer(&ctx->ac, out[chan]);
                        out[chan] = LLVMBuildUIToFP(gallivm->builder,
                                                    out[chan], ctx->f32, "");
                        out[chan] = LLVMBuildFMul(gallivm->builder, out[chan],
                                                  LLVMConstReal(ctx->f32, 1.0 / 
UINT_MAX), "");
                }
                /* RGBX UINT returns 1 in alpha, which would be rounded to 0 by 
normalizing. */
                if (fix_fetch == SI_FIX_FETCH_RGBX_32_UNORM)
                        out[3] = LLVMConstReal(ctx->f32, 1);
                break;
        case SI_FIX_FETCH_RGBA_32_SNORM:
        case SI_FIX_FETCH_RGBX_32_SNORM:
        case SI_FIX_FETCH_RGBA_32_FIXED:
        case SI_FIX_FETCH_RGBX_32_FIXED: {
                double scale;
                if (fix_fetch >= SI_FIX_FETCH_RGBA_32_FIXED)
                        scale = 1.0 / 0x10000;
                else
                        scale = 1.0 / INT_MAX;

for (chan = 0; chan < 4; chan++) {

-                       out[chan] = LLVMBuildBitCast(gallivm->builder, 
out[chan],
-                                                    ctx->i32, "");
+                       out[chan] = ac_to_integer(&ctx->ac, out[chan]);
                        out[chan] = LLVMBuildSIToFP(gallivm->builder,
                                                    out[chan], ctx->f32, "");
                        out[chan] = LLVMBuildFMul(gallivm->builder, out[chan],
                                                  LLVMConstReal(ctx->f32, scale), 
"");
                }
                /* RGBX SINT returns 1 in alpha, which would be rounded to 0 by 
normalizing. */
                if (fix_fetch == SI_FIX_FETCH_RGBX_32_SNORM ||
                    fix_fetch == SI_FIX_FETCH_RGBX_32_FIXED)
                        out[3] = LLVMConstReal(ctx->f32, 1);
                break;
        }
        case SI_FIX_FETCH_RGBA_32_USCALED:
                for (chan = 0; chan < 4; chan++) {
-                       out[chan] = LLVMBuildBitCast(gallivm->builder, 
out[chan],
-                                                    ctx->i32, "");
+                       out[chan] = ac_to_integer(&ctx->ac, out[chan]);
                        out[chan] = LLVMBuildUIToFP(gallivm->builder,
                                                    out[chan], ctx->f32, "");
                }
                break;
        case SI_FIX_FETCH_RGBA_32_SSCALED:
                for (chan = 0; chan < 4; chan++) {
-                       out[chan] = LLVMBuildBitCast(gallivm->builder, 
out[chan],
-                                                    ctx->i32, "");
+                       out[chan] = ac_to_integer(&ctx->ac, out[chan]);
                        out[chan] = LLVMBuildSIToFP(gallivm->builder,
                                                    out[chan], ctx->f32, "");
                }
                break;
        case SI_FIX_FETCH_RG_64_FLOAT:
                for (chan = 0; chan < 2; chan++)
                        out[chan] = extract_double_to_float(ctx, input[0], 
chan);

out[2] = LLVMConstReal(ctx->f32, 0);

                out[3] = LLVMConstReal(ctx->f32, 1);
@@ -666,22 +661,21 @@ void si_llvm_load_input_vs(
        case SI_FIX_FETCH_RGB_16_INT:
                for (chan = 0; chan < 3; chan++) {
                        out[chan] = LLVMBuildExtractElement(gallivm->builder,
                                                            input[chan],
                                                            ctx->i32_0, "");
                }
                if (fix_fetch == SI_FIX_FETCH_RGB_8 ||
                    fix_fetch == SI_FIX_FETCH_RGB_16) {
                        out[3] = LLVMConstReal(ctx->f32, 1);
                } else {
-                       out[3] = LLVMBuildBitCast(gallivm->builder, ctx->i32_1,
-                                                 ctx->f32, "");
+                       out[3] = ac_to_float(&ctx->ac, ctx->i32_1);
                }
                break;
        }
  }

static void declare_input_vs(

        struct si_shader_context *ctx,
        unsigned input_index,
        const struct tgsi_full_declaration *decl,
        LLVMValueRef out[4])
@@ -1066,26 +1060,25 @@ static LLVMValueRef lds_load(struct 
lp_build_tgsi_context *bld_base,
   *
   * \param swizzle     offset (typically 0..3)
   * \param dw_addr     address in dwords
   * \param value               value to store
   */
  static void lds_store(struct lp_build_tgsi_context *bld_base,
                      unsigned dw_offset_imm, LLVMValueRef dw_addr,
                      LLVMValueRef value)
  {
        struct si_shader_context *ctx = si_shader_context(bld_base);
-       struct gallivm_state *gallivm = &ctx->gallivm;

dw_addr = lp_build_add(&bld_base->uint_bld, dw_addr,

                            LLVMConstInt(ctx->i32, dw_offset_imm, 0));

-       value = LLVMBuildBitCast(gallivm->builder, value, ctx->i32, "");

+       value = ac_to_integer(&ctx->ac, value);
        ac_build_indexed_store(&ctx->ac, ctx->lds,
                               dw_addr, value);
  }

static LLVMValueRef desc_from_addr_base64k(struct si_shader_context *ctx,

                                                  unsigned param)
  {
        LLVMBuilderRef builder = ctx->gallivm.builder;

LLVMValueRef addr = LLVMGetParam(ctx->main_fn, param);

@@ -1219,21 +1212,21 @@ static void store_output_tcs(struct 
lp_build_tgsi_context *bld_base,
                chan_index = u_bit_scan(&writemask);
                LLVMValueRef value = dst[chan_index];

if (inst->Instruction.Saturate)

                        value = ac_build_clamp(&ctx->ac, value);

/* Skip LDS stores if there is no LDS read of this output. */

                if (!skip_lds_store)
                        lds_store(bld_base, chan_index, dw_addr, value);

-               value = LLVMBuildBitCast(gallivm->builder, value, ctx->i32, "");

+               value = ac_to_integer(&ctx->ac, value);
                values[chan_index] = value;

if (reg->Register.WriteMask != 0xF && !is_tess_factor) {

                        ac_build_buffer_store_dword(&ctx->ac, buffer, value, 1,
                                                    buf_addr, base,
                                                    4 * chan_index, 1, 0, true, 
false);
                }

/* Write tess factors into VGPRs for the epilog. */

                if (is_tess_factor &&
@@ -2079,42 +2072,38 @@ static void si_llvm_init_export_args(struct 
lp_build_tgsi_context *bld_base,
                args->compr = 1; /* COMPR flag */

for (chan = 0; chan < 2; chan++) {

                        LLVMValueRef pack_args[2] = {
                                values[2 * chan],
                                values[2 * chan + 1]
                        };
                        LLVMValueRef packed;

packed = ac_build_cvt_pkrtz_f16(&ctx->ac, pack_args);

-                       args->out[chan] =
-                               LLVMBuildBitCast(ctx->gallivm.builder,
-                                                packed, ctx->f32, "");
+                       args->out[chan] = ac_to_float(&ctx->ac, packed);
                }
                break;

case V_028714_SPI_SHADER_UNORM16_ABGR:

                for (chan = 0; chan < 4; chan++) {
                        val[chan] = ac_build_clamp(&ctx->ac, values[chan]);
                        val[chan] = LLVMBuildFMul(builder, val[chan],
                                                  LLVMConstReal(ctx->f32, 65535), 
"");
                        val[chan] = LLVMBuildFAdd(builder, val[chan],
                                                  LLVMConstReal(ctx->f32, 0.5), 
"");
                        val[chan] = LLVMBuildFPToUI(builder, val[chan],
                                                    ctx->i32, "");
                }

args->compr = 1; /* COMPR flag */

-               args->out[0] = bitcast(bld_base, TGSI_TYPE_FLOAT,
-                                 si_llvm_pack_two_int16(ctx, val));
-               args->out[1] = bitcast(bld_base, TGSI_TYPE_FLOAT,
-                                 si_llvm_pack_two_int16(ctx, val+2));
+               args->out[0] = ac_to_float(&ctx->ac, 
si_llvm_pack_two_int16(ctx, val));
+               args->out[1] = ac_to_float(&ctx->ac, 
si_llvm_pack_two_int16(ctx, val+2));
                break;

case V_028714_SPI_SHADER_SNORM16_ABGR:

                for (chan = 0; chan < 4; chan++) {
                        /* Clamp between [-1, 1]. */
                        val[chan] = lp_build_emit_llvm_binary(bld_base, 
TGSI_OPCODE_MIN,
                                                              values[chan],
                                                              
LLVMConstReal(ctx->f32, 1));
                        val[chan] = lp_build_emit_llvm_binary(bld_base, 
TGSI_OPCODE_MAX,
                                                              val[chan],
@@ -2126,74 +2115,68 @@ static void si_llvm_init_export_args(struct 
lp_build_tgsi_context *bld_base,
                        val[chan] = LLVMBuildFAdd(builder, val[chan],
                                        LLVMBuildSelect(builder,
                                                LLVMBuildFCmp(builder, 
LLVMRealOGE,
                                                              val[chan], base->zero, 
""),
                                                LLVMConstReal(ctx->f32, 0.5),
                                                LLVMConstReal(ctx->f32, -0.5), ""), 
"");
                        val[chan] = LLVMBuildFPToSI(builder, val[chan], ctx->i32, 
"");
                }

args->compr = 1; /* COMPR flag */

-               args->out[0] = bitcast(bld_base, TGSI_TYPE_FLOAT,
-                                 si_llvm_pack_two_int32_as_int16(ctx, val));
-               args->out[1] = bitcast(bld_base, TGSI_TYPE_FLOAT,
-                                 si_llvm_pack_two_int32_as_int16(ctx, val+2));
+               args->out[0] = ac_to_float(&ctx->ac, 
si_llvm_pack_two_int32_as_int16(ctx, val));
+               args->out[1] = ac_to_float(&ctx->ac, 
si_llvm_pack_two_int32_as_int16(ctx, val+2));
                break;

case V_028714_SPI_SHADER_UINT16_ABGR: {

                LLVMValueRef max_rgb = LLVMConstInt(ctx->i32,
                        is_int8 ? 255 : is_int10 ? 1023 : 65535, 0);
                LLVMValueRef max_alpha =
                        !is_int10 ? max_rgb : LLVMConstInt(ctx->i32, 3, 0);

/* Clamp. */

                for (chan = 0; chan < 4; chan++) {
-                       val[chan] = bitcast(bld_base, TGSI_TYPE_UNSIGNED, 
values[chan]);
+                       val[chan] = ac_to_integer(&ctx->ac, values[chan]);
                        val[chan] = lp_build_emit_llvm_binary(bld_base, 
TGSI_OPCODE_UMIN,
                                        val[chan],
                                        chan == 3 ? max_alpha : max_rgb);
                }

args->compr = 1; /* COMPR flag */

-               args->out[0] = bitcast(bld_base, TGSI_TYPE_FLOAT,
-                                 si_llvm_pack_two_int16(ctx, val));
-               args->out[1] = bitcast(bld_base, TGSI_TYPE_FLOAT,
-                                 si_llvm_pack_two_int16(ctx, val+2));
+               args->out[0] = ac_to_float(&ctx->ac, 
si_llvm_pack_two_int16(ctx, val));
+               args->out[1] = ac_to_float(&ctx->ac, 
si_llvm_pack_two_int16(ctx, val+2));
                break;
        }

case V_028714_SPI_SHADER_SINT16_ABGR: {

                LLVMValueRef max_rgb = LLVMConstInt(ctx->i32,
                        is_int8 ? 127 : is_int10 ? 511 : 32767, 0);
                LLVMValueRef min_rgb = LLVMConstInt(ctx->i32,
                        is_int8 ? -128 : is_int10 ? -512 : -32768, 0);
                LLVMValueRef max_alpha =
                        !is_int10 ? max_rgb : ctx->i32_1;
                LLVMValueRef min_alpha =
                        !is_int10 ? min_rgb : LLVMConstInt(ctx->i32, -2, 0);

/* Clamp. */

                for (chan = 0; chan < 4; chan++) {
-                       val[chan] = bitcast(bld_base, TGSI_TYPE_UNSIGNED, 
values[chan]);
+                       val[chan] = ac_to_integer(&ctx->ac, values[chan]);
                        val[chan] = lp_build_emit_llvm_binary(bld_base,
                                        TGSI_OPCODE_IMIN,
                                        val[chan], chan == 3 ? max_alpha : 
max_rgb);
                        val[chan] = lp_build_emit_llvm_binary(bld_base,
                                        TGSI_OPCODE_IMAX,
                                        val[chan], chan == 3 ? min_alpha : 
min_rgb);
                }

args->compr = 1; /* COMPR flag */

-               args->out[0] = bitcast(bld_base, TGSI_TYPE_FLOAT,
-                                 si_llvm_pack_two_int32_as_int16(ctx, val));
-               args->out[1] = bitcast(bld_base, TGSI_TYPE_FLOAT,
-                                 si_llvm_pack_two_int32_as_int16(ctx, val+2));
+               args->out[0] = ac_to_float(&ctx->ac, 
si_llvm_pack_two_int32_as_int16(ctx, val));
+               args->out[1] = ac_to_float(&ctx->ac, 
si_llvm_pack_two_int32_as_int16(ctx, val+2));
                break;
        }

case V_028714_SPI_SHADER_32_ABGR:

                memcpy(&args->out[0], values, sizeof(values[0]) * 4);
                break;
        }
  }

static void si_alpha_test(struct lp_build_tgsi_context *bld_base,

@@ -2225,21 +2208,21 @@ static LLVMValueRef 
si_scale_alpha_by_sample_mask(struct lp_build_tgsi_context *
                                                  LLVMValueRef alpha,
                                                  unsigned samplemask_param)
  {
        struct si_shader_context *ctx = si_shader_context(bld_base);
        struct gallivm_state *gallivm = &ctx->gallivm;
        LLVMValueRef coverage;

/* alpha = alpha * popcount(coverage) / SI_NUM_SMOOTH_AA_SAMPLES */

        coverage = LLVMGetParam(ctx->main_fn,
                                samplemask_param);
-       coverage = bitcast(bld_base, TGSI_TYPE_SIGNED, coverage);
+       coverage = ac_to_integer(&ctx->ac, coverage);

coverage = lp_build_intrinsic(gallivm->builder, "llvm.ctpop.i32",

                                   ctx->i32,
                                   &coverage, 1, LP_FUNC_ATTR_READNONE);

coverage = LLVMBuildUIToFP(gallivm->builder, coverage,

                                   ctx->f32, "");

coverage = LLVMBuildFMul(gallivm->builder, coverage,

                                 LLVMConstReal(ctx->f32,
@@ -2328,23 +2311,21 @@ static void emit_streamout_output(struct 
si_shader_context *ctx,
        LLVMValueRef out[4];

assert(num_comps && num_comps <= 4);

        if (!num_comps || num_comps > 4)
                return;

/* Load the output as int. */

        for (int j = 0; j < num_comps; j++) {
                assert(stream_out->stream == shader_out->vertex_stream[start + 
j]);

-               out[j] = LLVMBuildBitCast(builder,

-                                         shader_out->values[start + j],
-                               ctx->i32, "");
+               out[j] = ac_to_integer(&ctx->ac, shader_out->values[start + j]);
        }

/* Pack the output. */

        LLVMValueRef vdata = NULL;

switch (num_comps) {

        case 1: /* as i32 */
                vdata = out[0];
                break;
        case 2: /* as v2i32 */
@@ -2594,42 +2575,39 @@ static void si_llvm_export_vs(struct 
lp_build_tgsi_context *bld_base,
                        /* The output is a float, but the hw expects an integer
                         * with the first bit containing the edge flag. */
                        edgeflag_value = LLVMBuildFPToUI(ctx->gallivm.builder,
                                                         edgeflag_value,
                                                         ctx->i32, "");
                        edgeflag_value = ac_build_umin(&ctx->ac,
                                                      edgeflag_value,
                                                      ctx->i32_1);

/* The LLVM intrinsic expects a float. */

-                       pos_args[1].out[1] = 
LLVMBuildBitCast(ctx->gallivm.builder,
-                                                         edgeflag_value,
-                                                         ctx->f32, "");
+                       pos_args[1].out[1] = ac_to_float(&ctx->ac, 
edgeflag_value);
                }

if (ctx->screen->b.chip_class >= GFX9) {

                        /* GFX9 has the layer in out.z[10:0] and the viewport
                         * index in out.z[19:16].
                         */
                        if (shader->selector->info.writes_layer)
                                pos_args[1].out[2] = layer_value;

if (shader->selector->info.writes_viewport_index) {

                                LLVMValueRef v = viewport_index_value;

-                               v = bitcast(bld_base, TGSI_TYPE_UNSIGNED, v);

+                               v = ac_to_integer(&ctx->ac, v);
                                v = LLVMBuildShl(ctx->gallivm.builder, v,
                                                 LLVMConstInt(ctx->i32, 16, 0), 
"");
                                v = LLVMBuildOr(ctx->gallivm.builder, v,
-                                               bitcast(bld_base, 
TGSI_TYPE_UNSIGNED,
-                                                       pos_args[1].out[2]), 
"");
-                               pos_args[1].out[2] = bitcast(bld_base, 
TGSI_TYPE_FLOAT, v);
+                                               ac_to_integer(&ctx->ac,  
pos_args[1].out[2]), "");
+                               pos_args[1].out[2] = ac_to_float(&ctx->ac, v);
                                pos_args[1].enabled_channels |= 1 << 2;
                        }
                } else {
                        if (shader->selector->info.writes_layer)
                                pos_args[1].out[2] = layer_value;

if (shader->selector->info.writes_viewport_index) {

                                pos_args[1].out[3] = viewport_index_value;
                                pos_args[1].enabled_channels |= 1 << 3;
                        }
@@ -2888,21 +2866,21 @@ si_insert_input_ret(struct si_shader_context *ctx, 
LLVMValueRef ret,
  }

static LLVMValueRef

  si_insert_input_ret_float(struct si_shader_context *ctx, LLVMValueRef ret,
                          unsigned param, unsigned return_index)
  {
        LLVMBuilderRef builder = ctx->gallivm.builder;
        LLVMValueRef p = LLVMGetParam(ctx->main_fn, param);

return LLVMBuildInsertValue(builder, ret,

-                                   LLVMBuildBitCast(builder, p, ctx->f32, ""),
+                                   ac_to_float(&ctx->ac, p),
                                    return_index, "");
  }

static LLVMValueRef

  si_insert_input_ptr_as_2xi32(struct si_shader_context *ctx, LLVMValueRef ret,
                             unsigned param, unsigned return_index)
  {
        LLVMBuilderRef builder = ctx->gallivm.builder;
        LLVMValueRef ptr, lo, hi;

@@ -2974,39 +2952,39 @@ static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base)

                                          GFX6_SGPR_TCS_FACTOR_ADDR_BASE64K);
                /* Tess offchip and tess factor offsets are after user SGPRs. */
                ret = si_insert_input_ret(ctx, ret, 
ctx->param_tcs_offchip_offset,
                                          GFX6_TCS_NUM_USER_SGPR);
                ret = si_insert_input_ret(ctx, ret, 
ctx->param_tcs_factor_offset,
                                          GFX6_TCS_NUM_USER_SGPR + 1);
                vgpr = GFX6_TCS_NUM_USER_SGPR + 2;
        }

/* VGPRs */

-       rel_patch_id = bitcast(bld_base, TGSI_TYPE_FLOAT, rel_patch_id);
-       invocation_id = bitcast(bld_base, TGSI_TYPE_FLOAT, invocation_id);
-       tf_lds_offset = bitcast(bld_base, TGSI_TYPE_FLOAT, tf_lds_offset);
+       rel_patch_id = ac_to_float(&ctx->ac, rel_patch_id);
+       invocation_id = ac_to_float(&ctx->ac, invocation_id);
+       tf_lds_offset = ac_to_float(&ctx->ac, tf_lds_offset);

/* Leave a hole corresponding to the two input VGPRs. This ensures that

         * the invocation_id output does not alias the param_tcs_rel_ids input,
         * which saves a V_MOV on gfx9.
         */
        vgpr += 2;

ret = LLVMBuildInsertValue(builder, ret, rel_patch_id, vgpr++, "");

        ret = LLVMBuildInsertValue(builder, ret, invocation_id, vgpr++, "");

if (ctx->shader->selector->tcs_info.tessfactors_are_def_in_all_invocs) {

                vgpr++; /* skip the tess factor LDS offset */
                for (unsigned i = 0; i < 6; i++) {
                        LLVMValueRef value =
                                LLVMBuildLoad(builder, ctx->invoc0_tess_factors[i], 
"");
-                       value = bitcast(bld_base, TGSI_TYPE_FLOAT, value);
+                       value = ac_to_float(&ctx->ac, value);
                        ret = LLVMBuildInsertValue(builder, ret, value, vgpr++, 
"");
                }
        } else {
                ret = LLVMBuildInsertValue(builder, ret, tf_lds_offset, vgpr++, 
"");
        }
        ctx->return_value = ret;
  }

/* Pass TCS inputs from LS to TCS on GFX9. */

  static void si_set_ls_return_value_for_tcs(struct si_shader_context *ctx)
@@ -3159,21 +3137,21 @@ static void si_llvm_emit_es_epilogue(struct 
lp_build_tgsi_context *bld_base)

if (info->output_semantic_name[i] == TGSI_SEMANTIC_VIEWPORT_INDEX ||

                    info->output_semantic_name[i] == TGSI_SEMANTIC_LAYER)
                        continue;

param = si_shader_io_get_unique_index(info->output_semantic_name[i],

                                                      
info->output_semantic_index[i]);

for (chan = 0; chan < 4; chan++) {

                        LLVMValueRef out_val = LLVMBuildLoad(gallivm->builder, 
out_ptr[chan], "");
-                       out_val = LLVMBuildBitCast(gallivm->builder, out_val, ctx->i32, 
"");
+                       out_val = ac_to_integer(&ctx->ac, out_val);

/* GFX9 has the ESGS ring in LDS. */

                        if (ctx->screen->b.chip_class >= GFX9) {
                                lds_store(bld_base, param * 4 + chan, lds_base, 
out_val);
                                continue;
                        }

ac_build_buffer_store_dword(&ctx->ac,

                                                    ctx->esgs_ring,
                                                    out_val, 1, NULL, soffset,
@@ -3272,22 +3250,21 @@ static void si_llvm_emit_vs_epilogue(struct 
ac_shader_abi *abi,
                }
        }

if (ctx->shader->selector->so.num_outputs)

                si_llvm_emit_streamout(ctx, outputs, i, 0);

/* Export PrimitiveID. */

        if (ctx->shader->key.mono.u.vs_export_prim_id) {
                outputs[i].semantic_name = TGSI_SEMANTIC_PRIMID;
                outputs[i].semantic_index = 0;
-               outputs[i].values[0] = LLVMBuildBitCast(gallivm->builder,
-                               get_primitive_id(ctx, 0), ctx->f32, "");
+               outputs[i].values[0] = ac_to_float(&ctx->ac, 
get_primitive_id(ctx, 0));
                for (j = 1; j < 4; j++)
                        outputs[i].values[j] = LLVMConstReal(ctx->f32, 0);

memset(outputs[i].vertex_stream, 0,

                       sizeof(outputs[i].vertex_stream));
                i++;
        }

si_llvm_export_vs(&ctx->bld_base, outputs, i);

        FREE(outputs);
@@ -3350,24 +3327,24 @@ static void si_export_mrt_z(struct 
lp_build_tgsi_context *bld_base,
        args.out[1] = base->undef; /* G, stencil test value[0:7], stencil op 
value[8:15] */
        args.out[2] = base->undef; /* B, sample mask */
        args.out[3] = base->undef; /* A, alpha to mask */

if (format == V_028710_SPI_SHADER_UINT16_ABGR) {

                assert(!depth);
                args.compr = 1; /* COMPR flag */

if (stencil) {

                        /* Stencil should be in X[23:16]. */
-                       stencil = bitcast(bld_base, TGSI_TYPE_UNSIGNED, 
stencil);
+                       stencil = ac_to_integer(&ctx->ac, stencil);
                        stencil = LLVMBuildShl(ctx->gallivm.builder, stencil,
                                               LLVMConstInt(ctx->i32, 16, 0), 
"");
-                       args.out[0] = bitcast(bld_base, TGSI_TYPE_FLOAT, 
stencil);
+                       args.out[0] = ac_to_float(&ctx->ac, stencil);
                        mask |= 0x3;
                }
                if (samplemask) {
                        /* SampleMask should be in Y[15:0]. */
                        args.out[1] = samplemask;
                        mask |= 0xc;
                }
        } else {
                if (depth) {
                        args.out[0] = depth;
@@ -3549,24 +3526,23 @@ static void si_llvm_return_fs_outputs(struct 
ac_shader_abi *abi,
                        fprintf(stderr, "Warning: SI unhandled fs output 
type:%d\n",
                                semantic_name);
                }
        }

/* Fill the return structure. */

        ret = ctx->return_value;

/* Set SGPRs. */

        ret = LLVMBuildInsertValue(builder, ret,
-                                  LLVMBuildBitCast(ctx->ac.builder,
-                                               LLVMGetParam(ctx->main_fn,
-                                                       SI_PARAM_ALPHA_REF),
-                                               ctx->i32, ""),
+                                  ac_to_integer(&ctx->ac,
+                                                 LLVMGetParam(ctx->main_fn,
+                                                              
SI_PARAM_ALPHA_REF)),
                                   SI_SGPR_ALPHA_REF, "");

/* Set VGPRs */

        first_vgpr = vgpr = SI_SGPR_ALPHA_REF + 1;
        for (i = 0; i < ARRAY_SIZE(color); i++) {
                if (!color[i][0])
                        continue;

for (j = 0; j < 4; j++)

                        ret = LLVMBuildInsertValue(builder, ret, color[i][j], vgpr++, 
"");
@@ -3648,37 +3624,36 @@ LLVMTypeRef si_const_array(LLVMTypeRef elem_type, int 
num_elements)
        return LLVMPointerType(LLVMArrayType(elem_type, num_elements),
                               CONST_ADDR_SPACE);
  }

static void si_llvm_emit_ddxy(

        const struct lp_build_tgsi_action *action,
        struct lp_build_tgsi_context *bld_base,
        struct lp_build_emit_data *emit_data)
  {
        struct si_shader_context *ctx = si_shader_context(bld_base);
-       struct gallivm_state *gallivm = &ctx->gallivm;
        unsigned opcode = emit_data->info->opcode;
        LLVMValueRef val;
        int idx;
        unsigned mask;

if (opcode == TGSI_OPCODE_DDX_FINE)

                mask = AC_TID_MASK_LEFT;
        else if (opcode == TGSI_OPCODE_DDY_FINE)
                mask = AC_TID_MASK_TOP;
        else
                mask = AC_TID_MASK_TOP_LEFT;

/* for DDX we want to next X pixel, DDY next Y pixel. */

        idx = (opcode == TGSI_OPCODE_DDX || opcode == TGSI_OPCODE_DDX_FINE) ? 1 
: 2;

-       val = LLVMBuildBitCast(gallivm->builder, emit_data->args[0], ctx->i32, "");

+       val = ac_to_integer(&ctx->ac, emit_data->args[0]);
        val = ac_build_ddxy(&ctx->ac, mask, idx, val);
        emit_data->output[emit_data->chan] = val;
  }

/*

   * this takes an I,J coordinate pair,
   * and works out the X and Y derivatives.
   * it returns DDX(I), DDX(J), DDY(I), DDY(J).
   */
  static LLVMValueRef si_llvm_emit_ddxy_interp(
@@ -3720,22 +3695,21 @@ static void interp_fetch_args(
        } else if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
                LLVMValueRef sample_position;
                LLVMValueRef sample_id;
                LLVMValueRef halfval = LLVMConstReal(ctx->f32, 0.5f);

/* fetch sample ID, then fetch its sample position,

                 * and place into first two channels.
                 */
                sample_id = lp_build_emit_fetch(bld_base,
                                                emit_data->inst, 1, 
TGSI_CHAN_X);
-               sample_id = LLVMBuildBitCast(gallivm->builder, sample_id,
-                                            ctx->i32, "");
+               sample_id = ac_to_integer(&ctx->ac, sample_id);

/* Section 8.13.2 (Interpolation Functions) of the OpenGL Shading

                 * Language 4.50 spec says about interpolateAtSample:
                 *
                 *    "Returns the value of the input interpolant variable at
                 *     the location of sample number sample. If multisample
                 *     buffers are not available, the input variable will be
                 *     evaluated at the center of the pixel. If sample sample
                 *     does not exist, the position used to interpolate the
                 *     input variable is undefined."
@@ -3844,49 +3818,44 @@ static void build_interp_intrinsic(const struct 
lp_build_tgsi_action *action,
                        LLVMValueRef ix_ll = LLVMConstInt(ctx->i32, i, 0);
                        LLVMValueRef iy_ll = LLVMConstInt(ctx->i32, i + 2, 0);
                        LLVMValueRef ddx_el = 
LLVMBuildExtractElement(gallivm->builder,
                                                                      ddxy_out, ix_ll, 
"");
                        LLVMValueRef ddy_el = 
LLVMBuildExtractElement(gallivm->builder,
                                                                      ddxy_out, iy_ll, 
"");
                        LLVMValueRef interp_el = 
LLVMBuildExtractElement(gallivm->builder,
                                                                         interp_param, 
ix_ll, "");
                        LLVMValueRef temp1, temp2;

- interp_el = LLVMBuildBitCast(gallivm->builder, interp_el,

-                                                    ctx->f32, "");
+                       interp_el = ac_to_float(&ctx->ac, interp_el);

                        temp1 = LLVMBuildFMul(gallivm->builder, ddx_el, emit_data->args[0], "");

                        temp1 = LLVMBuildFAdd(gallivm->builder, temp1, interp_el, "");

                        temp2 = LLVMBuildFMul(gallivm->builder, ddy_el, emit_data->args[1], "");

                        ij_out[i] = LLVMBuildFAdd(gallivm->builder, temp2, temp1, "");

                }
                interp_param = lp_build_gather_values(gallivm, ij_out, 2);
        }

- if (interp_param) {

-               interp_param = LLVMBuildBitCast(gallivm->builder,
-                       interp_param, LLVMVectorType(ctx->f32, 2), "");
-       }
+       if (interp_param)
+               interp_param = ac_to_float(&ctx->ac, interp_param);

for (chan = 0; chan < 4; chan++) {

                LLVMValueRef gather = LLVMGetUndef(LLVMVectorType(ctx->f32, 
input_array_size));
                unsigned schan = 
tgsi_util_get_full_src_register_swizzle(&inst->Src[0], chan);

for (unsigned idx = 0; idx < input_array_size; ++idx) {

                        LLVMValueRef v, i = NULL, j = NULL;

if (interp_param) {

-                               interp_param = 
LLVMBuildBitCast(gallivm->builder,
-                                       interp_param, LLVMVectorType(ctx->f32, 2), 
"");
                                i = LLVMBuildExtractElement(
                                        gallivm->builder, interp_param, ctx->i32_0, 
"");
                                j = LLVMBuildExtractElement(
                                        gallivm->builder, interp_param, ctx->i32_1, 
"");
                        }
                        v = si_build_fs_interp(ctx, input_base + idx, schan,
                                               prim_mask, i, j);

gather = LLVMBuildInsertElement(gallivm->builder,

                                gather, v, LLVMConstInt(ctx->i32, idx, false), 
"");
@@ -3965,31 +3934,28 @@ static void read_invoc_fetch_args(
                                                 1, TGSI_CHAN_X);
        emit_data->arg_count = 2;
  }

static void read_lane_emit(

        const struct lp_build_tgsi_action *action,
        struct lp_build_tgsi_context *bld_base,
        struct lp_build_emit_data *emit_data)
  {
        struct si_shader_context *ctx = si_shader_context(bld_base);
-       LLVMBuilderRef builder = ctx->gallivm.builder;

/* We currently have no other way to prevent LLVM from lifting the icmp

         * calls to a dominating basic block.
         */
        ac_build_optimization_barrier(&ctx->ac, &emit_data->args[0]);

- for (unsigned i = 0; i < emit_data->arg_count; ++i) {

-               emit_data->args[i] = LLVMBuildBitCast(builder, 
emit_data->args[i],
-                                                     ctx->i32, "");
-       }
+       for (unsigned i = 0; i < emit_data->arg_count; ++i)
+               emit_data->args[i] = ac_to_integer(&ctx->ac, 
emit_data->args[i]);

emit_data->output[emit_data->chan] =

                ac_build_intrinsic(&ctx->ac, action->intr_name,
                                   ctx->i32, emit_data->args, 
emit_data->arg_count,
                                   AC_FUNC_ATTR_READNONE |
                                   AC_FUNC_ATTR_CONVERGENT);
  }

static unsigned si_llvm_get_stream(struct lp_build_tgsi_context *bld_base,

                                       struct lp_build_emit_data *emit_data)
@@ -4067,21 +4033,21 @@ static void si_llvm_emit_vertex(

LLVMValueRef out_val = LLVMBuildLoad(gallivm->builder, out_ptr[chan], "");

                        LLVMValueRef voffset =
                                LLVMConstInt(ctx->i32, offset *
                                             
shader->selector->gs_max_out_vertices, 0);
                        offset++;

voffset = lp_build_add(uint, voffset, gs_next_vertex);

                        voffset = lp_build_mul_imm(uint, voffset, 4);

- out_val = LLVMBuildBitCast(gallivm->builder, out_val, ctx->i32, "");

+                       out_val = ac_to_integer(&ctx->ac, out_val);

ac_build_buffer_store_dword(&ctx->ac,

                                                    ctx->gsvs_ring[stream],
                                                    out_val, 1,
                                                    voffset, soffset, 0,
                                                    1, 1, true, true);
                }
        }

gs_next_vertex = lp_build_add(uint, gs_next_vertex,

@@ -4810,21 +4776,21 @@ static void si_llvm_emit_polygon_stipple(struct 
si_shader_context *ctx,
        address[1] = unpack_param(ctx, param_pos_fixed_pt, 16, 5);

/* Load the buffer descriptor. */

        slot = LLVMConstInt(ctx->i32, SI_PS_CONST_POLY_STIPPLE, 0);
        desc = ac_build_indexed_load_const(&ctx->ac, param_rw_buffers, slot);

/* The stipple pattern is 32x32, each row has 32 bits. */

        offset = LLVMBuildMul(builder, address[1],
                              LLVMConstInt(ctx->i32, 4, 0), "");
        row = buffer_load_const(ctx, desc, offset);
-       row = LLVMBuildBitCast(builder, row, ctx->i32, "");
+       row = ac_to_integer(&ctx->ac, row);
        bit = LLVMBuildLShr(builder, row, address[0], "");
        bit = LLVMBuildTrunc(builder, bit, ctx->i1, "");

/* The intrinsic kills the thread if arg < 0. */

        bit = LLVMBuildSelect(builder, bit, LLVMConstReal(ctx->f32, 0),
                              LLVMConstReal(ctx->f32, -1), "");
        ac_build_kill(&ctx->ac, bit);
  }

void si_shader_binary_read_config(struct ac_shader_binary *binary,

@@ -6053,21 +6019,21 @@ static void si_build_gs_prolog_function(struct 
si_shader_context *ctx,
        /* Copy inputs to outputs. This should be no-op, as the registers match,
         * but it will prevent the compiler from overwriting them 
unintentionally.
         */
        ret = ctx->return_value;
        for (unsigned i = 0; i < num_sgprs; i++) {
                LLVMValueRef p = LLVMGetParam(func, i);
                ret = LLVMBuildInsertValue(builder, ret, p, i, "");
        }
        for (unsigned i = 0; i < num_vgprs; i++) {
                LLVMValueRef p = LLVMGetParam(func, num_sgprs + i);
-               p = LLVMBuildBitCast(builder, p, ctx->f32, "");
+               p = ac_to_float(&ctx->ac, p);
                ret = LLVMBuildInsertValue(builder, ret, p, num_sgprs + i, "");
        }

if (key->gs_prolog.states.tri_strip_adj_fix) {

                /* Remap the input vertices for every other primitive. */
                const unsigned gfx6_vtx_params[6] = {
                        num_sgprs,
                        num_sgprs + 1,
                        num_sgprs + 3,
                        num_sgprs + 4,
@@ -6102,29 +6068,29 @@ static void si_build_gs_prolog_function(struct 
si_shader_context *ctx,
                        vtx_out[i] = LLVMBuildSelect(builder, rotate, rotated, base, 
"");
                }

if (ctx->screen->b.chip_class >= GFX9) {

                        for (unsigned i = 0; i < 3; i++) {
                                LLVMValueRef hi, out;

hi = LLVMBuildShl(builder, vtx_out[i*2+1],

                                                  LLVMConstInt(ctx->i32, 16, 0), 
"");
                                out = LLVMBuildOr(builder, vtx_out[i*2], hi, 
"");
-                               out = LLVMBuildBitCast(builder, out, ctx->f32, 
"");
+                               out = ac_to_float(&ctx->ac, out);
                                ret = LLVMBuildInsertValue(builder, ret, out,
                                                           gfx9_vtx_params[i], 
"");
                        }
                } else {
                        for (unsigned i = 0; i < 6; i++) {
                                LLVMValueRef out;

- out = LLVMBuildBitCast(builder, vtx_out[i], ctx->f32, "");

+                               out = ac_to_float(&ctx->ac, vtx_out[i]);
                                ret = LLVMBuildInsertValue(builder, ret, out,
                                                           gfx6_vtx_params[i], 
"");
                        }
                }
        }

LLVMBuildRet(builder, ret);

/**

@@ -6866,21 +6832,21 @@ static void si_build_vs_prolog_function(struct 
si_shader_context *ctx,
        /* Copy inputs to outputs. This should be no-op, as the registers match,
         * but it will prevent the compiler from overwriting them 
unintentionally.
         */
        ret = ctx->return_value;
        for (i = 0; i < key->vs_prolog.num_input_sgprs; i++) {
                LLVMValueRef p = LLVMGetParam(func, i);
                ret = LLVMBuildInsertValue(gallivm->builder, ret, p, i, "");
        }
        for (i = 0; i < num_input_vgprs; i++) {
                LLVMValueRef p = input_vgprs[i];
-               p = LLVMBuildBitCast(gallivm->builder, p, ctx->f32, "");
+               p = ac_to_float(&ctx->ac, p);
                ret = LLVMBuildInsertValue(gallivm->builder, ret, p,
                                           key->vs_prolog.num_input_sgprs + i, 
"");
        }

/* Compute vertex load indices from instance divisors. */

        LLVMValueRef instance_divisor_constbuf = NULL;

if (key->vs_prolog.states.instance_divisor_is_fetched) {

                LLVMValueRef list = si_prolog_get_rw_buffers(ctx);
                LLVMValueRef buf_index =
@@ -6895,38 +6861,37 @@ static void si_build_vs_prolog_function(struct 
si_shader_context *ctx,
                bool divisor_is_fetched =
                        key->vs_prolog.states.instance_divisor_is_fetched & (1u 
<< i);
                LLVMValueRef index;

if (divisor_is_one || divisor_is_fetched) {

                        LLVMValueRef divisor = ctx->i32_1;

if (divisor_is_fetched) {

                                divisor = buffer_load_const(ctx, 
instance_divisor_constbuf,
                                                            
LLVMConstInt(ctx->i32, i * 4, 0));
-                               divisor = LLVMBuildBitCast(gallivm->builder, 
divisor,
-                                                          ctx->i32, "");
+                               divisor = ac_to_integer(&ctx->ac, divisor);
                        }

/* InstanceID / Divisor + StartInstance */

                        index = get_instance_index_for_fetch(ctx,
                                                             user_sgpr_base +
                                                             
SI_SGPR_START_INSTANCE,
                                                             divisor);
                } else {
                        /* VertexID + BaseVertex */
                        index = LLVMBuildAdd(gallivm->builder,
                                             ctx->abi.vertex_id,
                                             LLVMGetParam(func, user_sgpr_base +
                                                                SI_SGPR_BASE_VERTEX), 
"");
                }

- index = LLVMBuildBitCast(gallivm->builder, index, ctx->f32, "");

+               index = ac_to_float(&ctx->ac, index);
                ret = LLVMBuildInsertValue(gallivm->builder, ret, index,
                                           fninfo.num_params + i, "");
        }

si_llvm_build_ret(ctx, ret);

static bool si_get_vs_prolog(struct si_screen *sscreen,

                             LLVMTargetMachineRef tm,
                             struct si_shader *shader,
@@ -7310,21 +7275,21 @@ static void si_build_ps_prolog_function(struct 
si_shader_context *ctx,
                        interp[1] = LLVMBuildExtractValue(gallivm->builder, ret,
                                                          interp_vgpr + 1, "");
                        interp_ij = lp_build_gather_values(gallivm, interp, 2);
                }

/* Use the absolute location of the input. */

                prim_mask = LLVMGetParam(func, SI_PS_NUM_USER_SGPR);

if (key->ps_prolog.states.color_two_side) {

                        face = LLVMGetParam(func, face_vgpr);
-                       face = LLVMBuildBitCast(gallivm->builder, face, ctx->i32, 
"");
+                       face = ac_to_integer(&ctx->ac, face);
                }

interp_fs_input(ctx,

                                key->ps_prolog.color_attr_index[i],
                                TGSI_SEMANTIC_COLOR, i,
                                key->ps_prolog.num_interp_inputs,
                                key->ps_prolog.colors_read, interp_ij,
                                prim_mask, face, color);

while (writemask) {

@@ -7361,29 +7326,29 @@ static void si_build_ps_prolog_function(struct 
si_shader_context *ctx,
                        0x0001,
                };
                assert(key->ps_prolog.states.samplemask_log_ps_iter < 
ARRAY_SIZE(ps_iter_masks));

uint32_t ps_iter_mask = ps_iter_masks[key->ps_prolog.states.samplemask_log_ps_iter];

                unsigned ancillary_vgpr = key->ps_prolog.num_input_sgprs +
                                          key->ps_prolog.ancillary_vgpr_index;
                LLVMValueRef sampleid = unpack_param(ctx, ancillary_vgpr, 8, 4);
                LLVMValueRef samplemask = LLVMGetParam(func, ancillary_vgpr + 
1);

- samplemask = LLVMBuildBitCast(gallivm->builder, samplemask, ctx->i32, "");

+               samplemask = ac_to_integer(&ctx->ac, samplemask);
                samplemask = LLVMBuildAnd(
                        gallivm->builder,
                        samplemask,
                        LLVMBuildShl(gallivm->builder,
                                     LLVMConstInt(ctx->i32, ps_iter_mask, 
false),
                                     sampleid, ""),
                        "");
-               samplemask = LLVMBuildBitCast(gallivm->builder, samplemask, ctx->f32, 
"");
+               samplemask = ac_to_float(&ctx->ac, samplemask);

ret = LLVMBuildInsertValue(gallivm->builder, ret, samplemask,

                                           ancillary_vgpr + 1, "");
        }

/* Tell LLVM to insert WQM instruction sequence when needed. */

        if (key->ps_prolog.wqm) {
                LLVMAddTargetDependentFunctionAttr(func,
                                                   "amdgpu-ps-wqm-outputs", "");
        }
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c 
b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c
index 818ca49..3dcbb23 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_alu.c
@@ -121,24 +121,24 @@ static void emit_icmp(const struct lp_build_tgsi_action 
*action,
        v = LLVMBuildSExtOrBitCast(builder, v,
                        LLVMInt32TypeInContext(context), "");

emit_data->output[emit_data->chan] = v;

static void emit_ucmp(const struct lp_build_tgsi_action *action,

                      struct lp_build_tgsi_context *bld_base,
                      struct lp_build_emit_data *emit_data)
  {
+       struct si_shader_context *ctx = si_shader_context(bld_base);
        LLVMBuilderRef builder = bld_base->base.gallivm->builder;

- LLVMValueRef arg0 = LLVMBuildBitCast(builder, emit_data->args[0],

-                                            bld_base->uint_bld.elem_type, "");
+       LLVMValueRef arg0 = ac_to_integer(&ctx->ac, emit_data->args[0]);

LLVMValueRef v = LLVMBuildICmp(builder, LLVMIntNE, arg0,

                                       bld_base->uint_bld.zero, "");

emit_data->output[emit_data->chan] =

                LLVMBuildSelect(builder, v, emit_data->args[1], emit_data->args[2], 
"");
  }

static void emit_cmp(const struct lp_build_tgsi_action *action,

                     struct lp_build_tgsi_context *bld_base,
@@ -235,23 +235,23 @@ static void emit_dcmp(const struct lp_build_tgsi_action 
*action,
        v = LLVMBuildSExtOrBitCast(builder, v,
                        LLVMInt32TypeInContext(context), "");

emit_data->output[emit_data->chan] = v;

static void emit_not(const struct lp_build_tgsi_action *action,

                     struct lp_build_tgsi_context *bld_base,
                     struct lp_build_emit_data *emit_data)
  {
+       struct si_shader_context *ctx = si_shader_context(bld_base);
        LLVMBuilderRef builder = bld_base->base.gallivm->builder;
-       LLVMValueRef v = bitcast(bld_base, TGSI_TYPE_UNSIGNED,
-                       emit_data->args[0]);
+       LLVMValueRef v = ac_to_integer(&ctx->ac, emit_data->args[0]);
        emit_data->output[emit_data->chan] = LLVMBuildNot(builder, v, "");
  }

static void emit_arl(const struct lp_build_tgsi_action *action,

                     struct lp_build_tgsi_context *bld_base,
                     struct lp_build_emit_data *emit_data)
  {
        LLVMBuilderRef builder = bld_base->base.gallivm->builder;
        LLVMValueRef floor_index =  lp_build_emit_llvm_unary(bld_base, 
TGSI_OPCODE_FLR, emit_data->args[0]);
        emit_data->output[emit_data->chan] = LLVMBuildFPToSI(builder,
@@ -675,36 +675,36 @@ static void up2h_fetch_args(struct lp_build_tgsi_context 
*bld_base,
                            struct lp_build_emit_data *emit_data)
  {
        emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst,
                                                 0, TGSI_CHAN_X);
  }

static void emit_up2h(const struct lp_build_tgsi_action *action,

                      struct lp_build_tgsi_context *bld_base,
                      struct lp_build_emit_data *emit_data)
  {
+       struct si_shader_context *ctx = si_shader_context(bld_base);
        LLVMBuilderRef builder = bld_base->base.gallivm->builder;
        LLVMContextRef context = bld_base->base.gallivm->context;
        struct lp_build_context *uint_bld = &bld_base->uint_bld;
-       LLVMTypeRef fp16, i16;
+       LLVMTypeRef i16;
        LLVMValueRef const16, input, val;
        unsigned i;

- fp16 = LLVMHalfTypeInContext(context);

        i16 = LLVMInt16TypeInContext(context);
        const16 = lp_build_const_int32(uint_bld->gallivm, 16);
        input = emit_data->args[0];

for (i = 0; i < 2; i++) {

                val = i == 1 ? LLVMBuildLShr(builder, input, const16, "") : 
input;
                val = LLVMBuildTrunc(builder, val, i16, "");
-               val = LLVMBuildBitCast(builder, val, fp16, "");
+               val = ac_to_float(&ctx->ac, val);
                emit_data->output[i] =
                        LLVMBuildFPExt(builder, val, bld_base->base.elem_type, 
"");
        }
  }

static void emit_fdiv(const struct lp_build_tgsi_action *action,

                      struct lp_build_tgsi_context *bld_base,
                      struct lp_build_emit_data *emit_data)
  {
        struct si_shader_context *ctx = si_shader_context(bld_base);
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c 
b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
index ba13d3b..887475b 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
@@ -241,21 +241,21 @@ static LLVMValueRef image_fetch_coords(
        struct gallivm_state *gallivm = &ctx->gallivm;
        LLVMBuilderRef builder = gallivm->builder;
        unsigned target = inst->Memory.Texture;
        unsigned num_coords = tgsi_util_get_texture_coord_dim(target);
        LLVMValueRef coords[4];
        LLVMValueRef tmp;
        int chan;

for (chan = 0; chan < num_coords; ++chan) {

                tmp = lp_build_emit_fetch(bld_base, inst, src, chan);
-               tmp = LLVMBuildBitCast(builder, tmp, ctx->i32, "");
+               tmp = ac_to_integer(&ctx->ac, tmp);
                coords[chan] = tmp;
        }

if (ctx->screen->b.chip_class >= GFX9) {

                /* 1D textures are allocated and used as 2D on GFX9. */
                if (target == TGSI_TEXTURE_1D) {
                        coords[1] = ctx->i32_0;
                        num_coords++;
                } else if (target == TGSI_TEXTURE_1D_ARRAY) {
                        coords[2] = coords[1];
@@ -360,38 +360,36 @@ static void buffer_append_args(
                        i1true : i1false; /* glc */
        }
        emit_data->args[emit_data->arg_count++] = i1false; /* slc */
  }

static void load_fetch_args(

                struct lp_build_tgsi_context * bld_base,
                struct lp_build_emit_data * emit_data)
  {
        struct si_shader_context *ctx = si_shader_context(bld_base);
-       struct gallivm_state *gallivm = &ctx->gallivm;
        const struct tgsi_full_instruction * inst = emit_data->inst;
        unsigned target = inst->Memory.Texture;
        LLVMValueRef rsrc;

        emit_data->dst_type = ctx->v4f32;

        if (inst->Src[0].Register.File == TGSI_FILE_BUFFER ||
                   inst->Src[0].Register.File == TGSI_FILE_CONSTBUF) {
-               LLVMBuilderRef builder = gallivm->builder;
                LLVMValueRef offset;
                LLVMValueRef tmp;

bool ubo = inst->Src[0].Register.File == TGSI_FILE_CONSTBUF;

                rsrc = shader_buffer_fetch_rsrc(ctx, &inst->Src[0], ubo);

tmp = lp_build_emit_fetch(bld_base, inst, 1, 0);

-               offset = LLVMBuildBitCast(builder, tmp, ctx->i32, "");
+               offset = ac_to_integer(&ctx->ac, tmp);

buffer_append_args(ctx, emit_data, rsrc, ctx->i32_0,

                                   offset, false, false);
        } else if (inst->Src[0].Register.File == TGSI_FILE_IMAGE ||
                   tgsi_is_bindless_image_file(inst->Src[0].Register.File)) {
                LLVMValueRef coords;

image_fetch_rsrc(bld_base, &inst->Src[0], false, target, &rsrc);

                coords = image_fetch_coords(bld_base, inst, 1, rsrc);

@@ -460,21 +458,21 @@ static void load_emit_buffer(struct si_shader_context *ctx,

  static LLVMValueRef get_memory_ptr(struct si_shader_context *ctx,
                                     const struct tgsi_full_instruction *inst,
                                     LLVMTypeRef type, int arg)
  {
        struct gallivm_state *gallivm = &ctx->gallivm;
        LLVMBuilderRef builder = gallivm->builder;
        LLVMValueRef offset, ptr;
        int addr_space;

offset = lp_build_emit_fetch(&ctx->bld_base, inst, arg, 0);

-       offset = LLVMBuildBitCast(builder, offset, ctx->i32, "");
+       offset = ac_to_integer(&ctx->ac, offset);

ptr = ctx->shared_memory;

        ptr = LLVMBuildGEP(builder, ptr, &offset, 1, "");
        addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
        ptr = LLVMBuildBitCast(builder, ptr, LLVMPointerType(type, addr_space), 
"");

return ptr;

static void load_emit_memory(

@@ -624,21 +622,20 @@ static void load_emit(
                                get_load_intr_attribs(can_speculate));
        }
  }

static void store_fetch_args(

                struct lp_build_tgsi_context * bld_base,
                struct lp_build_emit_data * emit_data)
  {
        struct si_shader_context *ctx = si_shader_context(bld_base);
        struct gallivm_state *gallivm = &ctx->gallivm;
-       LLVMBuilderRef builder = gallivm->builder;
        const struct tgsi_full_instruction * inst = emit_data->inst;
        struct tgsi_full_src_register memory;
        LLVMValueRef chans[4];
        LLVMValueRef data;
        LLVMValueRef rsrc;
        unsigned chan;

emit_data->dst_type = LLVMVoidTypeInContext(gallivm->context);for (chan = 0; chan < 4; ++chan) {

@@ -650,21 +647,21 @@ static void store_fetch_args(

memory = tgsi_full_src_register_from_dst(&inst->Dst[0]);if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER) {

                LLVMValueRef offset;
                LLVMValueRef tmp;

rsrc = shader_buffer_fetch_rsrc(ctx, &memory, false);tmp = lp_build_emit_fetch(bld_base, inst, 0, 0);

-               offset = LLVMBuildBitCast(builder, tmp, ctx->i32, "");
+               offset = ac_to_integer(&ctx->ac, tmp);

buffer_append_args(ctx, emit_data, rsrc, ctx->i32_0,

                                   offset, false, false);
        } else if (inst->Dst[0].Register.File == TGSI_FILE_IMAGE ||
                   tgsi_is_bindless_image_file(inst->Dst[0].Register.File)) {
                unsigned target = inst->Memory.Texture;
                LLVMValueRef coords;

/* 8bit/16bit TC L1 write corruption bug on SI.

                 * All store opcodes not aligned to a dword are affected.
@@ -840,51 +837,49 @@ static void store_emit(
                                emit_data->args, emit_data->arg_count,
                                get_store_intr_attribs(writeonly_memory));
        }
  }

static void atomic_fetch_args(

                struct lp_build_tgsi_context * bld_base,
                struct lp_build_emit_data * emit_data)
  {
        struct si_shader_context *ctx = si_shader_context(bld_base);
-       struct gallivm_state *gallivm = &ctx->gallivm;
-       LLVMBuilderRef builder = gallivm->builder;
        const struct tgsi_full_instruction * inst = emit_data->inst;
        LLVMValueRef data1, data2;
        LLVMValueRef rsrc;
        LLVMValueRef tmp;

emit_data->dst_type = ctx->f32;tmp = lp_build_emit_fetch(bld_base, inst, 2, 0);

-       data1 = LLVMBuildBitCast(builder, tmp, ctx->i32, "");
+       data1 = ac_to_integer(&ctx->ac, tmp);

if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {

                tmp = lp_build_emit_fetch(bld_base, inst, 3, 0);
-               data2 = LLVMBuildBitCast(builder, tmp, ctx->i32, "");
+               data2 = ac_to_integer(&ctx->ac, tmp);
        }

/* llvm.amdgcn.image/buffer.atomic.cmpswap reflect the hardware order

         * of arguments, which is reversed relative to TGSI (and GLSL)
         */
        if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS)
                emit_data->args[emit_data->arg_count++] = data2;
        emit_data->args[emit_data->arg_count++] = data1;

if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) {

                LLVMValueRef offset;

rsrc = shader_buffer_fetch_rsrc(ctx, &inst->Src[0], false);tmp = lp_build_emit_fetch(bld_base, inst, 1, 0);

-               offset = LLVMBuildBitCast(builder, tmp, ctx->i32, "");
+               offset = ac_to_integer(&ctx->ac, tmp);

buffer_append_args(ctx, emit_data, rsrc, ctx->i32_0,

                                   offset, true, false);
        } else if (inst->Src[0].Register.File == TGSI_FILE_IMAGE ||
                   tgsi_is_bindless_image_file(inst->Src[0].Register.File)) {
                unsigned target = inst->Memory.Texture;
                LLVMValueRef coords;

image_fetch_rsrc(bld_base, &inst->Src[0], true, target, &rsrc);

                coords = image_fetch_coords(bld_base, inst, 1, rsrc);
@@ -904,28 +899,28 @@ static void atomic_fetch_args(
  static void atomic_emit_memory(struct si_shader_context *ctx,
                                 struct lp_build_emit_data *emit_data) {
        struct gallivm_state *gallivm = &ctx->gallivm;
        LLVMBuilderRef builder = gallivm->builder;
        const struct tgsi_full_instruction * inst = emit_data->inst;
        LLVMValueRef ptr, result, arg;

ptr = get_memory_ptr(ctx, inst, ctx->i32, 1);arg = lp_build_emit_fetch(&ctx->bld_base, inst, 2, 0);

-       arg = LLVMBuildBitCast(builder, arg, ctx->i32, "");
+       arg = ac_to_integer(&ctx->ac, arg);

if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {

                LLVMValueRef new_data;
                new_data = lp_build_emit_fetch(&ctx->bld_base,
                                               inst, 3, 0);

- new_data = LLVMBuildBitCast(builder, new_data, ctx->i32, "");

+               new_data = ac_to_integer(&ctx->ac, new_data);

result = LLVMBuildAtomicCmpXchg(builder, ptr, arg, new_data,

                                       LLVMAtomicOrderingSequentiallyConsistent,
                                       LLVMAtomicOrderingSequentiallyConsistent,
                                       false);

result = LLVMBuildExtractValue(builder, result, 0, "");

        } else {
                LLVMAtomicRMWBinOp op;

@@ -1000,22 +995,21 @@ static void atomic_emit(ac_build_type_name_for_intr(LLVMTypeOf(coords), coords_type, sizeof(coords_type));

                snprintf(intrinsic_name, sizeof(intrinsic_name),
                         "llvm.amdgcn.image.atomic.%s.%s",
                         action->intr_name, coords_type);
        }

tmp = lp_build_intrinsic(

                builder, intrinsic_name, ctx->i32,
                emit_data->args, emit_data->arg_count, 0);
-       emit_data->output[emit_data->chan] =
-               LLVMBuildBitCast(builder, tmp, ctx->f32, "");
+       emit_data->output[emit_data->chan] = ac_to_float(&ctx->ac, tmp);
  }

static void set_tex_fetch_args(struct si_shader_context *ctx,

                               struct lp_build_emit_data *emit_data,
                               unsigned target,
                               LLVMValueRef res_ptr, LLVMValueRef samp_ptr,
                               LLVMValueRef *param, unsigned count,
                               unsigned dmask)
  {
        struct gallivm_state *gallivm = &ctx->gallivm;
@@ -1545,24 +1539,22 @@ static void tex_fetch_args(
        if (opcode == TGSI_OPCODE_TXL || opcode == TGSI_OPCODE_TXF)
                address[count++] = coords[3];
        else if (opcode == TGSI_OPCODE_TXL2)
                address[count++] = lp_build_emit_fetch(bld_base, inst, 1, 
TGSI_CHAN_X);

if (count > 16) {

                assert(!"Cannot handle more than 16 texture address 
parameters");
                count = 16;
        }

- for (chan = 0; chan < count; chan++ ) {


May as well have removed the additional space there :)

But anyway:

Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com>

-               address[chan] = LLVMBuildBitCast(gallivm->builder,
-                                                address[chan], ctx->i32, "");
-       }
+       for (chan = 0; chan < count; chan++ )
+               address[chan] = ac_to_integer(&ctx->ac, address[chan]);

/* Adjust the sample index according to FMASK.

         *
         * For uncompressed MSAA surfaces, FMASK should return 0x76543210,
         * which is the identity mapping. Each nibble says which physical sample
         * should be fetched to get that sample.
         *
         * For example, 0x11111100 means there are only 2 samples stored and
         * the second sample covers 3/4 of the pixel. When reading samples 0
         * and 1, return physical sample 0 (determined by the first two 0s
@@ -1796,23 +1788,23 @@ si_lower_gather4_integer(struct si_shader_context *ctx,
                        half_texel[c] = LLVMBuildFMul(builder, half_texel[c],
                                                      LLVMConstReal(ctx->f32, -0.5), 
"");
                }
        }

for (c = 0; c < 2; c++) {

                LLVMValueRef tmp;
                LLVMValueRef index = LLVMConstInt(ctx->i32, coord_vgpr_index + 
c, 0);

tmp = LLVMBuildExtractElement(builder, coord, index, "");

-               tmp = LLVMBuildBitCast(builder, tmp, ctx->f32, "");
+               tmp = ac_to_float(&ctx->ac, tmp);
                tmp = LLVMBuildFAdd(builder, tmp, half_texel[c], "");
-               tmp = LLVMBuildBitCast(builder, tmp, ctx->i32, "");
+               tmp = ac_to_integer(&ctx->ac, tmp);
                coord = LLVMBuildInsertElement(builder, coord, tmp, index, "");
        }

        args->addr = coord;

        return NULL;

/* The second half of the cube texture 8_8_8_8 integer workaround: adjust the

   * result after the gather operation.
@@ -1832,21 +1824,21 @@ si_fix_gather4_integer_result(struct si_shader_context *ctx,
                LLVMValueRef chanv = LLVMConstInt(ctx->i32, chan, false);
                LLVMValueRef value;
                LLVMValueRef wa_value;

                value = LLVMBuildExtractElement(builder, result, chanv, "");
                if (return_type == TGSI_RETURN_TYPE_UINT)
                        wa_value = LLVMBuildFPToUI(builder, value, ctx->i32, "");
                else
                        wa_value = LLVMBuildFPToSI(builder, value, ctx->i32, "");
-               wa_value = LLVMBuildBitCast(builder, wa_value, ctx->f32, "");
+               wa_value = ac_to_float(&ctx->ac, wa_value);
                value = LLVMBuildSelect(builder, wa, wa_value, value, "");

result = LLVMBuildInsertElement(builder, result, value, chanv, "");

return result;

static void build_tex_intrinsic(const struct lp_build_tgsi_action *action,

                                struct lp_build_tgsi_context *bld_base,
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
index d6529f2..5b20ff3 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
@@ -399,32 +399,33 @@ get_pointer_into_array(struct si_shader_context *ctx,
        idxs[1] = index;
        return LLVMBuildGEP(builder, alloca, idxs, 2, "");
  }

LLVMValueRef

  si_llvm_emit_fetch_64bit(struct lp_build_tgsi_context *bld_base,
                         enum tgsi_opcode_type type,
                         LLVMValueRef ptr,
                         LLVMValueRef ptr2)
  {
+       struct si_shader_context *ctx = si_shader_context(bld_base);
        LLVMBuilderRef builder = bld_base->base.gallivm->builder;
        LLVMValueRef result;

        result = LLVMGetUndef(LLVMVectorType(LLVMIntTypeInContext(bld_base->base.gallivm->context, 32), bld_base->base.type.length * 2));
        result = LLVMBuildInsertElement(builder,

                                        result,
-                                       bitcast(bld_base, TGSI_TYPE_UNSIGNED, ptr),
+                                       ac_to_integer(&ctx->ac, ptr),
                                        bld_base->int_bld.zero, "");
        result = LLVMBuildInsertElement(builder,
                                        result,
-                                       bitcast(bld_base, TGSI_TYPE_UNSIGNED, ptr2),
+                                       ac_to_integer(&ctx->ac, ptr2),
                                        bld_base->int_bld.one, "");
        return bitcast(bld_base, type, result);
  }

static LLVMValueRef

  emit_array_fetch(struct lp_build_tgsi_context *bld_base,
                 unsigned File, enum tgsi_opcode_type type,
                 struct tgsi_declaration_range range,
                 unsigned swizzle)
  {
@@ -906,21 +907,21 @@ void si_llvm_emit_store(struct lp_build_tgsi_context *bld_base,
                if (inst->Instruction.Saturate)
                        value = ac_build_clamp(&ctx->ac, value);

if (reg->Register.File == TGSI_FILE_ADDRESS) {

                        temp_ptr = ctx->addrs[reg->Register.Index][chan_index];
                        LLVMBuildStore(builder, value, temp_ptr);
                        continue;
                }

if (!tgsi_type_is_64bit(dtype))

-                       value = bitcast(bld_base, TGSI_TYPE_FLOAT, value);
+                       value = ac_to_float(&ctx->ac, value);

if (reg->Register.Indirect) {

                        unsigned file = reg->Register.File;
                        unsigned reg_index = reg->Register.Index;
                        store_value_to_array(bld_base, value, file, chan_index,
                                             reg_index, &reg->Indirect);
                } else {
                        switch(reg->Register.File) {
                        case TGSI_FILE_OUTPUT:
                                temp_ptr = ctx->outputs[reg->Register.Index][chan_index];
@@ -946,22 +947,22 @@ void si_llvm_emit_store(struct lp_build_tgsi_context *bld_base,
                                LLVMBuildStore(builder, value, temp_ptr);
                        else {
                                LLVMValueRef ptr = LLVMBuildBitCast(builder, value,
                                                                    LLVMVectorType(ctx->i32, 2), "");
                                LLVMValueRef val2;
                                value = LLVMBuildExtractElement(builder, ptr,
                                                                ctx->i32_0, "");
                                val2 = LLVMBuildExtractElement(builder, ptr,
                                                               ctx->i32_1, "");

-                               LLVMBuildStore(builder, bitcast(bld_base, TGSI_TYPE_FLOAT, value), temp_ptr);
-                               LLVMBuildStore(builder, bitcast(bld_base, TGSI_TYPE_FLOAT, val2), temp_ptr2);
+                               LLVMBuildStore(builder, ac_to_float(&ctx->ac, value), temp_ptr);
+                               LLVMBuildStore(builder, ac_to_float(&ctx->ac, val2), temp_ptr2);
                        }
                }
        }
  }

static void set_basicblock_name(LLVMBasicBlockRef bb, const char *base, int pc)

  {
        char buf[32];
        /* Subtract 1 so that the number shown is that of the corresponding
         * opcode in the TGSI dump, e.g. an if block has the same suffix as
@@ -1118,25 +1119,26 @@ static void if_emit(const struct lp_build_tgsi_action *action,
                        emit_data->args[0],
                        bld_base->base.zero, "");

if_cond_emit(action, bld_base, emit_data, cond);

static void uif_emit(const struct lp_build_tgsi_action *action,

                     struct lp_build_tgsi_context *bld_base,
                     struct lp_build_emit_data *emit_data)
  {
+       struct si_shader_context *ctx = si_shader_context(bld_base);
        struct gallivm_state *gallivm = bld_base->base.gallivm;
        LLVMValueRef cond;

cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE,

-               bitcast(bld_base, TGSI_TYPE_UNSIGNED, emit_data->args[0]),
+               ac_to_integer(&ctx->ac, emit_data->args[0]),
                        bld_base->int_bld.zero, "");

if_cond_emit(action, bld_base, emit_data, cond);

static void emit_immediate(struct lp_build_tgsi_context *bld_base,

                           const struct tgsi_full_immediate *imm)
  {
        unsigned i;
        struct si_shader_context *ctx = si_shader_context(bld_base);



--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 01/11] radeonsi: use ac helpers for bitcasts

Reply via email to