This one does not apply any longer after Samuel's commit
amd/common: add ac_build_waitcnt()
#225b19880204024a805cc54b1001d09ef3b58054

For your motivation:
I've tested V1 and V2 of the whole series (before the latest master commits) and could run _all_ my 'normal' stuff.

Even UH runs with GREAT tess speed without any hiccups.

GREAT stuff!

V1 + V2 have my Tested-by, even from before Nicolai formulated his comments.

BTW There are many commits waiting...;-)

Cheers,
Dieter

Am 13.12.2017 08:52, schrieb Timothy Arceri:
We will call these from the radeonsi NIR backend.

Reviewed-by: Nicolai Hähnle <nicolai.haeh...@amd.com>
---
 src/amd/common/ac_llvm_build.c  | 24 +++++++++++++++++
 src/amd/common/ac_llvm_build.h  |  8 ++++++
src/amd/common/ac_nir_to_llvm.c | 58 +++++++++++++----------------------------
 3 files changed, 50 insertions(+), 40 deletions(-)

diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index b2bf1bf7b51..faa08b6301c 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -91,20 +91,44 @@ ac_llvm_context_init(struct ac_llvm_context *ctx,
LLVMContextRef context,

        args[0] = LLVMConstReal(ctx->f32, 2.5);
        ctx->fpmath_md_2p5_ulp = LLVMMDNodeInContext(ctx->context, args, 1);

        ctx->uniform_md_kind = LLVMGetMDKindIDInContext(ctx->context,
                                                        "amdgpu.uniform", 14);

        ctx->empty_md = LLVMMDNodeInContext(ctx->context, NULL, 0);
 }

+int
+ac_get_llvm_num_components(LLVMValueRef value)
+{
+       LLVMTypeRef type = LLVMTypeOf(value);
+       unsigned num_components = LLVMGetTypeKind(type) == LLVMVectorTypeKind
+                                     ? LLVMGetVectorSize(type)
+                                     : 1;
+       return num_components;
+}
+
+LLVMValueRef
+ac_llvm_extract_elem(struct ac_llvm_context *ac,
+                    LLVMValueRef value,
+                    int index)
+{
+       int count = ac_get_llvm_num_components(value);
+
+       if (count == 1)
+               return value;
+
+       return LLVMBuildExtractElement(ac->builder, value,
+                                      LLVMConstInt(ac->i32, index, false), "");
+}
+
 unsigned
 ac_get_type_size(LLVMTypeRef type)
 {
        LLVMTypeKind kind = LLVMGetTypeKind(type);

        switch (kind) {
        case LLVMIntegerTypeKind:
                return LLVMGetIntTypeWidth(type) / 8;
        case LLVMFloatTypeKind:
                return 4;
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index 655dc1dcc86..c14b0d9f019 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -75,20 +75,28 @@ struct ac_llvm_context {

        enum chip_class chip_class;

        LLVMValueRef lds;
 };

 void
ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context,
                     enum chip_class chip_class);

+int
+ac_get_llvm_num_components(LLVMValueRef value);
+
+LLVMValueRef
+ac_llvm_extract_elem(struct ac_llvm_context *ac,
+                    LLVMValueRef value,
+                    int index);
+
 unsigned ac_get_type_size(LLVMTypeRef type);

LLVMTypeRef ac_to_integer_type(struct ac_llvm_context *ctx, LLVMTypeRef t); LLVMValueRef ac_to_integer(struct ac_llvm_context *ctx, LLVMValueRef v); LLVMTypeRef ac_to_float_type(struct ac_llvm_context *ctx, LLVMTypeRef t);
 LLVMValueRef ac_to_float(struct ac_llvm_context *ctx, LLVMValueRef v);

 LLVMValueRef
 ac_build_intrinsic(struct ac_llvm_context *ctx, const char *name,
                   LLVMTypeRef return_type, LLVMValueRef *params,
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 6f84604d54a..6060df75314 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -995,46 +995,24 @@ static void create_function(struct
nir_to_llvm_context *ctx,
                        set_userdata_location_shader(ctx, 
AC_UD_PS_SAMPLE_POS_OFFSET,
&user_sgpr_idx, 1);
                }
                break;
        default:
                unreachable("Shader stage not implemented");
        }

        ctx->shader_info->num_user_sgprs = user_sgpr_idx;
 }

-static int get_llvm_num_components(LLVMValueRef value)
-{
-       LLVMTypeRef type = LLVMTypeOf(value);
-       unsigned num_components = LLVMGetTypeKind(type) == LLVMVectorTypeKind
-                                     ? LLVMGetVectorSize(type)
-                                     : 1;
-       return num_components;
-}
-
-static LLVMValueRef llvm_extract_elem(struct ac_llvm_context *ac,
-                                     LLVMValueRef value,
-                                     int index)
-{
-       int count = get_llvm_num_components(value);
-
-       if (count == 1)
-               return value;
-
-       return LLVMBuildExtractElement(ac->builder, value,
-                                      LLVMConstInt(ac->i32, index, false), "");
-}
-
 static LLVMValueRef trim_vector(struct ac_llvm_context *ctx,
                                 LLVMValueRef value, unsigned count)
 {
-       unsigned num_components = get_llvm_num_components(value);
+       unsigned num_components = ac_get_llvm_num_components(value);
        if (count == num_components)
                return value;

        LLVMValueRef masks[] = {
LLVMConstInt(ctx->i32, 0, false), LLVMConstInt(ctx->i32, 1, false), LLVMConstInt(ctx->i32, 2, false), LLVMConstInt(ctx->i32, 3, false)};

        if (count == 1)
                return LLVMBuildExtractElement(ctx->builder, value, masks[0],
                                               "");
@@ -2321,21 +2299,21 @@ static void visit_store_ssbo(struct ac_nir_context *ctx,
                                                      ctx->ac.i32_0, "");

                        tmp = LLVMBuildExtractElement(ctx->ac.builder,
base_data, LLVMConstInt(ctx->ac.i32, start + 1, false), "");
                        data = LLVMBuildInsertElement(ctx->ac.builder, data, 
tmp,
                                                      ctx->ac.i32_1, "");
                        store_name = "llvm.amdgcn.buffer.store.v2f32";

                } else {
                        assert(count == 1);
-                       if (get_llvm_num_components(base_data) > 1)
+                       if (ac_get_llvm_num_components(base_data) > 1)
                                data = LLVMBuildExtractElement(ctx->ac.builder, 
base_data,
                                                               LLVMConstInt(ctx->ac.i32, 
start, false), "");
                        else
                                data = base_data;
                        store_name = "llvm.amdgcn.buffer.store.f32";
                }

                offset = base_offset;
                if (start != 0) {
                        offset = LLVMBuildAdd(ctx->ac.builder, offset,
LLVMConstInt(ctx->ac.i32, start * 4, false), "");
@@ -2348,23 +2326,23 @@ static void visit_store_ssbo(struct ac_nir_context *ctx,
 }

 static LLVMValueRef visit_atomic_ssbo(struct ac_nir_context *ctx,
const nir_intrinsic_instr *instr)
 {
        const char *name;
        LLVMValueRef params[6];
        int arg_count = 0;

        if (instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap) {
-               params[arg_count++] = llvm_extract_elem(&ctx->ac, get_src(ctx,
instr->src[3]), 0);
+               params[arg_count++] = ac_llvm_extract_elem(&ctx->ac, 
get_src(ctx,
instr->src[3]), 0);
        }
-       params[arg_count++] = llvm_extract_elem(&ctx->ac, get_src(ctx,
instr->src[2]), 0);
+       params[arg_count++] = ac_llvm_extract_elem(&ctx->ac, get_src(ctx,
instr->src[2]), 0);
        params[arg_count++] = ctx->abi->load_ssbo(ctx->abi,
                                                 get_src(ctx, instr->src[0]),
                                                 true);
params[arg_count++] = LLVMConstInt(ctx->ac.i32, 0, false); /* vindex */
        params[arg_count++] = get_src(ctx, instr->src[1]);      /* voffset */
        params[arg_count++] = LLVMConstInt(ctx->ac.i1, 0, false);  /* slc */

        switch (instr->intrinsic) {
        case nir_intrinsic_ssbo_atomic_add:
                name = "llvm.amdgcn.buffer.atomic.add";
@@ -2827,21 +2805,21 @@ store_tcs_output(struct ac_shader_abi *abi,

        bool is_tess_factor = false;
        if (location == VARYING_SLOT_TESS_LEVEL_INNER ||
            location == VARYING_SLOT_TESS_LEVEL_OUTER)
                is_tess_factor = true;

        unsigned base = is_compact ? const_index : 0;
        for (unsigned chan = 0; chan < 8; chan++) {
                if (!(writemask & (1 << chan)))
                        continue;
- LLVMValueRef value = llvm_extract_elem(&ctx->ac, src, chan - component); + LLVMValueRef value = ac_llvm_extract_elem(&ctx->ac, src, chan - component);

                if (store_lds || is_tess_factor)
                        ac_lds_store(&ctx->ac, dw_addr, value);

                if (!is_tess_factor && writemask != 0xF)
ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, value, 1,
                                                    buf_addr, ctx->oc_lds,
                                                    4 * (base + chan), 1, 0, 
true, false);

                dw_addr = LLVMBuildAdd(ctx->builder, dw_addr,
@@ -3119,21 +3097,21 @@ visit_store_var(struct ac_nir_context *ctx,
        int writemask = instr->const_index[0] << comp;
        LLVMValueRef indir_index;
        unsigned const_index;
        get_deref_offset(ctx, instr->variables[0], false,
                         NULL, NULL, &const_index, &indir_index);

        if (get_elem_bits(&ctx->ac, LLVMTypeOf(src)) == 64) {
                int old_writemask = writemask;

                src = LLVMBuildBitCast(ctx->ac.builder, src,
-                                      LLVMVectorType(ctx->ac.f32,
get_llvm_num_components(src) * 2),
+                                      LLVMVectorType(ctx->ac.f32,
ac_get_llvm_num_components(src) * 2),
                                       "");

                writemask = 0;
                for (unsigned chan = 0; chan < 4; chan++) {
                        if (old_writemask & (1 << chan))
                                writemask |= 3u << (2 * chan);
                }
        }

        switch (instr->variables[0]->var->data.mode) {
@@ -3157,21 +3135,21 @@ visit_store_var(struct ac_nir_context *ctx,
                                                    const_index, location, 
driver_location,
                                                    src, comp, is_patch, 
is_compact, writemask);
                        return;
                }

                for (unsigned chan = 0; chan < 8; chan++) {
                        int stride = 4;
                        if (!(writemask & (1 << chan)))
                                continue;

-                       value = llvm_extract_elem(&ctx->ac, src, chan - comp);
+                       value = ac_llvm_extract_elem(&ctx->ac, src, chan - 
comp);

                        if (instr->variables[0]->var->data.compact)
                                stride = 1;
                        if (indir_index) {
                                unsigned count = glsl_count_attribute_slots(
                                                instr->variables[0]->var->type, 
false);
                                count -= chan / 4;
                                LLVMValueRef tmp_vec = 
ac_build_gather_values_extended(
                                                &ctx->ac, ctx->outputs + idx + 
chan, count,
                                                stride, true, true);
@@ -3186,21 +3164,21 @@ visit_store_var(struct ac_nir_context *ctx,

                                LLVMBuildStore(ctx->ac.builder, value, 
temp_ptr);
                        }
                }
                break;
        case nir_var_local:
                for (unsigned chan = 0; chan < 8; chan++) {
                        if (!(writemask & (1 << chan)))
                                continue;

-                       value = llvm_extract_elem(&ctx->ac, src, chan);
+                       value = ac_llvm_extract_elem(&ctx->ac, src, chan);
                        if (indir_index) {
                                unsigned count = glsl_count_attribute_slots(
                                        instr->variables[0]->var->type, false);
                                count -= chan / 4;
                                LLVMValueRef tmp_vec = 
ac_build_gather_values_extended(
                                        &ctx->ac, ctx->locals + idx + chan, 
count,
                                        4, true, true);

                                tmp_vec = 
LLVMBuildInsertElement(ctx->ac.builder, tmp_vec,
                                                                 value, indir_index, 
"");
@@ -3226,22 +3204,22 @@ visit_store_var(struct ac_nir_context *ctx,
                           ctx->ac.builder, val,
                           LLVMGetElementType(LLVMTypeOf(address)), "");
                        LLVMBuildStore(ctx->ac.builder, val, address);
                } else {
                        for (unsigned chan = 0; chan < 4; chan++) {
                                if (!(writemask & (1 << chan)))
                                        continue;
                                LLVMValueRef ptr =
                                        LLVMBuildStructGEP(ctx->ac.builder,
                                                           address, chan, "");
-                               LLVMValueRef src = llvm_extract_elem(&ctx->ac, 
val,
-                                                                    chan);
+                               LLVMValueRef src = 
ac_llvm_extract_elem(&ctx->ac, val,
+                                                                       chan);
                                src = LLVMBuildBitCast(
                                   ctx->ac.builder, src,
                                   LLVMGetElementType(LLVMTypeOf(ptr)), "");
                                LLVMBuildStore(ctx->ac.builder, src, ptr);
                        }
                }
                break;
        }
        default:
                break;
@@ -3359,21 +3337,21 @@ static LLVMValueRef get_image_coords(struct
ac_nir_context *ctx,
        if(instr->variables[0]->deref.child)
                type = instr->variables[0]->deref.child->type;

        LLVMValueRef src0 = get_src(ctx, instr->src[0]);
        LLVMValueRef coords[4];
        LLVMValueRef masks[] = {
LLVMConstInt(ctx->ac.i32, 0, false), LLVMConstInt(ctx->ac.i32, 1, false), LLVMConstInt(ctx->ac.i32, 2, false), LLVMConstInt(ctx->ac.i32, 3, false),
        };
        LLVMValueRef res;
-       LLVMValueRef sample_index = llvm_extract_elem(&ctx->ac, get_src(ctx,
instr->src[1]), 0);
+       LLVMValueRef sample_index = ac_llvm_extract_elem(&ctx->ac,
get_src(ctx, instr->src[1]), 0);

        int count;
        enum glsl_sampler_dim dim = glsl_get_sampler_dim(type);
        bool is_array = glsl_sampler_type_is_array(type);
        bool add_frag_pos = (dim == GLSL_SAMPLER_DIM_SUBPASS ||
                             dim == GLSL_SAMPLER_DIM_SUBPASS_MS);
        bool is_ms = (dim == GLSL_SAMPLER_DIM_MS ||
                      dim == GLSL_SAMPLER_DIM_SUBPASS_MS);
bool gfx9_1d = ctx->ac.chip_class >= GFX9 && dim == GLSL_SAMPLER_DIM_1D;
        count = image_type_to_components_count(dim, is_array);
@@ -3406,21 +3384,21 @@ static LLVMValueRef get_image_coords(struct
ac_nir_context *ctx,
        if (count == 1 && !gfx9_1d) {
                if (instr->src[0].ssa->num_components)
                        res = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[0], 
"");
                else
                        res = src0;
        } else {
                int chan;
                if (is_ms)
                        count--;
                for (chan = 0; chan < count; ++chan) {
-                       coords[chan] = llvm_extract_elem(&ctx->ac, src0, chan);
+                       coords[chan] = ac_llvm_extract_elem(&ctx->ac, src0, 
chan);
                }
                if (add_frag_pos) {
                        for (chan = 0; chan < 2; ++chan)
                                coords[chan] = LLVMBuildAdd(ctx->ac.builder, 
coords[chan],
LLVMBuildFPToUI(ctx->ac.builder, ctx->abi->frag_pos[chan],
                                                ctx->ac.i32, ""), "");
                        coords[2] = ac_to_integer(&ctx->ac,
ctx->abi->inputs[radeon_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)]);
                        count++;
                }

                if (gfx9_1d) {
@@ -4607,50 +4585,50 @@ static void visit_tex(struct ac_nir_context
*ctx, nir_tex_instr *instr)
                samples = LLVMBuildShl(ctx->ac.builder, ctx->ac.i32_1,
                                       samples, "");
                samples = LLVMBuildSelect(ctx->ac.builder, is_msaa, samples,
                                          ctx->ac.i32_1, "");
                result = samples;
                goto write_result;
        }

        if (coord)
                for (chan = 0; chan < instr->coord_components; chan++)
-                       coords[chan] = llvm_extract_elem(&ctx->ac, coord, chan);
+                       coords[chan] = ac_llvm_extract_elem(&ctx->ac, coord, 
chan);

        if (offsets && instr->op != nir_texop_txf) {
                LLVMValueRef offset[3], pack;
                for (chan = 0; chan < 3; ++chan)
                        offset[chan] = ctx->ac.i32_0;

                args.offset = true;
-               for (chan = 0; chan < get_llvm_num_components(offsets); chan++) 
{
-                       offset[chan] = llvm_extract_elem(&ctx->ac, offsets, 
chan);
+               for (chan = 0; chan < ac_get_llvm_num_components(offsets); 
chan++) {
+                       offset[chan] = ac_llvm_extract_elem(&ctx->ac, offsets, 
chan);
                        offset[chan] = LLVMBuildAnd(ctx->ac.builder, 
offset[chan],
                                                    LLVMConstInt(ctx->ac.i32, 0x3f, 
false), "");
                        if (chan)
                                offset[chan] = LLVMBuildShl(ctx->ac.builder, 
offset[chan],
                                                            LLVMConstInt(ctx->ac.i32, 
chan * 8, false), "");
                }
                pack = LLVMBuildOr(ctx->ac.builder, offset[0], offset[1], "");
                pack = LLVMBuildOr(ctx->ac.builder, pack, offset[2], "");
                address[count++] = pack;

        }
        /* pack LOD bias value */
        if (instr->op == nir_texop_txb && bias) {
                address[count++] = bias;
        }

        /* Pack depth comparison value */
        if (instr->is_shadow && comparator) {
                LLVMValueRef z = ac_to_float(&ctx->ac,
- llvm_extract_elem(&ctx->ac, comparator, 0)); + ac_llvm_extract_elem(&ctx->ac, comparator, 0));

/* TC-compatible HTILE on radeonsi promotes Z16 and Z24 to Z32_FLOAT,
                 * so the depth comparison value isn't clamped for Z16 and
                 * Z24 anymore. Do it manually here.
                 *
                 * It's unnecessary if the original texture format was
                 * Z32_FLOAT, but we don't know that here.
                 */
                if (ctx->ac.chip_class == VI && 
ctx->abi->clamp_shadow_reference)
                        z = ac_build_clamp(&ctx->ac, z);
@@ -4680,22 +4658,22 @@ static void visit_tex(struct ac_nir_context
*ctx, nir_tex_instr *instr)
                                num_dest_deriv_channels = 2;
                                num_deriv_comp = 2;
                        } else {
                                num_dest_deriv_channels = 1;
                                num_deriv_comp = 1;
                        }
                        break;
                }

                for (unsigned i = 0; i < num_src_deriv_channels; i++) {
- derivs[i] = ac_to_float(&ctx->ac, llvm_extract_elem(&ctx->ac, ddx, i));
-                       derivs[num_dest_deriv_channels + i] = 
ac_to_float(&ctx->ac,
llvm_extract_elem(&ctx->ac, ddy, i));
+ derivs[i] = ac_to_float(&ctx->ac, ac_llvm_extract_elem(&ctx->ac, ddx, i));
+                       derivs[num_dest_deriv_channels + i] = 
ac_to_float(&ctx->ac,
ac_llvm_extract_elem(&ctx->ac, ddy, i));
                }
for (unsigned i = num_src_deriv_channels; i < num_dest_deriv_channels; i++) {
                        derivs[i] = ctx->ac.f32_0;
                        derivs[num_dest_deriv_channels + i] = ctx->ac.f32_0;
                }
        }

        if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && coord) {
                for (chan = 0; chan < instr->coord_components; chan++)
                        coords[chan] = ac_to_float(&ctx->ac, coords[chan]);
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to