On 6/3/19 9:33 PM, Marek Olšák wrote:
> SI doesn't support buffer_load_dwordx3 and buffer_store_dwordx3, but it supports buffer_load_format_xyz and buffer_store_format_xyz.
OK, I will update.

Marek

On Mon, Jun 3, 2019 at 9:09 AM Samuel Pitoiset <samuel.pitoi...@gmail.com <mailto:samuel.pitoi...@gmail.com>> wrote:

    I thought LLVM was able to handle that itself but actually it
    does not. That means we shouldn't try to emit vec3 on SI because
    it's unsupported.

    Fixes: 6970a9a6ca9 ("ac,radv: remove the vec3 restriction with
    LLVM 9+")
    Signed-off-by: Samuel Pitoiset <samuel.pitoi...@gmail.com
    <mailto:samuel.pitoi...@gmail.com>>
    ---
     src/amd/common/ac_llvm_build.c    | 12 ++++++------
     src/amd/common/ac_llvm_util.h     |  9 +++++++++
     src/amd/common/ac_nir_to_llvm.c   |  3 ++-
     src/amd/vulkan/radv_nir_to_llvm.c |  2 +-
     4 files changed, 18 insertions(+), 8 deletions(-)

    diff --git a/src/amd/common/ac_llvm_build.c
    b/src/amd/common/ac_llvm_build.c
    index 613c1eef942..7f5c8ef873c 100644
    --- a/src/amd/common/ac_llvm_build.c
    +++ b/src/amd/common/ac_llvm_build.c
    @@ -1167,7 +1167,7 @@ ac_build_llvm8_buffer_store_common(struct
    ac_llvm_context *ctx,
            args[idx++] = voffset ? voffset : ctx->i32_0;
            args[idx++] = soffset ? soffset : ctx->i32_0;
            args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc
    ? 2 : 0), 0);
    -       unsigned func = HAVE_LLVM < 0x900 && num_channels == 3 ? 4
    : num_channels;
    +       unsigned func = !ac_has_vec3_support(ctx->chip_class) &&
    num_channels == 3 ? 4 : num_channels;
            const char *indexing_kind = structurized ? "struct" : "raw";
            char name[256], type_name[8];

    @@ -1227,7 +1227,7 @@ ac_build_buffer_store_dword(struct
    ac_llvm_context *ctx,
     {
            /* Split 3 channel stores, because only LLVM 9+ support
    3-channel
             * intrinsics. */
    -       if (num_channels == 3 && HAVE_LLVM < 0x900) {
    +       if (num_channels == 3 &&
    !ac_has_vec3_support(ctx->chip_class)) {
                    LLVMValueRef v[3], v01;

                    for (int i = 0; i < 3; i++) {
    @@ -1354,7 +1354,7 @@ ac_build_llvm8_buffer_load_common(struct
    ac_llvm_context *ctx,
            args[idx++] = voffset ? voffset : ctx->i32_0;
            args[idx++] = soffset ? soffset : ctx->i32_0;
            args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc
    ? 2 : 0), 0);
    -       unsigned func = HAVE_LLVM < 0x900 && num_channels == 3 ? 4
    : num_channels;
    +       unsigned func = !ac_has_vec3_support(ctx->chip_class) &&
    num_channels == 3 ? 4 : num_channels;
            const char *indexing_kind = structurized ? "struct" : "raw";
            char name[256], type_name[8];

    @@ -1420,7 +1420,7 @@ ac_build_buffer_load(struct ac_llvm_context
    *ctx,
                    if (num_channels == 1)
                            return result[0];

    -               if (num_channels == 3 && HAVE_LLVM < 0x900)
    +               if (num_channels == 3 &&
    !ac_has_vec3_support(ctx->chip_class))
                            result[num_channels++] =
    LLVMGetUndef(ctx->f32);
                    return ac_build_gather_values(ctx, result,
    num_channels);
            }
    @@ -1512,7 +1512,7 @@ ac_build_llvm8_tbuffer_load(struct
    ac_llvm_context *ctx,
            args[idx++] = soffset ? soffset : ctx->i32_0;
            args[idx++] = LLVMConstInt(ctx->i32, dfmt | (nfmt << 4), 0);
            args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc
    ? 2 : 0), 0);
    -       unsigned func = HAVE_LLVM < 0x900 && num_channels == 3 ? 4
    : num_channels;
    +       unsigned func = !ac_has_vec3_support(ctx->chip_class) &&
    num_channels == 3 ? 4 : num_channels;
            const char *indexing_kind = structurized ? "struct" : "raw";
            char name[256], type_name[8];

    @@ -2011,7 +2011,7 @@ ac_build_llvm8_tbuffer_store(struct
    ac_llvm_context *ctx,
            args[idx++] = soffset ? soffset : ctx->i32_0;
            args[idx++] = LLVMConstInt(ctx->i32, dfmt | (nfmt << 4), 0);
            args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc
    ? 2 : 0), 0);
    -       unsigned func = HAVE_LLVM < 0x900 && num_channels == 3 ? 4
    : num_channels;
    +       unsigned func = !ac_has_vec3_support(ctx->chip_class) &&
    num_channels == 3 ? 4 : num_channels;
            const char *indexing_kind = structurized ? "struct" : "raw";
            char name[256], type_name[8];

    diff --git a/src/amd/common/ac_llvm_util.h
    b/src/amd/common/ac_llvm_util.h
    index ca00540da80..a45647a3360 100644
    --- a/src/amd/common/ac_llvm_util.h
    +++ b/src/amd/common/ac_llvm_util.h
    @@ -146,6 +146,15 @@ bool ac_compile_module_to_binary(struct
    ac_compiler_passes *p, LLVMModuleRef mod
     void ac_llvm_add_barrier_noop_pass(LLVMPassManagerRef passmgr);
     void ac_enable_global_isel(LLVMTargetMachineRef tm);

    +static inline bool
    +ac_has_vec3_support(enum chip_class chip)
    +{
    +       if (chip == GFX6)
    +               return false;
    +
    +       return HAVE_LLVM >= 0x900;
    +}
    +
     #ifdef __cplusplus
     }
     #endif
    diff --git a/src/amd/common/ac_nir_to_llvm.c
    b/src/amd/common/ac_nir_to_llvm.c
    index 51f92a6b062..429dac63d63 100644
    --- a/src/amd/common/ac_nir_to_llvm.c
    +++ b/src/amd/common/ac_nir_to_llvm.c
    @@ -1576,7 +1576,8 @@ static void visit_store_ssbo(struct
    ac_nir_context *ctx,

                    /* Due to an LLVM limitation with LLVM < 9, split
    3-element
                     * writes into a 2-element and a 1-element write. */
    -               if (count == 3 && (elem_size_bytes != 4 ||
    HAVE_LLVM < 0x900)) {
    +               if (count == 3 &&
    +                   (elem_size_bytes != 4 ||
    !ac_has_vec3_support(ctx->ac.chip_class))) {
                            writemask |= 1 << (start + 2);
                            count = 2;
                    }
    diff --git a/src/amd/vulkan/radv_nir_to_llvm.c
    b/src/amd/vulkan/radv_nir_to_llvm.c
    index dca4bebcdd1..ab552ae34ab 100644
    --- a/src/amd/vulkan/radv_nir_to_llvm.c
    +++ b/src/amd/vulkan/radv_nir_to_llvm.c
    @@ -2768,7 +2768,7 @@ radv_emit_stream_output(struct
    radv_shader_context *ctx,
                    /* fall through */
            case 4: /* as v4i32 */
                    vdata = ac_build_gather_values(&ctx->ac, out,
    -                                              HAVE_LLVM < 0x900 ?
    + !ac_has_vec3_support(ctx->ac.chip_class) ?
     util_next_power_of_two(num_comps) :
                                                   num_comps);
                    break;
-- 2.21.0

    _______________________________________________
    mesa-dev mailing list
    mesa-dev@lists.freedesktop.org <mailto:mesa-dev@lists.freedesktop.org>
    https://lists.freedesktop.org/mailman/listinfo/mesa-dev

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to