From: Marek Olšák <marek.ol...@amd.com> --- .../drivers/radeonsi/si_shader_tgsi_mem.c | 45 ++++++++++++++++--- src/gallium/drivers/radeonsi/si_state.c | 7 +-- 2 files changed, 42 insertions(+), 10 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c index 8c44831bccb..2ba3f251ff8 100644 --- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c +++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c @@ -691,31 +691,39 @@ static void store_emit( is_image, /* may_store_unaligned */ writeonly_memory); if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER) { store_emit_buffer(ctx, args.resource, inst->Dst[0].Register.WriteMask, value, voffset, args.cache_policy, writeonly_memory); return; } if (target == TGSI_TEXTURE_BUFFER) { - LLVMValueRef buf_args[] = { + LLVMValueRef buf_args[6] = { value, args.resource, vindex, ctx->i32_0, /* voffset */ - LLVMConstInt(ctx->i1, !!(args.cache_policy & ac_glc), 0), - LLVMConstInt(ctx->i1, !!(args.cache_policy & ac_slc), 0), }; + if (HAVE_LLVM >= 0x0800) { + buf_args[4] = ctx->i32_0; /* soffset */ + buf_args[5] = LLVMConstInt(ctx->i1, args.cache_policy, 0); + } else { + buf_args[4] = LLVMConstInt(ctx->i1, !!(args.cache_policy & ac_glc), 0); + buf_args[5] = LLVMConstInt(ctx->i1, !!(args.cache_policy & ac_slc), 0); + } + emit_data->output[emit_data->chan] = ac_build_intrinsic( - &ctx->ac, "llvm.amdgcn.buffer.store.format.v4f32", + &ctx->ac, + HAVE_LLVM >= 0x0800 ? "llvm.amdgcn.struct.buffer.store.format.v4f32" : + "llvm.amdgcn.buffer.store.format.v4f32", ctx->voidt, buf_args, 6, ac_get_store_intr_attribs(writeonly_memory)); } else { args.opcode = ac_image_store; args.data[0] = value; args.dim = ac_image_dim_from_tgsi_target(ctx->screen, inst->Memory.Texture); args.attributes = ac_get_store_intr_attribs(writeonly_memory); args.dmask = 0xf; emit_data->output[emit_data->chan] = @@ -823,25 +831,52 @@ static void atomic_emit( args.resource = shader_buffer_fetch_rsrc(ctx, &inst->Src[0], false); voffset = ac_to_integer(&ctx->ac, lp_build_emit_fetch(bld_base, inst, 1, 0)); } else if (inst->Src[0].Register.File == TGSI_FILE_IMAGE || tgsi_is_bindless_image_file(inst->Src[0].Register.File)) { image_fetch_rsrc(bld_base, &inst->Src[0], true, inst->Memory.Texture, &args.resource); image_fetch_coords(bld_base, inst, 1, args.resource, args.coords); vindex = args.coords[0]; /* for buffers only */ } - if (inst->Src[0].Register.File == TGSI_FILE_BUFFER || + if (HAVE_LLVM >= 0x0800 && + inst->Src[0].Register.File != TGSI_FILE_BUFFER && inst->Memory.Texture == TGSI_TEXTURE_BUFFER) { LLVMValueRef buf_args[7]; unsigned num_args = 0; + buf_args[num_args++] = args.data[0]; + if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) + buf_args[num_args++] = args.data[1]; + + buf_args[num_args++] = args.resource; + buf_args[num_args++] = vindex; + buf_args[num_args++] = voffset; + buf_args[num_args++] = ctx->i32_0; /* soffset */ + buf_args[num_args++] = LLVMConstInt(ctx->i32, args.cache_policy & ac_slc, 0); + + char intrinsic_name[64]; + snprintf(intrinsic_name, sizeof(intrinsic_name), + "llvm.amdgcn.struct.buffer.atomic.%s", action->intr_name); + emit_data->output[emit_data->chan] = + ac_to_float(&ctx->ac, + ac_build_intrinsic(&ctx->ac, intrinsic_name, + ctx->i32, buf_args, num_args, 0)); + return; + } + + if (inst->Src[0].Register.File == TGSI_FILE_BUFFER || + (HAVE_LLVM < 0x0800 && + inst->Memory.Texture == TGSI_TEXTURE_BUFFER)) { + LLVMValueRef buf_args[7]; + unsigned num_args = 0; + buf_args[num_args++] = args.data[0]; if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) buf_args[num_args++] = args.data[1]; buf_args[num_args++] = args.resource; buf_args[num_args++] = vindex; buf_args[num_args++] = voffset; buf_args[num_args++] = args.cache_policy & ac_slc ? ctx->i1true : ctx->i1false; char intrinsic_name[40]; diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index e3b45fa6ea7..41aa4ef3336 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -3606,28 +3606,25 @@ si_make_buffer_descriptor(struct si_screen *screen, struct r600_resource *buf, * ENABLE. The workaround is to set STRIDE = 0 if SWIZZLE_ENABLE == 0 when * using SMEM. This can be done in the shader by clearing STRIDE with s_and. * That way the same descriptor can be used by both SMEM and VMEM. * * GFX9: * - For SMEM and STRIDE == 0, it's in byte units. * - For SMEM and STRIDE != 0, it's in units of STRIDE. * - For VMEM and inst.IDXEN == 0 or STRIDE == 0, it's in byte units. * - For VMEM and inst.IDXEN == 1 and STRIDE != 0, it's in units of STRIDE. */ - if (screen->info.chip_class >= GFX9) - /* When vindex == 0, LLVM sets IDXEN = 0, thus changing units + if (screen->info.chip_class >= GFX9 && HAVE_LLVM < 0x0800) + /* When vindex == 0, LLVM < 8.0 sets IDXEN = 0, thus changing units * from STRIDE to bytes. This works around it by setting * NUM_RECORDS to at least the size of one element, so that * the first element is readable when IDXEN == 0. - * - * TODO: Fix this in LLVM, but do we need a new intrinsic where - * IDXEN is enforced? */ num_records = num_records ? MAX2(num_records, stride) : 0; else if (screen->info.chip_class == VI) num_records *= stride; state[4] = 0; state[5] = S_008F04_STRIDE(stride); state[6] = num_records; state[7] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) | S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) | -- 2.17.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev