Patches 1-3: Reviewed-by: Marek Olšák <marek.ol...@amd.com> Patch 4: Acked-by: Marek Olšák <marek.ol...@amd.com>
Marek On Tue, Apr 19, 2016 at 7:52 PM, Tom Stellard <thomas.stell...@amd.com> wrote: > The ds_bpermute instruction allows threads to transfer data directly > to or from the vgprs of other threads. These instructions use the lds > hardware to transfer data, but do not read or write lds memory. > > DDX BEFORE: | DDX AFTER: > | > v_mbcnt_lo_u32_b32_e64 v2, -1, 0 | v_mbcnt_lo_u32_b32_e64 v2, -1, 0 > v_mbcnt_hi_u32_b32_e64 v2, -1, v2 | v_mbcnt_hi_u32_b32_e64 v2, -1, v2 > v_lshlrev_b32_e32 v4, 2, v2 | v_and_b32_e32 v2, 60, v2 > v_and_b32_e32 v2, 60, v2 | v_lshlrev_b32_e32 v2, 2, v2 > v_lshlrev_b32_e32 v3, 2, v2 | ds_bpermute_b32 v3, v2, v0 > s_mov_b32 m0, -1 | ds_bpermute_b32 v0, v2, v0 offset:4 > ds_write_b32 v4, v0 | s_waitcnt lgkmcnt(0) > s_waitcnt lgkmcnt(0) | > v_or_b32_e32 v0, 1, v2 | > v_lshlrev_b32_e32 v0, 2, v0 | > ds_read_b32 v1, v3 | > ds_read_b32 v0, v0 | > s_waitcnt lgkmcnt(0) | > | > LDS: 1 blocks | LDS: 0 blocks > --- > src/gallium/drivers/radeonsi/si_shader.c | 42 > +++++++++++++++++++++++--------- > 1 file changed, 30 insertions(+), 12 deletions(-) > > diff --git a/src/gallium/drivers/radeonsi/si_shader.c > b/src/gallium/drivers/radeonsi/si_shader.c > index 2a747f9..d3e445b 100644 > --- a/src/gallium/drivers/radeonsi/si_shader.c > +++ b/src/gallium/drivers/radeonsi/si_shader.c > @@ -4162,6 +4162,7 @@ static void si_llvm_emit_ddxy( > LLVMValueRef indices[2]; > LLVMValueRef store_ptr, load_ptr0, load_ptr1; > LLVMValueRef tl, trbl, result[4]; > + LLVMValueRef tl_tid, trbl_tid; > unsigned swizzle[4]; > unsigned c; > int idx; > @@ -4179,20 +4180,24 @@ static void si_llvm_emit_ddxy( > else > mask = TID_MASK_TOP_LEFT; > > - indices[1] = LLVMBuildAnd(gallivm->builder, indices[1], > - lp_build_const_int32(gallivm, mask), ""); > + tl_tid = LLVMBuildAnd(gallivm->builder, indices[1], > + lp_build_const_int32(gallivm, mask), ""); > + indices[1] = tl_tid; > load_ptr0 = LLVMBuildGEP(gallivm->builder, ctx->lds, > indices, 2, ""); > > /* for DDX we want to next X pixel, DDY next Y pixel. */ > idx = (opcode == TGSI_OPCODE_DDX || opcode == TGSI_OPCODE_DDX_FINE) ? > 1 : 2; > - indices[1] = LLVMBuildAdd(gallivm->builder, indices[1], > + trbl_tid = LLVMBuildAdd(gallivm->builder, indices[1], > lp_build_const_int32(gallivm, idx), ""); > + indices[1] = trbl_tid; > load_ptr1 = LLVMBuildGEP(gallivm->builder, ctx->lds, > indices, 2, ""); > > for (c = 0; c < 4; ++c) { > unsigned i; > + LLVMValueRef val; > + LLVMValueRef args[2]; > > swizzle[c] = > tgsi_util_get_full_src_register_swizzle(&inst->Src[0], c); > for (i = 0; i < c; ++i) { > @@ -4204,18 +4209,31 @@ static void si_llvm_emit_ddxy( > if (i != c) > continue; > > - LLVMBuildStore(gallivm->builder, > - LLVMBuildBitCast(gallivm->builder, > - lp_build_emit_fetch(bld_base, > inst, 0, c), > - ctx->i32, ""), > - store_ptr); > + val = LLVMBuildBitCast(gallivm->builder, > + lp_build_emit_fetch(bld_base, inst, 0, c), > + ctx->i32, ""); > > - tl = LLVMBuildLoad(gallivm->builder, load_ptr0, ""); > - tl = LLVMBuildBitCast(gallivm->builder, tl, ctx->f32, ""); > + if ((HAVE_LLVM >= 0x0309) && ctx->screen->b.family >= > CHIP_TONGA) { > > - trbl = LLVMBuildLoad(gallivm->builder, load_ptr1, ""); > - trbl = LLVMBuildBitCast(gallivm->builder, trbl, ctx->f32, ""); > + args[0] = LLVMBuildMul(gallivm->builder, tl_tid, > + lp_build_const_int32(gallivm, 4), > ""); > + args[1] = val; > + tl = lp_build_intrinsic(gallivm->builder, > + "llvm.amdgcn.ds.bpermute", ctx->i32, > + args, 2, LLVMReadNoneAttribute); > > + args[0] = LLVMBuildMul(gallivm->builder, trbl_tid, > + lp_build_const_int32(gallivm, 4), > ""); > + trbl = lp_build_intrinsic(gallivm->builder, > + "llvm.amdgcn.ds.bpermute", ctx->i32, > + args, 2, LLVMReadNoneAttribute); > + } else { > + LLVMBuildStore(gallivm->builder, val, store_ptr); > + tl = LLVMBuildLoad(gallivm->builder, load_ptr0, ""); > + trbl = LLVMBuildLoad(gallivm->builder, load_ptr1, ""); > + } > + tl = LLVMBuildBitCast(gallivm->builder, tl, ctx->f32, ""); > + trbl = LLVMBuildBitCast(gallivm->builder, trbl, ctx->f32, ""); > result[c] = LLVMBuildFSub(gallivm->builder, trbl, tl, ""); > } > > -- > 2.1.0 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev