Issue 120256
Summary Failing assertion in AMDGPUAttributor with ptrtoint casts and AS 3
Labels backend:AMDGPU, crash-on-valid
Assignees
Reporter ritter-x2a
    I observe a failing assertion in the AMDGPUAttributor in code with ptrtoint casts and address space 3. It occurs on trunk since commit 41ed16c3b3362e51b7063eaef6461ab704c1ec7a by @jwanggit86.

Reproducer:
```
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"
target triple = "amdgcn-amd-amdhsa"

@buf_shared = internal addrspace(3) global [2080 x i8] undef, align 16

define protected amdgpu_kernel void @foo(ptr addrspace(1) nocapture noundef writeonly initializes((0, 1)) %res.coerce) local_unnamed_addr {
entry:
  %conv.i = and i32 trunc (i64 sub (i64 16, i64 ptrtoint (ptr addrspacecast (ptr addrspace(3) @buf_shared to ptr) to i64)) to i32), 15
  %add.ptr = getelementptr inbounds nuw i8, ptr addrspace(3) @buf_shared, i32 %conv.i
  %0 = load i8, ptr addrspace(3) %add.ptr, align 1
  store i8 %0, ptr addrspace(1) %res.coerce, align 1
  ret void
}
```

`opt -mcpu=gfx1030 --amdgpu-attributor frame.ll` with the above as `frame.ll` yields:

```
opt: /home/faritter/projects/ritter-x2a-fork/llvm-project/llvm/include/llvm/Support/Casting.h:578: decltype(auto) llvm::cast(From*) [with To = llvm::PointerType; From = llvm::Type]: Assertion `isa<To>(Val) && "cast<Ty>() argument of incompatible type!"' failed.
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace.
Stack dump:
0.      Program arguments: ../../build/bin/opt -mcpu=gfx1030 --amdgpu-attributor frame.ll
1.      Running pass 'AMDGPU Attributor' on module 'frame.ll'.
 #0 0x00005d350dd6d050 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) (../../build/bin/opt+0x531f050)
 #1 0x00005d350dd6a46f llvm::sys::RunSignalHandlers() (../../build/bin/opt+0x531c46f)
 #2 0x00005d350dd6a5c5 SignalHandler(int) Signals.cpp:0:0
 #3 0x000078045a842520 (/lib/x86_64-linux-gnu/libc.so.6+0x42520)
 #4 0x000078045a8969fc __pthread_kill_implementation ./nptl/pthread_kill.c:44:76
 #5 0x000078045a8969fc __pthread_kill_internal ./nptl/pthread_kill.c:78:10
 #6 0x000078045a8969fc pthread_kill ./nptl/pthread_kill.c:89:10
 #7 0x000078045a842476 gsignal ./signal/../sysdeps/posix/raise.c:27:6
 #8 0x000078045a8287f3 abort ./stdlib/abort.c:81:7
 #9 0x000078045a82871b _nl_load_domain ./intl/loadmsgcat.c:1177:9
#10 0x000078045a839e96 (/lib/x86_64-linux-gnu/libc.so.6+0x39e96)
#11 0x00005d3509d95b2b (anonymous namespace)::AAAMDAttributesFunction::needFlatScratchInit(llvm::Attributor&) AMDGPUAttributor.cpp:0:0
#12 0x00005d3509da0aeb (anonymous namespace)::AAAMDAttributesFunction::updateImpl(llvm::Attributor&) AMDGPUAttributor.cpp:0:0
#13 0x00005d350c2c03ea llvm::AbstractAttribute::update(llvm::Attributor&) (../../build/bin/opt+0x38723ea)
#14 0x00005d350c2d319d llvm::Attributor::updateAA(llvm::AbstractAttribute&) (../../build/bin/opt+0x388519d)
#15 0x00005d3509d9fb96 (anonymous namespace)::AAAMDAttributes const* llvm::Attributor::getOrCreateAAFor<(anonymous namespace)::AAAMDAttributes>(llvm::IRPosition, llvm::AbstractAttribute const*, llvm::DepClassTy, bool, bool) (.constprop.0) AMDGPUAttributor.cpp:0:0
#16 0x00005d3509da213b (anonymous namespace)::runImpl(llvm::Module&, llvm::AnalysisGetter&, llvm::TargetMachine&, llvm::AMDGPUAttributorOptions) (.constprop.0) AMDGPUAttributor.cpp:0:0
#17 0x00005d3509da2a3e (anonymous namespace)::AMDGPUAttributorLegacy::runOnModule(llvm::Module&) AMDGPUAttributor.cpp:0:0
#18 0x00005d350db050a9 llvm::legacy::PassManagerImpl::run(llvm::Module&) (../../build/bin/opt+0x50b70a9)
#19 0x00005d35095b6d12 optMain (../../build/bin/opt+0xb68d12)
#20 0x000078045a829d90 __libc_start_call_main ./csu/../sysdeps/nptl/libc_start_call_main.h:58:16
#21 0x000078045a829e40 call_init ./csu/../csu/libc-start.c:128:20
#22 0x000078045a829e40 __libc_start_main ./csu/../csu/libc-start.c:379:5
#23 0x00005d35095ac855 _start (../../build/bin/opt+0xb5e855)
Aborted (core dumped)
```

Used cmake options (probably not minimal):
```
-DCMAKE_BUILD_TYPE:STRING=Release
-DCMAKE_CXX_COMPILER_LAUNCHER:STRING=ccache
"-DLLVM_ENABLE_PROJECTS:STRING=clang;lld;clang-tools-extra"
"-DLLVM_ENABLE_RUNTIMES:STRING=compiler-rt;openmp"
-DLLVM_ENABLE_ASSERTIONS:BOOL=TRUE
-DLLVM_FORCE_ENABLE_STATS:BOOL=TRUE
-DLLVM_USE_SPLIT_DWARF:BOOL=TRUE
-DLLVM_ENABLE_DUMP:BOOL=TRUE
```

I reduced the above IR from this HIP code:
```
// clang -xhip --offload-arch=gfx1030 -isystem /opt/rocm/include --driver-mode=g++ -O3 ./frame.hip

#include "hip/hip_runtime.h"

#define ALIGNMENT_ZERO_BITS 4
#define NUM_MOVE_THREADS 64
#define BYTES_PER_THREAD 32
#define ALIGNMENT_MASK ((1u << ALIGNMENT_ZERO_BITS) - 1)
#define BUFFER_SIZE (NUM_MOVE_THREADS * BYTES_PER_THREAD)
#define ADD_ALIGN_SLACK(e) ((e) + 2 * (ALIGNMENT_MASK + 1))
#define BUFFER_ALLOC_SIZE ADD_ALIGN_SLACK(BUFFER_SIZE)

__host__ __device__
uint64_t compute_alignment_offset(uint8_t *orig_ptr) {
  uint64_t ptr = (uint64_t) orig_ptr;
  uint64_t alignment_bits = (ptr & ALIGNMENT_MASK);
  return ((ALIGNMENT_MASK - alignment_bits) + 1) & ALIGNMENT_MASK;
}

__global__
void MoveKernelThroughput(uint8_t * res) {
  __shared__ uint8_t buf_shared[BUFFER_ALLOC_SIZE];
  uint8_t *buf_adjusted = buf_shared + compute_alignment_offset(buf_shared);
  *res = *buf_adjusted;
}

```
_______________________________________________
llvm-bugs mailing list
llvm-bugs@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs

Reply via email to