Issue 139598
Summary [AMDGPU] amdgpu-postlegalizer-combiner incorrectly removes zeroing mask
Labels backend:AMDGPU
Assignees
Reporter arichardson
    It appears the amdgpu-postlegalizer-combiner pass is removing an AND operation that should still be required. I noticed this while trying to add the new `ptrtoaddr` lowering to GlobalISel, but I believe this issue should be reproducible with other code too.

Input code:
```
define i256 @ptrtoaddr_ext(ptr addrspace(8) %ignored, ptr addrspace(8) %ptr) {
  %ret = ptrtoaddr ptr addrspace(8) %ptr to i256
  ret i256 %ret
}
```

This is then lowered to the following G_PTRTOINT+G_TRUNC+G_ZEXT:

```
*** IR Dump After AMDGPUPreLegalizerCombiner (amdgpu-prelegalizer-combiner) on ptrtoaddr_ext ***
# Machine code for function ptrtoaddr_ext: IsSSA, TracksLiveness
Function Live Ins: $sgpr4_sgpr5 in %2, $sgpr6_sgpr7 in %3, $sgpr8_sgpr9 in %4, $sgpr10_sgpr11 in %5, $sgpr12 in %6, $sgpr13 in %7, $sgpr14 in %8, $sgpr15 in %9

bb.1 (%ir-block.0):
  liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7
  %14:_(s32) = COPY $vgpr4
  %15:_(s32) = COPY $vgpr5
  %16:_(s32) = COPY $vgpr6
  %17:_(s32) = COPY $vgpr7
  %1:_(p8) = G_MERGE_VALUES %14:_(s32), %15:_(s32), %16:_(s32), %17:_(s32)
  %18:_(s128) = G_PTRTOINT %1:_(p8)
  %19:_(s48) = G_TRUNC %18:_(s128)
  %20:_(s256) = G_ZEXT %19:_(s48)
  %21:_(s32), %22:_(s32), %23:_(s32), %24:_(s32), %25:_(s32), %26:_(s32), %27:_(s32), %28:_(s32) = G_UNMERGE_VALUES %20:_(s256)
  $vgpr0 = COPY %21:_(s32)
  $vgpr1 = COPY %22:_(s32)
  $vgpr2 = COPY %23:_(s32)
  $vgpr3 = COPY %24:_(s32)
  $vgpr4 = COPY %25:_(s32)
  $vgpr5 = COPY %26:_(s32)
  $vgpr6 = COPY %27:_(s32)
  $vgpr7 = COPY %28:_(s32)
  SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7

# End machine code for function ptrtoaddr_ext.
```
The legalizer then splits these operations into four 64-bit G_ANDs: the first masks the lower 48 bits (281474976710655 = 2^48 - 1 = 0xFFFFFFFFFFFF), and the remaining parts are ANDed with zero:

```
*** IR Dump After Legalizer (legalizer) on ptrtoaddr_ext ***
# Machine code for function ptrtoaddr_ext: IsSSA, TracksLiveness, Legalized
Function Live Ins: $sgpr4_sgpr5 in %2, $sgpr6_sgpr7 in %3, $sgpr8_sgpr9 in %4, $sgpr10_sgpr11 in %5, $sgpr12 in %6, $sgpr13 in %7, $sgpr14 in %8, $sgpr15 in %9

bb.1 (%ir-block.0):
  liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7
  %14:_(s32) = COPY $vgpr4
  %15:_(s32) = COPY $vgpr5
  %16:_(s32) = COPY $vgpr6
  %17:_(s32) = COPY $vgpr7
  %1:_(p8) = G_MERGE_VALUES %14:_(s32), %15:_(s32), %16:_(s32), %17:_(s32)
  %18:_(s128) = G_PTRTOINT %1:_(p8)
  %31:_(s64), %32:_(s64) = G_UNMERGE_VALUES %18:_(s128)
  %33:_(s64) = G_IMPLICIT_DEF
  %46:_(s64) = G_CONSTANT i64 281474976710655
  %47:_(s64) = G_CONSTANT i64 0
  %42:_(s64) = G_AND %31:_, %46:_
  %43:_(s64) = G_AND %32:_, %47:_
  %44:_(s64) = G_AND %33:_, %47:_
  %45:_(s64) = G_AND %33:_, %47:_
  %21:_(s32), %22:_(s32) = G_UNMERGE_VALUES %42:_(s64)
  %23:_(s32), %24:_(s32) = G_UNMERGE_VALUES %43:_(s64)
  %25:_(s32), %26:_(s32) = G_UNMERGE_VALUES %44:_(s64)
  %27:_(s32), %28:_(s32) = G_UNMERGE_VALUES %45:_(s64)
  $vgpr0 = COPY %21:_(s32)
  $vgpr1 = COPY %22:_(s32)
  $vgpr2 = COPY %23:_(s32)
  $vgpr3 = COPY %24:_(s32)
  $vgpr4 = COPY %25:_(s32)
  $vgpr5 = COPY %26:_(s32)
  $vgpr6 = COPY %27:_(s32)
  $vgpr7 = COPY %28:_(s32)
  SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7

# End machine code for function ptrtoaddr_ext.
```

However, AMDGPUPostLegalizerCombiner then appears to break this code: it removes the G_ANDs entirely and copies the first 128 bits of the pointer through unmasked, instead of only the first 48 bits:

```
*** IR Dump After AMDGPUPostLegalizerCombiner (amdgpu-postlegalizer-combiner) on ptrtoaddr_ext ***
# Machine code for function ptrtoaddr_ext: IsSSA, TracksLiveness, Legalized
Function Live Ins: $sgpr4_sgpr5 in %2, $sgpr6_sgpr7 in %3, $sgpr8_sgpr9 in %4, $sgpr10_sgpr11 in %5, $sgpr12 in %6, $sgpr13 in %7, $sgpr14 in %8, $sgpr15 in %9

bb.1 (%ir-block.0):
  liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7
  %14:_(s32) = COPY $vgpr4
  %15:_(s32) = COPY $vgpr5
  %16:_(s32) = COPY $vgpr6
  %17:_(s32) = COPY $vgpr7
  %1:_(p8) = G_MERGE_VALUES %14:_(s32), %15:_(s32), %16:_(s32), %17:_(s32)
  %18:_(s128) = G_PTRTOINT %1:_(p8)
  %31:_(s64), %32:_(s64) = G_UNMERGE_VALUES %18:_(s128)
  %21:_(s32), %22:_(s32) = G_UNMERGE_VALUES %31:_(s64)
  %23:_(s32), %24:_(s32) = G_UNMERGE_VALUES %32:_(s64)
  %25:_(s32) = G_CONSTANT i32 0
  %26:_(s32) = G_CONSTANT i32 0
  %27:_(s32) = G_CONSTANT i32 0
  %28:_(s32) = G_CONSTANT i32 0
  $vgpr0 = COPY %21:_(s32)
  $vgpr1 = COPY %22:_(s32)
  $vgpr2 = COPY %23:_(s32)
  $vgpr3 = COPY %24:_(s32)
  $vgpr4 = COPY %25:_(s32)
  $vgpr5 = COPY %26:_(s32)
  $vgpr6 = COPY %27:_(s32)
  $vgpr7 = COPY %28:_(s32)
  SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
# End machine code for function ptrtoaddr_ext.
```

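With SDAG I get the expected result, since I believe it doesn't run this pass; the GISEL output below is the incorrect one (it copies v4-v7 unmasked, while SDAG masks v5 with 0xffff and zeroes the rest):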
```
define i256 @ptrtoaddr_ext(ptr addrspace(8) %ignored, ptr addrspace(8) %ptr) {
; GISEL-LABEL: ptrtoaddr_ext:
; GISEL: ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    v_mov_b32_e32 v0, v4
; GISEL-NEXT:    v_mov_b32_e32 v1, v5
; GISEL-NEXT:    v_mov_b32_e32 v2, v6
; GISEL-NEXT:    v_mov_b32_e32 v3, v7
; GISEL-NEXT:    v_mov_b32_e32 v4, 0
; GISEL-NEXT:    v_mov_b32_e32 v5, 0
; GISEL-NEXT:    v_mov_b32_e32 v6, 0
; GISEL-NEXT:    v_mov_b32_e32 v7, 0
; GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; SDAG-LABEL: ptrtoaddr_ext:
; SDAG: ; %bb.0:
; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT:    v_mov_b32_e32 v0, v4
; SDAG-NEXT:    v_and_b32_e32 v1, 0xffff, v5
; SDAG-NEXT:    v_mov_b32_e32 v2, 0
; SDAG-NEXT:    v_mov_b32_e32 v3, 0
; SDAG-NEXT:    v_mov_b32_e32 v4, 0
; SDAG-NEXT:    v_mov_b32_e32 v5, 0
; SDAG-NEXT:    v_mov_b32_e32 v6, 0
; SDAG-NEXT:    v_mov_b32_e32 v7, 0
; SDAG-NEXT:    s_setpc_b64 s[30:31]
  %ret = ptrtoaddr ptr addrspace(8) %ptr to i256
  ret i256 %ret
}
```
_______________________________________________
llvm-bugs mailing list
llvm-bugs@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs

Reply via email to