Issue |
139598
|
Summary |
[AMDGPU] amdgpu-postlegalizer-combiner incorrectly removes zeroing mask
|
Labels |
backend:AMDGPU
|
Assignees |
|
Reporter |
arichardson
|
It appears that the amdgpu-postlegalizer-combiner pass removes an AND operation that is still required. I noticed this while adding the new `ptrtoaddr` lowering to GlobalISel, but I believe the issue should be reproducible with other code too.
Input code:
```
define i256 @ptrtoaddr_ext(ptr addrspace(8) %ignored, ptr addrspace(8) %ptr) {
%ret = ptrtoaddr ptr addrspace(8) %ptr to i256
ret i256 %ret
}
```
This is then lowered to the following G_PTRTOINT + G_TRUNC + G_ZEXT sequence:
```
*** IR Dump After AMDGPUPreLegalizerCombiner (amdgpu-prelegalizer-combiner) on ptrtoaddr_ext ***
# Machine code for function ptrtoaddr_ext: IsSSA, TracksLiveness
Function Live Ins: $sgpr4_sgpr5 in %2, $sgpr6_sgpr7 in %3, $sgpr8_sgpr9 in %4, $sgpr10_sgpr11 in %5, $sgpr12 in %6, $sgpr13 in %7, $sgpr14 in %8, $sgpr15 in %9
bb.1 (%ir-block.0):
liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7
%14:_(s32) = COPY $vgpr4
%15:_(s32) = COPY $vgpr5
%16:_(s32) = COPY $vgpr6
%17:_(s32) = COPY $vgpr7
%1:_(p8) = G_MERGE_VALUES %14:_(s32), %15:_(s32), %16:_(s32), %17:_(s32)
%18:_(s128) = G_PTRTOINT %1:_(p8)
%19:_(s48) = G_TRUNC %18:_(s128)
%20:_(s256) = G_ZEXT %19:_(s48)
%21:_(s32), %22:_(s32), %23:_(s32), %24:_(s32), %25:_(s32), %26:_(s32), %27:_(s32), %28:_(s32) = G_UNMERGE_VALUES %20:_(s256)
$vgpr0 = COPY %21:_(s32)
$vgpr1 = COPY %22:_(s32)
$vgpr2 = COPY %23:_(s32)
$vgpr3 = COPY %24:_(s32)
$vgpr4 = COPY %25:_(s32)
$vgpr5 = COPY %26:_(s32)
$vgpr6 = COPY %27:_(s32)
$vgpr7 = COPY %28:_(s32)
SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
# End machine code for function ptrtoaddr_ext.
```
The legalizer then transforms these operations into a G_AND that keeps the low 48 bits of the first 64-bit part, and G_ANDs with zero for the remaining parts:
```
*** IR Dump After Legalizer (legalizer) on ptrtoaddr_ext ***
# Machine code for function ptrtoaddr_ext: IsSSA, TracksLiveness, Legalized
Function Live Ins: $sgpr4_sgpr5 in %2, $sgpr6_sgpr7 in %3, $sgpr8_sgpr9 in %4, $sgpr10_sgpr11 in %5, $sgpr12 in %6, $sgpr13 in %7, $sgpr14 in %8, $sgpr15 in %9
bb.1 (%ir-block.0):
liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7
%14:_(s32) = COPY $vgpr4
%15:_(s32) = COPY $vgpr5
%16:_(s32) = COPY $vgpr6
%17:_(s32) = COPY $vgpr7
%1:_(p8) = G_MERGE_VALUES %14:_(s32), %15:_(s32), %16:_(s32), %17:_(s32)
%18:_(s128) = G_PTRTOINT %1:_(p8)
%31:_(s64), %32:_(s64) = G_UNMERGE_VALUES %18:_(s128)
%33:_(s64) = G_IMPLICIT_DEF
%46:_(s64) = G_CONSTANT i64 281474976710655
%47:_(s64) = G_CONSTANT i64 0
%42:_(s64) = G_AND %31:_, %46:_
%43:_(s64) = G_AND %32:_, %47:_
%44:_(s64) = G_AND %33:_, %47:_
%45:_(s64) = G_AND %33:_, %47:_
%21:_(s32), %22:_(s32) = G_UNMERGE_VALUES %42:_(s64)
%23:_(s32), %24:_(s32) = G_UNMERGE_VALUES %43:_(s64)
%25:_(s32), %26:_(s32) = G_UNMERGE_VALUES %44:_(s64)
%27:_(s32), %28:_(s32) = G_UNMERGE_VALUES %45:_(s64)
$vgpr0 = COPY %21:_(s32)
$vgpr1 = COPY %22:_(s32)
$vgpr2 = COPY %23:_(s32)
$vgpr3 = COPY %24:_(s32)
$vgpr4 = COPY %25:_(s32)
$vgpr5 = COPY %26:_(s32)
$vgpr6 = COPY %27:_(s32)
$vgpr7 = COPY %28:_(s32)
SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
# End machine code for function ptrtoaddr_ext.
```
However, AMDGPUPostLegalizerCombiner then appears to break this code: it removes the G_ANDs on both halves of the pointer (`%42` and `%43`) and unmerges `%31` and `%32` directly, so the first 128 bits are copied to the result instead of only the first 48:
```
*** IR Dump After AMDGPUPostLegalizerCombiner (amdgpu-postlegalizer-combiner) on ptrtoaddr_ext ***
# Machine code for function ptrtoaddr_ext: IsSSA, TracksLiveness, Legalized
Function Live Ins: $sgpr4_sgpr5 in %2, $sgpr6_sgpr7 in %3, $sgpr8_sgpr9 in %4, $sgpr10_sgpr11 in %5, $sgpr12 in %6, $sgpr13 in %7, $sgpr14 in %8, $sgpr15 in %9
bb.1 (%ir-block.0):
liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7
%14:_(s32) = COPY $vgpr4
%15:_(s32) = COPY $vgpr5
%16:_(s32) = COPY $vgpr6
%17:_(s32) = COPY $vgpr7
%1:_(p8) = G_MERGE_VALUES %14:_(s32), %15:_(s32), %16:_(s32), %17:_(s32)
%18:_(s128) = G_PTRTOINT %1:_(p8)
%31:_(s64), %32:_(s64) = G_UNMERGE_VALUES %18:_(s128)
%21:_(s32), %22:_(s32) = G_UNMERGE_VALUES %31:_(s64)
%23:_(s32), %24:_(s32) = G_UNMERGE_VALUES %32:_(s64)
%25:_(s32) = G_CONSTANT i32 0
%26:_(s32) = G_CONSTANT i32 0
%27:_(s32) = G_CONSTANT i32 0
%28:_(s32) = G_CONSTANT i32 0
$vgpr0 = COPY %21:_(s32)
$vgpr1 = COPY %22:_(s32)
$vgpr2 = COPY %23:_(s32)
$vgpr3 = COPY %24:_(s32)
$vgpr4 = COPY %25:_(s32)
$vgpr5 = COPY %26:_(s32)
$vgpr6 = COPY %27:_(s32)
$vgpr7 = COPY %28:_(s32)
SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
# End machine code for function ptrtoaddr_ext.
```
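With SelectionDAG I get the expected result (I believe because it doesn't run this pass). Compare the GISEL and SDAG check lines below: SDAG masks `v1` with `0xffff` and zeroes `v2`/`v3`, while GlobalISel copies all four input registers unmodified: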
```
define i256 @ptrtoaddr_ext(ptr addrspace(8) %ignored, ptr addrspace(8) %ptr) {
; GISEL-LABEL: ptrtoaddr_ext:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT: v_mov_b32_e32 v0, v4
; GISEL-NEXT: v_mov_b32_e32 v1, v5
; GISEL-NEXT: v_mov_b32_e32 v2, v6
; GISEL-NEXT: v_mov_b32_e32 v3, v7
; GISEL-NEXT: v_mov_b32_e32 v4, 0
; GISEL-NEXT: v_mov_b32_e32 v5, 0
; GISEL-NEXT: v_mov_b32_e32 v6, 0
; GISEL-NEXT: v_mov_b32_e32 v7, 0
; GISEL-NEXT: s_setpc_b64 s[30:31]
;
; SDAG-LABEL: ptrtoaddr_ext:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SDAG-NEXT: v_mov_b32_e32 v0, v4
; SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v5
; SDAG-NEXT: v_mov_b32_e32 v2, 0
; SDAG-NEXT: v_mov_b32_e32 v3, 0
; SDAG-NEXT: v_mov_b32_e32 v4, 0
; SDAG-NEXT: v_mov_b32_e32 v5, 0
; SDAG-NEXT: v_mov_b32_e32 v6, 0
; SDAG-NEXT: v_mov_b32_e32 v7, 0
; SDAG-NEXT: s_setpc_b64 s[30:31]
%ret = ptrtoaddr ptr addrspace(8) %ptr to i256
ret i256 %ret
}
```
_______________________________________________
llvm-bugs mailing list
llvm-bugs@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs