pmatos updated this revision to Diff 526044.
pmatos added a comment.

Update the patch by removing the target-specific changes in CGBuiltin.
fshl/fshr are left unchanged for rotates. This actually fixes a TODO in the
fshl/fshr tests.
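
For context, the InstCombine change guards the per-operand
SimplifyDemandedBits recursion: when both data operands of fshl/fshr are
the same value (i.e. the call is a rotate), we no longer simplify them
independently, since rewriting only one operand turns the rotate into a
general funnel shift that backends such as WebAssembly cannot match to a
single rotl/rotr instruction. A minimal IR sketch of the pattern that is
now preserved, adapted from the fsh.ll test (function and value names are
illustrative only):

  ; Rotate-left by 8, expressed as fshl with two identical data operands.
  declare i32 @llvm.fshl.i32(i32, i32, i32)

  define i32 @rotl_demo(i32 %a0) {
    %x = xor i32 %a0, 2
    ; Both data operands are %x, so this remains a rotate and can
    ; lower to a single i32.rotl on wasm32.
    %r = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 8)
    ret i32 %r
  }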

@nikic What do you think of the current patch?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D150670/new/

https://reviews.llvm.org/D150670

Files:
  clang/test/CodeGen/WebAssembly/wasm-rotate.c
  llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
  llvm/test/CodeGen/WebAssembly/rotate-i3264.ll
  llvm/test/Transforms/InstCombine/fsh.ll

Index: llvm/test/Transforms/InstCombine/fsh.ll
===================================================================
--- llvm/test/Transforms/InstCombine/fsh.ll
+++ llvm/test/Transforms/InstCombine/fsh.ll
@@ -440,12 +440,10 @@
   ret <2 x i32> %r
 }
 
-; TODO: Don't let SimplifyDemandedBits split up a rotate - keep the same operand.
-
 define i32 @rotl_common_demanded(i32 %a0) {
 ; CHECK-LABEL: @rotl_common_demanded(
 ; CHECK-NEXT:    [[X:%.*]] = xor i32 [[A0:%.*]], 2
-; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X]], i32 [[A0]], i32 8)
+; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X]], i32 [[X]], i32 8)
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
   %x = xor i32 %a0, 2
@@ -456,7 +454,7 @@
 define i33 @rotr_common_demanded(i33 %a0) {
 ; CHECK-LABEL: @rotr_common_demanded(
 ; CHECK-NEXT:    [[X:%.*]] = xor i33 [[A0:%.*]], 2
-; CHECK-NEXT:    [[R:%.*]] = call i33 @llvm.fshl.i33(i33 [[X]], i33 [[A0]], i33 25)
+; CHECK-NEXT:    [[R:%.*]] = call i33 @llvm.fshl.i33(i33 [[X]], i33 [[X]], i33 25)
 ; CHECK-NEXT:    ret i33 [[R]]
 ;
   %x = xor i33 %a0, 2
@@ -662,7 +660,8 @@
 
 define i32 @fshl_mask_args_same1(i32 %a) {
 ; CHECK-LABEL: @fshl_mask_args_same1(
-; CHECK-NEXT:    [[T2:%.*]] = lshr i32 [[A:%.*]], 16
+; CHECK-NEXT:    [[T1:%.*]] = and i32 [[A:%.*]], -65536
+; CHECK-NEXT:    [[T2:%.*]] = call i32 @llvm.fshl.i32(i32 [[T1]], i32 [[T1]], i32 16)
 ; CHECK-NEXT:    ret i32 [[T2]]
 ;
   %t1 = and i32 %a, 4294901760 ; 0xffff0000
@@ -718,7 +717,7 @@
 define <2 x i32> @fshr_mask_args_same_vector2(<2 x i32> %a, <2 x i32> %b) {
 ; CHECK-LABEL: @fshr_mask_args_same_vector2(
 ; CHECK-NEXT:    [[T1:%.*]] = and <2 x i32> [[A:%.*]], <i32 1000000, i32 100000>
-; CHECK-NEXT:    [[T3:%.*]] = lshr exact <2 x i32> [[T1]], <i32 3, i32 3>
+; CHECK-NEXT:    [[T3:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[T1]], <2 x i32> [[T1]], <2 x i32> <i32 29, i32 29>)
 ; CHECK-NEXT:    ret <2 x i32> [[T3]]
 ;
   %t1 = and <2 x i32> %a, <i32 1000000, i32 100000>
Index: llvm/test/CodeGen/WebAssembly/rotate-i3264.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/WebAssembly/rotate-i3264.ll
@@ -0,0 +1,46 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: sed 's/iX/i32/g' %s | llc --mtriple=wasm32-unknown-unknown | FileCheck --check-prefix=I32 %s
+; RUN: sed 's/iX/i64/g' %s | llc --mtriple=wasm64-unknown-unknown | FileCheck --check-prefix=I64 %s
+
+declare iX @llvm.fshl.iX(iX, iX, iX)
+declare iX @llvm.fshr.iX(iX, iX, iX)
+
+define iX @testLeft(iX noundef %0, iX noundef %1) {
+; I32-LABEL: testLeft:
+; I32:         .functype testLeft (i32, i32) -> (i32)
+; I32-NEXT:  # %bb.0:
+; I32-NEXT:    local.get 0
+; I32-NEXT:    local.get 1
+; I32-NEXT:    i32.rotl
+; I32-NEXT:    # fallthrough-return
+;
+; I64-LABEL: testLeft:
+; I64:         .functype testLeft (i64, i64) -> (i64)
+; I64-NEXT:  # %bb.0:
+; I64-NEXT:    local.get 0
+; I64-NEXT:    local.get 1
+; I64-NEXT:    i64.rotl
+; I64-NEXT:    # fallthrough-return
+  %3 = call iX @llvm.fshl.iX(iX %0, iX %0, iX %1)
+  ret iX %3
+}
+
+define iX @testRight(iX noundef %0, iX noundef %1) {
+; I32-LABEL: testRight:
+; I32:         .functype testRight (i32, i32) -> (i32)
+; I32-NEXT:  # %bb.0:
+; I32-NEXT:    local.get 0
+; I32-NEXT:    local.get 1
+; I32-NEXT:    i32.rotr
+; I32-NEXT:    # fallthrough-return
+;
+; I64-LABEL: testRight:
+; I64:         .functype testRight (i64, i64) -> (i64)
+; I64-NEXT:  # %bb.0:
+; I64-NEXT:    local.get 0
+; I64-NEXT:    local.get 1
+; I64-NEXT:    i64.rotr
+; I64-NEXT:    # fallthrough-return
+  %3 = call iX @llvm.fshr.iX(iX %0, iX %0, iX %1)
+  ret iX %3
+}
Index: llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
===================================================================
--- llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -924,11 +924,13 @@
         if (II->getIntrinsicID() == Intrinsic::fshr)
           ShiftAmt = BitWidth - ShiftAmt;
 
-        APInt DemandedMaskLHS(DemandedMask.lshr(ShiftAmt));
-        APInt DemandedMaskRHS(DemandedMask.shl(BitWidth - ShiftAmt));
-        if (SimplifyDemandedBits(I, 0, DemandedMaskLHS, LHSKnown, Depth + 1) ||
-            SimplifyDemandedBits(I, 1, DemandedMaskRHS, RHSKnown, Depth + 1))
-          return I;
+        if (I->getOperand(0) != I->getOperand(1)) {
+          APInt DemandedMaskLHS(DemandedMask.lshr(ShiftAmt));
+          APInt DemandedMaskRHS(DemandedMask.shl(BitWidth - ShiftAmt));
+          if (SimplifyDemandedBits(I, 0, DemandedMaskLHS, LHSKnown, Depth + 1) ||
+              SimplifyDemandedBits(I, 1, DemandedMaskRHS, RHSKnown, Depth + 1))
+            return I;
+        }
 
         Known.Zero = LHSKnown.Zero.shl(ShiftAmt) |
                      RHSKnown.Zero.lshr(BitWidth - ShiftAmt);
Index: clang/test/CodeGen/WebAssembly/wasm-rotate.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/WebAssembly/wasm-rotate.c
@@ -0,0 +1,53 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
+// RUN: %clang_cc1 -triple wasm32-unknown-unknown -o - -emit-llvm %s | FileCheck --check-prefix=WEBASSEMBLY32 %s
+// RUN: %clang_cc1 -triple wasm64-unknown-unknown -o - -emit-llvm %s | FileCheck --check-prefix=WEBASSEMBLY64 %s
+
+// WEBASSEMBLY32-LABEL: define i32 @test32
+// WEBASSEMBLY32-SAME: (i32 noundef [[X:%.*]]) #[[ATTR0:[0-9]+]] {
+// WEBASSEMBLY32-NEXT:  entry:
+// WEBASSEMBLY32-NEXT:    [[X_ADDR:%.*]] = alloca i32, align 4
+// WEBASSEMBLY32-NEXT:    store i32 [[X]], ptr [[X_ADDR]], align 4
+// WEBASSEMBLY32-NEXT:    [[TMP0:%.*]] = load i32, ptr [[X_ADDR]], align 4
+// WEBASSEMBLY32-NEXT:    [[AND:%.*]] = and i32 [[TMP0]], -16711936
+// WEBASSEMBLY32-NEXT:    [[TMP1:%.*]] = call i32 @llvm.fshl.i32(i32 [[AND]], i32 [[AND]], i32 8)
+// WEBASSEMBLY32-NEXT:    ret i32 [[TMP1]]
+//
+// WEBASSEMBLY64-LABEL: define i32 @test32
+// WEBASSEMBLY64-SAME: (i32 noundef [[X:%.*]]) #[[ATTR0:[0-9]+]] {
+// WEBASSEMBLY64-NEXT:  entry:
+// WEBASSEMBLY64-NEXT:    [[X_ADDR:%.*]] = alloca i32, align 4
+// WEBASSEMBLY64-NEXT:    store i32 [[X]], ptr [[X_ADDR]], align 4
+// WEBASSEMBLY64-NEXT:    [[TMP0:%.*]] = load i32, ptr [[X_ADDR]], align 4
+// WEBASSEMBLY64-NEXT:    [[AND:%.*]] = and i32 [[TMP0]], -16711936
+// WEBASSEMBLY64-NEXT:    [[TMP1:%.*]] = call i32 @llvm.fshl.i32(i32 [[AND]], i32 [[AND]], i32 8)
+// WEBASSEMBLY64-NEXT:    ret i32 [[TMP1]]
+//
+unsigned int test32(unsigned int x) {
+  return __builtin_rotateleft32((x & 0xFF00FF00), 8);
+}
+
+// WEBASSEMBLY32-LABEL: define i32 @test64
+// WEBASSEMBLY32-SAME: (i32 noundef [[X:%.*]]) #[[ATTR0]] {
+// WEBASSEMBLY32-NEXT:  entry:
+// WEBASSEMBLY32-NEXT:    [[X_ADDR:%.*]] = alloca i32, align 4
+// WEBASSEMBLY32-NEXT:    store i32 [[X]], ptr [[X_ADDR]], align 4
+// WEBASSEMBLY32-NEXT:    [[TMP0:%.*]] = load i32, ptr [[X_ADDR]], align 4
+// WEBASSEMBLY32-NEXT:    [[CONV:%.*]] = zext i32 [[TMP0]] to i64
+// WEBASSEMBLY32-NEXT:    [[AND:%.*]] = and i64 [[CONV]], -71777214294589696
+// WEBASSEMBLY32-NEXT:    [[TMP1:%.*]] = call i64 @llvm.fshl.i64(i64 [[AND]], i64 [[AND]], i64 8)
+// WEBASSEMBLY32-NEXT:    [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32
+// WEBASSEMBLY32-NEXT:    ret i32 [[CONV1]]
+//
+// WEBASSEMBLY64-LABEL: define i64 @test64
+// WEBASSEMBLY64-SAME: (i64 noundef [[X:%.*]]) #[[ATTR0]] {
+// WEBASSEMBLY64-NEXT:  entry:
+// WEBASSEMBLY64-NEXT:    [[X_ADDR:%.*]] = alloca i64, align 8
+// WEBASSEMBLY64-NEXT:    store i64 [[X]], ptr [[X_ADDR]], align 8
+// WEBASSEMBLY64-NEXT:    [[TMP0:%.*]] = load i64, ptr [[X_ADDR]], align 8
+// WEBASSEMBLY64-NEXT:    [[AND:%.*]] = and i64 [[TMP0]], -71777214294589696
+// WEBASSEMBLY64-NEXT:    [[TMP1:%.*]] = call i64 @llvm.fshl.i64(i64 [[AND]], i64 [[AND]], i64 8)
+// WEBASSEMBLY64-NEXT:    ret i64 [[TMP1]]
+//
+unsigned long test64(unsigned long x) {
+  return __builtin_rotateleft64((x & 0xFF00FF00FF00FF00L), 8);
+}