pmatos updated this revision to Diff 526044. pmatos added a comment. Update the patch by removing target-specific changes in CGBuiltin. Leave fshl/fshr unchanged for rotates. This actually fixes a TODO in the fshl/fshr test.
@nikic What do you think of the current patch? Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D150670/new/ https://reviews.llvm.org/D150670 Files: clang/test/CodeGen/WebAssembly/wasm-rotate.c llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp llvm/test/CodeGen/WebAssembly/rotate-i3264.ll llvm/test/Transforms/InstCombine/fsh.ll
Index: llvm/test/Transforms/InstCombine/fsh.ll =================================================================== --- llvm/test/Transforms/InstCombine/fsh.ll +++ llvm/test/Transforms/InstCombine/fsh.ll @@ -440,12 +440,10 @@ ret <2 x i32> %r } -; TODO: Don't let SimplifyDemandedBits split up a rotate - keep the same operand. - define i32 @rotl_common_demanded(i32 %a0) { ; CHECK-LABEL: @rotl_common_demanded( ; CHECK-NEXT: [[X:%.*]] = xor i32 [[A0:%.*]], 2 -; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X]], i32 [[A0]], i32 8) +; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.fshl.i32(i32 [[X]], i32 [[X]], i32 8) ; CHECK-NEXT: ret i32 [[R]] ; %x = xor i32 %a0, 2 @@ -456,7 +454,7 @@ define i33 @rotr_common_demanded(i33 %a0) { ; CHECK-LABEL: @rotr_common_demanded( ; CHECK-NEXT: [[X:%.*]] = xor i33 [[A0:%.*]], 2 -; CHECK-NEXT: [[R:%.*]] = call i33 @llvm.fshl.i33(i33 [[X]], i33 [[A0]], i33 25) +; CHECK-NEXT: [[R:%.*]] = call i33 @llvm.fshl.i33(i33 [[X]], i33 [[X]], i33 25) ; CHECK-NEXT: ret i33 [[R]] ; %x = xor i33 %a0, 2 @@ -662,7 +660,8 @@ define i32 @fshl_mask_args_same1(i32 %a) { ; CHECK-LABEL: @fshl_mask_args_same1( -; CHECK-NEXT: [[T2:%.*]] = lshr i32 [[A:%.*]], 16 +; CHECK-NEXT: [[T1:%.*]] = and i32 [[A:%.*]], -65536 +; CHECK-NEXT: [[T2:%.*]] = call i32 @llvm.fshl.i32(i32 [[T1]], i32 [[T1]], i32 16) ; CHECK-NEXT: ret i32 [[T2]] ; %t1 = and i32 %a, 4294901760 ; 0xffff0000 @@ -718,7 +717,7 @@ define <2 x i32> @fshr_mask_args_same_vector2(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: @fshr_mask_args_same_vector2( ; CHECK-NEXT: [[T1:%.*]] = and <2 x i32> [[A:%.*]], <i32 1000000, i32 100000> -; CHECK-NEXT: [[T3:%.*]] = lshr exact <2 x i32> [[T1]], <i32 3, i32 3> +; CHECK-NEXT: [[T3:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[T1]], <2 x i32> [[T1]], <2 x i32> <i32 29, i32 29>) ; CHECK-NEXT: ret <2 x i32> [[T3]] ; %t1 = and <2 x i32> %a, <i32 1000000, i32 100000> Index: llvm/test/CodeGen/WebAssembly/rotate-i3264.ll 
=================================================================== --- /dev/null +++ llvm/test/CodeGen/WebAssembly/rotate-i3264.ll @@ -0,0 +1,46 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: sed 's/iX/i32/g' %s | llc --mtriple=wasm32-unknown-unknown | FileCheck --check-prefix=I32 %s +; RUN: sed 's/iX/i64/g' %s | llc --mtriple=wasm64-unknown-unknown | FileCheck --check-prefix=I64 %s + +declare iX @llvm.fshl.iX(iX, iX, iX) +declare iX @llvm.fshr.iX(iX, iX, iX) + +define iX @testLeft(iX noundef %0, iX noundef %1) { +; I32-LABEL: testLeft: +; I32: .functype testLeft (i32, i32) -> (i32) +; I32-NEXT: # %bb.0: +; I32-NEXT: local.get 0 +; I32-NEXT: local.get 1 +; I32-NEXT: i32.rotl +; I32-NEXT: # fallthrough-return +; +; I64-LABEL: testLeft: +; I64: .functype testLeft (i64, i64) -> (i64) +; I64-NEXT: # %bb.0: +; I64-NEXT: local.get 0 +; I64-NEXT: local.get 1 +; I64-NEXT: i64.rotl +; I64-NEXT: # fallthrough-return + %3 = call iX @llvm.fshl.iX(iX %0, iX %0, iX %1) + ret iX %3 +} + +define iX @testRight(iX noundef %0, iX noundef %1) { +; I32-LABEL: testRight: +; I32: .functype testRight (i32, i32) -> (i32) +; I32-NEXT: # %bb.0: +; I32-NEXT: local.get 0 +; I32-NEXT: local.get 1 +; I32-NEXT: i32.rotr +; I32-NEXT: # fallthrough-return +; +; I64-LABEL: testRight: +; I64: .functype testRight (i64, i64) -> (i64) +; I64-NEXT: # %bb.0: +; I64-NEXT: local.get 0 +; I64-NEXT: local.get 1 +; I64-NEXT: i64.rotr +; I64-NEXT: # fallthrough-return + %3 = call iX @llvm.fshr.iX(iX %0, iX %0, iX %1) + ret iX %3 +} Index: llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp =================================================================== --- llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -924,11 +924,13 @@ if (II->getIntrinsicID() == Intrinsic::fshr) ShiftAmt = BitWidth - ShiftAmt; - APInt 
DemandedMaskLHS(DemandedMask.lshr(ShiftAmt)); - APInt DemandedMaskRHS(DemandedMask.shl(BitWidth - ShiftAmt)); - if (SimplifyDemandedBits(I, 0, DemandedMaskLHS, LHSKnown, Depth + 1) || - SimplifyDemandedBits(I, 1, DemandedMaskRHS, RHSKnown, Depth + 1)) - return I; + if (I->getOperand(0) != I->getOperand(1)) { + APInt DemandedMaskLHS(DemandedMask.lshr(ShiftAmt)); + APInt DemandedMaskRHS(DemandedMask.shl(BitWidth - ShiftAmt)); + if (SimplifyDemandedBits(I, 0, DemandedMaskLHS, LHSKnown, Depth + 1) || + SimplifyDemandedBits(I, 1, DemandedMaskRHS, RHSKnown, Depth + 1)) + return I; + } Known.Zero = LHSKnown.Zero.shl(ShiftAmt) | RHSKnown.Zero.lshr(BitWidth - ShiftAmt); Index: clang/test/CodeGen/WebAssembly/wasm-rotate.c =================================================================== --- /dev/null +++ clang/test/CodeGen/WebAssembly/wasm-rotate.c @@ -0,0 +1,53 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2 +// RUN: %clang_cc1 -triple wasm32-unknown-unknown -o - -emit-llvm %s | FileCheck --check-prefix=WEBASSEMBLY32 %s +// RUN: %clang_cc1 -triple wasm64-unknown-unknown -o - -emit-llvm %s | FileCheck --check-prefix=WEBASSEMBLY64 %s + +// WEBASSEMBLY32-LABEL: define i32 @test32 +// WEBASSEMBLY32-SAME: (i32 noundef [[X:%.*]]) #[[ATTR0:[0-9]+]] { +// WEBASSEMBLY32-NEXT: entry: +// WEBASSEMBLY32-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4 +// WEBASSEMBLY32-NEXT: store i32 [[X]], ptr [[X_ADDR]], align 4 +// WEBASSEMBLY32-NEXT: [[TMP0:%.*]] = load i32, ptr [[X_ADDR]], align 4 +// WEBASSEMBLY32-NEXT: [[AND:%.*]] = and i32 [[TMP0]], -16711936 +// WEBASSEMBLY32-NEXT: [[TMP1:%.*]] = call i32 @llvm.fshl.i32(i32 [[AND]], i32 [[AND]], i32 8) +// WEBASSEMBLY32-NEXT: ret i32 [[TMP1]] +// +// WEBASSEMBLY64-LABEL: define i32 @test32 +// WEBASSEMBLY64-SAME: (i32 noundef [[X:%.*]]) #[[ATTR0:[0-9]+]] { +// WEBASSEMBLY64-NEXT: entry: +// WEBASSEMBLY64-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4 +// WEBASSEMBLY64-NEXT: store i32 [[X]], ptr 
[[X_ADDR]], align 4 +// WEBASSEMBLY64-NEXT: [[TMP0:%.*]] = load i32, ptr [[X_ADDR]], align 4 +// WEBASSEMBLY64-NEXT: [[AND:%.*]] = and i32 [[TMP0]], -16711936 +// WEBASSEMBLY64-NEXT: [[TMP1:%.*]] = call i32 @llvm.fshl.i32(i32 [[AND]], i32 [[AND]], i32 8) +// WEBASSEMBLY64-NEXT: ret i32 [[TMP1]] +// +unsigned int test32(unsigned int x) { + return __builtin_rotateleft32((x & 0xFF00FF00), 8); +} + +// WEBASSEMBLY32-LABEL: define i32 @test64 +// WEBASSEMBLY32-SAME: (i32 noundef [[X:%.*]]) #[[ATTR0]] { +// WEBASSEMBLY32-NEXT: entry: +// WEBASSEMBLY32-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4 +// WEBASSEMBLY32-NEXT: store i32 [[X]], ptr [[X_ADDR]], align 4 +// WEBASSEMBLY32-NEXT: [[TMP0:%.*]] = load i32, ptr [[X_ADDR]], align 4 +// WEBASSEMBLY32-NEXT: [[CONV:%.*]] = zext i32 [[TMP0]] to i64 +// WEBASSEMBLY32-NEXT: [[AND:%.*]] = and i64 [[CONV]], -71777214294589696 +// WEBASSEMBLY32-NEXT: [[TMP1:%.*]] = call i64 @llvm.fshl.i64(i64 [[AND]], i64 [[AND]], i64 8) +// WEBASSEMBLY32-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32 +// WEBASSEMBLY32-NEXT: ret i32 [[CONV1]] +// +// WEBASSEMBLY64-LABEL: define i64 @test64 +// WEBASSEMBLY64-SAME: (i64 noundef [[X:%.*]]) #[[ATTR0]] { +// WEBASSEMBLY64-NEXT: entry: +// WEBASSEMBLY64-NEXT: [[X_ADDR:%.*]] = alloca i64, align 8 +// WEBASSEMBLY64-NEXT: store i64 [[X]], ptr [[X_ADDR]], align 8 +// WEBASSEMBLY64-NEXT: [[TMP0:%.*]] = load i64, ptr [[X_ADDR]], align 8 +// WEBASSEMBLY64-NEXT: [[AND:%.*]] = and i64 [[TMP0]], -71777214294589696 +// WEBASSEMBLY64-NEXT: [[TMP1:%.*]] = call i64 @llvm.fshl.i64(i64 [[AND]], i64 [[AND]], i64 8) +// WEBASSEMBLY64-NEXT: ret i64 [[TMP1]] +// +unsigned long test64(unsigned long x) { + return __builtin_rotateleft64((x & 0xFF00FF00FF00FF00L), 8); +}
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits