simon_tatham updated this revision to Diff 239609.
simon_tatham marked an inline comment as done.
simon_tatham edited the summary of this revision.
simon_tatham added a comment.
I've revised the MC representations of VBIC and VORR as suggested, but that was a big enough patch that I've done it separately as D73205 <https://reviews.llvm.org/D73205>. This patch now sits on top of that one.

Changing VBIC and VORR meant I could do the isel for the unpredicated forms in pure Tablegen. But the predicated ones would still have needed C++, because the IR intrinsics would have wanted the immediate in its natural form, but by the time you generate an instruction, it has to be re-encoded in the NEON immediate format. The simplest way was to stop adding new IR intrinsics, and instead encode the predicated instructions as a select. Then I still get to use isel lowering's conversion into VBICIMM/VORRIMM, which does the immediate translation for me.

Adjusting the VMOVL pattern to expect the result of my modified lowering has made all those unrelated MVE codegen tests go back to the way they were before, so the new version of this patch doesn't have to change anything there.

Also added a negative llc test with an immediate that doesn't fit into VBICIMM, to prove that it gets sensibly selected as a different instruction sequence and nothing crashes.

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72934/new/

https://reviews.llvm.org/D72934

Files:
  clang/include/clang/Basic/arm_mve.td
  clang/include/clang/Basic/arm_mve_defs.td
  clang/include/clang/Sema/Sema.h
  clang/lib/Sema/SemaChecking.cpp
  clang/test/CodeGen/arm-mve-intrinsics/bitwise-imm.c
  clang/test/Sema/arm-mve-immediates.c
  clang/utils/TableGen/MveEmitter.cpp
  llvm/lib/Target/ARM/ARMISelLowering.cpp
  llvm/lib/Target/ARM/ARMInstrInfo.td
  llvm/lib/Target/ARM/ARMInstrMVE.td
  llvm/lib/Target/ARM/ARMInstrNEON.td
  llvm/test/CodeGen/Thumb2/mve-intrinsics/bitwise-imm.ll
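Concretely, the select encoding means a predicated vbicq_m_n reaches isel looking like this (an illustrative sketch only -- the function name here is invented, but the IR shape matches the new codegen tests in this patch):

  define <4 x i32> @example_vbicq_m_n(<4 x i32> %a, i16 zeroext %p) {
  entry:
    ; convert the scalar predicate into a vector of i1
    %0 = zext i16 %p to i32
    %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
    ; the BIC itself is an ordinary 'and' with the inverted splat constant
    %2 = and <4 x i32> %a, <i32 -101, i32 -101, i32 -101, i32 -101>  ; -101 == ~0x64
    ; predication is a select between the result and the unmodified input
    %3 = select <4 x i1> %1, <4 x i32> %2, <4 x i32> %a
    ret <4 x i32> %3
  }

  declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)

The new vselect pattern in ARMInstrMVE.td then matches the select wrapped around the ARMvbicImm node that lowering produces, and emits the predicated form of the instruction.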
Index: llvm/test/CodeGen/Thumb2/mve-intrinsics/bitwise-imm.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/Thumb2/mve-intrinsics/bitwise-imm.ll
@@ -0,0 +1,365 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -verify-machineinstrs -o - %s | FileCheck %s
+
+define arm_aapcs_vfpcc <8 x i16> @test_vbicq_n_u16_sh0(<8 x i16> %a) {
+; CHECK-LABEL: test_vbicq_n_u16_sh0:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vbic.i16 q0, #0x64
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = and <8 x i16> %a, <i16 -101, i16 -101, i16 -101, i16 -101, i16 -101, i16 -101, i16 -101, i16 -101>
+  ret <8 x i16> %0
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vbicq_n_u16_sh8(<8 x i16> %a) {
+; CHECK-LABEL: test_vbicq_n_u16_sh8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vbic.i16 q0, #0x6400
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = and <8 x i16> %a, <i16 -25601, i16 -25601, i16 -25601, i16 -25601, i16 -25601, i16 -25601, i16 -25601, i16 -25601>
+  ret <8 x i16> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vbicq_n_u32_sh0(<4 x i32> %a) {
+; CHECK-LABEL: test_vbicq_n_u32_sh0:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vbic.i32 q0, #0x64
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = and <4 x i32> %a, <i32 -101, i32 -101, i32 -101, i32 -101>
+  ret <4 x i32> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vbicq_n_u32_sh8(<4 x i32> %a) {
+; CHECK-LABEL: test_vbicq_n_u32_sh8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vbic.i32 q0, #0x6400
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = and <4 x i32> %a, <i32 -25601, i32 -25601, i32 -25601, i32 -25601>
+  ret <4 x i32> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vbicq_n_u32_sh16(<4 x i32> %a) {
+; CHECK-LABEL: test_vbicq_n_u32_sh16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vbic.i32 q0, #0x640000
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = and <4 x i32> %a, <i32 -6553601, i32 -6553601, i32 -6553601, i32 -6553601>
+  ret <4 x i32> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vbicq_n_u32_sh24(<4 x i32> %a) {
+; CHECK-LABEL: test_vbicq_n_u32_sh24:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vbic.i32 q0, #0x64000000
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = and <4 x i32> %a, <i32 -1677721601, i32 -1677721601, i32 -1677721601, i32 -1677721601>
+  ret <4 x i32> %0
+}
+
+; The immediate in this case is legal for a VMVN but not for a VBIC,
+; so in this case we expect to see the constant being prepared in
+; another register.
+define arm_aapcs_vfpcc <4 x i32> @test_vbicq_n_u32_illegal(<4 x i32> %a) {
+; CHECK-LABEL: test_vbicq_n_u32_illegal:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmvn.i32 q1, #0x54ff
+; CHECK-NEXT:    vand q0, q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = and <4 x i32> %a, <i32 -21760, i32 -21760, i32 -21760, i32 -21760>
+  ret <4 x i32> %0
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vorrq_n_u16_sh0(<8 x i16> %a) {
+; CHECK-LABEL: test_vorrq_n_u16_sh0:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vorr.i16 q0, #0x64
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = or <8 x i16> %a, <i16 100, i16 100, i16 100, i16 100, i16 100, i16 100, i16 100, i16 100>
+  ret <8 x i16> %0
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vorrq_n_u16_sh8(<8 x i16> %a) {
+; CHECK-LABEL: test_vorrq_n_u16_sh8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vorr.i16 q0, #0x6400
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = or <8 x i16> %a, <i16 25600, i16 25600, i16 25600, i16 25600, i16 25600, i16 25600, i16 25600, i16 25600>
+  ret <8 x i16> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vorrq_n_u32_sh0(<4 x i32> %a) {
+; CHECK-LABEL: test_vorrq_n_u32_sh0:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vorr.i32 q0, #0x64
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = or <4 x i32> %a, <i32 100, i32 100, i32 100, i32 100>
+  ret <4 x i32> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vorrq_n_u32_sh8(<4 x i32> %a) {
+; CHECK-LABEL: test_vorrq_n_u32_sh8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vorr.i32 q0, #0x6400
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = or <4 x i32> %a, <i32 25600, i32 25600, i32 25600, i32 25600>
+  ret <4 x i32> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vorrq_n_u32_sh16(<4 x i32> %a) {
+; CHECK-LABEL: test_vorrq_n_u32_sh16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vorr.i32 q0, #0x640000
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = or <4 x i32> %a, <i32 6553600, i32 6553600, i32 6553600, i32 6553600>
+  ret <4 x i32> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vorrq_n_u32_sh24(<4 x i32> %a) {
+; CHECK-LABEL: test_vorrq_n_u32_sh24:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vorr.i32 q0, #0x64000000
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = or <4 x i32> %a, <i32 1677721600, i32 1677721600, i32 1677721600, i32 1677721600>
+  ret <4 x i32> %0
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vbicq_m_n_u16_sh0(<8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vbicq_m_n_u16_sh0:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vbict.i16 q0, #0x64
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+  %2 = and <8 x i16> %a, <i16 -101, i16 -101, i16 -101, i16 -101, i16 -101, i16 -101, i16 -101, i16 -101>
+  %3 = select <8 x i1> %1, <8 x i16> %2, <8 x i16> %a
+  ret <8 x i16> %3
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vbicq_m_n_u16_sh8(<8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vbicq_m_n_u16_sh8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vbict.i16 q0, #0x6400
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+  %2 = and <8 x i16> %a, <i16 -25601, i16 -25601, i16 -25601, i16 -25601, i16 -25601, i16 -25601, i16 -25601, i16 -25601>
+  %3 = select <8 x i1> %1, <8 x i16> %2, <8 x i16> %a
+  ret <8 x i16> %3
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vbicq_m_n_u32_sh0(<4 x i32> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vbicq_m_n_u32_sh0:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vbict.i32 q0, #0x64
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = and <4 x i32> %a, <i32 -101, i32 -101, i32 -101, i32 -101>
+  %3 = select <4 x i1> %1, <4 x i32> %2, <4 x i32> %a
+  ret <4 x i32> %3
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vbicq_m_n_u32_sh8(<4 x i32> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vbicq_m_n_u32_sh8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vbict.i32 q0, #0x6400
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = and <4 x i32> %a, <i32 -25601, i32 -25601, i32 -25601, i32 -25601>
+  %3 = select <4 x i1> %1, <4 x i32> %2, <4 x i32> %a
+  ret <4 x i32> %3
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vbicq_m_n_u32_sh16(<4 x i32> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vbicq_m_n_u32_sh16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vbict.i32 q0, #0x640000
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = and <4 x i32> %a, <i32 -6553601, i32 -6553601, i32 -6553601, i32 -6553601>
+  %3 = select <4 x i1> %1, <4 x i32> %2, <4 x i32> %a
+  ret <4 x i32> %3
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vbicq_m_n_u32_sh24(<4 x i32> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vbicq_m_n_u32_sh24:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vbict.i32 q0, #0x64000000
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = and <4 x i32> %a, <i32 -1677721601, i32 -1677721601, i32 -1677721601, i32 -1677721601>
+  %3 = select <4 x i1> %1, <4 x i32> %2, <4 x i32> %a
+  ret <4 x i32> %3
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vorrq_m_n_u16_sh0(<8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vorrq_m_n_u16_sh0:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vorrt.i16 q0, #0x64
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+  %2 = or <8 x i16> %a, <i16 100, i16 100, i16 100, i16 100, i16 100, i16 100, i16 100, i16 100>
+  %3 = select <8 x i1> %1, <8 x i16> %2, <8 x i16> %a
+  ret <8 x i16> %3
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vorrq_m_n_u16_sh8(<8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vorrq_m_n_u16_sh8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vorrt.i16 q0, #0x6400
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+  %2 = or <8 x i16> %a, <i16 25600, i16 25600, i16 25600, i16 25600, i16 25600, i16 25600, i16 25600, i16 25600>
+  %3 = select <8 x i1> %1, <8 x i16> %2, <8 x i16> %a
+  ret <8 x i16> %3
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vorrq_m_n_u32_sh0(<4 x i32> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vorrq_m_n_u32_sh0:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vorrt.i32 q0, #0x64
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = or <4 x i32> %a, <i32 100, i32 100, i32 100, i32 100>
+  %3 = select <4 x i1> %1, <4 x i32> %2, <4 x i32> %a
+  ret <4 x i32> %3
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vorrq_m_n_u32_sh8(<4 x i32> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vorrq_m_n_u32_sh8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vorrt.i32 q0, #0x6400
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = or <4 x i32> %a, <i32 25600, i32 25600, i32 25600, i32 25600>
+  %3 = select <4 x i1> %1, <4 x i32> %2, <4 x i32> %a
+  ret <4 x i32> %3
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vorrq_m_n_u32_sh16(<4 x i32> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vorrq_m_n_u32_sh16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vorrt.i32 q0, #0x640000
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = or <4 x i32> %a, <i32 6553600, i32 6553600, i32 6553600, i32 6553600>
+  %3 = select <4 x i1> %1, <4 x i32> %2, <4 x i32> %a
+  ret <4 x i32> %3
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vorrq_m_n_u32_sh24(<4 x i32> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vorrq_m_n_u32_sh24:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vorrt.i32 q0, #0x64000000
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = or <4 x i32> %a, <i32 1677721600, i32 1677721600, i32 1677721600, i32 1677721600>
+  %3 = select <4 x i1> %1, <4 x i32> %2, <4 x i32> %a
+  ret <4 x i32> %3
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vmvnq_n_u16() {
+; CHECK-LABEL: test_vmvnq_n_u16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmvn.i16 q0, #0xaa00
+; CHECK-NEXT:    bx lr
+entry:
+  ret <8 x i16> <i16 -43521, i16 -43521, i16 -43521, i16 -43521, i16 -43521, i16 -43521, i16 -43521, i16 -43521>
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vmvnq_n_u32() {
+; CHECK-LABEL: test_vmvnq_n_u32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmvn.i32 q0, #0xaa00
+; CHECK-NEXT:    bx lr
+entry:
+  ret <4 x i32> <i32 -43521, i32 -43521, i32 -43521, i32 -43521>
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vmvnq_m_n_u16(<8 x i16> %inactive, i16 zeroext %p) {
+; CHECK-LABEL: test_vmvnq_m_n_u16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vmvnt.i16 q0, #0xaa00
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+  %2 = select <8 x i1> %1, <8 x i16> <i16 -43521, i16 -43521, i16 -43521, i16 -43521, i16 -43521, i16 -43521, i16 -43521, i16 -43521>, <8 x i16> %inactive
+  ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vmvnq_m_n_u32(<4 x i32> %inactive, i16 zeroext %p) {
+; CHECK-LABEL: test_vmvnq_m_n_u32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vmvnt.i32 q0, #0xaa00
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = select <4 x i1> %1, <4 x i32> <i32 -43521, i32 -43521, i32 -43521, i32 -43521>, <4 x i32> %inactive
+  ret <4 x i32> %2
+}
+
+declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
+declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
Index: llvm/lib/Target/ARM/ARMInstrNEON.td
===================================================================
--- llvm/lib/Target/ARM/ARMInstrNEON.td
+++ llvm/lib/Target/ARM/ARMInstrNEON.td
@@ -509,11 +509,6 @@
 def NEONvsliImm : SDNode<"ARMISD::VSLIIMM", SDTARMVSHINSIMM>;
 def NEONvsriImm : SDNode<"ARMISD::VSRIIMM", SDTARMVSHINSIMM>;
 
-def SDTARMVORRIMM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
-                                         SDTCisVT<2, i32>]>;
-def NEONvorrImm : SDNode<"ARMISD::VORRIMM", SDTARMVORRIMM>;
-def NEONvbicImm : SDNode<"ARMISD::VBICIMM", SDTARMVORRIMM>;
-
 def NEONvbsl : SDNode<"ARMISD::VBSL",
                       SDTypeProfile<1, 3, [SDTCisVec<0>,
                                            SDTCisSameAs<0, 1>,
@@ -5296,7 +5291,7 @@
                           IIC_VMOVImm,
                           "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
                           [(set DPR:$Vd,
-                               (v4i16 (NEONvorrImm DPR:$src, timm:$SIMM)))]> {
+                               (v4i16 (ARMvorrImm DPR:$src, timm:$SIMM)))]> {
   let Inst{9} = SIMM{9};
 }
 
@@ -5305,7 +5300,7 @@
                           IIC_VMOVImm,
                           "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
                           [(set DPR:$Vd,
-                               (v2i32 (NEONvorrImm DPR:$src, timm:$SIMM)))]> {
+                               (v2i32 (ARMvorrImm DPR:$src, timm:$SIMM)))]> {
   let Inst{10-9} = SIMM{10-9};
 }
 
@@ -5314,7 +5309,7 @@
                           IIC_VMOVImm,
                           "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
                           [(set QPR:$Vd,
-                               (v8i16 (NEONvorrImm QPR:$src, timm:$SIMM)))]> {
+                               (v8i16 (ARMvorrImm QPR:$src, timm:$SIMM)))]> {
   let Inst{9} = SIMM{9};
 }
 
@@ -5323,7 +5318,7 @@
                           IIC_VMOVImm,
                           "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
                           [(set QPR:$Vd,
-                               (v4i32 (NEONvorrImm QPR:$src, timm:$SIMM)))]> {
+                               (v4i32 (ARMvorrImm QPR:$src, timm:$SIMM)))]> {
   let Inst{10-9} = SIMM{10-9};
 }
 
@@ -5347,7 +5342,7 @@
                           IIC_VMOVImm,
                           "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
                           [(set DPR:$Vd,
-                               (v4i16 (NEONvbicImm DPR:$src, timm:$SIMM)))]> {
+                               (v4i16 (ARMvbicImm DPR:$src, timm:$SIMM)))]> {
   let Inst{9} = SIMM{9};
 }
 
@@ -5356,7 +5351,7 @@
                           IIC_VMOVImm,
                           "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
                           [(set DPR:$Vd,
-                               (v2i32 (NEONvbicImm DPR:$src, timm:$SIMM)))]> {
+                               (v2i32 (ARMvbicImm DPR:$src, timm:$SIMM)))]> {
   let Inst{10-9} = SIMM{10-9};
 }
 
@@ -5365,7 +5360,7 @@
                           IIC_VMOVImm,
                           "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
                           [(set QPR:$Vd,
-                               (v8i16 (NEONvbicImm QPR:$src, timm:$SIMM)))]> {
+                               (v8i16 (ARMvbicImm QPR:$src, timm:$SIMM)))]> {
   let Inst{9} = SIMM{9};
 }
 
@@ -5374,7 +5369,7 @@
                           IIC_VMOVImm,
                           "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
                           [(set QPR:$Vd,
-                               (v4i32 (NEONvbicImm QPR:$src, timm:$SIMM)))]> {
+                               (v4i32 (ARMvbicImm QPR:$src, timm:$SIMM)))]> {
   let Inst{10-9} = SIMM{10-9};
 }
Index: llvm/lib/Target/ARM/ARMInstrMVE.td
===================================================================
--- llvm/lib/Target/ARM/ARMInstrMVE.td
+++ llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -1367,37 +1367,51 @@
   let Inst{3-0} = imm{3-0};
 }
 
-class MVE_VORR<string suffix, bit hw, Operand imm_type>
-  : MVE_bit_cmode<"vorr", suffix, hw, (ins MQPR:$Qd_src, imm_type:$imm)> {
-  let Inst{5} = 0b0;
-  let validForTailPredication = 1;
+multiclass MVE_bit_cmode_p<string iname, bit opcode,
+                           MVEVectorVTInfo VTI, Operand imm_type, SDNode op> {
+  def "" : MVE_bit_cmode<iname, VTI.Suffix, VTI.Size{0},
+                         (ins MQPR:$Qd_src, imm_type:$imm)> {
+    let Inst{5} = opcode;
+    let validForTailPredication = 1;
+  }
+
+  defvar Inst = !cast<Instruction>(NAME);
+  defvar UnpredPat = (VTI.Vec (op (VTI.Vec MQPR:$src), timm:$simm));
+
+  let Predicates = [HasMVEInt] in {
+    def : Pat<UnpredPat, (VTI.Vec (Inst (VTI.Vec MQPR:$src), imm_type:$simm))>;
+    def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred),
+                                UnpredPat, (VTI.Vec MQPR:$src))),
+              (VTI.Vec (Inst (VTI.Vec MQPR:$src), imm_type:$simm,
+                             ARMVCCThen, (VTI.Pred VCCR:$pred)))>;
+  }
+}
+
+multiclass MVE_VORRimm<MVEVectorVTInfo VTI, Operand imm_type> {
+  defm "": MVE_bit_cmode_p<"vorr", 0, VTI, imm_type, ARMvorrImm>;
+}
+multiclass MVE_VBICimm<MVEVectorVTInfo VTI, Operand imm_type> {
+  defm "": MVE_bit_cmode_p<"vbic", 1, VTI, imm_type, ARMvbicImm>;
 }
 
-def MVE_VORRimmi16 : MVE_VORR<"i16", 1, nImmSplatI16>;
-def MVE_VORRimmi32 : MVE_VORR<"i32", 0, nImmSplatI32>;
+defm MVE_VORRimmi16 : MVE_VORRimm<MVE_v8i16, nImmSplatI16>;
+defm MVE_VORRimmi32 : MVE_VORRimm<MVE_v4i32, nImmSplatI32>;
+defm MVE_VBICimmi16 : MVE_VBICimm<MVE_v8i16, nImmSplatI16>;
+defm MVE_VBICimmi32 : MVE_VBICimm<MVE_v4i32, nImmSplatI32>;
 
 def MVE_VORNimmi16 : MVEInstAlias<"vorn${vp}.i16\t$Qd, $imm",
     (MVE_VORRimmi16 MQPR:$Qd, nImmSplatNotI16:$imm, vpred_n:$vp), 0>;
 def MVE_VORNimmi32 : MVEInstAlias<"vorn${vp}.i32\t$Qd, $imm",
     (MVE_VORRimmi32 MQPR:$Qd, nImmSplatNotI32:$imm, vpred_n:$vp), 0>;
 
-def MVE_VMOV : MVEInstAlias<"vmov${vp}\t$Qd, $Qm",
-    (MVE_VORR MQPR:$Qd, MQPR:$Qm, MQPR:$Qm, vpred_r:$vp)>;
-
-class MVE_VBIC<string suffix, bit hw, Operand imm_type>
-  : MVE_bit_cmode<"vbic", suffix, hw, (ins MQPR:$Qd_src, imm_type:$imm)> {
-  let Inst{5} = 0b1;
-  let validForTailPredication = 1;
-}
-
-def MVE_VBICimmi16 : MVE_VBIC<"i16", 1, nImmSplatI16>;
-def MVE_VBICimmi32 : MVE_VBIC<"i32", 0, nImmSplatI32>;
-
 def MVE_VANDimmi16 : MVEInstAlias<"vand${vp}.i16\t$Qd, $imm",
     (MVE_VBICimmi16 MQPR:$Qd, nImmSplatNotI16:$imm, vpred_n:$vp), 0>;
 def MVE_VANDimmi32 : MVEInstAlias<"vand${vp}.i32\t$Qd, $imm",
     (MVE_VBICimmi32 MQPR:$Qd, nImmSplatNotI32:$imm, vpred_n:$vp), 0>;
 
+def MVE_VMOV : MVEInstAlias<"vmov${vp}\t$Qd, $Qm",
+    (MVE_VORR MQPR:$Qd, MQPR:$Qm, MQPR:$Qm, vpred_r:$vp)>;
+
 class MVE_VMOV_lane_direction {
   bit bit_20;
   dag oops;
@@ -2206,6 +2220,15 @@
 
   def : Pat<(v4f32 (ARMvmovFPImm timm:$simm)),
             (v4f32 (MVE_VMOVimmf32 nImmVMOVF32:$simm))>;
+
+  def : Pat<(v8i16 (vselect (v8i1 VCCR:$pred), (ARMvmvnImm timm:$simm),
+                            MQPR:$inactive)),
+            (v8i16 (MVE_VMVNimmi16 nImmSplatI16:$simm,
+                                   ARMVCCThen, VCCR:$pred, MQPR:$inactive))>;
+  def : Pat<(v4i32 (vselect (v4i1 VCCR:$pred), (ARMvmvnImm timm:$simm),
+                            MQPR:$inactive)),
+            (v4i32 (MVE_VMVNimmi32 nImmSplatI32:$simm,
+                                   ARMVCCThen, VCCR:$pred, MQPR:$inactive))>;
 }
 
 class MVE_VMINMAXA<string iname, string suffix, bits<2> size,
@@ -2341,7 +2364,7 @@
   def : Pat<(and (v4i32 MQPR:$src), (v4i32 (ARMvmovImm (i32 0xCFF)))),
             (MVE_VMOVLu16bh MQPR:$src)>;
   // zext_inreg 8 -> 16
-  def : Pat<(and (v8i16 MQPR:$src), (v8i16 (ARMvmovImm (i32 0x8FF)))),
+  def : Pat<(ARMvbicImm (v8i16 MQPR:$src), (i32 0xAFF)),
             (MVE_VMOVLu8bh MQPR:$src)>;
 }
Index: llvm/lib/Target/ARM/ARMInstrInfo.td
===================================================================
--- llvm/lib/Target/ARM/ARMInstrInfo.td
+++ llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -274,6 +274,10 @@
 def ARMvmvnImm : SDNode<"ARMISD::VMVNIMM", SDTARMVMOVIMM>;
 def ARMvmovFPImm : SDNode<"ARMISD::VMOVFPIMM", SDTARMVMOVIMM>;
 
+def SDTARMVORRIMM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
+                                         SDTCisVT<2, i32>]>;
+def ARMvorrImm : SDNode<"ARMISD::VORRIMM", SDTARMVORRIMM>;
+def ARMvbicImm : SDNode<"ARMISD::VBICIMM", SDTARMVORRIMM>;
 
 def SDTARMVSHIMM : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
                                         SDTCisVT<2, i32>]>;
Index: llvm/lib/Target/ARM/ARMISelLowering.cpp
===================================================================
--- llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -12176,7 +12176,7 @@
   APInt SplatBits, SplatUndef;
   unsigned SplatBitSize;
   bool HasAnyUndefs;
-  if (BVN && Subtarget->hasNEON() &&
+  if (BVN && (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) &&
       BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
     if (SplatBitSize <= 64) {
       EVT VbicVT;
@@ -12483,7 +12483,7 @@
   APInt SplatBits, SplatUndef;
   unsigned SplatBitSize;
   bool HasAnyUndefs;
-  if (BVN && Subtarget->hasNEON() &&
+  if (BVN && (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) &&
      BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
    if (SplatBitSize <= 64) {
      EVT VorrVT;
Index: clang/utils/TableGen/MveEmitter.cpp
===================================================================
--- clang/utils/TableGen/MveEmitter.cpp
+++ clang/utils/TableGen/MveEmitter.cpp
@@ -883,38 +883,41 @@
       break;
     case ImmediateArg::BoundsType::UInt:
       lo = 0;
-      hi = IA.i1;
+      hi = llvm::APInt::getMaxValue(IA.i1).zext(128);
       break;
     }
 
-    llvm::APInt typelo, typehi;
-    unsigned Bits = IA.ArgType->sizeInBits();
-    if (cast<ScalarType>(IA.ArgType)->kind() == ScalarTypeKind::SignedInt) {
-      typelo = llvm::APInt::getSignedMinValue(Bits).sext(128);
-      typehi = llvm::APInt::getSignedMaxValue(Bits).sext(128);
-    } else {
-      typelo = llvm::APInt::getMinValue(Bits).zext(128);
-      typehi = llvm::APInt::getMaxValue(Bits).zext(128);
-    }
-
     std::string Index = utostr(kv.first);
 
-    if (lo.sle(typelo) && hi.sge(typehi))
-      SemaChecks.push_back("SemaBuiltinConstantArg(TheCall, " + Index + ")");
-    else
+    // Emit a range check if the legal range of values for the
+    // immediate is smaller than the _possible_ range of values for
+    // its type.
+    unsigned ArgTypeBits = IA.ArgType->sizeInBits();
+    llvm::APInt ArgTypeRange = llvm::APInt::getMaxValue(ArgTypeBits).zext(128);
+    llvm::APInt ActualRange = (hi-lo).trunc(64).sext(128);
+    if (ActualRange.ult(ArgTypeRange))
      SemaChecks.push_back("SemaBuiltinConstantArgRange(TheCall, " + Index +
                           ", " + signedHexLiteral(lo) + ", " +
                           signedHexLiteral(hi) + ")");
 
     if (!IA.ExtraCheckType.empty()) {
       std::string Suffix;
-      if (!IA.ExtraCheckArgs.empty())
-        Suffix = (Twine(", ") + IA.ExtraCheckArgs).str();
+      if (!IA.ExtraCheckArgs.empty()) {
+        std::string tmp;
+        StringRef Arg = IA.ExtraCheckArgs;
+        if (Arg == "!lanesize") {
+          tmp = utostr(IA.ArgType->sizeInBits());
+          Arg = tmp;
+        }
+        Suffix = (Twine(", ") + Arg).str();
+      }
       SemaChecks.push_back((Twine("SemaBuiltinConstantArg") +
                             IA.ExtraCheckType + "(TheCall, " + Index +
                             Suffix + ")")
                                .str());
     }
+
+    assert(!SemaChecks.empty());
   }
   if (SemaChecks.empty())
     return "";
Index: clang/test/Sema/arm-mve-immediates.c
===================================================================
--- clang/test/Sema/arm-mve-immediates.c
+++ clang/test/Sema/arm-mve-immediates.c
@@ -203,3 +203,73 @@
   vsriq(vw, vw, 0); // expected-error {{argument value 0 is outside the valid range [1, 32]}}
   vsriq(vw, vw, 33); // expected-error {{argument value 33 is outside the valid range [1, 32]}}
 }
+
+void test_simd_bic_orr(int16x8_t h, int32x4_t w)
+{
+  h = vbicq(h, 0x0000);
+  h = vbicq(h, 0x0001);
+  h = vbicq(h, 0x00FF);
+  h = vbicq(h, 0x0100);
+  h = vbicq(h, 0x0101); // expected-error-re {{argument should be an 8-bit value shifted by a multiple of 8 bits{{$}}}}
+  h = vbicq(h, 0x01FF); // expected-error-re {{argument should be an 8-bit value shifted by a multiple of 8 bits{{$}}}}
+  h = vbicq(h, 0xFF00);
+
+  w = vbicq(w, 0x00000000);
+  w = vbicq(w, 0x00000001);
+  w = vbicq(w, 0x000000FF);
+  w = vbicq(w, 0x00000100);
+  w = vbicq(w, 0x0000FF00);
+  w = vbicq(w, 0x00010000);
+  w = vbicq(w, 0x00FF0000);
+  w = vbicq(w, 0x01000000);
+  w = vbicq(w, 0xFF000000);
+  w = vbicq(w, 0x01000001); // expected-error-re {{argument should be an 8-bit value shifted by a multiple of 8 bits{{$}}}}
+  w = vbicq(w, 0x01FFFFFF); // expected-error-re {{argument should be an 8-bit value shifted by a multiple of 8 bits{{$}}}}
+
+  h = vorrq(h, 0x0000);
+  h = vorrq(h, 0x0001);
+  h = vorrq(h, 0x00FF);
+  h = vorrq(h, 0x0100);
+  h = vorrq(h, 0x0101); // expected-error-re {{argument should be an 8-bit value shifted by a multiple of 8 bits{{$}}}}
+  h = vorrq(h, 0x01FF); // expected-error-re {{argument should be an 8-bit value shifted by a multiple of 8 bits{{$}}}}
+  h = vorrq(h, 0xFF00);
+
+  w = vorrq(w, 0x00000000);
+  w = vorrq(w, 0x00000001);
+  w = vorrq(w, 0x000000FF);
+  w = vorrq(w, 0x00000100);
+  w = vorrq(w, 0x0000FF00);
+  w = vorrq(w, 0x00010000);
+  w = vorrq(w, 0x00FF0000);
+  w = vorrq(w, 0x01000000);
+  w = vorrq(w, 0xFF000000);
+  w = vorrq(w, 0x01000001); // expected-error-re {{argument should be an 8-bit value shifted by a multiple of 8 bits{{$}}}}
+  w = vorrq(w, 0x01FFFFFF); // expected-error-re {{argument should be an 8-bit value shifted by a multiple of 8 bits{{$}}}}
+}
+
+void test_simd_vmvn(void)
+{
+  uint16x8_t h;
+  h = vmvnq_n_u16(0x0000);
+  h = vmvnq_n_u16(0x0001);
+  h = vmvnq_n_u16(0x00FF);
+  h = vmvnq_n_u16(0x0100);
+  h = vmvnq_n_u16(0x0101); // expected-error {{argument should be an 8-bit value shifted by a multiple of 8 bits, or in the form 0x??FF}}
+  h = vmvnq_n_u16(0x01FF);
+  h = vmvnq_n_u16(0xFF00);
+
+  uint32x4_t w;
+  w = vmvnq_n_u32(0x00000000);
+  w = vmvnq_n_u32(0x00000001);
+  w = vmvnq_n_u32(0x000000FF);
+  w = vmvnq_n_u32(0x00000100);
+  w = vmvnq_n_u32(0x0000FF00);
+  w = vmvnq_n_u32(0x00010000);
+  w = vmvnq_n_u32(0x00FF0000);
+  w = vmvnq_n_u32(0x01000000);
+  w = vmvnq_n_u32(0xFF000000);
+  w = vmvnq_n_u32(0x01000001); // expected-error {{argument should be an 8-bit value shifted by a multiple of 8 bits, or in the form 0x??FF}}
+  w = vmvnq_n_u32(0x01FFFFFF); // expected-error {{argument should be an 8-bit value shifted by a multiple of 8 bits, or in the form 0x??FF}}
+  w = vmvnq_n_u32(0x0001FFFF); // expected-error {{argument should be an 8-bit value shifted by a multiple of 8 bits, or in the form 0x??FF}}
+  w = vmvnq_n_u32(0x000001FF);
+}
Index: clang/test/CodeGen/arm-mve-intrinsics/bitwise-imm.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/arm-mve-intrinsics/bitwise-imm.c
@@ -0,0 +1,402 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+
+#include <arm_mve.h>
+
+// CHECK-LABEL: @test_vbicq_n_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = and <8 x i16> [[A:%.*]], <i16 11007, i16 11007, i16 11007, i16 11007, i16 11007, i16 11007, i16 11007, i16 11007>
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+int16x8_t test_vbicq_n_s16(int16x8_t a)
+{
+#ifdef POLYMORPHIC
+    return vbicq(a, 0xd500);
+#else /* POLYMORPHIC */
+    return vbicq_n_s16(a, 0xd500);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vbicq_n_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = and <4 x i32> [[A:%.*]], <i32 -252, i32 -252, i32 -252, i32 -252>
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+int32x4_t test_vbicq_n_s32(int32x4_t a)
+{
+#ifdef POLYMORPHIC
+    return vbicq(a, 0xfb);
+#else /* POLYMORPHIC */
+    return vbicq_n_s32(a, 0xfb);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vbicq_n_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = and <8 x i16> [[A:%.*]], <i16 -243, i16 -243, i16 -243, i16 -243, i16 -243, i16 -243, i16 -243, i16 -243>
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+uint16x8_t test_vbicq_n_u16(uint16x8_t a)
+{
+#ifdef POLYMORPHIC
+    return vbicq(a, 0xf2);
+#else /* POLYMORPHIC */
+    return vbicq_n_u16(a, 0xf2);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vbicq_n_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = and <4 x i32> [[A:%.*]], <i32 -8193, i32 -8193, i32 -8193, i32 -8193>
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+uint32x4_t test_vbicq_n_u32(uint32x4_t a)
+{
+#ifdef POLYMORPHIC
+    return vbicq(a, 0x2000);
+#else /* POLYMORPHIC */
+    return vbicq_n_u32(a, 0x2000);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vorrq_n_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = or <8 x i16> [[A:%.*]], <i16 195, i16 195, i16 195, i16 195, i16 195, i16 195, i16 195, i16 195>
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+int16x8_t test_vorrq_n_s16(int16x8_t a)
+{
+#ifdef POLYMORPHIC
+    return vorrq(a, 0xc3);
+#else /* POLYMORPHIC */
+    return vorrq_n_s16(a, 0xc3);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vorrq_n_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = or <4 x i32> [[A:%.*]], <i32 65536, i32 65536, i32 65536, i32 65536>
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+int32x4_t test_vorrq_n_s32(int32x4_t a)
+{
+#ifdef POLYMORPHIC
+    return vorrq(a, 0x10000);
+#else /* POLYMORPHIC */
+    return vorrq_n_s32(a, 0x10000);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vorrq_n_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = or <8 x i16> [[A:%.*]], <i16 -4096, i16 -4096, i16 -4096, i16 -4096, i16 -4096, i16 -4096, i16 -4096, i16 -4096>
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+uint16x8_t test_vorrq_n_u16(uint16x8_t a)
+{
+#ifdef POLYMORPHIC
+    return vorrq(a, 0xf000);
+#else /* POLYMORPHIC */
+    return vorrq_n_u16(a, 0xf000);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vorrq_n_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = or <4 x i32> [[A:%.*]], <i32 8978432, i32 8978432, i32 8978432, i32 8978432>
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+uint32x4_t test_vorrq_n_u32(uint32x4_t a)
+{
+#ifdef POLYMORPHIC
+    return vorrq(a, 0x890000);
+#else /* POLYMORPHIC */
+    return vorrq_n_u32(a, 0x890000);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmvnq_n_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    ret <8 x i16> <i16 27391, i16 27391, i16 27391, i16 27391, i16 27391, i16 27391, i16 27391, i16 27391>
+//
+int16x8_t test_vmvnq_n_s16()
+{
+    return vmvnq_n_s16(0x9500);
+}
+
+// CHECK-LABEL: @test_vmvnq_n_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    ret <4 x i32> <i32 -5570561, i32 -5570561, i32 -5570561, i32 -5570561>
+//
+int32x4_t test_vmvnq_n_s32()
+{
+    return vmvnq_n_s32(0x550000);
+}
+
+// CHECK-LABEL: @test_vmvnq_n_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    ret <8 x i16> <i16 -18689, i16 -18689, i16 -18689, i16 -18689, i16 -18689, i16 -18689, i16 -18689, i16 -18689>
+//
+uint16x8_t test_vmvnq_n_u16()
+{
+    return vmvnq_n_u16(0x4900);
+}
+
+// CHECK-LABEL: @test_vmvnq_n_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    ret <4 x i32> <i32 1023410175, i32 1023410175, i32 1023410175, i32 1023410175>
+//
+uint32x4_t test_vmvnq_n_u32()
+{
+    return vmvnq_n_u32(0xc3000000);
+}
+
+// CHECK-LABEL: @test_vbicq_m_n_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = and <8 x i16> [[A:%.*]], <i16 -11265, i16 -11265, i16 -11265, i16 -11265, i16 -11265, i16 -11265, i16 -11265, i16 -11265>
+// CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[TMP2]], <8 x i16> [[A]]
+// CHECK-NEXT:    ret <8 x i16> [[TMP3]]
+//
+int16x8_t test_vbicq_m_n_s16(int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vbicq_m_n(a, 0x2c00, p);
+#else /* POLYMORPHIC */
+    return vbicq_m_n_s16(a, 0x2c00, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vbicq_m_n_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = and <4 x i32> [[A:%.*]], <i32 -13893633, i32 -13893633, i32 -13893633, i32 -13893633>
+// CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[A]]
+// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
+//
+int32x4_t test_vbicq_m_n_s32(int32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vbicq_m_n(a, 0xd40000, p);
+#else /* POLYMORPHIC */
+    return vbicq_m_n_s32(a, 0xd40000, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vbicq_m_n_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = and <8 x i16> [[A:%.*]], <i16 -37, i16 -37, i16 -37, i16 -37, i16 -37, i16 -37, i16 -37, i16 -37>
+// CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[TMP2]], <8 x i16> [[A]]
+// CHECK-NEXT:    ret <8 x i16> [[TMP3]]
+//
+uint16x8_t test_vbicq_m_n_u16(uint16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vbicq_m_n(a, 0x24, p);
+#else /* POLYMORPHIC */
+    return vbicq_m_n_u16(a, 0x24, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vbicq_m_n_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = and <4 x i32> [[A:%.*]], <i32 -1644167169, i32 -1644167169, i32 -1644167169, i32 -1644167169>
+// CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[A]]
+// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
+//
+uint32x4_t test_vbicq_m_n_u32(uint32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vbicq_m_n(a, 0x62000000, p);
+#else /* POLYMORPHIC */
+    return vbicq_m_n_u32(a, 0x62000000, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vorrq_m_n_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = or <8 x i16> [[A:%.*]], <i16 13568, i16 13568, i16 13568, i16 13568, i16 13568, i16 13568, i16 13568, i16 13568>
+// CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[TMP2]], <8 x i16> [[A]]
+// CHECK-NEXT:    ret <8 x i16> [[TMP3]]
+//
+int16x8_t test_vorrq_m_n_s16(int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vorrq_m_n(a, 0x3500, p);
+#else /* POLYMORPHIC */
+    return vorrq_m_n_s16(a, 0x3500, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vorrq_m_n_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = or <4 x i32> [[A:%.*]], <i32 654311424, i32 654311424, i32 654311424, i32 654311424>
+// CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[A]]
+// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
+//
+int32x4_t test_vorrq_m_n_s32(int32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vorrq_m_n(a, 0x27000000, p);
+#else /* POLYMORPHIC */
+    return vorrq_m_n_s32(a, 0x27000000, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vorrq_m_n_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = or <8 x i16> [[A:%.*]], <i16 175, i16 175, i16 175, i16 175, i16 175, i16 175, i16 175, i16 175>
+// CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[TMP2]], <8 x i16> [[A]]
+// CHECK-NEXT:    ret <8 x i16> [[TMP3]]
+//
+uint16x8_t test_vorrq_m_n_u16(uint16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vorrq_m_n(a, 0xaf, p);
+#else /* POLYMORPHIC */
+    return vorrq_m_n_u16(a, 0xaf, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vorrq_m_n_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = or <4 x i32> [[A:%.*]], <i32 89, i32 89, i32 89, i32 89>
+// CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[A]]
+// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
+//
+uint32x4_t test_vorrq_m_n_u32(uint32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vorrq_m_n(a, 0x59, p);
+#else /* POLYMORPHIC */
+    return vorrq_m_n_u32(a, 0x59, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmvnq_m_n_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> <i16 -3841, i16 -3841, i16 -3841, i16 -3841, i16 -3841, i16 -3841, i16 -3841, i16 -3841>, <8 x i16> [[INACTIVE:%.*]]
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vmvnq_m_n_s16(int16x8_t inactive, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vmvnq_m(inactive, 0xf00, p);
+#else /* POLYMORPHIC */
+    return vmvnq_m_n_s16(inactive, 0xf00, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmvnq_m_n_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> <i32 -18945, i32 -18945, i32 -18945, i32 -18945>, <4 x i32> [[INACTIVE:%.*]]
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vmvnq_m_n_s32(int32x4_t inactive, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vmvnq_m(inactive, 0x4a00, p);
+#else /* POLYMORPHIC */
+    return vmvnq_m_n_s32(inactive, 0x4a00, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmvnq_m_n_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> <i16 23295, i16 23295, i16 23295, i16 23295, i16 23295, i16 23295, i16 23295, i16 23295>, <8 x i16> [[INACTIVE:%.*]]
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vmvnq_m_n_u16(uint16x8_t inactive, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vmvnq_m(inactive, 0xa500, p);
+#else /* POLYMORPHIC */
+    return vmvnq_m_n_u16(inactive, 0xa500, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmvnq_m_n_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> <i32 -63489, i32 -63489, i32 -63489, i32 -63489>, <4 x i32> [[INACTIVE:%.*]]
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+uint32x4_t test_vmvnq_m_n_u32(uint32x4_t inactive, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vmvnq_m(inactive, 0xf800, p);
+#else /* POLYMORPHIC */
+    return vmvnq_m_n_u32(inactive, 0xf800, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmvnq_x_n_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> <i16 767, i16 767, i16 767, i16 767, i16 767, i16 767, i16 767, i16 767>, <8 x i16> undef
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vmvnq_x_n_s16(mve_pred16_t p)
+{
+    return vmvnq_x_n_s16(0xfd00, p);
+}
+
+// CHECK-LABEL: @test_vmvnq_x_n_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> <i32 -12189697, i32 -12189697, i32 -12189697, i32 -12189697>, <4 x i32> undef
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vmvnq_x_n_s32(mve_pred16_t p)
+{
+    return vmvnq_x_n_s32(0xba0000, p);
+}
+
+// CHECK-LABEL: @test_vmvnq_x_n_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> <i16 -21505, i16 -21505, i16 -21505, i16 -21505, i16 -21505, i16 -21505, i16 -21505, i16 -21505>, <8 x i16> undef
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vmvnq_x_n_u16(mve_pred16_t p)
+{
+    return vmvnq_x_n_u16(0x5400, p);
+}
+
+// CHECK-LABEL: @test_vmvnq_x_n_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> <i32 -4865, i32 -4865, i32 -4865, i32 -4865>, <4 x i32> undef
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+uint32x4_t test_vmvnq_x_n_u32(mve_pred16_t p)
+{
+    return vmvnq_x_n_u32(0x1300, p);
+}
+
Index: clang/lib/Sema/SemaChecking.cpp
===================================================================
--- clang/lib/Sema/SemaChecking.cpp
+++ clang/lib/Sema/SemaChecking.cpp
@@ -5592,7 +5592,8 @@
 /// SemaBuiltinConstantArgShiftedByte - Check if argument ArgNum of TheCall is
 /// a constant expression representing an arbitrary byte value shifted left by
 /// a multiple of 8 bits.
-bool Sema::SemaBuiltinConstantArgShiftedByte(CallExpr *TheCall, int ArgNum) {
+bool Sema::SemaBuiltinConstantArgShiftedByte(CallExpr *TheCall, int ArgNum,
+                                             unsigned ArgBits) {
   llvm::APSInt Result;
 
   // We can't check the value of a dependent argument.
@@ -5604,6 +5605,10 @@
   if (SemaBuiltinConstantArg(TheCall, ArgNum, Result))
     return true;
 
+  // Truncate to the given size.
+  Result = Result.getLoBits(ArgBits);
+  Result.setIsUnsigned(true);
+
   if (IsShiftedByte(Result))
     return false;
 
@@ -5617,7 +5622,8 @@
 /// 0x00FF, 0x01FF, ..., 0xFFFF). This strange range check is needed for some
 /// Arm MVE intrinsics.
 bool Sema::SemaBuiltinConstantArgShiftedByteOrXXFF(CallExpr *TheCall,
-                                                   int ArgNum) {
+                                                   int ArgNum,
+                                                   unsigned ArgBits) {
   llvm::APSInt Result;
 
   // We can't check the value of a dependent argument.
@@ -5629,6 +5635,10 @@
   if (SemaBuiltinConstantArg(TheCall, ArgNum, Result))
     return true;
 
+  // Truncate to the given size.
+  Result = Result.getLoBits(ArgBits);
+  Result.setIsUnsigned(true);
+
   // Check to see if it's in either of the required forms.
   if (IsShiftedByte(Result) ||
       (Result > 0 && Result < 0x10000 && (Result & 0xFF) == 0xFF))
Index: clang/include/clang/Sema/Sema.h
===================================================================
--- clang/include/clang/Sema/Sema.h
+++ clang/include/clang/Sema/Sema.h
@@ -11709,8 +11709,10 @@
   bool SemaBuiltinConstantArgMultiple(CallExpr *TheCall, int ArgNum,
                                       unsigned Multiple);
   bool SemaBuiltinConstantArgPower2(CallExpr *TheCall, int ArgNum);
-  bool SemaBuiltinConstantArgShiftedByte(CallExpr *TheCall, int ArgNum);
-  bool SemaBuiltinConstantArgShiftedByteOrXXFF(CallExpr *TheCall, int ArgNum);
+  bool SemaBuiltinConstantArgShiftedByte(CallExpr *TheCall, int ArgNum,
+                                         unsigned ArgBits);
+  bool SemaBuiltinConstantArgShiftedByteOrXXFF(CallExpr *TheCall, int ArgNum,
+                                               unsigned ArgBits);
   bool SemaBuiltinARMSpecialReg(unsigned BuiltinID, CallExpr *TheCall,
                                 int ArgNum, unsigned ExpectedFieldNum,
                                 bool AllowName);
Index: clang/include/clang/Basic/arm_mve_defs.td
===================================================================
--- clang/include/clang/Basic/arm_mve_defs.td
+++ clang/include/clang/Basic/arm_mve_defs.td
@@ -319,6 +319,7 @@
   int base = base_;
   Type type = type_;
 }
+def IB_ExtraArg_LaneSize;
 
 // -----------------------------------------------------------------------------
 // End-user definitions for immediate arguments.
@@ -327,11 +328,13 @@
 // intrinsics like vmvnq or vorrq. imm_simd_restrictive has to be an 8-bit
 // value shifted left by a whole number of bytes; imm_simd_vmvn can also be of
 // the form 0xXXFF for some byte value XX.
-def imm_simd_restrictive : Immediate<u32, IB_UEltValue> {
+def imm_simd_restrictive : Immediate<Scalar, IB_UEltValue> {
   let extra = "ShiftedByte";
+  let extraarg = "!lanesize";
 }
-def imm_simd_vmvn : Immediate<u32, IB_UEltValue> {
+def imm_simd_vmvn : Immediate<Scalar, IB_UEltValue> {
   let extra = "ShiftedByteOrXXFF";
+  let extraarg = "!lanesize";
 }
 
 // imm_1toN can take any value from 1 to N inclusive, where N is the number of
@@ -457,26 +460,31 @@
 
 // A wrapper to define both _m and _x versions of a predicated
 // intrinsic.
+//
+// We provide optional parameters to override the polymorphic name
+// types separately for the _m and _x variants, because sometimes they
+// polymorph differently (typically because the type of the inactive
+// parameter can be used as a disambiguator if it's present).
 multiclass IntrinsicMX<Type rettype, dag arguments, dag cg,
                        int wantXVariant = 1,
                        string nameSuffix = "",
+                       PolymorphicNameType pnt_m = PNT_Type,
                        PolymorphicNameType pnt_x = PNT_Type> {
   // The _m variant takes an initial parameter called $inactive, which
   // provides the input value of the output register, i.e. all the
   // inactive lanes in the predicated operation take their values from
   // this.
def "_m" # nameSuffix: - Intrinsic<rettype, !con((args rettype:$inactive), arguments), cg>; + Intrinsic<rettype, !con((args rettype:$inactive), arguments), cg> { + let pnt = pnt_m; + } foreach unusedVar = !if(!eq(wantXVariant, 1), [1], []<int>) in { // The _x variant leaves off that parameter, and simply uses an // undef value of the same type. + def "_x" # nameSuffix: - Intrinsic<rettype, arguments, (seq (undef rettype):$inactive, cg)> { - // Allow overriding of the polymorphic name type, because - // sometimes the _m and _x variants polymorph differently - // (typically because the type of the inactive parameter can be - // used as a disambiguator if it's present). + Intrinsic<rettype, arguments, (seq (undef rettype):$inactive, cg)> { let pnt = pnt_x; } } Index: clang/include/clang/Basic/arm_mve.td =================================================================== --- clang/include/clang/Basic/arm_mve.td +++ clang/include/clang/Basic/arm_mve.td @@ -116,6 +116,28 @@ NameOverride<"vmulq">; } +let params = !listconcat(T.Int16, T.Int32) in { + let pnt = PNT_None in { + def vmvnq_n: Intrinsic<Vector, (args imm_simd_vmvn:$imm), + (not (splat (Scalar $imm)))>; + } + defm vmvnq: IntrinsicMX<Vector, (args imm_simd_vmvn:$imm, Predicate:$pred), + (select $pred, (not (splat (Scalar $imm))), $inactive), + 1, "_n", PNT_NType, PNT_None>; + let pnt = PNT_NType in { + def vbicq_n: Intrinsic<Vector, (args Vector:$v, imm_simd_restrictive:$imm), + (and $v, (not (splat (Scalar $imm))))>; + def vorrq_n: Intrinsic<Vector, (args Vector:$v, imm_simd_restrictive:$imm), + (or $v, (splat (Scalar $imm)))>; + } + def vbicq_m_n: Intrinsic< + Vector, (args Vector:$v, imm_simd_restrictive:$imm, Predicate:$pred), + (select $pred, (and $v, (not (splat (Scalar $imm)))), $v)>; + def vorrq_m_n: Intrinsic< + Vector, (args Vector:$v, imm_simd_restrictive:$imm, Predicate:$pred), + (select $pred, (or $v, (splat (Scalar $imm))), $v)>; +} + // The bitcasting below is not overcomplicating the IR because while // Vector and UVector may be different vector types at the C level i.e. // vectors of same size signed/unsigned ints. Once they're lowered