llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-aarch64

Author: Amina Chabane (Amichaxx)

<details>
<summary>Changes</summary>

Issue originally raised in https://github.com/llvm/llvm-project/issues/71362#issuecomment-3028515618.

Certain NEON intrinsics that operate on poly types (e.g. poly8x8_t) failed to compile with the -fno-lax-vector-conversions flag. This patch updates NeonEmitter.cpp to insert an explicit __builtin_bit_cast from poly types to the required signed integer vector types when generating lane-related intrinsics. A test 'neon-bitcast-poly.ll' is included.

---
Full diff: https://github.com/llvm/llvm-project/pull/149329.diff

2 Files Affected:

- (modified) clang/utils/TableGen/NeonEmitter.cpp (+9-6)
- (added) llvm/test/CodeGen/AArch64/neon-bitcast-poly.ll (+51)

``````````diff
diff --git a/clang/utils/TableGen/NeonEmitter.cpp b/clang/utils/TableGen/NeonEmitter.cpp
index 409f1c4f71834..1bd8c8b58c396 100644
--- a/clang/utils/TableGen/NeonEmitter.cpp
+++ b/clang/utils/TableGen/NeonEmitter.cpp
@@ -1401,14 +1401,17 @@ void Intrinsic::emitBodyAsBuiltinCall() {
       if (LocalCK == ClassB || (T.isHalf() && !T.isScalarForMangling())) {
         CastToType.makeInteger(8, true);
         Arg = "__builtin_bit_cast(" + CastToType.str() + ", " + Arg + ")";
-      } else if (LocalCK == ClassI) {
-        if (CastToType.isInteger()) {
-          CastToType.makeSigned();
-          Arg = "__builtin_bit_cast(" + CastToType.str() + ", " + Arg + ")";
-        }
+      } else if ((T.isPoly() || (T.isVector() && T.isInteger() && !T.isSigned() &&
+                  (StringRef(Name).contains("_p8") ||
+                   StringRef(Name).contains("_p16") ||
+                   StringRef(Name).contains("_p64"))))) {
+        CastToType.makeSigned();
+        Arg = "__builtin_bit_cast(" + CastToType.str() + ", " + Arg + ")";
+      } else if (LocalCK == ClassI && CastToType.isInteger()) {
+        CastToType.makeSigned();
+        Arg = "__builtin_bit_cast(" + CastToType.str() + ", " + Arg + ")";
       }
     }
-
     S += Arg + ", ";
   }
diff --git a/llvm/test/CodeGen/AArch64/neon-bitcast-poly.ll b/llvm/test/CodeGen/AArch64/neon-bitcast-poly.ll
new file mode 100644
index 0000000000000..b577eb1e34b09
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/neon-bitcast-poly.ll
@@ -0,0 +1,51 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64-linux-gnu -o - %s | FileCheck %s
+
+; This test verifies that NEON intrinsics using polynomial types (poly8/16/64) emit correct AArch64 instructions
+; after bitcasting to signed integer vectors. These intrinsics would previously fail under -fno-lax-vector-conversions.
+
+define <8 x i8> @_Z18test_vcopy_lane_p811__Poly8x8_tS_(<8 x i8> %a, <8 x i8> %b) {
+; CHECK-LABEL: _Z18test_vcopy_lane_p811__Poly8x8_tS_:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT: mov v0.b[0], v1.b[0]
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ret
+entry:
+  %vset_lane = shufflevector <8 x i8> %b, <8 x i8> %a, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  ret <8 x i8> %vset_lane
+}
+
+define <4 x i16> @_Z18test_vset_lane_p16t12__Poly16x4_t(i16 %val, <4 x i16> %vec) {
+; CHECK-LABEL: _Z18test_vset_lane_p16t12__Poly16x4_t:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: mov v0.h[0], w0
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: ret
+entry:
+  %vset_lane = insertelement <4 x i16> %vec, i16 %val, i64 0
+  ret <4 x i16> %vset_lane
+}
+
+define i64 @_Z18test_vget_lane_p6412__Poly64x1_t(<1 x i64> %vec){
+; CHECK-LABEL: _Z18test_vget_lane_p6412__Poly64x1_t:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: ret
+entry:
+  %vget_lane = extractelement <1 x i64> %vec, i64 0
+  ret i64 %vget_lane
+}
+
+define <16 x i8> @_Z18test_vsetq_lane_p8h12__Poly8x16_t(i8 %val, <16 x i8> %vec){
+; CHECK-LABEL: _Z18test_vsetq_lane_p8h12__Poly8x16_t:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov v0.b[0], w0
+; CHECK-NEXT: ret
+entry:
+  %vset_lane = insertelement <16 x i8> %vec, i8 %val, i64 0
+  ret <16 x i8> %vset_lane
+}
``````````

</details>

https://github.com/llvm/llvm-project/pull/149329
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits