https://github.com/pawosm-arm updated https://github.com/llvm/llvm-project/pull/183307
>From c34a6bb2b45d62aa5f79458dff5dd9399d47de55 Mon Sep 17 00:00:00 2001 From: Paul Osmialowski <[email protected]> Date: Tue, 10 Feb 2026 20:29:46 +0000 Subject: [PATCH 1/2] RFC: clang, libc: Extend the ext_vector_type attribute to support the scalable vector sizes This patch is a work-in-progress snapshot of the ongoing effort to introduce a portable way of representing scalable vector data types, or to extend the existing general vector data types with the ability to cover scalable vectors too. This is far from complete: the test coverage is insufficient and the discussion behind it is still ongoing. Nevertheless, we would like to share it to find out what other potential users think about it. The `ext_vector_type` attribute was added to Clang in order to introduce the OpenCL vector types. In the case of Arm's NEON, the types annotated with this attribute are compatible with the `neon_vector_type` types, and can be used by the intrinsics defined in the arm_neon.h header. Clang's builtin functions that deal with vector data types (e.g., `__builtin_vectorelements()`) also make no distinction between vector data types defined with either of these attributes. Also, the `[]` operator works correctly with those data types. The `ext_vector_type` attribute is not supported by GCC, but it is neat enough that there have been proposals to introduce it there, see [1]. The libc C++ support layer contains the libc/src/__support/CPP/simd.h header file, which uses the same attribute to introduce a portable simd data type. Using the `ext_vector_type` attribute (as is done in libc's simd.h header file) is a neat way of representing vector data types in an architecture-agnostic manner. To extend this with the ability to cover scalable vectors, there seem to be two possible solutions: 1. To introduce a new attribute, e.g., `ext_scalable_vector_type`. 2. 
To extend the existing `ext_vector_type` attribute with the ability to encode scalable vector sizes in some way, e.g., with negative numbers. In this patch I'm exploring the second path (extending the currently existing attribute), yet most of the challenges that it introduces would be similar if we took the first path. [1] https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88602 --- clang/lib/Sema/SemaType.cpp | 10 + clang/lib/Sema/TreeTransform.h | 5 +- clang/test/CodeGen/64bit-swiftcall.c | 128 +++++++++++ clang/test/CodeGen/arm64-abi-sve.c | 230 ++++++++++++++++++++ clang/test/CodeGen/builtin_vectorelements.c | 34 ++- libc/src/__support/CPP/simd.h | 47 ++-- libc/test/src/__support/CPP/simd_test.cpp | 14 ++ 7 files changed, 443 insertions(+), 25 deletions(-) create mode 100644 clang/test/CodeGen/arm64-abi-sve.c diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp index 28d1d63ff7acf..4248c4e6c945d 100644 --- a/clang/lib/Sema/SemaType.cpp +++ b/clang/lib/Sema/SemaType.cpp @@ -2435,6 +2435,16 @@ QualType Sema::BuildExtVectorType(QualType T, Expr *SizeExpr, } if (VecSize->isNegative()) { + if (Context.getTargetInfo().hasFeature("sve")) { + // The length of an SVE vector type is only known at runtime, but it is + // always a multiple of 128bits. 
+ unsigned NumEls = 128U / Context.getTypeSize(T); + unsigned NF = static_cast<unsigned>(-1L * VecSize->getZExtValue()); + QualType Result = Context.getScalableVectorType(T, NumEls * NF); + if (!Result.isNull()) + return Result; + } + Diag(SizeExpr->getExprLoc(), diag::err_attribute_vec_negative_size); return QualType(); } diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index fb32b0e70e3c9..1329f8d9967e9 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -5445,6 +5445,9 @@ TypeSourceInfo *TreeTransform<Derived>::TransformType(TypeSourceInfo *TSI) { QualType Result = getDerived().TransformType(TLB, TL); if (Result.isNull()) return nullptr; + if (isa<DependentSizedExtVectorType>(TL.getType()) && + isa<BuiltinType>(Result)) + return SemaRef.Context.CreateTypeSourceInfo(Result); return TLB.getTypeSourceInfo(SemaRef.Context, Result); } @@ -6132,7 +6135,7 @@ QualType TreeTransform<Derived>::TransformDependentSizedExtVectorType( DependentSizedExtVectorTypeLoc NewTL = TLB.push<DependentSizedExtVectorTypeLoc>(Result); NewTL.setNameLoc(TL.getNameLoc()); - } else { + } else if (!isa<BuiltinType>(Result)) { ExtVectorTypeLoc NewTL = TLB.push<ExtVectorTypeLoc>(Result); NewTL.setNameLoc(TL.getNameLoc()); } diff --git a/clang/test/CodeGen/64bit-swiftcall.c b/clang/test/CodeGen/64bit-swiftcall.c index 448bca7acbca3..cc60ac0f6844c 100644 --- a/clang/test/CodeGen/64bit-swiftcall.c +++ b/clang/test/CodeGen/64bit-swiftcall.c @@ -2,6 +2,7 @@ // RUN: %clang_cc1 -no-enable-noundef-analysis -triple x86_64-apple-darwin10 -target-cpu core2 -emit-llvm -o - %s | FileCheck %s --check-prefix=X86-64 // RUN: %clang_cc1 -no-enable-noundef-analysis -triple arm64-apple-ios9 -target-cpu cyclone -emit-llvm -o - %s | FileCheck %s // RUN: %clang_cc1 -no-enable-noundef-analysis -triple arm64-apple-ios9 -target-cpu cyclone -emit-llvm -o - %s | FileCheck %s --check-prefix=ARM64 +// RUN: %clang_cc1 -no-enable-noundef-analysis -triple 
arm64-apple-ios9 -target-feature +sve -emit-llvm -o - %s | FileCheck %s --check-prefixes=ARM64,ARM64-SVE // REQUIRES: aarch64-registered-target,x86-registered-target @@ -1059,3 +1060,130 @@ TEST(vector_union) // CHECK-LABEL: define swiftcc { float, float, float, float } @return_vector_union() // CHECK-LABEL: define swiftcc void @take_vector_union(float %0, float %1, float %2, float %3) + +#if defined(__ARM_FEATURE_SVE) + +#define SCALABLE_SIZE(N) (-1 * ((signed)(N))) + +typedef float svfloat1 __attribute__((ext_vector_type(SCALABLE_SIZE(1)))); +typedef float svfloat4 __attribute__((ext_vector_type(SCALABLE_SIZE(4)))); +typedef double svdouble1 __attribute__((ext_vector_type(SCALABLE_SIZE(1)))); +typedef double svdouble4 __attribute__((ext_vector_type(SCALABLE_SIZE(4)))); +typedef int svint1 __attribute__((ext_vector_type(SCALABLE_SIZE(1)))); +typedef int svint4 __attribute__((ext_vector_type(SCALABLE_SIZE(4)))); +typedef signed char svchar1 __attribute__((ext_vector_type(SCALABLE_SIZE(1)))); +typedef signed char svchar4 __attribute__((ext_vector_type(SCALABLE_SIZE(4)))); +typedef short svshort1 __attribute__((ext_vector_type(SCALABLE_SIZE(1)))); +typedef short svshort4 __attribute__((ext_vector_type(SCALABLE_SIZE(4)))); +typedef long long svlong1 __attribute__((ext_vector_type(SCALABLE_SIZE(1)))); +typedef long long svlong4 __attribute__((ext_vector_type(SCALABLE_SIZE(4)))); + +TEST(__SVFloat32_t) +// ARM64-SVE-LABEL: define{{.*}} swiftcc {{.+}} @return___SVFloat32_t() +// ARM64-SVE: ret [[SVFLOAT1_T:.+]] %0 + +TEST(svfloat1) +// ARM64-SVE-LABEL: define{{.*}} swiftcc {{.+}} @return_svfloat1() +// ARM64-SVE: ret [[SVFLOAT1_T]] %0 +// ARM64-SVE-LABEL: define{{.*}} swiftcc void @take_svfloat1(<vscale x 4 x float> %v) + +TEST(__clang_svfloat32x4_t) +// ARM64-SVE-LABEL: define{{.*}} swiftcc {{.+}} @return___clang_svfloat32x4_t() +// ARM64-SVE: ret [[SVFLOAT4_T:.+]] %0 + +TEST(svfloat4) +// ARM64-SVE-LABEL: define{{.*}} swiftcc {{.+}} @return_svfloat4() +// ARM64-SVE: 
ret [[SVFLOAT4_T]] %0 +// ARM64-SVE-LABEL: define{{.*}} swiftcc void @take_svfloat4(<vscale x 4 x float> %v.coerce0, <vscale x 4 x float> %v.coerce1, <vscale x 4 x float> %v.coerce2, <vscale x 4 x float> %v.coerce3) + +TEST(__SVFloat64_t) +// ARM64-SVE-LABEL: define{{.*}} swiftcc {{.+}} @return___SVFloat64_t() +// ARM64-SVE: ret [[SVDOUBLE1_T:.+]] %0 + +TEST(svdouble1) +// ARM64-SVE-LABEL: define{{.*}} swiftcc {{.+}} @return_svdouble1() +// ARM64-SVE: ret [[SVDOUBLE1_T]] %0 +// ARM64-SVE-LABEL: define{{.*}} swiftcc void @take_svdouble1(<vscale x 2 x double> %v) + +TEST(__clang_svfloat64x4_t) +// ARM64-SVE-LABEL: define{{.*}} swiftcc {{.+}} @return___clang_svfloat64x4_t() +// ARM64-SVE: ret [[SVDOUBLE4_T:.+]] %0 + +TEST(svdouble4) +// ARM64-SVE-LABEL: define{{.*}} swiftcc {{.+}} @return_svdouble4() +// ARM64-SVE: ret [[SVDOUBLE4_T]] %0 +// ARM64-SVE-LABEL: define{{.*}} swiftcc void @take_svdouble4(<vscale x 2 x double> %v.coerce0, <vscale x 2 x double> %v.coerce1, <vscale x 2 x double> %v.coerce2, <vscale x 2 x double> %v.coerce3) + +TEST(__SVInt32_t) +// ARM64-SVE-LABEL: define{{.*}} swiftcc {{.+}} @return___SVInt32_t() +// ARM64-SVE: ret [[SVINT1_T:.+]] %0 + +TEST(svint1) +// ARM64-SVE-LABEL: define{{.*}} swiftcc {{.+}} @return_svint1() +// ARM64-SVE: ret [[SVINT1_T]] %0 +// ARM64-SVE-LABEL: define{{.*}} swiftcc void @take_svint1(<vscale x 4 x i32> %v) + +TEST(__clang_svint32x4_t) +// ARM64-SVE-LABEL: define{{.*}} swiftcc {{.+}} @return___clang_svint32x4_t() +// ARM64-SVE: ret [[SVINT4_T:.+]] %0 + +TEST(svint4) +// ARM64-SVE-LABEL: define{{.*}} swiftcc {{.+}} @return_svint4() +// ARM64-SVE: ret [[SVINT4_T]] %0 +// ARM64-SVE-LABEL: define{{.*}} swiftcc void @take_svint4(<vscale x 4 x i32> %v.coerce0, <vscale x 4 x i32> %v.coerce1, <vscale x 4 x i32> %v.coerce2, <vscale x 4 x i32> %v.coerce3) + +TEST(__SVInt8_t) +// ARM64-SVE-LABEL: define{{.*}} swiftcc {{.+}} @return___SVInt8_t() +// ARM64-SVE: ret [[SVCHAR1_T:.+]] %0 + +TEST(svchar1) +// ARM64-SVE-LABEL: 
define{{.*}} swiftcc {{.+}} @return_svchar1() +// ARM64-SVE: ret [[SVCHAR1_T]] %0 +// ARM64-SVE-LABEL: define{{.*}} swiftcc void @take_svchar1(<vscale x 16 x i8> %v) + +TEST(__clang_svint8x4_t) +// ARM64-SVE-LABEL: define{{.*}} swiftcc {{.+}} @return___clang_svint8x4_t() +// ARM64-SVE: ret [[SVCHAR4_T:.+]] %0 + +TEST(svchar4) +// ARM64-SVE-LABEL: define{{.*}} swiftcc {{.+}} @return_svchar4() +// ARM64-SVE: ret [[SVCHAR4_T]] %0 +// ARM64-SVE-LABEL: define{{.*}} swiftcc void @take_svchar4(<vscale x 16 x i8> %v.coerce0, <vscale x 16 x i8> %v.coerce1, <vscale x 16 x i8> %v.coerce2, <vscale x 16 x i8> %v.coerce3) + +TEST(__SVInt16_t) +// ARM64-SVE-LABEL: define{{.*}} swiftcc {{.+}} @return___SVInt16_t() +// ARM64-SVE: ret [[SVSHORT1_T:.+]] %0 + +TEST(svshort1) +// ARM64-SVE-LABEL: define{{.*}} swiftcc {{.+}} @return_svshort1() +// ARM64-SVE: ret [[SVSHORT1_T]] %0 +// ARM64-SVE-LABEL: define{{.*}} swiftcc void @take_svshort1(<vscale x 8 x i16> %v) + +TEST(__clang_svint16x4_t) +// ARM64-SVE-LABEL: define{{.*}} swiftcc {{.+}} @return___clang_svint16x4_t() +// ARM64-SVE: ret [[SVSHORT4_T:.+]] %0 + +TEST(svshort4) +// ARM64-SVE-LABEL: define{{.*}} swiftcc {{.+}} @return_svshort4() +// ARM64-SVE: ret [[SVSHORT4_T]] %0 +// ARM64-SVE-LABEL: define{{.*}} swiftcc void @take_svshort4(<vscale x 8 x i16> %v.coerce0, <vscale x 8 x i16> %v.coerce1, <vscale x 8 x i16> %v.coerce2, <vscale x 8 x i16> %v.coerce3) + +TEST(__SVInt64_t) +// ARM64-SVE-LABEL: define{{.*}} swiftcc {{.+}} @return___SVInt64_t() +// ARM64-SVE: ret [[SVLONG1_T:.+]] %0 + +TEST(svlong1) +// ARM64-SVE-LABEL: define{{.*}} swiftcc {{.+}} @return_svlong1() +// ARM64-SVE: ret [[SVLONG1_T]] %0 +// ARM64-SVE-LABEL: define{{.*}} swiftcc void @take_svlong1(<vscale x 2 x i64> %v) + +TEST(__clang_svint64x4_t) +// ARM64-SVE-LABEL: define{{.*}} swiftcc {{.+}} @return___clang_svint64x4_t() +// ARM64-SVE: ret [[SVLONG4_T:.+]] %0 + +TEST(svlong4) +// ARM64-SVE-LABEL: define{{.*}} swiftcc {{.+}} @return_svlong4() +// ARM64-SVE: ret 
[[SVLONG4_T]] %0 +// ARM64-SVE-LABEL: define{{.*}} swiftcc void @take_svlong4(<vscale x 2 x i64> %v.coerce0, <vscale x 2 x i64> %v.coerce1, <vscale x 2 x i64> %v.coerce2, <vscale x 2 x i64> %v.coerce3) + +#endif /* defined(__ARM_FEATURE_SVE) */ diff --git a/clang/test/CodeGen/arm64-abi-sve.c b/clang/test/CodeGen/arm64-abi-sve.c new file mode 100644 index 0000000000000..9570867d76e6e --- /dev/null +++ b/clang/test/CodeGen/arm64-abi-sve.c @@ -0,0 +1,230 @@ +// RUN: %clang_cc1 -triple arm64-apple-ios7 -target-abi darwinpcs -target-feature +sve -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-linux-android -target-feature +sve -emit-llvm -o - %s | FileCheck %s + +#include <stdarg.h> + +#define SCALABLE_SIZE(N) (-1 * ((signed)(N))) + +typedef __attribute__(( ext_vector_type(SCALABLE_SIZE(1)) )) char __char1s; +typedef __attribute__(( ext_vector_type(SCALABLE_SIZE(2)) )) char __char2s; +typedef __attribute__(( ext_vector_type(SCALABLE_SIZE(3)) )) char __char3s; +typedef __attribute__(( ext_vector_type(SCALABLE_SIZE(4)) )) char __char4s; +typedef __attribute__(( ext_vector_type(SCALABLE_SIZE(1)) )) short __short1s; +typedef __attribute__(( ext_vector_type(SCALABLE_SIZE(2)) )) short __short2s; +typedef __attribute__(( ext_vector_type(SCALABLE_SIZE(3)) )) short __short3s; +typedef __attribute__(( ext_vector_type(SCALABLE_SIZE(1)) )) int __int1s; +typedef __attribute__(( ext_vector_type(SCALABLE_SIZE(4)) )) int __int4s; +typedef __attribute__(( ext_vector_type(SCALABLE_SIZE(1)) )) double __double1s; +typedef __attribute__(( ext_vector_type(SCALABLE_SIZE(2)) )) double __double2s; + +double svfunc__char1s(__char1s arg); + +double vec_s1c(int fixed, __char1s c1s) { +// CHECK-LABEL: @vec_s1c +// CHECK: [[PTR:%.*]] = alloca <vscale x 16 x i8>, align 16 +// CHECK: store <vscale x 16 x i8> %c1s, ptr [[PTR]], align 16 +// CHECK: [[CALL:%.*]] = call double @svfunc__char1s(<vscale x 16 x i8> {{%.*}}) + double sum = fixed; + + return sum + svfunc__char1s(c1s); +} + 
+double test_s1c(__char1s *in) { +// CHECK-LABEL: @test_s1c +// CHECK: call double @vec_s1c(i32 noundef 1, <vscale x 16 x i8> {{%.*}}) + return vec_s1c(1, *in); +} + +double svfunc__char2s(__char2s arg); + +double vec_s2c(int fixed, __char2s c2s) { +// CHECK-LABEL: @vec_s2c +// CHECK: [[PTR:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16 +// CHECK: {{%.*}} = insertvalue {{.*}} poison, {{.*}}.coerce{{.*}} +// CHECK-COUNT-1: {{%.*}} = insertvalue {{.*}} {{%.*}}, {{.*}}.coerce{{.*}} +// CHECK: store { <vscale x 16 x i8>, <vscale x 16 x i8> } {{%.*}}, ptr [[PTR]], align 16 +// CHECK: [[CALL:%.*]] = call double @svfunc__char2s(<vscale x 16 x i8> {{%.*}}.extract0, <vscale x 16 x i8> {{%.*}}.extract1) + double sum = fixed; + + return sum + svfunc__char2s(c2s); +} + +double test_s2c(__char2s *in) { +// CHECK-LABEL: @test_s2c +// CHECK: call double @vec_s2c(i32 noundef 1, <vscale x 16 x i8> {{%.*}}.extract0, <vscale x 16 x i8> {{%.*}}.extract1) + return vec_s2c(1, *in); +} + +double svfunc__char3s(__char3s arg); + +double vec_s3c(int fixed, __char3s c3s) { +// CHECK-LABEL: @vec_s3c +// CHECK: [[PTR:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16 +// CHECK: {{%.*}} = insertvalue {{.*}} poison, {{.*}}.coerce{{.*}} +// CHECK-COUNT-2: {{%.*}} = insertvalue {{.*}} {{%.*}}, {{.*}}.coerce{{.*}} +// CHECK: store { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } {{%.*}}, ptr [[PTR]], align 16 +// CHECK: [[CALL:%.*]] = call double @svfunc__char3s(<vscale x 16 x i8> {{%.*}}.extract0, <vscale x 16 x i8> {{%.*}}.extract1, <vscale x 16 x i8> {{%.*}}.extract2) + double sum = fixed; + + return sum + svfunc__char3s(c3s); +} + +double test_s3c(__char3s *in) { +// CHECK-LABEL: @test_s3c +// CHECK: call double @vec_s3c(i32 noundef 1, <vscale x 16 x i8> {{%.*}}.extract0, <vscale x 16 x i8> {{%.*}}.extract1, <vscale x 16 x i8> {{%.*}}.extract2) + return vec_s3c(1, *in); +} + +double svfunc__char4s(__char4s arg); + +double 
vec_s4c(int fixed, __char4s c4s) { +// CHECK-LABEL: @vec_s4c +// CHECK: [[PTR:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16 +// CHECK: {{%.*}} = insertvalue {{.*}} poison, {{.*}}.coerce{{.*}} +// CHECK-COUNT-3: {{%.*}} = insertvalue {{.*}} {{%.*}}, {{.*}}.coerce{{.*}} +// CHECK: store { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } {{%.*}}, ptr [[PTR]], align 16 +// CHECK: [[CALL:%.*]] = call double @svfunc__char4s(<vscale x 16 x i8> {{%.*}}.extract0, <vscale x 16 x i8> {{%.*}}.extract1, <vscale x 16 x i8> {{%.*}}.extract2, <vscale x 16 x i8> {{%.*}}.extract3) + double sum = fixed; + + return sum + svfunc__char4s(c4s); +} + +double test_s4c(__char4s *in) { +// CHECK-LABEL: @test_s4c +// CHECK: call double @vec_s4c(i32 noundef 1, <vscale x 16 x i8> {{%.*}}.extract0, <vscale x 16 x i8> {{%.*}}.extract1, <vscale x 16 x i8> {{%.*}}.extract2, <vscale x 16 x i8> {{%.*}}.extract3) + return vec_s4c(1, *in); +} + +double svfunc__short1s(__short1s arg); + +double vec_s1s(int fixed, __short1s s1s) { +// CHECK-LABEL: @vec_s1s +// CHECK: [[PTR:%.*]] = alloca <vscale x 8 x i16>, align 16 +// CHECK: store <vscale x 8 x i16> %s1s, ptr [[PTR]], align 16 +// CHECK: [[CALL:%.*]] = call double @svfunc__short1s(<vscale x 8 x i16> {{%.*}}) + double sum = fixed; + + return sum + svfunc__short1s(s1s); +} + +double test_s1s(__short1s *in) { +// CHECK-LABEL: @test_s1s +// CHECK: call double @vec_s1s(i32 noundef 1, <vscale x 8 x i16> {{%.*}}) + return vec_s1s(1, *in); +} + +double svfunc__short2s(__short2s arg); + +double vec_s2s(int fixed, __short2s s2s) { +// CHECK-LABEL: @vec_s2s +// CHECK: [[PTR:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16 +// CHECK: {{%.*}} = insertvalue {{.*}} poison, {{.*}}.coerce{{.*}} +// CHECK-COUNT-1: {{%.*}} = insertvalue {{.*}} {{%.*}}, {{.*}}.coerce{{.*}} +// CHECK: store { <vscale x 8 x i16>, <vscale x 8 x i16> } {{%.*}}, ptr [[PTR]], align 
16 +// CHECK: [[CALL:%.*]] = call double @svfunc__short2s(<vscale x 8 x i16> {{%.*}}.extract0, <vscale x 8 x i16> {{%.*}}.extract1) + double sum = fixed; + + return sum + svfunc__short2s(s2s); +} + +double test_s2s(__short2s *in) { +// CHECK-LABEL: @test_s2s +// CHECK: call double @vec_s2s(i32 noundef 1, <vscale x 8 x i16> {{%.*}}.extract0, <vscale x 8 x i16> {{%.*}}.extract1) + return vec_s2s(1, *in); +} + +double svfunc__short3s(__short3s arg); + +double vec_s3s(int fixed, __short3s s3s) { +// CHECK-LABEL: @vec_s3s +// CHECK: [[PTR:%.*]] = alloca { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }, align 16 +// CHECK: {{%.*}} = insertvalue {{.*}} poison, {{.*}}.coerce{{.*}} +// CHECK-COUNT-2: {{%.*}} = insertvalue {{.*}} {{%.*}}, {{.*}}.coerce{{.*}} +// CHECK: store { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } {{%.*}}, ptr [[PTR]], align 16 +// CHECK: [[CALL:%.*]] = call double @svfunc__short3s(<vscale x 8 x i16> {{%.*}}.extract0, <vscale x 8 x i16> {{%.*}}.extract1, <vscale x 8 x i16> {{%.*}}.extract2) + double sum = fixed; + + return sum + svfunc__short3s(s3s); +} + +double test_s3s(__short3s *in) { +// CHECK-LABEL: @test_s3s +// CHECK: call double @vec_s3s(i32 noundef 1, <vscale x 8 x i16> {{%.*}}.extract0, <vscale x 8 x i16> {{%.*}}.extract1, <vscale x 8 x i16> {{%.*}}.extract2) + return vec_s3s(1, *in); +} + +double svfunc__int1s(__int1s arg); + +double vec_s1i(int fixed, __int1s i1s) { +// CHECK-LABEL: @vec_s1i +// CHECK: [[PTR:%.*]] = alloca <vscale x 4 x i32>, align 16 +// CHECK: store <vscale x 4 x i32> %i1s, ptr [[PTR]], align 16 +// CHECK: [[CALL:%.*]] = call double @svfunc__int1s(<vscale x 4 x i32> {{%.*}}) + double sum = fixed; + + return sum + svfunc__int1s(i1s); +} + +double test_s1i(__int1s *in) { +// CHECK-LABEL: @test_s1i +// CHECK: call double @vec_s1i(i32 noundef 1, <vscale x 4 x i32> {{%.*}}) + return vec_s1i(1, *in); +} + +double svfunc__int4s(__int4s arg); + +double vec_s4i(int fixed, __int4s i4s) { +// 
CHECK-LABEL: @vec_s4i +// CHECK: [[PTR:%.*]] = alloca { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }, align 16 +// CHECK: {{%.*}} = insertvalue {{.*}} poison, {{.*}}.coerce{{.*}} +// CHECK-COUNT-3: {{%.*}} = insertvalue {{.*}} {{%.*}}, {{.*}}.coerce{{.*}} +// CHECK: store { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } {{%.*}}, ptr [[PTR]], align 16 +// CHECK: [[CALL:%.*]] = call double @svfunc__int4s(<vscale x 4 x i32> {{%.*}}.extract0, <vscale x 4 x i32> {{%.*}}.extract1, <vscale x 4 x i32> {{%.*}}.extract2, <vscale x 4 x i32> {{%.*}}.extract3) + double sum = fixed; + + return sum + svfunc__int4s(i4s); +} + +double test_s4i(__int4s *in) { +// CHECK-LABEL: @test_s4i +// CHECK: call double @vec_s4i(i32 noundef 1, <vscale x 4 x i32> {{%.*}}.extract0, <vscale x 4 x i32> {{%.*}}.extract1, <vscale x 4 x i32> {{%.*}}.extract2, <vscale x 4 x i32> {{%.*}}.extract3) + return vec_s4i(1, *in); +} + +double svfunc__double1s(__double1s arg); + +double vec_s1d(int fixed, __double1s d1s) { +// CHECK-LABEL: @vec_s1d +// CHECK: [[PTR:%.*]] = alloca <vscale x 2 x double>, align 16 +// CHECK: store <vscale x 2 x double> %d1s, ptr [[PTR]], align 16 +// CHECK: [[CALL:%.*]] = call double @svfunc__double1s(<vscale x 2 x double> {{%.*}}) + double sum = fixed; + + return sum + svfunc__double1s(d1s); +} + +double test_s1d(__double1s *in) { +// CHECK-LABEL: @test_s1d +// CHECK: call double @vec_s1d(i32 noundef 1, <vscale x 2 x double> {{%.*}}) + return vec_s1d(1, *in); +} + +double svfunc__double2s(__double2s arg); + +double vec_s2d(int fixed, __double2s d2s) { +// CHECK-LABEL: @vec_s2d +// CHECK: [[PTR:%.*]] = alloca { <vscale x 2 x double>, <vscale x 2 x double> }, align 16 +// CHECK: {{%.*}} = insertvalue {{.*}} poison, {{.*}}.coerce{{.*}} +// CHECK-COUNT-1: {{%.*}} = insertvalue {{.*}} {{%.*}}, {{.*}}.coerce{{.*}} +// CHECK: store { <vscale x 2 x double>, <vscale x 2 x double> } {{%.*}}, ptr [[PTR]], align 16 +// 
CHECK: [[CALL:%.*]] = call double @svfunc__double2s(<vscale x 2 x double> {{%.*}}.extract0, <vscale x 2 x double> {{%.*}}.extract1) + double sum = fixed; + + return sum + svfunc__double2s(d2s); +} + +double test_s2d(__double2s *in) { +// CHECK-LABEL: @test_s2d +// CHECK: call double @vec_s2d(i32 noundef 1, <vscale x 2 x double> {{%.*}}.extract0, <vscale x 2 x double> {{%.*}}.extract1) + return vec_s2d(1, *in); +} diff --git a/clang/test/CodeGen/builtin_vectorelements.c b/clang/test/CodeGen/builtin_vectorelements.c index 45f7a3c34562b..7047bd930419f 100644 --- a/clang/test/CodeGen/builtin_vectorelements.c +++ b/clang/test/CodeGen/builtin_vectorelements.c @@ -1,13 +1,7 @@ -// RUN: %clang_cc1 -O1 -triple x86_64 %s -emit-llvm -disable-llvm-passes -o - | FileCheck --check-prefixes=CHECK %s - -// REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -O1 -triple aarch64 -target-feature +neon %s -emit-llvm -disable-llvm-passes -o - | FileCheck --check-prefixes=CHECK,NEON %s - -// REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -O1 -triple aarch64 -target-feature +sve %s -emit-llvm -disable-llvm-passes -o - | FileCheck --check-prefixes=CHECK,SVE %s - -// REQUIRES: riscv-registered-target -// RUN: %clang_cc1 -O1 -triple riscv64 -target-feature +v %s -emit-llvm -disable-llvm-passes -o - | FileCheck --check-prefixes=CHECK,RISCV %s +// RUN: %clang_cc1 -O1 -triple x86_64 %s -emit-llvm -disable-llvm-passes -o - | FileCheck --check-prefixes=CHECK %s +// RUN: %if aarch64-registered-target %{ %clang_cc1 -O1 -triple aarch64 -target-feature +neon %s -emit-llvm -disable-llvm-passes -o - | FileCheck --check-prefixes=CHECK,NEON %s %} +// RUN: %if aarch64-registered-target %{ %clang_cc1 -O1 -triple aarch64 -target-feature +sve %s -emit-llvm -disable-llvm-passes -o - | FileCheck --check-prefixes=CHECK,SVE %s %} +// RUN: %if riscv-registered-target %{ %clang_cc1 -O1 -triple riscv64 -target-feature +v %s -emit-llvm -disable-llvm-passes -o - | FileCheck --check-prefixes=CHECK,RISCV 
%s %} /// Note that this does not make sense to check for x86 SIMD types, because /// __m128i, __m256i, and __m512i do not specify the element type. There are no @@ -19,6 +13,10 @@ typedef int int8 __attribute__((vector_size(32))); typedef int int16 __attribute__((vector_size(64))); typedef float float2 __attribute__((vector_size(8))); typedef long extLong4 __attribute__((ext_vector_type(4))); +#if defined(__ARM_FEATURE_SVE) +#define SCALABLE_SIZE(N) (-1 * ((signed)(N))) +typedef long extLong1s __attribute__((ext_vector_type(SCALABLE_SIZE(1)))); +#endif int test_builtin_vectorelements_int1() { @@ -82,6 +80,22 @@ int test_builtin_vectorelements_neon64x1() { #if defined(__ARM_FEATURE_SVE) #include <arm_sve.h> +long test_builtin_vectorelements_sve64() { + // SVE: i64 @test_builtin_vectorelements_sve64( + // SVE: [[VSCALE:%.+]] = call i64 [[I64_VSCALE_CALL:@llvm.vscale.i64]]() + // SVE: [[RES:%.+]] = mul nuw i64 [[VSCALE]], [[I64_MUL:2]] + // SVE: ret i64 [[RES]] + return __builtin_vectorelements(svuint64_t); +} + +long test_builtin_vectorelements_extLong1s() { + // SVE-LABEL: i64 @test_builtin_vectorelements_extLong1s( + // SVE: [[VSCALE:%.+]] = call i64 [[I64_VSCALE_CALL]]() + // SVE: [[RES:%.+]] = mul nuw i64 %0, [[I64_MUL]] + // SVE: ret i64 [[RES]] + return __builtin_vectorelements(extLong1s); +} + long test_builtin_vectorelements_sve32() { // SVE: i64 @test_builtin_vectorelements_sve32( // SVE: [[VSCALE:%.+]] = call i64 @llvm.vscale.i64() diff --git a/libc/src/__support/CPP/simd.h b/libc/src/__support/CPP/simd.h index 422d2f4c8433d..3eb8779fc5dfb 100644 --- a/libc/src/__support/CPP/simd.h +++ b/libc/src/__support/CPP/simd.h @@ -32,6 +32,9 @@ namespace LIBC_NAMESPACE_DECL { namespace cpp { +template <size_t N> +constexpr signed scalable_size = -1 * static_cast<signed>(N); + namespace internal { #if defined(LIBC_TARGET_CPU_HAS_AVX512F) @@ -52,7 +55,9 @@ template <typename T> LIBC_INLINE constexpr size_t native_vector_size = 1; // Type aliases. 
template <typename T, size_t N> using fixed_size_simd = T [[clang::ext_vector_type(N)]]; -template <typename T, size_t N = internal::native_vector_size<T>> +template <typename T, size_t N> +using scalable_size_simd = T [[clang::ext_vector_type(scalable_size<N>)]]; +template <typename T, auto N = internal::native_vector_size<T>> using simd = T [[clang::ext_vector_type(N)]]; template <typename T> using simd_mask = simd<bool, internal::native_vector_size<T>>; @@ -64,18 +69,21 @@ struct simd_size : cpp::integral_constant<size_t, __builtin_vectorelements(T)> { template <class T> constexpr size_t simd_size_v = simd_size<T>::value; template <typename T> struct is_simd : cpp::integral_constant<bool, false> {}; -template <typename T, unsigned N> +template <typename T, auto N> struct is_simd<simd<T, N>> : cpp::integral_constant<bool, true> {}; template <class T> constexpr bool is_simd_v = is_simd<T>::value; +template <auto N> +constexpr bool is_scalable_size_v = static_cast<signed>(N) < 0; + template <typename T> struct is_simd_mask : cpp::integral_constant<bool, false> {}; -template <unsigned N> +template <auto N> struct is_simd_mask<simd<bool, N>> : cpp::integral_constant<bool, true> {}; template <class T> constexpr bool is_simd_mask_v = is_simd_mask<T>::value; template <typename T> struct simd_element_type; -template <typename T, size_t N> struct simd_element_type<simd<T, N>> { +template <typename T, auto N> struct simd_element_type<simd<T, N>> { using type = T; }; template <typename T> @@ -153,6 +161,13 @@ using enable_if_integral_t = cpp::enable_if_t<cpp::is_integral_v<T>, T>; template <typename T> using enable_if_simd_t = cpp::enable_if_t<is_simd_v<T>, bool>; +template <auto N> +using enable_if_scalable_size_t = cpp::enable_if_t<is_scalable_size_v<N>, bool>; + +template <auto N> +using enable_if_not_scalable_size_t = + cpp::enable_if_t<!is_scalable_size_v<N>, bool>; + } // namespace internal // Casting. 
@@ -356,27 +371,31 @@ LIBC_INLINE constexpr static void compress(simd<bool, simd_size_v<T>> mask, T v, } // Construction helpers. -template <typename T, size_t N> +template <typename T, auto N = internal::native_vector_size<T>, + internal::enable_if_not_scalable_size_t<N> = 0> LIBC_INLINE constexpr static simd<T, N> splat(T v) { return simd<T, N>(v); } -template <typename T> LIBC_INLINE constexpr static simd<T> splat(T v) { - return splat<T, simd_size_v<simd<T>>>(v); +template <typename T, auto N = internal::native_vector_size<T>, + internal::enable_if_scalable_size_t<N> = 0> +LIBC_INLINE constexpr static simd<T, N> splat(T v) { + simd<T, N> sv; + size_t n = __builtin_vectorelements(simd<T, N>); + for (unsigned i = 0U; i < n; ++i) + sv[i] = v; + return sv; } -template <typename T, unsigned N> +template <typename T, auto N = internal::native_vector_size<T>> LIBC_INLINE constexpr static simd<T, N> iota(T base = T(0), T step = T(1)) { simd<T, N> v{}; - for (unsigned i = 0; i < N; ++i) + size_t n = __builtin_vectorelements(simd<T, N>); + for (unsigned i = 0U; i < n; ++i) v[i] = base + T(i) * step; return v; } -template <typename T> -LIBC_INLINE constexpr static simd<T> iota(T base = T(0), T step = T(1)) { - return iota<T, simd_size_v<simd<T>>>(base, step); -} // Conditional helpers. -template <typename T, size_t N> +template <typename T, auto N> LIBC_INLINE constexpr static simd<T, N> select(simd<bool, N> m, simd<T, N> x, simd<T, N> y) { return m ? 
x : y; diff --git a/libc/test/src/__support/CPP/simd_test.cpp b/libc/test/src/__support/CPP/simd_test.cpp index 8bead8461d649..ba052cbef682d 100644 --- a/libc/test/src/__support/CPP/simd_test.cpp +++ b/libc/test/src/__support/CPP/simd_test.cpp @@ -148,3 +148,17 @@ TEST(LlvmLibcSIMDTest, MaskedCompressExpand) { EXPECT_TRUE(cpp::all_of(!mask_expand || v2 <= SIZE / 2)); } + +#if defined(LIBC_TARGET_CPU_HAS_SVE) || defined(LIBC_TARGET_CPU_HAS_SVE2) + +TEST(LlvmLibcSIMDTest, SizelessVectorCreation) { + cpp::simd<int, cpp::scalable_size<1>> svsplat = cpp::splat(5); + cpp::simd<int, cpp::scalable_size<1>> sviota = cpp::iota(0); + + EXPECT_EQ(svsplat[0], 5); + EXPECT_EQ(svsplat[1], 5); + EXPECT_EQ(sviota[0], 0); + EXPECT_EQ(sviota[1], 1); +} + +#endif >From 64b253173b014df076258159717df7880c75eca5 Mon Sep 17 00:00:00 2001 From: Paul Osmialowski <[email protected]> Date: Sun, 1 Mar 2026 09:20:09 +0000 Subject: [PATCH 2/2] Add optional param to ext_vector_type for scalable vectors --- clang/include/clang/AST/ASTContext.h | 4 +- clang/include/clang/AST/TypeBase.h | 9 ++- clang/include/clang/AST/TypeProperties.td | 5 +- clang/include/clang/Basic/Attr.td | 2 +- clang/include/clang/Basic/AttrDocs.td | 7 +- clang/include/clang/Sema/Sema.h | 2 +- clang/lib/AST/ASTContext.cpp | 30 +++++--- clang/lib/AST/ASTImporter.cpp | 3 +- clang/lib/AST/Type.cpp | 8 ++- clang/lib/Sema/HLSLExternalSemaSource.cpp | 2 +- clang/lib/Sema/SemaType.cpp | 39 ++++++---- clang/lib/Sema/TreeTransform.h | 48 ++++++++----- clang/test/CodeGen/64bit-swiftcall.c | 2 +- clang/test/CodeGen/arm64-abi-sve.c | 2 +- clang/test/CodeGen/builtin_vectorelements.c | 2 +- libc/src/__support/CPP/simd.h | 79 ++++++++++----------- libc/test/src/__support/CPP/simd_test.cpp | 4 +- 17 files changed, 145 insertions(+), 103 deletions(-) diff --git a/clang/include/clang/AST/ASTContext.h b/clang/include/clang/AST/ASTContext.h index c8d6de1689512..947e7fa2578f7 100644 --- a/clang/include/clang/AST/ASTContext.h +++ 
b/clang/include/clang/AST/ASTContext.h @@ -1778,8 +1778,8 @@ class ASTContext : public RefCountedBase<ASTContext> { /// /// FIXME: We will need these to be uniqued, or at least comparable, at some /// point. - QualType getDependentSizedExtVectorType(QualType VectorType, - Expr *SizeExpr, + QualType getDependentSizedExtVectorType(QualType VectorType, Expr *SizeExpr, + Expr *ScalableExpr, SourceLocation AttrLoc) const; /// Return the unique reference to the matrix type of the specified element diff --git a/clang/include/clang/AST/TypeBase.h b/clang/include/clang/AST/TypeBase.h index 2bec5131dc0d2..a399240416faf 100644 --- a/clang/include/clang/AST/TypeBase.h +++ b/clang/include/clang/AST/TypeBase.h @@ -4103,6 +4103,7 @@ class DependentSizedExtVectorType : public Type, public llvm::FoldingSetNode { friend class ASTContext; Expr *SizeExpr; + Expr *ScalableExpr; /// The element type of the array. QualType ElementType; @@ -4110,10 +4111,12 @@ class DependentSizedExtVectorType : public Type, public llvm::FoldingSetNode { SourceLocation loc; DependentSizedExtVectorType(QualType ElementType, QualType can, - Expr *SizeExpr, SourceLocation loc); + Expr *SizeExpr, Expr *ScalableExpr, + SourceLocation loc); public: Expr *getSizeExpr() const { return SizeExpr; } + Expr *getScalableExpr() const { return ScalableExpr; } QualType getElementType() const { return ElementType; } SourceLocation getAttributeLoc() const { return loc; } @@ -4125,11 +4128,11 @@ class DependentSizedExtVectorType : public Type, public llvm::FoldingSetNode { } void Profile(llvm::FoldingSetNodeID &ID, const ASTContext &Context) { - Profile(ID, Context, getElementType(), getSizeExpr()); + Profile(ID, Context, getElementType(), getSizeExpr(), getScalableExpr()); } static void Profile(llvm::FoldingSetNodeID &ID, const ASTContext &Context, - QualType ElementType, Expr *SizeExpr); + QualType ElementType, Expr *SizeExpr, Expr *ScalableExpr); }; enum class VectorKind { diff --git 
a/clang/include/clang/AST/TypeProperties.td b/clang/include/clang/AST/TypeProperties.td index 8c1e9f209e5ad..0515d9967fe9e 100644 --- a/clang/include/clang/AST/TypeProperties.td +++ b/clang/include/clang/AST/TypeProperties.td @@ -226,12 +226,15 @@ let Class = DependentSizedExtVectorType in { def : Property<"size", ExprRef> { let Read = [{ node->getSizeExpr() }]; } + def : Property<"scalable", ExprRef> { + let Read = [{ node->getScalableExpr() }]; + } def : Property<"attributeLoc", SourceLocation> { let Read = [{ node->getAttributeLoc() }]; } def : Creator<[{ - return ctx.getDependentSizedExtVectorType(elementType, size, attributeLoc); + return ctx.getDependentSizedExtVectorType(elementType, size, scalable, attributeLoc); }]>; } diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index ea3f9df6d8342..851ffd2779ec6 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -1879,7 +1879,7 @@ def EnableIf : InheritableAttr { def ExtVectorType : TypeAttr { let Spellings = [Clang<"ext_vector_type">]; - let Args = [ExprArgument<"NumElements">]; + let Args = [ExprArgument<"NumElements">, ExprArgument<"Scalable", 1>]; let Documentation = [ExtVectorTypeDocs]; } diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index cad45501df6d2..0bc595526ebe8 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -1188,10 +1188,11 @@ template instantiation, so the value for ``T::number`` is known. def ExtVectorTypeDocs : Documentation { let Category = DocCatFunction; let Content = [{ -The ``ext_vector_type(N)`` attribute specifies that a type is a vector with N -elements, directly mapping to an LLVM vector type. Originally from OpenCL, it +The ``ext_vector_type(N[,S])`` attribute specifies that a type is a vector with +N elements, directly mapping to an LLVM vector type. 
Originally from OpenCL, it allows element access the array subscript operator ``[]``, ``sN`` where N is -a hexadecimal value, or ``x, y, z, w`` for graphics-style indexing. +a hexadecimal value, or ``x, y, z, w`` for graphics-style indexing. If S is +non-zero, the vector size is scalable. S is taken to be zero if omitted. This attribute enables efficient SIMD operations and is usable in general-purpose code. diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 0ba3daab764b7..294c068d15dcf 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -15160,7 +15160,7 @@ class Sema final : public SemaBase { /// Build an ext-vector type. /// /// Run the required checks for the extended vector type. - QualType BuildExtVectorType(QualType T, Expr *ArraySize, + QualType BuildExtVectorType(QualType T, Expr *ArraySize, Expr *Scalable, SourceLocation AttrLoc); QualType BuildMatrixType(QualType T, Expr *NumRows, Expr *NumColumns, SourceLocation AttrLoc); diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index 3f63420cae91e..60063596edcf0 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -4666,12 +4666,12 @@ QualType ASTContext::getExtVectorType(QualType vecType, } QualType -ASTContext::getDependentSizedExtVectorType(QualType vecType, - Expr *SizeExpr, +ASTContext::getDependentSizedExtVectorType(QualType vecType, Expr *SizeExpr, + Expr *ScalableExpr, SourceLocation AttrLoc) const { llvm::FoldingSetNodeID ID; DependentSizedExtVectorType::Profile(ID, *this, getCanonicalType(vecType), - SizeExpr); + SizeExpr, ScalableExpr); void *InsertPos = nullptr; DependentSizedExtVectorType *Canon @@ -4682,12 +4682,13 @@ ASTContext::getDependentSizedExtVectorType(QualType vecType, // the canonical type for a newly-built type. 
New = new (*this, alignof(DependentSizedExtVectorType)) DependentSizedExtVectorType(vecType, QualType(Canon, 0), SizeExpr, - AttrLoc); + ScalableExpr, AttrLoc); } else { QualType CanonVecTy = getCanonicalType(vecType); if (CanonVecTy == vecType) { New = new (*this, alignof(DependentSizedExtVectorType)) - DependentSizedExtVectorType(vecType, QualType(), SizeExpr, AttrLoc); + DependentSizedExtVectorType(vecType, QualType(), SizeExpr, + ScalableExpr, AttrLoc); DependentSizedExtVectorType *CanonCheck = DependentSizedExtVectorTypes.FindNodeOrInsertPos(ID, InsertPos); @@ -4695,10 +4696,11 @@ ASTContext::getDependentSizedExtVectorType(QualType vecType, (void)CanonCheck; DependentSizedExtVectorTypes.InsertNode(New, InsertPos); } else { - QualType CanonExtTy = getDependentSizedExtVectorType(CanonVecTy, SizeExpr, - SourceLocation()); + QualType CanonExtTy = getDependentSizedExtVectorType( + CanonVecTy, SizeExpr, ScalableExpr, SourceLocation()); New = new (*this, alignof(DependentSizedExtVectorType)) - DependentSizedExtVectorType(vecType, CanonExtTy, SizeExpr, AttrLoc); + DependentSizedExtVectorType(vecType, CanonExtTy, SizeExpr, + ScalableExpr, AttrLoc); } } @@ -13932,6 +13934,12 @@ static auto *getCommonSizeExpr(const ASTContext &Ctx, T *X, T *Y) { return X->getSizeExpr(); } +template <class T> +static auto *getCommonScalableExpr(const ASTContext &Ctx, T *X, T *Y) { + assert(Ctx.hasSameExpr(X->getScalableExpr(), Y->getScalableExpr())); + return X->getScalableExpr(); +} + static auto getCommonSizeModifier(const ArrayType *X, const ArrayType *Y) { assert(X->getSizeModifier() == Y->getSizeModifier()); return X->getSizeModifier(); @@ -14262,9 +14270,9 @@ static QualType getCommonNonSugarTypeNode(const ASTContext &Ctx, const Type *X, case Type::DependentSizedExtVector: { const auto *VX = cast<DependentSizedExtVectorType>(X), *VY = cast<DependentSizedExtVectorType>(Y); - return Ctx.getDependentSizedExtVectorType(getCommonElementType(Ctx, VX, VY), - getCommonSizeExpr(Ctx, VX, VY), 
- getCommonAttrLoc(VX, VY)); + return Ctx.getDependentSizedExtVectorType( + getCommonElementType(Ctx, VX, VY), getCommonSizeExpr(Ctx, VX, VY), + getCommonScalableExpr(Ctx, VX, VY), getCommonAttrLoc(VX, VY)); } case Type::DependentVector: { const auto *VX = cast<DependentVectorType>(X), diff --git a/clang/lib/AST/ASTImporter.cpp b/clang/lib/AST/ASTImporter.cpp index 101ab2c40973b..8839f53e7bdfb 100644 --- a/clang/lib/AST/ASTImporter.cpp +++ b/clang/lib/AST/ASTImporter.cpp @@ -1535,11 +1535,12 @@ ExpectedType ASTNodeImporter::VisitDependentSizedExtVectorType( Error Err = Error::success(); QualType ToElementType = importChecked(Err, T->getElementType()); Expr *ToSizeExpr = importChecked(Err, T->getSizeExpr()); + Expr *ToScalableExpr = importChecked(Err, T->getScalableExpr()); SourceLocation ToAttrLoc = importChecked(Err, T->getAttributeLoc()); if (Err) return std::move(Err); return Importer.getToContext().getDependentSizedExtVectorType( - ToElementType, ToSizeExpr, ToAttrLoc); + ToElementType, ToSizeExpr, ToScalableExpr, ToAttrLoc); } ExpectedType ASTNodeImporter::VisitVectorType(const VectorType *T) { diff --git a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp index dcdbb62f9d62b..133bd6695573e 100644 --- a/clang/lib/AST/Type.cpp +++ b/clang/lib/AST/Type.cpp @@ -323,18 +323,20 @@ void DependentVectorType::Profile(llvm::FoldingSetNodeID &ID, DependentSizedExtVectorType::DependentSizedExtVectorType(QualType ElementType, QualType can, Expr *SizeExpr, + Expr *ScalableExpr, SourceLocation loc) : Type(DependentSizedExtVector, can, TypeDependence::DependentInstantiation | ElementType->getDependence() | (SizeExpr ? 
toTypeDependence(SizeExpr->getDependence()) : TypeDependence::None)), - SizeExpr(SizeExpr), ElementType(ElementType), loc(loc) {} + SizeExpr(SizeExpr), ScalableExpr(ScalableExpr), ElementType(ElementType), + loc(loc) {} void DependentSizedExtVectorType::Profile(llvm::FoldingSetNodeID &ID, const ASTContext &Context, - QualType ElementType, - Expr *SizeExpr) { + QualType ElementType, Expr *SizeExpr, + Expr *ScalableExpr) { ID.AddPointer(ElementType.getAsOpaquePtr()); SizeExpr->Profile(ID, Context, true); } diff --git a/clang/lib/Sema/HLSLExternalSemaSource.cpp b/clang/lib/Sema/HLSLExternalSemaSource.cpp index f7862b3a3f594..6090a27a4e3bd 100644 --- a/clang/lib/Sema/HLSLExternalSemaSource.cpp +++ b/clang/lib/Sema/HLSLExternalSemaSource.cpp @@ -104,7 +104,7 @@ void HLSLExternalSemaSource::defineHLSLVectorAlias() { AST, NestedNameSpecifierLoc(), SourceLocation(), SizeParam, false, DeclarationNameInfo(SizeParam->getDeclName(), SourceLocation()), AST.IntTy, VK_LValue), - SourceLocation()); + nullptr, SourceLocation()); auto *Record = TypeAliasDecl::Create(AST, HLSLNamespace, SourceLocation(), SourceLocation(), &II, diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp index 4248c4e6c945d..dce99d3a36c96 100644 --- a/clang/lib/Sema/SemaType.cpp +++ b/clang/lib/Sema/SemaType.cpp @@ -2402,7 +2402,7 @@ QualType Sema::BuildVectorType(QualType CurType, Expr *SizeExpr, } QualType Sema::BuildExtVectorType(QualType T, Expr *SizeExpr, - SourceLocation AttrLoc) { + Expr *ScalableExpr, SourceLocation AttrLoc) { // Unlike gcc's vector_size attribute, we do not allow vectors to be defined // in conjunction with complex types (pointers, arrays, functions, etc.). // @@ -2435,16 +2435,6 @@ QualType Sema::BuildExtVectorType(QualType T, Expr *SizeExpr, } if (VecSize->isNegative()) { - if (Context.getTargetInfo().hasFeature("sve")) { - // The length of an SVE vector type is only known at runtime, but it is - // always a multiple of 128bits. 
- unsigned NumEls = 128U / Context.getTypeSize(T); - unsigned NF = static_cast<unsigned>(-1L * VecSize->getZExtValue()); - QualType Result = Context.getScalableVectorType(T, NumEls * NF); - if (!Result.isNull()) - return Result; - } - Diag(SizeExpr->getExprLoc(), diag::err_attribute_vec_negative_size); return QualType(); } @@ -2464,10 +2454,29 @@ QualType Sema::BuildExtVectorType(QualType T, Expr *SizeExpr, return QualType(); } + if (ScalableExpr) { + std::optional<llvm::APSInt> VecScalable = + ScalableExpr->getIntegerConstantExpr(Context); + if (VecScalable && static_cast<bool>(VecScalable->getZExtValue())) { + if (Context.getTargetInfo().hasFeature("sve")) { + // The length of an SVE vector type is only known at runtime, but it + // is always a multiple of 128bits. + unsigned NumEls = 128U / Context.getTypeSize(T); + return Context.getScalableVectorType(T, NumEls * VectorSize); + } else { + Diag(AttrLoc, diag::err_attribute_argument_type) + << "ext_vector_type" << AANT_ArgumentIntegerConstant + << ScalableExpr->getSourceRange(); + return QualType(); + } + } + } + return Context.getExtVectorType(T, VectorSize); } - return Context.getDependentSizedExtVectorType(T, SizeExpr, AttrLoc); + return Context.getDependentSizedExtVectorType(T, SizeExpr, ScalableExpr, + AttrLoc); } QualType Sema::BuildMatrixType(QualType ElementTy, Expr *NumRows, Expr *NumCols, @@ -8331,14 +8340,16 @@ static void HandleVectorSizeAttr(QualType &CurType, const ParsedAttr &Attr, static void HandleExtVectorTypeAttr(QualType &CurType, const ParsedAttr &Attr, Sema &S) { // check the attribute arguments. - if (Attr.getNumArgs() != 1) { + if ((Attr.getNumArgs() < 1) || (Attr.getNumArgs() > 2)) { S.Diag(Attr.getLoc(), diag::err_attribute_wrong_number_arguments) << Attr << 1; return; } Expr *SizeExpr = Attr.getArgAsExpr(0); - QualType T = S.BuildExtVectorType(CurType, SizeExpr, Attr.getLoc()); + Expr *ScalableExpr = (Attr.getNumArgs() > 1) ? 
Attr.getArgAsExpr(1) : nullptr; + QualType T = + S.BuildExtVectorType(CurType, SizeExpr, ScalableExpr, Attr.getLoc()); if (!T.isNull()) CurType = T; } diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index 1329f8d9967e9..a70a703a641d6 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -1026,7 +1026,7 @@ class TreeTransform { /// By default, performs semantic analysis when building the vector type. /// Subclasses may override this routine to provide different behavior. QualType RebuildExtVectorType(QualType ElementType, unsigned NumElements, - SourceLocation AttributeLoc); + bool Scalable, SourceLocation AttributeLoc); /// Build a new potentially dependently-sized extended vector type /// given the element type and number of elements. @@ -1035,6 +1035,7 @@ class TreeTransform { /// Subclasses may override this routine to provide different behavior. QualType RebuildDependentSizedExtVectorType(QualType ElementType, Expr *SizeExpr, + Expr *ScalableExpr, SourceLocation AttributeLoc); /// Build a new matrix type given the element type and dimensions. @@ -6119,13 +6120,21 @@ QualType TreeTransform<Derived>::TransformDependentSizedExtVectorType( if (Size.isInvalid()) return QualType(); + ExprResult Scalable; + if (T->getScalableExpr()) { + Scalable = getDerived().TransformExpr(T->getScalableExpr()); + Scalable = SemaRef.ActOnConstantExpression(Scalable); + if (Scalable.isInvalid()) + return QualType(); + } + QualType Result = TL.getType(); if (getDerived().AlwaysRebuild() || ElementType != T->getElementType() || Size.get() != T->getSizeExpr()) { - Result = getDerived().RebuildDependentSizedExtVectorType(ElementType, - Size.get(), - T->getAttributeLoc()); + Result = getDerived().RebuildDependentSizedExtVectorType( + ElementType, Size.get(), + T->getScalableExpr() ? 
Scalable.get() : nullptr, T->getAttributeLoc()); if (Result.isNull()) return QualType(); } @@ -6302,8 +6311,8 @@ QualType TreeTransform<Derived>::TransformExtVectorType(TypeLocBuilder &TLB, QualType Result = TL.getType(); if (getDerived().AlwaysRebuild() || ElementType != T->getElementType()) { - Result = getDerived().RebuildExtVectorType(ElementType, - T->getNumElements(), + Result = getDerived().RebuildExtVectorType(ElementType, T->getNumElements(), + false, /*FIXME*/ SourceLocation()); if (Result.isNull()) return QualType(); @@ -17536,24 +17545,29 @@ QualType TreeTransform<Derived>::RebuildDependentVectorType( return SemaRef.BuildVectorType(ElementType, SizeExpr, AttributeLoc); } -template<typename Derived> -QualType TreeTransform<Derived>::RebuildExtVectorType(QualType ElementType, - unsigned NumElements, - SourceLocation AttributeLoc) { +template <typename Derived> +QualType TreeTransform<Derived>::RebuildExtVectorType( + QualType ElementType, unsigned NumElements, bool Scalable, + SourceLocation AttributeLoc) { llvm::APInt numElements(SemaRef.Context.getIntWidth(SemaRef.Context.IntTy), NumElements, true); + llvm::APInt isScalable(SemaRef.Context.getIntWidth(SemaRef.Context.IntTy), + Scalable); IntegerLiteral *VectorSize = IntegerLiteral::Create(SemaRef.Context, numElements, SemaRef.Context.IntTy, AttributeLoc); - return SemaRef.BuildExtVectorType(ElementType, VectorSize, AttributeLoc); + IntegerLiteral *IsScalable = IntegerLiteral::Create( + SemaRef.Context, isScalable, SemaRef.Context.IntTy, AttributeLoc); + return SemaRef.BuildExtVectorType(ElementType, VectorSize, IsScalable, + AttributeLoc); } -template<typename Derived> -QualType -TreeTransform<Derived>::RebuildDependentSizedExtVectorType(QualType ElementType, - Expr *SizeExpr, - SourceLocation AttributeLoc) { - return SemaRef.BuildExtVectorType(ElementType, SizeExpr, AttributeLoc); +template <typename Derived> +QualType TreeTransform<Derived>::RebuildDependentSizedExtVectorType( + QualType ElementType, 
Expr *SizeExpr, Expr *ScalableExpr, + SourceLocation AttributeLoc) { + return SemaRef.BuildExtVectorType(ElementType, SizeExpr, ScalableExpr, + AttributeLoc); } template <typename Derived> diff --git a/clang/test/CodeGen/64bit-swiftcall.c b/clang/test/CodeGen/64bit-swiftcall.c index cc60ac0f6844c..4eed2ffbdfeea 100644 --- a/clang/test/CodeGen/64bit-swiftcall.c +++ b/clang/test/CodeGen/64bit-swiftcall.c @@ -1063,7 +1063,7 @@ TEST(vector_union) #if defined(__ARM_FEATURE_SVE) -#define SCALABLE_SIZE(N) (-1 * ((signed)(N))) +#define SCALABLE_SIZE(N) (N), 1 typedef float svfloat1 __attribute__((ext_vector_type(SCALABLE_SIZE(1)))); typedef float svfloat4 __attribute__((ext_vector_type(SCALABLE_SIZE(4)))); diff --git a/clang/test/CodeGen/arm64-abi-sve.c b/clang/test/CodeGen/arm64-abi-sve.c index 9570867d76e6e..23e10f30265a0 100644 --- a/clang/test/CodeGen/arm64-abi-sve.c +++ b/clang/test/CodeGen/arm64-abi-sve.c @@ -3,7 +3,7 @@ #include <stdarg.h> -#define SCALABLE_SIZE(N) (-1 * ((signed)(N))) +#define SCALABLE_SIZE(N) (N), 1 typedef __attribute__(( ext_vector_type(SCALABLE_SIZE(1)) )) char __char1s; typedef __attribute__(( ext_vector_type(SCALABLE_SIZE(2)) )) char __char2s; diff --git a/clang/test/CodeGen/builtin_vectorelements.c b/clang/test/CodeGen/builtin_vectorelements.c index 7047bd930419f..dba5a3fe60fbc 100644 --- a/clang/test/CodeGen/builtin_vectorelements.c +++ b/clang/test/CodeGen/builtin_vectorelements.c @@ -14,7 +14,7 @@ typedef int int16 __attribute__((vector_size(64))); typedef float float2 __attribute__((vector_size(8))); typedef long extLong4 __attribute__((ext_vector_type(4))); #if defined(__ARM_FEATURE_SVE) -#define SCALABLE_SIZE(N) (-1 * ((signed)(N))) +#define SCALABLE_SIZE(N) (N), 1 typedef long extLong1s __attribute__((ext_vector_type(SCALABLE_SIZE(1)))); #endif diff --git a/libc/src/__support/CPP/simd.h b/libc/src/__support/CPP/simd.h index 3eb8779fc5dfb..c890ebbdb611a 100644 --- a/libc/src/__support/CPP/simd.h +++ b/libc/src/__support/CPP/simd.h @@ 
-32,35 +32,41 @@ namespace LIBC_NAMESPACE_DECL { namespace cpp { -template <size_t N> -constexpr signed scalable_size = -1 * static_cast<signed>(N); - namespace internal { #if defined(LIBC_TARGET_CPU_HAS_AVX512F) template <typename T> LIBC_INLINE_VAR constexpr size_t native_vector_size = 64 / sizeof(T); +template <typename T> +LIBC_INLINE_VAR constexpr bool native_vector_scalable = false; #elif defined(LIBC_TARGET_CPU_HAS_AVX2) template <typename T> LIBC_INLINE_VAR constexpr size_t native_vector_size = 32 / sizeof(T); +template <typename T> +LIBC_INLINE_VAR constexpr bool native_vector_scalable = false; #elif defined(LIBC_TARGET_CPU_HAS_SSE2) || defined(LIBC_TARGET_CPU_HAS_ARM_NEON) template <typename T> LIBC_INLINE_VAR constexpr size_t native_vector_size = 16 / sizeof(T); +template <typename T> +LIBC_INLINE_VAR constexpr bool native_vector_scalable = false; #else template <typename T> LIBC_INLINE constexpr size_t native_vector_size = 1; +template <typename T> LIBC_INLINE constexpr bool native_vector_scalable = false; #endif } // namespace internal // Type aliases. template <typename T, size_t N> -using fixed_size_simd = T [[clang::ext_vector_type(N)]]; +using fixed_size_simd = T [[clang::ext_vector_type(N, false)]]; template <typename T, size_t N> -using scalable_size_simd = T [[clang::ext_vector_type(scalable_size<N>)]]; -template <typename T, auto N = internal::native_vector_size<T>> -using simd = T [[clang::ext_vector_type(N)]]; -template <typename T> -using simd_mask = simd<bool, internal::native_vector_size<T>>; +using scalable_size_simd = T [[clang::ext_vector_type(N, true)]]; +template <typename T, size_t N = internal::native_vector_size<T>, + bool S = internal::native_vector_scalable<T>> +using simd = T [[clang::ext_vector_type(N, S)]]; +template <typename T, size_t N = internal::native_vector_size<T>, + bool S = internal::native_vector_scalable<T>> +using simd_mask = simd<bool, N, S>; // Type trait helpers. 
template <typename T> @@ -69,21 +75,18 @@ struct simd_size : cpp::integral_constant<size_t, __builtin_vectorelements(T)> { template <class T> constexpr size_t simd_size_v = simd_size<T>::value; template <typename T> struct is_simd : cpp::integral_constant<bool, false> {}; -template <typename T, auto N> +template <typename T, unsigned N> struct is_simd<simd<T, N>> : cpp::integral_constant<bool, true> {}; template <class T> constexpr bool is_simd_v = is_simd<T>::value; -template <auto N> -constexpr bool is_scalable_size_v = static_cast<signed>(N) < 0; - template <typename T> struct is_simd_mask : cpp::integral_constant<bool, false> {}; -template <auto N> +template <unsigned N> struct is_simd_mask<simd<bool, N>> : cpp::integral_constant<bool, true> {}; template <class T> constexpr bool is_simd_mask_v = is_simd_mask<T>::value; template <typename T> struct simd_element_type; -template <typename T, auto N> struct simd_element_type<simd<T, N>> { +template <typename T, size_t N> struct simd_element_type<simd<T, N>> { using type = T; }; template <typename T> @@ -161,13 +164,6 @@ using enable_if_integral_t = cpp::enable_if_t<cpp::is_integral_v<T>, T>; template <typename T> using enable_if_simd_t = cpp::enable_if_t<is_simd_v<T>, bool>; -template <auto N> -using enable_if_scalable_size_t = cpp::enable_if_t<is_scalable_size_v<N>, bool>; - -template <auto N> -using enable_if_not_scalable_size_t = - cpp::enable_if_t<!is_scalable_size_v<N>, bool>; - } // namespace internal // Casting. @@ -371,33 +367,36 @@ LIBC_INLINE constexpr static void compress(simd<bool, simd_size_v<T>> mask, T v, } // Construction helpers. 
-template <typename T, auto N = internal::native_vector_size<T>, - internal::enable_if_not_scalable_size_t<N> = 0> -LIBC_INLINE constexpr static simd<T, N> splat(T v) { - return simd<T, N>(v); -} -template <typename T, auto N = internal::native_vector_size<T>, - internal::enable_if_scalable_size_t<N> = 0> -LIBC_INLINE constexpr static simd<T, N> splat(T v) { - simd<T, N> sv; - size_t n = __builtin_vectorelements(simd<T, N>); +template <typename T, size_t N = internal::native_vector_size<T>, + bool S = internal::native_vector_scalable<T>, + cpp::enable_if_t<!S, bool> = 0> +LIBC_INLINE constexpr static simd<T, N, S> splat(T v) { + return simd<T, N, S>(v); +} +template <typename T, size_t N = internal::native_vector_size<T>, + bool S = internal::native_vector_scalable<T>, + cpp::enable_if_t<S, bool> = 0> +LIBC_INLINE constexpr static simd<T, N, S> splat(T v) { + simd<T, N, S> sv; + size_t n = __builtin_vectorelements(simd<T, N, S>); for (unsigned i = 0U; i < n; ++i) sv[i] = v; return sv; } -template <typename T, auto N = internal::native_vector_size<T>> -LIBC_INLINE constexpr static simd<T, N> iota(T base = T(0), T step = T(1)) { - simd<T, N> v{}; - size_t n = __builtin_vectorelements(simd<T, N>); - for (unsigned i = 0U; i < n; ++i) +template <typename T, size_t N = internal::native_vector_size<T>, + bool S = internal::native_vector_scalable<T>> +LIBC_INLINE constexpr static simd<T, N, S> iota(T base = T(0), T step = T(1)) { + simd<T, N, S> v{}; + size_t n = __builtin_vectorelements(simd<T, N, S>); + for (unsigned i = 0; i < n; ++i) v[i] = base + T(i) * step; return v; } // Conditional helpers. -template <typename T, auto N> -LIBC_INLINE constexpr static simd<T, N> select(simd<bool, N> m, simd<T, N> x, - simd<T, N> y) { +template <typename T, size_t N, bool S> +LIBC_INLINE constexpr static simd<T, N, S> +select(simd<bool, N, S> m, simd<T, N, S> x, simd<T, N, S> y) { return m ? 
x : y; } diff --git a/libc/test/src/__support/CPP/simd_test.cpp b/libc/test/src/__support/CPP/simd_test.cpp index ba052cbef682d..b119e239f53eb 100644 --- a/libc/test/src/__support/CPP/simd_test.cpp +++ b/libc/test/src/__support/CPP/simd_test.cpp @@ -152,8 +152,8 @@ TEST(LlvmLibcSIMDTest, MaskedCompressExpand) { #if defined(LIBC_TARGET_CPU_HAS_SVE) || defined(LIBC_TARGET_CPU_HAS_SVE2) TEST(LlvmLibcSIMDTest, SizelessVectorCreation) { - cpp::simd<int, cpp::scalable_size<1>> svsplat = cpp::splat(5); - cpp::simd<int, cpp::scalable_size<1>> sviota = cpp::iota(0); + cpp::simd<int, 1U, true> svsplat = cpp::splat<int, 1U, true>(5); + cpp::simd<int, 1U, true> sviota = cpp::iota<int, 1U, true>(0); EXPECT_EQ(svsplat[0], 5); EXPECT_EQ(svsplat[1], 5); _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
