simon_tatham updated this revision to Diff 236564. simon_tatham added a comment.
Thanks for the helpful advice! Here's a revised version that checks `getEvaluationKind`. I've also added a test for the `vld2q(...).val[0]` construction you mentioned: you're right, of course, that it also crashed existing clang, and it still crashed with my previous patch applied. This version seems to work in both cases.

> and then we can just require EmitTargetBuiltinExpr to return null or
> something else that makes it clear that they've done the right thing with the
> slot that was passed in

The current return value of `EmitTargetBuiltinExpr` will be the `llvm::Value *` corresponding to the last of a sequence of store instructions that write the constructed aggregate into the return value slot. There's no need to actually use that `Value` for anything in this case, but we can still test that it's non-null, to find out whether `EmitTargetBuiltinExpr` has successfully recognized a builtin and generated some code, or whether we have to fall through to `ErrorUnsupported`.

Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D72271/new/ https://reviews.llvm.org/D72271 Files: clang/lib/CodeGen/CGBuiltin.cpp clang/test/CodeGen/arm-mve-intrinsics/vld24.c Index: clang/test/CodeGen/arm-mve-intrinsics/vld24.c =================================================================== --- clang/test/CodeGen/arm-mve-intrinsics/vld24.c +++ clang/test/CodeGen/arm-mve-intrinsics/vld24.c @@ -98,3 +98,45 @@ vst2q_f16(addr, value); #endif /* POLYMORPHIC */ } + +// CHECK-LABEL: @load_into_variable( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.arm.mve.vld2q.v8i16.p0i16(i16* [[ADDR:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <8 x i16>, <8 x i16> } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue [[STRUCT_UINT16X8X2_T:%.*]] undef, <8 x i16> [[TMP1]], 0, 0 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <8 x i16>, <8 x i16> } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = insertvalue 
[[STRUCT_UINT16X8X2_T]] [[TMP2]], <8 x i16> [[TMP3]], 0, 1 +// CHECK-NEXT: store <8 x i16> [[TMP1]], <8 x i16>* [[VALUES:%.*]], align 8 +// CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[VALUES]], i32 1 +// CHECK-NEXT: store <8 x i16> [[TMP3]], <8 x i16>* [[ARRAYIDX4]], align 8 +// CHECK-NEXT: ret void +// +void load_into_variable(const uint16_t *addr, uint16x8_t *values) +{ + uint16x8x2_t v; +#ifdef POLYMORPHIC + v = vld2q(addr); +#else /* POLYMORPHIC */ + v = vld2q_u16(addr); +#endif /* POLYMORPHIC */ + values[0] = v.val[0]; + values[1] = v.val[1]; +} + +// CHECK-LABEL: @extract_one_vector( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vld2q.v4i32.p0i32(i32* [[ADDR:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue [[STRUCT_INT32X4X2_T:%.*]] undef, <4 x i32> [[TMP1]], 0, 0 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = insertvalue [[STRUCT_INT32X4X2_T]] [[TMP2]], <4 x i32> [[TMP3]], 0, 1 +// CHECK-NEXT: ret <4 x i32> [[TMP1]] +// +int32x4_t extract_one_vector(const int32_t *addr) +{ +#ifdef POLYMORPHIC + return vld2q(addr).val[0]; +#else /* POLYMORPHIC */ + return vld2q_s32(addr).val[0]; +#endif /* POLYMORPHIC */ +} Index: clang/lib/CodeGen/CGBuiltin.cpp =================================================================== --- clang/lib/CodeGen/CGBuiltin.cpp +++ clang/lib/CodeGen/CGBuiltin.cpp @@ -4328,8 +4328,26 @@ } // See if we have a target specific builtin that needs to be lowered. 
- if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E, ReturnValue)) - return RValue::get(V); + switch (getEvaluationKind(E->getType())) { + case TEK_Scalar: + if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E, ReturnValue)) + return RValue::get(V); + break; + case TEK_Aggregate: { + if (ReturnValue.isNull()) { + Address DestPtr = CreateMemTemp(E->getType(), "agg.tmp"); + ReturnValue = ReturnValueSlot(DestPtr, false); + } + if (EmitTargetBuiltinExpr(BuiltinID, E, ReturnValue)) + return RValue::getAggregate(ReturnValue.getValue(), + ReturnValue.isVolatile()); + break; + } + case TEK_Complex: + llvm_unreachable("No currently supported builtin returns complex"); + default: + llvm_unreachable("Bad evaluation kind in EmitBuiltinExpr"); + } ErrorUnsupported(E, "builtin function");
Index: clang/test/CodeGen/arm-mve-intrinsics/vld24.c =================================================================== --- clang/test/CodeGen/arm-mve-intrinsics/vld24.c +++ clang/test/CodeGen/arm-mve-intrinsics/vld24.c @@ -98,3 +98,45 @@ vst2q_f16(addr, value); #endif /* POLYMORPHIC */ } + +// CHECK-LABEL: @load_into_variable( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.arm.mve.vld2q.v8i16.p0i16(i16* [[ADDR:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <8 x i16>, <8 x i16> } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue [[STRUCT_UINT16X8X2_T:%.*]] undef, <8 x i16> [[TMP1]], 0, 0 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <8 x i16>, <8 x i16> } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = insertvalue [[STRUCT_UINT16X8X2_T]] [[TMP2]], <8 x i16> [[TMP3]], 0, 1 +// CHECK-NEXT: store <8 x i16> [[TMP1]], <8 x i16>* [[VALUES:%.*]], align 8 +// CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[VALUES]], i32 1 +// CHECK-NEXT: store <8 x i16> [[TMP3]], <8 x i16>* [[ARRAYIDX4]], align 8 +// CHECK-NEXT: ret void +// +void load_into_variable(const uint16_t *addr, uint16x8_t *values) +{ + uint16x8x2_t v; +#ifdef POLYMORPHIC + v = vld2q(addr); +#else /* POLYMORPHIC */ + v = vld2q_u16(addr); +#endif /* POLYMORPHIC */ + values[0] = v.val[0]; + values[1] = v.val[1]; +} + +// CHECK-LABEL: @extract_one_vector( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vld2q.v4i32.p0i32(i32* [[ADDR:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = insertvalue [[STRUCT_INT32X4X2_T:%.*]] undef, <4 x i32> [[TMP1]], 0, 0 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = insertvalue [[STRUCT_INT32X4X2_T]] [[TMP2]], <4 x i32> [[TMP3]], 0, 1 +// CHECK-NEXT: ret <4 x i32> [[TMP1]] +// +int32x4_t extract_one_vector(const int32_t *addr) 
+{ +#ifdef POLYMORPHIC + return vld2q(addr).val[0]; +#else /* POLYMORPHIC */ + return vld2q_s32(addr).val[0]; +#endif /* POLYMORPHIC */ +} Index: clang/lib/CodeGen/CGBuiltin.cpp =================================================================== --- clang/lib/CodeGen/CGBuiltin.cpp +++ clang/lib/CodeGen/CGBuiltin.cpp @@ -4328,8 +4328,26 @@ } // See if we have a target specific builtin that needs to be lowered. - if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E, ReturnValue)) - return RValue::get(V); + switch (getEvaluationKind(E->getType())) { + case TEK_Scalar: + if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E, ReturnValue)) + return RValue::get(V); + break; + case TEK_Aggregate: { + if (ReturnValue.isNull()) { + Address DestPtr = CreateMemTemp(E->getType(), "agg.tmp"); + ReturnValue = ReturnValueSlot(DestPtr, false); + } + if (EmitTargetBuiltinExpr(BuiltinID, E, ReturnValue)) + return RValue::getAggregate(ReturnValue.getValue(), + ReturnValue.isVolatile()); + break; + } + case TEK_Complex: + llvm_unreachable("No currently supported builtin returns complex"); + default: + llvm_unreachable("Bad evaluation kind in EmitBuiltinExpr"); + } ErrorUnsupported(E, "builtin function");
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits