pengfei updated this revision to Diff 374704.
pengfei added a comment.
Move the mask to the second-to-last operand.
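For reviewers skimming the diff, a hedged C model of the mask3 scalar semantics this revision implements (the wrapper name is invented for illustration; assumes <immintrin.h> and a compiler with avx512fp16 support):

    #include <immintrin.h>

    // Sketch of _mm_mask3_fcmadd_sch(A, B, C, U): the low complex
    // element (two fp16 lanes, i.e. one float lane) is the scalar
    // complex-conjugate FMA of A, B, and C when U[0] is set, and C's
    // low element otherwise; the upper three float lanes always come
    // from C.
    static __m128h mask3_fcmadd_sch_model(__m128h A, __m128h B,
                                          __m128h C, __mmask8 U) {
      __m128h R = _mm_fcmadd_sch(A, B, C); // unmasked result
      return (U & 1) ? (__m128h)_mm_move_ss((__m128)C, (__m128)R) : C;
    }

The new *_mask3 builtins let the header express this directly instead of wrapping the _mask builtin in _mm_move_ss / selectps.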
Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D110336/new/

https://reviews.llvm.org/D110336

Files:
  clang/include/clang/Basic/BuiltinsX86.def
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/Headers/avx512fp16intrin.h
  clang/lib/Sema/SemaChecking.cpp
  clang/test/CodeGen/X86/avx512fp16-builtins.c
  llvm/include/llvm/IR/IntrinsicsX86.td
Index: llvm/include/llvm/IR/IntrinsicsX86.td
===================================================================
--- llvm/include/llvm/IR/IntrinsicsX86.td
+++ llvm/include/llvm/IR/IntrinsicsX86.td
@@ -5754,7 +5754,7 @@
[ llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty ],
[ IntrNoMem ]>;
def int_x86_avx512fp16_mask_vfcmadd_cph_512
- : GCCBuiltin<"__builtin_ia32_vfcmaddcph512_mask">,
+ : GCCBuiltin<"__builtin_ia32_vfcmaddcph512_mask3">,
Intrinsic<[ llvm_v16f32_ty ],
[ llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty,
llvm_i32_ty ],
@@ -5786,7 +5786,7 @@
[ llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty ],
[ IntrNoMem ]>;
def int_x86_avx512fp16_mask_vfmadd_cph_512
- : GCCBuiltin<"__builtin_ia32_vfmaddcph512_mask">,
+ : GCCBuiltin<"__builtin_ia32_vfmaddcph512_mask3">,
Intrinsic<[ llvm_v16f32_ty ],
[ llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty,
llvm_i32_ty ],
Index: clang/test/CodeGen/X86/avx512fp16-builtins.c
===================================================================
--- clang/test/CodeGen/X86/avx512fp16-builtins.c
+++ clang/test/CodeGen/X86/avx512fp16-builtins.c
@@ -4086,10 +4086,8 @@
// CHECK: %{{.*}} = bitcast <8 x half> %{{.*}} to <4 x float>
// CHECK: %{{.*}} = bitcast <8 x half> %{{.*}} to <4 x float>
// CHECK: %{{.*}} = bitcast <8 x half> %{{.*}} to <4 x float>
- // CHECK: %{{.*}} = bitcast <8 x half> %{{.*}} to <4 x float>
// CHECK: %{{.*}} = call <4 x float> @llvm.x86.avx512fp16.mask.vfcmadd.csh(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 %{{.*}}, i32 4)
- // CHECK: %{{.*}} = extractelement <4 x float> %{{.*}}, i32 0
- // CHECK: %{{.*}} = insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
+ // CHECK: %{{.*}} = shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
// CHECK: %{{.*}} = bitcast <4 x float> %{{.*}} to <8 x half>
return _mm_mask3_fcmadd_sch(__A, __B, __C, __U);
}
@@ -4119,10 +4117,8 @@
// CHECK: %{{.*}} = bitcast <8 x half> %{{.*}} to <4 x float>
// CHECK: %{{.*}} = bitcast <8 x half> %{{.*}} to <4 x float>
// CHECK: %{{.*}} = bitcast <8 x half> %{{.*}} to <4 x float>
- // CHECK: %{{.*}} = bitcast <8 x half> %{{.*}} to <4 x float>
// CHECK: %{{.*}} = call <4 x float> @llvm.x86.avx512fp16.mask.vfcmadd.csh(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 %{{.*}}, i32 11)
- // CHECK: %{{.*}} = extractelement <4 x float> %{{.*}}, i32 0
- // CHECK: %{{.*}} = insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
+ // CHECK: %{{.*}} = shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
// CHECK: %{{.*}} = bitcast <4 x float> %{{.*}} to <8 x half>
return _mm_mask3_fcmadd_round_sch(__A, __B, __C, __U, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
}
@@ -4147,6 +4143,17 @@
return _mm_maskz_fmadd_sch(__U, __A, __B, __C);
}
+__m128h test_mm_mask3_fmadd_sch(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
+ // CHECK-LABEL: @test_mm_mask3_fmadd_sch
+ // CHECK: %{{.*}} = bitcast <8 x half> %{{.*}} to <4 x float>
+ // CHECK: %{{.*}} = bitcast <8 x half> %{{.*}} to <4 x float>
+ // CHECK: %{{.*}} = bitcast <8 x half> %{{.*}} to <4 x float>
+ // CHECK: %{{.*}} = call <4 x float> @llvm.x86.avx512fp16.mask.vfmadd.csh(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 %{{.*}}, i32 4)
+ // CHECK: %{{.*}} = shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+ // CHECK: %{{.*}} = bitcast <4 x float> %{{.*}} to <8 x half>
+ return _mm_mask3_fmadd_sch(__A, __B, __C, __U);
+}
+
__m128h test_mm_fmadd_round_sch(__m128h __A, __m128h __B, __m128h __C) {
// CHECK-LABEL: @test_mm_fmadd_round_sch
// CHECK: @llvm.x86.avx512fp16.mask.vfmadd.csh
@@ -4167,6 +4174,17 @@
return _mm_maskz_fmadd_round_sch(__U, __A, __B, __C, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
}
+__m128h test_mm_mask3_fmadd_round_sch(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
+ // CHECK-LABEL: @test_mm_mask3_fmadd_round_sch
+ // CHECK: %{{.*}} = bitcast <8 x half> %{{.*}} to <4 x float>
+ // CHECK: %{{.*}} = bitcast <8 x half> %{{.*}} to <4 x float>
+ // CHECK: %{{.*}} = bitcast <8 x half> %{{.*}} to <4 x float>
+ // CHECK: %{{.*}} = call <4 x float> @llvm.x86.avx512fp16.mask.vfmadd.csh(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 %{{.*}}, i32 11)
+ // CHECK: %{{.*}} = shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
+ // CHECK: %{{.*}} = bitcast <4 x float> %{{.*}} to <8 x half>
+ return _mm_mask3_fmadd_round_sch(__A, __B, __C, __U, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
+}
+
__m128h test_mm_fcmul_sch(__m128h __A, __m128h __B) {
// CHECK-LABEL: @test_mm_fcmul_sch
// CHECK: @llvm.x86.avx512fp16.mask.vfcmul.csh
Index: clang/lib/Sema/SemaChecking.cpp
===================================================================
--- clang/lib/Sema/SemaChecking.cpp
+++ clang/lib/Sema/SemaChecking.cpp
@@ -4112,11 +4112,17 @@
case X86::BI__builtin_ia32_vfmaddsubph512_mask3:
case X86::BI__builtin_ia32_vfmsubaddph512_mask3:
case X86::BI__builtin_ia32_vfmaddcsh_mask:
+ case X86::BI__builtin_ia32_vfmaddcsh_round_mask:
+ case X86::BI__builtin_ia32_vfmaddcsh_round_mask3:
case X86::BI__builtin_ia32_vfmaddcph512_mask:
case X86::BI__builtin_ia32_vfmaddcph512_maskz:
+ case X86::BI__builtin_ia32_vfmaddcph512_mask3:
case X86::BI__builtin_ia32_vfcmaddcsh_mask:
+ case X86::BI__builtin_ia32_vfcmaddcsh_round_mask:
+ case X86::BI__builtin_ia32_vfcmaddcsh_round_mask3:
case X86::BI__builtin_ia32_vfcmaddcph512_mask:
case X86::BI__builtin_ia32_vfcmaddcph512_maskz:
+ case X86::BI__builtin_ia32_vfcmaddcph512_mask3:
case X86::BI__builtin_ia32_vfmulcsh_mask:
case X86::BI__builtin_ia32_vfmulcph512_mask:
case X86::BI__builtin_ia32_vfcmulcsh_mask:
Index: clang/lib/Headers/avx512fp16intrin.h
===================================================================
--- clang/lib/Headers/avx512fp16intrin.h
+++ clang/lib/Headers/avx512fp16intrin.h
@@ -2941,11 +2941,8 @@
static __inline__ __m128h __DEFAULT_FN_ATTRS128
_mm_mask_fcmadd_sch(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) {
- return (__m128h)__builtin_ia32_selectps_128(
- __U,
- __builtin_ia32_vfcmaddcsh_mask((__v4sf)__A, (__v4sf)__B, (__v4sf)__C,
- (__mmask8)__U, _MM_FROUND_CUR_DIRECTION),
- (__v4sf)__A);
+ return (__m128h)__builtin_ia32_vfcmaddcsh_round_mask(
+ (__v4sf)__A, (__v4sf)(__B), (__v4sf)(__C), __U, _MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m128h __DEFAULT_FN_ATTRS128
@@ -2957,10 +2954,8 @@
static __inline__ __m128h __DEFAULT_FN_ATTRS128
_mm_mask3_fcmadd_sch(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
- return (__m128h)_mm_move_ss((__m128)__C,
- (__m128)__builtin_ia32_vfcmaddcsh_mask(
- (__v4sf)__A, (__v4sf)__B, (__v4sf)__C, __U,
- _MM_FROUND_CUR_DIRECTION));
+ return (__m128h)__builtin_ia32_vfcmaddcsh_round_mask3(
+ (__v4sf)__A, (__v4sf)__B, (__v4sf)__C, __U, _MM_FROUND_CUR_DIRECTION);
}
#define _mm_fcmadd_round_sch(A, B, C, R) \
@@ -2969,12 +2964,9 @@
(__mmask8)-1, (int)(R)))
#define _mm_mask_fcmadd_round_sch(A, U, B, C, R) \
- ((__m128h)__builtin_ia32_selectps_128( \
- (__mmask8)(U & 1), \
- __builtin_ia32_vfcmaddcsh_mask( \
- (__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), (__v4sf)(__m128h)(C), \
- (__mmask8)(U), (int)(R)), \
- (__v4sf)(__m128h)(A)))
+ ((__m128h)__builtin_ia32_vfcmaddcsh_round_mask( \
+ (__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), (__v4sf)(__m128h)(C), \
+ (__mmask8)(U), (int)(R)))
#define _mm_maskz_fcmadd_round_sch(U, A, B, C, R) \
((__m128h)__builtin_ia32_vfcmaddcsh_maskz( \
@@ -2982,9 +2974,9 @@
(__mmask8)(U), (int)(R)))
#define _mm_mask3_fcmadd_round_sch(A, B, C, U, R) \
- ((__m128h)_mm_move_ss((__m128)(C), \
- (__m128)__builtin_ia32_vfcmaddcsh_mask( \
- (__v4sf)(A), (__v4sf)(B), (__v4sf)(C), (U), (R))))
+ ((__m128h)__builtin_ia32_vfcmaddcsh_round_mask3( \
+ (__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), (__v4sf)(__m128h)(C), \
+ (__mmask8)(U), (int)(R)))
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fmadd_sch(__m128h __A,
__m128h __B,
@@ -2996,11 +2988,8 @@
static __inline__ __m128h __DEFAULT_FN_ATTRS128
_mm_mask_fmadd_sch(__m128h __A, __mmask8 __U, __m128h __B, __m128h __C) {
- return (__m128h)__builtin_ia32_selectps_128(
- __U,
- __builtin_ia32_vfmaddcsh_mask((__v4sf)__A, (__v4sf)__B, (__v4sf)__C,
- (__mmask8)__U, _MM_FROUND_CUR_DIRECTION),
- (__v4sf)__A);
+ return (__m128h)__builtin_ia32_vfmaddcsh_round_mask(
+ (__v4sf)__A, (__v4sf)(__B), (__v4sf)(__C), __U, _MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m128h __DEFAULT_FN_ATTRS128
@@ -3010,24 +2999,32 @@
_MM_FROUND_CUR_DIRECTION);
}
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_mask3_fmadd_sch(__m128h __A, __m128h __B, __m128h __C, __mmask8 __U) {
+ return (__m128h)__builtin_ia32_vfmaddcsh_round_mask3(
+ (__v4sf)__A, (__v4sf)__B, (__v4sf)__C, __U, _MM_FROUND_CUR_DIRECTION);
+}
+
#define _mm_fmadd_round_sch(A, B, C, R) \
((__m128h)__builtin_ia32_vfmaddcsh_mask( \
(__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), (__v4sf)(__m128h)(C), \
(__mmask8)-1, (int)(R)))
#define _mm_mask_fmadd_round_sch(A, U, B, C, R) \
- ((__m128h)__builtin_ia32_selectps_128( \
- (__mmask8)(U & 1), \
- __builtin_ia32_vfmaddcsh_mask( \
- (__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), (__v4sf)(__m128h)(C), \
- (__mmask8)(U), (int)(R)), \
- (__v4sf)(__m128h)(A)))
+ ((__m128h)__builtin_ia32_vfmaddcsh_round_mask( \
+ (__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), (__v4sf)(__m128h)(C), \
+ (__mmask8)(U), (int)(R)))
#define _mm_maskz_fmadd_round_sch(U, A, B, C, R) \
((__m128h)__builtin_ia32_vfmaddcsh_maskz( \
(__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), (__v4sf)(__m128h)(C), \
(__mmask8)(U), (int)(R)))
+#define _mm_mask3_fmadd_round_sch(A, B, C, U, R) \
+ ((__m128h)__builtin_ia32_vfmaddcsh_round_mask3( \
+ (__v4sf)(__m128h)(A), (__v4sf)(__m128h)(B), (__v4sf)(__m128h)(C), \
+ (__mmask8)(U), (int)(R)))
+
static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_fcmul_sch(__m128h __A,
__m128h __B) {
return (__m128h)__builtin_ia32_vfcmulcsh_mask(
@@ -3177,24 +3174,21 @@
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_fcmadd_pch(__m512h __A,
__m512h __B,
__m512h __C) {
- return (__m512h)__builtin_ia32_vfcmaddcph512_mask((__v16sf)__A, (__v16sf)__B,
- (__v16sf)__C, (__mmask16)-1,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512h)__builtin_ia32_vfcmaddcph512_mask3(
+ (__v16sf)__A, (__v16sf)__B, (__v16sf)__C, (__mmask16)-1,
+ _MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512h __DEFAULT_FN_ATTRS512
_mm512_mask_fcmadd_pch(__m512h __A, __mmask16 __U, __m512h __B, __m512h __C) {
- return (__m512h)__builtin_ia32_selectps_512(
- __U,
- __builtin_ia32_vfcmaddcph512_mask((__v16sf)__A, (__v16sf)__B,
- (__v16sf)__C, (__mmask16)__U,
- _MM_FROUND_CUR_DIRECTION),
- (__v16sf)__A);
+ return (__m512h)__builtin_ia32_vfcmaddcph512_mask(
+ (__v16sf)__A, (__v16sf)__B, (__v16sf)__C, (__mmask16)__U,
+ _MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512h __DEFAULT_FN_ATTRS512
_mm512_mask3_fcmadd_pch(__m512h __A, __m512h __B, __m512h __C, __mmask16 __U) {
- return (__m512h)__builtin_ia32_vfcmaddcph512_mask(
+ return (__m512h)__builtin_ia32_vfcmaddcph512_mask3(
(__v16sf)__A, (__v16sf)__B, (__v16sf)__C, (__mmask16)__U,
_MM_FROUND_CUR_DIRECTION);
}
@@ -3207,20 +3201,17 @@
}
#define _mm512_fcmadd_round_pch(A, B, C, R) \
- ((__m512h)__builtin_ia32_vfcmaddcph512_mask( \
+ ((__m512h)__builtin_ia32_vfcmaddcph512_mask3( \
(__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), (__v16sf)(__m512h)(C), \
(__mmask16)-1, (int)(R)))
#define _mm512_mask_fcmadd_round_pch(A, U, B, C, R) \
- ((__m512h)__builtin_ia32_selectps_512( \
- (__mmask16)(U), \
- __builtin_ia32_vfcmaddcph512_mask( \
- (__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), (__v16sf)(__m512h)(C), \
- (__mmask16)(U), (int)(R)), \
- (__v16sf)(__m512h)(A)))
+ ((__m512h)__builtin_ia32_vfcmaddcph512_mask( \
+ (__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), (__v16sf)(__m512h)(C), \
+ (__mmask16)(U), (int)(R)))
#define _mm512_mask3_fcmadd_round_pch(A, B, C, U, R) \
- ((__m512h)__builtin_ia32_vfcmaddcph512_mask( \
+ ((__m512h)__builtin_ia32_vfcmaddcph512_mask3( \
(__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), (__v16sf)(__m512h)(C), \
(__mmask16)(U), (int)(R)))
@@ -3232,26 +3223,23 @@
static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_fmadd_pch(__m512h __A,
__m512h __B,
__m512h __C) {
- return (__m512h)__builtin_ia32_vfmaddcph512_mask((__v16sf)__A, (__v16sf)__B,
- (__v16sf)__C, (__mmask16)-1,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512h)__builtin_ia32_vfmaddcph512_mask3((__v16sf)__A, (__v16sf)__B,
+ (__v16sf)__C, (__mmask16)-1,
+ _MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512h __DEFAULT_FN_ATTRS512
_mm512_mask_fmadd_pch(__m512h __A, __mmask16 __U, __m512h __B, __m512h __C) {
- return (__m512h)__builtin_ia32_selectps_512(
- __U,
- __builtin_ia32_vfmaddcph512_mask((__v16sf)__A, (__v16sf)__B, (__v16sf)__C,
- (__mmask16)__U,
- _MM_FROUND_CUR_DIRECTION),
- (__v16sf)__A);
+ return (__m512h)__builtin_ia32_vfmaddcph512_mask((__v16sf)__A, (__v16sf)__B,
+ (__v16sf)__C, (__mmask16)__U,
+ _MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512h __DEFAULT_FN_ATTRS512
_mm512_mask3_fmadd_pch(__m512h __A, __m512h __B, __m512h __C, __mmask16 __U) {
- return (__m512h)__builtin_ia32_vfmaddcph512_mask((__v16sf)__A, (__v16sf)__B,
- (__v16sf)__C, (__mmask16)__U,
- _MM_FROUND_CUR_DIRECTION);
+ return (__m512h)__builtin_ia32_vfmaddcph512_mask3(
+ (__v16sf)__A, (__v16sf)__B, (__v16sf)__C, (__mmask16)__U,
+ _MM_FROUND_CUR_DIRECTION);
}
static __inline__ __m512h __DEFAULT_FN_ATTRS512
@@ -3262,20 +3250,17 @@
}
#define _mm512_fmadd_round_pch(A, B, C, R) \
- ((__m512h)__builtin_ia32_vfmaddcph512_mask( \
+ ((__m512h)__builtin_ia32_vfmaddcph512_mask3( \
(__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), (__v16sf)(__m512h)(C), \
(__mmask16)-1, (int)(R)))
#define _mm512_mask_fmadd_round_pch(A, U, B, C, R) \
- ((__m512h)__builtin_ia32_selectps_512( \
- (__mmask16)(U), \
- __builtin_ia32_vfmaddcph512_mask( \
- (__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), (__v16sf)(__m512h)(C), \
- (__mmask16)(U), (int)(R)), \
- (__v16sf)(__m512h)(A)))
+ ((__m512h)__builtin_ia32_vfmaddcph512_mask( \
+ (__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), (__v16sf)(__m512h)(C), \
+ (__mmask16)(U), (int)(R)))
#define _mm512_mask3_fmadd_round_pch(A, B, C, U, R) \
- ((__m512h)__builtin_ia32_vfmaddcph512_mask( \
+ ((__m512h)__builtin_ia32_vfmaddcph512_mask3( \
(__v16sf)(__m512h)(A), (__v16sf)(__m512h)(B), (__v16sf)(__m512h)(C), \
(__mmask16)(U), (int)(R)))
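Taken together, the header changes above give each complex FMA intrinsic the three standard masking flavors. A short usage sketch (the wrapper function is invented for illustration; the intrinsics are the ones defined in this header):

    #include <immintrin.h>

    // Masked-off complex elements keep A for mask, are zeroed for
    // maskz, and keep C for mask3.
    void fcmadd_pch_flavors(__m512h A, __m512h B, __m512h C,
                            __mmask16 U, __m512h out[3]) {
      out[0] = _mm512_mask_fcmadd_pch(A, U, B, C);
      out[1] = _mm512_maskz_fcmadd_pch(U, A, B, C);
      out[2] = _mm512_mask3_fcmadd_pch(A, B, C, U);
    }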
Index: clang/lib/CodeGen/CGBuiltin.cpp
===================================================================
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -12508,6 +12508,7 @@
SmallVector<Value*, 4> Ops;
bool IsMaskFCmp = false;
+ bool IsConjFMA = false;
// Find out if any arguments are required to be integer constant expressions.
unsigned ICEArguments = 0;
@@ -15046,6 +15047,36 @@
Builder.SetInsertPoint(End);
return Builder.CreateExtractValue(Call, 0);
}
+ case X86::BI__builtin_ia32_vfcmaddcph512_mask:
+ IsConjFMA = true;
+ LLVM_FALLTHROUGH;
+ case X86::BI__builtin_ia32_vfmaddcph512_mask: {
+ Intrinsic::ID IID = IsConjFMA
+ ? Intrinsic::x86_avx512fp16_mask_vfcmadd_cph_512
+ : Intrinsic::x86_avx512fp16_mask_vfmadd_cph_512;
+ Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
+ return EmitX86Select(*this, Ops[3], Call, Ops[0]);
+ }
+ case X86::BI__builtin_ia32_vfcmaddcsh_round_mask:
+ IsConjFMA = true;
+ LLVM_FALLTHROUGH;
+ case X86::BI__builtin_ia32_vfmaddcsh_round_mask: {
+ Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh
+ : Intrinsic::x86_avx512fp16_mask_vfmadd_csh;
+ Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
+ Value *And = Builder.CreateAnd(Ops[3], llvm::ConstantInt::get(Int8Ty, 1));
+ return EmitX86Select(*this, And, Call, Ops[0]);
+ }
+ case X86::BI__builtin_ia32_vfcmaddcsh_round_mask3:
+ IsConjFMA = true;
+ LLVM_FALLTHROUGH;
+ case X86::BI__builtin_ia32_vfmaddcsh_round_mask3: {
+ Intrinsic::ID IID = IsConjFMA ? Intrinsic::x86_avx512fp16_mask_vfcmadd_csh
+ : Intrinsic::x86_avx512fp16_mask_vfmadd_csh;
+ Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
+ SmallVector<int, 16> Consts({0, 5, 6, 7});
+ return Builder.CreateShuffleVector(Call, Ops[2], Consts);
+ }
}
}
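The shufflevector mask <0, 5, 6, 7> in the new *_round_mask3 lowering selects float lane 0 from the intrinsic call and lanes 1..3 from Ops[2] (the C operand). A hedged C equivalent via clang's __builtin_shufflevector (helper name invented):

    #include <immintrin.h>

    // Same lane selection the mask3 lowering emits: r[0], c[1], c[2],
    // c[3]. Indices 4..7 address the second vector, so 5, 6, 7 map to
    // c[1..3]. This is also the selection _mm_move_ss(c, r) computes.
    static inline __m128 mask3_merge(__m128 r, __m128 c) {
      return __builtin_shufflevector(r, c, 0, 5, 6, 7);
    }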
Index: clang/include/clang/Basic/BuiltinsX86.def
===================================================================
--- clang/include/clang/Basic/BuiltinsX86.def
+++ clang/include/clang/Basic/BuiltinsX86.def
@@ -2020,16 +2020,22 @@
TARGET_BUILTIN(__builtin_ia32_vfmaddcph256_maskz, "V8fV8fV8fV8fUc", "ncV:256:", "avx512fp16,avx512vl")
TARGET_BUILTIN(__builtin_ia32_vfmaddcph512_mask, "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512fp16")
TARGET_BUILTIN(__builtin_ia32_vfmaddcph512_maskz, "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vfmaddcph512_mask3, "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512fp16")
TARGET_BUILTIN(__builtin_ia32_vfcmaddcph128_mask, "V4fV4fV4fV4fUc", "ncV:128:", "avx512fp16,avx512vl")
TARGET_BUILTIN(__builtin_ia32_vfcmaddcph128_maskz, "V4fV4fV4fV4fUc", "ncV:128:", "avx512fp16,avx512vl")
TARGET_BUILTIN(__builtin_ia32_vfcmaddcph256_mask, "V8fV8fV8fV8fUc", "ncV:256:", "avx512fp16,avx512vl")
TARGET_BUILTIN(__builtin_ia32_vfcmaddcph256_maskz, "V8fV8fV8fV8fUc", "ncV:256:", "avx512fp16,avx512vl")
TARGET_BUILTIN(__builtin_ia32_vfcmaddcph512_mask, "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512fp16")
TARGET_BUILTIN(__builtin_ia32_vfcmaddcph512_maskz, "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vfcmaddcph512_mask3, "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512fp16")
TARGET_BUILTIN(__builtin_ia32_vfmaddcsh_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512fp16")
TARGET_BUILTIN(__builtin_ia32_vfmaddcsh_maskz, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512fp16")
TARGET_BUILTIN(__builtin_ia32_vfcmaddcsh_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512fp16")
TARGET_BUILTIN(__builtin_ia32_vfcmaddcsh_maskz, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vfmaddcsh_round_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vfmaddcsh_round_mask3, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vfcmaddcsh_round_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512fp16")
+TARGET_BUILTIN(__builtin_ia32_vfcmaddcsh_round_mask3, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512fp16")
TARGET_BUILTIN(__builtin_ia32_vfmulcsh_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512fp16")
TARGET_BUILTIN(__builtin_ia32_vfcmulcsh_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512fp16")