================
@@ -1602,12 +1514,40 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
   case X86::BI__builtin_ia32_cmpnltsd:
   case X86::BI__builtin_ia32_cmpnlesd:
   case X86::BI__builtin_ia32_cmpordsd:
-  case X86::BI__builtin_ia32_vcvtph2ps_mask:
-  case X86::BI__builtin_ia32_vcvtph2ps256_mask:
-  case X86::BI__builtin_ia32_vcvtph2ps512_mask:
-  case X86::BI__builtin_ia32_cvtneps2bf16_128_mask:
-  case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
-  case X86::BI__builtin_ia32_cvtneps2bf16_512_mask:
+    cgm.errorNYI(expr->getSourceRange(),
+                 std::string("unimplemented X86 builtin call: ") +
+                     getContext().BuiltinInfo.getName(builtinID));
+    return {};
+  case X86::BI__builtin_ia32_vcvtph2ps_mask: {
+    mlir::Location loc = getLoc(expr->getExprLoc());
+    return emitIntrinsicCallOp(builder, loc, "x86.avx512.mask.vcvtph2ps.128",
+                               convertType(expr->getType()), ops);
+  }
+  case X86::BI__builtin_ia32_vcvtph2ps256_mask: {
+    mlir::Location loc = getLoc(expr->getExprLoc());
+    return emitIntrinsicCallOp(builder, loc, "x86.avx512.mask.vcvtph2ps.256",
+                               convertType(expr->getType()), ops);
+  }
+  case X86::BI__builtin_ia32_vcvtph2ps512_mask: {
+    mlir::Location loc = getLoc(expr->getExprLoc());
+    return emitIntrinsicCallOp(builder, loc, "x86.avx512.mask.vcvtph2ps.512",
+                               convertType(expr->getType()), ops);
+  }
+  case X86::BI__builtin_ia32_cvtneps2bf16_128_mask: {
+    mlir::Location loc = getLoc(expr->getExprLoc());
+    return emitIntrinsicCallOp(builder, loc,
+                               "x86.avx512bf16.mask.cvtneps2bf16.128",
+                               convertType(expr->getType()), ops);
+  }
+  case X86::BI__builtin_ia32_cvtneps2bf16_256_mask: {
+    mlir::Location loc = getLoc(expr->getExprLoc());
+    return emitIntrinsicCallOp(builder, loc, "x86.avx512bf16.cvtneps2bf16.256",
+                               convertType(expr->getType()), ops);
+  }
+  case X86::BI__builtin_ia32_cvtneps2bf16_512_mask: {
----------------
Priyanshu3820 wrote:
> Heyya, for this you should implement something akin to the below to save
> space and improve readability:
>
> ```
> case X86::BI__builtin_ia32_reduce_fmax_pd512:
> case X86::BI__builtin_ia32_reduce_fmax_ps512:
> case X86::BI__builtin_ia32_reduce_fmax_ph512:
> case X86::BI__builtin_ia32_reduce_fmax_ph256:
> case X86::BI__builtin_ia32_reduce_fmax_ph128: {
>   StringRef intrinsicName = "";
>   switch (builtinID) {
>   case X86::BI__builtin_ia32_reduce_fmax_pd512:
>     intrinsicName = "vector.reduce.fmax.v8f64";
>     break;
>   case X86::BI__builtin_ia32_reduce_fmax_ps512:
>     intrinsicName = "vector.reduce.fmax.v16f32";
>     break;
>   case X86::BI__builtin_ia32_reduce_fmax_ph512:
>     intrinsicName = "vector.reduce.fmax.v32f16";
>     break;
>   case X86::BI__builtin_ia32_reduce_fmax_ph256:
>     intrinsicName = "vector.reduce.fmax.v16f16";
>     break;
>   case X86::BI__builtin_ia32_reduce_fmax_ph128:
>     intrinsicName = "vector.reduce.fmax.v8f16";
>     break;
>   }
>   return emitIntrinsicCallOp(...);
> }
> ```
I considered this approach too, and I take it this is your implementation of
the reduce intrinsics, right? But I think we are supposed to follow the
already established pattern, aren't we? I would be more than happy to
implement it this way, though.
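
For concreteness, here is a minimal sketch (not part of the patch) of how the three vcvtph2ps cases from the hunk above could be folded into that inner-switch pattern; it assumes the same `emitIntrinsicCallOp` helper, `builder`, `ops`, and `expr` that appear in the diff, and the intrinsic names are copied from it:

```
// Hypothetical folding of the vcvtph2ps cases into the suggested pattern;
// each builtin only differs in the intrinsic name it resolves to.
case X86::BI__builtin_ia32_vcvtph2ps_mask:
case X86::BI__builtin_ia32_vcvtph2ps256_mask:
case X86::BI__builtin_ia32_vcvtph2ps512_mask: {
  StringRef intrinsicName;
  switch (builtinID) {
  case X86::BI__builtin_ia32_vcvtph2ps_mask:
    intrinsicName = "x86.avx512.mask.vcvtph2ps.128";
    break;
  case X86::BI__builtin_ia32_vcvtph2ps256_mask:
    intrinsicName = "x86.avx512.mask.vcvtph2ps.256";
    break;
  case X86::BI__builtin_ia32_vcvtph2ps512_mask:
    intrinsicName = "x86.avx512.mask.vcvtph2ps.512";
    break;
  }
  mlir::Location loc = getLoc(expr->getExprLoc());
  return emitIntrinsicCallOp(builder, loc, intrinsicName,
                             convertType(expr->getType()), ops);
}
```

Both forms emit the same intrinsic calls, so the choice is largely about readability and consistency with the surrounding cases.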
https://github.com/llvm/llvm-project/pull/171615
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits