tkrupa created this revision. tkrupa added a reviewer: itaraban. Herald added a subscriber: cfe-commits.
All *_sqrt_round_s[s|d] intrinsics should compute the square root of the zeroth element of B (Ops[1]) and insert the result into the zeroth element of A (Ops[0]), not the other way around. Repository: rC Clang https://reviews.llvm.org/D48288 Files: lib/CodeGen/CGBuiltin.cpp Index: lib/CodeGen/CGBuiltin.cpp =================================================================== --- lib/CodeGen/CGBuiltin.cpp +++ lib/CodeGen/CGBuiltin.cpp @@ -9907,15 +9907,15 @@ Intrinsic::x86_avx512_mask_sqrt_ss; return Builder.CreateCall(CGM.getIntrinsic(IID), Ops); } - Value *A = Builder.CreateExtractElement(Ops[0], (uint64_t)0); + Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0); Function *F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType()); Value *Src = Builder.CreateExtractElement(Ops[2], (uint64_t)0); int MaskSize = Ops[3]->getType()->getScalarSizeInBits(); llvm::Type *MaskTy = llvm::VectorType::get(Builder.getInt1Ty(), MaskSize); Value *Mask = Builder.CreateBitCast(Ops[3], MaskTy); Mask = Builder.CreateExtractElement(Mask, (uint64_t)0); A = Builder.CreateSelect(Mask, Builder.CreateCall(F, {A}), Src); - return Builder.CreateInsertElement(Ops[1], A, (uint64_t)0); + return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0); } case X86::BI__builtin_ia32_sqrtpd256: case X86::BI__builtin_ia32_sqrtpd:
Index: lib/CodeGen/CGBuiltin.cpp =================================================================== --- lib/CodeGen/CGBuiltin.cpp +++ lib/CodeGen/CGBuiltin.cpp @@ -9907,15 +9907,15 @@ Intrinsic::x86_avx512_mask_sqrt_ss; return Builder.CreateCall(CGM.getIntrinsic(IID), Ops); } - Value *A = Builder.CreateExtractElement(Ops[0], (uint64_t)0); + Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0); Function *F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType()); Value *Src = Builder.CreateExtractElement(Ops[2], (uint64_t)0); int MaskSize = Ops[3]->getType()->getScalarSizeInBits(); llvm::Type *MaskTy = llvm::VectorType::get(Builder.getInt1Ty(), MaskSize); Value *Mask = Builder.CreateBitCast(Ops[3], MaskTy); Mask = Builder.CreateExtractElement(Mask, (uint64_t)0); A = Builder.CreateSelect(Mask, Builder.CreateCall(F, {A}), Src); - return Builder.CreateInsertElement(Ops[1], A, (uint64_t)0); + return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0); } case X86::BI__builtin_ia32_sqrtpd256: case X86::BI__builtin_ia32_sqrtpd:
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits