fhahn updated this revision to Diff 345702.
fhahn added a comment.
Herald added a subscriber: hiraditya.

Thanks for taking a look! I added a phase-ordering test and updated the
pipeline tests as well.
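
For reference, the second function in the new phase-ordering test corresponds
roughly to the kernel sketched below. The 15x15 double matrices and the outer
trip count of 4 are read off the checked IR; the function and parameter names
and the reference arguments are illustrative only:

  typedef double m15x15_t __attribute__((matrix_type(15, 15)));

  // Sketch (built with -fenable-matrix). With the index assumptions emitted
  // by this patch plus the extra VectorCombine run, the element accesses end
  // up as scalar GEP loads/stores at -O3 instead of full <225 x double>
  // vector loads and insert/extractelement, as the updated CHECK lines show.
  void kernel(m15x15_t &A, m15x15_t &B, unsigned i) {
    for (unsigned j = 0; j < 4; ++j)
      for (unsigned k = 0; k < i; ++k)
        B[k][j] = B[k][j] - A[k][j] * B[i][j];
  }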


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D102478/new/

https://reviews.llvm.org/D102478

Files:
  clang/lib/CodeGen/CGExpr.cpp
  clang/lib/CodeGen/CGExprScalar.cpp
  clang/test/CodeGen/matrix-type-operators.c
  clang/test/CodeGenCXX/matrix-type-operators.cpp
  clang/test/CodeGenObjC/matrix-type-operators.m
  llvm/include/llvm/IR/MatrixBuilder.h
  llvm/lib/Passes/PassBuilder.cpp
  llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
  llvm/test/Other/opt-O3-pipeline-enable-matrix.ll
  llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll

Index: llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll
===================================================================
--- llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll
+++ llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll
@@ -26,8 +26,7 @@
 ; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP7]], i64 0, i64 [[TMP1]]
 ; CHECK-NEXT:    [[MATRIXEXT7:%.*]] = load double, double* [[TMP9]], align 8
 ; CHECK-NEXT:    [[SUB:%.*]] = fsub double [[MATRIXEXT7]], [[MUL]]
-; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP7]], i64 0, i64 [[TMP1]]
-; CHECK-NEXT:    store double [[SUB]], double* [[TMP10]], align 8
+; CHECK-NEXT:    store double [[SUB]], double* [[TMP9]], align 8
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -93,43 +92,99 @@
 ; CHECK-NEXT:    [[CONV6:%.*]] = zext i32 [[I:%.*]] to i64
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast [225 x double]* [[B:%.*]] to <225 x double>*
 ; CHECK-NEXT:    [[CMP212_NOT:%.*]] = icmp eq i32 [[I]], 0
-; CHECK-NEXT:    br i1 [[CMP212_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER_US_PREHEADER:%.*]]
-; CHECK:       for.cond1.preheader.us.preheader:
-; CHECK-NEXT:    [[DOTPRE_PRE:%.*]] = load <225 x double>, <225 x double>* [[TMP1]], align 8
-; CHECK-NEXT:    br label [[FOR_COND1_PREHEADER_US:%.*]]
+; CHECK-NEXT:    br i1 [[CMP212_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER_US:%.*]]
 ; CHECK:       for.cond1.preheader.us:
-; CHECK-NEXT:    [[DOTPRE:%.*]] = phi <225 x double> [ [[MATINS_US:%.*]], [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US:%.*]] ], [ [[DOTPRE_PRE]], [[FOR_COND1_PREHEADER_US_PREHEADER]] ]
-; CHECK-NEXT:    [[J_014_US:%.*]] = phi i32 [ [[INC13_US:%.*]], [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US]] ], [ 0, [[FOR_COND1_PREHEADER_US_PREHEADER]] ]
-; CHECK-NEXT:    [[CONV5_US:%.*]] = zext i32 [[J_014_US]] to i64
-; CHECK-NEXT:    [[TMP2:%.*]] = mul nuw nsw i64 [[CONV5_US]], 15
-; CHECK-NEXT:    [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], [[CONV6]]
-; CHECK-NEXT:    [[TMP4:%.*]] = icmp ult i64 [[TMP3]], 225
-; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP4]])
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i32 [[I]], 225
+; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP2]])
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP1]], i64 0, i64 [[CONV6]]
 ; CHECK-NEXT:    br label [[FOR_BODY4_US:%.*]]
 ; CHECK:       for.body4.us:
-; CHECK-NEXT:    [[TMP5:%.*]] = phi <225 x double> [ [[DOTPRE]], [[FOR_COND1_PREHEADER_US]] ], [ [[MATINS_US]], [[FOR_BODY4_US]] ]
 ; CHECK-NEXT:    [[K_013_US:%.*]] = phi i32 [ 0, [[FOR_COND1_PREHEADER_US]] ], [ [[INC_US:%.*]], [[FOR_BODY4_US]] ]
 ; CHECK-NEXT:    [[CONV_US:%.*]] = zext i32 [[K_013_US]] to i64
-; CHECK-NEXT:    [[TMP6:%.*]] = add nuw nsw i64 [[TMP2]], [[CONV_US]]
-; CHECK-NEXT:    [[TMP7:%.*]] = icmp ult i64 [[TMP6]], 225
-; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP7]])
-; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP0]], i64 0, i64 [[TMP6]]
-; CHECK-NEXT:    [[MATRIXEXT_US:%.*]] = load double, double* [[TMP8]], align 8
-; CHECK-NEXT:    [[MATRIXEXT8_US:%.*]] = extractelement <225 x double> [[TMP5]], i64 [[TMP3]]
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp ult i32 [[K_013_US]], 225
+; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP4]])
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP0]], i64 0, i64 [[CONV_US]]
+; CHECK-NEXT:    [[MATRIXEXT_US:%.*]] = load double, double* [[TMP5]], align 8
+; CHECK-NEXT:    [[MATRIXEXT8_US:%.*]] = load double, double* [[TMP3]], align 8
 ; CHECK-NEXT:    [[MUL_US:%.*]] = fmul double [[MATRIXEXT_US]], [[MATRIXEXT8_US]]
-; CHECK-NEXT:    [[MATRIXEXT11_US:%.*]] = extractelement <225 x double> [[TMP5]], i64 [[TMP6]]
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP1]], i64 0, i64 [[CONV_US]]
+; CHECK-NEXT:    [[MATRIXEXT11_US:%.*]] = load double, double* [[TMP6]], align 8
 ; CHECK-NEXT:    [[SUB_US:%.*]] = fsub double [[MATRIXEXT11_US]], [[MUL_US]]
-; CHECK-NEXT:    [[MATINS_US]] = insertelement <225 x double> [[TMP5]], double [[SUB_US]], i64 [[TMP6]]
-; CHECK-NEXT:    store <225 x double> [[MATINS_US]], <225 x double>* [[TMP1]], align 8
-; CHECK-NEXT:    [[INC_US]] = add nuw i32 [[K_013_US]], 1
+; CHECK-NEXT:    store double [[SUB_US]], double* [[TMP6]], align 8
+; CHECK-NEXT:    [[INC_US]] = add nuw nsw i32 [[K_013_US]], 1
 ; CHECK-NEXT:    [[CMP2_US:%.*]] = icmp ult i32 [[INC_US]], [[I]]
-; CHECK-NEXT:    br i1 [[CMP2_US]], label [[FOR_BODY4_US]], label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US]]
+; CHECK-NEXT:    br i1 [[CMP2_US]], label [[FOR_BODY4_US]], label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US:%.*]]
 ; CHECK:       for.cond1.for.cond.cleanup3_crit_edge.us:
-; CHECK-NEXT:    [[INC13_US]] = add nuw nsw i32 [[J_014_US]], 1
-; CHECK-NEXT:    [[CMP_US:%.*]] = icmp ult i32 [[J_014_US]], 3
-; CHECK-NEXT:    br i1 [[CMP_US]], label [[FOR_COND1_PREHEADER_US]], label [[FOR_COND_CLEANUP]]
+; CHECK-NEXT:    [[TMP7:%.*]] = add nuw nsw i64 [[CONV6]], 15
+; CHECK-NEXT:    [[TMP8:%.*]] = icmp ult i32 [[I]], 210
+; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP8]])
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP1]], i64 0, i64 [[TMP7]]
+; CHECK-NEXT:    br label [[FOR_BODY4_US_1:%.*]]
 ; CHECK:       for.cond.cleanup:
 ; CHECK-NEXT:    ret void
+; CHECK:       for.body4.us.1:
+; CHECK-NEXT:    [[K_013_US_1:%.*]] = phi i32 [ 0, [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US]] ], [ [[INC_US_1:%.*]], [[FOR_BODY4_US_1]] ]
+; CHECK-NEXT:    [[NARROW:%.*]] = add nuw nsw i32 [[K_013_US_1]], 15
+; CHECK-NEXT:    [[TMP10:%.*]] = zext i32 [[NARROW]] to i64
+; CHECK-NEXT:    [[TMP11:%.*]] = icmp ult i32 [[K_013_US_1]], 210
+; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP11]])
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP0]], i64 0, i64 [[TMP10]]
+; CHECK-NEXT:    [[MATRIXEXT_US_1:%.*]] = load double, double* [[TMP12]], align 8
+; CHECK-NEXT:    [[MATRIXEXT8_US_1:%.*]] = load double, double* [[TMP9]], align 8
+; CHECK-NEXT:    [[MUL_US_1:%.*]] = fmul double [[MATRIXEXT_US_1]], [[MATRIXEXT8_US_1]]
+; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP1]], i64 0, i64 [[TMP10]]
+; CHECK-NEXT:    [[MATRIXEXT11_US_1:%.*]] = load double, double* [[TMP13]], align 8
+; CHECK-NEXT:    [[SUB_US_1:%.*]] = fsub double [[MATRIXEXT11_US_1]], [[MUL_US_1]]
+; CHECK-NEXT:    store double [[SUB_US_1]], double* [[TMP13]], align 8
+; CHECK-NEXT:    [[INC_US_1]] = add nuw nsw i32 [[K_013_US_1]], 1
+; CHECK-NEXT:    [[CMP2_US_1:%.*]] = icmp ult i32 [[INC_US_1]], [[I]]
+; CHECK-NEXT:    br i1 [[CMP2_US_1]], label [[FOR_BODY4_US_1]], label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US_1:%.*]]
+; CHECK:       for.cond1.for.cond.cleanup3_crit_edge.us.1:
+; CHECK-NEXT:    [[TMP14:%.*]] = add nuw nsw i64 [[CONV6]], 30
+; CHECK-NEXT:    [[TMP15:%.*]] = icmp ult i32 [[I]], 195
+; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP15]])
+; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP1]], i64 0, i64 [[TMP14]]
+; CHECK-NEXT:    br label [[FOR_BODY4_US_2:%.*]]
+; CHECK:       for.body4.us.2:
+; CHECK-NEXT:    [[K_013_US_2:%.*]] = phi i32 [ 0, [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US_1]] ], [ [[INC_US_2:%.*]], [[FOR_BODY4_US_2]] ]
+; CHECK-NEXT:    [[NARROW16:%.*]] = add nuw nsw i32 [[K_013_US_2]], 30
+; CHECK-NEXT:    [[TMP17:%.*]] = zext i32 [[NARROW16]] to i64
+; CHECK-NEXT:    [[TMP18:%.*]] = icmp ult i32 [[K_013_US_2]], 195
+; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP18]])
+; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP0]], i64 0, i64 [[TMP17]]
+; CHECK-NEXT:    [[MATRIXEXT_US_2:%.*]] = load double, double* [[TMP19]], align 8
+; CHECK-NEXT:    [[MATRIXEXT8_US_2:%.*]] = load double, double* [[TMP16]], align 8
+; CHECK-NEXT:    [[MUL_US_2:%.*]] = fmul double [[MATRIXEXT_US_2]], [[MATRIXEXT8_US_2]]
+; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP1]], i64 0, i64 [[TMP17]]
+; CHECK-NEXT:    [[MATRIXEXT11_US_2:%.*]] = load double, double* [[TMP20]], align 8
+; CHECK-NEXT:    [[SUB_US_2:%.*]] = fsub double [[MATRIXEXT11_US_2]], [[MUL_US_2]]
+; CHECK-NEXT:    store double [[SUB_US_2]], double* [[TMP20]], align 8
+; CHECK-NEXT:    [[INC_US_2]] = add nuw nsw i32 [[K_013_US_2]], 1
+; CHECK-NEXT:    [[CMP2_US_2:%.*]] = icmp ult i32 [[INC_US_2]], [[I]]
+; CHECK-NEXT:    br i1 [[CMP2_US_2]], label [[FOR_BODY4_US_2]], label [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US_2:%.*]]
+; CHECK:       for.cond1.for.cond.cleanup3_crit_edge.us.2:
+; CHECK-NEXT:    [[TMP21:%.*]] = add nuw nsw i64 [[CONV6]], 45
+; CHECK-NEXT:    [[TMP22:%.*]] = icmp ult i32 [[I]], 180
+; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP22]])
+; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP1]], i64 0, i64 [[TMP21]]
+; CHECK-NEXT:    br label [[FOR_BODY4_US_3:%.*]]
+; CHECK:       for.body4.us.3:
+; CHECK-NEXT:    [[K_013_US_3:%.*]] = phi i32 [ 0, [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US_2]] ], [ [[INC_US_3:%.*]], [[FOR_BODY4_US_3]] ]
+; CHECK-NEXT:    [[NARROW17:%.*]] = add nuw nsw i32 [[K_013_US_3]], 45
+; CHECK-NEXT:    [[TMP24:%.*]] = zext i32 [[NARROW17]] to i64
+; CHECK-NEXT:    [[TMP25:%.*]] = icmp ult i32 [[K_013_US_3]], 180
+; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP25]])
+; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP0]], i64 0, i64 [[TMP24]]
+; CHECK-NEXT:    [[MATRIXEXT_US_3:%.*]] = load double, double* [[TMP26]], align 8
+; CHECK-NEXT:    [[MATRIXEXT8_US_3:%.*]] = load double, double* [[TMP23]], align 8
+; CHECK-NEXT:    [[MUL_US_3:%.*]] = fmul double [[MATRIXEXT_US_3]], [[MATRIXEXT8_US_3]]
+; CHECK-NEXT:    [[TMP27:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP1]], i64 0, i64 [[TMP24]]
+; CHECK-NEXT:    [[MATRIXEXT11_US_3:%.*]] = load double, double* [[TMP27]], align 8
+; CHECK-NEXT:    [[SUB_US_3:%.*]] = fsub double [[MATRIXEXT11_US_3]], [[MUL_US_3]]
+; CHECK-NEXT:    store double [[SUB_US_3]], double* [[TMP27]], align 8
+; CHECK-NEXT:    [[INC_US_3]] = add nuw nsw i32 [[K_013_US_3]], 1
+; CHECK-NEXT:    [[CMP2_US_3:%.*]] = icmp ult i32 [[INC_US_3]], [[I]]
+; CHECK-NEXT:    br i1 [[CMP2_US_3]], label [[FOR_BODY4_US_3]], label [[FOR_COND_CLEANUP]]
 ;
 entry:
   %i.addr = alloca i32, align 4
Index: llvm/test/Other/opt-O3-pipeline-enable-matrix.ll
===================================================================
--- llvm/test/Other/opt-O3-pipeline-enable-matrix.ll
+++ llvm/test/Other/opt-O3-pipeline-enable-matrix.ll
@@ -109,6 +109,7 @@
 ; LEGACY-NEXT:         Dominator Tree Construction
 ; LEGACY-NEXT:         Basic Alias Analysis (stateless AA impl)
 ; LEGACY-NEXT:         Function Alias Analysis Results
+; LEGACY-NEXT:         Optimize scalar/vector ops
 ; LEGACY-NEXT:         Memory SSA
 ; LEGACY-NEXT:         Natural Loop Information
 ; LEGACY-NEXT:         Canonicalize natural loops
@@ -424,6 +425,7 @@
 ; NEWPM-NEXT: Running pass: LoopSimplifyPass on f
 ; NEWPM-NEXT: Running pass: LCSSAPass on f
 ; NEWPM-NEXT: Running pass: SROA on f
+; NEWPM-NEXT: Running pass: VectorCombinePass on f
 ; NEWPM-NEXT: Running pass: MergedLoadStoreMotionPass on f
 ; NEWPM-NEXT: Running pass: GVN on f
 ; NEWPM-NEXT: Running analysis: MemoryDependenceAnalysis on f
Index: llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
===================================================================
--- llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -427,6 +427,11 @@
   MPM.add(createCFGSimplificationPass());      // Merge & remove BBs
   MPM.add(createReassociatePass());           // Reassociate expressions
 
+  // The matrix extension can introduce large vector operations early, which can
+  // benefit from running vector-combine early on.
+  if (EnableMatrix)
+    MPM.add(createVectorCombinePass());
+
   // Begin the loop pass pipeline.
   if (EnableSimpleLoopUnswitch) {
     // The simple loop unswitch pass relies on separate cleanup passes. Schedule
Index: llvm/lib/Passes/PassBuilder.cpp
===================================================================
--- llvm/lib/Passes/PassBuilder.cpp
+++ llvm/lib/Passes/PassBuilder.cpp
@@ -807,6 +807,11 @@
   // Delete small array after loop unroll.
   FPM.addPass(SROA());
 
+  // The matrix extension can introduce large vector operations early, which can
+  // benefit from running vector-combine early on.
+  if (EnableMatrix)
+    FPM.addPass(VectorCombinePass());
+
   // Eliminate redundancies.
   FPM.addPass(MergedLoadStoreMotionPass());
   if (RunNewGVN)
Index: llvm/include/llvm/IR/MatrixBuilder.h
===================================================================
--- llvm/include/llvm/IR/MatrixBuilder.h
+++ llvm/include/llvm/IR/MatrixBuilder.h
@@ -231,9 +231,25 @@
                : (IsUnsigned ? B.CreateUDiv(LHS, RHS) : B.CreateSDiv(LHS, RHS));
   }
 
-  /// Extracts the element at (\p RowIdx, \p ColumnIdx) from \p Matrix.
-  Value *CreateExtractElement(Value *Matrix, Value *RowIdx, Value *ColumnIdx,
-                              unsigned NumRows, Twine const &Name = "") {
+  /// Create an assumption that \p Idx is less than \p NumElements.
+  void CreateIndexAssumption(Value *Idx, unsigned NumElements,
+                             Twine const &Name = "") {
+
+    Value *NumElts =
+        B.getIntN(Idx->getType()->getScalarSizeInBits(), NumElements);
+
+    auto *Cmp = B.CreateICmpULT(Idx, NumElts);
+    if (!isa<llvm::ConstantInt>(Cmp)) {
+      Function *TheFn =
+          Intrinsic::getDeclaration(getModule(), Intrinsic::assume);
+      B.CreateCall(TheFn->getFunctionType(), TheFn, {Cmp}, Name);
+    }
+  }
+
+  /// Compute the index to access the element at (\p RowIdx, \p ColumnIdx) from
+  /// a matrix with \p NumRows embedded in a vector.
+  Value *CreateIndex(Value *RowIdx, Value *ColumnIdx, unsigned NumRows,
+                     Twine const &Name = "") {
 
     unsigned MaxWidth = std::max(RowIdx->getType()->getScalarSizeInBits(),
                                  ColumnIdx->getType()->getScalarSizeInBits());
@@ -241,9 +257,7 @@
     RowIdx = B.CreateZExt(RowIdx, IntTy);
     ColumnIdx = B.CreateZExt(ColumnIdx, IntTy);
     Value *NumRowsV = B.getIntN(MaxWidth, NumRows);
-    return B.CreateExtractElement(
-        Matrix, B.CreateAdd(B.CreateMul(ColumnIdx, NumRowsV), RowIdx),
-        "matext");
+    return B.CreateAdd(B.CreateMul(ColumnIdx, NumRowsV), RowIdx);
   }
 };
 
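The CreateIndex / CreateIndexAssumption split is meant to be used along the
lines of the sketch below (not part of the patch; the helper name and the
plain IRBuilder<> type are illustrative). Note that CreateIndexAssumption
emits no llvm.assume when the bounds comparison folds to a constant, so
provably in-range constant indices stay assumption-free:

  #include "llvm/IR/IRBuilder.h"
  #include "llvm/IR/MatrixBuilder.h"

  // Illustrative helper: emit a bounds-assumed extract of the element at
  // (Row, Col) from a matrix embedded in a flat vector value.
  static llvm::Value *emitMatrixExtract(llvm::IRBuilder<> &B,
                                        llvm::Value *Matrix, llvm::Value *Row,
                                        llvm::Value *Col, unsigned NumRows,
                                        unsigned NumElts) {
    llvm::MatrixBuilder<llvm::IRBuilder<>> MB(B);
    llvm::Value *Idx = MB.CreateIndex(Row, Col, NumRows);
    MB.CreateIndexAssumption(Idx, NumElts);
    return B.CreateExtractElement(Matrix, Idx, "matrixext");
  }
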
Index: clang/test/CodeGenObjC/matrix-type-operators.m
===================================================================
--- clang/test/CodeGenObjC/matrix-type-operators.m
+++ clang/test/CodeGenObjC/matrix-type-operators.m
@@ -22,9 +22,11 @@
 // CHECK-NEXT:    [[IV2_PTR:%.*]] = bitcast %0* [[IV2]] to i8*
 // CHECK-NEXT:    [[CALL1:%.*]] = call i32 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i32 (i8*, i8*)*)(i8* [[IV2_PTR]], i8* [[SEL2]])
 // CHECK-NEXT:    [[CONV2:%.*]] = sext i32 [[CALL1]] to i64
-// CHECK-NEXT:    [[MAT:%.*]] = load <16 x double>, <16 x double>* {{.*}} align 8
 // CHECK-NEXT:    [[IDX1:%.*]] = mul i64 [[CONV2]], 4
 // CHECK-NEXT:    [[IDX2:%.*]] = add i64 [[IDX1]], [[CONV]]
+// CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IDX2]], 16
+// CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
+// CHECK-NEXT:    [[MAT:%.*]] = load <16 x double>, <16 x double>* {{.*}} align 8
 // CHECK-NEXT:    [[MATEXT:%.*]] = extractelement <16 x double> [[MAT]], i64 [[IDX2]]
 // CHECK-NEXT:    ret double [[MATEXT]]
 //
@@ -49,12 +51,14 @@
 // CHECK-NEXT:    [[IV2_PTR:%.*]] = bitcast %0* [[IV2]] to i8*
 // CHECK-NEXT:    [[CALL1:%.*]] = call i32 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i32 (i8*, i8*)*)(i8* [[IV2_PTR]], i8* [[SEL2]])
 // CHECK-NEXT:    [[CONV2:%.*]] = sext i32 [[CALL1]] to i64
+// CHECK-NEXT:    [[IDX1:%.*]] = mul i64 [[CONV2]], 4
+// CHECK-NEXT:    [[IDX2:%.*]] = add i64 [[IDX1]], [[CONV]]
+// CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IDX2]], 16
+// CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
 // CHECK-NEXT:    [[M:%.*]] = load %1*, %1** %m.addr, align 8
 // CHECK-NEXT:    [[SEL3:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES_, align 8, !invariant.load !7
 // CHECK-NEXT:    [[M_PTR:%.*]] = bitcast %1* [[M]] to i8*
 // CHECK-NEXT:    [[MAT:%.*]] = call <16 x double> bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to <16 x double> (i8*, i8*)*)(i8* [[M_PTR]], i8* [[SEL3]])
-// CHECK-NEXT:    [[IDX1:%.*]] = mul i64 [[CONV2]], 4
-// CHECK-NEXT:    [[IDX2:%.*]] = add i64 [[IDX1]], [[CONV]]
 // CHECK-NEXT:    [[MATEXT:%.*]] = extractelement <16 x double> [[MAT]], i64 [[IDX2]]
 // CHECK-NEXT:    ret double [[MATEXT]]
 //
Index: clang/test/CodeGenCXX/matrix-type-operators.cpp
===================================================================
--- clang/test/CodeGenCXX/matrix-type-operators.cpp
+++ clang/test/CodeGenCXX/matrix-type-operators.cpp
@@ -219,6 +219,8 @@
   // CHECK-NEXT:    [[IDX1:%.*]] = mul i64 [[J_EXT]], 2
   // CHECK-NEXT:    [[IDX2:%.*]] = add i64 [[IDX1]], [[I_EXT]]
   // CHECK-NEXT:    [[MAT_ADDR:%.*]] = bitcast [4 x i32]* {{.*}} to <4 x i32>*
+  // CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IDX2]], 4
+  // CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
   // CHECK-NEXT:    [[MAT:%.*]] = load <4 x i32>, <4 x i32>* [[MAT_ADDR]], align 4
   // CHECK-NEXT:    [[MATINS:%.*]] = insertelement <4 x i32> [[MAT]], i32 [[E]], i64 [[IDX2]]
   // CHECK-NEXT:    store <4 x i32> [[MATINS]], <4 x i32>* [[MAT_ADDR]], align 4
@@ -243,6 +245,8 @@
   // CHECK-NEXT:    [[IDX1:%.*]] = mul i64 [[J_EXT]], 3
   // CHECK-NEXT:    [[IDX2:%.*]] = add i64 [[IDX1]], [[I_EXT]]
   // CHECK-NEXT:    [[MAT_ADDR:%.*]] = bitcast [24 x float]* {{.*}} to <24 x float>*
+  // CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IDX2]], 24
+  // CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
   // CHECK-NEXT:    [[MAT:%.*]] = load <24 x float>, <24 x float>* [[MAT_ADDR]], align 4
   // CHECK-NEXT:    [[MATINS:%.*]] = insertelement <24 x float> [[MAT]], float [[E]], i64 [[IDX2]]
   // CHECK-NEXT:    store <24 x float> [[MATINS]], <24 x float>* [[MAT_ADDR]], align 4
@@ -315,11 +319,13 @@
   // CHECK-NEXT:    [[J:%.*]] = call i32 @_ZN15UnsignedWrappercvjEv(%struct.UnsignedWrapper* {{[^,]*}} %j)
   // CHECK-NEXT:    [[J_SUB:%.*]] = sub i32 [[J]], 1
   // CHECK-NEXT:    [[J_SUB_EXT:%.*]] = zext i32 [[J_SUB]] to i64
+  // CHECK-NEXT:    [[IDX1:%.*]] = mul i64 [[J_SUB_EXT]], 4
+  // CHECK-NEXT:    [[IDX2:%.*]] = add i64 [[IDX1]], [[I_ADD_EXT]]
+  // CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IDX2]], 16
+  // CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
   // CHECK-NEXT:    [[MAT_ADDR:%.*]] = load [16 x double]*, [16 x double]** %m.addr, align 8
   // CHECK-NEXT:    [[MAT_ADDR2:%.*]] = bitcast [16 x double]* [[MAT_ADDR]] to <16 x double>*
   // CHECK-NEXT:    [[MAT:%.*]] = load <16 x double>, <16 x double>* [[MAT_ADDR2]], align 8
-  // CHECK-NEXT:    [[IDX1:%.*]] = mul i64 [[J_SUB_EXT]], 4
-  // CHECK-NEXT:    [[IDX2:%.*]] = add i64 [[IDX1]], [[I_ADD_EXT]]
   // CHECK-NEXT:    [[MATEXT:%.*]]  = extractelement <16 x double> [[MAT]], i64 [[IDX2]]
   // CHECK-NEXT:    ret double [[MATEXT]]
   return m[i + 1][j - 1];
@@ -358,6 +364,8 @@
   // CHECK-NEXT:   [[IDX1:%.*]] = mul i64 [[I2_EXT]], 4
   // CHECK-NEXT:   [[IDX2:%.*]] = add i64 [[IDX1]], [[I_EXT]]
   // CHECK-NEXT:   [[MAT_ADDR:%.*]] = bitcast [16 x float]* %result to <16 x float>*
+  // CHECK-NEXT:   [[CMP:%.*]] = icmp ult i64 [[IDX2]], 16
+  // CHECK-NEXT:   call void @llvm.assume(i1 [[CMP]])
   // CHECK-NEXT:   [[MAT:%.*]] = load <16 x float>, <16 x float>* [[MAT_ADDR]], align 4
   // CHECK-NEXT:   [[MATINS:%.*]] = insertelement <16 x float> [[MAT]], float 1.000000e+00, i64 [[IDX2]]
   // CHECK-NEXT:   store <16 x float> [[MATINS]], <16 x float>* [[MAT_ADDR]], align 4
@@ -386,6 +394,8 @@
   // CHECK-NEXT:   [[IDX1:%.*]] = mul i64 [[I2_EXT]], 5
   // CHECK-NEXT:   [[IDX2:%.*]] = add i64 [[IDX1]], [[I_EXT]]
   // CHECK-NEXT:   [[MAT_ADDR:%.*]] = bitcast [25 x i32]* %result to <25 x i32>*
+  // CHECK-NEXT:   [[CMP:%.*]] = icmp ult i64 [[IDX2]], 25
+  // CHECK-NEXT:   call void @llvm.assume(i1 [[CMP]])
   // CHECK-NEXT:   [[MAT:%.*]] = load <25 x i32>, <25 x i32>* [[MAT_ADDR]], align 4
   // CHECK-NEXT:   [[MATINS:%.*]] = insertelement <25 x i32> [[MAT]], i32 1, i64 [[IDX2]]
   // CHECK-NEXT:   store <25 x i32> [[MATINS]], <25 x i32>* [[MAT_ADDR]], align 4
Index: clang/test/CodeGen/matrix-type-operators.c
===================================================================
--- clang/test/CodeGen/matrix-type-operators.c
+++ clang/test/CodeGen/matrix-type-operators.c
@@ -874,6 +874,8 @@
   // CHECK-NEXT:    [[K_EXT:%.*]] = zext i32 [[K]] to i64
   // CHECK-NEXT:    [[IDX1:%.*]] = mul i64 [[K_EXT]], 2
   // CHECK-NEXT:    [[IDX2:%.*]] = add i64 [[IDX1]], [[J_EXT]]
+  // CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IDX2]], 6
+  // CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
   // CHECK-NEXT:    [[MAT:%.*]] = load <6 x float>, <6 x float>* [[MAT_ADDR:%.*]], align 4
   // CHECK-NEXT:    [[MATINS:%.*]] = insertelement <6 x float> [[MAT]], float [[E]], i64 [[IDX2]]
   // CHECK-NEXT:    store <6 x float> [[MATINS]], <6 x float>* [[MAT_ADDR]], align 4
@@ -890,6 +892,8 @@
   // CHECK-NEXT:    [[K:%.*]] = load i64, i64* %k.addr, align 8
   // CHECK-NEXT:    [[IDX1:%.*]] = mul i64 [[K]], 2
   // CHECK-NEXT:    [[IDX2:%.*]] = add i64 [[IDX1]], [[J_EXT]]
+  // CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IDX2]], 6
+  // CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
   // CHECK-NEXT:    [[MAT:%.*]] = load <6 x float>, <6 x float>* [[MAT_ADDR:%.*]], align 4
   // CHECK-NEXT:    [[MATINS:%.*]] = insertelement <6 x float> [[MAT]], float [[E]], i64 [[IDX2]]
   // CHECK-NEXT:    store <6 x float> [[MATINS]], <6 x float>* [[MAT_ADDR]], align 4
@@ -907,6 +911,8 @@
   // CHECK-NEXT:    [[I2_ADD:%.*]] = add nsw i32 4, [[I2]]
   // CHECK-NEXT:    [[ADD_EXT:%.*]] = sext i32 [[I2_ADD]] to i64
   // CHECK-NEXT:    [[IDX2:%.*]] = add i64 18, [[ADD_EXT]]
+  // CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IDX2]], 27
+  // CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
   // CHECK-NEXT:    [[MAT:%.*]] = load <27 x i32>, <27 x i32>* [[MAT_ADDR:%.*]], align 4
   // CHECK-NEXT:    [[MATINS:%.*]] = insertelement <27 x i32> [[MAT]], i32 [[I1]], i64 [[IDX2]]
   // CHECK-NEXT:    store <27 x i32> [[MATINS]], <27 x i32>* [[MAT_ADDR]], align 4
@@ -980,9 +986,11 @@
   // CHECK-LABEL: @extract_int(
   // CHECK:         [[J1:%.*]] = load i64, i64* %j.addr, align 8
   // CHECK-NEXT:    [[J2:%.*]] = load i64, i64* %j.addr, align 8
-  // CHECK-NEXT:    [[MAT:%.*]] = load <27 x i32>, <27 x i32>* {{.*}}, align 4
   // CHECK-NEXT:    [[IDX1:%.*]] = mul i64 [[J2]], 9
   // CHECK-NEXT:    [[IDX2:%.*]] = add i64 [[IDX1]], [[J1]]
+  // CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IDX2]], 27
+  // CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
+  // CHECK-NEXT:    [[MAT:%.*]] = load <27 x i32>, <27 x i32>* {{.*}}, align 4
   // CHECK-NEXT:    [[MATEXT:%.*]] = extractelement <27 x i32> [[MAT]], i64 [[IDX2]]
   // CHECK-NEXT:    ret i32 [[MATEXT]]
 
@@ -995,13 +1003,15 @@
   // CHECK-LABEL: @test_extract_matrix_pointer1(
   // CHECK:         [[J:%.*]] = load i32, i32* %j.addr, align 4
   // CHECK-NEXT:    [[J_EXT:%.*]] = zext i32 [[J]] to i64
+  // CHECK-NEXT:    [[IDX:%.*]] = add i64 3, [[J_EXT]]
+  // CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IDX]], 6
+  // CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
   // CHECK-NEXT:    [[PTR:%.*]] = load [6 x double]**, [6 x double]*** %ptr.addr, align 8
   // CHECK-NEXT:    [[PTR_IDX:%.*]] = getelementptr inbounds [6 x double]*, [6 x double]** [[PTR]], i64 1
   // CHECK-NEXT:    [[PTR2:%.*]] = load [6 x double]*, [6 x double]** [[PTR_IDX]], align 8
   // CHECK-NEXT:    [[PTR2_IDX:%.*]] = getelementptr inbounds [6 x double], [6 x double]* [[PTR2]], i64 2
   // CHECK-NEXT:    [[MAT_ADDR:%.*]] = bitcast [6 x double]* [[PTR2_IDX]] to <6 x double>*
   // CHECK-NEXT:    [[MAT:%.*]] = load <6 x double>, <6 x double>* [[MAT_ADDR]], align 8
-  // CHECK-NEXT:    [[IDX:%.*]] = add i64 3, [[J_EXT]]
   // CHECK-NEXT:    [[MATEXT:%.*]] = extractelement <6 x double> [[MAT]], i64 [[IDX]]
   // CHECK-NEXT:    ret double [[MATEXT]]
 
@@ -1027,13 +1037,17 @@
   // CHECK-LABEL: @insert_extract(
   // CHECK:         [[K:%.*]] = load i16, i16* %k.addr, align 2
   // CHECK-NEXT:    [[K_EXT:%.*]] = sext i16 [[K]] to i64
-  // CHECK-NEXT:    [[MAT:%.*]] = load <9 x float>, <9 x float>* [[MAT_ADDR:%.*]], align 4
   // CHECK-NEXT:    [[IDX1:%.*]] = mul i64 [[K_EXT]], 3
   // CHECK-NEXT:    [[IDX2:%.*]] = add i64 [[IDX1]], 0
-  // CHECK-NEXT:    [[MATEXT:%.*]] = extractelement <9 x float> [[MAT]], i64 [[IDX]]
+  // CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IDX2]], 9
+  // CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
+  // CHECK-NEXT:    [[MAT:%.*]] = load <9 x float>, <9 x float>* [[MAT_ADDR:%.*]], align 4
+  // CHECK-NEXT:    [[MATEXT:%.*]] = extractelement <9 x float> [[MAT]], i64 [[IDX2]]
   // CHECK-NEXT:    [[J:%.*]] = load i64, i64* %j.addr, align 8
   // CHECK-NEXT:    [[IDX3:%.*]] = mul i64 [[J]], 3
   // CHECK-NEXT:    [[IDX4:%.*]] = add i64 [[IDX3]], 2
+  // CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IDX4]], 9
+  // CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
   // CHECK-NEXT:    [[MAT2:%.*]] = load <9 x float>, <9 x float>* [[MAT_ADDR]], align 4
   // CHECK-NEXT:    [[MATINS:%.*]] = insertelement <9 x float> [[MAT2]], float [[MATEXT]], i64 [[IDX4]]
   // CHECK-NEXT:    store <9 x float> [[MATINS]], <9 x float>* [[MAT_ADDR]], align 4
@@ -1068,9 +1082,13 @@
   // CHECK-NEXT:    [[IDX1:%.*]] = mul i64 [[J_EXT]], 2
   // CHECK-NEXT:    [[IDX2:%.*]] = add i64 [[IDX1]], [[I_EXT]]
   // CHECK-NEXT:    [[MAT_PTR:%.*]] = bitcast [6 x float]* %mat to <6 x float>*
+  // CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IDX2]], 6
+  // CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
   // CHECK-NEXT:    [[MAT:%.*]] = load <6 x float>, <6 x float>* [[MAT_PTR]], align 4
   // CHECK-NEXT:    [[EXT:%.*]] = extractelement <6 x float> [[MAT]], i64 [[IDX2]]
   // CHECK-NEXT:    [[SUM:%.*]] = fadd float [[EXT]], {{.*}}
+  // CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IDX2]], 6
+  // CHECK-NEXT:    call void @llvm.assume(i1 [[CMP]])
   // CHECK-NEXT:    [[MAT2:%.*]] = load <6 x float>, <6 x float>* [[MAT_PTR]], align 4
   // CHECK-NEXT:    [[INS:%.*]] = insertelement <6 x float> [[MAT2]], float [[SUM]], i64 [[IDX2]]
   // CHECK-NEXT:    store <6 x float> [[INS]], <6 x float>* [[MAT_PTR]], align 4
@@ -1085,23 +1103,29 @@
   // CHECK-NEXT:  [[I1_EXT:%.*]] = sext i32 [[I1]] to i64
   // CHECK-NEXT:  [[J1:%.*]] = load i32, i32* %j.addr, align 4
   // CHECK-NEXT:  [[J1_EXT:%.*]] = sext i32 [[J1]] to i64
-  // CHECK-NEXT:  [[A:%.*]] = load <27 x i32>, <27 x i32>* %0, align 4
   // CHECK-NEXT:  [[IDX1_1:%.*]] = mul i64 [[J1_EXT]], 9
   // CHECK-NEXT:  [[IDX1_2:%.*]] = add i64 [[IDX1_1]], [[I1_EXT]]
+  // CHECK-NEXT:  [[CMP:%.*]] = icmp ult i64 [[IDX1_2]], 27
+  // CHECK-NEXT:  call void @llvm.assume(i1 [[CMP]])
+  // CHECK-NEXT:  [[A:%.*]] = load <27 x i32>, <27 x i32>* %0, align 4
   // CHECK-NEXT:  [[MI1:%.*]] = extractelement <27 x i32> [[A]], i64 [[IDX1_2]]
   // CHECK-NEXT:  [[MI1_EXT:%.*]] = sext i32 [[MI1]] to i64
   // CHECK-NEXT:  [[J2:%.*]] = load i32, i32* %j.addr, align 4
   // CHECK-NEXT:  [[J2_EXT:%.*]] = sext i32 [[J2]] to i64
   // CHECK-NEXT:  [[I2:%.*]] = load i32, i32* %i.addr, align 4
   // CHECK-NEXT:  [[I2_EXT:%.*]] = sext i32 [[I2]] to i64
-  // CHECK-NEXT:  [[A2:%.*]] = load <27 x i32>, <27 x i32>* {{.*}}, align 4
   // CHECK-NEXT:  [[IDX2_1:%.*]] = mul i64 [[I2_EXT]], 9
   // CHECK-NEXT:  [[IDX2_2:%.*]] = add i64 [[IDX2_1]], [[J2_EXT]]
+  // CHECK-NEXT:  [[CMP:%.*]] = icmp ult i64 [[IDX2_2]], 27
+  // CHECK-NEXT:  call void @llvm.assume(i1 [[CMP]])
+  // CHECK-NEXT:  [[A2:%.*]] = load <27 x i32>, <27 x i32>* {{.*}}, align 4
   // CHECK-NEXT:  [[MI2:%.*]] = extractelement <27 x i32> [[A2]], i64 [[IDX2_2]]
   // CHECK-NEXT:  [[MI3:%.*]] = add nsw i32 [[MI2]], 2
   // CHECK-NEXT:  [[MI3_EXT:%.*]] = sext i32 [[MI3]] to i64
   // CHECK-NEXT:  [[IDX3_1:%.*]] = mul i64 [[MI3_EXT]], 5
   // CHECK-NEXT:  [[IDX3_2:%.*]] = add i64 [[IDX3_1]], [[MI1_EXT]]
+  // CHECK-NEXT:  [[CMP:%.*]] = icmp ult i64 [[IDX3_2]], 25
+  // CHECK-NEXT:  call void @llvm.assume(i1 [[CMP]])
   // CHECK-NEXT:  [[B:%.*]] = load <25 x double>, <25 x double>* [[B_PTR:%.*]], align 8
   // CHECK-NEXT:  [[INS:%.*]] = insertelement <25 x double> [[B]], double 1.500000e+00, i64 [[IDX3_2]]
   // CHECK-NEXT:  store <25 x double> [[INS]], <25 x double>* [[B_PTR]], align 8
Index: clang/lib/CodeGen/CGExprScalar.cpp
===================================================================
--- clang/lib/CodeGen/CGExprScalar.cpp
+++ clang/lib/CodeGen/CGExprScalar.cpp
@@ -1754,13 +1754,17 @@
   // integer value.
   Value *RowIdx = Visit(E->getRowIdx());
   Value *ColumnIdx = Visit(E->getColumnIdx());
+
+  auto *MatrixTy = E->getBase()->getType()->castAs<ConstantMatrixType>();
+  unsigned NumRows = MatrixTy->getNumRows();
+  llvm::MatrixBuilder<CGBuilderTy> MB(Builder);
+  Value *Idx = MB.CreateIndex(RowIdx, ColumnIdx, NumRows);
+  MB.CreateIndexAssumption(Idx, MatrixTy->getNumElementsFlattened());
+
   Value *Matrix = Visit(E->getBase());
 
   // TODO: Should we emit bounds checks with SanitizerKind::ArrayBounds?
-  llvm::MatrixBuilder<CGBuilderTy> MB(Builder);
-  return MB.CreateExtractElement(
-      Matrix, RowIdx, ColumnIdx,
-      E->getBase()->getType()->castAs<ConstantMatrixType>()->getNumRows());
+  return Builder.CreateExtractElement(Matrix, Idx, "matrixext");
 }
 
 static int getMaskElt(llvm::ShuffleVectorInst *SVI, unsigned Idx,
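
At the source level the change means an element read now emits the flattened
index and the llvm.assume before the vector load, e.g. (sketch; the typedef
name and the 4x4 shape are illustrative, matching the <16 x double> cases in
the updated tests above):

  typedef double m4x4_t __attribute__((matrix_type(4, 4)));

  double get(m4x4_t M, unsigned long Row, unsigned long Col) {
    // With -fenable-matrix this now lowers to roughly:
    //   %mul = mul i64 %Col, 4
    //   %idx = add i64 %mul, %Row
    //   %cmp = icmp ult i64 %idx, 16
    //   call void @llvm.assume(i1 %cmp)
    //   %mat = load <16 x double>, <16 x double>* ...
    //   %ext = extractelement <16 x double> %mat, i64 %idx
    return M[Row][Col];
  }
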
Index: clang/lib/CodeGen/CGExpr.cpp
===================================================================
--- clang/lib/CodeGen/CGExpr.cpp
+++ clang/lib/CodeGen/CGExpr.cpp
@@ -35,6 +35,7 @@
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/MatrixBuilder.h"
 #include "llvm/Support/ConvertUTF.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/Path.h"
@@ -1939,6 +1940,10 @@
     return EmitLoadOfGlobalRegLValue(LV);
 
   if (LV.isMatrixElt()) {
+    llvm::Value *Idx = LV.getMatrixIdx();
+    auto MatTy = LV.getType()->getAs<ConstantMatrixType>();
+    llvm::MatrixBuilder<CGBuilderTy> MB(Builder);
+    MB.CreateIndexAssumption(Idx, MatTy->getNumElementsFlattened());
     llvm::LoadInst *Load =
         Builder.CreateLoad(LV.getMatrixAddress(), LV.isVolatileQualified());
     return RValue::get(
@@ -2080,9 +2085,13 @@
       return EmitStoreThroughGlobalRegLValue(Src, Dst);
 
     if (Dst.isMatrixElt()) {
-      llvm::Value *Vec = Builder.CreateLoad(Dst.getMatrixAddress());
-      Vec = Builder.CreateInsertElement(Vec, Src.getScalarVal(),
-                                        Dst.getMatrixIdx(), "matins");
+      llvm::Value *Idx = Dst.getMatrixIdx();
+      auto MatTy = Dst.getType()->getAs<ConstantMatrixType>();
+      llvm::MatrixBuilder<CGBuilderTy> MB(Builder);
+      MB.CreateIndexAssumption(Idx, MatTy->getNumElementsFlattened());
+      llvm::Instruction *Load = Builder.CreateLoad(Dst.getMatrixAddress());
+      llvm::Value *Vec =
+          Builder.CreateInsertElement(Load, Src.getScalarVal(), Idx, "matins");
       Builder.CreateStore(Vec, Dst.getMatrixAddress(),
                           Dst.isVolatileQualified());
       return;