[PATCH] D121829: [clang][AArc64][SVE] Implement vector-scalar operators

David Truby via Phabricator via cfe-commits Wed, 16 Mar 2022 10:14:34 -0700

DavidTruby created this revision.
Herald added subscribers: ctetreau, psnobl, tschuett.
Herald added a reviewer: efriedma.
Herald added a project: All.
DavidTruby requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.


This patch extends the support for C/C++ operators for SVE
types to allow one of the arguments to be a scalar, in which
case a vector splat is performed.

Depends on D121119 <https://reviews.llvm.org/D121119>


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D121829

Files:
  clang/include/clang/Sema/Sema.h
  clang/lib/CodeGen/CGExprScalar.cpp
  clang/lib/Sema/SemaExpr.cpp
  clang/test/CodeGen/aarch64-sve-vector-arith-ops.c

Index: clang/test/CodeGen/aarch64-sve-vector-arith-ops.c
===================================================================
--- clang/test/CodeGen/aarch64-sve-vector-arith-ops.c
+++ clang/test/CodeGen/aarch64-sve-vector-arith-ops.c
@@ -47,37 +47,45 @@
 
 // CHECK-LABEL: @add_u8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[ADD:%.*]] = add <vscale x 16 x i8> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[SPLAT_SPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+// CHECK-NEXT:    [[ADD:%.*]] = add <vscale x 16 x i8> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[ADD]]
 //
-svuint8_t add_u8(svuint8_t a, svuint8_t b) {
+svuint8_t add_u8(svuint8_t a, uint8_t b) {
   return a + b;
 }
 
 // CHECK-LABEL: @add_u16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[ADD:%.*]] = add <vscale x 8 x i16> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 8 x i16> [[SPLAT_SPLATINSERT]], <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+// CHECK-NEXT:    [[ADD:%.*]] = add <vscale x 8 x i16> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[ADD]]
 //
-svuint16_t add_u16(svuint16_t a, svuint16_t b) {
+svuint16_t add_u16(svuint16_t a, uint16_t b) {
   return a + b;
 }
 
 // CHECK-LABEL: @add_u32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[ADD:%.*]] = add <vscale x 4 x i32> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[SPLAT_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+// CHECK-NEXT:    [[ADD:%.*]] = add <vscale x 4 x i32> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[ADD]]
 //
-svuint32_t add_u32(svuint32_t a, svuint32_t b) {
+svuint32_t add_u32(svuint32_t a, uint32_t b) {
   return a + b;
 }
 
 // CHECK-LABEL: @add_u64(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[ADD:%.*]] = add <vscale x 2 x i64> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[SPLAT_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+// CHECK-NEXT:    [[ADD:%.*]] = add <vscale x 2 x i64> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[ADD]]
 //
-svuint64_t add_u64(svuint64_t a, svuint64_t b) {
+svuint64_t add_u64(svuint64_t a, uint64_t b) {
   return a + b;
 }
 
@@ -146,37 +154,45 @@
 
 // CHECK-LABEL: @add_inplace_u8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[ADD:%.*]] = add <vscale x 16 x i8> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[SPLAT_SPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+// CHECK-NEXT:    [[ADD:%.*]] = add <vscale x 16 x i8> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[ADD]]
 //
-svuint8_t add_inplace_u8(svuint8_t a, svuint8_t b) {
+svuint8_t add_inplace_u8(svuint8_t a, uint8_t b) {
   return a += b;
 }
 
 // CHECK-LABEL: @add_inplace_u16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[ADD:%.*]] = add <vscale x 8 x i16> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 8 x i16> [[SPLAT_SPLATINSERT]], <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+// CHECK-NEXT:    [[ADD:%.*]] = add <vscale x 8 x i16> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[ADD]]
 //
-svuint16_t add_inplace_u16(svuint16_t a, svuint16_t b) {
+svuint16_t add_inplace_u16(svuint16_t a, uint16_t b) {
   return a += b;
 }
 
 // CHECK-LABEL: @add_inplace_u32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[ADD:%.*]] = add <vscale x 4 x i32> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[SPLAT_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+// CHECK-NEXT:    [[ADD:%.*]] = add <vscale x 4 x i32> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[ADD]]
 //
-svuint32_t add_inplace_u32(svuint32_t a, svuint32_t b) {
+svuint32_t add_inplace_u32(svuint32_t a, uint32_t b) {
   return a += b;
 }
 
 // CHECK-LABEL: @add_inplace_u64(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[ADD:%.*]] = add <vscale x 2 x i64> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[SPLAT_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+// CHECK-NEXT:    [[ADD:%.*]] = add <vscale x 2 x i64> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[ADD]]
 //
-svuint64_t add_inplace_u64(svuint64_t a, svuint64_t b) {
+svuint64_t add_inplace_u64(svuint64_t a, uint64_t b) {
   return a += b;
 }
 
@@ -207,6 +223,121 @@
   return a += b;
 }
 
+// CHECK-LABEL: @add_scalar_i8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[SPLAT_SPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+// CHECK-NEXT:    [[ADD:%.*]] = add <vscale x 16 x i8> [[A:%.*]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[ADD]]
+//
+svint8_t add_scalar_i8(svint8_t a, int8_t b) {
+  return a + b;
+}
+
+// CHECK-LABEL: @add_scalar_i16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 8 x i16> [[SPLAT_SPLATINSERT]], <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+// CHECK-NEXT:    [[ADD:%.*]] = add <vscale x 8 x i16> [[A:%.*]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[ADD]]
+//
+svint16_t add_scalar_i16(svint16_t a, int16_t b) {
+  return a + b;
+}
+
+// CHECK-LABEL: @add_scalar_i32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[SPLAT_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+// CHECK-NEXT:    [[ADD:%.*]] = add <vscale x 4 x i32> [[A:%.*]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[ADD]]
+//
+svint32_t add_scalar_i32(svint32_t a, int32_t b) {
+  return a + b;
+}
+
+// CHECK-LABEL: @add_scalar_i64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[SPLAT_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+// CHECK-NEXT:    [[ADD:%.*]] = add <vscale x 2 x i64> [[A:%.*]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[ADD]]
+//
+svint64_t add_scalar_i64(svint64_t a, int64_t b) {
+  return a + b;
+}
+
+// CHECK-LABEL: @add_scalar_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[SPLAT_SPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+// CHECK-NEXT:    [[ADD:%.*]] = add <vscale x 16 x i8> [[A:%.*]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[ADD]]
+//
+svuint8_t add_scalar_u8(svuint8_t a, uint8_t b) {
+  return a + b;
+}
+
+// CHECK-LABEL: @add_scalar_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 8 x i16> [[SPLAT_SPLATINSERT]], <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+// CHECK-NEXT:    [[ADD:%.*]] = add <vscale x 8 x i16> [[A:%.*]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[ADD]]
+//
+svuint16_t add_scalar_u16(svuint16_t a, uint16_t b) {
+  return a + b;
+}
+
+// CHECK-LABEL: @add_scalar_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[SPLAT_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+// CHECK-NEXT:    [[ADD:%.*]] = add <vscale x 4 x i32> [[A:%.*]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[ADD]]
+//
+svuint32_t add_scalar_u32(svuint32_t a, uint32_t b) {
+  return a + b;
+}
+
+// CHECK-LABEL: @add_scalar_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[SPLAT_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+// CHECK-NEXT:    [[ADD:%.*]] = add <vscale x 2 x i64> [[A:%.*]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[ADD]]
+//
+svuint64_t add_scalar_u64(svuint64_t a, uint64_t b) {
+  return a + b;
+}
+
+// CHECK-LABEL: @add_scalar_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[ADD:%.*]] = fadd <vscale x 8 x half> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    ret <vscale x 8 x half> [[ADD]]
+//
+svfloat16_t add_scalar_f16(svfloat16_t a, svfloat16_t b) {
+  return a + b;
+}
+
+// CHECK-LABEL: @add_scalar_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[ADD:%.*]] = fadd <vscale x 4 x float> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    ret <vscale x 4 x float> [[ADD]]
+//
+svfloat32_t add_scalar_f32(svfloat32_t a, svfloat32_t b) {
+  return a + b;
+}
+
+// CHECK-LABEL: @add_scalar_f64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[ADD:%.*]] = fadd <vscale x 2 x double> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    ret <vscale x 2 x double> [[ADD]]
+//
+svfloat64_t add_scalar_f64(svfloat64_t a, svfloat64_t b) {
+  return a + b;
+}
+
 // SUBTRACTION
 
 // CHECK-LABEL: @sub_i8(
@@ -247,37 +378,45 @@
 
 // CHECK-LABEL: @sub_u8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SUB:%.*]] = sub <vscale x 16 x i8> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[SPLAT_SPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+// CHECK-NEXT:    [[SUB:%.*]] = sub <vscale x 16 x i8> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[SUB]]
 //
-svuint8_t sub_u8(svuint8_t a, svuint8_t b) {
+svuint8_t sub_u8(svuint8_t a, uint8_t b) {
   return a - b;
 }
 
 // CHECK-LABEL: @sub_u16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SUB:%.*]] = sub <vscale x 8 x i16> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 8 x i16> [[SPLAT_SPLATINSERT]], <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+// CHECK-NEXT:    [[SUB:%.*]] = sub <vscale x 8 x i16> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[SUB]]
 //
-svuint16_t sub_u16(svuint16_t a, svuint16_t b) {
+svuint16_t sub_u16(svuint16_t a, uint16_t b) {
   return a - b;
 }
 
 // CHECK-LABEL: @sub_u32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SUB:%.*]] = sub <vscale x 4 x i32> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[SPLAT_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+// CHECK-NEXT:    [[SUB:%.*]] = sub <vscale x 4 x i32> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[SUB]]
 //
-svuint32_t sub_u32(svuint32_t a, svuint32_t b) {
+svuint32_t sub_u32(svuint32_t a, uint32_t b) {
   return a - b;
 }
 
 // CHECK-LABEL: @sub_u64(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SUB:%.*]] = sub <vscale x 2 x i64> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[SPLAT_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+// CHECK-NEXT:    [[SUB:%.*]] = sub <vscale x 2 x i64> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[SUB]]
 //
-svuint64_t sub_u64(svuint64_t a, svuint64_t b) {
+svuint64_t sub_u64(svuint64_t a, uint64_t b) {
   return a - b;
 }
 
@@ -346,37 +485,45 @@
 
 // CHECK-LABEL: @sub_inplace_u8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SUB:%.*]] = sub <vscale x 16 x i8> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[SPLAT_SPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+// CHECK-NEXT:    [[SUB:%.*]] = sub <vscale x 16 x i8> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[SUB]]
 //
-svuint8_t sub_inplace_u8(svuint8_t a, svuint8_t b) {
+svuint8_t sub_inplace_u8(svuint8_t a, uint8_t b) {
   return a - b;
 }
 
 // CHECK-LABEL: @sub_inplace_u16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SUB:%.*]] = sub <vscale x 8 x i16> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 8 x i16> [[SPLAT_SPLATINSERT]], <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+// CHECK-NEXT:    [[SUB:%.*]] = sub <vscale x 8 x i16> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[SUB]]
 //
-svuint16_t sub_inplace_u16(svuint16_t a, svuint16_t b) {
+svuint16_t sub_inplace_u16(svuint16_t a, uint16_t b) {
   return a - b;
 }
 
 // CHECK-LABEL: @sub_inplace_u32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SUB:%.*]] = sub <vscale x 4 x i32> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[SPLAT_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+// CHECK-NEXT:    [[SUB:%.*]] = sub <vscale x 4 x i32> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[SUB]]
 //
-svuint32_t sub_inplace_u32(svuint32_t a, svuint32_t b) {
+svuint32_t sub_inplace_u32(svuint32_t a, uint32_t b) {
   return a - b;
 }
 
 // CHECK-LABEL: @sub_inplace_u64(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[SUB:%.*]] = sub <vscale x 2 x i64> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[SPLAT_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+// CHECK-NEXT:    [[SUB:%.*]] = sub <vscale x 2 x i64> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[SUB]]
 //
-svuint64_t sub_inplace_u64(svuint64_t a, svuint64_t b) {
+svuint64_t sub_inplace_u64(svuint64_t a, uint64_t b) {
   return a - b;
 }
 
@@ -407,6 +554,121 @@
   return a - b;
 }
 
+// CHECK-LABEL: @sub_scalar_i8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[SPLAT_SPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+// CHECK-NEXT:    [[SUB:%.*]] = sub <vscale x 16 x i8> [[A:%.*]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[SUB]]
+//
+svint8_t sub_scalar_i8(svint8_t a, int8_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_scalar_i16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 8 x i16> [[SPLAT_SPLATINSERT]], <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+// CHECK-NEXT:    [[SUB:%.*]] = sub <vscale x 8 x i16> [[A:%.*]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[SUB]]
+//
+svint16_t sub_scalar_i16(svint16_t a, int16_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_scalar_i32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[SPLAT_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+// CHECK-NEXT:    [[SUB:%.*]] = sub <vscale x 4 x i32> [[A:%.*]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[SUB]]
+//
+svint32_t sub_scalar_i32(svint32_t a, int32_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_scalar_i64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[SPLAT_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+// CHECK-NEXT:    [[SUB:%.*]] = sub <vscale x 2 x i64> [[A:%.*]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[SUB]]
+//
+svint64_t sub_scalar_i64(svint64_t a, int64_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_scalar_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[SPLAT_SPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+// CHECK-NEXT:    [[SUB:%.*]] = sub <vscale x 16 x i8> [[A:%.*]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[SUB]]
+//
+svuint8_t sub_scalar_u8(svuint8_t a, uint8_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_scalar_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 8 x i16> [[SPLAT_SPLATINSERT]], <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+// CHECK-NEXT:    [[SUB:%.*]] = sub <vscale x 8 x i16> [[A:%.*]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[SUB]]
+//
+svuint16_t sub_scalar_u16(svuint16_t a, uint16_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_scalar_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[SPLAT_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+// CHECK-NEXT:    [[SUB:%.*]] = sub <vscale x 4 x i32> [[A:%.*]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[SUB]]
+//
+svuint32_t sub_scalar_u32(svuint32_t a, uint32_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_scalar_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[SPLAT_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+// CHECK-NEXT:    [[SUB:%.*]] = sub <vscale x 2 x i64> [[A:%.*]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[SUB]]
+//
+svuint64_t sub_scalar_u64(svuint64_t a, uint64_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_scalar_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[SUB:%.*]] = fsub <vscale x 8 x half> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    ret <vscale x 8 x half> [[SUB]]
+//
+svfloat16_t sub_scalar_f16(svfloat16_t a, svfloat16_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_scalar_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[SUB:%.*]] = fsub <vscale x 4 x float> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    ret <vscale x 4 x float> [[SUB]]
+//
+svfloat32_t sub_scalar_f32(svfloat32_t a, svfloat32_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_scalar_f64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[SUB:%.*]] = fsub <vscale x 2 x double> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    ret <vscale x 2 x double> [[SUB]]
+//
+svfloat64_t sub_scalar_f64(svfloat64_t a, svfloat64_t b) {
+  return a - b;
+}
+
 // MULTIPLICATION
 
 // CHECK-LABEL: @mul_i8(
@@ -447,37 +709,45 @@
 
 // CHECK-LABEL: @mul_u8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[MUL:%.*]] = mul <vscale x 16 x i8> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[SPLAT_SPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+// CHECK-NEXT:    [[MUL:%.*]] = mul <vscale x 16 x i8> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[MUL]]
 //
-svuint8_t mul_u8(svuint8_t a, svuint8_t b) {
+svuint8_t mul_u8(svuint8_t a, uint8_t b) {
   return a * b;
 }
 
 // CHECK-LABEL: @mul_u16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[MUL:%.*]] = mul <vscale x 8 x i16> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 8 x i16> [[SPLAT_SPLATINSERT]], <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+// CHECK-NEXT:    [[MUL:%.*]] = mul <vscale x 8 x i16> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[MUL]]
 //
-svuint16_t mul_u16(svuint16_t a, svuint16_t b) {
+svuint16_t mul_u16(svuint16_t a, uint16_t b) {
   return a * b;
 }
 
 // CHECK-LABEL: @mul_u32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[MUL:%.*]] = mul <vscale x 4 x i32> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[SPLAT_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+// CHECK-NEXT:    [[MUL:%.*]] = mul <vscale x 4 x i32> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[MUL]]
 //
-svuint32_t mul_u32(svuint32_t a, svuint32_t b) {
+svuint32_t mul_u32(svuint32_t a, uint32_t b) {
   return a * b;
 }
 
 // CHECK-LABEL: @mul_u64(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[MUL:%.*]] = mul <vscale x 2 x i64> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[SPLAT_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+// CHECK-NEXT:    [[MUL:%.*]] = mul <vscale x 2 x i64> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[MUL]]
 //
-svuint64_t mul_u64(svuint64_t a, svuint64_t b) {
+svuint64_t mul_u64(svuint64_t a, uint64_t b) {
   return a * b;
 }
 
@@ -546,37 +816,45 @@
 
 // CHECK-LABEL: @mul_inplace_u8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[MUL:%.*]] = mul <vscale x 16 x i8> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[SPLAT_SPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+// CHECK-NEXT:    [[MUL:%.*]] = mul <vscale x 16 x i8> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[MUL]]
 //
-svuint8_t mul_inplace_u8(svuint8_t a, svuint8_t b) {
+svuint8_t mul_inplace_u8(svuint8_t a, uint8_t b) {
   return a * b;
 }
 
 // CHECK-LABEL: @mul_inplace_u16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[MUL:%.*]] = mul <vscale x 8 x i16> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 8 x i16> [[SPLAT_SPLATINSERT]], <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+// CHECK-NEXT:    [[MUL:%.*]] = mul <vscale x 8 x i16> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[MUL]]
 //
-svuint16_t mul_inplace_u16(svuint16_t a, svuint16_t b) {
+svuint16_t mul_inplace_u16(svuint16_t a, uint16_t b) {
   return a * b;
 }
 
 // CHECK-LABEL: @mul_inplace_u32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[MUL:%.*]] = mul <vscale x 4 x i32> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[SPLAT_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+// CHECK-NEXT:    [[MUL:%.*]] = mul <vscale x 4 x i32> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[MUL]]
 //
-svuint32_t mul_inplace_u32(svuint32_t a, svuint32_t b) {
+svuint32_t mul_inplace_u32(svuint32_t a, uint32_t b) {
   return a * b;
 }
 
 // CHECK-LABEL: @mul_inplace_u64(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[MUL:%.*]] = mul <vscale x 2 x i64> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[SPLAT_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+// CHECK-NEXT:    [[MUL:%.*]] = mul <vscale x 2 x i64> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[MUL]]
 //
-svuint64_t mul_inplace_u64(svuint64_t a, svuint64_t b) {
+svuint64_t mul_inplace_u64(svuint64_t a, uint64_t b) {
   return a * b;
 }
 
@@ -607,6 +885,121 @@
   return a * b;
 }
 
+// CHECK-LABEL: @mul_scalar_i8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[SPLAT_SPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+// CHECK-NEXT:    [[MUL:%.*]] = mul <vscale x 16 x i8> [[A:%.*]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[MUL]]
+//
+svint8_t mul_scalar_i8(svint8_t a, int8_t b) {
+  return a * b;
+}
+
+// CHECK-LABEL: @mul_scalar_i16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 8 x i16> [[SPLAT_SPLATINSERT]], <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+// CHECK-NEXT:    [[MUL:%.*]] = mul <vscale x 8 x i16> [[A:%.*]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[MUL]]
+//
+svint16_t mul_scalar_i16(svint16_t a, int16_t b) {
+  return a * b;
+}
+
+// CHECK-LABEL: @mul_scalar_i32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[SPLAT_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+// CHECK-NEXT:    [[MUL:%.*]] = mul <vscale x 4 x i32> [[A:%.*]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[MUL]]
+//
+svint32_t mul_scalar_i32(svint32_t a, int32_t b) {
+  return a * b;
+}
+
+// CHECK-LABEL: @mul_scalar_i64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[SPLAT_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+// CHECK-NEXT:    [[MUL:%.*]] = mul <vscale x 2 x i64> [[A:%.*]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[MUL]]
+//
+svint64_t mul_scalar_i64(svint64_t a, int64_t b) {
+  return a * b;
+}
+
+// CHECK-LABEL: @mul_scalar_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[SPLAT_SPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+// CHECK-NEXT:    [[MUL:%.*]] = mul <vscale x 16 x i8> [[A:%.*]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[MUL]]
+//
+svuint8_t mul_scalar_u8(svuint8_t a, uint8_t b) {
+  return a * b;
+}
+
+// CHECK-LABEL: @mul_scalar_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 8 x i16> [[SPLAT_SPLATINSERT]], <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+// CHECK-NEXT:    [[MUL:%.*]] = mul <vscale x 8 x i16> [[A:%.*]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[MUL]]
+//
+svuint16_t mul_scalar_u16(svuint16_t a, uint16_t b) {
+  return a * b;
+}
+
+// CHECK-LABEL: @mul_scalar_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[SPLAT_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+// CHECK-NEXT:    [[MUL:%.*]] = mul <vscale x 4 x i32> [[A:%.*]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[MUL]]
+//
+svuint32_t mul_scalar_u32(svuint32_t a, uint32_t b) {
+  return a * b;
+}
+
+// CHECK-LABEL: @mul_scalar_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[SPLAT_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+// CHECK-NEXT:    [[MUL:%.*]] = mul <vscale x 2 x i64> [[A:%.*]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[MUL]]
+//
+svuint64_t mul_scalar_u64(svuint64_t a, uint64_t b) {
+  return a * b;
+}
+
+// CHECK-LABEL: @mul_scalar_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[MUL:%.*]] = fmul <vscale x 8 x half> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    ret <vscale x 8 x half> [[MUL]]
+//
+svfloat16_t mul_scalar_f16(svfloat16_t a, svfloat16_t b) {
+  return a * b;
+}
+
+// CHECK-LABEL: @mul_scalar_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[MUL:%.*]] = fmul <vscale x 4 x float> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    ret <vscale x 4 x float> [[MUL]]
+//
+svfloat32_t mul_scalar_f32(svfloat32_t a, svfloat32_t b) {
+  return a * b;
+}
+
+// CHECK-LABEL: @mul_scalar_f64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[MUL:%.*]] = fmul <vscale x 2 x double> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    ret <vscale x 2 x double> [[MUL]]
+//
+svfloat64_t mul_scalar_f64(svfloat64_t a, svfloat64_t b) {
+  return a * b;
+}
+
 // DIVISION
 
 // CHECK-LABEL: @div_i8(
@@ -647,37 +1040,45 @@
 
 // CHECK-LABEL: @div_u8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DIV:%.*]] = udiv <vscale x 16 x i8> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[SPLAT_SPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+// CHECK-NEXT:    [[DIV:%.*]] = udiv <vscale x 16 x i8> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[DIV]]
 //
-svuint8_t div_u8(svuint8_t a, svuint8_t b) {
+svuint8_t div_u8(svuint8_t a, uint8_t b) {
   return a / b;
 }
 
 // CHECK-LABEL: @div_u16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DIV:%.*]] = udiv <vscale x 8 x i16> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 8 x i16> [[SPLAT_SPLATINSERT]], <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+// CHECK-NEXT:    [[DIV:%.*]] = udiv <vscale x 8 x i16> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[DIV]]
 //
-svuint16_t div_u16(svuint16_t a, svuint16_t b) {
+svuint16_t div_u16(svuint16_t a, uint16_t b) {
   return a / b;
 }
 
 // CHECK-LABEL: @div_u32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DIV:%.*]] = udiv <vscale x 4 x i32> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[SPLAT_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+// CHECK-NEXT:    [[DIV:%.*]] = udiv <vscale x 4 x i32> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[DIV]]
 //
-svuint32_t div_u32(svuint32_t a, svuint32_t b) {
+svuint32_t div_u32(svuint32_t a, uint32_t b) {
   return a / b;
 }
 
 // CHECK-LABEL: @div_u64(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DIV:%.*]] = udiv <vscale x 2 x i64> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[SPLAT_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+// CHECK-NEXT:    [[DIV:%.*]] = udiv <vscale x 2 x i64> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[DIV]]
 //
-svuint64_t div_u64(svuint64_t a, svuint64_t b) {
+svuint64_t div_u64(svuint64_t a, uint64_t b) {
   return a / b;
 }
 
@@ -746,37 +1147,45 @@
 
 // CHECK-LABEL: @div_inplace_u8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DIV:%.*]] = udiv <vscale x 16 x i8> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[SPLAT_SPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+// CHECK-NEXT:    [[DIV:%.*]] = udiv <vscale x 16 x i8> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[DIV]]
 //
-svuint8_t div_inplace_u8(svuint8_t a, svuint8_t b) {
+svuint8_t div_inplace_u8(svuint8_t a, uint8_t b) {
   return a / b;
 }
 
 // CHECK-LABEL: @div_inplace_u16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DIV:%.*]] = udiv <vscale x 8 x i16> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 8 x i16> [[SPLAT_SPLATINSERT]], <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+// CHECK-NEXT:    [[DIV:%.*]] = udiv <vscale x 8 x i16> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[DIV]]
 //
-svuint16_t div_inplace_u16(svuint16_t a, svuint16_t b) {
+svuint16_t div_inplace_u16(svuint16_t a, uint16_t b) {
   return a / b;
 }
 
 // CHECK-LABEL: @div_inplace_u32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DIV:%.*]] = udiv <vscale x 4 x i32> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[SPLAT_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+// CHECK-NEXT:    [[DIV:%.*]] = udiv <vscale x 4 x i32> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[DIV]]
 //
-svuint32_t div_inplace_u32(svuint32_t a, svuint32_t b) {
+svuint32_t div_inplace_u32(svuint32_t a, uint32_t b) {
   return a / b;
 }
 
 // CHECK-LABEL: @div_inplace_u64(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DIV:%.*]] = udiv <vscale x 2 x i64> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[SPLAT_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+// CHECK-NEXT:    [[DIV:%.*]] = udiv <vscale x 2 x i64> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[DIV]]
 //
-svuint64_t div_inplace_u64(svuint64_t a, svuint64_t b) {
+svuint64_t div_inplace_u64(svuint64_t a, uint64_t b) {
   return a / b;
 }
 
@@ -807,6 +1216,121 @@
   return a / b;
 }
 
+// CHECK-LABEL: @div_scalar_i8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[SPLAT_SPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+// CHECK-NEXT:    [[DIV:%.*]] = sdiv <vscale x 16 x i8> [[A:%.*]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[DIV]]
+//
+svint8_t div_scalar_i8(svint8_t a, int8_t b) {
+  return a / b;
+}
+
+// CHECK-LABEL: @div_scalar_i16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 8 x i16> [[SPLAT_SPLATINSERT]], <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+// CHECK-NEXT:    [[DIV:%.*]] = sdiv <vscale x 8 x i16> [[A:%.*]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[DIV]]
+//
+svint16_t div_scalar_i16(svint16_t a, int16_t b) {
+  return a / b;
+}
+
+// CHECK-LABEL: @div_scalar_i32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[SPLAT_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+// CHECK-NEXT:    [[DIV:%.*]] = sdiv <vscale x 4 x i32> [[A:%.*]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[DIV]]
+//
+svint32_t div_scalar_i32(svint32_t a, int32_t b) {
+  return a / b;
+}
+
+// CHECK-LABEL: @div_scalar_i64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[SPLAT_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+// CHECK-NEXT:    [[DIV:%.*]] = sdiv <vscale x 2 x i64> [[A:%.*]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[DIV]]
+//
+svint64_t div_scalar_i64(svint64_t a, int64_t b) {
+  return a / b;
+}
+
+// CHECK-LABEL: @div_scalar_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[SPLAT_SPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+// CHECK-NEXT:    [[DIV:%.*]] = udiv <vscale x 16 x i8> [[A:%.*]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[DIV]]
+//
+svuint8_t div_scalar_u8(svuint8_t a, uint8_t b) {
+  return a / b;
+}
+
+// CHECK-LABEL: @div_scalar_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 8 x i16> [[SPLAT_SPLATINSERT]], <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+// CHECK-NEXT:    [[DIV:%.*]] = udiv <vscale x 8 x i16> [[A:%.*]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[DIV]]
+//
+svuint16_t div_scalar_u16(svuint16_t a, uint16_t b) {
+  return a / b;
+}
+
+// CHECK-LABEL: @div_scalar_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[SPLAT_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+// CHECK-NEXT:    [[DIV:%.*]] = udiv <vscale x 4 x i32> [[A:%.*]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[DIV]]
+//
+svuint32_t div_scalar_u32(svuint32_t a, uint32_t b) {
+  return a / b;
+}
+
+// CHECK-LABEL: @div_scalar_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[SPLAT_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+// CHECK-NEXT:    [[DIV:%.*]] = udiv <vscale x 2 x i64> [[A:%.*]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[DIV]]
+//
+svuint64_t div_scalar_u64(svuint64_t a, uint64_t b) {
+  return a / b;
+}
+
+// CHECK-LABEL: @div_scalar_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[DIV:%.*]] = fdiv <vscale x 8 x half> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    ret <vscale x 8 x half> [[DIV]]
+//
+svfloat16_t div_scalar_f16(svfloat16_t a, svfloat16_t b) {
+  return a / b;
+}
+
+// CHECK-LABEL: @div_scalar_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[DIV:%.*]] = fdiv <vscale x 4 x float> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    ret <vscale x 4 x float> [[DIV]]
+//
+svfloat32_t div_scalar_f32(svfloat32_t a, svfloat32_t b) {
+  return a / b;
+}
+
+// CHECK-LABEL: @div_scalar_f64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[DIV:%.*]] = fdiv <vscale x 2 x double> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    ret <vscale x 2 x double> [[DIV]]
+//
+svfloat64_t div_scalar_f64(svfloat64_t a, svfloat64_t b) {
+  return a / b;
+}
+
 // REMAINDER
 
 // CHECK-LABEL: @rem_i8(
@@ -847,37 +1371,45 @@
 
 // CHECK-LABEL: @rem_u8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[REM:%.*]] = urem <vscale x 16 x i8> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[SPLAT_SPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+// CHECK-NEXT:    [[REM:%.*]] = urem <vscale x 16 x i8> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[REM]]
 //
-svuint8_t rem_u8(svuint8_t a, svuint8_t b) {
+svuint8_t rem_u8(svuint8_t a, uint8_t b) {
   return a % b;
 }
 
 // CHECK-LABEL: @rem_u16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[REM:%.*]] = urem <vscale x 8 x i16> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 8 x i16> [[SPLAT_SPLATINSERT]], <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+// CHECK-NEXT:    [[REM:%.*]] = urem <vscale x 8 x i16> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[REM]]
 //
-svuint16_t rem_u16(svuint16_t a, svuint16_t b) {
+svuint16_t rem_u16(svuint16_t a, uint16_t b) {
   return a % b;
 }
 
 // CHECK-LABEL: @rem_u32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[REM:%.*]] = urem <vscale x 4 x i32> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[SPLAT_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+// CHECK-NEXT:    [[REM:%.*]] = urem <vscale x 4 x i32> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[REM]]
 //
-svuint32_t rem_u32(svuint32_t a, svuint32_t b) {
+svuint32_t rem_u32(svuint32_t a, uint32_t b) {
   return a % b;
 }
 
 // CHECK-LABEL: @rem_u64(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[REM:%.*]] = urem <vscale x 2 x i64> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[SPLAT_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+// CHECK-NEXT:    [[REM:%.*]] = urem <vscale x 2 x i64> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[REM]]
 //
-svuint64_t rem_u64(svuint64_t a, svuint64_t b) {
+svuint64_t rem_u64(svuint64_t a, uint64_t b) {
   return a % b;
 }
 
@@ -919,36 +1451,132 @@
 
 // CHECK-LABEL: @rem_inplace_u8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[REM:%.*]] = urem <vscale x 16 x i8> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[SPLAT_SPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+// CHECK-NEXT:    [[REM:%.*]] = urem <vscale x 16 x i8> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[REM]]
 //
-svuint8_t rem_inplace_u8(svuint8_t a, svuint8_t b) {
+svuint8_t rem_inplace_u8(svuint8_t a, uint8_t b) {
   return a % b;
 }
 
 // CHECK-LABEL: @rem_inplace_u16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[REM:%.*]] = urem <vscale x 8 x i16> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 8 x i16> [[SPLAT_SPLATINSERT]], <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+// CHECK-NEXT:    [[REM:%.*]] = urem <vscale x 8 x i16> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[REM]]
 //
-svuint16_t rem_inplace_u16(svuint16_t a, svuint16_t b) {
+svuint16_t rem_inplace_u16(svuint16_t a, uint16_t b) {
   return a % b;
 }
 
 // CHECK-LABEL: @rem_inplace_u32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[REM:%.*]] = urem <vscale x 4 x i32> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[SPLAT_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+// CHECK-NEXT:    [[REM:%.*]] = urem <vscale x 4 x i32> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[REM]]
 //
-svuint32_t rem_inplace_u32(svuint32_t a, svuint32_t b) {
+svuint32_t rem_inplace_u32(svuint32_t a, uint32_t b) {
   return a % b;
 }
 
 // CHECK-LABEL: @rem_inplace_u64(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[REM:%.*]] = urem <vscale x 2 x i64> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[SPLAT_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+// CHECK-NEXT:    [[REM:%.*]] = urem <vscale x 2 x i64> [[A:%.*]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[REM]]
+//
+svuint64_t rem_inplace_u64(svuint64_t a, uint64_t b) {
+  return a % b;
+}
+
+// CHECK-LABEL: @rem_scalar_i8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[SPLAT_SPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+// CHECK-NEXT:    [[REM:%.*]] = srem <vscale x 16 x i8> [[A:%.*]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[REM]]
+//
+svint8_t rem_scalar_i8(svint8_t a, int8_t b) {
+  return a % b;
+}
+
+// CHECK-LABEL: @rem_scalar_i16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 8 x i16> [[SPLAT_SPLATINSERT]], <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+// CHECK-NEXT:    [[REM:%.*]] = srem <vscale x 8 x i16> [[A:%.*]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[REM]]
+//
+svint16_t rem_scalar_i16(svint16_t a, int16_t b) {
+  return a % b;
+}
+
+// CHECK-LABEL: @rem_scalar_i32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[SPLAT_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+// CHECK-NEXT:    [[REM:%.*]] = srem <vscale x 4 x i32> [[A:%.*]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[REM]]
+//
+svint32_t rem_scalar_i32(svint32_t a, int32_t b) {
+  return a % b;
+}
+
+// CHECK-LABEL: @rem_scalar_i64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[SPLAT_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+// CHECK-NEXT:    [[REM:%.*]] = srem <vscale x 2 x i64> [[A:%.*]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    ret <vscale x 2 x i64> [[REM]]
+//
+svint64_t rem_scalar_i64(svint64_t a, int64_t b) {
+  return a % b;
+}
+
+// CHECK-LABEL: @rem_scalar_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[SPLAT_SPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+// CHECK-NEXT:    [[REM:%.*]] = urem <vscale x 16 x i8> [[A:%.*]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    ret <vscale x 16 x i8> [[REM]]
+//
+svuint8_t rem_scalar_u8(svuint8_t a, uint8_t b) {
+  return a % b;
+}
+
+// CHECK-LABEL: @rem_scalar_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 8 x i16> [[SPLAT_SPLATINSERT]], <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+// CHECK-NEXT:    [[REM:%.*]] = urem <vscale x 8 x i16> [[A:%.*]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    ret <vscale x 8 x i16> [[REM]]
+//
+svuint16_t rem_scalar_u16(svuint16_t a, uint16_t b) {
+  return a % b;
+}
+
+// CHECK-LABEL: @rem_scalar_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[SPLAT_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+// CHECK-NEXT:    [[REM:%.*]] = urem <vscale x 4 x i32> [[A:%.*]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    ret <vscale x 4 x i32> [[REM]]
+//
+svuint32_t rem_scalar_u32(svuint32_t a, uint32_t b) {
+  return a % b;
+}
+
+// CHECK-LABEL: @rem_scalar_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[B:%.*]], i32 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[SPLAT_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+// CHECK-NEXT:    [[REM:%.*]] = urem <vscale x 2 x i64> [[A:%.*]], [[SPLAT_SPLAT]]
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[REM]]
 //
-svuint64_t rem_inplace_u64(svuint64_t a, svuint64_t b) {
+svuint64_t rem_scalar_u64(svuint64_t a, uint64_t b) {
   return a % b;
 }
Index: clang/lib/Sema/SemaExpr.cpp
===================================================================
--- clang/lib/Sema/SemaExpr.cpp
+++ clang/lib/Sema/SemaExpr.cpp
@@ -10465,7 +10465,17 @@
 
 QualType Sema::CheckSizelessVectorOperands(ExprResult &LHS, ExprResult &RHS,
                                            SourceLocation Loc,
+                                           bool IsCompAssign,
                                            ArithConvKind OperationKind) {
+  if (!IsCompAssign) {
+    LHS = DefaultFunctionArrayLvalueConversion(LHS.get());
+    if (LHS.isInvalid())
+      return QualType();
+  }
+  RHS = DefaultFunctionArrayLvalueConversion(RHS.get());
+  if (RHS.isInvalid())
+    return QualType();
+
   QualType LHSType = LHS.get()->getType().getUnqualifiedType();
   QualType RHSType = RHS.get()->getType().getUnqualifiedType();
 
@@ -10483,6 +10493,26 @@
   if (Context.hasSameType(LHSType, RHSType))
     return LHSType;
 
+  auto tryScalableVectorConvert = [this](ExprResult *Src, QualType SrcType,
+                                         QualType DestType) {
+    const QualType DestBaseType = DestType->getSveEltType(Context);
+    if (DestBaseType->getUnqualifiedDesugaredType() ==
+        SrcType->getUnqualifiedDesugaredType()) {
+      unsigned DiagID = diag::err_typecheck_invalid_operands;
+      if (!tryVectorConvertAndSplat(*this, Src, SrcType, DestBaseType, DestType,
+                                    DiagID))
+        return DestType;
+    }
+    return QualType();
+  };
+
+  if (LHSType->isVLSTBuiltinType() && !RHSType->isVLSTBuiltinType())
+    return tryScalableVectorConvert(&RHS, RHSType, LHSType);
+
+  if (RHSType->isVLSTBuiltinType() && !LHSType->isVLSTBuiltinType())
+    return tryScalableVectorConvert((IsCompAssign ? nullptr : &LHS), LHSType,
+                                    RHSType);
+
   Diag(Loc, DiagID) << LHSType << RHSType << LHS.get()->getSourceRange()
                     << RHS.get()->getSourceRange();
   return QualType();
@@ -10602,7 +10632,8 @@
                                /*AllowBooleanOperation*/ false,
                                /*ReportInvalid*/ true);
   if (LHSTy->isVLSTBuiltinType() || RHSTy->isVLSTBuiltinType())
-    return CheckSizelessVectorOperands(LHS, RHS, Loc, ACK_Arithmetic);
+    return CheckSizelessVectorOperands(LHS, RHS, Loc, IsCompAssign,
+                                       ACK_Arithmetic);
   if (!IsDiv &&
       (LHSTy->isConstantMatrixType() || RHSTy->isConstantMatrixType()))
     return CheckMatrixMultiplyOperands(LHS, RHS, Loc, IsCompAssign);
@@ -10642,17 +10673,12 @@
     return InvalidOperands(Loc, LHS, RHS);
   }
 
-  if (LHS.get()->getType()->isVLSTBuiltinType() &&
+  if (LHS.get()->getType()->isVLSTBuiltinType() ||
       RHS.get()->getType()->isVLSTBuiltinType()) {
-    if (LHS.get()
-            ->getType()
-            ->getSveEltType(Context)
-            ->hasIntegerRepresentation() &&
-        RHS.get()
-            ->getType()
-            ->getSveEltType(Context)
-            ->hasIntegerRepresentation())
-      return CheckSizelessVectorOperands(LHS, RHS, Loc, ACK_Arithmetic);
+    if (LHS.get()->getType()->hasIntegerRepresentation() &&
+        RHS.get()->getType()->hasIntegerRepresentation())
+      return CheckSizelessVectorOperands(LHS, RHS, Loc, IsCompAssign,
+                                         ACK_Arithmetic);
 
     return InvalidOperands(Loc, LHS, RHS);
   }
@@ -10967,7 +10993,7 @@
   if (LHS.get()->getType()->isVLSTBuiltinType() ||
       RHS.get()->getType()->isVLSTBuiltinType()) {
     QualType compType =
-        CheckSizelessVectorOperands(LHS, RHS, Loc, ACK_Arithmetic);
+        CheckSizelessVectorOperands(LHS, RHS, Loc, CompLHSTy, ACK_Arithmetic);
     if (CompLHSTy)
       *CompLHSTy = compType;
     return compType;
@@ -11082,7 +11108,7 @@
   if (LHS.get()->getType()->isVLSTBuiltinType() ||
       RHS.get()->getType()->isVLSTBuiltinType()) {
     QualType compType =
-        CheckSizelessVectorOperands(LHS, RHS, Loc, ACK_Arithmetic);
+        CheckSizelessVectorOperands(LHS, RHS, Loc, CompLHSTy, ACK_Arithmetic);
     if (CompLHSTy)
       *CompLHSTy = compType;
     return compType;
@@ -12897,7 +12923,8 @@
       RHS.get()->getType()->isVLSTBuiltinType()) {
     if (LHS.get()->getType()->hasIntegerRepresentation() &&
         RHS.get()->getType()->hasIntegerRepresentation())
-      return CheckSizelessVectorOperands(LHS, RHS, Loc, ACK_BitwiseOp);
+      return CheckSizelessVectorOperands(LHS, RHS, Loc, IsCompAssign,
+                                         ACK_BitwiseOp);
     return InvalidOperands(Loc, LHS, RHS);
   }
 
@@ -12905,7 +12932,8 @@
       RHS.get()->getType()->isVLSTBuiltinType()) {
     if (LHS.get()->getType()->hasIntegerRepresentation() &&
         RHS.get()->getType()->hasIntegerRepresentation())
-      return CheckSizelessVectorOperands(LHS, RHS, Loc, ACK_BitwiseOp);
+      return CheckSizelessVectorOperands(LHS, RHS, Loc, IsCompAssign,
+                                         ACK_BitwiseOp);
     return InvalidOperands(Loc, LHS, RHS);
   }
 
Index: clang/lib/CodeGen/CGExprScalar.cpp
===================================================================
--- clang/lib/CodeGen/CGExprScalar.cpp
+++ clang/lib/CodeGen/CGExprScalar.cpp
@@ -32,6 +32,7 @@
 #include "llvm/IR/CFG.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/FixedPointBuilder.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/GetElementPtrTypeIterator.h"
@@ -40,6 +41,7 @@
 #include "llvm/IR/IntrinsicsPowerPC.h"
 #include "llvm/IR/MatrixBuilder.h"
 #include "llvm/IR/Module.h"
+#include "llvm/Support/TypeSize.h"
 #include <cstdarg>
 
 using namespace clang;
@@ -2329,9 +2331,10 @@
   }
   case CK_VectorSplat: {
     llvm::Type *DstTy = ConvertType(DestTy);
-    Value *Elt = Visit(const_cast<Expr*>(E));
+    Value *Elt = Visit(const_cast<Expr *>(E));
     // Splat the element across to all elements
-    unsigned NumElements = cast<llvm::FixedVectorType>(DstTy)->getNumElements();
+    llvm::ElementCount NumElements =
+        cast<llvm::VectorType>(DstTy)->getElementCount();
     return Builder.CreateVectorSplat(NumElements, Elt, "splat");
   }
 
Index: clang/include/clang/Sema/Sema.h
===================================================================
--- clang/include/clang/Sema/Sema.h
+++ clang/include/clang/Sema/Sema.h
@@ -11943,7 +11943,7 @@
 
   // type checking for sizeless vector binary operators.
   QualType CheckSizelessVectorOperands(ExprResult &LHS, ExprResult &RHS,
-                                       SourceLocation Loc,
+                                       SourceLocation Loc, bool IsCompAssign,
                                        ArithConvKind OperationKind);
 
   /// Type checking for matrix binary operators.

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D121829: [clang][AArc64][SVE] Implement vector-scalar operators

Reply via email to