Conanap updated this revision to Diff 281974.
Conanap marked 2 inline comments as done.
Conanap removed a reviewer: power-llvm-team.
Conanap added a comment.
Herald added a subscriber: kbarton.
Fixed some formatting issues and updated the patch to match the updated test file.
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D82502/new/
https://reviews.llvm.org/D82502
Files:
clang/lib/Headers/altivec.h
clang/test/CodeGen/builtins-ppc-p10vector.c
llvm/lib/Target/PowerPC/PPCISelLowering.cpp
llvm/lib/Target/PowerPC/PPCISelLowering.h
llvm/lib/Target/PowerPC/PPCInstrPrefix.td
llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll
Index: llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll
===================================================================
--- llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll
+++ llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll
@@ -1,13 +1,13 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
-; RUN: FileCheck %s
+; RUN: FileCheck %s --check-prefix=CHECK-LE
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
-; RUN: FileCheck %s
+; RUN: FileCheck %s --check-prefix=CHECK-BE
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O0 \
; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
-; RUN: FileCheck %s --check-prefix=CHECK-O0
+; RUN: FileCheck %s --check-prefix=CHECK-O0
; These test cases aim to test the builtins for the Power10 VSX vector
; instructions introduced in ISA 3.1.
@@ -15,13 +15,21 @@
declare i32 @llvm.ppc.vsx.xvtlsbb(<16 x i8>, i32)
define signext i32 @test_vec_test_lsbb_all_ones(<16 x i8> %vuca) {
-; CHECK-LABEL: test_vec_test_lsbb_all_ones:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvtlsbb cr0, v2
-; CHECK-NEXT: mfocrf r3, 128
-; CHECK-NEXT: srwi r3, r3, 31
-; CHECK-NEXT: extsw r3, r3
-; CHECK-NEXT: blr
+; CHECK-LE-LABEL: test_vec_test_lsbb_all_ones:
+; CHECK-LE: # %bb.0: # %entry
+; CHECK-LE-NEXT: xvtlsbb cr0, v2
+; CHECK-LE-NEXT: mfocrf r3, 128
+; CHECK-LE-NEXT: srwi r3, r3, 31
+; CHECK-LE-NEXT: extsw r3, r3
+; CHECK-LE-NEXT: blr
+;
+; CHECK-BE-LABEL: test_vec_test_lsbb_all_ones:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: xvtlsbb cr0, v2
+; CHECK-BE-NEXT: mfocrf r3, 128
+; CHECK-BE-NEXT: srwi r3, r3, 31
+; CHECK-BE-NEXT: extsw r3, r3
+; CHECK-BE-NEXT: blr
;
; CHECK-O0-LABEL: test_vec_test_lsbb_all_ones:
; CHECK-O0: # %bb.0: # %entry
@@ -36,13 +44,21 @@
}
define signext i32 @test_vec_test_lsbb_all_zeros(<16 x i8> %vuca) {
-; CHECK-LABEL: test_vec_test_lsbb_all_zeros:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvtlsbb cr0, v2
-; CHECK-NEXT: mfocrf r3, 128
-; CHECK-NEXT: rlwinm r3, r3, 3, 31, 31
-; CHECK-NEXT: extsw r3, r3
-; CHECK-NEXT: blr
+; CHECK-LE-LABEL: test_vec_test_lsbb_all_zeros:
+; CHECK-LE: # %bb.0: # %entry
+; CHECK-LE-NEXT: xvtlsbb cr0, v2
+; CHECK-LE-NEXT: mfocrf r3, 128
+; CHECK-LE-NEXT: rlwinm r3, r3, 3, 31, 31
+; CHECK-LE-NEXT: extsw r3, r3
+; CHECK-LE-NEXT: blr
+;
+; CHECK-BE-LABEL: test_vec_test_lsbb_all_zeros:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: xvtlsbb cr0, v2
+; CHECK-BE-NEXT: mfocrf r3, 128
+; CHECK-BE-NEXT: rlwinm r3, r3, 3, 31, 31
+; CHECK-BE-NEXT: extsw r3, r3
+; CHECK-BE-NEXT: blr
;
; CHECK-O0-LABEL: test_vec_test_lsbb_all_zeros:
; CHECK-O0: # %bb.0: # %entry
@@ -57,10 +73,15 @@
}
define void @vec_xst_trunc_sc(<1 x i128> %__vec, i64 %__offset, i8* nocapture %__ptr) {
-; CHECK-LABEL: vec_xst_trunc_sc:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: stxvrbx v2, r6, r5
-; CHECK-NEXT: blr
+; CHECK-LE-LABEL: vec_xst_trunc_sc:
+; CHECK-LE: # %bb.0: # %entry
+; CHECK-LE-NEXT: stxvrbx v2, r6, r5
+; CHECK-LE-NEXT: blr
+;
+; CHECK-BE-LABEL: vec_xst_trunc_sc:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: stxvrbx v2, r6, r5
+; CHECK-BE-NEXT: blr
;
; CHECK-O0-LABEL: vec_xst_trunc_sc:
; CHECK-O0: # %bb.0: # %entry
@@ -79,10 +100,15 @@
}
define void @vec_xst_trunc_uc(<1 x i128> %__vec, i64 %__offset, i8* nocapture %__ptr) {
-; CHECK-LABEL: vec_xst_trunc_uc:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: stxvrbx v2, r6, r5
-; CHECK-NEXT: blr
+; CHECK-LE-LABEL: vec_xst_trunc_uc:
+; CHECK-LE: # %bb.0: # %entry
+; CHECK-LE-NEXT: stxvrbx v2, r6, r5
+; CHECK-LE-NEXT: blr
+;
+; CHECK-BE-LABEL: vec_xst_trunc_uc:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: stxvrbx v2, r6, r5
+; CHECK-BE-NEXT: blr
;
; CHECK-O0-LABEL: vec_xst_trunc_uc:
; CHECK-O0: # %bb.0: # %entry
@@ -101,11 +127,17 @@
}
define void @vec_xst_trunc_ss(<1 x i128> %__vec, i64 %__offset, i16* nocapture %__ptr) {
-; CHECK-LABEL: vec_xst_trunc_ss:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: sldi r3, r5, 1
-; CHECK-NEXT: stxvrhx v2, r6, r3
-; CHECK-NEXT: blr
+; CHECK-LE-LABEL: vec_xst_trunc_ss:
+; CHECK-LE: # %bb.0: # %entry
+; CHECK-LE-NEXT: sldi r3, r5, 1
+; CHECK-LE-NEXT: stxvrhx v2, r6, r3
+; CHECK-LE-NEXT: blr
+;
+; CHECK-BE-LABEL: vec_xst_trunc_ss:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: sldi r3, r5, 1
+; CHECK-BE-NEXT: stxvrhx v2, r6, r3
+; CHECK-BE-NEXT: blr
;
; CHECK-O0-LABEL: vec_xst_trunc_ss:
; CHECK-O0: # %bb.0: # %entry
@@ -125,11 +157,17 @@
}
define void @vec_xst_trunc_us(<1 x i128> %__vec, i64 %__offset, i16* nocapture %__ptr) {
-; CHECK-LABEL: vec_xst_trunc_us:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: sldi r3, r5, 1
-; CHECK-NEXT: stxvrhx v2, r6, r3
-; CHECK-NEXT: blr
+; CHECK-LE-LABEL: vec_xst_trunc_us:
+; CHECK-LE: # %bb.0: # %entry
+; CHECK-LE-NEXT: sldi r3, r5, 1
+; CHECK-LE-NEXT: stxvrhx v2, r6, r3
+; CHECK-LE-NEXT: blr
+;
+; CHECK-BE-LABEL: vec_xst_trunc_us:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: sldi r3, r5, 1
+; CHECK-BE-NEXT: stxvrhx v2, r6, r3
+; CHECK-BE-NEXT: blr
;
; CHECK-O0-LABEL: vec_xst_trunc_us:
; CHECK-O0: # %bb.0: # %entry
@@ -149,11 +187,17 @@
}
define void @vec_xst_trunc_si(<1 x i128> %__vec, i64 %__offset, i32* nocapture %__ptr) {
-; CHECK-LABEL: vec_xst_trunc_si:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: sldi r3, r5, 2
-; CHECK-NEXT: stxvrwx v2, r6, r3
-; CHECK-NEXT: blr
+; CHECK-LE-LABEL: vec_xst_trunc_si:
+; CHECK-LE: # %bb.0: # %entry
+; CHECK-LE-NEXT: sldi r3, r5, 2
+; CHECK-LE-NEXT: stxvrwx v2, r6, r3
+; CHECK-LE-NEXT: blr
+;
+; CHECK-BE-LABEL: vec_xst_trunc_si:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: sldi r3, r5, 2
+; CHECK-BE-NEXT: stxvrwx v2, r6, r3
+; CHECK-BE-NEXT: blr
;
; CHECK-O0-LABEL: vec_xst_trunc_si:
; CHECK-O0: # %bb.0: # %entry
@@ -173,11 +217,17 @@
}
define void @vec_xst_trunc_ui(<1 x i128> %__vec, i64 %__offset, i32* nocapture %__ptr) {
-; CHECK-LABEL: vec_xst_trunc_ui:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: sldi r3, r5, 2
-; CHECK-NEXT: stxvrwx v2, r6, r3
-; CHECK-NEXT: blr
+; CHECK-LE-LABEL: vec_xst_trunc_ui:
+; CHECK-LE: # %bb.0: # %entry
+; CHECK-LE-NEXT: sldi r3, r5, 2
+; CHECK-LE-NEXT: stxvrwx v2, r6, r3
+; CHECK-LE-NEXT: blr
+;
+; CHECK-BE-LABEL: vec_xst_trunc_ui:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: sldi r3, r5, 2
+; CHECK-BE-NEXT: stxvrwx v2, r6, r3
+; CHECK-BE-NEXT: blr
;
; CHECK-O0-LABEL: vec_xst_trunc_ui:
; CHECK-O0: # %bb.0: # %entry
@@ -197,11 +247,17 @@
}
define void @vec_xst_trunc_sll(<1 x i128> %__vec, i64 %__offset, i64* nocapture %__ptr) {
-; CHECK-LABEL: vec_xst_trunc_sll:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: sldi r3, r5, 3
-; CHECK-NEXT: stxvrdx v2, r6, r3
-; CHECK-NEXT: blr
+; CHECK-LE-LABEL: vec_xst_trunc_sll:
+; CHECK-LE: # %bb.0: # %entry
+; CHECK-LE-NEXT: sldi r3, r5, 3
+; CHECK-LE-NEXT: stxvrdx v2, r6, r3
+; CHECK-LE-NEXT: blr
+;
+; CHECK-BE-LABEL: vec_xst_trunc_sll:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: sldi r3, r5, 3
+; CHECK-BE-NEXT: stxvrdx v2, r6, r3
+; CHECK-BE-NEXT: blr
;
; CHECK-O0-LABEL: vec_xst_trunc_sll:
; CHECK-O0: # %bb.0: # %entry
@@ -219,11 +275,17 @@
}
define void @vec_xst_trunc_ull(<1 x i128> %__vec, i64 %__offset, i64* nocapture %__ptr) {
-; CHECK-LABEL: vec_xst_trunc_ull:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: sldi r3, r5, 3
-; CHECK-NEXT: stxvrdx v2, r6, r3
-; CHECK-NEXT: blr
+; CHECK-LE-LABEL: vec_xst_trunc_ull:
+; CHECK-LE: # %bb.0: # %entry
+; CHECK-LE-NEXT: sldi r3, r5, 3
+; CHECK-LE-NEXT: stxvrdx v2, r6, r3
+; CHECK-LE-NEXT: blr
+;
+; CHECK-BE-LABEL: vec_xst_trunc_ull:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: sldi r3, r5, 3
+; CHECK-BE-NEXT: stxvrdx v2, r6, r3
+; CHECK-BE-NEXT: blr
;
; CHECK-O0-LABEL: vec_xst_trunc_ull:
; CHECK-O0: # %bb.0: # %entry
@@ -239,3 +301,236 @@
store i64 %conv, i64* %add.ptr, align 8
ret void
}
+
+define dso_local <1 x i128> @vec_xl_zext(i64 %__offset, i8* nocapture readonly %__pointer) {
+; CHECK-LE-LABEL: vec_xl_zext:
+; CHECK-LE: # %bb.0: # %entry
+; CHECK-LE-NEXT: lxvrbx v2, r4, r3
+; CHECK-LE-NEXT: blr
+;
+; CHECK-BE-LABEL: vec_xl_zext:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: lxvrbx v2, r4, r3
+; CHECK-BE-NEXT: blr
+;
+; CHECK-O0-LABEL: vec_xl_zext:
+; CHECK-O0: # %bb.0: # %entry
+; CHECK-O0-NEXT: lxvrbx vs0, r4, r3
+; CHECK-O0-NEXT: xxlor v2, vs0, vs0
+; CHECK-O0-NEXT: blr
+entry:
+ %add.ptr = getelementptr inbounds i8, i8* %__pointer, i64 %__offset
+ %0 = load i8, i8* %add.ptr, align 1
+ %conv = zext i8 %0 to i128
+ %splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
+ ret <1 x i128> %splat.splatinsert
+}
+
+define dso_local <1 x i128> @vec_xl_zext_short(i64 %__offset, i16* nocapture readonly %__pointer) {
+; CHECK-LE-LABEL: vec_xl_zext_short:
+; CHECK-LE: # %bb.0: # %entry
+; CHECK-LE-NEXT: sldi r3, r3, 1
+; CHECK-LE-NEXT: lxvrhx v2, r4, r3
+; CHECK-LE-NEXT: blr
+;
+; CHECK-BE-LABEL: vec_xl_zext_short:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: sldi r3, r3, 1
+; CHECK-BE-NEXT: lxvrhx v2, r4, r3
+; CHECK-BE-NEXT: blr
+;
+; CHECK-O0-LABEL: vec_xl_zext_short:
+; CHECK-O0: # %bb.0: # %entry
+; CHECK-O0-NEXT: sldi r3, r3, 1
+; CHECK-O0-NEXT: lxvrhx vs0, r4, r3
+; CHECK-O0-NEXT: xxlor v2, vs0, vs0
+; CHECK-O0-NEXT: blr
+entry:
+ %add.ptr = getelementptr inbounds i16, i16* %__pointer, i64 %__offset
+ %0 = load i16, i16* %add.ptr, align 2
+ %conv = zext i16 %0 to i128
+ %splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
+ ret <1 x i128> %splat.splatinsert
+}
+
+define dso_local <1 x i128> @vec_xl_zext_word(i64 %__offset, i32* nocapture readonly %__pointer) {
+; CHECK-LE-LABEL: vec_xl_zext_word:
+; CHECK-LE: # %bb.0: # %entry
+; CHECK-LE-NEXT: sldi r3, r3, 2
+; CHECK-LE-NEXT: lxvrwx v2, r4, r3
+; CHECK-LE-NEXT: blr
+;
+; CHECK-BE-LABEL: vec_xl_zext_word:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: sldi r3, r3, 2
+; CHECK-BE-NEXT: lxvrwx v2, r4, r3
+; CHECK-BE-NEXT: blr
+;
+; CHECK-O0-LABEL: vec_xl_zext_word:
+; CHECK-O0: # %bb.0: # %entry
+; CHECK-O0-NEXT: sldi r3, r3, 2
+; CHECK-O0-NEXT: lxvrwx vs0, r4, r3
+; CHECK-O0-NEXT: xxlor v2, vs0, vs0
+; CHECK-O0-NEXT: blr
+entry:
+ %add.ptr = getelementptr inbounds i32, i32* %__pointer, i64 %__offset
+ %0 = load i32, i32* %add.ptr, align 4
+ %conv = zext i32 %0 to i128
+ %splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
+ ret <1 x i128> %splat.splatinsert
+}
+
+define dso_local <1 x i128> @vec_xl_zext_dw(i64 %__offset, i64* nocapture readonly %__pointer) {
+; CHECK-LE-LABEL: vec_xl_zext_dw:
+; CHECK-LE: # %bb.0: # %entry
+; CHECK-LE-NEXT: sldi r3, r3, 3
+; CHECK-LE-NEXT: lxvrdx v2, r4, r3
+; CHECK-LE-NEXT: blr
+;
+; CHECK-BE-LABEL: vec_xl_zext_dw:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: sldi r3, r3, 3
+; CHECK-BE-NEXT: lxvrdx v2, r4, r3
+; CHECK-BE-NEXT: blr
+;
+; CHECK-O0-LABEL: vec_xl_zext_dw:
+; CHECK-O0: # %bb.0: # %entry
+; CHECK-O0-NEXT: sldi r3, r3, 3
+; CHECK-O0-NEXT: lxvrdx vs0, r4, r3
+; CHECK-O0-NEXT: xxlor v2, vs0, vs0
+; CHECK-O0-NEXT: blr
+entry:
+ %add.ptr = getelementptr inbounds i64, i64* %__pointer, i64 %__offset
+ %0 = load i64, i64* %add.ptr, align 8
+ %conv = zext i64 %0 to i128
+ %splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
+ ret <1 x i128> %splat.splatinsert
+}
+
+define dso_local <1 x i128> @vec_xl_sext_b(i64 %offset, i8* %p) {
+; CHECK-LE-LABEL: vec_xl_sext_b:
+; CHECK-LE: # %bb.0: # %entry
+; CHECK-LE-NEXT: lbzx r3, r4, r3
+; CHECK-LE-NEXT: extsb r3, r3
+; CHECK-LE-NEXT: sradi r4, r3, 63
+; CHECK-LE-NEXT: mtvsrdd v2, r4, r3
+; CHECK-LE-NEXT: blr
+;
+; CHECK-BE-LABEL: vec_xl_sext_b:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: lbzx r3, r4, r3
+; CHECK-BE-NEXT: extsb r3, r3
+; CHECK-BE-NEXT: sradi r4, r3, 63
+; CHECK-BE-NEXT: mtvsrdd v2, r4, r3
+; CHECK-BE-NEXT: blr
+;
+; CHECK-O0-LABEL: vec_xl_sext_b:
+; CHECK-O0: # %bb.0: # %entry
+; CHECK-O0-NEXT: lbzx r3, r4, r3
+; CHECK-O0-NEXT: extsb r3, r3
+; CHECK-O0-NEXT: sradi r4, r3, 63
+; CHECK-O0-NEXT: mtvsrdd v2, r4, r3
+; CHECK-O0-NEXT: blr
+entry:
+ %add.ptr = getelementptr inbounds i8, i8* %p, i64 %offset
+ %0 = load i8, i8* %add.ptr, align 1
+ %conv = sext i8 %0 to i128
+ %splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
+ ret <1 x i128> %splat.splatinsert
+}
+
+define dso_local <1 x i128> @vec_xl_sext_h(i64 %offset, i16* %p) {
+; CHECK-LE-LABEL: vec_xl_sext_h:
+; CHECK-LE: # %bb.0: # %entry
+; CHECK-LE-NEXT: sldi r3, r3, 1
+; CHECK-LE-NEXT: lhax r3, r4, r3
+; CHECK-LE-NEXT: sradi r4, r3, 63
+; CHECK-LE-NEXT: mtvsrdd v2, r4, r3
+; CHECK-LE-NEXT: blr
+;
+; CHECK-BE-LABEL: vec_xl_sext_h:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: sldi r3, r3, 1
+; CHECK-BE-NEXT: lhax r3, r4, r3
+; CHECK-BE-NEXT: sradi r4, r3, 63
+; CHECK-BE-NEXT: mtvsrdd v2, r4, r3
+; CHECK-BE-NEXT: blr
+;
+; CHECK-O0-LABEL: vec_xl_sext_h:
+; CHECK-O0: # %bb.0: # %entry
+; CHECK-O0-NEXT: sldi r3, r3, 1
+; CHECK-O0-NEXT: lhax r3, r4, r3
+; CHECK-O0-NEXT: sradi r4, r3, 63
+; CHECK-O0-NEXT: mtvsrdd v2, r4, r3
+; CHECK-O0-NEXT: blr
+entry:
+ %add.ptr = getelementptr inbounds i16, i16* %p, i64 %offset
+ %0 = load i16, i16* %add.ptr, align 2
+ %conv = sext i16 %0 to i128
+ %splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
+ ret <1 x i128> %splat.splatinsert
+}
+
+define dso_local <1 x i128> @vec_xl_sext_w(i64 %offset, i32* %p) {
+; CHECK-LE-LABEL: vec_xl_sext_w:
+; CHECK-LE: # %bb.0: # %entry
+; CHECK-LE-NEXT: sldi r3, r3, 2
+; CHECK-LE-NEXT: lwax r3, r4, r3
+; CHECK-LE-NEXT: sradi r4, r3, 63
+; CHECK-LE-NEXT: mtvsrdd v2, r4, r3
+; CHECK-LE-NEXT: blr
+;
+; CHECK-BE-LABEL: vec_xl_sext_w:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: sldi r3, r3, 2
+; CHECK-BE-NEXT: lwax r3, r4, r3
+; CHECK-BE-NEXT: sradi r4, r3, 63
+; CHECK-BE-NEXT: mtvsrdd v2, r4, r3
+; CHECK-BE-NEXT: blr
+;
+; CHECK-O0-LABEL: vec_xl_sext_w:
+; CHECK-O0: # %bb.0: # %entry
+; CHECK-O0-NEXT: sldi r3, r3, 2
+; CHECK-O0-NEXT: lwax r3, r4, r3
+; CHECK-O0-NEXT: sradi r4, r3, 63
+; CHECK-O0-NEXT: mtvsrdd v2, r4, r3
+; CHECK-O0-NEXT: blr
+entry:
+ %add.ptr = getelementptr inbounds i32, i32* %p, i64 %offset
+ %0 = load i32, i32* %add.ptr, align 4
+ %conv = sext i32 %0 to i128
+ %splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
+ ret <1 x i128> %splat.splatinsert
+}
+
+define dso_local <1 x i128> @vec_xl_sext_d(i64 %offset, i64* %p) {
+; CHECK-LE-LABEL: vec_xl_sext_d:
+; CHECK-LE: # %bb.0: # %entry
+; CHECK-LE-NEXT: sldi r3, r3, 3
+; CHECK-LE-NEXT: ldx r3, r4, r3
+; CHECK-LE-NEXT: sradi r4, r3, 63
+; CHECK-LE-NEXT: mtvsrdd v2, r4, r3
+; CHECK-LE-NEXT: blr
+;
+; CHECK-BE-LABEL: vec_xl_sext_d:
+; CHECK-BE: # %bb.0: # %entry
+; CHECK-BE-NEXT: sldi r3, r3, 3
+; CHECK-BE-NEXT: ldx r3, r4, r3
+; CHECK-BE-NEXT: sradi r4, r3, 63
+; CHECK-BE-NEXT: mtvsrdd v2, r4, r3
+; CHECK-BE-NEXT: blr
+;
+; CHECK-O0-LABEL: vec_xl_sext_d:
+; CHECK-O0: # %bb.0: # %entry
+; CHECK-O0-NEXT: sldi r3, r3, 3
+; CHECK-O0-NEXT: ldx r3, r4, r3
+; CHECK-O0-NEXT: sradi r4, r3, 63
+; CHECK-O0-NEXT: mtvsrdd v2, r4, r3
+; CHECK-O0-NEXT: blr
+entry:
+ %add.ptr = getelementptr inbounds i64, i64* %p, i64 %offset
+ %0 = load i64, i64* %add.ptr, align 8
+ %conv = sext i64 %0 to i128
+ %splat.splatinsert = insertelement <1 x i128> undef, i128 %conv, i32 0
+ ret <1 x i128> %splat.splatinsert
+}
Index: llvm/lib/Target/PowerPC/PPCInstrPrefix.td
===================================================================
--- llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -18,6 +18,15 @@
// address computations).
class isPCRel { bit PCRel = 1; }
+// PowerPC specific type constraints.
+def SDT_PPCLXVRZX : SDTypeProfile<1, 2, [
+ SDTCisVT<0, v1i128>, SDTCisPtrTy<1>, SDTCisPtrTy<2>
+]>;
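+// Result is v1i128; operand 1 is the load address and operand 2 is the
+// memory access width in bits (8, 16, 32 or 64) as a pointer-sized constant.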
+
+// PPC Specific DAG Nodes.
+def PPClxvrzx : SDNode<"PPCISD::LXVRZX", SDT_PPCLXVRZX,
+ [SDNPHasChain, SDNPMayLoad]>;
+
// Top-level class for prefixed instructions.
class PI<bits<6> pref, bits<6> opcode, dag OOL, dag IOL, string asmstr,
InstrItinClass itin> : Instruction {
@@ -1134,6 +1143,15 @@
(EXTRACT_SUBREG (XVTLSBB (COPY_TO_REGCLASS $XB, VSRC)), sub_lt)>;
def : Pat<(i32 (int_ppc_vsx_xvtlsbb v16i8:$XB, 0)),
(EXTRACT_SUBREG (XVTLSBB (COPY_TO_REGCLASS $XB, VSRC)), sub_eq)>;
+
+ def : Pat <(v1i128 (PPClxvrzx xoaddr:$src, 8)),
+ (v1i128 (COPY_TO_REGCLASS (LXVRBX xoaddr:$src), VRRC))>;
+ def : Pat <(v1i128 (PPClxvrzx xoaddr:$src, 16)),
+ (v1i128 (COPY_TO_REGCLASS (LXVRHX xoaddr:$src), VRRC))>;
+ def : Pat <(v1i128 (PPClxvrzx xoaddr:$src, 32)),
+ (v1i128 (COPY_TO_REGCLASS (LXVRWX xoaddr:$src), VRRC))>;
+ def : Pat <(v1i128 (PPClxvrzx xoaddr:$src, 64)),
+ (v1i128 (COPY_TO_REGCLASS (LXVRDX xoaddr:$src), VRRC))>;
}
let AddedComplexity = 400, Predicates = [IsISA3_1] in {
Index: llvm/lib/Target/PowerPC/PPCISelLowering.h
===================================================================
--- llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -477,6 +477,12 @@
/// an xxswapd.
LXVD2X,
+ /// LXVRZX - Load VSX Vector Rightmost and Zero Extend
+ /// This node represents v1i128 BUILD_VECTOR of a zero extending load
+ /// instruction from <byte, halfword, word, or doubleword> to i128.
+ /// Allows utilization of the Load VSX Vector Rightmost Instructions.
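+ /// For example, a v1i128 BUILD_VECTOR whose operand is an i8-to-i128
+ /// zero-extending load becomes LXVRZX Chain, Ptr, 8.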
+ LXVRZX,
+
/// VSRC, CHAIN = LOAD_VEC_BE CHAIN, Ptr - Occurs only for little endian.
/// Maps directly to one of lxvd2x/lxvw4x/lxvh8x/lxvb16x depending on
/// the vector type to load vector in big-endian element order.
Index: llvm/lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1434,6 +1434,7 @@
case PPCISD::MAT_PCREL_ADDR: return "PPCISD::MAT_PCREL_ADDR";
case PPCISD::LD_SPLAT: return "PPCISD::LD_SPLAT";
case PPCISD::FNMSUB: return "PPCISD::FNMSUB";
+ case PPCISD::LXVRZX: return "PPCISD::LXVRZX";
}
return nullptr;
}
@@ -13383,6 +13384,45 @@
return SDValue();
}
+// Look for the pattern of a load from a narrow width to i128, feeding
+// into a BUILD_VECTOR of v1i128. Replace this sequence with a PPCISD node
+// (LXVRZX). This node represents a zero extending load that will be matched
+// to the Load VSX Vector Rightmost instructions.
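+// For example, (v1i128 (build_vector (i128 (zextload i8, ptr)))) becomes
+// LXVRZX Chain, Ptr, 8; the constant operand carries the memory width in
+// bits and selects among LXVRBX/LXVRHX/LXVRWX/LXVRDX during instruction
+// selection.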
+static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG) {
+ SDLoc dl(N);
+
+ // This combine is only eligible for a BUILD_VECTOR of v1i128.
+ // Other return types are not valid for the LXVRZX replacement.
+ if (N->getValueType(0) != MVT::v1i128)
+ return SDValue();
+
+ SDValue Operand = N->getOperand(0);
+ // Proceed with the transformation if the operand to the BUILD_VECTOR
+ // is a load instruction.
+ if (Operand.getOpcode() != ISD::LOAD)
+ return SDValue();
+
+ LoadSDNode *LD = dyn_cast<LoadSDNode>(Operand);
+ EVT MemoryType = LD->getMemoryVT();
+
+ // This transformation is only valid if we are loading either a byte,
+ // halfword, word, or doubleword.
+ bool ValidLDType = MemoryType == MVT::i8 || MemoryType == MVT::i16 ||
+ MemoryType == MVT::i32 || MemoryType == MVT::i64;
+
+ // Ensure that the load from the narrow width is being zero extended to i128.
+ if (!ValidLDType || (LD->getValueType(0) != MVT::i128) ||
+     (LD->getExtensionType() != ISD::ZEXTLOAD &&
+      LD->getExtensionType() != ISD::EXTLOAD))
+ return SDValue();
+
+ SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr(),
+ DAG.getIntPtrConstant(MemoryType.getScalarSizeInBits(), dl)};
+
+ return DAG.getMemIntrinsicNode(PPCISD::LXVRZX, dl,
+ DAG.getVTList(MVT::v1i128, MVT::Other),
+ LoadOps, MemoryType, LD->getMemOperand());
+}
+
SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
DAGCombinerInfo &DCI) const {
assert(N->getOpcode() == ISD::BUILD_VECTOR &&
@@ -13420,6 +13460,14 @@
return Reduced;
}
+ // On Power10, the Load VSX Vector Rightmost instructions can be utilized
+ // if this is a BUILD_VECTOR of v1i128, and if the operand to the BUILD_VECTOR
+ // is a load from <valid narrow width> to i128.
+ if (Subtarget.isISA3_1()) {
+ SDValue BVOfZLoad = combineBVZEXTLOAD(N, DAG);
+ if (BVOfZLoad)
+ return BVOfZLoad;
+ }
if (N->getValueType(0) != MVT::v2f64)
return SDValue();
Index: clang/test/CodeGen/builtins-ppc-p10vector.c
===================================================================
--- clang/test/CodeGen/builtins-ppc-p10vector.c
+++ clang/test/CodeGen/builtins-ppc-p10vector.c
@@ -20,10 +20,14 @@
vector unsigned __int128 vui128a, vui128b, vui128c;
vector float vfa, vfb;
vector double vda, vdb;
-unsigned int uia, uib;
-unsigned char uca;
-unsigned short usa;
-unsigned long long ulla;
+signed int *iap;
+unsigned int uia, uib, *uiap;
+signed char *cap;
+unsigned char uca, *ucap;
+signed short *sap;
+unsigned short usa, *usap;
+signed long long *llap, llb;
+unsigned long long ulla, *ullap;
vector unsigned long long test_vpdepd(void) {
// CHECK: @llvm.ppc.altivec.vpdepd(<2 x i64>
@@ -634,3 +638,59 @@
// CHECK-NEXT: ret i32
return vec_test_lsbb_all_zeros(vuca);
}
+
+vector signed __int128 test_vec_xl_sext_i8(void) {
+ // CHECK: load i8
+ // CHECK: sext i8
+ // CHECK: ret <1 x i128>
+ return vec_xl_sext(llb, cap);
+}
+
+vector signed __int128 test_vec_xl_sext_i16(void) {
+ // CHECK: load i16
+ // CHECK: sext i16
+ // CHECK: ret <1 x i128>
+ return vec_xl_sext(llb, sap);
+}
+
+vector signed __int128 test_vec_xl_sext_i32(void) {
+ // CHECK: load i32
+ // CHECK: sext i32
+ // CHECK: ret <1 x i128>
+ return vec_xl_sext(llb, iap);
+}
+
+vector signed __int128 test_vec_xl_sext_i64(void) {
+ // CHECK: load i64
+ // CHECK: sext i64
+ // CHECK: ret <1 x i128>
+ return vec_xl_sext(llb, llap);
+}
+
+vector unsigned __int128 test_vec_xl_zext_i8(void) {
+ // CHECK: load i8
+ // CHECK: zext i8
+ // CHECK: ret <1 x i128>
+ return vec_xl_zext(llb, ucap);
+}
+
+vector unsigned __int128 test_vec_xl_zext_i16(void) {
+ // CHECK: load i16
+ // CHECK: zext i16
+ // CHECK: ret <1 x i128>
+ return vec_xl_zext(llb, usap);
+}
+
+vector unsigned __int128 test_vec_xl_zext_i32(void) {
+ // CHECK: load i32
+ // CHECK: zext i32
+ // CHECK: ret <1 x i128>
+ return vec_xl_zext(llb, uiap);
+}
+
+vector unsigned __int128 test_vec_xl_zext_i64(void) {
+ // CHECK: load i64
+ // CHECK: zext i64
+ // CHECK: ret <1 x i128>
+ return vec_xl_zext(llb, ullap);
+}
Index: clang/lib/Headers/altivec.h
===================================================================
--- clang/lib/Headers/altivec.h
+++ clang/lib/Headers/altivec.h
@@ -16509,6 +16509,54 @@
#define vec_xl_be vec_xl
#endif
+#if defined(__POWER10_VECTOR__) && defined(__VSX__)
+
+/* vec_xl_sext */
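+/* Load a scalar element at __pointer + __offset and sign extend it into a
+   vector of a single signed __int128. */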
+
+static __inline__ vector signed __int128 __ATTRS_o_ai
+vec_xl_sext(signed long long __offset, signed char *__pointer) {
+ return (vector signed __int128)*(__pointer + __offset);
+}
+
+static __inline__ vector signed __int128 __ATTRS_o_ai
+vec_xl_sext(signed long long __offset, signed short *__pointer) {
+ return (vector signed __int128)*(__pointer + __offset);
+}
+
+static __inline__ vector signed __int128 __ATTRS_o_ai
+vec_xl_sext(signed long long __offset, signed int *__pointer) {
+ return (vector signed __int128)*(__pointer + __offset);
+}
+
+static __inline__ vector signed __int128 __ATTRS_o_ai
+vec_xl_sext(signed long long __offset, signed long long *__pointer) {
+ return (vector signed __int128)*(__pointer + __offset);
+}
+
+/* vec_xl_zext */
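+/* Load a scalar element at __pointer + __offset and zero extend it into a
+   vector of a single unsigned __int128. */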
+
+static __inline__ vector unsigned __int128 __ATTRS_o_ai
+vec_xl_zext(signed long long __offset, unsigned char *__pointer) {
+ return (vector unsigned __int128)*(__pointer + __offset);
+}
+
+static __inline__ vector unsigned __int128 __ATTRS_o_ai
+vec_xl_zext(signed long long __offset, unsigned short *__pointer) {
+ return (vector unsigned __int128)*(__pointer + __offset);
+}
+
+static __inline__ vector unsigned __int128 __ATTRS_o_ai
+vec_xl_zext(signed long long __offset, unsigned int *__pointer) {
+ return (vector unsigned __int128)*(__pointer + __offset);
+}
+
+static __inline__ vector unsigned __int128 __ATTRS_o_ai
+vec_xl_zext(signed long long __offset, unsigned long long *__pointer) {
+ return (vector unsigned __int128)*(__pointer + __offset);
+}
+
+#endif
+
/* vec_xst */
static inline __ATTRS_o_ai void vec_xst(vector signed char __vec,