On 10/21/21 12:45 PM, matheus.fe...@eldorado.org.br wrote:
From: Matheus Ferst <matheus.fe...@eldorado.org.br>
Implement the following PowerISA v3.1 instructions:
vextdubvlx: Vector Extract Double Unsigned Byte to VSR using
GPR-specified Left-Index
vextduhvlx: Vector Extract Double Unsigned Halfword to VSR using
GPR-specified Left-Index
vextduwvlx: Vector Extract Double Unsigned Word to VSR using
GPR-specified Left-Index
vextddvlx: Vector Extract Double Doubleword to VSR using
GPR-specified Left-Index
vextdubvrx: Vector Extract Double Unsigned Byte to VSR using
GPR-specified Right-Index
vextduhvrx: Vector Extract Double Unsigned Halfword to VSR using
GPR-specified Right-Index
vextduwvrx: Vector Extract Double Unsigned Word to VSR using
GPR-specified Right-Index
vextddvrx: Vector Extract Double Doubleword to VSR using
GPR-specified Right-Index
Signed-off-by: Luis Pires <luis.pi...@eldorado.org.br>
Signed-off-by: Matheus Ferst <matheus.fe...@eldorado.org.br>
---
target/ppc/helper.h | 4 +++
target/ppc/insn32.decode | 12 +++++++++
target/ppc/int_helper.c | 41 ++++++++++++++++++++++++++++-
target/ppc/translate/vmx-impl.c.inc | 37 ++++++++++++++++++++++++++
4 files changed, 93 insertions(+), 1 deletion(-)
diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index 53c65ca1c7..ac8ab7e436 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -336,6 +336,10 @@ DEF_HELPER_2(vextuwlx, tl, tl, avr)
DEF_HELPER_2(vextubrx, tl, tl, avr)
DEF_HELPER_2(vextuhrx, tl, tl, avr)
DEF_HELPER_2(vextuwrx, tl, tl, avr)
+DEF_HELPER_5(VEXTDUBVLX, void, env, avr, avr, avr, tl)
+DEF_HELPER_5(VEXTDUHVLX, void, env, avr, avr, avr, tl)
+DEF_HELPER_5(VEXTDUWVLX, void, env, avr, avr, avr, tl)
+DEF_HELPER_5(VEXTDDVLX, void, env, avr, avr, avr, tl)
DEF_HELPER_2(vsbox, void, avr, avr)
DEF_HELPER_3(vcipher, void, avr, avr, avr)
diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
index 2eb7fb4e92..e438177b32 100644
--- a/target/ppc/insn32.decode
+++ b/target/ppc/insn32.decode
@@ -38,6 +38,9 @@
%dx_d 6:s10 16:5 0:1
@DX ...... rt:5 ..... .......... ..... . &DX d=%dx_d
+&VA vrt vra vrb rc
+@VA ...... vrt:5 vra:5 vrb:5 rc:5 ...... &VA
+
&VN vrt vra vrb sh
@VN ...... vrt:5 vra:5 vrb:5 .. sh:3 ...... &VN
@@ -347,6 +350,15 @@ VPEXTD 000100 ..... ..... ..... 10110001101 @VX
## Vector Permute and Formatting Instruction
+VEXTDUBVLX 000100 ..... ..... ..... ..... 011000 @VA
+VEXTDUBVRX 000100 ..... ..... ..... ..... 011001 @VA
+VEXTDUHVLX 000100 ..... ..... ..... ..... 011010 @VA
+VEXTDUHVRX 000100 ..... ..... ..... ..... 011011 @VA
+VEXTDUWVLX 000100 ..... ..... ..... ..... 011100 @VA
+VEXTDUWVRX 000100 ..... ..... ..... ..... 011101 @VA
+VEXTDDVLX 000100 ..... ..... ..... ..... 011110 @VA
+VEXTDDVRX 000100 ..... ..... ..... ..... 011111 @VA
+
VINSERTB 000100 ..... - .... ..... 01100001101 @VX_uim4
VINSERTH 000100 ..... - .... ..... 01101001101 @VX_uim4
VINSERTW 000100 ..... - .... ..... 01110001101 @VX_uim4
diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
index 5a925a564d..1577ea8788 100644
--- a/target/ppc/int_helper.c
+++ b/target/ppc/int_helper.c
@@ -1673,8 +1673,47 @@ VINSX(B, uint8_t)
VINSX(H, uint16_t)
VINSX(W, uint32_t)
VINSX(D, uint64_t)
-#undef ELEM_ADDR
#undef VINSX
+#define VEXTDVLX(NAME, TYPE) \
+void glue(glue(helper_VEXTD, NAME), VLX)(CPUPPCState *env, ppc_avr_t *t,
\
+ ppc_avr_t *a, ppc_avr_t *b,
\
+ target_ulong index)
\
+{
\
+ const int array_size = ARRAY_SIZE(t->u8), elem_size = sizeof(TYPE);
\
+ const target_long idx = index;
\
+
\
+ if (idx < 0) {
\
+ qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for VEXTD" #NAME "VRX
at"\
+ " 0x" TARGET_FMT_lx ", RC = " TARGET_FMT_ld " > %d\n", env->nip,
\
+ 32 - elem_size - idx, 32 - elem_size);
\
+ } else if (idx + elem_size <= array_size) {
\
+ t->VsrD(0) = *(TYPE *)ELEM_ADDR(a, idx, elem_size);
\
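You need an unaligned load here: idx is an arbitrary byte offset, so the
address returned by ELEM_ADDR is not guaranteed to be aligned for TYPE and
must not be dereferenced through a (TYPE *) cast.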
+ t->VsrD(1) = 0;
\
+ } else if (idx < array_size) {
\
+ ppc_avr_t tmp = { .u64 = { 0, 0 } };
\
+ const int len_a = array_size - idx, len_b = elem_size - len_a;
\
+
\
+ memmove(ELEM_ADDR(&tmp, array_size / 2 - elem_size, len_a),
\
+ ELEM_ADDR(a, idx, len_a), len_a);
\
+ memmove(ELEM_ADDR(&tmp, array_size / 2 - len_b, len_b),
\
+ ELEM_ADDR(b, 0, len_b), len_b);
\
tmp is a local, so you know it does not overlap the sources a and b; memcpy
will do here instead of memmove.
+
\
+ *t = tmp;
\
+ } else if (idx + elem_size <= 2 * array_size) {
\
+ t->VsrD(0) = *(TYPE *)ELEM_ADDR(b, idx - array_size, elem_size);
\
Another unaligned load.
Or... we could set this up as
ppc_avr_t tmp[2] = { *a, *b };
memset(t, 0, sizeof(*t));
if (idx >= 0 && idx + elem_size <= sizeof(tmp)) {
memcpy(t + 8 - elem_size, (char *)&tmp + idx, elem_size);
}
... with some sort of host-endian adjustment which I'm too lazy to work out at
the moment.
r~