[Qemu-devel] [PATCH] pseries: Add H_SET_MODE hcall to change guest exception endianness
Hi Anthony, > > +if (resource == 4) { > > This ought to be a #define. There's no else here, is that expected? > Should you return failure for a different resource? Good point, I made it a define. We were returning H_P2 for a different resource, but it was a bit of a twisted maze of return statements. I tried to clear it up in this version. > Without knowing this interface better, a few things come to mind. > > Is mflags a boolean? If so, you can reduce this to a single loop and > drop the switch() statement. If mflags is truly a set of flags, it > would be nice to use #define to give the flags a proper symbolic name. Unfortunately it isn't a boolean, but yes it should have been made clearer with a #define. Anton -- pseries: Add H_SET_MODE hcall to change guest exception endianness H_SET_MODE is used for controlling various partition settings. One of these settings is the endianness a guest takes its exceptions in. Signed-off-by: Anton Blanchard --- diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 16bfab9..de639f6 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -262,7 +262,7 @@ static void *spapr_create_fdt_skel(const char *cpu_model, uint32_t start_prop = cpu_to_be32(initrd_base); uint32_t end_prop = cpu_to_be32(initrd_base + initrd_size); char hypertas_prop[] = "hcall-pft\0hcall-term\0hcall-dabr\0hcall-interrupt" -"\0hcall-tce\0hcall-vio\0hcall-splpar\0hcall-bulk"; +"\0hcall-tce\0hcall-vio\0hcall-splpar\0hcall-bulk\0hcall-set-mode"; char qemu_hypertas_prop[] = "hcall-memop1"; uint32_t refpoints[] = {cpu_to_be32(0x4), cpu_to_be32(0x4)}; uint32_t interrupt_server_ranges_prop[] = {0, cpu_to_be32(smp_cpus)}; diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c index 67d6cd9..89e6a00 100644 --- a/hw/ppc/spapr_hcall.c +++ b/hw/ppc/spapr_hcall.c @@ -657,6 +657,54 @@ static target_ulong h_logical_dcbf(PowerPCCPU *cpu, sPAPREnvironment *spapr, return H_SUCCESS; } +static target_ulong h_set_mode(PowerPCCPU *cpu, sPAPREnvironment *spapr, + target_ulong opcode, 
target_ulong *args) +{ +CPUState *cs; +target_ulong mflags = args[0]; +target_ulong resource = args[1]; +target_ulong value1 = args[2]; +target_ulong value2 = args[3]; +target_ulong ret = H_P2; + +if (resource == H_SET_MODE_ENDIAN) { +if (value1) { +ret = H_P3; +goto out; +} +if (value2) { +ret = H_P4; +goto out; +} + +switch (mflags) { +case H_SET_MODE_ENDIAN_BIG: +for (cs = first_cpu; cs != NULL; cs = cs->next_cpu) { +PowerPCCPU *cp = POWERPC_CPU(cs); +CPUPPCState *env = &cp->env; +env->spr[SPR_LPCR] &= ~LPCR_ILE; +} +ret = H_SUCCESS; +break; + +case H_SET_MODE_ENDIAN_LITTLE: +for (cs = first_cpu; cs != NULL; cs = cs->next_cpu) { +PowerPCCPU *cp = POWERPC_CPU(cs); +CPUPPCState *env = &cp->env; +env->spr[SPR_LPCR] |= LPCR_ILE; +} +ret = H_SUCCESS; +break; + +default: +ret = H_UNSUPPORTED_FLAG; +} +} + +out: +return ret; +} + static spapr_hcall_fn papr_hypercall_table[(MAX_HCALL_OPCODE / 4) + 1]; static spapr_hcall_fn kvmppc_hypercall_table[KVMPPC_HCALL_MAX - KVMPPC_HCALL_BASE + 1]; @@ -734,6 +782,8 @@ static void hypercall_register_types(void) /* qemu/KVM-PPC specific hcalls */ spapr_register_hypercall(KVMPPC_H_RTAS, h_rtas); + +spapr_register_hypercall(H_SET_MODE, h_set_mode); } type_init(hypercall_register_types) diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h index 9fc1972..ab42813 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -109,6 +109,15 @@ typedef struct sPAPREnvironment { #define H_NOT_ENOUGH_RESOURCES -44 #define H_R_STATE -45 #define H_RESCINDEND -46 +#define H_P2 -55 +#define H_P3 -56 +#define H_P4 -57 +#define H_P5 -58 +#define H_P6 -59 +#define H_P7 -60 +#define H_P8 -61 +#define H_P9 -62 +#define H_UNSUPPORTED_FLAG -256 #define H_MULTI_THREADS_ACTIVE -9005 @@ -143,6 +152,11 @@ typedef struct sPAPREnvironment { #define H_PP1 (1ULL<<(63-62)) #define H_PP2 (1ULL<<(63-63)) +/* H_SET_MODE flags */ +#define H_SET_MODE_ENDIAN 4 +#define H_SET_MODE_ENDIAN_BIG 0 +#define H_SET_MODE_ENDIAN_LITTLE 1 + /* VASI States */ 
#define H_VASI_INVALID0 #define H_VASI_ENABLED1 @@ -267,7 +281,8 @@ typedef struct sPAPREnvironment { #define H_GET_EM_PARMS 0x2B8 #define H_SET_MPP 0x2D0 #define H_GET_MPP
[Qemu-devel] [PATCH] spapr-vlan: Don't touch last entry in buffer list
The last 8 bytes of the buffer list is defined to contain the number of dropped frames. At the moment we use it to store rx entries, which trips up ethtool -S: rx_no_buffer: 9223380832981355136 Fix this by skipping the last buffer list entry. Signed-off-by: Anton Blanchard --- diff --git a/hw/net/spapr_llan.c b/hw/net/spapr_llan.c index 2d47df6..23c47d3 100644 --- a/hw/net/spapr_llan.c +++ b/hw/net/spapr_llan.c @@ -72,7 +72,14 @@ typedef uint64_t vlan_bd_t; #define VLAN_RXQ_BD_OFF 0 #define VLAN_FILTER_BD_OFF 8 #define VLAN_RX_BDS_OFF 16 -#define VLAN_MAX_BUFS((SPAPR_TCE_PAGE_SIZE - VLAN_RX_BDS_OFF) / 8) +/* + * The final 8 bytes of the buffer list is a counter of frames dropped + * because there was not a buffer in the buffer list capable of holding + * the frame. We must avoid it, or the operating system will report garbage + * for this statistic. + */ +#define VLAN_RX_BDS_LEN (SPAPR_TCE_PAGE_SIZE - VLAN_RX_BDS_OFF - 8) +#define VLAN_MAX_BUFS(VLAN_RX_BDS_LEN / 8) #define TYPE_VIO_SPAPR_VLAN_DEVICE "spapr-vlan" #define VIO_SPAPR_VLAN_DEVICE(obj) \ @@ -119,7 +126,7 @@ static ssize_t spapr_vlan_receive(NetClientState *nc, const uint8_t *buf, do { buf_ptr += 8; -if (buf_ptr >= SPAPR_TCE_PAGE_SIZE) { +if (buf_ptr >= (VLAN_RX_BDS_LEN + VLAN_RX_BDS_OFF)) { buf_ptr = VLAN_RX_BDS_OFF; } @@ -397,7 +404,7 @@ static target_ulong h_add_logical_lan_buffer(PowerPCCPU *cpu, do { dev->add_buf_ptr += 8; -if (dev->add_buf_ptr >= SPAPR_TCE_PAGE_SIZE) { +if (dev->add_buf_ptr >= (VLAN_RX_BDS_LEN + VLAN_RX_BDS_OFF)) { dev->add_buf_ptr = VLAN_RX_BDS_OFF; }
[Qemu-devel] [Bug 965327] Re: virtio-pci: can't reserve io 0x0000-0x001f
Scrubbing our ppc64 bugs. Thanks for the update Ken, I'll close this. -- You received this bug notification because you are a member of qemu-devel-ml, which is subscribed to QEMU. https://bugs.launchpad.net/bugs/965327 Title: virtio-pci: can't reserve io 0x0000-0x001f Status in QEMU: New Bug description: Before 2012-03-05 I was able to successfully enable a virtio-pci block device from a sPAPR pseries ppc64 Linux guest. With the current git master branch after this date I get the following error: virtio-pci :00:00.0: device not available (can't reserve [io 0x-0x001f]) virtio-pci: probe of :00:00.0 failed with error -22 virtio-pci :00:01.0: device not available (can't reserve [io 0x-0x003f]) virtio-pci: probe of :00:01.0 failed with error -22 Full details: - command line: - ./testing/qemu/ppc64-softmmu/qemu-system-ppc64 \ -L ./testing/qemu/pc-bios \ -M pseries \ -m 1024 \ -rtc base=localtime \ -parallel none \ -netdev type=user,id=mynet0,hostfwd=tcp:127.0.0.1:9011-10.0.2.11:22 \ -device virtio-net-pci,netdev=mynet0 \ -drive file=images/suse-ppc.img,if=virtio,index=0,media=disk,cache=unsafe \ -kernel images/iso/suseboot/vmlinux \ -append "root=/dev/mapper/system-root ro audit=0 selinux=0 apparmor=0 console=tty0 console=ttyPZ0" \ -initrd images/iso/suseboot/initrd.img \ -gdb tcp::1234 -- BEFORE virtio-pci "bug/user error?" introduced: -- sPAPR memory map: RTAS : 0x3fff..3fff0013 FDT : 0x3ffe..3ffe Kernel : 0x0040..01abad7b Ramdisk : 0x01ad..02053df7 Firmware load: 0x..000d6ec0 Firmware runtime : 0x3d7e..3ffe sPAPR reset SLOF ** QEMU Starting Build Date = Mar 3 2012 21:46:40 FW Version = git-440e662879c4fc3c Press "s" to enter Open Firmware. Populating /vdevice methods Populating /vdevice/v-scsi@2000 VSCSI: Initializing VSCSI: Looking for disks SCSI ID 2 CD-ROM : "QEMU QEMU CD-ROM 1.0." Populating /vdevice/vty@3000 Populating /pci@0,0 Adapters on 00 (D) : 1af4 1000virtio [ net ] 00 0800 (D) : 1af4 1001virtio [ block ] No NVRAM common partition, re-initializing... 
Using default console: /vdevice/vty@3000 Detected RAM kernel at 40 (16bad7c bytes) Welcome to Open Firmware Copyright (c) 2004, 2011 IBM Corporation All rights reserved. This program and the accompanying materials are made available under the terms of the BSD License available at http://www.opensource.org/licenses/bsd-license.php Booting from memory... OF stdout device is: /vdevice/vty@3000 Preparing to boot Linux version 3.2.0-2-ppc64 (geeko@buildhost) (gcc version 4.6.2 20111212 [gcc-4_6-branch revision 18] (SUSE Linux) ) #1 SMP Wed Jan 25 10:51:08 UTC 2012 (2206a5c) Detected machine type: 0101 Max number of cores passed to firmware: 1024 (NR_CPUS = 1024) Calling ibm,client-architecture-support... not implemented couldn't open /packages/elf-loader command line: root=/dev/mapper/system-root ro audit=0 selinux=0 apparmor=0 console=tty0 console=ttyPZ0 memory layout at init: memory_limit : (16 MB aligned) alloc_bottom : 01ad alloc_top: 3000 alloc_top_hi : 4000 rmo_top : 3000 ram_top : 4000 instantiating rtas at 0x2fff... done Querying for OPAL presence... not there. boot cpu hw idx 0 copying OF device tree... Building dt strings... Building dt structure... Device tree strings 0x020e -> 0x020e0635 Device tree struct 0x020f -> 0x0210 Calling quiesce... returning from prom_init Using pSeries machine description Using 1TB segments Found initrd at 0xc1ad:0xc2053df8 bootconsole [udbg0] enabled CPU maps initialized for 1 thread per core Starting Linux PPC64 #1 SMP Wed Jan 25 10:51:08 UTC 2012 (2206a5c) - ppc64_pft_size= 0x18 physicalMemorySize= 0x4000 htab_hash_mask= 0x1 - Initializing cgroup subsys cpuset Initializing cgroup subsys cpu Linux version 3.2.0-2-ppc64 (geeko@buildhost) (gc
[Qemu-devel] [PATCH 0/5] 64bit PowerPC little endian support
This patchset adds support for 64bit PowerPC little endian on POWER7. Linux kernel patches to support this were sent out earlier today: https://lists.ozlabs.org/pipermail/linuxppc-dev/2013-August/109849.html Anton -- Anton Blanchard (4): target-ppc: POWER7 supports the MSR_LE bit target-ppc: USE LPCR_ILE to control exception endian on POWER7 pseries: Add H_SET_MODE hcall to change guest exception endianness disas/ppc.c: Fix little endian disassembly Benjamin Herrenschmidt (1): pseries: Fix loading of little endian kernels disas/ppc.c | 3 ++- hw/ppc/spapr.c | 15 +-- hw/ppc/spapr_hcall.c| 44 include/hw/ppc/spapr.h | 12 +++- target-ppc/cpu.h| 2 ++ target-ppc/excp_helper.c| 10 ++ target-ppc/translate_init.c | 2 +- 7 files changed, 83 insertions(+), 5 deletions(-) -- 1.8.1.2
[Qemu-devel] [PATCH 5/5] pseries: Fix loading of little endian kernels
From: Benjamin Herrenschmidt Try loading the kernel as little endian if it fails big endian. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Anton Blanchard --- hw/ppc/spapr.c | 13 - 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index de639f6..639b719 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -253,6 +253,7 @@ static void *spapr_create_fdt_skel(const char *cpu_model, hwaddr initrd_base, hwaddr initrd_size, hwaddr kernel_size, + bool little_endian, const char *boot_device, const char *kernel_cmdline, uint32_t epow_irq) @@ -306,6 +307,9 @@ static void *spapr_create_fdt_skel(const char *cpu_model, cpu_to_be64(kernel_size) }; _FDT((fdt_property(fdt, "qemu,boot-kernel", &kprop, sizeof(kprop; +if (little_endian) { +_FDT((fdt_property(fdt, "qemu,boot-kernel-le", NULL, 0))); +} } if (boot_device) { _FDT((fdt_property_string(fdt, "qemu,boot-device", boot_device))); @@ -1082,6 +1086,7 @@ static void ppc_spapr_init(QEMUMachineInitArgs *args) uint32_t initrd_base = 0; long kernel_size = 0, initrd_size = 0; long load_limit, rtas_limit, fw_size; +bool kernel_le = false; char *filename; msi_supported = true; @@ -1261,6 +1266,12 @@ static void ppc_spapr_init(QEMUMachineInitArgs *args) kernel_size = load_elf(kernel_filename, translate_kernel_address, NULL, NULL, &lowaddr, NULL, 1, ELF_MACHINE, 0); if (kernel_size < 0) { +kernel_size = load_elf(kernel_filename, + translate_kernel_address, NULL, + NULL, &lowaddr, NULL, 0, ELF_MACHINE, 0); +kernel_le = kernel_size > 0; +} +if (kernel_size < 0) { kernel_size = load_image_targphys(kernel_filename, KERNEL_LOAD_ADDR, load_limit - KERNEL_LOAD_ADDR); @@ -1310,7 +1321,7 @@ static void ppc_spapr_init(QEMUMachineInitArgs *args) /* Prepare the device tree */ spapr->fdt_skel = spapr_create_fdt_skel(cpu_model, initrd_base, initrd_size, -kernel_size, +kernel_size, kernel_le, boot_device, kernel_cmdline, spapr->epow_irq); assert(spapr->fdt_skel != NULL); -- 1.8.1.2
[Qemu-devel] [PATCH 1/5] target-ppc: POWER7 supports the MSR_LE bit
Add MSR_LE to the msr_mask for POWER7. Signed-off-by: Anton Blanchard --- target-ppc/translate_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c index b14aec8..33914bc 100644 --- a/target-ppc/translate_init.c +++ b/target-ppc/translate_init.c @@ -7122,7 +7122,7 @@ POWERPC_FAMILY(POWER7)(ObjectClass *oc, void *data) PPC_SEGMENT_64B | PPC_SLBI | PPC_POPCNTB | PPC_POPCNTWD; pcc->insns_flags2 = PPC2_VSX | PPC2_DFP | PPC2_DBRX | PPC2_ISA205; -pcc->msr_mask = 0x8204FF36ULL; +pcc->msr_mask = 0x8204FF37ULL; pcc->mmu_model = POWERPC_MMU_2_06; #if defined(CONFIG_SOFTMMU) pcc->handle_mmu_fault = ppc_hash64_handle_mmu_fault; -- 1.8.1.2
[Qemu-devel] [PATCH 3/5] pseries: Add H_SET_MODE hcall to change guest exception endianness
H_SET_MODE is used for controlling various partition settings. One of these settings is the endianness a guest takes its exceptions in. Signed-off-by: Anton Blanchard --- hw/ppc/spapr.c | 2 +- hw/ppc/spapr_hcall.c | 44 include/hw/ppc/spapr.h | 12 +++- 3 files changed, 56 insertions(+), 2 deletions(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 16bfab9..de639f6 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -262,7 +262,7 @@ static void *spapr_create_fdt_skel(const char *cpu_model, uint32_t start_prop = cpu_to_be32(initrd_base); uint32_t end_prop = cpu_to_be32(initrd_base + initrd_size); char hypertas_prop[] = "hcall-pft\0hcall-term\0hcall-dabr\0hcall-interrupt" -"\0hcall-tce\0hcall-vio\0hcall-splpar\0hcall-bulk"; +"\0hcall-tce\0hcall-vio\0hcall-splpar\0hcall-bulk\0hcall-set-mode"; char qemu_hypertas_prop[] = "hcall-memop1"; uint32_t refpoints[] = {cpu_to_be32(0x4), cpu_to_be32(0x4)}; uint32_t interrupt_server_ranges_prop[] = {0, cpu_to_be32(smp_cpus)}; diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c index 67d6cd9..79e1b61 100644 --- a/hw/ppc/spapr_hcall.c +++ b/hw/ppc/spapr_hcall.c @@ -657,6 +657,48 @@ static target_ulong h_logical_dcbf(PowerPCCPU *cpu, sPAPREnvironment *spapr, return H_SUCCESS; } +static target_ulong h_set_mode(PowerPCCPU *cpu, sPAPREnvironment *spapr, + target_ulong opcode, target_ulong *args) +{ +CPUState *cs; +target_ulong mflags = args[0]; +target_ulong resource = args[1]; +target_ulong value1 = args[2]; +target_ulong value2 = args[3]; + +if (resource == 4) { +if (value1) { +return H_P3; +} +if (value2) { +return H_P4; +} + +switch (mflags) { +case 0: +for (cs = first_cpu; cs != NULL; cs = cs->next_cpu) { +PowerPCCPU *cp = POWERPC_CPU(cs); +CPUPPCState *env = &cp->env; +env->spr[SPR_LPCR] &= ~LPCR_ILE; +} +return H_SUCCESS; + +case 1: +for (cs = first_cpu; cs != NULL; cs = cs->next_cpu) { +PowerPCCPU *cp = POWERPC_CPU(cs); +CPUPPCState *env = &cp->env; +env->spr[SPR_LPCR] |= LPCR_ILE; +} +return H_SUCCESS; + +default: 
+return H_UNSUPPORTED_FLAG; +} +} + +return H_P2; +} + static spapr_hcall_fn papr_hypercall_table[(MAX_HCALL_OPCODE / 4) + 1]; static spapr_hcall_fn kvmppc_hypercall_table[KVMPPC_HCALL_MAX - KVMPPC_HCALL_BASE + 1]; @@ -734,6 +776,8 @@ static void hypercall_register_types(void) /* qemu/KVM-PPC specific hcalls */ spapr_register_hypercall(KVMPPC_H_RTAS, h_rtas); + +spapr_register_hypercall(H_SET_MODE, h_set_mode); } type_init(hypercall_register_types) diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h index 9fc1972..3ceec7a 100644 --- a/include/hw/ppc/spapr.h +++ b/include/hw/ppc/spapr.h @@ -109,7 +109,16 @@ typedef struct sPAPREnvironment { #define H_NOT_ENOUGH_RESOURCES -44 #define H_R_STATE -45 #define H_RESCINDEND -46 +#define H_P2 -55 +#define H_P3 -56 +#define H_P4 -57 +#define H_P5 -58 +#define H_P6 -59 +#define H_P7 -60 +#define H_P8 -61 +#define H_P9 -62 #define H_MULTI_THREADS_ACTIVE -9005 +#define H_UNSUPPORTED_FLAG -256 /* Long Busy is a condition that can be returned by the firmware @@ -267,7 +276,8 @@ typedef struct sPAPREnvironment { #define H_GET_EM_PARMS 0x2B8 #define H_SET_MPP 0x2D0 #define H_GET_MPP 0x2D4 -#define MAX_HCALL_OPCODEH_GET_MPP +#define H_SET_MODE 0x31C +#define MAX_HCALL_OPCODEH_SET_MODE /* The hcalls above are standardized in PAPR and implemented by pHyp * as well. -- 1.8.1.2
[Qemu-devel] [PATCH 2/5] target-ppc: USE LPCR_ILE to control exception endian on POWER7
On POWER7, LPCR_ILE is used to control what endian guests take their exceptions in so use it instead of MSR_ILE. Signed-off-by: Anton Blanchard --- target-ppc/cpu.h | 2 ++ target-ppc/excp_helper.c | 10 ++ 2 files changed, 12 insertions(+) diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h index 711db08..422a6bb 100644 --- a/target-ppc/cpu.h +++ b/target-ppc/cpu.h @@ -453,6 +453,8 @@ struct ppc_slb_t { #define MSR_RI 1 /* Recoverable interrupt1*/ #define MSR_LE 0 /* Little-endian mode 1 hflags */ +#define LPCR_ILE (1 << (63-38)) + #define msr_sf ((env->msr >> MSR_SF) & 1) #define msr_isf ((env->msr >> MSR_ISF) & 1) #define msr_shv ((env->msr >> MSR_SHV) & 1) diff --git a/target-ppc/excp_helper.c b/target-ppc/excp_helper.c index e9fcad8..e957761 100644 --- a/target-ppc/excp_helper.c +++ b/target-ppc/excp_helper.c @@ -611,9 +611,19 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp) tlb_flush(env, 1); } +#ifdef TARGET_PPC64 +if (excp_model == POWERPC_EXCP_POWER7) { +if (env->spr[SPR_LPCR] & LPCR_ILE) { +new_msr |= (target_ulong)1 << MSR_LE; +} +} else if (msr_ile) { +new_msr |= (target_ulong)1 << MSR_LE; +} +#else if (msr_ile) { new_msr |= (target_ulong)1 << MSR_LE; } +#endif /* Jump to handler */ vector = env->excp_vectors[excp]; -- 1.8.1.2
[Qemu-devel] [PATCH 4/5] disas/ppc.c: Fix little endian disassembly
Use info->endian to select the endian of the instruction to be disassembled. Signed-off-by: Anton Blanchard --- disas/ppc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/disas/ppc.c b/disas/ppc.c index c149506..99c4cbc 100644 --- a/disas/ppc.c +++ b/disas/ppc.c @@ -5157,7 +5157,8 @@ int print_insn_ppc (bfd_vma memaddr, struct disassemble_info *info) { int dialect = (char *) info->private_data - (char *) 0; - return print_insn_powerpc (memaddr, info, 1, dialect); + return print_insn_powerpc (memaddr, info, info->endian == BFD_ENDIAN_BIG, + dialect); } /* Print a big endian PowerPC instruction. */ -- 1.8.1.2
Re: [Qemu-devel] [PATCH 1/7] virtio: allow byte swapping for vring and config access
Hi, > > The distinction is important in QEMU. ppc64 is still > > TARGET_WORDS_BIGENDIAN. We still want most stl_phys to treat > > integers as big endian. There's just this extra concept that CPU > > loads/stores are sometimes byte swapped. That affects virtio but > > not a lot else. > > You've redefined endian here; please don't do that. Endian is the > order in memory which a CPU does loads and stores. From any > reasonable definition, PPC is bi-endian. > > It's actually a weird thing for the qemu core to know at all: almost > everything which cares is in target-specific code. The exceptions are > gdb stubs and virtio, both of which are "native endian" (and that > weird code in exec.c: what is notdirty_mem_write?). > > Your argument that we shouldn't fix stl_* might be justifiable (ie. > just hack virtio and gdb as one-offs), but it's neither clear nor > "least surprise". Here is the hack I have to get gdbstub going with a little endian PowerPC kernel. Basically: LE guest -> BE QEMU -> BE gdb (pointing at the LE vmlinux) In this setup, gdb expects registers to be sent in little endian mode. It's a pretty big mistake for the gdb remote protocol to be using native endian to transfer registers especially when there is no other protocol negotiation to work out what endian that is. Anton -- Index: b/gdbstub.c === --- a/gdbstub.c +++ b/gdbstub.c @@ -317,6 +317,8 @@ static GDBState *gdbserver_state; bool gdb_has_xml; +bool gdbstub_cross_endian; + #ifdef CONFIG_USER_ONLY /* XXX: This is not thread safe. Do we care? */ static int gdbserver_fd = -1; Index: b/include/exec/gdbstub.h === --- a/include/exec/gdbstub.h +++ b/include/exec/gdbstub.h @@ -42,8 +42,13 @@ static inline int cpu_index(CPUState *cp /* The GDB remote protocol transfers values in target byte order. This means * we can use the raw memory access routines to access the value buffer. * Conveniently, these also handle the case where the buffer is mis-aligned. 
+ * + * We do need to byte swap if the CPU isn't running in the QEMU compiled + * target endian mode. */ +extern bool gdbstub_cross_endian; + static inline int gdb_get_reg8(uint8_t *mem_buf, uint8_t val) { stb_p(mem_buf, val); @@ -52,28 +57,49 @@ static inline int gdb_get_reg8(uint8_t * static inline int gdb_get_reg16(uint8_t *mem_buf, uint16_t val) { -stw_p(mem_buf, val); +if (gdbstub_cross_endian) +stw_p(mem_buf, bswap16(val)); +else +stw_p(mem_buf, val); return 2; } static inline int gdb_get_reg32(uint8_t *mem_buf, uint32_t val) { -stl_p(mem_buf, val); +if (gdbstub_cross_endian) +stq_p(mem_buf, bswap32(val)); +else +stl_p(mem_buf, val); return 4; } static inline int gdb_get_reg64(uint8_t *mem_buf, uint64_t val) { -stq_p(mem_buf, val); +if (gdbstub_cross_endian) +stq_p(mem_buf, bswap64(val)); +else +stq_p(mem_buf, val); return 8; } #if TARGET_LONG_BITS == 64 #define gdb_get_regl(buf, val) gdb_get_reg64(buf, val) -#define ldtul_p(addr) ldq_p(addr) +static inline uint64_t ldtul_p(const void *ptr) +{ + uint64_t tmp = ldq_p(ptr); + if (gdbstub_cross_endian) + tmp = bswap64(tmp); + return tmp; +} #else #define gdb_get_regl(buf, val) gdb_get_reg32(buf, val) -#define ldtul_p(addr) ldl_p(addr) +static inline uint32_t ldtul_p(const void *ptr) +{ + uint32_t tmp = ldl_p(ptr); + if (gdbstub_cross_endian) + tmp = bswap32(tmp); + return tmp; +} #endif #endif
[Qemu-devel] [PATCH] pseries: Fix stalls on hypervisor virtual console
A number of users are reporting stalls when using the pseries hypervisor virtual console. A simple test case is to paste 15 or 17 characters at a time into the console. Pasting 15 characters at a time works fine but pasting 17 characters hangs for a random amount of time. Other activity (network, qemu monitor etc) unblocks it. If qemu-char tries to send more than 16 characters at once, vty_can_receive returns false. At this point we have to wait for the guest to consume that output. Everything is good so far. The problem occurs when the guest does consume the output. We need to signal back to the qemu-char layer that we are ready for more input. Without this we block until something else kicks us (eg network activity). Cc: qemu-sta...@nongnu.org Signed-off-by: Anton Blanchard --- Index: b/hw/char/spapr_vty.c === --- a/hw/char/spapr_vty.c +++ b/hw/char/spapr_vty.c @@ -47,6 +47,8 @@ static int vty_getchars(VIOsPAPRDevice * buf[n++] = dev->buf[dev->out++ % VTERM_BUFSIZE]; } +qemu_chr_accept_input(dev->chardev); + return n; }
[Qemu-devel] [PATCH] hypervisor property clashes with hypervisor node
dtc fails on a recent QEMU snapshot: ERROR (name_properties): "name" property in /hypervisor#1 is incorrect ("hypervisor" instead of base node name) Looking at the device tree we have a hypervisor property: # lsprop hypervisor hypervisor "kvm" But we also have a hypervisor node, with a name that doesn't match: # lsprop hypervisor#1/ name "hypervisor" compatible "linux,kvm" linux,phandle7e5eb5d8 (2120136152) Commit c08ce91d309c (spapr: add uuid/host details to device tree) looks to have collided with an earlier patch. Remove the hypervisor property. Signed-off-by: Anton Blanchard --- Index: b/hw/ppc/spapr.c === --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -362,10 +362,6 @@ static void *spapr_create_fdt_skel(hwadd _FDT((fdt_property_string(fdt, "model", "IBM pSeries (emulated by qemu)"))); _FDT((fdt_property_string(fdt, "compatible", "qemu,pseries"))); -if (kvm_enabled()) { -_FDT((fdt_property_string(fdt, "hypervisor", "kvm"))); -} - /* * Add info to guest to indentify which host is it being run on * and what is the uuid of the guest
[Qemu-devel] [PATCH 2/6] target-ppc: POWER8 supports isel
POWER8 supports isel, so enable it in QEMU. Signed-off-by: Anton Blanchard Signed-off-by: Cédric Le Goater --- target-ppc/translate_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c index a82c8f9..4fda0fd 100644 --- a/target-ppc/translate_init.c +++ b/target-ppc/translate_init.c @@ -7157,7 +7157,7 @@ POWERPC_FAMILY(POWER8)(ObjectClass *oc, void *data) pcc->pvr_mask = CPU_POWERPC_POWER8_MASK; pcc->init_proc = init_proc_POWER8; pcc->check_pow = check_pow_nocheck; -pcc->insns_flags = PPC_INSNS_BASE | PPC_STRING | PPC_MFTB | +pcc->insns_flags = PPC_INSNS_BASE | PPC_ISEL | PPC_STRING | PPC_MFTB | PPC_FLOAT | PPC_FLOAT_FSEL | PPC_FLOAT_FRES | PPC_FLOAT_FSQRT | PPC_FLOAT_FRSQRTE | PPC_FLOAT_FRSQRTES | -- 1.8.3.2
[Qemu-devel] [PATCH 4/6] target-ppc: MSR_POW not supported on POWER7/7+/8
Remove MSR_POW from the msr_mask for POWER7/7+/8. Signed-off-by: Anton Blanchard Signed-off-by: Cédric Le Goater --- target-ppc/translate_init.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c index 87c00a1..d07e186 100644 --- a/target-ppc/translate_init.c +++ b/target-ppc/translate_init.c @@ -7075,7 +7075,7 @@ POWERPC_FAMILY(POWER7)(ObjectClass *oc, void *data) PPC2_PERM_ISA206 | PPC2_DIVE_ISA206 | PPC2_ATOMIC_ISA206 | PPC2_FP_CVT_ISA206 | PPC2_FP_TST_ISA206; -pcc->msr_mask = 0x8284FF37ULL; +pcc->msr_mask = 0x8280FF37ULL; pcc->mmu_model = POWERPC_MMU_2_06; #if defined(CONFIG_SOFTMMU) pcc->handle_mmu_fault = ppc_hash64_handle_mmu_fault; @@ -7118,7 +7118,7 @@ POWERPC_FAMILY(POWER7P)(ObjectClass *oc, void *data) PPC2_PERM_ISA206 | PPC2_DIVE_ISA206 | PPC2_ATOMIC_ISA206 | PPC2_FP_CVT_ISA206 | PPC2_FP_TST_ISA206; -pcc->msr_mask = 0x8284FF37ULL; +pcc->msr_mask = 0x8280FF37ULL; pcc->mmu_model = POWERPC_MMU_2_06; #if defined(CONFIG_SOFTMMU) pcc->handle_mmu_fault = ppc_hash64_handle_mmu_fault; @@ -7175,7 +7175,7 @@ POWERPC_FAMILY(POWER8)(ObjectClass *oc, void *data) PPC2_FP_TST_ISA206 | PPC2_BCTAR_ISA207 | PPC2_LSQ_ISA207 | PPC2_ALTIVEC_207 | PPC2_ISA205 | PPC2_ISA207S; -pcc->msr_mask = 0x8284FF37ULL; +pcc->msr_mask = 0x8280FF37ULL; pcc->mmu_model = POWERPC_MMU_2_06; #if defined(CONFIG_SOFTMMU) pcc->handle_mmu_fault = ppc_hash64_handle_mmu_fault; -- 1.8.3.2
[Qemu-devel] [PATCH 1/6] target-ppc: POWER8 supports the MSR_LE bit
Add MSR_LE to the msr_mask for POWER8. Signed-off-by: Anton Blanchard Signed-off-by: Cédric Le Goater --- target-ppc/translate_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c index 7f53c33..a82c8f9 100644 --- a/target-ppc/translate_init.c +++ b/target-ppc/translate_init.c @@ -7175,7 +7175,7 @@ POWERPC_FAMILY(POWER8)(ObjectClass *oc, void *data) PPC2_FP_TST_ISA206 | PPC2_BCTAR_ISA207 | PPC2_LSQ_ISA207 | PPC2_ALTIVEC_207 | PPC2_ISA205 | PPC2_ISA207S; -pcc->msr_mask = 0x8284FF36ULL; +pcc->msr_mask = 0x8284FF37ULL; pcc->mmu_model = POWERPC_MMU_2_06; #if defined(CONFIG_SOFTMMU) pcc->handle_mmu_fault = ppc_hash64_handle_mmu_fault; -- 1.8.3.2
[Qemu-devel] [PATCH 5/6] target-ppc: Fix Book3S PMU SPRs
Most of the PMU SPRs were wrong on Book3S. Signed-off-by: Anton Blanchard --- target-ppc/cpu.h| 29 - target-ppc/translate_init.c | 139 +++- 2 files changed, 153 insertions(+), 15 deletions(-) diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h index 2719c08..7082041 100644 --- a/target-ppc/cpu.h +++ b/target-ppc/cpu.h @@ -1452,54 +1452,81 @@ static inline int cpu_mmu_index (CPUPPCState *env) #define SPR_MPC_MI_CTR(0x300) #define SPR_PERF1 (0x301) #define SPR_RCPU_MI_RBA1 (0x301) +#define SPR_BOOK3S_UMMCR2 (0x301) #define SPR_PERF2 (0x302) #define SPR_RCPU_MI_RBA2 (0x302) #define SPR_MPC_MI_AP (0x302) -#define SPR_MMCRA (0x302) +#define SPR_BOOK3S_UMMCRA (0x302) #define SPR_PERF3 (0x303) #define SPR_RCPU_MI_RBA3 (0x303) #define SPR_MPC_MI_EPN(0x303) +#define SPR_BOOK3S_UPMC1 (0x303) #define SPR_PERF4 (0x304) +#define SPR_BOOK3S_UPMC2 (0x304) #define SPR_PERF5 (0x305) #define SPR_MPC_MI_TWC(0x305) +#define SPR_BOOK3S_UPMC3 (0x305) #define SPR_PERF6 (0x306) #define SPR_MPC_MI_RPN(0x306) +#define SPR_BOOK3S_UPMC4 (0x306) #define SPR_PERF7 (0x307) +#define SPR_BOOK3S_UPMC5 (0x307) #define SPR_PERF8 (0x308) #define SPR_RCPU_L2U_RBA0 (0x308) #define SPR_MPC_MD_CTR(0x308) +#define SPR_BOOK3S_UPMC6 (0x308) #define SPR_PERF9 (0x309) #define SPR_RCPU_L2U_RBA1 (0x309) #define SPR_MPC_MD_CASID (0x309) +#define SPR_BOOK3S_UPMC7 (0x309) #define SPR_PERFA (0x30A) #define SPR_RCPU_L2U_RBA2 (0x30A) #define SPR_MPC_MD_AP (0x30A) +#define SPR_BOOK3S_UPMC8 (0x30A) #define SPR_PERFB (0x30B) #define SPR_RCPU_L2U_RBA3 (0x30B) #define SPR_MPC_MD_EPN(0x30B) +#define SPR_BOOK3S_UMMCR0 (0x30B) #define SPR_PERFC (0x30C) #define SPR_MPC_MD_TWB(0x30C) +#define SPR_BOOK3S_USIAR (0x30C) #define SPR_PERFD (0x30D) #define SPR_MPC_MD_TWC(0x30D) +#define SPR_BOOK3S_USDAR (0x30D) #define SPR_PERFE (0x30E) #define SPR_MPC_MD_RPN(0x30E) +#define SPR_BOOK3S_UMMCR1 (0x30E) #define SPR_PERFF (0x30F) #define SPR_MPC_MD_TW (0x30F) #define SPR_UPERF0(0x310) #define SPR_UPERF1(0x311) +#define SPR_BOOK3S_MMCR2 
(0x311) #define SPR_UPERF2(0x312) +#define SPR_BOOK3S_MMCRA (0x312) #define SPR_UPERF3(0x313) +#define SPR_BOOK3S_PMC1 (0x313) #define SPR_UPERF4(0x314) +#define SPR_BOOK3S_PMC2 (0x314) #define SPR_UPERF5(0x315) +#define SPR_BOOK3S_PMC3 (0x315) #define SPR_UPERF6(0x316) +#define SPR_BOOK3S_PMC4 (0x316) #define SPR_UPERF7(0x317) +#define SPR_BOOK3S_PMC5 (0x317) #define SPR_UPERF8(0x318) +#define SPR_BOOK3S_PMC6 (0x318) #define SPR_UPERF9(0x319) +#define SPR_BOOK3S_PMC7 (0x319) #define SPR_UPERFA(0x31A) +#define SPR_BOOK3S_PMC8 (0x31A) #define SPR_UPERFB(0x31B) +#define SPR_BOOK3S_MMCR0 (0x31B) #define SPR_UPERFC(0x31C) +#define SPR_BOOK3S_SIAR (0x31C) #define SPR_UPERFD(0x31D) +#define SPR_BOOK3S_SDAR (0x31D) #define SPR_UPERFE(0x31E) +#define SPR_BOOK3S_MMCR1 (0x31E) #define SPR_UPERFF(0x31F) #define SPR_RCPU_MI_RA0 (0x320) #define SPR_MPC_MI_DBCAM (0x320) diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c index d07e186..273e37d 100644 --- a/target-ppc/translate_init.c +++ b/target-ppc/translate_init.c @@ -6629,10 +6629,128 @@ static int check_pow_970 (CPUPPCState *env) return 0; } +/* SPR common to all book3s implementations */ +static void gen_spr_book3s (CPUPPCState *env) +{ +/* Breakpoints */ +/* XXX : not implemented */ +spr_register_kvm(env, SPR_DABR, "DABR", + SPR_NOACCESS, SPR_NOACCESS, + &spr_read_generic, &spr_write_generic, + KVM_REG_PPC_DABR, 0x); +/* XXX : not implemented */ +spr_register(env, SPR_IABR, "IABR", + SPR_NOACCESS, SPR_NOACCESS, + &spr_read_generic, &spr_write_generic, + 0x); + +/* Performance monitors */ +/* XXX : not implemented */ +spr_register_kvm(env, SPR_BOOK3S_MMCR0, "MMCR0", + SPR_NOACCESS, SPR_NOACCESS, + &spr_read_generic, &spr_write_generic, + KVM_REG_PPC_MMCR0, 0x); +/* XXX : not implemented */ +spr_register_kvm(env, SPR_BOOK3S_MMCR1, "MMCR1", + SPR_NOACCESS, SPR_NOACC
[Qemu-devel] [PATCH 3/6] target-ppc: POWER7+ supports the MSR_VSX bit
Without MSR_VSX we die early during a Linux boot. Signed-off-by: Anton Blanchard Signed-off-by: Cédric Le Goater --- target-ppc/translate_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c index 4fda0fd..87c00a1 100644 --- a/target-ppc/translate_init.c +++ b/target-ppc/translate_init.c @@ -7118,7 +7118,7 @@ POWERPC_FAMILY(POWER7P)(ObjectClass *oc, void *data) PPC2_PERM_ISA206 | PPC2_DIVE_ISA206 | PPC2_ATOMIC_ISA206 | PPC2_FP_CVT_ISA206 | PPC2_FP_TST_ISA206; -pcc->msr_mask = 0x8204FF37ULL; +pcc->msr_mask = 0x8284FF37ULL; pcc->mmu_model = POWERPC_MMU_2_06; #if defined(CONFIG_SOFTMMU) pcc->handle_mmu_fault = ppc_hash64_handle_mmu_fault; -- 1.8.3.2
[Qemu-devel] [PATCH 6/6] target-ppc: Add PMC7/8 to 970
970 CPUs have PMC7/8. Create gen_spr_970 to avoid replicating it 3 times, and simplify the existing code. Signed-off-by: Anton Blanchard --- target-ppc/translate_init.c | 89 - 1 file changed, 39 insertions(+), 50 deletions(-) diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c index 273e37d..50b2603 100644 --- a/target-ppc/translate_init.c +++ b/target-ppc/translate_init.c @@ -6747,12 +6747,13 @@ static void gen_spr_book3s (CPUPPCState *env) 0x); } -static void init_proc_970 (CPUPPCState *env) +static void gen_spr_970 (CPUPPCState *env) { -gen_spr_ne_601(env); -gen_spr_book3s(env); -/* Time base */ -gen_tbl(env); +spr_register(env, SPR_HIOR, "SPR_HIOR", + SPR_NOACCESS, SPR_NOACCESS, + &spr_read_hior, &spr_write_hior, + 0x); + /* Hardware implementation registers */ /* XXX : not implemented */ spr_register(env, SPR_HID0, "HID0", @@ -6769,13 +6770,40 @@ static void init_proc_970 (CPUPPCState *env) SPR_NOACCESS, SPR_NOACCESS, &spr_read_generic, &spr_write_generic, POWERPC970_HID5_INIT); + +/* Performance monitors */ +/* XXX : not implemented */ +spr_register_kvm(env, SPR_BOOK3S_PMC7, "PMC7", + SPR_NOACCESS, SPR_NOACCESS, + &spr_read_generic, &spr_write_generic, + KVM_REG_PPC_PMC7, 0x); +/* XXX : not implemented */ +spr_register_kvm(env, SPR_BOOK3S_PMC8, "PMC8", + SPR_NOACCESS, SPR_NOACCESS, + &spr_read_generic, &spr_write_generic, + KVM_REG_PPC_PMC8, 0x); +/* XXX : not implemented */ +spr_register(env, SPR_BOOK3S_UPMC7, "UPMC7", + &spr_read_ureg, SPR_NOACCESS, + &spr_read_ureg, SPR_NOACCESS, + 0x); +/* XXX : not implemented */ +spr_register(env, SPR_BOOK3S_UPMC8, "UPMC8", + &spr_read_ureg, SPR_NOACCESS, + &spr_read_ureg, SPR_NOACCESS, + 0x); +} + +static void init_proc_970 (CPUPPCState *env) +{ +gen_spr_ne_601(env); +gen_spr_book3s(env); +gen_spr_970(env); +/* Time base */ +gen_tbl(env); /* Memory management */ /* XXX: not correct */ gen_low_BATs(env); -spr_register(env, SPR_HIOR, "SPR_HIOR", - SPR_NOACCESS, SPR_NOACCESS, - &spr_read_hior, 
&spr_write_hior, - 0x); #if !defined(CONFIG_USER_ONLY) env->slb_nr = 32; #endif @@ -6831,31 +6859,12 @@ static void init_proc_970FX (CPUPPCState *env) { gen_spr_ne_601(env); gen_spr_book3s(env); +gen_spr_970(env); /* Time base */ gen_tbl(env); -/* Hardware implementation registers */ -/* XXX : not implemented */ -spr_register(env, SPR_HID0, "HID0", - SPR_NOACCESS, SPR_NOACCESS, - &spr_read_generic, &spr_write_clear, - 0x6000); -/* XXX : not implemented */ -spr_register(env, SPR_HID1, "HID1", - SPR_NOACCESS, SPR_NOACCESS, - &spr_read_generic, &spr_write_generic, - 0x); -/* XXX : not implemented */ -spr_register(env, SPR_970_HID5, "HID5", - SPR_NOACCESS, SPR_NOACCESS, - &spr_read_generic, &spr_write_generic, - POWERPC970_HID5_INIT); /* Memory management */ /* XXX: not correct */ gen_low_BATs(env); -spr_register(env, SPR_HIOR, "SPR_HIOR", - SPR_NOACCESS, SPR_NOACCESS, - &spr_read_hior, &spr_write_hior, - 0x); spr_register(env, SPR_CTRL, "SPR_CTRL", SPR_NOACCESS, SPR_NOACCESS, SPR_NOACCESS, &spr_write_generic, @@ -6923,32 +6932,12 @@ static void init_proc_970MP (CPUPPCState *env) { gen_spr_ne_601(env); gen_spr_book3s(env); +gen_spr_970(env); /* Time base */ gen_tbl(env); -/* Hardware implementation registers */ -/* XXX : not implemented */ -spr_register(env, SPR_HID0, "HID0", - SPR_NOACCESS, SPR_NOACCESS, - &spr_read_generic, &spr_write_clear, - 0x6000); -/* XXX : not implemented */ -spr_register(env, SPR_HID1, "HID1", - SPR_NOACCESS, SPR_NOACCESS, - &spr_read_generic, &spr_write_generic, - 0x); -/* XXX : not implemented */ -spr_register(env, SPR_970_HID5, "HID5", - SPR_NOACCESS, SPR_NOA
Re: [Qemu-devel] [PATCH 0/9] target-ppc: VSX Bug Fixes
Hi Tom, > This patch series addresses bugs in the recently added VSX > instructions. Two general defects are fixed: Thanks! This series fixes the issue I had with wget. Tested-by: Anton Blanchard Anton
[Qemu-devel] [PATCH] target-ppc: dump DAR and DSISR
The DAR and DSISR can be very useful when debugging issues, so add them to ppc_cpu_dump_state. We had another bug in this area: all of the v2.06 MMU types were missing. Signed-off-by: Anton Blanchard --- Index: b/target-ppc/translate.c === --- a/target-ppc/translate.c +++ b/target-ppc/translate.c @@ -9861,8 +9861,13 @@ void ppc_cpu_dump_state(CPUState *cs, FI case POWERPC_MMU_SOFT_74xx: #if defined(TARGET_PPC64) case POWERPC_MMU_64B: +case POWERPC_MMU_2_06: +case POWERPC_MMU_2_06a: +case POWERPC_MMU_2_06d: #endif -cpu_fprintf(f, " SDR1 " TARGET_FMT_lx "\n", env->spr[SPR_SDR1]); +cpu_fprintf(f, " SDR1 " TARGET_FMT_lx " DAR " TARGET_FMT_lx + " DSISR " TARGET_FMT_lx "\n", env->spr[SPR_SDR1], +env->spr[SPR_DAR], env->spr[SPR_DSISR]); break; case POWERPC_MMU_BOOKE206: cpu_fprintf(f, " MAS0 " TARGET_FMT_lx " MAS1 " TARGET_FMT_lx
[Qemu-devel] [PATCH 1/2] target-ppc: Fix invalid SPR read/write warnings
Invalid and privileged SPR warnings currently print the wrong address. While fixing that, also make it clear that we are printing both the decimal and hexadecimal SPR number. Before: Trying to read invalid spr 896 380 at 0714 After: Trying to read invalid spr 896 (0x380) at 0710 Signed-off-by: Anton Blanchard --- Index: b/target-ppc/translate.c === --- a/target-ppc/translate.c +++ b/target-ppc/translate.c @@ -4005,19 +4005,19 @@ static inline void gen_op_mfspr(DisasCon * allowing userland application to read the PVR */ if (sprn != SPR_PVR) { -qemu_log("Trying to read privileged spr %d %03x at " - TARGET_FMT_lx "\n", sprn, sprn, ctx->nip); -printf("Trying to read privileged spr %d %03x at " - TARGET_FMT_lx "\n", sprn, sprn, ctx->nip); +qemu_log("Trying to read privileged spr %d (0x%03x) at " + TARGET_FMT_lx "\n", sprn, sprn, ctx->nip - 4); +printf("Trying to read privileged spr %d (0x%03x) at " + TARGET_FMT_lx "\n", sprn, sprn, ctx->nip - 4); } gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG); } } else { /* Not defined */ -qemu_log("Trying to read invalid spr %d %03x at " -TARGET_FMT_lx "\n", sprn, sprn, ctx->nip); -printf("Trying to read invalid spr %d %03x at " TARGET_FMT_lx "\n", - sprn, sprn, ctx->nip); +qemu_log("Trying to read invalid spr %d (0x%03x) at " + TARGET_FMT_lx "\n", sprn, sprn, ctx->nip - 4); +printf("Trying to read invalid spr %d (0x%03x) at " + TARGET_FMT_lx "\n", sprn, sprn, ctx->nip - 4); gen_inval_exception(ctx, POWERPC_EXCP_INVAL_SPR); } } @@ -4150,18 +4150,18 @@ static void gen_mtspr(DisasContext *ctx) (*write_cb)(ctx, sprn, rS(ctx->opcode)); } else { /* Privilege exception */ -qemu_log("Trying to write privileged spr %d %03x at " - TARGET_FMT_lx "\n", sprn, sprn, ctx->nip); -printf("Trying to write privileged spr %d %03x at " TARGET_FMT_lx - "\n", sprn, sprn, ctx->nip); +qemu_log("Trying to write privileged spr %d (0x%03x) at " + TARGET_FMT_lx "\n", sprn, sprn, ctx->nip - 4); +printf("Trying to write privileged spr %d (0x%03x) at " + 
TARGET_FMT_lx "\n", sprn, sprn, ctx->nip - 4); gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG); } } else { /* Not defined */ -qemu_log("Trying to write invalid spr %d %03x at " - TARGET_FMT_lx "\n", sprn, sprn, ctx->nip); -printf("Trying to write invalid spr %d %03x at " TARGET_FMT_lx "\n", - sprn, sprn, ctx->nip); +qemu_log("Trying to write invalid spr %d (0x%03x) at " + TARGET_FMT_lx "\n", sprn, sprn, ctx->nip - 4); +printf("Trying to write invalid spr %d (0x%03x) at " + TARGET_FMT_lx "\n", sprn, sprn, ctx->nip - 4); gen_inval_exception(ctx, POWERPC_EXCP_INVAL_SPR); } }
[Qemu-devel] [PATCH 2/2] target-ppc: Add read and write of PPR SPR
Recent Linux kernels save and restore the PPR across exceptions so we need to handle it. Signed-off-by: Anton Blanchard --- Index: b/target-ppc/translate_init.c === --- a/target-ppc/translate_init.c +++ b/target-ppc/translate_init.c @@ -7010,6 +7010,10 @@ static void init_proc_POWER7 (CPUPPCStat &spr_read_generic, &spr_write_generic, &spr_read_generic, &spr_write_generic, 0x); +spr_register(env, SPR_PPR, "PPR", + &spr_read_generic, &spr_write_generic, + &spr_read_generic, &spr_write_generic, + 0x); #if !defined(CONFIG_USER_ONLY) env->slb_nr = 32; #endif
[Qemu-devel] [PATCH 1/7] Declare and Enable VSX
From: Tom Musta This patch adds the flag POWERPC_FLAG_VSX to the list of defined flags and also adds this flag to the list of supported features of the Power7 and Power8 CPUs. Additionally, the VSX instructions are added to the list of TCG-enabled instructions. Signed-off-by: Tom Musta Signed-off-by: Anton Blanchard --- Index: b/target-ppc/cpu.h === --- a/target-ppc/cpu.h +++ b/target-ppc/cpu.h @@ -549,6 +549,8 @@ enum { POWERPC_FLAG_BUS_CLK = 0x0002, /* Has CFAR */ POWERPC_FLAG_CFAR = 0x0004, +/* Has VSX */ +POWERPC_FLAG_VSX = 0x0008, }; /*/ @@ -1870,7 +1872,8 @@ enum { /* Book I 2.05 PowerPC specification */ PPC2_ISA205= 0x0020ULL, -#define PPC_TCG_INSNS2 (PPC2_BOOKE206 | PPC2_PRCNTL | PPC2_DBRX | PPC2_ISA205) +#define PPC_TCG_INSNS2 (PPC2_BOOKE206 | PPC2_VSX | PPC2_PRCNTL | PPC2_DBRX | \ + PPC2_ISA205) }; /*/ Index: b/target-ppc/translate_init.c === --- a/target-ppc/translate_init.c +++ b/target-ppc/translate_init.c @@ -7242,7 +7242,8 @@ POWERPC_FAMILY(POWER7)(ObjectClass *oc, pcc->bfd_mach = bfd_mach_ppc64; pcc->flags = POWERPC_FLAG_VRE | POWERPC_FLAG_SE | POWERPC_FLAG_BE | POWERPC_FLAG_PMM | - POWERPC_FLAG_BUS_CLK | POWERPC_FLAG_CFAR; + POWERPC_FLAG_BUS_CLK | POWERPC_FLAG_CFAR | + POWERPC_FLAG_VSX; pcc->l1_dcache_size = 0x8000; pcc->l1_icache_size = 0x8000; } @@ -7276,7 +7277,8 @@ POWERPC_FAMILY(POWER8)(ObjectClass *oc, pcc->bfd_mach = bfd_mach_ppc64; pcc->flags = POWERPC_FLAG_VRE | POWERPC_FLAG_SE | POWERPC_FLAG_BE | POWERPC_FLAG_PMM | - POWERPC_FLAG_BUS_CLK | POWERPC_FLAG_CFAR; + POWERPC_FLAG_BUS_CLK | POWERPC_FLAG_CFAR | + POWERPC_FLAG_VSX; pcc->l1_dcache_size = 0x8000; pcc->l1_icache_size = 0x8000; }
[Qemu-devel] [PATCH 2/7] Add MSR VSX and Associated Exception
From: Tom Musta This patch adds support for the VSX bit of the PowerPC Machine State Register (MSR) as well as the corresponding VSX Unavailable exception. The VSX bit is added to the defined bits masks of the Power7 and Power8 CPU models. Signed-off-by: Tom Musta Signed-off-by: Anton Blanchard --- Index: b/target-ppc/cpu.h === --- a/target-ppc/cpu.h +++ b/target-ppc/cpu.h @@ -236,6 +236,8 @@ enum { POWERPC_EXCP_NMEXTBR = 91, /* Non maskable external breakpoint */ POWERPC_EXCP_ITLBE= 92, /* Instruction TLB error */ POWERPC_EXCP_DTLBE= 93, /* Data TLB error*/ +/* VSX Unavailable (Power ISA 2.06 and later)*/ +POWERPC_EXCP_VSXU = 94, /* VSX Unavailable */ /* EOL */ POWERPC_EXCP_NB = 96, /* QEMU exceptions: used internally during code translation */ @@ -427,6 +429,7 @@ struct ppc_slb_t { #define MSR_VR 25 /* altivec availablex hflags */ #define MSR_SPE 25 /* SPE enable for BookE x hflags */ #define MSR_AP 23 /* Access privilege state on 602 hflags */ +#define MSR_VSX 23 /* Vector Scalar Extension (ISA 2.06 and later) x hflags */ #define MSR_SA 22 /* Supervisor access mode on 602 hflags */ #define MSR_KEY 19 /* key bit on 603e */ #define MSR_POW 18 /* Power management */ @@ -467,6 +470,7 @@ struct ppc_slb_t { #define msr_vr ((env->msr >> MSR_VR) & 1) #define msr_spe ((env->msr >> MSR_SPE) & 1) #define msr_ap ((env->msr >> MSR_AP) & 1) +#define msr_vsx ((env->msr >> MSR_VSX) & 1) #define msr_sa ((env->msr >> MSR_SA) & 1) #define msr_key ((env->msr >> MSR_KEY) & 1) #define msr_pow ((env->msr >> MSR_POW) & 1) Index: b/target-ppc/excp_helper.c === --- a/target-ppc/excp_helper.c +++ b/target-ppc/excp_helper.c @@ -390,6 +390,11 @@ static inline void powerpc_excp(PowerPCC new_msr |= (target_ulong)MSR_HVB; } goto store_current; +case POWERPC_EXCP_VSXU: /* VSX unavailable exception */ +if (lpes1 == 0) { +new_msr |= (target_ulong)MSR_HVB; +} +goto store_current; case POWERPC_EXCP_PIT: /* Programmable interval timer interrupt*/ LOG_EXCP("PIT exception\n"); goto store_next; Index: 
b/target-ppc/translate.c === --- a/target-ppc/translate.c +++ b/target-ppc/translate.c @@ -199,6 +199,7 @@ typedef struct DisasContext { #endif int fpu_enabled; int altivec_enabled; +int vsx_enabled; int spe_enabled; ppc_spr_t *spr_cb; /* Needed to check rights for mfspr/mtspr */ int singlestep_enabled; @@ -9763,6 +9764,11 @@ static inline void gen_intermediate_code ctx.altivec_enabled = msr_vr; else ctx.altivec_enabled = 0; +if ((env->flags & POWERPC_FLAG_VSX) && msr_vsx) { +ctx.vsx_enabled = msr_vsx; +} else { +ctx.vsx_enabled = 0; +} if ((env->flags & POWERPC_FLAG_SE) && msr_se) ctx.singlestep_enabled = CPU_SINGLE_STEP; else Index: b/target-ppc/translate_init.c === --- a/target-ppc/translate_init.c +++ b/target-ppc/translate_init.c @@ -3061,6 +3061,7 @@ static void init_excp_POWER7 (CPUPPCStat env->excp_vectors[POWERPC_EXCP_TRACE]= 0x0D00; env->excp_vectors[POWERPC_EXCP_PERFM]= 0x0F00; env->excp_vectors[POWERPC_EXCP_VPU] = 0x0F20; +env->excp_vectors[POWERPC_EXCP_VSXU] = 0x0F40; env->excp_vectors[POWERPC_EXCP_IABR] = 0x1300; env->excp_vectors[POWERPC_EXCP_MAINT]= 0x1600; env->excp_vectors[POWERPC_EXCP_VPUA] = 0x1700; @@ -7232,7 +7233,7 @@ POWERPC_FAMILY(POWER7)(ObjectClass *oc, PPC_SEGMENT_64B | PPC_SLBI | PPC_POPCNTB | PPC_POPCNTWD; pcc->insns_flags2 = PPC2_VSX | PPC2_DFP | PPC2_DBRX | PPC2_ISA205; -pcc->msr_mask = 0x8204FF37ULL; +pcc->msr_mask = 0x8284FF37ULL; pcc->mmu_model = POWERPC_MMU_2_06; #if defined(CONFIG_SOFTMMU) pcc->handle_mmu_fault = ppc_hash64_handle_mmu_fault; @@ -7267,7 +7268,7 @@ POWERPC_FAMILY(POWER8)(ObjectClass *oc, PPC_SEGMENT_64B | PPC_SLBI | PPC_POPCNTB | PPC_POPCNTWD; pcc->insns_flags2 = PPC2_VSX | PPC2_DF
[Qemu-devel] [PATCH 3/7] Add VSX Instruction Decoders
From: Tom Musta This patch adds decoders for the VSX fields XT, XS, XA, XB and DM. The first four are split fields and a general helper for these types of fields is also added. Signed-off-by: Tom Musta Signed-off-by: Anton Blanchard --- Index: b/target-ppc/translate.c === --- a/target-ppc/translate.c +++ b/target-ppc/translate.c @@ -370,6 +370,12 @@ static inline int32_t name(uint32_t opco return (int16_t)((opcode >> (shift)) & ((1 << (nb)) - 1));\ } +#define EXTRACT_HELPER_SPLIT(name, shift1, nb1, shift2, nb2) \ +static inline uint32_t name(uint32_t opcode) \ +{ \ +return (((opcode >> (shift1)) & ((1 << (nb1)) - 1)) << nb2) | \ +((opcode >> (shift2)) & ((1 << (nb2)) - 1)); \ +} /* Opcode part 1 */ EXTRACT_HELPER(opc1, 26, 6); /* Opcode part 2 */ @@ -484,6 +490,11 @@ static inline target_ulong MASK(uint32_t return ret; } +EXTRACT_HELPER_SPLIT(xT, 0, 1, 21, 5); +EXTRACT_HELPER_SPLIT(xS, 0, 1, 21, 5); +EXTRACT_HELPER_SPLIT(xA, 2, 1, 16, 5); +EXTRACT_HELPER_SPLIT(xB, 1, 1, 11, 5); +EXTRACT_HELPER(DM, 8, 2); /*/ /* PowerPC instructions table*/
[Qemu-devel] [PATCH 4/7] Add VSR to Global Registers
From: Tom Musta This patch adds VSX VSRs to the list of global register indices. More specifically, it adds the lower halves of the first 32 VSRs to the list of global register indices. The upper halves of the first 32 VSRs are already defined via cpu_fpr[]. And the second 32 VSRs are already defined via the cpu_avrh[] and cpu_avrl[] arrays. Signed-off-by: Tom Musta Signed-off-by: Anton Blanchard --- Index: b/target-ppc/translate.c === --- a/target-ppc/translate.c +++ b/target-ppc/translate.c @@ -51,6 +51,7 @@ static char cpu_reg_names[10*3 + 22*4 /* #endif + 10*4 + 22*5 /* FPR */ + 2*(10*6 + 22*7) /* AVRh, AVRl */ ++ 10*5 + 22*6 /* VSR */ + 8*5 /* CRF */]; static TCGv cpu_gpr[32]; #if !defined(TARGET_PPC64) @@ -58,6 +59,7 @@ static TCGv cpu_gprh[32]; #endif static TCGv_i64 cpu_fpr[32]; static TCGv_i64 cpu_avrh[32], cpu_avrl[32]; +static TCGv_i64 cpu_vsr[32]; static TCGv_i32 cpu_crf[8]; static TCGv cpu_nip; static TCGv cpu_msr; @@ -137,6 +139,11 @@ void ppc_translate_init(void) #endif p += (i < 10) ? 6 : 7; cpu_reg_names_size -= (i < 10) ? 6 : 7; +snprintf(p, cpu_reg_names_size, "vsr%d", i); +cpu_vsr[i] = tcg_global_mem_new_i64(TCG_AREG0, + offsetof(CPUPPCState, vsr[i]), p); +p += (i < 10) ? 5 : 6; +cpu_reg_names_size -= (i < 10) ? 5 : 6; } cpu_nip = tcg_global_mem_new(TCG_AREG0, @@ -6980,6 +6987,26 @@ GEN_VAFORM_PAIRED(vmsumshm, vmsumshs, 20 GEN_VAFORM_PAIRED(vsel, vperm, 21) GEN_VAFORM_PAIRED(vmaddfp, vnmsubfp, 23) +/*** VSX extension ***/ + +static inline TCGv_i64 cpu_vsrh(int n) +{ +if (n < 32) { +return cpu_fpr[n]; +} else { +return cpu_avrh[n-32]; +} +} + +static inline TCGv_i64 cpu_vsrl(int n) +{ +if (n < 32) { +return cpu_vsr[n]; +} else { +return cpu_avrl[n-32]; +} +} + /*** SPE extension ***/ /* Register moves */
[Qemu-devel] [PATCH 5/7] Add lxvd2x
From: Tom Musta This patch adds the lxvd2x instruction. Signed-off-by: Tom Musta Signed-off-by: Anton Blanchard --- Index: b/target-ppc/translate.c === --- a/target-ppc/translate.c +++ b/target-ppc/translate.c @@ -7007,6 +7007,22 @@ static inline TCGv_i64 cpu_vsrl(int n) } } +static void gen_lxvd2x(DisasContext *ctx) +{ +TCGv EA; +if (unlikely(!ctx->vsx_enabled)) { +gen_exception(ctx, POWERPC_EXCP_VSXU); +return; +} +gen_set_access_type(ctx, ACCESS_INT); +EA = tcg_temp_new(); +gen_addr_reg_index(ctx, EA); +gen_qemu_ld64(ctx, cpu_vsrh(xT(ctx->opcode)), EA); +tcg_gen_addi_tl(EA, EA, 8); +gen_qemu_ld64(ctx, cpu_vsrl(xT(ctx->opcode)), EA); +tcg_temp_free(EA); +} + /*** SPE extension ***/ /* Register moves */ @@ -9456,6 +9472,8 @@ GEN_VAFORM_PAIRED(vmsumshm, vmsumshs, 20 GEN_VAFORM_PAIRED(vsel, vperm, 21), GEN_VAFORM_PAIRED(vmaddfp, vnmsubfp, 23), +GEN_HANDLER_E(lxvd2x, 0x1F, 0x0C, 0x1A, 0, PPC_NONE, PPC2_VSX), + #undef GEN_SPE #define GEN_SPE(name0, name1, opc2, opc3, inval0, inval1, type) \ GEN_OPCODE_DUAL(name0##_##name1, 0x04, opc2, opc3, inval0, inval1, type, PPC_NONE)
[Qemu-devel] [PATCH 7/7] Add xxpermdi
From: Tom Musta This patch adds the xxpermdi instruction. The instruction uses bits 22, 23, 29 and 30 for non-opcode fields (DM, AX and BX). This results in overloading of the opcode table with aliases, which can be seen in the GEN_XX3FORM_DM macro. Signed-off-by: Tom Musta Signed-off-by: Anton Blanchard --- Index: b/target-ppc/translate.c === --- a/target-ppc/translate.c +++ b/target-ppc/translate.c @@ -7039,10 +7039,28 @@ static void gen_stxvd2x(DisasContext *ct tcg_temp_free(EA); } +static void gen_xxpermdi(DisasContext *ctx) +{ +if (unlikely(!ctx->vsx_enabled)) { +gen_exception(ctx, POWERPC_EXCP_VSXU); +return; +} + +if ((DM(ctx->opcode) & 2) == 0) { +tcg_gen_mov_i64(cpu_vsrh(xT(ctx->opcode)), cpu_vsrh(xA(ctx->opcode))); +} else { +tcg_gen_mov_i64(cpu_vsrh(xT(ctx->opcode)), cpu_vsrl(xA(ctx->opcode))); +} +if ((DM(ctx->opcode) & 1) == 0) { +tcg_gen_mov_i64(cpu_vsrl(xT(ctx->opcode)), cpu_vsrh(xB(ctx->opcode))); +} else { +tcg_gen_mov_i64(cpu_vsrl(xT(ctx->opcode)), cpu_vsrl(xB(ctx->opcode))); +} +} + /*** SPE extension ***/ /* Register moves */ - static inline void gen_evmra(DisasContext *ctx) { @@ -9492,6 +9510,27 @@ GEN_HANDLER_E(lxvd2x, 0x1F, 0x0C, 0x1A, GEN_HANDLER_E(stxvd2x, 0x1F, 0xC, 0x1E, 0, PPC_NONE, PPC2_VSX), +#undef GEN_XX3FORM_DM +#define GEN_XX3FORM_DM(name, opc2, opc3) \ +GEN_HANDLER2_E(name, #name, 0x3C, opc2|0x00, opc3|0x00, 0, PPC_NONE, PPC2_VSX),\ +GEN_HANDLER2_E(name, #name, 0x3C, opc2|0x01, opc3|0x00, 0, PPC_NONE, PPC2_VSX),\ +GEN_HANDLER2_E(name, #name, 0x3C, opc2|0x02, opc3|0x00, 0, PPC_NONE, PPC2_VSX),\ +GEN_HANDLER2_E(name, #name, 0x3C, opc2|0x03, opc3|0x00, 0, PPC_NONE, PPC2_VSX),\ +GEN_HANDLER2_E(name, #name, 0x3C, opc2|0x00, opc3|0x04, 0, PPC_NONE, PPC2_VSX),\ +GEN_HANDLER2_E(name, #name, 0x3C, opc2|0x01, opc3|0x04, 0, PPC_NONE, PPC2_VSX),\ +GEN_HANDLER2_E(name, #name, 0x3C, opc2|0x02, opc3|0x04, 0, PPC_NONE, PPC2_VSX),\ +GEN_HANDLER2_E(name, #name, 0x3C, opc2|0x03, opc3|0x04, 0, PPC_NONE, PPC2_VSX),\ +GEN_HANDLER2_E(name, #name, 0x3C, 
opc2|0x00, opc3|0x08, 0, PPC_NONE, PPC2_VSX),\ +GEN_HANDLER2_E(name, #name, 0x3C, opc2|0x01, opc3|0x08, 0, PPC_NONE, PPC2_VSX),\ +GEN_HANDLER2_E(name, #name, 0x3C, opc2|0x02, opc3|0x08, 0, PPC_NONE, PPC2_VSX),\ +GEN_HANDLER2_E(name, #name, 0x3C, opc2|0x03, opc3|0x08, 0, PPC_NONE, PPC2_VSX),\ +GEN_HANDLER2_E(name, #name, 0x3C, opc2|0x00, opc3|0x0C, 0, PPC_NONE, PPC2_VSX),\ +GEN_HANDLER2_E(name, #name, 0x3C, opc2|0x01, opc3|0x0C, 0, PPC_NONE, PPC2_VSX),\ +GEN_HANDLER2_E(name, #name, 0x3C, opc2|0x02, opc3|0x0C, 0, PPC_NONE, PPC2_VSX),\ +GEN_HANDLER2_E(name, #name, 0x3C, opc2|0x03, opc3|0x0C, 0, PPC_NONE, PPC2_VSX) + +GEN_XX3FORM_DM(xxpermdi, 0x08, 0x01), + #undef GEN_SPE #define GEN_SPE(name0, name1, opc2, opc3, inval0, inval1, type) \ GEN_OPCODE_DUAL(name0##_##name1, 0x04, opc2, opc3, inval0, inval1, type, PPC_NONE)
[Qemu-devel] [PATCH 6/7] Add stxvd2x
From: Tom Musta This patch adds the stxvd2x instruction. Signed-off-by: Tom Musta Signed-off-by: Anton Blanchard --- Index: b/target-ppc/translate.c === --- a/target-ppc/translate.c +++ b/target-ppc/translate.c @@ -7023,6 +7023,22 @@ static void gen_lxvd2x(DisasContext *ctx tcg_temp_free(EA); } +static void gen_stxvd2x(DisasContext *ctx) +{ +TCGv EA; +if (unlikely(!ctx->vsx_enabled)) { +gen_exception(ctx, POWERPC_EXCP_VSXU); +return; +} +gen_set_access_type(ctx, ACCESS_INT); +EA = tcg_temp_new(); +gen_addr_reg_index(ctx, EA); +gen_qemu_st64(ctx, cpu_vsrh(xS(ctx->opcode)), EA); +tcg_gen_addi_tl(EA, EA, 8); +gen_qemu_st64(ctx, cpu_vsrl(xS(ctx->opcode)), EA); +tcg_temp_free(EA); +} + /*** SPE extension ***/ /* Register moves */ @@ -9474,6 +9490,8 @@ GEN_VAFORM_PAIRED(vmaddfp, vnmsubfp, 23) GEN_HANDLER_E(lxvd2x, 0x1F, 0x0C, 0x1A, 0, PPC_NONE, PPC2_VSX), +GEN_HANDLER_E(stxvd2x, 0x1F, 0xC, 0x1E, 0, PPC_NONE, PPC2_VSX), + #undef GEN_SPE #define GEN_SPE(name0, name1, opc2, opc3, inval0, inval1, type) \ GEN_OPCODE_DUAL(name0##_##name1, 0x04, opc2, opc3, inval0, inval1, type, PPC_NONE)
[Qemu-devel] [PATCH] spapr: Clear LPCR_ILE during reset
Since an OS can set LPCR_ILE we must clear it during reset. Otherwise if we reset into an OS with a different endian we die when we take the first exception. This fixes an issue seen on both full emulation and KVM. Signed-off-by: Anton Blanchard --- diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 93d02c1..4d45197 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -739,6 +739,8 @@ static void spapr_cpu_reset(void *opaque) env->spr[SPR_HIOR] = 0; +env->spr[SPR_LPCR] &= ~LPCR_ILE; + env->external_htab = (uint8_t *)spapr->htab; env->htab_base = -1; env->htab_mask = HTAB_SIZE(spapr) - 1;
[Qemu-devel] [PATCH 1/4] target-ppc: POWER8 supports the MSR_LE bit
Add MSR_LE to the msr_mask for POWER8. Signed-off-by: Anton Blanchard --- diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c index 3eafbb0..7661543 100644 --- a/target-ppc/translate_init.c +++ b/target-ppc/translate_init.c @@ -7173,7 +7173,7 @@ POWERPC_FAMILY(POWER8)(ObjectClass *oc, void *data) PPC2_ATOMIC_ISA206 | PPC2_FP_CVT_ISA206 | PPC2_FP_TST_ISA206 | PPC2_BCTAR_ISA207 | PPC2_LSQ_ISA207 | PPC2_ALTIVEC_207; -pcc->msr_mask = 0x8284FF36ULL; +pcc->msr_mask = 0x8284FF37ULL; pcc->mmu_model = POWERPC_MMU_2_06; #if defined(CONFIG_SOFTMMU) pcc->handle_mmu_fault = ppc_hash64_handle_mmu_fault;
[Qemu-devel] [PATCH 2/4] target-ppc: POWER8 supports isel
POWER8 supports isel, so enable it in QEMU. Signed-off-by: Anton Blanchard --- diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c index 3eafbb0..7661543 100644 --- a/target-ppc/translate_init.c +++ b/target-ppc/translate_init.c @@ -7156,7 +7156,7 @@ POWERPC_FAMILY(POWER8)(ObjectClass *oc, void *data) pcc->pvr_mask = CPU_POWERPC_POWER8_MASK; pcc->init_proc = init_proc_POWER8; pcc->check_pow = check_pow_nocheck; -pcc->insns_flags = PPC_INSNS_BASE | PPC_STRING | PPC_MFTB | +pcc->insns_flags = PPC_INSNS_BASE | PPC_ISEL | PPC_STRING | PPC_MFTB | PPC_FLOAT | PPC_FLOAT_FSEL | PPC_FLOAT_FRES | PPC_FLOAT_FSQRT | PPC_FLOAT_FRSQRTE | PPC_FLOAT_FRSQRTES |
[Qemu-devel] [PATCH 3/4] target-ppc: POWER7+ supports the MSR_VSX bit
Without MSR_VSX we die early during a Linux boot. Signed-off-by: Anton Blanchard --- Index: b/target-ppc/translate_init.c === --- a/target-ppc/translate_init.c +++ b/target-ppc/translate_init.c @@ -7117,7 +7117,7 @@ POWERPC_FAMILY(POWER7P)(ObjectClass *oc, PPC2_PERM_ISA206 | PPC2_DIVE_ISA206 | PPC2_ATOMIC_ISA206 | PPC2_FP_CVT_ISA206 | PPC2_FP_TST_ISA206; -pcc->msr_mask = 0x8204FF37ULL; +pcc->msr_mask = 0x8284FF37ULL; pcc->mmu_model = POWERPC_MMU_2_06; #if defined(CONFIG_SOFTMMU) pcc->handle_mmu_fault = ppc_hash64_handle_mmu_fault;
[Qemu-devel] [PATCH 4/4] target-ppc: MSR_POW not supported on POWER7/7+/8
Remove MSR_POW from the msr_mask for POWER7/7+/8. Signed-off-by: Anton Blanchard --- Index: b/target-ppc/translate_init.c === --- a/target-ppc/translate_init.c +++ b/target-ppc/translate_init.c @@ -7074,7 +7074,7 @@ POWERPC_FAMILY(POWER7)(ObjectClass *oc, PPC2_PERM_ISA206 | PPC2_DIVE_ISA206 | PPC2_ATOMIC_ISA206 | PPC2_FP_CVT_ISA206 | PPC2_FP_TST_ISA206; -pcc->msr_mask = 0x8284FF37ULL; +pcc->msr_mask = 0x8280FF37ULL; pcc->mmu_model = POWERPC_MMU_2_06; #if defined(CONFIG_SOFTMMU) pcc->handle_mmu_fault = ppc_hash64_handle_mmu_fault; @@ -7117,7 +7117,7 @@ POWERPC_FAMILY(POWER7P)(ObjectClass *oc, PPC2_PERM_ISA206 | PPC2_DIVE_ISA206 | PPC2_ATOMIC_ISA206 | PPC2_FP_CVT_ISA206 | PPC2_FP_TST_ISA206; -pcc->msr_mask = 0x8284FF37ULL; +pcc->msr_mask = 0x8280FF37ULL; pcc->mmu_model = POWERPC_MMU_2_06; #if defined(CONFIG_SOFTMMU) pcc->handle_mmu_fault = ppc_hash64_handle_mmu_fault; @@ -7173,7 +7173,7 @@ POWERPC_FAMILY(POWER8)(ObjectClass *oc, PPC2_ATOMIC_ISA206 | PPC2_FP_CVT_ISA206 | PPC2_FP_TST_ISA206 | PPC2_BCTAR_ISA207 | PPC2_LSQ_ISA207 | PPC2_ALTIVEC_207; -pcc->msr_mask = 0x8284FF37ULL; +pcc->msr_mask = 0x8280FF37ULL; pcc->mmu_model = POWERPC_MMU_2_06; #if defined(CONFIG_SOFTMMU) pcc->handle_mmu_fault = ppc_hash64_handle_mmu_fault;
[Qemu-devel] [PATCH] target-ppc: gdbstub: Add VSX support
Add the XML and functions to get and set VSX registers. Signed-off-by: Anton Blanchard --- configure | 6 +++--- gdb-xml/power-vsx.xml | 44 target-ppc/translate_init.c | 22 ++ 3 files changed, 69 insertions(+), 3 deletions(-) create mode 100644 gdb-xml/power-vsx.xml diff --git a/configure b/configure index 589798e..235b3d2 100755 --- a/configure +++ b/configure @@ -5182,20 +5182,20 @@ case "$target_name" in ppc64) TARGET_BASE_ARCH=ppc TARGET_ABI_DIR=ppc -gdb_xml_files="power64-core.xml power-fpu.xml power-altivec.xml power-spe.xml" +gdb_xml_files="power64-core.xml power-fpu.xml power-altivec.xml power-spe.xml power-vsx.xml" ;; ppc64le) TARGET_ARCH=ppc64 TARGET_BASE_ARCH=ppc TARGET_ABI_DIR=ppc -gdb_xml_files="power64-core.xml power-fpu.xml power-altivec.xml power-spe.xml" +gdb_xml_files="power64-core.xml power-fpu.xml power-altivec.xml power-spe.xml power-vsx.xml" ;; ppc64abi32) TARGET_ARCH=ppc64 TARGET_BASE_ARCH=ppc TARGET_ABI_DIR=ppc echo "TARGET_ABI32=y" >> $config_target_mak -gdb_xml_files="power64-core.xml power-fpu.xml power-altivec.xml power-spe.xml" +gdb_xml_files="power64-core.xml power-fpu.xml power-altivec.xml power-spe.xml power-vsx.xml" ;; sh4|sh4eb) TARGET_ARCH=sh4 diff --git a/gdb-xml/power-vsx.xml b/gdb-xml/power-vsx.xml new file mode 100644 index 000..fd290e9 --- /dev/null +++ b/gdb-xml/power-vsx.xml @@ -0,0 +1,44 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c index d74f4f0..efde425 100644 --- a/target-ppc/translate_init.c +++ b/target-ppc/translate_init.c @@ -8870,6 +8870,24 @@ static int gdb_set_spe_reg(CPUPPCState *env, uint8_t *mem_buf, int n) return 0; } +static int gdb_get_vsx_reg(CPUPPCState *env, uint8_t *mem_buf, int n) +{ +if (n < 32) { +stq_p(mem_buf, env->vsr[n]); +return 8; +} +return 0; +} + +static int gdb_set_vsx_reg(CPUPPCState *env, uint8_t *mem_buf, int n) +{ +if (n < 32) { +env->vsr[n] = ldq_p(mem_buf); +return 8; +} +return 0; 
+} + static int ppc_fixup_cpu(PowerPCCPU *cpu) { CPUPPCState *env = &cpu->env; @@ -8967,6 +8985,10 @@ static void ppc_cpu_realizefn(DeviceState *dev, Error **errp) gdb_register_coprocessor(cs, gdb_get_spe_reg, gdb_set_spe_reg, 34, "power-spe.xml", 0); } +if (pcc->insns_flags2 & PPC2_VSX) { +gdb_register_coprocessor(cs, gdb_get_vsx_reg, gdb_set_vsx_reg, + 32, "power-vsx.xml", 0); +} qemu_init_vcpu(cs); -- 2.1.0
Re: [Qemu-devel] [PATCH] target-ppc: gdbstub: Add VSX support
Hi Alex, > On 24.03.15 09:59, Anton Blanchard wrote: > > Add the XML and functions to get and set VSX registers. > > Awesome, thanks. Have you verified that this works for LE as well as > BE guests? Unfortunately all our XML gdbstub routines have endian issues (FPU, Altivec and now VSX). I only caught that the other day. I can work on reusing maybe_bswap_register() from gdbstub.c. Anton
[Qemu-devel] [PATCH] nvme: 64kB page size fixes
Initialise our maximum page size capability to 64kB and increase the page_size variable from 16 to 32 bits. Signed-off-by: Anton Blanchard -- diff --git a/hw/block/nvme.c b/hw/block/nvme.c index 1327658..aa1ed98 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -811,6 +811,7 @@ static int nvme_init(PCIDevice *pci_dev) NVME_CAP_SET_AMS(n->bar.cap, 1); NVME_CAP_SET_TO(n->bar.cap, 0xf); NVME_CAP_SET_CSS(n->bar.cap, 1); +NVME_CAP_SET_MPSMAX(n->bar.cap, 4); n->bar.vs = 0x00010001; n->bar.intmc = n->bar.intms = 0; diff --git a/hw/block/nvme.h b/hw/block/nvme.h index 993c511..b6ccb65 100644 --- a/hw/block/nvme.h +++ b/hw/block/nvme.h @@ -688,7 +688,7 @@ typedef struct NvmeCtrl { NvmeBar bar; BlockConfconf; -uint16_tpage_size; +uint32_tpage_size; uint16_tpage_bits; uint16_tmax_prp_ents; uint16_tcqe_size;
[Qemu-devel] target-ppc: Fix SRR0 when taking unaligned exceptions
We are setting SRR0 to the instruction before the one causing the unaligned exception. A quick testcase: . = 0x100 .globl _start _start: /* Cause a 0x600 */ li 3,0x1 stwcx. 3,0,3 1: b 1b . = 0x600 1: b 1b Built into something we can load as a BIOS image: gcc -mbig -c test.S ld -EB -Ttext 0x0 -o test test.o objcopy -O binary test test.bin Run with: qemu-system-ppc64 -nographic -bios test.bin Shows an incorrect SRR0 (points at the li): SRR0 0100 With the patch we get the correct SRR0: SRR0 0104 Signed-off-by: Anton Blanchard --- linux-user/main.c| 2 +- target-ppc/excp_helper.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/linux-user/main.c b/linux-user/main.c index c855bcc..9100130 100644 --- a/linux-user/main.c +++ b/linux-user/main.c @@ -1650,7 +1650,7 @@ void cpu_loop(CPUPPCState *env) info.si_signo = TARGET_SIGBUS; info.si_errno = 0; info.si_code = TARGET_BUS_ADRALN; -info._sifields._sigfault._addr = env->nip - 4; +info._sifields._sigfault._addr = env->nip; queue_signal(env, info.si_signo, &info); break; case POWERPC_EXCP_PROGRAM: /* Program exception */ diff --git a/target-ppc/excp_helper.c b/target-ppc/excp_helper.c index b803475..4250106 100644 --- a/target-ppc/excp_helper.c +++ b/target-ppc/excp_helper.c @@ -200,7 +200,7 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp) /* Get rS/rD and rA from faulting opcode */ env->spr[SPR_DSISR] |= (cpu_ldl_code(env, (env->nip - 4)) & 0x03FF) >> 16; -goto store_current; +goto store_next; case POWERPC_EXCP_PROGRAM: /* Program exception*/ switch (env->error_code & ~0xF) { case POWERPC_EXCP_FP: -- 2.1.4
[Qemu-devel] [PATCH 0/4] Fix ppc64 tcg issues
Hi, qemu is currently broken on ppc64. After applying the following patches I am able to boot a ppc64 and x86-64 image successfully. Anton
[Qemu-devel] [PATCH 3/4] tcg-ppc64: Fix add2_i64
add2_i64 was adding the lower double word to the upper double word of each input. Fix this so we add the lower double words, then the upper double words with carry propagation. Cc: qemu-sta...@nongnu.org Signed-off-by: Anton Blanchard --- sub2 has similar issues, I haven't fixed it because I don't have a testcase yet. Index: b/tcg/ppc64/tcg-target.c === --- a/tcg/ppc64/tcg-target.c +++ b/tcg/ppc64/tcg-target.c @@ -1958,18 +1958,18 @@ static void tcg_out_op (TCGContext *s, T environment. So in 64-bit mode it's always carry-out of bit 63. The fallback code using deposit works just as well for 32-bit. */ a0 = args[0], a1 = args[1]; -if (a0 == args[4] || (!const_args[5] && a0 == args[5])) { +if (a0 == args[3] || (!const_args[5] && a0 == args[5])) { a0 = TCG_REG_R0; } -if (const_args[3]) { -tcg_out32(s, ADDIC | TAI(a0, args[2], args[3])); +if (const_args[4]) { +tcg_out32(s, ADDIC | TAI(a0, args[2], args[4])); } else { -tcg_out32(s, ADDC | TAB(a0, args[2], args[3])); +tcg_out32(s, ADDC | TAB(a0, args[2], args[4])); } if (const_args[5]) { -tcg_out32(s, (args[5] ? ADDME : ADDZE) | RT(a1) | RA(args[4])); +tcg_out32(s, (args[5] ? ADDME : ADDZE) | RT(a1) | RA(args[3])); } else { -tcg_out32(s, ADDE | TAB(a1, args[4], args[5])); +tcg_out32(s, ADDE | TAB(a1, args[3], args[5])); } if (a0 != args[0]) { tcg_out_mov(s, TCG_TYPE_I64, args[0], a0); @@ -2147,7 +2147,7 @@ static const TCGTargetOpDef ppc_op_defs[ { INDEX_op_deposit_i32, { "r", "0", "rZ" } }, { INDEX_op_deposit_i64, { "r", "0", "rZ" } }, -{ INDEX_op_add2_i64, { "r", "r", "r", "rI", "r", "rZM" } }, +{ INDEX_op_add2_i64, { "r", "r", "r", "r", "rI", "rZM" } }, { INDEX_op_sub2_i64, { "r", "r", "rI", "r", "rZM", "r" } }, { INDEX_op_muls2_i64, { "r", "r", "r", "r" } }, { INDEX_op_mulu2_i64, { "r", "r", "r", "r" } },
[Qemu-devel] [PATCH 4/4] tcg-ppc64: rotr_i32 rotates wrong amount
rotr_i32 calculates the amount to left shift and puts it into a temporary, but then doesn't use it when doing the shift. Cc: qemu-sta...@nongnu.org Signed-off-by: Anton Blanchard --- Index: b/tcg/ppc64/tcg-target.c === --- a/tcg/ppc64/tcg-target.c +++ b/tcg/ppc64/tcg-target.c @@ -1661,7 +1661,7 @@ static void tcg_out_op (TCGContext *s, T tcg_out_rlw(s, RLWINM, args[0], args[1], 32 - args[2], 0, 31); } else { tcg_out32(s, SUBFIC | TAI(0, args[2], 32)); -tcg_out32(s, RLWNM | SAB(args[1], args[0], args[2]) +tcg_out32(s, RLWNM | SAB(args[1], args[0], 0) | MB(0) | ME(31)); } break;
[Qemu-devel] [PATCH 2/4] tcg-ppc64: bswap64 rotates output 32 bits
If our input and output is in the same register, bswap64 tries to undo a rotate of the input. This just ends up rotating the output. Cc: qemu-sta...@nongnu.org Signed-off-by: Anton Blanchard --- diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c index 0fcf2b5..64fb0af 100644 --- a/tcg/ppc64/tcg-target.c +++ b/tcg/ppc64/tcg-target.c @@ -1922,8 +1922,6 @@ static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args, if (a0 == 0) { tcg_out_mov(s, TCG_TYPE_I64, args[0], a0); -/* Revert the source rotate that we performed above. */ -tcg_out_rld(s, RLDICL, a1, a1, 32, 0); } break;
[Qemu-devel] [PATCH 1/4] tcg-ppc64: Fix RLDCL opcode
The rldcl instruction doesn't have an sh field, so the minor opcode of 8 is actually 4 when using the XO30 macro. Cc: qemu-sta...@nongnu.org Signed-off-by: Anton Blanchard --- Index: b/tcg/ppc64/tcg-target.c === --- a/tcg/ppc64/tcg-target.c +++ b/tcg/ppc64/tcg-target.c @@ -357,7 +357,7 @@ static int tcg_target_const_match (tcg_t #define RLDICL XO30( 0) #define RLDICR XO30( 1) #define RLDIMI XO30( 3) -#define RLDCL XO30( 8) +#define RLDCL XO30( 4) #define BCLR XO19( 16) #define BCCTR XO19(528)
Re: [Qemu-devel] [PATCH 1/4] tcg-ppc64: Fix RLDCL opcode
Hi Richard, > But that suggests then that we ought not be using XO30. > Or at least adding a comment. Good idea, how does this look? Anton -- The rldcl instruction doesn't have an sh field, so the minor opcode is shifted 1 bit. We were using the XO30 macro which shifted the minor opcode 2 bits. Remove XO30 and add MD30 and MDS30 macros which match the Power ISA categories. Cc: qemu-sta...@nongnu.org Signed-off-by: Anton Blanchard --- Index: b/tcg/ppc64/tcg-target.c === --- a/tcg/ppc64/tcg-target.c +++ b/tcg/ppc64/tcg-target.c @@ -308,7 +308,8 @@ static int tcg_target_const_match (tcg_t #define OPCD(opc) ((opc)<<26) #define XO19(opc) (OPCD(19)|((opc)<<1)) -#define XO30(opc) (OPCD(30)|((opc)<<2)) +#define MD30(opc) (OPCD(30)|((opc)<<2)) +#define MDS30(opc) (OPCD(30)|((opc)<<1)) #define XO31(opc) (OPCD(31)|((opc)<<1)) #define XO58(opc) (OPCD(58)|(opc)) #define XO62(opc) (OPCD(62)|(opc)) @@ -354,10 +355,10 @@ static int tcg_target_const_match (tcg_t #define RLWINM OPCD( 21) #define RLWNM OPCD( 23) -#define RLDICL XO30( 0) -#define RLDICR XO30( 1) -#define RLDIMI XO30( 3) -#define RLDCL XO30( 8) +#define RLDICL MD30( 0) +#define RLDICR MD30( 1) +#define RLDIMI MD30( 3) +#define RLDCL MDS30( 8) #define BCLR XO19( 16) #define BCCTR XO19(528)
[Qemu-devel] [PATCH] pseries: Fix loading of little endian kernels
From: Benjamin Herrenschmidt Try loading the kernel as little endian if it fails big endian. Signed-off-by: Benjamin Herrenschmidt Reviewed-by: Anton Blanchard --- Index: b/hw/ppc/spapr.c === --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -273,6 +273,7 @@ static void *spapr_create_fdt_skel(const hwaddr initrd_base, hwaddr initrd_size, hwaddr kernel_size, + bool little_endian, const char *boot_device, const char *kernel_cmdline, uint32_t epow_irq) @@ -326,6 +327,9 @@ static void *spapr_create_fdt_skel(const cpu_to_be64(kernel_size) }; _FDT((fdt_property(fdt, "qemu,boot-kernel", &kprop, sizeof(kprop)))); +if (little_endian) { +_FDT((fdt_property(fdt, "qemu,boot-kernel-le", NULL, 0))); +} } if (boot_device) { _FDT((fdt_property_string(fdt, "qemu,boot-device", boot_device))); @@ -1102,6 +1106,7 @@ static void ppc_spapr_init(QEMUMachineIn uint32_t initrd_base = 0; long kernel_size = 0, initrd_size = 0; long load_limit, rtas_limit, fw_size; +bool kernel_le = false; char *filename; msi_supported = true; @@ -1282,6 +1287,12 @@ static void ppc_spapr_init(QEMUMachineIn kernel_size = load_elf(kernel_filename, translate_kernel_address, NULL, NULL, &lowaddr, NULL, 1, ELF_MACHINE, 0); if (kernel_size < 0) { +kernel_size = load_elf(kernel_filename, + translate_kernel_address, NULL, + NULL, &lowaddr, NULL, 0, ELF_MACHINE, 0); +kernel_le = kernel_size > 0; +} +if (kernel_size < 0) { kernel_size = load_image_targphys(kernel_filename, KERNEL_LOAD_ADDR, load_limit - KERNEL_LOAD_ADDR); @@ -1331,7 +1342,7 @@ static void ppc_spapr_init(QEMUMachineIn /* Prepare the device tree */ spapr->fdt_skel = spapr_create_fdt_skel(cpu_model, initrd_base, initrd_size, -kernel_size, +kernel_size, kernel_le, boot_device, kernel_cmdline, spapr->epow_irq); assert(spapr->fdt_skel != NULL);
[Qemu-devel] [PATCH] ppc: Add CFAR, DAR and DSISR to the dictionary of printable registers
From: Tom Musta The CFAR, DAR and DSISR registers are currently missing from the dictionary of registers that may be printed in the QEMU console. These are interesting registers when debugging. With this patch, the following commands work properly: (qemu) print $cfar (qemu) print $dar (qemu) print $dsisr Signed-off-by: Tom Musta Reviewed-by: Anton Blanchard --- Index: b/monitor.c === --- a/monitor.c +++ b/monitor.c @@ -3186,6 +3186,9 @@ static const MonitorDef monitor_defs[] = { "srr0", offsetof(CPUPPCState, spr[SPR_SRR0]) }, { "srr1", offsetof(CPUPPCState, spr[SPR_SRR1]) }, +{ "dar", offsetof(CPUPPCState, spr[SPR_DAR]) }, +{ "dsisr", offsetof(CPUPPCState, spr[SPR_DSISR]) }, +{ "cfar", offsetof(CPUPPCState, spr[SPR_CFAR]) }, { "sprg0", offsetof(CPUPPCState, spr[SPR_SPRG0]) }, { "sprg1", offsetof(CPUPPCState, spr[SPR_SPRG1]) }, { "sprg2", offsetof(CPUPPCState, spr[SPR_SPRG2]) },
[Qemu-devel] [PATCH] target-ppc: Little Endian Correction to Load/Store Vector Element
From: Tom Musta The Load Vector Element (lve*x) and Store Vector Element (stve*x) instructions not only byte-swap in Little Endian mode, they also invert the element that is accessed. For example, the RTL for lvehx contains this: eb <-- EA[60:63] if Big-Endian byte ordering then VRT[8*eb:8*eb+15] <-- MEM(EA,2) else VRT[112-(8*eb):127-(8*eb)] <-- MEM(EA,2) This patch adds the element inversion, as described in the last line of the RTL. Signed-off-by: Tom Musta Reviewed-by: Anton Blanchard --- Index: b/target-ppc/mem_helper.c === --- a/target-ppc/mem_helper.c +++ b/target-ppc/mem_helper.c @@ -212,6 +212,7 @@ target_ulong helper_lscbx(CPUPPCState *e int index = (addr & 0xf) >> sh; \ \ if (msr_le) { \ +index = n_elems - index - 1;\ r->element[LO_IDX ? index : (adjust - index)] = \ swap(access(env, addr));\ } else {\ @@ -236,6 +237,7 @@ LVE(lvewx, cpu_ldl_data, bswap32, u32) int index = (addr & 0xf) >> sh; \ \ if (msr_le) { \ +index = n_elems - index - 1;\ access(env, addr, swap(r->element[LO_IDX ? index : \ (adjust - index)])); \ } else {\
Re: [Qemu-devel] PR KVM and TM issues
Hi Alexey, > > I can't get an Ubuntu Wily guest to boot on an Ubuntu Wily host in > > PR KVM mode. The kernel in both cases is 4.2. To reproduce: > > > > wget -N > > https://cloud-images.ubuntu.com/wily/current/wily-server-cloudimg-ppc64el-disk1.img > > > > qemu-system-ppc64 -cpu POWER8 -enable-kvm -machine pseries,kvm-type=PR -m > > 4G -nographic -vga none -drive > > file=wily-server-cloudimg-ppc64el-disk1.img,if=virtio > > > > Should TM work inside a PR KVM guest? > > If I read the kernel code correctly (kvmppc_set_one_reg_hv vs. > kvmppc_set_one_reg_pr), no, it should not be expected to work. I see a couple of issues, patches to follow: 1. QEMU needs to clear the TM feature bit in the ibm,pa-features array when running in PR KVM mode. 2. Linux needs to clear the user TM feature bits if TM gets disabled at runtime via the ibm,pa-features bit. Anton
[Qemu-devel] [PATCH] spapr: Don't set the TM ibm,pa-features bit in PR KVM mode
We don't support transactional memory in PR KVM, so don't tell the OS that we do. Signed-off-by: Anton Blanchard --- diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index e7be21e..538bd87 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -696,6 +696,12 @@ static void spapr_populate_cpu_dt(CPUState *cs, void *fdt, int offset, } else /* env->mmu_model == POWERPC_MMU_2_07 */ { pa_features = pa_features_207; pa_size = sizeof(pa_features_207); + +/* Don't enable TM in PR KVM mode */ +if (kvm_enabled() && +kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) { +pa_features[24] &= ~0x80; +} } if (env->ci_large_pages) { pa_features[3] |= 0x20;
[Qemu-devel] [PATCH] powerpc: Clear user CPU feature bits if TM is disabled at runtime
In check_cpu_pa_features() we check a number of bits in the ibm,pa-features array and set and clear CPU features based on what we find. One of these bits is CPU_FTR_TM, the transactional memory feature bit. If this does disable TM at runtime, then we need to tell userspace about it by clearing the user CPU feature bits. Without this patch userspace processes will think they can execute TM instructions and get killed when they try. Signed-off-by: Anton Blanchard Cc: sta...@vger.kernel.org --- Michael I've added stable here because I'm seeing this on a number of distros and would like to get it backported, but I'll leave it up to you if it should go there. diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index f98be83..98c6c86 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -822,4 +822,18 @@ static int __init disable_hardlockup_detector(void) return 0; } early_initcall(disable_hardlockup_detector); + +static int __init update_cpu_user_features(void) +{ + /* +* Firmware might have disabled TM by clearing the relevant +* bit in the ibm,pa-features array. In this case we need to +* tell userspace. +*/ + if (!cpu_has_feature(CPU_FTR_TM)) + cur_cpu_spec->cpu_user_features2 &= ~(PPC_FEATURE2_HTM|PPC_FEATURE2_HTM_NOSC); + + return 0; +} +early_initcall(update_cpu_user_features); #endif
[Qemu-devel] [PATCH 1/3] powerpc: scan_features() updates incorrect bits
The real LE feature entry in the ibm_pa_feature struct has the wrong number of elements. Instead of checking for byte 5, bit 0, we check for byte 0, bit 0, and we also incorrectly update cpu user feature bit 5. Fixes: 44ae3ab3358e ("powerpc: Free up some CPU feature bits by moving out MMU-related features") Signed-off-by: Anton Blanchard Cc: sta...@vger.kernel.org --- arch/powerpc/kernel/prom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 7030b03..9a3a7c6 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -158,7 +158,7 @@ static struct ibm_pa_feature { {CPU_FTR_NOEXECUTE, 0, 0, 0, 6, 0}, {CPU_FTR_NODSISRALIGN, 0, 0,1, 1, 1}, {0, MMU_FTR_CI_LARGE_PAGE, 0, 1, 2, 0}, - {CPU_FTR_REAL_LE, PPC_FEATURE_TRUE_LE, 5, 0, 0}, + {CPU_FTR_REAL_LE, PPC_FEATURE_TRUE_LE, 0, 5, 0, 0}, /* * If the kernel doesn't support TM (ie. CONFIG_PPC_TRANSACTIONAL_MEM=n), * we don't want to turn on CPU_FTR_TM here, so we use CPU_FTR_TM_COMP -- 2.7.4
[Qemu-devel] [PATCH 2/3] powerpc: Update cpu_user_features2 in scan_features()
scan_features() updates cpu_user_features but not cpu_user_features2. Amongst other things, cpu_user_features2 contains the user TM feature bits which we must keep in sync with the kernel TM feature bit. Signed-off-by: Anton Blanchard Cc: sta...@vger.kernel.org --- arch/powerpc/kernel/prom.c | 19 +++ 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 9a3a7c6..99709bb 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -148,23 +148,24 @@ static struct ibm_pa_feature { unsigned long cpu_features; /* CPU_FTR_xxx bit */ unsigned long mmu_features; /* MMU_FTR_xxx bit */ unsigned intcpu_user_ftrs; /* PPC_FEATURE_xxx bit */ + unsigned intcpu_user_ftrs2; /* PPC_FEATURE2_xxx bit */ unsigned char pabyte; /* byte number in ibm,pa-features */ unsigned char pabit; /* bit number (big-endian) */ unsigned char invert; /* if 1, pa bit set => clear feature */ } ibm_pa_features[] __initdata = { - {0, 0, PPC_FEATURE_HAS_MMU, 0, 0, 0}, - {0, 0, PPC_FEATURE_HAS_FPU, 0, 1, 0}, - {CPU_FTR_CTRL, 0, 0,0, 3, 0}, - {CPU_FTR_NOEXECUTE, 0, 0, 0, 6, 0}, - {CPU_FTR_NODSISRALIGN, 0, 0,1, 1, 1}, - {0, MMU_FTR_CI_LARGE_PAGE, 0, 1, 2, 0}, - {CPU_FTR_REAL_LE, PPC_FEATURE_TRUE_LE, 0, 5, 0, 0}, + {0, 0, PPC_FEATURE_HAS_MMU, 0, 0, 0, 0}, + {0, 0, PPC_FEATURE_HAS_FPU, 0, 0, 1, 0}, + {CPU_FTR_CTRL, 0, 0, 0, 0, 3, 0}, + {CPU_FTR_NOEXECUTE, 0, 0, 0,0, 6, 0}, + {CPU_FTR_NODSISRALIGN, 0, 0, 0, 1, 1, 1}, + {0, MMU_FTR_CI_LARGE_PAGE, 0, 0,1, 2, 0}, + {CPU_FTR_REAL_LE, PPC_FEATURE_TRUE_LE, 0, 0, 5, 0, 0}, /* * If the kernel doesn't support TM (ie. CONFIG_PPC_TRANSACTIONAL_MEM=n), * we don't want to turn on CPU_FTR_TM here, so we use CPU_FTR_TM_COMP * which is 0 if the kernel doesn't support TM. 
*/ - {CPU_FTR_TM_COMP, 0, 0, 22, 0, 0}, + {CPU_FTR_TM_COMP, 0, 0, 0, 22, 0, 0}, }; static void __init scan_features(unsigned long node, const unsigned char *ftrs, @@ -195,10 +196,12 @@ static void __init scan_features(unsigned long node, const unsigned char *ftrs, if (bit ^ fp->invert) { cur_cpu_spec->cpu_features |= fp->cpu_features; cur_cpu_spec->cpu_user_features |= fp->cpu_user_ftrs; + cur_cpu_spec->cpu_user_features2 |= fp->cpu_user_ftrs2; cur_cpu_spec->mmu_features |= fp->mmu_features; } else { cur_cpu_spec->cpu_features &= ~fp->cpu_features; cur_cpu_spec->cpu_user_features &= ~fp->cpu_user_ftrs; + cur_cpu_spec->cpu_user_features2 &= ~fp->cpu_user_ftrs2; cur_cpu_spec->mmu_features &= ~fp->mmu_features; } } -- 2.7.4
[Qemu-devel] [PATCH 3/3] powerpc: Update TM user feature bits in scan_features()
We need to update the user TM feature bits (PPC_FEATURE2_HTM and PPC_FEATURE2_HTM_NOSC) to mirror what we do with the kernel TM feature bit. At the moment, if firmware reports TM is not available we turn off the kernel TM feature bit but leave the userspace ones on. Userspace thinks it can execute TM instructions and it dies trying. This (together with a QEMU patch) fixes PR KVM, which doesn't currently support TM. Signed-off-by: Anton Blanchard Cc: sta...@vger.kernel.org --- arch/powerpc/kernel/prom.c | 9 + 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 99709bb..5beffd7 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -161,11 +161,12 @@ static struct ibm_pa_feature { {0, MMU_FTR_CI_LARGE_PAGE, 0, 0,1, 2, 0}, {CPU_FTR_REAL_LE, PPC_FEATURE_TRUE_LE, 0, 0, 5, 0, 0}, /* -* If the kernel doesn't support TM (ie. CONFIG_PPC_TRANSACTIONAL_MEM=n), -* we don't want to turn on CPU_FTR_TM here, so we use CPU_FTR_TM_COMP -* which is 0 if the kernel doesn't support TM. +* If the kernel doesn't support TM (ie CONFIG_PPC_TRANSACTIONAL_MEM=n), +* we don't want to turn on TM here, so we use the *_COMP versions +* which are 0 if the kernel doesn't support TM. */ - {CPU_FTR_TM_COMP, 0, 0, 0, 22, 0, 0}, + {CPU_FTR_TM_COMP, 0, 0, +PPC_FEATURE2_HTM_COMP|PPC_FEATURE2_HTM_NOSC_COMP, 22, 0, 0}, }; static void __init scan_features(unsigned long node, const unsigned char *ftrs, -- 2.7.4
[Qemu-devel] [PATCH v2] spapr: Don't set the TM ibm,pa-features bit in PR KVM mode
We don't support transactional memory in PR KVM, so don't tell the OS that we do. Signed-off-by: Anton Blanchard --- v2: Fix build with CONFIG_KVM disabled, noticed by Alex. diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index b69995e..dc3e3c9 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -696,6 +696,14 @@ static void spapr_populate_cpu_dt(CPUState *cs, void *fdt, int offset, } else /* env->mmu_model == POWERPC_MMU_2_07 */ { pa_features = pa_features_207; pa_size = sizeof(pa_features_207); + +#ifdef CONFIG_KVM +/* Don't enable TM in PR KVM mode */ +if (kvm_enabled() && +kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) { +pa_features[24] &= ~0x80; +} +#endif } if (env->ci_large_pages) { pa_features[3] |= 0x20;
[Qemu-devel] [PATCH 1/2] Add PowerPC AT_HWCAP2 definitions
From: Anton Blanchard We need the PPC_FEATURE2_HAS_HTM bit in a subsequent patch, so add the PowerPC AT_HWCAP2 definitions. Signed-off-by: Anton Blanchard --- diff --git a/include/elf.h b/include/elf.h index 28d448b..8533b2a 100644 --- a/include/elf.h +++ b/include/elf.h @@ -477,6 +477,19 @@ typedef struct { #define PPC_FEATURE_TRUE_LE 0x00000002 #define PPC_FEATURE_PPC_LE 0x00000001 +/* Bits present in AT_HWCAP2 for PowerPC. */ + +#define PPC_FEATURE2_ARCH_2_07 0x80000000 +#define PPC_FEATURE2_HAS_HTM 0x40000000 +#define PPC_FEATURE2_HAS_DSCR 0x20000000 +#define PPC_FEATURE2_HAS_EBB 0x10000000 +#define PPC_FEATURE2_HAS_ISEL 0x08000000 +#define PPC_FEATURE2_HAS_TAR 0x04000000 +#define PPC_FEATURE2_HAS_VEC_CRYPTO 0x02000000 +#define PPC_FEATURE2_HTM_NOSC 0x01000000 +#define PPC_FEATURE2_ARCH_3_00 0x00800000 +#define PPC_FEATURE2_HAS_IEEE128 0x00400000 + /* Bits present in AT_HWCAP for Sparc. */ #define HWCAP_SPARC_FLUSH 0x00000001
[Qemu-devel] [PATCH 2/2] spapr: Better handling of ibm,pa-features TM bit
From: Anton Blanchard There are a few issues with our handling of the ibm,pa-features TM bit: - We don't support transactional memory in PR KVM, so don't tell the OS that we do. - In full emulation we have a minimal implementation of TM that always fails, so for performance reasons lets not tell the OS that we support it either. - In HV KVM mode, we should mirror the host TM enabled state by looking at the AT_HWCAP2 bit. Signed-off-by: Anton Blanchard --- diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 0636642..c403fbb 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -620,7 +620,7 @@ static void spapr_populate_cpu_dt(CPUState *cs, void *fdt, int offset, 0xf6, 0x1f, 0xc7, 0xc0, 0x80, 0xf0, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, -0x80, 0x00, 0x80, 0x00, 0x80, 0x00 }; +0x80, 0x00, 0x80, 0x00, 0x00, 0x00 }; uint8_t *pa_features; size_t pa_size; @@ -697,6 +697,19 @@ static void spapr_populate_cpu_dt(CPUState *cs, void *fdt, int offset, } else /* env->mmu_model == POWERPC_MMU_2_07 */ { pa_features = pa_features_207; pa_size = sizeof(pa_features_207); + +#ifdef CONFIG_KVM +/* Only enable TM in HV KVM mode */ +if (kvm_enabled() && +!kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) { +unsigned long hwcap2 = qemu_getauxval(AT_HWCAP2); + +/* Guest should inherit host TM enabled bit */ +if (hwcap2 & PPC_FEATURE2_HAS_HTM) { +pa_features[24] |= 0x80; +} +} +#endif } if (env->ci_large_pages) { pa_features[3] |= 0x20;
Re: [Qemu-devel] [PULL 03/13] target-ppc: Use 32-bit rotate instead of deposit + 64-bit rotate
Hi, > From: Richard Henderson > > A 32-bit rotate insn is more common on hosts than a deposit insn, > and if the host has neither the result is truely horrific. > > At the same time, tidy up the temporaries within these functions, > drop the over-use of "likely", drop some checks for identity that > will also be checked by tcg-op.c functions, and special case mask > without rotate within rlwinm. This breaks masks that wrap: li r3,-1 li r4,-1 rlwnm r3,r3,r4,22,8 We expect: ffffffffff8003ff But get: 00000000ff8003ff Anton > Signed-off-by: Richard Henderson > Signed-off-by: David Gibson > --- > target-ppc/translate.c | 172 > - 1 file changed, 70 > insertions(+), 102 deletions(-) > > diff --git a/target-ppc/translate.c b/target-ppc/translate.c > index 3ea6625..b392ecc 100644 > --- a/target-ppc/translate.c > +++ b/target-ppc/translate.c > @@ -1610,141 +1610,109 @@ static void gen_cntlzd(DisasContext *ctx) > /* rlwimi & rlwimi. */ > static void gen_rlwimi(DisasContext *ctx) > { > -uint32_t mb, me, sh; > - > -mb = MB(ctx->opcode); > -me = ME(ctx->opcode); > -sh = SH(ctx->opcode); > -if (likely(sh == (31-me) && mb <= me)) { > -tcg_gen_deposit_tl(cpu_gpr[rA(ctx->opcode)], > cpu_gpr[rA(ctx->opcode)], > - cpu_gpr[rS(ctx->opcode)], sh, me - mb + > 1); > +TCGv t_ra = cpu_gpr[rA(ctx->opcode)]; > +TCGv t_rs = cpu_gpr[rS(ctx->opcode)]; > +uint32_t sh = SH(ctx->opcode); > +uint32_t mb = MB(ctx->opcode); > +uint32_t me = ME(ctx->opcode); > + > +if (sh == (31-me) && mb <= me) { > +tcg_gen_deposit_tl(t_ra, t_ra, t_rs, sh, me - mb + 1); > } else { > target_ulong mask; > +TCGv_i32 t0; > TCGv t1; > -TCGv t0 = tcg_temp_new(); > -#if defined(TARGET_PPC64) > -tcg_gen_deposit_i64(t0, cpu_gpr[rS(ctx->opcode)], > -cpu_gpr[rS(ctx->opcode)], 32, 32); > -tcg_gen_rotli_i64(t0, t0, sh); > -#else > -tcg_gen_rotli_i32(t0, cpu_gpr[rS(ctx->opcode)], sh); > -#endif > + > #if defined(TARGET_PPC64) > mb += 32; > me += 32; > #endif > mask = MASK(mb, me); > + > +t0 = tcg_temp_new_i32(); > t1 = tcg_temp_new(); >
-tcg_gen_andi_tl(t0, t0, mask); > -tcg_gen_andi_tl(t1, cpu_gpr[rA(ctx->opcode)], ~mask); > -tcg_gen_or_tl(cpu_gpr[rA(ctx->opcode)], t0, t1); > -tcg_temp_free(t0); > +tcg_gen_trunc_tl_i32(t0, t_rs); > +tcg_gen_rotli_i32(t0, t0, sh); > +tcg_gen_extu_i32_tl(t1, t0); > +tcg_temp_free_i32(t0); > + > +tcg_gen_andi_tl(t1, t1, mask); > +tcg_gen_andi_tl(t_ra, t_ra, ~mask); > +tcg_gen_or_tl(t_ra, t_ra, t1); > tcg_temp_free(t1); > } > -if (unlikely(Rc(ctx->opcode) != 0)) > -gen_set_Rc0(ctx, cpu_gpr[rA(ctx->opcode)]); > +if (unlikely(Rc(ctx->opcode) != 0)) { > +gen_set_Rc0(ctx, t_ra); > +} > } > > /* rlwinm & rlwinm. */ > static void gen_rlwinm(DisasContext *ctx) > { > -uint32_t mb, me, sh; > - > -sh = SH(ctx->opcode); > -mb = MB(ctx->opcode); > -me = ME(ctx->opcode); > +TCGv t_ra = cpu_gpr[rA(ctx->opcode)]; > +TCGv t_rs = cpu_gpr[rS(ctx->opcode)]; > +uint32_t sh = SH(ctx->opcode); > +uint32_t mb = MB(ctx->opcode); > +uint32_t me = ME(ctx->opcode); > > -if (likely(mb == 0 && me == (31 - sh))) { > -if (likely(sh == 0)) { > -tcg_gen_ext32u_tl(cpu_gpr[rA(ctx->opcode)], > cpu_gpr[rS(ctx->opcode)]); > -} else { > -TCGv t0 = tcg_temp_new(); > -tcg_gen_ext32u_tl(t0, cpu_gpr[rS(ctx->opcode)]); > -tcg_gen_shli_tl(t0, t0, sh); > -tcg_gen_ext32u_tl(cpu_gpr[rA(ctx->opcode)], t0); > -tcg_temp_free(t0); > -} > -} else if (likely(sh != 0 && me == 31 && sh == (32 - mb))) { > -TCGv t0 = tcg_temp_new(); > -tcg_gen_ext32u_tl(t0, cpu_gpr[rS(ctx->opcode)]); > -tcg_gen_shri_tl(t0, t0, mb); > -tcg_gen_ext32u_tl(cpu_gpr[rA(ctx->opcode)], t0); > -tcg_temp_free(t0); > -} else if (likely(mb == 0 && me == 31)) { > -TCGv_i32 t0 = tcg_temp_new_i32(); > -tcg_gen_trunc_tl_i32(t0, cpu_gpr[rS(ctx->opcode)]); > -tcg_gen_rotli_i32(t0, t0, sh); > -tcg_gen_extu_i32_tl(cpu_gpr[rA(ctx->opcode)], t0); > -tcg_temp_free_i32(t0); > +if (mb == 0 && me == (31 - sh)) { > +tcg_gen_shli_tl(t_ra, t_rs, sh); > +tcg_gen_ext32u_tl(t_ra, t_ra); > +} else if (sh != 0 && me == 31 && sh == (32 - mb)) { > +tcg_gen_ext32u_tl(t_ra, 
t_rs); > +tcg_gen_shri_tl(t_ra, t_ra, mb); > } else { > -TCGv t0 = tcg_temp_new(); > -#if defined(TARGET_PPC64) > -tcg_gen_deposit_i64(t0, cpu_gpr[rS(ctx->opcode)], > -cpu_gpr[rS(ctx->opcode)], 32, 32); > -tcg_gen_rotli_i64(t0, t0, sh); > -#else > -
Re: [Qemu-devel] [PULL 03/13] target-ppc: Use 32-bit rotate instead of deposit + 64-bit rotate
Hi rth, > > Bother. I've tentatively put a revert into ppc-for-2.7. Richard, > > do you have a better idea how to fix it? > > Please try the following. Thanks! This passes my tests. Feel free to add: Tested-by: Anton Blanchard Anton
Re: [Qemu-devel] [PULL 03/13] target-ppc: Use 32-bit rotate instead of deposit + 64-bit rotate
Hi, > > > Bother. I've tentatively put a revert into ppc-for-2.7. Richard, > > > do you have a better idea how to fix it? > > > > Please try the following. > > Thanks! This passes my tests. Feel free to add: > > Tested-by: Anton Blanchard Actually I think I've found a problem: lis r4,0x7fffffff@h ori r4,r4,0x7fffffff@l rlwinm r3,r4,0,25,1 32 bit rotate is defined as a 64 bit rotate of 2 copies of the 32 bit value, so we expect 0x7fffffff4000007f, but get 0x4000007f. Not sure if anything out there depends on it though. Anton
Re: [Qemu-devel] [PATCH v2] target-ppc: Fix rlwimi, rlwinm, rlwnm
Hi rth, > In 63ae0915f8ec, I arranged to use a 32-bit rotate, without > considering the effect of a mask value that wraps around to > the high bits of the word. Thanks, that passes my tests. Tested-by: Anton Blanchard Anton > Signed-off-by: Richard Henderson > --- > target-ppc/translate.c | 73 > +++--- 1 file changed, 51 > insertions(+), 22 deletions(-) > > diff --git a/target-ppc/translate.c b/target-ppc/translate.c > index b689475..23bc054 100644 > --- a/target-ppc/translate.c > +++ b/target-ppc/translate.c > @@ -1636,7 +1636,6 @@ static void gen_rlwimi(DisasContext *ctx) > tcg_gen_deposit_tl(t_ra, t_ra, t_rs, sh, me - mb + 1); > } else { > target_ulong mask; > -TCGv_i32 t0; > TCGv t1; > > #if defined(TARGET_PPC64) > @@ -1645,12 +1644,21 @@ static void gen_rlwimi(DisasContext *ctx) > #endif > mask = MASK(mb, me); > > -t0 = tcg_temp_new_i32(); > t1 = tcg_temp_new(); > -tcg_gen_trunc_tl_i32(t0, t_rs); > -tcg_gen_rotli_i32(t0, t0, sh); > -tcg_gen_extu_i32_tl(t1, t0); > -tcg_temp_free_i32(t0); > +if (mask <= 0xffffffffu) { > +TCGv_i32 t0 = tcg_temp_new_i32(); > +tcg_gen_trunc_tl_i32(t0, t_rs); > +tcg_gen_rotli_i32(t0, t0, sh); > +tcg_gen_extu_i32_tl(t1, t0); > +tcg_temp_free_i32(t0); > +} else { > +#if defined(TARGET_PPC64) > +tcg_gen_deposit_i64(t1, t_rs, t_rs, 32, 32); > +tcg_gen_rotli_i64(t1, t1, sh); > +#else > +g_assert_not_reached(); > +#endif > +} > > tcg_gen_andi_tl(t1, t1, mask); > tcg_gen_andi_tl(t_ra, t_ra, ~mask); > @@ -1678,20 +1686,28 @@ static void gen_rlwinm(DisasContext *ctx) > tcg_gen_ext32u_tl(t_ra, t_rs); > tcg_gen_shri_tl(t_ra, t_ra, mb); > } else { > +target_ulong mask; > #if defined(TARGET_PPC64) > mb += 32; > me += 32; > #endif > -if (sh == 0) { > -tcg_gen_andi_tl(t_ra, t_rs, MASK(mb, me)); > -} else { > -TCGv_i32 t0 = tcg_temp_new_i32(); > +mask = MASK(mb, me); > > +if (mask <= 0xffffffffu) { > +TCGv_i32 t0 = tcg_temp_new_i32(); > tcg_gen_trunc_tl_i32(t0, t_rs); > tcg_gen_rotli_i32(t0, t0, sh); > -tcg_gen_andi_i32(t0, t0, MASK(mb, me)); >
+tcg_gen_andi_i32(t0, t0, mask); > tcg_gen_extu_i32_tl(t_ra, t0); > tcg_temp_free_i32(t0); > +} else { > +#if defined(TARGET_PPC64) > +tcg_gen_deposit_i64(t_ra, t_rs, t_rs, 32, 32); > +tcg_gen_rotli_i64(t_ra, t_ra, sh); > +tcg_gen_andi_i64(t_ra, t_ra, mask); > +#else > +g_assert_not_reached(); > +#endif > } > } > if (unlikely(Rc(ctx->opcode) != 0)) { > @@ -1707,24 +1723,37 @@ static void gen_rlwnm(DisasContext *ctx) > TCGv t_rb = cpu_gpr[rB(ctx->opcode)]; > uint32_t mb = MB(ctx->opcode); > uint32_t me = ME(ctx->opcode); > -TCGv_i32 t0, t1; > +target_ulong mask; > > #if defined(TARGET_PPC64) > mb += 32; > me += 32; > #endif > +mask = MASK(mb, me); > > -t0 = tcg_temp_new_i32(); > -t1 = tcg_temp_new_i32(); > -tcg_gen_trunc_tl_i32(t0, t_rb); > -tcg_gen_trunc_tl_i32(t1, t_rs); > -tcg_gen_andi_i32(t0, t0, 0x1f); > -tcg_gen_rotl_i32(t1, t1, t0); > -tcg_temp_free_i32(t0); > +if (mask <= 0xffffffffu) { > +TCGv_i32 t0 = tcg_temp_new_i32(); > +TCGv_i32 t1 = tcg_temp_new_i32(); > +tcg_gen_trunc_tl_i32(t0, t_rb); > +tcg_gen_trunc_tl_i32(t1, t_rs); > +tcg_gen_andi_i32(t0, t0, 0x1f); > +tcg_gen_rotl_i32(t1, t1, t0); > +tcg_gen_extu_i32_tl(t_ra, t1); > +tcg_temp_free_i32(t0); > +tcg_temp_free_i32(t1); > +} else { > +#if defined(TARGET_PPC64) > +TCGv_i64 t0 = tcg_temp_new_i64(); > +tcg_gen_andi_i64(t0, t_rb, 0x1f); > +tcg_gen_deposit_i64(t_ra, t_rs, t_rs, 32, 32); > +tcg_gen_rotl_i64(t_ra, t_ra, t0); > +tcg_temp_free_i64(t0); > +#else > +g_assert_not_reached(); > +#endif > +} > > -tcg_gen_andi_i32(t1, t1, MASK(mb, me)); > -tcg_gen_extu_i32_tl(t_ra, t1); > -tcg_temp_free_i32(t1); > +tcg_gen_andi_tl(t_ra, t_ra, mask); > > if (unlikely(Rc(ctx->opcode) != 0)) { > gen_set_Rc0(ctx, t_ra);
[Qemu-devel] [PATCH 1/9] target/ppc: Fix xvxsigdp
Fix a typo in xvxsigdp where we put both results into the lower doubleword. Fixes: dd977e4f45cb ("target/ppc: Optimize x[sv]xsigdp using deposit_i64()") Signed-off-by: Anton Blanchard --- target/ppc/translate/vsx-impl.inc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/target/ppc/translate/vsx-impl.inc.c b/target/ppc/translate/vsx-impl.inc.c index 11d9b75d01..4d8ca7cf32 100644 --- a/target/ppc/translate/vsx-impl.inc.c +++ b/target/ppc/translate/vsx-impl.inc.c @@ -1820,7 +1820,7 @@ static void gen_xvxsigdp(DisasContext *ctx) tcg_gen_movi_i64(t0, 0x0010000000000000); tcg_gen_movcond_i64(TCG_COND_EQ, t0, exp, zr, zr, t0); tcg_gen_movcond_i64(TCG_COND_EQ, t0, exp, nan, zr, t0); -tcg_gen_deposit_i64(xth, t0, xbl, 0, 52); +tcg_gen_deposit_i64(xtl, t0, xbl, 0, 52); set_cpu_vsrl(xT(ctx->opcode), xtl); tcg_temp_free_i64(t0); -- 2.20.1
[Qemu-devel] [PATCH 3/9] target/ppc: Fix xxbrq, xxbrw
Fix a typo in xxbrq and xxbrw where we put both results into the lower doubleword. Fixes: 8b3b2d75c7c0 ("introduce get_cpu_vsr{l,h}() and set_cpu_vsr{l,h}() helpers for VSR register access") Signed-off-by: Anton Blanchard --- target/ppc/translate/vsx-impl.inc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/target/ppc/translate/vsx-impl.inc.c b/target/ppc/translate/vsx-impl.inc.c index d050cc03ed..05b75105be 100644 --- a/target/ppc/translate/vsx-impl.inc.c +++ b/target/ppc/translate/vsx-impl.inc.c @@ -1192,7 +1192,7 @@ static void gen_xxbrq(DisasContext *ctx) tcg_gen_bswap64_i64(xtl, xbh); set_cpu_vsrl(xT(ctx->opcode), xtl); tcg_gen_mov_i64(xth, t0); -set_cpu_vsrl(xT(ctx->opcode), xth); +set_cpu_vsrh(xT(ctx->opcode), xth); tcg_temp_free_i64(t0); tcg_temp_free_i64(xth); @@ -1220,7 +1220,7 @@ static void gen_xxbrw(DisasContext *ctx) get_cpu_vsrl(xbl, xB(ctx->opcode)); gen_bswap32x4(xth, xtl, xbh, xbl); -set_cpu_vsrl(xT(ctx->opcode), xth); +set_cpu_vsrh(xT(ctx->opcode), xth); set_cpu_vsrl(xT(ctx->opcode), xtl); tcg_temp_free_i64(xth); -- 2.20.1
[Qemu-devel] [PATCH 2/9] target/ppc: Fix xxspltib
xxspltib raises a VMX or a VSX exception depending on the register set it is operating on. We had a check, but it was backwards. Fixes: f113283525a4 ("target-ppc: add xxspltib instruction") Signed-off-by: Anton Blanchard --- target/ppc/translate/vsx-impl.inc.c | 9 + 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/target/ppc/translate/vsx-impl.inc.c b/target/ppc/translate/vsx-impl.inc.c index 4d8ca7cf32..d050cc03ed 100644 --- a/target/ppc/translate/vsx-impl.inc.c +++ b/target/ppc/translate/vsx-impl.inc.c @@ -1355,16 +1355,17 @@ static void gen_xxspltib(DisasContext *ctx) int rt = xT(ctx->opcode); if (rt < 32) { -if (unlikely(!ctx->altivec_enabled)) { -gen_exception(ctx, POWERPC_EXCP_VPU); +if (unlikely(!ctx->vsx_enabled)) { +gen_exception(ctx, POWERPC_EXCP_VSXU); return; } } else { -if (unlikely(!ctx->vsx_enabled)) { -gen_exception(ctx, POWERPC_EXCP_VSXU); +if (unlikely(!ctx->altivec_enabled)) { +gen_exception(ctx, POWERPC_EXCP_VPU); return; } } +printf("XT %x IMM8 %x\n", rt, uim8); tcg_gen_gvec_dup8i(vsr_full_offset(rt), 16, 16, uim8); } -- 2.20.1
[Qemu-devel] [PATCH 5/9] target/ppc: Fix xvabs[sd]p, xvnabs[sd]p, xvneg[sd]p, xvcpsgn[sd]p
We were using set_cpu_vsr* when we should have used get_cpu_vsr*. Fixes: 8b3b2d75c7c0 ("introduce get_cpu_vsr{l,h}() and set_cpu_vsr{l,h}() helpers for VSR register access") Signed-off-by: Anton Blanchard --- target/ppc/translate/vsx-impl.inc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/target/ppc/translate/vsx-impl.inc.c b/target/ppc/translate/vsx-impl.inc.c index c13f84e745..0a48020e3b 100644 --- a/target/ppc/translate/vsx-impl.inc.c +++ b/target/ppc/translate/vsx-impl.inc.c @@ -859,8 +859,8 @@ static void glue(gen_, name)(DisasContext *ctx) \ xbh = tcg_temp_new_i64();\ xbl = tcg_temp_new_i64();\ sgm = tcg_temp_new_i64();\ -set_cpu_vsrh(xB(ctx->opcode), xbh); \ -set_cpu_vsrl(xB(ctx->opcode), xbl); \ +get_cpu_vsrh(xbh, xB(ctx->opcode)); \ +get_cpu_vsrl(xbl, xB(ctx->opcode)); \ tcg_gen_movi_i64(sgm, sgn_mask); \ switch (op) {\ case OP_ABS: { \ -- 2.20.1
[Qemu-devel] [PATCH 7/9] target/ppc: Fix vrlwmi and vrlwnm
We should only look at 5 bits of each byte, not 6. Fixes: 3e00884f4e9f ("target-ppc: add vrldnmi and vrlwmi instructions") Signed-off-by: Anton Blanchard --- target/ppc/int_helper.c | 16 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c index fd715b4076..111586c981 100644 --- a/target/ppc/int_helper.c +++ b/target/ppc/int_helper.c @@ -1652,7 +1652,7 @@ void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b) } } -#define VRLMI(name, size, element, insert)\ +#define VRLMI(name, size, element, insert, modifier_bits) \ void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ { \ int i;\ @@ -1662,9 +1662,9 @@ void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ uint##size##_t src3 = r->element[i]; \ uint##size##_t begin, end, shift, mask, rot_val; \ \ -shift = extract##size(src2, 0, 6);\ -end = extract##size(src2, 8, 6);\ -begin = extract##size(src2, 16, 6); \ +shift = extract##size(src2, 0, modifier_bits);\ +end = extract##size(src2, 8, modifier_bits);\ +begin = extract##size(src2, 16, modifier_bits); \ rot_val = rol##size(src1, shift); \ mask = mask_u##size(begin, end); \ if (insert) { \ @@ -1675,10 +1675,10 @@ void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) \ } \ } -VRLMI(vrldmi, 64, u64, 1); -VRLMI(vrlwmi, 32, u32, 1); -VRLMI(vrldnm, 64, u64, 0); -VRLMI(vrlwnm, 32, u32, 0); +VRLMI(vrldmi, 64, u64, 1, 6); +VRLMI(vrlwmi, 32, u32, 1, 5); +VRLMI(vrldnm, 64, u64, 0, 6); +VRLMI(vrlwnm, 32, u32, 0, 5); void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c) -- 2.20.1
[Qemu-devel] [PATCH 4/9] target/ppc: Fix lxvw4x, lxvh8x and lxvb16x
During the conversion these instructions were incorrectly treated as stores. We need to use set_cpu_vsr* and not get_cpu_vsr*. Fixes: 8b3b2d75c7c0 ("introduce get_cpu_vsr{l,h}() and set_cpu_vsr{l,h}() helpers for VSR register access") Signed-off-by: Anton Blanchard --- target/ppc/translate/vsx-impl.inc.c | 13 +++-- 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/target/ppc/translate/vsx-impl.inc.c b/target/ppc/translate/vsx-impl.inc.c index 05b75105be..c13f84e745 100644 --- a/target/ppc/translate/vsx-impl.inc.c +++ b/target/ppc/translate/vsx-impl.inc.c @@ -102,8 +102,7 @@ static void gen_lxvw4x(DisasContext *ctx) } xth = tcg_temp_new_i64(); xtl = tcg_temp_new_i64(); -get_cpu_vsrh(xth, xT(ctx->opcode)); -get_cpu_vsrl(xtl, xT(ctx->opcode)); + gen_set_access_type(ctx, ACCESS_INT); EA = tcg_temp_new(); @@ -126,6 +125,8 @@ static void gen_lxvw4x(DisasContext *ctx) tcg_gen_addi_tl(EA, EA, 8); tcg_gen_qemu_ld_i64(xtl, EA, ctx->mem_idx, MO_BEQ); } +set_cpu_vsrh(xT(ctx->opcode), xth); +set_cpu_vsrl(xT(ctx->opcode), xtl); tcg_temp_free(EA); tcg_temp_free_i64(xth); tcg_temp_free_i64(xtl); @@ -185,8 +186,6 @@ static void gen_lxvh8x(DisasContext *ctx) } xth = tcg_temp_new_i64(); xtl = tcg_temp_new_i64(); -get_cpu_vsrh(xth, xT(ctx->opcode)); -get_cpu_vsrl(xtl, xT(ctx->opcode)); gen_set_access_type(ctx, ACCESS_INT); EA = tcg_temp_new(); @@ -197,6 +196,8 @@ static void gen_lxvh8x(DisasContext *ctx) if (ctx->le_mode) { gen_bswap16x8(xth, xtl, xth, xtl); } +set_cpu_vsrh(xT(ctx->opcode), xth); +set_cpu_vsrl(xT(ctx->opcode), xtl); tcg_temp_free(EA); tcg_temp_free_i64(xth); tcg_temp_free_i64(xtl); @@ -214,14 +215,14 @@ static void gen_lxvb16x(DisasContext *ctx) } xth = tcg_temp_new_i64(); xtl = tcg_temp_new_i64(); -get_cpu_vsrh(xth, xT(ctx->opcode)); -get_cpu_vsrl(xtl, xT(ctx->opcode)); gen_set_access_type(ctx, ACCESS_INT); EA = tcg_temp_new(); gen_addr_reg_index(ctx, EA); tcg_gen_qemu_ld_i64(xth, EA, ctx->mem_idx, MO_BEQ); tcg_gen_addi_tl(EA, EA, 8); 
tcg_gen_qemu_ld_i64(xtl, EA, ctx->mem_idx, MO_BEQ); +set_cpu_vsrh(xT(ctx->opcode), xth); +set_cpu_vsrl(xT(ctx->opcode), xtl); tcg_temp_free(EA); tcg_temp_free_i64(xth); tcg_temp_free_i64(xtl); -- 2.20.1
[Qemu-devel] [PATCH 6/9] target/ppc: Fix vslv and vsrv
vslv and vsrv are broken on little endian, we append 00 to the high byte not the low byte. Fix it by using the VsrB() accessor. Signed-off-by: Anton Blanchard --- target/ppc/int_helper.c | 14 +++--- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c index f6a088ac08..fd715b4076 100644 --- a/target/ppc/int_helper.c +++ b/target/ppc/int_helper.c @@ -1800,10 +1800,10 @@ void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) size = ARRAY_SIZE(r->u8); for (i = 0; i < size; i++) { -shift = b->u8[i] & 0x7; /* extract shift value */ -bytes = (a->u8[i] << 8) + /* extract adjacent bytes */ -(((i + 1) < size) ? a->u8[i + 1] : 0); -r->u8[i] = (bytes << shift) >> 8; /* shift and store result */ +shift = b->VsrB(i) & 0x7; /* extract shift value */ +bytes = (a->VsrB(i) << 8) + /* extract adjacent bytes */ +(((i + 1) < size) ? a->VsrB(i + 1) : 0); +r->VsrB(i) = (bytes << shift) >> 8; /* shift and store result */ } } @@ -1818,10 +1818,10 @@ void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) * order will guarantee that computed result is not fed back. */ for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) { -shift = b->u8[i] & 0x7; /* extract shift value */ -bytes = ((i ? a->u8[i - 1] : 0) << 8) + a->u8[i]; +shift = b->VsrB(i) & 0x7; /* extract shift value */ +bytes = ((i ? a->VsrB(i - 1) : 0) << 8) + a->VsrB(i); /* extract adjacent bytes */ -r->u8[i] = (bytes >> shift) & 0xFF; /* shift and store result */ +r->VsrB(i) = (bytes >> shift) & 0xFF; /* shift and store result */ } } -- 2.20.1
[Qemu-devel] [PATCH 8/9] target/ppc: Fix dtstsfi and dtstsfiq
The immediate field is 6 bits, not 5. Fixes: 217f6b88058f ("target-ppc: add dtstsfi[q] instructions") Signed-off-by: Anton Blanchard --- target/ppc/internal.h | 2 ++ target/ppc/translate/dfp-impl.inc.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/target/ppc/internal.h b/target/ppc/internal.h index fb6f64ed1e..4719369cc5 100644 --- a/target/ppc/internal.h +++ b/target/ppc/internal.h @@ -123,6 +123,8 @@ static inline uint32_t SPR(uint32_t opcode) EXTRACT_SHELPER(SIMM, 0, 16); /* 16 bits unsigned immediate value */ EXTRACT_HELPER(UIMM, 0, 16); +/* 6 bits unsigned immediate value */ +EXTRACT_HELPER(UIMM6, 16, 6); /* 5 bits signed immediate value */ EXTRACT_SHELPER(SIMM5, 16, 5); /* 5 bits signed immediate value */ diff --git a/target/ppc/translate/dfp-impl.inc.c b/target/ppc/translate/dfp-impl.inc.c index 6c556dc2e1..5b01c9239d 100644 --- a/target/ppc/translate/dfp-impl.inc.c +++ b/target/ppc/translate/dfp-impl.inc.c @@ -55,7 +55,7 @@ static void gen_##name(DisasContext *ctx) \ return; \ } \ gen_update_nip(ctx, ctx->base.pc_next - 4);\ -uim = tcg_const_i32(UIMM5(ctx->opcode)); \ +uim = tcg_const_i32(UIMM6(ctx->opcode)); \ rb = gen_fprp_ptr(rB(ctx->opcode)); \ gen_helper_##name(cpu_crf[crfD(ctx->opcode)], \ cpu_env, uim, rb); \ -- 2.20.1
[Qemu-devel] [PATCH 9/9] target/ppc: Fix vsum2sws
A recent cleanup changed the pre zeroing of the result from 64 bit to 32 bit operations: -result.u64[i] = 0; +result.VsrW(i) = 0; This corrupts the result. Fixes: 60594fea298d ("target/ppc: remove various HOST_WORDS_BIGENDIAN hacks in int_helper.c") Signed-off-by: Anton Blanchard --- target/ppc/int_helper.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c index 111586c981..b8b3279f71 100644 --- a/target/ppc/int_helper.c +++ b/target/ppc/int_helper.c @@ -2038,7 +2038,7 @@ void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b) for (i = 0; i < ARRAY_SIZE(r->u64); i++) { int64_t t = (int64_t)b->VsrSW(upper + i * 2); -result.VsrW(i) = 0; +result.VsrD(i) = 0; for (j = 0; j < ARRAY_SIZE(r->u64); j++) { t += a->VsrSW(2 * i + j); } -- 2.20.1
Re: [Qemu-devel] [PATCH 1/9] target/ppc: Fix xvxsigdp
Hi Alexey, > Out of curiosity - how did you find this one and (especially) the next > one - "Fix xxspltib"? Is there some testsuite, or by just looking at > the code? Thanks, I'm running test cases and comparing results between QEMU and real hardware. Thanks, Anton
[Qemu-devel] [PATCH v2] target/ppc: Fix xxspltib
xxspltib raises a VMX or a VSX exception depending on the register set it is operating on. We had a check, but it was backwards. Fixes: f113283525a4 ("target-ppc: add xxspltib instruction") Signed-off-by: Anton Blanchard --- target/ppc/translate/vsx-impl.inc.c | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/target/ppc/translate/vsx-impl.inc.c b/target/ppc/translate/vsx-impl.inc.c index 4d8ca7cf32..4812a374aa 100644 --- a/target/ppc/translate/vsx-impl.inc.c +++ b/target/ppc/translate/vsx-impl.inc.c @@ -1355,13 +1355,13 @@ static void gen_xxspltib(DisasContext *ctx) int rt = xT(ctx->opcode); if (rt < 32) { -if (unlikely(!ctx->altivec_enabled)) { -gen_exception(ctx, POWERPC_EXCP_VPU); +if (unlikely(!ctx->vsx_enabled)) { +gen_exception(ctx, POWERPC_EXCP_VSXU); return; } } else { -if (unlikely(!ctx->vsx_enabled)) { -gen_exception(ctx, POWERPC_EXCP_VSXU); +if (unlikely(!ctx->altivec_enabled)) { +gen_exception(ctx, POWERPC_EXCP_VPU); return; } } -- 2.20.1
[Qemu-devel] [PATCH] target/ppc: Optimise VSX_LOAD_SCALAR_DS and VSX_VECTOR_LOAD_STORE
A few small optimisations: In VSX_LOAD_SCALAR_DS() we don't need to read the VSR via get_cpu_vsrh(). Split VSX_VECTOR_LOAD_STORE() into two functions. Loads only need to write the VSRs (set_cpu_vsr*()) and stores only need to read the VSRs (get_cpu_vsr*()). Thanks to Mark Cave-Ayland for the suggestions. Signed-off-by: Anton Blanchard --- target/ppc/translate/vsx-impl.inc.c | 68 - 1 file changed, 58 insertions(+), 10 deletions(-) diff --git a/target/ppc/translate/vsx-impl.inc.c b/target/ppc/translate/vsx-impl.inc.c index 4b7627f53b..cdb44b8b70 100644 --- a/target/ppc/translate/vsx-impl.inc.c +++ b/target/ppc/translate/vsx-impl.inc.c @@ -228,7 +228,7 @@ static void gen_lxvb16x(DisasContext *ctx) tcg_temp_free_i64(xtl); } -#define VSX_VECTOR_LOAD_STORE(name, op, indexed)\ +#define VSX_VECTOR_LOAD(name, op, indexed) \ static void gen_##name(DisasContext *ctx) \ { \ int xt; \ @@ -255,8 +255,6 @@ static void gen_##name(DisasContext *ctx) \ } \ xth = tcg_temp_new_i64(); \ xtl = tcg_temp_new_i64(); \ -get_cpu_vsrh(xth, xt); \ -get_cpu_vsrl(xtl, xt); \ gen_set_access_type(ctx, ACCESS_INT); \ EA = tcg_temp_new();\ if (indexed) { \ @@ -282,10 +280,61 @@ static void gen_##name(DisasContext *ctx) \ tcg_temp_free_i64(xtl); \ } -VSX_VECTOR_LOAD_STORE(lxv, ld_i64, 0) -VSX_VECTOR_LOAD_STORE(stxv, st_i64, 0) -VSX_VECTOR_LOAD_STORE(lxvx, ld_i64, 1) -VSX_VECTOR_LOAD_STORE(stxvx, st_i64, 1) +VSX_VECTOR_LOAD(lxv, ld_i64, 0) +VSX_VECTOR_LOAD(lxvx, ld_i64, 1) + +#define VSX_VECTOR_STORE(name, op, indexed) \ +static void gen_##name(DisasContext *ctx) \ +{ \ +int xt; \ +TCGv EA;\ +TCGv_i64 xth; \ +TCGv_i64 xtl; \ +\ +if (indexed) { \ +xt = xT(ctx->opcode); \ +} else {\ +xt = DQxT(ctx->opcode); \ +} \ +\ +if (xt < 32) { \ +if (unlikely(!ctx->vsx_enabled)) { \ +gen_exception(ctx, POWERPC_EXCP_VSXU); \ +return; \ +} \ +} else {\ +if (unlikely(!ctx->altivec_enabled)) { \ +gen_exception(ctx, POWERPC_EXCP_VPU); \ +return; \ +} \ +} \ +xth = tcg_temp_new_i64(); \ +xtl = tcg_temp_new_i64(); \ 
+get_cpu_vsrh(xth, xt); \ +get_cpu_vsrl(xtl, xt); \ +gen_set_access_type(ctx, ACCESS_INT); \ +EA = tcg_temp_new();\ +if (indexed) { \ +gen_addr_reg_index(ctx, EA);\ +} else {\ +gen_addr_imm_index(ctx, EA, 0x0F); \ +} \ +if (ctx->le_mode) { \ +tcg_gen_qemu_##op(xtl, EA, ctx->mem_idx, MO_LEQ); \ +tcg_gen_addi_tl(EA, EA, 8); \ +tcg_gen_qemu_##op(xth, EA, ctx->mem_idx, MO_LEQ); \ +} else {\ +tcg_gen_qemu_##op(xth, EA, ctx->mem_idx, MO_BEQ); \ +tcg_gen_addi_tl(EA, EA, 8); \ +tcg_gen_qemu_##op(xtl, EA, ctx->mem_idx, MO_BEQ); \ +} \ +tcg_temp_free(EA);
Re: [Qemu-devel] [PATCH 4/9] target/ppc: Fix lxvw4x, lxvh8x and lxvb16x
Hi Mark, > Following on from this I've just gone through the load/store > operations once again and spotted two things: > > > 1) VSX_LOAD_SCALAR_DS has an extra get_cpu_vsrh() which can be removed > > diff --git a/target/ppc/translate/vsx-impl.inc.c > b/target/ppc/translate/vsx-impl.inc.c index 11d9b75d01..004ea56c4f > 100644 --- a/target/ppc/translate/vsx-impl.inc.c > +++ b/target/ppc/translate/vsx-impl.inc.c > @@ -329,7 +329,6 @@ static void gen_##name(DisasContext > *ctx) \ > return; > \ } \ xth > = tcg_temp_new_i64(); \ > -get_cpu_vsrh(xth, rD(ctx->opcode) + 32); \ > gen_set_access_type(ctx, ACCESS_INT); \ > EA = tcg_temp_new(); \ > gen_addr_imm_index(ctx, EA, 0x03);\ Looks good. I also noticed we had two stores that needed to be fixed: VSX_LOAD_SCALAR_DS(stxsd, st64_i64) VSX_LOAD_SCALAR_DS(stxssp, st32fs) > 2) VSX_VECTOR_LOAD_STORE is confusing and should be split into > separate VSX_VECTOR_LOAD and VSX_VECTOR_STORE macros Good idea. I also removed what I assume are redundant set_cpu_vsr* and get_cpu_vsr* calls. > Does that sound reasonable? I'm also thinking that we should consider > adding a CC to stable for patches 4, 5 and 9 in this series since > these are genuine regressions. Fine with me. If David agrees, I'm not sure if he can rebase them or if I can send them manually if they have already been committed. Thanks, Anton
[Qemu-devel] [PATCH v2] target/ppc: Fix xvabs[sd]p, xvnabs[sd]p, xvneg[sd]p, xvcpsgn[sd]p
We were using set_cpu_vsr*() when we should have used get_cpu_vsr*(). Fixes: 8b3b2d75c7c0 ("introduce get_cpu_vsr{l,h}() and set_cpu_vsr{l,h}() helpers for VSR register access") Signed-off-by: Anton Blanchard --- target/ppc/translate/vsx-impl.inc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/target/ppc/translate/vsx-impl.inc.c b/target/ppc/translate/vsx-impl.inc.c index b487136d52..4b7627f53b 100644 --- a/target/ppc/translate/vsx-impl.inc.c +++ b/target/ppc/translate/vsx-impl.inc.c @@ -859,8 +859,8 @@ static void glue(gen_, name)(DisasContext *ctx) \ xbh = tcg_temp_new_i64();\ xbl = tcg_temp_new_i64();\ sgm = tcg_temp_new_i64();\ -set_cpu_vsrh(xB(ctx->opcode), xbh); \ -set_cpu_vsrl(xB(ctx->opcode), xbl); \ +get_cpu_vsrh(xbh, xB(ctx->opcode)); \ +get_cpu_vsrl(xbl, xB(ctx->opcode)); \ tcg_gen_movi_i64(sgm, sgn_mask); \ switch (op) {\ case OP_ABS: { \ -- 2.20.1
[Qemu-devel] [PATCH 1/2] powerpc: Fix emulation of mcrf in emulate_step()
From: Anton Blanchard The mcrf emulation code was looking at the CR fields in the reverse order. It also relied on reserved fields being zero which is somewhat fragile, so fix that too. Cc: sta...@vger.kernel.org Signed-off-by: Anton Blanchard --- arch/powerpc/lib/sstep.c | 6 -- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index 33117f8a0882..fb84f51b1f0b 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -683,8 +683,10 @@ int analyse_instr(struct instruction_op *op, struct pt_regs *regs, case 19: switch ((instr >> 1) & 0x3ff) { case 0: /* mcrf */ - rd = (instr >> 21) & 0x1c; - ra = (instr >> 16) & 0x1c; + rd = 7 - ((instr >> 23) & 0x7); + ra = 7 - ((instr >> 18) & 0x7); + rd *= 4; + ra *= 4; val = (regs->ccr >> ra) & 0xf; regs->ccr = (regs->ccr & ~(0xfUL << rd)) | (val << rd); goto instr_done; -- 2.11.0
[Qemu-devel] [PATCH] target/ppc: Fix size of struct PPCElfPrstatus
From: Anton Blanchard gdb refuses to parse QEMU memory dumps because struct PPCElfPrstatus is the wrong size. Fix it. Signed-off-by: Anton Blanchard Fixes: e62fbc54d459 ("target-ppc: dump-guest-memory support") --- target/ppc/arch_dump.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/target/ppc/arch_dump.c b/target/ppc/arch_dump.c index 28d9cc7d79..8e9397aa58 100644 --- a/target/ppc/arch_dump.c +++ b/target/ppc/arch_dump.c @@ -50,7 +50,7 @@ struct PPCUserRegStruct { struct PPCElfPrstatus { char pad1[112]; struct PPCUserRegStruct pr_reg; -reg_t pad2[4]; +char pad2[40]; } QEMU_PACKED; -- 2.11.0
Re: [Qemu-devel] [Qemu-ppc] KVM-PR is broken with current QEMU
Hi Thomas, > So if you like, I can try to come up with a small patch series that > cleans up this mess - and I could also include an updated versions of > Anton's patch there unless he wants to redo the changes on his own...? Thanks for looking at this. I'm travelling (stuck in an airport at the moment) and won't be able to get to this for a few days. If you could incorporate my fixes that would be great! From memory we were waiting on KVM_CAP_PPC_HTM, which thanks to Sam is now upstream in 23528bb21ee2 Anton
Re: [Qemu-devel] [PATCH 2/2] ppc: Fix 64K pages support in full emulation
Hi, > From: Benjamin Herrenschmidt > > We were always advertising only 4K & 16M. Additionally the code wasn't > properly matching the page size with the PTE content, which meant we > could potentially hit an incorrect PTE if the guest used multiple > sizes. > > Finally, honor the CPU capabilities when decoding the size from the > SLB so we don't try to use 64K pages on 970. > > This still doesn't add support for MPSS (Multiple Page Sizes per > Segment) This is causing issues booting an Ubuntu yakkety cloud image. I'm running on a ppc64le box (I don't think it reproduces on x86-64). cat << EOF > my-user-data #cloud-config password: password chpasswd: { expire: False } ssh_pwauth: True EOF cloud-localds my-seed.img my-user-data wget -N https://cloud-images.ubuntu.com/yakkety/current/yakkety-server-cloudimg-ppc64el.img qemu-system-ppc64 -M pseries -cpu POWER8 -nographic -vga none -m 4G -drive file=test.img -drive file=my-seed.img -net user -net nic The cloud-init scripts never finish, so the ubuntu user's password is never updated. With the above cloud config you should be able to log in with ubuntu/password. Anton
[Qemu-devel] [PATCH] ppc: Fix xsrdpi, xvrdpi and xvrspi rounding
From: Anton Blanchard xsrdpi, xvrdpi and xvrspi use the round ties away method, not round nearest even. Signed-off-by: Anton Blanchard --- target-ppc/fpu_helper.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/target-ppc/fpu_helper.c b/target-ppc/fpu_helper.c index 4ef893b..d9795d0 100644 --- a/target-ppc/fpu_helper.c +++ b/target-ppc/fpu_helper.c @@ -2689,19 +2689,19 @@ void helper_##op(CPUPPCState *env, uint32_t opcode) \ helper_float_check_status(env);\ } -VSX_ROUND(xsrdpi, 1, float64, VsrD(0), float_round_nearest_even, 1) +VSX_ROUND(xsrdpi, 1, float64, VsrD(0), float_round_ties_away, 1) VSX_ROUND(xsrdpic, 1, float64, VsrD(0), FLOAT_ROUND_CURRENT, 1) VSX_ROUND(xsrdpim, 1, float64, VsrD(0), float_round_down, 1) VSX_ROUND(xsrdpip, 1, float64, VsrD(0), float_round_up, 1) VSX_ROUND(xsrdpiz, 1, float64, VsrD(0), float_round_to_zero, 1) -VSX_ROUND(xvrdpi, 2, float64, VsrD(i), float_round_nearest_even, 0) +VSX_ROUND(xvrdpi, 2, float64, VsrD(i), float_round_ties_away, 0) VSX_ROUND(xvrdpic, 2, float64, VsrD(i), FLOAT_ROUND_CURRENT, 0) VSX_ROUND(xvrdpim, 2, float64, VsrD(i), float_round_down, 0) VSX_ROUND(xvrdpip, 2, float64, VsrD(i), float_round_up, 0) VSX_ROUND(xvrdpiz, 2, float64, VsrD(i), float_round_to_zero, 0) -VSX_ROUND(xvrspi, 4, float32, VsrW(i), float_round_nearest_even, 0) +VSX_ROUND(xvrspi, 4, float32, VsrW(i), float_round_ties_away, 0) VSX_ROUND(xvrspic, 4, float32, VsrW(i), FLOAT_ROUND_CURRENT, 0) VSX_ROUND(xvrspim, 4, float32, VsrW(i), float_round_down, 0) VSX_ROUND(xvrspip, 4, float32, VsrW(i), float_round_up, 0) -- 2.7.4
Re: [Qemu-devel] [PATCH] ppc: Fix xsrdpi, xvrdpi and xvrspi rounding
Hi David, > I take it float_round_ties_away is the same thing the architecture > refers to as "round to Nearest Away"? Yeah. I noticed it when 0.5 got rounded to 0 on QEMU and 1.0 on real hardware. Anton
[PATCH] ppc/spapr: Fix 32 bit logical memory block size assumptions
When testing large LMB sizes (eg 4GB), I found a couple of places that assume they are 32bit in size. Signed-off-by: Anton Blanchard --- hw/ppc/spapr.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index a1b06defe6..0ba2526215 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -558,7 +558,8 @@ static int spapr_dt_dynamic_reconfiguration_memory(SpaprMachineState *spapr, int nb_numa_nodes = machine->numa_state->num_nodes; int ret, i, offset; uint64_t lmb_size = SPAPR_MEMORY_BLOCK_SIZE; -uint32_t prop_lmb_size[] = {0, cpu_to_be32(lmb_size)}; +uint32_t prop_lmb_size[] = {cpu_to_be32(lmb_size >> 32), +cpu_to_be32(lmb_size & 0xffffffff)}; uint32_t *int_buf, *cur_index, buf_len; int nr_nodes = nb_numa_nodes ? nb_numa_nodes : 1; MemoryDeviceInfoList *dimms = NULL; @@ -899,7 +900,8 @@ static void spapr_dt_rtas(SpaprMachineState *spapr, void *fdt) uint32_t lrdr_capacity[] = { cpu_to_be32(max_device_addr >> 32), cpu_to_be32(max_device_addr & 0xffffffff), -0, cpu_to_be32(SPAPR_MEMORY_BLOCK_SIZE), +cpu_to_be32(SPAPR_MEMORY_BLOCK_SIZE >> 32), +cpu_to_be32(SPAPR_MEMORY_BLOCK_SIZE & 0xffffffff), cpu_to_be32(ms->smp.max_cpus / ms->smp.threads), }; uint32_t maxdomain = cpu_to_be32(spapr->gpu_numa_id > 1 ? 1 : 0); -- 2.26.2
Re: [Qemu-devel] [PATCH 4/9] target/ppc: Fix lxvw4x, lxvh8x and lxvb16x
Hi, > I've now had a bit of time to look through this and I believe it is > correct, so: > > Reviewed-by: Mark Cave-Ayland Thanks Mark. David: any chance we could get this merged? I can't run a recent Ubuntu image successfully without it. sshd hangs when I try to ssh into it. Thanks, Anton
[PATCH] target/riscv: Fix vcompress with rvv_ta_all_1s
vcompress packs vl or less fields into vd, so the tail starts after the last packed field. Signed-off-by: Anton Blanchard --- target/riscv/vector_helper.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index 072bd444b1..ccb32e6122 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -5132,7 +5132,7 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ } \ env->vstart = 0; \ /* set tail elements to 1s */ \ -vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ +vext_set_elems_1s(vd, vta, num * esz, total_elems * esz); \ } /* Compress into vd elements of vs2 where vs1 is enabled */ -- 2.34.1
Re: [CAUTION - External Sender] Re: [PATCH] target/riscv: Fix vcompress with rvv_ta_all_1s
Hi Alistair, On Wed, Oct 30, 2024 at 2:39 PM Alistair Francis wrote: > > vcompress packs vl or less fields into vd, so the tail starts after the > > last packed field. > > Is that right? > > It's different from every other vector command. Although the wording > in the spec is very confusing It is confusing. This thread has some clarification, and we should probably follow up on the suggestion to improve the ISA wording: https://github.com/riscv/riscv-v-spec/issues/796 Thanks, Anton
[PATCH v2] target/riscv: Fix vcompress with rvv_ta_all_1s
vcompress packs vl or less fields into vd, so the tail starts after the last packed field. This could be more clearly expressed in the ISA, but for now this thread helps to explain it: https://github.com/riscv/riscv-v-spec/issues/796 Signed-off-by: Anton Blanchard --- target/riscv/vector_helper.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index 072bd444b1..ccb32e6122 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -5132,7 +5132,7 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ } \ env->vstart = 0; \ /* set tail elements to 1s */ \ -vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ +vext_set_elems_1s(vd, vta, num * esz, total_elems * esz); \ } /* Compress into vd elements of vs2 where vs1 is enabled */ -- 2.34.1
[PATCH v2] target/riscv: Add Tenstorrent Ascalon CPU
Add a CPU entry for the Tenstorrent Ascalon CPU, a series of 2 wide to 8 wide RV64 cores. More details can be found at https://tenstorrent.com/ip/tt-ascalon Signed-off-by: Anton Blanchard --- target/riscv/cpu-qom.h | 1 + target/riscv/cpu.c | 67 ++ 2 files changed, 68 insertions(+) diff --git a/target/riscv/cpu-qom.h b/target/riscv/cpu-qom.h index 62115375cd..6547642287 100644 --- a/target/riscv/cpu-qom.h +++ b/target/riscv/cpu-qom.h @@ -49,6 +49,7 @@ #define TYPE_RISCV_CPU_SIFIVE_U54 RISCV_CPU_TYPE_NAME("sifive-u54") #define TYPE_RISCV_CPU_THEAD_C906 RISCV_CPU_TYPE_NAME("thead-c906") #define TYPE_RISCV_CPU_VEYRON_V1RISCV_CPU_TYPE_NAME("veyron-v1") +#define TYPE_RISCV_CPU_TT_ASCALON RISCV_CPU_TYPE_NAME("tt-ascalon") #define TYPE_RISCV_CPU_HOST RISCV_CPU_TYPE_NAME("host") OBJECT_DECLARE_CPU_TYPE(RISCVCPU, RISCVCPUClass, RISCV_CPU) diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c index f219f0c3b5..8447ad0dfb 100644 --- a/target/riscv/cpu.c +++ b/target/riscv/cpu.c @@ -579,6 +579,72 @@ static void rv64_veyron_v1_cpu_init(Object *obj) #endif } +/* Tenstorrent Ascalon */ +static void rv64_tt_ascalon_cpu_init(Object *obj) +{ +CPURISCVState *env = &RISCV_CPU(obj)->env; +RISCVCPU *cpu = RISCV_CPU(obj); + +riscv_cpu_set_misa_ext(env, RVG | RVC | RVS | RVU | RVH | RVV); +env->priv_ver = PRIV_VERSION_1_13_0; + +/* Enable ISA extensions */ +cpu->cfg.mmu = true; +cpu->cfg.vlenb = 256 >> 3; +cpu->cfg.elen = 64; +cpu->env.vext_ver = VEXT_VERSION_1_00_0; +cpu->cfg.rvv_ma_all_1s = true; +cpu->cfg.rvv_ta_all_1s = true; +cpu->cfg.misa_w = true; +cpu->cfg.pmp = true; +cpu->cfg.cbom_blocksize = 64; +cpu->cfg.cbop_blocksize = 64; +cpu->cfg.cboz_blocksize = 64; +cpu->cfg.ext_zic64b = true; +cpu->cfg.ext_zicbom = true; +cpu->cfg.ext_zicbop = true; +cpu->cfg.ext_zicboz = true; +cpu->cfg.ext_zicntr = true; +cpu->cfg.ext_zicond = true; +cpu->cfg.ext_zicsr = true; +cpu->cfg.ext_zifencei = true; +cpu->cfg.ext_zihintntl = true; +cpu->cfg.ext_zihintpause = true; +cpu->cfg.ext_zihpm = true; 
+cpu->cfg.ext_zimop = true; +cpu->cfg.ext_zawrs = true; +cpu->cfg.ext_zfa = true; +cpu->cfg.ext_zfbfmin = true; +cpu->cfg.ext_zfh = true; +cpu->cfg.ext_zfhmin = true; +cpu->cfg.ext_zcb = true; +cpu->cfg.ext_zcmop = true; +cpu->cfg.ext_zba = true; +cpu->cfg.ext_zbb = true; +cpu->cfg.ext_zbs = true; +cpu->cfg.ext_zkt = true; +cpu->cfg.ext_zvbb = true; +cpu->cfg.ext_zvbc = true; +cpu->cfg.ext_zvfbfmin = true; +cpu->cfg.ext_zvfbfwma = true; +cpu->cfg.ext_zvfh = true; +cpu->cfg.ext_zvfhmin = true; +cpu->cfg.ext_zvkng = true; +cpu->cfg.ext_smaia = true; +cpu->cfg.ext_smstateen = true; +cpu->cfg.ext_ssaia = true; +cpu->cfg.ext_sscofpmf = true; +cpu->cfg.ext_sstc = true; +cpu->cfg.ext_svade = true; +cpu->cfg.ext_svinval = true; +cpu->cfg.ext_svnapot = true; +cpu->cfg.ext_svpbmt = true; + +#ifndef CONFIG_USER_ONLY +set_satp_mode_max_supported(cpu, VM_1_10_SV57); +#endif +} + #ifdef CONFIG_TCG static void rv128_base_cpu_init(Object *obj) { @@ -2982,6 +3048,7 @@ static const TypeInfo riscv_cpu_type_infos[] = { DEFINE_VENDOR_CPU(TYPE_RISCV_CPU_SIFIVE_U54, MXL_RV64, rv64_sifive_u_cpu_init), DEFINE_VENDOR_CPU(TYPE_RISCV_CPU_SHAKTI_C, MXL_RV64, rv64_sifive_u_cpu_init), DEFINE_VENDOR_CPU(TYPE_RISCV_CPU_THEAD_C906, MXL_RV64, rv64_thead_c906_cpu_init), +DEFINE_VENDOR_CPU(TYPE_RISCV_CPU_TT_ASCALON, MXL_RV64, rv64_tt_ascalon_cpu_init), DEFINE_VENDOR_CPU(TYPE_RISCV_CPU_VEYRON_V1, MXL_RV64, rv64_veyron_v1_cpu_init), #ifdef CONFIG_TCG DEFINE_DYNAMIC_CPU(TYPE_RISCV_CPU_BASE128, MXL_RV128, rv128_base_cpu_init), -- 2.34.1
Re: [CAUTION - External Sender] Re: [PATCH] target/riscv: Add Tenstorrent Ascalon CPU
Hi Philippe, On Sun, Nov 10, 2024 at 5:21 AM Philippe Mathieu-Daudé wrote: > Generally speaking (I'm not objecting to this patch as is), for > DEFINE_VENDOR_CPU() it would be nice to have reference to some > documentation -- at least to review whether the cpu features > announced make sense or not --. > > For this particular IP I'm not finding anything on the company > website...: > https://docs.tenstorrent.com/search.html?q=Ascalon This has some more details, including a 1 page PDF. Should I add the URL to the commit message? https://tenstorrent.com/ip/tt-ascalon Thanks, Anton
[PATCH] target/riscv: Add Tenstorrent Ascalon CPU
Add a CPU entry for the Tenstorrent Ascalon CPU, a series of 2 wide to 8 wide RV64 cores. Signed-off-by: Anton Blanchard --- target/riscv/cpu-qom.h | 1 + target/riscv/cpu.c | 67 ++ 2 files changed, 68 insertions(+) diff --git a/target/riscv/cpu-qom.h b/target/riscv/cpu-qom.h index 62115375cd..6547642287 100644 --- a/target/riscv/cpu-qom.h +++ b/target/riscv/cpu-qom.h @@ -49,6 +49,7 @@ #define TYPE_RISCV_CPU_SIFIVE_U54 RISCV_CPU_TYPE_NAME("sifive-u54") #define TYPE_RISCV_CPU_THEAD_C906 RISCV_CPU_TYPE_NAME("thead-c906") #define TYPE_RISCV_CPU_VEYRON_V1RISCV_CPU_TYPE_NAME("veyron-v1") +#define TYPE_RISCV_CPU_TT_ASCALON RISCV_CPU_TYPE_NAME("tt-ascalon") #define TYPE_RISCV_CPU_HOST RISCV_CPU_TYPE_NAME("host") OBJECT_DECLARE_CPU_TYPE(RISCVCPU, RISCVCPUClass, RISCV_CPU) diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c index f219f0c3b5..8447ad0dfb 100644 --- a/target/riscv/cpu.c +++ b/target/riscv/cpu.c @@ -579,6 +579,72 @@ static void rv64_veyron_v1_cpu_init(Object *obj) #endif } +/* Tenstorrent Ascalon */ +static void rv64_tt_ascalon_cpu_init(Object *obj) +{ +CPURISCVState *env = &RISCV_CPU(obj)->env; +RISCVCPU *cpu = RISCV_CPU(obj); + +riscv_cpu_set_misa_ext(env, RVG | RVC | RVS | RVU | RVH | RVV); +env->priv_ver = PRIV_VERSION_1_13_0; + +/* Enable ISA extensions */ +cpu->cfg.mmu = true; +cpu->cfg.vlenb = 256 >> 3; +cpu->cfg.elen = 64; +cpu->env.vext_ver = VEXT_VERSION_1_00_0; +cpu->cfg.rvv_ma_all_1s = true; +cpu->cfg.rvv_ta_all_1s = true; +cpu->cfg.misa_w = true; +cpu->cfg.pmp = true; +cpu->cfg.cbom_blocksize = 64; +cpu->cfg.cbop_blocksize = 64; +cpu->cfg.cboz_blocksize = 64; +cpu->cfg.ext_zic64b = true; +cpu->cfg.ext_zicbom = true; +cpu->cfg.ext_zicbop = true; +cpu->cfg.ext_zicboz = true; +cpu->cfg.ext_zicntr = true; +cpu->cfg.ext_zicond = true; +cpu->cfg.ext_zicsr = true; +cpu->cfg.ext_zifencei = true; +cpu->cfg.ext_zihintntl = true; +cpu->cfg.ext_zihintpause = true; +cpu->cfg.ext_zihpm = true; +cpu->cfg.ext_zimop = true; +cpu->cfg.ext_zawrs = true; 
+cpu->cfg.ext_zfa = true; +cpu->cfg.ext_zfbfmin = true; +cpu->cfg.ext_zfh = true; +cpu->cfg.ext_zfhmin = true; +cpu->cfg.ext_zcb = true; +cpu->cfg.ext_zcmop = true; +cpu->cfg.ext_zba = true; +cpu->cfg.ext_zbb = true; +cpu->cfg.ext_zbs = true; +cpu->cfg.ext_zkt = true; +cpu->cfg.ext_zvbb = true; +cpu->cfg.ext_zvbc = true; +cpu->cfg.ext_zvfbfmin = true; +cpu->cfg.ext_zvfbfwma = true; +cpu->cfg.ext_zvfh = true; +cpu->cfg.ext_zvfhmin = true; +cpu->cfg.ext_zvkng = true; +cpu->cfg.ext_smaia = true; +cpu->cfg.ext_smstateen = true; +cpu->cfg.ext_ssaia = true; +cpu->cfg.ext_sscofpmf = true; +cpu->cfg.ext_sstc = true; +cpu->cfg.ext_svade = true; +cpu->cfg.ext_svinval = true; +cpu->cfg.ext_svnapot = true; +cpu->cfg.ext_svpbmt = true; + +#ifndef CONFIG_USER_ONLY +set_satp_mode_max_supported(cpu, VM_1_10_SV57); +#endif +} + #ifdef CONFIG_TCG static void rv128_base_cpu_init(Object *obj) { @@ -2982,6 +3048,7 @@ static const TypeInfo riscv_cpu_type_infos[] = { DEFINE_VENDOR_CPU(TYPE_RISCV_CPU_SIFIVE_U54, MXL_RV64, rv64_sifive_u_cpu_init), DEFINE_VENDOR_CPU(TYPE_RISCV_CPU_SHAKTI_C, MXL_RV64, rv64_sifive_u_cpu_init), DEFINE_VENDOR_CPU(TYPE_RISCV_CPU_THEAD_C906, MXL_RV64, rv64_thead_c906_cpu_init), +DEFINE_VENDOR_CPU(TYPE_RISCV_CPU_TT_ASCALON, MXL_RV64, rv64_tt_ascalon_cpu_init), DEFINE_VENDOR_CPU(TYPE_RISCV_CPU_VEYRON_V1, MXL_RV64, rv64_veyron_v1_cpu_init), #ifdef CONFIG_TCG DEFINE_DYNAMIC_CPU(TYPE_RISCV_CPU_BASE128, MXL_RV128, rv128_base_cpu_init), -- 2.34.1
[PATCH 04/12] target/riscv: handle vadd.vv form mask and source overlap
Signed-off-by: Anton Blanchard --- target/riscv/insn_trans/trans_rvv.c.inc | 1 + 1 file changed, 1 insertion(+) diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc index bc2780497e..f5ba1c4280 100644 --- a/target/riscv/insn_trans/trans_rvv.c.inc +++ b/target/riscv/insn_trans/trans_rvv.c.inc @@ -403,6 +403,7 @@ static bool vext_check_ss(DisasContext *s, int vd, int vs, int vm) static bool vext_check_sss(DisasContext *s, int vd, int vs1, int vs2, int vm) { return vext_check_ss(s, vd, vs2, vm) && + require_vm(vm, vs1) && require_align(vs1, s->lmul); } -- 2.34.1
[PATCH 01/12] target/riscv: Source vector registers cannot overlap mask register
Add the relevant ISA paragraphs explaining why source (and destination) registers cannot overlap the mask register. Signed-off-by: Anton Blanchard --- target/riscv/insn_trans/trans_rvv.c.inc | 29 ++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc index b9883a5d32..20b1cb127b 100644 --- a/target/riscv/insn_trans/trans_rvv.c.inc +++ b/target/riscv/insn_trans/trans_rvv.c.inc @@ -100,10 +100,33 @@ static bool require_scale_rvfmin(DisasContext *s) } } -/* Destination vector register group cannot overlap source mask register. */ -static bool require_vm(int vm, int vd) +/* + * Source and destination vector register groups cannot overlap source mask + * register: + * + * A vector register cannot be used to provide source operands with more than + * one EEW for a single instruction. A mask register source is considered to + * have EEW=1 for this constraint. An encoding that would result in the same + * vector register being read with two or more different EEWs, including when + * the vector register appears at different positions within two or more vector + * register groups, is reserved. + * (Section 5.2) + * + * A destination vector register group can overlap a source vector + * register group only if one of the following holds: + * 1. The destination EEW equals the source EEW. + * 2. The destination EEW is smaller than the source EEW and the overlap + * is in the lowest-numbered part of the source register group. + * 3. The destination EEW is greater than the source EEW, the source EMUL + * is at least 1, and the overlap is in the highest-numbered part of + * the destination register group. + * For the purpose of determining register group overlap constraints, mask + * elements have EEW=1. + * (Section 5.2) + */ +static bool require_vm(int vm, int v) { -return (vm != 0 || vd != 0); +return (vm != 0 || v != 0); } static bool require_nf(int vd, int nf, int lmul) -- 2.34.1
[PATCH 10/12] target/riscv: handle vwadd.wv form vs1 and vs2 overlap
For 2*SEW = 2*SEW op SEW instructions, vs2 and vs1 cannot overlap, because that would mean a register is read with two different element widths (EEWs). Signed-off-by: Anton Blanchard --- target/riscv/insn_trans/trans_rvv.c.inc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc index 2309d9abd0..312d8b1b81 100644 --- a/target/riscv/insn_trans/trans_rvv.c.inc +++ b/target/riscv/insn_trans/trans_rvv.c.inc @@ -549,7 +549,8 @@ static bool vext_check_dds(DisasContext *s, int vd, int vs1, int vs2, int vm) { return vext_check_ds(s, vd, vs1, vm) && require_vm(vm, vs2) && - require_align(vs2, s->lmul + 1); + require_align(vs2, s->lmul + 1) && + !is_overlapped(vs2, 1 << MAX(s->lmul+1, 0), vs1, 1 << MAX(s->lmul, 0)); } static bool vext_check_sd(DisasContext *s, int vd, int vs, int vm) -- 2.34.1
[PATCH 08/12] target/riscv: handle vwadd.vv form mask and source overlap
Signed-off-by: Anton Blanchard --- target/riscv/insn_trans/trans_rvv.c.inc | 1 + 1 file changed, 1 insertion(+) diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc index bc22b42801..45b2868c54 100644 --- a/target/riscv/insn_trans/trans_rvv.c.inc +++ b/target/riscv/insn_trans/trans_rvv.c.inc @@ -525,6 +525,7 @@ static bool vext_check_dd(DisasContext *s, int vd, int vs, int vm) static bool vext_check_dss(DisasContext *s, int vd, int vs1, int vs2, int vm) { return vext_check_ds(s, vd, vs2, vm) && + require_vm(vm, vs1) && require_align(vs1, s->lmul) && require_noover(vd, s->lmul + 1, vs1, s->lmul); } -- 2.34.1
[PATCH 03/12] target/riscv: handle vadd.vx form mask and source overlap
Signed-off-by: Anton Blanchard --- target/riscv/insn_trans/trans_rvv.c.inc | 1 + 1 file changed, 1 insertion(+) diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc index c66cd95bdb..bc2780497e 100644 --- a/target/riscv/insn_trans/trans_rvv.c.inc +++ b/target/riscv/insn_trans/trans_rvv.c.inc @@ -382,6 +382,7 @@ static bool vext_check_ld_index(DisasContext *s, int vd, int vs2, static bool vext_check_ss(DisasContext *s, int vd, int vs, int vm) { return require_vm(vm, vd) && + require_vm(vm, vs) && require_align(vd, s->lmul) && require_align(vs, s->lmul); } -- 2.34.1
[PATCH 00/12] target/riscv: Fix some RISC-V instruction corner cases
This series fixes some RISC-V instruction corner cases, specifically illegal overlaps between mask and source registers, illegal overlaps between source registers, and illegal overlaps between source and destination registers. These were found by looking at miscompares between QEMU and the Tenstorrent fork of Whisper, which models this behaviour better than Spike and Sail. Anton Blanchard (12): target/riscv: Source vector registers cannot overlap mask register target/riscv: handle vrgather mask and source overlap target/riscv: handle vadd.vx form mask and source overlap target/riscv: handle vadd.vv form mask and source overlap target/riscv: handle vslide1down.vx form mask and source overlap target/riscv: handle vzext.vf2 form mask and source overlap target/riscv: handle vwadd.vx form mask and source overlap target/riscv: handle vwadd.vv form mask and source overlap target/riscv: handle vwadd.wv form mask and source overlap target/riscv: handle vwadd.wv form vs1 and vs2 overlap target/riscv: Add CHECK arg to GEN_OPFVF_WIDEN_TRANS target/riscv: handle overlap in widening instructions with overwrite target/riscv/insn_trans/trans_rvv.c.inc | 139 ++-- 1 file changed, 108 insertions(+), 31 deletions(-) -- 2.34.1
[PATCH 09/12] target/riscv: handle vwadd.wv form mask and source overlap
Signed-off-by: Anton Blanchard --- target/riscv/insn_trans/trans_rvv.c.inc | 1 + 1 file changed, 1 insertion(+) diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc index 45b2868c54..2309d9abd0 100644 --- a/target/riscv/insn_trans/trans_rvv.c.inc +++ b/target/riscv/insn_trans/trans_rvv.c.inc @@ -548,6 +548,7 @@ static bool vext_check_dss(DisasContext *s, int vd, int vs1, int vs2, int vm) static bool vext_check_dds(DisasContext *s, int vd, int vs1, int vs2, int vm) { return vext_check_ds(s, vd, vs1, vm) && + require_vm(vm, vs2) && require_align(vs2, s->lmul + 1); } -- 2.34.1
[PATCH 11/12] target/riscv: Add CHECK arg to GEN_OPFVF_WIDEN_TRANS
Signed-off-by: Anton Blanchard --- target/riscv/insn_trans/trans_rvv.c.inc | 18 +- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc index 312d8b1b81..2741f8bd8e 100644 --- a/target/riscv/insn_trans/trans_rvv.c.inc +++ b/target/riscv/insn_trans/trans_rvv.c.inc @@ -2410,10 +2410,10 @@ static bool opfvf_widen_check(DisasContext *s, arg_rmrr *a) } /* OPFVF with WIDEN */ -#define GEN_OPFVF_WIDEN_TRANS(NAME) \ +#define GEN_OPFVF_WIDEN_TRANS(NAME, CHECK) \ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ {\ -if (opfvf_widen_check(s, a)) { \ +if (CHECK(s, a)) { \ uint32_t data = 0; \ static gen_helper_opfvf *const fns[2] = {\ gen_helper_##NAME##_h, gen_helper_##NAME##_w,\ @@ -2429,8 +2429,8 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ return false;\ } -GEN_OPFVF_WIDEN_TRANS(vfwadd_vf) -GEN_OPFVF_WIDEN_TRANS(vfwsub_vf) +GEN_OPFVF_WIDEN_TRANS(vfwadd_vf, opfvf_widen_check) +GEN_OPFVF_WIDEN_TRANS(vfwsub_vf, opfvf_widen_check) static bool opfwv_widen_check(DisasContext *s, arg_rmrr *a) { @@ -2512,7 +2512,7 @@ GEN_OPFVF_TRANS(vfrdiv_vf, opfvf_check) /* Vector Widening Floating-Point Multiply */ GEN_OPFVV_WIDEN_TRANS(vfwmul_vv, opfvv_widen_check) -GEN_OPFVF_WIDEN_TRANS(vfwmul_vf) +GEN_OPFVF_WIDEN_TRANS(vfwmul_vf, opfvf_widen_check) /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */ GEN_OPFVV_TRANS(vfmacc_vv, opfvv_check) @@ -2537,10 +2537,10 @@ GEN_OPFVV_WIDEN_TRANS(vfwmacc_vv, opfvv_widen_check) GEN_OPFVV_WIDEN_TRANS(vfwnmacc_vv, opfvv_widen_check) GEN_OPFVV_WIDEN_TRANS(vfwmsac_vv, opfvv_widen_check) GEN_OPFVV_WIDEN_TRANS(vfwnmsac_vv, opfvv_widen_check) -GEN_OPFVF_WIDEN_TRANS(vfwmacc_vf) -GEN_OPFVF_WIDEN_TRANS(vfwnmacc_vf) -GEN_OPFVF_WIDEN_TRANS(vfwmsac_vf) -GEN_OPFVF_WIDEN_TRANS(vfwnmsac_vf) +GEN_OPFVF_WIDEN_TRANS(vfwmacc_vf, opfvf_widen_check) +GEN_OPFVF_WIDEN_TRANS(vfwnmacc_vf, opfvf_widen_check) +GEN_OPFVF_WIDEN_TRANS(vfwmsac_vf, 
opfvf_widen_check) +GEN_OPFVF_WIDEN_TRANS(vfwnmsac_vf, opfvf_widen_check) /* Vector Floating-Point Square-Root Instruction */ -- 2.34.1
[PATCH 12/12] target/riscv: handle overlap in widening instructions with overwrite
In these instructions vd is considered a source, so no overlap is allowed between vd and vs1/vs2. See: https://github.com/riscv/riscv-isa-manual/issues/1789 Signed-off-by: Anton Blanchard --- target/riscv/insn_trans/trans_rvv.c.inc | 71 +++-- 1 file changed, 56 insertions(+), 15 deletions(-) diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc index 2741f8bd8e..715008db79 100644 --- a/target/riscv/insn_trans/trans_rvv.c.inc +++ b/target/riscv/insn_trans/trans_rvv.c.inc @@ -1505,6 +1505,16 @@ static bool opivv_widen_check(DisasContext *s, arg_rmrr *a) vext_check_dss(s, a->rd, a->rs1, a->rs2, a->vm); } +/* OPIVV with overwrite and WIDEN */ +static bool opivv_overwrite_widen_check(DisasContext *s, arg_rmrr *a) +{ +return require_rvv(s) && + vext_check_isa_ill(s) && + vext_check_dss(s, a->rd, a->rs1, a->rs2, a->vm) && + !is_overlapped(a->rd, 1 << MAX(s->lmul+1, 0), a->rs1, 1 << MAX(s->lmul, 0)) && + !is_overlapped(a->rd, 1 << MAX(s->lmul+1, 0), a->rs2, 1 << MAX(s->lmul, 0)); +} + static bool do_opivv_widen(DisasContext *s, arg_rmrr *a, gen_helper_gvec_4_ptr *fn, bool (*checkfn)(DisasContext *, arg_rmrr *)) @@ -1552,6 +1562,15 @@ static bool opivx_widen_check(DisasContext *s, arg_rmrr *a) vext_check_ds(s, a->rd, a->rs2, a->vm); } +/* OPIVX with overwrite and WIDEN */ +static bool opivx_overwrite_widen_check(DisasContext *s, arg_rmrr *a) +{ +return require_rvv(s) && + vext_check_isa_ill(s) && + vext_check_ds(s, a->rd, a->rs2, a->vm) && + !is_overlapped(a->rd, 1 << MAX(s->lmul+1, 0), a->rs2, 1 << MAX(s->lmul, 0)); +} + #define GEN_OPIVX_WIDEN_TRANS(NAME, CHECK) \ static bool trans_##NAME(DisasContext *s, arg_rmrr *a)\ { \ @@ -2023,13 +2042,13 @@ GEN_OPIVX_TRANS(vmadd_vx, opivx_check) GEN_OPIVX_TRANS(vnmsub_vx, opivx_check) /* Vector Widening Integer Multiply-Add Instructions */ -GEN_OPIVV_WIDEN_TRANS(vwmaccu_vv, opivv_widen_check) -GEN_OPIVV_WIDEN_TRANS(vwmacc_vv, opivv_widen_check) -GEN_OPIVV_WIDEN_TRANS(vwmaccsu_vv, 
opivv_widen_check) -GEN_OPIVX_WIDEN_TRANS(vwmaccu_vx, opivx_widen_check) -GEN_OPIVX_WIDEN_TRANS(vwmacc_vx, opivx_widen_check) -GEN_OPIVX_WIDEN_TRANS(vwmaccsu_vx, opivx_widen_check) -GEN_OPIVX_WIDEN_TRANS(vwmaccus_vx, opivx_widen_check) +GEN_OPIVV_WIDEN_TRANS(vwmaccu_vv, opivv_overwrite_widen_check) +GEN_OPIVV_WIDEN_TRANS(vwmacc_vv, opivv_overwrite_widen_check) +GEN_OPIVV_WIDEN_TRANS(vwmaccsu_vv, opivv_overwrite_widen_check) +GEN_OPIVX_WIDEN_TRANS(vwmaccu_vx, opivx_overwrite_widen_check) +GEN_OPIVX_WIDEN_TRANS(vwmacc_vx, opivx_overwrite_widen_check) +GEN_OPIVX_WIDEN_TRANS(vwmaccsu_vx, opivx_overwrite_widen_check) +GEN_OPIVX_WIDEN_TRANS(vwmaccus_vx, opivx_overwrite_widen_check) /* Vector Integer Merge and Move Instructions */ static bool trans_vmv_v_v(DisasContext *s, arg_vmv_v_v *a) @@ -2370,6 +2389,18 @@ static bool opfvv_widen_check(DisasContext *s, arg_rmrr *a) vext_check_dss(s, a->rd, a->rs1, a->rs2, a->vm); } +/* Vector Widening Floating-Point Add/Subtract Instructions with overwrite */ +static bool opfvv_overwrite_widen_check(DisasContext *s, arg_rmrr *a) +{ +return require_rvv(s) && + require_rvf(s) && + require_scale_rvf(s) && + vext_check_isa_ill(s) && + vext_check_dss(s, a->rd, a->rs1, a->rs2, a->vm) && + !is_overlapped(a->rd, 1 << MAX(s->lmul+1, 0), a->rs1, 1 << MAX(s->lmul, 0)) && + !is_overlapped(a->rd, 1 << MAX(s->lmul+1, 0), a->rs2, 1 << MAX(s->lmul, 0)); +} + /* OPFVV with WIDEN */ #define GEN_OPFVV_WIDEN_TRANS(NAME, CHECK) \ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ @@ -2409,6 +2440,16 @@ static bool opfvf_widen_check(DisasContext *s, arg_rmrr *a) vext_check_ds(s, a->rd, a->rs2, a->vm); } +static bool opfvf_overwrite_widen_check(DisasContext *s, arg_rmrr *a) +{ +return require_rvv(s) && + require_rvf(s) && + require_scale_rvf(s) && + vext_check_isa_ill(s) && + vext_check_ds(s, a->rd, a->rs2, a->vm) && + !is_overlapped(a->rd, 1 << MAX(s->lmul+1, 0), a->rs2, 1 << MAX(s->lmul, 0)); +} + /* OPFVF with WIDEN */ #define 
GEN_OPFVF_WIDEN_TRANS(NAME, CHECK) \ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ @@ -2533,14 +2574,14 @
[PATCH 07/12] target/riscv: handle vwadd.vx form mask and source overlap
Signed-off-by: Anton Blanchard --- target/riscv/insn_trans/trans_rvv.c.inc | 9 + 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc index 0952bcbe2c..bc22b42801 100644 --- a/target/riscv/insn_trans/trans_rvv.c.inc +++ b/target/riscv/insn_trans/trans_rvv.c.inc @@ -458,13 +458,14 @@ static bool vext_check_mss(DisasContext *s, int vd, int vs1, int vs2) * instruction cannot overlap the source mask register (v0). * (Section 5.3) */ -static bool vext_wide_check_common(DisasContext *s, int vd, int vm) +static bool vext_wide_check_common(DisasContext *s, int vd, int vs, int vm) { return (s->lmul <= 2) && (s->sew < MO_64) && ((s->sew + 1) <= (s->cfg_ptr->elen >> 4)) && require_align(vd, s->lmul + 1) && - require_vm(vm, vd); + require_vm(vm, vd) && + require_vm(vm, vs); } /* @@ -498,14 +499,14 @@ static bool vext_narrow_check_common(DisasContext *s, int vd, int vs2, static bool vext_check_ds(DisasContext *s, int vd, int vs, int vm) { -return vext_wide_check_common(s, vd, vm) && +return vext_wide_check_common(s, vd, vs, vm) && require_align(vs, s->lmul) && require_noover(vd, s->lmul + 1, vs, s->lmul); } static bool vext_check_dd(DisasContext *s, int vd, int vs, int vm) { -return vext_wide_check_common(s, vd, vm) && +return vext_wide_check_common(s, vd, vs, vm) && require_align(vs, s->lmul + 1); } -- 2.34.1
[PATCH 02/12] target/riscv: handle vrgather mask and source overlap
Signed-off-by: Anton Blanchard --- target/riscv/insn_trans/trans_rvv.c.inc | 11 --- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc index 20b1cb127b..c66cd95bdb 100644 --- a/target/riscv/insn_trans/trans_rvv.c.inc +++ b/target/riscv/insn_trans/trans_rvv.c.inc @@ -3453,7 +3453,9 @@ static bool vrgather_vv_check(DisasContext *s, arg_rmrr *a) require_align(a->rs1, s->lmul) && require_align(a->rs2, s->lmul) && (a->rd != a->rs2 && a->rd != a->rs1) && - require_vm(a->vm, a->rd); + require_vm(a->vm, a->rd) && + require_vm(a->vm, a->rs1) && + require_vm(a->vm, a->rs2); } static bool vrgatherei16_vv_check(DisasContext *s, arg_rmrr *a) @@ -3470,7 +3472,9 @@ static bool vrgatherei16_vv_check(DisasContext *s, arg_rmrr *a) a->rs1, 1 << MAX(emul, 0)) && !is_overlapped(a->rd, 1 << MAX(s->lmul, 0), a->rs2, 1 << MAX(s->lmul, 0)) && - require_vm(a->vm, a->rd); + require_vm(a->vm, a->rd) && + require_vm(a->vm, a->rs1) && + require_vm(a->vm, a->rs2); } GEN_OPIVV_TRANS(vrgather_vv, vrgather_vv_check) @@ -3483,7 +3487,8 @@ static bool vrgather_vx_check(DisasContext *s, arg_rmrr *a) require_align(a->rd, s->lmul) && require_align(a->rs2, s->lmul) && (a->rd != a->rs2) && - require_vm(a->vm, a->rd); + require_vm(a->vm, a->rd) && + require_vm(a->vm, a->rs2); } /* vrgather.vx vd, vs2, rs1, vm # vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[rs1] */ -- 2.34.1
[PATCH 05/12] target/riscv: handle vslide1down.vx form mask and source overlap
Signed-off-by: Anton Blanchard --- target/riscv/insn_trans/trans_rvv.c.inc | 1 + 1 file changed, 1 insertion(+) diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc index f5ba1c4280..a873536eea 100644 --- a/target/riscv/insn_trans/trans_rvv.c.inc +++ b/target/riscv/insn_trans/trans_rvv.c.inc @@ -609,6 +609,7 @@ static bool vext_check_slide(DisasContext *s, int vd, int vs2, { bool ret = require_align(vs2, s->lmul) && require_align(vd, s->lmul) && + require_vm(vm, vs2) && require_vm(vm, vd); if (is_over) { ret &= (vd != vs2); -- 2.34.1