[Qemu-devel] [PATCH] pseries: Add H_SET_MODE hcall to change guest exception endianness

2013-08-19 Thread Anton Blanchard

Hi Anthony,

> > +if (resource == 4) {
> 
> This ought to be a #define.  There's no else here, is that expected?
> Should you return failure for a different resource?

Good point, I made it a define. We were returning H_P2 for a different
resource, but it was a bit of a twisted maze of return statements. I
tried to clear it up in this version.

> Without knowing this interface better, a few things come to mind.
> 
> Is mflags a boolean?  If so, you can reduce this to a single loop and
> drop the switch() statement.  If mflags is truly a set of flags, it
> would be nice to use #define to give the flags a proper symbolic name.

Unfortunately it isn't a boolean, but yes it should have been made
clearer with a #define.

Anton
--

pseries: Add H_SET_MODE hcall to change guest exception endianness

H_SET_MODE is used for controlling various partition settings. One
of these settings is the endianness a guest takes its exceptions in.

Signed-off-by: Anton Blanchard 
---

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 16bfab9..de639f6 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -262,7 +262,7 @@ static void *spapr_create_fdt_skel(const char *cpu_model,
 uint32_t start_prop = cpu_to_be32(initrd_base);
 uint32_t end_prop = cpu_to_be32(initrd_base + initrd_size);
 char hypertas_prop[] = "hcall-pft\0hcall-term\0hcall-dabr\0hcall-interrupt"
-"\0hcall-tce\0hcall-vio\0hcall-splpar\0hcall-bulk";
+"\0hcall-tce\0hcall-vio\0hcall-splpar\0hcall-bulk\0hcall-set-mode";
 char qemu_hypertas_prop[] = "hcall-memop1";
 uint32_t refpoints[] = {cpu_to_be32(0x4), cpu_to_be32(0x4)};
 uint32_t interrupt_server_ranges_prop[] = {0, cpu_to_be32(smp_cpus)};
diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
index 67d6cd9..89e6a00 100644
--- a/hw/ppc/spapr_hcall.c
+++ b/hw/ppc/spapr_hcall.c
@@ -657,6 +657,54 @@ static target_ulong h_logical_dcbf(PowerPCCPU *cpu, 
sPAPREnvironment *spapr,
 return H_SUCCESS;
 }
 
+static target_ulong h_set_mode(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+   target_ulong opcode, target_ulong *args)
+{
+CPUState *cs;
+target_ulong mflags = args[0];
+target_ulong resource = args[1];
+target_ulong value1 = args[2];
+target_ulong value2 = args[3];
+target_ulong ret = H_P2;
+
+if (resource == H_SET_MODE_ENDIAN) {
+if (value1) {
+ret = H_P3;
+goto out;
+}
+if (value2) {
+ret = H_P4;
+goto out;
+}
+
+switch (mflags) {
+case H_SET_MODE_ENDIAN_BIG:
+for (cs = first_cpu; cs != NULL; cs = cs->next_cpu) {
+PowerPCCPU *cp = POWERPC_CPU(cs);
+CPUPPCState *env = &cp->env;
+env->spr[SPR_LPCR] &= ~LPCR_ILE;
+}
+ret = H_SUCCESS;
+break;
+
+case H_SET_MODE_ENDIAN_LITTLE:
+for (cs = first_cpu; cs != NULL; cs = cs->next_cpu) {
+PowerPCCPU *cp = POWERPC_CPU(cs);
+CPUPPCState *env = &cp->env;
+env->spr[SPR_LPCR] |= LPCR_ILE;
+}
+ret = H_SUCCESS;
+break;
+
+default:
+ret = H_UNSUPPORTED_FLAG;
+}
+}
+
+out:
+return ret;
+}
+
 static spapr_hcall_fn papr_hypercall_table[(MAX_HCALL_OPCODE / 4) + 1];
 static spapr_hcall_fn kvmppc_hypercall_table[KVMPPC_HCALL_MAX - 
KVMPPC_HCALL_BASE + 1];
 
@@ -734,6 +782,8 @@ static void hypercall_register_types(void)
 
 /* qemu/KVM-PPC specific hcalls */
 spapr_register_hypercall(KVMPPC_H_RTAS, h_rtas);
+
+spapr_register_hypercall(H_SET_MODE, h_set_mode);
 }
 
 type_init(hypercall_register_types)
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 9fc1972..ab42813 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -109,6 +109,15 @@ typedef struct sPAPREnvironment {
 #define H_NOT_ENOUGH_RESOURCES -44
 #define H_R_STATE -45
 #define H_RESCINDEND  -46
+#define H_P2  -55
+#define H_P3  -56
+#define H_P4  -57
+#define H_P5  -58
+#define H_P6  -59
+#define H_P7  -60
+#define H_P8  -61
+#define H_P9  -62
+#define H_UNSUPPORTED_FLAG -256
 #define H_MULTI_THREADS_ACTIVE -9005
 
 
@@ -143,6 +152,11 @@ typedef struct sPAPREnvironment {
 #define H_PP1 (1ULL<<(63-62))
 #define H_PP2 (1ULL<<(63-63))
 
+/* H_SET_MODE flags */
+#define H_SET_MODE_ENDIAN  4
+#define  H_SET_MODE_ENDIAN_BIG 0
+#define  H_SET_MODE_ENDIAN_LITTLE  1
+
 /* VASI States */
 #define H_VASI_INVALID0
 #define H_VASI_ENABLED1
@@ -267,7 +281,8 @@ typedef struct sPAPREnvironment {
 #define H_GET_EM_PARMS  0x2B8
 #define H_SET_MPP   0x2D0
 #define H_GET_MPP  

[Qemu-devel] [PATCH] spapr-vlan: Don't touch last entry in buffer list

2014-08-21 Thread Anton Blanchard
The last 8 bytes of the buffer list is defined to contain the number
of dropped frames. At the moment we use it to store rx entries,
which trips up ethtool -S:

rx_no_buffer: 9223380832981355136

Fix this by skipping the last buffer list entry.

Signed-off-by: Anton Blanchard 
---

diff --git a/hw/net/spapr_llan.c b/hw/net/spapr_llan.c
index 2d47df6..23c47d3 100644
--- a/hw/net/spapr_llan.c
+++ b/hw/net/spapr_llan.c
@@ -72,7 +72,14 @@ typedef uint64_t vlan_bd_t;
 #define VLAN_RXQ_BD_OFF  0
 #define VLAN_FILTER_BD_OFF   8
 #define VLAN_RX_BDS_OFF  16
-#define VLAN_MAX_BUFS((SPAPR_TCE_PAGE_SIZE - VLAN_RX_BDS_OFF) / 8)
+/*
+ * The final 8 bytes of the buffer list is a counter of frames dropped
+ * because there was not a buffer in the buffer list capable of holding
+ * the frame. We must avoid it, or the operating system will report garbage
+ * for this statistic.
+ */
+#define VLAN_RX_BDS_LEN  (SPAPR_TCE_PAGE_SIZE - VLAN_RX_BDS_OFF - 8)
+#define VLAN_MAX_BUFS(VLAN_RX_BDS_LEN / 8)
 
 #define TYPE_VIO_SPAPR_VLAN_DEVICE "spapr-vlan"
 #define VIO_SPAPR_VLAN_DEVICE(obj) \
@@ -119,7 +126,7 @@ static ssize_t spapr_vlan_receive(NetClientState *nc, const 
uint8_t *buf,
 
 do {
 buf_ptr += 8;
-if (buf_ptr >= SPAPR_TCE_PAGE_SIZE) {
+if (buf_ptr >= (VLAN_RX_BDS_LEN + VLAN_RX_BDS_OFF)) {
 buf_ptr = VLAN_RX_BDS_OFF;
 }
 
@@ -397,7 +404,7 @@ static target_ulong h_add_logical_lan_buffer(PowerPCCPU 
*cpu,
 
 do {
 dev->add_buf_ptr += 8;
-if (dev->add_buf_ptr >= SPAPR_TCE_PAGE_SIZE) {
+if (dev->add_buf_ptr >= (VLAN_RX_BDS_LEN + VLAN_RX_BDS_OFF)) {
 dev->add_buf_ptr = VLAN_RX_BDS_OFF;
 }
 



[Qemu-devel] [Bug 965327] Re: virtio-pci: can't reserve io 0x0000-0x001f

2014-08-22 Thread Anton Blanchard
Scrubbing our ppc64 bugs. Thanks for the update Ken, I'll close this.

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/965327

Title:
  virtio-pci: can't reserve io 0x0000-0x001f

Status in QEMU:
  New

Bug description:
  Before 2012-03-05 I was able to successfully enable a virtio-pci block
  device from a sPAPR pseries ppc64 Linux guest. With the current git
  master branch after this date I get the following error:

  virtio-pci 0000:00:00.0: device not available (can't reserve [io  
0x0000-0x001f])
  virtio-pci: probe of 0000:00:00.0 failed with error -22
  virtio-pci 0000:00:01.0: device not available (can't reserve [io  
0x0000-0x003f])
  virtio-pci: probe of 0000:00:01.0 failed with error -22

  
  Full details:

  -
  command line:
  -
   ./testing/qemu/ppc64-softmmu/qemu-system-ppc64 \
-L ./testing/qemu/pc-bios \
-M pseries \
-m 1024 \
-rtc base=localtime \
-parallel none \
-netdev 
type=user,id=mynet0,hostfwd=tcp:127.0.0.1:9011-10.0.2.11:22 \
-device virtio-net-pci,netdev=mynet0 \
-drive 
file=images/suse-ppc.img,if=virtio,index=0,media=disk,cache=unsafe \
-kernel images/iso/suseboot/vmlinux \
-append "root=/dev/mapper/system-root ro audit=0 
selinux=0 apparmor=0 console=tty0 console=ttyPZ0" \
-initrd images/iso/suseboot/initrd.img \
-gdb tcp::1234

  
  --
  BEFORE virtio-pci "bug/user error?" introduced:
  --
  sPAPR memory map:
  RTAS : 0x3fff..3fff0013
  FDT  : 0x3ffe..3ffe
  Kernel   : 0x0040..01abad7b
  Ramdisk  : 0x01ad..02053df7
  Firmware load: 0x..000d6ec0
  Firmware runtime : 0x3d7e..3ffe
  sPAPR reset

  SLOF **
  QEMU Starting
  Build Date = Mar  3 2012 21:46:40
   FW Version = git-440e662879c4fc3c
   Press "s" to enter Open Firmware.

  Populating /vdevice methods
  Populating /vdevice/v-scsi@2000
  VSCSI: Initializing
  VSCSI: Looking for disks
SCSI ID 2 CD-ROM   : "QEMU QEMU CD-ROM  1.0."
  Populating /vdevice/vty@3000
  Populating /pci@0,0
   Adapters on 
   00  (D) : 1af4 1000virtio [ net ]
   00 0800 (D) : 1af4 1001virtio [ block ]
  No NVRAM common partition, re-initializing...
  Using default console: /vdevice/vty@3000
  Detected RAM kernel at 40 (16bad7c bytes)

Welcome to Open Firmware

Copyright (c) 2004, 2011 IBM Corporation All rights reserved.
This program and the accompanying materials are made available
under the terms of the BSD License available at
http://www.opensource.org/licenses/bsd-license.php

  Booting from memory...
  OF stdout device is: /vdevice/vty@3000
  Preparing to boot Linux version 3.2.0-2-ppc64 (geeko@buildhost) (gcc version 
4.6.2 20111212 [gcc-4_6-branch revision 18] (SUSE Linux) ) #1 SMP Wed Jan 
25 10:51:08 UTC 2012 (2206a5c)
  Detected machine type: 0101
  Max number of cores passed to firmware: 1024 (NR_CPUS = 1024)
  Calling ibm,client-architecture-support... not implemented
  couldn't open /packages/elf-loader
  command line: root=/dev/mapper/system-root ro audit=0 selinux=0 apparmor=0 
console=tty0 console=ttyPZ0
  memory layout at init:
memory_limit :  (16 MB aligned)
alloc_bottom : 01ad
alloc_top: 3000
alloc_top_hi : 4000
rmo_top  : 3000
ram_top  : 4000
  instantiating rtas at 0x2fff... done
  Querying for OPAL presence... not there.
  boot cpu hw idx 0
  copying OF device tree...
  Building dt strings...
  Building dt structure...
  Device tree strings 0x020e -> 0x020e0635
  Device tree struct  0x020f -> 0x0210
  Calling quiesce...
  returning from prom_init
  Using pSeries machine description
  Using 1TB segments
  Found initrd at 0xc1ad:0xc2053df8
  bootconsole [udbg0] enabled
  CPU maps initialized for 1 thread per core
  Starting Linux PPC64 #1 SMP Wed Jan 25 10:51:08 UTC 2012 (2206a5c)
  -
  ppc64_pft_size= 0x18
  physicalMemorySize= 0x4000
  htab_hash_mask= 0x1
  -
  Initializing cgroup subsys cpuset
  Initializing cgroup subsys cpu
  Linux version 3.2.0-2-ppc64 (geeko@buildhost) (gc

[Qemu-devel] [PATCH 0/5] 64bit PowerPC little endian support

2013-08-06 Thread Anton Blanchard
This patchset adds support for 64bit PowerPC little endian on POWER7.

Linux kernel patches to support this were sent out earlier today:

https://lists.ozlabs.org/pipermail/linuxppc-dev/2013-August/109849.html

Anton
--

Anton Blanchard (4):
  target-ppc: POWER7 supports the MSR_LE bit
  target-ppc: USE LPCR_ILE to control exception endian on POWER7
  pseries: Add H_SET_MODE hcall to change guest exception endianness
  disas/ppc.c: Fix little endian disassembly

Benjamin Herrenschmidt (1):
  pseries: Fix loading of little endian kernels

 disas/ppc.c |  3 ++-
 hw/ppc/spapr.c  | 15 +--
 hw/ppc/spapr_hcall.c| 44 
 include/hw/ppc/spapr.h  | 12 +++-
 target-ppc/cpu.h|  2 ++
 target-ppc/excp_helper.c| 10 ++
 target-ppc/translate_init.c |  2 +-
 7 files changed, 83 insertions(+), 5 deletions(-)

-- 
1.8.1.2




[Qemu-devel] [PATCH 5/5] pseries: Fix loading of little endian kernels

2013-08-06 Thread Anton Blanchard
From: Benjamin Herrenschmidt 

Try loading the kernel as little endian if it fails big endian.

Signed-off-by: Benjamin Herrenschmidt 
Signed-off-by: Anton Blanchard 
---
 hw/ppc/spapr.c | 13 -
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index de639f6..639b719 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -253,6 +253,7 @@ static void *spapr_create_fdt_skel(const char *cpu_model,
hwaddr initrd_base,
hwaddr initrd_size,
hwaddr kernel_size,
+   bool little_endian,
const char *boot_device,
const char *kernel_cmdline,
uint32_t epow_irq)
@@ -306,6 +307,9 @@ static void *spapr_create_fdt_skel(const char *cpu_model,
   cpu_to_be64(kernel_size) };
 
 _FDT((fdt_property(fdt, "qemu,boot-kernel", &kprop, sizeof(kprop))));
+if (little_endian) {
+_FDT((fdt_property(fdt, "qemu,boot-kernel-le", NULL, 0)));
+}
 }
 if (boot_device) {
 _FDT((fdt_property_string(fdt, "qemu,boot-device", boot_device)));
@@ -1082,6 +1086,7 @@ static void ppc_spapr_init(QEMUMachineInitArgs *args)
 uint32_t initrd_base = 0;
 long kernel_size = 0, initrd_size = 0;
 long load_limit, rtas_limit, fw_size;
+bool kernel_le = false;
 char *filename;
 
 msi_supported = true;
@@ -1261,6 +1266,12 @@ static void ppc_spapr_init(QEMUMachineInitArgs *args)
 kernel_size = load_elf(kernel_filename, translate_kernel_address, NULL,
NULL, &lowaddr, NULL, 1, ELF_MACHINE, 0);
 if (kernel_size < 0) {
+kernel_size = load_elf(kernel_filename,
+   translate_kernel_address, NULL,
+   NULL, &lowaddr, NULL, 0, ELF_MACHINE, 0);
+kernel_le = kernel_size > 0;
+}
+if (kernel_size < 0) {
 kernel_size = load_image_targphys(kernel_filename,
   KERNEL_LOAD_ADDR,
   load_limit - KERNEL_LOAD_ADDR);
@@ -1310,7 +1321,7 @@ static void ppc_spapr_init(QEMUMachineInitArgs *args)
 /* Prepare the device tree */
 spapr->fdt_skel = spapr_create_fdt_skel(cpu_model,
 initrd_base, initrd_size,
-kernel_size,
+kernel_size, kernel_le,
 boot_device, kernel_cmdline,
 spapr->epow_irq);
 assert(spapr->fdt_skel != NULL);
-- 
1.8.1.2




[Qemu-devel] [PATCH 1/5] target-ppc: POWER7 supports the MSR_LE bit

2013-08-06 Thread Anton Blanchard
Add MSR_LE to the msr_mask for POWER7.

Signed-off-by: Anton Blanchard 
---
 target-ppc/translate_init.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
index b14aec8..33914bc 100644
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -7122,7 +7122,7 @@ POWERPC_FAMILY(POWER7)(ObjectClass *oc, void *data)
PPC_SEGMENT_64B | PPC_SLBI |
PPC_POPCNTB | PPC_POPCNTWD;
 pcc->insns_flags2 = PPC2_VSX | PPC2_DFP | PPC2_DBRX | PPC2_ISA205;
-pcc->msr_mask = 0x8204FF36ULL;
+pcc->msr_mask = 0x8204FF37ULL;
 pcc->mmu_model = POWERPC_MMU_2_06;
 #if defined(CONFIG_SOFTMMU)
 pcc->handle_mmu_fault = ppc_hash64_handle_mmu_fault;
-- 
1.8.1.2




[Qemu-devel] [PATCH 3/5] pseries: Add H_SET_MODE hcall to change guest exception endianness

2013-08-06 Thread Anton Blanchard
H_SET_MODE is used for controlling various partition settings. One
of these settings is the endianness a guest takes its exceptions in.

Signed-off-by: Anton Blanchard 
---
 hw/ppc/spapr.c |  2 +-
 hw/ppc/spapr_hcall.c   | 44 
 include/hw/ppc/spapr.h | 12 +++-
 3 files changed, 56 insertions(+), 2 deletions(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 16bfab9..de639f6 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -262,7 +262,7 @@ static void *spapr_create_fdt_skel(const char *cpu_model,
 uint32_t start_prop = cpu_to_be32(initrd_base);
 uint32_t end_prop = cpu_to_be32(initrd_base + initrd_size);
 char hypertas_prop[] = "hcall-pft\0hcall-term\0hcall-dabr\0hcall-interrupt"
-"\0hcall-tce\0hcall-vio\0hcall-splpar\0hcall-bulk";
+"\0hcall-tce\0hcall-vio\0hcall-splpar\0hcall-bulk\0hcall-set-mode";
 char qemu_hypertas_prop[] = "hcall-memop1";
 uint32_t refpoints[] = {cpu_to_be32(0x4), cpu_to_be32(0x4)};
 uint32_t interrupt_server_ranges_prop[] = {0, cpu_to_be32(smp_cpus)};
diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
index 67d6cd9..79e1b61 100644
--- a/hw/ppc/spapr_hcall.c
+++ b/hw/ppc/spapr_hcall.c
@@ -657,6 +657,48 @@ static target_ulong h_logical_dcbf(PowerPCCPU *cpu, 
sPAPREnvironment *spapr,
 return H_SUCCESS;
 }
 
+static target_ulong h_set_mode(PowerPCCPU *cpu, sPAPREnvironment *spapr,
+   target_ulong opcode, target_ulong *args)
+{
+CPUState *cs;
+target_ulong mflags = args[0];
+target_ulong resource = args[1];
+target_ulong value1 = args[2];
+target_ulong value2 = args[3];
+
+if (resource == 4) {
+if (value1) {
+return H_P3;
+}
+if (value2) {
+return H_P4;
+}
+
+switch (mflags) {
+case 0:
+for (cs = first_cpu; cs != NULL; cs = cs->next_cpu) {
+PowerPCCPU *cp = POWERPC_CPU(cs);
+CPUPPCState *env = &cp->env;
+env->spr[SPR_LPCR] &= ~LPCR_ILE;
+}
+return H_SUCCESS;
+
+case 1:
+for (cs = first_cpu; cs != NULL; cs = cs->next_cpu) {
+PowerPCCPU *cp = POWERPC_CPU(cs);
+CPUPPCState *env = &cp->env;
+env->spr[SPR_LPCR] |= LPCR_ILE;
+}
+return H_SUCCESS;
+
+default:
+return H_UNSUPPORTED_FLAG;
+}
+}
+
+return H_P2;
+}
+
 static spapr_hcall_fn papr_hypercall_table[(MAX_HCALL_OPCODE / 4) + 1];
 static spapr_hcall_fn kvmppc_hypercall_table[KVMPPC_HCALL_MAX - 
KVMPPC_HCALL_BASE + 1];
 
@@ -734,6 +776,8 @@ static void hypercall_register_types(void)
 
 /* qemu/KVM-PPC specific hcalls */
 spapr_register_hypercall(KVMPPC_H_RTAS, h_rtas);
+
+spapr_register_hypercall(H_SET_MODE, h_set_mode);
 }
 
 type_init(hypercall_register_types)
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 9fc1972..3ceec7a 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -109,7 +109,16 @@ typedef struct sPAPREnvironment {
 #define H_NOT_ENOUGH_RESOURCES -44
 #define H_R_STATE -45
 #define H_RESCINDEND  -46
+#define H_P2  -55
+#define H_P3  -56
+#define H_P4  -57
+#define H_P5  -58
+#define H_P6  -59
+#define H_P7  -60
+#define H_P8  -61
+#define H_P9  -62
 #define H_MULTI_THREADS_ACTIVE -9005
+#define H_UNSUPPORTED_FLAG -256
 
 
 /* Long Busy is a condition that can be returned by the firmware
@@ -267,7 +276,8 @@ typedef struct sPAPREnvironment {
 #define H_GET_EM_PARMS  0x2B8
 #define H_SET_MPP   0x2D0
 #define H_GET_MPP   0x2D4
-#define MAX_HCALL_OPCODEH_GET_MPP
+#define H_SET_MODE  0x31C
+#define MAX_HCALL_OPCODEH_SET_MODE
 
 /* The hcalls above are standardized in PAPR and implemented by pHyp
  * as well.
-- 
1.8.1.2




[Qemu-devel] [PATCH 2/5] target-ppc: USE LPCR_ILE to control exception endian on POWER7

2013-08-06 Thread Anton Blanchard
On POWER7, LPCR_ILE is used to control what endian guests take
their exceptions in so use it instead of MSR_ILE.

Signed-off-by: Anton Blanchard 
---
 target-ppc/cpu.h |  2 ++
 target-ppc/excp_helper.c | 10 ++
 2 files changed, 12 insertions(+)

diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
index 711db08..422a6bb 100644
--- a/target-ppc/cpu.h
+++ b/target-ppc/cpu.h
@@ -453,6 +453,8 @@ struct ppc_slb_t {
 #define MSR_RI   1  /* Recoverable interrupt1*/
 #define MSR_LE   0  /* Little-endian mode   1 hflags */
 
+#define LPCR_ILE (1 << (63-38))
+
 #define msr_sf   ((env->msr >> MSR_SF)   & 1)
 #define msr_isf  ((env->msr >> MSR_ISF)  & 1)
 #define msr_shv  ((env->msr >> MSR_SHV)  & 1)
diff --git a/target-ppc/excp_helper.c b/target-ppc/excp_helper.c
index e9fcad8..e957761 100644
--- a/target-ppc/excp_helper.c
+++ b/target-ppc/excp_helper.c
@@ -611,9 +611,19 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int 
excp_model, int excp)
 tlb_flush(env, 1);
 }
 
+#ifdef TARGET_PPC64
+if (excp_model == POWERPC_EXCP_POWER7) {
+if (env->spr[SPR_LPCR] & LPCR_ILE) {
+new_msr |= (target_ulong)1 << MSR_LE;
+}
+} else if (msr_ile) {
+new_msr |= (target_ulong)1 << MSR_LE;
+}
+#else
 if (msr_ile) {
 new_msr |= (target_ulong)1 << MSR_LE;
 }
+#endif
 
 /* Jump to handler */
 vector = env->excp_vectors[excp];
-- 
1.8.1.2




[Qemu-devel] [PATCH 4/5] disas/ppc.c: Fix little endian disassembly

2013-08-06 Thread Anton Blanchard
Use info->endian to select the endian of the instruction to
be disassembled.

Signed-off-by: Anton Blanchard 
---
 disas/ppc.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/disas/ppc.c b/disas/ppc.c
index c149506..99c4cbc 100644
--- a/disas/ppc.c
+++ b/disas/ppc.c
@@ -5157,7 +5157,8 @@ int
 print_insn_ppc (bfd_vma memaddr, struct disassemble_info *info)
 {
   int dialect = (char *) info->private_data - (char *) 0;
-  return print_insn_powerpc (memaddr, info, 1, dialect);
+  return print_insn_powerpc (memaddr, info, info->endian == BFD_ENDIAN_BIG,
+ dialect);
 }
 
 /* Print a big endian PowerPC instruction.  */
-- 
1.8.1.2




Re: [Qemu-devel] [PATCH 1/7] virtio: allow byte swapping for vring and config access

2013-08-08 Thread Anton Blanchard

Hi,

> > The distinction is important in QEMU.  ppc64 is still
> > TARGET_WORDS_BIGENDIAN.  We still want most stl_phys to treat
> > integers as big endian.  There's just this extra concept that CPU
> > loads/stores are sometimes byte swapped.  That affects virtio but
> > not a lot else.
> 
> You've redefined endian here; please don't do that.  Endian is the
> order in memory which a CPU does loads and stores.  From any
> reasonable definition, PPC is bi-endian.
> 
> It's actually a weird thing for the qemu core to know at all: almost
> everything which cares is in target-specific code.  The exceptions are
> gdb stubs and virtio, both of which are "native endian" (and that
> weird code in exec.c: what is notdirty_mem_write?).
> 
> Your argument that we shouldn't fix stl_* might be justifiable (ie.
> just hack virtio and gdb as one-offs), but it's neither clear nor
> "least surprise".

Here is the hack I have to get gdbstub going with a little endian
PowerPC kernel. Basically:

LE guest -> BE QEMU -> BE gdb (pointing at the LE vmlinux)

In this setup, gdb expects registers to be sent in little endian mode.

It's a pretty big mistake for the gdb remote protocol to be using
native endian to transfer registers especially when there is no other
protocol negotiation to work out what endian that is.

Anton
--

Index: b/gdbstub.c
===
--- a/gdbstub.c
+++ b/gdbstub.c
@@ -317,6 +317,8 @@ static GDBState *gdbserver_state;
 
 bool gdb_has_xml;
 
+bool gdbstub_cross_endian;
+
 #ifdef CONFIG_USER_ONLY
 /* XXX: This is not thread safe.  Do we care?  */
 static int gdbserver_fd = -1;
Index: b/include/exec/gdbstub.h
===
--- a/include/exec/gdbstub.h
+++ b/include/exec/gdbstub.h
@@ -42,8 +42,13 @@ static inline int cpu_index(CPUState *cp
 /* The GDB remote protocol transfers values in target byte order.
This means
  * we can use the raw memory access routines to access the value
buffer.
  * Conveniently, these also handle the case where the buffer is
mis-aligned.
+ *
+ * We do need to byte swap if the CPU isn't running in the QEMU
compiled
+ * target endian mode.
  */
 
+extern bool gdbstub_cross_endian;
+
 static inline int gdb_get_reg8(uint8_t *mem_buf, uint8_t val)
 {
 stb_p(mem_buf, val);
@@ -52,28 +57,49 @@ static inline int gdb_get_reg8(uint8_t *
 
 static inline int gdb_get_reg16(uint8_t *mem_buf, uint16_t val)
 {
-stw_p(mem_buf, val);
+if (gdbstub_cross_endian)
+stw_p(mem_buf, bswap16(val));
+else
+stw_p(mem_buf, val);
 return 2;
 }
 
 static inline int gdb_get_reg32(uint8_t *mem_buf, uint32_t val)
 {
-stl_p(mem_buf, val);
+if (gdbstub_cross_endian)
+stl_p(mem_buf, bswap32(val)); /* 32-bit store: must match "return 4" */
+else
+stl_p(mem_buf, val);
 return 4;
 }
 
 static inline int gdb_get_reg64(uint8_t *mem_buf, uint64_t val)
 {
-stq_p(mem_buf, val);
+if (gdbstub_cross_endian)
+stq_p(mem_buf, bswap64(val));
+else
+stq_p(mem_buf, val);
 return 8;
 }
 
 #if TARGET_LONG_BITS == 64
 #define gdb_get_regl(buf, val) gdb_get_reg64(buf, val)
-#define ldtul_p(addr) ldq_p(addr)
+static inline uint64_t ldtul_p(const void *ptr)
+{
+   uint64_t tmp = ldq_p(ptr);
+   if (gdbstub_cross_endian)
+   tmp = bswap64(tmp);
+   return tmp;
+}
 #else
 #define gdb_get_regl(buf, val) gdb_get_reg32(buf, val)
-#define ldtul_p(addr) ldl_p(addr)
+static inline uint32_t ldtul_p(const void *ptr)
+{
+   uint32_t tmp = ldl_p(ptr);
+   if (gdbstub_cross_endian)
+   tmp = bswap32(tmp);
+   return tmp;
+}
 #endif
 
 #endif



[Qemu-devel] [PATCH] pseries: Fix stalls on hypervisor virtual console

2013-08-12 Thread Anton Blanchard

A number of users are reporting stalls when using the pseries
hypervisor virtual console.

A simple test case is to paste 15 or 17 characters at a time
into the console. Pasting 15 characters at a time works fine
but pasting 17 characters hangs for a random amount of time.
Other activity (network, qemu monitor etc) unblocks it.

If qemu-char tries to send more than 16 characters at once, 
vty_can_receive returns false. At this point we have to
wait for the guest to consume that output. Everything is good
so far.

The problem occurs when the guest does consume the output.
We need to signal back to the qemu-char layer that we are
ready for more input. Without this we block until something
else kicks us (eg network activity).

Cc: qemu-sta...@nongnu.org
Signed-off-by: Anton Blanchard 
---

Index: b/hw/char/spapr_vty.c
===
--- a/hw/char/spapr_vty.c
+++ b/hw/char/spapr_vty.c
@@ -47,6 +47,8 @@ static int vty_getchars(VIOsPAPRDevice *
 buf[n++] = dev->buf[dev->out++ % VTERM_BUFSIZE];
 }
 
+qemu_chr_accept_input(dev->chardev);
+
 return n;
 }
 



[Qemu-devel] [PATCH] hypervisor property clashes with hypervisor node

2014-08-29 Thread Anton Blanchard
dtc fails on a recent QEMU snapshot:

ERROR (name_properties): "name" property in /hypervisor#1 is incorrect 
("hypervisor" instead of base node name)

Looking at the device tree we have a hypervisor property:

# lsprop hypervisor
hypervisor   "kvm"

But we also have a hypervisor node, with a name that doesn't match:

# lsprop hypervisor#1/
name "hypervisor"
compatible   "linux,kvm"
linux,phandle7e5eb5d8 (2120136152)

Commit c08ce91d309c (spapr: add uuid/host details to device tree)
looks to have collided with an earlier patch. Remove the hypervisor
property.

Signed-off-by: Anton Blanchard 
---

Index: b/hw/ppc/spapr.c
===
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -362,10 +362,6 @@ static void *spapr_create_fdt_skel(hwadd
 _FDT((fdt_property_string(fdt, "model", "IBM pSeries (emulated by 
qemu)")));
 _FDT((fdt_property_string(fdt, "compatible", "qemu,pseries")));
 
-if (kvm_enabled()) {
-_FDT((fdt_property_string(fdt, "hypervisor", "kvm")));
-}
-
 /*
  * Add info to guest to indentify which host is it being run on
  * and what is the uuid of the guest



[Qemu-devel] [PATCH 2/6] target-ppc: POWER8 supports isel

2014-03-24 Thread Anton Blanchard
POWER8 supports isel, so enable it in QEMU.

Signed-off-by: Anton Blanchard 
Signed-off-by: Cédric Le Goater 
---
 target-ppc/translate_init.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
index a82c8f9..4fda0fd 100644
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -7157,7 +7157,7 @@ POWERPC_FAMILY(POWER8)(ObjectClass *oc, void *data)
 pcc->pvr_mask = CPU_POWERPC_POWER8_MASK;
 pcc->init_proc = init_proc_POWER8;
 pcc->check_pow = check_pow_nocheck;
-pcc->insns_flags = PPC_INSNS_BASE | PPC_STRING | PPC_MFTB |
+pcc->insns_flags = PPC_INSNS_BASE | PPC_ISEL | PPC_STRING | PPC_MFTB |
PPC_FLOAT | PPC_FLOAT_FSEL | PPC_FLOAT_FRES |
PPC_FLOAT_FSQRT | PPC_FLOAT_FRSQRTE |
PPC_FLOAT_FRSQRTES |
-- 
1.8.3.2




[Qemu-devel] [PATCH 4/6] target-ppc: MSR_POW not supported on POWER7/7+/8

2014-03-24 Thread Anton Blanchard
Remove MSR_POW from the msr_mask for POWER7/7+/8.

Signed-off-by: Anton Blanchard 
Signed-off-by: Cédric Le Goater 
---
 target-ppc/translate_init.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
index 87c00a1..d07e186 100644
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -7075,7 +7075,7 @@ POWERPC_FAMILY(POWER7)(ObjectClass *oc, void *data)
 PPC2_PERM_ISA206 | PPC2_DIVE_ISA206 |
 PPC2_ATOMIC_ISA206 | PPC2_FP_CVT_ISA206 |
 PPC2_FP_TST_ISA206;
-pcc->msr_mask = 0x8284FF37ULL;
+pcc->msr_mask = 0x8280FF37ULL;
 pcc->mmu_model = POWERPC_MMU_2_06;
 #if defined(CONFIG_SOFTMMU)
 pcc->handle_mmu_fault = ppc_hash64_handle_mmu_fault;
@@ -7118,7 +7118,7 @@ POWERPC_FAMILY(POWER7P)(ObjectClass *oc, void *data)
 PPC2_PERM_ISA206 | PPC2_DIVE_ISA206 |
 PPC2_ATOMIC_ISA206 | PPC2_FP_CVT_ISA206 |
 PPC2_FP_TST_ISA206;
-pcc->msr_mask = 0x8284FF37ULL;
+pcc->msr_mask = 0x8280FF37ULL;
 pcc->mmu_model = POWERPC_MMU_2_06;
 #if defined(CONFIG_SOFTMMU)
 pcc->handle_mmu_fault = ppc_hash64_handle_mmu_fault;
@@ -7175,7 +7175,7 @@ POWERPC_FAMILY(POWER8)(ObjectClass *oc, void *data)
 PPC2_FP_TST_ISA206 | PPC2_BCTAR_ISA207 |
 PPC2_LSQ_ISA207 | PPC2_ALTIVEC_207 |
 PPC2_ISA205 | PPC2_ISA207S;
-pcc->msr_mask = 0x8284FF37ULL;
+pcc->msr_mask = 0x8280FF37ULL;
 pcc->mmu_model = POWERPC_MMU_2_06;
 #if defined(CONFIG_SOFTMMU)
 pcc->handle_mmu_fault = ppc_hash64_handle_mmu_fault;
-- 
1.8.3.2




[Qemu-devel] [PATCH 1/6] target-ppc: POWER8 supports the MSR_LE bit

2014-03-24 Thread Anton Blanchard
Add MSR_LE to the msr_mask for POWER8.

Signed-off-by: Anton Blanchard 
Signed-off-by: Cédric Le Goater 
---
 target-ppc/translate_init.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
index 7f53c33..a82c8f9 100644
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -7175,7 +7175,7 @@ POWERPC_FAMILY(POWER8)(ObjectClass *oc, void *data)
 PPC2_FP_TST_ISA206 | PPC2_BCTAR_ISA207 |
 PPC2_LSQ_ISA207 | PPC2_ALTIVEC_207 |
 PPC2_ISA205 | PPC2_ISA207S;
-pcc->msr_mask = 0x8284FF36ULL;
+pcc->msr_mask = 0x8284FF37ULL;
 pcc->mmu_model = POWERPC_MMU_2_06;
 #if defined(CONFIG_SOFTMMU)
 pcc->handle_mmu_fault = ppc_hash64_handle_mmu_fault;
-- 
1.8.3.2




[Qemu-devel] [PATCH 5/6] target-ppc: Fix Book3S PMU SPRs

2014-03-24 Thread Anton Blanchard
Most of the PMU SPRs were wrong on Book3S.

Signed-off-by: Anton Blanchard 
---
 target-ppc/cpu.h|  29 -
 target-ppc/translate_init.c | 139 +++-
 2 files changed, 153 insertions(+), 15 deletions(-)

diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
index 2719c08..7082041 100644
--- a/target-ppc/cpu.h
+++ b/target-ppc/cpu.h
@@ -1452,54 +1452,81 @@ static inline int cpu_mmu_index (CPUPPCState *env)
 #define SPR_MPC_MI_CTR(0x300)
 #define SPR_PERF1 (0x301)
 #define SPR_RCPU_MI_RBA1  (0x301)
+#define SPR_BOOK3S_UMMCR2 (0x301)
 #define SPR_PERF2 (0x302)
 #define SPR_RCPU_MI_RBA2  (0x302)
 #define SPR_MPC_MI_AP (0x302)
-#define SPR_MMCRA (0x302)
+#define SPR_BOOK3S_UMMCRA (0x302)
 #define SPR_PERF3 (0x303)
 #define SPR_RCPU_MI_RBA3  (0x303)
 #define SPR_MPC_MI_EPN(0x303)
+#define SPR_BOOK3S_UPMC1  (0x303)
 #define SPR_PERF4 (0x304)
+#define SPR_BOOK3S_UPMC2  (0x304)
 #define SPR_PERF5 (0x305)
 #define SPR_MPC_MI_TWC(0x305)
+#define SPR_BOOK3S_UPMC3  (0x305)
 #define SPR_PERF6 (0x306)
 #define SPR_MPC_MI_RPN(0x306)
+#define SPR_BOOK3S_UPMC4  (0x306)
 #define SPR_PERF7 (0x307)
+#define SPR_BOOK3S_UPMC5  (0x307)
 #define SPR_PERF8 (0x308)
 #define SPR_RCPU_L2U_RBA0 (0x308)
 #define SPR_MPC_MD_CTR(0x308)
+#define SPR_BOOK3S_UPMC6  (0x308)
 #define SPR_PERF9 (0x309)
 #define SPR_RCPU_L2U_RBA1 (0x309)
 #define SPR_MPC_MD_CASID  (0x309)
+#define SPR_BOOK3S_UPMC7  (0x309)
 #define SPR_PERFA (0x30A)
 #define SPR_RCPU_L2U_RBA2 (0x30A)
 #define SPR_MPC_MD_AP (0x30A)
+#define SPR_BOOK3S_UPMC8  (0x30A)
 #define SPR_PERFB (0x30B)
 #define SPR_RCPU_L2U_RBA3 (0x30B)
 #define SPR_MPC_MD_EPN(0x30B)
+#define SPR_BOOK3S_UMMCR0 (0x30B)
 #define SPR_PERFC (0x30C)
 #define SPR_MPC_MD_TWB(0x30C)
+#define SPR_BOOK3S_USIAR  (0x30C)
 #define SPR_PERFD (0x30D)
 #define SPR_MPC_MD_TWC(0x30D)
+#define SPR_BOOK3S_USDAR  (0x30D)
 #define SPR_PERFE (0x30E)
 #define SPR_MPC_MD_RPN(0x30E)
+#define SPR_BOOK3S_UMMCR1 (0x30E)
 #define SPR_PERFF (0x30F)
 #define SPR_MPC_MD_TW (0x30F)
 #define SPR_UPERF0(0x310)
 #define SPR_UPERF1(0x311)
+#define SPR_BOOK3S_MMCR2  (0x311)
 #define SPR_UPERF2(0x312)
+#define SPR_BOOK3S_MMCRA  (0x312)
 #define SPR_UPERF3(0x313)
+#define SPR_BOOK3S_PMC1   (0x313)
 #define SPR_UPERF4(0x314)
+#define SPR_BOOK3S_PMC2   (0x314)
 #define SPR_UPERF5(0x315)
+#define SPR_BOOK3S_PMC3   (0x315)
 #define SPR_UPERF6(0x316)
+#define SPR_BOOK3S_PMC4   (0x316)
 #define SPR_UPERF7(0x317)
+#define SPR_BOOK3S_PMC5   (0x317)
 #define SPR_UPERF8(0x318)
+#define SPR_BOOK3S_PMC6   (0x318)
 #define SPR_UPERF9(0x319)
+#define SPR_BOOK3S_PMC7   (0x319)
 #define SPR_UPERFA(0x31A)
+#define SPR_BOOK3S_PMC8   (0x31A)
 #define SPR_UPERFB(0x31B)
+#define SPR_BOOK3S_MMCR0  (0x31B)
 #define SPR_UPERFC(0x31C)
+#define SPR_BOOK3S_SIAR   (0x31C)
 #define SPR_UPERFD(0x31D)
+#define SPR_BOOK3S_SDAR   (0x31D)
 #define SPR_UPERFE(0x31E)
+#define SPR_BOOK3S_MMCR1  (0x31E)
 #define SPR_UPERFF(0x31F)
 #define SPR_RCPU_MI_RA0   (0x320)
 #define SPR_MPC_MI_DBCAM  (0x320)
diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
index d07e186..273e37d 100644
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -6629,10 +6629,128 @@ static int check_pow_970 (CPUPPCState *env)
 return 0;
 }
 
+/* SPR common to all book3s implementations */
+static void gen_spr_book3s (CPUPPCState *env)
+{
+/* Breakpoints */
+/* XXX : not implemented */
+spr_register_kvm(env, SPR_DABR, "DABR",
+ SPR_NOACCESS, SPR_NOACCESS,
+ &spr_read_generic, &spr_write_generic,
+ KVM_REG_PPC_DABR, 0x);
+/* XXX : not implemented */
+spr_register(env, SPR_IABR, "IABR",
+ SPR_NOACCESS, SPR_NOACCESS,
+ &spr_read_generic, &spr_write_generic,
+ 0x);
+
+/* Performance monitors */
+/* XXX : not implemented */
+spr_register_kvm(env, SPR_BOOK3S_MMCR0, "MMCR0",
+ SPR_NOACCESS, SPR_NOACCESS,
+ &spr_read_generic, &spr_write_generic,
+ KVM_REG_PPC_MMCR0, 0x);
+/* XXX : not implemented */
+spr_register_kvm(env, SPR_BOOK3S_MMCR1, "MMCR1",
+ SPR_NOACCESS, SPR_NOACC

[Qemu-devel] [PATCH 3/6] target-ppc: POWER7+ supports the MSR_VSX bit

2014-03-24 Thread Anton Blanchard
Without MSR_VSX we die early during a Linux boot.

Signed-off-by: Anton Blanchard 
Signed-off-by: Cédric Le Goater 
---
 target-ppc/translate_init.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
index 4fda0fd..87c00a1 100644
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -7118,7 +7118,7 @@ POWERPC_FAMILY(POWER7P)(ObjectClass *oc, void *data)
 PPC2_PERM_ISA206 | PPC2_DIVE_ISA206 |
 PPC2_ATOMIC_ISA206 | PPC2_FP_CVT_ISA206 |
 PPC2_FP_TST_ISA206;
-pcc->msr_mask = 0x8204FF37ULL;
+pcc->msr_mask = 0x8284FF37ULL;
 pcc->mmu_model = POWERPC_MMU_2_06;
 #if defined(CONFIG_SOFTMMU)
 pcc->handle_mmu_fault = ppc_hash64_handle_mmu_fault;
-- 
1.8.3.2




[Qemu-devel] [PATCH 6/6] target-ppc: Add PMC7/8 to 970

2014-03-24 Thread Anton Blanchard
970 CPUs have PMC7/8. Create gen_spr_970 to avoid replicating
it 3 times, and simplify the existing code.

Signed-off-by: Anton Blanchard 
---
 target-ppc/translate_init.c | 89 -
 1 file changed, 39 insertions(+), 50 deletions(-)

diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
index 273e37d..50b2603 100644
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -6747,12 +6747,13 @@ static void gen_spr_book3s (CPUPPCState *env)
  0x);
 }
 
-static void init_proc_970 (CPUPPCState *env)
+static void gen_spr_970 (CPUPPCState *env)
 {
-gen_spr_ne_601(env);
-gen_spr_book3s(env);
-/* Time base */
-gen_tbl(env);
+spr_register(env, SPR_HIOR, "SPR_HIOR",
+ SPR_NOACCESS, SPR_NOACCESS,
+ &spr_read_hior, &spr_write_hior,
+ 0x);
+
 /* Hardware implementation registers */
 /* XXX : not implemented */
 spr_register(env, SPR_HID0, "HID0",
@@ -6769,13 +6770,40 @@ static void init_proc_970 (CPUPPCState *env)
  SPR_NOACCESS, SPR_NOACCESS,
  &spr_read_generic, &spr_write_generic,
  POWERPC970_HID5_INIT);
+
+/* Performance monitors */
+/* XXX : not implemented */
+spr_register_kvm(env, SPR_BOOK3S_PMC7, "PMC7",
+ SPR_NOACCESS, SPR_NOACCESS,
+ &spr_read_generic, &spr_write_generic,
+ KVM_REG_PPC_PMC7, 0x);
+/* XXX : not implemented */
+spr_register_kvm(env, SPR_BOOK3S_PMC8, "PMC8",
+ SPR_NOACCESS, SPR_NOACCESS,
+ &spr_read_generic, &spr_write_generic,
+ KVM_REG_PPC_PMC8, 0x);
+/* XXX : not implemented */
+spr_register(env, SPR_BOOK3S_UPMC7, "UPMC7",
+ &spr_read_ureg, SPR_NOACCESS,
+ &spr_read_ureg, SPR_NOACCESS,
+ 0x);
+/* XXX : not implemented */
+spr_register(env, SPR_BOOK3S_UPMC8, "UPMC8",
+ &spr_read_ureg, SPR_NOACCESS,
+ &spr_read_ureg, SPR_NOACCESS,
+ 0x);
+}
+
+static void init_proc_970 (CPUPPCState *env)
+{
+gen_spr_ne_601(env);
+gen_spr_book3s(env);
+gen_spr_970(env);
+/* Time base */
+gen_tbl(env);
 /* Memory management */
 /* XXX: not correct */
 gen_low_BATs(env);
-spr_register(env, SPR_HIOR, "SPR_HIOR",
- SPR_NOACCESS, SPR_NOACCESS,
- &spr_read_hior, &spr_write_hior,
- 0x);
 #if !defined(CONFIG_USER_ONLY)
 env->slb_nr = 32;
 #endif
@@ -6831,31 +6859,12 @@ static void init_proc_970FX (CPUPPCState *env)
 {
 gen_spr_ne_601(env);
 gen_spr_book3s(env);
+gen_spr_970(env);
 /* Time base */
 gen_tbl(env);
-/* Hardware implementation registers */
-/* XXX : not implemented */
-spr_register(env, SPR_HID0, "HID0",
- SPR_NOACCESS, SPR_NOACCESS,
- &spr_read_generic, &spr_write_clear,
- 0x6000);
-/* XXX : not implemented */
-spr_register(env, SPR_HID1, "HID1",
- SPR_NOACCESS, SPR_NOACCESS,
- &spr_read_generic, &spr_write_generic,
- 0x);
-/* XXX : not implemented */
-spr_register(env, SPR_970_HID5, "HID5",
- SPR_NOACCESS, SPR_NOACCESS,
- &spr_read_generic, &spr_write_generic,
- POWERPC970_HID5_INIT);
 /* Memory management */
 /* XXX: not correct */
 gen_low_BATs(env);
-spr_register(env, SPR_HIOR, "SPR_HIOR",
- SPR_NOACCESS, SPR_NOACCESS,
- &spr_read_hior, &spr_write_hior,
- 0x);
 spr_register(env, SPR_CTRL, "SPR_CTRL",
  SPR_NOACCESS, SPR_NOACCESS,
  SPR_NOACCESS, &spr_write_generic,
@@ -6923,32 +6932,12 @@ static void init_proc_970MP (CPUPPCState *env)
 {
 gen_spr_ne_601(env);
 gen_spr_book3s(env);
+gen_spr_970(env);
 /* Time base */
 gen_tbl(env);
-/* Hardware implementation registers */
-/* XXX : not implemented */
-spr_register(env, SPR_HID0, "HID0",
- SPR_NOACCESS, SPR_NOACCESS,
- &spr_read_generic, &spr_write_clear,
- 0x6000);
-/* XXX : not implemented */
-spr_register(env, SPR_HID1, "HID1",
- SPR_NOACCESS, SPR_NOACCESS,
- &spr_read_generic, &spr_write_generic,
- 0x);
-/* XXX : not implemented */
-spr_register(env, SPR_970_HID5, "HID5",
- SPR_NOACCESS, SPR_NOA

Re: [Qemu-devel] [PATCH 0/9] target-ppc: VSX Bug Fixes

2014-03-27 Thread Anton Blanchard

Hi Tom,

> This patch series addresses bugs in the recently added VSX
> instructions.  Two general defects are fixed:

Thanks! This series fixes the issue I had with wget.

Tested-by: Anton Blanchard 

Anton



[Qemu-devel] [PATCH] target-ppc: dump DAR and DSISR

2013-12-23 Thread Anton Blanchard

The DAR and DSISR can be very useful when debugging issues, so add
them to ppc_cpu_dump_state. We had another bug in this area: all
of the v2.06 MMU types were missing.

Signed-off-by: Anton Blanchard 
---

Index: b/target-ppc/translate.c
===
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -9861,8 +9861,13 @@ void ppc_cpu_dump_state(CPUState *cs, FI
 case POWERPC_MMU_SOFT_74xx:
 #if defined(TARGET_PPC64)
 case POWERPC_MMU_64B:
+case POWERPC_MMU_2_06:
+case POWERPC_MMU_2_06a:
+case POWERPC_MMU_2_06d:
 #endif
-cpu_fprintf(f, " SDR1 " TARGET_FMT_lx "\n", env->spr[SPR_SDR1]);
+cpu_fprintf(f, " SDR1 " TARGET_FMT_lx "   DAR " TARGET_FMT_lx
+   "  DSISR " TARGET_FMT_lx "\n", env->spr[SPR_SDR1],
+env->spr[SPR_DAR], env->spr[SPR_DSISR]);
 break;
 case POWERPC_MMU_BOOKE206:
 cpu_fprintf(f, " MAS0 " TARGET_FMT_lx "  MAS1 " TARGET_FMT_lx



[Qemu-devel] [PATCH 1/2] target-ppc: Fix invalid SPR read/write warnings

2013-05-01 Thread Anton Blanchard

Invalid and privileged SPR warnings currently print the wrong
address. While fixing that, also make it clear that we are
printing both the decimal and hexadecimal SPR number.

Before:

  Trying to read invalid spr 896 380 at 0714

After:

  Trying to read invalid spr 896 (0x380) at 0710

Signed-off-by: Anton Blanchard 
---

Index: b/target-ppc/translate.c
===
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -4005,19 +4005,19 @@ static inline void gen_op_mfspr(DisasCon
  * allowing userland application to read the PVR
  */
 if (sprn != SPR_PVR) {
-qemu_log("Trying to read privileged spr %d %03x at "
- TARGET_FMT_lx "\n", sprn, sprn, ctx->nip);
-printf("Trying to read privileged spr %d %03x at "
-   TARGET_FMT_lx "\n", sprn, sprn, ctx->nip);
+qemu_log("Trying to read privileged spr %d (0x%03x) at "
+ TARGET_FMT_lx "\n", sprn, sprn, ctx->nip - 4);
+printf("Trying to read privileged spr %d (0x%03x) at "
+   TARGET_FMT_lx "\n", sprn, sprn, ctx->nip - 4);
 }
 gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
 }
 } else {
 /* Not defined */
-qemu_log("Trying to read invalid spr %d %03x at "
-TARGET_FMT_lx "\n", sprn, sprn, ctx->nip);
-printf("Trying to read invalid spr %d %03x at " TARGET_FMT_lx "\n",
-   sprn, sprn, ctx->nip);
+qemu_log("Trying to read invalid spr %d (0x%03x) at "
+ TARGET_FMT_lx "\n", sprn, sprn, ctx->nip - 4);
+printf("Trying to read invalid spr %d (0x%03x) at "
+   TARGET_FMT_lx "\n", sprn, sprn, ctx->nip - 4);
 gen_inval_exception(ctx, POWERPC_EXCP_INVAL_SPR);
 }
 }
@@ -4150,18 +4150,18 @@ static void gen_mtspr(DisasContext *ctx)
 (*write_cb)(ctx, sprn, rS(ctx->opcode));
 } else {
 /* Privilege exception */
-qemu_log("Trying to write privileged spr %d %03x at "
- TARGET_FMT_lx "\n", sprn, sprn, ctx->nip);
-printf("Trying to write privileged spr %d %03x at " TARGET_FMT_lx
-   "\n", sprn, sprn, ctx->nip);
+qemu_log("Trying to write privileged spr %d (0x%03x) at "
+ TARGET_FMT_lx "\n", sprn, sprn, ctx->nip - 4);
+printf("Trying to write privileged spr %d (0x%03x) at "
+   TARGET_FMT_lx "\n", sprn, sprn, ctx->nip - 4);
 gen_inval_exception(ctx, POWERPC_EXCP_PRIV_REG);
 }
 } else {
 /* Not defined */
-qemu_log("Trying to write invalid spr %d %03x at "
- TARGET_FMT_lx "\n", sprn, sprn, ctx->nip);
-printf("Trying to write invalid spr %d %03x at " TARGET_FMT_lx "\n",
-   sprn, sprn, ctx->nip);
+qemu_log("Trying to write invalid spr %d (0x%03x) at "
+ TARGET_FMT_lx "\n", sprn, sprn, ctx->nip - 4);
+printf("Trying to write invalid spr %d (0x%03x) at "
+   TARGET_FMT_lx "\n", sprn, sprn, ctx->nip - 4);
 gen_inval_exception(ctx, POWERPC_EXCP_INVAL_SPR);
 }
 }



[Qemu-devel] [PATCH 2/2] target-ppc: Add read and write of PPR SPR

2013-05-01 Thread Anton Blanchard

Recent Linux kernels save and restore the PPR across exceptions
so we need to handle it.

Signed-off-by: Anton Blanchard 
---

Index: b/target-ppc/translate_init.c
===
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -7010,6 +7010,10 @@ static void init_proc_POWER7 (CPUPPCStat
  &spr_read_generic, &spr_write_generic,
  &spr_read_generic, &spr_write_generic,
  0x);
+spr_register(env, SPR_PPR, "PPR",
+ &spr_read_generic, &spr_write_generic,
+ &spr_read_generic, &spr_write_generic,
+ 0x);
 #if !defined(CONFIG_USER_ONLY)
 env->slb_nr = 32;
 #endif




[Qemu-devel] [PATCH 1/7] Declare and Enable VSX

2013-10-22 Thread Anton Blanchard
From: Tom Musta 

This patch adds the flag POWERPC_FLAG_VSX to the list of defined
flags and also adds this flag to the list of supported features of
the Power7 and Power8 CPUs.  Additionally, the VSX instructions
are added to the list of TCG-enabled instructions.

Signed-off-by: Tom Musta 
Signed-off-by: Anton Blanchard 
---

Index: b/target-ppc/cpu.h
===
--- a/target-ppc/cpu.h
+++ b/target-ppc/cpu.h
@@ -549,6 +549,8 @@ enum {
 POWERPC_FLAG_BUS_CLK  = 0x0002,
 /* Has CFAR  */
 POWERPC_FLAG_CFAR = 0x0004,
+/* Has VSX   */
+POWERPC_FLAG_VSX  = 0x0008,
 };
 
 /*/
@@ -1870,7 +1872,8 @@ enum {
 /* Book I 2.05 PowerPC specification */
 PPC2_ISA205= 0x0020ULL,
 
-#define PPC_TCG_INSNS2 (PPC2_BOOKE206 | PPC2_PRCNTL | PPC2_DBRX | PPC2_ISA205)
+#define PPC_TCG_INSNS2 (PPC2_BOOKE206 | PPC2_VSX | PPC2_PRCNTL | PPC2_DBRX | \
+  PPC2_ISA205)
 };
 
 /*/
Index: b/target-ppc/translate_init.c
===
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -7242,7 +7242,8 @@ POWERPC_FAMILY(POWER7)(ObjectClass *oc,
 pcc->bfd_mach = bfd_mach_ppc64;
 pcc->flags = POWERPC_FLAG_VRE | POWERPC_FLAG_SE |
  POWERPC_FLAG_BE | POWERPC_FLAG_PMM |
- POWERPC_FLAG_BUS_CLK | POWERPC_FLAG_CFAR;
+ POWERPC_FLAG_BUS_CLK | POWERPC_FLAG_CFAR |
+ POWERPC_FLAG_VSX;
 pcc->l1_dcache_size = 0x8000;
 pcc->l1_icache_size = 0x8000;
 }
@@ -7276,7 +7277,8 @@ POWERPC_FAMILY(POWER8)(ObjectClass *oc,
 pcc->bfd_mach = bfd_mach_ppc64;
 pcc->flags = POWERPC_FLAG_VRE | POWERPC_FLAG_SE |
  POWERPC_FLAG_BE | POWERPC_FLAG_PMM |
- POWERPC_FLAG_BUS_CLK | POWERPC_FLAG_CFAR;
+ POWERPC_FLAG_BUS_CLK | POWERPC_FLAG_CFAR |
+ POWERPC_FLAG_VSX;
 pcc->l1_dcache_size = 0x8000;
 pcc->l1_icache_size = 0x8000;
 }



[Qemu-devel] [PATCH 2/7] Add MSR VSX and Associated Exception

2013-10-22 Thread Anton Blanchard
From: Tom Musta 

This patch adds support for the VSX bit of the PowerPC Machine
State Register (MSR) as well as the corresponding VSX Unavailable
exception.

The VSX bit is added to the defined bit masks of the Power7 and
Power8 CPU models.

Signed-off-by: Tom Musta 
Signed-off-by: Anton Blanchard 
---

Index: b/target-ppc/cpu.h
===
--- a/target-ppc/cpu.h
+++ b/target-ppc/cpu.h
@@ -236,6 +236,8 @@ enum {
 POWERPC_EXCP_NMEXTBR  = 91, /* Non maskable external breakpoint  */
 POWERPC_EXCP_ITLBE= 92, /* Instruction TLB error */
 POWERPC_EXCP_DTLBE= 93, /* Data TLB error*/
+/* VSX Unavailable (Power ISA 2.06 and later)*/
+POWERPC_EXCP_VSXU = 94, /* VSX Unavailable   */
 /* EOL   */
 POWERPC_EXCP_NB   = 96,
 /* QEMU exceptions: used internally during code translation  */
@@ -427,6 +429,7 @@ struct ppc_slb_t {
 #define MSR_VR   25 /* altivec availablex hflags */
 #define MSR_SPE  25 /* SPE enable for BookE x hflags */
 #define MSR_AP   23 /* Access privilege state on 602  hflags */
+#define MSR_VSX  23 /* Vector Scalar Extension (ISA 2.06 and later) x hflags */
 #define MSR_SA   22 /* Supervisor access mode on 602  hflags */
 #define MSR_KEY  19 /* key bit on 603e   */
 #define MSR_POW  18 /* Power management  */
@@ -467,6 +470,7 @@ struct ppc_slb_t {
 #define msr_vr   ((env->msr >> MSR_VR)   & 1)
 #define msr_spe  ((env->msr >> MSR_SPE)  & 1)
 #define msr_ap   ((env->msr >> MSR_AP)   & 1)
+#define msr_vsx  ((env->msr >> MSR_VSX)  & 1)
 #define msr_sa   ((env->msr >> MSR_SA)   & 1)
 #define msr_key  ((env->msr >> MSR_KEY)  & 1)
 #define msr_pow  ((env->msr >> MSR_POW)  & 1)
Index: b/target-ppc/excp_helper.c
===
--- a/target-ppc/excp_helper.c
+++ b/target-ppc/excp_helper.c
@@ -390,6 +390,11 @@ static inline void powerpc_excp(PowerPCC
 new_msr |= (target_ulong)MSR_HVB;
 }
 goto store_current;
+case POWERPC_EXCP_VSXU:   /* VSX unavailable exception   */
+if (lpes1 == 0) {
+new_msr |= (target_ulong)MSR_HVB;
+}
+goto store_current;
 case POWERPC_EXCP_PIT:   /* Programmable interval timer interrupt*/
 LOG_EXCP("PIT exception\n");
 goto store_next;
Index: b/target-ppc/translate.c
===
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -199,6 +199,7 @@ typedef struct DisasContext {
 #endif
 int fpu_enabled;
 int altivec_enabled;
+int vsx_enabled;
 int spe_enabled;
 ppc_spr_t *spr_cb; /* Needed to check rights for mfspr/mtspr */
 int singlestep_enabled;
@@ -9763,6 +9764,11 @@ static inline void gen_intermediate_code
 ctx.altivec_enabled = msr_vr;
 else
 ctx.altivec_enabled = 0;
+if ((env->flags & POWERPC_FLAG_VSX) && msr_vsx) {
+ctx.vsx_enabled = msr_vsx;
+} else {
+ctx.vsx_enabled = 0;
+}
 if ((env->flags & POWERPC_FLAG_SE) && msr_se)
 ctx.singlestep_enabled = CPU_SINGLE_STEP;
 else
Index: b/target-ppc/translate_init.c
===
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -3061,6 +3061,7 @@ static void init_excp_POWER7 (CPUPPCStat
 env->excp_vectors[POWERPC_EXCP_TRACE]= 0x0D00;
 env->excp_vectors[POWERPC_EXCP_PERFM]= 0x0F00;
 env->excp_vectors[POWERPC_EXCP_VPU]  = 0x0F20;
+env->excp_vectors[POWERPC_EXCP_VSXU] = 0x0F40;
 env->excp_vectors[POWERPC_EXCP_IABR] = 0x1300;
 env->excp_vectors[POWERPC_EXCP_MAINT]= 0x1600;
 env->excp_vectors[POWERPC_EXCP_VPUA] = 0x1700;
@@ -7232,7 +7233,7 @@ POWERPC_FAMILY(POWER7)(ObjectClass *oc,
PPC_SEGMENT_64B | PPC_SLBI |
PPC_POPCNTB | PPC_POPCNTWD;
 pcc->insns_flags2 = PPC2_VSX | PPC2_DFP | PPC2_DBRX | PPC2_ISA205;
-pcc->msr_mask = 0x8204FF37ULL;
+pcc->msr_mask = 0x8284FF37ULL;
 pcc->mmu_model = POWERPC_MMU_2_06;
 #if defined(CONFIG_SOFTMMU)
 pcc->handle_mmu_fault = ppc_hash64_handle_mmu_fault;
@@ -7267,7 +7268,7 @@ POWERPC_FAMILY(POWER8)(ObjectClass *oc,
PPC_SEGMENT_64B | PPC_SLBI |
PPC_POPCNTB | PPC_POPCNTWD;
 pcc->insns_flags2 = PPC2_VSX | PPC2_DF

[Qemu-devel] [PATCH 3/7] Add VSX Instruction Decoders

2013-10-22 Thread Anton Blanchard
From: Tom Musta 

This patch adds decoders for the VSX fields XT, XS, XA, XB and
DM.  The first four are split fields and a general helper for
these types of fields is also added.

Signed-off-by: Tom Musta 
Signed-off-by: Anton Blanchard 
---

Index: b/target-ppc/translate.c
===
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -370,6 +370,12 @@ static inline int32_t name(uint32_t opco
 return (int16_t)((opcode >> (shift)) & ((1 << (nb)) - 1));\
 }
 
+#define EXTRACT_HELPER_SPLIT(name, shift1, nb1, shift2, nb2)  \
+static inline uint32_t name(uint32_t opcode)  \
+{ \
+return (((opcode >> (shift1)) & ((1 << (nb1)) - 1)) << nb2) | \
+((opcode >> (shift2)) & ((1 << (nb2)) - 1));  \
+}
 /* Opcode part 1 */
 EXTRACT_HELPER(opc1, 26, 6);
 /* Opcode part 2 */
@@ -484,6 +490,11 @@ static inline target_ulong MASK(uint32_t
 return ret;
 }
 
+EXTRACT_HELPER_SPLIT(xT, 0, 1, 21, 5);
+EXTRACT_HELPER_SPLIT(xS, 0, 1, 21, 5);
+EXTRACT_HELPER_SPLIT(xA, 2, 1, 16, 5);
+EXTRACT_HELPER_SPLIT(xB, 1, 1, 11, 5);
+EXTRACT_HELPER(DM, 8, 2);
 /*/
 /* PowerPC instructions table*/
 



[Qemu-devel] [PATCH 4/7] Add VSR to Global Registers

2013-10-22 Thread Anton Blanchard
From: Tom Musta 

This patch adds VSX VSRs to the list of global register indices.
More specifically, it adds the lower halves of the first 32 VSRs to
the list of global register indices.  The upper halves of the first
32 VSRs are already defined via cpu_fpr[].  And the second 32 VSRs
are already defined via the cpu_avrh[] and cpu_avrl[] arrays.

Signed-off-by: Tom Musta 
Signed-off-by: Anton Blanchard 
---

Index: b/target-ppc/translate.c
===
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -51,6 +51,7 @@ static char cpu_reg_names[10*3 + 22*4 /*
 #endif
 + 10*4 + 22*5 /* FPR */
 + 2*(10*6 + 22*7) /* AVRh, AVRl */
++ 10*5 + 22*6 /* VSR */
 + 8*5 /* CRF */];
 static TCGv cpu_gpr[32];
 #if !defined(TARGET_PPC64)
@@ -58,6 +59,7 @@ static TCGv cpu_gprh[32];
 #endif
 static TCGv_i64 cpu_fpr[32];
 static TCGv_i64 cpu_avrh[32], cpu_avrl[32];
+static TCGv_i64 cpu_vsr[32];
 static TCGv_i32 cpu_crf[8];
 static TCGv cpu_nip;
 static TCGv cpu_msr;
@@ -137,6 +139,11 @@ void ppc_translate_init(void)
 #endif
 p += (i < 10) ? 6 : 7;
 cpu_reg_names_size -= (i < 10) ? 6 : 7;
+snprintf(p, cpu_reg_names_size, "vsr%d", i);
+cpu_vsr[i] = tcg_global_mem_new_i64(TCG_AREG0,
+ offsetof(CPUPPCState, vsr[i]), p);
+p += (i < 10) ? 5 : 6;
+cpu_reg_names_size -= (i < 10) ? 5 : 6;
 }
 
 cpu_nip = tcg_global_mem_new(TCG_AREG0,
@@ -6980,6 +6987,26 @@ GEN_VAFORM_PAIRED(vmsumshm, vmsumshs, 20
 GEN_VAFORM_PAIRED(vsel, vperm, 21)
 GEN_VAFORM_PAIRED(vmaddfp, vnmsubfp, 23)
 
+/***   VSX extension   ***/
+
+static inline TCGv_i64 cpu_vsrh(int n)
+{
+if (n < 32) {
+return cpu_fpr[n];
+} else {
+return cpu_avrh[n-32];
+}
+}
+
+static inline TCGv_i64 cpu_vsrl(int n)
+{
+if (n < 32) {
+return cpu_vsr[n];
+} else {
+return cpu_avrl[n-32];
+}
+}
+
 /***   SPE extension   ***/
 /* Register moves */
 



[Qemu-devel] [PATCH 5/7] Add lxvd2x

2013-10-22 Thread Anton Blanchard
From: Tom Musta 

This patch adds the lxvd2x instruction.

Signed-off-by: Tom Musta 
Signed-off-by: Anton Blanchard 
---

Index: b/target-ppc/translate.c
===
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -7007,6 +7007,22 @@ static inline TCGv_i64 cpu_vsrl(int n)
 }
 }
 
+static void gen_lxvd2x(DisasContext *ctx)
+{
+TCGv EA;
+if (unlikely(!ctx->vsx_enabled)) {
+gen_exception(ctx, POWERPC_EXCP_VSXU);
+return;
+}
+gen_set_access_type(ctx, ACCESS_INT);
+EA = tcg_temp_new();
+gen_addr_reg_index(ctx, EA);
+gen_qemu_ld64(ctx, cpu_vsrh(xT(ctx->opcode)), EA);
+tcg_gen_addi_tl(EA, EA, 8);
+gen_qemu_ld64(ctx, cpu_vsrl(xT(ctx->opcode)), EA);
+tcg_temp_free(EA);
+}
+
 /***   SPE extension   ***/
 /* Register moves */
 
@@ -9456,6 +9472,8 @@ GEN_VAFORM_PAIRED(vmsumshm, vmsumshs, 20
 GEN_VAFORM_PAIRED(vsel, vperm, 21),
 GEN_VAFORM_PAIRED(vmaddfp, vnmsubfp, 23),
 
+GEN_HANDLER_E(lxvd2x, 0x1F, 0x0C, 0x1A, 0, PPC_NONE, PPC2_VSX),
+
 #undef GEN_SPE
 #define GEN_SPE(name0, name1, opc2, opc3, inval0, inval1, type) \
 GEN_OPCODE_DUAL(name0##_##name1, 0x04, opc2, opc3, inval0, inval1, type, 
PPC_NONE)



[Qemu-devel] [PATCH 7/7] Add xxpermdi

2013-10-22 Thread Anton Blanchard
From: Tom Musta 

This patch adds the xxpermdi instruction.  The instruction
uses bits 22, 23, 29 and 30 for non-opcode fields (DM, AX
and BX).  This results in overloading of the opcode table
with aliases, which can be seen in the GEN_XX3FORM_DM
macro.

Signed-off-by: Tom Musta 
Signed-off-by: Anton Blanchard 
---

Index: b/target-ppc/translate.c
===
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -7039,10 +7039,28 @@ static void gen_stxvd2x(DisasContext *ct
 tcg_temp_free(EA);
 }
 
+static void gen_xxpermdi(DisasContext *ctx)
+{
+if (unlikely(!ctx->vsx_enabled)) {
+gen_exception(ctx, POWERPC_EXCP_VSXU);
+return;
+}
+
+if ((DM(ctx->opcode) & 2) == 0) {
+tcg_gen_mov_i64(cpu_vsrh(xT(ctx->opcode)), cpu_vsrh(xA(ctx->opcode)));
+} else {
+tcg_gen_mov_i64(cpu_vsrh(xT(ctx->opcode)), cpu_vsrl(xA(ctx->opcode)));
+}
+if ((DM(ctx->opcode) & 1) == 0) {
+tcg_gen_mov_i64(cpu_vsrl(xT(ctx->opcode)), cpu_vsrh(xB(ctx->opcode)));
+} else {
+tcg_gen_mov_i64(cpu_vsrl(xT(ctx->opcode)), cpu_vsrl(xB(ctx->opcode)));
+}
+}
+
 /***   SPE extension   ***/
 /* Register moves */
 
-
 static inline void gen_evmra(DisasContext *ctx)
 {
 
@@ -9492,6 +9510,27 @@ GEN_HANDLER_E(lxvd2x, 0x1F, 0x0C, 0x1A,
 
 GEN_HANDLER_E(stxvd2x, 0x1F, 0xC, 0x1E, 0, PPC_NONE, PPC2_VSX),
 
+#undef GEN_XX3FORM_DM
+#define GEN_XX3FORM_DM(name, opc2, opc3) \
+GEN_HANDLER2_E(name, #name, 0x3C, opc2|0x00, opc3|0x00, 0, PPC_NONE, 
PPC2_VSX),\
+GEN_HANDLER2_E(name, #name, 0x3C, opc2|0x01, opc3|0x00, 0, PPC_NONE, 
PPC2_VSX),\
+GEN_HANDLER2_E(name, #name, 0x3C, opc2|0x02, opc3|0x00, 0, PPC_NONE, 
PPC2_VSX),\
+GEN_HANDLER2_E(name, #name, 0x3C, opc2|0x03, opc3|0x00, 0, PPC_NONE, 
PPC2_VSX),\
+GEN_HANDLER2_E(name, #name, 0x3C, opc2|0x00, opc3|0x04, 0, PPC_NONE, 
PPC2_VSX),\
+GEN_HANDLER2_E(name, #name, 0x3C, opc2|0x01, opc3|0x04, 0, PPC_NONE, 
PPC2_VSX),\
+GEN_HANDLER2_E(name, #name, 0x3C, opc2|0x02, opc3|0x04, 0, PPC_NONE, 
PPC2_VSX),\
+GEN_HANDLER2_E(name, #name, 0x3C, opc2|0x03, opc3|0x04, 0, PPC_NONE, 
PPC2_VSX),\
+GEN_HANDLER2_E(name, #name, 0x3C, opc2|0x00, opc3|0x08, 0, PPC_NONE, 
PPC2_VSX),\
+GEN_HANDLER2_E(name, #name, 0x3C, opc2|0x01, opc3|0x08, 0, PPC_NONE, 
PPC2_VSX),\
+GEN_HANDLER2_E(name, #name, 0x3C, opc2|0x02, opc3|0x08, 0, PPC_NONE, 
PPC2_VSX),\
+GEN_HANDLER2_E(name, #name, 0x3C, opc2|0x03, opc3|0x08, 0, PPC_NONE, 
PPC2_VSX),\
+GEN_HANDLER2_E(name, #name, 0x3C, opc2|0x00, opc3|0x0C, 0, PPC_NONE, 
PPC2_VSX),\
+GEN_HANDLER2_E(name, #name, 0x3C, opc2|0x01, opc3|0x0C, 0, PPC_NONE, 
PPC2_VSX),\
+GEN_HANDLER2_E(name, #name, 0x3C, opc2|0x02, opc3|0x0C, 0, PPC_NONE, 
PPC2_VSX),\
+GEN_HANDLER2_E(name, #name, 0x3C, opc2|0x03, opc3|0x0C, 0, PPC_NONE, PPC2_VSX)
+
+GEN_XX3FORM_DM(xxpermdi, 0x08, 0x01),
+
 #undef GEN_SPE
 #define GEN_SPE(name0, name1, opc2, opc3, inval0, inval1, type) \
 GEN_OPCODE_DUAL(name0##_##name1, 0x04, opc2, opc3, inval0, inval1, type, 
PPC_NONE)



[Qemu-devel] [PATCH 6/7] Add stxvd2x

2013-10-22 Thread Anton Blanchard
From: Tom Musta 

This patch adds the stxvd2x instruction.

Signed-off-by: Tom Musta 
Signed-off-by: Anton Blanchard 
---

Index: b/target-ppc/translate.c
===
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -7023,6 +7023,22 @@ static void gen_lxvd2x(DisasContext *ctx
 tcg_temp_free(EA);
 }
 
+static void gen_stxvd2x(DisasContext *ctx)
+{
+TCGv EA;
+if (unlikely(!ctx->vsx_enabled)) {
+gen_exception(ctx, POWERPC_EXCP_VSXU);
+return;
+}
+gen_set_access_type(ctx, ACCESS_INT);
+EA = tcg_temp_new();
+gen_addr_reg_index(ctx, EA);
+gen_qemu_st64(ctx, cpu_vsrh(xS(ctx->opcode)), EA);
+tcg_gen_addi_tl(EA, EA, 8);
+gen_qemu_st64(ctx, cpu_vsrl(xS(ctx->opcode)), EA);
+tcg_temp_free(EA);
+}
+
 /***   SPE extension   ***/
 /* Register moves */
 
@@ -9474,6 +9490,8 @@ GEN_VAFORM_PAIRED(vmaddfp, vnmsubfp, 23)
 
 GEN_HANDLER_E(lxvd2x, 0x1F, 0x0C, 0x1A, 0, PPC_NONE, PPC2_VSX),
 
+GEN_HANDLER_E(stxvd2x, 0x1F, 0xC, 0x1E, 0, PPC_NONE, PPC2_VSX),
+
 #undef GEN_SPE
 #define GEN_SPE(name0, name1, opc2, opc3, inval0, inval1, type) \
 GEN_OPCODE_DUAL(name0##_##name1, 0x04, opc2, opc3, inval0, inval1, type, 
PPC_NONE)



[Qemu-devel] [PATCH] spapr: Clear LPCR_ILE during reset

2014-03-05 Thread Anton Blanchard

Since an OS can set LPCR_ILE we must clear it during reset. Otherwise
if we reset into an OS with a different endianness we die when we take
the first exception.

This fixes an issue seen on both full emulation and KVM.

Signed-off-by: Anton Blanchard 
---

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 93d02c1..4d45197 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -739,6 +739,8 @@ static void spapr_cpu_reset(void *opaque)
 
 env->spr[SPR_HIOR] = 0;
 
+env->spr[SPR_LPCR] &= ~LPCR_ILE;
+
 env->external_htab = (uint8_t *)spapr->htab;
 env->htab_base = -1;
 env->htab_mask = HTAB_SIZE(spapr) - 1;



[Qemu-devel] [PATCH 1/4] target-ppc: POWER8 supports the MSR_LE bit

2014-03-05 Thread Anton Blanchard

Add MSR_LE to the msr_mask for POWER8.

Signed-off-by: Anton Blanchard 
---

diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
index 3eafbb0..7661543 100644
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -7173,7 +7173,7 @@ POWERPC_FAMILY(POWER8)(ObjectClass *oc, void *data)
 PPC2_ATOMIC_ISA206 | PPC2_FP_CVT_ISA206 |
 PPC2_FP_TST_ISA206 | PPC2_BCTAR_ISA207 |
 PPC2_LSQ_ISA207 | PPC2_ALTIVEC_207;
-pcc->msr_mask = 0x8284FF36ULL;
+pcc->msr_mask = 0x8284FF37ULL;
 pcc->mmu_model = POWERPC_MMU_2_06;
 #if defined(CONFIG_SOFTMMU)
 pcc->handle_mmu_fault = ppc_hash64_handle_mmu_fault;



[Qemu-devel] [PATCH 2/4] target-ppc: POWER8 supports isel

2014-03-05 Thread Anton Blanchard

POWER8 supports isel, so enable it in QEMU.

Signed-off-by: Anton Blanchard 
---

diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
index 3eafbb0..7661543 100644
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -7156,7 +7156,7 @@ POWERPC_FAMILY(POWER8)(ObjectClass *oc, void *data)
 pcc->pvr_mask = CPU_POWERPC_POWER8_MASK;
 pcc->init_proc = init_proc_POWER8;
 pcc->check_pow = check_pow_nocheck;
-pcc->insns_flags = PPC_INSNS_BASE | PPC_STRING | PPC_MFTB |
+pcc->insns_flags = PPC_INSNS_BASE | PPC_ISEL | PPC_STRING | PPC_MFTB |
PPC_FLOAT | PPC_FLOAT_FSEL | PPC_FLOAT_FRES |
PPC_FLOAT_FSQRT | PPC_FLOAT_FRSQRTE |
PPC_FLOAT_FRSQRTES |



[Qemu-devel] [PATCH 3/4] target-ppc: POWER7+ supports the MSR_VSX bit

2014-03-05 Thread Anton Blanchard

Without MSR_VSX we die early during a Linux boot.

Signed-off-by: Anton Blanchard 
---

Index: b/target-ppc/translate_init.c
===
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -7117,7 +7117,7 @@ POWERPC_FAMILY(POWER7P)(ObjectClass *oc,
 PPC2_PERM_ISA206 | PPC2_DIVE_ISA206 |
 PPC2_ATOMIC_ISA206 | PPC2_FP_CVT_ISA206 |
 PPC2_FP_TST_ISA206;
-pcc->msr_mask = 0x8204FF37ULL;
+pcc->msr_mask = 0x8284FF37ULL;
 pcc->mmu_model = POWERPC_MMU_2_06;
 #if defined(CONFIG_SOFTMMU)
 pcc->handle_mmu_fault = ppc_hash64_handle_mmu_fault;



[Qemu-devel] [PATCH 4/4] target-ppc: MSR_POW not supported on POWER7/7+/8

2014-03-05 Thread Anton Blanchard

Remove MSR_POW from the msr_mask for POWER7/7+/8.

Signed-off-by: Anton Blanchard 
---

Index: b/target-ppc/translate_init.c
===
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -7074,7 +7074,7 @@ POWERPC_FAMILY(POWER7)(ObjectClass *oc,
 PPC2_PERM_ISA206 | PPC2_DIVE_ISA206 |
 PPC2_ATOMIC_ISA206 | PPC2_FP_CVT_ISA206 |
 PPC2_FP_TST_ISA206;
-pcc->msr_mask = 0x8284FF37ULL;
+pcc->msr_mask = 0x8280FF37ULL;
 pcc->mmu_model = POWERPC_MMU_2_06;
 #if defined(CONFIG_SOFTMMU)
 pcc->handle_mmu_fault = ppc_hash64_handle_mmu_fault;
@@ -7117,7 +7117,7 @@ POWERPC_FAMILY(POWER7P)(ObjectClass *oc,
 PPC2_PERM_ISA206 | PPC2_DIVE_ISA206 |
 PPC2_ATOMIC_ISA206 | PPC2_FP_CVT_ISA206 |
 PPC2_FP_TST_ISA206;
-pcc->msr_mask = 0x8284FF37ULL;
+pcc->msr_mask = 0x8280FF37ULL;
 pcc->mmu_model = POWERPC_MMU_2_06;
 #if defined(CONFIG_SOFTMMU)
 pcc->handle_mmu_fault = ppc_hash64_handle_mmu_fault;
@@ -7173,7 +7173,7 @@ POWERPC_FAMILY(POWER8)(ObjectClass *oc,
 PPC2_ATOMIC_ISA206 | PPC2_FP_CVT_ISA206 |
 PPC2_FP_TST_ISA206 | PPC2_BCTAR_ISA207 |
 PPC2_LSQ_ISA207 | PPC2_ALTIVEC_207;
-pcc->msr_mask = 0x8284FF37ULL;
+pcc->msr_mask = 0x8280FF37ULL;
 pcc->mmu_model = POWERPC_MMU_2_06;
 #if defined(CONFIG_SOFTMMU)
 pcc->handle_mmu_fault = ppc_hash64_handle_mmu_fault;



[Qemu-devel] [PATCH] target-ppc: gdbstub: Add VSX support

2015-03-24 Thread Anton Blanchard
Add the XML and functions to get and set VSX registers.

Signed-off-by: Anton Blanchard 
---
 configure   |  6 +++---
 gdb-xml/power-vsx.xml   | 44 
 target-ppc/translate_init.c | 22 ++
 3 files changed, 69 insertions(+), 3 deletions(-)
 create mode 100644 gdb-xml/power-vsx.xml

diff --git a/configure b/configure
index 589798e..235b3d2 100755
--- a/configure
+++ b/configure
@@ -5182,20 +5182,20 @@ case "$target_name" in
   ppc64)
 TARGET_BASE_ARCH=ppc
 TARGET_ABI_DIR=ppc
-gdb_xml_files="power64-core.xml power-fpu.xml power-altivec.xml 
power-spe.xml"
+gdb_xml_files="power64-core.xml power-fpu.xml power-altivec.xml 
power-spe.xml power-vsx.xml"
   ;;
   ppc64le)
 TARGET_ARCH=ppc64
 TARGET_BASE_ARCH=ppc
 TARGET_ABI_DIR=ppc
-gdb_xml_files="power64-core.xml power-fpu.xml power-altivec.xml 
power-spe.xml"
+gdb_xml_files="power64-core.xml power-fpu.xml power-altivec.xml 
power-spe.xml power-vsx.xml"
   ;;
   ppc64abi32)
 TARGET_ARCH=ppc64
 TARGET_BASE_ARCH=ppc
 TARGET_ABI_DIR=ppc
 echo "TARGET_ABI32=y" >> $config_target_mak
-gdb_xml_files="power64-core.xml power-fpu.xml power-altivec.xml 
power-spe.xml"
+gdb_xml_files="power64-core.xml power-fpu.xml power-altivec.xml 
power-spe.xml power-vsx.xml"
   ;;
   sh4|sh4eb)
 TARGET_ARCH=sh4
diff --git a/gdb-xml/power-vsx.xml b/gdb-xml/power-vsx.xml
new file mode 100644
index 000..fd290e9
--- /dev/null
+++ b/gdb-xml/power-vsx.xml
@@ -0,0 +1,44 @@
+
+
+
+
+
+
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+  
+
diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
index d74f4f0..efde425 100644
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -8870,6 +8870,24 @@ static int gdb_set_spe_reg(CPUPPCState *env, uint8_t 
*mem_buf, int n)
 return 0;
 }
 
+static int gdb_get_vsx_reg(CPUPPCState *env, uint8_t *mem_buf, int n)
+{
+if (n < 32) {
+stq_p(mem_buf, env->vsr[n]);
+return 8;
+}
+return 0;
+}
+
+static int gdb_set_vsx_reg(CPUPPCState *env, uint8_t *mem_buf, int n)
+{
+if (n < 32) {
+env->vsr[n] = ldq_p(mem_buf);
+return 8;
+}
+return 0;
+}
+
 static int ppc_fixup_cpu(PowerPCCPU *cpu)
 {
 CPUPPCState *env = &cpu->env;
@@ -8967,6 +8985,10 @@ static void ppc_cpu_realizefn(DeviceState *dev, Error 
**errp)
 gdb_register_coprocessor(cs, gdb_get_spe_reg, gdb_set_spe_reg,
  34, "power-spe.xml", 0);
 }
+if (pcc->insns_flags2 & PPC2_VSX) {
+gdb_register_coprocessor(cs, gdb_get_vsx_reg, gdb_set_vsx_reg,
+ 32, "power-vsx.xml", 0);
+}
 
 qemu_init_vcpu(cs);
 
-- 
2.1.0




Re: [Qemu-devel] [PATCH] target-ppc: gdbstub: Add VSX support

2015-03-24 Thread Anton Blanchard
Hi Alex,

> On 24.03.15 09:59, Anton Blanchard wrote:
> > Add the XML and functions to get and set VSX registers.
> 
> Awesome, thanks. Have you verified that this works for LE as well as
> BE guests?

Unfortunately all our XML gdbstub routines have endian issues (FPU,
Altivec and now VSX). I only caught that the other day.

I can work on reusing maybe_bswap_register() from gdbstub.c.

Anton



[Qemu-devel] [PATCH] nvme: 64kB page size fixes

2014-11-26 Thread Anton Blanchard
Initialise our maximum page size capability to 64kB and increase
the page_size variable from 16 to 32 bits.

Signed-off-by: Anton Blanchard 
--

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 1327658..aa1ed98 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -811,6 +811,7 @@ static int nvme_init(PCIDevice *pci_dev)
 NVME_CAP_SET_AMS(n->bar.cap, 1);
 NVME_CAP_SET_TO(n->bar.cap, 0xf);
 NVME_CAP_SET_CSS(n->bar.cap, 1);
+NVME_CAP_SET_MPSMAX(n->bar.cap, 4);
 
 n->bar.vs = 0x00010001;
 n->bar.intmc = n->bar.intms = 0;
diff --git a/hw/block/nvme.h b/hw/block/nvme.h
index 993c511..b6ccb65 100644
--- a/hw/block/nvme.h
+++ b/hw/block/nvme.h
@@ -688,7 +688,7 @@ typedef struct NvmeCtrl {
 NvmeBar  bar;
 BlockConfconf;
 
-uint16_tpage_size;
+uint32_tpage_size;
 uint16_tpage_bits;
 uint16_tmax_prp_ents;
 uint16_tcqe_size;



[Qemu-devel] target-ppc: Fix SRR0 when taking unaligned exceptions

2015-07-01 Thread Anton Blanchard
We are setting SRR0 to the instruction before the one causing the
unaligned exception. A quick testcase:

. = 0x100
.globl _start
_start:
/* Cause a 0x600 */
li  3,0x1
stwcx.  3,0,3
1:  b   1b

. = 0x600
1:  b   1b

Built into something we can load as a BIOS image:

gcc -mbig -c test.S
ld -EB -Ttext 0x0 -o test test.o
objcopy -O binary test test.bin

Run with:

qemu-system-ppc64 -nographic -bios test.bin

Shows an incorrect SRR0 (points at the li):

SRR0 0100

With the patch we get the correct SRR0:

SRR0 0104

Signed-off-by: Anton Blanchard 
---
 linux-user/main.c| 2 +-
 target-ppc/excp_helper.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/linux-user/main.c b/linux-user/main.c
index c855bcc..9100130 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -1650,7 +1650,7 @@ void cpu_loop(CPUPPCState *env)
 info.si_signo = TARGET_SIGBUS;
 info.si_errno = 0;
 info.si_code = TARGET_BUS_ADRALN;
-info._sifields._sigfault._addr = env->nip - 4;
+info._sifields._sigfault._addr = env->nip;
 queue_signal(env, info.si_signo, &info);
 break;
 case POWERPC_EXCP_PROGRAM:  /* Program exception */
diff --git a/target-ppc/excp_helper.c b/target-ppc/excp_helper.c
index b803475..4250106 100644
--- a/target-ppc/excp_helper.c
+++ b/target-ppc/excp_helper.c
@@ -200,7 +200,7 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int 
excp_model, int excp)
 /* Get rS/rD and rA from faulting opcode */
 env->spr[SPR_DSISR] |= (cpu_ldl_code(env, (env->nip - 4))
 & 0x03FF0000) >> 16;
-goto store_current;
+goto store_next;
 case POWERPC_EXCP_PROGRAM:   /* Program exception*/
 switch (env->error_code & ~0xF) {
 case POWERPC_EXCP_FP:
-- 
2.1.4




[Qemu-devel] [PATCH 0/4] Fix ppc64 tcg issues

2013-06-02 Thread Anton Blanchard

Hi,

qemu is currently broken on ppc64. After applying the following patches
I am able to boot a ppc64 and x86-64 image successfully. 

Anton



[Qemu-devel] [PATCH 3/4] tcg-ppc64: Fix add2_i64

2013-06-02 Thread Anton Blanchard

add2_i64 was adding the lower double word to the upper double word
of each input. Fix this so we add the lower double words, then the
upper double words with carry propagation.

Cc: qemu-sta...@nongnu.org
Signed-off-by: Anton Blanchard 
---

sub2 has similar issues, I haven't fixed it because I don't have
a testcase yet.

Index: b/tcg/ppc64/tcg-target.c
===
--- a/tcg/ppc64/tcg-target.c
+++ b/tcg/ppc64/tcg-target.c
@@ -1958,18 +1958,18 @@ static void tcg_out_op (TCGContext *s, T
environment.  So in 64-bit mode it's always carry-out of bit 63.
The fallback code using deposit works just as well for 32-bit.  */
 a0 = args[0], a1 = args[1];
-if (a0 == args[4] || (!const_args[5] && a0 == args[5])) {
+if (a0 == args[3] || (!const_args[5] && a0 == args[5])) {
 a0 = TCG_REG_R0;
 }
-if (const_args[3]) {
-tcg_out32(s, ADDIC | TAI(a0, args[2], args[3]));
+if (const_args[4]) {
+tcg_out32(s, ADDIC | TAI(a0, args[2], args[4]));
 } else {
-tcg_out32(s, ADDC | TAB(a0, args[2], args[3]));
+tcg_out32(s, ADDC | TAB(a0, args[2], args[4]));
 }
 if (const_args[5]) {
-tcg_out32(s, (args[5] ? ADDME : ADDZE) | RT(a1) | RA(args[4]));
+tcg_out32(s, (args[5] ? ADDME : ADDZE) | RT(a1) | RA(args[3]));
 } else {
-tcg_out32(s, ADDE | TAB(a1, args[4], args[5]));
+tcg_out32(s, ADDE | TAB(a1, args[3], args[5]));
 }
 if (a0 != args[0]) {
 tcg_out_mov(s, TCG_TYPE_I64, args[0], a0);
@@ -2147,7 +2147,7 @@ static const TCGTargetOpDef ppc_op_defs[
 { INDEX_op_deposit_i32, { "r", "0", "rZ" } },
 { INDEX_op_deposit_i64, { "r", "0", "rZ" } },
 
-{ INDEX_op_add2_i64, { "r", "r", "r", "rI", "r", "rZM" } },
+{ INDEX_op_add2_i64, { "r", "r", "r", "r", "rI", "rZM" } },
 { INDEX_op_sub2_i64, { "r", "r", "rI", "r", "rZM", "r" } },
 { INDEX_op_muls2_i64, { "r", "r", "r", "r" } },
 { INDEX_op_mulu2_i64, { "r", "r", "r", "r" } },



[Qemu-devel] [PATCH 4/4] tcg-ppc64: rotr_i32 rotates wrong amount

2013-06-02 Thread Anton Blanchard

rotr_i32 calculates the amount to left shift and puts it into a
temporary, but then doesn't use it when doing the shift.

Cc: qemu-sta...@nongnu.org
Signed-off-by: Anton Blanchard 
---

Index: b/tcg/ppc64/tcg-target.c
===
--- a/tcg/ppc64/tcg-target.c
+++ b/tcg/ppc64/tcg-target.c
@@ -1661,7 +1661,7 @@ static void tcg_out_op (TCGContext *s, T
 tcg_out_rlw(s, RLWINM, args[0], args[1], 32 - args[2], 0, 31);
 } else {
 tcg_out32(s, SUBFIC | TAI(0, args[2], 32));
-tcg_out32(s, RLWNM | SAB(args[1], args[0], args[2])
+tcg_out32(s, RLWNM | SAB(args[1], args[0], 0)
  | MB(0) | ME(31));
 }
 break;



[Qemu-devel] [PATCH 2/4] tcg-ppc64: bswap64 rotates output 32 bits

2013-06-02 Thread Anton Blanchard

If our input and output is in the same register, bswap64 tries to
undo a rotate of the input. This just ends up rotating the output.

Cc: qemu-sta...@nongnu.org
Signed-off-by: Anton Blanchard 
---

diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
index 0fcf2b5..64fb0af 100644
--- a/tcg/ppc64/tcg-target.c
+++ b/tcg/ppc64/tcg-target.c
@@ -1922,8 +1922,6 @@ static void tcg_out_op (TCGContext *s, TCGOpcode opc, 
const TCGArg *args,
 
 if (a0 == 0) {
 tcg_out_mov(s, TCG_TYPE_I64, args[0], a0);
-/* Revert the source rotate that we performed above.  */
-tcg_out_rld(s, RLDICL, a1, a1, 32, 0);
 }
 break;
 



[Qemu-devel] [PATCH 1/4] tcg-ppc64: Fix RLDCL opcode

2013-06-02 Thread Anton Blanchard

The rldcl instruction doesn't have an sh field, so the minor opcode
of 8 is actually 4 when using the XO30 macro.

Cc: qemu-sta...@nongnu.org
Signed-off-by: Anton Blanchard 
---

Index: b/tcg/ppc64/tcg-target.c
===
--- a/tcg/ppc64/tcg-target.c
+++ b/tcg/ppc64/tcg-target.c
@@ -357,7 +357,7 @@ static int tcg_target_const_match (tcg_t
 #define RLDICL XO30(  0)
 #define RLDICR XO30(  1)
 #define RLDIMI XO30(  3)
-#define RLDCL  XO30(  8)
+#define RLDCL  XO30(  4)
 
 #define BCLR   XO19( 16)
 #define BCCTR  XO19(528)



Re: [Qemu-devel] [PATCH 1/4] tcg-ppc64: Fix RLDCL opcode

2013-06-11 Thread Anton Blanchard

Hi Richard,

> But that suggests then that we ought not be using XO30.
> Or at least adding a comment.

Good idea, how does this look?

Anton
--

The rldcl instruction doesn't have an sh field, so the minor opcode
is shifted 1 bit. We were using the XO30 macro which shifted the
minor opcode 2 bits.

Remove XO30 and add MD30 and MDS30 macros which match the
Power ISA categories.

Cc: qemu-sta...@nongnu.org
Signed-off-by: Anton Blanchard 
---

Index: b/tcg/ppc64/tcg-target.c
===
--- a/tcg/ppc64/tcg-target.c
+++ b/tcg/ppc64/tcg-target.c
@@ -308,7 +308,8 @@ static int tcg_target_const_match (tcg_t
 
 #define OPCD(opc) ((opc)<<26)
 #define XO19(opc) (OPCD(19)|((opc)<<1))
-#define XO30(opc) (OPCD(30)|((opc)<<2))
+#define MD30(opc) (OPCD(30)|((opc)<<2))
+#define MDS30(opc) (OPCD(30)|((opc)<<1))
 #define XO31(opc) (OPCD(31)|((opc)<<1))
 #define XO58(opc) (OPCD(58)|(opc))
 #define XO62(opc) (OPCD(62)|(opc))
@@ -354,10 +355,10 @@ static int tcg_target_const_match (tcg_t
 #define RLWINM OPCD( 21)
 #define RLWNM  OPCD( 23)
 
-#define RLDICL XO30(  0)
-#define RLDICR XO30(  1)
-#define RLDIMI XO30(  3)
-#define RLDCL  XO30(  8)
+#define RLDICL MD30(  0)
+#define RLDICR MD30(  1)
+#define RLDIMI MD30(  3)
+#define RLDCL  MDS30( 8)
 
 #define BCLR   XO19( 16)
 #define BCCTR  XO19(528)



[Qemu-devel] [PATCH] pseries: Fix loading of little endian kernels

2013-09-25 Thread Anton Blanchard
From: Benjamin Herrenschmidt 

Try loading the kernel as little endian if it fails big endian.

Signed-off-by: Benjamin Herrenschmidt 
Reviewed-by: Anton Blanchard 
---

Index: b/hw/ppc/spapr.c
===
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -273,6 +273,7 @@ static void *spapr_create_fdt_skel(const
hwaddr initrd_base,
hwaddr initrd_size,
hwaddr kernel_size,
+   bool little_endian,
const char *boot_device,
const char *kernel_cmdline,
uint32_t epow_irq)
@@ -326,6 +327,9 @@ static void *spapr_create_fdt_skel(const
   cpu_to_be64(kernel_size) };
 
 _FDT((fdt_property(fdt, "qemu,boot-kernel", &kprop, sizeof(kprop))));
+if (little_endian) {
+_FDT((fdt_property(fdt, "qemu,boot-kernel-le", NULL, 0)));
+}
 }
 if (boot_device) {
 _FDT((fdt_property_string(fdt, "qemu,boot-device", boot_device)));
@@ -1102,6 +1106,7 @@ static void ppc_spapr_init(QEMUMachineIn
 uint32_t initrd_base = 0;
 long kernel_size = 0, initrd_size = 0;
 long load_limit, rtas_limit, fw_size;
+bool kernel_le = false;
 char *filename;
 
 msi_supported = true;
@@ -1282,6 +1287,12 @@ static void ppc_spapr_init(QEMUMachineIn
 kernel_size = load_elf(kernel_filename, translate_kernel_address, NULL,
NULL, &lowaddr, NULL, 1, ELF_MACHINE, 0);
 if (kernel_size < 0) {
+kernel_size = load_elf(kernel_filename,
+   translate_kernel_address, NULL,
+   NULL, &lowaddr, NULL, 0, ELF_MACHINE, 0);
+kernel_le = kernel_size > 0;
+}
+if (kernel_size < 0) {
 kernel_size = load_image_targphys(kernel_filename,
   KERNEL_LOAD_ADDR,
   load_limit - KERNEL_LOAD_ADDR);
@@ -1331,7 +1342,7 @@ static void ppc_spapr_init(QEMUMachineIn
 /* Prepare the device tree */
 spapr->fdt_skel = spapr_create_fdt_skel(cpu_model,
 initrd_base, initrd_size,
-kernel_size,
+kernel_size, kernel_le,
 boot_device, kernel_cmdline,
 spapr->epow_irq);
 assert(spapr->fdt_skel != NULL);



[Qemu-devel] [PATCH] ppc: Add CFAR, DAR and DSISR to the dictionary of printable registers

2013-09-25 Thread Anton Blanchard
From: Tom Musta 

The CFAR, DAR and DSISR registers are currently missing from the 
dictionary of registers that may be printed in the QEMU console.
These are interesting registers when debugging.  With this patch,
the following commands work properly:

 (qemu) print $cfar
 (qemu) print $dar
 (qemu) print $dsisr

Signed-off-by: Tom Musta 
Reviewed-by: Anton Blanchard 
---

Index: b/monitor.c
===
--- a/monitor.c
+++ b/monitor.c
@@ -3186,6 +3186,9 @@ static const MonitorDef monitor_defs[] =
 
 { "srr0", offsetof(CPUPPCState, spr[SPR_SRR0]) },
 { "srr1", offsetof(CPUPPCState, spr[SPR_SRR1]) },
+{ "dar", offsetof(CPUPPCState, spr[SPR_DAR]) },
+{ "dsisr", offsetof(CPUPPCState, spr[SPR_DSISR]) },
+{ "cfar", offsetof(CPUPPCState, spr[SPR_CFAR]) },
 { "sprg0", offsetof(CPUPPCState, spr[SPR_SPRG0]) },
 { "sprg1", offsetof(CPUPPCState, spr[SPR_SPRG1]) },
 { "sprg2", offsetof(CPUPPCState, spr[SPR_SPRG2]) },



[Qemu-devel] [PATCH] target-ppc: Little Endian Correction to Load/Store Vector Element

2013-09-25 Thread Anton Blanchard
From: Tom Musta 

The Load Vector Element (lve*x) and Store Vector Element (stve*x)
instructions not only byte-swap in Little Endian mode, they also
invert the element that is accessed. For example, the RTL for
lvehx contains this:

 eb <-- EA[60:63]
 if Big-Endian byte ordering then
 VRT[8*eb:8*eb+15] <-- MEM(EA,2)
 else
 VRT[112-(8*eb):127-(8*eb)] <-- MEM(EA,2)

This patch adds the element inversion, as described in the last line
of the RTL.

Signed-off-by: Tom Musta 
Reviewed-by: Anton Blanchard 
---

Index: b/target-ppc/mem_helper.c
===
--- a/target-ppc/mem_helper.c
+++ b/target-ppc/mem_helper.c
@@ -212,6 +212,7 @@ target_ulong helper_lscbx(CPUPPCState *e
 int index = (addr & 0xf) >> sh; \
 \
 if (msr_le) {   \
+index = n_elems - index - 1;\
 r->element[LO_IDX ? index : (adjust - index)] = \
 swap(access(env, addr));\
 } else {\
@@ -236,6 +237,7 @@ LVE(lvewx, cpu_ldl_data, bswap32, u32)
 int index = (addr & 0xf) >> sh; \
 \
 if (msr_le) {   \
+index = n_elems - index - 1;\
 access(env, addr, swap(r->element[LO_IDX ? index :  \
   (adjust - index)]));  \
 } else {\



Re: [Qemu-devel] PR KVM and TM issues

2016-04-04 Thread Anton Blanchard
Hi Alexey,

> > I can't get an Ubuntu Wily guest to boot on an Ubuntu Wily host in
> > PR KVM mode. The kernel in both cases is 4.2. To reproduce:
> >
> > wget -N 
> > https://cloud-images.ubuntu.com/wily/current/wily-server-cloudimg-ppc64el-disk1.img
> >
> > qemu-system-ppc64 -cpu POWER8 -enable-kvm -machine pseries,kvm-type=PR -m 
> > 4G -nographic -vga none -drive 
> > file=wily-server-cloudimg-ppc64el-disk1.img,if=virtio
> >
> > Should TM work inside a PR KVM guest?
> 
> If I read the kernel code correctly (kvmppc_set_one_reg_hv vs. 
> kvmppc_set_one_reg_pr), no, it should not be expected to work.

I see a couple of issues, patches to follow:

1. QEMU needs to clear the TM feature bit in the ibm,pa-features array
when running in PR KVM mode.

2. Linux needs to clear the user TM feature bits if TM gets disabled
at runtime via the ibm,pa-features bit.

Anton



[Qemu-devel] [PATCH] spapr: Don't set the TM ibm, pa-features bit in PR KVM mode

2016-04-04 Thread Anton Blanchard
We don't support transactional memory in PR KVM, so don't tell
the OS that we do.

Signed-off-by: Anton Blanchard 
---

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index e7be21e..538bd87 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -696,6 +696,12 @@ static void spapr_populate_cpu_dt(CPUState *cs, void *fdt, 
int offset,
 } else /* env->mmu_model == POWERPC_MMU_2_07 */ {
 pa_features = pa_features_207;
 pa_size = sizeof(pa_features_207);
+
+/* Don't enable TM in PR KVM mode */
+if (kvm_enabled() &&
+kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
+pa_features[24] &= ~0x80;
+}
 }
 if (env->ci_large_pages) {
 pa_features[3] |= 0x20;



[Qemu-devel] [PATCH] powerpc: Clear user CPU feature bits if TM is disabled at runtime

2016-04-04 Thread Anton Blanchard
In check_cpu_pa_features() we check a number of bits in the
ibm,pa-features array and set and clear CPU features based on what
we find. One of these bits is CPU_FTR_TM, the transactional memory
feature bit.

If this does disable TM at runtime, then we need to tell userspace
about it by clearing the user CPU feature bits.

Without this patch userspace processes will think they can execute
TM instructions and get killed when they try.

Signed-off-by: Anton Blanchard 
Cc: sta...@vger.kernel.org
---

Michael I've added stable here because I'm seeing this on a number
of distros and would like to get it backported, but I'll leave it up
to you if it should go there.

diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index f98be83..98c6c86 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -822,4 +822,18 @@ static int __init disable_hardlockup_detector(void)
return 0;
 }
 early_initcall(disable_hardlockup_detector);
+
+static int __init update_cpu_user_features(void)
+{
+   /*
+* Firmware might have disabled TM by clearing the relevant
+* bit in the ibm,pa-features array. In this case we need to
+* tell userspace.
+*/
+   if (!cpu_has_feature(CPU_FTR_TM))
+   cur_cpu_spec->cpu_user_features2 &= 
~(PPC_FEATURE2_HTM|PPC_FEATURE2_HTM_NOSC);
+
+   return 0;
+}
+early_initcall(update_cpu_user_features);
 #endif



[Qemu-devel] [PATCH 1/3] powerpc: scan_features() updates incorrect bits

2016-04-14 Thread Anton Blanchard
The real LE feature entry in the ibm_pa_feature struct has the
wrong number of elements. Instead of checking for byte 5, bit 0,
we check for byte 0, bit 0, and we also incorrectly update cpu user
feature bit 5.

Fixes: 44ae3ab3358e ("powerpc: Free up some CPU feature bits by moving out 
MMU-related features")
Signed-off-by: Anton Blanchard 
Cc: sta...@vger.kernel.org
---
 arch/powerpc/kernel/prom.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 7030b03..9a3a7c6 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -158,7 +158,7 @@ static struct ibm_pa_feature {
{CPU_FTR_NOEXECUTE, 0, 0,   0, 6, 0},
{CPU_FTR_NODSISRALIGN, 0, 0,1, 1, 1},
{0, MMU_FTR_CI_LARGE_PAGE, 0,   1, 2, 0},
-   {CPU_FTR_REAL_LE, PPC_FEATURE_TRUE_LE, 5, 0, 0},
+   {CPU_FTR_REAL_LE, PPC_FEATURE_TRUE_LE, 0, 5, 0, 0},
/*
 * If the kernel doesn't support TM (ie. 
CONFIG_PPC_TRANSACTIONAL_MEM=n),
 * we don't want to turn on CPU_FTR_TM here, so we use CPU_FTR_TM_COMP
-- 
2.7.4




[Qemu-devel] [PATCH 2/3] powerpc: Update cpu_user_features2 in scan_features()

2016-04-14 Thread Anton Blanchard
scan_features() updates cpu_user_features but not cpu_user_features2.

Amongst other things, cpu_user_features2 contains the user TM feature
bits which we must keep in sync with the kernel TM feature bit.

Signed-off-by: Anton Blanchard 
Cc: sta...@vger.kernel.org
---
 arch/powerpc/kernel/prom.c | 19 +++
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 9a3a7c6..99709bb 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -148,23 +148,24 @@ static struct ibm_pa_feature {
unsigned long   cpu_features;   /* CPU_FTR_xxx bit */
unsigned long   mmu_features;   /* MMU_FTR_xxx bit */
unsigned intcpu_user_ftrs;  /* PPC_FEATURE_xxx bit */
+   unsigned intcpu_user_ftrs2; /* PPC_FEATURE2_xxx bit */
unsigned char   pabyte; /* byte number in ibm,pa-features */
unsigned char   pabit;  /* bit number (big-endian) */
unsigned char   invert; /* if 1, pa bit set => clear feature */
 } ibm_pa_features[] __initdata = {
-   {0, 0, PPC_FEATURE_HAS_MMU, 0, 0, 0},
-   {0, 0, PPC_FEATURE_HAS_FPU, 0, 1, 0},
-   {CPU_FTR_CTRL, 0, 0,0, 3, 0},
-   {CPU_FTR_NOEXECUTE, 0, 0,   0, 6, 0},
-   {CPU_FTR_NODSISRALIGN, 0, 0,1, 1, 1},
-   {0, MMU_FTR_CI_LARGE_PAGE, 0,   1, 2, 0},
-   {CPU_FTR_REAL_LE, PPC_FEATURE_TRUE_LE, 0, 5, 0, 0},
+   {0, 0, PPC_FEATURE_HAS_MMU, 0,  0, 0, 0},
+   {0, 0, PPC_FEATURE_HAS_FPU, 0,  0, 1, 0},
+   {CPU_FTR_CTRL, 0, 0, 0, 0, 3, 0},
+   {CPU_FTR_NOEXECUTE, 0, 0, 0,0, 6, 0},
+   {CPU_FTR_NODSISRALIGN, 0, 0, 0, 1, 1, 1},
+   {0, MMU_FTR_CI_LARGE_PAGE, 0, 0,1, 2, 0},
+   {CPU_FTR_REAL_LE, PPC_FEATURE_TRUE_LE, 0, 0, 5, 0, 0},
/*
 * If the kernel doesn't support TM (ie. 
CONFIG_PPC_TRANSACTIONAL_MEM=n),
 * we don't want to turn on CPU_FTR_TM here, so we use CPU_FTR_TM_COMP
 * which is 0 if the kernel doesn't support TM.
 */
-   {CPU_FTR_TM_COMP, 0, 0, 22, 0, 0},
+   {CPU_FTR_TM_COMP, 0, 0, 0,  22, 0, 0},
 };
 
 static void __init scan_features(unsigned long node, const unsigned char *ftrs,
@@ -195,10 +196,12 @@ static void __init scan_features(unsigned long node, 
const unsigned char *ftrs,
if (bit ^ fp->invert) {
cur_cpu_spec->cpu_features |= fp->cpu_features;
cur_cpu_spec->cpu_user_features |= fp->cpu_user_ftrs;
+   cur_cpu_spec->cpu_user_features2 |= fp->cpu_user_ftrs2;
cur_cpu_spec->mmu_features |= fp->mmu_features;
} else {
cur_cpu_spec->cpu_features &= ~fp->cpu_features;
cur_cpu_spec->cpu_user_features &= ~fp->cpu_user_ftrs;
+   cur_cpu_spec->cpu_user_features2 &= ~fp->cpu_user_ftrs2;
cur_cpu_spec->mmu_features &= ~fp->mmu_features;
}
}
-- 
2.7.4




[Qemu-devel] [PATCH 3/3] powerpc: Update TM user feature bits in scan_features()

2016-04-14 Thread Anton Blanchard
We need to update the user TM feature bits (PPC_FEATURE2_HTM and
PPC_FEATURE2_HTM_NOSC) to mirror what we do with the kernel TM feature
bit.

At the moment, if firmware reports TM is not available we turn off
the kernel TM feature bit but leave the userspace ones on. Userspace
thinks it can execute TM instructions and it dies trying.

This (together with a QEMU patch) fixes PR KVM, which doesn't currently
support TM.

Signed-off-by: Anton Blanchard 
Cc: sta...@vger.kernel.org
---
 arch/powerpc/kernel/prom.c | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 99709bb..5beffd7 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -161,11 +161,12 @@ static struct ibm_pa_feature {
{0, MMU_FTR_CI_LARGE_PAGE, 0, 0,1, 2, 0},
{CPU_FTR_REAL_LE, PPC_FEATURE_TRUE_LE, 0, 0, 5, 0, 0},
/*
-* If the kernel doesn't support TM (ie. 
CONFIG_PPC_TRANSACTIONAL_MEM=n),
-* we don't want to turn on CPU_FTR_TM here, so we use CPU_FTR_TM_COMP
-* which is 0 if the kernel doesn't support TM.
+* If the kernel doesn't support TM (ie CONFIG_PPC_TRANSACTIONAL_MEM=n),
+* we don't want to turn on TM here, so we use the *_COMP versions
+* which are 0 if the kernel doesn't support TM.
 */
-   {CPU_FTR_TM_COMP, 0, 0, 0,  22, 0, 0},
+   {CPU_FTR_TM_COMP, 0, 0,
+PPC_FEATURE2_HTM_COMP|PPC_FEATURE2_HTM_NOSC_COMP, 22, 0, 0},
 };
 
 static void __init scan_features(unsigned long node, const unsigned char *ftrs,
-- 
2.7.4




[Qemu-devel] [PATCH v2] spapr: Don't set the TM ibm, pa-features bit in PR KVM mode

2016-04-29 Thread Anton Blanchard
We don't support transactional memory in PR KVM, so don't tell
the OS that we do.

Signed-off-by: Anton Blanchard 
---

v2: Fix build with CONFIG_KVM disabled, noticed by Alex.

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index b69995e..dc3e3c9 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -696,6 +696,14 @@ static void spapr_populate_cpu_dt(CPUState *cs, void *fdt, 
int offset,
 } else /* env->mmu_model == POWERPC_MMU_2_07 */ {
 pa_features = pa_features_207;
 pa_size = sizeof(pa_features_207);
+
+#ifdef CONFIG_KVM
+/* Don't enable TM in PR KVM mode */
+if (kvm_enabled() &&
+kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
+pa_features[24] &= ~0x80;
+}
+#endif
 }
 if (env->ci_large_pages) {
 pa_features[3] |= 0x20;



[Qemu-devel] [PATCH 1/2] Add PowerPC AT_HWCAP2 definitions

2016-06-07 Thread Anton Blanchard
From: Anton Blanchard 

We need the PPC_FEATURE2_HAS_HTM bit in a subsequent patch, so
add the PowerPC AT_HWCAP2 definitions.

Signed-off-by: Anton Blanchard 
---

diff --git a/include/elf.h b/include/elf.h
index 28d448b..8533b2a 100644
--- a/include/elf.h
+++ b/include/elf.h
@@ -477,6 +477,19 @@ typedef struct {
 #define PPC_FEATURE_TRUE_LE 0x0002
 #define PPC_FEATURE_PPC_LE  0x0001
 
+/* Bits present in AT_HWCAP2 for PowerPC.  */
+
+#define PPC_FEATURE2_ARCH_2_07      0x80000000
+#define PPC_FEATURE2_HAS_HTM        0x40000000
+#define PPC_FEATURE2_HAS_DSCR       0x20000000
+#define PPC_FEATURE2_HAS_EBB        0x10000000
+#define PPC_FEATURE2_HAS_ISEL       0x08000000
+#define PPC_FEATURE2_HAS_TAR        0x04000000
+#define PPC_FEATURE2_HAS_VEC_CRYPTO 0x02000000
+#define PPC_FEATURE2_HTM_NOSC       0x01000000
+#define PPC_FEATURE2_ARCH_3_00      0x00800000
+#define PPC_FEATURE2_HAS_IEEE128    0x00400000
+
 /* Bits present in AT_HWCAP for Sparc.  */
 
 #define HWCAP_SPARC_FLUSH   0x0001




[Qemu-devel] [PATCH 2/2] spapr: Better handling of ibm, pa-features TM bit

2016-06-07 Thread Anton Blanchard
From: Anton Blanchard 

There are a few issues with our handling of the ibm,pa-features
TM bit:

- We don't support transactional memory in PR KVM, so don't tell
  the OS that we do.

- In full emulation we have a minimal implementation of TM that always
  fails, so for performance reasons let's not tell the OS that we
  support it either.

- In HV KVM mode, we should mirror the host TM enabled state by
  looking at the AT_HWCAP2 bit.

Signed-off-by: Anton Blanchard 
---

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 0636642..c403fbb 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -620,7 +620,7 @@ static void spapr_populate_cpu_dt(CPUState *cs, void *fdt, 
int offset,
 0xf6, 0x1f, 0xc7, 0xc0, 0x80, 0xf0,
 0x80, 0x00, 0x00, 0x00, 0x00, 0x00,
 0x00, 0x00, 0x00, 0x00, 0x80, 0x00,
-0x80, 0x00, 0x80, 0x00, 0x80, 0x00 };
+0x80, 0x00, 0x80, 0x00, 0x00, 0x00 };
 uint8_t *pa_features;
 size_t pa_size;
 
@@ -697,6 +697,19 @@ static void spapr_populate_cpu_dt(CPUState *cs, void *fdt, 
int offset,
 } else /* env->mmu_model == POWERPC_MMU_2_07 */ {
 pa_features = pa_features_207;
 pa_size = sizeof(pa_features_207);
+
+#ifdef CONFIG_KVM
+/* Only enable TM in HV KVM mode */
+if (kvm_enabled() &&
+!kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
+unsigned long hwcap2 = qemu_getauxval(AT_HWCAP2);
+
+/* Guest should inherit host TM enabled bit */
+if (hwcap2 & PPC_FEATURE2_HAS_HTM) {
+pa_features[24] |= 0x80;
+}
+}
+#endif
 }
 if (env->ci_large_pages) {
 pa_features[3] |= 0x20;




Re: [Qemu-devel] [PULL 03/13] target-ppc: Use 32-bit rotate instead of deposit + 64-bit rotate

2016-06-15 Thread Anton Blanchard
Hi,

> From: Richard Henderson 
> 
> A 32-bit rotate insn is more common on hosts than a deposit insn,
> and if the host has neither the result is truely horrific.
> 
> At the same time, tidy up the temporaries within these functions,
> drop the over-use of "likely", drop some checks for identity that
> will also be checked by tcg-op.c functions, and special case mask
> without rotate within rlwinm.

This breaks masks that wrap:

li  r3,-1
li  r4,-1
rlwnm   r3,r3,r4,22,8

We expect:

ffffffffff8003ff

But get:

ff8003ff

Anton

> Signed-off-by: Richard Henderson 
> Signed-off-by: David Gibson 
> ---
>  target-ppc/translate.c | 172
> - 1 file changed, 70
> insertions(+), 102 deletions(-)
> 
> diff --git a/target-ppc/translate.c b/target-ppc/translate.c
> index 3ea6625..b392ecc 100644
> --- a/target-ppc/translate.c
> +++ b/target-ppc/translate.c
> @@ -1610,141 +1610,109 @@ static void gen_cntlzd(DisasContext *ctx)
>  /* rlwimi & rlwimi. */
>  static void gen_rlwimi(DisasContext *ctx)
>  {
> -uint32_t mb, me, sh;
> -
> -mb = MB(ctx->opcode);
> -me = ME(ctx->opcode);
> -sh = SH(ctx->opcode);
> -if (likely(sh == (31-me) && mb <= me)) {
> -tcg_gen_deposit_tl(cpu_gpr[rA(ctx->opcode)],
> cpu_gpr[rA(ctx->opcode)],
> -   cpu_gpr[rS(ctx->opcode)], sh, me - mb +
> 1);
> +TCGv t_ra = cpu_gpr[rA(ctx->opcode)];
> +TCGv t_rs = cpu_gpr[rS(ctx->opcode)];
> +uint32_t sh = SH(ctx->opcode);
> +uint32_t mb = MB(ctx->opcode);
> +uint32_t me = ME(ctx->opcode);
> +
> +if (sh == (31-me) && mb <= me) {
> +tcg_gen_deposit_tl(t_ra, t_ra, t_rs, sh, me - mb + 1);
>  } else {
>  target_ulong mask;
> +TCGv_i32 t0;
>  TCGv t1;
> -TCGv t0 = tcg_temp_new();
> -#if defined(TARGET_PPC64)
> -tcg_gen_deposit_i64(t0, cpu_gpr[rS(ctx->opcode)],
> -cpu_gpr[rS(ctx->opcode)], 32, 32);
> -tcg_gen_rotli_i64(t0, t0, sh);
> -#else
> -tcg_gen_rotli_i32(t0, cpu_gpr[rS(ctx->opcode)], sh);
> -#endif
> +
>  #if defined(TARGET_PPC64)
>  mb += 32;
>  me += 32;
>  #endif
>  mask = MASK(mb, me);
> +
> +t0 = tcg_temp_new_i32();
>  t1 = tcg_temp_new();
> -tcg_gen_andi_tl(t0, t0, mask);
> -tcg_gen_andi_tl(t1, cpu_gpr[rA(ctx->opcode)], ~mask);
> -tcg_gen_or_tl(cpu_gpr[rA(ctx->opcode)], t0, t1);
> -tcg_temp_free(t0);
> +tcg_gen_trunc_tl_i32(t0, t_rs);
> +tcg_gen_rotli_i32(t0, t0, sh);
> +tcg_gen_extu_i32_tl(t1, t0);
> +tcg_temp_free_i32(t0);
> +
> +tcg_gen_andi_tl(t1, t1, mask);
> +tcg_gen_andi_tl(t_ra, t_ra, ~mask);
> +tcg_gen_or_tl(t_ra, t_ra, t1);
>  tcg_temp_free(t1);
>  }
> -if (unlikely(Rc(ctx->opcode) != 0))
> -gen_set_Rc0(ctx, cpu_gpr[rA(ctx->opcode)]);
> +if (unlikely(Rc(ctx->opcode) != 0)) {
> +gen_set_Rc0(ctx, t_ra);
> +}
>  }
>  
>  /* rlwinm & rlwinm. */
>  static void gen_rlwinm(DisasContext *ctx)
>  {
> -uint32_t mb, me, sh;
> -
> -sh = SH(ctx->opcode);
> -mb = MB(ctx->opcode);
> -me = ME(ctx->opcode);
> +TCGv t_ra = cpu_gpr[rA(ctx->opcode)];
> +TCGv t_rs = cpu_gpr[rS(ctx->opcode)];
> +uint32_t sh = SH(ctx->opcode);
> +uint32_t mb = MB(ctx->opcode);
> +uint32_t me = ME(ctx->opcode);
>  
> -if (likely(mb == 0 && me == (31 - sh))) {
> -if (likely(sh == 0)) {
> -tcg_gen_ext32u_tl(cpu_gpr[rA(ctx->opcode)],
> cpu_gpr[rS(ctx->opcode)]);
> -} else {
> -TCGv t0 = tcg_temp_new();
> -tcg_gen_ext32u_tl(t0, cpu_gpr[rS(ctx->opcode)]);
> -tcg_gen_shli_tl(t0, t0, sh);
> -tcg_gen_ext32u_tl(cpu_gpr[rA(ctx->opcode)], t0);
> -tcg_temp_free(t0);
> -}
> -} else if (likely(sh != 0 && me == 31 && sh == (32 - mb))) {
> -TCGv t0 = tcg_temp_new();
> -tcg_gen_ext32u_tl(t0, cpu_gpr[rS(ctx->opcode)]);
> -tcg_gen_shri_tl(t0, t0, mb);
> -tcg_gen_ext32u_tl(cpu_gpr[rA(ctx->opcode)], t0);
> -tcg_temp_free(t0);
> -} else if (likely(mb == 0 && me == 31)) {
> -TCGv_i32 t0 = tcg_temp_new_i32();
> -tcg_gen_trunc_tl_i32(t0, cpu_gpr[rS(ctx->opcode)]);
> -tcg_gen_rotli_i32(t0, t0, sh);
> -tcg_gen_extu_i32_tl(cpu_gpr[rA(ctx->opcode)], t0);
> -tcg_temp_free_i32(t0);
> +if (mb == 0 && me == (31 - sh)) {
> +tcg_gen_shli_tl(t_ra, t_rs, sh);
> +tcg_gen_ext32u_tl(t_ra, t_ra);
> +} else if (sh != 0 && me == 31 && sh == (32 - mb)) {
> +tcg_gen_ext32u_tl(t_ra, t_rs);
> +tcg_gen_shri_tl(t_ra, t_ra, mb);
>  } else {
> -TCGv t0 = tcg_temp_new();
> -#if defined(TARGET_PPC64)
> -tcg_gen_deposit_i64(t0, cpu_gpr[rS(ctx->opcode)],
> -cpu_gpr[rS(ctx->opcode)], 32, 32);
> -tcg_gen_rotli_i64(t0, t0, sh);
> -#else
> - 

Re: [Qemu-devel] [PULL 03/13] target-ppc: Use 32-bit rotate instead of deposit + 64-bit rotate

2016-06-17 Thread Anton Blanchard
Hi rth,

> > Bother.  I've tentatively put a revert into ppc-for-2.7.  Richard,
> > do you have a better idea how to fix it?  
> 
> Please try the following.

Thanks! This passes my tests. Feel free to add:

Tested-by: Anton Blanchard 

Anton



Re: [Qemu-devel] [PULL 03/13] target-ppc: Use 32-bit rotate instead of deposit + 64-bit rotate

2016-06-17 Thread Anton Blanchard
Hi,

> > > Bother.  I've tentatively put a revert into ppc-for-2.7.  Richard,
> > > do you have a better idea how to fix it?
> > 
> > Please try the following.  
> 
> Thanks! This passes my tests. Feel free to add:
> 
> Tested-by: Anton Blanchard 

Actually I think I've found a problem:

lis r4,0x7fffffff@h
ori r4,r4,0x7fffffff@l
rlwinm  r3,r4,0,25,1

32 bit rotate is defined as a 64 bit rotate of 2 copies of the 32 bit
value, so we expect 0x7fffffff4000007f, but get 0x4000007f.

Not sure if anything out there depends on it though.

Anton




Re: [Qemu-devel] [PATCH v2] target-ppc: Fix rlwimi, rlwinm, rlwnm

2016-06-18 Thread Anton Blanchard
Hi rth,

> In 63ae0915f8ec, I arranged to use a 32-bit rotate, without
> considering the effect of a mask value that wraps around to
> the high bits of the word.

Thanks, that passes my tests.

Tested-by: Anton Blanchard 

Anton

> Signed-off-by: Richard Henderson 
> ---
>  target-ppc/translate.c | 73
> +++--- 1 file changed, 51
> insertions(+), 22 deletions(-)
> 
> diff --git a/target-ppc/translate.c b/target-ppc/translate.c
> index b689475..23bc054 100644
> --- a/target-ppc/translate.c
> +++ b/target-ppc/translate.c
> @@ -1636,7 +1636,6 @@ static void gen_rlwimi(DisasContext *ctx)
>  tcg_gen_deposit_tl(t_ra, t_ra, t_rs, sh, me - mb + 1);
>  } else {
>  target_ulong mask;
> -TCGv_i32 t0;
>  TCGv t1;
>  
>  #if defined(TARGET_PPC64)
> @@ -1645,12 +1644,21 @@ static void gen_rlwimi(DisasContext *ctx)
>  #endif
>  mask = MASK(mb, me);
>  
> -t0 = tcg_temp_new_i32();
>  t1 = tcg_temp_new();
> -tcg_gen_trunc_tl_i32(t0, t_rs);
> -tcg_gen_rotli_i32(t0, t0, sh);
> -tcg_gen_extu_i32_tl(t1, t0);
> -tcg_temp_free_i32(t0);
> +if (mask <= 0xffffffffu) {
> +TCGv_i32 t0 = tcg_temp_new_i32();
> +tcg_gen_trunc_tl_i32(t0, t_rs);
> +tcg_gen_rotli_i32(t0, t0, sh);
> +tcg_gen_extu_i32_tl(t1, t0);
> +tcg_temp_free_i32(t0);
> +} else {
> +#if defined(TARGET_PPC64)
> +tcg_gen_deposit_i64(t1, t_rs, t_rs, 32, 32);
> +tcg_gen_rotli_i64(t1, t1, sh);
> +#else
> +g_assert_not_reached();
> +#endif
> +}
>  
>  tcg_gen_andi_tl(t1, t1, mask);
>  tcg_gen_andi_tl(t_ra, t_ra, ~mask);
> @@ -1678,20 +1686,28 @@ static void gen_rlwinm(DisasContext *ctx)
>  tcg_gen_ext32u_tl(t_ra, t_rs);
>  tcg_gen_shri_tl(t_ra, t_ra, mb);
>  } else {
> +target_ulong mask;
>  #if defined(TARGET_PPC64)
>  mb += 32;
>  me += 32;
>  #endif
> -if (sh == 0) {
> -tcg_gen_andi_tl(t_ra, t_rs, MASK(mb, me));
> -} else {
> -TCGv_i32 t0 = tcg_temp_new_i32();
> +mask = MASK(mb, me);
>  
> +if (mask <= 0xffffffffu) {
> +TCGv_i32 t0 = tcg_temp_new_i32();
>  tcg_gen_trunc_tl_i32(t0, t_rs);
>  tcg_gen_rotli_i32(t0, t0, sh);
> -tcg_gen_andi_i32(t0, t0, MASK(mb, me));
> +tcg_gen_andi_i32(t0, t0, mask);
>  tcg_gen_extu_i32_tl(t_ra, t0);
>  tcg_temp_free_i32(t0);
> +} else {
> +#if defined(TARGET_PPC64)
> +tcg_gen_deposit_i64(t_ra, t_rs, t_rs, 32, 32);
> +tcg_gen_rotli_i64(t_ra, t_ra, sh);
> +tcg_gen_andi_i64(t_ra, t_ra, mask);
> +#else
> +g_assert_not_reached();
> +#endif
>  }
>  }
>  if (unlikely(Rc(ctx->opcode) != 0)) {
> @@ -1707,24 +1723,37 @@ static void gen_rlwnm(DisasContext *ctx)
>  TCGv t_rb = cpu_gpr[rB(ctx->opcode)];
>  uint32_t mb = MB(ctx->opcode);
>  uint32_t me = ME(ctx->opcode);
> -TCGv_i32 t0, t1;
> +target_ulong mask;
>  
>  #if defined(TARGET_PPC64)
>  mb += 32;
>  me += 32;
>  #endif
> +mask = MASK(mb, me);
>  
> -t0 = tcg_temp_new_i32();
> -t1 = tcg_temp_new_i32();
> -tcg_gen_trunc_tl_i32(t0, t_rb);
> -tcg_gen_trunc_tl_i32(t1, t_rs);
> -tcg_gen_andi_i32(t0, t0, 0x1f);
> -tcg_gen_rotl_i32(t1, t1, t0);
> -tcg_temp_free_i32(t0);
> +if (mask <= 0xffffffffu) {
> +TCGv_i32 t0 = tcg_temp_new_i32();
> +TCGv_i32 t1 = tcg_temp_new_i32();
> +tcg_gen_trunc_tl_i32(t0, t_rb);
> +tcg_gen_trunc_tl_i32(t1, t_rs);
> +tcg_gen_andi_i32(t0, t0, 0x1f);
> +tcg_gen_rotl_i32(t1, t1, t0);
> +tcg_gen_extu_i32_tl(t_ra, t1);
> +tcg_temp_free_i32(t0);
> +tcg_temp_free_i32(t1);
> +} else {
> +#if defined(TARGET_PPC64)
> +TCGv_i64 t0 = tcg_temp_new_i64();
> +tcg_gen_andi_i64(t0, t_rb, 0x1f);
> +tcg_gen_deposit_i64(t_ra, t_rs, t_rs, 32, 32);
> +tcg_gen_rotl_i64(t_ra, t_ra, t0);
> +tcg_temp_free_i64(t0);
> +#else
> +g_assert_not_reached();
> +#endif
> +}
>  
> -tcg_gen_andi_i32(t1, t1, MASK(mb, me));
> -tcg_gen_extu_i32_tl(t_ra, t1);
> -tcg_temp_free_i32(t1);
> +tcg_gen_andi_tl(t_ra, t_ra, mask);
>  
>  if (unlikely(Rc(ctx->opcode) != 0)) {
>  gen_set_Rc0(ctx, t_ra);




[Qemu-devel] [PATCH 1/9] target/ppc: Fix xvxsigdp

2019-05-06 Thread Anton Blanchard
Fix a typo in xvxsigdp where we put both results into the lower
doubleword.

Fixes: dd977e4f45cb ("target/ppc: Optimize x[sv]xsigdp using deposit_i64()")
Signed-off-by: Anton Blanchard 
---
 target/ppc/translate/vsx-impl.inc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target/ppc/translate/vsx-impl.inc.c 
b/target/ppc/translate/vsx-impl.inc.c
index 11d9b75d01..4d8ca7cf32 100644
--- a/target/ppc/translate/vsx-impl.inc.c
+++ b/target/ppc/translate/vsx-impl.inc.c
@@ -1820,7 +1820,7 @@ static void gen_xvxsigdp(DisasContext *ctx)
 tcg_gen_movi_i64(t0, 0x0010000000000000);
 tcg_gen_movcond_i64(TCG_COND_EQ, t0, exp, zr, zr, t0);
 tcg_gen_movcond_i64(TCG_COND_EQ, t0, exp, nan, zr, t0);
-tcg_gen_deposit_i64(xth, t0, xbl, 0, 52);
+tcg_gen_deposit_i64(xtl, t0, xbl, 0, 52);
 set_cpu_vsrl(xT(ctx->opcode), xtl);
 
 tcg_temp_free_i64(t0);
-- 
2.20.1




[Qemu-devel] [PATCH 3/9] target/ppc: Fix xxbrq, xxbrw

2019-05-06 Thread Anton Blanchard
Fix a typo in xxbrq and xxbrw where we put both results into the lower
doubleword.

Fixes: 8b3b2d75c7c0 ("introduce get_cpu_vsr{l,h}() and set_cpu_vsr{l,h}() 
helpers for VSR register access")
Signed-off-by: Anton Blanchard 
---
 target/ppc/translate/vsx-impl.inc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/target/ppc/translate/vsx-impl.inc.c 
b/target/ppc/translate/vsx-impl.inc.c
index d050cc03ed..05b75105be 100644
--- a/target/ppc/translate/vsx-impl.inc.c
+++ b/target/ppc/translate/vsx-impl.inc.c
@@ -1192,7 +1192,7 @@ static void gen_xxbrq(DisasContext *ctx)
 tcg_gen_bswap64_i64(xtl, xbh);
 set_cpu_vsrl(xT(ctx->opcode), xtl);
 tcg_gen_mov_i64(xth, t0);
-set_cpu_vsrl(xT(ctx->opcode), xth);
+set_cpu_vsrh(xT(ctx->opcode), xth);
 
 tcg_temp_free_i64(t0);
 tcg_temp_free_i64(xth);
@@ -1220,7 +1220,7 @@ static void gen_xxbrw(DisasContext *ctx)
 get_cpu_vsrl(xbl, xB(ctx->opcode));
 
 gen_bswap32x4(xth, xtl, xbh, xbl);
-set_cpu_vsrl(xT(ctx->opcode), xth);
+set_cpu_vsrh(xT(ctx->opcode), xth);
 set_cpu_vsrl(xT(ctx->opcode), xtl);
 
 tcg_temp_free_i64(xth);
-- 
2.20.1




[Qemu-devel] [PATCH 2/9] target/ppc: Fix xxspltib

2019-05-06 Thread Anton Blanchard
xxspltib raises a VMX or a VSX exception depending on the register
set it is operating on. We had a check, but it was backwards.

Fixes: f113283525a4 ("target-ppc: add xxspltib instruction")
Signed-off-by: Anton Blanchard 
---
 target/ppc/translate/vsx-impl.inc.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/target/ppc/translate/vsx-impl.inc.c 
b/target/ppc/translate/vsx-impl.inc.c
index 4d8ca7cf32..d050cc03ed 100644
--- a/target/ppc/translate/vsx-impl.inc.c
+++ b/target/ppc/translate/vsx-impl.inc.c
@@ -1355,16 +1355,17 @@ static void gen_xxspltib(DisasContext *ctx)
 int rt = xT(ctx->opcode);
 
 if (rt < 32) {
-if (unlikely(!ctx->altivec_enabled)) {
-gen_exception(ctx, POWERPC_EXCP_VPU);
+if (unlikely(!ctx->vsx_enabled)) {
+gen_exception(ctx, POWERPC_EXCP_VSXU);
 return;
 }
 } else {
-if (unlikely(!ctx->vsx_enabled)) {
-gen_exception(ctx, POWERPC_EXCP_VSXU);
+if (unlikely(!ctx->altivec_enabled)) {
+gen_exception(ctx, POWERPC_EXCP_VPU);
 return;
 }
 }
+printf("XT %x IMM8 %x\n", rt, uim8);
 tcg_gen_gvec_dup8i(vsr_full_offset(rt), 16, 16, uim8);
 }
 
-- 
2.20.1




[Qemu-devel] [PATCH 5/9] target/ppc: Fix xvabs[sd]p, xvnabs[sd]p, xvneg[sd]p, xvcpsgn[sd]p

2019-05-06 Thread Anton Blanchard
We were using set_cpu_vsr*() when we should have used get_cpu_vsr*().

Fixes: 8b3b2d75c7c0 ("introduce get_cpu_vsr{l,h}() and set_cpu_vsr{l,h}() 
helpers for VSR register access")
Signed-off-by: Anton Blanchard 
---
 target/ppc/translate/vsx-impl.inc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/target/ppc/translate/vsx-impl.inc.c 
b/target/ppc/translate/vsx-impl.inc.c
index c13f84e745..0a48020e3b 100644
--- a/target/ppc/translate/vsx-impl.inc.c
+++ b/target/ppc/translate/vsx-impl.inc.c
@@ -859,8 +859,8 @@ static void glue(gen_, name)(DisasContext *ctx) 
 \
 xbh = tcg_temp_new_i64();\
 xbl = tcg_temp_new_i64();\
 sgm = tcg_temp_new_i64();\
-set_cpu_vsrh(xB(ctx->opcode), xbh);  \
-set_cpu_vsrl(xB(ctx->opcode), xbl);  \
+get_cpu_vsrh(xbh, xB(ctx->opcode));  \
+get_cpu_vsrl(xbl, xB(ctx->opcode));  \
 tcg_gen_movi_i64(sgm, sgn_mask); \
 switch (op) {\
 case OP_ABS: {   \
-- 
2.20.1




[Qemu-devel] [PATCH 7/9] target/ppc: Fix vrlwmi and vrlwnm

2019-05-06 Thread Anton Blanchard
We should only look at 5 bits of each byte, not 6.

Fixes: 3e00884f4e9f ("target-ppc: add vrldnmi and vrlwmi instructions")
Signed-off-by: Anton Blanchard 
---
 target/ppc/int_helper.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
index fd715b4076..111586c981 100644
--- a/target/ppc/int_helper.c
+++ b/target/ppc/int_helper.c
@@ -1652,7 +1652,7 @@ void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, 
ppc_avr_t *b)
 }
 }
 
-#define VRLMI(name, size, element, insert)\
+#define VRLMI(name, size, element, insert, modifier_bits) \
 void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)  \
 { \
 int i;\
@@ -1662,9 +1662,9 @@ void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t 
*b)  \
 uint##size##_t src3 = r->element[i];  \
 uint##size##_t begin, end, shift, mask, rot_val;  \
   \
-shift = extract##size(src2, 0, 6);\
-end   = extract##size(src2, 8, 6);\
-begin = extract##size(src2, 16, 6);   \
+shift = extract##size(src2, 0, modifier_bits);\
+end   = extract##size(src2, 8, modifier_bits);\
+begin = extract##size(src2, 16, modifier_bits);   \
 rot_val = rol##size(src1, shift); \
 mask = mask_u##size(begin, end);  \
 if (insert) { \
@@ -1675,10 +1675,10 @@ void helper_##name(ppc_avr_t *r, ppc_avr_t *a, 
ppc_avr_t *b)  \
 } \
 }
 
-VRLMI(vrldmi, 64, u64, 1);
-VRLMI(vrlwmi, 32, u32, 1);
-VRLMI(vrldnm, 64, u64, 0);
-VRLMI(vrlwnm, 32, u32, 0);
+VRLMI(vrldmi, 64, u64, 1, 6);
+VRLMI(vrlwmi, 32, u32, 1, 5);
+VRLMI(vrldnm, 64, u64, 0, 6);
+VRLMI(vrlwnm, 32, u32, 0, 5);
 
 void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
  ppc_avr_t *c)
-- 
2.20.1




[Qemu-devel] [PATCH 4/9] target/ppc: Fix lxvw4x, lxvh8x and lxvb16x

2019-05-06 Thread Anton Blanchard
During the conversion these instructions were incorrectly treated as
stores. We need to use set_cpu_vsr* and not get_cpu_vsr*.

Fixes: 8b3b2d75c7c0 ("introduce get_cpu_vsr{l,h}() and set_cpu_vsr{l,h}() 
helpers for VSR register access")
Signed-off-by: Anton Blanchard 
---
 target/ppc/translate/vsx-impl.inc.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/target/ppc/translate/vsx-impl.inc.c 
b/target/ppc/translate/vsx-impl.inc.c
index 05b75105be..c13f84e745 100644
--- a/target/ppc/translate/vsx-impl.inc.c
+++ b/target/ppc/translate/vsx-impl.inc.c
@@ -102,8 +102,7 @@ static void gen_lxvw4x(DisasContext *ctx)
 }
 xth = tcg_temp_new_i64();
 xtl = tcg_temp_new_i64();
-get_cpu_vsrh(xth, xT(ctx->opcode));
-get_cpu_vsrl(xtl, xT(ctx->opcode));
+
 gen_set_access_type(ctx, ACCESS_INT);
 EA = tcg_temp_new();
 
@@ -126,6 +125,8 @@ static void gen_lxvw4x(DisasContext *ctx)
 tcg_gen_addi_tl(EA, EA, 8);
 tcg_gen_qemu_ld_i64(xtl, EA, ctx->mem_idx, MO_BEQ);
 }
+set_cpu_vsrh(xT(ctx->opcode), xth);
+set_cpu_vsrl(xT(ctx->opcode), xtl);
 tcg_temp_free(EA);
 tcg_temp_free_i64(xth);
 tcg_temp_free_i64(xtl);
@@ -185,8 +186,6 @@ static void gen_lxvh8x(DisasContext *ctx)
 }
 xth = tcg_temp_new_i64();
 xtl = tcg_temp_new_i64();
-get_cpu_vsrh(xth, xT(ctx->opcode));
-get_cpu_vsrl(xtl, xT(ctx->opcode));
 gen_set_access_type(ctx, ACCESS_INT);
 
 EA = tcg_temp_new();
@@ -197,6 +196,8 @@ static void gen_lxvh8x(DisasContext *ctx)
 if (ctx->le_mode) {
 gen_bswap16x8(xth, xtl, xth, xtl);
 }
+set_cpu_vsrh(xT(ctx->opcode), xth);
+set_cpu_vsrl(xT(ctx->opcode), xtl);
 tcg_temp_free(EA);
 tcg_temp_free_i64(xth);
 tcg_temp_free_i64(xtl);
@@ -214,14 +215,14 @@ static void gen_lxvb16x(DisasContext *ctx)
 }
 xth = tcg_temp_new_i64();
 xtl = tcg_temp_new_i64();
-get_cpu_vsrh(xth, xT(ctx->opcode));
-get_cpu_vsrl(xtl, xT(ctx->opcode));
 gen_set_access_type(ctx, ACCESS_INT);
 EA = tcg_temp_new();
 gen_addr_reg_index(ctx, EA);
 tcg_gen_qemu_ld_i64(xth, EA, ctx->mem_idx, MO_BEQ);
 tcg_gen_addi_tl(EA, EA, 8);
 tcg_gen_qemu_ld_i64(xtl, EA, ctx->mem_idx, MO_BEQ);
+set_cpu_vsrh(xT(ctx->opcode), xth);
+set_cpu_vsrl(xT(ctx->opcode), xtl);
 tcg_temp_free(EA);
 tcg_temp_free_i64(xth);
 tcg_temp_free_i64(xtl);
-- 
2.20.1




[Qemu-devel] [PATCH 6/9] target/ppc: Fix vslv and vsrv

2019-05-06 Thread Anton Blanchard
vslv and vsrv are broken on little endian, we append 00 to the
high byte not the low byte. Fix it by using the VsrB() accessor.

Signed-off-by: Anton Blanchard 
---
 target/ppc/int_helper.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
index f6a088ac08..fd715b4076 100644
--- a/target/ppc/int_helper.c
+++ b/target/ppc/int_helper.c
@@ -1800,10 +1800,10 @@ void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t 
*b)
 
 size = ARRAY_SIZE(r->u8);
 for (i = 0; i < size; i++) {
-shift = b->u8[i] & 0x7; /* extract shift value */
-bytes = (a->u8[i] << 8) + /* extract adjacent bytes */
-(((i + 1) < size) ? a->u8[i + 1] : 0);
-r->u8[i] = (bytes << shift) >> 8;   /* shift and store result */
+shift = b->VsrB(i) & 0x7; /* extract shift value */
+bytes = (a->VsrB(i) << 8) +   /* extract adjacent bytes */
+(((i + 1) < size) ? a->VsrB(i + 1) : 0);
+r->VsrB(i) = (bytes << shift) >> 8;   /* shift and store result */
 }
 }
 
@@ -1818,10 +1818,10 @@ void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t 
*b)
  * order will guarantee that computed result is not fed back.
  */
 for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
-shift = b->u8[i] & 0x7; /* extract shift value */
-bytes = ((i ? a->u8[i - 1] : 0) << 8) + a->u8[i];
+shift = b->VsrB(i) & 0x7;   /* extract shift value */
+bytes = ((i ? a->VsrB(i - 1) : 0) << 8) + a->VsrB(i);
 /* extract adjacent bytes */
-r->u8[i] = (bytes >> shift) & 0xFF; /* shift and store result */
+r->VsrB(i) = (bytes >> shift) & 0xFF;   /* shift and store result */
 }
 }
 
-- 
2.20.1




[Qemu-devel] [PATCH 8/9] target/ppc: Fix dtstsfi and dtstsfiq

2019-05-06 Thread Anton Blanchard
The immediate field is 6 bits, not 5.

Fixes: 217f6b88058f ("target-ppc: add dtstsfi[q] instructions")
Signed-off-by: Anton Blanchard 
---
 target/ppc/internal.h   | 2 ++
 target/ppc/translate/dfp-impl.inc.c | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/target/ppc/internal.h b/target/ppc/internal.h
index fb6f64ed1e..4719369cc5 100644
--- a/target/ppc/internal.h
+++ b/target/ppc/internal.h
@@ -123,6 +123,8 @@ static inline uint32_t SPR(uint32_t opcode)
 EXTRACT_SHELPER(SIMM, 0, 16);
 /* 16 bits unsigned immediate value */
 EXTRACT_HELPER(UIMM, 0, 16);
+/* 6 bits unsigned immediate value */
+EXTRACT_HELPER(UIMM6, 16, 6);
 /* 5 bits signed immediate value */
 EXTRACT_SHELPER(SIMM5, 16, 5);
 /* 5 bits signed immediate value */
diff --git a/target/ppc/translate/dfp-impl.inc.c 
b/target/ppc/translate/dfp-impl.inc.c
index 6c556dc2e1..5b01c9239d 100644
--- a/target/ppc/translate/dfp-impl.inc.c
+++ b/target/ppc/translate/dfp-impl.inc.c
@@ -55,7 +55,7 @@ static void gen_##name(DisasContext *ctx) \
 return;   \
 } \
 gen_update_nip(ctx, ctx->base.pc_next - 4);\
-uim = tcg_const_i32(UIMM5(ctx->opcode));  \
+uim = tcg_const_i32(UIMM6(ctx->opcode));  \
 rb = gen_fprp_ptr(rB(ctx->opcode));   \
 gen_helper_##name(cpu_crf[crfD(ctx->opcode)], \
   cpu_env, uim, rb);  \
-- 
2.20.1




[Qemu-devel] [PATCH 9/9] target/ppc: Fix vsum2sws

2019-05-06 Thread Anton Blanchard
A recent cleanup changed the pre zeroing of the result from 64 bit
to 32 bit operations:

-result.u64[i] = 0;
+result.VsrW(i) = 0;

This corrupts the result.

Fixes: 60594fea298d ("target/ppc: remove various HOST_WORDS_BIGENDIAN hacks in 
int_helper.c")
Signed-off-by: Anton Blanchard 
---
 target/ppc/int_helper.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
index 111586c981..b8b3279f71 100644
--- a/target/ppc/int_helper.c
+++ b/target/ppc/int_helper.c
@@ -2038,7 +2038,7 @@ void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, 
ppc_avr_t *a, ppc_avr_t *b)
 for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
 int64_t t = (int64_t)b->VsrSW(upper + i * 2);
 
-result.VsrW(i) = 0;
+result.VsrD(i) = 0;
 for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
 t += a->VsrSW(2 * i + j);
 }
-- 
2.20.1




Re: [Qemu-devel] [PATCH 1/9] target/ppc: Fix xvxsigdp

2019-05-06 Thread Anton Blanchard
Hi Alexey,

> Out of curiosity - how did you find this one and (especially) the next
> one - "Fix xxspltib"? Is there some testsuite, or by just looking at
> the code? Thanks,

I'm running test cases and comparing results between QEMU and real
hardware.

Thanks,
Anton



[Qemu-devel] [PATCH v2] target/ppc: Fix xxspltib

2019-05-08 Thread Anton Blanchard
xxspltib raises a VMX or a VSX exception depending on the register
set it is operating on. We had a check, but it was backwards.

Fixes: f113283525a4 ("target-ppc: add xxspltib instruction")
Signed-off-by: Anton Blanchard 
---
 target/ppc/translate/vsx-impl.inc.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/target/ppc/translate/vsx-impl.inc.c 
b/target/ppc/translate/vsx-impl.inc.c
index 4d8ca7cf32..4812a374aa 100644
--- a/target/ppc/translate/vsx-impl.inc.c
+++ b/target/ppc/translate/vsx-impl.inc.c
@@ -1355,13 +1355,13 @@ static void gen_xxspltib(DisasContext *ctx)
 int rt = xT(ctx->opcode);
 
 if (rt < 32) {
-if (unlikely(!ctx->altivec_enabled)) {
-gen_exception(ctx, POWERPC_EXCP_VPU);
+if (unlikely(!ctx->vsx_enabled)) {
+gen_exception(ctx, POWERPC_EXCP_VSXU);
 return;
 }
 } else {
-if (unlikely(!ctx->vsx_enabled)) {
-gen_exception(ctx, POWERPC_EXCP_VSXU);
+if (unlikely(!ctx->altivec_enabled)) {
+gen_exception(ctx, POWERPC_EXCP_VPU);
 return;
 }
 }
-- 
2.20.1




[Qemu-devel] [PATCH] target/ppc: Optimise VSX_LOAD_SCALAR_DS and VSX_VECTOR_LOAD_STORE

2019-05-08 Thread Anton Blanchard
A few small optimisations:

In VSX_LOAD_SCALAR_DS() we don't need to read the VSR via
get_cpu_vsrh().

Split VSX_VECTOR_LOAD_STORE() into two functions. Loads only need to
write the VSRs (set_cpu_vsr*()) and stores only need to read the VSRs
(get_cpu_vsr*())

Thanks to Mark Cave-Ayland for the suggestions.

Signed-off-by: Anton Blanchard 
---
 target/ppc/translate/vsx-impl.inc.c | 68 -
 1 file changed, 58 insertions(+), 10 deletions(-)

diff --git a/target/ppc/translate/vsx-impl.inc.c 
b/target/ppc/translate/vsx-impl.inc.c
index 4b7627f53b..cdb44b8b70 100644
--- a/target/ppc/translate/vsx-impl.inc.c
+++ b/target/ppc/translate/vsx-impl.inc.c
@@ -228,7 +228,7 @@ static void gen_lxvb16x(DisasContext *ctx)
 tcg_temp_free_i64(xtl);
 }
 
-#define VSX_VECTOR_LOAD_STORE(name, op, indexed)\
+#define VSX_VECTOR_LOAD(name, op, indexed)  \
 static void gen_##name(DisasContext *ctx)   \
 {   \
 int xt; \
@@ -255,8 +255,6 @@ static void gen_##name(DisasContext *ctx)   
\
 }   \
 xth = tcg_temp_new_i64();   \
 xtl = tcg_temp_new_i64();   \
-get_cpu_vsrh(xth, xt);  \
-get_cpu_vsrl(xtl, xt);  \
 gen_set_access_type(ctx, ACCESS_INT);   \
 EA = tcg_temp_new();\
 if (indexed) {  \
@@ -282,10 +280,61 @@ static void gen_##name(DisasContext *ctx) 
  \
 tcg_temp_free_i64(xtl); \
 }
 
-VSX_VECTOR_LOAD_STORE(lxv, ld_i64, 0)
-VSX_VECTOR_LOAD_STORE(stxv, st_i64, 0)
-VSX_VECTOR_LOAD_STORE(lxvx, ld_i64, 1)
-VSX_VECTOR_LOAD_STORE(stxvx, st_i64, 1)
+VSX_VECTOR_LOAD(lxv, ld_i64, 0)
+VSX_VECTOR_LOAD(lxvx, ld_i64, 1)
+
+#define VSX_VECTOR_STORE(name, op, indexed) \
+static void gen_##name(DisasContext *ctx)   \
+{   \
+int xt; \
+TCGv EA;\
+TCGv_i64 xth;   \
+TCGv_i64 xtl;   \
+\
+if (indexed) {  \
+xt = xT(ctx->opcode);   \
+} else {\
+xt = DQxT(ctx->opcode); \
+}   \
+\
+if (xt < 32) {  \
+if (unlikely(!ctx->vsx_enabled)) {  \
+gen_exception(ctx, POWERPC_EXCP_VSXU);  \
+return; \
+}   \
+} else {\
+if (unlikely(!ctx->altivec_enabled)) {  \
+gen_exception(ctx, POWERPC_EXCP_VPU);   \
+return; \
+}   \
+}   \
+xth = tcg_temp_new_i64();   \
+xtl = tcg_temp_new_i64();   \
+get_cpu_vsrh(xth, xt);  \
+get_cpu_vsrl(xtl, xt);  \
+gen_set_access_type(ctx, ACCESS_INT);   \
+EA = tcg_temp_new();\
+if (indexed) {  \
+gen_addr_reg_index(ctx, EA);\
+} else {\
+gen_addr_imm_index(ctx, EA, 0x0F);  \
+}   \
+if (ctx->le_mode) { \
+tcg_gen_qemu_##op(xtl, EA, ctx->mem_idx, MO_LEQ);   \
+tcg_gen_addi_tl(EA, EA, 8); \
+tcg_gen_qemu_##op(xth, EA, ctx->mem_idx, MO_LEQ);   \
+} else {\
+tcg_gen_qemu_##op(xth, EA, ctx->mem_idx, MO_BEQ);   \
+tcg_gen_addi_tl(EA, EA, 8); \
+tcg_gen_qemu_##op(xtl, EA, ctx->mem_idx, MO_BEQ);   \
+}   \
+tcg_temp_free(EA);   

Re: [Qemu-devel] [PATCH 4/9] target/ppc: Fix lxvw4x, lxvh8x and lxvb16x

2019-05-08 Thread Anton Blanchard
Hi Mark,

> Following on from this I've just gone through the load/store
> operations once again and spotted two things:
> 
> 
> 1) VSX_LOAD_SCALAR_DS has an extra get_cpu_vsrh() which can be removed
> 
> diff --git a/target/ppc/translate/vsx-impl.inc.c
> b/target/ppc/translate/vsx-impl.inc.c index 11d9b75d01..004ea56c4f
> 100644 --- a/target/ppc/translate/vsx-impl.inc.c
> +++ b/target/ppc/translate/vsx-impl.inc.c
> @@ -329,7 +329,6 @@ static void gen_##name(DisasContext
> *ctx) \
> return;
> \ } \ xth
> = tcg_temp_new_i64(); \
> -get_cpu_vsrh(xth, rD(ctx->opcode) + 32);  \
>  gen_set_access_type(ctx, ACCESS_INT); \
>  EA = tcg_temp_new();  \
>  gen_addr_imm_index(ctx, EA, 0x03);\

Looks good. I also noticed we had two stores that needed to be fixed:

VSX_LOAD_SCALAR_DS(stxsd, st64_i64)
VSX_LOAD_SCALAR_DS(stxssp, st32fs)

> 2) VSX_VECTOR_LOAD_STORE is confusing and should be split into
> separate VSX_VECTOR_LOAD and VSX_VECTOR_STORE macros

Good idea. I also removed what I assume are redundant set_cpu_vsr*
and get_cpu_vsr* calls.

> Does that sound reasonable? I'm also thinking that we should consider
> adding a CC to stable for patches 4, 5 and 9 in this series since
> these are genuine regressions.

Fine with me. If David agrees, I'm not sure if he can rebase them or
if I can send them manually if they have been already committed.

Thanks,
Anton



[Qemu-devel] [PATCH v2] target/ppc: Fix xvabs[sd]p, xvnabs[sd]p, xvneg[sd]p, xvcpsgn[sd]p

2019-05-08 Thread Anton Blanchard
We were using set_cpu_vsr*() when we should have used get_cpu_vsr*().

Fixes: 8b3b2d75c7c0 ("introduce get_cpu_vsr{l,h}() and set_cpu_vsr{l,h}() 
helpers for VSR register access")
Signed-off-by: Anton Blanchard 
---
 target/ppc/translate/vsx-impl.inc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/target/ppc/translate/vsx-impl.inc.c 
b/target/ppc/translate/vsx-impl.inc.c
index b487136d52..4b7627f53b 100644
--- a/target/ppc/translate/vsx-impl.inc.c
+++ b/target/ppc/translate/vsx-impl.inc.c
@@ -859,8 +859,8 @@ static void glue(gen_, name)(DisasContext *ctx) 
 \
 xbh = tcg_temp_new_i64();\
 xbl = tcg_temp_new_i64();\
 sgm = tcg_temp_new_i64();\
-set_cpu_vsrh(xB(ctx->opcode), xbh);  \
-set_cpu_vsrl(xB(ctx->opcode), xbl);  \
+get_cpu_vsrh(xbh, xB(ctx->opcode));  \
+get_cpu_vsrl(xbl, xB(ctx->opcode));  \
 tcg_gen_movi_i64(sgm, sgn_mask); \
 switch (op) {\
 case OP_ABS: {   \
-- 
2.20.1




[Qemu-devel] [PATCH 1/2] powerpc: Fix emulation of mcrf in emulate_step()

2017-06-14 Thread Anton Blanchard
From: Anton Blanchard 

The mcrf emulation code was looking at the CR fields in the reverse
order. It also relied on reserved fields being zero which is somewhat
fragile, so fix that too.

Cc: sta...@vger.kernel.org
Signed-off-by: Anton Blanchard 
---
 arch/powerpc/lib/sstep.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index 33117f8a0882..fb84f51b1f0b 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -683,8 +683,10 @@ int analyse_instr(struct instruction_op *op, struct 
pt_regs *regs,
case 19:
switch ((instr >> 1) & 0x3ff) {
case 0: /* mcrf */
-   rd = (instr >> 21) & 0x1c;
-   ra = (instr >> 16) & 0x1c;
+   rd = 7 - ((instr >> 23) & 0x7);
+   ra = 7 - ((instr >> 18) & 0x7);
+   rd *= 4;
+   ra *= 4;
val = (regs->ccr >> ra) & 0xf;
regs->ccr = (regs->ccr & ~(0xfUL << rd)) | (val << rd);
goto instr_done;
-- 
2.11.0




[Qemu-devel] [PATCH] target/ppc: Fix size of struct PPCElfPrstatus

2017-04-10 Thread Anton Blanchard
From: Anton Blanchard 

gdb refuses to parse QEMU memory dumps because struct PPCElfPrstatus
is the wrong size. Fix it.

Signed-off-by: Anton Blanchard 
Fixes: e62fbc54d459 ("target-ppc: dump-guest-memory support")
---
 target/ppc/arch_dump.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target/ppc/arch_dump.c b/target/ppc/arch_dump.c
index 28d9cc7d79..8e9397aa58 100644
--- a/target/ppc/arch_dump.c
+++ b/target/ppc/arch_dump.c
@@ -50,7 +50,7 @@ struct PPCUserRegStruct {
 struct PPCElfPrstatus {
 char pad1[112];
 struct PPCUserRegStruct pr_reg;
-reg_t pad2[4];
+char pad2[40];
 } QEMU_PACKED;
 
 
-- 
2.11.0




Re: [Qemu-devel] [Qemu-ppc] KVM-PR is broken with current QEMU

2016-09-22 Thread Anton Blanchard
Hi Thomas,

> So if you like, I can try to come up with a small patch series that
> cleans up this mess - and I could also include an updated versions of
> Anton's patch there unless he wants to redo the changes on his own...?

Thanks for looking at this. I'm travelling (stuck in an airport at the
moment) and won't be able to get to this for a few days. If you could
incorporate my fixes that would be great!

From memory we were waiting on KVM_CAP_PPC_HTM, which thanks to Sam is
now upstream in 23528bb21ee2

Anton



Re: [Qemu-devel] [PATCH 2/2] ppc: Fix 64K pages support in full emulation

2016-06-30 Thread Anton Blanchard
Hi,

> From: Benjamin Herrenschmidt 
> 
> We were always advertising only 4K & 16M. Additionally the code wasn't
> properly matching the page size with the PTE content, which meant we
> could potentially hit an incorrect PTE if the guest used multiple
> sizes.
> 
> Finally, honor the CPU capabilities when decoding the size from the
> SLB so we don't try to use 64K pages on 970.
> 
> This still doesn't add support for MPSS (Multiple Page Sizes per
> Segment)

This is causing issues booting an Ubuntu yakety cloud image. I'm
running on a ppc64le box (I don't think it reproduces on x86-64).

cat << EOF > my-user-data
#cloud-config
password: password
chpasswd: { expire: False }
ssh_pwauth: True
EOF

cloud-localds my-seed.img my-user-data

wget -N 
https://cloud-images.ubuntu.com/yakkety/current/yakkety-server-cloudimg-ppc64el.img

qemu-system-ppc64 -M pseries -cpu POWER8 -nographic -vga none -m 4G -drive 
file=test.img -drive file=my-seed.img -net user -net nic

The cloud-init scripts never finish, so the ubuntu user's
password is never updated. With the above cloud config you
should be able to log in with ubuntu/password.

Anton



[Qemu-devel] [PATCH] ppc: Fix xsrdpi, xvrdpi and xvrspi rounding

2016-07-03 Thread Anton Blanchard
From: Anton Blanchard 

xsrdpi, xvrdpi and xvrspi use the round ties away method, not round
nearest even.

Signed-off-by: Anton Blanchard 
---
 target-ppc/fpu_helper.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/target-ppc/fpu_helper.c b/target-ppc/fpu_helper.c
index 4ef893b..d9795d0 100644
--- a/target-ppc/fpu_helper.c
+++ b/target-ppc/fpu_helper.c
@@ -2689,19 +2689,19 @@ void helper_##op(CPUPPCState *env, uint32_t opcode) 
   \
 helper_float_check_status(env);\
 }
 
-VSX_ROUND(xsrdpi, 1, float64, VsrD(0), float_round_nearest_even, 1)
+VSX_ROUND(xsrdpi, 1, float64, VsrD(0), float_round_ties_away, 1)
 VSX_ROUND(xsrdpic, 1, float64, VsrD(0), FLOAT_ROUND_CURRENT, 1)
 VSX_ROUND(xsrdpim, 1, float64, VsrD(0), float_round_down, 1)
 VSX_ROUND(xsrdpip, 1, float64, VsrD(0), float_round_up, 1)
 VSX_ROUND(xsrdpiz, 1, float64, VsrD(0), float_round_to_zero, 1)
 
-VSX_ROUND(xvrdpi, 2, float64, VsrD(i), float_round_nearest_even, 0)
+VSX_ROUND(xvrdpi, 2, float64, VsrD(i), float_round_ties_away, 0)
 VSX_ROUND(xvrdpic, 2, float64, VsrD(i), FLOAT_ROUND_CURRENT, 0)
 VSX_ROUND(xvrdpim, 2, float64, VsrD(i), float_round_down, 0)
 VSX_ROUND(xvrdpip, 2, float64, VsrD(i), float_round_up, 0)
 VSX_ROUND(xvrdpiz, 2, float64, VsrD(i), float_round_to_zero, 0)
 
-VSX_ROUND(xvrspi, 4, float32, VsrW(i), float_round_nearest_even, 0)
+VSX_ROUND(xvrspi, 4, float32, VsrW(i), float_round_ties_away, 0)
 VSX_ROUND(xvrspic, 4, float32, VsrW(i), FLOAT_ROUND_CURRENT, 0)
 VSX_ROUND(xvrspim, 4, float32, VsrW(i), float_round_down, 0)
 VSX_ROUND(xvrspip, 4, float32, VsrW(i), float_round_up, 0)
-- 
2.7.4




Re: [Qemu-devel] [PATCH] ppc: Fix xsrdpi, xvrdpi and xvrspi rounding

2016-07-04 Thread Anton Blanchard
Hi David,

> I take it float_round_ties_away is the same thing the architecture
> refers to as "round to Nearest Away"?

Yeah. I noticed it when 0.5 got rounded to 0 on QEMU and 1.0 on real
hardware.

Anton



[PATCH] ppc/spapr: Fix 32 bit logical memory block size assumptions

2020-07-14 Thread Anton Blanchard
When testing large LMB sizes (eg 4GB), I found a couple of places
that assume they are 32bit in size.

Signed-off-by: Anton Blanchard 
---
 hw/ppc/spapr.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index a1b06defe6..0ba2526215 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -558,7 +558,8 @@ static int 
spapr_dt_dynamic_reconfiguration_memory(SpaprMachineState *spapr,
 int nb_numa_nodes = machine->numa_state->num_nodes;
 int ret, i, offset;
 uint64_t lmb_size = SPAPR_MEMORY_BLOCK_SIZE;
-uint32_t prop_lmb_size[] = {0, cpu_to_be32(lmb_size)};
+uint32_t prop_lmb_size[] = {cpu_to_be32(lmb_size >> 32),
+cpu_to_be32(lmb_size & 0xffffffff)};
 uint32_t *int_buf, *cur_index, buf_len;
 int nr_nodes = nb_numa_nodes ? nb_numa_nodes : 1;
 MemoryDeviceInfoList *dimms = NULL;
@@ -899,7 +900,8 @@ static void spapr_dt_rtas(SpaprMachineState *spapr, void 
*fdt)
 uint32_t lrdr_capacity[] = {
 cpu_to_be32(max_device_addr >> 32),
 cpu_to_be32(max_device_addr & 0xffffffff),
-0, cpu_to_be32(SPAPR_MEMORY_BLOCK_SIZE),
+cpu_to_be32(SPAPR_MEMORY_BLOCK_SIZE >> 32),
+cpu_to_be32(SPAPR_MEMORY_BLOCK_SIZE & 0xffffffff),
 cpu_to_be32(ms->smp.max_cpus / ms->smp.threads),
 };
 uint32_t maxdomain = cpu_to_be32(spapr->gpu_numa_id > 1 ? 1 : 0);
-- 
2.26.2




Re: [Qemu-devel] [PATCH 4/9] target/ppc: Fix lxvw4x, lxvh8x and lxvb16x

2019-05-21 Thread Anton Blanchard
Hi,

> I've now had a bit of time to look through this and I believe it is
> correct, so:
> 
> Reviewed-by: Mark Cave-Ayland 

Thanks Mark. David: any chance we could get this merged? I can't run a
recent Ubuntu image successfully without it. sshd hangs when I try to
ssh into it.

Thanks,
Anton



[PATCH] target/riscv: Fix vcompress with rvv_ta_all_1s

2024-10-29 Thread Anton Blanchard
vcompress packs vl or fewer fields into vd, so the tail starts after the
last packed field.

Signed-off-by: Anton Blanchard 
---
 target/riscv/vector_helper.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index 072bd444b1..ccb32e6122 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -5132,7 +5132,7 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void 
*vs2,   \
 } \
 env->vstart = 0;  \
 /* set tail elements to 1s */ \
-vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);  \
+vext_set_elems_1s(vd, vta, num * esz, total_elems * esz); \
 }
 
 /* Compress into vd elements of vs2 where vs1 is enabled */
-- 
2.34.1




Re: [CAUTION - External Sender] Re: [PATCH] target/riscv: Fix vcompress with rvv_ta_all_1s

2024-10-29 Thread Anton Blanchard
Hi Alistair,

On Wed, Oct 30, 2024 at 2:39 PM Alistair Francis 
wrote:
> > vcompress packs vl or less fields into vd, so the tail starts after the
> > last packed field.
>
> Is that right?
>
> It's different from every other vector command. Although the wording
> in the spec is very confusing

It is confusing. This thread has some clarification, and we should probably
follow up on the suggestion to improve the ISA wording:

https://github.com/riscv/riscv-v-spec/issues/796

Thanks,
Anton


[PATCH v2] target/riscv: Fix vcompress with rvv_ta_all_1s

2024-10-29 Thread Anton Blanchard
vcompress packs vl or fewer fields into vd, so the tail starts after the
last packed field. This could be more clearly expressed in the ISA,
but for now this thread helps to explain it:

https://github.com/riscv/riscv-v-spec/issues/796

Signed-off-by: Anton Blanchard 
---
 target/riscv/vector_helper.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index 072bd444b1..ccb32e6122 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -5132,7 +5132,7 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void 
*vs2,   \
 } \
 env->vstart = 0;  \
 /* set tail elements to 1s */ \
-vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz);  \
+vext_set_elems_1s(vd, vta, num * esz, total_elems * esz); \
 }
 
 /* Compress into vd elements of vs2 where vs1 is enabled */
-- 
2.34.1




[PATCH v2] target/riscv: Add Tenstorrent Ascalon CPU

2024-11-13 Thread Anton Blanchard
Add a CPU entry for the Tenstorrent Ascalon CPU, a series of 2 wide to
8 wide RV64 cores. More details can be found at
https://tenstorrent.com/ip/tt-ascalon

Signed-off-by: Anton Blanchard 
---
 target/riscv/cpu-qom.h |  1 +
 target/riscv/cpu.c | 67 ++
 2 files changed, 68 insertions(+)

diff --git a/target/riscv/cpu-qom.h b/target/riscv/cpu-qom.h
index 62115375cd..6547642287 100644
--- a/target/riscv/cpu-qom.h
+++ b/target/riscv/cpu-qom.h
@@ -49,6 +49,7 @@
 #define TYPE_RISCV_CPU_SIFIVE_U54   RISCV_CPU_TYPE_NAME("sifive-u54")
 #define TYPE_RISCV_CPU_THEAD_C906   RISCV_CPU_TYPE_NAME("thead-c906")
 #define TYPE_RISCV_CPU_VEYRON_V1        RISCV_CPU_TYPE_NAME("veyron-v1")
+#define TYPE_RISCV_CPU_TT_ASCALON   RISCV_CPU_TYPE_NAME("tt-ascalon")
 #define TYPE_RISCV_CPU_HOST RISCV_CPU_TYPE_NAME("host")
 
 OBJECT_DECLARE_CPU_TYPE(RISCVCPU, RISCVCPUClass, RISCV_CPU)
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index f219f0c3b5..8447ad0dfb 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -579,6 +579,72 @@ static void rv64_veyron_v1_cpu_init(Object *obj)
 #endif
 }
 
+/* Tenstorrent Ascalon */
+static void rv64_tt_ascalon_cpu_init(Object *obj)
+{
+CPURISCVState *env = &RISCV_CPU(obj)->env;
+RISCVCPU *cpu = RISCV_CPU(obj);
+
+riscv_cpu_set_misa_ext(env, RVG | RVC | RVS | RVU | RVH | RVV);
+env->priv_ver = PRIV_VERSION_1_13_0;
+
+/* Enable ISA extensions */
+cpu->cfg.mmu = true;
+cpu->cfg.vlenb = 256 >> 3;
+cpu->cfg.elen = 64;
+cpu->env.vext_ver = VEXT_VERSION_1_00_0;
+cpu->cfg.rvv_ma_all_1s = true;
+cpu->cfg.rvv_ta_all_1s = true;
+cpu->cfg.misa_w = true;
+cpu->cfg.pmp = true;
+cpu->cfg.cbom_blocksize = 64;
+cpu->cfg.cbop_blocksize = 64;
+cpu->cfg.cboz_blocksize = 64;
+cpu->cfg.ext_zic64b = true;
+cpu->cfg.ext_zicbom = true;
+cpu->cfg.ext_zicbop = true;
+cpu->cfg.ext_zicboz = true;
+cpu->cfg.ext_zicntr = true;
+cpu->cfg.ext_zicond = true;
+cpu->cfg.ext_zicsr = true;
+cpu->cfg.ext_zifencei = true;
+cpu->cfg.ext_zihintntl = true;
+cpu->cfg.ext_zihintpause = true;
+cpu->cfg.ext_zihpm = true;
+cpu->cfg.ext_zimop = true;
+cpu->cfg.ext_zawrs = true;
+cpu->cfg.ext_zfa = true;
+cpu->cfg.ext_zfbfmin = true;
+cpu->cfg.ext_zfh = true;
+cpu->cfg.ext_zfhmin = true;
+cpu->cfg.ext_zcb = true;
+cpu->cfg.ext_zcmop = true;
+cpu->cfg.ext_zba = true;
+cpu->cfg.ext_zbb = true;
+cpu->cfg.ext_zbs = true;
+cpu->cfg.ext_zkt = true;
+cpu->cfg.ext_zvbb = true;
+cpu->cfg.ext_zvbc = true;
+cpu->cfg.ext_zvfbfmin = true;
+cpu->cfg.ext_zvfbfwma = true;
+cpu->cfg.ext_zvfh = true;
+cpu->cfg.ext_zvfhmin = true;
+cpu->cfg.ext_zvkng = true;
+cpu->cfg.ext_smaia = true;
+cpu->cfg.ext_smstateen = true;
+cpu->cfg.ext_ssaia = true;
+cpu->cfg.ext_sscofpmf = true;
+cpu->cfg.ext_sstc = true;
+cpu->cfg.ext_svade = true;
+cpu->cfg.ext_svinval = true;
+cpu->cfg.ext_svnapot = true;
+cpu->cfg.ext_svpbmt = true;
+
+#ifndef CONFIG_USER_ONLY
+set_satp_mode_max_supported(cpu, VM_1_10_SV57);
+#endif
+}
+
 #ifdef CONFIG_TCG
 static void rv128_base_cpu_init(Object *obj)
 {
@@ -2982,6 +3048,7 @@ static const TypeInfo riscv_cpu_type_infos[] = {
 DEFINE_VENDOR_CPU(TYPE_RISCV_CPU_SIFIVE_U54, MXL_RV64,  
rv64_sifive_u_cpu_init),
 DEFINE_VENDOR_CPU(TYPE_RISCV_CPU_SHAKTI_C,   MXL_RV64,  
rv64_sifive_u_cpu_init),
 DEFINE_VENDOR_CPU(TYPE_RISCV_CPU_THEAD_C906, MXL_RV64,  
rv64_thead_c906_cpu_init),
+DEFINE_VENDOR_CPU(TYPE_RISCV_CPU_TT_ASCALON, MXL_RV64,  
rv64_tt_ascalon_cpu_init),
 DEFINE_VENDOR_CPU(TYPE_RISCV_CPU_VEYRON_V1,  MXL_RV64,  
rv64_veyron_v1_cpu_init),
 #ifdef CONFIG_TCG
 DEFINE_DYNAMIC_CPU(TYPE_RISCV_CPU_BASE128,   MXL_RV128, 
rv128_base_cpu_init),
-- 
2.34.1




Re: [CAUTION - External Sender] Re: [PATCH] target/riscv: Add Tenstorrent Ascalon CPU

2024-11-09 Thread Anton Blanchard
Hi Philippe,

On Sun, Nov 10, 2024 at 5:21 AM Philippe Mathieu-Daudé
 wrote:
> Generally speaking (I'm not objecting to this patch as is), for
> DEFINE_VENDOR_CPU() it would be nice to have reference to some
> documentation -- at least to review whether the cpu features
> announced make sense or not --.
>
> For this particular IP I'm not finding anything on the company
> website...:
> https://docs.tenstorrent.com/search.html?q=Ascalon

This has some more details, including a one-page PDF. Should I add the URL to
the commit message?

https://tenstorrent.com/ip/tt-ascalon

Thanks,
Anton



[PATCH] target/riscv: Add Tenstorrent Ascalon CPU

2024-11-08 Thread Anton Blanchard
Add a CPU entry for the Tenstorrent Ascalon CPU, a series of 2-wide to
8-wide RV64 cores.

Signed-off-by: Anton Blanchard 
---
 target/riscv/cpu-qom.h |  1 +
 target/riscv/cpu.c | 67 ++
 2 files changed, 68 insertions(+)

diff --git a/target/riscv/cpu-qom.h b/target/riscv/cpu-qom.h
index 62115375cd..6547642287 100644
--- a/target/riscv/cpu-qom.h
+++ b/target/riscv/cpu-qom.h
@@ -49,6 +49,7 @@
 #define TYPE_RISCV_CPU_SIFIVE_U54   RISCV_CPU_TYPE_NAME("sifive-u54")
 #define TYPE_RISCV_CPU_THEAD_C906   RISCV_CPU_TYPE_NAME("thead-c906")
 #define TYPE_RISCV_CPU_VEYRON_V1 RISCV_CPU_TYPE_NAME("veyron-v1")
+#define TYPE_RISCV_CPU_TT_ASCALON   RISCV_CPU_TYPE_NAME("tt-ascalon")
 #define TYPE_RISCV_CPU_HOST RISCV_CPU_TYPE_NAME("host")
 
 OBJECT_DECLARE_CPU_TYPE(RISCVCPU, RISCVCPUClass, RISCV_CPU)
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index f219f0c3b5..8447ad0dfb 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -579,6 +579,72 @@ static void rv64_veyron_v1_cpu_init(Object *obj)
 #endif
 }
 
+/* Tenstorrent Ascalon */
+static void rv64_tt_ascalon_cpu_init(Object *obj)
+{
+CPURISCVState *env = &RISCV_CPU(obj)->env;
+RISCVCPU *cpu = RISCV_CPU(obj);
+
+riscv_cpu_set_misa_ext(env, RVG | RVC | RVS | RVU | RVH | RVV);
+env->priv_ver = PRIV_VERSION_1_13_0;
+
+/* Enable ISA extensions */
+cpu->cfg.mmu = true;
+cpu->cfg.vlenb = 256 >> 3;
+cpu->cfg.elen = 64;
+cpu->env.vext_ver = VEXT_VERSION_1_00_0;
+cpu->cfg.rvv_ma_all_1s = true;
+cpu->cfg.rvv_ta_all_1s = true;
+cpu->cfg.misa_w = true;
+cpu->cfg.pmp = true;
+cpu->cfg.cbom_blocksize = 64;
+cpu->cfg.cbop_blocksize = 64;
+cpu->cfg.cboz_blocksize = 64;
+cpu->cfg.ext_zic64b = true;
+cpu->cfg.ext_zicbom = true;
+cpu->cfg.ext_zicbop = true;
+cpu->cfg.ext_zicboz = true;
+cpu->cfg.ext_zicntr = true;
+cpu->cfg.ext_zicond = true;
+cpu->cfg.ext_zicsr = true;
+cpu->cfg.ext_zifencei = true;
+cpu->cfg.ext_zihintntl = true;
+cpu->cfg.ext_zihintpause = true;
+cpu->cfg.ext_zihpm = true;
+cpu->cfg.ext_zimop = true;
+cpu->cfg.ext_zawrs = true;
+cpu->cfg.ext_zfa = true;
+cpu->cfg.ext_zfbfmin = true;
+cpu->cfg.ext_zfh = true;
+cpu->cfg.ext_zfhmin = true;
+cpu->cfg.ext_zcb = true;
+cpu->cfg.ext_zcmop = true;
+cpu->cfg.ext_zba = true;
+cpu->cfg.ext_zbb = true;
+cpu->cfg.ext_zbs = true;
+cpu->cfg.ext_zkt = true;
+cpu->cfg.ext_zvbb = true;
+cpu->cfg.ext_zvbc = true;
+cpu->cfg.ext_zvfbfmin = true;
+cpu->cfg.ext_zvfbfwma = true;
+cpu->cfg.ext_zvfh = true;
+cpu->cfg.ext_zvfhmin = true;
+cpu->cfg.ext_zvkng = true;
+cpu->cfg.ext_smaia = true;
+cpu->cfg.ext_smstateen = true;
+cpu->cfg.ext_ssaia = true;
+cpu->cfg.ext_sscofpmf = true;
+cpu->cfg.ext_sstc = true;
+cpu->cfg.ext_svade = true;
+cpu->cfg.ext_svinval = true;
+cpu->cfg.ext_svnapot = true;
+cpu->cfg.ext_svpbmt = true;
+
+#ifndef CONFIG_USER_ONLY
+set_satp_mode_max_supported(cpu, VM_1_10_SV57);
+#endif
+}
+
 #ifdef CONFIG_TCG
 static void rv128_base_cpu_init(Object *obj)
 {
@@ -2982,6 +3048,7 @@ static const TypeInfo riscv_cpu_type_infos[] = {
 DEFINE_VENDOR_CPU(TYPE_RISCV_CPU_SIFIVE_U54, MXL_RV64,  
rv64_sifive_u_cpu_init),
 DEFINE_VENDOR_CPU(TYPE_RISCV_CPU_SHAKTI_C,   MXL_RV64,  
rv64_sifive_u_cpu_init),
 DEFINE_VENDOR_CPU(TYPE_RISCV_CPU_THEAD_C906, MXL_RV64,  
rv64_thead_c906_cpu_init),
+DEFINE_VENDOR_CPU(TYPE_RISCV_CPU_TT_ASCALON, MXL_RV64,  
rv64_tt_ascalon_cpu_init),
 DEFINE_VENDOR_CPU(TYPE_RISCV_CPU_VEYRON_V1,  MXL_RV64,  
rv64_veyron_v1_cpu_init),
 #ifdef CONFIG_TCG
 DEFINE_DYNAMIC_CPU(TYPE_RISCV_CPU_BASE128,   MXL_RV128, 
rv128_base_cpu_init),
-- 
2.34.1




[PATCH 04/12] target/riscv: handle vadd.vv form mask and source overlap

2025-01-25 Thread Anton Blanchard
Signed-off-by: Anton Blanchard 
---
 target/riscv/insn_trans/trans_rvv.c.inc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/target/riscv/insn_trans/trans_rvv.c.inc 
b/target/riscv/insn_trans/trans_rvv.c.inc
index bc2780497e..f5ba1c4280 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -403,6 +403,7 @@ static bool vext_check_ss(DisasContext *s, int vd, int vs, 
int vm)
 static bool vext_check_sss(DisasContext *s, int vd, int vs1, int vs2, int vm)
 {
 return vext_check_ss(s, vd, vs2, vm) &&
+   require_vm(vm, vs1) &&
require_align(vs1, s->lmul);
 }
 
-- 
2.34.1




[PATCH 01/12] target/riscv: Source vector registers cannot overlap mask register

2025-01-25 Thread Anton Blanchard
Add the relevant ISA paragraphs explaining why source (and destination)
registers cannot overlap the mask register.

Signed-off-by: Anton Blanchard 
---
 target/riscv/insn_trans/trans_rvv.c.inc | 29 ++---
 1 file changed, 26 insertions(+), 3 deletions(-)

diff --git a/target/riscv/insn_trans/trans_rvv.c.inc 
b/target/riscv/insn_trans/trans_rvv.c.inc
index b9883a5d32..20b1cb127b 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -100,10 +100,33 @@ static bool require_scale_rvfmin(DisasContext *s)
 }
 }
 
-/* Destination vector register group cannot overlap source mask register. */
-static bool require_vm(int vm, int vd)
+/*
+ * Source and destination vector register groups cannot overlap source mask
+ * register:
+ *
+ * A vector register cannot be used to provide source operands with more than
+ * one EEW for a single instruction. A mask register source is considered to
+ * have EEW=1 for this constraint. An encoding that would result in the same
+ * vector register being read with two or more different EEWs, including when
+ * the vector register appears at different positions within two or more vector
+ * register groups, is reserved.
+ * (Section 5.2)
+ *
+ * A destination vector register group can overlap a source vector
+ * register group only if one of the following holds:
+ *  1. The destination EEW equals the source EEW.
+ *  2. The destination EEW is smaller than the source EEW and the overlap
+ * is in the lowest-numbered part of the source register group.
+ *  3. The destination EEW is greater than the source EEW, the source EMUL
+ * is at least 1, and the overlap is in the highest-numbered part of
+ * the destination register group.
+ * For the purpose of determining register group overlap constraints, mask
+ * elements have EEW=1.
+ * (Section 5.2)
+ */
+static bool require_vm(int vm, int v)
 {
-return (vm != 0 || vd != 0);
+return (vm != 0 || v != 0);
 }
 
 static bool require_nf(int vd, int nf, int lmul)
-- 
2.34.1




[PATCH 10/12] target/riscv: handle vwadd.wv form vs1 and vs2 overlap

2025-01-25 Thread Anton Blanchard
For 2*SEW = 2*SEW op SEW instructions, vs2 and vs1 cannot overlap
because it would mean a register is read with two different SEW
settings.

Signed-off-by: Anton Blanchard 
---
 target/riscv/insn_trans/trans_rvv.c.inc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/target/riscv/insn_trans/trans_rvv.c.inc 
b/target/riscv/insn_trans/trans_rvv.c.inc
index 2309d9abd0..312d8b1b81 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -549,7 +549,8 @@ static bool vext_check_dds(DisasContext *s, int vd, int 
vs1, int vs2, int vm)
 {
 return vext_check_ds(s, vd, vs1, vm) &&
require_vm(vm, vs2) &&
-   require_align(vs2, s->lmul + 1);
+   require_align(vs2, s->lmul + 1) &&
+   !is_overlapped(vs2, 1 << MAX(s->lmul+1, 0), vs1, 1 << MAX(s->lmul, 
0));
 }
 
 static bool vext_check_sd(DisasContext *s, int vd, int vs, int vm)
-- 
2.34.1




[PATCH 08/12] target/riscv: handle vwadd.vv form mask and source overlap

2025-01-25 Thread Anton Blanchard
Signed-off-by: Anton Blanchard 
---
 target/riscv/insn_trans/trans_rvv.c.inc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/target/riscv/insn_trans/trans_rvv.c.inc 
b/target/riscv/insn_trans/trans_rvv.c.inc
index bc22b42801..45b2868c54 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -525,6 +525,7 @@ static bool vext_check_dd(DisasContext *s, int vd, int vs, 
int vm)
 static bool vext_check_dss(DisasContext *s, int vd, int vs1, int vs2, int vm)
 {
 return vext_check_ds(s, vd, vs2, vm) &&
+   require_vm(vm, vs1) &&
require_align(vs1, s->lmul) &&
require_noover(vd, s->lmul + 1, vs1, s->lmul);
 }
-- 
2.34.1




[PATCH 03/12] target/riscv: handle vadd.vx form mask and source overlap

2025-01-25 Thread Anton Blanchard
Signed-off-by: Anton Blanchard 
---
 target/riscv/insn_trans/trans_rvv.c.inc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/target/riscv/insn_trans/trans_rvv.c.inc 
b/target/riscv/insn_trans/trans_rvv.c.inc
index c66cd95bdb..bc2780497e 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -382,6 +382,7 @@ static bool vext_check_ld_index(DisasContext *s, int vd, 
int vs2,
 static bool vext_check_ss(DisasContext *s, int vd, int vs, int vm)
 {
 return require_vm(vm, vd) &&
+   require_vm(vm, vs) &&
require_align(vd, s->lmul) &&
require_align(vs, s->lmul);
 }
-- 
2.34.1




[PATCH 00/12] target/riscv: Fix some RISC-V instruction corner cases

2025-01-25 Thread Anton Blanchard
This series fixes some RISC-V instruction corner cases, specifically
illegal overlaps between mask and source registers, illegal overlaps
between source registers and illegal overlaps between source and
destination registers. These were found by looking at miscompares
between QEMU and the Tenstorrent fork of Whisper, which models this
behaviour better than Spike and Sail.

Anton Blanchard (12):
  target/riscv: Source vector registers cannot overlap mask register
  target/riscv: handle vrgather mask and source overlap
  target/riscv: handle vadd.vx form mask and source overlap
  target/riscv: handle vadd.vv form mask and source overlap
  target/riscv: handle vslide1down.vx form mask and source overlap
  target/riscv: handle vzext.vf2 form mask and source overlap
  target/riscv: handle vwadd.vx form mask and source overlap
  target/riscv: handle vwadd.vv form mask and source overlap
  target/riscv: handle vwadd.wv form mask and source overlap
  target/riscv: handle vwadd.wv form vs1 and vs2 overlap
  target/riscv: Add CHECK arg to GEN_OPFVF_WIDEN_TRANS
  target/riscv: handle overlap in widening instructions with overwrite

 target/riscv/insn_trans/trans_rvv.c.inc | 139 ++--
 1 file changed, 108 insertions(+), 31 deletions(-)

-- 
2.34.1




[PATCH 09/12] target/riscv: handle vwadd.wv form mask and source overlap

2025-01-25 Thread Anton Blanchard
Signed-off-by: Anton Blanchard 
---
 target/riscv/insn_trans/trans_rvv.c.inc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/target/riscv/insn_trans/trans_rvv.c.inc 
b/target/riscv/insn_trans/trans_rvv.c.inc
index 45b2868c54..2309d9abd0 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -548,6 +548,7 @@ static bool vext_check_dss(DisasContext *s, int vd, int 
vs1, int vs2, int vm)
 static bool vext_check_dds(DisasContext *s, int vd, int vs1, int vs2, int vm)
 {
 return vext_check_ds(s, vd, vs1, vm) &&
+   require_vm(vm, vs2) &&
require_align(vs2, s->lmul + 1);
 }
 
-- 
2.34.1




[PATCH 11/12] target/riscv: Add CHECK arg to GEN_OPFVF_WIDEN_TRANS

2025-01-25 Thread Anton Blanchard
Signed-off-by: Anton Blanchard 
---
 target/riscv/insn_trans/trans_rvv.c.inc | 18 +-
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/target/riscv/insn_trans/trans_rvv.c.inc 
b/target/riscv/insn_trans/trans_rvv.c.inc
index 312d8b1b81..2741f8bd8e 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -2410,10 +2410,10 @@ static bool opfvf_widen_check(DisasContext *s, arg_rmrr 
*a)
 }
 
 /* OPFVF with WIDEN */
-#define GEN_OPFVF_WIDEN_TRANS(NAME)  \
+#define GEN_OPFVF_WIDEN_TRANS(NAME, CHECK)   \
 static bool trans_##NAME(DisasContext *s, arg_rmrr *a)   \
 {\
-if (opfvf_widen_check(s, a)) {   \
+if (CHECK(s, a)) {   \
 uint32_t data = 0;   \
 static gen_helper_opfvf *const fns[2] = {\
 gen_helper_##NAME##_h, gen_helper_##NAME##_w,\
@@ -2429,8 +2429,8 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a)
   \
 return false;\
 }
 
-GEN_OPFVF_WIDEN_TRANS(vfwadd_vf)
-GEN_OPFVF_WIDEN_TRANS(vfwsub_vf)
+GEN_OPFVF_WIDEN_TRANS(vfwadd_vf, opfvf_widen_check)
+GEN_OPFVF_WIDEN_TRANS(vfwsub_vf, opfvf_widen_check)
 
 static bool opfwv_widen_check(DisasContext *s, arg_rmrr *a)
 {
@@ -2512,7 +2512,7 @@ GEN_OPFVF_TRANS(vfrdiv_vf,  opfvf_check)
 
 /* Vector Widening Floating-Point Multiply */
 GEN_OPFVV_WIDEN_TRANS(vfwmul_vv, opfvv_widen_check)
-GEN_OPFVF_WIDEN_TRANS(vfwmul_vf)
+GEN_OPFVF_WIDEN_TRANS(vfwmul_vf, opfvf_widen_check)
 
 /* Vector Single-Width Floating-Point Fused Multiply-Add Instructions */
 GEN_OPFVV_TRANS(vfmacc_vv, opfvv_check)
@@ -2537,10 +2537,10 @@ GEN_OPFVV_WIDEN_TRANS(vfwmacc_vv, opfvv_widen_check)
 GEN_OPFVV_WIDEN_TRANS(vfwnmacc_vv, opfvv_widen_check)
 GEN_OPFVV_WIDEN_TRANS(vfwmsac_vv, opfvv_widen_check)
 GEN_OPFVV_WIDEN_TRANS(vfwnmsac_vv, opfvv_widen_check)
-GEN_OPFVF_WIDEN_TRANS(vfwmacc_vf)
-GEN_OPFVF_WIDEN_TRANS(vfwnmacc_vf)
-GEN_OPFVF_WIDEN_TRANS(vfwmsac_vf)
-GEN_OPFVF_WIDEN_TRANS(vfwnmsac_vf)
+GEN_OPFVF_WIDEN_TRANS(vfwmacc_vf, opfvf_widen_check)
+GEN_OPFVF_WIDEN_TRANS(vfwnmacc_vf, opfvf_widen_check)
+GEN_OPFVF_WIDEN_TRANS(vfwmsac_vf, opfvf_widen_check)
+GEN_OPFVF_WIDEN_TRANS(vfwnmsac_vf, opfvf_widen_check)
 
 /* Vector Floating-Point Square-Root Instruction */
 
-- 
2.34.1




[PATCH 12/12] target/riscv: handle overlap in widening instructions with overwrite

2025-01-25 Thread Anton Blanchard
In these instructions vd is considered a source, so no overlap
is allowed between vd and vs1/vs2. See:

https://github.com/riscv/riscv-isa-manual/issues/1789

Signed-off-by: Anton Blanchard 
---
 target/riscv/insn_trans/trans_rvv.c.inc | 71 +++--
 1 file changed, 56 insertions(+), 15 deletions(-)

diff --git a/target/riscv/insn_trans/trans_rvv.c.inc 
b/target/riscv/insn_trans/trans_rvv.c.inc
index 2741f8bd8e..715008db79 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -1505,6 +1505,16 @@ static bool opivv_widen_check(DisasContext *s, arg_rmrr 
*a)
vext_check_dss(s, a->rd, a->rs1, a->rs2, a->vm);
 }
 
+/* OPIVV with overwrite and WIDEN */
+static bool opivv_overwrite_widen_check(DisasContext *s, arg_rmrr *a)
+{
+return require_rvv(s) &&
+   vext_check_isa_ill(s) &&
+   vext_check_dss(s, a->rd, a->rs1, a->rs2, a->vm) &&
+   !is_overlapped(a->rd, 1 << MAX(s->lmul+1, 0), a->rs1, 1 << 
MAX(s->lmul, 0)) &&
+   !is_overlapped(a->rd, 1 << MAX(s->lmul+1, 0), a->rs2, 1 << 
MAX(s->lmul, 0));
+}
+
 static bool do_opivv_widen(DisasContext *s, arg_rmrr *a,
gen_helper_gvec_4_ptr *fn,
bool (*checkfn)(DisasContext *, arg_rmrr *))
@@ -1552,6 +1562,15 @@ static bool opivx_widen_check(DisasContext *s, arg_rmrr 
*a)
vext_check_ds(s, a->rd, a->rs2, a->vm);
 }
 
+/* OPIVX with overwrite and WIDEN */
+static bool opivx_overwrite_widen_check(DisasContext *s, arg_rmrr *a)
+{
+return require_rvv(s) &&
+   vext_check_isa_ill(s) &&
+   vext_check_ds(s, a->rd, a->rs2, a->vm) &&
+   !is_overlapped(a->rd, 1 << MAX(s->lmul+1, 0), a->rs2, 1 << 
MAX(s->lmul, 0));
+}
+
 #define GEN_OPIVX_WIDEN_TRANS(NAME, CHECK) \
 static bool trans_##NAME(DisasContext *s, arg_rmrr *a)\
 { \
@@ -2023,13 +2042,13 @@ GEN_OPIVX_TRANS(vmadd_vx, opivx_check)
 GEN_OPIVX_TRANS(vnmsub_vx, opivx_check)
 
 /* Vector Widening Integer Multiply-Add Instructions */
-GEN_OPIVV_WIDEN_TRANS(vwmaccu_vv, opivv_widen_check)
-GEN_OPIVV_WIDEN_TRANS(vwmacc_vv, opivv_widen_check)
-GEN_OPIVV_WIDEN_TRANS(vwmaccsu_vv, opivv_widen_check)
-GEN_OPIVX_WIDEN_TRANS(vwmaccu_vx, opivx_widen_check)
-GEN_OPIVX_WIDEN_TRANS(vwmacc_vx, opivx_widen_check)
-GEN_OPIVX_WIDEN_TRANS(vwmaccsu_vx, opivx_widen_check)
-GEN_OPIVX_WIDEN_TRANS(vwmaccus_vx, opivx_widen_check)
+GEN_OPIVV_WIDEN_TRANS(vwmaccu_vv, opivv_overwrite_widen_check)
+GEN_OPIVV_WIDEN_TRANS(vwmacc_vv, opivv_overwrite_widen_check)
+GEN_OPIVV_WIDEN_TRANS(vwmaccsu_vv, opivv_overwrite_widen_check)
+GEN_OPIVX_WIDEN_TRANS(vwmaccu_vx, opivx_overwrite_widen_check)
+GEN_OPIVX_WIDEN_TRANS(vwmacc_vx, opivx_overwrite_widen_check)
+GEN_OPIVX_WIDEN_TRANS(vwmaccsu_vx, opivx_overwrite_widen_check)
+GEN_OPIVX_WIDEN_TRANS(vwmaccus_vx, opivx_overwrite_widen_check)
 
 /* Vector Integer Merge and Move Instructions */
 static bool trans_vmv_v_v(DisasContext *s, arg_vmv_v_v *a)
@@ -2370,6 +2389,18 @@ static bool opfvv_widen_check(DisasContext *s, arg_rmrr 
*a)
vext_check_dss(s, a->rd, a->rs1, a->rs2, a->vm);
 }
 
+/* Vector Widening Floating-Point Add/Subtract Instructions with overwrite */
+static bool opfvv_overwrite_widen_check(DisasContext *s, arg_rmrr *a)
+{
+return require_rvv(s) &&
+   require_rvf(s) &&
+   require_scale_rvf(s) &&
+   vext_check_isa_ill(s) &&
+   vext_check_dss(s, a->rd, a->rs1, a->rs2, a->vm) &&
+   !is_overlapped(a->rd, 1 << MAX(s->lmul+1, 0), a->rs1, 1 << 
MAX(s->lmul, 0)) &&
+   !is_overlapped(a->rd, 1 << MAX(s->lmul+1, 0), a->rs2, 1 << 
MAX(s->lmul, 0));
+}
+
 /* OPFVV with WIDEN */
 #define GEN_OPFVV_WIDEN_TRANS(NAME, CHECK)   \
 static bool trans_##NAME(DisasContext *s, arg_rmrr *a)   \
@@ -2409,6 +2440,16 @@ static bool opfvf_widen_check(DisasContext *s, arg_rmrr 
*a)
vext_check_ds(s, a->rd, a->rs2, a->vm);
 }
 
+static bool opfvf_overwrite_widen_check(DisasContext *s, arg_rmrr *a)
+{
+return require_rvv(s) &&
+   require_rvf(s) &&
+   require_scale_rvf(s) &&
+   vext_check_isa_ill(s) &&
+   vext_check_ds(s, a->rd, a->rs2, a->vm) &&
+   !is_overlapped(a->rd, 1 << MAX(s->lmul+1, 0), a->rs2, 1 << 
MAX(s->lmul, 0));
+}
+
 /* OPFVF with WIDEN */
 #define GEN_OPFVF_WIDEN_TRANS(NAME, CHECK)   \
 static bool trans_##NAME(DisasContext *s, arg_rmrr *a)   \
@@ -2533,14 +2574,14 @

[PATCH 07/12] target/riscv: handle vwadd.vx form mask and source overlap

2025-01-25 Thread Anton Blanchard
Signed-off-by: Anton Blanchard 
---
 target/riscv/insn_trans/trans_rvv.c.inc | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/target/riscv/insn_trans/trans_rvv.c.inc 
b/target/riscv/insn_trans/trans_rvv.c.inc
index 0952bcbe2c..bc22b42801 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -458,13 +458,14 @@ static bool vext_check_mss(DisasContext *s, int vd, int 
vs1, int vs2)
  *  instruction cannot overlap the source mask register (v0).
  *  (Section 5.3)
  */
-static bool vext_wide_check_common(DisasContext *s, int vd, int vm)
+static bool vext_wide_check_common(DisasContext *s, int vd, int vs, int vm)
 {
 return (s->lmul <= 2) &&
(s->sew < MO_64) &&
((s->sew + 1) <= (s->cfg_ptr->elen >> 4)) &&
require_align(vd, s->lmul + 1) &&
-   require_vm(vm, vd);
+   require_vm(vm, vd) &&
+   require_vm(vm, vs);
 }
 
 /*
@@ -498,14 +499,14 @@ static bool vext_narrow_check_common(DisasContext *s, int 
vd, int vs2,
 
 static bool vext_check_ds(DisasContext *s, int vd, int vs, int vm)
 {
-return vext_wide_check_common(s, vd, vm) &&
+return vext_wide_check_common(s, vd, vs, vm) &&
require_align(vs, s->lmul) &&
require_noover(vd, s->lmul + 1, vs, s->lmul);
 }
 
 static bool vext_check_dd(DisasContext *s, int vd, int vs, int vm)
 {
-return vext_wide_check_common(s, vd, vm) &&
+return vext_wide_check_common(s, vd, vs, vm) &&
require_align(vs, s->lmul + 1);
 }
 
-- 
2.34.1




[PATCH 02/12] target/riscv: handle vrgather mask and source overlap

2025-01-25 Thread Anton Blanchard
Signed-off-by: Anton Blanchard 
---
 target/riscv/insn_trans/trans_rvv.c.inc | 11 ---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/target/riscv/insn_trans/trans_rvv.c.inc 
b/target/riscv/insn_trans/trans_rvv.c.inc
index 20b1cb127b..c66cd95bdb 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -3453,7 +3453,9 @@ static bool vrgather_vv_check(DisasContext *s, arg_rmrr 
*a)
require_align(a->rs1, s->lmul) &&
require_align(a->rs2, s->lmul) &&
(a->rd != a->rs2 && a->rd != a->rs1) &&
-   require_vm(a->vm, a->rd);
+   require_vm(a->vm, a->rd) &&
+   require_vm(a->vm, a->rs1) &&
+   require_vm(a->vm, a->rs2);
 }
 
 static bool vrgatherei16_vv_check(DisasContext *s, arg_rmrr *a)
@@ -3470,7 +3472,9 @@ static bool vrgatherei16_vv_check(DisasContext *s, 
arg_rmrr *a)
   a->rs1, 1 << MAX(emul, 0)) &&
!is_overlapped(a->rd, 1 << MAX(s->lmul, 0),
   a->rs2, 1 << MAX(s->lmul, 0)) &&
-   require_vm(a->vm, a->rd);
+   require_vm(a->vm, a->rd) &&
+   require_vm(a->vm, a->rs1) &&
+   require_vm(a->vm, a->rs2);
 }
 
 GEN_OPIVV_TRANS(vrgather_vv, vrgather_vv_check)
@@ -3483,7 +3487,8 @@ static bool vrgather_vx_check(DisasContext *s, arg_rmrr 
*a)
require_align(a->rd, s->lmul) &&
require_align(a->rs2, s->lmul) &&
(a->rd != a->rs2) &&
-   require_vm(a->vm, a->rd);
+   require_vm(a->vm, a->rd) &&
+   require_vm(a->vm, a->rs2);
 }
 
 /* vrgather.vx vd, vs2, rs1, vm # vd[i] = (x[rs1] >= VLMAX) ? 0 : vs2[rs1] */
-- 
2.34.1




[PATCH 05/12] target/riscv: handle vslide1down.vx form mask and source overlap

2025-01-25 Thread Anton Blanchard
Signed-off-by: Anton Blanchard 
---
 target/riscv/insn_trans/trans_rvv.c.inc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/target/riscv/insn_trans/trans_rvv.c.inc 
b/target/riscv/insn_trans/trans_rvv.c.inc
index f5ba1c4280..a873536eea 100644
--- a/target/riscv/insn_trans/trans_rvv.c.inc
+++ b/target/riscv/insn_trans/trans_rvv.c.inc
@@ -609,6 +609,7 @@ static bool vext_check_slide(DisasContext *s, int vd, int 
vs2,
 {
 bool ret = require_align(vs2, s->lmul) &&
require_align(vd, s->lmul) &&
+   require_vm(vm, vs2) &&
require_vm(vm, vd);
 if (is_over) {
 ret &= (vd != vs2);
-- 
2.34.1




  1   2   >