date:20181117

[Qemu-devel] [PATCH] hw/arm/sysbus-fdt: fix assert in match function

2018-11-17 Thread Peng Hao

The match function should not call OBJECT_CHECK. When there is
a mismatch, we should continue matching rather than assert().

Signed-off-by: Peng Hao 
---
 hw/arm/sysbus-fdt.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/hw/arm/sysbus-fdt.c b/hw/arm/sysbus-fdt.c
index 0e24c80..41b962d 100644
--- a/hw/arm/sysbus-fdt.c
+++ b/hw/arm/sysbus-fdt.c
@@ -419,10 +419,15 @@ static int add_amd_xgbe_fdt_node(SysBusDevice *sbdev, 
void *opaque)
 static bool vfio_platform_match(SysBusDevice *sbdev,
 const BindingEntry *entry)
 {
-VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(sbdev);
+VFIOPlatformDevice *vdev;
 const char *compat;
 unsigned int n;
 
+vdev = (VFIOPlatformDevice *) object_dynamic_cast(OBJECT(sbdev),
+TYPE_VFIO_PLATFORM);
+if (!vdev)
+return false;
+
 for (n = vdev->num_compat, compat = vdev->compat; n > 0;
  n--, compat += strlen(compat) + 1) {
 if (!strcmp(entry->compat, compat)) {
-- 
1.8.3.1

Re: [Qemu-devel] [PATCH v2 05/22] hw/rdma: Add support for MAD packets

2018-11-17 Thread Marcel Apfelbaum


Hi Yuval,

On 11/11/18 12:31 PM, Yuval Shaia wrote:

On Sat, Nov 10, 2018 at 08:15:27PM +0200, Marcel Apfelbaum wrote:

Hi Yuval

On 11/8/18 6:08 PM, Yuval Shaia wrote:

MAD (Management Datagram) packets are widely used by various modules
both in kernel and in user space for example the rdma_* API which is
used to create and maintain "connection" layer on top of RDMA uses
several types of MAD packets.

Can you add a link to the MAD spec to the commit message, or even in the code?

I have no idea where to get it from; does it require a subscription or
something similar?


No subscription required:
    https://www.infinibandta.org/ibta-specifications-download/
    Volume 1 Architecture Specification, Release 1.1
    Chapter 13.4



To support MAD packets the device uses an external utility
(contrib/rdmacm-mux) to relay packets from and to the guest driver.

Can the device be used without MADs support?

Since we have support now, I don't see a reason why we would want to use (or
even expose) a device with no MAD support.


Good point, we just need to make sure users know how to enable MADs.
Thanks,
Marcel


If not, can you update the pvrdma documentation to
reflect the changes?

Sure, missed that, will document the changes in v3.


Signed-off-by: Yuval Shaia 
---
   hw/rdma/rdma_backend.c  | 263 +++-
   hw/rdma/rdma_backend.h  |   4 +-
   hw/rdma/rdma_backend_defs.h |  10 +-
   hw/rdma/vmw/pvrdma.h|   2 +
   hw/rdma/vmw/pvrdma_main.c   |   4 +-
   5 files changed, 273 insertions(+), 10 deletions(-)

diff --git a/hw/rdma/rdma_backend.c b/hw/rdma/rdma_backend.c
index 1e148398a2..3eb0099f8d 100644
--- a/hw/rdma/rdma_backend.c
+++ b/hw/rdma/rdma_backend.c
@@ -16,8 +16,13 @@
   #include "qemu/osdep.h"
   #include "qemu/error-report.h"
   #include "qapi/error.h"
+#include "qapi/qmp/qlist.h"
+#include "qapi/qmp/qnum.h"
   #include 
+#include 
+#include 
+#include 
   #include "trace.h"
   #include "rdma_utils.h"
@@ -33,16 +38,25 @@
   #define VENDOR_ERR_MAD_SEND 0x206
   #define VENDOR_ERR_INVLKEY  0x207
   #define VENDOR_ERR_MR_SMALL 0x208
+#define VENDOR_ERR_INV_MAD_BUFF 0x209
+#define VENDOR_ERR_INV_NUM_SGE  0x210
   #define THR_NAME_LEN 16
   #define THR_POLL_TO  5000
+#define MAD_HDR_SIZE sizeof(struct ibv_grh)
+
   typedef struct BackendCtx {
-uint64_t req_id;
   void *up_ctx;
   bool is_tx_req;
+struct ibv_sge sge; /* Used to save MAD recv buffer */
   } BackendCtx;
+struct backend_umad {
+struct ib_user_mad hdr;
+char mad[RDMA_MAX_PRIVATE_DATA];
+};
+
   static void (*comp_handler)(int status, unsigned int vendor_err, void *ctx);
   static void dummy_comp_handler(int status, unsigned int vendor_err, void 
*ctx)
@@ -286,6 +300,49 @@ static int build_host_sge_array(RdmaDeviceResources 
*rdma_dev_res,
   return 0;
   }
+static int mad_send(RdmaBackendDev *backend_dev, struct ibv_sge *sge,
+uint32_t num_sge)
+{
+struct backend_umad umad = {0};
+char *hdr, *msg;
+int ret;
+
+pr_dbg("num_sge=%d\n", num_sge);
+
+if (num_sge != 2) {
+return -EINVAL;
+}
+
+umad.hdr.length = sge[0].length + sge[1].length;
+pr_dbg("msg_len=%d\n", umad.hdr.length);
+
+if (umad.hdr.length > sizeof(umad.mad)) {
+return -ENOMEM;
+}
+
+umad.hdr.addr.qpn = htobe32(1);
+umad.hdr.addr.grh_present = 1;
+umad.hdr.addr.gid_index = backend_dev->backend_gid_idx;
+memcpy(umad.hdr.addr.gid, backend_dev->gid.raw, sizeof(umad.hdr.addr.gid));
+umad.hdr.addr.hop_limit = 1;
+
+hdr = rdma_pci_dma_map(backend_dev->dev, sge[0].addr, sge[0].length);
+msg = rdma_pci_dma_map(backend_dev->dev, sge[1].addr, sge[1].length);
+
+memcpy(&umad.mad[0], hdr, sge[0].length);
+memcpy(&umad.mad[sge[0].length], msg, sge[1].length);
+
+rdma_pci_dma_unmap(backend_dev->dev, msg, sge[1].length);
+rdma_pci_dma_unmap(backend_dev->dev, hdr, sge[0].length);
+
+ret = qemu_chr_fe_write(backend_dev->mad_chr_be, (const uint8_t *)&umad,
+sizeof(umad));
+
+pr_dbg("qemu_chr_fe_write=%d\n", ret);
+
+return (ret != sizeof(umad));
+}
+
   void rdma_backend_post_send(RdmaBackendDev *backend_dev,
   RdmaBackendQP *qp, uint8_t qp_type,
   struct ibv_sge *sge, uint32_t num_sge,
@@ -304,9 +361,13 @@ void rdma_backend_post_send(RdmaBackendDev *backend_dev,
   comp_handler(IBV_WC_GENERAL_ERR, VENDOR_ERR_QP0, ctx);
   } else if (qp_type == IBV_QPT_GSI) {
   pr_dbg("QP1\n");
-comp_handler(IBV_WC_GENERAL_ERR, VENDOR_ERR_MAD_SEND, ctx);
+rc = mad_send(backend_dev, sge, num_sge);
+if (rc) {
+comp_handler(IBV_WC_GENERAL_ERR, VENDOR_ERR_MAD_SEND, ctx);
+} else {
+comp_handler(IBV_WC_SUCCESS, 0, ctx);
+}
   }
-pr_dbg("qp->ibqp is NULL for qp_type %d!!!\n", qp_type);
   return;

Re: [Qemu-devel] [PATCH v2 13/22] hw/pvrdma: Make sure PCI function 0 is vmxnet3

2018-11-17 Thread Marcel Apfelbaum





On 11/11/18 9:45 AM, Yuval Shaia wrote:

On Sat, Nov 10, 2018 at 08:27:44PM +0200, Marcel Apfelbaum wrote:


On 11/8/18 6:08 PM, Yuval Shaia wrote:

Guest driver enforces it, we should also.

Signed-off-by: Yuval Shaia 
---
   hw/rdma/vmw/pvrdma.h  | 2 ++
   hw/rdma/vmw/pvrdma_main.c | 3 +++
   2 files changed, 5 insertions(+)

diff --git a/hw/rdma/vmw/pvrdma.h b/hw/rdma/vmw/pvrdma.h
index b019cb843a..10a3c4fb7c 100644
--- a/hw/rdma/vmw/pvrdma.h
+++ b/hw/rdma/vmw/pvrdma.h
@@ -20,6 +20,7 @@
   #include "hw/pci/pci.h"
   #include "hw/pci/msix.h"
   #include "chardev/char-fe.h"
+#include "hw/net/vmxnet3_defs.h"
   #include "../rdma_backend_defs.h"
   #include "../rdma_rm_defs.h"
@@ -85,6 +86,7 @@ typedef struct PVRDMADev {
   RdmaBackendDev backend_dev;
   RdmaDeviceResources rdma_dev_res;
   CharBackend mad_chr;
+VMXNET3State *func0;
   } PVRDMADev;
   #define PVRDMA_DEV(dev) OBJECT_CHECK(PVRDMADev, (dev), PVRDMA_HW_NAME)
diff --git a/hw/rdma/vmw/pvrdma_main.c b/hw/rdma/vmw/pvrdma_main.c
index ac8c092db0..fa6468d221 100644
--- a/hw/rdma/vmw/pvrdma_main.c
+++ b/hw/rdma/vmw/pvrdma_main.c
@@ -576,6 +576,9 @@ static void pvrdma_realize(PCIDevice *pdev, Error **errp)
   return;
   }
+/* Break if not vmxnet3 device in slot 0 */
+dev->func0 = VMXNET3(pci_get_function_0(pdev));
+

I don't see the error code flow in case VMXNET3 is not func 0.
Am I missing something?

Yes, this is a dynamic cast that will abort the process when the cast fails.

This is the error message that you will get in case that device on function
0 is not vmxnet3:

pvrdma_main.c:589:pvrdma_realize: Object 0x557b959841a0 is not an instance of 
type vmxnet3


I am not sure we will see this error if QEMU is compiled in Release mode.
I think object_dynamic_cast_assert throws this error only if
CONFIG_QOM_CAST_DEBUG is set, and it is possible that this flag is not set
in Release builds.

Thanks,
Marcel





Thanks,
Marcel


   memdev_root = object_resolve_path("/objects", NULL);
   if (memdev_root) {
   object_child_foreach(memdev_root, pvrdma_check_ram_shared, 
&ram_shared);

Re: [Qemu-devel] [PATCH v3 03/23] hw/rdma: Return qpn 1 if ibqp is NULL

2018-11-17 Thread Marcel Apfelbaum





On 11/13/18 9:12 AM, Yuval Shaia wrote:

The device does not support QP0, only QP1.

Signed-off-by: Yuval Shaia 
---
  hw/rdma/rdma_backend.h | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/rdma/rdma_backend.h b/hw/rdma/rdma_backend.h
index 86e8fe8ab6..3ccc9a2494 100644
--- a/hw/rdma/rdma_backend.h
+++ b/hw/rdma/rdma_backend.h
@@ -33,7 +33,7 @@ static inline union ibv_gid *rdma_backend_gid(RdmaBackendDev 
*dev)
  
  static inline uint32_t rdma_backend_qpn(const RdmaBackendQP *qp)

  {
-return qp->ibqp ? qp->ibqp->qp_num : 0;
+return qp->ibqp ? qp->ibqp->qp_num : 1;
  }
  
  static inline uint32_t rdma_backend_mr_lkey(const RdmaBackendMR *mr)


Reviewed-by: Marcel Apfelbaum

Thanks,
Marcel

Re: [Qemu-devel] [PATCH v3 05/23] hw/rdma: Add support for MAD packets

2018-11-17 Thread Marcel Apfelbaum


Hi Yuval,

On 11/13/18 9:12 AM, Yuval Shaia wrote:

MAD (Management Datagram) packets are widely used by various modules


Please add a link to Spec, I sent it in the V1 mail-thread
Please add it also as a comment in the code. I know MADs
are a complicated matter, but if somebody wants to have a look...


both in kernel and in user space for example the rdma_* API which is
used to create and maintain "connection" layer on top of RDMA uses
several types of MAD packets.
To support MAD packets the device uses an external utility
(contrib/rdmacm-mux) to relay packets from and to the guest driver.

Signed-off-by: Yuval Shaia 
---
  hw/rdma/rdma_backend.c  | 263 +++-
  hw/rdma/rdma_backend.h  |   4 +-
  hw/rdma/rdma_backend_defs.h |  10 +-
  hw/rdma/vmw/pvrdma.h|   2 +
  hw/rdma/vmw/pvrdma_main.c   |   4 +-
  5 files changed, 273 insertions(+), 10 deletions(-)

diff --git a/hw/rdma/rdma_backend.c b/hw/rdma/rdma_backend.c
index 1e148398a2..3eb0099f8d 100644
--- a/hw/rdma/rdma_backend.c
+++ b/hw/rdma/rdma_backend.c


rdma_backend is getting huge; have you considered moving the
MAD-related code out?


@@ -16,8 +16,13 @@
  #include "qemu/osdep.h"
  #include "qemu/error-report.h"
  #include "qapi/error.h"
+#include "qapi/qmp/qlist.h"
+#include "qapi/qmp/qnum.h"
  
  #include 

+#include 
+#include 
+#include 
  
  #include "trace.h"

  #include "rdma_utils.h"
@@ -33,16 +38,25 @@
  #define VENDOR_ERR_MAD_SEND 0x206
  #define VENDOR_ERR_INVLKEY  0x207
  #define VENDOR_ERR_MR_SMALL 0x208
+#define VENDOR_ERR_INV_MAD_BUFF 0x209
+#define VENDOR_ERR_INV_NUM_SGE  0x210
  
  #define THR_NAME_LEN 16

  #define THR_POLL_TO  5000
  
+#define MAD_HDR_SIZE sizeof(struct ibv_grh)

+
  typedef struct BackendCtx {
-uint64_t req_id;
  void *up_ctx;
  bool is_tx_req;
+struct ibv_sge sge; /* Used to save MAD recv buffer */
  } BackendCtx;
  
+struct backend_umad {

+struct ib_user_mad hdr;
+char mad[RDMA_MAX_PRIVATE_DATA];
+};
+
  static void (*comp_handler)(int status, unsigned int vendor_err, void *ctx);
  
  static void dummy_comp_handler(int status, unsigned int vendor_err, void *ctx)

@@ -286,6 +300,49 @@ static int build_host_sge_array(RdmaDeviceResources 
*rdma_dev_res,
  return 0;
  }
  
+static int mad_send(RdmaBackendDev *backend_dev, struct ibv_sge *sge,

+uint32_t num_sge)
+{
+struct backend_umad umad = {0};
+char *hdr, *msg;
+int ret;
+
+pr_dbg("num_sge=%d\n", num_sge);
+
+if (num_sge != 2) {
+return -EINVAL;
+}
+
+umad.hdr.length = sge[0].length + sge[1].length;
+pr_dbg("msg_len=%d\n", umad.hdr.length);
+
+if (umad.hdr.length > sizeof(umad.mad)) {
+return -ENOMEM;
+}
+
+umad.hdr.addr.qpn = htobe32(1);
+umad.hdr.addr.grh_present = 1;
+umad.hdr.addr.gid_index = backend_dev->backend_gid_idx;
+memcpy(umad.hdr.addr.gid, backend_dev->gid.raw, sizeof(umad.hdr.addr.gid));
+umad.hdr.addr.hop_limit = 1;
+
+hdr = rdma_pci_dma_map(backend_dev->dev, sge[0].addr, sge[0].length);
+msg = rdma_pci_dma_map(backend_dev->dev, sge[1].addr, sge[1].length);
+


If rdma_pci_dma_map fails it will return NULL 


+memcpy(&umad.mad[0], hdr, sge[0].length);
+memcpy(&umad.mad[sge[0].length], msg, sge[1].length);
+


... and here we access a NULL pointer.
Maybe it is possible to return an error here.

+rdma_pci_dma_unmap(backend_dev->dev, msg, sge[1].length);
+rdma_pci_dma_unmap(backend_dev->dev, hdr, sge[0].length);
+
+ret = qemu_chr_fe_write(backend_dev->mad_chr_be, (const uint8_t *)&umad,
+sizeof(umad));
+
+pr_dbg("qemu_chr_fe_write=%d\n", ret);
+
+return (ret != sizeof(umad));
+}
+
  void rdma_backend_post_send(RdmaBackendDev *backend_dev,
  RdmaBackendQP *qp, uint8_t qp_type,
  struct ibv_sge *sge, uint32_t num_sge,
@@ -304,9 +361,13 @@ void rdma_backend_post_send(RdmaBackendDev *backend_dev,
  comp_handler(IBV_WC_GENERAL_ERR, VENDOR_ERR_QP0, ctx);
  } else if (qp_type == IBV_QPT_GSI) {
  pr_dbg("QP1\n");
-comp_handler(IBV_WC_GENERAL_ERR, VENDOR_ERR_MAD_SEND, ctx);
+rc = mad_send(backend_dev, sge, num_sge);
+if (rc) {
+comp_handler(IBV_WC_GENERAL_ERR, VENDOR_ERR_MAD_SEND, ctx);
+} else {
+comp_handler(IBV_WC_SUCCESS, 0, ctx);
+}
  }
-pr_dbg("qp->ibqp is NULL for qp_type %d!!!\n", qp_type);
  return;
  }
  
@@ -370,6 +431,48 @@ out_free_bctx:

  g_free(bctx);
  }
  
+static unsigned int save_mad_recv_buffer(RdmaBackendDev *backend_dev,

+ struct ibv_sge *sge, uint32_t num_sge,
+ void *ctx)
+{
+BackendCtx *bctx;
+int rc;
+uint32_t bctx_id;
+
+if (num_sge != 1) {
+pr_dbg("Invalid

Re: [Qemu-devel] [PATCH v3 09/23] hw/pvrdma: Set the correct opcode for send completion

2018-11-17 Thread Marcel Apfelbaum





On 11/13/18 9:12 AM, Yuval Shaia wrote:

opcode for WC should be set by the device and not taken from work
element.

Signed-off-by: Yuval Shaia 
---
  hw/rdma/vmw/pvrdma_qp_ops.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/rdma/vmw/pvrdma_qp_ops.c b/hw/rdma/vmw/pvrdma_qp_ops.c
index 7b0f440fda..3388be1926 100644
--- a/hw/rdma/vmw/pvrdma_qp_ops.c
+++ b/hw/rdma/vmw/pvrdma_qp_ops.c
@@ -154,7 +154,7 @@ int pvrdma_qp_send(PVRDMADev *dev, uint32_t qp_handle)
  comp_ctx->cq_handle = qp->send_cq_handle;
  comp_ctx->cqe.wr_id = wqe->hdr.wr_id;
  comp_ctx->cqe.qp = qp_handle;
-comp_ctx->cqe.opcode = wqe->hdr.opcode;
+comp_ctx->cqe.opcode = IBV_WC_SEND;
  
  rdma_backend_post_send(&dev->backend_dev, &qp->backend_qp, qp->qp_type,

 (struct ibv_sge *)&wqe->sge[0], 
wqe->hdr.num_sge,


Reviewed-by: Marcel Apfelbaum
Thanks,
Marcel

Re: [Qemu-devel] [PATCH v3 14/23] hw/rdma: Initialize node_guid from vmxnet3 mac address

2018-11-17 Thread Marcel Apfelbaum





On 11/13/18 9:13 AM, Yuval Shaia wrote:

node_guid should be set once the device is loaded.
Make node_guid be in GID format (64 bit), derived from the MAC of the
vmxnet3 device on PCI function 0.

A new function was added to do the conversion.
So for example the MAC 56:b6:44:e9:62:dc will be converted to GID
54b6:44ff:fee9:62dc.

Signed-off-by: Yuval Shaia 
---
  hw/rdma/rdma_utils.h  |  9 +
  hw/rdma/vmw/pvrdma_cmd.c  | 10 --
  hw/rdma/vmw/pvrdma_main.c |  5 -
  3 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/hw/rdma/rdma_utils.h b/hw/rdma/rdma_utils.h
index 989db249ef..202abb3366 100644
--- a/hw/rdma/rdma_utils.h
+++ b/hw/rdma/rdma_utils.h
@@ -63,4 +63,13 @@ extern unsigned long pr_dbg_cnt;
  void *rdma_pci_dma_map(PCIDevice *dev, dma_addr_t addr, dma_addr_t plen);
  void rdma_pci_dma_unmap(PCIDevice *dev, void *buffer, dma_addr_t len);
  
+static inline void addrconf_addr_eui48(uint8_t *eui, const char *addr)

+{
+memcpy(eui, addr, 3);
+eui[3] = 0xFF;
+eui[4] = 0xFE;
+memcpy(eui + 5, addr + 3, 3);
+eui[0] ^= 2;
+}
+
  #endif
diff --git a/hw/rdma/vmw/pvrdma_cmd.c b/hw/rdma/vmw/pvrdma_cmd.c
index a334f6205e..2979582fac 100644
--- a/hw/rdma/vmw/pvrdma_cmd.c
+++ b/hw/rdma/vmw/pvrdma_cmd.c
@@ -592,16 +592,6 @@ static int create_bind(PVRDMADev *dev, union 
pvrdma_cmd_req *req,
  return -EINVAL;
  }
  
-/* TODO: Since drivers stores node_guid at load_dsr phase then this

- * assignment is not relevant, i need to figure out a way how to
- * retrieve MAC of our netdev */
-if (!cmd->index) {
-dev->node_guid =
-dev->rdma_dev_res.ports[0].gid_tbl[0].gid.global.interface_id;
-pr_dbg("dev->node_guid=0x%llx\n",
-   (long long unsigned int)be64_to_cpu(dev->node_guid));
-}
-
  return 0;
  }
  
diff --git a/hw/rdma/vmw/pvrdma_main.c b/hw/rdma/vmw/pvrdma_main.c

index fa6468d221..95e9322b7c 100644
--- a/hw/rdma/vmw/pvrdma_main.c
+++ b/hw/rdma/vmw/pvrdma_main.c
@@ -264,7 +264,7 @@ static void init_dsr_dev_caps(PVRDMADev *dev)
  dsr->caps.sys_image_guid = 0;
  pr_dbg("sys_image_guid=%" PRIx64 "\n", dsr->caps.sys_image_guid);
  
-dsr->caps.node_guid = cpu_to_be64(dev->node_guid);

+dsr->caps.node_guid = dev->node_guid;
  pr_dbg("node_guid=%" PRIx64 "\n", be64_to_cpu(dsr->caps.node_guid));
  
  dsr->caps.phys_port_cnt = MAX_PORTS;

@@ -579,6 +579,9 @@ static void pvrdma_realize(PCIDevice *pdev, Error **errp)
  /* Break if not vmxnet3 device in slot 0 */
  dev->func0 = VMXNET3(pci_get_function_0(pdev));
  
+addrconf_addr_eui48((unsigned char *)&dev->node_guid,

+(const char *)&dev->func0->conf.macaddr.a);
+
  memdev_root = object_resolve_path("/objects", NULL);
  if (memdev_root) {
  object_child_foreach(memdev_root, pvrdma_check_ram_shared, 
&ram_shared);


Reviewed-by: Marcel Apfelbaum

Thanks,
Marcel

Re: [Qemu-devel] [PATCH v3 15/23] hw/pvrdma: Make device state depend on Ethernet function state

2018-11-17 Thread Marcel Apfelbaum





On 11/13/18 9:13 AM, Yuval Shaia wrote:

User should be able to control the device by changing Ethernet function
state so if user runs 'ifconfig ens3 down' the PVRDMA function should be
down as well.

Signed-off-by: Yuval Shaia 
---
  hw/rdma/vmw/pvrdma_cmd.c | 3 ++-
  1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/hw/rdma/vmw/pvrdma_cmd.c b/hw/rdma/vmw/pvrdma_cmd.c
index 2979582fac..0d3c818c20 100644
--- a/hw/rdma/vmw/pvrdma_cmd.c
+++ b/hw/rdma/vmw/pvrdma_cmd.c
@@ -139,7 +139,8 @@ static int query_port(PVRDMADev *dev, union pvrdma_cmd_req 
*req,
  resp->hdr.ack = PVRDMA_CMD_QUERY_PORT_RESP;
  resp->hdr.err = 0;
  
-resp->attrs.state = attrs.state;

+resp->attrs.state = dev->func0->device_active ? attrs.state :
+PVRDMA_PORT_DOWN;
  resp->attrs.max_mtu = attrs.max_mtu;
  resp->attrs.active_mtu = attrs.active_mtu;
  resp->attrs.phys_state = attrs.phys_state;


Reviewed-by: Marcel Apfelbaum

Thanks,
Marcel

Re: [Qemu-devel] [PATCH v3 16/23] hw/pvrdma: Fill all CQE fields

2018-11-17 Thread Marcel Apfelbaum





On 11/13/18 9:13 AM, Yuval Shaia wrote:

Add ability to pass specific WC attributes to CQE such as GRH_BIT flag.

Signed-off-by: Yuval Shaia 
---
  hw/rdma/rdma_backend.c  | 59 +++--
  hw/rdma/rdma_backend.h  |  4 +--
  hw/rdma/vmw/pvrdma_qp_ops.c | 31 +++
  3 files changed, 58 insertions(+), 36 deletions(-)

diff --git a/hw/rdma/rdma_backend.c b/hw/rdma/rdma_backend.c
index 5675504165..e453bda8f9 100644
--- a/hw/rdma/rdma_backend.c
+++ b/hw/rdma/rdma_backend.c
@@ -59,13 +59,24 @@ struct backend_umad {
  char mad[RDMA_MAX_PRIVATE_DATA];
  };
  
-static void (*comp_handler)(int status, unsigned int vendor_err, void *ctx);

+static void (*comp_handler)(void *ctx, struct ibv_wc *wc);
  
-static void dummy_comp_handler(int status, unsigned int vendor_err, void *ctx)

+static void dummy_comp_handler(void *ctx, struct ibv_wc *wc)
  {
  pr_err("No completion handler is registered\n");
  }
  
+static inline void complete_work(enum ibv_wc_status status, uint32_t vendor_err,

+ void *ctx)
+{
+struct ibv_wc wc = {0};
+
+wc.status = status;
+wc.vendor_err = vendor_err;
+
+comp_handler(ctx, &wc);
+}
+
  static void poll_cq(RdmaDeviceResources *rdma_dev_res, struct ibv_cq *ibcq)
  {
  int i, ne;
@@ -90,7 +101,7 @@ static void poll_cq(RdmaDeviceResources *rdma_dev_res, 
struct ibv_cq *ibcq)
  }
  pr_dbg("Processing %s CQE\n", bctx->is_tx_req ? "send" : "recv");
  
-comp_handler(wc[i].status, wc[i].vendor_err, bctx->up_ctx);

+comp_handler(bctx->up_ctx, &wc[i]);
  
  rdma_rm_dealloc_cqe_ctx(rdma_dev_res, wc[i].wr_id);

  g_free(bctx);
@@ -184,8 +195,8 @@ static void start_comp_thread(RdmaBackendDev *backend_dev)
 comp_handler_thread, backend_dev, 
QEMU_THREAD_DETACHED);
  }
  
-void rdma_backend_register_comp_handler(void (*handler)(int status,

-unsigned int vendor_err, void *ctx))
+void rdma_backend_register_comp_handler(void (*handler)(void *ctx,
+ struct ibv_wc *wc))
  {
  comp_handler = handler;
  }
@@ -369,14 +380,14 @@ void rdma_backend_post_send(RdmaBackendDev *backend_dev,
  if (!qp->ibqp) { /* This field does not get initialized for QP0 and QP1 */
  if (qp_type == IBV_QPT_SMI) {
  pr_dbg("QP0 unsupported\n");
-comp_handler(IBV_WC_GENERAL_ERR, VENDOR_ERR_QP0, ctx);
+complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_QP0, ctx);
  } else if (qp_type == IBV_QPT_GSI) {
  pr_dbg("QP1\n");
  rc = mad_send(backend_dev, sgid_idx, sgid, sge, num_sge);
  if (rc) {
-comp_handler(IBV_WC_GENERAL_ERR, VENDOR_ERR_MAD_SEND, ctx);
+complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_MAD_SEND, ctx);
  } else {
-comp_handler(IBV_WC_SUCCESS, 0, ctx);
+complete_work(IBV_WC_SUCCESS, 0, ctx);
  }
  }
  return;
@@ -385,7 +396,7 @@ void rdma_backend_post_send(RdmaBackendDev *backend_dev,
  pr_dbg("num_sge=%d\n", num_sge);
  if (!num_sge) {
  pr_dbg("num_sge=0\n");
-comp_handler(IBV_WC_GENERAL_ERR, VENDOR_ERR_NO_SGE, ctx);
+complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_NO_SGE, ctx);
  return;
  }
  
@@ -396,21 +407,21 @@ void rdma_backend_post_send(RdmaBackendDev *backend_dev,

  rc = rdma_rm_alloc_cqe_ctx(backend_dev->rdma_dev_res, &bctx_id, bctx);
  if (unlikely(rc)) {
  pr_dbg("Failed to allocate cqe_ctx\n");
-comp_handler(IBV_WC_GENERAL_ERR, VENDOR_ERR_NOMEM, ctx);
+complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_NOMEM, ctx);
  goto out_free_bctx;
  }
  
  rc = build_host_sge_array(backend_dev->rdma_dev_res, new_sge, sge, num_sge);

  if (rc) {
  pr_dbg("Error: Failed to build host SGE array\n");
-comp_handler(IBV_WC_GENERAL_ERR, rc, ctx);
+complete_work(IBV_WC_GENERAL_ERR, rc, ctx);
  goto out_dealloc_cqe_ctx;
  }
  
  if (qp_type == IBV_QPT_UD) {

  wr.wr.ud.ah = create_ah(backend_dev, qp->ibpd, sgid_idx, dgid);
  if (!wr.wr.ud.ah) {
-comp_handler(IBV_WC_GENERAL_ERR, VENDOR_ERR_FAIL_BACKEND, ctx);
+complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_FAIL_BACKEND, ctx);
  goto out_dealloc_cqe_ctx;
  }
  wr.wr.ud.remote_qpn = dqpn;
@@ -428,7 +439,7 @@ void rdma_backend_post_send(RdmaBackendDev *backend_dev,
  if (rc) {
  pr_dbg("Fail (%d, %d) to post send WQE to qpn %d\n", rc, errno,
  qp->ibqp->qp_num);
-comp_handler(IBV_WC_GENERAL_ERR, VENDOR_ERR_FAIL_BACKEND, ctx);
+complete_work(IBV_WC_GENERAL_ERR, VENDOR_ERR_FAIL_BACKEND, ctx);
  goto out_dealloc_cqe_ctx;
  }
  
@@ -497,13 +508,13 @@ void rdma_ba

Re: [Qemu-devel] [PATCH v3 17/23] hw/pvrdma: Fill error code in command's response

2018-11-17 Thread Marcel Apfelbaum





On 11/13/18 9:13 AM, Yuval Shaia wrote:

The driver checks the error code, so let's set it.

Signed-off-by: Yuval Shaia 
---
  hw/rdma/vmw/pvrdma_cmd.c | 67 
  1 file changed, 48 insertions(+), 19 deletions(-)

diff --git a/hw/rdma/vmw/pvrdma_cmd.c b/hw/rdma/vmw/pvrdma_cmd.c
index 0d3c818c20..a326c5d470 100644
--- a/hw/rdma/vmw/pvrdma_cmd.c
+++ b/hw/rdma/vmw/pvrdma_cmd.c
@@ -131,7 +131,8 @@ static int query_port(PVRDMADev *dev, union pvrdma_cmd_req 
*req,
  
  if (rdma_backend_query_port(&dev->backend_dev,

  (struct ibv_port_attr *)&attrs)) {
-return -ENOMEM;
+resp->hdr.err = -ENOMEM;
+goto out;
  }
  
  memset(resp, 0, sizeof(*resp));

@@ -150,7 +151,9 @@ static int query_port(PVRDMADev *dev, union pvrdma_cmd_req 
*req,
  resp->attrs.active_width = 1;
  resp->attrs.active_speed = 1;
  
-return 0;

+out:
+pr_dbg("ret=%d\n", resp->hdr.err);
+return resp->hdr.err;
  }
  
  static int query_pkey(PVRDMADev *dev, union pvrdma_cmd_req *req,

@@ -170,7 +173,7 @@ static int query_pkey(PVRDMADev *dev, union pvrdma_cmd_req 
*req,
  resp->pkey = PVRDMA_PKEY;
  pr_dbg("pkey=0x%x\n", resp->pkey);
  
-return 0;

+return resp->hdr.err;
  }
  
  static int create_pd(PVRDMADev *dev, union pvrdma_cmd_req *req,

@@ -200,7 +203,9 @@ static int destroy_pd(PVRDMADev *dev, union pvrdma_cmd_req 
*req,
  
  rdma_rm_dealloc_pd(&dev->rdma_dev_res, cmd->pd_handle);
  
-return 0;

+rsp->hdr.err = 0;


Is it possible to ensure err is 0 by default during hdr creation
instead of manually setting it every time?

Thanks,
Marcel


+
+return rsp->hdr.err;
  }
  
  static int create_mr(PVRDMADev *dev, union pvrdma_cmd_req *req,

@@ -251,7 +256,9 @@ static int destroy_mr(PVRDMADev *dev, union pvrdma_cmd_req 
*req,
  
  rdma_rm_dealloc_mr(&dev->rdma_dev_res, cmd->mr_handle);
  
-return 0;

+rsp->hdr.err = 0;
+
+return rsp->hdr.err;
  }
  
  static int create_cq_ring(PCIDevice *pci_dev , PvrdmaRing **ring,

@@ -353,7 +360,8 @@ static int destroy_cq(PVRDMADev *dev, union pvrdma_cmd_req 
*req,
  cq = rdma_rm_get_cq(&dev->rdma_dev_res, cmd->cq_handle);
  if (!cq) {
  pr_dbg("Invalid CQ handle\n");
-return -EINVAL;
+rsp->hdr.err = -EINVAL;
+goto out;
  }
  
  ring = (PvrdmaRing *)cq->opaque;

@@ -364,7 +372,11 @@ static int destroy_cq(PVRDMADev *dev, union pvrdma_cmd_req 
*req,
  
  rdma_rm_dealloc_cq(&dev->rdma_dev_res, cmd->cq_handle);
  
-return 0;

+rsp->hdr.err = 0;
+
+out:
+pr_dbg("ret=%d\n", rsp->hdr.err);
+return rsp->hdr.err;
  }
  
  static int create_qp_rings(PCIDevice *pci_dev, uint64_t pdir_dma,

@@ -553,7 +565,8 @@ static int destroy_qp(PVRDMADev *dev, union pvrdma_cmd_req 
*req,
  qp = rdma_rm_get_qp(&dev->rdma_dev_res, cmd->qp_handle);
  if (!qp) {
  pr_dbg("Invalid QP handle\n");
-return -EINVAL;
+rsp->hdr.err = -EINVAL;
+goto out;
  }
  
  rdma_rm_dealloc_qp(&dev->rdma_dev_res, cmd->qp_handle);

@@ -567,7 +580,11 @@ static int destroy_qp(PVRDMADev *dev, union pvrdma_cmd_req 
*req,
  rdma_pci_dma_unmap(PCI_DEVICE(dev), ring->ring_state, TARGET_PAGE_SIZE);
  g_free(ring);
  
-return 0;

+rsp->hdr.err = 0;
+
+out:
+pr_dbg("ret=%d\n", rsp->hdr.err);
+return rsp->hdr.err;
  }
  
  static int create_bind(PVRDMADev *dev, union pvrdma_cmd_req *req,

@@ -580,7 +597,8 @@ static int create_bind(PVRDMADev *dev, union pvrdma_cmd_req 
*req,
  pr_dbg("index=%d\n", cmd->index);
  
  if (cmd->index >= MAX_PORT_GIDS) {

-return -EINVAL;
+rsp->hdr.err = -EINVAL;
+goto out;
  }
  
  pr_dbg("gid[%d]=0x%llx,0x%llx\n", cmd->index,

@@ -590,10 +608,15 @@ static int create_bind(PVRDMADev *dev, union 
pvrdma_cmd_req *req,
  rc = rdma_rm_add_gid(&dev->rdma_dev_res, &dev->backend_dev,
   dev->backend_eth_device_name, gid, cmd->index);
  if (rc < 0) {
-return -EINVAL;
+rsp->hdr.err = rc;
+goto out;
  }
  
-return 0;

+rsp->hdr.err = 0;
+
+out:
+pr_dbg("ret=%d\n", rsp->hdr.err);
+return rsp->hdr.err;
  }
  
  static int destroy_bind(PVRDMADev *dev, union pvrdma_cmd_req *req,

@@ -606,7 +629,8 @@ static int destroy_bind(PVRDMADev *dev, union 
pvrdma_cmd_req *req,
  pr_dbg("index=%d\n", cmd->index);
  
  if (cmd->index >= MAX_PORT_GIDS) {

-return -EINVAL;
+rsp->hdr.err = -EINVAL;
+goto out;
  }
  
  rc = rdma_rm_del_gid(&dev->rdma_dev_res, &dev->backend_dev,

@@ -617,7 +641,11 @@ static int destroy_bind(PVRDMADev *dev, union 
pvrdma_cmd_req *req,
  goto out;
  }
  
-return 0;

+rsp->hdr.err = 0;
+
+out:
+pr_dbg("ret=%d\n", rsp->hdr.err);
+return rsp->hdr.err;
  }
  
  static int create_uc(PVRDMADev *dev, union pvrdma_cmd_req *req,

@@ -634,9 +662,8 @@ static int create_uc(PVRDMA

Re: [Qemu-devel] [PATCH v3 18/23] hw/rdma: Remove unneeded code that handles more that one port

2018-11-17 Thread Marcel Apfelbaum





On 11/13/18 9:13 AM, Yuval Shaia wrote:

The device supports only one port, so let's remove the dead code that handles
more than one port.

Signed-off-by: Yuval Shaia 
---
  hw/rdma/rdma_rm.c  | 34 --
  hw/rdma/rdma_rm.h  |  2 +-
  hw/rdma/rdma_rm_defs.h |  4 ++--
  3 files changed, 19 insertions(+), 21 deletions(-)

diff --git a/hw/rdma/rdma_rm.c b/hw/rdma/rdma_rm.c
index fe0979415d..0a5ab8935a 100644
--- a/hw/rdma/rdma_rm.c
+++ b/hw/rdma/rdma_rm.c
@@ -545,7 +545,7 @@ int rdma_rm_add_gid(RdmaDeviceResources *dev_res, 
RdmaBackendDev *backend_dev,
  return -EINVAL;
  }
  
-memcpy(&dev_res->ports[0].gid_tbl[gid_idx].gid, gid, sizeof(*gid));

+memcpy(&dev_res->port.gid_tbl[gid_idx].gid, gid, sizeof(*gid));
  
  return 0;

  }
@@ -556,15 +556,15 @@ int rdma_rm_del_gid(RdmaDeviceResources *dev_res, 
RdmaBackendDev *backend_dev,
  int rc;
  
  rc = rdma_backend_del_gid(backend_dev, ifname,

-  &dev_res->ports[0].gid_tbl[gid_idx].gid);
+  &dev_res->port.gid_tbl[gid_idx].gid);
  if (rc < 0) {
  pr_dbg("Fail to delete gid\n");
  return -EINVAL;
  }
  
-memset(dev_res->ports[0].gid_tbl[gid_idx].gid.raw, 0,

-   sizeof(dev_res->ports[0].gid_tbl[gid_idx].gid));
-dev_res->ports[0].gid_tbl[gid_idx].backend_gid_index = -1;
+memset(dev_res->port.gid_tbl[gid_idx].gid.raw, 0,
+   sizeof(dev_res->port.gid_tbl[gid_idx].gid));
+dev_res->port.gid_tbl[gid_idx].backend_gid_index = -1;
  
  return 0;

  }
@@ -577,16 +577,16 @@ int rdma_rm_get_backend_gid_index(RdmaDeviceResources 
*dev_res,
  return -EINVAL;
  }
  
-if (unlikely(dev_res->ports[0].gid_tbl[sgid_idx].backend_gid_index == -1)) {

-dev_res->ports[0].gid_tbl[sgid_idx].backend_gid_index =
+if (unlikely(dev_res->port.gid_tbl[sgid_idx].backend_gid_index == -1)) {
+dev_res->port.gid_tbl[sgid_idx].backend_gid_index =
  rdma_backend_get_gid_index(backend_dev,
-   
&dev_res->ports[0].gid_tbl[sgid_idx].gid);
+   &dev_res->port.gid_tbl[sgid_idx].gid);
  }
  
  pr_dbg("backend_gid_index=%d\n",

-   dev_res->ports[0].gid_tbl[sgid_idx].backend_gid_index);
+   dev_res->port.gid_tbl[sgid_idx].backend_gid_index);
  
-return dev_res->ports[0].gid_tbl[sgid_idx].backend_gid_index;

+return dev_res->port.gid_tbl[sgid_idx].backend_gid_index;
  }
  
  static void destroy_qp_hash_key(gpointer data)

@@ -596,15 +596,13 @@ static void destroy_qp_hash_key(gpointer data)
  
  static void init_ports(RdmaDeviceResources *dev_res)

  {
-int i, j;
+int i;
  
-memset(dev_res->ports, 0, sizeof(dev_res->ports));

+memset(&dev_res->port, 0, sizeof(dev_res->port));
  
-for (i = 0; i < MAX_PORTS; i++) {

-dev_res->ports[i].state = IBV_PORT_DOWN;
-for (j = 0; j < MAX_PORT_GIDS; j++) {
-dev_res->ports[i].gid_tbl[j].backend_gid_index = -1;
-}
+dev_res->port.state = IBV_PORT_DOWN;
+for (i = 0; i < MAX_PORT_GIDS; i++) {
+dev_res->port.gid_tbl[i].backend_gid_index = -1;
  }
  }
  
@@ -613,7 +611,7 @@ static void fini_ports(RdmaDeviceResources *dev_res,

  {
  int i;
  
-dev_res->ports[0].state = IBV_PORT_DOWN;

+dev_res->port.state = IBV_PORT_DOWN;
  for (i = 0; i < MAX_PORT_GIDS; i++) {
  rdma_rm_del_gid(dev_res, backend_dev, ifname, i);
  }
diff --git a/hw/rdma/rdma_rm.h b/hw/rdma/rdma_rm.h
index a7169b4e89..3c602c04c0 100644
--- a/hw/rdma/rdma_rm.h
+++ b/hw/rdma/rdma_rm.h
@@ -79,7 +79,7 @@ int rdma_rm_get_backend_gid_index(RdmaDeviceResources 
*dev_res,
  static inline union ibv_gid *rdma_rm_get_gid(RdmaDeviceResources *dev_res,
   int sgid_idx)
  {
-return &dev_res->ports[0].gid_tbl[sgid_idx].gid;
+return &dev_res->port.gid_tbl[sgid_idx].gid;
  }
  
  #endif

diff --git a/hw/rdma/rdma_rm_defs.h b/hw/rdma/rdma_rm_defs.h
index 7b3435f991..0ba61d1838 100644
--- a/hw/rdma/rdma_rm_defs.h
+++ b/hw/rdma/rdma_rm_defs.h
@@ -18,7 +18,7 @@
  
  #include "rdma_backend_defs.h"
  
-#define MAX_PORTS 1

+#define MAX_PORTS 1 /* Do not change - we support only one port */
  #define MAX_PORT_GIDS 255
  #define MAX_GIDS  MAX_PORT_GIDS
  #define MAX_PORT_PKEYS1
@@ -97,7 +97,7 @@ typedef struct RdmaRmPort {
  } RdmaRmPort;
  
  typedef struct RdmaDeviceResources {

-RdmaRmPort ports[MAX_PORTS];
+RdmaRmPort port;
  RdmaRmResTbl pd_tbl;
  RdmaRmResTbl mr_tbl;
  RdmaRmResTbl uc_tbl;


Reviewed-by: Marcel Apfelbaum

Thanks,
Marcel

Re: [Qemu-devel] [PATCH v3 20/23] hw/pvrdma: Clean device's resource when system is shutdown

2018-11-17 Thread Marcel Apfelbaum





On 11/13/18 9:13 AM, Yuval Shaia wrote:

In order to clean up some external resources such as GIDs, QPs, etc.,
register to receive a notification when the VM is shut down.

Signed-off-by: Yuval Shaia 
---
  hw/rdma/vmw/pvrdma.h  |  2 ++
  hw/rdma/vmw/pvrdma_main.c | 12 
  2 files changed, 14 insertions(+)

diff --git a/hw/rdma/vmw/pvrdma.h b/hw/rdma/vmw/pvrdma.h
index 10a3c4fb7c..ffae36986e 100644
--- a/hw/rdma/vmw/pvrdma.h
+++ b/hw/rdma/vmw/pvrdma.h
@@ -17,6 +17,7 @@
  #define PVRDMA_PVRDMA_H
  
  #include "qemu/units.h"

+#include "qemu/notify.h"
  #include "hw/pci/pci.h"
  #include "hw/pci/msix.h"
  #include "chardev/char-fe.h"
@@ -87,6 +88,7 @@ typedef struct PVRDMADev {
  RdmaDeviceResources rdma_dev_res;
  CharBackend mad_chr;
  VMXNET3State *func0;
+Notifier shutdown_notifier;
  } PVRDMADev;
  #define PVRDMA_DEV(dev) OBJECT_CHECK(PVRDMADev, (dev), PVRDMA_HW_NAME)
  
diff --git a/hw/rdma/vmw/pvrdma_main.c b/hw/rdma/vmw/pvrdma_main.c

index 95e9322b7c..45a59cddf9 100644
--- a/hw/rdma/vmw/pvrdma_main.c
+++ b/hw/rdma/vmw/pvrdma_main.c
@@ -24,6 +24,7 @@
  #include "hw/qdev-properties.h"
  #include "cpu.h"
  #include "trace.h"
+#include "sysemu/sysemu.h"
  
  #include "../rdma_rm.h"

  #include "../rdma_backend.h"
@@ -559,6 +560,14 @@ static int pvrdma_check_ram_shared(Object *obj, void 
*opaque)
  return 0;
  }
  
+static void pvrdma_shutdown_notifier(Notifier *n, void *opaque)

+{
+PVRDMADev *dev = container_of(n, PVRDMADev, shutdown_notifier);
+PCIDevice *pci_dev = PCI_DEVICE(dev);
+
+pvrdma_fini(pci_dev);
+}
+
  static void pvrdma_realize(PCIDevice *pdev, Error **errp)
  {
  int rc;
@@ -623,6 +632,9 @@ static void pvrdma_realize(PCIDevice *pdev, Error **errp)
  goto out;
  }
  
+dev->shutdown_notifier.notify = pvrdma_shutdown_notifier;

+qemu_register_shutdown_notifier(&dev->shutdown_notifier);
+
  out:
  if (rc) {
  error_append_hint(errp, "Device fail to load\n");


Reviewed-by: Marcel Apfelbaum

Thanks,
Marcel

Re: [Qemu-devel] [PATCH v3 21/23] hw/rdma: Do not use bitmap_zero_extend to free bitmap

2018-11-17 Thread Marcel Apfelbaum





On 11/13/18 9:13 AM, Yuval Shaia wrote:

bitmap_zero_extend is designed for extending a bitmap, not for
shrinking it.
Use g_free instead.

Signed-off-by: Yuval Shaia 
---
  hw/rdma/rdma_rm.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/rdma/rdma_rm.c b/hw/rdma/rdma_rm.c
index 0a5ab8935a..35a96d9a64 100644
--- a/hw/rdma/rdma_rm.c
+++ b/hw/rdma/rdma_rm.c
@@ -43,7 +43,7 @@ static inline void res_tbl_free(RdmaRmResTbl *tbl)
  {
  qemu_mutex_destroy(&tbl->lock);
  g_free(tbl->tbl);
-bitmap_zero_extend(tbl->bitmap, tbl->tbl_sz, 0);
+g_free(tbl->bitmap);
  }
  
  static inline void *res_tbl_get(RdmaRmResTbl *tbl, uint32_t handle)


Reviewed-by: Marcel Apfelbaum

Thanks,
Marcel

Re: [Qemu-devel] [PATCH v3 22/23] hw/rdma: Do not call rdma_backend_del_gid on an empty gid

2018-11-17 Thread Marcel Apfelbaum





On 11/13/18 9:13 AM, Yuval Shaia wrote:

When the device goes down, the function fini_ports loops over all entries in
the gid table regardless of whether an entry is valid or not. If an entry is
not valid, we'd like to skip any further processing in the backend
device.

Signed-off-by: Yuval Shaia 
---
  hw/rdma/rdma_rm.c | 4 
  1 file changed, 4 insertions(+)

diff --git a/hw/rdma/rdma_rm.c b/hw/rdma/rdma_rm.c
index 35a96d9a64..e3f6b2f6ea 100644
--- a/hw/rdma/rdma_rm.c
+++ b/hw/rdma/rdma_rm.c
@@ -555,6 +555,10 @@ int rdma_rm_del_gid(RdmaDeviceResources *dev_res, 
RdmaBackendDev *backend_dev,
  {
  int rc;
  
+if (!dev_res->port.gid_tbl[gid_idx].gid.global.interface_id) {

+return 0;
+}
+
  rc = rdma_backend_del_gid(backend_dev, ifname,
&dev_res->port.gid_tbl[gid_idx].gid);
  if (rc < 0) {


Reviewed-by: Marcel Apfelbaum

Thanks,
Marcel

Re: [Qemu-devel] [PATCH v3 23/23] docs: Update pvrdma device documentation

2018-11-17 Thread Marcel Apfelbaum





On 11/13/18 9:13 AM, Yuval Shaia wrote:

Interface with the device is changed with the addition of support for
MAD packets.
Adjust documentation accordingly.

While there, fix a minor mistake which may lead one to think that there is a
relation between using RXE on the host and the compatibility with bare-metal
peers.

Signed-off-by: Yuval Shaia 
---
  docs/pvrdma.txt | 103 +++-
  1 file changed, 84 insertions(+), 19 deletions(-)

diff --git a/docs/pvrdma.txt b/docs/pvrdma.txt
index 5599318159..9e8d1674b7 100644
--- a/docs/pvrdma.txt
+++ b/docs/pvrdma.txt
@@ -9,8 +9,9 @@ It works with its Linux Kernel driver AS IS, no need for any 
special guest
  modifications.
  
  While it complies with the VMware device, it can also communicate with bare

-metal RDMA-enabled machines and does not require an RDMA HCA in the host, it
-can work with Soft-RoCE (rxe).
+metal RDMA-enabled machines as peers.
+
+It does not require an RDMA HCA in the host, it can work with Soft-RoCE (rxe).
  
  It does not require the whole guest RAM to be pinned allowing memory

  over-commit and, even if not implemented yet, migration support will be
@@ -78,29 +79,93 @@ the required RDMA libraries.
  
  3. Usage

  
+
+
+3.1 VM Memory settings
+==
  Currently the device is working only with memory backed RAM
  and it must be mark as "shared":
 -m 1G \
 -object memory-backend-ram,id=mb1,size=1G,share \
 -numa node,memdev=mb1 \
  
-The pvrdma device is composed of two functions:

- - Function 0 is a vmxnet Ethernet Device which is redundant in Guest
-   but is required to pass the ibdevice GID using its MAC.
-   Examples:
- For an rxe backend using eth0 interface it will use its mac:
-   -device vmxnet3,addr=.0,multifunction=on,mac=
- For an SRIOV VF, we take the Ethernet Interface exposed by it:
-   -device vmxnet3,multifunction=on,mac=
- - Function 1 is the actual device:
-   -device 
pvrdma,addr=.1,backend-dev=,backend-gid-idx=,backend-port=
-   where the ibdevice can be rxe or RDMA VF (e.g. mlx5_4)
- Note: Pay special attention that the GID at backend-gid-idx matches vmxnet's 
MAC.
- The rules of conversion are part of the RoCE spec, but since manual conversion
- is not required, spotting problems is not hard:
-Example: GID: fe80::::7efe:90ff:fecb:743a
- MAC: 7c:fe:90:cb:74:3a
-Note the difference between the first byte of the MAC and the GID.
+
+3.2 MAD Multiplexer
+===
+MAD Multiplexer is a service that exposes MAD-like interface for VMs in
+order to overcome the limitation where only single entity can register with
+MAD layer to send and receive RDMA-CM MAD packets.
+
+To build rdmacm-mux run
+# make rdmacm-mux
+
+The program accepts 3 command line arguments and exposes a UNIX socket to
+be used to relay control and data messages to and from the service.
+-s unix-socket-path   Path to unix socket to listen on
+  (default /var/run/rdmacm-mux)
+-d rdma-device-name   Name of RDMA device to register with
+  (default rxe0)
+-p rdma-device-port   Port number of RDMA device to register with
+  (default 1)
+The final UNIX socket file name is a concatenation of the 3 arguments so
+for example for device name mlx5_0 and port 2 the file
+/var/run/rdmacm-mux-mlx5_0-2 will be created.
+
+Please refer to contrib/rdmacm-mux for more details.
+
+
+3.3 PCI devices settings
+
+RoCE device exposes two functions - Ethernet and RDMA.
+To support it, pvrdma device is composed of two PCI functions, an Ethernet
+device of type vmxnet3 on PCI slot 0 and a pvrdma device on PCI slot 1. The
+Ethernet function can be used for other Ethernet purposes such as IP.
+
+
+3.4 Device parameters
+=
+- netdev: Specifies the Ethernet device on host. For Soft-RoCE (rxe) this
+  would be the Ethernet device used to create it. For any other physical
+  RoCE device this would be the netdev name of the device.


I didn't understand, can you please elaborate? We need the ibdev,
this is clear, but what is the "ethernet device on host", how do
we get it, and how is it used?

Thanks,
Marcel


+- ibdev: The IB device name on host for example rxe0, mlx5_0 etc.
+- mad-chardev: The name of the MAD multiplexer char device.
+- ibport: In case of multi-port device (such as Mellanox's HCA) this
+  specify the port to use. If not set 1 will be used.
+- dev-caps-max-mr-size: The maximum size of MR.
+- dev-caps-max-qp: Maximum number of QPs.
+- dev-caps-max-sge: Maximum number of SGE elements in WR.
+- dev-caps-max-cq: Maximum number of CQs.
+- dev-caps-max-mr: Maximum number of MRs.
+- dev-caps-max-pd: Maximum number of PDs.
+- dev-caps-max-ah: Maximum number of AHs.
+
+Notes:
+- The first 3 parameters are mandatory settings, the rest have their
+  defaults.
+- The last 8 parameters (the ones that prefixed by dev-caps) defines the top
+  limits but

Re: [Qemu-devel] [PATCH v3 11/23] hw/pvrdma: Add support to allow guest to configure GID table

2018-11-17 Thread Marcel Apfelbaum





On 11/13/18 9:13 AM, Yuval Shaia wrote:

The control over the RDMA device's GID table is done by updating the
device's Ethernet function addresses.
Usually the first GID entry is determine by the MAC address, the second


s/determine/determined


by the first IPv6 address and the third by the IPv4 address. Other
entries can be added by adding more IP addresses. The opposite is the
same, i.e. whenever an address is removed, the corresponding GID entry
is removed.

The process is done by the network and RDMA stacks. Whenever an address
is added the ib_core driver is notified and calls the device driver
add_gid function which in turn update the device.

To support this in pvrdma device we need to hook into the create_bind
and destroy_bind HW commands triggered by pvrdma driver in guest.
Whenever a changed is made to the pvrdma device's GID table a special

without 'a'


QMP messages is sent to be processed by libvirt to update the address of
the backend Ethernet device.


So the device can't be used without libvirt? How can we
use it with QEMU alone?



Signed-off-by: Yuval Shaia 
---
  hw/rdma/rdma_backend.c  | 243 +++-


rdma_backend.c is getting larger...


  hw/rdma/rdma_backend.h  |  22 ++--
  hw/rdma/rdma_backend_defs.h |   3 +-
  hw/rdma/rdma_rm.c   | 104 ++-
  hw/rdma/rdma_rm.h   |  17 ++-
  hw/rdma/rdma_rm_defs.h  |   9 +-
  hw/rdma/rdma_utils.h|  15 +++
  hw/rdma/vmw/pvrdma.h|   2 +-
  hw/rdma/vmw/pvrdma_cmd.c|  55 
  hw/rdma/vmw/pvrdma_main.c   |  25 +---
  hw/rdma/vmw/pvrdma_qp_ops.c |  20 +++
  11 files changed, 370 insertions(+), 145 deletions(-)

diff --git a/hw/rdma/rdma_backend.c b/hw/rdma/rdma_backend.c
index 3eb0099f8d..5675504165 100644
--- a/hw/rdma/rdma_backend.c
+++ b/hw/rdma/rdma_backend.c
@@ -18,12 +18,14 @@
  #include "qapi/error.h"
  #include "qapi/qmp/qlist.h"
  #include "qapi/qmp/qnum.h"
+#include "qapi/qapi-events-rdma.h"
  
  #include 

  #include 
  #include 
  #include 
  
+#include "contrib/rdmacm-mux/rdmacm-mux.h"

  #include "trace.h"
  #include "rdma_utils.h"
  #include "rdma_rm.h"
@@ -300,11 +302,11 @@ static int build_host_sge_array(RdmaDeviceResources 
*rdma_dev_res,
  return 0;
  }
  
-static int mad_send(RdmaBackendDev *backend_dev, struct ibv_sge *sge,

-uint32_t num_sge)
+static int mad_send(RdmaBackendDev *backend_dev, uint8_t sgid_idx,
+union ibv_gid *sgid, struct ibv_sge *sge, uint32_t num_sge)
  {
-struct backend_umad umad = {0};
-char *hdr, *msg;
+RdmaCmMuxMsg msg = {0};
+char *hdr, *data;
  int ret;
  
  pr_dbg("num_sge=%d\n", num_sge);

@@ -313,41 +315,50 @@ static int mad_send(RdmaBackendDev *backend_dev, struct 
ibv_sge *sge,
  return -EINVAL;
  }
  
-umad.hdr.length = sge[0].length + sge[1].length;

-pr_dbg("msg_len=%d\n", umad.hdr.length);
+msg.hdr.msg_type = RDMACM_MUX_MSG_TYPE_MAD;
+memcpy(msg.hdr.sgid.raw, sgid->raw, sizeof(msg.hdr.sgid));
  
-if (umad.hdr.length > sizeof(umad.mad)) {

+msg.umad_len = sge[0].length + sge[1].length;
+pr_dbg("umad_len=%d\n", msg.umad_len);
+
+if (msg.umad_len > sizeof(msg.umad.mad)) {
  return -ENOMEM;
  }
  
-umad.hdr.addr.qpn = htobe32(1);

-umad.hdr.addr.grh_present = 1;
-umad.hdr.addr.gid_index = backend_dev->backend_gid_idx;
-memcpy(umad.hdr.addr.gid, backend_dev->gid.raw, sizeof(umad.hdr.addr.gid));
-umad.hdr.addr.hop_limit = 1;
+msg.umad.hdr.addr.qpn = htobe32(1);
+msg.umad.hdr.addr.grh_present = 1;
+pr_dbg("sgid_idx=%d\n", sgid_idx);
+pr_dbg("sgid=0x%llx\n", sgid->global.interface_id);
+msg.umad.hdr.addr.gid_index = sgid_idx;
+memcpy(msg.umad.hdr.addr.gid, sgid->raw, sizeof(msg.umad.hdr.addr.gid));
+msg.umad.hdr.addr.hop_limit = 1;
  
  hdr = rdma_pci_dma_map(backend_dev->dev, sge[0].addr, sge[0].length);

-msg = rdma_pci_dma_map(backend_dev->dev, sge[1].addr, sge[1].length);
+data = rdma_pci_dma_map(backend_dev->dev, sge[1].addr, sge[1].length);
+
+pr_dbg_buf("mad_hdr", hdr, sge[0].length);
+pr_dbg_buf("mad_data", data, sge[1].length);
  
-memcpy(&umad.mad[0], hdr, sge[0].length);

-memcpy(&umad.mad[sge[0].length], msg, sge[1].length);
+memcpy(&msg.umad.mad[0], hdr, sge[0].length);
+memcpy(&msg.umad.mad[sge[0].length], data, sge[1].length);
  
-rdma_pci_dma_unmap(backend_dev->dev, msg, sge[1].length);

+rdma_pci_dma_unmap(backend_dev->dev, data, sge[1].length);
  rdma_pci_dma_unmap(backend_dev->dev, hdr, sge[0].length);
  
-ret = qemu_chr_fe_write(backend_dev->mad_chr_be, (const uint8_t *)&umad,

-sizeof(umad));
+ret = qemu_chr_fe_write(backend_dev->mad_chr_be, (const uint8_t *)&msg,
+sizeof(msg));
  
  pr_dbg("qemu_chr_fe_write=%d\n", ret);
  
-return (ret != sizeof(umad));

+return (ret != sizeof(msg));
  }

Re: [Qemu-devel] [PATCH V2] migration/colo.c: Fix compilation issue when disable replication

2018-11-17 Thread Zhang Chen

On Sat, Nov 17, 2018, 2:29 AM Dr. David Alan Gilbert wrote:
> * Zhang Chen (zhangc...@gmail.com) wrote:
> > On Wed, Nov 14, 2018 at 7:17 PM Peter Maydell 
> > wrote:
> >
> > > On 14 November 2018 at 11:06, Thomas Huth  wrote:
> > > > On 2018-11-14 11:47, Peter Xu wrote:
> > > >> On Thu, Nov 01, 2018 at 10:12:26AM +0800, Zhang Chen wrote:
> > > >>> This compilation issue will occur when user use
> --disable-replication
> > > >>> to config Qemu.
> > > >>>
> > > >>> Reported-by: Thomas Huth 
> > > >>> Signed-off-by: Zhang Chen 
> > > >>
> > > >> Hi,
> > > >>
> > > >> How's the status of this patch?  Are we gonna merge it for 3.1?
> > > >>
> > > >> I just posted a similar one without knowing this (until Dave pointed
> > > >> it out).  IMHO it can be a good candidate for 3.1.
> > > >
> > > > Maybe Peter Maydell could apply this directly to the repo as a build
> > > > fix?
> > >
> > > I'd rather it just went through the migration tree, really...
> > > it isn't actually breaking any of our travis jobs or other CI.
> > >
> > >
> > Hi Dave,
> >
> > What do you think about Peter's comments?
>
> Yep, that's fine, as Peter Xu says, Juan's picking it up for the next
> one so we should be OK.
>
>
>
OK~ Thanks Peter Xu and Dave.

Zhang Chen



> Dave
>
> > Thanks
> > Zhang Chen
> >
> >
> > > thanks
> > > -- PMM
> > >
> --
> Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK
>

Re: [Qemu-devel] [PATCH v2 4/6] target/mips: Fix decoding mechanism of special R5900 opcodes

2018-11-17 Thread Aleksandar Markovic

> From: Fredrik Noring 
> Subject: [PATCH v2 4/6] target/mips: Fix decoding mechanism of special R5900 
> opcodes
> 
> MOVN, MOVZ, MFHI, MFLO, MTHI, MTLO, MULT, MULTU, DIV, DIVU, DMULT,
> DMULTU, DDIV, DDIVU and JR are decoded in decode_opc_special_tx79
> instead of the generic decode_opc_special_legacy.
> 
> Signed-off-by: Fredrik Noring 
> ---

Reviewed-by: Aleksandar Markovic 

with caveat that this should be resolved in 3.1+.

Re: [Qemu-devel] [PATCH v2 6/6] target/mips: Guard check_insn with INSN_R5900 check

2018-11-17 Thread Aleksandar Markovic

> From: Fredrik Noring 
> Subject: [PATCH v2 6/6] target/mips: Guard check_insn with INSN_R5900 check

Reviewed-by: Aleksandar Markovic 

Some minor changes will be made before integrating.

Re: [Qemu-devel] [PATCH v2 5/6] target/mips: Guard check_insn_opc_user_only with INSN_R5900 check

2018-11-17 Thread Aleksandar Markovic

> From: Fredrik Noring 
> Subject: [PATCH v2 5/6] target/mips: Guard check_insn_opc_user_only with 
> INSN_R5900 check

Reviewed-by: Aleksandar Markovic 

Some minor changes will be made before integrating.

Re: [Qemu-devel] [PATCH v2 1/6] target/mips: Fix decoding mechanism of R5900 MFLO1, MFHI1, MTLO1 and MTHI1

2018-11-17 Thread Aleksandar Markovic

> From: Fredrik Noring 
> Subject: [PATCH v2 1/6] target/mips: Fix decoding mechanism of R5900 MFLO1, 
> MFHI1, MTLO1 and MTHI1

Reviewed-by: Aleksandar Markovic

[Qemu-devel] [PULL 01/11] linux-user: Update MIPS specific prctl() implementation

2018-11-17 Thread Aleksandar Markovic

From: Stefan Markovic 

Perform the needed checks before the actual prctl() PR_SET_FP_MODE and
PR_GET_FP_MODE work, based on the kernel implementation. Also, update the
necessary hflags.

Signed-off-by: Stefan Markovic 
Reviewed-by: Laurent Vivier 
---
 linux-user/syscall.c | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 5c16692..280137d 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -9554,9 +9554,25 @@ static abi_long do_syscall1(void *cpu_env, int num, 
abi_long arg1,
 {
 CPUMIPSState *env = ((CPUMIPSState *)cpu_env);
 bool old_fr = env->CP0_Status & (1 << CP0St_FR);
+bool old_fre = env->CP0_Config5 & (1 << CP0C5_FRE);
 bool new_fr = arg2 & TARGET_PR_FP_MODE_FR;
 bool new_fre = arg2 & TARGET_PR_FP_MODE_FRE;
 
+const unsigned int known_bits = TARGET_PR_FP_MODE_FR |
+TARGET_PR_FP_MODE_FRE;
+
+/* If nothing to change, return right away, successfully.  */
+if (old_fr == new_fr && old_fre == new_fre) {
+return 0;
+}
+/* Check the value is valid */
+if (arg2 & ~known_bits) {
+return -TARGET_EOPNOTSUPP;
+}
+/* Setting FRE without FR is not supported.  */
+if (new_fre && !new_fr) {
+return -TARGET_EOPNOTSUPP;
+}
 if (new_fr && !(env->active_fpu.fcr0 & (1 << FCR0_F64))) {
 /* FR1 is not supported */
 return -TARGET_EOPNOTSUPP;
@@ -9586,6 +9602,7 @@ static abi_long do_syscall1(void *cpu_env, int num, 
abi_long arg1,
 env->hflags |= MIPS_HFLAG_F64;
 } else {
 env->CP0_Status &= ~(1 << CP0St_FR);
+env->hflags &= ~MIPS_HFLAG_F64;
 }
 if (new_fre) {
 env->CP0_Config5 |= (1 << CP0C5_FRE);
@@ -9594,6 +9611,7 @@ static abi_long do_syscall1(void *cpu_env, int num, 
abi_long arg1,
 }
 } else {
 env->CP0_Config5 &= ~(1 << CP0C5_FRE);
+env->hflags &= ~MIPS_HFLAG_FRE;
 }
 
 return 0;
-- 
2.7.4

[Qemu-devel] [PULL 00/11] MIPS queue for November 2018 (for QEMU 3.1-rc2)

2018-11-17 Thread Aleksandar Markovic

From: Aleksandar Markovic 

The following changes since commit 83c496599cc04926ecbc3e47a37debaa3e38b686:

  Merge remote-tracking branch 
'remotes/kraxel/tags/fixes-31-20181116-pull-request' into staging (2018-11-16 
13:53:01 +0000)

are available in the git repository at:

  https://github.com/AMarkovic/qemu tags/mips-queue-november-2018

for you to fetch changes up to b6ac8eed1ca277a6ed59a61bb7bd4785190990f6:

  MAINTAINERS: Add Stefan Markovic as a MIPS reviewer (2018-11-17 16:17:08 
+0100)


MIPS queue for November 2018

  - fix MIPS-specific prctl() handling in linux-user
  - fix some issues of R5900 support
  - update MAINTAINERS wrt. MIPS reviewer



Aleksandar Markovic (5):
  target/mips: Rename MMI-related masks
  target/mips: Rename MMI-related opcodes
  target/mips: Rename MMI-related functions
  target/mips: Disable R5900 support
  MAINTAINERS: Add Stefan Markovic as a MIPS reviewer

Fredrik Noring (5):
  target/mips: Fix decoding mechanism of R5900 MFLO1, MFHI1, MTLO1 and
MTHI1
  target/mips: Fix decoding mechanism of R5900 DIV1 and DIVU1
  target/mips: Fix decoding mechanism of special R5900 opcodes
  target/mips: Guard check_insn_opc_user_only with INSN_R5900 check
  target/mips: Guard check_insn with INSN_R5900 check

Stefan Markovic (1):
  linux-user: Update MIPS specific prctl() implementation

 MAINTAINERS  |   9 +
 linux-user/syscall.c |  18 +
 target/mips/translate.c  | 707 +++
 target/mips/translate_init.inc.c |  59 
 4 files changed, 450 insertions(+), 343 deletions(-)

-- 
2.7.4

[Qemu-devel] [PULL 10/11] target/mips: Disable R5900 support

2018-11-17 Thread Aleksandar Markovic

From: Aleksandar Markovic 

Disable R5900 support. There are some outstanding issues related
to ABI support and emulation accuracy that were not well understood
during the review process. Disable it to avoid backward compatibility
issues.

Reverts commit ed4f49ba9bb56ebca6987b1083255daf6c89b5de.

Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Aleksandar Markovic 
---
 target/mips/translate_init.inc.c | 59 
 1 file changed, 59 deletions(-)

diff --git a/target/mips/translate_init.inc.c b/target/mips/translate_init.inc.c
index 85da4a2..acab097 100644
--- a/target/mips/translate_init.inc.c
+++ b/target/mips/translate_init.inc.c
@@ -411,65 +411,6 @@ const mips_def_t mips_defs[] =
 .mmu_type = MMU_TYPE_R4000,
 },
 {
-/*
- * The Toshiba TX System RISC TX79 Core Architecture manual
- *
- * https://wiki.qemu.org/File:C790.pdf
- *
- * describes the C790 processor that is a follow-up to the R5900.
- * There are a few notable differences in that the R5900 FPU
- *
- * - is not IEEE 754-1985 compliant,
- * - does not implement double format, and
- * - its machine code is nonstandard.
- */
-.name = "R5900",
-.CP0_PRid = 0x2E00,
-/* No L2 cache, icache size 32k, dcache size 32k, uncached coherency. 
*/
-.CP0_Config0 = (0x3 << 9) | (0x3 << 6) | (0x2 << CP0C0_K0),
-.CP0_Status_rw_bitmask = 0xF4C79C1F,
-#ifdef CONFIG_USER_ONLY
-/*
- * R5900 hardware traps to the Linux kernel for IEEE 754-1985 and LL/SC
- * emulation. For user only, QEMU is the kernel, so we emulate the 
traps
- * by simply emulating the instructions directly.
- *
- * Note: Config1 is only used internally, the R5900 has only Config0.
- */
-.CP0_Config1 = (1 << CP0C1_FP) | (47 << CP0C1_MMU),
-.CP0_LLAddr_rw_bitmask = 0x,
-.CP0_LLAddr_shift = 4,
-.CP1_fcr0 = (0x38 << FCR0_PRID) | (0x0 << FCR0_REV),
-.CP1_fcr31 = 0,
-.CP1_fcr31_rw_bitmask = 0x0183,
-#else
-/*
- * The R5900 COP1 FPU implements single-precision floating-point
- * operations but is not entirely IEEE 754-1985 compatible. In
- * particular,
- *
- * - NaN (not a number) and +/- infinities are not supported;
- * - exception mechanisms are not fully supported;
- * - denormalized numbers are not supported;
- * - rounding towards nearest and +/- infinities are not supported;
- * - computed results usually differs in the least significant bit;
- * - saturations can differ more than the least significant bit.
- *
- * Since only rounding towards zero is supported, the two least
- * significant bits of FCR31 are hardwired to 01.
- *
- * FPU emulation is disabled here until it is implemented.
- *
- * Note: Config1 is only used internally, the R5900 has only Config0.
- */
-.CP0_Config1 = (47 << CP0C1_MMU),
-#endif /* !CONFIG_USER_ONLY */
-.SEGBITS = 32,
-.PABITS = 32,
-.insn_flags = CPU_R5900 | ASE_MMI,
-.mmu_type = MMU_TYPE_R4000,
-},
-{
 /* A generic CPU supporting MIPS32 Release 6 ISA.
FIXME: Support IEEE 754-2008 FP.
   Eventually this should be replaced by a real CPU model. */
-- 
2.7.4

[Qemu-devel] [PULL 11/11] MAINTAINERS: Add Stefan Markovic as a MIPS reviewer

2018-11-17 Thread Aleksandar Markovic

From: Aleksandar Markovic 

Add Stefan Markovic as a MIPS reviewer. He had several key
contributions to QEMU for MIPS this year. He is a meticulous
person with the ability to think and act on many levels.

Reviewed-by: Stefan Markovic 
Signed-off-by: Aleksandar Markovic 
---
 MAINTAINERS | 9 +
 1 file changed, 9 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 4b8db61..f718264 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -190,6 +190,7 @@ F: disas/microblaze.c
 MIPS
 M: Aurelien Jarno 
 M: Aleksandar Markovic 
+R: Stefan Markovic 
 S: Maintained
 F: target/mips/
 F: hw/mips/
@@ -336,6 +337,7 @@ F: target/arm/kvm.c
 
 MIPS
 M: James Hogan 
+R: Stefan Markovic 
 S: Maintained
 F: target/mips/kvm.c
 
@@ -741,27 +743,32 @@ MIPS Machines
 -
 Jazz
 M: Hervé Poussineau 
+R: Stefan Markovic 
 S: Maintained
 F: hw/mips/mips_jazz.c
 
 Malta
 M: Aurelien Jarno 
+R: Stefan Markovic 
 S: Maintained
 F: hw/mips/mips_malta.c
 
 Mipssim
 M: Aleksandar Markovic 
+R: Stefan Markovic 
 S: Odd Fixes
 F: hw/mips/mips_mipssim.c
 F: hw/net/mipsnet.c
 
 R4000
 M: Aurelien Jarno 
+R: Stefan Markovic 
 S: Maintained
 F: hw/mips/mips_r4k.c
 
 Fulong 2E
 M: Aleksandar Markovic 
+R: Stefan Markovic 
 S: Odd Fixes
 F: hw/mips/mips_fulong2e.c
 F: hw/isa/vt82c686.c
@@ -770,6 +777,7 @@ F: include/hw/isa/vt82c686.h
 
 Boston
 M: Paul Burton 
+R: Stefan Markovic 
 S: Maintained
 F: hw/core/loader-fit.c
 F: hw/mips/boston.c
@@ -1992,6 +2000,7 @@ F: disas/i386.c
 
 MIPS target
 M: Aurelien Jarno 
+R: Stefan Markovic 
 S: Maintained
 F: tcg/mips/
 F: disas/mips.c
-- 
2.7.4

[Qemu-devel] [PULL 04/11] target/mips: Fix decoding mechanism of special R5900 opcodes

2018-11-17 Thread Aleksandar Markovic

From: Fredrik Noring 

MOVN, MOVZ, MFHI, MFLO, MTHI, MTLO, MULT, MULTU, DIV, DIVU, DMULT,
DMULTU, DDIV, DDIVU and JR are decoded in decode_opc_special_tx79
instead of the generic decode_opc_special_legacy.

Reviewed-by: Aleksandar Markovic 
Signed-off-by: Fredrik Noring 
---
 target/mips/translate.c | 54 +
 1 file changed, 50 insertions(+), 4 deletions(-)

diff --git a/target/mips/translate.c b/target/mips/translate.c
index 3ddd700..a21b277 100644
--- a/target/mips/translate.c
+++ b/target/mips/translate.c
@@ -23863,6 +23863,53 @@ static void decode_opc_special_r6(CPUMIPSState *env, 
DisasContext *ctx)
 }
 }
 
+static void decode_opc_special_tx79(CPUMIPSState *env, DisasContext *ctx)
+{
+int rs = extract32(ctx->opcode, 21, 5);
+int rt = extract32(ctx->opcode, 16, 5);
+int rd = extract32(ctx->opcode, 11, 5);
+uint32_t op1 = MASK_SPECIAL(ctx->opcode);
+
+switch (op1) {
+case OPC_MOVN: /* Conditional move */
+case OPC_MOVZ:
+gen_cond_move(ctx, op1, rd, rs, rt);
+break;
+case OPC_MFHI:  /* Move from HI/LO */
+case OPC_MFLO:
+gen_HILO(ctx, op1, 0, rd);
+break;
+case OPC_MTHI:
+case OPC_MTLO:  /* Move to HI/LO */
+gen_HILO(ctx, op1, 0, rs);
+break;
+case OPC_MULT:
+case OPC_MULTU:
+gen_mul_txx9(ctx, op1, rd, rs, rt);
+break;
+case OPC_DIV:
+case OPC_DIVU:
+gen_muldiv(ctx, op1, 0, rs, rt);
+break;
+#if defined(TARGET_MIPS64)
+case OPC_DMULT:
+case OPC_DMULTU:
+case OPC_DDIV:
+case OPC_DDIVU:
+check_insn_opc_user_only(ctx, INSN_R5900);
+gen_muldiv(ctx, op1, 0, rs, rt);
+break;
+#endif
+case OPC_JR:
+gen_compute_branch(ctx, op1, 4, rs, 0, 0, 4);
+break;
+default:/* Invalid */
+MIPS_INVAL("special_tx79");
+generate_exception_end(ctx, EXCP_RI);
+break;
+}
+}
+
 static void decode_opc_special_legacy(CPUMIPSState *env, DisasContext *ctx)
 {
 int rs, rt, rd, sa;
@@ -23878,7 +23925,7 @@ static void decode_opc_special_legacy(CPUMIPSState 
*env, DisasContext *ctx)
 case OPC_MOVN: /* Conditional move */
 case OPC_MOVZ:
 check_insn(ctx, ISA_MIPS4 | ISA_MIPS32 |
-   INSN_LOONGSON2E | INSN_LOONGSON2F | INSN_R5900);
+   INSN_LOONGSON2E | INSN_LOONGSON2F);
 gen_cond_move(ctx, op1, rd, rs, rt);
 break;
 case OPC_MFHI:  /* Move from HI/LO */
@@ -23905,8 +23952,6 @@ static void decode_opc_special_legacy(CPUMIPSState 
*env, DisasContext *ctx)
 check_insn(ctx, INSN_VR54XX);
 op1 = MASK_MUL_VR54XX(ctx->opcode);
 gen_mul_vr54xx(ctx, op1, rd, rs, rt);
-} else if (ctx->insn_flags & INSN_R5900) {
-gen_mul_txx9(ctx, op1, rd, rs, rt);
 } else {
 gen_muldiv(ctx, op1, rd & 3, rs, rt);
 }
@@ -23921,7 +23966,6 @@ static void decode_opc_special_legacy(CPUMIPSState 
*env, DisasContext *ctx)
 case OPC_DDIV:
 case OPC_DDIVU:
 check_insn(ctx, ISA_MIPS3);
-check_insn_opc_user_only(ctx, INSN_R5900);
 check_mips_64(ctx);
 gen_muldiv(ctx, op1, 0, rs, rt);
 break;
@@ -24148,6 +24192,8 @@ static void decode_opc_special(CPUMIPSState *env, 
DisasContext *ctx)
 default:
 if (ctx->insn_flags & ISA_MIPS32R6) {
 decode_opc_special_r6(env, ctx);
+} else if (ctx->insn_flags & INSN_R5900) {
+decode_opc_special_tx79(env, ctx);
 } else {
 decode_opc_special_legacy(env, ctx);
 }
-- 
2.7.4

[Qemu-devel] [PULL 02/11] target/mips: Fix decoding mechanism of R5900 MFLO1, MFHI1, MTLO1 and MTHI1

2018-11-17 Thread Aleksandar Markovic

From: Fredrik Noring 

MFLO1, MFHI1, MTLO1 and MTHI1 are generated in gen_HILO1_tx79 instead of
the generic gen_HILO.

Reviewed-by: Aleksandar Markovic 
Signed-off-by: Fredrik Noring 
---
 target/mips/translate.c | 51 ++---
 1 file changed, 40 insertions(+), 11 deletions(-)

diff --git a/target/mips/translate.c b/target/mips/translate.c
index 60320cb..8601333 100644
--- a/target/mips/translate.c
+++ b/target/mips/translate.c
@@ -4359,24 +4359,56 @@ static void gen_shift(DisasContext *ctx, uint32_t opc,
 tcg_temp_free(t1);
 }
 
+/* Copy GPR to and from TX79 HI1/LO1 register. */
+static void gen_HILO1_tx79(DisasContext *ctx, uint32_t opc, int reg)
+{
+if (reg == 0 && (opc == TX79_MMI_MFHI1 || opc == TX79_MMI_MFLO1)) {
+/* Treat as NOP. */
+return;
+}
+
+switch (opc) {
+case TX79_MMI_MFHI1:
+tcg_gen_mov_tl(cpu_gpr[reg], cpu_HI[1]);
+break;
+case TX79_MMI_MFLO1:
+tcg_gen_mov_tl(cpu_gpr[reg], cpu_LO[1]);
+break;
+case TX79_MMI_MTHI1:
+if (reg != 0) {
+tcg_gen_mov_tl(cpu_HI[1], cpu_gpr[reg]);
+} else {
+tcg_gen_movi_tl(cpu_HI[1], 0);
+}
+break;
+case TX79_MMI_MTLO1:
+if (reg != 0) {
+tcg_gen_mov_tl(cpu_LO[1], cpu_gpr[reg]);
+} else {
+tcg_gen_movi_tl(cpu_LO[1], 0);
+}
+break;
+default:
+MIPS_INVAL("mfthilo1 TX79");
+generate_exception_end(ctx, EXCP_RI);
+break;
+}
+}
+
 /* Arithmetic on HI/LO registers */
 static void gen_HILO(DisasContext *ctx, uint32_t opc, int acc, int reg)
 {
-if (reg == 0 && (opc == OPC_MFHI || opc == TX79_MMI_MFHI1 ||
- opc == OPC_MFLO || opc == TX79_MMI_MFLO1)) {
+if (reg == 0 && (opc == OPC_MFHI || opc == OPC_MFLO)) {
 /* Treat as NOP. */
 return;
 }
 
 if (acc != 0) {
-if (!(ctx->insn_flags & INSN_R5900)) {
-check_dsp(ctx);
-}
+check_dsp(ctx);
 }
 
 switch (opc) {
 case OPC_MFHI:
-case TX79_MMI_MFHI1:
 #if defined(TARGET_MIPS64)
 if (acc != 0) {
 tcg_gen_ext32s_tl(cpu_gpr[reg], cpu_HI[acc]);
@@ -4387,7 +4419,6 @@ static void gen_HILO(DisasContext *ctx, uint32_t opc, int 
acc, int reg)
 }
 break;
 case OPC_MFLO:
-case TX79_MMI_MFLO1:
 #if defined(TARGET_MIPS64)
 if (acc != 0) {
 tcg_gen_ext32s_tl(cpu_gpr[reg], cpu_LO[acc]);
@@ -4398,7 +4429,6 @@ static void gen_HILO(DisasContext *ctx, uint32_t opc, int 
acc, int reg)
 }
 break;
 case OPC_MTHI:
-case TX79_MMI_MTHI1:
 if (reg != 0) {
 #if defined(TARGET_MIPS64)
 if (acc != 0) {
@@ -4413,7 +4443,6 @@ static void gen_HILO(DisasContext *ctx, uint32_t opc, int 
acc, int reg)
 }
 break;
 case OPC_MTLO:
-case TX79_MMI_MTLO1:
 if (reg != 0) {
 #if defined(TARGET_MIPS64)
 if (acc != 0) {
@@ -26500,11 +26529,11 @@ static void decode_tx79_mmi(CPUMIPSState *env, 
DisasContext *ctx)
 break;
 case TX79_MMI_MTLO1:
 case TX79_MMI_MTHI1:
-gen_HILO(ctx, opc, 1, rs);
+gen_HILO1_tx79(ctx, opc, rs);
 break;
 case TX79_MMI_MFLO1:
 case TX79_MMI_MFHI1:
-gen_HILO(ctx, opc, 1, rd);
+gen_HILO1_tx79(ctx, opc, rd);
 break;
 case TX79_MMI_MADD:  /* TODO: TX79_MMI_MADD */
 case TX79_MMI_MADDU: /* TODO: TX79_MMI_MADDU */
-- 
2.7.4

[Qemu-devel] [PULL 03/11] target/mips: Fix decoding mechanism of R5900 DIV1 and DIVU1

2018-11-17 Thread Aleksandar Markovic

From: Fredrik Noring 

DIV1 and DIVU1 are generated in gen_div1_tx79 instead of the generic
gen_muldiv.

Signed-off-by: Fredrik Noring 
Reviewed-by: Philippe Mathieu-Daudé 
---
 target/mips/translate.c | 65 -
 1 file changed, 59 insertions(+), 6 deletions(-)

diff --git a/target/mips/translate.c b/target/mips/translate.c
index 8601333..3ddd700 100644
--- a/target/mips/translate.c
+++ b/target/mips/translate.c
@@ -4743,6 +4743,63 @@ static void gen_r6_muldiv(DisasContext *ctx, int opc, 
int rd, int rs, int rt)
 tcg_temp_free(t1);
 }
 
+static void gen_div1_tx79(DisasContext *ctx, uint32_t opc, int rs, int rt)
+{
+TCGv t0, t1;
+
+t0 = tcg_temp_new();
+t1 = tcg_temp_new();
+
+gen_load_gpr(t0, rs);
+gen_load_gpr(t1, rt);
+
+switch (opc) {
+case TX79_MMI_DIV1:
+{
+TCGv t2 = tcg_temp_new();
+TCGv t3 = tcg_temp_new();
+tcg_gen_ext32s_tl(t0, t0);
+tcg_gen_ext32s_tl(t1, t1);
+tcg_gen_setcondi_tl(TCG_COND_EQ, t2, t0, INT_MIN);
+tcg_gen_setcondi_tl(TCG_COND_EQ, t3, t1, -1);
+tcg_gen_and_tl(t2, t2, t3);
+tcg_gen_setcondi_tl(TCG_COND_EQ, t3, t1, 0);
+tcg_gen_or_tl(t2, t2, t3);
+tcg_gen_movi_tl(t3, 0);
+tcg_gen_movcond_tl(TCG_COND_NE, t1, t2, t3, t2, t1);
+tcg_gen_div_tl(cpu_LO[1], t0, t1);
+tcg_gen_rem_tl(cpu_HI[1], t0, t1);
+tcg_gen_ext32s_tl(cpu_LO[1], cpu_LO[1]);
+tcg_gen_ext32s_tl(cpu_HI[1], cpu_HI[1]);
+tcg_temp_free(t3);
+tcg_temp_free(t2);
+}
+break;
+case TX79_MMI_DIVU1:
+{
+TCGv t2 = tcg_const_tl(0);
+TCGv t3 = tcg_const_tl(1);
+tcg_gen_ext32u_tl(t0, t0);
+tcg_gen_ext32u_tl(t1, t1);
+tcg_gen_movcond_tl(TCG_COND_EQ, t1, t1, t2, t3, t1);
+tcg_gen_divu_tl(cpu_LO[1], t0, t1);
+tcg_gen_remu_tl(cpu_HI[1], t0, t1);
+tcg_gen_ext32s_tl(cpu_LO[1], cpu_LO[1]);
+tcg_gen_ext32s_tl(cpu_HI[1], cpu_HI[1]);
+tcg_temp_free(t3);
+tcg_temp_free(t2);
+}
+break;
+default:
+MIPS_INVAL("div1 TX79");
+generate_exception_end(ctx, EXCP_RI);
+goto out;
+}
+ out:
+tcg_temp_free(t0);
+tcg_temp_free(t1);
+}
+
 static void gen_muldiv(DisasContext *ctx, uint32_t opc,
int acc, int rs, int rt)
 {
@@ -4755,14 +4812,11 @@ static void gen_muldiv(DisasContext *ctx, uint32_t opc,
 gen_load_gpr(t1, rt);
 
 if (acc != 0) {
-if (!(ctx->insn_flags & INSN_R5900)) {
-check_dsp(ctx);
-}
+check_dsp(ctx);
 }
 
 switch (opc) {
 case OPC_DIV:
-case TX79_MMI_DIV1:
 {
 TCGv t2 = tcg_temp_new();
 TCGv t3 = tcg_temp_new();
@@ -4784,7 +4838,6 @@ static void gen_muldiv(DisasContext *ctx, uint32_t opc,
 }
 break;
 case OPC_DIVU:
-case TX79_MMI_DIVU1:
 {
 TCGv t2 = tcg_const_tl(0);
 TCGv t3 = tcg_const_tl(1);
@@ -26525,7 +26578,7 @@ static void decode_tx79_mmi(CPUMIPSState *env, 
DisasContext *ctx)
 break;
 case TX79_MMI_DIV1:
 case TX79_MMI_DIVU1:
-gen_muldiv(ctx, opc, 1, rs, rt);
+gen_div1_tx79(ctx, opc, rs, rt);
 break;
 case TX79_MMI_MTLO1:
 case TX79_MMI_MTHI1:
-- 
2.7.4

[Qemu-devel] [PULL 09/11] target/mips: Rename MMI-related functions

2018-11-17 Thread Aleksandar Markovic

From: Aleksandar Markovic 

Rename MMI-related functions.

Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Stefan Markovic 
Signed-off-by: Aleksandar Markovic 
---
 target/mips/translate.c | 32 
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/target/mips/translate.c b/target/mips/translate.c
index 98dc468..e9c23a5 100644
--- a/target/mips/translate.c
+++ b/target/mips/translate.c
@@ -26464,7 +26464,7 @@ static void decode_opc_special3_legacy(CPUMIPSState 
*env, DisasContext *ctx)
 }
 }
 
-static void decode_tx79_mmi0(CPUMIPSState *env, DisasContext *ctx)
+static void decode_mmi0(CPUMIPSState *env, DisasContext *ctx)
 {
 uint32_t opc = MASK_MMI0(ctx->opcode);
 
@@ -26503,7 +26503,7 @@ static void decode_tx79_mmi0(CPUMIPSState *env, 
DisasContext *ctx)
 }
 }
 
-static void decode_tx79_mmi1(CPUMIPSState *env, DisasContext *ctx)
+static void decode_mmi1(CPUMIPSState *env, DisasContext *ctx)
 {
 uint32_t opc = MASK_MMI1(ctx->opcode);
 
@@ -26535,7 +26535,7 @@ static void decode_tx79_mmi1(CPUMIPSState *env, 
DisasContext *ctx)
 }
 }
 
-static void decode_tx79_mmi2(CPUMIPSState *env, DisasContext *ctx)
+static void decode_mmi2(CPUMIPSState *env, DisasContext *ctx)
 {
 uint32_t opc = MASK_MMI2(ctx->opcode);
 
@@ -26571,7 +26571,7 @@ static void decode_tx79_mmi2(CPUMIPSState *env, 
DisasContext *ctx)
 }
 }
 
-static void decode_tx79_mmi3(CPUMIPSState *env, DisasContext *ctx)
+static void decode_mmi3(CPUMIPSState *env, DisasContext *ctx)
 {
 uint32_t opc = MASK_MMI3(ctx->opcode);
 
@@ -26598,7 +26598,7 @@ static void decode_tx79_mmi3(CPUMIPSState *env, 
DisasContext *ctx)
 }
 }
 
-static void decode_tx79_mmi(CPUMIPSState *env, DisasContext *ctx)
+static void decode_mmi(CPUMIPSState *env, DisasContext *ctx)
 {
 uint32_t opc = MASK_MMI(ctx->opcode);
 int rs = extract32(ctx->opcode, 21, 5);
@@ -26607,16 +26607,16 @@ static void decode_tx79_mmi(CPUMIPSState *env, 
DisasContext *ctx)
 
 switch (opc) {
 case MMI_OPC_CLASS_MMI0:
-decode_tx79_mmi0(env, ctx);
+decode_mmi0(env, ctx);
 break;
 case MMI_OPC_CLASS_MMI1:
-decode_tx79_mmi1(env, ctx);
+decode_mmi1(env, ctx);
 break;
 case MMI_OPC_CLASS_MMI2:
-decode_tx79_mmi2(env, ctx);
+decode_mmi2(env, ctx);
 break;
 case MMI_OPC_CLASS_MMI3:
-decode_tx79_mmi3(env, ctx);
+decode_mmi3(env, ctx);
 break;
 case MMI_OPC_MULT1:
 case MMI_OPC_MULTU1:
@@ -26656,12 +26656,12 @@ static void decode_tx79_mmi(CPUMIPSState *env, 
DisasContext *ctx)
 }
 }
 
-static void decode_tx79_lq(CPUMIPSState *env, DisasContext *ctx)
+static void gen_mmi_lq(CPUMIPSState *env, DisasContext *ctx)
 {
 generate_exception_end(ctx, EXCP_RI);/* TODO: MMI_OPC_LQ */
 }
 
-static void gen_tx79_sq(DisasContext *ctx, int base, int rt, int offset)
+static void gen_mmi_sq(DisasContext *ctx, int base, int rt, int offset)
 {
 generate_exception_end(ctx, EXCP_RI);/* TODO: MMI_OPC_SQ */
 }
@@ -26687,7 +26687,7 @@ static void gen_tx79_sq(DisasContext *ctx, int base, 
int rt, int offset)
  * In user mode, QEMU must verify the upper and lower 11 bits to distinguish
  * between SQ and RDHWR, as the Linux kernel does.
  */
-static void decode_tx79_sq(CPUMIPSState *env, DisasContext *ctx)
+static void decode_mmi_sq(CPUMIPSState *env, DisasContext *ctx)
 {
 int base = extract32(ctx->opcode, 21, 5);
 int rt = extract32(ctx->opcode, 16, 5);
@@ -26705,7 +26705,7 @@ static void decode_tx79_sq(CPUMIPSState *env, 
DisasContext *ctx)
 }
 #endif
 
-gen_tx79_sq(ctx, base, rt, offset);
+gen_mmi_sq(ctx, base, rt, offset);
 }
 
 static void decode_opc_special3(CPUMIPSState *env, DisasContext *ctx)
@@ -28014,7 +28014,7 @@ static void decode_opc(CPUMIPSState *env, DisasContext 
*ctx)
 break;
 case OPC_SPECIAL2:
 if ((ctx->insn_flags & INSN_R5900) && (ctx->insn_flags & ASE_MMI)) {
-decode_tx79_mmi(env, ctx);
+decode_mmi(env, ctx);
 } else if (ctx->insn_flags & ASE_MXU) {
 decode_opc_mxu(env, ctx);
 } else {
@@ -28023,7 +28023,7 @@ static void decode_opc(CPUMIPSState *env, DisasContext 
*ctx)
 break;
 case OPC_SPECIAL3:
 if (ctx->insn_flags & INSN_R5900) {
-decode_tx79_sq(env, ctx);/* MMI_OPC_SQ */
+decode_mmi_sq(env, ctx);/* MMI_OPC_SQ */
 } else {
 decode_opc_special3(env, ctx);
 }
@@ -28698,7 +28698,7 @@ static void decode_opc(CPUMIPSState *env, DisasContext 
*ctx)
 break;
 case OPC_MSA: /* OPC_MDMX */
 if (ctx->insn_flags & INSN_R5900) {
-decode_tx79_lq(env, ctx);/* MMI_OPC_LQ */
+gen_mmi_lq(env, ctx);/* MMI_OPC_LQ */
 } else {
 /* MDMX: Not implemented. */
 gen_msa(env, ctx);
-- 
2.7.4

[Qemu-devel] [PULL 05/11] target/mips: Guard check_insn_opc_user_only with INSN_R5900 check

2018-11-17 Thread Aleksandar Markovic

From: Fredrik Noring 

Avoid using check_insn_opc_user_only() as decision-making code with respect
to various architectures. Use ctx->insn_flags checks instead.

Reviewed-by: Aleksandar Markovic 
Signed-off-by: Fredrik Noring 
---
 target/mips/translate.c | 16 
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/target/mips/translate.c b/target/mips/translate.c
index a21b277..c79da3c 100644
--- a/target/mips/translate.c
+++ b/target/mips/translate.c
@@ -28313,7 +28313,9 @@ static void decode_opc(CPUMIPSState *env, DisasContext 
*ctx)
  break;
 case OPC_LL: /* Load and stores */
 check_insn(ctx, ISA_MIPS2);
-check_insn_opc_user_only(ctx, INSN_R5900);
+if (ctx->insn_flags & INSN_R5900) {
+check_insn_opc_user_only(ctx, INSN_R5900);
+}
 /* Fallthrough */
 case OPC_LWL:
 case OPC_LWR:
@@ -28339,7 +28341,9 @@ static void decode_opc(CPUMIPSState *env, DisasContext 
*ctx)
 case OPC_SC:
 check_insn(ctx, ISA_MIPS2);
  check_insn_opc_removed(ctx, ISA_MIPS32R6);
-check_insn_opc_user_only(ctx, INSN_R5900);
+if (ctx->insn_flags & INSN_R5900) {
+check_insn_opc_user_only(ctx, INSN_R5900);
+}
  gen_st_cond(ctx, op, rt, rs, imm);
  break;
 case OPC_CACHE:
@@ -28607,7 +28611,9 @@ static void decode_opc(CPUMIPSState *env, DisasContext 
*ctx)
 #if defined(TARGET_MIPS64)
 /* MIPS64 opcodes */
 case OPC_LLD:
-check_insn_opc_user_only(ctx, INSN_R5900);
+if (ctx->insn_flags & INSN_R5900) {
+check_insn_opc_user_only(ctx, INSN_R5900);
+}
 /* fall through */
 case OPC_LDL:
 case OPC_LDR:
@@ -28631,7 +28637,9 @@ static void decode_opc(CPUMIPSState *env, DisasContext 
*ctx)
 case OPC_SCD:
 check_insn_opc_removed(ctx, ISA_MIPS32R6);
 check_insn(ctx, ISA_MIPS3);
-check_insn_opc_user_only(ctx, INSN_R5900);
+if (ctx->insn_flags & INSN_R5900) {
+check_insn_opc_user_only(ctx, INSN_R5900);
+}
 check_mips_64(ctx);
 gen_st_cond(ctx, op, rt, rs, imm);
 break;
-- 
2.7.4

[Qemu-devel] [PULL 07/11] target/mips: Rename MMI-related masks

2018-11-17 Thread Aleksandar Markovic

From: Aleksandar Markovic 

Rename MMI-related masks.

Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Stefan Markovic 
Signed-off-by: Aleksandar Markovic 
---
 target/mips/translate.c | 20 ++--
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/target/mips/translate.c b/target/mips/translate.c
index 714f2e6..12591c1 100644
--- a/target/mips/translate.c
+++ b/target/mips/translate.c
@@ -2201,7 +2201,7 @@ enum {
  *7 111 |   *   |   *   |   *   |   *   | PSLLW |   *   | PSRLW | PSRAW
  */
 
-#define MASK_TX79_MMI(op) (MASK_OP_MAJOR(op) | ((op) & 0x3F))
+#define MASK_MMI(op) (MASK_OP_MAJOR(op) | ((op) & 0x3F))
 enum {
 TX79_MMI_MADD   = 0x00 | TX79_CLASS_MMI, /* Same as OPC_MADD */
 TX79_MMI_MADDU  = 0x01 | TX79_CLASS_MMI, /* Same as OPC_MADDU */
@@ -2252,7 +2252,7 @@ enum {
  *7 111 |   *   |   *   | PEXT5 | PPAC5
  */
 
-#define MASK_TX79_MMI0(op) (MASK_OP_MAJOR(op) | ((op) & 0x7FF))
+#define MASK_MMI0(op) (MASK_OP_MAJOR(op) | ((op) & 0x7FF))
 enum {
 TX79_MMI0_PADDW  = (0x00 << 6) | TX79_MMI_CLASS_MMI0,
 TX79_MMI0_PSUBW  = (0x01 << 6) | TX79_MMI_CLASS_MMI0,
@@ -2303,7 +2303,7 @@ enum {
  *7 111 |   *   |   *   |   *   |   *
  */
 
-#define MASK_TX79_MMI1(op) (MASK_OP_MAJOR(op) | ((op) & 0x7FF))
+#define MASK_MMI1(op) (MASK_OP_MAJOR(op) | ((op) & 0x7FF))
 enum {
 TX79_MMI1_PABSW  = (0x01 << 6) | TX79_MMI_CLASS_MMI1,
 TX79_MMI1_PCEQW  = (0x02 << 6) | TX79_MMI_CLASS_MMI1,
@@ -2347,7 +2347,7 @@ enum {
  *7 111 | PMULTH| PDIVBW| PEXEW | PROT3W
  */
 
-#define MASK_TX79_MMI2(op) (MASK_OP_MAJOR(op) | ((op) & 0x7FF))
+#define MASK_MMI2(op) (MASK_OP_MAJOR(op) | ((op) & 0x7FF))
 enum {
 TX79_MMI2_PMADDW = (0x00 << 6) | TX79_MMI_CLASS_MMI2,
 TX79_MMI2_PSLLVW = (0x02 << 6) | TX79_MMI_CLASS_MMI2,
@@ -2395,7 +2395,7 @@ enum {
  *7 111 |   *   |   *   | PEXCW |   *
  */
 
-#define MASK_TX79_MMI3(op) (MASK_OP_MAJOR(op) | ((op) & 0x7FF))
+#define MASK_MMI3(op) (MASK_OP_MAJOR(op) | ((op) & 0x7FF))
 enum {
 TX79_MMI3_PMADDUW = (0x00 << 6) | TX79_MMI_CLASS_MMI3,
 TX79_MMI3_PSRAVW  = (0x03 << 6) | TX79_MMI_CLASS_MMI3,
@@ -26466,7 +26466,7 @@ static void decode_opc_special3_legacy(CPUMIPSState 
*env, DisasContext *ctx)
 
 static void decode_tx79_mmi0(CPUMIPSState *env, DisasContext *ctx)
 {
-uint32_t opc = MASK_TX79_MMI0(ctx->opcode);
+uint32_t opc = MASK_MMI0(ctx->opcode);
 
 switch (opc) {
 case TX79_MMI0_PADDW: /* TODO: TX79_MMI0_PADDW */
@@ -26505,7 +26505,7 @@ static void decode_tx79_mmi0(CPUMIPSState *env, 
DisasContext *ctx)
 
 static void decode_tx79_mmi1(CPUMIPSState *env, DisasContext *ctx)
 {
-uint32_t opc = MASK_TX79_MMI1(ctx->opcode);
+uint32_t opc = MASK_MMI1(ctx->opcode);
 
 switch (opc) {
 case TX79_MMI1_PABSW: /* TODO: TX79_MMI1_PABSW */
@@ -26537,7 +26537,7 @@ static void decode_tx79_mmi1(CPUMIPSState *env, 
DisasContext *ctx)
 
 static void decode_tx79_mmi2(CPUMIPSState *env, DisasContext *ctx)
 {
-uint32_t opc = MASK_TX79_MMI2(ctx->opcode);
+uint32_t opc = MASK_MMI2(ctx->opcode);
 
 switch (opc) {
 case TX79_MMI2_PMADDW:/* TODO: TX79_MMI2_PMADDW */
@@ -26573,7 +26573,7 @@ static void decode_tx79_mmi2(CPUMIPSState *env, 
DisasContext *ctx)
 
 static void decode_tx79_mmi3(CPUMIPSState *env, DisasContext *ctx)
 {
-uint32_t opc = MASK_TX79_MMI3(ctx->opcode);
+uint32_t opc = MASK_MMI3(ctx->opcode);
 
 switch (opc) {
 case TX79_MMI3_PMADDUW:/* TODO: TX79_MMI3_PMADDUW */
@@ -26600,7 +26600,7 @@ static void decode_tx79_mmi3(CPUMIPSState *env, 
DisasContext *ctx)
 
 static void decode_tx79_mmi(CPUMIPSState *env, DisasContext *ctx)
 {
-uint32_t opc = MASK_TX79_MMI(ctx->opcode);
+uint32_t opc = MASK_MMI(ctx->opcode);
 int rs = extract32(ctx->opcode, 21, 5);
 int rt = extract32(ctx->opcode, 16, 5);
 int rd = extract32(ctx->opcode, 11, 5);
-- 
2.7.4

[Qemu-devel] [PULL 06/11] target/mips: Guard check_insn with INSN_R5900 check

2018-11-17 Thread Aleksandar Markovic

From: Fredrik Noring 

Explicitly mark the handling of the PREF instruction for R5900 as
being treated the same as NOP.

Reviewed-by: Aleksandar Markovic 
Signed-off-by: Fredrik Noring 
---
 target/mips/translate.c | 9 ++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/target/mips/translate.c b/target/mips/translate.c
index c79da3c..714f2e6 100644
--- a/target/mips/translate.c
+++ b/target/mips/translate.c
@@ -28357,9 +28357,12 @@ static void decode_opc(CPUMIPSState *env, DisasContext 
*ctx)
 break;
 case OPC_PREF:
 check_insn_opc_removed(ctx, ISA_MIPS32R6);
-check_insn(ctx, ISA_MIPS4 | ISA_MIPS32 |
-   INSN_R5900);
-/* Treat as NOP. */
+if (ctx->insn_flags & INSN_R5900) {
+/* Treat as NOP. */
+} else {
+check_insn(ctx, ISA_MIPS4 | ISA_MIPS32);
+/* Treat as NOP. */
+}
 break;
 
 /* Floating point (COP1). */
-- 
2.7.4

[Qemu-devel] [PULL 08/11] target/mips: Rename MMI-related opcodes

2018-11-17 Thread Aleksandar Markovic

From: Aleksandar Markovic 

Rename MMI-related opcodes.

Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Stefan Markovic 
Signed-off-by: Aleksandar Markovic 
---
 target/mips/translate.c | 472 
 1 file changed, 236 insertions(+), 236 deletions(-)

diff --git a/target/mips/translate.c b/target/mips/translate.c
index 12591c1..98dc468 100644
--- a/target/mips/translate.c
+++ b/target/mips/translate.c
@@ -2138,10 +2138,10 @@ enum {
  * MTSAH   rs, immediate Move Halfword Count to Shift Amount Register
  * PROT3W  rd, rtParallel Rotate 3 Words
  *
- * The TX79-specific Multimedia Instruction encodings
- * ==
+ * MMI (MultiMedia Instruction) encodings
+ * ==
  *
- * TX79 Multimedia Instruction encoding table keys:
+ * MMI instructions encoding table keys:
  *
  * *   This code is reserved for future use. An attempt to execute it
  * causes a Reserved Instruction exception.
@@ -2152,7 +2152,7 @@ enum {
  * DMULTU, DDIV, DDIVU, LL, LLD, SC, SCD, LWC2 and SWC2. An attempt
  * to execute it causes a Reserved Instruction exception.
  *
- * TX79 Multimedia Instructions encoded by opcode field (MMI, LQ, SQ):
+ * MMI instructions encoded by opcode field (MMI, LQ, SQ):
  *
  *  31260
  * +++
@@ -2174,13 +2174,13 @@ enum {
  */
 
 enum {
-TX79_CLASS_MMI = 0x1C << 26,/* Same as OPC_SPECIAL2 */
-TX79_LQ= 0x1E << 26,/* Same as OPC_MSA */
-TX79_SQ= 0x1F << 26,/* Same as OPC_SPECIAL3 */
+MMI_OPC_CLASS_MMI = 0x1C << 26,/* Same as OPC_SPECIAL2 */
+MMI_OPC_LQ= 0x1E << 26,/* Same as OPC_MSA */
+MMI_OPC_SQ= 0x1F << 26,/* Same as OPC_SPECIAL3 */
 };
 
 /*
- * TX79 Multimedia Instructions with opcode field = MMI:
+ * MMI instructions with opcode field = MMI:
  *
  *  3126 5  0
  * ++---++
@@ -2203,35 +2203,35 @@ enum {
 
 #define MASK_MMI(op) (MASK_OP_MAJOR(op) | ((op) & 0x3F))
 enum {
-TX79_MMI_MADD   = 0x00 | TX79_CLASS_MMI, /* Same as OPC_MADD */
-TX79_MMI_MADDU  = 0x01 | TX79_CLASS_MMI, /* Same as OPC_MADDU */
-TX79_MMI_PLZCW  = 0x04 | TX79_CLASS_MMI,
-TX79_MMI_CLASS_MMI0 = 0x08 | TX79_CLASS_MMI,
-TX79_MMI_CLASS_MMI2 = 0x09 | TX79_CLASS_MMI,
-TX79_MMI_MFHI1  = 0x10 | TX79_CLASS_MMI, /* Same minor as OPC_MFHI */
-TX79_MMI_MTHI1  = 0x11 | TX79_CLASS_MMI, /* Same minor as OPC_MTHI */
-TX79_MMI_MFLO1  = 0x12 | TX79_CLASS_MMI, /* Same minor as OPC_MFLO */
-TX79_MMI_MTLO1  = 0x13 | TX79_CLASS_MMI, /* Same minor as OPC_MTLO */
-TX79_MMI_MULT1  = 0x18 | TX79_CLASS_MMI, /* Same minor as OPC_MULT */
-TX79_MMI_MULTU1 = 0x19 | TX79_CLASS_MMI, /* Same minor as OPC_MULTU */
-TX79_MMI_DIV1   = 0x1A | TX79_CLASS_MMI, /* Same minor as OPC_DIV */
-TX79_MMI_DIVU1  = 0x1B | TX79_CLASS_MMI, /* Same minor as OPC_DIVU */
-TX79_MMI_MADD1  = 0x20 | TX79_CLASS_MMI,
-TX79_MMI_MADDU1 = 0x21 | TX79_CLASS_MMI,
-TX79_MMI_CLASS_MMI1 = 0x28 | TX79_CLASS_MMI,
-TX79_MMI_CLASS_MMI3 = 0x29 | TX79_CLASS_MMI,
-TX79_MMI_PMFHL  = 0x30 | TX79_CLASS_MMI,
-TX79_MMI_PMTHL  = 0x31 | TX79_CLASS_MMI,
-TX79_MMI_PSLLH  = 0x34 | TX79_CLASS_MMI,
-TX79_MMI_PSRLH  = 0x36 | TX79_CLASS_MMI,
-TX79_MMI_PSRAH  = 0x37 | TX79_CLASS_MMI,
-TX79_MMI_PSLLW  = 0x3C | TX79_CLASS_MMI,
-TX79_MMI_PSRLW  = 0x3E | TX79_CLASS_MMI,
-TX79_MMI_PSRAW  = 0x3F | TX79_CLASS_MMI,
+MMI_OPC_MADD   = 0x00 | MMI_OPC_CLASS_MMI, /* Same as OPC_MADD */
+MMI_OPC_MADDU  = 0x01 | MMI_OPC_CLASS_MMI, /* Same as OPC_MADDU */
+MMI_OPC_PLZCW  = 0x04 | MMI_OPC_CLASS_MMI,
+MMI_OPC_CLASS_MMI0 = 0x08 | MMI_OPC_CLASS_MMI,
+MMI_OPC_CLASS_MMI2 = 0x09 | MMI_OPC_CLASS_MMI,
+MMI_OPC_MFHI1  = 0x10 | MMI_OPC_CLASS_MMI, /* Same minor as OPC_MFHI */
+MMI_OPC_MTHI1  = 0x11 | MMI_OPC_CLASS_MMI, /* Same minor as OPC_MTHI */
+MMI_OPC_MFLO1  = 0x12 | MMI_OPC_CLASS_MMI, /* Same minor as OPC_MFLO */
+MMI_OPC_MTLO1  = 0x13 | MMI_OPC_CLASS_MMI, /* Same minor as OPC_MTLO */
+MMI_OPC_MULT1  = 0x18 | MMI_OPC_CLASS_MMI, /* Same minor as OPC_MULT */
+MMI_OPC_MULTU1 = 0x19 | MMI_OPC_CLASS_MMI, /* Same min. as OPC_MULTU */
+MMI_OPC_DIV1   = 0x1A | MMI_OPC_CLASS_MMI, /* Same minor as OPC_DIV  */
+MMI_OPC_DIVU1  = 0x1B | MMI_OPC_CLASS_MMI, /* Same minor as OPC_DIVU */
+MMI_OPC_MADD1  = 0x20 | MMI_OPC_CLASS_MMI,
+MMI_OPC_MADDU1 = 0x21 | MMI_OPC_CLASS_MMI,
+MMI_OPC_CLASS_MMI1 = 0x28 | MMI_OPC_CLASS_MMI,
+MMI_OPC_CLASS_MMI3 = 0x29 | MMI_OPC_CLASS_MMI,
+MMI_OPC_PMFHL  = 0x30 | MMI_OPC_CLASS_MMI,
+MMI_OPC_PMT

[Qemu-devel] [PATCH] target/arm: fix smc incorrectly trapping to EL3 when secure is off

2018-11-17 Thread Luc Michel

This commit fixes a case where the CPU would try to go to EL3 when
executing an smc instruction, even though ARM_FEATURE_EL3 is false. This
case is raised when the PSCI conduit is set to smc, but the smc
instruction does not lead to a valid PSCI call.

QEMU crashes with an assertion failure later on because of incoherent
mmu_idx.

This commit refactors the pre_smc helper by enumerating all the possible
ways of handling an smc instruction, and covering the previously missing
case leading to the crash.

The following minimal test would crash before this commit:

.global _start
.text
_start:
ldr x0, =0xdeadbeef  ; invalid PSCI call
smc #0

run with the following command line:

aarch64-linux-gnu-gcc -nostdinc -nostdlib -Wl,-Ttext=4000 \
  -o test test.s

qemu-system-aarch64 -M virt,virtualization=on,secure=off \
-cpu cortex-a57 -kernel test

Signed-off-by: Luc Michel 
---
 target/arm/op_helper.c | 54 +++---
 1 file changed, 46 insertions(+), 8 deletions(-)

diff --git a/target/arm/op_helper.c b/target/arm/op_helper.c
index eb6fb82fb8..0d6e89e474 100644
--- a/target/arm/op_helper.c
+++ b/target/arm/op_helper.c
@@ -937,43 +937,81 @@ void HELPER(pre_hvc)(CPUARMState *env)
 void HELPER(pre_smc)(CPUARMState *env, uint32_t syndrome)
 {
 ARMCPU *cpu = arm_env_get_cpu(env);
 int cur_el = arm_current_el(env);
 bool secure = arm_is_secure(env);
-bool smd = env->cp15.scr_el3 & SCR_SMD;
+bool smd_flag = env->cp15.scr_el3 & SCR_SMD;
+
+/*
+ * SMC behaviour is summarized in the following table.
+ * This helper handles the "Trap to EL2" and "Undef insn" cases.
+ * The "Trap to EL3" and "PSCI call" cases are handled in the exception
+ * helper.
+ *
+ *  -> ARM_FEATURE_EL3 and !SMD
+ *   HCR_TSC && NS EL1   !HCR_TSC || !NS EL1
+ *
+ *  Conduit SMC, valid call  Trap to EL2 PSCI Call
+ *  Conduit SMC, inval call  Trap to EL2 Trap to EL3
+ *  Conduit not SMC  Trap to EL2 Trap to EL3
+ *
+ *
+ *  -> ARM_FEATURE_EL3 and SMD
+ *   HCR_TSC && NS EL1   !HCR_TSC || !NS EL1
+ *
+ *  Conduit SMC, valid call  Trap to EL2 PSCI Call
+ *  Conduit SMC, inval call  Trap to EL2 Undef insn
+ *  Conduit not SMC  Trap to EL2 Undef insn
+ *
+ *
+ *  -> !ARM_FEATURE_EL3
+ *   HCR_TSC && NS EL1   !HCR_TSC || !NS EL1
+ *
+ *  Conduit SMC, valid call  Trap to EL2 PSCI Call
+ *  Conduit SMC, inval call  Trap to EL2 Undef insn
+ *  Conduit not SMC  Undef insn  Undef insn
+ */
+
 /* On ARMv8 with EL3 AArch64, SMD applies to both S and NS state.
  * On ARMv8 with EL3 AArch32, or ARMv7 with the Virtualization
  *  extensions, SMD only applies to NS state.
  * On ARMv7 without the Virtualization extensions, the SMD bit
  * doesn't exist, but we forbid the guest to set it to 1 in scr_write(),
  * so we need not special case this here.
  */
-bool undef = arm_feature(env, ARM_FEATURE_AARCH64) ? smd : smd && !secure;
+bool smd = arm_feature(env, ARM_FEATURE_AARCH64) ? smd_flag
+ : smd_flag && !secure;
 
 if (!arm_feature(env, ARM_FEATURE_EL3) &&
 cpu->psci_conduit != QEMU_PSCI_CONDUIT_SMC) {
 /* If we have no EL3 then SMC always UNDEFs and can't be
  * trapped to EL2. PSCI-via-SMC is a sort of ersatz EL3
  * firmware within QEMU, and we want an EL2 guest to be able
  * to forbid its EL1 from making PSCI calls into QEMU's
  * "firmware" via HCR.TSC, so for these purposes treat
  * PSCI-via-SMC as implying an EL3.
+ * This handles the very last line of the previous table.
  */
-undef = true;
-} else if (!secure && cur_el == 1 && (env->cp15.hcr_el2 & HCR_TSC)) {
+raise_exception(env, EXCP_UDEF, syn_uncategorized(),
+exception_target_el(env));
+}
+
+if (!secure && cur_el == 1 && (env->cp15.hcr_el2 & HCR_TSC)) {
 /* In NS EL1, HCR controlled routing to EL2 has priority over SMD.
  * We also want an EL2 guest to be able to forbid its EL1 from
  * making PSCI calls into QEMU's "firmware" via HCR.TSC.
+ * This handles all the "Trap to EL2" cases of the previous table.
  */
 raise_exception(env, EXCP_HYP_TRAP, syndrome, 2);
 }
 
-/* If PSCI is enabled and this looks like a valid PSCI call then
- * suppress the UNDEF -- we'll catch the SMC exception and
- * implement the PSCI call behaviour there.
+/* Catch the two remaining "Undef insn" cases of the previous table:
+ *- PSCI conduit is SMC but we don't have a valid PCSI call,
+ *- We don't have EL3 or SMD is set.
  */
-if (undef &

Re: [Qemu-devel] [PATCH] hw/arm/sysbus-fdt: fix assert in match function

2018-11-17 Thread Auger Eric

Hi Peng,

On 11/17/18 8:22 PM, Peng Hao wrote:
> In match function it should not call OBJECK_CHECK. When there is
> a mismatch, we should continue to match rather than assert().

Normally this issue should have been fixed by
e9ac8e84f0  "hw/arm/sysbus-fdt: Only call match_fn callback if the type
matches". Please can you confirm?

Thanks

Eric
> 
> Signed-off-by: Peng Hao 
> ---
>  hw/arm/sysbus-fdt.c | 7 ++-
>  1 file changed, 6 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/arm/sysbus-fdt.c b/hw/arm/sysbus-fdt.c
> index 0e24c80..41b962d 100644
> --- a/hw/arm/sysbus-fdt.c
> +++ b/hw/arm/sysbus-fdt.c
> @@ -419,10 +419,15 @@ static int add_amd_xgbe_fdt_node(SysBusDevice *sbdev, 
> void *opaque)
>  static bool vfio_platform_match(SysBusDevice *sbdev,
>  const BindingEntry *entry)
>  {
> -VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(sbdev);
> +VFIOPlatformDevice *vdev;
>  const char *compat;
>  unsigned int n;
>  
> +vdev = (VFIOPlatformDevice *) object_dynamic_cast(OBJECT(sbdev),
> +TYPE_VFIO_PLATFORM);
> +if (!vdev)
> +return false;
> +
>  for (n = vdev->num_compat, compat = vdev->compat; n > 0;
>   n--, compat += strlen(compat) + 1) {
>  if (!strcmp(entry->compat, compat)) {
>

Re: [Qemu-devel] [PATCH v3 01/23] contrib/rdmacm-mux: Add implementation of RDMA User MAD multiplexer

2018-11-17 Thread Shamir Rabinovitch

On Tue, Nov 13, 2018 at 09:13:14AM +0200, Yuval Shaia wrote:
> RDMA MAD kernel module (ibcm) disallow more than one MAD-agent for a
> given MAD class.
> This does not go hand-by-hand with qemu pvrdma device's requirements
> where each VM is MAD agent.
> Fix it by adding implementation of RDMA MAD multiplexer service which on
> one hand register as a sole MAD agent with the kernel module and on the
> other hand gives service to more than one VM.
> 
> Design Overview:
> 
> A server process is registered to UMAD framework (for this to work the
> rdma_cm kernel module needs to be unloaded) and creates a unix socket to
> listen to incoming request from clients.
> A client process (such as QEMU) connects to this unix socket and
> registers with its own GID.
> 
> TX:
> ---
> When client needs to send rdma_cm MAD message it construct it the same
> way as without this multiplexer, i.e. creates a umad packet but this
> time it writes its content to the socket instead of calling umad_send().
> The server, upon receiving such a message fetch local_comm_id from it so
> a context for this session can be maintain and relay the message to UMAD
> layer by calling umad_send().
> 
> RX:
> ---
> The server creates a worker thread to process incoming rdma_cm MAD
> messages. When an incoming message arrived (umad_recv()) the server,
> depending on the message type (attr_id) looks for target client by
> either searching in gid->fd table or in local_comm_id->fd table. With
> the extracted fd the server relays to incoming message to the client.
> 
> Signed-off-by: Yuval Shaia 
> ---
>  MAINTAINERS  |   1 +
>  Makefile |   3 +
>  Makefile.objs|   1 +
>  contrib/rdmacm-mux/Makefile.objs |   4 +
>  contrib/rdmacm-mux/main.c| 771 +++
>  contrib/rdmacm-mux/rdmacm-mux.h  |  56 +++
>  6 files changed, 836 insertions(+)
>  create mode 100644 contrib/rdmacm-mux/Makefile.objs
>  create mode 100644 contrib/rdmacm-mux/main.c
>  create mode 100644 contrib/rdmacm-mux/rdmacm-mux.h
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 98a1856afc..e087d58ac6 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -2231,6 +2231,7 @@ S: Maintained
>  F: hw/rdma/*
>  F: hw/rdma/vmw/*
>  F: docs/pvrdma.txt
> +F: contrib/rdmacm-mux/*
>  
>  Build and test automation
>  -
> diff --git a/Makefile b/Makefile
> index f2947186a4..94072776ff 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -418,6 +418,7 @@ dummy := $(call unnest-vars,, \
>  elf2dmp-obj-y \
>  ivshmem-client-obj-y \
>  ivshmem-server-obj-y \
> +rdmacm-mux-obj-y \
>  libvhost-user-obj-y \
>  vhost-user-scsi-obj-y \
>  vhost-user-blk-obj-y \
> @@ -725,6 +726,8 @@ vhost-user-scsi$(EXESUF): $(vhost-user-scsi-obj-y) 
> libvhost-user.a
>   $(call LINK, $^)
>  vhost-user-blk$(EXESUF): $(vhost-user-blk-obj-y) libvhost-user.a
>   $(call LINK, $^)
> +rdmacm-mux$(EXESUF): $(rdmacm-mux-obj-y) $(COMMON_LDADDS)
> + $(call LINK, $^)
>  
>  module_block.h: $(SRC_PATH)/scripts/modules/module_block.py config-host.mak
>   $(call quiet-command,$(PYTHON) $< $@ \
> diff --git a/Makefile.objs b/Makefile.objs
> index 1e1ff387d7..cc7df3ad80 100644
> --- a/Makefile.objs
> +++ b/Makefile.objs
> @@ -194,6 +194,7 @@ vhost-user-scsi.o-cflags := $(LIBISCSI_CFLAGS)
>  vhost-user-scsi.o-libs := $(LIBISCSI_LIBS)
>  vhost-user-scsi-obj-y = contrib/vhost-user-scsi/
>  vhost-user-blk-obj-y = contrib/vhost-user-blk/
> +rdmacm-mux-obj-y = contrib/rdmacm-mux/
>  
>  ##
>  trace-events-subdirs =
> diff --git a/contrib/rdmacm-mux/Makefile.objs 
> b/contrib/rdmacm-mux/Makefile.objs
> new file mode 100644
> index 00..be3eacb6f7
> --- /dev/null
> +++ b/contrib/rdmacm-mux/Makefile.objs
> @@ -0,0 +1,4 @@
> +ifdef CONFIG_PVRDMA
> +CFLAGS += -libumad -Wno-format-truncation
> +rdmacm-mux-obj-y = main.o
> +endif
> diff --git a/contrib/rdmacm-mux/main.c b/contrib/rdmacm-mux/main.c
> new file mode 100644
> index 00..47cf0ac7bc
> --- /dev/null
> +++ b/contrib/rdmacm-mux/main.c
> @@ -0,0 +1,771 @@
> +/*
> + * QEMU paravirtual RDMA - rdmacm-mux implementation
> + *
> + * Copyright (C) 2018 Oracle
> + * Copyright (C) 2018 Red Hat Inc
> + *
> + * Authors:
> + * Yuval Shaia 
> + * Marcel Apfelbaum 
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + *
> + */
> +
> +#include "qemu/osdep.h"
> +#include "sys/poll.h"
> +#include "sys/ioctl.h"
> +#include "pthread.h"
> +#include "syslog.h"
> +
> +#include "infiniband/verbs.h"
> +#include "infiniband/umad.h"
> +#include "infiniband/umad_types.h"
> +#include "infiniband/umad_sa.h"
> +#include "infiniband/umad_cm.h"
> +
> +#include "rdmacm-mux.h"
> +
> +#define SCALE_US 10

[Qemu-devel] [ANNOUNCE] SeaBIOS 1.12.0

2018-11-17 Thread Kevin O'Connor

The 1.12.0 version of SeaBIOS has now been released.  For more
information on the release, please see:

http://seabios.org/Releases


New in this release:

* Initial support for "TPM CRB" hardware
* Improved cdrom media reporting in the boot menu on QEMU
* Improved floppy support on real floppy hardware
* SeaVGABIOS support for QEMU "bochs-display" and QEMU "ramfb" displays
* Several bug fixes and code cleanups


For information on obtaining SeaBIOS, please see:

http://seabios.org/Download


= git shortlog -n rel-1.11.0..rel-1.12.0 =

Gerd Hoffmann (12):
  optionrom: enable non-vga display devices
  cbvga: factor out cbvga_setup_modes()
  qemu: add bochs-display support
  cbvga_setup_modes: use real mode number instead of 0x140
  cbvga_list_modes: don't list current mode twice
  cbvga_set_mode: disable clearmem in windows x86 emulator.
  bochs_display_setup: return error on failure
  pmm: use tmp zone on oom
  vgasrc: add allocate_pmm()
  qemu: add qemu ramfb support
  cbvga_set_mode: refine clear display logic
  pretty boot menu entry for cdrom drives

Nikolay Nikolov (11):
  floppy: Introduce the floppy_dor_read() function
  floppy: Introduce floppy_dor_mask()
  floppy: Introduce FLOPPY_DOR_XXX constants
  floppy: Preserve motor and drive sel bits when resetting the floppy 
controller
  floppy: Reset the floppy motor count in floppy_drive_pio()
  floppy: Use timer_check() in floppy_wait_irq()
  floppy: hold the DOR reset bit low for 4 microseconds, when resetting
  floppy: Execute a SPECIFY command after sensing the media type
  floppy: Support up to 4 floppy drives when turning on the floppy motor
  floppy: Wait for the floppy motor to reach a stable speed, after starting
  floppy: Send 4 sense interrupt commands during controller initialization

Kevin O'Connor (10):
  docs: Add sercon-port to Runtime_config.md documentation
  paravirt: Only enable sercon in NOGRAPHIC mode if no other console 
specified
  shadow: Don't invoke a shutdown on reboot unless in a reboot loop
  build: Use git describe --always
  docs: Update Download.md to use git clone via https
  ssdt: Fix building of legacy acpi tables on current iasl compiler
  docs: Update download file link
  sdcard: Increase SDHCI_POWER_ON_TIME to 5ms
  shadow: Rework bios copy code to prevent gcc array-bounds warning
  docs: Note v1.12.0 release

Stefan Berger (5):
  tpm: Add support for TPM2 ACPI table
  tpm: Wait for tpmRegValidSts flag on CRB interface before probing
  tpm: revert return values for successful/failed CRB probing
  tpm: when CRB is active, select, lock it, and check addresses
  tpm: Request access to locality 0

Marc-André Lureau (4):
  x86: add readq()
  tpm: generalize init_timeout()
  tpm: use get_tpm_version() callback
  tpm: add TPM CRB device support

Jing Liu (3):
  pci: fix the return value for truncated capability
  pci: clean up the debug message for pci capability found
  pci: recognize RH PCI legacy bridge resource reservation capability

Stephen Douthit (3):
  tpm: Refactor duplicated wait code in tis_wait_sts() & crb_wait_reg()
  tpm: Wait for interface startup when probing
  tpm: Handle unimplemented TIS_REG_IFACE_ID in tis_get_tpm_version()

Matt DeVillier (2):
  nvme: fix I/O queue length calculation overflow
  SeaVGABios/cbvga: Fix bpp for coreboot framebuffer

Marcel Apfelbaum (1):
  pci: fix 'io hints' capability for RedHat PCI bridges

Paul Menzel (1):
  docs/Download: Use more secure HTTPS URLs where possible

Shmuel Eiderman (1):
  pvscsi: Scan all 64 possible targets

Re: [Qemu-devel] [PULL 00/11] MIPS queue for November 2018 (for QEMU 3.1-rc2)

2018-11-17 Thread Philippe Mathieu-Daudé

Hi Aleksandar,

Le sam. 17 nov. 2018 16:58, Aleksandar Markovic <
aleksandar.marko...@rt-rk.com> a écrit :

> From: Aleksandar Markovic 
>
> The following changes since commit
> 83c496599cc04926ecbc3e47a37debaa3e38b686:
>
>   Merge remote-tracking branch
> 'remotes/kraxel/tags/fixes-31-20181116-pull-request' into staging
> (2018-11-16 13:53:01 +)
>
> are available in the git repository at:
>
>   https://github.com/AMarkovic/qemu tags/mips-queue-november-2018
>
> for you to fetch changes up to b6ac8eed1ca277a6ed59a61bb7bd4785190990f6:
>
>   MAINTAINERS: Add Stefan Markovic as a MIPS reviewer (2018-11-17 16:17:08
> +0100)
>
> 
> MIPS queue for November 2018
>
>   - fix MIPS-specific prctl() handling in linux-user
>   - fix some issues of R5900 support
>   - update MAINTAINERS wrt. MIPS reviewer
>
>
Some patches of this series miss your S-o-b tag.


>
> Aleksandar Markovic (5):
>   target/mips: Rename MMI-related masks
>   target/mips: Rename MMI-related opcodes
>   target/mips: Rename MMI-related functions
>   target/mips: Disable R5900 support
>   MAINTAINERS: Add Stefan Markovic as a MIPS reviewer
>
> Fredrik Noring (5):
>   target/mips: Fix decoding mechanism of R5900 MFLO1, MFHI1, MTLO1 and
> MTHI1
>   target/mips: Fix decoding mechanism of R5900 DIV1 and DIVU1
>   target/mips: Fix decoding mechanism of special R5900 opcodes
>   target/mips: Guard check_insn_opc_user_only with INSN_R5900 check
>   target/mips: Guard check_insn with INSN_R5900 check
>
> Stefan Markovic (1):
>   linux-user: Update MIPS specific prctl() implementation
>
>  MAINTAINERS  |   9 +
>  linux-user/syscall.c |  18 +
>  target/mips/translate.c  | 707
> +++
>  target/mips/translate_init.inc.c |  59 
>  4 files changed, 450 insertions(+), 343 deletions(-)
>
> --
> 2.7.4
>
>
>

Re: [Qemu-devel] [PULL 00/11] MIPS queue for November 2018 (for QEMU 3.1-rc2)

2018-11-17 Thread Aleksandar Markovic

> Some patches of this series miss your S-o-b tag

Will be fixed in a minute.


From: Philippe Mathieu-Daudé 
Sent: Saturday, November 17, 2018 7:09:37 PM
To: Aleksandar Markovic
Cc: qemu-devel@nongnu.org Developers; Peter Maydell; Aleksandar Markovic
Subject: Re: [Qemu-devel] [PULL 00/11] MIPS queue for November 2018 (for QEMU 
3.1-rc2)

Hi Aleksandar,

Le sam. 17 nov. 2018 16:58, Aleksandar Markovic 
mailto:aleksandar.marko...@rt-rk.com>> a écrit :
From: Aleksandar Markovic 
mailto:amarko...@wavecomp.com>>

The following changes since commit 83c496599cc04926ecbc3e47a37debaa3e38b686:

  Merge remote-tracking branch 
'remotes/kraxel/tags/fixes-31-20181116-pull-request' into staging (2018-11-16 
13:53:01 +0000)

are available in the git repository at:

  https://github.com/AMarkovic/qemu tags/mips-queue-november-2018

for you to fetch changes up to b6ac8eed1ca277a6ed59a61bb7bd4785190990f6:

  MAINTAINERS: Add Stefan Markovic as a MIPS reviewer (2018-11-17 16:17:08 
+0100)


MIPS queue for November 2018

  - fix MIPS-specific prctl() handling in linux-user
  - fix some issues of R5900 support
  - update MAINTAINERS wrt. MIPS reviewer


Some patches of this series miss your S-o-b tag.



Aleksandar Markovic (5):
  target/mips: Rename MMI-related masks
  target/mips: Rename MMI-related opcodes
  target/mips: Rename MMI-related functions
  target/mips: Disable R5900 support
  MAINTAINERS: Add Stefan Markovic as a MIPS reviewer

Fredrik Noring (5):
  target/mips: Fix decoding mechanism of R5900 MFLO1, MFHI1, MTLO1 and
MTHI1
  target/mips: Fix decoding mechanism of R5900 DIV1 and DIVU1
  target/mips: Fix decoding mechanism of special R5900 opcodes
  target/mips: Guard check_insn_opc_user_only with INSN_R5900 check
  target/mips: Guard check_insn with INSN_R5900 check

Stefan Markovic (1):
  linux-user: Update MIPS specific prctl() implementation

 MAINTAINERS  |   9 +
 linux-user/syscall.c |  18 +
 target/mips/translate.c  | 707 +++
 target/mips/translate_init.inc.c |  59 
 4 files changed, 450 insertions(+), 343 deletions(-)

--
2.7.4

Re: [Qemu-devel] [PULL 11/11] MAINTAINERS: Add Stefan Markovic as a MIPS reviewer

2018-11-17 Thread Philippe Mathieu-Daudé

Le sam. 17 nov. 2018 16:56, Aleksandar Markovic <
aleksandar.marko...@rt-rk.com> a écrit :

> From: Aleksandar Markovic 
>
> Add Stefan Markovic as a MIPS reviewer. He had several key
> contributions to QEMU for MIPS this year. He is a meticulous
> person with the ability to think and act on many levels.
>
> Reviewed-by: Stefan Markovic 
> Signed-off-by: Aleksandar Markovic 
> ---
>  MAINTAINERS | 9 +
>  1 file changed, 9 insertions(+)
>
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 4b8db61..f718264 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -190,6 +190,7 @@ F: disas/microblaze.c
>  MIPS
>  M: Aurelien Jarno 
>  M: Aleksandar Markovic 
> +R: Stefan Markovic 
>  S: Maintained
>  F: target/mips/
>  F: hw/mips/
> @@ -336,6 +337,7 @@ F: target/arm/kvm.c
>
>  MIPS
>  M: James Hogan 
> +R: Stefan Markovic 
>  S: Maintained
>  F: target/mips/kvm.c
>
> @@ -741,27 +743,32 @@ MIPS Machines
>  -
>  Jazz
>  M: Hervé Poussineau 
> +R: Stefan Markovic 
>

I find this a bit unfair: while you don't allow reviewers who are not from
the MIPS company to be listed in your section, you add reviewers to sections
where you are not listed as maintainer.
Why not add yourself as maintainer in these sections?

I understand your concept of "reviewer" as someone whose R-b tag weighs more
than that of an unlisted person. Is that how you see it for the MIPS subsystem?

Thanks,

Phil.

>  S: Maintained
>  F: hw/mips/mips_jazz.c
>
>  Malta
>  M: Aurelien Jarno 
> +R: Stefan Markovic 
>  S: Maintained
>  F: hw/mips/mips_malta.c
>
>  Mipssim
>  M: Aleksandar Markovic 
> +R: Stefan Markovic 
>  S: Odd Fixes
>  F: hw/mips/mips_mipssim.c
>  F: hw/net/mipsnet.c
>
>  R4000
>  M: Aurelien Jarno 
> +R: Stefan Markovic 
>  S: Maintained
>  F: hw/mips/mips_r4k.c
>
>  Fulong 2E
>  M: Aleksandar Markovic 
> +R: Stefan Markovic 
>  S: Odd Fixes
>  F: hw/mips/mips_fulong2e.c
>  F: hw/isa/vt82c686.c
> @@ -770,6 +777,7 @@ F: include/hw/isa/vt82c686.h
>
>  Boston
>  M: Paul Burton 
> +R: Stefan Markovic 
>  S: Maintained
>  F: hw/core/loader-fit.c
>  F: hw/mips/boston.c
> @@ -1992,6 +2000,7 @@ F: disas/i386.c
>
>  MIPS target
>  M: Aurelien Jarno 
> +R: Stefan Markovic 
>  S: Maintained
>  F: tcg/mips/
>  F: disas/mips.c
> --
> 2.7.4
>
>
>

[Qemu-devel] [PULL v2 00/11] MIPS queue for November 2018 (for QEMU 3.1-rc2) - v2

2018-11-17 Thread Aleksandar Markovic

From: Aleksandar Markovic 

The following changes since commit 83c496599cc04926ecbc3e47a37debaa3e38b686:

  Merge remote-tracking branch 
'remotes/kraxel/tags/fixes-31-20181116-pull-request' into staging (2018-11-16 
13:53:01 +0000)

are available in the git repository at:

  https://github.com/AMarkovic/qemu tags/mips-queue-november-2018-v2

for you to fetch changes up to 90b27c4c3b2ccf3103ad86fdcda65cd105f95857:

  MAINTAINERS: Add Stefan Markovic as a MIPS reviewer (2018-11-17 19:29:34 
+0100)



MIPS queue for QEMU 3.1-rc2 - v2


  - fix MIPS-specific prctl() handling in linux-user
  - fix some issues of R5900 support
  - update MAINTAINERS wrt. MIPS reviewer

v2:

  - fixed "Signed-off-by:" lines



Aleksandar Markovic (5):
  target/mips: Rename MMI-related masks
  target/mips: Rename MMI-related opcodes
  target/mips: Rename MMI-related functions
  target/mips: Disable R5900 support
  MAINTAINERS: Add Stefan Markovic as a MIPS reviewer

Fredrik Noring (5):
  target/mips: Fix decoding mechanism of R5900 MFLO1, MFHI1, MTLO1 and
MTHI1
  target/mips: Fix decoding mechanism of R5900 DIV1 and DIVU1
  target/mips: Fix decoding mechanism of special R5900 opcodes
  target/mips: Guard check_insn_opc_user_only with INSN_R5900 check
  target/mips: Guard check_insn with INSN_R5900 check

Stefan Markovic (1):
  linux-user: Update MIPS specific prctl() implementation

 MAINTAINERS  |   9 +
 linux-user/syscall.c |  18 +
 target/mips/translate.c  | 707 +++
 target/mips/translate_init.inc.c |  59 
 4 files changed, 450 insertions(+), 343 deletions(-)

-- 
2.7.4

[Qemu-devel] [PULL v2 05/11] target/mips: Guard check_insn_opc_user_only with INSN_R5900 check

2018-11-17 Thread Aleksandar Markovic

From: Fredrik Noring 

Avoid using check_insn_opc_user_only() as decision-making code wrt.
various architectures. Use ctx->insn_flags checks instead.

Reviewed-by: Aleksandar Markovic 
Signed-off-by: Fredrik Noring 
Signed-off-by: Aleksandar Markovic 
---
 target/mips/translate.c | 16 
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/target/mips/translate.c b/target/mips/translate.c
index a21b277..c79da3c 100644
--- a/target/mips/translate.c
+++ b/target/mips/translate.c
@@ -28313,7 +28313,9 @@ static void decode_opc(CPUMIPSState *env, DisasContext 
*ctx)
  break;
 case OPC_LL: /* Load and stores */
 check_insn(ctx, ISA_MIPS2);
-check_insn_opc_user_only(ctx, INSN_R5900);
+if (ctx->insn_flags & INSN_R5900) {
+check_insn_opc_user_only(ctx, INSN_R5900);
+}
 /* Fallthrough */
 case OPC_LWL:
 case OPC_LWR:
@@ -28339,7 +28341,9 @@ static void decode_opc(CPUMIPSState *env, DisasContext 
*ctx)
 case OPC_SC:
 check_insn(ctx, ISA_MIPS2);
  check_insn_opc_removed(ctx, ISA_MIPS32R6);
-check_insn_opc_user_only(ctx, INSN_R5900);
+if (ctx->insn_flags & INSN_R5900) {
+check_insn_opc_user_only(ctx, INSN_R5900);
+}
  gen_st_cond(ctx, op, rt, rs, imm);
  break;
 case OPC_CACHE:
@@ -28607,7 +28611,9 @@ static void decode_opc(CPUMIPSState *env, DisasContext 
*ctx)
 #if defined(TARGET_MIPS64)
 /* MIPS64 opcodes */
 case OPC_LLD:
-check_insn_opc_user_only(ctx, INSN_R5900);
+if (ctx->insn_flags & INSN_R5900) {
+check_insn_opc_user_only(ctx, INSN_R5900);
+}
 /* fall through */
 case OPC_LDL:
 case OPC_LDR:
@@ -28631,7 +28637,9 @@ static void decode_opc(CPUMIPSState *env, DisasContext 
*ctx)
 case OPC_SCD:
 check_insn_opc_removed(ctx, ISA_MIPS32R6);
 check_insn(ctx, ISA_MIPS3);
-check_insn_opc_user_only(ctx, INSN_R5900);
+if (ctx->insn_flags & INSN_R5900) {
+check_insn_opc_user_only(ctx, INSN_R5900);
+}
 check_mips_64(ctx);
 gen_st_cond(ctx, op, rt, rs, imm);
 break;
-- 
2.7.4

[Qemu-devel] [PULL v2 07/11] target/mips: Rename MMI-related masks

2018-11-17 Thread Aleksandar Markovic

From: Aleksandar Markovic 

Rename MMI-related masks.

Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Stefan Markovic 
Signed-off-by: Aleksandar Markovic 
---
 target/mips/translate.c | 20 ++--
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/target/mips/translate.c b/target/mips/translate.c
index 714f2e6..12591c1 100644
--- a/target/mips/translate.c
+++ b/target/mips/translate.c
@@ -2201,7 +2201,7 @@ enum {
  *7 111 |   *   |   *   |   *   |   *   | PSLLW |   *   | PSRLW | PSRAW
  */
 
-#define MASK_TX79_MMI(op) (MASK_OP_MAJOR(op) | ((op) & 0x3F))
+#define MASK_MMI(op) (MASK_OP_MAJOR(op) | ((op) & 0x3F))
 enum {
 TX79_MMI_MADD   = 0x00 | TX79_CLASS_MMI, /* Same as OPC_MADD */
 TX79_MMI_MADDU  = 0x01 | TX79_CLASS_MMI, /* Same as OPC_MADDU */
@@ -2252,7 +2252,7 @@ enum {
  *7 111 |   *   |   *   | PEXT5 | PPAC5
  */
 
-#define MASK_TX79_MMI0(op) (MASK_OP_MAJOR(op) | ((op) & 0x7FF))
+#define MASK_MMI0(op) (MASK_OP_MAJOR(op) | ((op) & 0x7FF))
 enum {
 TX79_MMI0_PADDW  = (0x00 << 6) | TX79_MMI_CLASS_MMI0,
 TX79_MMI0_PSUBW  = (0x01 << 6) | TX79_MMI_CLASS_MMI0,
@@ -2303,7 +2303,7 @@ enum {
  *7 111 |   *   |   *   |   *   |   *
  */
 
-#define MASK_TX79_MMI1(op) (MASK_OP_MAJOR(op) | ((op) & 0x7FF))
+#define MASK_MMI1(op) (MASK_OP_MAJOR(op) | ((op) & 0x7FF))
 enum {
 TX79_MMI1_PABSW  = (0x01 << 6) | TX79_MMI_CLASS_MMI1,
 TX79_MMI1_PCEQW  = (0x02 << 6) | TX79_MMI_CLASS_MMI1,
@@ -2347,7 +2347,7 @@ enum {
  *7 111 | PMULTH| PDIVBW| PEXEW | PROT3W
  */
 
-#define MASK_TX79_MMI2(op) (MASK_OP_MAJOR(op) | ((op) & 0x7FF))
+#define MASK_MMI2(op) (MASK_OP_MAJOR(op) | ((op) & 0x7FF))
 enum {
 TX79_MMI2_PMADDW = (0x00 << 6) | TX79_MMI_CLASS_MMI2,
 TX79_MMI2_PSLLVW = (0x02 << 6) | TX79_MMI_CLASS_MMI2,
@@ -2395,7 +2395,7 @@ enum {
  *7 111 |   *   |   *   | PEXCW |   *
  */
 
-#define MASK_TX79_MMI3(op) (MASK_OP_MAJOR(op) | ((op) & 0x7FF))
+#define MASK_MMI3(op) (MASK_OP_MAJOR(op) | ((op) & 0x7FF))
 enum {
 TX79_MMI3_PMADDUW = (0x00 << 6) | TX79_MMI_CLASS_MMI3,
 TX79_MMI3_PSRAVW  = (0x03 << 6) | TX79_MMI_CLASS_MMI3,
@@ -26466,7 +26466,7 @@ static void decode_opc_special3_legacy(CPUMIPSState 
*env, DisasContext *ctx)
 
 static void decode_tx79_mmi0(CPUMIPSState *env, DisasContext *ctx)
 {
-uint32_t opc = MASK_TX79_MMI0(ctx->opcode);
+uint32_t opc = MASK_MMI0(ctx->opcode);
 
 switch (opc) {
 case TX79_MMI0_PADDW: /* TODO: TX79_MMI0_PADDW */
@@ -26505,7 +26505,7 @@ static void decode_tx79_mmi0(CPUMIPSState *env, 
DisasContext *ctx)
 
 static void decode_tx79_mmi1(CPUMIPSState *env, DisasContext *ctx)
 {
-uint32_t opc = MASK_TX79_MMI1(ctx->opcode);
+uint32_t opc = MASK_MMI1(ctx->opcode);
 
 switch (opc) {
 case TX79_MMI1_PABSW: /* TODO: TX79_MMI1_PABSW */
@@ -26537,7 +26537,7 @@ static void decode_tx79_mmi1(CPUMIPSState *env, 
DisasContext *ctx)
 
 static void decode_tx79_mmi2(CPUMIPSState *env, DisasContext *ctx)
 {
-uint32_t opc = MASK_TX79_MMI2(ctx->opcode);
+uint32_t opc = MASK_MMI2(ctx->opcode);
 
 switch (opc) {
 case TX79_MMI2_PMADDW:/* TODO: TX79_MMI2_PMADDW */
@@ -26573,7 +26573,7 @@ static void decode_tx79_mmi2(CPUMIPSState *env, 
DisasContext *ctx)
 
 static void decode_tx79_mmi3(CPUMIPSState *env, DisasContext *ctx)
 {
-uint32_t opc = MASK_TX79_MMI3(ctx->opcode);
+uint32_t opc = MASK_MMI3(ctx->opcode);
 
 switch (opc) {
 case TX79_MMI3_PMADDUW:/* TODO: TX79_MMI3_PMADDUW */
@@ -26600,7 +26600,7 @@ static void decode_tx79_mmi3(CPUMIPSState *env, 
DisasContext *ctx)
 
 static void decode_tx79_mmi(CPUMIPSState *env, DisasContext *ctx)
 {
-uint32_t opc = MASK_TX79_MMI(ctx->opcode);
+uint32_t opc = MASK_MMI(ctx->opcode);
 int rs = extract32(ctx->opcode, 21, 5);
 int rt = extract32(ctx->opcode, 16, 5);
 int rd = extract32(ctx->opcode, 11, 5);
-- 
2.7.4

[Qemu-devel] [PULL v2 04/11] target/mips: Fix decoding mechanism of special R5900 opcodes

2018-11-17 Thread Aleksandar Markovic

From: Fredrik Noring 

MOVN, MOVZ, MFHI, MFLO, MTHI, MTLO, MULT, MULTU, DIV, DIVU, DMULT,
DMULTU, DDIV, DDIVU and JR are decoded in decode_opc_special_tx79
instead of the generic decode_opc_special_legacy.

Reviewed-by: Aleksandar Markovic 
Signed-off-by: Fredrik Noring 
Signed-off-by: Aleksandar Markovic 
---
 target/mips/translate.c | 54 +
 1 file changed, 50 insertions(+), 4 deletions(-)

diff --git a/target/mips/translate.c b/target/mips/translate.c
index 3ddd700..a21b277 100644
--- a/target/mips/translate.c
+++ b/target/mips/translate.c
@@ -23863,6 +23863,53 @@ static void decode_opc_special_r6(CPUMIPSState *env, 
DisasContext *ctx)
 }
 }
 
+static void decode_opc_special_tx79(CPUMIPSState *env, DisasContext *ctx)
+{
+int rs = extract32(ctx->opcode, 21, 5);
+int rt = extract32(ctx->opcode, 16, 5);
+int rd = extract32(ctx->opcode, 11, 5);
+uint32_t op1 = MASK_SPECIAL(ctx->opcode);
+
+switch (op1) {
+case OPC_MOVN: /* Conditional move */
+case OPC_MOVZ:
+gen_cond_move(ctx, op1, rd, rs, rt);
+break;
+case OPC_MFHI:  /* Move from HI/LO */
+case OPC_MFLO:
+gen_HILO(ctx, op1, 0, rd);
+break;
+case OPC_MTHI:
+case OPC_MTLO:  /* Move to HI/LO */
+gen_HILO(ctx, op1, 0, rs);
+break;
+case OPC_MULT:
+case OPC_MULTU:
+gen_mul_txx9(ctx, op1, rd, rs, rt);
+break;
+case OPC_DIV:
+case OPC_DIVU:
+gen_muldiv(ctx, op1, 0, rs, rt);
+break;
+#if defined(TARGET_MIPS64)
+case OPC_DMULT:
+case OPC_DMULTU:
+case OPC_DDIV:
+case OPC_DDIVU:
+check_insn_opc_user_only(ctx, INSN_R5900);
+gen_muldiv(ctx, op1, 0, rs, rt);
+break;
+#endif
+case OPC_JR:
+gen_compute_branch(ctx, op1, 4, rs, 0, 0, 4);
+break;
+default:/* Invalid */
+MIPS_INVAL("special_tx79");
+generate_exception_end(ctx, EXCP_RI);
+break;
+}
+}
+
 static void decode_opc_special_legacy(CPUMIPSState *env, DisasContext *ctx)
 {
 int rs, rt, rd, sa;
@@ -23878,7 +23925,7 @@ static void decode_opc_special_legacy(CPUMIPSState 
*env, DisasContext *ctx)
 case OPC_MOVN: /* Conditional move */
 case OPC_MOVZ:
 check_insn(ctx, ISA_MIPS4 | ISA_MIPS32 |
-   INSN_LOONGSON2E | INSN_LOONGSON2F | INSN_R5900);
+   INSN_LOONGSON2E | INSN_LOONGSON2F);
 gen_cond_move(ctx, op1, rd, rs, rt);
 break;
 case OPC_MFHI:  /* Move from HI/LO */
@@ -23905,8 +23952,6 @@ static void decode_opc_special_legacy(CPUMIPSState 
*env, DisasContext *ctx)
 check_insn(ctx, INSN_VR54XX);
 op1 = MASK_MUL_VR54XX(ctx->opcode);
 gen_mul_vr54xx(ctx, op1, rd, rs, rt);
-} else if (ctx->insn_flags & INSN_R5900) {
-gen_mul_txx9(ctx, op1, rd, rs, rt);
 } else {
 gen_muldiv(ctx, op1, rd & 3, rs, rt);
 }
@@ -23921,7 +23966,6 @@ static void decode_opc_special_legacy(CPUMIPSState 
*env, DisasContext *ctx)
 case OPC_DDIV:
 case OPC_DDIVU:
 check_insn(ctx, ISA_MIPS3);
-check_insn_opc_user_only(ctx, INSN_R5900);
 check_mips_64(ctx);
 gen_muldiv(ctx, op1, 0, rs, rt);
 break;
@@ -24148,6 +24192,8 @@ static void decode_opc_special(CPUMIPSState *env, 
DisasContext *ctx)
 default:
 if (ctx->insn_flags & ISA_MIPS32R6) {
 decode_opc_special_r6(env, ctx);
+} else if (ctx->insn_flags & INSN_R5900) {
+decode_opc_special_tx79(env, ctx);
 } else {
 decode_opc_special_legacy(env, ctx);
 }
-- 
2.7.4

[Qemu-devel] [PULL v2 01/11] linux-user: Update MIPS specific prctl() implementation

2018-11-17 Thread Aleksandar Markovic

From: Stefan Markovic 

Perform the necessary checks before the actual prctl() PR_SET_FP_MODE and
PR_GET_FP_MODE work, based on the kernel implementation. Also, update
the necessary hflags.

Reviewed-by: Laurent Vivier 
Signed-off-by: Stefan Markovic 
Signed-off-by: Aleksandar Markovic 
---
 linux-user/syscall.c | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 5c16692..280137d 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -9554,9 +9554,25 @@ static abi_long do_syscall1(void *cpu_env, int num, 
abi_long arg1,
 {
 CPUMIPSState *env = ((CPUMIPSState *)cpu_env);
 bool old_fr = env->CP0_Status & (1 << CP0St_FR);
+bool old_fre = env->CP0_Config5 & (1 << CP0C5_FRE);
 bool new_fr = arg2 & TARGET_PR_FP_MODE_FR;
 bool new_fre = arg2 & TARGET_PR_FP_MODE_FRE;
 
+const unsigned int known_bits = TARGET_PR_FP_MODE_FR |
+TARGET_PR_FP_MODE_FRE;
+
+/* If nothing to change, return right away, successfully.  */
+if (old_fr == new_fr && old_fre == new_fre) {
+return 0;
+}
+/* Check the value is valid */
+if (arg2 & ~known_bits) {
+return -TARGET_EOPNOTSUPP;
+}
+/* Setting FRE without FR is not supported.  */
+if (new_fre && !new_fr) {
+return -TARGET_EOPNOTSUPP;
+}
 if (new_fr && !(env->active_fpu.fcr0 & (1 << FCR0_F64))) {
 /* FR1 is not supported */
 return -TARGET_EOPNOTSUPP;
@@ -9586,6 +9602,7 @@ static abi_long do_syscall1(void *cpu_env, int num, 
abi_long arg1,
 env->hflags |= MIPS_HFLAG_F64;
 } else {
 env->CP0_Status &= ~(1 << CP0St_FR);
+env->hflags &= ~MIPS_HFLAG_F64;
 }
 if (new_fre) {
 env->CP0_Config5 |= (1 << CP0C5_FRE);
@@ -9594,6 +9611,7 @@ static abi_long do_syscall1(void *cpu_env, int num, 
abi_long arg1,
 }
 } else {
 env->CP0_Config5 &= ~(1 << CP0C5_FRE);
+env->hflags &= ~MIPS_HFLAG_FRE;
 }
 
 return 0;
-- 
2.7.4

[Qemu-devel] [PULL v2 10/11] target/mips: Disable R5900 support

2018-11-17 Thread Aleksandar Markovic

From: Aleksandar Markovic 

Disable R5900 support. There are some outstanding issues related
to ABI support and emulation accuracy that were not well understood
during the review process. Disable it to avoid backward compatibility
issues.

Reverts commit ed4f49ba9bb56ebca6987b1083255daf6c89b5de.

Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Aleksandar Markovic 
---
 target/mips/translate_init.inc.c | 59 
 1 file changed, 59 deletions(-)

diff --git a/target/mips/translate_init.inc.c b/target/mips/translate_init.inc.c
index 85da4a2..acab097 100644
--- a/target/mips/translate_init.inc.c
+++ b/target/mips/translate_init.inc.c
@@ -411,65 +411,6 @@ const mips_def_t mips_defs[] =
 .mmu_type = MMU_TYPE_R4000,
 },
 {
-/*
- * The Toshiba TX System RISC TX79 Core Architecture manual
- *
- * https://wiki.qemu.org/File:C790.pdf
- *
- * describes the C790 processor that is a follow-up to the R5900.
- * There are a few notable differences in that the R5900 FPU
- *
- * - is not IEEE 754-1985 compliant,
- * - does not implement double format, and
- * - its machine code is nonstandard.
- */
-.name = "R5900",
-.CP0_PRid = 0x2E00,
-/* No L2 cache, icache size 32k, dcache size 32k, uncached coherency. 
*/
-.CP0_Config0 = (0x3 << 9) | (0x3 << 6) | (0x2 << CP0C0_K0),
-.CP0_Status_rw_bitmask = 0xF4C79C1F,
-#ifdef CONFIG_USER_ONLY
-/*
- * R5900 hardware traps to the Linux kernel for IEEE 754-1985 and LL/SC
- * emulation. For user only, QEMU is the kernel, so we emulate the 
traps
- * by simply emulating the instructions directly.
- *
- * Note: Config1 is only used internally, the R5900 has only Config0.
- */
-.CP0_Config1 = (1 << CP0C1_FP) | (47 << CP0C1_MMU),
-.CP0_LLAddr_rw_bitmask = 0x,
-.CP0_LLAddr_shift = 4,
-.CP1_fcr0 = (0x38 << FCR0_PRID) | (0x0 << FCR0_REV),
-.CP1_fcr31 = 0,
-.CP1_fcr31_rw_bitmask = 0x0183,
-#else
-/*
- * The R5900 COP1 FPU implements single-precision floating-point
- * operations but is not entirely IEEE 754-1985 compatible. In
- * particular,
- *
- * - NaN (not a number) and +/- infinities are not supported;
- * - exception mechanisms are not fully supported;
- * - denormalized numbers are not supported;
- * - rounding towards nearest and +/- infinities are not supported;
- * - computed results usually differs in the least significant bit;
- * - saturations can differ more than the least significant bit.
- *
- * Since only rounding towards zero is supported, the two least
- * significant bits of FCR31 are hardwired to 01.
- *
- * FPU emulation is disabled here until it is implemented.
- *
- * Note: Config1 is only used internally, the R5900 has only Config0.
- */
-.CP0_Config1 = (47 << CP0C1_MMU),
-#endif /* !CONFIG_USER_ONLY */
-.SEGBITS = 32,
-.PABITS = 32,
-.insn_flags = CPU_R5900 | ASE_MMI,
-.mmu_type = MMU_TYPE_R4000,
-},
-{
 /* A generic CPU supporting MIPS32 Release 6 ISA.
FIXME: Support IEEE 754-2008 FP.
   Eventually this should be replaced by a real CPU model. */
-- 
2.7.4

[Qemu-devel] [PULL v2 02/11] target/mips: Fix decoding mechanism of R5900 MFLO1, MFHI1, MTLO1 and MTHI1

2018-11-17 Thread Aleksandar Markovic

From: Fredrik Noring 

MFLO1, MFHI1, MTLO1 and MTHI1 are generated in gen_HILO1_tx79 instead of
the generic gen_HILO.

Reviewed-by: Aleksandar Markovic 
Signed-off-by: Fredrik Noring 
Signed-off-by: Aleksandar Markovic 
---
 target/mips/translate.c | 51 ++---
 1 file changed, 40 insertions(+), 11 deletions(-)

diff --git a/target/mips/translate.c b/target/mips/translate.c
index 60320cb..8601333 100644
--- a/target/mips/translate.c
+++ b/target/mips/translate.c
@@ -4359,24 +4359,56 @@ static void gen_shift(DisasContext *ctx, uint32_t opc,
 tcg_temp_free(t1);
 }
 
+/* Copy GPR to and from TX79 HI1/LO1 register. */
+static void gen_HILO1_tx79(DisasContext *ctx, uint32_t opc, int reg)
+{
+if (reg == 0 && (opc == TX79_MMI_MFHI1 || opc == TX79_MMI_MFLO1)) {
+/* Treat as NOP. */
+return;
+}
+
+switch (opc) {
+case TX79_MMI_MFHI1:
+tcg_gen_mov_tl(cpu_gpr[reg], cpu_HI[1]);
+break;
+case TX79_MMI_MFLO1:
+tcg_gen_mov_tl(cpu_gpr[reg], cpu_LO[1]);
+break;
+case TX79_MMI_MTHI1:
+if (reg != 0) {
+tcg_gen_mov_tl(cpu_HI[1], cpu_gpr[reg]);
+} else {
+tcg_gen_movi_tl(cpu_HI[1], 0);
+}
+break;
+case TX79_MMI_MTLO1:
+if (reg != 0) {
+tcg_gen_mov_tl(cpu_LO[1], cpu_gpr[reg]);
+} else {
+tcg_gen_movi_tl(cpu_LO[1], 0);
+}
+break;
+default:
+MIPS_INVAL("mfthilo1 TX79");
+generate_exception_end(ctx, EXCP_RI);
+break;
+}
+}
+
 /* Arithmetic on HI/LO registers */
 static void gen_HILO(DisasContext *ctx, uint32_t opc, int acc, int reg)
 {
-if (reg == 0 && (opc == OPC_MFHI || opc == TX79_MMI_MFHI1 ||
- opc == OPC_MFLO || opc == TX79_MMI_MFLO1)) {
+if (reg == 0 && (opc == OPC_MFHI || opc == OPC_MFLO)) {
 /* Treat as NOP. */
 return;
 }
 
 if (acc != 0) {
-if (!(ctx->insn_flags & INSN_R5900)) {
-check_dsp(ctx);
-}
+check_dsp(ctx);
 }
 
 switch (opc) {
 case OPC_MFHI:
-case TX79_MMI_MFHI1:
 #if defined(TARGET_MIPS64)
 if (acc != 0) {
 tcg_gen_ext32s_tl(cpu_gpr[reg], cpu_HI[acc]);
@@ -4387,7 +4419,6 @@ static void gen_HILO(DisasContext *ctx, uint32_t opc, int 
acc, int reg)
 }
 break;
 case OPC_MFLO:
-case TX79_MMI_MFLO1:
 #if defined(TARGET_MIPS64)
 if (acc != 0) {
 tcg_gen_ext32s_tl(cpu_gpr[reg], cpu_LO[acc]);
@@ -4398,7 +4429,6 @@ static void gen_HILO(DisasContext *ctx, uint32_t opc, int 
acc, int reg)
 }
 break;
 case OPC_MTHI:
-case TX79_MMI_MTHI1:
 if (reg != 0) {
 #if defined(TARGET_MIPS64)
 if (acc != 0) {
@@ -4413,7 +4443,6 @@ static void gen_HILO(DisasContext *ctx, uint32_t opc, int 
acc, int reg)
 }
 break;
 case OPC_MTLO:
-case TX79_MMI_MTLO1:
 if (reg != 0) {
 #if defined(TARGET_MIPS64)
 if (acc != 0) {
@@ -26500,11 +26529,11 @@ static void decode_tx79_mmi(CPUMIPSState *env, 
DisasContext *ctx)
 break;
 case TX79_MMI_MTLO1:
 case TX79_MMI_MTHI1:
-gen_HILO(ctx, opc, 1, rs);
+gen_HILO1_tx79(ctx, opc, rs);
 break;
 case TX79_MMI_MFLO1:
 case TX79_MMI_MFHI1:
-gen_HILO(ctx, opc, 1, rd);
+gen_HILO1_tx79(ctx, opc, rd);
 break;
 case TX79_MMI_MADD:  /* TODO: TX79_MMI_MADD */
 case TX79_MMI_MADDU: /* TODO: TX79_MMI_MADDU */
-- 
2.7.4

[Qemu-devel] [PULL v2 09/11] target/mips: Rename MMI-related functions

2018-11-17 Thread Aleksandar Markovic

From: Aleksandar Markovic 

Rename MMI-related functions.

Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Stefan Markovic 
Signed-off-by: Aleksandar Markovic 
---
 target/mips/translate.c | 32 
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/target/mips/translate.c b/target/mips/translate.c
index 98dc468..e9c23a5 100644
--- a/target/mips/translate.c
+++ b/target/mips/translate.c
@@ -26464,7 +26464,7 @@ static void decode_opc_special3_legacy(CPUMIPSState 
*env, DisasContext *ctx)
 }
 }
 
-static void decode_tx79_mmi0(CPUMIPSState *env, DisasContext *ctx)
+static void decode_mmi0(CPUMIPSState *env, DisasContext *ctx)
 {
 uint32_t opc = MASK_MMI0(ctx->opcode);
 
@@ -26503,7 +26503,7 @@ static void decode_tx79_mmi0(CPUMIPSState *env, 
DisasContext *ctx)
 }
 }
 
-static void decode_tx79_mmi1(CPUMIPSState *env, DisasContext *ctx)
+static void decode_mmi1(CPUMIPSState *env, DisasContext *ctx)
 {
 uint32_t opc = MASK_MMI1(ctx->opcode);
 
@@ -26535,7 +26535,7 @@ static void decode_tx79_mmi1(CPUMIPSState *env, 
DisasContext *ctx)
 }
 }
 
-static void decode_tx79_mmi2(CPUMIPSState *env, DisasContext *ctx)
+static void decode_mmi2(CPUMIPSState *env, DisasContext *ctx)
 {
 uint32_t opc = MASK_MMI2(ctx->opcode);
 
@@ -26571,7 +26571,7 @@ static void decode_tx79_mmi2(CPUMIPSState *env, 
DisasContext *ctx)
 }
 }
 
-static void decode_tx79_mmi3(CPUMIPSState *env, DisasContext *ctx)
+static void decode_mmi3(CPUMIPSState *env, DisasContext *ctx)
 {
 uint32_t opc = MASK_MMI3(ctx->opcode);
 
@@ -26598,7 +26598,7 @@ static void decode_tx79_mmi3(CPUMIPSState *env, 
DisasContext *ctx)
 }
 }
 
-static void decode_tx79_mmi(CPUMIPSState *env, DisasContext *ctx)
+static void decode_mmi(CPUMIPSState *env, DisasContext *ctx)
 {
 uint32_t opc = MASK_MMI(ctx->opcode);
 int rs = extract32(ctx->opcode, 21, 5);
@@ -26607,16 +26607,16 @@ static void decode_tx79_mmi(CPUMIPSState *env, 
DisasContext *ctx)
 
 switch (opc) {
 case MMI_OPC_CLASS_MMI0:
-decode_tx79_mmi0(env, ctx);
+decode_mmi0(env, ctx);
 break;
 case MMI_OPC_CLASS_MMI1:
-decode_tx79_mmi1(env, ctx);
+decode_mmi1(env, ctx);
 break;
 case MMI_OPC_CLASS_MMI2:
-decode_tx79_mmi2(env, ctx);
+decode_mmi2(env, ctx);
 break;
 case MMI_OPC_CLASS_MMI3:
-decode_tx79_mmi3(env, ctx);
+decode_mmi3(env, ctx);
 break;
 case MMI_OPC_MULT1:
 case MMI_OPC_MULTU1:
@@ -26656,12 +26656,12 @@ static void decode_tx79_mmi(CPUMIPSState *env, 
DisasContext *ctx)
 }
 }
 
-static void decode_tx79_lq(CPUMIPSState *env, DisasContext *ctx)
+static void gen_mmi_lq(CPUMIPSState *env, DisasContext *ctx)
 {
 generate_exception_end(ctx, EXCP_RI);/* TODO: MMI_OPC_LQ */
 }
 
-static void gen_tx79_sq(DisasContext *ctx, int base, int rt, int offset)
+static void gen_mmi_sq(DisasContext *ctx, int base, int rt, int offset)
 {
 generate_exception_end(ctx, EXCP_RI);/* TODO: MMI_OPC_SQ */
 }
@@ -26687,7 +26687,7 @@ static void gen_tx79_sq(DisasContext *ctx, int base, 
int rt, int offset)
  * In user mode, QEMU must verify the upper and lower 11 bits to distinguish
  * between SQ and RDHWR, as the Linux kernel does.
  */
-static void decode_tx79_sq(CPUMIPSState *env, DisasContext *ctx)
+static void decode_mmi_sq(CPUMIPSState *env, DisasContext *ctx)
 {
 int base = extract32(ctx->opcode, 21, 5);
 int rt = extract32(ctx->opcode, 16, 5);
@@ -26705,7 +26705,7 @@ static void decode_tx79_sq(CPUMIPSState *env, 
DisasContext *ctx)
 }
 #endif
 
-gen_tx79_sq(ctx, base, rt, offset);
+gen_mmi_sq(ctx, base, rt, offset);
 }
 
 static void decode_opc_special3(CPUMIPSState *env, DisasContext *ctx)
@@ -28014,7 +28014,7 @@ static void decode_opc(CPUMIPSState *env, DisasContext 
*ctx)
 break;
 case OPC_SPECIAL2:
 if ((ctx->insn_flags & INSN_R5900) && (ctx->insn_flags & ASE_MMI)) {
-decode_tx79_mmi(env, ctx);
+decode_mmi(env, ctx);
 } else if (ctx->insn_flags & ASE_MXU) {
 decode_opc_mxu(env, ctx);
 } else {
@@ -28023,7 +28023,7 @@ static void decode_opc(CPUMIPSState *env, DisasContext 
*ctx)
 break;
 case OPC_SPECIAL3:
 if (ctx->insn_flags & INSN_R5900) {
-decode_tx79_sq(env, ctx);/* MMI_OPC_SQ */
+decode_mmi_sq(env, ctx);/* MMI_OPC_SQ */
 } else {
 decode_opc_special3(env, ctx);
 }
@@ -28698,7 +28698,7 @@ static void decode_opc(CPUMIPSState *env, DisasContext 
*ctx)
 break;
 case OPC_MSA: /* OPC_MDMX */
 if (ctx->insn_flags & INSN_R5900) {
-decode_tx79_lq(env, ctx);/* MMI_OPC_LQ */
+gen_mmi_lq(env, ctx);/* MMI_OPC_LQ */
 } else {
 /* MDMX: Not implemented. */
 gen_msa(env, ctx);
-- 
2.7.4

[Qemu-devel] [PULL v2 08/11] target/mips: Rename MMI-related opcodes

2018-11-17 Thread Aleksandar Markovic

From: Aleksandar Markovic 

Rename MMI-related opcodes.

Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Stefan Markovic 
Signed-off-by: Aleksandar Markovic 
---
 target/mips/translate.c | 472 
 1 file changed, 236 insertions(+), 236 deletions(-)

diff --git a/target/mips/translate.c b/target/mips/translate.c
index 12591c1..98dc468 100644
--- a/target/mips/translate.c
+++ b/target/mips/translate.c
@@ -2138,10 +2138,10 @@ enum {
  * MTSAH   rs, immediate Move Halfword Count to Shift Amount Register
  * PROT3W  rd, rtParallel Rotate 3 Words
  *
- * The TX79-specific Multimedia Instruction encodings
- * ==
+ * MMI (MultiMedia Instruction) encodings
+ * ==
  *
- * TX79 Multimedia Instruction encoding table keys:
+ * MMI instructions encoding table keys:
  *
  * *   This code is reserved for future use. An attempt to execute it
  * causes a Reserved Instruction exception.
@@ -2152,7 +2152,7 @@ enum {
  * DMULTU, DDIV, DDIVU, LL, LLD, SC, SCD, LWC2 and SWC2. An attempt
  * to execute it causes a Reserved Instruction exception.
  *
- * TX79 Multimedia Instructions encoded by opcode field (MMI, LQ, SQ):
+ * MMI instructions encoded by opcode field (MMI, LQ, SQ):
  *
  *  31260
  * +++
@@ -2174,13 +2174,13 @@ enum {
  */
 
 enum {
-TX79_CLASS_MMI = 0x1C << 26,/* Same as OPC_SPECIAL2 */
-TX79_LQ= 0x1E << 26,/* Same as OPC_MSA */
-TX79_SQ= 0x1F << 26,/* Same as OPC_SPECIAL3 */
+MMI_OPC_CLASS_MMI = 0x1C << 26,/* Same as OPC_SPECIAL2 */
+MMI_OPC_LQ= 0x1E << 26,/* Same as OPC_MSA */
+MMI_OPC_SQ= 0x1F << 26,/* Same as OPC_SPECIAL3 */
 };
 
 /*
- * TX79 Multimedia Instructions with opcode field = MMI:
+ * MMI instructions with opcode field = MMI:
  *
  *  3126 5  0
  * ++---++
@@ -2203,35 +2203,35 @@ enum {
 
 #define MASK_MMI(op) (MASK_OP_MAJOR(op) | ((op) & 0x3F))
 enum {
-TX79_MMI_MADD   = 0x00 | TX79_CLASS_MMI, /* Same as OPC_MADD */
-TX79_MMI_MADDU  = 0x01 | TX79_CLASS_MMI, /* Same as OPC_MADDU */
-TX79_MMI_PLZCW  = 0x04 | TX79_CLASS_MMI,
-TX79_MMI_CLASS_MMI0 = 0x08 | TX79_CLASS_MMI,
-TX79_MMI_CLASS_MMI2 = 0x09 | TX79_CLASS_MMI,
-TX79_MMI_MFHI1  = 0x10 | TX79_CLASS_MMI, /* Same minor as OPC_MFHI */
-TX79_MMI_MTHI1  = 0x11 | TX79_CLASS_MMI, /* Same minor as OPC_MTHI */
-TX79_MMI_MFLO1  = 0x12 | TX79_CLASS_MMI, /* Same minor as OPC_MFLO */
-TX79_MMI_MTLO1  = 0x13 | TX79_CLASS_MMI, /* Same minor as OPC_MTLO */
-TX79_MMI_MULT1  = 0x18 | TX79_CLASS_MMI, /* Same minor as OPC_MULT */
-TX79_MMI_MULTU1 = 0x19 | TX79_CLASS_MMI, /* Same minor as OPC_MULTU */
-TX79_MMI_DIV1   = 0x1A | TX79_CLASS_MMI, /* Same minor as OPC_DIV */
-TX79_MMI_DIVU1  = 0x1B | TX79_CLASS_MMI, /* Same minor as OPC_DIVU */
-TX79_MMI_MADD1  = 0x20 | TX79_CLASS_MMI,
-TX79_MMI_MADDU1 = 0x21 | TX79_CLASS_MMI,
-TX79_MMI_CLASS_MMI1 = 0x28 | TX79_CLASS_MMI,
-TX79_MMI_CLASS_MMI3 = 0x29 | TX79_CLASS_MMI,
-TX79_MMI_PMFHL  = 0x30 | TX79_CLASS_MMI,
-TX79_MMI_PMTHL  = 0x31 | TX79_CLASS_MMI,
-TX79_MMI_PSLLH  = 0x34 | TX79_CLASS_MMI,
-TX79_MMI_PSRLH  = 0x36 | TX79_CLASS_MMI,
-TX79_MMI_PSRAH  = 0x37 | TX79_CLASS_MMI,
-TX79_MMI_PSLLW  = 0x3C | TX79_CLASS_MMI,
-TX79_MMI_PSRLW  = 0x3E | TX79_CLASS_MMI,
-TX79_MMI_PSRAW  = 0x3F | TX79_CLASS_MMI,
+MMI_OPC_MADD   = 0x00 | MMI_OPC_CLASS_MMI, /* Same as OPC_MADD */
+MMI_OPC_MADDU  = 0x01 | MMI_OPC_CLASS_MMI, /* Same as OPC_MADDU */
+MMI_OPC_PLZCW  = 0x04 | MMI_OPC_CLASS_MMI,
+MMI_OPC_CLASS_MMI0 = 0x08 | MMI_OPC_CLASS_MMI,
+MMI_OPC_CLASS_MMI2 = 0x09 | MMI_OPC_CLASS_MMI,
+MMI_OPC_MFHI1  = 0x10 | MMI_OPC_CLASS_MMI, /* Same minor as OPC_MFHI */
+MMI_OPC_MTHI1  = 0x11 | MMI_OPC_CLASS_MMI, /* Same minor as OPC_MTHI */
+MMI_OPC_MFLO1  = 0x12 | MMI_OPC_CLASS_MMI, /* Same minor as OPC_MFLO */
+MMI_OPC_MTLO1  = 0x13 | MMI_OPC_CLASS_MMI, /* Same minor as OPC_MTLO */
+MMI_OPC_MULT1  = 0x18 | MMI_OPC_CLASS_MMI, /* Same minor as OPC_MULT */
+MMI_OPC_MULTU1 = 0x19 | MMI_OPC_CLASS_MMI, /* Same min. as OPC_MULTU */
+MMI_OPC_DIV1   = 0x1A | MMI_OPC_CLASS_MMI, /* Same minor as OPC_DIV  */
+MMI_OPC_DIVU1  = 0x1B | MMI_OPC_CLASS_MMI, /* Same minor as OPC_DIVU */
+MMI_OPC_MADD1  = 0x20 | MMI_OPC_CLASS_MMI,
+MMI_OPC_MADDU1 = 0x21 | MMI_OPC_CLASS_MMI,
+MMI_OPC_CLASS_MMI1 = 0x28 | MMI_OPC_CLASS_MMI,
+MMI_OPC_CLASS_MMI3 = 0x29 | MMI_OPC_CLASS_MMI,
+MMI_OPC_PMFHL  = 0x30 | MMI_OPC_CLASS_MMI,
+MMI_OPC_PMT

[Qemu-devel] [PULL v2 06/11] target/mips: Guard check_insn with INSN_R5900 check

2018-11-17 Thread Aleksandar Markovic

From: Fredrik Noring 

Explicitly mark the handling of the PREF instruction for R5900 as
being treated the same as NOP.

Reviewed-by: Aleksandar Markovic 
Signed-off-by: Fredrik Noring 
Signed-off-by: Aleksandar Markovic 
---
 target/mips/translate.c | 9 ++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/target/mips/translate.c b/target/mips/translate.c
index c79da3c..714f2e6 100644
--- a/target/mips/translate.c
+++ b/target/mips/translate.c
@@ -28357,9 +28357,12 @@ static void decode_opc(CPUMIPSState *env, DisasContext 
*ctx)
 break;
 case OPC_PREF:
 check_insn_opc_removed(ctx, ISA_MIPS32R6);
-check_insn(ctx, ISA_MIPS4 | ISA_MIPS32 |
-   INSN_R5900);
-/* Treat as NOP. */
+if (ctx->insn_flags & INSN_R5900) {
+/* Treat as NOP. */
+} else {
+check_insn(ctx, ISA_MIPS4 | ISA_MIPS32);
+/* Treat as NOP. */
+}
 break;
 
 /* Floating point (COP1). */
-- 
2.7.4

[Qemu-devel] [PULL v2 03/11] target/mips: Fix decoding mechanism of R5900 DIV1 and DIVU1

2018-11-17 Thread Aleksandar Markovic

From: Fredrik Noring 

DIV1 and DIVU1 are generated in gen_div1_tx79 instead of the generic
gen_muldiv.

Signed-off-by: Fredrik Noring 
Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Aleksandar Markovic 
---
 target/mips/translate.c | 65 -
 1 file changed, 59 insertions(+), 6 deletions(-)

diff --git a/target/mips/translate.c b/target/mips/translate.c
index 8601333..3ddd700 100644
--- a/target/mips/translate.c
+++ b/target/mips/translate.c
@@ -4743,6 +4743,63 @@ static void gen_r6_muldiv(DisasContext *ctx, int opc, 
int rd, int rs, int rt)
 tcg_temp_free(t1);
 }
 
+static void gen_div1_tx79(DisasContext *ctx, uint32_t opc, int rs, int rt)
+{
+TCGv t0, t1;
+
+t0 = tcg_temp_new();
+t1 = tcg_temp_new();
+
+gen_load_gpr(t0, rs);
+gen_load_gpr(t1, rt);
+
+switch (opc) {
+case TX79_MMI_DIV1:
+{
+TCGv t2 = tcg_temp_new();
+TCGv t3 = tcg_temp_new();
+tcg_gen_ext32s_tl(t0, t0);
+tcg_gen_ext32s_tl(t1, t1);
+tcg_gen_setcondi_tl(TCG_COND_EQ, t2, t0, INT_MIN);
+tcg_gen_setcondi_tl(TCG_COND_EQ, t3, t1, -1);
+tcg_gen_and_tl(t2, t2, t3);
+tcg_gen_setcondi_tl(TCG_COND_EQ, t3, t1, 0);
+tcg_gen_or_tl(t2, t2, t3);
+tcg_gen_movi_tl(t3, 0);
+tcg_gen_movcond_tl(TCG_COND_NE, t1, t2, t3, t2, t1);
+tcg_gen_div_tl(cpu_LO[1], t0, t1);
+tcg_gen_rem_tl(cpu_HI[1], t0, t1);
+tcg_gen_ext32s_tl(cpu_LO[1], cpu_LO[1]);
+tcg_gen_ext32s_tl(cpu_HI[1], cpu_HI[1]);
+tcg_temp_free(t3);
+tcg_temp_free(t2);
+}
+break;
+case TX79_MMI_DIVU1:
+{
+TCGv t2 = tcg_const_tl(0);
+TCGv t3 = tcg_const_tl(1);
+tcg_gen_ext32u_tl(t0, t0);
+tcg_gen_ext32u_tl(t1, t1);
+tcg_gen_movcond_tl(TCG_COND_EQ, t1, t1, t2, t3, t1);
+tcg_gen_divu_tl(cpu_LO[1], t0, t1);
+tcg_gen_remu_tl(cpu_HI[1], t0, t1);
+tcg_gen_ext32s_tl(cpu_LO[1], cpu_LO[1]);
+tcg_gen_ext32s_tl(cpu_HI[1], cpu_HI[1]);
+tcg_temp_free(t3);
+tcg_temp_free(t2);
+}
+break;
+default:
+MIPS_INVAL("div1 TX79");
+generate_exception_end(ctx, EXCP_RI);
+goto out;
+}
+ out:
+tcg_temp_free(t0);
+tcg_temp_free(t1);
+}
+
 static void gen_muldiv(DisasContext *ctx, uint32_t opc,
int acc, int rs, int rt)
 {
@@ -4755,14 +4812,11 @@ static void gen_muldiv(DisasContext *ctx, uint32_t opc,
 gen_load_gpr(t1, rt);
 
 if (acc != 0) {
-if (!(ctx->insn_flags & INSN_R5900)) {
-check_dsp(ctx);
-}
+check_dsp(ctx);
 }
 
 switch (opc) {
 case OPC_DIV:
-case TX79_MMI_DIV1:
 {
 TCGv t2 = tcg_temp_new();
 TCGv t3 = tcg_temp_new();
@@ -4784,7 +4838,6 @@ static void gen_muldiv(DisasContext *ctx, uint32_t opc,
 }
 break;
 case OPC_DIVU:
-case TX79_MMI_DIVU1:
 {
 TCGv t2 = tcg_const_tl(0);
 TCGv t3 = tcg_const_tl(1);
@@ -26525,7 +26578,7 @@ static void decode_tx79_mmi(CPUMIPSState *env, 
DisasContext *ctx)
 break;
 case TX79_MMI_DIV1:
 case TX79_MMI_DIVU1:
-gen_muldiv(ctx, opc, 1, rs, rt);
+gen_div1_tx79(ctx, opc, rs, rt);
 break;
 case TX79_MMI_MTLO1:
 case TX79_MMI_MTHI1:
-- 
2.7.4

[Qemu-devel] [PULL v2 11/11] MAINTAINERS: Add Stefan Markovic as a MIPS reviewer

2018-11-17 Thread Aleksandar Markovic

From: Aleksandar Markovic 

Add Stefan Markovic as a MIPS reviewer. He had several key
contributions to QEMU for MIPS this year. He is a meticulous
person with the ability to think and act on many levels.

Reviewed-by: Stefan Markovic 
Signed-off-by: Aleksandar Markovic 
---
 MAINTAINERS | 9 +
 1 file changed, 9 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 4b8db61..f718264 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -190,6 +190,7 @@ F: disas/microblaze.c
 MIPS
 M: Aurelien Jarno 
 M: Aleksandar Markovic 
+R: Stefan Markovic 
 S: Maintained
 F: target/mips/
 F: hw/mips/
@@ -336,6 +337,7 @@ F: target/arm/kvm.c
 
 MIPS
 M: James Hogan 
+R: Stefan Markovic 
 S: Maintained
 F: target/mips/kvm.c
 
@@ -741,27 +743,32 @@ MIPS Machines
 -
 Jazz
 M: Hervé Poussineau 
+R: Stefan Markovic 
 S: Maintained
 F: hw/mips/mips_jazz.c
 
 Malta
 M: Aurelien Jarno 
+R: Stefan Markovic 
 S: Maintained
 F: hw/mips/mips_malta.c
 
 Mipssim
 M: Aleksandar Markovic 
+R: Stefan Markovic 
 S: Odd Fixes
 F: hw/mips/mips_mipssim.c
 F: hw/net/mipsnet.c
 
 R4000
 M: Aurelien Jarno 
+R: Stefan Markovic 
 S: Maintained
 F: hw/mips/mips_r4k.c
 
 Fulong 2E
 M: Aleksandar Markovic 
+R: Stefan Markovic 
 S: Odd Fixes
 F: hw/mips/mips_fulong2e.c
 F: hw/isa/vt82c686.c
@@ -770,6 +777,7 @@ F: include/hw/isa/vt82c686.h
 
 Boston
 M: Paul Burton 
+R: Stefan Markovic 
 S: Maintained
 F: hw/core/loader-fit.c
 F: hw/mips/boston.c
@@ -1992,6 +2000,7 @@ F: disas/i386.c
 
 MIPS target
 M: Aurelien Jarno 
+R: Stefan Markovic 
 S: Maintained
 F: tcg/mips/
 F: disas/mips.c
-- 
2.7.4

Re: [Qemu-devel] [Qemu-block] Change in qemu 2.12 causes qemu-img convert to NBD to write more data

2018-11-17 Thread Nir Soffer

On Fri, Nov 16, 2018 at 5:26 PM Kevin Wolf  wrote:

> Am 15.11.2018 um 23:27 hat Nir Soffer geschrieben:
> > On Sun, Nov 11, 2018 at 6:11 PM Nir Soffer  wrote:
> >
> > > On Wed, Nov 7, 2018 at 7:55 PM Nir Soffer  wrote:
> > >
> > >> On Wed, Nov 7, 2018 at 7:27 PM Kevin Wolf  wrote:
> > >>
> > >>> Am 07.11.2018 um 15:56 hat Nir Soffer geschrieben:
> > >>> > Wed, Nov 7, 2018 at 4:36 PM Richard W.M. Jones 
> > >>> wrote:
> > >>> >
> > >>> > > Another thing I tried was to change the NBD server (nbdkit) so
> that
> > >>> it
> > >>> > > doesn't advertise zero support to the client:
> > >>> > >
> > >>> > >   $ nbdkit --filter=log --filter=nozero memory size=6G
> > >>> logfile=/tmp/log \
> > >>> > >   --run './qemu-img convert ./fedora-28.img -n $nbd'
> > >>> > >   $ grep '\.\.\.$' /tmp/log | sed 's/.*\([A-Z][a-z]*\).*/\1/' |
> uniq
> > >>> -c
> > >>> > >2154 Write
> > >>> > >
> > >>> > > Not surprisingly no zero commands are issued.  The size of the
> write
> > >>> > > commands is very uneven -- it appears to be send one command per
> > >>> block
> > >>> > > of zeroes or data.
> > >>> > >
> > >>> > > Nir: If we could get information from imageio about whether
> zeroing
> > >>> is
> > >>> > > implemented efficiently or not by the backend, we could change
> > >>> > > virt-v2v / nbdkit to advertise this back to qemu.
> > >>> >
> > >>> > There is no way to detect the capability, ioctl(BLKZEROOUT) always
> > >>> > succeeds, falling back to manual zeroing in the kernel silently
> > >>> >
> > >>> > Even if we could, sending zero on the wire from qemu may be even
> > >>> > slower, and it looks like qemu send even more requests in this case
> > >>> > (2154 vs ~1300).
> > >>> >
> > >>> > Looks like this optimization in qemu side leads to worse
> performance,
> > >>> > so it should not be enabled by default.
> > >>>
> > >>> Well, that's overgeneralising your case a bit. If the backend does
> > >>> support efficient zero writes (which file systems, the most common
> case,
> > >>> generally do), doing one big write_zeroes request at the start can
> > >>> improve performance quite a bit.
> > >>>
> > >>> It seems the problem is that we can't really know whether the
> operation
> > >>> will be efficient because the backends generally don't tell us. Maybe
> > >>> NBD could introduce a flag for this, but in the general case it
> appears
> > >>> to me that we'll have to have a command line option.
> > >>>
> > >>> However, I'm curious what your exact use case and the backend used
> in it
> > >>> is? Can something be improved there to actually get efficient zero
> > >>> writes and get even better performance than by just disabling the big
> > >>> zero write?
> > >>
> > >>
> > >> The backend is some NetApp storage connected via FC. I don't have
> > >> more info on this. We get zero rate of about 1G/s on this storage,
> which
> > >> is quite slow compared with other storage we tested.
> > >>
> > >> One option we check now is if this is the kernel silent fallback to
> manual
> > >> zeroing when the server advertise wrong value of write_same_max_bytes.
> > >>
> > >
> > > We eliminated this using blkdiscard. This is what we get on with this
> > > storage
> > > zeroing 100G LV:
> > >
> > > for i in 1 2 4 8 16 32; do time blkdiscard -z -p ${i}m
> > >
> /dev/6e1d84f9-f939-46e9-b108-0427a08c280c/2d5c06ce-6536-4b3c-a7b6-13c6d8e55ade;
> > > done
> > >
> > > real 4m50.851s
> > > user 0m0.065s
> > > sys 0m1.482s
> > >
> > > real 4m30.504s
> > > user 0m0.047s
> > > sys 0m0.870s
> > >
> > > real 4m19.443s
> > > user 0m0.029s
> > > sys 0m0.508s
> > >
> > > real 4m13.016s
> > > user 0m0.020s
> > > sys 0m0.284s
> > >
> > > real 2m45.888s
> > > user 0m0.011s
> > > sys 0m0.162s
> > >
> > > real 2m10.153s
> > > user 0m0.003s
> > > sys 0m0.100s
> > >
> > > We are investigating why we get low throughput on this server, and also
> > > will check
> > > several other servers.
> > >
> > > Having a command line option to control this behavior sounds good. I
> don't
> > >> have enough data to tell what should be the default, but I think the
> safe
> > >> way would be to keep old behavior.
> > >>
> > >
> > > We file this bug:
> > > https://bugzilla.redhat.com/1648622
> > >
> >
> > More data from even slower storage - zeroing 10G lv on Kaminario K2
> >
> > # time blkdiscard -z -p 32m /dev/test_vg/test_lv2
> >
> > real50m12.425s
> > user0m0.018s
> > sys 2m6.785s
> >
> > Maybe something is wrong with this storage, since we see this:
> >
> > # grep -s "" /sys/block/dm-29/queue/* | grep write_same_max_bytes
> > /sys/block/dm-29/queue/write_same_max_bytes:512
> >
> > Since BLKZEROOUT always fallback to manual slow zeroing silently,
> > maybe we can disable the aggressive pre-zero of the entire device
> > for block devices, and keep this optimization for files when fallocate()
> > is supported?
>
> I'm not sure what the detour through NBD changes, but qemu-img directly
> on a block device doesn't use BLKZEROOUT first, but
> FALLOC_FL_PUNCH_HOLE.


Lo

Re: [Qemu-devel] [Qemu-block] Change in qemu 2.12 causes qemu-img convert to NBD to write more data

2018-11-17 Thread Richard W.M. Jones

On Sat, Nov 17, 2018 at 10:59:26PM +0200, Nir Soffer wrote:
> On Fri, Nov 16, 2018 at 5:26 PM Kevin Wolf  wrote:
> 
> > Am 15.11.2018 um 23:27 hat Nir Soffer geschrieben:
> > > On Sun, Nov 11, 2018 at 6:11 PM Nir Soffer  wrote:
> > >
> > > > On Wed, Nov 7, 2018 at 7:55 PM Nir Soffer  wrote:
> > > >
> > > >> On Wed, Nov 7, 2018 at 7:27 PM Kevin Wolf  wrote:
> > > >>
> > > >>> Am 07.11.2018 um 15:56 hat Nir Soffer geschrieben:
> > > >>> > Wed, Nov 7, 2018 at 4:36 PM Richard W.M. Jones 
> > > >>> wrote:
> > > >>> >
> > > >>> > > Another thing I tried was to change the NBD server (nbdkit) so
> > that
> > > >>> it
> > > >>> > > doesn't advertise zero support to the client:
> > > >>> > >
> > > >>> > >   $ nbdkit --filter=log --filter=nozero memory size=6G
> > > >>> logfile=/tmp/log \
> > > >>> > >   --run './qemu-img convert ./fedora-28.img -n $nbd'
> > > >>> > >   $ grep '\.\.\.$' /tmp/log | sed 's/.*\([A-Z][a-z]*\).*/\1/' |
> > uniq
> > > >>> -c
> > > >>> > >2154 Write
> > > >>> > >
> > > >>> > > Not surprisingly no zero commands are issued.  The size of the
> > write
> > > >>> > > commands is very uneven -- it appears to be send one command per
> > > >>> block
> > > >>> > > of zeroes or data.
> > > >>> > >
> > > >>> > > Nir: If we could get information from imageio about whether
> > zeroing
> > > >>> is
> > > >>> > > implemented efficiently or not by the backend, we could change
> > > >>> > > virt-v2v / nbdkit to advertise this back to qemu.
> > > >>> >
> > > >>> > There is no way to detect the capability, ioctl(BLKZEROOUT) always
> > > >>> > succeeds, falling back to manual zeroing in the kernel silently
> > > >>> >
> > > >>> > Even if we could, sending zero on the wire from qemu may be even
> > > >>> > slower, and it looks like qemu send even more requests in this case
> > > >>> > (2154 vs ~1300).
> > > >>> >
> > > >>> > Looks like this optimization in qemu side leads to worse
> > performance,
> > > >>> > so it should not be enabled by default.
> > > >>>
> > > >>> Well, that's overgeneralising your case a bit. If the backend does
> > > >>> support efficient zero writes (which file systems, the most common
> > case,
> > > >>> generally do), doing one big write_zeroes request at the start can
> > > >>> improve performance quite a bit.
> > > >>>
> > > >>> It seems the problem is that we can't really know whether the
> > operation
> > > >>> will be efficient because the backends generally don't tell us. Maybe
> > > >>> NBD could introduce a flag for this, but in the general case it
> > appears
> > > >>> to me that we'll have to have a command line option.
> > > >>>
> > > >>> However, I'm curious what your exact use case and the backend used
> > in it
> > > >>> is? Can something be improved there to actually get efficient zero
> > > >>> writes and get even better performance than by just disabling the big
> > > >>> zero write?
> > > >>
> > > >>
> > > >> The backend is some NetApp storage connected via FC. I don't have
> > > >> more info on this. We get zero rate of about 1G/s on this storage,
> > which
> > > >> is quite slow compared with other storage we tested.
> > > >>
> > > >> One option we check now is if this is the kernel silent fallback to
> > manual
> > > >> zeroing when the server advertise wrong value of write_same_max_bytes.
> > > >>
> > > >
> > > > We eliminated this using blkdiscard. This is what we get on with this
> > > > storage
> > > > zeroing 100G LV:
> > > >
> > > > for i in 1 2 4 8 16 32; do time blkdiscard -z -p ${i}m
> > > >
> > /dev/6e1d84f9-f939-46e9-b108-0427a08c280c/2d5c06ce-6536-4b3c-a7b6-13c6d8e55ade;
> > > > done
> > > >
> > > > real 4m50.851s
> > > > user 0m0.065s
> > > > sys 0m1.482s
> > > >
> > > > real 4m30.504s
> > > > user 0m0.047s
> > > > sys 0m0.870s
> > > >
> > > > real 4m19.443s
> > > > user 0m0.029s
> > > > sys 0m0.508s
> > > >
> > > > real 4m13.016s
> > > > user 0m0.020s
> > > > sys 0m0.284s
> > > >
> > > > real 2m45.888s
> > > > user 0m0.011s
> > > > sys 0m0.162s
> > > >
> > > > real 2m10.153s
> > > > user 0m0.003s
> > > > sys 0m0.100s
> > > >
> > > > We are investigating why we get low throughput on this server, and also
> > > > will check
> > > > several other servers.
> > > >
> > > > Having a command line option to control this behavior sounds good. I
> > don't
> > > >> have enough data to tell what should be the default, but I think the
> > safe
> > > >> way would be to keep old behavior.
> > > >>
> > > >
> > > > We file this bug:
> > > > https://bugzilla.redhat.com/1648622
> > > >
> > >
> > > More data from even slower storage - zeroing 10G lv on Kaminario K2
> > >
> > > # time blkdiscard -z -p 32m /dev/test_vg/test_lv2
> > >
> > > real50m12.425s
> > > user0m0.018s
> > > sys 2m6.785s
> > >
> > > Maybe something is wrong with this storage, since we see this:
> > >
> > > # grep -s "" /sys/block/dm-29/queue/* | grep write_same_max_bytes
> > > /sys/block/dm-29/queue/write_same_max_bytes:512
> > >
> > > Since BLKZEROOUT always fallback to manual slow

Re: [Qemu-devel] [PATCH 6/6] tests: exercise NBD server in TLS mode

2018-11-17 Thread Eric Blake


On 11/16/18 9:53 AM, Daniel P. Berrangé wrote:

Add tests that validate it is possible to connect to an NBD server
running TLS mode. Also test mis-matched TLS vs non-TLS connections
correctly fail.
---


Missing your Signed-off-by. Can you please supply that, so I can include 
this in my pull request?


Also, I'm getting failures when trying to test it:

@@ -17,9 +17,9 @@

 == check plain client to TLS server fails ==
 option negotiation failed: read failed: Unexpected end-of-file before 
all bytes were read

+write failed (error message): Unable to write to socket: Broken pipe
 qemu-img: Could not open 'nbd://localhost:10809': TLS negotiation 
required before option 8 (structured reply)

 server reported: Option 0x8 not permitted before TLS
-write failed (error message): Unable to write to socket: Broken pipe


which looks like an output race. :(

--
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org

Re: [Qemu-devel] [PATCH 6/6] tests: exercise NBD server in TLS mode

2018-11-17 Thread Eric Blake


On 11/16/18 11:20 AM, Eric Blake wrote:

On 11/16/18 9:53 AM, Daniel P. Berrangé wrote:

Add tests that validate it is possible to connect to an NBD server
running TLS mode. Also test mis-matched TLS vs non-TLS connections
correctly fail.
---



+== check TLS client to plain server fails ==
+option negotiation failed: read failed: Unexpected end-of-file before 
all bytes were read


Annoying message; I wonder if we can clean that up. But not this patch's 
problem.




Actually, I tracked this message down to using socat (which actually 
connects and then abruptly exits) when probing whether the socket is up 
and listening.  That is, the message is being produced as a side effect 
of nbd_server_wait_for_tcp_socket rather than during the actual 
$QEMU_IMG command we are interested in testing.




  nbd_pid_file="${TEST_DIR}/qemu-nbd.pid"
  function nbd_server_stop()
@@ -62,3 +63,49 @@ function nbd_server_start_unix_socket()
  $QEMU_NBD -v -t -k "$nbd_unix_socket" $@ &
  nbd_server_wait_for_unix_socket $!
  }
+
+function nbd_server_set_tcp_port()
+{
+    for port in `seq 10809 10909`
+    do
+    socat TCP:$nbd_tcp_addr:$port STDIO < /dev/null 1>/dev/null 2>&1


This is the first use of socat in iotests.  Might not be the most 
portable, but I don't know if I have better ideas. 
nbdkit.git/tests/test-ip.sh greps the output of 'ss -ltn' to locate free 
ports, but I don't know if ss is any better than socat.


So, I'm planning to squash this in, to use ss instead of socat, as follows:

diff --git i/tests/qemu-iotests/common.nbd w/tests/qemu-iotests/common.nbd
index 0483ea7c55a..d73af285abd 100644
--- i/tests/qemu-iotests/common.nbd
+++ w/tests/qemu-iotests/common.nbd
@@ -66,12 +66,12 @@ function nbd_server_start_unix_socket()

 function nbd_server_set_tcp_port()
 {
-for port in `seq 10809 10909`
+(ss --help) >/dev/null 2>&1 || _notrun "ss utility not found, 
skipping test"

+
+for ((port = 10809; port <= 10909; port++))
 do
-   socat TCP:$nbd_tcp_addr:$port STDIO < /dev/null 1>/dev/null 2>&1
-if test $? != 0
-   then
-   nbd_tcp_port=$port
+if ! ss -tln | grep -sqE ":$port\b"; then
+nbd_tcp_port=$port
 return
 fi
 done
@@ -86,9 +86,7 @@ function nbd_server_wait_for_tcp_socket()

 for ((i = 0; i < 300; i++))
 do
-socat TCP:localhost:$nbd_tcp_port STDIO < /dev/null 1>/dev/null 
2>&1

-if test $? == 0
-   then
+if ss -tln | grep -sqE ":$nbd_tcp_port\b"; then
 return
 fi
 kill -s 0 $pid 2>/dev/null
diff --git i/tests/qemu-iotests/233.out w/tests/qemu-iotests/233.out
index eaa410c2703..eb4077f9fd7 100644
--- i/tests/qemu-iotests/233.out
+++ w/tests/qemu-iotests/233.out
@@ -11,12 +11,10 @@ Generating a signed certificate...
 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864

 == check TLS client to plain server fails ==
-option negotiation failed: read failed: Unexpected end-of-file before 
all bytes were read
 qemu-img: Could not open 
'driver=nbd,host=127.0.0.1,port=10809,tls-creds=tls0': Denied by server 
for option 5 (starttls)

 server reported: TLS not configured

 == check plain client to TLS server fails ==
-option negotiation failed: read failed: Unexpected end-of-file before 
all bytes were read
 qemu-img: Could not open 'nbd://localhost:10809': TLS negotiation 
required before option 8 (structured reply)

 server reported: Option 0x8 not permitted before TLS
 write failed (error message): Unable to write to socket: Broken pipe


Also, you have to sanitize 233.out to change 10809 into PORT, so the 
test can still pass when it picked a different port.


--
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org

Re: [Qemu-devel] [PATCH 6/6] tests: exercise NBD server in TLS mode

2018-11-17 Thread Eric Blake


On 11/17/18 2:49 PM, Eric Blake wrote:

On 11/16/18 9:53 AM, Daniel P. Berrangé wrote:

Add tests that validate it is possible to connect to an NBD server
running TLS mode. Also test mis-matched TLS vs non-TLS connections
correctly fail.
---


Missing your Signed-off-by. Can you please supply that, so I can include 
this in my pull request?


Also, I'm getting failures when trying to test it:

@@ -17,9 +17,9 @@

  == check plain client to TLS server fails ==
  option negotiation failed: read failed: Unexpected end-of-file before 
all bytes were read

+write failed (error message): Unable to write to socket: Broken pipe
  qemu-img: Could not open 'nbd://localhost:10809': TLS negotiation 
required before option 8 (structured reply)

  server reported: Option 0x8 not permitted before TLS
-write failed (error message): Unable to write to socket: Broken pipe


which looks like an output race. :(


Found and squashed it - commit 37ec36f6 fixed plaintext servers to not 
be noisy for NBD_OPT_ABORT, but did not give equal treatment to TLS 
servers. Patch coming up separately.


So, with my fixes, I can add:

Tested-by: Eric Blake 

--
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org

[Qemu-devel] [PATCH 1.5/6] nbd/server: Ignore write errors when replying to NBD_OPT_ABORT

2018-11-17 Thread Eric Blake

Commit 37ec36f6 intentionally ignores errors when trying to reply
to an NBD_OPT_ABORT request for plaintext clients, but did not make
the same change for a TLS server.  Since NBD_OPT_ABORT is
documented as potentially resulting in EPIPE when the client hangs
up without waiting for our reply, we don't need to pollute the
server's output with that failure.

Signed-off-by: Eric Blake 
---
 nbd/server.c | 12 
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/nbd/server.c b/nbd/server.c
index 056cfa5ad47..dc04513de70 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -1134,12 +1134,16 @@ static int nbd_negotiate_options(NBDClient *client, 
uint16_t myflags,
 return -EINVAL;

 default:
-ret = nbd_opt_drop(client, NBD_REP_ERR_TLS_REQD, errp,
+/* Let the client keep trying, unless they asked to
+ * quit. Always try to give an error back to the
+ * client; but when replying to OPT_ABORT, be aware
+ * that the client may hang up before receiving the
+ * error, in which case we are fine ignoring the
+ * resulting EPIPE. */
+ret = nbd_opt_drop(client, NBD_REP_ERR_TLS_REQD,
+   option == NBD_OPT_ABORT ? NULL : errp,
"Option 0x%" PRIx32
" not permitted before TLS", option);
-/* Let the client keep trying, unless they asked to
- * quit. In this mode, we've already sent an error, so
- * we can't ack the abort.  */
 if (option == NBD_OPT_ABORT) {
 return 1;
 }
-- 
2.17.2

[Qemu-devel] [PATCH 7/6] iotests: Also test I/O over NBD TLS

2018-11-17 Thread Eric Blake

Enhance test 233 to also perform I/O beyond the initial handshake.

Signed-off-by: Eric Blake 
---

Depends on my tweak to 2/6 to suppress an EIO error message
on a failed read after NBD_CMD_DISC.

 tests/qemu-iotests/233 | 12 +++-
 tests/qemu-iotests/233.out | 10 ++
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/tests/qemu-iotests/233 b/tests/qemu-iotests/233
index a1ba8c09c06..5b6982be6ad 100755
--- a/tests/qemu-iotests/233
+++ b/tests/qemu-iotests/233
@@ -61,7 +61,7 @@ tls_x509_create_client "ca2" "client2"
 echo
 echo "== preparing image =="
 _make_test_img 64M
-
+$QEMU_IO -c 'w -P 0x11 1m 1m' "$TEST_IMG" | _filter_qemu_io

 echo
 echo "== check TLS client to plain server fails =="
@@ -95,6 +95,16 @@ $QEMU_IMG info --image-opts \
 driver=nbd,host=$nbd_tcp_addr,port=$nbd_tcp_port,tls-creds=tls0 \
 2>&1 | sed "s/$nbd_tcp_port/PORT/g"

+echo
+echo "== perform I/O over TLS =="
+QEMU_IO_OPTIONS=$QEMU_IO_OPTIONS_NO_FMT
+$QEMU_IO -c 'r -P 0x11 1m 1m' -c 'w -P 0x22 1m 1m' --image-opts \
+--object tls-creds-x509,dir=${tls_dir}/client1,endpoint=client,id=tls0 \
+driver=nbd,host=$nbd_tcp_addr,port=$nbd_tcp_port,tls-creds=tls0 \
+2>&1 | sed "s/$nbd_tcp_port/PORT/g" | _filter_qemu_io
+
+$QEMU_IO -f qcow2 -r -U -c 'r -P 0x22 1m 1m' "$TEST_IMG" | _filter_qemu_io
+
 # success, all done
 echo "*** done"
 rm -f $seq.full
diff --git a/tests/qemu-iotests/233.out b/tests/qemu-iotests/233.out
index 616e9238c89..94acd9b9479 100644
--- a/tests/qemu-iotests/233.out
+++ b/tests/qemu-iotests/233.out
@@ -9,6 +9,8 @@ Generating a signed certificate...

 == preparing image ==
 Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
+wrote 1048576/1048576 bytes at offset 1048576
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)

 == check TLS client to plain server fails ==
 qemu-img: Could not open 'driver=nbd,host=127.0.0.1,port=PORT,tls-creds=tls0': 
Denied by server for option 5 (starttls)
@@ -27,4 +29,12 @@ disk size: unavailable
 == check TLS with different CA fails ==
 option negotiation failed: Verify failed: No certificate was found.
 qemu-img: Could not open 'driver=nbd,host=127.0.0.1,port=PORT,tls-creds=tls0': 
The certificate hasn't got a known issuer
+
+== perform I/O over TLS ==
+read 1048576/1048576 bytes at offset 1048576
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+wrote 1048576/1048576 bytes at offset 1048576
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+read 1048576/1048576 bytes at offset 1048576
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 *** done
-- 
2.17.2

Re: [Qemu-devel] [PATCH 2/6 for-3.1] nbd: stop waiting for a NBD response with NBD_CMD_DISC

2018-11-17 Thread Eric Blake


On 11/16/18 9:53 AM, Daniel P. Berrangé wrote:

When sending a NBD_CMD_DISC message there is no reply expected,
however, the nbd_read_eof() coroutine is still waiting for a reply.
In a plain NBD connection this doesn't matter as it will just get an
EOF, however, on a TLS connection it will get an interrupted TLS data
packet. The nbd_read_eof() will then print an error message on the
console due to missing reply to NBD_CMD_DISC.

This can be seen with qemu-img

   $ qemu-img info \
   --object tls-creds-x509,dir=tlsdir,id=tls0,endpoint=client \
   --image-opts driver=nbd,host=127.0.0.1,port=9000,tls-creds=tls0
   qemu-img: Cannot read from TLS channel: Input/output error
   image: nbd://127.0.0.1:9000
   file format: nbd
   virtual size: 10M (10485760 bytes)
   disk size: unavailable

Simply setting the 'quit' flag after sending NBD_CMD_DISC is enough to
get the coroutine to stop waiting for a reply and thus suppress the error
message.


Actually, it's not quite enough - once you actually start performing 
I/O, enough coroutines are kicked off that the error still happens:


$  qemu-io -c 'r 1m 1m' -c 'w -P 0x22 1m 1m' --image-opts \
--object tls-creds-x509,dir=scratch/tls/client1,endpoint=client,id=tls0\
 driver=nbd,host=localhost,port=10809,tls-creds=tls0
read 1048576/1048576 bytes at offset 1048576
1 MiB, 1 ops; 0.0430 sec (23.204 MiB/sec and 23.2040 ops/sec)
wrote 1048576/1048576 bytes at offset 1048576
1 MiB, 1 ops; 0.0152 sec (65.479 MiB/sec and 65.4793 ops/sec)
Cannot read from TLS channel: Input/output error

Squashing this in on top of your patch helps, though:

diff --git i/block/nbd-client.c w/block/nbd-client.c
index 5f63e4b8f15..e7916c78996 100644
--- i/block/nbd-client.c
+++ w/block/nbd-client.c
@@ -79,7 +79,14 @@ static coroutine_fn void nbd_read_reply_entry(void 
*opaque)

 assert(s->reply.handle == 0);
 ret = nbd_receive_reply(s->ioc, &s->reply, &local_err);
 if (local_err) {
-error_report_err(local_err);
+/* If we are already quitting, either another error has
+ * already been reported, or we requested NBD_CMD_DISC and
+ * don't need to report anything further.  */
+if (!s->quit) {
+error_report_err(local_err);
+} else {
+error_free(local_err);
+}
 }
 if (ret <= 0) {
 break;

But I want to do more testing to make sure I'm not missing out on 
reporting an actual error if I add that.


--
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org

Re: [Qemu-devel] [PATCH 0/6] Misc fixes to NBD

2018-11-17 Thread Eric Blake


On 11/16/18 9:53 AM, Daniel P. Berrangé wrote:

This does two minor fixes to the NBD code and adds significant coverage
of the NBD TLS support to detect future problems.

The first two patches should be for 3.1.

The tests can wait till 4.0 if desired.

Daniel P. Berrangé (6):
   nbd: fix whitespace in server error message
   nbd: stop waiting for a NBD response with NBD_CMD_DISC
   tests: pull qemu-nbd iotest helpers into common.nbd file
   tests: check if qemu-nbd is still alive before waiting
   tests: add iotests helpers for dealing with TLS certificates
   tests: exercise NBD server in TLS mode



I'm still missing your S-o-b on 6. I've posted a preliminary version of 
your series with my touchups incorporated, if you'd like to double check 
it, at:


https://repo.or.cz/qemu/ericb.git/shortlog/refs/heads/nbd


--
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org

Re: [Qemu-devel] [PATCH 3/6] tests: pull qemu-nbd iotest helpers into common.nbd file

2018-11-17 Thread Eric Blake


On 11/16/18 9:53 AM, Daniel P. Berrangé wrote:

The helpers for starting/stopping qemu-nbd in 058 will be useful in
other test cases, so move them into a common.nbd file.

Signed-off-by: Daniel P. Berrangé 
---



+function nbd_server_start_unix_socket()
+{
+nbd_server_stop
+$QEMU_NBD -v -t -k "$nbd_unix_socket" $@ &


Needs to be "$@" to properly preserve whitespace and/or empty arguments 
(the latter if someone passes -x '' for a default-named export).


--
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org

Re: [Qemu-devel] [Qemu-block] Change in qemu 2.12 causes qemu-img convert to NBD to write more data

2018-11-17 Thread Nir Soffer

On Sat, Nov 17, 2018 at 11:13 PM Richard W.M. Jones 
wrote:

> On Sat, Nov 17, 2018 at 10:59:26PM +0200, Nir Soffer wrote:
> > On Fri, Nov 16, 2018 at 5:26 PM Kevin Wolf  wrote:
> >
> > > Am 15.11.2018 um 23:27 hat Nir Soffer geschrieben:
> > > > On Sun, Nov 11, 2018 at 6:11 PM Nir Soffer 
> wrote:
> > > >
> > > > > On Wed, Nov 7, 2018 at 7:55 PM Nir Soffer 
> wrote:
> > > > >
> > > > >> On Wed, Nov 7, 2018 at 7:27 PM Kevin Wolf 
> wrote:
> > > > >>
> > > > >>> Am 07.11.2018 um 15:56 hat Nir Soffer geschrieben:
> > > > >>> > Wed, Nov 7, 2018 at 4:36 PM Richard W.M. Jones <
> rjo...@redhat.com>
> > > > >>> wrote:
> > > > >>> >
> > > > >>> > > Another thing I tried was to change the NBD server (nbdkit)
> so
> > > that
> > > > >>> it
> > > > >>> > > doesn't advertise zero support to the client:
> > > > >>> > >
> > > > >>> > >   $ nbdkit --filter=log --filter=nozero memory size=6G
> > > > >>> logfile=/tmp/log \
> > > > >>> > >   --run './qemu-img convert ./fedora-28.img -n $nbd'
> > > > >>> > >   $ grep '\.\.\.$' /tmp/log | sed
> 's/.*\([A-Z][a-z]*\).*/\1/' |
> > > uniq
> > > > >>> -c
> > > > >>> > >2154 Write
> > > > >>> > >
> > > > >>> > > Not surprisingly no zero commands are issued.  The size of
> the
> > > write
> > > > >>> > > commands is very uneven -- it appears to send one command
> per
> > > > >>> block
> > > > >>> > > of zeroes or data.
> > > > >>> > >
> > > > >>> > > Nir: If we could get information from imageio about whether
> > > zeroing
> > > > >>> is
> > > > >>> > > implemented efficiently or not by the backend, we could
> change
> > > > >>> > > virt-v2v / nbdkit to advertise this back to qemu.
> > > > >>> >
> > > > >>> > There is no way to detect the capability, ioctl(BLKZEROOUT)
> always
> > > > >>> > succeeds, falling back to manual zeroing in the kernel silently
> > > > >>> >
> > > > >>> > Even if we could, sending zero on the wire from qemu may be
> even
> > > > >>> > slower, and it looks like qemu sends even more requests in this
> case
> > > > >>> > (2154 vs ~1300).
> > > > >>> >
> > > > >>> > Looks like this optimization in qemu side leads to worse
> > > performance,
> > > > >>> > so it should not be enabled by default.
> > > > >>>
> > > > >>> Well, that's overgeneralising your case a bit. If the backend
> does
> > > > >>> support efficient zero writes (which file systems, the most
> common
> > > case,
> > > > >>> generally do), doing one big write_zeroes request at the start
> can
> > > > >>> improve performance quite a bit.
> > > > >>>
> > > > >>> It seems the problem is that we can't really know whether the
> > > operation
> > > > >>> will be efficient because the backends generally don't tell us.
> Maybe
> > > > >>> NBD could introduce a flag for this, but in the general case it
> > > appears
> > > > >>> to me that we'll have to have a command line option.
> > > > >>>
> > > > >>> However, I'm curious what your exact use case and the backend
> used
> > > in it
> > > > >>> is? Can something be improved there to actually get efficient
> zero
> > > > >>> writes and get even better performance than by just disabling
> the big
> > > > >>> zero write?
> > > > >>
> > > > >>
> > > > >> The backend is some NetApp storage connected via FC. I don't have
> > > > >> more info on this. We get zero rate of about 1G/s on this storage,
> > > which
> > > > >> is quite slow compared with other storage we tested.
> > > > >>
> > > > >> One option we check now is if this is the kernel silent fallback
> to
> > > manual
> > > > >> zeroing when the server advertises a wrong value of
> write_same_max_bytes.
> > > > >>
> > > > >
> > > > > We eliminated this using blkdiscard. This is what we get with
> this
> > > > > storage
> > > > > zeroing 100G LV:
> > > > >
> > > > > for i in 1 2 4 8 16 32; do time blkdiscard -z -p ${i}m
> > > > >
> > >
> /dev/6e1d84f9-f939-46e9-b108-0427a08c280c/2d5c06ce-6536-4b3c-a7b6-13c6d8e55ade;
> > > > > done
> > > > >
> > > > > real 4m50.851s
> > > > > user 0m0.065s
> > > > > sys 0m1.482s
> > > > >
> > > > > real 4m30.504s
> > > > > user 0m0.047s
> > > > > sys 0m0.870s
> > > > >
> > > > > real 4m19.443s
> > > > > user 0m0.029s
> > > > > sys 0m0.508s
> > > > >
> > > > > real 4m13.016s
> > > > > user 0m0.020s
> > > > > sys 0m0.284s
> > > > >
> > > > > real 2m45.888s
> > > > > user 0m0.011s
> > > > > sys 0m0.162s
> > > > >
> > > > > real 2m10.153s
> > > > > user 0m0.003s
> > > > > sys 0m0.100s
> > > > >
> > > > > We are investigating why we get low throughput on this server, and
> also
> > > > > will check
> > > > > several other servers.
> > > > >
> > > > > Having a command line option to control this behavior sounds good.
> I
> > > don't
> > > > >> have enough data to tell what should be the default, but I think
> the
> > > safe
> > > > >> way would be to keep old behavior.
> > > > >>
> > > > >
> > > > > We filed this bug:
> > > > > https://bugzilla.redhat.com/1648622
> > > > >
> > > >
> > > > More data from even slower storage - zeroing 10G lv on Kaminario K2
> > > >
> > > >

Re: [Qemu-devel] [PATCH v3 23/23] docs: Update pvrdma device documentation

2018-11-17 Thread Yuval Shaia

On Sat, Nov 17, 2018 at 02:34:18PM +0200, Marcel Apfelbaum wrote:
> 
> 
> On 11/13/18 9:13 AM, Yuval Shaia wrote:
> > Interface with the device is changed with the addition of support for
> > MAD packets.
> > Adjust documentation accordingly.
> > 
> > While there fix a minor mistake which may lead to think that there is a
> > relation between using RXE on host and the compatibility with bare-metal
> > peers.
> > 
> > Signed-off-by: Yuval Shaia 
> > ---
> >   docs/pvrdma.txt | 103 +++-
> >   1 file changed, 84 insertions(+), 19 deletions(-)
> > 
> > diff --git a/docs/pvrdma.txt b/docs/pvrdma.txt
> > index 5599318159..9e8d1674b7 100644
> > --- a/docs/pvrdma.txt
> > +++ b/docs/pvrdma.txt
> > @@ -9,8 +9,9 @@ It works with its Linux Kernel driver AS IS, no need for 
> > any special guest
> >   modifications.
> >   While it complies with the VMware device, it can also communicate with 
> > bare
> > -metal RDMA-enabled machines and does not require an RDMA HCA in the host, 
> > it
> > -can work with Soft-RoCE (rxe).
> > +metal RDMA-enabled machines as peers.
> > +
> > +It does not require an RDMA HCA in the host, it can work with Soft-RoCE 
> > (rxe).
> >   It does not require the whole guest RAM to be pinned allowing memory
> >   over-commit and, even if not implemented yet, migration support will be
> > @@ -78,29 +79,93 @@ the required RDMA libraries.
> >   3. Usage
> >   
> > +
> > +
> > +3.1 VM Memory settings
> > +==
> >   Currently the device is working only with memory backed RAM
> >   and it must be mark as "shared":
> >  -m 1G \
> >  -object memory-backend-ram,id=mb1,size=1G,share \
> >  -numa node,memdev=mb1 \
> > -The pvrdma device is composed of two functions:
> > - - Function 0 is a vmxnet Ethernet Device which is redundant in Guest
> > -   but is required to pass the ibdevice GID using its MAC.
> > -   Examples:
> > - For an rxe backend using eth0 interface it will use its mac:
> > -   -device vmxnet3,addr=.0,multifunction=on,mac=
> > - For an SRIOV VF, we take the Ethernet Interface exposed by it:
> > -   -device vmxnet3,multifunction=on,mac=
> > - - Function 1 is the actual device:
> > -   -device 
> > pvrdma,addr=.1,backend-dev=,backend-gid-idx=,backend-port=
> > -   where the ibdevice can be rxe or RDMA VF (e.g. mlx5_4)
> > - Note: Pay special attention that the GID at backend-gid-idx matches 
> > vmxnet's MAC.
> > - The rules of conversion are part of the RoCE spec, but since manual 
> > conversion
> > - is not required, spotting problems is not hard:
> > -Example: GID: fe80::::7efe:90ff:fecb:743a
> > - MAC: 7c:fe:90:cb:74:3a
> > -Note the difference between the first byte of the MAC and the GID.
> > +
> > +3.2 MAD Multiplexer
> > +===
> > +MAD Multiplexer is a service that exposes MAD-like interface for VMs in
> > +order to overcome the limitation where only single entity can register with
> > +MAD layer to send and receive RDMA-CM MAD packets.
> > +
> > +To build rdmacm-mux run
> > +# make rdmacm-mux
> > +
> > +The program accepts 3 command line arguments and exposes a UNIX socket to
> > +be used to relay control and data messages to and from the service.
> > +-s unix-socket-path   Path to unix socket to listen on
> > +  (default /var/run/rdmacm-mux)
> > +-d rdma-device-name   Name of RDMA device to register with
> > +  (default rxe0)
> > +-p rdma-device-port   Port number of RDMA device to register with
> > +  (default 1)
> > +The final UNIX socket file name is a concatenation of the 3 arguments so
> > +for example for device name mlx5_0 and port 2 the file
> > +/var/run/rdmacm-mux-mlx5_0-2 will be created.
> > +
> > +Please refer to contrib/rdmacm-mux for more details.
> > +
> > +
> > +3.3 PCI devices settings
> > +
> > +RoCE device exposes two functions - Ethernet and RDMA.
> > +To support it, pvrdma device is composed of two PCI functions, an Ethernet
> > +device of type vmxnet3 on PCI slot 0 and a pvrdma device on PCI slot 1. The
> > +Ethernet function can be used for other Ethernet purposes such as IP.
> > +
> > +
> > +3.4 Device parameters
> > +=
> > +- netdev: Specifies the Ethernet device on host. For Soft-RoCE (rxe) this
> > +  would be the Ethernet device used to create it. For any other physical
> > +  RoCE device this would be the netdev name of the device.
> 
> I didn't understand, can you please elaborate? We need the ibdev,
> this is clear, but what is the "ethernet device on host", how do
> we get it and how it is used?

netdev is used to maintain the port's GID table.

A GID entry is added by assigning a new IPv6 address to the corresponding
Ethernet function, and removal works the same way in reverse, i.e. removing
an IPv6 address from the Ethernet function deletes the corresponding GID
from the GID table.

I wish there would be a way to extract

64 matches

Mail list logo