Re: [Qemu-devel] [PATCH 2/2] s390x/pci: use PCI_MSIX_FLAGS on retrieving the MSIX entries

2016-02-22 Thread Yi Min Zhao
于 Mon, 22 Feb 2016 14:15:07 +0100
Christian Borntraeger  写道:

> On 02/19/2016 04:18 PM, Wei Yang wrote:
> > Even PCI_CAP_FLAGS has the same value as PCI_MSIX_FLAGS, the later one is
> > the more proper on retrieving MSIX entries.
> > 
> > This patch uses PCI_MSIX_FLAGS to retrieve the MSIX entries.
> > 
> > Signed-off-by: Wei Yang 
> > CC: Cornelia Huck 
> > CC: Christian Borntraeger 
> > ---
> >  hw/s390x/s390-pci-bus.c |2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)
> > 
> > diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
> > index 132588b..9d40039 100644
> > --- a/hw/s390x/s390-pci-bus.c
> > +++ b/hw/s390x/s390-pci-bus.c
> > @@ -523,7 +523,7 @@ static int s390_pcihost_setup_msix(S390PCIBusDevice 
> > *pbdev)
> >  return 0;
> >  }
> > 
> > -ctrl = pci_host_config_read_common(pbdev->pdev, pos + PCI_CAP_FLAGS,
> > +ctrl = pci_host_config_read_common(pbdev->pdev, pos + PCI_MSIX_FLAGS,
> >   pci_config_size(pbdev->pdev), sizeof(ctrl));
> >  table = pci_host_config_read_common(pbdev->pdev, pos + PCI_MSIX_TABLE,
> >   pci_config_size(pbdev->pdev), sizeof(table));
> > 
> 
> looks sane.
> Yi Min, can you ack/nack?
> 
> 

It looks sane to me. A little change.




[Qemu-devel] [PATCH v5 1/1] s390x: pci infrastructure modeling

2015-08-20 Thread Yi Min Zhao
From: Yi Min Zhao 

This patch introduces a new facility (and bus)
to hold devices representing information actually
provided by s390 firmware and I/O configuration.
usage example:
-device s390-pcihost,index=1
-device zpci,fid=2,uid=5,pci_id=vpci1,id=zpci1
-device vfio-pci,host=0000:00:00.0,id=vpci1

The first line will create an s390 pci host bridge
and init the root bus. The user must assign a
unique value to index, which is a new property of
the s390-pcihost device.
The second line will create a s390 pci device to
store s390 specific information, and references
the corresponding vfio pci device via device id.
We create a s390 pci facility bus to hold all the
zpci devices.
The third line will create a standard vfio pci
device, and attach it to the root bus. These are
similar to the standard process to define a pci
device on other platforms.

Signed-off-by: Yi Min Zhao 
---
 hw/s390x/s390-pci-bus.c|  379 ++--
 hw/s390x/s390-pci-bus.h|   54 ++-
 hw/s390x/s390-pci-inst.c   |   80 ++
 hw/s390x/s390-virtio-ccw.c |5 +-
 4 files changed, 392 insertions(+), 126 deletions(-)

diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
index 560b66a..524dd79 100644
--- a/hw/s390x/s390-pci-bus.c
+++ b/hw/s390x/s390-pci-bus.c
@@ -32,16 +32,16 @@ int chsc_sei_nt2_get_event(void *res)
 PciCcdfErr *eccdf;
 int rc = 1;
 SeiContainer *sei_cont;
-S390pciState *s = S390_PCI_HOST_BRIDGE(
-object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
+S390PCIFacility *f = S390_PCI_FACILITY(
+object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
 
-if (!s) {
+if (!f) {
 return rc;
 }
 
-sei_cont = QTAILQ_FIRST(&s->pending_sei);
+sei_cont = QTAILQ_FIRST(&f->pending_sei);
 if (sei_cont) {
-QTAILQ_REMOVE(&s->pending_sei, sei_cont, link);
+QTAILQ_REMOVE(&f->pending_sei, sei_cont, link);
 nt2_res->nt = 2;
 nt2_res->cc = sei_cont->cc;
 nt2_res->length = cpu_to_be16(sizeof(ChscSeiNt2Res));
@@ -72,30 +72,41 @@ int chsc_sei_nt2_get_event(void *res)
 
 int chsc_sei_nt2_have_event(void)
 {
-S390pciState *s = S390_PCI_HOST_BRIDGE(
-object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
+S390PCIFacility *f = S390_PCI_FACILITY(
+object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
 
-if (!s) {
+if (!f) {
 return 0;
 }
 
-return !QTAILQ_EMPTY(&s->pending_sei);
+return !QTAILQ_EMPTY(&f->pending_sei);
+}
+
+void s390_pci_device_enable(S390PCIBusDevice *zpci)
+{
+zpci->fh = zpci->fh | 1 << ENABLE_BIT_OFFSET;
+}
+
+void s390_pci_device_disable(S390PCIBusDevice *zpci)
+{
+zpci->fh = zpci->fh & ~(1 << ENABLE_BIT_OFFSET);
+if (zpci->is_unplugged) {
+object_unparent(OBJECT(zpci));
+}
 }
 
 S390PCIBusDevice *s390_pci_find_dev_by_fid(uint32_t fid)
 {
 S390PCIBusDevice *pbdev;
-int i;
-S390pciState *s = S390_PCI_HOST_BRIDGE(
-object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
+S390PCIFacility *f = S390_PCI_FACILITY(
+object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
 
-if (!s) {
+if (!f) {
 return NULL;
 }
 
-for (i = 0; i < PCI_SLOT_MAX; i++) {
-pbdev = &s->pbdev[i];
-if ((pbdev->fh != 0) && (pbdev->fid == fid)) {
+QTAILQ_FOREACH(pbdev, &f->zpci_list, next) {
+if (pbdev->fid == fid) {
 return pbdev;
 }
 }
@@ -126,39 +137,20 @@ void s390_pci_sclp_configure(int configure, SCCB *sccb)
 return;
 }
 
-static uint32_t s390_pci_get_pfid(PCIDevice *pdev)
-{
-return PCI_SLOT(pdev->devfn);
-}
-
-static uint32_t s390_pci_get_pfh(PCIDevice *pdev)
+S390PCIBusDevice *s390_pci_find_dev_by_idx(uint64_t idx)
 {
-return PCI_SLOT(pdev->devfn) | FH_VIRT;
-}
-
-S390PCIBusDevice *s390_pci_find_dev_by_idx(uint32_t idx)
-{
-S390PCIBusDevice *pbdev;
-int i;
-int j = 0;
-S390pciState *s = S390_PCI_HOST_BRIDGE(
-object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
+S390PCIBusDevice *tmp;
+S390PCIFacility *fac = S390_PCI_FACILITY(
+object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
 
-if (!s) {
+if (!fac) {
 return NULL;
 }
 
-for (i = 0; i < PCI_SLOT_MAX; i++) {
-pbdev = &s->pbdev[i];
-
-if (pbdev->fh == 0) {
-continue;
-}
-
-if (j == idx) {
-return pbdev;
+QTAILQ_FOREACH(tmp, &fac->zpci_list, next) {
+if (idx == tmp->idx && tmp->available) {
+return tmp;
 }
-j++;
 }
 
 return NULL;
@@ -167,16 +159,14 @@ S390PCIBusDevice *s390_pci_find_dev_by_idx(uint32_t idx)
 S390PCIBusDevice *s390_pci_find_dev_by_fh(uint32_t fh)
 {
 S390PCIBusDevice *pbdev;
-int i;
-S390pciState *s = 

[Qemu-devel] [PATCH v5 0/1] s390 pci infrastructure modeling

2015-08-20 Thread Yi Min Zhao
From: Yi Min Zhao 

This patch extends the current s390 pci implementation to provide more 
flexibility in configuration of s390 specific device handling.

For each vfio pci device, I create a zpci device to store s390 specific
information, and attach all of these special zpci devices to the s390
facility bus. A zpci device references the corresponding PCI device via
device id.

The new design allows defining multiple host bridges; each host bridge
can hold at most 32 zpci devices. A new property named index is added to
the s390-pcihost device, and a unique value must be assigned to it on the
qemu command line.

The topology for this implementation could be:

  dev: s390-pcihost, id ""
index=1
bus: pci.0
  type PCI
  dev: vfio-pci, id "vpci1"
host = "0000:00:00.0"
..
  dev: vfio-pci, id "vpci2"
host = "0001:00:00.0"
..
  dev: s390-pci-facility, id ""
bus: s390-pci-fac-bus.0
  type s390-pci-fac-bus
  dev: zpci, id "zpci1"
fid = 1 (0x1)
uid = 2 (0x2)
pci_id = "vpci1"
  dev: zpci, id "zpci2"
fid = 6 (0x6)
uid = 7 (0x7)
pci_id = "vpci2"

To make the review easier, I keep all of the old names, such as 
S390PCIBusDevice to name a zpci device. I will make a cleanup 
patch later to change these names to a more suitable name.

Comparing to v4, this patch does the following change:
1.Add index property to s390-pcihost device.
2.Add index to S390PCIBusDevice. Its value is assigned by a logical
  OR of the s390-pcihost device's index and the pci slot.
3.Add three usable macros to get zpci index and s390pcihost index.
4.Add a bool member named "available" to S390PCIBusDevice to indicate
  whether the relationship between zpci and vfio pci is established.
  This change fixes a bug in the previous code where defining only a
  zpci device and then hot unplugging it would fail.
5.Rework s390_pci_find_dev_by_idx function.
6.Rework s390_pci_device_hot_unplug_request function to be more
  stable.
7.Rework s390_pci_device_unrealize function to make the process of
  pci device unplug more logical and stable. 

Comparing to v3, this patch does the following change:
1.Exchange vfio-pci and zpci definitions. Define zpci device firstly
  then define vfio-pci device.
2.Unplug either vfio-pci device or zpci device can remove both of them.

Yi Min Zhao (1):
  s390x: pci infrastructure modeling

 hw/s390x/s390-pci-bus.c|  379 ++--
 hw/s390x/s390-pci-bus.h|   54 ++-
 hw/s390x/s390-pci-inst.c   |   80 ++
 hw/s390x/s390-virtio-ccw.c |5 +-
 4 files changed, 392 insertions(+), 126 deletions(-)

-- 
1.7.9




[Qemu-devel] [PATCH v4 0/1] s390 pci infrastructure modelling

2015-07-08 Thread Yi Min Zhao
From: Yi Min Zhao 

This patch extends the current s390 pci implementation to
provide more flexibility in configuration of s390 specific
device handling.

For each vfio pci device, I create a zpci device to store s390
specific information, and attach all of these special zpci devices
to the s390 facility bus. A zpci device references the corresponding
PCI device via device id.

The new design allows to define multiple host bridges, each host bridge
could hold 32 zpci devices at most.

The topology for this implementation could be:

  dev: s390-pcihost, id ""
bus: pci.0
  type PCI
  dev: vfio-pci, id "vpci1"
host = "0000:00:00.0"
..
  dev: vfio-pci, id "vpci2"
host = "0001:00:00.0"
..
  dev: s390-pci-facility, id ""
bus: s390-pci-fac-bus.0
  type s390-pci-fac-bus
  dev: zpci, id "zpci1"
fid = 1 (0x1)
uid = 2 (0x2)
pci_id = "vpci1"
  dev: zpci, id "zpci2"
fid = 6 (0x6)
uid = 7 (0x7)
pci_id = "vpci2"

To make the review easier, I keep all of the old names, such as 
S390PCIBusDevice to name a zpci device. I will make a cleanup 
patch later to change these names to a more suitable name.

Comparing to v3, this patch does the following change:
1.Exchange vfio-pci and zpci definitions. Define zpci device firstly
  then define vfio-pci device.
2.Unplug either vfio-pci device or zpci device can remove both of them.

Yi Min Zhao (1):
  s390 pci infrastructure modelling

 hw/s390x/s390-pci-bus.c|  323 ++--
 hw/s390x/s390-pci-bus.h|   41 +-
 hw/s390x/s390-pci-inst.c   |   12 ++-
 hw/s390x/s390-virtio-ccw.c |5 +-
 4 files changed, 302 insertions(+), 79 deletions(-)

-- 
1.7.9




[Qemu-devel] [PATCH v4 1/1] s390 pci infrastructure modelling

2015-07-08 Thread Yi Min Zhao
From: Yi Min Zhao 

This patch introduces a new facility (and bus)
to hold devices representing information actually
provided by s390 firmware and I/O configuration.
usage example:
-device s390-pcihost
-device zpci,fid=2,uid=5,pci_id=vpci1,id=zpci1
-device vfio-pci,host=0000:00:00.0,id=vpci1

The first line will create a s390 pci host bridge
and init the root bus.
The second line will create a s390 pci device to
store s390 specific information, and references
the corresponding vfio pci device via device id.
We create a s390 pci facility bus to hold all the
zpci devices.
The third line will create a standard vfio pci
device, and attach it to the root bus. These are
similar to the standard process to define a pci
device on other platforms.

Signed-off-by: Yi Min Zhao 
---
 hw/s390x/s390-pci-bus.c|  323 ++--
 hw/s390x/s390-pci-bus.h|   41 +-
 hw/s390x/s390-pci-inst.c   |   12 ++-
 hw/s390x/s390-virtio-ccw.c |5 +-
 4 files changed, 302 insertions(+), 79 deletions(-)

diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
index 560b66a..c92fa25 100644
--- a/hw/s390x/s390-pci-bus.c
+++ b/hw/s390x/s390-pci-bus.c
@@ -32,8 +32,8 @@ int chsc_sei_nt2_get_event(void *res)
 PciCcdfErr *eccdf;
 int rc = 1;
 SeiContainer *sei_cont;
-S390pciState *s = S390_PCI_HOST_BRIDGE(
-object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
+S390PCIFacility *s = S390_PCI_FACILITY(
+object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
 
 if (!s) {
 return rc;
@@ -72,8 +72,8 @@ int chsc_sei_nt2_get_event(void *res)
 
 int chsc_sei_nt2_have_event(void)
 {
-S390pciState *s = S390_PCI_HOST_BRIDGE(
-object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
+S390PCIFacility *s = S390_PCI_FACILITY(
+object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
 
 if (!s) {
 return 0;
@@ -82,20 +82,31 @@ int chsc_sei_nt2_have_event(void)
 return !QTAILQ_EMPTY(&s->pending_sei);
 }
 
+void s390_pci_device_enable(S390PCIBusDevice *zpci)
+{
+zpci->fh = zpci->fh | 1 << ENABLE_BIT_OFFSET;
+}
+
+void s390_pci_device_disable(S390PCIBusDevice *zpci)
+{
+zpci->fh = zpci->fh & ~(1 << ENABLE_BIT_OFFSET);
+if (zpci->is_unplugged) {
+object_unparent(OBJECT(zpci));
+}
+}
+
 S390PCIBusDevice *s390_pci_find_dev_by_fid(uint32_t fid)
 {
 S390PCIBusDevice *pbdev;
-int i;
-S390pciState *s = S390_PCI_HOST_BRIDGE(
-object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
+S390PCIFacility *s = S390_PCI_FACILITY(
+object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
 
 if (!s) {
 return NULL;
 }
 
-for (i = 0; i < PCI_SLOT_MAX; i++) {
-pbdev = &s->pbdev[i];
-if ((pbdev->fh != 0) && (pbdev->fid == fid)) {
+QTAILQ_FOREACH(pbdev, &s->zpci_list, next) {
+if (pbdev->fid == fid) {
 return pbdev;
 }
 }
@@ -126,39 +137,24 @@ void s390_pci_sclp_configure(int configure, SCCB *sccb)
 return;
 }
 
-static uint32_t s390_pci_get_pfid(PCIDevice *pdev)
-{
-return PCI_SLOT(pdev->devfn);
-}
-
-static uint32_t s390_pci_get_pfh(PCIDevice *pdev)
-{
-return PCI_SLOT(pdev->devfn) | FH_VIRT;
-}
-
 S390PCIBusDevice *s390_pci_find_dev_by_idx(uint32_t idx)
 {
 S390PCIBusDevice *pbdev;
-int i;
-int j = 0;
-S390pciState *s = S390_PCI_HOST_BRIDGE(
-object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
+int i = 0;
+S390PCIFacility *s = S390_PCI_FACILITY(
+object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
 
 if (!s) {
 return NULL;
 }
 
-for (i = 0; i < PCI_SLOT_MAX; i++) {
-pbdev = &s->pbdev[i];
-
-if (pbdev->fh == 0) {
-continue;
-}
-
-if (j == idx) {
+QTAILQ_FOREACH(pbdev, &s->zpci_list, next) {
+if (i == idx) {
 return pbdev;
 }
-j++;
+if (pbdev->configured) {
+i++;
+}
 }
 
 return NULL;
@@ -167,16 +163,14 @@ S390PCIBusDevice *s390_pci_find_dev_by_idx(uint32_t idx)
 S390PCIBusDevice *s390_pci_find_dev_by_fh(uint32_t fh)
 {
 S390PCIBusDevice *pbdev;
-int i;
-S390pciState *s = S390_PCI_HOST_BRIDGE(
-object_resolve_path(TYPE_S390_PCI_HOST_BRIDGE, NULL));
+S390PCIFacility *s = S390_PCI_FACILITY(
+object_resolve_path(TYPE_S390_PCI_FACILITY, NULL));
 
 if (!s || !fh) {
 return NULL;
 }
 
-for (i = 0; i < PCI_SLOT_MAX; i++) {
-pbdev = &s->pbdev[i];
+QTAILQ_FOREACH(pbdev, &s->zpci_list, next) {
 if (pbdev->fh == fh) {
 return pbdev;
 }
@@ -189,8 +183,8 @@ static void s390_pci_generate_event(uint8_t cc, uint16_t 
pec, uint32_t fh,
 uint32_t fid, uint64_t faddr, uint32_t e)
 {
 SeiContainer *sei_cont;
-S39

Re: [Qemu-devel] [PATCH 0/4] four zpci patches

2017-08-23 Thread Yi Min Zhao

Why can't I receive [Qemu-devel] prefixed patches?


在 2017/8/23 下午3:26, Yi Min Zhao 写道:

This patch set contains four small zpci patches to fixup different issues.
1) fixup calculation of msix boundary
2) remove zpci idx from msix message, instead we could use PCIDevice's id to
find zpci device in kvm_arch_fixup_msi_route()
3) fixup ind_offset calculation for adapter interrupt routing entry
4) introduce our own iommu_replay callback

Yi Min Zhao (4):
   s390x/pci: fixup trap_msix()
   s390x/pci: remove idx from msix msg data
   s390x/pci: fixup ind_offset of msix routing entry
   s390x/pci: add iommu replay callback

  hw/s390x/s390-pci-bus.c  | 24 +---
  hw/s390x/s390-pci-bus.h  |  2 ++
  hw/s390x/s390-pci-inst.c | 28 ++--
  target/s390x/kvm.c   | 11 ++-
  4 files changed, 23 insertions(+), 42 deletions(-)






Re: [Qemu-devel] [PATCH 0/4] four zpci patches

2017-08-24 Thread Yi Min Zhao



在 2017/8/24 下午3:13, Cornelia Huck 写道:

On Thu, 24 Aug 2017 13:20:12 +0800
Yi Min Zhao  wrote:


Why can't I receive [Qemu-devel] prefixed patches?


在 2017/8/23 下午3:26, Yi Min Zhao 写道:

This patch set contains four small zpci patches to fixup different issues.
1) fixup calculation of msix boundary
2) remove zpci idx from msix message, instead we could use PCIDevice's id to
 find zpci device in kvm_arch_fixup_msi_route()
3) fixup ind_offset calculation for adapter interrupt routing entry
4) introduce our own iommu_replay callback

Yi Min Zhao (4):
s390x/pci: fixup trap_msix()
s390x/pci: remove idx from msix msg data
s390x/pci: fixup ind_offset of msix routing entry
s390x/pci: add iommu replay callback

   hw/s390x/s390-pci-bus.c  | 24 +---
   hw/s390x/s390-pci-bus.h  |  2 ++
   hw/s390x/s390-pci-inst.c | 28 ++--
   target/s390x/kvm.c   | 11 ++-
   4 files changed, 23 insertions(+), 42 deletions(-)
  

Well, I did not get any of your original patches, just this reply...
seem to be stuck somewhere?



I sent the patches to qemu-devel@nongnu.org and cc'd you as well as myself.
But what I received looks like the cc'd copies, without the [Qemu-devel] prefix.
Let me send them again. I'm not sure of the reason.




Re: [Qemu-devel] [PATCH 0/4] four zpci patches

2017-08-27 Thread Yi Min Zhao



在 2017/8/25 上午12:27, Eric Blake 写道:

On 08/24/2017 03:48 AM, Yi Min Zhao wrote:


在 2017/8/24 下午3:13, Cornelia Huck 写道:

On Thu, 24 Aug 2017 13:20:12 +0800
Yi Min Zhao  wrote:


Why can't I receive [Qemu-devel] prefixed patches?

I sent patches to qemu-devel@nongnu.org and cc you also myself.
But what I received looks like CC ones, no [Qemu-devel] prefix.

You can tell mailman whether you want to receive copies of mails through
the list even when you are listed in cc (defaults to on, but some people
like myself set it off to reduce mail); if you switch that option in
your subscription settings, then you will not see the [Qemu-devel]
prefix on any mail where you were cc'd.  [side note: turning off copies
of mail where you are cc'd triggers what I consider to be a mailman bug:
it actively rewrites the to/cc of the email to omit your address in the
copy it sends to the list, which means people who follow up to the list
no longer cc you, and do not know that you were cc'd in the first place]
I didn't tell mailman anything, so I think the setting for receiving copies
is turned on.
What I receive is only the cc'd copy. But as Conny said, it seems
that the other external reviewers in the cc list didn't receive any mail
unless I replied to a mail.

Furthermore, there may be other places along the way that do data
de-duplication.  For example, anyone subscribing via a gmail.com address
gets at most one copy of a message based on the message-id; if they are
subscribed to the mailing list and also get a cc of a given message,
only one of those copies will show up (because google suppressed the
second copy with the same message-id, even though the subject lines are
different); the one copy is often (but not always) the one without the
[Qemu-devel] prefix because cc'd mail goes through less processing and
thus tends to arrive sooner than the copy sent through mailman.  [side
note: google calls de-duplication a feature, but you can't turn it off,
and it's one of the reasons that I don't use google for my personal email]






[Qemu-devel] [PATCH 2/4] s390x/pci: remove idx from msix msg data

2017-08-28 Thread Yi Min Zhao
PCIDevice pointer has been a parameter of kvm_arch_fixup_msi_route().
So we don't need to store zpci idx in msix message data to find out the
specific zpci device. Instead, we could use pci device id to find its
corresponding zpci device.

Signed-off-by: Yi Min Zhao 
---
 hw/s390x/s390-pci-bus.c  | 16 +---
 hw/s390x/s390-pci-bus.h  |  2 ++
 hw/s390x/s390-pci-inst.c | 24 
 target/s390x/kvm.c   |  7 +--
 4 files changed, 12 insertions(+), 37 deletions(-)

diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
index 61cfd2138f..9e1f7ff5c5 100644
--- a/hw/s390x/s390-pci-bus.c
+++ b/hw/s390x/s390-pci-bus.c
@@ -209,8 +209,8 @@ static S390PCIBusDevice 
*s390_pci_find_dev_by_uid(S390pciState *s, uint16_t uid)
 return NULL;
 }
 
-static S390PCIBusDevice *s390_pci_find_dev_by_target(S390pciState *s,
- const char *target)
+S390PCIBusDevice *s390_pci_find_dev_by_target(S390pciState *s,
+  const char *target)
 {
 S390PCIBusDevice *pbdev;
 
@@ -475,19 +475,13 @@ static void s390_msi_ctrl_write(void *opaque, hwaddr 
addr, uint64_t data,
 unsigned int size)
 {
 S390PCIBusDevice *pbdev = opaque;
-uint32_t idx = data >> ZPCI_MSI_VEC_BITS;
 uint32_t vec = data & ZPCI_MSI_VEC_MASK;
 uint64_t ind_bit;
 uint32_t sum_bit;
-uint32_t e = 0;
 
-DPRINTF("write_msix data 0x%" PRIx64 " idx %d vec 0x%x\n", data, idx, vec);
-
-if (!pbdev) {
-e |= (vec << ERR_EVENT_MVN_OFFSET);
-s390_pci_generate_error_event(ERR_EVENT_NOMSI, idx, 0, addr, e);
-return;
-}
+assert(pbdev);
+DPRINTF("write_msix data 0x%" PRIx64 " idx %d vec 0x%x\n", data,
+pbdev->idx, vec);
 
 if (pbdev->state != ZPCI_FS_ENABLED) {
 return;
diff --git a/hw/s390x/s390-pci-bus.h b/hw/s390x/s390-pci-bus.h
index 67af2c12ff..820c7fa52b 100644
--- a/hw/s390x/s390-pci-bus.h
+++ b/hw/s390x/s390-pci-bus.h
@@ -330,6 +330,8 @@ void s390_pci_generate_error_event(uint16_t pec, uint32_t 
fh, uint32_t fid,
 S390PCIBusDevice *s390_pci_find_dev_by_idx(S390pciState *s, uint32_t idx);
 S390PCIBusDevice *s390_pci_find_dev_by_fh(S390pciState *s, uint32_t fh);
 S390PCIBusDevice *s390_pci_find_dev_by_fid(S390pciState *s, uint32_t fid);
+S390PCIBusDevice *s390_pci_find_dev_by_target(S390pciState *s,
+  const char *target);
 S390PCIBusDevice *s390_pci_find_next_avail_dev(S390pciState *s,
S390PCIBusDevice *pbdev);
 
diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c
index eba9ffb5f2..8e088f3dc9 100644
--- a/hw/s390x/s390-pci-inst.c
+++ b/hw/s390x/s390-pci-inst.c
@@ -413,29 +413,6 @@ int pcilg_service_call(S390CPU *cpu, uint8_t r1, uint8_t 
r2)
 return 0;
 }
 
-static void update_msix_table_msg_data(S390PCIBusDevice *pbdev, uint64_t 
offset,
-   uint64_t *data, uint8_t len)
-{
-uint32_t val;
-uint8_t *msg_data;
-
-if (offset % PCI_MSIX_ENTRY_SIZE != 8) {
-return;
-}
-
-if (len != 4) {
-DPRINTF("access msix table msg data but len is %d\n", len);
-return;
-}
-
-msg_data = (uint8_t *)data - offset % PCI_MSIX_ENTRY_SIZE +
-   PCI_MSIX_ENTRY_VECTOR_CTRL;
-val = pci_get_long(msg_data) |
-((pbdev->fh & FH_MASK_INDEX) << ZPCI_MSI_VEC_BITS);
-pci_set_long(msg_data, val);
-DPRINTF("update msix msg_data to 0x%" PRIx64 "\n", *data);
-}
-
 static int trap_msix(S390PCIBusDevice *pbdev, uint64_t offset, uint8_t pcias)
 {
 if (pbdev->msix.available && pbdev->msix.table_bar == pcias &&
@@ -508,7 +485,6 @@ int pcistg_service_call(S390CPU *cpu, uint8_t r1, uint8_t 
r2)
 if (trap_msix(pbdev, offset, pcias)) {
 offset = offset - pbdev->msix.table_offset;
 mr = &pbdev->pdev->msix_table_mmio;
-update_msix_table_msg_data(pbdev, offset, &data, len);
 } else {
 mr = pbdev->pdev->io_regions[pcias].memory;
 }
diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c
index 1c68c36663..e348bfb7cc 100644
--- a/target/s390x/kvm.c
+++ b/target/s390x/kvm.c
@@ -2503,10 +2503,13 @@ int kvm_arch_fixup_msi_route(struct 
kvm_irq_routing_entry *route,
  uint64_t address, uint32_t data, PCIDevice *dev)
 {
 S390PCIBusDevice *pbdev;
-uint32_t idx = data >> ZPCI_MSI_VEC_BITS;
 uint32_t vec = data & ZPCI_MSI_VEC_MASK;
 
-pbdev = s390_pci_find_dev_by_idx(s390_get_phb(), idx);
+if (!dev) {
+return -ENODEV;
+}
+
+pbdev = s390_pci_find_dev_by_target(s390_get_phb(), DEVICE(dev)->id);
 if (!pbdev) {
 DPRINTF("add_msi_route no dev\n");
 return -ENODEV;
-- 
2.11.0 (Apple Git-81)




[Qemu-devel] [PATCH 0/4] four zpci patches

2017-08-28 Thread Yi Min Zhao
This patch set contains four small zpci patches to fixup different issues.
1) fixup calculation of msix boundary
2) remove zpci idx from msix message, instead we could use PCIDevice's id to
   find zpci device in kvm_arch_fixup_msi_route()
3) fixup ind_offset calculation for adapter interrupt routing entry
4) introduce our own iommu_replay callback

Yi Min Zhao (4):
  s390x/pci: fixup trap_msix()
  s390x/pci: remove idx from msix msg data
  s390x/pci: fixup ind_offset of msix routing entry
  s390x/pci: add iommu replay callback

 hw/s390x/s390-pci-bus.c  | 24 +---
 hw/s390x/s390-pci-bus.h  |  2 ++
 hw/s390x/s390-pci-inst.c | 28 ++--
 target/s390x/kvm.c   | 11 ++-
 4 files changed, 23 insertions(+), 42 deletions(-)

-- 
2.11.0 (Apple Git-81)




[Qemu-devel] [PATCH 3/4] s390x/pci: fixup ind_offset of msix routing entry

2017-08-28 Thread Yi Min Zhao
The aibvo of zpci device should be constant after issued mpcifc
registering irqs instruction. Each msix vector should offset from the
aibvo. But for flic adapter interrupt, we should use the absolute
offset within the aibv. So let's use the aibvo+vector to fixup msix
routing entry.

Signed-off-by: Yi Min Zhao 
---
 target/s390x/kvm.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c
index e348bfb7cc..c08b7757e7 100644
--- a/target/s390x/kvm.c
+++ b/target/s390x/kvm.c
@@ -2515,14 +2515,12 @@ int kvm_arch_fixup_msi_route(struct 
kvm_irq_routing_entry *route,
 return -ENODEV;
 }
 
-pbdev->routes.adapter.ind_offset = vec;
-
 route->type = KVM_IRQ_ROUTING_S390_ADAPTER;
 route->flags = 0;
 route->u.adapter.summary_addr = pbdev->routes.adapter.summary_addr;
 route->u.adapter.ind_addr = pbdev->routes.adapter.ind_addr;
 route->u.adapter.summary_offset = pbdev->routes.adapter.summary_offset;
-route->u.adapter.ind_offset = pbdev->routes.adapter.ind_offset;
+route->u.adapter.ind_offset = pbdev->routes.adapter.ind_offset + vec;
 route->u.adapter.adapter_id = pbdev->routes.adapter.adapter_id;
 return 0;
 }
-- 
2.11.0 (Apple Git-81)




[Qemu-devel] [PATCH 1/4] s390x/pci: fixup trap_msix()

2017-08-28 Thread Yi Min Zhao
The function trap_msix() checks whether a pcistg instruction would access
msix table entries. The correct boundary condition should be
[table_offset, table_offset+entries*entry_size). But the currently
calculated condition misses the last entry. So let's fix it up.

Acked-by: Dong Jia Shi 
Reviewed-by: Pierre Morel 
Signed-off-by: Yi Min Zhao 
---
 hw/s390x/s390-pci-inst.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c
index b7beb8c36a..eba9ffb5f2 100644
--- a/hw/s390x/s390-pci-inst.c
+++ b/hw/s390x/s390-pci-inst.c
@@ -440,8 +440,8 @@ static int trap_msix(S390PCIBusDevice *pbdev, uint64_t 
offset, uint8_t pcias)
 {
 if (pbdev->msix.available && pbdev->msix.table_bar == pcias &&
 offset >= pbdev->msix.table_offset &&
-offset <= pbdev->msix.table_offset +
-  (pbdev->msix.entries - 1) * PCI_MSIX_ENTRY_SIZE) {
+offset < (pbdev->msix.table_offset +
+  pbdev->msix.entries * PCI_MSIX_ENTRY_SIZE)) {
 return 1;
 } else {
 return 0;
-- 
2.11.0 (Apple Git-81)




[Qemu-devel] [PATCH 4/4] s390x/pci: add iommu replay callback

2017-08-28 Thread Yi Min Zhao
Let's introduce iommu replay callback for s390 pci iommu memory region.
Currently we don't need any dma mapping replay. So let it return
directly. This implementation will avoid meaningless loops calling
translation callback.

Reviewed-by: Pierre Morel 
Reviewed-by: Halil Pasic 
Signed-off-by: Yi Min Zhao 
---
 hw/s390x/s390-pci-bus.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
index 9e1f7ff5c5..359509ccea 100644
--- a/hw/s390x/s390-pci-bus.c
+++ b/hw/s390x/s390-pci-bus.c
@@ -407,6 +407,13 @@ static IOMMUTLBEntry 
s390_translate_iommu(IOMMUMemoryRegion *mr, hwaddr addr,
 return ret;
 }
 
+static void s390_pci_iommu_replay(IOMMUMemoryRegion *iommu,
+  IOMMUNotifier *notifier)
+{
+/* we don't need iommu replay currently */
+return;
+}
+
 static S390PCIIOMMU *s390_pci_get_iommu(S390pciState *s, PCIBus *bus,
 int devfn)
 {
@@ -1055,6 +1062,7 @@ static void 
s390_iommu_memory_region_class_init(ObjectClass *klass, void *data)
 IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass);
 
 imrc->translate = s390_translate_iommu;
+imrc->replay = s390_pci_iommu_replay;
 }
 
 static const TypeInfo s390_iommu_memory_region_info = {
-- 
2.11.0 (Apple Git-81)




Re: [Qemu-devel] [PATCH 1/4] s390x/pci: fixup trap_msix()

2017-08-28 Thread Yi Min Zhao



在 2017/8/28 下午10:51, Cornelia Huck 写道:

On Mon, 28 Aug 2017 10:04:44 +0200
Yi Min Zhao  wrote:


The function trap_msix() is to check if pcistg instruction would access
msix table entries. The correct boundary condition should be
[table_offset, table_offset+entries*entry_size). But the current
condition calculated misses the last entry. So let's fixup it.

Acked-by: Dong Jia Shi 
Reviewed-by: Pierre Morel 
Signed-off-by: Yi Min Zhao 
---
  hw/s390x/s390-pci-inst.c | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c
index b7beb8c36a..eba9ffb5f2 100644
--- a/hw/s390x/s390-pci-inst.c
+++ b/hw/s390x/s390-pci-inst.c
@@ -440,8 +440,8 @@ static int trap_msix(S390PCIBusDevice *pbdev, uint64_t 
offset, uint8_t pcias)
  {
  if (pbdev->msix.available && pbdev->msix.table_bar == pcias &&
  offset >= pbdev->msix.table_offset &&
-offset <= pbdev->msix.table_offset +
-  (pbdev->msix.entries - 1) * PCI_MSIX_ENTRY_SIZE) {
+offset < (pbdev->msix.table_offset +
+  pbdev->msix.entries * PCI_MSIX_ENTRY_SIZE)) {
  return 1;
  } else {
  return 0;

What happened before due to the miscalculation? Write to wrong memory
region?



We tried to plug a virtio-net pci device but failed. After inspection, we
found that the device uses two msix entries but the last one was
missed. Then we could not register the interrupt successfully because we
have to call trap_msix() in order to save some useful arch-specific
information into the msix message. But a write to the wrong memory region
didn't happen.




Re: [Qemu-devel] [PATCH 2/4] s390x/pci: remove idx from msix msg data

2017-08-28 Thread Yi Min Zhao



在 2017/8/28 下午11:04, Cornelia Huck 写道:

On Mon, 28 Aug 2017 10:04:45 +0200
Yi Min Zhao  wrote:


PCIDevcie pointer has been a parameter of kvm_arch_fixup_msi_route().

s/PCIDevcie/PCIDevice

Thanks!



So we don't need to store zpci idx in msix message data to find out the
specific zpci device. Instead, we could use pci device id to find its
corresponding zpci device.

Signed-off-by: Yi Min Zhao 
---
  hw/s390x/s390-pci-bus.c  | 16 +---
  hw/s390x/s390-pci-bus.h  |  2 ++
  hw/s390x/s390-pci-inst.c | 24 
  target/s390x/kvm.c   |  7 +--
  4 files changed, 12 insertions(+), 37 deletions(-)
diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c
index 1c68c36663..e348bfb7cc 100644
--- a/target/s390x/kvm.c
+++ b/target/s390x/kvm.c
@@ -2503,10 +2503,13 @@ int kvm_arch_fixup_msi_route(struct 
kvm_irq_routing_entry *route,
   uint64_t address, uint32_t data, PCIDevice *dev)
  {
  S390PCIBusDevice *pbdev;
-uint32_t idx = data >> ZPCI_MSI_VEC_BITS;
  uint32_t vec = data & ZPCI_MSI_VEC_MASK;
  
-pbdev = s390_pci_find_dev_by_idx(s390_get_phb(), idx);

+if (!dev) {
+return -ENODEV;
+}
+
+pbdev = s390_pci_find_dev_by_target(s390_get_phb(), DEVICE(dev)->id);

You need to replace the stub for s390_pci_find_dev_by_idx() with a stub
for s390_pci_find_dev_by_target() in s390-pci-stubs.c (on my s390-next
branch), so that it compiles without CONFIG_PCI.

OK. Got it.



  if (!pbdev) {
  DPRINTF("add_msi_route no dev\n");
  return -ENODEV;








Re: [Qemu-devel] [PATCH 3/4] s390x/pci: fixup ind_offset of msix routing entry

2017-08-28 Thread Yi Min Zhao



在 2017/8/28 下午11:33, Cornelia Huck 写道:

On Mon, 28 Aug 2017 10:04:46 +0200
Yi Min Zhao  wrote:


The aibvo of zpci device should be constant after issued mpcifc
registering irqs instruction. Each msix vector should offset from the
aibvo. But for flic adapter interrupt, we should use the absolute
offset within the aibv. So let's use the aibvo+vector to fixup msix
routing entry.

This makes sense, but I would tweak the description a bit.

"The guest uses the mpcifc instruction to register the aibvo of a zpci
device, which is the starting offset of indicators in the indicator
area and thus remains constant. Each msix vector is an offset from the
aibvo. When we map a msix route to an adapter route, we should not
modify the starting offset, but instead add the vector to the starting
offset to get the absolute offset in the specific route."

Much better. Thanks!


I'm wondering how this was ever supposed to work?

I investigated this. Linux kernel always uses 0 as starting offset for
aibvo. And each msix entry is only registered one time. So we didn't
encounter any problem. But the logic here is not right obviously. It
is just a coincidence.




Signed-off-by: Yi Min Zhao 
---
  target/s390x/kvm.c | 4 +---
  1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c
index e348bfb7cc..c08b7757e7 100644
--- a/target/s390x/kvm.c
+++ b/target/s390x/kvm.c
@@ -2515,14 +2515,12 @@ int kvm_arch_fixup_msi_route(struct 
kvm_irq_routing_entry *route,
  return -ENODEV;
  }
  
-pbdev->routes.adapter.ind_offset = vec;

-
  route->type = KVM_IRQ_ROUTING_S390_ADAPTER;
  route->flags = 0;
  route->u.adapter.summary_addr = pbdev->routes.adapter.summary_addr;
  route->u.adapter.ind_addr = pbdev->routes.adapter.ind_addr;
  route->u.adapter.summary_offset = pbdev->routes.adapter.summary_offset;
-route->u.adapter.ind_offset = pbdev->routes.adapter.ind_offset;
+route->u.adapter.ind_offset = pbdev->routes.adapter.ind_offset + vec;
  route->u.adapter.adapter_id = pbdev->routes.adapter.adapter_id;
  return 0;
  }







Re: [Qemu-devel] [PATCH 4/4] s390x/pci: add iommu replay callback

2017-08-28 Thread Yi Min Zhao



在 2017/8/28 下午11:57, Cornelia Huck 写道:

On Mon, 28 Aug 2017 10:04:47 +0200
Yi Min Zhao  wrote:


Let's introduce iommu replay callback for s390 pci iommu memory region.
Currently we don't need any dma mapping replay. So let it return
directly. This implementation will avoid meaningless loops calling
translation callback.

Reviewed-by: Pierre Morel 
Reviewed-by: Halil Pasic 
Signed-off-by: Yi Min Zhao 
---
  hw/s390x/s390-pci-bus.c | 8 
  1 file changed, 8 insertions(+)

diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
index 9e1f7ff5c5..359509ccea 100644
--- a/hw/s390x/s390-pci-bus.c
+++ b/hw/s390x/s390-pci-bus.c
@@ -407,6 +407,13 @@ static IOMMUTLBEntry 
s390_translate_iommu(IOMMUMemoryRegion *mr, hwaddr addr,
  return ret;
  }
  
+static void s390_pci_iommu_replay(IOMMUMemoryRegion *iommu,

+  IOMMUNotifier *notifier)
+{
+/* we don't need iommu replay currently */

This really needs to be heavier on the _why_. My guess is that anything
which would require replay goes through the rpcit instruction?

My understanding is:
Our arch is different from others. Each pci device has its own iommu, not
like other archs' implementation. So currently there must be no existing
mappings belonging to any newly plugged pci device whose iommu doesn't
have any mapping at the time.
In addition, it's also due to vfio blocking migration. If vfio-pci supports
migration in future, we could implement iommu replay by that time.



+return;
+}
+
  static S390PCIIOMMU *s390_pci_get_iommu(S390pciState *s, PCIBus *bus,
  int devfn)
  {
@@ -1055,6 +1062,7 @@ static void 
s390_iommu_memory_region_class_init(ObjectClass *klass, void *data)
  IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass);
  
  imrc->translate = s390_translate_iommu;

+imrc->replay = s390_pci_iommu_replay;
  }
  
  static const TypeInfo s390_iommu_memory_region_info = {








Re: [Qemu-devel] [PATCH 1/4] s390x/pci: fixup trap_msix()

2017-08-29 Thread Yi Min Zhao



在 2017/8/29 下午4:00, Cornelia Huck 写道:

On Tue, 29 Aug 2017 12:32:17 +0800
Yi Min Zhao  wrote:


在 2017/8/28 下午10:51, Cornelia Huck 写道:

On Mon, 28 Aug 2017 10:04:44 +0200
Yi Min Zhao  wrote:
  

The function trap_msix() is to check if pcistg instruction would access
msix table entries. The correct boundary condition should be
[table_offset, table_offset+entries*entry_size). But the current
condition calculated misses the last entry. So let's fixup it.

Acked-by: Dong Jia Shi 
Reviewed-by: Pierre Morel 
Signed-off-by: Yi Min Zhao 
---
   hw/s390x/s390-pci-inst.c | 4 ++--
   1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c
index b7beb8c36a..eba9ffb5f2 100644
--- a/hw/s390x/s390-pci-inst.c
+++ b/hw/s390x/s390-pci-inst.c
@@ -440,8 +440,8 @@ static int trap_msix(S390PCIBusDevice *pbdev, uint64_t 
offset, uint8_t pcias)
   {
   if (pbdev->msix.available && pbdev->msix.table_bar == pcias &&
   offset >= pbdev->msix.table_offset &&
-offset <= pbdev->msix.table_offset +
-  (pbdev->msix.entries - 1) * PCI_MSIX_ENTRY_SIZE) {
+offset < (pbdev->msix.table_offset +
+  pbdev->msix.entries * PCI_MSIX_ENTRY_SIZE)) {
   return 1;
   } else {
   return 0;

What happened before due to the miscalculation? Write to wrong memory
region?

  

We tried to plug a virtio-net pci device but failed. After inspecting, we
found that the device uses two msix entries but the last one was
missed. Then we cannot register the interrupt successfully because we
should call trap_msix() in order to save some useful arch-specific
information into the msix message. But a write to the wrong memory region
didn't happen.

So, the guest just was not able to use the second msix entry, but did
not get any exception?



Yes, didn't get any exception. The guest just kept waiting for something
(I guess that might be the response for interrupt register) and then the
system had no response. What I can do is only destroy the guest.




Re: [Qemu-devel] [PATCH 1/4] s390x/pci: fixup trap_msix()

2017-08-29 Thread Yi Min Zhao



在 2017/8/29 下午4:00, Cornelia Huck 写道:

On Tue, 29 Aug 2017 12:32:17 +0800
Yi Min Zhao  wrote:


在 2017/8/28 下午10:51, Cornelia Huck 写道:

On Mon, 28 Aug 2017 10:04:44 +0200
Yi Min Zhao  wrote:
  

The function trap_msix() is to check if pcistg instruction would access
msix table entries. The correct boundary condition should be
[table_offset, table_offset+entries*entry_size). But the current
condition calculated misses the last entry. So let's fixup it.

Acked-by: Dong Jia Shi 
Reviewed-by: Pierre Morel 
Signed-off-by: Yi Min Zhao 
---
   hw/s390x/s390-pci-inst.c | 4 ++--
   1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c
index b7beb8c36a..eba9ffb5f2 100644
--- a/hw/s390x/s390-pci-inst.c
+++ b/hw/s390x/s390-pci-inst.c
@@ -440,8 +440,8 @@ static int trap_msix(S390PCIBusDevice *pbdev, uint64_t 
offset, uint8_t pcias)
   {
   if (pbdev->msix.available && pbdev->msix.table_bar == pcias &&
   offset >= pbdev->msix.table_offset &&
-offset <= pbdev->msix.table_offset +
-  (pbdev->msix.entries - 1) * PCI_MSIX_ENTRY_SIZE) {
+offset < (pbdev->msix.table_offset +
+  pbdev->msix.entries * PCI_MSIX_ENTRY_SIZE)) {
   return 1;
   } else {
   return 0;

What happened before due to the miscalculation? Write to wrong memory
region?

  

We tried to plug a virtio-net pci device but failed. After inspecting, we
found that the device uses two msix entries but the last one was
missed. Then we cannot register the interrupt successfully because we
should call trap_msix() in order to save some useful arch-specific
information into the msix message. But a write to the wrong memory region
didn't happen.

So, the guest just was not able to use the second msix entry, but did
not get any exception?


Forgot one thing. The zpci idx is saved in msix message. The second msix 
entry has not been

trapped so that no idx has been saved, on the other hand idx 0 is saved. So
kvm_arch_fixup_msi_route() will update irq route according to the zpci 
device whose idx is 0.
So the wrong logic in trap_msix() will result that flic mixes different 
pci devices' adapter interrupts.





Re: [Qemu-devel] [PATCH 4/4] s390x/pci: add iommu replay callback

2017-08-29 Thread Yi Min Zhao



在 2017/8/29 下午4:07, Cornelia Huck 写道:

[Restored cc:s. Please remember to do reply-all.]

On Tue, 29 Aug 2017 12:46:51 +0800
Yi Min Zhao  wrote:


在 2017/8/28 下午11:57, Cornelia Huck 写道:

On Mon, 28 Aug 2017 10:04:47 +0200
Yi Min Zhao  wrote:


Let's introduce iommu replay callback for s390 pci iommu memory region.
Currently we don't need any dma mapping replay. So let it return
directly. This implementation will avoid meaningless loops calling
translation callback.

Reviewed-by: Pierre Morel 
Reviewed-by: Halil Pasic 
Signed-off-by: Yi Min Zhao 
---
   hw/s390x/s390-pci-bus.c | 8 
   1 file changed, 8 insertions(+)

diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
index 9e1f7ff5c5..359509ccea 100644
--- a/hw/s390x/s390-pci-bus.c
+++ b/hw/s390x/s390-pci-bus.c
@@ -407,6 +407,13 @@ static IOMMUTLBEntry 
s390_translate_iommu(IOMMUMemoryRegion *mr, hwaddr addr,
   return ret;
   }
   
+static void s390_pci_iommu_replay(IOMMUMemoryRegion *iommu,

+  IOMMUNotifier *notifier)
+{
+/* we don't need iommu replay currently */

This really needs to be heavier on the _why_. My guess is that anything
which would require replay goes through the rpcit instruction?

My understanding is:
Our arch is different from others. Each pci device has its own iommu, not
like other archs' implementation. So currently there must be no existing
mappings belonging to any newly plugged pci device whose iommu doesn't
have any mapping at the time.

So please put that explanation into the function. (Also, "currently"?
Are we expecting it to change?)
The iommu replay function is originally introduced for vfio. I think 
they want to re-build
the existing mappings because vfio has a copy of mappings in kernel. For 
our case,

the mappings would be cleanup when a pci device unplugged, and new mappings
would be created when a pci device plugged. I think replay only can 
happen during

vfio-pci device migration.



In addition, it's also due to vfio blocking migration. If vfio-pci supports
migration in future, we could implement iommu replay by that time.

That's not an argument: This is the base zpci support, it should not
depend on the supported devices and what they do. (What's the status of
virtio-pci, btw? Does it work with your patches applied, or is there
still more to be done?)


My understanding is virtio-pci doesn't need replay. All mappings of any 
pci device are existing in
guest memory. Once the mappings is built, all of them could be migrated 
along with the guest

system. But I might misunderstand it. Please correct me.




Re: [Qemu-devel] [PATCH 1/4] s390x/pci: fixup trap_msix()

2017-08-29 Thread Yi Min Zhao



在 2017/8/29 下午4:22, Cornelia Huck 写道:

On Tue, 29 Aug 2017 16:12:26 +0800
Yi Min Zhao  wrote:


在 2017/8/29 下午4:00, Cornelia Huck 写道:

On Tue, 29 Aug 2017 12:32:17 +0800
Yi Min Zhao  wrote:
  

在 2017/8/28 下午10:51, Cornelia Huck 写道:

On Mon, 28 Aug 2017 10:04:44 +0200
Yi Min Zhao  wrote:
 

The function trap_msix() is to check if pcistg instruction would access
msix table entries. The correct boundary condition should be
[table_offset, table_offset+entries*entry_size). But the current
condition calculated misses the last entry. So let's fixup it.

Acked-by: Dong Jia Shi 
Reviewed-by: Pierre Morel 
Signed-off-by: Yi Min Zhao 
---
hw/s390x/s390-pci-inst.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c
index b7beb8c36a..eba9ffb5f2 100644
--- a/hw/s390x/s390-pci-inst.c
+++ b/hw/s390x/s390-pci-inst.c
@@ -440,8 +440,8 @@ static int trap_msix(S390PCIBusDevice *pbdev, uint64_t 
offset, uint8_t pcias)
{
if (pbdev->msix.available && pbdev->msix.table_bar == pcias &&
offset >= pbdev->msix.table_offset &&
-offset <= pbdev->msix.table_offset +
-  (pbdev->msix.entries - 1) * PCI_MSIX_ENTRY_SIZE) {
+offset < (pbdev->msix.table_offset +
+  pbdev->msix.entries * PCI_MSIX_ENTRY_SIZE)) {
return 1;
} else {
return 0;

What happened before due to the miscalculation? Write to wrong memory
region?

 

We tried to plug a virtio-net pci device but failed. After inspecting, we
found that the device uses two msix entries but the last one was
missed. Then we cannot register the interrupt successfully because we
should call trap_msix() in order to save some useful arch-specific
information into the msix message. But a write to the wrong memory region
didn't happen.

So, the guest just was not able to use the second msix entry, but did
not get any exception?

  

Forgot one thing. The zpci idx is saved in msix message. The second msix
entry has not been
trapped so that no idx has been saved, on the other hand idx 0 is saved. So
kvm_arch_fixup_msi_route() will update irq route according to the zpci
device whose idx is 0.
So the wrong logic in trap_msix() will result that flic mixes different
pci devices' adapter interrupts.

Ouch. So this only ever worked for the small subset of pci devices we
can passthrough (assuming none of them use more than one msix entry)?
Because any passthroughed pci devices which I tested has more than 2 
msix entries. And not all
entries will be used. I find that the last entry is never touched. But 
virtio pci is much fancy and only
uses two entries. So I encountered the issue when I tested virtio-pci 
device.


I'm tempted to have this cc:ed to stable so we can fixup 2.10 (which is
the first version with usable zpci support).



Thanks!




Re: [Qemu-devel] [PATCH 4/4] s390x/pci: add iommu replay callback

2017-08-29 Thread Yi Min Zhao



在 2017/8/29 下午5:33, Cornelia Huck 写道:

On Tue, 29 Aug 2017 16:26:10 +0800
Yi Min Zhao  wrote:


在 2017/8/29 下午4:07, Cornelia Huck 写道:

[Restored cc:s. Please remember to do reply-all.]

On Tue, 29 Aug 2017 12:46:51 +0800
Yi Min Zhao  wrote:
  

在 2017/8/28 下午11:57, Cornelia Huck 写道:

On Mon, 28 Aug 2017 10:04:47 +0200
Yi Min Zhao  wrote:
  

Let's introduce iommu replay callback for s390 pci iommu memory region.
Currently we don't need any dma mapping replay. So let it return
directly. This implementation will avoid meaningless loops calling
translation callback.

Reviewed-by: Pierre Morel 
Reviewed-by: Halil Pasic 
Signed-off-by: Yi Min Zhao 
---
hw/s390x/s390-pci-bus.c | 8 
1 file changed, 8 insertions(+)

diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
index 9e1f7ff5c5..359509ccea 100644
--- a/hw/s390x/s390-pci-bus.c
+++ b/hw/s390x/s390-pci-bus.c
@@ -407,6 +407,13 @@ static IOMMUTLBEntry 
s390_translate_iommu(IOMMUMemoryRegion *mr, hwaddr addr,
return ret;
}

+static void s390_pci_iommu_replay(IOMMUMemoryRegion *iommu,

+  IOMMUNotifier *notifier)
+{
+/* we don't need iommu replay currently */

This really needs to be heavier on the _why_. My guess is that anything
which would require replay goes through the rpcit instruction?

My understanding is:
Our arch is different from others. Each pci device has its own iommu, not
like other archs' implementation. So currently there must be no existing
mappings belonging to any newly plugged pci device whose iommu doesn't
have any mapping at the time.

So please put that explanation into the function. (Also, "currently"?
Are we expecting it to change?)

The iommu replay function is originally introduced for vfio. I think
they want to re-build
the existing mappings because vfio has a copy of mappings in kernel. For
our case,
the mappings would be cleanup when a pci device unplugged, and new mappings
would be created when a pci device plugged. I think replay only can
happen during
vfio-pci device migration.

So, the base reason is that it is impossible to plug a pci device on
s390x that already has iommu mappings which need to be replayed, which
is due to the "one iommu per zpci device" construct (and independent of
which devices need replay on other architectures)?

Yes.


Then this should go into the explanation above. (And I'd still like to
know what "currently" refers to. I don't like to rely on some kind of
implicit assumptions - are we expecting this to break?)
As our discussion during internal review, we don't need to replay 
currently because vfio-pci device
doesn't support migration. 'currently' refers to the time of vfio-pci 
device migration block.
Only when vfio-pci supports migration in future, we should fill the 
replaying code.


  

In addition, it's also due to vfio blocking migration. If vfio-pci supports
migration in future, we could implement iommu replay by that time.

That's not an argument: This is the base zpci support, it should not
depend on the supported devices and what they do. (What's the status of
virtio-pci, btw? Does it work with your patches applied, or is there
still more to be done?)

  

My understanding is virtio-pci doesn't need replay. All mappings of any
pci device are existing in
guest memory. Once the mappings is built, all of them could be migrated
along with the guest
system. But I might misunderstand it. Please correct me.

My question was whether virtio-pci works with your patches on top at
all - last time I checked on master, virtio-pci devices failed to
realize with a "msi-x is mandatory" message.


I tested. virtio-pci works fine. The message "msix is mandatory" means 
we only support
msix interrupt. If virtio-pci device (like virtio-rng) doesn't support 
msix, we don't allow it

to plug. I think this is not related to iommu replay.




Re: [Qemu-devel] [PATCH 4/4] s390x/pci: add iommu replay callback

2017-08-29 Thread Yi Min Zhao



在 2017/8/29 下午5:49, Cornelia Huck 写道:

On Tue, 29 Aug 2017 11:33:53 +0200
Cornelia Huck  wrote:


My question was whether virtio-pci works with your patches on top at
all - last time I checked on master, virtio-pci devices failed to
realize with a "msi-x is mandatory" message.

Just checked again, I still get

qemu-system-s390x: -device virtio-rng-pci: MSI-X support is mandatory in the 
S390 architecture

Any clue to what is missing?



virtio-rng-pci doesn't support msix. But msix is required on s390 arch.
So we avoid plugging any pci device without msix support.




Re: [Qemu-devel] [PATCH 4/4] s390x/pci: add iommu replay callback

2017-08-29 Thread Yi Min Zhao



在 2017/8/29 下午5:57, Cornelia Huck 写道:

On Tue, 29 Aug 2017 17:51:43 +0800
Yi Min Zhao  wrote:


在 2017/8/29 下午5:33, Cornelia Huck 写道:

My question was whether virtio-pci works with your patches on top at
all - last time I checked on master, virtio-pci devices failed to
realize with a "msi-x is mandatory" message.

  

I tested. virtio-pci works fine. The message "msix is mandatory" means
we only support
msix interrupt. If virtio-pci device (like virtio-rng) doesn't support
msix, we don't allow it
to plug.

Ah, that's it (it's a bit strange that not all virtio-pci devices
support msi-x). I can add a virtio-net-pci device just fine.

(Maybe we can enhance the message so that it is clear that it refers to
that particular device?)
Hum, I think so. But it's not urgent. I could post another patch for 
message enhancement

after this series review. Do you agree?



I think this is not related to iommu replay.

This question was unrelated to this particular patch, more to the whole
series :)



Yup.




Re: [Qemu-devel] [PATCH RFC v2 8/9] s390x/kvm: msi route fixup for non-pci

2017-07-18 Thread Yi Min Zhao
I think moving the new code on the top of this function would make it 
more readable.



在 2017/7/18 下午10:24, Cornelia Huck 写道:

+} else {
+DPRINTF("fixup_msi_route on non-pci machine?!\n");
+return -ENODEV;
+}





Re: [Qemu-devel] [PATCH RFC v2 8/9] s390x/kvm: msi route fixup for non-pci

2017-07-18 Thread Yi Min Zhao



在 2017/7/18 下午11:22, Cornelia Huck 写道:

On Tue, 18 Jul 2017 11:58:08 -0300
Philippe Mathieu-Daudé  wrote:


Hi Cornelia,

On Tue, Jul 18, 2017 at 11:24 AM, Cornelia Huck  wrote:

If we don't provide pci, we cannot have a pci device for which we
have to translate to adapter routes: just return -ENODEV.

Signed-off-by: Cornelia Huck 
---
  target/s390x/kvm.c | 33 +++--
  1 file changed, 19 insertions(+), 14 deletions(-)

diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c
index 6068c3..df0e5af151 100644
--- a/target/s390x/kvm.c
+++ b/target/s390x/kvm.c
@@ -2424,22 +2424,27 @@ int kvm_arch_fixup_msi_route(struct 
kvm_irq_routing_entry *route,
  uint32_t idx = data >> ZPCI_MSI_VEC_BITS;
  uint32_t vec = data & ZPCI_MSI_VEC_MASK;

-pbdev = s390_pci_find_dev_by_idx(s390_get_phb(), idx);
-if (!pbdev) {
-DPRINTF("add_msi_route no dev\n");
-return -ENODEV;
-}
+if (s390_has_feat(S390_FEAT_ZPCI)) {
+pbdev = s390_pci_find_dev_by_idx(s390_get_phb(), idx);
+if (!pbdev) {
+DPRINTF("add_msi_route no dev\n");
+return -ENODEV;
+}

-pbdev->routes.adapter.ind_offset = vec;
+pbdev->routes.adapter.ind_offset = vec;

-route->type = KVM_IRQ_ROUTING_S390_ADAPTER;
-route->flags = 0;
-route->u.adapter.summary_addr = pbdev->routes.adapter.summary_addr;
-route->u.adapter.ind_addr = pbdev->routes.adapter.ind_addr;
-route->u.adapter.summary_offset = pbdev->routes.adapter.summary_offset;
-route->u.adapter.ind_offset = pbdev->routes.adapter.ind_offset;
-route->u.adapter.adapter_id = pbdev->routes.adapter.adapter_id;
-return 0;
+route->type = KVM_IRQ_ROUTING_S390_ADAPTER;
+route->flags = 0;
+route->u.adapter.summary_addr = pbdev->routes.adapter.summary_addr;
+route->u.adapter.ind_addr = pbdev->routes.adapter.ind_addr;
+route->u.adapter.summary_offset = pbdev->routes.adapter.summary_offset;
+route->u.adapter.ind_offset = pbdev->routes.adapter.ind_offset;
+route->u.adapter.adapter_id = pbdev->routes.adapter.adapter_id;
+return 0;
+} else {
+DPRINTF("fixup_msi_route on non-pci machine?!\n");
+return -ENODEV;
+}
  }

  int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
--
2.13.3

What about inverting the check?

+if (!s390_has_feat(S390_FEAT_ZPCI)) {
+DPRINTF("fixup_msi_route on non-pci machine?!\n");
+return -ENODEV;
+}

I usually prefer the more common branch on top, but (1) this causes
more changes in this case and (2) I'm not so sure if zpci on really is
the common case...


Sorry for my duplicated comment. I think we don't know which is more 
common. Currently 2.9
machine doesn't support zpci facility. But in the future, how will the 
thing change?





Re: [Qemu-devel] [PATCH RFC v2 7/9] s390x/pci: fence off instructions for non-pci

2017-07-18 Thread Yi Min Zhao



在 2017/7/18 下午10:24, Cornelia Huck 写道:

If a guest running on a machine without zpci issues a pci instruction,
throw them an exception.

Signed-off-by: Cornelia Huck 
---
  target/s390x/kvm.c | 54 +-
  1 file changed, 41 insertions(+), 13 deletions(-)

diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c
index 880eccd58a..6068c3 100644
--- a/target/s390x/kvm.c
+++ b/target/s390x/kvm.c
@@ -1191,7 +1191,11 @@ static int kvm_clp_service_call(S390CPU *cpu, struct 
kvm_run *run)
  {
  uint8_t r2 = (run->s390_sieic.ipb & 0x000f) >> 16;

-return clp_service_call(cpu, r2);
+if (s390_has_feat(S390_FEAT_ZPCI)) {
+return clp_service_call(cpu, r2);
+} else {
+return -1;
+}

Hi Conny,

Should we use a global value to store s390_has_feat(S390_FEAT_ZPCI)?
I'm not sure it's a good idea.

Yi Min

  }

  static int kvm_pcilg_service_call(S390CPU *cpu, struct kvm_run *run)
@@ -1199,7 +1203,11 @@ static int kvm_pcilg_service_call(S390CPU *cpu, struct 
kvm_run *run)
  uint8_t r1 = (run->s390_sieic.ipb & 0x00f0) >> 20;
  uint8_t r2 = (run->s390_sieic.ipb & 0x000f) >> 16;

-return pcilg_service_call(cpu, r1, r2);
+if (s390_has_feat(S390_FEAT_ZPCI)) {
+return pcilg_service_call(cpu, r1, r2);
+} else {
+return -1;
+}
  }

  static int kvm_pcistg_service_call(S390CPU *cpu, struct kvm_run *run)
@@ -1207,7 +1215,11 @@ static int kvm_pcistg_service_call(S390CPU *cpu, struct 
kvm_run *run)
  uint8_t r1 = (run->s390_sieic.ipb & 0x00f0) >> 20;
  uint8_t r2 = (run->s390_sieic.ipb & 0x000f) >> 16;

-return pcistg_service_call(cpu, r1, r2);
+if (s390_has_feat(S390_FEAT_ZPCI)) {
+return pcistg_service_call(cpu, r1, r2);
+} else {
+return -1;
+}
  }

  static int kvm_stpcifc_service_call(S390CPU *cpu, struct kvm_run *run)
@@ -1216,10 +1228,14 @@ static int kvm_stpcifc_service_call(S390CPU *cpu, 
struct kvm_run *run)
  uint64_t fiba;
  uint8_t ar;

-cpu_synchronize_state(CPU(cpu));
-fiba = get_base_disp_rxy(cpu, run, &ar);
+if (s390_has_feat(S390_FEAT_ZPCI)) {
+cpu_synchronize_state(CPU(cpu));
+fiba = get_base_disp_rxy(cpu, run, &ar);

-return stpcifc_service_call(cpu, r1, fiba, ar);
+return stpcifc_service_call(cpu, r1, fiba, ar);
+} else {
+return -1;
+}
  }

  static int kvm_sic_service_call(S390CPU *cpu, struct kvm_run *run)
@@ -1247,7 +1263,11 @@ static int kvm_rpcit_service_call(S390CPU *cpu, struct 
kvm_run *run)
  uint8_t r1 = (run->s390_sieic.ipb & 0x00f0) >> 20;
  uint8_t r2 = (run->s390_sieic.ipb & 0x000f) >> 16;

-return rpcit_service_call(cpu, r1, r2);
+if (s390_has_feat(S390_FEAT_ZPCI)) {
+return rpcit_service_call(cpu, r1, r2);
+} else {
+return -1;
+}
  }

  static int kvm_pcistb_service_call(S390CPU *cpu, struct kvm_run *run)
@@ -1257,10 +1277,14 @@ static int kvm_pcistb_service_call(S390CPU *cpu, struct 
kvm_run *run)
  uint64_t gaddr;
  uint8_t ar;

-cpu_synchronize_state(CPU(cpu));
-gaddr = get_base_disp_rsy(cpu, run, &ar);
+if (s390_has_feat(S390_FEAT_ZPCI)) {
+cpu_synchronize_state(CPU(cpu));
+gaddr = get_base_disp_rsy(cpu, run, &ar);

-return pcistb_service_call(cpu, r1, r3, gaddr, ar);
+return pcistb_service_call(cpu, r1, r3, gaddr, ar);
+} else {
+return -1;
+}
  }

  static int kvm_mpcifc_service_call(S390CPU *cpu, struct kvm_run *run)
@@ -1269,10 +1293,14 @@ static int kvm_mpcifc_service_call(S390CPU *cpu, struct 
kvm_run *run)
  uint64_t fiba;
  uint8_t ar;

-cpu_synchronize_state(CPU(cpu));
-fiba = get_base_disp_rxy(cpu, run, &ar);
+if (s390_has_feat(S390_FEAT_ZPCI)) {
+cpu_synchronize_state(CPU(cpu));
+fiba = get_base_disp_rxy(cpu, run, &ar);

-return mpcifc_service_call(cpu, r1, fiba, ar);
+return mpcifc_service_call(cpu, r1, fiba, ar);
+} else {
+return -1;
+}
  }

  static int handle_b9(S390CPU *cpu, struct kvm_run *run, uint8_t ipa1)





Re: [Qemu-devel] [PATCH RFC v2 4/9] s390x/pci: do not advertise pci on non-pci builds

2017-07-19 Thread Yi Min Zhao



在 2017/7/19 下午4:00, Cornelia Huck 写道:

On Tue, 18 Jul 2017 21:56:26 +0200
Christian Borntraeger  wrote:


On 07/18/2017 04:24 PM, Cornelia Huck wrote:

Only set the zpci and aen feature bits on builds that actually
support pci.

Signed-off-by: Cornelia Huck 
---
  target/s390x/kvm.c | 2 ++
  1 file changed, 2 insertions(+)

diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c
index 831492f9a2..880eccd58a 100644
--- a/target/s390x/kvm.c
+++ b/target/s390x/kvm.c
@@ -2685,8 +2685,10 @@ void kvm_s390_get_host_cpu_model(S390CPUModel *model, 
Error **errp)
  }

  /* set zpci and aen facilities */
+#ifdef CONFIG_PCI
  set_bit(S390_FEAT_ZPCI, model->features);
  set_bit(S390_FEAT_ADAPTER_EVENT_NOTIFICATION, model->features);
+#endif

  if (s390_known_cpu_type(cpu_type)) {
  /* we want the exact model, even if some features are missing */
   

Not strictly necessary but do you also want to ifdef this

  kvm_vm_enable_cap(s, KVM_CAP_S390_AIS, 0);

call?

If not you could actually even allow AEN but not PCI for !CONFIG_PCI.

I'm a bit unsure about the relationship of ais and aen with pci. I
remember that only adapters for pci currently support suppression,
although it could spread to other adapter types in the future. Not sure
about aen.

So I'd keep the ais enablement call, even though it won't have much of
an effect as no pci adapters will be registered.

As I don't quite remember what aen governed, I need to rely on your
feedback here.


My understanding is that zpci relies on aen. But aen could exist 
independently.
After all, there is other device type using aen. I think only wrapping 
zpci is

enough.




Re: [Qemu-devel] [PATCH RFC v2 4/9] s390x/pci: do not advertise pci on non-pci builds

2017-07-19 Thread Yi Min Zhao



在 2017/7/19 下午5:24, Cornelia Huck 写道:

On Wed, 19 Jul 2017 16:56:18 +0800
Yi Min Zhao  wrote:


在 2017/7/19 下午4:00, Cornelia Huck 写道:

On Tue, 18 Jul 2017 21:56:26 +0200
Christian Borntraeger  wrote:
  

On 07/18/2017 04:24 PM, Cornelia Huck wrote:

Only set the zpci and aen feature bits on builds that actually
support pci.

Signed-off-by: Cornelia Huck 
---
   target/s390x/kvm.c | 2 ++
   1 file changed, 2 insertions(+)

diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c
index 831492f9a2..880eccd58a 100644
--- a/target/s390x/kvm.c
+++ b/target/s390x/kvm.c
@@ -2685,8 +2685,10 @@ void kvm_s390_get_host_cpu_model(S390CPUModel *model, 
Error **errp)
   }

   /* set zpci and aen facilities */
+#ifdef CONFIG_PCI
   set_bit(S390_FEAT_ZPCI, model->features);
   set_bit(S390_FEAT_ADAPTER_EVENT_NOTIFICATION, model->features);
+#endif

   if (s390_known_cpu_type(cpu_type)) {
   /* we want the exact model, even if some features are missing */
  

Not strictly necessary but do you also want to ifdef this

   kvm_vm_enable_cap(s, KVM_CAP_S390_AIS, 0);

call?

If not you could actually even allow AEN but not PCI for !CONFIG_PCI.

I'm a bit unsure about the relationship of ais and aen with pci. I
remember that only adapters for pci currently support suppression,
although it could spread to other adapter types in the future. Not sure
about aen.

So I'd keep the ais enablement call, even though it won't have much of
an effect as no pci adapters will be registered.

As I don't quite remember what aen governed, I need to rely on your
feedback here.

  

My understanding is that zpci relies on aen. But aen could exist
independently.
After all, there is other device type using aen. I think only wrapping
zpci is
enough.

Ah, was aen the indicator bits related support? If yes, I agree that we
should only turn off zpci.


Yes, set summary and indicator bits. Related stuff is in flic, but not 
in zpci.





Re: [Qemu-devel] [PATCH RFC v2 4/9] s390x/pci: do not advertise pci on non-pci builds

2017-07-19 Thread Yi Min Zhao



在 2017/7/19 下午5:27, Yi Min Zhao 写道:



在 2017/7/19 下午5:24, Cornelia Huck 写道:

On Wed, 19 Jul 2017 16:56:18 +0800
Yi Min Zhao  wrote:


在 2017/7/19 下午4:00, Cornelia Huck 写道:

On Tue, 18 Jul 2017 21:56:26 +0200
Christian Borntraeger  wrote:

On 07/18/2017 04:24 PM, Cornelia Huck wrote:

Only set the zpci and aen feature bits on builds that actually
support pci.

Signed-off-by: Cornelia Huck 
---
   target/s390x/kvm.c | 2 ++
   1 file changed, 2 insertions(+)

diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c
index 831492f9a2..880eccd58a 100644
--- a/target/s390x/kvm.c
+++ b/target/s390x/kvm.c
@@ -2685,8 +2685,10 @@ void 
kvm_s390_get_host_cpu_model(S390CPUModel *model, Error **errp)

   }

   /* set zpci and aen facilities */
+#ifdef CONFIG_PCI
   set_bit(S390_FEAT_ZPCI, model->features);
   set_bit(S390_FEAT_ADAPTER_EVENT_NOTIFICATION, 
model->features);

+#endif

   if (s390_known_cpu_type(cpu_type)) {
   /* we want the exact model, even if some features are 
missing */

Not strictly necessary but do you also want to ifdef this

   kvm_vm_enable_cap(s, KVM_CAP_S390_AIS, 0);

call?

If not you could actually even allow AEN but not PCI for !CONFIG_PCI.

I'm a bit unsure about the relationship of ais and aen with pci. I
remember that only adapters for pci currently support suppression,
although it could spread to other adapter types in the future. Not 
sure

about aen.

So I'd keep the ais enablement call, even though it won't have much of
an effect as no pci adapters will be registered.

As I don't quite remember what aen governed, I need to rely on your
feedback here.


My understanding is that zpci relies on aen. But aen could exist
independently.
After all, there is other device type using aen. I think only wrapping
zpci is
enough.

Ah, was aen the indicator bits related support? If yes, I agree that we
should only turn off zpci.


Yes, set summary and indicator bits. Related stuff is in flic, but not 
in zpci.





I think of another problem. If we didn't config pci, then we don't have zpci
feature in max cpu model. So how to process the conflict between requested
cpu model and max cpu model. For example, if we start 2.10 machine and
want to use z12 cpu model, maybe the guest cannot startup because of
missing zpci feature. So the only way is we explicitly turn it off in 
qemu cmdline.

But I'm not sure if it's an issue.




Re: [Qemu-devel] [PATCH v3 4/9] s390x/pci: do not advertise pci on non-pci builds

2017-07-26 Thread Yi Min Zhao

Good. This patch resolves the problem I mentioned in previous version.

Thanks for your work.


在 2017/7/25 下午11:33, Cornelia Huck 写道:

Only set the zpci feature bit on builds that actually support pci.

Signed-off-by: Cornelia Huck 
---
  hw/s390x/s390-pci-bus.c  | 5 +
  hw/s390x/s390-pci-bus.h  | 1 +
  hw/s390x/s390-pci-stub.c | 4 
  target/s390x/kvm.c   | 2 +-
  4 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
index c57f6ebae0..7b30d4c7bd 100644
--- a/hw/s390x/s390-pci-bus.c
+++ b/hw/s390x/s390-pci-bus.c
@@ -34,6 +34,11 @@
  } \
  } while (0)

+void pci_enable_zpci_feature(S390CPUModel *model)
+{
+set_bit(S390_FEAT_ZPCI, model->features);
+}
+
  S390pciState *s390_get_phb(void)
  {
  static S390pciState *phb;
diff --git a/hw/s390x/s390-pci-bus.h b/hw/s390x/s390-pci-bus.h
index 5df6292509..d8796536b0 100644
--- a/hw/s390x/s390-pci-bus.h
+++ b/hw/s390x/s390-pci-bus.h
@@ -333,4 +333,5 @@ S390PCIBusDevice *s390_pci_find_dev_by_fid(S390pciState *s, 
uint32_t fid);
  S390PCIBusDevice *s390_pci_find_next_avail_dev(S390pciState *s,
 S390PCIBusDevice *pbdev);

+void pci_enable_zpci_feature(S390CPUModel *model);
  #endif
diff --git a/hw/s390x/s390-pci-stub.c b/hw/s390x/s390-pci-stub.c
index cc7278a865..8ceaf482e7 100644
--- a/hw/s390x/s390-pci-stub.c
+++ b/hw/s390x/s390-pci-stub.c
@@ -72,3 +72,7 @@ S390PCIBusDevice *s390_pci_find_dev_by_idx(S390pciState *s, 
uint32_t idx)
  {
  return NULL;
  }
+
+void pci_enable_zpci_feature(S390CPUModel *model)
+{
+}
diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c
index c4c5791d27..866ac3d414 100644
--- a/target/s390x/kvm.c
+++ b/target/s390x/kvm.c
@@ -2662,7 +2662,7 @@ void kvm_s390_get_host_cpu_model(S390CPUModel *model, 
Error **errp)
  }

  /* We emulate a zPCI bus and AEN, therefore we don't need HW support */
-set_bit(S390_FEAT_ZPCI, model->features);
+pci_enable_zpci_feature(model);
  set_bit(S390_FEAT_ADAPTER_EVENT_NOTIFICATION, model->features);

  if (s390_known_cpu_type(cpu_type)) {





[Qemu-devel] [PATCH 2/3] s390x/pci: fixup global refresh

2018-01-30 Thread Yi Min Zhao
The VFIO common code doesn't provide the possibility to modify a
previous mapping entry in another way than unmapping and mapping again
with new properties.

To avoid the -EEXIST DMA mapping error, we introduce a GHashTable to
store S390IOTLBEntry instances in order to cache the mapped entries.
When intercepting rpcit instruction, ignore the identical mapped
entries to avoid doing map operations multiple times and do unmap and
re-map operations for the case of updating the valid entries. To
achieve that goal, we also export the DMA walking function and
optimize the code handling errors in rpcit handler.

Acked-by: Pierre Morel 
Signed-off-by: Yi Min Zhao 
---
 hw/s390x/s390-pci-bus.c  | 28 +-
 hw/s390x/s390-pci-bus.h  |  3 ++
 hw/s390x/s390-pci-inst.c | 95 ++--
 3 files changed, 90 insertions(+), 36 deletions(-)

diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
index e349d73abe..b75af26db7 100644
--- a/hw/s390x/s390-pci-bus.c
+++ b/hw/s390x/s390-pci-bus.c
@@ -461,8 +461,8 @@ out:
 return nto;
 }
 
-static uint16_t s390_guest_io_table_walk(uint64_t g_iota, hwaddr addr,
- S390IOTLBEntry *entry)
+uint16_t s390_guest_io_table_walk(uint64_t g_iota, hwaddr addr,
+  S390IOTLBEntry *entry)
 {
 uint64_t to = s390_pci_get_table_origin(g_iota);
 int8_t ett = 1;
@@ -487,7 +487,8 @@ static IOMMUTLBEntry s390_translate_iommu(IOMMUMemoryRegion 
*mr, hwaddr addr,
   IOMMUAccessFlags flag)
 {
 S390PCIIOMMU *iommu = container_of(mr, S390PCIIOMMU, iommu_mr);
-S390IOTLBEntry entry;
+S390IOTLBEntry *entry;
+uint64_t iova = addr & PAGE_MASK;
 uint16_t error = 0;
 IOMMUTLBEntry ret = {
 .target_as = &address_space_memory,
@@ -515,12 +516,17 @@ static IOMMUTLBEntry 
s390_translate_iommu(IOMMUMemoryRegion *mr, hwaddr addr,
 goto err;
 }
 
-error = s390_guest_io_table_walk(iommu->g_iota, addr, &entry);
-
-ret.iova = entry.iova;
-ret.translated_addr = entry.translated_addr;
-ret.addr_mask = entry.len - 1;
-ret.perm = entry.perm;
+entry = g_hash_table_lookup(iommu->iotlb, &iova);
+if (entry) {
+ret.iova = entry->iova;
+ret.translated_addr = entry->translated_addr;
+ret.addr_mask = entry->len - 1;
+ret.perm = entry->perm;
+} else {
+ret.iova = iova;
+ret.addr_mask = ~PAGE_MASK;
+ret.perm = IOMMU_NONE;
+}
 
 if ((flag != IOMMU_NONE) && !(flag & ret.perm)) {
 error = ERR_EVENT_TPROTE;
@@ -572,6 +578,8 @@ static S390PCIIOMMU *s390_pci_get_iommu(S390pciState *s, 
PCIBus *bus,
 PCI_FUNC(devfn));
 memory_region_init(&iommu->mr, OBJECT(iommu), mr_name, UINT64_MAX);
 address_space_init(&iommu->as, &iommu->mr, as_name);
+iommu->iotlb = g_hash_table_new_full(g_int64_hash, g_int64_equal,
+ NULL, g_free);
 table->iommu[PCI_SLOT(devfn)] = iommu;
 
 g_free(mr_name);
@@ -661,6 +669,7 @@ void s390_pci_iommu_enable(S390PCIIOMMU *iommu)
 void s390_pci_iommu_disable(S390PCIIOMMU *iommu)
 {
 iommu->enabled = false;
+g_hash_table_remove_all(iommu->iotlb);
 memory_region_del_subregion(&iommu->mr, MEMORY_REGION(&iommu->iommu_mr));
 object_unparent(OBJECT(&iommu->iommu_mr));
 }
@@ -676,6 +685,7 @@ static void s390_pci_iommu_free(S390pciState *s, PCIBus 
*bus, int32_t devfn)
 }
 
 table->iommu[PCI_SLOT(devfn)] = NULL;
+g_hash_table_destroy(iommu->iotlb);
 address_space_destroy(&iommu->as);
 object_unparent(OBJECT(&iommu->mr));
 object_unparent(OBJECT(iommu));
diff --git a/hw/s390x/s390-pci-bus.h b/hw/s390x/s390-pci-bus.h
index ca22ef393b..395bbf0e13 100644
--- a/hw/s390x/s390-pci-bus.h
+++ b/hw/s390x/s390-pci-bus.h
@@ -274,6 +274,7 @@ typedef struct S390PCIIOMMU {
 uint64_t g_iota;
 uint64_t pba;
 uint64_t pal;
+GHashTable *iotlb;
 } S390PCIIOMMU;
 
 typedef struct S390PCIIOMMUTable {
@@ -330,6 +331,8 @@ void s390_pci_iommu_enable(S390PCIIOMMU *iommu);
 void s390_pci_iommu_disable(S390PCIIOMMU *iommu);
 void s390_pci_generate_error_event(uint16_t pec, uint32_t fh, uint32_t fid,
uint64_t faddr, uint32_t e);
+uint16_t s390_guest_io_table_walk(uint64_t g_iota, hwaddr addr,
+  S390IOTLBEntry *entry);
 S390PCIBusDevice *s390_pci_find_dev_by_idx(S390pciState *s, uint32_t idx);
 S390PCIBusDevice *s390_pci_find_dev_by_fh(S390pciState *s, uint32_t fh);
 S390PCIBusDevice *s390_pci_find_dev_by_fid(S390pciState *s, uint32_t fid);
diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c
index 63fa06fb97..997a9cc2e9 100644
--- a/hw/s390x/s390-pci-inst.c
+++

[Qemu-devel] [PATCH 3/3] s390x/pci: use the right pal and pba in reg_ioat()

2018-01-30 Thread Yi Min Zhao
When registering ioat, pba should be comprised of leftmost 52 bits and
rightmost 12 binary zeros, and pal should be comprised of leftmost 52
bits and right most 12 binary ones. Let's fixup this.

Reviewed-by: Pierre Morel 
Signed-off-by: Yi Min Zhao 
---
 hw/s390x/s390-pci-inst.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c
index 997a9cc2e9..3fcc330fe3 100644
--- a/hw/s390x/s390-pci-inst.c
+++ b/hw/s390x/s390-pci-inst.c
@@ -865,6 +865,8 @@ static int reg_ioat(CPUS390XState *env, S390PCIIOMMU 
*iommu, ZpciFib fib,
 uint8_t dt = (g_iota >> 2) & 0x7;
 uint8_t t = (g_iota >> 11) & 0x1;
 
+pba &= ~0xfff;
+pal |= 0xfff;
 if (pba > pal || pba < ZPCI_SDMA_ADDR || pal > ZPCI_EDMA_ADDR) {
 s390_program_interrupt(env, PGM_OPERAND, 6, ra);
 return -EINVAL;
-- 
2.14.3 (Apple Git-98)




[Qemu-devel] [PATCH 1/3] s390x/pci: fixup the code walking IOMMU tables

2018-01-30 Thread Yi Min Zhao
The current s390x PCI IOMMU code lacks checking of several flags, including:
1) protection bit
2) table length
3) table offset
4) intermediate tables' invalid bit
5) format control bit

This patch introduces a new struct named S390IOTLBEntry and adds
these missing checks. At the same time, it informs the guest of the
corresponding error number when a check fails.

Reviewed-by: Pierre Morel 
Signed-off-by: Yi Min Zhao 
---
 hw/s390x/s390-pci-bus.c  | 223 ++-
 hw/s390x/s390-pci-bus.h  |  10 +++
 hw/s390x/s390-pci-inst.c |  10 ---
 3 files changed, 190 insertions(+), 53 deletions(-)

diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
index 2b1e1409bf..e349d73abe 100644
--- a/hw/s390x/s390-pci-bus.c
+++ b/hw/s390x/s390-pci-bus.c
@@ -309,49 +309,186 @@ static uint64_t get_st_pto(uint64_t entry)
 : 0;
 }
 
-static uint64_t s390_guest_io_table_walk(uint64_t guest_iota,
-  uint64_t guest_dma_address)
+static bool rt_entry_isvalid(uint64_t entry)
 {
-uint64_t sto_a, pto_a, px_a;
-uint64_t sto, pto, pte;
-uint32_t rtx, sx, px;
-
-rtx = calc_rtx(guest_dma_address);
-sx = calc_sx(guest_dma_address);
-px = calc_px(guest_dma_address);
-
-sto_a = guest_iota + rtx * sizeof(uint64_t);
-sto = address_space_ldq(&address_space_memory, sto_a,
-MEMTXATTRS_UNSPECIFIED, NULL);
-sto = get_rt_sto(sto);
-if (!sto) {
-pte = 0;
+return (entry & ZPCI_TABLE_VALID_MASK) == ZPCI_TABLE_VALID;
+}
+
+static bool pt_entry_isvalid(uint64_t entry)
+{
+return (entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID;
+}
+
+static bool entry_isprotected(uint64_t entry)
+{
+return (entry & ZPCI_TABLE_PROT_MASK) == ZPCI_TABLE_PROTECTED;
+}
+
+/* ett is expected table type, -1 page table, 0 segment table, 1 region table 
*/
+static uint64_t get_table_index(uint64_t iova, int8_t ett)
+{
+switch (ett) {
+case -1:
+return calc_px(iova);
+case 0:
+return calc_sx(iova);
+case 1:
+return calc_rtx(iova);
+}
+
+return -1;
+}
+
+static bool entry_isvalid(uint64_t entry, int8_t ett)
+{
+switch (ett) {
+case -1:
+return pt_entry_isvalid(entry);
+case 0:
+case 1:
+return rt_entry_isvalid(entry);
+}
+
+return false;
+}
+
+/* Return true if address translation is done */
+static bool translate_iscomplete(uint64_t entry, int8_t ett)
+{
+switch (ett) {
+case 0:
+return (entry & ZPCI_TABLE_FC) ? true : false;
+case 1:
+return false;
+}
+
+return true;
+}
+
+static uint64_t get_frame_size(int8_t ett)
+{
+switch (ett) {
+case -1:
+return 1ULL << 12;
+case 0:
+return 1ULL << 20;
+case 1:
+return 1ULL << 31;
+}
+
+return 0;
+}
+
+static uint64_t get_next_table_origin(uint64_t entry, int8_t ett)
+{
+switch (ett) {
+case -1:
+return entry & ZPCI_PTE_ADDR_MASK;
+case 0:
+return get_st_pto(entry);
+case 1:
+return get_rt_sto(entry);
+}
+
+return 0;
+}
+
+/**
+ * table_translate: do translation within one table and return the following
+ *  table origin
+ *
+ * @entry: the entry being traslated, the result is stored in this.
+ * @to: the address of table origin.
+ * @ett: expected table type, 1 region table, 0 segment table and -1 page 
table.
+ * @error: error code
+ */
+static uint64_t table_translate(S390IOTLBEntry *entry, uint64_t to, int8_t ett,
+uint16_t *error)
+{
+uint64_t tx, te, nto = 0;
+uint16_t err = 0;
+
+tx = get_table_index(entry->iova, ett);
+te = address_space_ldq(&address_space_memory, to + tx * sizeof(uint64_t),
+   MEMTXATTRS_UNSPECIFIED, NULL);
+
+if (!te) {
+err = ERR_EVENT_INVALTE;
 goto out;
 }
 
-pto_a = sto + sx * sizeof(uint64_t);
-pto = address_space_ldq(&address_space_memory, pto_a,
-MEMTXATTRS_UNSPECIFIED, NULL);
-pto = get_st_pto(pto);
-if (!pto) {
-pte = 0;
+if (!entry_isvalid(te, ett)) {
+entry->perm &= IOMMU_NONE;
 goto out;
 }
 
-px_a = pto + px * sizeof(uint64_t);
-pte = address_space_ldq(&address_space_memory, px_a,
-MEMTXATTRS_UNSPECIFIED, NULL);
+if (ett == 1 && ((te & ZPCI_TABLE_LEN_RTX) != ZPCI_TABLE_LEN_RTX ||
+ te & ZPCI_TABLE_OFFSET_MASK)) {
+err = ERR_EVENT_INVALTL;
+goto out;
+}
 
+nto = get_next_table_origin(te, ett);
+if (!nto) {
+err = ERR_EVENT_TT;
+goto out;
+}
+
+if (entry_isprotected(te)) {
+entry->perm &= IOMMU_RO;
+} else {
+entry->perm &= IOMMU_RW;
+}
+
+if (translate_iscomplete(te, ett)) {

[Qemu-devel] [PATCH 0/3] s390x/pci: fixup and optimize IOTLB code

2018-01-30 Thread Yi Min Zhao
This series contains three patches,
1) optimizes the code including walking DMA tables and rpcit handler
2) fixes the issue caused by IOTLB global refresh 
3) uses the right pal and pba when registering ioat

The issue mentioned above was found when we tested SMC-r tools. This
behavior has been introduced when linux guest started using a global
refresh to purge the whole IOTLB of invalid entries in a lazy fashion
instead of flushing each entry when invalidating table entries.

The previous QEMU implementation didn't keep track of the mapping,
didn't handle correctly the global flush demand from the guest and a
major part of the IOTLB entries were not flushed.

Consequently linux kernel on the host keeping the previous mapping
reports, as it should, -EEXIST DMA mapping error on the next mapping
with the same IOVA. The second patch fixes this issue.

During the investigation, we noticed that the current code walking
PCI IOMMU page tables didn't check important flags of table entries,
including:
1) protection bit
2) table length
3) table offset
4) intermediate tables' invalid bit
5) format control bit

We implement the checking in the first patch before handling the
IOTLB global refresh issue. To keep track of the mapped IOTLB entries
and be able to check if the host IOTLB entries need to be refreshed
we implement a IOTLB cache in QEMU, and introduce some helper
functions to check these bits. All S390IOTLBEntry instances are stored
in a new hashtable which are indexed by IOVA. Each PCI device has its
own IOMMU. Therefore each IOMMU also has its own hashtable caching
corresponding PCI device's DMA entries. Finally, we split 1M
contiguous DMA range into 4K pages to do DMA map, and the code about
error notification is also optimized.

Yi Min Zhao (3):
  s390x/pci: fixup the code walking IOMMU tables
  s390x/pci: fixup global refresh
  s390x/pci: use the right pal and pba in reg_ioat()

 hw/s390x/s390-pci-bus.c  | 233 ++-
 hw/s390x/s390-pci-bus.h  |  13 +++
 hw/s390x/s390-pci-inst.c | 103 ++---
 3 files changed, 271 insertions(+), 78 deletions(-)

-- 
2.14.3 (Apple Git-98)




Re: [Qemu-devel] [PATCH 1/3] s390x/pci: fixup the code walking IOMMU tables

2018-01-31 Thread Yi Min Zhao



在 2018/1/31 下午3:42, Thomas Huth 写道:

On 30.01.2018 10:47, Yi Min Zhao wrote:

Current s390x PCI IOMMU code is lack of flags' checking, including:
1) protection bit
2) table length
3) table offset
4) intermediate tables' invalid bit
5) format control bit

This patch introduces a new struct named S390IOTLBEntry, and makes up
these missed checkings. At the same time, inform the guest with the
corresponding error number when the check fails.

Reviewed-by: Pierre Morel 
Signed-off-by: Yi Min Zhao 
---
  hw/s390x/s390-pci-bus.c  | 223 ++-
  hw/s390x/s390-pci-bus.h  |  10 +++
  hw/s390x/s390-pci-inst.c |  10 ---
  3 files changed, 190 insertions(+), 53 deletions(-)

[...]

@@ -374,26 +511,26 @@ static IOMMUTLBEntry 
s390_translate_iommu(IOMMUMemoryRegion *mr, hwaddr addr,
  DPRINTF("iommu trans addr 0x%" PRIx64 "\n", addr);
  
  if (addr < iommu->pba || addr > iommu->pal) {

-return ret;
+error = ERR_EVENT_OORANGE;
+goto err;
  }
  
-pte = s390_guest_io_table_walk(s390_pci_get_table_origin(iommu->g_iota),

-   addr);
-if (!pte) {
-return ret;
-}
+error = s390_guest_io_table_walk(iommu->g_iota, addr, &entry);
  
-flags = pte & ZPCI_PTE_FLAG_MASK;

-ret.iova = addr;
-ret.translated_addr = pte & ZPCI_PTE_ADDR_MASK;
-ret.addr_mask = 0xfff;
+ret.iova = entry.iova;
+ret.translated_addr = entry.translated_addr;
+ret.addr_mask = entry.len - 1;
+ret.perm = entry.perm;
  
-if (flags & ZPCI_PTE_INVALID) {

-ret.perm = IOMMU_NONE;
-} else {
-ret.perm = IOMMU_RW;
+if ((flag != IOMMU_NONE) && !(flag & ret.perm)) {

You could drop the parentheses around "flag != IOMMU_NONE".

OK. Will update.


For the rest of the patch: Sorry, can't review due to missing PCI spec :-(

Thanks for your review anyway!


  Thomas







Re: [Qemu-devel] [PATCH 1/3] s390x/pci: fixup the code walking IOMMU tables

2018-02-01 Thread Yi Min Zhao



在 2018/1/31 下午6:58, Cornelia Huck 写道:

On Tue, 30 Jan 2018 10:47:13 +0100
Yi Min Zhao  wrote:


Current s390x PCI IOMMU code is lack of flags' checking, including:
1) protection bit
2) table length
3) table offset
4) intermediate tables' invalid bit
5) format control bit

This patch introduces a new struct named S390IOTLBEntry, and makes up
these missed checkings. At the same time, inform the guest with the
corresponding error number when the check fails.

There are a lot of things in this patch I cannot review due to -ENODOC,
but some comments below.


Reviewed-by: Pierre Morel 
Signed-off-by: Yi Min Zhao 
---
  hw/s390x/s390-pci-bus.c  | 223 ++-
  hw/s390x/s390-pci-bus.h  |  10 +++
  hw/s390x/s390-pci-inst.c |  10 ---
  3 files changed, 190 insertions(+), 53 deletions(-)

(...)


+/* ett is expected table type, -1 page table, 0 segment table, 1 region table 
*/
+static uint64_t get_table_index(uint64_t iova, int8_t ett)
+{
+switch (ett) {
+case -1:
+return calc_px(iova);
+case 0:
+return calc_sx(iova);
+case 1:
+return calc_rtx(iova);
+}
+
+return -1;

You use ett to differentiate between the three table types a lot. Is
this an architectured value, or an internal construct?

It's an architectured value to some degree, because it's used to describe
the translation more clearly in the doc.


If you introduced it yourself, it might make sense to switch to an enum
instead. Otherwise, using some #defines would improve readability of
the code.

OK. I will add macros in next version.



+}

(...)


+/**
+ * table_translate: do translation within one table and return the following
+ *  table origin
+ *
+ * @entry: the entry being traslated, the result is stored in this.

s/traslated/translated/

OK.



+ * @to: the address of table origin.
+ * @ett: expected table type, 1 region table, 0 segment table and -1 page 
table.
+ * @error: error code
+ */
+static uint64_t table_translate(S390IOTLBEntry *entry, uint64_t to, int8_t ett,
+uint16_t *error)

(...)


diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c
index be449210d9..63fa06fb97 100644
--- a/hw/s390x/s390-pci-inst.c
+++ b/hw/s390x/s390-pci-inst.c
@@ -644,16 +644,6 @@ int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t 
r2, uintptr_t ra)
  
  while (start < end) {

  entry = imrc->translate(iommu_mr, start, IOMMU_NONE);
-
-if (!entry.translated_addr) {
-pbdev->state = ZPCI_FS_ERROR;
-setcc(cpu, ZPCI_PCI_LS_ERR);
-s390_set_status_code(env, r1, ZPCI_PCI_ST_INSUF_RES);
-s390_pci_generate_error_event(ERR_EVENT_SERR, pbdev->fh, 
pbdev->fid,
-  start, ERR_EVENT_Q_BIT);
-goto out;
-}
-
  memory_region_notify_iommu(iommu_mr, entry);
  start += entry.addr_mask + 1;

You're now progressing even though you might have generated an error
event. Is that what's intended?
Yes, this is wrong. The right thing is to delete only the code generating
the error event, and keep the if check here in this patch.



  }







Re: [Qemu-devel] [PATCH 3/3] s390x/pci: use the right pal and pba in reg_ioat()

2018-02-01 Thread Yi Min Zhao



在 2018/2/1 下午8:02, Cornelia Huck 写道:

On Thu, 1 Feb 2018 12:33:01 +0100
Pierre Morel  wrote:


On 31/01/2018 12:44, Cornelia Huck wrote:

On Tue, 30 Jan 2018 10:47:15 +0100
Yi Min Zhao  wrote:
  

When registering ioat, pba should be comprised of leftmost 52 bits and
rightmost 12 binary zeros, and pal should be comprised of leftmost 52
bits and right most 12 binary ones. Let's fixup this.

Reviewed-by: Pierre Morel 
Signed-off-by: Yi Min Zhao 
---
   hw/s390x/s390-pci-inst.c | 2 ++
   1 file changed, 2 insertions(+)

diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c
index 997a9cc2e9..3fcc330fe3 100644
--- a/hw/s390x/s390-pci-inst.c
+++ b/hw/s390x/s390-pci-inst.c
@@ -865,6 +865,8 @@ static int reg_ioat(CPUS390XState *env, S390PCIIOMMU 
*iommu, ZpciFib fib,
   uint8_t dt = (g_iota >> 2) & 0x7;
   uint8_t t = (g_iota >> 11) & 0x1;
   
+pba &= ~0xfff;

+pal |= 0xfff;
   if (pba > pal || pba < ZPCI_SDMA_ADDR || pal > ZPCI_EDMA_ADDR) {
   s390_program_interrupt(env, PGM_OPERAND, 6, ra);
   return -EINVAL;

It seems like pba and pal are part of the fib, which in turn seems to
be provided by the caller. Is that correct? If yes, is it valid for
them to not have the rightmost 12 bits as 0s resp. 1s?

(Probably answered in the architecture, I know. Might make sense to be
a tad more explicit in the description.)
  

Yes it is, only word6 and the bits 0-19 of word 7 are used for PAL and
the zPCI facility treats the right most 12 bits of the PAL as containing
ones.

For the PBA, word 4 and bits 0-19 of word 5, with 12 zeros appended on the
right, provide the PBA.

The lower 12 bits of words 5 and 7 of the FIB are ignored by the facility.

@Yi Min: maybe add the last sentence to the commit message.

@Conny: Is it clearer?

Yes, adding the last sentence makes it clearer. Thanks!



OK. Thanks!




Re: [Qemu-devel] [PATCH v1] pci: Set PCI-bus device entry before initializing bus master

2017-10-09 Thread Yi Min Zhao

I strongly agree. This will make IOMMU assignment easier.


在 2017/10/5 下午8:36, Pierre Morel 写道:

When initializing the PCI-bus master pci_init_bus_master(PCIDevice *)
calls pci_device_iommu_address_space(PCIDevice *) which in turn calls
iommu_bus->iommu_fn(bus, opaque,devfn)

If the device entry of the PCI-bus is not initialized to point to the
PCIDevice structure, the iommu_fn() function, getting only bus and
devno, is unable to retrieve the PCIDevice.

Usually this is not a problem, as the DMA address space depends on
the bus, but in an architecture like S390, each PCI device can have
its own DMA address space.
Being able to setup the DMA address space at the right moment will
greatly simplify the implementation.

This patch proposes to setup the bus device entry before calling
pci_init_bus_master() and reset the entry in case of error.

Signed-off-by: Pierre Morel 
---
  hw/pci/pci.c | 3 ++-
  1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 1e6fb88..3e64492 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -1026,6 +1026,7 @@ static PCIDevice *do_pci_register_device(PCIDevice 
*pci_dev, PCIBus *bus,
  address_space_init(&pci_dev->bus_master_as,
 &pci_dev->bus_master_container_region, pci_dev->name);

+bus->devices[devfn] = pci_dev;
  if (qdev_hotplug) {
  pci_init_bus_master(pci_dev);
  }
@@ -1062,6 +1063,7 @@ static PCIDevice *do_pci_register_device(PCIDevice 
*pci_dev, PCIBus *bus,
  if (local_err) {
  error_propagate(errp, local_err);
  do_pci_unregister_device(pci_dev);
+bus->devices[devfn] = NULL;
  return NULL;
  }

@@ -1071,7 +1073,6 @@ static PCIDevice *do_pci_register_device(PCIDevice 
*pci_dev, PCIBus *bus,
  config_write = pci_default_write_config;
  pci_dev->config_read = config_read;
  pci_dev->config_write = config_write;
-bus->devices[devfn] = pci_dev;
  pci_dev->version_id = 2; /* Current pci device vmstate version */
  return pci_dev;
  }





Re: [Qemu-devel] [PATCH v2 4/7] s390x/pci: rework PCI STORE BLOCK

2017-11-21 Thread Yi Min Zhao



在 2017/11/22 上午2:07, Pierre Morel 写道:

On 21/11/2017 11:42, Cornelia Huck wrote:

On Thu, 16 Nov 2017 18:51:52 +0100
Pierre Morel  wrote:


Enhance the fault detection.

Fix up the precedence to check the destination path existence
before checking for the source accessibility.

Add the maxstbl entry to both the Query PCI Function Group
response and the PCIBusDevice structure.

Initialize the maxstbl to 128 per default until we get
the actual data from the hardware.

Signed-off-by: Pierre Morel 
Reviewed-by: Yi Min Zhao 
---
  hw/s390x/s390-pci-bus.h  |  1 +
  hw/s390x/s390-pci-inst.c | 62 
+---

  hw/s390x/s390-pci-inst.h |  2 +-
  3 files changed, 40 insertions(+), 25 deletions(-)


@@ -700,8 +685,33 @@ int pcistb_service_call(S390CPU *cpu, uint8_t 
r1, uint8_t r3, uint64_t gaddr,

  break;
  }
  +    if (pcias > 5) {
+    DPRINTF("pcistb invalid space\n");
+    setcc(cpu, ZPCI_PCI_LS_ERR);
+    s390_set_status_code(env, r1, ZPCI_PCI_ST_INVAL_AS);
+    return 0;
+    }
+
+    /* Verify the address, offset and length */
+    /* offset must be a multiple of 8 */
+    if (offset % 8) {
+    goto addressing_error;
+    }
+    /* Length must be greater than 8, a multiple of 8, not greater 
maxstbl */


"not greater than maxstlb"


Better, I know, but greater than 80 characters; this is why I preferred
broken English.

What do I do ? break the line or English ?

less than?





+    if ((len <= 8) || (len % 8) || (len > pbdev->maxstbl)) {
+    goto addressing_error;
+    }
+    /* Do not cross a 4K-byte boundary */
+    if (((offset & 0xfff) + len) > 0x1000) {
+    goto addressing_error;
+    }
+    /* Guest address must be double word aligned */
+    if (gaddr & 0x07UL) {
+    goto addressing_error;
+    }
+
  mr = pbdev->pdev->io_regions[pcias].memory;
-    if (!memory_region_access_valid(mr, env->regs[r3], len, true)) {
+    if (!memory_region_access_valid(mr, offset, len, true)) {
  program_interrupt(env, PGM_OPERAND, 6);
  return 0;
  }


Looks good.








Re: [Qemu-devel] [PATCH v3 1/7] s390x/pci: factor out endianess conversion

2017-11-23 Thread Yi Min Zhao



在 2017/11/23 下午6:33, Cornelia Huck 写道:

On Thu, 23 Nov 2017 11:25:10 +0100
Thomas Huth  wrote:


On 23.11.2017 11:08, Cornelia Huck wrote:

On Thu, 23 Nov 2017 11:01:23 +0100
Thomas Huth  wrote:
   

On 23.11.2017 10:49, Cornelia Huck wrote:

On Thu, 23 Nov 2017 09:48:41 +0100
Thomas Huth  wrote:

On 22.11.2017 23:05, Pierre Morel wrote:

[...]

+/**
+ * Swap data contained in s390x big endian registers to little endian
+ * PCI bars.
+ *
+ * @ptr: a pointer to a uint64_t data field
+ * @len: the length of the valid data, must be 1,2,4 or 8
+ */
+static int zpci_endian_swap(uint64_t *ptr, uint8_t len)
+{
+uint64_t data = *ptr;
+
+switch (len) {
+case 1:
+break;
+case 2:
+data = bswap16(data);
+break;
+case 4:
+data = bswap32(data);
+break;
+case 8:
+data = bswap64(data);
+break;
+default:
+return -EINVAL;
+}
+*ptr = data;
+return 0;
+}

While you're at it, I think that should rather be leXX_to_cpu() instead
of bswapXX() here,

I don't think that's correct, as this is supposed to swap BE registers
to LE PCI bars.

Yes, but for the CPU emulation, the registers are stored in the host's
endianness in the CPUS390XState structure. Or why do we byte-swap them
again with cpu_to_be64() during s390_store_status(), for example?

Gah, endian conversion is eating my brain...

So, is the content we get BE or not? I thought in our last discussion
we came to the conclusion that it is.

data is read from / written to env->regs[r1], so this is host endian, as
far as I know. PCI is little endian, so using le32_to_cpu() /
cpu_to_le32() should IMHO be the right way to go here.

By the way, if we want to use both, cpu_to_le and le_to_cpu, depending
on whether we read from or write to PCI, we should maybe *not* put this
code into a separate function?

Yes, if your assessment is correct, we need two functions (I think this
conversion is used in other places in later patches as well). Or are
there mechanisms for that already available?

I have a question: is the data in cpu->regs in the guest's endianness?
In our case, the guest is S390. Although the arch is big-endian, the data in
pcilg/stg instructions is little-endian.

Another question: does 'cpu' in cpu_to_le**() or le**_to_cpu() mean the
host endianness?


If the answers to the two questions above are yes, we actually need to handle
two cases.
1) For pcilg, we need to translate the data to little-endian, thus
cpu_to_le**().
2) For pcistg, we need to translate the data to host endianness, thus
le**_to_cpu().



[I really need to continue working on wiring up zpci in tcg, but I keep
getting sidetracked.]

Maybe best if you get it running on a big endian host first ... if it is
then not working on a little endian host, you know that you have to look
for things like these "bswapXX()" statements...

That was exactly my reasoning behind getting tcg to run... but getting
it to run at all is the hard part :)







Re: [Qemu-devel] [qemu-s390x] [PATCH v3 1/7] s390x/pci: factor out endianess conversion

2017-11-23 Thread Yi Min Zhao



在 2017/11/23 下午8:18, Thomas Huth 写道:

On 23.11.2017 13:07, Yi Min Zhao wrote:


在 2017/11/23 下午6:33, Cornelia Huck 写道:

On Thu, 23 Nov 2017 11:25:10 +0100
Thomas Huth  wrote:


On 23.11.2017 11:08, Cornelia Huck wrote:

On Thu, 23 Nov 2017 11:01:23 +0100
Thomas Huth  wrote:
   

On 23.11.2017 10:49, Cornelia Huck wrote:

On Thu, 23 Nov 2017 09:48:41 +0100
Thomas Huth  wrote:

On 22.11.2017 23:05, Pierre Morel wrote:

[...]

+/**
+ * Swap data contained in s390x big endian registers to little
endian
+ * PCI bars.
+ *
+ * @ptr: a pointer to a uint64_t data field
+ * @len: the length of the valid data, must be 1,2,4 or 8
+ */
+static int zpci_endian_swap(uint64_t *ptr, uint8_t len)
+{
+    uint64_t data = *ptr;
+
+    switch (len) {
+    case 1:
+    break;
+    case 2:
+    data = bswap16(data);
+    break;
+    case 4:
+    data = bswap32(data);
+    break;
+    case 8:
+    data = bswap64(data);
+    break;
+    default:
+    return -EINVAL;
+    }
+    *ptr = data;
+    return 0;
+}

While you're at it, I think that should rather be leXX_to_cpu()
instead
of bswapXX() here,

I don't think that's correct, as this is supposed to swap BE
registers
to LE PCI bars.

Yes, but for the CPU emulation, the registers are stored in the host's
endianness in the CPUS390XState structure. Or why do we byte-swap them
again with cpu_to_be64() during s390_store_status(), for example?

Gah, endian conversion is eating my brain...

So, is the content we get BE or not? I thought in our last discussion
we came to the conclusion that it is.

data is read from / written to env->regs[r1], so this is host endian, as
far as I know. PCI is little endian, so using le32_to_cpu() /
cpu_to_le32() should IMHO be the right way to go here.

By the way, if we want to use both, cpu_to_le and le_to_cpu, depending
on whether we read from or write to PCI, we should maybe *not* put this
code into a separate function?

Yes, if your assessment is correct, we need two functions (I think this
conversion is used in other places in later patches as well). Or are
there mechanisms for that already available?

I have a question, is the data in cpu->regs the guest's endianess?

As far as I know, it's host endianness, so on x86 with TCG emulation,
it's little endian.


In our case, the guest is S390. Although the arch is big-endian, the
data in
pcilg/stg instructions is little-endian.

PCI memory is always little endian, right.


Another question, does 'cpu' in cpu_to_le**() or le**_to_cpu() mean the
host endianess?

Yes, the "cpu" in cpu_to_le or le_to_cpu means the host, indeed. It's
confusing :-/


If the answers to upper two questions are yes, we actually need handle
two cases.
1) For pcilg, we need to translate the data to little-endian, thus
cpu_to_le**().
2) For pcistg, we need to translate the data to host endianess, thus
le**_to_cpu().

I think we've got to byte-swap if the host is big endian (s390x), but
not if the host is little endian (x86 with TCG).



Thanks for your replies! We will send the new version ASAP to update
this patch.


  Thomas







Re: [Qemu-devel] [qemu-s390x] [PATCH v3 1/7] s390x/pci: factor out endianess conversion

2017-11-26 Thread Yi Min Zhao



在 2017/11/25 下午9:49, Pierre Morel 写道:

On 24/11/2017 07:19, Yi Min Zhao wrote:



在 2017/11/23 下午8:18, Thomas Huth 写道:

On 23.11.2017 13:07, Yi Min Zhao wrote:


在 2017/11/23 下午6:33, Cornelia Huck 写道:

On Thu, 23 Nov 2017 11:25:10 +0100
Thomas Huth  wrote:


On 23.11.2017 11:08, Cornelia Huck wrote:

On Thu, 23 Nov 2017 11:01:23 +0100
Thomas Huth  wrote:

On 23.11.2017 10:49, Cornelia Huck wrote:

On Thu, 23 Nov 2017 09:48:41 +0100
Thomas Huth  wrote:

On 22.11.2017 23:05, Pierre Morel wrote:

[...]

+/**
+ * Swap data contained in s390x big endian registers to little
endian
+ * PCI bars.
+ *
+ * @ptr: a pointer to a uint64_t data field
+ * @len: the length of the valid data, must be 1,2,4 or 8
+ */
+static int zpci_endian_swap(uint64_t *ptr, uint8_t len)
+{
+    uint64_t data = *ptr;
+
+    switch (len) {
+    case 1:
+    break;
+    case 2:
+    data = bswap16(data);
+    break;
+    case 4:
+    data = bswap32(data);
+    break;
+    case 8:
+    data = bswap64(data);
+    break;
+    default:
+    return -EINVAL;
+    }
+    *ptr = data;
+    return 0;
+}

While you're at it, I think that should rather be leXX_to_cpu()
instead
of bswapXX() here,

I don't think that's correct, as this is supposed to swap BE
registers
to LE PCI bars.
Yes, but for the CPU emulation, the registers are stored in the 
host's
endianness in the CPUS390XState structure. Or why do we 
byte-swap them

again with cpu_to_be64() during s390_store_status(), for example?

Gah, endian conversion is eating my brain...

So, is the content we get BE or not? I thought in our last 
discussion

we came to the conclusion that it is.
data is read from / written to env->regs[r1], so this is host 
endian, as

far as I know. PCI is little endian, so using le32_to_cpu() /
cpu_to_le32() should IMHO be the right way to go here.

By the way, if we want to use both, cpu_to_le and le_to_cpu, 
depending
on whether we read from or write to PCI, we should maybe *not* 
put this

code into a separate function?
Yes, if your assessment is correct, we need two functions (I think 
this

conversion is used in other places in later patches as well). Or are
there mechanisms for that already available?

I have a question, is the data in cpu->regs the guest's endianess?

As far as I know, it's host endianness, so on x86 with TCG emulation,
it's little endian.


In our case, the guest is S390. Although the arch is big-endian, the
data in
pcilg/stg instructions is little-endian.

PCI memory is always little endian, right.

Another question, does 'cpu' in cpu_to_le**() or le**_to_cpu() mean 
the

host endianess?

Yes, the "cpu" in cpu_to_le or le_to_cpu means the host, indeed. It's
confusing :-/


If the answers to upper two questions are yes, we actually need handle
two cases.
1) For pcilg, we need to translate the data to little-endian, thus
cpu_to_le**().
2) For pcistg, we need to translate the data to host endianess, thus
le**_to_cpu().

I think we've got to byte-swap if the host is big endian (s390x), but
not if the host is little endian (x86 with TCG).




Here is my comprehension of this funny swapping:

- TCG for a BE guest and a le host swap bytes because if we do 
(register & 0x01) in the zPCI interception code it must work what ever 
the endianess is.


- Guest always write data Little Endian because it think it writes to 
PCI.


- Kernel standard PCI code needs to swap endianness for a BE host but 
not for a le host.



So it follows:

Z Guest writes data BE in its register and swap its data to le before 
issuing zPCI

The data in register has been already le. For any zPCI instruction accessing
PCI data, the endianess is little-endian. Although s390 is be, its PCI 
instructions

follow PCI Spec (byte ordering is le).

In kernel, s390 pci code swaps the data to le before it really issues 
pcistg.


QEMU intercepts, receives the data from the register and store it
-> Native: it stores as is: -> le

I think you talked about PCI stg (storing data to PCI device).
The data from the register is le. But we swapped it back to be
because qemu in s390 is be. Then any pci_config write would
transfer data from be to le finally. The process is:
1) data from register : le (because the data in pcistg is in le)
2) pcistg intercept handler in qemu : le->be
3) pci->config_write : be->le

-> TCG: it stores swapping data -> BE

Here we are only talking about the case where the host is LE.
As I understand it, the data in the register should be in
the guest's byte ordering.

So, for an s390 guest, the data in pcistg is LE. The pcistg intercept
handler then swaps the data from LE to BE, so the final callback
would write the data to the PCI device with the wrong byte ordering,
because the host is LE and cpu_to_le32() would not swap the data.


QEMU-PCI swaps the bytes always
-> Native : data is now BE
-> TCG: data is now

Re: [Qemu-devel] [qemu-s390x] [PATCH v3 1/7] s390x/pci: factor out endianess conversion

2017-11-27 Thread Yi Min Zhao



在 2017/11/27 下午2:59, Thomas Huth 写道:

On 25.11.2017 14:49, Pierre Morel wrote:

On 24/11/2017 07:19, Yi Min Zhao wrote:


在 2017/11/23 下午8:18, Thomas Huth 写道:

On 23.11.2017 13:07, Yi Min Zhao wrote:

在 2017/11/23 下午6:33, Cornelia Huck 写道:

On Thu, 23 Nov 2017 11:25:10 +0100
Thomas Huth  wrote:


On 23.11.2017 11:08, Cornelia Huck wrote:

On Thu, 23 Nov 2017 11:01:23 +0100
Thomas Huth  wrote:

On 23.11.2017 10:49, Cornelia Huck wrote:

On Thu, 23 Nov 2017 09:48:41 +0100
Thomas Huth  wrote:

On 22.11.2017 23:05, Pierre Morel wrote:

[...]

+/**
+ * Swap data contained in s390x big endian registers to little
endian
+ * PCI bars.
+ *
+ * @ptr: a pointer to a uint64_t data field
+ * @len: the length of the valid data, must be 1,2,4 or 8
+ */
+static int zpci_endian_swap(uint64_t *ptr, uint8_t len)
+{
+    uint64_t data = *ptr;
+
+    switch (len) {
+    case 1:
+    break;
+    case 2:
+    data = bswap16(data);
+    break;
+    case 4:
+    data = bswap32(data);
+    break;
+    case 8:
+    data = bswap64(data);
+    break;
+    default:
+    return -EINVAL;
+    }
+    *ptr = data;
+    return 0;
+}

While you're at it, I think that should rather be leXX_to_cpu()
instead
of bswapXX() here,

I don't think that's correct, as this is supposed to swap BE
registers
to LE PCI bars.

Yes, but for the CPU emulation, the registers are stored in the
host's
endianness in the CPUS390XState structure. Or why do we
byte-swap them
again with cpu_to_be64() during s390_store_status(), for example?

Gah, endian conversion is eating my brain...

So, is the content we get BE or not? I thought in our last
discussion
we came to the conclusion that it is.

data is read from / written to env->regs[r1], so this is host
endian, as
far as I know. PCI is little endian, so using le32_to_cpu() /
cpu_to_le32() should IMHO be the right way to go here.

By the way, if we want to use both, cpu_to_le and le_to_cpu,
depending
on whether we read from or write to PCI, we should maybe *not* put
this
code into a separate function?

Yes, if your assessment is correct, we need two functions (I think
this
conversion is used in other places in later patches as well). Or are
there mechanisms for that already available?

I have a question, is the data in cpu->regs the guest's endianess?

As far as I know, it's host endianness, so on x86 with TCG emulation,
it's little endian.


In our case, the guest is S390. Although the arch is big-endian, the
data in
pcilg/stg instructions is little-endian.

PCI memory is always little endian, right.


Another question, does 'cpu' in cpu_to_le**() or le**_to_cpu() mean the
host endianess?

Yes, the "cpu" in cpu_to_le or le_to_cpu means the host, indeed. It's
confusing :-/


If the answers to upper two questions are yes, we actually need handle
two cases.
1) For pcilg, we need to translate the data to little-endian, thus
cpu_to_le**().
2) For pcistg, we need to translate the data to host endianess, thus
le**_to_cpu().

I think we've got to byte-swap if the host is big endian (s390x), but
not if the host is little endian (x86 with TCG).

Here is my comprehension of this funny swapping:

- TCG for a BE guest and a le host swap bytes because if we do (register
& 0x01) in the zPCI interception code it must work what ever the
endianess is.

Uhhh, I might have missed that the value has already been byte-swapped
once by TCG for env->regs[r1] ...
I want to ask a question. For this case, BE guest and LE host, is 
env->regs[r1] in LE byte ordering?

Now I'm pretty much completely confused ... sorry for the noise if I was
wrong... I think it's best you ignore my comment for now (i.e. go with
bswapXX() instead of le_to_cpuXX()), and if we later wire up zPCI with
TCG, we still can fix this if necessary.

  Thomas







Re: [Qemu-devel] [qemu-s390x] [PATCH v3 1/7] s390x/pci: factor out endianess conversion

2017-11-27 Thread Yi Min Zhao



在 2017/11/27 下午7:13, Thomas Huth 写道:

On 27.11.2017 11:09, Yi Min Zhao wrote:


在 2017/11/27 下午2:59, Thomas Huth 写道:

On 25.11.2017 14:49, Pierre Morel wrote:

On 24/11/2017 07:19, Yi Min Zhao wrote:

在 2017/11/23 下午8:18, Thomas Huth 写道:

On 23.11.2017 13:07, Yi Min Zhao wrote:

在 2017/11/23 下午6:33, Cornelia Huck 写道:

On Thu, 23 Nov 2017 11:25:10 +0100
Thomas Huth  wrote:


On 23.11.2017 11:08, Cornelia Huck wrote:

On Thu, 23 Nov 2017 11:01:23 +0100
Thomas Huth  wrote:

On 23.11.2017 10:49, Cornelia Huck wrote:

On Thu, 23 Nov 2017 09:48:41 +0100
Thomas Huth  wrote:

On 22.11.2017 23:05, Pierre Morel wrote:

[...]

+/**
+ * Swap data contained in s390x big endian registers to
little
endian
+ * PCI bars.
+ *
+ * @ptr: a pointer to a uint64_t data field
+ * @len: the length of the valid data, must be 1,2,4 or 8
+ */
+static int zpci_endian_swap(uint64_t *ptr, uint8_t len)
+{
+    uint64_t data = *ptr;
+
+    switch (len) {
+    case 1:
+    break;
+    case 2:
+    data = bswap16(data);
+    break;
+    case 4:
+    data = bswap32(data);
+    break;
+    case 8:
+    data = bswap64(data);
+    break;
+    default:
+    return -EINVAL;
+    }
+    *ptr = data;
+    return 0;
+}

While you're at it, I think that should rather be leXX_to_cpu()
instead
of bswapXX() here,

I don't think that's correct, as this is supposed to swap BE
registers
to LE PCI bars.

Yes, but for the CPU emulation, the registers are stored in the
host's
endianness in the CPUS390XState structure. Or why do we
byte-swap them
again with cpu_to_be64() during s390_store_status(), for example?

Gah, endian conversion is eating my brain...

So, is the content we get BE or not? I thought in our last
discussion
we came to the conclusion that it is.

data is read from / written to env->regs[r1], so this is host
endian, as
far as I know. PCI is little endian, so using le32_to_cpu() /
cpu_to_le32() should IMHO be the right way to go here.

By the way, if we want to use both, cpu_to_le and le_to_cpu,
depending
on whether we read from or write to PCI, we should maybe *not* put
this
code into a separate function?

Yes, if your assessment is correct, we need two functions (I think
this
conversion is used in other places in later patches as well). Or are
there mechanisms for that already available?

I have a question, is the data in cpu->regs the guest's endianess?

As far as I know, it's host endianness, so on x86 with TCG emulation,
it's little endian.


In our case, the guest is S390. Although the arch is big-endian, the
data in
pcilg/stg instructions is little-endian.

PCI memory is always little endian, right.


Another question, does 'cpu' in cpu_to_le**() or le**_to_cpu()
mean the
host endianess?

Yes, the "cpu" in cpu_to_le or le_to_cpu means the host, indeed. It's
confusing :-/


If the answers to upper two questions are yes, we actually need
handle
two cases.
1) For pcilg, we need to translate the data to little-endian, thus
cpu_to_le**().
2) For pcistg, we need to translate the data to host endianess, thus
le**_to_cpu().

I think we've got to byte-swap if the host is big endian (s390x), but
not if the host is little endian (x86 with TCG).

Here is my comprehension of this funny swapping:

- TCG for a BE guest and a le host swap bytes because if we do (register
& 0x01) in the zPCI interception code it must work what ever the
endianess is.

Uhhh, I might have missed that the value has already been byte-swapped
once by TCG for env->regs[r1] ...

I want to ask a question. For this case, BE guest and LE host, is
env->regs[r1] in LE byte ordering?

Generally env->regs[] are in host byte order, so LE if the host is LE.
Not sure which byte-order is stored in the register by the guest,
though, since I don't have the zPCI spec ... so if the (BE) guest wrote
a LE value in the register, and TCG byte-swapped it again, the value is
suddenly BE again and thus we have to always byte-swap it again...?
Sorry, it's hard to say without having the spec available.
Yes, your understanding is right. The guest is BE but the data in 
register for zPCI instruction is LE.


  Thomas







Re: [Qemu-devel] [qemu-s390x] [PATCH v3 1/7] s390x/pci: factor out endianess conversion

2017-11-27 Thread Yi Min Zhao



在 2017/11/28 上午12:02, Cornelia Huck 写道:

On Mon, 27 Nov 2017 16:53:04 +0100
Pierre Morel  wrote:


On 27/11/2017 16:30, Cornelia Huck wrote:

On Mon, 27 Nov 2017 16:24:08 +0100
Pierre Morel  wrote:
   

On 27/11/2017 15:34, Cornelia Huck wrote:

On Mon, 27 Nov 2017 12:02:55 +0100
Cornelia Huck  wrote:
  

On Mon, 27 Nov 2017 07:59:36 +0100
Thomas Huth  wrote:
 

On 25.11.2017 14:49, Pierre Morel wrote:

On 24/11/2017 07:19, Yi Min Zhao wrote:


在 2017/11/23 下午8:18, Thomas Huth 写道:

On 23.11.2017 13:07, Yi Min Zhao wrote:
 

Another question, does 'cpu' in cpu_to_le**() or le**_to_cpu() mean the
host endianess?

Yes, the "cpu" in cpu_to_le or le_to_cpu means the host, indeed. It's
confusing :-/
  

If the answers to upper two questions are yes, we actually need handle
two cases.
1) For pcilg, we need to translate the data to little-endian, thus
cpu_to_le**().
2) For pcistg, we need to translate the data to host endianess, thus
le**_to_cpu().

I think we've got to byte-swap if the host is big endian (s390x), but
not if the host is little endian (x86 with TCG).

Here is my comprehension of this funny swapping:

- TCG for a BE guest and a le host swap bytes because if we do (register
& 0x01) in the zPCI interception code it must work what ever the
endianess is.

Uhhh, I might have missed that the value has already been byte-swapped
once by TCG for env->regs[r1] ...
Now I'm pretty much completely confused ... sorry for the noise if I was
wrong... I think it's best you ignore my comment for now (i.e. go with
bswapXX() instead of le_to_cpuXX()), and if we later wire up zPCI with
TCG, we still can fix this if necessary.

I'll try my current pci/tcg patches on LPAR with this (or a v4) on top.
If it works there (it doesn't yet on my laptop), we do have a
endianness issue... (unfortunately, the reverse isn't true.)

It does not look too bad: I can get a nice enP1p0s0 device from a
virtio-net-pci with my tcg patches on my laptop (with these patches as
well, of course). So, endianness is likely mostly fine.

On the Lpar and on the Laptop or only on the Lpar ?

Both :)

That's great! :)

Btw, lspci says

0001:00:00.0 Ethernet controller: Red Hat, Inc. Virtio network device
 Subsystem: Red Hat, Inc. Device 0001
 Physical Slot: 
 Flags: bus master, fast devsel, latency 0
 I/O ports at  [disabled]
 [virtual] Memory at 8001 (32-bit, non-prefetchable) 
[size=4K]
 Memory at 8002 (64-bit, prefetchable) [size=16K]
 Expansion ROM at  [disabled] [size=256K]
 Capabilities: [98] MSI-X: Enable+ Count=3 Masked-
 Capabilities: [84] Vendor Specific Information: VirtIO: 
 Capabilities: [70] Vendor Specific Information: VirtIO: Notify
 Capabilities: [60] Vendor Specific Information: VirtIO: DeviceCfg
 Capabilities: [50] Vendor Specific Information: VirtIO: ISR
 Capabilities: [40] Vendor Specific Information: VirtIO: CommonCfg
 Kernel driver in use: virtio-pci

Does that look reasonable to you?



Great! That means the data in env->register has been swapped.
Thanks!




Re: [Qemu-devel] [qemu-s390x] [PATCH] s390x/pci: add common fmb

2018-10-16 Thread Yi Min Zhao




在 2018/10/1 下午5:22, Thomas Huth 写道:

On 2018-09-29 07:48, Yi Min Zhao wrote:

在 2018/9/19 下午3:53, Thomas Huth 写道:

On 2018-09-19 09:08, Yi Min Zhao wrote:

[...]

diff --git a/hw/s390x/s390-pci-bus.h b/hw/s390x/s390-pci-bus.h
index 1f7f9b5814..fdf13a19c0 100644
--- a/hw/s390x/s390-pci-bus.h
+++ b/hw/s390x/s390-pci-bus.h
@@ -286,6 +286,28 @@ typedef struct S390PCIIOMMUTable {
    S390PCIIOMMU *iommu[PCI_SLOT_MAX];
    } S390PCIIOMMUTable;
    +/* Function Measurement Block */
+#define DEFAULT_MUI 4000
+#define UPDATE_U_BIT 0x1ULL
+#define FMBK_MASK 0xfULL
+
+typedef struct ZpciFmbFmt0 {
+    uint64_t dma_rbytes;
+    uint64_t dma_wbytes;
+} ZpciFmbFmt0;
+
+typedef struct ZpciFmb {
+    uint8_t format;
+    uint8_t fmt_ind[3];
+    uint32_t sample;
+    uint64_t last_update;
+    uint64_t ld_ops;
+    uint64_t st_ops;
+    uint64_t stb_ops;
+    uint64_t rpcit_ops;
+    ZpciFmbFmt0 fmt0;
+} QEMU_PACKED __attribute((__aligned__(8))) ZpciFmb;

All the fields in this struct are naturally aligned already, so I'd
maybe rather drop the QEMU_PACKED and add a
QEMU_BUILD_BUG_MSG(sizeof(ZpciFmb) != xx, ...) statement afterwards.

Currently we only implement FMT0. There're other FMTs to be implemented
in future.
So here there would be a union and we can't give a specific size to
QEMU_BUILD_BUG_MSG.
Can we use the max size for checking?

I think you could use this to check the beginning of the struct:

At the beginning of the struct? not after it?


QEMU_BUILD_BUG_MSG(offsetof(ZpciFmb, fmt0) != 48, "padding in ZpciFmb");

I think this could satisfy our requirement.



    struct S390PCIBusDevice {
    DeviceState qdev;
    PCIDevice *pdev;
@@ -297,6 +319,8 @@ struct S390PCIBusDevice {
    uint32_t fid;
    bool fid_defined;
    uint64_t fmb_addr;
+    ZpciFmb fmb;

... since you embed it here in another struct which does not have any
alignment requirements. This is likely going to cause an error with GCC
8.1, we've had this problem in the past already:

https://git.qemu.org/?p=qemu.git;a=commitdiff;h=a6e4385dea94850d7b06b0

Ah... I didn't test the code with GCC 8+. The GCC I used is 7.2.
It should get the same warning.

Nobody reported the warning in the s390-ccw bios until GCC 8 had been
released, so I assume this is a new warning in GCC 8.


Is the __align__(8) required at all? As far as I understand the code,
the struct is not living inside the guest memory, is it? So you could
simply drop the __align__(8).
But if you need it, I think you have to allocate the memory for ZpciFmb
separately (and use a "ZpciFmb *fmb" here instead).

I want to copy the entire structure to the guest memory during updating
FMB.
It's not a good idea to do copy for all members multiple times.

Sure, but you're doing the updates through address_space_write(), so as
far as I can see there is currently no need for the
attribute((__aligned__(8))) here (or did I miss something?). Thus I'd
like to suggest to simply remove that attribute here.

  Thomas



Agree to remove it.

--
Yi Min




[Qemu-devel] [PATCH v2] s390x/pci: add common fmb

2018-10-22 Thread Yi Min Zhao
The common function measurement block (FMB) is used to report counters of
successfully issued pcilg/stg/stb and rpcit instructions. This patch
introduces a new struct ZpciFmb and schedules a timer callback that
copies the FMB to guest memory at an interval which is set to
4s by default. If an attempt to update the FMB fails, an error event
is generated. After the pcilg/stg/stb and rpcit interception
handlers complete successfully, the related counter is increased. The guest
can pass a null address to switch off the FMB and stop the corresponding
timer.

Signed-off-by: Yi Min Zhao 
Reviewed-by: Pierre Morel 
---
Change Log:
v2:
1. Use QEMU_BUILD_BUG_MSG for ZpciFmb struct instead of QEMU_PACKED.

---
 hw/s390x/s390-pci-bus.c  |   3 +-
 hw/s390x/s390-pci-bus.h  |  25 ++
 hw/s390x/s390-pci-inst.c | 105 +--
 hw/s390x/s390-pci-inst.h |   1 +
 4 files changed, 130 insertions(+), 4 deletions(-)

diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
index e42e1b80d6..6cd23175cd 100644
--- a/hw/s390x/s390-pci-bus.c
+++ b/hw/s390x/s390-pci-bus.c
@@ -976,6 +976,7 @@ static void s390_pcihost_hot_unplug(HotplugHandler 
*hotplug_dev,
 bus = pci_get_bus(pci_dev);
 devfn = pci_dev->devfn;
 object_unparent(OBJECT(pci_dev));
+fmb_timer_free(pbdev);
 s390_pci_msix_free(pbdev);
 s390_pci_iommu_free(s, bus, devfn);
 pbdev->pdev = NULL;
@@ -1147,7 +1148,7 @@ static void s390_pci_device_reset(DeviceState *dev)
 pci_dereg_ioat(pbdev->iommu);
 }
 
-pbdev->fmb_addr = 0;
+fmb_timer_free(pbdev);
 }
 
 static void s390_pci_get_fid(Object *obj, Visitor *v, const char *name,
diff --git a/hw/s390x/s390-pci-bus.h b/hw/s390x/s390-pci-bus.h
index 1f7f9b5814..bfbbaca26c 100644
--- a/hw/s390x/s390-pci-bus.h
+++ b/hw/s390x/s390-pci-bus.h
@@ -286,6 +286,29 @@ typedef struct S390PCIIOMMUTable {
 S390PCIIOMMU *iommu[PCI_SLOT_MAX];
 } S390PCIIOMMUTable;
 
+/* Function Measurement Block */
+#define DEFAULT_MUI 4000
+#define UPDATE_U_BIT 0x1ULL
+#define FMBK_MASK 0xfULL
+
+typedef struct ZpciFmbFmt0 {
+uint64_t dma_rbytes;
+uint64_t dma_wbytes;
+} ZpciFmbFmt0;
+
+typedef struct ZpciFmb {
+uint8_t format;
+uint8_t fmt_ind[3];
+uint32_t sample;
+uint64_t last_update;
+uint64_t ld_ops;
+uint64_t st_ops;
+uint64_t stb_ops;
+uint64_t rpcit_ops;
+ZpciFmbFmt0 fmt0;
+} ZpciFmb;
+QEMU_BUILD_BUG_MSG(offsetof(ZpciFmb, fmt0) != 48, "padding in ZpciFmb");
+
 struct S390PCIBusDevice {
 DeviceState qdev;
 PCIDevice *pdev;
@@ -297,6 +320,8 @@ struct S390PCIBusDevice {
 uint32_t fid;
 bool fid_defined;
 uint64_t fmb_addr;
+ZpciFmb fmb;
+QEMUTimer *fmb_timer;
 uint8_t isc;
 uint16_t noi;
 uint16_t maxstbl;
diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c
index 7b61367ee3..1ed5cb91d0 100644
--- a/hw/s390x/s390-pci-inst.c
+++ b/hw/s390x/s390-pci-inst.c
@@ -19,6 +19,7 @@
 #include "exec/memory-internal.h"
 #include "qemu/error-report.h"
 #include "sysemu/hw_accel.h"
+#include "hw/s390x/tod.h"
 
 #ifndef DEBUG_S390PCI_INST
 #define DEBUG_S390PCI_INST  0
@@ -293,7 +294,7 @@ int clp_service_call(S390CPU *cpu, uint8_t r2, uintptr_t ra)
 resgrp->fr = 1;
 stq_p(&resgrp->dasm, 0);
 stq_p(&resgrp->msia, ZPCI_MSI_ADDR);
-stw_p(&resgrp->mui, 0);
+stw_p(&resgrp->mui, DEFAULT_MUI);
 stw_p(&resgrp->i, 128);
 stw_p(&resgrp->maxstbl, 128);
 resgrp->version = 0;
@@ -456,6 +457,10 @@ int pcilg_service_call(S390CPU *cpu, uint8_t r1, uint8_t 
r2, uintptr_t ra)
 return 0;
 }
 
+if (pbdev->fmb_addr) {
+pbdev->fmb.ld_ops++;
+}
+
 env->regs[r1] = data;
 setcc(cpu, ZPCI_PCI_LS_OK);
 return 0;
@@ -561,6 +566,10 @@ int pcistg_service_call(S390CPU *cpu, uint8_t r1, uint8_t 
r2, uintptr_t ra)
 return 0;
 }
 
+if (pbdev->fmb_addr) {
+pbdev->fmb.st_ops++;
+}
+
 setcc(cpu, ZPCI_PCI_LS_OK);
 return 0;
 }
@@ -681,6 +690,9 @@ err:
 s390_set_status_code(env, r1, ZPCI_PCI_ST_FUNC_IN_ERR);
 s390_pci_generate_error_event(error, pbdev->fh, pbdev->fid, start, 0);
 } else {
+if (pbdev->fmb_addr) {
+pbdev->fmb.rpcit_ops++;
+}
 setcc(cpu, ZPCI_PCI_LS_OK);
 }
 return 0;
@@ -783,6 +795,10 @@ int pcistb_service_call(S390CPU *cpu, uint8_t r1, uint8_t 
r3, uint64_t gaddr,
 }
 }
 
+if (pbdev->fmb_addr) {
+pbdev->fmb.stb_ops++;
+}
+
 setcc(cpu, ZPCI_PCI_LS_OK);
 return 0;
 
@@ -889,6 +905,63 @@ void pci_dereg_ioat(S390PCIIOMMU *iommu)
 iommu->g_iota = 0;
 }
 
+void fmb_timer_free(S390PCIBusDevice *pbdev)
+{
+if (pbdev->fmb_timer) {
+timer_del(pbdev->fmb_timer);
+timer_free(pbdev->fmb_timer);
+pbdev->fmb_timer = NULL;

Re: [Qemu-devel] [PATCH v2] s390x/pci: add common fmb

2018-10-23 Thread Yi Min Zhao




在 2018/10/22 下午8:17, Thomas Huth 写道:

On 2018-10-22 10:02, Yi Min Zhao wrote:

The common function measurement block (FMB) is used to report counters of
successfully issued pcilg/stg/stb and rpcit instructions. This patch
introduces a new struct ZpciFmb and schedules a timer callback that
copies the FMB to guest memory at an interval which is set to
4s by default. If an attempt to update the FMB fails, an error event
is generated. After the pcilg/stg/stb and rpcit interception
handlers complete successfully, the related counter is increased. The guest
can pass a null address to switch off the FMB and stop the corresponding
timer.

Signed-off-by: Yi Min Zhao 
Reviewed-by: Pierre Morel 
---

[...]

+static int fmb_do_update(S390PCIBusDevice *pbdev, uint8_t offset, int len)
+{
+MemTxResult ret;
+
+ret = address_space_write(&address_space_memory,
+  pbdev->fmb_addr + (uint64_t)offset,
+  MEMTXATTRS_UNSPECIFIED,
+  (uint8_t *)&pbdev->fmb + offset,
+  len);
+if (ret) {
+s390_pci_generate_error_event(ERR_EVENT_FMBA, pbdev->fh, pbdev->fid,
+  pbdev->fmb_addr, 0);
+fmb_timer_free(pbdev);
+}
+
+return ret;
+}
+
+static void fmb_update(void *opaque)
+{
+S390PCIBusDevice *pbdev = opaque;
+int64_t t = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL);
+uint8_t offset = offsetof(ZpciFmb, last_update);
+
+/* Update U bit */
+pbdev->fmb.last_update |= UPDATE_U_BIT;
+if (fmb_do_update(pbdev, offset, sizeof(uint64_t))) {
+return;
+}
+
+/* Update FMB counters */
+pbdev->fmb.sample++;
+if (fmb_do_update(pbdev, 0, sizeof(ZpciFmb))) {
+return;
+}
+
+/* Clear U bit and update the time */
+pbdev->fmb.last_update = time2tod(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL));
+pbdev->fmb.last_update &= ~UPDATE_U_BIT;
+if (fmb_do_update(pbdev, offset, sizeof(uint64_t))) {
+return;
+}
+
+timer_mod(pbdev->fmb_timer, t + DEFAULT_MUI);
+}

Sorry for noticing this in v1 already, but is this code endianess-safe?
I.e. can this also work with qemu-system-s390x running with TCG on a x86
host? I think you might have to use something like this here instead:

   pbdev->fmb.sample = cpu_to_be32(be32_to_cpu(pbdev->fmb.sample) + 1);

etc.

  Thomas



Aha!!! Yes, I think you're right. Indeed, we should consider endianess.

--
Yi Min




Re: [Qemu-devel] [PATCH v2] s390x/pci: add common fmb

2018-10-23 Thread Yi Min Zhao




在 2018/10/24 上午5:25, Cornelia Huck 写道:

On Mon, 22 Oct 2018 13:17:34 +0100
Thomas Huth  wrote:


On 2018-10-22 10:02, Yi Min Zhao wrote:

The common function measurement block (FMB) is used to report counters of
successfully issued pcilg/stg/stb and rpcit instructions. This patch
introduces a new struct ZpciFmb and schedules a timer callback that
copies the FMB to guest memory at an interval which is set to
4s by default. If an attempt to update the FMB fails, an error event
is generated. After the pcilg/stg/stb and rpcit interception
handlers complete successfully, the related counter is increased. The guest
can pass a null address to switch off the FMB and stop the corresponding
timer.

Signed-off-by: Yi Min Zhao 
Reviewed-by: Pierre Morel 
---

[...]

+static int fmb_do_update(S390PCIBusDevice *pbdev, uint8_t offset, int len)
+{
+MemTxResult ret;
+
+ret = address_space_write(&address_space_memory,
+  pbdev->fmb_addr + (uint64_t)offset,
+  MEMTXATTRS_UNSPECIFIED,
+  (uint8_t *)&pbdev->fmb + offset,
+  len);
+if (ret) {
+s390_pci_generate_error_event(ERR_EVENT_FMBA, pbdev->fh, pbdev->fid,
+  pbdev->fmb_addr, 0);
+fmb_timer_free(pbdev);
+}
+
+return ret;
+}
+
+static void fmb_update(void *opaque)
+{
+S390PCIBusDevice *pbdev = opaque;
+int64_t t = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL);
+uint8_t offset = offsetof(ZpciFmb, last_update);
+
+/* Update U bit */
+pbdev->fmb.last_update |= UPDATE_U_BIT;
+if (fmb_do_update(pbdev, offset, sizeof(uint64_t))) {
+return;
+}
+
+/* Update FMB counters */
+pbdev->fmb.sample++;
+if (fmb_do_update(pbdev, 0, sizeof(ZpciFmb))) {
+return;
+}
+
+/* Clear U bit and update the time */
+pbdev->fmb.last_update = time2tod(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL));
+pbdev->fmb.last_update &= ~UPDATE_U_BIT;
+if (fmb_do_update(pbdev, offset, sizeof(uint64_t))) {
+return;
+}
+
+timer_mod(pbdev->fmb_timer, t + DEFAULT_MUI);
+}

Sorry for noticing this in v1 already, but is this code endianess-safe?
I.e. can this also work with qemu-system-s390x running with TCG on a x86
host? I think you might have to use something like this here instead:

   pbdev->fmb.sample = cpu_to_be32(be32_to_cpu(pbdev->fmb.sample) + 1);

etc.

Agreed, that may need some endianness handling.

I would test this with tcg on a LE host, but how can I verify this? Yi
Min, do you have some kind of test tooling you can share?


There's no tool right now. You could start up a guest and then, in the guest,
install

the PCI driver and read the FMB values from /sys/kernel/debug/pci//statistics.

If there is an endianness error, I think the values must look wrong.
The correct behavior is that the values start from 0 and increase at each interval.

--
Yi Min




Re: [Qemu-devel] [PATCH] s390x/pci: add common fmb

2018-09-19 Thread Yi Min Zhao

No comment?


在 2018/9/4 下午5:15, Yi Min Zhao 写道:

The common function measurement block (FMB) is used to report counters of
successfully issued pcilg/stg/stb and rpcit instructions. This patch
introduces a new struct ZpciFmb and schedules a timer callback that
copies the FMB to guest memory at an interval which is set to
4s by default. If an attempt to update the FMB fails, an error event
is generated. After the pcilg/stg/stb and rpcit interception
handlers complete successfully, the related counter is increased. The guest
can pass a null address to switch off the FMB and stop the corresponding
timer.

Signed-off-by: Yi Min Zhao 
Reviewed-by: Pierre Morel
---
  hw/s390x/s390-pci-bus.c  |   3 +-
  hw/s390x/s390-pci-bus.h  |  24 +++
  hw/s390x/s390-pci-inst.c | 105 +--
  hw/s390x/s390-pci-inst.h |   1 +
  4 files changed, 129 insertions(+), 4 deletions(-)

diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
index e3e0ebb7f6..7bd0b9d1e5 100644
--- a/hw/s390x/s390-pci-bus.c
+++ b/hw/s390x/s390-pci-bus.c
@@ -967,6 +967,7 @@ static void s390_pcihost_hot_unplug(HotplugHandler 
*hotplug_dev,
  bus = pci_get_bus(pci_dev);
  devfn = pci_dev->devfn;
  object_unparent(OBJECT(pci_dev));
+fmb_timer_free(pbdev);
  s390_pci_msix_free(pbdev);
  s390_pci_iommu_free(s, bus, devfn);
  pbdev->pdev = NULL;
@@ -1139,7 +1140,7 @@ static void s390_pci_device_reset(DeviceState *dev)
  pci_dereg_ioat(pbdev->iommu);
  }
  
-pbdev->fmb_addr = 0;

+fmb_timer_free(pbdev);
  }
  
  static void s390_pci_get_fid(Object *obj, Visitor *v, const char *name,

diff --git a/hw/s390x/s390-pci-bus.h b/hw/s390x/s390-pci-bus.h
index 1f7f9b5814..fdf13a19c0 100644
--- a/hw/s390x/s390-pci-bus.h
+++ b/hw/s390x/s390-pci-bus.h
@@ -286,6 +286,28 @@ typedef struct S390PCIIOMMUTable {
  S390PCIIOMMU *iommu[PCI_SLOT_MAX];
  } S390PCIIOMMUTable;
  
+/* Function Measurement Block */

+#define DEFAULT_MUI 4000
+#define UPDATE_U_BIT 0x1ULL
+#define FMBK_MASK 0xfULL
+
+typedef struct ZpciFmbFmt0 {
+uint64_t dma_rbytes;
+uint64_t dma_wbytes;
+} ZpciFmbFmt0;
+
+typedef struct ZpciFmb {
+uint8_t format;
+uint8_t fmt_ind[3];
+uint32_t sample;
+uint64_t last_update;
+uint64_t ld_ops;
+uint64_t st_ops;
+uint64_t stb_ops;
+uint64_t rpcit_ops;
+ZpciFmbFmt0 fmt0;
+} QEMU_PACKED __attribute((__aligned__(8))) ZpciFmb;
+
  struct S390PCIBusDevice {
  DeviceState qdev;
  PCIDevice *pdev;
@@ -297,6 +319,8 @@ struct S390PCIBusDevice {
  uint32_t fid;
  bool fid_defined;
  uint64_t fmb_addr;
+ZpciFmb fmb;
+QEMUTimer *fmb_timer;
  uint8_t isc;
  uint16_t noi;
  uint16_t maxstbl;
diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c
index 7b61367ee3..1ed5cb91d0 100644
--- a/hw/s390x/s390-pci-inst.c
+++ b/hw/s390x/s390-pci-inst.c
@@ -19,6 +19,7 @@
  #include "exec/memory-internal.h"
  #include "qemu/error-report.h"
  #include "sysemu/hw_accel.h"
+#include "hw/s390x/tod.h"
  
  #ifndef DEBUG_S390PCI_INST

  #define DEBUG_S390PCI_INST  0
@@ -293,7 +294,7 @@ int clp_service_call(S390CPU *cpu, uint8_t r2, uintptr_t ra)
  resgrp->fr = 1;
  stq_p(&resgrp->dasm, 0);
  stq_p(&resgrp->msia, ZPCI_MSI_ADDR);
-stw_p(&resgrp->mui, 0);
+stw_p(&resgrp->mui, DEFAULT_MUI);
  stw_p(&resgrp->i, 128);
  stw_p(&resgrp->maxstbl, 128);
  resgrp->version = 0;
@@ -456,6 +457,10 @@ int pcilg_service_call(S390CPU *cpu, uint8_t r1, uint8_t 
r2, uintptr_t ra)
  return 0;
  }
  
+if (pbdev->fmb_addr) {

+pbdev->fmb.ld_ops++;
+}
+
  env->regs[r1] = data;
  setcc(cpu, ZPCI_PCI_LS_OK);
  return 0;
@@ -561,6 +566,10 @@ int pcistg_service_call(S390CPU *cpu, uint8_t r1, uint8_t 
r2, uintptr_t ra)
  return 0;
  }
  
+if (pbdev->fmb_addr) {

+pbdev->fmb.st_ops++;
+}
+
  setcc(cpu, ZPCI_PCI_LS_OK);
  return 0;
  }
@@ -681,6 +690,9 @@ err:
  s390_set_status_code(env, r1, ZPCI_PCI_ST_FUNC_IN_ERR);
  s390_pci_generate_error_event(error, pbdev->fh, pbdev->fid, start, 0);
  } else {
+if (pbdev->fmb_addr) {
+pbdev->fmb.rpcit_ops++;
+}
  setcc(cpu, ZPCI_PCI_LS_OK);
  }
  return 0;
@@ -783,6 +795,10 @@ int pcistb_service_call(S390CPU *cpu, uint8_t r1, uint8_t 
r3, uint64_t gaddr,
  }
  }
  
+if (pbdev->fmb_addr) {

+pbdev->fmb.stb_ops++;
+}
+
  setcc(cpu, ZPCI_PCI_LS_OK);
  return 0;
  
@@ -889,6 +905,63 @@ void pci_dereg_ioat(S390PCIIOMMU *iommu)

  iommu->g_iota = 0;
  }
  
+void fmb_timer_free(S390PCIBusDevice *pbdev)

+{
+if (pbdev->fmb_timer) {
+timer_del(pbdev->fmb_timer);
+timer_free(pbdev->fmb_timer);
+pbdev->fmb_timer = 

Re: [Qemu-devel] [PATCH] s390x/pci: add common fmb

2018-09-25 Thread Yi Min Zhao




在 2018/9/20 下午6:06, Cornelia Huck 写道:

On Tue,  4 Sep 2018 17:15:49 +0800
Yi Min Zhao  wrote:


The common function measurement block (FMB) is used to report counters of
successfully issued pcilg/stg/stb and rpcit instructions. This patch
introduces a new struct ZpciFmb and schedules a timer callback that
copies the FMB to guest memory at an interval which is set to
4s by default. If an attempt to update the FMB fails, an error event
is generated. After the pcilg/stg/stb and rpcit interception
handlers complete successfully, the related counter is increased. The guest
can pass a null address to switch off the FMB and stop the corresponding
timer.

Hard to review without documentation, but some comments below.


Signed-off-by: Yi Min Zhao 
Reviewed-by: Pierre Morel
---
  hw/s390x/s390-pci-bus.c  |   3 +-
  hw/s390x/s390-pci-bus.h  |  24 +++
  hw/s390x/s390-pci-inst.c | 105 +--
  hw/s390x/s390-pci-inst.h |   1 +
  4 files changed, 129 insertions(+), 4 deletions(-)

(...)


diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c
index 7b61367ee3..1ed5cb91d0 100644
--- a/hw/s390x/s390-pci-inst.c
+++ b/hw/s390x/s390-pci-inst.c
@@ -456,6 +457,10 @@ int pcilg_service_call(S390CPU *cpu, uint8_t r1, uint8_t 
r2, uintptr_t ra)
  return 0;
  }
  
+if (pbdev->fmb_addr) {

+pbdev->fmb.ld_ops++;
+}

As fmb is a part of the structure, just update it unconditionally?
You'll only copy it to the guest if measurements are active anyway.
A non-NULL fmb_addr means that FMB updating is active, so the counter is
updated once the instruction has been handled successfully.



+
  env->regs[r1] = data;
  setcc(cpu, ZPCI_PCI_LS_OK);
  return 0;

(...)


@@ -889,6 +905,63 @@ void pci_dereg_ioat(S390PCIIOMMU *iommu)
  iommu->g_iota = 0;
  }
  
+void fmb_timer_free(S390PCIBusDevice *pbdev)

+{
+if (pbdev->fmb_timer) {
+timer_del(pbdev->fmb_timer);
+timer_free(pbdev->fmb_timer);
+pbdev->fmb_timer = NULL;
+}
+pbdev->fmb_addr = 0;
+memset(&pbdev->fmb, 0, sizeof(ZpciFmb));

Maybe move clearing the buffer to before you enable measurements
instead? (Needed to make my suggestion above work correctly.)
I think it's better to keep the clearing code here. According to the spec,
the buffer should be cleared when the FMB is disabled, although your
suggestion would also work correctly.



+}
+
+static int fmb_do_update(S390PCIBusDevice *pbdev, uint8_t offset, int len)
+{
+MemTxResult ret;
+
+ret = address_space_write(&address_space_memory,
+  pbdev->fmb_addr + (uint64_t)offset,
+  MEMTXATTRS_UNSPECIFIED,
+  (uint8_t *)&pbdev->fmb + offset,
+  len);
+if (ret) {
+s390_pci_generate_error_event(ERR_EVENT_FMBA, pbdev->fh, pbdev->fid,
+  pbdev->fmb_addr, 0);
+fmb_timer_free(pbdev);

So, failure to update the guest-provided area is supposed to disable
measurements?

According to the spec, we should stop updating the FMB.



+}
+
+return ret;
+}
+
+static void fmb_update(void *opaque)
+{
+S390PCIBusDevice *pbdev = opaque;
+int64_t t = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL);
+uint8_t offset = offsetof(ZpciFmb, last_update);
+
+/* Update U bit */
+pbdev->fmb.last_update |= UPDATE_U_BIT;
+if (fmb_do_update(pbdev, offset, sizeof(uint64_t))) {
+return;
+}
+
+/* Update FMB counters */
+pbdev->fmb.sample++;
+if (fmb_do_update(pbdev, 0, sizeof(ZpciFmb))) {
+return;
+}
+
+/* Clear U bit and update the time */
+pbdev->fmb.last_update = time2tod(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL));
+pbdev->fmb.last_update &= ~UPDATE_U_BIT;
+if (fmb_do_update(pbdev, offset, sizeof(uint64_t))) {
+return;
+}
+
+timer_mod(pbdev->fmb_timer, t + DEFAULT_MUI);
+}
+
  int mpcifc_service_call(S390CPU *cpu, uint8_t r1, uint64_t fiba, uint8_t ar,
  uintptr_t ra)
  {
@@ -1018,9 +1091,35 @@ int mpcifc_service_call(S390CPU *cpu, uint8_t r1, 
uint64_t fiba, uint8_t ar,
  s390_set_status_code(env, r1, ZPCI_MOD_ST_SEQUENCE);
  }
  break;
-case ZPCI_MOD_FC_SET_MEASURE:
-pbdev->fmb_addr = ldq_p(&fib.fmb_addr);
+case ZPCI_MOD_FC_SET_MEASURE: {
+uint64_t fmb_addr = ldq_p(&fib.fmb_addr);
+
+if (fmb_addr & FMBK_MASK) {
+cc = ZPCI_PCI_LS_ERR;
+s390_pci_generate_error_event(ERR_EVENT_FMBPRO, pbdev->fh,
+  pbdev->fid, fmb_addr, 0);
+fmb_timer_free(pbdev);
+break;
+}
+
+if (!fmb_addr) {
+/* Stop updating FMB. */
+fmb_timer_free(pbdev);
+break;
+}
+
+pbdev->fmb_addr = fmb_addr;
+if (!pbdev->fmb_timer) {
+

Re: [Qemu-devel] [PATCH] s390x/pci: add common fmb

2018-09-28 Thread Yi Min Zhao




在 2018/9/19 下午3:53, Thomas Huth 写道:

On 2018-09-19 09:08, Yi Min Zhao wrote:

No comment?

Since the zPCI spec is not available to the public, it's quite hard to
give any valuable comments here... I'll try anyway...


在 2018/9/4 下午5:15, Yi Min Zhao 写道:

The common function measurement block (FMB) is used to report counters of
successfully issued pcilg/stg/stb and rpcit instructions. This patch
introduces a new struct ZpciFmb and schedules a timer callback that
copies the FMB to guest memory at an interval which is set to
4s by default. If an attempt to update the FMB fails, an error event
is generated. After the pcilg/stg/stb and rpcit interception
handlers complete successfully, the related counter is increased. The guest
can pass a null address to switch off the FMB and stop the corresponding
timer.

Signed-off-by: Yi Min Zhao 
Reviewed-by: Pierre Morel
---
   hw/s390x/s390-pci-bus.c  |   3 +-
   hw/s390x/s390-pci-bus.h  |  24 +++
   hw/s390x/s390-pci-inst.c | 105
+--
   hw/s390x/s390-pci-inst.h |   1 +
   4 files changed, 129 insertions(+), 4 deletions(-)

diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
index e3e0ebb7f6..7bd0b9d1e5 100644
--- a/hw/s390x/s390-pci-bus.c
+++ b/hw/s390x/s390-pci-bus.c
@@ -967,6 +967,7 @@ static void s390_pcihost_hot_unplug(HotplugHandler
*hotplug_dev,
   bus = pci_get_bus(pci_dev);
   devfn = pci_dev->devfn;
   object_unparent(OBJECT(pci_dev));
+    fmb_timer_free(pbdev);
   s390_pci_msix_free(pbdev);
   s390_pci_iommu_free(s, bus, devfn);
   pbdev->pdev = NULL;
@@ -1139,7 +1140,7 @@ static void s390_pci_device_reset(DeviceState *dev)
   pci_dereg_ioat(pbdev->iommu);
   }
   -    pbdev->fmb_addr = 0;
+    fmb_timer_free(pbdev);
   }
     static void s390_pci_get_fid(Object *obj, Visitor *v, const char
*name,
diff --git a/hw/s390x/s390-pci-bus.h b/hw/s390x/s390-pci-bus.h
index 1f7f9b5814..fdf13a19c0 100644
--- a/hw/s390x/s390-pci-bus.h
+++ b/hw/s390x/s390-pci-bus.h
@@ -286,6 +286,28 @@ typedef struct S390PCIIOMMUTable {
   S390PCIIOMMU *iommu[PCI_SLOT_MAX];
   } S390PCIIOMMUTable;
   +/* Function Measurement Block */
+#define DEFAULT_MUI 4000
+#define UPDATE_U_BIT 0x1ULL
+#define FMBK_MASK 0xfULL
+
+typedef struct ZpciFmbFmt0 {
+    uint64_t dma_rbytes;
+    uint64_t dma_wbytes;
+} ZpciFmbFmt0;
+
+typedef struct ZpciFmb {
+    uint8_t format;
+    uint8_t fmt_ind[3];
+    uint32_t sample;
+    uint64_t last_update;
+    uint64_t ld_ops;
+    uint64_t st_ops;
+    uint64_t stb_ops;
+    uint64_t rpcit_ops;
+    ZpciFmbFmt0 fmt0;
+} QEMU_PACKED __attribute((__aligned__(8))) ZpciFmb;

All the fields in this struct are naturally aligned already, so I'd
maybe rather drop the QEMU_PACKED and add a
QEMU_BUILD_BUG_MSG(sizeof(ZpciFmb) != xx, ...) statement afterwards.
Currently we only implement FMT0. There are other FMTs to be implemented
in the future, so there would be a union here and we can't give a specific
size to QEMU_BUILD_BUG_MSG.

Can we use the max size for checking?


Also the __aligned__(8) is likely not going to work as expected...


   struct S390PCIBusDevice {
   DeviceState qdev;
   PCIDevice *pdev;
@@ -297,6 +319,8 @@ struct S390PCIBusDevice {
   uint32_t fid;
   bool fid_defined;
   uint64_t fmb_addr;
+    ZpciFmb fmb;

... since you embed it here in another struct which does not have any
alignment requirements. This is likely going to cause an error with GCC
8.1, we've had this problem in the past already:

https://git.qemu.org/?p=qemu.git;a=commitdiff;h=a6e4385dea94850d7b06b0

Ah... I didn't test the code with GCC 8+. The GCC I used is 7.2.
It should get the same warning.


Is the __align__(8) required at all? As far as I understand the code,
the struct is not living inside the guest memory, is it? So you could
simply drop the __align__(8).
But if you need it, I think you have to allocate the memory for ZpciFmb
separately (and use a "ZpciFmb *fmb" here instead).

I want to copy the entire structure to guest memory when updating the FMB.
Copying each member separately, in multiple steps, is not a good idea.
As you suggest, I think we have to allocate memory for ZpciFmb.



+    QEMUTimer *fmb_timer;
   uint8_t isc;
   uint16_t noi;
   uint16_t maxstbl;
diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c
index 7b61367ee3..1ed5cb91d0 100644
--- a/hw/s390x/s390-pci-inst.c
+++ b/hw/s390x/s390-pci-inst.c

[...]

+static int fmb_do_update(S390PCIBusDevice *pbdev, uint8_t offset, int
len)

Any reason for making "offset" an uint8_t only? Seems unnecessary to me
... since you use it for an "offsetof()" value below, I'd like to
suggest to use size_t instead...

Yes.



+{
+    MemTxResult ret;
+
+    ret = address_space_write(&address_space_memory,
+  pbdev->fmb_addr + (uint64_t)offset,

... then you can also

[Qemu-devel] [PATCH v2 1/1] sandbox: disable -sandbox if CONFIG_SECCOMP undefined

2018-05-15 Thread Yi Min Zhao
If CONFIG_SECCOMP is undefined, the option 'elevateprivileges' remains
compiled. This would make libvirt set the corresponding capability and
then trigger the guest startup fails. So this patch excludes the code
regarding seccomp staff if CONFIG_SECCOMP is undefined.

Signed-off-by: Yi Min Zhao 
---
 vl.c | 13 -
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/vl.c b/vl.c
index 806eec2ef6..b22d158f5f 100644
--- a/vl.c
+++ b/vl.c
@@ -257,6 +257,7 @@ static QemuOptsList qemu_rtc_opts = {
 },
 };
 
+#ifdef CONFIG_SECCOMP
 static QemuOptsList qemu_sandbox_opts = {
 .name = "sandbox",
 .implied_opt_name = "enable",
@@ -285,6 +286,7 @@ static QemuOptsList qemu_sandbox_opts = {
 { /* end of list */ }
 },
 };
+#endif
 
 static QemuOptsList qemu_option_rom_opts = {
 .name = "option-rom",
@@ -1041,10 +1043,10 @@ static int bt_parse(const char *opt)
 return 1;
 }
 
+#ifdef CONFIG_SECCOMP
 static int parse_sandbox(void *opaque, QemuOpts *opts, Error **errp)
 {
 if (qemu_opt_get_bool(opts, "enable", false)) {
-#ifdef CONFIG_SECCOMP
 uint32_t seccomp_opts = QEMU_SECCOMP_SET_DEFAULT
 | QEMU_SECCOMP_SET_OBSOLETE;
 const char *value = NULL;
@@ -1114,14 +1116,11 @@ static int parse_sandbox(void *opaque, QemuOpts *opts, 
Error **errp)
  "in the kernel");
 return -1;
 }
-#else
-error_report("seccomp support is disabled");
-return -1;
-#endif
 }
 
 return 0;
 }
+#endif
 
 static int parse_name(void *opaque, QemuOpts *opts, Error **errp)
 {
@@ -3074,7 +3073,9 @@ int main(int argc, char **argv, char **envp)
 qemu_add_opts(&qemu_mem_opts);
 qemu_add_opts(&qemu_smp_opts);
 qemu_add_opts(&qemu_boot_opts);
+#ifdef CONFIG_SECCOMP
 qemu_add_opts(&qemu_sandbox_opts);
+#endif
 qemu_add_opts(&qemu_add_fd_opts);
 qemu_add_opts(&qemu_object_opts);
 qemu_add_opts(&qemu_tpmdev_opts);
@@ -4071,10 +4072,12 @@ int main(int argc, char **argv, char **envp)
 exit(1);
 }
 
+#ifdef CONFIG_SECCOMP
 if (qemu_opts_foreach(qemu_find_opts("sandbox"),
   parse_sandbox, NULL, NULL)) {
 exit(1);
 }
+#endif
 
 if (qemu_opts_foreach(qemu_find_opts("name"),
   parse_name, NULL, NULL)) {
-- 
Yi Min




[Qemu-devel] [PATCH v2 0/1] Bug: Sandbox: libvirt breakdowns qemu guest

2018-05-15 Thread Yi Min Zhao
1. Problem Description
==
If QEMU is built without seccomp support, 'elevateprivileges' remains compiled.
This option of sandbox is treated as an indication for seccomp blacklist support
in libvirt. This behavior is introduced by the libvirt commits 31ca6a5 and
3527f9d. It would make libvirt build wrong QEMU cmdline, and then the guest
startup would fail.

2. Libvirt Log
==
qemu-system-s390x: -sandbox on,obsolete=deny,elevateprivileges=deny,spawn=deny,\
resourcecontrol=deny: seccomp support is disabled

3. Fixup

Compile the code related to sandbox only when CONFIG_SECCOMP is defined.

Yi Min Zhao (1):
  sandbox: disable -sandbox if CONFIG_SECCOMP undefined

 vl.c | 13 -
 1 file changed, 8 insertions(+), 5 deletions(-)

-- 
Yi Min




Re: [Qemu-devel] [PATCH v2 1/1] sandbox: disable -sandbox if CONFIG_SECCOMP undefined

2018-05-15 Thread Yi Min Zhao



在 2018/5/15 下午11:25, Eric Blake 写道:

On 05/15/2018 06:33 AM, Yi Min Zhao wrote:

If CONFIG_SECCOMP is undefined, the option 'elevateprivileges' remains
compiled. This would make libvirt set the corresponding capability and
then trigger the guest startup fails. So this patch excludes the code


s/trigger the guest startup fails/trigger failure during guest startup/


regarding seccomp staff if CONFIG_SECCOMP is undefined.


s/staff/command line options/



Signed-off-by: Yi Min Zhao 
---
  vl.c | 13 -
  1 file changed, 8 insertions(+), 5 deletions(-)



A maintainer can touch up the commit message, so:
Reviewed-by: Eric Blake 


Thanks for your comments! Have updated commit msg.




Re: [Qemu-devel] [PATCH v2 1/1] sandbox: disable -sandbox if CONFIG_SECCOMP undefined

2018-05-17 Thread Yi Min Zhao

Add Paolo to CC list. @Paolo, expect your comment. Thanks very much!


在 2018/5/15 下午11:25, Eric Blake 写道:

On 05/15/2018 06:33 AM, Yi Min Zhao wrote:

If CONFIG_SECCOMP is undefined, the option 'elevateprivileges' remains
compiled. This would make libvirt set the corresponding capability and
then trigger the guest startup fails. So this patch excludes the code


s/trigger the guest startup fails/trigger failure during guest startup/


regarding seccomp staff if CONFIG_SECCOMP is undefined.


s/staff/command line options/



Signed-off-by: Yi Min Zhao 
---
  vl.c | 13 -
  1 file changed, 8 insertions(+), 5 deletions(-)



A maintainer can touch up the commit message, so:
Reviewed-by: Eric Blake 






Re: [Qemu-devel] [PATCH v2 1/1] sandbox: disable -sandbox if CONFIG_SECCOMP undefined

2018-05-17 Thread Yi Min Zhao



在 2018/5/17 下午8:41, Eduardo Otubo 写道:

On 15/05/2018 - 19:33:48, Yi Min Zhao wrote:

If CONFIG_SECCOMP is undefined, the option 'elevateprivileges' remains
compiled. This would make libvirt set the corresponding capability and
then trigger the guest startup fails. So this patch excludes the code
regarding seccomp staff if CONFIG_SECCOMP is undefined.

Just a suggestion for the next patch you send: If it's a single patch, you don't
need to format it with a cover-letter. Just put all the description in the body,
or if you need to add a text that shouldn't be included in the commit message,
just add it after the "---" after Signed-off-by.

OK. Thanks for your suggestion.



Signed-off-by: Yi Min Zhao 
---
  vl.c | 13 -
  1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/vl.c b/vl.c
index 806eec2ef6..b22d158f5f 100644
--- a/vl.c
+++ b/vl.c
@@ -257,6 +257,7 @@ static QemuOptsList qemu_rtc_opts = {
  },
  };
  
+#ifdef CONFIG_SECCOMP

  static QemuOptsList qemu_sandbox_opts = {
  .name = "sandbox",
  .implied_opt_name = "enable",
@@ -285,6 +286,7 @@ static QemuOptsList qemu_sandbox_opts = {
  { /* end of list */ }
  },
  };
+#endif
  
  static QemuOptsList qemu_option_rom_opts = {

  .name = "option-rom",
@@ -1041,10 +1043,10 @@ static int bt_parse(const char *opt)
  return 1;
  }
  
+#ifdef CONFIG_SECCOMP

  static int parse_sandbox(void *opaque, QemuOpts *opts, Error **errp)
  {
  if (qemu_opt_get_bool(opts, "enable", false)) {
-#ifdef CONFIG_SECCOMP
  uint32_t seccomp_opts = QEMU_SECCOMP_SET_DEFAULT
  | QEMU_SECCOMP_SET_OBSOLETE;
  const char *value = NULL;
@@ -1114,14 +1116,11 @@ static int parse_sandbox(void *opaque, QemuOpts *opts, 
Error **errp)
   "in the kernel");
  return -1;
  }
-#else
-error_report("seccomp support is disabled");
-return -1;
-#endif

Any reason not to keep the error message on the new #endif location?

The error report was originally compiled only when CONFIG_SECCOMP is undefined.
This patch excludes the entire function if CONFIG_SECCOMP is undefined,
so the error report is no longer needed.



  }
  
  return 0;

  }
+#endif
  
  static int parse_name(void *opaque, QemuOpts *opts, Error **errp)

  {
@@ -3074,7 +3073,9 @@ int main(int argc, char **argv, char **envp)
  qemu_add_opts(&qemu_mem_opts);
  qemu_add_opts(&qemu_smp_opts);
  qemu_add_opts(&qemu_boot_opts);
+#ifdef CONFIG_SECCOMP
  qemu_add_opts(&qemu_sandbox_opts);
+#endif
  qemu_add_opts(&qemu_add_fd_opts);
  qemu_add_opts(&qemu_object_opts);
  qemu_add_opts(&qemu_tpmdev_opts);
@@ -4071,10 +4072,12 @@ int main(int argc, char **argv, char **envp)
  exit(1);
  }
  
+#ifdef CONFIG_SECCOMP

  if (qemu_opts_foreach(qemu_find_opts("sandbox"),
parse_sandbox, NULL, NULL)) {
  exit(1);
  }
+#endif
  
  if (qemu_opts_foreach(qemu_find_opts("name"),

parse_name, NULL, NULL)) {
--
Yi Min


I just wanted a review from Ján, since he is the author of the original libvirt
patch. Does this break libvirt logic in any way? If not, ACK on this patch.



OK.




Re: [Qemu-devel] [PATCH v2 1/1] sandbox: disable -sandbox if CONFIG_SECCOMP undefined

2018-05-19 Thread Yi Min Zhao



在 2018/5/18 下午9:07, Ján Tomko 写道:

On Fri, May 18, 2018 at 11:19:16AM +0200, Eduardo Otubo wrote:

On 18/05/2018 - 09:52:12, Ján Tomko wrote:

On Thu, May 17, 2018 at 02:41:09PM +0200, Eduardo Otubo wrote:
> On 15/05/2018 - 19:33:48, Yi Min Zhao wrote:
> > If CONFIG_SECCOMP is undefined, the option 'elevateprivileges' 
remains
> > compiled. This would make libvirt set the corresponding 
capability and
> > then trigger the guest startup fails. So this patch excludes the 
code

> > regarding seccomp staff if CONFIG_SECCOMP is undefined.
>
> Just a sugestion for the next patch you send: If it's a single 
patch, you don't
> need to format it with a cover-letter. Just put all the 
description in the body,
> or if you need to add a text that shouldn't be included in the 
commit message,

> just add it after the "---" after Signed-off-by.
>
> >
> > Signed-off-by: Yi Min Zhao 
> > ---
> >  vl.c | 13 -
> >  1 file changed, 8 insertions(+), 5 deletions(-)
> >



[...]


Current libvirt logic assumes the -sandbox option is always present.
(IIRC it was introduced in QEMU 1.1 and when we switched from help
scraping to capability probing via QMP for QEMU 1.2, there was no
way to detect it)

This patch fixes the usage of QEMU new enough for seccomp blacklist
(where libvirt enables the sandbox by default),
but breaks the usage of QEMU with compiled out sandbox and
setting
 seccomp_sandbox = 0
in libvirt's qemu.conf:

error: internal error: process exited while connecting to monitor:
qemu-git: -sandbox off: There is no option group 'sandbox'


But now libvirt requires QEMU >= 1.5.0 which already supports
query-command-line-options, so if you want the option gone completely
--without-seccomp, I can add the code that probes for it and
make seccomp_sandbox = 0 a no-op if it's compiled out.


This looks like a good solution for the libvirt side. Can you add 
this support

so we can merge this fix?



Patches proposed:
https://www.redhat.com/archives/libvir-list/2018-May/msg01430.html

Jano

Thanks for your work!




Re: [Qemu-devel] [PATCH v2 1/1] sandbox: disable -sandbox if CONFIG_SECCOMP undefined

2018-05-23 Thread Yi Min Zhao



在 2018/5/23 下午3:47, Ján Tomko 写道:

On Sat, May 19, 2018 at 04:20:37PM +0800, Yi Min Zhao wrote:



在 2018/5/18 下午9:07, Ján Tomko 写道:

On Fri, May 18, 2018 at 11:19:16AM +0200, Eduardo Otubo wrote:

On 18/05/2018 - 09:52:12, Ján Tomko wrote:

But now libvirt requires QEMU >= 1.5.0 which already supports
query-command-line-options, so if you want the option gone completely
--without-seccomp, I can add the code that probes for it and
make seccomp_sandbox = 0 a no-op if it's compiled out.


This looks like a good solution for the libvirt side. Can you add
this support
so we can merge this fix?



Patches proposed:
https://www.redhat.com/archives/libvir-list/2018-May/msg01430.html

Jano

Thanks for your work!


Now pushed in libvirt master:
commit b87222a90919040c12fb6d7c8dcc20f944a66495
Author: Ján Tomko 
AuthorDate: 2018-05-18 14:57:51 +0200
Commit: Ján Tomko 
CommitDate: 2018-05-23 09:45:48 +0200

   qemu: only pass -sandbox off if supported

   This way we don't rely on QEMU supplying the -sandbox option
   without CONFIG_SECCOMP.

   Signed-off-by: Ján Tomko 
   Reviewed-by: John Ferlan 

git describe: v4.3.0-258-gb87222a909
https://libvirt.org/git/?p=libvirt.git;a=commitdiff;h=b87222a90919040c12fb6d7c8dcc20f944a66495 



Jano
Thanks! But I have not got response from Paolo.  I have added him to CC 
list.





Re: [Qemu-devel] [PATCH v2 1/1] sandbox: disable -sandbox if CONFIG_SECCOMP undefined

2018-05-23 Thread Yi Min Zhao



在 2018/5/23 下午6:33, Eduardo Otubo 写道:

On 05/23/2018 11:16 AM, Yi Min Zhao wrote:



在 2018/5/23 下午3:47, Ján Tomko 写道:

On Sat, May 19, 2018 at 04:20:37PM +0800, Yi Min Zhao wrote:



在 2018/5/18 下午9:07, Ján Tomko 写道:

On Fri, May 18, 2018 at 11:19:16AM +0200, Eduardo Otubo wrote:

On 18/05/2018 - 09:52:12, Ján Tomko wrote:

But now libvirt requires QEMU >= 1.5.0 which already supports
query-command-line-options, so if you want the option gone 
completely

--without-seccomp, I can add the code that probes for it and
make seccomp_sandbox = 0 a no-op if it's compiled out.


This looks like a good solution for the libvirt side. Can you add
this support
so we can merge this fix?



Patches proposed:
https://www.redhat.com/archives/libvir-list/2018-May/msg01430.html

Jano

Thanks for your work!


Now pushed in libvirt master:
commit b87222a90919040c12fb6d7c8dcc20f944a66495
Author: Ján Tomko 
AuthorDate: 2018-05-18 14:57:51 +0200
Commit: Ján Tomko 
CommitDate: 2018-05-23 09:45:48 +0200

   qemu: only pass -sandbox off if supported

   This way we don't rely on QEMU supplying the -sandbox option
   without CONFIG_SECCOMP.

   Signed-off-by: Ján Tomko 
   Reviewed-by: John Ferlan 

git describe: v4.3.0-258-gb87222a909
https://libvirt.org/git/?p=libvirt.git;a=commitdiff;h=b87222a90919040c12fb6d7c8dcc20f944a66495 



Jano
Thanks! But I have not got response from Paolo.  I have added him to 
CC list.


 I'll just wait one more ACK and will send a pull request on the 
seccomp queue. Thanks for the contribution.




So... what I should do is wait?




Re: [Qemu-devel] [PATCH v2 1/1] sandbox: disable -sandbox if CONFIG_SECCOMP undefined

2018-05-24 Thread Yi Min Zhao



在 2018/5/24 下午9:40, Paolo Bonzini 写道:

On 24/05/2018 09:53, Eduardo Otubo wrote:

Thanks! But I have not got response from Paolo.  I have added him to
CC list.


  I'll just wait one more ACK and will send a pull request on the
seccomp queue. Thanks for the contribution.



So... what I should do is wait?


Yes, even though I think we're safe to proceed without his explicit ack.

The patch is okay; however, as a follow-up, you could consider moving
all the CONFIG_SECCOMP code to qemu-seccomp.c.

This way, the only #ifdef remains the one around qemu_opts_foreach.

Paolo


Thanks for your comment! Indeed, moving to the single C file is much 
more clear.

I will do this after this patch.

@Otubo, what about next step?




[Qemu-devel] [PATCH] s390x/pci: add common fmb

2018-09-04 Thread Yi Min Zhao
The common function measurement block (FMB) is used to report counters of
successfully issued pcilg/stg/stb and rpcit instructions. This patch
introduces a new struct ZpciFmb and schedules a timer callback that
copies the FMB to guest memory at an interval which is set to
4s by default. If an attempt to update the FMB fails, an error event
is generated. After the pcilg/stg/stb and rpcit interception
handlers complete successfully, the related counter is increased. The guest
can pass a null address to switch off the FMB and stop the corresponding
timer.

Signed-off-by: Yi Min Zhao 
Reviewed-by: Pierre Morel
---
 hw/s390x/s390-pci-bus.c  |   3 +-
 hw/s390x/s390-pci-bus.h  |  24 +++
 hw/s390x/s390-pci-inst.c | 105 +--
 hw/s390x/s390-pci-inst.h |   1 +
 4 files changed, 129 insertions(+), 4 deletions(-)

diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
index e3e0ebb7f6..7bd0b9d1e5 100644
--- a/hw/s390x/s390-pci-bus.c
+++ b/hw/s390x/s390-pci-bus.c
@@ -967,6 +967,7 @@ static void s390_pcihost_hot_unplug(HotplugHandler 
*hotplug_dev,
 bus = pci_get_bus(pci_dev);
 devfn = pci_dev->devfn;
 object_unparent(OBJECT(pci_dev));
+fmb_timer_free(pbdev);
 s390_pci_msix_free(pbdev);
 s390_pci_iommu_free(s, bus, devfn);
 pbdev->pdev = NULL;
@@ -1139,7 +1140,7 @@ static void s390_pci_device_reset(DeviceState *dev)
 pci_dereg_ioat(pbdev->iommu);
 }
 
-pbdev->fmb_addr = 0;
+fmb_timer_free(pbdev);
 }
 
 static void s390_pci_get_fid(Object *obj, Visitor *v, const char *name,
diff --git a/hw/s390x/s390-pci-bus.h b/hw/s390x/s390-pci-bus.h
index 1f7f9b5814..fdf13a19c0 100644
--- a/hw/s390x/s390-pci-bus.h
+++ b/hw/s390x/s390-pci-bus.h
@@ -286,6 +286,28 @@ typedef struct S390PCIIOMMUTable {
 S390PCIIOMMU *iommu[PCI_SLOT_MAX];
 } S390PCIIOMMUTable;
 
+/* Function Measurement Block */
+#define DEFAULT_MUI 4000
+#define UPDATE_U_BIT 0x1ULL
+#define FMBK_MASK 0xfULL
+
+typedef struct ZpciFmbFmt0 {
+uint64_t dma_rbytes;
+uint64_t dma_wbytes;
+} ZpciFmbFmt0;
+
+typedef struct ZpciFmb {
+uint8_t format;
+uint8_t fmt_ind[3];
+uint32_t sample;
+uint64_t last_update;
+uint64_t ld_ops;
+uint64_t st_ops;
+uint64_t stb_ops;
+uint64_t rpcit_ops;
+ZpciFmbFmt0 fmt0;
+} QEMU_PACKED __attribute((__aligned__(8))) ZpciFmb;
+
 struct S390PCIBusDevice {
 DeviceState qdev;
 PCIDevice *pdev;
@@ -297,6 +319,8 @@ struct S390PCIBusDevice {
 uint32_t fid;
 bool fid_defined;
 uint64_t fmb_addr;
+ZpciFmb fmb;
+QEMUTimer *fmb_timer;
 uint8_t isc;
 uint16_t noi;
 uint16_t maxstbl;
diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c
index 7b61367ee3..1ed5cb91d0 100644
--- a/hw/s390x/s390-pci-inst.c
+++ b/hw/s390x/s390-pci-inst.c
@@ -19,6 +19,7 @@
 #include "exec/memory-internal.h"
 #include "qemu/error-report.h"
 #include "sysemu/hw_accel.h"
+#include "hw/s390x/tod.h"
 
 #ifndef DEBUG_S390PCI_INST
 #define DEBUG_S390PCI_INST  0
@@ -293,7 +294,7 @@ int clp_service_call(S390CPU *cpu, uint8_t r2, uintptr_t ra)
 resgrp->fr = 1;
 stq_p(&resgrp->dasm, 0);
 stq_p(&resgrp->msia, ZPCI_MSI_ADDR);
-stw_p(&resgrp->mui, 0);
+stw_p(&resgrp->mui, DEFAULT_MUI);
 stw_p(&resgrp->i, 128);
 stw_p(&resgrp->maxstbl, 128);
 resgrp->version = 0;
@@ -456,6 +457,10 @@ int pcilg_service_call(S390CPU *cpu, uint8_t r1, uint8_t 
r2, uintptr_t ra)
 return 0;
 }
 
+if (pbdev->fmb_addr) {
+pbdev->fmb.ld_ops++;
+}
+
 env->regs[r1] = data;
 setcc(cpu, ZPCI_PCI_LS_OK);
 return 0;
@@ -561,6 +566,10 @@ int pcistg_service_call(S390CPU *cpu, uint8_t r1, uint8_t 
r2, uintptr_t ra)
 return 0;
 }
 
+if (pbdev->fmb_addr) {
+pbdev->fmb.st_ops++;
+}
+
 setcc(cpu, ZPCI_PCI_LS_OK);
 return 0;
 }
@@ -681,6 +690,9 @@ err:
 s390_set_status_code(env, r1, ZPCI_PCI_ST_FUNC_IN_ERR);
 s390_pci_generate_error_event(error, pbdev->fh, pbdev->fid, start, 0);
 } else {
+if (pbdev->fmb_addr) {
+pbdev->fmb.rpcit_ops++;
+}
 setcc(cpu, ZPCI_PCI_LS_OK);
 }
 return 0;
@@ -783,6 +795,10 @@ int pcistb_service_call(S390CPU *cpu, uint8_t r1, uint8_t 
r3, uint64_t gaddr,
 }
 }
 
+if (pbdev->fmb_addr) {
+pbdev->fmb.stb_ops++;
+}
+
 setcc(cpu, ZPCI_PCI_LS_OK);
 return 0;
 
@@ -889,6 +905,63 @@ void pci_dereg_ioat(S390PCIIOMMU *iommu)
 iommu->g_iota = 0;
 }
 
+void fmb_timer_free(S390PCIBusDevice *pbdev)
+{
+if (pbdev->fmb_timer) {
+timer_del(pbdev->fmb_timer);
+timer_free(pbdev->fmb_timer);
+pbdev->fmb_timer = NULL;
+}
+pbdev->fmb_addr = 0;
+memset(&pbdev->fmb, 0, sizeof(ZpciFmb));
+}

[Qemu-devel] [RFC PATCH 1/1] s390x/pci: add common fmb

2018-04-27 Thread Yi Min Zhao
The common function measurement block (FMB) is used to report counters of
successfully issued pcilg/stg/stb and rpcit instructions. This patch
introduces a new struct ZpciFmb and schedules a timer callback that
copies the FMB to guest memory at an interval which is set to
4000ms by default. If an attempt to update the FMB fails, an error
event is generated. After the pcilg/stg/stb and rpcit interception
handlers complete successfully, the related counter is increased.

Signed-off-by: Yi Min Zhao 
---
 hw/s390x/s390-pci-bus.c  |  3 ++-
 hw/s390x/s390-pci-bus.h  | 16 +
 hw/s390x/s390-pci-inst.c | 60 +++-
 hw/s390x/s390-pci-inst.h |  1 +
 4 files changed, 78 insertions(+), 2 deletions(-)

diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
index 10da87458e..62e121dcf6 100644
--- a/hw/s390x/s390-pci-bus.c
+++ b/hw/s390x/s390-pci-bus.c
@@ -967,6 +967,7 @@ static void s390_pcihost_hot_unplug(HotplugHandler 
*hotplug_dev,
 bus = pci_get_bus(pci_dev);
 devfn = pci_dev->devfn;
 object_unparent(OBJECT(pci_dev));
+s390_pci_fmb_free(pbdev);
 s390_pci_msix_free(pbdev);
 s390_pci_iommu_free(s, bus, devfn);
 pbdev->pdev = NULL;
@@ -1139,7 +1140,7 @@ static void s390_pci_device_reset(DeviceState *dev)
 pci_dereg_ioat(pbdev->iommu);
 }
 
-pbdev->fmb_addr = 0;
+s390_pci_fmb_free(pbdev);
 }
 
 static void s390_pci_get_fid(Object *obj, Visitor *v, const char *name,
diff --git a/hw/s390x/s390-pci-bus.h b/hw/s390x/s390-pci-bus.h
index 1f7f9b5814..c280dfaacc 100644
--- a/hw/s390x/s390-pci-bus.h
+++ b/hw/s390x/s390-pci-bus.h
@@ -286,6 +286,20 @@ typedef struct S390PCIIOMMUTable {
 S390PCIIOMMU *iommu[PCI_SLOT_MAX];
 } S390PCIIOMMUTable;
 
+/* Function Measurement Block */
+#define DEFAULT_MUI 4000
+#define UPDATE_TIME_MASK (~0x1ULL)
+typedef struct ZpciFmb {
+uint32_t format  : 8;
+uint32_t fmt_ind : 24;
+uint32_t sample;
+uint64_t last_update;
+uint64_t ld_ops;
+uint64_t st_ops;
+uint64_t stb_ops;
+uint64_t rpcit_ops;
+} QEMU_PACKED ZpciFmb;
+
 struct S390PCIBusDevice {
 DeviceState qdev;
 PCIDevice *pdev;
@@ -297,6 +311,8 @@ struct S390PCIBusDevice {
 uint32_t fid;
 bool fid_defined;
 uint64_t fmb_addr;
+ZpciFmb fmb;
+QEMUTimer *fmb_timer;
 uint8_t isc;
 uint16_t noi;
 uint16_t maxstbl;
diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c
index 3fcc330fe3..3b64ed0960 100644
--- a/hw/s390x/s390-pci-inst.c
+++ b/hw/s390x/s390-pci-inst.c
@@ -14,6 +14,7 @@
 #include "qemu/osdep.h"
 #include "qemu-common.h"
 #include "cpu.h"
+#include "internal.h"
 #include "s390-pci-inst.h"
 #include "s390-pci-bus.h"
 #include "exec/memory-internal.h"
@@ -295,7 +296,7 @@ int clp_service_call(S390CPU *cpu, uint8_t r2, uintptr_t ra)
 resgrp->fr = 1;
 stq_p(&resgrp->dasm, 0);
 stq_p(&resgrp->msia, ZPCI_MSI_ADDR);
-stw_p(&resgrp->mui, 0);
+stw_p(&resgrp->mui, DEFAULT_MUI);
 stw_p(&resgrp->i, 128);
 stw_p(&resgrp->maxstbl, 128);
 resgrp->version = 0;
@@ -460,6 +461,10 @@ int pcilg_service_call(S390CPU *cpu, uint8_t r1, uint8_t 
r2, uintptr_t ra)
 return 0;
 }
 
+if (pbdev->fmb_addr) {
+pbdev->fmb.ld_ops++;
+}
+
 env->regs[r1] = data;
 setcc(cpu, ZPCI_PCI_LS_OK);
 return 0;
@@ -567,6 +572,10 @@ int pcistg_service_call(S390CPU *cpu, uint8_t r1, uint8_t 
r2, uintptr_t ra)
 return 0;
 }
 
+if (pbdev->fmb_addr) {
+pbdev->fmb.st_ops++;
+}
+
 setcc(cpu, ZPCI_PCI_LS_OK);
 return 0;
 }
@@ -689,6 +698,9 @@ err:
 s390_set_status_code(env, r1, ZPCI_PCI_ST_FUNC_IN_ERR);
 s390_pci_generate_error_event(error, pbdev->fh, pbdev->fid, start, 0);
 } else {
+if (pbdev->fmb_addr) {
+pbdev->fmb.rpcit_ops++;
+}
 setcc(cpu, ZPCI_PCI_LS_OK);
 }
 return 0;
@@ -740,6 +752,8 @@ int pcistb_service_call(S390CPU *cpu, uint8_t r1, uint8_t 
r3, uint64_t gaddr,
 break;
 }
 
+atomic_inc(&pbdev->fmb.stb_ops);
+
 if (pcias > ZPCI_IO_BAR_MAX) {
 DPRINTF("pcistb invalid space\n");
 setcc(cpu, ZPCI_PCI_LS_ERR);
@@ -896,6 +910,42 @@ void pci_dereg_ioat(S390PCIIOMMU *iommu)
 iommu->g_iota = 0;
 }
 
+void s390_pci_fmb_free(S390PCIBusDevice *pbdev)
+{
+if (!pbdev) {
+return;
+}
+
+if (pbdev->fmb_timer) {
+timer_del(pbdev->fmb_timer);
+timer_free(pbdev->fmb_timer);
+pbdev->fmb_timer = NULL;
+}
+pbdev->fmb_addr = 0;
+memset(&pbdev->fmb, 0, sizeof(ZpciFmb));
+}
+
+static void fmb_update(void *opaque)
+{
+S390PCIBusDevice *pbdev = opaque;
+MemTxResult ret;
+
+pbdev->fmb.sa

[Qemu-devel] [RFC PATCH 0/1] s390x/pci: add common fmb

2018-04-27 Thread Yi Min Zhao
Currently the code lacks FMB functionality. This patch adds it. Only the common
FMB is supported for now.

bb/zyimin/fmb

Yi Min Zhao (1):
  s390x/pci: add common fmb

 hw/s390x/s390-pci-bus.c  |  3 ++-
 hw/s390x/s390-pci-bus.h  | 16 +
 hw/s390x/s390-pci-inst.c | 60 +++-
 hw/s390x/s390-pci-inst.h |  1 +
 4 files changed, 78 insertions(+), 2 deletions(-)

-- 
2.15.1 (Apple Git-101)




Re: [Qemu-devel] [RFC PATCH 0/1] s390x/pci: add common fmb

2018-04-27 Thread Yi Min Zhao

Wrong operation. Please ignore this series.


在 2018/4/27 下午6:02, Yi Min Zhao 写道:

Currently the code misses FMB functionality. This patch fixup this. Only common
FMB is supported now.

bb/zyimin/fmb

Yi Min Zhao (1):
   s390x/pci: add common fmb

  hw/s390x/s390-pci-bus.c  |  3 ++-
  hw/s390x/s390-pci-bus.h  | 16 +
  hw/s390x/s390-pci-inst.c | 60 +++-
  hw/s390x/s390-pci-inst.h |  1 +
  4 files changed, 78 insertions(+), 2 deletions(-)






Re: [Qemu-devel] [PATCH v2 1/1] sandbox: disable -sandbox if CONFIG_SECCOMP undefined

2018-05-28 Thread Yi Min Zhao



在 2018/5/25 下午5:36, Eduardo Otubo 写道:

On 05/25/2018 06:23 AM, Yi Min Zhao wrote:



在 2018/5/24 下午9:40, Paolo Bonzini 写道:

On 24/05/2018 09:53, Eduardo Otubo wrote:
Thanks! But I have not got response from Paolo.  I have added 
him to

CC list.


  I'll just wait one more ACK and will send a pull request on the
seccomp queue. Thanks for the contribution.



So... what I should do is wait?

Yes, even though I think we're safe to proceed without his explicit 
ack.

The patch is okay; however, as a follow-up, you could consider moving
all the CONFIG_SECCOMP code to qemu-seccomp.c.

This way, the only #ifdef remains the one around qemu_opts_foreach.

Paolo


Thanks for your comment! Indeed, moving to the single C file is much 
more clear.

I will do this after this patch.

@Otubo, what about next step?


If you're willing to send v3 with the changes Paolo suggested, I can 
wait to send the pull request. No worries.




OK. I will update the new version with Paolo's suggestion.




[Qemu-devel] [PATCH v3] sandbox: disable -sandbox if CONFIG_SECCOMP undefined

2018-05-29 Thread Yi Min Zhao
If CONFIG_SECCOMP is undefined, the option 'elevateprivileges' remains
compiled. This would make libvirt set the corresponding capability and
then trigger failure during guest startup. This patch moves the code
regarding seccomp command line options to qemu-seccomp.c file and
wraps qemu_opts_foreach finding sandbox option with CONFIG_SECCOMP.
Because parse_sandbox() is moved into qemu-seccomp.c file, change
seccomp_start() to static function.

Signed-off-by: Yi Min Zhao 
---
1. Problem Description
==
If QEMU is built without seccomp support, 'elevateprivileges' remains compiled.
This option of sandbox is treated as an indication for seccomp blacklist support
in libvirt. This behavior is introduced by the libvirt commits 31ca6a5 and
3527f9d. It would make libvirt build wrong QEMU cmdline, and then the guest
startup would fail.

2. Libvirt Log
==
qemu-system-s390x: -sandbox on,obsolete=deny,elevateprivileges=deny,spawn=deny,\
resourcecontrol=deny: seccomp support is disabled

3. Fixup

Move the code related to sandbox to qemu-seccomp.c file and wrap it with
CONFIG_SECCOMP, so that the sandbox-related code is compiled only when
CONFIG_SECCOMP is defined.
---
 include/sysemu/seccomp.h |   3 +-
 qemu-seccomp.c   | 121 ++-
 vl.c | 118 +
 3 files changed, 124 insertions(+), 118 deletions(-)

diff --git a/include/sysemu/seccomp.h b/include/sysemu/seccomp.h
index 9b092aa23f..fe859894f6 100644
--- a/include/sysemu/seccomp.h
+++ b/include/sysemu/seccomp.h
@@ -21,5 +21,6 @@
 #define QEMU_SECCOMP_SET_SPAWN   (1 << 3)
 #define QEMU_SECCOMP_SET_RESOURCECTL (1 << 4)
 
-int seccomp_start(uint32_t seccomp_opts);
+int parse_sandbox(void *opaque, QemuOpts *opts, Error **errp);
+
 #endif
diff --git a/qemu-seccomp.c b/qemu-seccomp.c
index b770a77d33..148e4c6f24 100644
--- a/qemu-seccomp.c
+++ b/qemu-seccomp.c
@@ -13,6 +13,11 @@
  * GNU GPL, version 2 or (at your option) any later version.
  */
 #include "qemu/osdep.h"
+#include "qemu/config-file.h"
+#include "qemu/option.h"
+#include "qemu/module.h"
+#include "qemu/error-report.h"
+#include 
 #include 
 #include "sysemu/seccomp.h"
 
@@ -96,7 +101,7 @@ static const struct QemuSeccompSyscall blacklist[] = {
 };
 
 
-int seccomp_start(uint32_t seccomp_opts)
+static int seccomp_start(uint32_t seccomp_opts)
 {
 int rc = 0;
 unsigned int i = 0;
@@ -125,3 +130,117 @@ int seccomp_start(uint32_t seccomp_opts)
 seccomp_release(ctx);
 return rc;
 }
+
+#ifdef CONFIG_SECCOMP
+int parse_sandbox(void *opaque, QemuOpts *opts, Error **errp)
+{
+if (qemu_opt_get_bool(opts, "enable", false)) {
+uint32_t seccomp_opts = QEMU_SECCOMP_SET_DEFAULT
+| QEMU_SECCOMP_SET_OBSOLETE;
+const char *value = NULL;
+
+value = qemu_opt_get(opts, "obsolete");
+if (value) {
+if (g_str_equal(value, "allow")) {
+seccomp_opts &= ~QEMU_SECCOMP_SET_OBSOLETE;
+} else if (g_str_equal(value, "deny")) {
+/* this is the default option, this if is here
+ * to provide a little bit of consistency for
+ * the command line */
+} else {
+error_report("invalid argument for obsolete");
+return -1;
+}
+}
+
+value = qemu_opt_get(opts, "elevateprivileges");
+if (value) {
+if (g_str_equal(value, "deny")) {
+seccomp_opts |= QEMU_SECCOMP_SET_PRIVILEGED;
+} else if (g_str_equal(value, "children")) {
+seccomp_opts |= QEMU_SECCOMP_SET_PRIVILEGED;
+
+/* calling prctl directly because we're
+ * not sure if host has CAP_SYS_ADMIN set*/
+if (prctl(PR_SET_NO_NEW_PRIVS, 1)) {
+error_report("failed to set no_new_privs "
+ "aborting");
+return -1;
+}
+} else if (g_str_equal(value, "allow")) {
+/* default value */
+} else {
+error_report("invalid argument for elevateprivileges");
+return -1;
+}
+}
+
+value = qemu_opt_get(opts, "spawn");
+if (value) {
+if (g_str_equal(value, "deny")) {
+seccomp_opts |= QEMU_SECCOMP_SET_SPAWN;
+} else if (g_str_equal(value, "allow")) {
+/* default value */
+} else {
+error_report("invalid argument for spawn");
+return -1;
+}
+}
+
+value 

Re: [Qemu-devel] [PATCH v3] sandbox: disable -sandbox if CONFIG_SECCOMP undefined

2018-05-29 Thread Yi Min Zhao




在 2018/5/29 下午4:40, Ján Tomko 写道:

On Tue, May 29, 2018 at 03:31:40PM +0800, Yi Min Zhao wrote:

If CONFIG_SECCOMP is undefined, the option 'elevateprivileges' remains
compiled. This would make libvirt set the corresponding capability and
then trigger failure during guest startup. This patch moves the code
regarding seccomp command line options to qemu-seccomp.c file and
wraps qemu_opts_foreach finding sandbox option with CONFIG_SECCOMP.
Because parse_sandbox() is moved into qemu-seccomp.c file, change
seccomp_start() to static function.

Signed-off-by: Yi Min Zhao 
---
1. Problem Description
==
If QEMU is built without seccomp support, 'elevateprivileges' remains compiled.
This option of sandbox is treated as an indication for seccomp blacklist support
in libvirt. This behavior is introduced by the libvirt commits 31ca6a5 and
3527f9d. It would make libvirt build wrong QEMU cmdline, and then the guest
startup would fail.

2. Libvirt Log
==
qemu-system-s390x: -sandbox on,obsolete=deny,elevateprivileges=deny,spawn=deny,\
resourcecontrol=deny: seccomp support is disabled

3. Fixup

Move the code related to sandbox to qemu-seccomp.c file and wrap it with
CONFIG_SECCOMP, so that the sandbox-related code is compiled only when
CONFIG_SECCOMP is defined.
---
include/sysemu/seccomp.h |   3 +-
qemu-seccomp.c   | 121 
++-
vl.c | 118 
+

3 files changed, 124 insertions(+), 118 deletions(-)



Reviewed-by: Ján Tomko 
Tested-by: Ján Tomko 

Jano

Thanks very much!




Re: [Qemu-devel] [PATCH v3] sandbox: disable -sandbox if CONFIG_SECCOMP undefined

2018-05-29 Thread Yi Min Zhao




在 2018/5/29 下午5:37, Paolo Bonzini 写道:

On 29/05/2018 09:31, Yi Min Zhao wrote:

If CONFIG_SECCOMP is undefined, the option 'elevateprivileges' remains
compiled. This would make libvirt set the corresponding capability and
then trigger failure during guest startup. This patch moves the code
regarding seccomp command line options to qemu-seccomp.c file and
wraps qemu_opts_foreach finding sandbox option with CONFIG_SECCOMP.
Because parse_sandbox() is moved into qemu-seccomp.c file, change
seccomp_start() to static function.

Signed-off-by: Yi Min Zhao 

I had to squash this in:

diff --git a/vl.c b/vl.c
index 1140feb227..66c17ff8f8 100644
--- a/vl.c
+++ b/vl.c
@@ -3842,11 +3842,16 @@ int main(int argc, char **argv, char **envp)
  qtest_log = optarg;
  break;
  case QEMU_OPTION_sandbox:
+#ifndef CONFIG_SECCOMP
  opts = qemu_opts_parse_noisily(qemu_find_opts("sandbox"),
 optarg, true);
  if (!opts) {
  exit(1);
  }
+#else
+error_report("-sandbox support is not enabled in this QEMU 
binary");
+exit(1);
+#endif
  break;
  case QEMU_OPTION_add_fd:
  #ifndef _WIN32


Otherwise "-sandbox" will crash with a NULL pointer dereference in a binary
without seccomp support.  Otherwise looks great, thanks!

Paolo
I have updated this. Thanks for your reminder! Could I add your s-o-b, 
r-b or ack?



---
1. Problem Description
==
If QEMU is built without seccomp support, 'elevateprivileges' remains compiled.
This option of sandbox is treated as an indication for seccomp blacklist support
in libvirt. This behavior is introduced by the libvirt commits 31ca6a5 and
3527f9d. It would make libvirt build wrong QEMU cmdline, and then the guest
startup would fail.

2. Libvirt Log
==
qemu-system-s390x: -sandbox on,obsolete=deny,elevateprivileges=deny,spawn=deny,\
resourcecontrol=deny: seccomp support is disabled

3. Fixup

Move the code related to sandbox to qemu-seccomp.c file and wrap it with
CONFIG_SECCOMP, so that the sandbox-related code is compiled only when
CONFIG_SECCOMP is defined.
---
  include/sysemu/seccomp.h |   3 +-
  qemu-seccomp.c   | 121 ++-
  vl.c | 118 +
  3 files changed, 124 insertions(+), 118 deletions(-)

diff --git a/include/sysemu/seccomp.h b/include/sysemu/seccomp.h
index 9b092aa23f..fe859894f6 100644
--- a/include/sysemu/seccomp.h
+++ b/include/sysemu/seccomp.h
@@ -21,5 +21,6 @@
  #define QEMU_SECCOMP_SET_SPAWN   (1 << 3)
  #define QEMU_SECCOMP_SET_RESOURCECTL (1 << 4)
  
-int seccomp_start(uint32_t seccomp_opts);

+int parse_sandbox(void *opaque, QemuOpts *opts, Error **errp);
+
  #endif
diff --git a/qemu-seccomp.c b/qemu-seccomp.c
index b770a77d33..148e4c6f24 100644
--- a/qemu-seccomp.c
+++ b/qemu-seccomp.c
@@ -13,6 +13,11 @@
   * GNU GPL, version 2 or (at your option) any later version.
   */
  #include "qemu/osdep.h"
+#include "qemu/config-file.h"
+#include "qemu/option.h"
+#include "qemu/module.h"
+#include "qemu/error-report.h"
+#include 
  #include 
  #include "sysemu/seccomp.h"
  
@@ -96,7 +101,7 @@ static const struct QemuSeccompSyscall blacklist[] = {

  };
  
  
-int seccomp_start(uint32_t seccomp_opts)

+static int seccomp_start(uint32_t seccomp_opts)
  {
  int rc = 0;
  unsigned int i = 0;
@@ -125,3 +130,117 @@ int seccomp_start(uint32_t seccomp_opts)
  seccomp_release(ctx);
  return rc;
  }
+
+#ifdef CONFIG_SECCOMP
+int parse_sandbox(void *opaque, QemuOpts *opts, Error **errp)
+{
+if (qemu_opt_get_bool(opts, "enable", false)) {
+uint32_t seccomp_opts = QEMU_SECCOMP_SET_DEFAULT
+| QEMU_SECCOMP_SET_OBSOLETE;
+const char *value = NULL;
+
+value = qemu_opt_get(opts, "obsolete");
+if (value) {
+if (g_str_equal(value, "allow")) {
+seccomp_opts &= ~QEMU_SECCOMP_SET_OBSOLETE;
+} else if (g_str_equal(value, "deny")) {
+/* this is the default option, this if is here
+ * to provide a little bit of consistency for
+ * the command line */
+} else {
+error_report("invalid argument for obsolete");
+return -1;
+}
+}
+
+value = qemu_opt_get(opts, "elevateprivileges");
+if (value) {
+if (g_str_equal(value, "deny")) {
+seccomp_opts |= QEMU_SECCOMP_SET_PRIVILEGED;
+} else if (g_str_equal(value, "children")) {
+seccomp_

Re: [Qemu-devel] [PATCH v3] sandbox: disable -sandbox if CONFIG_SECCOMP undefined

2018-05-29 Thread Yi Min Zhao




在 2018/5/29 下午5:39, Eduardo Otubo 写道:

On 29/05/2018 - 15:31:40, Yi Min Zhao wrote:

If CONFIG_SECCOMP is undefined, the option 'elevateprivileges' remains
compiled. This would make libvirt set the corresponding capability and
then trigger failure during guest startup. This patch moves the code
regarding seccomp command line options to qemu-seccomp.c file and
wraps qemu_opts_foreach finding sandbox option with CONFIG_SECCOMP.
Because parse_sandbox() is moved into qemu-seccomp.c file, change
seccomp_start() to static function.

Signed-off-by: Yi Min Zhao 
---
1. Problem Description
==
If QEMU is built without seccomp support, 'elevateprivileges' remains compiled.
This option of sandbox is treated as an indication for seccomp blacklist support
in libvirt. This behavior is introduced by the libvirt commits 31ca6a5 and
3527f9d. It would make libvirt build wrong QEMU cmdline, and then the guest
startup would fail.

2. Libvirt Log
==
qemu-system-s390x: -sandbox on,obsolete=deny,elevateprivileges=deny,spawn=deny,\
resourcecontrol=deny: seccomp support is disabled

3. Fixup

Move the code related to sandbox to qemu-seccomp.c file and wrap it with
CONFIG_SECCOMP, so that the sandbox-related code is compiled only when
CONFIG_SECCOMP is defined.
---
  include/sysemu/seccomp.h |   3 +-
  qemu-seccomp.c   | 121 ++-
  vl.c | 118 +
  3 files changed, 124 insertions(+), 118 deletions(-)

diff --git a/include/sysemu/seccomp.h b/include/sysemu/seccomp.h
index 9b092aa23f..fe859894f6 100644
--- a/include/sysemu/seccomp.h
+++ b/include/sysemu/seccomp.h
@@ -21,5 +21,6 @@
  #define QEMU_SECCOMP_SET_SPAWN   (1 << 3)
  #define QEMU_SECCOMP_SET_RESOURCECTL (1 << 4)
  
-int seccomp_start(uint32_t seccomp_opts);

+int parse_sandbox(void *opaque, QemuOpts *opts, Error **errp);
+
  #endif
diff --git a/qemu-seccomp.c b/qemu-seccomp.c
index b770a77d33..148e4c6f24 100644
--- a/qemu-seccomp.c
+++ b/qemu-seccomp.c
@@ -13,6 +13,11 @@
   * GNU GPL, version 2 or (at your option) any later version.
   */
  #include "qemu/osdep.h"
+#include "qemu/config-file.h"
+#include "qemu/option.h"
+#include "qemu/module.h"
+#include "qemu/error-report.h"
+#include 
  #include 
  #include "sysemu/seccomp.h"
  
@@ -96,7 +101,7 @@ static const struct QemuSeccompSyscall blacklist[] = {

  };
  
  
-int seccomp_start(uint32_t seccomp_opts)

+static int seccomp_start(uint32_t seccomp_opts)
  {
  int rc = 0;
  unsigned int i = 0;
@@ -125,3 +130,117 @@ int seccomp_start(uint32_t seccomp_opts)
  seccomp_release(ctx);
  return rc;
  }
+
+#ifdef CONFIG_SECCOMP
+int parse_sandbox(void *opaque, QemuOpts *opts, Error **errp)
+{
+if (qemu_opt_get_bool(opts, "enable", false)) {
+uint32_t seccomp_opts = QEMU_SECCOMP_SET_DEFAULT
+| QEMU_SECCOMP_SET_OBSOLETE;
+const char *value = NULL;
+
+value = qemu_opt_get(opts, "obsolete");
+if (value) {
+if (g_str_equal(value, "allow")) {
+seccomp_opts &= ~QEMU_SECCOMP_SET_OBSOLETE;
+} else if (g_str_equal(value, "deny")) {
+/* this is the default option, this if is here
+ * to provide a little bit of consistency for
+ * the command line */
+} else {
+error_report("invalid argument for obsolete");
+return -1;
+}
+}
+
+value = qemu_opt_get(opts, "elevateprivileges");
+if (value) {
+if (g_str_equal(value, "deny")) {
+seccomp_opts |= QEMU_SECCOMP_SET_PRIVILEGED;
+} else if (g_str_equal(value, "children")) {
+seccomp_opts |= QEMU_SECCOMP_SET_PRIVILEGED;
+
+/* calling prctl directly because we're
+ * not sure if host has CAP_SYS_ADMIN set*/
+if (prctl(PR_SET_NO_NEW_PRIVS, 1)) {
+error_report("failed to set no_new_privs "
+ "aborting");
+return -1;
+}
+} else if (g_str_equal(value, "allow")) {
+/* default value */
+} else {
+error_report("invalid argument for elevateprivileges");
+return -1;
+}
+}
+
+value = qemu_opt_get(opts, "spawn");
+if (value) {
+if (g_str_equal(value, "deny")) {
+seccomp_opts |= QEMU_SECCOMP_SET_SPAWN;
+} else if (g_str_equal(value, "allow")) {
+/* default value */
+} else {
+err

Re: [Qemu-devel] [PATCH v3] sandbox: disable -sandbox if CONFIG_SECCOMP undefined

2018-05-29 Thread Yi Min Zhao




在 2018/5/29 下午5:37, Paolo Bonzini 写道:

On 29/05/2018 09:31, Yi Min Zhao wrote:

If CONFIG_SECCOMP is undefined, the option 'elevateprivileges' remains
compiled. This would make libvirt set the corresponding capability and
then trigger failure during guest startup. This patch moves the code
regarding seccomp command line options to qemu-seccomp.c file and
wraps qemu_opts_foreach finding sandbox option with CONFIG_SECCOMP.
Because parse_sandbox() is moved into qemu-seccomp.c file, change
seccomp_start() to static function.

Signed-off-by: Yi Min Zhao 

I had to squash this in:

diff --git a/vl.c b/vl.c
index 1140feb227..66c17ff8f8 100644
--- a/vl.c
+++ b/vl.c
@@ -3842,11 +3842,16 @@ int main(int argc, char **argv, char **envp)
  qtest_log = optarg;
  break;
  case QEMU_OPTION_sandbox:
+#ifndef CONFIG_SECCOMP

One question, I guess you want to use #ifdef ?

  opts = qemu_opts_parse_noisily(qemu_find_opts("sandbox"),
 optarg, true);
  if (!opts) {
  exit(1);
  }
+#else
+error_report("-sandbox support is not enabled in this QEMU 
binary");
+exit(1);
+#endif
  break;
  case QEMU_OPTION_add_fd:
  #ifndef _WIN32


Otherwise "-sandbox" will crash with a NULL pointer dereference in a binary
without seccomp support.  Otherwise looks great, thanks!

Paolo


---
1. Problem Description
==
If QEMU is built without seccomp support, 'elevateprivileges' remains compiled.
This option of sandbox is treated as an indication for seccomp blacklist support
in libvirt. This behavior is introduced by the libvirt commits 31ca6a5 and
3527f9d. It would make libvirt build wrong QEMU cmdline, and then the guest
startup would fail.

2. Libvirt Log
==
qemu-system-s390x: -sandbox on,obsolete=deny,elevateprivileges=deny,spawn=deny,\
resourcecontrol=deny: seccomp support is disabled

3. Fixup

Move the code related to sandbox to qemu-seccomp.c file and wrap it with
CONFIG_SECCOMP, so that the sandbox-related code is compiled only when
CONFIG_SECCOMP is defined.
---
  include/sysemu/seccomp.h |   3 +-
  qemu-seccomp.c   | 121 ++-
  vl.c | 118 +
  3 files changed, 124 insertions(+), 118 deletions(-)

diff --git a/include/sysemu/seccomp.h b/include/sysemu/seccomp.h
index 9b092aa23f..fe859894f6 100644
--- a/include/sysemu/seccomp.h
+++ b/include/sysemu/seccomp.h
@@ -21,5 +21,6 @@
  #define QEMU_SECCOMP_SET_SPAWN   (1 << 3)
  #define QEMU_SECCOMP_SET_RESOURCECTL (1 << 4)
  
-int seccomp_start(uint32_t seccomp_opts);

+int parse_sandbox(void *opaque, QemuOpts *opts, Error **errp);
+
  #endif
diff --git a/qemu-seccomp.c b/qemu-seccomp.c
index b770a77d33..148e4c6f24 100644
--- a/qemu-seccomp.c
+++ b/qemu-seccomp.c
@@ -13,6 +13,11 @@
   * GNU GPL, version 2 or (at your option) any later version.
   */
  #include "qemu/osdep.h"
+#include "qemu/config-file.h"
+#include "qemu/option.h"
+#include "qemu/module.h"
+#include "qemu/error-report.h"
+#include 
  #include 
  #include "sysemu/seccomp.h"
  
@@ -96,7 +101,7 @@ static const struct QemuSeccompSyscall blacklist[] = {

  };
  
  
-int seccomp_start(uint32_t seccomp_opts)

+static int seccomp_start(uint32_t seccomp_opts)
  {
  int rc = 0;
  unsigned int i = 0;
@@ -125,3 +130,117 @@ int seccomp_start(uint32_t seccomp_opts)
  seccomp_release(ctx);
  return rc;
  }
+
+#ifdef CONFIG_SECCOMP
+int parse_sandbox(void *opaque, QemuOpts *opts, Error **errp)
+{
+if (qemu_opt_get_bool(opts, "enable", false)) {
+uint32_t seccomp_opts = QEMU_SECCOMP_SET_DEFAULT
+| QEMU_SECCOMP_SET_OBSOLETE;
+const char *value = NULL;
+
+value = qemu_opt_get(opts, "obsolete");
+if (value) {
+if (g_str_equal(value, "allow")) {
+seccomp_opts &= ~QEMU_SECCOMP_SET_OBSOLETE;
+} else if (g_str_equal(value, "deny")) {
+/* this is the default option, this if is here
+ * to provide a little bit of consistency for
+ * the command line */
+} else {
+error_report("invalid argument for obsolete");
+return -1;
+}
+}
+
+value = qemu_opt_get(opts, "elevateprivileges");
+if (value) {
+if (g_str_equal(value, "deny")) {
+seccomp_opts |= QEMU_SECCOMP_SET_PRIVILEGED;
+} else if (g_str_equal(value, "children")) {
+seccomp_opts |= QEMU_SECCOMP_SET_PRIVILEGED;
+
+/* calling

Re: [Qemu-devel] [PATCH v3] sandbox: disable -sandbox if CONFIG_SECCOMP undefined

2018-05-30 Thread Yi Min Zhao




在 2018/5/30 下午6:54, Eduardo Otubo 写道:

On 29/05/2018 - 18:05:25, Yi Min Zhao wrote:


在 2018/5/29 下午5:37, Paolo Bonzini 写道:

On 29/05/2018 09:31, Yi Min Zhao wrote:

If CONFIG_SECCOMP is undefined, the option 'elevateprivileges' remains
compiled. This would make libvirt set the corresponding capability and
then trigger failure during guest startup. This patch moves the code
regarding seccomp command line options to qemu-seccomp.c file and
wraps qemu_opts_foreach finding sandbox option with CONFIG_SECCOMP.
Because parse_sandbox() is moved into qemu-seccomp.c file, change
seccomp_start() to static function.

Signed-off-by: Yi Min Zhao 

I had to squash this in:

diff --git a/vl.c b/vl.c
index 1140feb227..66c17ff8f8 100644
--- a/vl.c
+++ b/vl.c
@@ -3842,11 +3842,16 @@ int main(int argc, char **argv, char **envp)
   qtest_log = optarg;
   break;
   case QEMU_OPTION_sandbox:
+#ifndef CONFIG_SECCOMP

One question, I guess you want to use #ifdef ?

Yep, I guess he meant #ifdef.

Can you send a v4 with a cleaned up version? Also fixing a typo on the text
(elevateDprivileges).

Thanks for the contribution.

Sure. Thanks!



   opts = qemu_opts_parse_noisily(qemu_find_opts("sandbox"),
  optarg, true);
   if (!opts) {
   exit(1);
   }
+#else
+error_report("-sandbox support is not enabled in this QEMU 
binary");
+exit(1);
+#endif
   break;
   case QEMU_OPTION_add_fd:
   #ifndef _WIN32


Otherwise "-sandbox" will crash with a NULL pointer dereference in a binary
without seccomp support.  Otherwise looks great, thanks!

Paolo


---
1. Problem Description
==
If QEMU is built without seccomp support, 'elevateprivileges' remains compiled.
This option of sandbox is treated as an indication for seccomp blacklist support
in libvirt. This behavior is introduced by the libvirt commits 31ca6a5 and
3527f9d. It would make libvirt build wrong QEMU cmdline, and then the guest
startup would fail.

2. Libvirt Log
==
qemu-system-s390x: -sandbox on,obsolete=deny,elevateprivileges=deny,spawn=deny,\
resourcecontrol=deny: seccomp support is disabled

3. Fixup

Move the code related to sandbox to qemu-seccomp.c file and wrap it with
CONFIG_SECCOMP, so that the sandbox-related code is compiled only when
CONFIG_SECCOMP is defined.
---
   include/sysemu/seccomp.h |   3 +-
   qemu-seccomp.c   | 121 
++-
   vl.c | 118 +
   3 files changed, 124 insertions(+), 118 deletions(-)

diff --git a/include/sysemu/seccomp.h b/include/sysemu/seccomp.h
index 9b092aa23f..fe859894f6 100644
--- a/include/sysemu/seccomp.h
+++ b/include/sysemu/seccomp.h
@@ -21,5 +21,6 @@
   #define QEMU_SECCOMP_SET_SPAWN   (1 << 3)
   #define QEMU_SECCOMP_SET_RESOURCECTL (1 << 4)
-int seccomp_start(uint32_t seccomp_opts);
+int parse_sandbox(void *opaque, QemuOpts *opts, Error **errp);
+
   #endif
diff --git a/qemu-seccomp.c b/qemu-seccomp.c
index b770a77d33..148e4c6f24 100644
--- a/qemu-seccomp.c
+++ b/qemu-seccomp.c
@@ -13,6 +13,11 @@
* GNU GPL, version 2 or (at your option) any later version.
*/
   #include "qemu/osdep.h"
+#include "qemu/config-file.h"
+#include "qemu/option.h"
+#include "qemu/module.h"
+#include "qemu/error-report.h"
+#include 
   #include 
   #include "sysemu/seccomp.h"
@@ -96,7 +101,7 @@ static const struct QemuSeccompSyscall blacklist[] = {
   };
-int seccomp_start(uint32_t seccomp_opts)
+static int seccomp_start(uint32_t seccomp_opts)
   {
   int rc = 0;
   unsigned int i = 0;
@@ -125,3 +130,117 @@ int seccomp_start(uint32_t seccomp_opts)
   seccomp_release(ctx);
   return rc;
   }
+
+#ifdef CONFIG_SECCOMP
+int parse_sandbox(void *opaque, QemuOpts *opts, Error **errp)
+{
+if (qemu_opt_get_bool(opts, "enable", false)) {
+uint32_t seccomp_opts = QEMU_SECCOMP_SET_DEFAULT
+| QEMU_SECCOMP_SET_OBSOLETE;
+const char *value = NULL;
+
+value = qemu_opt_get(opts, "obsolete");
+if (value) {
+if (g_str_equal(value, "allow")) {
+seccomp_opts &= ~QEMU_SECCOMP_SET_OBSOLETE;
+} else if (g_str_equal(value, "deny")) {
+/* this is the default option, this if is here
+ * to provide a little bit of consistency for
+ * the command line */
+} else {
+error_report("invalid argument for obsolete");
+return -1;
+}
+}
+
+value = qemu_opt_get(opts, "elevateprivileges"

Re: [Qemu-devel] [PATCH v3] sandbox: disable -sandbox if CONFIG_SECCOMP undefined

2018-05-30 Thread Yi Min Zhao




在 2018/5/30 下午6:54, Eduardo Otubo 写道:

Can you send a v4 with a cleaned up version? Also fixing a typo on the text
(elevateDprivileges).

Why elevateDprivileges? The option name in the code is 'elevateprivileges'.




[Qemu-devel] [PATCH v4] sandbox: disable -sandbox if CONFIG_SECCOMP undefined

2018-05-30 Thread Yi Min Zhao
If CONFIG_SECCOMP is undefined, the option 'elevateprivileges' remains
compiled. This would make libvirt set the corresponding capability and
then trigger failure during guest startup. This patch moves the code
regarding seccomp command line options to qemu-seccomp.c file and
wraps qemu_opts_foreach finding sandbox option with CONFIG_SECCOMP.
Because parse_sandbox() is moved into qemu-seccomp.c file, change
seccomp_start() to static function.

Signed-off-by: Yi Min Zhao 
Reviewed-by: Ján Tomko 
Tested-by: Ján Tomko 
Acked-by: Eduardo Otubo 
---
 include/sysemu/seccomp.h |   3 +-
 qemu-seccomp.c   | 121 -
 vl.c | 124 +++
 3 files changed, 130 insertions(+), 118 deletions(-)

diff --git a/include/sysemu/seccomp.h b/include/sysemu/seccomp.h
index 9b092aa23f..fe859894f6 100644
--- a/include/sysemu/seccomp.h
+++ b/include/sysemu/seccomp.h
@@ -21,5 +21,6 @@
 #define QEMU_SECCOMP_SET_SPAWN   (1 << 3)
 #define QEMU_SECCOMP_SET_RESOURCECTL (1 << 4)
 
-int seccomp_start(uint32_t seccomp_opts);
+int parse_sandbox(void *opaque, QemuOpts *opts, Error **errp);
+
 #endif
diff --git a/qemu-seccomp.c b/qemu-seccomp.c
index b770a77d33..148e4c6f24 100644
--- a/qemu-seccomp.c
+++ b/qemu-seccomp.c
@@ -13,6 +13,11 @@
  * GNU GPL, version 2 or (at your option) any later version.
  */
 #include "qemu/osdep.h"
+#include "qemu/config-file.h"
+#include "qemu/option.h"
+#include "qemu/module.h"
+#include "qemu/error-report.h"
+#include 
 #include 
 #include "sysemu/seccomp.h"
 
@@ -96,7 +101,7 @@ static const struct QemuSeccompSyscall blacklist[] = {
 };
 
 
-int seccomp_start(uint32_t seccomp_opts)
+static int seccomp_start(uint32_t seccomp_opts)
 {
 int rc = 0;
 unsigned int i = 0;
@@ -125,3 +130,117 @@ int seccomp_start(uint32_t seccomp_opts)
 seccomp_release(ctx);
 return rc;
 }
+
+#ifdef CONFIG_SECCOMP
+int parse_sandbox(void *opaque, QemuOpts *opts, Error **errp)
+{
+if (qemu_opt_get_bool(opts, "enable", false)) {
+uint32_t seccomp_opts = QEMU_SECCOMP_SET_DEFAULT
+| QEMU_SECCOMP_SET_OBSOLETE;
+const char *value = NULL;
+
+value = qemu_opt_get(opts, "obsolete");
+if (value) {
+if (g_str_equal(value, "allow")) {
+seccomp_opts &= ~QEMU_SECCOMP_SET_OBSOLETE;
+} else if (g_str_equal(value, "deny")) {
+/* this is the default option, this if is here
+ * to provide a little bit of consistency for
+ * the command line */
+} else {
+error_report("invalid argument for obsolete");
+return -1;
+}
+}
+
+value = qemu_opt_get(opts, "elevateprivileges");
+if (value) {
+if (g_str_equal(value, "deny")) {
+seccomp_opts |= QEMU_SECCOMP_SET_PRIVILEGED;
+} else if (g_str_equal(value, "children")) {
+seccomp_opts |= QEMU_SECCOMP_SET_PRIVILEGED;
+
+/* calling prctl directly because we're
+ * not sure if host has CAP_SYS_ADMIN set*/
+if (prctl(PR_SET_NO_NEW_PRIVS, 1)) {
+error_report("failed to set no_new_privs "
+ "aborting");
+return -1;
+}
+} else if (g_str_equal(value, "allow")) {
+/* default value */
+} else {
+error_report("invalid argument for elevateprivileges");
+return -1;
+}
+}
+
+value = qemu_opt_get(opts, "spawn");
+if (value) {
+if (g_str_equal(value, "deny")) {
+seccomp_opts |= QEMU_SECCOMP_SET_SPAWN;
+} else if (g_str_equal(value, "allow")) {
+/* default value */
+} else {
+error_report("invalid argument for spawn");
+return -1;
+}
+}
+
+value = qemu_opt_get(opts, "resourcecontrol");
+if (value) {
+if (g_str_equal(value, "deny")) {
+seccomp_opts |= QEMU_SECCOMP_SET_RESOURCECTL;
+} else if (g_str_equal(value, "allow")) {
+/* default value */
+} else {
+error_report("invalid argument for resourcecontrol");
+return -1;
+}
+}
+
+if (seccomp_start(seccomp_opts) < 0) {
+error_report("failed to install seccomp syscall filter "
+ "in the kernel");
+return -1;
+}
+}
+
+   

Re: [Qemu-devel] block ais migration for machines <= 2.9

2017-09-20 Thread Yi Min Zhao



在 2017/9/21 上午12:04, Dr. David Alan Gilbert 写道:

* Christian Borntraeger (borntrae...@de.ibm.com) wrote:

Something like the following seems to do the trick.
Needs a proper patch description, review, and a full test with different kernel
versions.

Without knowing anything about 'ais' - will this break migration from
2.10 -> 2.10+this fix?

I think it doesn't break. I will have a try later.


Dave


diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index 1c7af39..2ff32ba 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -212,6 +212,7 @@ static void ccw_machine_class_init(ObjectClass *oc, void 
*data)
  s390mc->cpu_model_allowed = true;
  s390mc->css_migration_enabled = true;
  s390mc->gs_allowed = true;
+s390mc->ais_allowed = true;
  mc->init = ccw_init;
  mc->reset = s390_machine_reset;
  mc->hot_add_cpu = s390_hot_add_cpu;
@@ -305,6 +306,11 @@ bool gs_allowed(void)
  return false;
  }
  
+bool ais_allowed(void)

+{
+return get_machine_class()->ais_allowed;
+}
+
  static char *machine_get_loadparm(Object *obj, Error **errp)
  {
  S390CcwMachineState *ms = S390_CCW_MACHINE(obj);
@@ -533,6 +539,7 @@ static void ccw_machine_2_9_class_options(MachineClass *mc)
  S390CcwMachineClass *s390mc = S390_MACHINE_CLASS(mc);
  
  s390mc->gs_allowed = false;

+s390mc->ais_allowed = false;
  ccw_machine_2_10_class_options(mc);
  SET_MACHINE_COMPAT(mc, CCW_COMPAT_2_9);
  s390mc->css_migration_enabled = false;
diff --git a/include/hw/s390x/s390-virtio-ccw.h 
b/include/hw/s390x/s390-virtio-ccw.h
index 41a9d28..bba8660 100644
--- a/include/hw/s390x/s390-virtio-ccw.h
+++ b/include/hw/s390x/s390-virtio-ccw.h
@@ -41,6 +41,7 @@ typedef struct S390CcwMachineClass {
  bool cpu_model_allowed;
  bool css_migration_enabled;
  bool gs_allowed;
+bool ais_allowed;
  } S390CcwMachineClass;
  
  /* runtime-instrumentation allowed by the machine */

@@ -49,6 +50,8 @@ bool ri_allowed(void);
  bool cpu_model_allowed(void);
  /* guarded-storage allowed by the machine */
  bool gs_allowed(void);
+/* ais allowed by the machine */
+bool ais_allowed(void);
  
  /**

   * Returns true if (vmstate based) migration of the channel subsystem
diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c
index c4c5791..531d474 100644
--- a/target/s390x/kvm.c
+++ b/target/s390x/kvm.c
@@ -309,7 +309,9 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
  }
  
  /* Try to enable AIS facility */

-kvm_vm_enable_cap(s, KVM_CAP_S390_AIS, 0);
+if (ais_allowed()) {
+   kvm_vm_enable_cap(s, KVM_CAP_S390_AIS, 0);
+}
  
  qemu_mutex_init(&qemu_sigp_mutex);




--
Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK







Re: [Qemu-devel] [PATCH/RFC 2/3] s390x/ais: enable ais when migration is available

2017-09-26 Thread Yi Min Zhao



在 2017/9/25 下午7:47, Christian Borntraeger 写道:

On 09/25/2017 01:45 PM, Cornelia Huck wrote:

On Mon, 25 Sep 2017 12:12:49 +0200
Christian Borntraeger  wrote:


On 09/25/2017 12:07 PM, Cornelia Huck wrote:

On Fri, 22 Sep 2017 16:27:00 +0200
Halil Pasic  wrote:
   

One thing I would find very helpful is what do we expect to work and not
work for which version. Kind of a matrix. For instance should vfio pci
work for versions prior 2.11. I think in the not so distant past we
changed how SIC works (so it complains when we don't have ais).

A matrix sounds like a good idea.

I think we do not even need a matrix, a minimum level will suffice because...

I don't think we really ever had a setup that worked out of the box

exactly: ...it never worked until 2.10 and we do not have libvirt support yet.
Now with the fix 2.10 will also not work, so I think it's fair to say

PCI passthrough via VFIO will be supported for
- KVM: host kernel >= 4.13
- TCG: TBD
- QEMU >= 2.11
- libvirt TBD

Make that zpci-per-se, no?

with KVM: host kernel >= 4.13 && QEMU >= 2.11
with TCG: tbd, I don't think anybody has time to wire this up for 2.11

Apropos libvirt: How will it determine whether zpci should be
supported? There are some old QEMU + KVM combinations out there that
will have a phb (but not be usable by stock Linux guests as the feature
bits are missing). Version fence? Check for cpu feature support?

I think for multibus or something like that Boris wanted to check for a version
anyway. So maybe 2.11 (now that 2.10 is broken regarding ais) as a minimum QEMU
level would make sense.



I think this makes sense. But I think I have to discuss this with Boris.




Re: [Qemu-devel] [PATCH v2 1/2] s390x/ais: enable ais when migration is available

2017-09-26 Thread Yi Min Zhao



在 2017/9/26 下午10:06, Christian Borntraeger 写道:


On 09/26/2017 03:43 PM, David Hildenbrand wrote:

+/* try enable the AIS facility */
+test_attr.group = KVM_DEV_FLIC_AISM_ALL;
+if (!ioctl(flic_state->fd, KVM_HAS_DEVICE_ATTR, test_attr)) {
+kvm_vm_enable_cap(kvm_state, KVM_CAP_S390_AIS, 0);
+}
+
  return;
  fail:
  error_propagate(errp, errp_local);
@@ -578,7 +584,7 @@ static void kvm_s390_flic_reset(DeviceState *dev)
  
  flic_disable_wait_pfault(flic);
  
-if (fs->ais_supported) {

+if (s390_has_feat(S390_FEAT_ADAPTER_INT_SUPPRESSION)) {

I don't remember if a reset is triggered on realization. Most probably
not, but it could be.

Would it hurt if the following code would not get called if the flic
hasn't been used yet? (possible reset before cpu model has been
initialized) - are kvm_s390_modify_ais_mode() calls required before ais
can be used for the first time?

Hmm, simm/nimm should default to zero in the kernel I guess. So I think it would
not hurt. Yi Min, correct?

Yes, the default mode is all interruption mode, IOW simm = 0 and nimm = 0.



Anyway it seems that reset is NOT called during realize, the first
call is

 #0  0x010e5178 kvm_s390_flic_reset (qemu-system-s390x)
 #1  0x0124bbc4 device_reset (qemu-system-s390x)
 #2  0x01248cd0 qdev_reset_one (qemu-system-s390x)
 #3  0x01249ea4 qdev_walk_children (qemu-system-s390x)
 #4  0x0124fd3a qbus_walk_children (qemu-system-s390x)
 #5  0x01248e6c qbus_reset_all (qemu-system-s390x)
 #6  0x01248eae qbus_reset_all_fn (qemu-system-s390x)
 #7  0x01250a60 qemu_devices_reset (qemu-system-s390x)
 #8  0x01143c80 s390_machine_reset (qemu-system-s390x)
 #9  0x011c5b72 qemu_system_reset (qemu-system-s390x)
 #10 0x011ceb8e main (qemu-system-s390x)
 #11 0x03ff947a289a __libc_start_main (libc.so.6)
 #12 0x01017646 _start (qemu-system-s390x)








Re: [Qemu-devel] [PATCH v2 1/2] s390x/ais: enable ais when migration is available

2017-09-26 Thread Yi Min Zhao



在 2017/9/26 下午9:36, Christian Borntraeger 写道:

@@ -557,6 +557,12 @@ static void kvm_s390_flic_realize(DeviceState *dev, Error 
**errp)
  test_attr.group = KVM_DEV_FLIC_CLEAR_IO_IRQ;
  flic_state->clear_io_supported = !ioctl(flic_state->fd,
  KVM_HAS_DEVICE_ATTR, test_attr);
+/* try enable the AIS facility */
+test_attr.group = KVM_DEV_FLIC_AISM_ALL;
+if (!ioctl(flic_state->fd, KVM_HAS_DEVICE_ATTR, test_attr)) {
+kvm_vm_enable_cap(kvm_state, KVM_CAP_S390_AIS, 0);

Is there an indentation error?
Except this, the code LGTM.

+}
+
  return;





Re: [Qemu-devel] [PATCH 1/1] s390x: create a compat s390 phb for <=2.10

2017-09-27 Thread Yi Min Zhao



在 2017/9/27 下午5:47, Cornelia Huck 写道:

On Tue, 26 Sep 2017 20:40:25 +0200
David Hildenbrand  wrote:


On 26.09.2017 18:20, Cornelia Huck wrote:

d32bd032d8 ("s390x/ccw: create s390 phb conditionally") made
creating the s390 phb dependent on the zpci facility. This broke
migration from pre-cpu model machines which was fixed with
8ad9087c4a ("s390x/ccw: create s390 phb for compat reasons as well").
However, that is not enough: Migration from 2.10 with -cpu z13
breaks as well.

Let's create a phb for all pre-2.11 compat machines to fix this.
We leave the zpci facility off to avoid a guest-visible change
with cpu models on.

Reported-by: Christian Borntraeger 
Fixes: d32bd032d8 ("s390x/ccw: create s390 phb conditionally")
Signed-off-by: Cornelia Huck 
---
  hw/s390x/s390-virtio-ccw.c | 8 +++-
  include/hw/s390x/s390-virtio-ccw.h | 1 +
  2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index 1bcb7000ab..981f1c4336 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -247,6 +247,8 @@ static void s390_create_virtio_net(BusState *bus, const 
char *name)
  }
  }
  
+static S390CcwMachineClass *get_machine_class(void);

+
  static void ccw_init(MachineState *machine)
  {
  int ret;
@@ -266,7 +268,7 @@ static void ccw_init(MachineState *machine)
machine->initrd_filename, "s390-ccw.img",
"s390-netboot.img", true);
  
-if (s390_has_feat(S390_FEAT_ZPCI)) {

+if (s390_has_feat(S390_FEAT_ZPCI) || get_machine_class()->phb_compat) {
  DeviceState *dev = qdev_create(NULL, TYPE_S390_PCI_HOST_BRIDGE);
  object_property_add_child(qdev_get_machine(),
TYPE_S390_PCI_HOST_BRIDGE,
@@ -407,6 +409,7 @@ static void ccw_machine_class_init(ObjectClass *oc, void 
*data)
  s390mc->cpu_model_allowed = true;
  s390mc->css_migration_enabled = true;
  s390mc->gs_allowed = true;
+s390mc->phb_compat = false;
  mc->init = ccw_init;
  mc->reset = s390_machine_reset;
  mc->hot_add_cpu = s390_hot_add_cpu;
@@ -716,6 +719,9 @@ static void ccw_machine_2_10_instance_options(MachineState 
*machine)
  
  static void ccw_machine_2_10_class_options(MachineClass *mc)

  {
+S390CcwMachineClass *s390mc = S390_MACHINE_CLASS(mc);
+
+s390mc->phb_compat = pci_available;
  ccw_machine_2_11_class_options(mc);
  SET_MACHINE_COMPAT(mc, CCW_COMPAT_2_10);
  }
diff --git a/include/hw/s390x/s390-virtio-ccw.h 
b/include/hw/s390x/s390-virtio-ccw.h
index a9a90c2022..fb717afe92 100644
--- a/include/hw/s390x/s390-virtio-ccw.h
+++ b/include/hw/s390x/s390-virtio-ccw.h
@@ -41,6 +41,7 @@ typedef struct S390CcwMachineClass {
  bool cpu_model_allowed;
  bool css_migration_enabled;
  bool gs_allowed;
+bool phb_compat;
  } S390CcwMachineClass;
  
  /* runtime-instrumentation allowed by the machine */
   

I'd really really really (did I mention really?) favor something like a
dummy device, because we could easily handle the !CONFIG_PCI case then.

All these compat options and conditions will kill us someday... we're
already patching around that whole stuff way too much.

If we ever unconditionally created a device, we should keep doing so.

Yes, that whole thing is horrible, especially interaction with compat
machines.

Do you have an idea on how to create such a dummy device (without
having to effectively copy a lot of configured-out code)?



How about checking s390_has_feat(zpci) in s390_pcihost_hot_plug()?
If there is no zpci feature, we avoid plugging any pci device.
Then we could always create the phb.
I think the pcibus's vmstate is the only data to migrate.




[Qemu-devel] [PATCH v2 0/3] three zpci patches

2017-08-31 Thread Yi Min Zhao
This patch set contains three small zpci patches to fixup different issues.
1) remove zpci idx from msix message, instead we could use PCIDevice's id to
   find zpci device in kvm_arch_fixup_msi_route()
2) fixup ind_offset calculation for adapter interrupt routing entry
3) introduce our own iommu_replay callback

Yi Min Zhao (3):
  s390x/pci: remove idx from msix msg data
  s390x/pci: fixup ind_offset of msix routing entry
  s390x/pci: add iommu replay callback

 hw/s390x/s390-pci-bus.c  | 27 ---
 hw/s390x/s390-pci-bus.h  |  2 ++
 hw/s390x/s390-pci-inst.c | 24 
 hw/s390x/s390-pci-stub.c |  6 ++
 target/s390x/kvm.c   | 11 ++-
 5 files changed, 30 insertions(+), 40 deletions(-)

-- 
Change log:
from v1:
1) Add s390_pci_find_dev_by_target() in s390_pci_stub.c
2) Remove the accepted patch from the series (Thanks for Conny's help).
3) Fixup typo error.
4) Add more comment for s390_pci_iommu_replay().




[Qemu-devel] [PATCH v2 1/3] s390x/pci: remove idx from msix msg data

2017-08-31 Thread Yi Min Zhao
PCIDevice pointer has been a parameter of kvm_arch_fixup_msi_route().
So we don't need to store zpci idx in msix message data to find out the
specific zpci device. Instead, we could use pci device id to find its
corresponding zpci device.

Signed-off-by: Yi Min Zhao 
---
 hw/s390x/s390-pci-bus.c  | 16 +---
 hw/s390x/s390-pci-bus.h  |  2 ++
 hw/s390x/s390-pci-inst.c | 24 
 hw/s390x/s390-pci-stub.c |  6 ++
 target/s390x/kvm.c   |  7 +--
 5 files changed, 18 insertions(+), 37 deletions(-)

diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
index 0a31a4ae88..bd8a3e1e1c 100644
--- a/hw/s390x/s390-pci-bus.c
+++ b/hw/s390x/s390-pci-bus.c
@@ -199,8 +199,8 @@ static S390PCIBusDevice 
*s390_pci_find_dev_by_uid(S390pciState *s, uint16_t uid)
 return NULL;
 }
 
-static S390PCIBusDevice *s390_pci_find_dev_by_target(S390pciState *s,
- const char *target)
+S390PCIBusDevice *s390_pci_find_dev_by_target(S390pciState *s,
+  const char *target)
 {
 S390PCIBusDevice *pbdev;
 
@@ -465,19 +465,13 @@ static void s390_msi_ctrl_write(void *opaque, hwaddr 
addr, uint64_t data,
 unsigned int size)
 {
 S390PCIBusDevice *pbdev = opaque;
-uint32_t idx = data >> ZPCI_MSI_VEC_BITS;
 uint32_t vec = data & ZPCI_MSI_VEC_MASK;
 uint64_t ind_bit;
 uint32_t sum_bit;
-uint32_t e = 0;
 
-DPRINTF("write_msix data 0x%" PRIx64 " idx %d vec 0x%x\n", data, idx, vec);
-
-if (!pbdev) {
-e |= (vec << ERR_EVENT_MVN_OFFSET);
-s390_pci_generate_error_event(ERR_EVENT_NOMSI, idx, 0, addr, e);
-return;
-}
+assert(pbdev);
+DPRINTF("write_msix data 0x%" PRIx64 " idx %d vec 0x%x\n", data,
+pbdev->idx, vec);
 
 if (pbdev->state != ZPCI_FS_ENABLED) {
 return;
diff --git a/hw/s390x/s390-pci-bus.h b/hw/s390x/s390-pci-bus.h
index bd636abc28..560bd82a0f 100644
--- a/hw/s390x/s390-pci-bus.h
+++ b/hw/s390x/s390-pci-bus.h
@@ -322,6 +322,8 @@ void s390_pci_generate_error_event(uint16_t pec, uint32_t 
fh, uint32_t fid,
 S390PCIBusDevice *s390_pci_find_dev_by_idx(S390pciState *s, uint32_t idx);
 S390PCIBusDevice *s390_pci_find_dev_by_fh(S390pciState *s, uint32_t fh);
 S390PCIBusDevice *s390_pci_find_dev_by_fid(S390pciState *s, uint32_t fid);
+S390PCIBusDevice *s390_pci_find_dev_by_target(S390pciState *s,
+  const char *target);
 S390PCIBusDevice *s390_pci_find_next_avail_dev(S390pciState *s,
S390PCIBusDevice *pbdev);
 
diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c
index eba9ffb5f2..8e088f3dc9 100644
--- a/hw/s390x/s390-pci-inst.c
+++ b/hw/s390x/s390-pci-inst.c
@@ -413,29 +413,6 @@ int pcilg_service_call(S390CPU *cpu, uint8_t r1, uint8_t 
r2)
 return 0;
 }
 
-static void update_msix_table_msg_data(S390PCIBusDevice *pbdev, uint64_t 
offset,
-   uint64_t *data, uint8_t len)
-{
-uint32_t val;
-uint8_t *msg_data;
-
-if (offset % PCI_MSIX_ENTRY_SIZE != 8) {
-return;
-}
-
-if (len != 4) {
-DPRINTF("access msix table msg data but len is %d\n", len);
-return;
-}
-
-msg_data = (uint8_t *)data - offset % PCI_MSIX_ENTRY_SIZE +
-   PCI_MSIX_ENTRY_VECTOR_CTRL;
-val = pci_get_long(msg_data) |
-((pbdev->fh & FH_MASK_INDEX) << ZPCI_MSI_VEC_BITS);
-pci_set_long(msg_data, val);
-DPRINTF("update msix msg_data to 0x%" PRIx64 "\n", *data);
-}
-
 static int trap_msix(S390PCIBusDevice *pbdev, uint64_t offset, uint8_t pcias)
 {
 if (pbdev->msix.available && pbdev->msix.table_bar == pcias &&
@@ -508,7 +485,6 @@ int pcistg_service_call(S390CPU *cpu, uint8_t r1, uint8_t 
r2)
 if (trap_msix(pbdev, offset, pcias)) {
 offset = offset - pbdev->msix.table_offset;
 mr = &pbdev->pdev->msix_table_mmio;
-update_msix_table_msg_data(pbdev, offset, &data, len);
 } else {
 mr = pbdev->pdev->io_regions[pcias].memory;
 }
diff --git a/hw/s390x/s390-pci-stub.c b/hw/s390x/s390-pci-stub.c
index 7a642d376c..e501e1b9ea 100644
--- a/hw/s390x/s390-pci-stub.c
+++ b/hw/s390x/s390-pci-stub.c
@@ -74,3 +74,9 @@ S390PCIBusDevice *s390_pci_find_dev_by_idx(S390pciState *s, 
uint32_t idx)
 {
 return NULL;
 }
+
+S390PCIBusDevice *s390_pci_find_dev_by_target(S390pciState *s,
+  const char *target)
+{
+return NULL;
+}
diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c
index 1338c29528..3d490c5e4b 100644
--- a/target/s390x/kvm.c
+++ b/target/s390x/kvm.c
@@ -2533,10 +2533,13 @@ int kvm_arch_fixup_msi_route(struct 
kvm_irq_rou

[Qemu-devel] [PATCH v2 2/3] s390x/pci: fixup ind_offset of msix routing entry

2017-08-31 Thread Yi Min Zhao
The guest uses the mpcifc instruction to register the aibvo of a zpci
device, which is the starting offset of indicators in the indicator
area and thus remains constant. Each msix vector is an offset from the
aibvo. When we map a msix route to an adapter route, we should not
modify the starting offset, but instead add the vector to the starting
offset to get the absolute offset in the specific route.

Signed-off-by: Yi Min Zhao 
---
 target/s390x/kvm.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c
index 3d490c5e4b..21ce06966c 100644
--- a/target/s390x/kvm.c
+++ b/target/s390x/kvm.c
@@ -2545,14 +2545,12 @@ int kvm_arch_fixup_msi_route(struct 
kvm_irq_routing_entry *route,
 return -ENODEV;
 }
 
-pbdev->routes.adapter.ind_offset = vec;
-
 route->type = KVM_IRQ_ROUTING_S390_ADAPTER;
 route->flags = 0;
 route->u.adapter.summary_addr = pbdev->routes.adapter.summary_addr;
 route->u.adapter.ind_addr = pbdev->routes.adapter.ind_addr;
 route->u.adapter.summary_offset = pbdev->routes.adapter.summary_offset;
-route->u.adapter.ind_offset = pbdev->routes.adapter.ind_offset;
+route->u.adapter.ind_offset = pbdev->routes.adapter.ind_offset + vec;
 route->u.adapter.adapter_id = pbdev->routes.adapter.adapter_id;
 return 0;
 }
-- 
2.11.0 (Apple Git-81)




[Qemu-devel] [PATCH v2 3/3] s390x/pci: add iommu replay callback

2017-08-31 Thread Yi Min Zhao
Let's introduce iommu replay callback for s390 pci iommu memory region.
Currently we don't need any dma mapping replay. So let it return
directly. This implementation will avoid meaningless loops calling
translation callback.

Reviewed-by: Pierre Morel 
Reviewed-by: Halil Pasic 
Signed-off-by: Yi Min Zhao 
---
 hw/s390x/s390-pci-bus.c | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
index bd8a3e1e1c..69f45e3715 100644
--- a/hw/s390x/s390-pci-bus.c
+++ b/hw/s390x/s390-pci-bus.c
@@ -397,6 +397,16 @@ static IOMMUTLBEntry 
s390_translate_iommu(IOMMUMemoryRegion *mr, hwaddr addr,
 return ret;
 }
 
+static void s390_pci_iommu_replay(IOMMUMemoryRegion *iommu,
+  IOMMUNotifier *notifier)
+{
+/* It's impossible to plug a pci device on s390x that already has iommu
+ * mappings which need to be replayed, that is due to the "one iommu per
+ * zpci device" construct. So we don't need iommu replay currently.
+ */
+return;
+}
+
 static S390PCIIOMMU *s390_pci_get_iommu(S390pciState *s, PCIBus *bus,
 int devfn)
 {
@@ -1045,6 +1055,7 @@ static void 
s390_iommu_memory_region_class_init(ObjectClass *klass, void *data)
 IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass);
 
 imrc->translate = s390_translate_iommu;
+imrc->replay = s390_pci_iommu_replay;
 }
 
 static const TypeInfo s390_iommu_memory_region_info = {
-- 
2.11.0 (Apple Git-81)




Re: [Qemu-devel] [PATCHv3 1/2] pci: move check for existing devfn into new pci_bus_devfn_available() helper

2017-09-04 Thread Yi Min Zhao



在 2017/7/17 上午4:27, Mark Cave-Ayland 写道:

Also touch up the logic in do_pci_register_device() accordingly.

Signed-off-by: Mark Cave-Ayland 
---
  hw/pci/pci.c |   10 --
  1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 0c6f74a..efc9c86 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -951,6 +951,11 @@ uint16_t pci_requester_id(PCIDevice *dev)
  return pci_req_id_cache_extract(&dev->requester_id_cache);
  }

+static bool pci_bus_devfn_available(PCIBus *bus, int devfn)
+{
+return !(bus->devices[devfn]);

Hi,

I want to ask a question. According to the next patch, you check 
bus->devices[devfn]

and reserved bit separately. Why not move the reserved bit check here?
I think bus->devices[devfn] != NULL or the reserved bit being set means the slot is
unavailable.


Regards,

Yi Min

+}
+
  /* -1 for devfn means auto assign */
  static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
   const char *name, int devfn,
@@ -974,14 +979,15 @@ static PCIDevice *do_pci_register_device(PCIDevice 
*pci_dev, PCIBus *bus,
  if (devfn < 0) {
  for(devfn = bus->devfn_min ; devfn < ARRAY_SIZE(bus->devices);
  devfn += PCI_FUNC_MAX) {
-if (!bus->devices[devfn])
+if (pci_bus_devfn_available(bus, devfn)) {
  goto found;
+}
  }
  error_setg(errp, "PCI: no slot/function available for %s, all in use",
 name);
  return NULL;
  found: ;
-} else if (bus->devices[devfn]) {
+} else if (!pci_bus_devfn_available(bus, devfn)) {
  error_setg(errp, "PCI: slot %d function %d not available for %s,"
 " in use by %s",
 PCI_SLOT(devfn), PCI_FUNC(devfn), name,





Re: [Qemu-devel] [PATCH v2 1/3] s390x/pci: remove idx from msix msg data

2017-09-05 Thread Yi Min Zhao



在 2017/9/5 下午4:29, Cornelia Huck 写道:

On Fri,  1 Sep 2017 06:22:56 +0200
Yi Min Zhao  wrote:


PCIDevice pointer has been a parameter of kvm_arch_fixup_msi_route().
So we don't need to store zpci idx in msix message data to find out the
specific zpci device. Instead, we could use pci device id to find its
corresponding zpci device.

Signed-off-by: Yi Min Zhao 
---
  hw/s390x/s390-pci-bus.c  | 16 +---
  hw/s390x/s390-pci-bus.h  |  2 ++
  hw/s390x/s390-pci-inst.c | 24 
  hw/s390x/s390-pci-stub.c |  6 ++
  target/s390x/kvm.c   |  7 +--
  5 files changed, 18 insertions(+), 37 deletions(-)

diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
index 0a31a4ae88..bd8a3e1e1c 100644
--- a/hw/s390x/s390-pci-bus.c
+++ b/hw/s390x/s390-pci-bus.c
@@ -199,8 +199,8 @@ static S390PCIBusDevice 
*s390_pci_find_dev_by_uid(S390pciState *s, uint16_t uid)
  return NULL;
  }
  
-static S390PCIBusDevice *s390_pci_find_dev_by_target(S390pciState *s,

- const char *target)
+S390PCIBusDevice *s390_pci_find_dev_by_target(S390pciState *s,
+  const char *target)
  {
  S390PCIBusDevice *pbdev;
  
@@ -465,19 +465,13 @@ static void s390_msi_ctrl_write(void *opaque, hwaddr addr, uint64_t data,

  unsigned int size)
  {
  S390PCIBusDevice *pbdev = opaque;
-uint32_t idx = data >> ZPCI_MSI_VEC_BITS;
  uint32_t vec = data & ZPCI_MSI_VEC_MASK;
  uint64_t ind_bit;
  uint32_t sum_bit;
-uint32_t e = 0;
  
-DPRINTF("write_msix data 0x%" PRIx64 " idx %d vec 0x%x\n", data, idx, vec);

-
-if (!pbdev) {
-e |= (vec << ERR_EVENT_MVN_OFFSET);
-s390_pci_generate_error_event(ERR_EVENT_NOMSI, idx, 0, addr, e);
-return;
-}
+assert(pbdev);

I'm wondering whether you could/should generate an error event here.
The one above probably won't work (as it seems to take idx as a
parameter), but is this really 'this must not happen, we messed up in
our code'? (Probably yes, but I want to be sure.)

I think this must not happen. Once a pci device is plugged into the zPCI bus,
we assign a new memory region with the zpci device as opaque
for its msix. So if s390_msi_ctrl_write() is called, there must be a write
operation to a pci device's msix ctrl memory region, which must have the zpci
device as its opaque. The construct is one-msi-mr-per-pci-device.



+DPRINTF("write_msix data 0x%" PRIx64 " idx %d vec 0x%x\n", data,
+pbdev->idx, vec);
  
  if (pbdev->state != ZPCI_FS_ENABLED) {

  return;
diff --git a/hw/s390x/s390-pci-stub.c b/hw/s390x/s390-pci-stub.c
index 7a642d376c..e501e1b9ea 100644
--- a/hw/s390x/s390-pci-stub.c
+++ b/hw/s390x/s390-pci-stub.c
@@ -74,3 +74,9 @@ S390PCIBusDevice *s390_pci_find_dev_by_idx(S390pciState *s, 
uint32_t idx)
  {
  return NULL;
  }

Please remove s390_pci_find_dev_by_idx() from the stubs file, as it is
not used outside of the conditionally-built pci code anymore.
I'm confused. s390_pci_find_dev_by_idx() can be called in 
kvm_arch_fixup_msi_route().

And kvm_arch_fixup_msi_route() can be called by kvm_irqchip_add_msi_route().
As the code, I think s390_pci_find_dev_by_idx() might be called. Could 
you please

explain more?



+
+S390PCIBusDevice *s390_pci_find_dev_by_target(S390pciState *s,
+  const char *target)
+{
+return NULL;
+}
diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c
index 1338c29528..3d490c5e4b 100644
--- a/target/s390x/kvm.c
+++ b/target/s390x/kvm.c
@@ -2533,10 +2533,13 @@ int kvm_arch_fixup_msi_route(struct 
kvm_irq_routing_entry *route,
   uint64_t address, uint32_t data, PCIDevice *dev)
  {
  S390PCIBusDevice *pbdev;
-uint32_t idx = data >> ZPCI_MSI_VEC_BITS;
  uint32_t vec = data & ZPCI_MSI_VEC_MASK;
  
-pbdev = s390_pci_find_dev_by_idx(s390_get_phb(), idx);

+if (!dev) {
+return -ENODEV;

Can this actually happen?

I think this cannot happen, but I'm afraid that I might be missing something.
So I added this check to avoid a NULL pointer dereference. But judging from the
code and my tests, a NULL pointer has never occurred here.



+}
+
+pbdev = s390_pci_find_dev_by_target(s390_get_phb(), DEVICE(dev)->id);
  if (!pbdev) {
  DPRINTF("add_msi_route no dev\n");
  return -ENODEV;







Re: [Qemu-devel] [PATCH v2 1/3] s390x/pci: remove idx from msix msg data

2017-09-05 Thread Yi Min Zhao



在 2017/9/5 下午4:50, Cornelia Huck 写道:

On Tue, 5 Sep 2017 16:44:37 +0800
Yi Min Zhao  wrote:


在 2017/9/5 下午4:29, Cornelia Huck 写道:

On Fri,  1 Sep 2017 06:22:56 +0200
Yi Min Zhao  wrote:
  

PCIDevice pointer has been a parameter of kvm_arch_fixup_msi_route().
So we don't need to store zpci idx in msix message data to find out the
specific zpci device. Instead, we could use pci device id to find its
corresponding zpci device.

Signed-off-by: Yi Min Zhao 
---
   hw/s390x/s390-pci-bus.c  | 16 +---
   hw/s390x/s390-pci-bus.h  |  2 ++
   hw/s390x/s390-pci-inst.c | 24 
   hw/s390x/s390-pci-stub.c |  6 ++
   target/s390x/kvm.c   |  7 +--
   5 files changed, 18 insertions(+), 37 deletions(-)

diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
index 0a31a4ae88..bd8a3e1e1c 100644
--- a/hw/s390x/s390-pci-bus.c
+++ b/hw/s390x/s390-pci-bus.c
@@ -199,8 +199,8 @@ static S390PCIBusDevice 
*s390_pci_find_dev_by_uid(S390pciState *s, uint16_t uid)
   return NULL;
   }
   
-static S390PCIBusDevice *s390_pci_find_dev_by_target(S390pciState *s,

- const char *target)
+S390PCIBusDevice *s390_pci_find_dev_by_target(S390pciState *s,
+  const char *target)
   {
   S390PCIBusDevice *pbdev;
   
@@ -465,19 +465,13 @@ static void s390_msi_ctrl_write(void *opaque, hwaddr addr, uint64_t data,

   unsigned int size)
   {
   S390PCIBusDevice *pbdev = opaque;
-uint32_t idx = data >> ZPCI_MSI_VEC_BITS;
   uint32_t vec = data & ZPCI_MSI_VEC_MASK;
   uint64_t ind_bit;
   uint32_t sum_bit;
-uint32_t e = 0;
   
-DPRINTF("write_msix data 0x%" PRIx64 " idx %d vec 0x%x\n", data, idx, vec);

-
-if (!pbdev) {
-e |= (vec << ERR_EVENT_MVN_OFFSET);
-s390_pci_generate_error_event(ERR_EVENT_NOMSI, idx, 0, addr, e);
-return;
-}
+assert(pbdev);

I'm wondering whether you could/should generate an error event here.
The one above probably won't work (as it seems to take idx as a
parameter), but is this really 'this must not happen, we messed up in
our code'? (Probably yes, but I want to be sure.)

I think this must not happen. Once a pci device is plugged into the zPCI bus,
we assign a new memory region with the zpci device as opaque
for its msix. So if s390_msi_ctrl_write() is called, there must be a write
operation to a pci device's msix ctrl memory region, which must have the zpci
device as its opaque. The construct is one-msi-mr-per-pci-device.

This makes sense.

  

+DPRINTF("write_msix data 0x%" PRIx64 " idx %d vec 0x%x\n", data,
+pbdev->idx, vec);
   
   if (pbdev->state != ZPCI_FS_ENABLED) {

   return;
diff --git a/hw/s390x/s390-pci-stub.c b/hw/s390x/s390-pci-stub.c
index 7a642d376c..e501e1b9ea 100644
--- a/hw/s390x/s390-pci-stub.c
+++ b/hw/s390x/s390-pci-stub.c
@@ -74,3 +74,9 @@ S390PCIBusDevice *s390_pci_find_dev_by_idx(S390pciState *s, 
uint32_t idx)
   {
   return NULL;
   }

Please remove s390_pci_find_dev_by_idx() from the stubs file, as it is
not used outside of the conditionally-built pci code anymore.

I'm confused. s390_pci_find_dev_by_idx() can be called in
kvm_arch_fixup_msi_route().
And kvm_arch_fixup_msi_route() can be called by kvm_irqchip_add_msi_route().
As the code, I think s390_pci_find_dev_by_idx() might be called. Could
you please
explain more?

But this patch replaces this with s390_pci_find_dev_by_target(), no?
Oh! Sorry, I mixed up by_target() and by_idx(). Yes, by_idx() should be
removed.


  

+
+S390PCIBusDevice *s390_pci_find_dev_by_target(S390pciState *s,
+  const char *target)
+{
+return NULL;
+}
diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c
index 1338c29528..3d490c5e4b 100644
--- a/target/s390x/kvm.c
+++ b/target/s390x/kvm.c
@@ -2533,10 +2533,13 @@ int kvm_arch_fixup_msi_route(struct 
kvm_irq_routing_entry *route,
uint64_t address, uint32_t data, PCIDevice *dev)
   {
   S390PCIBusDevice *pbdev;
-uint32_t idx = data >> ZPCI_MSI_VEC_BITS;
   uint32_t vec = data & ZPCI_MSI_VEC_MASK;
   
-pbdev = s390_pci_find_dev_by_idx(s390_get_phb(), idx);

+if (!dev) {
+return -ENODEV;

Can this actually happen?

I think this cannot happen, but I'm afraid that I might be missing something.
So I added this check to avoid a NULL pointer dereference. But judging from the
code and my tests, a NULL pointer has never occurred here.

I'm wondering if that is in the same category as the instance I
commented on above. Do you want to log something?

For the case above, I ensure that zpci device must exist. But here, I'm 
not sure.

Because it's called from outside. I'm not sure if the caller might call
kvm_irqchip_add_msi_route() with NULL as pci device argument

Re: [Qemu-devel] [PATCH v2 1/3] s390x/pci: remove idx from msix msg data

2017-09-05 Thread Yi Min Zhao



在 2017/9/5 下午5:15, Cornelia Huck 写道:

On Tue, 5 Sep 2017 17:08:14 +0800
Yi Min Zhao  wrote:


在 2017/9/5 下午4:50, Cornelia Huck 写道:

On Tue, 5 Sep 2017 16:44:37 +0800
Yi Min Zhao  wrote:
  

在 2017/9/5 下午4:29, Cornelia Huck 写道:

On Fri,  1 Sep 2017 06:22:56 +0200
Yi Min Zhao  wrote:
 

PCIDevice pointer has been a parameter of kvm_arch_fixup_msi_route().
So we don't need to store zpci idx in msix message data to find out the
specific zpci device. Instead, we could use pci device id to find its
corresponding zpci device.

Signed-off-by: Yi Min Zhao 
---
hw/s390x/s390-pci-bus.c  | 16 +---
hw/s390x/s390-pci-bus.h  |  2 ++
hw/s390x/s390-pci-inst.c | 24 
hw/s390x/s390-pci-stub.c |  6 ++
target/s390x/kvm.c   |  7 +--
5 files changed, 18 insertions(+), 37 deletions(-)

diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c
index 1338c29528..3d490c5e4b 100644
--- a/target/s390x/kvm.c
+++ b/target/s390x/kvm.c
@@ -2533,10 +2533,13 @@ int kvm_arch_fixup_msi_route(struct 
kvm_irq_routing_entry *route,
 uint64_t address, uint32_t data, PCIDevice 
*dev)
{
S390PCIBusDevice *pbdev;
-uint32_t idx = data >> ZPCI_MSI_VEC_BITS;
uint32_t vec = data & ZPCI_MSI_VEC_MASK;

-pbdev = s390_pci_find_dev_by_idx(s390_get_phb(), idx);

+if (!dev) {
+return -ENODEV;

Can this actually happen?

I think this cannot happen. But I'm afraid that I miss something.
So I added this to avoid NULL pointer. But from the code and
my test, there has not been NULL pointer happened.

I'm wondering if that is in the same category as the instance I
commented on above. Do you want to log something?
  

For the case above, I ensure that zpci device must exist. But here, I'm
not sure.
Because it's called from outside. I'm not sure if the caller might call
kvm_irqchip_add_msi_route() with NULL as pci device argument.

Although the msix ctrl mr is accessed from outside, its initialization
is controlled by our code and the pointer to the zpci device is saved as
the mr's opaque.

OK. Maybe add a DPRINTF as for the condition below?

OK. How about DPRINTF("add_msi_route no pci device\n")?
And change the DPRINTF for the below condition to
DPRINTF("add_msi_route no zpci device\n").


 

+}
+
+pbdev = s390_pci_find_dev_by_target(s390_get_phb(), DEVICE(dev)->id);
if (!pbdev) {
DPRINTF("add_msi_route no dev\n");
return -ENODEV;
 
  







Re: [Qemu-devel] [PATCH v2 3/3] s390x/pci: add iommu replay callback

2017-09-05 Thread Yi Min Zhao



在 2017/9/5 下午5:28, Cornelia Huck 写道:

On Fri,  1 Sep 2017 06:22:58 +0200
Yi Min Zhao  wrote:


Let's introduce iommu replay callback for s390 pci iommu memory region.
Currently we don't need any dma mapping replay. So let it return
directly. This implementation will avoid meaningless loops calling
translation callback.

Reviewed-by: Pierre Morel 
Reviewed-by: Halil Pasic 
Signed-off-by: Yi Min Zhao 
---
  hw/s390x/s390-pci-bus.c | 11 +++
  1 file changed, 11 insertions(+)

diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
index bd8a3e1e1c..69f45e3715 100644
--- a/hw/s390x/s390-pci-bus.c
+++ b/hw/s390x/s390-pci-bus.c
@@ -397,6 +397,16 @@ static IOMMUTLBEntry 
s390_translate_iommu(IOMMUMemoryRegion *mr, hwaddr addr,
  return ret;
  }
  
+static void s390_pci_iommu_replay(IOMMUMemoryRegion *iommu,

+  IOMMUNotifier *notifier)
+{
+/* It's impossible to plug a pci device on s390x that already has iommu
+ * mappings which need to be replayed, that is due to the "one iommu per
+ * zpci device" construct. So we don't need iommu replay currently.

I must say that 'currently' still throws me off. Does this refer to
vfio? If yes, reword to something like 'Should we support migration of
vfio-pci devices in the future, we need to revisit this.'?

Yeah, it refers to vfio specifically. I will update this in the next version.



+ */
+return;
+}
+
  static S390PCIIOMMU *s390_pci_get_iommu(S390pciState *s, PCIBus *bus,
  int devfn)
  {
@@ -1045,6 +1055,7 @@ static void 
s390_iommu_memory_region_class_init(ObjectClass *klass, void *data)
  IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass);
  
  imrc->translate = s390_translate_iommu;

+imrc->replay = s390_pci_iommu_replay;
  }
  
  static const TypeInfo s390_iommu_memory_region_info = {







[Qemu-devel] [PATCH v3 0/3] three zpci patches

2017-09-05 Thread Yi Min Zhao
This patch set contains three small zpci patches to fixup different issues.
1) remove zpci idx from msix message, instead we could use PCIDevice's id to
   find zpci device in kvm_arch_fixup_msi_route()
2) fixup ind_offset calculation for adapter interrupt routing entry
3) introduce our own iommu_replay callback

Yi Min Zhao (3):
  s390x/pci: remove idx from msix msg data
  s390x/pci: fixup ind_offset of msix routing entry
  s390x/pci: add iommu replay callback

 hw/s390x/s390-pci-bus.c  | 28 +---
 hw/s390x/s390-pci-bus.h  |  2 ++
 hw/s390x/s390-pci-inst.c | 24 
 hw/s390x/s390-pci-stub.c |  3 ++-
 target/s390x/kvm.c   | 14 --
 5 files changed, 29 insertions(+), 42 deletions(-)

-- 
Change log:
from v2:
1) Remove s390_pci_find_dev_by_idx() from s390_pci_stub.c
2) Add output message in kvm_arch_fixup_msi_route() for dev argument check.
3) Update the comment for s390_pci_iommu_replay().

from v1:
1) Add s390_pci_find_dev_by_target() in s390_pci_stub.c
2) Remove the accepted patch from the series (Thanks for Conny's help).
3) Fixup typo error.
4) Add more comment for s390_pci_iommu_replay().




[Qemu-devel] [PATCH v3 3/3] s390x/pci: add iommu replay callback

2017-09-05 Thread Yi Min Zhao
Let's introduce iommu replay callback for s390 pci iommu memory region.
Currently we don't need any dma mapping replay. So let it return
directly. This implementation will avoid meaningless loops calling
translation callback.

Reviewed-by: Pierre Morel 
Reviewed-by: Halil Pasic 
Signed-off-by: Yi Min Zhao 
---
 hw/s390x/s390-pci-bus.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
index bd8a3e1e1c..3b9965fde0 100644
--- a/hw/s390x/s390-pci-bus.c
+++ b/hw/s390x/s390-pci-bus.c
@@ -397,6 +397,17 @@ static IOMMUTLBEntry 
s390_translate_iommu(IOMMUMemoryRegion *mr, hwaddr addr,
 return ret;
 }
 
+static void s390_pci_iommu_replay(IOMMUMemoryRegion *iommu,
+  IOMMUNotifier *notifier)
+{
+/* It's impossible to plug a pci device on s390x that already has iommu
+ * mappings which need to be replayed, that is due to the "one iommu per
+ * zpci device" construct. But when we support migration of vfio-pci
+ * devices in future, we need to revisit this.
+ */
+return;
+}
+
 static S390PCIIOMMU *s390_pci_get_iommu(S390pciState *s, PCIBus *bus,
 int devfn)
 {
@@ -1045,6 +1056,7 @@ static void 
s390_iommu_memory_region_class_init(ObjectClass *klass, void *data)
 IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass);
 
 imrc->translate = s390_translate_iommu;
+imrc->replay = s390_pci_iommu_replay;
 }
 
 static const TypeInfo s390_iommu_memory_region_info = {
-- 
2.11.0 (Apple Git-81)




[Qemu-devel] [PATCH v3 1/3] s390x/pci: remove idx from msix msg data

2017-09-05 Thread Yi Min Zhao
PCIDevice pointer has been a parameter of kvm_arch_fixup_msi_route().
So we don't need to store zpci idx in msix message data to find out the
specific zpci device. Instead, we could use pci device id to find its
corresponding zpci device.

Signed-off-by: Yi Min Zhao 
---
 hw/s390x/s390-pci-bus.c  | 16 +---
 hw/s390x/s390-pci-bus.h  |  2 ++
 hw/s390x/s390-pci-inst.c | 24 
 hw/s390x/s390-pci-stub.c |  3 ++-
 target/s390x/kvm.c   | 10 +++---
 5 files changed, 16 insertions(+), 39 deletions(-)

diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
index 0a31a4ae88..bd8a3e1e1c 100644
--- a/hw/s390x/s390-pci-bus.c
+++ b/hw/s390x/s390-pci-bus.c
@@ -199,8 +199,8 @@ static S390PCIBusDevice 
*s390_pci_find_dev_by_uid(S390pciState *s, uint16_t uid)
 return NULL;
 }
 
-static S390PCIBusDevice *s390_pci_find_dev_by_target(S390pciState *s,
- const char *target)
+S390PCIBusDevice *s390_pci_find_dev_by_target(S390pciState *s,
+  const char *target)
 {
 S390PCIBusDevice *pbdev;
 
@@ -465,19 +465,13 @@ static void s390_msi_ctrl_write(void *opaque, hwaddr 
addr, uint64_t data,
 unsigned int size)
 {
 S390PCIBusDevice *pbdev = opaque;
-uint32_t idx = data >> ZPCI_MSI_VEC_BITS;
 uint32_t vec = data & ZPCI_MSI_VEC_MASK;
 uint64_t ind_bit;
 uint32_t sum_bit;
-uint32_t e = 0;
 
-DPRINTF("write_msix data 0x%" PRIx64 " idx %d vec 0x%x\n", data, idx, vec);
-
-if (!pbdev) {
-e |= (vec << ERR_EVENT_MVN_OFFSET);
-s390_pci_generate_error_event(ERR_EVENT_NOMSI, idx, 0, addr, e);
-return;
-}
+assert(pbdev);
+DPRINTF("write_msix data 0x%" PRIx64 " idx %d vec 0x%x\n", data,
+pbdev->idx, vec);
 
 if (pbdev->state != ZPCI_FS_ENABLED) {
 return;
diff --git a/hw/s390x/s390-pci-bus.h b/hw/s390x/s390-pci-bus.h
index bd636abc28..560bd82a0f 100644
--- a/hw/s390x/s390-pci-bus.h
+++ b/hw/s390x/s390-pci-bus.h
@@ -322,6 +322,8 @@ void s390_pci_generate_error_event(uint16_t pec, uint32_t 
fh, uint32_t fid,
 S390PCIBusDevice *s390_pci_find_dev_by_idx(S390pciState *s, uint32_t idx);
 S390PCIBusDevice *s390_pci_find_dev_by_fh(S390pciState *s, uint32_t fh);
 S390PCIBusDevice *s390_pci_find_dev_by_fid(S390pciState *s, uint32_t fid);
+S390PCIBusDevice *s390_pci_find_dev_by_target(S390pciState *s,
+  const char *target);
 S390PCIBusDevice *s390_pci_find_next_avail_dev(S390pciState *s,
S390PCIBusDevice *pbdev);
 
diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c
index eba9ffb5f2..8e088f3dc9 100644
--- a/hw/s390x/s390-pci-inst.c
+++ b/hw/s390x/s390-pci-inst.c
@@ -413,29 +413,6 @@ int pcilg_service_call(S390CPU *cpu, uint8_t r1, uint8_t 
r2)
 return 0;
 }
 
-static void update_msix_table_msg_data(S390PCIBusDevice *pbdev, uint64_t 
offset,
-   uint64_t *data, uint8_t len)
-{
-uint32_t val;
-uint8_t *msg_data;
-
-if (offset % PCI_MSIX_ENTRY_SIZE != 8) {
-return;
-}
-
-if (len != 4) {
-DPRINTF("access msix table msg data but len is %d\n", len);
-return;
-}
-
-msg_data = (uint8_t *)data - offset % PCI_MSIX_ENTRY_SIZE +
-   PCI_MSIX_ENTRY_VECTOR_CTRL;
-val = pci_get_long(msg_data) |
-((pbdev->fh & FH_MASK_INDEX) << ZPCI_MSI_VEC_BITS);
-pci_set_long(msg_data, val);
-DPRINTF("update msix msg_data to 0x%" PRIx64 "\n", *data);
-}
-
 static int trap_msix(S390PCIBusDevice *pbdev, uint64_t offset, uint8_t pcias)
 {
 if (pbdev->msix.available && pbdev->msix.table_bar == pcias &&
@@ -508,7 +485,6 @@ int pcistg_service_call(S390CPU *cpu, uint8_t r1, uint8_t 
r2)
 if (trap_msix(pbdev, offset, pcias)) {
 offset = offset - pbdev->msix.table_offset;
 mr = &pbdev->pdev->msix_table_mmio;
-update_msix_table_msg_data(pbdev, offset, &data, len);
 } else {
 mr = pbdev->pdev->io_regions[pcias].memory;
 }
diff --git a/hw/s390x/s390-pci-stub.c b/hw/s390x/s390-pci-stub.c
index 7a642d376c..ad4c5a7719 100644
--- a/hw/s390x/s390-pci-stub.c
+++ b/hw/s390x/s390-pci-stub.c
@@ -70,7 +70,8 @@ S390pciState *s390_get_phb(void)
 return NULL;
 }
 
-S390PCIBusDevice *s390_pci_find_dev_by_idx(S390pciState *s, uint32_t idx)
+S390PCIBusDevice *s390_pci_find_dev_by_target(S390pciState *s,
+  const char *target)
 {
 return NULL;
 }
diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c
index 1338c29528..ebbeb4d6b3 100644
--- a/target/s390x/kvm.c
+++ b/target/s390x/kvm.c
@@ -2533,12 +2533,16 @@ int kvm_arch_fixup_msi_route(struct 

[Qemu-devel] [PATCH v3 2/3] s390x/pci: fixup ind_offset of msix routing entry

2017-09-05 Thread Yi Min Zhao
The guest uses the mpcifc instruction to register the aibvo of a zpci
device, which is the starting offset of indicators in the indicator
area and thus remains constant. Each msix vector is an offset from the
aibvo. When we map a msix route to an adapter route, we should not
modify the starting offset, but instead add the vector to the starting
offset to get the absolute offset in the specific route.

Signed-off-by: Yi Min Zhao 
---
 target/s390x/kvm.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c
index ebbeb4d6b3..931b85fe3d 100644
--- a/target/s390x/kvm.c
+++ b/target/s390x/kvm.c
@@ -2546,14 +2546,12 @@ int kvm_arch_fixup_msi_route(struct 
kvm_irq_routing_entry *route,
 return -ENODEV;
 }
 
-pbdev->routes.adapter.ind_offset = vec;
-
 route->type = KVM_IRQ_ROUTING_S390_ADAPTER;
 route->flags = 0;
 route->u.adapter.summary_addr = pbdev->routes.adapter.summary_addr;
 route->u.adapter.ind_addr = pbdev->routes.adapter.ind_addr;
 route->u.adapter.summary_offset = pbdev->routes.adapter.summary_offset;
-route->u.adapter.ind_offset = pbdev->routes.adapter.ind_offset;
+route->u.adapter.ind_offset = pbdev->routes.adapter.ind_offset + vec;
 route->u.adapter.adapter_id = pbdev->routes.adapter.adapter_id;
 return 0;
 }
-- 
2.11.0 (Apple Git-81)




Re: [Qemu-devel] [PATCH v3 0/3] three zpci patches

2017-09-05 Thread Yi Min Zhao

Thank you very much!


在 2017/9/5 下午7:58, Cornelia Huck 写道:

On Tue,  5 Sep 2017 12:12:57 +0200
Yi Min Zhao  wrote:


This patch set contains three small zpci patches to fixup different issues.
1) remove zpci idx from msix message, instead we could use PCIDevice's id to
find zpci device in kvm_arch_fixup_msi_route()
2) fixup ind_offset calculation for adapter interrupt routing entry
3) introduce our own iommu_replay callback

Yi Min Zhao (3):
   s390x/pci: remove idx from msix msg data
   s390x/pci: fixup ind_offset of msix routing entry
   s390x/pci: add iommu replay callback

  hw/s390x/s390-pci-bus.c  | 28 +---
  hw/s390x/s390-pci-bus.h  |  2 ++
  hw/s390x/s390-pci-inst.c | 24 
  hw/s390x/s390-pci-stub.c |  3 ++-
  target/s390x/kvm.c   | 14 --
  5 files changed, 29 insertions(+), 42 deletions(-)


Thanks, applied.







Re: [Qemu-devel] [PATCH 0/3] iotests: cure s390x failures by switching to ccw

2017-09-06 Thread Yi Min Zhao



在 2017/9/6 下午3:59, Cornelia Huck 写道:

On Wed, 6 Sep 2017 14:57:48 +0800
QingFeng Hao  wrote:


在 2017/9/5 23:16, Cornelia Huck 写道:

Recent changes in s390x made pci support dependent on the zpci cpu
feature, which is not provided on all models (and not on by default).
This means we cannot instantiate pci devices on a standard qemu
invocation for s390x. Moreover, the zpci instructions are not even
wired up for tcg yet, so actually doing anything with those pci devices
is bound to fail on tcg.

Let's follow the existing example in 068 and switch to the (default)
virtio-ccw transport on s390x. The changes for 051 and 067 are split
out as they require adding an output file for s390x (the actual command
lines are part of the output).

We also found this error and YiMin suggested to change the code in ccw_init
as below:

if (pci_available) {
      DeviceState *dev = qdev_create(NULL, TYPE_S390_PCI_HOST_BRIDGE);
      ...
}
We tested it and it can make the 5 cases passed.

OK, looked at this. This won't work: pci_available means "this qemu has
pci support built in". _Working_ zpci, however, depends on the presence
of the zpci feature bit: You'll have a host bridge and can define
devices that have absolutely no chance of working, since all pci
instructions will return errors. You will be in a similar situation
under kvm as under tcg: you can specify virtio-pci devices on the
command line, but they can't work.
Oh. Yes, that makes sense. Actually, the first approach we thought about was
to change the code, not the test cases. Thanks for your work.


This probably makes the 5 cases pass as they only rely on the ability
to create the device, not to do anything with them.

So, I still think the right thing to do is to switch to ccw in the
tests (and to wire up pci in tcg, but that's an orthogonal issue).

Agree.








Re: [Qemu-devel] [PATCH 2/7] s390x/pci: rework PCI STORE

2017-11-10 Thread Yi Min Zhao



在 2017/11/10 上午12:50, Cornelia Huck 写道:

On Tue,  7 Nov 2017 18:24:34 +0100
Pierre Morel  wrote:


Enhance the fault detection and correct the fault reporting.

Signed-off-by: Pierre Morel 
Reviewed-by: Yi Min Zhao 
---
  hw/s390x/s390-pci-inst.c | 41 -
  1 file changed, 24 insertions(+), 17 deletions(-)

diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c
index 8fcb02d..4a2f996 100644
--- a/hw/s390x/s390-pci-inst.c
+++ b/hw/s390x/s390-pci-inst.c
@@ -469,6 +469,12 @@ int pcistg_service_call(S390CPU *cpu, uint8_t r1, uint8_t 
r2)
  pcias = (env->regs[r2] >> 16) & 0xf;
  len = env->regs[r2] & 0xf;
  offset = env->regs[r2 + 1];
+data = env->regs[r1];
+
+if (!(fh & FH_MASK_ENABLE)) {

This covers the reserved/standby/disabled states, right?

yes

[...]




Re: [Qemu-devel] [PATCH 6/7] s390x/pci: move the memory region write from pcistg

2017-11-10 Thread Yi Min Zhao



在 2017/11/10 上午3:23, Cornelia Huck 写道:

On Tue,  7 Nov 2017 18:24:38 +0100
Pierre Morel  wrote:


Let's move the memory region write from pcistg into a dedicated
function.
This allows us to prepare a later patch searching for subregions
inside of the memory region.

OK, so here is the memory region write. Do we have any sleeping
endianness bugs in there for when we wire up tcg? I'm not sure how this
plays with the bswaps (see patch 1).

But maybe I've just gotten lost somewhere.

I think there's no error. For PCI bars' MRs, we get little-endian data,
which exactly matches the byte ordering of the pcilg instruction. For PCI
config space, the data has been swapped according to the cpu byte ordering,
so we use zpci_swap_endian() to swap the data back to the little-endian
ordering.



Signed-off-by: Pierre Morel 
Reviewed-by: Yi Min Zhao 
---
  hw/s390x/s390-pci-inst.c | 27 +--
  1 file changed, 17 insertions(+), 10 deletions(-)

diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c
index 50135a0..97f62b5 100644
--- a/hw/s390x/s390-pci-inst.c
+++ b/hw/s390x/s390-pci-inst.c
@@ -455,12 +455,27 @@ static int trap_msix(S390PCIBusDevice *pbdev, uint64_t 
offset, uint8_t pcias)
  }
  }
  
+static MemTxResult zpci_write_bar(S390PCIBusDevice *pbdev, uint8_t pcias,

+  uint64_t offset, uint64_t data, uint8_t len)
+{
+MemoryRegion *mr;
+
+if (trap_msix(pbdev, offset, pcias)) {
+offset = offset - pbdev->msix.table_offset;
+mr = &pbdev->pdev->msix_table_mmio;
+} else {
+mr = pbdev->pdev->io_regions[pcias].memory;
+}
+
+return memory_region_dispatch_write(mr, offset, data, len,
+MEMTXATTRS_UNSPECIFIED);
+}
+
  int pcistg_service_call(S390CPU *cpu, uint8_t r1, uint8_t r2)
  {
  CPUS390XState *env = &cpu->env;
  uint64_t offset, data;
  S390PCIBusDevice *pbdev;
-MemoryRegion *mr;
  MemTxResult result;
  uint8_t len;
  uint32_t fh;
@@ -517,15 +532,7 @@ int pcistg_service_call(S390CPU *cpu, uint8_t r1, uint8_t 
r2)
  return 0;
  }
  
-if (trap_msix(pbdev, offset, pcias)) {

-offset = offset - pbdev->msix.table_offset;
-mr = &pbdev->pdev->msix_table_mmio;
-} else {
-mr = pbdev->pdev->io_regions[pcias].memory;
-}
-
-result = memory_region_dispatch_write(mr, offset, data, len,
- MEMTXATTRS_UNSPECIFIED);
+result = zpci_write_bar(pbdev, pcias, offset, data, len);
  if (result != MEMTX_OK) {
  program_interrupt(env, PGM_OPERAND, 4);
  return 0;







[Qemu-devel] [PATCH] s390x/pci: forbid multifunction pci device

2018-03-13 Thread Yi Min Zhao
Currently we don't support pci multifunction. If a pci device with
multifunction is plugged, the guest will spin forever. This patch fixes
this.

Signed-off-by: Yi Min Zhao 
Reviewed-by: Pierre Morel 
---
 hw/s390x/s390-pci-bus.c | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
index 77a50cab36..10da87458e 100644
--- a/hw/s390x/s390-pci-bus.c
+++ b/hw/s390x/s390-pci-bus.c
@@ -816,6 +816,11 @@ static void s390_pcihost_hot_plug(HotplugHandler 
*hotplug_dev,
 PCIBridge *pb = PCI_BRIDGE(dev);
 PCIDevice *pdev = PCI_DEVICE(dev);
 
+if (pdev->cap_present & QEMU_PCI_CAP_MULTIFUNCTION) {
+error_setg(errp, "multifunction not supported in s390");
+return;
+}
+
 pci_bridge_map_irq(pb, dev->id, s390_pci_map_irq);
 pci_setup_iommu(&pb->sec_bus, s390_pci_dma_iommu, s);
 
@@ -835,6 +840,11 @@ static void s390_pcihost_hot_plug(HotplugHandler 
*hotplug_dev,
 } else if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) {
 pdev = PCI_DEVICE(dev);
 
+if (pdev->cap_present & QEMU_PCI_CAP_MULTIFUNCTION) {
+error_setg(errp, "multifunction not supported in s390");
+return;
+}
+
 if (!dev->id) {
 /* In the case the PCI device does not define an id */
 /* we generate one based on the PCI address */
-- 
2.14.3 (Apple Git-98)




Re: [Qemu-devel] [PATCH] s390x/pci: forbid multifunction pci device

2018-03-13 Thread Yi Min Zhao



在 2018/3/14 下午1:35, Thomas Huth 写道:

On 14.03.2018 06:14, Yi Min Zhao wrote:

Currently we don't support pci multifunction. If a pci device with
multifunction is plugged, the guest will spin forever. This patch fixes
this.

Signed-off-by: Yi Min Zhao 
Reviewed-by: Pierre Morel 
---
  hw/s390x/s390-pci-bus.c | 10 ++
  1 file changed, 10 insertions(+)

diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
index 77a50cab36..10da87458e 100644
--- a/hw/s390x/s390-pci-bus.c
+++ b/hw/s390x/s390-pci-bus.c
@@ -816,6 +816,11 @@ static void s390_pcihost_hot_plug(HotplugHandler 
*hotplug_dev,
  PCIBridge *pb = PCI_BRIDGE(dev);
  PCIDevice *pdev = PCI_DEVICE(dev);

Off-topic: That "PCIDevice *pdev" shadows the pdev variable that is
declared at the beginning of this function. So I think we should rather
change the above line into "pdev = PCI_DEVICE(dev)" instead, without
re-declaring a variable here. (i.e. we should do this in a separate
patch later...).
Thanks for the reminder. Actually, I had noticed this, but I thought
it was not very urgent. I will do this later.



+if (pdev->cap_present & QEMU_PCI_CAP_MULTIFUNCTION) {
+error_setg(errp, "multifunction not supported in s390");
+return;
+}
+
  pci_bridge_map_irq(pb, dev->id, s390_pci_map_irq);
  pci_setup_iommu(&pb->sec_bus, s390_pci_dma_iommu, s);
  
@@ -835,6 +840,11 @@ static void s390_pcihost_hot_plug(HotplugHandler *hotplug_dev,

  } else if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_DEVICE)) {
  pdev = PCI_DEVICE(dev);
  
+if (pdev->cap_present & QEMU_PCI_CAP_MULTIFUNCTION) {

+error_setg(errp, "multifunction not supported in s390");
+return;
+}
+
  if (!dev->id) {
  /* In the case the PCI device does not define an id */
  /* we generate one based on the PCI address */


Reviewed-by: Thomas Huth 







[Qemu-devel] [PATCH 0/1] Bug: Sandbox: libvirt breakdowns qemu guest

2018-05-06 Thread Yi Min Zhao
1. Problem Description
==
If QEMU is built without seccomp support, 'elevateprivileges' remains compiled.
This option of sandbox is treated as an indication for seccomp blacklist support
in libvirt. This behavior is introduced by the libvirt commits 31ca6a5 and
3527f9d. It would make libvirt build wrong QEMU cmdline, and then the guest
startup would fail.

2. Libvirt Log
==
qemu-system-s390x: -sandbox on,obsolete=deny,elevateprivileges=deny,spawn=deny,\
resourcecontrol=deny: seccomp support is disabled

3. Fixup

Wrap the options except 'enable' for qemu_sandbox_opts by CONFIG_SECCOMP.

Yi Min Zhao (1):
  sandbox: avoid to compile options if CONFIG_SECCOMP undefined

 vl.c | 2 ++
 1 file changed, 2 insertions(+)

-- 
2.15.1 (Apple Git-101)




  1   2   >