* Cornelia Huck <cornelia.h...@de.ibm.com> [2017-02-20 19:31:13 +0100]:
> On Fri, 17 Feb 2017 09:29:28 +0100 > Dong Jia Shi <bjsdj...@linux.vnet.ibm.com> wrote: > > > To make vfio support subchannel devices, we need a css driver for > > the vfio subchannels. This patch adds a basic vfio-ccw subchannel > > driver for this purpose. > > > > To enable VFIO for vfio-ccw, enable S390_CCW_IOMMU config option > > and configure VFIO as required. > > > > Signed-off-by: Dong Jia Shi <bjsdj...@linux.vnet.ibm.com> > > Acked-by: Pierre Morel <pmo...@linux.vnet.ibm.com> > > --- > > arch/s390/Kconfig | 10 ++ > > arch/s390/include/asm/isc.h | 1 + > > drivers/iommu/Kconfig | 8 ++ > > drivers/s390/cio/Makefile | 3 + > > drivers/s390/cio/vfio_ccw_drv.c | 262 > > ++++++++++++++++++++++++++++++++++++ > > drivers/s390/cio/vfio_ccw_private.h | 25 ++++ > > 6 files changed, 309 insertions(+) > > create mode 100644 drivers/s390/cio/vfio_ccw_drv.c > > create mode 100644 drivers/s390/cio/vfio_ccw_private.h > > > > diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig > > index c6722112..b920df8 100644 > > --- a/arch/s390/Kconfig > > +++ b/arch/s390/Kconfig > > @@ -670,6 +670,16 @@ config EADM_SCH > > To compile this driver as a module, choose M here: the > > module will be called eadm_sch. > > > > +config VFIO_CCW > > + def_tristate n > > + prompt "Support for VFIO-CCW subchannels" > > + depends on S390_CCW_IOMMU && VFIO > > + help > > + This driver allows usage of VFIO-CCW subchannels. > > Hm... > > "This driver allows usage of I/O subchannels via VFIO-CCW." > > ? > This is better. Will change. > > + > > + To compile this driver as a module, choose M here: the > > + module will be called vfio_ccw. > > + > > endmenu > > > > menu "Dump support" > > diff --git a/arch/s390/include/asm/isc.h b/arch/s390/include/asm/isc.h > > index 68d7d68..8a0b721 100644 > > --- a/arch/s390/include/asm/isc.h > > +++ b/arch/s390/include/asm/isc.h > > @@ -16,6 +16,7 @@ > > #define CONSOLE_ISC 1 /* console I/O subchannel */ > > #define EADM_SCH_ISC 4 /* EADM subchannels */ > > #define CHSC_SCH_ISC 7 /* CHSC subchannels */ > > +#define VFIO_CCW_ISC IO_SCH_ISC /* VFIO-CCW I/O subchannels */ > > This is OK for now, I guess; but do we want to have the isc > configurable in the long run? I.e., if a host wants to run its own I/O > devices at a different priority than the devices it passes to a guest? > I think we can keep this as the default value, and provide a driver param to customize the ISC value in the future once we need this. I put this on my LATER list, or I do it in next version? > > /* Adapter interrupts. */ > > #define QDIO_AIRQ_ISC IO_SCH_ISC /* I/O subchannel in qdio mode */ > > #define PCI_ISC 2 /* PCI I/O subchannels */ > > (...) > > > diff --git a/drivers/s390/cio/vfio_ccw_drv.c > > b/drivers/s390/cio/vfio_ccw_drv.c > > new file mode 100644 > > index 0000000..b068207 > > --- /dev/null > > +++ b/drivers/s390/cio/vfio_ccw_drv.c > > @@ -0,0 +1,262 @@ > > +/* > > + * VFIO based Physical Subchannel device driver > > + * > > + * Copyright IBM Corp. 2017 > > + * > > + * Author(s): Dong Jia Shi <bjsdj...@linux.vnet.ibm.com> > > + * Xiao Feng Ren <renxi...@linux.vnet.ibm.com> > > + */ > > + > > +#include <linux/module.h> > > +#include <linux/init.h> > > +#include <linux/device.h> > > +#include <linux/slab.h> > > + > > +#include <asm/isc.h> > > + > > +#include "vfio_ccw_private.h" > > + > > +/* > > + * Helpers > > + */ > > +static int vfio_ccw_sch_quiesce(struct subchannel *sch) > > +{ > > + struct vfio_ccw_private *private = dev_get_drvdata(&sch->dev); > > + DECLARE_COMPLETION_ONSTACK(completion); > > + int iretry, ret = 0; > > + > > + spin_lock_irq(sch->lock); > > + if (!sch->schib.pmcw.ena) > > + goto out_unlock; > > + ret = cio_disable_subchannel(sch); > > + if (ret != -EBUSY) > > + goto out_unlock; > > + > > + do { > > + iretry = 255; > > + > > + ret = cio_cancel_halt_clear(sch, &iretry); > > + while (ret == -EBUSY) { > > + /* > > + * Flushing all I/O and wait the > > "Flush all I/O and wait for..." > Ok. > > + * cancel/halt/clear completion. > > + */ > > + private->completion = &completion; > > + spin_unlock_irq(sch->lock); > > + > > + wait_for_completion(&completion); > > What happens for cancel? It won't generate an interrupt. > Right! How about using: wait_for_completion_timeout(&completion, 3*HZ); (I stole '3*HZ' from ccw_device_kill_io.) > > + > > + spin_lock_irq(sch->lock); > > + private->completion = NULL; > > + ret = cio_cancel_halt_clear(sch, &iretry); > > + }; > > + > > + ret = cio_disable_subchannel(sch); > > + } while (ret == -EBUSY); > > + > > +out_unlock: > > + spin_unlock_irq(sch->lock); > > + return ret; > > +} > > + > > +/* > > + * Sysfs interfaces > > + */ > > +static ssize_t chpids_show(struct device *dev, > > + struct device_attribute *attr, > > + char *buf) > > +{ > > + struct subchannel *sch = to_subchannel(dev); > > + struct chsc_ssd_info *ssd = &sch->ssd_info; > > + ssize_t ret = 0; > > + int chp; > > + int mask; > > + > > + for (chp = 0; chp < 8; chp++) { > > + mask = 0x80 >> chp; > > + if (ssd->path_mask & mask) > > + ret += sprintf(buf + ret, "%02x ", ssd->chpid[chp].id); > > + else > > + ret += sprintf(buf + ret, "00 "); > > + } > > + ret += sprintf(buf+ret, "\n"); > > + return ret; > > +} > > + > > +static ssize_t pimpampom_show(struct device *dev, > > + struct device_attribute *attr, > > + char *buf) > > +{ > > + struct subchannel *sch = to_subchannel(dev); > > + struct pmcw *pmcw = &sch->schib.pmcw; > > + > > + return sprintf(buf, "%02x %02x %02x\n", > > + pmcw->pim, pmcw->pam, pmcw->pom); > > +} > > + > > +static DEVICE_ATTR(chpids, 0444, chpids_show, NULL); > > +static DEVICE_ATTR(pimpampom, 0444, pimpampom_show, NULL); > > Quick question: You need to duplicate these so that lscss shows a sane > output for vfio-ccw subchannels? > Yes, and also for retrieving these values in QEMU. > > + > > +static struct attribute *vfio_subchannel_attrs[] = { > > + &dev_attr_chpids.attr, > > + &dev_attr_pimpampom.attr, > > + NULL, > > +}; > > + > > +static struct attribute_group vfio_subchannel_attr_group = { > > + .attrs = vfio_subchannel_attrs, > > +}; > > + > > +/* > > + * Css driver callbacks > > + */ > > +static void vfio_ccw_sch_irq(struct subchannel *sch) > > +{ > > + struct vfio_ccw_private *private = dev_get_drvdata(&sch->dev); > > + > > + inc_irq_stat(IRQIO_CIO); > > + > > + if (!private) > > + return; > > + > > + if (private->completion) > > + complete(private->completion); > > +} > > + > > +static int vfio_ccw_sch_probe(struct subchannel *sch) > > +{ > > + struct pmcw *pmcw = &sch->schib.pmcw; > > + struct vfio_ccw_private *private; > > + int ret; > > + > > + if (pmcw->qf) { > > + dev_warn(&sch->dev, "vfio: ccw: do not support QDIO: %s\n", > > s/do/does/ > Ok. > > + dev_name(&sch->dev)); > > + return -ENOTTY; > > Is -ENOTTY the right return code here? -EINVAL? > Ok. Think it again. -EINVAL makes more sense. It's like: "hey, I know it's an I/O subchannel, but not the kind we support". > > + } > > + > > + private = kzalloc(sizeof(*private), GFP_KERNEL | GFP_DMA); > > + if (!private) > > + return -ENOMEM; > > + private->sch = sch; > > + dev_set_drvdata(&sch->dev, private); > > + > > + spin_lock_irq(sch->lock); > > + sch->isc = VFIO_CCW_ISC; > > + ret = cio_enable_subchannel(sch, (u32)(unsigned long)sch); > > + spin_unlock_irq(sch->lock); > > + if (ret) > > + goto out_free; > > + > > + ret = sysfs_create_group(&sch->dev.kobj, &vfio_subchannel_attr_group); > > + if (ret) > > + goto out_disable; > > + > > + return 0; > > + > > +out_disable: > > + cio_disable_subchannel(sch); > > +out_free: > > + dev_set_drvdata(&sch->dev, NULL); > > + kfree(private); > > + return ret; > > +} > > + > > (...) > > > +/** > > + * vfio_ccw_sch_event - process subchannel event > > + * @sch: subchannel > > + * @process: non-zero if function is called in process context > > + * > > + * An unspecified event occurred for this subchannel. Adjust data according > > + * to the current operational state of the subchannel. Return zero when the > > + * event has been handled sufficiently or -EAGAIN when this function should > > + * be called again in process context. > > + */ > > +static int vfio_ccw_sch_event(struct subchannel *sch, int process) > > +{ > > + unsigned long flags; > > + > > + spin_lock_irqsave(sch->lock, flags); > > + if (!device_is_registered(&sch->dev)) > > + goto out_unlock; > > + > > + if (work_pending(&sch->todo_work)) > > + goto out_unlock; > > + > > + if (cio_update_schib(sch)) { > > + /* Not operational. */ > > + css_sched_sch_todo(sch, SCH_TODO_UNREG); > > + > > + /* > > + * TODO: > > + * Probably we should send the machine check to the guest. > > Yes, we should do that later on. Will user space notice that the device > is gone? (I think crw injection should be done by user space.) > Currently we lack this mechanism. I think there are many todos here. I will investigate latter. > > + */ > > + goto out_unlock; > > + } > > + > > +out_unlock: > > + spin_unlock_irqrestore(sch->lock, flags); > > + > > + return 0; > > +} > > (...) > > Looks sane in general from my POV. Thanks for the comments. -- Dong Jia