On Mon, 14 Sep 2020 18:29:29 -0400 Matthew Rosato <mjros...@linux.ibm.com> wrote:
> When an s390 guest is using lazy unmapping, it can result in a very > large number of oustanding DMA requests, far beyond the default > limit configured for vfio. Let's track DMA usage similar to vfio > in the host, and trigger the guest to flush their DMA mappings > before vfio runs out. > > Signed-off-by: Matthew Rosato <mjros...@linux.ibm.com> > --- > hw/s390x/s390-pci-bus.c | 99 > +++++++++++++++++++++++++++++++++++++++++++++--- > hw/s390x/s390-pci-bus.h | 9 +++++ > hw/s390x/s390-pci-inst.c | 29 +++++++++++--- > hw/s390x/s390-pci-inst.h | 3 ++ > 4 files changed, 129 insertions(+), 11 deletions(-) > (...) > @@ -737,6 +742,82 @@ static void s390_pci_iommu_free(S390pciState *s, PCIBus > *bus, int32_t devfn) > object_unref(OBJECT(iommu)); > } > > +static bool s390_sync_dma_avail(int fd, unsigned int *avail) Not sure I like the name. It sounds like the function checks whether "sync dma" is available. Maybe s390_update_dma_avail()? > +{ > + struct vfio_iommu_type1_info *info; > + uint32_t argsz; > + bool rval = false; > + int ret; > + > + if (avail == NULL) { > + return false; > + } > + > + argsz = sizeof(struct vfio_iommu_type1_info); > + info = g_malloc0(argsz); > + info->argsz = argsz; > + /* > + * If the specified argsz is not large enough to contain all > + * capabilities it will be updated upon return. In this case > + * use the updated value to get the entire capability chain. > + */ > + ret = ioctl(fd, VFIO_IOMMU_GET_INFO, info); > + if (argsz != info->argsz) { > + argsz = info->argsz; > + info = g_realloc(info, argsz); > + info->argsz = argsz; > + ret = ioctl(fd, VFIO_IOMMU_GET_INFO, info); > + } > + > + if (ret) { > + goto out; > + } > + > + /* If the capability exists, update with the current value */ > + rval = vfio_get_info_dma_avail(info, avail); Adding vfio specific things into the generic s390 pci emulation code looks a bit ugly... I'd prefer to factor that out into a separate file, especially if you plan to add more vfio-specific things in the future. > + > +out: > + g_free(info); > + return rval; > +} > + (...) > diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c > index 2f7a7d7..6af9af4 100644 > --- a/hw/s390x/s390-pci-inst.c > +++ b/hw/s390x/s390-pci-inst.c > @@ -32,6 +32,9 @@ > } \ > } while (0) > > +#define INC_DMA_AVAIL(iommu) if (iommu->dma_limit) iommu->dma_limit->avail++; > +#define DEC_DMA_AVAIL(iommu) if (iommu->dma_limit) iommu->dma_limit->avail--; Hm... maybe lowercase inline functions might be better here? > + > static void s390_set_status_code(CPUS390XState *env, > uint8_t r, uint64_t status_code) > { (...) > @@ -620,6 +629,7 @@ int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t > r2, uintptr_t ra) > S390PCIIOMMU *iommu; > S390IOTLBEntry entry; > hwaddr start, end; > + uint32_t dma_avail; > > if (env->psw.mask & PSW_MASK_PSTATE) { > s390_program_interrupt(env, PGM_PRIVILEGED, ra); > @@ -675,8 +685,9 @@ int rpcit_service_call(S390CPU *cpu, uint8_t r1, uint8_t > r2, uintptr_t ra) > } > > start += entry.len; > - while (entry.iova < start && entry.iova < end) { > - s390_pci_update_iotlb(iommu, &entry); > + dma_avail = 1; /* Assume non-zero dma_avail to start */ > + while (entry.iova < start && entry.iova < end && dma_avail > 0) { > + dma_avail = s390_pci_update_iotlb(iommu, &entry); > entry.iova += PAGE_SIZE; > entry.translated_addr += PAGE_SIZE; > } > @@ -689,7 +700,13 @@ err: > s390_pci_generate_error_event(error, pbdev->fh, pbdev->fid, start, > 0); > } else { > pbdev->fmb.counter[ZPCI_FMB_CNT_RPCIT]++; > - setcc(cpu, ZPCI_PCI_LS_OK); > + if (dma_avail > 0) { When I compile this (with a headers update), the compiler moans here about an uninitialized variable. > + setcc(cpu, ZPCI_PCI_LS_OK); > + } else { > + /* vfio DMA mappings are exhausted, trigger a RPCIT */ > + setcc(cpu, ZPCI_PCI_LS_ERR); > + s390_set_status_code(env, r1, ZPCI_RPCIT_ST_INSUFF_RES); > + } > } > return 0; > } (...)