@@ -977,6 +977,7 @@ static void s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
S390pciState *s = S390_PCI_HOST_BRIDGE(hotplug_dev);
PCIDevice *pdev = NULL;
S390PCIBusDevice *pbdev = NULL;
+ int ret;
if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_BRIDGE)) {
PCIBridge *pb = PCI_BRIDGE(dev);
@@ -1027,6 +1028,20 @@ static void s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
pbdev->iommu->dma_limit = s390_pci_start_dma_count(s, pbdev);
/* Fill in CLP information passed via the vfio region */
s390_pci_get_clp_info(pbdev);
+
+ /*
+ * For a relaxed-alignment device, setup the special I/O region
+ * if available. Otherwise, the device cannot be passed through.
+ */
+ ret = 0;
+ if (pbdev->pci_group->zpci_group.fr & CLP_RSP_QPCIG_MASK_RELAXED) {
+ ret = s390_pci_get_zpci_io_region(pbdev);
+ }
+ if (ret) {
+ error_setg(errp, "vfio zPCI I/O region support is mandatory "
+ "for %02x:%02x.%01x", pci_dev_bus_num(pdev),
+ PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
+ }
} else {
pbdev->fh |= FH_SHM_EMUL;
@@ -19,6 +19,7 @@
#include "sysemu/hw_accel.h"
#include "hw/s390x/s390-pci-inst.h"
#include "hw/s390x/s390-pci-bus.h"
+#include "hw/s390x/s390-pci-vfio.h"
#include "hw/s390x/tod.h"
#ifndef DEBUG_S390PCI_INST
@@ -897,6 +898,8 @@ int pcistb_service_call(S390CPU *cpu, uint8_t r1, uint8_t r3, uint64_t gaddr,
ret = pbdev->ops.pcistb(pbdev, cpu, gaddr, ar, pcias, len, offset);
switch (ret) {
+ case -EIO:
+ /* fall through */
case -EINVAL:
s390_program_interrupt(env, PGM_OPERAND, ra);
return 0;
@@ -1386,3 +1389,8 @@ void zpci_assign_default_ops(S390PCIBusDevice *pbdev)
pbdev->ops.pcilg = pcilg_default;
pbdev->ops.pcistb = pcistb_default;
}
+
+/*
+ * Switch the PCISTB handler of @pbdev to s390_pci_vfio_pcistb.  No other
+ * member of pbdev->ops is modified here.
+ */
+void zpci_assign_ops_vfio_io_region(S390PCIBusDevice *pbdev)
+{
+    pbdev->ops.pcistb = s390_pci_vfio_pcistb;
+}
@@ -17,6 +17,7 @@
#include "trace.h"
#include "hw/s390x/s390-pci-bus.h"
#include "hw/s390x/s390-pci-clp.h"
+#include "hw/s390x/s390-pci-inst.h"
#include "hw/s390x/s390-pci-vfio.h"
#include "hw/vfio/pci.h"
#include "hw/vfio/vfio-common.h"
@@ -277,3 +278,110 @@ retry:
return;
}
+
+/*
+ * Look for the VFIO_REGION_SUBTYPE_IBM_ZPCI_IO vfio device region, which is
+ * used for performing block I/O operations, and prepare @pbdev to use it.
+ *
+ * On success the region header has been read into pbdev->io_region, the
+ * file offset of the request area is cached in pbdev->io_region_op_offset,
+ * the group's maxstbl is set to MIN(PAGE_SIZE, hdr.max) and the PCISTB
+ * handler of @pbdev is switched to the vfio I/O region variant.
+ *
+ * Returns 0 on success, -ENOENT if no such region is found, or -EINVAL if
+ * the region is smaller than expected or its header cannot be read.  When
+ * the header read fails, pbdev->io_region is freed and reset to NULL.
+ */
+int s390_pci_get_zpci_io_region(S390PCIBusDevice *pbdev)
+{
+    VFIOPCIDevice *vfio_pci;
+    VFIODevice *vdev;
+    struct vfio_region_info *info;
+    ssize_t nread;
+    int ret;
+
+    vfio_pci = container_of(pbdev->pdev, VFIOPCIDevice, pdev);
+    vdev = &vfio_pci->vbasedev;
+
+    /* Fewer than VFIO_PCI_NUM_REGIONS + 1 regions: no extra one to find */
+    if (vdev->num_regions < VFIO_PCI_NUM_REGIONS + 1) {
+        return -ENOENT;
+    }
+
+    /* Get the I/O region if it's available */
+    if (vfio_get_dev_region_info(vdev,
+                                 PCI_VENDOR_ID_IBM |
+                                 VFIO_REGION_TYPE_PCI_VENDOR_TYPE,
+                                 VFIO_REGION_SUBTYPE_IBM_ZPCI_IO, &info)) {
+        return -ENOENT;
+    }
+
+    /*
+     * From here on @info is owned by this function and must be released on
+     * every exit path, including the error ones.
+     */
+
+    /* If the size is unexpectedly small, don't use the region */
+    if (sizeof(*pbdev->io_region) > info->size) {
+        ret = -EINVAL;
+        goto out;
+    }
+
+    pbdev->io_region = g_malloc0(info->size);
+
+    /* Check the header for setup information */
+    nread = pread(vdev->fd, &pbdev->io_region->hdr,
+                  sizeof(struct vfio_zpci_io_hdr), info->offset);
+    if (nread != (ssize_t)sizeof(struct vfio_zpci_io_hdr)) {
+        g_free(pbdev->io_region);
+        pbdev->io_region = NULL;
+        ret = -EINVAL;
+        goto out;
+    }
+
+    pbdev->io_region_op_offset = info->offset +
+                                 offsetof(struct vfio_region_zpci_io, req);
+    /* All devices in this group will use the I/O region for PCISTB */
+    pbdev->pci_group->zpci_group.maxstbl = MIN(PAGE_SIZE,
+                                               pbdev->io_region->hdr.max);
+
+    /* Register the new handlers for the device now the region is usable */
+    zpci_assign_ops_vfio_io_region(pbdev);
+    ret = 0;
+
+out:
+    g_free(info);
+    return ret;
+}
+
+/*
+ * PCISTB handler that forwards a guest block store to the host through the
+ * vfio zPCI I/O region of @pbdev.
+ *
+ * @len bytes are read from guest address @gaddr (access register @ar) into
+ * a bounce buffer, then a VFIO_ZPCI_IO_FLAG_BLOCKW request describing that
+ * buffer, @offset, @len and @pcias is written to the region at
+ * pbdev->io_region_op_offset.
+ *
+ * Returns 0 on success, -EIO if the region was never set up or the request
+ * could not be written in full, -EINVAL if @len does not fit the bounce
+ * buffer, and -EACCES if guest memory cannot be read.
+ */
+int s390_pci_vfio_pcistb(S390PCIBusDevice *pbdev, S390CPU *cpu, uint64_t gaddr,
+                         uint8_t ar, uint8_t pcias, uint16_t len,
+                         uint64_t offset)
+{
+    struct vfio_region_zpci_io *region = pbdev->io_region;
+    VFIOPCIDevice *vfio_pci;
+    uint8_t *buffer;
+    ssize_t nwritten;
+    int ret;
+
+    if (region == NULL) {
+        return -EIO;
+    }
+
+    /*
+     * The caller is expected to have limited the length to maxstbl, which
+     * is at most a page.  Re-check here anyway: @len comes from the guest
+     * and the bounce buffer below is exactly one page.
+     */
+    if (len > PAGE_SIZE) {
+        return -EINVAL;
+    }
+
+    vfio_pci = container_of(pbdev->pdev, VFIOPCIDevice, pdev);
+
+    /*
+     * PCISTB requires that the operation payload does not cross a page
+     * boundary, otherwise the operation will be rejected.  Therefore, just
+     * get a single page for the write.
+     */
+    buffer = qemu_memalign(PAGE_SIZE, PAGE_SIZE);
+
+    if (s390_cpu_virt_mem_read(cpu, gaddr, ar, buffer, len)) {
+        ret = -EACCES;
+        goto out;
+    }
+
+    /* The request carries the host address of the bounce buffer */
+    region->req.gaddr = (uint64_t)(uintptr_t)buffer;
+    region->req.offset = offset;
+    region->req.len = len;
+    region->req.pcias = pcias;
+    region->req.flags = VFIO_ZPCI_IO_FLAG_BLOCKW;
+
+    nwritten = pwrite(vfio_pci->vbasedev.fd, &region->req,
+                      sizeof(struct vfio_zpci_io_req),
+                      pbdev->io_region_op_offset);
+    /* A short or failed write both mean the request was not accepted */
+    if (nwritten != (ssize_t)sizeof(struct vfio_zpci_io_req)) {
+        ret = -EIO;
+    } else {
+        ret = 0;
+    }
+
+out:
+    qemu_vfree(buffer);
+
+    return ret;
+}
@@ -355,6 +355,8 @@ struct S390PCIBusDevice {
uint32_t fh;
uint32_t fid;
bool fid_defined;
+ uint64_t io_region_op_offset;
+ struct vfio_region_zpci_io *io_region;
uint64_t fmb_addr;
ZpciFmb fmb;
QEMUTimer *fmb_timer;
@@ -112,6 +112,7 @@ int stpcifc_service_call(S390CPU *cpu, uint8_t r1, uint64_t fiba, uint8_t ar,
uintptr_t ra);
void fmb_timer_free(S390PCIBusDevice *pbdev);
void zpci_assign_default_ops(S390PCIBusDevice *pbdev);
+void zpci_assign_ops_vfio_io_region(S390PCIBusDevice *pbdev);
#define ZPCI_IO_BAR_MIN 0
#define ZPCI_IO_BAR_MAX 5
@@ -21,6 +21,10 @@ S390PCIDMACount *s390_pci_start_dma_count(S390pciState *s,
S390PCIBusDevice *pbdev);
void s390_pci_end_dma_count(S390pciState *s, S390PCIDMACount *cnt);
void s390_pci_get_clp_info(S390PCIBusDevice *pbdev);
+int s390_pci_get_zpci_io_region(S390PCIBusDevice *pbdev);
+int s390_pci_vfio_pcistb(S390PCIBusDevice *pbdev, S390CPU *cpu, uint64_t gaddr,
+ uint8_t ar, uint8_t pcias, uint16_t len,
+ uint64_t offset);
#else
static inline bool s390_pci_update_dma_avail(int fd, unsigned int *avail)
{
@@ -34,6 +38,17 @@ static inline S390PCIDMACount *s390_pci_start_dma_count(S390pciState *s,
static inline void s390_pci_end_dma_count(S390pciState *s,
S390PCIDMACount *cnt) { }
static inline void s390_pci_get_clp_info(S390PCIBusDevice *pbdev) { }
+/* Stub for the #else build configuration: always fails with -EINVAL. */
+static inline int s390_pci_get_zpci_io_region(S390PCIBusDevice *pbdev)
+{
+    return -EINVAL;
+}
+/* Stub for the #else build configuration: PCISTB always fails with -EIO. */
+static inline int s390_pci_vfio_pcistb(S390PCIBusDevice *pbdev, S390CPU *cpu,
+                                       uint64_t gaddr, uint8_t ar,
+                                       uint8_t pcias, uint16_t len,
+                                       uint64_t offset)
+{
+    return -EIO;
+}
#endif
#endif
For ISM devices, use the vfio region to handle intercepted PCISTB instructions. This region will allow large block I/O instructions intercepted from the guest to be performed as a single I/O instruction on the host. This ensures that the write patterns expected by the underlying device are respected and that a non-MIO instruction is used to perform the operation (as ISM devices do not support the MIO instruction set). Furthermore, add a requirement that the I/O region must be available in order to pass the device through to the guest. Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com> --- hw/s390x/s390-pci-bus.c | 15 ++++++ hw/s390x/s390-pci-inst.c | 8 +++ hw/s390x/s390-pci-vfio.c | 108 +++++++++++++++++++++++++++++++++++++++ include/hw/s390x/s390-pci-bus.h | 2 + include/hw/s390x/s390-pci-inst.h | 1 + include/hw/s390x/s390-pci-vfio.h | 15 ++++++ 6 files changed, 149 insertions(+)