@@ -23,6 +23,7 @@ s390x_ss.add(when: 'CONFIG_KVM', if_true: files(
's390-skeys-kvm.c',
's390-stattrib-kvm.c',
'pv.c',
+ 's390-pci-kvm.c',
))
s390x_ss.add(when: 'CONFIG_TCG', if_true: files(
'tod-tcg.c',
@@ -16,6 +16,7 @@
#include "qapi/visitor.h"
#include "hw/s390x/s390-pci-bus.h"
#include "hw/s390x/s390-pci-inst.h"
+#include "hw/s390x/s390-pci-kvm.h"
#include "hw/s390x/s390-pci-vfio.h"
#include "hw/pci/pci_bus.h"
#include "hw/qdev-properties.h"
@@ -971,12 +972,45 @@ static void s390_pci_update_subordinate(PCIDevice *dev, uint32_t nr)
}
}
+/*
+ * Prepare a passthrough device for interpretation: register it through
+ * s390_pci_kvm_plug() and make pbdev->idx agree with the index bits of the
+ * function handle that call stored in pbdev->fh.
+ *
+ * Returns 0 on success, the nonzero result of s390_pci_kvm_plug() if that
+ * call fails, or -EINVAL if s390_pci_find_dev_by_idx() already finds a device
+ * for the new index.
+ */
+static int s390_pci_interp_plug(S390pciState *s, S390PCIBusDevice *pbdev)
+{
+ uint32_t idx;
+ int rc;
+
+ rc = s390_pci_kvm_plug(pbdev);
+ if (rc) {
+ return rc;
+ }
+
+ /* Next, see if the idx is already in-use */
+ idx = pbdev->fh & FH_MASK_INDEX;
+ if (pbdev->idx != idx) {
+ if (s390_pci_find_dev_by_idx(s, idx)) {
+ /*
+ * NOTE(review): the s390_pci_kvm_plug() call above is not undone on
+ * this path - confirm that the caller cleans it up.
+ */
+ return -EINVAL;
+ }
+ /*
+ * Update the idx entry with the passed through idx
+ * If the relinquished idx is lower than next_idx, use it
+ * to replace next_idx
+ */
+ /*
+ * The table entry is keyed by the address of pbdev->idx, so presumably
+ * the old entry has to be removed before the field is overwritten and
+ * re-inserted afterwards - TODO confirm against the table's hash/equal
+ * functions.
+ * NOTE(review): the comment above speaks of the relinquished (old) idx,
+ * but the comparison below uses the new idx - confirm which is intended.
+ */
+ g_hash_table_remove(s->zpci_table, &pbdev->idx);
+ if (idx < s->next_idx) {
+ s->next_idx = idx;
+ }
+ pbdev->idx = idx;
+ g_hash_table_insert(s->zpci_table, &pbdev->idx, pbdev);
+ }
+
+ return 0;
+}
+
static void s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
Error **errp)
{
S390pciState *s = S390_PCI_HOST_BRIDGE(hotplug_dev);
PCIDevice *pdev = NULL;
S390PCIBusDevice *pbdev = NULL;
+ int rc;
if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_BRIDGE)) {
PCIBridge *pb = PCI_BRIDGE(dev);
@@ -1022,12 +1056,35 @@ static void s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
set_pbdev_info(pbdev);
if (object_dynamic_cast(OBJECT(dev), "vfio-pci")) {
- pbdev->fh |= FH_SHM_VFIO;
+ /*
+ * By default, interpretation is always requested; if the available
+ * facilities indicate it is not available, fallback to the
+ * interception model.
+ */
+ if (pbdev->interp) {
+ if (s390_pci_kvm_zpciop_allowed()) {
+ rc = s390_pci_interp_plug(s, pbdev);
+ if (rc) {
+ error_setg(errp, "Plug failed for zPCI device in "
+ "interpretation mode: %d", rc);
+ return;
+ }
+ } else {
+ DPRINTF("zPCI interpretation facilities missing.\n");
+ pbdev->interp = false;
+ }
+ }
pbdev->iommu->dma_limit = s390_pci_start_dma_count(s, pbdev);
/* Fill in CLP information passed via the vfio region */
s390_pci_get_clp_info(pbdev);
+ if (!pbdev->interp) {
+ /* Do vfio passthrough but intercept for I/O */
+ pbdev->fh |= FH_SHM_VFIO;
+ }
} else {
pbdev->fh |= FH_SHM_EMUL;
+ /* Always intercept emulated devices */
+ pbdev->interp = false;
}
if (s390_pci_msix_init(pbdev)) {
@@ -1078,6 +1135,8 @@ static void s390_pcihost_unplug(HotplugHandler *hotplug_dev, DeviceState *dev,
pbdev->pdev = NULL;
pbdev->state = ZPCI_FS_RESERVED;
} else if (object_dynamic_cast(OBJECT(dev), TYPE_S390_PCI_DEVICE)) {
+ int rc;
+
pbdev = S390_PCI_DEVICE(dev);
pbdev->fid = 0;
QTAILQ_REMOVE(&s->zpci_devs, pbdev, link);
@@ -1085,6 +1144,11 @@ static void s390_pcihost_unplug(HotplugHandler *hotplug_dev, DeviceState *dev,
if (pbdev->iommu->dma_limit) {
s390_pci_end_dma_count(s, pbdev->iommu->dma_limit);
}
+ rc = s390_pci_kvm_unplug(pbdev);
+ if (rc) {
+ error_setg(errp, "Unplug failed for zPCI device in interpretation "
+ "mode rc=%d", rc);
+ }
qdev_unrealize(dev);
}
}
@@ -1360,6 +1424,7 @@ static Property s390_pci_device_properties[] = {
DEFINE_PROP_UINT16("uid", S390PCIBusDevice, uid, UID_UNDEFINED),
DEFINE_PROP_S390_PCI_FID("fid", S390PCIBusDevice, fid),
DEFINE_PROP_STRING("target", S390PCIBusDevice, target),
+ DEFINE_PROP_BOOL("interpret", S390PCIBusDevice, interp, true),
DEFINE_PROP_END_OF_LIST(),
};
@@ -18,6 +18,8 @@
#include "sysemu/hw_accel.h"
#include "hw/s390x/s390-pci-inst.h"
#include "hw/s390x/s390-pci-bus.h"
+#include "hw/s390x/s390-pci-kvm.h"
+#include "hw/s390x/s390-pci-vfio.h"
#include "hw/s390x/tod.h"
#ifndef DEBUG_S390PCI_INST
@@ -156,6 +158,37 @@ out:
return rc;
}
+/*
+ * Switch a device to interpretation mode on behalf of CLP SET PCI FN.
+ *
+ * s390_pci_kvm_interp_enable() refreshes pbdev->fh on success; the refreshed
+ * handle must carry FH_MASK_ENABLE, otherwise the request is rejected.
+ *
+ * Returns 0 on success, the error from s390_pci_kvm_interp_enable(), or
+ * -EINVAL when the resulting handle is not enabled.
+ */
+static int clp_enable_interp(S390PCIBusDevice *pbdev)
+{
+    int ret = s390_pci_kvm_interp_enable(pbdev);
+
+    if (ret != 0) {
+        DPRINTF("Failed to enable interpretation\n");
+        return ret;
+    }
+
+    if (pbdev->fh & FH_MASK_ENABLE) {
+        return 0;
+    }
+
+    DPRINTF("Passthrough handle is not enabled\n");
+    return -EINVAL;
+}
+
+/*
+ * Leave interpretation mode on behalf of CLP SET PCI FN.
+ *
+ * Returns the result of s390_pci_kvm_interp_disable(): 0 on success,
+ * otherwise its error code (a debug message is emitted in that case).
+ */
+static int clp_disable_interp(S390PCIBusDevice *pbdev)
+{
+    int ret = s390_pci_kvm_interp_disable(pbdev);
+
+    if (ret != 0) {
+        DPRINTF("Failed to disable interpretation\n");
+    }
+
+    return ret;
+}
+
int clp_service_call(S390CPU *cpu, uint8_t r2, uintptr_t ra)
{
ClpReqHdr *reqh;
@@ -246,7 +279,19 @@ int clp_service_call(S390CPU *cpu, uint8_t r2, uintptr_t ra)
goto out;
}
- pbdev->fh |= FH_MASK_ENABLE;
+ /*
+ * If interpretation is specified, attempt to enable this now and
+ * update with the host fh
+ */
+ if (pbdev->interp) {
+ if (clp_enable_interp(pbdev)) {
+ stw_p(&ressetpci->hdr.rsp, CLP_RC_SETPCIFN_ERR);
+ goto out;
+ }
+ } else {
+ pbdev->fh |= FH_MASK_ENABLE;
+ }
+
pbdev->state = ZPCI_FS_ENABLED;
stl_p(&ressetpci->fh, pbdev->fh);
stw_p(&ressetpci->hdr.rsp, CLP_RC_OK);
@@ -257,6 +302,13 @@ int clp_service_call(S390CPU *cpu, uint8_t r2, uintptr_t ra)
goto out;
}
device_legacy_reset(DEVICE(pbdev));
+ if (pbdev->interp) {
+ if (clp_disable_interp(pbdev)) {
+ stw_p(&ressetpci->hdr.rsp, CLP_RC_SETPCIFN_ERR);
+ goto out;
+ }
+ }
+ /* Mask off the enabled bit for interpreted devices too */
pbdev->fh &= ~FH_MASK_ENABLE;
pbdev->state = ZPCI_FS_DISABLED;
stl_p(&ressetpci->fh, pbdev->fh);
new file mode 100644
@@ -0,0 +1,112 @@
+/*
+ * s390 zPCI KVM interfaces
+ *
+ * Copyright 2022 IBM Corp.
+ * Author(s): Matthew Rosato <mjrosato@linux.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at
+ * your option) any later version. See the COPYING file in the top-level
+ * directory.
+ */
+
+#include "qemu/osdep.h"
+
+#include <linux/kvm.h>
+
+#include "kvm/kvm_s390x.h"
+#include "hw/s390x/s390-pci-bus.h"
+#include "hw/s390x/s390-pci-kvm.h"
+#include "hw/s390x/s390-pci-vfio.h"
+
+/*
+ * Report whether zPCI operations may be issued through KVM: the
+ * S390_FEAT_ZPCI_INTERP feature must be present and kvm_s390_get_zpci_op()
+ * must return nonzero. The capability is only queried when the feature is
+ * present.
+ */
+bool s390_pci_kvm_zpciop_allowed(void)
+{
+    if (!s390_has_feat(S390_FEAT_ZPCI_INTERP)) {
+        return false;
+    }
+
+    return kvm_s390_get_zpci_op() != 0;
+}
+
+/*
+ * Issue KVM_S390_ZPCIOP_INIT for the host function behind pbdev and adopt the
+ * function handle reported back by the ioctl.
+ *
+ * Returns 0 on success, -EINVAL when no host function handle can be obtained,
+ * or the nonzero result of the ioctl. pbdev->fh is only written on success.
+ */
+int s390_pci_kvm_plug(S390PCIBusDevice *pbdev)
+{
+    struct kvm_s390_zpci_op args = {
+        .op = KVM_S390_ZPCIOP_INIT
+    };
+    int ret;
+
+    if (!s390_pci_get_host_fh(pbdev, &args.fh)) {
+        return -EINVAL;
+    }
+
+    ret = kvm_vm_ioctl(kvm_state, KVM_S390_ZPCI_OP, &args);
+    if (ret) {
+        return ret;
+    }
+
+    /*
+     * The guest always sees a freshly plugged device as disabled
+     * (ZPCI_FS_DISABLED) even though the host function is already enabled,
+     * so the enable bit is stripped from the passthrough handle here. It
+     * comes back once the guest enables the device via CLP SET PCI FN.
+     */
+    pbdev->fh = args.newfh & ~FH_MASK_ENABLE;
+
+    return 0;
+}
+
+/*
+ * Issue KVM_S390_ZPCIOP_END for the device, passing pbdev->fh with
+ * FH_MASK_ENABLE set as the handle.
+ *
+ * Returns the result of the ioctl.
+ */
+int s390_pci_kvm_unplug(S390PCIBusDevice *pbdev)
+{
+    struct kvm_s390_zpci_op args = {
+        .op = KVM_S390_ZPCIOP_END,
+        .fh = pbdev->fh | FH_MASK_ENABLE
+    };
+    int ret = kvm_vm_ioctl(kvm_state, KVM_S390_ZPCI_OP, &args);
+
+    return ret;
+}
+
+/*
+ * Issue KVM_S390_ZPCIOP_START_INTERP for the device and, on success, store
+ * the handle returned by the ioctl in pbdev->fh.
+ *
+ * Returns 0 on success, -EINVAL when the host function handle cannot be
+ * re-read after an -ENODEV result, or the ioctl's error code otherwise.
+ */
+int s390_pci_kvm_interp_enable(S390PCIBusDevice *pbdev)
+{
+    struct kvm_s390_zpci_op args = {
+        .op = KVM_S390_ZPCIOP_START_INTERP,
+        .fh = pbdev->fh | FH_MASK_ENABLE
+    };
+    uint32_t host_fh;
+    int ret;
+
+    for (;;) {
+        ret = kvm_vm_ioctl(kvm_state, KVM_S390_ZPCI_OP, &args);
+        if (ret != -ENODEV) {
+            break;
+        }
+
+        /*
+         * The function was not found. The handle can change while the
+         * device is disabled to the guest because of vfio-triggered events
+         * (e.g. vfio hot reset for ISM during plug), so re-sync it with vfio
+         * and try again only if it actually differs from the one just used.
+         */
+        if (!s390_pci_get_host_fh(pbdev, &host_fh)) {
+            return -EINVAL;
+        }
+        if (host_fh == args.fh) {
+            break;
+        }
+        args.fh = host_fh;
+    }
+
+    if (ret == 0) {
+        pbdev->fh = args.newfh;
+    }
+
+    return ret;
+}
+
+/*
+ * Issue KVM_S390_ZPCIOP_STOP_INTERP for the device using pbdev->fh as is and,
+ * on success, store the handle returned by the ioctl in pbdev->fh.
+ *
+ * Returns the result of the ioctl; pbdev->fh is untouched on failure.
+ */
+int s390_pci_kvm_interp_disable(S390PCIBusDevice *pbdev)
+{
+    struct kvm_s390_zpci_op args = {
+        .op = KVM_S390_ZPCIOP_STOP_INTERP,
+        .fh = pbdev->fh
+    };
+    int ret = kvm_vm_ioctl(kvm_state, KVM_S390_ZPCI_OP, &args);
+
+    if (ret) {
+        return ret;
+    }
+
+    pbdev->fh = args.newfh;
+    return 0;
+}
@@ -350,6 +350,7 @@ struct S390PCIBusDevice {
IndAddr *indicator;
bool pci_unplug_request_processed;
bool unplug_requested;
+ bool interp;
QTAILQ_ENTRY(S390PCIBusDevice) link;
};
new file mode 100644
@@ -0,0 +1,46 @@
+/*
+ * s390 PCI KVM interfaces
+ *
+ * Copyright 2022 IBM Corp.
+ * Author(s): Matthew Rosato <mjrosato@linux.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at
+ * your option) any later version. See the COPYING file in the top-level
+ * directory.
+ */
+
+#ifndef HW_S390_PCI_KVM_H
+#define HW_S390_PCI_KVM_H
+
+#include "hw/s390x/s390-pci-bus.h"
+
+#ifdef CONFIG_KVM
+bool s390_pci_kvm_zpciop_allowed(void);
+int s390_pci_kvm_plug(S390PCIBusDevice *pbdev);
+int s390_pci_kvm_unplug(S390PCIBusDevice *pbdev);
+int s390_pci_kvm_interp_enable(S390PCIBusDevice *pbdev);
+int s390_pci_kvm_interp_disable(S390PCIBusDevice *pbdev);
+#else
+/*
+ * Fallbacks for builds without CONFIG_KVM: zPCI operations are reported as
+ * not allowed, and the plug, unplug and interp_enable stubs below always fail
+ * with -EINVAL.
+ */
+static inline bool s390_pci_kvm_zpciop_allowed(void)
+{
+ return false;
+}
+static inline int s390_pci_kvm_plug(S390PCIBusDevice *pbdev)
+{
+ return -EINVAL;
+}
+static inline int s390_pci_kvm_unplug(S390PCIBusDevice *pbdev)
+{
+ return -EINVAL;
+}
+static inline int s390_pci_kvm_interp_enable(S390PCIBusDevice *pbdev)
+{
+ return -EINVAL;
+}
+/*
+ * Non-KVM stub matching the s390_pci_kvm_interp_disable() prototype of the
+ * CONFIG_KVM branch; always fails with -EINVAL.
+ */
+static inline int s390_pci_kvm_interp_disable(S390PCIBusDevice *pbdev)
+{
+    return -EINVAL;
+}
+#endif
+
+#endif
@@ -157,6 +157,7 @@ static int cap_ri;
static int cap_hpage_1m;
static int cap_vcpu_resets;
static int cap_protected;
+static int cap_zpci_op;
static int active_cmma;
@@ -358,6 +359,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
cap_s390_irq = kvm_check_extension(s, KVM_CAP_S390_INJECT_IRQ);
cap_vcpu_resets = kvm_check_extension(s, KVM_CAP_S390_VCPU_RESETS);
cap_protected = kvm_check_extension(s, KVM_CAP_S390_PROTECTED);
+ cap_zpci_op = kvm_check_extension(s, KVM_CAP_S390_ZPCI_OP);
kvm_vm_enable_cap(s, KVM_CAP_S390_USER_SIGP, 0);
kvm_vm_enable_cap(s, KVM_CAP_S390_VECTOR_REGISTERS, 0);
@@ -2567,3 +2569,8 @@ bool kvm_arch_cpu_check_are_resettable(void)
{
return true;
}
+
+/*
+ * Return the value cached in cap_zpci_op, which kvm_arch_init() fills from
+ * kvm_check_extension() for KVM_CAP_S390_ZPCI_OP.
+ */
+int kvm_s390_get_zpci_op(void)
+{
+ return cap_zpci_op;
+}
@@ -27,6 +27,7 @@ void kvm_s390_vcpu_interrupt_pre_save(S390CPU *cpu);
int kvm_s390_vcpu_interrupt_post_load(S390CPU *cpu);
int kvm_s390_get_hpage_1m(void);
int kvm_s390_get_ri(void);
+int kvm_s390_get_zpci_op(void);
int kvm_s390_get_clock(uint8_t *tod_high, uint64_t *tod_clock);
int kvm_s390_get_clock_ext(uint8_t *tod_high, uint64_t *tod_clock);
int kvm_s390_set_clock(uint8_t tod_high, uint64_t tod_clock);
Use the associated kvm ioctl to enable interpretation for devices when requested. As part of this process, we must use the host function handle rather than a QEMU-generated one -- we use an initial value from vfio CLP and maintain an updated fh value from kvm ioctl response info. By default, unless interpret=off is specified, interpretation support will always be assumed and exploited if the necessary ioctl and features are available on the host kernel. When these are unavailable, we will silently revert to the interception model; this allows existing guest configurations to work unmodified on hosts with and without zPCI interpretation support, allowing QEMU to choose the best support model available. Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com> --- hw/s390x/meson.build | 1 + hw/s390x/s390-pci-bus.c | 67 ++++++++++++++++++- hw/s390x/s390-pci-inst.c | 54 ++++++++++++++- hw/s390x/s390-pci-kvm.c | 112 ++++++++++++++++++++++++++++++++ include/hw/s390x/s390-pci-bus.h | 1 + include/hw/s390x/s390-pci-kvm.h | 46 +++++++++++++ target/s390x/kvm/kvm.c | 7 ++ target/s390x/kvm/kvm_s390x.h | 1 + 8 files changed, 287 insertions(+), 2 deletions(-) create mode 100644 hw/s390x/s390-pci-kvm.c create mode 100644 include/hw/s390x/s390-pci-kvm.h