diff mbox series

[v2,4/9] s390x/pci: enable for load/store intepretation

Message ID 20220114203849.243657-5-mjrosato@linux.ibm.com (mailing list archive)
State New, archived
Headers show
Series s390x/pci: zPCI interpretation support | expand

Commit Message

Matthew Rosato Jan. 14, 2022, 8:38 p.m. UTC
Use the associated vfio feature ioctl to enable interpretation for devices
when requested.  As part of this process, we must use the host function
handle rather than a QEMU-generated one -- this is provided as part of the
ioctl payload.

Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com>
---
 hw/s390x/s390-pci-bus.c          | 70 +++++++++++++++++++++++++++++++-
 hw/s390x/s390-pci-inst.c         | 63 +++++++++++++++++++++++++++-
 hw/s390x/s390-pci-vfio.c         | 52 ++++++++++++++++++++++++
 include/hw/s390x/s390-pci-bus.h  |  1 +
 include/hw/s390x/s390-pci-vfio.h | 15 +++++++
 5 files changed, 199 insertions(+), 2 deletions(-)

Comments

Thomas Huth Jan. 17, 2022, 2:51 p.m. UTC | #1
On 14/01/2022 21.38, Matthew Rosato wrote:
> Use the associated vfio feature ioctl to enable interpretation for devices
> when requested.  As part of this process, we must use the host function
> handle rather than a QEMU-generated one -- this is provided as part of the
> ioctl payload.
> 
> Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com>
> ---
>   hw/s390x/s390-pci-bus.c          | 70 +++++++++++++++++++++++++++++++-
>   hw/s390x/s390-pci-inst.c         | 63 +++++++++++++++++++++++++++-
>   hw/s390x/s390-pci-vfio.c         | 52 ++++++++++++++++++++++++
>   include/hw/s390x/s390-pci-bus.h  |  1 +
>   include/hw/s390x/s390-pci-vfio.h | 15 +++++++
>   5 files changed, 199 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
> index 01b58ebc70..a39ccfee05 100644
> --- a/hw/s390x/s390-pci-bus.c
> +++ b/hw/s390x/s390-pci-bus.c
> @@ -971,12 +971,58 @@ static void s390_pci_update_subordinate(PCIDevice *dev, uint32_t nr)
>       }
>   }
>   
> +static int s390_pci_interp_plug(S390pciState *s, S390PCIBusDevice *pbdev)
> +{
> +    uint32_t idx;
> +    int rc;
> +
> +    rc = s390_pci_probe_interp(pbdev);
> +    if (rc) {
> +        return rc;
> +    }
> +
> +    rc = s390_pci_update_passthrough_fh(pbdev);
> +    if (rc) {
> +        return rc;
> +    }
> +
> +    /*
> +     * The host device is already in an enabled state, but we always present
> +     * the initial device state to the guest as disabled (ZPCI_FS_DISABLED).
> +     * Therefore, mask off the enable bit from the passthrough handle until
> +     * the guest issues a CLP SET PCI FN later to enable the device.
> +     */
> +    pbdev->fh &= ~FH_MASK_ENABLE;
> +
> +    /* Next, see if the idx is already in-use */
> +    idx = pbdev->fh & FH_MASK_INDEX;
> +    if (pbdev->idx != idx) {
> +        if (s390_pci_find_dev_by_idx(s, idx)) {
> +            return -EINVAL;
> +        }
> +        /*
> +         * Update the idx entry with the passed through idx
> +         * If the relinquished idx is lower than next_idx, use it
> +         * to replace next_idx
> +         */
> +        g_hash_table_remove(s->zpci_table, &pbdev->idx);
> +        if (idx < s->next_idx) {
> +            s->next_idx = idx;
> +        }
> +        pbdev->idx = idx;
> +        g_hash_table_insert(s->zpci_table, &pbdev->idx, pbdev);
> +    }
> +
> +    return 0;
> +}
> +
>   static void s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
>                                 Error **errp)
>   {
>       S390pciState *s = S390_PCI_HOST_BRIDGE(hotplug_dev);
>       PCIDevice *pdev = NULL;
>       S390PCIBusDevice *pbdev = NULL;
> +    int rc;
>   
>       if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_BRIDGE)) {
>           PCIBridge *pb = PCI_BRIDGE(dev);
> @@ -1022,12 +1068,33 @@ static void s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
>           set_pbdev_info(pbdev);
>   
>           if (object_dynamic_cast(OBJECT(dev), "vfio-pci")) {
> -            pbdev->fh |= FH_SHM_VFIO;
> +            /*
> +             * By default, interpretation is always requested; if the available
> +             * facilities indicate it is not available, fallback to the
> +             * intercept model.
> +             */
> +            if (pbdev->interp && !s390_has_feat(S390_FEAT_ZPCI_INTERP)) {
> +                    DPRINTF("zPCI interpretation facilities missing.\n");
> +                    pbdev->interp = false;
> +                }

Wrong indentation in the above three lines.

> +            if (pbdev->interp) {
> +                rc = s390_pci_interp_plug(s, pbdev);
> +                if (rc) {
> +                    error_setg(errp, "zpci interp plug failed: %d", rc);

The error message is a little bit scarce for something that might be 
presented to the user - maybe write at least "interpretation" instead of 
"interp" ?

> +                    return;
> +                }
> +            }
>               pbdev->iommu->dma_limit = s390_pci_start_dma_count(s, pbdev);
>               /* Fill in CLP information passed via the vfio region */
>               s390_pci_get_clp_info(pbdev);
> +            if (!pbdev->interp) {
> +                /* Do vfio passthrough but intercept for I/O */
> +                pbdev->fh |= FH_SHM_VFIO;
> +            }
>           } else {
>               pbdev->fh |= FH_SHM_EMUL;
> +            /* Always intercept emulated devices */
> +            pbdev->interp = false;
>           }

  Thomas
Thomas Huth Jan. 17, 2022, 3:38 p.m. UTC | #2
On 14/01/2022 21.38, Matthew Rosato wrote:
> Use the associated vfio feature ioctl to enable interpretation for devices
> when requested.  As part of this process, we must use the host function
> handle rather than a QEMU-generated one -- this is provided as part of the
> ioctl payload.
> 
> Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com>
> ---
>   hw/s390x/s390-pci-bus.c          | 70 +++++++++++++++++++++++++++++++-
>   hw/s390x/s390-pci-inst.c         | 63 +++++++++++++++++++++++++++-
>   hw/s390x/s390-pci-vfio.c         | 52 ++++++++++++++++++++++++
>   include/hw/s390x/s390-pci-bus.h  |  1 +
>   include/hw/s390x/s390-pci-vfio.h | 15 +++++++
>   5 files changed, 199 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
> index 01b58ebc70..a39ccfee05 100644
> --- a/hw/s390x/s390-pci-bus.c
> +++ b/hw/s390x/s390-pci-bus.c
...
> @@ -1360,6 +1427,7 @@ static Property s390_pci_device_properties[] = {
>       DEFINE_PROP_UINT16("uid", S390PCIBusDevice, uid, UID_UNDEFINED),
>       DEFINE_PROP_S390_PCI_FID("fid", S390PCIBusDevice, fid),
>       DEFINE_PROP_STRING("target", S390PCIBusDevice, target),
> +    DEFINE_PROP_BOOL("interp", S390PCIBusDevice, interp, true),
>       DEFINE_PROP_END_OF_LIST(),
>   };

Since this is something that the user can see, would it maybe make sense to 
provide a full word instead of an abbreviation here? I.e. "interpret" or 
"interpretation" instead of "interp" ?

  Thomas
Matthew Rosato Jan. 18, 2022, 6:08 p.m. UTC | #3
On 1/17/22 9:51 AM, Thomas Huth wrote:
> On 14/01/2022 21.38, Matthew Rosato wrote:
...
>>   static void s390_pcihost_plug(HotplugHandler *hotplug_dev, 
>> DeviceState *dev,
>>                                 Error **errp)
>>   {
>>       S390pciState *s = S390_PCI_HOST_BRIDGE(hotplug_dev);
>>       PCIDevice *pdev = NULL;
>>       S390PCIBusDevice *pbdev = NULL;
>> +    int rc;
>>       if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_BRIDGE)) {
>>           PCIBridge *pb = PCI_BRIDGE(dev);
>> @@ -1022,12 +1068,33 @@ static void s390_pcihost_plug(HotplugHandler 
>> *hotplug_dev, DeviceState *dev,
>>           set_pbdev_info(pbdev);
>>           if (object_dynamic_cast(OBJECT(dev), "vfio-pci")) {
>> -            pbdev->fh |= FH_SHM_VFIO;
>> +            /*
>> +             * By default, interpretation is always requested; if the 
>> available
>> +             * facilities indicate it is not available, fallback to the
>> +             * intercept model.
>> +             */
>> +            if (pbdev->interp && 
>> !s390_has_feat(S390_FEAT_ZPCI_INTERP)) {
>> +                    DPRINTF("zPCI interpretation facilities 
>> missing.\n");
>> +                    pbdev->interp = false;
>> +                }
> 
> Wrong indentation in the above three lines.

Thanks

> 
>> +            if (pbdev->interp) {
>> +                rc = s390_pci_interp_plug(s, pbdev);
>> +                if (rc) {
>> +                    error_setg(errp, "zpci interp plug failed: %d", rc);
> 
> The error message is a little bit scarce for something that might be 
> presented to the user - maybe write at least "interpretation" instead of 
> "interp" ?
> 
Good point, I'll re-word to something like "Plug failed for zPCI device 
in interpretation mode: %d"

>> +                    return;
>> +                }
>> +            }
>>               pbdev->iommu->dma_limit = s390_pci_start_dma_count(s, 
>> pbdev);
>>               /* Fill in CLP information passed via the vfio region */
>>               s390_pci_get_clp_info(pbdev);
>> +            if (!pbdev->interp) {
>> +                /* Do vfio passthrough but intercept for I/O */
>> +                pbdev->fh |= FH_SHM_VFIO;
>> +            }
>>           } else {
>>               pbdev->fh |= FH_SHM_EMUL;
>> +            /* Always intercept emulated devices */
>> +            pbdev->interp = false;
>>           }
> 
>   Thomas
>
Matthew Rosato Jan. 18, 2022, 6:14 p.m. UTC | #4
On 1/17/22 10:38 AM, Thomas Huth wrote:
> On 14/01/2022 21.38, Matthew Rosato wrote:
>> Use the associated vfio feature ioctl to enable interpretation for 
>> devices
>> when requested.  As part of this process, we must use the host function
>> handle rather than a QEMU-generated one -- this is provided as part of 
>> the
>> ioctl payload.
>>
>> Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com>
>> ---
>>   hw/s390x/s390-pci-bus.c          | 70 +++++++++++++++++++++++++++++++-
>>   hw/s390x/s390-pci-inst.c         | 63 +++++++++++++++++++++++++++-
>>   hw/s390x/s390-pci-vfio.c         | 52 ++++++++++++++++++++++++
>>   include/hw/s390x/s390-pci-bus.h  |  1 +
>>   include/hw/s390x/s390-pci-vfio.h | 15 +++++++
>>   5 files changed, 199 insertions(+), 2 deletions(-)
>>
>> diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
>> index 01b58ebc70..a39ccfee05 100644
>> --- a/hw/s390x/s390-pci-bus.c
>> +++ b/hw/s390x/s390-pci-bus.c
> ...
>> @@ -1360,6 +1427,7 @@ static Property s390_pci_device_properties[] = {
>>       DEFINE_PROP_UINT16("uid", S390PCIBusDevice, uid, UID_UNDEFINED),
>>       DEFINE_PROP_S390_PCI_FID("fid", S390PCIBusDevice, fid),
>>       DEFINE_PROP_STRING("target", S390PCIBusDevice, target),
>> +    DEFINE_PROP_BOOL("interp", S390PCIBusDevice, interp, true),
>>       DEFINE_PROP_END_OF_LIST(),
>>   };
> 
> Since this is something that the user can see, would it maybe make sense 
> to provide a full word instead of an abbreviation here? I.e. "interpret" 
> or "interpretation" instead of "interp" ?

I'll go with "interpret" unless someone else has a strong opinion on it.
Pierre Morel Jan. 31, 2022, 9:37 a.m. UTC | #5
On 1/14/22 21:38, Matthew Rosato wrote:
> Use the associated vfio feature ioctl to enable interpretation for devices
> when requested.  As part of this process, we must use the host function
> handle rather than a QEMU-generated one -- this is provided as part of the
> ioctl payload.
> 
> Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com>
> ---
>   hw/s390x/s390-pci-bus.c          | 70 +++++++++++++++++++++++++++++++-
>   hw/s390x/s390-pci-inst.c         | 63 +++++++++++++++++++++++++++-
>   hw/s390x/s390-pci-vfio.c         | 52 ++++++++++++++++++++++++
>   include/hw/s390x/s390-pci-bus.h  |  1 +
>   include/hw/s390x/s390-pci-vfio.h | 15 +++++++
>   5 files changed, 199 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
> index 01b58ebc70..a39ccfee05 100644
> --- a/hw/s390x/s390-pci-bus.c
> +++ b/hw/s390x/s390-pci-bus.c
> @@ -971,12 +971,58 @@ static void s390_pci_update_subordinate(PCIDevice *dev, uint32_t nr)
>       }
>   }
>   

...snip...

> diff --git a/include/hw/s390x/s390-pci-vfio.h b/include/hw/s390x/s390-pci-vfio.h
> index ff708aef50..42533e38f7 100644
> --- a/include/hw/s390x/s390-pci-vfio.h
> +++ b/include/hw/s390x/s390-pci-vfio.h
> @@ -20,6 +20,9 @@ bool s390_pci_update_dma_avail(int fd, unsigned int *avail);
>   S390PCIDMACount *s390_pci_start_dma_count(S390pciState *s,
>                                             S390PCIBusDevice *pbdev);
>   void s390_pci_end_dma_count(S390pciState *s, S390PCIDMACount *cnt);
> +int s390_pci_probe_interp(S390PCIBusDevice *pbdev);
> +int s390_pci_set_interp(S390PCIBusDevice *pbdev, bool enable);
> +int s390_pci_update_passthrough_fh(S390PCIBusDevice *pbdev);
>   void s390_pci_get_clp_info(S390PCIBusDevice *pbdev);
>   #else
>   static inline bool s390_pci_update_dma_avail(int fd, unsigned int *avail)
> @@ -33,6 +36,18 @@ static inline S390PCIDMACount *s390_pci_start_dma_count(S390pciState *s,
>   }
>   static inline void s390_pci_end_dma_count(S390pciState *s,
>                                             S390PCIDMACount *cnt) { }
> +int s390_pci_probe_interp(S390PCIBusDevice *pbdev)

static inline ?

> +{
> +    return -EINVAL;
> +}
> +static inline int s390_pci_set_interp(S390PCIBusDevice *pbdev, bool enable)
> +{
> +    return -EINVAL;
> +}
> +static inline int s390_pci_update_passthrough_fh(S390PCIBusDevice *pbdev)
> +{
> +    return -EINVAL;
> +}
>   static inline void s390_pci_get_clp_info(S390PCIBusDevice *pbdev) { }
>   #endif
>   
>
Pierre Morel Jan. 31, 2022, 2:46 p.m. UTC | #6
On 1/14/22 21:38, Matthew Rosato wrote:
> Use the associated vfio feature ioctl to enable interpretation for devices
> when requested.  As part of this process, we must use the host function
> handle rather than a QEMU-generated one -- this is provided as part of the
> ioctl payload.

I wonder if we should not explain here that having interpretation as a 
default and silently fall back to interception allows backward 
compatibility while allowing performence be chosing by default.
(You can say it better as I do :) )


> 
> Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com>
> ---
>   hw/s390x/s390-pci-bus.c          | 70 +++++++++++++++++++++++++++++++-
>   hw/s390x/s390-pci-inst.c         | 63 +++++++++++++++++++++++++++-
>   hw/s390x/s390-pci-vfio.c         | 52 ++++++++++++++++++++++++
>   include/hw/s390x/s390-pci-bus.h  |  1 +
>   include/hw/s390x/s390-pci-vfio.h | 15 +++++++
>   5 files changed, 199 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
> index 01b58ebc70..a39ccfee05 100644
> --- a/hw/s390x/s390-pci-bus.c
> +++ b/hw/s390x/s390-pci-bus.c
> @@ -971,12 +971,58 @@ static void s390_pci_update_subordinate(PCIDevice *dev, uint32_t nr)
>       }
>   }
>   
> +static int s390_pci_interp_plug(S390pciState *s, S390PCIBusDevice *pbdev)
> +{
> +    uint32_t idx;
> +    int rc;
> +
> +    rc = s390_pci_probe_interp(pbdev);
> +    if (rc) {
> +        return rc;
> +    }
> +
> +    rc = s390_pci_update_passthrough_fh(pbdev);
> +    if (rc) {
> +        return rc;
> +    }
> +
> +    /*
> +     * The host device is already in an enabled state, but we always present
> +     * the initial device state to the guest as disabled (ZPCI_FS_DISABLED).
> +     * Therefore, mask off the enable bit from the passthrough handle until
> +     * the guest issues a CLP SET PCI FN later to enable the device.
> +     */
> +    pbdev->fh &= ~FH_MASK_ENABLE;
> +
> +    /* Next, see if the idx is already in-use */
> +    idx = pbdev->fh & FH_MASK_INDEX;
> +    if (pbdev->idx != idx) {
> +        if (s390_pci_find_dev_by_idx(s, idx)) {
> +            return -EINVAL;
> +        }
> +        /*
> +         * Update the idx entry with the passed through idx
> +         * If the relinquished idx is lower than next_idx, use it
> +         * to replace next_idx
> +         */
> +        g_hash_table_remove(s->zpci_table, &pbdev->idx);
> +        if (idx < s->next_idx) {
> +            s->next_idx = idx;
> +        }
> +        pbdev->idx = idx;
> +        g_hash_table_insert(s->zpci_table, &pbdev->idx, pbdev);
> +    }
> +
> +    return 0;
> +}
> +
>   static void s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
>                                 Error **errp)
>   {
>       S390pciState *s = S390_PCI_HOST_BRIDGE(hotplug_dev);
>       PCIDevice *pdev = NULL;
>       S390PCIBusDevice *pbdev = NULL;
> +    int rc;
>   
>       if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_BRIDGE)) {
>           PCIBridge *pb = PCI_BRIDGE(dev);
> @@ -1022,12 +1068,33 @@ static void s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
>           set_pbdev_info(pbdev);
>   
>           if (object_dynamic_cast(OBJECT(dev), "vfio-pci")) {
> -            pbdev->fh |= FH_SHM_VFIO;
> +            /*
> +             * By default, interpretation is always requested; if the available
> +             * facilities indicate it is not available, fallback to the
> +             * intercept model.

s/intercept/interception/ ?

> +             */
> +            if (pbdev->interp && !s390_has_feat(S390_FEAT_ZPCI_INTERP)) {
> +                    DPRINTF("zPCI interpretation facilities missing.\n");
> +                    pbdev->interp = false;
> +                }
> +            if (pbdev->interp) {
> +                rc = s390_pci_interp_plug(s, pbdev);
> +                if (rc) {
> +                    error_setg(errp, "zpci interp plug failed: %d", rc);
> +                    return;
> +                }
> +            }


Can't we rearrange that as
if (pbdev->interp) {
     if (s390_has_feat) {
     } else {
     }
}


>               pbdev->iommu->dma_limit = s390_pci_start_dma_count(s, pbdev);
>               /* Fill in CLP information passed via the vfio region */
>               s390_pci_get_clp_info(pbdev);
> +            if (!pbdev->interp) {
> +                /* Do vfio passthrough but intercept for I/O */
> +                pbdev->fh |= FH_SHM_VFIO;
> +            }
>           } else {
>               pbdev->fh |= FH_SHM_EMUL;
> +            /* Always intercept emulated devices */
> +            pbdev->interp = false;
>           }
>   
>           if (s390_pci_msix_init(pbdev)) {
> @@ -1360,6 +1427,7 @@ static Property s390_pci_device_properties[] = {
>       DEFINE_PROP_UINT16("uid", S390PCIBusDevice, uid, UID_UNDEFINED),
>       DEFINE_PROP_S390_PCI_FID("fid", S390PCIBusDevice, fid),
>       DEFINE_PROP_STRING("target", S390PCIBusDevice, target),
> +    DEFINE_PROP_BOOL("interp", S390PCIBusDevice, interp, true),
>       DEFINE_PROP_END_OF_LIST(),
>   };
>   
> diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c
> index 6d400d4147..e9a0dc12e4 100644
> --- a/hw/s390x/s390-pci-inst.c
> +++ b/hw/s390x/s390-pci-inst.c
> @@ -18,6 +18,7 @@
>   #include "sysemu/hw_accel.h"
>   #include "hw/s390x/s390-pci-inst.h"
>   #include "hw/s390x/s390-pci-bus.h"
> +#include "hw/s390x/s390-pci-vfio.h"
>   #include "hw/s390x/tod.h"
>   
>   #ifndef DEBUG_S390PCI_INST
> @@ -156,6 +157,47 @@ out:
>       return rc;
>   }
>   
> +static int clp_enable_interp(S390PCIBusDevice *pbdev)
> +{
> +    int rc;
> +
> +    rc = s390_pci_set_interp(pbdev, true);
> +    if (rc) {
> +        DPRINTF("Failed to enable interpretation\n");
> +        return rc;
> +    }
> +    rc = s390_pci_update_passthrough_fh(pbdev);
> +    if (rc) {
> +        DPRINTF("Failed to update passthrough fh\n");
> +        return rc;
> +    }
> +    if (!(pbdev->fh & FH_MASK_ENABLE)) {
> +        DPRINTF("Passthrough handle is not enabled\n");
> +        return -EINVAL;
> +    }
> +
> +    return 0;
> +}
> +
> +static int clp_disable_interp(S390PCIBusDevice *pbdev)
> +{
> +    int rc;
> +
> +    rc = s390_pci_set_interp(pbdev, false);
> +    if (rc) {
> +        DPRINTF("Failed to disable interpretation\n");
> +        return rc;
> +    }
> +
> +    rc = s390_pci_update_passthrough_fh(pbdev);
> +    if (rc) {
> +        DPRINTF("Failed to update passthrough fh\n");
> +        return rc;
> +    }
> +
> +    return 0;
> +}
> +
>   int clp_service_call(S390CPU *cpu, uint8_t r2, uintptr_t ra)
>   {
>       ClpReqHdr *reqh;
> @@ -246,7 +288,19 @@ int clp_service_call(S390CPU *cpu, uint8_t r2, uintptr_t ra)
>                   goto out;
>               }
>   
> -            pbdev->fh |= FH_MASK_ENABLE;
> +            /*
> +             * If interpretation is specified, attempt to enable this now and
> +             * update with the host fh
> +             */
> +            if (pbdev->interp) {
> +                if (clp_enable_interp(pbdev)) {
> +                    stw_p(&ressetpci->hdr.rsp, CLP_RC_SETPCIFN_ERR);
> +                    goto out;
> +                }
> +            } else {
> +                pbdev->fh |= FH_MASK_ENABLE;
> +            }
> +
>               pbdev->state = ZPCI_FS_ENABLED;
>               stl_p(&ressetpci->fh, pbdev->fh);
>               stw_p(&ressetpci->hdr.rsp, CLP_RC_OK);
> @@ -257,6 +311,13 @@ int clp_service_call(S390CPU *cpu, uint8_t r2, uintptr_t ra)
>                   goto out;
>               }
>               device_legacy_reset(DEVICE(pbdev));
> +            if (pbdev->interp) {
> +                if (clp_disable_interp(pbdev)) {
> +                    stw_p(&ressetpci->hdr.rsp, CLP_RC_SETPCIFN_ERR);
> +                    goto out;
> +                }
> +            }
> +            /* Mask off the enabled bit for interpreted devices too */
>               pbdev->fh &= ~FH_MASK_ENABLE;
>               pbdev->state = ZPCI_FS_DISABLED;
>               stl_p(&ressetpci->fh, pbdev->fh);
> diff --git a/hw/s390x/s390-pci-vfio.c b/hw/s390x/s390-pci-vfio.c
> index 6f80a47e29..2cab3a9e89 100644
> --- a/hw/s390x/s390-pci-vfio.c
> +++ b/hw/s390x/s390-pci-vfio.c
> @@ -97,6 +97,58 @@ void s390_pci_end_dma_count(S390pciState *s, S390PCIDMACount *cnt)
>       }
>   }
>   
> +int s390_pci_probe_interp(S390PCIBusDevice *pbdev)
> +{
> +    VFIOPCIDevice *vdev = VFIO_PCI(pbdev->pdev);
> +    struct vfio_device_feature feat = {
> +        .argsz = sizeof(struct vfio_device_feature),
> +        .flags = VFIO_DEVICE_FEATURE_PROBE | VFIO_DEVICE_FEATURE_ZPCI_INTERP
> +    };
> +
> +    return ioctl(vdev->vbasedev.fd, VFIO_DEVICE_FEATURE, &feat);
> +}
> +
> +int s390_pci_set_interp(S390PCIBusDevice *pbdev, bool enable)
> +{
> +    VFIOPCIDevice *vdev = VFIO_PCI(pbdev->pdev);
> +    struct vfio_device_zpci_interp *data;
> +    int size = sizeof(struct vfio_device_feature) + sizeof(*data);
> +    g_autofree struct vfio_device_feature *feat = g_malloc0(size);
> +
> +    feat->argsz = size;
> +    feat->flags = VFIO_DEVICE_FEATURE_SET + VFIO_DEVICE_FEATURE_ZPCI_INTERP;
> +
> +    data = (struct vfio_device_zpci_interp *)&feat->data;
> +    if (enable) {
> +        data->flags = VFIO_DEVICE_ZPCI_FLAG_INTERP;
> +    } else {
> +        data->flags = 0;
> +    }
> +
> +    return ioctl(vdev->vbasedev.fd, VFIO_DEVICE_FEATURE, feat);
> +}
> +
> +int s390_pci_update_passthrough_fh(S390PCIBusDevice *pbdev)
> +{
> +    VFIOPCIDevice *vdev = VFIO_PCI(pbdev->pdev);
> +    struct vfio_device_zpci_interp *data;
> +    int size = sizeof(struct vfio_device_feature) + sizeof(*data);
> +    g_autofree struct vfio_device_feature *feat = g_malloc0(size);
> +    int rc;
> +
> +    feat->argsz = size;
> +    feat->flags = VFIO_DEVICE_FEATURE_GET + VFIO_DEVICE_FEATURE_ZPCI_INTERP;
> +
> +    rc = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_FEATURE, feat);
> +    if (rc) {
> +        return rc;
> +    }
> +
> +    data = (struct vfio_device_zpci_interp *)&feat->data;
> +    pbdev->fh = data->fh;
> +    return 0;
> +}
> +
>   static void s390_pci_read_base(S390PCIBusDevice *pbdev,
>                                  struct vfio_device_info *info)
>   {
> diff --git a/include/hw/s390x/s390-pci-bus.h b/include/hw/s390x/s390-pci-bus.h
> index da3cde2bb4..a9843dfe97 100644
> --- a/include/hw/s390x/s390-pci-bus.h
> +++ b/include/hw/s390x/s390-pci-bus.h
> @@ -350,6 +350,7 @@ struct S390PCIBusDevice {
>       IndAddr *indicator;
>       bool pci_unplug_request_processed;
>       bool unplug_requested;
> +    bool interp;
>       QTAILQ_ENTRY(S390PCIBusDevice) link;
>   };
>   
> diff --git a/include/hw/s390x/s390-pci-vfio.h b/include/hw/s390x/s390-pci-vfio.h
> index ff708aef50..42533e38f7 100644
> --- a/include/hw/s390x/s390-pci-vfio.h
> +++ b/include/hw/s390x/s390-pci-vfio.h
> @@ -20,6 +20,9 @@ bool s390_pci_update_dma_avail(int fd, unsigned int *avail);
>   S390PCIDMACount *s390_pci_start_dma_count(S390pciState *s,
>                                             S390PCIBusDevice *pbdev);
>   void s390_pci_end_dma_count(S390pciState *s, S390PCIDMACount *cnt);
> +int s390_pci_probe_interp(S390PCIBusDevice *pbdev);
> +int s390_pci_set_interp(S390PCIBusDevice *pbdev, bool enable);
> +int s390_pci_update_passthrough_fh(S390PCIBusDevice *pbdev);
>   void s390_pci_get_clp_info(S390PCIBusDevice *pbdev);
>   #else
>   static inline bool s390_pci_update_dma_avail(int fd, unsigned int *avail)
> @@ -33,6 +36,18 @@ static inline S390PCIDMACount *s390_pci_start_dma_count(S390pciState *s,
>   }
>   static inline void s390_pci_end_dma_count(S390pciState *s,
>                                             S390PCIDMACount *cnt) { }
> +int s390_pci_probe_interp(S390PCIBusDevice *pbdev)
> +{
> +    return -EINVAL;
> +}
> +static inline int s390_pci_set_interp(S390PCIBusDevice *pbdev, bool enable)
> +{
> +    return -EINVAL;
> +}
> +static inline int s390_pci_update_passthrough_fh(S390PCIBusDevice *pbdev)
> +{
> +    return -EINVAL;
> +}
>   static inline void s390_pci_get_clp_info(S390PCIBusDevice *pbdev) { }
>   #endif
>   
> 

LGTM
With the corrections proposed by Thomas.
Mine... you see what you prefer.

Reviewed-by: Pierre Morel <pmorel@linux.ibm.com>
Matthew Rosato Jan. 31, 2022, 5:11 p.m. UTC | #7
On 1/31/22 9:46 AM, Pierre Morel wrote:
> 
> 
> On 1/14/22 21:38, Matthew Rosato wrote:
>> Use the associated vfio feature ioctl to enable interpretation for 
>> devices
>> when requested.  As part of this process, we must use the host function
>> handle rather than a QEMU-generated one -- this is provided as part of 
>> the
>> ioctl payload.
> 
> I wonder if we should not explain here that having interpretation as a 
> default and silently fall back to interception allows backward 
> compatibility while allowing performence be chosing by default.
> (You can say it better as I do :) )

Good suggestion, I'll think of something to add to the commit message.


>> @@ -1022,12 +1068,33 @@ static void s390_pcihost_plug(HotplugHandler 
>> *hotplug_dev, DeviceState *dev,
>>           set_pbdev_info(pbdev);
>>           if (object_dynamic_cast(OBJECT(dev), "vfio-pci")) {
>> -            pbdev->fh |= FH_SHM_VFIO;
>> +            /*
>> +             * By default, interpretation is always requested; if the 
>> available
>> +             * facilities indicate it is not available, fallback to the
>> +             * intercept model.
> 
> s/intercept/interception/ ?
> 

OK

>> +             */
>> +            if (pbdev->interp && 
>> !s390_has_feat(S390_FEAT_ZPCI_INTERP)) {
>> +                    DPRINTF("zPCI interpretation facilities 
>> missing.\n");
>> +                    pbdev->interp = false;
>> +                }
>> +            if (pbdev->interp) {
>> +                rc = s390_pci_interp_plug(s, pbdev);
>> +                if (rc) {
>> +                    error_setg(errp, "zpci interp plug failed: %d", rc);
>> +                    return;
>> +                }
>> +            }
> 
> 
> Can't we rearrange that as
> if (pbdev->interp) {
>      if (s390_has_feat) {
>      } else {
>      }
> }

Yep, sure

...

> 
> LGTM
> With the corrections proposed by Thomas.
> Mine... you see what you prefer.
> 
> Reviewed-by: Pierre Morel <pmorel@linux.ibm.com>

Thanks!
diff mbox series

Patch

diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
index 01b58ebc70..a39ccfee05 100644
--- a/hw/s390x/s390-pci-bus.c
+++ b/hw/s390x/s390-pci-bus.c
@@ -971,12 +971,58 @@  static void s390_pci_update_subordinate(PCIDevice *dev, uint32_t nr)
     }
 }
 
+static int s390_pci_interp_plug(S390pciState *s, S390PCIBusDevice *pbdev)
+{
+    uint32_t idx;
+    int rc;
+
+    rc = s390_pci_probe_interp(pbdev);
+    if (rc) {
+        return rc;
+    }
+
+    rc = s390_pci_update_passthrough_fh(pbdev);
+    if (rc) {
+        return rc;
+    }
+
+    /*
+     * The host device is already in an enabled state, but we always present
+     * the initial device state to the guest as disabled (ZPCI_FS_DISABLED).
+     * Therefore, mask off the enable bit from the passthrough handle until
+     * the guest issues a CLP SET PCI FN later to enable the device.
+     */
+    pbdev->fh &= ~FH_MASK_ENABLE;
+
+    /* Next, see if the idx is already in-use */
+    idx = pbdev->fh & FH_MASK_INDEX;
+    if (pbdev->idx != idx) {
+        if (s390_pci_find_dev_by_idx(s, idx)) {
+            return -EINVAL;
+        }
+        /*
+         * Update the idx entry with the passed through idx
+         * If the relinquished idx is lower than next_idx, use it
+         * to replace next_idx
+         */
+        g_hash_table_remove(s->zpci_table, &pbdev->idx);
+        if (idx < s->next_idx) {
+            s->next_idx = idx;
+        }
+        pbdev->idx = idx;
+        g_hash_table_insert(s->zpci_table, &pbdev->idx, pbdev);
+    }
+
+    return 0;
+}
+
 static void s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
                               Error **errp)
 {
     S390pciState *s = S390_PCI_HOST_BRIDGE(hotplug_dev);
     PCIDevice *pdev = NULL;
     S390PCIBusDevice *pbdev = NULL;
+    int rc;
 
     if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_BRIDGE)) {
         PCIBridge *pb = PCI_BRIDGE(dev);
@@ -1022,12 +1068,33 @@  static void s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
         set_pbdev_info(pbdev);
 
         if (object_dynamic_cast(OBJECT(dev), "vfio-pci")) {
-            pbdev->fh |= FH_SHM_VFIO;
+            /*
+             * By default, interpretation is always requested; if the available
+             * facilities indicate it is not available, fallback to the
+             * intercept model.
+             */
+            if (pbdev->interp && !s390_has_feat(S390_FEAT_ZPCI_INTERP)) {
+                    DPRINTF("zPCI interpretation facilities missing.\n");
+                    pbdev->interp = false;
+                }
+            if (pbdev->interp) {
+                rc = s390_pci_interp_plug(s, pbdev);
+                if (rc) {
+                    error_setg(errp, "zpci interp plug failed: %d", rc);
+                    return;
+                }
+            }
             pbdev->iommu->dma_limit = s390_pci_start_dma_count(s, pbdev);
             /* Fill in CLP information passed via the vfio region */
             s390_pci_get_clp_info(pbdev);
+            if (!pbdev->interp) {
+                /* Do vfio passthrough but intercept for I/O */
+                pbdev->fh |= FH_SHM_VFIO;
+            }
         } else {
             pbdev->fh |= FH_SHM_EMUL;
+            /* Always intercept emulated devices */
+            pbdev->interp = false;
         }
 
         if (s390_pci_msix_init(pbdev)) {
@@ -1360,6 +1427,7 @@  static Property s390_pci_device_properties[] = {
     DEFINE_PROP_UINT16("uid", S390PCIBusDevice, uid, UID_UNDEFINED),
     DEFINE_PROP_S390_PCI_FID("fid", S390PCIBusDevice, fid),
     DEFINE_PROP_STRING("target", S390PCIBusDevice, target),
+    DEFINE_PROP_BOOL("interp", S390PCIBusDevice, interp, true),
     DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c
index 6d400d4147..e9a0dc12e4 100644
--- a/hw/s390x/s390-pci-inst.c
+++ b/hw/s390x/s390-pci-inst.c
@@ -18,6 +18,7 @@ 
 #include "sysemu/hw_accel.h"
 #include "hw/s390x/s390-pci-inst.h"
 #include "hw/s390x/s390-pci-bus.h"
+#include "hw/s390x/s390-pci-vfio.h"
 #include "hw/s390x/tod.h"
 
 #ifndef DEBUG_S390PCI_INST
@@ -156,6 +157,47 @@  out:
     return rc;
 }
 
+static int clp_enable_interp(S390PCIBusDevice *pbdev)
+{
+    int rc;
+
+    rc = s390_pci_set_interp(pbdev, true);
+    if (rc) {
+        DPRINTF("Failed to enable interpretation\n");
+        return rc;
+    }
+    rc = s390_pci_update_passthrough_fh(pbdev);
+    if (rc) {
+        DPRINTF("Failed to update passthrough fh\n");
+        return rc;
+    }
+    if (!(pbdev->fh & FH_MASK_ENABLE)) {
+        DPRINTF("Passthrough handle is not enabled\n");
+        return -EINVAL;
+    }
+
+    return 0;
+}
+
+static int clp_disable_interp(S390PCIBusDevice *pbdev)
+{
+    int rc;
+
+    rc = s390_pci_set_interp(pbdev, false);
+    if (rc) {
+        DPRINTF("Failed to disable interpretation\n");
+        return rc;
+    }
+
+    rc = s390_pci_update_passthrough_fh(pbdev);
+    if (rc) {
+        DPRINTF("Failed to update passthrough fh\n");
+        return rc;
+    }
+
+    return 0;
+}
+
 int clp_service_call(S390CPU *cpu, uint8_t r2, uintptr_t ra)
 {
     ClpReqHdr *reqh;
@@ -246,7 +288,19 @@  int clp_service_call(S390CPU *cpu, uint8_t r2, uintptr_t ra)
                 goto out;
             }
 
-            pbdev->fh |= FH_MASK_ENABLE;
+            /*
+             * If interpretation is specified, attempt to enable this now and
+             * update with the host fh
+             */
+            if (pbdev->interp) {
+                if (clp_enable_interp(pbdev)) {
+                    stw_p(&ressetpci->hdr.rsp, CLP_RC_SETPCIFN_ERR);
+                    goto out;
+                }
+            } else {
+                pbdev->fh |= FH_MASK_ENABLE;
+            }
+
             pbdev->state = ZPCI_FS_ENABLED;
             stl_p(&ressetpci->fh, pbdev->fh);
             stw_p(&ressetpci->hdr.rsp, CLP_RC_OK);
@@ -257,6 +311,13 @@  int clp_service_call(S390CPU *cpu, uint8_t r2, uintptr_t ra)
                 goto out;
             }
             device_legacy_reset(DEVICE(pbdev));
+            if (pbdev->interp) {
+                if (clp_disable_interp(pbdev)) {
+                    stw_p(&ressetpci->hdr.rsp, CLP_RC_SETPCIFN_ERR);
+                    goto out;
+                }
+            }
+            /* Mask off the enabled bit for interpreted devices too */
             pbdev->fh &= ~FH_MASK_ENABLE;
             pbdev->state = ZPCI_FS_DISABLED;
             stl_p(&ressetpci->fh, pbdev->fh);
diff --git a/hw/s390x/s390-pci-vfio.c b/hw/s390x/s390-pci-vfio.c
index 6f80a47e29..2cab3a9e89 100644
--- a/hw/s390x/s390-pci-vfio.c
+++ b/hw/s390x/s390-pci-vfio.c
@@ -97,6 +97,58 @@  void s390_pci_end_dma_count(S390pciState *s, S390PCIDMACount *cnt)
     }
 }
 
+int s390_pci_probe_interp(S390PCIBusDevice *pbdev)
+{
+    VFIOPCIDevice *vdev = VFIO_PCI(pbdev->pdev);
+    struct vfio_device_feature feat = {
+        .argsz = sizeof(struct vfio_device_feature),
+        .flags = VFIO_DEVICE_FEATURE_PROBE | VFIO_DEVICE_FEATURE_ZPCI_INTERP
+    };
+
+    return ioctl(vdev->vbasedev.fd, VFIO_DEVICE_FEATURE, &feat);
+}
+
+int s390_pci_set_interp(S390PCIBusDevice *pbdev, bool enable)
+{
+    VFIOPCIDevice *vdev = VFIO_PCI(pbdev->pdev);
+    struct vfio_device_zpci_interp *data;
+    int size = sizeof(struct vfio_device_feature) + sizeof(*data);
+    g_autofree struct vfio_device_feature *feat = g_malloc0(size);
+
+    feat->argsz = size;
+    feat->flags = VFIO_DEVICE_FEATURE_SET + VFIO_DEVICE_FEATURE_ZPCI_INTERP;
+
+    data = (struct vfio_device_zpci_interp *)&feat->data;
+    if (enable) {
+        data->flags = VFIO_DEVICE_ZPCI_FLAG_INTERP;
+    } else {
+        data->flags = 0;
+    }
+
+    return ioctl(vdev->vbasedev.fd, VFIO_DEVICE_FEATURE, feat);
+}
+
+int s390_pci_update_passthrough_fh(S390PCIBusDevice *pbdev)
+{
+    VFIOPCIDevice *vdev = VFIO_PCI(pbdev->pdev);
+    struct vfio_device_zpci_interp *data;
+    int size = sizeof(struct vfio_device_feature) + sizeof(*data);
+    g_autofree struct vfio_device_feature *feat = g_malloc0(size);
+    int rc;
+
+    feat->argsz = size;
+    feat->flags = VFIO_DEVICE_FEATURE_GET + VFIO_DEVICE_FEATURE_ZPCI_INTERP;
+
+    rc = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_FEATURE, feat);
+    if (rc) {
+        return rc;
+    }
+
+    data = (struct vfio_device_zpci_interp *)&feat->data;
+    pbdev->fh = data->fh;
+    return 0;
+}
+
 static void s390_pci_read_base(S390PCIBusDevice *pbdev,
                                struct vfio_device_info *info)
 {
diff --git a/include/hw/s390x/s390-pci-bus.h b/include/hw/s390x/s390-pci-bus.h
index da3cde2bb4..a9843dfe97 100644
--- a/include/hw/s390x/s390-pci-bus.h
+++ b/include/hw/s390x/s390-pci-bus.h
@@ -350,6 +350,7 @@  struct S390PCIBusDevice {
     IndAddr *indicator;
     bool pci_unplug_request_processed;
     bool unplug_requested;
+    bool interp;
     QTAILQ_ENTRY(S390PCIBusDevice) link;
 };
 
diff --git a/include/hw/s390x/s390-pci-vfio.h b/include/hw/s390x/s390-pci-vfio.h
index ff708aef50..42533e38f7 100644
--- a/include/hw/s390x/s390-pci-vfio.h
+++ b/include/hw/s390x/s390-pci-vfio.h
@@ -20,6 +20,9 @@  bool s390_pci_update_dma_avail(int fd, unsigned int *avail);
 S390PCIDMACount *s390_pci_start_dma_count(S390pciState *s,
                                           S390PCIBusDevice *pbdev);
 void s390_pci_end_dma_count(S390pciState *s, S390PCIDMACount *cnt);
+int s390_pci_probe_interp(S390PCIBusDevice *pbdev);
+int s390_pci_set_interp(S390PCIBusDevice *pbdev, bool enable);
+int s390_pci_update_passthrough_fh(S390PCIBusDevice *pbdev);
 void s390_pci_get_clp_info(S390PCIBusDevice *pbdev);
 #else
 static inline bool s390_pci_update_dma_avail(int fd, unsigned int *avail)
@@ -33,6 +36,18 @@  static inline S390PCIDMACount *s390_pci_start_dma_count(S390pciState *s,
 }
 static inline void s390_pci_end_dma_count(S390pciState *s,
                                           S390PCIDMACount *cnt) { }
+int s390_pci_probe_interp(S390PCIBusDevice *pbdev)
+{
+    return -EINVAL;
+}
+static inline int s390_pci_set_interp(S390PCIBusDevice *pbdev, bool enable)
+{
+    return -EINVAL;
+}
+static inline int s390_pci_update_passthrough_fh(S390PCIBusDevice *pbdev)
+{
+    return -EINVAL;
+}
 static inline void s390_pci_get_clp_info(S390PCIBusDevice *pbdev) { }
 #endif