diff mbox

[v2,09/11] vfio: vote a device to do host bus reset

Message ID 1457320984-6540-10-git-send-email-caoj.fnst@cn.fujitsu.com (mailing list archive)
State New, archived
Headers show

Commit Message

Cao jin March 7, 2016, 3:23 a.m. UTC
From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>

Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
---
 hw/vfio/pci.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 hw/vfio/pci.h |  1 +
 2 files changed, 58 insertions(+)

Comments

Michael S. Tsirkin March 9, 2016, 10:07 a.m. UTC | #1
On Mon, Mar 07, 2016 at 11:23:02AM +0800, Cao jin wrote:
> From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> 
> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> ---
>  hw/vfio/pci.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  hw/vfio/pci.h |  1 +
>  2 files changed, 58 insertions(+)
> 
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index 24848c9..8e902d2 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -1937,6 +1937,8 @@ static void vfio_check_host_bus_reset(VFIOPCIDevice *vdev, Error **errp)
>      /* List all affected devices by bus reset */
>      devices = &info->devices[0];
>  
> +    vdev->single_depend_dev = (info->count == 1);
> +
>      /* Verify that we have all the groups required */
>      for (i = 0; i < info->count; i++) {
>          PCIHostDeviceAddress host;
> @@ -2998,6 +3000,49 @@ static void vfio_exitfn(PCIDevice *pdev)
>      vfio_unregister_bars(vdev);
>  }
>  
> +static VFIOPCIDevice *vfio_pci_get_do_reset_device(VFIOPCIDevice *vdev)
> +{
> +    struct vfio_pci_hot_reset_info *info = NULL;
> +    struct vfio_pci_dependent_device *devices;
> +    VFIOGroup *group;
> +    VFIODevice *vbasedev_iter;
> +    int ret;
> +
> +    ret = vfio_get_hot_reset_info(vdev, &info);
> +    if (ret) {
> +        error_report("vfio: Cannot enable AER for device %s,"
> +                     " device does not support hot reset.",
> +                     vdev->vbasedev.name);
> +        return NULL;
> +    }
> +
> +    devices = &info->devices[0];
> +
> +    QLIST_FOREACH(group, &vfio_group_list, next) {
> +        if (group->groupid == devices[0].group_id) {
> +            break;
> +        }
> +    }
> +
> +    if (!group) {
> +        error_report("vfio: Cannot enable AER for device %s, "
> +                    "depends on group %d which is not owned.",
> +                    vdev->vbasedev.name, devices[0].group_id);
> +        return NULL;
> +    }
> +
> +    QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
> +        if (vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI ||
> +            !(vdev->features & VFIO_FEATURE_ENABLE_AER)) {
> +            continue;
> +        }
> +
> +        return container_of(vbasedev_iter, VFIOPCIDevice, vbasedev);
> +    }
> +
> +    return NULL;
> +}
> +
>  static void vfio_pci_reset(DeviceState *dev)
>  {
>      PCIDevice *pdev = DO_UPCAST(PCIDevice, qdev, dev);
> @@ -3005,6 +3050,18 @@ static void vfio_pci_reset(DeviceState *dev)
>  
>      trace_vfio_pci_reset(vdev->vbasedev.name);
>  
> +    if (pdev->bus->in_hot_reset) {

So the full system (fundamental) reset is a subset of the hot reset?
This seems to make no sense. I expect that the fundamental reset
is stronger. What did I miss?


> +        VFIOPCIDevice *tmp;
> +
> +        tmp = vfio_pci_get_do_reset_device(vdev);
> +        if (tmp) {
> +            if (tmp == vdev) {
> +                vfio_pci_hot_reset(vdev, vdev->single_depend_dev);
> +            }
> +            return;
> +        }
> +    }
> +
>      vfio_pci_pre_reset(vdev);
>  
>      if (vdev->resetfn && !vdev->resetfn(vdev)) {
> diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
> index aff46c2..32bd31f 100644
> --- a/hw/vfio/pci.h
> +++ b/hw/vfio/pci.h
> @@ -143,6 +143,7 @@ typedef struct VFIOPCIDevice {
>      bool no_kvm_intx;
>      bool no_kvm_msi;
>      bool no_kvm_msix;
> +    bool single_depend_dev;
>  } VFIOPCIDevice;
>  
>  uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len);
> -- 
> 1.9.3
> 
>
Alex Williamson March 9, 2016, 4:37 p.m. UTC | #2
On Mon, 7 Mar 2016 11:23:02 +0800
Cao jin <caoj.fnst@cn.fujitsu.com> wrote:

> From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> 
> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> ---
>  hw/vfio/pci.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  hw/vfio/pci.h |  1 +
>  2 files changed, 58 insertions(+)
> 
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index 24848c9..8e902d2 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -1937,6 +1937,8 @@ static void vfio_check_host_bus_reset(VFIOPCIDevice *vdev, Error **errp)
>      /* List all affected devices by bus reset */
>      devices = &info->devices[0];
>  
> +    vdev->single_depend_dev = (info->count == 1);
> +
>      /* Verify that we have all the groups required */
>      for (i = 0; i < info->count; i++) {
>          PCIHostDeviceAddress host;
> @@ -2998,6 +3000,49 @@ static void vfio_exitfn(PCIDevice *pdev)
>      vfio_unregister_bars(vdev);
>  }
>  
> +static VFIOPCIDevice *vfio_pci_get_do_reset_device(VFIOPCIDevice *vdev)
> +{
> +    struct vfio_pci_hot_reset_info *info = NULL;
> +    struct vfio_pci_dependent_device *devices;
> +    VFIOGroup *group;
> +    VFIODevice *vbasedev_iter;
> +    int ret;
> +
> +    ret = vfio_get_hot_reset_info(vdev, &info);
> +    if (ret) {
> +        error_report("vfio: Cannot enable AER for device %s,"
> +                     " device does not support hot reset.",
> +                     vdev->vbasedev.name);
> +        return NULL;

Isn't this path called for all devices, not just those trying to
support AER?

> +    }
> +
> +    devices = &info->devices[0];
> +
> +    QLIST_FOREACH(group, &vfio_group_list, next) {
> +        if (group->groupid == devices[0].group_id) {
> +            break;
> +        }
> +    }
> +
> +    if (!group) {
> +        error_report("vfio: Cannot enable AER for device %s, "
> +                    "depends on group %d which is not owned.",
> +                    vdev->vbasedev.name, devices[0].group_id);
> +        return NULL;

Same here, we're not in an AER specific code path and we haven't even
tested if the device is trying to support AER.

> +    }
> +
> +    QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
> +        if (vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI ||
> +            !(vdev->features & VFIO_FEATURE_ENABLE_AER)) {
> +            continue;
> +        }
> +
> +        return container_of(vbasedev_iter, VFIOPCIDevice, vbasedev);

So the reset device is simply the first device in the group list, this
needs to be documented, but DMA isolation (grouping) and bus isolation
are separate things and we can have more than one group per bus...
often an IOMMU group is a single device, so have we really solved
anything here?  If the next device is on the same bus but in a separate
group, we're going to hot reset again, right?  Thanks,

Alex

> +    }
> +
> +    return NULL;
> +}
> +
>  static void vfio_pci_reset(DeviceState *dev)
>  {
>      PCIDevice *pdev = DO_UPCAST(PCIDevice, qdev, dev);
> @@ -3005,6 +3050,18 @@ static void vfio_pci_reset(DeviceState *dev)
>  
>      trace_vfio_pci_reset(vdev->vbasedev.name);
>  
> +    if (pdev->bus->in_hot_reset) {
> +        VFIOPCIDevice *tmp;
> +
> +        tmp = vfio_pci_get_do_reset_device(vdev);
> +        if (tmp) {
> +            if (tmp == vdev) {
> +                vfio_pci_hot_reset(vdev, vdev->single_depend_dev);
> +            }
> +            return;
> +        }
> +    }
> +
>      vfio_pci_pre_reset(vdev);
>  
>      if (vdev->resetfn && !vdev->resetfn(vdev)) {
> diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
> index aff46c2..32bd31f 100644
> --- a/hw/vfio/pci.h
> +++ b/hw/vfio/pci.h
> @@ -143,6 +143,7 @@ typedef struct VFIOPCIDevice {
>      bool no_kvm_intx;
>      bool no_kvm_msi;
>      bool no_kvm_msix;
> +    bool single_depend_dev;
>  } VFIOPCIDevice;
>  
>  uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len);
Michael S. Tsirkin March 9, 2016, 4:39 p.m. UTC | #3
On Mon, Mar 07, 2016 at 11:23:02AM +0800, Cao jin wrote:
> From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> 
> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> ---
>  hw/vfio/pci.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  hw/vfio/pci.h |  1 +
>  2 files changed, 58 insertions(+)
> 
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index 24848c9..8e902d2 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -1937,6 +1937,8 @@ static void vfio_check_host_bus_reset(VFIOPCIDevice *vdev, Error **errp)
>      /* List all affected devices by bus reset */
>      devices = &info->devices[0];
>  
> +    vdev->single_depend_dev = (info->count == 1);
> +
>      /* Verify that we have all the groups required */
>      for (i = 0; i < info->count; i++) {
>          PCIHostDeviceAddress host;
> @@ -2998,6 +3000,49 @@ static void vfio_exitfn(PCIDevice *pdev)
>      vfio_unregister_bars(vdev);
>  }
>  
> +static VFIOPCIDevice *vfio_pci_get_do_reset_device(VFIOPCIDevice *vdev)
> +{
> +    struct vfio_pci_hot_reset_info *info = NULL;
> +    struct vfio_pci_dependent_device *devices;
> +    VFIOGroup *group;
> +    VFIODevice *vbasedev_iter;
> +    int ret;
> +
> +    ret = vfio_get_hot_reset_info(vdev, &info);
> +    if (ret) {
> +        error_report("vfio: Cannot enable AER for device %s,"
> +                     " device does not support hot reset.",
> +                     vdev->vbasedev.name);
> +        return NULL;
> +    }
> +
> +    devices = &info->devices[0];
> +
> +    QLIST_FOREACH(group, &vfio_group_list, next) {
> +        if (group->groupid == devices[0].group_id) {
> +            break;
> +        }
> +    }
> +
> +    if (!group) {
> +        error_report("vfio: Cannot enable AER for device %s, "
> +                    "depends on group %d which is not owned.",
> +                    vdev->vbasedev.name, devices[0].group_id);
> +        return NULL;
> +    }
> +
> +    QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
> +        if (vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI ||
> +            !(vdev->features & VFIO_FEATURE_ENABLE_AER)) {
> +            continue;
> +        }
> +
> +        return container_of(vbasedev_iter, VFIOPCIDevice, vbasedev);
> +    }
> +
> +    return NULL;
> +}
> +
>  static void vfio_pci_reset(DeviceState *dev)
>  {
>      PCIDevice *pdev = DO_UPCAST(PCIDevice, qdev, dev);
> @@ -3005,6 +3050,18 @@ static void vfio_pci_reset(DeviceState *dev)
>  
>      trace_vfio_pci_reset(vdev->vbasedev.name);
>  
> +    if (pdev->bus->in_hot_reset) {

After discussing this with Alex, I think what you
really are looking for is
     if (dev->realized)
in order to skip hot reset when device is plugged-in
initially.


> +        VFIOPCIDevice *tmp;
> +
> +        tmp = vfio_pci_get_do_reset_device(vdev);
> +        if (tmp) {
> +            if (tmp == vdev) {
> +                vfio_pci_hot_reset(vdev, vdev->single_depend_dev);
> +            }
> +            return;
> +        }
> +    }
> +
>      vfio_pci_pre_reset(vdev);
>  
>      if (vdev->resetfn && !vdev->resetfn(vdev)) {
> diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
> index aff46c2..32bd31f 100644
> --- a/hw/vfio/pci.h
> +++ b/hw/vfio/pci.h
> @@ -143,6 +143,7 @@ typedef struct VFIOPCIDevice {
>      bool no_kvm_intx;
>      bool no_kvm_msi;
>      bool no_kvm_msix;
> +    bool single_depend_dev;
>  } VFIOPCIDevice;
>  
>  uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len);
> -- 
> 1.9.3
> 
>
Alex Williamson March 9, 2016, 5:09 p.m. UTC | #4
On Wed, 9 Mar 2016 18:39:51 +0200
"Michael S. Tsirkin" <mst@redhat.com> wrote:

> On Mon, Mar 07, 2016 at 11:23:02AM +0800, Cao jin wrote:
> > From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> > 
> > Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> > ---
> >  hw/vfio/pci.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
> >  hw/vfio/pci.h |  1 +
> >  2 files changed, 58 insertions(+)
> > 
> > diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> > index 24848c9..8e902d2 100644
> > --- a/hw/vfio/pci.c
> > +++ b/hw/vfio/pci.c
> > @@ -1937,6 +1937,8 @@ static void vfio_check_host_bus_reset(VFIOPCIDevice *vdev, Error **errp)
> >      /* List all affected devices by bus reset */
> >      devices = &info->devices[0];
> >  
> > +    vdev->single_depend_dev = (info->count == 1);
> > +
> >      /* Verify that we have all the groups required */
> >      for (i = 0; i < info->count; i++) {
> >          PCIHostDeviceAddress host;
> > @@ -2998,6 +3000,49 @@ static void vfio_exitfn(PCIDevice *pdev)
> >      vfio_unregister_bars(vdev);
> >  }
> >  
> > +static VFIOPCIDevice *vfio_pci_get_do_reset_device(VFIOPCIDevice *vdev)
> > +{
> > +    struct vfio_pci_hot_reset_info *info = NULL;
> > +    struct vfio_pci_dependent_device *devices;
> > +    VFIOGroup *group;
> > +    VFIODevice *vbasedev_iter;
> > +    int ret;
> > +
> > +    ret = vfio_get_hot_reset_info(vdev, &info);
> > +    if (ret) {
> > +        error_report("vfio: Cannot enable AER for device %s,"
> > +                     " device does not support hot reset.",
> > +                     vdev->vbasedev.name);
> > +        return NULL;
> > +    }
> > +
> > +    devices = &info->devices[0];
> > +
> > +    QLIST_FOREACH(group, &vfio_group_list, next) {
> > +        if (group->groupid == devices[0].group_id) {
> > +            break;
> > +        }
> > +    }
> > +
> > +    if (!group) {
> > +        error_report("vfio: Cannot enable AER for device %s, "
> > +                    "depends on group %d which is not owned.",
> > +                    vdev->vbasedev.name, devices[0].group_id);
> > +        return NULL;
> > +    }
> > +
> > +    QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
> > +        if (vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI ||
> > +            !(vdev->features & VFIO_FEATURE_ENABLE_AER)) {
> > +            continue;
> > +        }
> > +
> > +        return container_of(vbasedev_iter, VFIOPCIDevice, vbasedev);
> > +    }
> > +
> > +    return NULL;
> > +}
> > +
> >  static void vfio_pci_reset(DeviceState *dev)
> >  {
> >      PCIDevice *pdev = DO_UPCAST(PCIDevice, qdev, dev);
> > @@ -3005,6 +3050,18 @@ static void vfio_pci_reset(DeviceState *dev)
> >  
> >      trace_vfio_pci_reset(vdev->vbasedev.name);
> >  
> > +    if (pdev->bus->in_hot_reset) {  
> 
> After discussing this with Alex, I think what you
> really are looking for is
>      if (dev->realized)
> in order to skip hot reset when device is plugged-in
> initially.

Seems fragile, device_reset() also gets called via pci_device_reset()
which would happen if we hooked up FLR to the device.  That's clearly
intended to be a function local reset, as is I believe the dc->reset
itself, so assuming behavior based on states like dev->realized is
likely to break.  Either we need some external hints, like what's added
here, or we need a separate callback for a bus reset.  Thanks,

Alex

> > +        VFIOPCIDevice *tmp;
> > +
> > +        tmp = vfio_pci_get_do_reset_device(vdev);
> > +        if (tmp) {
> > +            if (tmp == vdev) {
> > +                vfio_pci_hot_reset(vdev, vdev->single_depend_dev);
> > +            }
> > +            return;
> > +        }
> > +    }
> > +
> >      vfio_pci_pre_reset(vdev);
> >  
> >      if (vdev->resetfn && !vdev->resetfn(vdev)) {
> > diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
> > index aff46c2..32bd31f 100644
> > --- a/hw/vfio/pci.h
> > +++ b/hw/vfio/pci.h
> > @@ -143,6 +143,7 @@ typedef struct VFIOPCIDevice {
> >      bool no_kvm_intx;
> >      bool no_kvm_msi;
> >      bool no_kvm_msix;
> > +    bool single_depend_dev;
> >  } VFIOPCIDevice;
> >  
> >  uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len);
> > -- 
> > 1.9.3
> > 
> >
Michael S. Tsirkin March 9, 2016, 5:31 p.m. UTC | #5
On Wed, Mar 09, 2016 at 10:09:11AM -0700, Alex Williamson wrote:
> On Wed, 9 Mar 2016 18:39:51 +0200
> "Michael S. Tsirkin" <mst@redhat.com> wrote:
> 
> > On Mon, Mar 07, 2016 at 11:23:02AM +0800, Cao jin wrote:
> > > From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> > > 
> > > Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> > > ---
> > >  hw/vfio/pci.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
> > >  hw/vfio/pci.h |  1 +
> > >  2 files changed, 58 insertions(+)
> > > 
> > > diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> > > index 24848c9..8e902d2 100644
> > > --- a/hw/vfio/pci.c
> > > +++ b/hw/vfio/pci.c
> > > @@ -1937,6 +1937,8 @@ static void vfio_check_host_bus_reset(VFIOPCIDevice *vdev, Error **errp)
> > >      /* List all affected devices by bus reset */
> > >      devices = &info->devices[0];
> > >  
> > > +    vdev->single_depend_dev = (info->count == 1);
> > > +
> > >      /* Verify that we have all the groups required */
> > >      for (i = 0; i < info->count; i++) {
> > >          PCIHostDeviceAddress host;
> > > @@ -2998,6 +3000,49 @@ static void vfio_exitfn(PCIDevice *pdev)
> > >      vfio_unregister_bars(vdev);
> > >  }
> > >  
> > > +static VFIOPCIDevice *vfio_pci_get_do_reset_device(VFIOPCIDevice *vdev)
> > > +{
> > > +    struct vfio_pci_hot_reset_info *info = NULL;
> > > +    struct vfio_pci_dependent_device *devices;
> > > +    VFIOGroup *group;
> > > +    VFIODevice *vbasedev_iter;
> > > +    int ret;
> > > +
> > > +    ret = vfio_get_hot_reset_info(vdev, &info);
> > > +    if (ret) {
> > > +        error_report("vfio: Cannot enable AER for device %s,"
> > > +                     " device does not support hot reset.",
> > > +                     vdev->vbasedev.name);
> > > +        return NULL;
> > > +    }
> > > +
> > > +    devices = &info->devices[0];
> > > +
> > > +    QLIST_FOREACH(group, &vfio_group_list, next) {
> > > +        if (group->groupid == devices[0].group_id) {
> > > +            break;
> > > +        }
> > > +    }
> > > +
> > > +    if (!group) {
> > > +        error_report("vfio: Cannot enable AER for device %s, "
> > > +                    "depends on group %d which is not owned.",
> > > +                    vdev->vbasedev.name, devices[0].group_id);
> > > +        return NULL;
> > > +    }
> > > +
> > > +    QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
> > > +        if (vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI ||
> > > +            !(vdev->features & VFIO_FEATURE_ENABLE_AER)) {
> > > +            continue;
> > > +        }
> > > +
> > > +        return container_of(vbasedev_iter, VFIOPCIDevice, vbasedev);
> > > +    }
> > > +
> > > +    return NULL;
> > > +}
> > > +
> > >  static void vfio_pci_reset(DeviceState *dev)
> > >  {
> > >      PCIDevice *pdev = DO_UPCAST(PCIDevice, qdev, dev);
> > > @@ -3005,6 +3050,18 @@ static void vfio_pci_reset(DeviceState *dev)
> > >  
> > >      trace_vfio_pci_reset(vdev->vbasedev.name);
> > >  
> > > +    if (pdev->bus->in_hot_reset) {  
> > 
> > After discussing this with Alex, I think what you
> > really are looking for is
> >      if (dev->realized)
> > in order to skip hot reset when device is plugged-in
> > initially.
> 
> Seems fragile, device_reset() also gets called via pci_device_reset()
> which would happen if we hooked up FLR to the device.

You can check that as well (through config space, no need
for new flags).

There's also PM reset.

>  That's clearly
> intended to be a function local reset, as is I believe the dc->reset
> itself, so assuming behavior based on states like dev->realized is
> likely to break.  Either we need some external hints, like what's added
> here, or we need a separate callback for a bus reset.  Thanks,
> 
> Alex


So I'm fine with a new flag is_bus_rst, or an API pci_is_bus_rst.

But it has to be set whenever bus is reset, *not* just on secondary bus reset.


> > > +        VFIOPCIDevice *tmp;
> > > +
> > > +        tmp = vfio_pci_get_do_reset_device(vdev);
> > > +        if (tmp) {
> > > +            if (tmp == vdev) {
> > > +                vfio_pci_hot_reset(vdev, vdev->single_depend_dev);
> > > +            }
> > > +            return;
> > > +        }
> > > +    }
> > > +
> > >      vfio_pci_pre_reset(vdev);
> > >  
> > >      if (vdev->resetfn && !vdev->resetfn(vdev)) {
> > > diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
> > > index aff46c2..32bd31f 100644
> > > --- a/hw/vfio/pci.h
> > > +++ b/hw/vfio/pci.h
> > > @@ -143,6 +143,7 @@ typedef struct VFIOPCIDevice {
> > >      bool no_kvm_intx;
> > >      bool no_kvm_msi;
> > >      bool no_kvm_msix;
> > > +    bool single_depend_dev;
> > >  } VFIOPCIDevice;
> > >  
> > >  uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len);
> > > -- 
> > > 1.9.3
> > > 
> > >
Chen Fan March 10, 2016, 6:15 a.m. UTC | #6
On 03/10/2016 12:37 AM, Alex Williamson wrote:
> On Mon, 7 Mar 2016 11:23:02 +0800
> Cao jin <caoj.fnst@cn.fujitsu.com> wrote:
>
>> From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
>>
>> Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
>> ---
>>   hw/vfio/pci.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>>   hw/vfio/pci.h |  1 +
>>   2 files changed, 58 insertions(+)
>>
>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>> index 24848c9..8e902d2 100644
>> --- a/hw/vfio/pci.c
>> +++ b/hw/vfio/pci.c
>> @@ -1937,6 +1937,8 @@ static void vfio_check_host_bus_reset(VFIOPCIDevice *vdev, Error **errp)
>>       /* List all affected devices by bus reset */
>>       devices = &info->devices[0];
>>   
>> +    vdev->single_depend_dev = (info->count == 1);
>> +
>>       /* Verify that we have all the groups required */
>>       for (i = 0; i < info->count; i++) {
>>           PCIHostDeviceAddress host;
>> @@ -2998,6 +3000,49 @@ static void vfio_exitfn(PCIDevice *pdev)
>>       vfio_unregister_bars(vdev);
>>   }
>>   
>> +static VFIOPCIDevice *vfio_pci_get_do_reset_device(VFIOPCIDevice *vdev)
>> +{
>> +    struct vfio_pci_hot_reset_info *info = NULL;
>> +    struct vfio_pci_dependent_device *devices;
>> +    VFIOGroup *group;
>> +    VFIODevice *vbasedev_iter;
>> +    int ret;
>> +
>> +    ret = vfio_get_hot_reset_info(vdev, &info);
>> +    if (ret) {
>> +        error_report("vfio: Cannot enable AER for device %s,"
>> +                     " device does not support hot reset.",
>> +                     vdev->vbasedev.name);
>> +        return NULL;
> Isn't this path called for all devices, not just those trying to
> support AER?
>
>> +    }
>> +
>> +    devices = &info->devices[0];
>> +
>> +    QLIST_FOREACH(group, &vfio_group_list, next) {
>> +        if (group->groupid == devices[0].group_id) {
>> +            break;
>> +        }
>> +    }
>> +
>> +    if (!group) {
>> +        error_report("vfio: Cannot enable AER for device %s, "
>> +                    "depends on group %d which is not owned.",
>> +                    vdev->vbasedev.name, devices[0].group_id);
>> +        return NULL;
> Same here, we're not in an AER specific code path and we haven't even
> tested if the device is trying to support AER.
>
>> +    }
>> +
>> +    QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
>> +        if (vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI ||
>> +            !(vdev->features & VFIO_FEATURE_ENABLE_AER)) {
>> +            continue;
>> +        }
>> +
>> +        return container_of(vbasedev_iter, VFIOPCIDevice, vbasedev);
> So the reset device is simply the first device in the group list, this
> needs to be documented, but DMA isolation (grouping) and bus isolation
> are separate things and we can have more than one group per bus...
> often an IOMMU group is a single device, so have we really solved
> anything here?  If the next device is on the same bus but in a separate
> group, we're going to hot reset again, right?  Thanks,
You are right, I recall my original idea was not something what like 
this. :)
I tend to favor MST's ideas which said in V1 series patchset:
"If the assumption that vfio functions are combined
in the same way as on the host, then this is not needed:
just reset when function 0 is reset."

so, seems we could implement this in next version.
1. requiring functions on same host bus must were assigned on the same 
virtual
     bus. here we should not have a subordinate bus. because nowadays bridge
     do not support pass through. so we can simply reset the vfio device 
with the
     smallest devfn in a bus reset as long as one device has aer on the bus.

     with this, we don't need to care the groups isolation, because we 
only need the devices
     by a bus hot reset, if the devices all on one virtual bus. reset 
anyone is sufficient.

2. in order to identify the reset is from a normal reset or aer recovery 
reset, we could
     add a flags in vfio device to mark the aer have occurred in 
err_notifier_handle, if
     subsequent reset is coming with the is_bus_rst in parent bus is 
true. we can
     directly do the hot reset.

Thanks,
Chen

>
> Alex
>
>> +    }
>> +
>> +    return NULL;
>> +}
>> +
>>   static void vfio_pci_reset(DeviceState *dev)
>>   {
>>       PCIDevice *pdev = DO_UPCAST(PCIDevice, qdev, dev);
>> @@ -3005,6 +3050,18 @@ static void vfio_pci_reset(DeviceState *dev)
>>   
>>       trace_vfio_pci_reset(vdev->vbasedev.name);
>>   
>> +    if (pdev->bus->in_hot_reset) {
>> +        VFIOPCIDevice *tmp;
>> +
>> +        tmp = vfio_pci_get_do_reset_device(vdev);
>> +        if (tmp) {
>> +            if (tmp == vdev) {
>> +                vfio_pci_hot_reset(vdev, vdev->single_depend_dev);
>> +            }
>> +            return;
>> +        }
>> +    }
>> +
>>       vfio_pci_pre_reset(vdev);
>>   
>>       if (vdev->resetfn && !vdev->resetfn(vdev)) {
>> diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
>> index aff46c2..32bd31f 100644
>> --- a/hw/vfio/pci.h
>> +++ b/hw/vfio/pci.h
>> @@ -143,6 +143,7 @@ typedef struct VFIOPCIDevice {
>>       bool no_kvm_intx;
>>       bool no_kvm_msi;
>>       bool no_kvm_msix;
>> +    bool single_depend_dev;
>>   } VFIOPCIDevice;
>>   
>>   uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len);
>
>
> .
>
Michael S. Tsirkin March 10, 2016, 2:16 p.m. UTC | #7
On Thu, Mar 10, 2016 at 02:15:53PM +0800, Chen Fan wrote:
> 
> On 03/10/2016 12:37 AM, Alex Williamson wrote:
> >On Mon, 7 Mar 2016 11:23:02 +0800
> >Cao jin <caoj.fnst@cn.fujitsu.com> wrote:
> >
> >>From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> >>
> >>Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
> >>---
> >>  hw/vfio/pci.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
> >>  hw/vfio/pci.h |  1 +
> >>  2 files changed, 58 insertions(+)
> >>
> >>diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> >>index 24848c9..8e902d2 100644
> >>--- a/hw/vfio/pci.c
> >>+++ b/hw/vfio/pci.c
> >>@@ -1937,6 +1937,8 @@ static void vfio_check_host_bus_reset(VFIOPCIDevice *vdev, Error **errp)
> >>      /* List all affected devices by bus reset */
> >>      devices = &info->devices[0];
> >>+    vdev->single_depend_dev = (info->count == 1);
> >>+
> >>      /* Verify that we have all the groups required */
> >>      for (i = 0; i < info->count; i++) {
> >>          PCIHostDeviceAddress host;
> >>@@ -2998,6 +3000,49 @@ static void vfio_exitfn(PCIDevice *pdev)
> >>      vfio_unregister_bars(vdev);
> >>  }
> >>+static VFIOPCIDevice *vfio_pci_get_do_reset_device(VFIOPCIDevice *vdev)
> >>+{
> >>+    struct vfio_pci_hot_reset_info *info = NULL;
> >>+    struct vfio_pci_dependent_device *devices;
> >>+    VFIOGroup *group;
> >>+    VFIODevice *vbasedev_iter;
> >>+    int ret;
> >>+
> >>+    ret = vfio_get_hot_reset_info(vdev, &info);
> >>+    if (ret) {
> >>+        error_report("vfio: Cannot enable AER for device %s,"
> >>+                     " device does not support hot reset.",
> >>+                     vdev->vbasedev.name);
> >>+        return NULL;
> >Isn't this path called for all devices, not just those trying to
> >support AER?
> >
> >>+    }
> >>+
> >>+    devices = &info->devices[0];
> >>+
> >>+    QLIST_FOREACH(group, &vfio_group_list, next) {
> >>+        if (group->groupid == devices[0].group_id) {
> >>+            break;
> >>+        }
> >>+    }
> >>+
> >>+    if (!group) {
> >>+        error_report("vfio: Cannot enable AER for device %s, "
> >>+                    "depends on group %d which is not owned.",
> >>+                    vdev->vbasedev.name, devices[0].group_id);
> >>+        return NULL;
> >Same here, we're not in an AER specific code path and we haven't even
> >tested if the device is trying to support AER.
> >
> >>+    }
> >>+
> >>+    QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
> >>+        if (vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI ||
> >>+            !(vdev->features & VFIO_FEATURE_ENABLE_AER)) {
> >>+            continue;
> >>+        }
> >>+
> >>+        return container_of(vbasedev_iter, VFIOPCIDevice, vbasedev);
> >So the reset device is simply the first device in the group list, this
> >needs to be documented, but DMA isolation (grouping) and bus isolation
> >are separate things and we can have more than one group per bus...
> >often an IOMMU group is a single device, so have we really solved
> >anything here?  If the next device is on the same bus but in a separate
> >group, we're going to hot reset again, right?  Thanks,
> You are right, I recall my original idea was not something what like this.
> :)
> I tend to favor MST's ideas which said in V1 series patchset:
> "If the assumption that vfio functions are combined
> in the same way as on the host, then this is not needed:
> just reset when function 0 is reset."

If you do this, you can do your checks on realize, without
need for a separate validate callback.


> 
> so, seems we could implement this in next version.
> 1. requiring functions on same host bus must were assigned on the same
> virtual
>     bus. here we should not have a subordinate bus. because nowadays bridge
>     do not support pass through. so we can simply reset the vfio device with
> the
>     smallest devfn in a bus reset as long as one device has aer on the bus.
> 
>     with this, we don't need to care the groups isolation, because we only
> need the devices
>     by a bus hot reset, if the devices all on one virtual bus. reset anyone
> is sufficient.
> 
> 2. in order to identify the reset is from a normal reset or aer recovery
> reset, we could
>     add a flags in vfio device to mark the aer have occurred in
> err_notifier_handle, if
>     subsequent reset is coming with the is_bus_rst in parent bus is true. we
> can
>     directly do the hot reset.
> 
> Thanks,
> Chen
> 
> >
> >Alex
> >
> >>+    }
> >>+
> >>+    return NULL;
> >>+}
> >>+
> >>  static void vfio_pci_reset(DeviceState *dev)
> >>  {
> >>      PCIDevice *pdev = DO_UPCAST(PCIDevice, qdev, dev);
> >>@@ -3005,6 +3050,18 @@ static void vfio_pci_reset(DeviceState *dev)
> >>      trace_vfio_pci_reset(vdev->vbasedev.name);
> >>+    if (pdev->bus->in_hot_reset) {
> >>+        VFIOPCIDevice *tmp;
> >>+
> >>+        tmp = vfio_pci_get_do_reset_device(vdev);
> >>+        if (tmp) {
> >>+            if (tmp == vdev) {
> >>+                vfio_pci_hot_reset(vdev, vdev->single_depend_dev);
> >>+            }
> >>+            return;
> >>+        }
> >>+    }
> >>+
> >>      vfio_pci_pre_reset(vdev);
> >>      if (vdev->resetfn && !vdev->resetfn(vdev)) {
> >>diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
> >>index aff46c2..32bd31f 100644
> >>--- a/hw/vfio/pci.h
> >>+++ b/hw/vfio/pci.h
> >>@@ -143,6 +143,7 @@ typedef struct VFIOPCIDevice {
> >>      bool no_kvm_intx;
> >>      bool no_kvm_msi;
> >>      bool no_kvm_msix;
> >>+    bool single_depend_dev;
> >>  } VFIOPCIDevice;
> >>  uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len);
> >
> >
> >.
> >
> 
>
diff mbox

Patch

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 24848c9..8e902d2 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -1937,6 +1937,8 @@  static void vfio_check_host_bus_reset(VFIOPCIDevice *vdev, Error **errp)
     /* List all affected devices by bus reset */
     devices = &info->devices[0];
 
+    vdev->single_depend_dev = (info->count == 1);
+
     /* Verify that we have all the groups required */
     for (i = 0; i < info->count; i++) {
         PCIHostDeviceAddress host;
@@ -2998,6 +3000,49 @@  static void vfio_exitfn(PCIDevice *pdev)
     vfio_unregister_bars(vdev);
 }
 
+static VFIOPCIDevice *vfio_pci_get_do_reset_device(VFIOPCIDevice *vdev)
+{
+    struct vfio_pci_hot_reset_info *info = NULL;
+    struct vfio_pci_dependent_device *devices;
+    VFIOGroup *group;
+    VFIODevice *vbasedev_iter;
+    int ret;
+
+    ret = vfio_get_hot_reset_info(vdev, &info);
+    if (ret) {
+        error_report("vfio: Cannot enable AER for device %s,"
+                     " device does not support hot reset.",
+                     vdev->vbasedev.name);
+        return NULL;
+    }
+
+    devices = &info->devices[0];
+
+    QLIST_FOREACH(group, &vfio_group_list, next) {
+        if (group->groupid == devices[0].group_id) {
+            break;
+        }
+    }
+
+    if (!group) {
+        error_report("vfio: Cannot enable AER for device %s, "
+                    "depends on group %d which is not owned.",
+                    vdev->vbasedev.name, devices[0].group_id);
+        return NULL;
+    }
+
+    QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
+        if (vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI ||
+            !(vdev->features & VFIO_FEATURE_ENABLE_AER)) {
+            continue;
+        }
+
+        return container_of(vbasedev_iter, VFIOPCIDevice, vbasedev);
+    }
+
+    return NULL;
+}
+
 static void vfio_pci_reset(DeviceState *dev)
 {
     PCIDevice *pdev = DO_UPCAST(PCIDevice, qdev, dev);
@@ -3005,6 +3050,18 @@  static void vfio_pci_reset(DeviceState *dev)
 
     trace_vfio_pci_reset(vdev->vbasedev.name);
 
+    if (pdev->bus->in_hot_reset) {
+        VFIOPCIDevice *tmp;
+
+        tmp = vfio_pci_get_do_reset_device(vdev);
+        if (tmp) {
+            if (tmp == vdev) {
+                vfio_pci_hot_reset(vdev, vdev->single_depend_dev);
+            }
+            return;
+        }
+    }
+
     vfio_pci_pre_reset(vdev);
 
     if (vdev->resetfn && !vdev->resetfn(vdev)) {
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index aff46c2..32bd31f 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -143,6 +143,7 @@  typedef struct VFIOPCIDevice {
     bool no_kvm_intx;
     bool no_kvm_msi;
     bool no_kvm_msix;
+    bool single_depend_dev;
 } VFIOPCIDevice;
 
 uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len);