diff mbox

[qemu,v13,07/16] vfio, memory: Notify IOMMU about starting/stopping being used by VFIO

Message ID 1456823441-46757-8-git-send-email-aik@ozlabs.ru (mailing list archive)
State New, archived
Headers show

Commit Message

Alexey Kardashevskiy March 1, 2016, 9:10 a.m. UTC
This adds a vfio_votify() callback to inform an IOMMU (and then its owner)
that VFIO started using the IOMMU. This is used by the pseries machine to
enable/disable in-kernel acceleration of TCE hypercalls.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
---
 hw/ppc/spapr_iommu.c   |  9 +++++++++
 hw/ppc/spapr_pci.c     | 14 ++++++++------
 hw/vfio/common.c       |  7 +++++++
 include/exec/memory.h  |  2 ++
 include/hw/ppc/spapr.h |  4 ++++
 5 files changed, 30 insertions(+), 6 deletions(-)

Comments

David Gibson March 3, 2016, 5:28 a.m. UTC | #1
On Tue, Mar 01, 2016 at 08:10:32PM +1100, Alexey Kardashevskiy wrote:
> This adds a vfio_votify() callback to inform an IOMMU (and then its owner)
> that VFIO started using the IOMMU. This is used by the pseries machine to
> enable/disable in-kernel acceleration of TCE hypercalls.
> 
> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>

Hmm.. the current approach of having a hook when vfio-pci devices are
attached is pretty ugly, but what exactly the case that it doesn't
handle and this approach does?

This two tiered notify system for a single bit is also kinda ugly.

> ---
>  hw/ppc/spapr_iommu.c   |  9 +++++++++
>  hw/ppc/spapr_pci.c     | 14 ++++++++------
>  hw/vfio/common.c       |  7 +++++++
>  include/exec/memory.h  |  2 ++
>  include/hw/ppc/spapr.h |  4 ++++
>  5 files changed, 30 insertions(+), 6 deletions(-)
> 
> diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
> index 8a88a74..67a8356 100644
> --- a/hw/ppc/spapr_iommu.c
> +++ b/hw/ppc/spapr_iommu.c
> @@ -136,6 +136,13 @@ static IOMMUTLBEntry spapr_tce_translate_iommu(MemoryRegion *iommu, hwaddr addr,
>      return ret;
>  }
>  
> +static int spapr_tce_vfio_notify(MemoryRegion *iommu, bool attached)
> +{
> +    sPAPRTCETable *tcet = container_of(iommu, sPAPRTCETable, iommu);
> +
> +    return spapr_tce_vfio_notify_owner(tcet->owner, tcet, attached);

I'm guessing the "owner" is the PHB, but I'm not entirely clear.

Could you use the QOM parent to get the the PHB instead of storing it
explicitly?

> +}
> +
>  static int spapr_tce_table_post_load(void *opaque, int version_id)
>  {
>      sPAPRTCETable *tcet = SPAPR_TCE_TABLE(opaque);
> @@ -167,6 +174,7 @@ static const VMStateDescription vmstate_spapr_tce_table = {
>  
>  static MemoryRegionIOMMUOps spapr_iommu_ops = {
>      .translate = spapr_tce_translate_iommu,
> +    .vfio_notify = spapr_tce_vfio_notify,
>  };
>  
>  static int spapr_tce_table_realize(DeviceState *dev)
> @@ -235,6 +243,7 @@ sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn)
>  
>      tcet = SPAPR_TCE_TABLE(object_new(TYPE_SPAPR_TCE_TABLE));
>      tcet->liobn = liobn;
> +    tcet->owner = owner;
>  
>      snprintf(tmp, sizeof(tmp), "tce-table-%x", liobn);
>      object_property_add_child(OBJECT(owner), tmp, OBJECT(tcet), NULL);
> diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
> index ee0fecf..b0cd148 100644
> --- a/hw/ppc/spapr_pci.c
> +++ b/hw/ppc/spapr_pci.c
> @@ -1084,6 +1084,14 @@ static int spapr_populate_pci_child_dt(PCIDevice *dev, void *fdt, int offset,
>      return 0;
>  }
>  
> +int spapr_tce_vfio_notify_owner(DeviceState *dev, sPAPRTCETable *tcet,
> +                                bool attached)
> +{
> +    spapr_tce_set_need_vfio(tcet, attached);

Hmm.. you go to the trouble of storing owner in dev, then don't
actually use it.

> +    return 0;
> +}
> +
>  /* create OF node for pci device and required OF DT properties */
>  static int spapr_create_pci_child_dt(sPAPRPHBState *phb, PCIDevice *dev,
>                                       void *fdt, int node_offset)
> @@ -1118,12 +1126,6 @@ static void spapr_phb_add_pci_device(sPAPRDRConnector *drc,
>      void *fdt = NULL;
>      int fdt_start_offset = 0, fdt_size;
>  
> -    if (object_dynamic_cast(OBJECT(pdev), "vfio-pci")) {
> -        sPAPRTCETable *tcet = spapr_tce_find_by_liobn(phb->dma_liobn);
> -
> -        spapr_tce_set_need_vfio(tcet, true);
> -    }
> -
>      if (dev->hotplugged) {
>          fdt = create_device_tree(&fdt_size);
>          fdt_start_offset = spapr_create_pci_child_dt(phb, pdev, fdt, 0);
> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> index 9bf4c3b..ca3fd47 100644
> --- a/hw/vfio/common.c
> +++ b/hw/vfio/common.c
> @@ -384,6 +384,7 @@ static void vfio_listener_region_add(MemoryListener *listener,
>          QLIST_INSERT_HEAD(&container->giommu_list, giommu, giommu_next);
>  
>          memory_region_register_iommu_notifier(giommu->iommu, &giommu->n);
> +        giommu->iommu->iommu_ops->vfio_notify(section->mr, true);
>          memory_region_iommu_replay(giommu->iommu, &giommu->n,
>                                     vfio_container_granularity(container),
>                                     false);
> @@ -430,6 +431,7 @@ static void vfio_listener_region_del(MemoryListener *listener,
>      VFIOContainer *container = container_of(listener, VFIOContainer, listener);
>      hwaddr iova, end;
>      int ret;
> +    MemoryRegion *iommu = NULL;
>  
>      if (vfio_listener_skipped_section(section)) {
>          trace_vfio_listener_region_del_skip(
> @@ -451,6 +453,7 @@ static void vfio_listener_region_del(MemoryListener *listener,
>          QLIST_FOREACH(giommu, &container->giommu_list, giommu_next) {
>              if (giommu->iommu == section->mr) {
>                  memory_region_unregister_iommu_notifier(&giommu->n);
> +                iommu = giommu->iommu;
>                  QLIST_REMOVE(giommu, giommu_next);
>                  g_free(giommu);
>                  break;
> @@ -483,6 +486,10 @@ static void vfio_listener_region_del(MemoryListener *listener,
>                       "0x%"HWADDR_PRIx") = %d (%m)",
>                       container, iova, end - iova, ret);
>      }
> +
> +    if (iommu && iommu->iommu_ops && iommu->iommu_ops->vfio_notify) {
> +        iommu->iommu_ops->vfio_notify(section->mr, false);
> +    }

So, if an IOMMU is removed from the guest, this will turn off VFIO
enablement.  However, IIUC this won't get caled in the more likely
case that the address space stays the same, but the VFIO device is
removed.

>  }
>  
>  static const MemoryListener vfio_memory_listener = {
> diff --git a/include/exec/memory.h b/include/exec/memory.h
> index d5284c2..9f82629 100644
> --- a/include/exec/memory.h
> +++ b/include/exec/memory.h
> @@ -150,6 +150,8 @@ typedef struct MemoryRegionIOMMUOps MemoryRegionIOMMUOps;
>  struct MemoryRegionIOMMUOps {
>      /* Return a TLB entry that contains a given address. */
>      IOMMUTLBEntry (*translate)(MemoryRegion *iommu, hwaddr addr, bool is_write);
> +    /* Called when VFIO starts/stops using this */
> +    int (*vfio_notify)(MemoryRegion *iommu, bool attached);
>  };
>  
>  typedef struct CoalescedMemoryRange CoalescedMemoryRange;
> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> index 8aa0c45..5d2f8f4 100644
> --- a/include/hw/ppc/spapr.h
> +++ b/include/hw/ppc/spapr.h
> @@ -550,6 +550,7 @@ struct sPAPRTCETable {
>      int fd;
>      MemoryRegion root, iommu;
>      struct VIOsPAPRDevice *vdev; /* for @bypass migration compatibility only */
> +    DeviceState *owner;
>      QLIST_ENTRY(sPAPRTCETable) list;
>  };
>  
> @@ -629,4 +630,7 @@ int spapr_rng_populate_dt(void *fdt);
>   */
>  #define SPAPR_LMB_FLAGS_ASSIGNED 0x00000008
>  
> +int spapr_tce_vfio_notify_owner(DeviceState *dev, sPAPRTCETable *tcet,
> +                                bool attached);
> +
>  #endif /* !defined (__HW_SPAPR_H__) */
Alexey Kardashevskiy March 3, 2016, 6:01 a.m. UTC | #2
On 03/03/2016 04:28 PM, David Gibson wrote:
> On Tue, Mar 01, 2016 at 08:10:32PM +1100, Alexey Kardashevskiy wrote:
>> This adds a vfio_votify() callback to inform an IOMMU (and then its owner)
>> that VFIO started using the IOMMU. This is used by the pseries machine to
>> enable/disable in-kernel acceleration of TCE hypercalls.
>>
>> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
>
> Hmm.. the current approach of having a hook when vfio-pci devices are
> attached is pretty ugly, but what exactly the case that it doesn't
> handle and this approach does?

Sorry, I am not following you here. What hook do you mean here?

My hook fixes the case when I want to enable/disable KVM acceleration, 
without these patches, I need to re-count how many vfio-pci devices are 
there and it is more ugly with PCI hotplug/unplug...


> This two tiered notify system for a single bit is also kinda ugly.
>
>> ---
>>   hw/ppc/spapr_iommu.c   |  9 +++++++++
>>   hw/ppc/spapr_pci.c     | 14 ++++++++------
>>   hw/vfio/common.c       |  7 +++++++
>>   include/exec/memory.h  |  2 ++
>>   include/hw/ppc/spapr.h |  4 ++++
>>   5 files changed, 30 insertions(+), 6 deletions(-)
>>
>> diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
>> index 8a88a74..67a8356 100644
>> --- a/hw/ppc/spapr_iommu.c
>> +++ b/hw/ppc/spapr_iommu.c
>> @@ -136,6 +136,13 @@ static IOMMUTLBEntry spapr_tce_translate_iommu(MemoryRegion *iommu, hwaddr addr,
>>       return ret;
>>   }
>>
>> +static int spapr_tce_vfio_notify(MemoryRegion *iommu, bool attached)
>> +{
>> +    sPAPRTCETable *tcet = container_of(iommu, sPAPRTCETable, iommu);
>> +
>> +    return spapr_tce_vfio_notify_owner(tcet->owner, tcet, attached);
>
> I'm guessing the "owner" is the PHB, but I'm not entirely clear.
>
> Could you use the QOM parent to get the the PHB instead of storing it
> explicitly?


I am pretty sure I am not allowed to use the QOM parent, this is why there 
is no object_get_parent() helper.


>
>> +}
>> +
>>   static int spapr_tce_table_post_load(void *opaque, int version_id)
>>   {
>>       sPAPRTCETable *tcet = SPAPR_TCE_TABLE(opaque);
>> @@ -167,6 +174,7 @@ static const VMStateDescription vmstate_spapr_tce_table = {
>>
>>   static MemoryRegionIOMMUOps spapr_iommu_ops = {
>>       .translate = spapr_tce_translate_iommu,
>> +    .vfio_notify = spapr_tce_vfio_notify,
>>   };
>>
>>   static int spapr_tce_table_realize(DeviceState *dev)
>> @@ -235,6 +243,7 @@ sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn)
>>
>>       tcet = SPAPR_TCE_TABLE(object_new(TYPE_SPAPR_TCE_TABLE));
>>       tcet->liobn = liobn;
>> +    tcet->owner = owner;
>>
>>       snprintf(tmp, sizeof(tmp), "tce-table-%x", liobn);
>>       object_property_add_child(OBJECT(owner), tmp, OBJECT(tcet), NULL);
>> diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
>> index ee0fecf..b0cd148 100644
>> --- a/hw/ppc/spapr_pci.c
>> +++ b/hw/ppc/spapr_pci.c
>> @@ -1084,6 +1084,14 @@ static int spapr_populate_pci_child_dt(PCIDevice *dev, void *fdt, int offset,
>>       return 0;
>>   }
>>
>> +int spapr_tce_vfio_notify_owner(DeviceState *dev, sPAPRTCETable *tcet,
>> +                                bool attached)
>> +{
>> +    spapr_tce_set_need_vfio(tcet, attached);
>
> Hmm.. you go to the trouble of storing owner in dev, then don't
> actually use it.


Yeah, I need to clean this, I removed spapr_tce_vfio_notify_owner() from my 
working branch already and call spapr_tce_set_need_vfio() directly from 
spapr_tce_vfio_notify().


>
>> +    return 0;
>> +}
>> +
>>   /* create OF node for pci device and required OF DT properties */
>>   static int spapr_create_pci_child_dt(sPAPRPHBState *phb, PCIDevice *dev,
>>                                        void *fdt, int node_offset)
>> @@ -1118,12 +1126,6 @@ static void spapr_phb_add_pci_device(sPAPRDRConnector *drc,
>>       void *fdt = NULL;
>>       int fdt_start_offset = 0, fdt_size;
>>
>> -    if (object_dynamic_cast(OBJECT(pdev), "vfio-pci")) {
>> -        sPAPRTCETable *tcet = spapr_tce_find_by_liobn(phb->dma_liobn);
>> -
>> -        spapr_tce_set_need_vfio(tcet, true);
>> -    }
>> -
>>       if (dev->hotplugged) {
>>           fdt = create_device_tree(&fdt_size);
>>           fdt_start_offset = spapr_create_pci_child_dt(phb, pdev, fdt, 0);
>> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
>> index 9bf4c3b..ca3fd47 100644
>> --- a/hw/vfio/common.c
>> +++ b/hw/vfio/common.c
>> @@ -384,6 +384,7 @@ static void vfio_listener_region_add(MemoryListener *listener,
>>           QLIST_INSERT_HEAD(&container->giommu_list, giommu, giommu_next);
>>
>>           memory_region_register_iommu_notifier(giommu->iommu, &giommu->n);
>> +        giommu->iommu->iommu_ops->vfio_notify(section->mr, true);
>>           memory_region_iommu_replay(giommu->iommu, &giommu->n,
>>                                      vfio_container_granularity(container),
>>                                      false);
>> @@ -430,6 +431,7 @@ static void vfio_listener_region_del(MemoryListener *listener,
>>       VFIOContainer *container = container_of(listener, VFIOContainer, listener);
>>       hwaddr iova, end;
>>       int ret;
>> +    MemoryRegion *iommu = NULL;
>>
>>       if (vfio_listener_skipped_section(section)) {
>>           trace_vfio_listener_region_del_skip(
>> @@ -451,6 +453,7 @@ static void vfio_listener_region_del(MemoryListener *listener,
>>           QLIST_FOREACH(giommu, &container->giommu_list, giommu_next) {
>>               if (giommu->iommu == section->mr) {
>>                   memory_region_unregister_iommu_notifier(&giommu->n);
>> +                iommu = giommu->iommu;
>>                   QLIST_REMOVE(giommu, giommu_next);
>>                   g_free(giommu);
>>                   break;
>> @@ -483,6 +486,10 @@ static void vfio_listener_region_del(MemoryListener *listener,
>>                        "0x%"HWADDR_PRIx") = %d (%m)",
>>                        container, iova, end - iova, ret);
>>       }
>> +
>> +    if (iommu && iommu->iommu_ops && iommu->iommu_ops->vfio_notify) {
>> +        iommu->iommu_ops->vfio_notify(section->mr, false);
>> +    }
>
> So, if an IOMMU is removed from the guest, this will turn off VFIO
> enablement.  However, IIUC this won't get caled in the more likely
> case that the address space stays the same, but the VFIO device is
> removed.


When VFIO device is removed, its listener gets removed and this is supposed 
to end up calling vfio_listener_region_del().


>
>>   }
>>
>>   static const MemoryListener vfio_memory_listener = {
>> diff --git a/include/exec/memory.h b/include/exec/memory.h
>> index d5284c2..9f82629 100644
>> --- a/include/exec/memory.h
>> +++ b/include/exec/memory.h
>> @@ -150,6 +150,8 @@ typedef struct MemoryRegionIOMMUOps MemoryRegionIOMMUOps;
>>   struct MemoryRegionIOMMUOps {
>>       /* Return a TLB entry that contains a given address. */
>>       IOMMUTLBEntry (*translate)(MemoryRegion *iommu, hwaddr addr, bool is_write);
>> +    /* Called when VFIO starts/stops using this */
>> +    int (*vfio_notify)(MemoryRegion *iommu, bool attached);
>>   };
>>
>>   typedef struct CoalescedMemoryRange CoalescedMemoryRange;
>> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
>> index 8aa0c45..5d2f8f4 100644
>> --- a/include/hw/ppc/spapr.h
>> +++ b/include/hw/ppc/spapr.h
>> @@ -550,6 +550,7 @@ struct sPAPRTCETable {
>>       int fd;
>>       MemoryRegion root, iommu;
>>       struct VIOsPAPRDevice *vdev; /* for @bypass migration compatibility only */
>> +    DeviceState *owner;
>>       QLIST_ENTRY(sPAPRTCETable) list;
>>   };
>>
>> @@ -629,4 +630,7 @@ int spapr_rng_populate_dt(void *fdt);
>>    */
>>   #define SPAPR_LMB_FLAGS_ASSIGNED 0x00000008
>>
>> +int spapr_tce_vfio_notify_owner(DeviceState *dev, sPAPRTCETable *tcet,
>> +                                bool attached);
>> +
>>   #endif /* !defined (__HW_SPAPR_H__) */
>
David Gibson March 4, 2016, 4:01 a.m. UTC | #3
On Thu, Mar 03, 2016 at 05:01:31PM +1100, Alexey Kardashevskiy wrote:
> On 03/03/2016 04:28 PM, David Gibson wrote:
> >On Tue, Mar 01, 2016 at 08:10:32PM +1100, Alexey Kardashevskiy wrote:
> >>This adds a vfio_votify() callback to inform an IOMMU (and then its owner)
> >>that VFIO started using the IOMMU. This is used by the pseries machine to
> >>enable/disable in-kernel acceleration of TCE hypercalls.
> >>
> >>Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
> >
> >Hmm.. the current approach of having a hook when vfio-pci devices are
> >attached is pretty ugly, but what exactly the case that it doesn't
> >handle and this approach does?
> 
> Sorry, I am not following you here. What hook do you mean here?
> 
> My hook fixes the case when I want to enable/disable KVM acceleration,
> without these patches, I need to re-count how many vfio-pci devices are
> there and it is more ugly with PCI hotplug/unplug...
> 
> 
> >This two tiered notify system for a single bit is also kinda ugly.
> >
> >>---
> >>  hw/ppc/spapr_iommu.c   |  9 +++++++++
> >>  hw/ppc/spapr_pci.c     | 14 ++++++++------
> >>  hw/vfio/common.c       |  7 +++++++
> >>  include/exec/memory.h  |  2 ++
> >>  include/hw/ppc/spapr.h |  4 ++++
> >>  5 files changed, 30 insertions(+), 6 deletions(-)
> >>
> >>diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
> >>index 8a88a74..67a8356 100644
> >>--- a/hw/ppc/spapr_iommu.c
> >>+++ b/hw/ppc/spapr_iommu.c
> >>@@ -136,6 +136,13 @@ static IOMMUTLBEntry spapr_tce_translate_iommu(MemoryRegion *iommu, hwaddr addr,
> >>      return ret;
> >>  }
> >>
> >>+static int spapr_tce_vfio_notify(MemoryRegion *iommu, bool attached)
> >>+{
> >>+    sPAPRTCETable *tcet = container_of(iommu, sPAPRTCETable, iommu);
> >>+
> >>+    return spapr_tce_vfio_notify_owner(tcet->owner, tcet, attached);
> >
> >I'm guessing the "owner" is the PHB, but I'm not entirely clear.
> >
> >Could you use the QOM parent to get the the PHB instead of storing it
> >explicitly?
> 
> 
> I am pretty sure I am not allowed to use the QOM parent, this is why there
> is no object_get_parent() helper.

Hmm.. I thought I had this discussion before and accessing the qom
parent from qmp was bad, but it was ok for internal code use.  But I
may be getting muddled with older qdev stuff.

> 
> >
> >>+}
> >>+
> >>  static int spapr_tce_table_post_load(void *opaque, int version_id)
> >>  {
> >>      sPAPRTCETable *tcet = SPAPR_TCE_TABLE(opaque);
> >>@@ -167,6 +174,7 @@ static const VMStateDescription vmstate_spapr_tce_table = {
> >>
> >>  static MemoryRegionIOMMUOps spapr_iommu_ops = {
> >>      .translate = spapr_tce_translate_iommu,
> >>+    .vfio_notify = spapr_tce_vfio_notify,
> >>  };
> >>
> >>  static int spapr_tce_table_realize(DeviceState *dev)
> >>@@ -235,6 +243,7 @@ sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn)
> >>
> >>      tcet = SPAPR_TCE_TABLE(object_new(TYPE_SPAPR_TCE_TABLE));
> >>      tcet->liobn = liobn;
> >>+    tcet->owner = owner;
> >>
> >>      snprintf(tmp, sizeof(tmp), "tce-table-%x", liobn);
> >>      object_property_add_child(OBJECT(owner), tmp, OBJECT(tcet), NULL);
> >>diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
> >>index ee0fecf..b0cd148 100644
> >>--- a/hw/ppc/spapr_pci.c
> >>+++ b/hw/ppc/spapr_pci.c
> >>@@ -1084,6 +1084,14 @@ static int spapr_populate_pci_child_dt(PCIDevice *dev, void *fdt, int offset,
> >>      return 0;
> >>  }
> >>
> >>+int spapr_tce_vfio_notify_owner(DeviceState *dev, sPAPRTCETable *tcet,
> >>+                                bool attached)
> >>+{
> >>+    spapr_tce_set_need_vfio(tcet, attached);
> >
> >Hmm.. you go to the trouble of storing owner in dev, then don't
> >actually use it.
> 
> 
> Yeah, I need to clean this, I removed spapr_tce_vfio_notify_owner() from my
> working branch already and call spapr_tce_set_need_vfio() directly from
> spapr_tce_vfio_notify().

Ok.

> 
> 
> >
> >>+    return 0;
> >>+}
> >>+
> >>  /* create OF node for pci device and required OF DT properties */
> >>  static int spapr_create_pci_child_dt(sPAPRPHBState *phb, PCIDevice *dev,
> >>                                       void *fdt, int node_offset)
> >>@@ -1118,12 +1126,6 @@ static void spapr_phb_add_pci_device(sPAPRDRConnector *drc,
> >>      void *fdt = NULL;
> >>      int fdt_start_offset = 0, fdt_size;
> >>
> >>-    if (object_dynamic_cast(OBJECT(pdev), "vfio-pci")) {
> >>-        sPAPRTCETable *tcet = spapr_tce_find_by_liobn(phb->dma_liobn);
> >>-
> >>-        spapr_tce_set_need_vfio(tcet, true);
> >>-    }
> >>-
> >>      if (dev->hotplugged) {
> >>          fdt = create_device_tree(&fdt_size);
> >>          fdt_start_offset = spapr_create_pci_child_dt(phb, pdev, fdt, 0);
> >>diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> >>index 9bf4c3b..ca3fd47 100644
> >>--- a/hw/vfio/common.c
> >>+++ b/hw/vfio/common.c
> >>@@ -384,6 +384,7 @@ static void vfio_listener_region_add(MemoryListener *listener,
> >>          QLIST_INSERT_HEAD(&container->giommu_list, giommu, giommu_next);
> >>
> >>          memory_region_register_iommu_notifier(giommu->iommu, &giommu->n);
> >>+        giommu->iommu->iommu_ops->vfio_notify(section->mr, true);
> >>          memory_region_iommu_replay(giommu->iommu, &giommu->n,
> >>                                     vfio_container_granularity(container),
> >>                                     false);
> >>@@ -430,6 +431,7 @@ static void vfio_listener_region_del(MemoryListener *listener,
> >>      VFIOContainer *container = container_of(listener, VFIOContainer, listener);
> >>      hwaddr iova, end;
> >>      int ret;
> >>+    MemoryRegion *iommu = NULL;
> >>
> >>      if (vfio_listener_skipped_section(section)) {
> >>          trace_vfio_listener_region_del_skip(
> >>@@ -451,6 +453,7 @@ static void vfio_listener_region_del(MemoryListener *listener,
> >>          QLIST_FOREACH(giommu, &container->giommu_list, giommu_next) {
> >>              if (giommu->iommu == section->mr) {
> >>                  memory_region_unregister_iommu_notifier(&giommu->n);
> >>+                iommu = giommu->iommu;
> >>                  QLIST_REMOVE(giommu, giommu_next);
> >>                  g_free(giommu);
> >>                  break;
> >>@@ -483,6 +486,10 @@ static void vfio_listener_region_del(MemoryListener *listener,
> >>                       "0x%"HWADDR_PRIx") = %d (%m)",
> >>                       container, iova, end - iova, ret);
> >>      }
> >>+
> >>+    if (iommu && iommu->iommu_ops && iommu->iommu_ops->vfio_notify) {
> >>+        iommu->iommu_ops->vfio_notify(section->mr, false);
> >>+    }
> >
> >So, if an IOMMU is removed from the guest, this will turn off VFIO
> >enablement.  However, IIUC this won't get caled in the more likely
> >case that the address space stays the same, but the VFIO device is
> >removed.
> 
> 
> When VFIO device is removed, its listener gets removed and this is supposed
> to end up calling vfio_listener_region_del().

Ah, ok region_del() gets called on all existing regions when the
listener is removed.  I guess that makes sense since region_add() is
called on the existing regions when the listener is registered.



> 
> 
> >
> >>  }
> >>
> >>  static const MemoryListener vfio_memory_listener = {
> >>diff --git a/include/exec/memory.h b/include/exec/memory.h
> >>index d5284c2..9f82629 100644
> >>--- a/include/exec/memory.h
> >>+++ b/include/exec/memory.h
> >>@@ -150,6 +150,8 @@ typedef struct MemoryRegionIOMMUOps MemoryRegionIOMMUOps;
> >>  struct MemoryRegionIOMMUOps {
> >>      /* Return a TLB entry that contains a given address. */
> >>      IOMMUTLBEntry (*translate)(MemoryRegion *iommu, hwaddr addr, bool is_write);
> >>+    /* Called when VFIO starts/stops using this */
> >>+    int (*vfio_notify)(MemoryRegion *iommu, bool attached);
> >>  };
> >>
> >>  typedef struct CoalescedMemoryRange CoalescedMemoryRange;
> >>diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> >>index 8aa0c45..5d2f8f4 100644
> >>--- a/include/hw/ppc/spapr.h
> >>+++ b/include/hw/ppc/spapr.h
> >>@@ -550,6 +550,7 @@ struct sPAPRTCETable {
> >>      int fd;
> >>      MemoryRegion root, iommu;
> >>      struct VIOsPAPRDevice *vdev; /* for @bypass migration compatibility only */
> >>+    DeviceState *owner;
> >>      QLIST_ENTRY(sPAPRTCETable) list;
> >>  };
> >>
> >>@@ -629,4 +630,7 @@ int spapr_rng_populate_dt(void *fdt);
> >>   */
> >>  #define SPAPR_LMB_FLAGS_ASSIGNED 0x00000008
> >>
> >>+int spapr_tce_vfio_notify_owner(DeviceState *dev, sPAPRTCETable *tcet,
> >>+                                bool attached);
> >>+
> >>  #endif /* !defined (__HW_SPAPR_H__) */
> >
> 
>
diff mbox

Patch

diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
index 8a88a74..67a8356 100644
--- a/hw/ppc/spapr_iommu.c
+++ b/hw/ppc/spapr_iommu.c
@@ -136,6 +136,13 @@  static IOMMUTLBEntry spapr_tce_translate_iommu(MemoryRegion *iommu, hwaddr addr,
     return ret;
 }
 
+static int spapr_tce_vfio_notify(MemoryRegion *iommu, bool attached)
+{
+    sPAPRTCETable *tcet = container_of(iommu, sPAPRTCETable, iommu);
+
+    return spapr_tce_vfio_notify_owner(tcet->owner, tcet, attached);
+}
+
 static int spapr_tce_table_post_load(void *opaque, int version_id)
 {
     sPAPRTCETable *tcet = SPAPR_TCE_TABLE(opaque);
@@ -167,6 +174,7 @@  static const VMStateDescription vmstate_spapr_tce_table = {
 
 static MemoryRegionIOMMUOps spapr_iommu_ops = {
     .translate = spapr_tce_translate_iommu,
+    .vfio_notify = spapr_tce_vfio_notify,
 };
 
 static int spapr_tce_table_realize(DeviceState *dev)
@@ -235,6 +243,7 @@  sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn)
 
     tcet = SPAPR_TCE_TABLE(object_new(TYPE_SPAPR_TCE_TABLE));
     tcet->liobn = liobn;
+    tcet->owner = owner;
 
     snprintf(tmp, sizeof(tmp), "tce-table-%x", liobn);
     object_property_add_child(OBJECT(owner), tmp, OBJECT(tcet), NULL);
diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index ee0fecf..b0cd148 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -1084,6 +1084,14 @@  static int spapr_populate_pci_child_dt(PCIDevice *dev, void *fdt, int offset,
     return 0;
 }
 
+int spapr_tce_vfio_notify_owner(DeviceState *dev, sPAPRTCETable *tcet,
+                                bool attached)
+{
+    spapr_tce_set_need_vfio(tcet, attached);
+
+    return 0;
+}
+
 /* create OF node for pci device and required OF DT properties */
 static int spapr_create_pci_child_dt(sPAPRPHBState *phb, PCIDevice *dev,
                                      void *fdt, int node_offset)
@@ -1118,12 +1126,6 @@  static void spapr_phb_add_pci_device(sPAPRDRConnector *drc,
     void *fdt = NULL;
     int fdt_start_offset = 0, fdt_size;
 
-    if (object_dynamic_cast(OBJECT(pdev), "vfio-pci")) {
-        sPAPRTCETable *tcet = spapr_tce_find_by_liobn(phb->dma_liobn);
-
-        spapr_tce_set_need_vfio(tcet, true);
-    }
-
     if (dev->hotplugged) {
         fdt = create_device_tree(&fdt_size);
         fdt_start_offset = spapr_create_pci_child_dt(phb, pdev, fdt, 0);
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 9bf4c3b..ca3fd47 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -384,6 +384,7 @@  static void vfio_listener_region_add(MemoryListener *listener,
         QLIST_INSERT_HEAD(&container->giommu_list, giommu, giommu_next);
 
         memory_region_register_iommu_notifier(giommu->iommu, &giommu->n);
+        giommu->iommu->iommu_ops->vfio_notify(section->mr, true);
         memory_region_iommu_replay(giommu->iommu, &giommu->n,
                                    vfio_container_granularity(container),
                                    false);
@@ -430,6 +431,7 @@  static void vfio_listener_region_del(MemoryListener *listener,
     VFIOContainer *container = container_of(listener, VFIOContainer, listener);
     hwaddr iova, end;
     int ret;
+    MemoryRegion *iommu = NULL;
 
     if (vfio_listener_skipped_section(section)) {
         trace_vfio_listener_region_del_skip(
@@ -451,6 +453,7 @@  static void vfio_listener_region_del(MemoryListener *listener,
         QLIST_FOREACH(giommu, &container->giommu_list, giommu_next) {
             if (giommu->iommu == section->mr) {
                 memory_region_unregister_iommu_notifier(&giommu->n);
+                iommu = giommu->iommu;
                 QLIST_REMOVE(giommu, giommu_next);
                 g_free(giommu);
                 break;
@@ -483,6 +486,10 @@  static void vfio_listener_region_del(MemoryListener *listener,
                      "0x%"HWADDR_PRIx") = %d (%m)",
                      container, iova, end - iova, ret);
     }
+
+    if (iommu && iommu->iommu_ops && iommu->iommu_ops->vfio_notify) {
+        iommu->iommu_ops->vfio_notify(section->mr, false);
+    }
 }
 
 static const MemoryListener vfio_memory_listener = {
diff --git a/include/exec/memory.h b/include/exec/memory.h
index d5284c2..9f82629 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -150,6 +150,8 @@  typedef struct MemoryRegionIOMMUOps MemoryRegionIOMMUOps;
 struct MemoryRegionIOMMUOps {
     /* Return a TLB entry that contains a given address. */
     IOMMUTLBEntry (*translate)(MemoryRegion *iommu, hwaddr addr, bool is_write);
+    /* Called when VFIO starts/stops using this */
+    int (*vfio_notify)(MemoryRegion *iommu, bool attached);
 };
 
 typedef struct CoalescedMemoryRange CoalescedMemoryRange;
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 8aa0c45..5d2f8f4 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -550,6 +550,7 @@  struct sPAPRTCETable {
     int fd;
     MemoryRegion root, iommu;
     struct VIOsPAPRDevice *vdev; /* for @bypass migration compatibility only */
+    DeviceState *owner;
     QLIST_ENTRY(sPAPRTCETable) list;
 };
 
@@ -629,4 +630,7 @@  int spapr_rng_populate_dt(void *fdt);
  */
 #define SPAPR_LMB_FLAGS_ASSIGNED 0x00000008
 
+int spapr_tce_vfio_notify_owner(DeviceState *dev, sPAPRTCETable *tcet,
+                                bool attached);
+
 #endif /* !defined (__HW_SPAPR_H__) */