diff mbox

[qemu,v14,01/18] memory: Fix IOMMU replay base address

Message ID 1458546426-26222-2-git-send-email-aik@ozlabs.ru (mailing list archive)
State New, archived
Headers show

Commit Message

Alexey Kardashevskiy March 21, 2016, 7:46 a.m. UTC
Since a788f227 "memory: Allow replay of IOMMU mapping notifications"
when new VFIO listener is added, all existing IOMMU mappings are
replayed. However there is a problem that the base address of
an IOMMU memory region (IOMMU MR) is ignored which is not a problem
for the existing user (which is pseries) with its default 32bit DMA
window starting at 0 but it is if there is another DMA window.

This stores the IOMMU's offset_within_address_space and adjusts
the IOVA before calling vfio_dma_map/vfio_dma_unmap.

As the IOMMU notifier expects IOVA offset rather than the absolute
address, this also adjusts IOVA in sPAPR H_PUT_TCE handler before
calling notifier(s).

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
---
 hw/ppc/spapr_iommu.c          |  2 +-
 hw/vfio/common.c              | 14 ++++++++------
 include/hw/vfio/vfio-common.h |  1 +
 3 files changed, 10 insertions(+), 7 deletions(-)

Comments

David Gibson March 22, 2016, 12:49 a.m. UTC | #1
On Mon, Mar 21, 2016 at 06:46:49PM +1100, Alexey Kardashevskiy wrote:
> Since a788f227 "memory: Allow replay of IOMMU mapping notifications"
> when new VFIO listener is added, all existing IOMMU mappings are
> replayed. However there is a problem that the base address of
> an IOMMU memory region (IOMMU MR) is ignored which is not a problem
> for the existing user (which is pseries) with its default 32bit DMA
> window starting at 0 but it is if there is another DMA window.
> 
> This stores the IOMMU's offset_within_address_space and adjusts
> the IOVA before calling vfio_dma_map/vfio_dma_unmap.
> 
> As the IOMMU notifier expects IOVA offset rather than the absolute
> address, this also adjusts IOVA in sPAPR H_PUT_TCE handler before
> calling notifier(s).
> 
> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
> Reviewed-by: David Gibson <david@gibson.dropbear.id.au>

On a closer look, I realised this still isn't quite correct, although
I don't think any cases which would break it exist or are planned.

> ---
>  hw/ppc/spapr_iommu.c          |  2 +-
>  hw/vfio/common.c              | 14 ++++++++------
>  include/hw/vfio/vfio-common.h |  1 +
>  3 files changed, 10 insertions(+), 7 deletions(-)
> 
> diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
> index 7dd4588..277f289 100644
> --- a/hw/ppc/spapr_iommu.c
> +++ b/hw/ppc/spapr_iommu.c
> @@ -277,7 +277,7 @@ static target_ulong put_tce_emu(sPAPRTCETable *tcet, target_ulong ioba,
>      tcet->table[index] = tce;
>  
>      entry.target_as = &address_space_memory,
> -    entry.iova = ioba & page_mask;
> +    entry.iova = (ioba - tcet->bus_offset) & page_mask;
>      entry.translated_addr = tce & page_mask;
>      entry.addr_mask = ~page_mask;
>      entry.perm = spapr_tce_iommu_access_flags(tce);

This bit's right/

> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> index fb588d8..d45e2db 100644
> --- a/hw/vfio/common.c
> +++ b/hw/vfio/common.c
> @@ -257,14 +257,14 @@ static void vfio_iommu_map_notify(Notifier *n, void *data)
>      VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n);
>      VFIOContainer *container = giommu->container;
>      IOMMUTLBEntry *iotlb = data;
> +    hwaddr iova = iotlb->iova + giommu->offset_within_address_space;

This bit might be right, depending on how you define giommu->offset_within_address_space.

>      MemoryRegion *mr;
>      hwaddr xlat;
>      hwaddr len = iotlb->addr_mask + 1;
>      void *vaddr;
>      int ret;
>  
> -    trace_vfio_iommu_map_notify(iotlb->iova,
> -                                iotlb->iova + iotlb->addr_mask);
> +    trace_vfio_iommu_map_notify(iova, iova + iotlb->addr_mask);
>  
>      /*
>       * The IOMMU TLB entry we have just covers translation through
> @@ -291,21 +291,21 @@ static void vfio_iommu_map_notify(Notifier *n, void *data)
>  
>      if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) {
>          vaddr = memory_region_get_ram_ptr(mr) + xlat;
> -        ret = vfio_dma_map(container, iotlb->iova,
> +        ret = vfio_dma_map(container, iova,
>                             iotlb->addr_mask + 1, vaddr,
>                             !(iotlb->perm & IOMMU_WO) || mr->readonly);
>          if (ret) {
>              error_report("vfio_dma_map(%p, 0x%"HWADDR_PRIx", "
>                           "0x%"HWADDR_PRIx", %p) = %d (%m)",
> -                         container, iotlb->iova,
> +                         container, iova,
>                           iotlb->addr_mask + 1, vaddr, ret);
>          }
>      } else {
> -        ret = vfio_dma_unmap(container, iotlb->iova, iotlb->addr_mask + 1);
> +        ret = vfio_dma_unmap(container, iova, iotlb->addr_mask + 1);
>          if (ret) {
>              error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
>                           "0x%"HWADDR_PRIx") = %d (%m)",
> -                         container, iotlb->iova,
> +                         container, iova,
>                           iotlb->addr_mask + 1, ret);
>          }
>      }

This is fine.

> @@ -377,6 +377,8 @@ static void vfio_listener_region_add(MemoryListener *listener,
>           */
>          giommu = g_malloc0(sizeof(*giommu));
>          giommu->iommu = section->mr;
> +        giommu->offset_within_address_space =
> +            section->offset_within_address_space;

But here there's a problem.  The iova in IOMMUTLBEntry is relative to
the IOMMU MemoryRegion, but - at least in theory - only a subsection
of that MemoryRegion could be mapped into the AddressSpace.

So, to find the IOVA within the AddressSpace, from the IOVA within the
MemoryRegion, you need to first subtract the section's offset within
the MemoryRegion, then add the section's offset within the
AddressSpace.

You could precalculate the combined delta here, but...

>          giommu->container = container;
>          giommu->n.notify = vfio_iommu_map_notify;
>          QLIST_INSERT_HEAD(&container->giommu_list, giommu, giommu_next);
> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
> index eb0e1b0..5341e05 100644
> --- a/include/hw/vfio/vfio-common.h
> +++ b/include/hw/vfio/vfio-common.h
> @@ -90,6 +90,7 @@ typedef struct VFIOContainer {
>  typedef struct VFIOGuestIOMMU {
>      VFIOContainer *container;
>      MemoryRegion *iommu;
> +    hwaddr offset_within_address_space;

...it might be simpler to replace both the iommu and
offset_within_address_space fields here with a pointer to the
MemoryRegionSection instead, which should give you all the info you
need.

It might also be worth adding Paolo to the CC for this patch, since he
knows the MemoryRegion stuff better than anyone.

>      Notifier n;
>      QLIST_ENTRY(VFIOGuestIOMMU) giommu_next;
>  } VFIOGuestIOMMU;
Alexey Kardashevskiy March 22, 2016, 3:12 a.m. UTC | #2
On 03/22/2016 11:49 AM, David Gibson wrote:
> On Mon, Mar 21, 2016 at 06:46:49PM +1100, Alexey Kardashevskiy wrote:
>> Since a788f227 "memory: Allow replay of IOMMU mapping notifications"
>> when new VFIO listener is added, all existing IOMMU mappings are
>> replayed. However there is a problem that the base address of
>> an IOMMU memory region (IOMMU MR) is ignored which is not a problem
>> for the existing user (which is pseries) with its default 32bit DMA
>> window starting at 0 but it is if there is another DMA window.
>>
>> This stores the IOMMU's offset_within_address_space and adjusts
>> the IOVA before calling vfio_dma_map/vfio_dma_unmap.
>>
>> As the IOMMU notifier expects IOVA offset rather than the absolute
>> address, this also adjusts IOVA in sPAPR H_PUT_TCE handler before
>> calling notifier(s).
>>
>> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
>> Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
>
> On a closer look, I realised this still isn't quite correct, although
> I don't think any cases which would break it exist or are planned.
>
>> ---
>>   hw/ppc/spapr_iommu.c          |  2 +-
>>   hw/vfio/common.c              | 14 ++++++++------
>>   include/hw/vfio/vfio-common.h |  1 +
>>   3 files changed, 10 insertions(+), 7 deletions(-)
>>
>> diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
>> index 7dd4588..277f289 100644
>> --- a/hw/ppc/spapr_iommu.c
>> +++ b/hw/ppc/spapr_iommu.c
>> @@ -277,7 +277,7 @@ static target_ulong put_tce_emu(sPAPRTCETable *tcet, target_ulong ioba,
>>       tcet->table[index] = tce;
>>
>>       entry.target_as = &address_space_memory,
>> -    entry.iova = ioba & page_mask;
>> +    entry.iova = (ioba - tcet->bus_offset) & page_mask;
>>       entry.translated_addr = tce & page_mask;
>>       entry.addr_mask = ~page_mask;
>>       entry.perm = spapr_tce_iommu_access_flags(tce);
>
> This bit's right/
>
>> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
>> index fb588d8..d45e2db 100644
>> --- a/hw/vfio/common.c
>> +++ b/hw/vfio/common.c
>> @@ -257,14 +257,14 @@ static void vfio_iommu_map_notify(Notifier *n, void *data)
>>       VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n);
>>       VFIOContainer *container = giommu->container;
>>       IOMMUTLBEntry *iotlb = data;
>> +    hwaddr iova = iotlb->iova + giommu->offset_within_address_space;
>
> This bit might be right, depending on how you define giommu->offset_within_address_space.
>
>>       MemoryRegion *mr;
>>       hwaddr xlat;
>>       hwaddr len = iotlb->addr_mask + 1;
>>       void *vaddr;
>>       int ret;
>>
>> -    trace_vfio_iommu_map_notify(iotlb->iova,
>> -                                iotlb->iova + iotlb->addr_mask);
>> +    trace_vfio_iommu_map_notify(iova, iova + iotlb->addr_mask);
>>
>>       /*
>>        * The IOMMU TLB entry we have just covers translation through
>> @@ -291,21 +291,21 @@ static void vfio_iommu_map_notify(Notifier *n, void *data)
>>
>>       if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) {
>>           vaddr = memory_region_get_ram_ptr(mr) + xlat;
>> -        ret = vfio_dma_map(container, iotlb->iova,
>> +        ret = vfio_dma_map(container, iova,
>>                              iotlb->addr_mask + 1, vaddr,
>>                              !(iotlb->perm & IOMMU_WO) || mr->readonly);
>>           if (ret) {
>>               error_report("vfio_dma_map(%p, 0x%"HWADDR_PRIx", "
>>                            "0x%"HWADDR_PRIx", %p) = %d (%m)",
>> -                         container, iotlb->iova,
>> +                         container, iova,
>>                            iotlb->addr_mask + 1, vaddr, ret);
>>           }
>>       } else {
>> -        ret = vfio_dma_unmap(container, iotlb->iova, iotlb->addr_mask + 1);
>> +        ret = vfio_dma_unmap(container, iova, iotlb->addr_mask + 1);
>>           if (ret) {
>>               error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
>>                            "0x%"HWADDR_PRIx") = %d (%m)",
>> -                         container, iotlb->iova,
>> +                         container, iova,
>>                            iotlb->addr_mask + 1, ret);
>>           }
>>       }
>
> This is fine.
>
>> @@ -377,6 +377,8 @@ static void vfio_listener_region_add(MemoryListener *listener,
>>            */
>>           giommu = g_malloc0(sizeof(*giommu));
>>           giommu->iommu = section->mr;
>> +        giommu->offset_within_address_space =
>> +            section->offset_within_address_space;
>
> But here there's a problem.  The iova in IOMMUTLBEntry is relative to
> the IOMMU MemoryRegion, but - at least in theory - only a subsection
> of that MemoryRegion could be mapped into the AddressSpace.

But the IOMMU MR stays the same - size, offset, and iova will be relative 
to its start, why does it matter if only portion is mapped?


> So, to find the IOVA within the AddressSpace, from the IOVA within the
> MemoryRegion, you need to first subtract the section's offset within
> the MemoryRegion, then add the section's offset within the
> AddressSpace.
>
> You could precalculate the combined delta here, but...
 >
>
>>           giommu->container = container;
>>           giommu->n.notify = vfio_iommu_map_notify;
>>           QLIST_INSERT_HEAD(&container->giommu_list, giommu, giommu_next);
>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
>> index eb0e1b0..5341e05 100644
>> --- a/include/hw/vfio/vfio-common.h
>> +++ b/include/hw/vfio/vfio-common.h
>> @@ -90,6 +90,7 @@ typedef struct VFIOContainer {
>>   typedef struct VFIOGuestIOMMU {
>>       VFIOContainer *container;
>>       MemoryRegion *iommu;
>> +    hwaddr offset_within_address_space;
>
> ...it might be simpler to replace both the iommu and
> offset_within_address_space fields here with a pointer to the
> MemoryRegionSection instead, which should give you all the info you
> need.


MemoryRegionSection is allocated on stack in listener_add_address_space() 
and seems to be in general some sort of temporary object.


>
> It might also be worth adding Paolo to the CC for this patch, since he
> knows the MemoryRegion stuff better than anyone.


Right, I added him in cc: now.

>
>>       Notifier n;
>>       QLIST_ENTRY(VFIOGuestIOMMU) giommu_next;
>>   } VFIOGuestIOMMU;
>
David Gibson March 22, 2016, 3:26 a.m. UTC | #3
On Tue, Mar 22, 2016 at 02:12:30PM +1100, Alexey Kardashevskiy wrote:
> On 03/22/2016 11:49 AM, David Gibson wrote:
> >On Mon, Mar 21, 2016 at 06:46:49PM +1100, Alexey Kardashevskiy wrote:
> >>Since a788f227 "memory: Allow replay of IOMMU mapping notifications"
> >>when new VFIO listener is added, all existing IOMMU mappings are
> >>replayed. However there is a problem that the base address of
> >>an IOMMU memory region (IOMMU MR) is ignored which is not a problem
> >>for the existing user (which is pseries) with its default 32bit DMA
> >>window starting at 0 but it is if there is another DMA window.
> >>
> >>This stores the IOMMU's offset_within_address_space and adjusts
> >>the IOVA before calling vfio_dma_map/vfio_dma_unmap.
> >>
> >>As the IOMMU notifier expects IOVA offset rather than the absolute
> >>address, this also adjusts IOVA in sPAPR H_PUT_TCE handler before
> >>calling notifier(s).
> >>
> >>Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
> >>Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
> >
> >On a closer look, I realised this still isn't quite correct, although
> >I don't think any cases which would break it exist or are planned.
> >
> >>---
> >>  hw/ppc/spapr_iommu.c          |  2 +-
> >>  hw/vfio/common.c              | 14 ++++++++------
> >>  include/hw/vfio/vfio-common.h |  1 +
> >>  3 files changed, 10 insertions(+), 7 deletions(-)
> >>
> >>diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
> >>index 7dd4588..277f289 100644
> >>--- a/hw/ppc/spapr_iommu.c
> >>+++ b/hw/ppc/spapr_iommu.c
> >>@@ -277,7 +277,7 @@ static target_ulong put_tce_emu(sPAPRTCETable *tcet, target_ulong ioba,
> >>      tcet->table[index] = tce;
> >>
> >>      entry.target_as = &address_space_memory,
> >>-    entry.iova = ioba & page_mask;
> >>+    entry.iova = (ioba - tcet->bus_offset) & page_mask;
> >>      entry.translated_addr = tce & page_mask;
> >>      entry.addr_mask = ~page_mask;
> >>      entry.perm = spapr_tce_iommu_access_flags(tce);
> >
> >This bit's right/
> >
> >>diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> >>index fb588d8..d45e2db 100644
> >>--- a/hw/vfio/common.c
> >>+++ b/hw/vfio/common.c
> >>@@ -257,14 +257,14 @@ static void vfio_iommu_map_notify(Notifier *n, void *data)
> >>      VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n);
> >>      VFIOContainer *container = giommu->container;
> >>      IOMMUTLBEntry *iotlb = data;
> >>+    hwaddr iova = iotlb->iova + giommu->offset_within_address_space;
> >
> >This bit might be right, depending on how you define giommu->offset_within_address_space.
> >
> >>      MemoryRegion *mr;
> >>      hwaddr xlat;
> >>      hwaddr len = iotlb->addr_mask + 1;
> >>      void *vaddr;
> >>      int ret;
> >>
> >>-    trace_vfio_iommu_map_notify(iotlb->iova,
> >>-                                iotlb->iova + iotlb->addr_mask);
> >>+    trace_vfio_iommu_map_notify(iova, iova + iotlb->addr_mask);
> >>
> >>      /*
> >>       * The IOMMU TLB entry we have just covers translation through
> >>@@ -291,21 +291,21 @@ static void vfio_iommu_map_notify(Notifier *n, void *data)
> >>
> >>      if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) {
> >>          vaddr = memory_region_get_ram_ptr(mr) + xlat;
> >>-        ret = vfio_dma_map(container, iotlb->iova,
> >>+        ret = vfio_dma_map(container, iova,
> >>                             iotlb->addr_mask + 1, vaddr,
> >>                             !(iotlb->perm & IOMMU_WO) || mr->readonly);
> >>          if (ret) {
> >>              error_report("vfio_dma_map(%p, 0x%"HWADDR_PRIx", "
> >>                           "0x%"HWADDR_PRIx", %p) = %d (%m)",
> >>-                         container, iotlb->iova,
> >>+                         container, iova,
> >>                           iotlb->addr_mask + 1, vaddr, ret);
> >>          }
> >>      } else {
> >>-        ret = vfio_dma_unmap(container, iotlb->iova, iotlb->addr_mask + 1);
> >>+        ret = vfio_dma_unmap(container, iova, iotlb->addr_mask + 1);
> >>          if (ret) {
> >>              error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
> >>                           "0x%"HWADDR_PRIx") = %d (%m)",
> >>-                         container, iotlb->iova,
> >>+                         container, iova,
> >>                           iotlb->addr_mask + 1, ret);
> >>          }
> >>      }
> >
> >This is fine.
> >
> >>@@ -377,6 +377,8 @@ static void vfio_listener_region_add(MemoryListener *listener,
> >>           */
> >>          giommu = g_malloc0(sizeof(*giommu));
> >>          giommu->iommu = section->mr;
> >>+        giommu->offset_within_address_space =
> >>+            section->offset_within_address_space;
> >
> >But here there's a problem.  The iova in IOMMUTLBEntry is relative to
> >the IOMMU MemoryRegion, but - at least in theory - only a subsection
> >of that MemoryRegion could be mapped into the AddressSpace.
> 
> But the IOMMU MR stays the same - size, offset, and iova will be relative to
> its start, why does it matter if only portion is mapped?

Because the portion mapped may not sit at the start of the MR.  For
example if you had a 2G MR, and the second half is mapped at address 0
in the AS, then an IOMMUTLBEntry iova of 1G would translated to AS
address 0.

> >So, to find the IOVA within the AddressSpace, from the IOVA within the
> >MemoryRegion, you need to first subtract the section's offset within
> >the MemoryRegion, then add the section's offset within the
> >AddressSpace.
> >
> >You could precalculate the combined delta here, but...
> >
> >
> >>          giommu->container = container;
> >>          giommu->n.notify = vfio_iommu_map_notify;
> >>          QLIST_INSERT_HEAD(&container->giommu_list, giommu, giommu_next);
> >>diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
> >>index eb0e1b0..5341e05 100644
> >>--- a/include/hw/vfio/vfio-common.h
> >>+++ b/include/hw/vfio/vfio-common.h
> >>@@ -90,6 +90,7 @@ typedef struct VFIOContainer {
> >>  typedef struct VFIOGuestIOMMU {
> >>      VFIOContainer *container;
> >>      MemoryRegion *iommu;
> >>+    hwaddr offset_within_address_space;
> >
> >...it might be simpler to replace both the iommu and
> >offset_within_address_space fields here with a pointer to the
> >MemoryRegionSection instead, which should give you all the info you
> >need.
> 
> 
> MemoryRegionSection is allocated on stack in listener_add_address_space()
> and seems to be in general some sort of temporary object.

Ah, right, I guess you'll have to store the delta, then.

> 
> 
> >
> >It might also be worth adding Paolo to the CC for this patch, since he
> >knows the MemoryRegion stuff better than anyone.
> 
> 
> Right, I added him in cc: now.
> 
> >
> >>      Notifier n;
> >>      QLIST_ENTRY(VFIOGuestIOMMU) giommu_next;
> >>  } VFIOGuestIOMMU;
> >
> 
>
Alexey Kardashevskiy March 22, 2016, 4:28 a.m. UTC | #4
On 03/22/2016 02:26 PM, David Gibson wrote:
> On Tue, Mar 22, 2016 at 02:12:30PM +1100, Alexey Kardashevskiy wrote:
>> On 03/22/2016 11:49 AM, David Gibson wrote:
>>> On Mon, Mar 21, 2016 at 06:46:49PM +1100, Alexey Kardashevskiy wrote:
>>>> Since a788f227 "memory: Allow replay of IOMMU mapping notifications"
>>>> when new VFIO listener is added, all existing IOMMU mappings are
>>>> replayed. However there is a problem that the base address of
>>>> an IOMMU memory region (IOMMU MR) is ignored which is not a problem
>>>> for the existing user (which is pseries) with its default 32bit DMA
>>>> window starting at 0 but it is if there is another DMA window.
>>>>
>>>> This stores the IOMMU's offset_within_address_space and adjusts
>>>> the IOVA before calling vfio_dma_map/vfio_dma_unmap.
>>>>
>>>> As the IOMMU notifier expects IOVA offset rather than the absolute
>>>> address, this also adjusts IOVA in sPAPR H_PUT_TCE handler before
>>>> calling notifier(s).
>>>>
>>>> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
>>>> Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
>>>
>>> On a closer look, I realised this still isn't quite correct, although
>>> I don't think any cases which would break it exist or are planned.
>>>
>>>> ---
>>>>   hw/ppc/spapr_iommu.c          |  2 +-
>>>>   hw/vfio/common.c              | 14 ++++++++------
>>>>   include/hw/vfio/vfio-common.h |  1 +
>>>>   3 files changed, 10 insertions(+), 7 deletions(-)
>>>>
>>>> diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
>>>> index 7dd4588..277f289 100644
>>>> --- a/hw/ppc/spapr_iommu.c
>>>> +++ b/hw/ppc/spapr_iommu.c
>>>> @@ -277,7 +277,7 @@ static target_ulong put_tce_emu(sPAPRTCETable *tcet, target_ulong ioba,
>>>>       tcet->table[index] = tce;
>>>>
>>>>       entry.target_as = &address_space_memory,
>>>> -    entry.iova = ioba & page_mask;
>>>> +    entry.iova = (ioba - tcet->bus_offset) & page_mask;
>>>>       entry.translated_addr = tce & page_mask;
>>>>       entry.addr_mask = ~page_mask;
>>>>       entry.perm = spapr_tce_iommu_access_flags(tce);
>>>
>>> This bit's right/
>>>
>>>> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
>>>> index fb588d8..d45e2db 100644
>>>> --- a/hw/vfio/common.c
>>>> +++ b/hw/vfio/common.c
>>>> @@ -257,14 +257,14 @@ static void vfio_iommu_map_notify(Notifier *n, void *data)
>>>>       VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n);
>>>>       VFIOContainer *container = giommu->container;
>>>>       IOMMUTLBEntry *iotlb = data;
>>>> +    hwaddr iova = iotlb->iova + giommu->offset_within_address_space;
>>>
>>> This bit might be right, depending on how you define giommu->offset_within_address_space.
>>>
>>>>       MemoryRegion *mr;
>>>>       hwaddr xlat;
>>>>       hwaddr len = iotlb->addr_mask + 1;
>>>>       void *vaddr;
>>>>       int ret;
>>>>
>>>> -    trace_vfio_iommu_map_notify(iotlb->iova,
>>>> -                                iotlb->iova + iotlb->addr_mask);
>>>> +    trace_vfio_iommu_map_notify(iova, iova + iotlb->addr_mask);
>>>>
>>>>       /*
>>>>        * The IOMMU TLB entry we have just covers translation through
>>>> @@ -291,21 +291,21 @@ static void vfio_iommu_map_notify(Notifier *n, void *data)
>>>>
>>>>       if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) {
>>>>           vaddr = memory_region_get_ram_ptr(mr) + xlat;
>>>> -        ret = vfio_dma_map(container, iotlb->iova,
>>>> +        ret = vfio_dma_map(container, iova,
>>>>                              iotlb->addr_mask + 1, vaddr,
>>>>                              !(iotlb->perm & IOMMU_WO) || mr->readonly);
>>>>           if (ret) {
>>>>               error_report("vfio_dma_map(%p, 0x%"HWADDR_PRIx", "
>>>>                            "0x%"HWADDR_PRIx", %p) = %d (%m)",
>>>> -                         container, iotlb->iova,
>>>> +                         container, iova,
>>>>                            iotlb->addr_mask + 1, vaddr, ret);
>>>>           }
>>>>       } else {
>>>> -        ret = vfio_dma_unmap(container, iotlb->iova, iotlb->addr_mask + 1);
>>>> +        ret = vfio_dma_unmap(container, iova, iotlb->addr_mask + 1);
>>>>           if (ret) {
>>>>               error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
>>>>                            "0x%"HWADDR_PRIx") = %d (%m)",
>>>> -                         container, iotlb->iova,
>>>> +                         container, iova,
>>>>                            iotlb->addr_mask + 1, ret);
>>>>           }
>>>>       }
>>>
>>> This is fine.
>>>
>>>> @@ -377,6 +377,8 @@ static void vfio_listener_region_add(MemoryListener *listener,
>>>>            */
>>>>           giommu = g_malloc0(sizeof(*giommu));
>>>>           giommu->iommu = section->mr;
>>>> +        giommu->offset_within_address_space =
>>>> +            section->offset_within_address_space;
>>>
>>> But here there's a problem.  The iova in IOMMUTLBEntry is relative to
>>> the IOMMU MemoryRegion, but - at least in theory - only a subsection
>>> of that MemoryRegion could be mapped into the AddressSpace.
>>
>> But the IOMMU MR stays the same - size, offset, and iova will be relative to
>> its start, why does it matter if only portion is mapped?
>
> Because the portion mapped may not sit at the start of the MR.  For
> example if you had a 2G MR, and the second half is mapped at address 0
> in the AS,

My imagination fails here. How could you do this in practice?

address_space_init(&as, &root)
memory_region_init(&mr, 2GB)
memory_region_add_subregion(&root, -1GB, &mr)

But offsets are unsigned.

In general, how to map only a half, what memory_region_add_xxx() does that?


> then an IOMMUTLBEntry iova of 1G would translated to AS
> address 0.
 >
>
>>> So, to find the IOVA within the AddressSpace, from the IOVA within the
>>> MemoryRegion, you need to first subtract the section's offset within
>>> the MemoryRegion, then add the section's offset within the
>>> AddressSpace.
>>>
>>> You could precalculate the combined delta here, but...
>>>
>>>
>>>>           giommu->container = container;
>>>>           giommu->n.notify = vfio_iommu_map_notify;
>>>>           QLIST_INSERT_HEAD(&container->giommu_list, giommu, giommu_next);
>>>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
>>>> index eb0e1b0..5341e05 100644
>>>> --- a/include/hw/vfio/vfio-common.h
>>>> +++ b/include/hw/vfio/vfio-common.h
>>>> @@ -90,6 +90,7 @@ typedef struct VFIOContainer {
>>>>   typedef struct VFIOGuestIOMMU {
>>>>       VFIOContainer *container;
>>>>       MemoryRegion *iommu;
>>>> +    hwaddr offset_within_address_space;
>>>
>>> ...it might be simpler to replace both the iommu and
>>> offset_within_address_space fields here with a pointer to the
>>> MemoryRegionSection instead, which should give you all the info you
>>> need.
>>
>>
>> MemoryRegionSection is allocated on stack in listener_add_address_space()
>> and seems to be in general some sort of temporary object.
>
> Ah, right, I guess you'll have to store the delta, then.
>
>>
>>
>>>
>>> It might also be worth adding Paolo to the CC for this patch, since he
>>> knows the MemoryRegion stuff better than anyone.
>>
>>
>> Right, I added him in cc: now.
>>
>>>
>>>>       Notifier n;
>>>>       QLIST_ENTRY(VFIOGuestIOMMU) giommu_next;
>>>>   } VFIOGuestIOMMU;
>>>
>>
>>
>
David Gibson March 22, 2016, 4:59 a.m. UTC | #5
On Tue, Mar 22, 2016 at 03:28:52PM +1100, Alexey Kardashevskiy wrote:
> On 03/22/2016 02:26 PM, David Gibson wrote:
> >On Tue, Mar 22, 2016 at 02:12:30PM +1100, Alexey Kardashevskiy wrote:
> >>On 03/22/2016 11:49 AM, David Gibson wrote:
> >>>On Mon, Mar 21, 2016 at 06:46:49PM +1100, Alexey Kardashevskiy wrote:
> >>>>Since a788f227 "memory: Allow replay of IOMMU mapping notifications"
> >>>>when new VFIO listener is added, all existing IOMMU mappings are
> >>>>replayed. However there is a problem that the base address of
> >>>>an IOMMU memory region (IOMMU MR) is ignored which is not a problem
> >>>>for the existing user (which is pseries) with its default 32bit DMA
> >>>>window starting at 0 but it is if there is another DMA window.
> >>>>
> >>>>This stores the IOMMU's offset_within_address_space and adjusts
> >>>>the IOVA before calling vfio_dma_map/vfio_dma_unmap.
> >>>>
> >>>>As the IOMMU notifier expects IOVA offset rather than the absolute
> >>>>address, this also adjusts IOVA in sPAPR H_PUT_TCE handler before
> >>>>calling notifier(s).
> >>>>
> >>>>Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
> >>>>Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
> >>>
> >>>On a closer look, I realised this still isn't quite correct, although
> >>>I don't think any cases which would break it exist or are planned.
> >>>
> >>>>---
> >>>>  hw/ppc/spapr_iommu.c          |  2 +-
> >>>>  hw/vfio/common.c              | 14 ++++++++------
> >>>>  include/hw/vfio/vfio-common.h |  1 +
> >>>>  3 files changed, 10 insertions(+), 7 deletions(-)
> >>>>
> >>>>diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
> >>>>index 7dd4588..277f289 100644
> >>>>--- a/hw/ppc/spapr_iommu.c
> >>>>+++ b/hw/ppc/spapr_iommu.c
> >>>>@@ -277,7 +277,7 @@ static target_ulong put_tce_emu(sPAPRTCETable *tcet, target_ulong ioba,
> >>>>      tcet->table[index] = tce;
> >>>>
> >>>>      entry.target_as = &address_space_memory,
> >>>>-    entry.iova = ioba & page_mask;
> >>>>+    entry.iova = (ioba - tcet->bus_offset) & page_mask;
> >>>>      entry.translated_addr = tce & page_mask;
> >>>>      entry.addr_mask = ~page_mask;
> >>>>      entry.perm = spapr_tce_iommu_access_flags(tce);
> >>>
> >>>This bit's right/
> >>>
> >>>>diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> >>>>index fb588d8..d45e2db 100644
> >>>>--- a/hw/vfio/common.c
> >>>>+++ b/hw/vfio/common.c
> >>>>@@ -257,14 +257,14 @@ static void vfio_iommu_map_notify(Notifier *n, void *data)
> >>>>      VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n);
> >>>>      VFIOContainer *container = giommu->container;
> >>>>      IOMMUTLBEntry *iotlb = data;
> >>>>+    hwaddr iova = iotlb->iova + giommu->offset_within_address_space;
> >>>
> >>>This bit might be right, depending on how you define giommu->offset_within_address_space.
> >>>
> >>>>      MemoryRegion *mr;
> >>>>      hwaddr xlat;
> >>>>      hwaddr len = iotlb->addr_mask + 1;
> >>>>      void *vaddr;
> >>>>      int ret;
> >>>>
> >>>>-    trace_vfio_iommu_map_notify(iotlb->iova,
> >>>>-                                iotlb->iova + iotlb->addr_mask);
> >>>>+    trace_vfio_iommu_map_notify(iova, iova + iotlb->addr_mask);
> >>>>
> >>>>      /*
> >>>>       * The IOMMU TLB entry we have just covers translation through
> >>>>@@ -291,21 +291,21 @@ static void vfio_iommu_map_notify(Notifier *n, void *data)
> >>>>
> >>>>      if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) {
> >>>>          vaddr = memory_region_get_ram_ptr(mr) + xlat;
> >>>>-        ret = vfio_dma_map(container, iotlb->iova,
> >>>>+        ret = vfio_dma_map(container, iova,
> >>>>                             iotlb->addr_mask + 1, vaddr,
> >>>>                             !(iotlb->perm & IOMMU_WO) || mr->readonly);
> >>>>          if (ret) {
> >>>>              error_report("vfio_dma_map(%p, 0x%"HWADDR_PRIx", "
> >>>>                           "0x%"HWADDR_PRIx", %p) = %d (%m)",
> >>>>-                         container, iotlb->iova,
> >>>>+                         container, iova,
> >>>>                           iotlb->addr_mask + 1, vaddr, ret);
> >>>>          }
> >>>>      } else {
> >>>>-        ret = vfio_dma_unmap(container, iotlb->iova, iotlb->addr_mask + 1);
> >>>>+        ret = vfio_dma_unmap(container, iova, iotlb->addr_mask + 1);
> >>>>          if (ret) {
> >>>>              error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
> >>>>                           "0x%"HWADDR_PRIx") = %d (%m)",
> >>>>-                         container, iotlb->iova,
> >>>>+                         container, iova,
> >>>>                           iotlb->addr_mask + 1, ret);
> >>>>          }
> >>>>      }
> >>>
> >>>This is fine.
> >>>
> >>>>@@ -377,6 +377,8 @@ static void vfio_listener_region_add(MemoryListener *listener,
> >>>>           */
> >>>>          giommu = g_malloc0(sizeof(*giommu));
> >>>>          giommu->iommu = section->mr;
> >>>>+        giommu->offset_within_address_space =
> >>>>+            section->offset_within_address_space;
> >>>
> >>>But here there's a problem.  The iova in IOMMUTLBEntry is relative to
> >>>the IOMMU MemoryRegion, but - at least in theory - only a subsection
> >>>of that MemoryRegion could be mapped into the AddressSpace.
> >>
> >>But the IOMMU MR stays the same - size, offset, and iova will be relative to
> >>its start, why does it matter if only portion is mapped?
> >
> >Because the portion mapped may not sit at the start of the MR.  For
> >example if you had a 2G MR, and the second half is mapped at address 0
> >in the AS,
> 
> My imagination fails here. How could you do this in practice?
> 
> address_space_init(&as, &root)
> memory_region_init(&mr, 2GB)
> memory_region_add_subregion(&root, -1GB, &mr)
> 
> But offsets are unsigned.
> 
> In general, how to map only a half, what memory_region_add_xxx()
> does that?

I'm not totally sure, but I think you can do it with:

address_space_init(&as, &root)
memory_region_init(&mr0, 2GB)
memory_region_init_alias(&mr1, &mr0, 1GB, 1GB)
memory_region_add_subregion(&root, 0, &mr1)

But the point is that it's possible for offset_within_region to be
non-zero, and if it is, you need to take it into account.
Alexey Kardashevskiy March 22, 2016, 7:19 a.m. UTC | #6
On 03/22/2016 03:59 PM, David Gibson wrote:
> On Tue, Mar 22, 2016 at 03:28:52PM +1100, Alexey Kardashevskiy wrote:
>> On 03/22/2016 02:26 PM, David Gibson wrote:
>>> On Tue, Mar 22, 2016 at 02:12:30PM +1100, Alexey Kardashevskiy wrote:
>>>> On 03/22/2016 11:49 AM, David Gibson wrote:
>>>>> On Mon, Mar 21, 2016 at 06:46:49PM +1100, Alexey Kardashevskiy wrote:
>>>>>> Since a788f227 "memory: Allow replay of IOMMU mapping notifications"
>>>>>> when new VFIO listener is added, all existing IOMMU mappings are
>>>>>> replayed. However there is a problem that the base address of
>>>>>> an IOMMU memory region (IOMMU MR) is ignored which is not a problem
>>>>>> for the existing user (which is pseries) with its default 32bit DMA
>>>>>> window starting at 0 but it is if there is another DMA window.
>>>>>>
>>>>>> This stores the IOMMU's offset_within_address_space and adjusts
>>>>>> the IOVA before calling vfio_dma_map/vfio_dma_unmap.
>>>>>>
>>>>>> As the IOMMU notifier expects IOVA offset rather than the absolute
>>>>>> address, this also adjusts IOVA in sPAPR H_PUT_TCE handler before
>>>>>> calling notifier(s).
>>>>>>
>>>>>> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
>>>>>> Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
>>>>>
>>>>> On a closer look, I realised this still isn't quite correct, although
>>>>> I don't think any cases which would break it exist or are planned.
>>>>>
>>>>>> ---
>>>>>>   hw/ppc/spapr_iommu.c          |  2 +-
>>>>>>   hw/vfio/common.c              | 14 ++++++++------
>>>>>>   include/hw/vfio/vfio-common.h |  1 +
>>>>>>   3 files changed, 10 insertions(+), 7 deletions(-)
>>>>>>
>>>>>> diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
>>>>>> index 7dd4588..277f289 100644
>>>>>> --- a/hw/ppc/spapr_iommu.c
>>>>>> +++ b/hw/ppc/spapr_iommu.c
>>>>>> @@ -277,7 +277,7 @@ static target_ulong put_tce_emu(sPAPRTCETable *tcet, target_ulong ioba,
>>>>>>       tcet->table[index] = tce;
>>>>>>
>>>>>>       entry.target_as = &address_space_memory,
>>>>>> -    entry.iova = ioba & page_mask;
>>>>>> +    entry.iova = (ioba - tcet->bus_offset) & page_mask;
>>>>>>       entry.translated_addr = tce & page_mask;
>>>>>>       entry.addr_mask = ~page_mask;
>>>>>>       entry.perm = spapr_tce_iommu_access_flags(tce);
>>>>>
>>>>> This bit's right/
>>>>>
>>>>>> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
>>>>>> index fb588d8..d45e2db 100644
>>>>>> --- a/hw/vfio/common.c
>>>>>> +++ b/hw/vfio/common.c
>>>>>> @@ -257,14 +257,14 @@ static void vfio_iommu_map_notify(Notifier *n, void *data)
>>>>>>       VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n);
>>>>>>       VFIOContainer *container = giommu->container;
>>>>>>       IOMMUTLBEntry *iotlb = data;
>>>>>> +    hwaddr iova = iotlb->iova + giommu->offset_within_address_space;
>>>>>
>>>>> This bit might be right, depending on how you define giommu->offset_within_address_space.
>>>>>
>>>>>>       MemoryRegion *mr;
>>>>>>       hwaddr xlat;
>>>>>>       hwaddr len = iotlb->addr_mask + 1;
>>>>>>       void *vaddr;
>>>>>>       int ret;
>>>>>>
>>>>>> -    trace_vfio_iommu_map_notify(iotlb->iova,
>>>>>> -                                iotlb->iova + iotlb->addr_mask);
>>>>>> +    trace_vfio_iommu_map_notify(iova, iova + iotlb->addr_mask);
>>>>>>
>>>>>>       /*
>>>>>>        * The IOMMU TLB entry we have just covers translation through
>>>>>> @@ -291,21 +291,21 @@ static void vfio_iommu_map_notify(Notifier *n, void *data)
>>>>>>
>>>>>>       if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) {
>>>>>>           vaddr = memory_region_get_ram_ptr(mr) + xlat;
>>>>>> -        ret = vfio_dma_map(container, iotlb->iova,
>>>>>> +        ret = vfio_dma_map(container, iova,
>>>>>>                              iotlb->addr_mask + 1, vaddr,
>>>>>>                              !(iotlb->perm & IOMMU_WO) || mr->readonly);
>>>>>>           if (ret) {
>>>>>>               error_report("vfio_dma_map(%p, 0x%"HWADDR_PRIx", "
>>>>>>                            "0x%"HWADDR_PRIx", %p) = %d (%m)",
>>>>>> -                         container, iotlb->iova,
>>>>>> +                         container, iova,
>>>>>>                            iotlb->addr_mask + 1, vaddr, ret);
>>>>>>           }
>>>>>>       } else {
>>>>>> -        ret = vfio_dma_unmap(container, iotlb->iova, iotlb->addr_mask + 1);
>>>>>> +        ret = vfio_dma_unmap(container, iova, iotlb->addr_mask + 1);
>>>>>>           if (ret) {
>>>>>>               error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
>>>>>>                            "0x%"HWADDR_PRIx") = %d (%m)",
>>>>>> -                         container, iotlb->iova,
>>>>>> +                         container, iova,
>>>>>>                            iotlb->addr_mask + 1, ret);
>>>>>>           }
>>>>>>       }
>>>>>
>>>>> This is fine.
>>>>>
>>>>>> @@ -377,6 +377,8 @@ static void vfio_listener_region_add(MemoryListener *listener,
>>>>>>            */
>>>>>>           giommu = g_malloc0(sizeof(*giommu));
>>>>>>           giommu->iommu = section->mr;
>>>>>> +        giommu->offset_within_address_space =
>>>>>> +            section->offset_within_address_space;
>>>>>
>>>>> But here there's a problem.  The iova in IOMMUTLBEntry is relative to
>>>>> the IOMMU MemoryRegion, but - at least in theory - only a subsection
>>>>> of that MemoryRegion could be mapped into the AddressSpace.
>>>>
>>>> But the IOMMU MR stays the same - size, offset, and iova will be relative to
>>>> its start, why does it matter if only portion is mapped?
>>>
>>> Because the portion mapped may not sit at the start of the MR.  For
>>> example if you had a 2G MR, and the second half is mapped at address 0
>>> in the AS,
>>
>> My imagination fails here. How could you do this in practice?
>>
>> address_space_init(&as, &root)
>> memory_region_init(&mr, 2GB)
>> memory_region_add_subregion(&root, -1GB, &mr)
>>
>> But offsets are unsigned.
>>
>> In general, how to map only a half, what memory_region_add_xxx()
>> does that?
>
> I'm not totally sure, but I think you can do it with:


Ok. Got it. So, how about this:

s/offset_within_address_space/iommu_offset/

and

giommu->iommu_offset = section->offset_within_address_space -
section->offset_within_region;

?


>
> address_space_init(&as, &root)
> memory_region_init(&mr0, 2GB)
> memory_region_init_alias(&mr1, &mr0, 1GB, 1GB)
> memory_region_add_subregion(&root, 0, &mr1)
>
> But the point is that it's possible for offset_within_region to be
> non-zero, and if it is, you need to take it into account.

I was not arguing this, I was trying to imagine :)
David Gibson March 22, 2016, 11:07 p.m. UTC | #7
On Tue, Mar 22, 2016 at 06:19:40PM +1100, Alexey Kardashevskiy wrote:
> On 03/22/2016 03:59 PM, David Gibson wrote:
> >On Tue, Mar 22, 2016 at 03:28:52PM +1100, Alexey Kardashevskiy wrote:
> >>On 03/22/2016 02:26 PM, David Gibson wrote:
> >>>On Tue, Mar 22, 2016 at 02:12:30PM +1100, Alexey Kardashevskiy wrote:
> >>>>On 03/22/2016 11:49 AM, David Gibson wrote:
> >>>>>On Mon, Mar 21, 2016 at 06:46:49PM +1100, Alexey Kardashevskiy wrote:
> >>>>>>Since a788f227 "memory: Allow replay of IOMMU mapping notifications"
> >>>>>>when new VFIO listener is added, all existing IOMMU mappings are
> >>>>>>replayed. However there is a problem that the base address of
> >>>>>>an IOMMU memory region (IOMMU MR) is ignored which is not a problem
> >>>>>>for the existing user (which is pseries) with its default 32bit DMA
> >>>>>>window starting at 0 but it is if there is another DMA window.
> >>>>>>
> >>>>>>This stores the IOMMU's offset_within_address_space and adjusts
> >>>>>>the IOVA before calling vfio_dma_map/vfio_dma_unmap.
> >>>>>>
> >>>>>>As the IOMMU notifier expects IOVA offset rather than the absolute
> >>>>>>address, this also adjusts IOVA in sPAPR H_PUT_TCE handler before
> >>>>>>calling notifier(s).
> >>>>>>
> >>>>>>Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
> >>>>>>Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
> >>>>>
> >>>>>On a closer look, I realised this still isn't quite correct, although
> >>>>>I don't think any cases which would break it exist or are planned.
> >>>>>
> >>>>>>---
> >>>>>>  hw/ppc/spapr_iommu.c          |  2 +-
> >>>>>>  hw/vfio/common.c              | 14 ++++++++------
> >>>>>>  include/hw/vfio/vfio-common.h |  1 +
> >>>>>>  3 files changed, 10 insertions(+), 7 deletions(-)
> >>>>>>
> >>>>>>diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
> >>>>>>index 7dd4588..277f289 100644
> >>>>>>--- a/hw/ppc/spapr_iommu.c
> >>>>>>+++ b/hw/ppc/spapr_iommu.c
> >>>>>>@@ -277,7 +277,7 @@ static target_ulong put_tce_emu(sPAPRTCETable *tcet, target_ulong ioba,
> >>>>>>      tcet->table[index] = tce;
> >>>>>>
> >>>>>>      entry.target_as = &address_space_memory,
> >>>>>>-    entry.iova = ioba & page_mask;
> >>>>>>+    entry.iova = (ioba - tcet->bus_offset) & page_mask;
> >>>>>>      entry.translated_addr = tce & page_mask;
> >>>>>>      entry.addr_mask = ~page_mask;
> >>>>>>      entry.perm = spapr_tce_iommu_access_flags(tce);
> >>>>>
> >>>>>This bit's right/
> >>>>>
> >>>>>>diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> >>>>>>index fb588d8..d45e2db 100644
> >>>>>>--- a/hw/vfio/common.c
> >>>>>>+++ b/hw/vfio/common.c
> >>>>>>@@ -257,14 +257,14 @@ static void vfio_iommu_map_notify(Notifier *n, void *data)
> >>>>>>      VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n);
> >>>>>>      VFIOContainer *container = giommu->container;
> >>>>>>      IOMMUTLBEntry *iotlb = data;
> >>>>>>+    hwaddr iova = iotlb->iova + giommu->offset_within_address_space;
> >>>>>
> >>>>>This bit might be right, depending on how you define giommu->offset_within_address_space.
> >>>>>
> >>>>>>      MemoryRegion *mr;
> >>>>>>      hwaddr xlat;
> >>>>>>      hwaddr len = iotlb->addr_mask + 1;
> >>>>>>      void *vaddr;
> >>>>>>      int ret;
> >>>>>>
> >>>>>>-    trace_vfio_iommu_map_notify(iotlb->iova,
> >>>>>>-                                iotlb->iova + iotlb->addr_mask);
> >>>>>>+    trace_vfio_iommu_map_notify(iova, iova + iotlb->addr_mask);
> >>>>>>
> >>>>>>      /*
> >>>>>>       * The IOMMU TLB entry we have just covers translation through
> >>>>>>@@ -291,21 +291,21 @@ static void vfio_iommu_map_notify(Notifier *n, void *data)
> >>>>>>
> >>>>>>      if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) {
> >>>>>>          vaddr = memory_region_get_ram_ptr(mr) + xlat;
> >>>>>>-        ret = vfio_dma_map(container, iotlb->iova,
> >>>>>>+        ret = vfio_dma_map(container, iova,
> >>>>>>                             iotlb->addr_mask + 1, vaddr,
> >>>>>>                             !(iotlb->perm & IOMMU_WO) || mr->readonly);
> >>>>>>          if (ret) {
> >>>>>>              error_report("vfio_dma_map(%p, 0x%"HWADDR_PRIx", "
> >>>>>>                           "0x%"HWADDR_PRIx", %p) = %d (%m)",
> >>>>>>-                         container, iotlb->iova,
> >>>>>>+                         container, iova,
> >>>>>>                           iotlb->addr_mask + 1, vaddr, ret);
> >>>>>>          }
> >>>>>>      } else {
> >>>>>>-        ret = vfio_dma_unmap(container, iotlb->iova, iotlb->addr_mask + 1);
> >>>>>>+        ret = vfio_dma_unmap(container, iova, iotlb->addr_mask + 1);
> >>>>>>          if (ret) {
> >>>>>>              error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
> >>>>>>                           "0x%"HWADDR_PRIx") = %d (%m)",
> >>>>>>-                         container, iotlb->iova,
> >>>>>>+                         container, iova,
> >>>>>>                           iotlb->addr_mask + 1, ret);
> >>>>>>          }
> >>>>>>      }
> >>>>>
> >>>>>This is fine.
> >>>>>
> >>>>>>@@ -377,6 +377,8 @@ static void vfio_listener_region_add(MemoryListener *listener,
> >>>>>>           */
> >>>>>>          giommu = g_malloc0(sizeof(*giommu));
> >>>>>>          giommu->iommu = section->mr;
> >>>>>>+        giommu->offset_within_address_space =
> >>>>>>+            section->offset_within_address_space;
> >>>>>
> >>>>>But here there's a problem.  The iova in IOMMUTLBEntry is relative to
> >>>>>the IOMMU MemoryRegion, but - at least in theory - only a subsection
> >>>>>of that MemoryRegion could be mapped into the AddressSpace.
> >>>>
> >>>>But the IOMMU MR stays the same - size, offset, and iova will be relative to
> >>>>its start, why does it matter if only portion is mapped?
> >>>
> >>>Because the portion mapped may not sit at the start of the MR.  For
> >>>example if you had a 2G MR, and the second half is mapped at address 0
> >>>in the AS,
> >>
> >>My imagination fails here. How could you do this in practice?
> >>
> >>address_space_init(&as, &root)
> >>memory_region_init(&mr, 2GB)
> >>memory_region_add_subregion(&root, -1GB, &mr)
> >>
> >>But offsets are unsigned.
> >>
> >>In general, how to map only a half, what memory_region_add_xxx()
> >>does that?
> >
> >I'm not totally sure, but I think you can do it with:
> 
> 
> Ok. Got it. So, how about this:
> 
> s/offset_within_address_space/iommu_offset/
> 
> and
> 
> giommu->iommu_offset = section->offset_within_address_space -
> section->offset_within_region;

Yes, that should do it.

> 
> ?
> 
> 
> >
> >address_space_init(&as, &root)
> >memory_region_init(&mr0, 2GB)
> >memory_region_init_alias(&mr1, &mr0, 1GB, 1GB)
> >memory_region_add_subregion(&root, 0, &mr1)
> >
> >But the point is that it's possible for offset_within_region to be
> >non-zero, and if it is, you need to take it into account.
> 
> I was not arguing this, I was trying to imagine :)
> 
> 
>
Paolo Bonzini March 23, 2016, 10:58 a.m. UTC | #8
On 22/03/2016 04:26, David Gibson wrote:
>> > >...it might be simpler to replace both the iommu and
>> > >offset_within_address_space fields here with a pointer to the
>> > >MemoryRegionSection instead, which should give you all the info you
>> > >need.
>> 
>> 
>> MemoryRegionSection is allocated on stack in listener_add_address_space()
>> and seems to be in general some sort of temporary object.

If you need the information in a MemoryRegionSection, by all means use
it.  For example users of hw/display/framebuffer.c store a
MemoryRegionSection (in that case, they get it from memory_region_find,
but it doesn't have to be that way).

Otherwise I agree with what David has said.

Paolo
diff mbox

Patch

diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
index 7dd4588..277f289 100644
--- a/hw/ppc/spapr_iommu.c
+++ b/hw/ppc/spapr_iommu.c
@@ -277,7 +277,7 @@  static target_ulong put_tce_emu(sPAPRTCETable *tcet, target_ulong ioba,
     tcet->table[index] = tce;
 
     entry.target_as = &address_space_memory,
-    entry.iova = ioba & page_mask;
+    entry.iova = (ioba - tcet->bus_offset) & page_mask;
     entry.translated_addr = tce & page_mask;
     entry.addr_mask = ~page_mask;
     entry.perm = spapr_tce_iommu_access_flags(tce);
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index fb588d8..d45e2db 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -257,14 +257,14 @@  static void vfio_iommu_map_notify(Notifier *n, void *data)
     VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n);
     VFIOContainer *container = giommu->container;
     IOMMUTLBEntry *iotlb = data;
+    hwaddr iova = iotlb->iova + giommu->offset_within_address_space;
     MemoryRegion *mr;
     hwaddr xlat;
     hwaddr len = iotlb->addr_mask + 1;
     void *vaddr;
     int ret;
 
-    trace_vfio_iommu_map_notify(iotlb->iova,
-                                iotlb->iova + iotlb->addr_mask);
+    trace_vfio_iommu_map_notify(iova, iova + iotlb->addr_mask);
 
     /*
      * The IOMMU TLB entry we have just covers translation through
@@ -291,21 +291,21 @@  static void vfio_iommu_map_notify(Notifier *n, void *data)
 
     if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) {
         vaddr = memory_region_get_ram_ptr(mr) + xlat;
-        ret = vfio_dma_map(container, iotlb->iova,
+        ret = vfio_dma_map(container, iova,
                            iotlb->addr_mask + 1, vaddr,
                            !(iotlb->perm & IOMMU_WO) || mr->readonly);
         if (ret) {
             error_report("vfio_dma_map(%p, 0x%"HWADDR_PRIx", "
                          "0x%"HWADDR_PRIx", %p) = %d (%m)",
-                         container, iotlb->iova,
+                         container, iova,
                          iotlb->addr_mask + 1, vaddr, ret);
         }
     } else {
-        ret = vfio_dma_unmap(container, iotlb->iova, iotlb->addr_mask + 1);
+        ret = vfio_dma_unmap(container, iova, iotlb->addr_mask + 1);
         if (ret) {
             error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
                          "0x%"HWADDR_PRIx") = %d (%m)",
-                         container, iotlb->iova,
+                         container, iova,
                          iotlb->addr_mask + 1, ret);
         }
     }
@@ -377,6 +377,8 @@  static void vfio_listener_region_add(MemoryListener *listener,
          */
         giommu = g_malloc0(sizeof(*giommu));
         giommu->iommu = section->mr;
+        giommu->offset_within_address_space =
+            section->offset_within_address_space;
         giommu->container = container;
         giommu->n.notify = vfio_iommu_map_notify;
         QLIST_INSERT_HEAD(&container->giommu_list, giommu, giommu_next);
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index eb0e1b0..5341e05 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -90,6 +90,7 @@  typedef struct VFIOContainer {
 typedef struct VFIOGuestIOMMU {
     VFIOContainer *container;
     MemoryRegion *iommu;
+    hwaddr offset_within_address_space;
     Notifier n;
     QLIST_ENTRY(VFIOGuestIOMMU) giommu_next;
 } VFIOGuestIOMMU;