
[for,2.8,02/11] virtio: convert to use DMA api

Message ID 1472526419-5900-3-git-send-email-jasowang@redhat.com (mailing list archive)
State New, archived

Commit Message

Jason Wang Aug. 30, 2016, 3:06 a.m. UTC
Currently, all virtio devices bypass the IOMMU completely. This is because
address_space_memory is assumed and used during DMA emulation. This
patch converts the virtio core API to use the DMA API. The idea is:

- introduce a new transport-specific helper to query the DMA address
  space (only the PCI version is implemented).
- query and use this address space for virtio device accesses to guest
  memory when the IOMMU platform feature (VIRTIO_F_IOMMU_PLATFORM) is
  enabled for the device.

Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Stefan Hajnoczi <stefanha@redhat.com>
Cc: Kevin Wolf <kwolf@redhat.com>
Cc: Amit Shah <amit.shah@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: qemu-block@nongnu.org
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 hw/block/virtio-blk.c             |  2 +-
 hw/char/virtio-serial-bus.c       |  3 +-
 hw/scsi/virtio-scsi.c             |  4 ++-
 hw/virtio/virtio-pci.c            | 14 +++++++++
 hw/virtio/virtio.c                | 62 ++++++++++++++++++++++++---------------
 include/hw/virtio/virtio-access.h | 43 ++++++++++++++++++++-------
 include/hw/virtio/virtio-bus.h    |  1 +
 include/hw/virtio/virtio.h        |  8 +++--
 8 files changed, 98 insertions(+), 39 deletions(-)

Comments

Cornelia Huck Aug. 30, 2016, 7:31 a.m. UTC | #1
On Tue, 30 Aug 2016 11:06:50 +0800
Jason Wang <jasowang@redhat.com> wrote:

> Currently, all virtio devices bypass IOMMU completely. This is because
> address_space_memory is assumed and used during DMA emulation. This
> patch converts the virtio core API to use DMA API. This idea is
> 
> - introducing a new transport specific helper to query the dma address
>   space. (only pci version is implemented).
> - query and use this address space during virtio device guest memory
>   accessing when iommu platform (VIRTIO_F_IOMMU_PLATFORM) was enabled
>   for this device.
> 
> Cc: Michael S. Tsirkin <mst@redhat.com>
> Cc: Stefan Hajnoczi <stefanha@redhat.com>
> Cc: Kevin Wolf <kwolf@redhat.com>
> Cc: Amit Shah <amit.shah@redhat.com>
> Cc: Paolo Bonzini <pbonzini@redhat.com>
> Cc: qemu-block@nongnu.org
> Signed-off-by: Jason Wang <jasowang@redhat.com>
> ---
>  hw/block/virtio-blk.c             |  2 +-
>  hw/char/virtio-serial-bus.c       |  3 +-
>  hw/scsi/virtio-scsi.c             |  4 ++-
>  hw/virtio/virtio-pci.c            | 14 +++++++++
>  hw/virtio/virtio.c                | 62 ++++++++++++++++++++++++---------------
>  include/hw/virtio/virtio-access.h | 43 ++++++++++++++++++++-------
>  include/hw/virtio/virtio-bus.h    |  1 +
>  include/hw/virtio/virtio.h        |  8 +++--
>  8 files changed, 98 insertions(+), 39 deletions(-)
> 

> diff --git a/include/hw/virtio/virtio-access.h b/include/hw/virtio/virtio-access.h
> index 440b455..4071dad 100644
> --- a/include/hw/virtio/virtio-access.h
> +++ b/include/hw/virtio/virtio-access.h
> @@ -17,12 +17,25 @@
>  #define QEMU_VIRTIO_ACCESS_H
> 
>  #include "hw/virtio/virtio.h"
> +#include "hw/virtio/virtio-bus.h"
>  #include "exec/address-spaces.h"
> 
>  #if defined(TARGET_PPC64) || defined(TARGET_ARM)
>  #define LEGACY_VIRTIO_IS_BIENDIAN 1
>  #endif
> 
> +static inline AddressSpace *virtio_get_dma_as(VirtIODevice *vdev)
> +{
> +    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
> +    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
> +
> +    if (virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM) &&
> +        k->get_dma_as) {
> +        return k->get_dma_as(qbus->parent);
> +    }
> +    return &address_space_memory;
> +}

One thing I'm a bit worried about is that we're introducing a check
even if we know that the device will never support
VIRTIO_F_IOMMU_PLATFORM (i.e. virtio-ccw). The qom incantations will
add cycles to any invocation of this.

Is the address space likely to change during device lifetime? Can we
cache it in some way?
Michael S. Tsirkin Aug. 30, 2016, 10:02 a.m. UTC | #2
On Tue, Aug 30, 2016 at 09:31:27AM +0200, Cornelia Huck wrote:
> On Tue, 30 Aug 2016 11:06:50 +0800
> Jason Wang <jasowang@redhat.com> wrote:
> 
> > Currently, all virtio devices bypass IOMMU completely. This is because
> > address_space_memory is assumed and used during DMA emulation. This
> > patch converts the virtio core API to use DMA API. This idea is
> > 
> > - introducing a new transport specific helper to query the dma address
> >   space. (only pci version is implemented).
> > - query and use this address space during virtio device guest memory
> >   accessing when iommu platform (VIRTIO_F_IOMMU_PLATFORM) was enabled
> >   for this device.
> > 
> > Cc: Michael S. Tsirkin <mst@redhat.com>
> > Cc: Stefan Hajnoczi <stefanha@redhat.com>
> > Cc: Kevin Wolf <kwolf@redhat.com>
> > Cc: Amit Shah <amit.shah@redhat.com>
> > Cc: Paolo Bonzini <pbonzini@redhat.com>
> > Cc: qemu-block@nongnu.org
> > Signed-off-by: Jason Wang <jasowang@redhat.com>
> > ---
> >  hw/block/virtio-blk.c             |  2 +-
> >  hw/char/virtio-serial-bus.c       |  3 +-
> >  hw/scsi/virtio-scsi.c             |  4 ++-
> >  hw/virtio/virtio-pci.c            | 14 +++++++++
> >  hw/virtio/virtio.c                | 62 ++++++++++++++++++++++++---------------
> >  include/hw/virtio/virtio-access.h | 43 ++++++++++++++++++++-------
> >  include/hw/virtio/virtio-bus.h    |  1 +
> >  include/hw/virtio/virtio.h        |  8 +++--
> >  8 files changed, 98 insertions(+), 39 deletions(-)
> > 
> 
> > diff --git a/include/hw/virtio/virtio-access.h b/include/hw/virtio/virtio-access.h
> > index 440b455..4071dad 100644
> > --- a/include/hw/virtio/virtio-access.h
> > +++ b/include/hw/virtio/virtio-access.h
> > @@ -17,12 +17,25 @@
> >  #define QEMU_VIRTIO_ACCESS_H
> > 
> >  #include "hw/virtio/virtio.h"
> > +#include "hw/virtio/virtio-bus.h"
> >  #include "exec/address-spaces.h"
> > 
> >  #if defined(TARGET_PPC64) || defined(TARGET_ARM)
> >  #define LEGACY_VIRTIO_IS_BIENDIAN 1
> >  #endif
> > 
> > +static inline AddressSpace *virtio_get_dma_as(VirtIODevice *vdev)
> > +{
> > +    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
> > +    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
> > +
> > +    if (virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM) &&
> > +        k->get_dma_as) {
> > +        return k->get_dma_as(qbus->parent);
> > +    }
> > +    return &address_space_memory;
> > +}
> 
> One thing I'm a bit worried about is that we're introducing a check
> even if we know that the device will never support
> VIRTIO_F_IOMMU_PLATFORM (i.e. virtio-ccw). The qom incantations will
> add cycles to any invocation of this.

Yes - let's do container_of calls as opposed to QOM on data path.
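
As an illustration of that suggestion (a sketch only; the member path is an
assumption based on the struct layout of this era, where VirtIOPCIProxy
embeds PCIDevice as pci_dev and PCIDevice embeds DeviceState as qdev), the
transport callback could drop the checked QOM object cast in favour of plain
pointer arithmetic:

/* Sketch: container_of() instead of the checked VIRTIO_PCI(d) QOM cast.
 * The member path is an assumption, not taken from the posted patch. */
static AddressSpace *virtio_pci_get_dma_as(DeviceState *d)
{
    VirtIOPCIProxy *proxy = container_of(d, VirtIOPCIProxy, pci_dev.qdev);

    return pci_get_address_space(&proxy->pci_dev);
}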

> Is the address space likely to change during device lifetime? Can we
> cache it in some way?
Michael S. Tsirkin Aug. 30, 2016, 10:21 a.m. UTC | #3
On Tue, Aug 30, 2016 at 01:02:14PM +0300, Michael S. Tsirkin wrote:
> On Tue, Aug 30, 2016 at 09:31:27AM +0200, Cornelia Huck wrote:
> > On Tue, 30 Aug 2016 11:06:50 +0800
> > Jason Wang <jasowang@redhat.com> wrote:
> > 
> > > Currently, all virtio devices bypass IOMMU completely. This is because
> > > address_space_memory is assumed and used during DMA emulation. This
> > > patch converts the virtio core API to use DMA API. This idea is
> > > 
> > > - introducing a new transport specific helper to query the dma address
> > >   space. (only pci version is implemented).
> > > - query and use this address space during virtio device guest memory
> > >   accessing when iommu platform (VIRTIO_F_IOMMU_PLATFORM) was enabled
> > >   for this device.
> > > 
> > > Cc: Michael S. Tsirkin <mst@redhat.com>
> > > Cc: Stefan Hajnoczi <stefanha@redhat.com>
> > > Cc: Kevin Wolf <kwolf@redhat.com>
> > > Cc: Amit Shah <amit.shah@redhat.com>
> > > Cc: Paolo Bonzini <pbonzini@redhat.com>
> > > Cc: qemu-block@nongnu.org
> > > Signed-off-by: Jason Wang <jasowang@redhat.com>
> > > ---
> > >  hw/block/virtio-blk.c             |  2 +-
> > >  hw/char/virtio-serial-bus.c       |  3 +-
> > >  hw/scsi/virtio-scsi.c             |  4 ++-
> > >  hw/virtio/virtio-pci.c            | 14 +++++++++
> > >  hw/virtio/virtio.c                | 62 ++++++++++++++++++++++++---------------
> > >  include/hw/virtio/virtio-access.h | 43 ++++++++++++++++++++-------
> > >  include/hw/virtio/virtio-bus.h    |  1 +
> > >  include/hw/virtio/virtio.h        |  8 +++--
> > >  8 files changed, 98 insertions(+), 39 deletions(-)
> > > 
> > 
> > > diff --git a/include/hw/virtio/virtio-access.h b/include/hw/virtio/virtio-access.h
> > > index 440b455..4071dad 100644
> > > --- a/include/hw/virtio/virtio-access.h
> > > +++ b/include/hw/virtio/virtio-access.h
> > > @@ -17,12 +17,25 @@
> > >  #define QEMU_VIRTIO_ACCESS_H
> > > 
> > >  #include "hw/virtio/virtio.h"
> > > +#include "hw/virtio/virtio-bus.h"
> > >  #include "exec/address-spaces.h"
> > > 
> > >  #if defined(TARGET_PPC64) || defined(TARGET_ARM)
> > >  #define LEGACY_VIRTIO_IS_BIENDIAN 1
> > >  #endif
> > > 
> > > +static inline AddressSpace *virtio_get_dma_as(VirtIODevice *vdev)
> > > +{
> > > +    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
> > > +    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
> > > +
> > > +    if (virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM) &&
> > > +        k->get_dma_as) {
> > > +        return k->get_dma_as(qbus->parent);
> > > +    }
> > > +    return &address_space_memory;
> > > +}
> > 
> > One thing I'm a bit worried about is that we're introducing a check
> > even if we know that the device will never support
> > VIRTIO_F_IOMMU_PLATFORM (i.e. virtio-ccw). The qom incantations will
> > add cycles to any invocation of this.
> 
> Yes - let's do container_of calls as opposed to QOM on data path.

BTW, downstreams are building with --disable-qom-cast-debug, which drops
all QOM cast checks on the data path. One option is to simply make this
the default upstream as well. Another is to say that we want to distinguish
fast-path calls from slow-path ones; that way we would be able to bring
back some of the checks.


> > Is the address space likely to change during device lifetime? Can we
> > cache it in some way?
Cornelia Huck Aug. 30, 2016, 11:11 a.m. UTC | #4
On Tue, 30 Aug 2016 13:21:23 +0300
"Michael S. Tsirkin" <mst@redhat.com> wrote:

> BTW downstreams are building with --disable-qom-cast-debug which drops
> all QOM casts on data path - one way is to say we just make this the
> default upstream as well. Another to say that we want to distinguish
> fast path calls from slow path, this way we will be able to bring back
> some of the checks.

I find CONFIG_QOM_CAST_DEBUG a bit inconsistent, btw:

- for object casts, we optimize away all checks and just return the
object for !debug
- for class casts, we optimize away only the caching and still keep the
checking (why would we drop the caching if this can speed up things?)

We certainly want to have debug turned on during development to avoid
nasty surprises later (otherwise, why even bother?), but it makes sense
to turn it off for a release. (Is there an easy way to turn it off for
the release, normal or stable, and keep it during the development
cycle?)
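
In rough pseudocode, the asymmetry described above looks like this when
CONFIG_QOM_CAST_DEBUG is unset (a paraphrase of the behaviour, not the
literal qom/object.c source):

/* Object cast: all checking is skipped, the pointer comes back as-is. */
Object *object_dynamic_cast_assert(Object *obj, const char *typename,
                                   const char *file, int line,
                                   const char *func)
{
    return obj;
}

/* Class cast: only the per-class cache is skipped; the slower
 * object_class_dynamic_cast() walk still runs and still aborts on a
 * mismatch. */
ObjectClass *object_class_dynamic_cast_assert(ObjectClass *klass,
                                              const char *typename,
                                              const char *file, int line,
                                              const char *func)
{
    ObjectClass *ret = object_class_dynamic_cast(klass, typename);

    assert(ret != NULL);
    return ret;
}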
Michael S. Tsirkin Aug. 30, 2016, 11:15 a.m. UTC | #5
On Tue, Aug 30, 2016 at 01:11:05PM +0200, Cornelia Huck wrote:
> On Tue, 30 Aug 2016 13:21:23 +0300
> "Michael S. Tsirkin" <mst@redhat.com> wrote:
> 
> > BTW downstreams are building with --disable-qom-cast-debug which drops
> > all QOM casts on data path - one way is to say we just make this the
> > default upstream as well. Another to say that we want to distinguish
> > fast path calls from slow path, this way we will be able to bring back
> > some of the checks.
> 
> I find CONFIG_QOM_CAST_DEBUG a bit inconsistent, btw:
> 
> - for object casts, we optimize away all checks and just return the
> object for !debug
> - for class casts, we optimize away only the caching and still keep the
> checking (why would we drop the caching if this can speed up things?)
> 
> We certainly want to have debug turned on during development to avoid
> nasty surprises later (otherwise, why even bother?), but it makes sense
> to turn it off for a release. (Is there an easy way to turn it off for
> the release, normal or stable, and keep it during the development
> cycle?)

I think the assumption was that class casts are not on the data path.
Ideally we'd keep it on for releases too for non-data-path things,
to help improve security.
Jason Wang Aug. 31, 2016, 2:47 a.m. UTC | #6
On 2016-08-30 15:31, Cornelia Huck wrote:
> On Tue, 30 Aug 2016 11:06:50 +0800
> Jason Wang <jasowang@redhat.com> wrote:
>
>> Currently, all virtio devices bypass IOMMU completely. This is because
>> address_space_memory is assumed and used during DMA emulation. This
>> patch converts the virtio core API to use DMA API. This idea is
>>
>> - introducing a new transport specific helper to query the dma address
>>    space. (only pci version is implemented).
>> - query and use this address space during virtio device guest memory
>>    accessing when iommu platform (VIRTIO_F_IOMMU_PLATFORM) was enabled
>>    for this device.
>>
>> Cc: Michael S. Tsirkin <mst@redhat.com>
>> Cc: Stefan Hajnoczi <stefanha@redhat.com>
>> Cc: Kevin Wolf <kwolf@redhat.com>
>> Cc: Amit Shah <amit.shah@redhat.com>
>> Cc: Paolo Bonzini <pbonzini@redhat.com>
>> Cc: qemu-block@nongnu.org
>> Signed-off-by: Jason Wang <jasowang@redhat.com>
>> ---
>>   hw/block/virtio-blk.c             |  2 +-
>>   hw/char/virtio-serial-bus.c       |  3 +-
>>   hw/scsi/virtio-scsi.c             |  4 ++-
>>   hw/virtio/virtio-pci.c            | 14 +++++++++
>>   hw/virtio/virtio.c                | 62 ++++++++++++++++++++++++---------------
>>   include/hw/virtio/virtio-access.h | 43 ++++++++++++++++++++-------
>>   include/hw/virtio/virtio-bus.h    |  1 +
>>   include/hw/virtio/virtio.h        |  8 +++--
>>   8 files changed, 98 insertions(+), 39 deletions(-)
>>
>> diff --git a/include/hw/virtio/virtio-access.h b/include/hw/virtio/virtio-access.h
>> index 440b455..4071dad 100644
>> --- a/include/hw/virtio/virtio-access.h
>> +++ b/include/hw/virtio/virtio-access.h
>> @@ -17,12 +17,25 @@
>>   #define QEMU_VIRTIO_ACCESS_H
>>
>>   #include "hw/virtio/virtio.h"
>> +#include "hw/virtio/virtio-bus.h"
>>   #include "exec/address-spaces.h"
>>
>>   #if defined(TARGET_PPC64) || defined(TARGET_ARM)
>>   #define LEGACY_VIRTIO_IS_BIENDIAN 1
>>   #endif
>>
>> +static inline AddressSpace *virtio_get_dma_as(VirtIODevice *vdev)
>> +{
>> +    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
>> +    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
>> +
>> +    if (virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM) &&
>> +        k->get_dma_as) {
>> +        return k->get_dma_as(qbus->parent);
>> +    }
>> +    return &address_space_memory;
>> +}
> One thing I'm a bit worried about is that we're introducing a check
> even if we know that the device will never support
> VIRTIO_F_IOMMU_PLATFORM (i.e. virtio-ccw). The qom incantations will
> add cycles to any invocation of this.
>
> Is the address space likely to change during device lifetime? Can we
> cache it in some way?
>

I think so. We can cache it in vdev and set it when the features are set.

Then we can avoid the QOM stuff on the data path.
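
A minimal sketch of that caching idea (the dma_as field name is hypothetical
and not part of the posted patch): the address space is resolved once, off
the data path, and the helper becomes a plain field read.

/* Sketch only: resolve the DMA address space when features are set and
 * cache it in the device, so the data path never needs a QOM class cast. */
static void virtio_update_dma_as(VirtIODevice *vdev)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);

    vdev->dma_as = &address_space_memory;            /* default: bypass IOMMU */
    if (virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM) &&
        k->get_dma_as) {
        vdev->dma_as = k->get_dma_as(qbus->parent);  /* resolved once */
    }
}

static inline AddressSpace *virtio_get_dma_as(VirtIODevice *vdev)
{
    return vdev->dma_as;                             /* no QOM on the data path */
}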
Wei Xu Sept. 5, 2016, 2:26 a.m. UTC | #7
On 2016-08-30 11:06, Jason Wang wrote:
> @@ -1587,6 +1595,11 @@ static void virtio_pci_device_plugged(DeviceState *d, Error **errp)
>       }
>
>       if (legacy) {
> +        if (virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM)) {
> +            error_setg(errp, "VIRTIO_F_IOMMU_PLATFORM was supported by"
> +                       "neither legacy nor transitional device.");
> +            return ;
> +        }

Not sure if I understand it correctly; the "transitional device" wording here
may be a bit hard to understand. Just a tip for your convenience: besides
saying what is denied, can we add what kind of device is supported to the
message, such as "modern device only"? Like this:

"VIRTIO_F_IOMMU_PLATFORM is supported by modern device only, it
is not supported by either legacy or transitional device."
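
In code, the suggested wording would amount to something like this
(illustrative only, mirroring the error_setg() call in the hunk above):

error_setg(errp, "VIRTIO_F_IOMMU_PLATFORM is supported by modern devices "
           "only; it is not supported by either legacy or transitional "
           "devices");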

>           /* legacy and transitional */
>           pci_set_word(config + PCI_SUBSYSTEM_VENDOR_ID,
>                        pci_get_word(config + PCI_VENDOR_ID));
Michael S. Tsirkin Sept. 5, 2016, 2:33 a.m. UTC | #8
On Tue, Aug 30, 2016 at 11:06:50AM +0800, Jason Wang wrote:
> @@ -1587,6 +1595,11 @@ static void virtio_pci_device_plugged(DeviceState *d, Error **errp)
>      }
>  
>      if (legacy) {
> +        if (virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM)) {
> +            error_setg(errp, "VIRTIO_F_IOMMU_PLATFORM was supported by"
> +                       "neither legacy nor transitional device.");
> +            return ;
> +        }
>          /* legacy and transitional */
>          pci_set_word(config + PCI_SUBSYSTEM_VENDOR_ID,
>                       pci_get_word(config + PCI_VENDOR_ID));


We probably should have code to fail init if modern is disabled
since the configuration is inconsistent.

But I do not think we should break transitional devices
with this flag.
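
For illustration, the check being suggested could look roughly like the
following sketch (it assumes the modern flag already computed in
virtio_pci_device_plugged() and is not part of the posted patch):

    if (virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM) && !modern) {
        error_setg(errp, "iommu_platform=true requires a modern-capable "
                   "virtio-pci device (modern interface disabled)");
        return;
    }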


> @@ -2452,6 +2465,7 @@ static void virtio_pci_bus_class_init(ObjectClass *klass, void *data)
>      k->ioeventfd_disabled = virtio_pci_ioeventfd_disabled;
>      k->ioeventfd_set_disabled = virtio_pci_ioeventfd_set_disabled;
>      k->ioeventfd_assign = virtio_pci_ioeventfd_assign;
> +    k->get_dma_as = virtio_pci_get_dma_as;
>  }
>  
>  static const TypeInfo virtio_pci_bus_info = {
> diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
> index 15ee3a7..99ea97c 100644
> --- a/hw/virtio/virtio.c
> +++ b/hw/virtio/virtio.c
> @@ -23,6 +23,7 @@
>  #include "hw/virtio/virtio-bus.h"
>  #include "migration/migration.h"
>  #include "hw/virtio/virtio-access.h"
> +#include "sysemu/dma.h"
>  
>  /*
>   * The alignment to use between consumer and producer parts of vring.
> @@ -122,7 +123,7 @@ void virtio_queue_update_rings(VirtIODevice *vdev, int n)
>  static void vring_desc_read(VirtIODevice *vdev, VRingDesc *desc,
>                              hwaddr desc_pa, int i)
>  {
> -    address_space_read(&address_space_memory, desc_pa + i * sizeof(VRingDesc),
> +    address_space_read(virtio_get_dma_as(vdev), desc_pa + i * sizeof(VRingDesc),
>                         MEMTXATTRS_UNSPECIFIED, (void *)desc, sizeof(VRingDesc));
>      virtio_tswap64s(vdev, &desc->addr);
>      virtio_tswap32s(vdev, &desc->len);
> @@ -164,7 +165,7 @@ static inline void vring_used_write(VirtQueue *vq, VRingUsedElem *uelem,
>      virtio_tswap32s(vq->vdev, &uelem->id);
>      virtio_tswap32s(vq->vdev, &uelem->len);
>      pa = vq->vring.used + offsetof(VRingUsed, ring[i]);
> -    address_space_write(&address_space_memory, pa, MEMTXATTRS_UNSPECIFIED,
> +    address_space_write(virtio_get_dma_as(vq->vdev), pa, MEMTXATTRS_UNSPECIFIED,
>                         (void *)uelem, sizeof(VRingUsedElem));
>  }
>  
> @@ -244,6 +245,7 @@ int virtio_queue_empty(VirtQueue *vq)
>  static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem,
>                                 unsigned int len)
>  {
> +    AddressSpace *dma_as = virtio_get_dma_as(vq->vdev);
>      unsigned int offset;
>      int i;
>  
> @@ -251,17 +253,17 @@ static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem,
>      for (i = 0; i < elem->in_num; i++) {
>          size_t size = MIN(len - offset, elem->in_sg[i].iov_len);
>  
> -        cpu_physical_memory_unmap(elem->in_sg[i].iov_base,
> -                                  elem->in_sg[i].iov_len,
> -                                  1, size);
> +        dma_memory_unmap(dma_as, elem->in_sg[i].iov_base, elem->in_sg[i].iov_len,
> +                         DMA_DIRECTION_FROM_DEVICE, size);
>  
>          offset += size;
>      }
>  
>      for (i = 0; i < elem->out_num; i++)
> -        cpu_physical_memory_unmap(elem->out_sg[i].iov_base,
> -                                  elem->out_sg[i].iov_len,
> -                                  0, elem->out_sg[i].iov_len);
> +        dma_memory_unmap(dma_as, elem->out_sg[i].iov_base,
> +                         elem->out_sg[i].iov_len,
> +                         DMA_DIRECTION_TO_DEVICE,
> +                         elem->out_sg[i].iov_len);
>  }
>  
>  void virtqueue_discard(VirtQueue *vq, const VirtQueueElement *elem,
> @@ -451,7 +453,8 @@ int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
>      return in_bytes <= in_total && out_bytes <= out_total;
>  }
>  
> -static void virtqueue_map_desc(unsigned int *p_num_sg, hwaddr *addr, struct iovec *iov,
> +static void virtqueue_map_desc(VirtIODevice *vdev,
> +                               unsigned int *p_num_sg, hwaddr *addr, struct iovec *iov,
>                                 unsigned int max_num_sg, bool is_write,
>                                 hwaddr pa, size_t sz)
>  {
> @@ -471,7 +474,10 @@ static void virtqueue_map_desc(unsigned int *p_num_sg, hwaddr *addr, struct iove
>              exit(1);
>          }
>  
> -        iov[num_sg].iov_base = cpu_physical_memory_map(pa, &len, is_write);
> +        iov[num_sg].iov_base = dma_memory_map(virtio_get_dma_as(vdev), pa, &len,
> +                                              is_write ?
> +                                              DMA_DIRECTION_FROM_DEVICE:
> +                                              DMA_DIRECTION_TO_DEVICE);
>          iov[num_sg].iov_len = len;
>          addr[num_sg] = pa;
>  
> @@ -482,9 +488,9 @@ static void virtqueue_map_desc(unsigned int *p_num_sg, hwaddr *addr, struct iove
>      *p_num_sg = num_sg;
>  }
>  
> -static void virtqueue_map_iovec(struct iovec *sg, hwaddr *addr,
> -                                unsigned int *num_sg, unsigned int max_size,
> -                                int is_write)
> +static void virtqueue_map_iovec(VirtIODevice *vdev, struct iovec *sg,
> +                                hwaddr *addr, unsigned int *num_sg,
> +                                unsigned int max_size, int is_write)
>  {
>      unsigned int i;
>      hwaddr len;
> @@ -503,7 +509,10 @@ static void virtqueue_map_iovec(struct iovec *sg, hwaddr *addr,
>  
>      for (i = 0; i < *num_sg; i++) {
>          len = sg[i].iov_len;
> -        sg[i].iov_base = cpu_physical_memory_map(addr[i], &len, is_write);
> +        sg[i].iov_base = dma_memory_map(virtio_get_dma_as(vdev),
> +                                        addr[i], &len, is_write ?
> +                                        DMA_DIRECTION_FROM_DEVICE :
> +                                        DMA_DIRECTION_TO_DEVICE);
>          if (!sg[i].iov_base) {
>              error_report("virtio: error trying to map MMIO memory");
>              exit(1);
> @@ -515,12 +524,15 @@ static void virtqueue_map_iovec(struct iovec *sg, hwaddr *addr,
>      }
>  }
>  
> -void virtqueue_map(VirtQueueElement *elem)
> +void virtqueue_map(VirtIODevice *vdev, VirtQueueElement *elem)
>  {
> -    virtqueue_map_iovec(elem->in_sg, elem->in_addr, &elem->in_num,
> -                        VIRTQUEUE_MAX_SIZE, 1);
> -    virtqueue_map_iovec(elem->out_sg, elem->out_addr, &elem->out_num,
> -                        VIRTQUEUE_MAX_SIZE, 0);
> +    virtqueue_map_iovec(vdev, elem->in_sg, elem->in_addr, &elem->in_num,
> +                        MIN(ARRAY_SIZE(elem->in_sg), ARRAY_SIZE(elem->in_addr)),
> +                        1);
> +    virtqueue_map_iovec(vdev, elem->out_sg, elem->out_addr, &elem->out_num,
> +                        MIN(ARRAY_SIZE(elem->out_sg),
> +                        ARRAY_SIZE(elem->out_addr)),
> +                        0);
>  }
>  
>  void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_num)
> @@ -594,14 +606,14 @@ void *virtqueue_pop(VirtQueue *vq, size_t sz)
>      /* Collect all the descriptors */
>      do {
>          if (desc.flags & VRING_DESC_F_WRITE) {
> -            virtqueue_map_desc(&in_num, addr + out_num, iov + out_num,
> +            virtqueue_map_desc(vdev, &in_num, addr + out_num, iov + out_num,
>                                 VIRTQUEUE_MAX_SIZE - out_num, true, desc.addr, desc.len);
>          } else {
>              if (in_num) {
>                  error_report("Incorrect order for descriptors");
>                  exit(1);
>              }
> -            virtqueue_map_desc(&out_num, addr, iov,
> +            virtqueue_map_desc(vdev, &out_num, addr, iov,
>                                 VIRTQUEUE_MAX_SIZE, false, desc.addr, desc.len);
>          }
>  
> @@ -647,7 +659,7 @@ typedef struct VirtQueueElementOld {
>      struct iovec out_sg[VIRTQUEUE_MAX_SIZE];
>  } VirtQueueElementOld;
>  
> -void *qemu_get_virtqueue_element(QEMUFile *f, size_t sz)
> +void *qemu_get_virtqueue_element(VirtIODevice *vdev, QEMUFile *f, size_t sz)
>  {
>      VirtQueueElement *elem;
>      VirtQueueElementOld data;
> @@ -678,7 +690,7 @@ void *qemu_get_virtqueue_element(QEMUFile *f, size_t sz)
>          elem->out_sg[i].iov_len = data.out_sg[i].iov_len;
>      }
>  
> -    virtqueue_map(elem);
> +    virtqueue_map(vdev, elem);
>      return elem;
>  }
>  
> @@ -733,6 +745,10 @@ static int virtio_validate_features(VirtIODevice *vdev)
>  {
>      VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
>  
> +    if (virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM) &&
> +        !virtio_vdev_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM))
> +        return -EFAULT;
> +
>      if (k->validate_features) {
>          return k->validate_features(vdev);
>      } else {


This will have the effect of breaking all drivers older than 4.8.
I don't think it's necessary since most guests do not enable
the vIOMMU even if it's present. Further, Xen guests
use the DMA API even without VIRTIO_F_IOMMU_PLATFORM.
So things will continue to work for many legacy drivers.

> diff --git a/include/hw/virtio/virtio-access.h b/include/hw/virtio/virtio-access.h
> index 440b455..4071dad 100644
> --- a/include/hw/virtio/virtio-access.h
> +++ b/include/hw/virtio/virtio-access.h
> @@ -17,12 +17,25 @@
>  #define QEMU_VIRTIO_ACCESS_H
>  
>  #include "hw/virtio/virtio.h"
> +#include "hw/virtio/virtio-bus.h"
>  #include "exec/address-spaces.h"
>  
>  #if defined(TARGET_PPC64) || defined(TARGET_ARM)
>  #define LEGACY_VIRTIO_IS_BIENDIAN 1
>  #endif
>  
> +static inline AddressSpace *virtio_get_dma_as(VirtIODevice *vdev)
> +{
> +    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
> +    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
> +
> +    if (virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM) &&
> +        k->get_dma_as) {
> +        return k->get_dma_as(qbus->parent);
> +    }
> +    return &address_space_memory;
> +}
> +
>  static inline bool virtio_access_is_big_endian(VirtIODevice *vdev)
>  {
>  #if defined(LEGACY_VIRTIO_IS_BIENDIAN)
> @@ -40,45 +53,55 @@ static inline bool virtio_access_is_big_endian(VirtIODevice *vdev)
>  
>  static inline uint16_t virtio_lduw_phys(VirtIODevice *vdev, hwaddr pa)
>  {
> +    AddressSpace *dma_as = virtio_get_dma_as(vdev);
> +
>      if (virtio_access_is_big_endian(vdev)) {
> -        return lduw_be_phys(&address_space_memory, pa);
> +        return lduw_be_phys(dma_as, pa);
>      }
> -    return lduw_le_phys(&address_space_memory, pa);
> +    return lduw_le_phys(dma_as, pa);
>  }
>  
>  static inline uint32_t virtio_ldl_phys(VirtIODevice *vdev, hwaddr pa)
>  {
> +    AddressSpace *dma_as = virtio_get_dma_as(vdev);
> +
>      if (virtio_access_is_big_endian(vdev)) {
> -        return ldl_be_phys(&address_space_memory, pa);
> +        return ldl_be_phys(dma_as, pa);
>      }
> -    return ldl_le_phys(&address_space_memory, pa);
> +    return ldl_le_phys(dma_as, pa);
>  }
>  
>  static inline uint64_t virtio_ldq_phys(VirtIODevice *vdev, hwaddr pa)
>  {
> +    AddressSpace *dma_as = virtio_get_dma_as(vdev);
> +
>      if (virtio_access_is_big_endian(vdev)) {
> -        return ldq_be_phys(&address_space_memory, pa);
> +        return ldq_be_phys(dma_as, pa);
>      }
> -    return ldq_le_phys(&address_space_memory, pa);
> +    return ldq_le_phys(dma_as, pa);
>  }
>  
>  static inline void virtio_stw_phys(VirtIODevice *vdev, hwaddr pa,
>                                     uint16_t value)
>  {
> +    AddressSpace *dma_as = virtio_get_dma_as(vdev);
> +
>      if (virtio_access_is_big_endian(vdev)) {
> -        stw_be_phys(&address_space_memory, pa, value);
> +        stw_be_phys(dma_as, pa, value);
>      } else {
> -        stw_le_phys(&address_space_memory, pa, value);
> +        stw_le_phys(dma_as, pa, value);
>      }
>  }
>  
>  static inline void virtio_stl_phys(VirtIODevice *vdev, hwaddr pa,
>                                     uint32_t value)
>  {
> +    AddressSpace *dma_as = virtio_get_dma_as(vdev);
> +
>      if (virtio_access_is_big_endian(vdev)) {
> -        stl_be_phys(&address_space_memory, pa, value);
> +        stl_be_phys(dma_as, pa, value);
>      } else {
> -        stl_le_phys(&address_space_memory, pa, value);
> +        stl_le_phys(dma_as, pa, value);
>      }
>  }
>  
> diff --git a/include/hw/virtio/virtio-bus.h b/include/hw/virtio/virtio-bus.h
> index f3e5ef3..608ff48 100644
> --- a/include/hw/virtio/virtio-bus.h
> +++ b/include/hw/virtio/virtio-bus.h
> @@ -98,6 +98,7 @@ typedef struct VirtioBusClass {
>       * Note that changing this will break migration for this transport.
>       */
>      bool has_variable_vring_alignment;
> +    AddressSpace *(*get_dma_as)(DeviceState *d);
>  } VirtioBusClass;
>  
>  struct VirtioBusState {
> diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
> index d2490c1..147d062 100644
> --- a/include/hw/virtio/virtio.h
> +++ b/include/hw/virtio/virtio.h
> @@ -157,9 +157,9 @@ void virtqueue_discard(VirtQueue *vq, const VirtQueueElement *elem,
>  void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
>                      unsigned int len, unsigned int idx);
>  
> -void virtqueue_map(VirtQueueElement *elem);
> +void virtqueue_map(VirtIODevice *vdev, VirtQueueElement *elem);
>  void *virtqueue_pop(VirtQueue *vq, size_t sz);
> -void *qemu_get_virtqueue_element(QEMUFile *f, size_t sz);
> +void *qemu_get_virtqueue_element(VirtIODevice *vdev, QEMUFile *f, size_t sz);
>  void qemu_put_virtqueue_element(QEMUFile *f, VirtQueueElement *elem);
>  int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
>                            unsigned int out_bytes);
> @@ -252,7 +252,9 @@ typedef struct VirtIORNGConf VirtIORNGConf;
>      DEFINE_PROP_BIT64("notify_on_empty", _state, _field,  \
>                        VIRTIO_F_NOTIFY_ON_EMPTY, true), \
>      DEFINE_PROP_BIT64("any_layout", _state, _field, \
> -                      VIRTIO_F_ANY_LAYOUT, true)
> +                      VIRTIO_F_ANY_LAYOUT, true), \
> +    DEFINE_PROP_BIT64("iommu_platform", _state, _field, \
> +                      VIRTIO_F_IOMMU_PLATFORM, false)
>  
>  hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n);
>  hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n);
> -- 
> 2.7.4
Jason Wang Sept. 6, 2016, 6:30 a.m. UTC | #9
On 2016-09-05 10:26, Wei Xu wrote:
> On 2016-08-30 11:06, Jason Wang wrote:
>> @@ -1587,6 +1595,11 @@ static void 
>> virtio_pci_device_plugged(DeviceState *d, Error **errp)
>>       }
>>
>>       if (legacy) {
>> +        if (virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM)) {
>> +            error_setg(errp, "VIRTIO_F_IOMMU_PLATFORM was supported by"
>> +                       "neither legacy nor transitional device.");
>> +            return ;
>> +        }
>
> Not sure if i understand it correctly, the transitional device here 
> maybe a bit hard to understand, 

"transitional" were defined by spec.

> just a tip for your convenience,
> besides the denied prompt, can we add what kind of device is supported 
> to the message? such as modern device only, like this.
>
> "VIRTIO_F_IOMMU_PLATFORM is supported by modern device only, it
> is not supported by either legacy or transitional device."

Ok.

>
>>           /* legacy and transitional */
>>           pci_set_word(config + PCI_SUBSYSTEM_VENDOR_ID,
>>                        pci_get_word(config + PCI_VENDOR_ID));
>

Patch

diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index 331d766..8fd6df7 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -856,7 +856,7 @@  static int virtio_blk_load_device(VirtIODevice *vdev, QEMUFile *f,
             }
         }
 
-        req = qemu_get_virtqueue_element(f, sizeof(VirtIOBlockReq));
+        req = qemu_get_virtqueue_element(vdev, f, sizeof(VirtIOBlockReq));
         virtio_blk_init_request(s, virtio_get_queue(vdev, vq_idx), req);
         req->next = s->rq;
         s->rq = req;
diff --git a/hw/char/virtio-serial-bus.c b/hw/char/virtio-serial-bus.c
index db57a38..94f19ba 100644
--- a/hw/char/virtio-serial-bus.c
+++ b/hw/char/virtio-serial-bus.c
@@ -682,6 +682,7 @@  static void virtio_serial_post_load_timer_cb(void *opaque)
 static int fetch_active_ports_list(QEMUFile *f,
                                    VirtIOSerial *s, uint32_t nr_active_ports)
 {
+    VirtIODevice *vdev = VIRTIO_DEVICE(s);
     uint32_t i;
 
     s->post_load = g_malloc0(sizeof(*s->post_load));
@@ -715,7 +716,7 @@  static int fetch_active_ports_list(QEMUFile *f,
             qemu_get_be64s(f, &port->iov_offset);
 
             port->elem =
-                qemu_get_virtqueue_element(f, sizeof(VirtQueueElement));
+                qemu_get_virtqueue_element(vdev, f, sizeof(VirtQueueElement));
 
             /*
              *  Port was throttled on source machine.  Let's
diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c
index ce57ef6..4cc7627 100644
--- a/hw/scsi/virtio-scsi.c
+++ b/hw/scsi/virtio-scsi.c
@@ -197,12 +197,14 @@  static void *virtio_scsi_load_request(QEMUFile *f, SCSIRequest *sreq)
     SCSIBus *bus = sreq->bus;
     VirtIOSCSI *s = container_of(bus, VirtIOSCSI, bus);
     VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s);
+    VirtIODevice *vdev = VIRTIO_DEVICE(s);
     VirtIOSCSIReq *req;
     uint32_t n;
 
     qemu_get_be32s(f, &n);
     assert(n < vs->conf.num_queues);
-    req = qemu_get_virtqueue_element(f, sizeof(VirtIOSCSIReq) + vs->cdb_size);
+    req = qemu_get_virtqueue_element(vdev, f,
+                                     sizeof(VirtIOSCSIReq) + vs->cdb_size);
     virtio_scsi_init_req(s, vs->cmd_vqs[n], req);
 
     if (virtio_scsi_parse_req(req, sizeof(VirtIOSCSICmdReq) + vs->cdb_size,
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index 755f921..c10bf55 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -1162,6 +1162,14 @@  static int virtio_pci_query_nvectors(DeviceState *d)
     return proxy->nvectors;
 }
 
+static AddressSpace *virtio_pci_get_dma_as(DeviceState *d)
+{
+    VirtIOPCIProxy *proxy = VIRTIO_PCI(d);
+    PCIDevice *dev = &proxy->pci_dev;
+
+    return pci_get_address_space(dev);
+}
+
 static int virtio_pci_add_mem_cap(VirtIOPCIProxy *proxy,
                                    struct virtio_pci_cap *cap)
 {
@@ -1587,6 +1595,11 @@  static void virtio_pci_device_plugged(DeviceState *d, Error **errp)
     }
 
     if (legacy) {
+        if (virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM)) {
+            error_setg(errp, "VIRTIO_F_IOMMU_PLATFORM was supported by"
+                       "neither legacy nor transitional device.");
+            return ;
+        }
         /* legacy and transitional */
         pci_set_word(config + PCI_SUBSYSTEM_VENDOR_ID,
                      pci_get_word(config + PCI_VENDOR_ID));
@@ -2452,6 +2465,7 @@  static void virtio_pci_bus_class_init(ObjectClass *klass, void *data)
     k->ioeventfd_disabled = virtio_pci_ioeventfd_disabled;
     k->ioeventfd_set_disabled = virtio_pci_ioeventfd_set_disabled;
     k->ioeventfd_assign = virtio_pci_ioeventfd_assign;
+    k->get_dma_as = virtio_pci_get_dma_as;
 }
 
 static const TypeInfo virtio_pci_bus_info = {
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 15ee3a7..99ea97c 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -23,6 +23,7 @@ 
 #include "hw/virtio/virtio-bus.h"
 #include "migration/migration.h"
 #include "hw/virtio/virtio-access.h"
+#include "sysemu/dma.h"
 
 /*
  * The alignment to use between consumer and producer parts of vring.
@@ -122,7 +123,7 @@  void virtio_queue_update_rings(VirtIODevice *vdev, int n)
 static void vring_desc_read(VirtIODevice *vdev, VRingDesc *desc,
                             hwaddr desc_pa, int i)
 {
-    address_space_read(&address_space_memory, desc_pa + i * sizeof(VRingDesc),
+    address_space_read(virtio_get_dma_as(vdev), desc_pa + i * sizeof(VRingDesc),
                        MEMTXATTRS_UNSPECIFIED, (void *)desc, sizeof(VRingDesc));
     virtio_tswap64s(vdev, &desc->addr);
     virtio_tswap32s(vdev, &desc->len);
@@ -164,7 +165,7 @@  static inline void vring_used_write(VirtQueue *vq, VRingUsedElem *uelem,
     virtio_tswap32s(vq->vdev, &uelem->id);
     virtio_tswap32s(vq->vdev, &uelem->len);
     pa = vq->vring.used + offsetof(VRingUsed, ring[i]);
-    address_space_write(&address_space_memory, pa, MEMTXATTRS_UNSPECIFIED,
+    address_space_write(virtio_get_dma_as(vq->vdev), pa, MEMTXATTRS_UNSPECIFIED,
                        (void *)uelem, sizeof(VRingUsedElem));
 }
 
@@ -244,6 +245,7 @@  int virtio_queue_empty(VirtQueue *vq)
 static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem,
                                unsigned int len)
 {
+    AddressSpace *dma_as = virtio_get_dma_as(vq->vdev);
     unsigned int offset;
     int i;
 
@@ -251,17 +253,17 @@  static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem,
     for (i = 0; i < elem->in_num; i++) {
         size_t size = MIN(len - offset, elem->in_sg[i].iov_len);
 
-        cpu_physical_memory_unmap(elem->in_sg[i].iov_base,
-                                  elem->in_sg[i].iov_len,
-                                  1, size);
+        dma_memory_unmap(dma_as, elem->in_sg[i].iov_base, elem->in_sg[i].iov_len,
+                         DMA_DIRECTION_FROM_DEVICE, size);
 
         offset += size;
     }
 
     for (i = 0; i < elem->out_num; i++)
-        cpu_physical_memory_unmap(elem->out_sg[i].iov_base,
-                                  elem->out_sg[i].iov_len,
-                                  0, elem->out_sg[i].iov_len);
+        dma_memory_unmap(dma_as, elem->out_sg[i].iov_base,
+                         elem->out_sg[i].iov_len,
+                         DMA_DIRECTION_TO_DEVICE,
+                         elem->out_sg[i].iov_len);
 }
 
 void virtqueue_discard(VirtQueue *vq, const VirtQueueElement *elem,
@@ -451,7 +453,8 @@  int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
     return in_bytes <= in_total && out_bytes <= out_total;
 }
 
-static void virtqueue_map_desc(unsigned int *p_num_sg, hwaddr *addr, struct iovec *iov,
+static void virtqueue_map_desc(VirtIODevice *vdev,
+                               unsigned int *p_num_sg, hwaddr *addr, struct iovec *iov,
                                unsigned int max_num_sg, bool is_write,
                                hwaddr pa, size_t sz)
 {
@@ -471,7 +474,10 @@  static void virtqueue_map_desc(unsigned int *p_num_sg, hwaddr *addr, struct iove
             exit(1);
         }
 
-        iov[num_sg].iov_base = cpu_physical_memory_map(pa, &len, is_write);
+        iov[num_sg].iov_base = dma_memory_map(virtio_get_dma_as(vdev), pa, &len,
+                                              is_write ?
+                                              DMA_DIRECTION_FROM_DEVICE:
+                                              DMA_DIRECTION_TO_DEVICE);
         iov[num_sg].iov_len = len;
         addr[num_sg] = pa;
 
@@ -482,9 +488,9 @@  static void virtqueue_map_desc(unsigned int *p_num_sg, hwaddr *addr, struct iove
     *p_num_sg = num_sg;
 }
 
-static void virtqueue_map_iovec(struct iovec *sg, hwaddr *addr,
-                                unsigned int *num_sg, unsigned int max_size,
-                                int is_write)
+static void virtqueue_map_iovec(VirtIODevice *vdev, struct iovec *sg,
+                                hwaddr *addr, unsigned int *num_sg,
+                                unsigned int max_size, int is_write)
 {
     unsigned int i;
     hwaddr len;
@@ -503,7 +509,10 @@  static void virtqueue_map_iovec(struct iovec *sg, hwaddr *addr,
 
     for (i = 0; i < *num_sg; i++) {
         len = sg[i].iov_len;
-        sg[i].iov_base = cpu_physical_memory_map(addr[i], &len, is_write);
+        sg[i].iov_base = dma_memory_map(virtio_get_dma_as(vdev),
+                                        addr[i], &len, is_write ?
+                                        DMA_DIRECTION_FROM_DEVICE :
+                                        DMA_DIRECTION_TO_DEVICE);
         if (!sg[i].iov_base) {
             error_report("virtio: error trying to map MMIO memory");
             exit(1);
@@ -515,12 +524,15 @@  static void virtqueue_map_iovec(struct iovec *sg, hwaddr *addr,
     }
 }
 
-void virtqueue_map(VirtQueueElement *elem)
+void virtqueue_map(VirtIODevice *vdev, VirtQueueElement *elem)
 {
-    virtqueue_map_iovec(elem->in_sg, elem->in_addr, &elem->in_num,
-                        VIRTQUEUE_MAX_SIZE, 1);
-    virtqueue_map_iovec(elem->out_sg, elem->out_addr, &elem->out_num,
-                        VIRTQUEUE_MAX_SIZE, 0);
+    virtqueue_map_iovec(vdev, elem->in_sg, elem->in_addr, &elem->in_num,
+                        MIN(ARRAY_SIZE(elem->in_sg), ARRAY_SIZE(elem->in_addr)),
+                        1);
+    virtqueue_map_iovec(vdev, elem->out_sg, elem->out_addr, &elem->out_num,
+                        MIN(ARRAY_SIZE(elem->out_sg),
+                        ARRAY_SIZE(elem->out_addr)),
+                        0);
 }
 
 void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_num)
@@ -594,14 +606,14 @@  void *virtqueue_pop(VirtQueue *vq, size_t sz)
     /* Collect all the descriptors */
     do {
         if (desc.flags & VRING_DESC_F_WRITE) {
-            virtqueue_map_desc(&in_num, addr + out_num, iov + out_num,
+            virtqueue_map_desc(vdev, &in_num, addr + out_num, iov + out_num,
                                VIRTQUEUE_MAX_SIZE - out_num, true, desc.addr, desc.len);
         } else {
             if (in_num) {
                 error_report("Incorrect order for descriptors");
                 exit(1);
             }
-            virtqueue_map_desc(&out_num, addr, iov,
+            virtqueue_map_desc(vdev, &out_num, addr, iov,
                                VIRTQUEUE_MAX_SIZE, false, desc.addr, desc.len);
         }
 
@@ -647,7 +659,7 @@  typedef struct VirtQueueElementOld {
     struct iovec out_sg[VIRTQUEUE_MAX_SIZE];
 } VirtQueueElementOld;
 
-void *qemu_get_virtqueue_element(QEMUFile *f, size_t sz)
+void *qemu_get_virtqueue_element(VirtIODevice *vdev, QEMUFile *f, size_t sz)
 {
     VirtQueueElement *elem;
     VirtQueueElementOld data;
@@ -678,7 +690,7 @@  void *qemu_get_virtqueue_element(QEMUFile *f, size_t sz)
         elem->out_sg[i].iov_len = data.out_sg[i].iov_len;
     }
 
-    virtqueue_map(elem);
+    virtqueue_map(vdev, elem);
     return elem;
 }
 
@@ -733,6 +745,10 @@  static int virtio_validate_features(VirtIODevice *vdev)
 {
     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
 
+    if (virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM) &&
+        !virtio_vdev_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM))
+        return -EFAULT;
+
     if (k->validate_features) {
         return k->validate_features(vdev);
     } else {
diff --git a/include/hw/virtio/virtio-access.h b/include/hw/virtio/virtio-access.h
index 440b455..4071dad 100644
--- a/include/hw/virtio/virtio-access.h
+++ b/include/hw/virtio/virtio-access.h
@@ -17,12 +17,25 @@ 
 #define QEMU_VIRTIO_ACCESS_H
 
 #include "hw/virtio/virtio.h"
+#include "hw/virtio/virtio-bus.h"
 #include "exec/address-spaces.h"
 
 #if defined(TARGET_PPC64) || defined(TARGET_ARM)
 #define LEGACY_VIRTIO_IS_BIENDIAN 1
 #endif
 
+static inline AddressSpace *virtio_get_dma_as(VirtIODevice *vdev)
+{
+    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
+    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
+
+    if (virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM) &&
+        k->get_dma_as) {
+        return k->get_dma_as(qbus->parent);
+    }
+    return &address_space_memory;
+}
+
 static inline bool virtio_access_is_big_endian(VirtIODevice *vdev)
 {
 #if defined(LEGACY_VIRTIO_IS_BIENDIAN)
@@ -40,45 +53,55 @@  static inline bool virtio_access_is_big_endian(VirtIODevice *vdev)
 
 static inline uint16_t virtio_lduw_phys(VirtIODevice *vdev, hwaddr pa)
 {
+    AddressSpace *dma_as = virtio_get_dma_as(vdev);
+
     if (virtio_access_is_big_endian(vdev)) {
-        return lduw_be_phys(&address_space_memory, pa);
+        return lduw_be_phys(dma_as, pa);
     }
-    return lduw_le_phys(&address_space_memory, pa);
+    return lduw_le_phys(dma_as, pa);
 }
 
 static inline uint32_t virtio_ldl_phys(VirtIODevice *vdev, hwaddr pa)
 {
+    AddressSpace *dma_as = virtio_get_dma_as(vdev);
+
     if (virtio_access_is_big_endian(vdev)) {
-        return ldl_be_phys(&address_space_memory, pa);
+        return ldl_be_phys(dma_as, pa);
     }
-    return ldl_le_phys(&address_space_memory, pa);
+    return ldl_le_phys(dma_as, pa);
 }
 
 static inline uint64_t virtio_ldq_phys(VirtIODevice *vdev, hwaddr pa)
 {
+    AddressSpace *dma_as = virtio_get_dma_as(vdev);
+
     if (virtio_access_is_big_endian(vdev)) {
-        return ldq_be_phys(&address_space_memory, pa);
+        return ldq_be_phys(dma_as, pa);
     }
-    return ldq_le_phys(&address_space_memory, pa);
+    return ldq_le_phys(dma_as, pa);
 }
 
 static inline void virtio_stw_phys(VirtIODevice *vdev, hwaddr pa,
                                    uint16_t value)
 {
+    AddressSpace *dma_as = virtio_get_dma_as(vdev);
+
     if (virtio_access_is_big_endian(vdev)) {
-        stw_be_phys(&address_space_memory, pa, value);
+        stw_be_phys(dma_as, pa, value);
     } else {
-        stw_le_phys(&address_space_memory, pa, value);
+        stw_le_phys(dma_as, pa, value);
     }
 }
 
 static inline void virtio_stl_phys(VirtIODevice *vdev, hwaddr pa,
                                    uint32_t value)
 {
+    AddressSpace *dma_as = virtio_get_dma_as(vdev);
+
     if (virtio_access_is_big_endian(vdev)) {
-        stl_be_phys(&address_space_memory, pa, value);
+        stl_be_phys(dma_as, pa, value);
     } else {
-        stl_le_phys(&address_space_memory, pa, value);
+        stl_le_phys(dma_as, pa, value);
     }
 }
 
diff --git a/include/hw/virtio/virtio-bus.h b/include/hw/virtio/virtio-bus.h
index f3e5ef3..608ff48 100644
--- a/include/hw/virtio/virtio-bus.h
+++ b/include/hw/virtio/virtio-bus.h
@@ -98,6 +98,7 @@  typedef struct VirtioBusClass {
      * Note that changing this will break migration for this transport.
      */
     bool has_variable_vring_alignment;
+    AddressSpace *(*get_dma_as)(DeviceState *d);
 } VirtioBusClass;
 
 struct VirtioBusState {
diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
index d2490c1..147d062 100644
--- a/include/hw/virtio/virtio.h
+++ b/include/hw/virtio/virtio.h
@@ -157,9 +157,9 @@  void virtqueue_discard(VirtQueue *vq, const VirtQueueElement *elem,
 void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
                     unsigned int len, unsigned int idx);
 
-void virtqueue_map(VirtQueueElement *elem);
+void virtqueue_map(VirtIODevice *vdev, VirtQueueElement *elem);
 void *virtqueue_pop(VirtQueue *vq, size_t sz);
-void *qemu_get_virtqueue_element(QEMUFile *f, size_t sz);
+void *qemu_get_virtqueue_element(VirtIODevice *vdev, QEMUFile *f, size_t sz);
 void qemu_put_virtqueue_element(QEMUFile *f, VirtQueueElement *elem);
 int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
                           unsigned int out_bytes);
@@ -252,7 +252,9 @@  typedef struct VirtIORNGConf VirtIORNGConf;
     DEFINE_PROP_BIT64("notify_on_empty", _state, _field,  \
                       VIRTIO_F_NOTIFY_ON_EMPTY, true), \
     DEFINE_PROP_BIT64("any_layout", _state, _field, \
-                      VIRTIO_F_ANY_LAYOUT, true)
+                      VIRTIO_F_ANY_LAYOUT, true), \
+    DEFINE_PROP_BIT64("iommu_platform", _state, _field, \
+                      VIRTIO_F_IOMMU_PLATFORM, false)
 
 hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n);
 hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n);