
[for-4.2,v10,09/15] virtio-iommu: Implement translate

Message ID 20190730172137.23114-10-eric.auger@redhat.com (mailing list archive)
State New, archived
Series VIRTIO-IOMMU device

Commit Message

Eric Auger July 30, 2019, 5:21 p.m. UTC
This patch implements the translate callback.

Signed-off-by: Eric Auger <eric.auger@redhat.com>

---
v6 -> v7:
- implemented bypass-mode

v5 -> v6:
- replace error_report by qemu_log_mask

v4 -> v5:
- check the device domain is not NULL
- s/printf/error_report
- set flags to IOMMU_NONE in case of all translation faults
---
 hw/virtio/trace-events   |  1 +
 hw/virtio/virtio-iommu.c | 58 +++++++++++++++++++++++++++++++++++++++-
 2 files changed, 58 insertions(+), 1 deletion(-)

Comments

Peter Xu Aug. 19, 2019, 8:24 a.m. UTC | #1
On Tue, Jul 30, 2019 at 07:21:31PM +0200, Eric Auger wrote:
> @@ -464,19 +464,75 @@ static IOMMUTLBEntry virtio_iommu_translate(IOMMUMemoryRegion *mr, hwaddr addr,
>                                              int iommu_idx)
>  {
>      IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr);
> +    VirtIOIOMMU *s = sdev->viommu;
>      uint32_t sid;
> +    viommu_endpoint *ep;
> +    viommu_mapping *mapping;
> +    viommu_interval interval;
> +    bool bypass_allowed;
> +
> +    interval.low = addr;
> +    interval.high = addr + 1;
>  
>      IOMMUTLBEntry entry = {
>          .target_as = &address_space_memory,
>          .iova = addr,
>          .translated_addr = addr,
> -        .addr_mask = ~(hwaddr)0,
> +        .addr_mask = (1 << ctz32(s->config.page_size_mask)) - 1,
>          .perm = IOMMU_NONE,
>      };
>  
> +    bypass_allowed = virtio_has_feature(s->acked_features,
> +                                        VIRTIO_IOMMU_F_BYPASS);
> +
>      sid = virtio_iommu_get_sid(sdev);
>  
>      trace_virtio_iommu_translate(mr->parent_obj.name, sid, addr, flag);
> +    qemu_mutex_lock(&s->mutex);
> +
> +    ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(sid));
> +    if (!ep) {
> +        if (!bypass_allowed) {
> +            error_report("%s sid=%d is not known!!", __func__, sid);

Maybe use error_report_once() to avoid a DoS attack?  Also, would it be
good to unify the debug prints?  I see both error_report() and
qemu_log_mask() used across the patchset.  Or is that intentional?
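For illustration, a minimal sketch of the rate-limited variant suggested
here, reusing the !ep branch quoted above (assumed, not taken from a later
revision of the patch):

    /* needs "qemu/error-report.h" for error_report_once() */
    ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(sid));
    if (!ep) {
        if (!bypass_allowed) {
            /* emitted at most once per call site, not on every DMA access,
             * so a misbehaving guest cannot flood the host log
             */
            error_report_once("%s: sid=%d is not known", __func__, sid);
        } else {
            entry.perm = flag;
        }
        goto unlock;
    }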

> +        } else {
> +            entry.perm = flag;
> +        }
> +        goto unlock;
> +    }
> +
> +    if (!ep->domain) {
> +        if (!bypass_allowed) {
> +            qemu_log_mask(LOG_GUEST_ERROR,
> +                          "%s %02x:%02x.%01x not attached to any domain\n",
> +                          __func__, PCI_BUS_NUM(sid),
> +                          PCI_SLOT(sid), PCI_FUNC(sid));
> +        } else {
> +            entry.perm = flag;
> +        }
> +        goto unlock;
> +    }
> +
> +    mapping = g_tree_lookup(ep->domain->mappings, (gpointer)(&interval));
> +    if (!mapping) {
> +        qemu_log_mask(LOG_GUEST_ERROR,
> +                      "%s no mapping for 0x%"PRIx64" for sid=%d\n",
> +                      __func__, addr, sid);
> +        goto unlock;
> +    }
> +
> +    if (((flag & IOMMU_RO) && !(mapping->flags & VIRTIO_IOMMU_MAP_F_READ)) ||
> +        ((flag & IOMMU_WO) && !(mapping->flags & VIRTIO_IOMMU_MAP_F_WRITE))) {
> +        qemu_log_mask(LOG_GUEST_ERROR,
> +                      "Permission error on 0x%"PRIx64"(%d): allowed=%d\n",
> +                      addr, flag, mapping->flags);
> +        goto unlock;
> +    }
> +    entry.translated_addr = addr - mapping->virt_addr + mapping->phys_addr;
> +    entry.perm = flag;
> +    trace_virtio_iommu_translate_out(addr, entry.translated_addr, sid);
> +
> +unlock:
> +    qemu_mutex_unlock(&s->mutex);
>      return entry;
>  }
>  
> -- 
> 2.20.1
> 

Regards,
Eric Auger Sept. 3, 2019, 11:45 a.m. UTC | #2
Hi Peter,

On 8/19/19 10:24 AM, Peter Xu wrote:
> On Tue, Jul 30, 2019 at 07:21:31PM +0200, Eric Auger wrote:
>> @@ -464,19 +464,75 @@ static IOMMUTLBEntry virtio_iommu_translate(IOMMUMemoryRegion *mr, hwaddr addr,
>>                                              int iommu_idx)
>>  {
>>      IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr);
>> +    VirtIOIOMMU *s = sdev->viommu;
>>      uint32_t sid;
>> +    viommu_endpoint *ep;
>> +    viommu_mapping *mapping;
>> +    viommu_interval interval;
>> +    bool bypass_allowed;
>> +
>> +    interval.low = addr;
>> +    interval.high = addr + 1;
>>  
>>      IOMMUTLBEntry entry = {
>>          .target_as = &address_space_memory,
>>          .iova = addr,
>>          .translated_addr = addr,
>> -        .addr_mask = ~(hwaddr)0,
>> +        .addr_mask = (1 << ctz32(s->config.page_size_mask)) - 1,
>>          .perm = IOMMU_NONE,
>>      };
>>  
>> +    bypass_allowed = virtio_has_feature(s->acked_features,
>> +                                        VIRTIO_IOMMU_F_BYPASS);
>> +
>>      sid = virtio_iommu_get_sid(sdev);
>>  
>>      trace_virtio_iommu_translate(mr->parent_obj.name, sid, addr, flag);
>> +    qemu_mutex_lock(&s->mutex);
>> +
>> +    ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(sid));
>> +    if (!ep) {
>> +        if (!bypass_allowed) {
>> +            error_report("%s sid=%d is not known!!", __func__, sid);
> 
> Maybe use error_report_once() to avoid a DoS attack?  Also, would it be
> good to unify the debug prints?  I see both error_report() and
> qemu_log_mask() used across the patchset.  Or is that intentional?

I switched to error_report_once()

I understand that qemu_log_mask() should be used whenever the root cause
is a bad action of the guest OS (in the case below, the EP was not
attached to any domain). Above, an EP attempts to talk through the IOMMU
when it is not expected to (rather a platform description issue or a
QEMU bug).
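As a rough sketch of that convention (the messages below are illustrative,
not the exact ones used in the patch): guest-triggered faults go through
the guest-error log, while unexpected QEMU/platform-side conditions use the
always-visible, rate-limited error channel.

    /* needs "qemu/log.h" and "qemu/error-report.h" */

    /* the guest did something wrong, e.g. issued DMA before attaching
     * the endpoint to a domain
     */
    qemu_log_mask(LOG_GUEST_ERROR,
                  "%s: sid=%d not attached to any domain\n", __func__, sid);

    /* unexpected on the QEMU/platform side: always reaches stderr,
     * but at most once per call site
     */
    error_report_once("%s: sid=%d is not known", __func__, sid);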

Thanks

Eric
> 
>> +        } else {
>> +            entry.perm = flag;
>> +        }
>> +        goto unlock;
>> +    }
>> +
>> +    if (!ep->domain) {
>> +        if (!bypass_allowed) {
>> +            qemu_log_mask(LOG_GUEST_ERROR,
>> +                          "%s %02x:%02x.%01x not attached to any domain\n",
>> +                          __func__, PCI_BUS_NUM(sid),
>> +                          PCI_SLOT(sid), PCI_FUNC(sid));
>> +        } else {
>> +            entry.perm = flag;
>> +        }
>> +        goto unlock;
>> +    }
>> +
>> +    mapping = g_tree_lookup(ep->domain->mappings, (gpointer)(&interval));
>> +    if (!mapping) {
>> +        qemu_log_mask(LOG_GUEST_ERROR,
>> +                      "%s no mapping for 0x%"PRIx64" for sid=%d\n",
>> +                      __func__, addr, sid);
>> +        goto unlock;
>> +    }
>> +
>> +    if (((flag & IOMMU_RO) && !(mapping->flags & VIRTIO_IOMMU_MAP_F_READ)) ||
>> +        ((flag & IOMMU_WO) && !(mapping->flags & VIRTIO_IOMMU_MAP_F_WRITE))) {
>> +        qemu_log_mask(LOG_GUEST_ERROR,
>> +                      "Permission error on 0x%"PRIx64"(%d): allowed=%d\n",
>> +                      addr, flag, mapping->flags);
>> +        goto unlock;
>> +    }
>> +    entry.translated_addr = addr - mapping->virt_addr + mapping->phys_addr;
>> +    entry.perm = flag;
>> +    trace_virtio_iommu_translate_out(addr, entry.translated_addr, sid);
>> +
>> +unlock:
>> +    qemu_mutex_unlock(&s->mutex);
>>      return entry;
>>  }
>>  
>> -- 
>> 2.20.1
>>
> 
> Regards,
>
Peter Xu Sept. 4, 2019, 1:58 a.m. UTC | #3
On Tue, Sep 03, 2019 at 01:45:22PM +0200, Auger Eric wrote:
> Hi Peter,
> 
> On 8/19/19 10:24 AM, Peter Xu wrote:
> > On Tue, Jul 30, 2019 at 07:21:31PM +0200, Eric Auger wrote:
> >> @@ -464,19 +464,75 @@ static IOMMUTLBEntry virtio_iommu_translate(IOMMUMemoryRegion *mr, hwaddr addr,
> >>                                              int iommu_idx)
> >>  {
> >>      IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr);
> >> +    VirtIOIOMMU *s = sdev->viommu;
> >>      uint32_t sid;
> >> +    viommu_endpoint *ep;
> >> +    viommu_mapping *mapping;
> >> +    viommu_interval interval;
> >> +    bool bypass_allowed;
> >> +
> >> +    interval.low = addr;
> >> +    interval.high = addr + 1;
> >>  
> >>      IOMMUTLBEntry entry = {
> >>          .target_as = &address_space_memory,
> >>          .iova = addr,
> >>          .translated_addr = addr,
> >> -        .addr_mask = ~(hwaddr)0,
> >> +        .addr_mask = (1 << ctz32(s->config.page_size_mask)) - 1,
> >>          .perm = IOMMU_NONE,
> >>      };
> >>  
> >> +    bypass_allowed = virtio_has_feature(s->acked_features,
> >> +                                        VIRTIO_IOMMU_F_BYPASS);
> >> +
> >>      sid = virtio_iommu_get_sid(sdev);
> >>  
> >>      trace_virtio_iommu_translate(mr->parent_obj.name, sid, addr, flag);
> >> +    qemu_mutex_lock(&s->mutex);
> >> +
> >> +    ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(sid));
> >> +    if (!ep) {
> >> +        if (!bypass_allowed) {
> >> +            error_report("%s sid=%d is not known!!", __func__, sid);
> > 
> > Maybe use error_report_once() to avoid a DoS attack?  Also, would it be
> > good to unify the debug prints?  I see both error_report() and
> > qemu_log_mask() used across the patchset.  Or is that intentional?
> 
> I switched to error_report_once()
> 
> I understand that qemu_log_mask() should be used whenever the root cause
> is a bad action of the guest OS (in the case below, the EP was not
> attached to any domain). Above, an EP attempts to talk through the IOMMU
> when it is not expected to (rather a platform description issue or a
> QEMU bug).

I see. It's a bit unclear, at least to me, how to use these.  I have
seen, and used, error_report*() to report guest misbehaviour as well,
just for debugging and triaging, simply because error_report*() will
always be there even without "-d" (when an issue happens, most users
are running without it...).  Then, with this information captured by
either libvirt or direct QEMU users, we can triage guest bugs more
easily.  I hope I'm not severely wrong, and please feel free to use
qemu_log_mask() no matter what.

Regards,
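For completeness, a small sketch of how the guest-error log discussed above
is enabled and consumed (the command line and message are examples, not
part of the patch): qemu_log_mask(LOG_GUEST_ERROR, ...) output only shows
up when the guest_errors log category is on, whereas error_report*() always
writes to stderr.

    /* needs "qemu/log.h"; output appears only when QEMU is started with
     * something like:  qemu-system-aarch64 -d guest_errors -D qemu.log ...
     */
    if (qemu_loglevel_mask(LOG_GUEST_ERROR)) {
        /* guard extra work behind the mask check; qemu_log_mask() would
         * perform the same check internally
         */
        qemu_log("virtio-iommu: translation fault for sid=%d\n", sid);
    }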

Patch

diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events
index 25a71b0505..8257065159 100644
--- a/hw/virtio/trace-events
+++ b/hw/virtio/trace-events
@@ -74,3 +74,4 @@  virtio_iommu_put_domain(uint32_t domain_id) "Free domain=%d"
 virtio_iommu_unmap_left_interval(uint64_t low, uint64_t high, uint64_t next_low, uint64_t next_high) "Unmap left [0x%"PRIx64",0x%"PRIx64"], new interval=[0x%"PRIx64",0x%"PRIx64"]"
 virtio_iommu_unmap_right_interval(uint64_t low, uint64_t high, uint64_t next_low, uint64_t next_high) "Unmap right [0x%"PRIx64",0x%"PRIx64"], new interval=[0x%"PRIx64",0x%"PRIx64"]"
 virtio_iommu_unmap_inc_interval(uint64_t low, uint64_t high) "Unmap inc [0x%"PRIx64",0x%"PRIx64"]"
+virtio_iommu_translate_out(uint64_t virt_addr, uint64_t phys_addr, uint32_t sid) "0x%"PRIx64" -> 0x%"PRIx64 " for sid=%d"
diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c
index 4706b9da6e..a8de583f9a 100644
--- a/hw/virtio/virtio-iommu.c
+++ b/hw/virtio/virtio-iommu.c
@@ -464,19 +464,75 @@  static IOMMUTLBEntry virtio_iommu_translate(IOMMUMemoryRegion *mr, hwaddr addr,
                                             int iommu_idx)
 {
     IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr);
+    VirtIOIOMMU *s = sdev->viommu;
     uint32_t sid;
+    viommu_endpoint *ep;
+    viommu_mapping *mapping;
+    viommu_interval interval;
+    bool bypass_allowed;
+
+    interval.low = addr;
+    interval.high = addr + 1;
 
     IOMMUTLBEntry entry = {
         .target_as = &address_space_memory,
         .iova = addr,
         .translated_addr = addr,
-        .addr_mask = ~(hwaddr)0,
+        .addr_mask = (1 << ctz32(s->config.page_size_mask)) - 1,
         .perm = IOMMU_NONE,
     };
 
+    bypass_allowed = virtio_has_feature(s->acked_features,
+                                        VIRTIO_IOMMU_F_BYPASS);
+
     sid = virtio_iommu_get_sid(sdev);
 
     trace_virtio_iommu_translate(mr->parent_obj.name, sid, addr, flag);
+    qemu_mutex_lock(&s->mutex);
+
+    ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(sid));
+    if (!ep) {
+        if (!bypass_allowed) {
+            error_report("%s sid=%d is not known!!", __func__, sid);
+        } else {
+            entry.perm = flag;
+        }
+        goto unlock;
+    }
+
+    if (!ep->domain) {
+        if (!bypass_allowed) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "%s %02x:%02x.%01x not attached to any domain\n",
+                          __func__, PCI_BUS_NUM(sid),
+                          PCI_SLOT(sid), PCI_FUNC(sid));
+        } else {
+            entry.perm = flag;
+        }
+        goto unlock;
+    }
+
+    mapping = g_tree_lookup(ep->domain->mappings, (gpointer)(&interval));
+    if (!mapping) {
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "%s no mapping for 0x%"PRIx64" for sid=%d\n",
+                      __func__, addr, sid);
+        goto unlock;
+    }
+
+    if (((flag & IOMMU_RO) && !(mapping->flags & VIRTIO_IOMMU_MAP_F_READ)) ||
+        ((flag & IOMMU_WO) && !(mapping->flags & VIRTIO_IOMMU_MAP_F_WRITE))) {
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "Permission error on 0x%"PRIx64"(%d): allowed=%d\n",
+                      addr, flag, mapping->flags);
+        goto unlock;
+    }
+    entry.translated_addr = addr - mapping->virt_addr + mapping->phys_addr;
+    entry.perm = flag;
+    trace_virtio_iommu_translate_out(addr, entry.translated_addr, sid);
+
+unlock:
+    qemu_mutex_unlock(&s->mutex);
     return entry;
 }