diff mbox series

[V2,2/4] intel-iommu: drop VTDBus

Message ID 20220321055429.10260-3-jasowang@redhat.com (mailing list archive)
State New, archived
Headers show
Series PASID support for Intel IOMMU | expand

Commit Message

Jason Wang March 21, 2022, 5:54 a.m. UTC
We introduce VTDBus structure as an intermediate step for searching
the address space. This works well with SID based matching/lookup. But
when we want to support SID plus PASID based address space lookup,
this intermediate steps turns out to be a burden. So the patch simply
drops the VTDBus structure and use the PCIBus and devfn as the key for
the g_hash_table(). This simplifies the codes and the future PASID
extension.

To prevent being slower for past vtd_find_as_from_bus_num() callers, a
vtd_as cache indexed by the bus number is introduced to store the last
recent search result of a vtd_as belongs to a specific bus.

Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 hw/i386/intel_iommu.c         | 238 +++++++++++++++++-----------------
 include/hw/i386/intel_iommu.h |  11 +-
 2 files changed, 123 insertions(+), 126 deletions(-)

Comments

Peter Xu April 22, 2022, 1:17 a.m. UTC | #1
Hi, Jason,

Mostly good to me, just a few nitpicks below.

On Mon, Mar 21, 2022 at 01:54:27PM +0800, Jason Wang wrote:
> We introduce VTDBus structure as an intermediate step for searching
> the address space. This works well with SID based matching/lookup. But
> when we want to support SID plus PASID based address space lookup,
> this intermediate steps turns out to be a burden. So the patch simply
> drops the VTDBus structure and use the PCIBus and devfn as the key for
> the g_hash_table(). This simplifies the codes and the future PASID
> extension.
> 
> To prevent being slower for past vtd_find_as_from_bus_num() callers, a
> vtd_as cache indexed by the bus number is introduced to store the last
> recent search result of a vtd_as belongs to a specific bus.
> 
> Signed-off-by: Jason Wang <jasowang@redhat.com>
> ---
>  hw/i386/intel_iommu.c         | 238 +++++++++++++++++-----------------
>  include/hw/i386/intel_iommu.h |  11 +-
>  2 files changed, 123 insertions(+), 126 deletions(-)
> 
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index 90964b201c..5851a17d0e 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -61,6 +61,16 @@
>      }                                                                         \
>  }
>  
> +/*
> + * PCI bus number (or SID) is not reliable since the device is usaully
> + * initalized before guest can configure the PCI bridge
> + * (SECONDARY_BUS_NUMBER).
> + */
> +struct vtd_as_key {
> +    PCIBus *bus;
> +    uint8_t devfn;
> +};
> +
>  static void vtd_address_space_refresh_all(IntelIOMMUState *s);
>  static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n);
>  
> @@ -210,6 +220,31 @@ static guint vtd_uint64_hash(gconstpointer v)
>      return (guint)*(const uint64_t *)v;
>  }
>  
> +static gboolean vtd_as_equal(gconstpointer v1, gconstpointer v2)
> +{
> +    const struct vtd_as_key *key1 = v1;
> +    const struct vtd_as_key *key2 = v2;
> +
> +    return (key1->bus == key2->bus) && (key1->devfn == key2->devfn);
> +}
> +
> +static inline uint16_t vtd_make_source_id(uint8_t bus_num, uint8_t devfn)
> +{
> +    return ((bus_num & 0xffUL) << 8) | (devfn & 0xffUL);
> +}

Nit: we could directly drop this one and use PCI_BUILD_BDF().

> +
> +/*
> + * Note that we use pointer to PCIBus as the key, so hashing/shifting
> + * based on the pointer value is intended.

Thanks for the comment; that helps.

Should we also mention that this hash is not the only interface to identify
two vtd_as*, say, even if on a 32bit system we got last 24 bits collapsed
on two vtd_as* pointers, we can still have vtd_as_equal() to guard us?

> + */
> +static guint vtd_as_hash(gconstpointer v)
> +{
> +    const struct vtd_as_key *key = v;
> +    guint value = (guint)(uintptr_t)key->bus;
> +
> +    return (guint)(value << 8 | key->devfn);
> +}
> +
>  static gboolean vtd_hash_remove_by_domain(gpointer key, gpointer value,
>                                            gpointer user_data)
>  {
> @@ -248,22 +283,14 @@ static gboolean vtd_hash_remove_by_page(gpointer key, gpointer value,
>  static void vtd_reset_context_cache_locked(IntelIOMMUState *s)
>  {
>      VTDAddressSpace *vtd_as;
> -    VTDBus *vtd_bus;
> -    GHashTableIter bus_it;
> -    uint32_t devfn_it;
> +    GHashTableIter as_it;
>  
>      trace_vtd_context_cache_reset();
>  
> -    g_hash_table_iter_init(&bus_it, s->vtd_as_by_busptr);
> +    g_hash_table_iter_init(&as_it, s->vtd_as);
>  
> -    while (g_hash_table_iter_next (&bus_it, NULL, (void**)&vtd_bus)) {
> -        for (devfn_it = 0; devfn_it < PCI_DEVFN_MAX; ++devfn_it) {
> -            vtd_as = vtd_bus->dev_as[devfn_it];
> -            if (!vtd_as) {
> -                continue;
> -            }
> -            vtd_as->context_cache_entry.context_cache_gen = 0;
> -        }
> +    while (g_hash_table_iter_next (&as_it, NULL, (void**)&vtd_as)) {
> +        vtd_as->context_cache_entry.context_cache_gen = 0;
>      }
>      s->context_cache_gen = 1;
>  }
> @@ -993,32 +1020,6 @@ static bool vtd_slpte_nonzero_rsvd(uint64_t slpte, uint32_t level)
>      return slpte & rsvd_mask;
>  }
>  
> -/* Find the VTD address space associated with a given bus number */
> -static VTDBus *vtd_find_as_from_bus_num(IntelIOMMUState *s, uint8_t bus_num)
> -{
> -    VTDBus *vtd_bus = s->vtd_as_by_bus_num[bus_num];
> -    GHashTableIter iter;
> -
> -    if (vtd_bus) {
> -        return vtd_bus;
> -    }
> -
> -    /*
> -     * Iterate over the registered buses to find the one which
> -     * currently holds this bus number and update the bus_num
> -     * lookup table.
> -     */
> -    g_hash_table_iter_init(&iter, s->vtd_as_by_busptr);
> -    while (g_hash_table_iter_next(&iter, NULL, (void **)&vtd_bus)) {
> -        if (pci_bus_num(vtd_bus->bus) == bus_num) {
> -            s->vtd_as_by_bus_num[bus_num] = vtd_bus;
> -            return vtd_bus;
> -        }
> -    }
> -
> -    return NULL;
> -}
> -
>  /* Given the @iova, get relevant @slptep. @slpte_level will be the last level
>   * of the translation, can be used for deciding the size of large page.
>   */
> @@ -1634,24 +1635,13 @@ static bool vtd_switch_address_space(VTDAddressSpace *as)
>  
>  static void vtd_switch_address_space_all(IntelIOMMUState *s)
>  {
> +    VTDAddressSpace *vtd_as;
>      GHashTableIter iter;
> -    VTDBus *vtd_bus;
> -    int i;
> -
> -    g_hash_table_iter_init(&iter, s->vtd_as_by_busptr);
> -    while (g_hash_table_iter_next(&iter, NULL, (void **)&vtd_bus)) {
> -        for (i = 0; i < PCI_DEVFN_MAX; i++) {
> -            if (!vtd_bus->dev_as[i]) {
> -                continue;
> -            }
> -            vtd_switch_address_space(vtd_bus->dev_as[i]);
> -        }
> -    }
> -}
>  
> -static inline uint16_t vtd_make_source_id(uint8_t bus_num, uint8_t devfn)
> -{
> -    return ((bus_num & 0xffUL) << 8) | (devfn & 0xffUL);
> +    g_hash_table_iter_init(&iter, s->vtd_as);
> +    while (g_hash_table_iter_next(&iter, NULL, (void **)&vtd_as)) {
> +        vtd_switch_address_space(vtd_as);
> +    }
>  }
>  
>  static const bool vtd_qualified_faults[] = {
> @@ -1688,18 +1678,39 @@ static inline bool vtd_is_interrupt_addr(hwaddr addr)
>      return VTD_INTERRUPT_ADDR_FIRST <= addr && addr <= VTD_INTERRUPT_ADDR_LAST;
>  }
>  
> +static gboolean vtd_find_as_by_sid(gpointer key, gpointer value,
> +                                   gpointer user_data)
> +{
> +    struct vtd_as_key *as_key = (struct vtd_as_key *)key;
> +    uint16_t target_sid = *(uint16_t *)user_data;
> +    uint16_t sid = vtd_make_source_id(pci_bus_num(as_key->bus),
> +                                      as_key->devfn);
> +    return sid == target_sid;
> +}
> +
> +static VTDAddressSpace *vtd_get_as_by_sid(IntelIOMMUState *s, uint16_t sid)
> +{
> +    uint8_t bus_num = sid >> 8;

PCI_BUS_NUM(sid)?

> +    VTDAddressSpace *vtd_as = s->vtd_as_cache[bus_num];
> +
> +    if (vtd_as &&
> +        (sid == vtd_make_source_id(pci_bus_num(vtd_as->bus),
> +                                   vtd_as->devfn))) {
> +        return vtd_as;
> +    }
> +
> +    vtd_as = g_hash_table_find(s->vtd_as, vtd_find_as_by_sid, &sid);
> +    s->vtd_as_cache[bus_num] = vtd_as;
> +
> +    return vtd_as;
> +}
> +
>  static void vtd_pt_enable_fast_path(IntelIOMMUState *s, uint16_t source_id)
>  {
> -    VTDBus *vtd_bus;
>      VTDAddressSpace *vtd_as;
>      bool success = false;
>  
> -    vtd_bus = vtd_find_as_from_bus_num(s, VTD_SID_TO_BUS(source_id));
> -    if (!vtd_bus) {
> -        goto out;
> -    }
> -
> -    vtd_as = vtd_bus->dev_as[VTD_SID_TO_DEVFN(source_id)];
> +    vtd_as = vtd_get_as_by_sid(s, source_id);
>      if (!vtd_as) {
>          goto out;
>      }
> @@ -1907,11 +1918,10 @@ static void vtd_context_device_invalidate(IntelIOMMUState *s,
>                                            uint16_t source_id,
>                                            uint16_t func_mask)
>  {
> +    GHashTableIter as_it;
>      uint16_t mask;
> -    VTDBus *vtd_bus;
>      VTDAddressSpace *vtd_as;
>      uint8_t bus_n, devfn;
> -    uint16_t devfn_it;
>  
>      trace_vtd_inv_desc_cc_devices(source_id, func_mask);
>  
> @@ -1934,32 +1944,31 @@ static void vtd_context_device_invalidate(IntelIOMMUState *s,
>      mask = ~mask;
>  
>      bus_n = VTD_SID_TO_BUS(source_id);
> -    vtd_bus = vtd_find_as_from_bus_num(s, bus_n);
> -    if (vtd_bus) {
> -        devfn = VTD_SID_TO_DEVFN(source_id);
> -        for (devfn_it = 0; devfn_it < PCI_DEVFN_MAX; ++devfn_it) {
> -            vtd_as = vtd_bus->dev_as[devfn_it];
> -            if (vtd_as && ((devfn_it & mask) == (devfn & mask))) {
> -                trace_vtd_inv_desc_cc_device(bus_n, VTD_PCI_SLOT(devfn_it),
> -                                             VTD_PCI_FUNC(devfn_it));
> -                vtd_iommu_lock(s);
> -                vtd_as->context_cache_entry.context_cache_gen = 0;
> -                vtd_iommu_unlock(s);
> -                /*
> -                 * Do switch address space when needed, in case if the
> -                 * device passthrough bit is switched.
> -                 */
> -                vtd_switch_address_space(vtd_as);
> -                /*
> -                 * So a device is moving out of (or moving into) a
> -                 * domain, resync the shadow page table.
> -                 * This won't bring bad even if we have no such
> -                 * notifier registered - the IOMMU notification
> -                 * framework will skip MAP notifications if that
> -                 * happened.
> -                 */
> -                vtd_sync_shadow_page_table(vtd_as);
> -            }
> +    devfn = VTD_SID_TO_DEVFN(source_id);
> +
> +    g_hash_table_iter_init(&as_it, s->vtd_as);
> +    while (g_hash_table_iter_next(&as_it, NULL, (void**)&vtd_as)) {
> +        if ((pci_bus_num(vtd_as->bus) == bus_n) &&
> +            (vtd_as->devfn & mask) == (devfn & mask)) {
> +            trace_vtd_inv_desc_cc_device(bus_n, VTD_PCI_SLOT(vtd_as->devfn),
> +                                         VTD_PCI_FUNC(vtd_as->devfn));
> +            vtd_iommu_lock(s);
> +            vtd_as->context_cache_entry.context_cache_gen = 0;
> +            vtd_iommu_unlock(s);
> +            /*
> +             * Do switch address space when needed, in case if the
> +             * device passthrough bit is switched.
> +             */
> +            vtd_switch_address_space(vtd_as);
> +            /*
> +             * So a device is moving out of (or moving into) a
> +             * domain, resync the shadow page table.
> +             * This won't bring bad even if we have no such
> +             * notifier registered - the IOMMU notification
> +             * framework will skip MAP notifications if that
> +             * happened.
> +             */
> +            vtd_sync_shadow_page_table(vtd_as);
>          }
>      }
>  }
> @@ -2473,18 +2482,13 @@ static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s,
>  {
>      VTDAddressSpace *vtd_dev_as;
>      IOMMUTLBEvent event;
> -    struct VTDBus *vtd_bus;
>      hwaddr addr;
>      uint64_t sz;
>      uint16_t sid;
> -    uint8_t devfn;
>      bool size;
> -    uint8_t bus_num;
>  
>      addr = VTD_INV_DESC_DEVICE_IOTLB_ADDR(inv_desc->hi);
>      sid = VTD_INV_DESC_DEVICE_IOTLB_SID(inv_desc->lo);
> -    devfn = sid & 0xff;
> -    bus_num = sid >> 8;
>      size = VTD_INV_DESC_DEVICE_IOTLB_SIZE(inv_desc->hi);
>  
>      if ((inv_desc->lo & VTD_INV_DESC_DEVICE_IOTLB_RSVD_LO) ||
> @@ -2495,12 +2499,11 @@ static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s,
>          return false;
>      }
>  
> -    vtd_bus = vtd_find_as_from_bus_num(s, bus_num);
> -    if (!vtd_bus) {
> -        goto done;
> -    }
> -
> -    vtd_dev_as = vtd_bus->dev_as[devfn];
> +    /*
> +     * Using sid is OK since the guest should have finished the
> +     * initialization of both the bus and device.
> +     */
> +    vtd_dev_as = vtd_get_as_by_sid(s, sid);
>      if (!vtd_dev_as) {
>          goto done;
>      }
> @@ -3426,27 +3429,27 @@ static const MemoryRegionOps vtd_mem_ir_ops = {
>  
>  VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn)
>  {
> -    uintptr_t key = (uintptr_t)bus;
> -    VTDBus *vtd_bus = g_hash_table_lookup(s->vtd_as_by_busptr, &key);
> +    /*
> +     * We can't simply use sid here since the bus number might not be
> +     * initialized by the guest.
> +     */
> +    struct vtd_as_key key = {
> +        .bus = bus,
> +        .devfn = devfn,
> +    };
>      VTDAddressSpace *vtd_dev_as;
>      char name[128];
>  
> -    if (!vtd_bus) {
> -        uintptr_t *new_key = g_malloc(sizeof(*new_key));
> -        *new_key = (uintptr_t)bus;
> -        /* No corresponding free() */
> -        vtd_bus = g_malloc0(sizeof(VTDBus) + sizeof(VTDAddressSpace *) * \
> -                            PCI_DEVFN_MAX);
> -        vtd_bus->bus = bus;
> -        g_hash_table_insert(s->vtd_as_by_busptr, new_key, vtd_bus);
> -    }
> +    vtd_dev_as = g_hash_table_lookup(s->vtd_as, &key);
> +    if (!vtd_dev_as) {
> +        struct vtd_as_key *new_key = g_malloc(sizeof(*new_key));
>  
> -    vtd_dev_as = vtd_bus->dev_as[devfn];
> +        new_key->bus = bus;
> +        new_key->devfn = devfn;
>  
> -    if (!vtd_dev_as) {
>          snprintf(name, sizeof(name), "vtd-%02x.%x", PCI_SLOT(devfn),
>                   PCI_FUNC(devfn));
> -        vtd_bus->dev_as[devfn] = vtd_dev_as = g_malloc0(sizeof(VTDAddressSpace));
> +        vtd_dev_as = g_malloc0(sizeof(VTDAddressSpace));
>  
>          vtd_dev_as->bus = bus;
>          vtd_dev_as->devfn = (uint8_t)devfn;
> @@ -3502,6 +3505,8 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn)
>                                              &vtd_dev_as->nodmar, 0);
>  
>          vtd_switch_address_space(vtd_dev_as);
> +
> +        g_hash_table_insert(s->vtd_as, new_key, vtd_dev_as);
>      }
>      return vtd_dev_as;
>  }
> @@ -3875,7 +3880,6 @@ static void vtd_realize(DeviceState *dev, Error **errp)
>  
>      QLIST_INIT(&s->vtd_as_with_notifiers);
>      qemu_mutex_init(&s->iommu_lock);
> -    memset(s->vtd_as_by_bus_num, 0, sizeof(s->vtd_as_by_bus_num));
>      memory_region_init_io(&s->csrmem, OBJECT(s), &vtd_mem_ops, s,
>                            "intel_iommu", DMAR_REG_SIZE);
>  
> @@ -3897,8 +3901,8 @@ static void vtd_realize(DeviceState *dev, Error **errp)
>      /* No corresponding destroy */
>      s->iotlb = g_hash_table_new_full(vtd_uint64_hash, vtd_uint64_equal,
>                                       g_free, g_free);
> -    s->vtd_as_by_busptr = g_hash_table_new_full(vtd_uint64_hash, vtd_uint64_equal,
> -                                              g_free, g_free);
> +    s->vtd_as = g_hash_table_new_full(vtd_as_hash, vtd_as_equal,
> +                                      g_free, g_free);
>      vtd_init(s);
>      sysbus_mmio_map(SYS_BUS_DEVICE(s), 0, Q35_HOST_BRIDGE_IOMMU_ADDR);
>      pci_setup_iommu(bus, vtd_host_dma_iommu, dev);
> diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
> index 3b5ac869db..fa1bed353c 100644
> --- a/include/hw/i386/intel_iommu.h
> +++ b/include/hw/i386/intel_iommu.h
> @@ -58,7 +58,6 @@ typedef struct VTDContextEntry VTDContextEntry;
>  typedef struct VTDContextCacheEntry VTDContextCacheEntry;
>  typedef struct VTDAddressSpace VTDAddressSpace;
>  typedef struct VTDIOTLBEntry VTDIOTLBEntry;
> -typedef struct VTDBus VTDBus;
>  typedef union VTD_IR_TableEntry VTD_IR_TableEntry;
>  typedef union VTD_IR_MSIAddress VTD_IR_MSIAddress;
>  typedef struct VTDPASIDDirEntry VTDPASIDDirEntry;
> @@ -111,12 +110,6 @@ struct VTDAddressSpace {
>      IOVATree *iova_tree;          /* Traces mapped IOVA ranges */
>  };
>  
> -struct VTDBus {
> -    PCIBus* bus;		/* A reference to the bus to provide translation for */
> -    /* A table of VTDAddressSpace objects indexed by devfn */
> -    VTDAddressSpace *dev_as[];
> -};
> -
>  struct VTDIOTLBEntry {
>      uint64_t gfn;
>      uint16_t domain_id;
> @@ -253,8 +246,8 @@ struct IntelIOMMUState {
>      uint32_t context_cache_gen;     /* Should be in [1,MAX] */
>      GHashTable *iotlb;              /* IOTLB */
>  
> -    GHashTable *vtd_as_by_busptr;   /* VTDBus objects indexed by PCIBus* reference */
> -    VTDBus *vtd_as_by_bus_num[VTD_PCI_BUS_MAX]; /* VTDBus objects indexed by bus number */
> +    GHashTable *vtd_as;             /* VTD address space indexed by source id*/

It's not indexed by source ID but vtd_as_key?

Meanwhile how about renaming it to vtd_address_spaces?  Since we use vtd_as
as the name for VTDAddressSpace* in most code paths, so imho it'll be
easier to identify the two.

> +    VTDAddressSpace *vtd_as_cache[VTD_PCI_BUS_MAX]; /* VTD address space cache */
>      /* list of registered notifiers */
>      QLIST_HEAD(, VTDAddressSpace) vtd_as_with_notifiers;
>  
> -- 
> 2.25.1
>
Jason Wang April 22, 2022, 6:26 a.m. UTC | #2
On Fri, Apr 22, 2022 at 9:17 AM Peter Xu <peterx@redhat.com> wrote:
>
> Hi, Jason,
>
> Mostly good to me, just a few nitpicks below.
>
> On Mon, Mar 21, 2022 at 01:54:27PM +0800, Jason Wang wrote:
> > We introduce VTDBus structure as an intermediate step for searching
> > the address space. This works well with SID based matching/lookup. But
> > when we want to support SID plus PASID based address space lookup,
> > this intermediate steps turns out to be a burden. So the patch simply
> > drops the VTDBus structure and use the PCIBus and devfn as the key for
> > the g_hash_table(). This simplifies the codes and the future PASID
> > extension.
> >
> > To prevent being slower for past vtd_find_as_from_bus_num() callers, a
> > vtd_as cache indexed by the bus number is introduced to store the last
> > recent search result of a vtd_as belongs to a specific bus.
> >
> > Signed-off-by: Jason Wang <jasowang@redhat.com>
> > ---
> >  hw/i386/intel_iommu.c         | 238 +++++++++++++++++-----------------
> >  include/hw/i386/intel_iommu.h |  11 +-
> >  2 files changed, 123 insertions(+), 126 deletions(-)
> >
> > diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> > index 90964b201c..5851a17d0e 100644
> > --- a/hw/i386/intel_iommu.c
> > +++ b/hw/i386/intel_iommu.c
> > @@ -61,6 +61,16 @@
> >      }                                                                         \
> >  }
> >
> > +/*
> > + * PCI bus number (or SID) is not reliable since the device is usaully
> > + * initalized before guest can configure the PCI bridge
> > + * (SECONDARY_BUS_NUMBER).
> > + */
> > +struct vtd_as_key {
> > +    PCIBus *bus;
> > +    uint8_t devfn;
> > +};
> > +
> >  static void vtd_address_space_refresh_all(IntelIOMMUState *s);
> >  static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n);
> >
> > @@ -210,6 +220,31 @@ static guint vtd_uint64_hash(gconstpointer v)
> >      return (guint)*(const uint64_t *)v;
> >  }
> >
> > +static gboolean vtd_as_equal(gconstpointer v1, gconstpointer v2)
> > +{
> > +    const struct vtd_as_key *key1 = v1;
> > +    const struct vtd_as_key *key2 = v2;
> > +
> > +    return (key1->bus == key2->bus) && (key1->devfn == key2->devfn);
> > +}
> > +
> > +static inline uint16_t vtd_make_source_id(uint8_t bus_num, uint8_t devfn)
> > +{
> > +    return ((bus_num & 0xffUL) << 8) | (devfn & 0xffUL);
> > +}
>
> Nit: we could directly drop this one and use PCI_BUILD_BDF().

Will fix.

>
> > +
> > +/*
> > + * Note that we use pointer to PCIBus as the key, so hashing/shifting
> > + * based on the pointer value is intended.
>
> Thanks for the comment; that helps.
>
> Should we also mention that this hash is not the only interface to identify
> two vtd_as*, say, even if on a 32bit system we got last 24 bits collapsed
> on two vtd_as* pointers, we can still have vtd_as_equal() to guard us?

Ok. let me add that in the next version.

>
> > + */
> > +static guint vtd_as_hash(gconstpointer v)
> > +{
> > +    const struct vtd_as_key *key = v;
> > +    guint value = (guint)(uintptr_t)key->bus;
> > +
> > +    return (guint)(value << 8 | key->devfn);
> > +}
> > +
> >  static gboolean vtd_hash_remove_by_domain(gpointer key, gpointer value,
> >                                            gpointer user_data)
> >  {
> > @@ -248,22 +283,14 @@ static gboolean vtd_hash_remove_by_page(gpointer key, gpointer value,
> >  static void vtd_reset_context_cache_locked(IntelIOMMUState *s)
> >  {
> >      VTDAddressSpace *vtd_as;
> > -    VTDBus *vtd_bus;
> > -    GHashTableIter bus_it;
> > -    uint32_t devfn_it;
> > +    GHashTableIter as_it;
> >
> >      trace_vtd_context_cache_reset();
> >
> > -    g_hash_table_iter_init(&bus_it, s->vtd_as_by_busptr);
> > +    g_hash_table_iter_init(&as_it, s->vtd_as);
> >
> > -    while (g_hash_table_iter_next (&bus_it, NULL, (void**)&vtd_bus)) {
> > -        for (devfn_it = 0; devfn_it < PCI_DEVFN_MAX; ++devfn_it) {
> > -            vtd_as = vtd_bus->dev_as[devfn_it];
> > -            if (!vtd_as) {
> > -                continue;
> > -            }
> > -            vtd_as->context_cache_entry.context_cache_gen = 0;
> > -        }
> > +    while (g_hash_table_iter_next (&as_it, NULL, (void**)&vtd_as)) {
> > +        vtd_as->context_cache_entry.context_cache_gen = 0;
> >      }
> >      s->context_cache_gen = 1;
> >  }
> > @@ -993,32 +1020,6 @@ static bool vtd_slpte_nonzero_rsvd(uint64_t slpte, uint32_t level)
> >      return slpte & rsvd_mask;
> >  }
> >
> > -/* Find the VTD address space associated with a given bus number */
> > -static VTDBus *vtd_find_as_from_bus_num(IntelIOMMUState *s, uint8_t bus_num)
> > -{
> > -    VTDBus *vtd_bus = s->vtd_as_by_bus_num[bus_num];
> > -    GHashTableIter iter;
> > -
> > -    if (vtd_bus) {
> > -        return vtd_bus;
> > -    }
> > -
> > -    /*
> > -     * Iterate over the registered buses to find the one which
> > -     * currently holds this bus number and update the bus_num
> > -     * lookup table.
> > -     */
> > -    g_hash_table_iter_init(&iter, s->vtd_as_by_busptr);
> > -    while (g_hash_table_iter_next(&iter, NULL, (void **)&vtd_bus)) {
> > -        if (pci_bus_num(vtd_bus->bus) == bus_num) {
> > -            s->vtd_as_by_bus_num[bus_num] = vtd_bus;
> > -            return vtd_bus;
> > -        }
> > -    }
> > -
> > -    return NULL;
> > -}
> > -
> >  /* Given the @iova, get relevant @slptep. @slpte_level will be the last level
> >   * of the translation, can be used for deciding the size of large page.
> >   */
> > @@ -1634,24 +1635,13 @@ static bool vtd_switch_address_space(VTDAddressSpace *as)
> >
> >  static void vtd_switch_address_space_all(IntelIOMMUState *s)
> >  {
> > +    VTDAddressSpace *vtd_as;
> >      GHashTableIter iter;
> > -    VTDBus *vtd_bus;
> > -    int i;
> > -
> > -    g_hash_table_iter_init(&iter, s->vtd_as_by_busptr);
> > -    while (g_hash_table_iter_next(&iter, NULL, (void **)&vtd_bus)) {
> > -        for (i = 0; i < PCI_DEVFN_MAX; i++) {
> > -            if (!vtd_bus->dev_as[i]) {
> > -                continue;
> > -            }
> > -            vtd_switch_address_space(vtd_bus->dev_as[i]);
> > -        }
> > -    }
> > -}
> >
> > -static inline uint16_t vtd_make_source_id(uint8_t bus_num, uint8_t devfn)
> > -{
> > -    return ((bus_num & 0xffUL) << 8) | (devfn & 0xffUL);
> > +    g_hash_table_iter_init(&iter, s->vtd_as);
> > +    while (g_hash_table_iter_next(&iter, NULL, (void **)&vtd_as)) {
> > +        vtd_switch_address_space(vtd_as);
> > +    }
> >  }
> >
> >  static const bool vtd_qualified_faults[] = {
> > @@ -1688,18 +1678,39 @@ static inline bool vtd_is_interrupt_addr(hwaddr addr)
> >      return VTD_INTERRUPT_ADDR_FIRST <= addr && addr <= VTD_INTERRUPT_ADDR_LAST;
> >  }
> >
> > +static gboolean vtd_find_as_by_sid(gpointer key, gpointer value,
> > +                                   gpointer user_data)
> > +{
> > +    struct vtd_as_key *as_key = (struct vtd_as_key *)key;
> > +    uint16_t target_sid = *(uint16_t *)user_data;
> > +    uint16_t sid = vtd_make_source_id(pci_bus_num(as_key->bus),
> > +                                      as_key->devfn);
> > +    return sid == target_sid;
> > +}
> > +
> > +static VTDAddressSpace *vtd_get_as_by_sid(IntelIOMMUState *s, uint16_t sid)
> > +{
> > +    uint8_t bus_num = sid >> 8;
>
> PCI_BUS_NUM(sid)?

Will do.

>
> > +    VTDAddressSpace *vtd_as = s->vtd_as_cache[bus_num];
> > +
> > +    if (vtd_as &&
> > +        (sid == vtd_make_source_id(pci_bus_num(vtd_as->bus),
> > +                                   vtd_as->devfn))) {
> > +        return vtd_as;
> > +    }
> > +
> > +    vtd_as = g_hash_table_find(s->vtd_as, vtd_find_as_by_sid, &sid);
> > +    s->vtd_as_cache[bus_num] = vtd_as;
> > +
> > +    return vtd_as;
> > +}
> > +
> >  static void vtd_pt_enable_fast_path(IntelIOMMUState *s, uint16_t source_id)
> >  {
> > -    VTDBus *vtd_bus;
> >      VTDAddressSpace *vtd_as;
> >      bool success = false;
> >
> > -    vtd_bus = vtd_find_as_from_bus_num(s, VTD_SID_TO_BUS(source_id));
> > -    if (!vtd_bus) {
> > -        goto out;
> > -    }
> > -
> > -    vtd_as = vtd_bus->dev_as[VTD_SID_TO_DEVFN(source_id)];
> > +    vtd_as = vtd_get_as_by_sid(s, source_id);
> >      if (!vtd_as) {
> >          goto out;
> >      }
> > @@ -1907,11 +1918,10 @@ static void vtd_context_device_invalidate(IntelIOMMUState *s,
> >                                            uint16_t source_id,
> >                                            uint16_t func_mask)
> >  {
> > +    GHashTableIter as_it;
> >      uint16_t mask;
> > -    VTDBus *vtd_bus;
> >      VTDAddressSpace *vtd_as;
> >      uint8_t bus_n, devfn;
> > -    uint16_t devfn_it;
> >
> >      trace_vtd_inv_desc_cc_devices(source_id, func_mask);
> >
> > @@ -1934,32 +1944,31 @@ static void vtd_context_device_invalidate(IntelIOMMUState *s,
> >      mask = ~mask;
> >
> >      bus_n = VTD_SID_TO_BUS(source_id);
> > -    vtd_bus = vtd_find_as_from_bus_num(s, bus_n);
> > -    if (vtd_bus) {
> > -        devfn = VTD_SID_TO_DEVFN(source_id);
> > -        for (devfn_it = 0; devfn_it < PCI_DEVFN_MAX; ++devfn_it) {
> > -            vtd_as = vtd_bus->dev_as[devfn_it];
> > -            if (vtd_as && ((devfn_it & mask) == (devfn & mask))) {
> > -                trace_vtd_inv_desc_cc_device(bus_n, VTD_PCI_SLOT(devfn_it),
> > -                                             VTD_PCI_FUNC(devfn_it));
> > -                vtd_iommu_lock(s);
> > -                vtd_as->context_cache_entry.context_cache_gen = 0;
> > -                vtd_iommu_unlock(s);
> > -                /*
> > -                 * Do switch address space when needed, in case if the
> > -                 * device passthrough bit is switched.
> > -                 */
> > -                vtd_switch_address_space(vtd_as);
> > -                /*
> > -                 * So a device is moving out of (or moving into) a
> > -                 * domain, resync the shadow page table.
> > -                 * This won't bring bad even if we have no such
> > -                 * notifier registered - the IOMMU notification
> > -                 * framework will skip MAP notifications if that
> > -                 * happened.
> > -                 */
> > -                vtd_sync_shadow_page_table(vtd_as);
> > -            }
> > +    devfn = VTD_SID_TO_DEVFN(source_id);
> > +
> > +    g_hash_table_iter_init(&as_it, s->vtd_as);
> > +    while (g_hash_table_iter_next(&as_it, NULL, (void**)&vtd_as)) {
> > +        if ((pci_bus_num(vtd_as->bus) == bus_n) &&
> > +            (vtd_as->devfn & mask) == (devfn & mask)) {
> > +            trace_vtd_inv_desc_cc_device(bus_n, VTD_PCI_SLOT(vtd_as->devfn),
> > +                                         VTD_PCI_FUNC(vtd_as->devfn));
> > +            vtd_iommu_lock(s);
> > +            vtd_as->context_cache_entry.context_cache_gen = 0;
> > +            vtd_iommu_unlock(s);
> > +            /*
> > +             * Do switch address space when needed, in case if the
> > +             * device passthrough bit is switched.
> > +             */
> > +            vtd_switch_address_space(vtd_as);
> > +            /*
> > +             * So a device is moving out of (or moving into) a
> > +             * domain, resync the shadow page table.
> > +             * This won't bring bad even if we have no such
> > +             * notifier registered - the IOMMU notification
> > +             * framework will skip MAP notifications if that
> > +             * happened.
> > +             */
> > +            vtd_sync_shadow_page_table(vtd_as);
> >          }
> >      }
> >  }
> > @@ -2473,18 +2482,13 @@ static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s,
> >  {
> >      VTDAddressSpace *vtd_dev_as;
> >      IOMMUTLBEvent event;
> > -    struct VTDBus *vtd_bus;
> >      hwaddr addr;
> >      uint64_t sz;
> >      uint16_t sid;
> > -    uint8_t devfn;
> >      bool size;
> > -    uint8_t bus_num;
> >
> >      addr = VTD_INV_DESC_DEVICE_IOTLB_ADDR(inv_desc->hi);
> >      sid = VTD_INV_DESC_DEVICE_IOTLB_SID(inv_desc->lo);
> > -    devfn = sid & 0xff;
> > -    bus_num = sid >> 8;
> >      size = VTD_INV_DESC_DEVICE_IOTLB_SIZE(inv_desc->hi);
> >
> >      if ((inv_desc->lo & VTD_INV_DESC_DEVICE_IOTLB_RSVD_LO) ||
> > @@ -2495,12 +2499,11 @@ static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s,
> >          return false;
> >      }
> >
> > -    vtd_bus = vtd_find_as_from_bus_num(s, bus_num);
> > -    if (!vtd_bus) {
> > -        goto done;
> > -    }
> > -
> > -    vtd_dev_as = vtd_bus->dev_as[devfn];
> > +    /*
> > +     * Using sid is OK since the guest should have finished the
> > +     * initialization of both the bus and device.
> > +     */
> > +    vtd_dev_as = vtd_get_as_by_sid(s, sid);
> >      if (!vtd_dev_as) {
> >          goto done;
> >      }
> > @@ -3426,27 +3429,27 @@ static const MemoryRegionOps vtd_mem_ir_ops = {
> >
> >  VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn)
> >  {
> > -    uintptr_t key = (uintptr_t)bus;
> > -    VTDBus *vtd_bus = g_hash_table_lookup(s->vtd_as_by_busptr, &key);
> > +    /*
> > +     * We can't simply use sid here since the bus number might not be
> > +     * initialized by the guest.
> > +     */
> > +    struct vtd_as_key key = {
> > +        .bus = bus,
> > +        .devfn = devfn,
> > +    };
> >      VTDAddressSpace *vtd_dev_as;
> >      char name[128];
> >
> > -    if (!vtd_bus) {
> > -        uintptr_t *new_key = g_malloc(sizeof(*new_key));
> > -        *new_key = (uintptr_t)bus;
> > -        /* No corresponding free() */
> > -        vtd_bus = g_malloc0(sizeof(VTDBus) + sizeof(VTDAddressSpace *) * \
> > -                            PCI_DEVFN_MAX);
> > -        vtd_bus->bus = bus;
> > -        g_hash_table_insert(s->vtd_as_by_busptr, new_key, vtd_bus);
> > -    }
> > +    vtd_dev_as = g_hash_table_lookup(s->vtd_as, &key);
> > +    if (!vtd_dev_as) {
> > +        struct vtd_as_key *new_key = g_malloc(sizeof(*new_key));
> >
> > -    vtd_dev_as = vtd_bus->dev_as[devfn];
> > +        new_key->bus = bus;
> > +        new_key->devfn = devfn;
> >
> > -    if (!vtd_dev_as) {
> >          snprintf(name, sizeof(name), "vtd-%02x.%x", PCI_SLOT(devfn),
> >                   PCI_FUNC(devfn));
> > -        vtd_bus->dev_as[devfn] = vtd_dev_as = g_malloc0(sizeof(VTDAddressSpace));
> > +        vtd_dev_as = g_malloc0(sizeof(VTDAddressSpace));
> >
> >          vtd_dev_as->bus = bus;
> >          vtd_dev_as->devfn = (uint8_t)devfn;
> > @@ -3502,6 +3505,8 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn)
> >                                              &vtd_dev_as->nodmar, 0);
> >
> >          vtd_switch_address_space(vtd_dev_as);
> > +
> > +        g_hash_table_insert(s->vtd_as, new_key, vtd_dev_as);
> >      }
> >      return vtd_dev_as;
> >  }
> > @@ -3875,7 +3880,6 @@ static void vtd_realize(DeviceState *dev, Error **errp)
> >
> >      QLIST_INIT(&s->vtd_as_with_notifiers);
> >      qemu_mutex_init(&s->iommu_lock);
> > -    memset(s->vtd_as_by_bus_num, 0, sizeof(s->vtd_as_by_bus_num));
> >      memory_region_init_io(&s->csrmem, OBJECT(s), &vtd_mem_ops, s,
> >                            "intel_iommu", DMAR_REG_SIZE);
> >
> > @@ -3897,8 +3901,8 @@ static void vtd_realize(DeviceState *dev, Error **errp)
> >      /* No corresponding destroy */
> >      s->iotlb = g_hash_table_new_full(vtd_uint64_hash, vtd_uint64_equal,
> >                                       g_free, g_free);
> > -    s->vtd_as_by_busptr = g_hash_table_new_full(vtd_uint64_hash, vtd_uint64_equal,
> > -                                              g_free, g_free);
> > +    s->vtd_as = g_hash_table_new_full(vtd_as_hash, vtd_as_equal,
> > +                                      g_free, g_free);
> >      vtd_init(s);
> >      sysbus_mmio_map(SYS_BUS_DEVICE(s), 0, Q35_HOST_BRIDGE_IOMMU_ADDR);
> >      pci_setup_iommu(bus, vtd_host_dma_iommu, dev);
> > diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
> > index 3b5ac869db..fa1bed353c 100644
> > --- a/include/hw/i386/intel_iommu.h
> > +++ b/include/hw/i386/intel_iommu.h
> > @@ -58,7 +58,6 @@ typedef struct VTDContextEntry VTDContextEntry;
> >  typedef struct VTDContextCacheEntry VTDContextCacheEntry;
> >  typedef struct VTDAddressSpace VTDAddressSpace;
> >  typedef struct VTDIOTLBEntry VTDIOTLBEntry;
> > -typedef struct VTDBus VTDBus;
> >  typedef union VTD_IR_TableEntry VTD_IR_TableEntry;
> >  typedef union VTD_IR_MSIAddress VTD_IR_MSIAddress;
> >  typedef struct VTDPASIDDirEntry VTDPASIDDirEntry;
> > @@ -111,12 +110,6 @@ struct VTDAddressSpace {
> >      IOVATree *iova_tree;          /* Traces mapped IOVA ranges */
> >  };
> >
> > -struct VTDBus {
> > -    PCIBus* bus;             /* A reference to the bus to provide translation for */
> > -    /* A table of VTDAddressSpace objects indexed by devfn */
> > -    VTDAddressSpace *dev_as[];
> > -};
> > -
> >  struct VTDIOTLBEntry {
> >      uint64_t gfn;
> >      uint16_t domain_id;
> > @@ -253,8 +246,8 @@ struct IntelIOMMUState {
> >      uint32_t context_cache_gen;     /* Should be in [1,MAX] */
> >      GHashTable *iotlb;              /* IOTLB */
> >
> > -    GHashTable *vtd_as_by_busptr;   /* VTDBus objects indexed by PCIBus* reference */
> > -    VTDBus *vtd_as_by_bus_num[VTD_PCI_BUS_MAX]; /* VTDBus objects indexed by bus number */
> > +    GHashTable *vtd_as;             /* VTD address space indexed by source id*/
>
> It's not indexed by source ID but vtd_as_key?

Right, let me fix that.

>
> Meanwhile how about renaming it to vtd_address_spaces?  Since we use vtd_as
> as the name for VTDAddressSpace* in most code paths, so imho it'll be
> easier to identify the two.

Ok.

Thanks

>
> > +    VTDAddressSpace *vtd_as_cache[VTD_PCI_BUS_MAX]; /* VTD address space cache */
> >      /* list of registered notifiers */
> >      QLIST_HEAD(, VTDAddressSpace) vtd_as_with_notifiers;
> >
> > --
> > 2.25.1
> >
>
> --
> Peter Xu
>
Peter Xu April 22, 2022, 12:55 p.m. UTC | #3
On Fri, Apr 22, 2022 at 02:26:11PM +0800, Jason Wang wrote:
> On Fri, Apr 22, 2022 at 9:17 AM Peter Xu <peterx@redhat.com> wrote:
> >
> > Hi, Jason,
> >
> > Mostly good to me, just a few nitpicks below.
> >
> > On Mon, Mar 21, 2022 at 01:54:27PM +0800, Jason Wang wrote:
> > > We introduce VTDBus structure as an intermediate step for searching
> > > the address space. This works well with SID based matching/lookup. But
> > > when we want to support SID plus PASID based address space lookup,
> > > this intermediate steps turns out to be a burden. So the patch simply
> > > drops the VTDBus structure and use the PCIBus and devfn as the key for
> > > the g_hash_table(). This simplifies the codes and the future PASID
> > > extension.
> > >
> > > To prevent being slower for past vtd_find_as_from_bus_num() callers, a
> > > vtd_as cache indexed by the bus number is introduced to store the last
> > > recent search result of a vtd_as belongs to a specific bus.
> > >
> > > Signed-off-by: Jason Wang <jasowang@redhat.com>
> > > ---
> > >  hw/i386/intel_iommu.c         | 238 +++++++++++++++++-----------------
> > >  include/hw/i386/intel_iommu.h |  11 +-
> > >  2 files changed, 123 insertions(+), 126 deletions(-)
> > >
> > > diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> > > index 90964b201c..5851a17d0e 100644
> > > --- a/hw/i386/intel_iommu.c
> > > +++ b/hw/i386/intel_iommu.c
> > > @@ -61,6 +61,16 @@
> > >      }                                                                         \
> > >  }
> > >
> > > +/*
> > > + * PCI bus number (or SID) is not reliable since the device is usaully
> > > + * initalized before guest can configure the PCI bridge
> > > + * (SECONDARY_BUS_NUMBER).
> > > + */
> > > +struct vtd_as_key {
> > > +    PCIBus *bus;
> > > +    uint8_t devfn;
> > > +};
> > > +
> > >  static void vtd_address_space_refresh_all(IntelIOMMUState *s);
> > >  static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n);
> > >
> > > @@ -210,6 +220,31 @@ static guint vtd_uint64_hash(gconstpointer v)
> > >      return (guint)*(const uint64_t *)v;
> > >  }
> > >
> > > +static gboolean vtd_as_equal(gconstpointer v1, gconstpointer v2)
> > > +{
> > > +    const struct vtd_as_key *key1 = v1;
> > > +    const struct vtd_as_key *key2 = v2;
> > > +
> > > +    return (key1->bus == key2->bus) && (key1->devfn == key2->devfn);
> > > +}
> > > +
> > > +static inline uint16_t vtd_make_source_id(uint8_t bus_num, uint8_t devfn)
> > > +{
> > > +    return ((bus_num & 0xffUL) << 8) | (devfn & 0xffUL);
> > > +}
> >
> > Nit: we could directly drop this one and use PCI_BUILD_BDF().
> 
> Will fix.
> 
> >
> > > +
> > > +/*
> > > + * Note that we use pointer to PCIBus as the key, so hashing/shifting
> > > + * based on the pointer value is intended.
> >
> > Thanks for the comment; that helps.
> >
> > Should we also mention that this hash is not the only interface to identify
> > two vtd_as*, say, even if on a 32bit system we got last 24 bits collapsed
> > on two vtd_as* pointers, we can still have vtd_as_equal() to guard us?
> 
> Ok. let me add that in the next version.
> 
> >
> > > + */
> > > +static guint vtd_as_hash(gconstpointer v)
> > > +{
> > > +    const struct vtd_as_key *key = v;
> > > +    guint value = (guint)(uintptr_t)key->bus;
> > > +
> > > +    return (guint)(value << 8 | key->devfn);
> > > +}
> > > +
> > >  static gboolean vtd_hash_remove_by_domain(gpointer key, gpointer value,
> > >                                            gpointer user_data)
> > >  {
> > > @@ -248,22 +283,14 @@ static gboolean vtd_hash_remove_by_page(gpointer key, gpointer value,
> > >  static void vtd_reset_context_cache_locked(IntelIOMMUState *s)
> > >  {
> > >      VTDAddressSpace *vtd_as;
> > > -    VTDBus *vtd_bus;
> > > -    GHashTableIter bus_it;
> > > -    uint32_t devfn_it;
> > > +    GHashTableIter as_it;
> > >
> > >      trace_vtd_context_cache_reset();
> > >
> > > -    g_hash_table_iter_init(&bus_it, s->vtd_as_by_busptr);
> > > +    g_hash_table_iter_init(&as_it, s->vtd_as);
> > >
> > > -    while (g_hash_table_iter_next (&bus_it, NULL, (void**)&vtd_bus)) {
> > > -        for (devfn_it = 0; devfn_it < PCI_DEVFN_MAX; ++devfn_it) {
> > > -            vtd_as = vtd_bus->dev_as[devfn_it];
> > > -            if (!vtd_as) {
> > > -                continue;
> > > -            }
> > > -            vtd_as->context_cache_entry.context_cache_gen = 0;
> > > -        }
> > > +    while (g_hash_table_iter_next (&as_it, NULL, (void**)&vtd_as)) {
> > > +        vtd_as->context_cache_entry.context_cache_gen = 0;
> > >      }
> > >      s->context_cache_gen = 1;
> > >  }
> > > @@ -993,32 +1020,6 @@ static bool vtd_slpte_nonzero_rsvd(uint64_t slpte, uint32_t level)
> > >      return slpte & rsvd_mask;
> > >  }
> > >
> > > -/* Find the VTD address space associated with a given bus number */
> > > -static VTDBus *vtd_find_as_from_bus_num(IntelIOMMUState *s, uint8_t bus_num)
> > > -{
> > > -    VTDBus *vtd_bus = s->vtd_as_by_bus_num[bus_num];
> > > -    GHashTableIter iter;
> > > -
> > > -    if (vtd_bus) {
> > > -        return vtd_bus;
> > > -    }
> > > -
> > > -    /*
> > > -     * Iterate over the registered buses to find the one which
> > > -     * currently holds this bus number and update the bus_num
> > > -     * lookup table.
> > > -     */
> > > -    g_hash_table_iter_init(&iter, s->vtd_as_by_busptr);
> > > -    while (g_hash_table_iter_next(&iter, NULL, (void **)&vtd_bus)) {
> > > -        if (pci_bus_num(vtd_bus->bus) == bus_num) {
> > > -            s->vtd_as_by_bus_num[bus_num] = vtd_bus;
> > > -            return vtd_bus;
> > > -        }
> > > -    }
> > > -
> > > -    return NULL;
> > > -}
> > > -
> > >  /* Given the @iova, get relevant @slptep. @slpte_level will be the last level
> > >   * of the translation, can be used for deciding the size of large page.
> > >   */
> > > @@ -1634,24 +1635,13 @@ static bool vtd_switch_address_space(VTDAddressSpace *as)
> > >
> > >  static void vtd_switch_address_space_all(IntelIOMMUState *s)
> > >  {
> > > +    VTDAddressSpace *vtd_as;
> > >      GHashTableIter iter;
> > > -    VTDBus *vtd_bus;
> > > -    int i;
> > > -
> > > -    g_hash_table_iter_init(&iter, s->vtd_as_by_busptr);
> > > -    while (g_hash_table_iter_next(&iter, NULL, (void **)&vtd_bus)) {
> > > -        for (i = 0; i < PCI_DEVFN_MAX; i++) {
> > > -            if (!vtd_bus->dev_as[i]) {
> > > -                continue;
> > > -            }
> > > -            vtd_switch_address_space(vtd_bus->dev_as[i]);
> > > -        }
> > > -    }
> > > -}
> > >
> > > -static inline uint16_t vtd_make_source_id(uint8_t bus_num, uint8_t devfn)
> > > -{
> > > -    return ((bus_num & 0xffUL) << 8) | (devfn & 0xffUL);
> > > +    g_hash_table_iter_init(&iter, s->vtd_as);
> > > +    while (g_hash_table_iter_next(&iter, NULL, (void **)&vtd_as)) {
> > > +        vtd_switch_address_space(vtd_as);
> > > +    }
> > >  }
> > >
> > >  static const bool vtd_qualified_faults[] = {
> > > @@ -1688,18 +1678,39 @@ static inline bool vtd_is_interrupt_addr(hwaddr addr)
> > >      return VTD_INTERRUPT_ADDR_FIRST <= addr && addr <= VTD_INTERRUPT_ADDR_LAST;
> > >  }
> > >
> > > +static gboolean vtd_find_as_by_sid(gpointer key, gpointer value,
> > > +                                   gpointer user_data)
> > > +{
> > > +    struct vtd_as_key *as_key = (struct vtd_as_key *)key;
> > > +    uint16_t target_sid = *(uint16_t *)user_data;
> > > +    uint16_t sid = vtd_make_source_id(pci_bus_num(as_key->bus),
> > > +                                      as_key->devfn);
> > > +    return sid == target_sid;
> > > +}
> > > +
> > > +static VTDAddressSpace *vtd_get_as_by_sid(IntelIOMMUState *s, uint16_t sid)
> > > +{
> > > +    uint8_t bus_num = sid >> 8;
> >
> > PCI_BUS_NUM(sid)?
> 
> Will do.
> 
> >
> > > +    VTDAddressSpace *vtd_as = s->vtd_as_cache[bus_num];
> > > +
> > > +    if (vtd_as &&
> > > +        (sid == vtd_make_source_id(pci_bus_num(vtd_as->bus),
> > > +                                   vtd_as->devfn))) {
> > > +        return vtd_as;
> > > +    }
> > > +
> > > +    vtd_as = g_hash_table_find(s->vtd_as, vtd_find_as_by_sid, &sid);
> > > +    s->vtd_as_cache[bus_num] = vtd_as;
> > > +
> > > +    return vtd_as;
> > > +}
> > > +
> > >  static void vtd_pt_enable_fast_path(IntelIOMMUState *s, uint16_t source_id)
> > >  {
> > > -    VTDBus *vtd_bus;
> > >      VTDAddressSpace *vtd_as;
> > >      bool success = false;
> > >
> > > -    vtd_bus = vtd_find_as_from_bus_num(s, VTD_SID_TO_BUS(source_id));
> > > -    if (!vtd_bus) {
> > > -        goto out;
> > > -    }
> > > -
> > > -    vtd_as = vtd_bus->dev_as[VTD_SID_TO_DEVFN(source_id)];
> > > +    vtd_as = vtd_get_as_by_sid(s, source_id);
> > >      if (!vtd_as) {
> > >          goto out;
> > >      }
> > > @@ -1907,11 +1918,10 @@ static void vtd_context_device_invalidate(IntelIOMMUState *s,
> > >                                            uint16_t source_id,
> > >                                            uint16_t func_mask)
> > >  {
> > > +    GHashTableIter as_it;
> > >      uint16_t mask;
> > > -    VTDBus *vtd_bus;
> > >      VTDAddressSpace *vtd_as;
> > >      uint8_t bus_n, devfn;
> > > -    uint16_t devfn_it;
> > >
> > >      trace_vtd_inv_desc_cc_devices(source_id, func_mask);
> > >
> > > @@ -1934,32 +1944,31 @@ static void vtd_context_device_invalidate(IntelIOMMUState *s,
> > >      mask = ~mask;
> > >
> > >      bus_n = VTD_SID_TO_BUS(source_id);
> > > -    vtd_bus = vtd_find_as_from_bus_num(s, bus_n);
> > > -    if (vtd_bus) {
> > > -        devfn = VTD_SID_TO_DEVFN(source_id);
> > > -        for (devfn_it = 0; devfn_it < PCI_DEVFN_MAX; ++devfn_it) {
> > > -            vtd_as = vtd_bus->dev_as[devfn_it];
> > > -            if (vtd_as && ((devfn_it & mask) == (devfn & mask))) {
> > > -                trace_vtd_inv_desc_cc_device(bus_n, VTD_PCI_SLOT(devfn_it),
> > > -                                             VTD_PCI_FUNC(devfn_it));
> > > -                vtd_iommu_lock(s);
> > > -                vtd_as->context_cache_entry.context_cache_gen = 0;
> > > -                vtd_iommu_unlock(s);
> > > -                /*
> > > -                 * Do switch address space when needed, in case if the
> > > -                 * device passthrough bit is switched.
> > > -                 */
> > > -                vtd_switch_address_space(vtd_as);
> > > -                /*
> > > -                 * So a device is moving out of (or moving into) a
> > > -                 * domain, resync the shadow page table.
> > > -                 * This won't bring bad even if we have no such
> > > -                 * notifier registered - the IOMMU notification
> > > -                 * framework will skip MAP notifications if that
> > > -                 * happened.
> > > -                 */
> > > -                vtd_sync_shadow_page_table(vtd_as);
> > > -            }
> > > +    devfn = VTD_SID_TO_DEVFN(source_id);
> > > +
> > > +    g_hash_table_iter_init(&as_it, s->vtd_as);
> > > +    while (g_hash_table_iter_next(&as_it, NULL, (void**)&vtd_as)) {
> > > +        if ((pci_bus_num(vtd_as->bus) == bus_n) &&
> > > +            (vtd_as->devfn & mask) == (devfn & mask)) {
> > > +            trace_vtd_inv_desc_cc_device(bus_n, VTD_PCI_SLOT(vtd_as->devfn),
> > > +                                         VTD_PCI_FUNC(vtd_as->devfn));
> > > +            vtd_iommu_lock(s);
> > > +            vtd_as->context_cache_entry.context_cache_gen = 0;
> > > +            vtd_iommu_unlock(s);
> > > +            /*
> > > +             * Do switch address space when needed, in case if the
> > > +             * device passthrough bit is switched.
> > > +             */
> > > +            vtd_switch_address_space(vtd_as);
> > > +            /*
> > > +             * So a device is moving out of (or moving into) a
> > > +             * domain, resync the shadow page table.
> > > +             * This won't bring bad even if we have no such
> > > +             * notifier registered - the IOMMU notification
> > > +             * framework will skip MAP notifications if that
> > > +             * happened.
> > > +             */
> > > +            vtd_sync_shadow_page_table(vtd_as);
> > >          }
> > >      }
> > >  }
> > > @@ -2473,18 +2482,13 @@ static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s,
> > >  {
> > >      VTDAddressSpace *vtd_dev_as;
> > >      IOMMUTLBEvent event;
> > > -    struct VTDBus *vtd_bus;
> > >      hwaddr addr;
> > >      uint64_t sz;
> > >      uint16_t sid;
> > > -    uint8_t devfn;
> > >      bool size;
> > > -    uint8_t bus_num;
> > >
> > >      addr = VTD_INV_DESC_DEVICE_IOTLB_ADDR(inv_desc->hi);
> > >      sid = VTD_INV_DESC_DEVICE_IOTLB_SID(inv_desc->lo);
> > > -    devfn = sid & 0xff;
> > > -    bus_num = sid >> 8;
> > >      size = VTD_INV_DESC_DEVICE_IOTLB_SIZE(inv_desc->hi);
> > >
> > >      if ((inv_desc->lo & VTD_INV_DESC_DEVICE_IOTLB_RSVD_LO) ||
> > > @@ -2495,12 +2499,11 @@ static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s,
> > >          return false;
> > >      }
> > >
> > > -    vtd_bus = vtd_find_as_from_bus_num(s, bus_num);
> > > -    if (!vtd_bus) {
> > > -        goto done;
> > > -    }
> > > -
> > > -    vtd_dev_as = vtd_bus->dev_as[devfn];
> > > +    /*
> > > +     * Using sid is OK since the guest should have finished the
> > > +     * initialization of both the bus and device.
> > > +     */
> > > +    vtd_dev_as = vtd_get_as_by_sid(s, sid);
> > >      if (!vtd_dev_as) {
> > >          goto done;
> > >      }
> > > @@ -3426,27 +3429,27 @@ static const MemoryRegionOps vtd_mem_ir_ops = {
> > >
> > >  VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn)
> > >  {
> > > -    uintptr_t key = (uintptr_t)bus;
> > > -    VTDBus *vtd_bus = g_hash_table_lookup(s->vtd_as_by_busptr, &key);
> > > +    /*
> > > +     * We can't simply use sid here since the bus number might not be
> > > +     * initialized by the guest.
> > > +     */
> > > +    struct vtd_as_key key = {
> > > +        .bus = bus,
> > > +        .devfn = devfn,
> > > +    };
> > >      VTDAddressSpace *vtd_dev_as;
> > >      char name[128];
> > >
> > > -    if (!vtd_bus) {
> > > -        uintptr_t *new_key = g_malloc(sizeof(*new_key));
> > > -        *new_key = (uintptr_t)bus;
> > > -        /* No corresponding free() */
> > > -        vtd_bus = g_malloc0(sizeof(VTDBus) + sizeof(VTDAddressSpace *) * \
> > > -                            PCI_DEVFN_MAX);
> > > -        vtd_bus->bus = bus;
> > > -        g_hash_table_insert(s->vtd_as_by_busptr, new_key, vtd_bus);
> > > -    }
> > > +    vtd_dev_as = g_hash_table_lookup(s->vtd_as, &key);
> > > +    if (!vtd_dev_as) {
> > > +        struct vtd_as_key *new_key = g_malloc(sizeof(*new_key));
> > >
> > > -    vtd_dev_as = vtd_bus->dev_as[devfn];
> > > +        new_key->bus = bus;
> > > +        new_key->devfn = devfn;
> > >
> > > -    if (!vtd_dev_as) {
> > >          snprintf(name, sizeof(name), "vtd-%02x.%x", PCI_SLOT(devfn),
> > >                   PCI_FUNC(devfn));
> > > -        vtd_bus->dev_as[devfn] = vtd_dev_as = g_malloc0(sizeof(VTDAddressSpace));
> > > +        vtd_dev_as = g_malloc0(sizeof(VTDAddressSpace));
> > >
> > >          vtd_dev_as->bus = bus;
> > >          vtd_dev_as->devfn = (uint8_t)devfn;
> > > @@ -3502,6 +3505,8 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn)
> > >                                              &vtd_dev_as->nodmar, 0);
> > >
> > >          vtd_switch_address_space(vtd_dev_as);
> > > +
> > > +        g_hash_table_insert(s->vtd_as, new_key, vtd_dev_as);
> > >      }
> > >      return vtd_dev_as;
> > >  }
> > > @@ -3875,7 +3880,6 @@ static void vtd_realize(DeviceState *dev, Error **errp)
> > >
> > >      QLIST_INIT(&s->vtd_as_with_notifiers);
> > >      qemu_mutex_init(&s->iommu_lock);
> > > -    memset(s->vtd_as_by_bus_num, 0, sizeof(s->vtd_as_by_bus_num));
> > >      memory_region_init_io(&s->csrmem, OBJECT(s), &vtd_mem_ops, s,
> > >                            "intel_iommu", DMAR_REG_SIZE);
> > >
> > > @@ -3897,8 +3901,8 @@ static void vtd_realize(DeviceState *dev, Error **errp)
> > >      /* No corresponding destroy */
> > >      s->iotlb = g_hash_table_new_full(vtd_uint64_hash, vtd_uint64_equal,
> > >                                       g_free, g_free);
> > > -    s->vtd_as_by_busptr = g_hash_table_new_full(vtd_uint64_hash, vtd_uint64_equal,
> > > -                                              g_free, g_free);
> > > +    s->vtd_as = g_hash_table_new_full(vtd_as_hash, vtd_as_equal,
> > > +                                      g_free, g_free);
> > >      vtd_init(s);
> > >      sysbus_mmio_map(SYS_BUS_DEVICE(s), 0, Q35_HOST_BRIDGE_IOMMU_ADDR);
> > >      pci_setup_iommu(bus, vtd_host_dma_iommu, dev);
> > > diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
> > > index 3b5ac869db..fa1bed353c 100644
> > > --- a/include/hw/i386/intel_iommu.h
> > > +++ b/include/hw/i386/intel_iommu.h
> > > @@ -58,7 +58,6 @@ typedef struct VTDContextEntry VTDContextEntry;
> > >  typedef struct VTDContextCacheEntry VTDContextCacheEntry;
> > >  typedef struct VTDAddressSpace VTDAddressSpace;
> > >  typedef struct VTDIOTLBEntry VTDIOTLBEntry;
> > > -typedef struct VTDBus VTDBus;
> > >  typedef union VTD_IR_TableEntry VTD_IR_TableEntry;
> > >  typedef union VTD_IR_MSIAddress VTD_IR_MSIAddress;
> > >  typedef struct VTDPASIDDirEntry VTDPASIDDirEntry;
> > > @@ -111,12 +110,6 @@ struct VTDAddressSpace {
> > >      IOVATree *iova_tree;          /* Traces mapped IOVA ranges */
> > >  };
> > >
> > > -struct VTDBus {
> > > -    PCIBus* bus;             /* A reference to the bus to provide translation for */
> > > -    /* A table of VTDAddressSpace objects indexed by devfn */
> > > -    VTDAddressSpace *dev_as[];
> > > -};
> > > -
> > >  struct VTDIOTLBEntry {
> > >      uint64_t gfn;
> > >      uint16_t domain_id;
> > > @@ -253,8 +246,8 @@ struct IntelIOMMUState {
> > >      uint32_t context_cache_gen;     /* Should be in [1,MAX] */
> > >      GHashTable *iotlb;              /* IOTLB */
> > >
> > > -    GHashTable *vtd_as_by_busptr;   /* VTDBus objects indexed by PCIBus* reference */
> > > -    VTDBus *vtd_as_by_bus_num[VTD_PCI_BUS_MAX]; /* VTDBus objects indexed by bus number */
> > > +    GHashTable *vtd_as;             /* VTD address space indexed by source id*/
> >
> > It's not indexed by source ID but vtd_as_key?
> 
> Right, let me fix that.
> 
> >
> > Meanwhile how about renaming it to vtd_address_spaces?  Since we use vtd_as
> > as the name for VTDAddressSpace* in most code paths, so imho it'll be
> > easier to identify the two.
> 
> Ok.

If with all above nitpicks fixed, please feel free to add:

Reviewed-by: Peter Xu <peterx@redhat.com>

Thanks!
diff mbox series

Patch

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 90964b201c..5851a17d0e 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -61,6 +61,16 @@ 
     }                                                                         \
 }
 
+/*
+ * PCI bus number (or SID) is not reliable since the device is usaully
+ * initalized before guest can configure the PCI bridge
+ * (SECONDARY_BUS_NUMBER).
+ */
+struct vtd_as_key {
+    PCIBus *bus;
+    uint8_t devfn;
+};
+
 static void vtd_address_space_refresh_all(IntelIOMMUState *s);
 static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n);
 
@@ -210,6 +220,31 @@  static guint vtd_uint64_hash(gconstpointer v)
     return (guint)*(const uint64_t *)v;
 }
 
+static gboolean vtd_as_equal(gconstpointer v1, gconstpointer v2)
+{
+    const struct vtd_as_key *key1 = v1;
+    const struct vtd_as_key *key2 = v2;
+
+    return (key1->bus == key2->bus) && (key1->devfn == key2->devfn);
+}
+
+static inline uint16_t vtd_make_source_id(uint8_t bus_num, uint8_t devfn)
+{
+    return ((bus_num & 0xffUL) << 8) | (devfn & 0xffUL);
+}
+
+/*
+ * Note that we use pointer to PCIBus as the key, so hashing/shifting
+ * based on the pointer value is intended.
+ */
+static guint vtd_as_hash(gconstpointer v)
+{
+    const struct vtd_as_key *key = v;
+    guint value = (guint)(uintptr_t)key->bus;
+
+    return (guint)(value << 8 | key->devfn);
+}
+
 static gboolean vtd_hash_remove_by_domain(gpointer key, gpointer value,
                                           gpointer user_data)
 {
@@ -248,22 +283,14 @@  static gboolean vtd_hash_remove_by_page(gpointer key, gpointer value,
 static void vtd_reset_context_cache_locked(IntelIOMMUState *s)
 {
     VTDAddressSpace *vtd_as;
-    VTDBus *vtd_bus;
-    GHashTableIter bus_it;
-    uint32_t devfn_it;
+    GHashTableIter as_it;
 
     trace_vtd_context_cache_reset();
 
-    g_hash_table_iter_init(&bus_it, s->vtd_as_by_busptr);
+    g_hash_table_iter_init(&as_it, s->vtd_as);
 
-    while (g_hash_table_iter_next (&bus_it, NULL, (void**)&vtd_bus)) {
-        for (devfn_it = 0; devfn_it < PCI_DEVFN_MAX; ++devfn_it) {
-            vtd_as = vtd_bus->dev_as[devfn_it];
-            if (!vtd_as) {
-                continue;
-            }
-            vtd_as->context_cache_entry.context_cache_gen = 0;
-        }
+    while (g_hash_table_iter_next (&as_it, NULL, (void**)&vtd_as)) {
+        vtd_as->context_cache_entry.context_cache_gen = 0;
     }
     s->context_cache_gen = 1;
 }
@@ -993,32 +1020,6 @@  static bool vtd_slpte_nonzero_rsvd(uint64_t slpte, uint32_t level)
     return slpte & rsvd_mask;
 }
 
-/* Find the VTD address space associated with a given bus number */
-static VTDBus *vtd_find_as_from_bus_num(IntelIOMMUState *s, uint8_t bus_num)
-{
-    VTDBus *vtd_bus = s->vtd_as_by_bus_num[bus_num];
-    GHashTableIter iter;
-
-    if (vtd_bus) {
-        return vtd_bus;
-    }
-
-    /*
-     * Iterate over the registered buses to find the one which
-     * currently holds this bus number and update the bus_num
-     * lookup table.
-     */
-    g_hash_table_iter_init(&iter, s->vtd_as_by_busptr);
-    while (g_hash_table_iter_next(&iter, NULL, (void **)&vtd_bus)) {
-        if (pci_bus_num(vtd_bus->bus) == bus_num) {
-            s->vtd_as_by_bus_num[bus_num] = vtd_bus;
-            return vtd_bus;
-        }
-    }
-
-    return NULL;
-}
-
 /* Given the @iova, get relevant @slptep. @slpte_level will be the last level
  * of the translation, can be used for deciding the size of large page.
  */
@@ -1634,24 +1635,13 @@  static bool vtd_switch_address_space(VTDAddressSpace *as)
 
 static void vtd_switch_address_space_all(IntelIOMMUState *s)
 {
+    VTDAddressSpace *vtd_as;
     GHashTableIter iter;
-    VTDBus *vtd_bus;
-    int i;
-
-    g_hash_table_iter_init(&iter, s->vtd_as_by_busptr);
-    while (g_hash_table_iter_next(&iter, NULL, (void **)&vtd_bus)) {
-        for (i = 0; i < PCI_DEVFN_MAX; i++) {
-            if (!vtd_bus->dev_as[i]) {
-                continue;
-            }
-            vtd_switch_address_space(vtd_bus->dev_as[i]);
-        }
-    }
-}
 
-static inline uint16_t vtd_make_source_id(uint8_t bus_num, uint8_t devfn)
-{
-    return ((bus_num & 0xffUL) << 8) | (devfn & 0xffUL);
+    g_hash_table_iter_init(&iter, s->vtd_as);
+    while (g_hash_table_iter_next(&iter, NULL, (void **)&vtd_as)) {
+        vtd_switch_address_space(vtd_as);
+    }
 }
 
 static const bool vtd_qualified_faults[] = {
@@ -1688,18 +1678,39 @@  static inline bool vtd_is_interrupt_addr(hwaddr addr)
     return VTD_INTERRUPT_ADDR_FIRST <= addr && addr <= VTD_INTERRUPT_ADDR_LAST;
 }
 
+static gboolean vtd_find_as_by_sid(gpointer key, gpointer value,
+                                   gpointer user_data)
+{
+    struct vtd_as_key *as_key = (struct vtd_as_key *)key;
+    uint16_t target_sid = *(uint16_t *)user_data;
+    uint16_t sid = vtd_make_source_id(pci_bus_num(as_key->bus),
+                                      as_key->devfn);
+    return sid == target_sid;
+}
+
+static VTDAddressSpace *vtd_get_as_by_sid(IntelIOMMUState *s, uint16_t sid)
+{
+    uint8_t bus_num = sid >> 8;
+    VTDAddressSpace *vtd_as = s->vtd_as_cache[bus_num];
+
+    if (vtd_as &&
+        (sid == vtd_make_source_id(pci_bus_num(vtd_as->bus),
+                                   vtd_as->devfn))) {
+        return vtd_as;
+    }
+
+    vtd_as = g_hash_table_find(s->vtd_as, vtd_find_as_by_sid, &sid);
+    s->vtd_as_cache[bus_num] = vtd_as;
+
+    return vtd_as;
+}
+
 static void vtd_pt_enable_fast_path(IntelIOMMUState *s, uint16_t source_id)
 {
-    VTDBus *vtd_bus;
     VTDAddressSpace *vtd_as;
     bool success = false;
 
-    vtd_bus = vtd_find_as_from_bus_num(s, VTD_SID_TO_BUS(source_id));
-    if (!vtd_bus) {
-        goto out;
-    }
-
-    vtd_as = vtd_bus->dev_as[VTD_SID_TO_DEVFN(source_id)];
+    vtd_as = vtd_get_as_by_sid(s, source_id);
     if (!vtd_as) {
         goto out;
     }
@@ -1907,11 +1918,10 @@  static void vtd_context_device_invalidate(IntelIOMMUState *s,
                                           uint16_t source_id,
                                           uint16_t func_mask)
 {
+    GHashTableIter as_it;
     uint16_t mask;
-    VTDBus *vtd_bus;
     VTDAddressSpace *vtd_as;
     uint8_t bus_n, devfn;
-    uint16_t devfn_it;
 
     trace_vtd_inv_desc_cc_devices(source_id, func_mask);
 
@@ -1934,32 +1944,31 @@  static void vtd_context_device_invalidate(IntelIOMMUState *s,
     mask = ~mask;
 
     bus_n = VTD_SID_TO_BUS(source_id);
-    vtd_bus = vtd_find_as_from_bus_num(s, bus_n);
-    if (vtd_bus) {
-        devfn = VTD_SID_TO_DEVFN(source_id);
-        for (devfn_it = 0; devfn_it < PCI_DEVFN_MAX; ++devfn_it) {
-            vtd_as = vtd_bus->dev_as[devfn_it];
-            if (vtd_as && ((devfn_it & mask) == (devfn & mask))) {
-                trace_vtd_inv_desc_cc_device(bus_n, VTD_PCI_SLOT(devfn_it),
-                                             VTD_PCI_FUNC(devfn_it));
-                vtd_iommu_lock(s);
-                vtd_as->context_cache_entry.context_cache_gen = 0;
-                vtd_iommu_unlock(s);
-                /*
-                 * Do switch address space when needed, in case if the
-                 * device passthrough bit is switched.
-                 */
-                vtd_switch_address_space(vtd_as);
-                /*
-                 * So a device is moving out of (or moving into) a
-                 * domain, resync the shadow page table.
-                 * This won't bring bad even if we have no such
-                 * notifier registered - the IOMMU notification
-                 * framework will skip MAP notifications if that
-                 * happened.
-                 */
-                vtd_sync_shadow_page_table(vtd_as);
-            }
+    devfn = VTD_SID_TO_DEVFN(source_id);
+
+    g_hash_table_iter_init(&as_it, s->vtd_as);
+    while (g_hash_table_iter_next(&as_it, NULL, (void**)&vtd_as)) {
+        if ((pci_bus_num(vtd_as->bus) == bus_n) &&
+            (vtd_as->devfn & mask) == (devfn & mask)) {
+            trace_vtd_inv_desc_cc_device(bus_n, VTD_PCI_SLOT(vtd_as->devfn),
+                                         VTD_PCI_FUNC(vtd_as->devfn));
+            vtd_iommu_lock(s);
+            vtd_as->context_cache_entry.context_cache_gen = 0;
+            vtd_iommu_unlock(s);
+            /*
+             * Do switch address space when needed, in case if the
+             * device passthrough bit is switched.
+             */
+            vtd_switch_address_space(vtd_as);
+            /*
+             * So a device is moving out of (or moving into) a
+             * domain, resync the shadow page table.
+             * This won't bring bad even if we have no such
+             * notifier registered - the IOMMU notification
+             * framework will skip MAP notifications if that
+             * happened.
+             */
+            vtd_sync_shadow_page_table(vtd_as);
         }
     }
 }
@@ -2473,18 +2482,13 @@  static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s,
 {
     VTDAddressSpace *vtd_dev_as;
     IOMMUTLBEvent event;
-    struct VTDBus *vtd_bus;
     hwaddr addr;
     uint64_t sz;
     uint16_t sid;
-    uint8_t devfn;
     bool size;
-    uint8_t bus_num;
 
     addr = VTD_INV_DESC_DEVICE_IOTLB_ADDR(inv_desc->hi);
     sid = VTD_INV_DESC_DEVICE_IOTLB_SID(inv_desc->lo);
-    devfn = sid & 0xff;
-    bus_num = sid >> 8;
     size = VTD_INV_DESC_DEVICE_IOTLB_SIZE(inv_desc->hi);
 
     if ((inv_desc->lo & VTD_INV_DESC_DEVICE_IOTLB_RSVD_LO) ||
@@ -2495,12 +2499,11 @@  static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s,
         return false;
     }
 
-    vtd_bus = vtd_find_as_from_bus_num(s, bus_num);
-    if (!vtd_bus) {
-        goto done;
-    }
-
-    vtd_dev_as = vtd_bus->dev_as[devfn];
+    /*
+     * Using sid is OK since the guest should have finished the
+     * initialization of both the bus and device.
+     */
+    vtd_dev_as = vtd_get_as_by_sid(s, sid);
     if (!vtd_dev_as) {
         goto done;
     }
@@ -3426,27 +3429,27 @@  static const MemoryRegionOps vtd_mem_ir_ops = {
 
 VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn)
 {
-    uintptr_t key = (uintptr_t)bus;
-    VTDBus *vtd_bus = g_hash_table_lookup(s->vtd_as_by_busptr, &key);
+    /*
+     * We can't simply use sid here since the bus number might not be
+     * initialized by the guest.
+     */
+    struct vtd_as_key key = {
+        .bus = bus,
+        .devfn = devfn,
+    };
     VTDAddressSpace *vtd_dev_as;
     char name[128];
 
-    if (!vtd_bus) {
-        uintptr_t *new_key = g_malloc(sizeof(*new_key));
-        *new_key = (uintptr_t)bus;
-        /* No corresponding free() */
-        vtd_bus = g_malloc0(sizeof(VTDBus) + sizeof(VTDAddressSpace *) * \
-                            PCI_DEVFN_MAX);
-        vtd_bus->bus = bus;
-        g_hash_table_insert(s->vtd_as_by_busptr, new_key, vtd_bus);
-    }
+    vtd_dev_as = g_hash_table_lookup(s->vtd_as, &key);
+    if (!vtd_dev_as) {
+        struct vtd_as_key *new_key = g_malloc(sizeof(*new_key));
 
-    vtd_dev_as = vtd_bus->dev_as[devfn];
+        new_key->bus = bus;
+        new_key->devfn = devfn;
 
-    if (!vtd_dev_as) {
         snprintf(name, sizeof(name), "vtd-%02x.%x", PCI_SLOT(devfn),
                  PCI_FUNC(devfn));
-        vtd_bus->dev_as[devfn] = vtd_dev_as = g_malloc0(sizeof(VTDAddressSpace));
+        vtd_dev_as = g_malloc0(sizeof(VTDAddressSpace));
 
         vtd_dev_as->bus = bus;
         vtd_dev_as->devfn = (uint8_t)devfn;
@@ -3502,6 +3505,8 @@  VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn)
                                             &vtd_dev_as->nodmar, 0);
 
         vtd_switch_address_space(vtd_dev_as);
+
+        g_hash_table_insert(s->vtd_as, new_key, vtd_dev_as);
     }
     return vtd_dev_as;
 }
@@ -3875,7 +3880,6 @@  static void vtd_realize(DeviceState *dev, Error **errp)
 
     QLIST_INIT(&s->vtd_as_with_notifiers);
     qemu_mutex_init(&s->iommu_lock);
-    memset(s->vtd_as_by_bus_num, 0, sizeof(s->vtd_as_by_bus_num));
     memory_region_init_io(&s->csrmem, OBJECT(s), &vtd_mem_ops, s,
                           "intel_iommu", DMAR_REG_SIZE);
 
@@ -3897,8 +3901,8 @@  static void vtd_realize(DeviceState *dev, Error **errp)
     /* No corresponding destroy */
     s->iotlb = g_hash_table_new_full(vtd_uint64_hash, vtd_uint64_equal,
                                      g_free, g_free);
-    s->vtd_as_by_busptr = g_hash_table_new_full(vtd_uint64_hash, vtd_uint64_equal,
-                                              g_free, g_free);
+    s->vtd_as = g_hash_table_new_full(vtd_as_hash, vtd_as_equal,
+                                      g_free, g_free);
     vtd_init(s);
     sysbus_mmio_map(SYS_BUS_DEVICE(s), 0, Q35_HOST_BRIDGE_IOMMU_ADDR);
     pci_setup_iommu(bus, vtd_host_dma_iommu, dev);
diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
index 3b5ac869db..fa1bed353c 100644
--- a/include/hw/i386/intel_iommu.h
+++ b/include/hw/i386/intel_iommu.h
@@ -58,7 +58,6 @@  typedef struct VTDContextEntry VTDContextEntry;
 typedef struct VTDContextCacheEntry VTDContextCacheEntry;
 typedef struct VTDAddressSpace VTDAddressSpace;
 typedef struct VTDIOTLBEntry VTDIOTLBEntry;
-typedef struct VTDBus VTDBus;
 typedef union VTD_IR_TableEntry VTD_IR_TableEntry;
 typedef union VTD_IR_MSIAddress VTD_IR_MSIAddress;
 typedef struct VTDPASIDDirEntry VTDPASIDDirEntry;
@@ -111,12 +110,6 @@  struct VTDAddressSpace {
     IOVATree *iova_tree;          /* Traces mapped IOVA ranges */
 };
 
-struct VTDBus {
-    PCIBus* bus;		/* A reference to the bus to provide translation for */
-    /* A table of VTDAddressSpace objects indexed by devfn */
-    VTDAddressSpace *dev_as[];
-};
-
 struct VTDIOTLBEntry {
     uint64_t gfn;
     uint16_t domain_id;
@@ -253,8 +246,8 @@  struct IntelIOMMUState {
     uint32_t context_cache_gen;     /* Should be in [1,MAX] */
     GHashTable *iotlb;              /* IOTLB */
 
-    GHashTable *vtd_as_by_busptr;   /* VTDBus objects indexed by PCIBus* reference */
-    VTDBus *vtd_as_by_bus_num[VTD_PCI_BUS_MAX]; /* VTDBus objects indexed by bus number */
+    GHashTable *vtd_as;             /* VTD address space indexed by source id*/
+    VTDAddressSpace *vtd_as_cache[VTD_PCI_BUS_MAX]; /* VTD address space cache */
     /* list of registered notifiers */
     QLIST_HEAD(, VTDAddressSpace) vtd_as_with_notifiers;