diff mbox series

[RFC,v2,15/22] intel_iommu: bind/unbind guest page table to host

Message ID 1571920483-3382-16-git-send-email-yi.l.liu@intel.com (mailing list archive)
State New, archived
Headers show
Series intel_iommu: expose Shared Virtual Addressing to VM | expand

Commit Message

Yi Liu Oct. 24, 2019, 12:34 p.m. UTC
This patch captures the guest PASID table entry modifications and
propagates the changes to host to setup nested translation. The
guest page table is configured as 1st level page table (GVA->GPA)
whose translation result would further go through host VT-d 2nd
level page table(GPA->HPA) under nested translation mode. This is
a key part of vSVA support.

Cc: Kevin Tian <kevin.tian@intel.com>
Cc: Jacob Pan <jacob.jun.pan@linux.intel.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Yi Sun <yi.y.sun@linux.intel.com>
Signed-off-by: Liu Yi L <yi.l.liu@intel.com>
---
 hw/i386/intel_iommu.c          | 81 ++++++++++++++++++++++++++++++++++++++++++
 hw/i386/intel_iommu_internal.h | 20 +++++++++++
 2 files changed, 101 insertions(+)

Comments

Peter Xu Nov. 4, 2019, 8:25 p.m. UTC | #1
On Thu, Oct 24, 2019 at 08:34:36AM -0400, Liu Yi L wrote:
> This patch captures the guest PASID table entry modifications and
> propagates the changes to host to setup nested translation. The
> guest page table is configured as 1st level page table (GVA->GPA)
> whose translation result would further go through host VT-d 2nd
> level page table(GPA->HPA) under nested translation mode. This is
> a key part of vSVA support.
> 
> Cc: Kevin Tian <kevin.tian@intel.com>
> Cc: Jacob Pan <jacob.jun.pan@linux.intel.com>
> Cc: Peter Xu <peterx@redhat.com>
> Cc: Yi Sun <yi.y.sun@linux.intel.com>
> Signed-off-by: Liu Yi L <yi.l.liu@intel.com>
> ---
>  hw/i386/intel_iommu.c          | 81 ++++++++++++++++++++++++++++++++++++++++++
>  hw/i386/intel_iommu_internal.h | 20 +++++++++++
>  2 files changed, 101 insertions(+)
> 
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index d8827c9..793b0de 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -41,6 +41,7 @@
>  #include "migration/vmstate.h"
>  #include "trace.h"
>  #include "qemu/jhash.h"
> +#include <linux/iommu.h>
>  
>  /* context entry operations */
>  #define VTD_CE_GET_RID2PASID(ce) \
> @@ -695,6 +696,16 @@ static inline uint16_t vtd_pe_get_domain_id(VTDPASIDEntry *pe)
>      return VTD_SM_PASID_ENTRY_DID((pe)->val[1]);
>  }
>  
> +static inline uint32_t vtd_pe_get_fl_aw(VTDPASIDEntry *pe)
> +{
> +    return 48 + ((pe->val[2] >> 2) & VTD_SM_PASID_ENTRY_FLPM) * 9;
> +}
> +
> +static inline dma_addr_t vtd_pe_get_flpt_base(VTDPASIDEntry *pe)
> +{
> +    return pe->val[2] & VTD_SM_PASID_ENTRY_FLPTPTR;
> +}
> +
>  static inline bool vtd_pdire_present(VTDPASIDDirEntry *pdire)
>  {
>      return pdire->val & 1;
> @@ -1850,6 +1861,67 @@ static void vtd_context_global_invalidate(IntelIOMMUState *s)
>      vtd_iommu_replay_all(s);
>  }
>  
> +static void vtd_bind_guest_pasid(IntelIOMMUState *s, VTDBus *vtd_bus,
> +            int devfn, int pasid, VTDPASIDEntry *pe, VTDPASIDOp op)
> +{
> +#ifdef __linux__
> +    VTDIOMMUContext *vtd_ic;
> +    IOMMUCTXEventData event_data;
> +    IOMMUCTXPASIDBindData bind;
> +    struct iommu_gpasid_bind_data *g_bind_data;
> +
> +    vtd_ic = vtd_bus->dev_ic[devfn];
> +    if (!vtd_ic) {
> +        return;
> +    }
> +
> +    g_bind_data = g_malloc0(sizeof(*g_bind_data));
> +    bind.flag = 0;
> +    g_bind_data->flags = 0;
> +    g_bind_data->vtd.flags = 0;
> +    switch (op) {
> +    case VTD_PASID_BIND:
> +    case VTD_PASID_UPDATE:
> +        g_bind_data->version = IOMMU_GPASID_BIND_VERSION_1;
> +        g_bind_data->format = IOMMU_PASID_FORMAT_INTEL_VTD;
> +        g_bind_data->gpgd = vtd_pe_get_flpt_base(pe);
> +        g_bind_data->addr_width = vtd_pe_get_fl_aw(pe);
> +        g_bind_data->hpasid = pasid;
> +        g_bind_data->gpasid = pasid;
> +        g_bind_data->flags |= IOMMU_SVA_GPASID_VAL;
> +        g_bind_data->vtd.flags =
> +                             (VTD_SM_PASID_ENTRY_SRE_BIT(pe->val[2]) ? 1 : 0)
> +                           | (VTD_SM_PASID_ENTRY_EAFE_BIT(pe->val[2]) ? 1 : 0)
> +                           | (VTD_SM_PASID_ENTRY_PCD_BIT(pe->val[1]) ? 1 : 0)
> +                           | (VTD_SM_PASID_ENTRY_PWT_BIT(pe->val[1]) ? 1 : 0)
> +                           | (VTD_SM_PASID_ENTRY_EMTE_BIT(pe->val[1]) ? 1 : 0)
> +                           | (VTD_SM_PASID_ENTRY_CD_BIT(pe->val[1]) ? 1 : 0);
> +        g_bind_data->vtd.pat = VTD_SM_PASID_ENTRY_PAT(pe->val[1]);
> +        g_bind_data->vtd.emt = VTD_SM_PASID_ENTRY_EMT(pe->val[1]);
> +        bind.flag |= IOMMU_CTX_BIND_PASID;
> +        break;
> +
> +    case VTD_PASID_UNBIND:
> +        g_bind_data->gpgd = 0;
> +        g_bind_data->addr_width = 0;
> +        g_bind_data->hpasid = pasid;
> +        bind.flag |= IOMMU_CTX_UNBIND_PASID;
> +        break;
> +
> +    default:
> +        printf("Unknown VTDPASIDOp!!\n");

Please don't use printf()..  Here assert() suits.

> +        break;
> +    }
> +    if (bind.flag) {

Will this be untrue?  If not, assert() works too.

> +        event_data.event = IOMMU_CTX_EVENT_PASID_BIND;
> +        bind.data = g_bind_data;
> +        event_data.data = &bind;
> +        iommu_ctx_event_notify(&vtd_ic->iommu_context, &event_data);
> +    }
> +    g_free(g_bind_data);
> +#endif
> +}
> +
>  /* Do a context-cache device-selective invalidation.
>   * @func_mask: FM field after shifting
>   */
> @@ -2528,12 +2600,17 @@ static gboolean vtd_flush_pasid(gpointer key, gpointer value,
>                  pc_entry->pasid_cache_gen = s->pasid_cache_gen;
>                  if (!vtd_pasid_entry_compare(&pe, &pc_entry->pasid_entry)) {
>                      pc_entry->pasid_entry = pe;
> +                    vtd_bind_guest_pasid(s, vtd_bus, devfn,
> +                                     pasid, &pe, VTD_PASID_UPDATE);
>                      /*
>                       * TODO: when pasid-base-iotlb(piotlb) infrastructure is
>                       * ready, should invalidate QEMU piotlb togehter with this
>                       * change.
>                       */
>                  }
> +            } else {
> +                vtd_bind_guest_pasid(s, vtd_bus, devfn,
> +                                  pasid, NULL, VTD_PASID_UNBIND);

Please see the reply in the other thread on vtd_flush_pasid().  I've
filled in where I feel like this UNBIND should exist, I feel like your
current code could miss some places where you should unbind but didn't.

>              }
>          }
>      }
> @@ -2623,6 +2700,10 @@ static inline void vtd_fill_in_pe_cache(
>  
>      pc_entry->pasid_entry = *pe;
>      pc_entry->pasid_cache_gen = s->pasid_cache_gen;
> +    vtd_bind_guest_pasid(s, vtd_pasid_as->vtd_bus,
> +                         vtd_pasid_as->devfn,
> +                         vtd_pasid_as->pasid,
> +                         pe, VTD_PASID_UPDATE);
>  }
>  
>  static int vtd_pasid_cache_psi(IntelIOMMUState *s,
> diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
> index 12873e1..13e02e8 100644
> --- a/hw/i386/intel_iommu_internal.h
> +++ b/hw/i386/intel_iommu_internal.h
> @@ -483,6 +483,14 @@ struct VTDRootEntry {
>  };
>  typedef struct VTDRootEntry VTDRootEntry;
>  
> +enum VTDPASIDOp {
> +    VTD_PASID_BIND,
> +    VTD_PASID_UNBIND,
> +    VTD_PASID_UPDATE,
> +    VTD_OP_NUM
> +};
> +typedef enum VTDPASIDOp VTDPASIDOp;
> +
>  struct VTDPASIDCacheInfo {
>  #define VTD_PASID_CACHE_DOMSI   (1ULL << 0);
>  #define VTD_PASID_CACHE_PASIDSI (1ULL << 1);
> @@ -549,6 +557,18 @@ typedef struct VTDPASIDCacheInfo VTDPASIDCacheInfo;
>  #define VTD_SM_PASID_ENTRY_AW          7ULL /* Adjusted guest-address-width */
>  #define VTD_SM_PASID_ENTRY_DID(val)    ((val) & VTD_DOMAIN_ID_MASK)
>  
> +/* Adjusted guest-address-width */
> +#define VTD_SM_PASID_ENTRY_FLPM          3ULL
> +#define VTD_SM_PASID_ENTRY_FLPTPTR       (~0xfffULL)
> +#define VTD_SM_PASID_ENTRY_SRE_BIT(val)  (!!((val) & 1ULL))
> +#define VTD_SM_PASID_ENTRY_EAFE_BIT(val) (!!(((val) >> 7) & 1ULL))
> +#define VTD_SM_PASID_ENTRY_PCD_BIT(val)  (!!(((val) >> 31) & 1ULL))
> +#define VTD_SM_PASID_ENTRY_PWT_BIT(val)  (!!(((val) >> 30) & 1ULL))
> +#define VTD_SM_PASID_ENTRY_EMTE_BIT(val) (!!(((val) >> 26) & 1ULL))
> +#define VTD_SM_PASID_ENTRY_CD_BIT(val)   (!!(((val) >> 25) & 1ULL))
> +#define VTD_SM_PASID_ENTRY_PAT(val)      (((val) >> 32) & 0xFFFFFFFFULL)
> +#define VTD_SM_PASID_ENTRY_EMT(val)      (((val) >> 27) & 0x7ULL)
> +
>  /* Second Level Page Translation Pointer*/
>  #define VTD_SM_PASID_ENTRY_SLPTPTR     (~0xfffULL)
>  
> -- 
> 2.7.4
>
Yi Liu Nov. 6, 2019, 8:10 a.m. UTC | #2
> From: Peter Xu [mailto:peterx@redhat.com]
> Sent: Tuesday, November 5, 2019 4:26 AM
> To: Liu, Yi L <yi.l.liu@intel.com>
> Subject: Re: [RFC v2 15/22] intel_iommu: bind/unbind guest page table to host
> 
> On Thu, Oct 24, 2019 at 08:34:36AM -0400, Liu Yi L wrote:
> > This patch captures the guest PASID table entry modifications and
> > propagates the changes to host to setup nested translation. The
> > guest page table is configured as 1st level page table (GVA->GPA)
> > whose translation result would further go through host VT-d 2nd
> > level page table(GPA->HPA) under nested translation mode. This is
> > a key part of vSVA support.
> >
> > Cc: Kevin Tian <kevin.tian@intel.com>
> > Cc: Jacob Pan <jacob.jun.pan@linux.intel.com>
> > Cc: Peter Xu <peterx@redhat.com>
> > Cc: Yi Sun <yi.y.sun@linux.intel.com>
> > Signed-off-by: Liu Yi L <yi.l.liu@intel.com>
> > ---
> >  hw/i386/intel_iommu.c          | 81
> ++++++++++++++++++++++++++++++++++++++++++
> >  hw/i386/intel_iommu_internal.h | 20 +++++++++++
> >  2 files changed, 101 insertions(+)
> >
> > diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> > index d8827c9..793b0de 100644
> > --- a/hw/i386/intel_iommu.c
> > +++ b/hw/i386/intel_iommu.c
> > @@ -41,6 +41,7 @@
> >  #include "migration/vmstate.h"
> >  #include "trace.h"
> >  #include "qemu/jhash.h"
> > +#include <linux/iommu.h>
> >
> >  /* context entry operations */
> >  #define VTD_CE_GET_RID2PASID(ce) \
> > @@ -695,6 +696,16 @@ static inline uint16_t
> vtd_pe_get_domain_id(VTDPASIDEntry *pe)
> >      return VTD_SM_PASID_ENTRY_DID((pe)->val[1]);
> >  }
> >
> > +static inline uint32_t vtd_pe_get_fl_aw(VTDPASIDEntry *pe)
> > +{
> > +    return 48 + ((pe->val[2] >> 2) & VTD_SM_PASID_ENTRY_FLPM) * 9;
> > +}
> > +
> > +static inline dma_addr_t vtd_pe_get_flpt_base(VTDPASIDEntry *pe)
> > +{
> > +    return pe->val[2] & VTD_SM_PASID_ENTRY_FLPTPTR;
> > +}
> > +
> >  static inline bool vtd_pdire_present(VTDPASIDDirEntry *pdire)
> >  {
> >      return pdire->val & 1;
> > @@ -1850,6 +1861,67 @@ static void
> vtd_context_global_invalidate(IntelIOMMUState *s)
> >      vtd_iommu_replay_all(s);
> >  }
> >
> > +static void vtd_bind_guest_pasid(IntelIOMMUState *s, VTDBus *vtd_bus,
> > +            int devfn, int pasid, VTDPASIDEntry *pe, VTDPASIDOp op)
> > +{
> > +#ifdef __linux__
> > +    VTDIOMMUContext *vtd_ic;
> > +    IOMMUCTXEventData event_data;
> > +    IOMMUCTXPASIDBindData bind;
> > +    struct iommu_gpasid_bind_data *g_bind_data;
> > +
> > +    vtd_ic = vtd_bus->dev_ic[devfn];
> > +    if (!vtd_ic) {
> > +        return;
> > +    }
> > +
> > +    g_bind_data = g_malloc0(sizeof(*g_bind_data));
> > +    bind.flag = 0;
> > +    g_bind_data->flags = 0;
> > +    g_bind_data->vtd.flags = 0;
> > +    switch (op) {
> > +    case VTD_PASID_BIND:
> > +    case VTD_PASID_UPDATE:
> > +        g_bind_data->version = IOMMU_GPASID_BIND_VERSION_1;
> > +        g_bind_data->format = IOMMU_PASID_FORMAT_INTEL_VTD;
> > +        g_bind_data->gpgd = vtd_pe_get_flpt_base(pe);
> > +        g_bind_data->addr_width = vtd_pe_get_fl_aw(pe);
> > +        g_bind_data->hpasid = pasid;
> > +        g_bind_data->gpasid = pasid;
> > +        g_bind_data->flags |= IOMMU_SVA_GPASID_VAL;
> > +        g_bind_data->vtd.flags =
> > +                             (VTD_SM_PASID_ENTRY_SRE_BIT(pe->val[2]) ? 1 : 0)
> > +                           | (VTD_SM_PASID_ENTRY_EAFE_BIT(pe->val[2]) ? 1 : 0)
> > +                           | (VTD_SM_PASID_ENTRY_PCD_BIT(pe->val[1]) ? 1 : 0)
> > +                           | (VTD_SM_PASID_ENTRY_PWT_BIT(pe->val[1]) ? 1 : 0)
> > +                           | (VTD_SM_PASID_ENTRY_EMTE_BIT(pe->val[1]) ? 1 : 0)
> > +                           | (VTD_SM_PASID_ENTRY_CD_BIT(pe->val[1]) ? 1 : 0);
> > +        g_bind_data->vtd.pat = VTD_SM_PASID_ENTRY_PAT(pe->val[1]);
> > +        g_bind_data->vtd.emt = VTD_SM_PASID_ENTRY_EMT(pe->val[1]);
> > +        bind.flag |= IOMMU_CTX_BIND_PASID;
> > +        break;
> > +
> > +    case VTD_PASID_UNBIND:
> > +        g_bind_data->gpgd = 0;
> > +        g_bind_data->addr_width = 0;
> > +        g_bind_data->hpasid = pasid;
> > +        bind.flag |= IOMMU_CTX_UNBIND_PASID;
> > +        break;
> > +
> > +    default:
> > +        printf("Unknown VTDPASIDOp!!\n");
> 
> Please don't use printf()..  Here assert() suits.

Will correct it. Thanks.

> 
> > +        break;
> > +    }
> > +    if (bind.flag) {
> 
> Will this be untrue?  If not, assert() works too.

yes, it is possible. If an unknown VTDPASIDOp, then no switch case
will initiate bind.flag.

> > +        event_data.event = IOMMU_CTX_EVENT_PASID_BIND;
> > +        bind.data = g_bind_data;
> > +        event_data.data = &bind;
> > +        iommu_ctx_event_notify(&vtd_ic->iommu_context, &event_data);
> > +    }
> > +    g_free(g_bind_data);
> > +#endif
> > +}
> > +
> >  /* Do a context-cache device-selective invalidation.
> >   * @func_mask: FM field after shifting
> >   */
> > @@ -2528,12 +2600,17 @@ static gboolean vtd_flush_pasid(gpointer key,
> gpointer value,
> >                  pc_entry->pasid_cache_gen = s->pasid_cache_gen;
> >                  if (!vtd_pasid_entry_compare(&pe, &pc_entry->pasid_entry)) {
> >                      pc_entry->pasid_entry = pe;
> > +                    vtd_bind_guest_pasid(s, vtd_bus, devfn,
> > +                                     pasid, &pe, VTD_PASID_UPDATE);
> >                      /*
> >                       * TODO: when pasid-base-iotlb(piotlb) infrastructure is
> >                       * ready, should invalidate QEMU piotlb togehter with this
> >                       * change.
> >                       */
> >                  }
> > +            } else {
> > +                vtd_bind_guest_pasid(s, vtd_bus, devfn,
> > +                                  pasid, NULL, VTD_PASID_UNBIND);
> 
> Please see the reply in the other thread on vtd_flush_pasid().  I've
> filled in where I feel like this UNBIND should exist, I feel like your
> current code could miss some places where you should unbind but didn't.

I've replied in that thread regards to your comments. May you
reconsider it here. Hope, it suits what you thought. If still
something missed, pls feel free to point out.

Regards,
Yi Liu
Peter Xu Nov. 6, 2019, 2:27 p.m. UTC | #3
On Wed, Nov 06, 2019 at 08:10:59AM +0000, Liu, Yi L wrote:
> > From: Peter Xu [mailto:peterx@redhat.com]
> > Sent: Tuesday, November 5, 2019 4:26 AM
> > To: Liu, Yi L <yi.l.liu@intel.com>
> > Subject: Re: [RFC v2 15/22] intel_iommu: bind/unbind guest page table to host
> > 
> > On Thu, Oct 24, 2019 at 08:34:36AM -0400, Liu Yi L wrote:
> > > This patch captures the guest PASID table entry modifications and
> > > propagates the changes to host to setup nested translation. The
> > > guest page table is configured as 1st level page table (GVA->GPA)
> > > whose translation result would further go through host VT-d 2nd
> > > level page table(GPA->HPA) under nested translation mode. This is
> > > a key part of vSVA support.
> > >
> > > Cc: Kevin Tian <kevin.tian@intel.com>
> > > Cc: Jacob Pan <jacob.jun.pan@linux.intel.com>
> > > Cc: Peter Xu <peterx@redhat.com>
> > > Cc: Yi Sun <yi.y.sun@linux.intel.com>
> > > Signed-off-by: Liu Yi L <yi.l.liu@intel.com>
> > > ---
> > >  hw/i386/intel_iommu.c          | 81
> > ++++++++++++++++++++++++++++++++++++++++++
> > >  hw/i386/intel_iommu_internal.h | 20 +++++++++++
> > >  2 files changed, 101 insertions(+)
> > >
> > > diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> > > index d8827c9..793b0de 100644
> > > --- a/hw/i386/intel_iommu.c
> > > +++ b/hw/i386/intel_iommu.c
> > > @@ -41,6 +41,7 @@
> > >  #include "migration/vmstate.h"
> > >  #include "trace.h"
> > >  #include "qemu/jhash.h"
> > > +#include <linux/iommu.h>
> > >
> > >  /* context entry operations */
> > >  #define VTD_CE_GET_RID2PASID(ce) \
> > > @@ -695,6 +696,16 @@ static inline uint16_t
> > vtd_pe_get_domain_id(VTDPASIDEntry *pe)
> > >      return VTD_SM_PASID_ENTRY_DID((pe)->val[1]);
> > >  }
> > >
> > > +static inline uint32_t vtd_pe_get_fl_aw(VTDPASIDEntry *pe)
> > > +{
> > > +    return 48 + ((pe->val[2] >> 2) & VTD_SM_PASID_ENTRY_FLPM) * 9;
> > > +}
> > > +
> > > +static inline dma_addr_t vtd_pe_get_flpt_base(VTDPASIDEntry *pe)
> > > +{
> > > +    return pe->val[2] & VTD_SM_PASID_ENTRY_FLPTPTR;
> > > +}
> > > +
> > >  static inline bool vtd_pdire_present(VTDPASIDDirEntry *pdire)
> > >  {
> > >      return pdire->val & 1;
> > > @@ -1850,6 +1861,67 @@ static void
> > vtd_context_global_invalidate(IntelIOMMUState *s)
> > >      vtd_iommu_replay_all(s);
> > >  }
> > >
> > > +static void vtd_bind_guest_pasid(IntelIOMMUState *s, VTDBus *vtd_bus,
> > > +            int devfn, int pasid, VTDPASIDEntry *pe, VTDPASIDOp op)
> > > +{
> > > +#ifdef __linux__
> > > +    VTDIOMMUContext *vtd_ic;
> > > +    IOMMUCTXEventData event_data;
> > > +    IOMMUCTXPASIDBindData bind;
> > > +    struct iommu_gpasid_bind_data *g_bind_data;
> > > +
> > > +    vtd_ic = vtd_bus->dev_ic[devfn];
> > > +    if (!vtd_ic) {
> > > +        return;
> > > +    }
> > > +
> > > +    g_bind_data = g_malloc0(sizeof(*g_bind_data));
> > > +    bind.flag = 0;
> > > +    g_bind_data->flags = 0;
> > > +    g_bind_data->vtd.flags = 0;
> > > +    switch (op) {
> > > +    case VTD_PASID_BIND:
> > > +    case VTD_PASID_UPDATE:
> > > +        g_bind_data->version = IOMMU_GPASID_BIND_VERSION_1;
> > > +        g_bind_data->format = IOMMU_PASID_FORMAT_INTEL_VTD;
> > > +        g_bind_data->gpgd = vtd_pe_get_flpt_base(pe);
> > > +        g_bind_data->addr_width = vtd_pe_get_fl_aw(pe);
> > > +        g_bind_data->hpasid = pasid;
> > > +        g_bind_data->gpasid = pasid;
> > > +        g_bind_data->flags |= IOMMU_SVA_GPASID_VAL;
> > > +        g_bind_data->vtd.flags =
> > > +                             (VTD_SM_PASID_ENTRY_SRE_BIT(pe->val[2]) ? 1 : 0)
> > > +                           | (VTD_SM_PASID_ENTRY_EAFE_BIT(pe->val[2]) ? 1 : 0)
> > > +                           | (VTD_SM_PASID_ENTRY_PCD_BIT(pe->val[1]) ? 1 : 0)
> > > +                           | (VTD_SM_PASID_ENTRY_PWT_BIT(pe->val[1]) ? 1 : 0)
> > > +                           | (VTD_SM_PASID_ENTRY_EMTE_BIT(pe->val[1]) ? 1 : 0)
> > > +                           | (VTD_SM_PASID_ENTRY_CD_BIT(pe->val[1]) ? 1 : 0);
> > > +        g_bind_data->vtd.pat = VTD_SM_PASID_ENTRY_PAT(pe->val[1]);
> > > +        g_bind_data->vtd.emt = VTD_SM_PASID_ENTRY_EMT(pe->val[1]);
> > > +        bind.flag |= IOMMU_CTX_BIND_PASID;
> > > +        break;
> > > +
> > > +    case VTD_PASID_UNBIND:
> > > +        g_bind_data->gpgd = 0;
> > > +        g_bind_data->addr_width = 0;
> > > +        g_bind_data->hpasid = pasid;
> > > +        bind.flag |= IOMMU_CTX_UNBIND_PASID;
> > > +        break;
> > > +
> > > +    default:
> > > +        printf("Unknown VTDPASIDOp!!\n");
> > 
> > Please don't use printf()..  Here assert() suits.
> 
> Will correct it. Thanks.
> 
> > 
> > > +        break;
> > > +    }
> > > +    if (bind.flag) {
> > 
> > Will this be untrue?  If not, assert() works too.
> 
> yes, it is possible. If an unknown VTDPASIDOp, then no switch case
> will initiate bind.flag.

Then should it be a programming error?  If so we should still use
assert(), I think...

> 
> > > +        event_data.event = IOMMU_CTX_EVENT_PASID_BIND;
> > > +        bind.data = g_bind_data;
> > > +        event_data.data = &bind;
> > > +        iommu_ctx_event_notify(&vtd_ic->iommu_context, &event_data);
> > > +    }
> > > +    g_free(g_bind_data);
> > > +#endif
> > > +}
> > > +
> > >  /* Do a context-cache device-selective invalidation.
> > >   * @func_mask: FM field after shifting
> > >   */
> > > @@ -2528,12 +2600,17 @@ static gboolean vtd_flush_pasid(gpointer key,
> > gpointer value,
> > >                  pc_entry->pasid_cache_gen = s->pasid_cache_gen;
> > >                  if (!vtd_pasid_entry_compare(&pe, &pc_entry->pasid_entry)) {
> > >                      pc_entry->pasid_entry = pe;
> > > +                    vtd_bind_guest_pasid(s, vtd_bus, devfn,
> > > +                                     pasid, &pe, VTD_PASID_UPDATE);
> > >                      /*
> > >                       * TODO: when pasid-base-iotlb(piotlb) infrastructure is
> > >                       * ready, should invalidate QEMU piotlb togehter with this
> > >                       * change.
> > >                       */
> > >                  }
> > > +            } else {
> > > +                vtd_bind_guest_pasid(s, vtd_bus, devfn,
> > > +                                  pasid, NULL, VTD_PASID_UNBIND);
> > 
> > Please see the reply in the other thread on vtd_flush_pasid().  I've
> > filled in where I feel like this UNBIND should exist, I feel like your
> > current code could miss some places where you should unbind but didn't.
> 
> I've replied in that thread regards to your comments. May you
> reconsider it here. Hope, it suits what you thought. If still
> something missed, pls feel free to point out.

Ok let's wait to see your next version.  Thanks,
diff mbox series

Patch

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index d8827c9..793b0de 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -41,6 +41,7 @@ 
 #include "migration/vmstate.h"
 #include "trace.h"
 #include "qemu/jhash.h"
+#include <linux/iommu.h>
 
 /* context entry operations */
 #define VTD_CE_GET_RID2PASID(ce) \
@@ -695,6 +696,16 @@  static inline uint16_t vtd_pe_get_domain_id(VTDPASIDEntry *pe)
     return VTD_SM_PASID_ENTRY_DID((pe)->val[1]);
 }
 
+static inline uint32_t vtd_pe_get_fl_aw(VTDPASIDEntry *pe)
+{
+    return 48 + ((pe->val[2] >> 2) & VTD_SM_PASID_ENTRY_FLPM) * 9;
+}
+
+static inline dma_addr_t vtd_pe_get_flpt_base(VTDPASIDEntry *pe)
+{
+    return pe->val[2] & VTD_SM_PASID_ENTRY_FLPTPTR;
+}
+
 static inline bool vtd_pdire_present(VTDPASIDDirEntry *pdire)
 {
     return pdire->val & 1;
@@ -1850,6 +1861,67 @@  static void vtd_context_global_invalidate(IntelIOMMUState *s)
     vtd_iommu_replay_all(s);
 }
 
+static void vtd_bind_guest_pasid(IntelIOMMUState *s, VTDBus *vtd_bus,
+            int devfn, int pasid, VTDPASIDEntry *pe, VTDPASIDOp op)
+{
+#ifdef __linux__
+    VTDIOMMUContext *vtd_ic;
+    IOMMUCTXEventData event_data;
+    IOMMUCTXPASIDBindData bind;
+    struct iommu_gpasid_bind_data *g_bind_data;
+
+    vtd_ic = vtd_bus->dev_ic[devfn];
+    if (!vtd_ic) {
+        return;
+    }
+
+    g_bind_data = g_malloc0(sizeof(*g_bind_data));
+    bind.flag = 0;
+    g_bind_data->flags = 0;
+    g_bind_data->vtd.flags = 0;
+    switch (op) {
+    case VTD_PASID_BIND:
+    case VTD_PASID_UPDATE:
+        g_bind_data->version = IOMMU_GPASID_BIND_VERSION_1;
+        g_bind_data->format = IOMMU_PASID_FORMAT_INTEL_VTD;
+        g_bind_data->gpgd = vtd_pe_get_flpt_base(pe);
+        g_bind_data->addr_width = vtd_pe_get_fl_aw(pe);
+        g_bind_data->hpasid = pasid;
+        g_bind_data->gpasid = pasid;
+        g_bind_data->flags |= IOMMU_SVA_GPASID_VAL;
+        g_bind_data->vtd.flags =
+                             (VTD_SM_PASID_ENTRY_SRE_BIT(pe->val[2]) ? 1 : 0)
+                           | (VTD_SM_PASID_ENTRY_EAFE_BIT(pe->val[2]) ? 1 : 0)
+                           | (VTD_SM_PASID_ENTRY_PCD_BIT(pe->val[1]) ? 1 : 0)
+                           | (VTD_SM_PASID_ENTRY_PWT_BIT(pe->val[1]) ? 1 : 0)
+                           | (VTD_SM_PASID_ENTRY_EMTE_BIT(pe->val[1]) ? 1 : 0)
+                           | (VTD_SM_PASID_ENTRY_CD_BIT(pe->val[1]) ? 1 : 0);
+        g_bind_data->vtd.pat = VTD_SM_PASID_ENTRY_PAT(pe->val[1]);
+        g_bind_data->vtd.emt = VTD_SM_PASID_ENTRY_EMT(pe->val[1]);
+        bind.flag |= IOMMU_CTX_BIND_PASID;
+        break;
+
+    case VTD_PASID_UNBIND:
+        g_bind_data->gpgd = 0;
+        g_bind_data->addr_width = 0;
+        g_bind_data->hpasid = pasid;
+        bind.flag |= IOMMU_CTX_UNBIND_PASID;
+        break;
+
+    default:
+        printf("Unknown VTDPASIDOp!!\n");
+        break;
+    }
+    if (bind.flag) {
+        event_data.event = IOMMU_CTX_EVENT_PASID_BIND;
+        bind.data = g_bind_data;
+        event_data.data = &bind;
+        iommu_ctx_event_notify(&vtd_ic->iommu_context, &event_data);
+    }
+    g_free(g_bind_data);
+#endif
+}
+
 /* Do a context-cache device-selective invalidation.
  * @func_mask: FM field after shifting
  */
@@ -2528,12 +2600,17 @@  static gboolean vtd_flush_pasid(gpointer key, gpointer value,
                 pc_entry->pasid_cache_gen = s->pasid_cache_gen;
                 if (!vtd_pasid_entry_compare(&pe, &pc_entry->pasid_entry)) {
                     pc_entry->pasid_entry = pe;
+                    vtd_bind_guest_pasid(s, vtd_bus, devfn,
+                                     pasid, &pe, VTD_PASID_UPDATE);
                     /*
                      * TODO: when pasid-base-iotlb(piotlb) infrastructure is
                      * ready, should invalidate QEMU piotlb togehter with this
                      * change.
                      */
                 }
+            } else {
+                vtd_bind_guest_pasid(s, vtd_bus, devfn,
+                                  pasid, NULL, VTD_PASID_UNBIND);
             }
         }
     }
@@ -2623,6 +2700,10 @@  static inline void vtd_fill_in_pe_cache(
 
     pc_entry->pasid_entry = *pe;
     pc_entry->pasid_cache_gen = s->pasid_cache_gen;
+    vtd_bind_guest_pasid(s, vtd_pasid_as->vtd_bus,
+                         vtd_pasid_as->devfn,
+                         vtd_pasid_as->pasid,
+                         pe, VTD_PASID_UPDATE);
 }
 
 static int vtd_pasid_cache_psi(IntelIOMMUState *s,
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index 12873e1..13e02e8 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -483,6 +483,14 @@  struct VTDRootEntry {
 };
 typedef struct VTDRootEntry VTDRootEntry;
 
+enum VTDPASIDOp {
+    VTD_PASID_BIND,
+    VTD_PASID_UNBIND,
+    VTD_PASID_UPDATE,
+    VTD_OP_NUM
+};
+typedef enum VTDPASIDOp VTDPASIDOp;
+
 struct VTDPASIDCacheInfo {
 #define VTD_PASID_CACHE_DOMSI   (1ULL << 0);
 #define VTD_PASID_CACHE_PASIDSI (1ULL << 1);
@@ -549,6 +557,18 @@  typedef struct VTDPASIDCacheInfo VTDPASIDCacheInfo;
 #define VTD_SM_PASID_ENTRY_AW          7ULL /* Adjusted guest-address-width */
 #define VTD_SM_PASID_ENTRY_DID(val)    ((val) & VTD_DOMAIN_ID_MASK)
 
+/* Adjusted guest-address-width */
+#define VTD_SM_PASID_ENTRY_FLPM          3ULL
+#define VTD_SM_PASID_ENTRY_FLPTPTR       (~0xfffULL)
+#define VTD_SM_PASID_ENTRY_SRE_BIT(val)  (!!((val) & 1ULL))
+#define VTD_SM_PASID_ENTRY_EAFE_BIT(val) (!!(((val) >> 7) & 1ULL))
+#define VTD_SM_PASID_ENTRY_PCD_BIT(val)  (!!(((val) >> 31) & 1ULL))
+#define VTD_SM_PASID_ENTRY_PWT_BIT(val)  (!!(((val) >> 30) & 1ULL))
+#define VTD_SM_PASID_ENTRY_EMTE_BIT(val) (!!(((val) >> 26) & 1ULL))
+#define VTD_SM_PASID_ENTRY_CD_BIT(val)   (!!(((val) >> 25) & 1ULL))
+#define VTD_SM_PASID_ENTRY_PAT(val)      (((val) >> 32) & 0xFFFFFFFFULL)
+#define VTD_SM_PASID_ENTRY_EMT(val)      (((val) >> 27) & 0x7ULL)
+
 /* Second Level Page Translation Pointer*/
 #define VTD_SM_PASID_ENTRY_SLPTPTR     (~0xfffULL)