diff mbox series

[v1,14/22] intel_iommu: bind/unbind guest page table to host

Message ID 1584880579-12178-15-git-send-email-yi.l.liu@intel.com (mailing list archive)
State New, archived
Headers show
Series intel_iommu: expose Shared Virtual Addressing to VMs | expand

Commit Message

Yi Liu March 22, 2020, 12:36 p.m. UTC
This patch captures the guest PASID table entry modifications and
propagates the changes to host to setup dual stage DMA translation.
The guest page table is configured as 1st level page table (GVA->GPA)
whose translation result would further go through host VT-d 2nd
level page table(GPA->HPA) under nested translation mode. This is the
key part of vSVA support, and also a key to support IOVA over 1st-
level page table for Intel VT-d in virtualization environment.

Cc: Kevin Tian <kevin.tian@intel.com>
Cc: Jacob Pan <jacob.jun.pan@linux.intel.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Yi Sun <yi.y.sun@linux.intel.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Richard Henderson <rth@twiddle.net>
Signed-off-by: Liu Yi L <yi.l.liu@intel.com>
---
 hw/i386/intel_iommu.c          | 98 +++++++++++++++++++++++++++++++++++++++---
 hw/i386/intel_iommu_internal.h | 25 +++++++++++
 2 files changed, 118 insertions(+), 5 deletions(-)

Comments

Peter Xu March 24, 2020, 5:46 p.m. UTC | #1
On Sun, Mar 22, 2020 at 05:36:11AM -0700, Liu Yi L wrote:
> This patch captures the guest PASID table entry modifications and
> propagates the changes to host to setup dual stage DMA translation.
> The guest page table is configured as 1st level page table (GVA->GPA)
> whose translation result would further go through host VT-d 2nd
> level page table(GPA->HPA) under nested translation mode. This is the
> key part of vSVA support, and also a key to support IOVA over 1st-
> level page table for Intel VT-d in virtualization environment.
> 
> Cc: Kevin Tian <kevin.tian@intel.com>
> Cc: Jacob Pan <jacob.jun.pan@linux.intel.com>
> Cc: Peter Xu <peterx@redhat.com>
> Cc: Yi Sun <yi.y.sun@linux.intel.com>
> Cc: Paolo Bonzini <pbonzini@redhat.com>
> Cc: Richard Henderson <rth@twiddle.net>
> Signed-off-by: Liu Yi L <yi.l.liu@intel.com>
> ---
>  hw/i386/intel_iommu.c          | 98 +++++++++++++++++++++++++++++++++++++++---
>  hw/i386/intel_iommu_internal.h | 25 +++++++++++
>  2 files changed, 118 insertions(+), 5 deletions(-)
> 
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index c985cae..0423c83 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -41,6 +41,7 @@
>  #include "migration/vmstate.h"
>  #include "trace.h"
>  #include "qemu/jhash.h"
> +#include <linux/iommu.h>
>  
>  /* context entry operations */
>  #define VTD_CE_GET_RID2PASID(ce) \
> @@ -695,6 +696,16 @@ static inline uint16_t vtd_pe_get_domain_id(VTDPASIDEntry *pe)
>      return VTD_SM_PASID_ENTRY_DID((pe)->val[1]);
>  }
>  
> +static inline uint32_t vtd_pe_get_fl_aw(VTDPASIDEntry *pe)
> +{
> +    return 48 + ((pe->val[2] >> 2) & VTD_SM_PASID_ENTRY_FLPM) * 9;
> +}
> +
> +static inline dma_addr_t vtd_pe_get_flpt_base(VTDPASIDEntry *pe)
> +{
> +    return pe->val[2] & VTD_SM_PASID_ENTRY_FLPTPTR;
> +}
> +
>  static inline bool vtd_pdire_present(VTDPASIDDirEntry *pdire)
>  {
>      return pdire->val & 1;
> @@ -1856,6 +1867,81 @@ static void vtd_context_global_invalidate(IntelIOMMUState *s)
>      vtd_iommu_replay_all(s);
>  }
>  
> +/**
> + * Caller should hold iommu_lock.
> + */
> +static int vtd_bind_guest_pasid(IntelIOMMUState *s, VTDBus *vtd_bus,
> +                                int devfn, int pasid, VTDPASIDEntry *pe,
> +                                VTDPASIDOp op)
> +{
> +    VTDHostIOMMUContext *vtd_dev_icx;
> +    HostIOMMUContext *host_icx;
> +    DualIOMMUStage1BindData *bind_data;
> +    struct iommu_gpasid_bind_data *g_bind_data;
> +    int ret = -1;
> +
> +    vtd_dev_icx = vtd_bus->dev_icx[devfn];
> +    if (!vtd_dev_icx) {
> +        return -EINVAL;
> +    }
> +
> +    host_icx = vtd_dev_icx->host_icx;
> +    if (!host_icx) {
> +        return -EINVAL;
> +    }
> +
> +    if (!(host_icx->stage1_formats
> +             & IOMMU_PASID_FORMAT_INTEL_VTD)) {
> +        error_report_once("IOMMU Stage 1 format is not compatible!\n");

Shouldn't we fail with this?

> +    }
> +
> +    bind_data = g_malloc0(sizeof(*bind_data));
> +    bind_data->pasid = pasid;
> +    g_bind_data = &bind_data->bind_data.gpasid_bind;
> +
> +    g_bind_data->flags = 0;
> +    g_bind_data->vtd.flags = 0;
> +    switch (op) {
> +    case VTD_PASID_BIND:
> +    case VTD_PASID_UPDATE:

Is VTD_PASID_UPDATE used anywhere?

But since it's called "UPDATE"... I really want to confirm with you
that the bind() to the kernel will handle the UPDATE case, right?  I
mean, we need to unbind first if there is an existing pgtable pointer.

If the answer is yes, then I think we're good, but we really need to
comment it somewhere about the fact.

> +        g_bind_data->version = IOMMU_UAPI_VERSION;
> +        g_bind_data->format = IOMMU_PASID_FORMAT_INTEL_VTD;
> +        g_bind_data->gpgd = vtd_pe_get_flpt_base(pe);
> +        g_bind_data->addr_width = vtd_pe_get_fl_aw(pe);
> +        g_bind_data->hpasid = pasid;
> +        g_bind_data->gpasid = pasid;
> +        g_bind_data->flags |= IOMMU_SVA_GPASID_VAL;
> +        g_bind_data->vtd.flags =
> +                             (VTD_SM_PASID_ENTRY_SRE_BIT(pe->val[2]) ? 1 : 0)
> +                           | (VTD_SM_PASID_ENTRY_EAFE_BIT(pe->val[2]) ? 1 : 0)
> +                           | (VTD_SM_PASID_ENTRY_PCD_BIT(pe->val[1]) ? 1 : 0)
> +                           | (VTD_SM_PASID_ENTRY_PWT_BIT(pe->val[1]) ? 1 : 0)
> +                           | (VTD_SM_PASID_ENTRY_EMTE_BIT(pe->val[1]) ? 1 : 0)
> +                           | (VTD_SM_PASID_ENTRY_CD_BIT(pe->val[1]) ? 1 : 0);
> +        g_bind_data->vtd.pat = VTD_SM_PASID_ENTRY_PAT(pe->val[1]);
> +        g_bind_data->vtd.emt = VTD_SM_PASID_ENTRY_EMT(pe->val[1]);
> +        ret = host_iommu_ctx_bind_stage1_pgtbl(host_icx, bind_data);
> +        break;
> +    case VTD_PASID_UNBIND:
> +        g_bind_data->version = IOMMU_UAPI_VERSION;
> +        g_bind_data->format = IOMMU_PASID_FORMAT_INTEL_VTD;
> +        g_bind_data->gpgd = 0;
> +        g_bind_data->addr_width = 0;
> +        g_bind_data->hpasid = pasid;
> +        g_bind_data->gpasid = pasid;
> +        g_bind_data->flags |= IOMMU_SVA_GPASID_VAL;
> +        ret = host_iommu_ctx_unbind_stage1_pgtbl(host_icx, bind_data);
> +        break;
> +    default:
> +        error_report_once("Unknown VTDPASIDOp!!!\n");
> +        break;
> +    }
> +
> +    g_free(bind_data);
> +
> +    return ret;
> +}
> +
>  /* Do a context-cache device-selective invalidation.
>   * @func_mask: FM field after shifting
>   */
> @@ -2481,10 +2567,10 @@ static inline void vtd_fill_in_pe_in_cache(IntelIOMMUState *s,
>  
>      pc_entry->pasid_entry = *pe;
>      pc_entry->pasid_cache_gen = s->pasid_cache_gen;
> -    /*
> -     * TODO:
> -     * - send pasid bind to host for passthru devices
> -     */
> +    vtd_bind_guest_pasid(s, vtd_pasid_as->vtd_bus,
> +                         vtd_pasid_as->devfn,
> +                         vtd_pasid_as->pasid,
> +                         pe, VTD_PASID_BIND);
>  }
>  
>  /**
> @@ -2574,11 +2660,13 @@ static gboolean vtd_flush_pasid(gpointer key, gpointer value,
>       * - when pasid-base-iotlb(piotlb) infrastructure is ready,
>       *   should invalidate QEMU piotlb togehter with this change.
>       */
> +
>      return false;
>  remove:
> +    vtd_bind_guest_pasid(s, vtd_bus, devfn,
> +                         pasid, NULL, VTD_PASID_UNBIND);
>      /*
>       * TODO:
> -     * - send pasid bind to host for passthru devices
>       * - when pasid-base-iotlb(piotlb) infrastructure is ready,
>       *   should invalidate QEMU piotlb togehter with this change.
>       */
> diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
> index 01fd95c..4451acf 100644
> --- a/hw/i386/intel_iommu_internal.h
> +++ b/hw/i386/intel_iommu_internal.h
> @@ -516,6 +516,20 @@ typedef struct VTDRootEntry VTDRootEntry;
>  #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL0(aw)  (0x1e0ULL | ~VTD_HAW_MASK(aw))
>  #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL1      0xffffffffffe00000ULL
>  
> +enum VTD_DUAL_STAGE_UAPI {
> +    UAPI_BIND_GPASID,
> +    UAPI_NUM
> +};
> +typedef enum VTD_DUAL_STAGE_UAPI VTD_DUAL_STAGE_UAPI;
> +
> +enum VTDPASIDOp {
> +    VTD_PASID_BIND,
> +    VTD_PASID_UNBIND,
> +    VTD_PASID_UPDATE,

Same here (whether to drop?).

> +    VTD_OP_NUM
> +};
> +typedef enum VTDPASIDOp VTDPASIDOp;
> +
>  struct VTDPASIDCacheInfo {
>  #define VTD_PASID_CACHE_GLOBAL   (1ULL << 0)
>  #define VTD_PASID_CACHE_DOMSI    (1ULL << 1)
> @@ -552,6 +566,17 @@ typedef struct VTDPASIDCacheInfo VTDPASIDCacheInfo;
>  #define VTD_SM_PASID_ENTRY_AW          7ULL /* Adjusted guest-address-width */
>  #define VTD_SM_PASID_ENTRY_DID(val)    ((val) & VTD_DOMAIN_ID_MASK)
>  
> +#define VTD_SM_PASID_ENTRY_FLPM          3ULL
> +#define VTD_SM_PASID_ENTRY_FLPTPTR       (~0xfffULL)
> +#define VTD_SM_PASID_ENTRY_SRE_BIT(val)  (!!((val) & 1ULL))
> +#define VTD_SM_PASID_ENTRY_EAFE_BIT(val) (!!(((val) >> 7) & 1ULL))
> +#define VTD_SM_PASID_ENTRY_PCD_BIT(val)  (!!(((val) >> 31) & 1ULL))
> +#define VTD_SM_PASID_ENTRY_PWT_BIT(val)  (!!(((val) >> 30) & 1ULL))
> +#define VTD_SM_PASID_ENTRY_EMTE_BIT(val) (!!(((val) >> 26) & 1ULL))
> +#define VTD_SM_PASID_ENTRY_CD_BIT(val)   (!!(((val) >> 25) & 1ULL))
> +#define VTD_SM_PASID_ENTRY_PAT(val)      (((val) >> 32) & 0xFFFFFFFFULL)
> +#define VTD_SM_PASID_ENTRY_EMT(val)      (((val) >> 27) & 0x7ULL)
> +
>  /* Second Level Page Translation Pointer*/
>  #define VTD_SM_PASID_ENTRY_SLPTPTR     (~0xfffULL)
>  
> -- 
> 2.7.4
>
Yi Liu March 25, 2020, 12:42 p.m. UTC | #2
> From: Peter Xu
> Sent: Wednesday, March 25, 2020 1:47 AM
> To: Liu, Yi L <yi.l.liu@intel.com>
> Subject: Re: [PATCH v1 14/22] intel_iommu: bind/unbind guest page table to host
> 
> On Sun, Mar 22, 2020 at 05:36:11AM -0700, Liu Yi L wrote:
> > This patch captures the guest PASID table entry modifications and
> > propagates the changes to host to setup dual stage DMA translation.
> > The guest page table is configured as 1st level page table (GVA->GPA)
> > whose translation result would further go through host VT-d 2nd level
> > page table(GPA->HPA) under nested translation mode. This is the key
> > part of vSVA support, and also a key to support IOVA over 1st- level
> > page table for Intel VT-d in virtualization environment.
> >
> > Cc: Kevin Tian <kevin.tian@intel.com>
> > Cc: Jacob Pan <jacob.jun.pan@linux.intel.com>
> > Cc: Peter Xu <peterx@redhat.com>
> > Cc: Yi Sun <yi.y.sun@linux.intel.com>
> > Cc: Paolo Bonzini <pbonzini@redhat.com>
> > Cc: Richard Henderson <rth@twiddle.net>
> > Signed-off-by: Liu Yi L <yi.l.liu@intel.com>
> > ---
> >  hw/i386/intel_iommu.c          | 98
> +++++++++++++++++++++++++++++++++++++++---
> >  hw/i386/intel_iommu_internal.h | 25 +++++++++++
> >  2 files changed, 118 insertions(+), 5 deletions(-)
> >
> > diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index
> > c985cae..0423c83 100644
> > --- a/hw/i386/intel_iommu.c
> > +++ b/hw/i386/intel_iommu.c
> > @@ -41,6 +41,7 @@
> >  #include "migration/vmstate.h"
> >  #include "trace.h"
> >  #include "qemu/jhash.h"
> > +#include <linux/iommu.h>
> >
> >  /* context entry operations */
> >  #define VTD_CE_GET_RID2PASID(ce) \
> > @@ -695,6 +696,16 @@ static inline uint16_t
> vtd_pe_get_domain_id(VTDPASIDEntry *pe)
> >      return VTD_SM_PASID_ENTRY_DID((pe)->val[1]);
> >  }
> >
> > +static inline uint32_t vtd_pe_get_fl_aw(VTDPASIDEntry *pe) {
> > +    return 48 + ((pe->val[2] >> 2) & VTD_SM_PASID_ENTRY_FLPM) * 9; }
> > +
> > +static inline dma_addr_t vtd_pe_get_flpt_base(VTDPASIDEntry *pe) {
> > +    return pe->val[2] & VTD_SM_PASID_ENTRY_FLPTPTR; }
> > +
> >  static inline bool vtd_pdire_present(VTDPASIDDirEntry *pdire)  {
> >      return pdire->val & 1;
> > @@ -1856,6 +1867,81 @@ static void
> vtd_context_global_invalidate(IntelIOMMUState *s)
> >      vtd_iommu_replay_all(s);
> >  }
> >
> > +/**
> > + * Caller should hold iommu_lock.
> > + */
> > +static int vtd_bind_guest_pasid(IntelIOMMUState *s, VTDBus *vtd_bus,
> > +                                int devfn, int pasid, VTDPASIDEntry *pe,
> > +                                VTDPASIDOp op) {
> > +    VTDHostIOMMUContext *vtd_dev_icx;
> > +    HostIOMMUContext *host_icx;
> > +    DualIOMMUStage1BindData *bind_data;
> > +    struct iommu_gpasid_bind_data *g_bind_data;
> > +    int ret = -1;
> > +
> > +    vtd_dev_icx = vtd_bus->dev_icx[devfn];
> > +    if (!vtd_dev_icx) {
> > +        return -EINVAL;
> > +    }
> > +
> > +    host_icx = vtd_dev_icx->host_icx;
> > +    if (!host_icx) {
> > +        return -EINVAL;
> > +    }
> > +
> > +    if (!(host_icx->stage1_formats
> > +             & IOMMU_PASID_FORMAT_INTEL_VTD)) {
> > +        error_report_once("IOMMU Stage 1 format is not
> > + compatible!\n");
> 
> Shouldn't we fail with this?

oh, yes. no need to go further though host should also fail it.

> > +    }
> > +
> > +    bind_data = g_malloc0(sizeof(*bind_data));
> > +    bind_data->pasid = pasid;
> > +    g_bind_data = &bind_data->bind_data.gpasid_bind;
> > +
> > +    g_bind_data->flags = 0;
> > +    g_bind_data->vtd.flags = 0;
> > +    switch (op) {
> > +    case VTD_PASID_BIND:
> > +    case VTD_PASID_UPDATE:
> 
> Is VTD_PASID_UPDATE used anywhere?
> 
> But since it's called "UPDATE"... I really want to confirm with you that the bind() to
> the kernel will handle the UPDATE case, right?  I mean, we need to unbind first if
> there is an existing pgtable pointer.

I guess you mean host kernel. right? Actually, it's fine. host kernel
only needs to fill in the latest pgtable pointer and permission configs
to the pasid entry and then issue a cache invalidation. No need to do
unbind firstly since kernel always needs to flush cache after modifying
a pasid entry (includes valid->valid).

> 
> If the answer is yes, then I think we're good, but we really need to comment it
> somewhere about the fact.
> 
> > +        g_bind_data->version = IOMMU_UAPI_VERSION;
> > +        g_bind_data->format = IOMMU_PASID_FORMAT_INTEL_VTD;
> > +        g_bind_data->gpgd = vtd_pe_get_flpt_base(pe);
> > +        g_bind_data->addr_width = vtd_pe_get_fl_aw(pe);
> > +        g_bind_data->hpasid = pasid;
> > +        g_bind_data->gpasid = pasid;
> > +        g_bind_data->flags |= IOMMU_SVA_GPASID_VAL;
> > +        g_bind_data->vtd.flags =
> > +                             (VTD_SM_PASID_ENTRY_SRE_BIT(pe->val[2]) ? 1 : 0)
> > +                           | (VTD_SM_PASID_ENTRY_EAFE_BIT(pe->val[2]) ? 1 : 0)
> > +                           | (VTD_SM_PASID_ENTRY_PCD_BIT(pe->val[1]) ? 1 : 0)
> > +                           | (VTD_SM_PASID_ENTRY_PWT_BIT(pe->val[1]) ? 1 : 0)
> > +                           | (VTD_SM_PASID_ENTRY_EMTE_BIT(pe->val[1]) ? 1 : 0)
> > +                           | (VTD_SM_PASID_ENTRY_CD_BIT(pe->val[1]) ? 1 : 0);
> > +        g_bind_data->vtd.pat = VTD_SM_PASID_ENTRY_PAT(pe->val[1]);
> > +        g_bind_data->vtd.emt = VTD_SM_PASID_ENTRY_EMT(pe->val[1]);
> > +        ret = host_iommu_ctx_bind_stage1_pgtbl(host_icx, bind_data);
> > +        break;
> > +    case VTD_PASID_UNBIND:
> > +        g_bind_data->version = IOMMU_UAPI_VERSION;
> > +        g_bind_data->format = IOMMU_PASID_FORMAT_INTEL_VTD;
> > +        g_bind_data->gpgd = 0;
> > +        g_bind_data->addr_width = 0;
> > +        g_bind_data->hpasid = pasid;
> > +        g_bind_data->gpasid = pasid;
> > +        g_bind_data->flags |= IOMMU_SVA_GPASID_VAL;
> > +        ret = host_iommu_ctx_unbind_stage1_pgtbl(host_icx, bind_data);
> > +        break;
> > +    default:
> > +        error_report_once("Unknown VTDPASIDOp!!!\n");
> > +        break;
> > +    }
> > +
> > +    g_free(bind_data);
> > +
> > +    return ret;
> > +}
> > +
> >  /* Do a context-cache device-selective invalidation.
> >   * @func_mask: FM field after shifting
> >   */
> > @@ -2481,10 +2567,10 @@ static inline void
> > vtd_fill_in_pe_in_cache(IntelIOMMUState *s,
> >
> >      pc_entry->pasid_entry = *pe;
> >      pc_entry->pasid_cache_gen = s->pasid_cache_gen;
> > -    /*
> > -     * TODO:
> > -     * - send pasid bind to host for passthru devices
> > -     */
> > +    vtd_bind_guest_pasid(s, vtd_pasid_as->vtd_bus,
> > +                         vtd_pasid_as->devfn,
> > +                         vtd_pasid_as->pasid,
> > +                         pe, VTD_PASID_BIND);
> >  }
> >
> >  /**
> > @@ -2574,11 +2660,13 @@ static gboolean vtd_flush_pasid(gpointer key,
> gpointer value,
> >       * - when pasid-base-iotlb(piotlb) infrastructure is ready,
> >       *   should invalidate QEMU piotlb togehter with this change.
> >       */
> > +
> >      return false;
> >  remove:
> > +    vtd_bind_guest_pasid(s, vtd_bus, devfn,
> > +                         pasid, NULL, VTD_PASID_UNBIND);
> >      /*
> >       * TODO:
> > -     * - send pasid bind to host for passthru devices
> >       * - when pasid-base-iotlb(piotlb) infrastructure is ready,
> >       *   should invalidate QEMU piotlb togehter with this change.
> >       */
> > diff --git a/hw/i386/intel_iommu_internal.h
> > b/hw/i386/intel_iommu_internal.h index 01fd95c..4451acf 100644
> > --- a/hw/i386/intel_iommu_internal.h
> > +++ b/hw/i386/intel_iommu_internal.h
> > @@ -516,6 +516,20 @@ typedef struct VTDRootEntry VTDRootEntry;
> > #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL0(aw)  (0x1e0ULL |
> ~VTD_HAW_MASK(aw))
> >  #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL1      0xffffffffffe00000ULL
> >
> > +enum VTD_DUAL_STAGE_UAPI {
> > +    UAPI_BIND_GPASID,
> > +    UAPI_NUM
> > +};
> > +typedef enum VTD_DUAL_STAGE_UAPI VTD_DUAL_STAGE_UAPI;
> > +
> > +enum VTDPASIDOp {
> > +    VTD_PASID_BIND,
> > +    VTD_PASID_UNBIND,
> > +    VTD_PASID_UPDATE,
> 
> Same here (whether to drop?).
>
If above reply doesn't make sense, may drop it.

Regards,
Yi Liu
Yi Liu March 25, 2020, 12:47 p.m. UTC | #3
> From: Peter Xu <peterx@redhat.com>
> Sent: Wednesday, March 25, 2020 1:47 AM
> To: Liu, Yi L <yi.l.liu@intel.com>
> Sent: Wednesday, March 25, 2020 1:47 AM
> Subject: Re: [PATCH v1 14/22] intel_iommu: bind/unbind guest page table to host
> 
> On Sun, Mar 22, 2020 at 05:36:11AM -0700, Liu Yi L wrote:
> > This patch captures the guest PASID table entry modifications and
> > propagates the changes to host to setup dual stage DMA translation.
> > The guest page table is configured as 1st level page table (GVA->GPA)
> > whose translation result would further go through host VT-d 2nd level
> > page table(GPA->HPA) under nested translation mode. This is the key
> > part of vSVA support, and also a key to support IOVA over 1st- level
> > page table for Intel VT-d in virtualization environment.
> >
> > Cc: Kevin Tian <kevin.tian@intel.com>
> > Cc: Jacob Pan <jacob.jun.pan@linux.intel.com>
> > Cc: Peter Xu <peterx@redhat.com>
> > Cc: Yi Sun <yi.y.sun@linux.intel.com>
> > Cc: Paolo Bonzini <pbonzini@redhat.com>
> > Cc: Richard Henderson <rth@twiddle.net>
> > Signed-off-by: Liu Yi L <yi.l.liu@intel.com>
> > ---
> >  hw/i386/intel_iommu.c          | 98
> +++++++++++++++++++++++++++++++++++++++---
> >  hw/i386/intel_iommu_internal.h | 25 +++++++++++
> >  2 files changed, 118 insertions(+), 5 deletions(-)
> >
> > diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index
> > c985cae..0423c83 100644
> > --- a/hw/i386/intel_iommu.c
> > +++ b/hw/i386/intel_iommu.c
> > @@ -41,6 +41,7 @@
> >  #include "migration/vmstate.h"
> >  #include "trace.h"
> >  #include "qemu/jhash.h"
> > +#include <linux/iommu.h>
> >
> >  /* context entry operations */
> >  #define VTD_CE_GET_RID2PASID(ce) \
> > @@ -695,6 +696,16 @@ static inline uint16_t
> vtd_pe_get_domain_id(VTDPASIDEntry *pe)
> >      return VTD_SM_PASID_ENTRY_DID((pe)->val[1]);
> >  }
> >
[...]
> > +
> > +    bind_data = g_malloc0(sizeof(*bind_data));
> > +    bind_data->pasid = pasid;
> > +    g_bind_data = &bind_data->bind_data.gpasid_bind;
> > +
> > +    g_bind_data->flags = 0;
> > +    g_bind_data->vtd.flags = 0;
> > +    switch (op) {
> > +    case VTD_PASID_BIND:
> > +    case VTD_PASID_UPDATE:
> 
> Is VTD_PASID_UPDATE used anywhere?

Hmmm, there is update case in the code. But, this macro is not
used explicitly in this patch. Maybe I should drop it.

Regards,
Yi Liu
Peter Xu March 25, 2020, 2:56 p.m. UTC | #4
On Wed, Mar 25, 2020 at 12:42:58PM +0000, Liu, Yi L wrote:
> > From: Peter Xu
> > Sent: Wednesday, March 25, 2020 1:47 AM
> > To: Liu, Yi L <yi.l.liu@intel.com>
> > Subject: Re: [PATCH v1 14/22] intel_iommu: bind/unbind guest page table to host
> > 
> > On Sun, Mar 22, 2020 at 05:36:11AM -0700, Liu Yi L wrote:
> > > This patch captures the guest PASID table entry modifications and
> > > propagates the changes to host to setup dual stage DMA translation.
> > > The guest page table is configured as 1st level page table (GVA->GPA)
> > > whose translation result would further go through host VT-d 2nd level
> > > page table(GPA->HPA) under nested translation mode. This is the key
> > > part of vSVA support, and also a key to support IOVA over 1st- level
> > > page table for Intel VT-d in virtualization environment.
> > >
> > > Cc: Kevin Tian <kevin.tian@intel.com>
> > > Cc: Jacob Pan <jacob.jun.pan@linux.intel.com>
> > > Cc: Peter Xu <peterx@redhat.com>
> > > Cc: Yi Sun <yi.y.sun@linux.intel.com>
> > > Cc: Paolo Bonzini <pbonzini@redhat.com>
> > > Cc: Richard Henderson <rth@twiddle.net>
> > > Signed-off-by: Liu Yi L <yi.l.liu@intel.com>
> > > ---
> > >  hw/i386/intel_iommu.c          | 98
> > +++++++++++++++++++++++++++++++++++++++---
> > >  hw/i386/intel_iommu_internal.h | 25 +++++++++++
> > >  2 files changed, 118 insertions(+), 5 deletions(-)
> > >
> > > diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index
> > > c985cae..0423c83 100644
> > > --- a/hw/i386/intel_iommu.c
> > > +++ b/hw/i386/intel_iommu.c
> > > @@ -41,6 +41,7 @@
> > >  #include "migration/vmstate.h"
> > >  #include "trace.h"
> > >  #include "qemu/jhash.h"
> > > +#include <linux/iommu.h>
> > >
> > >  /* context entry operations */
> > >  #define VTD_CE_GET_RID2PASID(ce) \
> > > @@ -695,6 +696,16 @@ static inline uint16_t
> > vtd_pe_get_domain_id(VTDPASIDEntry *pe)
> > >      return VTD_SM_PASID_ENTRY_DID((pe)->val[1]);
> > >  }
> > >
> > > +static inline uint32_t vtd_pe_get_fl_aw(VTDPASIDEntry *pe) {
> > > +    return 48 + ((pe->val[2] >> 2) & VTD_SM_PASID_ENTRY_FLPM) * 9; }
> > > +
> > > +static inline dma_addr_t vtd_pe_get_flpt_base(VTDPASIDEntry *pe) {
> > > +    return pe->val[2] & VTD_SM_PASID_ENTRY_FLPTPTR; }
> > > +
> > >  static inline bool vtd_pdire_present(VTDPASIDDirEntry *pdire)  {
> > >      return pdire->val & 1;
> > > @@ -1856,6 +1867,81 @@ static void
> > vtd_context_global_invalidate(IntelIOMMUState *s)
> > >      vtd_iommu_replay_all(s);
> > >  }
> > >
> > > +/**
> > > + * Caller should hold iommu_lock.
> > > + */
> > > +static int vtd_bind_guest_pasid(IntelIOMMUState *s, VTDBus *vtd_bus,
> > > +                                int devfn, int pasid, VTDPASIDEntry *pe,
> > > +                                VTDPASIDOp op) {
> > > +    VTDHostIOMMUContext *vtd_dev_icx;
> > > +    HostIOMMUContext *host_icx;
> > > +    DualIOMMUStage1BindData *bind_data;
> > > +    struct iommu_gpasid_bind_data *g_bind_data;
> > > +    int ret = -1;
> > > +
> > > +    vtd_dev_icx = vtd_bus->dev_icx[devfn];
> > > +    if (!vtd_dev_icx) {
> > > +        return -EINVAL;
> > > +    }
> > > +
> > > +    host_icx = vtd_dev_icx->host_icx;
> > > +    if (!host_icx) {
> > > +        return -EINVAL;
> > > +    }
> > > +
> > > +    if (!(host_icx->stage1_formats
> > > +             & IOMMU_PASID_FORMAT_INTEL_VTD)) {
> > > +        error_report_once("IOMMU Stage 1 format is not
> > > + compatible!\n");
> > 
> > Shouldn't we fail with this?
> 
> oh, yes. no need to go further though host should also fail it.
> 
> > > +    }
> > > +
> > > +    bind_data = g_malloc0(sizeof(*bind_data));
> > > +    bind_data->pasid = pasid;
> > > +    g_bind_data = &bind_data->bind_data.gpasid_bind;
> > > +
> > > +    g_bind_data->flags = 0;
> > > +    g_bind_data->vtd.flags = 0;
> > > +    switch (op) {
> > > +    case VTD_PASID_BIND:
> > > +    case VTD_PASID_UPDATE:
> > 
> > Is VTD_PASID_UPDATE used anywhere?
> > 
> > But since it's called "UPDATE"... I really want to confirm with you that the bind() to
> > the kernel will handle the UPDATE case, right?  I mean, we need to unbind first if
> > there is an existing pgtable pointer.
> 
> I guess you mean host kernel. right? Actually, it's fine. host kernel
> only needs to fill in the latest pgtable pointer and permission configs
> to the pasid entry and then issue a cache invalidation. No need to do
> unbind firstly since kernel always needs to flush cache after modifying
> a pasid entry (includes valid->valid).

Great.

> 
> > 
> > If the answer is yes, then I think we're good, but we really need to comment it
> > somewhere about the fact.
> > 
> > > +        g_bind_data->version = IOMMU_UAPI_VERSION;
> > > +        g_bind_data->format = IOMMU_PASID_FORMAT_INTEL_VTD;
> > > +        g_bind_data->gpgd = vtd_pe_get_flpt_base(pe);
> > > +        g_bind_data->addr_width = vtd_pe_get_fl_aw(pe);
> > > +        g_bind_data->hpasid = pasid;
> > > +        g_bind_data->gpasid = pasid;
> > > +        g_bind_data->flags |= IOMMU_SVA_GPASID_VAL;
> > > +        g_bind_data->vtd.flags =
> > > +                             (VTD_SM_PASID_ENTRY_SRE_BIT(pe->val[2]) ? 1 : 0)
> > > +                           | (VTD_SM_PASID_ENTRY_EAFE_BIT(pe->val[2]) ? 1 : 0)
> > > +                           | (VTD_SM_PASID_ENTRY_PCD_BIT(pe->val[1]) ? 1 : 0)
> > > +                           | (VTD_SM_PASID_ENTRY_PWT_BIT(pe->val[1]) ? 1 : 0)
> > > +                           | (VTD_SM_PASID_ENTRY_EMTE_BIT(pe->val[1]) ? 1 : 0)
> > > +                           | (VTD_SM_PASID_ENTRY_CD_BIT(pe->val[1]) ? 1 : 0);
> > > +        g_bind_data->vtd.pat = VTD_SM_PASID_ENTRY_PAT(pe->val[1]);
> > > +        g_bind_data->vtd.emt = VTD_SM_PASID_ENTRY_EMT(pe->val[1]);
> > > +        ret = host_iommu_ctx_bind_stage1_pgtbl(host_icx, bind_data);
> > > +        break;
> > > +    case VTD_PASID_UNBIND:
> > > +        g_bind_data->version = IOMMU_UAPI_VERSION;
> > > +        g_bind_data->format = IOMMU_PASID_FORMAT_INTEL_VTD;
> > > +        g_bind_data->gpgd = 0;
> > > +        g_bind_data->addr_width = 0;
> > > +        g_bind_data->hpasid = pasid;
> > > +        g_bind_data->gpasid = pasid;
> > > +        g_bind_data->flags |= IOMMU_SVA_GPASID_VAL;
> > > +        ret = host_iommu_ctx_unbind_stage1_pgtbl(host_icx, bind_data);
> > > +        break;
> > > +    default:
> > > +        error_report_once("Unknown VTDPASIDOp!!!\n");
> > > +        break;
> > > +    }
> > > +
> > > +    g_free(bind_data);
> > > +
> > > +    return ret;
> > > +}
> > > +
> > >  /* Do a context-cache device-selective invalidation.
> > >   * @func_mask: FM field after shifting
> > >   */
> > > @@ -2481,10 +2567,10 @@ static inline void
> > > vtd_fill_in_pe_in_cache(IntelIOMMUState *s,
> > >
> > >      pc_entry->pasid_entry = *pe;
> > >      pc_entry->pasid_cache_gen = s->pasid_cache_gen;
> > > -    /*
> > > -     * TODO:
> > > -     * - send pasid bind to host for passthru devices
> > > -     */
> > > +    vtd_bind_guest_pasid(s, vtd_pasid_as->vtd_bus,
> > > +                         vtd_pasid_as->devfn,
> > > +                         vtd_pasid_as->pasid,
> > > +                         pe, VTD_PASID_BIND);
> > >  }
> > >
> > >  /**
> > > @@ -2574,11 +2660,13 @@ static gboolean vtd_flush_pasid(gpointer key,
> > gpointer value,
> > >       * - when pasid-base-iotlb(piotlb) infrastructure is ready,
> > >       *   should invalidate QEMU piotlb togehter with this change.
> > >       */
> > > +
> > >      return false;
> > >  remove:
> > > +    vtd_bind_guest_pasid(s, vtd_bus, devfn,
> > > +                         pasid, NULL, VTD_PASID_UNBIND);
> > >      /*
> > >       * TODO:
> > > -     * - send pasid bind to host for passthru devices
> > >       * - when pasid-base-iotlb(piotlb) infrastructure is ready,
> > >       *   should invalidate QEMU piotlb togehter with this change.
> > >       */
> > > diff --git a/hw/i386/intel_iommu_internal.h
> > > b/hw/i386/intel_iommu_internal.h index 01fd95c..4451acf 100644
> > > --- a/hw/i386/intel_iommu_internal.h
> > > +++ b/hw/i386/intel_iommu_internal.h
> > > @@ -516,6 +516,20 @@ typedef struct VTDRootEntry VTDRootEntry;
> > > #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL0(aw)  (0x1e0ULL |
> > ~VTD_HAW_MASK(aw))
> > >  #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL1      0xffffffffffe00000ULL
> > >
> > > +enum VTD_DUAL_STAGE_UAPI {
> > > +    UAPI_BIND_GPASID,
> > > +    UAPI_NUM
> > > +};
> > > +typedef enum VTD_DUAL_STAGE_UAPI VTD_DUAL_STAGE_UAPI;
> > > +
> > > +enum VTDPASIDOp {
> > > +    VTD_PASID_BIND,
> > > +    VTD_PASID_UNBIND,
> > > +    VTD_PASID_UPDATE,
> > 
> > Same here (whether to drop?).
> >
> If above reply doesn't make sense, may drop it.

Your reply makes perfect sense, but still, could we drop it because
it's not used? :)

I suggest drop UPDATE, then either:

  - comment at VTD_PASID_BIND that when binding exists, we'll update
    the entry so the caller does not need to call unbind, or,

  - rename BIND to BIND_UPDATE to show that

What do you think?
Yi Liu March 26, 2020, 3:04 a.m. UTC | #5
> From: Peter Xu <peterx@redhat.com>
> Sent: Wednesday, March 25, 2020 10:57 PM
> To: Liu, Yi L <yi.l.liu@intel.com>
> Subject: Re: [PATCH v1 14/22] intel_iommu: bind/unbind guest page table to host
> 
> On Wed, Mar 25, 2020 at 12:42:58PM +0000, Liu, Yi L wrote:
> > > From: Peter Xu
> > > Sent: Wednesday, March 25, 2020 1:47 AM
> > > To: Liu, Yi L <yi.l.liu@intel.com>
> > > Subject: Re: [PATCH v1 14/22] intel_iommu: bind/unbind guest page
> > > table to host
> > >
> > > On Sun, Mar 22, 2020 at 05:36:11AM -0700, Liu Yi L wrote:
> > > > This patch captures the guest PASID table entry modifications and
> > > > propagates the changes to host to setup dual stage DMA translation.
> > > > The guest page table is configured as 1st level page table
> > > > (GVA->GPA) whose translation result would further go through host
> > > > VT-d 2nd level page table(GPA->HPA) under nested translation mode.
> > > > This is the key part of vSVA support, and also a key to support
> > > > IOVA over 1st- level page table for Intel VT-d in virtualization environment.
> > > >
> > > > Cc: Kevin Tian <kevin.tian@intel.com>
> > > > Cc: Jacob Pan <jacob.jun.pan@linux.intel.com>
> > > > Cc: Peter Xu <peterx@redhat.com>
> > > > Cc: Yi Sun <yi.y.sun@linux.intel.com>
> > > > Cc: Paolo Bonzini <pbonzini@redhat.com>
> > > > Cc: Richard Henderson <rth@twiddle.net>
> > > > Signed-off-by: Liu Yi L <yi.l.liu@intel.com>
> > > > ---
> > > >  hw/i386/intel_iommu.c          | 98
> > > +++++++++++++++++++++++++++++++++++++++---
> > > >  hw/i386/intel_iommu_internal.h | 25 +++++++++++
> > > >  2 files changed, 118 insertions(+), 5 deletions(-)
> > > >
> > > > diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index
> > > > c985cae..0423c83 100644
> > > > --- a/hw/i386/intel_iommu.c
> > > > +++ b/hw/i386/intel_iommu.c
> > > > @@ -41,6 +41,7 @@
> > > >  #include "migration/vmstate.h"
> > > >  #include "trace.h"
> > > >  #include "qemu/jhash.h"
> > > > +#include <linux/iommu.h>
> > > >
> > > >  /* context entry operations */
> > > >  #define VTD_CE_GET_RID2PASID(ce) \ @@ -695,6 +696,16 @@ static
> > > > inline uint16_t
> > > vtd_pe_get_domain_id(VTDPASIDEntry *pe)
> > > >      return VTD_SM_PASID_ENTRY_DID((pe)->val[1]);
> > > >  }
> > > >
> > > > +static inline uint32_t vtd_pe_get_fl_aw(VTDPASIDEntry *pe) {
> > > > +    return 48 + ((pe->val[2] >> 2) & VTD_SM_PASID_ENTRY_FLPM) *
> > > > +9; }
> > > > +
> > > > +static inline dma_addr_t vtd_pe_get_flpt_base(VTDPASIDEntry *pe) {
> > > > +    return pe->val[2] & VTD_SM_PASID_ENTRY_FLPTPTR; }
> > > > +
> > > >  static inline bool vtd_pdire_present(VTDPASIDDirEntry *pdire)  {
> > > >      return pdire->val & 1;
> > > > @@ -1856,6 +1867,81 @@ static void
> > > vtd_context_global_invalidate(IntelIOMMUState *s)
> > > >      vtd_iommu_replay_all(s);
> > > >  }
> > > >
> > > > +/**
> > > > + * Caller should hold iommu_lock.
> > > > + */
> > > > +static int vtd_bind_guest_pasid(IntelIOMMUState *s, VTDBus *vtd_bus,
> > > > +                                int devfn, int pasid, VTDPASIDEntry *pe,
> > > > +                                VTDPASIDOp op) {
> > > > +    VTDHostIOMMUContext *vtd_dev_icx;
> > > > +    HostIOMMUContext *host_icx;
> > > > +    DualIOMMUStage1BindData *bind_data;
> > > > +    struct iommu_gpasid_bind_data *g_bind_data;
> > > > +    int ret = -1;
> > > > +
> > > > +    vtd_dev_icx = vtd_bus->dev_icx[devfn];
> > > > +    if (!vtd_dev_icx) {
> > > > +        return -EINVAL;
> > > > +    }
> > > > +
> > > > +    host_icx = vtd_dev_icx->host_icx;
> > > > +    if (!host_icx) {
> > > > +        return -EINVAL;
> > > > +    }
> > > > +
> > > > +    if (!(host_icx->stage1_formats
> > > > +             & IOMMU_PASID_FORMAT_INTEL_VTD)) {
> > > > +        error_report_once("IOMMU Stage 1 format is not
> > > > + compatible!\n");
> > >
> > > Shouldn't we fail with this?
> >
> > oh, yes. no need to go further though host should also fail it.
> >
> > > > +    }
> > > > +
> > > > +    bind_data = g_malloc0(sizeof(*bind_data));
> > > > +    bind_data->pasid = pasid;
> > > > +    g_bind_data = &bind_data->bind_data.gpasid_bind;
> > > > +
> > > > +    g_bind_data->flags = 0;
> > > > +    g_bind_data->vtd.flags = 0;
> > > > +    switch (op) {
> > > > +    case VTD_PASID_BIND:
> > > > +    case VTD_PASID_UPDATE:
> > >
> > > Is VTD_PASID_UPDATE used anywhere?
> > >
> > > But since it's called "UPDATE"... I really want to confirm with you
> > > that the bind() to the kernel will handle the UPDATE case, right?  I
> > > mean, we need to unbind first if there is an existing pgtable pointer.
> >
> > I guess you mean host kernel. right? Actually, it's fine. host kernel
> > only needs to fill in the latest pgtable pointer and permission
> > configs to the pasid entry and then issue a cache invalidation. No
> > need to do unbind firstly since kernel always needs to flush cache
> > after modifying a pasid entry (includes valid->valid).
> 
> Great.
> 
> >
> > >
> > > If the answer is yes, then I think we're good, but we really need to
> > > comment it somewhere about the fact.
> > >
> > > > +        g_bind_data->version = IOMMU_UAPI_VERSION;
> > > > +        g_bind_data->format = IOMMU_PASID_FORMAT_INTEL_VTD;
> > > > +        g_bind_data->gpgd = vtd_pe_get_flpt_base(pe);
> > > > +        g_bind_data->addr_width = vtd_pe_get_fl_aw(pe);
> > > > +        g_bind_data->hpasid = pasid;
> > > > +        g_bind_data->gpasid = pasid;
> > > > +        g_bind_data->flags |= IOMMU_SVA_GPASID_VAL;
> > > > +        g_bind_data->vtd.flags =
> > > > +                             (VTD_SM_PASID_ENTRY_SRE_BIT(pe->val[2]) ? 1 : 0)
> > > > +                           | (VTD_SM_PASID_ENTRY_EAFE_BIT(pe->val[2]) ? 1 : 0)
> > > > +                           | (VTD_SM_PASID_ENTRY_PCD_BIT(pe->val[1]) ? 1 : 0)
> > > > +                           | (VTD_SM_PASID_ENTRY_PWT_BIT(pe->val[1]) ? 1 : 0)
> > > > +                           | (VTD_SM_PASID_ENTRY_EMTE_BIT(pe->val[1]) ? 1 : 0)
> > > > +                           | (VTD_SM_PASID_ENTRY_CD_BIT(pe->val[1]) ? 1 : 0);
> > > > +        g_bind_data->vtd.pat = VTD_SM_PASID_ENTRY_PAT(pe->val[1]);
> > > > +        g_bind_data->vtd.emt = VTD_SM_PASID_ENTRY_EMT(pe->val[1]);
> > > > +        ret = host_iommu_ctx_bind_stage1_pgtbl(host_icx, bind_data);
> > > > +        break;
> > > > +    case VTD_PASID_UNBIND:
> > > > +        g_bind_data->version = IOMMU_UAPI_VERSION;
> > > > +        g_bind_data->format = IOMMU_PASID_FORMAT_INTEL_VTD;
> > > > +        g_bind_data->gpgd = 0;
> > > > +        g_bind_data->addr_width = 0;
> > > > +        g_bind_data->hpasid = pasid;
> > > > +        g_bind_data->gpasid = pasid;
> > > > +        g_bind_data->flags |= IOMMU_SVA_GPASID_VAL;
> > > > +        ret = host_iommu_ctx_unbind_stage1_pgtbl(host_icx, bind_data);
> > > > +        break;
> > > > +    default:
> > > > +        error_report_once("Unknown VTDPASIDOp!!!\n");
> > > > +        break;
> > > > +    }
> > > > +
> > > > +    g_free(bind_data);
> > > > +
> > > > +    return ret;
> > > > +}
> > > > +
> > > >  /* Do a context-cache device-selective invalidation.
> > > >   * @func_mask: FM field after shifting
> > > >   */
> > > > @@ -2481,10 +2567,10 @@ static inline void
> > > > vtd_fill_in_pe_in_cache(IntelIOMMUState *s,
> > > >
> > > >      pc_entry->pasid_entry = *pe;
> > > >      pc_entry->pasid_cache_gen = s->pasid_cache_gen;
> > > > -    /*
> > > > -     * TODO:
> > > > -     * - send pasid bind to host for passthru devices
> > > > -     */
> > > > +    vtd_bind_guest_pasid(s, vtd_pasid_as->vtd_bus,
> > > > +                         vtd_pasid_as->devfn,
> > > > +                         vtd_pasid_as->pasid,
> > > > +                         pe, VTD_PASID_BIND);
> > > >  }
> > > >
> > > >  /**
> > > > @@ -2574,11 +2660,13 @@ static gboolean vtd_flush_pasid(gpointer
> > > > key,
> > > gpointer value,
> > > >       * - when pasid-base-iotlb(piotlb) infrastructure is ready,
> > > >       *   should invalidate QEMU piotlb togehter with this change.
> > > >       */
> > > > +
> > > >      return false;
> > > >  remove:
> > > > +    vtd_bind_guest_pasid(s, vtd_bus, devfn,
> > > > +                         pasid, NULL, VTD_PASID_UNBIND);
> > > >      /*
> > > >       * TODO:
> > > > -     * - send pasid bind to host for passthru devices
> > > >       * - when pasid-base-iotlb(piotlb) infrastructure is ready,
> > > >       *   should invalidate QEMU piotlb togehter with this change.
> > > >       */
> > > > diff --git a/hw/i386/intel_iommu_internal.h
> > > > b/hw/i386/intel_iommu_internal.h index 01fd95c..4451acf 100644
> > > > --- a/hw/i386/intel_iommu_internal.h
> > > > +++ b/hw/i386/intel_iommu_internal.h
> > > > @@ -516,6 +516,20 @@ typedef struct VTDRootEntry VTDRootEntry;
> > > > #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL0(aw)  (0x1e0ULL |
> > > ~VTD_HAW_MASK(aw))
> > > >  #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL1      0xffffffffffe00000ULL
> > > >
> > > > +enum VTD_DUAL_STAGE_UAPI {
> > > > +    UAPI_BIND_GPASID,
> > > > +    UAPI_NUM
> > > > +};
> > > > +typedef enum VTD_DUAL_STAGE_UAPI VTD_DUAL_STAGE_UAPI;
> > > > +
> > > > +enum VTDPASIDOp {
> > > > +    VTD_PASID_BIND,
> > > > +    VTD_PASID_UNBIND,
> > > > +    VTD_PASID_UPDATE,
> > >
> > > Same here (whether to drop?).
> > >
> > If above reply doesn't make sense, may drop it.
> 
> Your reply makes perfect sense, but still, could we drop it because it's not used? :)
> 
> I suggest drop UPDATE, then either:
> 
>   - comment at VTD_PASID_BIND that when binding exists, we'll update
>     the entry so the caller does not need to call unbind, or,
> 
>   - rename BIND to BIND_UPDATE to show that
> 
> What do you think?

I see. At the beginning, there is explicit usage for it. But due to code
merge, there is no explicit usage now. So I can drop it. But yeah, I'll
see if it is needed when switch to use replay code for both PSI and DSI/GSI
case. If so, will apply the two options in your reply.

Regards,
Yi Liu
diff mbox series

Patch

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index c985cae..0423c83 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -41,6 +41,7 @@ 
 #include "migration/vmstate.h"
 #include "trace.h"
 #include "qemu/jhash.h"
+#include <linux/iommu.h>
 
 /* context entry operations */
 #define VTD_CE_GET_RID2PASID(ce) \
@@ -695,6 +696,16 @@  static inline uint16_t vtd_pe_get_domain_id(VTDPASIDEntry *pe)
     return VTD_SM_PASID_ENTRY_DID((pe)->val[1]);
 }
 
+static inline uint32_t vtd_pe_get_fl_aw(VTDPASIDEntry *pe)
+{
+    return 48 + ((pe->val[2] >> 2) & VTD_SM_PASID_ENTRY_FLPM) * 9;
+}
+
+static inline dma_addr_t vtd_pe_get_flpt_base(VTDPASIDEntry *pe)
+{
+    return pe->val[2] & VTD_SM_PASID_ENTRY_FLPTPTR;
+}
+
 static inline bool vtd_pdire_present(VTDPASIDDirEntry *pdire)
 {
     return pdire->val & 1;
@@ -1856,6 +1867,81 @@  static void vtd_context_global_invalidate(IntelIOMMUState *s)
     vtd_iommu_replay_all(s);
 }
 
+/**
+ * Caller should hold iommu_lock.
+ */
+static int vtd_bind_guest_pasid(IntelIOMMUState *s, VTDBus *vtd_bus,
+                                int devfn, int pasid, VTDPASIDEntry *pe,
+                                VTDPASIDOp op)
+{
+    VTDHostIOMMUContext *vtd_dev_icx;
+    HostIOMMUContext *host_icx;
+    DualIOMMUStage1BindData *bind_data;
+    struct iommu_gpasid_bind_data *g_bind_data;
+    int ret = -1;
+
+    vtd_dev_icx = vtd_bus->dev_icx[devfn];
+    if (!vtd_dev_icx) {
+        return -EINVAL;
+    }
+
+    host_icx = vtd_dev_icx->host_icx;
+    if (!host_icx) {
+        return -EINVAL;
+    }
+
+    if (!(host_icx->stage1_formats
+             & IOMMU_PASID_FORMAT_INTEL_VTD)) {
+        error_report_once("IOMMU Stage 1 format is not compatible!\n");
+    }
+
+    bind_data = g_malloc0(sizeof(*bind_data));
+    bind_data->pasid = pasid;
+    g_bind_data = &bind_data->bind_data.gpasid_bind;
+
+    g_bind_data->flags = 0;
+    g_bind_data->vtd.flags = 0;
+    switch (op) {
+    case VTD_PASID_BIND:
+    case VTD_PASID_UPDATE:
+        g_bind_data->version = IOMMU_UAPI_VERSION;
+        g_bind_data->format = IOMMU_PASID_FORMAT_INTEL_VTD;
+        g_bind_data->gpgd = vtd_pe_get_flpt_base(pe);
+        g_bind_data->addr_width = vtd_pe_get_fl_aw(pe);
+        g_bind_data->hpasid = pasid;
+        g_bind_data->gpasid = pasid;
+        g_bind_data->flags |= IOMMU_SVA_GPASID_VAL;
+        g_bind_data->vtd.flags =
+                             (VTD_SM_PASID_ENTRY_SRE_BIT(pe->val[2]) ? 1 : 0)
+                           | (VTD_SM_PASID_ENTRY_EAFE_BIT(pe->val[2]) ? 1 : 0)
+                           | (VTD_SM_PASID_ENTRY_PCD_BIT(pe->val[1]) ? 1 : 0)
+                           | (VTD_SM_PASID_ENTRY_PWT_BIT(pe->val[1]) ? 1 : 0)
+                           | (VTD_SM_PASID_ENTRY_EMTE_BIT(pe->val[1]) ? 1 : 0)
+                           | (VTD_SM_PASID_ENTRY_CD_BIT(pe->val[1]) ? 1 : 0);
+        g_bind_data->vtd.pat = VTD_SM_PASID_ENTRY_PAT(pe->val[1]);
+        g_bind_data->vtd.emt = VTD_SM_PASID_ENTRY_EMT(pe->val[1]);
+        ret = host_iommu_ctx_bind_stage1_pgtbl(host_icx, bind_data);
+        break;
+    case VTD_PASID_UNBIND:
+        g_bind_data->version = IOMMU_UAPI_VERSION;
+        g_bind_data->format = IOMMU_PASID_FORMAT_INTEL_VTD;
+        g_bind_data->gpgd = 0;
+        g_bind_data->addr_width = 0;
+        g_bind_data->hpasid = pasid;
+        g_bind_data->gpasid = pasid;
+        g_bind_data->flags |= IOMMU_SVA_GPASID_VAL;
+        ret = host_iommu_ctx_unbind_stage1_pgtbl(host_icx, bind_data);
+        break;
+    default:
+        error_report_once("Unknown VTDPASIDOp!!!\n");
+        break;
+    }
+
+    g_free(bind_data);
+
+    return ret;
+}
+
 /* Do a context-cache device-selective invalidation.
  * @func_mask: FM field after shifting
  */
@@ -2481,10 +2567,10 @@  static inline void vtd_fill_in_pe_in_cache(IntelIOMMUState *s,
 
     pc_entry->pasid_entry = *pe;
     pc_entry->pasid_cache_gen = s->pasid_cache_gen;
-    /*
-     * TODO:
-     * - send pasid bind to host for passthru devices
-     */
+    vtd_bind_guest_pasid(s, vtd_pasid_as->vtd_bus,
+                         vtd_pasid_as->devfn,
+                         vtd_pasid_as->pasid,
+                         pe, VTD_PASID_BIND);
 }
 
 /**
@@ -2574,11 +2660,13 @@  static gboolean vtd_flush_pasid(gpointer key, gpointer value,
      * - when pasid-base-iotlb(piotlb) infrastructure is ready,
      *   should invalidate QEMU piotlb togehter with this change.
      */
+
     return false;
 remove:
+    vtd_bind_guest_pasid(s, vtd_bus, devfn,
+                         pasid, NULL, VTD_PASID_UNBIND);
     /*
      * TODO:
-     * - send pasid bind to host for passthru devices
      * - when pasid-base-iotlb(piotlb) infrastructure is ready,
      *   should invalidate QEMU piotlb togehter with this change.
      */
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index 01fd95c..4451acf 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -516,6 +516,20 @@  typedef struct VTDRootEntry VTDRootEntry;
 #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL0(aw)  (0x1e0ULL | ~VTD_HAW_MASK(aw))
 #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL1      0xffffffffffe00000ULL
 
+enum VTD_DUAL_STAGE_UAPI {
+    UAPI_BIND_GPASID,
+    UAPI_NUM
+};
+typedef enum VTD_DUAL_STAGE_UAPI VTD_DUAL_STAGE_UAPI;
+
+enum VTDPASIDOp {
+    VTD_PASID_BIND,
+    VTD_PASID_UNBIND,
+    VTD_PASID_UPDATE,
+    VTD_OP_NUM
+};
+typedef enum VTDPASIDOp VTDPASIDOp;
+
 struct VTDPASIDCacheInfo {
 #define VTD_PASID_CACHE_GLOBAL   (1ULL << 0)
 #define VTD_PASID_CACHE_DOMSI    (1ULL << 1)
@@ -552,6 +566,17 @@  typedef struct VTDPASIDCacheInfo VTDPASIDCacheInfo;
 #define VTD_SM_PASID_ENTRY_AW          7ULL /* Adjusted guest-address-width */
 #define VTD_SM_PASID_ENTRY_DID(val)    ((val) & VTD_DOMAIN_ID_MASK)
 
+#define VTD_SM_PASID_ENTRY_FLPM          3ULL
+#define VTD_SM_PASID_ENTRY_FLPTPTR       (~0xfffULL)
+#define VTD_SM_PASID_ENTRY_SRE_BIT(val)  (!!((val) & 1ULL))
+#define VTD_SM_PASID_ENTRY_EAFE_BIT(val) (!!(((val) >> 7) & 1ULL))
+#define VTD_SM_PASID_ENTRY_PCD_BIT(val)  (!!(((val) >> 31) & 1ULL))
+#define VTD_SM_PASID_ENTRY_PWT_BIT(val)  (!!(((val) >> 30) & 1ULL))
+#define VTD_SM_PASID_ENTRY_EMTE_BIT(val) (!!(((val) >> 26) & 1ULL))
+#define VTD_SM_PASID_ENTRY_CD_BIT(val)   (!!(((val) >> 25) & 1ULL))
+#define VTD_SM_PASID_ENTRY_PAT(val)      (((val) >> 32) & 0xFFFFFFFFULL)
+#define VTD_SM_PASID_ENTRY_EMT(val)      (((val) >> 27) & 0x7ULL)
+
 /* Second Level Page Translation Pointer*/
 #define VTD_SM_PASID_ENTRY_SLPTPTR     (~0xfffULL)