diff mbox series

vdpa: Add support for no-IOMMU mode

Message ID 20240530101823.1210161-1-schalla@marvell.com (mailing list archive)
State New, archived
Headers show
Series vdpa: Add support for no-IOMMU mode | expand

Commit Message

Srujana Challa May 30, 2024, 10:18 a.m. UTC
This commit introduces support for an UNSAFE, no-IOMMU mode in the
vhost-vdpa driver. When enabled, this mode provides no device isolation,
no DMA translation, no host kernel protection, and cannot be used for
device assignment to virtual machines. It requires RAWIO permissions
and will taint the kernel.
This mode requires enabling the "enable_vhost_vdpa_unsafe_noiommu_mode"
option on the vhost-vdpa driver. This mode would be useful to get
better performance on specifice low end machines and can be leveraged
by embedded platforms where applications run in controlled environment.

Signed-off-by: Srujana Challa <schalla@marvell.com>
---
 drivers/vhost/vdpa.c | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

Comments

Jason Wang May 31, 2024, 2:26 a.m. UTC | #1
On Thu, May 30, 2024 at 6:18 PM Srujana Challa <schalla@marvell.com> wrote:
>
> This commit introduces support for an UNSAFE, no-IOMMU mode in the
> vhost-vdpa driver. When enabled, this mode provides no device isolation,
> no DMA translation, no host kernel protection, and cannot be used for
> device assignment to virtual machines. It requires RAWIO permissions
> and will taint the kernel.
> This mode requires enabling the "enable_vhost_vdpa_unsafe_noiommu_mode"
> option on the vhost-vdpa driver. This mode would be useful to get
> better performance on specifice low end machines and can be leveraged
> by embedded platforms where applications run in controlled environment.

I wonder if it's better to do it per driver:

1) we have device that use its own IOMMU, one example is the mlx5 vDPA device
2) we have software devices which doesn't require IOMMU at all (but
still with protection)

Thanks

>
> Signed-off-by: Srujana Challa <schalla@marvell.com>
> ---
>  drivers/vhost/vdpa.c | 23 +++++++++++++++++++++++
>  1 file changed, 23 insertions(+)
>
> diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
> index bc4a51e4638b..d071c30125aa 100644
> --- a/drivers/vhost/vdpa.c
> +++ b/drivers/vhost/vdpa.c
> @@ -36,6 +36,11 @@ enum {
>
>  #define VHOST_VDPA_IOTLB_BUCKETS 16
>
> +bool vhost_vdpa_noiommu;
> +module_param_named(enable_vhost_vdpa_unsafe_noiommu_mode,
> +                  vhost_vdpa_noiommu, bool, 0644);
> +MODULE_PARM_DESC(enable_vhost_vdpa_unsafe_noiommu_mode, "Enable UNSAFE, no-IOMMU mode.  This mode provides no device isolation, no DMA translation, no host kernel protection, cannot be used for device assignment to virtual machines, requires RAWIO permissions, and will taint the kernel.  If you do not know what this is for, step away. (default: false)");
> +
>  struct vhost_vdpa_as {
>         struct hlist_node hash_link;
>         struct vhost_iotlb iotlb;
> @@ -60,6 +65,7 @@ struct vhost_vdpa {
>         struct vdpa_iova_range range;
>         u32 batch_asid;
>         bool suspended;
> +       bool noiommu_en;
>  };
>
>  static DEFINE_IDA(vhost_vdpa_ida);
> @@ -887,6 +893,10 @@ static void vhost_vdpa_general_unmap(struct vhost_vdpa *v,
>  {
>         struct vdpa_device *vdpa = v->vdpa;
>         const struct vdpa_config_ops *ops = vdpa->config;
> +
> +       if (v->noiommu_en)
> +               return;
> +
>         if (ops->dma_map) {
>                 ops->dma_unmap(vdpa, asid, map->start, map->size);
>         } else if (ops->set_map == NULL) {
> @@ -980,6 +990,9 @@ static int vhost_vdpa_map(struct vhost_vdpa *v, struct vhost_iotlb *iotlb,
>         if (r)
>                 return r;
>
> +       if (v->noiommu_en)
> +               goto skip_map;
> +
>         if (ops->dma_map) {
>                 r = ops->dma_map(vdpa, asid, iova, size, pa, perm, opaque);
>         } else if (ops->set_map) {
> @@ -995,6 +1008,7 @@ static int vhost_vdpa_map(struct vhost_vdpa *v, struct vhost_iotlb *iotlb,
>                 return r;
>         }
>
> +skip_map:
>         if (!vdpa->use_va)
>                 atomic64_add(PFN_DOWN(size), &dev->mm->pinned_vm);
>
> @@ -1298,6 +1312,7 @@ static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v)
>         struct vdpa_device *vdpa = v->vdpa;
>         const struct vdpa_config_ops *ops = vdpa->config;
>         struct device *dma_dev = vdpa_get_dma_dev(vdpa);
> +       struct iommu_domain *domain;
>         const struct bus_type *bus;
>         int ret;
>
> @@ -1305,6 +1320,14 @@ static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v)
>         if (ops->set_map || ops->dma_map)
>                 return 0;
>
> +       domain = iommu_get_domain_for_dev(dma_dev);
> +       if ((!domain || domain->type == IOMMU_DOMAIN_IDENTITY) &&
> +           vhost_vdpa_noiommu && capable(CAP_SYS_RAWIO)) {
> +               add_taint(TAINT_USER, LOCKDEP_STILL_OK);
> +               dev_warn(&v->dev, "Adding kernel taint for noiommu on device\n");
> +               v->noiommu_en = true;
> +               return 0;
> +       }
>         bus = dma_dev->bus;
>         if (!bus)
>                 return -EFAULT;
> --
> 2.25.1
>
kernel test robot June 1, 2024, 7:13 p.m. UTC | #2
Hi Srujana,

kernel test robot noticed the following build warnings:

[auto build test WARNING on mst-vhost/linux-next]
[also build test WARNING on linus/master v6.10-rc1 next-20240531]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Srujana-Challa/vdpa-Add-support-for-no-IOMMU-mode/20240530-182129
base:   https://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost.git linux-next
patch link:    https://lore.kernel.org/r/20240530101823.1210161-1-schalla%40marvell.com
patch subject: [PATCH] vdpa: Add support for no-IOMMU mode
config: powerpc64-randconfig-r123-20240601 (https://download.01.org/0day-ci/archive/20240602/202406020255.Xp36fzlp-lkp@intel.com/config)
compiler: powerpc64-linux-gcc (GCC) 13.2.0
reproduce: (https://download.01.org/0day-ci/archive/20240602/202406020255.Xp36fzlp-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202406020255.Xp36fzlp-lkp@intel.com/

sparse warnings: (new ones prefixed by >>)
>> drivers/vhost/vdpa.c:39:6: sparse: sparse: symbol 'vhost_vdpa_noiommu' was not declared. Should it be static?

vim +/vhost_vdpa_noiommu +39 drivers/vhost/vdpa.c

    38	
  > 39	bool vhost_vdpa_noiommu;
    40	module_param_named(enable_vhost_vdpa_unsafe_noiommu_mode,
    41			   vhost_vdpa_noiommu, bool, 0644);
    42	MODULE_PARM_DESC(enable_vhost_vdpa_unsafe_noiommu_mode, "Enable UNSAFE, no-IOMMU mode.  This mode provides no device isolation, no DMA translation, no host kernel protection, cannot be used for device assignment to virtual machines, requires RAWIO permissions, and will taint the kernel.  If you do not know what this is for, step away. (default: false)");
    43
Srujana Challa June 4, 2024, 9:29 a.m. UTC | #3
> Subject: [EXTERNAL] Re: [PATCH] vdpa: Add support for no-IOMMU mode
> 
> Prioritize security for external emails: Confirm sender and content safety
> before clicking links or opening attachments
> 
> ----------------------------------------------------------------------
> On Thu, May 30, 2024 at 6:18 PM Srujana Challa <schalla@marvell.com>
> wrote:
> >
> > This commit introduces support for an UNSAFE, no-IOMMU mode in the
> > vhost-vdpa driver. When enabled, this mode provides no device
> > isolation, no DMA translation, no host kernel protection, and cannot
> > be used for device assignment to virtual machines. It requires RAWIO
> > permissions and will taint the kernel.
> > This mode requires enabling the
> "enable_vhost_vdpa_unsafe_noiommu_mode"
> > option on the vhost-vdpa driver. This mode would be useful to get
> > better performance on specifice low end machines and can be leveraged
> > by embedded platforms where applications run in controlled environment.
> 
> I wonder if it's better to do it per driver:
> 
> 1) we have device that use its own IOMMU, one example is the mlx5 vDPA
> device
> 2) we have software devices which doesn't require IOMMU at all (but still with
> protection)

If I understand correctly, you’re suggesting that we create a module parameter
specific to the vdpa driver. Then, we can add a flag to the ‘struct vdpa_device’
and set that flag within the vdpa driver based on the module parameter.
Finally, we would use this flag to taint the kernel and go in no-iommu path
in the vhost-vdpa driver?
> 
> Thanks
> 
> >
> > Signed-off-by: Srujana Challa <schalla@marvell.com>
> > ---
> >  drivers/vhost/vdpa.c | 23 +++++++++++++++++++++++
> >  1 file changed, 23 insertions(+)
> >
> > diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index
> > bc4a51e4638b..d071c30125aa 100644
> > --- a/drivers/vhost/vdpa.c
> > +++ b/drivers/vhost/vdpa.c
> > @@ -36,6 +36,11 @@ enum {
> >
> >  #define VHOST_VDPA_IOTLB_BUCKETS 16
> >
> > +bool vhost_vdpa_noiommu;
> > +module_param_named(enable_vhost_vdpa_unsafe_noiommu_mode,
> > +                  vhost_vdpa_noiommu, bool, 0644);
> > +MODULE_PARM_DESC(enable_vhost_vdpa_unsafe_noiommu_mode,
> "Enable
> > +UNSAFE, no-IOMMU mode.  This mode provides no device isolation, no
> > +DMA translation, no host kernel protection, cannot be used for device
> > +assignment to virtual machines, requires RAWIO permissions, and will
> > +taint the kernel.  If you do not know what this is for, step away.
> > +(default: false)");
> > +
> >  struct vhost_vdpa_as {
> >         struct hlist_node hash_link;
> >         struct vhost_iotlb iotlb;
> > @@ -60,6 +65,7 @@ struct vhost_vdpa {
> >         struct vdpa_iova_range range;
> >         u32 batch_asid;
> >         bool suspended;
> > +       bool noiommu_en;
> >  };
> >
> >  static DEFINE_IDA(vhost_vdpa_ida);
> > @@ -887,6 +893,10 @@ static void vhost_vdpa_general_unmap(struct
> > vhost_vdpa *v,  {
> >         struct vdpa_device *vdpa = v->vdpa;
> >         const struct vdpa_config_ops *ops = vdpa->config;
> > +
> > +       if (v->noiommu_en)
> > +               return;
> > +
> >         if (ops->dma_map) {
> >                 ops->dma_unmap(vdpa, asid, map->start, map->size);
> >         } else if (ops->set_map == NULL) { @@ -980,6 +990,9 @@ static
> > int vhost_vdpa_map(struct vhost_vdpa *v, struct vhost_iotlb *iotlb,
> >         if (r)
> >                 return r;
> >
> > +       if (v->noiommu_en)
> > +               goto skip_map;
> > +
> >         if (ops->dma_map) {
> >                 r = ops->dma_map(vdpa, asid, iova, size, pa, perm, opaque);
> >         } else if (ops->set_map) {
> > @@ -995,6 +1008,7 @@ static int vhost_vdpa_map(struct vhost_vdpa *v,
> struct vhost_iotlb *iotlb,
> >                 return r;
> >         }
> >
> > +skip_map:
> >         if (!vdpa->use_va)
> >                 atomic64_add(PFN_DOWN(size), &dev->mm->pinned_vm);
> >
> > @@ -1298,6 +1312,7 @@ static int vhost_vdpa_alloc_domain(struct
> vhost_vdpa *v)
> >         struct vdpa_device *vdpa = v->vdpa;
> >         const struct vdpa_config_ops *ops = vdpa->config;
> >         struct device *dma_dev = vdpa_get_dma_dev(vdpa);
> > +       struct iommu_domain *domain;
> >         const struct bus_type *bus;
> >         int ret;
> >
> > @@ -1305,6 +1320,14 @@ static int vhost_vdpa_alloc_domain(struct
> vhost_vdpa *v)
> >         if (ops->set_map || ops->dma_map)
> >                 return 0;
> >
> > +       domain = iommu_get_domain_for_dev(dma_dev);
> > +       if ((!domain || domain->type == IOMMU_DOMAIN_IDENTITY) &&
> > +           vhost_vdpa_noiommu && capable(CAP_SYS_RAWIO)) {
> > +               add_taint(TAINT_USER, LOCKDEP_STILL_OK);
> > +               dev_warn(&v->dev, "Adding kernel taint for noiommu on
> device\n");
> > +               v->noiommu_en = true;
> > +               return 0;
> > +       }
> >         bus = dma_dev->bus;
> >         if (!bus)
> >                 return -EFAULT;
> > --
> > 2.25.1
> >
Stefano Garzarella June 4, 2024, 11:55 a.m. UTC | #4
On Fri, May 31, 2024 at 10:26:31AM GMT, Jason Wang wrote:
>On Thu, May 30, 2024 at 6:18 PM Srujana Challa <schalla@marvell.com> wrote:
>>
>> This commit introduces support for an UNSAFE, no-IOMMU mode in the
>> vhost-vdpa driver. When enabled, this mode provides no device isolation,
>> no DMA translation, no host kernel protection, and cannot be used for
>> device assignment to virtual machines. It requires RAWIO permissions
>> and will taint the kernel.
>> This mode requires enabling the "enable_vhost_vdpa_unsafe_noiommu_mode"
>> option on the vhost-vdpa driver. This mode would be useful to get
>> better performance on specifice low end machines and can be leveraged
>> by embedded platforms where applications run in controlled environment.
>
>I wonder if it's better to do it per driver:
>
>1) we have device that use its own IOMMU, one example is the mlx5 vDPA device
>2) we have software devices which doesn't require IOMMU at all (but
>still with protection)

It worries me even if the module parameter is the best thing.
What about a sysfs entry?

Thanks,
Stefano

>
>Thanks
>
>>
>> Signed-off-by: Srujana Challa <schalla@marvell.com>
>> ---
>>  drivers/vhost/vdpa.c | 23 +++++++++++++++++++++++
>>  1 file changed, 23 insertions(+)
>>
>> diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
>> index bc4a51e4638b..d071c30125aa 100644
>> --- a/drivers/vhost/vdpa.c
>> +++ b/drivers/vhost/vdpa.c
>> @@ -36,6 +36,11 @@ enum {
>>
>>  #define VHOST_VDPA_IOTLB_BUCKETS 16
>>
>> +bool vhost_vdpa_noiommu;
>> +module_param_named(enable_vhost_vdpa_unsafe_noiommu_mode,
>> +                  vhost_vdpa_noiommu, bool, 0644);
>> +MODULE_PARM_DESC(enable_vhost_vdpa_unsafe_noiommu_mode, "Enable UNSAFE, no-IOMMU mode.  This mode provides no device isolation, no DMA translation, no host kernel protection, cannot be used for device assignment to virtual machines, requires RAWIO permissions, and will taint the kernel.  If you do not know what this is for, step away. (default: false)");
>> +
>>  struct vhost_vdpa_as {
>>         struct hlist_node hash_link;
>>         struct vhost_iotlb iotlb;
>> @@ -60,6 +65,7 @@ struct vhost_vdpa {
>>         struct vdpa_iova_range range;
>>         u32 batch_asid;
>>         bool suspended;
>> +       bool noiommu_en;
>>  };
>>
>>  static DEFINE_IDA(vhost_vdpa_ida);
>> @@ -887,6 +893,10 @@ static void vhost_vdpa_general_unmap(struct vhost_vdpa *v,
>>  {
>>         struct vdpa_device *vdpa = v->vdpa;
>>         const struct vdpa_config_ops *ops = vdpa->config;
>> +
>> +       if (v->noiommu_en)
>> +               return;
>> +
>>         if (ops->dma_map) {
>>                 ops->dma_unmap(vdpa, asid, map->start, map->size);
>>         } else if (ops->set_map == NULL) {
>> @@ -980,6 +990,9 @@ static int vhost_vdpa_map(struct vhost_vdpa *v, struct vhost_iotlb *iotlb,
>>         if (r)
>>                 return r;
>>
>> +       if (v->noiommu_en)
>> +               goto skip_map;
>> +
>>         if (ops->dma_map) {
>>                 r = ops->dma_map(vdpa, asid, iova, size, pa, perm, opaque);
>>         } else if (ops->set_map) {
>> @@ -995,6 +1008,7 @@ static int vhost_vdpa_map(struct vhost_vdpa *v, struct vhost_iotlb *iotlb,
>>                 return r;
>>         }
>>
>> +skip_map:
>>         if (!vdpa->use_va)
>>                 atomic64_add(PFN_DOWN(size), &dev->mm->pinned_vm);
>>
>> @@ -1298,6 +1312,7 @@ static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v)
>>         struct vdpa_device *vdpa = v->vdpa;
>>         const struct vdpa_config_ops *ops = vdpa->config;
>>         struct device *dma_dev = vdpa_get_dma_dev(vdpa);
>> +       struct iommu_domain *domain;
>>         const struct bus_type *bus;
>>         int ret;
>>
>> @@ -1305,6 +1320,14 @@ static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v)
>>         if (ops->set_map || ops->dma_map)
>>                 return 0;
>>
>> +       domain = iommu_get_domain_for_dev(dma_dev);
>> +       if ((!domain || domain->type == IOMMU_DOMAIN_IDENTITY) &&
>> +           vhost_vdpa_noiommu && capable(CAP_SYS_RAWIO)) {
>> +               add_taint(TAINT_USER, LOCKDEP_STILL_OK);
>> +               dev_warn(&v->dev, "Adding kernel taint for noiommu on device\n");
>> +               v->noiommu_en = true;
>> +               return 0;
>> +       }
>>         bus = dma_dev->bus;
>>         if (!bus)
>>                 return -EFAULT;
>> --
>> 2.25.1
>>
>
>
Jason Wang June 6, 2024, 12:14 a.m. UTC | #5
On Tue, Jun 4, 2024 at 7:55 PM Stefano Garzarella <sgarzare@redhat.com> wrote:
>
> On Fri, May 31, 2024 at 10:26:31AM GMT, Jason Wang wrote:
> >On Thu, May 30, 2024 at 6:18 PM Srujana Challa <schalla@marvell.com> wrote:
> >>
> >> This commit introduces support for an UNSAFE, no-IOMMU mode in the
> >> vhost-vdpa driver. When enabled, this mode provides no device isolation,
> >> no DMA translation, no host kernel protection, and cannot be used for
> >> device assignment to virtual machines. It requires RAWIO permissions
> >> and will taint the kernel.
> >> This mode requires enabling the "enable_vhost_vdpa_unsafe_noiommu_mode"
> >> option on the vhost-vdpa driver. This mode would be useful to get
> >> better performance on specifice low end machines and can be leveraged
> >> by embedded platforms where applications run in controlled environment.
> >
> >I wonder if it's better to do it per driver:
> >
> >1) we have device that use its own IOMMU, one example is the mlx5 vDPA device
> >2) we have software devices which doesn't require IOMMU at all (but
> >still with protection)
>
> It worries me even if the module parameter is the best thing.
> What about a sysfs entry?

Not sure, but VFIO uses a module parameter, and using sysfs requires
some synchronization. We need either disable the write when the device
is probed or allow change the value.

Thanks

>
> Thanks,
> Stefano
>
> >
> >Thanks
> >
> >>
> >> Signed-off-by: Srujana Challa <schalla@marvell.com>
> >> ---
> >>  drivers/vhost/vdpa.c | 23 +++++++++++++++++++++++
> >>  1 file changed, 23 insertions(+)
> >>
> >> diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
> >> index bc4a51e4638b..d071c30125aa 100644
> >> --- a/drivers/vhost/vdpa.c
> >> +++ b/drivers/vhost/vdpa.c
> >> @@ -36,6 +36,11 @@ enum {
> >>
> >>  #define VHOST_VDPA_IOTLB_BUCKETS 16
> >>
> >> +bool vhost_vdpa_noiommu;
> >> +module_param_named(enable_vhost_vdpa_unsafe_noiommu_mode,
> >> +                  vhost_vdpa_noiommu, bool, 0644);
> >> +MODULE_PARM_DESC(enable_vhost_vdpa_unsafe_noiommu_mode, "Enable UNSAFE, no-IOMMU mode.  This mode provides no device isolation, no DMA translation, no host kernel protection, cannot be used for device assignment to virtual machines, requires RAWIO permissions, and will taint the kernel.  If you do not know what this is for, step away. (default: false)");
> >> +
> >>  struct vhost_vdpa_as {
> >>         struct hlist_node hash_link;
> >>         struct vhost_iotlb iotlb;
> >> @@ -60,6 +65,7 @@ struct vhost_vdpa {
> >>         struct vdpa_iova_range range;
> >>         u32 batch_asid;
> >>         bool suspended;
> >> +       bool noiommu_en;
> >>  };
> >>
> >>  static DEFINE_IDA(vhost_vdpa_ida);
> >> @@ -887,6 +893,10 @@ static void vhost_vdpa_general_unmap(struct vhost_vdpa *v,
> >>  {
> >>         struct vdpa_device *vdpa = v->vdpa;
> >>         const struct vdpa_config_ops *ops = vdpa->config;
> >> +
> >> +       if (v->noiommu_en)
> >> +               return;
> >> +
> >>         if (ops->dma_map) {
> >>                 ops->dma_unmap(vdpa, asid, map->start, map->size);
> >>         } else if (ops->set_map == NULL) {
> >> @@ -980,6 +990,9 @@ static int vhost_vdpa_map(struct vhost_vdpa *v, struct vhost_iotlb *iotlb,
> >>         if (r)
> >>                 return r;
> >>
> >> +       if (v->noiommu_en)
> >> +               goto skip_map;
> >> +
> >>         if (ops->dma_map) {
> >>                 r = ops->dma_map(vdpa, asid, iova, size, pa, perm, opaque);
> >>         } else if (ops->set_map) {
> >> @@ -995,6 +1008,7 @@ static int vhost_vdpa_map(struct vhost_vdpa *v, struct vhost_iotlb *iotlb,
> >>                 return r;
> >>         }
> >>
> >> +skip_map:
> >>         if (!vdpa->use_va)
> >>                 atomic64_add(PFN_DOWN(size), &dev->mm->pinned_vm);
> >>
> >> @@ -1298,6 +1312,7 @@ static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v)
> >>         struct vdpa_device *vdpa = v->vdpa;
> >>         const struct vdpa_config_ops *ops = vdpa->config;
> >>         struct device *dma_dev = vdpa_get_dma_dev(vdpa);
> >> +       struct iommu_domain *domain;
> >>         const struct bus_type *bus;
> >>         int ret;
> >>
> >> @@ -1305,6 +1320,14 @@ static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v)
> >>         if (ops->set_map || ops->dma_map)
> >>                 return 0;
> >>
> >> +       domain = iommu_get_domain_for_dev(dma_dev);
> >> +       if ((!domain || domain->type == IOMMU_DOMAIN_IDENTITY) &&
> >> +           vhost_vdpa_noiommu && capable(CAP_SYS_RAWIO)) {
> >> +               add_taint(TAINT_USER, LOCKDEP_STILL_OK);
> >> +               dev_warn(&v->dev, "Adding kernel taint for noiommu on device\n");
> >> +               v->noiommu_en = true;
> >> +               return 0;
> >> +       }
> >>         bus = dma_dev->bus;
> >>         if (!bus)
> >>                 return -EFAULT;
> >> --
> >> 2.25.1
> >>
> >
> >
>
Jason Wang June 6, 2024, 12:15 a.m. UTC | #6
On Tue, Jun 4, 2024 at 5:29 PM Srujana Challa <schalla@marvell.com> wrote:
>
> > Subject: [EXTERNAL] Re: [PATCH] vdpa: Add support for no-IOMMU mode
> >
> > Prioritize security for external emails: Confirm sender and content safety
> > before clicking links or opening attachments
> >
> > ----------------------------------------------------------------------
> > On Thu, May 30, 2024 at 6:18 PM Srujana Challa <schalla@marvell.com>
> > wrote:
> > >
> > > This commit introduces support for an UNSAFE, no-IOMMU mode in the
> > > vhost-vdpa driver. When enabled, this mode provides no device
> > > isolation, no DMA translation, no host kernel protection, and cannot
> > > be used for device assignment to virtual machines. It requires RAWIO
> > > permissions and will taint the kernel.
> > > This mode requires enabling the
> > "enable_vhost_vdpa_unsafe_noiommu_mode"
> > > option on the vhost-vdpa driver. This mode would be useful to get
> > > better performance on specifice low end machines and can be leveraged
> > > by embedded platforms where applications run in controlled environment.
> >
> > I wonder if it's better to do it per driver:
> >
> > 1) we have device that use its own IOMMU, one example is the mlx5 vDPA
> > device
> > 2) we have software devices which doesn't require IOMMU at all (but still with
> > protection)
>
> If I understand correctly, you’re suggesting that we create a module parameter
> specific to the vdpa driver. Then, we can add a flag to the ‘struct vdpa_device’
> and set that flag within the vdpa driver based on the module parameter.
> Finally, we would use this flag to taint the kernel and go in no-iommu path
> in the vhost-vdpa driver?

If it's possible, I would like to avoid changing the vDPA core.

Thanks

> >
> > Thanks
> >
> > >
> > > Signed-off-by: Srujana Challa <schalla@marvell.com>
> > > ---
> > >  drivers/vhost/vdpa.c | 23 +++++++++++++++++++++++
> > >  1 file changed, 23 insertions(+)
> > >
> > > diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index
> > > bc4a51e4638b..d071c30125aa 100644
> > > --- a/drivers/vhost/vdpa.c
> > > +++ b/drivers/vhost/vdpa.c
> > > @@ -36,6 +36,11 @@ enum {
> > >
> > >  #define VHOST_VDPA_IOTLB_BUCKETS 16
> > >
> > > +bool vhost_vdpa_noiommu;
> > > +module_param_named(enable_vhost_vdpa_unsafe_noiommu_mode,
> > > +                  vhost_vdpa_noiommu, bool, 0644);
> > > +MODULE_PARM_DESC(enable_vhost_vdpa_unsafe_noiommu_mode,
> > "Enable
> > > +UNSAFE, no-IOMMU mode.  This mode provides no device isolation, no
> > > +DMA translation, no host kernel protection, cannot be used for device
> > > +assignment to virtual machines, requires RAWIO permissions, and will
> > > +taint the kernel.  If you do not know what this is for, step away.
> > > +(default: false)");
> > > +
> > >  struct vhost_vdpa_as {
> > >         struct hlist_node hash_link;
> > >         struct vhost_iotlb iotlb;
> > > @@ -60,6 +65,7 @@ struct vhost_vdpa {
> > >         struct vdpa_iova_range range;
> > >         u32 batch_asid;
> > >         bool suspended;
> > > +       bool noiommu_en;
> > >  };
> > >
> > >  static DEFINE_IDA(vhost_vdpa_ida);
> > > @@ -887,6 +893,10 @@ static void vhost_vdpa_general_unmap(struct
> > > vhost_vdpa *v,  {
> > >         struct vdpa_device *vdpa = v->vdpa;
> > >         const struct vdpa_config_ops *ops = vdpa->config;
> > > +
> > > +       if (v->noiommu_en)
> > > +               return;
> > > +
> > >         if (ops->dma_map) {
> > >                 ops->dma_unmap(vdpa, asid, map->start, map->size);
> > >         } else if (ops->set_map == NULL) { @@ -980,6 +990,9 @@ static
> > > int vhost_vdpa_map(struct vhost_vdpa *v, struct vhost_iotlb *iotlb,
> > >         if (r)
> > >                 return r;
> > >
> > > +       if (v->noiommu_en)
> > > +               goto skip_map;
> > > +
> > >         if (ops->dma_map) {
> > >                 r = ops->dma_map(vdpa, asid, iova, size, pa, perm, opaque);
> > >         } else if (ops->set_map) {
> > > @@ -995,6 +1008,7 @@ static int vhost_vdpa_map(struct vhost_vdpa *v,
> > struct vhost_iotlb *iotlb,
> > >                 return r;
> > >         }
> > >
> > > +skip_map:
> > >         if (!vdpa->use_va)
> > >                 atomic64_add(PFN_DOWN(size), &dev->mm->pinned_vm);
> > >
> > > @@ -1298,6 +1312,7 @@ static int vhost_vdpa_alloc_domain(struct
> > vhost_vdpa *v)
> > >         struct vdpa_device *vdpa = v->vdpa;
> > >         const struct vdpa_config_ops *ops = vdpa->config;
> > >         struct device *dma_dev = vdpa_get_dma_dev(vdpa);
> > > +       struct iommu_domain *domain;
> > >         const struct bus_type *bus;
> > >         int ret;
> > >
> > > @@ -1305,6 +1320,14 @@ static int vhost_vdpa_alloc_domain(struct
> > vhost_vdpa *v)
> > >         if (ops->set_map || ops->dma_map)
> > >                 return 0;
> > >
> > > +       domain = iommu_get_domain_for_dev(dma_dev);
> > > +       if ((!domain || domain->type == IOMMU_DOMAIN_IDENTITY) &&
> > > +           vhost_vdpa_noiommu && capable(CAP_SYS_RAWIO)) {
> > > +               add_taint(TAINT_USER, LOCKDEP_STILL_OK);
> > > +               dev_warn(&v->dev, "Adding kernel taint for noiommu on
> > device\n");
> > > +               v->noiommu_en = true;
> > > +               return 0;
> > > +       }
> > >         bus = dma_dev->bus;
> > >         if (!bus)
> > >                 return -EFAULT;
> > > --
> > > 2.25.1
> > >
>
Srujana Challa June 12, 2024, 9:22 a.m. UTC | #7
> Subject: Re: [EXTERNAL] Re: [PATCH] vdpa: Add support for no-IOMMU mode
> 
> On Tue, Jun 4, 2024 at 5:29 PM Srujana Challa <schalla@marvell.com> wrote:
> >
> > > Subject: [EXTERNAL] Re: [PATCH] vdpa: Add support for no-IOMMU mode
> > >
> > > Prioritize security for external emails: Confirm sender and content
> > > safety before clicking links or opening attachments
> > >
> > > --------------------------------------------------------------------
> > > -- On Thu, May 30, 2024 at 6:18 PM Srujana Challa
> > > <schalla@marvell.com>
> > > wrote:
> > > >
> > > > This commit introduces support for an UNSAFE, no-IOMMU mode in the
> > > > vhost-vdpa driver. When enabled, this mode provides no device
> > > > isolation, no DMA translation, no host kernel protection, and
> > > > cannot be used for device assignment to virtual machines. It
> > > > requires RAWIO permissions and will taint the kernel.
> > > > This mode requires enabling the
> > > "enable_vhost_vdpa_unsafe_noiommu_mode"
> > > > option on the vhost-vdpa driver. This mode would be useful to get
> > > > better performance on specifice low end machines and can be
> > > > leveraged by embedded platforms where applications run in controlled
> environment.
> > >
> > > I wonder if it's better to do it per driver:
> > >
> > > 1) we have device that use its own IOMMU, one example is the mlx5
> > > vDPA device
> > > 2) we have software devices which doesn't require IOMMU at all (but
> > > still with
> > > protection)
> >
> > If I understand correctly, you’re suggesting that we create a module
> > parameter specific to the vdpa driver. Then, we can add a flag to the ‘struct
> vdpa_device’
> > and set that flag within the vdpa driver based on the module parameter.
> > Finally, we would use this flag to taint the kernel and go in no-iommu
> > path in the vhost-vdpa driver?
> 
> If it's possible, I would like to avoid changing the vDPA core.
> 
> Thanks
According to my understanding of the discussion at the
https://lore.kernel.org/all/20240422164108-mutt-send-email-mst@kernel.org,
Michael has suggested focusing on implementing a no-IOMMU mode in vdpa.
Michael, could you please confirm if it's fine to transfer all these relevant
modifications to Marvell's vdpa driver?

Thanks.
> 
> > >
> > > Thanks
> > >
> > > >
> > > > Signed-off-by: Srujana Challa <schalla@marvell.com>
> > > > ---
> > > >  drivers/vhost/vdpa.c | 23 +++++++++++++++++++++++
> > > >  1 file changed, 23 insertions(+)
> > > >
> > > > diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index
> > > > bc4a51e4638b..d071c30125aa 100644
> > > > --- a/drivers/vhost/vdpa.c
> > > > +++ b/drivers/vhost/vdpa.c
> > > > @@ -36,6 +36,11 @@ enum {
> > > >
> > > >  #define VHOST_VDPA_IOTLB_BUCKETS 16
> > > >
> > > > +bool vhost_vdpa_noiommu;
> > > > +module_param_named(enable_vhost_vdpa_unsafe_noiommu_mode,
> > > > +                  vhost_vdpa_noiommu, bool, 0644);
> > > > +MODULE_PARM_DESC(enable_vhost_vdpa_unsafe_noiommu_mode,
> > > "Enable
> > > > +UNSAFE, no-IOMMU mode.  This mode provides no device isolation,
> > > > +no DMA translation, no host kernel protection, cannot be used for
> > > > +device assignment to virtual machines, requires RAWIO
> > > > +permissions, and will taint the kernel.  If you do not know what this is
> for, step away.
> > > > +(default: false)");
> > > > +
> > > >  struct vhost_vdpa_as {
> > > >         struct hlist_node hash_link;
> > > >         struct vhost_iotlb iotlb;
> > > > @@ -60,6 +65,7 @@ struct vhost_vdpa {
> > > >         struct vdpa_iova_range range;
> > > >         u32 batch_asid;
> > > >         bool suspended;
> > > > +       bool noiommu_en;
> > > >  };
> > > >
> > > >  static DEFINE_IDA(vhost_vdpa_ida); @@ -887,6 +893,10 @@ static
> > > > void vhost_vdpa_general_unmap(struct vhost_vdpa *v,  {
> > > >         struct vdpa_device *vdpa = v->vdpa;
> > > >         const struct vdpa_config_ops *ops = vdpa->config;
> > > > +
> > > > +       if (v->noiommu_en)
> > > > +               return;
> > > > +
> > > >         if (ops->dma_map) {
> > > >                 ops->dma_unmap(vdpa, asid, map->start, map->size);
> > > >         } else if (ops->set_map == NULL) { @@ -980,6 +990,9 @@
> > > > static int vhost_vdpa_map(struct vhost_vdpa *v, struct vhost_iotlb
> *iotlb,
> > > >         if (r)
> > > >                 return r;
> > > >
> > > > +       if (v->noiommu_en)
> > > > +               goto skip_map;
> > > > +
> > > >         if (ops->dma_map) {
> > > >                 r = ops->dma_map(vdpa, asid, iova, size, pa, perm, opaque);
> > > >         } else if (ops->set_map) { @@ -995,6 +1008,7 @@ static int
> > > > vhost_vdpa_map(struct vhost_vdpa *v,
> > > struct vhost_iotlb *iotlb,
> > > >                 return r;
> > > >         }
> > > >
> > > > +skip_map:
> > > >         if (!vdpa->use_va)
> > > >                 atomic64_add(PFN_DOWN(size), &dev->mm->pinned_vm);
> > > >
> > > > @@ -1298,6 +1312,7 @@ static int vhost_vdpa_alloc_domain(struct
> > > vhost_vdpa *v)
> > > >         struct vdpa_device *vdpa = v->vdpa;
> > > >         const struct vdpa_config_ops *ops = vdpa->config;
> > > >         struct device *dma_dev = vdpa_get_dma_dev(vdpa);
> > > > +       struct iommu_domain *domain;
> > > >         const struct bus_type *bus;
> > > >         int ret;
> > > >
> > > > @@ -1305,6 +1320,14 @@ static int vhost_vdpa_alloc_domain(struct
> > > vhost_vdpa *v)
> > > >         if (ops->set_map || ops->dma_map)
> > > >                 return 0;
> > > >
> > > > +       domain = iommu_get_domain_for_dev(dma_dev);
> > > > +       if ((!domain || domain->type == IOMMU_DOMAIN_IDENTITY) &&
> > > > +           vhost_vdpa_noiommu && capable(CAP_SYS_RAWIO)) {
> > > > +               add_taint(TAINT_USER, LOCKDEP_STILL_OK);
> > > > +               dev_warn(&v->dev, "Adding kernel taint for noiommu
> > > > + on
> > > device\n");
> > > > +               v->noiommu_en = true;
> > > > +               return 0;
> > > > +       }
> > > >         bus = dma_dev->bus;
> > > >         if (!bus)
> > > >                 return -EFAULT;
> > > > --
> > > > 2.25.1
> > > >
> >
Michael S. Tsirkin June 12, 2024, 12:32 p.m. UTC | #8
On Wed, Jun 12, 2024 at 09:22:43AM +0000, Srujana Challa wrote:
> 
> > Subject: Re: [EXTERNAL] Re: [PATCH] vdpa: Add support for no-IOMMU mode
> > 
> > On Tue, Jun 4, 2024 at 5:29 PM Srujana Challa <schalla@marvell.com> wrote:
> > >
> > > > Subject: [EXTERNAL] Re: [PATCH] vdpa: Add support for no-IOMMU mode
> > > >
> > > > Prioritize security for external emails: Confirm sender and content
> > > > safety before clicking links or opening attachments
> > > >
> > > > --------------------------------------------------------------------
> > > > -- On Thu, May 30, 2024 at 6:18 PM Srujana Challa
> > > > <schalla@marvell.com>
> > > > wrote:
> > > > >
> > > > > This commit introduces support for an UNSAFE, no-IOMMU mode in the
> > > > > vhost-vdpa driver. When enabled, this mode provides no device
> > > > > isolation, no DMA translation, no host kernel protection, and
> > > > > cannot be used for device assignment to virtual machines. It
> > > > > requires RAWIO permissions and will taint the kernel.
> > > > > This mode requires enabling the
> > > > "enable_vhost_vdpa_unsafe_noiommu_mode"
> > > > > option on the vhost-vdpa driver. This mode would be useful to get
> > > > > better performance on specifice low end machines and can be
> > > > > leveraged by embedded platforms where applications run in controlled
> > environment.
> > > >
> > > > I wonder if it's better to do it per driver:
> > > >
> > > > 1) we have device that use its own IOMMU, one example is the mlx5
> > > > vDPA device
> > > > 2) we have software devices which doesn't require IOMMU at all (but
> > > > still with
> > > > protection)
> > >
> > > If I understand correctly, you’re suggesting that we create a module
> > > parameter specific to the vdpa driver. Then, we can add a flag to the ‘struct
> > vdpa_device’
> > > and set that flag within the vdpa driver based on the module parameter.
> > > Finally, we would use this flag to taint the kernel and go in no-iommu
> > > path in the vhost-vdpa driver?
> > 
> > If it's possible, I would like to avoid changing the vDPA core.
> > 
> > Thanks
> According to my understanding of the discussion at the
> https://lore.kernel.org/all/20240422164108-mutt-send-email-mst@kernel.org,
> Michael has suggested focusing on implementing a no-IOMMU mode in vdpa.
> Michael, could you please confirm if it's fine to transfer all these relevant
> modifications to Marvell's vdpa driver?
> 
> Thanks.


All I said is that octeon driver can be merged without this support.
Then work on no-iommu can start separately.


Whether this belongs in the driver or the core would depend on
what the use-case is. I have not figured it out yet.
What you describe seems generic not card-specific though.
Jason why do you  want this in the driver?

> > 
> > > >
> > > > Thanks
> > > >
> > > > >
> > > > > Signed-off-by: Srujana Challa <schalla@marvell.com>
> > > > > ---
> > > > >  drivers/vhost/vdpa.c | 23 +++++++++++++++++++++++
> > > > >  1 file changed, 23 insertions(+)
> > > > >
> > > > > diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index
> > > > > bc4a51e4638b..d071c30125aa 100644
> > > > > --- a/drivers/vhost/vdpa.c
> > > > > +++ b/drivers/vhost/vdpa.c
> > > > > @@ -36,6 +36,11 @@ enum {
> > > > >
> > > > >  #define VHOST_VDPA_IOTLB_BUCKETS 16
> > > > >
> > > > > +bool vhost_vdpa_noiommu;
> > > > > +module_param_named(enable_vhost_vdpa_unsafe_noiommu_mode,
> > > > > +                  vhost_vdpa_noiommu, bool, 0644);
> > > > > +MODULE_PARM_DESC(enable_vhost_vdpa_unsafe_noiommu_mode,
> > > > "Enable
> > > > > +UNSAFE, no-IOMMU mode.  This mode provides no device isolation,
> > > > > +no DMA translation, no host kernel protection, cannot be used for
> > > > > +device assignment to virtual machines, requires RAWIO
> > > > > +permissions, and will taint the kernel.  If you do not know what this is
> > for, step away.
> > > > > +(default: false)");
> > > > > +
> > > > >  struct vhost_vdpa_as {
> > > > >         struct hlist_node hash_link;
> > > > >         struct vhost_iotlb iotlb;
> > > > > @@ -60,6 +65,7 @@ struct vhost_vdpa {
> > > > >         struct vdpa_iova_range range;
> > > > >         u32 batch_asid;
> > > > >         bool suspended;
> > > > > +       bool noiommu_en;
> > > > >  };
> > > > >
> > > > >  static DEFINE_IDA(vhost_vdpa_ida); @@ -887,6 +893,10 @@ static
> > > > > void vhost_vdpa_general_unmap(struct vhost_vdpa *v,  {
> > > > >         struct vdpa_device *vdpa = v->vdpa;
> > > > >         const struct vdpa_config_ops *ops = vdpa->config;
> > > > > +
> > > > > +       if (v->noiommu_en)
> > > > > +               return;
> > > > > +
> > > > >         if (ops->dma_map) {
> > > > >                 ops->dma_unmap(vdpa, asid, map->start, map->size);
> > > > >         } else if (ops->set_map == NULL) { @@ -980,6 +990,9 @@
> > > > > static int vhost_vdpa_map(struct vhost_vdpa *v, struct vhost_iotlb
> > *iotlb,
> > > > >         if (r)
> > > > >                 return r;
> > > > >
> > > > > +       if (v->noiommu_en)
> > > > > +               goto skip_map;
> > > > > +
> > > > >         if (ops->dma_map) {
> > > > >                 r = ops->dma_map(vdpa, asid, iova, size, pa, perm, opaque);
> > > > >         } else if (ops->set_map) { @@ -995,6 +1008,7 @@ static int
> > > > > vhost_vdpa_map(struct vhost_vdpa *v,
> > > > struct vhost_iotlb *iotlb,
> > > > >                 return r;
> > > > >         }
> > > > >
> > > > > +skip_map:
> > > > >         if (!vdpa->use_va)
> > > > >                 atomic64_add(PFN_DOWN(size), &dev->mm->pinned_vm);
> > > > >
> > > > > @@ -1298,6 +1312,7 @@ static int vhost_vdpa_alloc_domain(struct
> > > > vhost_vdpa *v)
> > > > >         struct vdpa_device *vdpa = v->vdpa;
> > > > >         const struct vdpa_config_ops *ops = vdpa->config;
> > > > >         struct device *dma_dev = vdpa_get_dma_dev(vdpa);
> > > > > +       struct iommu_domain *domain;
> > > > >         const struct bus_type *bus;
> > > > >         int ret;
> > > > >
> > > > > @@ -1305,6 +1320,14 @@ static int vhost_vdpa_alloc_domain(struct
> > > > vhost_vdpa *v)
> > > > >         if (ops->set_map || ops->dma_map)
> > > > >                 return 0;
> > > > >
> > > > > +       domain = iommu_get_domain_for_dev(dma_dev);
> > > > > +       if ((!domain || domain->type == IOMMU_DOMAIN_IDENTITY) &&
> > > > > +           vhost_vdpa_noiommu && capable(CAP_SYS_RAWIO)) {
> > > > > +               add_taint(TAINT_USER, LOCKDEP_STILL_OK);
> > > > > +               dev_warn(&v->dev, "Adding kernel taint for noiommu
> > > > > + on
> > > > device\n");
> > > > > +               v->noiommu_en = true;
> > > > > +               return 0;
> > > > > +       }
> > > > >         bus = dma_dev->bus;
> > > > >         if (!bus)
> > > > >                 return -EFAULT;
> > > > > --
> > > > > 2.25.1
> > > > >
> > >
>
Jason Wang June 17, 2024, 1:38 a.m. UTC | #9
On Wed, Jun 12, 2024 at 8:32 PM Michael S. Tsirkin <mst@redhat.com> wrote:
>
> On Wed, Jun 12, 2024 at 09:22:43AM +0000, Srujana Challa wrote:
> >
> > > Subject: Re: [EXTERNAL] Re: [PATCH] vdpa: Add support for no-IOMMU mode
> > >
> > > On Tue, Jun 4, 2024 at 5:29 PM Srujana Challa <schalla@marvell.com> wrote:
> > > >
> > > > > Subject: [EXTERNAL] Re: [PATCH] vdpa: Add support for no-IOMMU mode
> > > > >
> > > > > Prioritize security for external emails: Confirm sender and content
> > > > > safety before clicking links or opening attachments
> > > > >
> > > > > --------------------------------------------------------------------
> > > > > -- On Thu, May 30, 2024 at 6:18 PM Srujana Challa
> > > > > <schalla@marvell.com>
> > > > > wrote:
> > > > > >
> > > > > > This commit introduces support for an UNSAFE, no-IOMMU mode in the
> > > > > > vhost-vdpa driver. When enabled, this mode provides no device
> > > > > > isolation, no DMA translation, no host kernel protection, and
> > > > > > cannot be used for device assignment to virtual machines. It
> > > > > > requires RAWIO permissions and will taint the kernel.
> > > > > > This mode requires enabling the
> > > > > "enable_vhost_vdpa_unsafe_noiommu_mode"
> > > > > > option on the vhost-vdpa driver. This mode would be useful to get
> > > > > > better performance on specifice low end machines and can be
> > > > > > leveraged by embedded platforms where applications run in controlled
> > > environment.
> > > > >
> > > > > I wonder if it's better to do it per driver:
> > > > >
> > > > > 1) we have device that use its own IOMMU, one example is the mlx5
> > > > > vDPA device
> > > > > 2) we have software devices which doesn't require IOMMU at all (but
> > > > > still with
> > > > > protection)
> > > >
> > > > If I understand correctly, you’re suggesting that we create a module
> > > > parameter specific to the vdpa driver. Then, we can add a flag to the ‘struct
> > > vdpa_device’
> > > > and set that flag within the vdpa driver based on the module parameter.
> > > > Finally, we would use this flag to taint the kernel and go in no-iommu
> > > > path in the vhost-vdpa driver?
> > >
> > > If it's possible, I would like to avoid changing the vDPA core.
> > >
> > > Thanks
> > According to my understanding of the discussion at the
> > https://lore.kernel.org/all/20240422164108-mutt-send-email-mst@kernel.org,
> > Michael has suggested focusing on implementing a no-IOMMU mode in vdpa.
> > Michael, could you please confirm if it's fine to transfer all these relevant
> > modifications to Marvell's vdpa driver?
> >
> > Thanks.
>
>
> All I said is that octeon driver can be merged without this support.
> Then work on no-iommu can start separately.
>
>
> Whether this belongs in the driver or the core would depend on
> what the use-case is. I have not figured it out yet.
> What you describe seems generic not card-specific though.
> Jason why do you  want this in the driver?

For two reasons:

1) no-IOMMU mode have security implications, make it per driver is
less intrusive
2) I don't know what does "no-IOMMU" mean for software device or
device with on-chip IOMMU

Thanks

>
> > >
> > > > >
> > > > > Thanks
> > > > >
> > > > > >
> > > > > > Signed-off-by: Srujana Challa <schalla@marvell.com>
> > > > > > ---
> > > > > >  drivers/vhost/vdpa.c | 23 +++++++++++++++++++++++
> > > > > >  1 file changed, 23 insertions(+)
> > > > > >
> > > > > > diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index
> > > > > > bc4a51e4638b..d071c30125aa 100644
> > > > > > --- a/drivers/vhost/vdpa.c
> > > > > > +++ b/drivers/vhost/vdpa.c
> > > > > > @@ -36,6 +36,11 @@ enum {
> > > > > >
> > > > > >  #define VHOST_VDPA_IOTLB_BUCKETS 16
> > > > > >
> > > > > > +bool vhost_vdpa_noiommu;
> > > > > > +module_param_named(enable_vhost_vdpa_unsafe_noiommu_mode,
> > > > > > +                  vhost_vdpa_noiommu, bool, 0644);
> > > > > > +MODULE_PARM_DESC(enable_vhost_vdpa_unsafe_noiommu_mode,
> > > > > "Enable
> > > > > > +UNSAFE, no-IOMMU mode.  This mode provides no device isolation,
> > > > > > +no DMA translation, no host kernel protection, cannot be used for
> > > > > > +device assignment to virtual machines, requires RAWIO
> > > > > > +permissions, and will taint the kernel.  If you do not know what this is
> > > for, step away.
> > > > > > +(default: false)");
> > > > > > +
> > > > > >  struct vhost_vdpa_as {
> > > > > >         struct hlist_node hash_link;
> > > > > >         struct vhost_iotlb iotlb;
> > > > > > @@ -60,6 +65,7 @@ struct vhost_vdpa {
> > > > > >         struct vdpa_iova_range range;
> > > > > >         u32 batch_asid;
> > > > > >         bool suspended;
> > > > > > +       bool noiommu_en;
> > > > > >  };
> > > > > >
> > > > > >  static DEFINE_IDA(vhost_vdpa_ida); @@ -887,6 +893,10 @@ static
> > > > > > void vhost_vdpa_general_unmap(struct vhost_vdpa *v,  {
> > > > > >         struct vdpa_device *vdpa = v->vdpa;
> > > > > >         const struct vdpa_config_ops *ops = vdpa->config;
> > > > > > +
> > > > > > +       if (v->noiommu_en)
> > > > > > +               return;
> > > > > > +
> > > > > >         if (ops->dma_map) {
> > > > > >                 ops->dma_unmap(vdpa, asid, map->start, map->size);
> > > > > >         } else if (ops->set_map == NULL) { @@ -980,6 +990,9 @@
> > > > > > static int vhost_vdpa_map(struct vhost_vdpa *v, struct vhost_iotlb
> > > *iotlb,
> > > > > >         if (r)
> > > > > >                 return r;
> > > > > >
> > > > > > +       if (v->noiommu_en)
> > > > > > +               goto skip_map;
> > > > > > +
> > > > > >         if (ops->dma_map) {
> > > > > >                 r = ops->dma_map(vdpa, asid, iova, size, pa, perm, opaque);
> > > > > >         } else if (ops->set_map) { @@ -995,6 +1008,7 @@ static int
> > > > > > vhost_vdpa_map(struct vhost_vdpa *v,
> > > > > struct vhost_iotlb *iotlb,
> > > > > >                 return r;
> > > > > >         }
> > > > > >
> > > > > > +skip_map:
> > > > > >         if (!vdpa->use_va)
> > > > > >                 atomic64_add(PFN_DOWN(size), &dev->mm->pinned_vm);
> > > > > >
> > > > > > @@ -1298,6 +1312,7 @@ static int vhost_vdpa_alloc_domain(struct
> > > > > vhost_vdpa *v)
> > > > > >         struct vdpa_device *vdpa = v->vdpa;
> > > > > >         const struct vdpa_config_ops *ops = vdpa->config;
> > > > > >         struct device *dma_dev = vdpa_get_dma_dev(vdpa);
> > > > > > +       struct iommu_domain *domain;
> > > > > >         const struct bus_type *bus;
> > > > > >         int ret;
> > > > > >
> > > > > > @@ -1305,6 +1320,14 @@ static int vhost_vdpa_alloc_domain(struct
> > > > > vhost_vdpa *v)
> > > > > >         if (ops->set_map || ops->dma_map)
> > > > > >                 return 0;
> > > > > >
> > > > > > +       domain = iommu_get_domain_for_dev(dma_dev);
> > > > > > +       if ((!domain || domain->type == IOMMU_DOMAIN_IDENTITY) &&
> > > > > > +           vhost_vdpa_noiommu && capable(CAP_SYS_RAWIO)) {
> > > > > > +               add_taint(TAINT_USER, LOCKDEP_STILL_OK);
> > > > > > +               dev_warn(&v->dev, "Adding kernel taint for noiommu
> > > > > > + on
> > > > > device\n");
> > > > > > +               v->noiommu_en = true;
> > > > > > +               return 0;
> > > > > > +       }
> > > > > >         bus = dma_dev->bus;
> > > > > >         if (!bus)
> > > > > >                 return -EFAULT;
> > > > > > --
> > > > > > 2.25.1
> > > > > >
> > > >
> >
>
Michael S. Tsirkin July 17, 2024, 9:50 a.m. UTC | #10
On Thu, May 30, 2024 at 03:48:23PM +0530, Srujana Challa wrote:
> This commit introduces support for an UNSAFE, no-IOMMU mode in the
> vhost-vdpa driver. When enabled, this mode provides no device isolation,
> no DMA translation, no host kernel protection, and cannot be used for
> device assignment to virtual machines. It requires RAWIO permissions
> and will taint the kernel.
> This mode requires enabling the "enable_vhost_vdpa_unsafe_noiommu_mode"
> option on the vhost-vdpa driver. This mode would be useful to get
> better performance on specifice low end machines and can be leveraged
> by embedded platforms where applications run in controlled environment.
> 
> Signed-off-by: Srujana Challa <schalla@marvell.com>

Thought hard about that.
I think given vfio supports this, we can do that too, and
the extension is small.

However, it looks like setting this parameter will automatically
change the behaviour for existing userspace when IOMMU_DOMAIN_IDENTITY
is set.

I suggest a new domain type for use just for this purpose.  This way if
host has an iommu, then the same kernel can run both VMs with
isolation and unsafe embedded apps without.

> ---
>  drivers/vhost/vdpa.c | 23 +++++++++++++++++++++++
>  1 file changed, 23 insertions(+)
> 
> diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
> index bc4a51e4638b..d071c30125aa 100644
> --- a/drivers/vhost/vdpa.c
> +++ b/drivers/vhost/vdpa.c
> @@ -36,6 +36,11 @@ enum {
>  
>  #define VHOST_VDPA_IOTLB_BUCKETS 16
>  
> +bool vhost_vdpa_noiommu;
> +module_param_named(enable_vhost_vdpa_unsafe_noiommu_mode,
> +		   vhost_vdpa_noiommu, bool, 0644);
> +MODULE_PARM_DESC(enable_vhost_vdpa_unsafe_noiommu_mode, "Enable UNSAFE, no-IOMMU mode.  This mode provides no device isolation, no DMA translation, no host kernel protection, cannot be used for device assignment to virtual machines, requires RAWIO permissions, and will taint the kernel.  If you do not know what this is for, step away. (default: false)");
> +
>  struct vhost_vdpa_as {
>  	struct hlist_node hash_link;
>  	struct vhost_iotlb iotlb;
> @@ -60,6 +65,7 @@ struct vhost_vdpa {
>  	struct vdpa_iova_range range;
>  	u32 batch_asid;
>  	bool suspended;
> +	bool noiommu_en;
>  };
>  
>  static DEFINE_IDA(vhost_vdpa_ida);
> @@ -887,6 +893,10 @@ static void vhost_vdpa_general_unmap(struct vhost_vdpa *v,
>  {
>  	struct vdpa_device *vdpa = v->vdpa;
>  	const struct vdpa_config_ops *ops = vdpa->config;
> +
> +	if (v->noiommu_en)
> +		return;
> +
>  	if (ops->dma_map) {
>  		ops->dma_unmap(vdpa, asid, map->start, map->size);
>  	} else if (ops->set_map == NULL) {
> @@ -980,6 +990,9 @@ static int vhost_vdpa_map(struct vhost_vdpa *v, struct vhost_iotlb *iotlb,
>  	if (r)
>  		return r;
>  
> +	if (v->noiommu_en)
> +		goto skip_map;
> +
>  	if (ops->dma_map) {
>  		r = ops->dma_map(vdpa, asid, iova, size, pa, perm, opaque);
>  	} else if (ops->set_map) {
> @@ -995,6 +1008,7 @@ static int vhost_vdpa_map(struct vhost_vdpa *v, struct vhost_iotlb *iotlb,
>  		return r;
>  	}
>  
> +skip_map:
>  	if (!vdpa->use_va)
>  		atomic64_add(PFN_DOWN(size), &dev->mm->pinned_vm);
>  
> @@ -1298,6 +1312,7 @@ static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v)
>  	struct vdpa_device *vdpa = v->vdpa;
>  	const struct vdpa_config_ops *ops = vdpa->config;
>  	struct device *dma_dev = vdpa_get_dma_dev(vdpa);
> +	struct iommu_domain *domain;
>  	const struct bus_type *bus;
>  	int ret;
>  
> @@ -1305,6 +1320,14 @@ static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v)
>  	if (ops->set_map || ops->dma_map)
>  		return 0;
>  
> +	domain = iommu_get_domain_for_dev(dma_dev);
> +	if ((!domain || domain->type == IOMMU_DOMAIN_IDENTITY) &&
> +	    vhost_vdpa_noiommu && capable(CAP_SYS_RAWIO)) {

So if userspace does not have CAP_SYS_RAWIO instead of failing
with a permission error the functionality changes silently?
That's confusing, I think.


> +		add_taint(TAINT_USER, LOCKDEP_STILL_OK);
> +		dev_warn(&v->dev, "Adding kernel taint for noiommu on device\n");
> +		v->noiommu_en = true;
> +		return 0;
> +	}
>  	bus = dma_dev->bus;
>  	if (!bus)
>  		return -EFAULT;
> -- 
> 2.25.1
Srujana Challa July 19, 2024, 3:39 p.m. UTC | #11
> On Thu, May 30, 2024 at 03:48:23PM +0530, Srujana Challa wrote:
> > This commit introduces support for an UNSAFE, no-IOMMU mode in the
> > vhost-vdpa driver. When enabled, this mode provides no device
> > isolation, no DMA translation, no host kernel protection, and cannot
> > be used for device assignment to virtual machines. It requires RAWIO
> > permissions and will taint the kernel.
> > This mode requires enabling the
> "enable_vhost_vdpa_unsafe_noiommu_mode"
> > option on the vhost-vdpa driver. This mode would be useful to get
> > better performance on specifice low end machines and can be leveraged
> > by embedded platforms where applications run in controlled environment.
> >
> > Signed-off-by: Srujana Challa <schalla@marvell.com>
> 
> Thought hard about that.
> I think given vfio supports this, we can do that too, and the extension is small.
> 
> However, it looks like setting this parameter will automatically change the
> behaviour for existing userspace when IOMMU_DOMAIN_IDENTITY is set.
> 
> I suggest a new domain type for use just for this purpose.  This way if host has
> an iommu, then the same kernel can run both VMs with isolation and unsafe
> embedded apps without.
Could you provide further details on this concept? What criteria would determine
the configuration of the new domain type? Would this require a boot parameter
similar to IOMMU_DOMAIN_IDENTITY, such as iommu.passthrough=1 or iommu.pt?
> 
> > ---
> >  drivers/vhost/vdpa.c | 23 +++++++++++++++++++++++
> >  1 file changed, 23 insertions(+)
> >
> > diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index
> > bc4a51e4638b..d071c30125aa 100644
> > --- a/drivers/vhost/vdpa.c
> > +++ b/drivers/vhost/vdpa.c
> > @@ -36,6 +36,11 @@ enum {
> >
> >  #define VHOST_VDPA_IOTLB_BUCKETS 16
> >
> > +bool vhost_vdpa_noiommu;
> > +module_param_named(enable_vhost_vdpa_unsafe_noiommu_mode,
> > +		   vhost_vdpa_noiommu, bool, 0644);
> > +MODULE_PARM_DESC(enable_vhost_vdpa_unsafe_noiommu_mode,
> "Enable
> > +UNSAFE, no-IOMMU mode.  This mode provides no device isolation, no
> > +DMA translation, no host kernel protection, cannot be used for device
> > +assignment to virtual machines, requires RAWIO permissions, and will
> > +taint the kernel.  If you do not know what this is for, step away.
> > +(default: false)");
> > +
> >  struct vhost_vdpa_as {
> >  	struct hlist_node hash_link;
> >  	struct vhost_iotlb iotlb;
> > @@ -60,6 +65,7 @@ struct vhost_vdpa {
> >  	struct vdpa_iova_range range;
> >  	u32 batch_asid;
> >  	bool suspended;
> > +	bool noiommu_en;
> >  };
> >
> >  static DEFINE_IDA(vhost_vdpa_ida);
> > @@ -887,6 +893,10 @@ static void vhost_vdpa_general_unmap(struct
> > vhost_vdpa *v,  {
> >  	struct vdpa_device *vdpa = v->vdpa;
> >  	const struct vdpa_config_ops *ops = vdpa->config;
> > +
> > +	if (v->noiommu_en)
> > +		return;
> > +
> >  	if (ops->dma_map) {
> >  		ops->dma_unmap(vdpa, asid, map->start, map->size);
> >  	} else if (ops->set_map == NULL) {
> > @@ -980,6 +990,9 @@ static int vhost_vdpa_map(struct vhost_vdpa *v,
> struct vhost_iotlb *iotlb,
> >  	if (r)
> >  		return r;
> >
> > +	if (v->noiommu_en)
> > +		goto skip_map;
> > +
> >  	if (ops->dma_map) {
> >  		r = ops->dma_map(vdpa, asid, iova, size, pa, perm, opaque);
> >  	} else if (ops->set_map) {
> > @@ -995,6 +1008,7 @@ static int vhost_vdpa_map(struct vhost_vdpa *v,
> struct vhost_iotlb *iotlb,
> >  		return r;
> >  	}
> >
> > +skip_map:
> >  	if (!vdpa->use_va)
> >  		atomic64_add(PFN_DOWN(size), &dev->mm->pinned_vm);
> >
> > @@ -1298,6 +1312,7 @@ static int vhost_vdpa_alloc_domain(struct
> vhost_vdpa *v)
> >  	struct vdpa_device *vdpa = v->vdpa;
> >  	const struct vdpa_config_ops *ops = vdpa->config;
> >  	struct device *dma_dev = vdpa_get_dma_dev(vdpa);
> > +	struct iommu_domain *domain;
> >  	const struct bus_type *bus;
> >  	int ret;
> >
> > @@ -1305,6 +1320,14 @@ static int vhost_vdpa_alloc_domain(struct
> vhost_vdpa *v)
> >  	if (ops->set_map || ops->dma_map)
> >  		return 0;
> >
> > +	domain = iommu_get_domain_for_dev(dma_dev);
> > +	if ((!domain || domain->type == IOMMU_DOMAIN_IDENTITY) &&
> > +	    vhost_vdpa_noiommu && capable(CAP_SYS_RAWIO)) {
> 
> So if userspace does not have CAP_SYS_RAWIO instead of failing with a
> permission error the functionality changes silently?
> That's confusing, I think.
Yes, you are correct. I will modify the code to return error when vhost_vdpa_noiommu
is set and CAP_SYS_RAWIO is not set.

Thanks.
> 
> 
> > +		add_taint(TAINT_USER, LOCKDEP_STILL_OK);
> > +		dev_warn(&v->dev, "Adding kernel taint for noiommu on
> device\n");
> > +		v->noiommu_en = true;
> > +		return 0;
> > +	}
> >  	bus = dma_dev->bus;
> >  	if (!bus)
> >  		return -EFAULT;
> > --
> > 2.25.1
Jason Wang July 22, 2024, 7:22 a.m. UTC | #12
On Fri, Jul 19, 2024 at 11:40 PM Srujana Challa <schalla@marvell.com> wrote:
>
> > On Thu, May 30, 2024 at 03:48:23PM +0530, Srujana Challa wrote:
> > > This commit introduces support for an UNSAFE, no-IOMMU mode in the
> > > vhost-vdpa driver. When enabled, this mode provides no device
> > > isolation, no DMA translation, no host kernel protection, and cannot
> > > be used for device assignment to virtual machines. It requires RAWIO
> > > permissions and will taint the kernel.
> > > This mode requires enabling the
> > "enable_vhost_vdpa_unsafe_noiommu_mode"
> > > option on the vhost-vdpa driver. This mode would be useful to get
> > > better performance on specifice low end machines and can be leveraged
> > > by embedded platforms where applications run in controlled environment.
> > >
> > > Signed-off-by: Srujana Challa <schalla@marvell.com>
> >
> > Thought hard about that.
> > I think given vfio supports this, we can do that too, and the extension is small.
> >
> > However, it looks like setting this parameter will automatically change the
> > behaviour for existing userspace when IOMMU_DOMAIN_IDENTITY is set.
> >
> > I suggest a new domain type for use just for this purpose.

I'm not sure I get this, we want to bypass IOMMU, so it doesn't even
have a doman.

> This way if host has
> > an iommu, then the same kernel can run both VMs with isolation and unsafe
> > embedded apps without.
> Could you provide further details on this concept? What criteria would determine
> the configuration of the new domain type? Would this require a boot parameter
> similar to IOMMU_DOMAIN_IDENTITY, such as iommu.passthrough=1 or iommu.pt?

Thanks

> >
> > > ---
> > >  drivers/vhost/vdpa.c | 23 +++++++++++++++++++++++
> > >  1 file changed, 23 insertions(+)
> > >
> > > diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index
> > > bc4a51e4638b..d071c30125aa 100644
> > > --- a/drivers/vhost/vdpa.c
> > > +++ b/drivers/vhost/vdpa.c
> > > @@ -36,6 +36,11 @@ enum {
> > >
> > >  #define VHOST_VDPA_IOTLB_BUCKETS 16
> > >
> > > +bool vhost_vdpa_noiommu;
> > > +module_param_named(enable_vhost_vdpa_unsafe_noiommu_mode,
> > > +              vhost_vdpa_noiommu, bool, 0644);
> > > +MODULE_PARM_DESC(enable_vhost_vdpa_unsafe_noiommu_mode,
> > "Enable
> > > +UNSAFE, no-IOMMU mode.  This mode provides no device isolation, no
> > > +DMA translation, no host kernel protection, cannot be used for device
> > > +assignment to virtual machines, requires RAWIO permissions, and will
> > > +taint the kernel.  If you do not know what this is for, step away.
> > > +(default: false)");
> > > +
> > >  struct vhost_vdpa_as {
> > >     struct hlist_node hash_link;
> > >     struct vhost_iotlb iotlb;
> > > @@ -60,6 +65,7 @@ struct vhost_vdpa {
> > >     struct vdpa_iova_range range;
> > >     u32 batch_asid;
> > >     bool suspended;
> > > +   bool noiommu_en;
> > >  };
> > >
> > >  static DEFINE_IDA(vhost_vdpa_ida);
> > > @@ -887,6 +893,10 @@ static void vhost_vdpa_general_unmap(struct
> > > vhost_vdpa *v,  {
> > >     struct vdpa_device *vdpa = v->vdpa;
> > >     const struct vdpa_config_ops *ops = vdpa->config;
> > > +
> > > +   if (v->noiommu_en)
> > > +           return;
> > > +
> > >     if (ops->dma_map) {
> > >             ops->dma_unmap(vdpa, asid, map->start, map->size);
> > >     } else if (ops->set_map == NULL) {
> > > @@ -980,6 +990,9 @@ static int vhost_vdpa_map(struct vhost_vdpa *v,
> > struct vhost_iotlb *iotlb,
> > >     if (r)
> > >             return r;
> > >
> > > +   if (v->noiommu_en)
> > > +           goto skip_map;
> > > +
> > >     if (ops->dma_map) {
> > >             r = ops->dma_map(vdpa, asid, iova, size, pa, perm, opaque);
> > >     } else if (ops->set_map) {
> > > @@ -995,6 +1008,7 @@ static int vhost_vdpa_map(struct vhost_vdpa *v,
> > struct vhost_iotlb *iotlb,
> > >             return r;
> > >     }
> > >
> > > +skip_map:
> > >     if (!vdpa->use_va)
> > >             atomic64_add(PFN_DOWN(size), &dev->mm->pinned_vm);
> > >
> > > @@ -1298,6 +1312,7 @@ static int vhost_vdpa_alloc_domain(struct
> > vhost_vdpa *v)
> > >     struct vdpa_device *vdpa = v->vdpa;
> > >     const struct vdpa_config_ops *ops = vdpa->config;
> > >     struct device *dma_dev = vdpa_get_dma_dev(vdpa);
> > > +   struct iommu_domain *domain;
> > >     const struct bus_type *bus;
> > >     int ret;
> > >
> > > @@ -1305,6 +1320,14 @@ static int vhost_vdpa_alloc_domain(struct
> > vhost_vdpa *v)
> > >     if (ops->set_map || ops->dma_map)
> > >             return 0;
> > >
> > > +   domain = iommu_get_domain_for_dev(dma_dev);
> > > +   if ((!domain || domain->type == IOMMU_DOMAIN_IDENTITY) &&
> > > +       vhost_vdpa_noiommu && capable(CAP_SYS_RAWIO)) {
> >
> > So if userspace does not have CAP_SYS_RAWIO instead of failing with a
> > permission error the functionality changes silently?
> > That's confusing, I think.
> Yes, you are correct. I will modify the code to return error when vhost_vdpa_noiommu
> is set and CAP_SYS_RAWIO is not set.
>
> Thanks.
> >
> >
> > > +           add_taint(TAINT_USER, LOCKDEP_STILL_OK);
> > > +           dev_warn(&v->dev, "Adding kernel taint for noiommu on
> > device\n");
> > > +           v->noiommu_en = true;
> > > +           return 0;
> > > +   }
> > >     bus = dma_dev->bus;
> > >     if (!bus)
> > >             return -EFAULT;
> > > --
> > > 2.25.1
>
Michael S. Tsirkin July 22, 2024, 7:50 a.m. UTC | #13
On Mon, Jul 22, 2024 at 03:22:22PM +0800, Jason Wang wrote:
> On Fri, Jul 19, 2024 at 11:40 PM Srujana Challa <schalla@marvell.com> wrote:
> >
> > > On Thu, May 30, 2024 at 03:48:23PM +0530, Srujana Challa wrote:
> > > > This commit introduces support for an UNSAFE, no-IOMMU mode in the
> > > > vhost-vdpa driver. When enabled, this mode provides no device
> > > > isolation, no DMA translation, no host kernel protection, and cannot
> > > > be used for device assignment to virtual machines. It requires RAWIO
> > > > permissions and will taint the kernel.
> > > > This mode requires enabling the
> > > "enable_vhost_vdpa_unsafe_noiommu_mode"
> > > > option on the vhost-vdpa driver. This mode would be useful to get
> > > > better performance on specifice low end machines and can be leveraged
> > > > by embedded platforms where applications run in controlled environment.
> > > >
> > > > Signed-off-by: Srujana Challa <schalla@marvell.com>
> > >
> > > Thought hard about that.
> > > I think given vfio supports this, we can do that too, and the extension is small.
> > >
> > > However, it looks like setting this parameter will automatically change the
> > > behaviour for existing userspace when IOMMU_DOMAIN_IDENTITY is set.
> > >
> > > I suggest a new domain type for use just for this purpose.
> 
> I'm not sure I get this, we want to bypass IOMMU, so it doesn't even
> have a doman.

yes, a fake one. or come up with some other flag that userspace
will set.

> > This way if host has
> > > an iommu, then the same kernel can run both VMs with isolation and unsafe
> > > embedded apps without.
> > Could you provide further details on this concept? What criteria would determine
> > the configuration of the new domain type? Would this require a boot parameter
> > similar to IOMMU_DOMAIN_IDENTITY, such as iommu.passthrough=1 or iommu.pt?
> 
> Thanks
> 
> > >
> > > > ---
> > > >  drivers/vhost/vdpa.c | 23 +++++++++++++++++++++++
> > > >  1 file changed, 23 insertions(+)
> > > >
> > > > diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index
> > > > bc4a51e4638b..d071c30125aa 100644
> > > > --- a/drivers/vhost/vdpa.c
> > > > +++ b/drivers/vhost/vdpa.c
> > > > @@ -36,6 +36,11 @@ enum {
> > > >
> > > >  #define VHOST_VDPA_IOTLB_BUCKETS 16
> > > >
> > > > +bool vhost_vdpa_noiommu;
> > > > +module_param_named(enable_vhost_vdpa_unsafe_noiommu_mode,
> > > > +              vhost_vdpa_noiommu, bool, 0644);
> > > > +MODULE_PARM_DESC(enable_vhost_vdpa_unsafe_noiommu_mode,
> > > "Enable
> > > > +UNSAFE, no-IOMMU mode.  This mode provides no device isolation, no
> > > > +DMA translation, no host kernel protection, cannot be used for device
> > > > +assignment to virtual machines, requires RAWIO permissions, and will
> > > > +taint the kernel.  If you do not know what this is for, step away.
> > > > +(default: false)");
> > > > +
> > > >  struct vhost_vdpa_as {
> > > >     struct hlist_node hash_link;
> > > >     struct vhost_iotlb iotlb;
> > > > @@ -60,6 +65,7 @@ struct vhost_vdpa {
> > > >     struct vdpa_iova_range range;
> > > >     u32 batch_asid;
> > > >     bool suspended;
> > > > +   bool noiommu_en;
> > > >  };
> > > >
> > > >  static DEFINE_IDA(vhost_vdpa_ida);
> > > > @@ -887,6 +893,10 @@ static void vhost_vdpa_general_unmap(struct
> > > > vhost_vdpa *v,  {
> > > >     struct vdpa_device *vdpa = v->vdpa;
> > > >     const struct vdpa_config_ops *ops = vdpa->config;
> > > > +
> > > > +   if (v->noiommu_en)
> > > > +           return;
> > > > +
> > > >     if (ops->dma_map) {
> > > >             ops->dma_unmap(vdpa, asid, map->start, map->size);
> > > >     } else if (ops->set_map == NULL) {
> > > > @@ -980,6 +990,9 @@ static int vhost_vdpa_map(struct vhost_vdpa *v,
> > > struct vhost_iotlb *iotlb,
> > > >     if (r)
> > > >             return r;
> > > >
> > > > +   if (v->noiommu_en)
> > > > +           goto skip_map;
> > > > +
> > > >     if (ops->dma_map) {
> > > >             r = ops->dma_map(vdpa, asid, iova, size, pa, perm, opaque);
> > > >     } else if (ops->set_map) {
> > > > @@ -995,6 +1008,7 @@ static int vhost_vdpa_map(struct vhost_vdpa *v,
> > > struct vhost_iotlb *iotlb,
> > > >             return r;
> > > >     }
> > > >
> > > > +skip_map:
> > > >     if (!vdpa->use_va)
> > > >             atomic64_add(PFN_DOWN(size), &dev->mm->pinned_vm);
> > > >
> > > > @@ -1298,6 +1312,7 @@ static int vhost_vdpa_alloc_domain(struct
> > > vhost_vdpa *v)
> > > >     struct vdpa_device *vdpa = v->vdpa;
> > > >     const struct vdpa_config_ops *ops = vdpa->config;
> > > >     struct device *dma_dev = vdpa_get_dma_dev(vdpa);
> > > > +   struct iommu_domain *domain;
> > > >     const struct bus_type *bus;
> > > >     int ret;
> > > >
> > > > @@ -1305,6 +1320,14 @@ static int vhost_vdpa_alloc_domain(struct
> > > vhost_vdpa *v)
> > > >     if (ops->set_map || ops->dma_map)
> > > >             return 0;
> > > >
> > > > +   domain = iommu_get_domain_for_dev(dma_dev);
> > > > +   if ((!domain || domain->type == IOMMU_DOMAIN_IDENTITY) &&
> > > > +       vhost_vdpa_noiommu && capable(CAP_SYS_RAWIO)) {
> > >
> > > So if userspace does not have CAP_SYS_RAWIO instead of failing with a
> > > permission error the functionality changes silently?
> > > That's confusing, I think.
> > Yes, you are correct. I will modify the code to return error when vhost_vdpa_noiommu
> > is set and CAP_SYS_RAWIO is not set.
> >
> > Thanks.
> > >
> > >
> > > > +           add_taint(TAINT_USER, LOCKDEP_STILL_OK);
> > > > +           dev_warn(&v->dev, "Adding kernel taint for noiommu on
> > > device\n");
> > > > +           v->noiommu_en = true;
> > > > +           return 0;
> > > > +   }
> > > >     bus = dma_dev->bus;
> > > >     if (!bus)
> > > >             return -EFAULT;
> > > > --
> > > > 2.25.1
> >
Srujana Challa July 23, 2024, 7:10 a.m. UTC | #14
> On Mon, Jul 22, 2024 at 03:22:22PM +0800, Jason Wang wrote:
> > On Fri, Jul 19, 2024 at 11:40 PM Srujana Challa <schalla@marvell.com>
> wrote:
> > >
> > > > On Thu, May 30, 2024 at 03:48:23PM +0530, Srujana Challa wrote:
> > > > > This commit introduces support for an UNSAFE, no-IOMMU mode in
> > > > > the vhost-vdpa driver. When enabled, this mode provides no
> > > > > device isolation, no DMA translation, no host kernel protection,
> > > > > and cannot be used for device assignment to virtual machines. It
> > > > > requires RAWIO permissions and will taint the kernel.
> > > > > This mode requires enabling the
> > > > "enable_vhost_vdpa_unsafe_noiommu_mode"
> > > > > option on the vhost-vdpa driver. This mode would be useful to
> > > > > get better performance on specifice low end machines and can be
> > > > > leveraged by embedded platforms where applications run in controlled
> environment.
> > > > >
> > > > > Signed-off-by: Srujana Challa <schalla@marvell.com>
> > > >
> > > > Thought hard about that.
> > > > I think given vfio supports this, we can do that too, and the extension is
> small.
> > > >
> > > > However, it looks like setting this parameter will automatically
> > > > change the behaviour for existing userspace when
> IOMMU_DOMAIN_IDENTITY is set.
Our initial thought was to support only for no-iommu case, in which domain itself
won't be exist.   So, we can modify the code as below to check for only presence of domain.
I think,  only handling of no-iommu case wouldn't effect the existing userspace.
+   if ((!domain) && vhost_vdpa_noiommu && capable(CAP_SYS_RAWIO)) {

> > > >
> > > > I suggest a new domain type for use just for this purpose.
> >
> > I'm not sure I get this, we want to bypass IOMMU, so it doesn't even
> > have a doman.
> 
> yes, a fake one. or come up with some other flag that userspace will set.
> 
> > > This way if host has
> > > > an iommu, then the same kernel can run both VMs with isolation and
> > > > unsafe embedded apps without.
> > > Could you provide further details on this concept? What criteria
> > > would determine the configuration of the new domain type? Would this
> > > require a boot parameter similar to IOMMU_DOMAIN_IDENTITY, such as
> iommu.passthrough=1 or iommu.pt?
> >
> > Thanks
> >
> > > >
> > > > > ---
> > > > >  drivers/vhost/vdpa.c | 23 +++++++++++++++++++++++
> > > > >  1 file changed, 23 insertions(+)
> > > > >
> > > > > diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index
> > > > > bc4a51e4638b..d071c30125aa 100644
> > > > > --- a/drivers/vhost/vdpa.c
> > > > > +++ b/drivers/vhost/vdpa.c
> > > > > @@ -36,6 +36,11 @@ enum {
> > > > >
> > > > >  #define VHOST_VDPA_IOTLB_BUCKETS 16
> > > > >
> > > > > +bool vhost_vdpa_noiommu;
> > > > > +module_param_named(enable_vhost_vdpa_unsafe_noiommu_mode,
> > > > > +              vhost_vdpa_noiommu, bool, 0644);
> > > > > +MODULE_PARM_DESC(enable_vhost_vdpa_unsafe_noiommu_mode,
> > > > "Enable
> > > > > +UNSAFE, no-IOMMU mode.  This mode provides no device isolation,
> > > > > +no DMA translation, no host kernel protection, cannot be used
> > > > > +for device assignment to virtual machines, requires RAWIO
> > > > > +permissions, and will taint the kernel.  If you do not know what this is
> for, step away.
> > > > > +(default: false)");
> > > > > +
> > > > >  struct vhost_vdpa_as {
> > > > >     struct hlist_node hash_link;
> > > > >     struct vhost_iotlb iotlb;
> > > > > @@ -60,6 +65,7 @@ struct vhost_vdpa {
> > > > >     struct vdpa_iova_range range;
> > > > >     u32 batch_asid;
> > > > >     bool suspended;
> > > > > +   bool noiommu_en;
> > > > >  };
> > > > >
> > > > >  static DEFINE_IDA(vhost_vdpa_ida); @@ -887,6 +893,10 @@ static
> > > > > void vhost_vdpa_general_unmap(struct vhost_vdpa *v,  {
> > > > >     struct vdpa_device *vdpa = v->vdpa;
> > > > >     const struct vdpa_config_ops *ops = vdpa->config;
> > > > > +
> > > > > +   if (v->noiommu_en)
> > > > > +           return;
> > > > > +
> > > > >     if (ops->dma_map) {
> > > > >             ops->dma_unmap(vdpa, asid, map->start, map->size);
> > > > >     } else if (ops->set_map == NULL) { @@ -980,6 +990,9 @@
> > > > > static int vhost_vdpa_map(struct vhost_vdpa *v,
> > > > struct vhost_iotlb *iotlb,
> > > > >     if (r)
> > > > >             return r;
> > > > >
> > > > > +   if (v->noiommu_en)
> > > > > +           goto skip_map;
> > > > > +
> > > > >     if (ops->dma_map) {
> > > > >             r = ops->dma_map(vdpa, asid, iova, size, pa, perm, opaque);
> > > > >     } else if (ops->set_map) {
> > > > > @@ -995,6 +1008,7 @@ static int vhost_vdpa_map(struct vhost_vdpa
> > > > > *v,
> > > > struct vhost_iotlb *iotlb,
> > > > >             return r;
> > > > >     }
> > > > >
> > > > > +skip_map:
> > > > >     if (!vdpa->use_va)
> > > > >             atomic64_add(PFN_DOWN(size), &dev->mm->pinned_vm);
> > > > >
> > > > > @@ -1298,6 +1312,7 @@ static int vhost_vdpa_alloc_domain(struct
> > > > vhost_vdpa *v)
> > > > >     struct vdpa_device *vdpa = v->vdpa;
> > > > >     const struct vdpa_config_ops *ops = vdpa->config;
> > > > >     struct device *dma_dev = vdpa_get_dma_dev(vdpa);
> > > > > +   struct iommu_domain *domain;
> > > > >     const struct bus_type *bus;
> > > > >     int ret;
> > > > >
> > > > > @@ -1305,6 +1320,14 @@ static int vhost_vdpa_alloc_domain(struct
> > > > vhost_vdpa *v)
> > > > >     if (ops->set_map || ops->dma_map)
> > > > >             return 0;
> > > > >
> > > > > +   domain = iommu_get_domain_for_dev(dma_dev);
> > > > > +   if ((!domain || domain->type == IOMMU_DOMAIN_IDENTITY) &&
> > > > > +       vhost_vdpa_noiommu && capable(CAP_SYS_RAWIO)) {
> > > >
> > > > So if userspace does not have CAP_SYS_RAWIO instead of failing
> > > > with a permission error the functionality changes silently?
> > > > That's confusing, I think.
> > > Yes, you are correct. I will modify the code to return error when
> > > vhost_vdpa_noiommu is set and CAP_SYS_RAWIO is not set.
> > >
> > > Thanks.
> > > >
> > > >
> > > > > +           add_taint(TAINT_USER, LOCKDEP_STILL_OK);
> > > > > +           dev_warn(&v->dev, "Adding kernel taint for noiommu
> > > > > + on
> > > > device\n");
> > > > > +           v->noiommu_en = true;
> > > > > +           return 0;
> > > > > +   }
> > > > >     bus = dma_dev->bus;
> > > > >     if (!bus)
> > > > >             return -EFAULT;
> > > > > --
> > > > > 2.25.1
> > >
Michael S. Tsirkin July 23, 2024, 11:04 a.m. UTC | #15
On Tue, Jul 23, 2024 at 07:10:52AM +0000, Srujana Challa wrote:
> > On Mon, Jul 22, 2024 at 03:22:22PM +0800, Jason Wang wrote:
> > > On Fri, Jul 19, 2024 at 11:40 PM Srujana Challa <schalla@marvell.com>
> > wrote:
> > > >
> > > > > On Thu, May 30, 2024 at 03:48:23PM +0530, Srujana Challa wrote:
> > > > > > This commit introduces support for an UNSAFE, no-IOMMU mode in
> > > > > > the vhost-vdpa driver. When enabled, this mode provides no
> > > > > > device isolation, no DMA translation, no host kernel protection,
> > > > > > and cannot be used for device assignment to virtual machines. It
> > > > > > requires RAWIO permissions and will taint the kernel.
> > > > > > This mode requires enabling the
> > > > > "enable_vhost_vdpa_unsafe_noiommu_mode"
> > > > > > option on the vhost-vdpa driver. This mode would be useful to
> > > > > > get better performance on specifice low end machines and can be
> > > > > > leveraged by embedded platforms where applications run in controlled
> > environment.
> > > > > >
> > > > > > Signed-off-by: Srujana Challa <schalla@marvell.com>
> > > > >
> > > > > Thought hard about that.
> > > > > I think given vfio supports this, we can do that too, and the extension is
> > small.
> > > > >
> > > > > However, it looks like setting this parameter will automatically
> > > > > change the behaviour for existing userspace when
> > IOMMU_DOMAIN_IDENTITY is set.
> Our initial thought was to support only for no-iommu case, in which domain itself
> won't be exist.   So, we can modify the code as below to check for only presence of domain.
> I think,  only handling of no-iommu case wouldn't effect the existing userspace.
> +   if ((!domain) && vhost_vdpa_noiommu && capable(CAP_SYS_RAWIO)) {

I would prefer some explicit action.
Just not specifying a domain is something I'd like
to keep reserved for something of more wide usefulness.


> > > > >
> > > > > I suggest a new domain type for use just for this purpose.
> > >
> > > I'm not sure I get this, we want to bypass IOMMU, so it doesn't even
> > > have a doman.
> > 
> > yes, a fake one. or come up with some other flag that userspace will set.
> > 
> > > > This way if host has
> > > > > an iommu, then the same kernel can run both VMs with isolation and
> > > > > unsafe embedded apps without.
> > > > Could you provide further details on this concept? What criteria
> > > > would determine the configuration of the new domain type? Would this
> > > > require a boot parameter similar to IOMMU_DOMAIN_IDENTITY, such as
> > iommu.passthrough=1 or iommu.pt?
> > >
> > > Thanks
> > >
> > > > >
> > > > > > ---
> > > > > >  drivers/vhost/vdpa.c | 23 +++++++++++++++++++++++
> > > > > >  1 file changed, 23 insertions(+)
> > > > > >
> > > > > > diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index
> > > > > > bc4a51e4638b..d071c30125aa 100644
> > > > > > --- a/drivers/vhost/vdpa.c
> > > > > > +++ b/drivers/vhost/vdpa.c
> > > > > > @@ -36,6 +36,11 @@ enum {
> > > > > >
> > > > > >  #define VHOST_VDPA_IOTLB_BUCKETS 16
> > > > > >
> > > > > > +bool vhost_vdpa_noiommu;
> > > > > > +module_param_named(enable_vhost_vdpa_unsafe_noiommu_mode,
> > > > > > +              vhost_vdpa_noiommu, bool, 0644);
> > > > > > +MODULE_PARM_DESC(enable_vhost_vdpa_unsafe_noiommu_mode,
> > > > > "Enable
> > > > > > +UNSAFE, no-IOMMU mode.  This mode provides no device isolation,
> > > > > > +no DMA translation, no host kernel protection, cannot be used
> > > > > > +for device assignment to virtual machines, requires RAWIO
> > > > > > +permissions, and will taint the kernel.  If you do not know what this is
> > for, step away.
> > > > > > +(default: false)");
> > > > > > +
> > > > > >  struct vhost_vdpa_as {
> > > > > >     struct hlist_node hash_link;
> > > > > >     struct vhost_iotlb iotlb;
> > > > > > @@ -60,6 +65,7 @@ struct vhost_vdpa {
> > > > > >     struct vdpa_iova_range range;
> > > > > >     u32 batch_asid;
> > > > > >     bool suspended;
> > > > > > +   bool noiommu_en;
> > > > > >  };
> > > > > >
> > > > > >  static DEFINE_IDA(vhost_vdpa_ida); @@ -887,6 +893,10 @@ static
> > > > > > void vhost_vdpa_general_unmap(struct vhost_vdpa *v,  {
> > > > > >     struct vdpa_device *vdpa = v->vdpa;
> > > > > >     const struct vdpa_config_ops *ops = vdpa->config;
> > > > > > +
> > > > > > +   if (v->noiommu_en)
> > > > > > +           return;
> > > > > > +
> > > > > >     if (ops->dma_map) {
> > > > > >             ops->dma_unmap(vdpa, asid, map->start, map->size);
> > > > > >     } else if (ops->set_map == NULL) { @@ -980,6 +990,9 @@
> > > > > > static int vhost_vdpa_map(struct vhost_vdpa *v,
> > > > > struct vhost_iotlb *iotlb,
> > > > > >     if (r)
> > > > > >             return r;
> > > > > >
> > > > > > +   if (v->noiommu_en)
> > > > > > +           goto skip_map;
> > > > > > +
> > > > > >     if (ops->dma_map) {
> > > > > >             r = ops->dma_map(vdpa, asid, iova, size, pa, perm, opaque);
> > > > > >     } else if (ops->set_map) {
> > > > > > @@ -995,6 +1008,7 @@ static int vhost_vdpa_map(struct vhost_vdpa
> > > > > > *v,
> > > > > struct vhost_iotlb *iotlb,
> > > > > >             return r;
> > > > > >     }
> > > > > >
> > > > > > +skip_map:
> > > > > >     if (!vdpa->use_va)
> > > > > >             atomic64_add(PFN_DOWN(size), &dev->mm->pinned_vm);
> > > > > >
> > > > > > @@ -1298,6 +1312,7 @@ static int vhost_vdpa_alloc_domain(struct
> > > > > vhost_vdpa *v)
> > > > > >     struct vdpa_device *vdpa = v->vdpa;
> > > > > >     const struct vdpa_config_ops *ops = vdpa->config;
> > > > > >     struct device *dma_dev = vdpa_get_dma_dev(vdpa);
> > > > > > +   struct iommu_domain *domain;
> > > > > >     const struct bus_type *bus;
> > > > > >     int ret;
> > > > > >
> > > > > > @@ -1305,6 +1320,14 @@ static int vhost_vdpa_alloc_domain(struct
> > > > > vhost_vdpa *v)
> > > > > >     if (ops->set_map || ops->dma_map)
> > > > > >             return 0;
> > > > > >
> > > > > > +   domain = iommu_get_domain_for_dev(dma_dev);
> > > > > > +   if ((!domain || domain->type == IOMMU_DOMAIN_IDENTITY) &&
> > > > > > +       vhost_vdpa_noiommu && capable(CAP_SYS_RAWIO)) {
> > > > >
> > > > > So if userspace does not have CAP_SYS_RAWIO instead of failing
> > > > > with a permission error the functionality changes silently?
> > > > > That's confusing, I think.
> > > > Yes, you are correct. I will modify the code to return error when
> > > > vhost_vdpa_noiommu is set and CAP_SYS_RAWIO is not set.
> > > >
> > > > Thanks.
> > > > >
> > > > >
> > > > > > +           add_taint(TAINT_USER, LOCKDEP_STILL_OK);
> > > > > > +           dev_warn(&v->dev, "Adding kernel taint for noiommu
> > > > > > + on
> > > > > device\n");
> > > > > > +           v->noiommu_en = true;
> > > > > > +           return 0;
> > > > > > +   }
> > > > > >     bus = dma_dev->bus;
> > > > > >     if (!bus)
> > > > > >             return -EFAULT;
> > > > > > --
> > > > > > 2.25.1
> > > >
>
Srujana Challa Aug. 6, 2024, 11:47 a.m. UTC | #16
> On Tue, Jul 23, 2024 at 07:10:52AM +0000, Srujana Challa wrote:
> > > On Mon, Jul 22, 2024 at 03:22:22PM +0800, Jason Wang wrote:
> > > > On Fri, Jul 19, 2024 at 11:40 PM Srujana Challa
> > > > <schalla@marvell.com>
> > > wrote:
> > > > >
> > > > > > On Thu, May 30, 2024 at 03:48:23PM +0530, Srujana Challa wrote:
> > > > > > > This commit introduces support for an UNSAFE, no-IOMMU mode
> > > > > > > in the vhost-vdpa driver. When enabled, this mode provides
> > > > > > > no device isolation, no DMA translation, no host kernel
> > > > > > > protection, and cannot be used for device assignment to
> > > > > > > virtual machines. It requires RAWIO permissions and will taint the
> kernel.
> > > > > > > This mode requires enabling the
> > > > > > "enable_vhost_vdpa_unsafe_noiommu_mode"
> > > > > > > option on the vhost-vdpa driver. This mode would be useful
> > > > > > > to get better performance on specifice low end machines and
> > > > > > > can be leveraged by embedded platforms where applications
> > > > > > > run in controlled
> > > environment.
> > > > > > >
> > > > > > > Signed-off-by: Srujana Challa <schalla@marvell.com>
> > > > > >
> > > > > > Thought hard about that.
> > > > > > I think given vfio supports this, we can do that too, and the
> > > > > > extension is
> > > small.
> > > > > >
> > > > > > However, it looks like setting this parameter will
> > > > > > automatically change the behaviour for existing userspace when
> > > IOMMU_DOMAIN_IDENTITY is set.
> > Our initial thought was to support only for no-iommu case, in which domain
> itself
> > won't be exist.   So, we can modify the code as below to check for only
> presence of domain.
> > I think,  only handling of no-iommu case wouldn't effect the existing
> userspace.
> > +   if ((!domain) && vhost_vdpa_noiommu && capable(CAP_SYS_RAWIO)) {
> 
> I would prefer some explicit action.
> Just not specifying a domain is something I'd like to keep reserved for
> something of more wide usefulness.
Can we introduce a new feature like VHOST_BACKEND_F_NOIOMMU in 
VHOST_VDPA_BACKEND_FEATURES?  We can have below logic based on
this feature bit negotiation.
Thanks.
> 
> 
> > > > > >
> > > > > > I suggest a new domain type for use just for this purpose.
> > > >
> > > > I'm not sure I get this, we want to bypass IOMMU, so it doesn't
> > > > even have a doman.
> > >
> > > yes, a fake one. or come up with some other flag that userspace will set.
> > >
> > > > > This way if host has
> > > > > > an iommu, then the same kernel can run both VMs with isolation
> > > > > > and unsafe embedded apps without.
> > > > > Could you provide further details on this concept? What criteria
> > > > > would determine the configuration of the new domain type? Would
> > > > > this require a boot parameter similar to IOMMU_DOMAIN_IDENTITY,
> > > > > such as
> > > iommu.passthrough=1 or iommu.pt?
> > > >
> > > > Thanks
> > > >
> > > > > >
> > > > > > > ---
> > > > > > >  drivers/vhost/vdpa.c | 23 +++++++++++++++++++++++
> > > > > > >  1 file changed, 23 insertions(+)
> > > > > > >
> > > > > > > diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
> > > > > > > index bc4a51e4638b..d071c30125aa 100644
> > > > > > > --- a/drivers/vhost/vdpa.c
> > > > > > > +++ b/drivers/vhost/vdpa.c
> > > > > > > @@ -36,6 +36,11 @@ enum {
> > > > > > >
> > > > > > >  #define VHOST_VDPA_IOTLB_BUCKETS 16
> > > > > > >
> > > > > > > +bool vhost_vdpa_noiommu;
> > > > > > >
> +module_param_named(enable_vhost_vdpa_unsafe_noiommu_mode,
> > > > > > > +              vhost_vdpa_noiommu, bool, 0644);
> > > > > > >
> +MODULE_PARM_DESC(enable_vhost_vdpa_unsafe_noiommu_mode,
> > > > > > "Enable
> > > > > > > +UNSAFE, no-IOMMU mode.  This mode provides no device
> > > > > > > +isolation, no DMA translation, no host kernel protection,
> > > > > > > +cannot be used for device assignment to virtual machines,
> > > > > > > +requires RAWIO permissions, and will taint the kernel.  If
> > > > > > > +you do not know what this is
> > > for, step away.
> > > > > > > +(default: false)");
> > > > > > > +
> > > > > > >  struct vhost_vdpa_as {
> > > > > > >     struct hlist_node hash_link;
> > > > > > >     struct vhost_iotlb iotlb; @@ -60,6 +65,7 @@ struct
> > > > > > > vhost_vdpa {
> > > > > > >     struct vdpa_iova_range range;
> > > > > > >     u32 batch_asid;
> > > > > > >     bool suspended;
> > > > > > > +   bool noiommu_en;
> > > > > > >  };
> > > > > > >
> > > > > > >  static DEFINE_IDA(vhost_vdpa_ida); @@ -887,6 +893,10 @@
> > > > > > > static void vhost_vdpa_general_unmap(struct vhost_vdpa *v,  {
> > > > > > >     struct vdpa_device *vdpa = v->vdpa;
> > > > > > >     const struct vdpa_config_ops *ops = vdpa->config;
> > > > > > > +
> > > > > > > +   if (v->noiommu_en)
> > > > > > > +           return;
> > > > > > > +
> > > > > > >     if (ops->dma_map) {
> > > > > > >             ops->dma_unmap(vdpa, asid, map->start, map->size);
> > > > > > >     } else if (ops->set_map == NULL) { @@ -980,6 +990,9 @@
> > > > > > > static int vhost_vdpa_map(struct vhost_vdpa *v,
> > > > > > struct vhost_iotlb *iotlb,
> > > > > > >     if (r)
> > > > > > >             return r;
> > > > > > >
> > > > > > > +   if (v->noiommu_en)
> > > > > > > +           goto skip_map;
> > > > > > > +
> > > > > > >     if (ops->dma_map) {
> > > > > > >             r = ops->dma_map(vdpa, asid, iova, size, pa, perm, opaque);
> > > > > > >     } else if (ops->set_map) { @@ -995,6 +1008,7 @@ static
> > > > > > > int vhost_vdpa_map(struct vhost_vdpa *v,
> > > > > > struct vhost_iotlb *iotlb,
> > > > > > >             return r;
> > > > > > >     }
> > > > > > >
> > > > > > > +skip_map:
> > > > > > >     if (!vdpa->use_va)
> > > > > > >             atomic64_add(PFN_DOWN(size),
> > > > > > > &dev->mm->pinned_vm);
> > > > > > >
> > > > > > > @@ -1298,6 +1312,7 @@ static int
> > > > > > > vhost_vdpa_alloc_domain(struct
> > > > > > vhost_vdpa *v)
> > > > > > >     struct vdpa_device *vdpa = v->vdpa;
> > > > > > >     const struct vdpa_config_ops *ops = vdpa->config;
> > > > > > >     struct device *dma_dev = vdpa_get_dma_dev(vdpa);
> > > > > > > +   struct iommu_domain *domain;
> > > > > > >     const struct bus_type *bus;
> > > > > > >     int ret;
> > > > > > >
> > > > > > > @@ -1305,6 +1320,14 @@ static int
> > > > > > > vhost_vdpa_alloc_domain(struct
> > > > > > vhost_vdpa *v)
> > > > > > >     if (ops->set_map || ops->dma_map)
> > > > > > >             return 0;
> > > > > > >
> > > > > > > +   domain = iommu_get_domain_for_dev(dma_dev);
> > > > > > > +   if ((!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
> &&
> > > > > > > +       vhost_vdpa_noiommu && capable(CAP_SYS_RAWIO)) {
> > > > > >
> > > > > > So if userspace does not have CAP_SYS_RAWIO instead of failing
> > > > > > with a permission error the functionality changes silently?
> > > > > > That's confusing, I think.
> > > > > Yes, you are correct. I will modify the code to return error
> > > > > when vhost_vdpa_noiommu is set and CAP_SYS_RAWIO is not set.
> > > > >
> > > > > Thanks.
> > > > > >
> > > > > >
> > > > > > > +           add_taint(TAINT_USER, LOCKDEP_STILL_OK);
> > > > > > > +           dev_warn(&v->dev, "Adding kernel taint for
> > > > > > > + noiommu on
> > > > > > device\n");
> > > > > > > +           v->noiommu_en = true;
> > > > > > > +           return 0;
> > > > > > > +   }
> > > > > > >     bus = dma_dev->bus;
> > > > > > >     if (!bus)
> > > > > > >             return -EFAULT;
> > > > > > > --
> > > > > > > 2.25.1
> > > > >
> >
Srujana Challa Aug. 28, 2024, 9:08 a.m. UTC | #17
> Subject: RE: [EXTERNAL] Re: [PATCH] vdpa: Add support for no-IOMMU mode
> 
> > On Tue, Jul 23, 2024 at 07:10:52AM +0000, Srujana Challa wrote:
> > > > On Mon, Jul 22, 2024 at 03:22:22PM +0800, Jason Wang wrote:
> > > > > On Fri, Jul 19, 2024 at 11:40 PM Srujana Challa
> > > > > <schalla@marvell.com>
> > > > wrote:
> > > > > >
> > > > > > > On Thu, May 30, 2024 at 03:48:23PM +0530, Srujana Challa wrote:
> > > > > > > > This commit introduces support for an UNSAFE, no-IOMMU
> > > > > > > > mode in the vhost-vdpa driver. When enabled, this mode
> > > > > > > > provides no device isolation, no DMA translation, no host
> > > > > > > > kernel protection, and cannot be used for device
> > > > > > > > assignment to virtual machines. It requires RAWIO
> > > > > > > > permissions and will taint the
> > kernel.
> > > > > > > > This mode requires enabling the
> > > > > > > "enable_vhost_vdpa_unsafe_noiommu_mode"
> > > > > > > > option on the vhost-vdpa driver. This mode would be useful
> > > > > > > > to get better performance on specifice low end machines
> > > > > > > > and can be leveraged by embedded platforms where
> > > > > > > > applications run in controlled
> > > > environment.
> > > > > > > >
> > > > > > > > Signed-off-by: Srujana Challa <schalla@marvell.com>
> > > > > > >
> > > > > > > Thought hard about that.
> > > > > > > I think given vfio supports this, we can do that too, and
> > > > > > > the extension is
> > > > small.
> > > > > > >
> > > > > > > However, it looks like setting this parameter will
> > > > > > > automatically change the behaviour for existing userspace
> > > > > > > when
> > > > IOMMU_DOMAIN_IDENTITY is set.
> > > Our initial thought was to support only for no-iommu case, in which
> > > domain
> > itself
> > > won't be exist.   So, we can modify the code as below to check for only
> > presence of domain.
> > > I think,  only handling of no-iommu case wouldn't effect the
> > > existing
> > userspace.
> > > +   if ((!domain) && vhost_vdpa_noiommu && capable(CAP_SYS_RAWIO))
> {
> >
> > I would prefer some explicit action.
> > Just not specifying a domain is something I'd like to keep reserved
> > for something of more wide usefulness.
> Can we introduce a new feature like VHOST_BACKEND_F_NOIOMMU in
> VHOST_VDPA_BACKEND_FEATURES?  We can have below logic based on this
> feature bit negotiation.
> Thanks.
Michael, could you please confirm if adding a new feature to VHOST_VDPA_BACKEND_FEATURES
is an appropriate solution to support no-IOMMU for the vhost-vdpa backend?

> >
> >
> > > > > > >
> > > > > > > I suggest a new domain type for use just for this purpose.
> > > > >
> > > > > I'm not sure I get this, we want to bypass IOMMU, so it doesn't
> > > > > even have a doman.
> > > >
> > > > yes, a fake one. or come up with some other flag that userspace will set.
> > > >
> > > > > > This way if host has
> > > > > > > an iommu, then the same kernel can run both VMs with
> > > > > > > isolation and unsafe embedded apps without.
> > > > > > Could you provide further details on this concept? What
> > > > > > criteria would determine the configuration of the new domain
> > > > > > type? Would this require a boot parameter similar to
> > > > > > IOMMU_DOMAIN_IDENTITY, such as
> > > > iommu.passthrough=1 or iommu.pt?
> > > > >
> > > > > Thanks
> > > > >
> > > > > > >
> > > > > > > > ---
> > > > > > > >  drivers/vhost/vdpa.c | 23 +++++++++++++++++++++++
> > > > > > > >  1 file changed, 23 insertions(+)
> > > > > > > >
> > > > > > > > diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
> > > > > > > > index bc4a51e4638b..d071c30125aa 100644
> > > > > > > > --- a/drivers/vhost/vdpa.c
> > > > > > > > +++ b/drivers/vhost/vdpa.c
> > > > > > > > @@ -36,6 +36,11 @@ enum {
> > > > > > > >
> > > > > > > >  #define VHOST_VDPA_IOTLB_BUCKETS 16
> > > > > > > >
> > > > > > > > +bool vhost_vdpa_noiommu;
> > > > > > > >
> > +module_param_named(enable_vhost_vdpa_unsafe_noiommu_mode,
> > > > > > > > +              vhost_vdpa_noiommu, bool, 0644);
> > > > > > > >
> > +MODULE_PARM_DESC(enable_vhost_vdpa_unsafe_noiommu_mode,
> > > > > > > "Enable
> > > > > > > > +UNSAFE, no-IOMMU mode.  This mode provides no device
> > > > > > > > +isolation, no DMA translation, no host kernel protection,
> > > > > > > > +cannot be used for device assignment to virtual machines,
> > > > > > > > +requires RAWIO permissions, and will taint the kernel.
> > > > > > > > +If you do not know what this is
> > > > for, step away.
> > > > > > > > +(default: false)");
> > > > > > > > +
> > > > > > > >  struct vhost_vdpa_as {
> > > > > > > >     struct hlist_node hash_link;
> > > > > > > >     struct vhost_iotlb iotlb; @@ -60,6 +65,7 @@ struct
> > > > > > > > vhost_vdpa {
> > > > > > > >     struct vdpa_iova_range range;
> > > > > > > >     u32 batch_asid;
> > > > > > > >     bool suspended;
> > > > > > > > +   bool noiommu_en;
> > > > > > > >  };
> > > > > > > >
> > > > > > > >  static DEFINE_IDA(vhost_vdpa_ida); @@ -887,6 +893,10 @@
> > > > > > > > static void vhost_vdpa_general_unmap(struct vhost_vdpa *v,  {
> > > > > > > >     struct vdpa_device *vdpa = v->vdpa;
> > > > > > > >     const struct vdpa_config_ops *ops = vdpa->config;
> > > > > > > > +
> > > > > > > > +   if (v->noiommu_en)
> > > > > > > > +           return;
> > > > > > > > +
> > > > > > > >     if (ops->dma_map) {
> > > > > > > >             ops->dma_unmap(vdpa, asid, map->start, map->size);
> > > > > > > >     } else if (ops->set_map == NULL) { @@ -980,6 +990,9 @@
> > > > > > > > static int vhost_vdpa_map(struct vhost_vdpa *v,
> > > > > > > struct vhost_iotlb *iotlb,
> > > > > > > >     if (r)
> > > > > > > >             return r;
> > > > > > > >
> > > > > > > > +   if (v->noiommu_en)
> > > > > > > > +           goto skip_map;
> > > > > > > > +
> > > > > > > >     if (ops->dma_map) {
> > > > > > > >             r = ops->dma_map(vdpa, asid, iova, size, pa, perm, opaque);
> > > > > > > >     } else if (ops->set_map) { @@ -995,6 +1008,7 @@ static
> > > > > > > > int vhost_vdpa_map(struct vhost_vdpa *v,
> > > > > > > struct vhost_iotlb *iotlb,
> > > > > > > >             return r;
> > > > > > > >     }
> > > > > > > >
> > > > > > > > +skip_map:
> > > > > > > >     if (!vdpa->use_va)
> > > > > > > >             atomic64_add(PFN_DOWN(size),
> > > > > > > > &dev->mm->pinned_vm);
> > > > > > > >
> > > > > > > > @@ -1298,6 +1312,7 @@ static int
> > > > > > > > vhost_vdpa_alloc_domain(struct
> > > > > > > vhost_vdpa *v)
> > > > > > > >     struct vdpa_device *vdpa = v->vdpa;
> > > > > > > >     const struct vdpa_config_ops *ops = vdpa->config;
> > > > > > > >     struct device *dma_dev = vdpa_get_dma_dev(vdpa);
> > > > > > > > +   struct iommu_domain *domain;
> > > > > > > >     const struct bus_type *bus;
> > > > > > > >     int ret;
> > > > > > > >
> > > > > > > > @@ -1305,6 +1320,14 @@ static int
> > > > > > > > vhost_vdpa_alloc_domain(struct
> > > > > > > vhost_vdpa *v)
> > > > > > > >     if (ops->set_map || ops->dma_map)
> > > > > > > >             return 0;
> > > > > > > >
> > > > > > > > +   domain = iommu_get_domain_for_dev(dma_dev);
> > > > > > > > +   if ((!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
> > &&
> > > > > > > > +       vhost_vdpa_noiommu && capable(CAP_SYS_RAWIO)) {
> > > > > > >
> > > > > > > So if userspace does not have CAP_SYS_RAWIO instead of
> > > > > > > failing with a permission error the functionality changes silently?
> > > > > > > That's confusing, I think.
> > > > > > Yes, you are correct. I will modify the code to return error
> > > > > > when vhost_vdpa_noiommu is set and CAP_SYS_RAWIO is not set.
> > > > > >
> > > > > > Thanks.
> > > > > > >
> > > > > > >
> > > > > > > > +           add_taint(TAINT_USER, LOCKDEP_STILL_OK);
> > > > > > > > +           dev_warn(&v->dev, "Adding kernel taint for
> > > > > > > > + noiommu on
> > > > > > > device\n");
> > > > > > > > +           v->noiommu_en = true;
> > > > > > > > +           return 0;
> > > > > > > > +   }
> > > > > > > >     bus = dma_dev->bus;
> > > > > > > >     if (!bus)
> > > > > > > >             return -EFAULT;
> > > > > > > > --
> > > > > > > > 2.25.1
> > > > > >
> > >
Michael S. Tsirkin Sept. 10, 2024, 5:56 a.m. UTC | #18
On Wed, Aug 28, 2024 at 09:08:13AM +0000, Srujana Challa wrote:
> > Subject: RE: [EXTERNAL] Re: [PATCH] vdpa: Add support for no-IOMMU mode
> > 
> > > On Tue, Jul 23, 2024 at 07:10:52AM +0000, Srujana Challa wrote:
> > > > > On Mon, Jul 22, 2024 at 03:22:22PM +0800, Jason Wang wrote:
> > > > > > On Fri, Jul 19, 2024 at 11:40 PM Srujana Challa
> > > > > > <schalla@marvell.com>
> > > > > wrote:
> > > > > > >
> > > > > > > > On Thu, May 30, 2024 at 03:48:23PM +0530, Srujana Challa wrote:
> > > > > > > > > This commit introduces support for an UNSAFE, no-IOMMU
> > > > > > > > > mode in the vhost-vdpa driver. When enabled, this mode
> > > > > > > > > provides no device isolation, no DMA translation, no host
> > > > > > > > > kernel protection, and cannot be used for device
> > > > > > > > > assignment to virtual machines. It requires RAWIO
> > > > > > > > > permissions and will taint the
> > > kernel.
> > > > > > > > > This mode requires enabling the
> > > > > > > > "enable_vhost_vdpa_unsafe_noiommu_mode"
> > > > > > > > > option on the vhost-vdpa driver. This mode would be useful
> > > > > > > > > to get better performance on specifice low end machines
> > > > > > > > > and can be leveraged by embedded platforms where
> > > > > > > > > applications run in controlled
> > > > > environment.
> > > > > > > > >
> > > > > > > > > Signed-off-by: Srujana Challa <schalla@marvell.com>
> > > > > > > >
> > > > > > > > Thought hard about that.
> > > > > > > > I think given vfio supports this, we can do that too, and
> > > > > > > > the extension is
> > > > > small.
> > > > > > > >
> > > > > > > > However, it looks like setting this parameter will
> > > > > > > > automatically change the behaviour for existing userspace
> > > > > > > > when
> > > > > IOMMU_DOMAIN_IDENTITY is set.
> > > > Our initial thought was to support only for no-iommu case, in which
> > > > domain
> > > itself
> > > > won't be exist.   So, we can modify the code as below to check for only
> > > presence of domain.
> > > > I think,  only handling of no-iommu case wouldn't effect the
> > > > existing
> > > userspace.
> > > > +   if ((!domain) && vhost_vdpa_noiommu && capable(CAP_SYS_RAWIO))
> > {
> > >
> > > I would prefer some explicit action.
> > > Just not specifying a domain is something I'd like to keep reserved
> > > for something of more wide usefulness.
> > Can we introduce a new feature like VHOST_BACKEND_F_NOIOMMU in
> > VHOST_VDPA_BACKEND_FEATURES?  We can have below logic based on this
> > feature bit negotiation.
> > Thanks.
> Michael, could you please confirm if adding a new feature to VHOST_VDPA_BACKEND_FEATURES
> is an appropriate solution to support no-IOMMU for the vhost-vdpa backend?


Yes. So the idea is to require both a module parameter, and a
flag set by userspace, to make sure users do not mistakenly
try to assign such devices to VMs.

Thanks.

> > >
> > >
> > > > > > > >
> > > > > > > > I suggest a new domain type for use just for this purpose.
> > > > > >
> > > > > > I'm not sure I get this, we want to bypass IOMMU, so it doesn't
> > > > > > even have a doman.
> > > > >
> > > > > yes, a fake one. or come up with some other flag that userspace will set.
> > > > >
> > > > > > > This way if host has
> > > > > > > > an iommu, then the same kernel can run both VMs with
> > > > > > > > isolation and unsafe embedded apps without.
> > > > > > > Could you provide further details on this concept? What
> > > > > > > criteria would determine the configuration of the new domain
> > > > > > > type? Would this require a boot parameter similar to
> > > > > > > IOMMU_DOMAIN_IDENTITY, such as
> > > > > iommu.passthrough=1 or iommu.pt?
> > > > > >
> > > > > > Thanks
> > > > > >
> > > > > > > >
> > > > > > > > > ---
> > > > > > > > >  drivers/vhost/vdpa.c | 23 +++++++++++++++++++++++
> > > > > > > > >  1 file changed, 23 insertions(+)
> > > > > > > > >
> > > > > > > > > diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
> > > > > > > > > index bc4a51e4638b..d071c30125aa 100644
> > > > > > > > > --- a/drivers/vhost/vdpa.c
> > > > > > > > > +++ b/drivers/vhost/vdpa.c
> > > > > > > > > @@ -36,6 +36,11 @@ enum {
> > > > > > > > >
> > > > > > > > >  #define VHOST_VDPA_IOTLB_BUCKETS 16
> > > > > > > > >
> > > > > > > > > +bool vhost_vdpa_noiommu;
> > > > > > > > >
> > > +module_param_named(enable_vhost_vdpa_unsafe_noiommu_mode,
> > > > > > > > > +              vhost_vdpa_noiommu, bool, 0644);
> > > > > > > > >
> > > +MODULE_PARM_DESC(enable_vhost_vdpa_unsafe_noiommu_mode,
> > > > > > > > "Enable
> > > > > > > > > +UNSAFE, no-IOMMU mode.  This mode provides no device
> > > > > > > > > +isolation, no DMA translation, no host kernel protection,
> > > > > > > > > +cannot be used for device assignment to virtual machines,
> > > > > > > > > +requires RAWIO permissions, and will taint the kernel.
> > > > > > > > > +If you do not know what this is
> > > > > for, step away.
> > > > > > > > > +(default: false)");
> > > > > > > > > +
> > > > > > > > >  struct vhost_vdpa_as {
> > > > > > > > >     struct hlist_node hash_link;
> > > > > > > > >     struct vhost_iotlb iotlb; @@ -60,6 +65,7 @@ struct
> > > > > > > > > vhost_vdpa {
> > > > > > > > >     struct vdpa_iova_range range;
> > > > > > > > >     u32 batch_asid;
> > > > > > > > >     bool suspended;
> > > > > > > > > +   bool noiommu_en;
> > > > > > > > >  };
> > > > > > > > >
> > > > > > > > >  static DEFINE_IDA(vhost_vdpa_ida); @@ -887,6 +893,10 @@
> > > > > > > > > static void vhost_vdpa_general_unmap(struct vhost_vdpa *v,  {
> > > > > > > > >     struct vdpa_device *vdpa = v->vdpa;
> > > > > > > > >     const struct vdpa_config_ops *ops = vdpa->config;
> > > > > > > > > +
> > > > > > > > > +   if (v->noiommu_en)
> > > > > > > > > +           return;
> > > > > > > > > +
> > > > > > > > >     if (ops->dma_map) {
> > > > > > > > >             ops->dma_unmap(vdpa, asid, map->start, map->size);
> > > > > > > > >     } else if (ops->set_map == NULL) { @@ -980,6 +990,9 @@
> > > > > > > > > static int vhost_vdpa_map(struct vhost_vdpa *v,
> > > > > > > > struct vhost_iotlb *iotlb,
> > > > > > > > >     if (r)
> > > > > > > > >             return r;
> > > > > > > > >
> > > > > > > > > +   if (v->noiommu_en)
> > > > > > > > > +           goto skip_map;
> > > > > > > > > +
> > > > > > > > >     if (ops->dma_map) {
> > > > > > > > >             r = ops->dma_map(vdpa, asid, iova, size, pa, perm, opaque);
> > > > > > > > >     } else if (ops->set_map) { @@ -995,6 +1008,7 @@ static
> > > > > > > > > int vhost_vdpa_map(struct vhost_vdpa *v,
> > > > > > > > struct vhost_iotlb *iotlb,
> > > > > > > > >             return r;
> > > > > > > > >     }
> > > > > > > > >
> > > > > > > > > +skip_map:
> > > > > > > > >     if (!vdpa->use_va)
> > > > > > > > >             atomic64_add(PFN_DOWN(size),
> > > > > > > > > &dev->mm->pinned_vm);
> > > > > > > > >
> > > > > > > > > @@ -1298,6 +1312,7 @@ static int
> > > > > > > > > vhost_vdpa_alloc_domain(struct
> > > > > > > > vhost_vdpa *v)
> > > > > > > > >     struct vdpa_device *vdpa = v->vdpa;
> > > > > > > > >     const struct vdpa_config_ops *ops = vdpa->config;
> > > > > > > > >     struct device *dma_dev = vdpa_get_dma_dev(vdpa);
> > > > > > > > > +   struct iommu_domain *domain;
> > > > > > > > >     const struct bus_type *bus;
> > > > > > > > >     int ret;
> > > > > > > > >
> > > > > > > > > @@ -1305,6 +1320,14 @@ static int
> > > > > > > > > vhost_vdpa_alloc_domain(struct
> > > > > > > > vhost_vdpa *v)
> > > > > > > > >     if (ops->set_map || ops->dma_map)
> > > > > > > > >             return 0;
> > > > > > > > >
> > > > > > > > > +   domain = iommu_get_domain_for_dev(dma_dev);
> > > > > > > > > +   if ((!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
> > > &&
> > > > > > > > > +       vhost_vdpa_noiommu && capable(CAP_SYS_RAWIO)) {
> > > > > > > >
> > > > > > > > So if userspace does not have CAP_SYS_RAWIO instead of
> > > > > > > > failing with a permission error the functionality changes silently?
> > > > > > > > That's confusing, I think.
> > > > > > > Yes, you are correct. I will modify the code to return error
> > > > > > > when vhost_vdpa_noiommu is set and CAP_SYS_RAWIO is not set.
> > > > > > >
> > > > > > > Thanks.
> > > > > > > >
> > > > > > > >
> > > > > > > > > +           add_taint(TAINT_USER, LOCKDEP_STILL_OK);
> > > > > > > > > +           dev_warn(&v->dev, "Adding kernel taint for
> > > > > > > > > + noiommu on
> > > > > > > > device\n");
> > > > > > > > > +           v->noiommu_en = true;
> > > > > > > > > +           return 0;
> > > > > > > > > +   }
> > > > > > > > >     bus = dma_dev->bus;
> > > > > > > > >     if (!bus)
> > > > > > > > >             return -EFAULT;
> > > > > > > > > --
> > > > > > > > > 2.25.1
> > > > > > >
> > > >
>
diff mbox series

Patch

diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
index bc4a51e4638b..d071c30125aa 100644
--- a/drivers/vhost/vdpa.c
+++ b/drivers/vhost/vdpa.c
@@ -36,6 +36,11 @@  enum {
 
 #define VHOST_VDPA_IOTLB_BUCKETS 16
 
+bool vhost_vdpa_noiommu;
+module_param_named(enable_vhost_vdpa_unsafe_noiommu_mode,
+		   vhost_vdpa_noiommu, bool, 0644);
+MODULE_PARM_DESC(enable_vhost_vdpa_unsafe_noiommu_mode, "Enable UNSAFE, no-IOMMU mode.  This mode provides no device isolation, no DMA translation, no host kernel protection, cannot be used for device assignment to virtual machines, requires RAWIO permissions, and will taint the kernel.  If you do not know what this is for, step away. (default: false)");
+
 struct vhost_vdpa_as {
 	struct hlist_node hash_link;
 	struct vhost_iotlb iotlb;
@@ -60,6 +65,7 @@  struct vhost_vdpa {
 	struct vdpa_iova_range range;
 	u32 batch_asid;
 	bool suspended;
+	bool noiommu_en;
 };
 
 static DEFINE_IDA(vhost_vdpa_ida);
@@ -887,6 +893,10 @@  static void vhost_vdpa_general_unmap(struct vhost_vdpa *v,
 {
 	struct vdpa_device *vdpa = v->vdpa;
 	const struct vdpa_config_ops *ops = vdpa->config;
+
+	if (v->noiommu_en)
+		return;
+
 	if (ops->dma_map) {
 		ops->dma_unmap(vdpa, asid, map->start, map->size);
 	} else if (ops->set_map == NULL) {
@@ -980,6 +990,9 @@  static int vhost_vdpa_map(struct vhost_vdpa *v, struct vhost_iotlb *iotlb,
 	if (r)
 		return r;
 
+	if (v->noiommu_en)
+		goto skip_map;
+
 	if (ops->dma_map) {
 		r = ops->dma_map(vdpa, asid, iova, size, pa, perm, opaque);
 	} else if (ops->set_map) {
@@ -995,6 +1008,7 @@  static int vhost_vdpa_map(struct vhost_vdpa *v, struct vhost_iotlb *iotlb,
 		return r;
 	}
 
+skip_map:
 	if (!vdpa->use_va)
 		atomic64_add(PFN_DOWN(size), &dev->mm->pinned_vm);
 
@@ -1298,6 +1312,7 @@  static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v)
 	struct vdpa_device *vdpa = v->vdpa;
 	const struct vdpa_config_ops *ops = vdpa->config;
 	struct device *dma_dev = vdpa_get_dma_dev(vdpa);
+	struct iommu_domain *domain;
 	const struct bus_type *bus;
 	int ret;
 
@@ -1305,6 +1320,14 @@  static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v)
 	if (ops->set_map || ops->dma_map)
 		return 0;
 
+	domain = iommu_get_domain_for_dev(dma_dev);
+	if ((!domain || domain->type == IOMMU_DOMAIN_IDENTITY) &&
+	    vhost_vdpa_noiommu && capable(CAP_SYS_RAWIO)) {
+		add_taint(TAINT_USER, LOCKDEP_STILL_OK);
+		dev_warn(&v->dev, "Adding kernel taint for noiommu on device\n");
+		v->noiommu_en = true;
+		return 0;
+	}
 	bus = dma_dev->bus;
 	if (!bus)
 		return -EFAULT;