diff mbox series

[V1,2/5] vfio: option to unmap all

Message ID 1609861013-129801-3-git-send-email-steven.sistare@oracle.com (mailing list archive)
State New, archived
Headers show
Series vfio virtual address update | expand

Commit Message

Steven Sistare Jan. 5, 2021, 3:36 p.m. UTC
For VFIO_IOMMU_UNMAP_DMA, delete all mappings if iova=0 and size=0.

Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
---
 drivers/vfio/vfio_iommu_type1.c | 11 ++++++++---
 include/uapi/linux/vfio.h       |  3 ++-
 2 files changed, 10 insertions(+), 4 deletions(-)

Comments

Alex Williamson Jan. 8, 2021, 7:35 p.m. UTC | #1
Hi Steve,

On Tue,  5 Jan 2021 07:36:50 -0800
Steve Sistare <steven.sistare@oracle.com> wrote:

> For VFIO_IOMMU_UNMAP_DMA, delete all mappings if iova=0 and size=0.

Only the latter is invalid, iova=0 is not special, so does it make
sense to use this combination to invoke something special?  It seems
like it opens the door that any size less than the minimum mapping
granularity means something special.

Why not use a flag to trigger an unmap-all?

Does userspace have any means to know this is supported other than to
test it before creating any mappings?

What's the intended interaction with retrieving the dirty bitmap during
an unmap-all?

> Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
> ---
>  drivers/vfio/vfio_iommu_type1.c | 11 ++++++++---
>  include/uapi/linux/vfio.h       |  3 ++-
>  2 files changed, 10 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
> index 02228d0..3dc501d 100644
> --- a/drivers/vfio/vfio_iommu_type1.c
> +++ b/drivers/vfio/vfio_iommu_type1.c
> @@ -1079,6 +1079,8 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
>  	size_t unmapped = 0, pgsize;
>  	int ret = 0, retries = 0;
>  	unsigned long pgshift;
> +	dma_addr_t iova;
> +	unsigned long size;
>  
>  	mutex_lock(&iommu->lock);
>  
> @@ -1090,7 +1092,7 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
>  		goto unlock;
>  	}
>  
> -	if (!unmap->size || unmap->size & (pgsize - 1)) {
> +	if ((!unmap->size && unmap->iova) || unmap->size & (pgsize - 1)) {
>  		ret = -EINVAL;
>  		goto unlock;
>  	}
> @@ -1154,8 +1156,11 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu,

It looks like the code just above this would have an issue if there are
dma mappings at iova=0.

>  		}
>  	}
>  
> -	while ((dma = vfio_find_dma(iommu, unmap->iova, unmap->size))) {
> -		if (!iommu->v2 && unmap->iova > dma->iova)
> +	iova = unmap->iova;
> +	size = unmap->size ? unmap->size : SIZE_MAX;

AFAICT the only difference of this versus the user calling the unmap
with iova=0 size=SIZE_MAX is that SIZE_MAX will throw an -EINVAL due to
page size alignment.  If we assume there are no IOMMUs with 1 byte page
size, the special combination could instead be {0, SIZE_MAX}.  Or the
caller could just track a high water mark for their mappings and use
the interface that exists.  Thanks,

Alex

> +
> +	while ((dma = vfio_find_dma(iommu, iova, size))) {
> +		if (!iommu->v2 && iova > dma->iova)
>  			break;
>  		/*
>  		 * Task with same address space who mapped this iova range is
> diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
> index 9204705..896e527 100644
> --- a/include/uapi/linux/vfio.h
> +++ b/include/uapi/linux/vfio.h
> @@ -1073,7 +1073,8 @@ struct vfio_bitmap {
>   * Caller sets argsz.  The actual unmapped size is returned in the size
>   * field.  No guarantee is made to the user that arbitrary unmaps of iova
>   * or size different from those used in the original mapping call will
> - * succeed.
> + * succeed.  If iova=0 and size=0, all addresses are unmapped.
> + *
>   * VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP should be set to get the dirty bitmap
>   * before unmapping IO virtual addresses. When this flag is set, the user must
>   * provide a struct vfio_bitmap in data[]. User must provide zero-allocated
Steven Sistare Jan. 11, 2021, 9:09 p.m. UTC | #2
On 1/8/2021 2:35 PM, Alex Williamson wrote:
> Hi Steve,
> 
> On Tue,  5 Jan 2021 07:36:50 -0800
> Steve Sistare <steven.sistare@oracle.com> wrote:
> 
>> For VFIO_IOMMU_UNMAP_DMA, delete all mappings if iova=0 and size=0.
> 
> Only the latter is invalid, iova=0 is not special, so does it make
> sense to use this combination to invoke something special?  It seems
> like it opens the door that any size less than the minimum mapping
> granularity means something special.
> 
> Why not use a flag to trigger an unmap-all?

Hi Alex, that would be fine.

> Does userspace have any means to know this is supported other than to
> test it before creating any mappings?

Not currently.  We could overload VFIO_SUSPEND, or define a new extension code.
 
> What's the intended interaction with retrieving the dirty bitmap during
> an unmap-all?

Undefined and broken if there are gaps between segments :(  Good catch, thanks.  
I will disallow the combination of unmap-all and get-dirty-bitmap.

>> Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
>> ---
>>  drivers/vfio/vfio_iommu_type1.c | 11 ++++++++---
>>  include/uapi/linux/vfio.h       |  3 ++-
>>  2 files changed, 10 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
>> index 02228d0..3dc501d 100644
>> --- a/drivers/vfio/vfio_iommu_type1.c
>> +++ b/drivers/vfio/vfio_iommu_type1.c
>> @@ -1079,6 +1079,8 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
>>  	size_t unmapped = 0, pgsize;
>>  	int ret = 0, retries = 0;
>>  	unsigned long pgshift;
>> +	dma_addr_t iova;
>> +	unsigned long size;
>>  
>>  	mutex_lock(&iommu->lock);
>>  
>> @@ -1090,7 +1092,7 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
>>  		goto unlock;
>>  	}
>>  
>> -	if (!unmap->size || unmap->size & (pgsize - 1)) {
>> +	if ((!unmap->size && unmap->iova) || unmap->size & (pgsize - 1)) {
>>  		ret = -EINVAL;
>>  		goto unlock;
>>  	}
>> @@ -1154,8 +1156,11 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
> 
> It looks like the code just above this would have an issue if there are
> dma mappings at iova=0.

Are you referring to this code?

        if (iommu->v2) {
                dma = vfio_find_dma(iommu, unmap->iova, 1);
                if (dma && dma->iova != unmap->iova) {
                        ret = -EINVAL;

Both unmap->iova and dma->iova would be 0, so I don't see the problem.

>>  		}
>>  	}
>>  
>> -	while ((dma = vfio_find_dma(iommu, unmap->iova, unmap->size))) {
>> -		if (!iommu->v2 && unmap->iova > dma->iova)
>> +	iova = unmap->iova;
>> +	size = unmap->size ? unmap->size : SIZE_MAX;
> 
> AFAICT the only difference of this versus the user calling the unmap
> with iova=0 size=SIZE_MAX is that SIZE_MAX will throw an -EINVAL due to
> page size alignment.  If we assume there are no IOMMUs with 1 byte page
> size, the special combination could instead be {0, SIZE_MAX}.  

Fine, but we would still need to document it specifically so the user knows that 
the unaligned SIZE_MAX does not return EINVAL.

> Or the
> caller could just track a high water mark for their mappings and use
> the interface that exists.  Thanks,

I am trying to avoid the need to modify existing code, for legacy qemu live update.
Either a new flag or {0, SIZE_MAX} is suitable.  Which do you prefer?

- Steve
 
>> +
>> +	while ((dma = vfio_find_dma(iommu, iova, size))) {
>> +		if (!iommu->v2 && iova > dma->iova)
>>  			break;
>>  		/*
>>  		 * Task with same address space who mapped this iova range is
>> diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
>> index 9204705..896e527 100644
>> --- a/include/uapi/linux/vfio.h
>> +++ b/include/uapi/linux/vfio.h
>> @@ -1073,7 +1073,8 @@ struct vfio_bitmap {
>>   * Caller sets argsz.  The actual unmapped size is returned in the size
>>   * field.  No guarantee is made to the user that arbitrary unmaps of iova
>>   * or size different from those used in the original mapping call will
>> - * succeed.
>> + * succeed.  If iova=0 and size=0, all addresses are unmapped.
>> + *
>>   * VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP should be set to get the dirty bitmap
>>   * before unmapping IO virtual addresses. When this flag is set, the user must
>>   * provide a struct vfio_bitmap in data[]. User must provide zero-allocated
>
Alex Williamson Jan. 13, 2021, 7:41 p.m. UTC | #3
On Mon, 11 Jan 2021 16:09:18 -0500
Steven Sistare <steven.sistare@oracle.com> wrote:

> On 1/8/2021 2:35 PM, Alex Williamson wrote:
> > Hi Steve,
> > 
> > On Tue,  5 Jan 2021 07:36:50 -0800
> > Steve Sistare <steven.sistare@oracle.com> wrote:
> >   
> >> For VFIO_IOMMU_UNMAP_DMA, delete all mappings if iova=0 and size=0.  
> > 
> > Only the latter is invalid, iova=0 is not special, so does it make
> > sense to use this combination to invoke something special?  It seems
> > like it opens the door that any size less than the minimum mapping
> > granularity means something special.
> > 
> > Why not use a flag to trigger an unmap-all?  
> 
> Hi Alex, that would be fine.
> 
> > Does userspace have any means to know this is supported other than to
> > test it before creating any mappings?  
> 
> Not currently.  We could overload VFIO_SUSPEND, or define a new extension code.

Either an extension or a capability on the IOMMU_INFO return data.
If I interpret our trend on which to use, an extension seems
appropriate here as we're only indicating support for a feature with no
additional data to return.

> > What's the intended interaction with retrieving the dirty bitmap during
> > an unmap-all?  
> 
> Undefined and broken if there are gaps between segments :(  Good catch, thanks.  
> I will disallow the combination of unmap-all and get-dirty-bitmap.
> 
> >> Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
> >> ---
> >>  drivers/vfio/vfio_iommu_type1.c | 11 ++++++++---
> >>  include/uapi/linux/vfio.h       |  3 ++-
> >>  2 files changed, 10 insertions(+), 4 deletions(-)
> >>
> >> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
> >> index 02228d0..3dc501d 100644
> >> --- a/drivers/vfio/vfio_iommu_type1.c
> >> +++ b/drivers/vfio/vfio_iommu_type1.c
> >> @@ -1079,6 +1079,8 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
> >>  	size_t unmapped = 0, pgsize;
> >>  	int ret = 0, retries = 0;
> >>  	unsigned long pgshift;
> >> +	dma_addr_t iova;
> >> +	unsigned long size;
> >>  
> >>  	mutex_lock(&iommu->lock);
> >>  
> >> @@ -1090,7 +1092,7 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
> >>  		goto unlock;
> >>  	}
> >>  
> >> -	if (!unmap->size || unmap->size & (pgsize - 1)) {
> >> +	if ((!unmap->size && unmap->iova) || unmap->size & (pgsize - 1)) {
> >>  		ret = -EINVAL;
> >>  		goto unlock;
> >>  	}
> >> @@ -1154,8 +1156,11 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu,  
> > 
> > It looks like the code just above this would have an issue if there are
> > dma mappings at iova=0.  
> 
> Are you referring to this code?
> 
>         if (iommu->v2) {
>                 dma = vfio_find_dma(iommu, unmap->iova, 1);
>                 if (dma && dma->iova != unmap->iova) {
>                         ret = -EINVAL;
> 
> Both unmap->iova and dma->iova would be 0, so I don't see the problem.

Yeah, I think I was mistaken.  Thanks,

Alex

> >>  		}
> >>  	}
> >>  
> >> -	while ((dma = vfio_find_dma(iommu, unmap->iova, unmap->size))) {
> >> -		if (!iommu->v2 && unmap->iova > dma->iova)
> >> +	iova = unmap->iova;
> >> +	size = unmap->size ? unmap->size : SIZE_MAX;  
> > 
> > AFAICT the only difference of this versus the user calling the unmap
> > with iova=0 size=SIZE_MAX is that SIZE_MAX will throw an -EINVAL due to
> > page size alignment.  If we assume there are no IOMMUs with 1 byte page
> > size, the special combination could instead be {0, SIZE_MAX}.    
> 
> Fine, but we would still need to document it specifically so the user knows that 
> the unaligned SIZE_MAX does not return EINVAL.
> 
> > Or the
> > caller could just track a high water mark for their mappings and use
> > the interface that exists.  Thanks,  
> 
> I am trying to avoid the need to modify existing code, for legacy qemu live update.
> Either a new flag or {0, SIZE_MAX} is suitable.  Which do you prefer?
> 
> - Steve
>  
> >> +
> >> +	while ((dma = vfio_find_dma(iommu, iova, size))) {
> >> +		if (!iommu->v2 && iova > dma->iova)
> >>  			break;
> >>  		/*
> >>  		 * Task with same address space who mapped this iova range is
> >> diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
> >> index 9204705..896e527 100644
> >> --- a/include/uapi/linux/vfio.h
> >> +++ b/include/uapi/linux/vfio.h
> >> @@ -1073,7 +1073,8 @@ struct vfio_bitmap {
> >>   * Caller sets argsz.  The actual unmapped size is returned in the size
> >>   * field.  No guarantee is made to the user that arbitrary unmaps of iova
> >>   * or size different from those used in the original mapping call will
> >> - * succeed.
> >> + * succeed.  If iova=0 and size=0, all addresses are unmapped.
> >> + *
> >>   * VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP should be set to get the dirty bitmap
> >>   * before unmapping IO virtual addresses. When this flag is set, the user must
> >>   * provide a struct vfio_bitmap in data[]. User must provide zero-allocated  
> >   
>
diff mbox series

Patch

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 02228d0..3dc501d 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -1079,6 +1079,8 @@  static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
 	size_t unmapped = 0, pgsize;
 	int ret = 0, retries = 0;
 	unsigned long pgshift;
+	dma_addr_t iova;
+	unsigned long size;
 
 	mutex_lock(&iommu->lock);
 
@@ -1090,7 +1092,7 @@  static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
 		goto unlock;
 	}
 
-	if (!unmap->size || unmap->size & (pgsize - 1)) {
+	if ((!unmap->size && unmap->iova) || unmap->size & (pgsize - 1)) {
 		ret = -EINVAL;
 		goto unlock;
 	}
@@ -1154,8 +1156,11 @@  static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
 		}
 	}
 
-	while ((dma = vfio_find_dma(iommu, unmap->iova, unmap->size))) {
-		if (!iommu->v2 && unmap->iova > dma->iova)
+	iova = unmap->iova;
+	size = unmap->size ? unmap->size : SIZE_MAX;
+
+	while ((dma = vfio_find_dma(iommu, iova, size))) {
+		if (!iommu->v2 && iova > dma->iova)
 			break;
 		/*
 		 * Task with same address space who mapped this iova range is
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 9204705..896e527 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -1073,7 +1073,8 @@  struct vfio_bitmap {
  * Caller sets argsz.  The actual unmapped size is returned in the size
  * field.  No guarantee is made to the user that arbitrary unmaps of iova
  * or size different from those used in the original mapping call will
- * succeed.
+ * succeed.  If iova=0 and size=0, all addresses are unmapped.
+ *
  * VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP should be set to get the dirty bitmap
  * before unmapping IO virtual addresses. When this flag is set, the user must
  * provide a struct vfio_bitmap in data[]. User must provide zero-allocated