diff mbox

vfio: Simplify capability helper

Message ID 20171212195850.13691.40496.stgit@gimli.home (mailing list archive)
State New, archived
Headers show

Commit Message

Alex Williamson Dec. 12, 2017, 7:59 p.m. UTC
The vfio_info_add_capability() helper requires the caller to pass a
capability ID, which it then uses to fill in header fields, assuming
hard coded versions.  This makes for an awkward and rigid interface.
The only thing we want this helper to do is allocate sufficient
space in the caps buffer and chain this capability into the list.
Reduce it to that simple task.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/gpu/drm/i915/gvt/kvmgt.c |   15 +++++++----
 drivers/vfio/pci/vfio_pci.c      |   14 ++++++----
 drivers/vfio/vfio.c              |   52 +++-----------------------------------
 include/linux/vfio.h             |    3 +-
 4 files changed, 24 insertions(+), 60 deletions(-)

Comments

Alexey Kardashevskiy Dec. 13, 2017, 1:13 a.m. UTC | #1
On 13/12/17 06:59, Alex Williamson wrote:
> The vfio_info_add_capability() helper requires the caller to pass a
> capability ID, which it then uses to fill in header fields, assuming
> hard coded versions.  This makes for an awkward and rigid interface.
> The only thing we want this helper to do is allocate sufficient
> space in the caps buffer and chain this capability into the list.
> Reduce it to that simple task.
> 
> Signed-off-by: Alex Williamson <alex.williamson@redhat.com>


Makes more sense now, thanks. I'll repost mine on top of this.


Reviewed-by: Alexey Kardashevskiy <aik@ozlabs.ru>


Below one observation, unrelated to this patch.

> ---
>  drivers/gpu/drm/i915/gvt/kvmgt.c |   15 +++++++----
>  drivers/vfio/pci/vfio_pci.c      |   14 ++++++----
>  drivers/vfio/vfio.c              |   52 +++-----------------------------------
>  include/linux/vfio.h             |    3 +-
>  4 files changed, 24 insertions(+), 60 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
> index 96060920a6fe..0a7d084da1a2 100644
> --- a/drivers/gpu/drm/i915/gvt/kvmgt.c
> +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
> @@ -1012,6 +1012,8 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd,
>  			if (!sparse)
>  				return -ENOMEM;
>  
> +			sparse->header.id = VFIO_REGION_INFO_CAP_SPARSE_MMAP;
> +			sparse->header.version = 1;
>  			sparse->nr_areas = nr_areas;
>  			cap_type_id = VFIO_REGION_INFO_CAP_SPARSE_MMAP;


@cap_type_id is initialized in just one of many cases of switch
(info.index) and after the entire switch, there is switch (cap_type_id). I
wonder why compiler missed "potentially uninitialized variable here,
although there is no bug - @cap_type_id is in sync with @spapse. It would
make it cleaner imho just to have vfio_info_add_capability() next to the
header initialization.



>  			sparse->areas[0].offset =
> @@ -1033,7 +1035,9 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd,
>  			break;
>  		default:
>  			{
> -				struct vfio_region_info_cap_type cap_type;
> +				struct vfio_region_info_cap_type cap_type = {
> +					.header.id = VFIO_REGION_INFO_CAP_TYPE,
> +					.header.version = 1 };
>  
>  				if (info.index >= VFIO_PCI_NUM_REGIONS +
>  						vgpu->vdev.num_regions)
> @@ -1050,8 +1054,8 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd,
>  				cap_type.subtype = vgpu->vdev.region[i].subtype;
>  
>  				ret = vfio_info_add_capability(&caps,
> -						VFIO_REGION_INFO_CAP_TYPE,
> -						&cap_type);
> +							&cap_type.header,
> +							sizeof(cap_type));
>  				if (ret)
>  					return ret;
>  			}
> @@ -1061,8 +1065,9 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd,
>  			switch (cap_type_id) {
>  			case VFIO_REGION_INFO_CAP_SPARSE_MMAP:
>  				ret = vfio_info_add_capability(&caps,
> -					VFIO_REGION_INFO_CAP_SPARSE_MMAP,
> -					sparse);
> +					&sparse->header, sizeof(*sparse) +
> +					(sparse->nr_areas *
> +						sizeof(*sparse->areas)));
>  				kfree(sparse);
>  				if (ret)
>  					return ret;
> diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
> index f041b1a6cf66..a73e40983880 100644
> --- a/drivers/vfio/pci/vfio_pci.c
> +++ b/drivers/vfio/pci/vfio_pci.c
> @@ -582,6 +582,8 @@ static int msix_sparse_mmap_cap(struct vfio_pci_device *vdev,
>  	if (!sparse)
>  		return -ENOMEM;
>  
> +	sparse->header.id = VFIO_REGION_INFO_CAP_SPARSE_MMAP;
> +	sparse->header.version = 1;
>  	sparse->nr_areas = nr_areas;
>  
>  	if (vdev->msix_offset & PAGE_MASK) {
> @@ -597,8 +599,7 @@ static int msix_sparse_mmap_cap(struct vfio_pci_device *vdev,
>  		i++;
>  	}
>  
> -	ret = vfio_info_add_capability(caps, VFIO_REGION_INFO_CAP_SPARSE_MMAP,
> -				       sparse);
> +	ret = vfio_info_add_capability(caps, &sparse->header, size);
>  	kfree(sparse);
>  
>  	return ret;
> @@ -741,7 +742,9 @@ static long vfio_pci_ioctl(void *device_data,
>  			break;
>  		default:
>  		{
> -			struct vfio_region_info_cap_type cap_type;
> +			struct vfio_region_info_cap_type cap_type = {
> +					.header.id = VFIO_REGION_INFO_CAP_TYPE,
> +					.header.version = 1 };
>  
>  			if (info.index >=
>  			    VFIO_PCI_NUM_REGIONS + vdev->num_regions)
> @@ -756,9 +759,8 @@ static long vfio_pci_ioctl(void *device_data,
>  			cap_type.type = vdev->region[i].type;
>  			cap_type.subtype = vdev->region[i].subtype;
>  
> -			ret = vfio_info_add_capability(&caps,
> -						      VFIO_REGION_INFO_CAP_TYPE,
> -						      &cap_type);
> +			ret = vfio_info_add_capability(&caps, &cap_type.header,
> +						       sizeof(cap_type));
>  			if (ret)
>  				return ret;
>  
> diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
> index 2bc3705a99bd..721f97f8dac1 100644
> --- a/drivers/vfio/vfio.c
> +++ b/drivers/vfio/vfio.c
> @@ -1857,63 +1857,19 @@ void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset)
>  }
>  EXPORT_SYMBOL(vfio_info_cap_shift);
>  
> -static int sparse_mmap_cap(struct vfio_info_cap *caps, void *cap_type)
> +int vfio_info_add_capability(struct vfio_info_cap *caps,
> +			     struct vfio_info_cap_header *cap, size_t size)
>  {
>  	struct vfio_info_cap_header *header;
> -	struct vfio_region_info_cap_sparse_mmap *sparse_cap, *sparse = cap_type;
> -	size_t size;
>  
> -	size = sizeof(*sparse) + sparse->nr_areas *  sizeof(*sparse->areas);
> -	header = vfio_info_cap_add(caps, size,
> -				   VFIO_REGION_INFO_CAP_SPARSE_MMAP, 1);
> +	header = vfio_info_cap_add(caps, size, cap->id, cap->version);
>  	if (IS_ERR(header))
>  		return PTR_ERR(header);
>  
> -	sparse_cap = container_of(header,
> -			struct vfio_region_info_cap_sparse_mmap, header);
> -	sparse_cap->nr_areas = sparse->nr_areas;
> -	memcpy(sparse_cap->areas, sparse->areas,
> -	       sparse->nr_areas * sizeof(*sparse->areas));
> -	return 0;
> -}
> -
> -static int region_type_cap(struct vfio_info_cap *caps, void *cap_type)
> -{
> -	struct vfio_info_cap_header *header;
> -	struct vfio_region_info_cap_type *type_cap, *cap = cap_type;
> +	memcpy(header + 1, cap + 1, size - sizeof(*header));
>  
> -	header = vfio_info_cap_add(caps, sizeof(*cap),
> -				   VFIO_REGION_INFO_CAP_TYPE, 1);
> -	if (IS_ERR(header))
> -		return PTR_ERR(header);
> -
> -	type_cap = container_of(header, struct vfio_region_info_cap_type,
> -				header);
> -	type_cap->type = cap->type;
> -	type_cap->subtype = cap->subtype;
>  	return 0;
>  }
> -
> -int vfio_info_add_capability(struct vfio_info_cap *caps, int cap_type_id,
> -			     void *cap_type)
> -{
> -	int ret = -EINVAL;
> -
> -	if (!cap_type)
> -		return 0;
> -
> -	switch (cap_type_id) {
> -	case VFIO_REGION_INFO_CAP_SPARSE_MMAP:
> -		ret = sparse_mmap_cap(caps, cap_type);
> -		break;
> -
> -	case VFIO_REGION_INFO_CAP_TYPE:
> -		ret = region_type_cap(caps, cap_type);
> -		break;
> -	}
> -
> -	return ret;
> -}
>  EXPORT_SYMBOL(vfio_info_add_capability);
>  
>  int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr, int num_irqs,
> diff --git a/include/linux/vfio.h b/include/linux/vfio.h
> index a47b985341d1..66741ab087c1 100644
> --- a/include/linux/vfio.h
> +++ b/include/linux/vfio.h
> @@ -145,7 +145,8 @@ extern struct vfio_info_cap_header *vfio_info_cap_add(
>  extern void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset);
>  
>  extern int vfio_info_add_capability(struct vfio_info_cap *caps,
> -				    int cap_type_id, void *cap_type);
> +				    struct vfio_info_cap_header *cap,
> +				    size_t size);
>  
>  extern int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr,
>  					      int num_irqs, int max_irq_type,
>
Peter Xu Dec. 13, 2017, 6:49 a.m. UTC | #2
On Tue, Dec 12, 2017 at 12:59:39PM -0700, Alex Williamson wrote:
> The vfio_info_add_capability() helper requires the caller to pass a
> capability ID, which it then uses to fill in header fields, assuming
> hard coded versions.  This makes for an awkward and rigid interface.
> The only thing we want this helper to do is allocate sufficient
> space in the caps buffer and chain this capability into the list.
> Reduce it to that simple task.
> 
> Signed-off-by: Alex Williamson <alex.williamson@redhat.com>

Reviewed-by: Peter Xu <peterx@redhat.com>

Though during review I had a question related to the function
msix_sparse_mmap_cap(): Is it possible that one PCI device BAR is very
small (e.g., 4K) that only contains the MSI-X table (and another small
PBA area)?  If so, should we just mark that region unmappable instead
of setting vfio_region_info_cap_sparse_mmap.nr_areas to 1 in
msix_sparse_mmap_cap()?

	/* If MSI-X table is aligned to the start or end, only one area */
	if (((vdev->msix_offset & PAGE_MASK) == 0) ||
	    (PAGE_ALIGN(vdev->msix_offset + vdev->msix_size) >= end))
		nr_areas = 1;

Thanks,
Zhenyu Wang Dec. 13, 2017, 7:01 a.m. UTC | #3
On 2017.12.13 12:13:34 +1100, Alexey Kardashevskiy wrote:
> On 13/12/17 06:59, Alex Williamson wrote:
> > The vfio_info_add_capability() helper requires the caller to pass a
> > capability ID, which it then uses to fill in header fields, assuming
> > hard coded versions.  This makes for an awkward and rigid interface.
> > The only thing we want this helper to do is allocate sufficient
> > space in the caps buffer and chain this capability into the list.
> > Reduce it to that simple task.
> > 
> > Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
> 
> 
> Makes more sense now, thanks. I'll repost mine on top of this.
> 
> 
> Reviewed-by: Alexey Kardashevskiy <aik@ozlabs.ru>
> 
>

Looks good for KVMGT part.

Acked-by: Zhenyu Wang <zhenyuw@linux.intel.com>

> Below one observation, unrelated to this patch.
> 
> > ---
> >  drivers/gpu/drm/i915/gvt/kvmgt.c |   15 +++++++----
> >  drivers/vfio/pci/vfio_pci.c      |   14 ++++++----
> >  drivers/vfio/vfio.c              |   52 +++-----------------------------------
> >  include/linux/vfio.h             |    3 +-
> >  4 files changed, 24 insertions(+), 60 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
> > index 96060920a6fe..0a7d084da1a2 100644
> > --- a/drivers/gpu/drm/i915/gvt/kvmgt.c
> > +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
> > @@ -1012,6 +1012,8 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd,
> >  			if (!sparse)
> >  				return -ENOMEM;
> >  
> > +			sparse->header.id = VFIO_REGION_INFO_CAP_SPARSE_MMAP;
> > +			sparse->header.version = 1;
> >  			sparse->nr_areas = nr_areas;
> >  			cap_type_id = VFIO_REGION_INFO_CAP_SPARSE_MMAP;
> 
> 
> @cap_type_id is initialized in just one of many cases of switch
> (info.index) and after the entire switch, there is switch (cap_type_id). I
> wonder why compiler missed "potentially uninitialized variable here,
> although there is no bug - @cap_type_id is in sync with @spapse. It would
> make it cleaner imho just to have vfio_info_add_capability() next to the
> header initialization.
> 

yeah, we could clean that up, thanks for pointing out.

> 
> 
> >  			sparse->areas[0].offset =
> > @@ -1033,7 +1035,9 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd,
> >  			break;
> >  		default:
> >  			{
> > -				struct vfio_region_info_cap_type cap_type;
> > +				struct vfio_region_info_cap_type cap_type = {
> > +					.header.id = VFIO_REGION_INFO_CAP_TYPE,
> > +					.header.version = 1 };
> >  
> >  				if (info.index >= VFIO_PCI_NUM_REGIONS +
> >  						vgpu->vdev.num_regions)
> > @@ -1050,8 +1054,8 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd,
> >  				cap_type.subtype = vgpu->vdev.region[i].subtype;
> >  
> >  				ret = vfio_info_add_capability(&caps,
> > -						VFIO_REGION_INFO_CAP_TYPE,
> > -						&cap_type);
> > +							&cap_type.header,
> > +							sizeof(cap_type));
> >  				if (ret)
> >  					return ret;
> >  			}
> > @@ -1061,8 +1065,9 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd,
> >  			switch (cap_type_id) {
> >  			case VFIO_REGION_INFO_CAP_SPARSE_MMAP:
> >  				ret = vfio_info_add_capability(&caps,
> > -					VFIO_REGION_INFO_CAP_SPARSE_MMAP,
> > -					sparse);
> > +					&sparse->header, sizeof(*sparse) +
> > +					(sparse->nr_areas *
> > +						sizeof(*sparse->areas)));
> >  				kfree(sparse);
> >  				if (ret)
> >  					return ret;
> > diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
> > index f041b1a6cf66..a73e40983880 100644
> > --- a/drivers/vfio/pci/vfio_pci.c
> > +++ b/drivers/vfio/pci/vfio_pci.c
> > @@ -582,6 +582,8 @@ static int msix_sparse_mmap_cap(struct vfio_pci_device *vdev,
> >  	if (!sparse)
> >  		return -ENOMEM;
> >  
> > +	sparse->header.id = VFIO_REGION_INFO_CAP_SPARSE_MMAP;
> > +	sparse->header.version = 1;
> >  	sparse->nr_areas = nr_areas;
> >  
> >  	if (vdev->msix_offset & PAGE_MASK) {
> > @@ -597,8 +599,7 @@ static int msix_sparse_mmap_cap(struct vfio_pci_device *vdev,
> >  		i++;
> >  	}
> >  
> > -	ret = vfio_info_add_capability(caps, VFIO_REGION_INFO_CAP_SPARSE_MMAP,
> > -				       sparse);
> > +	ret = vfio_info_add_capability(caps, &sparse->header, size);
> >  	kfree(sparse);
> >  
> >  	return ret;
> > @@ -741,7 +742,9 @@ static long vfio_pci_ioctl(void *device_data,
> >  			break;
> >  		default:
> >  		{
> > -			struct vfio_region_info_cap_type cap_type;
> > +			struct vfio_region_info_cap_type cap_type = {
> > +					.header.id = VFIO_REGION_INFO_CAP_TYPE,
> > +					.header.version = 1 };
> >  
> >  			if (info.index >=
> >  			    VFIO_PCI_NUM_REGIONS + vdev->num_regions)
> > @@ -756,9 +759,8 @@ static long vfio_pci_ioctl(void *device_data,
> >  			cap_type.type = vdev->region[i].type;
> >  			cap_type.subtype = vdev->region[i].subtype;
> >  
> > -			ret = vfio_info_add_capability(&caps,
> > -						      VFIO_REGION_INFO_CAP_TYPE,
> > -						      &cap_type);
> > +			ret = vfio_info_add_capability(&caps, &cap_type.header,
> > +						       sizeof(cap_type));
> >  			if (ret)
> >  				return ret;
> >  
> > diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
> > index 2bc3705a99bd..721f97f8dac1 100644
> > --- a/drivers/vfio/vfio.c
> > +++ b/drivers/vfio/vfio.c
> > @@ -1857,63 +1857,19 @@ void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset)
> >  }
> >  EXPORT_SYMBOL(vfio_info_cap_shift);
> >  
> > -static int sparse_mmap_cap(struct vfio_info_cap *caps, void *cap_type)
> > +int vfio_info_add_capability(struct vfio_info_cap *caps,
> > +			     struct vfio_info_cap_header *cap, size_t size)
> >  {
> >  	struct vfio_info_cap_header *header;
> > -	struct vfio_region_info_cap_sparse_mmap *sparse_cap, *sparse = cap_type;
> > -	size_t size;
> >  
> > -	size = sizeof(*sparse) + sparse->nr_areas *  sizeof(*sparse->areas);
> > -	header = vfio_info_cap_add(caps, size,
> > -				   VFIO_REGION_INFO_CAP_SPARSE_MMAP, 1);
> > +	header = vfio_info_cap_add(caps, size, cap->id, cap->version);
> >  	if (IS_ERR(header))
> >  		return PTR_ERR(header);
> >  
> > -	sparse_cap = container_of(header,
> > -			struct vfio_region_info_cap_sparse_mmap, header);
> > -	sparse_cap->nr_areas = sparse->nr_areas;
> > -	memcpy(sparse_cap->areas, sparse->areas,
> > -	       sparse->nr_areas * sizeof(*sparse->areas));
> > -	return 0;
> > -}
> > -
> > -static int region_type_cap(struct vfio_info_cap *caps, void *cap_type)
> > -{
> > -	struct vfio_info_cap_header *header;
> > -	struct vfio_region_info_cap_type *type_cap, *cap = cap_type;
> > +	memcpy(header + 1, cap + 1, size - sizeof(*header));
> >  
> > -	header = vfio_info_cap_add(caps, sizeof(*cap),
> > -				   VFIO_REGION_INFO_CAP_TYPE, 1);
> > -	if (IS_ERR(header))
> > -		return PTR_ERR(header);
> > -
> > -	type_cap = container_of(header, struct vfio_region_info_cap_type,
> > -				header);
> > -	type_cap->type = cap->type;
> > -	type_cap->subtype = cap->subtype;
> >  	return 0;
> >  }
> > -
> > -int vfio_info_add_capability(struct vfio_info_cap *caps, int cap_type_id,
> > -			     void *cap_type)
> > -{
> > -	int ret = -EINVAL;
> > -
> > -	if (!cap_type)
> > -		return 0;
> > -
> > -	switch (cap_type_id) {
> > -	case VFIO_REGION_INFO_CAP_SPARSE_MMAP:
> > -		ret = sparse_mmap_cap(caps, cap_type);
> > -		break;
> > -
> > -	case VFIO_REGION_INFO_CAP_TYPE:
> > -		ret = region_type_cap(caps, cap_type);
> > -		break;
> > -	}
> > -
> > -	return ret;
> > -}
> >  EXPORT_SYMBOL(vfio_info_add_capability);
> >  
> >  int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr, int num_irqs,
> > diff --git a/include/linux/vfio.h b/include/linux/vfio.h
> > index a47b985341d1..66741ab087c1 100644
> > --- a/include/linux/vfio.h
> > +++ b/include/linux/vfio.h
> > @@ -145,7 +145,8 @@ extern struct vfio_info_cap_header *vfio_info_cap_add(
> >  extern void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset);
> >  
> >  extern int vfio_info_add_capability(struct vfio_info_cap *caps,
> > -				    int cap_type_id, void *cap_type);
> > +				    struct vfio_info_cap_header *cap,
> > +				    size_t size);
> >  
> >  extern int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr,
> >  					      int num_irqs, int max_irq_type,
> > 
> 
> 
> -- 
> Alexey
Kirti Wankhede Dec. 13, 2017, 9:04 a.m. UTC | #4
On 12/13/2017 12:31 PM, Zhenyu Wang wrote:
> On 2017.12.13 12:13:34 +1100, Alexey Kardashevskiy wrote:
>> On 13/12/17 06:59, Alex Williamson wrote:
>>> The vfio_info_add_capability() helper requires the caller to pass a
>>> capability ID, which it then uses to fill in header fields, assuming
>>> hard coded versions.  This makes for an awkward and rigid interface.
>>> The only thing we want this helper to do is allocate sufficient
>>> space in the caps buffer and chain this capability into the list.
>>> Reduce it to that simple task.
>>>
>>> Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
>>
>>
>> Makes more sense now, thanks. I'll repost mine on top of this.
>>
>>
>> Reviewed-by: Alexey Kardashevskiy <aik@ozlabs.ru>
>>
>>
> 
> Looks good for KVMGT part.
> 
> Acked-by: Zhenyu Wang <zhenyuw@linux.intel.com>
> 

Looks good to me too.

Reviewed-by: Kirti Wankhede <kwankhede@nvidia.com>

Thanks,
Kirti


>> Below one observation, unrelated to this patch.
>>
>>> ---
>>>  drivers/gpu/drm/i915/gvt/kvmgt.c |   15 +++++++----
>>>  drivers/vfio/pci/vfio_pci.c      |   14 ++++++----
>>>  drivers/vfio/vfio.c              |   52 +++-----------------------------------
>>>  include/linux/vfio.h             |    3 +-
>>>  4 files changed, 24 insertions(+), 60 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
>>> index 96060920a6fe..0a7d084da1a2 100644
>>> --- a/drivers/gpu/drm/i915/gvt/kvmgt.c
>>> +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
>>> @@ -1012,6 +1012,8 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd,
>>>  			if (!sparse)
>>>  				return -ENOMEM;
>>>  
>>> +			sparse->header.id = VFIO_REGION_INFO_CAP_SPARSE_MMAP;
>>> +			sparse->header.version = 1;
>>>  			sparse->nr_areas = nr_areas;
>>>  			cap_type_id = VFIO_REGION_INFO_CAP_SPARSE_MMAP;
>>
>>
>> @cap_type_id is initialized in just one of many cases of switch
>> (info.index) and after the entire switch, there is switch (cap_type_id). I
>> wonder why compiler missed "potentially uninitialized variable here,
>> although there is no bug - @cap_type_id is in sync with @spapse. It would
>> make it cleaner imho just to have vfio_info_add_capability() next to the
>> header initialization.
>>
> 
> yeah, we could clean that up, thanks for pointing out.
> 
>>
>>
>>>  			sparse->areas[0].offset =
>>> @@ -1033,7 +1035,9 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd,
>>>  			break;
>>>  		default:
>>>  			{
>>> -				struct vfio_region_info_cap_type cap_type;
>>> +				struct vfio_region_info_cap_type cap_type = {
>>> +					.header.id = VFIO_REGION_INFO_CAP_TYPE,
>>> +					.header.version = 1 };
>>>  
>>>  				if (info.index >= VFIO_PCI_NUM_REGIONS +
>>>  						vgpu->vdev.num_regions)
>>> @@ -1050,8 +1054,8 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd,
>>>  				cap_type.subtype = vgpu->vdev.region[i].subtype;
>>>  
>>>  				ret = vfio_info_add_capability(&caps,
>>> -						VFIO_REGION_INFO_CAP_TYPE,
>>> -						&cap_type);
>>> +							&cap_type.header,
>>> +							sizeof(cap_type));
>>>  				if (ret)
>>>  					return ret;
>>>  			}
>>> @@ -1061,8 +1065,9 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd,
>>>  			switch (cap_type_id) {
>>>  			case VFIO_REGION_INFO_CAP_SPARSE_MMAP:
>>>  				ret = vfio_info_add_capability(&caps,
>>> -					VFIO_REGION_INFO_CAP_SPARSE_MMAP,
>>> -					sparse);
>>> +					&sparse->header, sizeof(*sparse) +
>>> +					(sparse->nr_areas *
>>> +						sizeof(*sparse->areas)));
>>>  				kfree(sparse);
>>>  				if (ret)
>>>  					return ret;
>>> diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
>>> index f041b1a6cf66..a73e40983880 100644
>>> --- a/drivers/vfio/pci/vfio_pci.c
>>> +++ b/drivers/vfio/pci/vfio_pci.c
>>> @@ -582,6 +582,8 @@ static int msix_sparse_mmap_cap(struct vfio_pci_device *vdev,
>>>  	if (!sparse)
>>>  		return -ENOMEM;
>>>  
>>> +	sparse->header.id = VFIO_REGION_INFO_CAP_SPARSE_MMAP;
>>> +	sparse->header.version = 1;
>>>  	sparse->nr_areas = nr_areas;
>>>  
>>>  	if (vdev->msix_offset & PAGE_MASK) {
>>> @@ -597,8 +599,7 @@ static int msix_sparse_mmap_cap(struct vfio_pci_device *vdev,
>>>  		i++;
>>>  	}
>>>  
>>> -	ret = vfio_info_add_capability(caps, VFIO_REGION_INFO_CAP_SPARSE_MMAP,
>>> -				       sparse);
>>> +	ret = vfio_info_add_capability(caps, &sparse->header, size);
>>>  	kfree(sparse);
>>>  
>>>  	return ret;
>>> @@ -741,7 +742,9 @@ static long vfio_pci_ioctl(void *device_data,
>>>  			break;
>>>  		default:
>>>  		{
>>> -			struct vfio_region_info_cap_type cap_type;
>>> +			struct vfio_region_info_cap_type cap_type = {
>>> +					.header.id = VFIO_REGION_INFO_CAP_TYPE,
>>> +					.header.version = 1 };
>>>  
>>>  			if (info.index >=
>>>  			    VFIO_PCI_NUM_REGIONS + vdev->num_regions)
>>> @@ -756,9 +759,8 @@ static long vfio_pci_ioctl(void *device_data,
>>>  			cap_type.type = vdev->region[i].type;
>>>  			cap_type.subtype = vdev->region[i].subtype;
>>>  
>>> -			ret = vfio_info_add_capability(&caps,
>>> -						      VFIO_REGION_INFO_CAP_TYPE,
>>> -						      &cap_type);
>>> +			ret = vfio_info_add_capability(&caps, &cap_type.header,
>>> +						       sizeof(cap_type));
>>>  			if (ret)
>>>  				return ret;
>>>  
>>> diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
>>> index 2bc3705a99bd..721f97f8dac1 100644
>>> --- a/drivers/vfio/vfio.c
>>> +++ b/drivers/vfio/vfio.c
>>> @@ -1857,63 +1857,19 @@ void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset)
>>>  }
>>>  EXPORT_SYMBOL(vfio_info_cap_shift);
>>>  
>>> -static int sparse_mmap_cap(struct vfio_info_cap *caps, void *cap_type)
>>> +int vfio_info_add_capability(struct vfio_info_cap *caps,
>>> +			     struct vfio_info_cap_header *cap, size_t size)
>>>  {
>>>  	struct vfio_info_cap_header *header;
>>> -	struct vfio_region_info_cap_sparse_mmap *sparse_cap, *sparse = cap_type;
>>> -	size_t size;
>>>  
>>> -	size = sizeof(*sparse) + sparse->nr_areas *  sizeof(*sparse->areas);
>>> -	header = vfio_info_cap_add(caps, size,
>>> -				   VFIO_REGION_INFO_CAP_SPARSE_MMAP, 1);
>>> +	header = vfio_info_cap_add(caps, size, cap->id, cap->version);
>>>  	if (IS_ERR(header))
>>>  		return PTR_ERR(header);
>>>  
>>> -	sparse_cap = container_of(header,
>>> -			struct vfio_region_info_cap_sparse_mmap, header);
>>> -	sparse_cap->nr_areas = sparse->nr_areas;
>>> -	memcpy(sparse_cap->areas, sparse->areas,
>>> -	       sparse->nr_areas * sizeof(*sparse->areas));
>>> -	return 0;
>>> -}
>>> -
>>> -static int region_type_cap(struct vfio_info_cap *caps, void *cap_type)
>>> -{
>>> -	struct vfio_info_cap_header *header;
>>> -	struct vfio_region_info_cap_type *type_cap, *cap = cap_type;
>>> +	memcpy(header + 1, cap + 1, size - sizeof(*header));
>>>  
>>> -	header = vfio_info_cap_add(caps, sizeof(*cap),
>>> -				   VFIO_REGION_INFO_CAP_TYPE, 1);
>>> -	if (IS_ERR(header))
>>> -		return PTR_ERR(header);
>>> -
>>> -	type_cap = container_of(header, struct vfio_region_info_cap_type,
>>> -				header);
>>> -	type_cap->type = cap->type;
>>> -	type_cap->subtype = cap->subtype;
>>>  	return 0;
>>>  }
>>> -
>>> -int vfio_info_add_capability(struct vfio_info_cap *caps, int cap_type_id,
>>> -			     void *cap_type)
>>> -{
>>> -	int ret = -EINVAL;
>>> -
>>> -	if (!cap_type)
>>> -		return 0;
>>> -
>>> -	switch (cap_type_id) {
>>> -	case VFIO_REGION_INFO_CAP_SPARSE_MMAP:
>>> -		ret = sparse_mmap_cap(caps, cap_type);
>>> -		break;
>>> -
>>> -	case VFIO_REGION_INFO_CAP_TYPE:
>>> -		ret = region_type_cap(caps, cap_type);
>>> -		break;
>>> -	}
>>> -
>>> -	return ret;
>>> -}
>>>  EXPORT_SYMBOL(vfio_info_add_capability);
>>>  
>>>  int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr, int num_irqs,
>>> diff --git a/include/linux/vfio.h b/include/linux/vfio.h
>>> index a47b985341d1..66741ab087c1 100644
>>> --- a/include/linux/vfio.h
>>> +++ b/include/linux/vfio.h
>>> @@ -145,7 +145,8 @@ extern struct vfio_info_cap_header *vfio_info_cap_add(
>>>  extern void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset);
>>>  
>>>  extern int vfio_info_add_capability(struct vfio_info_cap *caps,
>>> -				    int cap_type_id, void *cap_type);
>>> +				    struct vfio_info_cap_header *cap,
>>> +				    size_t size);
>>>  
>>>  extern int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr,
>>>  					      int num_irqs, int max_irq_type,
>>>
>>
>>
>> -- 
>> Alexey
>
Eric Auger Dec. 13, 2017, 9:23 a.m. UTC | #5
Hi Alex,

On 12/12/17 20:59, Alex Williamson wrote:
> The vfio_info_add_capability() helper requires the caller to pass a
> capability ID, which it then uses to fill in header fields, assuming
> hard coded versions.  This makes for an awkward and rigid interface.
> The only thing we want this helper to do is allocate sufficient
> space in the caps buffer and chain this capability into the list.
> Reduce it to that simple task.
> 
> Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
> ---
>  drivers/gpu/drm/i915/gvt/kvmgt.c |   15 +++++++----
>  drivers/vfio/pci/vfio_pci.c      |   14 ++++++----
>  drivers/vfio/vfio.c              |   52 +++-----------------------------------
>  include/linux/vfio.h             |    3 +-
>  4 files changed, 24 insertions(+), 60 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
> index 96060920a6fe..0a7d084da1a2 100644
> --- a/drivers/gpu/drm/i915/gvt/kvmgt.c
> +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
> @@ -1012,6 +1012,8 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd,
>  			if (!sparse)
>  				return -ENOMEM;
>  
> +			sparse->header.id = VFIO_REGION_INFO_CAP_SPARSE_MMAP;
> +			sparse->header.version = 1;
>  			sparse->nr_areas = nr_areas;
>  			cap_type_id = VFIO_REGION_INFO_CAP_SPARSE_MMAP;
>  			sparse->areas[0].offset =
> @@ -1033,7 +1035,9 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd,
>  			break;
>  		default:
>  			{
> -				struct vfio_region_info_cap_type cap_type;
> +				struct vfio_region_info_cap_type cap_type = {
> +					.header.id = VFIO_REGION_INFO_CAP_TYPE,
> +					.header.version = 1 };
>  
>  				if (info.index >= VFIO_PCI_NUM_REGIONS +
>  						vgpu->vdev.num_regions)
> @@ -1050,8 +1054,8 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd,
>  				cap_type.subtype = vgpu->vdev.region[i].subtype;
>  
>  				ret = vfio_info_add_capability(&caps,
> -						VFIO_REGION_INFO_CAP_TYPE,
> -						&cap_type);
> +							&cap_type.header,
> +							sizeof(cap_type));
>  				if (ret)
>  					return ret;
>  			}
> @@ -1061,8 +1065,9 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd,
>  			switch (cap_type_id) {
>  			case VFIO_REGION_INFO_CAP_SPARSE_MMAP:
>  				ret = vfio_info_add_capability(&caps,
> -					VFIO_REGION_INFO_CAP_SPARSE_MMAP,
> -					sparse);
> +					&sparse->header, sizeof(*sparse) +
> +					(sparse->nr_areas *
> +						sizeof(*sparse->areas)));
nit: there is a size local variable which would be usable here.
>  				kfree(sparse);
>  				if (ret)
>  					return ret;
> diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
> index f041b1a6cf66..a73e40983880 100644
> --- a/drivers/vfio/pci/vfio_pci.c
> +++ b/drivers/vfio/pci/vfio_pci.c
> @@ -582,6 +582,8 @@ static int msix_sparse_mmap_cap(struct vfio_pci_device *vdev,
>  	if (!sparse)
>  		return -ENOMEM;
>  
> +	sparse->header.id = VFIO_REGION_INFO_CAP_SPARSE_MMAP;
> +	sparse->header.version = 1;
>  	sparse->nr_areas = nr_areas;
>  
>  	if (vdev->msix_offset & PAGE_MASK) {
> @@ -597,8 +599,7 @@ static int msix_sparse_mmap_cap(struct vfio_pci_device *vdev,
>  		i++;
>  	}
>  
> -	ret = vfio_info_add_capability(caps, VFIO_REGION_INFO_CAP_SPARSE_MMAP,
> -				       sparse);
> +	ret = vfio_info_add_capability(caps, &sparse->header, size);
>  	kfree(sparse);
>  
>  	return ret;
> @@ -741,7 +742,9 @@ static long vfio_pci_ioctl(void *device_data,
>  			break;
>  		default:
>  		{
> -			struct vfio_region_info_cap_type cap_type;
> +			struct vfio_region_info_cap_type cap_type = {
> +					.header.id = VFIO_REGION_INFO_CAP_TYPE,
> +					.header.version = 1 };
>  
>  			if (info.index >=
>  			    VFIO_PCI_NUM_REGIONS + vdev->num_regions)
> @@ -756,9 +759,8 @@ static long vfio_pci_ioctl(void *device_data,
>  			cap_type.type = vdev->region[i].type;
>  			cap_type.subtype = vdev->region[i].subtype;
>  
> -			ret = vfio_info_add_capability(&caps,
> -						      VFIO_REGION_INFO_CAP_TYPE,
> -						      &cap_type);
> +			ret = vfio_info_add_capability(&caps, &cap_type.header,
> +						       sizeof(cap_type));
>  			if (ret)
>  				return ret;
>  
> diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
> index 2bc3705a99bd..721f97f8dac1 100644
> --- a/drivers/vfio/vfio.c
> +++ b/drivers/vfio/vfio.c
> @@ -1857,63 +1857,19 @@ void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset)
>  }
>  EXPORT_SYMBOL(vfio_info_cap_shift);
>  
> -static int sparse_mmap_cap(struct vfio_info_cap *caps, void *cap_type)
> +int vfio_info_add_capability(struct vfio_info_cap *caps,
> +			     struct vfio_info_cap_header *cap, size_t size)
>  {
>  	struct vfio_info_cap_header *header;
> -	struct vfio_region_info_cap_sparse_mmap *sparse_cap, *sparse = cap_type;
> -	size_t size;
>  
> -	size = sizeof(*sparse) + sparse->nr_areas *  sizeof(*sparse->areas);
> -	header = vfio_info_cap_add(caps, size,
> -				   VFIO_REGION_INFO_CAP_SPARSE_MMAP, 1);
> +	header = vfio_info_cap_add(caps, size, cap->id, cap->version);
>  	if (IS_ERR(header))
>  		return PTR_ERR(header);
>  
> -	sparse_cap = container_of(header,
> -			struct vfio_region_info_cap_sparse_mmap, header);
> -	sparse_cap->nr_areas = sparse->nr_areas;
> -	memcpy(sparse_cap->areas, sparse->areas,
> -	       sparse->nr_areas * sizeof(*sparse->areas));
> -	return 0;
> -}
> -
> -static int region_type_cap(struct vfio_info_cap *caps, void *cap_type)
> -{
> -	struct vfio_info_cap_header *header;
> -	struct vfio_region_info_cap_type *type_cap, *cap = cap_type;
> +	memcpy(header + 1, cap + 1, size - sizeof(*header));
>  
> -	header = vfio_info_cap_add(caps, sizeof(*cap),
> -				   VFIO_REGION_INFO_CAP_TYPE, 1);
> -	if (IS_ERR(header))
> -		return PTR_ERR(header);
> -
> -	type_cap = container_of(header, struct vfio_region_info_cap_type,
> -				header);
> -	type_cap->type = cap->type;
> -	type_cap->subtype = cap->subtype;
>  	return 0;
>  }
> -
> -int vfio_info_add_capability(struct vfio_info_cap *caps, int cap_type_id,
> -			     void *cap_type)
> -{
> -	int ret = -EINVAL;
> -
> -	if (!cap_type)
> -		return 0;
> -
> -	switch (cap_type_id) {
> -	case VFIO_REGION_INFO_CAP_SPARSE_MMAP:
> -		ret = sparse_mmap_cap(caps, cap_type);
> -		break;
> -
> -	case VFIO_REGION_INFO_CAP_TYPE:
> -		ret = region_type_cap(caps, cap_type);
> -		break;
> -	}
> -
> -	return ret;
> -}
>  EXPORT_SYMBOL(vfio_info_add_capability);
>  
>  int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr, int num_irqs,
> diff --git a/include/linux/vfio.h b/include/linux/vfio.h
> index a47b985341d1..66741ab087c1 100644
> --- a/include/linux/vfio.h
> +++ b/include/linux/vfio.h
> @@ -145,7 +145,8 @@ extern struct vfio_info_cap_header *vfio_info_cap_add(
>  extern void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset);
>  
>  extern int vfio_info_add_capability(struct vfio_info_cap *caps,
> -				    int cap_type_id, void *cap_type);
> +				    struct vfio_info_cap_header *cap,
> +				    size_t size);
>  
>  extern int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr,
>  					      int num_irqs, int max_irq_type,
> 

so yet another one:
Reviewed-by: Eric Auger <eric.auger@redhat.com>

Thanks

Eric
Eric Auger Dec. 13, 2017, 3:04 p.m. UTC | #6
Hi Peter,

On 13/12/17 07:49, Peter Xu wrote:
> On Tue, Dec 12, 2017 at 12:59:39PM -0700, Alex Williamson wrote:
>> The vfio_info_add_capability() helper requires the caller to pass a
>> capability ID, which it then uses to fill in header fields, assuming
>> hard coded versions.  This makes for an awkward and rigid interface.
>> The only thing we want this helper to do is allocate sufficient
>> space in the caps buffer and chain this capability into the list.
>> Reduce it to that simple task.
>>
>> Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
> 
> Reviewed-by: Peter Xu <peterx@redhat.com>
> 
> Though during review I had a question related to the function
> msix_sparse_mmap_cap(): Is it possible that one PCI device BAR is very
> small (e.g., 4K) that only contains the MSI-X table (and another small
> PBA area)?  If so, should we just mark that region unmappable instead
> of setting vfio_region_info_cap_sparse_mmap.nr_areas to 1 in
> msix_sparse_mmap_cap()?
> 
> 	/* If MSI-X table is aligned to the start or end, only one area */
> 	if (((vdev->msix_offset & PAGE_MASK) == 0) ||
> 	    (PAGE_ALIGN(vdev->msix_offset + vdev->msix_size) >= end))
> 		nr_areas = 1;
> 
> Thanks,
> 
if I understand the code correctly, if the MSI-X table exactly matches
the BAR, a sparse mmap region is reported with offset/size = 0. Is that
correct?

Thanks

Eric
Eric Auger Dec. 13, 2017, 3:32 p.m. UTC | #7
Hi,

On 13/12/17 16:04, Auger Eric wrote:
> Hi Peter,
> 
> On 13/12/17 07:49, Peter Xu wrote:
>> On Tue, Dec 12, 2017 at 12:59:39PM -0700, Alex Williamson wrote:
>>> The vfio_info_add_capability() helper requires the caller to pass a
>>> capability ID, which it then uses to fill in header fields, assuming
>>> hard coded versions.  This makes for an awkward and rigid interface.
>>> The only thing we want this helper to do is allocate sufficient
>>> space in the caps buffer and chain this capability into the list.
>>> Reduce it to that simple task.
>>>
>>> Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
>>
>> Reviewed-by: Peter Xu <peterx@redhat.com>
>>
>> Though during review I had a question related to the function
>> msix_sparse_mmap_cap(): Is it possible that one PCI device BAR is very
>> small (e.g., 4K) that only contains the MSI-X table (and another small
>> PBA area)?  If so, should we just mark that region unmappable instead
>> of setting vfio_region_info_cap_sparse_mmap.nr_areas to 1 in
>> msix_sparse_mmap_cap()?
>>
>> 	/* If MSI-X table is aligned to the start or end, only one area */
>> 	if (((vdev->msix_offset & PAGE_MASK) == 0) ||
>> 	    (PAGE_ALIGN(vdev->msix_offset + vdev->msix_size) >= end))
>> 		nr_areas = 1;
>>
>> Thanks,
>>
> if I understand the code correctly, if the MSI-X table exactly matches
> the BAR, a sparse mmap region is reported with offset/size = 0. Is that
> correct?
> 
> Thanks
> 
> Eric
> 
looks fixed by

[RFC PATCH kernel] vfio-pci: Fix sparse capability when no parts of MSIX
BAR can be mapped

Sorry for the noise.

Thanks

Eric
Alex Williamson Dec. 13, 2017, 3:35 p.m. UTC | #8
On Wed, 13 Dec 2017 16:04:48 +0100
Auger Eric <eric.auger@redhat.com> wrote:

> Hi Peter,
> 
> On 13/12/17 07:49, Peter Xu wrote:
> > On Tue, Dec 12, 2017 at 12:59:39PM -0700, Alex Williamson wrote:  
> >> The vfio_info_add_capability() helper requires the caller to pass a
> >> capability ID, which it then uses to fill in header fields, assuming
> >> hard coded versions.  This makes for an awkward and rigid interface.
> >> The only thing we want this helper to do is allocate sufficient
> >> space in the caps buffer and chain this capability into the list.
> >> Reduce it to that simple task.
> >>
> >> Signed-off-by: Alex Williamson <alex.williamson@redhat.com>  
> > 
> > Reviewed-by: Peter Xu <peterx@redhat.com>
> > 
> > Though during review I had a question related to the function
> > msix_sparse_mmap_cap(): Is it possible that one PCI device BAR is very
> > small (e.g., 4K) that only contains the MSI-X table (and another small
> > PBA area)?  If so, should we just mark that region unmappable instead
> > of setting vfio_region_info_cap_sparse_mmap.nr_areas to 1 in
> > msix_sparse_mmap_cap()?
> > 
> > 	/* If MSI-X table is aligned to the start or end, only one area */
> > 	if (((vdev->msix_offset & PAGE_MASK) == 0) ||
> > 	    (PAGE_ALIGN(vdev->msix_offset + vdev->msix_size) >= end))
> > 		nr_areas = 1;
> > 
> > Thanks,
> >   
> if I understand the code correctly, if the MSI-X table exactly matches
> the BAR, a sparse mmap region is reported with offset/size = 0. Is that
> correct?

Yes, and that was a compatibility choice when the sparse mmap was
added, retaining the per region mmap flag, but essentially excluding
the whole area with the sparse mmap.  It seemed like it'd be easier for
userspace to understand the distinction.  Now we're trying to remove
the whole mess and allow mmaps covering the MSI-X vector table because
it's a performance killer for systems where the page size is >4K.
Thanks,

Alex
Peter Xu Dec. 15, 2017, 6:18 a.m. UTC | #9
On Wed, Dec 13, 2017 at 08:35:39AM -0700, Alex Williamson wrote:
> On Wed, 13 Dec 2017 16:04:48 +0100
> Auger Eric <eric.auger@redhat.com> wrote:
> 
> > Hi Peter,
> > 
> > On 13/12/17 07:49, Peter Xu wrote:
> > > On Tue, Dec 12, 2017 at 12:59:39PM -0700, Alex Williamson wrote:  
> > >> The vfio_info_add_capability() helper requires the caller to pass a
> > >> capability ID, which it then uses to fill in header fields, assuming
> > >> hard coded versions.  This makes for an awkward and rigid interface.
> > >> The only thing we want this helper to do is allocate sufficient
> > >> space in the caps buffer and chain this capability into the list.
> > >> Reduce it to that simple task.
> > >>
> > >> Signed-off-by: Alex Williamson <alex.williamson@redhat.com>  
> > > 
> > > Reviewed-by: Peter Xu <peterx@redhat.com>
> > > 
> > > Though during review I had a question related to the function
> > > msix_sparse_mmap_cap(): Is it possible that one PCI device BAR is very
> > > small (e.g., 4K) that only contains the MSI-X table (and another small
> > > PBA area)?  If so, should we just mark that region unmappable instead
> > > of setting vfio_region_info_cap_sparse_mmap.nr_areas to 1 in
> > > msix_sparse_mmap_cap()?
> > > 
> > > 	/* If MSI-X table is aligned to the start or end, only one area */
> > > 	if (((vdev->msix_offset & PAGE_MASK) == 0) ||
> > > 	    (PAGE_ALIGN(vdev->msix_offset + vdev->msix_size) >= end))
> > > 		nr_areas = 1;
> > > 
> > > Thanks,
> > >   
> > if I understand the code correctly, if the MSI-X table exactly matches
> > the BAR, a sparse mmap region is reported with offset/size = 0. Is that
> > correct?
> 
> Yes, and that was a compatibility choice when the sparse mmap was
> added, retaining the per region mmap flag, but essentially excluding
> the whole area with the sparse mmap.  It seemed like it'd be easier for
> userspace to understand the distinction.

I see.

> Now we're trying to remove
> the whole mess and allow mmaps covering the MSI-X vector table because
> it's a performance killer for systems where the page size is >4K.

Yeah, I just noticed that.  Thanks for explaining!
Peter Xu Dec. 15, 2017, 6:20 a.m. UTC | #10
On Wed, Dec 13, 2017 at 04:32:10PM +0100, Auger Eric wrote:
> Hi,
> 
> On 13/12/17 16:04, Auger Eric wrote:
> > Hi Peter,
> > 
> > On 13/12/17 07:49, Peter Xu wrote:
> >> On Tue, Dec 12, 2017 at 12:59:39PM -0700, Alex Williamson wrote:
> >>> The vfio_info_add_capability() helper requires the caller to pass a
> >>> capability ID, which it then uses to fill in header fields, assuming
> >>> hard coded versions.  This makes for an awkward and rigid interface.
> >>> The only thing we want this helper to do is allocate sufficient
> >>> space in the caps buffer and chain this capability into the list.
> >>> Reduce it to that simple task.
> >>>
> >>> Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
> >>
> >> Reviewed-by: Peter Xu <peterx@redhat.com>
> >>
> >> Though during review I had a question related to the function
> >> msix_sparse_mmap_cap(): Is it possible that one PCI device BAR is very
> >> small (e.g., 4K) that only contains the MSI-X table (and another small
> >> PBA area)?  If so, should we just mark that region unmappable instead
> >> of setting vfio_region_info_cap_sparse_mmap.nr_areas to 1 in
> >> msix_sparse_mmap_cap()?
> >>
> >> 	/* If MSI-X table is aligned to the start or end, only one area */
> >> 	if (((vdev->msix_offset & PAGE_MASK) == 0) ||
> >> 	    (PAGE_ALIGN(vdev->msix_offset + vdev->msix_size) >= end))
> >> 		nr_areas = 1;
> >>
> >> Thanks,
> >>
> > if I understand the code correctly, if the MSI-X table exactly matches
> > the BAR, a sparse mmap region is reported with offset/size = 0. Is that
> > correct?
> > 
> > Thanks
> > 
> > Eric
> > 
> looks fixed by
> 
> [RFC PATCH kernel] vfio-pci: Fix sparse capability when no parts of MSIX
> BAR can be mapped
> 
> Sorry for the noise.

Hi, Eric,

Thanks for the link anyways. :)
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
index 96060920a6fe..0a7d084da1a2 100644
--- a/drivers/gpu/drm/i915/gvt/kvmgt.c
+++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
@@ -1012,6 +1012,8 @@  static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd,
 			if (!sparse)
 				return -ENOMEM;
 
+			sparse->header.id = VFIO_REGION_INFO_CAP_SPARSE_MMAP;
+			sparse->header.version = 1;
 			sparse->nr_areas = nr_areas;
 			cap_type_id = VFIO_REGION_INFO_CAP_SPARSE_MMAP;
 			sparse->areas[0].offset =
@@ -1033,7 +1035,9 @@  static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd,
 			break;
 		default:
 			{
-				struct vfio_region_info_cap_type cap_type;
+				struct vfio_region_info_cap_type cap_type = {
+					.header.id = VFIO_REGION_INFO_CAP_TYPE,
+					.header.version = 1 };
 
 				if (info.index >= VFIO_PCI_NUM_REGIONS +
 						vgpu->vdev.num_regions)
@@ -1050,8 +1054,8 @@  static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd,
 				cap_type.subtype = vgpu->vdev.region[i].subtype;
 
 				ret = vfio_info_add_capability(&caps,
-						VFIO_REGION_INFO_CAP_TYPE,
-						&cap_type);
+							&cap_type.header,
+							sizeof(cap_type));
 				if (ret)
 					return ret;
 			}
@@ -1061,8 +1065,9 @@  static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd,
 			switch (cap_type_id) {
 			case VFIO_REGION_INFO_CAP_SPARSE_MMAP:
 				ret = vfio_info_add_capability(&caps,
-					VFIO_REGION_INFO_CAP_SPARSE_MMAP,
-					sparse);
+					&sparse->header, sizeof(*sparse) +
+					(sparse->nr_areas *
+						sizeof(*sparse->areas)));
 				kfree(sparse);
 				if (ret)
 					return ret;
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index f041b1a6cf66..a73e40983880 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -582,6 +582,8 @@  static int msix_sparse_mmap_cap(struct vfio_pci_device *vdev,
 	if (!sparse)
 		return -ENOMEM;
 
+	sparse->header.id = VFIO_REGION_INFO_CAP_SPARSE_MMAP;
+	sparse->header.version = 1;
 	sparse->nr_areas = nr_areas;
 
 	if (vdev->msix_offset & PAGE_MASK) {
@@ -597,8 +599,7 @@  static int msix_sparse_mmap_cap(struct vfio_pci_device *vdev,
 		i++;
 	}
 
-	ret = vfio_info_add_capability(caps, VFIO_REGION_INFO_CAP_SPARSE_MMAP,
-				       sparse);
+	ret = vfio_info_add_capability(caps, &sparse->header, size);
 	kfree(sparse);
 
 	return ret;
@@ -741,7 +742,9 @@  static long vfio_pci_ioctl(void *device_data,
 			break;
 		default:
 		{
-			struct vfio_region_info_cap_type cap_type;
+			struct vfio_region_info_cap_type cap_type = {
+					.header.id = VFIO_REGION_INFO_CAP_TYPE,
+					.header.version = 1 };
 
 			if (info.index >=
 			    VFIO_PCI_NUM_REGIONS + vdev->num_regions)
@@ -756,9 +759,8 @@  static long vfio_pci_ioctl(void *device_data,
 			cap_type.type = vdev->region[i].type;
 			cap_type.subtype = vdev->region[i].subtype;
 
-			ret = vfio_info_add_capability(&caps,
-						      VFIO_REGION_INFO_CAP_TYPE,
-						      &cap_type);
+			ret = vfio_info_add_capability(&caps, &cap_type.header,
+						       sizeof(cap_type));
 			if (ret)
 				return ret;
 
diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index 2bc3705a99bd..721f97f8dac1 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -1857,63 +1857,19 @@  void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset)
 }
 EXPORT_SYMBOL(vfio_info_cap_shift);
 
-static int sparse_mmap_cap(struct vfio_info_cap *caps, void *cap_type)
+int vfio_info_add_capability(struct vfio_info_cap *caps,
+			     struct vfio_info_cap_header *cap, size_t size)
 {
 	struct vfio_info_cap_header *header;
-	struct vfio_region_info_cap_sparse_mmap *sparse_cap, *sparse = cap_type;
-	size_t size;
 
-	size = sizeof(*sparse) + sparse->nr_areas *  sizeof(*sparse->areas);
-	header = vfio_info_cap_add(caps, size,
-				   VFIO_REGION_INFO_CAP_SPARSE_MMAP, 1);
+	header = vfio_info_cap_add(caps, size, cap->id, cap->version);
 	if (IS_ERR(header))
 		return PTR_ERR(header);
 
-	sparse_cap = container_of(header,
-			struct vfio_region_info_cap_sparse_mmap, header);
-	sparse_cap->nr_areas = sparse->nr_areas;
-	memcpy(sparse_cap->areas, sparse->areas,
-	       sparse->nr_areas * sizeof(*sparse->areas));
-	return 0;
-}
-
-static int region_type_cap(struct vfio_info_cap *caps, void *cap_type)
-{
-	struct vfio_info_cap_header *header;
-	struct vfio_region_info_cap_type *type_cap, *cap = cap_type;
+	memcpy(header + 1, cap + 1, size - sizeof(*header));
 
-	header = vfio_info_cap_add(caps, sizeof(*cap),
-				   VFIO_REGION_INFO_CAP_TYPE, 1);
-	if (IS_ERR(header))
-		return PTR_ERR(header);
-
-	type_cap = container_of(header, struct vfio_region_info_cap_type,
-				header);
-	type_cap->type = cap->type;
-	type_cap->subtype = cap->subtype;
 	return 0;
 }
-
-int vfio_info_add_capability(struct vfio_info_cap *caps, int cap_type_id,
-			     void *cap_type)
-{
-	int ret = -EINVAL;
-
-	if (!cap_type)
-		return 0;
-
-	switch (cap_type_id) {
-	case VFIO_REGION_INFO_CAP_SPARSE_MMAP:
-		ret = sparse_mmap_cap(caps, cap_type);
-		break;
-
-	case VFIO_REGION_INFO_CAP_TYPE:
-		ret = region_type_cap(caps, cap_type);
-		break;
-	}
-
-	return ret;
-}
 EXPORT_SYMBOL(vfio_info_add_capability);
 
 int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr, int num_irqs,
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index a47b985341d1..66741ab087c1 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -145,7 +145,8 @@  extern struct vfio_info_cap_header *vfio_info_cap_add(
 extern void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset);
 
 extern int vfio_info_add_capability(struct vfio_info_cap *caps,
-				    int cap_type_id, void *cap_type);
+				    struct vfio_info_cap_header *cap,
+				    size_t size);
 
 extern int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr,
 					      int num_irqs, int max_irq_type,