diff mbox series

[v2] vfio: align capability structures

Message ID 20230808144216.2656505-1-stefanha@redhat.com (mailing list archive)
State New, archived
Headers show
Series [v2] vfio: align capability structures | expand

Commit Message

Stefan Hajnoczi Aug. 8, 2023, 2:42 p.m. UTC
The VFIO_DEVICE_GET_INFO, VFIO_DEVICE_GET_REGION_INFO, and
VFIO_IOMMU_GET_INFO ioctls fill in an info struct followed by capability
structs:

  +------+---------+---------+-----+
  | info | caps[0] | caps[1] | ... |
  +------+---------+---------+-----+

Both the info and capability struct sizes are not always multiples of
sizeof(u64), leaving u64 fields in later capability structs misaligned.

Userspace applications currently need to handle misalignment manually in
order to support CPU architectures and programming languages with strict
alignment requirements.

Make life easier for userspace by ensuring alignment in the kernel. This
is done by padding info struct definitions and by copying out zeroes
after capability structs that are not aligned.

The new layout is as follows:

  +------+---------+---+---------+-----+
  | info | caps[0] | 0 | caps[1] | ... |
  +------+---------+---+---------+-----+

In this example caps[0] has a size that is not multiples of sizeof(u64),
so zero padding is added to align the subsequent structure.

Adding zero padding between structs does not break the uapi. The memory
layout is specified by the info.cap_offset and caps[i].next fields
filled in by the kernel. Applications use these field values to locate
structs and are therefore unaffected by the addition of zero padding.

Note that code that copies out info structs with padding is updated to
always zero the struct and copy out as many bytes as userspace
requested. This makes the code shorter and avoids potential information
leaks by ensuring padding is initialized.

Originally-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
v2:
- Simplify padding approach as suggested by Alex

 include/uapi/linux/vfio.h        |  2 ++
 drivers/vfio/pci/vfio_pci_core.c | 11 ++---------
 drivers/vfio/vfio_iommu_type1.c  | 11 ++---------
 drivers/vfio/vfio_main.c         |  6 ++++++
 4 files changed, 12 insertions(+), 18 deletions(-)

Comments

Alex Williamson Aug. 8, 2023, 7:26 p.m. UTC | #1
On Tue,  8 Aug 2023 10:42:16 -0400
Stefan Hajnoczi <stefanha@redhat.com> wrote:

> The VFIO_DEVICE_GET_INFO, VFIO_DEVICE_GET_REGION_INFO, and
> VFIO_IOMMU_GET_INFO ioctls fill in an info struct followed by capability
> structs:
> 
>   +------+---------+---------+-----+
>   | info | caps[0] | caps[1] | ... |
>   +------+---------+---------+-----+
> 
> Both the info and capability struct sizes are not always multiples of
> sizeof(u64), leaving u64 fields in later capability structs misaligned.
> 
> Userspace applications currently need to handle misalignment manually in
> order to support CPU architectures and programming languages with strict
> alignment requirements.
> 
> Make life easier for userspace by ensuring alignment in the kernel. This
> is done by padding info struct definitions and by copying out zeroes
> after capability structs that are not aligned.
> 
> The new layout is as follows:
> 
>   +------+---------+---+---------+-----+
>   | info | caps[0] | 0 | caps[1] | ... |
>   +------+---------+---+---------+-----+
> 
> In this example caps[0] has a size that is not multiples of sizeof(u64),
> so zero padding is added to align the subsequent structure.
> 
> Adding zero padding between structs does not break the uapi. The memory
> layout is specified by the info.cap_offset and caps[i].next fields
> filled in by the kernel. Applications use these field values to locate
> structs and are therefore unaffected by the addition of zero padding.
> 
> Note that code that copies out info structs with padding is updated to
> always zero the struct and copy out as many bytes as userspace
> requested. This makes the code shorter and avoids potential information
> leaks by ensuring padding is initialized.
> 
> Originally-by: Alex Williamson <alex.williamson@redhat.com>
> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> ---
> v2:
> - Simplify padding approach as suggested by Alex
> 
>  include/uapi/linux/vfio.h        |  2 ++
>  drivers/vfio/pci/vfio_pci_core.c | 11 ++---------
>  drivers/vfio/vfio_iommu_type1.c  | 11 ++---------
>  drivers/vfio/vfio_main.c         |  6 ++++++
>  4 files changed, 12 insertions(+), 18 deletions(-)
> 
> diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
> index 20c804bdc09c..8fe85f5c7b61 100644
> --- a/include/uapi/linux/vfio.h
> +++ b/include/uapi/linux/vfio.h
> @@ -217,6 +217,7 @@ struct vfio_device_info {
>  	__u32	num_regions;	/* Max region index + 1 */
>  	__u32	num_irqs;	/* Max IRQ index + 1 */
>  	__u32   cap_offset;	/* Offset within info struct of first cap */
> +	__u32   pad;
>  };
>  #define VFIO_DEVICE_GET_INFO		_IO(VFIO_TYPE, VFIO_BASE + 7)
>  
> @@ -1304,6 +1305,7 @@ struct vfio_iommu_type1_info {
>  #define VFIO_IOMMU_INFO_CAPS	(1 << 1)	/* Info supports caps */
>  	__u64	iova_pgsizes;	/* Bitmap of supported page sizes */
>  	__u32   cap_offset;	/* Offset within info struct of first cap */
> +	__u32   pad;
>  };
>  
>  /*
> diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
> index 20d7b69ea6ff..e2ba2a350f6c 100644
> --- a/drivers/vfio/pci/vfio_pci_core.c
> +++ b/drivers/vfio/pci/vfio_pci_core.c
> @@ -920,24 +920,17 @@ static int vfio_pci_ioctl_get_info(struct vfio_pci_core_device *vdev,
>  				   struct vfio_device_info __user *arg)
>  {
>  	unsigned long minsz = offsetofend(struct vfio_device_info, num_irqs);
> -	struct vfio_device_info info;
> +	struct vfio_device_info info = {};
>  	struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
> -	unsigned long capsz;
>  	int ret;
>  
> -	/* For backward compatibility, cannot require this */
> -	capsz = offsetofend(struct vfio_iommu_type1_info, cap_offset);
> -
>  	if (copy_from_user(&info, arg, minsz))
>  		return -EFAULT;
>  
>  	if (info.argsz < minsz)
>  		return -EINVAL;
>  
> -	if (info.argsz >= capsz) {
> -		minsz = capsz;
> -		info.cap_offset = 0;
> -	}
> +	minsz = min_t(size_t, info.argsz, sizeof(info));

Thanks for catching these, LGTM.  I'll see if anyone else offers a
review in the next couple days and otherwise apply this and
20230801155352.1391945-1-stefanha@redhat.com this week.  Thanks,

Alex

>  
>  	info.flags = VFIO_DEVICE_FLAGS_PCI;
>  
> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
> index ebe0ad31d0b0..f812c475a626 100644
> --- a/drivers/vfio/vfio_iommu_type1.c
> +++ b/drivers/vfio/vfio_iommu_type1.c
> @@ -2762,27 +2762,20 @@ static int vfio_iommu_dma_avail_build_caps(struct vfio_iommu *iommu,
>  static int vfio_iommu_type1_get_info(struct vfio_iommu *iommu,
>  				     unsigned long arg)
>  {
> -	struct vfio_iommu_type1_info info;
> +	struct vfio_iommu_type1_info info = {};
>  	unsigned long minsz;
>  	struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
> -	unsigned long capsz;
>  	int ret;
>  
>  	minsz = offsetofend(struct vfio_iommu_type1_info, iova_pgsizes);
>  
> -	/* For backward compatibility, cannot require this */
> -	capsz = offsetofend(struct vfio_iommu_type1_info, cap_offset);
> -
>  	if (copy_from_user(&info, (void __user *)arg, minsz))
>  		return -EFAULT;
>  
>  	if (info.argsz < minsz)
>  		return -EINVAL;
>  
> -	if (info.argsz >= capsz) {
> -		minsz = capsz;
> -		info.cap_offset = 0; /* output, no-recopy necessary */
> -	}
> +	minsz = min_t(size_t, info.argsz, sizeof(info));
>  
>  	mutex_lock(&iommu->lock);
>  	info.flags = VFIO_IOMMU_INFO_PGSIZES;
> diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
> index f0ca33b2e1df..2850478301d2 100644
> --- a/drivers/vfio/vfio_main.c
> +++ b/drivers/vfio/vfio_main.c
> @@ -1172,6 +1172,9 @@ struct vfio_info_cap_header *vfio_info_cap_add(struct vfio_info_cap *caps,
>  	void *buf;
>  	struct vfio_info_cap_header *header, *tmp;
>  
> +	/* Ensure that the next capability struct will be aligned */
> +	size = ALIGN(size, sizeof(u64));
> +
>  	buf = krealloc(caps->buf, caps->size + size, GFP_KERNEL);
>  	if (!buf) {
>  		kfree(caps->buf);
> @@ -1205,6 +1208,9 @@ void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset)
>  	struct vfio_info_cap_header *tmp;
>  	void *buf = (void *)caps->buf;
>  
> +	/* Capability structs should start with proper alignment */
> +	WARN_ON(!IS_ALIGNED(offset, sizeof(u64)));
> +
>  	for (tmp = buf; tmp->next; tmp = buf + tmp->next - offset)
>  		tmp->next += offset;
>  }
Tian, Kevin Aug. 9, 2023, 4:12 a.m. UTC | #2
> From: Stefan Hajnoczi <stefanha@redhat.com>
> Sent: Tuesday, August 8, 2023 10:42 PM
> 
> The VFIO_DEVICE_GET_INFO, VFIO_DEVICE_GET_REGION_INFO, and
> VFIO_IOMMU_GET_INFO ioctls fill in an info struct followed by capability
> structs:
> 
>   +------+---------+---------+-----+
>   | info | caps[0] | caps[1] | ... |
>   +------+---------+---------+-----+
> 
> Both the info and capability struct sizes are not always multiples of
> sizeof(u64), leaving u64 fields in later capability structs misaligned.
> 
> Userspace applications currently need to handle misalignment manually in
> order to support CPU architectures and programming languages with strict
> alignment requirements.
> 
> Make life easier for userspace by ensuring alignment in the kernel. This
> is done by padding info struct definitions and by copying out zeroes
> after capability structs that are not aligned.
> 
> The new layout is as follows:
> 
>   +------+---------+---+---------+-----+
>   | info | caps[0] | 0 | caps[1] | ... |
>   +------+---------+---+---------+-----+
> 
> In this example caps[0] has a size that is not multiples of sizeof(u64),
> so zero padding is added to align the subsequent structure.
> 
> Adding zero padding between structs does not break the uapi. The memory
> layout is specified by the info.cap_offset and caps[i].next fields
> filled in by the kernel. Applications use these field values to locate
> structs and are therefore unaffected by the addition of zero padding.
> 
> Note that code that copies out info structs with padding is updated to
> always zero the struct and copy out as many bytes as userspace
> requested. This makes the code shorter and avoids potential information
> leaks by ensuring padding is initialized.
> 
> Originally-by: Alex Williamson <alex.williamson@redhat.com>
> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>

As Jason suggested let's also fix vfio_compat in iommufd.

otherwise,

Reviewed-by: Kevin Tian <kevin.tian@intel.com>
diff mbox series

Patch

diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 20c804bdc09c..8fe85f5c7b61 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -217,6 +217,7 @@  struct vfio_device_info {
 	__u32	num_regions;	/* Max region index + 1 */
 	__u32	num_irqs;	/* Max IRQ index + 1 */
 	__u32   cap_offset;	/* Offset within info struct of first cap */
+	__u32   pad;
 };
 #define VFIO_DEVICE_GET_INFO		_IO(VFIO_TYPE, VFIO_BASE + 7)
 
@@ -1304,6 +1305,7 @@  struct vfio_iommu_type1_info {
 #define VFIO_IOMMU_INFO_CAPS	(1 << 1)	/* Info supports caps */
 	__u64	iova_pgsizes;	/* Bitmap of supported page sizes */
 	__u32   cap_offset;	/* Offset within info struct of first cap */
+	__u32   pad;
 };
 
 /*
diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
index 20d7b69ea6ff..e2ba2a350f6c 100644
--- a/drivers/vfio/pci/vfio_pci_core.c
+++ b/drivers/vfio/pci/vfio_pci_core.c
@@ -920,24 +920,17 @@  static int vfio_pci_ioctl_get_info(struct vfio_pci_core_device *vdev,
 				   struct vfio_device_info __user *arg)
 {
 	unsigned long minsz = offsetofend(struct vfio_device_info, num_irqs);
-	struct vfio_device_info info;
+	struct vfio_device_info info = {};
 	struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
-	unsigned long capsz;
 	int ret;
 
-	/* For backward compatibility, cannot require this */
-	capsz = offsetofend(struct vfio_iommu_type1_info, cap_offset);
-
 	if (copy_from_user(&info, arg, minsz))
 		return -EFAULT;
 
 	if (info.argsz < minsz)
 		return -EINVAL;
 
-	if (info.argsz >= capsz) {
-		minsz = capsz;
-		info.cap_offset = 0;
-	}
+	minsz = min_t(size_t, info.argsz, sizeof(info));
 
 	info.flags = VFIO_DEVICE_FLAGS_PCI;
 
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index ebe0ad31d0b0..f812c475a626 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -2762,27 +2762,20 @@  static int vfio_iommu_dma_avail_build_caps(struct vfio_iommu *iommu,
 static int vfio_iommu_type1_get_info(struct vfio_iommu *iommu,
 				     unsigned long arg)
 {
-	struct vfio_iommu_type1_info info;
+	struct vfio_iommu_type1_info info = {};
 	unsigned long minsz;
 	struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
-	unsigned long capsz;
 	int ret;
 
 	minsz = offsetofend(struct vfio_iommu_type1_info, iova_pgsizes);
 
-	/* For backward compatibility, cannot require this */
-	capsz = offsetofend(struct vfio_iommu_type1_info, cap_offset);
-
 	if (copy_from_user(&info, (void __user *)arg, minsz))
 		return -EFAULT;
 
 	if (info.argsz < minsz)
 		return -EINVAL;
 
-	if (info.argsz >= capsz) {
-		minsz = capsz;
-		info.cap_offset = 0; /* output, no-recopy necessary */
-	}
+	minsz = min_t(size_t, info.argsz, sizeof(info));
 
 	mutex_lock(&iommu->lock);
 	info.flags = VFIO_IOMMU_INFO_PGSIZES;
diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
index f0ca33b2e1df..2850478301d2 100644
--- a/drivers/vfio/vfio_main.c
+++ b/drivers/vfio/vfio_main.c
@@ -1172,6 +1172,9 @@  struct vfio_info_cap_header *vfio_info_cap_add(struct vfio_info_cap *caps,
 	void *buf;
 	struct vfio_info_cap_header *header, *tmp;
 
+	/* Ensure that the next capability struct will be aligned */
+	size = ALIGN(size, sizeof(u64));
+
 	buf = krealloc(caps->buf, caps->size + size, GFP_KERNEL);
 	if (!buf) {
 		kfree(caps->buf);
@@ -1205,6 +1208,9 @@  void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset)
 	struct vfio_info_cap_header *tmp;
 	void *buf = (void *)caps->buf;
 
+	/* Capability structs should start with proper alignment */
+	WARN_ON(!IS_ALIGNED(offset, sizeof(u64)));
+
 	for (tmp = buf; tmp->next; tmp = buf + tmp->next - offset)
 		tmp->next += offset;
 }