diff mbox series

[v2,2/6] iommufd: Add iommu page fault uapi data

Message ID 20231026024930.382898-3-baolu.lu@linux.intel.com (mailing list archive)
State New
Headers show
Series IOMMUFD: Deliver IO page faults to user space | expand

Commit Message

Baolu Lu Oct. 26, 2023, 2:49 a.m. UTC
Allow the user to handle IO page faults generated by the IOMMU hardware
when walking a HWPT managed by the user. One example use case is
nested translation, where the first-stage page table is managed by
user space.

When allocating a user HWPT, the user can opt in by setting the
IOMMU_HWPT_ALLOC_IOPF_CAPABLE flag, which indicates that the user is
capable of handling IO page faults generated for this HWPT.

On a successful return of hwpt allocation, the user can retrieve
and respond to page faults by reading and writing the fd returned
in out_fault_fd. The page fault and response data are encoded in
the formats defined by struct iommu_hwpt_pgfault and
struct iommu_hwpt_page_response.

The iommu_hwpt_pgfault structure closely resembles iommu_fault, with
some new members such as the fault data size and the object id of the
device from which the page fault originated.

Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
---
 include/uapi/linux/iommufd.h | 65 ++++++++++++++++++++++++++++++++++++
 1 file changed, 65 insertions(+)

Comments

Jason Gunthorpe Dec. 1, 2023, 3:14 p.m. UTC | #1
On Thu, Oct 26, 2023 at 10:49:26AM +0800, Lu Baolu wrote:

> + * @IOMMU_HWPT_ALLOC_IOPF_CAPABLE: User is capable of handling IO page faults.

This does not seem like the best name?

Probably like this given my remark in the cover letter:

--- a/include/uapi/linux/iommufd.h
+++ b/include/uapi/linux/iommufd.h
@@ -359,6 +359,7 @@ struct iommu_vfio_ioas {
 enum iommufd_hwpt_alloc_flags {
        IOMMU_HWPT_ALLOC_NEST_PARENT = 1 << 0,
        IOMMU_HWPT_ALLOC_DIRTY_TRACKING = 1 << 1,
+       IOMMU_HWPT_IOPFD_FD_VALID = 1 << 2,
 };
 
 /**
@@ -440,6 +441,7 @@ struct iommu_hwpt_alloc {
        __u32 data_type;
        __u32 data_len;
        __aligned_u64 data_uptr;
+       __s32 iopf_fd;
 };
 #define IOMMU_HWPT_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_ALLOC)



> @@ -679,6 +688,62 @@ struct iommu_dev_data_arm_smmuv3 {
>  	__u32 sid;
>  };
>  
> +/**
> + * struct iommu_hwpt_pgfault - iommu page fault data
> + * @size: sizeof(struct iommu_hwpt_pgfault)
> + * @flags: Combination of IOMMU_PGFAULT_FLAGS_ flags.
> + *  - PASID_VALID: @pasid field is valid
> + *  - LAST_PAGE: the last page fault in a group
> + *  - PRIV_DATA: @private_data field is valid
> + *  - RESP_NEEDS_PASID: the page response must have the same
> + *                      PASID value as the page request.
> + * @dev_id: id of the originated device
> + * @pasid: Process Address Space ID
> + * @grpid: Page Request Group Index
> + * @perm: requested page permissions (IOMMU_PGFAULT_PERM_* values)
> + * @addr: page address
> + * @private_data: device-specific private information
> + */
> +struct iommu_hwpt_pgfault {
> +	__u32 size;
> +	__u32 flags;
> +#define IOMMU_PGFAULT_FLAGS_PASID_VALID		(1 << 0)
> +#define IOMMU_PGFAULT_FLAGS_LAST_PAGE		(1 << 1)
> +#define IOMMU_PGFAULT_FLAGS_PRIV_DATA		(1 << 2)
> +#define IOMMU_PGFAULT_FLAGS_RESP_NEEDS_PASID	(1 << 3)
> +	__u32 dev_id;
> +	__u32 pasid;
> +	__u32 grpid;
> +	__u32 perm;
> +#define IOMMU_PGFAULT_PERM_READ			(1 << 0)
> +#define IOMMU_PGFAULT_PERM_WRITE		(1 << 1)
> +#define IOMMU_PGFAULT_PERM_EXEC			(1 << 2)
> +#define IOMMU_PGFAULT_PERM_PRIV			(1 << 3)
> +	__u64 addr;
> +	__u64 private_data[2];
> +};

This mixed #define is not the style, these should be in enums,
possibly with kdocs

Use __aligned_u64 also

> +
> +/**
> + * struct iommu_hwpt_response - IOMMU page fault response
> + * @size: sizeof(struct iommu_hwpt_response)
> + * @flags: Must be set to 0
> + * @hwpt_id: hwpt ID of target hardware page table for the response
> + * @dev_id: device ID of target device for the response
> + * @pasid: Process Address Space ID
> + * @grpid: Page Request Group Index
> + * @code: response code. The supported codes include:
> + *        0: Successful; 1: Response Failure; 2: Invalid Request.
> + */
> +struct iommu_hwpt_page_response {
> +	__u32 size;
> +	__u32 flags;
> +	__u32 hwpt_id;
> +	__u32 dev_id;
> +	__u32 pasid;
> +	__u32 grpid;
> +	__u32 code;
> +};

Is it OK to have the user pass in all this detailed information? Is it
a security problem if the user lies? Ie shouldn't we only ack page
faults we actually have outstanding?

IOW should iommu_hwpt_pgfault just have a 'response_cookie' generated
by the kernel that should be placed here? The kernel would keep track
of all this internal stuff?

Jason
Baolu Lu Dec. 8, 2023, 6:35 a.m. UTC | #2
On 12/1/23 11:14 PM, Jason Gunthorpe wrote:
> On Thu, Oct 26, 2023 at 10:49:26AM +0800, Lu Baolu wrote:
> 
>> + * @IOMMU_HWPT_ALLOC_IOPF_CAPABLE: User is capable of handling IO page faults.
> 
> This does not seem like the best name?
> 
> Probably like this given my remark in the cover letter:
> 
> --- a/include/uapi/linux/iommufd.h
> +++ b/include/uapi/linux/iommufd.h
> @@ -359,6 +359,7 @@ struct iommu_vfio_ioas {
>   enum iommufd_hwpt_alloc_flags {
>          IOMMU_HWPT_ALLOC_NEST_PARENT = 1 << 0,
>          IOMMU_HWPT_ALLOC_DIRTY_TRACKING = 1 << 1,
> +       IOMMU_HWPT_IOPFD_FD_VALID = 1 << 2,
>   };
>   
>   /**
> @@ -440,6 +441,7 @@ struct iommu_hwpt_alloc {
>          __u32 data_type;
>          __u32 data_len;
>          __aligned_u64 data_uptr;
> +       __s32 iopf_fd;
>   };
>   #define IOMMU_HWPT_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_ALLOC)

Yes. Agreed.

>> @@ -679,6 +688,62 @@ struct iommu_dev_data_arm_smmuv3 {
>>   	__u32 sid;
>>   };
>>   
>> +/**
>> + * struct iommu_hwpt_pgfault - iommu page fault data
>> + * @size: sizeof(struct iommu_hwpt_pgfault)
>> + * @flags: Combination of IOMMU_PGFAULT_FLAGS_ flags.
>> + *  - PASID_VALID: @pasid field is valid
>> + *  - LAST_PAGE: the last page fault in a group
>> + *  - PRIV_DATA: @private_data field is valid
>> + *  - RESP_NEEDS_PASID: the page response must have the same
>> + *                      PASID value as the page request.
>> + * @dev_id: id of the originated device
>> + * @pasid: Process Address Space ID
>> + * @grpid: Page Request Group Index
>> + * @perm: requested page permissions (IOMMU_PGFAULT_PERM_* values)
>> + * @addr: page address
>> + * @private_data: device-specific private information
>> + */
>> +struct iommu_hwpt_pgfault {
>> +	__u32 size;
>> +	__u32 flags;
>> +#define IOMMU_PGFAULT_FLAGS_PASID_VALID		(1 << 0)
>> +#define IOMMU_PGFAULT_FLAGS_LAST_PAGE		(1 << 1)
>> +#define IOMMU_PGFAULT_FLAGS_PRIV_DATA		(1 << 2)
>> +#define IOMMU_PGFAULT_FLAGS_RESP_NEEDS_PASID	(1 << 3)
>> +	__u32 dev_id;
>> +	__u32 pasid;
>> +	__u32 grpid;
>> +	__u32 perm;
>> +#define IOMMU_PGFAULT_PERM_READ			(1 << 0)
>> +#define IOMMU_PGFAULT_PERM_WRITE		(1 << 1)
>> +#define IOMMU_PGFAULT_PERM_EXEC			(1 << 2)
>> +#define IOMMU_PGFAULT_PERM_PRIV			(1 << 3)
>> +	__u64 addr;
>> +	__u64 private_data[2];
>> +};
> 
> This mixed #define is not the style, these should be in enums,
> possibly with kdocs
> 
> Use __aligned_u64 also

Sure.

> 
>> +
>> +/**
>> + * struct iommu_hwpt_response - IOMMU page fault response
>> + * @size: sizeof(struct iommu_hwpt_response)
>> + * @flags: Must be set to 0
>> + * @hwpt_id: hwpt ID of target hardware page table for the response
>> + * @dev_id: device ID of target device for the response
>> + * @pasid: Process Address Space ID
>> + * @grpid: Page Request Group Index
>> + * @code: response code. The supported codes include:
>> + *        0: Successful; 1: Response Failure; 2: Invalid Request.
>> + */
>> +struct iommu_hwpt_page_response {
>> +	__u32 size;
>> +	__u32 flags;
>> +	__u32 hwpt_id;
>> +	__u32 dev_id;
>> +	__u32 pasid;
>> +	__u32 grpid;
>> +	__u32 code;
>> +};
> 
> Is it OK to have the user pass in all this detailed information? Is it
> a security problem if the user lies? Ie shouldn't we only ack page
> faults we actually have outstanding?
> 
> IOW should iommu_hwpt_pgfault just have a 'response_cookie' generated
> by the kernel that should be placed here? The kernel would keep track
> of all this internal stuff?

The iommu core already keeps track of the outstanding faults that are
awaiting a response. So even if the user lies about a fault, the kernel
does not send a wrong response message to the device. {device_id,
grpid, code} is just enough from the user. This means the user wants to
respond to the @grpid fault from @device with the @code result.

Best regards,
baolu
diff mbox series

Patch

diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h
index f9b8b95b36b2..0f00f1dfcded 100644
--- a/include/uapi/linux/iommufd.h
+++ b/include/uapi/linux/iommufd.h
@@ -355,9 +355,17 @@  struct iommu_vfio_ioas {
  * @IOMMU_HWPT_ALLOC_NEST_PARENT: If set, allocate a domain which can serve
  *                                as the parent domain in the nesting
  *                                configuration.
+ * @IOMMU_HWPT_ALLOC_IOPF_CAPABLE: User is capable of handling IO page faults.
+ *                                 On successful return, the user can retrieve
+ *                                 faults by reading @out_fault_fd and respond
+ *                                 to them by writing to it. The fault data is
+ *                                 encoded in the format defined by
+ *                                 iommu_hwpt_pgfault. The response data format
+ *                                 is defined by iommu_hwpt_page_response.
  */
 enum iommufd_hwpt_alloc_flags {
 	IOMMU_HWPT_ALLOC_NEST_PARENT = 1 << 0,
+	IOMMU_HWPT_ALLOC_IOPF_CAPABLE = 1 << 1,
 };
 
 /**
@@ -476,6 +484,7 @@  struct iommu_hwpt_alloc {
 	__u32 hwpt_type;
 	__u32 data_len;
 	__aligned_u64 data_uptr;
+	__u32 out_fault_fd;
 };
 #define IOMMU_HWPT_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_ALLOC)
 
@@ -679,6 +688,62 @@  struct iommu_dev_data_arm_smmuv3 {
 	__u32 sid;
 };
 
+/**
+ * struct iommu_hwpt_pgfault - iommu page fault data
+ * @size: sizeof(struct iommu_hwpt_pgfault)
+ * @flags: Combination of IOMMU_PGFAULT_FLAGS_ flags.
+ *  - PASID_VALID: @pasid field is valid
+ *  - LAST_PAGE: the last page fault in a group
+ *  - PRIV_DATA: @private_data field is valid
+ *  - RESP_NEEDS_PASID: the page response must have the same
+ *                      PASID value as the page request.
+ * @dev_id: id of the originated device
+ * @pasid: Process Address Space ID
+ * @grpid: Page Request Group Index
+ * @perm: requested page permissions (IOMMU_PGFAULT_PERM_* values)
+ * @addr: page address
+ * @private_data: device-specific private information
+ */
+struct iommu_hwpt_pgfault {
+	__u32 size;
+	__u32 flags;
+#define IOMMU_PGFAULT_FLAGS_PASID_VALID		(1 << 0)
+#define IOMMU_PGFAULT_FLAGS_LAST_PAGE		(1 << 1)
+#define IOMMU_PGFAULT_FLAGS_PRIV_DATA		(1 << 2)
+#define IOMMU_PGFAULT_FLAGS_RESP_NEEDS_PASID	(1 << 3)
+	__u32 dev_id;
+	__u32 pasid;
+	__u32 grpid;
+	__u32 perm;
+#define IOMMU_PGFAULT_PERM_READ			(1 << 0)
+#define IOMMU_PGFAULT_PERM_WRITE		(1 << 1)
+#define IOMMU_PGFAULT_PERM_EXEC			(1 << 2)
+#define IOMMU_PGFAULT_PERM_PRIV			(1 << 3)
+	__u64 addr;
+	__u64 private_data[2];
+};
+
+/**
+ * struct iommu_hwpt_page_response - IOMMU page fault response
+ * @size: sizeof(struct iommu_hwpt_page_response)
+ * @flags: Must be set to 0
+ * @hwpt_id: hwpt ID of target hardware page table for the response
+ * @dev_id: device ID of target device for the response
+ * @pasid: Process Address Space ID
+ * @grpid: Page Request Group Index
+ * @code: response code. The supported codes include:
+ *        0: Successful; 1: Response Failure; 2: Invalid Request.
+ */
+struct iommu_hwpt_page_response {
+	__u32 size;
+	__u32 flags;
+	__u32 hwpt_id;
+	__u32 dev_id;
+	__u32 pasid;
+	__u32 grpid;
+	__u32 code;
+};
+
 /**
  * struct iommu_set_dev_data - ioctl(IOMMU_SET_DEV_DATA)
  * @size: sizeof(struct iommu_set_dev_data)