Message ID | 20230309080910.607396-8-yi.l.liu@intel.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | iommufd: Add nesting infrastructure | expand |
On 3/9/23 4:09 PM, Yi Liu wrote: > In nested translation, the stage-1 page table is user-managed and used > by IOMMU hardware, so destroying mappings in the stage-1 page table should > be followed with an IOTLB invalidation. s/destroying mappings/update of any present page table entry/ > This adds IOMMU_HWPT_INVALIDATE for IOTLB invalidation. > > Co-developed-by: Nicolin Chen <nicolinc@nvidia.com> > Signed-off-by: Nicolin Chen <nicolinc@nvidia.com> > Signed-off-by: Yi Liu <yi.l.liu@intel.com> > --- > drivers/iommu/iommufd/hw_pagetable.c | 56 +++++++++++++++++++++++++ > drivers/iommu/iommufd/iommufd_private.h | 9 ++++ > drivers/iommu/iommufd/main.c | 3 ++ > include/uapi/linux/iommufd.h | 27 ++++++++++++ > 4 files changed, 95 insertions(+) > > diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c > index 64e7cf7142e1..67facca98de1 100644 > --- a/drivers/iommu/iommufd/hw_pagetable.c > +++ b/drivers/iommu/iommufd/hw_pagetable.c > @@ -284,3 +284,59 @@ int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd) > iommufd_put_object(&idev->obj); > return rc; > } > + > +/* > + * size of page table type specific invalidate_info, indexed by > + * enum iommu_hwpt_type. > + */ > +static const size_t iommufd_hwpt_invalidate_info_size[] = {}; > + > +int iommufd_hwpt_invalidate(struct iommufd_ucmd *ucmd) > +{ > + struct iommu_hwpt_invalidate *cmd = ucmd->cmd; > + struct iommufd_hw_pagetable *hwpt; > + u64 user_ptr; > + u32 user_data_len, klen; > + int rc = 0; > + > + /* > + * For a user-managed HWPT, type should not be IOMMU_HWPT_TYPE_DEFAULT. > + * data_len should not exceed the size of iommufd_invalidate_buffer. > + */ > + if (cmd->data_type == IOMMU_HWPT_TYPE_DEFAULT || !cmd->data_len || > + cmd->data_type >= ARRAY_SIZE(iommufd_hwpt_invalidate_info_size)) "data_len should not exceed the size of iommufd_invalidate_buffer." How is this checked? 
> + return -EOPNOTSUPP; > + > + hwpt = iommufd_get_hwpt(ucmd, cmd->hwpt_id); > + if (IS_ERR(hwpt)) > + return PTR_ERR(hwpt); > + > + /* Do not allow any kernel-managed hw_pagetable */ > + if (!hwpt->parent) { > + rc = -EINVAL; > + goto out_put_hwpt; > + } > + > + klen = iommufd_hwpt_invalidate_info_size[cmd->data_type]; > + if (!klen) { > + rc = -EINVAL; > + goto out_put_hwpt; > + } > + > + /* > + * Copy the needed fields before reusing the ucmd buffer, this > + * avoids memory allocation in this path. > + */ > + user_ptr = cmd->data_uptr; > + user_data_len = cmd->data_len; Is it a valid case if "user_data_len < klen"? > + > + rc = copy_struct_from_user(cmd, klen, > + u64_to_user_ptr(user_ptr), user_data_len); > + if (rc) > + goto out_put_hwpt; > + > + hwpt->domain->ops->cache_invalidate_user(hwpt->domain, cmd); > +out_put_hwpt: > + iommufd_put_object(&hwpt->obj); > + return rc; > +} > diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h > index 182c074eecdc..d879264d1acf 100644 > --- a/drivers/iommu/iommufd/iommufd_private.h > +++ b/drivers/iommu/iommufd/iommufd_private.h > @@ -265,6 +265,7 @@ struct iommufd_hw_pagetable * > iommufd_hw_pagetable_detach(struct iommufd_device *idev); > void iommufd_hw_pagetable_destroy(struct iommufd_object *obj); > int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd); > +int iommufd_hwpt_invalidate(struct iommufd_ucmd *ucmd); > > static inline void iommufd_hw_pagetable_put(struct iommufd_ctx *ictx, > struct iommufd_hw_pagetable *hwpt) > @@ -276,6 +277,14 @@ static inline void iommufd_hw_pagetable_put(struct iommufd_ctx *ictx, > refcount_dec(&hwpt->obj.users); > } > > +static inline struct iommufd_hw_pagetable * > +iommufd_get_hwpt(struct iommufd_ucmd *ucmd, u32 id) > +{ > + return container_of(iommufd_get_object(ucmd->ictx, id, > + IOMMUFD_OBJ_HW_PAGETABLE), > + struct iommufd_hw_pagetable, obj); > +} > + > struct iommufd_group { > struct kref ref; > struct mutex lock; > diff --git 
a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c > index 7ab1e2c638a1..2cf45f65b637 100644 > --- a/drivers/iommu/iommufd/main.c > +++ b/drivers/iommu/iommufd/main.c > @@ -263,6 +263,7 @@ union ucmd_buffer { > struct iommu_destroy destroy; > struct iommu_hwpt_alloc hwpt; > struct iommu_hw_info info; > + struct iommu_hwpt_invalidate cache; > struct iommu_ioas_alloc alloc; > struct iommu_ioas_allow_iovas allow_iovas; > struct iommu_ioas_copy ioas_copy; > @@ -298,6 +299,8 @@ static const struct iommufd_ioctl_op iommufd_ioctl_ops[] = { > data_uptr), > IOCTL_OP(IOMMU_DEVICE_GET_HW_INFO, iommufd_device_get_hw_info, > struct iommu_hw_info, __reserved), > + IOCTL_OP(IOMMU_HWPT_INVALIDATE, iommufd_hwpt_invalidate, > + struct iommu_hwpt_invalidate, data_uptr), > IOCTL_OP(IOMMU_IOAS_ALLOC, iommufd_ioas_alloc_ioctl, > struct iommu_ioas_alloc, out_ioas_id), > IOCTL_OP(IOMMU_IOAS_ALLOW_IOVAS, iommufd_ioas_allow_iovas, > diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h > index 48781ff40a37..d0962c41f8d6 100644 > --- a/include/uapi/linux/iommufd.h > +++ b/include/uapi/linux/iommufd.h > @@ -47,6 +47,7 @@ enum { > IOMMUFD_CMD_VFIO_IOAS, > IOMMUFD_CMD_HWPT_ALLOC, > IOMMUFD_CMD_DEVICE_GET_HW_INFO, > + IOMMUFD_CMD_HWPT_INVALIDATE, > }; > > /** > @@ -447,4 +448,30 @@ struct iommu_hw_info { > __u32 __reserved; > }; > #define IOMMU_DEVICE_GET_HW_INFO _IO(IOMMUFD_TYPE, IOMMUFD_CMD_DEVICE_GET_HW_INFO) > + > +/** > + * struct iommu_hwpt_invalidate - ioctl(IOMMU_HWPT_INVALIDATE) > + * @size: sizeof(struct iommu_hwpt_invalidate) > + * @hwpt_id: HWPT ID of target hardware page table for the invalidation > + * @data_type: One of enum iommu_hwpt_type > + * @data_len: Length of the type specific data > + * @data_uptr: User pointer to the type specific data > + * > + * Invalidate the iommu cache for user-managed page table. Modifications > + * on user-managed page table should be followed with this operation to > + * sync the IOTLB. 
This is only needed by user-managed hw_pagetables, so > + * the @data_type should never be IOMMU_HWPT_TYPE_DEFAULT. > + * > + * +==============================+========================================+ > + * | @data_type | Data structure in @data_uptr | > + * +------------------------------+----------------------------------------+ > + */ > +struct iommu_hwpt_invalidate { > + __u32 size; > + __u32 hwpt_id; > + __u32 data_type; > + __u32 data_len; > + __aligned_u64 data_uptr; > +}; > +#define IOMMU_HWPT_INVALIDATE _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_INVALIDATE) > #endif Best regards, baolu
On Thu, Mar 09, 2023 at 12:09:05AM -0800, Yi Liu wrote: > +int iommufd_hwpt_invalidate(struct iommufd_ucmd *ucmd) > +{ > + struct iommu_hwpt_invalidate *cmd = ucmd->cmd; > + struct iommufd_hw_pagetable *hwpt; > + u64 user_ptr; > + u32 user_data_len, klen; > + int rc = 0; > + > + /* > + * For a user-managed HWPT, type should not be IOMMU_HWPT_TYPE_DEFAULT. > + * data_len should not exceed the size of iommufd_invalidate_buffer. > + */ > + if (cmd->data_type == IOMMU_HWPT_TYPE_DEFAULT || !cmd->data_len || > + cmd->data_type >= ARRAY_SIZE(iommufd_hwpt_invalidate_info_size)) > + return -EOPNOTSUPP; This needs to do the standard check for zeros in unknown trailing data bit. Check that alloc does it too Jason
> From: Baolu Lu <baolu.lu@linux.intel.com> > Sent: Friday, March 10, 2023 11:16 AM > > On 3/9/23 4:09 PM, Yi Liu wrote: > > In nested translation, the stage-1 page table is user-managed and used > > by IOMMU hardware, so destroying mappings in the stage-1 page table > should > > be followed with an IOTLB invalidation. > > s/destroying mappings/update of any present page table entry/ Right. Not only destroying. > > This adds IOMMU_HWPT_INVALIDATE for IOTLB invalidation. > > > > Co-developed-by: Nicolin Chen <nicolinc@nvidia.com> > > Signed-off-by: Nicolin Chen <nicolinc@nvidia.com> > > Signed-off-by: Yi Liu <yi.l.liu@intel.com> > > --- > > drivers/iommu/iommufd/hw_pagetable.c | 56 > +++++++++++++++++++++++++ > > drivers/iommu/iommufd/iommufd_private.h | 9 ++++ > > drivers/iommu/iommufd/main.c | 3 ++ > > include/uapi/linux/iommufd.h | 27 ++++++++++++ > > 4 files changed, 95 insertions(+) > > > > diff --git a/drivers/iommu/iommufd/hw_pagetable.c > b/drivers/iommu/iommufd/hw_pagetable.c > > index 64e7cf7142e1..67facca98de1 100644 > > --- a/drivers/iommu/iommufd/hw_pagetable.c > > +++ b/drivers/iommu/iommufd/hw_pagetable.c > > @@ -284,3 +284,59 @@ int iommufd_hwpt_alloc(struct iommufd_ucmd > *ucmd) > > iommufd_put_object(&idev->obj); > > return rc; > > } > > + > > +/* > > + * size of page table type specific invalidate_info, indexed by > > + * enum iommu_hwpt_type. > > + */ > > +static const size_t iommufd_hwpt_invalidate_info_size[] = {}; > > + > > +int iommufd_hwpt_invalidate(struct iommufd_ucmd *ucmd) > > +{ > > + struct iommu_hwpt_invalidate *cmd = ucmd->cmd; > > + struct iommufd_hw_pagetable *hwpt; > > + u64 user_ptr; > > + u32 user_data_len, klen; > > + int rc = 0; > > + > > + /* > > + * For a user-managed HWPT, type should not be > IOMMU_HWPT_TYPE_DEFAULT. > > + * data_len should not exceed the size of > iommufd_invalidate_buffer. 
> > + */ > > + if (cmd->data_type == IOMMU_HWPT_TYPE_DEFAULT || !cmd- > >data_len || > > + cmd->data_type >= > ARRAY_SIZE(iommufd_hwpt_invalidate_info_size)) > > "data_len should not exceed the size of iommufd_invalidate_buffer." > > How is this checked? Hmmm, this is a stale comment I suppose. > > > + return -EOPNOTSUPP; > > + > > + hwpt = iommufd_get_hwpt(ucmd, cmd->hwpt_id); > > + if (IS_ERR(hwpt)) > > + return PTR_ERR(hwpt); > > + > > + /* Do not allow any kernel-managed hw_pagetable */ > > + if (!hwpt->parent) { > > + rc = -EINVAL; > > + goto out_put_hwpt; > > + } > > + > > + klen = iommufd_hwpt_invalidate_info_size[cmd->data_type]; > > + if (!klen) { > > + rc = -EINVAL; > > + goto out_put_hwpt; > > + } > > + > > + /* > > + * Copy the needed fields before reusing the ucmd buffer, this > > + * avoids memory allocation in this path. > > + */ > > + user_ptr = cmd->data_uptr; > > + user_data_len = cmd->data_len; > > Is it a valid case if "user_data_len < klen"? Yes, e.g. an old QEMU running on a new kernel that has a new field added at the end of the data structure. Regards, Yi Liu
> From: Jason Gunthorpe <jgg@nvidia.com> > Sent: Saturday, March 11, 2023 1:50 AM > > On Thu, Mar 09, 2023 at 12:09:05AM -0800, Yi Liu wrote: > > +int iommufd_hwpt_invalidate(struct iommufd_ucmd *ucmd) > > +{ > > + struct iommu_hwpt_invalidate *cmd = ucmd->cmd; > > + struct iommufd_hw_pagetable *hwpt; > > + u64 user_ptr; > > + u32 user_data_len, klen; > > + int rc = 0; > > + > > + /* > > + * For a user-managed HWPT, type should not be > IOMMU_HWPT_TYPE_DEFAULT. > > + * data_len should not exceed the size of > iommufd_invalidate_buffer. > > + */ > > + if (cmd->data_type == IOMMU_HWPT_TYPE_DEFAULT || !cmd- > >data_len || > > + cmd->data_type >= > ARRAY_SIZE(iommufd_hwpt_invalidate_info_size)) > > + return -EOPNOTSUPP; > > This needs to do the standard check for zeros in unknown trailing data > bit. Check that alloc does it too Yes, I will add it in both paths. Regards, Yi Liu
> From: Jason Gunthorpe <jgg@nvidia.com> > Sent: Saturday, March 11, 2023 1:50 AM > > On Thu, Mar 09, 2023 at 12:09:05AM -0800, Yi Liu wrote: > > +int iommufd_hwpt_invalidate(struct iommufd_ucmd *ucmd) > > +{ > > + struct iommu_hwpt_invalidate *cmd = ucmd->cmd; > > + struct iommufd_hw_pagetable *hwpt; > > + u64 user_ptr; > > + u32 user_data_len, klen; > > + int rc = 0; > > + > > + /* > > + * For a user-managed HWPT, type should not be > IOMMU_HWPT_TYPE_DEFAULT. > > + * data_len should not exceed the size of > iommufd_invalidate_buffer. > > + */ > > + if (cmd->data_type == IOMMU_HWPT_TYPE_DEFAULT || !cmd- > >data_len || > > + cmd->data_type >= > ARRAY_SIZE(iommufd_hwpt_invalidate_info_size)) > > + return -EOPNOTSUPP; > > This needs to do the standard check for zeros in unknown trailing data > bit. Check that alloc does it too Maybe it has been covered by the copy_struct_from_user(). Is it? + /* + * Copy the needed fields before reusing the ucmd buffer, this + * avoids memory allocation in this path. + */ + user_ptr = cmd->data_uptr; + user_data_len = cmd->data_len; + + rc = copy_struct_from_user(cmd, klen, + u64_to_user_ptr(user_ptr), user_data_len); Regards, Yi Liu
On Tue, Mar 14, 2023 at 04:18:21AM +0000, Liu, Yi L wrote: > > From: Jason Gunthorpe <jgg@nvidia.com> > > Sent: Saturday, March 11, 2023 1:50 AM > > > > On Thu, Mar 09, 2023 at 12:09:05AM -0800, Yi Liu wrote: > > > +int iommufd_hwpt_invalidate(struct iommufd_ucmd *ucmd) > > > +{ > > > + struct iommu_hwpt_invalidate *cmd = ucmd->cmd; > > > + struct iommufd_hw_pagetable *hwpt; > > > + u64 user_ptr; > > > + u32 user_data_len, klen; > > > + int rc = 0; > > > + > > > + /* > > > + * For a user-managed HWPT, type should not be > > IOMMU_HWPT_TYPE_DEFAULT. > > > + * data_len should not exceed the size of > > iommufd_invalidate_buffer. > > > + */ > > > + if (cmd->data_type == IOMMU_HWPT_TYPE_DEFAULT || !cmd- > > >data_len || > > > + cmd->data_type >= > > ARRAY_SIZE(iommufd_hwpt_invalidate_info_size)) > > > + return -EOPNOTSUPP; > > > > This needs to do the standard check for zeros in unknown trailing data > > bit. Check that alloc does it too > > Maybe it has been covered by the copy_struct_from_user(). Is it? Yes Jason
diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c index 64e7cf7142e1..67facca98de1 100644 --- a/drivers/iommu/iommufd/hw_pagetable.c +++ b/drivers/iommu/iommufd/hw_pagetable.c @@ -284,3 +284,59 @@ int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd) iommufd_put_object(&idev->obj); return rc; } + +/* + * size of page table type specific invalidate_info, indexed by + * enum iommu_hwpt_type. + */ +static const size_t iommufd_hwpt_invalidate_info_size[] = {}; + +int iommufd_hwpt_invalidate(struct iommufd_ucmd *ucmd) +{ + struct iommu_hwpt_invalidate *cmd = ucmd->cmd; + struct iommufd_hw_pagetable *hwpt; + u64 user_ptr; + u32 user_data_len, klen; + int rc = 0; + + /* + * For a user-managed HWPT, type should not be IOMMU_HWPT_TYPE_DEFAULT. + * data_len should not exceed the size of iommufd_invalidate_buffer. + */ + if (cmd->data_type == IOMMU_HWPT_TYPE_DEFAULT || !cmd->data_len || + cmd->data_type >= ARRAY_SIZE(iommufd_hwpt_invalidate_info_size)) + return -EOPNOTSUPP; + + hwpt = iommufd_get_hwpt(ucmd, cmd->hwpt_id); + if (IS_ERR(hwpt)) + return PTR_ERR(hwpt); + + /* Do not allow any kernel-managed hw_pagetable */ + if (!hwpt->parent) { + rc = -EINVAL; + goto out_put_hwpt; + } + + klen = iommufd_hwpt_invalidate_info_size[cmd->data_type]; + if (!klen) { + rc = -EINVAL; + goto out_put_hwpt; + } + + /* + * Copy the needed fields before reusing the ucmd buffer, this + * avoids memory allocation in this path. 
+ */ + user_ptr = cmd->data_uptr; + user_data_len = cmd->data_len; + + rc = copy_struct_from_user(cmd, klen, + u64_to_user_ptr(user_ptr), user_data_len); + if (rc) + goto out_put_hwpt; + + hwpt->domain->ops->cache_invalidate_user(hwpt->domain, cmd); +out_put_hwpt: + iommufd_put_object(&hwpt->obj); + return rc; +} diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h index 182c074eecdc..d879264d1acf 100644 --- a/drivers/iommu/iommufd/iommufd_private.h +++ b/drivers/iommu/iommufd/iommufd_private.h @@ -265,6 +265,7 @@ struct iommufd_hw_pagetable * iommufd_hw_pagetable_detach(struct iommufd_device *idev); void iommufd_hw_pagetable_destroy(struct iommufd_object *obj); int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd); +int iommufd_hwpt_invalidate(struct iommufd_ucmd *ucmd); static inline void iommufd_hw_pagetable_put(struct iommufd_ctx *ictx, struct iommufd_hw_pagetable *hwpt) @@ -276,6 +277,14 @@ static inline void iommufd_hw_pagetable_put(struct iommufd_ctx *ictx, refcount_dec(&hwpt->obj.users); } +static inline struct iommufd_hw_pagetable * +iommufd_get_hwpt(struct iommufd_ucmd *ucmd, u32 id) +{ + return container_of(iommufd_get_object(ucmd->ictx, id, + IOMMUFD_OBJ_HW_PAGETABLE), + struct iommufd_hw_pagetable, obj); +} + struct iommufd_group { struct kref ref; struct mutex lock; diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c index 7ab1e2c638a1..2cf45f65b637 100644 --- a/drivers/iommu/iommufd/main.c +++ b/drivers/iommu/iommufd/main.c @@ -263,6 +263,7 @@ union ucmd_buffer { struct iommu_destroy destroy; struct iommu_hwpt_alloc hwpt; struct iommu_hw_info info; + struct iommu_hwpt_invalidate cache; struct iommu_ioas_alloc alloc; struct iommu_ioas_allow_iovas allow_iovas; struct iommu_ioas_copy ioas_copy; @@ -298,6 +299,8 @@ static const struct iommufd_ioctl_op iommufd_ioctl_ops[] = { data_uptr), IOCTL_OP(IOMMU_DEVICE_GET_HW_INFO, iommufd_device_get_hw_info, struct iommu_hw_info, __reserved), + 
IOCTL_OP(IOMMU_HWPT_INVALIDATE, iommufd_hwpt_invalidate, + struct iommu_hwpt_invalidate, data_uptr), IOCTL_OP(IOMMU_IOAS_ALLOC, iommufd_ioas_alloc_ioctl, struct iommu_ioas_alloc, out_ioas_id), IOCTL_OP(IOMMU_IOAS_ALLOW_IOVAS, iommufd_ioas_allow_iovas, diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index 48781ff40a37..d0962c41f8d6 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -47,6 +47,7 @@ enum { IOMMUFD_CMD_VFIO_IOAS, IOMMUFD_CMD_HWPT_ALLOC, IOMMUFD_CMD_DEVICE_GET_HW_INFO, + IOMMUFD_CMD_HWPT_INVALIDATE, }; /** @@ -447,4 +448,30 @@ struct iommu_hw_info { __u32 __reserved; }; #define IOMMU_DEVICE_GET_HW_INFO _IO(IOMMUFD_TYPE, IOMMUFD_CMD_DEVICE_GET_HW_INFO) + +/** + * struct iommu_hwpt_invalidate - ioctl(IOMMU_HWPT_INVALIDATE) + * @size: sizeof(struct iommu_hwpt_invalidate) + * @hwpt_id: HWPT ID of target hardware page table for the invalidation + * @data_type: One of enum iommu_hwpt_type + * @data_len: Length of the type specific data + * @data_uptr: User pointer to the type specific data + * + * Invalidate the iommu cache for user-managed page table. Modifications + * on user-managed page table should be followed with this operation to + * sync the IOTLB. This is only needed by user-managed hw_pagetables, so + * the @data_type should never be IOMMU_HWPT_TYPE_DEFAULT. + * + * +==============================+========================================+ + * | @data_type | Data structure in @data_uptr | + * +------------------------------+----------------------------------------+ + */ +struct iommu_hwpt_invalidate { + __u32 size; + __u32 hwpt_id; + __u32 data_type; + __u32 data_len; + __aligned_u64 data_uptr; +}; +#define IOMMU_HWPT_INVALIDATE _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_INVALIDATE) #endif