Message ID | 20231020222804.21850-8-joao.m.martins@oracle.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | IOMMUFD Dirty Tracking | expand |
On Sat, Oct 21, 2023, at 00:27, Joao Martins wrote: > +int iommufd_check_iova_range(struct iommufd_ioas *ioas, > + struct iommu_hwpt_get_dirty_bitmap *bitmap) > +{ > + unsigned long npages; > + size_t iommu_pgsize; > + int rc = -EINVAL; > + > + if (!bitmap->page_size) > + return rc; > + > + npages = bitmap->length / bitmap->page_size; The 64-bit division causes a link failure on 32-bit architectures: arm-linux-gnueabi-ld: drivers/iommu/iommufd/hw_pagetable.o: in function `iommufd_check_iova_range': hw_pagetable.c:(.text+0x77e): undefined reference to `__aeabi_uldivmod' arm-linux-gnueabi/bin/arm-linux-gnueabi-ld: drivers/iommu/iommufd/hw_pagetable.o: in function `iommufd_hwpt_get_dirty_bitmap': hw_pagetable.c:(.text+0x84c): undefined reference to `__aeabi_uldivmod' I think it is safe to assume that the length of the bitmap does not overflow an 'unsigned long', so it's probably best to add a range check plus type cast, rather than an expensive div_u64() here. > +/** > + * struct iommu_hwpt_get_dirty_bitmap - ioctl(IOMMU_HWPT_GET_DIRTY_BITMAP) > + * @size: sizeof(struct iommu_hwpt_get_dirty_bitmap) > + * @hwpt_id: HW pagetable ID that represents the IOMMU domain > + * @flags: Must be zero > + * @iova: base IOVA of the bitmap first bit > + * @length: IOVA range size > + * @page_size: page size granularity of each bit in the bitmap > + * @data: bitmap where to set the dirty bits. The bitmap bits each > + * represent a page_size which you deviate from an arbitrary iova. > + * > + * Checking a given IOVA is dirty: > + * > + * data[(iova / page_size) / 64] & (1ULL << ((iova / page_size) % 64)) > + * > + * Walk the IOMMU pagetables for a given IOVA range to return a bitmap > + * with the dirty IOVAs. In doing so it will also by default clear any > + * dirty bit metadata set in the IOPTE. > + */ > +struct iommu_hwpt_get_dirty_bitmap { > + __u32 size; > + __u32 hwpt_id; > + __u32 flags; > + __u32 __reserved; > + __aligned_u64 iova; > + __aligned_u64 length; > + __aligned_u64 page_size; > + __aligned_u64 *data; > +}; > +#define IOMMU_HWPT_GET_DIRTY_BITMAP _IO(IOMMUFD_TYPE, \ > + IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP) > + This is a flawed definition for an ioctl data structure. While it appears that you have tried hard to follow the recommendations in Documentation/driver-api/ioctl.rst, you accidentally added a pointer here, which breaks compat mode handling because of the uninitialized padding after the 32-bit 'data' pointer. The correct fix here is to use a __u64 value for the pointer itself and convert it like: diff --git a/drivers/iommu/iommufd/io_pagetable.c b/drivers/iommu/iommufd/io_pagetable.c index a26f8ae1034dd..3f6db5b18c266 100644 --- a/drivers/iommu/iommufd/io_pagetable.c +++ b/drivers/iommu/iommufd/io_pagetable.c @@ -465,7 +465,7 @@ iommu_read_and_clear_dirty(struct iommu_domain *domain, return -EOPNOTSUPP; iter = iova_bitmap_alloc(bitmap->iova, bitmap->length, - bitmap->page_size, bitmap->data); + bitmap->page_size, u64_to_user_ptr(bitmap->data)); if (IS_ERR(iter)) return -ENOMEM; diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index 50f98f01fe100..7fbf2d8a3c9b8 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -538,7 +538,7 @@ struct iommu_hwpt_get_dirty_bitmap { __aligned_u64 iova; __aligned_u64 length; __aligned_u64 page_size; - __aligned_u64 *data; + __aligned_u64 data; }; #define IOMMU_HWPT_GET_DIRTY_BITMAP _IO(IOMMUFD_TYPE, \ IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP) Arnd
On 23/10/2023 10:09, Arnd Bergmann wrote: > On Sat, Oct 21, 2023, at 00:27, Joao Martins wrote: > >> +int iommufd_check_iova_range(struct iommufd_ioas *ioas, >> + struct iommu_hwpt_get_dirty_bitmap *bitmap) >> +{ >> + unsigned long npages; >> + size_t iommu_pgsize; >> + int rc = -EINVAL; >> + >> + if (!bitmap->page_size) >> + return rc; >> + >> + npages = bitmap->length / bitmap->page_size; > > > The 64-bit division causes a link failure on 32-bit architectures: > > arm-linux-gnueabi-ld: drivers/iommu/iommufd/hw_pagetable.o: in function `iommufd_check_iova_range': > hw_pagetable.c:(.text+0x77e): undefined reference to `__aeabi_uldivmod' > arm-linux-gnueabi/bin/arm-linux-gnueabi-ld: drivers/iommu/iommufd/hw_pagetable.o: in function `iommufd_hwpt_get_dirty_bitmap': > hw_pagetable.c:(.text+0x84c): undefined reference to `__aeabi_uldivmod' > > I think it is safe to assume that the length of the bitmap > does not overflow an 'unsigned long', Also I do check for the overflow but comes later; I should move the overflow check from iopt_read_and_clear_dirty_data() into here as it's the only entry point of that function, while deleting the duplicated alignment check from iopt_read_and_clear_dirty_data(). > so it's probably > best to add a range check plus type cast, rather than an > expensive div_u64() here. > OK >> +/** >> + * struct iommu_hwpt_get_dirty_bitmap - ioctl(IOMMU_HWPT_GET_DIRTY_BITMAP) >> + * @size: sizeof(struct iommu_hwpt_get_dirty_bitmap) >> + * @hwpt_id: HW pagetable ID that represents the IOMMU domain >> + * @flags: Must be zero >> + * @iova: base IOVA of the bitmap first bit >> + * @length: IOVA range size >> + * @page_size: page size granularity of each bit in the bitmap >> + * @data: bitmap where to set the dirty bits. The bitmap bits each >> + * represent a page_size which you deviate from an arbitrary iova. >> + * >> + * Checking a given IOVA is dirty: >> + * >> + * data[(iova / page_size) / 64] & (1ULL << ((iova / page_size) % 64)) >> + * >> + * Walk the IOMMU pagetables for a given IOVA range to return a bitmap >> + * with the dirty IOVAs. In doing so it will also by default clear any >> + * dirty bit metadata set in the IOPTE. >> + */ >> +struct iommu_hwpt_get_dirty_bitmap { >> + __u32 size; >> + __u32 hwpt_id; >> + __u32 flags; >> + __u32 __reserved; >> + __aligned_u64 iova; >> + __aligned_u64 length; >> + __aligned_u64 page_size; >> + __aligned_u64 *data; >> +}; >> +#define IOMMU_HWPT_GET_DIRTY_BITMAP _IO(IOMMUFD_TYPE, \ >> + IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP) >> + > > This is a flawed definition for an ioctl data structure. While > it appears that you have tried hard to follow the recommendations > in Documentation/driver-api/ioctl.rst, you accidentally added > a pointer here, which breaks compat mode handling because of > the uninitialized padding after the 32-bit 'data' pointer. > Right > The correct fix here is to use a __u64 value for the pointer > itself and convert it like: > > diff --git a/drivers/iommu/iommufd/io_pagetable.c b/drivers/iommu/iommufd/io_pagetable.c > index a26f8ae1034dd..3f6db5b18c266 100644 > --- a/drivers/iommu/iommufd/io_pagetable.c > +++ b/drivers/iommu/iommufd/io_pagetable.c > @@ -465,7 +465,7 @@ iommu_read_and_clear_dirty(struct iommu_domain *domain, > return -EOPNOTSUPP; > > iter = iova_bitmap_alloc(bitmap->iova, bitmap->length, > - bitmap->page_size, bitmap->data); > + bitmap->page_size, u64_to_user_ptr(bitmap->data)); > if (IS_ERR(iter)) > return -ENOMEM; > > diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h > index 50f98f01fe100..7fbf2d8a3c9b8 100644 > --- a/include/uapi/linux/iommufd.h > +++ b/include/uapi/linux/iommufd.h > @@ -538,7 +538,7 @@ struct iommu_hwpt_get_dirty_bitmap { > __aligned_u64 iova; > __aligned_u64 length; > __aligned_u64 page_size; > - __aligned_u64 *data; > + __aligned_u64 data; > }; This was meant to be like this from the beginning but it appears I've made the the mistake since the first version. Thanks for both; I will fix it and respin it for v6.
On Mon, Oct 23, 2023 at 10:28:13AM +0100, Joao Martins wrote: > > so it's probably > > best to add a range check plus type cast, rather than an > > expensive div_u64() here. > > OK Just keep it simple, we don't need to optimize for 32 bit. div_u64 will make the compiler happy. > >> +struct iommu_hwpt_get_dirty_bitmap { > >> + __u32 size; > >> + __u32 hwpt_id; > >> + __u32 flags; > >> + __u32 __reserved; > >> + __aligned_u64 iova; > >> + __aligned_u64 length; > >> + __aligned_u64 page_size; > >> + __aligned_u64 *data; > >> +}; > >> +#define IOMMU_HWPT_GET_DIRTY_BITMAP _IO(IOMMUFD_TYPE, \ > >> + IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP) > >> + > > > > This is a flawed definition for an ioctl data structure. While > > it appears that you have tried hard to follow the recommendations > > in Documentation/driver-api/ioctl.rst, you accidentally added > > a pointer here, which breaks compat mode handling because of > > the uninitialized padding after the 32-bit 'data' pointer. > > > Right oops how did we all miss that extra character :| Jason
On Mon, Oct 23, 2023, at 14:10, Jason Gunthorpe wrote: > On Mon, Oct 23, 2023 at 10:28:13AM +0100, Joao Martins wrote: >> > so it's probably >> > best to add a range check plus type cast, rather than an >> > expensive div_u64() here. >> >> OK > > Just keep it simple, we don't need to optimize for 32 bit. div_u64 > will make the compiler happy. Fair enough. FWIW, I tried adding just the range check to see if that would make the compiler turn it into a 32-bit division, but that didn't work. Some type of range check might still be good to have for unrelated reasons. Arnd
On 23/10/2023 13:41, Arnd Bergmann wrote: > On Mon, Oct 23, 2023, at 14:10, Jason Gunthorpe wrote: >> On Mon, Oct 23, 2023 at 10:28:13AM +0100, Joao Martins wrote: >>>> so it's probably >>>> best to add a range check plus type cast, rather than an >>>> expensive div_u64() here. >>> >>> OK >> >> Just keep it simple, we don't need to optimize for 32 bit. div_u64 >> will make the compiler happy. > > Fair enough. FWIW, I tried adding just the range check to see > if that would make the compiler turn it into a 32-bit division, > but that didn't work. > > Some type of range check might still be good to have for > unrelated reasons. I can reproduce the arm32 build problem and I'm applying this diff below to this patch to fix it. It essentially moves all the checks to iommufd_check_iova_range(), including range-check and adding div_u64. Additionally, perhaps should also move the iommufd_check_iova_range() invocation via io_pagetable.c code rather than hw-pagetable code? It seems to make more sense as there's nothing hw-pagetable specific that needs to be in here. diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c index a25042b0d3ba..4705954c51fe 100644 --- a/drivers/iommu/iommufd/hw_pagetable.c +++ b/drivers/iommu/iommufd/hw_pagetable.c @@ -224,23 +224,27 @@ int iommufd_hwpt_set_dirty_tracking(struct iommufd_ucmd *ucmd) int iommufd_check_iova_range(struct iommufd_ioas *ioas, struct iommu_hwpt_get_dirty_bitmap *bitmap) { - unsigned long npages; + unsigned long npages, last_iova, iova = bitmap->iova; + unsigned long length = bitmap->length; size_t iommu_pgsize; int rc = -EINVAL; if (!bitmap->page_size) return rc; - npages = bitmap->length / bitmap->page_size; + if (check_add_overflow(iova, length - 1, &last_iova)) + return -EOVERFLOW; + + npages = div_u64(bitmap->length, bitmap->page_size); if (!npages || (npages > ULONG_MAX)) return rc; iommu_pgsize = ioas->iopt.iova_alignment; - if (bitmap->iova & (iommu_pgsize - 1)) + if (iova & (iommu_pgsize - 1)) return rc; - if (!bitmap->length || bitmap->length & (iommu_pgsize - 1)) + if (length || length & (iommu_pgsize - 1)) return rc; return 0; diff --git a/drivers/iommu/iommufd/io_pagetable.c b/drivers/iommu/iommufd/io_pagetable.c index 9955797862eb..00f5f60dc27e 100644 --- a/drivers/iommu/iommufd/io_pagetable.c +++ b/drivers/iommu/iommufd/io_pagetable.c @@ -486,15 +486,7 @@ int iopt_read_and_clear_dirty_data(struct io_pagetable *iopt, unsigned long flags, struct iommu_hwpt_get_dirty_bitmap *bitmap) { - unsigned long last_iova, iova = bitmap->iova; - unsigned long length = bitmap->length; - int ret = -EINVAL; - - if ((iova & (iopt->iova_alignment - 1))) - return -EINVAL; - - if (check_add_overflow(iova, length - 1, &last_iova)) - return -EOVERFLOW; + int ret; down_read(&iopt->iova_rwsem); ret = iommu_read_and_clear_dirty(domain, iopt, flags, bitmap);
On Mon, Oct 23, 2023 at 04:56:01PM +0100, Joao Martins wrote: > On 23/10/2023 13:41, Arnd Bergmann wrote: > > On Mon, Oct 23, 2023, at 14:10, Jason Gunthorpe wrote: > >> On Mon, Oct 23, 2023 at 10:28:13AM +0100, Joao Martins wrote: > >>>> so it's probably > >>>> best to add a range check plus type cast, rather than an > >>>> expensive div_u64() here. > >>> > >>> OK > >> > >> Just keep it simple, we don't need to optimize for 32 bit. div_u64 > >> will make the compiler happy. > > > > Fair enough. FWIW, I tried adding just the range check to see > > if that would make the compiler turn it into a 32-bit division, > > but that didn't work. > > > > Some type of range check might still be good to have for > > unrelated reasons. > > I can reproduce the arm32 build problem and I'm applying this diff below to this > patch to fix it. It essentially moves all the checks to > iommufd_check_iova_range(), including range-check and adding div_u64. > > Additionally, perhaps should also move the iommufd_check_iova_range() invocation > via io_pagetable.c code rather than hw-pagetable code? It seems to make more > sense as there's nothing hw-pagetable specific that needs to be in here. Don't you need the IOAS though? Write it like this: int iommufd_check_iova_range(struct iommufd_ioas *ioas, struct iommu_hwpt_get_dirty_bitmap *bitmap) { size_t iommu_pgsize = ioas->iopt.iova_alignment; u64 last_iova; if (check_add_overflow(bitmap->iova, bitmap->length - 1, &last_iova)) return -EOVERFLOW; if (bitmap->iova > ULONG_MAX || last_iova > ULONG_MAX) return -EOVERFLOW; if ((bitmap->iova & (iommu_pgsize - 1)) || ((last_iova + 1) & (iommu_pgsize - 1))) return -EINVAL; return 0; } And if 0 should really be rejected then check iova == last_iova Jason
On 23/10/2023 17:16, Jason Gunthorpe wrote: > On Mon, Oct 23, 2023 at 04:56:01PM +0100, Joao Martins wrote: >> On 23/10/2023 13:41, Arnd Bergmann wrote: >>> On Mon, Oct 23, 2023, at 14:10, Jason Gunthorpe wrote: >>>> On Mon, Oct 23, 2023 at 10:28:13AM +0100, Joao Martins wrote: >>>>>> so it's probably >>>>>> best to add a range check plus type cast, rather than an >>>>>> expensive div_u64() here. >>>>> >>>>> OK >>>> >>>> Just keep it simple, we don't need to optimize for 32 bit. div_u64 >>>> will make the compiler happy. >>> >>> Fair enough. FWIW, I tried adding just the range check to see >>> if that would make the compiler turn it into a 32-bit division, >>> but that didn't work. >>> >>> Some type of range check might still be good to have for >>> unrelated reasons. >> >> I can reproduce the arm32 build problem and I'm applying this diff below to this >> patch to fix it. It essentially moves all the checks to >> iommufd_check_iova_range(), including range-check and adding div_u64. >> >> Additionally, perhaps should also move the iommufd_check_iova_range() invocation >> via io_pagetable.c code rather than hw-pagetable code? It seems to make more >> sense as there's nothing hw-pagetable specific that needs to be in here. > > Don't you need the IOAS though? > No, for these checks I only need the iopt, which I already pass into iopt_read_and_clear_dirty_data(). Everything can really be placed in the later. > Write it like this: > > int iommufd_check_iova_range(struct iommufd_ioas *ioas, > struct iommu_hwpt_get_dirty_bitmap *bitmap) > { > size_t iommu_pgsize = ioas->iopt.iova_alignment; > u64 last_iova; > > if (check_add_overflow(bitmap->iova, bitmap->length - 1, &last_iova)) > return -EOVERFLOW; > > if (bitmap->iova > ULONG_MAX || last_iova > ULONG_MAX) > return -EOVERFLOW; > > if ((bitmap->iova & (iommu_pgsize - 1)) || > ((last_iova + 1) & (iommu_pgsize - 1))) > return -EINVAL; > return 0; > } > > And if 0 should really be rejected then check iova == last_iova It should; Perhaps extending the above and replicate that second the ::page_size alignment check is important as it's what's used by the bitmap e.g. int iommufd_check_iova_range(struct iommufd_ioas *ioas, struct iommu_hwpt_get_dirty_bitmap *bitmap) { size_t iommu_pgsize = ioas->iopt.iova_alignment; u64 last_iova; if (check_add_overflow(bitmap->iova, bitmap->length - 1, &last_iova)) return -EOVERFLOW; if (bitmap->iova > ULONG_MAX || last_iova > ULONG_MAX) return -EOVERFLOW; if ((bitmap->iova & (iommu_pgsize - 1)) || ((last_iova + 1) & (iommu_pgsize - 1))) return -EINVAL; if ((bitmap->iova & (bitmap->page_size - 1)) || ((last_iova + 1) & (bitmap->page_size - 1))) return -EINVAL; return 0; }
On Mon, Oct 23, 2023 at 05:31:22PM +0100, Joao Martins wrote: > > Write it like this: > > > > int iommufd_check_iova_range(struct iommufd_ioas *ioas, > > struct iommu_hwpt_get_dirty_bitmap *bitmap) > > { > > size_t iommu_pgsize = ioas->iopt.iova_alignment; > > u64 last_iova; > > > > if (check_add_overflow(bitmap->iova, bitmap->length - 1, &last_iova)) > > return -EOVERFLOW; > > > > if (bitmap->iova > ULONG_MAX || last_iova > ULONG_MAX) > > return -EOVERFLOW; > > > > if ((bitmap->iova & (iommu_pgsize - 1)) || > > ((last_iova + 1) & (iommu_pgsize - 1))) > > return -EINVAL; > > return 0; > > } > > > > And if 0 should really be rejected then check iova == last_iova > > It should; Perhaps extending the above and replicate that second the ::page_size > alignment check is important as it's what's used by the bitmap e.g. That makes sense, much clearer what it is trying to do that way Jason
On 23/10/2023 17:34, Jason Gunthorpe wrote: > On Mon, Oct 23, 2023 at 05:31:22PM +0100, Joao Martins wrote: >>> Write it like this: >>> >>> int iommufd_check_iova_range(struct iommufd_ioas *ioas, >>> struct iommu_hwpt_get_dirty_bitmap *bitmap) >>> { >>> size_t iommu_pgsize = ioas->iopt.iova_alignment; >>> u64 last_iova; >>> >>> if (check_add_overflow(bitmap->iova, bitmap->length - 1, &last_iova)) >>> return -EOVERFLOW; >>> >>> if (bitmap->iova > ULONG_MAX || last_iova > ULONG_MAX) >>> return -EOVERFLOW; >>> >>> if ((bitmap->iova & (iommu_pgsize - 1)) || >>> ((last_iova + 1) & (iommu_pgsize - 1))) >>> return -EINVAL; >>> return 0; >>> } >>> >>> And if 0 should really be rejected then check iova == last_iova >> >> It should; Perhaps extending the above and replicate that second the ::page_size >> alignment check is important as it's what's used by the bitmap e.g. > > That makes sense, much clearer what it is trying to do that way In regards to clearity, I am still checking if bitmap::page_size being 0 or not, to avoid being so implicitly in the last check i.e. (bitmap->iova & (bitmap->page_size - 1)) being an and to 0xfffffffff.
On Mon, Oct 23, 2023 at 06:55:56PM +0100, Joao Martins wrote: > On 23/10/2023 17:34, Jason Gunthorpe wrote: > > On Mon, Oct 23, 2023 at 05:31:22PM +0100, Joao Martins wrote: > >>> Write it like this: > >>> > >>> int iommufd_check_iova_range(struct iommufd_ioas *ioas, > >>> struct iommu_hwpt_get_dirty_bitmap *bitmap) > >>> { > >>> size_t iommu_pgsize = ioas->iopt.iova_alignment; > >>> u64 last_iova; > >>> > >>> if (check_add_overflow(bitmap->iova, bitmap->length - 1, &last_iova)) > >>> return -EOVERFLOW; > >>> > >>> if (bitmap->iova > ULONG_MAX || last_iova > ULONG_MAX) > >>> return -EOVERFLOW; > >>> > >>> if ((bitmap->iova & (iommu_pgsize - 1)) || > >>> ((last_iova + 1) & (iommu_pgsize - 1))) > >>> return -EINVAL; > >>> return 0; > >>> } > >>> > >>> And if 0 should really be rejected then check iova == last_iova > >> > >> It should; Perhaps extending the above and replicate that second the ::page_size > >> alignment check is important as it's what's used by the bitmap e.g. > > > > That makes sense, much clearer what it is trying to do that way > > In regards to clearity, I am still checking if bitmap::page_size being 0 or not, > to avoid being so implicitly in the last check i.e. (bitmap->iova & > (bitmap->page_size - 1)) being an and to 0xfffffffff. I think you have to check explicitly, iova/last_iova could also be zero.. Jason
diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c index db7ff8c99e29..72be43fa5248 100644 --- a/drivers/iommu/iommufd/hw_pagetable.c +++ b/drivers/iommu/iommufd/hw_pagetable.c @@ -220,3 +220,55 @@ int iommufd_hwpt_set_dirty_tracking(struct iommufd_ucmd *ucmd) iommufd_put_object(&hwpt->obj); return rc; } + +int iommufd_check_iova_range(struct iommufd_ioas *ioas, + struct iommu_hwpt_get_dirty_bitmap *bitmap) +{ + unsigned long npages; + size_t iommu_pgsize; + int rc = -EINVAL; + + if (!bitmap->page_size) + return rc; + + npages = bitmap->length / bitmap->page_size; + if (!npages || (npages > ULONG_MAX)) + return rc; + + iommu_pgsize = ioas->iopt.iova_alignment; + + if (bitmap->iova & (iommu_pgsize - 1)) + return rc; + + if (!bitmap->length || bitmap->length & (iommu_pgsize - 1)) + return rc; + + return 0; +} + +int iommufd_hwpt_get_dirty_bitmap(struct iommufd_ucmd *ucmd) +{ + struct iommu_hwpt_get_dirty_bitmap *cmd = ucmd->cmd; + struct iommufd_hw_pagetable *hwpt; + struct iommufd_ioas *ioas; + int rc = -EOPNOTSUPP; + + if ((cmd->flags || cmd->__reserved)) + return -EOPNOTSUPP; + + hwpt = iommufd_get_hwpt(ucmd, cmd->hwpt_id); + if (IS_ERR(hwpt)) + return PTR_ERR(hwpt); + + ioas = hwpt->ioas; + rc = iommufd_check_iova_range(ioas, cmd); + if (rc) + goto out_put; + + rc = iopt_read_and_clear_dirty_data(&ioas->iopt, hwpt->domain, + cmd->flags, cmd); + +out_put: + iommufd_put_object(&hwpt->obj); + return rc; +} diff --git a/drivers/iommu/iommufd/io_pagetable.c b/drivers/iommu/iommufd/io_pagetable.c index 535d73466e15..cb168fc415b0 100644 --- a/drivers/iommu/iommufd/io_pagetable.c +++ b/drivers/iommu/iommufd/io_pagetable.c @@ -15,6 +15,7 @@ #include <linux/err.h> #include <linux/slab.h> #include <linux/errno.h> +#include <uapi/linux/iommufd.h> #include "io_pagetable.h" #include "double_span.h" @@ -412,6 +413,95 @@ int iopt_map_user_pages(struct iommufd_ctx *ictx, struct io_pagetable *iopt, return 0; } +struct iova_bitmap_fn_arg { + struct io_pagetable *iopt; + struct iommu_domain *domain; + struct iommu_dirty_bitmap *dirty; +}; + +static int __iommu_read_and_clear_dirty(struct iova_bitmap *bitmap, + unsigned long iova, size_t length, + void *opaque) +{ + struct iopt_area *area; + struct iopt_area_contig_iter iter; + struct iova_bitmap_fn_arg *arg = opaque; + struct iommu_domain *domain = arg->domain; + struct iommu_dirty_bitmap *dirty = arg->dirty; + const struct iommu_dirty_ops *ops = domain->dirty_ops; + unsigned long last_iova = iova + length - 1; + int ret; + + iopt_for_each_contig_area(&iter, area, arg->iopt, iova, last_iova) { + unsigned long last = min(last_iova, iopt_area_last_iova(area)); + + ret = ops->read_and_clear_dirty(domain, iter.cur_iova, + last - iter.cur_iova + 1, + 0, dirty); + if (ret) + return ret; + } + + if (!iopt_area_contig_done(&iter)) + return -EINVAL; + return 0; +} + +static int iommu_read_and_clear_dirty(struct iommu_domain *domain, + struct io_pagetable *iopt, + unsigned long flags, + struct iommu_hwpt_get_dirty_bitmap *bitmap) +{ + const struct iommu_dirty_ops *ops = domain->dirty_ops; + struct iommu_iotlb_gather gather; + struct iommu_dirty_bitmap dirty; + struct iova_bitmap_fn_arg arg; + struct iova_bitmap *iter; + int ret = 0; + + if (!ops || !ops->read_and_clear_dirty) + return -EOPNOTSUPP; + + iter = iova_bitmap_alloc(bitmap->iova, bitmap->length, + bitmap->page_size, bitmap->data); + if (IS_ERR(iter)) + return -ENOMEM; + + iommu_dirty_bitmap_init(&dirty, iter, &gather); + + arg.iopt = iopt; + arg.domain = domain; + arg.dirty = &dirty; + iova_bitmap_for_each(iter, &arg, __iommu_read_and_clear_dirty); + + iommu_iotlb_sync(domain, &gather); + iova_bitmap_free(iter); + + return ret; +} + +int iopt_read_and_clear_dirty_data(struct io_pagetable *iopt, + struct iommu_domain *domain, + unsigned long flags, + struct iommu_hwpt_get_dirty_bitmap *bitmap) +{ + unsigned long last_iova, iova = bitmap->iova; + unsigned long length = bitmap->length; + int ret = -EINVAL; + + if ((iova & (iopt->iova_alignment - 1))) + return -EINVAL; + + if (check_add_overflow(iova, length - 1, &last_iova)) + return -EOVERFLOW; + + down_read(&iopt->iova_rwsem); + ret = iommu_read_and_clear_dirty(domain, iopt, flags, bitmap); + up_read(&iopt->iova_rwsem); + + return ret; +} + static int iopt_clear_dirty_data(struct io_pagetable *iopt, struct iommu_domain *domain) { diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h index f896f47c705a..a2f83cd9706d 100644 --- a/drivers/iommu/iommufd/iommufd_private.h +++ b/drivers/iommu/iommufd/iommufd_private.h @@ -8,6 +8,8 @@ #include <linux/xarray.h> #include <linux/refcount.h> #include <linux/uaccess.h> +#include <linux/iommu.h> +#include <linux/iova_bitmap.h> #include <uapi/linux/iommufd.h> struct iommu_domain; @@ -71,6 +73,10 @@ int iopt_unmap_iova(struct io_pagetable *iopt, unsigned long iova, unsigned long length, unsigned long *unmapped); int iopt_unmap_all(struct io_pagetable *iopt, unsigned long *unmapped); +int iopt_read_and_clear_dirty_data(struct io_pagetable *iopt, + struct iommu_domain *domain, + unsigned long flags, + struct iommu_hwpt_get_dirty_bitmap *bitmap); int iopt_set_dirty_tracking(struct io_pagetable *iopt, struct iommu_domain *domain, bool enable); @@ -226,6 +232,8 @@ int iommufd_option_rlimit_mode(struct iommu_option *cmd, struct iommufd_ctx *ictx); int iommufd_vfio_ioas(struct iommufd_ucmd *ucmd); +int iommufd_check_iova_range(struct iommufd_ioas *ioas, + struct iommu_hwpt_get_dirty_bitmap *bitmap); /* * A HW pagetable is called an iommu_domain inside the kernel. This user object @@ -252,6 +260,8 @@ static inline struct iommufd_hw_pagetable *iommufd_get_hwpt( struct iommufd_hw_pagetable, obj); } int iommufd_hwpt_set_dirty_tracking(struct iommufd_ucmd *ucmd); +int iommufd_hwpt_get_dirty_bitmap(struct iommufd_ucmd *ucmd); + struct iommufd_hw_pagetable * iommufd_hw_pagetable_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas, struct iommufd_device *idev, u32 flags, diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c index 46fedd779714..d50f42a730aa 100644 --- a/drivers/iommu/iommufd/main.c +++ b/drivers/iommu/iommufd/main.c @@ -307,6 +307,7 @@ union ucmd_buffer { struct iommu_destroy destroy; struct iommu_hw_info info; struct iommu_hwpt_alloc hwpt; + struct iommu_hwpt_get_dirty_bitmap get_dirty_bitmap; struct iommu_hwpt_set_dirty_tracking set_dirty_tracking; struct iommu_ioas_alloc alloc; struct iommu_ioas_allow_iovas allow_iovas; @@ -343,6 +344,8 @@ static const struct iommufd_ioctl_op iommufd_ioctl_ops[] = { __reserved), IOCTL_OP(IOMMU_HWPT_ALLOC, iommufd_hwpt_alloc, struct iommu_hwpt_alloc, __reserved), + IOCTL_OP(IOMMU_HWPT_GET_DIRTY_BITMAP, iommufd_hwpt_get_dirty_bitmap, + struct iommu_hwpt_get_dirty_bitmap, data), IOCTL_OP(IOMMU_HWPT_SET_DIRTY_TRACKING, iommufd_hwpt_set_dirty_tracking, struct iommu_hwpt_set_dirty_tracking, __reserved), IOCTL_OP(IOMMU_IOAS_ALLOC, iommufd_ioas_alloc_ioctl, @@ -555,5 +558,6 @@ MODULE_ALIAS_MISCDEV(VFIO_MINOR); MODULE_ALIAS("devname:vfio/vfio"); #endif MODULE_IMPORT_NS(IOMMUFD_INTERNAL); +MODULE_IMPORT_NS(IOMMUFD); MODULE_DESCRIPTION("I/O Address Space Management for passthrough devices"); MODULE_LICENSE("GPL"); diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index 525b6e5cedee..06724bbe8af3 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -48,6 +48,7 @@ enum { IOMMUFD_CMD_HWPT_ALLOC, IOMMUFD_CMD_GET_HW_INFO, IOMMUFD_CMD_HWPT_SET_DIRTY_TRACKING, + IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP, }; /** @@ -480,4 +481,37 @@ struct iommu_hwpt_set_dirty_tracking { }; #define IOMMU_HWPT_SET_DIRTY_TRACKING _IO(IOMMUFD_TYPE, \ IOMMUFD_CMD_HWPT_SET_DIRTY_TRACKING) + +/** + * struct iommu_hwpt_get_dirty_bitmap - ioctl(IOMMU_HWPT_GET_DIRTY_BITMAP) + * @size: sizeof(struct iommu_hwpt_get_dirty_bitmap) + * @hwpt_id: HW pagetable ID that represents the IOMMU domain + * @flags: Must be zero + * @iova: base IOVA of the bitmap first bit + * @length: IOVA range size + * @page_size: page size granularity of each bit in the bitmap + * @data: bitmap where to set the dirty bits. The bitmap bits each + * represent a page_size which you deviate from an arbitrary iova. + * + * Checking a given IOVA is dirty: + * + * data[(iova / page_size) / 64] & (1ULL << ((iova / page_size) % 64)) + * + * Walk the IOMMU pagetables for a given IOVA range to return a bitmap + * with the dirty IOVAs. In doing so it will also by default clear any + * dirty bit metadata set in the IOPTE. + */ +struct iommu_hwpt_get_dirty_bitmap { + __u32 size; + __u32 hwpt_id; + __u32 flags; + __u32 __reserved; + __aligned_u64 iova; + __aligned_u64 length; + __aligned_u64 page_size; + __aligned_u64 *data; +}; +#define IOMMU_HWPT_GET_DIRTY_BITMAP _IO(IOMMUFD_TYPE, \ + IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP) + #endif