[i-g-t,2/3] i915/gem_userptr_blits: Exercise userptr + userfaultfd
diff mbox series

Message ID 20191108204932.6197-2-chris@chris-wilson.co.uk
State New
Headers show
Series
  • [i-g-t,1/3] i915/gem_exec_schedule: Split pi-ringfull into two tests
Related show

Commit Message

Chris Wilson Nov. 8, 2019, 8:49 p.m. UTC
Register a userspace fault handler for a memory region that we also pass
to the GPU via userptr, and make sure the pagefault is properly serviced
before we execute.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 tests/i915/gem_userptr_blits.c | 119 ++++++++++++++++++++++++++++++++-
 1 file changed, 118 insertions(+), 1 deletion(-)

Comments

Tvrtko Ursulin Nov. 11, 2019, 4:48 p.m. UTC | #1
On 08/11/2019 20:49, Chris Wilson wrote:
> Register a userspace fault handler for a memory region that we also pass
> to the GPU via userptr, and make sure the pagefault is properly serviced
> before we execute.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   tests/i915/gem_userptr_blits.c | 119 ++++++++++++++++++++++++++++++++-
>   1 file changed, 118 insertions(+), 1 deletion(-)
> 
> diff --git a/tests/i915/gem_userptr_blits.c b/tests/i915/gem_userptr_blits.c
> index 11d6f4a1c..774a9f92c 100644
> --- a/tests/i915/gem_userptr_blits.c
> +++ b/tests/i915/gem_userptr_blits.c
> @@ -36,6 +36,8 @@
>    * The goal is to simply ensure the basics work.
>    */
>   
> +#include <linux/userfaultfd.h>
> +
>   #include "igt.h"
>   #include <stdlib.h>
>   #include <stdio.h>
> @@ -44,9 +46,11 @@
>   #include <inttypes.h>
>   #include <errno.h>
>   #include <setjmp.h>
> +#include <sys/ioctl.h>
> +#include <sys/mman.h>
>   #include <sys/stat.h>
> +#include <sys/syscall.h>
>   #include <sys/time.h>
> -#include <sys/mman.h>
>   #include <glib.h>
>   #include <signal.h>
>   #include <pthread.h>
> @@ -1831,6 +1835,116 @@ static void test_invalidate_close_race(int fd, bool overlap)
>   	free(t_data.ptr);
>   }
>   
> +struct ufd_thread {
> +	uint32_t *page;
> +	int i915;
> +};
> +
> +static uint32_t create_page(int i915, void *page)
> +{
> +	uint32_t handle;
> +
> +	gem_userptr(i915, page, 4096, 0, 0, &handle);
> +	return handle;
> +}
> +
> +static uint32_t create_batch(int i915)
> +{
> +	const uint32_t bbe = MI_BATCH_BUFFER_END;
> +	uint32_t handle;
> +
> +	handle = gem_create(i915, 4096);
> +	gem_write(i915, handle, 0, &bbe, sizeof(bbe));
> +
> +	return handle;
> +}
> +
> +static void *ufd_thread(void *arg)
> +{
> +	struct ufd_thread *t = arg;
> +	struct drm_i915_gem_exec_object2 obj[2] = {
> +		{ .handle = create_page(t->i915, t->page) },
> +		{ .handle = create_batch(t->i915) },
> +	};
> +	struct drm_i915_gem_execbuffer2 eb = {
> +		.buffers_ptr = to_user_pointer(obj),
> +		.buffer_count = ARRAY_SIZE(obj),
> +	};
> +
> +	igt_debug("submitting fault\n");
> +	gem_execbuf(t->i915, &eb);
> +	gem_sync(t->i915, obj[1].handle);
> +
> +	for (int i = 0; i < ARRAY_SIZE(obj); i++)
> +		gem_close(t->i915, obj[i].handle);
> +
> +	t->i915 = -1;
> +	return NULL;
> +}
> +
> +static int userfaultfd(int flags)
> +{
> +	return syscall(SYS_userfaultfd, flags);
> +}
> +
> +static void test_userfault(int i915)
> +{
> +	struct uffdio_api api = { .api = UFFD_API };
> +	struct uffdio_register reg;
> +	struct uffdio_copy copy;
> +	struct uffd_msg msg;
> +	struct ufd_thread t;
> +	pthread_t thread;
> +	char poison[4096];
> +	int ufd;
> +
> +	/*
> +	 * Register a page with userfaultfd, and wrap that inside a userptr bo.
> +	 * When we try to use gup insider userptr_get_pages, it will trigger
> +	 * a pagefault that is sent to the userfaultfd for servicing. This
> +	 * is arbitrarily slow, as the submission must wait until the fault
> +	 * is serviced by the userspace fault handler.
> +	 */
> +
> +	ufd = userfaultfd(0);
> +	igt_require_f(ufd != -1, "kernel support for userfaultfd\n");
> +	igt_require_f(ioctl(ufd, UFFDIO_API, &api) == 0 && api.api == UFFD_API,
> +		      "userfaultfd API v%lld:%lld\n", UFFD_API, api.api);
> +
> +	t.i915 = i915;
> +
> +	t.page = mmap(NULL, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, 0, 0);
> +	igt_assert(t.page != MAP_FAILED);
> +
> +	memset(&reg, 0, sizeof(reg));
> +	reg.mode = UFFDIO_REGISTER_MODE_MISSING;
> +	reg.range.start = to_user_pointer(t.page);
> +	reg.range.len = 4096;
> +	do_ioctl(ufd, UFFDIO_REGISTER, &reg);
> +	igt_assert(reg.ioctls == UFFD_API_RANGE_IOCTLS);
> +
> +	igt_assert(pthread_create(&thread, NULL, ufd_thread, &t) == 0);
> +
> +	/* Wait for the fault */
> +	igt_assert_eq(read(ufd, &msg, sizeof(msg)), sizeof(msg));
> +	igt_assert_eq(msg.event, UFFD_EVENT_PAGEFAULT);
> +	igt_assert(from_user_pointer(msg.arg.pagefault.address) == t.page);
> +
> +	/* Faulting thread remains blocked */
> +	igt_assert_eq(t.i915, i915);

This looks could be a false negative since nothing says the thread is 
not blocked just not got round resetting t->i915.

> +
> +	memset(&copy, 0, sizeof(copy));
> +	copy.dst = msg.arg.pagefault.address;
> +	copy.src = to_user_pointer(memset(poison, 0xc5, sizeof(poison)));
> +	copy.len = 4096;
> +	do_ioctl(ufd, UFFDIO_COPY, &copy);

What is the point of poison data?

Would it work better to have a hanging batch registered with userfault 
and then replace it with a valid batch here? That would ensure execbuf 
was  blocked until userfault handler finishes otherwise we get a GPU hang.

Regards,

Tvrtko

> +
> +	pthread_join(thread, NULL);
> +
> +	munmap(t.page, 4096);
> +	close(ufd);
> +}
> +
>   uint64_t total_ram;
>   uint64_t aperture_size;
>   int fd, count;
> @@ -1902,6 +2016,9 @@ igt_main_args("c:", NULL, help_str, opt_handler, NULL)
>   		igt_subtest("forbidden-operations")
>   			test_forbidden_ops(fd);
>   
> +		igt_subtest("userfault")
> +			test_userfault(fd);
> +
>   		igt_subtest("relocations")
>   			test_relocations(fd);
>   	}
>
Chris Wilson Nov. 11, 2019, 4:58 p.m. UTC | #2
Quoting Tvrtko Ursulin (2019-11-11 16:48:14)
> 
> On 08/11/2019 20:49, Chris Wilson wrote:
> > Register a userspace fault handler for a memory region that we also pass
> > to the GPU via userptr, and make sure the pagefault is properly serviced
> > before we execute.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > ---
> >   tests/i915/gem_userptr_blits.c | 119 ++++++++++++++++++++++++++++++++-
> >   1 file changed, 118 insertions(+), 1 deletion(-)
> > 
> > diff --git a/tests/i915/gem_userptr_blits.c b/tests/i915/gem_userptr_blits.c
> > index 11d6f4a1c..774a9f92c 100644
> > --- a/tests/i915/gem_userptr_blits.c
> > +++ b/tests/i915/gem_userptr_blits.c
> > @@ -36,6 +36,8 @@
> >    * The goal is to simply ensure the basics work.
> >    */
> >   
> > +#include <linux/userfaultfd.h>
> > +
> >   #include "igt.h"
> >   #include <stdlib.h>
> >   #include <stdio.h>
> > @@ -44,9 +46,11 @@
> >   #include <inttypes.h>
> >   #include <errno.h>
> >   #include <setjmp.h>
> > +#include <sys/ioctl.h>
> > +#include <sys/mman.h>
> >   #include <sys/stat.h>
> > +#include <sys/syscall.h>
> >   #include <sys/time.h>
> > -#include <sys/mman.h>
> >   #include <glib.h>
> >   #include <signal.h>
> >   #include <pthread.h>
> > @@ -1831,6 +1835,116 @@ static void test_invalidate_close_race(int fd, bool overlap)
> >       free(t_data.ptr);
> >   }
> >   
> > +struct ufd_thread {
> > +     uint32_t *page;
> > +     int i915;
> > +};
> > +
> > +static uint32_t create_page(int i915, void *page)
> > +{
> > +     uint32_t handle;
> > +
> > +     gem_userptr(i915, page, 4096, 0, 0, &handle);
> > +     return handle;
> > +}
> > +
> > +static uint32_t create_batch(int i915)
> > +{
> > +     const uint32_t bbe = MI_BATCH_BUFFER_END;
> > +     uint32_t handle;
> > +
> > +     handle = gem_create(i915, 4096);
> > +     gem_write(i915, handle, 0, &bbe, sizeof(bbe));
> > +
> > +     return handle;
> > +}
> > +
> > +static void *ufd_thread(void *arg)
> > +{
> > +     struct ufd_thread *t = arg;
> > +     struct drm_i915_gem_exec_object2 obj[2] = {
> > +             { .handle = create_page(t->i915, t->page) },
> > +             { .handle = create_batch(t->i915) },
> > +     };
> > +     struct drm_i915_gem_execbuffer2 eb = {
> > +             .buffers_ptr = to_user_pointer(obj),
> > +             .buffer_count = ARRAY_SIZE(obj),
> > +     };
> > +
> > +     igt_debug("submitting fault\n");
> > +     gem_execbuf(t->i915, &eb);
> > +     gem_sync(t->i915, obj[1].handle);
> > +
> > +     for (int i = 0; i < ARRAY_SIZE(obj); i++)
> > +             gem_close(t->i915, obj[i].handle);
> > +
> > +     t->i915 = -1;
> > +     return NULL;
> > +}
> > +
> > +static int userfaultfd(int flags)
> > +{
> > +     return syscall(SYS_userfaultfd, flags);
> > +}
> > +
> > +static void test_userfault(int i915)
> > +{
> > +     struct uffdio_api api = { .api = UFFD_API };
> > +     struct uffdio_register reg;
> > +     struct uffdio_copy copy;
> > +     struct uffd_msg msg;
> > +     struct ufd_thread t;
> > +     pthread_t thread;
> > +     char poison[4096];
> > +     int ufd;
> > +
> > +     /*
> > +      * Register a page with userfaultfd, and wrap that inside a userptr bo.
> > +      * When we try to use gup insider userptr_get_pages, it will trigger
> > +      * a pagefault that is sent to the userfaultfd for servicing. This
> > +      * is arbitrarily slow, as the submission must wait until the fault
> > +      * is serviced by the userspace fault handler.
> > +      */
> > +
> > +     ufd = userfaultfd(0);
> > +     igt_require_f(ufd != -1, "kernel support for userfaultfd\n");
> > +     igt_require_f(ioctl(ufd, UFFDIO_API, &api) == 0 && api.api == UFFD_API,
> > +                   "userfaultfd API v%lld:%lld\n", UFFD_API, api.api);
> > +
> > +     t.i915 = i915;
> > +
> > +     t.page = mmap(NULL, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, 0, 0);
> > +     igt_assert(t.page != MAP_FAILED);
> > +
> > +     memset(&reg, 0, sizeof(reg));
> > +     reg.mode = UFFDIO_REGISTER_MODE_MISSING;
> > +     reg.range.start = to_user_pointer(t.page);
> > +     reg.range.len = 4096;
> > +     do_ioctl(ufd, UFFDIO_REGISTER, &reg);
> > +     igt_assert(reg.ioctls == UFFD_API_RANGE_IOCTLS);
> > +
> > +     igt_assert(pthread_create(&thread, NULL, ufd_thread, &t) == 0);
> > +
> > +     /* Wait for the fault */
> > +     igt_assert_eq(read(ufd, &msg, sizeof(msg)), sizeof(msg));
> > +     igt_assert_eq(msg.event, UFFD_EVENT_PAGEFAULT);
> > +     igt_assert(from_user_pointer(msg.arg.pagefault.address) == t.page);
> > +
> > +     /* Faulting thread remains blocked */
> > +     igt_assert_eq(t.i915, i915);
> 
> This looks could be a false negative since nothing says the thread is 
> not blocked just not got round resetting t->i915.

There's a gem_sync() in the thread. Our goal is that the thread is
blocked (either at submit or in the sync) until we service the fault.

> > +     memset(&copy, 0, sizeof(copy));
> > +     copy.dst = msg.arg.pagefault.address;
> > +     copy.src = to_user_pointer(memset(poison, 0xc5, sizeof(poison)));
> > +     copy.len = 4096;
> > +     do_ioctl(ufd, UFFDIO_COPY, &copy);
> 
> What is the point of poison data?

Just a tell tale.
 
> Would it work better to have a hanging batch registered with userfault 
> and then replace it with a valid batch here? That would ensure execbuf 
> was  blocked until userfault handler finishes otherwise we get a GPU hang.

Strictly we can't use userptr for the batch itself, as aiui that
requires LLC for the CS coherency. One of the following tests uses the
userfault handler as the sync point for replacing a poisoned batch with
a valid one, with the intention of looking for the GPU hang if the fault
was missed. That's the test I started with, my goal here was to focus
on the userptr + userfault as simply as I could; so gem_sync().
-Chris
Tvrtko Ursulin Nov. 11, 2019, 5:54 p.m. UTC | #3
On 11/11/2019 16:58, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2019-11-11 16:48:14)
>>
>> On 08/11/2019 20:49, Chris Wilson wrote:
>>> Register a userspace fault handler for a memory region that we also pass
>>> to the GPU via userptr, and make sure the pagefault is properly serviced
>>> before we execute.
>>>
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>> ---
>>>    tests/i915/gem_userptr_blits.c | 119 ++++++++++++++++++++++++++++++++-
>>>    1 file changed, 118 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/tests/i915/gem_userptr_blits.c b/tests/i915/gem_userptr_blits.c
>>> index 11d6f4a1c..774a9f92c 100644
>>> --- a/tests/i915/gem_userptr_blits.c
>>> +++ b/tests/i915/gem_userptr_blits.c
>>> @@ -36,6 +36,8 @@
>>>     * The goal is to simply ensure the basics work.
>>>     */
>>>    
>>> +#include <linux/userfaultfd.h>
>>> +
>>>    #include "igt.h"
>>>    #include <stdlib.h>
>>>    #include <stdio.h>
>>> @@ -44,9 +46,11 @@
>>>    #include <inttypes.h>
>>>    #include <errno.h>
>>>    #include <setjmp.h>
>>> +#include <sys/ioctl.h>
>>> +#include <sys/mman.h>
>>>    #include <sys/stat.h>
>>> +#include <sys/syscall.h>
>>>    #include <sys/time.h>
>>> -#include <sys/mman.h>
>>>    #include <glib.h>
>>>    #include <signal.h>
>>>    #include <pthread.h>
>>> @@ -1831,6 +1835,116 @@ static void test_invalidate_close_race(int fd, bool overlap)
>>>        free(t_data.ptr);
>>>    }
>>>    
>>> +struct ufd_thread {
>>> +     uint32_t *page;
>>> +     int i915;
>>> +};
>>> +
>>> +static uint32_t create_page(int i915, void *page)
>>> +{
>>> +     uint32_t handle;
>>> +
>>> +     gem_userptr(i915, page, 4096, 0, 0, &handle);
>>> +     return handle;
>>> +}
>>> +
>>> +static uint32_t create_batch(int i915)
>>> +{
>>> +     const uint32_t bbe = MI_BATCH_BUFFER_END;
>>> +     uint32_t handle;
>>> +
>>> +     handle = gem_create(i915, 4096);
>>> +     gem_write(i915, handle, 0, &bbe, sizeof(bbe));
>>> +
>>> +     return handle;
>>> +}
>>> +
>>> +static void *ufd_thread(void *arg)
>>> +{
>>> +     struct ufd_thread *t = arg;
>>> +     struct drm_i915_gem_exec_object2 obj[2] = {
>>> +             { .handle = create_page(t->i915, t->page) },
>>> +             { .handle = create_batch(t->i915) },
>>> +     };
>>> +     struct drm_i915_gem_execbuffer2 eb = {
>>> +             .buffers_ptr = to_user_pointer(obj),
>>> +             .buffer_count = ARRAY_SIZE(obj),
>>> +     };
>>> +
>>> +     igt_debug("submitting fault\n");
>>> +     gem_execbuf(t->i915, &eb);
>>> +     gem_sync(t->i915, obj[1].handle);
>>> +
>>> +     for (int i = 0; i < ARRAY_SIZE(obj); i++)
>>> +             gem_close(t->i915, obj[i].handle);
>>> +
>>> +     t->i915 = -1;
>>> +     return NULL;
>>> +}
>>> +
>>> +static int userfaultfd(int flags)
>>> +{
>>> +     return syscall(SYS_userfaultfd, flags);
>>> +}
>>> +
>>> +static void test_userfault(int i915)
>>> +{
>>> +     struct uffdio_api api = { .api = UFFD_API };
>>> +     struct uffdio_register reg;
>>> +     struct uffdio_copy copy;
>>> +     struct uffd_msg msg;
>>> +     struct ufd_thread t;
>>> +     pthread_t thread;
>>> +     char poison[4096];
>>> +     int ufd;
>>> +
>>> +     /*
>>> +      * Register a page with userfaultfd, and wrap that inside a userptr bo.
>>> +      * When we try to use gup insider userptr_get_pages, it will trigger
>>> +      * a pagefault that is sent to the userfaultfd for servicing. This
>>> +      * is arbitrarily slow, as the submission must wait until the fault
>>> +      * is serviced by the userspace fault handler.
>>> +      */
>>> +
>>> +     ufd = userfaultfd(0);
>>> +     igt_require_f(ufd != -1, "kernel support for userfaultfd\n");
>>> +     igt_require_f(ioctl(ufd, UFFDIO_API, &api) == 0 && api.api == UFFD_API,
>>> +                   "userfaultfd API v%lld:%lld\n", UFFD_API, api.api);
>>> +
>>> +     t.i915 = i915;
>>> +
>>> +     t.page = mmap(NULL, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, 0, 0);
>>> +     igt_assert(t.page != MAP_FAILED);
>>> +
>>> +     memset(&reg, 0, sizeof(reg));
>>> +     reg.mode = UFFDIO_REGISTER_MODE_MISSING;
>>> +     reg.range.start = to_user_pointer(t.page);
>>> +     reg.range.len = 4096;
>>> +     do_ioctl(ufd, UFFDIO_REGISTER, &reg);
>>> +     igt_assert(reg.ioctls == UFFD_API_RANGE_IOCTLS);
>>> +
>>> +     igt_assert(pthread_create(&thread, NULL, ufd_thread, &t) == 0);
>>> +
>>> +     /* Wait for the fault */
>>> +     igt_assert_eq(read(ufd, &msg, sizeof(msg)), sizeof(msg));
>>> +     igt_assert_eq(msg.event, UFFD_EVENT_PAGEFAULT);
>>> +     igt_assert(from_user_pointer(msg.arg.pagefault.address) == t.page);
>>> +
>>> +     /* Faulting thread remains blocked */
>>> +     igt_assert_eq(t.i915, i915);
>>
>> This looks could be a false negative since nothing says the thread is
>> not blocked just not got round resetting t->i915.
> 
> There's a gem_sync() in the thread. Our goal is that the thread is
> blocked (either at submit or in the sync) until we service the fault.

What I meant was it could have passed gem_execbuf and gem_sync, just not 
got to the t->i915 = -1 line yet. Am I being to pedantic? Maybe using 
output fence and passing it back to parent thread would be easier? 
Parent then does igt_assert_eq(poll(fd, some_timeout), 0).
>>> +     memset(&copy, 0, sizeof(copy));
>>> +     copy.dst = msg.arg.pagefault.address;
>>> +     copy.src = to_user_pointer(memset(poison, 0xc5, sizeof(poison)));
>>> +     copy.len = 4096;
>>> +     do_ioctl(ufd, UFFDIO_COPY, &copy);
>>
>> What is the point of poison data?
> 
> Just a tell tale.
>   
>> Would it work better to have a hanging batch registered with userfault
>> and then replace it with a valid batch here? That would ensure execbuf
>> was  blocked until userfault handler finishes otherwise we get a GPU hang.
> 
> Strictly we can't use userptr for the batch itself, as aiui that
> requires LLC for the CS coherency. One of the following tests uses the
> userfault handler as the sync point for replacing a poisoned batch with
> a valid one, with the intention of looking for the GPU hang if the fault
> was missed. That's the test I started with, my goal here was to focus
> on the userptr + userfault as simply as I could; so gem_sync().

Okay that's fair then.

Regards,

Tvrtko
Chris Wilson Nov. 11, 2019, 6:07 p.m. UTC | #4
Quoting Tvrtko Ursulin (2019-11-11 17:54:27)
> 
> On 11/11/2019 16:58, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2019-11-11 16:48:14)
> >>
> >> On 08/11/2019 20:49, Chris Wilson wrote:
> >>> Register a userspace fault handler for a memory region that we also pass
> >>> to the GPU via userptr, and make sure the pagefault is properly serviced
> >>> before we execute.
> >>>
> >>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >>> ---
> >>>    tests/i915/gem_userptr_blits.c | 119 ++++++++++++++++++++++++++++++++-
> >>>    1 file changed, 118 insertions(+), 1 deletion(-)
> >>>
> >>> diff --git a/tests/i915/gem_userptr_blits.c b/tests/i915/gem_userptr_blits.c
> >>> index 11d6f4a1c..774a9f92c 100644
> >>> --- a/tests/i915/gem_userptr_blits.c
> >>> +++ b/tests/i915/gem_userptr_blits.c
> >>> @@ -36,6 +36,8 @@
> >>>     * The goal is to simply ensure the basics work.
> >>>     */
> >>>    
> >>> +#include <linux/userfaultfd.h>
> >>> +
> >>>    #include "igt.h"
> >>>    #include <stdlib.h>
> >>>    #include <stdio.h>
> >>> @@ -44,9 +46,11 @@
> >>>    #include <inttypes.h>
> >>>    #include <errno.h>
> >>>    #include <setjmp.h>
> >>> +#include <sys/ioctl.h>
> >>> +#include <sys/mman.h>
> >>>    #include <sys/stat.h>
> >>> +#include <sys/syscall.h>
> >>>    #include <sys/time.h>
> >>> -#include <sys/mman.h>
> >>>    #include <glib.h>
> >>>    #include <signal.h>
> >>>    #include <pthread.h>
> >>> @@ -1831,6 +1835,116 @@ static void test_invalidate_close_race(int fd, bool overlap)
> >>>        free(t_data.ptr);
> >>>    }
> >>>    
> >>> +struct ufd_thread {
> >>> +     uint32_t *page;
> >>> +     int i915;
> >>> +};
> >>> +
> >>> +static uint32_t create_page(int i915, void *page)
> >>> +{
> >>> +     uint32_t handle;
> >>> +
> >>> +     gem_userptr(i915, page, 4096, 0, 0, &handle);
> >>> +     return handle;
> >>> +}
> >>> +
> >>> +static uint32_t create_batch(int i915)
> >>> +{
> >>> +     const uint32_t bbe = MI_BATCH_BUFFER_END;
> >>> +     uint32_t handle;
> >>> +
> >>> +     handle = gem_create(i915, 4096);
> >>> +     gem_write(i915, handle, 0, &bbe, sizeof(bbe));
> >>> +
> >>> +     return handle;
> >>> +}
> >>> +
> >>> +static void *ufd_thread(void *arg)
> >>> +{
> >>> +     struct ufd_thread *t = arg;
> >>> +     struct drm_i915_gem_exec_object2 obj[2] = {
> >>> +             { .handle = create_page(t->i915, t->page) },
> >>> +             { .handle = create_batch(t->i915) },
> >>> +     };
> >>> +     struct drm_i915_gem_execbuffer2 eb = {
> >>> +             .buffers_ptr = to_user_pointer(obj),
> >>> +             .buffer_count = ARRAY_SIZE(obj),
> >>> +     };
> >>> +
> >>> +     igt_debug("submitting fault\n");
> >>> +     gem_execbuf(t->i915, &eb);
> >>> +     gem_sync(t->i915, obj[1].handle);
> >>> +
> >>> +     for (int i = 0; i < ARRAY_SIZE(obj); i++)
> >>> +             gem_close(t->i915, obj[i].handle);
> >>> +
> >>> +     t->i915 = -1;
> >>> +     return NULL;
> >>> +}
> >>> +
> >>> +static int userfaultfd(int flags)
> >>> +{
> >>> +     return syscall(SYS_userfaultfd, flags);
> >>> +}
> >>> +
> >>> +static void test_userfault(int i915)
> >>> +{
> >>> +     struct uffdio_api api = { .api = UFFD_API };
> >>> +     struct uffdio_register reg;
> >>> +     struct uffdio_copy copy;
> >>> +     struct uffd_msg msg;
> >>> +     struct ufd_thread t;
> >>> +     pthread_t thread;
> >>> +     char poison[4096];
> >>> +     int ufd;
> >>> +
> >>> +     /*
> >>> +      * Register a page with userfaultfd, and wrap that inside a userptr bo.
> >>> +      * When we try to use gup insider userptr_get_pages, it will trigger
> >>> +      * a pagefault that is sent to the userfaultfd for servicing. This
> >>> +      * is arbitrarily slow, as the submission must wait until the fault
> >>> +      * is serviced by the userspace fault handler.
> >>> +      */
> >>> +
> >>> +     ufd = userfaultfd(0);
> >>> +     igt_require_f(ufd != -1, "kernel support for userfaultfd\n");
> >>> +     igt_require_f(ioctl(ufd, UFFDIO_API, &api) == 0 && api.api == UFFD_API,
> >>> +                   "userfaultfd API v%lld:%lld\n", UFFD_API, api.api);
> >>> +
> >>> +     t.i915 = i915;
> >>> +
> >>> +     t.page = mmap(NULL, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, 0, 0);
> >>> +     igt_assert(t.page != MAP_FAILED);
> >>> +
> >>> +     memset(&reg, 0, sizeof(reg));
> >>> +     reg.mode = UFFDIO_REGISTER_MODE_MISSING;
> >>> +     reg.range.start = to_user_pointer(t.page);
> >>> +     reg.range.len = 4096;
> >>> +     do_ioctl(ufd, UFFDIO_REGISTER, &reg);
> >>> +     igt_assert(reg.ioctls == UFFD_API_RANGE_IOCTLS);
> >>> +
> >>> +     igt_assert(pthread_create(&thread, NULL, ufd_thread, &t) == 0);
> >>> +
> >>> +     /* Wait for the fault */
> >>> +     igt_assert_eq(read(ufd, &msg, sizeof(msg)), sizeof(msg));
> >>> +     igt_assert_eq(msg.event, UFFD_EVENT_PAGEFAULT);
> >>> +     igt_assert(from_user_pointer(msg.arg.pagefault.address) == t.page);
> >>> +
> >>> +     /* Faulting thread remains blocked */
> >>> +     igt_assert_eq(t.i915, i915);
> >>
> >> This looks could be a false negative since nothing says the thread is
> >> not blocked just not got round resetting t->i915.
> > 
> > There's a gem_sync() in the thread. Our goal is that the thread is
> > blocked (either at submit or in the sync) until we service the fault.
> 
> What I meant was it could have passed gem_execbuf and gem_sync, just not 
> got to the t->i915 = -1 line yet. Am I being to pedantic? Maybe using 
> output fence and passing it back to parent thread would be easier? 
> Parent then does igt_assert_eq(poll(fd, some_timeout), 0).

That could only be if the fault never occurred, in which case we would
still be blocking on the read(). I think that's a reasonable level for
us not to care about -- it's the same as our depending on write()/read()
for synchronising between parent & child elsewhere. No?
-Chris
Tvrtko Ursulin Nov. 12, 2019, 8:26 a.m. UTC | #5
On 11/11/2019 18:07, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2019-11-11 17:54:27)
>>
>> On 11/11/2019 16:58, Chris Wilson wrote:
>>> Quoting Tvrtko Ursulin (2019-11-11 16:48:14)
>>>>
>>>> On 08/11/2019 20:49, Chris Wilson wrote:
>>>>> Register a userspace fault handler for a memory region that we also pass
>>>>> to the GPU via userptr, and make sure the pagefault is properly serviced
>>>>> before we execute.
>>>>>
>>>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>>>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>>>> ---
>>>>>     tests/i915/gem_userptr_blits.c | 119 ++++++++++++++++++++++++++++++++-
>>>>>     1 file changed, 118 insertions(+), 1 deletion(-)
>>>>>
>>>>> diff --git a/tests/i915/gem_userptr_blits.c b/tests/i915/gem_userptr_blits.c
>>>>> index 11d6f4a1c..774a9f92c 100644
>>>>> --- a/tests/i915/gem_userptr_blits.c
>>>>> +++ b/tests/i915/gem_userptr_blits.c
>>>>> @@ -36,6 +36,8 @@
>>>>>      * The goal is to simply ensure the basics work.
>>>>>      */
>>>>>     
>>>>> +#include <linux/userfaultfd.h>
>>>>> +
>>>>>     #include "igt.h"
>>>>>     #include <stdlib.h>
>>>>>     #include <stdio.h>
>>>>> @@ -44,9 +46,11 @@
>>>>>     #include <inttypes.h>
>>>>>     #include <errno.h>
>>>>>     #include <setjmp.h>
>>>>> +#include <sys/ioctl.h>
>>>>> +#include <sys/mman.h>
>>>>>     #include <sys/stat.h>
>>>>> +#include <sys/syscall.h>
>>>>>     #include <sys/time.h>
>>>>> -#include <sys/mman.h>
>>>>>     #include <glib.h>
>>>>>     #include <signal.h>
>>>>>     #include <pthread.h>
>>>>> @@ -1831,6 +1835,116 @@ static void test_invalidate_close_race(int fd, bool overlap)
>>>>>         free(t_data.ptr);
>>>>>     }
>>>>>     
>>>>> +struct ufd_thread {
>>>>> +     uint32_t *page;
>>>>> +     int i915;
>>>>> +};
>>>>> +
>>>>> +static uint32_t create_page(int i915, void *page)
>>>>> +{
>>>>> +     uint32_t handle;
>>>>> +
>>>>> +     gem_userptr(i915, page, 4096, 0, 0, &handle);
>>>>> +     return handle;
>>>>> +}
>>>>> +
>>>>> +static uint32_t create_batch(int i915)
>>>>> +{
>>>>> +     const uint32_t bbe = MI_BATCH_BUFFER_END;
>>>>> +     uint32_t handle;
>>>>> +
>>>>> +     handle = gem_create(i915, 4096);
>>>>> +     gem_write(i915, handle, 0, &bbe, sizeof(bbe));
>>>>> +
>>>>> +     return handle;
>>>>> +}
>>>>> +
>>>>> +static void *ufd_thread(void *arg)
>>>>> +{
>>>>> +     struct ufd_thread *t = arg;
>>>>> +     struct drm_i915_gem_exec_object2 obj[2] = {
>>>>> +             { .handle = create_page(t->i915, t->page) },
>>>>> +             { .handle = create_batch(t->i915) },
>>>>> +     };
>>>>> +     struct drm_i915_gem_execbuffer2 eb = {
>>>>> +             .buffers_ptr = to_user_pointer(obj),
>>>>> +             .buffer_count = ARRAY_SIZE(obj),
>>>>> +     };
>>>>> +
>>>>> +     igt_debug("submitting fault\n");
>>>>> +     gem_execbuf(t->i915, &eb);
>>>>> +     gem_sync(t->i915, obj[1].handle);
>>>>> +
>>>>> +     for (int i = 0; i < ARRAY_SIZE(obj); i++)
>>>>> +             gem_close(t->i915, obj[i].handle);
>>>>> +
>>>>> +     t->i915 = -1;
>>>>> +     return NULL;
>>>>> +}
>>>>> +
>>>>> +static int userfaultfd(int flags)
>>>>> +{
>>>>> +     return syscall(SYS_userfaultfd, flags);
>>>>> +}
>>>>> +
>>>>> +static void test_userfault(int i915)
>>>>> +{
>>>>> +     struct uffdio_api api = { .api = UFFD_API };
>>>>> +     struct uffdio_register reg;
>>>>> +     struct uffdio_copy copy;
>>>>> +     struct uffd_msg msg;
>>>>> +     struct ufd_thread t;
>>>>> +     pthread_t thread;
>>>>> +     char poison[4096];
>>>>> +     int ufd;
>>>>> +
>>>>> +     /*
>>>>> +      * Register a page with userfaultfd, and wrap that inside a userptr bo.
>>>>> +      * When we try to use gup insider userptr_get_pages, it will trigger
>>>>> +      * a pagefault that is sent to the userfaultfd for servicing. This
>>>>> +      * is arbitrarily slow, as the submission must wait until the fault
>>>>> +      * is serviced by the userspace fault handler.
>>>>> +      */
>>>>> +
>>>>> +     ufd = userfaultfd(0);
>>>>> +     igt_require_f(ufd != -1, "kernel support for userfaultfd\n");
>>>>> +     igt_require_f(ioctl(ufd, UFFDIO_API, &api) == 0 && api.api == UFFD_API,
>>>>> +                   "userfaultfd API v%lld:%lld\n", UFFD_API, api.api);
>>>>> +
>>>>> +     t.i915 = i915;
>>>>> +
>>>>> +     t.page = mmap(NULL, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, 0, 0);
>>>>> +     igt_assert(t.page != MAP_FAILED);
>>>>> +
>>>>> +     memset(&reg, 0, sizeof(reg));
>>>>> +     reg.mode = UFFDIO_REGISTER_MODE_MISSING;
>>>>> +     reg.range.start = to_user_pointer(t.page);
>>>>> +     reg.range.len = 4096;
>>>>> +     do_ioctl(ufd, UFFDIO_REGISTER, &reg);
>>>>> +     igt_assert(reg.ioctls == UFFD_API_RANGE_IOCTLS);
>>>>> +
>>>>> +     igt_assert(pthread_create(&thread, NULL, ufd_thread, &t) == 0);
>>>>> +
>>>>> +     /* Wait for the fault */
>>>>> +     igt_assert_eq(read(ufd, &msg, sizeof(msg)), sizeof(msg));
>>>>> +     igt_assert_eq(msg.event, UFFD_EVENT_PAGEFAULT);
>>>>> +     igt_assert(from_user_pointer(msg.arg.pagefault.address) == t.page);
>>>>> +
>>>>> +     /* Faulting thread remains blocked */
>>>>> +     igt_assert_eq(t.i915, i915);
>>>>
>>>> This looks could be a false negative since nothing says the thread is
>>>> not blocked just not got round resetting t->i915.
>>>
>>> There's a gem_sync() in the thread. Our goal is that the thread is
>>> blocked (either at submit or in the sync) until we service the fault.
>>
>> What I meant was it could have passed gem_execbuf and gem_sync, just not
>> got to the t->i915 = -1 line yet. Am I being to pedantic? Maybe using
>> output fence and passing it back to parent thread would be easier?
>> Parent then does igt_assert_eq(poll(fd, some_timeout), 0).
> 
> That could only be if the fault never occurred, in which case we would
> still be blocking on the read(). I think that's a reasonable level for
> us not to care about -- it's the same as our depending on write()/read()
> for synchronising between parent & child elsewhere. No?

Using userfaultfd is a bit more complex so I thought of extra checks. 
But okay, if probably is overly paranoid.

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko

Patch
diff mbox series

diff --git a/tests/i915/gem_userptr_blits.c b/tests/i915/gem_userptr_blits.c
index 11d6f4a1c..774a9f92c 100644
--- a/tests/i915/gem_userptr_blits.c
+++ b/tests/i915/gem_userptr_blits.c
@@ -36,6 +36,8 @@ 
  * The goal is to simply ensure the basics work.
  */
 
+#include <linux/userfaultfd.h>
+
 #include "igt.h"
 #include <stdlib.h>
 #include <stdio.h>
@@ -44,9 +46,11 @@ 
 #include <inttypes.h>
 #include <errno.h>
 #include <setjmp.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
 #include <sys/stat.h>
+#include <sys/syscall.h>
 #include <sys/time.h>
-#include <sys/mman.h>
 #include <glib.h>
 #include <signal.h>
 #include <pthread.h>
@@ -1831,6 +1835,116 @@  static void test_invalidate_close_race(int fd, bool overlap)
 	free(t_data.ptr);
 }
 
+struct ufd_thread {
+	uint32_t *page;
+	int i915;
+};
+
+static uint32_t create_page(int i915, void *page)
+{
+	uint32_t handle;
+
+	gem_userptr(i915, page, 4096, 0, 0, &handle);
+	return handle;
+}
+
+static uint32_t create_batch(int i915)
+{
+	const uint32_t bbe = MI_BATCH_BUFFER_END;
+	uint32_t handle;
+
+	handle = gem_create(i915, 4096);
+	gem_write(i915, handle, 0, &bbe, sizeof(bbe));
+
+	return handle;
+}
+
+static void *ufd_thread(void *arg)
+{
+	struct ufd_thread *t = arg;
+	struct drm_i915_gem_exec_object2 obj[2] = {
+		{ .handle = create_page(t->i915, t->page) },
+		{ .handle = create_batch(t->i915) },
+	};
+	struct drm_i915_gem_execbuffer2 eb = {
+		.buffers_ptr = to_user_pointer(obj),
+		.buffer_count = ARRAY_SIZE(obj),
+	};
+
+	igt_debug("submitting fault\n");
+	gem_execbuf(t->i915, &eb);
+	gem_sync(t->i915, obj[1].handle);
+
+	for (int i = 0; i < ARRAY_SIZE(obj); i++)
+		gem_close(t->i915, obj[i].handle);
+
+	t->i915 = -1;
+	return NULL;
+}
+
+static int userfaultfd(int flags)
+{
+	return syscall(SYS_userfaultfd, flags);
+}
+
+static void test_userfault(int i915)
+{
+	struct uffdio_api api = { .api = UFFD_API };
+	struct uffdio_register reg;
+	struct uffdio_copy copy;
+	struct uffd_msg msg;
+	struct ufd_thread t;
+	pthread_t thread;
+	char poison[4096];
+	int ufd;
+
+	/*
+	 * Register a page with userfaultfd, and wrap that inside a userptr bo.
+	 * When we try to use gup insider userptr_get_pages, it will trigger
+	 * a pagefault that is sent to the userfaultfd for servicing. This
+	 * is arbitrarily slow, as the submission must wait until the fault
+	 * is serviced by the userspace fault handler.
+	 */
+
+	ufd = userfaultfd(0);
+	igt_require_f(ufd != -1, "kernel support for userfaultfd\n");
+	igt_require_f(ioctl(ufd, UFFDIO_API, &api) == 0 && api.api == UFFD_API,
+		      "userfaultfd API v%lld:%lld\n", UFFD_API, api.api);
+
+	t.i915 = i915;
+
+	t.page = mmap(NULL, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, 0, 0);
+	igt_assert(t.page != MAP_FAILED);
+
+	memset(&reg, 0, sizeof(reg));
+	reg.mode = UFFDIO_REGISTER_MODE_MISSING;
+	reg.range.start = to_user_pointer(t.page);
+	reg.range.len = 4096;
+	do_ioctl(ufd, UFFDIO_REGISTER, &reg);
+	igt_assert(reg.ioctls == UFFD_API_RANGE_IOCTLS);
+
+	igt_assert(pthread_create(&thread, NULL, ufd_thread, &t) == 0);
+
+	/* Wait for the fault */
+	igt_assert_eq(read(ufd, &msg, sizeof(msg)), sizeof(msg));
+	igt_assert_eq(msg.event, UFFD_EVENT_PAGEFAULT);
+	igt_assert(from_user_pointer(msg.arg.pagefault.address) == t.page);
+
+	/* Faulting thread remains blocked */
+	igt_assert_eq(t.i915, i915);
+
+	memset(&copy, 0, sizeof(copy));
+	copy.dst = msg.arg.pagefault.address;
+	copy.src = to_user_pointer(memset(poison, 0xc5, sizeof(poison)));
+	copy.len = 4096;
+	do_ioctl(ufd, UFFDIO_COPY, &copy);
+
+	pthread_join(thread, NULL);
+
+	munmap(t.page, 4096);
+	close(ufd);
+}
+
 uint64_t total_ram;
 uint64_t aperture_size;
 int fd, count;
@@ -1902,6 +2016,9 @@  igt_main_args("c:", NULL, help_str, opt_handler, NULL)
 		igt_subtest("forbidden-operations")
 			test_forbidden_ops(fd);
 
+		igt_subtest("userfault")
+			test_userfault(fd);
+
 		igt_subtest("relocations")
 			test_relocations(fd);
 	}