diff mbox

tests/gem_userptr_blits: Race between object creation and multi-threaded mm ops

Message ID 1405332206-6292-1-git-send-email-tvrtko.ursulin@linux.intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Tvrtko Ursulin July 14, 2014, 10:03 a.m. UTC
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Userptr v23 was not thread safe against memory map operations and object
creation from separate threads. MMU notifier callback would get triggered
on a partially constructed object causing a NULL pointer dereference.

This test excercises that path a bit. In my testing it would trigger it
every time and easily, but unfortunately a test pass here does not guarantee
the absence of the race.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
---
 tests/Makefile.am         |  2 ++
 tests/gem_userptr_blits.c | 54 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 56 insertions(+)

Comments

Chris Wilson July 14, 2014, 10:34 a.m. UTC | #1
On Mon, Jul 14, 2014 at 11:03:26AM +0100, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> Userptr v23 was not thread safe against memory map operations and object
> creation from separate threads. MMU notifier callback would get triggered
> on a partially constructed object causing a NULL pointer dereference.
> 
> This test excercises that path a bit. In my testing it would trigger it
> every time and easily, but unfortunately a test pass here does not guarantee
> the absence of the race.
> 
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  tests/Makefile.am         |  2 ++
>  tests/gem_userptr_blits.c | 54 +++++++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 56 insertions(+)
> 
> diff --git a/tests/Makefile.am b/tests/Makefile.am
> index 2878624..e207509 100644
> --- a/tests/Makefile.am
> +++ b/tests/Makefile.am
> @@ -65,6 +65,8 @@ prime_self_import_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS)
>  prime_self_import_LDADD = $(LDADD) -lpthread
>  gen7_forcewake_mt_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS)
>  gen7_forcewake_mt_LDADD = $(LDADD) -lpthread
> +gem_userptr_blits_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS)
> +gem_userptr_blits_LDADD = $(LDADD) -lpthread
>  
>  gem_wait_render_timeout_LDADD = $(LDADD) -lrt
>  kms_flip_LDADD = $(LDADD) -lrt -lpthread
> diff --git a/tests/gem_userptr_blits.c b/tests/gem_userptr_blits.c
> index 2eb127f..0213868 100644
> --- a/tests/gem_userptr_blits.c
> +++ b/tests/gem_userptr_blits.c
> @@ -47,6 +47,7 @@
>  #include <sys/time.h>
>  #include <sys/mman.h>
>  #include <signal.h>
> +#include <pthread.h>
>  
>  #include "drm.h"
>  #include "i915_drm.h"
> @@ -1107,6 +1108,56 @@ static int test_unmap_cycles(int fd, int expected)
>  	return 0;
>  }
>  
> +static volatile int stop_mm_stress_thread;

> +static void *mm_stress_thread(void *data)
> +{
> +        void *ptr;
> +        int ret;
> +
> +        while (!stop_mm_stress_thread) {
> +		ptr = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE,
> +				MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
> +		assert(ptr != MAP_FAILED);
> +		ret = munmap(ptr, PAGE_SIZE);
> +		assert(ret == 0);
> +        }
> +
> +        return NULL;
> +}
> +
> +static int test_stress_mm(int fd)
> +{
> +	int ret;
> +	pthread_t t;
> +	unsigned int loops = 100000;
> +	uint32_t handle;
> +	void *ptr;
> +
> +	assert(posix_memalign(&ptr, PAGE_SIZE, PAGE_SIZE) == 0);
> +
> +	ret = pthread_create(&t, NULL, mm_stress_thread, NULL);
> +	assert(ret == 0);
> +
> +	while (loops--) {
> +		ret = gem_userptr(fd, ptr, PAGE_SIZE, 0, &handle);
> +		assert(ret == 0);
> +
> +		gem_close(fd, handle);
> +	}
> +
> +	stop_mm_stress_thread = 1;
> +
> +	free(ptr);
> +
> +	ret = pthread_cancel(t);

You don't have any cancellation points in the loop. (mmap may or may not
be, it is not required to be.)

But rather than use a global, just pass a pointer to a local struct.

Oh, and igt_assert. But kill the asserts in mm_stress_thread() first.
-Chris
Tvrtko Ursulin July 14, 2014, 10:44 a.m. UTC | #2
On 07/14/2014 11:34 AM, Chris Wilson wrote:
> On Mon, Jul 14, 2014 at 11:03:26AM +0100, Tvrtko Ursulin wrote:
>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>
>> Userptr v23 was not thread safe against memory map operations and object
>> creation from separate threads. MMU notifier callback would get triggered
>> on a partially constructed object causing a NULL pointer dereference.
>>
>> This test excercises that path a bit. In my testing it would trigger it
>> every time and easily, but unfortunately a test pass here does not guarantee
>> the absence of the race.
>>
>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>> ---
>>   tests/Makefile.am         |  2 ++
>>   tests/gem_userptr_blits.c | 54 +++++++++++++++++++++++++++++++++++++++++++++++
>>   2 files changed, 56 insertions(+)
>>
>> diff --git a/tests/Makefile.am b/tests/Makefile.am
>> index 2878624..e207509 100644
>> --- a/tests/Makefile.am
>> +++ b/tests/Makefile.am
>> @@ -65,6 +65,8 @@ prime_self_import_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS)
>>   prime_self_import_LDADD = $(LDADD) -lpthread
>>   gen7_forcewake_mt_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS)
>>   gen7_forcewake_mt_LDADD = $(LDADD) -lpthread
>> +gem_userptr_blits_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS)
>> +gem_userptr_blits_LDADD = $(LDADD) -lpthread
>>
>>   gem_wait_render_timeout_LDADD = $(LDADD) -lrt
>>   kms_flip_LDADD = $(LDADD) -lrt -lpthread
>> diff --git a/tests/gem_userptr_blits.c b/tests/gem_userptr_blits.c
>> index 2eb127f..0213868 100644
>> --- a/tests/gem_userptr_blits.c
>> +++ b/tests/gem_userptr_blits.c
>> @@ -47,6 +47,7 @@
>>   #include <sys/time.h>
>>   #include <sys/mman.h>
>>   #include <signal.h>
>> +#include <pthread.h>
>>
>>   #include "drm.h"
>>   #include "i915_drm.h"
>> @@ -1107,6 +1108,56 @@ static int test_unmap_cycles(int fd, int expected)
>>   	return 0;
>>   }
>>
>> +static volatile int stop_mm_stress_thread;
>
>> +static void *mm_stress_thread(void *data)
>> +{
>> +        void *ptr;
>> +        int ret;
>> +
>> +        while (!stop_mm_stress_thread) {
>> +		ptr = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE,
>> +				MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
>> +		assert(ptr != MAP_FAILED);
>> +		ret = munmap(ptr, PAGE_SIZE);
>> +		assert(ret == 0);
>> +        }
>> +
>> +        return NULL;
>> +}
>> +
>> +static int test_stress_mm(int fd)
>> +{
>> +	int ret;
>> +	pthread_t t;
>> +	unsigned int loops = 100000;
>> +	uint32_t handle;
>> +	void *ptr;
>> +
>> +	assert(posix_memalign(&ptr, PAGE_SIZE, PAGE_SIZE) == 0);
>> +
>> +	ret = pthread_create(&t, NULL, mm_stress_thread, NULL);
>> +	assert(ret == 0);
>> +
>> +	while (loops--) {
>> +		ret = gem_userptr(fd, ptr, PAGE_SIZE, 0, &handle);
>> +		assert(ret == 0);
>> +
>> +		gem_close(fd, handle);
>> +	}
>> +
>> +	stop_mm_stress_thread = 1;
>> +
>> +	free(ptr);
>> +
>> +	ret = pthread_cancel(t);
>
> You don't have any cancellation points in the loop. (mmap may or may not
> be, it is not required to be.)
>
> But rather than use a global, just pass a pointer to a local struct.

It doesn't need both a cancellation point and a flag. Should I just add 
pthread_testcancel in the loop and not have any flag at all?

> Oh, and igt_assert. But kill the asserts in mm_stress_thread() first.

Why remove completely? My thinking was to use assert vs igt_assert to 
distinguish between assumptions about system behaviour, and igt_assert 
for assertions about tested functionality.

Tvrtko
Chris Wilson July 14, 2014, 1:07 p.m. UTC | #3
> >You don't have any cancellation points in the loop. (mmap may or may not
> >be, it is not required to be.)
> >
> >But rather than use a global, just pass a pointer to a local struct.
> 
> It doesn't need both a cancellation point and a flag. Should I just
> add pthread_testcancel in the loop and not have any flag at all?

testcancel also neatly avoids the handwavely lack of mb().

> >Oh, and igt_assert. But kill the asserts in mm_stress_thread() first.
> 
> Why remove completely? My thinking was to use assert vs igt_assert
> to distinguish between assumptions about system behaviour, and
> igt_assert for assertions about tested functionality.

If the assert fires you make the igt test runner angry. Might as well 
report a test failure rather than break down completely.
-Chris
Tvrtko Ursulin July 14, 2014, 1:13 p.m. UTC | #4
On 07/14/2014 02:07 PM, Chris Wilson wrote:
>>> You don't have any cancellation points in the loop. (mmap may or may not
>>> be, it is not required to be.)
>>>
>>> But rather than use a global, just pass a pointer to a local struct.
>>
>> It doesn't need both a cancellation point and a flag. Should I just
>> add pthread_testcancel in the loop and not have any flag at all?
>
> testcancel also neatly avoids the handwavely lack of mb().

Barrier for what? But it doesn't matter, I'll re-spin with testcancel.

>>> Oh, and igt_assert. But kill the asserts in mm_stress_thread() first.
>>
>> Why remove completely? My thinking was to use assert vs igt_assert
>> to distinguish between assumptions about system behaviour, and
>> igt_assert for assertions about tested functionality.
>
> If the assert fires you make the igt test runner angry. Might as well
> report a test failure rather than break down completely.

I am not familiar with the test runner, but if it cannot handle a test 
failing in a way other than it expects it so it deserves to be angry. :) 
But OK, I'll change it.

Tvrtko
Chris Wilson July 14, 2014, 1:27 p.m. UTC | #5
On Mon, Jul 14, 2014 at 02:13:22PM +0100, Tvrtko Ursulin wrote:
> On 07/14/2014 02:07 PM, Chris Wilson wrote:
> >>>You don't have any cancellation points in the loop. (mmap may or may not
> >>>be, it is not required to be.)
> >>>
> >>>But rather than use a global, just pass a pointer to a local struct.
> >>
> >>It doesn't need both a cancellation point and a flag. Should I just
> >>add pthread_testcancel in the loop and not have any flag at all?
> >
> >testcancel also neatly avoids the handwavely lack of mb().
> 
> Barrier for what? But it doesn't matter, I'll re-spin with testcancel.

It just makes an assumption that the compiler and processor don't skip
the read. Since it so simple to be pedagolically correct, it seems
pointless to leave it using volatile.
 
> >>>Oh, and igt_assert. But kill the asserts in mm_stress_thread() first.
> >>
> >>Why remove completely? My thinking was to use assert vs igt_assert
> >>to distinguish between assumptions about system behaviour, and
> >>igt_assert for assertions about tested functionality.
> >
> >If the assert fires you make the igt test runner angry. Might as well
> >report a test failure rather than break down completely.
> 
> I am not familiar with the test runner, but if it cannot handle a
> test failing in a way other than it expects it so it deserves to be
> angry. :) But OK, I'll change it.

Actualy the SIGABRT will be delivered to the thread so you just get an
ugly assert and a PASS if you do not propagate the failure...
-Chris
tim.gore@intel.com July 18, 2014, 9:20 a.m. UTC | #6
Unfortunately Android threads do not support cancel and testcancel, so this
Test cannot build for android.
Do we really need a cancellation point, since we don't need to cancel the thread.
Tvrtko's original solution seemed workable, if a bit less polished.

  Tim

> -----Original Message-----
> From: Intel-gfx [mailto:intel-gfx-bounces@lists.freedesktop.org] On Behalf
> Of Chris Wilson
> Sent: Monday, July 14, 2014 2:28 PM
> To: Tvrtko Ursulin
> Cc: Intel-gfx@lists.freedesktop.org
> Subject: Re: [Intel-gfx] [PATCH] tests/gem_userptr_blits: Race between
> object creation and multi-threaded mm ops
> 
> On Mon, Jul 14, 2014 at 02:13:22PM +0100, Tvrtko Ursulin wrote:
> > On 07/14/2014 02:07 PM, Chris Wilson wrote:
> > >>>You don't have any cancellation points in the loop. (mmap may or
> > >>>may not be, it is not required to be.)
> > >>>
> > >>>But rather than use a global, just pass a pointer to a local struct.
> > >>
> > >>It doesn't need both a cancellation point and a flag. Should I just
> > >>add pthread_testcancel in the loop and not have any flag at all?
> > >
> > >testcancel also neatly avoids the handwavely lack of mb().
> >
> > Barrier for what? But it doesn't matter, I'll re-spin with testcancel.
> 
> It just makes an assumption that the compiler and processor don't skip the
> read. Since it so simple to be pedagolically correct, it seems pointless to leave
> it using volatile.
> 
> > >>>Oh, and igt_assert. But kill the asserts in mm_stress_thread() first.
> > >>
> > >>Why remove completely? My thinking was to use assert vs igt_assert
> > >>to distinguish between assumptions about system behaviour, and
> > >>igt_assert for assertions about tested functionality.
> > >
> > >If the assert fires you make the igt test runner angry. Might as well
> > >report a test failure rather than break down completely.
> >
> > I am not familiar with the test runner, but if it cannot handle a test
> > failing in a way other than it expects it so it deserves to be angry.
> > :) But OK, I'll change it.
> 
> Actualy the SIGABRT will be delivered to the thread so you just get an ugly
> assert and a PASS if you do not propagate the failure...
> -Chris
> 
> --
> Chris Wilson, Intel Open Source Technology Centre
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Tvrtko Ursulin July 18, 2014, 9:36 a.m. UTC | #7
On 07/18/2014 10:20 AM, Gore, Tim wrote:
> Unfortunately Android threads do not support cancel and testcancel, so this
> Test cannot build for android.
> Do we really need a cancellation point, since we don't need to cancel the thread.
> Tvrtko's original solution seemed workable, if a bit less polished.

Solution with a flag is fine in my opinion, but I also need to get rid 
of asserts in the thread. This is was Chris was pointing out, I always 
forget that threads have too much independence so SIGBART in a thread 
won't kill the process. I've put it on my TODO for the lower priority 
tasks, since I would still like to propagate any failures in the thread 
to subtest fail.

Tvrtko
diff mbox

Patch

diff --git a/tests/Makefile.am b/tests/Makefile.am
index 2878624..e207509 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -65,6 +65,8 @@  prime_self_import_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS)
 prime_self_import_LDADD = $(LDADD) -lpthread
 gen7_forcewake_mt_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS)
 gen7_forcewake_mt_LDADD = $(LDADD) -lpthread
+gem_userptr_blits_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS)
+gem_userptr_blits_LDADD = $(LDADD) -lpthread
 
 gem_wait_render_timeout_LDADD = $(LDADD) -lrt
 kms_flip_LDADD = $(LDADD) -lrt -lpthread
diff --git a/tests/gem_userptr_blits.c b/tests/gem_userptr_blits.c
index 2eb127f..0213868 100644
--- a/tests/gem_userptr_blits.c
+++ b/tests/gem_userptr_blits.c
@@ -47,6 +47,7 @@ 
 #include <sys/time.h>
 #include <sys/mman.h>
 #include <signal.h>
+#include <pthread.h>
 
 #include "drm.h"
 #include "i915_drm.h"
@@ -1107,6 +1108,56 @@  static int test_unmap_cycles(int fd, int expected)
 	return 0;
 }
 
+static volatile int stop_mm_stress_thread;
+
+static void *mm_stress_thread(void *data)
+{
+        void *ptr;
+        int ret;
+
+        while (!stop_mm_stress_thread) {
+		ptr = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE,
+				MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+		assert(ptr != MAP_FAILED);
+		ret = munmap(ptr, PAGE_SIZE);
+		assert(ret == 0);
+        }
+
+        return NULL;
+}
+
+static int test_stress_mm(int fd)
+{
+	int ret;
+	pthread_t t;
+	unsigned int loops = 100000;
+	uint32_t handle;
+	void *ptr;
+
+	assert(posix_memalign(&ptr, PAGE_SIZE, PAGE_SIZE) == 0);
+
+	ret = pthread_create(&t, NULL, mm_stress_thread, NULL);
+	assert(ret == 0);
+
+	while (loops--) {
+		ret = gem_userptr(fd, ptr, PAGE_SIZE, 0, &handle);
+		assert(ret == 0);
+
+		gem_close(fd, handle);
+	}
+
+	stop_mm_stress_thread = 1;
+
+	free(ptr);
+
+	ret = pthread_cancel(t);
+	assert(ret == 0);
+	ret = pthread_join(t, NULL);
+	assert(ret == 0);
+
+	return 0;
+}
+
 unsigned int total_ram;
 uint64_t aperture_size;
 int fd, count;
@@ -1261,6 +1312,9 @@  int main(int argc, char **argv)
 	igt_subtest("sync-unmap-after-close")
 		test_unmap_after_close(fd);
 
+	igt_subtest("stress-mm")
+	        test_stress_mm(fd);
+
 	igt_subtest("coherency-sync")
 		test_coherency(fd, count);