[i-g-t,2/2] tests/gem_exec_await: Add a memory pressure subtest

Message ID 20181119152229.8390-2-tvrtko.ursulin@linux.intel.com
State New, archived
Series [i-g-t,1/2] tests/gem_exec_await: Relax the busy spinner

Commit Message

Tvrtko Ursulin Nov. 19, 2018, 3:22 p.m. UTC
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

The memory pressure subtest attempts to provoke system overload, which
can cause GPU hangs, especially when combined with spin batches that do
not include any nop instructions to provide relief.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 tests/i915/gem_exec_await.c | 107 ++++++++++++++++++++++++++++++++++++
 1 file changed, 107 insertions(+)

Comments

Chris Wilson Nov. 19, 2018, 3:36 p.m. UTC | #1
Quoting Tvrtko Ursulin (2018-11-19 15:22:29)
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> The memory pressure subtest attempts to provoke system overload, which
> can cause GPU hangs, especially when combined with spin batches that do
> not include any nop instructions to provide relief.
> 
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>  tests/i915/gem_exec_await.c | 107 ++++++++++++++++++++++++++++++++++++
>  1 file changed, 107 insertions(+)
> 
> diff --git a/tests/i915/gem_exec_await.c b/tests/i915/gem_exec_await.c
> index 3ea5b5903c6b..ccb5159a6fe1 100644
> --- a/tests/i915/gem_exec_await.c
> +++ b/tests/i915/gem_exec_await.c
> @@ -30,6 +30,11 @@
>  
>  #include <sys/ioctl.h>
>  #include <sys/signal.h>
> +#include <sys/types.h>
> +#include <sys/stat.h>
> +#include <fcntl.h>
> +#include <pthread.h>
> +#include <sched.h>
>  
>  #define LOCAL_I915_EXEC_NO_RELOC (1<<11)
>  #define LOCAL_I915_EXEC_HANDLE_LUT (1<<12)
> @@ -227,6 +232,92 @@ static void wide(int fd, int ring_size, int timeout, unsigned int flags)
>         free(exec);
>  }
>  
> +struct thread {
> +       pthread_t thread;
> +       volatile bool done;
> +};
> +
> +static unsigned long get_avail_ram_mb(void)

intel_get_avail_ram_mb() ?

> +#define PAGE_SIZE 4096
> +static void *mempressure(void *arg)
> +{
> +       struct thread *thread = arg;
> +       const unsigned int sz_mb = 2;
> +       const unsigned int sz = sz_mb << 20;
> +       unsigned int n = 0, max = 0;
> +       unsigned int blocks;
> +       void **ptr = NULL;
> +
> +       while (!thread->done) {

You can use READ_ONCE(thread->done) here for familiarity.

> +               unsigned long ram_mb = get_avail_ram_mb();
> +
> +               if (!ptr) {
> +                       blocks = ram_mb / sz_mb;
> +                       ptr = calloc(blocks, sizeof(void *));
> +                       igt_assert(ptr);
> +               } else if (ram_mb < 384) {
> +                       blocks = max + 1;
> +               }
> +
> +               if (ptr[n])
> +                       munmap(ptr[n], sz);
> +
> +               ptr[n] = mmap(NULL, sz, PROT_WRITE,
> +                             MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
> +               assert(ptr[n] != MAP_FAILED);
> +
> +               madvise(ptr[n], sz, MADV_HUGEPAGE);
> +
> +               for (size_t page = 0; page < sz; page += PAGE_SIZE)
> +                       *(volatile uint32_t *)((unsigned char *)ptr[n] + page) =
> +                               0;
> +
> +               if (n > max)
> +                       max = n;
> +
> +               n++;
> +
> +               if (n >= blocks)
> +                       n = 0;

Another method would be to use mlock to force exhaustion.

However, as the supposition is that RCU is part of the underlying
mechanism, if you fill the dentry cache we'll exercise both the shrinker
and RCU.
-Chris
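
For reference, the READ_ONCE mentioned above is conventionally the
kernel-style idiom sketched below; the exact definition carried by IGT
may differ, and the loop is only an illustration of how the flag read in
mempressure() would change, not the patch code itself.

#include <pthread.h>
#include <stdbool.h>

/* Usual kernel-style READ_ONCE idiom (a sketch; IGT's definition may differ). */
#define READ_ONCE(x) (*(volatile typeof(x) *)&(x))

struct thread {
	pthread_t thread;
	bool done;	/* would no longer need to be volatile with READ_ONCE */
};

static void *mempressure(void *arg)
{
	struct thread *thread = arg;

	while (!READ_ONCE(thread->done)) {
		/* ... allocate and touch memory as in the patch ... */
	}

	return NULL;
}
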
Tvrtko Ursulin Nov. 19, 2018, 3:54 p.m. UTC | #2
On 19/11/2018 15:36, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-11-19 15:22:29)
>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>
>> The memory pressure subtest attempts to provoke system overload, which
>> can cause GPU hangs, especially when combined with spin batches that do
>> not include any nop instructions to provide relief.
>>
>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>> ---
>>   tests/i915/gem_exec_await.c | 107 ++++++++++++++++++++++++++++++++++++
>>   1 file changed, 107 insertions(+)
>>
>> diff --git a/tests/i915/gem_exec_await.c b/tests/i915/gem_exec_await.c
>> index 3ea5b5903c6b..ccb5159a6fe1 100644
>> --- a/tests/i915/gem_exec_await.c
>> +++ b/tests/i915/gem_exec_await.c
>> @@ -30,6 +30,11 @@
>>   
>>   #include <sys/ioctl.h>
>>   #include <sys/signal.h>
>> +#include <sys/types.h>
>> +#include <sys/stat.h>
>> +#include <fcntl.h>
>> +#include <pthread.h>
>> +#include <sched.h>
>>   
>>   #define LOCAL_I915_EXEC_NO_RELOC (1<<11)
>>   #define LOCAL_I915_EXEC_HANDLE_LUT (1<<12)
>> @@ -227,6 +232,92 @@ static void wide(int fd, int ring_size, int timeout, unsigned int flags)
>>          free(exec);
>>   }
>>   
>> +struct thread {
>> +       pthread_t thread;
>> +       volatile bool done;
>> +};
>> +
>> +static unsigned long get_avail_ram_mb(void)
> 
> intel_get_avail_ram_mb() ?

I thought so but when things went slow I looked inside and concluded it 
is not suitable.

>> +#define PAGE_SIZE 4096
>> +static void *mempressure(void *arg)
>> +{
>> +       struct thread *thread = arg;
>> +       const unsigned int sz_mb = 2;
>> +       const unsigned int sz = sz_mb << 20;
>> +       unsigned int n = 0, max = 0;
>> +       unsigned int blocks;
>> +       void **ptr = NULL;
>> +
>> +       while (!thread->done) {
> 
> You can use READ_ONCE(thread->done) here for familiarity.

Okay, didn't realize we copied it to IGT.

>> +               unsigned long ram_mb = get_avail_ram_mb();
>> +
>> +               if (!ptr) {
>> +                       blocks = ram_mb / sz_mb;
>> +                       ptr = calloc(blocks, sizeof(void *));
>> +                       igt_assert(ptr);
>> +               } else if (ram_mb < 384) {
>> +                       blocks = max + 1;
>> +               }
>> +
>> +               if (ptr[n])
>> +                       munmap(ptr[n], sz);
>> +
>> +               ptr[n] = mmap(NULL, sz, PROT_WRITE,
>> +                             MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
>> +               assert(ptr[n] != MAP_FAILED);
>> +
>> +               madvise(ptr[n], sz, MADV_HUGEPAGE);
>> +
>> +               for (size_t page = 0; page < sz; page += PAGE_SIZE)
>> +                       *(volatile uint32_t *)((unsigned char *)ptr[n] + page) =
>> +                               0;
>> +
>> +               if (n > max)
>> +                       max = n;
>> +
>> +               n++;
>> +
>> +               if (n >= blocks)
>> +                       n = 0;
> 
> Another method would be to use mlock to force exhaustion.
> 
> However, as the supposition is that RCU is part of the underlying
> mechanism, if you fill the dentry cache we'll exercise both the shrinker
> and RCU.

As said in the previous reply, in my testing (at least for the one thing
I was able to reproduce which has the same symptoms as the bug) the
problem went away with the addition of nops.

But yeah, maybe that could be an indirect effect.

Also, this cleaned-up patch does not cut it any longer. :( It seems I've
lost the magic ingredient to reproduce the stalls during cleanups. I
have to go back and add stuff to get it back.

Regards,

Tvrtko
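
As an aside, the dentry-cache route Chris mentions could look roughly
like the sketch below (hypothetical, not part of the posted patch):
stat()ing a stream of unique, nonexistent paths leaves negative dentries
behind, so reclaim eventually has to go through both the shrinker and
the RCU-deferred dentry freeing.

#include <stdbool.h>
#include <stdio.h>
#include <sys/stat.h>

/* Hypothetical dentry-cache pressure thread, not part of the posted patch. */
static void *dentrypressure(void *arg)
{
	volatile bool *done = arg;
	unsigned long n = 0;
	struct stat st;
	char path[128];

	while (!*done) {
		/* Each unique lookup miss leaves a negative dentry behind. */
		snprintf(path, sizeof(path), "/tmp/.igt-dcache-%lu", n++);
		stat(path, &st);
	}

	return NULL;
}
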
Chris Wilson Nov. 19, 2018, 5:07 p.m. UTC | #3
Quoting Tvrtko Ursulin (2018-11-19 15:54:44)
> 
> On 19/11/2018 15:36, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2018-11-19 15:22:29)
> >> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >> +static unsigned long get_avail_ram_mb(void)
> > 
> > intel_get_avail_ram_mb() ?
> 
> I thought so but when things went slow I looked inside and concluded it 
> is not suitable.

Oh... That'll be the purge_vm_caches. We probably want to split it out.
-Chris

Patch

diff --git a/tests/i915/gem_exec_await.c b/tests/i915/gem_exec_await.c
index 3ea5b5903c6b..ccb5159a6fe1 100644
--- a/tests/i915/gem_exec_await.c
+++ b/tests/i915/gem_exec_await.c
@@ -30,6 +30,11 @@ 
 
 #include <sys/ioctl.h>
 #include <sys/signal.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <pthread.h>
+#include <sched.h>
 
 #define LOCAL_I915_EXEC_NO_RELOC (1<<11)
 #define LOCAL_I915_EXEC_HANDLE_LUT (1<<12)
@@ -227,6 +232,92 @@  static void wide(int fd, int ring_size, int timeout, unsigned int flags)
 	free(exec);
 }
 
+struct thread {
+	pthread_t thread;
+	volatile bool done;
+};
+
+static unsigned long get_meminfo(const char *info, const char *tag)
+{
+	const char *str;
+	unsigned long val;
+
+	str = strstr(info, tag);
+	if (str && sscanf(str + strlen(tag), " %lu", &val) == 1)
+		return val >> 10;
+
+	igt_warn("Unrecognised /proc/meminfo field: '%s'\n", tag);
+	return 0;
+}
+
+static unsigned long get_avail_ram_mb(void)
+{
+	int fd;
+	int ret;
+	char buf[4096];
+	unsigned long ram;
+
+	fd = open("/proc/meminfo", O_RDONLY);
+	igt_assert_fd(fd);
+
+	ret = read(fd, buf, sizeof(buf));
+	igt_assert(ret >= 0);
+
+	close(fd);
+
+	ram = get_meminfo(buf, "MemAvailable:");
+	ram += get_meminfo(buf, "Buffers:");
+	ram += get_meminfo(buf, "Cached:");
+	ram += get_meminfo(buf, "SwapCached:");
+
+	return ram;
+}
+
+#define PAGE_SIZE 4096
+static void *mempressure(void *arg)
+{
+	struct thread *thread = arg;
+	const unsigned int sz_mb = 2;
+	const unsigned int sz = sz_mb << 20;
+	unsigned int n = 0, max = 0;
+	unsigned int blocks;
+	void **ptr = NULL;
+
+	while (!thread->done) {
+		unsigned long ram_mb = get_avail_ram_mb();
+
+		if (!ptr) {
+			blocks = ram_mb / sz_mb;
+			ptr = calloc(blocks, sizeof(void *));
+			igt_assert(ptr);
+		} else if (ram_mb < 384) {
+			blocks = max + 1;
+		}
+
+		if (ptr[n])
+			munmap(ptr[n], sz);
+
+		ptr[n] = mmap(NULL, sz, PROT_WRITE,
+			      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+		assert(ptr[n] != MAP_FAILED);
+
+		madvise(ptr[n], sz, MADV_HUGEPAGE);
+
+		for (size_t page = 0; page < sz; page += PAGE_SIZE)
+			*(volatile uint32_t *)((unsigned char *)ptr[n] + page) =
+				0;
+
+		if (n > max)
+			max = n;
+
+		n++;
+
+		if (n >= blocks)
+			n = 0;
+	}
+
+	return NULL;
+}
 igt_main
 {
 	int ring_size = 0;
@@ -255,6 +346,22 @@  igt_main
 		wide(device, ring_size, 20, CONTEXTS);
 	}
 
+	igt_subtest("wide-contexts-mempressure") {
+		struct thread thread = { };
+		int ret;
+
+		gem_require_contexts(device);
+
+		ret = pthread_create(&thread.thread, NULL, mempressure,
+				     &thread);
+		igt_assert_eq(ret, 0);
+
+		wide(device, ring_size, 20, CONTEXTS);
+
+		thread.done = true;
+		pthread_join(thread.thread, NULL);
+	}
+
 	igt_fixture {
 		igt_stop_hang_detector();
 		close(device);