diff mbox

[i-g-t] igt/drv_suspend: Suspend under memory pressure

Message ID 20180607205054.2149-1-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show

Commit Message

Chris Wilson June 7, 2018, 8:50 p.m. UTC
Recently we discovered that we have a race between swapping and
suspend in our resume path (we might be trying to page in an object
after disabling the block devices). Let's try to exercise that by
exhausting all of system memory before suspend.

v2: Explicitly share the large memory area on forking to avoid running
out of memory inside the suspend helpers (for they fork!)

References: https://bugs.freedesktop.org/show_bug.cgi?id=106640
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tomi Sarvela <tomi.p.sarvela@intel.com>
Reviewed-by: Antonio Argenziano <antonio.argenziano@intel.com>
---
 lib/igt_core.c      | 34 ++++++++++++++++------------
 lib/igt_core.h      |  1 +
 tests/drv_suspend.c | 54 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 75 insertions(+), 14 deletions(-)

Comments

Musial, Ewelina June 8, 2018, 10:21 a.m. UTC | #1
On Thu, Jun 07, 2018 at 09:50:54PM +0100, Chris Wilson wrote:
> Recently we discovered that we have a race between swapping and
> suspend in our resume path (we might be trying to page in an object
> after disabling the block devices). Let's try to exercise that by
> exhausting all of system memory before suspend.
> 
> v2: Explicitly share the large memory area on forking to avoid running
> out of memory inside the suspend helpers (for they fork!)
> 
> References: https://bugs.freedesktop.org/show_bug.cgi?id=106640
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tomi Sarvela <tomi.p.sarvela@intel.com>
> Reviewed-by: Antonio Argenziano <antonio.argenziano@intel.com>
> ---
>  lib/igt_core.c      | 34 ++++++++++++++++------------
>  lib/igt_core.h      |  1 +
>  tests/drv_suspend.c | 54 +++++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 75 insertions(+), 14 deletions(-)
> 
> diff --git a/lib/igt_core.c b/lib/igt_core.c
> index 5092a3f03..06d8b0370 100644
> --- a/lib/igt_core.c
> +++ b/lib/igt_core.c
> @@ -1702,20 +1702,7 @@ void igt_child_done(pid_t pid)
>  		test_children[i] = test_children[i + 1];
>  }
>  
> -/**
> - * igt_waitchildren:
> - *
> - * Wait for all children forked with igt_fork.
> - *
> - * The magic here is that exit codes from children will be correctly propagated
> - * to the main thread, including the relevant exit code if a child thread failed.
> - * Of course if multiple children failed with different exit codes the resulting
> - * exit code will be non-deterministic.
> - *
> - * Note that igt_skip() will not be forwarded, feature tests need to be done
> - * before spawning threads with igt_fork().
> - */
> -void igt_waitchildren(void)
> +int __igt_waitchildren(void)
>  {
>  	int err = 0;
>  	int count;
> @@ -1761,6 +1748,25 @@ void igt_waitchildren(void)
>  	}
>  
>  	num_test_children = 0;
> +	return err;
> +}
> +
> +/**
> + * igt_waitchildren:
> + *
> + * Wait for all children forked with igt_fork.
> + *
> + * The magic here is that exit codes from children will be correctly propagated
> + * to the main thread, including the relevant exit code if a child thread failed.
> + * Of course if multiple children failed with different exit codes the resulting
> + * exit code will be non-deterministic.
> + *
> + * Note that igt_skip() will not be forwarded, feature tests need to be done
> + * before spawning threads with igt_fork().
> + */
> +void igt_waitchildren(void)
> +{
> +	int err = __igt_waitchildren();
>  	if (err)
>  		igt_fail(err);
>  }
> diff --git a/lib/igt_core.h b/lib/igt_core.h
> index bf8cd66ca..a73b06493 100644
> --- a/lib/igt_core.h
> +++ b/lib/igt_core.h
> @@ -742,6 +742,7 @@ bool __igt_fork(void);
>  	for (int child = 0; child < (num_children); child++) \
>  		for (; __igt_fork(); exit(0))
>  void igt_child_done(pid_t pid);
> +int __igt_waitchildren(void);
>  void igt_waitchildren(void);
>  void igt_waitchildren_timeout(int seconds, const char *reason);
>  
> diff --git a/tests/drv_suspend.c b/tests/drv_suspend.c
> index 2e39f20ae..9a9ff2005 100644
> --- a/tests/drv_suspend.c
> +++ b/tests/drv_suspend.c
> @@ -160,6 +160,57 @@ test_sysfs_reader(bool hibernate)
>  	igt_stop_helper(&reader);
>  }
>  
> +static void
> +test_shrink(int fd, unsigned int mode)
> +{
> +	uint64_t *can_mlock, pin;
> +
> +	gem_quiescent_gpu(fd);
> +	intel_purge_vm_caches(fd);
> +
> +	pin = (intel_get_total_ram_mb() + 1) << 20;
> +
> +	igt_debug("Total memory %'"PRIu64" B (%'"PRIu64" MiB)\n",
> +		  pin, pin >> 20);
> +	can_mlock = mmap(NULL, pin, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
> +	igt_require(can_mlock != MAP_FAILED);
> +
> +	/* Lock all the system memory, forcing the driver into swap and OOM */
> +	for (uint64_t inc = 64 << 20; inc >= 4 << 10; inc >>= 1) {
> +		igt_debug("Testing+ %'"PRIu64" B (%'"PRIu64" MiB)\n",
> +			  *can_mlock, *can_mlock >> 20);
> +
> +		igt_fork(child, 1) {
> +			for (uint64_t bytes = *can_mlock;
> +			     bytes <= pin;
> +			     bytes += inc) {
> +				if (mlock(can_mlock, bytes))
> +					break;
> +
> +				*can_mlock = bytes;
> +				__sync_synchronize();

I am just curious, what __sync_synchronize() is doing in this case?
-Ewelina

> +			}
> +		}
> +		__igt_waitchildren();
> +	}
> +
> +	intel_purge_vm_caches(fd);
> +
> +	igt_require(*can_mlock > 64 << 20);
> +	*can_mlock -= 64 << 20;
> +
> +	igt_debug("Locking %'"PRIu64" B (%'"PRIu64" MiB)\n",
> +		  *can_mlock, *can_mlock >> 20);
> +	igt_assert(!mlock(can_mlock, *can_mlock));
> +	igt_info("Locked %'"PRIu64" B (%'"PRIu64" MiB)\n",
> +		 *can_mlock, *can_mlock >> 20);
> +
> +	intel_purge_vm_caches(fd);
> +	igt_system_suspend_autoresume(mode, SUSPEND_TEST_NONE);
> +
> +	munmap(can_mlock, pin);
> +}
> +
>  static void
>  test_forcewake(int fd, bool hibernate)
>  {
> @@ -199,6 +250,9 @@ igt_main
>  	igt_subtest("sysfs-reader")
>  		test_sysfs_reader(false);
>  
> +	igt_subtest("shrink")
> +		test_shrink(fd, SUSPEND_STATE_MEM);
> +
>  	igt_subtest("forcewake")
>  		test_forcewake(fd, false);
>  
> -- 
> 2.17.1
> 
> _______________________________________________
> igt-dev mailing list
> igt-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/igt-dev
Chris Wilson June 8, 2018, 10:26 a.m. UTC | #2
Quoting Ewelina Musial (2018-06-08 11:21:41)
> On Thu, Jun 07, 2018 at 09:50:54PM +0100, Chris Wilson wrote:
> > Recently we discovered that we have a race between swapping and
> > suspend in our resume path (we might be trying to page in an object
> > after disabling the block devices). Let's try to exercise that by
> > exhausting all of system memory before suspend.
> > 
> > v2: Explicitly share the large memory area on forking to avoid running
> > out of memory inside the suspend helpers (for they fork!)
> > 
> > References: https://bugs.freedesktop.org/show_bug.cgi?id=106640
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Tomi Sarvela <tomi.p.sarvela@intel.com>
> > Reviewed-by: Antonio Argenziano <antonio.argenziano@intel.com>
> > ---
> >  lib/igt_core.c      | 34 ++++++++++++++++------------
> >  lib/igt_core.h      |  1 +
> >  tests/drv_suspend.c | 54 +++++++++++++++++++++++++++++++++++++++++++++
> >  3 files changed, 75 insertions(+), 14 deletions(-)
> > 
> > diff --git a/lib/igt_core.c b/lib/igt_core.c
> > index 5092a3f03..06d8b0370 100644
> > --- a/lib/igt_core.c
> > +++ b/lib/igt_core.c
> > @@ -1702,20 +1702,7 @@ void igt_child_done(pid_t pid)
> >               test_children[i] = test_children[i + 1];
> >  }
> >  
> > -/**
> > - * igt_waitchildren:
> > - *
> > - * Wait for all children forked with igt_fork.
> > - *
> > - * The magic here is that exit codes from children will be correctly propagated
> > - * to the main thread, including the relevant exit code if a child thread failed.
> > - * Of course if multiple children failed with different exit codes the resulting
> > - * exit code will be non-deterministic.
> > - *
> > - * Note that igt_skip() will not be forwarded, feature tests need to be done
> > - * before spawning threads with igt_fork().
> > - */
> > -void igt_waitchildren(void)
> > +int __igt_waitchildren(void)
> >  {
> >       int err = 0;
> >       int count;
> > @@ -1761,6 +1748,25 @@ void igt_waitchildren(void)
> >       }
> >  
> >       num_test_children = 0;
> > +     return err;
> > +}
> > +
> > +/**
> > + * igt_waitchildren:
> > + *
> > + * Wait for all children forked with igt_fork.
> > + *
> > + * The magic here is that exit codes from children will be correctly propagated
> > + * to the main thread, including the relevant exit code if a child thread failed.
> > + * Of course if multiple children failed with different exit codes the resulting
> > + * exit code will be non-deterministic.
> > + *
> > + * Note that igt_skip() will not be forwarded, feature tests need to be done
> > + * before spawning threads with igt_fork().
> > + */
> > +void igt_waitchildren(void)
> > +{
> > +     int err = __igt_waitchildren();
> >       if (err)
> >               igt_fail(err);
> >  }
> > diff --git a/lib/igt_core.h b/lib/igt_core.h
> > index bf8cd66ca..a73b06493 100644
> > --- a/lib/igt_core.h
> > +++ b/lib/igt_core.h
> > @@ -742,6 +742,7 @@ bool __igt_fork(void);
> >       for (int child = 0; child < (num_children); child++) \
> >               for (; __igt_fork(); exit(0))
> >  void igt_child_done(pid_t pid);
> > +int __igt_waitchildren(void);
> >  void igt_waitchildren(void);
> >  void igt_waitchildren_timeout(int seconds, const char *reason);
> >  
> > diff --git a/tests/drv_suspend.c b/tests/drv_suspend.c
> > index 2e39f20ae..9a9ff2005 100644
> > --- a/tests/drv_suspend.c
> > +++ b/tests/drv_suspend.c
> > @@ -160,6 +160,57 @@ test_sysfs_reader(bool hibernate)
> >       igt_stop_helper(&reader);
> >  }
> >  
> > +static void
> > +test_shrink(int fd, unsigned int mode)
> > +{
> > +     uint64_t *can_mlock, pin;
> > +
> > +     gem_quiescent_gpu(fd);
> > +     intel_purge_vm_caches(fd);
> > +
> > +     pin = (intel_get_total_ram_mb() + 1) << 20;
> > +
> > +     igt_debug("Total memory %'"PRIu64" B (%'"PRIu64" MiB)\n",
> > +               pin, pin >> 20);
> > +     can_mlock = mmap(NULL, pin, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
> > +     igt_require(can_mlock != MAP_FAILED);
> > +
> > +     /* Lock all the system memory, forcing the driver into swap and OOM */
> > +     for (uint64_t inc = 64 << 20; inc >= 4 << 10; inc >>= 1) {
> > +             igt_debug("Testing+ %'"PRIu64" B (%'"PRIu64" MiB)\n",
> > +                       *can_mlock, *can_mlock >> 20);
> > +
> > +             igt_fork(child, 1) {
> > +                     for (uint64_t bytes = *can_mlock;
> > +                          bytes <= pin;
> > +                          bytes += inc) {
> > +                             if (mlock(can_mlock, bytes))
> > +                                     break;
> > +
> > +                             *can_mlock = bytes;
> > +                             __sync_synchronize();
> 
> I am just curious, what __sync_synchronize() is doing in this case?

Paranoia. It's mostly about that can_mlock being shared memory and we
want to stress that the parent sees the update even as the child is
killed. So the paranoid in me says we have better flush our cachelines,
just in case they are one day tagged as belonging to this terminated
process and can be aborted.
-Chris
Chris Wilson June 8, 2018, 1:22 p.m. UTC | #3
Quoting Chris Wilson (2018-06-07 21:50:54)
> Recently we discovered that we have a race between swapping and
> suspend in our resume path (we might be trying to page in an object
> after disabling the block devices). Let's try to exercise that by
> exhausting all of system memory before suspend.
> 
> v2: Explicitly share the large memory area on forking to avoid running
> out of memory inside the suspend helpers (for they fork!)
> 
> References: https://bugs.freedesktop.org/show_bug.cgi?id=106640
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tomi Sarvela <tomi.p.sarvela@intel.com>
> Reviewed-by: Antonio Argenziano <antonio.argenziano@intel.com>

With the more discerning oomkiller, this finally works as intended.
Pushed while the iron is hot.
-Chris
diff mbox

Patch

diff --git a/lib/igt_core.c b/lib/igt_core.c
index 5092a3f03..06d8b0370 100644
--- a/lib/igt_core.c
+++ b/lib/igt_core.c
@@ -1702,20 +1702,7 @@  void igt_child_done(pid_t pid)
 		test_children[i] = test_children[i + 1];
 }
 
-/**
- * igt_waitchildren:
- *
- * Wait for all children forked with igt_fork.
- *
- * The magic here is that exit codes from children will be correctly propagated
- * to the main thread, including the relevant exit code if a child thread failed.
- * Of course if multiple children failed with different exit codes the resulting
- * exit code will be non-deterministic.
- *
- * Note that igt_skip() will not be forwarded, feature tests need to be done
- * before spawning threads with igt_fork().
- */
-void igt_waitchildren(void)
+int __igt_waitchildren(void)
 {
 	int err = 0;
 	int count;
@@ -1761,6 +1748,25 @@  void igt_waitchildren(void)
 	}
 
 	num_test_children = 0;
+	return err;
+}
+
+/**
+ * igt_waitchildren:
+ *
+ * Wait for all children forked with igt_fork.
+ *
+ * The magic here is that exit codes from children will be correctly propagated
+ * to the main thread, including the relevant exit code if a child thread failed.
+ * Of course if multiple children failed with different exit codes the resulting
+ * exit code will be non-deterministic.
+ *
+ * Note that igt_skip() will not be forwarded, feature tests need to be done
+ * before spawning threads with igt_fork().
+ */
+void igt_waitchildren(void)
+{
+	int err = __igt_waitchildren();
 	if (err)
 		igt_fail(err);
 }
diff --git a/lib/igt_core.h b/lib/igt_core.h
index bf8cd66ca..a73b06493 100644
--- a/lib/igt_core.h
+++ b/lib/igt_core.h
@@ -742,6 +742,7 @@  bool __igt_fork(void);
 	for (int child = 0; child < (num_children); child++) \
 		for (; __igt_fork(); exit(0))
 void igt_child_done(pid_t pid);
+int __igt_waitchildren(void);
 void igt_waitchildren(void);
 void igt_waitchildren_timeout(int seconds, const char *reason);
 
diff --git a/tests/drv_suspend.c b/tests/drv_suspend.c
index 2e39f20ae..9a9ff2005 100644
--- a/tests/drv_suspend.c
+++ b/tests/drv_suspend.c
@@ -160,6 +160,57 @@  test_sysfs_reader(bool hibernate)
 	igt_stop_helper(&reader);
 }
 
+static void
+test_shrink(int fd, unsigned int mode)
+{
+	uint64_t *can_mlock, pin;
+
+	gem_quiescent_gpu(fd);
+	intel_purge_vm_caches(fd);
+
+	pin = (intel_get_total_ram_mb() + 1) << 20;
+
+	igt_debug("Total memory %'"PRIu64" B (%'"PRIu64" MiB)\n",
+		  pin, pin >> 20);
+	can_mlock = mmap(NULL, pin, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
+	igt_require(can_mlock != MAP_FAILED);
+
+	/* Lock all the system memory, forcing the driver into swap and OOM */
+	for (uint64_t inc = 64 << 20; inc >= 4 << 10; inc >>= 1) {
+		igt_debug("Testing+ %'"PRIu64" B (%'"PRIu64" MiB)\n",
+			  *can_mlock, *can_mlock >> 20);
+
+		igt_fork(child, 1) {
+			for (uint64_t bytes = *can_mlock;
+			     bytes <= pin;
+			     bytes += inc) {
+				if (mlock(can_mlock, bytes))
+					break;
+
+				*can_mlock = bytes;
+				__sync_synchronize();
+			}
+		}
+		__igt_waitchildren();
+	}
+
+	intel_purge_vm_caches(fd);
+
+	igt_require(*can_mlock > 64 << 20);
+	*can_mlock -= 64 << 20;
+
+	igt_debug("Locking %'"PRIu64" B (%'"PRIu64" MiB)\n",
+		  *can_mlock, *can_mlock >> 20);
+	igt_assert(!mlock(can_mlock, *can_mlock));
+	igt_info("Locked %'"PRIu64" B (%'"PRIu64" MiB)\n",
+		 *can_mlock, *can_mlock >> 20);
+
+	intel_purge_vm_caches(fd);
+	igt_system_suspend_autoresume(mode, SUSPEND_TEST_NONE);
+
+	munmap(can_mlock, pin);
+}
+
 static void
 test_forcewake(int fd, bool hibernate)
 {
@@ -199,6 +250,9 @@  igt_main
 	igt_subtest("sysfs-reader")
 		test_sysfs_reader(false);
 
+	igt_subtest("shrink")
+		test_shrink(fd, SUSPEND_STATE_MEM);
+
 	igt_subtest("forcewake")
 		test_forcewake(fd, false);