Message ID | 20180607205054.2149-1-chris@chris-wilson.co.uk (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Thu, Jun 07, 2018 at 09:50:54PM +0100, Chris Wilson wrote: > Recently we discovered that we have a race between swapping and > suspend in our resume path (we might be trying to page in an object > after disabling the block devices). Let's try to exercise that by > exhausting all of system memory before suspend. > > v2: Explicitly share the large memory area on forking to avoid running > out of memory inside the suspend helpers (for they fork!) > > References: https://bugs.freedesktop.org/show_bug.cgi?id=106640 > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > Cc: Tomi Sarvela <tomi.p.sarvela@intel.com> > Reviewed-by: Antonio Argenziano <antonio.argenziano@intel.com> > --- > lib/igt_core.c | 34 ++++++++++++++++------------ > lib/igt_core.h | 1 + > tests/drv_suspend.c | 54 +++++++++++++++++++++++++++++++++++++++++++++ > 3 files changed, 75 insertions(+), 14 deletions(-) > > diff --git a/lib/igt_core.c b/lib/igt_core.c > index 5092a3f03..06d8b0370 100644 > --- a/lib/igt_core.c > +++ b/lib/igt_core.c > @@ -1702,20 +1702,7 @@ void igt_child_done(pid_t pid) > test_children[i] = test_children[i + 1]; > } > > -/** > - * igt_waitchildren: > - * > - * Wait for all children forked with igt_fork. > - * > - * The magic here is that exit codes from children will be correctly propagated > - * to the main thread, including the relevant exit code if a child thread failed. > - * Of course if multiple children failed with different exit codes the resulting > - * exit code will be non-deterministic. > - * > - * Note that igt_skip() will not be forwarded, feature tests need to be done > - * before spawning threads with igt_fork(). > - */ > -void igt_waitchildren(void) > +int __igt_waitchildren(void) > { > int err = 0; > int count; > @@ -1761,6 +1748,25 @@ void igt_waitchildren(void) > } > > num_test_children = 0; > + return err; > +} > + > +/** > + * igt_waitchildren: > + * > + * Wait for all children forked with igt_fork. > + * > + * The magic here is that exit codes from children will be correctly propagated > + * to the main thread, including the relevant exit code if a child thread failed. > + * Of course if multiple children failed with different exit codes the resulting > + * exit code will be non-deterministic. > + * > + * Note that igt_skip() will not be forwarded, feature tests need to be done > + * before spawning threads with igt_fork(). > + */ > +void igt_waitchildren(void) > +{ > + int err = __igt_waitchildren(); > if (err) > igt_fail(err); > } > diff --git a/lib/igt_core.h b/lib/igt_core.h > index bf8cd66ca..a73b06493 100644 > --- a/lib/igt_core.h > +++ b/lib/igt_core.h > @@ -742,6 +742,7 @@ bool __igt_fork(void); > for (int child = 0; child < (num_children); child++) \ > for (; __igt_fork(); exit(0)) > void igt_child_done(pid_t pid); > +int __igt_waitchildren(void); > void igt_waitchildren(void); > void igt_waitchildren_timeout(int seconds, const char *reason); > > diff --git a/tests/drv_suspend.c b/tests/drv_suspend.c > index 2e39f20ae..9a9ff2005 100644 > --- a/tests/drv_suspend.c > +++ b/tests/drv_suspend.c > @@ -160,6 +160,57 @@ test_sysfs_reader(bool hibernate) > igt_stop_helper(&reader); > } > > +static void > +test_shrink(int fd, unsigned int mode) > +{ > + uint64_t *can_mlock, pin; > + > + gem_quiescent_gpu(fd); > + intel_purge_vm_caches(fd); > + > + pin = (intel_get_total_ram_mb() + 1) << 20; > + > + igt_debug("Total memory %'"PRIu64" B (%'"PRIu64" MiB)\n", > + pin, pin >> 20); > + can_mlock = mmap(NULL, pin, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0); > + igt_require(can_mlock != MAP_FAILED); > + > + /* Lock all the system memory, forcing the driver into swap and OOM */ > + for (uint64_t inc = 64 << 20; inc >= 4 << 10; inc >>= 1) { > + igt_debug("Testing+ %'"PRIu64" B (%'"PRIu64" MiB)\n", > + *can_mlock, *can_mlock >> 20); > + > + igt_fork(child, 1) { > + for (uint64_t bytes = *can_mlock; > + bytes <= pin; > + bytes += inc) { > + if (mlock(can_mlock, bytes)) > + break; > + > + *can_mlock = bytes; > + __sync_synchronize(); I am just curious, what __sync_synchronize() is doing in this case? -Ewelina > + } > + } > + __igt_waitchildren(); > + } > + > + intel_purge_vm_caches(fd); > + > + igt_require(*can_mlock > 64 << 20); > + *can_mlock -= 64 << 20; > + > + igt_debug("Locking %'"PRIu64" B (%'"PRIu64" MiB)\n", > + *can_mlock, *can_mlock >> 20); > + igt_assert(!mlock(can_mlock, *can_mlock)); > + igt_info("Locked %'"PRIu64" B (%'"PRIu64" MiB)\n", > + *can_mlock, *can_mlock >> 20); > + > + intel_purge_vm_caches(fd); > + igt_system_suspend_autoresume(mode, SUSPEND_TEST_NONE); > + > + munmap(can_mlock, pin); > +} > + > static void > test_forcewake(int fd, bool hibernate) > { > @@ -199,6 +250,9 @@ igt_main > igt_subtest("sysfs-reader") > test_sysfs_reader(false); > > + igt_subtest("shrink") > + test_shrink(fd, SUSPEND_STATE_MEM); > + > igt_subtest("forcewake") > test_forcewake(fd, false); > > -- > 2.17.1 > > _______________________________________________ > igt-dev mailing list > igt-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/igt-dev
Quoting Ewelina Musial (2018-06-08 11:21:41) > On Thu, Jun 07, 2018 at 09:50:54PM +0100, Chris Wilson wrote: > > Recently we discovered that we have a race between swapping and > > suspend in our resume path (we might be trying to page in an object > > after disabling the block devices). Let's try to exercise that by > > exhausting all of system memory before suspend. > > > > v2: Explicitly share the large memory area on forking to avoid running > > out of memory inside the suspend helpers (for they fork!) > > > > References: https://bugs.freedesktop.org/show_bug.cgi?id=106640 > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > > Cc: Tomi Sarvela <tomi.p.sarvela@intel.com> > > Reviewed-by: Antonio Argenziano <antonio.argenziano@intel.com> > > --- > > lib/igt_core.c | 34 ++++++++++++++++------------ > > lib/igt_core.h | 1 + > > tests/drv_suspend.c | 54 +++++++++++++++++++++++++++++++++++++++++++++ > > 3 files changed, 75 insertions(+), 14 deletions(-) > > > > diff --git a/lib/igt_core.c b/lib/igt_core.c > > index 5092a3f03..06d8b0370 100644 > > --- a/lib/igt_core.c > > +++ b/lib/igt_core.c > > @@ -1702,20 +1702,7 @@ void igt_child_done(pid_t pid) > > test_children[i] = test_children[i + 1]; > > } > > > > -/** > > - * igt_waitchildren: > > - * > > - * Wait for all children forked with igt_fork. > > - * > > - * The magic here is that exit codes from children will be correctly propagated > > - * to the main thread, including the relevant exit code if a child thread failed. > > - * Of course if multiple children failed with different exit codes the resulting > > - * exit code will be non-deterministic. > > - * > > - * Note that igt_skip() will not be forwarded, feature tests need to be done > > - * before spawning threads with igt_fork(). > > - */ > > -void igt_waitchildren(void) > > +int __igt_waitchildren(void) > > { > > int err = 0; > > int count; > > @@ -1761,6 +1748,25 @@ void igt_waitchildren(void) > > } > > > > num_test_children = 0; > > + return err; > > +} > > + > > +/** > > + * igt_waitchildren: > > + * > > + * Wait for all children forked with igt_fork. > > + * > > + * The magic here is that exit codes from children will be correctly propagated > > + * to the main thread, including the relevant exit code if a child thread failed. > > + * Of course if multiple children failed with different exit codes the resulting > > + * exit code will be non-deterministic. > > + * > > + * Note that igt_skip() will not be forwarded, feature tests need to be done > > + * before spawning threads with igt_fork(). > > + */ > > +void igt_waitchildren(void) > > +{ > > + int err = __igt_waitchildren(); > > if (err) > > igt_fail(err); > > } > > diff --git a/lib/igt_core.h b/lib/igt_core.h > > index bf8cd66ca..a73b06493 100644 > > --- a/lib/igt_core.h > > +++ b/lib/igt_core.h > > @@ -742,6 +742,7 @@ bool __igt_fork(void); > > for (int child = 0; child < (num_children); child++) \ > > for (; __igt_fork(); exit(0)) > > void igt_child_done(pid_t pid); > > +int __igt_waitchildren(void); > > void igt_waitchildren(void); > > void igt_waitchildren_timeout(int seconds, const char *reason); > > > > diff --git a/tests/drv_suspend.c b/tests/drv_suspend.c > > index 2e39f20ae..9a9ff2005 100644 > > --- a/tests/drv_suspend.c > > +++ b/tests/drv_suspend.c > > @@ -160,6 +160,57 @@ test_sysfs_reader(bool hibernate) > > igt_stop_helper(&reader); > > } > > > > +static void > > +test_shrink(int fd, unsigned int mode) > > +{ > > + uint64_t *can_mlock, pin; > > + > > + gem_quiescent_gpu(fd); > > + intel_purge_vm_caches(fd); > > + > > + pin = (intel_get_total_ram_mb() + 1) << 20; > > + > > + igt_debug("Total memory %'"PRIu64" B (%'"PRIu64" MiB)\n", > > + pin, pin >> 20); > > + can_mlock = mmap(NULL, pin, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0); > > + igt_require(can_mlock != MAP_FAILED); > > + > > + /* Lock all the system memory, forcing the driver into swap and OOM */ > > + for (uint64_t inc = 64 << 20; inc >= 4 << 10; inc >>= 1) { > > + igt_debug("Testing+ %'"PRIu64" B (%'"PRIu64" MiB)\n", > > + *can_mlock, *can_mlock >> 20); > > + > > + igt_fork(child, 1) { > > + for (uint64_t bytes = *can_mlock; > > + bytes <= pin; > > + bytes += inc) { > > + if (mlock(can_mlock, bytes)) > > + break; > > + > > + *can_mlock = bytes; > > + __sync_synchronize(); > > I am just curious, what __sync_synchronize() is doing in this case? Paranoia. It's mostly about that can_mlock being shared memory and we want to stress that the parent sees the update even as the child is killed. So the paranoid in me says we have better flush our cachelines, just in case they are one day tagged as belonging to this terminated process and can be aborted. -Chris
Quoting Chris Wilson (2018-06-07 21:50:54) > Recently we discovered that we have a race between swapping and > suspend in our resume path (we might be trying to page in an object > after disabling the block devices). Let's try to exercise that by > exhausting all of system memory before suspend. > > v2: Explicitly share the large memory area on forking to avoid running > out of memory inside the suspend helpers (for they fork!) > > References: https://bugs.freedesktop.org/show_bug.cgi?id=106640 > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> > Cc: Tomi Sarvela <tomi.p.sarvela@intel.com> > Reviewed-by: Antonio Argenziano <antonio.argenziano@intel.com> With the more discerning oomkiller, this finally works as intended. Pushed while the iron is hot. -Chris
diff --git a/lib/igt_core.c b/lib/igt_core.c index 5092a3f03..06d8b0370 100644 --- a/lib/igt_core.c +++ b/lib/igt_core.c @@ -1702,20 +1702,7 @@ void igt_child_done(pid_t pid) test_children[i] = test_children[i + 1]; } -/** - * igt_waitchildren: - * - * Wait for all children forked with igt_fork. - * - * The magic here is that exit codes from children will be correctly propagated - * to the main thread, including the relevant exit code if a child thread failed. - * Of course if multiple children failed with different exit codes the resulting - * exit code will be non-deterministic. - * - * Note that igt_skip() will not be forwarded, feature tests need to be done - * before spawning threads with igt_fork(). - */ -void igt_waitchildren(void) +int __igt_waitchildren(void) { int err = 0; int count; @@ -1761,6 +1748,25 @@ void igt_waitchildren(void) } num_test_children = 0; + return err; +} + +/** + * igt_waitchildren: + * + * Wait for all children forked with igt_fork. + * + * The magic here is that exit codes from children will be correctly propagated + * to the main thread, including the relevant exit code if a child thread failed. + * Of course if multiple children failed with different exit codes the resulting + * exit code will be non-deterministic. + * + * Note that igt_skip() will not be forwarded, feature tests need to be done + * before spawning threads with igt_fork(). + */ +void igt_waitchildren(void) +{ + int err = __igt_waitchildren(); if (err) igt_fail(err); } diff --git a/lib/igt_core.h b/lib/igt_core.h index bf8cd66ca..a73b06493 100644 --- a/lib/igt_core.h +++ b/lib/igt_core.h @@ -742,6 +742,7 @@ bool __igt_fork(void); for (int child = 0; child < (num_children); child++) \ for (; __igt_fork(); exit(0)) void igt_child_done(pid_t pid); +int __igt_waitchildren(void); void igt_waitchildren(void); void igt_waitchildren_timeout(int seconds, const char *reason); diff --git a/tests/drv_suspend.c b/tests/drv_suspend.c index 2e39f20ae..9a9ff2005 100644 --- a/tests/drv_suspend.c +++ b/tests/drv_suspend.c @@ -160,6 +160,57 @@ test_sysfs_reader(bool hibernate) igt_stop_helper(&reader); } +static void +test_shrink(int fd, unsigned int mode) +{ + uint64_t *can_mlock, pin; + + gem_quiescent_gpu(fd); + intel_purge_vm_caches(fd); + + pin = (intel_get_total_ram_mb() + 1) << 20; + + igt_debug("Total memory %'"PRIu64" B (%'"PRIu64" MiB)\n", + pin, pin >> 20); + can_mlock = mmap(NULL, pin, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0); + igt_require(can_mlock != MAP_FAILED); + + /* Lock all the system memory, forcing the driver into swap and OOM */ + for (uint64_t inc = 64 << 20; inc >= 4 << 10; inc >>= 1) { + igt_debug("Testing+ %'"PRIu64" B (%'"PRIu64" MiB)\n", + *can_mlock, *can_mlock >> 20); + + igt_fork(child, 1) { + for (uint64_t bytes = *can_mlock; + bytes <= pin; + bytes += inc) { + if (mlock(can_mlock, bytes)) + break; + + *can_mlock = bytes; + __sync_synchronize(); + } + } + __igt_waitchildren(); + } + + intel_purge_vm_caches(fd); + + igt_require(*can_mlock > 64 << 20); + *can_mlock -= 64 << 20; + + igt_debug("Locking %'"PRIu64" B (%'"PRIu64" MiB)\n", + *can_mlock, *can_mlock >> 20); + igt_assert(!mlock(can_mlock, *can_mlock)); + igt_info("Locked %'"PRIu64" B (%'"PRIu64" MiB)\n", + *can_mlock, *can_mlock >> 20); + + intel_purge_vm_caches(fd); + igt_system_suspend_autoresume(mode, SUSPEND_TEST_NONE); + + munmap(can_mlock, pin); +} + static void test_forcewake(int fd, bool hibernate) { @@ -199,6 +250,9 @@ igt_main igt_subtest("sysfs-reader") test_sysfs_reader(false); + igt_subtest("shrink") + test_shrink(fd, SUSPEND_STATE_MEM); + igt_subtest("forcewake") test_forcewake(fd, false);