diff mbox

[igt] igt/perf_pmu: Disable all cpus

Message ID 20180220214008.6544-1-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show

Commit Message

Chris Wilson Feb. 20, 2018, 9:40 p.m. UTC
Rather than iteratively disable and then immediately reenable a CPU,
turn off each in turn, forcing the PMU events onto the next CPU without
allowing them to retreat back to CPU0 after the first. If this fails,
immediately reboot the system.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 lib/Makefile.sources |  2 ++
 lib/igt_sysrq.c      | 20 ++++++++++++++++++
 lib/igt_sysrq.h      | 30 +++++++++++++++++++++++++++
 lib/meson.build      |  2 ++
 tests/perf_pmu.c     | 57 ++++++++++++++++++++++++++++++++++------------------
 5 files changed, 92 insertions(+), 19 deletions(-)
 create mode 100644 lib/igt_sysrq.c
 create mode 100644 lib/igt_sysrq.h

Comments

Tvrtko Ursulin Feb. 21, 2018, 9:11 a.m. UTC | #1
On 20/02/2018 21:40, Chris Wilson wrote:
> Rather than iteratively disable and then immediately reenable a CPU,
> turn off each in turn, forcing the PMU events onto the next CPU without
> allowing them to retreat back to CPU0 after the first. If this fails,

Hm, interesting and I think it possibly makes sense to test both 
migration patterns.

> immediately reboot the system.

Yes, we already agreed on this one and I was planning to do it so thanks 
for beating me to it.

> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>   lib/Makefile.sources |  2 ++
>   lib/igt_sysrq.c      | 20 ++++++++++++++++++
>   lib/igt_sysrq.h      | 30 +++++++++++++++++++++++++++
>   lib/meson.build      |  2 ++
>   tests/perf_pmu.c     | 57 ++++++++++++++++++++++++++++++++++------------------
>   5 files changed, 92 insertions(+), 19 deletions(-)
>   create mode 100644 lib/igt_sysrq.c
>   create mode 100644 lib/igt_sysrq.h
> 
> diff --git a/lib/Makefile.sources b/lib/Makefile.sources
> index 86fbfeef..e4a9b059 100644
> --- a/lib/Makefile.sources
> +++ b/lib/Makefile.sources
> @@ -33,6 +33,8 @@ lib_source_list =	 	\
>   	igt_stats.h		\
>   	igt_sysfs.c		\
>   	igt_sysfs.h		\
> +	igt_sysrq.c		\
> +	igt_sysrq.h		\
>   	igt_x86.h		\
>   	igt_x86.c		\
>   	igt_vgem.c		\
> diff --git a/lib/igt_sysrq.c b/lib/igt_sysrq.c
> new file mode 100644
> index 00000000..32fb4a39
> --- /dev/null
> +++ b/lib/igt_sysrq.c
> @@ -0,0 +1,20 @@
> +#include <unistd.h>
> +#include <fcntl.h>
> +#include <stdlib.h>
> +#include <sys/reboot.h>
> +
> +#include "igt_sysrq.h"
> +
> +void igt_sysrq_reboot(void)
> +{
> +	sync();
> +
> +	/* Try to be nice at first, and if that fails pull the trigger */
> +	if (reboot(RB_AUTOBOOT)) {
> +		int fd = open("/proc/sysrq-trigger", O_WRONLY);
> +		write(fd, "b", 2);
> +		close(fd);
> +	}
> +
> +	abort();
> +}
> diff --git a/lib/igt_sysrq.h b/lib/igt_sysrq.h
> new file mode 100644
> index 00000000..422473d2
> --- /dev/null
> +++ b/lib/igt_sysrq.h
> @@ -0,0 +1,30 @@
> +/*
> + * Copyright © 2018 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + *
> + */
> +
> +#ifndef __IGT_SYSRQ_H__
> +#define __IGT_SYSRQ_H__
> +
> +void igt_sysrq_reboot(void) __attribute__((noreturn));
> +
> +#endif /* __IGT_SYSRQ_H__ */
> diff --git a/lib/meson.build b/lib/meson.build
> index 94ea0799..2c611348 100644
> --- a/lib/meson.build
> +++ b/lib/meson.build
> @@ -21,6 +21,7 @@ lib_headers = [
>   	'igt_stats.h',
>   	'igt_syncobj.h',
>   	'igt_sysfs.h',
> +	'igt_sysrq.h',
>   	'igt_x86.h',
>   	'igt_vgem.h',
>   	'instdone.h',
> @@ -67,6 +68,7 @@ lib_sources = [
>   	'igt_stats.c',
>   	'igt_syncobj.c',
>   	'igt_sysfs.c',
> +	'igt_sysrq.c',
>   	'igt_vgem.c',
>   	'igt_x86.c',
>   	'instdone.c',
> diff --git a/tests/perf_pmu.c b/tests/perf_pmu.c
> index 7fab73e2..1421fd9a 100644
> --- a/tests/perf_pmu.c
> +++ b/tests/perf_pmu.c
> @@ -41,6 +41,7 @@
>   #include "igt_core.h"
>   #include "igt_perf.h"
>   #include "igt_sysfs.h"
> +#include "igt_sysrq.h"
>   #include "igt_pm.h"
>   #include "sw_sync.h"
>   
> @@ -957,6 +958,16 @@ static bool cpu0_hotplug_support(void)
>   	return access("/sys/devices/system/cpu/cpu0/online", W_OK) == 0;
>   }
>   
> +static int open_cpu_online(int cpu)
> +{
> +	char name[128];
> +
> +	igt_assert_lt(snprintf(name, sizeof(name),
> +			       "/sys/devices/system/cpu/cpu%d/online",
> +			       cpu), sizeof(name));
> +	return open(name, O_WRONLY);
> +}
> +
>   static void cpu_hotplug(int gem_fd)
>   {
>   	igt_spin_t *spin[2];
> @@ -988,35 +999,43 @@ static void cpu_hotplug(int gem_fd)
>   	 */
>   	igt_fork(child, 1) {
>   		int cpu = 0;
> +		int cpufd;
> +		int err;
>   
>   		close(link[0]);
>   
> +		/* Offline each cpu in turn */
>   		for (;;) {
> -			char name[128];
> -			int cpufd;
> -
> -			igt_assert_lt(snprintf(name, sizeof(name),
> -					       "/sys/devices/system/cpu/cpu%d/online",
> -					       cpu), sizeof(name));
> -			cpufd = open(name, O_WRONLY);
> -			if (cpufd == -1) {
> -				igt_assert(cpu > 0);
> -				/*
> -				 * Signal parent that we cycled through all
> -				 * CPUs and we are done.
> -				 */
> -				igt_assert_eq(write(link[1], "*", 1), 1);
> +			cpufd = open_cpu_online(cpu);
> +			igt_assert(cpufd != -1);
> +
> +			err = write(cpufd, "0", 2);
> +			close(cpufd);
> +			if (err < 0)
>   				break;

Keep off-lining until no more CPUs to offline? I had to try it! :) Ok, 
last one will fail to offline. But I think it needs a comment.

> -			}
>   
> -			/* Offline followed by online a CPU. */
> -			igt_assert_eq(write(cpufd, "0", 2), 2);
>   			usleep(1e6);
> -			igt_assert_eq(write(cpufd, "1", 2), 2);
> +			cpu++;
> +		}
>   
> +		/* Then bring them back online */
> +		while (cpu--) {
> +			cpufd = open_cpu_online(cpu);
> +			err = write(cpufd, "1", 2);
>   			close(cpufd);

Need to online in the same order or the PMU will stay on some higher CPU 
making the subsequent tests fail. Or I need to improve the helpers to 
hunt for the correct CPU, as perf tool does.

> -			cpu++;
> +
> +			if (err < 0) {
> +				igt_warn("Failed to bring CPU%d back online\n",
> +					 cpu);
> +				igt_sysrq_reboot();
> +			}
>   		}
> +
> +		/*
> +		 * Signal parent that we cycled through all
> +		 * CPUs and we are done.
> +		 */
> +		igt_assert_eq(write(link[1], "*", 1), 1);
>   	}
>   
>   	close(link[1]);
> 

Regards,

Tvrtko
Chris Wilson Feb. 21, 2018, 9:17 a.m. UTC | #2
Quoting Tvrtko Ursulin (2018-02-21 09:11:15)
> 
> On 20/02/2018 21:40, Chris Wilson wrote:
> > Rather than iteratively disable and then immediately reenable a CPU,
> > turn off each in turn, forcing the PMU events onto the next CPU without
> > allowing them to retreat back to CPU0 after the first. If this fails,
> 
> Hm, interesting and I think it possibly makes sense to test both 
> migration patterns.

Yup.

> > @@ -988,35 +999,43 @@ static void cpu_hotplug(int gem_fd)
> >        */
> >       igt_fork(child, 1) {
> >               int cpu = 0;
> > +             int cpufd;
> > +             int err;
> >   
> >               close(link[0]);
> >   
> > +             /* Offline each cpu in turn */
> >               for (;;) {
> > -                     char name[128];
> > -                     int cpufd;
> > -
> > -                     igt_assert_lt(snprintf(name, sizeof(name),
> > -                                            "/sys/devices/system/cpu/cpu%d/online",
> > -                                            cpu), sizeof(name));
> > -                     cpufd = open(name, O_WRONLY);
> > -                     if (cpufd == -1) {
> > -                             igt_assert(cpu > 0);
> > -                             /*
> > -                              * Signal parent that we cycled through all
> > -                              * CPUs and we are done.
> > -                              */
> > -                             igt_assert_eq(write(link[1], "*", 1), 1);
> > +                     cpufd = open_cpu_online(cpu);
> > +                     igt_assert(cpufd != -1);
> > +
> > +                     err = write(cpufd, "0", 2);
> > +                     close(cpufd);
> > +                     if (err < 0)
> >                               break;
> 
> Keep off-lining until no more CPUs to offline? I had to try it! :) Ok, 
> last one will fail to offline. But I think it needs a comment.

I thought that was a fun trick to try and offline the last cpu :)

> > -                     }
> >   
> > -                     /* Offline followed by online a CPU. */
> > -                     igt_assert_eq(write(cpufd, "0", 2), 2);
> >                       usleep(1e6);
> > -                     igt_assert_eq(write(cpufd, "1", 2), 2);
> > +                     cpu++;
> > +             }
> >   
> > +             /* Then bring them back online */
> > +             while (cpu--) {
> > +                     cpufd = open_cpu_online(cpu);
> > +                     err = write(cpufd, "1", 2);
> >                       close(cpufd);
> 
> Need to online in the same order or the PMU will stay on some higher CPU 
> making the subsequent tests fail. Or I need to improve the helpers to 
> hunt for the correct CPU, as perf tool does.

Ah. I was expecting every time we onlined a new cpu, the notifier would
move the pmu. Why do the subsequent tests fail? In my naivety I expected
one CPU is as good as any other for pmu. Do we need to put a trivial
test inside the online/offline loops?
-Chris
Tvrtko Ursulin Feb. 21, 2018, 9:24 a.m. UTC | #3
On 21/02/2018 09:17, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-02-21 09:11:15)
>>
>> On 20/02/2018 21:40, Chris Wilson wrote:
>>> Rather than iteratively disable and then immediately reenable a CPU,
>>> turn off each in turn, forcing the PMU events onto the next CPU without
>>> allowing them to retreat back to CPU0 after the first. If this fails,
>>
>> Hm, interesting and I think it possibly makes sense to test both
>> migration patterns.
> 
> Yup.
> 
>>> @@ -988,35 +999,43 @@ static void cpu_hotplug(int gem_fd)
>>>         */
>>>        igt_fork(child, 1) {
>>>                int cpu = 0;
>>> +             int cpufd;
>>> +             int err;
>>>    
>>>                close(link[0]);
>>>    
>>> +             /* Offline each cpu in turn */
>>>                for (;;) {
>>> -                     char name[128];
>>> -                     int cpufd;
>>> -
>>> -                     igt_assert_lt(snprintf(name, sizeof(name),
>>> -                                            "/sys/devices/system/cpu/cpu%d/online",
>>> -                                            cpu), sizeof(name));
>>> -                     cpufd = open(name, O_WRONLY);
>>> -                     if (cpufd == -1) {
>>> -                             igt_assert(cpu > 0);
>>> -                             /*
>>> -                              * Signal parent that we cycled through all
>>> -                              * CPUs and we are done.
>>> -                              */
>>> -                             igt_assert_eq(write(link[1], "*", 1), 1);
>>> +                     cpufd = open_cpu_online(cpu);
>>> +                     igt_assert(cpufd != -1);
>>> +
>>> +                     err = write(cpufd, "0", 2);
>>> +                     close(cpufd);
>>> +                     if (err < 0)
>>>                                break;
>>
>> Keep off-lining until no more CPUs to offline? I had to try it! :) Ok,
>> last one will fail to offline. But I think it needs a comment.
> 
> I thought that was a fun trick to try and offline the last cpu :)
> 
>>> -                     }
>>>    
>>> -                     /* Offline followed by online a CPU. */
>>> -                     igt_assert_eq(write(cpufd, "0", 2), 2);
>>>                        usleep(1e6);
>>> -                     igt_assert_eq(write(cpufd, "1", 2), 2);
>>> +                     cpu++;
>>> +             }
>>>    
>>> +             /* Then bring them back online */
>>> +             while (cpu--) {
>>> +                     cpufd = open_cpu_online(cpu);
>>> +                     err = write(cpufd, "1", 2);
>>>                        close(cpufd);
>>
>> Need to online in the same order or the PMU will stay on some higher CPU
>> making the subsequent tests fail. Or I need to improve the helpers to
>> hunt for the correct CPU, as perf tool does.
> 
> Ah. I was expecting every time we onlined a new cpu, the notifier would
> move the pmu. Why do the subsequent tests fail? In my naivety I expected
> one CPU is as good as any other for pmu. Do we need to put a trivial
> test inside the online/offline loops?

It only moves it to the first available CPU once it gets kicked out from 
the one it was on.

So with the above pattern of offline all and online in reverse, it will 
happily stay on the last CPU. And IGTs only try to open on the first CPU 
so it fails to open it from then on.

As I said, it would be easy to support opening our PMU regardless of
which CPU it currently lives on by wrapping the "try the next cpu" logic
in the perf open wrappers. The in-kernel perf tool, for instance, already
does that.

I could also change i915 to always try to move to CPU0 if it is 
available, on any online events. But I am not sure that's in the spirit 
of things.

I think making IGT perf open wrapper more robust makes most sense.

Regards,

Tvrtko
Chris Wilson Feb. 21, 2018, 9:30 a.m. UTC | #4
Quoting Tvrtko Ursulin (2018-02-21 09:24:46)
> As I said, it would be easy to support opening our PMU regardless of
> which CPU it currently lives on by wrapping the "try the next cpu" logic
> in the perf open wrappers. The in-kernel perf tool, for instance, already
> does that.
> 
> I could also change i915 to always try to move to CPU0 if it is 
> available, on any online events. But I am not sure that's in the spirit 
> of things.

Ok, I got lost as to why i915 perf requires a specific CPU; it is just
that this is part of the perf ABI, right?

> I think making IGT perf open wrapper more robust makes most sense.

Yeah, if it's part of the ABI we shouldn't just expect CPU0. Then
throwing a test of the robust perf open in the midst of online/offline
seems sensible.

I guess we will end up with a perf open before and after each
stage, and keep the pmu fd around until the end.
-Chris
diff mbox

Patch

diff --git a/lib/Makefile.sources b/lib/Makefile.sources
index 86fbfeef..e4a9b059 100644
--- a/lib/Makefile.sources
+++ b/lib/Makefile.sources
@@ -33,6 +33,8 @@  lib_source_list =	 	\
 	igt_stats.h		\
 	igt_sysfs.c		\
 	igt_sysfs.h		\
+	igt_sysrq.c		\
+	igt_sysrq.h		\
 	igt_x86.h		\
 	igt_x86.c		\
 	igt_vgem.c		\
diff --git a/lib/igt_sysrq.c b/lib/igt_sysrq.c
new file mode 100644
index 00000000..32fb4a39
--- /dev/null
+++ b/lib/igt_sysrq.c
@@ -0,0 +1,20 @@ 
+#include <unistd.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <sys/reboot.h>
+
+#include "igt_sysrq.h"
+
+void igt_sysrq_reboot(void)
+{
+	sync();
+
+	/* Try to be nice at first, and if that fails pull the trigger */
+	if (reboot(RB_AUTOBOOT)) {
+		int fd = open("/proc/sysrq-trigger", O_WRONLY);
+		write(fd, "b", 2);
+		close(fd);
+	}
+
+	abort();
+}
diff --git a/lib/igt_sysrq.h b/lib/igt_sysrq.h
new file mode 100644
index 00000000..422473d2
--- /dev/null
+++ b/lib/igt_sysrq.h
@@ -0,0 +1,30 @@ 
+/*
+ * Copyright © 2018 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __IGT_SYSRQ_H__
+#define __IGT_SYSRQ_H__
+
+void igt_sysrq_reboot(void) __attribute__((noreturn));
+
+#endif /* __IGT_SYSRQ_H__ */
diff --git a/lib/meson.build b/lib/meson.build
index 94ea0799..2c611348 100644
--- a/lib/meson.build
+++ b/lib/meson.build
@@ -21,6 +21,7 @@  lib_headers = [
 	'igt_stats.h',
 	'igt_syncobj.h',
 	'igt_sysfs.h',
+	'igt_sysrq.h',
 	'igt_x86.h',
 	'igt_vgem.h',
 	'instdone.h',
@@ -67,6 +68,7 @@  lib_sources = [
 	'igt_stats.c',
 	'igt_syncobj.c',
 	'igt_sysfs.c',
+	'igt_sysrq.c',
 	'igt_vgem.c',
 	'igt_x86.c',
 	'instdone.c',
diff --git a/tests/perf_pmu.c b/tests/perf_pmu.c
index 7fab73e2..1421fd9a 100644
--- a/tests/perf_pmu.c
+++ b/tests/perf_pmu.c
@@ -41,6 +41,7 @@ 
 #include "igt_core.h"
 #include "igt_perf.h"
 #include "igt_sysfs.h"
+#include "igt_sysrq.h"
 #include "igt_pm.h"
 #include "sw_sync.h"
 
@@ -957,6 +958,16 @@  static bool cpu0_hotplug_support(void)
 	return access("/sys/devices/system/cpu/cpu0/online", W_OK) == 0;
 }
 
+static int open_cpu_online(int cpu)
+{
+	char name[128];
+
+	igt_assert_lt(snprintf(name, sizeof(name),
+			       "/sys/devices/system/cpu/cpu%d/online",
+			       cpu), sizeof(name));
+	return open(name, O_WRONLY);
+}
+
 static void cpu_hotplug(int gem_fd)
 {
 	igt_spin_t *spin[2];
@@ -988,35 +999,43 @@  static void cpu_hotplug(int gem_fd)
 	 */
 	igt_fork(child, 1) {
 		int cpu = 0;
+		int cpufd;
+		int err;
 
 		close(link[0]);
 
+		/* Offline each cpu in turn */
 		for (;;) {
-			char name[128];
-			int cpufd;
-
-			igt_assert_lt(snprintf(name, sizeof(name),
-					       "/sys/devices/system/cpu/cpu%d/online",
-					       cpu), sizeof(name));
-			cpufd = open(name, O_WRONLY);
-			if (cpufd == -1) {
-				igt_assert(cpu > 0);
-				/*
-				 * Signal parent that we cycled through all
-				 * CPUs and we are done.
-				 */
-				igt_assert_eq(write(link[1], "*", 1), 1);
+			cpufd = open_cpu_online(cpu);
+			igt_assert(cpufd != -1);
+
+			err = write(cpufd, "0", 2);
+			close(cpufd);
+			if (err < 0)
 				break;
-			}
 
-			/* Offline followed by online a CPU. */
-			igt_assert_eq(write(cpufd, "0", 2), 2);
 			usleep(1e6);
-			igt_assert_eq(write(cpufd, "1", 2), 2);
+			cpu++;
+		}
 
+		/* Then bring them back online */
+		while (cpu--) {
+			cpufd = open_cpu_online(cpu);
+			err = write(cpufd, "1", 2);
 			close(cpufd);
-			cpu++;
+
+			if (err < 0) {
+				igt_warn("Failed to bring CPU%d back online\n",
+					 cpu);
+				igt_sysrq_reboot();
+			}
 		}
+
+		/*
+		 * Signal parent that we cycled through all
+		 * CPUs and we are done.
+		 */
+		igt_assert_eq(write(link[1], "*", 1), 1);
 	}
 
 	close(link[1]);