diff mbox series

selftests/timers/posix_timers: reimplement check_timer_distribution()

Message ID 20240406150950.GA3060@redhat.com (mailing list archive)
State New
Headers show
Series selftests/timers/posix_timers: reimplement check_timer_distribution() | expand

Commit Message

Oleg Nesterov April 6, 2024, 3:09 p.m. UTC
Thomas says:

	The signal distribution test has a tendency to hang for a long
	time as the signal delivery is not really evenly distributed. In
	fact it might never be distributed across all threads ever in
	the way it is written.

To me even the

	This primarily tests that the kernel does not favour any one.

comment doesn't look right. The kernel does favour a thread which hits
the timer interrupt when CLOCK_PROCESS_CPUTIME_ID expires.

The new version simply checks that the group leader sleeping in join()
never receives SIGALRM, cpu_timer_fire() should always send the signal
to the thread which burns cpu.

Without the commit bcb7ee79029d ("posix-timers: Prefer delivery of signals
to the current thread") the test-case fails immediately, the very 1st tick
wakes the leader up. Otherwise it quickly succeeds after 100 ticks.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---
 tools/testing/selftests/timers/posix_timers.c | 102 ++++++++----------
 1 file changed, 46 insertions(+), 56 deletions(-)

Comments

Oleg Nesterov April 6, 2024, 3:10 p.m. UTC | #1
Dmitry, Thomas,

To simplify the review I've attached the code with this patch applied below.

Yes, this changes the "semantics" of check_timer_distribution(), perhaps it
should be renamed.

But I do not see a better approach, and in fact I think that

	Test that all running threads _eventually_ receive CLOCK_PROCESS_CPUTIME_ID

is the wrong goal.

Do you agree?

Oleg.
-------------------------------------------------------------------------------

static pthread_t ctd_thread;
static volatile int ctd_count, ctd_failed;

static void ctd_sighandler(int sig)
{
	if (pthread_self() != ctd_thread)
		ctd_failed = 1;
	ctd_count--;
}

static void *ctd_thread_func(void *arg)
{
	struct itimerspec val = {
		.it_value.tv_sec = 0,
		.it_value.tv_nsec = 1000 * 1000,
		.it_interval.tv_sec = 0,
		.it_interval.tv_nsec = 1000 * 1000,
	};
	timer_t id;

	/* 1/10 seconds to ensure the leader sleeps */
	usleep(10000);

	ctd_count = 100;
	if (timer_create(CLOCK_PROCESS_CPUTIME_ID, NULL, &id))
		return "Can't create timer";
	if (timer_settime(id, 0, &val, NULL))
		return "Can't set timer";

	while (ctd_count > 0 && !ctd_failed)
		;

	if (timer_delete(id))
		return "Can't delete timer";

	return NULL;
}

/*
 * Test that only the running thread receives the timer signal.
 */
static int check_timer_distribution(void)
{
	const char *errmsg;

	signal(SIGALRM, ctd_sighandler);

	errmsg = "Can't create thread";
	if (pthread_create(&ctd_thread, NULL, ctd_thread_func, NULL))
		goto err;

	errmsg = "Can't join thread";
	if (pthread_join(ctd_thread, (void **)&errmsg) || errmsg)
		goto err;

	if (ctd_failed)
		ksft_test_result_skip("No signal distribution. Assuming old kernel\n");
	else
		ksft_test_result_pass("check signal distribution\n");

	return 0;
err:
	ksft_print_msg(errmsg);
	return -1;
}
Thomas Gleixner April 6, 2024, 10 p.m. UTC | #2
On Sat, Apr 06 2024 at 17:10, Oleg Nesterov wrote:
> Yes, this changes the "semantics" of check_timer_distribution(), perhaps it
> should be renamed.

Definitely.

> But I do not see a better approach, and in fact I think that
>
> 	Test that all running threads _eventually_ receive CLOCK_PROCESS_CPUTIME_ID
>
> is the wrong goal.
>
> Do you agree?

No argument from my side. All we can test is that the leader is not
woken up.

Thanks,

        tglx
Dmitry Vyukov April 8, 2024, 8:30 a.m. UTC | #3
On Sat, 6 Apr 2024 at 17:12, Oleg Nesterov <oleg@redhat.com> wrote:
>
> Dmitry, Thomas,
>
> To simplify the review I've attached the code with this patch applied below.
>
> Yes, this changes the "semantics" of check_timer_distribution(), perhaps it
> should be renamed.
>
> But I do not see a better approach, and in fact I think that
>
>         Test that all running threads _eventually_ receive CLOCK_PROCESS_CPUTIME_ID
>
> is the wrong goal.
>
> Do you agree?
>
> Oleg.
> -------------------------------------------------------------------------------
>
> static pthread_t ctd_thread;
> static volatile int ctd_count, ctd_failed;
>
> static void ctd_sighandler(int sig)
> {
>         if (pthread_self() != ctd_thread)
>                 ctd_failed = 1;
>         ctd_count--;
> }
>
> static void *ctd_thread_func(void *arg)
> {
>         struct itimerspec val = {
>                 .it_value.tv_sec = 0,
>                 .it_value.tv_nsec = 1000 * 1000,
>                 .it_interval.tv_sec = 0,
>                 .it_interval.tv_nsec = 1000 * 1000,
>         };
>         timer_t id;
>
>         /* 1/10 seconds to ensure the leader sleeps */
>         usleep(10000);
>
>         ctd_count = 100;
>         if (timer_create(CLOCK_PROCESS_CPUTIME_ID, NULL, &id))
>                 return "Can't create timer";
>         if (timer_settime(id, 0, &val, NULL))
>                 return "Can't set timer";
>
>         while (ctd_count > 0 && !ctd_failed)
>                 ;
>
>         if (timer_delete(id))
>                 return "Can't delete timer";
>
>         return NULL;
> }
>
> /*
>  * Test that only the running thread receives the timer signal.
>  */
> static int check_timer_distribution(void)
> {
>         const char *errmsg;
>
>         signal(SIGALRM, ctd_sighandler);
>
>         errmsg = "Can't create thread";
>         if (pthread_create(&ctd_thread, NULL, ctd_thread_func, NULL))
>                 goto err;
>
>         errmsg = "Can't join thread";
>         if (pthread_join(ctd_thread, (void **)&errmsg) || errmsg)
>                 goto err;
>
>         if (ctd_failed)
>                 ksft_test_result_skip("No signal distribution. Assuming old kernel\n");

Shouldn't the test fail here? The goal of a test is to fail when
things don't work.
I don't see any other ksft_test_result_fail() calls, and it does not
look that the test will hang on incorrect distribution.


>         else
>                 ksft_test_result_pass("check signal distribution\n");
>
>         return 0;
> err:
>         ksft_print_msg(errmsg);
>         return -1;
> }
>
Thomas Gleixner April 8, 2024, 10:01 a.m. UTC | #4
On Mon, Apr 08 2024 at 10:30, Dmitry Vyukov wrote:
> On Sat, 6 Apr 2024 at 17:12, Oleg Nesterov <oleg@redhat.com> wrote:
>>         if (ctd_failed)
>>                 ksft_test_result_skip("No signal distribution. Assuming old kernel\n");
>
> Shouldn't the test fail here? The goal of a test is to fail when
> things don't work.
> I don't see any other ksft_test_result_fail() calls, and it does not
> look that the test will hang on incorrect distribution.

I have a fixup for older kernels. I'll get to Olegs patch and the fixup
later today.

Thanks,

        tglx
Oleg Nesterov April 8, 2024, 10:26 a.m. UTC | #5
On 04/08, Dmitry Vyukov wrote:
>
> >
> >         if (ctd_failed)
> >                 ksft_test_result_skip("No signal distribution. Assuming old kernel\n");
>
> Shouldn't the test fail here? The goal of a test is to fail when
> things don't work.

I've copied this from the previous patch from Thomas, I am fine
either way.

> I don't see any other ksft_test_result_fail() calls, and it does not
> look that the test will hang on incorrect distribution.

Yes, it should never hang.

Thanks,

Oleg.
Oleg Nesterov April 8, 2024, 6:49 p.m. UTC | #6
On 04/08, Oleg Nesterov wrote:
>
> On 04/08, Dmitry Vyukov wrote:
> >
> > >
> > >         if (ctd_failed)
> > >                 ksft_test_result_skip("No signal distribution. Assuming old kernel\n");
> >
> > Shouldn't the test fail here? The goal of a test is to fail when
> > things don't work.
>
> I've copied this from the previous patch from Thomas, I am fine
> either way.
>
> > I don't see any other ksft_test_result_fail() calls, and it does not
> > look that the test will hang on incorrect distribution.
>
> Yes, it should never hang.

Forgot to say...

To me this test should simply do

	ksft_test_result(!ctd_failed, "check signal distribution\n");
	return 0;

but I am not familiar with tools/testing/selftests/ and I am not sure
I understand the last email from Thomas.

I agree with whatever you and Thomas decide.

Oleg.
Thomas Gleixner April 8, 2024, 10:17 p.m. UTC | #7
On Mon, Apr 08 2024 at 20:49, Oleg Nesterov wrote:
> To me this test should simply do
>
> 	ksft_test_result(!ctd_failed, "check signal distribution\n");
> 	return 0;

Right.

> but I am not familiar with tools/testing/selftests/ and I am not sure
> I understand the last email from Thomas.

The discussion started about running new tests on older kernels. As this
is a feature and not a bug fix that obviously fails on older kernels.

So something like the uncompiled below should work.

Thanks,

        tglx
---
--- a/tools/testing/selftests/timers/posix_timers.c
+++ b/tools/testing/selftests/timers/posix_timers.c
@@ -184,80 +184,83 @@ static int check_timer_create(int which)
 	return 0;
 }
 
-int remain;
-__thread int got_signal;
+static pthread_t ctd_thread;
+static volatile int ctd_count, ctd_failed;
 
-static void *distribution_thread(void *arg)
+static void ctd_sighandler(int sig)
 {
-	while (__atomic_load_n(&remain, __ATOMIC_RELAXED));
-	return NULL;
-}
-
-static void distribution_handler(int nr)
-{
-	if (!__atomic_exchange_n(&got_signal, 1, __ATOMIC_RELAXED))
-		__atomic_fetch_sub(&remain, 1, __ATOMIC_RELAXED);
+	if (pthread_self() != ctd_thread)
+		ctd_failed = 1;
+	ctd_count--;
 }
 
-/*
- * Test that all running threads _eventually_ receive CLOCK_PROCESS_CPUTIME_ID
- * timer signals. This primarily tests that the kernel does not favour any one.
- */
-static int check_timer_distribution(void)
+static void *ctd_thread_func(void *arg)
 {
-	int err, i;
-	timer_t id;
-	const int nthreads = 10;
-	pthread_t threads[nthreads];
 	struct itimerspec val = {
 		.it_value.tv_sec = 0,
 		.it_value.tv_nsec = 1000 * 1000,
 		.it_interval.tv_sec = 0,
 		.it_interval.tv_nsec = 1000 * 1000,
 	};
+	timer_t id;
 
-	remain = nthreads + 1;  /* worker threads + this thread */
-	signal(SIGALRM, distribution_handler);
-	err = timer_create(CLOCK_PROCESS_CPUTIME_ID, NULL, &id);
-	if (err < 0) {
-		ksft_perror("Can't create timer");
-		return -1;
-	}
-	err = timer_settime(id, 0, &val, NULL);
-	if (err < 0) {
-		ksft_perror("Can't set timer");
-		return -1;
-	}
+	/* 1/10 seconds to ensure the leader sleeps */
+	usleep(10000);
 
-	for (i = 0; i < nthreads; i++) {
-		err = pthread_create(&threads[i], NULL, distribution_thread,
-				     NULL);
-		if (err) {
-			ksft_print_msg("Can't create thread: %s (%d)\n",
-				       strerror(errno), errno);
-			return -1;
-		}
-	}
+	ctd_count = 100;
+	if (timer_create(CLOCK_PROCESS_CPUTIME_ID, NULL, &id))
+		return "Can't create timer";
+	if (timer_settime(id, 0, &val, NULL))
+		return "Can't set timer";
 
-	/* Wait for all threads to receive the signal. */
-	while (__atomic_load_n(&remain, __ATOMIC_RELAXED));
+	while (ctd_count > 0 && !ctd_failed)
+		;
 
-	for (i = 0; i < nthreads; i++) {
-		err = pthread_join(threads[i], NULL);
-		if (err) {
-			ksft_print_msg("Can't join thread: %s (%d)\n",
-				       strerror(errno), errno);
-			return -1;
-		}
-	}
+	if (timer_delete(id))
+		return "Can't delete timer";
 
-	if (timer_delete(id)) {
-		ksft_perror("Can't delete timer");
+	return NULL;
+}
+
+static bool check_kernel_version(unsigned int min_major, unsigned int min_minor)
+{
+	unsigned int major, minor;
+	struct utsname info;
+
+	uname(&info);
+	if (sscanf(info.release, "%u.%u.", &major, &minor) != 2)
+		ksft_exit_fail();
+	return major > min_major || (major == min_major && minor >= min_minor);
+}
+
+/*
+ * Test that only the running thread receives the timer signal.
+ */
+static int check_timer_distribution(void)
+{
+	const char *errmsg;
+
+	if (!check_kernel_version(6, 3)) {
+		ksft_test_result_skip("check signal distribution (old kernel)\n");
 		return 0;
 	}
 
-	ksft_test_result_pass("check_timer_distribution\n");
+	signal(SIGALRM, ctd_sighandler);
+
+	errmsg = "Can't create thread";
+	if (pthread_create(&ctd_thread, NULL, ctd_thread_func, NULL))
+		goto err;
+
+	errmsg = "Can't join thread";
+	if (pthread_join(ctd_thread, (void **)&errmsg) || errmsg)
+		goto err;
+
+	ksft_test_result(!ctd_failed, "check signal distribution\n");
 	return 0;
+
+err:
+	ksft_print_msg(errmsg);
+	return -1;
 }
 
 int main(int argc, char **argv)
Oleg Nesterov April 9, 2024, 11:10 a.m. UTC | #8
On 04/09, Thomas Gleixner wrote:
>
> The discussion started about running new tests on older kernels. As this
> is a feature and not a bug fix that obviously fails on older kernels.

OK, I see... please see below.

> So something like the uncompiled below should work.

Hmm... this patch doesn't apply to Linus's tree...

It seems that this is because in your tree check_timer_distribution() does

	if (timer_delete(id)) {
		ksft_perror("Can't delete timer");
		return 0;
	}

while in Linus's tree it returns -1 if timer_delete() fails. Nevermind.

Thomas, I am almost shy to continue this discussion and waste your time ;)
But ...

> +static bool check_kernel_version(unsigned int min_major, unsigned int min_minor)
> +{
> +	unsigned int major, minor;
> +	struct utsname info;
> +
> +	uname(&info);
> +	if (sscanf(info.release, "%u.%u.", &major, &minor) != 2)
> +		ksft_exit_fail();
> +	return major > min_major || (major == min_major && minor >= min_minor);
> +}

this looks useful regardless. Perhaps it should be moved into
tools/testing/selftests/kselftest.h as ksft_ck_kernel_version() ?

> +static int check_timer_distribution(void)
> +{
> +	const char *errmsg;
> +
> +	if (!check_kernel_version(6, 3)) {
> +		ksft_test_result_skip("check signal distribution (old kernel)\n");
>  		return 0;

...

> +	ksft_test_result(!ctd_failed, "check signal distribution\n");

Perhaps

	if (!ctd_failed)
		ksft_test_result_pass("check signal distribution\n");
	else if (check_kernel_version(6, 3))
		ksft_test_result_fail("check signal distribution\n");
	else
		ksft_test_result_skip("check signal distribution (old kernel)\n");

makes more sense?

This way it can be used on the older kernels with bcb7ee79029d backported.

Oleg.
Dmitry Vyukov April 9, 2024, 11:45 a.m. UTC | #9
On Tue, 9 Apr 2024 at 13:12, Oleg Nesterov <oleg@redhat.com> wrote:
>
> On 04/09, Thomas Gleixner wrote:
> >
> > The discussion started about running new tests on older kernels. As this
> > is a feature and not a bug fix that obviously fails on older kernels.
>
> OK, I see... please see below.
>
> > So something like the uncompiled below should work.
>
> Hmm... this patch doesn't apply to Linus's tree...
>
> It seems that this is because in your tree check_timer_distribution() does
>
>         if (timer_delete(id)) {
>                 ksft_perror("Can't delete timer");
>                 return 0;
>         }
>
> while in Linus's tree it returns -1 if timer_delete() fails. Nevermind.
>
> Thomas, I am almost shy to continue this discussion and waste your time ;)
> But ...
>
> > +static bool check_kernel_version(unsigned int min_major, unsigned int min_minor)
> > +{
> > +     unsigned int major, minor;
> > +     struct utsname info;
> > +
> > +     uname(&info);
> > +     if (sscanf(info.release, "%u.%u.", &major, &minor) != 2)
> > +             ksft_exit_fail();
> > +     return major > min_major || (major == min_major && minor >= min_minor);
> > +}
>
> this looks useful regardless. Perhaps it should be moved into
> tools/testing/selftests/kselftest.h as ksft_ck_kernel_version() ?
>
> > +static int check_timer_distribution(void)
> > +{
> > +     const char *errmsg;
> > +
> > +     if (!check_kernel_version(6, 3)) {
> > +             ksft_test_result_skip("check signal distribution (old kernel)\n");
> >               return 0;
>
> ...
>
> > +     ksft_test_result(!ctd_failed, "check signal distribution\n");
>
> Perhaps
>
>         if (!ctd_failed)
>                 ksft_test_result_pass("check signal distribution\n");
>         else if (check_kernel_version(6, 3))
>                 ksft_test_result_fail("check signal distribution\n");
>         else
>                 ksft_test_result_skip("check signal distribution (old kernel)\n");
>
> makes more sense?

This looks even better!

> This way it can be used on the older kernels with bcb7ee79029d backported.
>
> Oleg.
>
Thomas Gleixner April 9, 2024, 12:02 p.m. UTC | #10
On Tue, Apr 09 2024 at 13:10, Oleg Nesterov wrote:
> On 04/09, Thomas Gleixner wrote:
> It seems that this is because in your tree check_timer_distribution() does
>
> 	if (timer_delete(id)) {
> 		ksft_perror("Can't delete timer");
> 		return 0;
> 	}
>
> while in Linus's tree it returns -1 if timer_delete()
> fails. Nevermind.

Ooops.

>> +static bool check_kernel_version(unsigned int min_major, unsigned int min_minor)
>> +{
>> +	unsigned int major, minor;
>> +	struct utsname info;
>> +
>> +	uname(&info);
>> +	if (sscanf(info.release, "%u.%u.", &major, &minor) != 2)
>> +		ksft_exit_fail();
>> +	return major > min_major || (major == min_major && minor >= min_minor);
>> +}
>
> this looks useful regardless. Perhaps it should be moved into
> tools/testing/selftests/kselftest.h as ksft_ck_kernel_version() ?

Makes sense.

>> +static int check_timer_distribution(void)
>> +{
>> +	const char *errmsg;
>> +
>> +	if (!check_kernel_version(6, 3)) {
>> +		ksft_test_result_skip("check signal distribution (old kernel)\n");
>>  		return 0;
>
> ..
>
>> +	ksft_test_result(!ctd_failed, "check signal distribution\n");
>
> Perhaps
>
> 	if (!ctd_failed)
> 		ksft_test_result_pass("check signal distribution\n");
> 	else if (check_kernel_version(6, 3))
> 		ksft_test_result_fail("check signal distribution\n");
> 	else
> 		ksft_test_result_skip("check signal distribution (old kernel)\n");
>
> makes more sense?
>
> This way it can be used on the older kernels with bcb7ee79029d backported.

Indeed.
Mark Brown April 11, 2024, 12:41 p.m. UTC | #11
On Sat, Apr 06, 2024 at 05:09:51PM +0200, Oleg Nesterov wrote:
> Thomas says:
> 
> 	The signal distribution test has a tendency to hang for a long
> 	time as the signal delivery is not really evenly distributed. In
> 	fact it might never be distributed across all threads ever in
> 	the way it is written.
> 
> To me even the
> 
> 	This primarily tests that the kernel does not favour any one.
> 
> comment doesn't look right. The kernel does favour a thread which hits
> the timer interrupt when CLOCK_PROCESS_CPUTIME_ID expires.
> 
> The new version simply checks that the group leader sleeping in join()
> never receives SIGALRM, cpu_timer_fire() should always send the signal
> to the thread which burns cpu.
> 
> Without the commit bcb7ee79029d ("posix-timers: Prefer delivery of signals
> to the current thread") the test-case fails immediately, the very 1st tick
> wakes the leader up. Otherwise it quickly succeeds after 100 ticks.

This has landed in -next and is causing warning spam throughout
kselftest when built with clang:

/home/broonie/git/bisect/tools/testing/selftests/kselftest.h:435:6: warning: variable 'major' is used uninitialized whenever '||' condition is true [-Wsometimes-uninitialized]
        if (uname(&info) || sscanf(info.release, "%u.%u.", &major, &minor) != 2)
            ^~~~~~~~~~~~
/home/broonie/git/bisect/tools/testing/selftests/kselftest.h:438:9: note: uninitialized use occurs here
        return major > min_major || (major == min_major && minor >= min_minor);
               ^~~~~
/home/broonie/git/bisect/tools/testing/selftests/kselftest.h:435:6: note: remove the '||' if its condition is always false
        if (uname(&info) || sscanf(info.release, "%u.%u.", &major, &minor) != 2)
            ^~~~~~~~~~~~~~~
/home/broonie/git/bisect/tools/testing/selftests/kselftest.h:432:20: note: initialize the variable 'major' to silence this warning
        unsigned int major, minor;
                          ^
                           = 0
Mark Brown April 11, 2024, 12:44 p.m. UTC | #12
On Sat, Apr 06, 2024 at 05:09:51PM +0200, Oleg Nesterov wrote:
> Thomas says:
> 
> 	The signal distribution test has a tendency to hang for a long
> 	time as the signal delivery is not really evenly distributed. In
> 	fact it might never be distributed across all threads ever in
> 	the way it is written.
> 
> To me even the
> 
> 	This primarily tests that the kernel does not favour any one.

Further to my previous mail it's also broken the arm64 selftest builds,
they use kselftest.h with nolibc in order to test low level
functionality mainly used by libc implementations and nolibc doesn't
implement uname():

In file included from za-fork.c:12:
../../kselftest.h:433:17: error: variable has incomplete type 'struct utsname'
        struct utsname info;
                       ^
../../kselftest.h:433:9: note: forward declaration of 'struct utsname'
        struct utsname info;
               ^
../../kselftest.h:435:6: error: call to undeclared function 'uname'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration]
        if (uname(&info) || sscanf(info.release, "%u.%u.", &major, &minor) != 2)
            ^
../../kselftest.h:435:22: error: call to undeclared function 'sscanf'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration]
        if (uname(&info) || sscanf(info.release, "%u.%u.", &major, &minor) != 2)
                            ^
1 warning and 3 errors generated.
Thomas Gleixner April 11, 2024, 2:17 p.m. UTC | #13
On Thu, Apr 11 2024 at 13:44, Mark Brown wrote:
> On Sat, Apr 06, 2024 at 05:09:51PM +0200, Oleg Nesterov wrote:
>> Thomas says:
>> 
>> 	The signal distribution test has a tendency to hang for a long
>> 	time as the signal delivery is not really evenly distributed. In
>> 	fact it might never be distributed across all threads ever in
>> 	the way it is written.
>> 
>> To me even the
>> 
>> 	This primarily tests that the kernel does not favour any one.
>
> Further to my previous mail it's also broken the arm64 selftest builds,
> they use kselftest.h with nolibc in order to test low level
> functionality mainly used by libc implementations and nolibc doesn't
> implement uname():
>
> In file included from za-fork.c:12:
> ../../kselftest.h:433:17: error: variable has incomplete type 'struct utsname'
>         struct utsname info;
>                        ^
> ../../kselftest.h:433:9: note: forward declaration of 'struct utsname'
>         struct utsname info;
>                ^
> ../../kselftest.h:435:6: error: call to undeclared function 'uname'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration]
>         if (uname(&info) || sscanf(info.release, "%u.%u.", &major, &minor) != 2)
>             ^
> ../../kselftest.h:435:22: error: call to undeclared function 'sscanf'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration]
>         if (uname(&info) || sscanf(info.release, "%u.%u.", &major, &minor) != 2)

Grrr. Let me stare at this.
John Stultz April 11, 2024, 3:33 p.m. UTC | #14
On Thu, Apr 11, 2024 at 5:42 AM Mark Brown <broonie@kernel.org> wrote:
> On Sat, Apr 06, 2024 at 05:09:51PM +0200, Oleg Nesterov wrote:
> > Without the commit bcb7ee79029d ("posix-timers: Prefer delivery of signals
> > to the current thread") the test-case fails immediately, the very 1st tick
> > wakes the leader up. Otherwise it quickly succeeds after 100 ticks.
>
> This has landed in -next and is causing warning spam throughout
> kselftest when built with clang:
>
> /home/broonie/git/bisect/tools/testing/selftests/kselftest.h:435:6: warning: variable 'major' is used uninitialized whenever '||' condition is true [-Wsometimes-uninitialized]
>         if (uname(&info) || sscanf(info.release, "%u.%u.", &major, &minor) != 2)
>             ^~~~~~~~~~~~
> /home/broonie/git/bisect/tools/testing/selftests/kselftest.h:438:9: note: uninitialized use occurs here
>         return major > min_major || (major == min_major && minor >= min_minor);
>                ^~~~~
> /home/broonie/git/bisect/tools/testing/selftests/kselftest.h:435:6: note: remove the '||' if its condition is always false
>         if (uname(&info) || sscanf(info.release, "%u.%u.", &major, &minor) != 2)
>             ^~~~~~~~~~~~~~~
> /home/broonie/git/bisect/tools/testing/selftests/kselftest.h:432:20: note: initialize the variable 'major' to silence this warning
>         unsigned int major, minor;
>                           ^
>                            = 0

I hit this one too yesterday and included a fix for it here:
  https://lore.kernel.org/lkml/20240410232637.4135564-2-jstultz@google.com/

thanks
-john
Oleg Nesterov April 11, 2024, 3:50 p.m. UTC | #15
On 04/11, Thomas Gleixner wrote:
>
> On Thu, Apr 11 2024 at 13:44, Mark Brown wrote:
> >
> > Further to my previous mail it's also broken the arm64 selftest builds,
> > they use kselftest.h with nolibc in order to test low level
> > functionality mainly used by libc implementations and nolibc doesn't
> > implement uname():
> >
> > In file included from za-fork.c:12:
> > ../../kselftest.h:433:17: error: variable has incomplete type 'struct utsname'
> >         struct utsname info;
> >                        ^
> > ../../kselftest.h:433:9: note: forward declaration of 'struct utsname'
> >         struct utsname info;
> >                ^
> > ../../kselftest.h:435:6: error: call to undeclared function 'uname'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration]
> >         if (uname(&info) || sscanf(info.release, "%u.%u.", &major, &minor) != 2)
> >             ^
> > ../../kselftest.h:435:22: error: call to undeclared function 'sscanf'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration]
> >         if (uname(&info) || sscanf(info.release, "%u.%u.", &major, &minor) != 2)
>
> Grrr. Let me stare at this.

Damn ;)

Can't we just turn ksft_min_kernel_version() into

	static inline int ksft_min_kernel_version(unsigned int min_major,
						  unsigned int min_minor)
	{
	#ifdef NOLIBC
		return -1;
	#else
		unsigned int major, minor;
		struct utsname info;

		if (uname(&info) || sscanf(info.release, "%u.%u.", &major, &minor) != 2)
		       ksft_exit_fail_msg("Can't parse kernel version\n");

		return major > min_major || (major == min_major && minor >= min_minor);
	#endif
	}

?

Not sure what should check_timer_distribution() do in this case, to me
ksft_test_result_fail() is fine.

Oleg.
Mark Brown April 11, 2024, 4:03 p.m. UTC | #16
On Thu, Apr 11, 2024 at 05:50:53PM +0200, Oleg Nesterov wrote:
> On 04/11, Thomas Gleixner wrote:

> > Grrr. Let me stare at this.

> Damn ;)

> Can't we just turn ksft_min_kernel_version() into

> 	static inline int ksft_min_kernel_version(unsigned int min_major,
> 						  unsigned int min_minor)
> 	{
> 	#ifdef NOLIBC
> 		return -1;
> 	#else

That'd probably work well enough here.  I think it's reasonable for
someone who wants to build a test that uses ksft_min_kernel_version()
with nolibc to figure out how to implement it, right now it's not
actually getting used with nolibc and just happens to be seen due to
being in the same header.

> Not sure what should check_timer_distribution() do in this case, to me
> ksft_test_result_fail() is fine.

I'd go with skip but yeah.
diff mbox series

Patch

diff --git a/tools/testing/selftests/timers/posix_timers.c b/tools/testing/selftests/timers/posix_timers.c
index d49dd3ffd0d9..2586a6552737 100644
--- a/tools/testing/selftests/timers/posix_timers.c
+++ b/tools/testing/selftests/timers/posix_timers.c
@@ -184,80 +184,70 @@  static int check_timer_create(int which)
 	return 0;
 }
 
-int remain;
-__thread int got_signal;
+static pthread_t ctd_thread;
+static volatile int ctd_count, ctd_failed;
 
-static void *distribution_thread(void *arg)
+static void ctd_sighandler(int sig)
 {
-	while (__atomic_load_n(&remain, __ATOMIC_RELAXED));
-	return NULL;
+	if (pthread_self() != ctd_thread)
+		ctd_failed = 1;
+	ctd_count--;
 }
 
-static void distribution_handler(int nr)
+static void *ctd_thread_func(void *arg)
 {
-	if (!__atomic_exchange_n(&got_signal, 1, __ATOMIC_RELAXED))
-		__atomic_fetch_sub(&remain, 1, __ATOMIC_RELAXED);
-}
-
-/*
- * Test that all running threads _eventually_ receive CLOCK_PROCESS_CPUTIME_ID
- * timer signals. This primarily tests that the kernel does not favour any one.
- */
-static int check_timer_distribution(void)
-{
-	int err, i;
-	timer_t id;
-	const int nthreads = 10;
-	pthread_t threads[nthreads];
 	struct itimerspec val = {
 		.it_value.tv_sec = 0,
 		.it_value.tv_nsec = 1000 * 1000,
 		.it_interval.tv_sec = 0,
 		.it_interval.tv_nsec = 1000 * 1000,
 	};
+	timer_t id;
 
-	remain = nthreads + 1;  /* worker threads + this thread */
-	signal(SIGALRM, distribution_handler);
-	err = timer_create(CLOCK_PROCESS_CPUTIME_ID, NULL, &id);
-	if (err < 0) {
-		ksft_perror("Can't create timer");
-		return -1;
-	}
-	err = timer_settime(id, 0, &val, NULL);
-	if (err < 0) {
-		ksft_perror("Can't set timer");
-		return -1;
-	}
+	/* 1/10 seconds to ensure the leader sleeps */
+	usleep(10000);
 
-	for (i = 0; i < nthreads; i++) {
-		err = pthread_create(&threads[i], NULL, distribution_thread,
-				     NULL);
-		if (err) {
-			ksft_print_msg("Can't create thread: %s (%d)\n",
-				       strerror(errno), errno);
-			return -1;
-		}
-	}
+	ctd_count = 100;
+	if (timer_create(CLOCK_PROCESS_CPUTIME_ID, NULL, &id))
+		return "Can't create timer";
+	if (timer_settime(id, 0, &val, NULL))
+		return "Can't set timer";
 
-	/* Wait for all threads to receive the signal. */
-	while (__atomic_load_n(&remain, __ATOMIC_RELAXED));
+	while (ctd_count > 0 && !ctd_failed)
+		;
 
-	for (i = 0; i < nthreads; i++) {
-		err = pthread_join(threads[i], NULL);
-		if (err) {
-			ksft_print_msg("Can't join thread: %s (%d)\n",
-				       strerror(errno), errno);
-			return -1;
-		}
-	}
+	if (timer_delete(id))
+		return "Can't delete timer";
 
-	if (timer_delete(id)) {
-		ksft_perror("Can't delete timer");
-		return -1;
-	}
+	return NULL;
+}
+
+/*
+ * Test that only the running thread receives the timer signal.
+ */
+static int check_timer_distribution(void)
+{
+	const char *errmsg;
+
+	signal(SIGALRM, ctd_sighandler);
+
+	errmsg = "Can't create thread";
+	if (pthread_create(&ctd_thread, NULL, ctd_thread_func, NULL))
+		goto err;
+
+	errmsg = "Can't join thread";
+	if (pthread_join(ctd_thread, (void **)&errmsg) || errmsg)
+		goto err;
+
+	if (ctd_failed)
+		ksft_test_result_skip("No signal distribution. Assuming old kernel\n");
+	else
+		ksft_test_result_pass("check signal distribution\n");
 
-	ksft_test_result_pass("check_timer_distribution\n");
 	return 0;
+err:
+	ksft_print_msg(errmsg);
+	return -1;
 }
 
 int main(int argc, char **argv)