diff mbox series

[v4,4/4] KVM: selftests: Run dirty_log_perf_test on specific CPUs

Message ID 20221006171133.372359-5-vipinsh@google.com (mailing list archive)
State New, archived
Headers show
Series dirty_log_perf_test CPU pinning | expand

Commit Message

Vipin Sharma Oct. 6, 2022, 5:11 p.m. UTC
Add command line options, -c,  to run the vCPUs and optionally the main
process on the specific CPUs on a host machine. This is useful as it
provides a way to analyze performance based on the vCPUs and dirty log
worker locations, like on the different numa nodes or on the same numa
nodes.

Link: https://lore.kernel.org/lkml/20220801151928.270380-1-vipinsh@google.com
Signed-off-by: Vipin Sharma <vipinsh@google.com>
Suggested-by: David Matlack <dmatlack@google.com>
Suggested-by: Sean Christopherson <seanjc@google.com>
Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
---
 .../selftests/kvm/dirty_log_perf_test.c       | 23 +++++++-
 .../selftests/kvm/include/perf_test_util.h    |  6 ++
 .../selftests/kvm/lib/perf_test_util.c        | 58 ++++++++++++++++++-
 3 files changed, 84 insertions(+), 3 deletions(-)

Comments

Sean Christopherson Oct. 6, 2022, 7:50 p.m. UTC | #1
On Thu, Oct 06, 2022, Vipin Sharma wrote:
> Add command line options, -c,  to run the vCPUs and optionally the main

Stale, there's only one option now.   Extra space too.  And an uber nit, just
"vCPUs" instead of "the vCPUs".  And if you introduce "pin" here (though that's
probably unnecessary since it's nearly ubiquitous terminology), it can be used
throughout to be more succinct, e.g.

Add a command line topion, -c, to pin vCPUs to physical CPUs (pCPUS), i.e.
to force vCPUs to run on specific pCPUs.

> process on the specific CPUs on a host machine. This is useful as it
> provides a way to analyze performance based on the vCPUs and dirty log
> worker locations, like on the different numa nodes or on the same numa

s/numa/NUMA

> nodes.

Please add a link to the discussion that led to implementing only 1:1 pinning,
along with a brief blurb to call out that this is deliberately "limited" in order
to keep things simple.

> Link: https://lore.kernel.org/lkml/20220801151928.270380-1-vipinsh@google.com
> Signed-off-by: Vipin Sharma <vipinsh@google.com>
> Suggested-by: David Matlack <dmatlack@google.com>
> Suggested-by: Sean Christopherson <seanjc@google.com>
> Suggested-by: Paolo Bonzini <pbonzini@redhat.com>

Suggested-by: is generally intended to document that someone else came up with
the original idea.  Even for significant massages to APIs like this, there's no
need to add Suggested-by for me as I most definitely didn't come up with the
original idea of pinning vCPUs.  It's obviously not a big deal, but sometimes
knowing who originally suggested something does help provide context.

> ---
>  .../selftests/kvm/dirty_log_perf_test.c       | 23 +++++++-
>  .../selftests/kvm/include/perf_test_util.h    |  6 ++
>  .../selftests/kvm/lib/perf_test_util.c        | 58 ++++++++++++++++++-
>  3 files changed, 84 insertions(+), 3 deletions(-)
> 
> diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c
> index ecda802b78ff..33f83e423f75 100644
> --- a/tools/testing/selftests/kvm/dirty_log_perf_test.c
> +++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c
> @@ -353,7 +353,7 @@ static void help(char *name)
>  	puts("");
>  	printf("usage: %s [-h] [-i iterations] [-p offset] [-g] "
>  	       "[-m mode] [-n] [-b vcpu bytes] [-v vcpus] [-o] [-s mem type]"
> -	       "[-x memslots]\n", name);
> +	       "[-x memslots] [-c physical cpus to run test on]\n", name);
>  	puts("");
>  	printf(" -i: specify iteration counts (default: %"PRIu64")\n",
>  	       TEST_HOST_LOOP_N);
> @@ -383,6 +383,18 @@ static void help(char *name)
>  	backing_src_help("-s");
>  	printf(" -x: Split the memory region into this number of memslots.\n"
>  	       "     (default: 1)\n");
> +	printf(" -c: Comma separated values of the physical CPUs, which will run\n"

Lead with what the option does, then dive into gory details.  In most cases, a user
is going to dump the help text to find something specific (I constantly forget params)
or to get a general idea of what's possible.  E.g. someone should have a clear
understanding of _what_ the command line option does after the first blurb, without
having to understand the details of the command.

> +	       "     the vCPUs, optionally, followed by the main application thread cpu.\n"
> +	       "     Number of values must be at least the number of vCPUs.\n"
> +	       "     The very next number is used to pin main application thread.\n\n"
> +	       "     Example: ./dirty_log_perf_test -v 3 -c 22,23,24,50\n"
> +	       "     This means that the vcpu 0 will run on the physical cpu 22,\n"
> +	       "     vcpu 1 on the physical cpu 23, vcpu 2 on the physical cpu 24\n"
> +	       "     and the main thread will run on cpu 50.\n\n"

This can be much more brief by using common acronyms for pCPU, and simply stating
"pin" instead of "on the", e.g.

	printf(" -c: Pin tasks to physical CPUs.  Takes a list of comma separated\n"
	       "     values (target pCPU), one for each vCPU, plus an optional\n"
	       "     entry for the main application task (specified via entry\n"
	       "     <nr_vcpus + 1>).  If used, entries must be provided for all\n"
	       "     vCPUs, i.e. pinning vCPUs is all or nothing.\n\n"
	       "     Example: ./dirty_log_perf_test -v 3 -c 22,23,24,50\n"
	       "     will create 3 vCPUs, and pin vCPU0=>pCPU22, vCPU1=>pCPU23\n"
	       "     vCPU2=>pCPU24, and pin the application task to pCPU50.\n"
	       "     To leave the application task unpinned, drop the final entry:"
	       "       ./dirty_log_perf_test -v 3 -c 22,23,24\n\n"
	       "     (default: no pinning)\n");


> +	       "     Example: ./dirty_log_perf_test -v 3 -c 22,23,24\n"
> +	       "     Same as the previous example except now main application\n"
> +	       "     thread can run on any physical cpu\n\n"
> +	       "     (default: No cpu mapping)\n");
>  	puts("");
>  	exit(0);
>  }

...

> +static void pin_me_to_pcpu

Maybe s/me/this_task ?

> (int pcpu)

Unless we're using -1 as "don't pin" or "invalid", this should be an unsigned value.

> +{
> +	cpu_set_t cpuset;
> +	int err;
> +
> +	CPU_ZERO(&cpuset);
> +	CPU_SET(pcpu, &cpuset);

To save user pain:

	r = sched_getaffinity(0, sizeof(allowed_mask), &allowed_mask);
	TEST_ASSERT(!r, "sched_getaffinity failed, errno = %d (%s)", errno,
		    strerror(errno));

	TEST_ASSERT(CPU_ISSET(pcpu, &allowed_mask),
		    "Task '%d' not allowed to run on pCPU '%d'\n");

	CPU_ZERO(&allowed_mask);
	CPU_SET(cpu, &allowed_mask);

that way the user will get an explicit error message if they try to pin a vCPU/task
that has already been affined by something else.  And then, in theory,
sched_setaffinity() should never fail.

Or you could have two cpu_set_t objects and use CPU_AND(), but that seems
unnecessarily complex.

> +	err = sched_setaffinity(0, sizeof(cpu_set_t), &cpuset);
> +	TEST_ASSERT(err == 0, "sched_setaffinity() errored out for pcpu: %d\n", pcpu);

!err is the preferred style, though I vote to use "r" instead of "err".  And print
the errno so that the user can debug.

	r = sched_setaffinity(0, sizeof(allowed_mask), &allowed_mask);
	TEST_ASSERT(!r, "sched_setaffinity failed, errno = %d (%s)",
		    errno, strerror(errno));

> +}
> +
>  static void *vcpu_thread_main(void *data)
>  {
>  	struct vcpu_thread *vcpu = data;
> +	int idx = vcpu->vcpu_idx;
> +	struct perf_test_vcpu_args *vcpu_args = &perf_test_args.vcpu_args[idx];
> +
> +	if (perf_test_args.pin_vcpus)
> +		pin_me_to_pcpu(vcpu_args->pcpu);
>  
>  	WRITE_ONCE(vcpu->running, true);
>  
> @@ -255,7 +274,7 @@ static void *vcpu_thread_main(void *data)
>  	while (!READ_ONCE(all_vcpu_threads_running))
>  		;
>  
> -	vcpu_thread_fn(&perf_test_args.vcpu_args[vcpu->vcpu_idx]);
> +	vcpu_thread_fn(vcpu_args);
>  
>  	return NULL;
>  }
> @@ -292,3 +311,40 @@ void perf_test_join_vcpu_threads(int nr_vcpus)
>  	for (i = 0; i < nr_vcpus; i++)
>  		pthread_join(vcpu_threads[i].thread, NULL);
>  }
> +
> +static int pcpu_num(const char *cpu_str)
> +{
> +	int pcpu = atoi_paranoid(cpu_str);

newline after declaration.  Though maybe just omit this helper entirely?  As a
somewhat belated thought, it's trivial to let "-1" mean "don't pin this vCPU".
No idea if there's a use case for that, but it's not any more work to support.

Even if <0 is invalid, what about just having pin_task_to_pcu() do all the
sanity checking?  That way it's more obvious that that helper isn't failing to
sanity check the incoming value.

> +	TEST_ASSERT(pcpu >= 0, "Invalid cpu number: %d\n", pcpu);
> +	return pcpu;
> +}
> +
> +void perf_test_setup_pinning(const char *pcpus_string, int nr_vcpus)
> +{
> +	char delim[2] = ",";
> +	char *cpu, *cpu_list;
> +	int i = 0;
> +
> +	cpu_list = strdup(pcpus_string);
> +	TEST_ASSERT(cpu_list, "strdup() allocation failed.\n");
> +
> +	cpu = strtok(cpu_list, delim);
> +
> +	// 1. Get all pcpus for vcpus

No C++ comments please.

> +	while (cpu && i < nr_vcpus) {
> +		perf_test_args.vcpu_args[i++].pcpu = pcpu_num(cpu);
> +		cpu = strtok(NULL, delim);
> +	}
> +
> +	TEST_ASSERT(i == nr_vcpus,
> +		    "Number of pcpus (%d) not sufficient for the number of vcpus (%d).",
> +		    i, nr_vcpus);

Rather than assert after the fact, use a for-loop:

	for (i = 0; i < nr_vcpus; i++ {
		TEST_ASSERT(cpu, "pCPU not provided for vCPU%d\n", i);
		perf_test_args.vcpu_args[i++].pcpu = atoi_paranoid(cpu);
		cpu = strtok(NULL, delim);
	}

so as to avoid having to consume the loop control variable before and after the
loop.  Or even

	for (i = 0, cpu = strtok(cpu_list, delim);
	     i < nr_vcpus;
	     i++, cpu = strtok(NULL, delim)) {
		TEST_ASSERT(cpu, "pCPU not provided for vCPU%d\n", i);
		perf_test_args.vcpu_args[i++].pcpu = atoi_paranoid(cpu);
	}

Though IMO the latter is gratuitous and hard to read.

> +
> +	perf_test_args.pin_vcpus = true;
> +
> +	// 2. Check if main worker is provided
> +	if (cpu)
> +		pin_me_to_pcpu(pcpu_num(cpu));

Verify the string is now empty?  I.e. that there isn't trailing garbage.

> +
> +	free(cpu_list);
> +}
> -- 
> 2.38.0.rc1.362.ged0d419d3c-goog
>
Sean Christopherson Oct. 6, 2022, 8:26 p.m. UTC | #2
On Thu, Oct 06, 2022, Sean Christopherson wrote:
> On Thu, Oct 06, 2022, Vipin Sharma wrote:
> > +{
> > +	cpu_set_t cpuset;
> > +	int err;
> > +
> > +	CPU_ZERO(&cpuset);
> > +	CPU_SET(pcpu, &cpuset);
> 
> To save user pain:
> 
> 	r = sched_getaffinity(0, sizeof(allowed_mask), &allowed_mask);
> 	TEST_ASSERT(!r, "sched_getaffinity failed, errno = %d (%s)", errno,
> 		    strerror(errno));

Forgot TEST_ASSERT() already provides errno, this can just be:

	TEST_ASSERT(!r, "sched_getaffinity() failed");

> 
> 	TEST_ASSERT(CPU_ISSET(pcpu, &allowed_mask),
> 		    "Task '%d' not allowed to run on pCPU '%d'\n");
> 
> 	CPU_ZERO(&allowed_mask);
> 	CPU_SET(cpu, &allowed_mask);
> 
> that way the user will get an explicit error message if they try to pin a vCPU/task
> that has already been affined by something else.  And then, in theory,
> sched_setaffinity() should never fail.
> 
> Or you could have two cpu_set_t objects and use CPU_AND(), but that seems
> unnecessarily complex.
> 
> > +	err = sched_setaffinity(0, sizeof(cpu_set_t), &cpuset);
> > +	TEST_ASSERT(err == 0, "sched_setaffinity() errored out for pcpu: %d\n", pcpu);
> 
> !err is the preferred style, though I vote to use "r" instead of "err".  And print
> the errno so that the user can debug.

As above, ignore the last, forgot TEST_ASSERT() provides errno.

> 
> 	r = sched_setaffinity(0, sizeof(allowed_mask), &allowed_mask);
> 	TEST_ASSERT(!r, "sched_setaffinity failed, errno = %d (%s)",
> 		    errno, strerror(errno));
Vipin Sharma Oct. 6, 2022, 11:25 p.m. UTC | #3
On Thu, Oct 6, 2022 at 12:50 PM Sean Christopherson <seanjc@google.com> wrote:
>
> On Thu, Oct 06, 2022, Vipin Sharma wrote:
> ...
>
> > +static void pin_me_to_pcpu
>
> Maybe s/me/this_task ?

Sure.

>
> > (int pcpu)
>
> Unless we're using -1 as "don't pin" or "invalid", this should be an unsigned value.
>
> > +{
> > +     cpu_set_t cpuset;
> > +     int err;
> > +
> > +     CPU_ZERO(&cpuset);
> > +     CPU_SET(pcpu, &cpuset);
>
> To save user pain:
>
>         r = sched_getaffinity(0, sizeof(allowed_mask), &allowed_mask);
>         TEST_ASSERT(!r, "sched_getaffinity failed, errno = %d (%s)", errno,
>                     strerror(errno));
>
>         TEST_ASSERT(CPU_ISSET(pcpu, &allowed_mask),
>                     "Task '%d' not allowed to run on pCPU '%d'\n");
>
>         CPU_ZERO(&allowed_mask);
>         CPU_SET(cpu, &allowed_mask);
>
> that way the user will get an explicit error message if they try to pin a vCPU/task
> that has already been affined by something else.  And then, in theory,
> sched_setaffinity() should never fail.
>
> Or you could have two cpu_set_t objects and use CPU_AND(), but that seems
> unnecessarily complex.
>

sched_setaffinity() doesn't fail when we assign more than one task to
the pCPU, it allows multiple tasks to be on the same pCPU. One of the
reasons it fails is if it is provided a cpu number which is bigger
than what is actually available on the host.

I am not convinced that pinning vCPUs to the same pCPU should throw an
error. We should allow if someone wants to try and compare performance
by over subscribing or any valid combination they want to test.

...

> > +static int pcpu_num(const char *cpu_str)
> > +{
> > +     int pcpu = atoi_paranoid(cpu_str);
>
> newline after declaration.  Though maybe just omit this helper entirely?  As a
> somewhat belated thought, it's trivial to let "-1" mean "don't pin this vCPU".
> No idea if there's a use case for that, but it's not any more work to support.
>
> Even if <0 is invalid, what about just having pin_task_to_pcu() do all the
> sanity checking?  That way it's more obvious that that helper isn't failing to
> sanity check the incoming value.
>

This will go away with atoi_non_negative() API I will write in v5. I
won't even need this function then.

...

> > +     while (cpu && i < nr_vcpus) {
> > +             perf_test_args.vcpu_args[i++].pcpu = pcpu_num(cpu);
> > +             cpu = strtok(NULL, delim);
> > +     }
> > +
> > +     TEST_ASSERT(i == nr_vcpus,
> > +                 "Number of pcpus (%d) not sufficient for the number of vcpus (%d).",
> > +                 i, nr_vcpus);
>
> Rather than assert after the fact, use a for-loop:
>
>         for (i = 0; i < nr_vcpus; i++ {
>                 TEST_ASSERT(cpu, "pCPU not provided for vCPU%d\n", i);
>                 perf_test_args.vcpu_args[i++].pcpu = atoi_paranoid(cpu);
>                 cpu = strtok(NULL, delim);
>         }
>
> so as to avoid having to consume the loop control variable before and after the
> loop.  Or even
>
>         for (i = 0, cpu = strtok(cpu_list, delim);
>              i < nr_vcpus;
>              i++, cpu = strtok(NULL, delim)) {
>                 TEST_ASSERT(cpu, "pCPU not provided for vCPU%d\n", i);
>                 perf_test_args.vcpu_args[i++].pcpu = atoi_paranoid(cpu);
>         }
>
> Though IMO the latter is gratuitous and hard to read.
>

I will use the former one.

> > +
> > +     perf_test_args.pin_vcpus = true;
> > +
> > +     // 2. Check if main worker is provided
> > +     if (cpu)
> > +             pin_me_to_pcpu(pcpu_num(cpu));
>
> Verify the string is now empty?  I.e. that there isn't trailing garbage.
>

Okay, I will add the verification.

All other suggestions to which I haven't responded, I agree with them
and will make the changes in v5.
Sean Christopherson Oct. 7, 2022, 12:14 a.m. UTC | #4
On Thu, Oct 06, 2022, Vipin Sharma wrote:
> On Thu, Oct 6, 2022 at 12:50 PM Sean Christopherson <seanjc@google.com> wrote:
> > > +{
> > > +     cpu_set_t cpuset;
> > > +     int err;
> > > +
> > > +     CPU_ZERO(&cpuset);
> > > +     CPU_SET(pcpu, &cpuset);
> >
> > To save user pain:
> >
> >         r = sched_getaffinity(0, sizeof(allowed_mask), &allowed_mask);
> >         TEST_ASSERT(!r, "sched_getaffinity failed, errno = %d (%s)", errno,
> >                     strerror(errno));
> >
> >         TEST_ASSERT(CPU_ISSET(pcpu, &allowed_mask),
> >                     "Task '%d' not allowed to run on pCPU '%d'\n");
> >
> >         CPU_ZERO(&allowed_mask);
> >         CPU_SET(cpu, &allowed_mask);
> >
> > that way the user will get an explicit error message if they try to pin a vCPU/task
> > that has already been affined by something else.  And then, in theory,
> > sched_setaffinity() should never fail.
> >
> > Or you could have two cpu_set_t objects and use CPU_AND(), but that seems
> > unnecessarily complex.
> >
> 
> sched_setaffinity() doesn't fail when we assign more than one task to
> the pCPU, it allows multiple tasks to be on the same pCPU. One of the
> reasons it fails is if it is provided a cpu number which is bigger
> than what is actually available on the host.
> 
> I am not convinced that pinning vCPUs to the same pCPU should throw an
> error. We should allow if someone wants to try and compare performance
> by over subscribing or any valid combination they want to test.

Oh, I'm not talking about the user pinning multiple vCPUs to the same pCPU via
the test, I'm talking about the user, or more likely something in the users's
environment, restricting what pCPUs the user's tasks are allowed on.  E.g. if
the test is run in shell that has been restricted to CPU8 via cgroups, then
sched_setaffinity() will fail if the user tries to pin vCPUs to any other CPU.
Vipin Sharma Oct. 7, 2022, 5:39 p.m. UTC | #5
On Thu, Oct 6, 2022 at 5:14 PM Sean Christopherson <seanjc@google.com> wrote:
>
> On Thu, Oct 06, 2022, Vipin Sharma wrote:
> > On Thu, Oct 6, 2022 at 12:50 PM Sean Christopherson <seanjc@google.com> wrote:
> > > > +{
> > > > +     cpu_set_t cpuset;
> > > > +     int err;
> > > > +
> > > > +     CPU_ZERO(&cpuset);
> > > > +     CPU_SET(pcpu, &cpuset);
> > >
> > > To save user pain:
> > >
> > >         r = sched_getaffinity(0, sizeof(allowed_mask), &allowed_mask);
> > >         TEST_ASSERT(!r, "sched_getaffinity failed, errno = %d (%s)", errno,
> > >                     strerror(errno));
> > >
> > >         TEST_ASSERT(CPU_ISSET(pcpu, &allowed_mask),
> > >                     "Task '%d' not allowed to run on pCPU '%d'\n");
> > >
> > >         CPU_ZERO(&allowed_mask);
> > >         CPU_SET(cpu, &allowed_mask);
> > >
> > > that way the user will get an explicit error message if they try to pin a vCPU/task
> > > that has already been affined by something else.  And then, in theory,
> > > sched_setaffinity() should never fail.
> > >
> > > Or you could have two cpu_set_t objects and use CPU_AND(), but that seems
> > > unnecessarily complex.
> > >
> >
> > sched_setaffinity() doesn't fail when we assign more than one task to
> > the pCPU, it allows multiple tasks to be on the same pCPU. One of the
> > reasons it fails is if it is provided a cpu number which is bigger
> > than what is actually available on the host.
> >
> > I am not convinced that pinning vCPUs to the same pCPU should throw an
> > error. We should allow if someone wants to try and compare performance
> > by over subscribing or any valid combination they want to test.
>
> Oh, I'm not talking about the user pinning multiple vCPUs to the same pCPU via
> the test, I'm talking about the user, or more likely something in the users's
> environment, restricting what pCPUs the user's tasks are allowed on.  E.g. if
> the test is run in shell that has been restricted to CPU8 via cgroups, then
> sched_setaffinity() will fail if the user tries to pin vCPUs to any other CPU.

I see, I will add this validation.
Vipin Sharma Oct. 8, 2022, 12:46 a.m. UTC | #6
On Fri, Oct 7, 2022 at 10:39 AM Vipin Sharma <vipinsh@google.com> wrote:
>
> On Thu, Oct 6, 2022 at 5:14 PM Sean Christopherson <seanjc@google.com> wrote:
> >
> > On Thu, Oct 06, 2022, Vipin Sharma wrote:
> > > On Thu, Oct 6, 2022 at 12:50 PM Sean Christopherson <seanjc@google.com> wrote:
> > > > > +{
> > > > > +     cpu_set_t cpuset;
> > > > > +     int err;
> > > > > +
> > > > > +     CPU_ZERO(&cpuset);
> > > > > +     CPU_SET(pcpu, &cpuset);
> > > >
> > > > To save user pain:
> > > >
> > > >         r = sched_getaffinity(0, sizeof(allowed_mask), &allowed_mask);
> > > >         TEST_ASSERT(!r, "sched_getaffinity failed, errno = %d (%s)", errno,
> > > >                     strerror(errno));
> > > >
> > > >         TEST_ASSERT(CPU_ISSET(pcpu, &allowed_mask),
> > > >                     "Task '%d' not allowed to run on pCPU '%d'\n");
> > > >
> > > >         CPU_ZERO(&allowed_mask);
> > > >         CPU_SET(cpu, &allowed_mask);
> > > >
> > > > that way the user will get an explicit error message if they try to pin a vCPU/task
> > > > that has already been affined by something else.  And then, in theory,
> > > > sched_setaffinity() should never fail.
> > > >
> > > > Or you could have two cpu_set_t objects and use CPU_AND(), but that seems
> > > > unnecessarily complex.
> > > >
> > >
> > > sched_setaffinity() doesn't fail when we assign more than one task to
> > > the pCPU, it allows multiple tasks to be on the same pCPU. One of the
> > > reasons it fails is if it is provided a cpu number which is bigger
> > > than what is actually available on the host.
> > >
> > > I am not convinced that pinning vCPUs to the same pCPU should throw an
> > > error. We should allow if someone wants to try and compare performance
> > > by over subscribing or any valid combination they want to test.
> >
> > Oh, I'm not talking about the user pinning multiple vCPUs to the same pCPU via
> > the test, I'm talking about the user, or more likely something in the users's
> > environment, restricting what pCPUs the user's tasks are allowed on.  E.g. if
> > the test is run in shell that has been restricted to CPU8 via cgroups, then
> > sched_setaffinity() will fail if the user tries to pin vCPUs to any other CPU.
>
> I see, I will add this validation.

I think we should drop this check. Current logic is that the new
function perf_test_setup_pinning() parses the vcpu mappings, stores
them in perf_test_vcpu_args{} struct and moves the main thread to the
provided pcpu. But this causes TEST_ASSERT(CPU_ISSET...) to fail for
vcpu threads when they are created because they inherit task affinity
from the main thread which has the pcpu set during setup.

However, this affinity is not strict, so, if TEST_ASSERT(CPU_ISSET...)
is removed then vcpu threads successfully move to their required pcpu
via sched_setaffinity() even though the main thread has different
affinity. If cpus are restricted via cgroups then sched_setaffinity()
fails as expected no matter what.

Another option will be to split the API, perf_test_setup_pinning()
will return the main thread pcpu and dirty_log_perf_test can call
pin_this_task_to_cpu() with the returned pcpu after vcpus have been
started. I do not like this approach, I also think
TEST_ASSERT(CPU_ISSET...) is not reducing user pain that much because
users can still figure out with returned errno what is happening.
Sean Christopherson Oct. 10, 2022, 4:22 p.m. UTC | #7
On Fri, Oct 07, 2022, Vipin Sharma wrote:
> On Fri, Oct 7, 2022 at 10:39 AM Vipin Sharma <vipinsh@google.com> wrote:
> >
> > On Thu, Oct 6, 2022 at 5:14 PM Sean Christopherson <seanjc@google.com> wrote:
> > >
> > > On Thu, Oct 06, 2022, Vipin Sharma wrote:
> > > > On Thu, Oct 6, 2022 at 12:50 PM Sean Christopherson <seanjc@google.com> wrote:
> > > > > > +{
> > > > > > +     cpu_set_t cpuset;
> > > > > > +     int err;
> > > > > > +
> > > > > > +     CPU_ZERO(&cpuset);
> > > > > > +     CPU_SET(pcpu, &cpuset);
> > > > >
> > > > > To save user pain:
> > > > >
> > > > >         r = sched_getaffinity(0, sizeof(allowed_mask), &allowed_mask);
> > > > >         TEST_ASSERT(!r, "sched_getaffinity failed, errno = %d (%s)", errno,
> > > > >                     strerror(errno));
> > > > >
> > > > >         TEST_ASSERT(CPU_ISSET(pcpu, &allowed_mask),
> > > > >                     "Task '%d' not allowed to run on pCPU '%d'\n");
> > > > >
> > > > >         CPU_ZERO(&allowed_mask);
> > > > >         CPU_SET(cpu, &allowed_mask);
> > > > >
> > > > > that way the user will get an explicit error message if they try to pin a vCPU/task
> > > > > that has already been affined by something else.  And then, in theory,
> > > > > sched_setaffinity() should never fail.
> > > > >
> > > > > Or you could have two cpu_set_t objects and use CPU_AND(), but that seems
> > > > > unnecessarily complex.
> > > > >
> > > >
> > > > sched_setaffinity() doesn't fail when we assign more than one task to
> > > > the pCPU, it allows multiple tasks to be on the same pCPU. One of the
> > > > reasons it fails is if it is provided a cpu number which is bigger
> > > > than what is actually available on the host.
> > > >
> > > > I am not convinced that pinning vCPUs to the same pCPU should throw an
> > > > error. We should allow if someone wants to try and compare performance
> > > > by over subscribing or any valid combination they want to test.
> > >
> > > Oh, I'm not talking about the user pinning multiple vCPUs to the same pCPU via
> > > the test, I'm talking about the user, or more likely something in the users's
> > > environment, restricting what pCPUs the user's tasks are allowed on.  E.g. if
> > > the test is run in shell that has been restricted to CPU8 via cgroups, then
> > > sched_setaffinity() will fail if the user tries to pin vCPUs to any other CPU.
> >
> > I see, I will add this validation.
> 
> I think we should drop this check. Current logic is that the new
> function perf_test_setup_pinning() parses the vcpu mappings, stores
> them in perf_test_vcpu_args{} struct and moves the main thread to the
> provided pcpu. But this causes TEST_ASSERT(CPU_ISSET...) to fail for
> vcpu threads when they are created because they inherit task affinity
> from the main thread which has the pcpu set during setup.
> 
> However, this affinity is not strict, so, if TEST_ASSERT(CPU_ISSET...)
> is removed then vcpu threads successfully move to their required pcpu
> via sched_setaffinity() even though the main thread has different
> affinity. If cpus are restricted via cgroups then sched_setaffinity()
> fails as expected no matter what.
> 
> Another option will be to split the API, perf_test_setup_pinning()
> will return the main thread pcpu and dirty_log_perf_test can call
> pin_this_task_to_cpu() with the returned pcpu after vcpus have been
> started. I do not like this approach, I also think
> TEST_ASSERT(CPU_ISSET...) is not reducing user pain that much because
> users can still figure out with returned errno what is happening.

The easy way to handle this is to take the sched_getaffinity() snapshot during
perf_test_setup_pinning().  You could even do the sanity checking there, e.g.
keep pcpu_num() (maybe rename it to parse_pcpu()?)

static uint32_t parse_pcpu(const char *cpu_str, cpu_set_t *allowed_mask)
{
	uint32_t pcpu = atoi_positive(cpu_str);

	TEST_ASSERT(CPU_ISSET(pcpu, &allowed_mask),
		    "Not allowed to run on pCPU '%d', check cgroups?\n");
	return pcpu;
}


	r = sched_getaffinity(0, sizeof(allowed_mask), &allowed_mask);
	TEST_ASSERT(!r, "sched_getaffinity() failed");

	for (i = 0; i < nr_vcpus; i++ {
		TEST_ASSERT(cpu, "pCPU not provided for vCPU%d\n", i);

		perf_test_args.vcpu_args[i++].pcpu = parse_pcpu(cpu, &allowed_mask);
		cpu = strtok(NULL, delim);
	}


	if (cpu)
		pin_me_to_pcpu(parse_pcpu(cpu, &allowed_mask));

That'll result in a slightly larger window where the sanity check could get a
false negative, but that's ok.  Detecting conflicts with 100% accuracy isn't
possible since there's always a window where the allowed cpuset could change, the
goal is only to catch the "obvious" cases in order to save the user a bit of debug
time.
diff mbox series

Patch

diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c
index ecda802b78ff..33f83e423f75 100644
--- a/tools/testing/selftests/kvm/dirty_log_perf_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c
@@ -353,7 +353,7 @@  static void help(char *name)
 	puts("");
 	printf("usage: %s [-h] [-i iterations] [-p offset] [-g] "
 	       "[-m mode] [-n] [-b vcpu bytes] [-v vcpus] [-o] [-s mem type]"
-	       "[-x memslots]\n", name);
+	       "[-x memslots] [-c physical cpus to run test on]\n", name);
 	puts("");
 	printf(" -i: specify iteration counts (default: %"PRIu64")\n",
 	       TEST_HOST_LOOP_N);
@@ -383,6 +383,18 @@  static void help(char *name)
 	backing_src_help("-s");
 	printf(" -x: Split the memory region into this number of memslots.\n"
 	       "     (default: 1)\n");
+	printf(" -c: Comma separated values of the physical CPUs, which will run\n"
+	       "     the vCPUs, optionally, followed by the main application thread cpu.\n"
+	       "     Number of values must be at least the number of vCPUs.\n"
+	       "     The very next number is used to pin main application thread.\n\n"
+	       "     Example: ./dirty_log_perf_test -v 3 -c 22,23,24,50\n"
+	       "     This means that the vcpu 0 will run on the physical cpu 22,\n"
+	       "     vcpu 1 on the physical cpu 23, vcpu 2 on the physical cpu 24\n"
+	       "     and the main thread will run on cpu 50.\n\n"
+	       "     Example: ./dirty_log_perf_test -v 3 -c 22,23,24\n"
+	       "     Same as the previous example except now main application\n"
+	       "     thread can run on any physical cpu\n\n"
+	       "     (default: No cpu mapping)\n");
 	puts("");
 	exit(0);
 }
@@ -398,6 +410,7 @@  int main(int argc, char *argv[])
 		.slots = 1,
 	};
 	int opt;
+	const char *pcpu_list = NULL;
 
 	dirty_log_manual_caps =
 		kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2);
@@ -406,11 +419,14 @@  int main(int argc, char *argv[])
 
 	guest_modes_append_default();
 
-	while ((opt = getopt(argc, argv, "b:ef:ghi:m:nop:s:v:x:")) != -1) {
+	while ((opt = getopt(argc, argv, "b:c:ef:ghi:m:nop:s:v:x:")) != -1) {
 		switch (opt) {
 		case 'b':
 			guest_percpu_mem_size = parse_size(optarg);
 			break;
+		case 'c':
+			pcpu_list = optarg;
+			break;
 		case 'e':
 			/* 'e' is for evil. */
 			run_vcpus_while_disabling_dirty_logging = true;
@@ -458,6 +474,9 @@  int main(int argc, char *argv[])
 		}
 	}
 
+	if (pcpu_list)
+		perf_test_setup_pinning(pcpu_list, nr_vcpus);
+
 	TEST_ASSERT(p.iterations >= 2, "The test should have at least two iterations");
 
 	pr_info("Test iterations: %"PRIu64"\n",	p.iterations);
diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h
index eaa88df0555a..8197260e3b6b 100644
--- a/tools/testing/selftests/kvm/include/perf_test_util.h
+++ b/tools/testing/selftests/kvm/include/perf_test_util.h
@@ -27,6 +27,8 @@  struct perf_test_vcpu_args {
 	/* Only used by the host userspace part of the vCPU thread */
 	struct kvm_vcpu *vcpu;
 	int vcpu_idx;
+	/* The pCPU to which this vCPU is pinned. Only valid if pin_vcpus is true. */
+	int pcpu;
 };
 
 struct perf_test_args {
@@ -39,6 +41,8 @@  struct perf_test_args {
 
 	/* Run vCPUs in L2 instead of L1, if the architecture supports it. */
 	bool nested;
+	/* True if all vCPUs are pinned to pCPUs*/
+	bool pin_vcpus;
 
 	struct perf_test_vcpu_args vcpu_args[KVM_MAX_VCPUS];
 };
@@ -60,4 +64,6 @@  void perf_test_guest_code(uint32_t vcpu_id);
 uint64_t perf_test_nested_pages(int nr_vcpus);
 void perf_test_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu *vcpus[]);
 
+void perf_test_setup_pinning(const char *pcpus_string, int nr_vcpus);
+
 #endif /* SELFTEST_KVM_PERF_TEST_UTIL_H */
diff --git a/tools/testing/selftests/kvm/lib/perf_test_util.c b/tools/testing/selftests/kvm/lib/perf_test_util.c
index 9618b37c66f7..5d1aca0482b4 100644
--- a/tools/testing/selftests/kvm/lib/perf_test_util.c
+++ b/tools/testing/selftests/kvm/lib/perf_test_util.c
@@ -2,7 +2,10 @@ 
 /*
  * Copyright (C) 2020, Google LLC.
  */
+#define _GNU_SOURCE
+
 #include <inttypes.h>
+#include <sched.h>
 
 #include "kvm_util.h"
 #include "perf_test_util.h"
@@ -240,9 +243,25 @@  void __weak perf_test_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_v
 	exit(KSFT_SKIP);
 }
 
+static void pin_me_to_pcpu(int pcpu)
+{
+	cpu_set_t cpuset;
+	int err;
+
+	CPU_ZERO(&cpuset);
+	CPU_SET(pcpu, &cpuset);
+	err = sched_setaffinity(0, sizeof(cpu_set_t), &cpuset);
+	TEST_ASSERT(err == 0, "sched_setaffinity() errored out for pcpu: %d\n", pcpu);
+}
+
 static void *vcpu_thread_main(void *data)
 {
 	struct vcpu_thread *vcpu = data;
+	int idx = vcpu->vcpu_idx;
+	struct perf_test_vcpu_args *vcpu_args = &perf_test_args.vcpu_args[idx];
+
+	if (perf_test_args.pin_vcpus)
+		pin_me_to_pcpu(vcpu_args->pcpu);
 
 	WRITE_ONCE(vcpu->running, true);
 
@@ -255,7 +274,7 @@  static void *vcpu_thread_main(void *data)
 	while (!READ_ONCE(all_vcpu_threads_running))
 		;
 
-	vcpu_thread_fn(&perf_test_args.vcpu_args[vcpu->vcpu_idx]);
+	vcpu_thread_fn(vcpu_args);
 
 	return NULL;
 }
@@ -292,3 +311,40 @@  void perf_test_join_vcpu_threads(int nr_vcpus)
 	for (i = 0; i < nr_vcpus; i++)
 		pthread_join(vcpu_threads[i].thread, NULL);
 }
+
+static int pcpu_num(const char *cpu_str)
+{
+	int pcpu = atoi_paranoid(cpu_str);
+	TEST_ASSERT(pcpu >= 0, "Invalid cpu number: %d\n", pcpu);
+	return pcpu;
+}
+
+void perf_test_setup_pinning(const char *pcpus_string, int nr_vcpus)
+{
+	char delim[2] = ",";
+	char *cpu, *cpu_list;
+	int i = 0;
+
+	cpu_list = strdup(pcpus_string);
+	TEST_ASSERT(cpu_list, "strdup() allocation failed.\n");
+
+	cpu = strtok(cpu_list, delim);
+
+	// 1. Get all pcpus for vcpus
+	while (cpu && i < nr_vcpus) {
+		perf_test_args.vcpu_args[i++].pcpu = pcpu_num(cpu);
+		cpu = strtok(NULL, delim);
+	}
+
+	TEST_ASSERT(i == nr_vcpus,
+		    "Number of pcpus (%d) not sufficient for the number of vcpus (%d).",
+		    i, nr_vcpus);
+
+	perf_test_args.pin_vcpus = true;
+
+	// 2. Check if main worker is provided
+	if (cpu)
+		pin_me_to_pcpu(pcpu_num(cpu));
+
+	free(cpu_list);
+}