diff mbox series

[v6,2/3] KVM: selftests: randomize which pages are written vs read

Message ID 20220912195849.3989707-3-coltonlewis@google.com (mailing list archive)
State New, archived
Headers show
Series KVM: selftests: randomize memory access of dirty_log_perf_test | expand

Commit Message

Colton Lewis Sept. 12, 2022, 7:58 p.m. UTC
Randomize which pages are written vs read using the random number
generator.

Change the variable wr_fract and associated function calls to
write_percent that now operates as a percentage from 0 to 100 where X
means each page has an X% chance of being written. Change the -f
argument to -w to reflect the new variable semantics. Keep the same
default of 100% writes.

Population always uses 100% writes to ensure all memory is actually
populated and not just mapped to the zero page. The prevents expensive
copy-on-write faults from occurring during the dirty memory iterations
below, which would pollute the performance results.

Signed-off-by: Colton Lewis <coltonlewis@google.com>
Reviewed-by: Ricardo Koller <ricarkol@google.com>
Reviewed-by: David Matlack <dmatlack@google.com>
---
 .../selftests/kvm/access_tracking_perf_test.c |  2 +-
 .../selftests/kvm/dirty_log_perf_test.c       | 37 ++++++++++++-------
 .../selftests/kvm/include/perf_test_util.h    |  4 +-
 .../selftests/kvm/lib/perf_test_util.c        | 10 ++---
 4 files changed, 32 insertions(+), 21 deletions(-)

Comments

Sean Christopherson Oct. 7, 2022, 8:55 p.m. UTC | #1
On Mon, Sep 12, 2022, Colton Lewis wrote:
> @@ -393,7 +403,7 @@ int main(int argc, char *argv[])
>  
>  	guest_modes_append_default();
>  
> -	while ((opt = getopt(argc, argv, "ghi:p:m:nb:f:v:or:s:x:")) != -1) {
> +	while ((opt = getopt(argc, argv, "ghi:p:m:nb:v:or:s:x:w:")) != -1) {

This string is getting quite annoying to maintain, e.g. all of these patches
conflict with recent upstream changes, and IIRC will conflict again with Vipin's
changes.  AFAICT, the string passed to getopt() doesn't need to be constant, i.e.
can be built programmatically.  Not in this series, but as future cleanup we should
at least consider a way to make this slightly less painful to maintain.

>  		switch (opt) {
>  		case 'g':
>  			dirty_log_manual_caps = 0;
> @@ -413,10 +423,11 @@ int main(int argc, char *argv[])
>  		case 'b':
>  			guest_percpu_mem_size = parse_size(optarg);
>  			break;
> -		case 'f':
> -			p.wr_fract = atoi(optarg);
> -			TEST_ASSERT(p.wr_fract >= 1,
> -				    "Write fraction cannot be less than one");
> +		case 'w':
> +			p.write_percent = atoi(optarg);
> +			TEST_ASSERT(p.write_percent >= 0
> +				    && p.write_percent <= 100,

&&, ||, etc... go on the previous line.  But in this case, I'd say don't wrap at all,
it's only a few chars over the soft limit.

			TEST_ASSERT(p.write_percent >= 0 && p.write_percent <= 100,

or

			TEST_ASSERT(p.write_percent >= 0 &&
				    p.write_percent <= 100,

And after Vipin's cleanup lands[*], this can be:

			p.write_percent = atoi_positive(optarg);

			TEST_ASSERT(p.write_percent <= 100,

[*] https://lore.kernel.org/all/CAHVum0cD5R9ej09VNvkkqcQsz7PGrxnMqi1E4kqLv+1d63Rg6A@mail.gmail.com

> +				    "Write percentage must be between 0 and 100");
>  			break;
>  		case 'v':
>  			nr_vcpus = atoi(optarg);
> diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h
> index f18530984b42..f93f2ea7c6a3 100644
> --- a/tools/testing/selftests/kvm/include/perf_test_util.h
> +++ b/tools/testing/selftests/kvm/include/perf_test_util.h
> @@ -35,7 +35,7 @@ struct perf_test_args {
>  	uint64_t size;
>  	uint64_t guest_page_size;
>  	uint32_t random_seed;
> -	int wr_fract;
> +	uint32_t write_percent;
>  
>  	/* Run vCPUs in L2 instead of L1, if the architecture supports it. */
>  	bool nested;
> @@ -51,7 +51,7 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus,
>  				   bool partition_vcpu_memory_access);
>  void perf_test_destroy_vm(struct kvm_vm *vm);
>  
> -void perf_test_set_wr_fract(struct kvm_vm *vm, int wr_fract);
> +void perf_test_set_write_percent(struct kvm_vm *vm, uint32_t write_percent);
>  void perf_test_set_random_seed(struct kvm_vm *vm, uint32_t random_seed);
>  
>  void perf_test_start_vcpu_threads(int vcpus, void (*vcpu_fn)(struct perf_test_vcpu_args *));
> diff --git a/tools/testing/selftests/kvm/lib/perf_test_util.c b/tools/testing/selftests/kvm/lib/perf_test_util.c
> index b1e731de0966..9effd229b75d 100644
> --- a/tools/testing/selftests/kvm/lib/perf_test_util.c
> +++ b/tools/testing/selftests/kvm/lib/perf_test_util.c
> @@ -60,7 +60,7 @@ void perf_test_guest_code(uint32_t vcpu_id)
>  			uint64_t addr = gva + (i * pta->guest_page_size);
>  			guest_random(&rand);
>  
> -			if (i % pta->wr_fract == 0)
> +			if (rand % 100 < pta->write_percent)

Aha!  Random bools with a percentage is exactly the type of thing the common RNG
helpers can and should provide.  E.g. this should look something like:

			if (random_bool(rng, pta->write_percent))

As a bonus, if someone wants to implement a more sophisticated percentage system,
then all users of random_bool() will benefit, again without needing to update
users.
David Matlack Oct. 7, 2022, 9:07 p.m. UTC | #2
On Fri, Oct 7, 2022 at 1:55 PM Sean Christopherson <seanjc@google.com> wrote:
>
> On Mon, Sep 12, 2022, Colton Lewis wrote:
> > @@ -393,7 +403,7 @@ int main(int argc, char *argv[])
> >
> >       guest_modes_append_default();
> >
> > -     while ((opt = getopt(argc, argv, "ghi:p:m:nb:f:v:or:s:x:")) != -1) {
> > +     while ((opt = getopt(argc, argv, "ghi:p:m:nb:v:or:s:x:w:")) != -1) {
>
> This string is getting quite annoying to maintain, e.g. all of these patches
> conflict with recent upstream changes, and IIRC will conflict again with Vipin's
> changes.  AFAICT, the string passed to getopt() doesn't need to be constant, i.e.
> can be built programmatically.  Not in this series, but as future cleanup we should
> at least consider a way to make this slightly less painful to maintain.

I have aspirations to consolidate all the memstress (perf_test_util)
option handling within memstress.c, rather than duplicating it across
all tests. That will help cut down the length of the test-specific
option string (although I haven't found a super clean way to do it
yet).
Andrew Jones Oct. 8, 2022, 9:50 a.m. UTC | #3
On Fri, Oct 07, 2022 at 08:55:20PM +0000, Sean Christopherson wrote:
> On Mon, Sep 12, 2022, Colton Lewis wrote:
> > @@ -393,7 +403,7 @@ int main(int argc, char *argv[])
> >  
> >  	guest_modes_append_default();
> >  
> > -	while ((opt = getopt(argc, argv, "ghi:p:m:nb:f:v:or:s:x:")) != -1) {
> > +	while ((opt = getopt(argc, argv, "ghi:p:m:nb:v:or:s:x:w:")) != -1) {
> 
> This string is getting quite annoying to maintain, e.g. all of these patches
> conflict with recent upstream changes, and IIRC will conflict again with Vipin's
> changes.  AFAICT, the string passed to getopt() doesn't need to be constant, i.e.
> can be built programmatically.  Not in this series, but as future cleanup we should
> at least consider a way to make this slightly less painful to maintain.
>

I wonder if a getopt string like above is really saying "we're doing too
much in a single test binary". Are all these switches just for one-off
experiments which developers need? Or, are testers expected to run this
binary multiple times with different combinations of switches? If it's
the latter, then I think we need a test runner script and config file to
capture those separate invocations (similar to kvm-unit-tests). Or, change
from a collection of command line switches to building the file multiple
times with different compile time switches and output filenames. Then,
testers are just expected to run all binaries (which is what I think most
believe / do today).

Thanks,
drew
Sean Christopherson Oct. 10, 2022, 2:46 p.m. UTC | #4
On Sat, Oct 08, 2022, Andrew Jones wrote:
> On Fri, Oct 07, 2022 at 08:55:20PM +0000, Sean Christopherson wrote:
> > On Mon, Sep 12, 2022, Colton Lewis wrote:
> > > @@ -393,7 +403,7 @@ int main(int argc, char *argv[])
> > >  
> > >  	guest_modes_append_default();
> > >  
> > > -	while ((opt = getopt(argc, argv, "ghi:p:m:nb:f:v:or:s:x:")) != -1) {
> > > +	while ((opt = getopt(argc, argv, "ghi:p:m:nb:v:or:s:x:w:")) != -1) {
> > 
> > This string is getting quite annoying to maintain, e.g. all of these patches
> > conflict with recent upstream changes, and IIRC will conflict again with Vipin's
> > changes.  AFAICT, the string passed to getopt() doesn't need to be constant, i.e.
> > can be built programmatically.  Not in this series, but as future cleanup we should
> > at least consider a way to make this slightly less painful to maintain.
> >
> 
> I wonder if a getopt string like above is really saying "we're doing too
> much in a single test binary". Are all these switches just for one-off
> experiments which developers need? Or, are testers expected to run this
> binary multiple times with different combinations of switches?

Even if it's just 2 or 3 switches, I agree we need a way to run those configs by
default.

> If it's the latter, then I think we need a test runner script and config file
> to capture those separate invocations (similar to kvm-unit-tests). Or, change
> from a collection of command line switches to building the file multiple
> times with different compile time switches and output filenames.

What about a mix of those two approaches and having individual scripts for each
config?  I like the idea of one executable per config, but we shouldn't need to
compile multiple times.  And that would still allow developers to easily run
non-standard configs.

I'd prefer to avoid adding a test runner, partly because I can never remember the
invocation strings, partly becuase I don't want to encourage mega tests like the
VMX and SVM KVM-unit-tests.
Andrew Jones Oct. 10, 2022, 4:38 p.m. UTC | #5
On Mon, Oct 10, 2022 at 02:46:24PM +0000, Sean Christopherson wrote:
> On Sat, Oct 08, 2022, Andrew Jones wrote:
> > On Fri, Oct 07, 2022 at 08:55:20PM +0000, Sean Christopherson wrote:
> > > On Mon, Sep 12, 2022, Colton Lewis wrote:
> > > > @@ -393,7 +403,7 @@ int main(int argc, char *argv[])
> > > >  
> > > >  	guest_modes_append_default();
> > > >  
> > > > -	while ((opt = getopt(argc, argv, "ghi:p:m:nb:f:v:or:s:x:")) != -1) {
> > > > +	while ((opt = getopt(argc, argv, "ghi:p:m:nb:v:or:s:x:w:")) != -1) {
> > > 
> > > This string is getting quite annoying to maintain, e.g. all of these patches
> > > conflict with recent upstream changes, and IIRC will conflict again with Vipin's
> > > changes.  AFAICT, the string passed to getopt() doesn't need to be constant, i.e.
> > > can be built programmatically.  Not in this series, but as future cleanup we should
> > > at least consider a way to make this slightly less painful to maintain.
> > >
> > 
> > I wonder if a getopt string like above is really saying "we're doing too
> > much in a single test binary". Are all these switches just for one-off
> > experiments which developers need? Or, are testers expected to run this
> > binary multiple times with different combinations of switches?
> 
> Even if it's just 2 or 3 switches, I agree we need a way to run those configs by
> default.
> 
> > If it's the latter, then I think we need a test runner script and config file
> > to capture those separate invocations (similar to kvm-unit-tests). Or, change
> > from a collection of command line switches to building the file multiple
> > times with different compile time switches and output filenames.
> 
> What about a mix of those two approaches and having individual scripts for each
> config?  I like the idea of one executable per config, but we shouldn't need to
> compile multiple times.  And that would still allow developers to easily run
> non-standard configs.

Sounds good to me. Come to think of it we have that in kvm-unit-tests too
after running 'make standalone', which generates individual scripts per
configuration. IIRC, people doing testing seemed to prefer running the
standalone versions with their own runners too.

Thanks,
drew

> 
> I'd prefer to avoid adding a test runner, partly because I can never remember the
> invocation strings, partly becuase I don't want to encourage mega tests like the
> VMX and SVM KVM-unit-tests.
diff mbox series

Patch

diff --git a/tools/testing/selftests/kvm/access_tracking_perf_test.c b/tools/testing/selftests/kvm/access_tracking_perf_test.c
index d8909032317a..d86046ef3a0b 100644
--- a/tools/testing/selftests/kvm/access_tracking_perf_test.c
+++ b/tools/testing/selftests/kvm/access_tracking_perf_test.c
@@ -274,7 +274,7 @@  static void run_iteration(struct kvm_vm *vm, int vcpus, const char *description)
 static void access_memory(struct kvm_vm *vm, int vcpus, enum access_type access,
 			  const char *description)
 {
-	perf_test_set_wr_fract(vm, (access == ACCESS_READ) ? INT_MAX : 1);
+	perf_test_set_write_percent(vm, (access == ACCESS_READ) ? 0 : 100);
 	iteration_work = ITERATION_ACCESS_MEMORY;
 	run_iteration(vm, vcpus, description);
 }
diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c
index a89a620f50d4..dfa5957332b1 100644
--- a/tools/testing/selftests/kvm/dirty_log_perf_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c
@@ -122,10 +122,10 @@  static void vcpu_worker(struct perf_test_vcpu_args *vcpu_args)
 struct test_params {
 	unsigned long iterations;
 	uint64_t phys_offset;
-	int wr_fract;
 	bool partition_vcpu_memory_access;
 	enum vm_mem_backing_src_type backing_src;
 	int slots;
+	uint32_t write_percent;
 	uint32_t random_seed;
 };
 
@@ -224,7 +224,6 @@  static void run_test(enum vm_guest_mode mode, void *arg)
 	/* If no argument provided, random seed will be 0. */
 	pr_info("Random seed: %u\n", p->random_seed);
 	perf_test_set_random_seed(vm, p->random_seed);
-	perf_test_set_wr_fract(vm, p->wr_fract);
 
 	guest_num_pages = (nr_vcpus * guest_percpu_mem_size) >> vm_get_page_shift(vm);
 	guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages);
@@ -249,6 +248,14 @@  static void run_test(enum vm_guest_mode mode, void *arg)
 	for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++)
 		vcpu_last_completed_iteration[vcpu_id] = -1;
 
+	/*
+	 * Use 100% writes during the population phase to ensure all
+	 * memory is actually populated and not just mapped to the zero
+	 * page. The prevents expensive copy-on-write faults from
+	 * occurring during the dirty memory iterations below, which
+	 * would pollute the performance results.
+	 */
+	perf_test_set_write_percent(vm, 100);
 	perf_test_start_vcpu_threads(nr_vcpus, vcpu_worker);
 
 	/* Allow the vCPUs to populate memory */
@@ -270,6 +277,8 @@  static void run_test(enum vm_guest_mode mode, void *arg)
 	pr_info("Enabling dirty logging time: %ld.%.9lds\n\n",
 		ts_diff.tv_sec, ts_diff.tv_nsec);
 
+	perf_test_set_write_percent(vm, p->write_percent);
+
 	while (iteration < p->iterations) {
 		/*
 		 * Incrementing the iteration number will start the vCPUs
@@ -342,7 +351,7 @@  static void help(char *name)
 	puts("");
 	printf("usage: %s [-h] [-i iterations] [-p offset] [-g] "
 	       "[-m mode] [-n] [-b vcpu bytes] [-v vcpus] [-o] [-r random seed ] [-s mem type]"
-	       "[-x memslots]\n", name);
+	       "[-x memslots] [-w percentage]\n", name);
 	puts("");
 	printf(" -i: specify iteration counts (default: %"PRIu64")\n",
 	       TEST_HOST_LOOP_N);
@@ -359,10 +368,6 @@  static void help(char *name)
 	printf(" -b: specify the size of the memory region which should be\n"
 	       "     dirtied by each vCPU. e.g. 10M or 3G.\n"
 	       "     (default: 1G)\n");
-	printf(" -f: specify the fraction of pages which should be written to\n"
-	       "     as opposed to simply read, in the form\n"
-	       "     1/<fraction of pages to write>.\n"
-	       "     (default: 1 i.e. all pages are written to.)\n");
 	printf(" -v: specify the number of vCPUs to run.\n");
 	printf(" -o: Overlap guest memory accesses instead of partitioning\n"
 	       "     them into a separate region of memory for each vCPU.\n");
@@ -370,6 +375,11 @@  static void help(char *name)
 	backing_src_help("-s");
 	printf(" -x: Split the memory region into this number of memslots.\n"
 	       "     (default: 1)\n");
+	printf(" -w: specify the percentage of pages which should be written to\n"
+	       "     as an integer from 0-100 inclusive. This is probabalistic,\n"
+	       "     so -w X means each page has an X%% chance of writing\n"
+	       "     and a (100-X)%% chance of reading.\n"
+	       "     (default: 100 i.e. all pages are written to.)\n");
 	puts("");
 	exit(0);
 }
@@ -379,10 +389,10 @@  int main(int argc, char *argv[])
 	int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS);
 	struct test_params p = {
 		.iterations = TEST_HOST_LOOP_N,
-		.wr_fract = 1,
 		.partition_vcpu_memory_access = true,
 		.backing_src = DEFAULT_VM_MEM_SRC,
 		.slots = 1,
+		.write_percent = 100,
 	};
 	int opt;
 
@@ -393,7 +403,7 @@  int main(int argc, char *argv[])
 
 	guest_modes_append_default();
 
-	while ((opt = getopt(argc, argv, "ghi:p:m:nb:f:v:or:s:x:")) != -1) {
+	while ((opt = getopt(argc, argv, "ghi:p:m:nb:v:or:s:x:w:")) != -1) {
 		switch (opt) {
 		case 'g':
 			dirty_log_manual_caps = 0;
@@ -413,10 +423,11 @@  int main(int argc, char *argv[])
 		case 'b':
 			guest_percpu_mem_size = parse_size(optarg);
 			break;
-		case 'f':
-			p.wr_fract = atoi(optarg);
-			TEST_ASSERT(p.wr_fract >= 1,
-				    "Write fraction cannot be less than one");
+		case 'w':
+			p.write_percent = atoi(optarg);
+			TEST_ASSERT(p.write_percent >= 0
+				    && p.write_percent <= 100,
+				    "Write percentage must be between 0 and 100");
 			break;
 		case 'v':
 			nr_vcpus = atoi(optarg);
diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h
index f18530984b42..f93f2ea7c6a3 100644
--- a/tools/testing/selftests/kvm/include/perf_test_util.h
+++ b/tools/testing/selftests/kvm/include/perf_test_util.h
@@ -35,7 +35,7 @@  struct perf_test_args {
 	uint64_t size;
 	uint64_t guest_page_size;
 	uint32_t random_seed;
-	int wr_fract;
+	uint32_t write_percent;
 
 	/* Run vCPUs in L2 instead of L1, if the architecture supports it. */
 	bool nested;
@@ -51,7 +51,7 @@  struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus,
 				   bool partition_vcpu_memory_access);
 void perf_test_destroy_vm(struct kvm_vm *vm);
 
-void perf_test_set_wr_fract(struct kvm_vm *vm, int wr_fract);
+void perf_test_set_write_percent(struct kvm_vm *vm, uint32_t write_percent);
 void perf_test_set_random_seed(struct kvm_vm *vm, uint32_t random_seed);
 
 void perf_test_start_vcpu_threads(int vcpus, void (*vcpu_fn)(struct perf_test_vcpu_args *));
diff --git a/tools/testing/selftests/kvm/lib/perf_test_util.c b/tools/testing/selftests/kvm/lib/perf_test_util.c
index b1e731de0966..9effd229b75d 100644
--- a/tools/testing/selftests/kvm/lib/perf_test_util.c
+++ b/tools/testing/selftests/kvm/lib/perf_test_util.c
@@ -60,7 +60,7 @@  void perf_test_guest_code(uint32_t vcpu_id)
 			uint64_t addr = gva + (i * pta->guest_page_size);
 			guest_random(&rand);
 
-			if (i % pta->wr_fract == 0)
+			if (rand % 100 < pta->write_percent)
 				*(uint64_t *)addr = 0x0123456789ABCDEF;
 			else
 				READ_ONCE(*(uint64_t *)addr);
@@ -118,7 +118,7 @@  struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus,
 	pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode));
 
 	/* By default vCPUs will write to memory. */
-	pta->wr_fract = 1;
+	pta->write_percent = 100;
 
 	/*
 	 * Snapshot the non-huge page size.  This is used by the guest code to
@@ -220,10 +220,10 @@  void perf_test_destroy_vm(struct kvm_vm *vm)
 	kvm_vm_free(vm);
 }
 
-void perf_test_set_wr_fract(struct kvm_vm *vm, int wr_fract)
+void perf_test_set_write_percent(struct kvm_vm *vm, uint32_t write_percent)
 {
-	perf_test_args.wr_fract = wr_fract;
-	sync_global_to_guest(vm, perf_test_args);
+	perf_test_args.write_percent = write_percent;
+	sync_global_to_guest(vm, perf_test_args.write_percent);
 }
 
 void perf_test_set_random_seed(struct kvm_vm *vm, uint32_t random_seed)