diff mbox series

[1/3] KVM: selftests: Add random table to randomize memory access

Message ID 20220810175830.2175089-2-coltonlewis@google.com (mailing list archive)
State New, archived
Headers show
Series KVM: selftests: Randomize memory access of dirty_log_perf_test | expand

Commit Message

Colton Lewis Aug. 10, 2022, 5:58 p.m. UTC
Linear access through all pages does not seem to replicate performance
problems with realistic dirty logging workloads. Make the test more
sophisticated through random access. Each vcpu has its own sequence of
random numbers that are refilled after every iteration. Having the
main thread fill the table for every vcpu is less efficient than
having each vcpu generate its own numbers, but this ensures threading
nondeterminism won't destroy reproducibility with a given random seed.

Signed-off-by: Colton Lewis <coltonlewis@google.com>
---
 .../selftests/kvm/dirty_log_perf_test.c       | 13 ++++-
 .../selftests/kvm/include/perf_test_util.h    |  4 ++
 .../selftests/kvm/lib/perf_test_util.c        | 47 +++++++++++++++++++
 3 files changed, 63 insertions(+), 1 deletion(-)

Comments

David Matlack Aug. 10, 2022, 11:18 p.m. UTC | #1
On Wed, Aug 10, 2022 at 05:58:28PM +0000, Colton Lewis wrote:
> Linear access through all pages does not seem to replicate performance

State what the patch does first, then the background/motivation.

> problems with realistic dirty logging workloads. Make the test more
> sophisticated through random access. Each vcpu has its own sequence of
> random numbers that are refilled after every iteration. Having the
> main thread fill the table for every vcpu is less efficient than
> having each vcpu generate its own numbers, but this ensures threading
> nondeterminism won't destroy reproducibility with a given random seed.

Make it clear what this patch does specifically. e.g. "Make the test
more sophisticated through random access" is a bit misleading since all
this patch does is create a table of random numbers.

> 
> Signed-off-by: Colton Lewis <coltonlewis@google.com>
> ---
>  .../selftests/kvm/dirty_log_perf_test.c       | 13 ++++-
>  .../selftests/kvm/include/perf_test_util.h    |  4 ++
>  .../selftests/kvm/lib/perf_test_util.c        | 47 +++++++++++++++++++
>  3 files changed, 63 insertions(+), 1 deletion(-)
> 
> diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c
> index f99e39a672d3..80a1cbe7fbb0 100644
> --- a/tools/testing/selftests/kvm/dirty_log_perf_test.c
> +++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c
> @@ -132,6 +132,7 @@ struct test_params {
>  	bool partition_vcpu_memory_access;
>  	enum vm_mem_backing_src_type backing_src;
>  	int slots;
> +	uint32_t random_seed;
>  };
>  
>  static void toggle_dirty_logging(struct kvm_vm *vm, int slots, bool enable)
> @@ -243,6 +244,10 @@ static void run_test(enum vm_guest_mode mode, void *arg)
>  	/* Start the iterations */
>  	iteration = 0;
>  	host_quit = false;
> +	srandom(p->random_seed);
> +	pr_info("Random seed: %d\n", p->random_seed);
> +	alloc_random_table(nr_vcpus, guest_percpu_mem_size >> vm->page_shift);
> +	fill_random_table(nr_vcpus, guest_percpu_mem_size >> vm->page_shift);

Drive the allocate and filling of the random table in perf_test_util.c
as part of VM setup, and also move random_seed to perf_test_args.

This will reduce the amount of code needed in the test to use
perf_test_util with random accesses.  dirty_log_perf_test is the only
test using random accesses right now, but I could see us wanting to use
it in demand_paging_test and access_tracking_perf_test in the near
future.

You can still have the test refresh the random table every iteration by
exporting e.g. perf_test_refresh_random_table() for use by tests.

>  
>  	clock_gettime(CLOCK_MONOTONIC, &start);
>  	for (i = 0; i < nr_vcpus; i++)
> @@ -270,6 +275,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
>  		ts_diff.tv_sec, ts_diff.tv_nsec);
>  
>  	while (iteration < p->iterations) {
> +		fill_random_table(nr_vcpus, guest_percpu_mem_size >> vm->page_shift);

I wonder if it would be better to use the same random access pattern
across iterations. One of the reasons to have multiple iterations is to
see how the guest performance changes as the memory moves through
different phases of dirty tracking. e.g. KVM might be splitting huge
pages during the first iteration but not the second. If the access
pattern is also changing across iterations that could make it harder to
identify performance changes due to KVM.

>  		/*
>  		 * Incrementing the iteration number will start the vCPUs
>  		 * dirtying memory again.
> @@ -380,6 +386,7 @@ static void help(char *name)
>  	printf(" -v: specify the number of vCPUs to run.\n");
>  	printf(" -o: Overlap guest memory accesses instead of partitioning\n"
>  	       "     them into a separate region of memory for each vCPU.\n");
> +	printf(" -r: specify the starting random seed.\n");
>  	backing_src_help("-s");
>  	printf(" -x: Split the memory region into this number of memslots.\n"
>  	       "     (default: 1)\n");
> @@ -396,6 +403,7 @@ int main(int argc, char *argv[])
>  		.partition_vcpu_memory_access = true,
>  		.backing_src = DEFAULT_VM_MEM_SRC,
>  		.slots = 1,
> +		.random_seed = time(NULL),

Perhaps the default seed should be a hard-coded value so that users
running the test with default arguments get deterministic results across
runs.

>  	};
>  	int opt;
>  
> @@ -406,7 +414,7 @@ int main(int argc, char *argv[])
>  
>  	guest_modes_append_default();
>  
> -	while ((opt = getopt(argc, argv, "eghi:p:m:nb:f:v:os:x:")) != -1) {
> +	while ((opt = getopt(argc, argv, "eghi:p:m:nb:f:v:or:s:x:")) != -1) {
>  		switch (opt) {
>  		case 'e':
>  			/* 'e' is for evil. */
> @@ -442,6 +450,9 @@ int main(int argc, char *argv[])
>  		case 'o':
>  			p.partition_vcpu_memory_access = false;
>  			break;
> +		case 'r':
> +			p.random_seed = atoi(optarg);
> +			break;
>  		case 's':
>  			p.backing_src = parse_backing_src_type(optarg);
>  			break;
> diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h
> index eaa88df0555a..597875d0c3db 100644
> --- a/tools/testing/selftests/kvm/include/perf_test_util.h
> +++ b/tools/testing/selftests/kvm/include/perf_test_util.h
> @@ -44,6 +44,10 @@ struct perf_test_args {
>  };
>  
>  extern struct perf_test_args perf_test_args;
> +extern uint32_t **random_table;

Adding random_table to perf_test_util.h is unnecessary in this commit
(it's only used in perf_test_util.c).

> +
> +void alloc_random_table(uint32_t nr_vcpus, uint32_t nr_randoms);
> +void fill_random_table(uint32_t nr_vcpus, uint32_t nr_randoms);

Use perf_test_ prefixes for symbols visible outside of perf_test_util.c.

e.g.

  perf_test_random_table
  perf_test_alloc_random_table()
  perf_test_fill_random_table()

>  
>  struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int nr_vcpus,
>  				   uint64_t vcpu_memory_bytes, int slots,
> diff --git a/tools/testing/selftests/kvm/lib/perf_test_util.c b/tools/testing/selftests/kvm/lib/perf_test_util.c
> index 9618b37c66f7..b04e8d2c0f37 100644
> --- a/tools/testing/selftests/kvm/lib/perf_test_util.c
> +++ b/tools/testing/selftests/kvm/lib/perf_test_util.c
> @@ -9,6 +9,10 @@
>  #include "processor.h"
>  
>  struct perf_test_args perf_test_args;
> +/* This pointer points to guest memory and must be converted with
> + * addr_gva2hva to be accessed from the host.
> + */
> +uint32_t **random_table;

Use vm_vaddr_t for variables that contain guest virtual addresses
(exception within guest_code(), of course).

>  
>  /*
>   * Guest virtual memory offset of the testing memory slot.
> @@ -70,6 +74,49 @@ void perf_test_guest_code(uint32_t vcpu_idx)
>  	}
>  }
>  
> +void alloc_random_table(uint32_t nr_vcpus, uint32_t nr_randoms)
> +{
> +	struct perf_test_args *pta = &perf_test_args;
> +	uint32_t **host_random_table;
> +
> +	random_table = (uint32_t **)vm_vaddr_alloc(
> +		pta->vm,
> +		nr_vcpus * sizeof(uint32_t *),
> +		(vm_vaddr_t)0);

I notice vm_vaddr_alloc_pages() and vcpu_alloc_cpuid() use
KVM_UTIL_MIN_VADDR for the min. Should we use that here too?

If so, this is a good opporunity to rename vm_vaddr_alloc() to
__vm_vaddr_alloc() and introduce:

vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz)
{
        return __vm_vaddr_alloc(vm, sz, KVM_UTIL_MIN_VADDR);
}

> +	host_random_table = addr_gva2hva(pta->vm, (vm_vaddr_t)random_table);
> +	pr_debug("Random start addr: %p %p.\n", random_table, host_random_table);
> +
> +	for (uint32_t i = 0; i < nr_vcpus; i++) {
> +		host_random_table[i] = (uint32_t *)vm_vaddr_alloc(

The per-vCPU random table should go in perf_test_vcpu_args along with
all the other per-vCPU information that is set up by the test and
consumed by the guest code.

This will reduce some of the complexity here because you won't need to
allocate the top-level array of pointers.

> +			pta->vm,
> +			nr_randoms * sizeof(uint32_t),
> +			(vm_vaddr_t)0);
> +		pr_debug("Random row addr: %p %p.\n",
> +			 host_random_table[i],
> +			 addr_gva2hva(pta->vm, (vm_vaddr_t)host_random_table[i]));

Logging the host virtual addresses of the random table would probably
not be valuable. But logging the guest virtual address would probably be
more useful. The guest virtual address space management it pretty
ad-hoc.

> +	}
> +}
> +
> +void fill_random_table(uint32_t nr_vcpus, uint32_t nr_randoms)
> +{
> +	struct perf_test_args *pta = &perf_test_args;
> +	uint32_t **host_random_table = addr_gva2hva(pta->vm, (vm_vaddr_t)random_table);
> +	uint32_t *host_row;
> +
> +	pr_debug("Random start addr: %p %p.\n", random_table, host_random_table);
> +
> +	for (uint32_t i = 0; i < nr_vcpus; i++) {
> +		host_row = addr_gva2hva(pta->vm, (vm_vaddr_t)host_random_table[i]);
> +		pr_debug("Random row addr: %p %p.\n", host_random_table[i], host_row);
> +
> +		for (uint32_t j = 0; j < nr_randoms; j++)
> +			host_row[j] = random();
> +
> +		pr_debug("New randoms row %d: %d, %d, %d...\n",
> +			 i, host_row[0], host_row[1], host_row[2]);
> +	}
> +}
> +
>  void perf_test_setup_vcpus(struct kvm_vm *vm, int nr_vcpus,
>  			   struct kvm_vcpu *vcpus[],
>  			   uint64_t vcpu_memory_bytes,
> -- 
> 2.37.1.559.g78731f0fdb-goog
>
David Matlack Aug. 10, 2022, 11:26 p.m. UTC | #2
On Wed, Aug 10, 2022 at 04:18:10PM -0700, David Matlack wrote:
> On Wed, Aug 10, 2022 at 05:58:28PM +0000, Colton Lewis wrote:
> > Linear access through all pages does not seem to replicate performance
> 
> State what the patch does first, then the background/motivation.
> 
> > problems with realistic dirty logging workloads. Make the test more
> > sophisticated through random access. Each vcpu has its own sequence of
> > random numbers that are refilled after every iteration. Having the
> > main thread fill the table for every vcpu is less efficient than
> > having each vcpu generate its own numbers, but this ensures threading
> > nondeterminism won't destroy reproducibility with a given random seed.
> 
> Make it clear what this patch does specifically. e.g. "Make the test
> more sophisticated through random access" is a bit misleading since all
> this patch does is create a table of random numbers.

Please also call out how this change affects memory usage of the test.

> 
> > 
> > Signed-off-by: Colton Lewis <coltonlewis@google.com>
> > ---
> >  .../selftests/kvm/dirty_log_perf_test.c       | 13 ++++-
> >  .../selftests/kvm/include/perf_test_util.h    |  4 ++
> >  .../selftests/kvm/lib/perf_test_util.c        | 47 +++++++++++++++++++
> >  3 files changed, 63 insertions(+), 1 deletion(-)
> > 
> > diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c
> > index f99e39a672d3..80a1cbe7fbb0 100644
> > --- a/tools/testing/selftests/kvm/dirty_log_perf_test.c
> > +++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c
> > @@ -132,6 +132,7 @@ struct test_params {
> >  	bool partition_vcpu_memory_access;
> >  	enum vm_mem_backing_src_type backing_src;
> >  	int slots;
> > +	uint32_t random_seed;
> >  };
> >  
> >  static void toggle_dirty_logging(struct kvm_vm *vm, int slots, bool enable)
> > @@ -243,6 +244,10 @@ static void run_test(enum vm_guest_mode mode, void *arg)
> >  	/* Start the iterations */
> >  	iteration = 0;
> >  	host_quit = false;
> > +	srandom(p->random_seed);
> > +	pr_info("Random seed: %d\n", p->random_seed);
> > +	alloc_random_table(nr_vcpus, guest_percpu_mem_size >> vm->page_shift);
> > +	fill_random_table(nr_vcpus, guest_percpu_mem_size >> vm->page_shift);
> 
> Drive the allocate and filling of the random table in perf_test_util.c
> as part of VM setup, and also move random_seed to perf_test_args.
> 
> This will reduce the amount of code needed in the test to use
> perf_test_util with random accesses.  dirty_log_perf_test is the only
> test using random accesses right now, but I could see us wanting to use
> it in demand_paging_test and access_tracking_perf_test in the near
> future.
> 
> You can still have the test refresh the random table every iteration by
> exporting e.g. perf_test_refresh_random_table() for use by tests.
> 
> >  
> >  	clock_gettime(CLOCK_MONOTONIC, &start);
> >  	for (i = 0; i < nr_vcpus; i++)
> > @@ -270,6 +275,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
> >  		ts_diff.tv_sec, ts_diff.tv_nsec);
> >  
> >  	while (iteration < p->iterations) {
> > +		fill_random_table(nr_vcpus, guest_percpu_mem_size >> vm->page_shift);
> 
> I wonder if it would be better to use the same random access pattern
> across iterations. One of the reasons to have multiple iterations is to
> see how the guest performance changes as the memory moves through
> different phases of dirty tracking. e.g. KVM might be splitting huge
> pages during the first iteration but not the second. If the access
> pattern is also changing across iterations that could make it harder to
> identify performance changes due to KVM.
> 
> >  		/*
> >  		 * Incrementing the iteration number will start the vCPUs
> >  		 * dirtying memory again.
> > @@ -380,6 +386,7 @@ static void help(char *name)
> >  	printf(" -v: specify the number of vCPUs to run.\n");
> >  	printf(" -o: Overlap guest memory accesses instead of partitioning\n"
> >  	       "     them into a separate region of memory for each vCPU.\n");
> > +	printf(" -r: specify the starting random seed.\n");
> >  	backing_src_help("-s");
> >  	printf(" -x: Split the memory region into this number of memslots.\n"
> >  	       "     (default: 1)\n");
> > @@ -396,6 +403,7 @@ int main(int argc, char *argv[])
> >  		.partition_vcpu_memory_access = true,
> >  		.backing_src = DEFAULT_VM_MEM_SRC,
> >  		.slots = 1,
> > +		.random_seed = time(NULL),
> 
> Perhaps the default seed should be a hard-coded value so that users
> running the test with default arguments get deterministic results across
> runs.
> 
> >  	};
> >  	int opt;
> >  
> > @@ -406,7 +414,7 @@ int main(int argc, char *argv[])
> >  
> >  	guest_modes_append_default();
> >  
> > -	while ((opt = getopt(argc, argv, "eghi:p:m:nb:f:v:os:x:")) != -1) {
> > +	while ((opt = getopt(argc, argv, "eghi:p:m:nb:f:v:or:s:x:")) != -1) {
> >  		switch (opt) {
> >  		case 'e':
> >  			/* 'e' is for evil. */
> > @@ -442,6 +450,9 @@ int main(int argc, char *argv[])
> >  		case 'o':
> >  			p.partition_vcpu_memory_access = false;
> >  			break;
> > +		case 'r':
> > +			p.random_seed = atoi(optarg);
> > +			break;
> >  		case 's':
> >  			p.backing_src = parse_backing_src_type(optarg);
> >  			break;
> > diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h
> > index eaa88df0555a..597875d0c3db 100644
> > --- a/tools/testing/selftests/kvm/include/perf_test_util.h
> > +++ b/tools/testing/selftests/kvm/include/perf_test_util.h
> > @@ -44,6 +44,10 @@ struct perf_test_args {
> >  };
> >  
> >  extern struct perf_test_args perf_test_args;
> > +extern uint32_t **random_table;
> 
> Adding random_table to perf_test_util.h is unnecessary in this commit
> (it's only used in perf_test_util.c).
> 
> > +
> > +void alloc_random_table(uint32_t nr_vcpus, uint32_t nr_randoms);
> > +void fill_random_table(uint32_t nr_vcpus, uint32_t nr_randoms);
> 
> Use perf_test_ prefixes for symbols visible outside of perf_test_util.c.
> 
> e.g.
> 
>   perf_test_random_table
>   perf_test_alloc_random_table()
>   perf_test_fill_random_table()
> 
> >  
> >  struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int nr_vcpus,
> >  				   uint64_t vcpu_memory_bytes, int slots,
> > diff --git a/tools/testing/selftests/kvm/lib/perf_test_util.c b/tools/testing/selftests/kvm/lib/perf_test_util.c
> > index 9618b37c66f7..b04e8d2c0f37 100644
> > --- a/tools/testing/selftests/kvm/lib/perf_test_util.c
> > +++ b/tools/testing/selftests/kvm/lib/perf_test_util.c
> > @@ -9,6 +9,10 @@
> >  #include "processor.h"
> >  
> >  struct perf_test_args perf_test_args;
> > +/* This pointer points to guest memory and must be converted with
> > + * addr_gva2hva to be accessed from the host.
> > + */
> > +uint32_t **random_table;
> 
> Use vm_vaddr_t for variables that contain guest virtual addresses
> (exception within guest_code(), of course).
> 
> >  
> >  /*
> >   * Guest virtual memory offset of the testing memory slot.
> > @@ -70,6 +74,49 @@ void perf_test_guest_code(uint32_t vcpu_idx)
> >  	}
> >  }
> >  
> > +void alloc_random_table(uint32_t nr_vcpus, uint32_t nr_randoms)
> > +{
> > +	struct perf_test_args *pta = &perf_test_args;
> > +	uint32_t **host_random_table;
> > +
> > +	random_table = (uint32_t **)vm_vaddr_alloc(
> > +		pta->vm,
> > +		nr_vcpus * sizeof(uint32_t *),
> > +		(vm_vaddr_t)0);
> 
> I notice vm_vaddr_alloc_pages() and vcpu_alloc_cpuid() use
> KVM_UTIL_MIN_VADDR for the min. Should we use that here too?
> 
> If so, this is a good opporunity to rename vm_vaddr_alloc() to
> __vm_vaddr_alloc() and introduce:
> 
> vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz)
> {
>         return __vm_vaddr_alloc(vm, sz, KVM_UTIL_MIN_VADDR);
> }
> 
> > +	host_random_table = addr_gva2hva(pta->vm, (vm_vaddr_t)random_table);
> > +	pr_debug("Random start addr: %p %p.\n", random_table, host_random_table);
> > +
> > +	for (uint32_t i = 0; i < nr_vcpus; i++) {
> > +		host_random_table[i] = (uint32_t *)vm_vaddr_alloc(
> 
> The per-vCPU random table should go in perf_test_vcpu_args along with
> all the other per-vCPU information that is set up by the test and
> consumed by the guest code.
> 
> This will reduce some of the complexity here because you won't need to
> allocate the top-level array of pointers.
> 
> > +			pta->vm,
> > +			nr_randoms * sizeof(uint32_t),
> > +			(vm_vaddr_t)0);
> > +		pr_debug("Random row addr: %p %p.\n",
> > +			 host_random_table[i],
> > +			 addr_gva2hva(pta->vm, (vm_vaddr_t)host_random_table[i]));
> 
> Logging the host virtual addresses of the random table would probably
> not be valuable. But logging the guest virtual address would probably be
> more useful. The guest virtual address space management it pretty
> ad-hoc.
> 
> > +	}
> > +}
> > +
> > +void fill_random_table(uint32_t nr_vcpus, uint32_t nr_randoms)
> > +{
> > +	struct perf_test_args *pta = &perf_test_args;
> > +	uint32_t **host_random_table = addr_gva2hva(pta->vm, (vm_vaddr_t)random_table);
> > +	uint32_t *host_row;
> > +
> > +	pr_debug("Random start addr: %p %p.\n", random_table, host_random_table);
> > +
> > +	for (uint32_t i = 0; i < nr_vcpus; i++) {
> > +		host_row = addr_gva2hva(pta->vm, (vm_vaddr_t)host_random_table[i]);
> > +		pr_debug("Random row addr: %p %p.\n", host_random_table[i], host_row);
> > +
> > +		for (uint32_t j = 0; j < nr_randoms; j++)
> > +			host_row[j] = random();
> > +
> > +		pr_debug("New randoms row %d: %d, %d, %d...\n",
> > +			 i, host_row[0], host_row[1], host_row[2]);
> > +	}
> > +}
> > +
> >  void perf_test_setup_vcpus(struct kvm_vm *vm, int nr_vcpus,
> >  			   struct kvm_vcpu *vcpus[],
> >  			   uint64_t vcpu_memory_bytes,
> > -- 
> > 2.37.1.559.g78731f0fdb-goog
> >
Colton Lewis Aug. 12, 2022, 4:09 p.m. UTC | #3
On Wed, Aug 10, 2022 at 04:26:39PM -0700, David Matlack wrote:
> On Wed, Aug 10, 2022 at 04:18:10PM -0700, David Matlack wrote:
> > On Wed, Aug 10, 2022 at 05:58:28PM +0000, Colton Lewis wrote:
> > > Linear access through all pages does not seem to replicate performance
> > 
> > State what the patch does first, then the background/motivation.

Will do.

> > 
> > > problems with realistic dirty logging workloads. Make the test more
> > > sophisticated through random access. Each vcpu has its own sequence of
> > > random numbers that are refilled after every iteration. Having the
> > > main thread fill the table for every vcpu is less efficient than
> > > having each vcpu generate its own numbers, but this ensures threading
> > > nondeterminism won't destroy reproducibility with a given random seed.
> > 
> > Make it clear what this patch does specifically. e.g. "Make the test
> > more sophisticated through random access" is a bit misleading since all
> > this patch does is create a table of random numbers.
> 
> Please also call out how this change affects memory usage of the test.

Yes that seems important to mention.

> 
> > 
> > > 
> > > Signed-off-by: Colton Lewis <coltonlewis@google.com>
> > > ---
> > >  .../selftests/kvm/dirty_log_perf_test.c       | 13 ++++-
> > >  .../selftests/kvm/include/perf_test_util.h    |  4 ++
> > >  .../selftests/kvm/lib/perf_test_util.c        | 47 +++++++++++++++++++
> > >  3 files changed, 63 insertions(+), 1 deletion(-)
> > > 
> > > diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c
> > > index f99e39a672d3..80a1cbe7fbb0 100644
> > > --- a/tools/testing/selftests/kvm/dirty_log_perf_test.c
> > > +++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c
> > > @@ -132,6 +132,7 @@ struct test_params {
> > >  	bool partition_vcpu_memory_access;
> > >  	enum vm_mem_backing_src_type backing_src;
> > >  	int slots;
> > > +	uint32_t random_seed;
> > >  };
> > >  
> > >  static void toggle_dirty_logging(struct kvm_vm *vm, int slots, bool enable)
> > > @@ -243,6 +244,10 @@ static void run_test(enum vm_guest_mode mode, void *arg)
> > >  	/* Start the iterations */
> > >  	iteration = 0;
> > >  	host_quit = false;
> > > +	srandom(p->random_seed);
> > > +	pr_info("Random seed: %d\n", p->random_seed);
> > > +	alloc_random_table(nr_vcpus, guest_percpu_mem_size >> vm->page_shift);
> > > +	fill_random_table(nr_vcpus, guest_percpu_mem_size >> vm->page_shift);
> > 
> > Drive the allocate and filling of the random table in perf_test_util.c
> > as part of VM setup, and also move random_seed to perf_test_args.
> > 
> > This will reduce the amount of code needed in the test to use
> > perf_test_util with random accesses.  dirty_log_perf_test is the only
> > test using random accesses right now, but I could see us wanting to use
> > it in demand_paging_test and access_tracking_perf_test in the near
> > future.
> > 
> > You can still have the test refresh the random table every iteration by
> > exporting e.g. perf_test_refresh_random_table() for use by tests.

Will do.

> > 
> > >  
> > >  	clock_gettime(CLOCK_MONOTONIC, &start);
> > >  	for (i = 0; i < nr_vcpus; i++)
> > > @@ -270,6 +275,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
> > >  		ts_diff.tv_sec, ts_diff.tv_nsec);
> > >  
> > >  	while (iteration < p->iterations) {
> > > +		fill_random_table(nr_vcpus, guest_percpu_mem_size >> vm->page_shift);
> > 
> > I wonder if it would be better to use the same random access pattern
> > across iterations. One of the reasons to have multiple iterations is to
> > see how the guest performance changes as the memory moves through
> > different phases of dirty tracking. e.g. KVM might be splitting huge
> > pages during the first iteration but not the second. If the access
> > pattern is also changing across iterations that could make it harder to
> > identify performance changes due to KVM.

I hadn't thought about it like that. I agree and it means we don't
have to refresh the random table every iteration.

> > 
> > >  		/*
> > >  		 * Incrementing the iteration number will start the vCPUs
> > >  		 * dirtying memory again.
> > > @@ -380,6 +386,7 @@ static void help(char *name)
> > >  	printf(" -v: specify the number of vCPUs to run.\n");
> > >  	printf(" -o: Overlap guest memory accesses instead of partitioning\n"
> > >  	       "     them into a separate region of memory for each vCPU.\n");
> > > +	printf(" -r: specify the starting random seed.\n");
> > >  	backing_src_help("-s");
> > >  	printf(" -x: Split the memory region into this number of memslots.\n"
> > >  	       "     (default: 1)\n");
> > > @@ -396,6 +403,7 @@ int main(int argc, char *argv[])
> > >  		.partition_vcpu_memory_access = true,
> > >  		.backing_src = DEFAULT_VM_MEM_SRC,
> > >  		.slots = 1,
> > > +		.random_seed = time(NULL),
> > 
> > Perhaps the default seed should be a hard-coded value so that users
> > running the test with default arguments get deterministic results across
> > runs.

I don't think that's a good idea. Always using the same random seed
every time seems to negate the point of making it random. If people
want deterministic results across runs, they can always set the random
seed themselves.

> > 
> > >  	};
> > >  	int opt;
> > >  
> > > @@ -406,7 +414,7 @@ int main(int argc, char *argv[])
> > >  
> > >  	guest_modes_append_default();
> > >  
> > > -	while ((opt = getopt(argc, argv, "eghi:p:m:nb:f:v:os:x:")) != -1) {
> > > +	while ((opt = getopt(argc, argv, "eghi:p:m:nb:f:v:or:s:x:")) != -1) {
> > >  		switch (opt) {
> > >  		case 'e':
> > >  			/* 'e' is for evil. */
> > > @@ -442,6 +450,9 @@ int main(int argc, char *argv[])
> > >  		case 'o':
> > >  			p.partition_vcpu_memory_access = false;
> > >  			break;
> > > +		case 'r':
> > > +			p.random_seed = atoi(optarg);
> > > +			break;
> > >  		case 's':
> > >  			p.backing_src = parse_backing_src_type(optarg);
> > >  			break;
> > > diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h
> > > index eaa88df0555a..597875d0c3db 100644
> > > --- a/tools/testing/selftests/kvm/include/perf_test_util.h
> > > +++ b/tools/testing/selftests/kvm/include/perf_test_util.h
> > > @@ -44,6 +44,10 @@ struct perf_test_args {
> > >  };
> > >  
> > >  extern struct perf_test_args perf_test_args;
> > > +extern uint32_t **random_table;
> > 
> > Adding random_table to perf_test_util.h is unnecessary in this commit
> > (it's only used in perf_test_util.c).
> >

Will move this to the second commit.

> > > +
> > > +void alloc_random_table(uint32_t nr_vcpus, uint32_t nr_randoms);
> > > +void fill_random_table(uint32_t nr_vcpus, uint32_t nr_randoms);
> > 
> > Use perf_test_ prefixes for symbols visible outside of perf_test_util.c.
> > 
> > e.g.
> > 
> >   perf_test_random_table
> >   perf_test_alloc_random_table()
> >   perf_test_fill_random_table()
> > 

Will do.

> > >  
> > >  struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int nr_vcpus,
> > >  				   uint64_t vcpu_memory_bytes, int slots,
> > > diff --git a/tools/testing/selftests/kvm/lib/perf_test_util.c b/tools/testing/selftests/kvm/lib/perf_test_util.c
> > > index 9618b37c66f7..b04e8d2c0f37 100644
> > > --- a/tools/testing/selftests/kvm/lib/perf_test_util.c
> > > +++ b/tools/testing/selftests/kvm/lib/perf_test_util.c
> > > @@ -9,6 +9,10 @@
> > >  #include "processor.h"
> > >  
> > >  struct perf_test_args perf_test_args;
> > > +/* This pointer points to guest memory and must be converted with
> > > + * addr_gva2hva to be accessed from the host.
> > > + */
> > > +uint32_t **random_table;
> > 
> > Use vm_vaddr_t for variables that contain guest virtual addresses
> > (exception within guest_code(), of course).

Will do.

> > 
> > >  
> > >  /*
> > >   * Guest virtual memory offset of the testing memory slot.
> > > @@ -70,6 +74,49 @@ void perf_test_guest_code(uint32_t vcpu_idx)
> > >  	}
> > >  }
> > >  
> > > +void alloc_random_table(uint32_t nr_vcpus, uint32_t nr_randoms)
> > > +{
> > > +	struct perf_test_args *pta = &perf_test_args;
> > > +	uint32_t **host_random_table;
> > > +
> > > +	random_table = (uint32_t **)vm_vaddr_alloc(
> > > +		pta->vm,
> > > +		nr_vcpus * sizeof(uint32_t *),
> > > +		(vm_vaddr_t)0);
> > 
> > I notice vm_vaddr_alloc_pages() and vcpu_alloc_cpuid() use
> > KVM_UTIL_MIN_VADDR for the min. Should we use that here too?

I was curious so I looked at the history of KVM_UTIL_MIN_VADDR to see
if there's a reason for its value. According to Sean in 106a2e76, it's
arbitrary.

But I'll still use it for consistency's sake.

> > 
> > If so, this is a good opporunity to rename vm_vaddr_alloc() to
> > __vm_vaddr_alloc() and introduce:
> > 
> > vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz)
> > {
> >         return __vm_vaddr_alloc(vm, sz, KVM_UTIL_MIN_VADDR);
> > }
> > 

vm_vaddr_alloc() is used with a different value for that argument in
several places. Changing those calls is beyond scope for this patch.

> > > +	host_random_table = addr_gva2hva(pta->vm, (vm_vaddr_t)random_table);
> > > +	pr_debug("Random start addr: %p %p.\n", random_table, host_random_table);
> > > +
> > > +	for (uint32_t i = 0; i < nr_vcpus; i++) {
> > > +		host_random_table[i] = (uint32_t *)vm_vaddr_alloc(
> > 
> > The per-vCPU random table should go in perf_test_vcpu_args along with
> > all the other per-vCPU information that is set up by the test and
> > consumed by the guest code.
> > 
> > This will reduce some of the complexity here because you won't need to
> > allocate the top-level array of pointers.
> >

Good idea. Will do.

> > > +			pta->vm,
> > > +			nr_randoms * sizeof(uint32_t),
> > > +			(vm_vaddr_t)0);
> > > +		pr_debug("Random row addr: %p %p.\n",
> > > +			 host_random_table[i],
> > > +			 addr_gva2hva(pta->vm, (vm_vaddr_t)host_random_table[i]));
> > 
> > Logging the host virtual addresses of the random table would probably
> > not be valuable. But logging the guest virtual address would probably be
> > more useful. The guest virtual address space management it pretty
> > ad-hoc.
> >

Will do. I added that to make sure the random numbers were different
every iteration, but it is probably not useful.
diff mbox series

Patch

diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c
index f99e39a672d3..80a1cbe7fbb0 100644
--- a/tools/testing/selftests/kvm/dirty_log_perf_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c
@@ -132,6 +132,7 @@  struct test_params {
 	bool partition_vcpu_memory_access;
 	enum vm_mem_backing_src_type backing_src;
 	int slots;
+	uint32_t random_seed;
 };
 
 static void toggle_dirty_logging(struct kvm_vm *vm, int slots, bool enable)
@@ -243,6 +244,10 @@  static void run_test(enum vm_guest_mode mode, void *arg)
 	/* Start the iterations */
 	iteration = 0;
 	host_quit = false;
+	srandom(p->random_seed);
+	pr_info("Random seed: %d\n", p->random_seed);
+	alloc_random_table(nr_vcpus, guest_percpu_mem_size >> vm->page_shift);
+	fill_random_table(nr_vcpus, guest_percpu_mem_size >> vm->page_shift);
 
 	clock_gettime(CLOCK_MONOTONIC, &start);
 	for (i = 0; i < nr_vcpus; i++)
@@ -270,6 +275,7 @@  static void run_test(enum vm_guest_mode mode, void *arg)
 		ts_diff.tv_sec, ts_diff.tv_nsec);
 
 	while (iteration < p->iterations) {
+		fill_random_table(nr_vcpus, guest_percpu_mem_size >> vm->page_shift);
 		/*
 		 * Incrementing the iteration number will start the vCPUs
 		 * dirtying memory again.
@@ -380,6 +386,7 @@  static void help(char *name)
 	printf(" -v: specify the number of vCPUs to run.\n");
 	printf(" -o: Overlap guest memory accesses instead of partitioning\n"
 	       "     them into a separate region of memory for each vCPU.\n");
+	printf(" -r: specify the starting random seed.\n");
 	backing_src_help("-s");
 	printf(" -x: Split the memory region into this number of memslots.\n"
 	       "     (default: 1)\n");
@@ -396,6 +403,7 @@  int main(int argc, char *argv[])
 		.partition_vcpu_memory_access = true,
 		.backing_src = DEFAULT_VM_MEM_SRC,
 		.slots = 1,
+		.random_seed = time(NULL),
 	};
 	int opt;
 
@@ -406,7 +414,7 @@  int main(int argc, char *argv[])
 
 	guest_modes_append_default();
 
-	while ((opt = getopt(argc, argv, "eghi:p:m:nb:f:v:os:x:")) != -1) {
+	while ((opt = getopt(argc, argv, "eghi:p:m:nb:f:v:or:s:x:")) != -1) {
 		switch (opt) {
 		case 'e':
 			/* 'e' is for evil. */
@@ -442,6 +450,9 @@  int main(int argc, char *argv[])
 		case 'o':
 			p.partition_vcpu_memory_access = false;
 			break;
+		case 'r':
+			p.random_seed = atoi(optarg);
+			break;
 		case 's':
 			p.backing_src = parse_backing_src_type(optarg);
 			break;
diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h
index eaa88df0555a..597875d0c3db 100644
--- a/tools/testing/selftests/kvm/include/perf_test_util.h
+++ b/tools/testing/selftests/kvm/include/perf_test_util.h
@@ -44,6 +44,10 @@  struct perf_test_args {
 };
 
 extern struct perf_test_args perf_test_args;
+extern uint32_t **random_table;
+
+void alloc_random_table(uint32_t nr_vcpus, uint32_t nr_randoms);
+void fill_random_table(uint32_t nr_vcpus, uint32_t nr_randoms);
 
 struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int nr_vcpus,
 				   uint64_t vcpu_memory_bytes, int slots,
diff --git a/tools/testing/selftests/kvm/lib/perf_test_util.c b/tools/testing/selftests/kvm/lib/perf_test_util.c
index 9618b37c66f7..b04e8d2c0f37 100644
--- a/tools/testing/selftests/kvm/lib/perf_test_util.c
+++ b/tools/testing/selftests/kvm/lib/perf_test_util.c
@@ -9,6 +9,10 @@ 
 #include "processor.h"
 
 struct perf_test_args perf_test_args;
+/* This pointer points to guest memory and must be converted with
+ * addr_gva2hva to be accessed from the host.
+ */
+uint32_t **random_table;
 
 /*
  * Guest virtual memory offset of the testing memory slot.
@@ -70,6 +74,49 @@  void perf_test_guest_code(uint32_t vcpu_idx)
 	}
 }
 
+void alloc_random_table(uint32_t nr_vcpus, uint32_t nr_randoms)
+{
+	struct perf_test_args *pta = &perf_test_args;
+	uint32_t **host_random_table;
+
+	random_table = (uint32_t **)vm_vaddr_alloc(
+		pta->vm,
+		nr_vcpus * sizeof(uint32_t *),
+		(vm_vaddr_t)0);
+	host_random_table = addr_gva2hva(pta->vm, (vm_vaddr_t)random_table);
+	pr_debug("Random start addr: %p %p.\n", random_table, host_random_table);
+
+	for (uint32_t i = 0; i < nr_vcpus; i++) {
+		host_random_table[i] = (uint32_t *)vm_vaddr_alloc(
+			pta->vm,
+			nr_randoms * sizeof(uint32_t),
+			(vm_vaddr_t)0);
+		pr_debug("Random row addr: %p %p.\n",
+			 host_random_table[i],
+			 addr_gva2hva(pta->vm, (vm_vaddr_t)host_random_table[i]));
+	}
+}
+
+void fill_random_table(uint32_t nr_vcpus, uint32_t nr_randoms)
+{
+	struct perf_test_args *pta = &perf_test_args;
+	uint32_t **host_random_table = addr_gva2hva(pta->vm, (vm_vaddr_t)random_table);
+	uint32_t *host_row;
+
+	pr_debug("Random start addr: %p %p.\n", random_table, host_random_table);
+
+	for (uint32_t i = 0; i < nr_vcpus; i++) {
+		host_row = addr_gva2hva(pta->vm, (vm_vaddr_t)host_random_table[i]);
+		pr_debug("Random row addr: %p %p.\n", host_random_table[i], host_row);
+
+		for (uint32_t j = 0; j < nr_randoms; j++)
+			host_row[j] = random();
+
+		pr_debug("New randoms row %d: %d, %d, %d...\n",
+			 i, host_row[0], host_row[1], host_row[2]);
+	}
+}
+
 void perf_test_setup_vcpus(struct kvm_vm *vm, int nr_vcpus,
 			   struct kvm_vcpu *vcpus[],
 			   uint64_t vcpu_memory_bytes,