Message ID | 20220306220849.215358-4-shivam.kumar1@nutanix.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | KVM: Dirty quota-based throttling | expand |
On 07/03/22 3:38 am, Shivam Kumar wrote: > Add selftests for dirty quota throttling with an optional -d parameter > to configure by what value dirty quota should be incremented after > each dirty quota exit. With very small intervals, a smaller value of > dirty quota can ensure that the dirty quota exit code is tested. A zero > value disables dirty quota throttling and thus dirty logging, without > dirty quota throttling, can be tested. > > Suggested-by: Shaju Abraham <shaju.abraham@nutanix.com> > Suggested-by: Manish Mishra <manish.mishra@nutanix.com> > Co-developed-by: Anurag Madnawat <anurag.madnawat@nutanix.com> > Signed-off-by: Anurag Madnawat <anurag.madnawat@nutanix.com> > Signed-off-by: Shivam Kumar <shivam.kumar1@nutanix.com> > --- > tools/testing/selftests/kvm/dirty_log_test.c | 37 +++++++++++++++++-- > .../selftests/kvm/include/kvm_util_base.h | 4 ++ > tools/testing/selftests/kvm/lib/kvm_util.c | 36 ++++++++++++++++++ > 3 files changed, 73 insertions(+), 4 deletions(-) > > diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c > index 3fcd89e195c7..e75d826e21fb 100644 > --- a/tools/testing/selftests/kvm/dirty_log_test.c > +++ b/tools/testing/selftests/kvm/dirty_log_test.c > @@ -65,6 +65,8 @@ > > #define SIG_IPI SIGUSR1 > > +#define TEST_DIRTY_QUOTA_INCREMENT 8 > + > /* > * Guest/Host shared variables. 
Ensure addr_gva2hva() and/or > * sync_global_to/from_guest() are used when accessing from > @@ -191,6 +193,7 @@ static enum log_mode_t host_log_mode_option = LOG_MODE_ALL; > static enum log_mode_t host_log_mode; > static pthread_t vcpu_thread; > static uint32_t test_dirty_ring_count = TEST_DIRTY_RING_COUNT; > +static uint64_t test_dirty_quota_increment = TEST_DIRTY_QUOTA_INCREMENT; > > static void vcpu_kick(void) > { > @@ -210,6 +213,13 @@ static void sem_wait_until(sem_t *sem) > while (ret == -1 && errno == EINTR); > } > > +static void set_dirty_quota(struct kvm_vm *vm, uint64_t dirty_quota) > +{ > + struct kvm_run *run = vcpu_state(vm, VCPU_ID); > + > + vcpu_set_dirty_quota(run, dirty_quota); > +} > + > static bool clear_log_supported(void) > { > return kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2); > @@ -260,9 +270,13 @@ static void default_after_vcpu_run(struct kvm_vm *vm, int ret, int err) > TEST_ASSERT(ret == 0 || (ret == -1 && err == EINTR), > "vcpu run failed: errno=%d", err); > > - TEST_ASSERT(get_ucall(vm, VCPU_ID, NULL) == UCALL_SYNC, > - "Invalid guest sync status: exit_reason=%s\n", > - exit_reason_str(run->exit_reason)); > + if (test_dirty_quota_increment && > + run->exit_reason == KVM_EXIT_DIRTY_QUOTA_EXHAUSTED) > + vcpu_handle_dirty_quota_exit(run, test_dirty_quota_increment); > + else > + TEST_ASSERT(get_ucall(vm, VCPU_ID, NULL) == UCALL_SYNC, > + "Invalid guest sync status: exit_reason=%s\n", > + exit_reason_str(run->exit_reason)); > > vcpu_handle_sync_stop(); > } > @@ -377,6 +391,9 @@ static void dirty_ring_after_vcpu_run(struct kvm_vm *vm, int ret, int err) > if (get_ucall(vm, VCPU_ID, NULL) == UCALL_SYNC) { > /* We should allow this to continue */ > ; > + } else if (test_dirty_quota_increment && > + run->exit_reason == KVM_EXIT_DIRTY_QUOTA_EXHAUSTED) { > + vcpu_handle_dirty_quota_exit(run, test_dirty_quota_increment); > } else if (run->exit_reason == KVM_EXIT_DIRTY_RING_FULL || > (ret == -1 && err == EINTR)) { > /* Update the flag first 
before pause */ > @@ -773,6 +790,10 @@ static void run_test(enum vm_guest_mode mode, void *arg) > sync_global_to_guest(vm, guest_test_virt_mem); > sync_global_to_guest(vm, guest_num_pages); > > + /* Initialise dirty quota */ > + if (test_dirty_quota_increment) > + set_dirty_quota(vm, test_dirty_quota_increment); > + > /* Start the iterations */ > iteration = 1; > sync_global_to_guest(vm, iteration); > @@ -814,6 +835,9 @@ static void run_test(enum vm_guest_mode mode, void *arg) > /* Tell the vcpu thread to quit */ > host_quit = true; > log_mode_before_vcpu_join(); > + /* Terminate dirty quota throttling */ > + if (test_dirty_quota_increment) > + set_dirty_quota(vm, 0); > pthread_join(vcpu_thread, NULL); > > pr_info("Total bits checked: dirty (%"PRIu64"), clear (%"PRIu64"), " > @@ -835,6 +859,8 @@ static void help(char *name) > printf(" -c: specify dirty ring size, in number of entries\n"); > printf(" (only useful for dirty-ring test; default: %"PRIu32")\n", > TEST_DIRTY_RING_COUNT); > + printf(" -q: specify incemental dirty quota (default: %"PRIu32")\n", > + TEST_DIRTY_QUOTA_INCREMENT); > printf(" -i: specify iteration counts (default: %"PRIu64")\n", > TEST_HOST_LOOP_N); > printf(" -I: specify interval in ms (default: %"PRIu64" ms)\n", > @@ -863,11 +889,14 @@ int main(int argc, char *argv[]) > > guest_modes_append_default(); > > - while ((opt = getopt(argc, argv, "c:hi:I:p:m:M:")) != -1) { > + while ((opt = getopt(argc, argv, "c:q:hi:I:p:m:M:")) != -1) { > switch (opt) { > case 'c': > test_dirty_ring_count = strtol(optarg, NULL, 10); > break; > + case 'q': > + test_dirty_quota_increment = strtol(optarg, NULL, 10); > + break; > case 'i': > p.iterations = strtol(optarg, NULL, 10); > break; > diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h > index 4ed6aa049a91..b70732998329 100644 > --- a/tools/testing/selftests/kvm/include/kvm_util_base.h > +++ b/tools/testing/selftests/kvm/include/kvm_util_base.h 
> @@ -395,4 +395,8 @@ int vcpu_get_stats_fd(struct kvm_vm *vm, uint32_t vcpuid); > > uint32_t guest_get_vcpuid(void); > > +void vcpu_set_dirty_quota(struct kvm_run *run, uint64_t dirty_quota); > +void vcpu_handle_dirty_quota_exit(struct kvm_run *run, > + uint64_t test_dirty_quota_increment); > + > #endif /* SELFTEST_KVM_UTIL_BASE_H */ > diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c > index d8cf851ab119..fa77558d745e 100644 > --- a/tools/testing/selftests/kvm/lib/kvm_util.c > +++ b/tools/testing/selftests/kvm/lib/kvm_util.c > @@ -19,6 +19,7 @@ > #include <linux/kernel.h> > > #define KVM_UTIL_MIN_PFN 2 > +#define PML_BUFFER_SIZE 512 > > static int vcpu_mmap_sz(void); > > @@ -2286,6 +2287,7 @@ static struct exit_reason { > {KVM_EXIT_X86_RDMSR, "RDMSR"}, > {KVM_EXIT_X86_WRMSR, "WRMSR"}, > {KVM_EXIT_XEN, "XEN"}, > + {KVM_EXIT_DIRTY_QUOTA_EXHAUSTED, "DIRTY_QUOTA_EXHAUSTED"}, > #ifdef KVM_EXIT_MEMORY_NOT_PRESENT > {KVM_EXIT_MEMORY_NOT_PRESENT, "MEMORY_NOT_PRESENT"}, > #endif > @@ -2517,3 +2519,37 @@ int vcpu_get_stats_fd(struct kvm_vm *vm, uint32_t vcpuid) > > return ioctl(vcpu->fd, KVM_GET_STATS_FD, NULL); > } > + > +void vcpu_set_dirty_quota(struct kvm_run *run, uint64_t dirty_quota) > +{ > + run->dirty_quota = dirty_quota; > + > + if (dirty_quota) > + pr_info("Dirty quota throttling enabled with initial quota %"PRIu64"\n", > + dirty_quota); > + else > + pr_info("Dirty quota throttling disabled\n"); > +} > + > +void vcpu_handle_dirty_quota_exit(struct kvm_run *run, > + uint64_t test_dirty_quota_increment) > +{ > + uint64_t quota = run->dirty_quota_exit.quota; > + uint64_t count = run->dirty_quota_exit.count; > + > + /* > + * Due to PML, number of pages dirtied by the vcpu can exceed its dirty > + * quota by PML buffer size. 
> + */ > + TEST_ASSERT(count <= quota + PML_BUFFER_SIZE, "Invalid number of pages dirtied: count=%"PRIu64", quota=%"PRIu64"\n", count, quota); > + > + TEST_ASSERT(count >= quota, "Dirty quota exit happened with quota yet to be exhausted: count=%"PRIu64", quota=%"PRIu64"\n", count, quota); > + > + if (count > quota) > + pr_info("Dirty quota exit with unequal quota and count: count=%"PRIu64", quota=%"PRIu64"\n", count, quota); > + > + run->dirty_quota = count + test_dirty_quota_increment; > +} I'll be grateful if I could get some reviews on this patch. Will help me move forward. Thanks.
On 18/04/22 10:25 am, Shivam Kumar wrote: > > On 07/03/22 3:38 am, Shivam Kumar wrote: >> Add selftests for dirty quota throttling with an optional -d parameter >> to configure by what value dirty quota should be incremented after >> each dirty quota exit. With very small intervals, a smaller value of >> dirty quota can ensure that the dirty quota exit code is tested. A zero >> value disables dirty quota throttling and thus dirty logging, without >> dirty quota throttling, can be tested. >> >> Suggested-by: Shaju Abraham <shaju.abraham@nutanix.com> >> Suggested-by: Manish Mishra <manish.mishra@nutanix.com> >> Co-developed-by: Anurag Madnawat <anurag.madnawat@nutanix.com> >> Signed-off-by: Anurag Madnawat <anurag.madnawat@nutanix.com> >> Signed-off-by: Shivam Kumar <shivam.kumar1@nutanix.com> >> --- >> tools/testing/selftests/kvm/dirty_log_test.c | 37 +++++++++++++++++-- >> .../selftests/kvm/include/kvm_util_base.h | 4 ++ >> tools/testing/selftests/kvm/lib/kvm_util.c | 36 ++++++++++++++++++ >> 3 files changed, 73 insertions(+), 4 deletions(-) >> >> diff --git a/tools/testing/selftests/kvm/dirty_log_test.c >> b/tools/testing/selftests/kvm/dirty_log_test.c >> index 3fcd89e195c7..e75d826e21fb 100644 >> --- a/tools/testing/selftests/kvm/dirty_log_test.c >> +++ b/tools/testing/selftests/kvm/dirty_log_test.c >> @@ -65,6 +65,8 @@ >> #define SIG_IPI SIGUSR1 >> +#define TEST_DIRTY_QUOTA_INCREMENT 8 >> + >> /* >> * Guest/Host shared variables. 
Ensure addr_gva2hva() and/or >> * sync_global_to/from_guest() are used when accessing from >> @@ -191,6 +193,7 @@ static enum log_mode_t host_log_mode_option = >> LOG_MODE_ALL; >> static enum log_mode_t host_log_mode; >> static pthread_t vcpu_thread; >> static uint32_t test_dirty_ring_count = TEST_DIRTY_RING_COUNT; >> +static uint64_t test_dirty_quota_increment = >> TEST_DIRTY_QUOTA_INCREMENT; >> static void vcpu_kick(void) >> { >> @@ -210,6 +213,13 @@ static void sem_wait_until(sem_t *sem) >> while (ret == -1 && errno == EINTR); >> } >> +static void set_dirty_quota(struct kvm_vm *vm, uint64_t dirty_quota) >> +{ >> + struct kvm_run *run = vcpu_state(vm, VCPU_ID); >> + >> + vcpu_set_dirty_quota(run, dirty_quota); >> +} >> + >> static bool clear_log_supported(void) >> { >> return kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2); >> @@ -260,9 +270,13 @@ static void default_after_vcpu_run(struct kvm_vm >> *vm, int ret, int err) >> TEST_ASSERT(ret == 0 || (ret == -1 && err == EINTR), >> "vcpu run failed: errno=%d", err); >> - TEST_ASSERT(get_ucall(vm, VCPU_ID, NULL) == UCALL_SYNC, >> - "Invalid guest sync status: exit_reason=%s\n", >> - exit_reason_str(run->exit_reason)); >> + if (test_dirty_quota_increment && >> + run->exit_reason == KVM_EXIT_DIRTY_QUOTA_EXHAUSTED) >> + vcpu_handle_dirty_quota_exit(run, test_dirty_quota_increment); >> + else >> + TEST_ASSERT(get_ucall(vm, VCPU_ID, NULL) == UCALL_SYNC, >> + "Invalid guest sync status: exit_reason=%s\n", >> + exit_reason_str(run->exit_reason)); >> vcpu_handle_sync_stop(); >> } >> @@ -377,6 +391,9 @@ static void dirty_ring_after_vcpu_run(struct >> kvm_vm *vm, int ret, int err) >> if (get_ucall(vm, VCPU_ID, NULL) == UCALL_SYNC) { >> /* We should allow this to continue */ >> ; >> + } else if (test_dirty_quota_increment && >> + run->exit_reason == KVM_EXIT_DIRTY_QUOTA_EXHAUSTED) { >> + vcpu_handle_dirty_quota_exit(run, test_dirty_quota_increment); >> } else if (run->exit_reason == KVM_EXIT_DIRTY_RING_FULL || >> (ret == -1 && 
err == EINTR)) { >> /* Update the flag first before pause */ >> @@ -773,6 +790,10 @@ static void run_test(enum vm_guest_mode mode, >> void *arg) >> sync_global_to_guest(vm, guest_test_virt_mem); >> sync_global_to_guest(vm, guest_num_pages); >> + /* Initialise dirty quota */ >> + if (test_dirty_quota_increment) >> + set_dirty_quota(vm, test_dirty_quota_increment); >> + >> /* Start the iterations */ >> iteration = 1; >> sync_global_to_guest(vm, iteration); >> @@ -814,6 +835,9 @@ static void run_test(enum vm_guest_mode mode, >> void *arg) >> /* Tell the vcpu thread to quit */ >> host_quit = true; >> log_mode_before_vcpu_join(); >> + /* Terminate dirty quota throttling */ >> + if (test_dirty_quota_increment) >> + set_dirty_quota(vm, 0); >> pthread_join(vcpu_thread, NULL); >> pr_info("Total bits checked: dirty (%"PRIu64"), clear >> (%"PRIu64"), " >> @@ -835,6 +859,8 @@ static void help(char *name) >> printf(" -c: specify dirty ring size, in number of entries\n"); >> printf(" (only useful for dirty-ring test; default: >> %"PRIu32")\n", >> TEST_DIRTY_RING_COUNT); >> + printf(" -q: specify incemental dirty quota (default: >> %"PRIu32")\n", >> + TEST_DIRTY_QUOTA_INCREMENT); >> printf(" -i: specify iteration counts (default: %"PRIu64")\n", >> TEST_HOST_LOOP_N); >> printf(" -I: specify interval in ms (default: %"PRIu64" ms)\n", >> @@ -863,11 +889,14 @@ int main(int argc, char *argv[]) >> guest_modes_append_default(); >> - while ((opt = getopt(argc, argv, "c:hi:I:p:m:M:")) != -1) { >> + while ((opt = getopt(argc, argv, "c:q:hi:I:p:m:M:")) != -1) { >> switch (opt) { >> case 'c': >> test_dirty_ring_count = strtol(optarg, NULL, 10); >> break; >> + case 'q': >> + test_dirty_quota_increment = strtol(optarg, NULL, 10); >> + break; >> case 'i': >> p.iterations = strtol(optarg, NULL, 10); >> break; >> diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h >> b/tools/testing/selftests/kvm/include/kvm_util_base.h >> index 4ed6aa049a91..b70732998329 100644 >> --- 
a/tools/testing/selftests/kvm/include/kvm_util_base.h >> +++ b/tools/testing/selftests/kvm/include/kvm_util_base.h >> @@ -395,4 +395,8 @@ int vcpu_get_stats_fd(struct kvm_vm *vm, uint32_t >> vcpuid); >> uint32_t guest_get_vcpuid(void); >> +void vcpu_set_dirty_quota(struct kvm_run *run, uint64_t dirty_quota); >> +void vcpu_handle_dirty_quota_exit(struct kvm_run *run, >> + uint64_t test_dirty_quota_increment); >> + >> #endif /* SELFTEST_KVM_UTIL_BASE_H */ >> diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c >> b/tools/testing/selftests/kvm/lib/kvm_util.c >> index d8cf851ab119..fa77558d745e 100644 >> --- a/tools/testing/selftests/kvm/lib/kvm_util.c >> +++ b/tools/testing/selftests/kvm/lib/kvm_util.c >> @@ -19,6 +19,7 @@ >> #include <linux/kernel.h> >> #define KVM_UTIL_MIN_PFN 2 >> +#define PML_BUFFER_SIZE 512 >> static int vcpu_mmap_sz(void); >> @@ -2286,6 +2287,7 @@ static struct exit_reason { >> {KVM_EXIT_X86_RDMSR, "RDMSR"}, >> {KVM_EXIT_X86_WRMSR, "WRMSR"}, >> {KVM_EXIT_XEN, "XEN"}, >> + {KVM_EXIT_DIRTY_QUOTA_EXHAUSTED, "DIRTY_QUOTA_EXHAUSTED"}, >> #ifdef KVM_EXIT_MEMORY_NOT_PRESENT >> {KVM_EXIT_MEMORY_NOT_PRESENT, "MEMORY_NOT_PRESENT"}, >> #endif >> @@ -2517,3 +2519,37 @@ int vcpu_get_stats_fd(struct kvm_vm *vm, >> uint32_t vcpuid) >> return ioctl(vcpu->fd, KVM_GET_STATS_FD, NULL); >> } >> + >> +void vcpu_set_dirty_quota(struct kvm_run *run, uint64_t dirty_quota) >> +{ >> + run->dirty_quota = dirty_quota; >> + >> + if (dirty_quota) >> + pr_info("Dirty quota throttling enabled with initial quota >> %"PRIu64"\n", >> + dirty_quota); >> + else >> + pr_info("Dirty quota throttling disabled\n"); >> +} >> + >> +void vcpu_handle_dirty_quota_exit(struct kvm_run *run, >> + uint64_t test_dirty_quota_increment) >> +{ >> + uint64_t quota = run->dirty_quota_exit.quota; >> + uint64_t count = run->dirty_quota_exit.count; >> + >> + /* >> + * Due to PML, number of pages dirtied by the vcpu can exceed >> its dirty >> + * quota by PML buffer size. 
>> + */ >> + TEST_ASSERT(count <= quota + PML_BUFFER_SIZE, "Invalid number of >> pages >> + dirtied: count=%"PRIu64", quota=%"PRIu64"\n", count, quota); Sean, I don't think this would be valid anymore because as you mentioned, the vcpu can dirty multiple pages in one vmexit. I could use your help here. >> + >> + TEST_ASSERT(count >= quota, "Dirty quota exit happened with >> quota yet to >> + be exhausted: count=%"PRIu64", quota=%"PRIu64"\n", count, >> quota); >> + >> + if (count > quota) >> + pr_info("Dirty quota exit with unequal quota and count: >> + count=%"PRIu64", quota=%"PRIu64"\n", count, quota); >> + >> + run->dirty_quota = count + test_dirty_quota_increment; >> +} > I'll be grateful if I could get some reviews on this patch. Will help > me move forward. Thanks.
On Mon, Apr 18, 2022, Shivam Kumar wrote: > > > +void vcpu_handle_dirty_quota_exit(struct kvm_run *run, > > > + uint64_t test_dirty_quota_increment) > > > +{ > > > + uint64_t quota = run->dirty_quota_exit.quota; > > > + uint64_t count = run->dirty_quota_exit.count; > > > + > > > + /* > > > + * Due to PML, number of pages dirtied by the vcpu can exceed its dirty > > > + * quota by PML buffer size. > > > + */ > > > + TEST_ASSERT(count <= quota + PML_BUFFER_SIZE, "Invalid number of pages > > > + dirtied: count=%"PRIu64", quota=%"PRIu64"\n", count, quota); > Sean, I don't think this would be valid anymore because as you mentioned, the > vcpu can dirty multiple pages in one vmexit. I could use your help here. TL;DR: Should be fine, but s390 likely needs an exception. Practically speaking the 512 entry fuzziness is all but guaranteed to prevent false failures. But, unconditionally allowing for overflow of 512 entries also means the test is unlikely to ever detect violations. So to provide meaningful coverage, this needs to allow overflow if and only if PML is enabled. And that brings us back to false failures due to _legitimate_ scenarios where a vCPU can dirty multiple pages. Emphasis on legitimate, because except for an s390 edge case, I don't think this test's guest code does anything that would dirty multiple pages in a single instruction, e.g. there's no emulation, shouldn't be any descriptor table side effects, etc... So unless I'm missing something, KVM should be able to precisely handle the core run loop. s390 does appear to have a caveat: /* * On s390x, all pages of a 1M segment are initially marked as dirty * when a page of the segment is written to for the very first time. * To compensate this specialty in this test, we need to touch all * pages during the first iteration. 
*/ for (i = 0; i < guest_num_pages; i++) { addr = guest_test_virt_mem + i * guest_page_size; *(uint64_t *)addr = READ_ONCE(iteration); } IIUC, subsequent iterations will be ok, but the first iteration needs to allow for overflow of 256 (AFAIK the test only uses 4kb pages on s390).
On 18/04/22 9:47 pm, Sean Christopherson wrote: > On Mon, Apr 18, 2022, Shivam Kumar wrote: >>>> +void vcpu_handle_dirty_quota_exit(struct kvm_run *run, >>>> + uint64_t test_dirty_quota_increment) >>>> +{ >>>> + uint64_t quota = run->dirty_quota_exit.quota; >>>> + uint64_t count = run->dirty_quota_exit.count; >>>> + >>>> + /* >>>> + * Due to PML, number of pages dirtied by the vcpu can exceed its dirty >>>> + * quota by PML buffer size. >>>> + */ >>>> + TEST_ASSERT(count <= quota + PML_BUFFER_SIZE, "Invalid number of pages >>>> + dirtied: count=%"PRIu64", quota=%"PRIu64"\n", count, quota); >> Sean, I don't think this would be valid anymore because as you mentioned, the >> vcpu can dirty multiple pages in one vmexit. I could use your help here. > TL;DR: Should be fine, but s390 likely needs an exception. > > Practically speaking the 512 entry fuzziness is all but guaranteed to prevent > false failures. > > But, unconditionally allowing for overflow of 512 entries also means the test is > unlikely to ever detect violations. So to provide meaningful coverage, this needs > to allow overflow if and only if PML is enabled. > > And that brings us back to false failures due to _legitimate_ scenarios where a vCPU > can dirty multiple pages. Emphasis on legitimate, because except for an s390 edge > case, I don't think this test's guest code does anything that would dirty multiple > pages in a single instruction, e.g. there's no emulation, shouldn't be any descriptor > table side effects, etc... So unless I'm missing something, KVM should be able to > precisely handle the core run loop. > > s390 does appear to have a caveat: > > /* > * On s390x, all pages of a 1M segment are initially marked as dirty > * when a page of the segment is written to for the very first time. > * To compensate this specialty in this test, we need to touch all > * pages during the first iteration. 
> */ > for (i = 0; i < guest_num_pages; i++) { > addr = guest_test_virt_mem + i * guest_page_size; > *(uint64_t *)addr = READ_ONCE(iteration); > } > > IIUC, subsequent iterations will be ok, but the first iteration needs to allow > for overflow of 256 (AFAIK the test only uses 4kb pages on s390). Hi Sean, need an advice from your side before sending v4. In my opinion, I should organise my patchset in a way that the first n-1 patches have changes for x86 and the last patch has the changes for s390 and arm64. This can help me move forward for the x86 arch and get help and reviews from s390 and arm64 maintainers in parallel. Please let me know if this makes sense.
On Thu, Apr 28, 2022, Shivam Kumar wrote: > > On 18/04/22 9:47 pm, Sean Christopherson wrote: > > On Mon, Apr 18, 2022, Shivam Kumar wrote: > > > > > +void vcpu_handle_dirty_quota_exit(struct kvm_run *run, > > > > > + uint64_t test_dirty_quota_increment) > > > > > +{ > > > > > + uint64_t quota = run->dirty_quota_exit.quota; > > > > > + uint64_t count = run->dirty_quota_exit.count; > > > > > + > > > > > + /* > > > > > + * Due to PML, number of pages dirtied by the vcpu can exceed its dirty > > > > > + * quota by PML buffer size. > > > > > + */ > > > > > + TEST_ASSERT(count <= quota + PML_BUFFER_SIZE, "Invalid number of pages > > > > > + dirtied: count=%"PRIu64", quota=%"PRIu64"\n", count, quota); > > > Sean, I don't think this would be valid anymore because as you mentioned, the > > > vcpu can dirty multiple pages in one vmexit. I could use your help here. > > TL;DR: Should be fine, but s390 likely needs an exception. > > > > Practically speaking the 512 entry fuzziness is all but guaranteed to prevent > > false failures. > > > > But, unconditionally allowing for overflow of 512 entries also means the test is > > unlikely to ever detect violations. So to provide meaningful coverage, this needs > > to allow overflow if and only if PML is enabled. > > > > And that brings us back to false failures due to _legitimate_ scenarios where a vCPU > > can dirty multiple pages. Emphasis on legitimate, because except for an s390 edge > > case, I don't think this test's guest code does anything that would dirty multiple > > pages in a single instruction, e.g. there's no emulation, shouldn't be any descriptor > > table side effects, etc... So unless I'm missing something, KVM should be able to > > precisely handle the core run loop. > > > > s390 does appear to have a caveat: > > > > /* > > * On s390x, all pages of a 1M segment are initially marked as dirty > > * when a page of the segment is written to for the very first time. 
> > * To compensate this specialty in this test, we need to touch all > > * pages during the first iteration. > > */ > > for (i = 0; i < guest_num_pages; i++) { > > addr = guest_test_virt_mem + i * guest_page_size; > > *(uint64_t *)addr = READ_ONCE(iteration); > > } > > > > IIUC, subsequent iterations will be ok, but the first iteration needs to allow > > for overflow of 256 (AFAIK the test only uses 4kb pages on s390). > Hi Sean, need an advice from your side before sending v4. In my opinion, I > should organise my patchset in a way that the first n-1 patches have changes > for x86 and the last patch has the changes for s390 and arm64. This can help > me move forward for the x86 arch and get help and reviews from s390 and > arm64 maintainers in parallel. Please let me know if this makes sense. Works for me. It probably makes sense to split s390 and arm64 too, that way you don't need a v5 if one wants the feature and the other does not.
diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index 3fcd89e195c7..e75d826e21fb 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -65,6 +65,8 @@ #define SIG_IPI SIGUSR1 +#define TEST_DIRTY_QUOTA_INCREMENT 8 + /* * Guest/Host shared variables. Ensure addr_gva2hva() and/or * sync_global_to/from_guest() are used when accessing from @@ -191,6 +193,7 @@ static enum log_mode_t host_log_mode_option = LOG_MODE_ALL; static enum log_mode_t host_log_mode; static pthread_t vcpu_thread; static uint32_t test_dirty_ring_count = TEST_DIRTY_RING_COUNT; +static uint64_t test_dirty_quota_increment = TEST_DIRTY_QUOTA_INCREMENT; static void vcpu_kick(void) { @@ -210,6 +213,13 @@ static void sem_wait_until(sem_t *sem) while (ret == -1 && errno == EINTR); } +static void set_dirty_quota(struct kvm_vm *vm, uint64_t dirty_quota) +{ + struct kvm_run *run = vcpu_state(vm, VCPU_ID); + + vcpu_set_dirty_quota(run, dirty_quota); +} + static bool clear_log_supported(void) { return kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2); @@ -260,9 +270,13 @@ static void default_after_vcpu_run(struct kvm_vm *vm, int ret, int err) TEST_ASSERT(ret == 0 || (ret == -1 && err == EINTR), "vcpu run failed: errno=%d", err); - TEST_ASSERT(get_ucall(vm, VCPU_ID, NULL) == UCALL_SYNC, - "Invalid guest sync status: exit_reason=%s\n", - exit_reason_str(run->exit_reason)); + if (test_dirty_quota_increment && + run->exit_reason == KVM_EXIT_DIRTY_QUOTA_EXHAUSTED) + vcpu_handle_dirty_quota_exit(run, test_dirty_quota_increment); + else + TEST_ASSERT(get_ucall(vm, VCPU_ID, NULL) == UCALL_SYNC, + "Invalid guest sync status: exit_reason=%s\n", + exit_reason_str(run->exit_reason)); vcpu_handle_sync_stop(); } @@ -377,6 +391,9 @@ static void dirty_ring_after_vcpu_run(struct kvm_vm *vm, int ret, int err) if (get_ucall(vm, VCPU_ID, NULL) == UCALL_SYNC) { /* We should allow this to continue */ ; + } else if 
(test_dirty_quota_increment && + run->exit_reason == KVM_EXIT_DIRTY_QUOTA_EXHAUSTED) { + vcpu_handle_dirty_quota_exit(run, test_dirty_quota_increment); } else if (run->exit_reason == KVM_EXIT_DIRTY_RING_FULL || (ret == -1 && err == EINTR)) { /* Update the flag first before pause */ @@ -773,6 +790,10 @@ static void run_test(enum vm_guest_mode mode, void *arg) sync_global_to_guest(vm, guest_test_virt_mem); sync_global_to_guest(vm, guest_num_pages); + /* Initialise dirty quota */ + if (test_dirty_quota_increment) + set_dirty_quota(vm, test_dirty_quota_increment); + /* Start the iterations */ iteration = 1; sync_global_to_guest(vm, iteration); @@ -814,6 +835,9 @@ static void run_test(enum vm_guest_mode mode, void *arg) /* Tell the vcpu thread to quit */ host_quit = true; log_mode_before_vcpu_join(); + /* Terminate dirty quota throttling */ + if (test_dirty_quota_increment) + set_dirty_quota(vm, 0); pthread_join(vcpu_thread, NULL); pr_info("Total bits checked: dirty (%"PRIu64"), clear (%"PRIu64"), " @@ -835,6 +859,8 @@ static void help(char *name) printf(" -c: specify dirty ring size, in number of entries\n"); printf(" (only useful for dirty-ring test; default: %"PRIu32")\n", TEST_DIRTY_RING_COUNT); + printf(" -q: specify incemental dirty quota (default: %"PRIu32")\n", + TEST_DIRTY_QUOTA_INCREMENT); printf(" -i: specify iteration counts (default: %"PRIu64")\n", TEST_HOST_LOOP_N); printf(" -I: specify interval in ms (default: %"PRIu64" ms)\n", @@ -863,11 +889,14 @@ int main(int argc, char *argv[]) guest_modes_append_default(); - while ((opt = getopt(argc, argv, "c:hi:I:p:m:M:")) != -1) { + while ((opt = getopt(argc, argv, "c:q:hi:I:p:m:M:")) != -1) { switch (opt) { case 'c': test_dirty_ring_count = strtol(optarg, NULL, 10); break; + case 'q': + test_dirty_quota_increment = strtol(optarg, NULL, 10); + break; case 'i': p.iterations = strtol(optarg, NULL, 10); break; diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h 
b/tools/testing/selftests/kvm/include/kvm_util_base.h index 4ed6aa049a91..b70732998329 100644 --- a/tools/testing/selftests/kvm/include/kvm_util_base.h +++ b/tools/testing/selftests/kvm/include/kvm_util_base.h @@ -395,4 +395,8 @@ int vcpu_get_stats_fd(struct kvm_vm *vm, uint32_t vcpuid); uint32_t guest_get_vcpuid(void); +void vcpu_set_dirty_quota(struct kvm_run *run, uint64_t dirty_quota); +void vcpu_handle_dirty_quota_exit(struct kvm_run *run, + uint64_t test_dirty_quota_increment); + #endif /* SELFTEST_KVM_UTIL_BASE_H */ diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index d8cf851ab119..fa77558d745e 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -19,6 +19,7 @@ #include <linux/kernel.h> #define KVM_UTIL_MIN_PFN 2 +#define PML_BUFFER_SIZE 512 static int vcpu_mmap_sz(void); @@ -2286,6 +2287,7 @@ static struct exit_reason { {KVM_EXIT_X86_RDMSR, "RDMSR"}, {KVM_EXIT_X86_WRMSR, "WRMSR"}, {KVM_EXIT_XEN, "XEN"}, + {KVM_EXIT_DIRTY_QUOTA_EXHAUSTED, "DIRTY_QUOTA_EXHAUSTED"}, #ifdef KVM_EXIT_MEMORY_NOT_PRESENT {KVM_EXIT_MEMORY_NOT_PRESENT, "MEMORY_NOT_PRESENT"}, #endif @@ -2517,3 +2519,37 @@ int vcpu_get_stats_fd(struct kvm_vm *vm, uint32_t vcpuid) return ioctl(vcpu->fd, KVM_GET_STATS_FD, NULL); } + +void vcpu_set_dirty_quota(struct kvm_run *run, uint64_t dirty_quota) +{ + run->dirty_quota = dirty_quota; + + if (dirty_quota) + pr_info("Dirty quota throttling enabled with initial quota %"PRIu64"\n", + dirty_quota); + else + pr_info("Dirty quota throttling disabled\n"); +} + +void vcpu_handle_dirty_quota_exit(struct kvm_run *run, + uint64_t test_dirty_quota_increment) +{ + uint64_t quota = run->dirty_quota_exit.quota; + uint64_t count = run->dirty_quota_exit.count; + + /* + * Due to PML, number of pages dirtied by the vcpu can exceed its dirty + * quota by PML buffer size. 
+ */ + TEST_ASSERT(count <= quota + PML_BUFFER_SIZE, "Invalid number of pages dirtied: count=%"PRIu64", quota=%"PRIu64"\n", count, quota); + + TEST_ASSERT(count >= quota, "Dirty quota exit happened with quota yet to be exhausted: count=%"PRIu64", quota=%"PRIu64"\n", count, quota); + + if (count > quota) + pr_info("Dirty quota exit with unequal quota and count: count=%"PRIu64", quota=%"PRIu64"\n", count, quota); + + run->dirty_quota = count + test_dirty_quota_increment; +}