Message ID | 20200402110250.63677-1-borntraeger@de.ibm.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [kvm-unit-tests,v2] s390x/smp: add minimal test for sigp sense running status | expand |
On 4/2/20 1:02 PM, Christian Borntraeger wrote: > make sure that sigp sense running status returns a sane value for s/m/M/ > stopped CPUs. To avoid potential races with the stop being processed we > wait until sense running status is first 0. ENOPARSE "...is first 0?" > > Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com> > --- > lib/s390x/smp.c | 2 +- > lib/s390x/smp.h | 2 +- > s390x/smp.c | 13 +++++++++++++ > 3 files changed, 15 insertions(+), 2 deletions(-) > > diff --git a/lib/s390x/smp.c b/lib/s390x/smp.c > index 5ed8b7b..492cb05 100644 > --- a/lib/s390x/smp.c > +++ b/lib/s390x/smp.c > @@ -58,7 +58,7 @@ bool smp_cpu_stopped(uint16_t addr) > return !!(status & (SIGP_STATUS_CHECK_STOP|SIGP_STATUS_STOPPED)); > } > > -bool smp_cpu_running(uint16_t addr) > +bool smp_sense_running_status(uint16_t addr) > { > if (sigp(addr, SIGP_SENSE_RUNNING, 0, NULL) != SIGP_CC_STATUS_STORED) > return true; > diff --git a/lib/s390x/smp.h b/lib/s390x/smp.h > index a8b98c0..639ec92 100644 > --- a/lib/s390x/smp.h > +++ b/lib/s390x/smp.h > @@ -40,7 +40,7 @@ struct cpu_status { > int smp_query_num_cpus(void); > struct cpu *smp_cpu_from_addr(uint16_t addr); > bool smp_cpu_stopped(uint16_t addr); > -bool smp_cpu_running(uint16_t addr); > +bool smp_sense_running_status(uint16_t addr); That's completely unrelated to the test > int smp_cpu_restart(uint16_t addr); > int smp_cpu_start(uint16_t addr, struct psw psw); > int smp_cpu_stop(uint16_t addr); > diff --git a/s390x/smp.c b/s390x/smp.c > index 79cdc1f..b4b1ff2 100644 > --- a/s390x/smp.c > +++ b/s390x/smp.c > @@ -210,6 +210,18 @@ static void test_emcall(void) > report_prefix_pop(); > } > > +static void test_sense_running(void) > +{ > + report_prefix_push("sense_running"); > + /* make sure CPU is stopped */ > + smp_cpu_stop(1); > + /* wait for stop to succeed. */ > + while(smp_sense_running_status(1)); > + report(!smp_sense_running_status(1), "CPU1 sense claims not running"); That's basically true anyway after the loop, no? > + report_prefix_pop(); > +} > + > + > /* Used to dirty registers of cpu #1 before it is reset */ > static void test_func_initial(void) > { > @@ -319,6 +331,7 @@ int main(void) > test_store_status(); > test_ecall(); > test_emcall(); > + test_sense_running(); > test_reset(); > test_reset_initial(); > smp_cpu_destroy(1); >
On 02.04.20 14:18, Janosch Frank wrote: > On 4/2/20 1:02 PM, Christian Borntraeger wrote: >> make sure that sigp sense running status returns a sane value for > > s/m/M/ > >> stopped CPUs. To avoid potential races with the stop being processed we >> wait until sense running status is first 0. > > ENOPARSE "...is first 0?" Yes, what about "....smp_sense_running_status returns false." ? > >> >> Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com> >> --- >> lib/s390x/smp.c | 2 +- >> lib/s390x/smp.h | 2 +- >> s390x/smp.c | 13 +++++++++++++ >> 3 files changed, 15 insertions(+), 2 deletions(-) >> >> diff --git a/lib/s390x/smp.c b/lib/s390x/smp.c >> index 5ed8b7b..492cb05 100644 >> --- a/lib/s390x/smp.c >> +++ b/lib/s390x/smp.c >> @@ -58,7 +58,7 @@ bool smp_cpu_stopped(uint16_t addr) >> return !!(status & (SIGP_STATUS_CHECK_STOP|SIGP_STATUS_STOPPED)); >> } >> >> -bool smp_cpu_running(uint16_t addr) >> +bool smp_sense_running_status(uint16_t addr) >> { >> if (sigp(addr, SIGP_SENSE_RUNNING, 0, NULL) != SIGP_CC_STATUS_STORED) >> return true; >> diff --git a/lib/s390x/smp.h b/lib/s390x/smp.h >> index a8b98c0..639ec92 100644 >> --- a/lib/s390x/smp.h >> +++ b/lib/s390x/smp.h >> @@ -40,7 +40,7 @@ struct cpu_status { >> int smp_query_num_cpus(void); >> struct cpu *smp_cpu_from_addr(uint16_t addr); >> bool smp_cpu_stopped(uint16_t addr); >> -bool smp_cpu_running(uint16_t addr); >> +bool smp_sense_running_status(uint16_t addr); > > That's completely unrelated to the test Right but this name seems to better reflect what the function does. Because this is not the oppositite of cpu_stopped. > >> int smp_cpu_restart(uint16_t addr); >> int smp_cpu_start(uint16_t addr, struct psw psw); >> int smp_cpu_stop(uint16_t addr); >> diff --git a/s390x/smp.c b/s390x/smp.c >> index 79cdc1f..b4b1ff2 100644 >> --- a/s390x/smp.c >> +++ b/s390x/smp.c >> @@ -210,6 +210,18 @@ static void test_emcall(void) >> report_prefix_pop(); >> } >> >> +static void test_sense_running(void) >> +{ >> + report_prefix_push("sense_running"); >> + /* make sure CPU is stopped */ >> + smp_cpu_stop(1); >> + /* wait for stop to succeed. */ >> + while(smp_sense_running_status(1)); >> + report(!smp_sense_running_status(1), "CPU1 sense claims not running"); > > That's basically true anyway after the loop, no? Yes, but you get no "positive" message in the more verbose output variants without a report statement. > >> + report_prefix_pop(); >> +} >> + >> + >> /* Used to dirty registers of cpu #1 before it is reset */ >> static void test_func_initial(void) >> { >> @@ -319,6 +331,7 @@ int main(void) >> test_store_status(); >> test_ecall(); >> test_emcall(); >> + test_sense_running(); >> test_reset(); >> test_reset_initial(); >> smp_cpu_destroy(1); >> > >
On 4/2/20 2:29 PM, Christian Borntraeger wrote: > > > On 02.04.20 14:18, Janosch Frank wrote: >> On 4/2/20 1:02 PM, Christian Borntraeger wrote: >>> make sure that sigp sense running status returns a sane value for >> >> s/m/M/ >> >>> stopped CPUs. To avoid potential races with the stop being processed we >>> wait until sense running status is first 0. >> >> ENOPARSE "...is first 0?" > > Yes, what about "....smp_sense_running_status returns false." ? sure, or "returns 0" "is first 0" just doesn't parse :) > >> >>> >>> Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com> >>> --- >>> lib/s390x/smp.c | 2 +- >>> lib/s390x/smp.h | 2 +- >>> s390x/smp.c | 13 +++++++++++++ >>> 3 files changed, 15 insertions(+), 2 deletions(-) >>> >>> diff --git a/lib/s390x/smp.c b/lib/s390x/smp.c >>> index 5ed8b7b..492cb05 100644 >>> --- a/lib/s390x/smp.c >>> +++ b/lib/s390x/smp.c >>> @@ -58,7 +58,7 @@ bool smp_cpu_stopped(uint16_t addr) >>> return !!(status & (SIGP_STATUS_CHECK_STOP|SIGP_STATUS_STOPPED)); >>> } >>> >>> -bool smp_cpu_running(uint16_t addr) >>> +bool smp_sense_running_status(uint16_t addr) >>> { >>> if (sigp(addr, SIGP_SENSE_RUNNING, 0, NULL) != SIGP_CC_STATUS_STORED) >>> return true; >>> diff --git a/lib/s390x/smp.h b/lib/s390x/smp.h >>> index a8b98c0..639ec92 100644 >>> --- a/lib/s390x/smp.h >>> +++ b/lib/s390x/smp.h >>> @@ -40,7 +40,7 @@ struct cpu_status { >>> int smp_query_num_cpus(void); >>> struct cpu *smp_cpu_from_addr(uint16_t addr); >>> bool smp_cpu_stopped(uint16_t addr); >>> -bool smp_cpu_running(uint16_t addr); >>> +bool smp_sense_running_status(uint16_t addr); >> >> That's completely unrelated to the test > > Right but this name seems to better reflect what the function does. Because this is not > the oppositite of cpu_stopped. I'm pondering if we want to split that out. >> >>> int smp_cpu_restart(uint16_t addr); >>> int smp_cpu_start(uint16_t addr, struct psw psw); >>> int smp_cpu_stop(uint16_t addr); >>> diff --git a/s390x/smp.c b/s390x/smp.c >>> index 79cdc1f..b4b1ff2 100644 >>> --- a/s390x/smp.c >>> +++ b/s390x/smp.c >>> @@ -210,6 +210,18 @@ static void test_emcall(void) >>> report_prefix_pop(); >>> } >>> >>> +static void test_sense_running(void) >>> +{ >>> + report_prefix_push("sense_running"); >>> + /* make sure CPU is stopped */ >>> + smp_cpu_stop(1); >>> + /* wait for stop to succeed. */ >>> + while(smp_sense_running_status(1)); >>> + report(!smp_sense_running_status(1), "CPU1 sense claims not running"); >> >> That's basically true anyway after the loop, no? > > Yes, but you get no "positive" message in the more verbose output variants > without a report statement. report(true, "CPU1 sense claims not running"); That's also possible, but I leave that up to you. > >> >>> + report_prefix_pop(); >>> +} >>> + >>> + >>> /* Used to dirty registers of cpu #1 before it is reset */ >>> static void test_func_initial(void) >>> { >>> @@ -319,6 +331,7 @@ int main(void) >>> test_store_status(); >>> test_ecall(); >>> test_emcall(); >>> + test_sense_running(); >>> test_reset(); >>> test_reset_initial(); >>> smp_cpu_destroy(1); >>> >> >>
On 02.04.20 15:41, Janosch Frank wrote: > On 4/2/20 2:29 PM, Christian Borntraeger wrote: >> >> >> On 02.04.20 14:18, Janosch Frank wrote: >>> On 4/2/20 1:02 PM, Christian Borntraeger wrote: >>>> make sure that sigp sense running status returns a sane value for >>> >>> s/m/M/ >>> >>>> stopped CPUs. To avoid potential races with the stop being processed we >>>> wait until sense running status is first 0. >>> >>> ENOPARSE "...is first 0?" >> >> Yes, what about "....smp_sense_running_status returns false." ? > > sure, or "returns 0" > "is first 0" just doesn't parse :) > >> >>> >>>> >>>> Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com> >>>> --- >>>> lib/s390x/smp.c | 2 +- >>>> lib/s390x/smp.h | 2 +- >>>> s390x/smp.c | 13 +++++++++++++ >>>> 3 files changed, 15 insertions(+), 2 deletions(-) >>>> >>>> diff --git a/lib/s390x/smp.c b/lib/s390x/smp.c >>>> index 5ed8b7b..492cb05 100644 >>>> --- a/lib/s390x/smp.c >>>> +++ b/lib/s390x/smp.c >>>> @@ -58,7 +58,7 @@ bool smp_cpu_stopped(uint16_t addr) >>>> return !!(status & (SIGP_STATUS_CHECK_STOP|SIGP_STATUS_STOPPED)); >>>> } >>>> >>>> -bool smp_cpu_running(uint16_t addr) >>>> +bool smp_sense_running_status(uint16_t addr) >>>> { >>>> if (sigp(addr, SIGP_SENSE_RUNNING, 0, NULL) != SIGP_CC_STATUS_STORED) >>>> return true; >>>> diff --git a/lib/s390x/smp.h b/lib/s390x/smp.h >>>> index a8b98c0..639ec92 100644 >>>> --- a/lib/s390x/smp.h >>>> +++ b/lib/s390x/smp.h >>>> @@ -40,7 +40,7 @@ struct cpu_status { >>>> int smp_query_num_cpus(void); >>>> struct cpu *smp_cpu_from_addr(uint16_t addr); >>>> bool smp_cpu_stopped(uint16_t addr); >>>> -bool smp_cpu_running(uint16_t addr); >>>> +bool smp_sense_running_status(uint16_t addr); >>> >>> That's completely unrelated to the test >> >> Right but this name seems to better reflect what the function does. Because this is not >> the oppositite of cpu_stopped. > > I'm pondering if we want to split that out. A single patch for just 2 lines? I dont know. > >>> >>>> int smp_cpu_restart(uint16_t addr); >>>> int smp_cpu_start(uint16_t addr, struct psw psw); >>>> int smp_cpu_stop(uint16_t addr); >>>> diff --git a/s390x/smp.c b/s390x/smp.c >>>> index 79cdc1f..b4b1ff2 100644 >>>> --- a/s390x/smp.c >>>> +++ b/s390x/smp.c >>>> @@ -210,6 +210,18 @@ static void test_emcall(void) >>>> report_prefix_pop(); >>>> } >>>> >>>> +static void test_sense_running(void) >>>> +{ >>>> + report_prefix_push("sense_running"); >>>> + /* make sure CPU is stopped */ >>>> + smp_cpu_stop(1); >>>> + /* wait for stop to succeed. */ >>>> + while(smp_sense_running_status(1)); >>>> + report(!smp_sense_running_status(1), "CPU1 sense claims not running"); >>> >>> That's basically true anyway after the loop, no? >> >> Yes, but you get no "positive" message in the more verbose output variants >> without a report statement. > > report(true, "CPU1 sense claims not running"); > That's also possible, but I leave that up to you. I do not care, both variants are fine. Whatever you or David prefer.
On Thu, 2 Apr 2020 16:47:44 +0200 Christian Borntraeger <borntraeger@de.ibm.com> wrote: > On 02.04.20 15:41, Janosch Frank wrote: > > On 4/2/20 2:29 PM, Christian Borntraeger wrote: > >> > >> > >> On 02.04.20 14:18, Janosch Frank wrote: > >>> On 4/2/20 1:02 PM, Christian Borntraeger wrote: > >>>> make sure that sigp sense running status returns a sane value for > >>> > >>> s/m/M/ > >>> > >>>> stopped CPUs. To avoid potential races with the stop being processed we > >>>> wait until sense running status is first 0. > >>> > >>> ENOPARSE "...is first 0?" > >> > >> Yes, what about "....smp_sense_running_status returns false." ? > > > > sure, or "returns 0" > > "is first 0" just doesn't parse :) > > > >> > >>> > >>>> > >>>> Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com> > >>>> --- > >>>> lib/s390x/smp.c | 2 +- > >>>> lib/s390x/smp.h | 2 +- > >>>> s390x/smp.c | 13 +++++++++++++ > >>>> 3 files changed, 15 insertions(+), 2 deletions(-) > >>>> > >>>> diff --git a/lib/s390x/smp.c b/lib/s390x/smp.c > >>>> index 5ed8b7b..492cb05 100644 > >>>> --- a/lib/s390x/smp.c > >>>> +++ b/lib/s390x/smp.c > >>>> @@ -58,7 +58,7 @@ bool smp_cpu_stopped(uint16_t addr) > >>>> return !!(status & (SIGP_STATUS_CHECK_STOP|SIGP_STATUS_STOPPED)); > >>>> } > >>>> > >>>> -bool smp_cpu_running(uint16_t addr) > >>>> +bool smp_sense_running_status(uint16_t addr) > >>>> { > >>>> if (sigp(addr, SIGP_SENSE_RUNNING, 0, NULL) != SIGP_CC_STATUS_STORED) > >>>> return true; > >>>> diff --git a/lib/s390x/smp.h b/lib/s390x/smp.h > >>>> index a8b98c0..639ec92 100644 > >>>> --- a/lib/s390x/smp.h > >>>> +++ b/lib/s390x/smp.h > >>>> @@ -40,7 +40,7 @@ struct cpu_status { > >>>> int smp_query_num_cpus(void); > >>>> struct cpu *smp_cpu_from_addr(uint16_t addr); > >>>> bool smp_cpu_stopped(uint16_t addr); > >>>> -bool smp_cpu_running(uint16_t addr); > >>>> +bool smp_sense_running_status(uint16_t addr); > >>> > >>> That's completely unrelated to the test > >> > >> Right but this name seems to better reflect what the function does. Because this is not > >> the oppositite of cpu_stopped. > > > > I'm pondering if we want to split that out. > > A single patch for just 2 lines? I dont know. I vote for keeping it in the patch and simply mentioning it in the commit message. > > > >>> > >>>> int smp_cpu_restart(uint16_t addr); > >>>> int smp_cpu_start(uint16_t addr, struct psw psw); > >>>> int smp_cpu_stop(uint16_t addr); > >>>> diff --git a/s390x/smp.c b/s390x/smp.c > >>>> index 79cdc1f..b4b1ff2 100644 > >>>> --- a/s390x/smp.c > >>>> +++ b/s390x/smp.c > >>>> @@ -210,6 +210,18 @@ static void test_emcall(void) > >>>> report_prefix_pop(); > >>>> } > >>>> > >>>> +static void test_sense_running(void) > >>>> +{ > >>>> + report_prefix_push("sense_running"); > >>>> + /* make sure CPU is stopped */ > >>>> + smp_cpu_stop(1); > >>>> + /* wait for stop to succeed. */ > >>>> + while(smp_sense_running_status(1)); > >>>> + report(!smp_sense_running_status(1), "CPU1 sense claims not running"); > >>> > >>> That's basically true anyway after the loop, no? > >> > >> Yes, but you get no "positive" message in the more verbose output variants > >> without a report statement. > > > > report(true, "CPU1 sense claims not running"); > > That's also possible, but I leave that up to you. > > I do not care, both variants are fine. Whatever you or David prefer. I'd keep the 'check' for !smp_sense_running_status(1) and add a comment.
On 02.04.20 13:02, Christian Borntraeger wrote: > make sure that sigp sense running status returns a sane value for > stopped CPUs. To avoid potential races with the stop being processed we > wait until sense running status is first 0. > > Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com> > --- > lib/s390x/smp.c | 2 +- > lib/s390x/smp.h | 2 +- > s390x/smp.c | 13 +++++++++++++ > 3 files changed, 15 insertions(+), 2 deletions(-) > > diff --git a/lib/s390x/smp.c b/lib/s390x/smp.c > index 5ed8b7b..492cb05 100644 > --- a/lib/s390x/smp.c > +++ b/lib/s390x/smp.c > @@ -58,7 +58,7 @@ bool smp_cpu_stopped(uint16_t addr) > return !!(status & (SIGP_STATUS_CHECK_STOP|SIGP_STATUS_STOPPED)); > } > > -bool smp_cpu_running(uint16_t addr) > +bool smp_sense_running_status(uint16_t addr) > { > if (sigp(addr, SIGP_SENSE_RUNNING, 0, NULL) != SIGP_CC_STATUS_STORED) > return true; > diff --git a/lib/s390x/smp.h b/lib/s390x/smp.h > index a8b98c0..639ec92 100644 > --- a/lib/s390x/smp.h > +++ b/lib/s390x/smp.h > @@ -40,7 +40,7 @@ struct cpu_status { > int smp_query_num_cpus(void); > struct cpu *smp_cpu_from_addr(uint16_t addr); > bool smp_cpu_stopped(uint16_t addr); > -bool smp_cpu_running(uint16_t addr); > +bool smp_sense_running_status(uint16_t addr); > int smp_cpu_restart(uint16_t addr); > int smp_cpu_start(uint16_t addr, struct psw psw); > int smp_cpu_stop(uint16_t addr); > diff --git a/s390x/smp.c b/s390x/smp.c > index 79cdc1f..b4b1ff2 100644 > --- a/s390x/smp.c > +++ b/s390x/smp.c > @@ -210,6 +210,18 @@ static void test_emcall(void) > report_prefix_pop(); > } > > +static void test_sense_running(void) > +{ > + report_prefix_push("sense_running"); > + /* make sure CPU is stopped */ > + smp_cpu_stop(1); > + /* wait for stop to succeed. */ > + while(smp_sense_running_status(1)); > + report(!smp_sense_running_status(1), "CPU1 sense claims not running"); > + report_prefix_pop(); > +} > + > + > /* Used to dirty registers of cpu #1 before it is reset */ > static void test_func_initial(void) > { > @@ -319,6 +331,7 @@ int main(void) > test_store_status(); > test_ecall(); > test_emcall(); > + test_sense_running(); > test_reset(); > test_reset_initial(); > smp_cpu_destroy(1); > TBH, I am still not sure if this is completely free of races. Assume CPU 1 is in handle_stop() if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) kvm_s390_vcpu_stop(vcpu); // CPU 1: gets scheduled out. // CPU 0: while(smp_sense_running_status(1)); finishes // CPU 1: gets scheduled in to return to user space return -EOPNOTSUPP; // CPU 0: report(!smp_sense_running_status(1), "CPU1 sense claims not running"); fails SIGP SENSE RUNNING is simply racy as hell and doesn't give you any guarantees. Which is good enough for some performance improvements (e.g., spinlocks). Now, I can queue this, but I wouldn't be surprised if we see random failures at one point.
On 02.04.20 17:12, David Hildenbrand wrote: > On 02.04.20 13:02, Christian Borntraeger wrote: >> make sure that sigp sense running status returns a sane value for >> stopped CPUs. To avoid potential races with the stop being processed we >> wait until sense running status is first 0. >> >> Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com> >> --- >> lib/s390x/smp.c | 2 +- >> lib/s390x/smp.h | 2 +- >> s390x/smp.c | 13 +++++++++++++ >> 3 files changed, 15 insertions(+), 2 deletions(-) >> >> diff --git a/lib/s390x/smp.c b/lib/s390x/smp.c >> index 5ed8b7b..492cb05 100644 >> --- a/lib/s390x/smp.c >> +++ b/lib/s390x/smp.c >> @@ -58,7 +58,7 @@ bool smp_cpu_stopped(uint16_t addr) >> return !!(status & (SIGP_STATUS_CHECK_STOP|SIGP_STATUS_STOPPED)); >> } >> >> -bool smp_cpu_running(uint16_t addr) >> +bool smp_sense_running_status(uint16_t addr) >> { >> if (sigp(addr, SIGP_SENSE_RUNNING, 0, NULL) != SIGP_CC_STATUS_STORED) >> return true; >> diff --git a/lib/s390x/smp.h b/lib/s390x/smp.h >> index a8b98c0..639ec92 100644 >> --- a/lib/s390x/smp.h >> +++ b/lib/s390x/smp.h >> @@ -40,7 +40,7 @@ struct cpu_status { >> int smp_query_num_cpus(void); >> struct cpu *smp_cpu_from_addr(uint16_t addr); >> bool smp_cpu_stopped(uint16_t addr); >> -bool smp_cpu_running(uint16_t addr); >> +bool smp_sense_running_status(uint16_t addr); >> int smp_cpu_restart(uint16_t addr); >> int smp_cpu_start(uint16_t addr, struct psw psw); >> int smp_cpu_stop(uint16_t addr); >> diff --git a/s390x/smp.c b/s390x/smp.c >> index 79cdc1f..b4b1ff2 100644 >> --- a/s390x/smp.c >> +++ b/s390x/smp.c >> @@ -210,6 +210,18 @@ static void test_emcall(void) >> report_prefix_pop(); >> } >> >> +static void test_sense_running(void) >> +{ >> + report_prefix_push("sense_running"); >> + /* make sure CPU is stopped */ >> + smp_cpu_stop(1); >> + /* wait for stop to succeed. */ >> + while(smp_sense_running_status(1)); >> + report(!smp_sense_running_status(1), "CPU1 sense claims not running"); >> + report_prefix_pop(); >> +} >> + >> + >> /* Used to dirty registers of cpu #1 before it is reset */ >> static void test_func_initial(void) >> { >> @@ -319,6 +331,7 @@ int main(void) >> test_store_status(); >> test_ecall(); >> test_emcall(); >> + test_sense_running(); >> test_reset(); >> test_reset_initial(); >> smp_cpu_destroy(1); >> > > TBH, I am still not sure if this is completely free of races. > > Assume CPU 1 is in handle_stop() > > if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) > kvm_s390_vcpu_stop(vcpu); > // CPU 1: gets scheduled out. > // CPU 0: while(smp_sense_running_status(1)); finishes > // CPU 1: gets scheduled in to return to user space > return -EOPNOTSUPP; > // CPU 0: report(!smp_sense_running_status(1), "CPU1 sense claims not > running"); fails > > SIGP SENSE RUNNING is simply racy as hell and doesn't give you any > guarantees. Which is good enough for some performance improvements > (e.g., spinlocks). > > Now, I can queue this, but I wouldn't be surprised if we see random > failures at one point. Which would speak for Janoschs variant. Loop until non running at least once and then report success?
On 02.04.20 17:20, Christian Borntraeger wrote: > > > On 02.04.20 17:12, David Hildenbrand wrote: >> On 02.04.20 13:02, Christian Borntraeger wrote: >>> make sure that sigp sense running status returns a sane value for >>> stopped CPUs. To avoid potential races with the stop being processed we >>> wait until sense running status is first 0. >>> >>> Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com> >>> --- >>> lib/s390x/smp.c | 2 +- >>> lib/s390x/smp.h | 2 +- >>> s390x/smp.c | 13 +++++++++++++ >>> 3 files changed, 15 insertions(+), 2 deletions(-) >>> >>> diff --git a/lib/s390x/smp.c b/lib/s390x/smp.c >>> index 5ed8b7b..492cb05 100644 >>> --- a/lib/s390x/smp.c >>> +++ b/lib/s390x/smp.c >>> @@ -58,7 +58,7 @@ bool smp_cpu_stopped(uint16_t addr) >>> return !!(status & (SIGP_STATUS_CHECK_STOP|SIGP_STATUS_STOPPED)); >>> } >>> >>> -bool smp_cpu_running(uint16_t addr) >>> +bool smp_sense_running_status(uint16_t addr) >>> { >>> if (sigp(addr, SIGP_SENSE_RUNNING, 0, NULL) != SIGP_CC_STATUS_STORED) >>> return true; >>> diff --git a/lib/s390x/smp.h b/lib/s390x/smp.h >>> index a8b98c0..639ec92 100644 >>> --- a/lib/s390x/smp.h >>> +++ b/lib/s390x/smp.h >>> @@ -40,7 +40,7 @@ struct cpu_status { >>> int smp_query_num_cpus(void); >>> struct cpu *smp_cpu_from_addr(uint16_t addr); >>> bool smp_cpu_stopped(uint16_t addr); >>> -bool smp_cpu_running(uint16_t addr); >>> +bool smp_sense_running_status(uint16_t addr); >>> int smp_cpu_restart(uint16_t addr); >>> int smp_cpu_start(uint16_t addr, struct psw psw); >>> int smp_cpu_stop(uint16_t addr); >>> diff --git a/s390x/smp.c b/s390x/smp.c >>> index 79cdc1f..b4b1ff2 100644 >>> --- a/s390x/smp.c >>> +++ b/s390x/smp.c >>> @@ -210,6 +210,18 @@ static void test_emcall(void) >>> report_prefix_pop(); >>> } >>> >>> +static void test_sense_running(void) >>> +{ >>> + report_prefix_push("sense_running"); >>> + /* make sure CPU is stopped */ >>> + smp_cpu_stop(1); >>> + /* wait for stop to succeed. */ >>> + while(smp_sense_running_status(1)); >>> + report(!smp_sense_running_status(1), "CPU1 sense claims not running"); >>> + report_prefix_pop(); >>> +} >>> + >>> + >>> /* Used to dirty registers of cpu #1 before it is reset */ >>> static void test_func_initial(void) >>> { >>> @@ -319,6 +331,7 @@ int main(void) >>> test_store_status(); >>> test_ecall(); >>> test_emcall(); >>> + test_sense_running(); >>> test_reset(); >>> test_reset_initial(); >>> smp_cpu_destroy(1); >>> >> >> TBH, I am still not sure if this is completely free of races. >> >> Assume CPU 1 is in handle_stop() >> >> if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) >> kvm_s390_vcpu_stop(vcpu); >> // CPU 1: gets scheduled out. >> // CPU 0: while(smp_sense_running_status(1)); finishes >> // CPU 1: gets scheduled in to return to user space >> return -EOPNOTSUPP; >> // CPU 0: report(!smp_sense_running_status(1), "CPU1 sense claims not >> running"); fails >> >> SIGP SENSE RUNNING is simply racy as hell and doesn't give you any >> guarantees. Which is good enough for some performance improvements >> (e.g., spinlocks). >> >> Now, I can queue this, but I wouldn't be surprised if we see random >> failures at one point. > > Which would speak for Janoschs variant. Loop until non running at least once > and then report success? As long as the other CPU isn't always scheduled (unlikely) and always in the kernel (unlikely), this test would even pass without the smp_cpu_stop(). So the test doesn't say much except "sometimes, smp_sense_running_status(1) reports false". Agreed that the smp_cpu_stop() will make that appear faster. If we agree about these semantics, let's add them as a comment to the test.
On 02.04.20 17:25, David Hildenbrand wrote: > On 02.04.20 17:20, Christian Borntraeger wrote: >> >> >> On 02.04.20 17:12, David Hildenbrand wrote: >>> On 02.04.20 13:02, Christian Borntraeger wrote: >>>> make sure that sigp sense running status returns a sane value for >>>> stopped CPUs. To avoid potential races with the stop being processed we >>>> wait until sense running status is first 0. >>>> >>>> Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com> >>>> --- >>>> lib/s390x/smp.c | 2 +- >>>> lib/s390x/smp.h | 2 +- >>>> s390x/smp.c | 13 +++++++++++++ >>>> 3 files changed, 15 insertions(+), 2 deletions(-) >>>> >>>> diff --git a/lib/s390x/smp.c b/lib/s390x/smp.c >>>> index 5ed8b7b..492cb05 100644 >>>> --- a/lib/s390x/smp.c >>>> +++ b/lib/s390x/smp.c >>>> @@ -58,7 +58,7 @@ bool smp_cpu_stopped(uint16_t addr) >>>> return !!(status & (SIGP_STATUS_CHECK_STOP|SIGP_STATUS_STOPPED)); >>>> } >>>> >>>> -bool smp_cpu_running(uint16_t addr) >>>> +bool smp_sense_running_status(uint16_t addr) >>>> { >>>> if (sigp(addr, SIGP_SENSE_RUNNING, 0, NULL) != SIGP_CC_STATUS_STORED) >>>> return true; >>>> diff --git a/lib/s390x/smp.h b/lib/s390x/smp.h >>>> index a8b98c0..639ec92 100644 >>>> --- a/lib/s390x/smp.h >>>> +++ b/lib/s390x/smp.h >>>> @@ -40,7 +40,7 @@ struct cpu_status { >>>> int smp_query_num_cpus(void); >>>> struct cpu *smp_cpu_from_addr(uint16_t addr); >>>> bool smp_cpu_stopped(uint16_t addr); >>>> -bool smp_cpu_running(uint16_t addr); >>>> +bool smp_sense_running_status(uint16_t addr); >>>> int smp_cpu_restart(uint16_t addr); >>>> int smp_cpu_start(uint16_t addr, struct psw psw); >>>> int smp_cpu_stop(uint16_t addr); >>>> diff --git a/s390x/smp.c b/s390x/smp.c >>>> index 79cdc1f..b4b1ff2 100644 >>>> --- a/s390x/smp.c >>>> +++ b/s390x/smp.c >>>> @@ -210,6 +210,18 @@ static void test_emcall(void) >>>> report_prefix_pop(); >>>> } >>>> >>>> +static void test_sense_running(void) >>>> +{ >>>> + report_prefix_push("sense_running"); >>>> + /* make sure CPU is stopped */ >>>> + smp_cpu_stop(1); >>>> + /* wait for stop to succeed. */ >>>> + while(smp_sense_running_status(1)); >>>> + report(!smp_sense_running_status(1), "CPU1 sense claims not running"); >>>> + report_prefix_pop(); >>>> +} >>>> + >>>> + >>>> /* Used to dirty registers of cpu #1 before it is reset */ >>>> static void test_func_initial(void) >>>> { >>>> @@ -319,6 +331,7 @@ int main(void) >>>> test_store_status(); >>>> test_ecall(); >>>> test_emcall(); >>>> + test_sense_running(); >>>> test_reset(); >>>> test_reset_initial(); >>>> smp_cpu_destroy(1); >>>> >>> >>> TBH, I am still not sure if this is completely free of races. >>> >>> Assume CPU 1 is in handle_stop() >>> >>> if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) >>> kvm_s390_vcpu_stop(vcpu); >>> // CPU 1: gets scheduled out. >>> // CPU 0: while(smp_sense_running_status(1)); finishes >>> // CPU 1: gets scheduled in to return to user space >>> return -EOPNOTSUPP; >>> // CPU 0: report(!smp_sense_running_status(1), "CPU1 sense claims not >>> running"); fails >>> >>> SIGP SENSE RUNNING is simply racy as hell and doesn't give you any >>> guarantees. Which is good enough for some performance improvements >>> (e.g., spinlocks). >>> >>> Now, I can queue this, but I wouldn't be surprised if we see random >>> failures at one point. >> >> Which would speak for Janoschs variant. Loop until non running at least once >> and then report success? > > As long as the other CPU isn't always scheduled (unlikely) and always in > the kernel (unlikely), this test would even pass without the > smp_cpu_stop(). So the test doesn't say much except "sometimes, > smp_sense_running_status(1) reports false". Agreed that the > smp_cpu_stop() will make that appear faster. > > If we agree about these semantics, let's add them as a comment to the test. Something like this: (I also added a test for running = true) static void test_sense_running(void) { report_prefix_push("sense_running"); /* we are running */ report(smp_sense_running_status(0), "CPU0 sense claims running"); /* make sure CPU is stopped to speed up the not running case */ smp_cpu_stop(1); /* Make sure to have at least one time with a not running indication */ while(smp_sense_running_status(1)); report(true, "CPU1 sense claims not running"); report_prefix_pop(); }
On 02.04.20 17:38, Christian Borntraeger wrote: > > > On 02.04.20 17:25, David Hildenbrand wrote: >> On 02.04.20 17:20, Christian Borntraeger wrote: >>> >>> >>> On 02.04.20 17:12, David Hildenbrand wrote: >>>> On 02.04.20 13:02, Christian Borntraeger wrote: >>>>> make sure that sigp sense running status returns a sane value for >>>>> stopped CPUs. To avoid potential races with the stop being processed we >>>>> wait until sense running status is first 0. >>>>> >>>>> Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com> >>>>> --- >>>>> lib/s390x/smp.c | 2 +- >>>>> lib/s390x/smp.h | 2 +- >>>>> s390x/smp.c | 13 +++++++++++++ >>>>> 3 files changed, 15 insertions(+), 2 deletions(-) >>>>> >>>>> diff --git a/lib/s390x/smp.c b/lib/s390x/smp.c >>>>> index 5ed8b7b..492cb05 100644 >>>>> --- a/lib/s390x/smp.c >>>>> +++ b/lib/s390x/smp.c >>>>> @@ -58,7 +58,7 @@ bool smp_cpu_stopped(uint16_t addr) >>>>> return !!(status & (SIGP_STATUS_CHECK_STOP|SIGP_STATUS_STOPPED)); >>>>> } >>>>> >>>>> -bool smp_cpu_running(uint16_t addr) >>>>> +bool smp_sense_running_status(uint16_t addr) >>>>> { >>>>> if (sigp(addr, SIGP_SENSE_RUNNING, 0, NULL) != SIGP_CC_STATUS_STORED) >>>>> return true; >>>>> diff --git a/lib/s390x/smp.h b/lib/s390x/smp.h >>>>> index a8b98c0..639ec92 100644 >>>>> --- a/lib/s390x/smp.h >>>>> +++ b/lib/s390x/smp.h >>>>> @@ -40,7 +40,7 @@ struct cpu_status { >>>>> int smp_query_num_cpus(void); >>>>> struct cpu *smp_cpu_from_addr(uint16_t addr); >>>>> bool smp_cpu_stopped(uint16_t addr); >>>>> -bool smp_cpu_running(uint16_t addr); >>>>> +bool smp_sense_running_status(uint16_t addr); >>>>> int smp_cpu_restart(uint16_t addr); >>>>> int smp_cpu_start(uint16_t addr, struct psw psw); >>>>> int smp_cpu_stop(uint16_t addr); >>>>> diff --git a/s390x/smp.c b/s390x/smp.c >>>>> index 79cdc1f..b4b1ff2 100644 >>>>> --- a/s390x/smp.c >>>>> +++ b/s390x/smp.c >>>>> @@ -210,6 +210,18 @@ static void test_emcall(void) >>>>> report_prefix_pop(); >>>>> } >>>>> >>>>> +static void test_sense_running(void) >>>>> +{ >>>>> + report_prefix_push("sense_running"); >>>>> + /* make sure CPU is stopped */ >>>>> + smp_cpu_stop(1); >>>>> + /* wait for stop to succeed. */ >>>>> + while(smp_sense_running_status(1)); >>>>> + report(!smp_sense_running_status(1), "CPU1 sense claims not running"); >>>>> + report_prefix_pop(); >>>>> +} >>>>> + >>>>> + >>>>> /* Used to dirty registers of cpu #1 before it is reset */ >>>>> static void test_func_initial(void) >>>>> { >>>>> @@ -319,6 +331,7 @@ int main(void) >>>>> test_store_status(); >>>>> test_ecall(); >>>>> test_emcall(); >>>>> + test_sense_running(); >>>>> test_reset(); >>>>> test_reset_initial(); >>>>> smp_cpu_destroy(1); >>>>> >>>> >>>> TBH, I am still not sure if this is completely free of races. >>>> >>>> Assume CPU 1 is in handle_stop() >>>> >>>> if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) >>>> kvm_s390_vcpu_stop(vcpu); >>>> // CPU 1: gets scheduled out. >>>> // CPU 0: while(smp_sense_running_status(1)); finishes >>>> // CPU 1: gets scheduled in to return to user space >>>> return -EOPNOTSUPP; >>>> // CPU 0: report(!smp_sense_running_status(1), "CPU1 sense claims not >>>> running"); fails >>>> >>>> SIGP SENSE RUNNING is simply racy as hell and doesn't give you any >>>> guarantees. Which is good enough for some performance improvements >>>> (e.g., spinlocks). >>>> >>>> Now, I can queue this, but I wouldn't be surprised if we see random >>>> failures at one point. >>> >>> Which would speak for Janoschs variant. Loop until non running at least once >>> and then report success? >> >> As long as the other CPU isn't always scheduled (unlikely) and always in >> the kernel (unlikely), this test would even pass without the >> smp_cpu_stop(). So the test doesn't say much except "sometimes, >> smp_sense_running_status(1) reports false". Agreed that the >> smp_cpu_stop() will make that appear faster. >> >> If we agree about these semantics, let's add them as a comment to the test. > > > Something like this: (I also added a test for running = true) > > static void test_sense_running(void) > { > report_prefix_push("sense_running"); > /* we are running */ > report(smp_sense_running_status(0), "CPU0 sense claims running"); > /* make sure CPU is stopped to speed up the not running case */ > smp_cpu_stop(1); > /* Make sure to have at least one time with a not running indication */ > while(smp_sense_running_status(1)); > report(true, "CPU1 sense claims not running"); > report_prefix_pop(); > } Yeah, looks better IMHO. Thanks Thanks, David / dhildenb
diff --git a/lib/s390x/smp.c b/lib/s390x/smp.c index 5ed8b7b..492cb05 100644 --- a/lib/s390x/smp.c +++ b/lib/s390x/smp.c @@ -58,7 +58,7 @@ bool smp_cpu_stopped(uint16_t addr) return !!(status & (SIGP_STATUS_CHECK_STOP|SIGP_STATUS_STOPPED)); } -bool smp_cpu_running(uint16_t addr) +bool smp_sense_running_status(uint16_t addr) { if (sigp(addr, SIGP_SENSE_RUNNING, 0, NULL) != SIGP_CC_STATUS_STORED) return true; diff --git a/lib/s390x/smp.h b/lib/s390x/smp.h index a8b98c0..639ec92 100644 --- a/lib/s390x/smp.h +++ b/lib/s390x/smp.h @@ -40,7 +40,7 @@ struct cpu_status { int smp_query_num_cpus(void); struct cpu *smp_cpu_from_addr(uint16_t addr); bool smp_cpu_stopped(uint16_t addr); -bool smp_cpu_running(uint16_t addr); +bool smp_sense_running_status(uint16_t addr); int smp_cpu_restart(uint16_t addr); int smp_cpu_start(uint16_t addr, struct psw psw); int smp_cpu_stop(uint16_t addr); diff --git a/s390x/smp.c b/s390x/smp.c index 79cdc1f..b4b1ff2 100644 --- a/s390x/smp.c +++ b/s390x/smp.c @@ -210,6 +210,18 @@ static void test_emcall(void) report_prefix_pop(); } +static void test_sense_running(void) +{ + report_prefix_push("sense_running"); + /* make sure CPU is stopped */ + smp_cpu_stop(1); + /* wait for stop to succeed. */ + while(smp_sense_running_status(1)); + report(!smp_sense_running_status(1), "CPU1 sense claims not running"); + report_prefix_pop(); +} + + /* Used to dirty registers of cpu #1 before it is reset */ static void test_func_initial(void) { @@ -319,6 +331,7 @@ int main(void) test_store_status(); test_ecall(); test_emcall(); + test_sense_running(); test_reset(); test_reset_initial(); smp_cpu_destroy(1);
make sure that sigp sense running status returns a sane value for stopped CPUs. To avoid potential races with the stop being processed we wait until sense running status is first 0. Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com> --- lib/s390x/smp.c | 2 +- lib/s390x/smp.h | 2 +- s390x/smp.c | 13 +++++++++++++ 3 files changed, 15 insertions(+), 2 deletions(-)