diff mbox series

[kvm-unit-tests,v2] s390x/smp: add minimal test for sigp sense running status

Message ID 20200402110250.63677-1-borntraeger@de.ibm.com (mailing list archive)
State New, archived
Headers show
Series [kvm-unit-tests,v2] s390x/smp: add minimal test for sigp sense running status | expand

Commit Message

Christian Borntraeger April 2, 2020, 11:02 a.m. UTC
make sure that sigp sense running status returns a sane value for
stopped CPUs. To avoid potential races with the stop being processed we
wait until sense running status is first 0.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 lib/s390x/smp.c |  2 +-
 lib/s390x/smp.h |  2 +-
 s390x/smp.c     | 13 +++++++++++++
 3 files changed, 15 insertions(+), 2 deletions(-)

Comments

Janosch Frank April 2, 2020, 12:18 p.m. UTC | #1
On 4/2/20 1:02 PM, Christian Borntraeger wrote:
> make sure that sigp sense running status returns a sane value for

s/m/M/

> stopped CPUs. To avoid potential races with the stop being processed we
> wait until sense running status is first 0.

ENOPARSE "...is first 0?"

> 
> Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
> ---
>  lib/s390x/smp.c |  2 +-
>  lib/s390x/smp.h |  2 +-
>  s390x/smp.c     | 13 +++++++++++++
>  3 files changed, 15 insertions(+), 2 deletions(-)
> 
> diff --git a/lib/s390x/smp.c b/lib/s390x/smp.c
> index 5ed8b7b..492cb05 100644
> --- a/lib/s390x/smp.c
> +++ b/lib/s390x/smp.c
> @@ -58,7 +58,7 @@ bool smp_cpu_stopped(uint16_t addr)
>  	return !!(status & (SIGP_STATUS_CHECK_STOP|SIGP_STATUS_STOPPED));
>  }
>  
> -bool smp_cpu_running(uint16_t addr)
> +bool smp_sense_running_status(uint16_t addr)
>  {
>  	if (sigp(addr, SIGP_SENSE_RUNNING, 0, NULL) != SIGP_CC_STATUS_STORED)
>  		return true;
> diff --git a/lib/s390x/smp.h b/lib/s390x/smp.h
> index a8b98c0..639ec92 100644
> --- a/lib/s390x/smp.h
> +++ b/lib/s390x/smp.h
> @@ -40,7 +40,7 @@ struct cpu_status {
>  int smp_query_num_cpus(void);
>  struct cpu *smp_cpu_from_addr(uint16_t addr);
>  bool smp_cpu_stopped(uint16_t addr);
> -bool smp_cpu_running(uint16_t addr);
> +bool smp_sense_running_status(uint16_t addr);

That's completely unrelated to the test

>  int smp_cpu_restart(uint16_t addr);
>  int smp_cpu_start(uint16_t addr, struct psw psw);
>  int smp_cpu_stop(uint16_t addr);
> diff --git a/s390x/smp.c b/s390x/smp.c
> index 79cdc1f..b4b1ff2 100644
> --- a/s390x/smp.c
> +++ b/s390x/smp.c
> @@ -210,6 +210,18 @@ static void test_emcall(void)
>  	report_prefix_pop();
>  }
>  
> +static void test_sense_running(void)
> +{
> +	report_prefix_push("sense_running");
> +	/* make sure CPU is stopped */
> +	smp_cpu_stop(1);
> +	/* wait for stop to succeed. */
> +	while(smp_sense_running_status(1));
> +	report(!smp_sense_running_status(1), "CPU1 sense claims not running");

That's basically true anyway after the loop, no?

> +	report_prefix_pop();
> +}
> +
> +
>  /* Used to dirty registers of cpu #1 before it is reset */
>  static void test_func_initial(void)
>  {
> @@ -319,6 +331,7 @@ int main(void)
>  	test_store_status();
>  	test_ecall();
>  	test_emcall();
> +	test_sense_running();
>  	test_reset();
>  	test_reset_initial();
>  	smp_cpu_destroy(1);
>
Christian Borntraeger April 2, 2020, 12:29 p.m. UTC | #2
On 02.04.20 14:18, Janosch Frank wrote:
> On 4/2/20 1:02 PM, Christian Borntraeger wrote:
>> make sure that sigp sense running status returns a sane value for
> 
> s/m/M/
> 
>> stopped CPUs. To avoid potential races with the stop being processed we
>> wait until sense running status is first 0.
> 
> ENOPARSE "...is first 0?"

Yes,  what about "....smp_sense_running_status returns false." ?

> 
>>
>> Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
>> ---
>>  lib/s390x/smp.c |  2 +-
>>  lib/s390x/smp.h |  2 +-
>>  s390x/smp.c     | 13 +++++++++++++
>>  3 files changed, 15 insertions(+), 2 deletions(-)
>>
>> diff --git a/lib/s390x/smp.c b/lib/s390x/smp.c
>> index 5ed8b7b..492cb05 100644
>> --- a/lib/s390x/smp.c
>> +++ b/lib/s390x/smp.c
>> @@ -58,7 +58,7 @@ bool smp_cpu_stopped(uint16_t addr)
>>  	return !!(status & (SIGP_STATUS_CHECK_STOP|SIGP_STATUS_STOPPED));
>>  }
>>  
>> -bool smp_cpu_running(uint16_t addr)
>> +bool smp_sense_running_status(uint16_t addr)
>>  {
>>  	if (sigp(addr, SIGP_SENSE_RUNNING, 0, NULL) != SIGP_CC_STATUS_STORED)
>>  		return true;
>> diff --git a/lib/s390x/smp.h b/lib/s390x/smp.h
>> index a8b98c0..639ec92 100644
>> --- a/lib/s390x/smp.h
>> +++ b/lib/s390x/smp.h
>> @@ -40,7 +40,7 @@ struct cpu_status {
>>  int smp_query_num_cpus(void);
>>  struct cpu *smp_cpu_from_addr(uint16_t addr);
>>  bool smp_cpu_stopped(uint16_t addr);
>> -bool smp_cpu_running(uint16_t addr);
>> +bool smp_sense_running_status(uint16_t addr);
> 
> That's completely unrelated to the test

Right but this name seems to better reflect what the function does. Because this is not
the oppositite of cpu_stopped.
> 
>>  int smp_cpu_restart(uint16_t addr);
>>  int smp_cpu_start(uint16_t addr, struct psw psw);
>>  int smp_cpu_stop(uint16_t addr);
>> diff --git a/s390x/smp.c b/s390x/smp.c
>> index 79cdc1f..b4b1ff2 100644
>> --- a/s390x/smp.c
>> +++ b/s390x/smp.c
>> @@ -210,6 +210,18 @@ static void test_emcall(void)
>>  	report_prefix_pop();
>>  }
>>  
>> +static void test_sense_running(void)
>> +{
>> +	report_prefix_push("sense_running");
>> +	/* make sure CPU is stopped */
>> +	smp_cpu_stop(1);
>> +	/* wait for stop to succeed. */
>> +	while(smp_sense_running_status(1));
>> +	report(!smp_sense_running_status(1), "CPU1 sense claims not running");
> 
> That's basically true anyway after the loop, no?

Yes, but  you get no "positive" message in the more verbose output variants
without a report statement.

> 
>> +	report_prefix_pop();
>> +}
>> +
>> +
>>  /* Used to dirty registers of cpu #1 before it is reset */
>>  static void test_func_initial(void)
>>  {
>> @@ -319,6 +331,7 @@ int main(void)
>>  	test_store_status();
>>  	test_ecall();
>>  	test_emcall();
>> +	test_sense_running();
>>  	test_reset();
>>  	test_reset_initial();
>>  	smp_cpu_destroy(1);
>>
> 
>
Janosch Frank April 2, 2020, 1:41 p.m. UTC | #3
On 4/2/20 2:29 PM, Christian Borntraeger wrote:
> 
> 
> On 02.04.20 14:18, Janosch Frank wrote:
>> On 4/2/20 1:02 PM, Christian Borntraeger wrote:
>>> make sure that sigp sense running status returns a sane value for
>>
>> s/m/M/
>>
>>> stopped CPUs. To avoid potential races with the stop being processed we
>>> wait until sense running status is first 0.
>>
>> ENOPARSE "...is first 0?"
> 
> Yes,  what about "....smp_sense_running_status returns false." ?

sure, or "returns 0"
"is first 0" just doesn't parse :)

> 
>>
>>>
>>> Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
>>> ---
>>>  lib/s390x/smp.c |  2 +-
>>>  lib/s390x/smp.h |  2 +-
>>>  s390x/smp.c     | 13 +++++++++++++
>>>  3 files changed, 15 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/lib/s390x/smp.c b/lib/s390x/smp.c
>>> index 5ed8b7b..492cb05 100644
>>> --- a/lib/s390x/smp.c
>>> +++ b/lib/s390x/smp.c
>>> @@ -58,7 +58,7 @@ bool smp_cpu_stopped(uint16_t addr)
>>>  	return !!(status & (SIGP_STATUS_CHECK_STOP|SIGP_STATUS_STOPPED));
>>>  }
>>>  
>>> -bool smp_cpu_running(uint16_t addr)
>>> +bool smp_sense_running_status(uint16_t addr)
>>>  {
>>>  	if (sigp(addr, SIGP_SENSE_RUNNING, 0, NULL) != SIGP_CC_STATUS_STORED)
>>>  		return true;
>>> diff --git a/lib/s390x/smp.h b/lib/s390x/smp.h
>>> index a8b98c0..639ec92 100644
>>> --- a/lib/s390x/smp.h
>>> +++ b/lib/s390x/smp.h
>>> @@ -40,7 +40,7 @@ struct cpu_status {
>>>  int smp_query_num_cpus(void);
>>>  struct cpu *smp_cpu_from_addr(uint16_t addr);
>>>  bool smp_cpu_stopped(uint16_t addr);
>>> -bool smp_cpu_running(uint16_t addr);
>>> +bool smp_sense_running_status(uint16_t addr);
>>
>> That's completely unrelated to the test
> 
> Right but this name seems to better reflect what the function does. Because this is not
> the oppositite of cpu_stopped.

I'm pondering if we want to split that out.

>>
>>>  int smp_cpu_restart(uint16_t addr);
>>>  int smp_cpu_start(uint16_t addr, struct psw psw);
>>>  int smp_cpu_stop(uint16_t addr);
>>> diff --git a/s390x/smp.c b/s390x/smp.c
>>> index 79cdc1f..b4b1ff2 100644
>>> --- a/s390x/smp.c
>>> +++ b/s390x/smp.c
>>> @@ -210,6 +210,18 @@ static void test_emcall(void)
>>>  	report_prefix_pop();
>>>  }
>>>  
>>> +static void test_sense_running(void)
>>> +{
>>> +	report_prefix_push("sense_running");
>>> +	/* make sure CPU is stopped */
>>> +	smp_cpu_stop(1);
>>> +	/* wait for stop to succeed. */
>>> +	while(smp_sense_running_status(1));
>>> +	report(!smp_sense_running_status(1), "CPU1 sense claims not running");
>>
>> That's basically true anyway after the loop, no?
> 
> Yes, but  you get no "positive" message in the more verbose output variants
> without a report statement.

report(true, "CPU1 sense claims not running");
That's also possible, but I leave that up to you.

> 
>>
>>> +	report_prefix_pop();
>>> +}
>>> +
>>> +
>>>  /* Used to dirty registers of cpu #1 before it is reset */
>>>  static void test_func_initial(void)
>>>  {
>>> @@ -319,6 +331,7 @@ int main(void)
>>>  	test_store_status();
>>>  	test_ecall();
>>>  	test_emcall();
>>> +	test_sense_running();
>>>  	test_reset();
>>>  	test_reset_initial();
>>>  	smp_cpu_destroy(1);
>>>
>>
>>
Christian Borntraeger April 2, 2020, 2:47 p.m. UTC | #4
On 02.04.20 15:41, Janosch Frank wrote:
> On 4/2/20 2:29 PM, Christian Borntraeger wrote:
>>
>>
>> On 02.04.20 14:18, Janosch Frank wrote:
>>> On 4/2/20 1:02 PM, Christian Borntraeger wrote:
>>>> make sure that sigp sense running status returns a sane value for
>>>
>>> s/m/M/
>>>
>>>> stopped CPUs. To avoid potential races with the stop being processed we
>>>> wait until sense running status is first 0.
>>>
>>> ENOPARSE "...is first 0?"
>>
>> Yes,  what about "....smp_sense_running_status returns false." ?
> 
> sure, or "returns 0"
> "is first 0" just doesn't parse :)
> 
>>
>>>
>>>>
>>>> Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
>>>> ---
>>>>  lib/s390x/smp.c |  2 +-
>>>>  lib/s390x/smp.h |  2 +-
>>>>  s390x/smp.c     | 13 +++++++++++++
>>>>  3 files changed, 15 insertions(+), 2 deletions(-)
>>>>
>>>> diff --git a/lib/s390x/smp.c b/lib/s390x/smp.c
>>>> index 5ed8b7b..492cb05 100644
>>>> --- a/lib/s390x/smp.c
>>>> +++ b/lib/s390x/smp.c
>>>> @@ -58,7 +58,7 @@ bool smp_cpu_stopped(uint16_t addr)
>>>>  	return !!(status & (SIGP_STATUS_CHECK_STOP|SIGP_STATUS_STOPPED));
>>>>  }
>>>>  
>>>> -bool smp_cpu_running(uint16_t addr)
>>>> +bool smp_sense_running_status(uint16_t addr)
>>>>  {
>>>>  	if (sigp(addr, SIGP_SENSE_RUNNING, 0, NULL) != SIGP_CC_STATUS_STORED)
>>>>  		return true;
>>>> diff --git a/lib/s390x/smp.h b/lib/s390x/smp.h
>>>> index a8b98c0..639ec92 100644
>>>> --- a/lib/s390x/smp.h
>>>> +++ b/lib/s390x/smp.h
>>>> @@ -40,7 +40,7 @@ struct cpu_status {
>>>>  int smp_query_num_cpus(void);
>>>>  struct cpu *smp_cpu_from_addr(uint16_t addr);
>>>>  bool smp_cpu_stopped(uint16_t addr);
>>>> -bool smp_cpu_running(uint16_t addr);
>>>> +bool smp_sense_running_status(uint16_t addr);
>>>
>>> That's completely unrelated to the test
>>
>> Right but this name seems to better reflect what the function does. Because this is not
>> the oppositite of cpu_stopped.
> 
> I'm pondering if we want to split that out.

A single patch for just 2 lines? I dont know.
> 
>>>
>>>>  int smp_cpu_restart(uint16_t addr);
>>>>  int smp_cpu_start(uint16_t addr, struct psw psw);
>>>>  int smp_cpu_stop(uint16_t addr);
>>>> diff --git a/s390x/smp.c b/s390x/smp.c
>>>> index 79cdc1f..b4b1ff2 100644
>>>> --- a/s390x/smp.c
>>>> +++ b/s390x/smp.c
>>>> @@ -210,6 +210,18 @@ static void test_emcall(void)
>>>>  	report_prefix_pop();
>>>>  }
>>>>  
>>>> +static void test_sense_running(void)
>>>> +{
>>>> +	report_prefix_push("sense_running");
>>>> +	/* make sure CPU is stopped */
>>>> +	smp_cpu_stop(1);
>>>> +	/* wait for stop to succeed. */
>>>> +	while(smp_sense_running_status(1));
>>>> +	report(!smp_sense_running_status(1), "CPU1 sense claims not running");
>>>
>>> That's basically true anyway after the loop, no?
>>
>> Yes, but  you get no "positive" message in the more verbose output variants
>> without a report statement.
> 
> report(true, "CPU1 sense claims not running");
> That's also possible, but I leave that up to you.

I do not care, both variants are fine. Whatever you or David prefer.
Cornelia Huck April 2, 2020, 3:03 p.m. UTC | #5
On Thu, 2 Apr 2020 16:47:44 +0200
Christian Borntraeger <borntraeger@de.ibm.com> wrote:

> On 02.04.20 15:41, Janosch Frank wrote:
> > On 4/2/20 2:29 PM, Christian Borntraeger wrote:  
> >>
> >>
> >> On 02.04.20 14:18, Janosch Frank wrote:  
> >>> On 4/2/20 1:02 PM, Christian Borntraeger wrote:  
> >>>> make sure that sigp sense running status returns a sane value for  
> >>>
> >>> s/m/M/
> >>>  
> >>>> stopped CPUs. To avoid potential races with the stop being processed we
> >>>> wait until sense running status is first 0.  
> >>>
> >>> ENOPARSE "...is first 0?"  
> >>
> >> Yes,  what about "....smp_sense_running_status returns false." ?  
> > 
> > sure, or "returns 0"
> > "is first 0" just doesn't parse :)
> >   
> >>  
> >>>  
> >>>>
> >>>> Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
> >>>> ---
> >>>>  lib/s390x/smp.c |  2 +-
> >>>>  lib/s390x/smp.h |  2 +-
> >>>>  s390x/smp.c     | 13 +++++++++++++
> >>>>  3 files changed, 15 insertions(+), 2 deletions(-)
> >>>>
> >>>> diff --git a/lib/s390x/smp.c b/lib/s390x/smp.c
> >>>> index 5ed8b7b..492cb05 100644
> >>>> --- a/lib/s390x/smp.c
> >>>> +++ b/lib/s390x/smp.c
> >>>> @@ -58,7 +58,7 @@ bool smp_cpu_stopped(uint16_t addr)
> >>>>  	return !!(status & (SIGP_STATUS_CHECK_STOP|SIGP_STATUS_STOPPED));
> >>>>  }
> >>>>  
> >>>> -bool smp_cpu_running(uint16_t addr)
> >>>> +bool smp_sense_running_status(uint16_t addr)
> >>>>  {
> >>>>  	if (sigp(addr, SIGP_SENSE_RUNNING, 0, NULL) != SIGP_CC_STATUS_STORED)
> >>>>  		return true;
> >>>> diff --git a/lib/s390x/smp.h b/lib/s390x/smp.h
> >>>> index a8b98c0..639ec92 100644
> >>>> --- a/lib/s390x/smp.h
> >>>> +++ b/lib/s390x/smp.h
> >>>> @@ -40,7 +40,7 @@ struct cpu_status {
> >>>>  int smp_query_num_cpus(void);
> >>>>  struct cpu *smp_cpu_from_addr(uint16_t addr);
> >>>>  bool smp_cpu_stopped(uint16_t addr);
> >>>> -bool smp_cpu_running(uint16_t addr);
> >>>> +bool smp_sense_running_status(uint16_t addr);  
> >>>
> >>> That's completely unrelated to the test  
> >>
> >> Right but this name seems to better reflect what the function does. Because this is not
> >> the oppositite of cpu_stopped.  
> > 
> > I'm pondering if we want to split that out.  
> 
> A single patch for just 2 lines? I dont know.

I vote for keeping it in the patch and simply mentioning it in the
commit message.

> >   
> >>>  
> >>>>  int smp_cpu_restart(uint16_t addr);
> >>>>  int smp_cpu_start(uint16_t addr, struct psw psw);
> >>>>  int smp_cpu_stop(uint16_t addr);
> >>>> diff --git a/s390x/smp.c b/s390x/smp.c
> >>>> index 79cdc1f..b4b1ff2 100644
> >>>> --- a/s390x/smp.c
> >>>> +++ b/s390x/smp.c
> >>>> @@ -210,6 +210,18 @@ static void test_emcall(void)
> >>>>  	report_prefix_pop();
> >>>>  }
> >>>>  
> >>>> +static void test_sense_running(void)
> >>>> +{
> >>>> +	report_prefix_push("sense_running");
> >>>> +	/* make sure CPU is stopped */
> >>>> +	smp_cpu_stop(1);
> >>>> +	/* wait for stop to succeed. */
> >>>> +	while(smp_sense_running_status(1));
> >>>> +	report(!smp_sense_running_status(1), "CPU1 sense claims not running");  
> >>>
> >>> That's basically true anyway after the loop, no?  
> >>
> >> Yes, but  you get no "positive" message in the more verbose output variants
> >> without a report statement.  
> > 
> > report(true, "CPU1 sense claims not running");
> > That's also possible, but I leave that up to you.  
> 
> I do not care, both variants are fine. Whatever you or David prefer. 

I'd keep the 'check' for !smp_sense_running_status(1) and add a comment.
David Hildenbrand April 2, 2020, 3:12 p.m. UTC | #6
On 02.04.20 13:02, Christian Borntraeger wrote:
> make sure that sigp sense running status returns a sane value for
> stopped CPUs. To avoid potential races with the stop being processed we
> wait until sense running status is first 0.
> 
> Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
> ---
>  lib/s390x/smp.c |  2 +-
>  lib/s390x/smp.h |  2 +-
>  s390x/smp.c     | 13 +++++++++++++
>  3 files changed, 15 insertions(+), 2 deletions(-)
> 
> diff --git a/lib/s390x/smp.c b/lib/s390x/smp.c
> index 5ed8b7b..492cb05 100644
> --- a/lib/s390x/smp.c
> +++ b/lib/s390x/smp.c
> @@ -58,7 +58,7 @@ bool smp_cpu_stopped(uint16_t addr)
>  	return !!(status & (SIGP_STATUS_CHECK_STOP|SIGP_STATUS_STOPPED));
>  }
>  
> -bool smp_cpu_running(uint16_t addr)
> +bool smp_sense_running_status(uint16_t addr)
>  {
>  	if (sigp(addr, SIGP_SENSE_RUNNING, 0, NULL) != SIGP_CC_STATUS_STORED)
>  		return true;
> diff --git a/lib/s390x/smp.h b/lib/s390x/smp.h
> index a8b98c0..639ec92 100644
> --- a/lib/s390x/smp.h
> +++ b/lib/s390x/smp.h
> @@ -40,7 +40,7 @@ struct cpu_status {
>  int smp_query_num_cpus(void);
>  struct cpu *smp_cpu_from_addr(uint16_t addr);
>  bool smp_cpu_stopped(uint16_t addr);
> -bool smp_cpu_running(uint16_t addr);
> +bool smp_sense_running_status(uint16_t addr);
>  int smp_cpu_restart(uint16_t addr);
>  int smp_cpu_start(uint16_t addr, struct psw psw);
>  int smp_cpu_stop(uint16_t addr);
> diff --git a/s390x/smp.c b/s390x/smp.c
> index 79cdc1f..b4b1ff2 100644
> --- a/s390x/smp.c
> +++ b/s390x/smp.c
> @@ -210,6 +210,18 @@ static void test_emcall(void)
>  	report_prefix_pop();
>  }
>  
> +static void test_sense_running(void)
> +{
> +	report_prefix_push("sense_running");
> +	/* make sure CPU is stopped */
> +	smp_cpu_stop(1);
> +	/* wait for stop to succeed. */
> +	while(smp_sense_running_status(1));
> +	report(!smp_sense_running_status(1), "CPU1 sense claims not running");
> +	report_prefix_pop();
> +}
> +
> +
>  /* Used to dirty registers of cpu #1 before it is reset */
>  static void test_func_initial(void)
>  {
> @@ -319,6 +331,7 @@ int main(void)
>  	test_store_status();
>  	test_ecall();
>  	test_emcall();
> +	test_sense_running();
>  	test_reset();
>  	test_reset_initial();
>  	smp_cpu_destroy(1);
> 

TBH, I am still not sure if this is completely free of races.

Assume CPU 1 is in handle_stop()

if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
	kvm_s390_vcpu_stop(vcpu);
// CPU 1: gets scheduled out.
// CPU 0: while(smp_sense_running_status(1)); finishes
// CPU 1: gets scheduled in to return to user space
return -EOPNOTSUPP;
// CPU 0: report(!smp_sense_running_status(1), "CPU1 sense claims not
running"); fails

SIGP SENSE RUNNING is simply racy as hell and doesn't give you any
guarantees. Which is good enough for some performance improvements
(e.g., spinlocks).

Now, I can queue this, but I wouldn't be surprised if we see random
failures at one point.
Christian Borntraeger April 2, 2020, 3:20 p.m. UTC | #7
On 02.04.20 17:12, David Hildenbrand wrote:
> On 02.04.20 13:02, Christian Borntraeger wrote:
>> make sure that sigp sense running status returns a sane value for
>> stopped CPUs. To avoid potential races with the stop being processed we
>> wait until sense running status is first 0.
>>
>> Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
>> ---
>>  lib/s390x/smp.c |  2 +-
>>  lib/s390x/smp.h |  2 +-
>>  s390x/smp.c     | 13 +++++++++++++
>>  3 files changed, 15 insertions(+), 2 deletions(-)
>>
>> diff --git a/lib/s390x/smp.c b/lib/s390x/smp.c
>> index 5ed8b7b..492cb05 100644
>> --- a/lib/s390x/smp.c
>> +++ b/lib/s390x/smp.c
>> @@ -58,7 +58,7 @@ bool smp_cpu_stopped(uint16_t addr)
>>  	return !!(status & (SIGP_STATUS_CHECK_STOP|SIGP_STATUS_STOPPED));
>>  }
>>  
>> -bool smp_cpu_running(uint16_t addr)
>> +bool smp_sense_running_status(uint16_t addr)
>>  {
>>  	if (sigp(addr, SIGP_SENSE_RUNNING, 0, NULL) != SIGP_CC_STATUS_STORED)
>>  		return true;
>> diff --git a/lib/s390x/smp.h b/lib/s390x/smp.h
>> index a8b98c0..639ec92 100644
>> --- a/lib/s390x/smp.h
>> +++ b/lib/s390x/smp.h
>> @@ -40,7 +40,7 @@ struct cpu_status {
>>  int smp_query_num_cpus(void);
>>  struct cpu *smp_cpu_from_addr(uint16_t addr);
>>  bool smp_cpu_stopped(uint16_t addr);
>> -bool smp_cpu_running(uint16_t addr);
>> +bool smp_sense_running_status(uint16_t addr);
>>  int smp_cpu_restart(uint16_t addr);
>>  int smp_cpu_start(uint16_t addr, struct psw psw);
>>  int smp_cpu_stop(uint16_t addr);
>> diff --git a/s390x/smp.c b/s390x/smp.c
>> index 79cdc1f..b4b1ff2 100644
>> --- a/s390x/smp.c
>> +++ b/s390x/smp.c
>> @@ -210,6 +210,18 @@ static void test_emcall(void)
>>  	report_prefix_pop();
>>  }
>>  
>> +static void test_sense_running(void)
>> +{
>> +	report_prefix_push("sense_running");
>> +	/* make sure CPU is stopped */
>> +	smp_cpu_stop(1);
>> +	/* wait for stop to succeed. */
>> +	while(smp_sense_running_status(1));
>> +	report(!smp_sense_running_status(1), "CPU1 sense claims not running");
>> +	report_prefix_pop();
>> +}
>> +
>> +
>>  /* Used to dirty registers of cpu #1 before it is reset */
>>  static void test_func_initial(void)
>>  {
>> @@ -319,6 +331,7 @@ int main(void)
>>  	test_store_status();
>>  	test_ecall();
>>  	test_emcall();
>> +	test_sense_running();
>>  	test_reset();
>>  	test_reset_initial();
>>  	smp_cpu_destroy(1);
>>
> 
> TBH, I am still not sure if this is completely free of races.
> 
> Assume CPU 1 is in handle_stop()
> 
> if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
> 	kvm_s390_vcpu_stop(vcpu);
> // CPU 1: gets scheduled out.
> // CPU 0: while(smp_sense_running_status(1)); finishes
> // CPU 1: gets scheduled in to return to user space
> return -EOPNOTSUPP;
> // CPU 0: report(!smp_sense_running_status(1), "CPU1 sense claims not
> running"); fails
> 
> SIGP SENSE RUNNING is simply racy as hell and doesn't give you any
> guarantees. Which is good enough for some performance improvements
> (e.g., spinlocks).
> 
> Now, I can queue this, but I wouldn't be surprised if we see random
> failures at one point.

Which would speak for Janoschs variant. Loop until non running at least once 
and then report success?
David Hildenbrand April 2, 2020, 3:25 p.m. UTC | #8
On 02.04.20 17:20, Christian Borntraeger wrote:
> 
> 
> On 02.04.20 17:12, David Hildenbrand wrote:
>> On 02.04.20 13:02, Christian Borntraeger wrote:
>>> make sure that sigp sense running status returns a sane value for
>>> stopped CPUs. To avoid potential races with the stop being processed we
>>> wait until sense running status is first 0.
>>>
>>> Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
>>> ---
>>>  lib/s390x/smp.c |  2 +-
>>>  lib/s390x/smp.h |  2 +-
>>>  s390x/smp.c     | 13 +++++++++++++
>>>  3 files changed, 15 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/lib/s390x/smp.c b/lib/s390x/smp.c
>>> index 5ed8b7b..492cb05 100644
>>> --- a/lib/s390x/smp.c
>>> +++ b/lib/s390x/smp.c
>>> @@ -58,7 +58,7 @@ bool smp_cpu_stopped(uint16_t addr)
>>>  	return !!(status & (SIGP_STATUS_CHECK_STOP|SIGP_STATUS_STOPPED));
>>>  }
>>>  
>>> -bool smp_cpu_running(uint16_t addr)
>>> +bool smp_sense_running_status(uint16_t addr)
>>>  {
>>>  	if (sigp(addr, SIGP_SENSE_RUNNING, 0, NULL) != SIGP_CC_STATUS_STORED)
>>>  		return true;
>>> diff --git a/lib/s390x/smp.h b/lib/s390x/smp.h
>>> index a8b98c0..639ec92 100644
>>> --- a/lib/s390x/smp.h
>>> +++ b/lib/s390x/smp.h
>>> @@ -40,7 +40,7 @@ struct cpu_status {
>>>  int smp_query_num_cpus(void);
>>>  struct cpu *smp_cpu_from_addr(uint16_t addr);
>>>  bool smp_cpu_stopped(uint16_t addr);
>>> -bool smp_cpu_running(uint16_t addr);
>>> +bool smp_sense_running_status(uint16_t addr);
>>>  int smp_cpu_restart(uint16_t addr);
>>>  int smp_cpu_start(uint16_t addr, struct psw psw);
>>>  int smp_cpu_stop(uint16_t addr);
>>> diff --git a/s390x/smp.c b/s390x/smp.c
>>> index 79cdc1f..b4b1ff2 100644
>>> --- a/s390x/smp.c
>>> +++ b/s390x/smp.c
>>> @@ -210,6 +210,18 @@ static void test_emcall(void)
>>>  	report_prefix_pop();
>>>  }
>>>  
>>> +static void test_sense_running(void)
>>> +{
>>> +	report_prefix_push("sense_running");
>>> +	/* make sure CPU is stopped */
>>> +	smp_cpu_stop(1);
>>> +	/* wait for stop to succeed. */
>>> +	while(smp_sense_running_status(1));
>>> +	report(!smp_sense_running_status(1), "CPU1 sense claims not running");
>>> +	report_prefix_pop();
>>> +}
>>> +
>>> +
>>>  /* Used to dirty registers of cpu #1 before it is reset */
>>>  static void test_func_initial(void)
>>>  {
>>> @@ -319,6 +331,7 @@ int main(void)
>>>  	test_store_status();
>>>  	test_ecall();
>>>  	test_emcall();
>>> +	test_sense_running();
>>>  	test_reset();
>>>  	test_reset_initial();
>>>  	smp_cpu_destroy(1);
>>>
>>
>> TBH, I am still not sure if this is completely free of races.
>>
>> Assume CPU 1 is in handle_stop()
>>
>> if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
>> 	kvm_s390_vcpu_stop(vcpu);
>> // CPU 1: gets scheduled out.
>> // CPU 0: while(smp_sense_running_status(1)); finishes
>> // CPU 1: gets scheduled in to return to user space
>> return -EOPNOTSUPP;
>> // CPU 0: report(!smp_sense_running_status(1), "CPU1 sense claims not
>> running"); fails
>>
>> SIGP SENSE RUNNING is simply racy as hell and doesn't give you any
>> guarantees. Which is good enough for some performance improvements
>> (e.g., spinlocks).
>>
>> Now, I can queue this, but I wouldn't be surprised if we see random
>> failures at one point.
> 
> Which would speak for Janoschs variant. Loop until non running at least once 
> and then report success?

As long as the other CPU isn't always scheduled (unlikely) and always in
the kernel (unlikely), this test would even pass without the
smp_cpu_stop(). So the test doesn't say much except "sometimes,
smp_sense_running_status(1) reports false". Agreed that the
smp_cpu_stop() will make that appear faster.

If we agree about these semantics, let's add them as a comment to the test.
Christian Borntraeger April 2, 2020, 3:38 p.m. UTC | #9
On 02.04.20 17:25, David Hildenbrand wrote:
> On 02.04.20 17:20, Christian Borntraeger wrote:
>>
>>
>> On 02.04.20 17:12, David Hildenbrand wrote:
>>> On 02.04.20 13:02, Christian Borntraeger wrote:
>>>> make sure that sigp sense running status returns a sane value for
>>>> stopped CPUs. To avoid potential races with the stop being processed we
>>>> wait until sense running status is first 0.
>>>>
>>>> Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
>>>> ---
>>>>  lib/s390x/smp.c |  2 +-
>>>>  lib/s390x/smp.h |  2 +-
>>>>  s390x/smp.c     | 13 +++++++++++++
>>>>  3 files changed, 15 insertions(+), 2 deletions(-)
>>>>
>>>> diff --git a/lib/s390x/smp.c b/lib/s390x/smp.c
>>>> index 5ed8b7b..492cb05 100644
>>>> --- a/lib/s390x/smp.c
>>>> +++ b/lib/s390x/smp.c
>>>> @@ -58,7 +58,7 @@ bool smp_cpu_stopped(uint16_t addr)
>>>>  	return !!(status & (SIGP_STATUS_CHECK_STOP|SIGP_STATUS_STOPPED));
>>>>  }
>>>>  
>>>> -bool smp_cpu_running(uint16_t addr)
>>>> +bool smp_sense_running_status(uint16_t addr)
>>>>  {
>>>>  	if (sigp(addr, SIGP_SENSE_RUNNING, 0, NULL) != SIGP_CC_STATUS_STORED)
>>>>  		return true;
>>>> diff --git a/lib/s390x/smp.h b/lib/s390x/smp.h
>>>> index a8b98c0..639ec92 100644
>>>> --- a/lib/s390x/smp.h
>>>> +++ b/lib/s390x/smp.h
>>>> @@ -40,7 +40,7 @@ struct cpu_status {
>>>>  int smp_query_num_cpus(void);
>>>>  struct cpu *smp_cpu_from_addr(uint16_t addr);
>>>>  bool smp_cpu_stopped(uint16_t addr);
>>>> -bool smp_cpu_running(uint16_t addr);
>>>> +bool smp_sense_running_status(uint16_t addr);
>>>>  int smp_cpu_restart(uint16_t addr);
>>>>  int smp_cpu_start(uint16_t addr, struct psw psw);
>>>>  int smp_cpu_stop(uint16_t addr);
>>>> diff --git a/s390x/smp.c b/s390x/smp.c
>>>> index 79cdc1f..b4b1ff2 100644
>>>> --- a/s390x/smp.c
>>>> +++ b/s390x/smp.c
>>>> @@ -210,6 +210,18 @@ static void test_emcall(void)
>>>>  	report_prefix_pop();
>>>>  }
>>>>  
>>>> +static void test_sense_running(void)
>>>> +{
>>>> +	report_prefix_push("sense_running");
>>>> +	/* make sure CPU is stopped */
>>>> +	smp_cpu_stop(1);
>>>> +	/* wait for stop to succeed. */
>>>> +	while(smp_sense_running_status(1));
>>>> +	report(!smp_sense_running_status(1), "CPU1 sense claims not running");
>>>> +	report_prefix_pop();
>>>> +}
>>>> +
>>>> +
>>>>  /* Used to dirty registers of cpu #1 before it is reset */
>>>>  static void test_func_initial(void)
>>>>  {
>>>> @@ -319,6 +331,7 @@ int main(void)
>>>>  	test_store_status();
>>>>  	test_ecall();
>>>>  	test_emcall();
>>>> +	test_sense_running();
>>>>  	test_reset();
>>>>  	test_reset_initial();
>>>>  	smp_cpu_destroy(1);
>>>>
>>>
>>> TBH, I am still not sure if this is completely free of races.
>>>
>>> Assume CPU 1 is in handle_stop()
>>>
>>> if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
>>> 	kvm_s390_vcpu_stop(vcpu);
>>> // CPU 1: gets scheduled out.
>>> // CPU 0: while(smp_sense_running_status(1)); finishes
>>> // CPU 1: gets scheduled in to return to user space
>>> return -EOPNOTSUPP;
>>> // CPU 0: report(!smp_sense_running_status(1), "CPU1 sense claims not
>>> running"); fails
>>>
>>> SIGP SENSE RUNNING is simply racy as hell and doesn't give you any
>>> guarantees. Which is good enough for some performance improvements
>>> (e.g., spinlocks).
>>>
>>> Now, I can queue this, but I wouldn't be surprised if we see random
>>> failures at one point.
>>
>> Which would speak for Janoschs variant. Loop until non running at least once 
>> and then report success?
> 
> As long as the other CPU isn't always scheduled (unlikely) and always in
> the kernel (unlikely), this test would even pass without the
> smp_cpu_stop(). So the test doesn't say much except "sometimes,
> smp_sense_running_status(1) reports false". Agreed that the
> smp_cpu_stop() will make that appear faster.
> 
> If we agree about these semantics, let's add them as a comment to the test.


Something like this: (I also added a test for running = true)

static void test_sense_running(void)
{
        report_prefix_push("sense_running");
        /* we are running */
        report(smp_sense_running_status(0), "CPU0 sense claims running");
        /* make sure CPU is stopped to speed up the not running case */
        smp_cpu_stop(1);
        /* Make sure to have at least one time with a not running indication */
        while(smp_sense_running_status(1));
        report(true, "CPU1 sense claims not running");
        report_prefix_pop();
}
David Hildenbrand April 2, 2020, 3:40 p.m. UTC | #10
On 02.04.20 17:38, Christian Borntraeger wrote:
> 
> 
> On 02.04.20 17:25, David Hildenbrand wrote:
>> On 02.04.20 17:20, Christian Borntraeger wrote:
>>>
>>>
>>> On 02.04.20 17:12, David Hildenbrand wrote:
>>>> On 02.04.20 13:02, Christian Borntraeger wrote:
>>>>> make sure that sigp sense running status returns a sane value for
>>>>> stopped CPUs. To avoid potential races with the stop being processed we
>>>>> wait until sense running status is first 0.
>>>>>
>>>>> Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
>>>>> ---
>>>>>  lib/s390x/smp.c |  2 +-
>>>>>  lib/s390x/smp.h |  2 +-
>>>>>  s390x/smp.c     | 13 +++++++++++++
>>>>>  3 files changed, 15 insertions(+), 2 deletions(-)
>>>>>
>>>>> diff --git a/lib/s390x/smp.c b/lib/s390x/smp.c
>>>>> index 5ed8b7b..492cb05 100644
>>>>> --- a/lib/s390x/smp.c
>>>>> +++ b/lib/s390x/smp.c
>>>>> @@ -58,7 +58,7 @@ bool smp_cpu_stopped(uint16_t addr)
>>>>>  	return !!(status & (SIGP_STATUS_CHECK_STOP|SIGP_STATUS_STOPPED));
>>>>>  }
>>>>>  
>>>>> -bool smp_cpu_running(uint16_t addr)
>>>>> +bool smp_sense_running_status(uint16_t addr)
>>>>>  {
>>>>>  	if (sigp(addr, SIGP_SENSE_RUNNING, 0, NULL) != SIGP_CC_STATUS_STORED)
>>>>>  		return true;
>>>>> diff --git a/lib/s390x/smp.h b/lib/s390x/smp.h
>>>>> index a8b98c0..639ec92 100644
>>>>> --- a/lib/s390x/smp.h
>>>>> +++ b/lib/s390x/smp.h
>>>>> @@ -40,7 +40,7 @@ struct cpu_status {
>>>>>  int smp_query_num_cpus(void);
>>>>>  struct cpu *smp_cpu_from_addr(uint16_t addr);
>>>>>  bool smp_cpu_stopped(uint16_t addr);
>>>>> -bool smp_cpu_running(uint16_t addr);
>>>>> +bool smp_sense_running_status(uint16_t addr);
>>>>>  int smp_cpu_restart(uint16_t addr);
>>>>>  int smp_cpu_start(uint16_t addr, struct psw psw);
>>>>>  int smp_cpu_stop(uint16_t addr);
>>>>> diff --git a/s390x/smp.c b/s390x/smp.c
>>>>> index 79cdc1f..b4b1ff2 100644
>>>>> --- a/s390x/smp.c
>>>>> +++ b/s390x/smp.c
>>>>> @@ -210,6 +210,18 @@ static void test_emcall(void)
>>>>>  	report_prefix_pop();
>>>>>  }
>>>>>  
>>>>> +static void test_sense_running(void)
>>>>> +{
>>>>> +	report_prefix_push("sense_running");
>>>>> +	/* make sure CPU is stopped */
>>>>> +	smp_cpu_stop(1);
>>>>> +	/* wait for stop to succeed. */
>>>>> +	while(smp_sense_running_status(1));
>>>>> +	report(!smp_sense_running_status(1), "CPU1 sense claims not running");
>>>>> +	report_prefix_pop();
>>>>> +}
>>>>> +
>>>>> +
>>>>>  /* Used to dirty registers of cpu #1 before it is reset */
>>>>>  static void test_func_initial(void)
>>>>>  {
>>>>> @@ -319,6 +331,7 @@ int main(void)
>>>>>  	test_store_status();
>>>>>  	test_ecall();
>>>>>  	test_emcall();
>>>>> +	test_sense_running();
>>>>>  	test_reset();
>>>>>  	test_reset_initial();
>>>>>  	smp_cpu_destroy(1);
>>>>>
>>>>
>>>> TBH, I am still not sure if this is completely free of races.
>>>>
>>>> Assume CPU 1 is in handle_stop()
>>>>
>>>> if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
>>>> 	kvm_s390_vcpu_stop(vcpu);
>>>> // CPU 1: gets scheduled out.
>>>> // CPU 0: while(smp_sense_running_status(1)); finishes
>>>> // CPU 1: gets scheduled in to return to user space
>>>> return -EOPNOTSUPP;
>>>> // CPU 0: report(!smp_sense_running_status(1), "CPU1 sense claims not
>>>> running"); fails
>>>>
>>>> SIGP SENSE RUNNING is simply racy as hell and doesn't give you any
>>>> guarantees. Which is good enough for some performance improvements
>>>> (e.g., spinlocks).
>>>>
>>>> Now, I can queue this, but I wouldn't be surprised if we see random
>>>> failures at one point.
>>>
>>> Which would speak for Janoschs variant. Loop until non running at least once 
>>> and then report success?
>>
>> As long as the other CPU isn't always scheduled (unlikely) and always in
>> the kernel (unlikely), this test would even pass without the
>> smp_cpu_stop(). So the test doesn't say much except "sometimes,
>> smp_sense_running_status(1) reports false". Agreed that the
>> smp_cpu_stop() will make that appear faster.
>>
>> If we agree about these semantics, let's add them as a comment to the test.
> 
> 
> Something like this: (I also added a test for running = true)
> 
> static void test_sense_running(void)
> {
>         report_prefix_push("sense_running");
>         /* we are running */
>         report(smp_sense_running_status(0), "CPU0 sense claims running");
>         /* make sure CPU is stopped to speed up the not running case */
>         smp_cpu_stop(1);
>         /* Make sure to have at least one time with a not running indication */
>         while(smp_sense_running_status(1));
>         report(true, "CPU1 sense claims not running");
>         report_prefix_pop();
> }

Yeah, looks better IMHO. Thanks

Thanks,

David / dhildenb
diff mbox series

Patch

diff --git a/lib/s390x/smp.c b/lib/s390x/smp.c
index 5ed8b7b..492cb05 100644
--- a/lib/s390x/smp.c
+++ b/lib/s390x/smp.c
@@ -58,7 +58,7 @@  bool smp_cpu_stopped(uint16_t addr)
 	return !!(status & (SIGP_STATUS_CHECK_STOP|SIGP_STATUS_STOPPED));
 }
 
-bool smp_cpu_running(uint16_t addr)
+bool smp_sense_running_status(uint16_t addr)
 {
 	if (sigp(addr, SIGP_SENSE_RUNNING, 0, NULL) != SIGP_CC_STATUS_STORED)
 		return true;
diff --git a/lib/s390x/smp.h b/lib/s390x/smp.h
index a8b98c0..639ec92 100644
--- a/lib/s390x/smp.h
+++ b/lib/s390x/smp.h
@@ -40,7 +40,7 @@  struct cpu_status {
 int smp_query_num_cpus(void);
 struct cpu *smp_cpu_from_addr(uint16_t addr);
 bool smp_cpu_stopped(uint16_t addr);
-bool smp_cpu_running(uint16_t addr);
+bool smp_sense_running_status(uint16_t addr);
 int smp_cpu_restart(uint16_t addr);
 int smp_cpu_start(uint16_t addr, struct psw psw);
 int smp_cpu_stop(uint16_t addr);
diff --git a/s390x/smp.c b/s390x/smp.c
index 79cdc1f..b4b1ff2 100644
--- a/s390x/smp.c
+++ b/s390x/smp.c
@@ -210,6 +210,18 @@  static void test_emcall(void)
 	report_prefix_pop();
 }
 
+static void test_sense_running(void)
+{
+	report_prefix_push("sense_running");
+	/* make sure CPU is stopped */
+	smp_cpu_stop(1);
+	/* wait for stop to succeed. */
+	while(smp_sense_running_status(1));
+	report(!smp_sense_running_status(1), "CPU1 sense claims not running");
+	report_prefix_pop();
+}
+
+
 /* Used to dirty registers of cpu #1 before it is reset */
 static void test_func_initial(void)
 {
@@ -319,6 +331,7 @@  int main(void)
 	test_store_status();
 	test_ecall();
 	test_emcall();
+	test_sense_running();
 	test_reset();
 	test_reset_initial();
 	smp_cpu_destroy(1);