diff mbox series

[kvm-unit-tests,v3,2/2] s390x: Test specification exceptions during transaction

Message ID 20211022120156.281567-3-scgl@linux.ibm.com (mailing list archive)
State New, archived
Headers show
Series Add specification exception tests | expand

Commit Message

Janis Schoetterl-Glausch Oct. 22, 2021, 12:01 p.m. UTC
Program interruptions during transactional execution cause other
interruption codes.
Check that we see the expected code for (some) specification exceptions.

Signed-off-by: Janis Schoetterl-Glausch <scgl@linux.ibm.com>
---
 lib/s390x/asm/arch_def.h |   1 +
 s390x/spec_ex.c          | 172 +++++++++++++++++++++++++++++++++++++--
 2 files changed, 168 insertions(+), 5 deletions(-)

Comments

Claudio Imbrenda Oct. 25, 2021, 5:30 p.m. UTC | #1
On Fri, 22 Oct 2021 14:01:56 +0200
Janis Schoetterl-Glausch <scgl@linux.ibm.com> wrote:

> Program interruptions during transactional execution cause other
> interruption codes.
> Check that we see the expected code for (some) specification exceptions.
> 
> Signed-off-by: Janis Schoetterl-Glausch <scgl@linux.ibm.com>
> ---
>  lib/s390x/asm/arch_def.h |   1 +
>  s390x/spec_ex.c          | 172 +++++++++++++++++++++++++++++++++++++--
>  2 files changed, 168 insertions(+), 5 deletions(-)
> 
> diff --git a/lib/s390x/asm/arch_def.h b/lib/s390x/asm/arch_def.h
> index 40626d7..f7fb467 100644
> --- a/lib/s390x/asm/arch_def.h
> +++ b/lib/s390x/asm/arch_def.h
> @@ -55,6 +55,7 @@ struct psw {
>  #define PSW_MASK_BA			0x0000000080000000UL
>  #define PSW_MASK_64			(PSW_MASK_BA | PSW_MASK_EA)
>  
> +#define CTL0_TRANSACT_EX_CTL		(63 -  8)
>  #define CTL0_LOW_ADDR_PROT		(63 - 35)
>  #define CTL0_EDAT			(63 - 40)
>  #define CTL0_IEP			(63 - 43)
> diff --git a/s390x/spec_ex.c b/s390x/spec_ex.c
> index ec3322a..f3628bd 100644
> --- a/s390x/spec_ex.c
> +++ b/s390x/spec_ex.c
> @@ -4,9 +4,14 @@
>   *
>   * Specification exception test.
>   * Tests that specification exceptions occur when expected.
> + * This includes specification exceptions occurring during transactional execution
> + * as these result in another interruption code (the transactional-execution-aborted
> + * bit is set).
>   */
>  #include <stdlib.h>
> +#include <htmintrin.h>
>  #include <libcflat.h>
> +#include <asm/barrier.h>
>  #include <asm/interrupt.h>
>  #include <asm/facility.h>
>  
> @@ -92,18 +97,23 @@ static void not_even(void)
>  struct spec_ex_trigger {
>  	const char *name;
>  	void (*func)(void);
> +	bool transactable;
>  	void (*fixup)(void);
>  };
>  
>  static const struct spec_ex_trigger spec_ex_triggers[] = {
> -	{ "psw_bit_12_is_1", &psw_bit_12_is_1, &fixup_invalid_psw},
> -	{ "bad_alignment", &bad_alignment, NULL},
> -	{ "not_even", &not_even, NULL},
> -	{ NULL, NULL, NULL},
> +	{ "psw_bit_12_is_1", &psw_bit_12_is_1, false, &fixup_invalid_psw},
> +	{ "bad_alignment", &bad_alignment, true, NULL},
> +	{ "not_even", &not_even, true, NULL},
> +	{ NULL, NULL, true, NULL},
>  };
>  
>  struct args {
>  	uint64_t iterations;
> +	uint64_t max_retries;
> +	uint64_t suppress_info;
> +	uint64_t max_failures;
> +	bool diagnose;
>  };
>  
>  static void test_spec_ex(struct args *args,
> @@ -131,14 +141,132 @@ static void test_spec_ex(struct args *args,
>  		    expected_pgm);
>  }
>  
> +#define TRANSACTION_COMPLETED 4
> +#define TRANSACTION_MAX_RETRIES 5
> +
> +/* NULL must be passed to __builtin_tbegin via constant, forbid diagnose from
> + * being NULL to keep things simple
> + */
> +static int __attribute__((nonnull))
> +with_transaction(void (*trigger)(void), struct __htm_tdb *diagnose)
> +{
> +	int cc;
> +

if you want to be extra sure, put an assert here (although I'm not sure
how nonnull works, I have never seen it before)

> +	cc = __builtin_tbegin(diagnose);
> +	if (cc == _HTM_TBEGIN_STARTED) {
> +		trigger();
> +		__builtin_tend();
> +		return -TRANSACTION_COMPLETED;
> +	} else {
> +		return -cc;
> +	}
> +}
> +
> +static int retry_transaction(const struct spec_ex_trigger *trigger, unsigned int max_retries,
> +			     struct __htm_tdb *tdb, uint16_t expected_pgm)
> +{
> +	int trans_result, i;
> +	uint16_t pgm;
> +
> +	for (i = 0; i < max_retries; i++) {
> +		expect_pgm_int();
> +		trans_result = with_transaction(trigger->func, tdb);
> +		if (trans_result == -_HTM_TBEGIN_TRANSIENT) {
> +			mb();
> +			pgm = lc->pgm_int_code;
> +			if (pgm == 0)
> +				continue;
> +			else if (pgm == expected_pgm)
> +				return 0;
> +		}
> +		return trans_result;
> +	}
> +	return -TRANSACTION_MAX_RETRIES;

so this means that a test will be considered failed if the transaction
failed too many times?

this means that the test could fail if it is run on a busy system, even
if the host running the unit test is correct

also, do you really need to use negative values? it's probably easier
to read if you stick to positive values, and less prone to mistakes if
you accidentally forget a - somewhere.

> +}
> +
> +static void test_spec_ex_trans(struct args *args, const struct spec_ex_trigger *trigger)
> +{
> +	const uint16_t expected_pgm = PGM_INT_CODE_SPECIFICATION
> +			      | PGM_INT_CODE_TX_ABORTED_EVENT;
> +	union {
> +		struct __htm_tdb tdb;
> +		uint64_t dwords[sizeof(struct __htm_tdb) / sizeof(uint64_t)];
> +	} diag;
> +	unsigned int i, failures = 0;
> +	int trans_result;
> +
> +	if (!test_facility(73)) {
> +		report_skip("transactional-execution facility not installed");
> +		return;
> +	}
> +	ctl_set_bit(0, CTL0_TRANSACT_EX_CTL); /* enable transactional-exec */
> +
> +	for (i = 0; i < args->iterations && failures <= args->max_failures; i++) {
> +		register_pgm_cleanup_func(trigger->fixup);
> +		trans_result = retry_transaction(trigger, args->max_retries, &diag.tdb, expected_pgm);

so you retry each iteration up to args->max_retries times, and if a
transaction aborts too many times (maybe because the host system is
very busy), then you consider it a fail

> +		register_pgm_cleanup_func(NULL);
> +		switch (trans_result) {
> +		case 0:
> +			continue;
> +		case -_HTM_TBEGIN_INDETERMINATE:
> +		case -_HTM_TBEGIN_PERSISTENT:
> +			if (failures < args->suppress_info)
> +				report_info("transaction failed with cc %d",
> +					    -trans_result);
> +			break;
> +		case -_HTM_TBEGIN_TRANSIENT:
> +			report_fail("Program interrupt: expected(%d) == received(%d)",
> +				    expected_pgm,
> +				    clear_pgm_int());
> +			goto out;
> +		case -TRANSACTION_COMPLETED:
> +			report_fail("Transaction completed without exception");
> +			goto out;
> +		case -TRANSACTION_MAX_RETRIES:
> +			if (failures < args->suppress_info)
> +				report_info("Retried transaction %lu times without exception",
> +					    args->max_retries);
> +			break;
> +		default:
> +			report_fail("Invalid return transaction result");
> +			goto out;
> +		}
> +
> +		if (failures < args->suppress_info)
> +			report_info("transaction abort code: %llu", diag.tdb.abort_code);
> +		if (args->diagnose && failures < args->suppress_info) {
> +			for (i = 0; i < 32; i++)
> +				report_info("diag+%03d: %016lx", i*8, diag.dwords[i]);
> +		}
> +		++failures;
> +	}
> +	if (failures <= args->max_failures) {
> +		report_pass(
> +			"Program interrupt: always expected(%d) == received(%d), transaction failures: %u",
> +			expected_pgm,
> +			expected_pgm,
> +			failures);
> +	} else {
> +		report_fail("Too many transaction failures: %u", failures);
> +	}
> +	if (failures > args->suppress_info)
> +		report_info("Suppressed some transaction failure information messages");
> +
> +out:
> +	ctl_clear_bit(0, CTL0_TRANSACT_EX_CTL);
> +}
> +
>  static struct args parse_args(int argc, char **argv)
>  {
>  	struct args args = {
>  		.iterations = 1,
> +		.max_retries = 20,
> +		.suppress_info = 20,
> +		.diagnose = false
>  	};
>  	unsigned int i;
>  	long arg;
> -	bool no_arg;
> +	bool no_arg, max_failures = false;
>  	char *end;
>  
>  	for (i = 1; i < argc; i++) {
> @@ -155,11 +283,35 @@ static struct args parse_args(int argc, char **argv)

again, do we _really_ need all these parameters?

>  				report_abort("--iterations needs a positive parameter");
>  			args.iterations = arg;
>  			++i;
> +		} else if (!strcmp("--max-retries", argv[i])) {
> +			if (no_arg)
> +				report_abort("--max-retries needs a positive parameter");
> +			args.max_retries = arg;
> +			++i;
> +		} else if (!strcmp("--suppress-info", argv[i])) {
> +			if (no_arg)
> +				report_abort("--suppress-info needs a positive parameter");
> +			args.suppress_info = arg;
> +			++i;
> +		} else if (!strcmp("--max-failures", argv[i])) {
> +			if (no_arg)
> +				report_abort("--max-failures needs a positive parameter");
> +			args.max_failures = arg;
> +			max_failures = true;
> +			++i;
> +		} else if (!strcmp("--diagnose", argv[i])) {
> +			args.diagnose = true;
> +		} else if (!strcmp("--no-diagnose", argv[i])) {
> +			args.diagnose = false;
>  		} else {
>  			report_abort("Unsupported parameter '%s'",
>  				     argv[i]);
>  		}
>  	}
> +
> +	if (!max_failures)
> +		args.max_failures = args.iterations / 1000;
> +
>  	return args;
>  }
>  
> @@ -177,5 +329,15 @@ int main(int argc, char **argv)
>  	}
>  	report_prefix_pop();
>  
> +	report_prefix_push("specification exception during transaction");
> +	for (i = 0; spec_ex_triggers[i].name; i++) {
> +		if (spec_ex_triggers[i].transactable) {
> +			report_prefix_push(spec_ex_triggers[i].name);
> +			test_spec_ex_trans(&args, &spec_ex_triggers[i]);
> +			report_prefix_pop();
> +		}
> +	}
> +	report_prefix_pop();
> +
>  	return report_summary();
>  }
Christian Borntraeger Oct. 25, 2021, 6:28 p.m. UTC | #2
Am 25.10.21 um 19:30 schrieb Claudio Imbrenda:
> On Fri, 22 Oct 2021 14:01:56 +0200
> Janis Schoetterl-Glausch <scgl@linux.ibm.com> wrote:
> 
>> Program interruptions during transactional execution cause other
>> interruption codes.
>> Check that we see the expected code for (some) specification exceptions.
>>
>> Signed-off-by: Janis Schoetterl-Glausch <scgl@linux.ibm.com>
>> ---
>>   lib/s390x/asm/arch_def.h |   1 +
>>   s390x/spec_ex.c          | 172 +++++++++++++++++++++++++++++++++++++--
>>   2 files changed, 168 insertions(+), 5 deletions(-)
>>
>> diff --git a/lib/s390x/asm/arch_def.h b/lib/s390x/asm/arch_def.h
>> index 40626d7..f7fb467 100644
>> --- a/lib/s390x/asm/arch_def.h
>> +++ b/lib/s390x/asm/arch_def.h
>> @@ -55,6 +55,7 @@ struct psw {
>>   #define PSW_MASK_BA			0x0000000080000000UL
>>   #define PSW_MASK_64			(PSW_MASK_BA | PSW_MASK_EA)
>>   
>> +#define CTL0_TRANSACT_EX_CTL		(63 -  8)
>>   #define CTL0_LOW_ADDR_PROT		(63 - 35)
>>   #define CTL0_EDAT			(63 - 40)
>>   #define CTL0_IEP			(63 - 43)
>> diff --git a/s390x/spec_ex.c b/s390x/spec_ex.c
>> index ec3322a..f3628bd 100644
>> --- a/s390x/spec_ex.c
>> +++ b/s390x/spec_ex.c
>> @@ -4,9 +4,14 @@
>>    *
>>    * Specification exception test.
>>    * Tests that specification exceptions occur when expected.
>> + * This includes specification exceptions occurring during transactional execution
>> + * as these result in another interruption code (the transactional-execution-aborted
>> + * bit is set).
>>    */
>>   #include <stdlib.h>
>> +#include <htmintrin.h>
>>   #include <libcflat.h>
>> +#include <asm/barrier.h>
>>   #include <asm/interrupt.h>
>>   #include <asm/facility.h>
>>   
>> @@ -92,18 +97,23 @@ static void not_even(void)
>>   struct spec_ex_trigger {
>>   	const char *name;
>>   	void (*func)(void);
>> +	bool transactable;
>>   	void (*fixup)(void);
>>   };
>>   
>>   static const struct spec_ex_trigger spec_ex_triggers[] = {
>> -	{ "psw_bit_12_is_1", &psw_bit_12_is_1, &fixup_invalid_psw},
>> -	{ "bad_alignment", &bad_alignment, NULL},
>> -	{ "not_even", &not_even, NULL},
>> -	{ NULL, NULL, NULL},
>> +	{ "psw_bit_12_is_1", &psw_bit_12_is_1, false, &fixup_invalid_psw},
>> +	{ "bad_alignment", &bad_alignment, true, NULL},
>> +	{ "not_even", &not_even, true, NULL},
>> +	{ NULL, NULL, true, NULL},
>>   };
>>   
>>   struct args {
>>   	uint64_t iterations;
>> +	uint64_t max_retries;
>> +	uint64_t suppress_info;
>> +	uint64_t max_failures;
>> +	bool diagnose;
>>   };
>>   
>>   static void test_spec_ex(struct args *args,
>> @@ -131,14 +141,132 @@ static void test_spec_ex(struct args *args,
>>   		    expected_pgm);
>>   }
>>   
>> +#define TRANSACTION_COMPLETED 4
>> +#define TRANSACTION_MAX_RETRIES 5
>> +
>> +/* NULL must be passed to __builtin_tbegin via constant, forbid diagnose from
>> + * being NULL to keep things simple
>> + */
>> +static int __attribute__((nonnull))
>> +with_transaction(void (*trigger)(void), struct __htm_tdb *diagnose)
>> +{
>> +	int cc;
>> +
> 
> if you want to be extra sure, put an assert here (although I'm not sure
> how nonnull works, I have never seen it before)
> 
>> +	cc = __builtin_tbegin(diagnose);
>> +	if (cc == _HTM_TBEGIN_STARTED) {
>> +		trigger();
>> +		__builtin_tend();
>> +		return -TRANSACTION_COMPLETED;
>> +	} else {
>> +		return -cc;
>> +	}
>> +}
>> +
>> +static int retry_transaction(const struct spec_ex_trigger *trigger, unsigned int max_retries,
>> +			     struct __htm_tdb *tdb, uint16_t expected_pgm)
>> +{
>> +	int trans_result, i;
>> +	uint16_t pgm;
>> +
>> +	for (i = 0; i < max_retries; i++) {
>> +		expect_pgm_int();
>> +		trans_result = with_transaction(trigger->func, tdb);
>> +		if (trans_result == -_HTM_TBEGIN_TRANSIENT) {
>> +			mb();
>> +			pgm = lc->pgm_int_code;
>> +			if (pgm == 0)
>> +				continue;
>> +			else if (pgm == expected_pgm)
>> +				return 0;
>> +		}
>> +		return trans_result;
>> +	}
>> +	return -TRANSACTION_MAX_RETRIES;
> 
> so this means that a test will be considered failed if the transaction
> failed too many times?
> this means that could fail if the test is run on busy system, even if
> the host running the unit test is correct

Can we use constrained transactions for this test? those will succeed.
Janis Schoetterl-Glausch Oct. 26, 2021, 2:22 p.m. UTC | #3
On 10/25/21 19:30, Claudio Imbrenda wrote:
> On Fri, 22 Oct 2021 14:01:56 +0200
> Janis Schoetterl-Glausch <scgl@linux.ibm.com> wrote:
> 
>> Program interruptions during transactional execution cause other
>> interruption codes.
>> Check that we see the expected code for (some) specification exceptions.
>>
>> Signed-off-by: Janis Schoetterl-Glausch <scgl@linux.ibm.com>
>> ---

[...]

>> +#define TRANSACTION_MAX_RETRIES 5
>> +
>> +/* NULL must be passed to __builtin_tbegin via constant, forbid diagnose from
>> + * being NULL to keep things simple
>> + */
>> +static int __attribute__((nonnull))
>> +with_transaction(void (*trigger)(void), struct __htm_tdb *diagnose)
>> +{
>> +	int cc;
>> +
> 
> if you want to be extra sure, put an assert here (although I'm not sure
> how nonnull works, I have never seen it before)

Ok, with nonnull, the compiler might warn you if you pass NULL.
> 
>> +	cc = __builtin_tbegin(diagnose);
>> +	if (cc == _HTM_TBEGIN_STARTED) {
>> +		trigger();
>> +		__builtin_tend();
>> +		return -TRANSACTION_COMPLETED;
>> +	} else {
>> +		return -cc;
>> +	}
>> +}
>> +
>> +static int retry_transaction(const struct spec_ex_trigger *trigger, unsigned int max_retries,
>> +			     struct __htm_tdb *tdb, uint16_t expected_pgm)
>> +{
>> +	int trans_result, i;
>> +	uint16_t pgm;
>> +
>> +	for (i = 0; i < max_retries; i++) {
>> +		expect_pgm_int();
>> +		trans_result = with_transaction(trigger->func, tdb);
>> +		if (trans_result == -_HTM_TBEGIN_TRANSIENT) {
>> +			mb();
>> +			pgm = lc->pgm_int_code;
>> +			if (pgm == 0)
>> +				continue;
>> +			else if (pgm == expected_pgm)
>> +				return 0;
>> +		}
>> +		return trans_result;
>> +	}
>> +	return -TRANSACTION_MAX_RETRIES;
> 
> so this means that a test will be considered failed if the transaction
> failed too many times?

Yes.
> 
> this means that could fail if the test is run on busy system, even if
> the host running the unit test is correct

I suppose so, don't know how likely that is.
> 
> also, do you really need to use negative values? it's probably easier
> to read if you stick to positive values, and less prone to mistakes if
> you accidentally forget a - somewhere.

Ok.
> 
>> +}
>> +
>> +static void test_spec_ex_trans(struct args *args, const struct spec_ex_trigger *trigger)
>> +{
>> +	const uint16_t expected_pgm = PGM_INT_CODE_SPECIFICATION
>> +			      | PGM_INT_CODE_TX_ABORTED_EVENT;
>> +	union {
>> +		struct __htm_tdb tdb;
>> +		uint64_t dwords[sizeof(struct __htm_tdb) / sizeof(uint64_t)];
>> +	} diag;
>> +	unsigned int i, failures = 0;
>> +	int trans_result;
>> +
>> +	if (!test_facility(73)) {
>> +		report_skip("transactional-execution facility not installed");
>> +		return;
>> +	}
>> +	ctl_set_bit(0, CTL0_TRANSACT_EX_CTL); /* enable transactional-exec */
>> +
>> +	for (i = 0; i < args->iterations && failures <= args->max_failures; i++) {
>> +		register_pgm_cleanup_func(trigger->fixup);
>> +		trans_result = retry_transaction(trigger, args->max_retries, &diag.tdb, expected_pgm);
> 
> so you retry each iteration up to args->max_retries times, and if a
> transaction aborts too many times (maybe because the host system is
> very busy), then you consider it a fail
> 

[...]
Claudio Imbrenda Oct. 26, 2021, 2:55 p.m. UTC | #4
On Tue, 26 Oct 2021 16:22:40 +0200
Janis Schoetterl-Glausch <scgl@linux.vnet.ibm.com> wrote:

> On 10/25/21 19:30, Claudio Imbrenda wrote:
> > On Fri, 22 Oct 2021 14:01:56 +0200
> > Janis Schoetterl-Glausch <scgl@linux.ibm.com> wrote:
> >   
> >> Program interruptions during transactional execution cause other
> >> interruption codes.
> >> Check that we see the expected code for (some) specification exceptions.
> >>
> >> Signed-off-by: Janis Schoetterl-Glausch <scgl@linux.ibm.com>
> >> ---  
> 
> [...]
> 
> >> +#define TRANSACTION_MAX_RETRIES 5
> >> +
> >> +/* NULL must be passed to __builtin_tbegin via constant, forbid diagnose from
> >> + * being NULL to keep things simple
> >> + */
> >> +static int __attribute__((nonnull))
> >> +with_transaction(void (*trigger)(void), struct __htm_tdb *diagnose)
> >> +{
> >> +	int cc;
> >> +  
> > 
> > if you want to be extra sure, put an assert here (although I'm not sure
> > how nonnull works, I have never seen it before)  
> 
> Ok, with nonnull, the compiler might warn you if you pass NULL.

fair enough

> >   
> >> +	cc = __builtin_tbegin(diagnose);
> >> +	if (cc == _HTM_TBEGIN_STARTED) {
> >> +		trigger();
> >> +		__builtin_tend();
> >> +		return -TRANSACTION_COMPLETED;
> >> +	} else {
> >> +		return -cc;
> >> +	}
> >> +}
> >> +
> >> +static int retry_transaction(const struct spec_ex_trigger *trigger, unsigned int max_retries,
> >> +			     struct __htm_tdb *tdb, uint16_t expected_pgm)
> >> +{
> >> +	int trans_result, i;
> >> +	uint16_t pgm;
> >> +
> >> +	for (i = 0; i < max_retries; i++) {
> >> +		expect_pgm_int();
> >> +		trans_result = with_transaction(trigger->func, tdb);
> >> +		if (trans_result == -_HTM_TBEGIN_TRANSIENT) {
> >> +			mb();
> >> +			pgm = lc->pgm_int_code;
> >> +			if (pgm == 0)
> >> +				continue;
> >> +			else if (pgm == expected_pgm)
> >> +				return 0;
> >> +		}
> >> +		return trans_result;
> >> +	}
> >> +	return -TRANSACTION_MAX_RETRIES;  
> > 
> > so this means that a test will be considered failed if the transaction
> > failed too many times?  
> 
> Yes.
> > 
> > this means that could fail if the test is run on busy system, even if
> > the host running the unit test is correct  
> 
> I suppose so, don't know how likely that is.

I don't like the idea of failing a test when the implementation is
correct, just because the system might be a little more busy than
expected.

if you can't find a way to refactor the test so that it doesn't fail if
there are too many retries, then at least make it a skip?

but I'd really like to see something that does not fail on a correctly
implemented system just because the test machine was too busy.

> > 
> > also, do you really need to use negative values? it's probably easier
> > to read if you stick to positive values, and less prone to mistakes if
> > you accidentally forget a - somewhere.  
> 
> Ok.
> >   
> >> +}
> >> +
> >> +static void test_spec_ex_trans(struct args *args, const struct spec_ex_trigger *trigger)
> >> +{
> >> +	const uint16_t expected_pgm = PGM_INT_CODE_SPECIFICATION
> >> +			      | PGM_INT_CODE_TX_ABORTED_EVENT;
> >> +	union {
> >> +		struct __htm_tdb tdb;
> >> +		uint64_t dwords[sizeof(struct __htm_tdb) / sizeof(uint64_t)];
> >> +	} diag;
> >> +	unsigned int i, failures = 0;
> >> +	int trans_result;
> >> +
> >> +	if (!test_facility(73)) {
> >> +		report_skip("transactional-execution facility not installed");
> >> +		return;
> >> +	}
> >> +	ctl_set_bit(0, CTL0_TRANSACT_EX_CTL); /* enable transactional-exec */
> >> +
> >> +	for (i = 0; i < args->iterations && failures <= args->max_failures; i++) {
> >> +		register_pgm_cleanup_func(trigger->fixup);
> >> +		trans_result = retry_transaction(trigger, args->max_retries, &diag.tdb, expected_pgm);  
> > 
> > so you retry each iteration up to args->max_retries times, and if a
> > transaction aborts too many times (maybe because the host system is
> > very busy), then you consider it a fail
> >   
> 
> [...]
Janis Schoetterl-Glausch Oct. 27, 2021, 10:05 a.m. UTC | #5
On 10/26/21 16:55, Claudio Imbrenda wrote:
> On Tue, 26 Oct 2021 16:22:40 +0200
> Janis Schoetterl-Glausch <scgl@linux.vnet.ibm.com> wrote:
> 
>> On 10/25/21 19:30, Claudio Imbrenda wrote:
>>> On Fri, 22 Oct 2021 14:01:56 +0200
>>> Janis Schoetterl-Glausch <scgl@linux.ibm.com> wrote:
>>>   
>>>> Program interruptions during transactional execution cause other
>>>> interruption codes.
>>>> Check that we see the expected code for (some) specification exceptions.
>>>>
>>>> Signed-off-by: Janis Schoetterl-Glausch <scgl@linux.ibm.com>
>>>> ---  
>>
>> [...]
>>
>>>> +#define TRANSACTION_MAX_RETRIES 5
>>>> +
>>>> +/* NULL must be passed to __builtin_tbegin via constant, forbid diagnose from
>>>> + * being NULL to keep things simple
>>>> + */
>>>> +static int __attribute__((nonnull))
>>>> +with_transaction(void (*trigger)(void), struct __htm_tdb *diagnose)
>>>> +{
>>>> +	int cc;
>>>> +  
>>>
>>> if you want to be extra sure, put an assert here (although I'm not sure
>>> how nonnull works, I have never seen it before)  
>>
>> Ok, with nonnull, the compiler might warn you if you pass NULL.
> 
> fair enough
> 
>>>   
>>>> +	cc = __builtin_tbegin(diagnose);
>>>> +	if (cc == _HTM_TBEGIN_STARTED) {
>>>> +		trigger();
>>>> +		__builtin_tend();
>>>> +		return -TRANSACTION_COMPLETED;
>>>> +	} else {
>>>> +		return -cc;
>>>> +	}
>>>> +}
>>>> +
>>>> +static int retry_transaction(const struct spec_ex_trigger *trigger, unsigned int max_retries,
>>>> +			     struct __htm_tdb *tdb, uint16_t expected_pgm)
>>>> +{
>>>> +	int trans_result, i;
>>>> +	uint16_t pgm;
>>>> +
>>>> +	for (i = 0; i < max_retries; i++) {
>>>> +		expect_pgm_int();
>>>> +		trans_result = with_transaction(trigger->func, tdb);
>>>> +		if (trans_result == -_HTM_TBEGIN_TRANSIENT) {
>>>> +			mb();
>>>> +			pgm = lc->pgm_int_code;
>>>> +			if (pgm == 0)
>>>> +				continue;
>>>> +			else if (pgm == expected_pgm)
>>>> +				return 0;
>>>> +		}
>>>> +		return trans_result;
>>>> +	}
>>>> +	return -TRANSACTION_MAX_RETRIES;  
>>>
>>> so this means that a test will be considered failed if the transaction
>>> failed too many times?  
>>
>> Yes.
>>>
>>> this means that could fail if the test is run on busy system, even if
>>> the host running the unit test is correct  
>>
>> I suppose so, don't know how likely that is.
> 
> I don't like the idea of failing a test when the implementation is
> correct, just because the system might be a little more busy than
> expected.

Fair enough, I'll see what I can do.
> 
> if you can't find a way to refactor the test so that it doesn't fail if
> there are too many retries, then at least make it a skip?
> 
> but I'd really like to see something that does not fail on a correctly
> implemented system just because the test machine was too busy.
> 
>>>
>>> also, do you really need to use negative values? it's probably easier
>>> to read if you stick to positive values, and less prone to mistakes if
>>> you accidentally forget a - somewhere.  
>>
>> Ok.
>>>   
>>>> +}
>>>> +
>>>> +static void test_spec_ex_trans(struct args *args, const struct spec_ex_trigger *trigger)
>>>> +{
>>>> +	const uint16_t expected_pgm = PGM_INT_CODE_SPECIFICATION
>>>> +			      | PGM_INT_CODE_TX_ABORTED_EVENT;
>>>> +	union {
>>>> +		struct __htm_tdb tdb;
>>>> +		uint64_t dwords[sizeof(struct __htm_tdb) / sizeof(uint64_t)];
>>>> +	} diag;
>>>> +	unsigned int i, failures = 0;
>>>> +	int trans_result;
>>>> +
>>>> +	if (!test_facility(73)) {
>>>> +		report_skip("transactional-execution facility not installed");
>>>> +		return;
>>>> +	}
>>>> +	ctl_set_bit(0, CTL0_TRANSACT_EX_CTL); /* enable transactional-exec */
>>>> +
>>>> +	for (i = 0; i < args->iterations && failures <= args->max_failures; i++) {
>>>> +		register_pgm_cleanup_func(trigger->fixup);
>>>> +		trans_result = retry_transaction(trigger, args->max_retries, &diag.tdb, expected_pgm);  
>>>
>>> so you retry each iteration up to args->max_retries times, and if a
>>> transaction aborts too many times (maybe because the host system is
>>> very busy), then you consider it a fail
>>>   
>>
>> [...]
>
diff mbox series

Patch

diff --git a/lib/s390x/asm/arch_def.h b/lib/s390x/asm/arch_def.h
index 40626d7..f7fb467 100644
--- a/lib/s390x/asm/arch_def.h
+++ b/lib/s390x/asm/arch_def.h
@@ -55,6 +55,7 @@  struct psw {
 #define PSW_MASK_BA			0x0000000080000000UL
 #define PSW_MASK_64			(PSW_MASK_BA | PSW_MASK_EA)
 
+#define CTL0_TRANSACT_EX_CTL		(63 -  8)
 #define CTL0_LOW_ADDR_PROT		(63 - 35)
 #define CTL0_EDAT			(63 - 40)
 #define CTL0_IEP			(63 - 43)
diff --git a/s390x/spec_ex.c b/s390x/spec_ex.c
index ec3322a..f3628bd 100644
--- a/s390x/spec_ex.c
+++ b/s390x/spec_ex.c
@@ -4,9 +4,14 @@ 
  *
  * Specification exception test.
  * Tests that specification exceptions occur when expected.
+ * This includes specification exceptions occurring during transactional execution
+ * as these result in another interruption code (the transactional-execution-aborted
+ * bit is set).
  */
 #include <stdlib.h>
+#include <htmintrin.h>
 #include <libcflat.h>
+#include <asm/barrier.h>
 #include <asm/interrupt.h>
 #include <asm/facility.h>
 
@@ -92,18 +97,23 @@  static void not_even(void)
 struct spec_ex_trigger {
 	const char *name;
 	void (*func)(void);
+	bool transactable;
 	void (*fixup)(void);
 };
 
 static const struct spec_ex_trigger spec_ex_triggers[] = {
-	{ "psw_bit_12_is_1", &psw_bit_12_is_1, &fixup_invalid_psw},
-	{ "bad_alignment", &bad_alignment, NULL},
-	{ "not_even", &not_even, NULL},
-	{ NULL, NULL, NULL},
+	{ "psw_bit_12_is_1", &psw_bit_12_is_1, false, &fixup_invalid_psw},
+	{ "bad_alignment", &bad_alignment, true, NULL},
+	{ "not_even", &not_even, true, NULL},
+	{ NULL, NULL, true, NULL},
 };
 
 struct args {
 	uint64_t iterations;
+	uint64_t max_retries;
+	uint64_t suppress_info;
+	uint64_t max_failures;
+	bool diagnose;
 };
 
 static void test_spec_ex(struct args *args,
@@ -131,14 +141,132 @@  static void test_spec_ex(struct args *args,
 		    expected_pgm);
 }
 
+#define TRANSACTION_COMPLETED 4		/* with_transaction(): trigger returned and tend was reached */
+#define TRANSACTION_MAX_RETRIES 5	/* retry_transaction(): all max_retries attempts used up */
+
+/* Run trigger() inside a transaction started with tbegin.
+ * Returns -TRANSACTION_COMPLETED if trigger() returned and the transaction was
+ * ended with tend, otherwise the negated value tbegin reported instead of
+ * _HTM_TBEGIN_STARTED. NULL must be passed to __builtin_tbegin via constant,
+ * so diagnose is forbidden from being NULL to keep things simple.
+ */
+static int __attribute__((nonnull))
+with_transaction(void (*trigger)(void), struct __htm_tdb *diagnose)
+{
+	int cc = __builtin_tbegin(diagnose);
+
+	if (cc != _HTM_TBEGIN_STARTED)
+		return -cc;
+	trigger();
+	__builtin_tend();
+	return -TRANSACTION_COMPLETED;
+}
+
+/* Retry trigger->func transactionally while it aborts without a program interrupt;
+ * 0 on expected_pgm, else a negative result code or -TRANSACTION_MAX_RETRIES. */
+static int retry_transaction(const struct spec_ex_trigger *trigger, unsigned int max_retries,
+			     struct __htm_tdb *tdb, uint16_t expected_pgm)
+{
+	unsigned int i; /* same signedness as max_retries: no mixed-sign compare */
+	int trans_result;
+	uint16_t pgm;
+
+	for (i = 0; i < max_retries; i++) {
+		expect_pgm_int();
+		trans_result = with_transaction(trigger->func, tdb);
+		if (trans_result != -_HTM_TBEGIN_TRANSIENT)
+			return trans_result; /* completed, or a non-transient tbegin result */
+		mb();
+		pgm = lc->pgm_int_code;
+		if (pgm) /* pgm == 0: aborted without an interrupt, try again */
+			return pgm == expected_pgm ? 0 : trans_result;
+	}
+	return -TRANSACTION_MAX_RETRIES;
+}
+
+/* Trigger the exception transactionally up to args->iterations times; a program
+ * interrupt other than expected_pgm, or a completed transaction, fails the test.
+ * Iterations that end without a verdict count as failures (max: args->max_failures).
+ */
+static void test_spec_ex_trans(struct args *args, const struct spec_ex_trigger *trigger)
+{
+	const uint16_t expected_pgm = PGM_INT_CODE_SPECIFICATION
+			      | PGM_INT_CODE_TX_ABORTED_EVENT;
+	union {
+		struct __htm_tdb tdb;
+		uint64_t dwords[sizeof(struct __htm_tdb) / sizeof(uint64_t)];
+	} diag;
+	unsigned int i, j, failures = 0; /* j: dump index, must not clobber iteration count i */
+	int trans_result;
+
+	if (!test_facility(73)) {
+		report_skip("transactional-execution facility not installed");
+		return;
+	}
+	ctl_set_bit(0, CTL0_TRANSACT_EX_CTL); /* enable transactional-exec */
+
+	for (i = 0; i < args->iterations && failures <= args->max_failures; i++) {
+		register_pgm_cleanup_func(trigger->fixup);
+		trans_result = retry_transaction(trigger, args->max_retries, &diag.tdb, expected_pgm);
+		register_pgm_cleanup_func(NULL);
+		switch (trans_result) {
+		case 0:
+			continue; /* expected interrupt seen, nothing to record */
+		case -_HTM_TBEGIN_INDETERMINATE:
+		case -_HTM_TBEGIN_PERSISTENT:
+			if (failures < args->suppress_info)
+				report_info("transaction failed with cc %d", -trans_result);
+			break; /* counted as a failure below */
+		case -_HTM_TBEGIN_TRANSIENT:
+			report_fail("Program interrupt: expected(%d) == received(%d)",
+				    expected_pgm, clear_pgm_int());
+			goto out;
+		case -TRANSACTION_COMPLETED:
+			report_fail("Transaction completed without exception");
+			goto out;
+		case -TRANSACTION_MAX_RETRIES:
+			if (failures < args->suppress_info)
+				report_info("Retried transaction %lu times without exception",
+					    args->max_retries);
+			break; /* counted as a failure below */
+		default:
+			report_fail("Invalid return transaction result");
+			goto out;
+		}
+
+		if (failures < args->suppress_info)
+			report_info("transaction abort code: %llu", diag.tdb.abort_code);
+		if (args->diagnose && failures < args->suppress_info) {
+			for (j = 0; j < sizeof(diag.dwords) / sizeof(diag.dwords[0]); j++)
+				report_info("diag+%03d: %016lx", j * 8, diag.dwords[j]);
+		}
+		++failures;
+	}
+	if (failures <= args->max_failures) {
+		report_pass(
+			"Program interrupt: always expected(%d) == received(%d), transaction failures: %u",
+			expected_pgm, expected_pgm, failures);
+	} else {
+		report_fail("Too many transaction failures: %u", failures);
+	}
+	if (failures > args->suppress_info)
+		report_info("Suppressed some transaction failure information messages");
+
+out:
+	ctl_clear_bit(0, CTL0_TRANSACT_EX_CTL);
+}
+
 static struct args parse_args(int argc, char **argv)
 {
 	struct args args = {
 		.iterations = 1,
+		.max_retries = 20,
+		.suppress_info = 20,
+		.diagnose = false
 	};
 	unsigned int i;
 	long arg;
-	bool no_arg;
+	bool no_arg, max_failures = false;
 	char *end;
 
 	for (i = 1; i < argc; i++) {
@@ -155,11 +283,35 @@  static struct args parse_args(int argc, char **argv)
 				report_abort("--iterations needs a positive parameter");
 			args.iterations = arg;
 			++i;
+		} else if (!strcmp("--max-retries", argv[i])) {
+			if (no_arg)
+				report_abort("--max-retries needs a positive parameter");
+			args.max_retries = arg;
+			++i;
+		} else if (!strcmp("--suppress-info", argv[i])) {
+			if (no_arg)
+				report_abort("--suppress-info needs a positive parameter");
+			args.suppress_info = arg;
+			++i;
+		} else if (!strcmp("--max-failures", argv[i])) {
+			if (no_arg)
+				report_abort("--max-failures needs a positive parameter");
+			args.max_failures = arg;
+			max_failures = true;
+			++i;
+		} else if (!strcmp("--diagnose", argv[i])) {
+			args.diagnose = true;
+		} else if (!strcmp("--no-diagnose", argv[i])) {
+			args.diagnose = false;
 		} else {
 			report_abort("Unsupported parameter '%s'",
 				     argv[i]);
 		}
 	}
+
+	if (!max_failures)
+		args.max_failures = args.iterations / 1000;
+
 	return args;
 }
 
@@ -177,5 +329,15 @@  int main(int argc, char **argv)
 	}
 	report_prefix_pop();
 
+	report_prefix_push("specification exception during transaction");
+	for (i = 0; spec_ex_triggers[i].name; i++) {
+		if (spec_ex_triggers[i].transactable) {
+			report_prefix_push(spec_ex_triggers[i].name);
+			test_spec_ex_trans(&args, &spec_ex_triggers[i]);
+			report_prefix_pop();
+		}
+	}
+	report_prefix_pop();
+
 	return report_summary();
 }