diff mbox series

[v5,13/15] KVM: s390: add function process_gib_alert_list()

Message ID 20181219191756.57973-14-mimu@linux.ibm.com (mailing list archive)
State New, archived
Headers show
Series KVM: s390: make use of the GIB | expand

Commit Message

Michael Mueller Dec. 19, 2018, 7:17 p.m. UTC
This function processes the Gib Alert List (GAL). It is required
to run when either a gib alert interruption has been received or
a gisa that is in the alert list is cleared or dropped.

The GAL is build up by millicode, when the respective ISC bit is
set in the Interruption Alert Mask (IAM) and an interruption of
that class is observed.

Signed-off-by: Michael Mueller <mimu@linux.ibm.com>
---
 arch/s390/kvm/interrupt.c | 140 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 140 insertions(+)

Comments

Pierre Morel Jan. 3, 2019, 2:43 p.m. UTC | #1
On 19/12/2018 20:17, Michael Mueller wrote:
> This function processes the Gib Alert List (GAL). It is required
> to run when either a gib alert interruption has been received or
> a gisa that is in the alert list is cleared or dropped.
> 
> The GAL is build up by millicode, when the respective ISC bit is
> set in the Interruption Alert Mask (IAM) and an interruption of
> that class is observed.
> 
> Signed-off-by: Michael Mueller <mimu@linux.ibm.com>
> ---
>   arch/s390/kvm/interrupt.c | 140 ++++++++++++++++++++++++++++++++++++++++++++++
>   1 file changed, 140 insertions(+)
> 
> diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
> index 48a93f5e5333..03e7ba4f215a 100644
> --- a/arch/s390/kvm/interrupt.c
> +++ b/arch/s390/kvm/interrupt.c
> @@ -2941,6 +2941,146 @@ int kvm_s390_get_irq_state(struct kvm_vcpu *vcpu, __u8 __user *buf, int len)
>   	return n;
>   }
>   
> +static int __try_airqs_kick(struct kvm *kvm, u8 ipm)

static inline ?

> +{
> +	struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
> +	struct kvm_vcpu *vcpu = NULL, *kick_vcpu[MAX_ISC + 1];
> +	int online_vcpus = atomic_read(&kvm->online_vcpus);
> +	u8 ioint_mask, isc_mask, kick_mask = 0x00;
> +	int vcpu_id, kicked = 0;
> +
> +	/* Loop over vcpus in WAIT state. */
> +	for (vcpu_id = find_first_bit(fi->idle_mask, online_vcpus);
> +	     /* Until all pending ISCs have a vcpu open for airqs. */
> +	     (~kick_mask & ipm) && vcpu_id < online_vcpus;
> +	     vcpu_id = find_next_bit(fi->idle_mask, online_vcpus, vcpu_id)) {
> +		vcpu = kvm_get_vcpu(kvm, vcpu_id);
> +		if (psw_ioint_disabled(vcpu))
> +			continue;
> +		ioint_mask = (u8)(vcpu->arch.sie_block->gcr[6] >> 24);
> +		for (isc_mask = 0x80; isc_mask; isc_mask >>= 1) {
> +			/* ISC pending in IPM ? */
> +			if (!(ipm & isc_mask))
> +				continue;
> +			/* vcpu for this ISC already found ? */
> +			if (kick_mask & isc_mask)
> +				continue;
> +			/* vcpu open for airq of this ISC ? */
> +			if (!(ioint_mask & isc_mask))
> +				continue;
> +			/* use this vcpu (for all ISCs in ioint_mask) */
> +			kick_mask |= ioint_mask;
> +			kick_vcpu[kicked++] = vcpu;
> +		}
> +	}
> +
> +	if (vcpu && ~kick_mask & ipm)
> +		VM_EVENT(kvm, 4, "gib alert undeliverable isc mask 0x%02x",
> +			 ~kick_mask & ipm);
> +
> +	for (vcpu_id = 0; vcpu_id < kicked; vcpu_id++)
> +		kvm_s390_vcpu_wakeup(kick_vcpu[vcpu_id]);
> +
> +	return (online_vcpus != 0) ? kicked : -ENODEV;
> +}
> +
> +static void __floating_airqs_kick(struct kvm *kvm)
static inline ?

> +{
> +	struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
> +	int online_vcpus, kicked;
> +	u8 ipm_t0, ipm;
> +
> +	/* Get IPM and return if clean, IAM has been restored. */
> +	ipm = get_ipm(kvm->arch.gisa, IRQ_FLAG_IAM);

If we do not get an IPM here, it must have been stolen by the firmware 
for delivery to the guest.
Then why restoring the IAM?

Or do I miss something?

> +	if (!ipm)
> +		return;
> +retry:
> +	ipm_t0 = ipm;
> +
> +	/* Try to kick some vcpus in WAIT state. */
> +	kicked = __try_airqs_kick(kvm, ipm);
> +	if (kicked < 0)
> +		return;
> +
> +	/* Get IPM and return if clean, IAM has been restored. */
> +	ipm = get_ipm(kvm->arch.gisa, IRQ_FLAG_IAM);
> +	if (!ipm)
> +		return;
> +
> +	/* Start over, if new ISC bits are pending in IPM. */
> +	if ((ipm_t0 ^ ipm) & ~ipm_t0)
> +		goto retry;
> +
> +	/*
> +	 * Return as we just kicked at least one vcpu in WAIT state
> +	 * open for airqs. The IAM will be restored latest when one
> +	 * of them goes into WAIT or STOP state.
> +	 */
> +	if (kicked > 0)
> +		return;
> +
> +	/*
> +	 * No vcpu was kicked either because no vcpu was in WAIT state
> +	 * or none of the vcpus in WAIT state are open for airqs.
> +	 * Return immediately if no vcpus are in WAIT state.
> +	 * There are vcpus in RUN state. They will process the airqs
> +	 * if not closed for airqs as well. In that case the system will
> +	 * delay airqs until a vcpu decides to take airqs again.
> +	 */
> +	online_vcpus = atomic_read(&kvm->online_vcpus);
> +	if (!bitmap_weight(fi->idle_mask, online_vcpus))
> +		return;
> +
> +	/*
> +	 * None of the vcpus in WAIT state take airqs and we might
> +	 * have no running vcpus as at least one vcpu is in WAIT state
> +	 * and IPM is dirty.
> +	 */
> +	set_iam(kvm->arch.gisa, kvm->arch.iam);

I do not understand why we need to set IAM here.
The interrupt will be delivered by the firmware as soon as the PSW or 
CR6 is changed by any vCPU.
...and if this does not happen we can not deliver the interrupt anyway.

> +}
> +
> +#define NULL_GISA_ADDR 0x00000000UL
> +#define NONE_GISA_ADDR 0x00000001UL
> +#define GISA_ADDR_MASK 0xfffff000UL
> +
> +static void __maybe_unused process_gib_alert_list(void)
> +{
> +	u32 final, next_alert, origin = 0UL;
> +	struct kvm_s390_gisa *gisa;
> +	struct kvm *kvm;
> +
> +	do {
> +		/*
> +		 * If the NONE_GISA_ADDR is still stored in the alert list
> +		 * origin, we will leave the outer loop. No further GISA has
> +		 * been added to the alert list by millicode while processing
> +		 * the current alert list.
> +		 */
> +		final = (origin & NONE_GISA_ADDR);
> +		/*
> +		 * Cut off the alert list and store the NONE_GISA_ADDR in the
> +		 * alert list origin to avoid further GAL interruptions.
> +		 * A new alert list can be build up by millicode in parallel
> +		 * for guests not in the yet cut-off alert list. When in the
> +		 * final loop, store the NULL_GISA_ADDR instead. This will re-
> +		 * enable GAL interruptions on the host again.
> +		 */
> +		origin = xchg(&gib->alert_list_origin,
> +			      (!final) ? NONE_GISA_ADDR : NULL_GISA_ADDR);
> +		/* Loop through the just cut-off alert list. */
> +		while (origin & GISA_ADDR_MASK) {
> +			gisa = (struct kvm_s390_gisa *)(u64)origin;
> +			next_alert = gisa->next_alert;
> +			/* Unlink the GISA from the alert list. */
> +			gisa->next_alert = origin;

AFAIU this enable GISA interrupt for the guest...

> +			kvm = container_of(gisa, struct sie_page2, gisa)->kvm;
> +			/* Kick suitable vcpus */
> +			__floating_airqs_kick(kvm);

...and here we kick a VCPU for the guest.

Logically I would do it in the otherway, first kicking the vCPU then 
enabling the GISA interruption again.

If the IPM bit is cleared by the firmware during delivering the 
interrupt to the guest before we enter get_ipm() called by 
__floating_airqs_kick() we will set the IAM despite we have a running 
CPU handling the IRQ.
In the worst case we can also set the IAM with the GISA in the alert list.
Or we must accept that the firmware can deliver the IPM as soon as we 
reset the GISA next field.

> +			origin = next_alert;
> +		}
> +	} while (!final);
> +}
> +
>   static void nullify_gisa(struct kvm_s390_gisa *gisa)
>   {
>   	memset(gisa, 0, sizeof(struct kvm_s390_gisa));
> 

I think that avoiding to restore the IAM during the call to get_ipm 
inside __floating_airqs_kick() would good.

If you agree, with that:

Reviewed-by: Pierre Morel<pmorel@linux.ibm.com>
Michael Mueller Jan. 7, 2019, 7:18 p.m. UTC | #2
On 03.01.19 15:43, Pierre Morel wrote:
> On 19/12/2018 20:17, Michael Mueller wrote:
>> This function processes the Gib Alert List (GAL). It is required
>> to run when either a gib alert interruption has been received or
>> a gisa that is in the alert list is cleared or dropped.
>>
>> The GAL is build up by millicode, when the respective ISC bit is
>> set in the Interruption Alert Mask (IAM) and an interruption of
>> that class is observed.
>>
>> Signed-off-by: Michael Mueller <mimu@linux.ibm.com>
>> ---
>>   arch/s390/kvm/interrupt.c | 140 
>> ++++++++++++++++++++++++++++++++++++++++++++++
>>   1 file changed, 140 insertions(+)
>>
>> diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
>> index 48a93f5e5333..03e7ba4f215a 100644
>> --- a/arch/s390/kvm/interrupt.c
>> +++ b/arch/s390/kvm/interrupt.c
>> @@ -2941,6 +2941,146 @@ int kvm_s390_get_irq_state(struct kvm_vcpu 
>> *vcpu, __u8 __user *buf, int len)
>>       return n;
>>   }
>> +static int __try_airqs_kick(struct kvm *kvm, u8 ipm)
> 
> static inline ?
> 
>> +{
>> +    struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
>> +    struct kvm_vcpu *vcpu = NULL, *kick_vcpu[MAX_ISC + 1];
>> +    int online_vcpus = atomic_read(&kvm->online_vcpus);
>> +    u8 ioint_mask, isc_mask, kick_mask = 0x00;
>> +    int vcpu_id, kicked = 0;
>> +
>> +    /* Loop over vcpus in WAIT state. */
>> +    for (vcpu_id = find_first_bit(fi->idle_mask, online_vcpus);
>> +         /* Until all pending ISCs have a vcpu open for airqs. */
>> +         (~kick_mask & ipm) && vcpu_id < online_vcpus;
>> +         vcpu_id = find_next_bit(fi->idle_mask, online_vcpus, 
>> vcpu_id)) {
>> +        vcpu = kvm_get_vcpu(kvm, vcpu_id);
>> +        if (psw_ioint_disabled(vcpu))
>> +            continue;
>> +        ioint_mask = (u8)(vcpu->arch.sie_block->gcr[6] >> 24);
>> +        for (isc_mask = 0x80; isc_mask; isc_mask >>= 1) {
>> +            /* ISC pending in IPM ? */
>> +            if (!(ipm & isc_mask))
>> +                continue;
>> +            /* vcpu for this ISC already found ? */
>> +            if (kick_mask & isc_mask)
>> +                continue;
>> +            /* vcpu open for airq of this ISC ? */
>> +            if (!(ioint_mask & isc_mask))
>> +                continue;
>> +            /* use this vcpu (for all ISCs in ioint_mask) */
>> +            kick_mask |= ioint_mask;
>> +            kick_vcpu[kicked++] = vcpu;
>> +        }
>> +    }
>> +
>> +    if (vcpu && ~kick_mask & ipm)
>> +        VM_EVENT(kvm, 4, "gib alert undeliverable isc mask 0x%02x",
>> +             ~kick_mask & ipm);
>> +
>> +    for (vcpu_id = 0; vcpu_id < kicked; vcpu_id++)
>> +        kvm_s390_vcpu_wakeup(kick_vcpu[vcpu_id]);
>> +
>> +    return (online_vcpus != 0) ? kicked : -ENODEV;
>> +}
>> +
>> +static void __floating_airqs_kick(struct kvm *kvm)
> static inline ?
> 
>> +{
>> +    struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
>> +    int online_vcpus, kicked;
>> +    u8 ipm_t0, ipm;
>> +
>> +    /* Get IPM and return if clean, IAM has been restored. */
>> +    ipm = get_ipm(kvm->arch.gisa, IRQ_FLAG_IAM);
> 
> If we do not get an IPM here, it must have been stolen by the firmware 
> for delivery to the guest.

Yes, a running SIE instance took it before we were able to. But is
it still running now? It could have gone to WAIT before we see
that the IPM is clean. Then it was restored already. Otherwise,
it is still running and will go WAIT and then restore the IAM.

I will do some tests on this.

> Then why restoring the IAM?
> 
> Or do I miss something?
> 
>> +    if (!ipm)
>> +        return;
>> +retry:
>> +    ipm_t0 = ipm;
>> +
>> +    /* Try to kick some vcpus in WAIT state. */
>> +    kicked = __try_airqs_kick(kvm, ipm);
>> +    if (kicked < 0)
>> +        return;
>> +
>> +    /* Get IPM and return if clean, IAM has been restored. */
>> +    ipm = get_ipm(kvm->arch.gisa, IRQ_FLAG_IAM);
>> +    if (!ipm)
>> +        return;
>> +
>> +    /* Start over, if new ISC bits are pending in IPM. */
>> +    if ((ipm_t0 ^ ipm) & ~ipm_t0)
>> +        goto retry;
>> +
>> +    /*
>> +     * Return as we just kicked at least one vcpu in WAIT state
>> +     * open for airqs. The IAM will be restored latest when one
>> +     * of them goes into WAIT or STOP state.
>> +     */
>> +    if (kicked > 0)
>> +        return;
>> +
>> +    /*
>> +     * No vcpu was kicked either because no vcpu was in WAIT state
>> +     * or none of the vcpus in WAIT state are open for airqs.
>> +     * Return immediately if no vcpus are in WAIT state.
>> +     * There are vcpus in RUN state. They will process the airqs
>> +     * if not closed for airqs as well. In that case the system will
>> +     * delay airqs until a vcpu decides to take airqs again.
>> +     */
>> +    online_vcpus = atomic_read(&kvm->online_vcpus);
>> +    if (!bitmap_weight(fi->idle_mask, online_vcpus))
>> +        return;
>> +
>> +    /*
>> +     * None of the vcpus in WAIT state take airqs and we might
>> +     * have no running vcpus as at least one vcpu is in WAIT state
>> +     * and IPM is dirty.
>> +     */
>> +    set_iam(kvm->arch.gisa, kvm->arch.iam);
> 
> I do not understand why we need to set IAM here.
> The interrupt will be delivered by the firmware as soon as the PSW or 
> CR6 is changed by any vCPU.
> ...and if this does not happen we can not deliver the interrupt anyway.
> 
>> +}
>> +
>> +#define NULL_GISA_ADDR 0x00000000UL
>> +#define NONE_GISA_ADDR 0x00000001UL
>> +#define GISA_ADDR_MASK 0xfffff000UL
>> +
>> +static void __maybe_unused process_gib_alert_list(void)
>> +{
>> +    u32 final, next_alert, origin = 0UL;
>> +    struct kvm_s390_gisa *gisa;
>> +    struct kvm *kvm;
>> +
>> +    do {
>> +        /*
>> +         * If the NONE_GISA_ADDR is still stored in the alert list
>> +         * origin, we will leave the outer loop. No further GISA has
>> +         * been added to the alert list by millicode while processing
>> +         * the current alert list.
>> +         */
>> +        final = (origin & NONE_GISA_ADDR);
>> +        /*
>> +         * Cut off the alert list and store the NONE_GISA_ADDR in the
>> +         * alert list origin to avoid further GAL interruptions.
>> +         * A new alert list can be build up by millicode in parallel
>> +         * for guests not in the yet cut-off alert list. When in the
>> +         * final loop, store the NULL_GISA_ADDR instead. This will re-
>> +         * enable GAL interruptions on the host again.
>> +         */
>> +        origin = xchg(&gib->alert_list_origin,
>> +                  (!final) ? NONE_GISA_ADDR : NULL_GISA_ADDR);
>> +        /* Loop through the just cut-off alert list. */
>> +        while (origin & GISA_ADDR_MASK) {
>> +            gisa = (struct kvm_s390_gisa *)(u64)origin;
>> +            next_alert = gisa->next_alert;
>> +            /* Unlink the GISA from the alert list. */
>> +            gisa->next_alert = origin;
> 
> AFAIU this enable GISA interrupt for the guest...

Only together with the IAM being set what could happen if
__floating_airqs_kick() calls get_ipm and the IPM is clean already. :(

> 
>> +            kvm = container_of(gisa, struct sie_page2, gisa)->kvm;
>> +            /* Kick suitable vcpus */
>> +            __floating_airqs_kick(kvm);
> 
> ...and here we kick a VCPU for the guest.
> 
> Logically I would do it in the otherway, first kicking the vCPU then 
> enabling the GISA interruption again.
> 
> If the IPM bit is cleared by the firmware during delivering the 
> interrupt to the guest before we enter get_ipm() called by 
> __floating_airqs_kick() we will set the IAM despite we have a running 
> CPU handling the IRQ.

I will move the unlink below the kick that will assure get_ipm will 
never take the IAM restore path.

> In the worst case we can also set the IAM with the GISA in the alert list.
> Or we must accept that the firmware can deliver the IPM as soon as we 
> reset the GISA next field.

See statement above.

> 
>> +            origin = next_alert;
>> +        }
>> +    } while (!final);
>> +}
>> +
>>   static void nullify_gisa(struct kvm_s390_gisa *gisa)
>>   {
>>       memset(gisa, 0, sizeof(struct kvm_s390_gisa));
>>
> 
> I think that avoiding to restore the IAM during the call to get_ipm 
> inside __floating_airqs_kick() would good.
> 
> If you agree, with that:
> 
> Reviewed-by: Pierre Morel<pmorel@linux.ibm.com>
> 
>
Michael Mueller Jan. 7, 2019, 7:19 p.m. UTC | #3
On 03.01.19 15:43, Pierre Morel wrote:
> On 19/12/2018 20:17, Michael Mueller wrote:
>> This function processes the Gib Alert List (GAL). It is required
>> to run when either a gib alert interruption has been received or
>> a gisa that is in the alert list is cleared or dropped.
>>
>> The GAL is build up by millicode, when the respective ISC bit is
>> set in the Interruption Alert Mask (IAM) and an interruption of
>> that class is observed.
>>
>> Signed-off-by: Michael Mueller <mimu@linux.ibm.com>
>> ---
>>   arch/s390/kvm/interrupt.c | 140 
>> ++++++++++++++++++++++++++++++++++++++++++++++
>>   1 file changed, 140 insertions(+)
>>
>> diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
>> index 48a93f5e5333..03e7ba4f215a 100644
>> --- a/arch/s390/kvm/interrupt.c
>> +++ b/arch/s390/kvm/interrupt.c
>> @@ -2941,6 +2941,146 @@ int kvm_s390_get_irq_state(struct kvm_vcpu 
>> *vcpu, __u8 __user *buf, int len)
>>       return n;
>>   }
>>   +static int __try_airqs_kick(struct kvm *kvm, u8 ipm)
>
> static inline ?


will add


>
>> +{
>> +    struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
>> +    struct kvm_vcpu *vcpu = NULL, *kick_vcpu[MAX_ISC + 1];
>> +    int online_vcpus = atomic_read(&kvm->online_vcpus);
>> +    u8 ioint_mask, isc_mask, kick_mask = 0x00;
>> +    int vcpu_id, kicked = 0;
>> +
>> +    /* Loop over vcpus in WAIT state. */
>> +    for (vcpu_id = find_first_bit(fi->idle_mask, online_vcpus);
>> +         /* Until all pending ISCs have a vcpu open for airqs. */
>> +         (~kick_mask & ipm) && vcpu_id < online_vcpus;
>> +         vcpu_id = find_next_bit(fi->idle_mask, online_vcpus, 
>> vcpu_id)) {
>> +        vcpu = kvm_get_vcpu(kvm, vcpu_id);
>> +        if (psw_ioint_disabled(vcpu))
>> +            continue;
>> +        ioint_mask = (u8)(vcpu->arch.sie_block->gcr[6] >> 24);
>> +        for (isc_mask = 0x80; isc_mask; isc_mask >>= 1) {
>> +            /* ISC pending in IPM ? */
>> +            if (!(ipm & isc_mask))
>> +                continue;
>> +            /* vcpu for this ISC already found ? */
>> +            if (kick_mask & isc_mask)
>> +                continue;
>> +            /* vcpu open for airq of this ISC ? */
>> +            if (!(ioint_mask & isc_mask))
>> +                continue;
>> +            /* use this vcpu (for all ISCs in ioint_mask) */
>> +            kick_mask |= ioint_mask;
>> +            kick_vcpu[kicked++] = vcpu;
>> +        }
>> +    }
>> +
>> +    if (vcpu && ~kick_mask & ipm)
>> +        VM_EVENT(kvm, 4, "gib alert undeliverable isc mask 0x%02x",
>> +             ~kick_mask & ipm);
>> +
>> +    for (vcpu_id = 0; vcpu_id < kicked; vcpu_id++)
>> +        kvm_s390_vcpu_wakeup(kick_vcpu[vcpu_id]);
>> +
>> +    return (online_vcpus != 0) ? kicked : -ENODEV;
>> +}
>> +
>> +static void __floating_airqs_kick(struct kvm *kvm)
> static inline ?


and here as well


>
>> +{
>> +    struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
>> +    int online_vcpus, kicked;
>> +    u8 ipm_t0, ipm;
>> +
>> +    /* Get IPM and return if clean, IAM has been restored. */
>> +    ipm = get_ipm(kvm->arch.gisa, IRQ_FLAG_IAM);
>
> If we do not get an IPM here, it must have been stolen by the firmware 
> for delivery to the guest.
> Then why restoring the IAM?
>
> Or do I miss something?
>
>> +    if (!ipm)
>> +        return;
>> +retry:
>> +    ipm_t0 = ipm;
>> +
>> +    /* Try to kick some vcpus in WAIT state. */
>> +    kicked = __try_airqs_kick(kvm, ipm);
>> +    if (kicked < 0)
>> +        return;
>> +
>> +    /* Get IPM and return if clean, IAM has been restored. */
>> +    ipm = get_ipm(kvm->arch.gisa, IRQ_FLAG_IAM);
>> +    if (!ipm)
>> +        return;
>> +
>> +    /* Start over, if new ISC bits are pending in IPM. */
>> +    if ((ipm_t0 ^ ipm) & ~ipm_t0)
>> +        goto retry;
>> +
>> +    /*
>> +     * Return as we just kicked at least one vcpu in WAIT state
>> +     * open for airqs. The IAM will be restored latest when one
>> +     * of them goes into WAIT or STOP state.
>> +     */
>> +    if (kicked > 0)
>> +        return;
>> +
>> +    /*
>> +     * No vcpu was kicked either because no vcpu was in WAIT state
>> +     * or none of the vcpus in WAIT state are open for airqs.
>> +     * Return immediately if no vcpus are in WAIT state.
>> +     * There are vcpus in RUN state. They will process the airqs
>> +     * if not closed for airqs as well. In that case the system will
>> +     * delay airqs until a vcpu decides to take airqs again.
>> +     */
>> +    online_vcpus = atomic_read(&kvm->online_vcpus);
>> +    if (!bitmap_weight(fi->idle_mask, online_vcpus))
>> +        return;
>> +
>> +    /*
>> +     * None of the vcpus in WAIT state take airqs and we might
>> +     * have no running vcpus as at least one vcpu is in WAIT state
>> +     * and IPM is dirty.
>> +     */
>> +    set_iam(kvm->arch.gisa, kvm->arch.iam);
>
> I do not understand why we need to set IAM here.
> The interrupt will be delivered by the firmware as soon as the PSW or 
> CR6 is changed by any vCPU.
> ...and if this does not happen we can not deliver the interrupt anyway.
>
>> +}
>> +
>> +#define NULL_GISA_ADDR 0x00000000UL
>> +#define NONE_GISA_ADDR 0x00000001UL
>> +#define GISA_ADDR_MASK 0xfffff000UL
>> +
>> +static void __maybe_unused process_gib_alert_list(void)
>> +{
>> +    u32 final, next_alert, origin = 0UL;
>> +    struct kvm_s390_gisa *gisa;
>> +    struct kvm *kvm;
>> +
>> +    do {
>> +        /*
>> +         * If the NONE_GISA_ADDR is still stored in the alert list
>> +         * origin, we will leave the outer loop. No further GISA has
>> +         * been added to the alert list by millicode while processing
>> +         * the current alert list.
>> +         */
>> +        final = (origin & NONE_GISA_ADDR);
>> +        /*
>> +         * Cut off the alert list and store the NONE_GISA_ADDR in the
>> +         * alert list origin to avoid further GAL interruptions.
>> +         * A new alert list can be build up by millicode in parallel
>> +         * for guests not in the yet cut-off alert list. When in the
>> +         * final loop, store the NULL_GISA_ADDR instead. This will re-
>> +         * enable GAL interruptions on the host again.
>> +         */
>> +        origin = xchg(&gib->alert_list_origin,
>> +                  (!final) ? NONE_GISA_ADDR : NULL_GISA_ADDR);
>> +        /* Loop through the just cut-off alert list. */
>> +        while (origin & GISA_ADDR_MASK) {
>> +            gisa = (struct kvm_s390_gisa *)(u64)origin;
>> +            next_alert = gisa->next_alert;
>> +            /* Unlink the GISA from the alert list. */
>> +            gisa->next_alert = origin;
>
> AFAIU this enable GISA interrupt for the guest...
>
>> +            kvm = container_of(gisa, struct sie_page2, gisa)->kvm;
>> +            /* Kick suitable vcpus */
>> +            __floating_airqs_kick(kvm);
>
> ...and here we kick a VCPU for the guest.
>
> Logically I would do it in the otherway, first kicking the vCPU then 
> enabling the GISA interruption again.
>
> If the IPM bit is cleared by the firmware during delivering the 
> interrupt to the guest before we enter get_ipm() called by 
> __floating_airqs_kick() we will set the IAM despite we have a running 
> CPU handling the IRQ.
> In the worst case we can also set the IAM with the GISA in the alert 
> list.
> Or we must accept that the firmware can deliver the IPM as soon as we 
> reset the GISA next field.
>
>> +            origin = next_alert;
>> +        }
>> +    } while (!final);
>> +}
>> +
>>   static void nullify_gisa(struct kvm_s390_gisa *gisa)
>>   {
>>       memset(gisa, 0, sizeof(struct kvm_s390_gisa));
>>
>
> I think that avoiding to restore the IAM during the call to get_ipm 
> inside __floating_airqs_kick() would good.
>
> If you agree, with that:
>
> Reviewed-by: Pierre Morel<pmorel@linux.ibm.com>
>
>
Heiko Carstens Jan. 8, 2019, 6:37 a.m. UTC | #4
On Mon, Jan 07, 2019 at 08:19:26PM +0100, Michael Mueller wrote:
> 
> On 03.01.19 15:43, Pierre Morel wrote:
> >On 19/12/2018 20:17, Michael Mueller wrote:
> >>This function processes the Gib Alert List (GAL). It is required
> >>to run when either a gib alert interruption has been received or
> >>a gisa that is in the alert list is cleared or dropped.
> >>
> >>The GAL is build up by millicode, when the respective ISC bit is
> >>set in the Interruption Alert Mask (IAM) and an interruption of
> >>that class is observed.
> >>
> >>Signed-off-by: Michael Mueller <mimu@linux.ibm.com>
> >>---
> >>  arch/s390/kvm/interrupt.c | 140
> >>++++++++++++++++++++++++++++++++++++++++++++++
> >>  1 file changed, 140 insertions(+)
> >>
> >>diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
> >>index 48a93f5e5333..03e7ba4f215a 100644
> >>--- a/arch/s390/kvm/interrupt.c
> >>+++ b/arch/s390/kvm/interrupt.c
> >>@@ -2941,6 +2941,146 @@ int kvm_s390_get_irq_state(struct kvm_vcpu
> >>*vcpu, __u8 __user *buf, int len)
> >>      return n;
> >>  }
> >>  +static int __try_airqs_kick(struct kvm *kvm, u8 ipm)
> >
> >static inline ?

Why?

In general it is a good idea to leave it up to the compiler to decide
if it is worth to inline a function or not, unless you have a good
reason to force inlining.
Halil Pasic Jan. 8, 2019, 12:59 p.m. UTC | #5
On Wed, 19 Dec 2018 20:17:54 +0100
Michael Mueller <mimu@linux.ibm.com> wrote:

> This function processes the Gib Alert List (GAL). It is required
> to run when either a gib alert interruption has been received or
> a gisa that is in the alert list is cleared or dropped.
> 
> The GAL is build up by millicode, when the respective ISC bit is
> set in the Interruption Alert Mask (IAM) and an interruption of
> that class is observed.
> 
> Signed-off-by: Michael Mueller <mimu@linux.ibm.com>
> ---
>  arch/s390/kvm/interrupt.c | 140 ++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 140 insertions(+)
> 
> diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
> index 48a93f5e5333..03e7ba4f215a 100644
> --- a/arch/s390/kvm/interrupt.c
> +++ b/arch/s390/kvm/interrupt.c
> @@ -2941,6 +2941,146 @@ int kvm_s390_get_irq_state(struct kvm_vcpu *vcpu, __u8 __user *buf, int len)
>  	return n;
>  }
>  
> +static int __try_airqs_kick(struct kvm *kvm, u8 ipm)
> +{
> +	struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
> +	struct kvm_vcpu *vcpu = NULL, *kick_vcpu[MAX_ISC + 1];
> +	int online_vcpus = atomic_read(&kvm->online_vcpus);
> +	u8 ioint_mask, isc_mask, kick_mask = 0x00;
> +	int vcpu_id, kicked = 0;
> +
> +	/* Loop over vcpus in WAIT state. */
> +	for (vcpu_id = find_first_bit(fi->idle_mask, online_vcpus);
> +	     /* Until all pending ISCs have a vcpu open for airqs. */
> +	     (~kick_mask & ipm) && vcpu_id < online_vcpus;
> +	     vcpu_id = find_next_bit(fi->idle_mask, online_vcpus, vcpu_id)) {
> +		vcpu = kvm_get_vcpu(kvm, vcpu_id);
> +		if (psw_ioint_disabled(vcpu))
> +			continue;
> +		ioint_mask = (u8)(vcpu->arch.sie_block->gcr[6] >> 24);
> +		for (isc_mask = 0x80; isc_mask; isc_mask >>= 1) {
> +			/* ISC pending in IPM ? */
> +			if (!(ipm & isc_mask))
> +				continue;
> +			/* vcpu for this ISC already found ? */
> +			if (kick_mask & isc_mask)
> +				continue;
> +			/* vcpu open for airq of this ISC ? */
> +			if (!(ioint_mask & isc_mask))
> +				continue;
> +			/* use this vcpu (for all ISCs in ioint_mask) */
> +			kick_mask |= ioint_mask;
> +			kick_vcpu[kicked++] = vcpu;

Assuming that the vcpu can/will take all ISCs it's currently open for
does not seem right. We kind of rely on this assumption here, or?

> +		}
> +	}
> +
> +	if (vcpu && ~kick_mask & ipm)
> +		VM_EVENT(kvm, 4, "gib alert undeliverable isc mask
> 0x%02x",
> +			 ~kick_mask & ipm);
> +
> +	for (vcpu_id = 0; vcpu_id < kicked; vcpu_id++)
> +		kvm_s390_vcpu_wakeup(kick_vcpu[vcpu_id]);
> +
> +	return (online_vcpus != 0) ? kicked : -ENODEV;
> +}
> +
> +static void __floating_airqs_kick(struct kvm *kvm)
> +{
> +	struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
> +	int online_vcpus, kicked;
> +	u8 ipm_t0, ipm;
> +
> +	/* Get IPM and return if clean, IAM has been restored. */
> +	ipm = get_ipm(kvm->arch.gisa, IRQ_FLAG_IAM);
> +	if (!ipm)
> +		return;
> +retry:
> +	ipm_t0 = ipm;
> +
> +	/* Try to kick some vcpus in WAIT state. */
> +	kicked = __try_airqs_kick(kvm, ipm);
> +	if (kicked < 0)
> +		return;
> +
> +	/* Get IPM and return if clean, IAM has been restored. */
> +	ipm = get_ipm(kvm->arch.gisa, IRQ_FLAG_IAM);
> +	if (!ipm)
> +		return;
> +
> +	/* Start over, if new ISC bits are pending in IPM. */
> +	if ((ipm_t0 ^ ipm) & ~ipm_t0)
> +		goto retry;
> +

<MARK A>

> +	/*
> +	 * Return as we just kicked at least one vcpu in WAIT state
> +	 * open for airqs. The IAM will be restored latest when one
> +	 * of them goes into WAIT or STOP state.
> +	 */
> +	if (kicked > 0)
> +		return;

</MARK A>

> +
> +	/*
> +	 * No vcpu was kicked either because no vcpu was in WAIT state
> +	 * or none of the vcpus in WAIT state are open for airqs.
> +	 * Return immediately if no vcpus are in WAIT state.
> +	 * There are vcpus in RUN state. They will process the airqs
> +	 * if not closed for airqs as well. In that case the system will
> +	 * delay airqs until a vcpu decides to take airqs again.
> +	 */
> +	online_vcpus = atomic_read(&kvm->online_vcpus);
> +	if (!bitmap_weight(fi->idle_mask, online_vcpus))
> +		return;
> +
> +	/*
> +	 * None of the vcpus in WAIT state take airqs and we might
> +	 * have no running vcpus as at least one vcpu is in WAIT state
> +	 * and IPM is dirty.
> +	 */
> +	set_iam(kvm->arch.gisa, kvm->arch.iam);
> +}
> +
> +#define NULL_GISA_ADDR 0x00000000UL
> +#define NONE_GISA_ADDR 0x00000001UL
> +#define GISA_ADDR_MASK 0xfffff000UL
> +
> +static void __maybe_unused process_gib_alert_list(void)
> +{
> +	u32 final, next_alert, origin = 0UL;
> +	struct kvm_s390_gisa *gisa;
> +	struct kvm *kvm;
> +
> +	do {
> +		/*
> +		 * If the NONE_GISA_ADDR is still stored in the alert list
> +		 * origin, we will leave the outer loop. No further GISA has
> +		 * been added to the alert list by millicode while processing
> +		 * the current alert list.
> +		 */
> +		final = (origin & NONE_GISA_ADDR);
> +		/*
> +		 * Cut off the alert list and store the NONE_GISA_ADDR in the
> +		 * alert list origin to avoid further GAL interruptions.
> +		 * A new alert list can be build up by millicode in parallel
> +		 * for guests not in the yet cut-off alert list. When in the
> +		 * final loop, store the NULL_GISA_ADDR instead. This will re-
> +		 * enable GAL interruptions on the host again.
> +		 */
> +		origin = xchg(&gib->alert_list_origin,
> +			      (!final) ? NONE_GISA_ADDR : NULL_GISA_ADDR);
> +		/* Loop through the just cut-off alert list. */
> +		while (origin & GISA_ADDR_MASK) {
> +			gisa = (struct kvm_s390_gisa *)(u64)origin;
> +			next_alert = gisa->next_alert;
> +			/* Unlink the GISA from the alert list. */
> +			gisa->next_alert = origin;
> +			kvm = container_of(gisa, struct sie_page2, gisa)->kvm;
> +			/* Kick suitable vcpus */
> +			__floating_airqs_kick(kvm);

We may finish handling the alerted gisa with iam not set e.g.
via some vcpus kicked but ipm still dirty and some vcpus still in wait,
or? 

From the comments it seems we speculate on being in a safe state, as
these are supposed to return to wait or stop soon-ish, and we will set
iam then (See <MARK A>). I don't quite understand.

According to my current understanding we might end up loosing initiative
in this scenario. Or am I wrong?

Regards,
Halil

> +			origin = next_alert;
> +		}
> +	} while (!final);
> +}
> +
>  static void nullify_gisa(struct kvm_s390_gisa *gisa)
>  {
>  	memset(gisa, 0, sizeof(struct kvm_s390_gisa));
Halil Pasic Jan. 8, 2019, 2:27 p.m. UTC | #6
On Mon, 7 Jan 2019 20:18:03 +0100
Michael Mueller <mimu@linux.ibm.com> wrote:

> 
> 
> On 03.01.19 15:43, Pierre Morel wrote:
> > On 19/12/2018 20:17, Michael Mueller wrote:
> >> This function processes the Gib Alert List (GAL). It is required
> >> to run when either a gib alert interruption has been received or
> >> a gisa that is in the alert list is cleared or dropped.
> >>
> >> The GAL is build up by millicode, when the respective ISC bit is
> >> set in the Interruption Alert Mask (IAM) and an interruption of
> >> that class is observed.
> >>
> >> Signed-off-by: Michael Mueller <mimu@linux.ibm.com>
> >> ---
> >>   arch/s390/kvm/interrupt.c | 140 
> >> ++++++++++++++++++++++++++++++++++++++++++++++
> >>   1 file changed, 140 insertions(+)
> >>
> >> diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
> >> index 48a93f5e5333..03e7ba4f215a 100644
> >> --- a/arch/s390/kvm/interrupt.c
> >> +++ b/arch/s390/kvm/interrupt.c
> >> @@ -2941,6 +2941,146 @@ int kvm_s390_get_irq_state(struct kvm_vcpu 
> >> *vcpu, __u8 __user *buf, int len)
> >>       return n;
> >>   }
> >> +static int __try_airqs_kick(struct kvm *kvm, u8 ipm)
> > 
> > static inline ?
> > 
> >> +{
> >> +    struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
> >> +    struct kvm_vcpu *vcpu = NULL, *kick_vcpu[MAX_ISC + 1];
> >> +    int online_vcpus = atomic_read(&kvm->online_vcpus);
> >> +    u8 ioint_mask, isc_mask, kick_mask = 0x00;
> >> +    int vcpu_id, kicked = 0;
> >> +
> >> +    /* Loop over vcpus in WAIT state. */
> >> +    for (vcpu_id = find_first_bit(fi->idle_mask, online_vcpus);
> >> +         /* Until all pending ISCs have a vcpu open for airqs. */
> >> +         (~kick_mask & ipm) && vcpu_id < online_vcpus;
> >> +         vcpu_id = find_next_bit(fi->idle_mask, online_vcpus, 
> >> vcpu_id)) {
> >> +        vcpu = kvm_get_vcpu(kvm, vcpu_id);
> >> +        if (psw_ioint_disabled(vcpu))
> >> +            continue;
> >> +        ioint_mask = (u8)(vcpu->arch.sie_block->gcr[6] >> 24);
> >> +        for (isc_mask = 0x80; isc_mask; isc_mask >>= 1) {
> >> +            /* ISC pending in IPM ? */
> >> +            if (!(ipm & isc_mask))
> >> +                continue;
> >> +            /* vcpu for this ISC already found ? */
> >> +            if (kick_mask & isc_mask)
> >> +                continue;
> >> +            /* vcpu open for airq of this ISC ? */
> >> +            if (!(ioint_mask & isc_mask))
> >> +                continue;
> >> +            /* use this vcpu (for all ISCs in ioint_mask) */
> >> +            kick_mask |= ioint_mask; > +            
> >> kick_vcpu[kicked++] = vcpu;
> >> +        }
> >> +    }
> >> +
> >> +    if (vcpu && ~kick_mask & ipm)
> >> +        VM_EVENT(kvm, 4, "gib alert undeliverable isc mask 0x%02x",
> >> +             ~kick_mask & ipm);
> >> +
> >> +    for (vcpu_id = 0; vcpu_id < kicked; vcpu_id++)
> >> +        kvm_s390_vcpu_wakeup(kick_vcpu[vcpu_id]);
> >> +
> >> +    return (online_vcpus != 0) ? kicked : -ENODEV;
> >> +}
> >> +
> >> +static void __floating_airqs_kick(struct kvm *kvm)
> > static inline ?
> > 
> >> +{
> >> +    struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
> >> +    int online_vcpus, kicked;
> >> +    u8 ipm_t0, ipm;
> >> +
> >> +    /* Get IPM and return if clean, IAM has been restored. */
> >> +    ipm = get_ipm(kvm->arch.gisa, IRQ_FLAG_IAM);
> > 
> > If we do not get an IPM here, it must have been stolen by the firmware 
> > for delivery to the guest.
> 
> Yes, a running SIE instance took it before we were able to. But is
> it still running now? It could have gone to WAIT before we see
> that the IPM is clean. Then it was restored already. Otherwise,
> it is still running and will go WAIT and then restore the IAM.
> 
> I will do some tests on this.
> 
> > Then why restoring the IAM?
> > 
> > Or do I miss something?
> > 
> >> +    if (!ipm)
> >> +        return;
> >> +retry:
> >> +    ipm_t0 = ipm;
> >> +
> >> +    /* Try to kick some vcpus in WAIT state. */
> >> +    kicked = __try_airqs_kick(kvm, ipm);
> >> +    if (kicked < 0)
> >> +        return;
> >> +
> >> +    /* Get IPM and return if clean, IAM has been restored. */
> >> +    ipm = get_ipm(kvm->arch.gisa, IRQ_FLAG_IAM);
> >> +    if (!ipm)
> >> +        return;
> >> +
> >> +    /* Start over, if new ISC bits are pending in IPM. */
> >> +    if ((ipm_t0 ^ ipm) & ~ipm_t0)
> >> +        goto retry;
> >> +
> >> +    /*
> >> +     * Return as we just kicked at least one vcpu in WAIT state
> >> +     * open for airqs. The IAM will be restored latest when one
> >> +     * of them goes into WAIT or STOP state.
> >> +     */
> >> +    if (kicked > 0)
> >> +        return;
> >> +
> >> +    /*
> >> +     * No vcpu was kicked either because no vcpu was in WAIT state
> >> +     * or none of the vcpus in WAIT state are open for airqs.
> >> +     * Return immediately if no vcpus are in WAIT state.
> >> +     * There are vcpus in RUN state. They will process the airqs
> >> +     * if not closed for airqs as well. In that case the system will
> >> +     * delay airqs until a vcpu decides to take airqs again.
> >> +     */
> >> +    online_vcpus = atomic_read(&kvm->online_vcpus);
> >> +    if (!bitmap_weight(fi->idle_mask, online_vcpus))
> >> +        return;
> >> +
> >> +    /*
> >> +     * None of the vcpus in WAIT state take airqs and we might
> >> +     * have no running vcpus as at least one vcpu is in WAIT state
> >> +     * and IPM is dirty.
> >> +     */
> >> +    set_iam(kvm->arch.gisa, kvm->arch.iam);
> > 
> > I do not understand why we need to set IAM here.
> > The interrupt will be delivered by the firmware as soon as the PSW or 
> > CR6 is changed by any vCPU.
> > ...and if this does not happen we can not deliver the interrupt anyway.
> > 
> >> +}
> >> +
> >> +#define NULL_GISA_ADDR 0x00000000UL
> >> +#define NONE_GISA_ADDR 0x00000001UL
> >> +#define GISA_ADDR_MASK 0xfffff000UL
> >> +
> >> +static void __maybe_unused process_gib_alert_list(void)
> >> +{
> >> +    u32 final, next_alert, origin = 0UL;
> >> +    struct kvm_s390_gisa *gisa;
> >> +    struct kvm *kvm;
> >> +
> >> +    do {
> >> +        /*
> >> +         * If the NONE_GISA_ADDR is still stored in the alert list
> >> +         * origin, we will leave the outer loop. No further GISA has
> >> +         * been added to the alert list by millicode while processing
> >> +         * the current alert list.
> >> +         */
> >> +        final = (origin & NONE_GISA_ADDR);
> >> +        /*
> >> +         * Cut off the alert list and store the NONE_GISA_ADDR in the
> >> +         * alert list origin to avoid further GAL interruptions.
> >> +         * A new alert list can be build up by millicode in parallel
> >> +         * for guests not in the yet cut-off alert list. When in the
> >> +         * final loop, store the NULL_GISA_ADDR instead. This will re-
> >> +         * enable GAL interruptions on the host again.
> >> +         */
> >> +        origin = xchg(&gib->alert_list_origin,
> >> +                  (!final) ? NONE_GISA_ADDR : NULL_GISA_ADDR);
> >> +        /* Loop through the just cut-off alert list. */
> >> +        while (origin & GISA_ADDR_MASK) {
> >> +            gisa = (struct kvm_s390_gisa *)(u64)origin;
> >> +            next_alert = gisa->next_alert;
> >> +            /* Unlink the GISA from the alert list. */
> >> +            gisa->next_alert = origin;
> > 
> > AFAIU this enable GISA interrupt for the guest...
> 
> Only together with the IAM being set what could happen if
> __floating_airqs_kick() calls get_ipm and the IPM is clean already. :(
> 
> > 
> >> +            kvm = container_of(gisa, struct sie_page2, gisa)->kvm;
> >> +            /* Kick suitable vcpus */
> >> +            __floating_airqs_kick(kvm);
> > 
> > ...and here we kick a VCPU for the guest.
> > 
> > Logically I would do it in the otherway, first kicking the vCPU then 
> > enabling the GISA interruption again.
> > 
> > If the IPM bit is cleared by the firmware during delivering the 
> > interrupt to the guest before we enter get_ipm() called by 
> > __floating_airqs_kick() we will set the IAM despite we have a running 
> > CPU handling the IRQ.
> 
> I will move the unlink below the kick that will assure get_ipm will 
> never take the IAM restore path.
> 
> > In the worst case we can also set the IAM with the GISA in the alert list.
> > Or we must accept that the firmware can deliver the IPM as soon as we 
> > reset the GISA next field.
> 
> See statement above.
> 

I'm very confused by these comments, and especially by your apparent
consensus.

Regards,
Halil
Michael Mueller Jan. 8, 2019, 3:21 p.m. UTC | #7
On 08.01.19 13:59, Halil Pasic wrote:
> On Wed, 19 Dec 2018 20:17:54 +0100
> Michael Mueller <mimu@linux.ibm.com> wrote:
> 
>> This function processes the Gib Alert List (GAL). It is required
>> to run when either a gib alert interruption has been received or
>> a gisa that is in the alert list is cleared or dropped.
>>
>> The GAL is build up by millicode, when the respective ISC bit is
>> set in the Interruption Alert Mask (IAM) and an interruption of
>> that class is observed.
>>
>> Signed-off-by: Michael Mueller <mimu@linux.ibm.com>
>> ---
>>   arch/s390/kvm/interrupt.c | 140 ++++++++++++++++++++++++++++++++++++++++++++++
>>   1 file changed, 140 insertions(+)
>>
>> diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
>> index 48a93f5e5333..03e7ba4f215a 100644
>> --- a/arch/s390/kvm/interrupt.c
>> +++ b/arch/s390/kvm/interrupt.c
>> @@ -2941,6 +2941,146 @@ int kvm_s390_get_irq_state(struct kvm_vcpu *vcpu, __u8 __user *buf, int len)
>>   	return n;
>>   }
>>   
>> +static int __try_airqs_kick(struct kvm *kvm, u8 ipm)
>> +{
>> +	struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
>> +	struct kvm_vcpu *vcpu = NULL, *kick_vcpu[MAX_ISC + 1];
>> +	int online_vcpus = atomic_read(&kvm->online_vcpus);
>> +	u8 ioint_mask, isc_mask, kick_mask = 0x00;
>> +	int vcpu_id, kicked = 0;
>> +
>> +	/* Loop over vcpus in WAIT state. */
>> +	for (vcpu_id = find_first_bit(fi->idle_mask, online_vcpus);
>> +	     /* Until all pending ISCs have a vcpu open for airqs. */
>> +	     (~kick_mask & ipm) && vcpu_id < online_vcpus;
>> +	     vcpu_id = find_next_bit(fi->idle_mask, online_vcpus, vcpu_id)) {
>> +		vcpu = kvm_get_vcpu(kvm, vcpu_id);
>> +		if (psw_ioint_disabled(vcpu))
>> +			continue;
>> +		ioint_mask = (u8)(vcpu->arch.sie_block->gcr[6] >> 24);
>> +		for (isc_mask = 0x80; isc_mask; isc_mask >>= 1) {
>> +			/* ISC pending in IPM ? */
>> +			if (!(ipm & isc_mask))
>> +				continue;
>> +			/* vcpu for this ISC already found ? */
>> +			if (kick_mask & isc_mask)
>> +				continue;
>> +			/* vcpu open for airq of this ISC ? */
>> +			if (!(ioint_mask & isc_mask))
>> +				continue;
>> +			/* use this vcpu (for all ISCs in ioint_mask) */
>> +			kick_mask |= ioint_mask;
>> +			kick_vcpu[kicked++] = vcpu;
> 
> Assuming that the vcpu can/will take all ISCs it's currently open for
> does not seem right. We kind of rely on this assumption here, or?

My latest version of this routine already follows a different strategy.
It looks for a horizontal distribution of pending ISCs among idle vcpus.

> 
>> +		}
>> +	}
>> +
>> +	if (vcpu && ~kick_mask & ipm)
>> +		VM_EVENT(kvm, 4, "gib alert undeliverable isc mask
>> 0x%02x",
>> +			 ~kick_mask & ipm);
>> +
>> +	for (vcpu_id = 0; vcpu_id < kicked; vcpu_id++)
>> +		kvm_s390_vcpu_wakeup(kick_vcpu[vcpu_id]);
>> +
>> +	return (online_vcpus != 0) ? kicked : -ENODEV;
>> +}
>> +
>> +static void __floating_airqs_kick(struct kvm *kvm)
>> +{
>> +	struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
>> +	int online_vcpus, kicked;
>> +	u8 ipm_t0, ipm;
>> +
>> +	/* Get IPM and return if clean, IAM has been restored. */
>> +	ipm = get_ipm(kvm->arch.gisa, IRQ_FLAG_IAM);
>> +	if (!ipm)
>> +		return;
>> +retry:
>> +	ipm_t0 = ipm;
>> +
>> +	/* Try to kick some vcpus in WAIT state. */
>> +	kicked = __try_airqs_kick(kvm, ipm);
>> +	if (kicked < 0)
>> +		return;
>> +
>> +	/* Get IPM and return if clean, IAM has been restored. */
>> +	ipm = get_ipm(kvm->arch.gisa, IRQ_FLAG_IAM);
>> +	if (!ipm)
>> +		return;
>> +
>> +	/* Start over, if new ISC bits are pending in IPM. */
>> +	if ((ipm_t0 ^ ipm) & ~ipm_t0)
>> +		goto retry;
>> +
> 
> <MARK A>
> 
>> +	/*
>> +	 * Return as we just kicked at least one vcpu in WAIT state
>> +	 * open for airqs. The IAM will be restored latest when one
>> +	 * of them goes into WAIT or STOP state.
>> +	 */
>> +	if (kicked > 0)
>> +		return;
> 
> </MARK A>
> 
>> +
>> +	/*
>> +	 * No vcpu was kicked either because no vcpu was in WAIT state
>> +	 * or none of the vcpus in WAIT state are open for airqs.
>> +	 * Return immediately if no vcpus are in WAIT state.
>> +	 * There are vcpus in RUN state. They will process the airqs
>> +	 * if not closed for airqs as well. In that case the system will
>> +	 * delay airqs until a vcpu decides to take airqs again.
>> +	 */
>> +	online_vcpus = atomic_read(&kvm->online_vcpus);
>> +	if (!bitmap_weight(fi->idle_mask, online_vcpus))
>> +		return;
>> +
>> +	/*
>> +	 * None of the vcpus in WAIT state take airqs and we might
>> +	 * have no running vcpus as at least one vcpu is in WAIT state
>> +	 * and IPM is dirty.
>> +	 */
>> +	set_iam(kvm->arch.gisa, kvm->arch.iam);
>> +}
>> +
>> +#define NULL_GISA_ADDR 0x00000000UL
>> +#define NONE_GISA_ADDR 0x00000001UL
>> +#define GISA_ADDR_MASK 0xfffff000UL
>> +
>> +static void __maybe_unused process_gib_alert_list(void)
>> +{
>> +	u32 final, next_alert, origin = 0UL;
>> +	struct kvm_s390_gisa *gisa;
>> +	struct kvm *kvm;
>> +
>> +	do {
>> +		/*
>> +		 * If the NONE_GISA_ADDR is still stored in the alert list
>> +		 * origin, we will leave the outer loop. No further GISA has
>> +		 * been added to the alert list by millicode while processing
>> +		 * the current alert list.
>> +		 */
>> +		final = (origin & NONE_GISA_ADDR);
>> +		/*
>> +		 * Cut off the alert list and store the NONE_GISA_ADDR in the
>> +		 * alert list origin to avoid further GAL interruptions.
>> +		 * A new alert list can be build up by millicode in parallel
>> +		 * for guests not in the yet cut-off alert list. When in the
>> +		 * final loop, store the NULL_GISA_ADDR instead. This will re-
>> +		 * enable GAL interruptions on the host again.
>> +		 */
>> +		origin = xchg(&gib->alert_list_origin,
>> +			      (!final) ? NONE_GISA_ADDR : NULL_GISA_ADDR);
>> +		/* Loop through the just cut-off alert list. */
>> +		while (origin & GISA_ADDR_MASK) {
>> +			gisa = (struct kvm_s390_gisa *)(u64)origin;
>> +			next_alert = gisa->next_alert;
>> +			/* Unlink the GISA from the alert list. */
>> +			gisa->next_alert = origin;
>> +			kvm = container_of(gisa, struct sie_page2, gisa)->kvm;
>> +			/* Kick suitable vcpus */
>> +			__floating_airqs_kick(kvm);
> 
> We may finish handling the alerted gisa with iam not set e.g.
> via some vcpus kicked but ipm still dirty and some vcpus still in wait,
> or?

My above mentioned change to the routine identifying the vcpus to kick
will select one vcpu for each ISC pending if possible (depends on the
number of idle vcpus and their respective interruption masks and the
pending ISCs).

That does not exclude the principal scenario that maybe only one vcpu
is kicked and multiple ISCs are pending (ipm still dirty) although I
have never observed this with a Linux guest.

What I was trying to avoid was a kind of busy loop running in addition
to the kicked vcpus monitoring the IPM state for resource utilization
reasons.

> 
>  From the comments it seems we speculate on being in a safe state, as
> these are supposed to return to wait or stop soon-ish, and we will set
> iam then (See <MARK A>). I don't quite understand.


Yes, the next vcpu going idle shall restore the IAM or process the
top ISC pending if the iomask (GCR) allows. vcpus are not allowed to go 
in disabled wait (IO int disabled by PSW). If all vcpus always (for
some time) mask a specific ISC the guest does not want to get 
interrupted for that ISC, but will be as soon as a running vcpu opens
the mask again.

> 
> According to my current understanding we might end up loosing initiative
> in this scenario. Or am I wrong?

I currently don't have proof for you being wrong but have not observed
the situation yet.

> 
> Regards,
> Halil
> 
>> +			origin = next_alert;
>> +		}
>> +	} while (!final);
>> +}
>> +
>>   static void nullify_gisa(struct kvm_s390_gisa *gisa)
>>   {
>>   	memset(gisa, 0, sizeof(struct kvm_s390_gisa));
> 

-
Halil Pasic Jan. 8, 2019, 6:34 p.m. UTC | #8
On Tue, 8 Jan 2019 16:21:17 +0100
Michael Mueller <mimu@linux.ibm.com> wrote:

> >> +			gisa->next_alert = origin;
> >> +			kvm = container_of(gisa, struct sie_page2, gisa)->kvm;
> >> +			/* Kick suitable vcpus */
> >> +			__floating_airqs_kick(kvm);  
> > 
> > We may finish handling the alerted gisa with iam not set e.g.
> > via some vcpus kicked but ipm still dirty and some vcpus still in wait,
> > or?  
> 
> My above mentioned change to the routine identifying the vcpus to kick
> will select one vcpu for each ISC pending if possible (depends on the
> number of idle vcpus and their respective interruption masks and the
> pending ISCs).
> 
> That does not exclude the principle scenario that maybe only one vcpu
> is kicked and multiple ISCs are pending (ipm still dirty) although
> have never observed this with a Linux guest.
> 

IMHO we have to differentiate between the general case and between what
can happen with current or historical Linux guests.

Regarding Linux guests, I'm under the impression each version was quite
there. I say so, also because I have a reasonable amount of confidence
in your testing.

> What I was trying to avoid was a kind of busy loop running in addition
> to the kicked vcpus monitoring the IPM state for resource utilization
> reasons.
> 

Got it. I think we need a clean switch-over (between our
code makes sure that no pending interrupts are going to get stalled in
spite of waiting vcpus that could take them, and between we are good now
and if we are not good any more we will get an alert) nevertheless. 

> > 
> >  From the comments it seems we speculate on being in a safe state, as
> > these are supposed to return to wait or stop soon-ish, and we will set
> > iam then (See <MARK A>). I don't quite understand.  
> 
> 
> Yes, the next vcpu going idle shall restore the IAM or process the
> top ISC pending if the iomask (GCR) allows. vcpus are not allowed to go 
> in disabled wait (IO int disabled by PSW). If all vcpus always (for
> some time) mask a specific ISC the guest does not want to get 
> interrupted for that ISC but will as soon a running vcpu will open
> the mask again.
> 

My understanding on the guarantees we can provide based on the fact that
we kicked some vcpus is still lacking. Maybe let us discuss this offline.

> > 
> > According to my current understanding we might end up loosing initiative
> > in this scenario. Or am I wrong?  
> 
> I currently don't have proof for you being wrong but have not observed
> the situation yet.


See above. Proving that no irq's can get substantially delayed
needlessly (where needlessly means, there is a vcpu in wait state that
could take the irq) would be a proof enough that I'm wrong. Let's
make this discussion more efficient by utilizing co-location to cut
down the RTT.

Regards,
Halil
Pierre Morel Jan. 9, 2019, 11:39 a.m. UTC | #9
On 07/01/2019 20:18, Michael Mueller wrote:
> 
> 
> On 03.01.19 15:43, Pierre Morel wrote:
>> On 19/12/2018 20:17, Michael Mueller wrote:
>>> This function processes the Gib Alert List (GAL). It is required
...snip...
> +    struct kvm *kvm;
>>> +
>>> +    do {
>>> +        /*
>>> +         * If the NONE_GISA_ADDR is still stored in the alert list
>>> +         * origin, we will leave the outer loop. No further GISA has
>>> +         * been added to the alert list by millicode while processing
>>> +         * the current alert list.
>>> +         */
>>> +        final = (origin & NONE_GISA_ADDR);
>>> +        /*
>>> +         * Cut off the alert list and store the NONE_GISA_ADDR in the
>>> +         * alert list origin to avoid further GAL interruptions.
>>> +         * A new alert list can be build up by millicode in parallel
>>> +         * for guests not in the yet cut-off alert list. When in the
>>> +         * final loop, store the NULL_GISA_ADDR instead. This will re-
>>> +         * enable GAL interruptions on the host again.
>>> +         */
>>> +        origin = xchg(&gib->alert_list_origin,
>>> +                  (!final) ? NONE_GISA_ADDR : NULL_GISA_ADDR);
>>> +        /* Loop through the just cut-off alert list. */
>>> +        while (origin & GISA_ADDR_MASK) {
>>> +            gisa = (struct kvm_s390_gisa *)(u64)origin;
>>> +            next_alert = gisa->next_alert;
>>> +            /* Unlink the GISA from the alert list. */
>>> +            gisa->next_alert = origin;
>>
>> AFAIU this enable GISA interrupt for the guest...
> 
> Only together with the IAM being set what could happen if
> __floating_airqs_kick() calls get_ipm and the IPM is clean already. :(

confused, AFAIK IAM is used to allow interrupts for the host,
not for the guest.

> 
>>
>>> +            kvm = container_of(gisa, struct sie_page2, gisa)->kvm;
>>> +            /* Kick suitable vcpus */
>>> +            __floating_airqs_kick(kvm);
>>
>> ...and here we kick a VCPU for the guest.
>>
>> Logically I would do it in the otherway, first kicking the vCPU then 
>> enabling the GISA interruption again.

!! sorry to have introduced this confusion.
You did it in the right order.
I should have not send these comments after I gave my R-B

>>
>> If the IPM bit is cleared by the firmware during delivering the 
>> interrupt to the guest before we enter get_ipm() called by 
>> __floating_airqs_kick() we will set the IAM despite we have a running 
>> CPU handling the IRQ.
> 
> I will move the unlink below the kick that will assure get_ipm will 
> never take the IAM restore path.

!! Sorry, you were right.
We must re-enable interrupt before kicking the vcpu, as you did, or the 
vcpu could go to wait before it gets the interrupt.

> 
>> In the worst case we can also set the IAM with the GISA in the alert 
>> list.
>> Or we must accept that the firmware can deliver the IPM as soon as we 
>> reset the GISA next field.
> 
> See statement above.
> 
>>
>>> +            origin = next_alert;
>>> +        }
>>> +    } while (!final);
>>> +}
>>> +
>>>   static void nullify_gisa(struct kvm_s390_gisa *gisa)
>>>   {
>>>       memset(gisa, 0, sizeof(struct kvm_s390_gisa));
>>>
>>
>> I think that avoiding to restore the IAM during the call to get_ipm 
>> inside __floating_airqs_kick() would good.

I still think this assumption is right: We should not set the IAM during 
the kick.

>>
>> If you agree, with that:
>>
>> Reviewed-by: Pierre Morel<pmorel@linux.ibm.com>

Still OK with my R-B, as long as we do not set IAM during the kicking.

Regards,
Pierre
Pierre Morel Jan. 9, 2019, 12:14 p.m. UTC | #10
On 08/01/2019 16:21, Michael Mueller wrote:
> 
> 
> On 08.01.19 13:59, Halil Pasic wrote:
>> On Wed, 19 Dec 2018 20:17:54 +0100
>> Michael Mueller <mimu@linux.ibm.com> wrote:
>>
>>> This function processes the Gib Alert List (GAL). It is required
>>> to run when either a gib alert interruption has been received or
>>> a gisa that is in the alert list is cleared or dropped.
>>>
>>> The GAL is build up by millicode, when the respective ISC bit is
>>> set in the Interruption Alert Mask (IAM) and an interruption of
>>> that class is observed.
>>>
>>> Signed-off-by: Michael Mueller <mimu@linux.ibm.com>
>>> ---
>>>   arch/s390/kvm/interrupt.c | 140 
>>> ++++++++++++++++++++++++++++++++++++++++++++++
>>>   1 file changed, 140 insertions(+)
>>>
>>> diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
>>> index 48a93f5e5333..03e7ba4f215a 100644
>>> --- a/arch/s390/kvm/interrupt.c
>>> +++ b/arch/s390/kvm/interrupt.c
>>> @@ -2941,6 +2941,146 @@ int kvm_s390_get_irq_state(struct kvm_vcpu 
>>> *vcpu, __u8 __user *buf, int len)
>>>       return n;
>>>   }
>>> +static int __try_airqs_kick(struct kvm *kvm, u8 ipm)
>>> +{
>>> +    struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
>>> +    struct kvm_vcpu *vcpu = NULL, *kick_vcpu[MAX_ISC + 1];
>>> +    int online_vcpus = atomic_read(&kvm->online_vcpus);
>>> +    u8 ioint_mask, isc_mask, kick_mask = 0x00;
>>> +    int vcpu_id, kicked = 0;
>>> +
>>> +    /* Loop over vcpus in WAIT state. */
>>> +    for (vcpu_id = find_first_bit(fi->idle_mask, online_vcpus);
>>> +         /* Until all pending ISCs have a vcpu open for airqs. */
>>> +         (~kick_mask & ipm) && vcpu_id < online_vcpus;
>>> +         vcpu_id = find_next_bit(fi->idle_mask, online_vcpus, 
>>> vcpu_id)) {
>>> +        vcpu = kvm_get_vcpu(kvm, vcpu_id);
>>> +        if (psw_ioint_disabled(vcpu))
>>> +            continue;
>>> +        ioint_mask = (u8)(vcpu->arch.sie_block->gcr[6] >> 24);
>>> +        for (isc_mask = 0x80; isc_mask; isc_mask >>= 1) {
>>> +            /* ISC pending in IPM ? */
>>> +            if (!(ipm & isc_mask))
>>> +                continue;
>>> +            /* vcpu for this ISC already found ? */
>>> +            if (kick_mask & isc_mask)
>>> +                continue;
>>> +            /* vcpu open for airq of this ISC ? */
>>> +            if (!(ioint_mask & isc_mask))
>>> +                continue;
>>> +            /* use this vcpu (for all ISCs in ioint_mask) */
>>> +            kick_mask |= ioint_mask;
>>> +            kick_vcpu[kicked++] = vcpu;
>>
>> Assuming that the vcpu can/will take all ISCs it's currently open for
>> does not seem right. We kind of rely on this assumption here, or?

why does it not seem right?

> 
> My latest version of this routine already follows a different strategy.
> It looks for a horizontal distribution of pending ISCs among idle vcpus.
> 

May be you should separate the GAL IRQ handling and the algorithm of the 
vCPU to kick in different patches to ease the review.


Regards,
Pierre
Halil Pasic Jan. 9, 2019, 1:10 p.m. UTC | #11
On Wed, 9 Jan 2019 13:14:17 +0100
Pierre Morel <pmorel@linux.ibm.com> wrote:

> On 08/01/2019 16:21, Michael Mueller wrote:
> > 
> > 
> > On 08.01.19 13:59, Halil Pasic wrote:
> >> On Wed, 19 Dec 2018 20:17:54 +0100
> >> Michael Mueller <mimu@linux.ibm.com> wrote:
> >>
> >>> This function processes the Gib Alert List (GAL). It is required
> >>> to run when either a gib alert interruption has been received or
> >>> a gisa that is in the alert list is cleared or dropped.
> >>>
> >>> The GAL is build up by millicode, when the respective ISC bit is
> >>> set in the Interruption Alert Mask (IAM) and an interruption of
> >>> that class is observed.
> >>>
> >>> Signed-off-by: Michael Mueller <mimu@linux.ibm.com>
> >>> ---
> >>>   arch/s390/kvm/interrupt.c | 140 
> >>> ++++++++++++++++++++++++++++++++++++++++++++++
> >>>   1 file changed, 140 insertions(+)
> >>>
> >>> diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
> >>> index 48a93f5e5333..03e7ba4f215a 100644
> >>> --- a/arch/s390/kvm/interrupt.c
> >>> +++ b/arch/s390/kvm/interrupt.c
> >>> @@ -2941,6 +2941,146 @@ int kvm_s390_get_irq_state(struct kvm_vcpu 
> >>> *vcpu, __u8 __user *buf, int len)
> >>>       return n;
> >>>   }
> >>> +static int __try_airqs_kick(struct kvm *kvm, u8 ipm)
> >>> +{
> >>> +    struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
> >>> +    struct kvm_vcpu *vcpu = NULL, *kick_vcpu[MAX_ISC + 1];
> >>> +    int online_vcpus = atomic_read(&kvm->online_vcpus);
> >>> +    u8 ioint_mask, isc_mask, kick_mask = 0x00;
> >>> +    int vcpu_id, kicked = 0;
> >>> +
> >>> +    /* Loop over vcpus in WAIT state. */
> >>> +    for (vcpu_id = find_first_bit(fi->idle_mask, online_vcpus);
> >>> +         /* Until all pending ISCs have a vcpu open for airqs. */
> >>> +         (~kick_mask & ipm) && vcpu_id < online_vcpus;
> >>> +         vcpu_id = find_next_bit(fi->idle_mask, online_vcpus, 
> >>> vcpu_id)) {
> >>> +        vcpu = kvm_get_vcpu(kvm, vcpu_id);
> >>> +        if (psw_ioint_disabled(vcpu))
> >>> +            continue;
> >>> +        ioint_mask = (u8)(vcpu->arch.sie_block->gcr[6] >> 24);
> >>> +        for (isc_mask = 0x80; isc_mask; isc_mask >>= 1) {
> >>> +            /* ISC pending in IPM ? */
> >>> +            if (!(ipm & isc_mask))
> >>> +                continue;
> >>> +            /* vcpu for this ISC already found ? */
> >>> +            if (kick_mask & isc_mask)
> >>> +                continue;
> >>> +            /* vcpu open for airq of this ISC ? */
> >>> +            if (!(ioint_mask & isc_mask))
> >>> +                continue;
> >>> +            /* use this vcpu (for all ISCs in ioint_mask) */
> >>> +            kick_mask |= ioint_mask;
> >>> +            kick_vcpu[kicked++] = vcpu;
> >>
> >> Assuming that the vcpu can/will take all ISCs it's currently open for
> >> does not seem right. We kind of rely on this assumption here, or?
> 
> why does it not seem right?
> 

When an interrupt is delivered a psw-swap takes place. The new-psw
may fence IO interrupts. Thus for example if we have the vcpu open for
all ISCs and 0, 1 and 2 pending, we may end up only delivering 0, if the
psw-swap corresponding to delivering 0 closes the vcpu for IO
interrupts. After guest has control, we don't have control over the rest
of the story. 

> > 
> > My latest version of this routine already follows a different strategy.
> > It looks for a horizontal distribution of pending ISCs among idle vcpus.
> > 
> 
> May be you should separate the GAL IRQ handling and the algorithm of the 
> vCPU to kick in different patches to ease the review.
> 
> 

No strong opinion here. I found it convenient to have the most of the
logic in one patch/email.

Regards,
Halil
Pierre Morel Jan. 9, 2019, 2:49 p.m. UTC | #12
On 09/01/2019 14:10, Halil Pasic wrote:
> On Wed, 9 Jan 2019 13:14:17 +0100
> Pierre Morel <pmorel@linux.ibm.com> wrote:
> 
>> On 08/01/2019 16:21, Michael Mueller wrote:
>>>
>>>
>>> On 08.01.19 13:59, Halil Pasic wrote:
>>>> On Wed, 19 Dec 2018 20:17:54 +0100
>>>> Michael Mueller <mimu@linux.ibm.com> wrote:
>>>>
>>>>> This function processes the Gib Alert List (GAL). It is required
>>>>> to run when either a gib alert interruption has been received or
>>>>> a gisa that is in the alert list is cleared or dropped.
>>>>>
>>>>> The GAL is build up by millicode, when the respective ISC bit is
>>>>> set in the Interruption Alert Mask (IAM) and an interruption of
>>>>> that class is observed.
>>>>>
>>>>> Signed-off-by: Michael Mueller <mimu@linux.ibm.com>
>>>>> ---
>>>>>    arch/s390/kvm/interrupt.c | 140
>>>>> ++++++++++++++++++++++++++++++++++++++++++++++
>>>>>    1 file changed, 140 insertions(+)
>>>>>
>>>>> diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
>>>>> index 48a93f5e5333..03e7ba4f215a 100644
>>>>> --- a/arch/s390/kvm/interrupt.c
>>>>> +++ b/arch/s390/kvm/interrupt.c
>>>>> @@ -2941,6 +2941,146 @@ int kvm_s390_get_irq_state(struct kvm_vcpu
>>>>> *vcpu, __u8 __user *buf, int len)
>>>>>        return n;
>>>>>    }
>>>>> +static int __try_airqs_kick(struct kvm *kvm, u8 ipm)
>>>>> +{
>>>>> +    struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
>>>>> +    struct kvm_vcpu *vcpu = NULL, *kick_vcpu[MAX_ISC + 1];
>>>>> +    int online_vcpus = atomic_read(&kvm->online_vcpus);
>>>>> +    u8 ioint_mask, isc_mask, kick_mask = 0x00;
>>>>> +    int vcpu_id, kicked = 0;
>>>>> +
>>>>> +    /* Loop over vcpus in WAIT state. */
>>>>> +    for (vcpu_id = find_first_bit(fi->idle_mask, online_vcpus);
>>>>> +         /* Until all pending ISCs have a vcpu open for airqs. */
>>>>> +         (~kick_mask & ipm) && vcpu_id < online_vcpus;
>>>>> +         vcpu_id = find_next_bit(fi->idle_mask, online_vcpus,
>>>>> vcpu_id)) {
>>>>> +        vcpu = kvm_get_vcpu(kvm, vcpu_id);
>>>>> +        if (psw_ioint_disabled(vcpu))
>>>>> +            continue;
>>>>> +        ioint_mask = (u8)(vcpu->arch.sie_block->gcr[6] >> 24);
>>>>> +        for (isc_mask = 0x80; isc_mask; isc_mask >>= 1) {
>>>>> +            /* ISC pending in IPM ? */
>>>>> +            if (!(ipm & isc_mask))
>>>>> +                continue;
>>>>> +            /* vcpu for this ISC already found ? */
>>>>> +            if (kick_mask & isc_mask)
>>>>> +                continue;
>>>>> +            /* vcpu open for airq of this ISC ? */
>>>>> +            if (!(ioint_mask & isc_mask))
>>>>> +                continue;
>>>>> +            /* use this vcpu (for all ISCs in ioint_mask) */
>>>>> +            kick_mask |= ioint_mask;
>>>>> +            kick_vcpu[kicked++] = vcpu;
>>>>
>>>> Assuming that the vcpu can/will take all ISCs it's currently open for
>>>> does not seem right. We kind of rely on this assumption here, or?
>>
>> why does it not seem right?
>>
> 
> When an interrupt is delivered a psw-swap takes place. The new-psw
> may fence IO interrupts. Thus for example if we have the vcpu open for
> all ISCs and 0, 1 and 2 pending, we may end up only delivering 0, if the
> psw-swap corresponding to delivering 0 closes the vcpu for IO
> interrupts. After guest has control, we don't have control over the rest
> of the story.

OK, I think I understand your concern: waking up a single waiting vCPU 
per ISC is not enough.
We must wake all vCPUs in the wait state that have at least one matching ISC bit.

Regards,
Pierre
Halil Pasic Jan. 9, 2019, 4:18 p.m. UTC | #13
On Wed, 9 Jan 2019 15:49:56 +0100
Pierre Morel <pmorel@linux.ibm.com> wrote:

> On 09/01/2019 14:10, Halil Pasic wrote:
> > On Wed, 9 Jan 2019 13:14:17 +0100
> > Pierre Morel <pmorel@linux.ibm.com> wrote:
> > 
> >> On 08/01/2019 16:21, Michael Mueller wrote:
> >>>
> >>>
> >>> On 08.01.19 13:59, Halil Pasic wrote:
> >>>> On Wed, 19 Dec 2018 20:17:54 +0100
> >>>> Michael Mueller <mimu@linux.ibm.com> wrote:
> >>>>
> >>>>> This function processes the Gib Alert List (GAL). It is required
> >>>>> to run when either a gib alert interruption has been received or
> >>>>> a gisa that is in the alert list is cleared or dropped.
> >>>>>
> >>>>> The GAL is build up by millicode, when the respective ISC bit is
> >>>>> set in the Interruption Alert Mask (IAM) and an interruption of
> >>>>> that class is observed.
> >>>>>
> >>>>> Signed-off-by: Michael Mueller <mimu@linux.ibm.com>
> >>>>> ---
> >>>>>    arch/s390/kvm/interrupt.c | 140
> >>>>> ++++++++++++++++++++++++++++++++++++++++++++++
> >>>>>    1 file changed, 140 insertions(+)
> >>>>>
> >>>>> diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
> >>>>> index 48a93f5e5333..03e7ba4f215a 100644
> >>>>> --- a/arch/s390/kvm/interrupt.c
> >>>>> +++ b/arch/s390/kvm/interrupt.c
> >>>>> @@ -2941,6 +2941,146 @@ int kvm_s390_get_irq_state(struct kvm_vcpu
> >>>>> *vcpu, __u8 __user *buf, int len)
> >>>>>        return n;
> >>>>>    }
> >>>>> +static int __try_airqs_kick(struct kvm *kvm, u8 ipm)
> >>>>> +{
> >>>>> +    struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
> >>>>> +    struct kvm_vcpu *vcpu = NULL, *kick_vcpu[MAX_ISC + 1];
> >>>>> +    int online_vcpus = atomic_read(&kvm->online_vcpus);
> >>>>> +    u8 ioint_mask, isc_mask, kick_mask = 0x00;
> >>>>> +    int vcpu_id, kicked = 0;
> >>>>> +
> >>>>> +    /* Loop over vcpus in WAIT state. */
> >>>>> +    for (vcpu_id = find_first_bit(fi->idle_mask, online_vcpus);
> >>>>> +         /* Until all pending ISCs have a vcpu open for airqs. */
> >>>>> +         (~kick_mask & ipm) && vcpu_id < online_vcpus;
> >>>>> +         vcpu_id = find_next_bit(fi->idle_mask, online_vcpus,
> >>>>> vcpu_id)) {
> >>>>> +        vcpu = kvm_get_vcpu(kvm, vcpu_id);
> >>>>> +        if (psw_ioint_disabled(vcpu))
> >>>>> +            continue;
> >>>>> +        ioint_mask = (u8)(vcpu->arch.sie_block->gcr[6] >> 24);
> >>>>> +        for (isc_mask = 0x80; isc_mask; isc_mask >>= 1) {
> >>>>> +            /* ISC pending in IPM ? */
> >>>>> +            if (!(ipm & isc_mask))
> >>>>> +                continue;
> >>>>> +            /* vcpu for this ISC already found ? */
> >>>>> +            if (kick_mask & isc_mask)
> >>>>> +                continue;
> >>>>> +            /* vcpu open for airq of this ISC ? */
> >>>>> +            if (!(ioint_mask & isc_mask))
> >>>>> +                continue;
> >>>>> +            /* use this vcpu (for all ISCs in ioint_mask) */
> >>>>> +            kick_mask |= ioint_mask;
> >>>>> +            kick_vcpu[kicked++] = vcpu;
> >>>>
> >>>> Assuming that the vcpu can/will take all ISCs it's currently open for
> >>>> does not seem right. We kind of rely on this assumption here, or?
> >>
> >> why does it not seem right?
> >>
> > 
> > When an interrupt is delivered a psw-swap takes place. The new-psw
> > may fence IO interrupts. Thus for example if we have the vcpu open for
> > all ISCs and 0, 1 and 2 pending, we may end up only delivering 0, if the
> > psw-swap corresponding to delivering 0 closes the vcpu for IO
> > interrupts. After guest has control, we don't have control over the rest
> > of the story.
> 
> OK I think I understand your concern, waking up a single waiting vCPU 
> per ISC is not enough.
> We must wake all vCPU in wait state having at least one matching ISC bit.
> 

That is not what I was trying to say, and IMHO it is also not generally
true that we must. But I may be missing something.

Regards,
Halil
diff mbox series

Patch

diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 48a93f5e5333..03e7ba4f215a 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -2941,6 +2941,146 @@  int kvm_s390_get_irq_state(struct kvm_vcpu *vcpu, __u8 __user *buf, int len)
 	return n;
 }
 
+static int __try_airqs_kick(struct kvm *kvm, u8 ipm)
+{
+	struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
+	struct kvm_vcpu *vcpu = NULL, *kick_vcpu[MAX_ISC + 1];
+	int online_vcpus = atomic_read(&kvm->online_vcpus);
+	u8 ioint_mask, isc_mask, kick_mask = 0x00;
+	int vcpu_id, kicked = 0;
+
+	/* Loop over vcpus in WAIT state. */
+	for (vcpu_id = find_first_bit(fi->idle_mask, online_vcpus);
+	     /* Until all pending ISCs have a vcpu open for airqs. */
+	     (~kick_mask & ipm) && vcpu_id < online_vcpus;
+	     vcpu_id = find_next_bit(fi->idle_mask, online_vcpus, vcpu_id)) {
+		vcpu = kvm_get_vcpu(kvm, vcpu_id);
+		if (psw_ioint_disabled(vcpu))
+			continue;
+		ioint_mask = (u8)(vcpu->arch.sie_block->gcr[6] >> 24);
+		for (isc_mask = 0x80; isc_mask; isc_mask >>= 1) {
+			/* ISC pending in IPM ? */
+			if (!(ipm & isc_mask))
+				continue;
+			/* vcpu for this ISC already found ? */
+			if (kick_mask & isc_mask)
+				continue;
+			/* vcpu open for airq of this ISC ? */
+			if (!(ioint_mask & isc_mask))
+				continue;
+			/* use this vcpu (for all ISCs in ioint_mask) */
+			kick_mask |= ioint_mask;
+			kick_vcpu[kicked++] = vcpu;
+		}
+	}
+
+	if (vcpu && ~kick_mask & ipm)
+		VM_EVENT(kvm, 4, "gib alert undeliverable isc mask 0x%02x",
+			 ~kick_mask & ipm);
+
+	for (vcpu_id = 0; vcpu_id < kicked; vcpu_id++)
+		kvm_s390_vcpu_wakeup(kick_vcpu[vcpu_id]);
+
+	return (online_vcpus != 0) ? kicked : -ENODEV;
+}
+
+static void __floating_airqs_kick(struct kvm *kvm)
+{
+	struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
+	int online_vcpus, kicked;
+	u8 ipm_t0, ipm;
+
+	/* Get IPM and return if clean, IAM has been restored. */
+	ipm = get_ipm(kvm->arch.gisa, IRQ_FLAG_IAM);
+	if (!ipm)
+		return;
+retry:
+	ipm_t0 = ipm;
+
+	/* Try to kick some vcpus in WAIT state. */
+	kicked = __try_airqs_kick(kvm, ipm);
+	if (kicked < 0)
+		return;
+
+	/* Get IPM and return if clean, IAM has been restored. */
+	ipm = get_ipm(kvm->arch.gisa, IRQ_FLAG_IAM);
+	if (!ipm)
+		return;
+
+	/* Start over, if new ISC bits are pending in IPM. */
+	if ((ipm_t0 ^ ipm) & ~ipm_t0)
+		goto retry;
+
+	/*
+	 * Return as we just kicked at least one vcpu in WAIT state
+	 * open for airqs. The IAM will be restored latest when one
+	 * of them goes into WAIT or STOP state.
+	 */
+	if (kicked > 0)
+		return;
+
+	/*
+	 * No vcpu was kicked either because no vcpu was in WAIT state
+	 * or none of the vcpus in WAIT state are open for airqs.
+	 * Return immediately if no vcpus are in WAIT state.
+	 * There are vcpus in RUN state. They will process the airqs
+	 * if not closed for airqs as well. In that case the system will
+	 * delay airqs until a vcpu decides to take airqs again.
+	 */
+	online_vcpus = atomic_read(&kvm->online_vcpus);
+	if (!bitmap_weight(fi->idle_mask, online_vcpus))
+		return;
+
+	/*
+	 * None of the vcpus in WAIT state take airqs and we might
+	 * have no running vcpus as at least one vcpu is in WAIT state
+	 * and IPM is dirty.
+	 */
+	set_iam(kvm->arch.gisa, kvm->arch.iam);
+}
+
+#define NULL_GISA_ADDR 0x00000000UL
+#define NONE_GISA_ADDR 0x00000001UL
+#define GISA_ADDR_MASK 0xfffff000UL
+
+static void __maybe_unused process_gib_alert_list(void)
+{
+	u32 final, next_alert, origin = 0UL;
+	struct kvm_s390_gisa *gisa;
+	struct kvm *kvm;
+
+	do {
+		/*
+		 * If the NONE_GISA_ADDR is still stored in the alert list
+		 * origin, we will leave the outer loop. No further GISA has
+		 * been added to the alert list by millicode while processing
+		 * the current alert list.
+		 */
+		final = (origin & NONE_GISA_ADDR);
+		/*
+		 * Cut off the alert list and store the NONE_GISA_ADDR in the
+		 * alert list origin to avoid further GAL interruptions.
+		 * A new alert list can be build up by millicode in parallel
+		 * for guests not in the yet cut-off alert list. When in the
+		 * final loop, store the NULL_GISA_ADDR instead. This will re-
+		 * enable GAL interruptions on the host again.
+		 */
+		origin = xchg(&gib->alert_list_origin,
+			      (!final) ? NONE_GISA_ADDR : NULL_GISA_ADDR);
+		/* Loop through the just cut-off alert list. */
+		while (origin & GISA_ADDR_MASK) {
+			gisa = (struct kvm_s390_gisa *)(u64)origin;
+			next_alert = gisa->next_alert;
+			/* Unlink the GISA from the alert list. */
+			gisa->next_alert = origin;
+			kvm = container_of(gisa, struct sie_page2, gisa)->kvm;
+			/* Kick suitable vcpus */
+			__floating_airqs_kick(kvm);
+			origin = next_alert;
+		}
+	} while (!final);
+}
+
 static void nullify_gisa(struct kvm_s390_gisa *gisa)
 {
 	memset(gisa, 0, sizeof(struct kvm_s390_gisa));