[RFC,2/2] KVM: s390: vsie: store guest addresses of satellite blocks in vsie_page
diff mbox

Message ID 20180116171526.12343-3-david@redhat.com
State New
Headers show

Commit Message

David Hildenbrand Jan. 16, 2018, 5:15 p.m. UTC
This way, the values cannot changed, even if another VCPU might try to
mess with the nested SCB currently getting executed by another VCPU.

We now always use the same gpa for pinning and unpinning a page (for
unpinning, it is only relevant to mark the guest page dirty for
migration).

Signed-off-by: David Hildenbrand <david@redhat.com>
---
 arch/s390/kvm/vsie.c | 37 ++++++++++++++++++++++---------------
 1 file changed, 22 insertions(+), 15 deletions(-)

Comments

Christian Borntraeger Jan. 23, 2018, 11:18 a.m. UTC | #1
On 01/16/2018 06:15 PM, David Hildenbrand wrote:
> This way, the values cannot changed, even if another VCPU might try to
> mess with the nested SCB currently getting executed by another VCPU.
> 
> We now always use the same gpa for pinning and unpinning a page (for
> unpinning, it is only relevant to mark the guest page dirty for
> migration).
> 
> Signed-off-by: David Hildenbrand <david@redhat.com>

Reviewed-by: Christian Borntraeger <borntraeger@de.ibm.com>

thanks applied.

> ---
>  arch/s390/kvm/vsie.c | 37 ++++++++++++++++++++++---------------
>  1 file changed, 22 insertions(+), 15 deletions(-)
> 
> diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
> index 79ea444a0534..546d8b106ee0 100644
> --- a/arch/s390/kvm/vsie.c
> +++ b/arch/s390/kvm/vsie.c
> @@ -38,7 +38,13 @@ struct vsie_page {
>  	struct gmap *gmap;			/* 0x0220 */
>  	/* address of the last reported fault to guest2 */
>  	unsigned long fault_addr;		/* 0x0228 */
> -	__u8 reserved[0x0700 - 0x0230];		/* 0x0230 */
> +	/* calculated guest addresses of satellite control blocks */
> +	gpa_t sca_gpa;				/* 0x0230 */
> +	gpa_t itdba_gpa;			/* 0x0238 */
> +	gpa_t gvrd_gpa;				/* 0x0240 */
> +	gpa_t riccbd_gpa;			/* 0x0248 */
> +	gpa_t sdnx_gpa;				/* 0x0250 */
> +	__u8 reserved[0x0700 - 0x0258];		/* 0x0258 */
>  	struct kvm_s390_crypto_cb crycb;	/* 0x0700 */
>  	__u8 fac[S390_ARCH_FAC_LIST_SIZE_BYTE];	/* 0x0800 */
>  };
> @@ -475,46 +481,42 @@ static void unpin_guest_page(struct kvm *kvm, gpa_t gpa, hpa_t hpa)
>  /* unpin all blocks previously pinned by pin_blocks(), marking them dirty */
>  static void unpin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
>  {
> -	struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
>  	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
>  	hpa_t hpa;
> -	gpa_t gpa;
> 
>  	hpa = (u64) scb_s->scaoh << 32 | scb_s->scaol;
>  	if (hpa) {
> -		gpa = scb_o->scaol & ~0xfUL;
> -		if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_64BSCAO))
> -			gpa |= (u64) scb_o->scaoh << 32;
> -		unpin_guest_page(vcpu->kvm, gpa, hpa);
> +		unpin_guest_page(vcpu->kvm, vsie_page->sca_gpa, hpa);
> +		vsie_page->sca_gpa = 0;
>  		scb_s->scaol = 0;
>  		scb_s->scaoh = 0;
>  	}
> 
>  	hpa = scb_s->itdba;
>  	if (hpa) {
> -		gpa = scb_o->itdba & ~0xffUL;
> -		unpin_guest_page(vcpu->kvm, gpa, hpa);
> +		unpin_guest_page(vcpu->kvm, vsie_page->itdba_gpa, hpa);
> +		vsie_page->itdba_gpa = 0;
>  		scb_s->itdba = 0;
>  	}
> 
>  	hpa = scb_s->gvrd;
>  	if (hpa) {
> -		gpa = scb_o->gvrd & ~0x1ffUL;
> -		unpin_guest_page(vcpu->kvm, gpa, hpa);
> +		unpin_guest_page(vcpu->kvm, vsie_page->gvrd_gpa, hpa);
> +		vsie_page->gvrd_gpa = 0;
>  		scb_s->gvrd = 0;
>  	}
> 
>  	hpa = scb_s->riccbd;
>  	if (hpa) {
> -		gpa = scb_o->riccbd & ~0x3fUL;
> -		unpin_guest_page(vcpu->kvm, gpa, hpa);
> +		unpin_guest_page(vcpu->kvm, vsie_page->riccbd_gpa, hpa);
> +		vsie_page->riccbd_gpa = 0;
>  		scb_s->riccbd = 0;
>  	}
> 
>  	hpa = scb_s->sdnxo;
>  	if (hpa) {
> -		gpa = scb_o->sdnxo;
> -		unpin_guest_page(vcpu->kvm, gpa, hpa);
> +		unpin_guest_page(vcpu->kvm, vsie_page->sdnx_gpa, hpa);
> +		vsie_page->sdnx_gpa = 0;
>  		scb_s->sdnxo = 0;
>  	}
>  }
> @@ -559,6 +561,7 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
>  		}
>  		if (rc)
>  			goto unpin;
> +		vsie_page->sca_gpa = gpa;
>  		scb_s->scaoh = (u32)((u64)hpa >> 32);
>  		scb_s->scaol = (u32)(u64)hpa;
>  	}
> @@ -575,6 +578,7 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
>  			rc = set_validity_icpt(scb_s, 0x0080U);
>  			goto unpin;
>  		}
> +		vsie_page->itdba_gpa = gpa;
>  		scb_s->itdba = hpa;
>  	}
> 
> @@ -593,6 +597,7 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
>  			rc = set_validity_icpt(scb_s, 0x1310U);
>  			goto unpin;
>  		}
> +		vsie_page->gvrd_gpa = gpa;
>  		scb_s->gvrd = hpa;
>  	}
> 
> @@ -609,6 +614,7 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
>  			goto unpin;
>  		}
>  		/* Validity 0x0044 will be checked by SIE */
> +		vsie_page->riccbd_gpa = gpa;
>  		scb_s->riccbd = hpa;
>  	}
>  	if ((scb_s->ecb & ECB_GS) && !(scb_s->ecd & ECD_HOSTREGMGMT)) {
> @@ -636,6 +642,7 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
>  			rc = set_validity_icpt(scb_s, 0x10b0U);
>  			goto unpin;
>  		}
> +		vsie_page->sdnx_gpa = gpa;
>  		scb_s->sdnxo = hpa | sdnxc;
>  	}
>  	return 0;
>
Cornelia Huck Jan. 23, 2018, 12:29 p.m. UTC | #2
On Tue, 16 Jan 2018 18:15:26 +0100
David Hildenbrand <david@redhat.com> wrote:

> This way, the values cannot changed, even if another VCPU might try to

s/changed/be changed/ (or change?)

> mess with the nested SCB currently getting executed by another VCPU.
> 
> We now always use the same gpa for pinning and unpinning a page (for
> unpinning, it is only relevant to mark the guest page dirty for
> migration).
> 
> Signed-off-by: David Hildenbrand <david@redhat.com>
> ---
>  arch/s390/kvm/vsie.c | 37 ++++++++++++++++++++++---------------
>  1 file changed, 22 insertions(+), 15 deletions(-)

Acked-by: Cornelia Huck <cohuck@redhat.com>
Christian Borntraeger March 6, 2018, 11:32 a.m. UTC | #3
David, 

this seems to have broken VSIE für guests with >64vcpus as we only pin
the first page of the SCA. Starting guest3 with 240 vcpu triggers
quickly guest2 crashes. Can you have a look?


On 01/16/2018 06:15 PM, David Hildenbrand wrote:
> This way, the values cannot changed, even if another VCPU might try to
> mess with the nested SCB currently getting executed by another VCPU.
> 
> We now always use the same gpa for pinning and unpinning a page (for
> unpinning, it is only relevant to mark the guest page dirty for
> migration).
> 
> Signed-off-by: David Hildenbrand <david@redhat.com>
> ---
>  arch/s390/kvm/vsie.c | 37 ++++++++++++++++++++++---------------
>  1 file changed, 22 insertions(+), 15 deletions(-)
> 
> diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
> index 79ea444a0534..546d8b106ee0 100644
> --- a/arch/s390/kvm/vsie.c
> +++ b/arch/s390/kvm/vsie.c
> @@ -38,7 +38,13 @@ struct vsie_page {
>  	struct gmap *gmap;			/* 0x0220 */
>  	/* address of the last reported fault to guest2 */
>  	unsigned long fault_addr;		/* 0x0228 */
> -	__u8 reserved[0x0700 - 0x0230];		/* 0x0230 */
> +	/* calculated guest addresses of satellite control blocks */
> +	gpa_t sca_gpa;				/* 0x0230 */
> +	gpa_t itdba_gpa;			/* 0x0238 */
> +	gpa_t gvrd_gpa;				/* 0x0240 */
> +	gpa_t riccbd_gpa;			/* 0x0248 */
> +	gpa_t sdnx_gpa;				/* 0x0250 */
> +	__u8 reserved[0x0700 - 0x0258];		/* 0x0258 */
>  	struct kvm_s390_crypto_cb crycb;	/* 0x0700 */
>  	__u8 fac[S390_ARCH_FAC_LIST_SIZE_BYTE];	/* 0x0800 */
>  };
> @@ -475,46 +481,42 @@ static void unpin_guest_page(struct kvm *kvm, gpa_t gpa, hpa_t hpa)
>  /* unpin all blocks previously pinned by pin_blocks(), marking them dirty */
>  static void unpin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
>  {
> -	struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
>  	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
>  	hpa_t hpa;
> -	gpa_t gpa;
> 
>  	hpa = (u64) scb_s->scaoh << 32 | scb_s->scaol;
>  	if (hpa) {
> -		gpa = scb_o->scaol & ~0xfUL;
> -		if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_64BSCAO))
> -			gpa |= (u64) scb_o->scaoh << 32;
> -		unpin_guest_page(vcpu->kvm, gpa, hpa);
> +		unpin_guest_page(vcpu->kvm, vsie_page->sca_gpa, hpa);
> +		vsie_page->sca_gpa = 0;
>  		scb_s->scaol = 0;
>  		scb_s->scaoh = 0;
>  	}
> 
>  	hpa = scb_s->itdba;
>  	if (hpa) {
> -		gpa = scb_o->itdba & ~0xffUL;
> -		unpin_guest_page(vcpu->kvm, gpa, hpa);
> +		unpin_guest_page(vcpu->kvm, vsie_page->itdba_gpa, hpa);
> +		vsie_page->itdba_gpa = 0;
>  		scb_s->itdba = 0;
>  	}
> 
>  	hpa = scb_s->gvrd;
>  	if (hpa) {
> -		gpa = scb_o->gvrd & ~0x1ffUL;
> -		unpin_guest_page(vcpu->kvm, gpa, hpa);
> +		unpin_guest_page(vcpu->kvm, vsie_page->gvrd_gpa, hpa);
> +		vsie_page->gvrd_gpa = 0;
>  		scb_s->gvrd = 0;
>  	}
> 
>  	hpa = scb_s->riccbd;
>  	if (hpa) {
> -		gpa = scb_o->riccbd & ~0x3fUL;
> -		unpin_guest_page(vcpu->kvm, gpa, hpa);
> +		unpin_guest_page(vcpu->kvm, vsie_page->riccbd_gpa, hpa);
> +		vsie_page->riccbd_gpa = 0;
>  		scb_s->riccbd = 0;
>  	}
> 
>  	hpa = scb_s->sdnxo;
>  	if (hpa) {
> -		gpa = scb_o->sdnxo;
> -		unpin_guest_page(vcpu->kvm, gpa, hpa);
> +		unpin_guest_page(vcpu->kvm, vsie_page->sdnx_gpa, hpa);
> +		vsie_page->sdnx_gpa = 0;
>  		scb_s->sdnxo = 0;
>  	}
>  }
> @@ -559,6 +561,7 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
>  		}
>  		if (rc)
>  			goto unpin;
> +		vsie_page->sca_gpa = gpa;
>  		scb_s->scaoh = (u32)((u64)hpa >> 32);
>  		scb_s->scaol = (u32)(u64)hpa;
>  	}
> @@ -575,6 +578,7 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
>  			rc = set_validity_icpt(scb_s, 0x0080U);
>  			goto unpin;
>  		}
> +		vsie_page->itdba_gpa = gpa;
>  		scb_s->itdba = hpa;
>  	}
> 
> @@ -593,6 +597,7 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
>  			rc = set_validity_icpt(scb_s, 0x1310U);
>  			goto unpin;
>  		}
> +		vsie_page->gvrd_gpa = gpa;
>  		scb_s->gvrd = hpa;
>  	}
> 
> @@ -609,6 +614,7 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
>  			goto unpin;
>  		}
>  		/* Validity 0x0044 will be checked by SIE */
> +		vsie_page->riccbd_gpa = gpa;
>  		scb_s->riccbd = hpa;
>  	}
>  	if ((scb_s->ecb & ECB_GS) && !(scb_s->ecd & ECD_HOSTREGMGMT)) {
> @@ -636,6 +642,7 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
>  			rc = set_validity_icpt(scb_s, 0x10b0U);
>  			goto unpin;
>  		}
> +		vsie_page->sdnx_gpa = gpa;
>  		scb_s->sdnxo = hpa | sdnxc;
>  	}
>  	return 0;
>
Christian Borntraeger March 6, 2018, 11:38 a.m. UTC | #4
On 03/06/2018 12:32 PM, Christian Borntraeger wrote:
> David, 
> 
> this seems to have broken VSIE für guests with >64vcpus as we only pin
> the first page of the SCA. Starting guest3 with 240 vcpu triggers
> quickly guest2 crashes. Can you have a look?
> 

Hmm, or maybe it was always broken and we never supported pinning of
ESCA properly?
Christian Borntraeger March 6, 2018, 11:53 a.m. UTC | #5
On 03/06/2018 12:38 PM, Christian Borntraeger wrote:
> On 03/06/2018 12:32 PM, Christian Borntraeger wrote:
>> David, 
>>
>> this seems to have broken VSIE für guests with >64vcpus as we only pin
>> the first page of the SCA. Starting guest3 with 240 vcpu triggers
>> quickly guest2 crashes. Can you have a look?
>>
> 
> Hmm, or maybe it was always broken and we never supported pinning of
> ESCA properly?

4.15.0 is also broken, must be an earlier change.
David Hildenbrand March 6, 2018, 12:03 p.m. UTC | #6
On 06.03.2018 12:32, Christian Borntraeger wrote:
> David, 
> 
> this seems to have broken VSIE für guests with >64vcpus as we only pin
> the first page of the SCA. Starting guest3 with 240 vcpu triggers
> quickly guest2 crashes. Can you have a look?
> 

We only use the sca ipte lock, not the CPU entries. This sounds more
like a general KVM problem.
David Hildenbrand March 6, 2018, 12:09 p.m. UTC | #7
On 06.03.2018 12:53, Christian Borntraeger wrote:
> 
> 
> On 03/06/2018 12:38 PM, Christian Borntraeger wrote:
>> On 03/06/2018 12:32 PM, Christian Borntraeger wrote:
>>> David, 
>>>
>>> this seems to have broken VSIE für guests with >64vcpus as we only pin
>>> the first page of the SCA. Starting guest3 with 240 vcpu triggers
>>> quickly guest2 crashes. Can you have a look?
>>>
>>
>> Hmm, or maybe it was always broken and we never supported pinning of
>> ESCA properly?
> 
> 4.15.0 is also broken, must be an earlier change.
> 

We never claimed ESCA support for L2 (because all SCA CPU entries are
invalid). But I once implemented support for > 64 CPUs if all entries
are unused in kvm

commit a6940674c384ebf56aa0c44f417032de2b67100c
Author: David Hildenbrand <dahi@linux.vnet.ibm.com>
Date:   Mon Aug 8 22:39:32 2016 +0200

    KVM: s390: allow 255 VCPUs when sca entries aren't used

    If the SCA entries aren't used by the hardware (no SIGPIF), we
    can simply not set the entries, stick to the basic sca and allow more
    than 64 VCPUs.

    To hinder any other facility from using these entries, let's properly
    provoke intercepts by not setting the MCN and keeping the entries
    unset.

    This effectively allows when running KVM under KVM (vSIE) or under
z/VM to
    provide more than 64 VCPUs to a guest. Let's limit it to 255 for now, to
    not run into problems if the CPU numbers are limited somewhere else.

    Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
    Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>


Maybe something was broken in that part of the code.

Patch
diff mbox

diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index 79ea444a0534..546d8b106ee0 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -38,7 +38,13 @@  struct vsie_page {
 	struct gmap *gmap;			/* 0x0220 */
 	/* address of the last reported fault to guest2 */
 	unsigned long fault_addr;		/* 0x0228 */
-	__u8 reserved[0x0700 - 0x0230];		/* 0x0230 */
+	/* calculated guest addresses of satellite control blocks */
+	gpa_t sca_gpa;				/* 0x0230 */
+	gpa_t itdba_gpa;			/* 0x0238 */
+	gpa_t gvrd_gpa;				/* 0x0240 */
+	gpa_t riccbd_gpa;			/* 0x0248 */
+	gpa_t sdnx_gpa;				/* 0x0250 */
+	__u8 reserved[0x0700 - 0x0258];		/* 0x0258 */
 	struct kvm_s390_crypto_cb crycb;	/* 0x0700 */
 	__u8 fac[S390_ARCH_FAC_LIST_SIZE_BYTE];	/* 0x0800 */
 };
@@ -475,46 +481,42 @@  static void unpin_guest_page(struct kvm *kvm, gpa_t gpa, hpa_t hpa)
 /* unpin all blocks previously pinned by pin_blocks(), marking them dirty */
 static void unpin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 {
-	struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
 	struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
 	hpa_t hpa;
-	gpa_t gpa;
 
 	hpa = (u64) scb_s->scaoh << 32 | scb_s->scaol;
 	if (hpa) {
-		gpa = scb_o->scaol & ~0xfUL;
-		if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_64BSCAO))
-			gpa |= (u64) scb_o->scaoh << 32;
-		unpin_guest_page(vcpu->kvm, gpa, hpa);
+		unpin_guest_page(vcpu->kvm, vsie_page->sca_gpa, hpa);
+		vsie_page->sca_gpa = 0;
 		scb_s->scaol = 0;
 		scb_s->scaoh = 0;
 	}
 
 	hpa = scb_s->itdba;
 	if (hpa) {
-		gpa = scb_o->itdba & ~0xffUL;
-		unpin_guest_page(vcpu->kvm, gpa, hpa);
+		unpin_guest_page(vcpu->kvm, vsie_page->itdba_gpa, hpa);
+		vsie_page->itdba_gpa = 0;
 		scb_s->itdba = 0;
 	}
 
 	hpa = scb_s->gvrd;
 	if (hpa) {
-		gpa = scb_o->gvrd & ~0x1ffUL;
-		unpin_guest_page(vcpu->kvm, gpa, hpa);
+		unpin_guest_page(vcpu->kvm, vsie_page->gvrd_gpa, hpa);
+		vsie_page->gvrd_gpa = 0;
 		scb_s->gvrd = 0;
 	}
 
 	hpa = scb_s->riccbd;
 	if (hpa) {
-		gpa = scb_o->riccbd & ~0x3fUL;
-		unpin_guest_page(vcpu->kvm, gpa, hpa);
+		unpin_guest_page(vcpu->kvm, vsie_page->riccbd_gpa, hpa);
+		vsie_page->riccbd_gpa = 0;
 		scb_s->riccbd = 0;
 	}
 
 	hpa = scb_s->sdnxo;
 	if (hpa) {
-		gpa = scb_o->sdnxo;
-		unpin_guest_page(vcpu->kvm, gpa, hpa);
+		unpin_guest_page(vcpu->kvm, vsie_page->sdnx_gpa, hpa);
+		vsie_page->sdnx_gpa = 0;
 		scb_s->sdnxo = 0;
 	}
 }
@@ -559,6 +561,7 @@  static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 		}
 		if (rc)
 			goto unpin;
+		vsie_page->sca_gpa = gpa;
 		scb_s->scaoh = (u32)((u64)hpa >> 32);
 		scb_s->scaol = (u32)(u64)hpa;
 	}
@@ -575,6 +578,7 @@  static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 			rc = set_validity_icpt(scb_s, 0x0080U);
 			goto unpin;
 		}
+		vsie_page->itdba_gpa = gpa;
 		scb_s->itdba = hpa;
 	}
 
@@ -593,6 +597,7 @@  static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 			rc = set_validity_icpt(scb_s, 0x1310U);
 			goto unpin;
 		}
+		vsie_page->gvrd_gpa = gpa;
 		scb_s->gvrd = hpa;
 	}
 
@@ -609,6 +614,7 @@  static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 			goto unpin;
 		}
 		/* Validity 0x0044 will be checked by SIE */
+		vsie_page->riccbd_gpa = gpa;
 		scb_s->riccbd = hpa;
 	}
 	if ((scb_s->ecb & ECB_GS) && !(scb_s->ecd & ECD_HOSTREGMGMT)) {
@@ -636,6 +642,7 @@  static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 			rc = set_validity_icpt(scb_s, 0x10b0U);
 			goto unpin;
 		}
+		vsie_page->sdnx_gpa = gpa;
 		scb_s->sdnxo = hpa | sdnxc;
 	}
 	return 0;