Message ID | 20180116171526.12343-3-david@redhat.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 01/16/2018 06:15 PM, David Hildenbrand wrote: > This way, the values cannot changed, even if another VCPU might try to > mess with the nested SCB currently getting executed by another VCPU. > > We now always use the same gpa for pinning and unpinning a page (for > unpinning, it is only relevant to mark the guest page dirty for > migration). > > Signed-off-by: David Hildenbrand <david@redhat.com> Reviewed-by: Christian Borntraeger <borntraeger@de.ibm.com> thanks applied. > --- > arch/s390/kvm/vsie.c | 37 ++++++++++++++++++++++--------------- > 1 file changed, 22 insertions(+), 15 deletions(-) > > diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c > index 79ea444a0534..546d8b106ee0 100644 > --- a/arch/s390/kvm/vsie.c > +++ b/arch/s390/kvm/vsie.c > @@ -38,7 +38,13 @@ struct vsie_page { > struct gmap *gmap; /* 0x0220 */ > /* address of the last reported fault to guest2 */ > unsigned long fault_addr; /* 0x0228 */ > - __u8 reserved[0x0700 - 0x0230]; /* 0x0230 */ > + /* calculated guest addresses of satellite control blocks */ > + gpa_t sca_gpa; /* 0x0230 */ > + gpa_t itdba_gpa; /* 0x0238 */ > + gpa_t gvrd_gpa; /* 0x0240 */ > + gpa_t riccbd_gpa; /* 0x0248 */ > + gpa_t sdnx_gpa; /* 0x0250 */ > + __u8 reserved[0x0700 - 0x0258]; /* 0x0258 */ > struct kvm_s390_crypto_cb crycb; /* 0x0700 */ > __u8 fac[S390_ARCH_FAC_LIST_SIZE_BYTE]; /* 0x0800 */ > }; > @@ -475,46 +481,42 @@ static void unpin_guest_page(struct kvm *kvm, gpa_t gpa, hpa_t hpa) > /* unpin all blocks previously pinned by pin_blocks(), marking them dirty */ > static void unpin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) > { > - struct kvm_s390_sie_block *scb_o = vsie_page->scb_o; > struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s; > hpa_t hpa; > - gpa_t gpa; > > hpa = (u64) scb_s->scaoh << 32 | scb_s->scaol; > if (hpa) { > - gpa = scb_o->scaol & ~0xfUL; > - if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_64BSCAO)) > - gpa |= (u64) scb_o->scaoh << 32; > - unpin_guest_page(vcpu->kvm, 
gpa, hpa); > + unpin_guest_page(vcpu->kvm, vsie_page->sca_gpa, hpa); > + vsie_page->sca_gpa = 0; > scb_s->scaol = 0; > scb_s->scaoh = 0; > } > > hpa = scb_s->itdba; > if (hpa) { > - gpa = scb_o->itdba & ~0xffUL; > - unpin_guest_page(vcpu->kvm, gpa, hpa); > + unpin_guest_page(vcpu->kvm, vsie_page->itdba_gpa, hpa); > + vsie_page->itdba_gpa = 0; > scb_s->itdba = 0; > } > > hpa = scb_s->gvrd; > if (hpa) { > - gpa = scb_o->gvrd & ~0x1ffUL; > - unpin_guest_page(vcpu->kvm, gpa, hpa); > + unpin_guest_page(vcpu->kvm, vsie_page->gvrd_gpa, hpa); > + vsie_page->gvrd_gpa = 0; > scb_s->gvrd = 0; > } > > hpa = scb_s->riccbd; > if (hpa) { > - gpa = scb_o->riccbd & ~0x3fUL; > - unpin_guest_page(vcpu->kvm, gpa, hpa); > + unpin_guest_page(vcpu->kvm, vsie_page->riccbd_gpa, hpa); > + vsie_page->riccbd_gpa = 0; > scb_s->riccbd = 0; > } > > hpa = scb_s->sdnxo; > if (hpa) { > - gpa = scb_o->sdnxo; > - unpin_guest_page(vcpu->kvm, gpa, hpa); > + unpin_guest_page(vcpu->kvm, vsie_page->sdnx_gpa, hpa); > + vsie_page->sdnx_gpa = 0; > scb_s->sdnxo = 0; > } > } > @@ -559,6 +561,7 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) > } > if (rc) > goto unpin; > + vsie_page->sca_gpa = gpa; > scb_s->scaoh = (u32)((u64)hpa >> 32); > scb_s->scaol = (u32)(u64)hpa; > } > @@ -575,6 +578,7 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) > rc = set_validity_icpt(scb_s, 0x0080U); > goto unpin; > } > + vsie_page->itdba_gpa = gpa; > scb_s->itdba = hpa; > } > > @@ -593,6 +597,7 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) > rc = set_validity_icpt(scb_s, 0x1310U); > goto unpin; > } > + vsie_page->gvrd_gpa = gpa; > scb_s->gvrd = hpa; > } > > @@ -609,6 +614,7 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) > goto unpin; > } > /* Validity 0x0044 will be checked by SIE */ > + vsie_page->riccbd_gpa = gpa; > scb_s->riccbd = hpa; > } > if ((scb_s->ecb & ECB_GS) && !(scb_s->ecd & ECD_HOSTREGMGMT)) { > @@ 
-636,6 +642,7 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) > rc = set_validity_icpt(scb_s, 0x10b0U); > goto unpin; > } > + vsie_page->sdnx_gpa = gpa; > scb_s->sdnxo = hpa | sdnxc; > } > return 0; >
On Tue, 16 Jan 2018 18:15:26 +0100 David Hildenbrand <david@redhat.com> wrote: > This way, the values cannot changed, even if another VCPU might try to s/changed/be changed/ (or change?) > mess with the nested SCB currently getting executed by another VCPU. > > We now always use the same gpa for pinning and unpinning a page (for > unpinning, it is only relevant to mark the guest page dirty for > migration). > > Signed-off-by: David Hildenbrand <david@redhat.com> > --- > arch/s390/kvm/vsie.c | 37 ++++++++++++++++++++++--------------- > 1 file changed, 22 insertions(+), 15 deletions(-) Acked-by: Cornelia Huck <cohuck@redhat.com>
David, this seems to have broken VSIE für guests with >64vcpus as we only pin the first page of the SCA. Starting guest3 with 240 vcpu triggers quickly guest2 crashes. Can you have a look? On 01/16/2018 06:15 PM, David Hildenbrand wrote: > This way, the values cannot changed, even if another VCPU might try to > mess with the nested SCB currently getting executed by another VCPU. > > We now always use the same gpa for pinning and unpinning a page (for > unpinning, it is only relevant to mark the guest page dirty for > migration). > > Signed-off-by: David Hildenbrand <david@redhat.com> > --- > arch/s390/kvm/vsie.c | 37 ++++++++++++++++++++++--------------- > 1 file changed, 22 insertions(+), 15 deletions(-) > > diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c > index 79ea444a0534..546d8b106ee0 100644 > --- a/arch/s390/kvm/vsie.c > +++ b/arch/s390/kvm/vsie.c > @@ -38,7 +38,13 @@ struct vsie_page { > struct gmap *gmap; /* 0x0220 */ > /* address of the last reported fault to guest2 */ > unsigned long fault_addr; /* 0x0228 */ > - __u8 reserved[0x0700 - 0x0230]; /* 0x0230 */ > + /* calculated guest addresses of satellite control blocks */ > + gpa_t sca_gpa; /* 0x0230 */ > + gpa_t itdba_gpa; /* 0x0238 */ > + gpa_t gvrd_gpa; /* 0x0240 */ > + gpa_t riccbd_gpa; /* 0x0248 */ > + gpa_t sdnx_gpa; /* 0x0250 */ > + __u8 reserved[0x0700 - 0x0258]; /* 0x0258 */ > struct kvm_s390_crypto_cb crycb; /* 0x0700 */ > __u8 fac[S390_ARCH_FAC_LIST_SIZE_BYTE]; /* 0x0800 */ > }; > @@ -475,46 +481,42 @@ static void unpin_guest_page(struct kvm *kvm, gpa_t gpa, hpa_t hpa) > /* unpin all blocks previously pinned by pin_blocks(), marking them dirty */ > static void unpin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) > { > - struct kvm_s390_sie_block *scb_o = vsie_page->scb_o; > struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s; > hpa_t hpa; > - gpa_t gpa; > > hpa = (u64) scb_s->scaoh << 32 | scb_s->scaol; > if (hpa) { > - gpa = scb_o->scaol & ~0xfUL; > - if 
(test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_64BSCAO)) > - gpa |= (u64) scb_o->scaoh << 32; > - unpin_guest_page(vcpu->kvm, gpa, hpa); > + unpin_guest_page(vcpu->kvm, vsie_page->sca_gpa, hpa); > + vsie_page->sca_gpa = 0; > scb_s->scaol = 0; > scb_s->scaoh = 0; > } > > hpa = scb_s->itdba; > if (hpa) { > - gpa = scb_o->itdba & ~0xffUL; > - unpin_guest_page(vcpu->kvm, gpa, hpa); > + unpin_guest_page(vcpu->kvm, vsie_page->itdba_gpa, hpa); > + vsie_page->itdba_gpa = 0; > scb_s->itdba = 0; > } > > hpa = scb_s->gvrd; > if (hpa) { > - gpa = scb_o->gvrd & ~0x1ffUL; > - unpin_guest_page(vcpu->kvm, gpa, hpa); > + unpin_guest_page(vcpu->kvm, vsie_page->gvrd_gpa, hpa); > + vsie_page->gvrd_gpa = 0; > scb_s->gvrd = 0; > } > > hpa = scb_s->riccbd; > if (hpa) { > - gpa = scb_o->riccbd & ~0x3fUL; > - unpin_guest_page(vcpu->kvm, gpa, hpa); > + unpin_guest_page(vcpu->kvm, vsie_page->riccbd_gpa, hpa); > + vsie_page->riccbd_gpa = 0; > scb_s->riccbd = 0; > } > > hpa = scb_s->sdnxo; > if (hpa) { > - gpa = scb_o->sdnxo; > - unpin_guest_page(vcpu->kvm, gpa, hpa); > + unpin_guest_page(vcpu->kvm, vsie_page->sdnx_gpa, hpa); > + vsie_page->sdnx_gpa = 0; > scb_s->sdnxo = 0; > } > } > @@ -559,6 +561,7 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) > } > if (rc) > goto unpin; > + vsie_page->sca_gpa = gpa; > scb_s->scaoh = (u32)((u64)hpa >> 32); > scb_s->scaol = (u32)(u64)hpa; > } > @@ -575,6 +578,7 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) > rc = set_validity_icpt(scb_s, 0x0080U); > goto unpin; > } > + vsie_page->itdba_gpa = gpa; > scb_s->itdba = hpa; > } > > @@ -593,6 +597,7 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) > rc = set_validity_icpt(scb_s, 0x1310U); > goto unpin; > } > + vsie_page->gvrd_gpa = gpa; > scb_s->gvrd = hpa; > } > > @@ -609,6 +614,7 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) > goto unpin; > } > /* Validity 0x0044 will be checked by SIE */ > 
+ vsie_page->riccbd_gpa = gpa; > scb_s->riccbd = hpa; > } > if ((scb_s->ecb & ECB_GS) && !(scb_s->ecd & ECD_HOSTREGMGMT)) { > @@ -636,6 +642,7 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) > rc = set_validity_icpt(scb_s, 0x10b0U); > goto unpin; > } > + vsie_page->sdnx_gpa = gpa; > scb_s->sdnxo = hpa | sdnxc; > } > return 0; >
On 03/06/2018 12:32 PM, Christian Borntraeger wrote: > David, > > this seems to have broken VSIE for guests with >64vcpus as we only pin > the first page of the SCA. Starting guest3 with 240 vcpus triggers > quickly guest2 crashes. Can you have a look? > Hmm, or maybe it was always broken and we never supported pinning of ESCA properly?
On 03/06/2018 12:38 PM, Christian Borntraeger wrote: > On 03/06/2018 12:32 PM, Christian Borntraeger wrote: >> David, >> >> this seems to have broken VSIE for guests with >64vcpus as we only pin >> the first page of the SCA. Starting guest3 with 240 vcpus triggers >> quickly guest2 crashes. Can you have a look? >> > > Hmm, or maybe it was always broken and we never supported pinning of > ESCA properly? 4.15.0 is also broken, must be an earlier change.
On 06.03.2018 12:32, Christian Borntraeger wrote: > David, > > this seems to have broken VSIE for guests with >64vcpus as we only pin > the first page of the SCA. Starting guest3 with 240 vcpus triggers > quickly guest2 crashes. Can you have a look? > We only use the sca ipte lock, not the CPU entries. This sounds more like a general KVM problem.
On 06.03.2018 12:53, Christian Borntraeger wrote: > > > On 03/06/2018 12:38 PM, Christian Borntraeger wrote: >> On 03/06/2018 12:32 PM, Christian Borntraeger wrote: >>> David, >>> >>> this seems to have broken VSIE für guests with >64vcpus as we only pin >>> the first page of the SCA. Starting guest3 with 240 vcpu triggers >>> quickly guest2 crashes. Can you have a look? >>> >> >> Hmm, or maybe it was always broken and we never supported pinning of >> ESCA properly? > > 4.15.0 is also broken, must be an earlier change. > We never claimed ESCA support for L2 (because all SCA CPU entries are invalid). But I once implemented support for > 64 CPUs if all entries are unused in kvm commit a6940674c384ebf56aa0c44f417032de2b67100c Author: David Hildenbrand <dahi@linux.vnet.ibm.com> Date: Mon Aug 8 22:39:32 2016 +0200 KVM: s390: allow 255 VCPUs when sca entries aren't used If the SCA entries aren't used by the hardware (no SIGPIF), we can simply not set the entries, stick to the basic sca and allow more than 64 VCPUs. To hinder any other facility from using these entries, let's properly provoke intercepts by not setting the MCN and keeping the entries unset. This effectively allows when running KVM under KVM (vSIE) or under z/VM to provide more than 64 VCPUs to a guest. Let's limit it to 255 for now, to not run into problems if the CPU numbers are limited somewhere else. Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com> Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com> Maybe something was broken in that part of the code.
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index 79ea444a0534..546d8b106ee0 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -38,7 +38,13 @@ struct vsie_page { struct gmap *gmap; /* 0x0220 */ /* address of the last reported fault to guest2 */ unsigned long fault_addr; /* 0x0228 */ - __u8 reserved[0x0700 - 0x0230]; /* 0x0230 */ + /* calculated guest addresses of satellite control blocks */ + gpa_t sca_gpa; /* 0x0230 */ + gpa_t itdba_gpa; /* 0x0238 */ + gpa_t gvrd_gpa; /* 0x0240 */ + gpa_t riccbd_gpa; /* 0x0248 */ + gpa_t sdnx_gpa; /* 0x0250 */ + __u8 reserved[0x0700 - 0x0258]; /* 0x0258 */ struct kvm_s390_crypto_cb crycb; /* 0x0700 */ __u8 fac[S390_ARCH_FAC_LIST_SIZE_BYTE]; /* 0x0800 */ }; @@ -475,46 +481,42 @@ static void unpin_guest_page(struct kvm *kvm, gpa_t gpa, hpa_t hpa) /* unpin all blocks previously pinned by pin_blocks(), marking them dirty */ static void unpin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) { - struct kvm_s390_sie_block *scb_o = vsie_page->scb_o; struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s; hpa_t hpa; - gpa_t gpa; hpa = (u64) scb_s->scaoh << 32 | scb_s->scaol; if (hpa) { - gpa = scb_o->scaol & ~0xfUL; - if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_64BSCAO)) - gpa |= (u64) scb_o->scaoh << 32; - unpin_guest_page(vcpu->kvm, gpa, hpa); + unpin_guest_page(vcpu->kvm, vsie_page->sca_gpa, hpa); + vsie_page->sca_gpa = 0; scb_s->scaol = 0; scb_s->scaoh = 0; } hpa = scb_s->itdba; if (hpa) { - gpa = scb_o->itdba & ~0xffUL; - unpin_guest_page(vcpu->kvm, gpa, hpa); + unpin_guest_page(vcpu->kvm, vsie_page->itdba_gpa, hpa); + vsie_page->itdba_gpa = 0; scb_s->itdba = 0; } hpa = scb_s->gvrd; if (hpa) { - gpa = scb_o->gvrd & ~0x1ffUL; - unpin_guest_page(vcpu->kvm, gpa, hpa); + unpin_guest_page(vcpu->kvm, vsie_page->gvrd_gpa, hpa); + vsie_page->gvrd_gpa = 0; scb_s->gvrd = 0; } hpa = scb_s->riccbd; if (hpa) { - gpa = scb_o->riccbd & ~0x3fUL; - unpin_guest_page(vcpu->kvm, gpa, hpa); + 
unpin_guest_page(vcpu->kvm, vsie_page->riccbd_gpa, hpa); + vsie_page->riccbd_gpa = 0; scb_s->riccbd = 0; } hpa = scb_s->sdnxo; if (hpa) { - gpa = scb_o->sdnxo; - unpin_guest_page(vcpu->kvm, gpa, hpa); + unpin_guest_page(vcpu->kvm, vsie_page->sdnx_gpa, hpa); + vsie_page->sdnx_gpa = 0; scb_s->sdnxo = 0; } } @@ -559,6 +561,7 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) } if (rc) goto unpin; + vsie_page->sca_gpa = gpa; scb_s->scaoh = (u32)((u64)hpa >> 32); scb_s->scaol = (u32)(u64)hpa; } @@ -575,6 +578,7 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) rc = set_validity_icpt(scb_s, 0x0080U); goto unpin; } + vsie_page->itdba_gpa = gpa; scb_s->itdba = hpa; } @@ -593,6 +597,7 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) rc = set_validity_icpt(scb_s, 0x1310U); goto unpin; } + vsie_page->gvrd_gpa = gpa; scb_s->gvrd = hpa; } @@ -609,6 +614,7 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) goto unpin; } /* Validity 0x0044 will be checked by SIE */ + vsie_page->riccbd_gpa = gpa; scb_s->riccbd = hpa; } if ((scb_s->ecb & ECB_GS) && !(scb_s->ecd & ECD_HOSTREGMGMT)) { @@ -636,6 +642,7 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) rc = set_validity_icpt(scb_s, 0x10b0U); goto unpin; } + vsie_page->sdnx_gpa = gpa; scb_s->sdnxo = hpa | sdnxc; } return 0;
This way, the values cannot be changed, even if another VCPU might try to mess with the nested SCB currently getting executed by another VCPU. We now always use the same gpa for pinning and unpinning a page (for unpinning, it is only relevant to mark the guest page dirty for migration). Signed-off-by: David Hildenbrand <david@redhat.com> --- arch/s390/kvm/vsie.c | 37 ++++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 15 deletions(-)