Message ID | 20191024114059.102802-21-frankja@linux.ibm.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | KVM: s390: Add support for protected VMs | expand |
On 24/10/2019 13.40, Janosch Frank wrote: > Now that we can't access guest memory anymore, we have a dedicated > sattelite block that's a bounce buffer for instruction data. "satellite block that is ..." > We re-use the memop interface to copy the instruction data to / from > userspace. This lets us re-use a lot of QEMU code which used that > interface to make logical guest memory accesses which are not possible > anymore in protected mode anyway. > > Signed-off-by: Janosch Frank <frankja@linux.ibm.com> > --- > arch/s390/include/asm/kvm_host.h | 5 ++++- > arch/s390/kvm/kvm-s390.c | 31 +++++++++++++++++++++++++++++++ > arch/s390/kvm/pv.c | 9 +++++++++ > 3 files changed, 44 insertions(+), 1 deletion(-) > > diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h > index 5deabf9734d9..2a8a1e21e1c3 100644 > --- a/arch/s390/include/asm/kvm_host.h > +++ b/arch/s390/include/asm/kvm_host.h > @@ -308,7 +308,10 @@ struct kvm_s390_sie_block { > #define CRYCB_FORMAT2 0x00000003 > __u32 crycbd; /* 0x00fc */ > __u64 gcr[16]; /* 0x0100 */ > - __u64 gbea; /* 0x0180 */ > + union { > + __u64 gbea; /* 0x0180 */ > + __u64 sidad; > + }; > __u8 reserved188[8]; /* 0x0188 */ > __u64 sdnxo; /* 0x0190 */ > __u8 reserved198[8]; /* 0x0198 */ > diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c > index 97d3a81e5074..6747cb6cf062 100644 > --- a/arch/s390/kvm/kvm-s390.c > +++ b/arch/s390/kvm/kvm-s390.c > @@ -4416,6 +4416,13 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu, > if (mop->size > MEM_OP_MAX_SIZE) > return -E2BIG; > > + /* Protected guests move instruction data over the satellite > + * block which has its own size limit > + */ > + if (kvm_s390_pv_is_protected(vcpu->kvm) && > + mop->size > ((vcpu->arch.sie_block->sidad & 0x0f) + 1) * PAGE_SIZE) > + return -E2BIG; > + > if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) { > tmpbuf = vmalloc(mop->size); > if (!tmpbuf) > @@ -4427,10 +4434,22 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu, > switch (mop->op) { > case KVM_S390_MEMOP_LOGICAL_READ: > if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { > + if (kvm_s390_pv_is_protected(vcpu->kvm)) { > + r = 0; > + break; Please add a short comment to the code why this is required / ok. > + } > r = check_gva_range(vcpu, mop->gaddr, mop->ar, > mop->size, GACC_FETCH); > break; > } > + if (kvm_s390_pv_is_protected(vcpu->kvm)) { > + r = 0; > + if (copy_to_user(uaddr, (void *)vcpu->arch.sie_block->sidad + > + (mop->gaddr & ~PAGE_MASK), That looks bogus. Couldn't userspace use mop->gaddr = 4095 and mop->size = 4095 to read most of the page beyond the sidad page (assuming that it is mapped, too)? I think you have to take mop->gaddr into account in your new check at the beginning of the function, too. Or should the ioctl maybe even be restricted to mop->gaddr == 0 now? Is there maybe also a way to validate that gaddr & PAGE_MASK really matches the page that we have in sidad? > + mop->size)) > + r = -EFAULT; > + break; > + } > r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size); > if (r == 0) { > if (copy_to_user(uaddr, tmpbuf, mop->size)) > @@ -4439,10 +4458,22 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu, > break; > case KVM_S390_MEMOP_LOGICAL_WRITE: > if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { > + if (kvm_s390_pv_is_protected(vcpu->kvm)) { > + r = 0; > + break; > + } > r = check_gva_range(vcpu, mop->gaddr, mop->ar, > mop->size, GACC_STORE); > break; > } > + if (kvm_s390_pv_is_protected(vcpu->kvm)) { > + r = 0; > + if (copy_from_user((void *)vcpu->arch.sie_block->sidad + > + (mop->gaddr & ~PAGE_MASK), uaddr, > + mop->size)) dito, of course. > + r = -EFAULT; > + break; > + } > if (copy_from_user(tmpbuf, uaddr, mop->size)) { > r = -EFAULT; > break; Thomas
On 11/14/19 4:36 PM, Thomas Huth wrote: > On 24/10/2019 13.40, Janosch Frank wrote: >> Now that we can't access guest memory anymore, we have a dedicated >> sattelite block that's a bounce buffer for instruction data. > > "satellite block that is ..." > >> We re-use the memop interface to copy the instruction data to / from >> userspace. This lets us re-use a lot of QEMU code which used that >> interface to make logical guest memory accesses which are not possible >> anymore in protected mode anyway. >> >> Signed-off-by: Janosch Frank <frankja@linux.ibm.com> >> --- >> arch/s390/include/asm/kvm_host.h | 5 ++++- >> arch/s390/kvm/kvm-s390.c | 31 +++++++++++++++++++++++++++++++ >> arch/s390/kvm/pv.c | 9 +++++++++ >> 3 files changed, 44 insertions(+), 1 deletion(-) >> >> diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h >> index 5deabf9734d9..2a8a1e21e1c3 100644 >> --- a/arch/s390/include/asm/kvm_host.h >> +++ b/arch/s390/include/asm/kvm_host.h >> @@ -308,7 +308,10 @@ struct kvm_s390_sie_block { >> #define CRYCB_FORMAT2 0x00000003 >> __u32 crycbd; /* 0x00fc */ >> __u64 gcr[16]; /* 0x0100 */ >> - __u64 gbea; /* 0x0180 */ >> + union { >> + __u64 gbea; /* 0x0180 */ >> + __u64 sidad; >> + }; >> __u8 reserved188[8]; /* 0x0188 */ >> __u64 sdnxo; /* 0x0190 */ >> __u8 reserved198[8]; /* 0x0198 */ >> diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c >> index 97d3a81e5074..6747cb6cf062 100644 >> --- a/arch/s390/kvm/kvm-s390.c >> +++ b/arch/s390/kvm/kvm-s390.c >> @@ -4416,6 +4416,13 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu, >> if (mop->size > MEM_OP_MAX_SIZE) >> return -E2BIG; >> >> + /* Protected guests move instruction data over the satellite >> + * block which has its own size limit >> + */ >> + if (kvm_s390_pv_is_protected(vcpu->kvm) && >> + mop->size > ((vcpu->arch.sie_block->sidad & 0x0f) + 1) * PAGE_SIZE) >> + return -E2BIG; >> + >> if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) { >> tmpbuf = vmalloc(mop->size); >> if (!tmpbuf) >> @@ -4427,10 +4434,22 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu, >> switch (mop->op) { >> case KVM_S390_MEMOP_LOGICAL_READ: >> if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { >> + if (kvm_s390_pv_is_protected(vcpu->kvm)) { >> + r = 0; >> + break; > > Please add a short comment to the code why this is required / ok. > >> + } >> r = check_gva_range(vcpu, mop->gaddr, mop->ar, >> mop->size, GACC_FETCH); >> break; >> } >> + if (kvm_s390_pv_is_protected(vcpu->kvm)) { >> + r = 0; >> + if (copy_to_user(uaddr, (void *)vcpu->arch.sie_block->sidad + >> + (mop->gaddr & ~PAGE_MASK), > > That looks bogus. Couldn't userspace use mop->gaddr = 4095 and mop->size > = 4095 to read most of the page beyond the sidad page (assuming that it > is mapped, too)? > I think you have to take mop->gaddr into account in your new check at > the beginning of the function, too. Ah, right, that needs some fixing. > > Or should the ioctl maybe even be restricted to mop->gaddr == 0 now? Is > there maybe also a way to validate that gaddr & PAGE_MASK really matches > the page that we have in sidad? There was one lonely usage of the ioctl where we still read from an offset, either in IO or SCLP. Having 0 as a requirement would certainly help, but I was a bit afraid of changing too many things in qemu. > >> + mop->size)) >> + r = -EFAULT; >> + break; >> + } >> r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size); >> if (r == 0) { >> if (copy_to_user(uaddr, tmpbuf, mop->size)) >> @@ -4439,10 +4458,22 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu, >> break; >> case KVM_S390_MEMOP_LOGICAL_WRITE: >> if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { >> + if (kvm_s390_pv_is_protected(vcpu->kvm)) { >> + r = 0; >> + break; >> + } >> r = check_gva_range(vcpu, mop->gaddr, mop->ar, >> mop->size, GACC_STORE); >> break; >> } >> + if (kvm_s390_pv_is_protected(vcpu->kvm)) { >> + r = 0; >> + if (copy_from_user((void *)vcpu->arch.sie_block->sidad + >> + (mop->gaddr & ~PAGE_MASK), uaddr, >> + mop->size)) > > dito, of course. > >> + r = -EFAULT; >> + break; >> + } >> if (copy_from_user(tmpbuf, uaddr, mop->size)) { >> r = -EFAULT; >> break; > > Thomas >
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 5deabf9734d9..2a8a1e21e1c3 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -308,7 +308,10 @@ struct kvm_s390_sie_block { #define CRYCB_FORMAT2 0x00000003 __u32 crycbd; /* 0x00fc */ __u64 gcr[16]; /* 0x0100 */ - __u64 gbea; /* 0x0180 */ + union { + __u64 gbea; /* 0x0180 */ + __u64 sidad; + }; __u8 reserved188[8]; /* 0x0188 */ __u64 sdnxo; /* 0x0190 */ __u8 reserved198[8]; /* 0x0198 */ diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 97d3a81e5074..6747cb6cf062 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -4416,6 +4416,13 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu, if (mop->size > MEM_OP_MAX_SIZE) return -E2BIG; + /* Protected guests move instruction data over the satellite + * block which has its own size limit + */ + if (kvm_s390_pv_is_protected(vcpu->kvm) && + mop->size > ((vcpu->arch.sie_block->sidad & 0x0f) + 1) * PAGE_SIZE) + return -E2BIG; + if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) { tmpbuf = vmalloc(mop->size); if (!tmpbuf) @@ -4427,10 +4434,22 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu, switch (mop->op) { case KVM_S390_MEMOP_LOGICAL_READ: if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { + if (kvm_s390_pv_is_protected(vcpu->kvm)) { + r = 0; + break; + } r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, GACC_FETCH); break; } + if (kvm_s390_pv_is_protected(vcpu->kvm)) { + r = 0; + if (copy_to_user(uaddr, (void *)vcpu->arch.sie_block->sidad + + (mop->gaddr & ~PAGE_MASK), + mop->size)) + r = -EFAULT; + break; + } r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size); if (r == 0) { if (copy_to_user(uaddr, tmpbuf, mop->size)) @@ -4439,10 +4458,22 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu, break; case KVM_S390_MEMOP_LOGICAL_WRITE: if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { + if (kvm_s390_pv_is_protected(vcpu->kvm)) { + r = 0; + break; + } r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, GACC_STORE); break; } + if (kvm_s390_pv_is_protected(vcpu->kvm)) { + r = 0; + if (copy_from_user((void *)vcpu->arch.sie_block->sidad + + (mop->gaddr & ~PAGE_MASK), uaddr, + mop->size)) + r = -EFAULT; + break; + } if (copy_from_user(tmpbuf, uaddr, mop->size)) { r = -EFAULT; break; diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c index 383e660e2221..be7d558ab897 100644 --- a/arch/s390/kvm/pv.c +++ b/arch/s390/kvm/pv.c @@ -119,6 +119,7 @@ int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu) free_pages(vcpu->arch.pv.stor_base, get_order(uv_info.guest_cpu_stor_len)); + free_page(vcpu->arch.sie_block->sidad); /* Clear cpu and vm handle */ memset(&vcpu->arch.sie_block->reserved10, 0, sizeof(vcpu->arch.sie_block->reserved10)); @@ -150,6 +151,14 @@ int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu) uvcb.state_origin = (u64)vcpu->arch.sie_block; uvcb.stor_origin = (u64)vcpu->arch.pv.stor_base; + /* Alloc Secure Instruction Data Area Designation */ + vcpu->arch.sie_block->sidad = __get_free_page(GFP_KERNEL | __GFP_ZERO); + if (!vcpu->arch.sie_block->sidad) { + free_pages(vcpu->arch.pv.stor_base, + get_order(uv_info.guest_cpu_stor_len)); + return -ENOMEM; + } + rc = uv_call(0, (u64)&uvcb); VCPU_EVENT(vcpu, 3, "PROTVIRT CREATE VCPU: cpu %d handle %llx rc %x rrc %x", vcpu->vcpu_id, uvcb.cpu_handle, uvcb.header.rc,
Now that we can't access guest memory anymore, we have a dedicated sattelite block that's a bounce buffer for instruction data. We re-use the memop interface to copy the instruction data to / from userspace. This lets us re-use a lot of QEMU code which used that interface to make logical guest memory accesses which are not possible anymore in protected mode anyway. Signed-off-by: Janosch Frank <frankja@linux.ibm.com> --- arch/s390/include/asm/kvm_host.h | 5 ++++- arch/s390/kvm/kvm-s390.c | 31 +++++++++++++++++++++++++++++++ arch/s390/kvm/pv.c | 9 +++++++++ 3 files changed, 44 insertions(+), 1 deletion(-)