[RFC,20/37] KVM: S390: protvirt: Introduce instruction data area bounce buffer
diff mbox series

Message ID 20191024114059.102802-21-frankja@linux.ibm.com
State New
Headers show
Series
  • KVM: s390: Add support for protected VMs
Related show

Commit Message

Janosch Frank Oct. 24, 2019, 11:40 a.m. UTC
Now that we can't access guest memory anymore, we have a dedicated
sattelite block that's a bounce buffer for instruction data.

We re-use the memop interface to copy the instruction data to / from
userspace. This lets us re-use a lot of QEMU code which used that
interface to make logical guest memory accesses which are not possible
anymore in protected mode anyway.

Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
---
 arch/s390/include/asm/kvm_host.h |  5 ++++-
 arch/s390/kvm/kvm-s390.c         | 31 +++++++++++++++++++++++++++++++
 arch/s390/kvm/pv.c               |  9 +++++++++
 3 files changed, 44 insertions(+), 1 deletion(-)

Comments

Thomas Huth Nov. 14, 2019, 3:36 p.m. UTC | #1
On 24/10/2019 13.40, Janosch Frank wrote:
> Now that we can't access guest memory anymore, we have a dedicated
> sattelite block that's a bounce buffer for instruction data.

"satellite block that is ..."

> We re-use the memop interface to copy the instruction data to / from
> userspace. This lets us re-use a lot of QEMU code which used that
> interface to make logical guest memory accesses which are not possible
> anymore in protected mode anyway.
> 
> Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
> ---
>  arch/s390/include/asm/kvm_host.h |  5 ++++-
>  arch/s390/kvm/kvm-s390.c         | 31 +++++++++++++++++++++++++++++++
>  arch/s390/kvm/pv.c               |  9 +++++++++
>  3 files changed, 44 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
> index 5deabf9734d9..2a8a1e21e1c3 100644
> --- a/arch/s390/include/asm/kvm_host.h
> +++ b/arch/s390/include/asm/kvm_host.h
> @@ -308,7 +308,10 @@ struct kvm_s390_sie_block {
>  #define CRYCB_FORMAT2 0x00000003
>  	__u32	crycbd;			/* 0x00fc */
>  	__u64	gcr[16];		/* 0x0100 */
> -	__u64	gbea;			/* 0x0180 */
> +	union {
> +		__u64	gbea;			/* 0x0180 */
> +		__u64	sidad;
> +	};
>  	__u8    reserved188[8];		/* 0x0188 */
>  	__u64   sdnxo;			/* 0x0190 */
>  	__u8    reserved198[8];		/* 0x0198 */
> diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
> index 97d3a81e5074..6747cb6cf062 100644
> --- a/arch/s390/kvm/kvm-s390.c
> +++ b/arch/s390/kvm/kvm-s390.c
> @@ -4416,6 +4416,13 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
>  	if (mop->size > MEM_OP_MAX_SIZE)
>  		return -E2BIG;
>  
> +	/* Protected guests move instruction data over the satellite
> +	 * block which has its own size limit
> +	 */
> +	if (kvm_s390_pv_is_protected(vcpu->kvm) &&
> +	    mop->size > ((vcpu->arch.sie_block->sidad & 0x0f) + 1) * PAGE_SIZE)
> +		return -E2BIG;
> +
>  	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
>  		tmpbuf = vmalloc(mop->size);
>  		if (!tmpbuf)
> @@ -4427,10 +4434,22 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
>  	switch (mop->op) {
>  	case KVM_S390_MEMOP_LOGICAL_READ:
>  		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
> +			if (kvm_s390_pv_is_protected(vcpu->kvm)) {
> +				r = 0;
> +				break;

Please add a short comment to the code why this is required / ok.

> +			}
>  			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
>  					    mop->size, GACC_FETCH);
>  			break;
>  		}
> +		if (kvm_s390_pv_is_protected(vcpu->kvm)) {
> +			r = 0;
> +			if (copy_to_user(uaddr, (void *)vcpu->arch.sie_block->sidad +
> +					 (mop->gaddr & ~PAGE_MASK),

That looks bogus. Couldn't userspace use mop->gaddr = 4095 and mop->size
= 4095 to read most of the page beyond the sidad page (assuming that it
is mapped, too)?
I think you have to take mop->gaddr into account in your new check at
the beginning of the function, too.

Or should the ioctl maybe even be restricted to mop->gaddr == 0 now? Is
there maybe also a way to validate that gaddr & PAGE_MASK really matches
the page that we have in sidad?

> +					 mop->size))
> +				r = -EFAULT;
> +			break;
> +		}
>  		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
>  		if (r == 0) {
>  			if (copy_to_user(uaddr, tmpbuf, mop->size))
> @@ -4439,10 +4458,22 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
>  		break;
>  	case KVM_S390_MEMOP_LOGICAL_WRITE:
>  		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
> +			if (kvm_s390_pv_is_protected(vcpu->kvm)) {
> +				r = 0;
> +				break;
> +			}
>  			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
>  					    mop->size, GACC_STORE);
>  			break;
>  		}
> +		if (kvm_s390_pv_is_protected(vcpu->kvm)) {
> +			r = 0;
> +			if (copy_from_user((void *)vcpu->arch.sie_block->sidad +
> +					   (mop->gaddr & ~PAGE_MASK), uaddr,
> +					   mop->size))

dito, of course.

> +				r = -EFAULT;
> +			break;
> +		}
>  		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
>  			r = -EFAULT;
>  			break;

 Thomas
Janosch Frank Nov. 14, 2019, 4:04 p.m. UTC | #2
On 11/14/19 4:36 PM, Thomas Huth wrote:
> On 24/10/2019 13.40, Janosch Frank wrote:
>> Now that we can't access guest memory anymore, we have a dedicated
>> sattelite block that's a bounce buffer for instruction data.
> 
> "satellite block that is ..."
> 
>> We re-use the memop interface to copy the instruction data to / from
>> userspace. This lets us re-use a lot of QEMU code which used that
>> interface to make logical guest memory accesses which are not possible
>> anymore in protected mode anyway.
>>
>> Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
>> ---
>>  arch/s390/include/asm/kvm_host.h |  5 ++++-
>>  arch/s390/kvm/kvm-s390.c         | 31 +++++++++++++++++++++++++++++++
>>  arch/s390/kvm/pv.c               |  9 +++++++++
>>  3 files changed, 44 insertions(+), 1 deletion(-)
>>
>> diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
>> index 5deabf9734d9..2a8a1e21e1c3 100644
>> --- a/arch/s390/include/asm/kvm_host.h
>> +++ b/arch/s390/include/asm/kvm_host.h
>> @@ -308,7 +308,10 @@ struct kvm_s390_sie_block {
>>  #define CRYCB_FORMAT2 0x00000003
>>  	__u32	crycbd;			/* 0x00fc */
>>  	__u64	gcr[16];		/* 0x0100 */
>> -	__u64	gbea;			/* 0x0180 */
>> +	union {
>> +		__u64	gbea;			/* 0x0180 */
>> +		__u64	sidad;
>> +	};
>>  	__u8    reserved188[8];		/* 0x0188 */
>>  	__u64   sdnxo;			/* 0x0190 */
>>  	__u8    reserved198[8];		/* 0x0198 */
>> diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
>> index 97d3a81e5074..6747cb6cf062 100644
>> --- a/arch/s390/kvm/kvm-s390.c
>> +++ b/arch/s390/kvm/kvm-s390.c
>> @@ -4416,6 +4416,13 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
>>  	if (mop->size > MEM_OP_MAX_SIZE)
>>  		return -E2BIG;
>>  
>> +	/* Protected guests move instruction data over the satellite
>> +	 * block which has its own size limit
>> +	 */
>> +	if (kvm_s390_pv_is_protected(vcpu->kvm) &&
>> +	    mop->size > ((vcpu->arch.sie_block->sidad & 0x0f) + 1) * PAGE_SIZE)
>> +		return -E2BIG;
>> +
>>  	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
>>  		tmpbuf = vmalloc(mop->size);
>>  		if (!tmpbuf)
>> @@ -4427,10 +4434,22 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
>>  	switch (mop->op) {
>>  	case KVM_S390_MEMOP_LOGICAL_READ:
>>  		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
>> +			if (kvm_s390_pv_is_protected(vcpu->kvm)) {
>> +				r = 0;
>> +				break;
> 
> Please add a short comment to the code why this is required / ok.
> 
>> +			}
>>  			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
>>  					    mop->size, GACC_FETCH);
>>  			break;
>>  		}
>> +		if (kvm_s390_pv_is_protected(vcpu->kvm)) {
>> +			r = 0;
>> +			if (copy_to_user(uaddr, (void *)vcpu->arch.sie_block->sidad +
>> +					 (mop->gaddr & ~PAGE_MASK),
> 
> That looks bogus. Couldn't userspace use mop->gaddr = 4095 and mop->size
> = 4095 to read most of the page beyond the sidad page (assuming that it
> is mapped, too)?
> I think you have to take mop->gaddr into account in your new check at
> the beginning of the function, too.

Ah, right, that needs some fixing.

> 
> Or should the ioctl maybe even be restricted to mop->gaddr == 0 now? Is
> there maybe also a way to validate that gaddr & PAGE_MASK really matches
> the page that we have in sidad?

There was one lonely usage of the ioctl where we still read from an
offset, either in IO or SCLP. Having 0 as a requirement would certainly
help, but I was a bit afraid of changing too many things in qemu.

> 
>> +					 mop->size))
>> +				r = -EFAULT;
>> +			break;
>> +		}
>>  		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
>>  		if (r == 0) {
>>  			if (copy_to_user(uaddr, tmpbuf, mop->size))
>> @@ -4439,10 +4458,22 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
>>  		break;
>>  	case KVM_S390_MEMOP_LOGICAL_WRITE:
>>  		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
>> +			if (kvm_s390_pv_is_protected(vcpu->kvm)) {
>> +				r = 0;
>> +				break;
>> +			}
>>  			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
>>  					    mop->size, GACC_STORE);
>>  			break;
>>  		}
>> +		if (kvm_s390_pv_is_protected(vcpu->kvm)) {
>> +			r = 0;
>> +			if (copy_from_user((void *)vcpu->arch.sie_block->sidad +
>> +					   (mop->gaddr & ~PAGE_MASK), uaddr,
>> +					   mop->size))
> 
> dito, of course.
> 
>> +				r = -EFAULT;
>> +			break;
>> +		}
>>  		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
>>  			r = -EFAULT;
>>  			break;
> 
>  Thomas
>

Patch
diff mbox series

diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 5deabf9734d9..2a8a1e21e1c3 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -308,7 +308,10 @@  struct kvm_s390_sie_block {
 #define CRYCB_FORMAT2 0x00000003
 	__u32	crycbd;			/* 0x00fc */
 	__u64	gcr[16];		/* 0x0100 */
-	__u64	gbea;			/* 0x0180 */
+	union {
+		__u64	gbea;			/* 0x0180 */
+		__u64	sidad;
+	};
 	__u8    reserved188[8];		/* 0x0188 */
 	__u64   sdnxo;			/* 0x0190 */
 	__u8    reserved198[8];		/* 0x0198 */
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 97d3a81e5074..6747cb6cf062 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -4416,6 +4416,13 @@  static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
 	if (mop->size > MEM_OP_MAX_SIZE)
 		return -E2BIG;
 
+	/* Protected guests move instruction data over the satellite
+	 * block which has its own size limit
+	 */
+	if (kvm_s390_pv_is_protected(vcpu->kvm) &&
+	    mop->size > ((vcpu->arch.sie_block->sidad & 0x0f) + 1) * PAGE_SIZE)
+		return -E2BIG;
+
 	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
 		tmpbuf = vmalloc(mop->size);
 		if (!tmpbuf)
@@ -4427,10 +4434,22 @@  static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
 	switch (mop->op) {
 	case KVM_S390_MEMOP_LOGICAL_READ:
 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
+			if (kvm_s390_pv_is_protected(vcpu->kvm)) {
+				r = 0;
+				break;
+			}
 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
 					    mop->size, GACC_FETCH);
 			break;
 		}
+		if (kvm_s390_pv_is_protected(vcpu->kvm)) {
+			r = 0;
+			if (copy_to_user(uaddr, (void *)vcpu->arch.sie_block->sidad +
+					 (mop->gaddr & ~PAGE_MASK),
+					 mop->size))
+				r = -EFAULT;
+			break;
+		}
 		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
 		if (r == 0) {
 			if (copy_to_user(uaddr, tmpbuf, mop->size))
@@ -4439,10 +4458,22 @@  static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
 		break;
 	case KVM_S390_MEMOP_LOGICAL_WRITE:
 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
+			if (kvm_s390_pv_is_protected(vcpu->kvm)) {
+				r = 0;
+				break;
+			}
 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
 					    mop->size, GACC_STORE);
 			break;
 		}
+		if (kvm_s390_pv_is_protected(vcpu->kvm)) {
+			r = 0;
+			if (copy_from_user((void *)vcpu->arch.sie_block->sidad +
+					   (mop->gaddr & ~PAGE_MASK), uaddr,
+					   mop->size))
+				r = -EFAULT;
+			break;
+		}
 		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
 			r = -EFAULT;
 			break;
diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c
index 383e660e2221..be7d558ab897 100644
--- a/arch/s390/kvm/pv.c
+++ b/arch/s390/kvm/pv.c
@@ -119,6 +119,7 @@  int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu)
 
 	free_pages(vcpu->arch.pv.stor_base,
 		   get_order(uv_info.guest_cpu_stor_len));
+	free_page(vcpu->arch.sie_block->sidad);
 	/* Clear cpu and vm handle */
 	memset(&vcpu->arch.sie_block->reserved10, 0,
 	       sizeof(vcpu->arch.sie_block->reserved10));
@@ -150,6 +151,14 @@  int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu)
 	uvcb.state_origin = (u64)vcpu->arch.sie_block;
 	uvcb.stor_origin = (u64)vcpu->arch.pv.stor_base;
 
+	/* Alloc Secure Instruction Data Area Designation */
+	vcpu->arch.sie_block->sidad = __get_free_page(GFP_KERNEL | __GFP_ZERO);
+	if (!vcpu->arch.sie_block->sidad) {
+		free_pages(vcpu->arch.pv.stor_base,
+			   get_order(uv_info.guest_cpu_stor_len));
+		return -ENOMEM;
+	}
+
 	rc = uv_call(0, (u64)&uvcb);
 	VCPU_EVENT(vcpu, 3, "PROTVIRT CREATE VCPU: cpu %d handle %llx rc %x rrc %x",
 		   vcpu->vcpu_id, uvcb.cpu_handle, uvcb.header.rc,