diff mbox series

[v2,20/42] KVM: S390: protvirt: Introduce instruction data area bounce buffer

Message ID 20200214222658.12946-21-borntraeger@de.ibm.com (mailing list archive)
State New, archived
Headers show
Series KVM: s390: Add support for protected VMs | expand

Commit Message

Christian Borntraeger Feb. 14, 2020, 10:26 p.m. UTC
From: Janosch Frank <frankja@linux.ibm.com>

Now that we can't access guest memory anymore, we have a dedicated
satellite block that's a bounce buffer for instruction data.

We re-use the memop interface to copy the instruction data to / from
userspace. This lets us re-use a lot of QEMU code which used that
interface to make logical guest memory accesses which are not possible
anymore in protected mode anyway.

Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
Reviewed-by: Thomas Huth <thuth@redhat.com>
[borntraeger@de.ibm.com: patch merging, splitting, fixing]
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/include/asm/kvm_host.h | 11 ++++++++-
 arch/s390/kvm/kvm-s390.c         | 42 ++++++++++++++++++++++++++++++++
 arch/s390/kvm/pv.c               |  9 +++++++
 include/uapi/linux/kvm.h         |  6 ++++-
 4 files changed, 66 insertions(+), 2 deletions(-)

Comments

David Hildenbrand Feb. 17, 2020, 11:08 a.m. UTC | #1
> @@ -4460,6 +4489,10 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
>  
>  	switch (mop->op) {
>  	case KVM_S390_MEMOP_LOGICAL_READ:
> +		if (kvm_s390_pv_is_protected(vcpu->kvm)) {
> +			r = -EINVAL;
> +			break;
> +		}

Could we have a possible race with disabling code, especially while
concurrently freeing? (sorry if I ask again, there was just a flood of
emails)

>  		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
>  			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
>  					    mop->size, GACC_FETCH);
> @@ -4472,6 +4505,10 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
>  		}
>  		break;
>  	case KVM_S390_MEMOP_LOGICAL_WRITE:
> +		if (kvm_s390_pv_is_protected(vcpu->kvm)) {
> +			r = -EINVAL;
> +			break;
> +		}

dito

>  		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
>  			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
>  					    mop->size, GACC_STORE);
> @@ -4483,6 +4520,11 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
>  		}
>  		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
>  		break;
> +	case KVM_S390_MEMOP_SIDA_READ:
> +	case KVM_S390_MEMOP_SIDA_WRITE:
> +		/* we are locked against sida going away by the vcpu->mutex */
> +		r = kvm_s390_guest_sida_op(vcpu, mop);
> +		break;
>  	default:
>  		r = -EINVAL;
>  	}
> diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c
> index 09573e36c329..80169a9b43ec 100644
> --- a/arch/s390/kvm/pv.c
> +++ b/arch/s390/kvm/pv.c
> @@ -92,6 +92,7 @@ int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc)
>  
>  	free_pages(vcpu->arch.pv.stor_base,
>  		   get_order(uv_info.guest_cpu_stor_len));
> +	free_page(sida_origin(vcpu->arch.sie_block));
>  	vcpu->arch.sie_block->pv_handle_cpu = 0;
>  	vcpu->arch.sie_block->pv_handle_config = 0;
>  	memset(&vcpu->arch.pv, 0, sizeof(vcpu->arch.pv));
> @@ -121,6 +122,14 @@ int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc)
>  	uvcb.state_origin = (u64)vcpu->arch.sie_block;
>  	uvcb.stor_origin = (u64)vcpu->arch.pv.stor_base;
>  
> +	/* Alloc Secure Instruction Data Area Designation */
> +	vcpu->arch.sie_block->sidad = __get_free_page(GFP_KERNEL | __GFP_ZERO);
> +	if (!vcpu->arch.sie_block->sidad) {
> +		free_pages(vcpu->arch.pv.stor_base,
> +			   get_order(uv_info.guest_cpu_stor_len));
> +		return -ENOMEM;
> +	}
> +
>  	cc = uv_call(0, (u64)&uvcb);
>  	*rc = uvcb.header.rc;
>  	*rrc = uvcb.header.rrc;
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index 207915488502..0fdee1bc3798 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -475,11 +475,15 @@ struct kvm_s390_mem_op {
>  	__u32 op;		/* type of operation */
>  	__u64 buf;		/* buffer in userspace */
>  	__u8 ar;		/* the access register number */
> -	__u8 reserved[31];	/* should be set to 0 */
> +	__u8 reserved21[3];	/* should be set to 0 */
> +	__u32 sida_offset;	/* offset into the sida */
> +	__u8 reserved28[24];	/* should be set to 0 */
>  };

As discussed, I'd prefer an overlaying layout for the sida, as the ar
does not make any sense (correct me if I'm wrong :) )

__u32 op;		/* type of operation */
__u64 buf;		/* buffer in userspace */
uinon {
	__u8 ar;		/* the access register number */
	__u32 sida_offset;	/* offset into the sida */
	__u8 reserved[32];	/* should be set to 0 */
};

With something like that

Reviewed-by: David Hildenbrand <david@redhat.com>
Janosch Frank Feb. 17, 2020, 2:47 p.m. UTC | #2
On 2/17/20 12:08 PM, David Hildenbrand wrote:
>> @@ -4460,6 +4489,10 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
>>  
>>  	switch (mop->op) {
>>  	case KVM_S390_MEMOP_LOGICAL_READ:
>> +		if (kvm_s390_pv_is_protected(vcpu->kvm)) {
>> +			r = -EINVAL;
>> +			break;
>> +		}
> 
> Could we have a possible race with disabling code, especially while
> concurrently freeing? (sorry if I ask again, there was just a flood of
> emails)
> 
>>  		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
>>  			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
>>  					    mop->size, GACC_FETCH);
>> @@ -4472,6 +4505,10 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
>>  		}
>>  		break;
>>  	case KVM_S390_MEMOP_LOGICAL_WRITE:
>> +		if (kvm_s390_pv_is_protected(vcpu->kvm)) {
>> +			r = -EINVAL;
>> +			break;
>> +		}
> 
> dito
> 
>>  		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
>>  			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
>>  					    mop->size, GACC_STORE);
>> @@ -4483,6 +4520,11 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
>>  		}
>>  		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
>>  		break;
>> +	case KVM_S390_MEMOP_SIDA_READ:
>> +	case KVM_S390_MEMOP_SIDA_WRITE:
>> +		/* we are locked against sida going away by the vcpu->mutex */
>> +		r = kvm_s390_guest_sida_op(vcpu, mop);
>> +		break;
>>  	default:
>>  		r = -EINVAL;
>>  	}
>> diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c
>> index 09573e36c329..80169a9b43ec 100644
>> --- a/arch/s390/kvm/pv.c
>> +++ b/arch/s390/kvm/pv.c
>> @@ -92,6 +92,7 @@ int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc)
>>  
>>  	free_pages(vcpu->arch.pv.stor_base,
>>  		   get_order(uv_info.guest_cpu_stor_len));
>> +	free_page(sida_origin(vcpu->arch.sie_block));
>>  	vcpu->arch.sie_block->pv_handle_cpu = 0;
>>  	vcpu->arch.sie_block->pv_handle_config = 0;
>>  	memset(&vcpu->arch.pv, 0, sizeof(vcpu->arch.pv));
>> @@ -121,6 +122,14 @@ int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc)
>>  	uvcb.state_origin = (u64)vcpu->arch.sie_block;
>>  	uvcb.stor_origin = (u64)vcpu->arch.pv.stor_base;
>>  
>> +	/* Alloc Secure Instruction Data Area Designation */
>> +	vcpu->arch.sie_block->sidad = __get_free_page(GFP_KERNEL | __GFP_ZERO);
>> +	if (!vcpu->arch.sie_block->sidad) {
>> +		free_pages(vcpu->arch.pv.stor_base,
>> +			   get_order(uv_info.guest_cpu_stor_len));
>> +		return -ENOMEM;
>> +	}
>> +
>>  	cc = uv_call(0, (u64)&uvcb);
>>  	*rc = uvcb.header.rc;
>>  	*rrc = uvcb.header.rrc;
>> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
>> index 207915488502..0fdee1bc3798 100644
>> --- a/include/uapi/linux/kvm.h
>> +++ b/include/uapi/linux/kvm.h
>> @@ -475,11 +475,15 @@ struct kvm_s390_mem_op {
>>  	__u32 op;		/* type of operation */
>>  	__u64 buf;		/* buffer in userspace */
>>  	__u8 ar;		/* the access register number */
>> -	__u8 reserved[31];	/* should be set to 0 */
>> +	__u8 reserved21[3];	/* should be set to 0 */
>> +	__u32 sida_offset;	/* offset into the sida */
>> +	__u8 reserved28[24];	/* should be set to 0 */
>>  };
> 
> As discussed, I'd prefer an overlaying layout for the sida, as the ar
> does not make any sense (correct me if I'm wrong :) )

That wouldn't work, because we still check mop->ar < 16 in
kvm_s390_guest_mem_op(). Also we currently check mop contents twice
because we overload mem_op() with the SIDA operations.

Using a separate IOCTL is cleaner...

> 
> __u32 op;		/* type of operation */
> __u64 buf;		/* buffer in userspace */
> uinon {
> 	__u8 ar;		/* the access register number */
> 	__u32 sida_offset;	/* offset into the sida */
> 	__u8 reserved[32];	/* should be set to 0 */
> };
> 
> With something like that
> 
> Reviewed-by: David Hildenbrand <david@redhat.com>
Christian Borntraeger Feb. 17, 2020, 3 p.m. UTC | #3
On 17.02.20 15:47, Janosch Frank wrote:
> On 2/17/20 12:08 PM, David Hildenbrand wrote:
>>> @@ -4460,6 +4489,10 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
>>>  
>>>  	switch (mop->op) {
>>>  	case KVM_S390_MEMOP_LOGICAL_READ:
>>> +		if (kvm_s390_pv_is_protected(vcpu->kvm)) {
>>> +			r = -EINVAL;
>>> +			break;
>>> +		}
>>
>> Could we have a possible race with disabling code, especially while
>> concurrently freeing? (sorry if I ask again, there was just a flood of
>> emails)

see my other reply. Hopefully fixed soon.[...]

>>> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
>>> index 207915488502..0fdee1bc3798 100644
>>> --- a/include/uapi/linux/kvm.h
>>> +++ b/include/uapi/linux/kvm.h
>>> @@ -475,11 +475,15 @@ struct kvm_s390_mem_op {
>>>  	__u32 op;		/* type of operation */
>>>  	__u64 buf;		/* buffer in userspace */
>>>  	__u8 ar;		/* the access register number */
>>> -	__u8 reserved[31];	/* should be set to 0 */
>>> +	__u8 reserved21[3];	/* should be set to 0 */
>>> +	__u32 sida_offset;	/* offset into the sida */
>>> +	__u8 reserved28[24];	/* should be set to 0 */
>>>  };
>>
>> As discussed, I'd prefer an overlaying layout for the sida, as the ar
>> does not make any sense (correct me if I'm wrong :) )
> 
> That wouldn't work, because we still check mop->ar < 16 in
> kvm_s390_guest_mem_op(). Also we currently check mop contents twice
> because we overload mem_op() with the SIDA operations.
> 
> Using a separate IOCTL is cleaner...

I would rather use the current patch instead of adding a new ioctl.
Janosch Frank Feb. 17, 2020, 3:38 p.m. UTC | #4
On 2/17/20 4:00 PM, Christian Borntraeger wrote:
> 
> 
> On 17.02.20 15:47, Janosch Frank wrote:
>> On 2/17/20 12:08 PM, David Hildenbrand wrote:
>>>> @@ -4460,6 +4489,10 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
>>>>  
>>>>  	switch (mop->op) {
>>>>  	case KVM_S390_MEMOP_LOGICAL_READ:
>>>> +		if (kvm_s390_pv_is_protected(vcpu->kvm)) {
>>>> +			r = -EINVAL;
>>>> +			break;
>>>> +		}
>>>
>>> Could we have a possible race with disabling code, especially while
>>> concurrently freeing? (sorry if I ask again, there was just a flood of
>>> emails)
> 
> see my other reply. Hopefully fixed soon.[...]
> 
>>>> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
>>>> index 207915488502..0fdee1bc3798 100644
>>>> --- a/include/uapi/linux/kvm.h
>>>> +++ b/include/uapi/linux/kvm.h
>>>> @@ -475,11 +475,15 @@ struct kvm_s390_mem_op {
>>>>  	__u32 op;		/* type of operation */
>>>>  	__u64 buf;		/* buffer in userspace */
>>>>  	__u8 ar;		/* the access register number */
>>>> -	__u8 reserved[31];	/* should be set to 0 */
>>>> +	__u8 reserved21[3];	/* should be set to 0 */
>>>> +	__u32 sida_offset;	/* offset into the sida */
>>>> +	__u8 reserved28[24];	/* should be set to 0 */
>>>>  };
>>>
>>> As discussed, I'd prefer an overlaying layout for the sida, as the ar
>>> does not make any sense (correct me if I'm wrong :) )
>>
>> That wouldn't work, because we still check mop->ar < 16 in
>> kvm_s390_guest_mem_op(). Also we currently check mop contents twice
>> because we overload mem_op() with the SIDA operations.
>>
>> Using a separate IOCTL is cleaner...
> 
> I would rather use the current patch instead of adding a new ioctl.
> 

Well, then I'd suggest moving the normal memop ops into an own function
and also move the ar check into there.
Christian Borntraeger Feb. 17, 2020, 4:58 p.m. UTC | #5
On 17.02.20 16:38, Janosch Frank wrote:

>> I would rather use the current patch instead of adding a new ioctl.
>>
> 
> Well, then I'd suggest moving the normal memop ops into an own function
> and also move the ar check into there.

Yes, I will refactor that and change the kvm_s390_mem_op structure.
diff mbox series

Patch

diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 4fcbb055a565..aa945b101fff 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -127,6 +127,12 @@  struct mcck_volatile_info {
 #define CR14_INITIAL_MASK (CR14_UNUSED_32 | CR14_UNUSED_33 | \
 			   CR14_EXTERNAL_DAMAGE_SUBMASK)
 
+#define SIDAD_SIZE_MASK		0xff
+#define sida_origin(sie_block) \
+	((sie_block)->sidad & PAGE_MASK)
+#define sida_size(sie_block) \
+	((((sie_block)->sidad & SIDAD_SIZE_MASK) + 1) * PAGE_SIZE)
+
 #define CPUSTAT_STOPPED    0x80000000
 #define CPUSTAT_WAIT       0x10000000
 #define CPUSTAT_ECALL_PEND 0x08000000
@@ -315,7 +321,10 @@  struct kvm_s390_sie_block {
 #define CRYCB_FORMAT2 0x00000003
 	__u32	crycbd;			/* 0x00fc */
 	__u64	gcr[16];		/* 0x0100 */
-	__u64	gbea;			/* 0x0180 */
+	union {
+		__u64	gbea;		/* 0x0180 */
+		__u64	sidad;
+	};
 	__u8    reserved188[8];		/* 0x0188 */
 	__u64   sdnxo;			/* 0x0190 */
 	__u8    reserved198[8];		/* 0x0198 */
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 6ebb0dae5a2e..8db82aaf1275 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -4435,6 +4435,34 @@  static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
 	return r;
 }
 
+static long kvm_s390_guest_sida_op(struct kvm_vcpu *vcpu,
+				   struct kvm_s390_mem_op *mop)
+{
+	void __user *uaddr = (void __user *)mop->buf;
+	int r = 0;
+
+	if (mop->flags || !mop->size)
+		return -EINVAL;
+	if (mop->size + mop->sida_offset < mop->size)
+		return -EINVAL;
+	if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block))
+		return -E2BIG;
+
+	switch (mop->op) {
+	case KVM_S390_MEMOP_SIDA_READ:
+		if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) +
+				 mop->sida_offset), mop->size))
+			r = -EFAULT;
+
+		break;
+	case KVM_S390_MEMOP_SIDA_WRITE:
+		if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) +
+				   mop->sida_offset), uaddr, mop->size))
+			r = -EFAULT;
+		break;
+	}
+	return r;
+}
 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
 				  struct kvm_s390_mem_op *mop)
 {
@@ -4444,6 +4472,7 @@  static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
 	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
 				    | KVM_S390_MEMOP_F_CHECK_ONLY;
 
+	BUILD_BUG_ON(sizeof(*mop) != 64);
 	if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
 		return -EINVAL;
 
@@ -4460,6 +4489,10 @@  static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
 
 	switch (mop->op) {
 	case KVM_S390_MEMOP_LOGICAL_READ:
+		if (kvm_s390_pv_is_protected(vcpu->kvm)) {
+			r = -EINVAL;
+			break;
+		}
 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
 					    mop->size, GACC_FETCH);
@@ -4472,6 +4505,10 @@  static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
 		}
 		break;
 	case KVM_S390_MEMOP_LOGICAL_WRITE:
+		if (kvm_s390_pv_is_protected(vcpu->kvm)) {
+			r = -EINVAL;
+			break;
+		}
 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
 					    mop->size, GACC_STORE);
@@ -4483,6 +4520,11 @@  static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
 		}
 		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
 		break;
+	case KVM_S390_MEMOP_SIDA_READ:
+	case KVM_S390_MEMOP_SIDA_WRITE:
+		/* we are locked against sida going away by the vcpu->mutex */
+		r = kvm_s390_guest_sida_op(vcpu, mop);
+		break;
 	default:
 		r = -EINVAL;
 	}
diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c
index 09573e36c329..80169a9b43ec 100644
--- a/arch/s390/kvm/pv.c
+++ b/arch/s390/kvm/pv.c
@@ -92,6 +92,7 @@  int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc)
 
 	free_pages(vcpu->arch.pv.stor_base,
 		   get_order(uv_info.guest_cpu_stor_len));
+	free_page(sida_origin(vcpu->arch.sie_block));
 	vcpu->arch.sie_block->pv_handle_cpu = 0;
 	vcpu->arch.sie_block->pv_handle_config = 0;
 	memset(&vcpu->arch.pv, 0, sizeof(vcpu->arch.pv));
@@ -121,6 +122,14 @@  int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc)
 	uvcb.state_origin = (u64)vcpu->arch.sie_block;
 	uvcb.stor_origin = (u64)vcpu->arch.pv.stor_base;
 
+	/* Alloc Secure Instruction Data Area Designation */
+	vcpu->arch.sie_block->sidad = __get_free_page(GFP_KERNEL | __GFP_ZERO);
+	if (!vcpu->arch.sie_block->sidad) {
+		free_pages(vcpu->arch.pv.stor_base,
+			   get_order(uv_info.guest_cpu_stor_len));
+		return -ENOMEM;
+	}
+
 	cc = uv_call(0, (u64)&uvcb);
 	*rc = uvcb.header.rc;
 	*rrc = uvcb.header.rrc;
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 207915488502..0fdee1bc3798 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -475,11 +475,15 @@  struct kvm_s390_mem_op {
 	__u32 op;		/* type of operation */
 	__u64 buf;		/* buffer in userspace */
 	__u8 ar;		/* the access register number */
-	__u8 reserved[31];	/* should be set to 0 */
+	__u8 reserved21[3];	/* should be set to 0 */
+	__u32 sida_offset;	/* offset into the sida */
+	__u8 reserved28[24];	/* should be set to 0 */
 };
 /* types for kvm_s390_mem_op->op */
 #define KVM_S390_MEMOP_LOGICAL_READ	0
 #define KVM_S390_MEMOP_LOGICAL_WRITE	1
+#define KVM_S390_MEMOP_SIDA_READ	2
+#define KVM_S390_MEMOP_SIDA_WRITE	3
 /* flags for kvm_s390_mem_op->flags */
 #define KVM_S390_MEMOP_F_CHECK_ONLY		(1ULL << 0)
 #define KVM_S390_MEMOP_F_INJECT_EXCEPTION	(1ULL << 1)