
[v3,09/37] KVM: s390: protvirt: Add initial vm and cpu lifecycle handling

Message ID 20200220104020.5343-10-borntraeger@de.ibm.com
State New, archived
Series KVM: s390: Add support for protected VMs

Commit Message

Christian Borntraeger Feb. 20, 2020, 10:39 a.m. UTC
From: Janosch Frank <frankja@linux.ibm.com>

This contains 3 main changes:
1. changes in SIE control block handling for secure guests
2. helper functions for create/destroy/unpack secure guests
3. KVM_S390_PV_COMMAND ioctl to allow userspace to deal with secure
   machines
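
A minimal sketch of how userspace could drive the new ioctl on an existing
VM file descriptor (hypothetical helper, not the actual QEMU code):

	#include <string.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	/* Ask KVM to transition the whole VM into protected mode. */
	static int enable_pv(int vm_fd)
	{
		struct kvm_pv_cmd cmd;

		memset(&cmd, 0, sizeof(cmd));
		cmd.cmd = KVM_PV_ENABLE;	/* no payload; flags must be 0 */

		/* on failure, cmd.rc/cmd.rrc hold the ultravisor codes */
		return ioctl(vm_fd, KVM_S390_PV_COMMAND, &cmd);
	}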

Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
[borntraeger@de.ibm.com: patch merging, splitting, fixing]
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/include/asm/kvm_host.h |  24 ++-
 arch/s390/include/asm/uv.h       |  69 +++++++++
 arch/s390/kvm/Makefile           |   2 +-
 arch/s390/kvm/kvm-s390.c         | 188 ++++++++++++++++++++++-
 arch/s390/kvm/kvm-s390.h         |  35 +++++
 arch/s390/kvm/pv.c               | 256 +++++++++++++++++++++++++++++++
 include/uapi/linux/kvm.h         |  31 ++++
 7 files changed, 601 insertions(+), 4 deletions(-)
 create mode 100644 arch/s390/kvm/pv.c

Comments

David Hildenbrand Feb. 20, 2020, 1:02 p.m. UTC | #1
> +static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
> +{
> +	int r = 0;
> +	u16 dummy;
> +	void __user *argp = (void __user *)cmd->data;
> +
> +	switch (cmd->cmd) {
> +	case KVM_PV_ENABLE: {
> +		r = -EINVAL;
> +		if (kvm_s390_pv_is_protected(kvm))
> +			break;
> +
> +		r = kvm_s390_pv_alloc_vm(kvm);
> +		if (r)
> +			break;
> +
> +		/* FMT 4 SIE needs esca */
> +		r = sca_switch_to_extended(kvm);
> +		if (r) {
> +			kvm_s390_pv_dealloc_vm(kvm);
> +			kvm_s390_vcpu_unblock_all(kvm);

You forgot to remove that.

> +			mutex_unlock(&kvm->lock);

That's certainly wrong as well.

> +			break;
> +		}
> +		r = kvm_s390_pv_create_vm(kvm, &cmd->rc, &cmd->rrc);
> +		if (!r)
> +			r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
> +		if (r)
> +			kvm_s390_pv_destroy_vm(kvm, &dummy, &dummy);

Should there be a kvm_s390_pv_dealloc_vm() as well?

> +
> +		break;
> +	}
> +	case KVM_PV_DISABLE: {
> +		r = -EINVAL;
> +		if (!kvm_s390_pv_is_protected(kvm))
> +			break;
> +
> +		kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
> +		r = kvm_s390_pv_destroy_vm(kvm, &cmd->rc, &cmd->rrc);
> +		if (!r)
> +			kvm_s390_pv_dealloc_vm(kvm);

Hm, if destroy fails, the CPUs would already have been removed.

Is there a way to make kvm_s390_pv_destroy_vm() never fail? The return
value is always ignored except here ... which looks wrong.

> +		break;
> +	}

[...]

> @@ -2558,10 +2724,21 @@ static void kvm_free_vcpus(struct kvm *kvm)
>  
>  void kvm_arch_destroy_vm(struct kvm *kvm)
>  {
> +	u16 rc, rrc;
>  	kvm_free_vcpus(kvm);
>  	sca_dispose(kvm);
> -	debug_unregister(kvm->arch.dbf);
>  	kvm_s390_gisa_destroy(kvm);
> +	/*
> +	 * We are already at the end of life and kvm->lock is not taken.
> +	 * This is ok as the file descriptor is closed by now and nobody
> +	 * can mess with the pv state. To avoid lockdep_assert_held from
> +	 * complaining we do not use kvm_s390_pv_is_protected.
> +	 */
> +	if (kvm_s390_pv_get_handle(kvm)) {

I'd prefer something like kvm_s390_pv_is_protected_unlocked(), but I
guess for these few use cases, this is fine.


> +		kvm_s390_pv_destroy_vm(kvm, &rc, &rrc);
> +		kvm_s390_pv_dealloc_vm(kvm);
> +	}
> +	debug_unregister(kvm->arch.dbf);
>  	free_page((unsigned long)kvm->arch.sie_page2);
>  	if (!kvm_is_ucontrol(kvm))
>  		gmap_remove(kvm->arch.gmap);
> @@ -2657,6 +2834,9 @@ static int sca_switch_to_extended(struct kvm *kvm)
>  	unsigned int vcpu_idx;
>  	u32 scaol, scaoh;
>  
> +	if (kvm->arch.use_esca)
> +		return 0;
> +
>  	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
>  	if (!new_sca)
>  		return -ENOMEM;
> @@ -2908,6 +3088,7 @@ static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
>  static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
>  {
>  	int rc = 0;
> +	u16 uvrc, uvrrc;
>  
>  	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
>  						    CPUSTAT_SM |
> @@ -2975,6 +3156,11 @@ static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
>  
>  	kvm_s390_vcpu_crypto_setup(vcpu);
>  
> +	mutex_lock(&vcpu->kvm->lock);
> +	if (kvm_s390_pv_is_protected(vcpu->kvm))
> +		rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
> +	mutex_unlock(&vcpu->kvm->lock);

Do we have to clean up anything? (e.g., the cmma page) I *think*
kvm_arch_vcpu_destroy() is not called when kvm_arch_vcpu_create() fails ...

> +
>  	return rc;
>  }
>  
> diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
> index 83dabb18e4d9..d62de29b2d6c 100644
> --- a/arch/s390/kvm/kvm-s390.h
> +++ b/arch/s390/kvm/kvm-s390.h
> @@ -15,6 +15,7 @@
>  #include <linux/hrtimer.h>
>  #include <linux/kvm.h>
>  #include <linux/kvm_host.h>
> +#include <linux/lockdep.h>
>  #include <asm/facility.h>
>  #include <asm/processor.h>
>  #include <asm/sclp.h>
> @@ -207,6 +208,40 @@ static inline int kvm_s390_user_cpu_state_ctrl(struct kvm *kvm)
>  	return kvm->arch.user_cpu_state_ctrl != 0;
>  }
>  
> +/* implemented in pv.c */
> +void kvm_s390_pv_dealloc_vm(struct kvm *kvm);
> +int kvm_s390_pv_alloc_vm(struct kvm *kvm);
> +int kvm_s390_pv_create_vm(struct kvm *kvm, u16 *rc, u16 *rrc);
> +int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc);
> +int kvm_s390_pv_destroy_vm(struct kvm *kvm, u16 *rc, u16 *rrc);
> +void kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc);
> +int kvm_s390_pv_set_sec_parms(struct kvm *kvm, void *hdr, u64 length, u16 *rc,
> +			      u16 *rrc);
> +int kvm_s390_pv_unpack(struct kvm *kvm, unsigned long addr, unsigned long size,
> +		       unsigned long tweak, u16 *rc, u16 *rrc);
> +
> +static inline u64 kvm_s390_pv_get_handle(struct kvm *kvm)
> +{
> +	return kvm->arch.pv.handle;
> +}
> +
> +static inline u64 kvm_s390_pv_cpu_get_handle(struct kvm_vcpu *vcpu)
> +{
> +	return vcpu->arch.pv.handle;
> +}
> +
> +static inline bool kvm_s390_pv_is_protected(struct kvm *kvm)

Could have been "kvm_s390_is_protected" or "kvm_s390_is_pv", but also
fine with me. (maybe I even suggested that one without caring about that
detail :) )

[...]

> +
>  /* implemented in interrupt.c */
>  int kvm_s390_handle_wait(struct kvm_vcpu *vcpu);
>  void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu);
> diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c
> new file mode 100644
> index 000000000000..67ea9a18ed8f
> --- /dev/null
> +++ b/arch/s390/kvm/pv.c
> @@ -0,0 +1,256 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Hosting Secure Execution virtual machines
> + *
> + * Copyright IBM Corp. 2019
> + *    Author(s): Janosch Frank <frankja@linux.ibm.com>

I'd assume you're an author as well at this point ;)

[...]

> +
> +int kvm_s390_pv_set_sec_parms(struct kvm *kvm, void *hdr, u64 length, u16 *rc,
> +			      u16 *rrc)
> +{
> +	struct uv_cb_ssc uvcb = {
> +		.header.cmd = UVC_CMD_SET_SEC_CONF_PARAMS,
> +		.header.len = sizeof(uvcb),
> +		.sec_header_origin = (u64)hdr,
> +		.sec_header_len = length,
> +		.guest_handle = kvm_s390_pv_get_handle(kvm),
> +	};
> +	int cc;
> +
> +	cc = uv_call(0, (u64)&uvcb);

int cc = ... could be done.
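
I.e., as a sketch:

	int cc = uv_call(0, (u64)&uvcb);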


> +	*rc = uvcb.header.rc;
> +	*rrc = uvcb.header.rrc;
> +	KVM_UV_EVENT(kvm, 3, "PROTVIRT VM SET PARMS: rc %x rrc %x",
> +		     *rc, *rrc);
> +	if (cc)
> +		return -EINVAL;
> +	return 0;
> +}
> +
> +static int unpack_one(struct kvm *kvm, unsigned long addr, u64 tweak[2],
> +		      u16 *rc, u16 *rrc)
> +{
> +	struct uv_cb_unp uvcb = {
> +		.header.cmd = UVC_CMD_UNPACK_IMG,
> +		.header.len = sizeof(uvcb),
> +		.guest_handle = kvm_s390_pv_get_handle(kvm),
> +		.gaddr = addr,
> +		.tweak[0] = tweak[0],
> +		.tweak[1] = tweak[1],
> +	};
> +	int ret;
> +
> +	ret = gmap_make_secure(kvm->arch.gmap, addr, &uvcb);

... similarly, with ret.

> +	*rc = uvcb.header.rc;
> +	*rrc = uvcb.header.rrc;
> +
> +	if (ret && ret != -EAGAIN)
> +		KVM_UV_EVENT(kvm, 3, "PROTVIRT VM UNPACK: failed addr %llx with rc %x rrc %x",
> +			     uvcb.gaddr, *rc, *rrc);
> +	return ret;
> +}
> +
> +int kvm_s390_pv_unpack(struct kvm *kvm, unsigned long addr, unsigned long size,
> +		       unsigned long tweak, u16 *rc, u16 *rrc)
> +{
> +	u64 tw[2] = {tweak, 0};

I have no idea what tweaks are in this context. So I have to trust you
guys on the implementation, because I don't understand it.

Especially, why can't we simply have

s/tweak/tweak/

offset = 0;

while (offset < size) {
	...
	ret = unpack_one(kvm, addr, tweak, offset, rc, rrc);
				    ^ no idea what tweak is
	...
	... offset +=  PAGE_SIZE;
}

But maybe I am missing what the whole array is about.

> +	int ret = 0;
> +
> +	if (addr & ~PAGE_MASK || !size || size & ~PAGE_MASK)
> +		return -EINVAL;
> +
> +	KVM_UV_EVENT(kvm, 3, "PROTVIRT VM UNPACK: start addr %lx size %lx",
> +		     addr, size);
> +
> +	while (tw[1] < size) {
> +		ret = unpack_one(kvm, addr, tw, rc, rrc);
> +		if (ret == -EAGAIN) {
> +			cond_resched();
> +			if (fatal_signal_pending(current))
> +				break;
> +			continue;
> +		}
> +		if (ret)
> +			break;
> +		addr += PAGE_SIZE;
> +		tw[1] += PAGE_SIZE;
> +	}
> +	if (!ret)
> +		KVM_UV_EVENT(kvm, 3, "%s", "PROTVIRT VM UNPACK: successful");
> +	return ret;
> +}

[...]
> +enum pv_cmd_id {
> +	KVM_PV_ENABLE,
> +	KVM_PV_DISABLE,
> +	KVM_PV_VM_SET_SEC_PARMS,
> +	KVM_PV_VM_UNPACK,
> +	KVM_PV_VM_VERIFY,

I wonder if we should just drop "_VM" from all of these ...

[...]
Christian Borntraeger Feb. 20, 2020, 7:44 p.m. UTC | #2
On 20.02.20 14:02, David Hildenbrand wrote:
> 
>> +static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
>> +{
>> +	int r = 0;
>> +	u16 dummy;
>> +	void __user *argp = (void __user *)cmd->data;
>> +
>> +	switch (cmd->cmd) {
>> +	case KVM_PV_ENABLE: {
>> +		r = -EINVAL;
>> +		if (kvm_s390_pv_is_protected(kvm))
>> +			break;
>> +
>> +		r = kvm_s390_pv_alloc_vm(kvm);
>> +		if (r)
>> +			break;
>> +
>> +		/* FMT 4 SIE needs esca */
>> +		r = sca_switch_to_extended(kvm);
>> +		if (r) {
>> +			kvm_s390_pv_dealloc_vm(kvm);
>> +			kvm_s390_vcpu_unblock_all(kvm);
> 
> You forgot to remove that.

ack.
> 
>> +			mutex_unlock(&kvm->lock);
> 
> That's certainly wrong as well.

ack.

> 
>> +			break;
>> +		}
>> +		r = kvm_s390_pv_create_vm(kvm, &cmd->rc, &cmd->rrc);
>> +		if (!r)
>> +			r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
>> +		if (r)
>> +			kvm_s390_pv_destroy_vm(kvm, &dummy, &dummy);
> 
> Should there be a kvm_s390_pv_dealloc_vm() as well?

Hmm yes. But only if destroy does not fail...... see below.

> 
>> +
>> +		break;
>> +	}
>> +	case KVM_PV_DISABLE: {
>> +		r = -EINVAL;
>> +		if (!kvm_s390_pv_is_protected(kvm))
>> +			break;
>> +
>> +		kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
>> +		r = kvm_s390_pv_destroy_vm(kvm, &cmd->rc, &cmd->rrc);
>> +		if (!r)
>> +			kvm_s390_pv_dealloc_vm(kvm);
> 
> Hm, if destroy fails, the CPUs would already have been removed.
> 
> Is there a way to make kvm_s390_pv_destroy_vm() never fail? The return
> value is always ignored except here ... which looks wrong.

This should not fail. But if it does we should not free the memory that
we donated to the ultravisor. We then do have a memory leak, but this is 
necessary to keep the integrity of the host. 
I will fix the other places to only call dealloc when destroy succeeded.

Same for VCPU destroy. If that fails I should not free arch.pv.stor_base.
Will fix.
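
Roughly, as a sketch of that change in kvm_s390_pv_destroy_cpu():

	/* only free the donated storage when the destroy UVC succeeded */
	if (!cc)
		free_pages(vcpu->arch.pv.stor_base,
			   get_order(uv_info.guest_cpu_stor_len));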


> 
>> +		break;
>> +	}
> 
> [...]
> 
>> @@ -2558,10 +2724,21 @@ static void kvm_free_vcpus(struct kvm *kvm)
>>  
>>  void kvm_arch_destroy_vm(struct kvm *kvm)
>>  {
>> +	u16 rc, rrc;
>>  	kvm_free_vcpus(kvm);
>>  	sca_dispose(kvm);
>> -	debug_unregister(kvm->arch.dbf);
>>  	kvm_s390_gisa_destroy(kvm);
>> +	/*
>> +	 * We are already at the end of life and kvm->lock is not taken.
>> +	 * This is ok as the file descriptor is closed by now and nobody
>> +	 * can mess with the pv state. To avoid lockdep_assert_held from
>> +	 * complaining we do not use kvm_s390_pv_is_protected.
>> +	 */
>> +	if (kvm_s390_pv_get_handle(kvm)) {
> 
> I'd prefer something like kvm_s390_pv_is_protected_unlocked(), but I
> guess for these few use cases, this is fine.

Yes, let's leave it as is... :-)


> 
> 
>> +		kvm_s390_pv_destroy_vm(kvm, &rc, &rrc);
>> +		kvm_s390_pv_dealloc_vm(kvm);
>> +	}
>> +	debug_unregister(kvm->arch.dbf);
>>  	free_page((unsigned long)kvm->arch.sie_page2);
>>  	if (!kvm_is_ucontrol(kvm))
>>  		gmap_remove(kvm->arch.gmap);
>> @@ -2657,6 +2834,9 @@ static int sca_switch_to_extended(struct kvm *kvm)
>>  	unsigned int vcpu_idx;
>>  	u32 scaol, scaoh;
>>  
>> +	if (kvm->arch.use_esca)
>> +		return 0;
>> +
>>  	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
>>  	if (!new_sca)
>>  		return -ENOMEM;
>> @@ -2908,6 +3088,7 @@ static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
>>  static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
>>  {
>>  	int rc = 0;
>> +	u16 uvrc, uvrrc;
>>  
>>  	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
>>  						    CPUSTAT_SM |
>> @@ -2975,6 +3156,11 @@ static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
>>  
>>  	kvm_s390_vcpu_crypto_setup(vcpu);
>>  
>> +	mutex_lock(&vcpu->kvm->lock);
>> +	if (kvm_s390_pv_is_protected(vcpu->kvm))
>> +		rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
>> +	mutex_unlock(&vcpu->kvm->lock);
> 
> Do we have to clean up anything? (e.g., the cmma page) I *think*
> kvm_arch_vcpu_destroy() is not called when kvm_arch_vcpu_create() fails ...

Right we need to call kvm_s390_vcpu_unsetup_cmma. To me it looks like
this can be done unconditionally if rc!=0:

@@ -3156,8 +3158,11 @@ static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
        kvm_s390_vcpu_crypto_setup(vcpu);
 
        mutex_lock(&vcpu->kvm->lock);
-       if (kvm_s390_pv_is_protected(vcpu->kvm))
+       if (kvm_s390_pv_is_protected(vcpu->kvm)) {
                rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
+               if (rc)
+                       kvm_s390_vcpu_unsetup_cmma(vcpu);
+       }
        mutex_unlock(&vcpu->kvm->lock);
 
        return rc;


Everything else looks ok, no need to deallocate or so.

[...]

>>  int kvm_s390_handle_wait(struct kvm_vcpu *vcpu);
>>  void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu);
>> diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c
>> new file mode 100644
>> index 000000000000..67ea9a18ed8f
>> --- /dev/null
>> +++ b/arch/s390/kvm/pv.c
>> @@ -0,0 +1,256 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +/*
>> + * Hosting Secure Execution virtual machines
>> + *
>> + * Copyright IBM Corp. 2019
>> + *    Author(s): Janosch Frank <frankja@linux.ibm.com>
> 
> I'd assume you're an author as well at this point ;)

I personally prefer to not have authors in files and after all
I am just cleaning up so that Janosch can take care of QEMU.
But I will at least fix up the Copyright year.


> 
> [...]
> 
>> +
>> +int kvm_s390_pv_set_sec_parms(struct kvm *kvm, void *hdr, u64 length, u16 *rc,
>> +			      u16 *rrc)
>> +{
>> +	struct uv_cb_ssc uvcb = {
>> +		.header.cmd = UVC_CMD_SET_SEC_CONF_PARAMS,
>> +		.header.len = sizeof(uvcb),
>> +		.sec_header_origin = (u64)hdr,
>> +		.sec_header_len = length,
>> +		.guest_handle = kvm_s390_pv_get_handle(kvm),
>> +	};
>> +	int cc;
>> +
>> +	cc = uv_call(0, (u64)&uvcb);
> 
> int cc = ... could be done.
> 

ack. 
> 
>> +	*rc = uvcb.header.rc;
>> +	*rrc = uvcb.header.rrc;
>> +	KVM_UV_EVENT(kvm, 3, "PROTVIRT VM SET PARMS: rc %x rrc %x",
>> +		     *rc, *rrc);
>> +	if (cc)
>> +		return -EINVAL;
>> +	return 0;
>> +}
>> +
>> +static int unpack_one(struct kvm *kvm, unsigned long addr, u64 tweak[2],
>> +		      u16 *rc, u16 *rrc)
>> +{
>> +	struct uv_cb_unp uvcb = {
>> +		.header.cmd = UVC_CMD_UNPACK_IMG,
>> +		.header.len = sizeof(uvcb),
>> +		.guest_handle = kvm_s390_pv_get_handle(kvm),
>> +		.gaddr = addr,
>> +		.tweak[0] = tweak[0],
>> +		.tweak[1] = tweak[1],
>> +	};
>> +	int ret;
>> +
>> +	ret = gmap_make_secure(kvm->arch.gmap, addr, &uvcb);
> 
> ... similarly, with ret.


ack

> 
>> +	*rc = uvcb.header.rc;
>> +	*rrc = uvcb.header.rrc;
>> +
>> +	if (ret && ret != -EAGAIN)
>> +		KVM_UV_EVENT(kvm, 3, "PROTVIRT VM UNPACK: failed addr %llx with rc %x rrc %x",
>> +			     uvcb.gaddr, *rc, *rrc);
>> +	return ret;
>> +}
>> +
>> +int kvm_s390_pv_unpack(struct kvm *kvm, unsigned long addr, unsigned long size,
>> +		       unsigned long tweak, u16 *rc, u16 *rrc)
>> +{
>> +	u64 tw[2] = {tweak, 0};
> 
> I have no idea what tweaks are in this context. So I have to trust you
> guys on the implementation, because I don't understand it.

It's the crypto term. Basically a similar idea to a salt or nonce.

> 
> Especially, why can't we simply have
> 
> s/tweak/tweak/

? 

> 
> offset = 0;
> 
> while (offset < size) {
> 	...
> 	ret = unpack_one(kvm, addr, tweak, offset, rc, rrc);
> 				    ^ no idea what tweak is
> 	...
> 	... offset +=  PAGE_SIZE;
> }
> 
> But maybe I am missing what the whole array is about.

Both values (the initial 64-bit value and the address) form the 128-bit
tweak for the decryption. So the offset is actually part of the tweak.

So yes, we could rewrite that to use an offset, not passing an array
through but tweak + offset as separate u64s.

Will check if this makes it any clearer.
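
E.g. a sketch of that variant (untested; unpack_one() would then take the
user tweak and the offset as separate u64s and fill uvcb.tweak[0] and
uvcb.tweak[1] from them):

	u64 offset = 0;

	while (offset < size) {
		ret = unpack_one(kvm, addr, tweak, offset, rc, rrc);
		if (ret == -EAGAIN) {
			cond_resched();
			if (fatal_signal_pending(current))
				break;
			continue;
		}
		if (ret)
			break;
		addr += PAGE_SIZE;
		offset += PAGE_SIZE;
	}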
> 
>> +	int ret = 0;
>> +
>> +	if (addr & ~PAGE_MASK || !size || size & ~PAGE_MASK)
>> +		return -EINVAL;
>> +
>> +	KVM_UV_EVENT(kvm, 3, "PROTVIRT VM UNPACK: start addr %lx size %lx",
>> +		     addr, size);
>> +
>> +	while (tw[1] < size) {
>> +		ret = unpack_one(kvm, addr, tw, rc, rrc);
>> +		if (ret == -EAGAIN) {
>> +			cond_resched();
>> +			if (fatal_signal_pending(current))
>> +				break;
>> +			continue;
>> +		}
>> +		if (ret)
>> +			break;
>> +		addr += PAGE_SIZE;
>> +		tw[1] += PAGE_SIZE;
>> +	}
>> +	if (!ret)
>> +		KVM_UV_EVENT(kvm, 3, "%s", "PROTVIRT VM UNPACK: successful");
>> +	return ret;
>> +}
> 
> [...]
>> +enum pv_cmd_id {
>> +	KVM_PV_ENABLE,
>> +	KVM_PV_DISABLE,
>> +	KVM_PV_VM_SET_SEC_PARMS,
>> +	KVM_PV_VM_UNPACK,
>> +	KVM_PV_VM_VERIFY,
> 
> I wonder if we should just drop "_VM" from all of these ...
David Hildenbrand Feb. 21, 2020, 8:22 a.m. UTC | #3
>>> +
>>> +		break;
>>> +	}
>>> +	case KVM_PV_DISABLE: {
>>> +		r = -EINVAL;
>>> +		if (!kvm_s390_pv_is_protected(kvm))
>>> +			break;
>>> +
>>> +		kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
>>> +		r = kvm_s390_pv_destroy_vm(kvm, &cmd->rc, &cmd->rrc);
>>> +		if (!r)
>>> +			kvm_s390_pv_dealloc_vm(kvm);
>>
>> Hm, if destroy fails, the CPUs would already have been removed.
>>
>> Is there a way to make kvm_s390_pv_destroy_vm() never fail? The return
>> value is always ignored except here ... which looks wrong.
> 
> This should not fail. But if it does we should not free the memory that
> we donated to the ultravisor. We then do have a memory leak, but this is 
> necessary to keep the integrity of the host. 
> I will fix the other places to only call dealloc when destroy succeeded.
> 
> Same for VCPU destroy. If that fails I should not free arch.pv.stor_base.
> Will fix.

A comment would be nice, documenting exactly that.

>>>  int kvm_s390_handle_wait(struct kvm_vcpu *vcpu);
>>>  void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu);
>>> diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c
>>> new file mode 100644
>>> index 000000000000..67ea9a18ed8f
>>> --- /dev/null
>>> +++ b/arch/s390/kvm/pv.c
>>> @@ -0,0 +1,256 @@
>>> +// SPDX-License-Identifier: GPL-2.0
>>> +/*
>>> + * Hosting Secure Execution virtual machines
>>> + *
>>> + * Copyright IBM Corp. 2019
>>> + *    Author(s): Janosch Frank <frankja@linux.ibm.com>
>>
>> I'd assume you're an author as well at this point ;)
> 
> I personally prefer to not have authors in files and after all
> I am just cleaning up so that Janosch can take care of QEMU.
> But I will at least fix up the Copyright year.

For me, that counts as co-author, but yeah, totally your decision :)

>>> +	*rc = uvcb.header.rc;
>>> +	*rrc = uvcb.header.rrc;
>>> +
>>> +	if (ret && ret != -EAGAIN)
>>> +		KVM_UV_EVENT(kvm, 3, "PROTVIRT VM UNPACK: failed addr %llx with rc %x rrc %x",
>>> +			     uvcb.gaddr, *rc, *rrc);
>>> +	return ret;
>>> +}
>>> +
>>> +int kvm_s390_pv_unpack(struct kvm *kvm, unsigned long addr, unsigned long size,
>>> +		       unsigned long tweak, u16 *rc, u16 *rrc)
>>> +{
>>> +	u64 tw[2] = {tweak, 0};
>>
>> I have no idea what tweaks are in this context. So I have to trust you
>> guys on the implementation, because I don't understand it.
> 
> It's the crypto term. Basically a similar idea to a salt or nonce.

Understood. "offset"/"address" makes this clearer in this code -
although it will be used as a salt in HW.

> 
>>
>> Especially, why can't we simply have
>>
>> s/tweak/tweak/
> 
> ? 

Ignore, it was part of the process of me figuring out what's going on in
this code :)
Christian Borntraeger Feb. 21, 2020, 8:40 a.m. UTC | #4
On 21.02.20 09:22, David Hildenbrand wrote:
>>>> +
>>>> +		break;
>>>> +	}
>>>> +	case KVM_PV_DISABLE: {
>>>> +		r = -EINVAL;
>>>> +		if (!kvm_s390_pv_is_protected(kvm))
>>>> +			break;
>>>> +
>>>> +		kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
>>>> +		r = kvm_s390_pv_destroy_vm(kvm, &cmd->rc, &cmd->rrc);
>>>> +		if (!r)
>>>> +			kvm_s390_pv_dealloc_vm(kvm);
>>>
>>> Hm, if destroy fails, the CPUs would already have been removed.
>>>
>>> Is there a way to make kvm_s390_pv_destroy_vm() never fail? The return
>>> value is always ignored except here ... which looks wrong.
>>
>> This should not fail. But if it does we should not free the memory that
>> we donated to the ultravisor. We then do have a memory leak, but this is 
>> necessary to keep the integrity of the host. 
>> I will fix the other places to only call dealloc when destroy succeeded.
>>
>> Same for VCPU destroy. If that fails I should not free arch.pv.stor_base.
>> Will fix.
> 
> A comment would be nice, documenting exactly that.


something like this on top:

diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 738f7fefcaec..cad04e26dccf 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -2173,6 +2173,11 @@ static void kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
        struct kvm_vcpu *vcpu;
        int i;
 
+       /*
+        * we ignore failures and try to destroy as many CPUs as possible.
+        * At the same time we must not free the assigned resources when
+        * this fails, as the ultravisor still has access to that memory.
+        */
        kvm_for_each_vcpu(i, vcpu, kvm) {
                mutex_lock(&vcpu->mutex);
                kvm_s390_pv_destroy_cpu(vcpu, rc, rrc);
@@ -2221,6 +2226,7 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
                        kvm_s390_pv_dealloc_vm(kvm);
                        break;
                }
+               /* we never switch back to bsca from esca */
                r = kvm_s390_pv_create_vm(kvm, &cmd->rc, &cmd->rrc);
                if (r) {
                        kvm_s390_pv_dealloc_vm(kvm);
diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c
index 6b581b5f8521..f93811a0a441 100644
--- a/arch/s390/kvm/pv.c
+++ b/arch/s390/kvm/pv.c
@@ -16,6 +16,7 @@
 #include <asm/mman.h>
 #include "kvm-s390.h"
 
+/* only free resources when the destroy was successful */
 void kvm_s390_pv_dealloc_vm(struct kvm *kvm)
 {
        vfree(kvm->arch.pv.stor_var);
@@ -62,6 +63,7 @@ int kvm_s390_pv_alloc_vm(struct kvm *kvm)
        return -ENOMEM;
 }
 
+/* this should not fail, but if it does we must not free the donated memory */
 int kvm_s390_pv_destroy_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
 {
        int cc;
David Hildenbrand Feb. 21, 2020, 11:47 a.m. UTC | #5
See my other mail, maybe we can merge alloc+create and handle most of
that conditional freeing in there.
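
(E.g., a rough sketch of that idea, with a hypothetical kvm_s390_pv_init_vm()
wrapping the two steps and freeing the donated memory only when the
ultravisor no longer holds it:)

	/* hypothetical: allocate UV memory and create the secure config in one go */
	static int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
	{
		int r = kvm_s390_pv_alloc_vm(kvm);

		if (r)
			return r;

		r = kvm_s390_pv_create_vm(kvm, rc, rrc);
		if (r && !kvm_s390_pv_get_handle(kvm))
			/* no secure config left behind, safe to free */
			kvm_s390_pv_dealloc_vm(kvm);
		return r;
	}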

> diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
> index 738f7fefcaec..cad04e26dccf 100644
> --- a/arch/s390/kvm/kvm-s390.c
> +++ b/arch/s390/kvm/kvm-s390.c
> @@ -2173,6 +2173,11 @@ static void kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
>         struct kvm_vcpu *vcpu;
>         int i;
>  
> +       /*
> +        * we ignore failures and try to destroy as many CPUs as possible.
> +        * At the same time we must not free the assigned resources when
> +        * this fails, as the ultravisor still has access to that memory.
> +        */

Makes sense.

>         kvm_for_each_vcpu(i, vcpu, kvm) {
>                 mutex_lock(&vcpu->mutex);
>                 kvm_s390_pv_destroy_cpu(vcpu, rc, rrc);
> @@ -2221,6 +2226,7 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
>                         kvm_s390_pv_dealloc_vm(kvm);
>                         break;
>                 }
> +               /* we never switch back to bsca from esca */
>                 r = kvm_s390_pv_create_vm(kvm, &cmd->rc, &cmd->rrc);

Helpful.

Patch

diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index d058289385a5..1aa2382fe363 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -160,7 +160,13 @@  struct kvm_s390_sie_block {
 	__u8	reserved08[4];		/* 0x0008 */
 #define PROG_IN_SIE (1<<0)
 	__u32	prog0c;			/* 0x000c */
-	__u8	reserved10[16];		/* 0x0010 */
+	union {
+		__u8	reserved10[16];		/* 0x0010 */
+		struct {
+			__u64	pv_handle_cpu;
+			__u64	pv_handle_config;
+		};
+	};
 #define PROG_BLOCK_SIE	(1<<0)
 #define PROG_REQUEST	(1<<1)
 	atomic_t prog20;		/* 0x0020 */
@@ -233,7 +239,7 @@  struct kvm_s390_sie_block {
 #define ECB3_RI  0x01
 	__u8    ecb3;			/* 0x0063 */
 	__u32	scaol;			/* 0x0064 */
-	__u8	reserved68;		/* 0x0068 */
+	__u8	sdf;			/* 0x0068 */
 	__u8    epdx;			/* 0x0069 */
 	__u8    reserved6a[2];		/* 0x006a */
 	__u32	todpr;			/* 0x006c */
@@ -645,6 +651,11 @@  struct kvm_guestdbg_info_arch {
 	unsigned long last_bp;
 };
 
+struct kvm_s390_pv_vcpu {
+	u64 handle;
+	unsigned long stor_base;
+};
+
 struct kvm_vcpu_arch {
 	struct kvm_s390_sie_block *sie_block;
 	/* if vsie is active, currently executed shadow sie control block */
@@ -673,6 +684,7 @@  struct kvm_vcpu_arch {
 	__u64 cputm_start;
 	bool gs_enabled;
 	bool skey_enabled;
+	struct kvm_s390_pv_vcpu pv;
 };
 
 struct kvm_vm_stat {
@@ -843,6 +855,13 @@  struct kvm_s390_gisa_interrupt {
 	DECLARE_BITMAP(kicked_mask, KVM_MAX_VCPUS);
 };
 
+struct kvm_s390_pv {
+	u64 handle;
+	u64 guest_len;
+	unsigned long stor_base;
+	void *stor_var;
+};
+
 struct kvm_arch{
 	void *sca;
 	int use_esca;
@@ -878,6 +897,7 @@  struct kvm_arch{
 	DECLARE_BITMAP(cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
 	DECLARE_BITMAP(idle_mask, KVM_MAX_VCPUS);
 	struct kvm_s390_gisa_interrupt gisa_int;
+	struct kvm_s390_pv pv;
 };
 
 #define KVM_HVA_ERR_BAD		(-1UL)
diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h
index a81af06507a9..09dc6dba94a4 100644
--- a/arch/s390/include/asm/uv.h
+++ b/arch/s390/include/asm/uv.h
@@ -23,11 +23,19 @@ 
 #define UVC_RC_INV_STATE	0x0003
 #define UVC_RC_INV_LEN		0x0005
 #define UVC_RC_NO_RESUME	0x0007
+#define UVC_RC_NEED_DESTROY	0x8000
 
 #define UVC_CMD_QUI			0x0001
 #define UVC_CMD_INIT_UV			0x000f
+#define UVC_CMD_CREATE_SEC_CONF		0x0100
+#define UVC_CMD_DESTROY_SEC_CONF	0x0101
+#define UVC_CMD_CREATE_SEC_CPU		0x0120
+#define UVC_CMD_DESTROY_SEC_CPU		0x0121
 #define UVC_CMD_CONV_TO_SEC_STOR	0x0200
 #define UVC_CMD_CONV_FROM_SEC_STOR	0x0201
+#define UVC_CMD_SET_SEC_CONF_PARAMS	0x0300
+#define UVC_CMD_UNPACK_IMG		0x0301
+#define UVC_CMD_VERIFY_IMG		0x0302
 #define UVC_CMD_PIN_PAGE_SHARED		0x0341
 #define UVC_CMD_UNPIN_PAGE_SHARED	0x0342
 #define UVC_CMD_SET_SHARED_ACCESS	0x1000
@@ -37,10 +45,17 @@ 
 enum uv_cmds_inst {
 	BIT_UVC_CMD_QUI = 0,
 	BIT_UVC_CMD_INIT_UV = 1,
+	BIT_UVC_CMD_CREATE_SEC_CONF = 2,
+	BIT_UVC_CMD_DESTROY_SEC_CONF = 3,
+	BIT_UVC_CMD_CREATE_SEC_CPU = 4,
+	BIT_UVC_CMD_DESTROY_SEC_CPU = 5,
 	BIT_UVC_CMD_CONV_TO_SEC_STOR = 6,
 	BIT_UVC_CMD_CONV_FROM_SEC_STOR = 7,
 	BIT_UVC_CMD_SET_SHARED_ACCESS = 8,
 	BIT_UVC_CMD_REMOVE_SHARED_ACCESS = 9,
+	BIT_UVC_CMD_SET_SEC_PARMS = 11,
+	BIT_UVC_CMD_UNPACK_IMG = 13,
+	BIT_UVC_CMD_VERIFY_IMG = 14,
 	BIT_UVC_CMD_PIN_PAGE_SHARED = 21,
 	BIT_UVC_CMD_UNPIN_PAGE_SHARED = 22,
 };
@@ -52,6 +67,7 @@  struct uv_cb_header {
 	u16 rrc;	/* Return Reason Code */
 } __packed __aligned(8);
 
+/* Query Ultravisor Information */
 struct uv_cb_qui {
 	struct uv_cb_header header;
 	u64 reserved08;
@@ -71,6 +87,7 @@  struct uv_cb_qui {
 	u8  reserveda0[200 - 160];
 } __packed __aligned(8);
 
+/* Initialize Ultravisor */
 struct uv_cb_init {
 	struct uv_cb_header header;
 	u64 reserved08[2];
@@ -79,6 +96,35 @@  struct uv_cb_init {
 	u64 reserved28[4];
 } __packed __aligned(8);
 
+/* Create Guest Configuration */
+struct uv_cb_cgc {
+	struct uv_cb_header header;
+	u64 reserved08[2];
+	u64 guest_handle;
+	u64 conf_base_stor_origin;
+	u64 conf_virt_stor_origin;
+	u64 reserved30;
+	u64 guest_stor_origin;
+	u64 guest_stor_len;
+	u64 guest_sca;
+	u64 guest_asce;
+	u64 reserved58[5];
+} __packed __aligned(8);
+
+/* Create Secure CPU */
+struct uv_cb_csc {
+	struct uv_cb_header header;
+	u64 reserved08[2];
+	u64 cpu_handle;
+	u64 guest_handle;
+	u64 stor_origin;
+	u8  reserved30[6];
+	u16 num;
+	u64 state_origin;
+	u64 reserved40[4];
+} __packed __aligned(8);
+
+/* Convert to Secure */
 struct uv_cb_cts {
 	struct uv_cb_header header;
 	u64 reserved08[2];
@@ -86,12 +132,34 @@  struct uv_cb_cts {
 	u64 gaddr;
 } __packed __aligned(8);
 
+/* Convert from Secure / Pin Page Shared */
 struct uv_cb_cfs {
 	struct uv_cb_header header;
 	u64 reserved08[2];
 	u64 paddr;
 } __packed __aligned(8);
 
+/* Set Secure Config Parameter */
+struct uv_cb_ssc {
+	struct uv_cb_header header;
+	u64 reserved08[2];
+	u64 guest_handle;
+	u64 sec_header_origin;
+	u32 sec_header_len;
+	u32 reserved2c;
+	u64 reserved30[4];
+} __packed __aligned(8);
+
+/* Unpack */
+struct uv_cb_unp {
+	struct uv_cb_header header;
+	u64 reserved08[2];
+	u64 guest_handle;
+	u64 gaddr;
+	u64 tweak[2];
+	u64 reserved38[3];
+} __packed __aligned(8);
+
 /*
  * A common UV call struct for calls that take no payload
  * Examples:
@@ -105,6 +173,7 @@  struct uv_cb_nodata {
 	u64 reserved20[4];
 } __packed __aligned(8);
 
+/* Set Shared Access */
 struct uv_cb_share {
 	struct uv_cb_header header;
 	u64 reserved08[3];
diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile
index 05ee90a5ea08..12decca22e7c 100644
--- a/arch/s390/kvm/Makefile
+++ b/arch/s390/kvm/Makefile
@@ -9,6 +9,6 @@  common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o  $(KVM)/async_pf.o $(KVM)/irqch
 ccflags-y := -Ivirt/kvm -Iarch/s390/kvm
 
 kvm-objs := $(common-objs) kvm-s390.o intercept.o interrupt.o priv.o sigp.o
-kvm-objs += diag.o gaccess.o guestdbg.o vsie.o
+kvm-objs += diag.o gaccess.o guestdbg.o vsie.o pv.o
 
 obj-$(CONFIG_KVM) += kvm.o
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index cc7793525a69..8272a821a621 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -44,6 +44,7 @@ 
 #include <asm/cpacf.h>
 #include <asm/timex.h>
 #include <asm/ap.h>
+#include <asm/uv.h>
 #include "kvm-s390.h"
 #include "gaccess.h"
 
@@ -234,8 +235,10 @@  int kvm_arch_check_processor_compat(void)
 	return 0;
 }
 
+/* forward declarations */
 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
 			      unsigned long end);
+static int sca_switch_to_extended(struct kvm *kvm);
 
 static void kvm_clock_sync_scb(struct kvm_s390_sie_block *scb, u64 delta)
 {
@@ -2165,6 +2168,143 @@  static int kvm_s390_set_cmma_bits(struct kvm *kvm,
 	return r;
 }
 
+static void kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
+{
+	struct kvm_vcpu *vcpu;
+	int i;
+
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		mutex_lock(&vcpu->mutex);
+		kvm_s390_pv_destroy_cpu(vcpu, rc, rrc);
+		mutex_unlock(&vcpu->mutex);
+	}
+}
+
+static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
+{
+	int i, r = 0;
+	u16 dummy;
+
+	struct kvm_vcpu *vcpu;
+
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		mutex_lock(&vcpu->mutex);
+		r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
+		mutex_unlock(&vcpu->mutex);
+		if (r)
+			break;
+	}
+	if (r)
+		kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
+	return r;
+}
+
+static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
+{
+	int r = 0;
+	u16 dummy;
+	void __user *argp = (void __user *)cmd->data;
+
+	switch (cmd->cmd) {
+	case KVM_PV_ENABLE: {
+		r = -EINVAL;
+		if (kvm_s390_pv_is_protected(kvm))
+			break;
+
+		r = kvm_s390_pv_alloc_vm(kvm);
+		if (r)
+			break;
+
+		/* FMT 4 SIE needs esca */
+		r = sca_switch_to_extended(kvm);
+		if (r) {
+			kvm_s390_pv_dealloc_vm(kvm);
+			kvm_s390_vcpu_unblock_all(kvm);
+			mutex_unlock(&kvm->lock);
+			break;
+		}
+		r = kvm_s390_pv_create_vm(kvm, &cmd->rc, &cmd->rrc);
+		if (!r)
+			r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc);
+		if (r)
+			kvm_s390_pv_destroy_vm(kvm, &dummy, &dummy);
+
+		break;
+	}
+	case KVM_PV_DISABLE: {
+		r = -EINVAL;
+		if (!kvm_s390_pv_is_protected(kvm))
+			break;
+
+		kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
+		r = kvm_s390_pv_destroy_vm(kvm, &cmd->rc, &cmd->rrc);
+		if (!r)
+			kvm_s390_pv_dealloc_vm(kvm);
+		break;
+	}
+	case KVM_PV_VM_SET_SEC_PARMS: {
+		struct kvm_s390_pv_sec_parm parms = {};
+		void *hdr;
+
+		r = -EINVAL;
+		if (!kvm_s390_pv_is_protected(kvm))
+			break;
+
+		r = -EFAULT;
+		if (copy_from_user(&parms, argp, sizeof(parms)))
+			break;
+
+		/* Currently restricted to 8KB */
+		r = -EINVAL;
+		if (parms.length > PAGE_SIZE * 2)
+			break;
+
+		r = -ENOMEM;
+		hdr = vmalloc(parms.length);
+		if (!hdr)
+			break;
+
+		r = -EFAULT;
+		if (!copy_from_user(hdr, (void __user *)parms.origin,
+				    parms.length))
+			r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
+						      &cmd->rc, &cmd->rrc);
+
+		vfree(hdr);
+		break;
+	}
+	case KVM_PV_VM_UNPACK: {
+		struct kvm_s390_pv_unp unp = {};
+
+		r = -EINVAL;
+		if (!kvm_s390_pv_is_protected(kvm))
+			break;
+
+		r = -EFAULT;
+		if (copy_from_user(&unp, argp, sizeof(unp)))
+			break;
+
+		r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
+				       &cmd->rc, &cmd->rrc);
+		break;
+	}
+	case KVM_PV_VM_VERIFY: {
+		r = -EINVAL;
+		if (!kvm_s390_pv_is_protected(kvm))
+			break;
+
+		r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
+				  UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
+		KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
+			     cmd->rrc);
+		break;
+	}
+	default:
+		return -ENOTTY;
+	}
+	return r;
+}
+
 long kvm_arch_vm_ioctl(struct file *filp,
 		       unsigned int ioctl, unsigned long arg)
 {
@@ -2262,6 +2402,27 @@  long kvm_arch_vm_ioctl(struct file *filp,
 		mutex_unlock(&kvm->slots_lock);
 		break;
 	}
+	case KVM_S390_PV_COMMAND: {
+		struct kvm_pv_cmd args;
+
+		r = 0;
+		if (!is_prot_virt_host()) {
+			r = -EINVAL;
+			break;
+		}
+		if (copy_from_user(&args, argp, sizeof(args))) {
+			r = -EFAULT;
+			break;
+		}
+		mutex_lock(&kvm->lock);
+		r = kvm_s390_handle_pv(kvm, &args);
+		mutex_unlock(&kvm->lock);
+		if (copy_to_user(argp, &args, sizeof(args))) {
+			r = -EFAULT;
+			break;
+		}
+		break;
+	}
 	default:
 		r = -ENOTTY;
 	}
@@ -2525,6 +2686,8 @@  int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 
 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 {
+	u16 rc, rrc;
+
 	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
 	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
 	kvm_s390_clear_local_irqs(vcpu);
@@ -2537,6 +2700,9 @@  void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 
 	if (vcpu->kvm->arch.use_cmma)
 		kvm_s390_vcpu_unsetup_cmma(vcpu);
+	/* We can not hold the vcpu mutex here, we are already dying */
+	if (kvm_s390_pv_cpu_get_handle(vcpu))
+		kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc);
 	free_page((unsigned long)(vcpu->arch.sie_block));
 }
 
@@ -2558,10 +2724,21 @@  static void kvm_free_vcpus(struct kvm *kvm)
 
 void kvm_arch_destroy_vm(struct kvm *kvm)
 {
+	u16 rc, rrc;
 	kvm_free_vcpus(kvm);
 	sca_dispose(kvm);
-	debug_unregister(kvm->arch.dbf);
 	kvm_s390_gisa_destroy(kvm);
+	/*
+	 * We are already at the end of life and kvm->lock is not taken.
+	 * This is ok as the file descriptor is closed by now and nobody
+	 * can mess with the pv state. To avoid lockdep_assert_held from
+	 * complaining we do not use kvm_s390_pv_is_protected.
+	 */
+	if (kvm_s390_pv_get_handle(kvm)) {
+		kvm_s390_pv_destroy_vm(kvm, &rc, &rrc);
+		kvm_s390_pv_dealloc_vm(kvm);
+	}
+	debug_unregister(kvm->arch.dbf);
 	free_page((unsigned long)kvm->arch.sie_page2);
 	if (!kvm_is_ucontrol(kvm))
 		gmap_remove(kvm->arch.gmap);
@@ -2657,6 +2834,9 @@  static int sca_switch_to_extended(struct kvm *kvm)
 	unsigned int vcpu_idx;
 	u32 scaol, scaoh;
 
+	if (kvm->arch.use_esca)
+		return 0;
+
 	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
 	if (!new_sca)
 		return -ENOMEM;
@@ -2908,6 +3088,7 @@  static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
 static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
 {
 	int rc = 0;
+	u16 uvrc, uvrrc;
 
 	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
 						    CPUSTAT_SM |
@@ -2975,6 +3156,11 @@  static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
 
 	kvm_s390_vcpu_crypto_setup(vcpu);
 
+	mutex_lock(&vcpu->kvm->lock);
+	if (kvm_s390_pv_is_protected(vcpu->kvm))
+		rc = kvm_s390_pv_create_cpu(vcpu, &uvrc, &uvrrc);
+	mutex_unlock(&vcpu->kvm->lock);
+
 	return rc;
 }
 
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 83dabb18e4d9..d62de29b2d6c 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -15,6 +15,7 @@ 
 #include <linux/hrtimer.h>
 #include <linux/kvm.h>
 #include <linux/kvm_host.h>
+#include <linux/lockdep.h>
 #include <asm/facility.h>
 #include <asm/processor.h>
 #include <asm/sclp.h>
@@ -207,6 +208,40 @@  static inline int kvm_s390_user_cpu_state_ctrl(struct kvm *kvm)
 	return kvm->arch.user_cpu_state_ctrl != 0;
 }
 
+/* implemented in pv.c */
+void kvm_s390_pv_dealloc_vm(struct kvm *kvm);
+int kvm_s390_pv_alloc_vm(struct kvm *kvm);
+int kvm_s390_pv_create_vm(struct kvm *kvm, u16 *rc, u16 *rrc);
+int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc);
+int kvm_s390_pv_destroy_vm(struct kvm *kvm, u16 *rc, u16 *rrc);
+void kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc);
+int kvm_s390_pv_set_sec_parms(struct kvm *kvm, void *hdr, u64 length, u16 *rc,
+			      u16 *rrc);
+int kvm_s390_pv_unpack(struct kvm *kvm, unsigned long addr, unsigned long size,
+		       unsigned long tweak, u16 *rc, u16 *rrc);
+
+static inline u64 kvm_s390_pv_get_handle(struct kvm *kvm)
+{
+	return kvm->arch.pv.handle;
+}
+
+static inline u64 kvm_s390_pv_cpu_get_handle(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.pv.handle;
+}
+
+static inline bool kvm_s390_pv_is_protected(struct kvm *kvm)
+{
+	lockdep_assert_held(&kvm->lock);
+	return !!kvm_s390_pv_get_handle(kvm);
+}
+
+static inline bool kvm_s390_pv_cpu_is_protected(struct kvm_vcpu *vcpu)
+{
+	lockdep_assert_held(&vcpu->mutex);
+	return !!kvm_s390_pv_cpu_get_handle(vcpu);
+}
+
 /* implemented in interrupt.c */
 int kvm_s390_handle_wait(struct kvm_vcpu *vcpu);
 void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu);
diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c
new file mode 100644
index 000000000000..67ea9a18ed8f
--- /dev/null
+++ b/arch/s390/kvm/pv.c
@@ -0,0 +1,256 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Hosting Secure Execution virtual machines
+ *
+ * Copyright IBM Corp. 2019
+ *    Author(s): Janosch Frank <frankja@linux.ibm.com>
+ */
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/pagemap.h>
+#include <linux/sched/signal.h>
+#include <asm/pgalloc.h>
+#include <asm/gmap.h>
+#include <asm/uv.h>
+#include <asm/gmap.h>
+#include <asm/mman.h>
+#include "kvm-s390.h"
+
+void kvm_s390_pv_dealloc_vm(struct kvm *kvm)
+{
+	vfree(kvm->arch.pv.stor_var);
+	free_pages(kvm->arch.pv.stor_base,
+		   get_order(uv_info.guest_base_stor_len));
+	memset(&kvm->arch.pv, 0, sizeof(kvm->arch.pv));
+}
+
+int kvm_s390_pv_alloc_vm(struct kvm *kvm)
+{
+	unsigned long base = uv_info.guest_base_stor_len;
+	unsigned long virt = uv_info.guest_virt_var_stor_len;
+	unsigned long npages = 0, vlen = 0;
+	struct kvm_memory_slot *memslot;
+
+	kvm->arch.pv.stor_var = NULL;
+	kvm->arch.pv.stor_base = __get_free_pages(GFP_KERNEL, get_order(base));
+	if (!kvm->arch.pv.stor_base)
+		return -ENOMEM;
+
+	/*
+	 * Calculate current guest storage for allocation of the
+	 * variable storage, which is based on the length in MB.
+	 *
+	 * Slots are sorted by GFN
+	 */
+	mutex_lock(&kvm->slots_lock);
+	memslot = kvm_memslots(kvm)->memslots;
+	npages = memslot->base_gfn + memslot->npages;
+	mutex_unlock(&kvm->slots_lock);
+
+	kvm->arch.pv.guest_len = npages * PAGE_SIZE;
+
+	/* Allocate variable storage */
+	vlen = ALIGN(virt * ((npages * PAGE_SIZE) / HPAGE_SIZE), PAGE_SIZE);
+	vlen += uv_info.guest_virt_base_stor_len;
+	kvm->arch.pv.stor_var = vzalloc(vlen);
+	if (!kvm->arch.pv.stor_var)
+		goto out_err;
+	return 0;
+
+out_err:
+	kvm_s390_pv_dealloc_vm(kvm);
+	return -ENOMEM;
+}
+
+int kvm_s390_pv_destroy_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
+{
+	int cc;
+
+	cc = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
+			   UVC_CMD_DESTROY_SEC_CONF, rc, rrc);
+	WRITE_ONCE(kvm->arch.gmap->guest_handle, 0);
+	atomic_set(&kvm->mm->context.is_protected, 0);
+	KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY VM: rc %x rrc %x", *rc, *rrc);
+	return cc;
+}
+
+void kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc)
+{
+	int cc = 0;
+
+	if (kvm_s390_pv_cpu_get_handle(vcpu)) {
+		cc = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
+				   UVC_CMD_DESTROY_SEC_CPU, rc, rrc);
+
+		KVM_UV_EVENT(vcpu->kvm, 3,
+			     "PROTVIRT DESTROY VCPU %d: rc %x rrc %x",
+			     vcpu->vcpu_id, *rc, *rrc);
+		WARN_ONCE(cc, "protvirt destroy cpu failed rc %x rrc %x",
+				*rc, *rrc);
+	}
+
+	free_pages(vcpu->arch.pv.stor_base,
+		   get_order(uv_info.guest_cpu_stor_len));
+	vcpu->arch.sie_block->pv_handle_cpu = 0;
+	vcpu->arch.sie_block->pv_handle_config = 0;
+	memset(&vcpu->arch.pv, 0, sizeof(vcpu->arch.pv));
+	vcpu->arch.sie_block->sdf = 0;
+	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+}
+
+int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc)
+{
+	struct uv_cb_csc uvcb = {
+		.header.cmd = UVC_CMD_CREATE_SEC_CPU,
+		.header.len = sizeof(uvcb),
+	};
+	int cc;
+
+	if (kvm_s390_pv_cpu_get_handle(vcpu))
+		return -EINVAL;
+
+	vcpu->arch.pv.stor_base = __get_free_pages(GFP_KERNEL,
+						   get_order(uv_info.guest_cpu_stor_len));
+	if (!vcpu->arch.pv.stor_base)
+		return -ENOMEM;
+
+	/* Input */
+	uvcb.guest_handle = kvm_s390_pv_get_handle(vcpu->kvm);
+	uvcb.num = vcpu->arch.sie_block->icpua;
+	uvcb.state_origin = (u64)vcpu->arch.sie_block;
+	uvcb.stor_origin = (u64)vcpu->arch.pv.stor_base;
+
+	cc = uv_call(0, (u64)&uvcb);
+	*rc = uvcb.header.rc;
+	*rrc = uvcb.header.rrc;
+	KVM_UV_EVENT(vcpu->kvm, 3,
+		     "PROTVIRT CREATE VCPU: cpu %d handle %llx rc %x rrc %x",
+		     vcpu->vcpu_id, uvcb.cpu_handle, uvcb.header.rc,
+		     uvcb.header.rrc);
+
+	if (cc) {
+		u16 dummy;
+
+		kvm_s390_pv_destroy_cpu(vcpu, &dummy, &dummy);
+		return -EINVAL;
+	}
+
+	/* Output */
+	vcpu->arch.pv.handle = uvcb.cpu_handle;
+	vcpu->arch.sie_block->pv_handle_cpu = uvcb.cpu_handle;
+	vcpu->arch.sie_block->pv_handle_config = kvm_s390_pv_get_handle(vcpu->kvm);
+	vcpu->arch.sie_block->sdf = 2;
+	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+	return 0;
+}
+
+int kvm_s390_pv_create_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
+{
+	u16 drc, drrc;
+	int cc;
+
+	struct uv_cb_cgc uvcb = {
+		.header.cmd = UVC_CMD_CREATE_SEC_CONF,
+		.header.len = sizeof(uvcb)
+	};
+
+	/* Inputs */
+	uvcb.guest_stor_origin = 0; /* MSO is 0 for KVM */
+	uvcb.guest_stor_len = kvm->arch.pv.guest_len;
+	uvcb.guest_asce = kvm->arch.gmap->asce;
+	uvcb.guest_sca = (unsigned long)kvm->arch.sca;
+	uvcb.conf_base_stor_origin = (u64)kvm->arch.pv.stor_base;
+	uvcb.conf_virt_stor_origin = (u64)kvm->arch.pv.stor_var;
+
+	cc = uv_call(0, (u64)&uvcb);
+	*rc = uvcb.header.rc;
+	*rrc = uvcb.header.rrc;
+	KVM_UV_EVENT(kvm, 3, "PROTVIRT CREATE VM: handle %llx len %llx rc %x rrc %x",
+		     uvcb.guest_handle, uvcb.guest_stor_len, *rc, *rrc);
+
+	/* Outputs */
+	kvm->arch.pv.handle = uvcb.guest_handle;
+
+	if (cc && (uvcb.header.rc & UVC_RC_NEED_DESTROY)) {
+		kvm_s390_pv_destroy_vm(kvm, &drc, &drrc);
+		return -EINVAL;
+	}
+	kvm->arch.gmap->guest_handle = uvcb.guest_handle;
+	atomic_set(&kvm->mm->context.is_protected, 1);
+	return cc;
+}
+
+int kvm_s390_pv_set_sec_parms(struct kvm *kvm, void *hdr, u64 length, u16 *rc,
+			      u16 *rrc)
+{
+	struct uv_cb_ssc uvcb = {
+		.header.cmd = UVC_CMD_SET_SEC_CONF_PARAMS,
+		.header.len = sizeof(uvcb),
+		.sec_header_origin = (u64)hdr,
+		.sec_header_len = length,
+		.guest_handle = kvm_s390_pv_get_handle(kvm),
+	};
+	int cc;
+
+	cc = uv_call(0, (u64)&uvcb);
+	*rc = uvcb.header.rc;
+	*rrc = uvcb.header.rrc;
+	KVM_UV_EVENT(kvm, 3, "PROTVIRT VM SET PARMS: rc %x rrc %x",
+		     *rc, *rrc);
+	if (cc)
+		return -EINVAL;
+	return 0;
+}
+
+static int unpack_one(struct kvm *kvm, unsigned long addr, u64 tweak[2],
+		      u16 *rc, u16 *rrc)
+{
+	struct uv_cb_unp uvcb = {
+		.header.cmd = UVC_CMD_UNPACK_IMG,
+		.header.len = sizeof(uvcb),
+		.guest_handle = kvm_s390_pv_get_handle(kvm),
+		.gaddr = addr,
+		.tweak[0] = tweak[0],
+		.tweak[1] = tweak[1],
+	};
+	int ret;
+
+	ret = gmap_make_secure(kvm->arch.gmap, addr, &uvcb);
+	*rc = uvcb.header.rc;
+	*rrc = uvcb.header.rrc;
+
+	if (ret && ret != -EAGAIN)
+		KVM_UV_EVENT(kvm, 3, "PROTVIRT VM UNPACK: failed addr %llx with rc %x rrc %x",
+			     uvcb.gaddr, *rc, *rrc);
+	return ret;
+}
+
+int kvm_s390_pv_unpack(struct kvm *kvm, unsigned long addr, unsigned long size,
+		       unsigned long tweak, u16 *rc, u16 *rrc)
+{
+	u64 tw[2] = {tweak, 0};
+	int ret = 0;
+
+	if (addr & ~PAGE_MASK || !size || size & ~PAGE_MASK)
+		return -EINVAL;
+
+	KVM_UV_EVENT(kvm, 3, "PROTVIRT VM UNPACK: start addr %lx size %lx",
+		     addr, size);
+
+	while (tw[1] < size) {
+		ret = unpack_one(kvm, addr, tw, rc, rrc);
+		if (ret == -EAGAIN) {
+			cond_resched();
+			if (fatal_signal_pending(current))
+				break;
+			continue;
+		}
+		if (ret)
+			break;
+		addr += PAGE_SIZE;
+		tw[1] += PAGE_SIZE;
+	}
+	if (!ret)
+		KVM_UV_EVENT(kvm, 3, "%s", "PROTVIRT VM UNPACK: successful");
+	return ret;
+}
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 4b95f9a31a2f..efb5c4d65396 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1478,6 +1478,37 @@  struct kvm_enc_region {
 #define KVM_S390_NORMAL_RESET	_IO(KVMIO,   0xc3)
 #define KVM_S390_CLEAR_RESET	_IO(KVMIO,   0xc4)
 
+struct kvm_s390_pv_sec_parm {
+	__u64 origin;
+	__u64 length;
+};
+
+struct kvm_s390_pv_unp {
+	__u64 addr;
+	__u64 size;
+	__u64 tweak;
+};
+
+enum pv_cmd_id {
+	KVM_PV_ENABLE,
+	KVM_PV_DISABLE,
+	KVM_PV_VM_SET_SEC_PARMS,
+	KVM_PV_VM_UNPACK,
+	KVM_PV_VM_VERIFY,
+};
+
+struct kvm_pv_cmd {
+	__u32 cmd;	/* Command to be executed */
+	__u16 rc;	/* Ultravisor return code */
+	__u16 rrc;	/* Ultravisor return reason code */
+	__u64 data;	/* Data or address */
+	__u32 flags;    /* flags for future extensions. Must be 0 for now */
+	__u32 reserved[3];
+};
+
+/* Available with KVM_CAP_S390_PROTECTED */
+#define KVM_S390_PV_COMMAND		_IOWR(KVMIO, 0xc5, struct kvm_pv_cmd)
+
 /* Secure Encrypted Virtualization command */
 enum sev_cmd_id {
 	/* Guest initialization commands */