diff mbox series

[v2,05/11] KVM: s390: Add optional storage key checking to MEMOP IOCTL

Message ID 20220207165930.1608621-6-scgl@linux.ibm.com (mailing list archive)
State New, archived
Headers show
Series KVM: s390: Do storage key checking | expand

Commit Message

Janis Schoetterl-Glausch Feb. 7, 2022, 4:59 p.m. UTC
User space needs a mechanism to perform key checked accesses when
emulating instructions.

The key can be passed as an additional argument.
Having an additional argument is flexible, as user space can
pass the guest PSW's key, in order to make an access the same way the
CPU would, or pass another key if necessary.

Signed-off-by: Janis Schoetterl-Glausch <scgl@linux.ibm.com>
Acked-by: Janosch Frank <frankja@linux.ibm.com>
Reviewed-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
---
 arch/s390/kvm/kvm-s390.c | 49 +++++++++++++++++++++++++++++++---------
 include/uapi/linux/kvm.h |  8 +++++--
 2 files changed, 44 insertions(+), 13 deletions(-)

Comments

Christian Borntraeger Feb. 9, 2022, 7:34 a.m. UTC | #1
Am 07.02.22 um 17:59 schrieb Janis Schoetterl-Glausch:
> User space needs a mechanism to perform key checked accesses when
> emulating instructions.
> 
> The key can be passed as an additional argument.
> Having an additional argument is flexible, as user space can
> pass the guest PSW's key, in order to make an access the same way the
> CPU would, or pass another key if necessary.
> 
> Signed-off-by: Janis Schoetterl-Glausch <scgl@linux.ibm.com>
> Acked-by: Janosch Frank <frankja@linux.ibm.com>
> Reviewed-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
> ---
>   arch/s390/kvm/kvm-s390.c | 49 +++++++++++++++++++++++++++++++---------
>   include/uapi/linux/kvm.h |  8 +++++--
>   2 files changed, 44 insertions(+), 13 deletions(-)
> 
> diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
> index cf347e1a4f17..71e61fb3f0d9 100644
> --- a/arch/s390/kvm/kvm-s390.c
> +++ b/arch/s390/kvm/kvm-s390.c
> @@ -32,6 +32,7 @@
>   #include <linux/sched/signal.h>
>   #include <linux/string.h>
>   #include <linux/pgtable.h>
> +#include <linux/bitfield.h>
>   
>   #include <asm/asm-offsets.h>
>   #include <asm/lowcore.h>
> @@ -2359,6 +2360,11 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
>   	return r;
>   }
>   
> +static bool access_key_invalid(u8 access_key)
> +{
> +	return access_key > 0xf;
> +}
> +
>   long kvm_arch_vm_ioctl(struct file *filp,
>   		       unsigned int ioctl, unsigned long arg)
>   {
> @@ -4687,34 +4693,54 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
>   				  struct kvm_s390_mem_op *mop)
>   {
>   	void __user *uaddr = (void __user *)mop->buf;
> +	u8 access_key = 0, ar = 0;
>   	void *tmpbuf = NULL;
> +	bool check_reserved;
>   	int r = 0;
>   	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
> -				    | KVM_S390_MEMOP_F_CHECK_ONLY;
> +				    | KVM_S390_MEMOP_F_CHECK_ONLY
> +				    | KVM_S390_MEMOP_F_SKEY_PROTECTION;
>   
> -	if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
> +	if (mop->flags & ~supported_flags || !mop->size)
>   		return -EINVAL;
> -
>   	if (mop->size > MEM_OP_MAX_SIZE)
>   		return -E2BIG;
> -
>   	if (kvm_s390_pv_cpu_is_protected(vcpu))
>   		return -EINVAL;
> -
>   	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
>   		tmpbuf = vmalloc(mop->size);
>   		if (!tmpbuf)
>   			return -ENOMEM;
>   	}
> +	ar = mop->ar;
> +	mop->ar = 0;

Why this assignment to 0?

> +	if (ar >= NUM_ACRS)
> +		return -EINVAL;
> +	if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) {
> +		access_key = mop->key;
> +		mop->key = 0;

and this? I think we can leave mop unchanged.

In fact, why do we add the ar and access_key variable?
This breaks the check from above (if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size))  into two checks
and it will create a memleak for tmpbuf.

Simply use mop->key and mop->ar below and get rid of the local variables.
The structure has no concurrency and gcc will handle that just as the local variable.

Other than that this looks good.
> +		if (access_key_invalid(access_key))
> +			return -EINVAL;
> +	}
> +	/*
> +	 * Check that reserved/unused == 0, but only for extensions,
> +	 * so we stay backward compatible.
> +	 * This gives us more design flexibility for future extensions, i.e.
> +	 * we can add functionality without adding a flag.
> +	 */
> +	check_reserved = mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION;
> +	if (check_reserved && memchr_inv(&mop->reserved, 0, sizeof(mop->reserved)))
> +		return -EINVAL;
>   
>   	switch (mop->op) {
>   	case KVM_S390_MEMOP_LOGICAL_READ:
>   		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
> -			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
> -					    mop->size, GACC_FETCH, 0);
> +			r = check_gva_range(vcpu, mop->gaddr, ar, mop->size,
> +					    GACC_FETCH, access_key);
>   			break;
>   		}
> -		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
> +		r = read_guest_with_key(vcpu, mop->gaddr, ar, tmpbuf,
> +					mop->size, access_key);
>   		if (r == 0) {
>   			if (copy_to_user(uaddr, tmpbuf, mop->size))
>   				r = -EFAULT;
> @@ -4722,15 +4748,16 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
>   		break;
>   	case KVM_S390_MEMOP_LOGICAL_WRITE:
>   		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
> -			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
> -					    mop->size, GACC_STORE, 0);
> +			r = check_gva_range(vcpu, mop->gaddr, ar, mop->size,
> +					    GACC_STORE, access_key);
>   			break;
>   		}
>   		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
>   			r = -EFAULT;
>   			break;
>   		}
> -		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
> +		r = write_guest_with_key(vcpu, mop->gaddr, ar, tmpbuf,
> +					 mop->size, access_key);
>   		break;
>   	}
>   
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index b46bcdb0cab1..5771b026fbc0 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -562,9 +562,12 @@ struct kvm_s390_mem_op {
>   	__u32 op;		/* type of operation */
>   	__u64 buf;		/* buffer in userspace */
>   	union {
> -		__u8 ar;	/* the access register number */
> +		struct {
> +			__u8 ar;	/* the access register number */
> +			__u8 key;	/* access key to use for storage key protection */
> +		};
>   		__u32 sida_offset; /* offset into the sida */
> -		__u8 reserved[32]; /* should be set to 0 */
> +		__u8 reserved[32]; /* must be set to 0 */
>   	};
>   };
Janis Schoetterl-Glausch Feb. 9, 2022, 8:49 a.m. UTC | #2
On 2/9/22 08:34, Christian Borntraeger wrote:
> Am 07.02.22 um 17:59 schrieb Janis Schoetterl-Glausch:
>> User space needs a mechanism to perform key checked accesses when
>> emulating instructions.
>>
>> The key can be passed as an additional argument.
>> Having an additional argument is flexible, as user space can
>> pass the guest PSW's key, in order to make an access the same way the
>> CPU would, or pass another key if necessary.
>>
>> Signed-off-by: Janis Schoetterl-Glausch <scgl@linux.ibm.com>
>> Acked-by: Janosch Frank <frankja@linux.ibm.com>
>> Reviewed-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
>> ---
>>   arch/s390/kvm/kvm-s390.c | 49 +++++++++++++++++++++++++++++++---------
>>   include/uapi/linux/kvm.h |  8 +++++--
>>   2 files changed, 44 insertions(+), 13 deletions(-)
>>
>> diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
>> index cf347e1a4f17..71e61fb3f0d9 100644
>> --- a/arch/s390/kvm/kvm-s390.c
>> +++ b/arch/s390/kvm/kvm-s390.c
>> @@ -32,6 +32,7 @@
>>   #include <linux/sched/signal.h>
>>   #include <linux/string.h>
>>   #include <linux/pgtable.h>
>> +#include <linux/bitfield.h>
>>     #include <asm/asm-offsets.h>
>>   #include <asm/lowcore.h>
>> @@ -2359,6 +2360,11 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
>>       return r;
>>   }
>>   +static bool access_key_invalid(u8 access_key)
>> +{
>> +    return access_key > 0xf;
>> +}
>> +
>>   long kvm_arch_vm_ioctl(struct file *filp,
>>                  unsigned int ioctl, unsigned long arg)
>>   {
>> @@ -4687,34 +4693,54 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
>>                     struct kvm_s390_mem_op *mop)
>>   {
>>       void __user *uaddr = (void __user *)mop->buf;
>> +    u8 access_key = 0, ar = 0;
>>       void *tmpbuf = NULL;
>> +    bool check_reserved;
>>       int r = 0;
>>       const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
>> -                    | KVM_S390_MEMOP_F_CHECK_ONLY;
>> +                    | KVM_S390_MEMOP_F_CHECK_ONLY
>> +                    | KVM_S390_MEMOP_F_SKEY_PROTECTION;
>>   -    if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
>> +    if (mop->flags & ~supported_flags || !mop->size)
>>           return -EINVAL;
>> -
>>       if (mop->size > MEM_OP_MAX_SIZE)
>>           return -E2BIG;
>> -
>>       if (kvm_s390_pv_cpu_is_protected(vcpu))
>>           return -EINVAL;
>> -
>>       if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
>>           tmpbuf = vmalloc(mop->size);
>>           if (!tmpbuf)
>>               return -ENOMEM;
>>       }
>> +    ar = mop->ar;
>> +    mop->ar = 0;
> 
> Why this assignment to 0?

It's so that the check of reserved below works as is: they're all part of the same anonymous union.
> 
>> +    if (ar >= NUM_ACRS)
>> +        return -EINVAL;
>> +    if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) {
>> +        access_key = mop->key;
>> +        mop->key = 0;
> 
> and this? I think we can leave mop unchanged.
> 
> In fact, why do we add the ar and access_key variable?
> This breaks the check from above (if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size))  into two checks
> and it will create a memleak for tmpbuf.

I can move the allocation down, goto out or get rid of the reserved check and keep everything as before.
First is simpler, but second makes handling that case more explicit and might help in the future.
Patch 6 has the same issue in the vm ioctl handler.
> 
> Simply use mop->key and mop->ar below and get rid of the local variables.
> The structure has no concurrency and gcc will handle that just as the local variable.
> 
> Other than that this looks good.

[...]
Christian Borntraeger Feb. 9, 2022, 9:08 a.m. UTC | #3
Am 09.02.22 um 09:49 schrieb Janis Schoetterl-Glausch:
> On 2/9/22 08:34, Christian Borntraeger wrote:
>> Am 07.02.22 um 17:59 schrieb Janis Schoetterl-Glausch:
>>> User space needs a mechanism to perform key checked accesses when
>>> emulating instructions.
>>>
>>> The key can be passed as an additional argument.
>>> Having an additional argument is flexible, as user space can
>>> pass the guest PSW's key, in order to make an access the same way the
>>> CPU would, or pass another key if necessary.
>>>
>>> Signed-off-by: Janis Schoetterl-Glausch <scgl@linux.ibm.com>
>>> Acked-by: Janosch Frank <frankja@linux.ibm.com>
>>> Reviewed-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
>>> ---
>>>    arch/s390/kvm/kvm-s390.c | 49 +++++++++++++++++++++++++++++++---------
>>>    include/uapi/linux/kvm.h |  8 +++++--
>>>    2 files changed, 44 insertions(+), 13 deletions(-)
>>>
>>> diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
>>> index cf347e1a4f17..71e61fb3f0d9 100644
>>> --- a/arch/s390/kvm/kvm-s390.c
>>> +++ b/arch/s390/kvm/kvm-s390.c
>>> @@ -32,6 +32,7 @@
>>>    #include <linux/sched/signal.h>
>>>    #include <linux/string.h>
>>>    #include <linux/pgtable.h>
>>> +#include <linux/bitfield.h>
>>>      #include <asm/asm-offsets.h>
>>>    #include <asm/lowcore.h>
>>> @@ -2359,6 +2360,11 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
>>>        return r;
>>>    }
>>>    +static bool access_key_invalid(u8 access_key)
>>> +{
>>> +    return access_key > 0xf;
>>> +}
>>> +
>>>    long kvm_arch_vm_ioctl(struct file *filp,
>>>                   unsigned int ioctl, unsigned long arg)
>>>    {
>>> @@ -4687,34 +4693,54 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
>>>                      struct kvm_s390_mem_op *mop)
>>>    {
>>>        void __user *uaddr = (void __user *)mop->buf;
>>> +    u8 access_key = 0, ar = 0;
>>>        void *tmpbuf = NULL;
>>> +    bool check_reserved;
>>>        int r = 0;
>>>        const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
>>> -                    | KVM_S390_MEMOP_F_CHECK_ONLY;
>>> +                    | KVM_S390_MEMOP_F_CHECK_ONLY
>>> +                    | KVM_S390_MEMOP_F_SKEY_PROTECTION;
>>>    -    if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
>>> +    if (mop->flags & ~supported_flags || !mop->size)
>>>            return -EINVAL;
>>> -
>>>        if (mop->size > MEM_OP_MAX_SIZE)
>>>            return -E2BIG;
>>> -
>>>        if (kvm_s390_pv_cpu_is_protected(vcpu))
>>>            return -EINVAL;
>>> -
>>>        if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
>>>            tmpbuf = vmalloc(mop->size);
>>>            if (!tmpbuf)
>>>                return -ENOMEM;
>>>        }
>>> +    ar = mop->ar;
>>> +    mop->ar = 0;
>>
>> Why this assignment to 0?
> 
> It's so the check of reserved below works like that, they're all part of the anonymous union.

Ah, I see. This is ugly :-)

>>
>>> +    if (ar >= NUM_ACRS)
>>> +        return -EINVAL;
>>> +    if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) {
>>> +        access_key = mop->key;
>>> +        mop->key = 0;
>>
>> and this? I think we can leave mop unchanged.
>>
>> In fact, why do we add the ar and access_key variable?
>> This breaks the check from above (if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size))  into two checks
>> and it will create a memleak for tmpbuf.
> 
> I can move the allocation down, goto out or get rid of the reserved check and keep everything as before.
> First is simpler, but second makes handling that case more explicit and might help in the future.

Maybe add a reserved_02 field in the anon struct and check this for being zero and get rid of the local variables?

> Patch 6 has the same issue in the vm ioctl handler.
>>
>> Simply use mop->key and mop->ar below and get rid of the local variables.
>> The structure has no concurrency and gcc will handle that just as the local variable.
>>
>> Other than that this looks good.
> 
> [...]
>
Christian Borntraeger Feb. 9, 2022, 9:34 a.m. UTC | #4
CC Konstantin,

I hope you can find the right people. It looks like my (and Janis') emails did not make it to the linux-s390 and kvm lists at vger.
Message-ID: <6ea27647-fbbe-3962-03a0-8ca5340fc7fd@linux.ibm.com>


Am 09.02.22 um 10:08 schrieb Christian Borntraeger:
> 
> 
> Am 09.02.22 um 09:49 schrieb Janis Schoetterl-Glausch:
>> On 2/9/22 08:34, Christian Borntraeger wrote:
>>> Am 07.02.22 um 17:59 schrieb Janis Schoetterl-Glausch:
>>>> User space needs a mechanism to perform key checked accesses when
>>>> emulating instructions.
>>>>
>>>> The key can be passed as an additional argument.
>>>> Having an additional argument is flexible, as user space can
>>>> pass the guest PSW's key, in order to make an access the same way the
>>>> CPU would, or pass another key if necessary.
>>>>
>>>> Signed-off-by: Janis Schoetterl-Glausch <scgl@linux.ibm.com>
>>>> Acked-by: Janosch Frank <frankja@linux.ibm.com>
>>>> Reviewed-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
>>>> ---
>>>>    arch/s390/kvm/kvm-s390.c | 49 +++++++++++++++++++++++++++++++---------
>>>>    include/uapi/linux/kvm.h |  8 +++++--
>>>>    2 files changed, 44 insertions(+), 13 deletions(-)
>>>>
>>>> diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
>>>> index cf347e1a4f17..71e61fb3f0d9 100644
>>>> --- a/arch/s390/kvm/kvm-s390.c
>>>> +++ b/arch/s390/kvm/kvm-s390.c
>>>> @@ -32,6 +32,7 @@
>>>>    #include <linux/sched/signal.h>
>>>>    #include <linux/string.h>
>>>>    #include <linux/pgtable.h>
>>>> +#include <linux/bitfield.h>
>>>>      #include <asm/asm-offsets.h>
>>>>    #include <asm/lowcore.h>
>>>> @@ -2359,6 +2360,11 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
>>>>        return r;
>>>>    }
>>>>    +static bool access_key_invalid(u8 access_key)
>>>> +{
>>>> +    return access_key > 0xf;
>>>> +}
>>>> +
>>>>    long kvm_arch_vm_ioctl(struct file *filp,
>>>>                   unsigned int ioctl, unsigned long arg)
>>>>    {
>>>> @@ -4687,34 +4693,54 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
>>>>                      struct kvm_s390_mem_op *mop)
>>>>    {
>>>>        void __user *uaddr = (void __user *)mop->buf;
>>>> +    u8 access_key = 0, ar = 0;
>>>>        void *tmpbuf = NULL;
>>>> +    bool check_reserved;
>>>>        int r = 0;
>>>>        const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
>>>> -                    | KVM_S390_MEMOP_F_CHECK_ONLY;
>>>> +                    | KVM_S390_MEMOP_F_CHECK_ONLY
>>>> +                    | KVM_S390_MEMOP_F_SKEY_PROTECTION;
>>>>    -    if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
>>>> +    if (mop->flags & ~supported_flags || !mop->size)
>>>>            return -EINVAL;
>>>> -
>>>>        if (mop->size > MEM_OP_MAX_SIZE)
>>>>            return -E2BIG;
>>>> -
>>>>        if (kvm_s390_pv_cpu_is_protected(vcpu))
>>>>            return -EINVAL;
>>>> -
>>>>        if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
>>>>            tmpbuf = vmalloc(mop->size);
>>>>            if (!tmpbuf)
>>>>                return -ENOMEM;
>>>>        }
>>>> +    ar = mop->ar;
>>>> +    mop->ar = 0;
>>>
>>> Why this assignment to 0?
>>
>> It's so the check of reserved below works like that, they're all part of the anonymous union.
> 
> Ah, I see. This is ugly :-)
> 
>>>
>>>> +    if (ar >= NUM_ACRS)
>>>> +        return -EINVAL;
>>>> +    if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) {
>>>> +        access_key = mop->key;
>>>> +        mop->key = 0;
>>>
>>> and this? I think we can leave mop unchanged.
>>>
>>> In fact, why do we add the ar and access_key variable?
>>> This breaks the check from above (if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size))  into two checks
>>> and it will create a memleak for tmpbuf.
>>
>> I can move the allocation down, goto out or get rid of the reserved check and keep everything as before.
>> First is simpler, but second makes handling that case more explicit and might help in the future.
> 
> Maybe add a reserved_02 field in the anon struct and check this for being zero and get rid of the local variables?
> 
>> Patch 6 has the same issue in the vm ioctl handler.
>>>
>>> Simply use mop->key and mop->ar below and get rid of the local variables.
>>> The structure has no concurrency and gcc will handle that just as the local variable.
>>>
>>> Other than that this looks good.
>>
>> [...]
>>
Janis Schoetterl-Glausch Feb. 9, 2022, 10:01 a.m. UTC | #5
On Wed, 2022-02-09 at 10:08 +0100, Christian Borntraeger wrote:
> 
> Am 09.02.22 um 09:49 schrieb Janis Schoetterl-Glausch:
> > On 2/9/22 08:34, Christian Borntraeger wrote:
> > > Am 07.02.22 um 17:59 schrieb Janis Schoetterl-Glausch:
> > > > User space needs a mechanism to perform key checked accesses when
> > > > emulating instructions.
> > > > 
> > > > The key can be passed as an additional argument.
> > > > Having an additional argument is flexible, as user space can
> > > > pass the guest PSW's key, in order to make an access the same way the
> > > > CPU would, or pass another key if necessary.
> > > > 
> > > > Signed-off-by: Janis Schoetterl-Glausch <scgl@linux.ibm.com>
> > > > Acked-by: Janosch Frank <frankja@linux.ibm.com>
> > > > Reviewed-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
> > > > ---
> > > >    arch/s390/kvm/kvm-s390.c | 49 +++++++++++++++++++++++++++++++---------
> > > >    include/uapi/linux/kvm.h |  8 +++++--
> > > >    2 files changed, 44 insertions(+), 13 deletions(-)
> > > > 
> > > > diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
> > > > index cf347e1a4f17..71e61fb3f0d9 100644
> > > > --- a/arch/s390/kvm/kvm-s390.c
> > > > +++ b/arch/s390/kvm/kvm-s390.c
> > > > @@ -32,6 +32,7 @@
> > > >    #include <linux/sched/signal.h>
> > > >    #include <linux/string.h>
> > > >    #include <linux/pgtable.h>
> > > > +#include <linux/bitfield.h>
> > > >      #include <asm/asm-offsets.h>
> > > >    #include <asm/lowcore.h>
> > > > @@ -2359,6 +2360,11 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
> > > >        return r;
> > > >    }
> > > >    +static bool access_key_invalid(u8 access_key)
> > > > +{
> > > > +    return access_key > 0xf;
> > > > +}
> > > > +
> > > >    long kvm_arch_vm_ioctl(struct file *filp,
> > > >                   unsigned int ioctl, unsigned long arg)
> > > >    {
> > > > @@ -4687,34 +4693,54 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
> > > >                      struct kvm_s390_mem_op *mop)
> > > >    {
> > > >        void __user *uaddr = (void __user *)mop->buf;
> > > > +    u8 access_key = 0, ar = 0;
> > > >        void *tmpbuf = NULL;
> > > > +    bool check_reserved;
> > > >        int r = 0;
> > > >        const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
> > > > -                    | KVM_S390_MEMOP_F_CHECK_ONLY;
> > > > +                    | KVM_S390_MEMOP_F_CHECK_ONLY
> > > > +                    | KVM_S390_MEMOP_F_SKEY_PROTECTION;
> > > >    -    if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
> > > > +    if (mop->flags & ~supported_flags || !mop->size)
> > > >            return -EINVAL;
> > > > -
> > > >        if (mop->size > MEM_OP_MAX_SIZE)
> > > >            return -E2BIG;
> > > > -
> > > >        if (kvm_s390_pv_cpu_is_protected(vcpu))
> > > >            return -EINVAL;
> > > > -
> > > >        if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
> > > >            tmpbuf = vmalloc(mop->size);
> > > >            if (!tmpbuf)
> > > >                return -ENOMEM;
> > > >        }
> > > > +    ar = mop->ar;
> > > > +    mop->ar = 0;
> > > 
> > > Why this assignment to 0?
> > 
> > It's so the check of reserved below works like that, they're all part of the anonymous union.
> 
> Ah, I see. This is ugly :-)

Yes :)
> 
> > > > +    if (ar >= NUM_ACRS)
> > > > +        return -EINVAL;
> > > > +    if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) {
> > > > +        access_key = mop->key;
> > > > +        mop->key = 0;
> > > 
> > > and this? I think we can leave mop unchanged.
> > > 
> > > In fact, why do we add the ar and access_key variable?
> > > This breaks the check from above (if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size))  into two checks
> > > and it will create a memleak for tmpbuf.
> > 
> > I can move the allocation down, goto out or get rid of the reserved check and keep everything as before.
> > First is simpler, but second makes handling that case more explicit and might help in the future.
> 
> Maybe add a reserved_02 field in the anon struct and check this for being zero and get rid of the local variables?

I think that would require us adding new fields in the struct by putting them in a union with reserved_02 and so on,
which could get rather messy.

Maybe a comment is good enough?
> 
> > Patch 6 has the same issue in the vm ioctl handler.
> > > Simply use mop->key and mop->ar below and get rid of the local variables.
> > > The structure has no concurrency and gcc will handle that just as the local variable.
> > > 
> > > Other than that this looks good.
> > 
> > [...]
> >
Christian Borntraeger Feb. 9, 2022, 10:08 a.m. UTC | #6
Am 09.02.22 um 11:01 schrieb Janis Schoetterl-Glausch:
> On Wed, 2022-02-09 at 10:08 +0100, Christian Borntraeger wrote:
>>
>> Am 09.02.22 um 09:49 schrieb Janis Schoetterl-Glausch:
>>> On 2/9/22 08:34, Christian Borntraeger wrote:
>>>> Am 07.02.22 um 17:59 schrieb Janis Schoetterl-Glausch:
>>>>> User space needs a mechanism to perform key checked accesses when
>>>>> emulating instructions.
>>>>>
>>>>> The key can be passed as an additional argument.
>>>>> Having an additional argument is flexible, as user space can
>>>>> pass the guest PSW's key, in order to make an access the same way the
>>>>> CPU would, or pass another key if necessary.
>>>>>
>>>>> Signed-off-by: Janis Schoetterl-Glausch <scgl@linux.ibm.com>
>>>>> Acked-by: Janosch Frank <frankja@linux.ibm.com>
>>>>> Reviewed-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
>>>>> ---
>>>>>     arch/s390/kvm/kvm-s390.c | 49 +++++++++++++++++++++++++++++++---------
>>>>>     include/uapi/linux/kvm.h |  8 +++++--
>>>>>     2 files changed, 44 insertions(+), 13 deletions(-)
>>>>>
>>>>> diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
>>>>> index cf347e1a4f17..71e61fb3f0d9 100644
>>>>> --- a/arch/s390/kvm/kvm-s390.c
>>>>> +++ b/arch/s390/kvm/kvm-s390.c
>>>>> @@ -32,6 +32,7 @@
>>>>>     #include <linux/sched/signal.h>
>>>>>     #include <linux/string.h>
>>>>>     #include <linux/pgtable.h>
>>>>> +#include <linux/bitfield.h>
>>>>>       #include <asm/asm-offsets.h>
>>>>>     #include <asm/lowcore.h>
>>>>> @@ -2359,6 +2360,11 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
>>>>>         return r;
>>>>>     }
>>>>>     +static bool access_key_invalid(u8 access_key)
>>>>> +{
>>>>> +    return access_key > 0xf;
>>>>> +}
>>>>> +
>>>>>     long kvm_arch_vm_ioctl(struct file *filp,
>>>>>                    unsigned int ioctl, unsigned long arg)
>>>>>     {
>>>>> @@ -4687,34 +4693,54 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
>>>>>                       struct kvm_s390_mem_op *mop)
>>>>>     {
>>>>>         void __user *uaddr = (void __user *)mop->buf;
>>>>> +    u8 access_key = 0, ar = 0;
>>>>>         void *tmpbuf = NULL;
>>>>> +    bool check_reserved;
>>>>>         int r = 0;
>>>>>         const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
>>>>> -                    | KVM_S390_MEMOP_F_CHECK_ONLY;
>>>>> +                    | KVM_S390_MEMOP_F_CHECK_ONLY
>>>>> +                    | KVM_S390_MEMOP_F_SKEY_PROTECTION;
>>>>>     -    if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
>>>>> +    if (mop->flags & ~supported_flags || !mop->size)
>>>>>             return -EINVAL;
>>>>> -
>>>>>         if (mop->size > MEM_OP_MAX_SIZE)
>>>>>             return -E2BIG;
>>>>> -
>>>>>         if (kvm_s390_pv_cpu_is_protected(vcpu))
>>>>>             return -EINVAL;
>>>>> -
>>>>>         if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
>>>>>             tmpbuf = vmalloc(mop->size);
>>>>>             if (!tmpbuf)
>>>>>                 return -ENOMEM;
>>>>>         }
>>>>> +    ar = mop->ar;
>>>>> +    mop->ar = 0;
>>>>
>>>> Why this assignment to 0?
>>>
>>> It's so the check of reserved below works like that, they're all part of the anonymous union.
>>
>> Ah, I see. This is ugly :-)
> 
> Yes :)
>>
>>>>> +    if (ar >= NUM_ACRS)
>>>>> +        return -EINVAL;
>>>>> +    if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) {
>>>>> +        access_key = mop->key;
>>>>> +        mop->key = 0;
>>>>
>>>> and this? I think we can leave mop unchanged.
>>>>
>>>> In fact, why do we add the ar and access_key variable?
>>>> This breaks the check from above (if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size))  into two checks
>>>> and it will create a memleak for tmpbuf.
>>>
>>> I can move the allocation down, goto out or get rid of the reserved check and keep everything as before.
>>> First is simpler, but second makes handling that case more explicit and might help in the future.
>>
>> Maybe add a reserved_02 field in the anon struct and check this for being zero and get rid of the local variables?
> 
> I think that would require us adding new fields in the struct by putting them in a union with reserved_02 and so on,
> which could get rather messy.

I think it is fine to rename reserved_02. Maybe rename that to dont_use_02 ?
> 
> Maybe a comment is good enough?
>>
>>> Patch 6 has the same issue in the vm ioctl handler.
>>>> Simply use mop->key and mop->ar below and get rid of the local variables.
>>>> The structure has no concurrency and gcc will handle that just as the local variable.
>>>>
>>>> Other than that this looks good.
>>>
>>> [...]
>>>
>
Janis Schoetterl-Glausch Feb. 9, 2022, 10:39 a.m. UTC | #7
On 2/9/22 11:08, Christian Borntraeger wrote:
> 
> 
> Am 09.02.22 um 11:01 schrieb Janis Schoetterl-Glausch:
>> On Wed, 2022-02-09 at 10:08 +0100, Christian Borntraeger wrote:
>>>
>>> Am 09.02.22 um 09:49 schrieb Janis Schoetterl-Glausch:
>>>> On 2/9/22 08:34, Christian Borntraeger wrote:
>>>>> Am 07.02.22 um 17:59 schrieb Janis Schoetterl-Glausch:
>>>>>> User space needs a mechanism to perform key checked accesses when
>>>>>> emulating instructions.
>>>>>>
>>>>>> The key can be passed as an additional argument.
>>>>>> Having an additional argument is flexible, as user space can
>>>>>> pass the guest PSW's key, in order to make an access the same way the
>>>>>> CPU would, or pass another key if necessary.
>>>>>>
>>>>>> Signed-off-by: Janis Schoetterl-Glausch <scgl@linux.ibm.com>
>>>>>> Acked-by: Janosch Frank <frankja@linux.ibm.com>
>>>>>> Reviewed-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
>>>>>> ---
>>>>>>     arch/s390/kvm/kvm-s390.c | 49 +++++++++++++++++++++++++++++++---------
>>>>>>     include/uapi/linux/kvm.h |  8 +++++--
>>>>>>     2 files changed, 44 insertions(+), 13 deletions(-)
>>>>>>
>>>>>> diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
>>>>>> index cf347e1a4f17..71e61fb3f0d9 100644
>>>>>> --- a/arch/s390/kvm/kvm-s390.c
>>>>>> +++ b/arch/s390/kvm/kvm-s390.c
>>>>>> @@ -32,6 +32,7 @@
>>>>>>     #include <linux/sched/signal.h>
>>>>>>     #include <linux/string.h>
>>>>>>     #include <linux/pgtable.h>
>>>>>> +#include <linux/bitfield.h>
>>>>>>       #include <asm/asm-offsets.h>
>>>>>>     #include <asm/lowcore.h>
>>>>>> @@ -2359,6 +2360,11 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
>>>>>>         return r;
>>>>>>     }
>>>>>>     +static bool access_key_invalid(u8 access_key)
>>>>>> +{
>>>>>> +    return access_key > 0xf;
>>>>>> +}
>>>>>> +
>>>>>>     long kvm_arch_vm_ioctl(struct file *filp,
>>>>>>                    unsigned int ioctl, unsigned long arg)
>>>>>>     {
>>>>>> @@ -4687,34 +4693,54 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
>>>>>>                       struct kvm_s390_mem_op *mop)
>>>>>>     {
>>>>>>         void __user *uaddr = (void __user *)mop->buf;
>>>>>> +    u8 access_key = 0, ar = 0;
>>>>>>         void *tmpbuf = NULL;
>>>>>> +    bool check_reserved;
>>>>>>         int r = 0;
>>>>>>         const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
>>>>>> -                    | KVM_S390_MEMOP_F_CHECK_ONLY;
>>>>>> +                    | KVM_S390_MEMOP_F_CHECK_ONLY
>>>>>> +                    | KVM_S390_MEMOP_F_SKEY_PROTECTION;
>>>>>>     -    if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
>>>>>> +    if (mop->flags & ~supported_flags || !mop->size)
>>>>>>             return -EINVAL;
>>>>>> -
>>>>>>         if (mop->size > MEM_OP_MAX_SIZE)
>>>>>>             return -E2BIG;
>>>>>> -
>>>>>>         if (kvm_s390_pv_cpu_is_protected(vcpu))
>>>>>>             return -EINVAL;
>>>>>> -
>>>>>>         if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
>>>>>>             tmpbuf = vmalloc(mop->size);
>>>>>>             if (!tmpbuf)
>>>>>>                 return -ENOMEM;
>>>>>>         }
>>>>>> +    ar = mop->ar;
>>>>>> +    mop->ar = 0;
>>>>>
>>>>> Why this assignment to 0?
>>>>
>>>> It's so the check of reserved below works like that, they're all part of the anonymous union.
>>>
>>> Ah, I see. This is ugly :-)
>>
>> Yes :)
>>>
>>>>>> +    if (ar >= NUM_ACRS)
>>>>>> +        return -EINVAL;
>>>>>> +    if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) {
>>>>>> +        access_key = mop->key;
>>>>>> +        mop->key = 0;
>>>>>
>>>>> and this? I think we can leave mop unchanged.
>>>>>
>>>>> In fact, why do we add the ar and access_key variable?
>>>>> This breaks the check from above (if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size))  into two checks
>>>>> and it will create a memleak for tmpbuf.
>>>>
>>>> I can move the allocation down, goto out or get rid of the reserved check and keep everything as before.
>>>> First is simpler, but second makes handling that case more explicit and might help in the future.
>>>
>>> Maybe add a reserved_02 field in the anon struct and check this for being zero and get rid of the local variables?
>>
>> I think that would require us adding new fields in the struct by putting them in a union with reserved_02 and so on,
>> which could get rather messy.
> 
> I think it is fine to rename reserved_02. Maybe rename that to dont_use_02 ?

I don't know what kind of stability guarantees we give here, since it can only happen when recompiling with
a new header. dont_use is a lot better than reserved here; after all, we tell user space to set
reserved bytes to 0, so using reserved_02 to do that would be quite handy and therefore likely.

The question is also what semantics we want for the check.
The way it works right now, user space also needs to set unused fields to 0, e.g. key if the flag is not set.
At least this is the case for the vm memop, the vcpu memop cannot do that because of backward compatibility.
>>
>> Maybe a comment is good enough?
>>>
>>>> Patch 6 has the same issue in the vm ioctl handler.
>>>>> Simply use mop->key and mop->ar below and get rid of the local variables.
>>>>> The structure has no concurrency and gcc will handle that just as the local variable.
>>>>>
>>>>> Other than that this looks good.
>>>>
>>>> [...]
>>>>
>>
Christian Borntraeger Feb. 9, 2022, 10:48 a.m. UTC | #8
Am 09.02.22 um 11:39 schrieb Janis Schoetterl-Glausch:
> On 2/9/22 11:08, Christian Borntraeger wrote:
>>
>>
>> Am 09.02.22 um 11:01 schrieb Janis Schoetterl-Glausch:
>>> On Wed, 2022-02-09 at 10:08 +0100, Christian Borntraeger wrote:
>>>>
>>>> Am 09.02.22 um 09:49 schrieb Janis Schoetterl-Glausch:
>>>>> On 2/9/22 08:34, Christian Borntraeger wrote:
>>>>>> Am 07.02.22 um 17:59 schrieb Janis Schoetterl-Glausch:
>>>>>>> User space needs a mechanism to perform key checked accesses when
>>>>>>> emulating instructions.
>>>>>>>
>>>>>>> The key can be passed as an additional argument.
>>>>>>> Having an additional argument is flexible, as user space can
>>>>>>> pass the guest PSW's key, in order to make an access the same way the
>>>>>>> CPU would, or pass another key if necessary.
>>>>>>>
>>>>>>> Signed-off-by: Janis Schoetterl-Glausch <scgl@linux.ibm.com>
>>>>>>> Acked-by: Janosch Frank <frankja@linux.ibm.com>
>>>>>>> Reviewed-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
>>>>>>> ---
>>>>>>>      arch/s390/kvm/kvm-s390.c | 49 +++++++++++++++++++++++++++++++---------
>>>>>>>      include/uapi/linux/kvm.h |  8 +++++--
>>>>>>>      2 files changed, 44 insertions(+), 13 deletions(-)
>>>>>>>
>>>>>>> diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
>>>>>>> index cf347e1a4f17..71e61fb3f0d9 100644
>>>>>>> --- a/arch/s390/kvm/kvm-s390.c
>>>>>>> +++ b/arch/s390/kvm/kvm-s390.c
>>>>>>> @@ -32,6 +32,7 @@
>>>>>>>      #include <linux/sched/signal.h>
>>>>>>>      #include <linux/string.h>
>>>>>>>      #include <linux/pgtable.h>
>>>>>>> +#include <linux/bitfield.h>
>>>>>>>        #include <asm/asm-offsets.h>
>>>>>>>      #include <asm/lowcore.h>
>>>>>>> @@ -2359,6 +2360,11 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
>>>>>>>          return r;
>>>>>>>      }
>>>>>>>      +static bool access_key_invalid(u8 access_key)
>>>>>>> +{
>>>>>>> +    return access_key > 0xf;
>>>>>>> +}
>>>>>>> +
>>>>>>>      long kvm_arch_vm_ioctl(struct file *filp,
>>>>>>>                     unsigned int ioctl, unsigned long arg)
>>>>>>>      {
>>>>>>> @@ -4687,34 +4693,54 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
>>>>>>>                        struct kvm_s390_mem_op *mop)
>>>>>>>      {
>>>>>>>          void __user *uaddr = (void __user *)mop->buf;
>>>>>>> +    u8 access_key = 0, ar = 0;
>>>>>>>          void *tmpbuf = NULL;
>>>>>>> +    bool check_reserved;
>>>>>>>          int r = 0;
>>>>>>>          const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
>>>>>>> -                    | KVM_S390_MEMOP_F_CHECK_ONLY;
>>>>>>> +                    | KVM_S390_MEMOP_F_CHECK_ONLY
>>>>>>> +                    | KVM_S390_MEMOP_F_SKEY_PROTECTION;
>>>>>>>      -    if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
>>>>>>> +    if (mop->flags & ~supported_flags || !mop->size)
>>>>>>>              return -EINVAL;
>>>>>>> -
>>>>>>>          if (mop->size > MEM_OP_MAX_SIZE)
>>>>>>>              return -E2BIG;
>>>>>>> -
>>>>>>>          if (kvm_s390_pv_cpu_is_protected(vcpu))
>>>>>>>              return -EINVAL;
>>>>>>> -
>>>>>>>          if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
>>>>>>>              tmpbuf = vmalloc(mop->size);
>>>>>>>              if (!tmpbuf)
>>>>>>>                  return -ENOMEM;
>>>>>>>          }
>>>>>>> +    ar = mop->ar;
>>>>>>> +    mop->ar = 0;
>>>>>>
>>>>>> Why this assignment to 0?
>>>>>
>>>>> It's so the check of reserved below works like that, they're all part of the anonymous union.
>>>>
>>>> Ah, I see. This is ugly :-)
>>>
>>> Yes :)
>>>>
>>>>>>> +    if (ar >= NUM_ACRS)
>>>>>>> +        return -EINVAL;
>>>>>>> +    if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) {
>>>>>>> +        access_key = mop->key;
>>>>>>> +        mop->key = 0;
>>>>>>
>>>>>> and this? I think we can leave mop unchanged.
>>>>>>
>>>>>> In fact, why do we add the ar and access_key variable?
>>>>>> This breaks the check from above (if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size))  into two checks
>>>>>> and it will create a memleak for tmpbuf.
>>>>>
>>>>> I can move the allocation down, goto out or get rid of the reserved check and keep everything as before.
>>>>> First is simpler, but second makes handling that case more explicit and might help in the future.
>>>>
>>>> Maybe add a reserved_02 field in the anon struct and check this for being zero and get rid of the local variables?
>>>
>>> I think that would require us adding new fields in the struct by putting them in a union with reserved_02 and so on,
>>> which could get rather messy.
>>
>> I think it is fine to rename reserved_02. Maybe rename that to dont_use_02 ?
> 
> I don't know what kind of stability guarantees we give here, since it can only happen when recompiling with
> a new header. dont_use is a lot better than reserved here, after all we tell user space to set
> reserved bytes to 0, using reserved_02 to do that would be quite handy and therefore likely.
> 
> The question is also what semantic we want for the check.
> The way it works right now, user space also needs to set unused fields to 0, e.g. key if the flag is not set.
> At least this is the case for the vm memop, the vcpu memop cannot do that because of backward compatibility.

As an alternative just remove the check for reserved == 0 and do that later on as an add-on patch?
Janis Schoetterl-Glausch Feb. 9, 2022, 11:04 a.m. UTC | #9
On Wed, 2022-02-09 at 11:48 +0100, Christian Borntraeger wrote:
> 
> Am 09.02.22 um 11:39 schrieb Janis Schoetterl-Glausch:
> > On 2/9/22 11:08, Christian Borntraeger wrote:
> > > 
> > > Am 09.02.22 um 11:01 schrieb Janis Schoetterl-Glausch:
> > > > On Wed, 2022-02-09 at 10:08 +0100, Christian Borntraeger wrote:
> > > > > Am 09.02.22 um 09:49 schrieb Janis Schoetterl-Glausch:
> > > > > > On 2/9/22 08:34, Christian Borntraeger wrote:
> > > > > > > Am 07.02.22 um 17:59 schrieb Janis Schoetterl-Glausch:
> > > > > > > > User space needs a mechanism to perform key checked accesses when
> > > > > > > > emulating instructions.
> > > > > > > > 
> > > > > > > > The key can be passed as an additional argument.
> > > > > > > > Having an additional argument is flexible, as user space can
> > > > > > > > pass the guest PSW's key, in order to make an access the same way the
> > > > > > > > CPU would, or pass another key if necessary.
> > > > > > > > 
> > > > > > > > Signed-off-by: Janis Schoetterl-Glausch <scgl@linux.ibm.com>
> > > > > > > > Acked-by: Janosch Frank <frankja@linux.ibm.com>
> > > > > > > > Reviewed-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
> > > > > > > > ---
> > > > > > > >      arch/s390/kvm/kvm-s390.c | 49 +++++++++++++++++++++++++++++++---------
> > > > > > > >      include/uapi/linux/kvm.h |  8 +++++--
> > > > > > > >      2 files changed, 44 insertions(+), 13 deletions(-)
> > > > > > > > 
> > > > > > > > diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
> > > > > > > > index cf347e1a4f17..71e61fb3f0d9 100644
> > > > > > > > --- a/arch/s390/kvm/kvm-s390.c
> > > > > > > > +++ b/arch/s390/kvm/kvm-s390.c
> > > > > > > > @@ -32,6 +32,7 @@
> > > > > > > >      #include <linux/sched/signal.h>
> > > > > > > >      #include <linux/string.h>
> > > > > > > >      #include <linux/pgtable.h>
> > > > > > > > +#include <linux/bitfield.h>
> > > > > > > >        #include <asm/asm-offsets.h>
> > > > > > > >      #include <asm/lowcore.h>
> > > > > > > > @@ -2359,6 +2360,11 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
> > > > > > > >          return r;
> > > > > > > >      }
> > > > > > > >      +static bool access_key_invalid(u8 access_key)
> > > > > > > > +{
> > > > > > > > +    return access_key > 0xf;
> > > > > > > > +}
> > > > > > > > +
> > > > > > > >      long kvm_arch_vm_ioctl(struct file *filp,
> > > > > > > >                     unsigned int ioctl, unsigned long arg)
> > > > > > > >      {
> > > > > > > > @@ -4687,34 +4693,54 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
> > > > > > > >                        struct kvm_s390_mem_op *mop)
> > > > > > > >      {
> > > > > > > >          void __user *uaddr = (void __user *)mop->buf;
> > > > > > > > +    u8 access_key = 0, ar = 0;
> > > > > > > >          void *tmpbuf = NULL;
> > > > > > > > +    bool check_reserved;
> > > > > > > >          int r = 0;
> > > > > > > >          const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
> > > > > > > > -                    | KVM_S390_MEMOP_F_CHECK_ONLY;
> > > > > > > > +                    | KVM_S390_MEMOP_F_CHECK_ONLY
> > > > > > > > +                    | KVM_S390_MEMOP_F_SKEY_PROTECTION;
> > > > > > > >      -    if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
> > > > > > > > +    if (mop->flags & ~supported_flags || !mop->size)
> > > > > > > >              return -EINVAL;
> > > > > > > > -
> > > > > > > >          if (mop->size > MEM_OP_MAX_SIZE)
> > > > > > > >              return -E2BIG;
> > > > > > > > -
> > > > > > > >          if (kvm_s390_pv_cpu_is_protected(vcpu))
> > > > > > > >              return -EINVAL;
> > > > > > > > -
> > > > > > > >          if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
> > > > > > > >              tmpbuf = vmalloc(mop->size);
> > > > > > > >              if (!tmpbuf)
> > > > > > > >                  return -ENOMEM;
> > > > > > > >          }
> > > > > > > > +    ar = mop->ar;
> > > > > > > > +    mop->ar = 0;
> > > > > > > 
> > > > > > > Why this assignment to 0?
> > > > > > 
> > > > > > It's so the check of reserved below works like that, they're all part of the anonymous union.
> > > > > 
> > > > > Ah, I see. This is ugly :-)
> > > > 
> > > > Yes :)
> > > > > > > > +    if (ar >= NUM_ACRS)
> > > > > > > > +        return -EINVAL;
> > > > > > > > +    if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) {
> > > > > > > > +        access_key = mop->key;
> > > > > > > > +        mop->key = 0;
> > > > > > > 
> > > > > > > and this? I think we can leave mop unchanged.
> > > > > > > 
> > > > > > > In fact, why do we add the ar and access_key variable?
> > > > > > > This breaks the check from above (if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size))  into two checks
> > > > > > > and it will create a memleak for tmpbuf.
> > > > > > 
> > > > > > I can move the allocation down, goto out or get rid of the reserved check and keep everything as before.
> > > > > > First is simpler, but second makes handling that case more explicit and might help in the future.
> > > > > 
> > > > > Maybe add a reserved_02 field in the anon struct and check this for being zero and get rid of the local variables?
> > > > 
> > > > I think that would require us adding new fields in the struct by putting them in a union with reserved_02 and so on,
> > > > which could get rather messy.
> > > 
> > > I think it is fine to rename reserved_02. Maybe rename that to dont_use_02 ?
> > 
> > I don't know what kind of stability guarantees we give here, since it can only happen when recompiling with
> > a new header. dont_use is a lot better than reserved here, after all we tell user space to set
> > reserved bytes to 0, using reserved_02 to do that would be quite handy and therefore likely.
> > 
> > The question is also what semantic we want for the check.
> > The way it works right now, user space also needs to set unused fields to 0, e.g. key if the flag is not set.
> > At least this is the case for the vm memop, the vcpu memop cannot do that because of backward compatibility.
> 
> As an alternative just remove the check for reserved == 0 and do that later on as an add-on patch?

That would kinda defeat the purpose of the check, since misbehaving user space programs would
get an error then but not now.
Christian Borntraeger Feb. 9, 2022, 12:11 p.m. UTC | #10
Am 09.02.22 um 12:04 schrieb Janis Schoetterl-Glausch:
> On Wed, 2022-02-09 at 11:48 +0100, Christian Borntraeger wrote:
>>
>> Am 09.02.22 um 11:39 schrieb Janis Schoetterl-Glausch:
>>> On 2/9/22 11:08, Christian Borntraeger wrote:
>>>>
>>>> Am 09.02.22 um 11:01 schrieb Janis Schoetterl-Glausch:
>>>>> On Wed, 2022-02-09 at 10:08 +0100, Christian Borntraeger wrote:
>>>>>> Am 09.02.22 um 09:49 schrieb Janis Schoetterl-Glausch:
>>>>>>> On 2/9/22 08:34, Christian Borntraeger wrote:
>>>>>>>> Am 07.02.22 um 17:59 schrieb Janis Schoetterl-Glausch:
>>>>>>>>> User space needs a mechanism to perform key checked accesses when
>>>>>>>>> emulating instructions.
>>>>>>>>>
>>>>>>>>> The key can be passed as an additional argument.
>>>>>>>>> Having an additional argument is flexible, as user space can
>>>>>>>>> pass the guest PSW's key, in order to make an access the same way the
>>>>>>>>> CPU would, or pass another key if necessary.
>>>>>>>>>
>>>>>>>>> Signed-off-by: Janis Schoetterl-Glausch <scgl@linux.ibm.com>
>>>>>>>>> Acked-by: Janosch Frank <frankja@linux.ibm.com>
>>>>>>>>> Reviewed-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
>>>>>>>>> ---
>>>>>>>>>       arch/s390/kvm/kvm-s390.c | 49 +++++++++++++++++++++++++++++++---------
>>>>>>>>>       include/uapi/linux/kvm.h |  8 +++++--
>>>>>>>>>       2 files changed, 44 insertions(+), 13 deletions(-)
>>>>>>>>>
>>>>>>>>> diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
>>>>>>>>> index cf347e1a4f17..71e61fb3f0d9 100644
>>>>>>>>> --- a/arch/s390/kvm/kvm-s390.c
>>>>>>>>> +++ b/arch/s390/kvm/kvm-s390.c
>>>>>>>>> @@ -32,6 +32,7 @@
>>>>>>>>>       #include <linux/sched/signal.h>
>>>>>>>>>       #include <linux/string.h>
>>>>>>>>>       #include <linux/pgtable.h>
>>>>>>>>> +#include <linux/bitfield.h>
>>>>>>>>>         #include <asm/asm-offsets.h>
>>>>>>>>>       #include <asm/lowcore.h>
>>>>>>>>> @@ -2359,6 +2360,11 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
>>>>>>>>>           return r;
>>>>>>>>>       }
>>>>>>>>>       +static bool access_key_invalid(u8 access_key)
>>>>>>>>> +{
>>>>>>>>> +    return access_key > 0xf;
>>>>>>>>> +}
>>>>>>>>> +
>>>>>>>>>       long kvm_arch_vm_ioctl(struct file *filp,
>>>>>>>>>                      unsigned int ioctl, unsigned long arg)
>>>>>>>>>       {
>>>>>>>>> @@ -4687,34 +4693,54 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
>>>>>>>>>                         struct kvm_s390_mem_op *mop)
>>>>>>>>>       {
>>>>>>>>>           void __user *uaddr = (void __user *)mop->buf;
>>>>>>>>> +    u8 access_key = 0, ar = 0;
>>>>>>>>>           void *tmpbuf = NULL;
>>>>>>>>> +    bool check_reserved;
>>>>>>>>>           int r = 0;
>>>>>>>>>           const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
>>>>>>>>> -                    | KVM_S390_MEMOP_F_CHECK_ONLY;
>>>>>>>>> +                    | KVM_S390_MEMOP_F_CHECK_ONLY
>>>>>>>>> +                    | KVM_S390_MEMOP_F_SKEY_PROTECTION;
>>>>>>>>>       -    if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
>>>>>>>>> +    if (mop->flags & ~supported_flags || !mop->size)
>>>>>>>>>               return -EINVAL;
>>>>>>>>> -
>>>>>>>>>           if (mop->size > MEM_OP_MAX_SIZE)
>>>>>>>>>               return -E2BIG;
>>>>>>>>> -
>>>>>>>>>           if (kvm_s390_pv_cpu_is_protected(vcpu))
>>>>>>>>>               return -EINVAL;
>>>>>>>>> -
>>>>>>>>>           if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
>>>>>>>>>               tmpbuf = vmalloc(mop->size);
>>>>>>>>>               if (!tmpbuf)
>>>>>>>>>                   return -ENOMEM;
>>>>>>>>>           }
>>>>>>>>> +    ar = mop->ar;
>>>>>>>>> +    mop->ar = 0;
>>>>>>>>
>>>>>>>> Why this assignment to 0?
>>>>>>>
>>>>>>> It's so the check of reserved below works like that, they're all part of the anonymous union.
>>>>>>
>>>>>> Ah, I see. This is ugly :-)
>>>>>
>>>>> Yes :)
>>>>>>>>> +    if (ar >= NUM_ACRS)
>>>>>>>>> +        return -EINVAL;
>>>>>>>>> +    if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) {
>>>>>>>>> +        access_key = mop->key;
>>>>>>>>> +        mop->key = 0;
>>>>>>>>
>>>>>>>> and this? I think we can leave mop unchanged.
>>>>>>>>
>>>>>>>> In fact, why do we add the ar and access_key variable?
>>>>>>>> This breaks the check from above (if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size))  into two checks
>>>>>>>> and it will create a memleak for tmpbuf.
>>>>>>>
>>>>>>> I can move the allocation down, goto out or get rid of the reserved check and keep everything as before.
>>>>>>> First is simpler, but second makes handling that case more explicit and might help in the future.
>>>>>>
>>>>>> Maybe add a reserved_02 field in the anon struct and check this for being zero and get rid of the local variables?
>>>>>
>>>>> I think that would require us adding new fields in the struct by putting them in a union with reserved_02 and so on,
>>>>> which could get rather messy.
>>>>
>>>> I think it is fine to rename reserved_02. Maybe rename that to dont_use_02 ?
>>>
>>> I don't know what kind of stability guarantees we give here, since it can only happen when recompiling with
>>> a new header. dont_use is a lot better than reserved here, after all we tell user space to set
>>> reserved bytes to 0, using reserved_02 to do that would be quite handy and therefore likely.
>>>
>>> The question is also what semantic we want for the check.
>>> The way it works right now, user space also needs to set unused fields to 0, e.g. key if the flag is not set.
>>> At least this is the case for the vm memop, the vcpu memop cannot do that because of backward compatibility.
>>
>> As an alternative just remove the check for reserved == 0 and do that later on as an add-on patch?
> 
> That would kinda defeat the purpose of the check, since misbehaving user space programs would
> get an error then but not now.


As a matter of fact, we do not check today. What about the following?
1. remove the check_reserved logic. It's too complicated
2. do not check for reserved to be zero
4. state that the reserved fields are ignored without the appropriate flag
5. add the necessary flag as comment to the fields
6. check for unknown flags and bail out
Janis Schoetterl-Glausch Feb. 9, 2022, 1:08 p.m. UTC | #11
On 2/9/22 13:11, Christian Borntraeger wrote:
> 
> 
> Am 09.02.22 um 12:04 schrieb Janis Schoetterl-Glausch:
>> On Wed, 2022-02-09 at 11:48 +0100, Christian Borntraeger wrote:
>>>
>>> Am 09.02.22 um 11:39 schrieb Janis Schoetterl-Glausch:
>>>> On 2/9/22 11:08, Christian Borntraeger wrote:
>>>>>
>>>>> Am 09.02.22 um 11:01 schrieb Janis Schoetterl-Glausch:
>>>>>> On Wed, 2022-02-09 at 10:08 +0100, Christian Borntraeger wrote:
>>>>>>> Am 09.02.22 um 09:49 schrieb Janis Schoetterl-Glausch:
>>>>>>>> On 2/9/22 08:34, Christian Borntraeger wrote:
>>>>>>>>> Am 07.02.22 um 17:59 schrieb Janis Schoetterl-Glausch:
>>>>>>>>>> User space needs a mechanism to perform key checked accesses when
>>>>>>>>>> emulating instructions.
>>>>>>>>>>
>>>>>>>>>> The key can be passed as an additional argument.
>>>>>>>>>> Having an additional argument is flexible, as user space can
>>>>>>>>>> pass the guest PSW's key, in order to make an access the same way the
>>>>>>>>>> CPU would, or pass another key if necessary.
>>>>>>>>>>
>>>>>>>>>> Signed-off-by: Janis Schoetterl-Glausch <scgl@linux.ibm.com>
>>>>>>>>>> Acked-by: Janosch Frank <frankja@linux.ibm.com>
>>>>>>>>>> Reviewed-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
>>>>>>>>>> ---
>>>>>>>>>>       arch/s390/kvm/kvm-s390.c | 49 +++++++++++++++++++++++++++++++---------
>>>>>>>>>>       include/uapi/linux/kvm.h |  8 +++++--
>>>>>>>>>>       2 files changed, 44 insertions(+), 13 deletions(-)
>>>>>>>>>>
>>>>>>>>>> diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
>>>>>>>>>> index cf347e1a4f17..71e61fb3f0d9 100644
>>>>>>>>>> --- a/arch/s390/kvm/kvm-s390.c
>>>>>>>>>> +++ b/arch/s390/kvm/kvm-s390.c
>>>>>>>>>> @@ -32,6 +32,7 @@
>>>>>>>>>>       #include <linux/sched/signal.h>
>>>>>>>>>>       #include <linux/string.h>
>>>>>>>>>>       #include <linux/pgtable.h>
>>>>>>>>>> +#include <linux/bitfield.h>
>>>>>>>>>>         #include <asm/asm-offsets.h>
>>>>>>>>>>       #include <asm/lowcore.h>
>>>>>>>>>> @@ -2359,6 +2360,11 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
>>>>>>>>>>           return r;
>>>>>>>>>>       }
>>>>>>>>>>       +static bool access_key_invalid(u8 access_key)
>>>>>>>>>> +{
>>>>>>>>>> +    return access_key > 0xf;
>>>>>>>>>> +}
>>>>>>>>>> +
>>>>>>>>>>       long kvm_arch_vm_ioctl(struct file *filp,
>>>>>>>>>>                      unsigned int ioctl, unsigned long arg)
>>>>>>>>>>       {
>>>>>>>>>> @@ -4687,34 +4693,54 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
>>>>>>>>>>                         struct kvm_s390_mem_op *mop)
>>>>>>>>>>       {
>>>>>>>>>>           void __user *uaddr = (void __user *)mop->buf;
>>>>>>>>>> +    u8 access_key = 0, ar = 0;
>>>>>>>>>>           void *tmpbuf = NULL;
>>>>>>>>>> +    bool check_reserved;
>>>>>>>>>>           int r = 0;
>>>>>>>>>>           const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
>>>>>>>>>> -                    | KVM_S390_MEMOP_F_CHECK_ONLY;
>>>>>>>>>> +                    | KVM_S390_MEMOP_F_CHECK_ONLY
>>>>>>>>>> +                    | KVM_S390_MEMOP_F_SKEY_PROTECTION;
>>>>>>>>>>       -    if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
>>>>>>>>>> +    if (mop->flags & ~supported_flags || !mop->size)
>>>>>>>>>>               return -EINVAL;
>>>>>>>>>> -
>>>>>>>>>>           if (mop->size > MEM_OP_MAX_SIZE)
>>>>>>>>>>               return -E2BIG;
>>>>>>>>>> -
>>>>>>>>>>           if (kvm_s390_pv_cpu_is_protected(vcpu))
>>>>>>>>>>               return -EINVAL;
>>>>>>>>>> -
>>>>>>>>>>           if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
>>>>>>>>>>               tmpbuf = vmalloc(mop->size);
>>>>>>>>>>               if (!tmpbuf)
>>>>>>>>>>                   return -ENOMEM;
>>>>>>>>>>           }
>>>>>>>>>> +    ar = mop->ar;
>>>>>>>>>> +    mop->ar = 0;
>>>>>>>>>
>>>>>>>>> Why this assignment to 0?
>>>>>>>>
>>>>>>>> It's so the check of reserved below works like that, they're all part of the anonymous union.
>>>>>>>
>>>>>>> Ah, I see. This is ugly :-)
>>>>>>
>>>>>> Yes :)
>>>>>>>>>> +    if (ar >= NUM_ACRS)
>>>>>>>>>> +        return -EINVAL;
>>>>>>>>>> +    if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) {
>>>>>>>>>> +        access_key = mop->key;
>>>>>>>>>> +        mop->key = 0;
>>>>>>>>>
>>>>>>>>> and this? I think we can leave mop unchanged.
>>>>>>>>>
>>>>>>>>> In fact, why do we add the ar and access_key variable?
>>>>>>>>> This breaks the check from above (if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size))  into two checks
>>>>>>>>> and it will create a memleak for tmpbuf.
>>>>>>>>
>>>>>>>> I can move the allocation down, goto out or get rid of the reserved check and keep everything as before.
>>>>>>>> First is simpler, but second makes handling that case more explicit and might help in the future.
>>>>>>>
>>>>>>> Maybe add a reserved_02 field in the anon struct and check this for being zero and get rid of the local variables?
>>>>>>
>>>>>> I think that would require us adding new fields in the struct by putting them in a union with reserved_02 and so on,
>>>>>> which could get rather messy.
>>>>>
>>>>> I think it is fine to rename reserved_02. Maybe rename that to dont_use_02 ?
>>>>
>>>> I don't know what kind of stability guarantees we give here, since it can only happen when recompiling with
>>>> a new header. dont_use is a lot better than reserved here, after all we tell user space to set
>>>> reserved bytes to 0, using reserved_02 to do that would be quite handy and therefore likely.
>>>>
>>>> The question is also what semantic we want for the check.
>>>> The way it works right now, user space also needs to set unused fields to 0, e.g. key if the flag is not set.
>>>> At least this is the case for the vm memop, the vcpu memop cannot do that because of backward compatibility.
>>>
>>> As an alternative just remove the check for reserved == 0 and do that later on as an add-on patch?
>>
>> That would kinda defeat the purpose of the check, since misbehaving user space programs would
>> get an error then but not now.
> 
> 
> As a matter of fact, we do not check today. What about the following.

We don't do it for the vcpu memop, but since we're newly introducing the vm memop we are free to decide what we want.
It's purely about future proofing, e.g. we would have had the possibility to add the key checking feature without a flag,
if the existing memop did the check. Committing ourselves to always adding a flag is fine by me, but I don't like the
previous state of affairs, where user space should set reserved bytes to 0 but it's not enforced.

> 1. remove the check_reserved logic. It's too complicated
> 2. do not check for reserved to be zero
> 4. state that the reserved fields are ignored without the appropriate flag
> 5. add the necessary flag as comment to the fields
> 6. check for unknown flags and bail out

I'll implement this, except maybe 5, since the documentation covers that and the availability of the flags themselves
is conditional on other factors.
Christian Borntraeger Feb. 9, 2022, 1:11 p.m. UTC | #12
Am 09.02.22 um 14:08 schrieb Janis Schoetterl-Glausch:
> On 2/9/22 13:11, Christian Borntraeger wrote:
>>
>>
>> Am 09.02.22 um 12:04 schrieb Janis Schoetterl-Glausch:
>>> On Wed, 2022-02-09 at 11:48 +0100, Christian Borntraeger wrote:
>>>>
>>>> Am 09.02.22 um 11:39 schrieb Janis Schoetterl-Glausch:
>>>>> On 2/9/22 11:08, Christian Borntraeger wrote:
>>>>>>
>>>>>> Am 09.02.22 um 11:01 schrieb Janis Schoetterl-Glausch:
>>>>>>> On Wed, 2022-02-09 at 10:08 +0100, Christian Borntraeger wrote:
>>>>>>>> Am 09.02.22 um 09:49 schrieb Janis Schoetterl-Glausch:
>>>>>>>>> On 2/9/22 08:34, Christian Borntraeger wrote:
>>>>>>>>>> Am 07.02.22 um 17:59 schrieb Janis Schoetterl-Glausch:
>>>>>>>>>>> User space needs a mechanism to perform key checked accesses when
>>>>>>>>>>> emulating instructions.
>>>>>>>>>>>
>>>>>>>>>>> The key can be passed as an additional argument.
>>>>>>>>>>> Having an additional argument is flexible, as user space can
>>>>>>>>>>> pass the guest PSW's key, in order to make an access the same way the
>>>>>>>>>>> CPU would, or pass another key if necessary.
>>>>>>>>>>>
>>>>>>>>>>> Signed-off-by: Janis Schoetterl-Glausch <scgl@linux.ibm.com>
>>>>>>>>>>> Acked-by: Janosch Frank <frankja@linux.ibm.com>
>>>>>>>>>>> Reviewed-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
>>>>>>>>>>> ---
>>>>>>>>>>>        arch/s390/kvm/kvm-s390.c | 49 +++++++++++++++++++++++++++++++---------
>>>>>>>>>>>        include/uapi/linux/kvm.h |  8 +++++--
>>>>>>>>>>>        2 files changed, 44 insertions(+), 13 deletions(-)
>>>>>>>>>>>
>>>>>>>>>>> diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
>>>>>>>>>>> index cf347e1a4f17..71e61fb3f0d9 100644
>>>>>>>>>>> --- a/arch/s390/kvm/kvm-s390.c
>>>>>>>>>>> +++ b/arch/s390/kvm/kvm-s390.c
>>>>>>>>>>> @@ -32,6 +32,7 @@
>>>>>>>>>>>        #include <linux/sched/signal.h>
>>>>>>>>>>>        #include <linux/string.h>
>>>>>>>>>>>        #include <linux/pgtable.h>
>>>>>>>>>>> +#include <linux/bitfield.h>
>>>>>>>>>>>          #include <asm/asm-offsets.h>
>>>>>>>>>>>        #include <asm/lowcore.h>
>>>>>>>>>>> @@ -2359,6 +2360,11 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
>>>>>>>>>>>            return r;
>>>>>>>>>>>        }
>>>>>>>>>>>        +static bool access_key_invalid(u8 access_key)
>>>>>>>>>>> +{
>>>>>>>>>>> +    return access_key > 0xf;
>>>>>>>>>>> +}
>>>>>>>>>>> +
>>>>>>>>>>>        long kvm_arch_vm_ioctl(struct file *filp,
>>>>>>>>>>>                       unsigned int ioctl, unsigned long arg)
>>>>>>>>>>>        {
>>>>>>>>>>> @@ -4687,34 +4693,54 @@ static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
>>>>>>>>>>>                          struct kvm_s390_mem_op *mop)
>>>>>>>>>>>        {
>>>>>>>>>>>            void __user *uaddr = (void __user *)mop->buf;
>>>>>>>>>>> +    u8 access_key = 0, ar = 0;
>>>>>>>>>>>            void *tmpbuf = NULL;
>>>>>>>>>>> +    bool check_reserved;
>>>>>>>>>>>            int r = 0;
>>>>>>>>>>>            const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
>>>>>>>>>>> -                    | KVM_S390_MEMOP_F_CHECK_ONLY;
>>>>>>>>>>> +                    | KVM_S390_MEMOP_F_CHECK_ONLY
>>>>>>>>>>> +                    | KVM_S390_MEMOP_F_SKEY_PROTECTION;
>>>>>>>>>>>        -    if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
>>>>>>>>>>> +    if (mop->flags & ~supported_flags || !mop->size)
>>>>>>>>>>>                return -EINVAL;
>>>>>>>>>>> -
>>>>>>>>>>>            if (mop->size > MEM_OP_MAX_SIZE)
>>>>>>>>>>>                return -E2BIG;
>>>>>>>>>>> -
>>>>>>>>>>>            if (kvm_s390_pv_cpu_is_protected(vcpu))
>>>>>>>>>>>                return -EINVAL;
>>>>>>>>>>> -
>>>>>>>>>>>            if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
>>>>>>>>>>>                tmpbuf = vmalloc(mop->size);
>>>>>>>>>>>                if (!tmpbuf)
>>>>>>>>>>>                    return -ENOMEM;
>>>>>>>>>>>            }
>>>>>>>>>>> +    ar = mop->ar;
>>>>>>>>>>> +    mop->ar = 0;
>>>>>>>>>>
>>>>>>>>>> Why this assignment to 0?
>>>>>>>>>
>>>>>>>>> It's so the check of reserved below works like that, they're all part of the anonymous union.
>>>>>>>>
>>>>>>>> Ah, I see. This is ugly :-)
>>>>>>>
>>>>>>> Yes :)
>>>>>>>>>>> +    if (ar >= NUM_ACRS)
>>>>>>>>>>> +        return -EINVAL;
>>>>>>>>>>> +    if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) {
>>>>>>>>>>> +        access_key = mop->key;
>>>>>>>>>>> +        mop->key = 0;
>>>>>>>>>>
>>>>>>>>>> and this? I think we can leave mop unchanged.
>>>>>>>>>>
>>>>>>>>>> In fact, why do we add the ar and access_key variable?
>>>>>>>>>> This breaks the check from above (if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size))  into two checks
>>>>>>>>>> and it will create a memleak for tmpbuf.
>>>>>>>>>
>>>>>>>>> I can move the allocation down, goto out or get rid of the reserved check and keep everything as before.
>>>>>>>>> First is simpler, but second makes handling that case more explicit and might help in the future.
>>>>>>>>
>>>>>>>> Maybe add a reserved_02 field in the anon struct and check this for being zero and get rid of the local variables?
>>>>>>>
>>>>>>> I think that would require us adding new fields in the struct by putting them in a union with reserved_02 and so on,
>>>>>>> which could get rather messy.
>>>>>>
>>>>>> I think it is fine to rename reserved_02. Maybe rename that to dont_use_02 ?
>>>>>
>>>>> I don't know what kind of stability guarantees we give here, since it can only happen when recompiling with
>>>>> a new header. dont_use is a lot better than reserved here, after all we tell user space to set
>>>>> reserved bytes to 0, using reserved_02 to do that would be quite handy and therefore likely.
>>>>>
>>>>> The question is also what semantic we want for the check.
>>>>> The way it works right now, user space also needs to set unused fields to 0, e.g. key if the flag is not set.
>>>>> At least this is the case for the vm memop, the vcpu memop cannot do that because of backward compatibility.
>>>>
>>>> As an alternative just remove the check for reserved == 0 and do that later on as an add-on patch?
>>>
>>> That would kinda defeat the purpose of the check, since misbehaving user space programs would
>>> get an error then but not now.
>>
>>
>> As a matter of fact, we do not check today. What about the following.
> 
> We don't do it for the vcpu memop, but since we're newly introducing the vm memop we are free to decide what we want.
> It's purely about future proofing, e.g. we would have had the possibility to add the key checking feature without a flag,
> if the existing memop did the check. Committing ourselves to always adding a flag is fine by me, but I don't like the
> previous state of affairs, where user space should set reserved bytes to 0 but it's not enforced.
> 
>> 1. remove the check_reserved logic. It's too complicated
>> 2. do not check for reserved to be zero
>> 4. state that the reserved fields are ignored without the appropriate flag
>> 5. add the necessary flag as comment to the fields
>> 6. check for unknown flags and bail out
> 
> I'll implement this, except maybe 5, since the documentation covers that and the availability of the flags themselves
> is conditional on other factors.

Yes, 5 only where it makes sense.
Konstantin Ryabitsev Feb. 9, 2022, 1:16 p.m. UTC | #13
On Wed, Feb 09, 2022 at 10:34:19AM +0100, Christian Borntraeger wrote:
> CC Konstantin,
> 
> I hope you can find the right people. It looks like my (and Janis') emails did not make it to the linux-s390 and kvm vger lists.
> Message-ID: <6ea27647-fbbe-3962-03a0-8ca5340fc7fd@linux.ibm.com>

I see it in the archives, though:
https://lore.kernel.org/linux-s390/6ea27647-fbbe-3962-03a0-8ca5340fc7fd@linux.ibm.com
https://lore.kernel.org/kvm/6ea27647-fbbe-3962-03a0-8ca5340fc7fd@linux.ibm.com

Perhaps it was just delayed?

-K
Christian Borntraeger Feb. 9, 2022, 1:20 p.m. UTC | #14
Am 09.02.22 um 14:16 schrieb Konstantin Ryabitsev:
> On Wed, Feb 09, 2022 at 10:34:19AM +0100, Christian Borntraeger wrote:
>> CC Konstantin,
>>
>> I hope you can find the right people. It looks like my (and Janis') emails did not make it to the linux-s390 and kvm vger lists.
>> Message-ID: <6ea27647-fbbe-3962-03a0-8ca5340fc7fd@linux.ibm.com>
> 
> I see it in the archives, though:
> https://lore.kernel.org/linux-s390/6ea27647-fbbe-3962-03a0-8ca5340fc7fd@linux.ibm.com
> https://lore.kernel.org/kvm/6ea27647-fbbe-3962-03a0-8ca5340fc7fd@linux.ibm.com
> 
> Perhaps it was just delayed?

Yes, they have arrived now. I had feared that they had been filtered.
Never mind.
diff mbox series

Patch

diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index cf347e1a4f17..71e61fb3f0d9 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -32,6 +32,7 @@ 
 #include <linux/sched/signal.h>
 #include <linux/string.h>
 #include <linux/pgtable.h>
+#include <linux/bitfield.h>
 
 #include <asm/asm-offsets.h>
 #include <asm/lowcore.h>
@@ -2359,6 +2360,11 @@  static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
 	return r;
 }
 
+static bool access_key_invalid(u8 access_key)
+{
+	return access_key > 0xf;
+}
+
 long kvm_arch_vm_ioctl(struct file *filp,
 		       unsigned int ioctl, unsigned long arg)
 {
@@ -4687,34 +4693,54 @@  static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
 				  struct kvm_s390_mem_op *mop)
 {
 	void __user *uaddr = (void __user *)mop->buf;
+	u8 access_key = 0, ar = 0;
 	void *tmpbuf = NULL;
+	bool check_reserved;
 	int r = 0;
 	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
-				    | KVM_S390_MEMOP_F_CHECK_ONLY;
+				    | KVM_S390_MEMOP_F_CHECK_ONLY
+				    | KVM_S390_MEMOP_F_SKEY_PROTECTION;
 
-	if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
+	if (mop->flags & ~supported_flags || !mop->size)
 		return -EINVAL;
-
 	if (mop->size > MEM_OP_MAX_SIZE)
 		return -E2BIG;
-
 	if (kvm_s390_pv_cpu_is_protected(vcpu))
 		return -EINVAL;
-
 	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
 		tmpbuf = vmalloc(mop->size);
 		if (!tmpbuf)
 			return -ENOMEM;
 	}
+	ar = mop->ar;
+	mop->ar = 0;
+	if (ar >= NUM_ACRS)
+		return -EINVAL;
+	if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) {
+		access_key = mop->key;
+		mop->key = 0;
+		if (access_key_invalid(access_key))
+			return -EINVAL;
+	}
+	/*
+	 * Check that reserved/unused == 0, but only for extensions,
+	 * so we stay backward compatible.
+	 * This gives us more design flexibility for future extensions, i.e.
+	 * we can add functionality without adding a flag.
+	 */
+	check_reserved = mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION;
+	if (check_reserved && memchr_inv(&mop->reserved, 0, sizeof(mop->reserved)))
+		return -EINVAL;
 
 	switch (mop->op) {
 	case KVM_S390_MEMOP_LOGICAL_READ:
 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
-			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
-					    mop->size, GACC_FETCH, 0);
+			r = check_gva_range(vcpu, mop->gaddr, ar, mop->size,
+					    GACC_FETCH, access_key);
 			break;
 		}
-		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
+		r = read_guest_with_key(vcpu, mop->gaddr, ar, tmpbuf,
+					mop->size, access_key);
 		if (r == 0) {
 			if (copy_to_user(uaddr, tmpbuf, mop->size))
 				r = -EFAULT;
@@ -4722,15 +4748,16 @@  static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
 		break;
 	case KVM_S390_MEMOP_LOGICAL_WRITE:
 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
-			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
-					    mop->size, GACC_STORE, 0);
+			r = check_gva_range(vcpu, mop->gaddr, ar, mop->size,
+					    GACC_STORE, access_key);
 			break;
 		}
 		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
 			r = -EFAULT;
 			break;
 		}
-		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
+		r = write_guest_with_key(vcpu, mop->gaddr, ar, tmpbuf,
+					 mop->size, access_key);
 		break;
 	}
 
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index b46bcdb0cab1..5771b026fbc0 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -562,9 +562,12 @@  struct kvm_s390_mem_op {
 	__u32 op;		/* type of operation */
 	__u64 buf;		/* buffer in userspace */
 	union {
-		__u8 ar;	/* the access register number */
+		struct {
+			__u8 ar;	/* the access register number */
+			__u8 key;	/* access key to use for storage key protection */
+		};
 		__u32 sida_offset; /* offset into the sida */
-		__u8 reserved[32]; /* should be set to 0 */
+		__u8 reserved[32]; /* must be set to 0 */
 	};
 };
 /* types for kvm_s390_mem_op->op */
@@ -575,6 +578,7 @@  struct kvm_s390_mem_op {
 /* flags for kvm_s390_mem_op->flags */
 #define KVM_S390_MEMOP_F_CHECK_ONLY		(1ULL << 0)
 #define KVM_S390_MEMOP_F_INJECT_EXCEPTION	(1ULL << 1)
+#define KVM_S390_MEMOP_F_SKEY_PROTECTION	(1ULL << 2)
 
 /* for KVM_INTERRUPT */
 struct kvm_interrupt {