diff mbox series

[v12,3/3] KVM: s390: resetting the Topology-Change-Report

Message ID 20220711084148.25017-4-pmorel@linux.ibm.com (mailing list archive)
State New, archived
Headers show
Series s390x: KVM: CPU Topology | expand

Commit Message

Pierre Morel July 11, 2022, 8:41 a.m. UTC
During a subsystem reset the Topology-Change-Report is cleared.

Let's give userland the possibility to clear the MTCR in the case
of a subsystem reset.

To migrate the MTCR, we give userland the possibility to
query the MTCR state.

We indicate KVM support for the CPU topology facility with a new
KVM capability: KVM_CAP_S390_CPU_TOPOLOGY.

Signed-off-by: Pierre Morel <pmorel@linux.ibm.com>
---
 Documentation/virt/kvm/api.rst   | 25 ++++++++++++++
 arch/s390/include/uapi/asm/kvm.h |  1 +
 arch/s390/kvm/kvm-s390.c         | 56 ++++++++++++++++++++++++++++++++
 include/uapi/linux/kvm.h         |  1 +
 4 files changed, 83 insertions(+)

Comments

Janis Schoetterl-Glausch July 11, 2022, 1:22 p.m. UTC | #1
On 7/11/22 10:41, Pierre Morel wrote:
> During a subsystem reset the Topology-Change-Report is cleared.
> 
> Let's give userland the possibility to clear the MTCR in the case
> of a subsystem reset.
> 
> To migrate the MTCR, we give userland the possibility to
> query the MTCR state.
> 
> We indicate KVM support for the CPU topology facility with a new
> KVM capability: KVM_CAP_S390_CPU_TOPOLOGY.
> 
> Signed-off-by: Pierre Morel <pmorel@linux.ibm.com>

Reviewed-by: Janis Schoetterl-Glausch <scgl@linux.ibm.com>

See nits/comments below.

> ---
>  Documentation/virt/kvm/api.rst   | 25 ++++++++++++++
>  arch/s390/include/uapi/asm/kvm.h |  1 +
>  arch/s390/kvm/kvm-s390.c         | 56 ++++++++++++++++++++++++++++++++
>  include/uapi/linux/kvm.h         |  1 +
>  4 files changed, 83 insertions(+)
> 
> diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
> index 11e00a46c610..5e086125d8ad 100644
> --- a/Documentation/virt/kvm/api.rst
> +++ b/Documentation/virt/kvm/api.rst
> @@ -7956,6 +7956,31 @@ should adjust CPUID leaf 0xA to reflect that the PMU is disabled.
>  When enabled, KVM will exit to userspace with KVM_EXIT_SYSTEM_EVENT of
>  type KVM_SYSTEM_EVENT_SUSPEND to process the guest suspend request.
>  
> +8.37 KVM_CAP_S390_CPU_TOPOLOGY
> +------------------------------
> +
> +:Capability: KVM_CAP_S390_CPU_TOPOLOGY
> +:Architectures: s390
> +:Type: vm
> +
> +This capability indicates that KVM will provide the S390 CPU Topology
> +facility which consist of the interpretation of the PTF instruction for
> +the function code 2 along with interception and forwarding of both the
> +PTF instruction with function codes 0 or 1 and the STSI(15,1,x)

Is the architecture allowed to extend STSI without a facility?
If so, if we say here that STSI 15.1.x is passed to user space, then
I think we should have a

if (sel1 != 1)
	goto out_no_data;

or maybe even

if (sel1 != 1 || sel2 < 2 || sel2 > 6)
	goto out_no_data;

in priv.c

> +instruction to the userland hypervisor.
> +
> +The stfle facility 11, CPU Topology facility, should not be indicated
> +to the guest without this capability.
> +
> +When this capability is present, KVM provides a new attribute group
> +on vm fd, KVM_S390_VM_CPU_TOPOLOGY.
> +This new attribute allows to get, set or clear the Modified Change

get or set, now that there is no explicit clear anymore.

> +Topology Report (MTCR) bit of the SCA through the kvm_device_attr
> +structure.> +
> +When getting the Modified Change Topology Report value, the attr->addr

When getting/setting the...

> +must point to a byte where the value will be stored.

... will be stored/retrieved from.
> +
>  9. Known KVM API problems
>  =========================
>  
> diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h
> index 7a6b14874d65..a73cf01a1606 100644
> --- a/arch/s390/include/uapi/asm/kvm.h
> +++ b/arch/s390/include/uapi/asm/kvm.h
> @@ -74,6 +74,7 @@ struct kvm_s390_io_adapter_req {
>  #define KVM_S390_VM_CRYPTO		2
>  #define KVM_S390_VM_CPU_MODEL		3
>  #define KVM_S390_VM_MIGRATION		4
> +#define KVM_S390_VM_CPU_TOPOLOGY	5
>  
>  /* kvm attributes for mem_ctrl */
>  #define KVM_S390_VM_MEM_ENABLE_CMMA	0
> diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
> index 70436bfff53a..b18e0b940b26 100644
> --- a/arch/s390/kvm/kvm-s390.c
> +++ b/arch/s390/kvm/kvm-s390.c
> @@ -606,6 +606,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
>  	case KVM_CAP_S390_PROTECTED:
>  		r = is_prot_virt_host();
>  		break;
> +	case KVM_CAP_S390_CPU_TOPOLOGY:
> +		r = test_facility(11);
> +		break;
>  	default:
>  		r = 0;
>  	}
> @@ -817,6 +820,20 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
>  		icpt_operexc_on_all_vcpus(kvm);
>  		r = 0;
>  		break;
> +	case KVM_CAP_S390_CPU_TOPOLOGY:
> +		r = -EINVAL;
> +		mutex_lock(&kvm->lock);
> +		if (kvm->created_vcpus) {
> +			r = -EBUSY;
> +		} else if (test_facility(11)) {
> +			set_kvm_facility(kvm->arch.model.fac_mask, 11);
> +			set_kvm_facility(kvm->arch.model.fac_list, 11);
> +			r = 0;
> +		}
> +		mutex_unlock(&kvm->lock);
> +		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_CPU_TOPOLOGY %s",
> +			 r ? "(not available)" : "(success)");
> +		break;
>  	default:
>  		r = -EINVAL;
>  		break;
> @@ -1717,6 +1734,36 @@ static void kvm_s390_update_topology_change_report(struct kvm *kvm, bool val)
>  	read_unlock(&kvm->arch.sca_lock);
>  }
>  
> +static int kvm_s390_set_topology(struct kvm *kvm, struct kvm_device_attr *attr)

kvm_s390_set_topology_changed maybe?
kvm_s390_get_topology_changed below then.

> +{
> +	if (!test_kvm_facility(kvm, 11))
> +		return -ENXIO;
> +
> +	kvm_s390_update_topology_change_report(kvm, !!attr->attr);
> +	return 0;
> +}
> +
> +static int kvm_s390_get_topology(struct kvm *kvm, struct kvm_device_attr *attr)
> +{
> +	union sca_utility utility;
> +	struct bsca_block *sca;
> +	__u8 topo;
> +
> +	if (!test_kvm_facility(kvm, 11))
> +		return -ENXIO;
> +
> +	read_lock(&kvm->arch.sca_lock);
> +	sca = kvm->arch.sca;
> +	utility.val = READ_ONCE(sca->utility.val);

I don't think you need the READ_ONCE anymore, now that there is a lock it should act as a compile barrier.
> +	read_unlock(&kvm->arch.sca_lock);
> +	topo = utility.mtcr;
> +
> +	if (copy_to_user((void __user *)attr->addr, &topo, sizeof(topo)))

Why void not u8?

> +		return -EFAULT;
> +
> +	return 0;
> +}
> +
[...]
Pierre Morel July 12, 2022, 7:24 a.m. UTC | #2
On 7/11/22 15:22, Janis Schoetterl-Glausch wrote:
> On 7/11/22 10:41, Pierre Morel wrote:
>> During a subsystem reset the Topology-Change-Report is cleared.
>>
>> Let's give userland the possibility to clear the MTCR in the case
>> of a subsystem reset.
>>
>> To migrate the MTCR, we give userland the possibility to
>> query the MTCR state.
>>
>> We indicate KVM support for the CPU topology facility with a new
>> KVM capability: KVM_CAP_S390_CPU_TOPOLOGY.
>>
>> Signed-off-by: Pierre Morel <pmorel@linux.ibm.com>
> 
> Reviewed-by: Janis Schoetterl-Glausch <scgl@linux.ibm.com>
> 

Thanks!

> See nits/comments below.
> 
>> ---
>>   Documentation/virt/kvm/api.rst   | 25 ++++++++++++++
>>   arch/s390/include/uapi/asm/kvm.h |  1 +
>>   arch/s390/kvm/kvm-s390.c         | 56 ++++++++++++++++++++++++++++++++
>>   include/uapi/linux/kvm.h         |  1 +
>>   4 files changed, 83 insertions(+)
>>
>> diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
>> index 11e00a46c610..5e086125d8ad 100644
>> --- a/Documentation/virt/kvm/api.rst
>> +++ b/Documentation/virt/kvm/api.rst
>> @@ -7956,6 +7956,31 @@ should adjust CPUID leaf 0xA to reflect that the PMU is disabled.
>>   When enabled, KVM will exit to userspace with KVM_EXIT_SYSTEM_EVENT of
>>   type KVM_SYSTEM_EVENT_SUSPEND to process the guest suspend request.
>>   
>> +8.37 KVM_CAP_S390_CPU_TOPOLOGY
>> +------------------------------
>> +
>> +:Capability: KVM_CAP_S390_CPU_TOPOLOGY
>> +:Architectures: s390
>> +:Type: vm
>> +
>> +This capability indicates that KVM will provide the S390 CPU Topology
>> +facility which consist of the interpretation of the PTF instruction for
>> +the function code 2 along with interception and forwarding of both the
>> +PTF instruction with function codes 0 or 1 and the STSI(15,1,x)
> 
> Is the architecture allowed to extend STSI without a facility?
> If so, if we say here that STSI 15.1.x is passed to user space, then
> I think we should have a
> 
> if (sel1 != 1)
> 	goto out_no_data;
> 
> or maybe even
> 
> if (sel1 != 1 || sel2 < 2 || sel2 > 6)
> 	goto out_no_data;
> 
> in priv.c

I am not a big fan of doing everything in the kernel.
Here we have no performance issue since it is an error of the guest if 
it sends a wrong selector.

Even testing the facility or PV in the kernel is for my opinion arguable 
in the case we do not do any treatment in the kernel.

I do not see what it brings to us, it increase the LOCs and makes the 
implementation less easy to evolve.


> 
>> +instruction to the userland hypervisor.
>> +
>> +The stfle facility 11, CPU Topology facility, should not be indicated
>> +to the guest without this capability.
>> +
>> +When this capability is present, KVM provides a new attribute group
>> +on vm fd, KVM_S390_VM_CPU_TOPOLOGY.
>> +This new attribute allows to get, set or clear the Modified Change
> 
> get or set, now that there is no explicit clear anymore.

Yes now it is a set to 0 but the action of clearing remains.

> 
>> +Topology Report (MTCR) bit of the SCA through the kvm_device_attr
>> +structure.> +
>> +When getting the Modified Change Topology Report value, the attr->addr
> 
> When getting/setting the...
> 
>> +must point to a byte where the value will be stored.
> 
> ... will be stored/retrieved from.

OK


>> +
>>   9. Known KVM API problems
>>   =========================
>>   
>> diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h
>> index 7a6b14874d65..a73cf01a1606 100644
>> --- a/arch/s390/include/uapi/asm/kvm.h
>> +++ b/arch/s390/include/uapi/asm/kvm.h
>> @@ -74,6 +74,7 @@ struct kvm_s390_io_adapter_req {
>>   #define KVM_S390_VM_CRYPTO		2
>>   #define KVM_S390_VM_CPU_MODEL		3
>>   #define KVM_S390_VM_MIGRATION		4
>> +#define KVM_S390_VM_CPU_TOPOLOGY	5
>>   
>>   /* kvm attributes for mem_ctrl */
>>   #define KVM_S390_VM_MEM_ENABLE_CMMA	0
>> diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
>> index 70436bfff53a..b18e0b940b26 100644
>> --- a/arch/s390/kvm/kvm-s390.c
>> +++ b/arch/s390/kvm/kvm-s390.c
>> @@ -606,6 +606,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
>>   	case KVM_CAP_S390_PROTECTED:
>>   		r = is_prot_virt_host();
>>   		break;
>> +	case KVM_CAP_S390_CPU_TOPOLOGY:
>> +		r = test_facility(11);
>> +		break;
>>   	default:
>>   		r = 0;
>>   	}
>> @@ -817,6 +820,20 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
>>   		icpt_operexc_on_all_vcpus(kvm);
>>   		r = 0;
>>   		break;
>> +	case KVM_CAP_S390_CPU_TOPOLOGY:
>> +		r = -EINVAL;
>> +		mutex_lock(&kvm->lock);
>> +		if (kvm->created_vcpus) {
>> +			r = -EBUSY;
>> +		} else if (test_facility(11)) {
>> +			set_kvm_facility(kvm->arch.model.fac_mask, 11);
>> +			set_kvm_facility(kvm->arch.model.fac_list, 11);
>> +			r = 0;
>> +		}
>> +		mutex_unlock(&kvm->lock);
>> +		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_CPU_TOPOLOGY %s",
>> +			 r ? "(not available)" : "(success)");
>> +		break;
>>   	default:
>>   		r = -EINVAL;
>>   		break;
>> @@ -1717,6 +1734,36 @@ static void kvm_s390_update_topology_change_report(struct kvm *kvm, bool val)
>>   	read_unlock(&kvm->arch.sca_lock);
>>   }
>>   
>> +static int kvm_s390_set_topology(struct kvm *kvm, struct kvm_device_attr *attr)
> 
> kvm_s390_set_topology_changed maybe?
> kvm_s390_get_topology_changed below then.

No strong opinion, if you prefer I change this.

> 
>> +{
>> +	if (!test_kvm_facility(kvm, 11))
>> +		return -ENXIO;
>> +
>> +	kvm_s390_update_topology_change_report(kvm, !!attr->attr);
>> +	return 0;
>> +}
>> +
>> +static int kvm_s390_get_topology(struct kvm *kvm, struct kvm_device_attr *attr)
>> +{
>> +	union sca_utility utility;
>> +	struct bsca_block *sca;
>> +	__u8 topo;
>> +
>> +	if (!test_kvm_facility(kvm, 11))
>> +		return -ENXIO;
>> +
>> +	read_lock(&kvm->arch.sca_lock);
>> +	sca = kvm->arch.sca;
>> +	utility.val = READ_ONCE(sca->utility.val);
> 
> I don't think you need the READ_ONCE anymore, now that there is a lock it should act as a compile barrier.

I think you are right.

>> +	read_unlock(&kvm->arch.sca_lock);
>> +	topo = utility.mtcr;
>> +
>> +	if (copy_to_user((void __user *)attr->addr, &topo, sizeof(topo)))
> 
> Why void not u8?

I like to say we write on "topo" with the size of "topo".
So we do not need to verify the effective size of topo.
But I understand, it is a UAPI, setting u8 in the copy_to_user makes 
sense too.
For my personal opinion, I would have prefer that userland tell us the 
size it awaits even here, for this special case, since we use a byte, we 
can not do really wrong.

> 
>> +		return -EFAULT;
>> +
>> +	return 0;
>> +}
>> +
> [...]
>
Janis Schoetterl-Glausch July 12, 2022, 8:47 a.m. UTC | #3
On 7/12/22 09:24, Pierre Morel wrote:
> 
> 
> On 7/11/22 15:22, Janis Schoetterl-Glausch wrote:
>> On 7/11/22 10:41, Pierre Morel wrote:
>>> During a subsystem reset the Topology-Change-Report is cleared.
>>>
>>> Let's give userland the possibility to clear the MTCR in the case
>>> of a subsystem reset.
>>>
>>> To migrate the MTCR, we give userland the possibility to
>>> query the MTCR state.
>>>
>>> We indicate KVM support for the CPU topology facility with a new
>>> KVM capability: KVM_CAP_S390_CPU_TOPOLOGY.
>>>
>>> Signed-off-by: Pierre Morel <pmorel@linux.ibm.com>
>>
>> Reviewed-by: Janis Schoetterl-Glausch <scgl@linux.ibm.com>
>>
> 
> Thanks!
> 
>> See nits/comments below.
>>
>>> ---
>>>   Documentation/virt/kvm/api.rst   | 25 ++++++++++++++
>>>   arch/s390/include/uapi/asm/kvm.h |  1 +
>>>   arch/s390/kvm/kvm-s390.c         | 56 ++++++++++++++++++++++++++++++++
>>>   include/uapi/linux/kvm.h         |  1 +
>>>   4 files changed, 83 insertions(+)
>>>
>>> diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
>>> index 11e00a46c610..5e086125d8ad 100644
>>> --- a/Documentation/virt/kvm/api.rst
>>> +++ b/Documentation/virt/kvm/api.rst
>>> @@ -7956,6 +7956,31 @@ should adjust CPUID leaf 0xA to reflect that the PMU is disabled.
>>>   When enabled, KVM will exit to userspace with KVM_EXIT_SYSTEM_EVENT of
>>>   type KVM_SYSTEM_EVENT_SUSPEND to process the guest suspend request.
>>>   +8.37 KVM_CAP_S390_CPU_TOPOLOGY
>>> +------------------------------
>>> +
>>> +:Capability: KVM_CAP_S390_CPU_TOPOLOGY
>>> +:Architectures: s390
>>> +:Type: vm
>>> +
>>> +This capability indicates that KVM will provide the S390 CPU Topology
>>> +facility which consist of the interpretation of the PTF instruction for
>>> +the function code 2 along with interception and forwarding of both the
>>> +PTF instruction with function codes 0 or 1 and the STSI(15,1,x)
>>
>> Is the architecture allowed to extend STSI without a facility?
>> If so, if we say here that STSI 15.1.x is passed to user space, then
>> I think we should have a
>>
>> if (sel1 != 1)
>>     goto out_no_data;
>>
>> or maybe even
>>
>> if (sel1 != 1 || sel2 < 2 || sel2 > 6)
>>     goto out_no_data;
>>
>> in priv.c
> 
> I am not a big fan of doing everything in the kernel.
> Here we have no performance issue since it is an error of the guest if it sends a wrong selector.
> 
I agree, but I didn't suggest it for performance reasons.
I was thinking about future proofing, that is if the architecture is extended.
We don't know if future extensions are best handled in the kernel or user space,
so if we prevent it from going to user space, we can defer the decision to when we know more.
But that's only relevant if STSI can be extended without a capability, which is why I asked about that.

> Even testing the facility or PV in the kernel is for my opinion arguable in the case we do not do any treatment in the kernel.
> 
> I do not see what it brings to us, it increase the LOCs and makes the implementation less easy to evolve.
> 
> 
>>
>>> +instruction to the userland hypervisor.
>>> +
>>> +The stfle facility 11, CPU Topology facility, should not be indicated
>>> +to the guest without this capability.
>>> +
>>> +When this capability is present, KVM provides a new attribute group
>>> +on vm fd, KVM_S390_VM_CPU_TOPOLOGY.
>>> +This new attribute allows to get, set or clear the Modified Change
>>
>> get or set, now that there is no explicit clear anymore.
> 
> Yes now it is a set to 0 but the action of clearing remains.
> 
>>
>>> +Topology Report (MTCR) bit of the SCA through the kvm_device_attr
>>> +structure.> +
>>> +When getting the Modified Change Topology Report value, the attr->addr
>>
>> When getting/setting the...
>>
>>> +must point to a byte where the value will be stored.
>>
>> ... will be stored/retrieved from.
> 
> OK

Wait no, I didn't get how that works. You're passing the value via attr->attr, not reading it from addr.
> 
> 
>>> +
>>>   9. Known KVM API problems
>>>   =========================
>>>   diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h
>>> index 7a6b14874d65..a73cf01a1606 100644
>>> --- a/arch/s390/include/uapi/asm/kvm.h
>>> +++ b/arch/s390/include/uapi/asm/kvm.h
>>> @@ -74,6 +74,7 @@ struct kvm_s390_io_adapter_req {
>>>   #define KVM_S390_VM_CRYPTO        2
>>>   #define KVM_S390_VM_CPU_MODEL        3
>>>   #define KVM_S390_VM_MIGRATION        4
>>> +#define KVM_S390_VM_CPU_TOPOLOGY    5
>>>     /* kvm attributes for mem_ctrl */
>>>   #define KVM_S390_VM_MEM_ENABLE_CMMA    0
>>> diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
>>> index 70436bfff53a..b18e0b940b26 100644
>>> --- a/arch/s390/kvm/kvm-s390.c
>>> +++ b/arch/s390/kvm/kvm-s390.c
>>> @@ -606,6 +606,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
>>>       case KVM_CAP_S390_PROTECTED:
>>>           r = is_prot_virt_host();
>>>           break;
>>> +    case KVM_CAP_S390_CPU_TOPOLOGY:
>>> +        r = test_facility(11);
>>> +        break;
>>>       default:
>>>           r = 0;
>>>       }
>>> @@ -817,6 +820,20 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
>>>           icpt_operexc_on_all_vcpus(kvm);
>>>           r = 0;
>>>           break;
>>> +    case KVM_CAP_S390_CPU_TOPOLOGY:
>>> +        r = -EINVAL;
>>> +        mutex_lock(&kvm->lock);
>>> +        if (kvm->created_vcpus) {
>>> +            r = -EBUSY;
>>> +        } else if (test_facility(11)) {
>>> +            set_kvm_facility(kvm->arch.model.fac_mask, 11);
>>> +            set_kvm_facility(kvm->arch.model.fac_list, 11);
>>> +            r = 0;
>>> +        }
>>> +        mutex_unlock(&kvm->lock);
>>> +        VM_EVENT(kvm, 3, "ENABLE: CAP_S390_CPU_TOPOLOGY %s",
>>> +             r ? "(not available)" : "(success)");
>>> +        break;
>>>       default:
>>>           r = -EINVAL;
>>>           break;
>>> @@ -1717,6 +1734,36 @@ static void kvm_s390_update_topology_change_report(struct kvm *kvm, bool val)
>>>       read_unlock(&kvm->arch.sca_lock);
>>>   }
>>>   +static int kvm_s390_set_topology(struct kvm *kvm, struct kvm_device_attr *attr)
>>
>> kvm_s390_set_topology_changed maybe?
>> kvm_s390_get_topology_changed below then.
> 

I won't insist on it, but I do think it's more readable.

> No strong opinion, if you prefer I change this.
> 
>>
>>> +{
>>> +    if (!test_kvm_facility(kvm, 11))
>>> +        return -ENXIO;
>>> +
>>> +    kvm_s390_update_topology_change_report(kvm, !!attr->attr);
>>> +    return 0;
>>> +}
>>> +
>>> +static int kvm_s390_get_topology(struct kvm *kvm, struct kvm_device_attr *attr)
>>> +{
>>> +    union sca_utility utility;
>>> +    struct bsca_block *sca;
>>> +    __u8 topo;
>>> +
>>> +    if (!test_kvm_facility(kvm, 11))
>>> +        return -ENXIO;
>>> +
>>> +    read_lock(&kvm->arch.sca_lock);
>>> +    sca = kvm->arch.sca;
>>> +    utility.val = READ_ONCE(sca->utility.val);
>>
>> I don't think you need the READ_ONCE anymore, now that there is a lock it should act as a compile barrier.
> 
> I think you are right.
> 
>>> +    read_unlock(&kvm->arch.sca_lock);
>>> +    topo = utility.mtcr;
>>> +
>>> +    if (copy_to_user((void __user *)attr->addr, &topo, sizeof(topo)))
>>
>> Why void not u8?
> 
> I like to say we write on "topo" with the size of "topo".
> So we do not need to verify the effective size of topo.
> But I understand, it is a UAPI, setting u8 in the copy_to_user makes sense too.
> For my personal opinion, I would have prefer that userland tell us the size it awaits even here, for this special case, since we use a byte, we can not do really wrong.
You're right, it doesn't make a difference.
What about doing put_user(topo, (u8 *)attr->addr)), seems more straight forward.
> 
>>
>>> +        return -EFAULT;
>>> +
>>> +    return 0;
>>> +}
>>> +
>> [...]
>>
>
Pierre Morel July 12, 2022, 11:17 a.m. UTC | #4
On 7/12/22 10:47, Janis Schoetterl-Glausch wrote:
> On 7/12/22 09:24, Pierre Morel wrote:
>>
>>
>> On 7/11/22 15:22, Janis Schoetterl-Glausch wrote:
>>> On 7/11/22 10:41, Pierre Morel wrote:
>>>> During a subsystem reset the Topology-Change-Report is cleared.
>>>>
>>>> Let's give userland the possibility to clear the MTCR in the case
>>>> of a subsystem reset.
>>>>
>>>> To migrate the MTCR, we give userland the possibility to
>>>> query the MTCR state.
>>>>
>>>> We indicate KVM support for the CPU topology facility with a new
>>>> KVM capability: KVM_CAP_S390_CPU_TOPOLOGY.
>>>>
>>>> Signed-off-by: Pierre Morel <pmorel@linux.ibm.com>
>>>
>>> Reviewed-by: Janis Schoetterl-Glausch <scgl@linux.ibm.com>
>>>
>>
>> Thanks!
>>
>>> See nits/comments below.
>>>
>>>> ---
>>>>    Documentation/virt/kvm/api.rst   | 25 ++++++++++++++
>>>>    arch/s390/include/uapi/asm/kvm.h |  1 +
>>>>    arch/s390/kvm/kvm-s390.c         | 56 ++++++++++++++++++++++++++++++++
>>>>    include/uapi/linux/kvm.h         |  1 +
>>>>    4 files changed, 83 insertions(+)
>>>>
>>>> diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
>>>> index 11e00a46c610..5e086125d8ad 100644
>>>> --- a/Documentation/virt/kvm/api.rst
>>>> +++ b/Documentation/virt/kvm/api.rst
>>>> @@ -7956,6 +7956,31 @@ should adjust CPUID leaf 0xA to reflect that the PMU is disabled.
>>>>    When enabled, KVM will exit to userspace with KVM_EXIT_SYSTEM_EVENT of
>>>>    type KVM_SYSTEM_EVENT_SUSPEND to process the guest suspend request.
>>>>    +8.37 KVM_CAP_S390_CPU_TOPOLOGY
>>>> +------------------------------
>>>> +
>>>> +:Capability: KVM_CAP_S390_CPU_TOPOLOGY
>>>> +:Architectures: s390
>>>> +:Type: vm
>>>> +
>>>> +This capability indicates that KVM will provide the S390 CPU Topology
>>>> +facility which consist of the interpretation of the PTF instruction for
>>>> +the function code 2 along with interception and forwarding of both the
>>>> +PTF instruction with function codes 0 or 1 and the STSI(15,1,x)
>>>
>>> Is the architecture allowed to extend STSI without a facility?
>>> If so, if we say here that STSI 15.1.x is passed to user space, then
>>> I think we should have a
>>>
>>> if (sel1 != 1)
>>>      goto out_no_data;
>>>
>>> or maybe even
>>>
>>> if (sel1 != 1 || sel2 < 2 || sel2 > 6)
>>>      goto out_no_data;
>>>
>>> in priv.c
>>
>> I am not a big fan of doing everything in the kernel.
>> Here we have no performance issue since it is an error of the guest if it sends a wrong selector.
>>
> I agree, but I didn't suggest it for performance reasons.

Yes, and that is why I do not agree ;)

> I was thinking about future proofing, that is if the architecture is extended.
> We don't know if future extensions are best handled in the kernel or user space,
> so if we prevent it from going to user space, we can defer the decision to when we know more.

If future extensions are better handle in kernel we will handle them in 
kernel, obviously, in this case we will need a patch.

If it is not better handle in kernel we will handle the extensions in 
userland and we will not need a kernel patch making the update of the 
virtual architecture easier and faster.

If we prohibit the extensions in kernel we will need a kernel patch in 
both cases and a userland patch if it is not completely handled in kernel.

In userland we check any wrong selector before the instruction goes back 
to the guest.

> But that's only relevant if STSI can be extended without a capability, which is why I asked about that.

Logicaly any change, extension, in the architecture should be signaled 
by a facility bit or something.

> 
>> Even testing the facility or PV in the kernel is for my opinion arguable in the case we do not do any treatment in the kernel.
>>
>> I do not see what it brings to us, it increase the LOCs and makes the implementation less easy to evolve.
>>
>>
>>>
>>>> +instruction to the userland hypervisor.
>>>> +
>>>> +The stfle facility 11, CPU Topology facility, should not be indicated
>>>> +to the guest without this capability.
>>>> +
>>>> +When this capability is present, KVM provides a new attribute group
>>>> +on vm fd, KVM_S390_VM_CPU_TOPOLOGY.
>>>> +This new attribute allows to get, set or clear the Modified Change
>>>
>>> get or set, now that there is no explicit clear anymore.
>>
>> Yes now it is a set to 0 but the action of clearing remains.
>>
>>>
>>>> +Topology Report (MTCR) bit of the SCA through the kvm_device_attr
>>>> +structure.> +
>>>> +When getting the Modified Change Topology Report value, the attr->addr
>>>
>>> When getting/setting the...
>>>
>>>> +must point to a byte where the value will be stored.
>>>
>>> ... will be stored/retrieved from.
>>
>> OK
> 
> Wait no, I didn't get how that works. You're passing the value via attr->attr, not reading it from addr.

:) OK

>>
>>
>>>> +
>>>>    9. Known KVM API problems
>>>>    =========================
>>>>    diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h
>>>> index 7a6b14874d65..a73cf01a1606 100644
>>>> --- a/arch/s390/include/uapi/asm/kvm.h
>>>> +++ b/arch/s390/include/uapi/asm/kvm.h
>>>> @@ -74,6 +74,7 @@ struct kvm_s390_io_adapter_req {
>>>>    #define KVM_S390_VM_CRYPTO        2
>>>>    #define KVM_S390_VM_CPU_MODEL        3
>>>>    #define KVM_S390_VM_MIGRATION        4
>>>> +#define KVM_S390_VM_CPU_TOPOLOGY    5
>>>>      /* kvm attributes for mem_ctrl */
>>>>    #define KVM_S390_VM_MEM_ENABLE_CMMA    0
>>>> diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
>>>> index 70436bfff53a..b18e0b940b26 100644
>>>> --- a/arch/s390/kvm/kvm-s390.c
>>>> +++ b/arch/s390/kvm/kvm-s390.c
>>>> @@ -606,6 +606,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
>>>>        case KVM_CAP_S390_PROTECTED:
>>>>            r = is_prot_virt_host();
>>>>            break;
>>>> +    case KVM_CAP_S390_CPU_TOPOLOGY:
>>>> +        r = test_facility(11);
>>>> +        break;
>>>>        default:
>>>>            r = 0;
>>>>        }
>>>> @@ -817,6 +820,20 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
>>>>            icpt_operexc_on_all_vcpus(kvm);
>>>>            r = 0;
>>>>            break;
>>>> +    case KVM_CAP_S390_CPU_TOPOLOGY:
>>>> +        r = -EINVAL;
>>>> +        mutex_lock(&kvm->lock);
>>>> +        if (kvm->created_vcpus) {
>>>> +            r = -EBUSY;
>>>> +        } else if (test_facility(11)) {
>>>> +            set_kvm_facility(kvm->arch.model.fac_mask, 11);
>>>> +            set_kvm_facility(kvm->arch.model.fac_list, 11);
>>>> +            r = 0;
>>>> +        }
>>>> +        mutex_unlock(&kvm->lock);
>>>> +        VM_EVENT(kvm, 3, "ENABLE: CAP_S390_CPU_TOPOLOGY %s",
>>>> +             r ? "(not available)" : "(success)");
>>>> +        break;
>>>>        default:
>>>>            r = -EINVAL;
>>>>            break;
>>>> @@ -1717,6 +1734,36 @@ static void kvm_s390_update_topology_change_report(struct kvm *kvm, bool val)
>>>>        read_unlock(&kvm->arch.sca_lock);
>>>>    }
>>>>    +static int kvm_s390_set_topology(struct kvm *kvm, struct kvm_device_attr *attr)
>>>
>>> kvm_s390_set_topology_changed maybe?
>>> kvm_s390_get_topology_changed below then.
>>
> 
> I won't insist on it, but I do think it's more readable.

OK, I can change it

> 
>> No strong opinion, if you prefer I change this.
>>
>>>
>>>> +{
>>>> +    if (!test_kvm_facility(kvm, 11))
>>>> +        return -ENXIO;
>>>> +
>>>> +    kvm_s390_update_topology_change_report(kvm, !!attr->attr);
>>>> +    return 0;
>>>> +}
>>>> +
>>>> +static int kvm_s390_get_topology(struct kvm *kvm, struct kvm_device_attr *attr)
>>>> +{
>>>> +    union sca_utility utility;
>>>> +    struct bsca_block *sca;
>>>> +    __u8 topo;
>>>> +
>>>> +    if (!test_kvm_facility(kvm, 11))
>>>> +        return -ENXIO;
>>>> +
>>>> +    read_lock(&kvm->arch.sca_lock);
>>>> +    sca = kvm->arch.sca;
>>>> +    utility.val = READ_ONCE(sca->utility.val);
>>>
>>> I don't think you need the READ_ONCE anymore, now that there is a lock it should act as a compile barrier.
>>
>> I think you are right.
>>
>>>> +    read_unlock(&kvm->arch.sca_lock);
>>>> +    topo = utility.mtcr;
>>>> +
>>>> +    if (copy_to_user((void __user *)attr->addr, &topo, sizeof(topo)))
>>>
>>> Why void not u8?
>>
>> I like to say we write on "topo" with the size of "topo".
>> So we do not need to verify the effective size of topo.
>> But I understand, it is a UAPI, setting u8 in the copy_to_user makes sense too.
>> For my personal opinion, I would have prefer that userland tell us the size it awaits even here, for this special case, since we use a byte, we can not do really wrong.
> You're right, it doesn't make a difference.
> What about doing put_user(topo, (u8 *)attr->addr)), seems more straight forward.

OK

>>
>>>
>>>> +        return -EFAULT;
>>>> +
>>>> +    return 0;
>>>> +}
>>>> +
>>> [...]
>>>
>>
>
Janosch Frank July 13, 2022, 9:01 a.m. UTC | #5
On 7/12/22 13:17, Pierre Morel wrote:
> 
> 
> On 7/12/22 10:47, Janis Schoetterl-Glausch wrote:
>> On 7/12/22 09:24, Pierre Morel wrote:
>>>
>>>
>>> On 7/11/22 15:22, Janis Schoetterl-Glausch wrote:
>>>> On 7/11/22 10:41, Pierre Morel wrote:
>>>>> During a subsystem reset the Topology-Change-Report is cleared.
>>>>>
>>>>> Let's give userland the possibility to clear the MTCR in the case
>>>>> of a subsystem reset.
>>>>>
>>>>> To migrate the MTCR, we give userland the possibility to
>>>>> query the MTCR state.
>>>>>
>>>>> We indicate KVM support for the CPU topology facility with a new
>>>>> KVM capability: KVM_CAP_S390_CPU_TOPOLOGY.
>>>>>
>>>>> Signed-off-by: Pierre Morel <pmorel@linux.ibm.com>
>>>>
>>>> Reviewed-by: Janis Schoetterl-Glausch <scgl@linux.ibm.com>
>>>>
>>>
>>> Thanks!
>>>
>>>> See nits/comments below.
>>>>
>>>>> ---
>>>>>     Documentation/virt/kvm/api.rst   | 25 ++++++++++++++
>>>>>     arch/s390/include/uapi/asm/kvm.h |  1 +
>>>>>     arch/s390/kvm/kvm-s390.c         | 56 ++++++++++++++++++++++++++++++++
>>>>>     include/uapi/linux/kvm.h         |  1 +
>>>>>     4 files changed, 83 insertions(+)
>>>>>
>>>>> diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
>>>>> index 11e00a46c610..5e086125d8ad 100644
>>>>> --- a/Documentation/virt/kvm/api.rst
>>>>> +++ b/Documentation/virt/kvm/api.rst
>>>>> @@ -7956,6 +7956,31 @@ should adjust CPUID leaf 0xA to reflect that the PMU is disabled.
>>>>>     When enabled, KVM will exit to userspace with KVM_EXIT_SYSTEM_EVENT of
>>>>>     type KVM_SYSTEM_EVENT_SUSPEND to process the guest suspend request.
>>>>>     +8.37 KVM_CAP_S390_CPU_TOPOLOGY
>>>>> +------------------------------
>>>>> +
>>>>> +:Capability: KVM_CAP_S390_CPU_TOPOLOGY
>>>>> +:Architectures: s390
>>>>> +:Type: vm
>>>>> +
>>>>> +This capability indicates that KVM will provide the S390 CPU Topology
>>>>> +facility which consist of the interpretation of the PTF instruction for
>>>>> +the function code 2 along with interception and forwarding of both the
>>>>> +PTF instruction with function codes 0 or 1 and the STSI(15,1,x)
>>>>
>>>> Is the architecture allowed to extend STSI without a facility?
>>>> If so, if we say here that STSI 15.1.x is passed to user space, then
>>>> I think we should have a
>>>>
>>>> if (sel1 != 1)
>>>>       goto out_no_data;
>>>>
>>>> or maybe even
>>>>
>>>> if (sel1 != 1 || sel2 < 2 || sel2 > 6)
>>>>       goto out_no_data;
>>>>
>>>> in priv.c
>>>
>>> I am not a big fan of doing everything in the kernel.
>>> Here we have no performance issue since it is an error of the guest if it sends a wrong selector.
>>>
>> I agree, but I didn't suggest it for performance reasons.
> 
> Yes, and that is why I do not agree ;)
> 
>> I was thinking about future proofing, that is if the architecture is extended.
>> We don't know if future extensions are best handled in the kernel or user space,
>> so if we prevent it from going to user space, we can defer the decision to when we know more.
> 
> If future extensions are better handle in kernel we will handle them in
> kernel, obviously, in this case we will need a patch.
> 
> If it is not better handle in kernel we will handle the extensions in
> userland and we will not need a kernel patch making the update of the
> virtual architecture easier and faster.
> 
> If we prohibit the extensions in kernel we will need a kernel patch in
> both cases and a userland patch if it is not completely handled in kernel.
> 
> In userland we check any wrong selector before the instruction goes back
> to the guest.

I opt for passing the lower selectors down for QEMU to handle.

> 
>> But that's only relevant if STSI can be extended without a capability, which is why I asked about that.
> 
> Logicaly any change, extension, in the architecture should be signaled
> by a facility bit or something.
> 
>>
>>> Even testing the facility or PV in the kernel is for my opinion arguable in the case we do not do any treatment in the kernel.

That's actually a good point.

New instruction interceptions for PV will need to be enabled by KVM via 
a switch somewhere since the UV can't rely on the fact that KVM will 
correctly handle it without an enablement.


So please remove the pv check

>>>
>>> I do not see what it brings to us, it increase the LOCs and makes the implementation less easy to evolve.
>>>
>>>
>>>>
>>>>> +instruction to the userland hypervisor.
>>>>> +
>>>>> +The stfle facility 11, CPU Topology facility, should not be indicated
>>>>> +to the guest without this capability.
>>>>> +
>>>>> +When this capability is present, KVM provides a new attribute group
>>>>> +on vm fd, KVM_S390_VM_CPU_TOPOLOGY.
>>>>> +This new attribute allows to get, set or clear the Modified Change
>>>>
>>>> get or set, now that there is no explicit clear anymore.
>>>
>>> Yes now it is a set to 0 but the action of clearing remains.

Yes

>>>
>>>>
>>>>> +Topology Report (MTCR) bit of the SCA through the kvm_device_attr
>>>>> +structure.> +
>>>>> +When getting the Modified Change Topology Report value, the attr->addr
>>>>
>>>> When getting/setting the...
>>>>
>>>>> +must point to a byte where the value will be stored.
>>>>
>>>> ... will be stored/retrieved from.
>>>
>>> OK
>>
>> Wait no, I didn't get how that works. You're passing the value via attr->attr, not reading it from addr.
> 
> :) OK
> 
>>>
>>>
>>>>> +
>>>>>     9. Known KVM API problems
>>>>>     =========================
>>>>>     diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h
>>>>> index 7a6b14874d65..a73cf01a1606 100644
>>>>> --- a/arch/s390/include/uapi/asm/kvm.h
>>>>> +++ b/arch/s390/include/uapi/asm/kvm.h
>>>>> @@ -74,6 +74,7 @@ struct kvm_s390_io_adapter_req {
>>>>>     #define KVM_S390_VM_CRYPTO        2
>>>>>     #define KVM_S390_VM_CPU_MODEL        3
>>>>>     #define KVM_S390_VM_MIGRATION        4
>>>>> +#define KVM_S390_VM_CPU_TOPOLOGY    5
>>>>>       /* kvm attributes for mem_ctrl */
>>>>>     #define KVM_S390_VM_MEM_ENABLE_CMMA    0
>>>>> diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
>>>>> index 70436bfff53a..b18e0b940b26 100644
>>>>> --- a/arch/s390/kvm/kvm-s390.c
>>>>> +++ b/arch/s390/kvm/kvm-s390.c
>>>>> @@ -606,6 +606,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
>>>>>         case KVM_CAP_S390_PROTECTED:
>>>>>             r = is_prot_virt_host();
>>>>>             break;
>>>>> +    case KVM_CAP_S390_CPU_TOPOLOGY:
>>>>> +        r = test_facility(11);
>>>>> +        break;
>>>>>         default:
>>>>>             r = 0;
>>>>>         }
>>>>> @@ -817,6 +820,20 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
>>>>>             icpt_operexc_on_all_vcpus(kvm);
>>>>>             r = 0;
>>>>>             break;
>>>>> +    case KVM_CAP_S390_CPU_TOPOLOGY:
>>>>> +        r = -EINVAL;
>>>>> +        mutex_lock(&kvm->lock);
>>>>> +        if (kvm->created_vcpus) {
>>>>> +            r = -EBUSY;
>>>>> +        } else if (test_facility(11)) {
>>>>> +            set_kvm_facility(kvm->arch.model.fac_mask, 11);
>>>>> +            set_kvm_facility(kvm->arch.model.fac_list, 11);
>>>>> +            r = 0;
>>>>> +        }
>>>>> +        mutex_unlock(&kvm->lock);
>>>>> +        VM_EVENT(kvm, 3, "ENABLE: CAP_S390_CPU_TOPOLOGY %s",
>>>>> +             r ? "(not available)" : "(success)");
>>>>> +        break;
>>>>>         default:
>>>>>             r = -EINVAL;
>>>>>             break;
>>>>> @@ -1717,6 +1734,36 @@ static void kvm_s390_update_topology_change_report(struct kvm *kvm, bool val)
>>>>>         read_unlock(&kvm->arch.sca_lock);
>>>>>     }
>>>>>     +static int kvm_s390_set_topology(struct kvm *kvm, struct kvm_device_attr *attr)
>>>>
>>>> kvm_s390_set_topology_changed maybe?
>>>> kvm_s390_get_topology_changed below then.

kvm_s390_set_topology_change_indication

It's long but it's rarely used.
Maybe shorten topology to "topo"

[..]
>>>> I don't think you need the READ_ONCE anymore, now that there is a lock it should act as a compile barrier.
>>>
>>> I think you are right.
>>>
>>>>> +    read_unlock(&kvm->arch.sca_lock);
>>>>> +    topo = utility.mtcr;
>>>>> +
>>>>> +    if (copy_to_user((void __user *)attr->addr, &topo, sizeof(topo)))
>>>>
>>>> Why void not u8?
>>>
>>> I like to say we write on "topo" with the size of "topo".
>>> So we do not need to verify the effective size of topo.
>>> But I understand, it is a UAPI, setting u8 in the copy_to_user makes sense too.
>>> For my personal opinion, I would have prefer that userland tell us the size it awaits even here, for this special case, since we use a byte, we can not do really wrong.
>> You're right, it doesn't make a difference.
>> What about doing put_user(topo, (u8 *)attr->addr)), seems more straight forward.
> 
> OK

(u8 __user *)

Always go the explicit route if possible

> 
>>>
>>>>
>>>>> +        return -EFAULT;
>>>>> +
>>>>> +    return 0;
>>>>> +}
>>>>> +
>>>> [...]
>>>>
>>>
>>
>
Pierre Morel July 14, 2022, 8:37 a.m. UTC | #6
On 7/13/22 11:01, Janosch Frank wrote:
> On 7/12/22 13:17, Pierre Morel wrote:
>>
>>
>> On 7/12/22 10:47, Janis Schoetterl-Glausch wrote:
>>> On 7/12/22 09:24, Pierre Morel wrote:
>>>>
>>>>

...

>> kernel.
>>
>> In userland we check any wrong selector before the instruction goes back
>> to the guest.
> 
> I opt for passing the lower selectors down for QEMU to handle.

OK

> 
>>
>>> But that's only relevant if STSI can be extended without a 
>>> capability, which is why I asked about that.
>>
>> Logicaly any change, extension, in the architecture should be signaled
>> by a facility bit or something.
>>
>>>
>>>> Even testing the facility or PV in the kernel is for my opinion 
>>>> arguable in the case we do not do any treatment in the kernel.
> 
> That's actually a good point.
> 
> New instruction interceptions for PV will need to be enabled by KVM via 
> a switch somewhere since the UV can't rely on the fact that KVM will 
> correctly handle it without an enablement.
> 
> 
> So please remove the pv check

OK

> 

...

>>>>>>     +static int kvm_s390_set_topology(struct kvm *kvm, struct 
>>>>>> kvm_device_attr *attr)
>>>>>
>>>>> kvm_s390_set_topology_changed maybe?
>>>>> kvm_s390_get_topology_changed below then.
> 
> kvm_s390_set_topology_change_indication
> 
> It's long but it's rarely used.
> Maybe shorten topology to "topo"

OK
I use
kvm_s390_get_topo_change_indication()


Thanks.

Regards,
Pierre
diff mbox series

Patch

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 11e00a46c610..5e086125d8ad 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -7956,6 +7956,31 @@  should adjust CPUID leaf 0xA to reflect that the PMU is disabled.
 When enabled, KVM will exit to userspace with KVM_EXIT_SYSTEM_EVENT of
 type KVM_SYSTEM_EVENT_SUSPEND to process the guest suspend request.
 
+8.37 KVM_CAP_S390_CPU_TOPOLOGY
+------------------------------
+
+:Capability: KVM_CAP_S390_CPU_TOPOLOGY
+:Architectures: s390
+:Type: vm
+
+This capability indicates that KVM will provide the S390 CPU Topology
+facility which consist of the interpretation of the PTF instruction for
+the function code 2 along with interception and forwarding of both the
+PTF instruction with function codes 0 or 1 and the STSI(15,1,x)
+instruction to the userland hypervisor.
+
+The stfle facility 11, CPU Topology facility, should not be indicated
+to the guest without this capability.
+
+When this capability is present, KVM provides a new attribute group
+on vm fd, KVM_S390_VM_CPU_TOPOLOGY.
+This new attribute allows to get, set or clear the Modified Change
+Topology Report (MTCR) bit of the SCA through the kvm_device_attr
+structure.
+
+When getting the Modified Change Topology Report value, the attr->addr
+must point to a byte where the value will be stored.
+
 9. Known KVM API problems
 =========================
 
diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h
index 7a6b14874d65..a73cf01a1606 100644
--- a/arch/s390/include/uapi/asm/kvm.h
+++ b/arch/s390/include/uapi/asm/kvm.h
@@ -74,6 +74,7 @@  struct kvm_s390_io_adapter_req {
 #define KVM_S390_VM_CRYPTO		2
 #define KVM_S390_VM_CPU_MODEL		3
 #define KVM_S390_VM_MIGRATION		4
+#define KVM_S390_VM_CPU_TOPOLOGY	5
 
 /* kvm attributes for mem_ctrl */
 #define KVM_S390_VM_MEM_ENABLE_CMMA	0
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 70436bfff53a..b18e0b940b26 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -606,6 +606,9 @@  int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_S390_PROTECTED:
 		r = is_prot_virt_host();
 		break;
+	case KVM_CAP_S390_CPU_TOPOLOGY:
+		r = test_facility(11);
+		break;
 	default:
 		r = 0;
 	}
@@ -817,6 +820,20 @@  int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
 		icpt_operexc_on_all_vcpus(kvm);
 		r = 0;
 		break;
+	case KVM_CAP_S390_CPU_TOPOLOGY:
+		r = -EINVAL;
+		mutex_lock(&kvm->lock);
+		if (kvm->created_vcpus) {
+			r = -EBUSY;
+		} else if (test_facility(11)) {
+			set_kvm_facility(kvm->arch.model.fac_mask, 11);
+			set_kvm_facility(kvm->arch.model.fac_list, 11);
+			r = 0;
+		}
+		mutex_unlock(&kvm->lock);
+		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_CPU_TOPOLOGY %s",
+			 r ? "(not available)" : "(success)");
+		break;
 	default:
 		r = -EINVAL;
 		break;
@@ -1717,6 +1734,36 @@  static void kvm_s390_update_topology_change_report(struct kvm *kvm, bool val)
 	read_unlock(&kvm->arch.sca_lock);
 }
 
+static int kvm_s390_set_topology(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+	if (!test_kvm_facility(kvm, 11))
+		return -ENXIO;
+
+	kvm_s390_update_topology_change_report(kvm, !!attr->attr);
+	return 0;
+}
+
+static int kvm_s390_get_topology(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+	union sca_utility utility;
+	struct bsca_block *sca;
+	__u8 topo;
+
+	if (!test_kvm_facility(kvm, 11))
+		return -ENXIO;
+
+	read_lock(&kvm->arch.sca_lock);
+	sca = kvm->arch.sca;
+	utility.val = READ_ONCE(sca->utility.val);
+	read_unlock(&kvm->arch.sca_lock);
+	topo = utility.mtcr;
+
+	if (copy_to_user((void __user *)attr->addr, &topo, sizeof(topo)))
+		return -EFAULT;
+
+	return 0;
+}
+
 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
 {
 	int ret;
@@ -1737,6 +1784,9 @@  static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
 	case KVM_S390_VM_MIGRATION:
 		ret = kvm_s390_vm_set_migration(kvm, attr);
 		break;
+	case KVM_S390_VM_CPU_TOPOLOGY:
+		ret = kvm_s390_set_topology(kvm, attr);
+		break;
 	default:
 		ret = -ENXIO;
 		break;
@@ -1762,6 +1812,9 @@  static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
 	case KVM_S390_VM_MIGRATION:
 		ret = kvm_s390_vm_get_migration(kvm, attr);
 		break;
+	case KVM_S390_VM_CPU_TOPOLOGY:
+		ret = kvm_s390_get_topology(kvm, attr);
+		break;
 	default:
 		ret = -ENXIO;
 		break;
@@ -1835,6 +1888,9 @@  static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
 	case KVM_S390_VM_MIGRATION:
 		ret = 0;
 		break;
+	case KVM_S390_VM_CPU_TOPOLOGY:
+		ret = test_kvm_facility(kvm, 11) ? 0 : -ENXIO;
+		break;
 	default:
 		ret = -ENXIO;
 		break;
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 5088bd9f1922..33317d820032 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1157,6 +1157,7 @@  struct kvm_ppc_resize_hpt {
 #define KVM_CAP_VM_TSC_CONTROL 214
 #define KVM_CAP_SYSTEM_EVENT_DATA 215
 #define KVM_CAP_ARM_SYSTEM_SUSPEND 216
+#define KVM_CAP_S390_CPU_TOPOLOGY 217
 
 #ifdef KVM_CAP_IRQ_ROUTING