diff mbox series

[V2] arm/kvm: add support for MTE

Message ID 20240912091616.393685-1-gankulkarni@os.amperecomputing.com (mailing list archive)
State New, archived
Headers show
Series [V2] arm/kvm: add support for MTE | expand

Commit Message

Ganapatrao Kulkarni Sept. 12, 2024, 9:16 a.m. UTC
Extend the 'mte' property for the virt machine to cover KVM as
well. For KVM, we don't allocate tag memory, but instead enable
the capability.

If MTE has been enabled, we need to disable migration, as we do not
yet have a way to migrate the tags as well. Therefore, MTE will stay
off with KVM unless requested explicitly.

This patch is rework of commit b320e21c48ce64853904bea6631c0158cc2ef227
which broke TCG since it made the TCG -cpu max
report the presence of MTE to the guest even if the board hadn't
enabled MTE by wiring up the tag RAM. This meant that if the guest
then tried to use MTE QEMU would segfault accessing the
non-existent tag RAM.

Signed-off-by: Cornelia Huck <cohuck@redhat.com>
Signed-off-by: Ganapatrao Kulkarni <gankulkarni@os.amperecomputing.com>
---
Changes since V1:
	Added code to enable MTE before reading register
id_aa64pfr1 (unmasked MTE bits).

This patch is boot tested on ARM64 with KVM and on X86 with TCG for mte=on
and default case(i.e, no mte).

 hw/arm/virt.c        | 72 ++++++++++++++++++++++++++------------------
 target/arm/cpu.c     |  7 +++--
 target/arm/cpu.h     |  2 ++
 target/arm/kvm.c     | 59 ++++++++++++++++++++++++++++++++++++
 target/arm/kvm_arm.h | 19 ++++++++++++
 5 files changed, 126 insertions(+), 33 deletions(-)

Comments

Cornelia Huck Sept. 17, 2024, 2:13 p.m. UTC | #1
On Thu, Sep 12 2024, Ganapatrao Kulkarni <gankulkarni@os.amperecomputing.com> wrote:

> Extend the 'mte' property for the virt machine to cover KVM as
> well. For KVM, we don't allocate tag memory, but instead enable
> the capability.
>
> If MTE has been enabled, we need to disable migration, as we do not
> yet have a way to migrate the tags as well. Therefore, MTE will stay
> off with KVM unless requested explicitly.
>
> This patch is rework of commit b320e21c48ce64853904bea6631c0158cc2ef227
> which broke TCG since it made the TCG -cpu max
> report the presence of MTE to the guest even if the board hadn't
> enabled MTE by wiring up the tag RAM. This meant that if the guest
> then tried to use MTE QEMU would segfault accessing the
> non-existent tag RAM.
>
> Signed-off-by: Cornelia Huck <cohuck@redhat.com>
> Signed-off-by: Ganapatrao Kulkarni <gankulkarni@os.amperecomputing.com>
> ---
> Changes since V1:
> 	Added code to enable MTE before reading register
> id_aa64pfr1 (unmasked MTE bits).
>
> This patch is boot tested on ARM64 with KVM and on X86 with TCG for mte=on
> and default case(i.e, no mte).
>
>  hw/arm/virt.c        | 72 ++++++++++++++++++++++++++------------------
>  target/arm/cpu.c     |  7 +++--
>  target/arm/cpu.h     |  2 ++
>  target/arm/kvm.c     | 59 ++++++++++++++++++++++++++++++++++++
>  target/arm/kvm_arm.h | 19 ++++++++++++
>  5 files changed, 126 insertions(+), 33 deletions(-)
>

(...)

> diff --git a/target/arm/kvm.c b/target/arm/kvm.c
> index 849e2e21b3..29865609fb 100644
> --- a/target/arm/kvm.c
> +++ b/target/arm/kvm.c
> @@ -39,6 +39,7 @@
>  #include "hw/acpi/acpi.h"
>  #include "hw/acpi/ghes.h"
>  #include "target/arm/gtimer.h"
> +#include "migration/blocker.h"
>  
>  const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
>      KVM_CAP_LAST_INFO
> @@ -119,6 +120,21 @@ bool kvm_arm_create_scratch_host_vcpu(const uint32_t *cpus_to_try,
>      if (vmfd < 0) {
>          goto err;
>      }
> +
> +    /*
> +     * MTE bits of register id_aa64pfr1 are masked if MTE is
> +     * not enabled and required to enable before VCPU
> +     * is created. Hence enable MTE(if supported) before VCPU
> +     * is created to read unmasked MTE bits.
> +     */

Maybe

"KVM will mask the MTE bits in id_aa64pfr1 unless the VMM has enabled
the MTE KVM capability, so do it here for probing."

?

> +    if (kvm_arm_mte_supported()) {
> +        KVMState kvm_state;
> +
> +        kvm_state.fd = kvmfd;
> +        kvm_state.vmfd = vmfd;
> +        kvm_vm_enable_cap(&kvm_state, KVM_CAP_ARM_MTE, 0);
> +    }
> +
>      cpufd = ioctl(vmfd, KVM_CREATE_VCPU, 0);
>      if (cpufd < 0) {
>          goto err;
Gustavo Romero Sept. 19, 2024, 3:01 a.m. UTC | #2
Hi Cornelia and Ganapatrao,

On 9/17/24 11:13, Cornelia Huck wrote:
> On Thu, Sep 12 2024, Ganapatrao Kulkarni <gankulkarni@os.amperecomputing.com> wrote:
>
>> Extend the 'mte' property for the virt machine to cover KVM as
>> well. For KVM, we don't allocate tag memory, but instead enable
>> the capability.
>>
>> If MTE has been enabled, we need to disable migration, as we do not
>> yet have a way to migrate the tags as well. Therefore, MTE will stay
>> off with KVM unless requested explicitly.
>>
>> This patch is rework of commit b320e21c48ce64853904bea6631c0158cc2ef227
>> which broke TCG since it made the TCG -cpu max
>> report the presence of MTE to the guest even if the board hadn't
>> enabled MTE by wiring up the tag RAM. This meant that if the guest
>> then tried to use MTE QEMU would segfault accessing the
>> non-existent tag RAM.
>>
>> Signed-off-by: Cornelia Huck <cohuck@redhat.com>
>> Signed-off-by: Ganapatrao Kulkarni <gankulkarni@os.amperecomputing.com>
>> ---
>> Changes since V1:
>> 	Added code to enable MTE before reading register
>> id_aa64pfr1 (unmasked MTE bits).
>>
>> This patch is boot tested on ARM64 with KVM and on X86 with TCG for mte=on
>> and default case(i.e, no mte).
>>
>>   hw/arm/virt.c        | 72 ++++++++++++++++++++++++++------------------
>>   target/arm/cpu.c     |  7 +++--
>>   target/arm/cpu.h     |  2 ++
>>   target/arm/kvm.c     | 59 ++++++++++++++++++++++++++++++++++++
>>   target/arm/kvm_arm.h | 19 ++++++++++++
>>   5 files changed, 126 insertions(+), 33 deletions(-)
>>
> (...)
>
>> diff --git a/target/arm/kvm.c b/target/arm/kvm.c
>> index 849e2e21b3..29865609fb 100644
>> --- a/target/arm/kvm.c
>> +++ b/target/arm/kvm.c
>> @@ -39,6 +39,7 @@
>>   #include "hw/acpi/acpi.h"
>>   #include "hw/acpi/ghes.h"
>>   #include "target/arm/gtimer.h"
>> +#include "migration/blocker.h"
>>   
>>   const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
>>       KVM_CAP_LAST_INFO
>> @@ -119,6 +120,21 @@ bool kvm_arm_create_scratch_host_vcpu(const uint32_t *cpus_to_try,
>>       if (vmfd < 0) {
>>           goto err;
>>       }
>> +
>> +    /*
>> +     * MTE bits of register id_aa64pfr1 are masked if MTE is
>> +     * not enabled and required to enable before VCPU
>> +     * is created. Hence enable MTE(if supported) before VCPU
>> +     * is created to read unmasked MTE bits.
>> +     */
> Maybe
>
> "KVM will mask the MTE bits in id_aa64pfr1 unless the VMM has enabled
> the MTE KVM capability, so do it here for probing."
>
> ?

KVM_CAP_ARM_MTE must be set before creating the VCPUs, as

stated in the KVM API docs, so enabling this cap. here for later

probing the MTE bits correctly feels more like a consequence. So

I prefer the the original comment, which mentions that

requirement. But I think something shorter like the following

would work too:

"MTE capability must be enabled by the VMM before creating

any VCPUs. The MTE bits of the ID_AA64PFR1 register are masked

if MTE is not enabled, allowing them to be probed correctly."


Cheers,

Gustavo


>> +    if (kvm_arm_mte_supported()) {
>> +        KVMState kvm_state;
>> +
>> +        kvm_state.fd = kvmfd;
>> +        kvm_state.vmfd = vmfd;
>> +        kvm_vm_enable_cap(&kvm_state, KVM_CAP_ARM_MTE, 0);
>> +    }
>> +
>>       cpufd = ioctl(vmfd, KVM_CREATE_VCPU, 0);
>>       if (cpufd < 0) {
>>           goto err;
Gustavo Romero Sept. 19, 2024, 3:22 a.m. UTC | #3
Hi Ganapatrao,

On 9/12/24 06:16, Ganapatrao Kulkarni wrote:
> Extend the 'mte' property for the virt machine to cover KVM as
> well. For KVM, we don't allocate tag memory, but instead enable
> the capability.
>
> If MTE has been enabled, we need to disable migration, as we do not
> yet have a way to migrate the tags as well. Therefore, MTE will stay
> off with KVM unless requested explicitly.
>
> This patch is rework of commit b320e21c48ce64853904bea6631c0158cc2ef227
> which broke TCG since it made the TCG -cpu max
> report the presence of MTE to the guest even if the board hadn't
> enabled MTE by wiring up the tag RAM. This meant that if the guest
> then tried to use MTE QEMU would segfault accessing the
> non-existent tag RAM.
>
> Signed-off-by: Cornelia Huck <cohuck@redhat.com>
> Signed-off-by: Ganapatrao Kulkarni <gankulkarni@os.amperecomputing.com>
> ---
> Changes since V1:
> 	Added code to enable MTE before reading register
> id_aa64pfr1 (unmasked MTE bits).
>
> This patch is boot tested on ARM64 with KVM and on X86 with TCG for mte=on
> and default case(i.e, no mte).
>
>   hw/arm/virt.c        | 72 ++++++++++++++++++++++++++------------------
>   target/arm/cpu.c     |  7 +++--
>   target/arm/cpu.h     |  2 ++
>   target/arm/kvm.c     | 59 ++++++++++++++++++++++++++++++++++++
>   target/arm/kvm_arm.h | 19 ++++++++++++
>   5 files changed, 126 insertions(+), 33 deletions(-)

Overall the patch looks good. I just have a couple of questions.


> diff --git a/hw/arm/virt.c b/hw/arm/virt.c
> index 7934b23651..a33af7d996 100644
> --- a/hw/arm/virt.c
> +++ b/hw/arm/virt.c
> @@ -2211,7 +2211,7 @@ static void machvirt_init(MachineState *machine)
>           exit(1);
>       }
>   
> -    if (vms->mte && (kvm_enabled() || hvf_enabled())) {
> +    if (vms->mte && hvf_enabled()) {
>           error_report("mach-virt: %s does not support providing "
>                        "MTE to the guest CPU",
>                        current_accel_name());
> @@ -2281,39 +2281,51 @@ static void machvirt_init(MachineState *machine)
>           }
>   
>           if (vms->mte) {
> -            /* Create the memory region only once, but link to all cpus. */
> -            if (!tag_sysmem) {
> -                /*
> -                 * The property exists only if MemTag is supported.
> -                 * If it is, we must allocate the ram to back that up.
> -                 */
> -                if (!object_property_find(cpuobj, "tag-memory")) {
> -                    error_report("MTE requested, but not supported "
> -                                 "by the guest CPU");
> -                    exit(1);
> +            if (tcg_enabled()) {
> +                /* Create the memory region only once, but link to all cpus. */
> +                if (!tag_sysmem) {
> +                    /*
> +                     * The property exists only if MemTag is supported.
> +                     * If it is, we must allocate the ram to back that up.
> +                     */
> +                    if (!object_property_find(cpuobj, "tag-memory")) {
> +                        error_report("MTE requested, but not supported "
> +                                     "by the guest CPU");
> +                        exit(1);
> +                    }
> +
> +                    tag_sysmem = g_new(MemoryRegion, 1);
> +                    memory_region_init(tag_sysmem, OBJECT(machine),
> +                                       "tag-memory", UINT64_MAX / 32);
> +
> +                    if (vms->secure) {
> +                        secure_tag_sysmem = g_new(MemoryRegion, 1);
> +                        memory_region_init(secure_tag_sysmem, OBJECT(machine),
> +                                           "secure-tag-memory",
> +                                           UINT64_MAX / 32);
> +
> +                        /* As with ram, secure-tag takes precedence over tag. */
> +                        memory_region_add_subregion_overlap(secure_tag_sysmem,
> +                                                            0, tag_sysmem, -1);
> +                    }
>                   }
>   
> -                tag_sysmem = g_new(MemoryRegion, 1);
> -                memory_region_init(tag_sysmem, OBJECT(machine),
> -                                   "tag-memory", UINT64_MAX / 32);
> -
> +                object_property_set_link(cpuobj, "tag-memory",
> +                                         OBJECT(tag_sysmem), &error_abort);
>                   if (vms->secure) {
> -                    secure_tag_sysmem = g_new(MemoryRegion, 1);
> -                    memory_region_init(secure_tag_sysmem, OBJECT(machine),
> -                                       "secure-tag-memory", UINT64_MAX / 32);
> -
> -                    /* As with ram, secure-tag takes precedence over tag.  */
> -                    memory_region_add_subregion_overlap(secure_tag_sysmem, 0,
> -                                                        tag_sysmem, -1);
> +                    object_property_set_link(cpuobj, "secure-tag-memory",
> +                                             OBJECT(secure_tag_sysmem),
> +                                             &error_abort);
>                   }
> -            }
> -
> -            object_property_set_link(cpuobj, "tag-memory", OBJECT(tag_sysmem),
> -                                     &error_abort);
> -            if (vms->secure) {
> -                object_property_set_link(cpuobj, "secure-tag-memory",
> -                                         OBJECT(secure_tag_sysmem),
> -                                         &error_abort);
> +            } else if (kvm_enabled()) {
> +                if (!kvm_arm_mte_supported()) {
> +                    error_report("MTE requested, but not supported by KVM");
> +                    exit(1);
> +                }
> +                kvm_arm_enable_mte(cpuobj, &error_abort);
> +            } else {
> +                    error_report("MTE requested, but not supported ");
> +                    exit(1);
>               }
>           }
>   
> diff --git a/target/arm/cpu.c b/target/arm/cpu.c
> index 19191c2391..a59417aac8 100644
> --- a/target/arm/cpu.c
> +++ b/target/arm/cpu.c
> @@ -2390,11 +2390,12 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
>   
>   #ifndef CONFIG_USER_ONLY
>           /*
> -         * If we do not have tag-memory provided by the machine,
> -         * reduce MTE support to instructions enabled at EL0.
> +         * If we do not have tag-memory provided by the TCG
> +         * nor MTE at KVM enabled, reduce MTE support to
> +         * instructions enabled at EL0.

What controls if MTE insns. (stg, etc.) are enabled on EL0 and EL1

are the ATA and ATA0 bits in SCTRL register. Also, AA64PFR1 is a

register just to report the features available on the CPU, so I

wonder if that comment is indeed correct.

@rth: what do you think?


>            * This matches Cortex-A710 BROADCASTMTE input being LOW.
>            */
> -        if (cpu->tag_memory == NULL) {
> +        if (cpu->tag_memory == NULL && !cpu->kvm_mte) {
>               cpu->isar.id_aa64pfr1 =
>                   FIELD_DP64(cpu->isar.id_aa64pfr1, ID_AA64PFR1, MTE, 1);
>           }

If MTE is off on TGC and on KVM, then we're actually setting (deposit) MTE

field in AA64PFR1, meaning that MTE is implemented? or am I missing

something?


> diff --git a/target/arm/cpu.h b/target/arm/cpu.h
> index f065756c5c..8fc8b6398f 100644
> --- a/target/arm/cpu.h
> +++ b/target/arm/cpu.h
> @@ -922,6 +922,8 @@ struct ArchCPU {
>   
>       /* CPU has memory protection unit */
>       bool has_mpu;
> +    /* CPU has MTE enabled in KVM mode */
> +    bool kvm_mte;
>       /* PMSAv7 MPU number of supported regions */
>       uint32_t pmsav7_dregion;
>       /* PMSAv8 MPU number of supported hyp regions */
> diff --git a/target/arm/kvm.c b/target/arm/kvm.c
> index 849e2e21b3..29865609fb 100644
> --- a/target/arm/kvm.c
> +++ b/target/arm/kvm.c
> @@ -39,6 +39,7 @@
>   #include "hw/acpi/acpi.h"
>   #include "hw/acpi/ghes.h"
>   #include "target/arm/gtimer.h"
> +#include "migration/blocker.h"
>   
>   const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
>       KVM_CAP_LAST_INFO
> @@ -119,6 +120,21 @@ bool kvm_arm_create_scratch_host_vcpu(const uint32_t *cpus_to_try,
>       if (vmfd < 0) {
>           goto err;
>       }
> +
> +    /*
> +     * MTE bits of register id_aa64pfr1 are masked if MTE is
> +     * not enabled and required to enable before VCPU
> +     * is created. Hence enable MTE(if supported) before VCPU
> +     * is created to read unmasked MTE bits.
> +     */
> +    if (kvm_arm_mte_supported()) {
> +        KVMState kvm_state;
> +
> +        kvm_state.fd = kvmfd;
> +        kvm_state.vmfd = vmfd;
> +        kvm_vm_enable_cap(&kvm_state, KVM_CAP_ARM_MTE, 0);
> +    }
> +
>       cpufd = ioctl(vmfd, KVM_CREATE_VCPU, 0);
>       if (cpufd < 0) {
>           goto err;
> @@ -1793,6 +1809,11 @@ bool kvm_arm_sve_supported(void)
>       return kvm_check_extension(kvm_state, KVM_CAP_ARM_SVE);
>   }
>   
> +bool kvm_arm_mte_supported(void)
> +{
> +    return kvm_check_extension(kvm_state, KVM_CAP_ARM_MTE);
> +}
> +
>   QEMU_BUILD_BUG_ON(KVM_ARM64_SVE_VQ_MIN != 1);
>   
>   uint32_t kvm_arm_sve_get_vls(ARMCPU *cpu)
> @@ -2417,3 +2438,41 @@ int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
>       }
>       return 0;
>   }
> +
> +void kvm_arm_enable_mte(Object *cpuobj, Error **errp)
> +{
> +    static bool tried_to_enable;
> +    static bool succeeded_to_enable;
> +    Error *mte_migration_blocker = NULL;
> +    ARMCPU *cpu = ARM_CPU(cpuobj);
> +    int ret;
> +
> +    if (!tried_to_enable) {
> +        /*
> +         * MTE on KVM is enabled on a per-VM basis (and retrying doesn't make
> +         * sense), and we only want a single migration blocker as well.
> +         */
> +        tried_to_enable = true;
> +
> +        ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_ARM_MTE, 0);
> +        if (ret) {
> +            error_setg_errno(errp, -ret, "Failed to enable KVM_CAP_ARM_MTE");
> +            return;
> +        }
> +
> +        /* TODO: Add migration support with MTE enabled */
> +        error_setg(&mte_migration_blocker,
> +                   "Live migration disabled due to MTE enabled");
> +        if (migrate_add_blocker(&mte_migration_blocker, errp)) {
> +            error_free(mte_migration_blocker);
> +            return;
> +        }
> +
> +        succeeded_to_enable = true;
> +    }
> +
> +    if (succeeded_to_enable) {
> +        cpu->kvm_mte = true;
> +    }
> +}
> +

nit: remove ending blank line here.


Cheers,

Gustavo


> diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h
> index cfaa0d9bc7..4d293618a7 100644
> --- a/target/arm/kvm_arm.h
> +++ b/target/arm/kvm_arm.h
> @@ -188,6 +188,13 @@ bool kvm_arm_pmu_supported(void);
>    */
>   bool kvm_arm_sve_supported(void);
>   
> +/**
> + * kvm_arm_mte_supported:
> + *
> + * Returns: true if KVM can enable MTE, and false otherwise.
> + */
> +bool kvm_arm_mte_supported(void);
> +
>   /**
>    * kvm_arm_get_max_vm_ipa_size:
>    * @ms: Machine state handle
> @@ -214,6 +221,8 @@ void kvm_arm_pvtime_init(ARMCPU *cpu, uint64_t ipa);
>   
>   int kvm_arm_set_irq(int cpu, int irqtype, int irq, int level);
>   
> +void kvm_arm_enable_mte(Object *cpuobj, Error **errp);
> +
>   #else
>   
>   /*
> @@ -235,6 +244,11 @@ static inline bool kvm_arm_sve_supported(void)
>       return false;
>   }
>   
> +static inline bool kvm_arm_mte_supported(void)
> +{
> +    return false;
> +}
> +
>   /*
>    * These functions should never actually be called without KVM support.
>    */
> @@ -283,6 +297,11 @@ static inline uint32_t kvm_arm_sve_get_vls(ARMCPU *cpu)
>       g_assert_not_reached();
>   }
>   
> +static inline void kvm_arm_enable_mte(Object *cpuobj, Error **errp)
> +{
> +    g_assert_not_reached();
> +}
> +
>   #endif
>   
>   #endif
Ganapatrao Kulkarni Sept. 19, 2024, 10:11 a.m. UTC | #4
Hi Gustavo,

On 19-09-2024 08:31 am, Gustavo Romero wrote:
> Hi Cornelia and Ganapatrao,
> 
> On 9/17/24 11:13, Cornelia Huck wrote:
>> On Thu, Sep 12 2024, Ganapatrao Kulkarni 
>> <gankulkarni@os.amperecomputing.com> wrote:
>>
>>> Extend the 'mte' property for the virt machine to cover KVM as
>>> well. For KVM, we don't allocate tag memory, but instead enable
>>> the capability.
>>>
>>> If MTE has been enabled, we need to disable migration, as we do not
>>> yet have a way to migrate the tags as well. Therefore, MTE will stay
>>> off with KVM unless requested explicitly.
>>>
>>> This patch is rework of commit b320e21c48ce64853904bea6631c0158cc2ef227
>>> which broke TCG since it made the TCG -cpu max
>>> report the presence of MTE to the guest even if the board hadn't
>>> enabled MTE by wiring up the tag RAM. This meant that if the guest
>>> then tried to use MTE QEMU would segfault accessing the
>>> non-existent tag RAM.
>>>
>>> Signed-off-by: Cornelia Huck <cohuck@redhat.com>
>>> Signed-off-by: Ganapatrao Kulkarni <gankulkarni@os.amperecomputing.com>
>>> ---
>>> Changes since V1:
>>>     Added code to enable MTE before reading register
>>> id_aa64pfr1 (unmasked MTE bits).
>>>
>>> This patch is boot tested on ARM64 with KVM and on X86 with TCG for 
>>> mte=on
>>> and default case(i.e, no mte).
>>>
>>>   hw/arm/virt.c        | 72 ++++++++++++++++++++++++++------------------
>>>   target/arm/cpu.c     |  7 +++--
>>>   target/arm/cpu.h     |  2 ++
>>>   target/arm/kvm.c     | 59 ++++++++++++++++++++++++++++++++++++
>>>   target/arm/kvm_arm.h | 19 ++++++++++++
>>>   5 files changed, 126 insertions(+), 33 deletions(-)
>>>
>> (...)
>>
>>> diff --git a/target/arm/kvm.c b/target/arm/kvm.c
>>> index 849e2e21b3..29865609fb 100644
>>> --- a/target/arm/kvm.c
>>> +++ b/target/arm/kvm.c
>>> @@ -39,6 +39,7 @@
>>>   #include "hw/acpi/acpi.h"
>>>   #include "hw/acpi/ghes.h"
>>>   #include "target/arm/gtimer.h"
>>> +#include "migration/blocker.h"
>>>   const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
>>>       KVM_CAP_LAST_INFO
>>> @@ -119,6 +120,21 @@ bool kvm_arm_create_scratch_host_vcpu(const 
>>> uint32_t *cpus_to_try,
>>>       if (vmfd < 0) {
>>>           goto err;
>>>       }
>>> +
>>> +    /*
>>> +     * MTE bits of register id_aa64pfr1 are masked if MTE is
>>> +     * not enabled and required to enable before VCPU
>>> +     * is created. Hence enable MTE(if supported) before VCPU
>>> +     * is created to read unmasked MTE bits.
>>> +     */
>> Maybe
>>
>> "KVM will mask the MTE bits in id_aa64pfr1 unless the VMM has enabled
>> the MTE KVM capability, so do it here for probing."
>>
>> ?
> 
> KVM_CAP_ARM_MTE must be set before creating the VCPUs, as
> 
> stated in the KVM API docs, so enabling this cap. here for later
> 
> probing the MTE bits correctly feels more like a consequence. So
> 
> I prefer the the original comment, which mentions that
> 
> requirement. But I think something shorter like the following
> 
> would work too:
> 
> "MTE capability must be enabled by the VMM before creating
> 
> any VCPUs. The MTE bits of the ID_AA64PFR1 register are masked
> 
> if MTE is not enabled, allowing them to be probed correctly."
> 
> 

Thanks, this comment seems more precise.
Ganapatrao Kulkarni Sept. 19, 2024, 10:31 a.m. UTC | #5
On 19-09-2024 08:52 am, Gustavo Romero wrote:
> Hi Ganapatrao,
> 
> On 9/12/24 06:16, Ganapatrao Kulkarni wrote:
>> Extend the 'mte' property for the virt machine to cover KVM as
>> well. For KVM, we don't allocate tag memory, but instead enable
>> the capability.
>>
>> If MTE has been enabled, we need to disable migration, as we do not
>> yet have a way to migrate the tags as well. Therefore, MTE will stay
>> off with KVM unless requested explicitly.
>>
>> This patch is rework of commit b320e21c48ce64853904bea6631c0158cc2ef227
>> which broke TCG since it made the TCG -cpu max
>> report the presence of MTE to the guest even if the board hadn't
>> enabled MTE by wiring up the tag RAM. This meant that if the guest
>> then tried to use MTE QEMU would segfault accessing the
>> non-existent tag RAM.
>>
>> Signed-off-by: Cornelia Huck <cohuck@redhat.com>
>> Signed-off-by: Ganapatrao Kulkarni <gankulkarni@os.amperecomputing.com>
>> ---
>> Changes since V1:
>>     Added code to enable MTE before reading register
>> id_aa64pfr1 (unmasked MTE bits).
>>
>> This patch is boot tested on ARM64 with KVM and on X86 with TCG for 
>> mte=on
>> and default case(i.e, no mte).
>>
>>   hw/arm/virt.c        | 72 ++++++++++++++++++++++++++------------------
>>   target/arm/cpu.c     |  7 +++--
>>   target/arm/cpu.h     |  2 ++
>>   target/arm/kvm.c     | 59 ++++++++++++++++++++++++++++++++++++
>>   target/arm/kvm_arm.h | 19 ++++++++++++
>>   5 files changed, 126 insertions(+), 33 deletions(-)
> 
> Overall the patch looks good. I just have a couple of questions.
> 
> 
>> diff --git a/hw/arm/virt.c b/hw/arm/virt.c
>> index 7934b23651..a33af7d996 100644
>> --- a/hw/arm/virt.c
>> +++ b/hw/arm/virt.c
>> @@ -2211,7 +2211,7 @@ static void machvirt_init(MachineState *machine)
>>           exit(1);
>>       }
>> -    if (vms->mte && (kvm_enabled() || hvf_enabled())) {
>> +    if (vms->mte && hvf_enabled()) {
>>           error_report("mach-virt: %s does not support providing "
>>                        "MTE to the guest CPU",
>>                        current_accel_name());
>> @@ -2281,39 +2281,51 @@ static void machvirt_init(MachineState *machine)
>>           }
>>           if (vms->mte) {
>> -            /* Create the memory region only once, but link to all 
>> cpus. */
>> -            if (!tag_sysmem) {
>> -                /*
>> -                 * The property exists only if MemTag is supported.
>> -                 * If it is, we must allocate the ram to back that up.
>> -                 */
>> -                if (!object_property_find(cpuobj, "tag-memory")) {
>> -                    error_report("MTE requested, but not supported "
>> -                                 "by the guest CPU");
>> -                    exit(1);
>> +            if (tcg_enabled()) {
>> +                /* Create the memory region only once, but link to 
>> all cpus. */
>> +                if (!tag_sysmem) {
>> +                    /*
>> +                     * The property exists only if MemTag is supported.
>> +                     * If it is, we must allocate the ram to back 
>> that up.
>> +                     */
>> +                    if (!object_property_find(cpuobj, "tag-memory")) {
>> +                        error_report("MTE requested, but not supported "
>> +                                     "by the guest CPU");
>> +                        exit(1);
>> +                    }
>> +
>> +                    tag_sysmem = g_new(MemoryRegion, 1);
>> +                    memory_region_init(tag_sysmem, OBJECT(machine),
>> +                                       "tag-memory", UINT64_MAX / 32);
>> +
>> +                    if (vms->secure) {
>> +                        secure_tag_sysmem = g_new(MemoryRegion, 1);
>> +                        memory_region_init(secure_tag_sysmem, 
>> OBJECT(machine),
>> +                                           "secure-tag-memory",
>> +                                           UINT64_MAX / 32);
>> +
>> +                        /* As with ram, secure-tag takes precedence 
>> over tag. */
>> +                        
>> memory_region_add_subregion_overlap(secure_tag_sysmem,
>> +                                                            0, 
>> tag_sysmem, -1);
>> +                    }
>>                   }
>> -                tag_sysmem = g_new(MemoryRegion, 1);
>> -                memory_region_init(tag_sysmem, OBJECT(machine),
>> -                                   "tag-memory", UINT64_MAX / 32);
>> -
>> +                object_property_set_link(cpuobj, "tag-memory",
>> +                                         OBJECT(tag_sysmem), 
>> &error_abort);
>>                   if (vms->secure) {
>> -                    secure_tag_sysmem = g_new(MemoryRegion, 1);
>> -                    memory_region_init(secure_tag_sysmem, 
>> OBJECT(machine),
>> -                                       "secure-tag-memory", 
>> UINT64_MAX / 32);
>> -
>> -                    /* As with ram, secure-tag takes precedence over 
>> tag.  */
>> -                    
>> memory_region_add_subregion_overlap(secure_tag_sysmem, 0,
>> -                                                        tag_sysmem, -1);
>> +                    object_property_set_link(cpuobj, 
>> "secure-tag-memory",
>> +                                             OBJECT(secure_tag_sysmem),
>> +                                             &error_abort);
>>                   }
>> -            }
>> -
>> -            object_property_set_link(cpuobj, "tag-memory", 
>> OBJECT(tag_sysmem),
>> -                                     &error_abort);
>> -            if (vms->secure) {
>> -                object_property_set_link(cpuobj, "secure-tag-memory",
>> -                                         OBJECT(secure_tag_sysmem),
>> -                                         &error_abort);
>> +            } else if (kvm_enabled()) {
>> +                if (!kvm_arm_mte_supported()) {
>> +                    error_report("MTE requested, but not supported by 
>> KVM");
>> +                    exit(1);
>> +                }
>> +                kvm_arm_enable_mte(cpuobj, &error_abort);
>> +            } else {
>> +                    error_report("MTE requested, but not supported ");
>> +                    exit(1);
>>               }
>>           }
>> diff --git a/target/arm/cpu.c b/target/arm/cpu.c
>> index 19191c2391..a59417aac8 100644
>> --- a/target/arm/cpu.c
>> +++ b/target/arm/cpu.c
>> @@ -2390,11 +2390,12 @@ static void arm_cpu_realizefn(DeviceState 
>> *dev, Error **errp)
>>   #ifndef CONFIG_USER_ONLY
>>           /*
>> -         * If we do not have tag-memory provided by the machine,
>> -         * reduce MTE support to instructions enabled at EL0.
>> +         * If we do not have tag-memory provided by the TCG
>> +         * nor MTE at KVM enabled, reduce MTE support to
>> +         * instructions enabled at EL0.
> 
> What controls if MTE insns. (stg, etc.) are enabled on EL0 and EL1
> 
> are the ATA and ATA0 bits in SCTRL register. Also, AA64PFR1 is a
> 
> register just to report the features available on the CPU, so I
> 
> wonder if that comment is indeed correct.
> 
> @rth: what do you think?
> 
> 
>>            * This matches Cortex-A710 BROADCASTMTE input being LOW.
>>            */
>> -        if (cpu->tag_memory == NULL) {
>> +        if (cpu->tag_memory == NULL && !cpu->kvm_mte) {
>>               cpu->isar.id_aa64pfr1 =
>>                   FIELD_DP64(cpu->isar.id_aa64pfr1, ID_AA64PFR1, MTE, 1);
>>           }
> 
> If MTE is off on TGC and on KVM, then we're actually setting (deposit) MTE
> 
> field in AA64PFR1, meaning that MTE is implemented? or am I missing
> 
> something?

Thanks for the comment.

Looks like I did a mistake by mixing the TCG and KVM.

I have modified as below diff to keep TCG if loop as it is and adding if 
for KVM case to clear/mask the MTE bits if MTE in KVM mode is not 
enabled by user command(if no mte=on).

Is below diff makes sense?

diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index a59417aac8..523996576d 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -2390,15 +2390,20 @@ static void arm_cpu_realizefn(DeviceState *dev, 
Error **errp)

  #ifndef CONFIG_USER_ONLY
          /*
-         * If we do not have tag-memory provided by the TCG
-         * nor MTE at KVM enabled, reduce MTE support to
-         * instructions enabled at EL0.
+         * If we do not have tag-memory provided by the TCG,
+         * reduce MTE support to instructions enabled at EL0.
           * This matches Cortex-A710 BROADCASTMTE input being LOW.
           */
-        if (cpu->tag_memory == NULL && !cpu->kvm_mte) {
+        if (tcg_enabled() && cpu->tag_memory == NULL) {
              cpu->isar.id_aa64pfr1 =
                  FIELD_DP64(cpu->isar.id_aa64pfr1, ID_AA64PFR1, MTE, 1);
          }
+
+        /* Disable MTE, if it is not enabled by the user for KVM mode.
+         */
+        if (kvm_enabled() && !cpu->kvm_mte) {
+                FIELD_DP64(cpu->isar.id_aa64pfr1, ID_AA64PFR1, MTE, 0);
+        }
  #endif
      }


> 
> 
>> diff --git a/target/arm/cpu.h b/target/arm/cpu.h
>> index f065756c5c..8fc8b6398f 100644
>> --- a/target/arm/cpu.h
>> +++ b/target/arm/cpu.h
>> @@ -922,6 +922,8 @@ struct ArchCPU {
>>       /* CPU has memory protection unit */
>>       bool has_mpu;
>> +    /* CPU has MTE enabled in KVM mode */
>> +    bool kvm_mte;
>>       /* PMSAv7 MPU number of supported regions */
>>       uint32_t pmsav7_dregion;
>>       /* PMSAv8 MPU number of supported hyp regions */
>> diff --git a/target/arm/kvm.c b/target/arm/kvm.c
>> index 849e2e21b3..29865609fb 100644
>> --- a/target/arm/kvm.c
>> +++ b/target/arm/kvm.c
>> @@ -39,6 +39,7 @@
>>   #include "hw/acpi/acpi.h"
>>   #include "hw/acpi/ghes.h"
>>   #include "target/arm/gtimer.h"
>> +#include "migration/blocker.h"
>>   const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
>>       KVM_CAP_LAST_INFO
>> @@ -119,6 +120,21 @@ bool kvm_arm_create_scratch_host_vcpu(const 
>> uint32_t *cpus_to_try,
>>       if (vmfd < 0) {
>>           goto err;
>>       }
>> +
>> +    /*
>> +     * MTE bits of register id_aa64pfr1 are masked if MTE is
>> +     * not enabled and required to enable before VCPU
>> +     * is created. Hence enable MTE(if supported) before VCPU
>> +     * is created to read unmasked MTE bits.
>> +     */
>> +    if (kvm_arm_mte_supported()) {
>> +        KVMState kvm_state;
>> +
>> +        kvm_state.fd = kvmfd;
>> +        kvm_state.vmfd = vmfd;
>> +        kvm_vm_enable_cap(&kvm_state, KVM_CAP_ARM_MTE, 0);
>> +    }
>> +
>>       cpufd = ioctl(vmfd, KVM_CREATE_VCPU, 0);
>>       if (cpufd < 0) {
>>           goto err;
>> @@ -1793,6 +1809,11 @@ bool kvm_arm_sve_supported(void)
>>       return kvm_check_extension(kvm_state, KVM_CAP_ARM_SVE);
>>   }
>> +bool kvm_arm_mte_supported(void)
>> +{
>> +    return kvm_check_extension(kvm_state, KVM_CAP_ARM_MTE);
>> +}
>> +
>>   QEMU_BUILD_BUG_ON(KVM_ARM64_SVE_VQ_MIN != 1);
>>   uint32_t kvm_arm_sve_get_vls(ARMCPU *cpu)
>> @@ -2417,3 +2438,41 @@ int kvm_arch_remove_sw_breakpoint(CPUState *cs, 
>> struct kvm_sw_breakpoint *bp)
>>       }
>>       return 0;
>>   }
>> +
>> +void kvm_arm_enable_mte(Object *cpuobj, Error **errp)
>> +{
>> +    static bool tried_to_enable;
>> +    static bool succeeded_to_enable;
>> +    Error *mte_migration_blocker = NULL;
>> +    ARMCPU *cpu = ARM_CPU(cpuobj);
>> +    int ret;
>> +
>> +    if (!tried_to_enable) {
>> +        /*
>> +         * MTE on KVM is enabled on a per-VM basis (and retrying 
>> doesn't make
>> +         * sense), and we only want a single migration blocker as well.
>> +         */
>> +        tried_to_enable = true;
>> +
>> +        ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_ARM_MTE, 0);
>> +        if (ret) {
>> +            error_setg_errno(errp, -ret, "Failed to enable 
>> KVM_CAP_ARM_MTE");
>> +            return;
>> +        }
>> +
>> +        /* TODO: Add migration support with MTE enabled */
>> +        error_setg(&mte_migration_blocker,
>> +                   "Live migration disabled due to MTE enabled");
>> +        if (migrate_add_blocker(&mte_migration_blocker, errp)) {
>> +            error_free(mte_migration_blocker);
>> +            return;
>> +        }
>> +
>> +        succeeded_to_enable = true;
>> +    }
>> +
>> +    if (succeeded_to_enable) {
>> +        cpu->kvm_mte = true;
>> +    }
>> +}
>> +
> 
> nit: remove ending blank line here.
> 

Sure, thanks.
Cornelia Huck Sept. 19, 2024, 3:05 p.m. UTC | #6
On Thu, Sep 19 2024, Ganapatrao Kulkarni <gankulkarni@os.amperecomputing.com> wrote:

> I have modified as below diff to keep TCG if loop as it is and adding if 
> for KVM case to clear/mask the MTE bits if MTE in KVM mode is not 
> enabled by user command(if no mte=on).
>
> Is below diff makes sense?
>
> diff --git a/target/arm/cpu.c b/target/arm/cpu.c
> index a59417aac8..523996576d 100644
> --- a/target/arm/cpu.c
> +++ b/target/arm/cpu.c
> @@ -2390,15 +2390,20 @@ static void arm_cpu_realizefn(DeviceState *dev, 
> Error **errp)
>
>   #ifndef CONFIG_USER_ONLY
>           /*
> -         * If we do not have tag-memory provided by the TCG
> -         * nor MTE at KVM enabled, reduce MTE support to
> -         * instructions enabled at EL0.
> +         * If we do not have tag-memory provided by the TCG,
> +         * reduce MTE support to instructions enabled at EL0.
>            * This matches Cortex-A710 BROADCASTMTE input being LOW.
>            */
> -        if (cpu->tag_memory == NULL && !cpu->kvm_mte) {
> +        if (tcg_enabled() && cpu->tag_memory == NULL) {
>               cpu->isar.id_aa64pfr1 =
>                   FIELD_DP64(cpu->isar.id_aa64pfr1, ID_AA64PFR1, MTE, 1);
>           }
> +
> +        /* Disable MTE, if it is not enabled by the user for KVM mode.
> +         */
> +        if (kvm_enabled() && !cpu->kvm_mte) {
> +                FIELD_DP64(cpu->isar.id_aa64pfr1, ID_AA64PFR1, MTE, 0);
> +        }
>   #endif
>       }

Wouldn't that be a possibly guest-visible change?
Ganapatrao Kulkarni Sept. 19, 2024, 3:56 p.m. UTC | #7
On 19-09-2024 08:35 pm, Cornelia Huck wrote:
> On Thu, Sep 19 2024, Ganapatrao Kulkarni <gankulkarni@os.amperecomputing.com> wrote:
> 
>> I have modified as below diff to keep TCG if loop as it is and adding if
>> for KVM case to clear/mask the MTE bits if MTE in KVM mode is not
>> enabled by user command(if no mte=on).
>>
>> Is below diff makes sense?
>>
>> diff --git a/target/arm/cpu.c b/target/arm/cpu.c
>> index a59417aac8..523996576d 100644
>> --- a/target/arm/cpu.c
>> +++ b/target/arm/cpu.c
>> @@ -2390,15 +2390,20 @@ static void arm_cpu_realizefn(DeviceState *dev,
>> Error **errp)
>>
>>    #ifndef CONFIG_USER_ONLY
>>            /*
>> -         * If we do not have tag-memory provided by the TCG
>> -         * nor MTE at KVM enabled, reduce MTE support to
>> -         * instructions enabled at EL0.
>> +         * If we do not have tag-memory provided by the TCG,
>> +         * reduce MTE support to instructions enabled at EL0.
>>             * This matches Cortex-A710 BROADCASTMTE input being LOW.
>>             */
>> -        if (cpu->tag_memory == NULL && !cpu->kvm_mte) {
>> +        if (tcg_enabled() && cpu->tag_memory == NULL) {
>>                cpu->isar.id_aa64pfr1 =
>>                    FIELD_DP64(cpu->isar.id_aa64pfr1, ID_AA64PFR1, MTE, 1);
>>            }
>> +
>> +        /* Disable MTE, if it is not enabled by the user for KVM mode.
>> +         */
>> +        if (kvm_enabled() && !cpu->kvm_mte) {
>> +                FIELD_DP64(cpu->isar.id_aa64pfr1, ID_AA64PFR1, MTE, 0);
>> +        }
>>    #endif
>>        }
> 
> Wouldn't that be a possibly guest-visible change?
> 

Yes, the MTE bits of id_aa64pfr1 are masked to guest like before.
diff mbox series

Patch

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 7934b23651..a33af7d996 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -2211,7 +2211,7 @@  static void machvirt_init(MachineState *machine)
         exit(1);
     }
 
-    if (vms->mte && (kvm_enabled() || hvf_enabled())) {
+    if (vms->mte && hvf_enabled()) {
         error_report("mach-virt: %s does not support providing "
                      "MTE to the guest CPU",
                      current_accel_name());
@@ -2281,39 +2281,51 @@  static void machvirt_init(MachineState *machine)
         }
 
         if (vms->mte) {
-            /* Create the memory region only once, but link to all cpus. */
-            if (!tag_sysmem) {
-                /*
-                 * The property exists only if MemTag is supported.
-                 * If it is, we must allocate the ram to back that up.
-                 */
-                if (!object_property_find(cpuobj, "tag-memory")) {
-                    error_report("MTE requested, but not supported "
-                                 "by the guest CPU");
-                    exit(1);
+            if (tcg_enabled()) {
+                /* Create the memory region only once, but link to all cpus. */
+                if (!tag_sysmem) {
+                    /*
+                     * The property exists only if MemTag is supported.
+                     * If it is, we must allocate the ram to back that up.
+                     */
+                    if (!object_property_find(cpuobj, "tag-memory")) {
+                        error_report("MTE requested, but not supported "
+                                     "by the guest CPU");
+                        exit(1);
+                    }
+
+                    tag_sysmem = g_new(MemoryRegion, 1);
+                    memory_region_init(tag_sysmem, OBJECT(machine),
+                                       "tag-memory", UINT64_MAX / 32);
+
+                    if (vms->secure) {
+                        secure_tag_sysmem = g_new(MemoryRegion, 1);
+                        memory_region_init(secure_tag_sysmem, OBJECT(machine),
+                                           "secure-tag-memory",
+                                           UINT64_MAX / 32);
+
+                        /* As with ram, secure-tag takes precedence over tag. */
+                        memory_region_add_subregion_overlap(secure_tag_sysmem,
+                                                            0, tag_sysmem, -1);
+                    }
                 }
 
-                tag_sysmem = g_new(MemoryRegion, 1);
-                memory_region_init(tag_sysmem, OBJECT(machine),
-                                   "tag-memory", UINT64_MAX / 32);
-
+                object_property_set_link(cpuobj, "tag-memory",
+                                         OBJECT(tag_sysmem), &error_abort);
                 if (vms->secure) {
-                    secure_tag_sysmem = g_new(MemoryRegion, 1);
-                    memory_region_init(secure_tag_sysmem, OBJECT(machine),
-                                       "secure-tag-memory", UINT64_MAX / 32);
-
-                    /* As with ram, secure-tag takes precedence over tag.  */
-                    memory_region_add_subregion_overlap(secure_tag_sysmem, 0,
-                                                        tag_sysmem, -1);
+                    object_property_set_link(cpuobj, "secure-tag-memory",
+                                             OBJECT(secure_tag_sysmem),
+                                             &error_abort);
                 }
-            }
-
-            object_property_set_link(cpuobj, "tag-memory", OBJECT(tag_sysmem),
-                                     &error_abort);
-            if (vms->secure) {
-                object_property_set_link(cpuobj, "secure-tag-memory",
-                                         OBJECT(secure_tag_sysmem),
-                                         &error_abort);
+            } else if (kvm_enabled()) {
+                if (!kvm_arm_mte_supported()) {
+                    error_report("MTE requested, but not supported by KVM");
+                    exit(1);
+                }
+                kvm_arm_enable_mte(cpuobj, &error_abort);
+            } else {
+                    error_report("MTE requested, but not supported ");
+                    exit(1);
             }
         }
 
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index 19191c2391..a59417aac8 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -2390,11 +2390,12 @@  static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
 
 #ifndef CONFIG_USER_ONLY
         /*
-         * If we do not have tag-memory provided by the machine,
-         * reduce MTE support to instructions enabled at EL0.
+         * If we do not have tag-memory provided by the TCG
+         * nor MTE at KVM enabled, reduce MTE support to
+         * instructions enabled at EL0.
          * This matches Cortex-A710 BROADCASTMTE input being LOW.
          */
-        if (cpu->tag_memory == NULL) {
+        if (cpu->tag_memory == NULL && !cpu->kvm_mte) {
             cpu->isar.id_aa64pfr1 =
                 FIELD_DP64(cpu->isar.id_aa64pfr1, ID_AA64PFR1, MTE, 1);
         }
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index f065756c5c..8fc8b6398f 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -922,6 +922,8 @@  struct ArchCPU {
 
     /* CPU has memory protection unit */
     bool has_mpu;
+    /* CPU has MTE enabled in KVM mode */
+    bool kvm_mte;
     /* PMSAv7 MPU number of supported regions */
     uint32_t pmsav7_dregion;
     /* PMSAv8 MPU number of supported hyp regions */
diff --git a/target/arm/kvm.c b/target/arm/kvm.c
index 849e2e21b3..29865609fb 100644
--- a/target/arm/kvm.c
+++ b/target/arm/kvm.c
@@ -39,6 +39,7 @@ 
 #include "hw/acpi/acpi.h"
 #include "hw/acpi/ghes.h"
 #include "target/arm/gtimer.h"
+#include "migration/blocker.h"
 
 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
     KVM_CAP_LAST_INFO
@@ -119,6 +120,21 @@  bool kvm_arm_create_scratch_host_vcpu(const uint32_t *cpus_to_try,
     if (vmfd < 0) {
         goto err;
     }
+
+    /*
+     * MTE bits of register id_aa64pfr1 are masked if MTE is
+     * not enabled and required to enable before VCPU
+     * is created. Hence enable MTE(if supported) before VCPU
+     * is created to read unmasked MTE bits.
+     */
+    if (kvm_arm_mte_supported()) {
+        KVMState kvm_state;
+
+        kvm_state.fd = kvmfd;
+        kvm_state.vmfd = vmfd;
+        kvm_vm_enable_cap(&kvm_state, KVM_CAP_ARM_MTE, 0);
+    }
+
     cpufd = ioctl(vmfd, KVM_CREATE_VCPU, 0);
     if (cpufd < 0) {
         goto err;
@@ -1793,6 +1809,11 @@  bool kvm_arm_sve_supported(void)
     return kvm_check_extension(kvm_state, KVM_CAP_ARM_SVE);
 }
 
+bool kvm_arm_mte_supported(void)
+{
+    return kvm_check_extension(kvm_state, KVM_CAP_ARM_MTE);
+}
+
 QEMU_BUILD_BUG_ON(KVM_ARM64_SVE_VQ_MIN != 1);
 
 uint32_t kvm_arm_sve_get_vls(ARMCPU *cpu)
@@ -2417,3 +2438,41 @@  int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
     }
     return 0;
 }
+
+void kvm_arm_enable_mte(Object *cpuobj, Error **errp)
+{
+    static bool tried_to_enable;
+    static bool succeeded_to_enable;
+    Error *mte_migration_blocker = NULL;
+    ARMCPU *cpu = ARM_CPU(cpuobj);
+    int ret;
+
+    if (!tried_to_enable) {
+        /*
+         * MTE on KVM is enabled on a per-VM basis (and retrying doesn't make
+         * sense), and we only want a single migration blocker as well.
+         */
+        tried_to_enable = true;
+
+        ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_ARM_MTE, 0);
+        if (ret) {
+            error_setg_errno(errp, -ret, "Failed to enable KVM_CAP_ARM_MTE");
+            return;
+        }
+
+        /* TODO: Add migration support with MTE enabled */
+        error_setg(&mte_migration_blocker,
+                   "Live migration disabled due to MTE enabled");
+        if (migrate_add_blocker(&mte_migration_blocker, errp)) {
+            error_free(mte_migration_blocker);
+            return;
+        }
+
+        succeeded_to_enable = true;
+    }
+
+    if (succeeded_to_enable) {
+        cpu->kvm_mte = true;
+    }
+}
+
diff --git a/target/arm/kvm_arm.h b/target/arm/kvm_arm.h
index cfaa0d9bc7..4d293618a7 100644
--- a/target/arm/kvm_arm.h
+++ b/target/arm/kvm_arm.h
@@ -188,6 +188,13 @@  bool kvm_arm_pmu_supported(void);
  */
 bool kvm_arm_sve_supported(void);
 
+/**
+ * kvm_arm_mte_supported:
+ *
+ * Returns: true if KVM can enable MTE, and false otherwise.
+ */
+bool kvm_arm_mte_supported(void);
+
 /**
  * kvm_arm_get_max_vm_ipa_size:
  * @ms: Machine state handle
@@ -214,6 +221,8 @@  void kvm_arm_pvtime_init(ARMCPU *cpu, uint64_t ipa);
 
 int kvm_arm_set_irq(int cpu, int irqtype, int irq, int level);
 
+void kvm_arm_enable_mte(Object *cpuobj, Error **errp);
+
 #else
 
 /*
@@ -235,6 +244,11 @@  static inline bool kvm_arm_sve_supported(void)
     return false;
 }
 
+static inline bool kvm_arm_mte_supported(void)
+{
+    return false;
+}
+
 /*
  * These functions should never actually be called without KVM support.
  */
@@ -283,6 +297,11 @@  static inline uint32_t kvm_arm_sve_get_vls(ARMCPU *cpu)
     g_assert_not_reached();
 }
 
+static inline void kvm_arm_enable_mte(Object *cpuobj, Error **errp)
+{
+    g_assert_not_reached();
+}
+
 #endif
 
 #endif