diff mbox series

[v2,3/3] kvm/arm: Align the VMID allocation with the arm64 ASID one

Message ID 20210616155606.2806-4-shameerali.kolothum.thodi@huawei.com (mailing list archive)
State New, archived
Headers show
Series kvm/arm: New VMID allocator based on asid | expand

Commit Message

Shameer Kolothum June 16, 2021, 3:56 p.m. UTC
From: Julien Grall <julien.grall@arm.com>

At the moment, the VMID algorithm will send an SGI to all the CPUs to
force an exit and then broadcast a full TLB flush and I-Cache
invalidation.

This patch use the new VMID allocator. The
benefits are:
    - CPUs are not forced to exit at roll-over. Instead the VMID will be
    marked reserved and the context will be flushed at next exit. This
    will reduce the IPIs traffic.
    - Context invalidation is now per-CPU rather than broadcasted.
    - Catalin has a formal model of the ASID allocator.

With the new algo, the code is now adapted:
    - The function __kvm_flush_vm_context() has been renamed to
    __kvm_tlb_flush_local_all() and now only flushing the current CPU
    context.
    - The call to update_vmid() will be done with preemption disabled
    as the new algo requires to store information per-CPU.
    - The TLBs associated to EL1 will be flushed when booting a CPU to
    deal with stale information. This was previously done on the
    allocation of the first VMID of a new generation.

Signed-off-by: Julien Grall <julien.grall@arm.com>
Signed-off-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
---
 arch/arm64/include/asm/kvm_asm.h      |   4 +-
 arch/arm64/include/asm/kvm_host.h     |   6 +-
 arch/arm64/include/asm/kvm_mmu.h      |   3 +-
 arch/arm64/kvm/Makefile               |   2 +-
 arch/arm64/kvm/arm.c                  | 115 +++++++-------------------
 arch/arm64/kvm/hyp/nvhe/hyp-main.c    |   6 +-
 arch/arm64/kvm/hyp/nvhe/mem_protect.c |   3 +-
 arch/arm64/kvm/hyp/nvhe/tlb.c         |  10 +--
 arch/arm64/kvm/hyp/vhe/tlb.c          |  10 +--
 arch/arm64/kvm/mmu.c                  |   1 -
 10 files changed, 52 insertions(+), 108 deletions(-)

Comments

Will Deacon July 21, 2021, 4:31 p.m. UTC | #1
[+Quentin]

On Wed, Jun 16, 2021 at 04:56:06PM +0100, Shameer Kolothum wrote:
> From: Julien Grall <julien.grall@arm.com>
> 
> At the moment, the VMID algorithm will send an SGI to all the CPUs to
> force an exit and then broadcast a full TLB flush and I-Cache
> invalidation.
> 
> This patch use the new VMID allocator. The
> benefits are:
>     - CPUs are not forced to exit at roll-over. Instead the VMID will be
>     marked reserved and the context will be flushed at next exit. This
>     will reduce the IPIs traffic.
>     - Context invalidation is now per-CPU rather than broadcasted.
>     - Catalin has a formal model of the ASID allocator.
> 
> With the new algo, the code is now adapted:
>     - The function __kvm_flush_vm_context() has been renamed to
>     __kvm_tlb_flush_local_all() and now only flushing the current CPU
>     context.
>     - The call to update_vmid() will be done with preemption disabled
>     as the new algo requires to store information per-CPU.
>     - The TLBs associated to EL1 will be flushed when booting a CPU to
>     deal with stale information. This was previously done on the
>     allocation of the first VMID of a new generation.
> 
> Signed-off-by: Julien Grall <julien.grall@arm.com>
> Signed-off-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
> ---
>  arch/arm64/include/asm/kvm_asm.h      |   4 +-
>  arch/arm64/include/asm/kvm_host.h     |   6 +-
>  arch/arm64/include/asm/kvm_mmu.h      |   3 +-
>  arch/arm64/kvm/Makefile               |   2 +-
>  arch/arm64/kvm/arm.c                  | 115 +++++++-------------------
>  arch/arm64/kvm/hyp/nvhe/hyp-main.c    |   6 +-
>  arch/arm64/kvm/hyp/nvhe/mem_protect.c |   3 +-
>  arch/arm64/kvm/hyp/nvhe/tlb.c         |  10 +--
>  arch/arm64/kvm/hyp/vhe/tlb.c          |  10 +--
>  arch/arm64/kvm/mmu.c                  |   1 -
>  10 files changed, 52 insertions(+), 108 deletions(-)

[...]

> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> index 75a7e8071012..d96284da8571 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -70,9 +70,7 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
>  void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu);
>  
>  struct kvm_vmid {
> -	/* The VMID generation used for the virt. memory system */
> -	u64    vmid_gen;
> -	u32    vmid;
> +	atomic64_t id;

Maybe a typedef would be better if this is the only member of the structure?

> diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
> index 4b60c0056c04..a02c4877a055 100644
> --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
> +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
> @@ -106,8 +106,7 @@ int kvm_host_prepare_stage2(void *mem_pgt_pool, void *dev_pgt_pool)
>  	mmu->pgd_phys = __hyp_pa(host_kvm.pgt.pgd);
>  	mmu->arch = &host_kvm.arch;
>  	mmu->pgt = &host_kvm.pgt;
> -	mmu->vmid.vmid_gen = 0;
> -	mmu->vmid.vmid = 0;
> +	atomic64_set(&mmu->vmid.id, 0);

I think this is the first atomic64 use in the EL2 object, which may pull in
some fatal KCSAN instrumentation. Quentin, have you run into this before?

Might be simple just to zero-initialise mmu for now, if it isn't already.

> diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c
> index 83dc3b271bc5..42df9931ed9a 100644
> --- a/arch/arm64/kvm/hyp/nvhe/tlb.c
> +++ b/arch/arm64/kvm/hyp/nvhe/tlb.c
> @@ -140,10 +140,10 @@ void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu)
>  	__tlb_switch_to_host(&cxt);
>  }
>  
> -void __kvm_flush_vm_context(void)
> +void __kvm_tlb_flush_local_all(void)
>  {
> -	dsb(ishst);
> -	__tlbi(alle1is);
> +	dsb(nshst);
> +	__tlbi(alle1);
>  
>  	/*
>  	 * VIPT and PIPT caches are not affected by VMID, so no maintenance
> @@ -155,7 +155,7 @@ void __kvm_flush_vm_context(void)
>  	 *
>  	 */
>  	if (icache_is_vpipt())
> -		asm volatile("ic ialluis");
> +		asm volatile("ic iallu" : : );
>  
> -	dsb(ish);
> +	dsb(nsh);

Hmm, I'm wondering whether having this local stuff really makes sense for
VMIDs. For ASIDs, where rollover can be frequent and TLBI could result in
IPI on 32-bit, the local option was important, but here rollover is less
frequent, DVM is relied upon to work and the cost of a hypercall is
significant with nVHE.

So I do think you could simplify patch 2 slightly to drop the
flush_pending and just issue inner-shareable invalidation on rollover.
With that, it might also make it straightforward to clear active_asids
when scheduling out a vCPU, which would solve the other problem I mentioned
about unnecessarily reserving a bunch of the VMID space.

Will
Shameer Kolothum July 22, 2021, 6:45 a.m. UTC | #2
> -----Original Message-----
> From: Will Deacon [mailto:will@kernel.org]
> Sent: 21 July 2021 17:32
> To: Shameerali Kolothum Thodi <shameerali.kolothum.thodi@huawei.com>
> Cc: linux-arm-kernel@lists.infradead.org; kvmarm@lists.cs.columbia.edu;
> linux-kernel@vger.kernel.org; maz@kernel.org; catalin.marinas@arm.com;
> james.morse@arm.com; julien.thierry.kdev@gmail.com;
> suzuki.poulose@arm.com; jean-philippe@linaro.org;
> Alexandru.Elisei@arm.com; Linuxarm <linuxarm@huawei.com>;
> qperret@google.com
> Subject: Re: [PATCH v2 3/3] kvm/arm: Align the VMID allocation with the
> arm64 ASID one
> 
> [+Quentin]
> 
> On Wed, Jun 16, 2021 at 04:56:06PM +0100, Shameer Kolothum wrote:
> > From: Julien Grall <julien.grall@arm.com>
> >
> > At the moment, the VMID algorithm will send an SGI to all the CPUs to
> > force an exit and then broadcast a full TLB flush and I-Cache
> > invalidation.
> >
> > This patch use the new VMID allocator. The
> > benefits are:
> >     - CPUs are not forced to exit at roll-over. Instead the VMID will be
> >     marked reserved and the context will be flushed at next exit. This
> >     will reduce the IPIs traffic.
> >     - Context invalidation is now per-CPU rather than broadcasted.
> >     - Catalin has a formal model of the ASID allocator.
> >
> > With the new algo, the code is now adapted:
> >     - The function __kvm_flush_vm_context() has been renamed to
> >     __kvm_tlb_flush_local_all() and now only flushing the current CPU
> >     context.
> >     - The call to update_vmid() will be done with preemption disabled
> >     as the new algo requires to store information per-CPU.
> >     - The TLBs associated to EL1 will be flushed when booting a CPU to
> >     deal with stale information. This was previously done on the
> >     allocation of the first VMID of a new generation.
> >
> > Signed-off-by: Julien Grall <julien.grall@arm.com>
> > Signed-off-by: Shameer Kolothum
> <shameerali.kolothum.thodi@huawei.com>
> > ---
> >  arch/arm64/include/asm/kvm_asm.h      |   4 +-
> >  arch/arm64/include/asm/kvm_host.h     |   6 +-
> >  arch/arm64/include/asm/kvm_mmu.h      |   3 +-
> >  arch/arm64/kvm/Makefile               |   2 +-
> >  arch/arm64/kvm/arm.c                  | 115 +++++++-------------------
> >  arch/arm64/kvm/hyp/nvhe/hyp-main.c    |   6 +-
> >  arch/arm64/kvm/hyp/nvhe/mem_protect.c |   3 +-
> >  arch/arm64/kvm/hyp/nvhe/tlb.c         |  10 +--
> >  arch/arm64/kvm/hyp/vhe/tlb.c          |  10 +--
> >  arch/arm64/kvm/mmu.c                  |   1 -
> >  10 files changed, 52 insertions(+), 108 deletions(-)
> 
> [...]
> 
> > diff --git a/arch/arm64/include/asm/kvm_host.h
> b/arch/arm64/include/asm/kvm_host.h
> > index 75a7e8071012..d96284da8571 100644
> > --- a/arch/arm64/include/asm/kvm_host.h
> > +++ b/arch/arm64/include/asm/kvm_host.h
> > @@ -70,9 +70,7 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
> >  void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu);
> >
> >  struct kvm_vmid {
> > -	/* The VMID generation used for the virt. memory system */
> > -	u64    vmid_gen;
> > -	u32    vmid;
> > +	atomic64_t id;
> 
> Maybe a typedef would be better if this is the only member of the structure?

Ok.

> 
> > diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
> b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
> > index 4b60c0056c04..a02c4877a055 100644
> > --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
> > +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
> > @@ -106,8 +106,7 @@ int kvm_host_prepare_stage2(void *mem_pgt_pool,
> void *dev_pgt_pool)
> >  	mmu->pgd_phys = __hyp_pa(host_kvm.pgt.pgd);
> >  	mmu->arch = &host_kvm.arch;
> >  	mmu->pgt = &host_kvm.pgt;
> > -	mmu->vmid.vmid_gen = 0;
> > -	mmu->vmid.vmid = 0;
> > +	atomic64_set(&mmu->vmid.id, 0);
> 
> I think this is the first atomic64 use in the EL2 object, which may pull in
> some fatal KCSAN instrumentation. Quentin, have you run into this before?
> 
> Might be simple just to zero-initialise mmu for now, if it isn't already.

I will check that.

> 
> > diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c
> b/arch/arm64/kvm/hyp/nvhe/tlb.c
> > index 83dc3b271bc5..42df9931ed9a 100644
> > --- a/arch/arm64/kvm/hyp/nvhe/tlb.c
> > +++ b/arch/arm64/kvm/hyp/nvhe/tlb.c
> > @@ -140,10 +140,10 @@ void __kvm_flush_cpu_context(struct
> kvm_s2_mmu *mmu)
> >  	__tlb_switch_to_host(&cxt);
> >  }
> >
> > -void __kvm_flush_vm_context(void)
> > +void __kvm_tlb_flush_local_all(void)
> >  {
> > -	dsb(ishst);
> > -	__tlbi(alle1is);
> > +	dsb(nshst);
> > +	__tlbi(alle1);
> >
> >  	/*
> >  	 * VIPT and PIPT caches are not affected by VMID, so no maintenance
> > @@ -155,7 +155,7 @@ void __kvm_flush_vm_context(void)
> >  	 *
> >  	 */
> >  	if (icache_is_vpipt())
> > -		asm volatile("ic ialluis");
> > +		asm volatile("ic iallu" : : );
> >
> > -	dsb(ish);
> > +	dsb(nsh);
> 
> Hmm, I'm wondering whether having this local stuff really makes sense for
> VMIDs. For ASIDs, where rollover can be frequent and TLBI could result in
> IPI on 32-bit, the local option was important, but here rollover is less
> frequent, DVM is relied upon to work and the cost of a hypercall is
> significant with nVHE.
> 
> So I do think you could simplify patch 2 slightly to drop the
> flush_pending and just issue inner-shareable invalidation on rollover.
> With that, it might also make it straightforward to clear active_asids
> when scheduling out a vCPU, which would solve the other problem I
> mentioned
> about unnecessarily reserving a bunch of the VMID space.

Ok. I will try out the above suggestion. Hope it will be acceptable for 8 bit 
VMID systems as well as there is a higher chance for rollover especially
when we introduce pinned VMIDs(I am not sure such platforms care about
Pinned VMID or not. If not, we could limit Pinned VMIDs to 16 bit systems).

Thanks,
Shameer
Quentin Perret July 22, 2021, 9:11 a.m. UTC | #3
On Thursday 22 Jul 2021 at 06:45:14 (+0000), Shameerali Kolothum Thodi wrote:
> > From: Will Deacon [mailto:will@kernel.org]
> > > diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
> > b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
> > > index 4b60c0056c04..a02c4877a055 100644
> > > --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
> > > +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
> > > @@ -106,8 +106,7 @@ int kvm_host_prepare_stage2(void *mem_pgt_pool,
> > void *dev_pgt_pool)
> > >  	mmu->pgd_phys = __hyp_pa(host_kvm.pgt.pgd);
> > >  	mmu->arch = &host_kvm.arch;
> > >  	mmu->pgt = &host_kvm.pgt;
> > > -	mmu->vmid.vmid_gen = 0;
> > > -	mmu->vmid.vmid = 0;
> > > +	atomic64_set(&mmu->vmid.id, 0);
> > 
> > I think this is the first atomic64 use in the EL2 object, which may pull in
> > some fatal KCSAN instrumentation. Quentin, have you run into this before?
> > 
> > Might be simple just to zero-initialise mmu for now, if it isn't already.
> 
> I will check that.

Yes I think what saves us here is that, AFAICT. arm64 doesn't support
KCSAN yet. But the day it does, this should fail to link (hopefully)
because of out-of-line calls into e.g. __kasan_check_write().

So yes, a simple zeroing here is probably preferable.

Thanks,
Quentin
Will Deacon July 22, 2021, 9:50 a.m. UTC | #4
On Thu, Jul 22, 2021 at 06:45:14AM +0000, Shameerali Kolothum Thodi wrote:
> > > diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c
> > b/arch/arm64/kvm/hyp/nvhe/tlb.c
> > > index 83dc3b271bc5..42df9931ed9a 100644
> > > --- a/arch/arm64/kvm/hyp/nvhe/tlb.c
> > > +++ b/arch/arm64/kvm/hyp/nvhe/tlb.c
> > > @@ -140,10 +140,10 @@ void __kvm_flush_cpu_context(struct
> > kvm_s2_mmu *mmu)
> > >  	__tlb_switch_to_host(&cxt);
> > >  }
> > >
> > > -void __kvm_flush_vm_context(void)
> > > +void __kvm_tlb_flush_local_all(void)
> > >  {
> > > -	dsb(ishst);
> > > -	__tlbi(alle1is);
> > > +	dsb(nshst);
> > > +	__tlbi(alle1);
> > >
> > >  	/*
> > >  	 * VIPT and PIPT caches are not affected by VMID, so no maintenance
> > > @@ -155,7 +155,7 @@ void __kvm_flush_vm_context(void)
> > >  	 *
> > >  	 */
> > >  	if (icache_is_vpipt())
> > > -		asm volatile("ic ialluis");
> > > +		asm volatile("ic iallu" : : );
> > >
> > > -	dsb(ish);
> > > +	dsb(nsh);
> > 
> > Hmm, I'm wondering whether having this local stuff really makes sense for
> > VMIDs. For ASIDs, where rollover can be frequent and TLBI could result in
> > IPI on 32-bit, the local option was important, but here rollover is less
> > frequent, DVM is relied upon to work and the cost of a hypercall is
> > significant with nVHE.
> > 
> > So I do think you could simplify patch 2 slightly to drop the
> > flush_pending and just issue inner-shareable invalidation on rollover.
> > With that, it might also make it straightforward to clear active_asids
> > when scheduling out a vCPU, which would solve the other problem I
> > mentioned
> > about unnecessarily reserving a bunch of the VMID space.
> 
> Ok. I will try out the above suggestion. Hope it will be acceptable for 8 bit 
> VMID systems as well as there is a higher chance for rollover especially
> when we introduce pinned VMIDs(I am not sure such platforms care about
> Pinned VMID or not. If not, we could limit Pinned VMIDs to 16 bit systems).

So I woke up at 3am in a cold sweat after dreaming about this code.

I think my suggestion above still stands for the VMID allocator, but
interestingly, it would _not_ be valid for the ASID allocator because there
the ASID is part of the active context and so, during the window where the
active_asid is out of sync with the TTBR, receiving a broadcast TLBI from a
concurrent rollover wouldn't be enough to knock out the old ASID from the
TLB despite it subsequently being made available for reallocation. So the
local TLB invalidation is not just a performance hint as I said; it's crucial
to the way the thing works (and this is also why the CnP code has to switch
to the reserved TTBR0).

As an aside: I'm more and more inclined to rip out the CnP stuff given
that it doesn't appear to being any benefits, but does have some clear
downsides. Perhaps something for next week.

Will
Vladimir Murzin July 22, 2021, 3:22 p.m. UTC | #5
On 7/22/21 10:50 AM, Will Deacon wrote:
> As an aside: I'm more and more inclined to rip out the CnP stuff given
> that it doesn't appear to being any benefits, but does have some clear
> downsides. Perhaps something for next week.

Can you please clarify what do you mean by "it doesn't appear to being any
benefits"? IIRC, Cortex-A65 implements CnP hint and I've heard that some
payloads seen improvement...

Cheers
Vladimir
Will Deacon July 22, 2021, 3:38 p.m. UTC | #6
Hi Vladimir,

On Thu, Jul 22, 2021 at 04:22:26PM +0100, Vladimir Murzin wrote:
> On 7/22/21 10:50 AM, Will Deacon wrote:
> > As an aside: I'm more and more inclined to rip out the CnP stuff given
> > that it doesn't appear to being any benefits, but does have some clear
> > downsides. Perhaps something for next week.
> 
> Can you please clarify what do you mean by "it doesn't appear to being any
> benefits"? IIRC, Cortex-A65 implements CnP hint and I've heard that some
> payloads seen improvement...

Has anybody taped that out? I'd have thought building an SMT design in 2021
is a reasonably courageous thing to do.

The issue I'm getting at is that modern cores seem to advertise CnP even
if they ignore it internally, maybe because of some big/little worries?
That would be fine if it didn't introduce complexity and overhead to the
kernel, but it does and therefore I think we should rip it out (or at
least stick it behind a "default n" config option if there are some niche
users).

There are also open questions as to exactly what CnP does because the
architecture is not clear at all (for example TTBRx_EL1.CnP is permitted
to be cached in a TLB).

CHeers,

Will
Marco Elver July 22, 2021, 7:33 p.m. UTC | #7
On Thu, Jul 22, 2021 at 10:11AM +0100, Quentin Perret wrote:
> On Thursday 22 Jul 2021 at 06:45:14 (+0000), Shameerali Kolothum Thodi wrote:
> > > From: Will Deacon [mailto:will@kernel.org]
> > > > diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
> > > b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
> > > > index 4b60c0056c04..a02c4877a055 100644
> > > > --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
> > > > +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
> > > > @@ -106,8 +106,7 @@ int kvm_host_prepare_stage2(void *mem_pgt_pool,
> > > void *dev_pgt_pool)
> > > >  	mmu->pgd_phys = __hyp_pa(host_kvm.pgt.pgd);
> > > >  	mmu->arch = &host_kvm.arch;
> > > >  	mmu->pgt = &host_kvm.pgt;
> > > > -	mmu->vmid.vmid_gen = 0;
> > > > -	mmu->vmid.vmid = 0;
> > > > +	atomic64_set(&mmu->vmid.id, 0);
> > > 
> > > I think this is the first atomic64 use in the EL2 object, which may pull in
> > > some fatal KCSAN instrumentation. Quentin, have you run into this before?
> > > 
> > > Might be simple just to zero-initialise mmu for now, if it isn't already.
> > 
> > I will check that.
> 
> Yes I think what saves us here is that, AFAICT. arm64 doesn't support
> KCSAN yet. But the day it does, this should fail to link (hopefully)
> because of out-of-line calls into e.g. __kasan_check_write().
> 
> So yes, a simple zeroing here is probably preferable.

Note: Do not worry about hypothetically breaking with sanitizers here --
whether it's KASAN or KCSAN, they both instrument atomics. In files that
enable instrumentation but the atomic instrumentation should not be
pulled in, use the arch_ variants, but this doesn't apply here because
instrumentation shouldn't even be on.

The indicator that when KCSAN is supported on arm64, the Makefile here
just needs KCSAN_SANITIZE := n, is that all other instrumentation is
also killed entirely:

  $ grep -E "(PROFILE|SANITIZE|INSTRUMENT)" arch/arm64/kvm/hyp/nvhe/Makefile
  GCOV_PROFILE	:= n
  KASAN_SANITIZE	:= n
  UBSAN_SANITIZE	:= n
  KCOV_INSTRUMENT	:= n

KCSAN isn't supported on arm64 yet, and when it does, I believe Mark's
arm64 KCSAN series should take care of things like this.

Thanks,
-- Marco
Vladimir Murzin July 23, 2021, 3:49 p.m. UTC | #8
Hi Will,

On 7/22/21 4:38 PM, Will Deacon wrote:
> Hi Vladimir,
> 
> On Thu, Jul 22, 2021 at 04:22:26PM +0100, Vladimir Murzin wrote:
>> On 7/22/21 10:50 AM, Will Deacon wrote:
>>> As an aside: I'm more and more inclined to rip out the CnP stuff given
>>> that it doesn't appear to being any benefits, but does have some clear
>>> downsides. Perhaps something for next week.
>>
>> Can you please clarify what do you mean by "it doesn't appear to being any
>> benefits"? IIRC, Cortex-A65 implements CnP hint and I've heard that some
>> payloads seen improvement...
> 
> Has anybody taped that out? I'd have thought building an SMT design in 2021
> is a reasonably courageous thing to do.

As you said three can be niche for that...

> 
> The issue I'm getting at is that modern cores seem to advertise CnP even
> if they ignore it internally, maybe because of some big/little worries?

Should we employ CPU errata framework for such cores to demote CnP?

> That would be fine if it didn't introduce complexity and overhead to the
> kernel, but it does and therefore I think we should rip it out (or at
> least stick it behind a "default n" config option if there are some niche
> users).

"default n" still better then no code at all :)

Cheers
Vladimir

> 
> There are also open questions as to exactly what CnP does because the
> architecture is not clear at all (for example TTBRx_EL1.CnP is permitted
> to be cached in a TLB).
> 
> CHeers,
> 
> Will
>
diff mbox series

Patch

diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 5e9b33cbac51..15fc0e555943 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -44,7 +44,7 @@ 
 
 #define __KVM_HOST_SMCCC_FUNC___kvm_hyp_init			0
 #define __KVM_HOST_SMCCC_FUNC___kvm_vcpu_run			1
-#define __KVM_HOST_SMCCC_FUNC___kvm_flush_vm_context		2
+#define __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_local_all		2
 #define __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_ipa		3
 #define __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid		4
 #define __KVM_HOST_SMCCC_FUNC___kvm_flush_cpu_context		5
@@ -192,7 +192,7 @@  DECLARE_KVM_NVHE_SYM(__per_cpu_end);
 DECLARE_KVM_HYP_SYM(__bp_harden_hyp_vecs);
 #define __bp_harden_hyp_vecs	CHOOSE_HYP_SYM(__bp_harden_hyp_vecs)
 
-extern void __kvm_flush_vm_context(void);
+extern void __kvm_tlb_flush_local_all(void);
 extern void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu);
 extern void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, phys_addr_t ipa,
 				     int level);
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 75a7e8071012..d96284da8571 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -70,9 +70,7 @@  int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
 void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu);
 
 struct kvm_vmid {
-	/* The VMID generation used for the virt. memory system */
-	u64    vmid_gen;
-	u32    vmid;
+	atomic64_t id;
 };
 
 struct kvm_s2_mmu {
@@ -642,8 +640,6 @@  void kvm_arm_resume_guest(struct kvm *kvm);
 #define kvm_call_hyp_nvhe(f, ...) f(__VA_ARGS__)
 #endif /* __KVM_NVHE_HYPERVISOR__ */
 
-void force_vm_exit(const cpumask_t *mask);
-
 int handle_exit(struct kvm_vcpu *vcpu, int exception_index);
 void handle_exit_early(struct kvm_vcpu *vcpu, int exception_index);
 
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 2abdea7dcb43..00b22f50df15 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -262,7 +262,8 @@  static __always_inline u64 kvm_get_vttbr(struct kvm_s2_mmu *mmu)
 	u64 cnp = system_supports_cnp() ? VTTBR_CNP_BIT : 0;
 
 	baddr = mmu->pgd_phys;
-	vmid_field = (u64)vmid->vmid << VTTBR_VMID_SHIFT;
+	vmid_field = atomic64_read(&vmid->id) << VTTBR_VMID_SHIFT;
+	vmid_field &= VTTBR_VMID_MASK(kvm_get_vmid_bits());
 	return kvm_phys_to_vttbr(baddr) | vmid_field | cnp;
 }
 
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index 589921392cb1..717c4cbf557a 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -16,7 +16,7 @@  kvm-y := $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o \
 	 inject_fault.o va_layout.o handle_exit.o \
 	 guest.o debug.o reset.o sys_regs.o \
 	 vgic-sys-reg-v3.o fpsimd.o pmu.o \
-	 arch_timer.o trng.o\
+	 arch_timer.o trng.o vmid.o \
 	 vgic/vgic.o vgic/vgic-init.o \
 	 vgic/vgic-irqfd.o vgic/vgic-v2.o \
 	 vgic/vgic-v3.o vgic/vgic-v4.o \
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index e720148232a0..bd258956f3c3 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -55,11 +55,6 @@  static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);
 unsigned long kvm_arm_hyp_percpu_base[NR_CPUS];
 DECLARE_KVM_NVHE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
 
-/* The VMID used in the VTTBR */
-static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1);
-static u32 kvm_next_vmid;
-static DEFINE_SPINLOCK(kvm_vmid_lock);
-
 static bool vgic_present;
 
 static DEFINE_PER_CPU(unsigned char, kvm_arm_hardware_enabled);
@@ -491,85 +486,13 @@  bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
 	return vcpu_mode_priv(vcpu);
 }
 
-/* Just ensure a guest exit from a particular CPU */
-static void exit_vm_noop(void *info)
-{
-}
-
-void force_vm_exit(const cpumask_t *mask)
-{
-	preempt_disable();
-	smp_call_function_many(mask, exit_vm_noop, NULL, true);
-	preempt_enable();
-}
-
-/**
- * need_new_vmid_gen - check that the VMID is still valid
- * @vmid: The VMID to check
- *
- * return true if there is a new generation of VMIDs being used
- *
- * The hardware supports a limited set of values with the value zero reserved
- * for the host, so we check if an assigned value belongs to a previous
- * generation, which requires us to assign a new value. If we're the first to
- * use a VMID for the new generation, we must flush necessary caches and TLBs
- * on all CPUs.
- */
-static bool need_new_vmid_gen(struct kvm_vmid *vmid)
-{
-	u64 current_vmid_gen = atomic64_read(&kvm_vmid_gen);
-	smp_rmb(); /* Orders read of kvm_vmid_gen and kvm->arch.vmid */
-	return unlikely(READ_ONCE(vmid->vmid_gen) != current_vmid_gen);
-}
-
 /**
  * update_vmid - Update the vmid with a valid VMID for the current generation
  * @vmid: The stage-2 VMID information struct
  */
 static void update_vmid(struct kvm_vmid *vmid)
 {
-	if (!need_new_vmid_gen(vmid))
-		return;
-
-	spin_lock(&kvm_vmid_lock);
-
-	/*
-	 * We need to re-check the vmid_gen here to ensure that if another vcpu
-	 * already allocated a valid vmid for this vm, then this vcpu should
-	 * use the same vmid.
-	 */
-	if (!need_new_vmid_gen(vmid)) {
-		spin_unlock(&kvm_vmid_lock);
-		return;
-	}
-
-	/* First user of a new VMID generation? */
-	if (unlikely(kvm_next_vmid == 0)) {
-		atomic64_inc(&kvm_vmid_gen);
-		kvm_next_vmid = 1;
-
-		/*
-		 * On SMP we know no other CPUs can use this CPU's or each
-		 * other's VMID after force_vm_exit returns since the
-		 * kvm_vmid_lock blocks them from reentry to the guest.
-		 */
-		force_vm_exit(cpu_all_mask);
-		/*
-		 * Now broadcast TLB + ICACHE invalidation over the inner
-		 * shareable domain to make sure all data structures are
-		 * clean.
-		 */
-		kvm_call_hyp(__kvm_flush_vm_context);
-	}
-
-	vmid->vmid = kvm_next_vmid;
-	kvm_next_vmid++;
-	kvm_next_vmid &= (1 << kvm_get_vmid_bits()) - 1;
-
-	smp_wmb();
-	WRITE_ONCE(vmid->vmid_gen, atomic64_read(&kvm_vmid_gen));
-
-	spin_unlock(&kvm_vmid_lock);
+	kvm_arm_update_vmid(&vmid->id);
 }
 
 static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
@@ -737,8 +660,6 @@  int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 		 */
 		cond_resched();
 
-		update_vmid(&vcpu->arch.hw_mmu->vmid);
-
 		check_vcpu_requests(vcpu);
 
 		/*
@@ -748,6 +669,15 @@  int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 		 */
 		preempt_disable();
 
+		/*
+		 * The VMID allocator only tracks active VMIDs per
+		 * physical CPU, and therefore the VMID allocated may not be
+		 * preserved on VMID roll-over if the task was preempted,
+		 * making a thread's VMID inactive. So we need to call
+		 * update_vttbr in non-premptible context.
+		 */
+		update_vmid(&vcpu->arch.hw_mmu->vmid);
+
 		kvm_pmu_flush_hwstate(vcpu);
 
 		local_irq_disable();
@@ -786,8 +716,7 @@  int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 		 */
 		smp_store_mb(vcpu->mode, IN_GUEST_MODE);
 
-		if (ret <= 0 || need_new_vmid_gen(&vcpu->arch.hw_mmu->vmid) ||
-		    kvm_request_pending(vcpu)) {
+		if (ret <= 0 || kvm_request_pending(vcpu)) {
 			vcpu->mode = OUTSIDE_GUEST_MODE;
 			isb(); /* Ensure work in x_flush_hwstate is committed */
 			kvm_pmu_sync_hwstate(vcpu);
@@ -1490,6 +1419,8 @@  static void cpu_hyp_reset(void)
 {
 	if (!is_kernel_in_hyp_mode())
 		__hyp_reset_vectors();
+
+	kvm_call_hyp(__kvm_tlb_flush_local_all);
 }
 
 /*
@@ -1669,9 +1600,26 @@  static bool init_psci_relay(void)
 
 static int init_common_resources(void)
 {
+	int err;
+
+	/*
+	 * Initialize the VMID allocator telling it to allocate a single
+	 * VMID per VM.
+	 */
+	err = kvm_arm_vmid_alloc_init();
+	if (err) {
+		kvm_err("Failed to initialize VMID allocator.\n");
+		return err;
+	}
+
 	return kvm_set_ipa_limit();
 }
 
+static void free_common_resources(void)
+{
+	kvm_arm_vmid_alloc_free();
+}
+
 static int init_subsystems(void)
 {
 	int err = 0;
@@ -2079,7 +2027,7 @@  int kvm_arch_init(void *opaque)
 
 	err = kvm_arm_init_sve();
 	if (err)
-		return err;
+		goto out_err;
 
 	if (!in_hyp_mode) {
 		err = init_hyp_mode();
@@ -2120,6 +2068,7 @@  int kvm_arch_init(void *opaque)
 	if (!in_hyp_mode)
 		teardown_hyp_mode();
 out_err:
+	free_common_resources();
 	return err;
 }
 
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index 1632f001f4ed..bd86d3d4d787 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -35,9 +35,9 @@  static void handle___kvm_adjust_pc(struct kvm_cpu_context *host_ctxt)
 	__kvm_adjust_pc(kern_hyp_va(vcpu));
 }
 
-static void handle___kvm_flush_vm_context(struct kvm_cpu_context *host_ctxt)
+static void handle___kvm_tlb_flush_local_all(struct kvm_cpu_context *host_ctxt)
 {
-	__kvm_flush_vm_context();
+	__kvm_tlb_flush_local_all();
 }
 
 static void handle___kvm_tlb_flush_vmid_ipa(struct kvm_cpu_context *host_ctxt)
@@ -178,7 +178,7 @@  typedef void (*hcall_t)(struct kvm_cpu_context *);
 static const hcall_t host_hcall[] = {
 	HANDLE_FUNC(__kvm_vcpu_run),
 	HANDLE_FUNC(__kvm_adjust_pc),
-	HANDLE_FUNC(__kvm_flush_vm_context),
+	HANDLE_FUNC(__kvm_tlb_flush_local_all),
 	HANDLE_FUNC(__kvm_tlb_flush_vmid_ipa),
 	HANDLE_FUNC(__kvm_tlb_flush_vmid),
 	HANDLE_FUNC(__kvm_flush_cpu_context),
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index 4b60c0056c04..a02c4877a055 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -106,8 +106,7 @@  int kvm_host_prepare_stage2(void *mem_pgt_pool, void *dev_pgt_pool)
 	mmu->pgd_phys = __hyp_pa(host_kvm.pgt.pgd);
 	mmu->arch = &host_kvm.arch;
 	mmu->pgt = &host_kvm.pgt;
-	mmu->vmid.vmid_gen = 0;
-	mmu->vmid.vmid = 0;
+	atomic64_set(&mmu->vmid.id, 0);
 
 	return 0;
 }
diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c
index 83dc3b271bc5..42df9931ed9a 100644
--- a/arch/arm64/kvm/hyp/nvhe/tlb.c
+++ b/arch/arm64/kvm/hyp/nvhe/tlb.c
@@ -140,10 +140,10 @@  void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu)
 	__tlb_switch_to_host(&cxt);
 }
 
-void __kvm_flush_vm_context(void)
+void __kvm_tlb_flush_local_all(void)
 {
-	dsb(ishst);
-	__tlbi(alle1is);
+	dsb(nshst);
+	__tlbi(alle1);
 
 	/*
 	 * VIPT and PIPT caches are not affected by VMID, so no maintenance
@@ -155,7 +155,7 @@  void __kvm_flush_vm_context(void)
 	 *
 	 */
 	if (icache_is_vpipt())
-		asm volatile("ic ialluis");
+		asm volatile("ic iallu" : : );
 
-	dsb(ish);
+	dsb(nsh);
 }
diff --git a/arch/arm64/kvm/hyp/vhe/tlb.c b/arch/arm64/kvm/hyp/vhe/tlb.c
index 66f17349f0c3..89f229e77b7d 100644
--- a/arch/arm64/kvm/hyp/vhe/tlb.c
+++ b/arch/arm64/kvm/hyp/vhe/tlb.c
@@ -142,10 +142,10 @@  void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu)
 	__tlb_switch_to_host(&cxt);
 }
 
-void __kvm_flush_vm_context(void)
+void __kvm_tlb_flush_local_all(void)
 {
-	dsb(ishst);
-	__tlbi(alle1is);
+	dsb(nshst);
+	__tlbi(alle1);
 
 	/*
 	 * VIPT and PIPT caches are not affected by VMID, so no maintenance
@@ -157,7 +157,7 @@  void __kvm_flush_vm_context(void)
 	 *
 	 */
 	if (icache_is_vpipt())
-		asm volatile("ic ialluis");
+		asm volatile("ic iallu" : : );
 
-	dsb(ish);
+	dsb(nsh);
 }
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index c10207fed2f3..5c73220356e0 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -473,7 +473,6 @@  int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu)
 	mmu->arch = &kvm->arch;
 	mmu->pgt = pgt;
 	mmu->pgd_phys = __pa(pgt->pgd);
-	mmu->vmid.vmid_gen = 0;
 	return 0;
 
 out_destroy_pgtable: