diff mbox series

[PATCHv4,4/8] x86/mm: Handle LAM on context switch

Message ID 20220622162230.83474-5-kirill.shutemov@linux.intel.com (mailing list archive)
State New
Headers show
Series Linear Address Masking enabling | expand

Commit Message

Kirill A . Shutemov June 22, 2022, 4:22 p.m. UTC
Linear Address Masking mode for userspace pointers is encoded in CR3 bits.
The mode is selected per-thread. Add new thread features to indicate that
the thread has Linear Address Masking enabled.

switch_mm_irqs_off() now respects these flags and constructs CR3
accordingly.

The active LAM mode gets recorded in the tlb_state.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
---
 arch/x86/include/asm/mmu.h         |  1 +
 arch/x86/include/asm/mmu_context.h | 24 +++++++++++
 arch/x86/include/asm/tlbflush.h    |  4 ++
 arch/x86/mm/tlb.c                  | 68 +++++++++++++++++++++++-------
 4 files changed, 82 insertions(+), 15 deletions(-)

Comments

Alexander Potapenko June 30, 2022, 8:36 a.m. UTC | #1
On Wed, Jun 22, 2022 at 6:22 PM Kirill A. Shutemov
<kirill.shutemov@linux.intel.com> wrote:
>
> Linear Address Masking mode for userspace pointers encoded in CR3 bits.
> The mode is selected per-thread. Add new thread features indicate that the
> thread has Linear Address Masking enabled.
>
> switch_mm_irqs_off() now respects these flags and constructs CR3
> accordingly.
>
> The active LAM mode gets recorded in the tlb_state.
>
> Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
> ---
>  arch/x86/include/asm/mmu.h         |  1 +
>  arch/x86/include/asm/mmu_context.h | 24 +++++++++++
>  arch/x86/include/asm/tlbflush.h    |  4 ++
>  arch/x86/mm/tlb.c                  | 68 +++++++++++++++++++++++-------
>  4 files changed, 82 insertions(+), 15 deletions(-)
>
> diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
> index 5d7494631ea9..d150e92163b6 100644
> --- a/arch/x86/include/asm/mmu.h
> +++ b/arch/x86/include/asm/mmu.h
> @@ -40,6 +40,7 @@ typedef struct {
>
>  #ifdef CONFIG_X86_64
>         unsigned short flags;
> +       u64 lam_cr3_mask;
>  #endif

Can you please add a comment for this field?

>
>         struct mutex lock;
> diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
> index b8d40ddeab00..e6eac047c728 100644
> --- a/arch/x86/include/asm/mmu_context.h
> +++ b/arch/x86/include/asm/mmu_context.h
> @@ -91,6 +91,29 @@ static inline void switch_ldt(struct mm_struct *prev, struct mm_struct *next)
>  }
>  #endif
>
> +#ifdef CONFIG_X86_64
> +static inline u64 mm_cr3_lam_mask(struct mm_struct *mm)
> +{
> +       return mm->context.lam_cr3_mask;
> +}

For the sake of uniformity, can it be either lam_cr3_mask or
cr3_lam_mask everywhere?

>
> diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
> index 4af5579c7ef7..2d70d75e207f 100644
> --- a/arch/x86/include/asm/tlbflush.h
> +++ b/arch/x86/include/asm/tlbflush.h
> @@ -100,6 +100,10 @@ struct tlb_state {
>          */
>         bool invalidate_other;
>
> +#ifdef CONFIG_X86_64
> +       u8 lam;
> +#endif
> +

Comment here as well, please.

>         /*
>          * Mask that contains TLB_NR_DYN_ASIDS+1 bits to indicate
>          * the corresponding user PCID needs a flush next time we
> diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
> index d400b6d9d246..c5c4f76329c2 100644
> --- a/arch/x86/mm/tlb.c
> +++ b/arch/x86/mm/tlb.c
> @@ -154,17 +154,17 @@ static inline u16 user_pcid(u16 asid)
>         return ret;
>  }
>
> -static inline unsigned long build_cr3(pgd_t *pgd, u16 asid)
> +static inline unsigned long build_cr3(pgd_t *pgd, u16 asid, u64 lam)

I think it's more natural to make `lam` an unsigned long, because cr3
is treated as unsigned long everywhere.

>  {
>         if (static_cpu_has(X86_FEATURE_PCID)) {
> -               return __sme_pa(pgd) | kern_pcid(asid);
> +               return __sme_pa(pgd) | kern_pcid(asid) | lam;
>         } else {
>                 VM_WARN_ON_ONCE(asid != 0);
> -               return __sme_pa(pgd);
> +               return __sme_pa(pgd) | lam;
>         }
>  }
>
> -static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid)
> +static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid, u64 lam)
ditto

>  {
>         VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
>         /*
> @@ -173,7 +173,7 @@ static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid)
>          * boot because all CPU's the have same capabilities:
>          */
>         VM_WARN_ON_ONCE(!boot_cpu_has(X86_FEATURE_PCID));
> -       return __sme_pa(pgd) | kern_pcid(asid) | CR3_NOFLUSH;
> +       return __sme_pa(pgd) | kern_pcid(asid) | lam | CR3_NOFLUSH;
>  }
>
>  /*
> @@ -274,15 +274,15 @@ static inline void invalidate_user_asid(u16 asid)
>                   (unsigned long *)this_cpu_ptr(&cpu_tlbstate.user_pcid_flush_mask));
>  }
>
> -static void load_new_mm_cr3(pgd_t *pgdir, u16 new_asid, bool need_flush)
> +static void load_new_mm_cr3(pgd_t *pgdir, u16 new_asid, u64 lam, bool need_flush)
>  {
>         unsigned long new_mm_cr3;
>
>         if (need_flush) {
>                 invalidate_user_asid(new_asid);
> -               new_mm_cr3 = build_cr3(pgdir, new_asid);
> +               new_mm_cr3 = build_cr3(pgdir, new_asid, lam);
>         } else {
> -               new_mm_cr3 = build_cr3_noflush(pgdir, new_asid);
> +               new_mm_cr3 = build_cr3_noflush(pgdir, new_asid, lam);
>         }
>
>         /*
> @@ -486,11 +486,38 @@ void cr4_update_pce(void *ignored)
>  static inline void cr4_update_pce_mm(struct mm_struct *mm) { }
>  #endif
>
> +#ifdef CONFIG_X86_64
> +static inline u64 tlbstate_lam_cr3_mask(void)
Please add comments for these methods.
> +{
> +       u64 lam = this_cpu_read(cpu_tlbstate.lam);
> +
> +       return lam << X86_CR3_LAM_U57_BIT;
> +}
> +
> +static inline void set_tlbstate_lam_cr3_mask(u64 mask)
> +{
> +       this_cpu_write(cpu_tlbstate.lam, mask >> X86_CR3_LAM_U57_BIT);
> +}
> +
> +#else
> +
> +static inline u64 tlbstate_lam_cr3_mask(void)
> +{
> +       return 0;
> +}
> +
> +static inline void set_tlbstate_lam_cr3_mask(u64 mask)
> +{
> +}
> +#endif
> +
>  void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
>                         struct task_struct *tsk)
>  {
>         struct mm_struct *real_prev = this_cpu_read(cpu_tlbstate.loaded_mm);
>         u16 prev_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
> +       u64 prev_lam = tlbstate_lam_cr3_mask();
> +       u64 new_lam = mm_cr3_lam_mask(next);

Ditto.

>         bool was_lazy = this_cpu_read(cpu_tlbstate_shared.is_lazy);
>         unsigned cpu = smp_processor_id();
>         u64 next_tlb_gen;
> @@ -504,6 +531,9 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
>          * cpu_tlbstate.loaded_mm) matches next.
>          *
>          * NB: leave_mm() calls us with prev == NULL and tsk == NULL.
> +        *
> +        * NB: Initial LAM enabling calls us with prev == next. We must update
> +        * CR3 if prev_lam doesn't match the new one.
>          */
>
>         /* We don't want flush_tlb_func() to run concurrently with us. */
> @@ -520,7 +550,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
>          * isn't free.
>          */
>  #ifdef CONFIG_DEBUG_VM
> -       if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev->pgd, prev_asid))) {
> +       if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev->pgd, prev_asid, prev_lam))) {
>                 /*
>                  * If we were to BUG here, we'd be very likely to kill
>                  * the system so hard that we don't see the call trace.
> @@ -551,7 +581,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
>          * provides that full memory barrier and core serializing
>          * instruction.
>          */
> -       if (real_prev == next) {
> +       if (real_prev == next && prev_lam == new_lam) {

Do we want the warning checks below to only happen if prev_lam==new_lam?

>                 VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) !=
>                            next->context.ctx_id);
>
> @@ -622,15 +652,16 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
>                 barrier();
>         }
>
> +       set_tlbstate_lam_cr3_mask(new_lam);
>         if (need_flush) {
>                 this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
>                 this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
> -               load_new_mm_cr3(next->pgd, new_asid, true);
> +               load_new_mm_cr3(next->pgd, new_asid, new_lam, true);
>
>                 trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
>         } else {
>                 /* The new ASID is already up to date. */
> -               load_new_mm_cr3(next->pgd, new_asid, false);
> +               load_new_mm_cr3(next->pgd, new_asid, new_lam, false);
>
>                 trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, 0);
>         }
> @@ -691,6 +722,10 @@ void initialize_tlbstate_and_flush(void)
>         /* Assert that CR3 already references the right mm. */
>         WARN_ON((cr3 & CR3_ADDR_MASK) != __pa(mm->pgd));
>
> +       /* LAM expected to be disabled in CR3 and init_mm */
> +       WARN_ON(cr3 & (X86_CR3_LAM_U48 | X86_CR3_LAM_U57));
> +       WARN_ON(mm_cr3_lam_mask(&init_mm));
> +
>         /*
>          * Assert that CR4.PCIDE is set if needed.  (CR4.PCIDE initialization
>          * doesn't work like other CR4 bits because it can only be set from
> @@ -700,7 +735,7 @@ void initialize_tlbstate_and_flush(void)
>                 !(cr4_read_shadow() & X86_CR4_PCIDE));
>
>         /* Force ASID 0 and force a TLB flush. */
Please update this comment.

> -       write_cr3(build_cr3(mm->pgd, 0));
> +       write_cr3(build_cr3(mm->pgd, 0, 0));
>
>         /* Reinitialize tlbstate. */
>         this_cpu_write(cpu_tlbstate.last_user_mm_spec, LAST_USER_MM_INIT);
> @@ -708,6 +743,7 @@ void initialize_tlbstate_and_flush(void)
>         this_cpu_write(cpu_tlbstate.next_asid, 1);
>         this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, mm->context.ctx_id);
>         this_cpu_write(cpu_tlbstate.ctxs[0].tlb_gen, tlb_gen);
> +       set_tlbstate_lam_cr3_mask(0);
>
>         for (i = 1; i < TLB_NR_DYN_ASIDS; i++)
>                 this_cpu_write(cpu_tlbstate.ctxs[i].ctx_id, 0);
> @@ -1047,8 +1083,10 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end)
>   */
>  unsigned long __get_current_cr3_fast(void)
>  {
> -       unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd,
> -               this_cpu_read(cpu_tlbstate.loaded_mm_asid));
> +       unsigned long cr3 =
> +               build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd,
> +               this_cpu_read(cpu_tlbstate.loaded_mm_asid),
> +               tlbstate_lam_cr3_mask());
>
>         /* For now, be very restrictive about when this can be called. */
>         VM_WARN_ON(in_nmi() || preemptible());
> --
> 2.35.1
>
diff mbox series

Patch

diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
index 5d7494631ea9..d150e92163b6 100644
--- a/arch/x86/include/asm/mmu.h
+++ b/arch/x86/include/asm/mmu.h
@@ -40,6 +40,7 @@  typedef struct {
 
 #ifdef CONFIG_X86_64
 	unsigned short flags;
+	u64 lam_cr3_mask;
 #endif
 
 	struct mutex lock;
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index b8d40ddeab00..e6eac047c728 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -91,6 +91,29 @@  static inline void switch_ldt(struct mm_struct *prev, struct mm_struct *next)
 }
 #endif
 
+#ifdef CONFIG_X86_64
+static inline u64 mm_cr3_lam_mask(struct mm_struct *mm)
+{
+	return mm->context.lam_cr3_mask;
+}
+
+static inline void dup_lam(struct mm_struct *oldmm, struct mm_struct *mm)
+{
+	mm->context.lam_cr3_mask = oldmm->context.lam_cr3_mask;
+}
+
+#else
+
+static inline u64 mm_cr3_lam_mask(struct mm_struct *mm)
+{
+	return 0;
+}
+
+static inline void dup_lam(struct mm_struct *oldmm, struct mm_struct *mm)
+{
+}
+#endif
+
 #define enter_lazy_tlb enter_lazy_tlb
 extern void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk);
 
@@ -168,6 +191,7 @@  static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
 {
 	arch_dup_pkeys(oldmm, mm);
 	paravirt_arch_dup_mmap(oldmm, mm);
+	dup_lam(oldmm, mm);
 	return ldt_dup_context(oldmm, mm);
 }
 
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 4af5579c7ef7..2d70d75e207f 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -100,6 +100,10 @@  struct tlb_state {
 	 */
 	bool invalidate_other;
 
+#ifdef CONFIG_X86_64
+	u8 lam;
+#endif
+
 	/*
 	 * Mask that contains TLB_NR_DYN_ASIDS+1 bits to indicate
 	 * the corresponding user PCID needs a flush next time we
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index d400b6d9d246..c5c4f76329c2 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -154,17 +154,17 @@  static inline u16 user_pcid(u16 asid)
 	return ret;
 }
 
-static inline unsigned long build_cr3(pgd_t *pgd, u16 asid)
+static inline unsigned long build_cr3(pgd_t *pgd, u16 asid, u64 lam)
 {
 	if (static_cpu_has(X86_FEATURE_PCID)) {
-		return __sme_pa(pgd) | kern_pcid(asid);
+		return __sme_pa(pgd) | kern_pcid(asid) | lam;
 	} else {
 		VM_WARN_ON_ONCE(asid != 0);
-		return __sme_pa(pgd);
+		return __sme_pa(pgd) | lam;
 	}
 }
 
-static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid)
+static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid, u64 lam)
 {
 	VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
 	/*
@@ -173,7 +173,7 @@  static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid)
 	 * boot because all CPU's the have same capabilities:
 	 */
 	VM_WARN_ON_ONCE(!boot_cpu_has(X86_FEATURE_PCID));
-	return __sme_pa(pgd) | kern_pcid(asid) | CR3_NOFLUSH;
+	return __sme_pa(pgd) | kern_pcid(asid) | lam | CR3_NOFLUSH;
 }
 
 /*
@@ -274,15 +274,15 @@  static inline void invalidate_user_asid(u16 asid)
 		  (unsigned long *)this_cpu_ptr(&cpu_tlbstate.user_pcid_flush_mask));
 }
 
-static void load_new_mm_cr3(pgd_t *pgdir, u16 new_asid, bool need_flush)
+static void load_new_mm_cr3(pgd_t *pgdir, u16 new_asid, u64 lam, bool need_flush)
 {
 	unsigned long new_mm_cr3;
 
 	if (need_flush) {
 		invalidate_user_asid(new_asid);
-		new_mm_cr3 = build_cr3(pgdir, new_asid);
+		new_mm_cr3 = build_cr3(pgdir, new_asid, lam);
 	} else {
-		new_mm_cr3 = build_cr3_noflush(pgdir, new_asid);
+		new_mm_cr3 = build_cr3_noflush(pgdir, new_asid, lam);
 	}
 
 	/*
@@ -486,11 +486,38 @@  void cr4_update_pce(void *ignored)
 static inline void cr4_update_pce_mm(struct mm_struct *mm) { }
 #endif
 
+#ifdef CONFIG_X86_64
+static inline u64 tlbstate_lam_cr3_mask(void)
+{
+	u64 lam = this_cpu_read(cpu_tlbstate.lam);
+
+	return lam << X86_CR3_LAM_U57_BIT;
+}
+
+static inline void set_tlbstate_lam_cr3_mask(u64 mask)
+{
+	this_cpu_write(cpu_tlbstate.lam, mask >> X86_CR3_LAM_U57_BIT);
+}
+
+#else
+
+static inline u64 tlbstate_lam_cr3_mask(void)
+{
+	return 0;
+}
+
+static inline void set_tlbstate_lam_cr3_mask(u64 mask)
+{
+}
+#endif
+
 void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 			struct task_struct *tsk)
 {
 	struct mm_struct *real_prev = this_cpu_read(cpu_tlbstate.loaded_mm);
 	u16 prev_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
+	u64 prev_lam = tlbstate_lam_cr3_mask();
+	u64 new_lam = mm_cr3_lam_mask(next);
 	bool was_lazy = this_cpu_read(cpu_tlbstate_shared.is_lazy);
 	unsigned cpu = smp_processor_id();
 	u64 next_tlb_gen;
@@ -504,6 +531,9 @@  void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 	 * cpu_tlbstate.loaded_mm) matches next.
 	 *
 	 * NB: leave_mm() calls us with prev == NULL and tsk == NULL.
+	 *
+	 * NB: Initial LAM enabling calls us with prev == next. We must update
+	 * CR3 if prev_lam doesn't match the new one.
 	 */
 
 	/* We don't want flush_tlb_func() to run concurrently with us. */
@@ -520,7 +550,7 @@  void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 	 * isn't free.
 	 */
 #ifdef CONFIG_DEBUG_VM
-	if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev->pgd, prev_asid))) {
+	if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev->pgd, prev_asid, prev_lam))) {
 		/*
 		 * If we were to BUG here, we'd be very likely to kill
 		 * the system so hard that we don't see the call trace.
@@ -551,7 +581,7 @@  void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 	 * provides that full memory barrier and core serializing
 	 * instruction.
 	 */
-	if (real_prev == next) {
+	if (real_prev == next && prev_lam == new_lam) {
 		VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) !=
 			   next->context.ctx_id);
 
@@ -622,15 +652,16 @@  void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 		barrier();
 	}
 
+	set_tlbstate_lam_cr3_mask(new_lam);
 	if (need_flush) {
 		this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
 		this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
-		load_new_mm_cr3(next->pgd, new_asid, true);
+		load_new_mm_cr3(next->pgd, new_asid, new_lam, true);
 
 		trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
 	} else {
 		/* The new ASID is already up to date. */
-		load_new_mm_cr3(next->pgd, new_asid, false);
+		load_new_mm_cr3(next->pgd, new_asid, new_lam, false);
 
 		trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, 0);
 	}
@@ -691,6 +722,10 @@  void initialize_tlbstate_and_flush(void)
 	/* Assert that CR3 already references the right mm. */
 	WARN_ON((cr3 & CR3_ADDR_MASK) != __pa(mm->pgd));
 
+	/* LAM expected to be disabled in CR3 and init_mm */
+	WARN_ON(cr3 & (X86_CR3_LAM_U48 | X86_CR3_LAM_U57));
+	WARN_ON(mm_cr3_lam_mask(&init_mm));
+
 	/*
 	 * Assert that CR4.PCIDE is set if needed.  (CR4.PCIDE initialization
 	 * doesn't work like other CR4 bits because it can only be set from
@@ -700,7 +735,7 @@  void initialize_tlbstate_and_flush(void)
 		!(cr4_read_shadow() & X86_CR4_PCIDE));
 
 	/* Force ASID 0 and force a TLB flush. */
-	write_cr3(build_cr3(mm->pgd, 0));
+	write_cr3(build_cr3(mm->pgd, 0, 0));
 
 	/* Reinitialize tlbstate. */
 	this_cpu_write(cpu_tlbstate.last_user_mm_spec, LAST_USER_MM_INIT);
@@ -708,6 +743,7 @@  void initialize_tlbstate_and_flush(void)
 	this_cpu_write(cpu_tlbstate.next_asid, 1);
 	this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, mm->context.ctx_id);
 	this_cpu_write(cpu_tlbstate.ctxs[0].tlb_gen, tlb_gen);
+	set_tlbstate_lam_cr3_mask(0);
 
 	for (i = 1; i < TLB_NR_DYN_ASIDS; i++)
 		this_cpu_write(cpu_tlbstate.ctxs[i].ctx_id, 0);
@@ -1047,8 +1083,10 @@  void flush_tlb_kernel_range(unsigned long start, unsigned long end)
  */
 unsigned long __get_current_cr3_fast(void)
 {
-	unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd,
-		this_cpu_read(cpu_tlbstate.loaded_mm_asid));
+	unsigned long cr3 =
+		build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd,
+		this_cpu_read(cpu_tlbstate.loaded_mm_asid),
+		tlbstate_lam_cr3_mask());
 
 	/* For now, be very restrictive about when this can be called. */
 	VM_WARN_ON(in_nmi() || preemptible());