@@ -9,6 +9,16 @@
void *__kmap_local_pfn_prot(unsigned long pfn, pgprot_t prot);
void *__kmap_local_page_prot(struct page *page, pgprot_t prot);
void kunmap_local_indexed(void *vaddr);
+void kmap_local_fork(struct task_struct *tsk);
+void __kmap_local_sched_out(void);
+void __kmap_local_sched_in(void);
+static inline void kmap_assert_nomap(void)
+{
+ DEBUG_LOCKS_WARN_ON(current->kmap_ctrl.idx);
+}
+#else
+static inline void kmap_local_fork(struct task_struct *tsk) { }
+static inline void kmap_assert_nomap(void) { }
#endif
#ifdef CONFIG_HIGHMEM
@@ -34,6 +34,7 @@
#include <linux/rseq.h>
#include <linux/seqlock.h>
#include <linux/kcsan.h>
+#include <asm/kmap_size.h>
/* task_struct member predeclarations (sorted alphabetically): */
struct audit_context;
@@ -629,6 +630,13 @@ struct wake_q_node {
struct wake_q_node *next;
};
+struct kmap_ctrl {
+#ifdef CONFIG_KMAP_LOCAL
+ int idx;
+ pte_t pteval[KM_MAX_IDX];
+#endif
+};
+
struct task_struct {
#ifdef CONFIG_THREAD_INFO_IN_TASK
/*
@@ -1294,6 +1302,7 @@ struct task_struct {
unsigned int sequential_io;
unsigned int sequential_io_avg;
#endif
+ struct kmap_ctrl kmap_ctrl;
#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
unsigned long task_state_change;
#endif
@@ -2,6 +2,7 @@
#include <linux/context_tracking.h>
#include <linux/entry-common.h>
+#include <linux/highmem.h>
#include <linux/livepatch.h>
#include <linux/audit.h>
@@ -194,6 +195,7 @@ static void exit_to_user_mode_prepare(st
/* Ensure that the address limit is intact and no locks are held */
addr_limit_user_check();
+ kmap_assert_nomap();
lockdep_assert_irqs_disabled();
lockdep_sys_exit();
}
@@ -930,6 +930,7 @@ static struct task_struct *dup_task_stru
account_kernel_stack(tsk, 1);
kcov_task_init(tsk);
+ kmap_local_fork(tsk);
#ifdef CONFIG_FAULT_INJECTION
tsk->fail_nth = 0;
@@ -4051,6 +4051,22 @@ static inline void finish_lock_switch(st
# define finish_arch_post_lock_switch() do { } while (0)
#endif
+static inline void kmap_local_sched_out(void)
+{
+#ifdef CONFIG_KMAP_LOCAL
+ if (unlikely(current->kmap_ctrl.idx))
+ __kmap_local_sched_out();
+#endif
+}
+
+static inline void kmap_local_sched_in(void)
+{
+#ifdef CONFIG_KMAP_LOCAL
+ if (unlikely(current->kmap_ctrl.idx))
+ __kmap_local_sched_in();
+#endif
+}
+
/**
* prepare_task_switch - prepare to switch tasks
* @rq: the runqueue preparing to switch
@@ -4073,6 +4089,7 @@ prepare_task_switch(struct rq *rq, struc
perf_event_task_sched_out(prev, next);
rseq_preempt(prev);
fire_sched_out_preempt_notifiers(prev, next);
+ kmap_local_sched_out();
prepare_task(next);
prepare_arch_switch(next);
}
@@ -4139,6 +4156,7 @@ static struct rq *finish_task_switch(str
finish_lock_switch(rq);
finish_arch_post_lock_switch();
kcov_finish_switch(current);
+ kmap_local_sched_in();
fire_sched_in_preempt_notifiers(current);
/*
@@ -365,8 +365,6 @@ EXPORT_SYMBOL(kunmap_high);
#include <asm/kmap_size.h>
-static DEFINE_PER_CPU(int, __kmap_local_idx);
-
/*
* With DEBUG_KMAP_LOCAL the stack depth is doubled and every second
* slot is unused which acts as a guard page
@@ -379,23 +377,21 @@ static DEFINE_PER_CPU(int, __kmap_local_
static inline int kmap_local_idx_push(void)
{
- int idx = __this_cpu_add_return(__kmap_local_idx, KM_INCR) - 1;
-
WARN_ON_ONCE(in_irq() && !irqs_disabled());
- BUG_ON(idx >= KM_MAX_IDX);
- return idx;
+ current->kmap_ctrl.idx += KM_INCR;
+ BUG_ON(current->kmap_ctrl.idx >= KM_MAX_IDX);
+ return current->kmap_ctrl.idx - 1;
}
static inline int kmap_local_idx(void)
{
- return __this_cpu_read(__kmap_local_idx) - 1;
+ return current->kmap_ctrl.idx - 1;
}
static inline void kmap_local_idx_pop(void)
{
- int idx = __this_cpu_sub_return(__kmap_local_idx, KM_INCR);
-
- BUG_ON(idx < 0);
+ current->kmap_ctrl.idx -= KM_INCR;
+ BUG_ON(current->kmap_ctrl.idx < 0);
}
#ifndef arch_kmap_local_post_map
@@ -464,6 +460,7 @@ void *__kmap_local_pfn_prot(unsigned lon
pteval = pfn_pte(pfn, prot);
set_pte_at(&init_mm, vaddr, kmap_pte - idx, pteval);
arch_kmap_local_post_map(vaddr, pteval);
+ current->kmap_ctrl.pteval[kmap_local_idx()] = pteval;
preempt_enable();
return (void *)vaddr;
@@ -522,10 +519,92 @@ void kunmap_local_indexed(void *vaddr)
arch_kmap_local_pre_unmap(addr);
pte_clear(&init_mm, addr, kmap_pte - idx);
arch_kmap_local_post_unmap(addr);
+ current->kmap_ctrl.pteval[kmap_local_idx()] = __pte(0);
kmap_local_idx_pop();
preempt_enable();
}
EXPORT_SYMBOL(kunmap_local_indexed);
+
+/*
+ * Invoked before switch_to(). This is safe even when during or after
+ * clearing the maps an interrupt which needs a kmap_local happens because
+ * the task::kmap_ctrl.idx is not modified by the unmapping code so a
+ * nested kmap_local will use the next unused index and restore the index
+ * on unmap. The already cleared kmaps of the outgoing task are irrelevant
+ * because the interrupt context does not know about them. The same applies
+ * when scheduling back in for an interrupt which happens before the
+ * restore is complete.
+ */
+void __kmap_local_sched_out(void)
+{
+ struct task_struct *tsk = current;
+ pte_t *kmap_pte = kmap_get_pte();
+ int i;
+
+ /* Clear kmaps */
+ for (i = 0; i < tsk->kmap_ctrl.idx; i++) {
+ pte_t pteval = tsk->kmap_ctrl.pteval[i];
+ unsigned long addr;
+ int idx;
+
+ /* With debug all even slots are unmapped and act as guard */
+ if (IS_ENABLED(CONFIG_DEBUG_HIGHMEM) && !(i & 0x01)) {
+ WARN_ON_ONCE(!pte_none(pteval));
+ continue;
+ }
+ if (WARN_ON_ONCE(pte_none(pteval)))
+ continue;
+
+ /*
+ * This is a horrible hack for XTENSA to calculate the
+ * coloured PTE index. Uses the PFN encoded into the pteval
+ * and the map index calculation because the actual mapped
+ * virtual address is not stored in task::kmap_ctrl.
+ * For any sane architecture this is optimized out.
+ */
+ idx = arch_kmap_local_map_idx(i, pte_pfn(pteval));
+
+ addr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
+ arch_kmap_local_pre_unmap(addr);
+ pte_clear(&init_mm, addr, kmap_pte - idx);
+ arch_kmap_local_post_unmap(addr);
+ }
+}
+
+void __kmap_local_sched_in(void)
+{
+ struct task_struct *tsk = current;
+ pte_t *kmap_pte = kmap_get_pte();
+ int i;
+
+ /* Restore kmaps */
+ for (i = 0; i < tsk->kmap_ctrl.idx; i++) {
+ pte_t pteval = tsk->kmap_ctrl.pteval[i];
+ unsigned long addr;
+ int idx;
+
+ /* With debug all even slots are unmapped and act as guard */
+ if (IS_ENABLED(CONFIG_DEBUG_HIGHMEM) && !(i & 0x01)) {
+ WARN_ON_ONCE(!pte_none(pteval));
+ continue;
+ }
+ if (WARN_ON_ONCE(pte_none(pteval)))
+ continue;
+
+ /* See comment in __kmap_local_sched_out() */
+ idx = arch_kmap_local_map_idx(i, pte_pfn(pteval));
+ addr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
+ set_pte_at(&init_mm, addr, kmap_pte - idx, pteval);
+ arch_kmap_local_post_map(addr, pteval);
+ }
+}
+
+void kmap_local_fork(struct task_struct *tsk)
+{
+ if (WARN_ON_ONCE(tsk->kmap_ctrl.idx))
+ memset(&tsk->kmap_ctrl, 0, sizeof(tsk->kmap_ctrl));
+}
+
#endif
#if defined(HASHED_PAGE_VIRTUAL)