@@ -19,6 +19,7 @@
#include <linux/nospec.h>
#include <linux/syscalls.h>
#include <linux/uaccess.h>
+#include <linux/pkeys.h>
#ifdef CONFIG_XEN_PV
#include <xen/xen-ops.h>
@@ -209,6 +210,41 @@ SYSCALL_DEFINE0(ni_syscall)
return -ENOSYS;
}
+#ifdef CONFIG_ARCH_HAS_SUPERVISOR_PKEYS
+/*
+ * PKRS is a per-logical-processor MSR which overlays additional protection for
+ * pages which have been mapped with a protection key.
+ *
+ * The register is not maintained with XSAVE, so the MSR value has to be
+ * maintained in software during context switch and exception handling.
+ * Context switches save the MSR in the task struct, thus taking that value
+ * to other processors if necessary.
+ *
+ * irq_save_set_pkrs() stashes the interrupted task's saved PKRS value in
+ * @irq_state and programs @val for the duration of the exception, so that
+ * exception handlers do not inherit the elevated access of the interrupted
+ * task. irq_restore_pkrs() undoes this on exception exit. Both are no-ops
+ * when X86_FEATURE_PKS is not enabled.
+ */
+noinstr void irq_save_set_pkrs(irqentry_state_t *irq_state, u32 val)
+{
+	if (!cpu_feature_enabled(X86_FEATURE_PKS))
+		return;
+
+	irq_state->thread_pkrs = current->thread.saved_pkrs;
+	write_pkrs(val);
+}
+
+noinstr void irq_restore_pkrs(irqentry_state_t *irq_state)
+{
+	/* Put back the PKRS value stashed on entry; no-op without PKS. */
+	if (cpu_feature_enabled(X86_FEATURE_PKS)) {
+		write_pkrs(irq_state->thread_pkrs);
+		current->thread.saved_pkrs = irq_state->thread_pkrs;
+	}
+}
+#endif /* CONFIG_ARCH_HAS_SUPERVISOR_PKEYS */
+
#ifdef CONFIG_XEN_PV
#ifndef CONFIG_PREEMPTION
/*
@@ -272,6 +308,8 @@ __visible noinstr void xen_pv_evtchn_do_upcall(struct pt_regs *regs)
inhcall = get_and_clear_inhcall();
if (inhcall && !WARN_ON_ONCE(irq_state.exit_rcu)) {
+ /* Normally called by irqentry_exit(); we must restore PKRS here. */
+ irq_restore_pkrs(&irq_state);
instrumentation_begin();
irqentry_exit_cond_resched();
instrumentation_end();
@@ -31,9 +31,10 @@
#define PKS_NUM_KEYS 16
#ifdef CONFIG_ARCH_HAS_SUPERVISOR_PKEYS
-void write_pkrs(u32 new_pkrs);
+DECLARE_PER_CPU(u32, pkrs_cache);
+noinstr void write_pkrs(u32 new_pkrs);
#else
-static inline void write_pkrs(u32 new_pkrs) { }
+static __always_inline void write_pkrs(u32 new_pkrs) { }
#endif
#endif /*_ASM_X86_PKEYS_INTERNAL_H */
@@ -252,7 +252,7 @@ DEFINE_PER_CPU(u32, pkrs_cache);
* until all prior executions of WRPKRU have completed execution
* and updated the PKRU register.
*/
-void write_pkrs(u32 new_pkrs)
+noinstr void write_pkrs(u32 new_pkrs)
{
u32 *pkrs;
@@ -343,6 +343,8 @@ void irqentry_exit_to_user_mode(struct pt_regs *regs);
#ifndef irqentry_state
/**
* struct irqentry_state - Opaque object for exception state storage
+ * @thread_pkrs: Thread Supervisor Pkey value to be restored when exception is
+ * complete.
* @exit_rcu: Used exclusively in the irqentry_*() calls; signals whether the
* exit path has to invoke rcu_irq_exit().
* @lockdep: Used exclusively in the irqentry_nmi_*() calls; ensures that
@@ -357,6 +359,9 @@ void irqentry_exit_to_user_mode(struct pt_regs *regs);
* the maintenance of the irqentry_*() functions.
*/
typedef struct irqentry_state {
+#ifdef CONFIG_ARCH_HAS_SUPERVISOR_PKEYS
+ u32 thread_pkrs;
+#endif
union {
bool exit_rcu;
bool lockdep;
@@ -364,6 +369,14 @@ typedef struct irqentry_state {
} irqentry_state_t;
#endif
+#ifdef CONFIG_ARCH_HAS_SUPERVISOR_PKEYS
+noinstr void irq_save_set_pkrs(irqentry_state_t *irq_state, u32 val);
+noinstr void irq_restore_pkrs(irqentry_state_t *irq_state);
+#else /* !CONFIG_ARCH_HAS_SUPERVISOR_PKEYS */
+static __always_inline void irq_save_set_pkrs(irqentry_state_t *irq_state, u32 val) { }
+static __always_inline void irq_restore_pkrs(irqentry_state_t *irq_state) { }
+#endif /* CONFIG_ARCH_HAS_SUPERVISOR_PKEYS */
+
/**
* irqentry_enter - Handle state tracking on ordinary interrupt entries
* @regs: Pointer to pt_regs of interrupted context
@@ -326,7 +326,7 @@ noinstr void irqentry_enter(struct pt_regs *regs, irqentry_state_t *irq_state)
instrumentation_end();
irq_state->exit_rcu = true;
- return;
+ goto done;
}
/*
@@ -340,6 +340,9 @@ noinstr void irqentry_enter(struct pt_regs *regs, irqentry_state_t *irq_state)
/* Use the combo lockdep/tracing function */
trace_hardirqs_off();
instrumentation_end();
+
+done:
+ irq_save_set_pkrs(irq_state, INIT_PKRS_VALUE);
}
void irqentry_exit_cond_resched(void)
@@ -361,7 +364,12 @@ noinstr void irqentry_exit(struct pt_regs *regs, irqentry_state_t *irq_state)
/* Check whether this returns to user mode */
if (user_mode(regs)) {
irqentry_exit_to_user_mode(regs);
- } else if (!regs_irqs_disabled(regs)) {
+ return;
+ }
+
+ irq_restore_pkrs(irq_state);
+
+ if (!regs_irqs_disabled(regs)) {
/*
* If RCU was not watching on entry this needs to be done
* carefully and needs the same ordering of lockdep/tracing
@@ -407,10 +415,12 @@ void noinstr irqentry_nmi_enter(struct pt_regs *regs, irqentry_state_t *irq_stat
trace_hardirqs_off_finish();
ftrace_nmi_enter();
instrumentation_end();
+ irq_save_set_pkrs(irq_state, INIT_PKRS_VALUE);
}
void noinstr irqentry_nmi_exit(struct pt_regs *regs, irqentry_state_t *irq_state)
{
+ irq_restore_pkrs(irq_state);
instrumentation_begin();
ftrace_nmi_exit();
if (irq_state->lockdep) {