@@ -2736,6 +2736,8 @@
nox2apic [X86-64,APIC] Do not enable x2APIC mode.
+ noxpfo [X86-64] Disable XPFO when CONFIG_XPFO is on.
+
cpu0_hotplug [X86] Turn on CPU0 hotplug feature when
CONFIG_BOOTPARAM_HOTPLUG_CPU0 is off.
Some features depend on CPU0. Known dependencies are:
@@ -184,6 +184,7 @@ config X86
select USER_STACKTRACE_SUPPORT
select VIRT_TO_BUS
select X86_FEATURE_NAMES if PROC_FS
+ select ARCH_SUPPORTS_XPFO if X86_64
config INSTRUCTION_DECODER
def_bool y
@@ -1238,6 +1238,29 @@ static inline bool pud_access_permitted(pud_t pud, bool write)
return __pte_access_permitted(pud_val(pud), write);
}
+/*
+ * The current flushing context - we pass it instead of 5 arguments:
+ */
+struct cpa_data {
+ unsigned long *vaddr;
+ pgd_t *pgd;
+ pgprot_t mask_set;
+ pgprot_t mask_clr;
+ unsigned long numpages;
+ int flags;
+ unsigned long pfn;
+ unsigned force_split : 1;
+ int curpage;
+ struct page **pages;
+};
+
+int
+try_preserve_large_page(pte_t *kpte, unsigned long address,
+ struct cpa_data *cpa);
+int split_large_page(struct cpa_data *cpa, pte_t *kpte,
+ unsigned long address);
+
#include <asm-generic/pgtable.h>
#endif /* __ASSEMBLY__ */
@@ -39,3 +39,4 @@ obj-$(CONFIG_X86_INTEL_MPX) += mpx.o
obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o
obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o
+obj-$(CONFIG_XPFO) += xpfo.o
@@ -27,28 +27,12 @@
#include <asm/set_memory.h>
/*
- * The current flushing context - we pass it instead of 5 arguments:
- */
-struct cpa_data {
- unsigned long *vaddr;
- pgd_t *pgd;
- pgprot_t mask_set;
- pgprot_t mask_clr;
- unsigned long numpages;
- int flags;
- unsigned long pfn;
- unsigned force_split : 1;
- int curpage;
- struct page **pages;
-};
-
-/*
* Serialize cpa() (for !DEBUG_PAGEALLOC which uses large identity mappings)
* using cpa_lock. So that we don't allow any other cpu, with stale large tlb
* entries change the page attribute in parallel to some other cpu
* splitting a large page entry along with changing the attribute.
*/
-static DEFINE_SPINLOCK(cpa_lock);
+DEFINE_SPINLOCK(cpa_lock);
#define CPA_FLUSHTLB 1
#define CPA_ARRAY 2
@@ -512,7 +496,7 @@ static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
#endif
}
-static int
+int
try_preserve_large_page(pte_t *kpte, unsigned long address,
struct cpa_data *cpa)
{
@@ -746,8 +730,8 @@ __split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address,
return 0;
}
-static int split_large_page(struct cpa_data *cpa, pte_t *kpte,
- unsigned long address)
+int split_large_page(struct cpa_data *cpa, pte_t *kpte,
+ unsigned long address)
{
struct page *base;
new file mode 100644
@@ -0,0 +1,96 @@
+/*
+ * Copyright (C) 2017 Hewlett Packard Enterprise Development, L.P.
+ * Copyright (C) 2016 Brown University. All rights reserved.
+ *
+ * Authors:
+ * Juerg Haefliger <juerg.haefliger@hpe.com>
+ * Vasileios P. Kemerlis <vpk@cs.brown.edu>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ */
+
+#include <linux/mm.h>
+
+#include <asm/tlbflush.h>
+
+extern spinlock_t cpa_lock;
+
+/* Update a single kernel page table entry */
+inline void set_kpte(void *kaddr, struct page *page, pgprot_t prot)
+{
+ unsigned int level;
+ pgprot_t msk_clr;
+ pte_t *pte = lookup_address((unsigned long)kaddr, &level);
+
+ BUG_ON(!pte);
+
+ switch (level) {
+ case PG_LEVEL_4K:
+ set_pte_atomic(pte, pfn_pte(page_to_pfn(page), canon_pgprot(prot)));
+ break;
+ case PG_LEVEL_2M:
+		/*
+		 * Check whether this is a 2M or a 1G page before retrieving
+		 * the pgprot info, since each is extracted from a different
+		 * page table level.
+		 */
+		msk_clr = pmd_pgprot(*(pmd_t *)pte);
+		/* fall through */
+	case PG_LEVEL_1G: {
+		struct cpa_data cpa;
+		int do_split;
+
+		if (level == PG_LEVEL_1G)
+			msk_clr = pud_pgprot(*(pud_t *)pte);
+
+ memset(&cpa, 0, sizeof(cpa));
+ cpa.vaddr = kaddr;
+ cpa.pages = &page;
+ cpa.mask_set = prot;
+ cpa.mask_clr = msk_clr;
+ cpa.numpages = 1;
+ cpa.flags = 0;
+ cpa.curpage = 0;
+ cpa.force_split = 0;
+
+ do_split = try_preserve_large_page(pte, (unsigned long)kaddr, &cpa);
+ if (do_split) {
+ spin_lock(&cpa_lock);
+ BUG_ON(split_large_page(&cpa, pte, (unsigned long)kaddr));
+ spin_unlock(&cpa_lock);
+ }
+
+ break;
+ }
+	case PG_LEVEL_512G:
+		/*
+		 * Fall through: the splitting infrastructure doesn't
+		 * support 512G pages, so treat them as an error.
+		 */
+ default:
+ BUG();
+ }
+}
+
+inline void xpfo_flush_kernel_page(struct page *page, int order)
+{
+ int level;
+ unsigned long size, kaddr;
+
+ kaddr = (unsigned long)page_address(page);
+ lookup_address(kaddr, &level);
+
+ switch (level) {
+ case PG_LEVEL_4K:
+ size = PAGE_SIZE;
+ break;
+ case PG_LEVEL_2M:
+ size = PMD_SIZE;
+ break;
+ case PG_LEVEL_1G:
+ size = PUD_SIZE;
+ break;
+ default:
+ BUG();
+ }
+
+ flush_tlb_kernel_range(kaddr, kaddr + (1 << order) * size);
+}
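
Editor's sketch, not part of the patch: set_kpte() and xpfo_flush_kernel_page() above are the low-level primitives the rest of the series builds on. A minimal illustration of toggling a frame's direct-map (physmap) entry is shown below; the function name xpfo_example_toggle is invented for illustration, and the real bookkeeping (page_ext flags, map counter, locking) lives in mm/xpfo.c later in this patch.

#include <linux/mm.h>
#include <asm/tlbflush.h>

static void xpfo_example_toggle(struct page *page)
{
	void *kaddr = page_address(page);

	/* Drop the physmap entry for this frame ... */
	set_kpte(kaddr, page, __pgprot(0));
	__flush_tlb_one((unsigned long)kaddr);

	/* ... and restore it before the kernel touches the page again. */
	set_kpte(kaddr, page, PAGE_KERNEL);
}
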
@@ -7,6 +7,7 @@
#include <linux/mm.h>
#include <linux/uaccess.h>
#include <linux/hardirq.h>
+#include <linux/xpfo.h>
#include <asm/cacheflush.h>
@@ -55,24 +56,34 @@ static inline struct page *kmap_to_page(void *addr)
#ifndef ARCH_HAS_KMAP
static inline void *kmap(struct page *page)
{
+ void *kaddr;
+
might_sleep();
- return page_address(page);
+ kaddr = page_address(page);
+ xpfo_kmap(kaddr, page);
+ return kaddr;
}
static inline void kunmap(struct page *page)
{
+ xpfo_kunmap(page_address(page), page);
}
static inline void *kmap_atomic(struct page *page)
{
+ void *kaddr;
+
preempt_disable();
pagefault_disable();
- return page_address(page);
+ kaddr = page_address(page);
+ xpfo_kmap(kaddr, page);
+ return kaddr;
}
#define kmap_atomic_prot(page, prot) kmap_atomic(page)
static inline void __kunmap_atomic(void *addr)
{
+ xpfo_kunmap(addr, virt_to_page(addr));
pagefault_enable();
preempt_enable();
}
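
Editor's note, not part of the patch: callers of the generic kmap()/kunmap() paths need no changes. A hypothetical user (example_fill_page below is made up) hits the new hooks transparently; without CONFIG_XPFO the xpfo_kmap()/xpfo_kunmap() calls compile away to empty stubs.

#include <linux/highmem.h>
#include <linux/string.h>

static void example_fill_page(struct page *page, const void *src, size_t len)
{
	void *vaddr = kmap(page);	/* xpfo_kmap(): restore the physmap entry if needed */

	memcpy(vaddr, src, len);
	kunmap(page);			/* xpfo_kunmap(): unmap again and flush the TLB entry */
}
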
new file mode 100644
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2017 Hewlett Packard Enterprise Development, L.P.
+ * Copyright (C) 2016 Brown University. All rights reserved.
+ *
+ * Authors:
+ * Juerg Haefliger <juerg.haefliger@hpe.com>
+ * Vasileios P. Kemerlis <vpk@cs.brown.edu>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ */
+
+#ifndef _LINUX_XPFO_H
+#define _LINUX_XPFO_H
+
+#ifdef CONFIG_XPFO
+
+extern struct page_ext_operations page_xpfo_ops;
+
+void set_kpte(void *kaddr, struct page *page, pgprot_t prot);
+void xpfo_dma_map_unmap_area(bool map, const void *addr, size_t size, int dir);
+void xpfo_flush_kernel_page(struct page *page, int order);
+
+void xpfo_kmap(void *kaddr, struct page *page);
+void xpfo_kunmap(void *kaddr, struct page *page);
+void xpfo_alloc_pages(struct page *page, int order, gfp_t gfp);
+void xpfo_free_pages(struct page *page, int order);
+
+#else /* !CONFIG_XPFO */
+
+static inline void xpfo_kmap(void *kaddr, struct page *page) { }
+static inline void xpfo_kunmap(void *kaddr, struct page *page) { }
+static inline void xpfo_alloc_pages(struct page *page, int order, gfp_t gfp) { }
+static inline void xpfo_free_pages(struct page *page, int order) { }
+
+#endif /* CONFIG_XPFO */
+
+#endif /* _LINUX_XPFO_H */
@@ -104,3 +104,4 @@ obj-$(CONFIG_FRAME_VECTOR) += frame_vector.o
obj-$(CONFIG_DEBUG_PAGE_REF) += debug_page_ref.o
obj-$(CONFIG_HARDENED_USERCOPY) += usercopy.o
obj-$(CONFIG_PERCPU_STATS) += percpu-stats.o
+obj-$(CONFIG_XPFO) += xpfo.o
@@ -1058,6 +1058,7 @@ static __always_inline bool free_pages_prepare(struct page *page,
kernel_poison_pages(page, 1 << order, 0);
kernel_map_pages(page, 1 << order, 0);
kasan_free_pages(page, order);
+ xpfo_free_pages(page, order);
return true;
}
@@ -1753,6 +1754,7 @@ inline void post_alloc_hook(struct page *page, unsigned int order,
kernel_map_pages(page, 1 << order, 1);
kernel_poison_pages(page, 1 << order, 1);
kasan_alloc_pages(page, order);
+ xpfo_alloc_pages(page, order, gfp_flags);
set_page_owner(page, order, gfp_flags);
}
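
Editor's sketch, not part of the patch: what the two allocator hooks above see, from the caller's point of view. The helper name example_alloc_paths is invented for illustration.

#include <linux/gfp.h>

static void example_alloc_paths(void)
{
	/*
	 * (gfp & GFP_HIGHUSER) == GFP_HIGHUSER: xpfo_alloc_pages() tags the
	 * frame XPFO_PAGE_USER and flushes the stale kernel TLB entry if the
	 * frame previously belonged to the kernel.
	 */
	struct page *user = alloc_page(GFP_HIGHUSER);

	/* Any other mask: the frame is tagged as a kernel page. */
	struct page *kern = alloc_page(GFP_KERNEL);

	if (user)
		__free_page(user);	/* xpfo_free_pages(): remapped with PAGE_KERNEL if needed */
	if (kern)
		__free_page(kern);
}
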
@@ -7,6 +7,7 @@
#include <linux/kmemleak.h>
#include <linux/page_owner.h>
#include <linux/page_idle.h>
+#include <linux/xpfo.h>
/*
* struct page extension
@@ -65,6 +66,9 @@ static struct page_ext_operations *page_ext_ops[] = {
#if defined(CONFIG_IDLE_PAGE_TRACKING) && !defined(CONFIG_64BIT)
&page_idle_ops,
#endif
+#ifdef CONFIG_XPFO
+ &page_xpfo_ops,
+#endif
};
static unsigned long total_usage;
new file mode 100644
@@ -0,0 +1,208 @@
+/*
+ * Copyright (C) 2017 Hewlett Packard Enterprise Development, L.P.
+ * Copyright (C) 2016 Brown University. All rights reserved.
+ *
+ * Authors:
+ * Juerg Haefliger <juerg.haefliger@hpe.com>
+ * Vasileios P. Kemerlis <vpk@cs.brown.edu>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ */
+
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/page_ext.h>
+#include <linux/xpfo.h>
+
+#include <asm/tlbflush.h>
+
+/* XPFO page state flags */
+enum xpfo_flags {
+ XPFO_PAGE_USER, /* Page is allocated to user-space */
+ XPFO_PAGE_UNMAPPED, /* Page is unmapped from the linear map */
+};
+
+/* Per-page XPFO house-keeping data */
+struct xpfo {
+ unsigned long flags; /* Page state */
+ bool inited; /* Map counter and lock initialized */
+ atomic_t mapcount; /* Counter for balancing map/unmap requests */
+ spinlock_t maplock; /* Lock to serialize map/unmap requests */
+};
+
+DEFINE_STATIC_KEY_FALSE(xpfo_inited);
+
+static bool xpfo_disabled __initdata;
+
+static int __init noxpfo_param(char *str)
+{
+ xpfo_disabled = true;
+
+ return 0;
+}
+
+early_param("noxpfo", noxpfo_param);
+
+static bool __init need_xpfo(void)
+{
+ if (xpfo_disabled) {
+ printk(KERN_INFO "XPFO disabled\n");
+ return false;
+ }
+
+ return true;
+}
+
+static void init_xpfo(void)
+{
+ printk(KERN_INFO "XPFO enabled\n");
+ static_branch_enable(&xpfo_inited);
+}
+
+struct page_ext_operations page_xpfo_ops = {
+ .size = sizeof(struct xpfo),
+ .need = need_xpfo,
+ .init = init_xpfo,
+};
+
+static inline struct xpfo *lookup_xpfo(struct page *page)
+{
+ return (void *)lookup_page_ext(page) + page_xpfo_ops.offset;
+}
+
+void xpfo_alloc_pages(struct page *page, int order, gfp_t gfp)
+{
+ int i, flush_tlb = 0;
+ struct xpfo *xpfo;
+
+ if (!static_branch_unlikely(&xpfo_inited))
+ return;
+
+ for (i = 0; i < (1 << order); i++) {
+ xpfo = lookup_xpfo(page + i);
+
+ BUG_ON(test_bit(XPFO_PAGE_UNMAPPED, &xpfo->flags));
+
+ /* Initialize the map lock and map counter */
+ if (unlikely(!xpfo->inited)) {
+ spin_lock_init(&xpfo->maplock);
+ atomic_set(&xpfo->mapcount, 0);
+ xpfo->inited = true;
+ }
+ BUG_ON(atomic_read(&xpfo->mapcount));
+
+ if ((gfp & GFP_HIGHUSER) == GFP_HIGHUSER) {
+ /*
+ * Tag the page as a user page and flush the TLB if it
+ * was previously allocated to the kernel.
+ */
+ if (!test_and_set_bit(XPFO_PAGE_USER, &xpfo->flags))
+ flush_tlb = 1;
+ } else {
+ /* Tag the page as a non-user (kernel) page */
+ clear_bit(XPFO_PAGE_USER, &xpfo->flags);
+ }
+ }
+
+ if (flush_tlb)
+ xpfo_flush_kernel_page(page, order);
+}
+
+void xpfo_free_pages(struct page *page, int order)
+{
+ int i;
+ struct xpfo *xpfo;
+
+ if (!static_branch_unlikely(&xpfo_inited))
+ return;
+
+ for (i = 0; i < (1 << order); i++) {
+ xpfo = lookup_xpfo(page + i);
+
+ if (unlikely(!xpfo->inited)) {
+ /*
+ * The page was allocated before page_ext was
+ * initialized, so it is a kernel page.
+ */
+ continue;
+ }
+
+		/*
+		 * Map the page back into the physmap if it is currently
+		 * unmapped (i.e. it was handed out to user space and never
+		 * mapped back since).
+		 */
+ if (test_and_clear_bit(XPFO_PAGE_UNMAPPED, &xpfo->flags)) {
+ set_kpte(page_address(page + i), page + i,
+ PAGE_KERNEL);
+ }
+ }
+}
+
+void xpfo_kmap(void *kaddr, struct page *page)
+{
+ struct xpfo *xpfo;
+ unsigned long flags;
+
+ if (!static_branch_unlikely(&xpfo_inited))
+ return;
+
+ xpfo = lookup_xpfo(page);
+
+ /*
+ * The page was allocated before page_ext was initialized (which means
+ * it's a kernel page) or it's allocated to the kernel, so nothing to
+ * do.
+ */
+ if (unlikely(!xpfo->inited) || !test_bit(XPFO_PAGE_USER, &xpfo->flags))
+ return;
+
+ spin_lock_irqsave(&xpfo->maplock, flags);
+
+ /*
+ * The page was previously allocated to user space, so map it back
+ * into the kernel. No TLB flush required.
+ */
+ if ((atomic_inc_return(&xpfo->mapcount) == 1) &&
+ test_and_clear_bit(XPFO_PAGE_UNMAPPED, &xpfo->flags))
+ set_kpte(kaddr, page, PAGE_KERNEL);
+
+ spin_unlock_irqrestore(&xpfo->maplock, flags);
+}
+EXPORT_SYMBOL(xpfo_kmap);
+
+void xpfo_kunmap(void *kaddr, struct page *page)
+{
+ struct xpfo *xpfo;
+ unsigned long flags;
+
+ if (!static_branch_unlikely(&xpfo_inited))
+ return;
+
+ xpfo = lookup_xpfo(page);
+
+ /*
+ * The page was allocated before page_ext was initialized (which means
+ * it's a kernel page) or it's allocated to the kernel, so nothing to
+ * do.
+ */
+ if (unlikely(!xpfo->inited) || !test_bit(XPFO_PAGE_USER, &xpfo->flags))
+ return;
+
+ spin_lock_irqsave(&xpfo->maplock, flags);
+
+	/*
+	 * The kernel is done with this mapping of the user page, so unmap
+	 * it from the physmap again, flush the TLB entry and mark the page
+	 * as unmapped.
+	 */
+ if (atomic_dec_return(&xpfo->mapcount) == 0) {
+ BUG_ON(test_bit(XPFO_PAGE_UNMAPPED, &xpfo->flags));
+ set_bit(XPFO_PAGE_UNMAPPED, &xpfo->flags);
+ set_kpte(kaddr, page, __pgprot(0));
+ __flush_tlb_one((unsigned long)kaddr);
+ }
+
+ spin_unlock_irqrestore(&xpfo->maplock, flags);
+}
+EXPORT_SYMBOL(xpfo_kunmap);
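
Editor's sketch, not part of the patch: how the per-page map counter in struct xpfo balances nested mappings of the same user page. The function name example_nested_kmap is made up for illustration.

#include <linux/highmem.h>

static void example_nested_kmap(struct page *user_page)
{
	void *a = kmap(user_page);	/* mapcount 0 -> 1: physmap entry restored */
	void *b = kmap(user_page);	/* mapcount 1 -> 2: already mapped, no PTE change */

	/* ... both a and b alias the same frame through the physmap ... */

	kunmap(user_page);		/* mapcount 2 -> 1: stays mapped */
	kunmap(user_page);		/* mapcount 1 -> 0: unmapped again, local TLB entry flushed */

	(void)a;
	(void)b;
}
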
@@ -6,6 +6,25 @@ menu "Security options"
source security/keys/Kconfig
+config ARCH_SUPPORTS_XPFO
+ bool
+
+config XPFO
+ bool "Enable eXclusive Page Frame Ownership (XPFO)"
+ default n
+ depends on ARCH_SUPPORTS_XPFO
+ select PAGE_EXTENSION
+ help
+ This option offers protection against 'ret2dir' kernel attacks.
+ When enabled, every time a page frame is allocated to user space, it
+	  is unmapped from the kernel's direct mapping of physical memory
+	  (physmap). Similarly, when a page frame is freed/reclaimed, it is
+	  mapped back into the physmap.
+
+ There is a slight performance impact when this option is enabled.
+
+ If in doubt, say "N".
+
config SECURITY_DMESG_RESTRICT
bool "Restrict unprivileged access to the kernel syslog"
default n