
[4/5] x86/mm: Provide helpers for unaccepted memory

Message ID 20210810062626.1012-5-kirill.shutemov@linux.intel.com (mailing list archive)
State New
Series x86: Implement support for unaccepted memory

Commit Message

Kirill A. Shutemov Aug. 10, 2021, 6:26 a.m. UTC
Core-mm requires a few helpers to support unaccepted memory (see the usage
sketch after the list):

 - accept_memory() checks the range of addresses against the bitmap and
   accepts memory if needed.

 - maybe_set_page_offline() checks the bitmap and marks a page with
   PageOffline() if memory acceptance is required on the first
   allocation of the page.

 - clear_page_offline() accepts memory for the page and clears
   PageOffline().
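
For context, this is roughly how core-mm is expected to call the helpers.
The hook-up itself is done elsewhere in the series; the call sites below
are an illustrative sketch, not part of this patch:

	/* Sketch only: assumed allocator-side call sites, not this patch. */

	/* When a page first lands on the free lists, mark it PageOffline()
	 * if its backing memory still needs to be accepted. */
	maybe_set_page_offline(page, order);

	/* When the page is pulled off the free list for allocation, accept
	 * the memory and drop the marker before handing the page out. */
	if (PageOffline(page))
		clear_page_offline(page, order);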

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
---
 arch/x86/boot/compressed/unaccepted_memory.c |  3 +-
 arch/x86/include/asm/page.h                  |  5 ++
 arch/x86/include/asm/unaccepted_memory.h     |  3 +
 arch/x86/mm/Makefile                         |  2 +
 arch/x86/mm/unaccepted_memory.c              | 80 ++++++++++++++++++++
 5 files changed, 91 insertions(+), 2 deletions(-)
 create mode 100644 arch/x86/mm/unaccepted_memory.c

Comments

Dave Hansen Aug. 10, 2021, 6:16 p.m. UTC | #1
On 8/9/21 11:26 PM, Kirill A. Shutemov wrote:
> +void accept_memory(phys_addr_t start, phys_addr_t end)
> +{
> +	if (!boot_params.unaccepted_memory)
> +		return;
> +
> +	spin_lock(&unaccepted_memory_lock);
> +	__accept_memory(start, end);
> +	spin_unlock(&unaccepted_memory_lock);
> +}

Isn't this taken in the:

	del_page_from_free_list()->
	clear_page_offline()->
	accept_memory()

call path?

That's underneath:

	spin_lock_irqsave(&zone->lock, flags);

Which means that accept_memory() can happen from interrupt context.  Is
it always covered by another spin_lock_irqsave(), so that it can safely
use a plain spin_lock()?

If so, it would be nice to call out that logic.  It *looks* like a
spinlock that we would want to be spin_lock_irqsave().
Kirill A. Shutemov Aug. 12, 2021, 8:31 p.m. UTC | #2
On Tue, Aug 10, 2021 at 11:16:26AM -0700, Dave Hansen wrote:
> On 8/9/21 11:26 PM, Kirill A. Shutemov wrote:
> > +void accept_memory(phys_addr_t start, phys_addr_t end)
> > +{
> > +	if (!boot_params.unaccepted_memory)
> > +		return;
> > +
> > +	spin_lock(&unaccepted_memory_lock);
> > +	__accept_memory(start, end);
> > +	spin_unlock(&unaccepted_memory_lock);
> > +}
> 
> Isn't this taken in the:
> 
> 	del_page_from_free_list()->
> 	clear_page_offline()->
> 	accept_memory()
> 
> call path?
> 
> That's underneath:
> 
> 	spin_lock_irqsave(&zone->lock, flags);
> 
> Which means that accept_memory() can happen from interrupt context.  Is
> it always covered by another spin_lock_irqsave(), so that it can safely
> use a plain spin_lock()?

I haven't given it enough thought yet, but we always run under the zone
lock, which has to be taken with spin_lock_irqsave() if it is taken from
interrupt context.

Having said that, I think it is a good idea to move clear_page_offline()
out from under the zone lock. It should help with allocation latency. I'm
not sure how messy that gets, though: the merging/splitting path looks
complex and I'm not an expert in the page allocator.

> If so, it would be nice to call out that logic.  It *looks* like a
> spinlock that we would want to be spin_lock_irqsave().
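
For reference, the _irqsave variant being discussed would look roughly like
this (a minimal sketch, not code posted in the thread). The plain
spin_lock() version is only safe if every caller already runs with
interrupts disabled, e.g. under zone->lock taken with spin_lock_irqsave();
otherwise an interrupt arriving while the lock is held, whose handler also
ends up in accept_memory(), would deadlock on unaccepted_memory_lock:

	void accept_memory(phys_addr_t start, phys_addr_t end)
	{
		unsigned long flags;

		if (!boot_params.unaccepted_memory)
			return;

		/* Disable interrupts while the bitmap lock is held, so the
		 * function is safe to call from any context. */
		spin_lock_irqsave(&unaccepted_memory_lock, flags);
		__accept_memory(start, end);
		spin_unlock_irqrestore(&unaccepted_memory_lock, flags);
	}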

Patch

diff --git a/arch/x86/boot/compressed/unaccepted_memory.c b/arch/x86/boot/compressed/unaccepted_memory.c
index 17b70627b0cd..818d32169eef 100644
--- a/arch/x86/boot/compressed/unaccepted_memory.c
+++ b/arch/x86/boot/compressed/unaccepted_memory.c
@@ -13,8 +13,7 @@  void mark_unaccepted(struct boot_params *params, u64 start, u64 num)
 	unsigned int npages;
 
 	if ((start & PMD_MASK) == (end & PMD_MASK)) {
-		npages = (end - start) / PAGE_SIZE;
-		__accept_memory(start, start + npages * PAGE_SIZE);
+		__accept_memory(start, end);
 		return;
 	}
 
diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h
index 4d5810c8fab7..1e56d76ca474 100644
--- a/arch/x86/include/asm/page.h
+++ b/arch/x86/include/asm/page.h
@@ -19,6 +19,11 @@ 
 struct page;
 
 #include <linux/range.h>
+
+#ifdef CONFIG_UNACCEPTED_MEMORY
+#include <asm/unaccepted_memory.h>
+#endif
+
 extern struct range pfn_mapped[];
 extern int nr_pfn_mapped;
 
diff --git a/arch/x86/include/asm/unaccepted_memory.h b/arch/x86/include/asm/unaccepted_memory.h
index f1f835d3cd78..712128760131 100644
--- a/arch/x86/include/asm/unaccepted_memory.h
+++ b/arch/x86/include/asm/unaccepted_memory.h
@@ -6,9 +6,12 @@ 
 #include <linux/types.h>
 
 struct boot_params;
+struct page;
 
 void mark_unaccepted(struct boot_params *params, u64 start, u64 num);
 
 void accept_memory(phys_addr_t start, phys_addr_t end);
 
+void maybe_set_page_offline(struct page *page, unsigned int order);
+void clear_page_offline(struct page *page, unsigned int order);
 #endif
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index b31cb52bf1bd..fe4e16322868 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -57,3 +57,5 @@  obj-$(CONFIG_X86_MEM_ENCRYPT_COMMON)	+= mem_encrypt_common.o
 obj-$(CONFIG_AMD_MEM_ENCRYPT)	+= mem_encrypt.o
 obj-$(CONFIG_AMD_MEM_ENCRYPT)	+= mem_encrypt_identity.o
 obj-$(CONFIG_AMD_MEM_ENCRYPT)	+= mem_encrypt_boot.o
+
+obj-$(CONFIG_UNACCEPTED_MEMORY)	+= unaccepted_memory.o
diff --git a/arch/x86/mm/unaccepted_memory.c b/arch/x86/mm/unaccepted_memory.c
new file mode 100644
index 000000000000..e11933f62ead
--- /dev/null
+++ b/arch/x86/mm/unaccepted_memory.c
@@ -0,0 +1,80 @@ 
+#include <linux/memblock.h>
+#include <linux/mm.h>
+#include <linux/pfn.h>
+#include <linux/spinlock.h>
+
+#include <asm/io.h>
+#include <asm/setup.h>
+#include <asm/unaccepted_memory.h>
+
+static DEFINE_SPINLOCK(unaccepted_memory_lock);
+
+#define PMD_ORDER (PMD_SHIFT - PAGE_SHIFT)
+
+static void __accept_memory(phys_addr_t start, phys_addr_t end)
+{
+	unsigned long *unaccepted_memory;
+	unsigned int rs, re;
+
+	unaccepted_memory = __va(boot_params.unaccepted_memory);
+	bitmap_for_each_set_region(unaccepted_memory, rs, re,
+				   start / PMD_SIZE,
+				   DIV_ROUND_UP(end, PMD_SIZE)) {
+		/* Platform-specific memory-acceptance call goes here */
+		panic("Cannot accept memory");
+		bitmap_clear(unaccepted_memory, rs, re - rs);
+	}
+}
+
+void accept_memory(phys_addr_t start, phys_addr_t end)
+{
+	if (!boot_params.unaccepted_memory)
+		return;
+
+	spin_lock(&unaccepted_memory_lock);
+	__accept_memory(start, end);
+	spin_unlock(&unaccepted_memory_lock);
+}
+
+void __init maybe_set_page_offline(struct page *page, unsigned int order)
+{
+	unsigned long *unaccepted_memory;
+	phys_addr_t addr = page_to_phys(page);
+	bool unaccepted = true;
+	unsigned int i;
+
+	if (!boot_params.unaccepted_memory)
+		return;
+
+	unaccepted_memory = __va(boot_params.unaccepted_memory);
+	spin_lock(&unaccepted_memory_lock);
+	if (order < PMD_ORDER) {
+		BUG_ON(test_bit(addr / PMD_SIZE, unaccepted_memory));
+		goto out;
+	}
+
+	for (i = 0; i < (1 << (order - PMD_ORDER)); i++) {
+		if (!test_bit(addr / PMD_SIZE + i, unaccepted_memory)) {
+			unaccepted = false;
+			break;
+		}
+	}
+
+	if (unaccepted)
+		__SetPageOffline(page);
+	else
+		__accept_memory(addr, addr + (PAGE_SIZE << order));
+out:
+	spin_unlock(&unaccepted_memory_lock);
+}
+
+void clear_page_offline(struct page *page, unsigned int order)
+{
+	phys_addr_t addr = page_to_phys(page);
+
+	/* PageOffline() page on a free list, but no unaccepted memory? Hm. */
+	WARN_ON_ONCE(!boot_params.unaccepted_memory);
+
+	accept_memory(addr, addr + (PAGE_SIZE << order));
+	__ClearPageOffline(page);
+}
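
As an aside, each bit in the unaccepted_memory bitmap covers one PMD_SIZE
chunk (2 MiB on x86-64 with 4K pages), so __accept_memory() converts a
physical address range into a bit range with start / PMD_SIZE and
DIV_ROUND_UP(end, PMD_SIZE). A stand-alone illustration of that index math
(plain userspace C with assumed constants, not kernel code):

	#include <stdio.h>

	#define PMD_SIZE		(2UL << 20)	/* assumed: 2 MiB, as on x86-64 */
	#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

	int main(void)
	{
		unsigned long start = 0x200000, end = 0x640000;	/* example range */

		/* Same conversion __accept_memory() does: addresses -> bit indices */
		unsigned long first = start / PMD_SIZE;			/* 1 */
		unsigned long last = DIV_ROUND_UP(end, PMD_SIZE);	/* 4, exclusive */

		printf("bits [%lu, %lu) cover [%#lx, %#lx)\n", first, last, start, end);
		return 0;
	}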