@@ -470,6 +470,14 @@ struct zone {
seqlock_t span_seqlock;
#endif
+#ifdef CONFIG_PAGE_REPORTING
+ /*
+ * Pointer to the per-order array of reported page counts. The array
+ * has MAX_ORDER - PAGE_REPORTING_MIN_ORDER entries and is NULL while
+ * unused page reporting is not in use on this zone.
+ */
+ unsigned long *reported_pages;
+#endif
int initialized;
/* Write-intensive fields used from the page allocator */
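For orientation, a sketch of how this array might be sized and allocated; the
helper below is hypothetical and not part of this patch, only the sizing rule
comes from the comment above:

	static int example_alloc_reported_stats(struct zone *zone)
	{
		size_t entries = MAX_ORDER - PAGE_REPORTING_MIN_ORDER;

		/* freed again by page_reporting_reset_zone() on teardown */
		zone->reported_pages = kcalloc(entries,
					       sizeof(unsigned long),
					       GFP_KERNEL);
		return zone->reported_pages ? 0 : -ENOMEM;
	}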
@@ -545,6 +553,14 @@ enum zone_flags {
ZONE_BOOSTED_WATERMARK, /* zone recently boosted watermarks.
* Cleared when kswapd is woken.
*/
+ ZONE_PAGE_REPORTING_ACTIVE, /* zone enabled page reporting and is
+ * actively flushing the data out of
+ * higher order pages.
+ */
+ ZONE_PAGE_REPORTING_REQUESTED, /* zone enabled page reporting and has
+ * requested flushing the data out of
+ * higher order pages.
+ */
};
static inline unsigned long zone_managed_pages(struct zone *zone)
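The two flags implement a request/acknowledge handshake. A hypothetical sketch
of the worker side (no such function exists in this patch; the locking mirrors
how the ACTIVE bit is used elsewhere in the series):

	static void example_start_reporting_pass(struct zone *zone)
	{
		spin_lock_irq(&zone->lock);
		/* boundaries are only honored while ACTIVE is set */
		__set_bit(ZONE_PAGE_REPORTING_ACTIVE, &zone->flags);
		clear_bit(ZONE_PAGE_REPORTING_REQUESTED, &zone->flags);
		spin_unlock_irq(&zone->lock);
	}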
@@ -163,6 +163,9 @@ enum pageflags {
/* non-lru isolated movable page */
PG_isolated = PG_reclaim,
+
+ /* Buddy pages. Used to track which pages have been reported */
+ PG_reported = PG_uptodate,
};
#ifndef __GENERATING_BOUNDS_H
@@ -432,6 +435,14 @@ static inline bool set_hwpoison_free_buddy_page(struct page *page)
#endif
/*
+ * PageReported() is used to track reported free pages within the Buddy
+ * allocator. The non-atomic versions of the test and set operations
+ * are sufficient here, as both are expected to be performed under the
+ * zone lock, which prevents any races on setting or clearing the bit.
+ */
+__PAGEFLAG(Reported, reported, PF_NO_COMPOUND)
+
+/*
* On an anonymous page mapped into a user virtual memory area,
* page->mapping points to its anon_vma, not to a struct address_space;
* with the PAGE_MAPPING_ANON bit set to distinguish it. See rmap.h.
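For reference, __PAGEFLAG(Reported, reported, PF_NO_COMPOUND) generates the
non-atomic helpers PageReported(), __SetPageReported() and
__ClearPageReported(). A minimal illustration of the locking rule described
above (the function itself is hypothetical):

	static void example_mark_reported(struct zone *zone, struct page *page)
	{
		/* non-atomic flag updates are only safe under the zone lock */
		lockdep_assert_held(&zone->lock);
		__SetPageReported(page);
	}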
@@ -237,6 +237,17 @@ config COMPACTION
linux-mm@kvack.org.
#
+# support for unused page reporting
+config PAGE_REPORTING
+ bool "Allow for reporting of unused pages"
+ default n
+ help
+ Unused page reporting allows for the incremental acquisition of
+ unused pages from the buddy allocator for the purpose of reporting
+ those pages to another entity, such as a hypervisor, so that the
+ memory can be freed up for other uses.
+
+#
# support for page migration
#
config MIGRATION
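A hypervisor-facing backend could also enable the option via select rather
than relying on the user prompt; the symbol below is purely illustrative:

	config EXAMPLE_REPORTING_CONSUMER
		tristate "Hypothetical consumer of unused page reporting"
		select PAGE_REPORTING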
@@ -24,6 +24,7 @@
#include <linux/page_owner.h>
#include <linux/psi.h>
#include "internal.h"
+#include "page_reporting.h"
#ifdef CONFIG_COMPACTION
static inline void count_compact_event(enum vm_event_item item)
@@ -1326,6 +1327,8 @@ static int next_search_order(struct compact_control *cc, int order)
continue;
spin_lock_irqsave(&cc->zone->lock, flags);
+ page_reporting_free_area_release(cc->zone, order,
+ MIGRATE_MOVABLE);
freelist = &area->free_list[MIGRATE_MOVABLE];
list_for_each_entry_reverse(freepage, freelist, lru) {
unsigned long pfn;
@@ -1682,6 +1685,8 @@ static unsigned long fast_find_migrateblock(struct compact_control *cc)
continue;
spin_lock_irqsave(&cc->zone->lock, flags);
+ page_reporting_free_area_release(cc->zone, order,
+ MIGRATE_MOVABLE);
freelist = &area->free_list[MIGRATE_MOVABLE];
list_for_each_entry(freepage, freelist, lru) {
unsigned long free_pfn;
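Both compaction hunks follow the same rule: code that reorders a free list
under the zone lock must first reset the reporting boundary, since the
boundary may point into the list being shuffled. A self-contained sketch of
that rule (the function and the rotation are invented for illustration):

	static void example_rotate_movable(struct zone *zone, unsigned int order)
	{
		struct free_area *area = &zone->free_area[order];
		unsigned long flags;

		spin_lock_irqsave(&zone->lock, flags);
		/* about to reorder entries: reset the boundary first */
		page_reporting_free_area_release(zone, order, MIGRATE_MOVABLE);
		if (!list_empty(&area->free_list[MIGRATE_MOVABLE]))
			list_rotate_left(&area->free_list[MIGRATE_MOVABLE]);
		spin_unlock_irqrestore(&zone->lock, flags);
	}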
@@ -41,6 +41,7 @@
#include "internal.h"
#include "shuffle.h"
+#include "page_reporting.h"
/*
* online_page_callback contains pointer to current page onlining function.
@@ -1558,6 +1559,7 @@ static int __ref __offline_pages(unsigned long start_pfn,
if (!populated_zone(zone)) {
zone_pcp_reset(zone);
build_all_zonelists(NULL);
+ page_reporting_reset_zone(zone);
} else
zone_pcp_update(zone);
@@ -74,6 +74,7 @@
#include <asm/div64.h>
#include "internal.h"
#include "shuffle.h"
+#include "page_reporting.h"
/* prevent >1 _updater_ of zone percpu pageset ->high and ->batch fields */
static DEFINE_MUTEX(pcp_batch_high_lock);
@@ -891,10 +892,15 @@ static inline void add_to_free_list(struct page *page, struct zone *zone,
static inline void add_to_free_list_tail(struct page *page, struct zone *zone,
unsigned int order, int migratetype)
{
- struct free_area *area = &zone->free_area[order];
+ struct list_head *tail = get_unreported_tail(zone, order, migratetype);
- list_add_tail(&page->lru, &area->free_list[migratetype]);
- area->nr_free++;
+ /*
+ * To prevent the unreported pages from slipping behind our iterator
+ * we will force them to be inserted in front of it. By doing this
+ * we should only need to make one pass through the freelist.
+ */
+ list_add_tail(&page->lru, tail);
+ zone->free_area[order].nr_free++;
}
/* Used for pages which are on another list */
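Read together with the boundary helpers introduced in mm/page_reporting.h
below, the comment above implies the following list layout while a reporting
pass is active (illustrative; B is the reported_boundary entry for this
order/migratetype):

	/*
	 *  head -> [pages still to be scanned] -> B -> [scanned pages] -> tail
	 *
	 * get_unreported_tail() returns B rather than the true tail, so the
	 * list_add_tail() above inserts a newly freed page at the end of the
	 * still-to-be-scanned section, in front of the iterator, instead of
	 * among entries the pass has already moved past.
	 */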
@@ -903,12 +909,20 @@ static inline void move_to_free_list(struct page *page, struct zone *zone,
{
struct free_area *area = &zone->free_area[order];
+ /* Make certain the page isn't occupying the boundary */
+ if (page_is_reported(page))
+ __del_page_from_reported_list(page, zone);
+
list_move(&page->lru, &area->free_list[migratetype]);
}
static inline void del_page_from_free_list(struct page *page, struct zone *zone,
unsigned int order)
{
+ /* remove page from reported list, and clear reported state */
+ if (page_is_reported(page))
+ del_page_from_reported_list(page, zone, order);
+
list_del(&page->lru);
__ClearPageBuddy(page);
set_page_private(page, 0);
@@ -972,7 +986,7 @@ static inline void del_page_from_free_list(struct page *page, struct zone *zone,
static inline void __free_one_page(struct page *page,
unsigned long pfn,
struct zone *zone, unsigned int order,
- int migratetype)
+ int migratetype, bool reported)
{
struct capture_control *capc = task_capc(zone);
unsigned long uninitialized_var(buddy_pfn);
@@ -1048,7 +1062,9 @@ static inline void __free_one_page(struct page *page,
done_merging:
set_page_order(page, order);
- if (is_shuffle_order(order))
+ if (reported)
+ to_tail = true;
+ else if (is_shuffle_order(order))
to_tail = shuffle_pick_tail();
else
to_tail = buddy_merge_likely(pfn, buddy_pfn, page, order);
@@ -1373,7 +1389,7 @@ static void free_pcppages_bulk(struct zone *zone, int count,
if (unlikely(isolated_pageblocks))
mt = get_pageblock_migratetype(page);
- __free_one_page(page, page_to_pfn(page), zone, 0, mt);
+ __free_one_page(page, page_to_pfn(page), zone, 0, mt, false);
trace_mm_page_pcpu_drain(page, 0, mt);
}
spin_unlock(&zone->lock);
@@ -1389,7 +1405,7 @@ static void free_one_page(struct zone *zone,
is_migrate_isolate(migratetype))) {
migratetype = get_pfnblock_migratetype(page, pfn);
}
- __free_one_page(page, pfn, zone, order, migratetype);
+ __free_one_page(page, pfn, zone, order, migratetype, false);
spin_unlock(&zone->lock);
}
@@ -2259,6 +2275,42 @@ struct page *__rmqueue_smallest(struct zone *zone, unsigned int order,
return NULL;
}
+#ifdef CONFIG_PAGE_REPORTING
+struct list_head **reported_boundary __read_mostly;
+
+/**
+ * free_reported_page - Return a now-reported page back to the free list
+ * @page: Page that was reported
+ * @order: Order of the reported page
+ *
+ * This function will look up the migratetype for the page and return it
+ * to the free list it was taken from. If the page is added back to the
+ * free list without being merged with a buddy we will mark it as
+ * reported.
+ */
+void free_reported_page(struct page *page, unsigned int order)
+{
+ struct zone *zone = page_zone(page);
+ unsigned long pfn;
+ unsigned int mt;
+
+ /* zone lock should be held when this function is called */
+ lockdep_assert_held(&zone->lock);
+
+ pfn = page_to_pfn(page);
+ mt = get_pfnblock_migratetype(page, pfn);
+ __free_one_page(page, pfn, zone, order, mt, true);
+
+ /*
+ * If the page was not commingled with another page we can consider
+ * the result to be "reported", since the page hasn't been modified.
+ * Otherwise we would need to report on the new, larger page that
+ * resulted from the merge.
+ */
+ if (PageBuddy(page) && page_order(page) == order)
+ add_page_to_reported_list(page, zone, order, mt);
+}
+#endif /* CONFIG_PAGE_REPORTING */
/*
* This array describes the order lists are fallen back to when
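A hypothetical caller would return a batch of just-reported pages along these
lines; only free_reported_page() and its zone-lock requirement come from this
patch, everything else is invented for illustration:

	static void example_return_reported(struct zone *zone,
					    struct page **pages,
					    unsigned int *orders,
					    unsigned int count)
	{
		unsigned int i;

		spin_lock_irq(&zone->lock);
		for (i = 0; i < count; i++)
			free_reported_page(pages[i], orders[i]);
		spin_unlock_irq(&zone->lock);
	}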
new file mode 100644
@@ -0,0 +1,183 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _MM_PAGE_REPORTING_H
+#define _MM_PAGE_REPORTING_H
+
+#include <linux/mmzone.h>
+#include <linux/pageblock-flags.h>
+#include <linux/page-isolation.h>
+#include <linux/jump_label.h>
+#include <linux/slab.h>
+#include <asm/pgtable.h>
+
+#define PAGE_REPORTING_MIN_ORDER pageblock_order
+
+#ifdef CONFIG_PAGE_REPORTING
+/* Reported page accessors, defined in page_alloc.c */
+void free_reported_page(struct page *page, unsigned int order);
+
+#define page_is_reported(_page) unlikely(PageReported(_page))
+
+/* Free the reported_pages array and reset the zone's reported page counts */
+static inline void page_reporting_reset_zone(struct zone *zone)
+{
+ kfree(zone->reported_pages);
+ zone->reported_pages = NULL;
+}
+
+/* Boundary functions */
+static inline pgoff_t
+get_reporting_index(unsigned int order, unsigned int migratetype)
+{
+ /*
+ * We will only ever be dealing with pages of order greater than or
+ * equal to PAGE_REPORTING_MIN_ORDER, so we can avoid allocating
+ * unused space by limiting the index range to only the orders that
+ * are supported for page reporting.
+ */
+ return (order - PAGE_REPORTING_MIN_ORDER) * MIGRATE_TYPES + migratetype;
+}
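+
+/*
+ * Worked example, assuming pageblock_order == 9, MAX_ORDER == 11 and
+ * MIGRATE_TYPES == 6 (all three vary by configuration): the index space
+ * then spans (11 - 9) * 6 == 12 slots, and the order-10 MIGRATE_MOVABLE
+ * free list maps to slot (10 - 9) * 6 + MIGRATE_MOVABLE.
+ */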
+
+extern struct list_head **reported_boundary __read_mostly;
+
+static inline void
+page_reporting_reset_boundary(struct zone *zone, unsigned int order, int mt)
+{
+ int index;
+
+ if (order < PAGE_REPORTING_MIN_ORDER)
+ return;
+ if (!test_bit(ZONE_PAGE_REPORTING_ACTIVE, &zone->flags))
+ return;
+
+ index = get_reporting_index(order, mt);
+ reported_boundary[index] = &zone->free_area[order].free_list[mt];
+}
+
+static inline void page_reporting_disable_boundaries(struct zone *zone)
+{
+ /* zone lock should be held when this function is called */
+ lockdep_assert_held(&zone->lock);
+
+ __clear_bit(ZONE_PAGE_REPORTING_ACTIVE, &zone->flags);
+}
+
+static inline void
+page_reporting_free_area_release(struct zone *zone, unsigned int order, int mt)
+{
+ page_reporting_reset_boundary(zone, order, mt);
+}
+
+/*
+ * Method for obtaining the tail of the free list. Using this allows
+ * unreported pages to be tail-inserted into the section that is still
+ * being scanned, so they are never interleaved with reported pages.
+ */
+static inline struct list_head *
+get_unreported_tail(struct zone *zone, unsigned int order, int migratetype)
+{
+ if (order >= PAGE_REPORTING_MIN_ORDER &&
+ test_bit(ZONE_PAGE_REPORTING_ACTIVE, &zone->flags))
+ return reported_boundary[get_reporting_index(order,
+ migratetype)];
+
+ return &zone->free_area[order].free_list[migratetype];
+}
+
+/*
+ * Functions for adding/removing reported pages to the freelist.
+ * All of them expect the zone lock to be held to maintain
+ * consistency of the reported list as a subset of the free list.
+ */
+static inline void
+add_page_to_reported_list(struct page *page, struct zone *zone,
+ unsigned int order, unsigned int mt)
+{
+ /*
+ * Default to using index 0; this will be updated later if the zone
+ * is still being processed.
+ */
+ page->index = 0;
+
+ /* flag page as reported */
+ __SetPageReported(page);
+
+ /* update reported page accounting */
+ zone->reported_pages[order - PAGE_REPORTING_MIN_ORDER]++;
+}
+
+static inline void page_reporting_pull_boundary(struct page *page)
+{
+ struct list_head **tail = &reported_boundary[page->index];
+
+ if (*tail == &page->lru)
+ *tail = page->lru.next;
+}
+
+static inline void
+__del_page_from_reported_list(struct page *page, struct zone *zone)
+{
+ /*
+ * Since the page is being pulled from the list we need to update
+ * the boundary in case it currently points at this page, so the
+ * next tail insertion does not land behind the iterator.
+ */
+ if (test_bit(ZONE_PAGE_REPORTING_ACTIVE, &zone->flags))
+ page_reporting_pull_boundary(page);
+}
+
+static inline void
+del_page_from_reported_list(struct page *page, struct zone *zone,
+ unsigned int order)
+{
+ __del_page_from_reported_list(page, zone);
+
+ /* update reported page accounting */
+ zone->reported_pages[order - PAGE_REPORTING_MIN_ORDER]--;
+
+ /* clear the flag so we can report on it when it returns */
+ __ClearPageReported(page);
+}
+
+#else /* CONFIG_PAGE_REPORTING */
+#define page_is_reported(_page) false
+
+static inline void page_reporting_reset_zone(struct zone *zone)
+{
+}
+
+static inline void
+page_reporting_free_area_release(struct zone *zone, unsigned int order, int mt)
+{
+}
+
+static inline struct list_head *
+get_unreported_tail(struct zone *zone, unsigned int order, int migratetype)
+{
+ return &zone->free_area[order].free_list[migratetype];
+}
+
+static inline void
+add_page_to_reported_list(struct page *page, struct zone *zone,
+ unsigned int order, unsigned int mt)
+{
+}
+
+static inline void
+__del_page_from_reported_list(struct page *page, struct zone *zone)
+{
+}
+
+static inline void
+del_page_from_reported_list(struct page *page, struct zone *zone,
+ unsigned int order)
+{
+}
+#endif /* CONFIG_PAGE_REPORTING */
+#endif /* _MM_PAGE_REPORTING_H */