@@ -697,22 +697,15 @@ static void page_list_add_scrub(struct page_info *pg, unsigned int node,
page_list_add(pg, &heap(node, zone, order));
}
-/* Allocate 2^@order contiguous pages. */
-static struct page_info *alloc_heap_pages(
- unsigned int zone_lo, unsigned int zone_hi,
- unsigned int order, unsigned int memflags,
- struct domain *d)
+static struct page_info *get_free_buddy(unsigned int zone_lo,
+ unsigned int zone_hi,
+ unsigned int order, unsigned int memflags,
+ const struct domain *d)
{
- unsigned int i, j, zone = 0, nodemask_retry = 0, first_dirty;
nodeid_t first_node, node = MEMF_get_node(memflags), req_node = node;
- unsigned long request = 1UL << order;
+ nodemask_t nodemask = d ? d->node_affinity : node_online_map;
+ unsigned int j, zone, nodemask_retry = 0;
struct page_info *pg;
- nodemask_t nodemask = (d != NULL ) ? d->node_affinity : node_online_map;
- bool_t need_tlbflush = 0;
- uint32_t tlbflush_timestamp = 0;
-
- /* Make sure there are enough bits in memflags for nodeID. */
- BUILD_BUG_ON((_MEMF_bits - _MEMF_node) < (8 * sizeof(nodeid_t)));
if ( node == NUMA_NO_NODE )
{
@@ -728,34 +721,6 @@ static struct page_info *alloc_heap_pages(
first_node = node;
ASSERT(node < MAX_NUMNODES);
- ASSERT(zone_lo <= zone_hi);
- ASSERT(zone_hi < NR_ZONES);
-
- if ( unlikely(order > MAX_ORDER) )
- return NULL;
-
- spin_lock(&heap_lock);
-
- /*
- * Claimed memory is considered unavailable unless the request
- * is made by a domain with sufficient unclaimed pages.
- */
- if ( (outstanding_claims + request >
- total_avail_pages + tmem_freeable_pages()) &&
- ((memflags & MEMF_no_refcount) ||
- !d || d->outstanding_pages < request) )
- goto not_found;
-
- /*
- * TMEM: When available memory is scarce due to tmem absorbing it, allow
- * only mid-size allocations to avoid worst of fragmentation issues.
- * Others try tmem pools then fail. This is a workaround until all
- * post-dom0-creation-multi-page allocations can be eliminated.
- */
- if ( ((order == 0) || (order >= 9)) &&
- (total_avail_pages <= midsize_alloc_zone_pages) &&
- tmem_freeable_pages() )
- goto try_tmem;
/*
* Start with requested node, but exhaust all node memory in requested
@@ -767,17 +732,17 @@ static struct page_info *alloc_heap_pages(
zone = zone_hi;
do {
/* Check if target node can support the allocation. */
- if ( !avail[node] || (avail[node][zone] < request) )
+ if ( !avail[node] || (avail[node][zone] < (1UL << order)) )
continue;
/* Find smallest order which can satisfy the request. */
for ( j = order; j <= MAX_ORDER; j++ )
if ( (pg = page_list_remove_head(&heap(node, zone, j))) )
- goto found;
+ return pg;
} while ( zone-- > zone_lo ); /* careful: unsigned zone may wrap */
if ( (memflags & MEMF_exact_node) && req_node != NUMA_NO_NODE )
- goto not_found;
+ return NULL;
/* Pick next node. */
if ( !node_isset(node, nodemask) )
@@ -794,46 +759,96 @@ static struct page_info *alloc_heap_pages(
{
/* When we have tried all in nodemask, we fall back to others. */
if ( (memflags & MEMF_exact_node) || nodemask_retry++ )
- goto not_found;
+ return NULL;
nodes_andnot(nodemask, node_online_map, nodemask);
first_node = node = first_node(nodemask);
if ( node >= MAX_NUMNODES )
- goto not_found;
+ return NULL;
}
}
+}
+
+/* Allocate 2^@order contiguous pages. */
+static struct page_info *alloc_heap_pages(
+ unsigned int zone_lo, unsigned int zone_hi,
+ unsigned int order, unsigned int memflags,
+ struct domain *d)
+{
+ nodeid_t node;
+ unsigned int i, buddy_order, zone, first_dirty;
+ unsigned long request = 1UL << order;
+ struct page_info *pg;
+ bool need_tlbflush = false;
+ uint32_t tlbflush_timestamp = 0;
+
+ /* Make sure there are enough bits in memflags for nodeID. */
+ BUILD_BUG_ON((_MEMF_bits - _MEMF_node) < (8 * sizeof(nodeid_t)));
+
+ ASSERT(zone_lo <= zone_hi);
+ ASSERT(zone_hi < NR_ZONES);
+
+ if ( unlikely(order > MAX_ORDER) )
+ return NULL;
+
+ spin_lock(&heap_lock);
+
+ /*
+ * Claimed memory is considered unavailable unless the request
+ * is made by a domain with sufficient unclaimed pages.
+ */
+ if ( (outstanding_claims + request >
+ total_avail_pages + tmem_freeable_pages()) &&
+ ((memflags & MEMF_no_refcount) ||
+ !d || d->outstanding_pages < request) )
+ {
+ spin_unlock(&heap_lock);
+ return NULL;
+ }
- try_tmem:
- /* Try to free memory from tmem */
- if ( (pg = tmem_relinquish_pages(order, memflags)) != NULL )
+ /*
+ * TMEM: When available memory is scarce due to tmem absorbing it, allow
+ * only mid-size allocations to avoid worst of fragmentation issues.
+ * Others try tmem pools then fail. This is a workaround until all
+ * post-dom0-creation-multi-page allocations can be eliminated.
+ */
+ if ( ((order == 0) || (order >= 9)) &&
+ (total_avail_pages <= midsize_alloc_zone_pages) &&
+ tmem_freeable_pages() )
{
- /* reassigning an already allocated anonymous heap page */
+ /* Try to free memory from tmem. */
+ pg = tmem_relinquish_pages(order, memflags);
spin_unlock(&heap_lock);
return pg;
}
- not_found:
- /* No suitable memory blocks. Fail the request. */
- spin_unlock(&heap_lock);
- return NULL;
+ pg = get_free_buddy(zone_lo, zone_hi, order, memflags, d);
+ if ( !pg )
+ {
+ /* No suitable memory blocks. Fail the request. */
+ spin_unlock(&heap_lock);
+ return NULL;
+ }
- found:
+ node = phys_to_nid(page_to_maddr(pg));
+ zone = page_to_zone(pg);
+ buddy_order = PFN_ORDER(pg);
first_dirty = pg->u.free.first_dirty;
/* We may have to halve the chunk a number of times. */
- while ( j != order )
+ while ( buddy_order != order )
{
- j--;
- page_list_add_scrub(pg, node, zone, j,
- (1U << j) > first_dirty ?
+ buddy_order--;
+ page_list_add_scrub(pg, node, zone, buddy_order,
+ (1U << buddy_order) > first_dirty ?
first_dirty : INVALID_DIRTY_IDX);
- pg += 1U << j;
+ pg += 1U << buddy_order;
if ( first_dirty != INVALID_DIRTY_IDX )
{
/* Adjust first_dirty */
- if ( first_dirty >= 1U << j )
- first_dirty -= 1U << j;
+ if ( first_dirty >= 1U << buddy_order )
+ first_dirty -= 1U << buddy_order;
else
first_dirty = 0; /* We've moved past original first_dirty */
}
Moving the free-buddy search loop out of alloc_heap_pages() into a new
helper, get_free_buddy(), will make the code a bit more readable,
especially with changes that will be introduced in subsequent patches.

Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
---
Changes in v6:
* Rebased due to changes in the first patch (thus dropped Jan's ACK)

 xen/common/page_alloc.c | 139 +++++++++++++++++++++++++++---------------------
 1 file changed, 77 insertions(+), 62 deletions(-)