@@ -120,7 +120,7 @@ struct page {
* page_pool allocated pages.
*/
unsigned long pp_magic;
- struct page_pool *pp;
+ struct page_pool_item *pp_item;
unsigned long _pp_mapping_pad;
unsigned long dma_addr;
atomic_long_t pp_ref_count;
@@ -39,6 +39,7 @@
#include <net/net_debug.h>
#include <net/dropreason-core.h>
#include <net/netmem.h>
+#include <net/page_pool/types.h>
/**
* DOC: skb checksums
@@ -23,7 +23,7 @@ DECLARE_STATIC_KEY_FALSE(page_pool_mem_providers);
struct net_iov {
unsigned long __unused_padding;
unsigned long pp_magic;
- struct page_pool *pp;
+ struct page_pool_item *pp_item;
struct dmabuf_genpool_chunk_owner *owner;
unsigned long dma_addr;
atomic_long_t pp_ref_count;
@@ -33,7 +33,7 @@ struct net_iov {
*
* struct {
* unsigned long pp_magic;
- * struct page_pool *pp;
+ * struct page_pool_item *pp_item;
* unsigned long _pp_mapping_pad;
* unsigned long dma_addr;
* atomic_long_t pp_ref_count;
@@ -49,7 +49,7 @@ struct net_iov {
static_assert(offsetof(struct page, pg) == \
offsetof(struct net_iov, iov))
NET_IOV_ASSERT_OFFSET(pp_magic, pp_magic);
-NET_IOV_ASSERT_OFFSET(pp, pp);
+NET_IOV_ASSERT_OFFSET(pp_item, pp_item);
NET_IOV_ASSERT_OFFSET(dma_addr, dma_addr);
NET_IOV_ASSERT_OFFSET(pp_ref_count, pp_ref_count);
#undef NET_IOV_ASSERT_OFFSET
@@ -67,6 +67,11 @@ NET_IOV_ASSERT_OFFSET(pp_ref_count, pp_ref_count);
*/
typedef unsigned long __bitwise netmem_ref;
+/* Mirror page_pool_item_block, see include/net/page_pool/types.h */
+struct netmem_item_block {
+ struct page_pool *pp;
+};
+
static inline bool netmem_is_net_iov(const netmem_ref netmem)
{
return (__force unsigned long)netmem & NET_IOV;
@@ -127,9 +132,18 @@ static inline struct net_iov *__netmem_clear_lsb(netmem_ref netmem)
return (struct net_iov *)((__force unsigned long)netmem & ~NET_IOV);
}
+static inline struct page_pool_item *netmem_get_pp_item(netmem_ref netmem)
+{
+ return __netmem_clear_lsb(netmem)->pp_item;
+}
+
static inline struct page_pool *netmem_get_pp(netmem_ref netmem)
{
- return __netmem_clear_lsb(netmem)->pp;
+ struct page_pool_item *item = netmem_get_pp_item(netmem);
+ struct netmem_item_block *block;
+
+ block = (struct netmem_item_block *)((unsigned long)item & PAGE_MASK);
+ return block->pp;
}
static inline atomic_long_t *netmem_get_pp_ref_count_ref(netmem_ref netmem)
@@ -83,9 +83,19 @@ static inline u64 *page_pool_ethtool_stats_get(u64 *data, const void *stats)
}
#endif
+static inline struct page_pool_item_block *
+page_pool_item_to_block(struct page_pool_item *item)
+{
+ return (struct page_pool_item_block *)((unsigned long)item & PAGE_MASK);
+}
+
static inline struct page_pool *page_pool_get_pp(struct page *page)
{
- return page->pp;
+ /* The size of item_block is always PAGE_SIZE, the address of item_block
+ * for a specific item can be calculated using 'item & PAGE_MASK', so
+ * that we can find the page_pool object it belongs to.
+ */
+ return page_pool_item_to_block(page->pp_item)->pp;
}
/**
@@ -102,6 +102,7 @@ struct page_pool_params {
* @refill: an allocation which triggered a refill of the cache
* @waive: pages obtained from the ptr ring that cannot be added to
* the cache due to a NUMA mismatch
+ * @item_fast_empty: pre-allocated item cache is empty
*/
struct page_pool_alloc_stats {
u64 fast;
@@ -110,6 +111,7 @@ struct page_pool_alloc_stats {
u64 empty;
u64 refill;
u64 waive;
+ u64 item_fast_empty;
};
/**
@@ -142,6 +144,30 @@ struct page_pool_stats {
};
#endif
+struct page_pool_item {
+ unsigned long state;
+
+ union {
+ netmem_ref pp_netmem;
+ struct llist_node lentry;
+ };
+};
+
+/* The size of item_block is always PAGE_SIZE, so that the address of item_block
+ * for a specific item can be calculated using 'item & PAGE_MASK'
+ */
+struct page_pool_item_block {
+ struct page_pool *pp;
+ struct list_head list;
+ struct page_pool_item items[];
+};
+
+/* Ensure the offset of the 'pp' field is the same in both
+ * 'page_pool_item_block' and 'netmem_item_block'.
+ */
+static_assert(offsetof(struct page_pool_item_block, pp) == \
+ offsetof(struct netmem_item_block, pp));
+
/* The whole frag API block must stay within one cacheline. On 32-bit systems,
* sizeof(long) == sizeof(int), so that the block size is ``3 * sizeof(long)``.
* On 64-bit systems, the actual size is ``2 * sizeof(long) + sizeof(int)``.
@@ -161,6 +187,7 @@ struct page_pool {
int cpuid;
u32 pages_state_hold_cnt;
+ struct llist_head hold_items;
bool has_init_callback:1; /* slow::init_callback is set */
bool dma_map:1; /* Perform DMA mapping */
@@ -222,13 +249,20 @@ struct page_pool {
#endif
atomic_t pages_state_release_cnt;
+ /* Synchronize the DMA unmapping operation in page_pool_return_page()
+ * with page_pool_destroy() when destroy_cnt is non-zero.
+ */
+ spinlock_t item_lock;
+ struct list_head item_blocks;
+ struct llist_head release_items;
+
/* A page_pool is strictly tied to a single RX-queue being
* protected by NAPI, due to above pp_alloc_cache. This
* refcnt serves purpose is to simplify drivers error handling.
*/
refcount_t user_cnt;
- u64 destroy_cnt;
+ unsigned long destroy_cnt;
/* Slow/Control-path information follows */
struct page_pool_params_slow slow;
@@ -85,7 +85,7 @@ net_devmem_alloc_dmabuf(struct net_devmem_dmabuf_binding *binding)
niov = &owner->niovs[index];
niov->pp_magic = 0;
- niov->pp = NULL;
+ niov->pp_item = NULL;
atomic_long_set(&niov->pp_ref_count, 0);
return niov;
@@ -380,7 +380,7 @@ bool mp_dmabuf_devmem_release_page(struct page_pool *pool, netmem_ref netmem)
if (WARN_ON_ONCE(refcount != 1))
return false;
- page_pool_clear_pp_info(netmem);
+ page_pool_clear_pp_info(pool, netmem);
net_devmem_free_dmabuf(netmem_to_net_iov(netmem));
@@ -18,9 +18,10 @@ static inline void netmem_clear_pp_magic(netmem_ref netmem)
__netmem_clear_lsb(netmem)->pp_magic = 0;
}
-static inline void netmem_set_pp(netmem_ref netmem, struct page_pool *pool)
+static inline void netmem_set_pp_item(netmem_ref netmem,
+ struct page_pool_item *item)
{
- __netmem_clear_lsb(netmem)->pp = pool;
+ __netmem_clear_lsb(netmem)->pp_item = item;
}
static inline void netmem_set_dma_addr(netmem_ref netmem,
@@ -61,6 +61,7 @@ static const char pp_stats[][ETH_GSTRING_LEN] = {
"rx_pp_alloc_empty",
"rx_pp_alloc_refill",
"rx_pp_alloc_waive",
+ "rx_pp_alloc_item_fast_empty",
"rx_pp_recycle_cached",
"rx_pp_recycle_cache_full",
"rx_pp_recycle_ring",
@@ -94,6 +95,7 @@ bool page_pool_get_stats(const struct page_pool *pool,
stats->alloc_stats.empty += pool->alloc_stats.empty;
stats->alloc_stats.refill += pool->alloc_stats.refill;
stats->alloc_stats.waive += pool->alloc_stats.waive;
+ stats->alloc_stats.item_fast_empty += pool->alloc_stats.item_fast_empty;
for_each_possible_cpu(cpu) {
const struct page_pool_recycle_stats *pcpu =
@@ -139,6 +141,7 @@ u64 *page_pool_ethtool_stats_get(u64 *data, const void *stats)
*data++ = pool_stats->alloc_stats.empty;
*data++ = pool_stats->alloc_stats.refill;
*data++ = pool_stats->alloc_stats.waive;
+ *data++ = pool_stats->alloc_stats.item_fast_empty;
*data++ = pool_stats->recycle_stats.cached;
*data++ = pool_stats->recycle_stats.cache_full;
*data++ = pool_stats->recycle_stats.ring;
@@ -321,6 +324,209 @@ static void page_pool_uninit(struct page_pool *pool)
#endif
}
+#define PAGE_POOL_ITEM_USED 0
+#define PAGE_POOL_ITEM_MAPPED 1
+
+#define ITEMS_PER_PAGE ((PAGE_SIZE - \
+ offsetof(struct page_pool_item_block, items)) / \
+ sizeof(struct page_pool_item))
+
+#if defined(CONFIG_DEBUG_NET)
+#define page_pool_item_set_used(item) \
+ __set_bit(PAGE_POOL_ITEM_USED, &(item)->state)
+
+#define page_pool_item_clear_used(item) \
+ __clear_bit(PAGE_POOL_ITEM_USED, &(item)->state)
+
+#define page_pool_item_is_used(item) \
+ test_bit(PAGE_POOL_ITEM_USED, &(item)->state)
+#else
+#define page_pool_item_set_used(item)
+#define page_pool_item_clear_used(item)
+#define page_pool_item_is_used(item) false
+#endif
+
+#define page_pool_item_set_mapped(item) \
+ __set_bit(PAGE_POOL_ITEM_MAPPED, &(item)->state)
+
+/* Only clear_mapped and is_mapped need to be atomic as they can be
+ * called concurrently.
+ */
+#define page_pool_item_clear_mapped(item) \
+ clear_bit(PAGE_POOL_ITEM_MAPPED, &(item)->state)
+
+#define page_pool_item_is_mapped(item) \
+ test_bit(PAGE_POOL_ITEM_MAPPED, &(item)->state)
+
+static __always_inline void __page_pool_release_page_dma(struct page_pool *pool,
+ netmem_ref netmem)
+{
+ struct page_pool_item *item;
+ bool destroyed;
+ dma_addr_t dma;
+
+ if (!pool->dma_map)
+ /* Always account for inflight pages, even if we didn't
+ * map them
+ */
+ return;
+
+ /* Paired with the RCU synchronization in page_pool_destroy(): once
+ * pool->destroy_cnt is seen as non-zero, pool->item_lock is taken so
+ * that DMA unmapping done here cannot run concurrently with the
+ * unmapping done on behalf of page_pool_destroy().
+ */
+ rcu_read_lock();
+ destroyed = !!READ_ONCE(pool->destroy_cnt);
+ item = netmem_get_pp_item(netmem);
+
+ /* Catch an item whose state was not set up correctly: before
+ * page_pool_destroy() has been called, a page reaching this point
+ * is always expected to be DMA mapped.
+ */
+ DEBUG_NET_WARN_ON_ONCE(!destroyed &&
+ !page_pool_item_is_mapped(item));
+ if (unlikely(destroyed)) {
+ spin_lock_bh(&pool->item_lock);
+
+ if (!page_pool_item_is_mapped(item))
+ goto out_unlock;
+ }
+
+ dma = page_pool_get_dma_addr_netmem(netmem);
+
+ /* When page is unmapped, it cannot be returned to our pool */
+ dma_unmap_page_attrs(pool->p.dev, dma,
+ PAGE_SIZE << pool->p.order, pool->p.dma_dir,
+ DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
+ page_pool_set_dma_addr_netmem(netmem, 0);
+ page_pool_item_clear_mapped(item);
+
+out_unlock:
+ if (unlikely(destroyed))
+ spin_unlock_bh(&pool->item_lock);
+
+ rcu_read_unlock();
+}
+
+static void page_pool_item_uninit(struct page_pool *pool)
+{
+ while (!list_empty(&pool->item_blocks)) {
+ struct page_pool_item_block *block;
+
+ block = list_first_entry(&pool->item_blocks,
+ struct page_pool_item_block,
+ list);
+ list_del(&block->list);
+ put_page(virt_to_page(block));
+ }
+}
+
+static int page_pool_item_init(struct page_pool *pool)
+{
+#define PAGE_POOL_MIN_INFLIGHT_ITEMS 512
+ struct page_pool_item_block *block;
+ int item_cnt;
+
+ INIT_LIST_HEAD(&pool->item_blocks);
+ spin_lock_init(&pool->item_lock);
+ init_llist_head(&pool->hold_items);
+ init_llist_head(&pool->release_items);
+
+ item_cnt = pool->p.pool_size * 2 + PP_ALLOC_CACHE_SIZE +
+ PAGE_POOL_MIN_INFLIGHT_ITEMS;
+ for (; item_cnt > 0; item_cnt -= ITEMS_PER_PAGE) {
+ struct page *page;
+ unsigned int i;
+
+ page = alloc_pages_node(pool->p.nid, GFP_KERNEL | __GFP_ZERO,
+ 0);
+ if (!page) {
+ page_pool_item_uninit(pool);
+ return -ENOMEM;
+ }
+
+ block = page_address(page);
+ block->pp = pool;
+ list_add(&block->list, &pool->item_blocks);
+
+ for (i = 0; i < ITEMS_PER_PAGE; i++)
+ __llist_add(&block->items[i].lentry, &pool->hold_items);
+ }
+
+ return 0;
+}
+
+static void page_pool_item_unmap(struct page_pool *pool)
+{
+ struct page_pool_item_block *block;
+
+ if (!pool->dma_map || pool->mp_priv)
+ return;
+
+ list_for_each_entry(block, &pool->item_blocks, list) {
+ struct page_pool_item *items = block->items;
+ int i;
+
+ for (i = 0; i < ITEMS_PER_PAGE; i++) {
+ struct page_pool_item *item = &items[i];
+
+ if (!page_pool_item_is_mapped(item))
+ continue;
+
+ __page_pool_release_page_dma(pool, item->pp_netmem);
+ }
+ }
+}
+
+static struct page_pool_item *page_pool_fast_item_alloc(struct page_pool *pool)
+{
+ struct llist_node *first = pool->hold_items.first;
+
+ if (unlikely(!first)) {
+ first = llist_del_all(&pool->release_items);
+
+ if (unlikely(!first)) {
+ alloc_stat_inc(pool, item_fast_empty);
+ return NULL;
+ }
+ }
+
+ pool->hold_items.first = first->next;
+ return llist_entry(first, struct page_pool_item, lentry);
+}
+
+static bool page_pool_set_item_info(struct page_pool *pool, netmem_ref netmem)
+{
+ struct page_pool_item *item = page_pool_fast_item_alloc(pool);
+
+ if (likely(item)) {
+ item->pp_netmem = netmem;
+ page_pool_item_set_used(item);
+ netmem_set_pp_item(netmem, item);
+ }
+
+ return !!item;
+}
+
+static void page_pool_fast_item_free(struct page_pool *pool,
+ struct page_pool_item *item)
+{
+ llist_add(&item->lentry, &pool->release_items);
+}
+
+static void page_pool_clear_item_info(struct page_pool *pool, netmem_ref netmem)
+{
+ struct page_pool_item *item = netmem_get_pp_item(netmem);
+
+ DEBUG_NET_WARN_ON_ONCE(item->pp_netmem != netmem);
+ DEBUG_NET_WARN_ON_ONCE(page_pool_item_is_mapped(item));
+ DEBUG_NET_WARN_ON_ONCE(!page_pool_item_is_used(item));
+ page_pool_item_clear_used(item);
+ netmem_set_pp_item(netmem, NULL);
+ page_pool_fast_item_free(pool, item);
+}
+
/**
* page_pool_create_percpu() - create a page pool for a given cpu.
* @params: parameters, see struct page_pool_params
@@ -340,12 +546,18 @@ page_pool_create_percpu(const struct page_pool_params *params, int cpuid)
if (err < 0)
goto err_free;
- err = page_pool_list(pool);
+ err = page_pool_item_init(pool);
if (err)
goto err_uninit;
+ err = page_pool_list(pool);
+ if (err)
+ goto err_item_uninit;
+
return pool;
+err_item_uninit:
+ page_pool_item_uninit(pool);
err_uninit:
page_pool_uninit(pool);
err_free:
@@ -460,6 +672,7 @@ page_pool_dma_sync_for_device(const struct page_pool *pool,
static bool page_pool_dma_map(struct page_pool *pool, netmem_ref netmem)
{
+ struct page_pool_item *item;
dma_addr_t dma;
/* Setup DMA mapping: use 'struct page' area for storing DMA-addr
@@ -477,6 +690,9 @@ static bool page_pool_dma_map(struct page_pool *pool, netmem_ref netmem)
if (page_pool_set_dma_addr_netmem(netmem, dma))
goto unmap_failed;
+ item = netmem_get_pp_item(netmem);
+ DEBUG_NET_WARN_ON_ONCE(page_pool_item_is_mapped(item));
+ page_pool_item_set_mapped(item);
page_pool_dma_sync_for_device(pool, netmem, pool->p.max_len);
return true;
@@ -499,19 +715,24 @@ static struct page *__page_pool_alloc_page_order(struct page_pool *pool,
if (unlikely(!page))
return NULL;
- if (pool->dma_map && unlikely(!page_pool_dma_map(pool, page_to_netmem(page)))) {
- put_page(page);
- return NULL;
- }
+ if (unlikely(!page_pool_set_pp_info(pool, page_to_netmem(page))))
+ goto err_alloc;
+
+ if (pool->dma_map && unlikely(!page_pool_dma_map(pool, page_to_netmem(page))))
+ goto err_set_info;
alloc_stat_inc(pool, slow_high_order);
- page_pool_set_pp_info(pool, page_to_netmem(page));
/* Track how many pages are held 'in-flight' */
pool->pages_state_hold_cnt++;
trace_page_pool_state_hold(pool, page_to_netmem(page),
pool->pages_state_hold_cnt);
return page;
+err_set_info:
+ page_pool_clear_pp_info(pool, page_to_netmem(page));
+err_alloc:
+ put_page(page);
+ return NULL;
}
/* slow path */
@@ -546,12 +767,18 @@ static noinline netmem_ref __page_pool_alloc_pages_slow(struct page_pool *pool,
*/
for (i = 0; i < nr_pages; i++) {
netmem = pool->alloc.cache[i];
+
+ if (unlikely(!page_pool_set_pp_info(pool, netmem))) {
+ put_page(netmem_to_page(netmem));
+ continue;
+ }
+
if (dma_map && unlikely(!page_pool_dma_map(pool, netmem))) {
+ page_pool_clear_pp_info(pool, netmem);
put_page(netmem_to_page(netmem));
continue;
}
- page_pool_set_pp_info(pool, netmem);
pool->alloc.cache[pool->alloc.count++] = netmem;
/* Track how many pages are held 'in-flight' */
pool->pages_state_hold_cnt++;
@@ -623,9 +850,11 @@ s32 page_pool_inflight(const struct page_pool *pool, bool strict)
return inflight;
}
-void page_pool_set_pp_info(struct page_pool *pool, netmem_ref netmem)
+bool page_pool_set_pp_info(struct page_pool *pool, netmem_ref netmem)
{
- netmem_set_pp(netmem, pool);
+ if (unlikely(!page_pool_set_item_info(pool, netmem)))
+ return false;
+
netmem_or_pp_magic(netmem, PP_SIGNATURE);
/* Ensuring all pages have been split into one fragment initially:
@@ -637,32 +866,14 @@ void page_pool_set_pp_info(struct page_pool *pool, netmem_ref netmem)
page_pool_fragment_netmem(netmem, 1);
if (pool->has_init_callback)
pool->slow.init_callback(netmem, pool->slow.init_arg);
-}
-void page_pool_clear_pp_info(netmem_ref netmem)
-{
- netmem_clear_pp_magic(netmem);
- netmem_set_pp(netmem, NULL);
+ return true;
}
-static __always_inline void __page_pool_release_page_dma(struct page_pool *pool,
- netmem_ref netmem)
+void page_pool_clear_pp_info(struct page_pool *pool, netmem_ref netmem)
{
- dma_addr_t dma;
-
- if (!pool->dma_map)
- /* Always account for inflight pages, even if we didn't
- * map them
- */
- return;
-
- dma = page_pool_get_dma_addr_netmem(netmem);
-
- /* When page is unmapped, it cannot be returned to our pool */
- dma_unmap_page_attrs(pool->p.dev, dma,
- PAGE_SIZE << pool->p.order, pool->p.dma_dir,
- DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
- page_pool_set_dma_addr_netmem(netmem, 0);
+ netmem_clear_pp_magic(netmem);
+ page_pool_clear_item_info(pool, netmem);
}
/* Disconnects a page (from a page_pool). API users can have a need
@@ -688,7 +899,7 @@ void page_pool_return_page(struct page_pool *pool, netmem_ref netmem)
trace_page_pool_state_release(pool, netmem, count);
if (put) {
- page_pool_clear_pp_info(netmem);
+ page_pool_clear_pp_info(pool, netmem);
put_page(netmem_to_page(netmem));
}
/* An optimization would be to call __free_pages(page, pool->p.order)
@@ -1012,6 +1223,7 @@ static void __page_pool_destroy(struct page_pool *pool)
if (pool->disconnect)
pool->disconnect(pool);
+ page_pool_item_uninit(pool);
page_pool_unlist(pool);
page_pool_uninit(pool);
@@ -1043,7 +1255,7 @@ static void page_pool_empty_alloc_cache_once(struct page_pool *pool)
static void page_pool_scrub(struct page_pool *pool)
{
page_pool_empty_alloc_cache_once(pool);
- pool->destroy_cnt++;
+ WRITE_ONCE(pool->destroy_cnt, pool->destroy_cnt + 1);
/* No more consumers should exist, but producers could still
* be in-flight.
@@ -1137,6 +1349,8 @@ void page_pool_destroy(struct page_pool *pool)
*/
synchronize_net();
+ page_pool_item_unmap(pool);
+
page_pool_detached(pool);
pool->defer_start = jiffies;
pool->defer_warn = jiffies + DEFER_WARN_INTERVAL;
@@ -36,16 +36,18 @@ static inline bool page_pool_set_dma_addr(struct page *page, dma_addr_t addr)
}
#if defined(CONFIG_PAGE_POOL)
-void page_pool_set_pp_info(struct page_pool *pool, netmem_ref netmem);
-void page_pool_clear_pp_info(netmem_ref netmem);
+bool page_pool_set_pp_info(struct page_pool *pool, netmem_ref netmem);
+void page_pool_clear_pp_info(struct page_pool *pool, netmem_ref netmem);
int page_pool_check_memory_provider(struct net_device *dev,
struct netdev_rx_queue *rxq);
#else
-static inline void page_pool_set_pp_info(struct page_pool *pool,
+static inline bool page_pool_set_pp_info(struct page_pool *pool,
netmem_ref netmem)
{
+ return true;
}
-static inline void page_pool_clear_pp_info(netmem_ref netmem)
+static inline void page_pool_clear_pp_info(struct page_pool *pool,
+ netmem_ref netmem)
{
}
static inline int page_pool_check_memory_provider(struct net_device *dev,