
[RFC,01/12] net: hack together some page sharing

Message ID 20230707183935.997267-2-kuba@kernel.org (mailing list archive)
State RFC
Delegated to: Netdev Maintainers
Headers show
Series net: huge page backed page_pool | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Guessed tree name to be net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit fail Errors and warnings before: 1343 this patch: 1344
netdev/cc_maintainers warning 3 maintainers not CCed: pabeni@redhat.com davem@davemloft.net imagedong@tencent.com
netdev/build_clang success Errors and warnings before: 1364 this patch: 1364
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn fail Errors and warnings before: 1366 this patch: 1367
netdev/checkpatch warning WARNING: Missing or malformed SPDX-License-Identifier tag in line 1 WARNING: Using vsprintf specifier '%px' potentially exposes the kernel memory layout, if you don't really need the address please consider using '%p'. WARNING: added, moved or deleted file(s), does MAINTAINERS need updating? WARNING: suspect code indent for conditional statements (16, 27)
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Jakub Kicinski July 7, 2023, 6:39 p.m. UTC
Implement a simple buddy allocator with a fallback. It will be
used to split huge pages into smaller pools, falling back to
alloc_pages() when huge pages are exhausted.

This code will be used exclusively on slow paths and is generally
"not great", but it doesn't seem to immediately crash, which is
good enough for now?

This patch contains a basic "coherent allocator" which splits 2M
coherently mapped pages into smaller chunks. Certain drivers
appear to allocate a few MB worth of individual coherent pages,
which is not great for IOTLB pressure (a simple iperf test on bnxt
with Rx backed by huge pages goes from 170k IOTLB misses to 60k
when using this).

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/dcalloc.h |  18 ++
 net/core/Makefile     |   2 +-
 net/core/dcalloc.c    | 390 ++++++++++++++++++++++++++++++++++++++++++
 net/core/dcalloc.h    |  93 ++++++++++
 4 files changed, 502 insertions(+), 1 deletion(-)
 create mode 100644 include/net/dcalloc.h
 create mode 100644 net/core/dcalloc.c
 create mode 100644 net/core/dcalloc.h
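
For reference, a minimal usage sketch of the API exported in
include/net/dcalloc.h. The foo_* driver, structures and sizes below are
hypothetical and only illustrate the intended call pattern; they are not
part of this patch:

#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/sizes.h>
#include <net/dcalloc.h>

/* Hypothetical per-device driver state -- not part of this patch. */
struct foo_rings {
	struct dma_cocoa *cocoa;
	void *desc;
	dma_addr_t desc_dma;
	unsigned long desc_size;
};

static int foo_rings_alloc(struct device *dev, struct foo_rings *fr)
{
	/* One allocator per device; allocations below are carved out of
	 * shared 2M coherent pages instead of each pinning its own
	 * IOTLB entry.
	 */
	fr->cocoa = dma_cocoa_create(dev, GFP_KERNEL);
	if (!fr->cocoa)
		return -ENOMEM;

	fr->desc_size = SZ_64K;		/* example size only */
	fr->desc = dma_cocoa_alloc(fr->cocoa, fr->desc_size,
				   &fr->desc_dma, GFP_KERNEL);
	if (!fr->desc) {
		dma_cocoa_destroy(fr->cocoa);
		return -ENOMEM;
	}
	return 0;
}

static void foo_rings_free(struct foo_rings *fr)
{
	/* Size and DMA address must match the original allocation. */
	dma_cocoa_free(fr->cocoa, fr->desc_size, fr->desc, fr->desc_dma);
	dma_cocoa_destroy(fr->cocoa);
}

dma_cocoa_alloc() rounds the size up to a power of two and zeroes the
buffer, so the caller gets dma_alloc_coherent()-like semantics back.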

Patch

diff --git a/include/net/dcalloc.h b/include/net/dcalloc.h
new file mode 100644
index 000000000000..a85c59d7f844
--- /dev/null
+++ b/include/net/dcalloc.h
@@ -0,0 +1,18 @@ 
+#ifndef __NET_DCALLOC_H
+#define __NET_DCALLOC_H
+
+#include <linux/types.h>
+
+struct device;
+
+struct dma_cocoa;
+
+struct dma_cocoa *dma_cocoa_create(struct device *dev, gfp_t gfp);
+void dma_cocoa_destroy(struct dma_cocoa *cocoa);
+
+void *dma_cocoa_alloc(struct dma_cocoa *cocoa, unsigned long size,
+		      dma_addr_t *dma, gfp_t gfp);
+void dma_cocoa_free(struct dma_cocoa *cocoa, unsigned long size, void *addr,
+		    dma_addr_t dma);
+
+#endif
diff --git a/net/core/Makefile b/net/core/Makefile
index 731db2eaa610..3a98ad5d2b49 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -13,7 +13,7 @@  obj-y		     += dev.o dev_addr_lists.o dst.o netevent.o \
 			neighbour.o rtnetlink.o utils.o link_watch.o filter.o \
 			sock_diag.o dev_ioctl.o tso.o sock_reuseport.o \
 			fib_notifier.o xdp.o flow_offload.o gro.o \
-			netdev-genl.o netdev-genl-gen.o gso.o
+			netdev-genl.o netdev-genl-gen.o gso.o dcalloc.o
 
 obj-$(CONFIG_NETDEV_ADDR_LIST_TEST) += dev_addr_lists_test.o
 
diff --git a/net/core/dcalloc.c b/net/core/dcalloc.c
new file mode 100644
index 000000000000..af9029018353
--- /dev/null
+++ b/net/core/dcalloc.c
@@ -0,0 +1,390 @@ 
+#include "dcalloc.h"
+
+#include <linux/dma-mapping.h>
+#include <linux/sizes.h>
+#include <linux/slab.h>
+
+static bool dma_sal_in_use(struct dma_slow_allocator *sal)
+{
+	return refcount_read(&sal->user_cnt);
+}
+
+int dma_slow_huge_init(struct dma_slow_huge *shu, void *addr,
+		       unsigned int size, dma_addr_t dma, gfp_t gfp)
+{
+	struct dma_slow_buddy *bud;
+
+	bud = kzalloc(sizeof(*bud), gfp);
+	if (!bud)
+		return -ENOMEM;
+
+	shu->addr = addr;
+	shu->size = size;
+	shu->dma = dma;
+
+	INIT_LIST_HEAD(&shu->buddy_list);
+
+	bud->size = size;
+	bud->free = true;
+	list_add(&bud->list, &shu->buddy_list);
+
+	return 0;
+}
+
+static struct dma_slow_buddy *
+dma_slow_bud_split(struct dma_slow_buddy *bud, gfp_t gfp)
+{
+	struct dma_slow_buddy *right;
+
+	right = kzalloc(sizeof(*bud), gfp);
+	if (!right)
+		return NULL;
+
+	bud->size /= 2;
+
+	right->offset = bud->offset + bud->size;
+	right->size = bud->size;
+	right->free = true;
+
+	list_add(&right->list, &bud->list);
+
+	return bud;
+}
+
+static bool dma_slow_bud_coalesce(struct dma_slow_huge *shu)
+{
+	struct dma_slow_buddy *bud, *left = NULL, *right = NULL;
+
+	list_for_each_entry(bud, &shu->buddy_list, list) {
+		if (left && bud &&
+		    left->free && bud->free &&
+		    left->size == bud->size &&
+		    (left->offset & bud->offset) == left->offset) {
+			right = bud;
+			break;
+		}
+		left = bud;
+	}
+
+	if (!right)
+		return false;
+
+	left->size *= 2;
+	list_del(&right->list);
+	kfree(right);
+	return true;
+}
+
+static void *
+__dma_sal_alloc_buddy(struct dma_slow_allocator *sal, struct dma_slow_huge *shu,
+		      unsigned int size, dma_addr_t *dma, gfp_t gfp)
+{
+	struct dma_slow_buddy *small_fit = NULL;
+	struct dma_slow_buddy *bud;
+
+	if (shu->size < size)
+		return NULL;
+
+	list_for_each_entry(bud, &shu->buddy_list, list) {
+		if (!bud->free || bud->size < size)
+			continue;
+
+		if (!small_fit || small_fit->size > bud->size)
+			small_fit = bud;
+		if (bud->size == size)
+			break;
+	}
+	if (!small_fit)
+		return NULL;
+	bud = small_fit;
+
+	while (bud->size >= size * 2) {
+		bud = dma_slow_bud_split(bud, gfp);
+		if (!bud)
+			return NULL;
+	}
+
+	bud->free = false;
+	*dma = shu->dma + bud->offset;
+	return shu->addr + (bud->offset >> sal->ops->ptr_shf);
+}
+
+static void *
+dma_sal_alloc_buddy(struct dma_slow_allocator *sal, unsigned int size,
+		    dma_addr_t *dma, gfp_t gfp)
+{
+	struct dma_slow_huge *shu;
+	void *addr;
+
+	list_for_each_entry(shu, &sal->huge, huge) {
+		addr = __dma_sal_alloc_buddy(sal, shu, size, dma, gfp);
+		if (addr)
+			return addr;
+	}
+
+	if (!sal->ops->alloc_huge)
+		return NULL;
+
+	shu = kzalloc(sizeof(*shu), gfp);
+	if (!shu)
+		return NULL;
+	if (sal->ops->alloc_huge(sal, shu, size, gfp)) {
+		kfree(shu);
+		return NULL;
+	}
+	list_add(&shu->huge, &sal->huge);
+
+	return __dma_sal_alloc_buddy(sal, shu, size, dma, gfp);
+}
+
+static bool
+__dma_sal_free_buddy(struct dma_slow_allocator *sal, struct dma_slow_huge *shu,
+		     void *addr, unsigned int size, dma_addr_t dma)
+{
+	struct dma_slow_buddy *bud;
+	dma_addr_t exp_dma;
+	void *exp_addr;
+
+	list_for_each_entry(bud, &shu->buddy_list, list) {
+		exp_dma = shu->dma + bud->offset;
+		exp_addr = shu->addr + (bud->offset >> sal->ops->ptr_shf);
+
+		if (exp_addr != addr)
+			continue;
+
+		if (exp_dma != dma || bud->size != size)
+			pr_warn("free: param mismatch: %u %u, %lu %lu\n",
+				bud->size, size, (ulong)exp_dma, (ulong)dma);
+		if (bud->free)
+			pr_warn("double free: %u %lu\n", size, (ulong)dma);
+		bud->free = true;
+		return true;
+	}
+
+	return false;
+}
+
+static void
+dma_slow_maybe_free_huge(struct dma_slow_allocator *sal,
+			 struct dma_slow_huge *shu)
+{
+	struct dma_slow_buddy *bud;
+
+	bud = list_first_entry(&shu->buddy_list, typeof(*bud), list);
+	if (!bud->free || bud->size != shu->size)
+		return;
+
+	if (!sal->ops->alloc_huge)
+		return;
+
+	kfree(bud);
+
+	sal->ops->free_huge(sal, shu);
+	list_del(&shu->huge);
+	kfree(shu);
+}
+
+static bool
+dma_sal_free_buddy(struct dma_slow_allocator *sal, void *addr,
+		   unsigned int size, dma_addr_t dma)
+{
+	struct dma_slow_huge *shu;
+	bool freed = false;
+
+	list_for_each_entry(shu, &sal->huge, huge) {
+		freed = __dma_sal_free_buddy(sal, shu, addr, size, dma);
+		if (freed)
+			break;
+	}
+	if (freed) {
+		while (dma_slow_bud_coalesce(shu))
+			/* I know, it's not efficient.
+			 * But all of SAL is on the config path.
+			 */;
+		dma_slow_maybe_free_huge(sal, shu);
+	}
+	return freed;
+}
+
+static void *
+dma_sal_alloc_fb(struct dma_slow_allocator *sal, unsigned int size,
+		 dma_addr_t *dma, gfp_t gfp)
+{
+	struct dma_slow_fall *fb;
+
+	fb = kzalloc(sizeof(*fb), gfp);
+	if (!fb)
+		return NULL;
+	fb->size = size;
+
+	if (sal->ops->alloc_fall(sal, fb, size, gfp)) {
+		kfree(fb);
+		return NULL;
+	}
+	list_add(&fb->fb, &sal->fallback);
+
+	*dma = fb->dma;
+	return fb->addr;
+}
+
+static bool dma_sal_free_fb(struct dma_slow_allocator *sal, void *addr,
+			    unsigned int size, dma_addr_t dma)
+{
+	struct dma_slow_fall *fb, *pos;
+
+	fb = NULL;
+	list_for_each_entry(pos, &sal->fallback, fb)
+		if (pos->addr == addr) {
+			fb = pos;
+			break;
+		}
+
+	if (!fb) {
+		pr_warn("free: address %px not found\n", addr);
+		return false;
+	}
+
+	if (fb->size != size || fb->dma != dma)
+		pr_warn("free: param mismatch: %u %u, %lu %lu\n",
+			fb->size, size, (ulong)fb->dma, (ulong)dma);
+
+	list_del(&fb->fb);
+	sal->ops->free_fall(sal, fb);
+	kfree(fb);
+	return true;
+}
+
+void *dma_sal_alloc(struct dma_slow_allocator *sal, unsigned int size,
+		    dma_addr_t *dma, gfp_t gfp)
+{
+	void *ret;
+
+	ret = dma_sal_alloc_buddy(sal, size, dma, gfp);
+	if (!ret)
+		ret = dma_sal_alloc_fb(sal, size, dma, gfp);
+	if (!ret)
+		return NULL;
+
+	dma_slow_get(sal);
+	return ret;
+}
+
+void dma_sal_free(struct dma_slow_allocator *sal, void *addr,
+		  unsigned int size, dma_addr_t dma)
+{
+	if (!dma_sal_free_buddy(sal, addr, size, dma) &&
+	    !dma_sal_free_fb(sal, addr, size, dma))
+		return;
+
+	dma_slow_put(sal);
+}
+
+void dma_sal_init(struct dma_slow_allocator *sal,
+		  const struct dma_slow_allocator_ops *ops,
+		  struct device *dev)
+{
+	sal->ops = ops;
+	sal->dev = dev;
+
+	INIT_LIST_HEAD(&sal->huge);
+	INIT_LIST_HEAD(&sal->fallback);
+
+	refcount_set(&sal->user_cnt, 1);
+}
+
+/*****************************
+ ***  DMA COCOA allocator  ***
+ *****************************/
+static int
+dma_cocoa_alloc_huge(struct dma_slow_allocator *sal, struct dma_slow_huge *shu,
+		     unsigned int size, gfp_t gfp)
+{
+	if (size >= SZ_2M)
+		return -ENOMEM;
+
+	shu->addr = dma_alloc_coherent(sal->dev, SZ_2M, &shu->dma, gfp);
+	if (!shu->addr)
+		return -ENOMEM;
+
+	if (dma_slow_huge_init(shu, shu->addr, SZ_2M, shu->dma, gfp))
+		goto err_free_dma;
+
+	return 0;
+
+err_free_dma:
+	dma_free_coherent(sal->dev, SZ_2M, shu->addr, shu->dma);
+	return -ENOMEM;
+}
+
+static void
+dma_cocoa_free_huge(struct dma_slow_allocator *sal, struct dma_slow_huge *shu)
+{
+	dma_free_coherent(sal->dev, SZ_2M, shu->addr, shu->dma);
+}
+
+static int
+dma_cocoa_alloc_fall(struct dma_slow_allocator *sal, struct dma_slow_fall *fb,
+		     unsigned int size, gfp_t gfp)
+{
+	fb->addr = dma_alloc_coherent(sal->dev, size, &fb->dma, gfp);
+	if (!fb->addr)
+		return -ENOMEM;
+	return 0;
+}
+
+static void
+dma_cocoa_free_fall(struct dma_slow_allocator *sal, struct dma_slow_fall *fb)
+{
+	dma_free_coherent(sal->dev, fb->size, fb->addr, fb->dma);
+}
+
+static const struct dma_slow_allocator_ops dma_cocoa_ops = {
+	.alloc_huge	= dma_cocoa_alloc_huge,
+	.free_huge	= dma_cocoa_free_huge,
+	.alloc_fall	= dma_cocoa_alloc_fall,
+	.free_fall	= dma_cocoa_free_fall,
+};
+
+struct dma_cocoa {
+	struct dma_slow_allocator sal;
+};
+
+struct dma_cocoa *dma_cocoa_create(struct device *dev, gfp_t gfp)
+{
+	struct dma_cocoa *cocoa;
+
+	cocoa = kzalloc(sizeof(*cocoa), gfp);
+	if (!cocoa)
+		return NULL;
+
+	dma_sal_init(&cocoa->sal, &dma_cocoa_ops, dev);
+
+	return cocoa;
+}
+
+void dma_cocoa_destroy(struct dma_cocoa *cocoa)
+{
+	dma_slow_put(&cocoa->sal);
+	WARN_ON(dma_sal_in_use(&cocoa->sal));
+	kfree(cocoa);
+}
+
+void *dma_cocoa_alloc(struct dma_cocoa *cocoa, unsigned long size,
+		      dma_addr_t *dma, gfp_t gfp)
+{
+	void *addr;
+
+	size = roundup_pow_of_two(size);
+	addr = dma_sal_alloc(&cocoa->sal, size, dma, gfp);
+	if (!addr)
+		return NULL;
+	memset(addr, 0, size);
+	return addr;
+}
+
+void dma_cocoa_free(struct dma_cocoa *cocoa, unsigned long size, void *addr,
+		    dma_addr_t dma)
+{
+	size = roundup_pow_of_two(size);
+	dma_sal_free(&cocoa->sal, addr, size, dma);
+}
diff --git a/net/core/dcalloc.h b/net/core/dcalloc.h
new file mode 100644
index 000000000000..c7e75ef0cb81
--- /dev/null
+++ b/net/core/dcalloc.h
@@ -0,0 +1,93 @@ 
+#ifndef __DCALLOC_H
+#define __DCALLOC_H
+
+#include <linux/dma-mapping.h>
+#include <net/dcalloc.h>
+
+struct device;
+
+/* struct dma_slow_huge - AKA @shu, large block which will get chopped up */
+struct dma_slow_huge {
+	void *addr;
+	unsigned int size;
+	dma_addr_t dma;
+
+	struct list_head huge;
+	struct list_head buddy_list;	/* struct dma_slow_buddy */
+};
+
+/* Single allocation piece */
+struct dma_slow_buddy {
+	unsigned int offset;
+	unsigned int size;
+
+	bool free;
+
+	struct list_head list;
+};
+
+/* struct dma_slow_fall - AKA @fb, fallback when huge can't be allocated */
+struct dma_slow_fall {
+	void *addr;
+	unsigned int size;
+	dma_addr_t dma;
+
+	struct list_head fb;
+};
+
+/* struct dma_slow_allocator - AKA @sal, per device allocator */
+struct dma_slow_allocator {
+	const struct dma_slow_allocator_ops *ops;
+	struct device *dev;
+
+	unsigned int ptr_shf;
+	refcount_t user_cnt;
+
+	struct list_head huge;		/* struct dma_slow_huge */
+	struct list_head fallback;	/* struct dma_slow_fall */
+};
+
+struct dma_slow_allocator_ops {
+	u8	ptr_shf;
+
+	int (*alloc_huge)(struct dma_slow_allocator *sal,
+			  struct dma_slow_huge *shu,
+			  unsigned int size, gfp_t gfp);
+	void (*free_huge)(struct dma_slow_allocator *sal,
+			  struct dma_slow_huge *shu);
+	int (*alloc_fall)(struct dma_slow_allocator *sal,
+			  struct dma_slow_fall *fb,
+			  unsigned int size, gfp_t gfp);
+	void (*free_fall)(struct dma_slow_allocator *sal,
+			  struct dma_slow_fall *fb);
+
+	void (*release)(struct dma_slow_allocator *sal);
+};
+
+int dma_slow_huge_init(struct dma_slow_huge *shu, void *addr,
+		       unsigned int size, dma_addr_t dma, gfp_t gfp);
+
+void dma_sal_init(struct dma_slow_allocator *sal,
+		  const struct dma_slow_allocator_ops *ops,
+		  struct device *dev);
+
+void *dma_sal_alloc(struct dma_slow_allocator *sal, unsigned int size,
+		    dma_addr_t *dma, gfp_t gfp);
+void dma_sal_free(struct dma_slow_allocator *sal, void *addr,
+		  unsigned int size, dma_addr_t dma);
+
+static inline void dma_slow_get(struct dma_slow_allocator *sal)
+{
+	refcount_inc(&sal->user_cnt);
+}
+
+static inline void dma_slow_put(struct dma_slow_allocator *sal)
+{
+	if (!refcount_dec_and_test(&sal->user_cnt))
+		return;
+
+	if (sal->ops->release)
+		sal->ops->release(sal);
+}
+
+#endif
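
To show how the sal/ops split is meant to be consumed beyond the coherent
"cocoa" backend, here is a minimal sketch of a hypothetical backend built
on dma_sal_init(). It provides only the fallback hooks; with ->alloc_huge
left NULL, dma_sal_alloc() skips the buddy path entirely and services
every request from the fallback list. All foo_* names are invented for the
example and are not part of this patch:

#include <linux/errno.h>
#include <linux/slab.h>
#include "dcalloc.h"	/* net/core internal header added by this patch */

struct foo_slow {
	struct dma_slow_allocator sal;
};

static int foo_alloc_fall(struct dma_slow_allocator *sal,
			  struct dma_slow_fall *fb,
			  unsigned int size, gfp_t gfp)
{
	/* fb->size has already been recorded by dma_sal_alloc_fb(). */
	fb->addr = dma_alloc_coherent(sal->dev, size, &fb->dma, gfp);
	return fb->addr ? 0 : -ENOMEM;
}

static void foo_free_fall(struct dma_slow_allocator *sal,
			  struct dma_slow_fall *fb)
{
	dma_free_coherent(sal->dev, fb->size, fb->addr, fb->dma);
}

static const struct dma_slow_allocator_ops foo_ops = {
	/* .ptr_shf, .alloc_huge and .free_huge left at 0/NULL: no huge
	 * blocks are ever created, so the buddy code is never exercised.
	 */
	.alloc_fall	= foo_alloc_fall,
	.free_fall	= foo_free_fall,
};

static struct foo_slow *foo_slow_create(struct device *dev, gfp_t gfp)
{
	struct foo_slow *fs = kzalloc(sizeof(*fs), gfp);

	if (!fs)
		return NULL;
	dma_sal_init(&fs->sal, &foo_ops, dev);
	return fs;
}

A backend that does want the buddy behaviour additionally fills in
.alloc_huge/.free_huge (and .ptr_shf if its addr cookie is not
byte-addressed), as dma_cocoa_ops does in net/core/dcalloc.c above.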