Message ID | 20240415131941.51153-7-linyunsheng@huawei.com (mailing list archive) |
---|---|
State | Changes Requested |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | First try to replace page_frag with page_frag_cache | expand |
On Mon, Apr 15, 2024 at 6:22 AM Yunsheng Lin <linyunsheng@huawei.com> wrote: > > When page_frag_alloc_* API doesn't need data alignment, the > ALIGN() operation is unnecessary, so change page_frag_alloc_* > API to accept align param instead of align_mask param, and do > the ALIGN()'ing in the inline helper when needed. > > Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com> The vast majority of callers are using this aligned one way or another. If anything with your recent changes we should probably be making sure to align the fragsz as well as the offset since most callers were only using the alignment of the fragsz in order to get their alignment. My main concern is that this change implies that most are using an unaligned setup when it is in fact quite the opposite. > --- > include/linux/page_frag_cache.h | 20 ++++++++++++-------- > include/linux/skbuff.h | 12 ++++++------ > mm/page_frag_cache.c | 9 ++++----- > net/core/skbuff.c | 12 +++++------- > net/rxrpc/txbuf.c | 5 +++-- > 5 files changed, 30 insertions(+), 28 deletions(-) > > diff --git a/include/linux/page_frag_cache.h b/include/linux/page_frag_cache.h > index 04810d8d6a7d..cc0ede0912f3 100644 > --- a/include/linux/page_frag_cache.h > +++ b/include/linux/page_frag_cache.h > @@ -25,21 +25,25 @@ struct page_frag_cache { > > void page_frag_cache_drain(struct page_frag_cache *nc); > void __page_frag_cache_drain(struct page *page, unsigned int count); > -void *__page_frag_alloc_align(struct page_frag_cache *nc, unsigned int fragsz, > - gfp_t gfp_mask, unsigned int align_mask); > +void *page_frag_alloc(struct page_frag_cache *nc, unsigned int fragsz, > + gfp_t gfp_mask); > + > +static inline void *__page_frag_alloc_align(struct page_frag_cache *nc, > + unsigned int fragsz, gfp_t gfp_mask, > + unsigned int align) > +{ > + nc->offset = ALIGN(nc->offset, align); > + > + return page_frag_alloc(nc, fragsz, gfp_mask); > +} > I would rather not have us breaking up the alignment into another function. It makes this much more difficult to work with. In addition you are adding offsets without actually adding to the pages which makes this seem exploitable. Basically just pass an alignment value of 32K and you are forcing a page eviction regardless. > static inline void *page_frag_alloc_align(struct page_frag_cache *nc, > unsigned int fragsz, gfp_t gfp_mask, > unsigned int align) > { > WARN_ON_ONCE(!is_power_of_2(align)); > - return __page_frag_alloc_align(nc, fragsz, gfp_mask, -align); > -} > > -static inline void *page_frag_alloc(struct page_frag_cache *nc, > - unsigned int fragsz, gfp_t gfp_mask) > -{ > - return page_frag_alloc_align(nc, fragsz, gfp_mask, ~0u); > + return __page_frag_alloc_align(nc, fragsz, gfp_mask, align); > } > > void page_frag_free(void *addr); > diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h > index f2dc1f735c79..43c704589deb 100644 > --- a/include/linux/skbuff.h > +++ b/include/linux/skbuff.h > @@ -3268,7 +3268,7 @@ static inline void skb_queue_purge(struct sk_buff_head *list) > unsigned int skb_rbtree_purge(struct rb_root *root); > void skb_errqueue_purge(struct sk_buff_head *list); > > -void *__netdev_alloc_frag_align(unsigned int fragsz, unsigned int align_mask); > +void *__netdev_alloc_frag_align(unsigned int fragsz, unsigned int align); > > /** > * netdev_alloc_frag - allocate a page fragment > @@ -3279,14 +3279,14 @@ void *__netdev_alloc_frag_align(unsigned int fragsz, unsigned int align_mask); > */ > static inline void *netdev_alloc_frag(unsigned int fragsz) > { > - return __netdev_alloc_frag_align(fragsz, ~0u); > + return __netdev_alloc_frag_align(fragsz, 1u); > } > > static inline void *netdev_alloc_frag_align(unsigned int fragsz, > unsigned int align) > { > WARN_ON_ONCE(!is_power_of_2(align)); > - return __netdev_alloc_frag_align(fragsz, -align); > + return __netdev_alloc_frag_align(fragsz, align); > } > > struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int length, > @@ -3346,18 +3346,18 @@ static inline void skb_free_frag(void *addr) > page_frag_free(addr); > } > > -void *__napi_alloc_frag_align(unsigned int fragsz, unsigned int align_mask); > +void *__napi_alloc_frag_align(unsigned int fragsz, unsigned int align); > > static inline void *napi_alloc_frag(unsigned int fragsz) > { > - return __napi_alloc_frag_align(fragsz, ~0u); > + return __napi_alloc_frag_align(fragsz, 1u); > } > > static inline void *napi_alloc_frag_align(unsigned int fragsz, > unsigned int align) > { > WARN_ON_ONCE(!is_power_of_2(align)); > - return __napi_alloc_frag_align(fragsz, -align); > + return __napi_alloc_frag_align(fragsz, align); > } > > struct sk_buff *napi_alloc_skb(struct napi_struct *napi, unsigned int length); > diff --git a/mm/page_frag_cache.c b/mm/page_frag_cache.c > index dc864ee09536..b4408187e1ab 100644 > --- a/mm/page_frag_cache.c > +++ b/mm/page_frag_cache.c > @@ -61,9 +61,8 @@ void __page_frag_cache_drain(struct page *page, unsigned int count) > } > EXPORT_SYMBOL(__page_frag_cache_drain); > > -void *__page_frag_alloc_align(struct page_frag_cache *nc, > - unsigned int fragsz, gfp_t gfp_mask, > - unsigned int align_mask) > +void *page_frag_alloc(struct page_frag_cache *nc, unsigned int fragsz, > + gfp_t gfp_mask) > { > unsigned int size, offset; > struct page *page; > @@ -92,7 +91,7 @@ void *__page_frag_alloc_align(struct page_frag_cache *nc, > size = PAGE_SIZE; > #endif > > - offset = ALIGN(nc->offset, -align_mask); > + offset = nc->offset; > if (unlikely(offset + fragsz > size)) { > page = virt_to_page(nc->va); > > @@ -129,7 +128,7 @@ void *__page_frag_alloc_align(struct page_frag_cache *nc, > > return nc->va + offset; > } > -EXPORT_SYMBOL(__page_frag_alloc_align); > +EXPORT_SYMBOL(page_frag_alloc); > > /* > * Frees a page fragment allocated out of either a compound or order 0 page. > diff --git a/net/core/skbuff.c b/net/core/skbuff.c > index ea052fa710d8..676e2d857f02 100644 > --- a/net/core/skbuff.c > +++ b/net/core/skbuff.c > @@ -306,18 +306,17 @@ void napi_get_frags_check(struct napi_struct *napi) > local_bh_enable(); > } > > -void *__napi_alloc_frag_align(unsigned int fragsz, unsigned int align_mask) > +void *__napi_alloc_frag_align(unsigned int fragsz, unsigned int align) > { > struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); > > fragsz = SKB_DATA_ALIGN(fragsz); > So this is a perfect example. This caller is aligning the size by SMP_CACHE_BYTES. This is the most typical case. Either this or L1_CACHE_BYTES. As such all requests should be aligned to at least that. I would prefer it if we didn't strip the alignment code out of our main allocating function. If anything, maybe we should make it more specific that the expectation is that fragsz is a multiple of the alignment. > - return __page_frag_alloc_align(&nc->page, fragsz, GFP_ATOMIC, > - align_mask); > + return __page_frag_alloc_align(&nc->page, fragsz, GFP_ATOMIC, align); > } > EXPORT_SYMBOL(__napi_alloc_frag_align); > > -void *__netdev_alloc_frag_align(unsigned int fragsz, unsigned int align_mask) > +void *__netdev_alloc_frag_align(unsigned int fragsz, unsigned int align) > { > void *data; > > @@ -325,15 +324,14 @@ void *__netdev_alloc_frag_align(unsigned int fragsz, unsigned int align_mask) > if (in_hardirq() || irqs_disabled()) { > struct page_frag_cache *nc = this_cpu_ptr(&netdev_alloc_cache); > > - data = __page_frag_alloc_align(nc, fragsz, GFP_ATOMIC, > - align_mask); > + data = __page_frag_alloc_align(nc, fragsz, GFP_ATOMIC, align); > } else { > struct napi_alloc_cache *nc; > > local_bh_disable(); > nc = this_cpu_ptr(&napi_alloc_cache); > data = __page_frag_alloc_align(&nc->page, fragsz, GFP_ATOMIC, > - align_mask); > + align); > local_bh_enable(); > } > return data; > diff --git a/net/rxrpc/txbuf.c b/net/rxrpc/txbuf.c > index e0679658d9de..eb640875bf07 100644 > --- a/net/rxrpc/txbuf.c > +++ b/net/rxrpc/txbuf.c > @@ -32,9 +32,10 @@ struct rxrpc_txbuf *rxrpc_alloc_data_txbuf(struct rxrpc_call *call, size_t data_ > hoff = round_up(sizeof(*whdr), data_align) - sizeof(*whdr); > total = hoff + sizeof(*whdr) + data_size; > > + data_align = max_t(size_t, data_align, L1_CACHE_BYTES); > mutex_lock(&call->conn->tx_data_alloc_lock); > - buf = __page_frag_alloc_align(&call->conn->tx_data_alloc, total, gfp, > - ~(data_align - 1) & ~(L1_CACHE_BYTES - 1)); > + buf = page_frag_alloc_align(&call->conn->tx_data_alloc, total, gfp, > + data_align); > mutex_unlock(&call->conn->tx_data_alloc_lock); > if (!buf) { > kfree(txb); > -- > 2.33.0 >
On 2024/4/17 0:08, Alexander Duyck wrote: > On Mon, Apr 15, 2024 at 6:22 AM Yunsheng Lin <linyunsheng@huawei.com> wrote: >> >> When page_frag_alloc_* API doesn't need data alignment, the >> ALIGN() operation is unnecessary, so change page_frag_alloc_* >> API to accept align param instead of align_mask param, and do >> the ALIGN()'ing in the inline helper when needed. >> >> Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com> > > The vast majority of callers are using this aligned one way or > another. If anything with your recent changes we should probably be > making sure to align the fragsz as well as the offset since most > callers were only using the alignment of the fragsz in order to get > their alignment. > > My main concern is that this change implies that most are using an > unaligned setup when it is in fact quite the opposite. I think the above is depending on what we are about is 'offset unaligned' or 'fragsz unaligned'. 'offset unaligned' seems like the most case here. > >> --- >> include/linux/page_frag_cache.h | 20 ++++++++++++-------- >> include/linux/skbuff.h | 12 ++++++------ >> mm/page_frag_cache.c | 9 ++++----- >> net/core/skbuff.c | 12 +++++------- >> net/rxrpc/txbuf.c | 5 +++-- >> 5 files changed, 30 insertions(+), 28 deletions(-) >> >> diff --git a/include/linux/page_frag_cache.h b/include/linux/page_frag_cache.h >> index 04810d8d6a7d..cc0ede0912f3 100644 >> --- a/include/linux/page_frag_cache.h >> +++ b/include/linux/page_frag_cache.h >> @@ -25,21 +25,25 @@ struct page_frag_cache { >> >> void page_frag_cache_drain(struct page_frag_cache *nc); >> void __page_frag_cache_drain(struct page *page, unsigned int count); >> -void *__page_frag_alloc_align(struct page_frag_cache *nc, unsigned int fragsz, >> - gfp_t gfp_mask, unsigned int align_mask); >> +void *page_frag_alloc(struct page_frag_cache *nc, unsigned int fragsz, >> + gfp_t gfp_mask); >> + >> +static inline void *__page_frag_alloc_align(struct page_frag_cache *nc, >> + unsigned int fragsz, gfp_t gfp_mask, >> + unsigned int align) >> +{ >> + nc->offset = ALIGN(nc->offset, align); >> + >> + return page_frag_alloc(nc, fragsz, gfp_mask); >> +} >> > > I would rather not have us breaking up the alignment into another > function. It makes this much more difficult to work with. In addition > you are adding offsets without actually adding to the pages which > makes this seem exploitable. Basically just pass an alignment value of > 32K and you are forcing a page eviction regardless. Yes, as you mentioned in patch 9: The "align >= PAGE_SIZE" fix should probably go with your change that > reversed the direction. > >> static inline void *page_frag_alloc_align(struct page_frag_cache *nc, >> unsigned int fragsz, gfp_t gfp_mask, >> unsigned int align) >> { >> WARN_ON_ONCE(!is_power_of_2(align)); >> - return __page_frag_alloc_align(nc, fragsz, gfp_mask, -align); >> -} >> >> -static inline void *page_frag_alloc(struct page_frag_cache *nc, >> - unsigned int fragsz, gfp_t gfp_mask) >> -{ >> - return page_frag_alloc_align(nc, fragsz, gfp_mask, ~0u); >> + return __page_frag_alloc_align(nc, fragsz, gfp_mask, align); >> } >> ... >> /* >> * Frees a page fragment allocated out of either a compound or order 0 page. >> diff --git a/net/core/skbuff.c b/net/core/skbuff.c >> index ea052fa710d8..676e2d857f02 100644 >> --- a/net/core/skbuff.c >> +++ b/net/core/skbuff.c >> @@ -306,18 +306,17 @@ void napi_get_frags_check(struct napi_struct *napi) >> local_bh_enable(); >> } >> >> -void *__napi_alloc_frag_align(unsigned int fragsz, unsigned int align_mask) >> +void *__napi_alloc_frag_align(unsigned int fragsz, unsigned int align) >> { >> struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); >> >> fragsz = SKB_DATA_ALIGN(fragsz); >> > > So this is a perfect example. This caller is aligning the size by > SMP_CACHE_BYTES. This is the most typical case. Either this or > L1_CACHE_BYTES. As such all requests should be aligned to at least > that. I would prefer it if we didn't strip the alignment code out of > our main allocating function. If anything, maybe we should make it > more specific that the expectation is that fragsz is a multiple of the > alignment. Let's discuss the above in patch 5.
diff --git a/include/linux/page_frag_cache.h b/include/linux/page_frag_cache.h index 04810d8d6a7d..cc0ede0912f3 100644 --- a/include/linux/page_frag_cache.h +++ b/include/linux/page_frag_cache.h @@ -25,21 +25,25 @@ struct page_frag_cache { void page_frag_cache_drain(struct page_frag_cache *nc); void __page_frag_cache_drain(struct page *page, unsigned int count); -void *__page_frag_alloc_align(struct page_frag_cache *nc, unsigned int fragsz, - gfp_t gfp_mask, unsigned int align_mask); +void *page_frag_alloc(struct page_frag_cache *nc, unsigned int fragsz, + gfp_t gfp_mask); + +static inline void *__page_frag_alloc_align(struct page_frag_cache *nc, + unsigned int fragsz, gfp_t gfp_mask, + unsigned int align) +{ + nc->offset = ALIGN(nc->offset, align); + + return page_frag_alloc(nc, fragsz, gfp_mask); +} static inline void *page_frag_alloc_align(struct page_frag_cache *nc, unsigned int fragsz, gfp_t gfp_mask, unsigned int align) { WARN_ON_ONCE(!is_power_of_2(align)); - return __page_frag_alloc_align(nc, fragsz, gfp_mask, -align); -} -static inline void *page_frag_alloc(struct page_frag_cache *nc, - unsigned int fragsz, gfp_t gfp_mask) -{ - return page_frag_alloc_align(nc, fragsz, gfp_mask, ~0u); + return __page_frag_alloc_align(nc, fragsz, gfp_mask, align); } void page_frag_free(void *addr); diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f2dc1f735c79..43c704589deb 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3268,7 +3268,7 @@ static inline void skb_queue_purge(struct sk_buff_head *list) unsigned int skb_rbtree_purge(struct rb_root *root); void skb_errqueue_purge(struct sk_buff_head *list); -void *__netdev_alloc_frag_align(unsigned int fragsz, unsigned int align_mask); +void *__netdev_alloc_frag_align(unsigned int fragsz, unsigned int align); /** * netdev_alloc_frag - allocate a page fragment @@ -3279,14 +3279,14 @@ void *__netdev_alloc_frag_align(unsigned int fragsz, unsigned int align_mask); */ static inline void *netdev_alloc_frag(unsigned int fragsz) { - return __netdev_alloc_frag_align(fragsz, ~0u); + return __netdev_alloc_frag_align(fragsz, 1u); } static inline void *netdev_alloc_frag_align(unsigned int fragsz, unsigned int align) { WARN_ON_ONCE(!is_power_of_2(align)); - return __netdev_alloc_frag_align(fragsz, -align); + return __netdev_alloc_frag_align(fragsz, align); } struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int length, @@ -3346,18 +3346,18 @@ static inline void skb_free_frag(void *addr) page_frag_free(addr); } -void *__napi_alloc_frag_align(unsigned int fragsz, unsigned int align_mask); +void *__napi_alloc_frag_align(unsigned int fragsz, unsigned int align); static inline void *napi_alloc_frag(unsigned int fragsz) { - return __napi_alloc_frag_align(fragsz, ~0u); + return __napi_alloc_frag_align(fragsz, 1u); } static inline void *napi_alloc_frag_align(unsigned int fragsz, unsigned int align) { WARN_ON_ONCE(!is_power_of_2(align)); - return __napi_alloc_frag_align(fragsz, -align); + return __napi_alloc_frag_align(fragsz, align); } struct sk_buff *napi_alloc_skb(struct napi_struct *napi, unsigned int length); diff --git a/mm/page_frag_cache.c b/mm/page_frag_cache.c index dc864ee09536..b4408187e1ab 100644 --- a/mm/page_frag_cache.c +++ b/mm/page_frag_cache.c @@ -61,9 +61,8 @@ void __page_frag_cache_drain(struct page *page, unsigned int count) } EXPORT_SYMBOL(__page_frag_cache_drain); -void *__page_frag_alloc_align(struct page_frag_cache *nc, - unsigned int fragsz, gfp_t gfp_mask, - unsigned int align_mask) +void *page_frag_alloc(struct page_frag_cache *nc, unsigned int fragsz, + gfp_t gfp_mask) { unsigned int size, offset; struct page *page; @@ -92,7 +91,7 @@ void *__page_frag_alloc_align(struct page_frag_cache *nc, size = PAGE_SIZE; #endif - offset = ALIGN(nc->offset, -align_mask); + offset = nc->offset; if (unlikely(offset + fragsz > size)) { page = virt_to_page(nc->va); @@ -129,7 +128,7 @@ void *__page_frag_alloc_align(struct page_frag_cache *nc, return nc->va + offset; } -EXPORT_SYMBOL(__page_frag_alloc_align); +EXPORT_SYMBOL(page_frag_alloc); /* * Frees a page fragment allocated out of either a compound or order 0 page. diff --git a/net/core/skbuff.c b/net/core/skbuff.c index ea052fa710d8..676e2d857f02 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -306,18 +306,17 @@ void napi_get_frags_check(struct napi_struct *napi) local_bh_enable(); } -void *__napi_alloc_frag_align(unsigned int fragsz, unsigned int align_mask) +void *__napi_alloc_frag_align(unsigned int fragsz, unsigned int align) { struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); fragsz = SKB_DATA_ALIGN(fragsz); - return __page_frag_alloc_align(&nc->page, fragsz, GFP_ATOMIC, - align_mask); + return __page_frag_alloc_align(&nc->page, fragsz, GFP_ATOMIC, align); } EXPORT_SYMBOL(__napi_alloc_frag_align); -void *__netdev_alloc_frag_align(unsigned int fragsz, unsigned int align_mask) +void *__netdev_alloc_frag_align(unsigned int fragsz, unsigned int align) { void *data; @@ -325,15 +324,14 @@ void *__netdev_alloc_frag_align(unsigned int fragsz, unsigned int align_mask) if (in_hardirq() || irqs_disabled()) { struct page_frag_cache *nc = this_cpu_ptr(&netdev_alloc_cache); - data = __page_frag_alloc_align(nc, fragsz, GFP_ATOMIC, - align_mask); + data = __page_frag_alloc_align(nc, fragsz, GFP_ATOMIC, align); } else { struct napi_alloc_cache *nc; local_bh_disable(); nc = this_cpu_ptr(&napi_alloc_cache); data = __page_frag_alloc_align(&nc->page, fragsz, GFP_ATOMIC, - align_mask); + align); local_bh_enable(); } return data; diff --git a/net/rxrpc/txbuf.c b/net/rxrpc/txbuf.c index e0679658d9de..eb640875bf07 100644 --- a/net/rxrpc/txbuf.c +++ b/net/rxrpc/txbuf.c @@ -32,9 +32,10 @@ struct rxrpc_txbuf *rxrpc_alloc_data_txbuf(struct rxrpc_call *call, size_t data_ hoff = round_up(sizeof(*whdr), data_align) - sizeof(*whdr); total = hoff + sizeof(*whdr) + data_size; + data_align = max_t(size_t, data_align, L1_CACHE_BYTES); mutex_lock(&call->conn->tx_data_alloc_lock); - buf = __page_frag_alloc_align(&call->conn->tx_data_alloc, total, gfp, - ~(data_align - 1) & ~(L1_CACHE_BYTES - 1)); + buf = page_frag_alloc_align(&call->conn->tx_data_alloc, total, gfp, + data_align); mutex_unlock(&call->conn->tx_data_alloc_lock); if (!buf) { kfree(txb);
When page_frag_alloc_* API doesn't need data alignment, the ALIGN() operation is unnecessary, so change page_frag_alloc_* API to accept align param instead of align_mask param, and do the ALIGN()'ing in the inline helper when needed. Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com> --- include/linux/page_frag_cache.h | 20 ++++++++++++-------- include/linux/skbuff.h | 12 ++++++------ mm/page_frag_cache.c | 9 ++++----- net/core/skbuff.c | 12 +++++------- net/rxrpc/txbuf.c | 5 +++-- 5 files changed, 30 insertions(+), 28 deletions(-)