diff mbox series

[1/2,net-next] skbuff: move netlink_large_alloc_large_skb() to skbuff.c

Message ID 20231102062836.19074-1-lirongqing@baidu.com (mailing list archive)
State Deferred
Delegated to: Netdev Maintainers
Headers show
Series [1/2,net-next] skbuff: move netlink_large_alloc_large_skb() to skbuff.c | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net-next
netdev/apply fail Patch does not apply to net-next

Commit Message

Li,Rongqing Nov. 2, 2023, 6:28 a.m. UTC
move netlink_alloc_large_skb and netlink_skb_destructor to skbuff.c
and rename them more generic, so they can be used elsewhere large
non-contiguous physical memory is needed

Signed-off-by: Li RongQing <lirongqing@baidu.com>
---
 include/linux/skbuff.h   |  3 +++
 net/core/skbuff.c        | 40 ++++++++++++++++++++++++++++++++++++++++
 net/netlink/af_netlink.c | 41 ++---------------------------------------
 3 files changed, 45 insertions(+), 39 deletions(-)

Comments

Yunsheng Lin Nov. 2, 2023, 11:01 a.m. UTC | #1
On 2023/11/2 14:28, Li RongQing wrote:
> move netlink_alloc_large_skb and netlink_skb_destructor to skbuff.c
> and rename them more generic, so they can be used elsewhere large
> non-contiguous physical memory is needed
> 
> Signed-off-by: Li RongQing <lirongqing@baidu.com>
> ---
>  include/linux/skbuff.h   |  3 +++
>  net/core/skbuff.c        | 40 ++++++++++++++++++++++++++++++++++++++++
>  net/netlink/af_netlink.c | 41 ++---------------------------------------
>  3 files changed, 45 insertions(+), 39 deletions(-)
> 
> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
> index 4174c4b..774a401 100644
> --- a/include/linux/skbuff.h
> +++ b/include/linux/skbuff.h
> @@ -5063,5 +5063,8 @@ static inline void skb_mark_for_recycle(struct sk_buff *skb)
>  ssize_t skb_splice_from_iter(struct sk_buff *skb, struct iov_iter *iter,
>  			     ssize_t maxsize, gfp_t gfp);
>  
> +
> +void large_skb_destructor(struct sk_buff *skb);
> +struct sk_buff *alloc_large_skb(unsigned int size, int broadcast);
>  #endif	/* __KERNEL__ */
>  #endif	/* _LINUX_SKBUFF_H */
> diff --git a/net/core/skbuff.c b/net/core/skbuff.c
> index 4570705..20ffcd5 100644
> --- a/net/core/skbuff.c
> +++ b/net/core/skbuff.c
> @@ -6917,3 +6917,43 @@ ssize_t skb_splice_from_iter(struct sk_buff *skb, struct iov_iter *iter,
>  	return spliced ?: ret;
>  }
>  EXPORT_SYMBOL(skb_splice_from_iter);
> +
> +void large_skb_destructor(struct sk_buff *skb)
> +{
> +	if (is_vmalloc_addr(skb->head)) {
> +		if (!skb->cloned ||
> +		    !atomic_dec_return(&(skb_shinfo(skb)->dataref)))
> +			vfree(skb->head);
> +
> +		skb->head = NULL;

There seems to be an assumption that skb returned from
netlink_alloc_large_skb() is not expecting the frag page
for shinfo->frags*, as the above NULL setting will bypass
most of the handling in skb_release_data(),then how can we
ensure that the user is not breaking the assumption if we
make it more generic?


> +	}
> +	if (skb->sk)
> +		sock_rfree(skb);
> +}
> +EXPORT_SYMBOL(large_skb_destructor);
> +
Li,Rongqing Nov. 2, 2023, 12:09 p.m. UTC | #2
> -----Original Message-----
> From: Yunsheng Lin <linyunsheng@huawei.com>
> Sent: Thursday, November 2, 2023 7:02 PM
> To: Li,Rongqing <lirongqing@baidu.com>; netdev@vger.kernel.org
> Subject: Re: [PATCH 1/2][net-next] skbuff: move netlink_large_alloc_large_skb()
> to skbuff.c
> 
> On 2023/11/2 14:28, Li RongQing wrote:
> > move netlink_alloc_large_skb and netlink_skb_destructor to skbuff.c
> > and rename them more generic, so they can be used elsewhere large
> > non-contiguous physical memory is needed
> >
> > Signed-off-by: Li RongQing <lirongqing@baidu.com>
> > ---
> >  include/linux/skbuff.h   |  3 +++
> >  net/core/skbuff.c        | 40
> ++++++++++++++++++++++++++++++++++++++++
> >  net/netlink/af_netlink.c | 41
> > ++---------------------------------------
> >  3 files changed, 45 insertions(+), 39 deletions(-)
> >
> > diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index
> > 4174c4b..774a401 100644
> > --- a/include/linux/skbuff.h
> > +++ b/include/linux/skbuff.h
> > @@ -5063,5 +5063,8 @@ static inline void skb_mark_for_recycle(struct
> > sk_buff *skb)  ssize_t skb_splice_from_iter(struct sk_buff *skb, struct iov_iter
> *iter,
> >  			     ssize_t maxsize, gfp_t gfp);
> >
> > +
> > +void large_skb_destructor(struct sk_buff *skb); struct sk_buff
> > +*alloc_large_skb(unsigned int size, int broadcast);
> >  #endif	/* __KERNEL__ */
> >  #endif	/* _LINUX_SKBUFF_H */
> > diff --git a/net/core/skbuff.c b/net/core/skbuff.c index
> > 4570705..20ffcd5 100644
> > --- a/net/core/skbuff.c
> > +++ b/net/core/skbuff.c
> > @@ -6917,3 +6917,43 @@ ssize_t skb_splice_from_iter(struct sk_buff *skb,
> struct iov_iter *iter,
> >  	return spliced ?: ret;
> >  }
> >  EXPORT_SYMBOL(skb_splice_from_iter);
> > +
> > +void large_skb_destructor(struct sk_buff *skb) {
> > +	if (is_vmalloc_addr(skb->head)) {
> > +		if (!skb->cloned ||
> > +		    !atomic_dec_return(&(skb_shinfo(skb)->dataref)))
> > +			vfree(skb->head);
> > +
> > +		skb->head = NULL;
> 
> There seems to be an assumption that skb returned from
> netlink_alloc_large_skb() is not expecting the frag page for shinfo->frags*, as the
> above NULL setting will bypass most of the handling in skb_release_data(),then
> how can we ensure that the user is not breaking the assumption if we make it
> more generic?
> 

How about to add WARN_ON(skb_shinfo(skb)-> nr_frags) to find this condition


-Li RongQing
>
Yunsheng Lin Nov. 3, 2023, 3:50 a.m. UTC | #3
On 2023/11/2 20:09, Li,Rongqing wrote:
>> -----Original Message-----
>> From: Yunsheng Lin <linyunsheng@huawei.com>
>> Sent: Thursday, November 2, 2023 7:02 PM
>> To: Li,Rongqing <lirongqing@baidu.com>; netdev@vger.kernel.org
>> Subject: Re: [PATCH 1/2][net-next] skbuff: move netlink_large_alloc_large_skb()
>> to skbuff.c
>>
>> On 2023/11/2 14:28, Li RongQing wrote:
>>> move netlink_alloc_large_skb and netlink_skb_destructor to skbuff.c
>>> and rename them more generic, so they can be used elsewhere large
>>> non-contiguous physical memory is needed
>>>
>>> Signed-off-by: Li RongQing <lirongqing@baidu.com>
>>> ---
>>>  include/linux/skbuff.h   |  3 +++
>>>  net/core/skbuff.c        | 40
>> ++++++++++++++++++++++++++++++++++++++++
>>>  net/netlink/af_netlink.c | 41
>>> ++---------------------------------------
>>>  3 files changed, 45 insertions(+), 39 deletions(-)
>>>
>>> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index
>>> 4174c4b..774a401 100644
>>> --- a/include/linux/skbuff.h
>>> +++ b/include/linux/skbuff.h
>>> @@ -5063,5 +5063,8 @@ static inline void skb_mark_for_recycle(struct
>>> sk_buff *skb)  ssize_t skb_splice_from_iter(struct sk_buff *skb, struct iov_iter
>> *iter,
>>>  			     ssize_t maxsize, gfp_t gfp);
>>>
>>> +
>>> +void large_skb_destructor(struct sk_buff *skb); struct sk_buff
>>> +*alloc_large_skb(unsigned int size, int broadcast);
>>>  #endif	/* __KERNEL__ */
>>>  #endif	/* _LINUX_SKBUFF_H */
>>> diff --git a/net/core/skbuff.c b/net/core/skbuff.c index
>>> 4570705..20ffcd5 100644
>>> --- a/net/core/skbuff.c
>>> +++ b/net/core/skbuff.c
>>> @@ -6917,3 +6917,43 @@ ssize_t skb_splice_from_iter(struct sk_buff *skb,
>> struct iov_iter *iter,
>>>  	return spliced ?: ret;
>>>  }
>>>  EXPORT_SYMBOL(skb_splice_from_iter);
>>> +
>>> +void large_skb_destructor(struct sk_buff *skb) {
>>> +	if (is_vmalloc_addr(skb->head)) {
>>> +		if (!skb->cloned ||
>>> +		    !atomic_dec_return(&(skb_shinfo(skb)->dataref)))
>>> +			vfree(skb->head);
>>> +
>>> +		skb->head = NULL;
>>
>> There seems to be an assumption that skb returned from
>> netlink_alloc_large_skb() is not expecting the frag page for shinfo->frags*, as the
>> above NULL setting will bypass most of the handling in skb_release_data(),then
>> how can we ensure that the user is not breaking the assumption if we make it
>> more generic?
>>
> 
> How about to add WARN_ON(skb_shinfo(skb)-> nr_frags) to find this condition
> 

There is some other handling other than skb_shinfo(skb)-> nr_frags, such as
zcopy, fraglist and pp_recycle handling, I am not sure if adding those check
in the normal datapatch is worth it if netlink_alloc_large_skb() is only used
in the nlmsg operations, which is less performance senstive.

If there are other nlmsg operations that needs it too? if not, maybe we limit
netlink_alloc_large_skb() in nlmsg if we can assume all nlmsg APIs dosen't
break the above assumptionm, introducing something like vnlmsg_new() or only
change nlmsg_new() to use netlink_alloc_large_skb(), so that all nlmsg users can
make use of it.

If there is more user making use of netlink_alloc_large_skb() in the future, we
can make it usable outside of nlmsg.

> 
> -Li RongQing
>>
>
Li,Rongqing Nov. 3, 2023, 5:03 a.m. UTC | #4
> -----Original Message-----
> From: Yunsheng Lin <linyunsheng@huawei.com>
> Sent: Friday, November 3, 2023 11:50 AM
> To: Li,Rongqing <lirongqing@baidu.com>; netdev@vger.kernel.org
> Subject: Re: [PATCH 1/2][net-next] skbuff: move netlink_large_alloc_large_skb()
> to skbuff.c
> 
> On 2023/11/2 20:09, Li,Rongqing wrote:
> >> -----Original Message-----
> >> From: Yunsheng Lin <linyunsheng@huawei.com>
> >> Sent: Thursday, November 2, 2023 7:02 PM
> >> To: Li,Rongqing <lirongqing@baidu.com>; netdev@vger.kernel.org
> >> Subject: Re: [PATCH 1/2][net-next] skbuff: move
> >> netlink_large_alloc_large_skb() to skbuff.c
> >>
> >> On 2023/11/2 14:28, Li RongQing wrote:
> >>> move netlink_alloc_large_skb and netlink_skb_destructor to skbuff.c
> >>> and rename them more generic, so they can be used elsewhere large
> >>> non-contiguous physical memory is needed
> >>>
> >>> Signed-off-by: Li RongQing <lirongqing@baidu.com>
> >>> ---
> >>>  include/linux/skbuff.h   |  3 +++
> >>>  net/core/skbuff.c        | 40
> >> ++++++++++++++++++++++++++++++++++++++++
> >>>  net/netlink/af_netlink.c | 41
> >>> ++---------------------------------------
> >>>  3 files changed, 45 insertions(+), 39 deletions(-)
> >>>
> >>> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index
> >>> 4174c4b..774a401 100644
> >>> --- a/include/linux/skbuff.h
> >>> +++ b/include/linux/skbuff.h
> >>> @@ -5063,5 +5063,8 @@ static inline void skb_mark_for_recycle(struct
> >>> sk_buff *skb)  ssize_t skb_splice_from_iter(struct sk_buff *skb,
> >>> struct iov_iter
> >> *iter,
> >>>  			     ssize_t maxsize, gfp_t gfp);
> >>>
> >>> +
> >>> +void large_skb_destructor(struct sk_buff *skb); struct sk_buff
> >>> +*alloc_large_skb(unsigned int size, int broadcast);
> >>>  #endif	/* __KERNEL__ */
> >>>  #endif	/* _LINUX_SKBUFF_H */
> >>> diff --git a/net/core/skbuff.c b/net/core/skbuff.c index
> >>> 4570705..20ffcd5 100644
> >>> --- a/net/core/skbuff.c
> >>> +++ b/net/core/skbuff.c
> >>> @@ -6917,3 +6917,43 @@ ssize_t skb_splice_from_iter(struct sk_buff
> >>> *skb,
> >> struct iov_iter *iter,
> >>>  	return spliced ?: ret;
> >>>  }
> >>>  EXPORT_SYMBOL(skb_splice_from_iter);
> >>> +
> >>> +void large_skb_destructor(struct sk_buff *skb) {
> >>> +	if (is_vmalloc_addr(skb->head)) {
> >>> +		if (!skb->cloned ||
> >>> +		    !atomic_dec_return(&(skb_shinfo(skb)->dataref)))
> >>> +			vfree(skb->head);
> >>> +
> >>> +		skb->head = NULL;
> >>
> >> There seems to be an assumption that skb returned from
> >> netlink_alloc_large_skb() is not expecting the frag page for
> >> shinfo->frags*, as the above NULL setting will bypass most of the
> >> handling in skb_release_data(),then how can we ensure that the user
> >> is not breaking the assumption if we make it more generic?
> >>
> >
> > How about to add WARN_ON(skb_shinfo(skb)-> nr_frags) to find this
> > condition
> >
> 
> There is some other handling other than skb_shinfo(skb)-> nr_frags, such as
> zcopy, fraglist and pp_recycle handling, I am not sure if adding those check in the
> normal datapatch is worth it if netlink_alloc_large_skb() is only used in the nlmsg
> operations, which is less performance senstive.
> 

Add WARN_ON(skb_shinfo(skb)-> nr_frags) only in large_skb_destructor, should not effect performance.

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index f9c1f6a..24e16aa 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -6997,6 +6997,7 @@ void large_skb_destructor(struct sk_buff *skb)
                        vfree(skb->head);

                skb->head = NULL;
+               WARN_ON(skb_shinfo(skb)-> nr_frags);
        }
        if (skb->sk)
                sock_rfree(skb);


> If there are other nlmsg operations that needs it too? if not, maybe we limit
> netlink_alloc_large_skb() in nlmsg if we can assume all nlmsg APIs dosen't break
> the above assumptionm, introducing something like vnlmsg_new() or only
> change nlmsg_new() to use netlink_alloc_large_skb(), so that all nlmsg users can
> make use of it.
> 

Reasonable

Thanks

-Li

> If there is more user making use of netlink_alloc_large_skb() in the future, we
> can make it usable outside of nlmsg.
> 
> >
> > -Li RongQing
> >>
> >
diff mbox series

Patch

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 4174c4b..774a401 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -5063,5 +5063,8 @@  static inline void skb_mark_for_recycle(struct sk_buff *skb)
 ssize_t skb_splice_from_iter(struct sk_buff *skb, struct iov_iter *iter,
 			     ssize_t maxsize, gfp_t gfp);
 
+
+void large_skb_destructor(struct sk_buff *skb);
+struct sk_buff *alloc_large_skb(unsigned int size, int broadcast);
 #endif	/* __KERNEL__ */
 #endif	/* _LINUX_SKBUFF_H */
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 4570705..20ffcd5 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -6917,3 +6917,43 @@  ssize_t skb_splice_from_iter(struct sk_buff *skb, struct iov_iter *iter,
 	return spliced ?: ret;
 }
 EXPORT_SYMBOL(skb_splice_from_iter);
+
+void large_skb_destructor(struct sk_buff *skb)
+{
+	if (is_vmalloc_addr(skb->head)) {
+		if (!skb->cloned ||
+		    !atomic_dec_return(&(skb_shinfo(skb)->dataref)))
+			vfree(skb->head);
+
+		skb->head = NULL;
+	}
+	if (skb->sk)
+		sock_rfree(skb);
+}
+EXPORT_SYMBOL(large_skb_destructor);
+
+struct sk_buff *alloc_large_skb(unsigned int size,
+					       int broadcast)
+{
+	struct sk_buff *skb;
+	void *data;
+
+	if (size <= NLMSG_GOODSIZE || broadcast)
+		return alloc_skb(size, GFP_KERNEL);
+
+	size = SKB_DATA_ALIGN(size) +
+	       SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+
+	data = vmalloc(size);
+	if (!data)
+		return NULL;
+
+	skb = __build_skb(data, size);
+	if (!skb)
+		vfree(data);
+	else
+		skb->destructor = large_skb_destructor;
+
+	return skb;
+}
+EXPORT_SYMBOL(alloc_large_skb);
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 642b9d3..1d50b68 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -369,24 +369,11 @@  static void netlink_rcv_wake(struct sock *sk)
 		wake_up_interruptible(&nlk->wait);
 }
 
-static void netlink_skb_destructor(struct sk_buff *skb)
-{
-	if (is_vmalloc_addr(skb->head)) {
-		if (!skb->cloned ||
-		    !atomic_dec_return(&(skb_shinfo(skb)->dataref)))
-			vfree(skb->head);
-
-		skb->head = NULL;
-	}
-	if (skb->sk != NULL)
-		sock_rfree(skb);
-}
-
 static void netlink_skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
 {
 	WARN_ON(skb->sk != NULL);
 	skb->sk = sk;
-	skb->destructor = netlink_skb_destructor;
+	skb->destructor = large_skb_destructor;
 	atomic_add(skb->truesize, &sk->sk_rmem_alloc);
 	sk_mem_charge(sk, skb->truesize);
 }
@@ -1204,30 +1191,6 @@  struct sock *netlink_getsockbyfilp(struct file *filp)
 	return sock;
 }
 
-static struct sk_buff *netlink_alloc_large_skb(unsigned int size,
-					       int broadcast)
-{
-	struct sk_buff *skb;
-	void *data;
-
-	if (size <= NLMSG_GOODSIZE || broadcast)
-		return alloc_skb(size, GFP_KERNEL);
-
-	size = SKB_DATA_ALIGN(size) +
-	       SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
-
-	data = vmalloc(size);
-	if (data == NULL)
-		return NULL;
-
-	skb = __build_skb(data, size);
-	if (skb == NULL)
-		vfree(data);
-	else
-		skb->destructor = netlink_skb_destructor;
-
-	return skb;
-}
 
 /*
  * Attach a skb to a netlink socket.
@@ -1882,7 +1845,7 @@  static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
 	if (len > sk->sk_sndbuf - 32)
 		goto out;
 	err = -ENOBUFS;
-	skb = netlink_alloc_large_skb(len, dst_group);
+	skb = alloc_large_skb(len, dst_group);
 	if (skb == NULL)
 		goto out;