diff mbox series

[net,v2,1/2] net: fix SO_DEVMEM_DONTNEED looping too long

Message ID 20241107210331.3044434-1-almasrymina@google.com (mailing list archive)
State New
Delegated to: Netdev Maintainers
Headers show
Series [net,v2,1/2] net: fix SO_DEVMEM_DONTNEED looping too long | expand

Checks

Context Check Description
netdev/series_format success Single patches do not need cover letters
netdev/tree_selection success Clearly marked for net
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present fail Series targets non-next tree, but doesn't contain any Fixes tags
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 3 this patch: 3
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers success CCed 7 of 7 maintainers
netdev/build_clang success Errors and warnings before: 3 this patch: 3
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 8 this patch: 8
netdev/checkpatch warning WARNING: line length of 84 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 9 this patch: 9
netdev/source_inline success Was 0 now: 0
netdev/contest success net-next-2024-11-08--00-00 (tests: 782)

Commit Message

Mina Almasry Nov. 7, 2024, 9:03 p.m. UTC
Exit early if we're freeing more than 1024 frags, to prevent
looping too long.

Also minor code cleanups:
- Flip checks to reduce indentation.
- Use sizeof(*tokens) everywhere for consistentcy.

Cc: Yi Lai <yi1.lai@linux.intel.com>
Cc: Stanislav Fomichev <sdf@fomichev.me>
Signed-off-by: Mina Almasry <almasrymina@google.com>

---

v2:
- Retain token check to prevent allocation of too much memory.
- Exit early instead of pre-checking in a loop so that we don't penalize
  well behaved applications (sdf)

---
 net/core/sock.c | 42 ++++++++++++++++++++++++------------------
 1 file changed, 24 insertions(+), 18 deletions(-)

Comments

Stanislav Fomichev Nov. 8, 2024, 1:28 a.m. UTC | #1
On 11/07, Mina Almasry wrote:
> Exit early if we're freeing more than 1024 frags, to prevent
> looping too long.
> 
> Also minor code cleanups:
> - Flip checks to reduce indentation.
> - Use sizeof(*tokens) everywhere for consistentcy.
> 
> Cc: Yi Lai <yi1.lai@linux.intel.com>
> Cc: Stanislav Fomichev <sdf@fomichev.me>
> Signed-off-by: Mina Almasry <almasrymina@google.com>
> 
> ---
> 
> v2:
> - Retain token check to prevent allocation of too much memory.
> - Exit early instead of pre-checking in a loop so that we don't penalize
>   well behaved applications (sdf)
> 
> ---
>  net/core/sock.c | 42 ++++++++++++++++++++++++------------------
>  1 file changed, 24 insertions(+), 18 deletions(-)
> 
> diff --git a/net/core/sock.c b/net/core/sock.c
> index 039be95c40cf..da50df485090 100644
> --- a/net/core/sock.c
> +++ b/net/core/sock.c
> @@ -1052,32 +1052,34 @@ static int sock_reserve_memory(struct sock *sk, int bytes)
>  
>  #ifdef CONFIG_PAGE_POOL
>  
> -/* This is the number of tokens that the user can SO_DEVMEM_DONTNEED in
> - * 1 syscall. The limit exists to limit the amount of memory the kernel
> - * allocates to copy these tokens.
> +/* This is the number of tokens and frags that the user can SO_DEVMEM_DONTNEED
> + * in 1 syscall. The limit exists to limit the amount of memory the kernel
> + * allocates to copy these tokens, and to prevent looping over the frags for
> + * too long.
>   */
>  #define MAX_DONTNEED_TOKENS 128
> +#define MAX_DONTNEED_FRAGS 1024
>  
>  static noinline_for_stack int
>  sock_devmem_dontneed(struct sock *sk, sockptr_t optval, unsigned int optlen)
>  {
>  	unsigned int num_tokens, i, j, k, netmem_num = 0;
>  	struct dmabuf_token *tokens;
> +	int ret = 0, num_frags = 0;
>  	netmem_ref netmems[16];
> -	int ret = 0;
>  
>  	if (!sk_is_tcp(sk))
>  		return -EBADF;
>  
> -	if (optlen % sizeof(struct dmabuf_token) ||
> +	if (optlen % sizeof(*tokens) ||
>  	    optlen > sizeof(*tokens) * MAX_DONTNEED_TOKENS)
>  		return -EINVAL;
>  

[..]

> -	tokens = kvmalloc_array(optlen, sizeof(*tokens), GFP_KERNEL);

Oh, so we currently allocate optlen*8? This is a sneaky fix :-p

> +	num_tokens = optlen / sizeof(*tokens);
> +	tokens = kvmalloc_array(num_tokens, sizeof(*tokens), GFP_KERNEL);
>  	if (!tokens)
>  		return -ENOMEM;
>  
> -	num_tokens = optlen / sizeof(struct dmabuf_token);
>  	if (copy_from_sockptr(tokens, optval, optlen)) {
>  		kvfree(tokens);
>  		return -EFAULT;
> @@ -1086,24 +1088,28 @@ sock_devmem_dontneed(struct sock *sk, sockptr_t optval, unsigned int optlen)
>  	xa_lock_bh(&sk->sk_user_frags);
>  	for (i = 0; i < num_tokens; i++) {
>  		for (j = 0; j < tokens[i].token_count; j++) {

[..]

> +			if (++num_frags > MAX_DONTNEED_FRAGS)
> +				goto frag_limit_reached;
> +

nit: maybe reuse existing ret (and rename it to num_frags) instead of
introducing new num_frags? Both variables now seem to track the same
number.
Mina Almasry Nov. 8, 2024, 1:33 a.m. UTC | #2
On Thu, Nov 7, 2024 at 5:28 PM Stanislav Fomichev <stfomichev@gmail.com> wrote:
>
> On 11/07, Mina Almasry wrote:
> > Exit early if we're freeing more than 1024 frags, to prevent
> > looping too long.
> >
> > Also minor code cleanups:
> > - Flip checks to reduce indentation.
> > - Use sizeof(*tokens) everywhere for consistentcy.
> >
> > Cc: Yi Lai <yi1.lai@linux.intel.com>
> > Cc: Stanislav Fomichev <sdf@fomichev.me>
> > Signed-off-by: Mina Almasry <almasrymina@google.com>
> >
> > ---
> >
> > v2:
> > - Retain token check to prevent allocation of too much memory.
> > - Exit early instead of pre-checking in a loop so that we don't penalize
> >   well behaved applications (sdf)
> >
> > ---
> >  net/core/sock.c | 42 ++++++++++++++++++++++++------------------
> >  1 file changed, 24 insertions(+), 18 deletions(-)
> >
> > diff --git a/net/core/sock.c b/net/core/sock.c
> > index 039be95c40cf..da50df485090 100644
> > --- a/net/core/sock.c
> > +++ b/net/core/sock.c
> > @@ -1052,32 +1052,34 @@ static int sock_reserve_memory(struct sock *sk, int bytes)
> >
> >  #ifdef CONFIG_PAGE_POOL
> >
> > -/* This is the number of tokens that the user can SO_DEVMEM_DONTNEED in
> > - * 1 syscall. The limit exists to limit the amount of memory the kernel
> > - * allocates to copy these tokens.
> > +/* This is the number of tokens and frags that the user can SO_DEVMEM_DONTNEED
> > + * in 1 syscall. The limit exists to limit the amount of memory the kernel
> > + * allocates to copy these tokens, and to prevent looping over the frags for
> > + * too long.
> >   */
> >  #define MAX_DONTNEED_TOKENS 128
> > +#define MAX_DONTNEED_FRAGS 1024
> >
> >  static noinline_for_stack int
> >  sock_devmem_dontneed(struct sock *sk, sockptr_t optval, unsigned int optlen)
> >  {
> >       unsigned int num_tokens, i, j, k, netmem_num = 0;
> >       struct dmabuf_token *tokens;
> > +     int ret = 0, num_frags = 0;
> >       netmem_ref netmems[16];
> > -     int ret = 0;
> >
> >       if (!sk_is_tcp(sk))
> >               return -EBADF;
> >
> > -     if (optlen % sizeof(struct dmabuf_token) ||
> > +     if (optlen % sizeof(*tokens) ||
> >           optlen > sizeof(*tokens) * MAX_DONTNEED_TOKENS)
> >               return -EINVAL;
> >
>
> [..]
>
> > -     tokens = kvmalloc_array(optlen, sizeof(*tokens), GFP_KERNEL);
>
> Oh, so we currently allocate optlen*8? This is a sneaky fix :-p
>
> > +     num_tokens = optlen / sizeof(*tokens);
> > +     tokens = kvmalloc_array(num_tokens, sizeof(*tokens), GFP_KERNEL);
> >       if (!tokens)
> >               return -ENOMEM;
> >
> > -     num_tokens = optlen / sizeof(struct dmabuf_token);
> >       if (copy_from_sockptr(tokens, optval, optlen)) {
> >               kvfree(tokens);
> >               return -EFAULT;
> > @@ -1086,24 +1088,28 @@ sock_devmem_dontneed(struct sock *sk, sockptr_t optval, unsigned int optlen)
> >       xa_lock_bh(&sk->sk_user_frags);
> >       for (i = 0; i < num_tokens; i++) {
> >               for (j = 0; j < tokens[i].token_count; j++) {
>
> [..]
>
> > +                     if (++num_frags > MAX_DONTNEED_FRAGS)
> > +                             goto frag_limit_reached;
> > +
>
> nit: maybe reuse existing ret (and rename it to num_frags) instead of
> introducing new num_frags? Both variables now seem to track the same
> number.

I almost sent it this way, but I think that would be wrong.

num_frags is all the frags inspected.
ret is all the frags freed.

The difference is subtle but critical. We want to exit when we've
inspected 1024 frags, not when we've freed 1024 frags, because the
user may make us loop forever if they ask us to free 10000000 frags of
which none exist.
Stanislav Fomichev Nov. 8, 2024, 2:58 a.m. UTC | #3
On 11/07, Mina Almasry wrote:
> On Thu, Nov 7, 2024 at 5:28 PM Stanislav Fomichev <stfomichev@gmail.com> wrote:
> >
> > On 11/07, Mina Almasry wrote:
> > > Exit early if we're freeing more than 1024 frags, to prevent
> > > looping too long.
> > >
> > > Also minor code cleanups:
> > > - Flip checks to reduce indentation.
> > > - Use sizeof(*tokens) everywhere for consistentcy.
> > >
> > > Cc: Yi Lai <yi1.lai@linux.intel.com>
> > > Cc: Stanislav Fomichev <sdf@fomichev.me>
> > > Signed-off-by: Mina Almasry <almasrymina@google.com>
> > >
> > > ---
> > >
> > > v2:
> > > - Retain token check to prevent allocation of too much memory.
> > > - Exit early instead of pre-checking in a loop so that we don't penalize
> > >   well behaved applications (sdf)
> > >
> > > ---
> > >  net/core/sock.c | 42 ++++++++++++++++++++++++------------------
> > >  1 file changed, 24 insertions(+), 18 deletions(-)
> > >
> > > diff --git a/net/core/sock.c b/net/core/sock.c
> > > index 039be95c40cf..da50df485090 100644
> > > --- a/net/core/sock.c
> > > +++ b/net/core/sock.c
> > > @@ -1052,32 +1052,34 @@ static int sock_reserve_memory(struct sock *sk, int bytes)
> > >
> > >  #ifdef CONFIG_PAGE_POOL
> > >
> > > -/* This is the number of tokens that the user can SO_DEVMEM_DONTNEED in
> > > - * 1 syscall. The limit exists to limit the amount of memory the kernel
> > > - * allocates to copy these tokens.
> > > +/* This is the number of tokens and frags that the user can SO_DEVMEM_DONTNEED
> > > + * in 1 syscall. The limit exists to limit the amount of memory the kernel
> > > + * allocates to copy these tokens, and to prevent looping over the frags for
> > > + * too long.
> > >   */
> > >  #define MAX_DONTNEED_TOKENS 128
> > > +#define MAX_DONTNEED_FRAGS 1024
> > >
> > >  static noinline_for_stack int
> > >  sock_devmem_dontneed(struct sock *sk, sockptr_t optval, unsigned int optlen)
> > >  {
> > >       unsigned int num_tokens, i, j, k, netmem_num = 0;
> > >       struct dmabuf_token *tokens;
> > > +     int ret = 0, num_frags = 0;
> > >       netmem_ref netmems[16];
> > > -     int ret = 0;
> > >
> > >       if (!sk_is_tcp(sk))
> > >               return -EBADF;
> > >
> > > -     if (optlen % sizeof(struct dmabuf_token) ||
> > > +     if (optlen % sizeof(*tokens) ||
> > >           optlen > sizeof(*tokens) * MAX_DONTNEED_TOKENS)
> > >               return -EINVAL;
> > >
> >
> > [..]
> >
> > > -     tokens = kvmalloc_array(optlen, sizeof(*tokens), GFP_KERNEL);
> >
> > Oh, so we currently allocate optlen*8? This is a sneaky fix :-p
> >
> > > +     num_tokens = optlen / sizeof(*tokens);
> > > +     tokens = kvmalloc_array(num_tokens, sizeof(*tokens), GFP_KERNEL);
> > >       if (!tokens)
> > >               return -ENOMEM;
> > >
> > > -     num_tokens = optlen / sizeof(struct dmabuf_token);
> > >       if (copy_from_sockptr(tokens, optval, optlen)) {
> > >               kvfree(tokens);
> > >               return -EFAULT;
> > > @@ -1086,24 +1088,28 @@ sock_devmem_dontneed(struct sock *sk, sockptr_t optval, unsigned int optlen)
> > >       xa_lock_bh(&sk->sk_user_frags);
> > >       for (i = 0; i < num_tokens; i++) {
> > >               for (j = 0; j < tokens[i].token_count; j++) {
> >
> > [..]
> >
> > > +                     if (++num_frags > MAX_DONTNEED_FRAGS)
> > > +                             goto frag_limit_reached;
> > > +
> >
> > nit: maybe reuse existing ret (and rename it to num_frags) instead of
> > introducing new num_frags? Both variables now seem to track the same
> > number.
> 
> I almost sent it this way, but I think that would be wrong.
> 
> num_frags is all the frags inspected.
> ret is all the frags freed.
> 
> The difference is subtle but critical. We want to exit when we've
> inspected 1024 frags, not when we've freed 1024 frags, because the
> user may make us loop forever if they ask us to free 10000000 frags of
> which none exist.

I see. Maybe can mitigate the damage with the following:

for (i = 0; i < min(num_tokens, MAX_DONTNEED_FRAGS); i++)
	for (j = 0; j < min(tokens[i].token_count, MAX_DONTNEED_FRAGS); j++)

In this case, worst case, we loop 1024*1024 on the invalid input :-D
But up to you, separate num_frag works as well (but, as you've seen with
my initial reply, it's not super straightforward).

Acked-by: Stanislav Fomichev <sdf@fomichev.me>
diff mbox series

Patch

diff --git a/net/core/sock.c b/net/core/sock.c
index 039be95c40cf..da50df485090 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1052,32 +1052,34 @@  static int sock_reserve_memory(struct sock *sk, int bytes)
 
 #ifdef CONFIG_PAGE_POOL
 
-/* This is the number of tokens that the user can SO_DEVMEM_DONTNEED in
- * 1 syscall. The limit exists to limit the amount of memory the kernel
- * allocates to copy these tokens.
+/* This is the number of tokens and frags that the user can SO_DEVMEM_DONTNEED
+ * in 1 syscall. The limit exists to limit the amount of memory the kernel
+ * allocates to copy these tokens, and to prevent looping over the frags for
+ * too long.
  */
 #define MAX_DONTNEED_TOKENS 128
+#define MAX_DONTNEED_FRAGS 1024
 
 static noinline_for_stack int
 sock_devmem_dontneed(struct sock *sk, sockptr_t optval, unsigned int optlen)
 {
 	unsigned int num_tokens, i, j, k, netmem_num = 0;
 	struct dmabuf_token *tokens;
+	int ret = 0, num_frags = 0;
 	netmem_ref netmems[16];
-	int ret = 0;
 
 	if (!sk_is_tcp(sk))
 		return -EBADF;
 
-	if (optlen % sizeof(struct dmabuf_token) ||
+	if (optlen % sizeof(*tokens) ||
 	    optlen > sizeof(*tokens) * MAX_DONTNEED_TOKENS)
 		return -EINVAL;
 
-	tokens = kvmalloc_array(optlen, sizeof(*tokens), GFP_KERNEL);
+	num_tokens = optlen / sizeof(*tokens);
+	tokens = kvmalloc_array(num_tokens, sizeof(*tokens), GFP_KERNEL);
 	if (!tokens)
 		return -ENOMEM;
 
-	num_tokens = optlen / sizeof(struct dmabuf_token);
 	if (copy_from_sockptr(tokens, optval, optlen)) {
 		kvfree(tokens);
 		return -EFAULT;
@@ -1086,24 +1088,28 @@  sock_devmem_dontneed(struct sock *sk, sockptr_t optval, unsigned int optlen)
 	xa_lock_bh(&sk->sk_user_frags);
 	for (i = 0; i < num_tokens; i++) {
 		for (j = 0; j < tokens[i].token_count; j++) {
+			if (++num_frags > MAX_DONTNEED_FRAGS)
+				goto frag_limit_reached;
+
 			netmem_ref netmem = (__force netmem_ref)__xa_erase(
 				&sk->sk_user_frags, tokens[i].token_start + j);
 
-			if (netmem &&
-			    !WARN_ON_ONCE(!netmem_is_net_iov(netmem))) {
-				netmems[netmem_num++] = netmem;
-				if (netmem_num == ARRAY_SIZE(netmems)) {
-					xa_unlock_bh(&sk->sk_user_frags);
-					for (k = 0; k < netmem_num; k++)
-						WARN_ON_ONCE(!napi_pp_put_page(netmems[k]));
-					netmem_num = 0;
-					xa_lock_bh(&sk->sk_user_frags);
-				}
-				ret++;
+			if (!netmem || WARN_ON_ONCE(!netmem_is_net_iov(netmem)))
+				continue;
+
+			netmems[netmem_num++] = netmem;
+			if (netmem_num == ARRAY_SIZE(netmems)) {
+				xa_unlock_bh(&sk->sk_user_frags);
+				for (k = 0; k < netmem_num; k++)
+					WARN_ON_ONCE(!napi_pp_put_page(netmems[k]));
+				netmem_num = 0;
+				xa_lock_bh(&sk->sk_user_frags);
 			}
+			ret++;
 		}
 	}
 
+frag_limit_reached:
 	xa_unlock_bh(&sk->sk_user_frags);
 	for (k = 0; k < netmem_num; k++)
 		WARN_ON_ONCE(!napi_pp_put_page(netmems[k]));