
[net] net: Check for oversized requests in sock_kmalloc()

Message ID 20241216-sock-kmalloc-warn-v1-1-9cb7fdee5b32@rbox.co (mailing list archive)
State Changes Requested
Delegated to: Netdev Maintainers
Series [net] net: Check for oversized requests in sock_kmalloc()

Checks

Context Check Description
netdev/series_format success Single patches do not need cover letters
netdev/tree_selection success Clearly marked for net
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag present in non-next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 0 this patch: 0
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers warning 1 maintainers not CCed: almasrymina@google.com
netdev/build_clang success Errors and warnings before: 5 this patch: 5
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success Fixes tag looks correct
netdev/build_allmodconfig_warn success Errors and warnings before: 4 this patch: 4
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 9 lines checked
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 9 this patch: 9
netdev/source_inline success Was 0 now: 0
netdev/contest success net-next-2024-12-16--15-00 (tests: 794)

Commit Message

Michal Luczaj Dec. 16, 2024, 11:50 a.m. UTC
Allocator explicitly rejects requests of order > MAX_PAGE_ORDER, triggering
a WARN_ON_ONCE_GFP().

Put a size limit in sock_kmalloc().

WARNING: CPU: 6 PID: 1676 at mm/page_alloc.c:4727 __alloc_pages_noprof+0x32e/0x3a0
Call Trace:
 ___kmalloc_large_node+0x71/0xf0
 __kmalloc_large_node_noprof+0x1b/0xf0
 __kmalloc_noprof+0x436/0x560
 sock_kmalloc+0x44/0x60
 ____sys_sendmsg+0x208/0x3a0
 ___sys_sendmsg+0x84/0xd0
 __sys_sendmsg+0x56/0xa0
 do_syscall_64+0x93/0x180
 entry_SYSCALL_64_after_hwframe+0x76/0x7e

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: Michal Luczaj <mhal@rbox.co>
---
$ cat test.py
from socket import *
import os

n = 4096 << 10	# PAGE_SIZE << MAX_PAGE_ORDER
n += 1
data = bytes([0] * n)
os.system("sudo sysctl net.core.optmem_max=%d" % (n + 100))

s = socket(AF_INET, SOCK_STREAM)
cm = [(0, 0, data)]	# ancillary data: (cmsg_level, cmsg_type, cmsg_data)
s.sendmsg([b'x'], cm)

'''
# Alternative trigger via af_alg's alg_setkey():
s = socket(AF_ALG, SOCK_SEQPACKET)
s.bind(('hash', 'sha256'))
s.setsockopt(SOL_ALG, ALG_SET_KEY, data)
'''
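
The sysctl bump is what lets such a request get past sock_kmalloc()'s optmem_max check and reach the page allocator in the first place. A minimal userspace sketch of the size arithmetic, assuming the common x86-64 defaults of PAGE_SIZE = 4096 and MAX_PAGE_ORDER = 10 (both are kernel build details, not constants exported to userspace):

/* Sketch only: the threshold the repro crosses, assuming PAGE_SIZE = 4096
 * and MAX_PAGE_ORDER = 10 (x86-64 defaults).
 */
#include <stdio.h>

int main(void)
{
	unsigned long page_size = 4096;			/* assumed PAGE_SIZE */
	unsigned long max_order = 10;			/* assumed MAX_PAGE_ORDER */
	unsigned long limit = page_size << max_order;	/* 4194304 bytes, an order-10 allocation */

	printf("largest request served without a warning: %lu\n", limit);
	printf("first oversized request (used by test.py): %lu\n", limit + 1);
	return 0;
}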
---
 net/core/sock.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)


---
base-commit: 922b4b955a03d19fea98938f33ef0e62d01f5159
change-id: 20241213-sock-kmalloc-warn-0166205c25b2

Best regards,

Comments

Eric Dumazet Dec. 16, 2024, 12:43 p.m. UTC | #1
On Mon, Dec 16, 2024 at 12:51 PM Michal Luczaj <mhal@rbox.co> wrote:
>
> Allocator explicitly rejects requests of order > MAX_PAGE_ORDER, triggering
> a WARN_ON_ONCE_GFP().
>
> Put a size limit in sock_kmalloc().
>
> WARNING: CPU: 6 PID: 1676 at mm/page_alloc.c:4727 __alloc_pages_noprof+0x32e/0x3a0
> Call Trace:
>  ___kmalloc_large_node+0x71/0xf0
>  __kmalloc_large_node_noprof+0x1b/0xf0
>  __kmalloc_noprof+0x436/0x560
>  sock_kmalloc+0x44/0x60
>  ____sys_sendmsg+0x208/0x3a0
>  ___sys_sendmsg+0x84/0xd0
>  __sys_sendmsg+0x56/0xa0
>  do_syscall_64+0x93/0x180
>  entry_SYSCALL_64_after_hwframe+0x76/0x7e
>
> Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")

I would rather change ____sys_sendmsg() to use something different
than sock_kmalloc().

This would avoid false sharing (on sk->sk_omem_alloc) for a short-lived buffer,
and could help UDP senders sharing a socket...

sock_kmalloc() was really meant for small and long lived allocations
(corroborated by net.core.optmem_max small default value)

diff --git a/net/socket.c b/net/socket.c
index 9a117248f18f13d574d099c80128986c744fa97f..c23d8e20c5c626c54b9a04a416b82f696fa2310c 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -2552,7 +2552,8 @@ static int ____sys_sendmsg(struct socket *sock, struct msghdr *msg_sys,
                BUILD_BUG_ON(sizeof(struct cmsghdr) !=
                             CMSG_ALIGN(sizeof(struct cmsghdr)));
                if (ctl_len > sizeof(ctl)) {
-                       ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
+                       ctl_buf = kvmalloc(ctl_len, GFP_KERNEL_ACCOUNT |
+                                                   __GFP_NOWARN);
                        if (ctl_buf == NULL)
                                goto out;
                }
@@ -2594,7 +2595,7 @@ static int ____sys_sendmsg(struct socket *sock, struct msghdr *msg_sys,

 out_freectl:
        if (ctl_buf != ctl)
-               sock_kfree_s(sock->sk, ctl_buf, ctl_len);
+               kvfree(ctl_buf);
 out:
        return err;
 }
Michal Luczaj Dec. 16, 2024, 9:42 p.m. UTC | #2
+cc Herbert. Allocator warning due to sock_kmalloc() in
crypto/af_alg.c:alg_setkey(). Please see the cover letter for a repro:
https://lore.kernel.org/netdev/20241216-sock-kmalloc-warn-v1-1-9cb7fdee5b32@rbox.co/

More comments below:

On 12/16/24 13:43, Eric Dumazet wrote:
> On Mon, Dec 16, 2024 at 12:51 PM Michal Luczaj <mhal@rbox.co> wrote:
>>
>> Allocator explicitly rejects requests of order > MAX_PAGE_ORDER, triggering
>> a WARN_ON_ONCE_GFP().
>>
>> Put a size limit in sock_kmalloc().
>>
>> WARNING: CPU: 6 PID: 1676 at mm/page_alloc.c:4727 __alloc_pages_noprof+0x32e/0x3a0
>> Call Trace:
>>  ___kmalloc_large_node+0x71/0xf0
>>  __kmalloc_large_node_noprof+0x1b/0xf0
>>  __kmalloc_noprof+0x436/0x560
>>  sock_kmalloc+0x44/0x60
>>  ____sys_sendmsg+0x208/0x3a0
>>  ___sys_sendmsg+0x84/0xd0
>>  __sys_sendmsg+0x56/0xa0
>>  do_syscall_64+0x93/0x180
>>  entry_SYSCALL_64_after_hwframe+0x76/0x7e
>>
>> Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
> 
> I would rather change ____sys_sendmsg() to use something different
> than sock_kmalloc().
> 
> This would avoid false sharing (on sk->sk_omem_alloc) for a short-lived buffer,
> and could help UDP senders sharing a socket...
> 
> sock_kmalloc() was really meant for small and long lived allocations
> (corroborated by net.core.optmem_max small default value)
> 
> diff --git a/net/socket.c b/net/socket.c
> index 9a117248f18f13d574d099c80128986c744fa97f..c23d8e20c5c626c54b9a04a416b82f696fa2310c 100644
> --- a/net/socket.c
> +++ b/net/socket.c
> @@ -2552,7 +2552,8 @@ static int ____sys_sendmsg(struct socket *sock, struct msghdr *msg_sys,
>                 BUILD_BUG_ON(sizeof(struct cmsghdr) !=
>                              CMSG_ALIGN(sizeof(struct cmsghdr)));
>                 if (ctl_len > sizeof(ctl)) {
> -                       ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
> +                       ctl_buf = kvmalloc(ctl_len, GFP_KERNEL_ACCOUNT |
> +                                                   __GFP_NOWARN);
>                         if (ctl_buf == NULL)
>                                 goto out;
>                 }
> @@ -2594,7 +2595,7 @@ static int ____sys_sendmsg(struct socket *sock, struct msghdr *msg_sys,
> 
>  out_freectl:
>         if (ctl_buf != ctl)
> -               sock_kfree_s(sock->sk, ctl_buf, ctl_len);
> +               kvfree(ctl_buf);
>  out:
>         return err;
>  }

Got it. I guess the same treatment for cmsghdr_from_user_compat_to_kern()?

A similar problem exists with compat_ip_set_mcast_msfilter() and
compat_ipv6_set_mcast_msfilter(), this time with a direct kmalloc().
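
For illustration, the swap being discussed would presumably follow the pattern below in those call sites as well (a hypothetical sketch, not the actual net/compat.c or multicast-filter code; the function and variable names are made up):

/* Hypothetical sketch, not actual kernel source: allocate a short-lived,
 * user-sized control buffer with kvmalloc() instead of the per-socket
 * accounted sock_kmalloc(), mirroring the ____sys_sendmsg() change above.
 */
static int example_copy_ctl(struct sock *sk, sockptr_t optval, int optlen)
{
	void *buf;
	int err;

	buf = kvmalloc(optlen, GFP_KERNEL_ACCOUNT | __GFP_NOWARN);
	/* was: buf = sock_kmalloc(sk, optlen, GFP_KERNEL); */
	if (!buf)
		return -ENOMEM;

	err = -EFAULT;
	if (copy_from_sockptr(buf, optval, optlen))
		goto out_free;

	/* ... hand the control data to the protocol ... */
	err = 0;

out_free:
	kvfree(buf);	/* was: sock_kfree_s(sk, buf, optlen); */
	return err;
}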

Patch

diff --git a/net/core/sock.c b/net/core/sock.c
index 74729d20cd0099e748f4c4fe0be42a2d2d47e77a..1a81c5c09c9f8eb6f8a47624fe08b678b2ab19b0 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2773,7 +2773,8 @@  void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
 	int optmem_max = READ_ONCE(sock_net(sk)->core.sysctl_optmem_max);
 
 	if ((unsigned int)size <= optmem_max &&
-	    atomic_read(&sk->sk_omem_alloc) + size < optmem_max) {
+	    atomic_read(&sk->sk_omem_alloc) + size < optmem_max &&
+	    size <= PAGE_SIZE << MAX_PAGE_ORDER) {
 		void *mem;
 		/* First do the add, to avoid the race if kmalloc
 		 * might sleep.