diff mbox series

[net,1/4] sockmap, vsock: For connectible sockets allow only connected

Message ID 20250213-vsock-listen-sockmap-nullptr-v1-1-994b7cd2f16b@rbox.co (mailing list archive)
State New
Headers show
Series sockmap, vsock: For connectible sockets allow only connected | expand

Commit Message

Michal Luczaj Feb. 13, 2025, 11:58 a.m. UTC
sockmap expects all vsocks to have a transport assigned, which is expressed
in vsock_proto::psock_update_sk_prot(). However, there is an edge case
where an unconnected (connectible) socket may lose its previously assigned
transport. This is handled with a NULL check in the vsock/BPF recv path.

Another design detail is that listening vsocks are not supposed to have any
transport assigned at all. Which implies they are not supported by the
sockmap. But this is complicated by the fact that a socket, before
switching to TCP_LISTEN, may have had some transport assigned during a
failed connect() attempt. Hence, we may end up with a listening vsock in a
sockmap, which blows up quickly:

KASAN: null-ptr-deref in range [0x0000000000000120-0x0000000000000127]
CPU: 7 UID: 0 PID: 56 Comm: kworker/7:0 Not tainted 6.14.0-rc1+
Workqueue: vsock-loopback vsock_loopback_work
RIP: 0010:vsock_read_skb+0x4b/0x90
Call Trace:
 sk_psock_verdict_data_ready+0xa4/0x2e0
 virtio_transport_recv_pkt+0x1ca8/0x2acc
 vsock_loopback_work+0x27d/0x3f0
 process_one_work+0x846/0x1420
 worker_thread+0x5b3/0xf80
 kthread+0x35a/0x700
 ret_from_fork+0x2d/0x70
 ret_from_fork_asm+0x1a/0x30

For connectible sockets, instead of relying solely on the state of
vsk->transport, tell sockmap to only allow those representing established
connections. This aligns with the behaviour for AF_INET and AF_UNIX.

Fixes: 634f1a7110b4 ("vsock: support sockmap")
Signed-off-by: Michal Luczaj <mhal@rbox.co>
---
 net/core/sock_map.c | 3 +++
 1 file changed, 3 insertions(+)

Comments

Michal Luczaj Feb. 14, 2025, 1:11 p.m. UTC | #1
> ...
> Another design detail is that listening vsocks are not supposed to have any
> transport assigned at all. Which implies they are not supported by the
> sockmap. But this is complicated by the fact that a socket, before
> switching to TCP_LISTEN, may have had some transport assigned during a
> failed connect() attempt. Hence, we may end up with a listening vsock in a
> sockmap, which blows up quickly:
> 
> KASAN: null-ptr-deref in range [0x0000000000000120-0x0000000000000127]
> CPU: 7 UID: 0 PID: 56 Comm: kworker/7:0 Not tainted 6.14.0-rc1+
> Workqueue: vsock-loopback vsock_loopback_work
> RIP: 0010:vsock_read_skb+0x4b/0x90
> Call Trace:
>  sk_psock_verdict_data_ready+0xa4/0x2e0
>  virtio_transport_recv_pkt+0x1ca8/0x2acc
>  vsock_loopback_work+0x27d/0x3f0
>  process_one_work+0x846/0x1420
>  worker_thread+0x5b3/0xf80
>  kthread+0x35a/0x700
>  ret_from_fork+0x2d/0x70
>  ret_from_fork_asm+0x1a/0x30

Perhaps I should have expanded more on the null-ptr-deref itself.

The idea is: force a vsock into assigning a transport and add it to the
sockmap (with a verdict program), but keep it unconnected. Then, drop
the transport and set the vsock to TCP_LISTEN. The moment a new
connection is established:

virtio_transport_recv_pkt()
  virtio_transport_recv_listen()
    sk->sk_data_ready(sk)            i.e. sk_psock_verdict_data_ready()
      ops->read_skb()                i.e. vsock_read_skb()
        vsk->transport->read_skb()   vsk->transport is NULL, boom

Here's a stand-alone repro:

/*
 * # modprobe -a vsock_loopback vhost_vsock
 * # gcc test.c && ./a.out
 */
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
#include <sys/socket.h>
#include <sys/syscall.h>
#include <linux/bpf.h>
#include <linux/vm_sockets.h>

static void die(const char *msg)
{
	perror(msg);
	exit(-1);
}

static int sockmap_create(void)
{
	union bpf_attr attr = {
		.map_type = BPF_MAP_TYPE_SOCKMAP,
		.key_size = sizeof(int),
		.value_size = sizeof(int),
		.max_entries = 1
	};
	int map;

	map = syscall(SYS_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
	if (map < 0)
		die("map_create");

	return map;
}

static void map_update_elem(int fd, int key, int value)
{
	union bpf_attr attr = {
		.map_fd = fd,
		.key = (uint64_t)&key,
		.value = (uint64_t)&value,
		.flags = BPF_ANY
	};

	if (syscall(SYS_bpf, BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr)))
		die("map_update_elem");
}

static int prog_load(void)
{
	/* mov %r0, 1; exit */
	struct bpf_insn insns[] = {
		{ .code = BPF_ALU64 | BPF_MOV | BPF_K, .dst_reg = 0, .imm = 1 },
		{ .code = BPF_JMP | BPF_EXIT }
	};
	union bpf_attr attr = {
		.prog_type = BPF_PROG_TYPE_SK_SKB,
		.insn_cnt = sizeof(insns)/sizeof(insns[0]),
		.insns = (long)insns,
		.license = (long)"",
	};
	
	int prog = syscall(SYS_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
	if (prog < 0)
		die("prog_load");

	return prog;
}

static void link_create(int prog_fd, int target_fd)
{
	union bpf_attr attr = {
		.link_create = {
			.prog_fd = prog_fd,
			.target_fd = target_fd,
			.attach_type = BPF_SK_SKB_VERDICT
		}
	};

	if (syscall(SYS_bpf, BPF_LINK_CREATE, &attr, sizeof(attr)) < 0)
		die("link_create");
}

int main(void)
{
	struct sockaddr_vm addr = {
		.svm_family = AF_VSOCK,
		.svm_cid = VMADDR_CID_LOCAL,
		.svm_port = VMADDR_PORT_ANY
	};
	socklen_t alen = sizeof(addr);
	int s, map, prog, c;

	s = socket(AF_VSOCK, SOCK_SEQPACKET, 0);
	if (s < 0)
		die("socket");

	if (bind(s, (struct sockaddr *)&addr, alen))
		die("bind");

	if (!connect(s, (struct sockaddr *)&addr, alen) || errno != ECONNRESET)
		die("connect #1");

	map = sockmap_create();
	prog = prog_load();
	link_create(prog, map);
	map_update_elem(map, 0, s);

	addr.svm_cid = 0x42424242; /* non-existing */
	if (!connect(s, (struct sockaddr *)&addr, alen) || errno != ESOCKTNOSUPPORT)
		die("connect #2");

	if (listen(s, 1))
		die("listen");

	if (getsockname(s, (struct sockaddr *)&addr, &alen))
		die("getsockname");

	c = socket(AF_VSOCK, SOCK_SEQPACKET, 0);
	if (c < 0)
		die("socket c");

	if (connect(c, (struct sockaddr *)&addr, alen))
		die("connect #3");

	return 0;
}
Stefano Garzarella Feb. 18, 2025, 8:52 a.m. UTC | #2
On Fri, Feb 14, 2025 at 02:11:48PM +0100, Michal Luczaj wrote:
>> ...
>> Another design detail is that listening vsocks are not supposed to have any
>> transport assigned at all. Which implies they are not supported by the
>> sockmap. But this is complicated by the fact that a socket, before
>> switching to TCP_LISTEN, may have had some transport assigned during a
>> failed connect() attempt. Hence, we may end up with a listening vsock in a
>> sockmap, which blows up quickly:
>>
>> KASAN: null-ptr-deref in range [0x0000000000000120-0x0000000000000127]
>> CPU: 7 UID: 0 PID: 56 Comm: kworker/7:0 Not tainted 6.14.0-rc1+
>> Workqueue: vsock-loopback vsock_loopback_work
>> RIP: 0010:vsock_read_skb+0x4b/0x90
>> Call Trace:
>>  sk_psock_verdict_data_ready+0xa4/0x2e0
>>  virtio_transport_recv_pkt+0x1ca8/0x2acc
>>  vsock_loopback_work+0x27d/0x3f0
>>  process_one_work+0x846/0x1420
>>  worker_thread+0x5b3/0xf80
>>  kthread+0x35a/0x700
>>  ret_from_fork+0x2d/0x70
>>  ret_from_fork_asm+0x1a/0x30
>
>Perhaps I should have expanded more on the null-ptr-deref itself.
>
>The idea is: force a vsock into assigning a transport and add it to the
>sockmap (with a verdict program), but keep it unconnected. Then, drop
>the transport and set the vsock to TCP_LISTEN. The moment a new
>connection is established:
>
>virtio_transport_recv_pkt()
>  virtio_transport_recv_listen()
>    sk->sk_data_ready(sk)            i.e. sk_psock_verdict_data_ready()
>      ops->read_skb()                i.e. vsock_read_skb()
>        vsk->transport->read_skb()   vsk->transport is NULL, boom
>

Yes I agree, it's a little clearer with this, but I think it was also 
clear the concept before. So with or without:

Acked-by: Stefano Garzarella <sgarzare@redhat.com>


>Here's a stand-alone repro:
>
>/*
> * # modprobe -a vsock_loopback vhost_vsock
> * # gcc test.c && ./a.out
> */
>#include <stdio.h>
>#include <stdint.h>
>#include <stdlib.h>
>#include <unistd.h>
>#include <errno.h>
>#include <sys/socket.h>
>#include <sys/syscall.h>
>#include <linux/bpf.h>
>#include <linux/vm_sockets.h>
>
>static void die(const char *msg)
>{
>	perror(msg);
>	exit(-1);
>}
>
>static int sockmap_create(void)
>{
>	union bpf_attr attr = {
>		.map_type = BPF_MAP_TYPE_SOCKMAP,
>		.key_size = sizeof(int),
>		.value_size = sizeof(int),
>		.max_entries = 1
>	};
>	int map;
>
>	map = syscall(SYS_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
>	if (map < 0)
>		die("map_create");
>
>	return map;
>}
>
>static void map_update_elem(int fd, int key, int value)
>{
>	union bpf_attr attr = {
>		.map_fd = fd,
>		.key = (uint64_t)&key,
>		.value = (uint64_t)&value,
>		.flags = BPF_ANY
>	};
>
>	if (syscall(SYS_bpf, BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr)))
>		die("map_update_elem");
>}
>
>static int prog_load(void)
>{
>	/* mov %r0, 1; exit */
>	struct bpf_insn insns[] = {
>		{ .code = BPF_ALU64 | BPF_MOV | BPF_K, .dst_reg = 0, .imm = 1 },
>		{ .code = BPF_JMP | BPF_EXIT }
>	};
>	union bpf_attr attr = {
>		.prog_type = BPF_PROG_TYPE_SK_SKB,
>		.insn_cnt = sizeof(insns)/sizeof(insns[0]),
>		.insns = (long)insns,
>		.license = (long)"",
>	};
>	
>	int prog = syscall(SYS_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
>	if (prog < 0)
>		die("prog_load");
>
>	return prog;
>}
>
>static void link_create(int prog_fd, int target_fd)
>{
>	union bpf_attr attr = {
>		.link_create = {
>			.prog_fd = prog_fd,
>			.target_fd = target_fd,
>			.attach_type = BPF_SK_SKB_VERDICT
>		}
>	};
>
>	if (syscall(SYS_bpf, BPF_LINK_CREATE, &attr, sizeof(attr)) < 0)
>		die("link_create");
>}
>
>int main(void)
>{
>	struct sockaddr_vm addr = {
>		.svm_family = AF_VSOCK,
>		.svm_cid = VMADDR_CID_LOCAL,
>		.svm_port = VMADDR_PORT_ANY
>	};
>	socklen_t alen = sizeof(addr);
>	int s, map, prog, c;
>
>	s = socket(AF_VSOCK, SOCK_SEQPACKET, 0);
>	if (s < 0)
>		die("socket");
>
>	if (bind(s, (struct sockaddr *)&addr, alen))
>		die("bind");
>
>	if (!connect(s, (struct sockaddr *)&addr, alen) || errno != ECONNRESET)
>		die("connect #1");
>
>	map = sockmap_create();
>	prog = prog_load();
>	link_create(prog, map);
>	map_update_elem(map, 0, s);
>
>	addr.svm_cid = 0x42424242; /* non-existing */
>	if (!connect(s, (struct sockaddr *)&addr, alen) || errno != ESOCKTNOSUPPORT)
>		die("connect #2");
>
>	if (listen(s, 1))
>		die("listen");
>
>	if (getsockname(s, (struct sockaddr *)&addr, &alen))
>		die("getsockname");
>
>	c = socket(AF_VSOCK, SOCK_SEQPACKET, 0);
>	if (c < 0)
>		die("socket c");
>
>	if (connect(c, (struct sockaddr *)&addr, alen))
>		die("connect #3");
>
>	return 0;
>}
>
diff mbox series

Patch

diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index f1b9b3958792cd599efcb591742874e9b3f4a76b..2f1be9baad0578e2202b5cf79616b6e814c1ed54 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -541,6 +541,9 @@  static bool sock_map_sk_state_allowed(const struct sock *sk)
 		return (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_LISTEN);
 	if (sk_is_stream_unix(sk))
 		return (1 << sk->sk_state) & TCPF_ESTABLISHED;
+	if (sk_is_vsock(sk) &&
+	    (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET))
+		return (1 << sk->sk_state) & TCPF_ESTABLISHED;
 	return true;
 }