diff mbox series

[RFC,net-next,v6,05/14] af_vsock: use a separate dgram bind table

Message ID 20240710212555.1617795-6-amery.hung@bytedance.com (mailing list archive)
State New, archived
Headers show
Series virtio/vsock: support datagrams | expand

Commit Message

Amery Hung July 10, 2024, 9:25 p.m. UTC
From: Bobby Eshleman <bobby.eshleman@bytedance.com>

This commit adds support for bound dgram sockets to be tracked in a
separate bind table from connectible sockets in order to avoid address
collisions. With this commit, users can simultaneously bind a dgram
socket and connectible socket to the same CID and port.

Signed-off-by: Bobby Eshleman <bobby.eshleman@bytedance.com>
---
 net/vmw_vsock/af_vsock.c | 103 +++++++++++++++++++++++++++++----------
 1 file changed, 76 insertions(+), 27 deletions(-)

Comments

Stefano Garzarella July 23, 2024, 2:41 p.m. UTC | #1
On Wed, Jul 10, 2024 at 09:25:46PM GMT, Amery Hung wrote:
>From: Bobby Eshleman <bobby.eshleman@bytedance.com>
>
>This commit adds support for bound dgram sockets to be tracked in a
>separate bind table from connectible sockets in order to avoid address
>collisions. With this commit, users can simultaneously bind a dgram
>socket and connectible socket to the same CID and port.
>
>Signed-off-by: Bobby Eshleman <bobby.eshleman@bytedance.com>
>---
> net/vmw_vsock/af_vsock.c | 103 +++++++++++++++++++++++++++++----------
> 1 file changed, 76 insertions(+), 27 deletions(-)
>
>diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
>index d571be9cdbf0..ab08cd81720e 100644
>--- a/net/vmw_vsock/af_vsock.c
>+++ b/net/vmw_vsock/af_vsock.c
>@@ -10,18 +10,23 @@
>  * - There are two kinds of sockets: those created by user action (such as
>  * calling socket(2)) and those created by incoming connection request packets.
>  *
>- * - There are two "global" tables, one for bound sockets (sockets that have
>- * specified an address that they are responsible for) and one for connected
>- * sockets (sockets that have established a connection with another socket).
>- * These tables are "global" in that all sockets on the system are placed
>- * within them. - Note, though, that the bound table contains an extra entry
>- * for a list of unbound sockets and SOCK_DGRAM sockets will always remain in
>- * that list. The bound table is used solely for lookup of sockets when packets
>- * are received and that's not necessary for SOCK_DGRAM sockets since we create
>- * a datagram handle for each and need not perform a lookup.  Keeping SOCK_DGRAM
>- * sockets out of the bound hash buckets will reduce the chance of collisions
>- * when looking for SOCK_STREAM sockets and prevents us from having to check the
>- * socket type in the hash table lookups.
>+ * - There are three "global" tables, one for bound connectible (stream /
>+ * seqpacket) sockets, one for bound datagram sockets, and one for connected
>+ * sockets. Bound sockets are sockets that have specified an address that
>+ * they are responsible for. Connected sockets are sockets that have
>+ * established a connection with another socket. These tables are "global" in
>+ * that all sockets on the system are placed within them. - Note, though,
>+ * that the bound tables contain an extra entry for a list of unbound
>+ * sockets. The bound tables are used solely for lookup of sockets when packets
>+ * are received.
>+ *
>+ * - There are separate bind tables for connectible and datagram sockets to avoid
>+ * address collisions between stream/seqpacket sockets and datagram sockets.
>+ *
>+ * - Transports may elect to NOT use the global datagram bind table by
>+ * implementing the ->dgram_bind() callback. If that callback is implemented,
>+ * the global bind table is not used and the responsibility of bound datagram
>+ * socket tracking is deferred to the transport.
>  *
>  * - Sockets created by user action will either be "client" sockets that
>  * initiate a connection or "server" sockets that listen for connections; we do
>@@ -116,6 +121,7 @@
> static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr);
> static void vsock_sk_destruct(struct sock *sk);
> static int vsock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
>+static bool sock_type_connectible(u16 type);
>
> /* Protocol family. */
> struct proto vsock_proto = {
>@@ -152,21 +158,25 @@ static DEFINE_MUTEX(vsock_register_mutex);
>  * VSocket is stored in the connected hash table.
>  *
>  * Unbound sockets are all put on the same list attached to the end of the hash
>- * table (vsock_unbound_sockets).  Bound sockets are added to the hash table in
>- * the bucket that their local address hashes to (vsock_bound_sockets(addr)
>- * represents the list that addr hashes to).
>+ * tables (vsock_unbound_sockets/vsock_unbound_dgram_sockets).  Bound sockets
>+ * are added to the hash table in the bucket that their local address hashes to
>+ * (vsock_bound_sockets(addr) and vsock_bound_dgram_sockets(addr) represents
>+ * the list that addr hashes to).
>  *
>- * Specifically, we initialize the vsock_bind_table array to a size of
>- * VSOCK_HASH_SIZE + 1 so that vsock_bind_table[0] through
>- * vsock_bind_table[VSOCK_HASH_SIZE - 1] are for bound sockets and
>- * vsock_bind_table[VSOCK_HASH_SIZE] is for unbound sockets.  The hash function
>- * mods with VSOCK_HASH_SIZE to ensure this.
>+ * Specifically, taking connectible sockets as an example we initialize the
>+ * vsock_bind_table array to a size of VSOCK_HASH_SIZE + 1 so that
>+ * vsock_bind_table[0] through vsock_bind_table[VSOCK_HASH_SIZE - 1] are for
>+ * bound sockets and vsock_bind_table[VSOCK_HASH_SIZE] is for unbound sockets.
>+ * The hash function mods with VSOCK_HASH_SIZE to ensure this.
>+ * Datagrams and vsock_dgram_bind_table operate in the same way.
>  */
> #define MAX_PORT_RETRIES        24
>
> #define VSOCK_HASH(addr)        ((addr)->svm_port % VSOCK_HASH_SIZE)
> #define vsock_bound_sockets(addr) (&vsock_bind_table[VSOCK_HASH(addr)])
>+#define vsock_bound_dgram_sockets(addr) (&vsock_dgram_bind_table[VSOCK_HASH(addr)])
> #define vsock_unbound_sockets     (&vsock_bind_table[VSOCK_HASH_SIZE])
>+#define vsock_unbound_dgram_sockets     (&vsock_dgram_bind_table[VSOCK_HASH_SIZE])
>
> /* XXX This can probably be implemented in a better way. */
> #define VSOCK_CONN_HASH(src, dst)				\
>@@ -182,6 +192,8 @@ struct list_head vsock_connected_table[VSOCK_HASH_SIZE];
> EXPORT_SYMBOL_GPL(vsock_connected_table);
> DEFINE_SPINLOCK(vsock_table_lock);
> EXPORT_SYMBOL_GPL(vsock_table_lock);
>+static struct list_head vsock_dgram_bind_table[VSOCK_HASH_SIZE + 1];
>+static DEFINE_SPINLOCK(vsock_dgram_table_lock);
>
> /* Autobind this socket to the local address if necessary. */
> static int vsock_auto_bind(struct vsock_sock *vsk)
>@@ -204,6 +216,9 @@ static void vsock_init_tables(void)
>
> 	for (i = 0; i < ARRAY_SIZE(vsock_connected_table); i++)
> 		INIT_LIST_HEAD(&vsock_connected_table[i]);
>+
>+	for (i = 0; i < ARRAY_SIZE(vsock_dgram_bind_table); i++)
>+		INIT_LIST_HEAD(&vsock_dgram_bind_table[i]);
> }
>
> static void __vsock_insert_bound(struct list_head *list,
>@@ -271,13 +286,28 @@ static struct sock *__vsock_find_connected_socket(struct sockaddr_vm *src,
> 	return NULL;
> }
>
>-static void vsock_insert_unbound(struct vsock_sock *vsk)
>+static void __vsock_insert_dgram_unbound(struct vsock_sock *vsk)
>+{
>+	spin_lock_bh(&vsock_dgram_table_lock);
>+	__vsock_insert_bound(vsock_unbound_dgram_sockets, vsk);
>+	spin_unlock_bh(&vsock_dgram_table_lock);
>+}
>+
>+static void __vsock_insert_connectible_unbound(struct vsock_sock *vsk)
> {
> 	spin_lock_bh(&vsock_table_lock);
> 	__vsock_insert_bound(vsock_unbound_sockets, vsk);
> 	spin_unlock_bh(&vsock_table_lock);
> }
>
>+static void vsock_insert_unbound(struct vsock_sock *vsk)
>+{
>+	if (sock_type_connectible(sk_vsock(vsk)->sk_type))
>+		__vsock_insert_connectible_unbound(vsk);
>+	else
>+		__vsock_insert_dgram_unbound(vsk);
>+}
>+
> void vsock_insert_connected(struct vsock_sock *vsk)
> {
> 	struct list_head *list = vsock_connected_sockets(
>@@ -289,6 +319,14 @@ void vsock_insert_connected(struct vsock_sock *vsk)
> }
> EXPORT_SYMBOL_GPL(vsock_insert_connected);
>
>+static void vsock_remove_dgram_bound(struct vsock_sock *vsk)
>+{
>+	spin_lock_bh(&vsock_dgram_table_lock);
>+	if (__vsock_in_bound_table(vsk))
>+		__vsock_remove_bound(vsk);
>+	spin_unlock_bh(&vsock_dgram_table_lock);
>+}
>+
> void vsock_remove_bound(struct vsock_sock *vsk)
> {
> 	spin_lock_bh(&vsock_table_lock);
>@@ -340,7 +378,10 @@ EXPORT_SYMBOL_GPL(vsock_find_connected_socket);
>
> void vsock_remove_sock(struct vsock_sock *vsk)
> {
>-	vsock_remove_bound(vsk);
>+	if (sock_type_connectible(sk_vsock(vsk)->sk_type))
>+		vsock_remove_bound(vsk);
>+	else
>+		vsock_remove_dgram_bound(vsk);

Can we try to be consistent, for example we have vsock_insert_unbound()
which calls internally sock_type_connectible(), while
vsock_remove_bound() is just for connectible sockets. It's a bit
confusing.

> 	vsock_remove_connected(vsk);
> }
> EXPORT_SYMBOL_GPL(vsock_remove_sock);
>@@ -746,11 +787,19 @@ static int __vsock_bind_connectible(struct vsock_sock *vsk,
> 	return vsock_bind_common(vsk, addr, vsock_bind_table, VSOCK_HASH_SIZE + 1);
> }
>
>-static int __vsock_bind_dgram(struct vsock_sock *vsk,
>-			      struct sockaddr_vm *addr)
>+static int vsock_bind_dgram(struct vsock_sock *vsk,
>+			    struct sockaddr_vm *addr)

Why we are renaming this?

> {
>-	if (!vsk->transport || !vsk->transport->dgram_bind)
>-		return -EINVAL;
>+	if (!vsk->transport || !vsk->transport->dgram_bind) {

Why this condition?

Maybe a comment here is needed because I'm lost...

>+		int retval;
>+
>+		spin_lock_bh(&vsock_dgram_table_lock);
>+		retval = vsock_bind_common(vsk, addr, vsock_dgram_bind_table,
>+					   VSOCK_HASH_SIZE);

Should we use VSOCK_HASH_SIZE + 1 here?

Using ARRAY_SIZE(x) should avoid this problem.


>+		spin_unlock_bh(&vsock_dgram_table_lock);
>+
>+		return retval;
>+	}
>
> 	return vsk->transport->dgram_bind(vsk, addr);
> }
>@@ -781,7 +830,7 @@ static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr)
> 		break;
>
> 	case SOCK_DGRAM:
>-		retval = __vsock_bind_dgram(vsk, addr);
>+		retval = vsock_bind_dgram(vsk, addr);
> 		break;
>
> 	default:
>-- 
>2.20.1
>
Amery Hung July 28, 2024, 9:37 p.m. UTC | #2
On Tue, Jul 23, 2024 at 7:41 AM Stefano Garzarella <sgarzare@redhat.com> wrote:
>
> On Wed, Jul 10, 2024 at 09:25:46PM GMT, Amery Hung wrote:
> >From: Bobby Eshleman <bobby.eshleman@bytedance.com>
> >
> >This commit adds support for bound dgram sockets to be tracked in a
> >separate bind table from connectible sockets in order to avoid address
> >collisions. With this commit, users can simultaneously bind a dgram
> >socket and connectible socket to the same CID and port.
> >
> >Signed-off-by: Bobby Eshleman <bobby.eshleman@bytedance.com>
> >---
> > net/vmw_vsock/af_vsock.c | 103 +++++++++++++++++++++++++++++----------
> > 1 file changed, 76 insertions(+), 27 deletions(-)
> >
> >diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
> >index d571be9cdbf0..ab08cd81720e 100644
> >--- a/net/vmw_vsock/af_vsock.c
> >+++ b/net/vmw_vsock/af_vsock.c
> >@@ -10,18 +10,23 @@
> >  * - There are two kinds of sockets: those created by user action (such as
> >  * calling socket(2)) and those created by incoming connection request packets.
> >  *
> >- * - There are two "global" tables, one for bound sockets (sockets that have
> >- * specified an address that they are responsible for) and one for connected
> >- * sockets (sockets that have established a connection with another socket).
> >- * These tables are "global" in that all sockets on the system are placed
> >- * within them. - Note, though, that the bound table contains an extra entry
> >- * for a list of unbound sockets and SOCK_DGRAM sockets will always remain in
> >- * that list. The bound table is used solely for lookup of sockets when packets
> >- * are received and that's not necessary for SOCK_DGRAM sockets since we create
> >- * a datagram handle for each and need not perform a lookup.  Keeping SOCK_DGRAM
> >- * sockets out of the bound hash buckets will reduce the chance of collisions
> >- * when looking for SOCK_STREAM sockets and prevents us from having to check the
> >- * socket type in the hash table lookups.
> >+ * - There are three "global" tables, one for bound connectible (stream /
> >+ * seqpacket) sockets, one for bound datagram sockets, and one for connected
> >+ * sockets. Bound sockets are sockets that have specified an address that
> >+ * they are responsible for. Connected sockets are sockets that have
> >+ * established a connection with another socket. These tables are "global" in
> >+ * that all sockets on the system are placed within them. - Note, though,
> >+ * that the bound tables contain an extra entry for a list of unbound
> >+ * sockets. The bound tables are used solely for lookup of sockets when packets
> >+ * are received.
> >+ *
> >+ * - There are separate bind tables for connectible and datagram sockets to avoid
> >+ * address collisions between stream/seqpacket sockets and datagram sockets.
> >+ *
> >+ * - Transports may elect to NOT use the global datagram bind table by
> >+ * implementing the ->dgram_bind() callback. If that callback is implemented,
> >+ * the global bind table is not used and the responsibility of bound datagram
> >+ * socket tracking is deferred to the transport.
> >  *
> >  * - Sockets created by user action will either be "client" sockets that
> >  * initiate a connection or "server" sockets that listen for connections; we do
> >@@ -116,6 +121,7 @@
> > static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr);
> > static void vsock_sk_destruct(struct sock *sk);
> > static int vsock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
> >+static bool sock_type_connectible(u16 type);
> >
> > /* Protocol family. */
> > struct proto vsock_proto = {
> >@@ -152,21 +158,25 @@ static DEFINE_MUTEX(vsock_register_mutex);
> >  * VSocket is stored in the connected hash table.
> >  *
> >  * Unbound sockets are all put on the same list attached to the end of the hash
> >- * table (vsock_unbound_sockets).  Bound sockets are added to the hash table in
> >- * the bucket that their local address hashes to (vsock_bound_sockets(addr)
> >- * represents the list that addr hashes to).
> >+ * tables (vsock_unbound_sockets/vsock_unbound_dgram_sockets).  Bound sockets
> >+ * are added to the hash table in the bucket that their local address hashes to
> >+ * (vsock_bound_sockets(addr) and vsock_bound_dgram_sockets(addr) represents
> >+ * the list that addr hashes to).
> >  *
> >- * Specifically, we initialize the vsock_bind_table array to a size of
> >- * VSOCK_HASH_SIZE + 1 so that vsock_bind_table[0] through
> >- * vsock_bind_table[VSOCK_HASH_SIZE - 1] are for bound sockets and
> >- * vsock_bind_table[VSOCK_HASH_SIZE] is for unbound sockets.  The hash function
> >- * mods with VSOCK_HASH_SIZE to ensure this.
> >+ * Specifically, taking connectible sockets as an example we initialize the
> >+ * vsock_bind_table array to a size of VSOCK_HASH_SIZE + 1 so that
> >+ * vsock_bind_table[0] through vsock_bind_table[VSOCK_HASH_SIZE - 1] are for
> >+ * bound sockets and vsock_bind_table[VSOCK_HASH_SIZE] is for unbound sockets.
> >+ * The hash function mods with VSOCK_HASH_SIZE to ensure this.
> >+ * Datagrams and vsock_dgram_bind_table operate in the same way.
> >  */
> > #define MAX_PORT_RETRIES        24
> >
> > #define VSOCK_HASH(addr)        ((addr)->svm_port % VSOCK_HASH_SIZE)
> > #define vsock_bound_sockets(addr) (&vsock_bind_table[VSOCK_HASH(addr)])
> >+#define vsock_bound_dgram_sockets(addr) (&vsock_dgram_bind_table[VSOCK_HASH(addr)])
> > #define vsock_unbound_sockets     (&vsock_bind_table[VSOCK_HASH_SIZE])
> >+#define vsock_unbound_dgram_sockets     (&vsock_dgram_bind_table[VSOCK_HASH_SIZE])
> >
> > /* XXX This can probably be implemented in a better way. */
> > #define VSOCK_CONN_HASH(src, dst)                             \
> >@@ -182,6 +192,8 @@ struct list_head vsock_connected_table[VSOCK_HASH_SIZE];
> > EXPORT_SYMBOL_GPL(vsock_connected_table);
> > DEFINE_SPINLOCK(vsock_table_lock);
> > EXPORT_SYMBOL_GPL(vsock_table_lock);
> >+static struct list_head vsock_dgram_bind_table[VSOCK_HASH_SIZE + 1];
> >+static DEFINE_SPINLOCK(vsock_dgram_table_lock);
> >
> > /* Autobind this socket to the local address if necessary. */
> > static int vsock_auto_bind(struct vsock_sock *vsk)
> >@@ -204,6 +216,9 @@ static void vsock_init_tables(void)
> >
> >       for (i = 0; i < ARRAY_SIZE(vsock_connected_table); i++)
> >               INIT_LIST_HEAD(&vsock_connected_table[i]);
> >+
> >+      for (i = 0; i < ARRAY_SIZE(vsock_dgram_bind_table); i++)
> >+              INIT_LIST_HEAD(&vsock_dgram_bind_table[i]);
> > }
> >
> > static void __vsock_insert_bound(struct list_head *list,
> >@@ -271,13 +286,28 @@ static struct sock *__vsock_find_connected_socket(struct sockaddr_vm *src,
> >       return NULL;
> > }
> >
> >-static void vsock_insert_unbound(struct vsock_sock *vsk)
> >+static void __vsock_insert_dgram_unbound(struct vsock_sock *vsk)
> >+{
> >+      spin_lock_bh(&vsock_dgram_table_lock);
> >+      __vsock_insert_bound(vsock_unbound_dgram_sockets, vsk);
> >+      spin_unlock_bh(&vsock_dgram_table_lock);
> >+}
> >+
> >+static void __vsock_insert_connectible_unbound(struct vsock_sock *vsk)
> > {
> >       spin_lock_bh(&vsock_table_lock);
> >       __vsock_insert_bound(vsock_unbound_sockets, vsk);
> >       spin_unlock_bh(&vsock_table_lock);
> > }
> >
> >+static void vsock_insert_unbound(struct vsock_sock *vsk)
> >+{
> >+      if (sock_type_connectible(sk_vsock(vsk)->sk_type))
> >+              __vsock_insert_connectible_unbound(vsk);
> >+      else
> >+              __vsock_insert_dgram_unbound(vsk);
> >+}
> >+
> > void vsock_insert_connected(struct vsock_sock *vsk)
> > {
> >       struct list_head *list = vsock_connected_sockets(
> >@@ -289,6 +319,14 @@ void vsock_insert_connected(struct vsock_sock *vsk)
> > }
> > EXPORT_SYMBOL_GPL(vsock_insert_connected);
> >
> >+static void vsock_remove_dgram_bound(struct vsock_sock *vsk)
> >+{
> >+      spin_lock_bh(&vsock_dgram_table_lock);
> >+      if (__vsock_in_bound_table(vsk))
> >+              __vsock_remove_bound(vsk);
> >+      spin_unlock_bh(&vsock_dgram_table_lock);
> >+}
> >+
> > void vsock_remove_bound(struct vsock_sock *vsk)
> > {
> >       spin_lock_bh(&vsock_table_lock);
> >@@ -340,7 +378,10 @@ EXPORT_SYMBOL_GPL(vsock_find_connected_socket);
> >
> > void vsock_remove_sock(struct vsock_sock *vsk)
> > {
> >-      vsock_remove_bound(vsk);
> >+      if (sock_type_connectible(sk_vsock(vsk)->sk_type))
> >+              vsock_remove_bound(vsk);
> >+      else
> >+              vsock_remove_dgram_bound(vsk);
>
> Can we try to be consistent, for example we have vsock_insert_unbound()
> which calls internally sock_type_connectible(), while
> vsock_remove_bound() is just for connectible sockets. It's a bit
> confusing.

I agree with you. I will make the style more consistent by keeping
vsock_insert_unbound() only work on connectible sockets.

>
> >       vsock_remove_connected(vsk);
> > }
> > EXPORT_SYMBOL_GPL(vsock_remove_sock);
> >@@ -746,11 +787,19 @@ static int __vsock_bind_connectible(struct vsock_sock *vsk,
> >       return vsock_bind_common(vsk, addr, vsock_bind_table, VSOCK_HASH_SIZE + 1);
> > }
> >
> >-static int __vsock_bind_dgram(struct vsock_sock *vsk,
> >-                            struct sockaddr_vm *addr)
> >+static int vsock_bind_dgram(struct vsock_sock *vsk,
> >+                          struct sockaddr_vm *addr)
>
> Why we are renaming this?

I will keep the original __vsock_bind_dgram() for consistency.

>
> > {
> >-      if (!vsk->transport || !vsk->transport->dgram_bind)
> >-              return -EINVAL;
> >+      if (!vsk->transport || !vsk->transport->dgram_bind) {
>
> Why this condition?
>
> Maybe a comment here is needed because I'm lost...

We currently use !vsk->transport->dgram_bind to determine if this is
VMCI dgram transport. Will add a comment explaining this.

>
> >+              int retval;
> >+
> >+              spin_lock_bh(&vsock_dgram_table_lock);
> >+              retval = vsock_bind_common(vsk, addr, vsock_dgram_bind_table,
> >+                                         VSOCK_HASH_SIZE);
>
> Should we use VSOCK_HASH_SIZE + 1 here?
>
> Using ARRAY_SIZE(x) should avoid this problem.

Yes. The size here is wrong. I will remove the size check (the
discussion is in patch 4).

Thanks,
Amery



>
>
> >+              spin_unlock_bh(&vsock_dgram_table_lock);
> >+
> >+              return retval;
> >+      }
> >
> >       return vsk->transport->dgram_bind(vsk, addr);
> > }
> >@@ -781,7 +830,7 @@ static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr)
> >               break;
> >
> >       case SOCK_DGRAM:
> >-              retval = __vsock_bind_dgram(vsk, addr);
> >+              retval = vsock_bind_dgram(vsk, addr);
> >               break;
> >
> >       default:
> >--
> >2.20.1
> >
>
Stefano Garzarella July 30, 2024, 8:05 a.m. UTC | #3
On Sun, Jul 28, 2024 at 02:37:24PM GMT, Amery Hung wrote:
>On Tue, Jul 23, 2024 at 7:41 AM Stefano Garzarella <sgarzare@redhat.com> wrote:
>>
>> On Wed, Jul 10, 2024 at 09:25:46PM GMT, Amery Hung wrote:
>> >From: Bobby Eshleman <bobby.eshleman@bytedance.com>
>> >
>> >This commit adds support for bound dgram sockets to be tracked in a
>> >separate bind table from connectible sockets in order to avoid address
>> >collisions. With this commit, users can simultaneously bind a dgram
>> >socket and connectible socket to the same CID and port.
>> >
>> >Signed-off-by: Bobby Eshleman <bobby.eshleman@bytedance.com>
>> >---
>> > net/vmw_vsock/af_vsock.c | 103 +++++++++++++++++++++++++++++----------
>> > 1 file changed, 76 insertions(+), 27 deletions(-)
>> >
>> >diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
>> >index d571be9cdbf0..ab08cd81720e 100644
>> >--- a/net/vmw_vsock/af_vsock.c
>> >+++ b/net/vmw_vsock/af_vsock.c
>> >@@ -10,18 +10,23 @@
>> >  * - There are two kinds of sockets: those created by user action (such as
>> >  * calling socket(2)) and those created by incoming connection request packets.
>> >  *
>> >- * - There are two "global" tables, one for bound sockets (sockets that have
>> >- * specified an address that they are responsible for) and one for connected
>> >- * sockets (sockets that have established a connection with another socket).
>> >- * These tables are "global" in that all sockets on the system are placed
>> >- * within them. - Note, though, that the bound table contains an extra entry
>> >- * for a list of unbound sockets and SOCK_DGRAM sockets will always remain in
>> >- * that list. The bound table is used solely for lookup of sockets when packets
>> >- * are received and that's not necessary for SOCK_DGRAM sockets since we create
>> >- * a datagram handle for each and need not perform a lookup.  Keeping SOCK_DGRAM
>> >- * sockets out of the bound hash buckets will reduce the chance of collisions
>> >- * when looking for SOCK_STREAM sockets and prevents us from having to check the
>> >- * socket type in the hash table lookups.
>> >+ * - There are three "global" tables, one for bound connectible (stream /
>> >+ * seqpacket) sockets, one for bound datagram sockets, and one for connected
>> >+ * sockets. Bound sockets are sockets that have specified an address that
>> >+ * they are responsible for. Connected sockets are sockets that have
>> >+ * established a connection with another socket. These tables are "global" in
>> >+ * that all sockets on the system are placed within them. - Note, though,
>> >+ * that the bound tables contain an extra entry for a list of unbound
>> >+ * sockets. The bound tables are used solely for lookup of sockets when packets
>> >+ * are received.
>> >+ *
>> >+ * - There are separate bind tables for connectible and datagram sockets to avoid
>> >+ * address collisions between stream/seqpacket sockets and datagram sockets.
>> >+ *
>> >+ * - Transports may elect to NOT use the global datagram bind table by
>> >+ * implementing the ->dgram_bind() callback. If that callback is implemented,
>> >+ * the global bind table is not used and the responsibility of bound datagram
>> >+ * socket tracking is deferred to the transport.
>> >  *
>> >  * - Sockets created by user action will either be "client" sockets that
>> >  * initiate a connection or "server" sockets that listen for connections; we do
>> >@@ -116,6 +121,7 @@
>> > static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr);
>> > static void vsock_sk_destruct(struct sock *sk);
>> > static int vsock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
>> >+static bool sock_type_connectible(u16 type);
>> >
>> > /* Protocol family. */
>> > struct proto vsock_proto = {
>> >@@ -152,21 +158,25 @@ static DEFINE_MUTEX(vsock_register_mutex);
>> >  * VSocket is stored in the connected hash table.
>> >  *
>> >  * Unbound sockets are all put on the same list attached to the end of the hash
>> >- * table (vsock_unbound_sockets).  Bound sockets are added to the hash table in
>> >- * the bucket that their local address hashes to (vsock_bound_sockets(addr)
>> >- * represents the list that addr hashes to).
>> >+ * tables (vsock_unbound_sockets/vsock_unbound_dgram_sockets).  Bound sockets
>> >+ * are added to the hash table in the bucket that their local address hashes to
>> >+ * (vsock_bound_sockets(addr) and vsock_bound_dgram_sockets(addr) represents
>> >+ * the list that addr hashes to).
>> >  *
>> >- * Specifically, we initialize the vsock_bind_table array to a size of
>> >- * VSOCK_HASH_SIZE + 1 so that vsock_bind_table[0] through
>> >- * vsock_bind_table[VSOCK_HASH_SIZE - 1] are for bound sockets and
>> >- * vsock_bind_table[VSOCK_HASH_SIZE] is for unbound sockets.  The hash function
>> >- * mods with VSOCK_HASH_SIZE to ensure this.
>> >+ * Specifically, taking connectible sockets as an example we initialize the
>> >+ * vsock_bind_table array to a size of VSOCK_HASH_SIZE + 1 so that
>> >+ * vsock_bind_table[0] through vsock_bind_table[VSOCK_HASH_SIZE - 1] are for
>> >+ * bound sockets and vsock_bind_table[VSOCK_HASH_SIZE] is for unbound sockets.
>> >+ * The hash function mods with VSOCK_HASH_SIZE to ensure this.
>> >+ * Datagrams and vsock_dgram_bind_table operate in the same way.
>> >  */
>> > #define MAX_PORT_RETRIES        24
>> >
>> > #define VSOCK_HASH(addr)        ((addr)->svm_port % VSOCK_HASH_SIZE)
>> > #define vsock_bound_sockets(addr) (&vsock_bind_table[VSOCK_HASH(addr)])
>> >+#define vsock_bound_dgram_sockets(addr) (&vsock_dgram_bind_table[VSOCK_HASH(addr)])
>> > #define vsock_unbound_sockets     (&vsock_bind_table[VSOCK_HASH_SIZE])
>> >+#define vsock_unbound_dgram_sockets     (&vsock_dgram_bind_table[VSOCK_HASH_SIZE])
>> >
>> > /* XXX This can probably be implemented in a better way. */
>> > #define VSOCK_CONN_HASH(src, dst)                             \
>> >@@ -182,6 +192,8 @@ struct list_head vsock_connected_table[VSOCK_HASH_SIZE];
>> > EXPORT_SYMBOL_GPL(vsock_connected_table);
>> > DEFINE_SPINLOCK(vsock_table_lock);
>> > EXPORT_SYMBOL_GPL(vsock_table_lock);
>> >+static struct list_head vsock_dgram_bind_table[VSOCK_HASH_SIZE + 1];
>> >+static DEFINE_SPINLOCK(vsock_dgram_table_lock);
>> >
>> > /* Autobind this socket to the local address if necessary. */
>> > static int vsock_auto_bind(struct vsock_sock *vsk)
>> >@@ -204,6 +216,9 @@ static void vsock_init_tables(void)
>> >
>> >       for (i = 0; i < ARRAY_SIZE(vsock_connected_table); i++)
>> >               INIT_LIST_HEAD(&vsock_connected_table[i]);
>> >+
>> >+      for (i = 0; i < ARRAY_SIZE(vsock_dgram_bind_table); i++)
>> >+              INIT_LIST_HEAD(&vsock_dgram_bind_table[i]);
>> > }
>> >
>> > static void __vsock_insert_bound(struct list_head *list,
>> >@@ -271,13 +286,28 @@ static struct sock *__vsock_find_connected_socket(struct sockaddr_vm *src,
>> >       return NULL;
>> > }
>> >
>> >-static void vsock_insert_unbound(struct vsock_sock *vsk)
>> >+static void __vsock_insert_dgram_unbound(struct vsock_sock *vsk)
>> >+{
>> >+      spin_lock_bh(&vsock_dgram_table_lock);
>> >+      __vsock_insert_bound(vsock_unbound_dgram_sockets, vsk);
>> >+      spin_unlock_bh(&vsock_dgram_table_lock);
>> >+}
>> >+
>> >+static void __vsock_insert_connectible_unbound(struct vsock_sock *vsk)
>> > {
>> >       spin_lock_bh(&vsock_table_lock);
>> >       __vsock_insert_bound(vsock_unbound_sockets, vsk);
>> >       spin_unlock_bh(&vsock_table_lock);
>> > }
>> >
>> >+static void vsock_insert_unbound(struct vsock_sock *vsk)
>> >+{
>> >+      if (sock_type_connectible(sk_vsock(vsk)->sk_type))
>> >+              __vsock_insert_connectible_unbound(vsk);
>> >+      else
>> >+              __vsock_insert_dgram_unbound(vsk);
>> >+}
>> >+
>> > void vsock_insert_connected(struct vsock_sock *vsk)
>> > {
>> >       struct list_head *list = vsock_connected_sockets(
>> >@@ -289,6 +319,14 @@ void vsock_insert_connected(struct vsock_sock *vsk)
>> > }
>> > EXPORT_SYMBOL_GPL(vsock_insert_connected);
>> >
>> >+static void vsock_remove_dgram_bound(struct vsock_sock *vsk)
>> >+{
>> >+      spin_lock_bh(&vsock_dgram_table_lock);
>> >+      if (__vsock_in_bound_table(vsk))
>> >+              __vsock_remove_bound(vsk);
>> >+      spin_unlock_bh(&vsock_dgram_table_lock);
>> >+}
>> >+
>> > void vsock_remove_bound(struct vsock_sock *vsk)
>> > {
>> >       spin_lock_bh(&vsock_table_lock);
>> >@@ -340,7 +378,10 @@ EXPORT_SYMBOL_GPL(vsock_find_connected_socket);
>> >
>> > void vsock_remove_sock(struct vsock_sock *vsk)
>> > {
>> >-      vsock_remove_bound(vsk);
>> >+      if (sock_type_connectible(sk_vsock(vsk)->sk_type))
>> >+              vsock_remove_bound(vsk);
>> >+      else
>> >+              vsock_remove_dgram_bound(vsk);
>>
>> Can we try to be consistent, for example we have vsock_insert_unbound()
>> which calls internally sock_type_connectible(), while
>> vsock_remove_bound() is just for connectible sockets. It's a bit
>> confusing.
>
>I agree with you. I will make the style more consistent by keeping
>vsock_insert_unbound() only work on connectible sockets.

Maybe I would have done the opposite, making vsock_remove_bound() usable 
on all sockets. But I haven't really looked at whether that's feasible 
or not.

>
>>
>> >       vsock_remove_connected(vsk);
>> > }
>> > EXPORT_SYMBOL_GPL(vsock_remove_sock);
>> >@@ -746,11 +787,19 @@ static int __vsock_bind_connectible(struct vsock_sock *vsk,
>> >       return vsock_bind_common(vsk, addr, vsock_bind_table, VSOCK_HASH_SIZE + 1);
>> > }
>> >
>> >-static int __vsock_bind_dgram(struct vsock_sock *vsk,
>> >-                            struct sockaddr_vm *addr)
>> >+static int vsock_bind_dgram(struct vsock_sock *vsk,
>> >+                          struct sockaddr_vm *addr)
>>
>> Why we are renaming this?
>
>I will keep the original __vsock_bind_dgram() for consistency.
>
>>
>> > {
>> >-      if (!vsk->transport || !vsk->transport->dgram_bind)
>> >-              return -EINVAL;
>> >+      if (!vsk->transport || !vsk->transport->dgram_bind) {
>>
>> Why this condition?
>>
>> Maybe a comment here is needed because I'm lost...
>
>We currently use !vsk->transport->dgram_bind to determine if this is
>VMCI dgram transport. Will add a comment explaining this.

Thanks, what's not clear to me is why before this series we returned an 
error, whereas now we call vsock_bind_common().

Thanks,
Stefano

>
>>
>> >+              int retval;
>> >+
>> >+              spin_lock_bh(&vsock_dgram_table_lock);
>> >+              retval = vsock_bind_common(vsk, addr, vsock_dgram_bind_table,
>> >+                                         VSOCK_HASH_SIZE);
>>
>> Should we use VSOCK_HASH_SIZE + 1 here?
>>
>> Using ARRAY_SIZE(x) should avoid this problem.
>
>Yes. The size here is wrong. I will remove the size check (the
>discussion is in patch 4).
>
>Thanks,
>Amery
>
>
>
>>
>>
>> >+              spin_unlock_bh(&vsock_dgram_table_lock);
>> >+
>> >+              return retval;
>> >+      }
>> >
>> >       return vsk->transport->dgram_bind(vsk, addr);
>> > }
>> >@@ -781,7 +830,7 @@ static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr)
>> >               break;
>> >
>> >       case SOCK_DGRAM:
>> >-              retval = __vsock_bind_dgram(vsk, addr);
>> >+              retval = vsock_bind_dgram(vsk, addr);
>> >               break;
>> >
>> >       default:
>> >--
>> >2.20.1
>> >
>>
>
diff mbox series

Patch

diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index d571be9cdbf0..ab08cd81720e 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -10,18 +10,23 @@ 
  * - There are two kinds of sockets: those created by user action (such as
  * calling socket(2)) and those created by incoming connection request packets.
  *
- * - There are two "global" tables, one for bound sockets (sockets that have
- * specified an address that they are responsible for) and one for connected
- * sockets (sockets that have established a connection with another socket).
- * These tables are "global" in that all sockets on the system are placed
- * within them. - Note, though, that the bound table contains an extra entry
- * for a list of unbound sockets and SOCK_DGRAM sockets will always remain in
- * that list. The bound table is used solely for lookup of sockets when packets
- * are received and that's not necessary for SOCK_DGRAM sockets since we create
- * a datagram handle for each and need not perform a lookup.  Keeping SOCK_DGRAM
- * sockets out of the bound hash buckets will reduce the chance of collisions
- * when looking for SOCK_STREAM sockets and prevents us from having to check the
- * socket type in the hash table lookups.
+ * - There are three "global" tables, one for bound connectible (stream /
+ * seqpacket) sockets, one for bound datagram sockets, and one for connected
+ * sockets. Bound sockets are sockets that have specified an address that
+ * they are responsible for. Connected sockets are sockets that have
+ * established a connection with another socket. These tables are "global" in
+ * that all sockets on the system are placed within them. - Note, though,
+ * that the bound tables contain an extra entry for a list of unbound
+ * sockets. The bound tables are used solely for lookup of sockets when packets
+ * are received.
+ *
+ * - There are separate bind tables for connectible and datagram sockets to avoid
+ * address collisions between stream/seqpacket sockets and datagram sockets.
+ *
+ * - Transports may elect to NOT use the global datagram bind table by
+ * implementing the ->dgram_bind() callback. If that callback is implemented,
+ * the global bind table is not used and the responsibility of bound datagram
+ * socket tracking is deferred to the transport.
  *
  * - Sockets created by user action will either be "client" sockets that
  * initiate a connection or "server" sockets that listen for connections; we do
@@ -116,6 +121,7 @@ 
 static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr);
 static void vsock_sk_destruct(struct sock *sk);
 static int vsock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
+static bool sock_type_connectible(u16 type);
 
 /* Protocol family. */
 struct proto vsock_proto = {
@@ -152,21 +158,25 @@  static DEFINE_MUTEX(vsock_register_mutex);
  * VSocket is stored in the connected hash table.
  *
  * Unbound sockets are all put on the same list attached to the end of the hash
- * table (vsock_unbound_sockets).  Bound sockets are added to the hash table in
- * the bucket that their local address hashes to (vsock_bound_sockets(addr)
- * represents the list that addr hashes to).
+ * tables (vsock_unbound_sockets/vsock_unbound_dgram_sockets).  Bound sockets
+ * are added to the hash table in the bucket that their local address hashes to
+ * (vsock_bound_sockets(addr) and vsock_bound_dgram_sockets(addr) represents
+ * the list that addr hashes to).
  *
- * Specifically, we initialize the vsock_bind_table array to a size of
- * VSOCK_HASH_SIZE + 1 so that vsock_bind_table[0] through
- * vsock_bind_table[VSOCK_HASH_SIZE - 1] are for bound sockets and
- * vsock_bind_table[VSOCK_HASH_SIZE] is for unbound sockets.  The hash function
- * mods with VSOCK_HASH_SIZE to ensure this.
+ * Specifically, taking connectible sockets as an example we initialize the
+ * vsock_bind_table array to a size of VSOCK_HASH_SIZE + 1 so that
+ * vsock_bind_table[0] through vsock_bind_table[VSOCK_HASH_SIZE - 1] are for
+ * bound sockets and vsock_bind_table[VSOCK_HASH_SIZE] is for unbound sockets.
+ * The hash function mods with VSOCK_HASH_SIZE to ensure this.
+ * Datagrams and vsock_dgram_bind_table operate in the same way.
  */
 #define MAX_PORT_RETRIES        24
 
 #define VSOCK_HASH(addr)        ((addr)->svm_port % VSOCK_HASH_SIZE)
 #define vsock_bound_sockets(addr) (&vsock_bind_table[VSOCK_HASH(addr)])
+#define vsock_bound_dgram_sockets(addr) (&vsock_dgram_bind_table[VSOCK_HASH(addr)])
 #define vsock_unbound_sockets     (&vsock_bind_table[VSOCK_HASH_SIZE])
+#define vsock_unbound_dgram_sockets     (&vsock_dgram_bind_table[VSOCK_HASH_SIZE])
 
 /* XXX This can probably be implemented in a better way. */
 #define VSOCK_CONN_HASH(src, dst)				\
@@ -182,6 +192,8 @@  struct list_head vsock_connected_table[VSOCK_HASH_SIZE];
 EXPORT_SYMBOL_GPL(vsock_connected_table);
 DEFINE_SPINLOCK(vsock_table_lock);
 EXPORT_SYMBOL_GPL(vsock_table_lock);
+static struct list_head vsock_dgram_bind_table[VSOCK_HASH_SIZE + 1];
+static DEFINE_SPINLOCK(vsock_dgram_table_lock);
 
 /* Autobind this socket to the local address if necessary. */
 static int vsock_auto_bind(struct vsock_sock *vsk)
@@ -204,6 +216,9 @@  static void vsock_init_tables(void)
 
 	for (i = 0; i < ARRAY_SIZE(vsock_connected_table); i++)
 		INIT_LIST_HEAD(&vsock_connected_table[i]);
+
+	for (i = 0; i < ARRAY_SIZE(vsock_dgram_bind_table); i++)
+		INIT_LIST_HEAD(&vsock_dgram_bind_table[i]);
 }
 
 static void __vsock_insert_bound(struct list_head *list,
@@ -271,13 +286,28 @@  static struct sock *__vsock_find_connected_socket(struct sockaddr_vm *src,
 	return NULL;
 }
 
-static void vsock_insert_unbound(struct vsock_sock *vsk)
+static void __vsock_insert_dgram_unbound(struct vsock_sock *vsk)
+{
+	spin_lock_bh(&vsock_dgram_table_lock);
+	__vsock_insert_bound(vsock_unbound_dgram_sockets, vsk);
+	spin_unlock_bh(&vsock_dgram_table_lock);
+}
+
+static void __vsock_insert_connectible_unbound(struct vsock_sock *vsk)
 {
 	spin_lock_bh(&vsock_table_lock);
 	__vsock_insert_bound(vsock_unbound_sockets, vsk);
 	spin_unlock_bh(&vsock_table_lock);
 }
 
+static void vsock_insert_unbound(struct vsock_sock *vsk)
+{
+	if (sock_type_connectible(sk_vsock(vsk)->sk_type))
+		__vsock_insert_connectible_unbound(vsk);
+	else
+		__vsock_insert_dgram_unbound(vsk);
+}
+
 void vsock_insert_connected(struct vsock_sock *vsk)
 {
 	struct list_head *list = vsock_connected_sockets(
@@ -289,6 +319,14 @@  void vsock_insert_connected(struct vsock_sock *vsk)
 }
 EXPORT_SYMBOL_GPL(vsock_insert_connected);
 
+static void vsock_remove_dgram_bound(struct vsock_sock *vsk)
+{
+	spin_lock_bh(&vsock_dgram_table_lock);
+	if (__vsock_in_bound_table(vsk))
+		__vsock_remove_bound(vsk);
+	spin_unlock_bh(&vsock_dgram_table_lock);
+}
+
 void vsock_remove_bound(struct vsock_sock *vsk)
 {
 	spin_lock_bh(&vsock_table_lock);
@@ -340,7 +378,10 @@  EXPORT_SYMBOL_GPL(vsock_find_connected_socket);
 
 void vsock_remove_sock(struct vsock_sock *vsk)
 {
-	vsock_remove_bound(vsk);
+	if (sock_type_connectible(sk_vsock(vsk)->sk_type))
+		vsock_remove_bound(vsk);
+	else
+		vsock_remove_dgram_bound(vsk);
 	vsock_remove_connected(vsk);
 }
 EXPORT_SYMBOL_GPL(vsock_remove_sock);
@@ -746,11 +787,19 @@  static int __vsock_bind_connectible(struct vsock_sock *vsk,
 	return vsock_bind_common(vsk, addr, vsock_bind_table, VSOCK_HASH_SIZE + 1);
 }
 
-static int __vsock_bind_dgram(struct vsock_sock *vsk,
-			      struct sockaddr_vm *addr)
+static int vsock_bind_dgram(struct vsock_sock *vsk,
+			    struct sockaddr_vm *addr)
 {
-	if (!vsk->transport || !vsk->transport->dgram_bind)
-		return -EINVAL;
+	if (!vsk->transport || !vsk->transport->dgram_bind) {
+		int retval;
+
+		spin_lock_bh(&vsock_dgram_table_lock);
+		retval = vsock_bind_common(vsk, addr, vsock_dgram_bind_table,
+					   VSOCK_HASH_SIZE);
+		spin_unlock_bh(&vsock_dgram_table_lock);
+
+		return retval;
+	}
 
 	return vsk->transport->dgram_bind(vsk, addr);
 }
@@ -781,7 +830,7 @@  static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr)
 		break;
 
 	case SOCK_DGRAM:
-		retval = __vsock_bind_dgram(vsk, addr);
+		retval = vsock_bind_dgram(vsk, addr);
 		break;
 
 	default: