diff mbox series

[15/17] nvmet-tcp: enable TLS handshake upcall

Message ID 20230814111943.68325-16-hare@suse.de (mailing list archive)
State Superseded
Headers show
Series nvme: In-kernel TLS support for TCP | expand

Checks

Context Check Description
netdev/tree_selection success Not a local patch, async

Commit Message

Hannes Reinecke Aug. 14, 2023, 11:19 a.m. UTC
Add functions to start the TLS handshake upcall when
the TCP TSAS sectype is set to 'tls1.3' and add a config
option NVME_TARGET_TCP_TLS.

Signed-off-by: Hannes Reinecke <hare@suse.de>
---
 drivers/nvme/target/Kconfig    |  15 ++++
 drivers/nvme/target/configfs.c |  21 +++++
 drivers/nvme/target/nvmet.h    |   1 +
 drivers/nvme/target/tcp.c      | 146 ++++++++++++++++++++++++++++++++-
 4 files changed, 179 insertions(+), 4 deletions(-)

Comments

Sagi Grimberg Aug. 14, 2023, 12:48 p.m. UTC | #1
On 8/14/23 14:19, Hannes Reinecke wrote:
> Add functions to start the TLS handshake upcall when
> the TCP TSAS sectype is set to 'tls1.3' and add a config
> option NVME_TARGET_TCP_TLS.

The added refcount needs to be documented.
Also the general design of upcalling the TLS handshake to
userspace and continuing from there...

> 
> Signed-off-by: Hannes Reinecke <hare@suse.de>
> ---
>   drivers/nvme/target/Kconfig    |  15 ++++
>   drivers/nvme/target/configfs.c |  21 +++++
>   drivers/nvme/target/nvmet.h    |   1 +
>   drivers/nvme/target/tcp.c      | 146 ++++++++++++++++++++++++++++++++-
>   4 files changed, 179 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/nvme/target/Kconfig b/drivers/nvme/target/Kconfig
> index 79fc64035ee3..8a6c9cae804c 100644
> --- a/drivers/nvme/target/Kconfig
> +++ b/drivers/nvme/target/Kconfig
> @@ -84,6 +84,21 @@ config NVME_TARGET_TCP
>   
>   	  If unsure, say N.
>   
> +config NVME_TARGET_TCP_TLS
> +	bool "NVMe over Fabrics TCP target TLS encryption support"
> +	depends on NVME_TARGET_TCP
> +	select NVME_COMMON
> +	select NVME_KEYRING
> +	select NET_HANDSHAKE
> +	select KEYS
> +	help
> +	  Enables TLS encryption for the NVMe TCP target using the netlink handshake API.
> +
> +	  The TLS handshake daemon is availble at
> +	  https://github.com/oracle/ktls-utils.
> +
> +	  If unsure, say N.
> +
>   config NVME_TARGET_AUTH
>   	bool "NVMe over Fabrics In-band Authentication support"
>   	depends on NVME_TARGET
> diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
> index efbfed310370..ad1fb32c7387 100644
> --- a/drivers/nvme/target/configfs.c
> +++ b/drivers/nvme/target/configfs.c
> @@ -15,6 +15,7 @@
>   #ifdef CONFIG_NVME_TARGET_AUTH
>   #include <linux/nvme-auth.h>
>   #endif
> +#include <linux/nvme-keyring.h>
>   #include <crypto/hash.h>
>   #include <crypto/kpp.h>
>   
> @@ -397,6 +398,17 @@ static ssize_t nvmet_addr_tsas_store(struct config_item *item,
>   	return -EINVAL;
>   
>   found:
> +	if (sectype == NVMF_TCP_SECTYPE_TLS13) {
> +		if (!IS_ENABLED(CONFIG_NVME_TARGET_TCP_TLS)) {
> +			pr_err("TLS is not supported\n");
> +			return -EINVAL;
> +		}
> +		if (!port->keyring) {
> +			pr_err("TLS keyring not configured\n");
> +			return -EINVAL;
> +		}
> +	}
> +
>   	nvmet_port_init_tsas_tcp(port, sectype);
>   	/*
>   	 * The TLS implementation currently does not support
> @@ -1815,6 +1827,7 @@ static void nvmet_port_release(struct config_item *item)
>   	flush_workqueue(nvmet_wq);
>   	list_del(&port->global_entry);
>   
> +	key_put(port->keyring);
>   	kfree(port->ana_state);
>   	kfree(port);
>   }
> @@ -1864,6 +1877,14 @@ static struct config_group *nvmet_ports_make(struct config_group *group,
>   		return ERR_PTR(-ENOMEM);
>   	}
>   
> +	if (nvme_keyring_id()) {
> +		port->keyring = key_lookup(nvme_keyring_id());
> +		if (IS_ERR(port->keyring)) {
> +			pr_warn("NVMe keyring not available, disabling TLS\n");
> +			port->keyring = NULL;

Why set this to NULL?

> +		}
> +	}
> +
>   	for (i = 1; i <= NVMET_MAX_ANAGRPS; i++) {
>   		if (i == NVMET_DEFAULT_ANA_GRPID)
>   			port->ana_state[1] = NVME_ANA_OPTIMIZED;
> diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
> index 8cfd60f3b564..7f9ae53c1df5 100644
> --- a/drivers/nvme/target/nvmet.h
> +++ b/drivers/nvme/target/nvmet.h
> @@ -158,6 +158,7 @@ struct nvmet_port {
>   	struct config_group		ana_groups_group;
>   	struct nvmet_ana_group		ana_default_group;
>   	enum nvme_ana_state		*ana_state;
> +	struct key			*keyring;
>   	void				*priv;
>   	bool				enabled;
>   	int				inline_data_size;
> diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
> index f19ea9d923fd..77fa339008e1 100644
> --- a/drivers/nvme/target/tcp.c
> +++ b/drivers/nvme/target/tcp.c
> @@ -8,9 +8,13 @@
>   #include <linux/init.h>
>   #include <linux/slab.h>
>   #include <linux/err.h>
> +#include <linux/key.h>
>   #include <linux/nvme-tcp.h>
> +#include <linux/nvme-keyring.h>
>   #include <net/sock.h>
>   #include <net/tcp.h>
> +#include <net/tls.h>
> +#include <net/handshake.h>
>   #include <linux/inet.h>
>   #include <linux/llist.h>
>   #include <crypto/hash.h>
> @@ -66,6 +70,16 @@ device_param_cb(idle_poll_period_usecs, &set_param_ops,
>   MODULE_PARM_DESC(idle_poll_period_usecs,
>   		"nvmet tcp io_work poll till idle time period in usecs: Default 0");
>   
> +#ifdef CONFIG_NVME_TARGET_TCP_TLS
> +/*
> + * TLS handshake timeout
> + */
> +static int tls_handshake_timeout = 10;
> +module_param(tls_handshake_timeout, int, 0644);
> +MODULE_PARM_DESC(tls_handshake_timeout,
> +		 "nvme TLS handshake timeout in seconds (default 10)");
> +#endif
> +
>   #define NVMET_TCP_RECV_BUDGET		8
>   #define NVMET_TCP_SEND_BUDGET		8
>   #define NVMET_TCP_IO_WORK_BUDGET	64
> @@ -122,11 +136,13 @@ struct nvmet_tcp_cmd {
>   
>   enum nvmet_tcp_queue_state {
>   	NVMET_TCP_Q_CONNECTING,
> +	NVMET_TCP_Q_TLS_HANDSHAKE,
>   	NVMET_TCP_Q_LIVE,
>   	NVMET_TCP_Q_DISCONNECTING,
>   };
>   
>   struct nvmet_tcp_queue {
> +	struct kref		kref;

Why is kref the first member of the struct?

>   	struct socket		*sock;
>   	struct nvmet_tcp_port	*port;
>   	struct work_struct	io_work;
> @@ -155,6 +171,10 @@ struct nvmet_tcp_queue {
>   	struct ahash_request	*snd_hash;
>   	struct ahash_request	*rcv_hash;
>   
> +	/* TLS state */
> +	key_serial_t		tls_pskid;
> +	struct delayed_work	tls_handshake_work;
> +
>   	unsigned long           poll_end;
>   
>   	spinlock_t		state_lock;
> @@ -1283,12 +1303,21 @@ static int nvmet_tcp_try_recv(struct nvmet_tcp_queue *queue,
>   	return ret;
>   }
>   
> +static void nvmet_tcp_release_queue(struct kref *kref)
> +{
> +	struct nvmet_tcp_queue *queue =
> +		container_of(kref, struct nvmet_tcp_queue, kref);
> +
> +	WARN_ON(queue->state != NVMET_TCP_Q_DISCONNECTING);
> +	queue_work(nvmet_wq, &queue->release_work);
> +}
> +
>   static void nvmet_tcp_schedule_release_queue(struct nvmet_tcp_queue *queue)
>   {
>   	spin_lock_bh(&queue->state_lock);
>   	if (queue->state != NVMET_TCP_Q_DISCONNECTING) {
>   		queue->state = NVMET_TCP_Q_DISCONNECTING;
> -		queue_work(nvmet_wq, &queue->release_work);
> +		kref_put(&queue->kref, nvmet_tcp_release_queue);
>   	}
>   	spin_unlock_bh(&queue->state_lock);
>   }
> @@ -1485,6 +1514,8 @@ static void nvmet_tcp_release_queue_work(struct work_struct *w)
>   	mutex_unlock(&nvmet_tcp_queue_mutex);
>   
>   	nvmet_tcp_restore_socket_callbacks(queue);
> +	tls_handshake_cancel(queue->sock->sk);
> +	cancel_delayed_work_sync(&queue->tls_handshake_work);

We should call it tls_handshake_tmo_work or something to make it
clear it is a timeout work.

>   	cancel_work_sync(&queue->io_work);
>   	/* stop accepting incoming data */
>   	queue->rcv_state = NVMET_TCP_RECV_ERR;
> @@ -1512,8 +1543,13 @@ static void nvmet_tcp_data_ready(struct sock *sk)
>   
>   	read_lock_bh(&sk->sk_callback_lock);
>   	queue = sk->sk_user_data;
> -	if (likely(queue))
> -		queue_work_on(queue_cpu(queue), nvmet_tcp_wq, &queue->io_work);
> +	if (likely(queue)) {
> +		if (queue->data_ready)
> +			queue->data_ready(sk);
> +		if (queue->state != NVMET_TCP_Q_TLS_HANDSHAKE)
> +			queue_work_on(queue_cpu(queue), nvmet_tcp_wq,
> +				      &queue->io_work);
> +	}
>   	read_unlock_bh(&sk->sk_callback_lock);
>   }
>   
> @@ -1621,6 +1657,83 @@ static int nvmet_tcp_set_queue_sock(struct nvmet_tcp_queue *queue)
>   	return ret;
>   }
>   
> +#ifdef CONFIG_NVME_TARGET_TCP_TLS
> +static void nvmet_tcp_tls_handshake_done(void *data, int status,
> +					 key_serial_t peerid)
> +{
> +	struct nvmet_tcp_queue *queue = data;
> +
> +	pr_debug("queue %d: TLS handshake done, key %x, status %d\n",
> +		 queue->idx, peerid, status);
> +	spin_lock_bh(&queue->state_lock);
> +	if (queue->state != NVMET_TCP_Q_TLS_HANDSHAKE) {

Is this even possible?

> +		pr_warn("queue %d: TLS handshake already completed\n",
> +			queue->idx);
> +		spin_unlock_bh(&queue->state_lock);
> +		kref_put(&queue->kref, nvmet_tcp_release_queue);

How can we get here?

> +		return;
> +	}
> +	if (!status)
> +		queue->tls_pskid = peerid;
> +	queue->state = NVMET_TCP_Q_CONNECTING;
> +	spin_unlock_bh(&queue->state_lock);
> +
> +	cancel_delayed_work_sync(&queue->tls_handshake_work);
> +	if (status) {

Wait, did we assign the sk_state_change in this stage? What will
sock shutdown trigger?

> +		kernel_sock_shutdown(queue->sock, SHUT_RDWR);

Probably the put can be moved to an out: label at the end.

> +		kref_put(&queue->kref, nvmet_tcp_release_queue);
> +		return;
> +	}
> +
> +	pr_debug("queue %d: resetting queue callbacks after TLS handshake\n",
> +		 queue->idx);
> +	nvmet_tcp_set_queue_sock(queue);
> +	kref_put(&queue->kref, nvmet_tcp_release_queue);
> +}
> +
> +static void nvmet_tcp_tls_handshake_timeout_work(struct work_struct *w)
> +{
> +	struct nvmet_tcp_queue *queue = container_of(to_delayed_work(w),
> +			struct nvmet_tcp_queue, tls_handshake_work);
> +
> +	pr_debug("queue %d: TLS handshake timeout\n", queue->idx);

Probably it's better to make this pr_warn...

> +	if (!tls_handshake_cancel(queue->sock->sk))
> +		return;
> +	kernel_sock_shutdown(queue->sock, SHUT_RDWR);

Same question here, did we assign sk_state_change yet?

> +	kref_put(&queue->kref, nvmet_tcp_release_queue);
> +}
> +
> +static int nvmet_tcp_tls_handshake(struct nvmet_tcp_queue *queue)
> +{
> +	int ret = -EOPNOTSUPP;
> +	struct tls_handshake_args args;
> +
> +	if (queue->state != NVMET_TCP_Q_TLS_HANDSHAKE) {
> +		pr_warn("cannot start TLS in state %d\n", queue->state);
> +		return -EINVAL;
> +	}
> +
> +	kref_get(&queue->kref);
> +	pr_debug("queue %d: TLS ServerHello\n", queue->idx);
> +	memset(&args, 0, sizeof(args));
> +	args.ta_sock = queue->sock;
> +	args.ta_done = nvmet_tcp_tls_handshake_done;
> +	args.ta_data = queue;
> +	args.ta_keyring = key_serial(queue->port->nport->keyring);
> +	args.ta_timeout_ms = tls_handshake_timeout * 1000;
> +
> +	ret = tls_server_hello_psk(&args, GFP_KERNEL);
> +	if (ret) {
> +		kref_put(&queue->kref, nvmet_tcp_release_queue);
> +		pr_err("failed to start TLS, err=%d\n", ret);
> +	} else {
> +		queue_delayed_work(nvmet_wq, &queue->tls_handshake_work,
> +				   tls_handshake_timeout * HZ);
> +	}
> +	return ret;
> +}
> +#endif
> +
>   static void nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port,
>   		struct socket *newsock)
>   {
> @@ -1636,11 +1749,16 @@ static void nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port,
>   
>   	INIT_WORK(&queue->release_work, nvmet_tcp_release_queue_work);
>   	INIT_WORK(&queue->io_work, nvmet_tcp_io_work);
> +	kref_init(&queue->kref);
>   	queue->sock = newsock;
>   	queue->port = port;
>   	queue->nr_cmds = 0;
>   	spin_lock_init(&queue->state_lock);
> -	queue->state = NVMET_TCP_Q_CONNECTING;
> +	if (queue->port->nport->disc_addr.tsas.tcp.sectype ==
> +	    NVMF_TCP_SECTYPE_TLS13)
> +		queue->state = NVMET_TCP_Q_TLS_HANDSHAKE;
> +	else
> +		queue->state = NVMET_TCP_Q_CONNECTING;
>   	INIT_LIST_HEAD(&queue->free_list);
>   	init_llist_head(&queue->resp_list);
>   	INIT_LIST_HEAD(&queue->resp_send_list);
> @@ -1671,12 +1789,32 @@ static void nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port,
>   	list_add_tail(&queue->queue_list, &nvmet_tcp_queue_list);
>   	mutex_unlock(&nvmet_tcp_queue_mutex);
>   
> +#ifdef CONFIG_NVME_TARGET_TCP_TLS
> +	INIT_DELAYED_WORK(&queue->tls_handshake_work,
> +			  nvmet_tcp_tls_handshake_timeout_work);
> +	if (queue->state == NVMET_TCP_Q_TLS_HANDSHAKE) {
> +		struct sock *sk = queue->sock->sk;
> +
> +		/* Restore the default callbacks before starting upcall */
> +		read_lock_bh(&sk->sk_callback_lock);
> +		sk->sk_user_data = NULL;
> +		sk->sk_data_ready = port->data_ready;
> +		read_unlock_bh(&sk->sk_callback_lock);
> +		if (!nvmet_tcp_tls_handshake(queue))
> +			return;
> +
> +		/* TLS handshake failed, terminate the connection */
> +		goto out_destroy_sq;
> +	}
> +#endif
> +
>   	ret = nvmet_tcp_set_queue_sock(queue);
>   	if (ret)
>   		goto out_destroy_sq;
>   
>   	return;
>   out_destroy_sq:
> +	queue->state = NVMET_TCP_Q_DISCONNECTING;

Can you clarify what this is used for?

>   	mutex_lock(&nvmet_tcp_queue_mutex);
>   	list_del_init(&queue->queue_list);
>   	mutex_unlock(&nvmet_tcp_queue_mutex);
Hannes Reinecke Aug. 14, 2023, 2:03 p.m. UTC | #2
On 8/14/23 14:48, Sagi Grimberg wrote:
> 
> 
> On 8/14/23 14:19, Hannes Reinecke wrote:
>> Add functions to start the TLS handshake upcall when
>> the TCP TSAS sectype is set to 'tls1.3' and add a config
>> option NVME_TARGET_TCP_TLS.
> 
> Need to document the refcount added.
> Also the general design with upcalling tls handshake in
> userspace and continue from there...
> 
Okay.

>>
>> Signed-off-by: Hannes Reinecke <hare@suse.de>
>> ---
>>   drivers/nvme/target/Kconfig    |  15 ++++
>>   drivers/nvme/target/configfs.c |  21 +++++
>>   drivers/nvme/target/nvmet.h    |   1 +
>>   drivers/nvme/target/tcp.c      | 146 ++++++++++++++++++++++++++++++++-
>>   4 files changed, 179 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/nvme/target/Kconfig b/drivers/nvme/target/Kconfig
>> index 79fc64035ee3..8a6c9cae804c 100644
>> --- a/drivers/nvme/target/Kconfig
>> +++ b/drivers/nvme/target/Kconfig
>> @@ -84,6 +84,21 @@ config NVME_TARGET_TCP
>>         If unsure, say N.
>> +config NVME_TARGET_TCP_TLS
>> +    bool "NVMe over Fabrics TCP target TLS encryption support"
>> +    depends on NVME_TARGET_TCP
>> +    select NVME_COMMON
>> +    select NVME_KEYRING
>> +    select NET_HANDSHAKE
>> +    select KEYS
>> +    help
>> +      Enables TLS encryption for the NVMe TCP target using the 
>> netlink handshake API.
>> +
>> +      The TLS handshake daemon is availble at
>> +      https://github.com/oracle/ktls-utils.
>> +
>> +      If unsure, say N.
>> +
>>   config NVME_TARGET_AUTH
>>       bool "NVMe over Fabrics In-band Authentication support"
>>       depends on NVME_TARGET
>> diff --git a/drivers/nvme/target/configfs.c 
>> b/drivers/nvme/target/configfs.c
>> index efbfed310370..ad1fb32c7387 100644
>> --- a/drivers/nvme/target/configfs.c
>> +++ b/drivers/nvme/target/configfs.c
>> @@ -15,6 +15,7 @@
>>   #ifdef CONFIG_NVME_TARGET_AUTH
>>   #include <linux/nvme-auth.h>
>>   #endif
>> +#include <linux/nvme-keyring.h>
>>   #include <crypto/hash.h>
>>   #include <crypto/kpp.h>
>> @@ -397,6 +398,17 @@ static ssize_t nvmet_addr_tsas_store(struct 
>> config_item *item,
>>       return -EINVAL;
>>   found:
>> +    if (sectype == NVMF_TCP_SECTYPE_TLS13) {
>> +        if (!IS_ENABLED(CONFIG_NVME_TARGET_TCP_TLS)) {
>> +            pr_err("TLS is not supported\n");
>> +            return -EINVAL;
>> +        }
>> +        if (!port->keyring) {
>> +            pr_err("TLS keyring not configured\n");
>> +            return -EINVAL;
>> +        }
>> +    }
>> +
>>       nvmet_port_init_tsas_tcp(port, sectype);
>>       /*
>>        * The TLS implementation currently does not support
>> @@ -1815,6 +1827,7 @@ static void nvmet_port_release(struct 
>> config_item *item)
>>       flush_workqueue(nvmet_wq);
>>       list_del(&port->global_entry);
>> +    key_put(port->keyring);
>>       kfree(port->ana_state);
>>       kfree(port);
>>   }
>> @@ -1864,6 +1877,14 @@ static struct config_group 
>> *nvmet_ports_make(struct config_group *group,
>>           return ERR_PTR(-ENOMEM);
>>       }
>> +    if (nvme_keyring_id()) {
>> +        port->keyring = key_lookup(nvme_keyring_id());
>> +        if (IS_ERR(port->keyring)) {
>> +            pr_warn("NVMe keyring not available, disabling TLS\n");
>> +            port->keyring = NULL;
> 
> why setting this to NULL?
> 
It's checked when changing TSAS; we can only enable TLS if the nvme
keyring is available.

>> +        }
>> +    }
>> +
>>       for (i = 1; i <= NVMET_MAX_ANAGRPS; i++) {
>>           if (i == NVMET_DEFAULT_ANA_GRPID)
>>               port->ana_state[1] = NVME_ANA_OPTIMIZED;
>> diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
>> index 8cfd60f3b564..7f9ae53c1df5 100644
>> --- a/drivers/nvme/target/nvmet.h
>> +++ b/drivers/nvme/target/nvmet.h
>> @@ -158,6 +158,7 @@ struct nvmet_port {
>>       struct config_group        ana_groups_group;
>>       struct nvmet_ana_group        ana_default_group;
>>       enum nvme_ana_state        *ana_state;
>> +    struct key            *keyring;
>>       void                *priv;
>>       bool                enabled;
>>       int                inline_data_size;
>> diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
>> index f19ea9d923fd..77fa339008e1 100644
>> --- a/drivers/nvme/target/tcp.c
>> +++ b/drivers/nvme/target/tcp.c
>> @@ -8,9 +8,13 @@
>>   #include <linux/init.h>
>>   #include <linux/slab.h>
>>   #include <linux/err.h>
>> +#include <linux/key.h>
>>   #include <linux/nvme-tcp.h>
>> +#include <linux/nvme-keyring.h>
>>   #include <net/sock.h>
>>   #include <net/tcp.h>
>> +#include <net/tls.h>
>> +#include <net/handshake.h>
>>   #include <linux/inet.h>
>>   #include <linux/llist.h>
>>   #include <crypto/hash.h>
>> @@ -66,6 +70,16 @@ device_param_cb(idle_poll_period_usecs, 
>> &set_param_ops,
>>   MODULE_PARM_DESC(idle_poll_period_usecs,
>>           "nvmet tcp io_work poll till idle time period in usecs: 
>> Default 0");
>> +#ifdef CONFIG_NVME_TARGET_TCP_TLS
>> +/*
>> + * TLS handshake timeout
>> + */
>> +static int tls_handshake_timeout = 10;
>> +module_param(tls_handshake_timeout, int, 0644);
>> +MODULE_PARM_DESC(tls_handshake_timeout,
>> +         "nvme TLS handshake timeout in seconds (default 10)");
>> +#endif
>> +
>>   #define NVMET_TCP_RECV_BUDGET        8
>>   #define NVMET_TCP_SEND_BUDGET        8
>>   #define NVMET_TCP_IO_WORK_BUDGET    64
>> @@ -122,11 +136,13 @@ struct nvmet_tcp_cmd {
>>   enum nvmet_tcp_queue_state {
>>       NVMET_TCP_Q_CONNECTING,
>> +    NVMET_TCP_Q_TLS_HANDSHAKE,
>>       NVMET_TCP_Q_LIVE,
>>       NVMET_TCP_Q_DISCONNECTING,
>>   };
>>   struct nvmet_tcp_queue {
>> +    struct kref        kref;
> 
> Why is kref the first member of the struct?
> 
Habit.
I don't mind where it'll end up.

>>       struct socket        *sock;
>>       struct nvmet_tcp_port    *port;
>>       struct work_struct    io_work;
>> @@ -155,6 +171,10 @@ struct nvmet_tcp_queue {
>>       struct ahash_request    *snd_hash;
>>       struct ahash_request    *rcv_hash;
>> +    /* TLS state */
>> +    key_serial_t        tls_pskid;
>> +    struct delayed_work    tls_handshake_work;
>> +
>>       unsigned long           poll_end;
>>       spinlock_t        state_lock;
>> @@ -1283,12 +1303,21 @@ static int nvmet_tcp_try_recv(struct 
>> nvmet_tcp_queue *queue,
>>       return ret;
>>   }
>> +static void nvmet_tcp_release_queue(struct kref *kref)
>> +{
>> +    struct nvmet_tcp_queue *queue =
>> +        container_of(kref, struct nvmet_tcp_queue, kref);
>> +
>> +    WARN_ON(queue->state != NVMET_TCP_Q_DISCONNECTING);
>> +    queue_work(nvmet_wq, &queue->release_work);
>> +}
>> +
>>   static void nvmet_tcp_schedule_release_queue(struct nvmet_tcp_queue 
>> *queue)
>>   {
>>       spin_lock_bh(&queue->state_lock);
>>       if (queue->state != NVMET_TCP_Q_DISCONNECTING) {
>>           queue->state = NVMET_TCP_Q_DISCONNECTING;
>> -        queue_work(nvmet_wq, &queue->release_work);
>> +        kref_put(&queue->kref, nvmet_tcp_release_queue);
>>       }
>>       spin_unlock_bh(&queue->state_lock);
>>   }
>> @@ -1485,6 +1514,8 @@ static void nvmet_tcp_release_queue_work(struct 
>> work_struct *w)
>>       mutex_unlock(&nvmet_tcp_queue_mutex);
>>       nvmet_tcp_restore_socket_callbacks(queue);
>> +    tls_handshake_cancel(queue->sock->sk);
>> +    cancel_delayed_work_sync(&queue->tls_handshake_work);
> 
> We should call it tls_handshake_tmo_work or something to make it
> clear it is a timeout work.
> 
Okay.

>>       cancel_work_sync(&queue->io_work);
>>       /* stop accepting incoming data */
>>       queue->rcv_state = NVMET_TCP_RECV_ERR;
>> @@ -1512,8 +1543,13 @@ static void nvmet_tcp_data_ready(struct sock *sk)
>>       read_lock_bh(&sk->sk_callback_lock);
>>       queue = sk->sk_user_data;
>> -    if (likely(queue))
>> -        queue_work_on(queue_cpu(queue), nvmet_tcp_wq, &queue->io_work);
>> +    if (likely(queue)) {
>> +        if (queue->data_ready)
>> +            queue->data_ready(sk);
>> +        if (queue->state != NVMET_TCP_Q_TLS_HANDSHAKE)
>> +            queue_work_on(queue_cpu(queue), nvmet_tcp_wq,
>> +                      &queue->io_work);
>> +    }
>>       read_unlock_bh(&sk->sk_callback_lock);
>>   }
>> @@ -1621,6 +1657,83 @@ static int nvmet_tcp_set_queue_sock(struct 
>> nvmet_tcp_queue *queue)
>>       return ret;
>>   }
>> +#ifdef CONFIG_NVME_TARGET_TCP_TLS
>> +static void nvmet_tcp_tls_handshake_done(void *data, int status,
>> +                     key_serial_t peerid)
>> +{
>> +    struct nvmet_tcp_queue *queue = data;
>> +
>> +    pr_debug("queue %d: TLS handshake done, key %x, status %d\n",
>> +         queue->idx, peerid, status);
>> +    spin_lock_bh(&queue->state_lock);
>> +    if (queue->state != NVMET_TCP_Q_TLS_HANDSHAKE) {
> 
> Is this even possible?
> 
I guess it can happen when the socket closes during handshake; the 
daemon might still be sending a 'done' event but 
nvmet_tcp_schedule_release_queue() has been called.

>> +        pr_warn("queue %d: TLS handshake already completed\n",
>> +            queue->idx);
>> +        spin_unlock_bh(&queue->state_lock);
>> +        kref_put(&queue->kref, nvmet_tcp_release_queue);
> 
> How can we get here?
> 
See above.

>> +        return;
>> +    }
>> +    if (!status)
>> +        queue->tls_pskid = peerid;
>> +    queue->state = NVMET_TCP_Q_CONNECTING;
>> +    spin_unlock_bh(&queue->state_lock);
>> +
>> +    cancel_delayed_work_sync(&queue->tls_handshake_work);
>> +    if (status) {
> 
> Wait, did we assign the sk_state_change in this stage? What will
> sock shutdown trigger?
> 
That, however, is a good point. You might be right.
Will be checking.

>> +        kernel_sock_shutdown(queue->sock, SHUT_RDWR);
> 
> Probably the put can be moved to a out: label in the end.
> 
Probably.

>> +        kref_put(&queue->kref, nvmet_tcp_release_queue);
>> +        return;
>> +    }
>> +
>> +    pr_debug("queue %d: resetting queue callbacks after TLS 
>> handshake\n",
>> +         queue->idx);
>> +    nvmet_tcp_set_queue_sock(queue);
>> +    kref_put(&queue->kref, nvmet_tcp_release_queue);
>> +}
>> +
>> +static void nvmet_tcp_tls_handshake_timeout_work(struct work_struct *w)
>> +{
>> +    struct nvmet_tcp_queue *queue = container_of(to_delayed_work(w),
>> +            struct nvmet_tcp_queue, tls_handshake_work);
>> +
>> +    pr_debug("queue %d: TLS handshake timeout\n", queue->idx);
> 
> Probably its better to make this pr_warn...
> 
Ok.

>> +    if (!tls_handshake_cancel(queue->sock->sk))
>> +        return;
>> +    kernel_sock_shutdown(queue->sock, SHUT_RDWR);
> 
> Same question here, did we assign sk_state_change yet?
> 
Will be checking.

>> +    kref_put(&queue->kref, nvmet_tcp_release_queue);
>> +}
>> +
>> +static int nvmet_tcp_tls_handshake(struct nvmet_tcp_queue *queue)
>> +{
>> +    int ret = -EOPNOTSUPP;
>> +    struct tls_handshake_args args;
>> +
>> +    if (queue->state != NVMET_TCP_Q_TLS_HANDSHAKE) {
>> +        pr_warn("cannot start TLS in state %d\n", queue->state);
>> +        return -EINVAL;
>> +    }
>> +
>> +    kref_get(&queue->kref);
>> +    pr_debug("queue %d: TLS ServerHello\n", queue->idx);
>> +    memset(&args, 0, sizeof(args));
>> +    args.ta_sock = queue->sock;
>> +    args.ta_done = nvmet_tcp_tls_handshake_done;
>> +    args.ta_data = queue;
>> +    args.ta_keyring = key_serial(queue->port->nport->keyring);
>> +    args.ta_timeout_ms = tls_handshake_timeout * 1000;
>> +
>> +    ret = tls_server_hello_psk(&args, GFP_KERNEL);
>> +    if (ret) {
>> +        kref_put(&queue->kref, nvmet_tcp_release_queue);
>> +        pr_err("failed to start TLS, err=%d\n", ret);
>> +    } else {
>> +        queue_delayed_work(nvmet_wq, &queue->tls_handshake_work,
>> +                   tls_handshake_timeout * HZ);
>> +    }
>> +    return ret;
>> +}
>> +#endif
>> +
>>   static void nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port,
>>           struct socket *newsock)
>>   {
>> @@ -1636,11 +1749,16 @@ static void nvmet_tcp_alloc_queue(struct 
>> nvmet_tcp_port *port,
>>       INIT_WORK(&queue->release_work, nvmet_tcp_release_queue_work);
>>       INIT_WORK(&queue->io_work, nvmet_tcp_io_work);
>> +    kref_init(&queue->kref);
>>       queue->sock = newsock;
>>       queue->port = port;
>>       queue->nr_cmds = 0;
>>       spin_lock_init(&queue->state_lock);
>> -    queue->state = NVMET_TCP_Q_CONNECTING;
>> +    if (queue->port->nport->disc_addr.tsas.tcp.sectype ==
>> +        NVMF_TCP_SECTYPE_TLS13)
>> +        queue->state = NVMET_TCP_Q_TLS_HANDSHAKE;
>> +    else
>> +        queue->state = NVMET_TCP_Q_CONNECTING;
>>       INIT_LIST_HEAD(&queue->free_list);
>>       init_llist_head(&queue->resp_list);
>>       INIT_LIST_HEAD(&queue->resp_send_list);
>> @@ -1671,12 +1789,32 @@ static void nvmet_tcp_alloc_queue(struct 
>> nvmet_tcp_port *port,
>>       list_add_tail(&queue->queue_list, &nvmet_tcp_queue_list);
>>       mutex_unlock(&nvmet_tcp_queue_mutex);
>> +#ifdef CONFIG_NVME_TARGET_TCP_TLS
>> +    INIT_DELAYED_WORK(&queue->tls_handshake_work,
>> +              nvmet_tcp_tls_handshake_timeout_work);
>> +    if (queue->state == NVMET_TCP_Q_TLS_HANDSHAKE) {
>> +        struct sock *sk = queue->sock->sk;
>> +
>> +        /* Restore the default callbacks before starting upcall */
>> +        read_lock_bh(&sk->sk_callback_lock);
>> +        sk->sk_user_data = NULL;
>> +        sk->sk_data_ready = port->data_ready;
>> +        read_unlock_bh(&sk->sk_callback_lock);
>> +        if (!nvmet_tcp_tls_handshake(queue))
>> +            return;
>> +
>> +        /* TLS handshake failed, terminate the connection */
>> +        goto out_destroy_sq;
>> +    }
>> +#endif
>> +
>>       ret = nvmet_tcp_set_queue_sock(queue);
>>       if (ret)
>>           goto out_destroy_sq;
>>       return;
>>   out_destroy_sq:
>> +    queue->state = NVMET_TCP_Q_DISCONNECTING;
> 
> Can you clarify what this is used for?
> 
Primarily for debugging, to signal that we really are
disconnecting. But yeah, not really required.

Cheers,

Hannes
Sagi Grimberg Aug. 14, 2023, 7:12 p.m. UTC | #3
>>> @@ -1864,6 +1877,14 @@ static struct config_group 
>>> *nvmet_ports_make(struct config_group *group,
>>>           return ERR_PTR(-ENOMEM);
>>>       }
>>> +    if (nvme_keyring_id()) {
>>> +        port->keyring = key_lookup(nvme_keyring_id());
>>> +        if (IS_ERR(port->keyring)) {
>>> +            pr_warn("NVMe keyring not available, disabling TLS\n");
>>> +            port->keyring = NULL;
>>
>> why setting this to NULL?
>>
> It's check when changing TSAS; we can only enable TLS if the nvme 
> keyring is available.

ok

> 
>>> +        }
>>> +    }
>>> +
>>>       for (i = 1; i <= NVMET_MAX_ANAGRPS; i++) {
>>>           if (i == NVMET_DEFAULT_ANA_GRPID)
>>>               port->ana_state[1] = NVME_ANA_OPTIMIZED;
>>> diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
>>> index 8cfd60f3b564..7f9ae53c1df5 100644
>>> --- a/drivers/nvme/target/nvmet.h
>>> +++ b/drivers/nvme/target/nvmet.h
>>> @@ -158,6 +158,7 @@ struct nvmet_port {
>>>       struct config_group        ana_groups_group;
>>>       struct nvmet_ana_group        ana_default_group;
>>>       enum nvme_ana_state        *ana_state;
>>> +    struct key            *keyring;
>>>       void                *priv;
>>>       bool                enabled;
>>>       int                inline_data_size;
>>> diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
>>> index f19ea9d923fd..77fa339008e1 100644
>>> --- a/drivers/nvme/target/tcp.c
>>> +++ b/drivers/nvme/target/tcp.c
>>> @@ -8,9 +8,13 @@
>>>   #include <linux/init.h>
>>>   #include <linux/slab.h>
>>>   #include <linux/err.h>
>>> +#include <linux/key.h>
>>>   #include <linux/nvme-tcp.h>
>>> +#include <linux/nvme-keyring.h>
>>>   #include <net/sock.h>
>>>   #include <net/tcp.h>
>>> +#include <net/tls.h>
>>> +#include <net/handshake.h>
>>>   #include <linux/inet.h>
>>>   #include <linux/llist.h>
>>>   #include <crypto/hash.h>
>>> @@ -66,6 +70,16 @@ device_param_cb(idle_poll_period_usecs, 
>>> &set_param_ops,
>>>   MODULE_PARM_DESC(idle_poll_period_usecs,
>>>           "nvmet tcp io_work poll till idle time period in usecs: 
>>> Default 0");
>>> +#ifdef CONFIG_NVME_TARGET_TCP_TLS
>>> +/*
>>> + * TLS handshake timeout
>>> + */
>>> +static int tls_handshake_timeout = 10;
>>> +module_param(tls_handshake_timeout, int, 0644);
>>> +MODULE_PARM_DESC(tls_handshake_timeout,
>>> +         "nvme TLS handshake timeout in seconds (default 10)");
>>> +#endif
>>> +
>>>   #define NVMET_TCP_RECV_BUDGET        8
>>>   #define NVMET_TCP_SEND_BUDGET        8
>>>   #define NVMET_TCP_IO_WORK_BUDGET    64
>>> @@ -122,11 +136,13 @@ struct nvmet_tcp_cmd {
>>>   enum nvmet_tcp_queue_state {
>>>       NVMET_TCP_Q_CONNECTING,
>>> +    NVMET_TCP_Q_TLS_HANDSHAKE,
>>>       NVMET_TCP_Q_LIVE,
>>>       NVMET_TCP_Q_DISCONNECTING,
>>>   };
>>>   struct nvmet_tcp_queue {
>>> +    struct kref        kref;
>>
>> Why is kref the first member of the struct?
>>
> Habit.
> I don't mind where it'll end up.

Move it to the back together with the tls section.

> 
>>>       struct socket        *sock;
>>>       struct nvmet_tcp_port    *port;
>>>       struct work_struct    io_work;
>>> @@ -155,6 +171,10 @@ struct nvmet_tcp_queue {
>>>       struct ahash_request    *snd_hash;
>>>       struct ahash_request    *rcv_hash;
>>> +    /* TLS state */
>>> +    key_serial_t        tls_pskid;
>>> +    struct delayed_work    tls_handshake_work;
>>> +
>>>       unsigned long           poll_end;
>>>       spinlock_t        state_lock;
>>> @@ -1283,12 +1303,21 @@ static int nvmet_tcp_try_recv(struct 
>>> nvmet_tcp_queue *queue,
>>>       return ret;
>>>   }
>>> +static void nvmet_tcp_release_queue(struct kref *kref)
>>> +{
>>> +    struct nvmet_tcp_queue *queue =
>>> +        container_of(kref, struct nvmet_tcp_queue, kref);
>>> +
>>> +    WARN_ON(queue->state != NVMET_TCP_Q_DISCONNECTING);
>>> +    queue_work(nvmet_wq, &queue->release_work);
>>> +}
>>> +
>>>   static void nvmet_tcp_schedule_release_queue(struct nvmet_tcp_queue 
>>> *queue)
>>>   {
>>>       spin_lock_bh(&queue->state_lock);
>>>       if (queue->state != NVMET_TCP_Q_DISCONNECTING) {
>>>           queue->state = NVMET_TCP_Q_DISCONNECTING;
>>> -        queue_work(nvmet_wq, &queue->release_work);
>>> +        kref_put(&queue->kref, nvmet_tcp_release_queue);
>>>       }
>>>       spin_unlock_bh(&queue->state_lock);
>>>   }
>>> @@ -1485,6 +1514,8 @@ static void nvmet_tcp_release_queue_work(struct 
>>> work_struct *w)
>>>       mutex_unlock(&nvmet_tcp_queue_mutex);
>>>       nvmet_tcp_restore_socket_callbacks(queue);
>>> +    tls_handshake_cancel(queue->sock->sk);
>>> +    cancel_delayed_work_sync(&queue->tls_handshake_work);
>>
>> We should call it tls_handshake_tmo_work or something to make it
>> clear it is a timeout work.
>>
> Okay.
> 
>>>       cancel_work_sync(&queue->io_work);
>>>       /* stop accepting incoming data */
>>>       queue->rcv_state = NVMET_TCP_RECV_ERR;
>>> @@ -1512,8 +1543,13 @@ static void nvmet_tcp_data_ready(struct sock *sk)
>>>       read_lock_bh(&sk->sk_callback_lock);
>>>       queue = sk->sk_user_data;
>>> -    if (likely(queue))
>>> -        queue_work_on(queue_cpu(queue), nvmet_tcp_wq, &queue->io_work);
>>> +    if (likely(queue)) {
>>> +        if (queue->data_ready)
>>> +            queue->data_ready(sk);
>>> +        if (queue->state != NVMET_TCP_Q_TLS_HANDSHAKE)
>>> +            queue_work_on(queue_cpu(queue), nvmet_tcp_wq,
>>> +                      &queue->io_work);
>>> +    }
>>>       read_unlock_bh(&sk->sk_callback_lock);
>>>   }
>>> @@ -1621,6 +1657,83 @@ static int nvmet_tcp_set_queue_sock(struct 
>>> nvmet_tcp_queue *queue)
>>>       return ret;
>>>   }
>>> +#ifdef CONFIG_NVME_TARGET_TCP_TLS
>>> +static void nvmet_tcp_tls_handshake_done(void *data, int status,
>>> +                     key_serial_t peerid)
>>> +{
>>> +    struct nvmet_tcp_queue *queue = data;
>>> +
>>> +    pr_debug("queue %d: TLS handshake done, key %x, status %d\n",
>>> +         queue->idx, peerid, status);
>>> +    spin_lock_bh(&queue->state_lock);
>>> +    if (queue->state != NVMET_TCP_Q_TLS_HANDSHAKE) {
>>
>> Is this even possible?
>>
> I guess it can happen when the socket closes during handshake; the 
> daemon might still be sending a 'done' event but 
> nvmet_tcp_schedule_release_queue() has been called.

Umm, if the socket closes during the handshake then the state
is NVMET_TCP_Q_TLS_HANDSHAKE.

p.s. you call handshake cancel in the release flow so you should be
fenced properly no?
Hannes Reinecke Aug. 15, 2023, 6:29 a.m. UTC | #4
On 8/14/23 21:12, Sagi Grimberg wrote:
> 
>>>> @@ -1864,6 +1877,14 @@ static struct config_group 
>>>> *nvmet_ports_make(struct config_group *group,
>>>>           return ERR_PTR(-ENOMEM);
>>>>       }
>>>> +    if (nvme_keyring_id()) {
>>>> +        port->keyring = key_lookup(nvme_keyring_id());
>>>> +        if (IS_ERR(port->keyring)) {
>>>> +            pr_warn("NVMe keyring not available, disabling TLS\n");
>>>> +            port->keyring = NULL;
>>>
>>> why setting this to NULL?
>>>
>> It's check when changing TSAS; we can only enable TLS if the nvme 
>> keyring is available.
> 
> ok
> 
>>
>>>> +        }
>>>> +    }
>>>> +
>>>>       for (i = 1; i <= NVMET_MAX_ANAGRPS; i++) {
>>>>           if (i == NVMET_DEFAULT_ANA_GRPID)
>>>>               port->ana_state[1] = NVME_ANA_OPTIMIZED;
>>>> diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
>>>> index 8cfd60f3b564..7f9ae53c1df5 100644
>>>> --- a/drivers/nvme/target/nvmet.h
>>>> +++ b/drivers/nvme/target/nvmet.h
>>>> @@ -158,6 +158,7 @@ struct nvmet_port {
>>>>       struct config_group        ana_groups_group;
>>>>       struct nvmet_ana_group        ana_default_group;
>>>>       enum nvme_ana_state        *ana_state;
>>>> +    struct key            *keyring;
>>>>       void                *priv;
>>>>       bool                enabled;
>>>>       int                inline_data_size;
>>>> diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
>>>> index f19ea9d923fd..77fa339008e1 100644
>>>> --- a/drivers/nvme/target/tcp.c
>>>> +++ b/drivers/nvme/target/tcp.c
>>>> @@ -8,9 +8,13 @@
>>>>   #include <linux/init.h>
>>>>   #include <linux/slab.h>
>>>>   #include <linux/err.h>
>>>> +#include <linux/key.h>
>>>>   #include <linux/nvme-tcp.h>
>>>> +#include <linux/nvme-keyring.h>
>>>>   #include <net/sock.h>
>>>>   #include <net/tcp.h>
>>>> +#include <net/tls.h>
>>>> +#include <net/handshake.h>
>>>>   #include <linux/inet.h>
>>>>   #include <linux/llist.h>
>>>>   #include <crypto/hash.h>
>>>> @@ -66,6 +70,16 @@ device_param_cb(idle_poll_period_usecs, 
>>>> &set_param_ops,
>>>>   MODULE_PARM_DESC(idle_poll_period_usecs,
>>>>           "nvmet tcp io_work poll till idle time period in usecs: 
>>>> Default 0");
>>>> +#ifdef CONFIG_NVME_TARGET_TCP_TLS
>>>> +/*
>>>> + * TLS handshake timeout
>>>> + */
>>>> +static int tls_handshake_timeout = 10;
>>>> +module_param(tls_handshake_timeout, int, 0644);
>>>> +MODULE_PARM_DESC(tls_handshake_timeout,
>>>> +         "nvme TLS handshake timeout in seconds (default 10)");
>>>> +#endif
>>>> +
>>>>   #define NVMET_TCP_RECV_BUDGET        8
>>>>   #define NVMET_TCP_SEND_BUDGET        8
>>>>   #define NVMET_TCP_IO_WORK_BUDGET    64
>>>> @@ -122,11 +136,13 @@ struct nvmet_tcp_cmd {
>>>>   enum nvmet_tcp_queue_state {
>>>>       NVMET_TCP_Q_CONNECTING,
>>>> +    NVMET_TCP_Q_TLS_HANDSHAKE,
>>>>       NVMET_TCP_Q_LIVE,
>>>>       NVMET_TCP_Q_DISCONNECTING,
>>>>   };
>>>>   struct nvmet_tcp_queue {
>>>> +    struct kref        kref;
>>>
>>> Why is kref the first member of the struct?
>>>
>> Habit.
>> I don't mind where it'll end up.
> 
> Move it to the back together with the tls section.
> 
>>
>>>>       struct socket        *sock;
>>>>       struct nvmet_tcp_port    *port;
>>>>       struct work_struct    io_work;
>>>> @@ -155,6 +171,10 @@ struct nvmet_tcp_queue {
>>>>       struct ahash_request    *snd_hash;
>>>>       struct ahash_request    *rcv_hash;
>>>> +    /* TLS state */
>>>> +    key_serial_t        tls_pskid;
>>>> +    struct delayed_work    tls_handshake_work;
>>>> +
>>>>       unsigned long           poll_end;
>>>>       spinlock_t        state_lock;
>>>> @@ -1283,12 +1303,21 @@ static int nvmet_tcp_try_recv(struct 
>>>> nvmet_tcp_queue *queue,
>>>>       return ret;
>>>>   }
>>>> +static void nvmet_tcp_release_queue(struct kref *kref)
>>>> +{
>>>> +    struct nvmet_tcp_queue *queue =
>>>> +        container_of(kref, struct nvmet_tcp_queue, kref);
>>>> +
>>>> +    WARN_ON(queue->state != NVMET_TCP_Q_DISCONNECTING);
>>>> +    queue_work(nvmet_wq, &queue->release_work);
>>>> +}
>>>> +
>>>>   static void nvmet_tcp_schedule_release_queue(struct 
>>>> nvmet_tcp_queue *queue)
>>>>   {
>>>>       spin_lock_bh(&queue->state_lock);
>>>>       if (queue->state != NVMET_TCP_Q_DISCONNECTING) {
>>>>           queue->state = NVMET_TCP_Q_DISCONNECTING;
>>>> -        queue_work(nvmet_wq, &queue->release_work);
>>>> +        kref_put(&queue->kref, nvmet_tcp_release_queue);
>>>>       }
>>>>       spin_unlock_bh(&queue->state_lock);
>>>>   }
>>>> @@ -1485,6 +1514,8 @@ static void 
>>>> nvmet_tcp_release_queue_work(struct work_struct *w)
>>>>       mutex_unlock(&nvmet_tcp_queue_mutex);
>>>>       nvmet_tcp_restore_socket_callbacks(queue);
>>>> +    tls_handshake_cancel(queue->sock->sk);
>>>> +    cancel_delayed_work_sync(&queue->tls_handshake_work);
>>>
>>> We should call it tls_handshake_tmo_work or something to make it
>>> clear it is a timeout work.
>>>
>> Okay.
>>
>>>>       cancel_work_sync(&queue->io_work);
>>>>       /* stop accepting incoming data */
>>>>       queue->rcv_state = NVMET_TCP_RECV_ERR;
>>>> @@ -1512,8 +1543,13 @@ static void nvmet_tcp_data_ready(struct sock 
>>>> *sk)
>>>>       read_lock_bh(&sk->sk_callback_lock);
>>>>       queue = sk->sk_user_data;
>>>> -    if (likely(queue))
>>>> -        queue_work_on(queue_cpu(queue), nvmet_tcp_wq, 
>>>> &queue->io_work);
>>>> +    if (likely(queue)) {
>>>> +        if (queue->data_ready)
>>>> +            queue->data_ready(sk);
>>>> +        if (queue->state != NVMET_TCP_Q_TLS_HANDSHAKE)
>>>> +            queue_work_on(queue_cpu(queue), nvmet_tcp_wq,
>>>> +                      &queue->io_work);
>>>> +    }
>>>>       read_unlock_bh(&sk->sk_callback_lock);
>>>>   }
>>>> @@ -1621,6 +1657,83 @@ static int nvmet_tcp_set_queue_sock(struct 
>>>> nvmet_tcp_queue *queue)
>>>>       return ret;
>>>>   }
>>>> +#ifdef CONFIG_NVME_TARGET_TCP_TLS
>>>> +static void nvmet_tcp_tls_handshake_done(void *data, int status,
>>>> +                     key_serial_t peerid)
>>>> +{
>>>> +    struct nvmet_tcp_queue *queue = data;
>>>> +
>>>> +    pr_debug("queue %d: TLS handshake done, key %x, status %d\n",
>>>> +         queue->idx, peerid, status);
>>>> +    spin_lock_bh(&queue->state_lock);
>>>> +    if (queue->state != NVMET_TCP_Q_TLS_HANDSHAKE) {
>>>
>>> Is this even possible?
>>>
>> I guess it can happen when the socket closes during handshake; the 
>> daemon might still be sending a 'done' event but 
>> nvmet_tcp_schedule_release_queue() has been called.
> 
> Umm, if the socket closes during the handshake then the state
> is NVMET_TCP_Q_TLS_HANDSHAKE.
> 
But there's a race window between setting it to 
NVMET_TCP_Q_DISCONNECTING and calling tls_handshake_cancel().

> p.s. you call handshake cancel in the release flow so you should be
> fenced properly no?
Not really. But I'll check if I can fix it up.

Cheers,

Hannes
Sagi Grimberg Aug. 15, 2023, 7:01 a.m. UTC | #5
>>>>> @@ -1864,6 +1877,14 @@ static struct config_group 
>>>>> *nvmet_ports_make(struct config_group *group,
>>>>>           return ERR_PTR(-ENOMEM);
>>>>>       }
>>>>> +    if (nvme_keyring_id()) {
>>>>> +        port->keyring = key_lookup(nvme_keyring_id());
>>>>> +        if (IS_ERR(port->keyring)) {
>>>>> +            pr_warn("NVMe keyring not available, disabling TLS\n");
>>>>> +            port->keyring = NULL;
>>>>
>>>> why setting this to NULL?
>>>>
>>> It's check when changing TSAS; we can only enable TLS if the nvme 
>>> keyring is available.
>>
>> ok
>>
>>>
>>>>> +        }
>>>>> +    }
>>>>> +
>>>>>       for (i = 1; i <= NVMET_MAX_ANAGRPS; i++) {
>>>>>           if (i == NVMET_DEFAULT_ANA_GRPID)
>>>>>               port->ana_state[1] = NVME_ANA_OPTIMIZED;
>>>>> diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
>>>>> index 8cfd60f3b564..7f9ae53c1df5 100644
>>>>> --- a/drivers/nvme/target/nvmet.h
>>>>> +++ b/drivers/nvme/target/nvmet.h
>>>>> @@ -158,6 +158,7 @@ struct nvmet_port {
>>>>>       struct config_group        ana_groups_group;
>>>>>       struct nvmet_ana_group        ana_default_group;
>>>>>       enum nvme_ana_state        *ana_state;
>>>>> +    struct key            *keyring;
>>>>>       void                *priv;
>>>>>       bool                enabled;
>>>>>       int                inline_data_size;
>>>>> diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
>>>>> index f19ea9d923fd..77fa339008e1 100644
>>>>> --- a/drivers/nvme/target/tcp.c
>>>>> +++ b/drivers/nvme/target/tcp.c
>>>>> @@ -8,9 +8,13 @@
>>>>>   #include <linux/init.h>
>>>>>   #include <linux/slab.h>
>>>>>   #include <linux/err.h>
>>>>> +#include <linux/key.h>
>>>>>   #include <linux/nvme-tcp.h>
>>>>> +#include <linux/nvme-keyring.h>
>>>>>   #include <net/sock.h>
>>>>>   #include <net/tcp.h>
>>>>> +#include <net/tls.h>
>>>>> +#include <net/handshake.h>
>>>>>   #include <linux/inet.h>
>>>>>   #include <linux/llist.h>
>>>>>   #include <crypto/hash.h>
>>>>> @@ -66,6 +70,16 @@ device_param_cb(idle_poll_period_usecs, 
>>>>> &set_param_ops,
>>>>>   MODULE_PARM_DESC(idle_poll_period_usecs,
>>>>>           "nvmet tcp io_work poll till idle time period in usecs: 
>>>>> Default 0");
>>>>> +#ifdef CONFIG_NVME_TARGET_TCP_TLS
>>>>> +/*
>>>>> + * TLS handshake timeout
>>>>> + */
>>>>> +static int tls_handshake_timeout = 10;
>>>>> +module_param(tls_handshake_timeout, int, 0644);
>>>>> +MODULE_PARM_DESC(tls_handshake_timeout,
>>>>> +         "nvme TLS handshake timeout in seconds (default 10)");
>>>>> +#endif
>>>>> +
>>>>>   #define NVMET_TCP_RECV_BUDGET        8
>>>>>   #define NVMET_TCP_SEND_BUDGET        8
>>>>>   #define NVMET_TCP_IO_WORK_BUDGET    64
>>>>> @@ -122,11 +136,13 @@ struct nvmet_tcp_cmd {
>>>>>   enum nvmet_tcp_queue_state {
>>>>>       NVMET_TCP_Q_CONNECTING,
>>>>> +    NVMET_TCP_Q_TLS_HANDSHAKE,
>>>>>       NVMET_TCP_Q_LIVE,
>>>>>       NVMET_TCP_Q_DISCONNECTING,
>>>>>   };
>>>>>   struct nvmet_tcp_queue {
>>>>> +    struct kref        kref;
>>>>
>>>> Why is kref the first member of the struct?
>>>>
>>> Habit.
>>> I don't mind where it'll end up.
>>
>> Move it to the back together with the tls section.
>>
>>>
>>>>>       struct socket        *sock;
>>>>>       struct nvmet_tcp_port    *port;
>>>>>       struct work_struct    io_work;
>>>>> @@ -155,6 +171,10 @@ struct nvmet_tcp_queue {
>>>>>       struct ahash_request    *snd_hash;
>>>>>       struct ahash_request    *rcv_hash;
>>>>> +    /* TLS state */
>>>>> +    key_serial_t        tls_pskid;
>>>>> +    struct delayed_work    tls_handshake_work;
>>>>> +
>>>>>       unsigned long           poll_end;
>>>>>       spinlock_t        state_lock;
>>>>> @@ -1283,12 +1303,21 @@ static int nvmet_tcp_try_recv(struct 
>>>>> nvmet_tcp_queue *queue,
>>>>>       return ret;
>>>>>   }
>>>>> +static void nvmet_tcp_release_queue(struct kref *kref)
>>>>> +{
>>>>> +    struct nvmet_tcp_queue *queue =
>>>>> +        container_of(kref, struct nvmet_tcp_queue, kref);
>>>>> +
>>>>> +    WARN_ON(queue->state != NVMET_TCP_Q_DISCONNECTING);
>>>>> +    queue_work(nvmet_wq, &queue->release_work);
>>>>> +}
>>>>> +
>>>>>   static void nvmet_tcp_schedule_release_queue(struct 
>>>>> nvmet_tcp_queue *queue)
>>>>>   {
>>>>>       spin_lock_bh(&queue->state_lock);
>>>>>       if (queue->state != NVMET_TCP_Q_DISCONNECTING) {
>>>>>           queue->state = NVMET_TCP_Q_DISCONNECTING;
>>>>> -        queue_work(nvmet_wq, &queue->release_work);
>>>>> +        kref_put(&queue->kref, nvmet_tcp_release_queue);
>>>>>       }
>>>>>       spin_unlock_bh(&queue->state_lock);
>>>>>   }
>>>>> @@ -1485,6 +1514,8 @@ static void 
>>>>> nvmet_tcp_release_queue_work(struct work_struct *w)
>>>>>       mutex_unlock(&nvmet_tcp_queue_mutex);
>>>>>       nvmet_tcp_restore_socket_callbacks(queue);
>>>>> +    tls_handshake_cancel(queue->sock->sk);
>>>>> +    cancel_delayed_work_sync(&queue->tls_handshake_work);
>>>>
>>>> We should call it tls_handshake_tmo_work or something to make it
>>>> clear it is a timeout work.
>>>>
>>> Okay.
>>>
>>>>>       cancel_work_sync(&queue->io_work);
>>>>>       /* stop accepting incoming data */
>>>>>       queue->rcv_state = NVMET_TCP_RECV_ERR;
>>>>> @@ -1512,8 +1543,13 @@ static void nvmet_tcp_data_ready(struct sock 
>>>>> *sk)
>>>>>       read_lock_bh(&sk->sk_callback_lock);
>>>>>       queue = sk->sk_user_data;
>>>>> -    if (likely(queue))
>>>>> -        queue_work_on(queue_cpu(queue), nvmet_tcp_wq, 
>>>>> &queue->io_work);
>>>>> +    if (likely(queue)) {
>>>>> +        if (queue->data_ready)
>>>>> +            queue->data_ready(sk);
>>>>> +        if (queue->state != NVMET_TCP_Q_TLS_HANDSHAKE)
>>>>> +            queue_work_on(queue_cpu(queue), nvmet_tcp_wq,
>>>>> +                      &queue->io_work);
>>>>> +    }
>>>>>       read_unlock_bh(&sk->sk_callback_lock);
>>>>>   }
>>>>> @@ -1621,6 +1657,83 @@ static int nvmet_tcp_set_queue_sock(struct 
>>>>> nvmet_tcp_queue *queue)
>>>>>       return ret;
>>>>>   }
>>>>> +#ifdef CONFIG_NVME_TARGET_TCP_TLS
>>>>> +static void nvmet_tcp_tls_handshake_done(void *data, int status,
>>>>> +                     key_serial_t peerid)
>>>>> +{
>>>>> +    struct nvmet_tcp_queue *queue = data;
>>>>> +
>>>>> +    pr_debug("queue %d: TLS handshake done, key %x, status %d\n",
>>>>> +         queue->idx, peerid, status);
>>>>> +    spin_lock_bh(&queue->state_lock);
>>>>> +    if (queue->state != NVMET_TCP_Q_TLS_HANDSHAKE) {
>>>>
>>>> Is this even possible?
>>>>
>>> I guess it can happen when the socket closes during handshake; the 
>>> daemon might still be sending a 'done' event but 
>>> nvmet_tcp_schedule_release_queue() has been called.
>>
>> Umm, if the socket closes during the handshake then the state
>> is NVMET_TCP_Q_TLS_HANDSHAKE.
>>
> But there's a race window between setting it to 
> NVMET_TCP_Q_DISCONNECTING and calling tls_handshake_cancel().
> 
>> p.s. you call handshake cancel in the release flow so you should be
>> fenced properly no?
> Not really. But I'll check if I can fix it up.

The teardown handling feels complicated to me.

How are you testing it btw?
Hannes Reinecke Aug. 15, 2023, 7:20 a.m. UTC | #6
On 8/15/23 09:01, Sagi Grimberg wrote:
> 
>>>>>> @@ -1864,6 +1877,14 @@ static struct config_group 
>>>>>> *nvmet_ports_make(struct config_group *group,
>>>>>>           return ERR_PTR(-ENOMEM);
>>>>>>       }
>>>>>> +    if (nvme_keyring_id()) {
>>>>>> +        port->keyring = key_lookup(nvme_keyring_id());
>>>>>> +        if (IS_ERR(port->keyring)) {
>>>>>> +            pr_warn("NVMe keyring not available, disabling TLS\n");
>>>>>> +            port->keyring = NULL;
>>>>>
>>>>> why setting this to NULL?
>>>>>
>>>> It's check when changing TSAS; we can only enable TLS if the nvme 
>>>> keyring is available.
>>>
>>> ok
>>>
>>>>
>>>>>> +        }
>>>>>> +    }
>>>>>> +
>>>>>>       for (i = 1; i <= NVMET_MAX_ANAGRPS; i++) {
>>>>>>           if (i == NVMET_DEFAULT_ANA_GRPID)
>>>>>>               port->ana_state[1] = NVME_ANA_OPTIMIZED;
>>>>>> diff --git a/drivers/nvme/target/nvmet.h 
>>>>>> b/drivers/nvme/target/nvmet.h
>>>>>> index 8cfd60f3b564..7f9ae53c1df5 100644
>>>>>> --- a/drivers/nvme/target/nvmet.h
>>>>>> +++ b/drivers/nvme/target/nvmet.h
>>>>>> @@ -158,6 +158,7 @@ struct nvmet_port {
>>>>>>       struct config_group        ana_groups_group;
>>>>>>       struct nvmet_ana_group        ana_default_group;
>>>>>>       enum nvme_ana_state        *ana_state;
>>>>>> +    struct key            *keyring;
>>>>>>       void                *priv;
>>>>>>       bool                enabled;
>>>>>>       int                inline_data_size;
>>>>>> diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
>>>>>> index f19ea9d923fd..77fa339008e1 100644
>>>>>> --- a/drivers/nvme/target/tcp.c
>>>>>> +++ b/drivers/nvme/target/tcp.c
>>>>>> @@ -8,9 +8,13 @@
>>>>>>   #include <linux/init.h>
>>>>>>   #include <linux/slab.h>
>>>>>>   #include <linux/err.h>
>>>>>> +#include <linux/key.h>
>>>>>>   #include <linux/nvme-tcp.h>
>>>>>> +#include <linux/nvme-keyring.h>
>>>>>>   #include <net/sock.h>
>>>>>>   #include <net/tcp.h>
>>>>>> +#include <net/tls.h>
>>>>>> +#include <net/handshake.h>
>>>>>>   #include <linux/inet.h>
>>>>>>   #include <linux/llist.h>
>>>>>>   #include <crypto/hash.h>
>>>>>> @@ -66,6 +70,16 @@ device_param_cb(idle_poll_period_usecs, 
>>>>>> &set_param_ops,
>>>>>>   MODULE_PARM_DESC(idle_poll_period_usecs,
>>>>>>           "nvmet tcp io_work poll till idle time period in usecs: 
>>>>>> Default 0");
>>>>>> +#ifdef CONFIG_NVME_TARGET_TCP_TLS
>>>>>> +/*
>>>>>> + * TLS handshake timeout
>>>>>> + */
>>>>>> +static int tls_handshake_timeout = 10;
>>>>>> +module_param(tls_handshake_timeout, int, 0644);
>>>>>> +MODULE_PARM_DESC(tls_handshake_timeout,
>>>>>> +         "nvme TLS handshake timeout in seconds (default 10)");
>>>>>> +#endif
>>>>>> +
>>>>>>   #define NVMET_TCP_RECV_BUDGET        8
>>>>>>   #define NVMET_TCP_SEND_BUDGET        8
>>>>>>   #define NVMET_TCP_IO_WORK_BUDGET    64
>>>>>> @@ -122,11 +136,13 @@ struct nvmet_tcp_cmd {
>>>>>>   enum nvmet_tcp_queue_state {
>>>>>>       NVMET_TCP_Q_CONNECTING,
>>>>>> +    NVMET_TCP_Q_TLS_HANDSHAKE,
>>>>>>       NVMET_TCP_Q_LIVE,
>>>>>>       NVMET_TCP_Q_DISCONNECTING,
>>>>>>   };
>>>>>>   struct nvmet_tcp_queue {
>>>>>> +    struct kref        kref;
>>>>>
>>>>> Why is kref the first member of the struct?
>>>>>
>>>> Habit.
>>>> I don't mind where it'll end up.
>>>
>>> Move it to the back together with the tls section.
>>>
>>>>
>>>>>>       struct socket        *sock;
>>>>>>       struct nvmet_tcp_port    *port;
>>>>>>       struct work_struct    io_work;
>>>>>> @@ -155,6 +171,10 @@ struct nvmet_tcp_queue {
>>>>>>       struct ahash_request    *snd_hash;
>>>>>>       struct ahash_request    *rcv_hash;
>>>>>> +    /* TLS state */
>>>>>> +    key_serial_t        tls_pskid;
>>>>>> +    struct delayed_work    tls_handshake_work;
>>>>>> +
>>>>>>       unsigned long           poll_end;
>>>>>>       spinlock_t        state_lock;
>>>>>> @@ -1283,12 +1303,21 @@ static int nvmet_tcp_try_recv(struct 
>>>>>> nvmet_tcp_queue *queue,
>>>>>>       return ret;
>>>>>>   }
>>>>>> +static void nvmet_tcp_release_queue(struct kref *kref)
>>>>>> +{
>>>>>> +    struct nvmet_tcp_queue *queue =
>>>>>> +        container_of(kref, struct nvmet_tcp_queue, kref);
>>>>>> +
>>>>>> +    WARN_ON(queue->state != NVMET_TCP_Q_DISCONNECTING);
>>>>>> +    queue_work(nvmet_wq, &queue->release_work);
>>>>>> +}
>>>>>> +
>>>>>>   static void nvmet_tcp_schedule_release_queue(struct 
>>>>>> nvmet_tcp_queue *queue)
>>>>>>   {
>>>>>>       spin_lock_bh(&queue->state_lock);
>>>>>>       if (queue->state != NVMET_TCP_Q_DISCONNECTING) {
>>>>>>           queue->state = NVMET_TCP_Q_DISCONNECTING;
>>>>>> -        queue_work(nvmet_wq, &queue->release_work);
>>>>>> +        kref_put(&queue->kref, nvmet_tcp_release_queue);
>>>>>>       }
>>>>>>       spin_unlock_bh(&queue->state_lock);
>>>>>>   }
>>>>>> @@ -1485,6 +1514,8 @@ static void 
>>>>>> nvmet_tcp_release_queue_work(struct work_struct *w)
>>>>>>       mutex_unlock(&nvmet_tcp_queue_mutex);
>>>>>>       nvmet_tcp_restore_socket_callbacks(queue);
>>>>>> +    tls_handshake_cancel(queue->sock->sk);
>>>>>> +    cancel_delayed_work_sync(&queue->tls_handshake_work);
>>>>>
>>>>> We should call it tls_handshake_tmo_work or something to make it
>>>>> clear it is a timeout work.
>>>>>
>>>> Okay.
>>>>
>>>>>>       cancel_work_sync(&queue->io_work);
>>>>>>       /* stop accepting incoming data */
>>>>>>       queue->rcv_state = NVMET_TCP_RECV_ERR;
>>>>>> @@ -1512,8 +1543,13 @@ static void nvmet_tcp_data_ready(struct 
>>>>>> sock *sk)
>>>>>>       read_lock_bh(&sk->sk_callback_lock);
>>>>>>       queue = sk->sk_user_data;
>>>>>> -    if (likely(queue))
>>>>>> -        queue_work_on(queue_cpu(queue), nvmet_tcp_wq, 
>>>>>> &queue->io_work);
>>>>>> +    if (likely(queue)) {
>>>>>> +        if (queue->data_ready)
>>>>>> +            queue->data_ready(sk);
>>>>>> +        if (queue->state != NVMET_TCP_Q_TLS_HANDSHAKE)
>>>>>> +            queue_work_on(queue_cpu(queue), nvmet_tcp_wq,
>>>>>> +                      &queue->io_work);
>>>>>> +    }
>>>>>>       read_unlock_bh(&sk->sk_callback_lock);
>>>>>>   }
>>>>>> @@ -1621,6 +1657,83 @@ static int nvmet_tcp_set_queue_sock(struct 
>>>>>> nvmet_tcp_queue *queue)
>>>>>>       return ret;
>>>>>>   }
>>>>>> +#ifdef CONFIG_NVME_TARGET_TCP_TLS
>>>>>> +static void nvmet_tcp_tls_handshake_done(void *data, int status,
>>>>>> +                     key_serial_t peerid)
>>>>>> +{
>>>>>> +    struct nvmet_tcp_queue *queue = data;
>>>>>> +
>>>>>> +    pr_debug("queue %d: TLS handshake done, key %x, status %d\n",
>>>>>> +         queue->idx, peerid, status);
>>>>>> +    spin_lock_bh(&queue->state_lock);
>>>>>> +    if (queue->state != NVMET_TCP_Q_TLS_HANDSHAKE) {
>>>>>
>>>>> Is this even possible?
>>>>>
>>>> I guess it can happen when the socket closes during handshake; the 
>>>> daemon might still be sending a 'done' event but 
>>>> nvmet_tcp_schedule_release_queue() has been called.
>>>
>>> Umm, if the socket closes during the handshake then the state
>>> is NVMET_TCP_Q_TLS_HANDSHAKE.
>>>
>> But there's a race window between setting it to 
>> NVMET_TCP_Q_DISCONNECTING and calling tls_handshake_cancel().
>>
>>> p.s. you call handshake cancel in the release flow so you should be
>>> fenced properly no?
>> Not really. But I'll check if I can fix it up.
> 
> The teardown handling feels complicated to me.
> 
You tell me. TLS timeout handling always gets in the way.
But I've reworked it now to look slightly better.

> How are you testing it btw?

As outlined in the patchset description.
I've a target configuration running over the loopback interface.

Will expand to have two VMs talking to each other; however, that
needs more fiddling with the PSK deployment.

Cheers,

Hannes
Sagi Grimberg Aug. 15, 2023, 1:34 p.m. UTC | #7
>> How are you testing it btw?
> 
> As outlined in the patchset description.
> I've a target configuration running over the loopback interface.
> 
> Will expand to have two VMs talking to each other; however, that
> needs more fiddling with the PSK deployment.

Was referring to the timeout part. It would maybe make sense to
run with a very short timeout to see that it is behaving...
Hannes Reinecke Aug. 15, 2023, 3:04 p.m. UTC | #8
On 8/15/23 15:34, Sagi Grimberg wrote:
> 
>>> How are you testing it btw?
>>
>> As outlined in the patchset description.
>> I've a target configuration running over the loopback interface.
>>
>> Will expand to have two VMs talking to each other; however, that
>> needs more fiddling with the PSK deployment.
> 
> Was referring to the timeout part. Would maybe make sense to
> run a very short timeouts to see that is behaving...

I'll see to patch it into tlshd.
I used to trigger it quite easily during development, but now that
things have stabilised of course it doesn't happen anymore.
Kinda the point, I guess :-)

Cheers,

Hannes
diff mbox series

Patch

diff --git a/drivers/nvme/target/Kconfig b/drivers/nvme/target/Kconfig
index 79fc64035ee3..8a6c9cae804c 100644
--- a/drivers/nvme/target/Kconfig
+++ b/drivers/nvme/target/Kconfig
@@ -84,6 +84,21 @@  config NVME_TARGET_TCP
 
 	  If unsure, say N.
 
+config NVME_TARGET_TCP_TLS
+	bool "NVMe over Fabrics TCP target TLS encryption support"
+	depends on NVME_TARGET_TCP
+	select NVME_COMMON
+	select NVME_KEYRING
+	select NET_HANDSHAKE
+	select KEYS
+	help
+	  Enables TLS encryption for the NVMe TCP target using the netlink handshake API.
+
+	  The TLS handshake daemon is available at
+	  https://github.com/oracle/ktls-utils.
+
+	  If unsure, say N.
+
 config NVME_TARGET_AUTH
 	bool "NVMe over Fabrics In-band Authentication support"
 	depends on NVME_TARGET
diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
index efbfed310370..ad1fb32c7387 100644
--- a/drivers/nvme/target/configfs.c
+++ b/drivers/nvme/target/configfs.c
@@ -15,6 +15,7 @@ 
 #ifdef CONFIG_NVME_TARGET_AUTH
 #include <linux/nvme-auth.h>
 #endif
+#include <linux/nvme-keyring.h>
 #include <crypto/hash.h>
 #include <crypto/kpp.h>
 
@@ -397,6 +398,17 @@  static ssize_t nvmet_addr_tsas_store(struct config_item *item,
 	return -EINVAL;
 
 found:
+	if (sectype == NVMF_TCP_SECTYPE_TLS13) {
+		if (!IS_ENABLED(CONFIG_NVME_TARGET_TCP_TLS)) {
+			pr_err("TLS is not supported\n");
+			return -EINVAL;
+		}
+		if (!port->keyring) {
+			pr_err("TLS keyring not configured\n");
+			return -EINVAL;
+		}
+	}
+
 	nvmet_port_init_tsas_tcp(port, sectype);
 	/*
 	 * The TLS implementation currently does not support
@@ -1815,6 +1827,7 @@  static void nvmet_port_release(struct config_item *item)
 	flush_workqueue(nvmet_wq);
 	list_del(&port->global_entry);
 
+	key_put(port->keyring);
 	kfree(port->ana_state);
 	kfree(port);
 }
@@ -1864,6 +1877,14 @@  static struct config_group *nvmet_ports_make(struct config_group *group,
 		return ERR_PTR(-ENOMEM);
 	}
 
+	if (nvme_keyring_id()) {
+		port->keyring = key_lookup(nvme_keyring_id());
+		if (IS_ERR(port->keyring)) {
+			pr_warn("NVMe keyring not available, disabling TLS\n");
+			port->keyring = NULL;
+		}
+	}
+
 	for (i = 1; i <= NVMET_MAX_ANAGRPS; i++) {
 		if (i == NVMET_DEFAULT_ANA_GRPID)
 			port->ana_state[1] = NVME_ANA_OPTIMIZED;
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index 8cfd60f3b564..7f9ae53c1df5 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -158,6 +158,7 @@  struct nvmet_port {
 	struct config_group		ana_groups_group;
 	struct nvmet_ana_group		ana_default_group;
 	enum nvme_ana_state		*ana_state;
+	struct key			*keyring;
 	void				*priv;
 	bool				enabled;
 	int				inline_data_size;
diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
index f19ea9d923fd..77fa339008e1 100644
--- a/drivers/nvme/target/tcp.c
+++ b/drivers/nvme/target/tcp.c
@@ -8,9 +8,13 @@ 
 #include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/err.h>
+#include <linux/key.h>
 #include <linux/nvme-tcp.h>
+#include <linux/nvme-keyring.h>
 #include <net/sock.h>
 #include <net/tcp.h>
+#include <net/tls.h>
+#include <net/handshake.h>
 #include <linux/inet.h>
 #include <linux/llist.h>
 #include <crypto/hash.h>
@@ -66,6 +70,16 @@  device_param_cb(idle_poll_period_usecs, &set_param_ops,
 MODULE_PARM_DESC(idle_poll_period_usecs,
 		"nvmet tcp io_work poll till idle time period in usecs: Default 0");
 
+#ifdef CONFIG_NVME_TARGET_TCP_TLS
+/*
+ * TLS handshake timeout
+ */
+static int tls_handshake_timeout = 10;
+module_param(tls_handshake_timeout, int, 0644);
+MODULE_PARM_DESC(tls_handshake_timeout,
+		 "nvme TLS handshake timeout in seconds (default 10)");
+#endif
+
 #define NVMET_TCP_RECV_BUDGET		8
 #define NVMET_TCP_SEND_BUDGET		8
 #define NVMET_TCP_IO_WORK_BUDGET	64
@@ -122,11 +136,13 @@  struct nvmet_tcp_cmd {
 
 enum nvmet_tcp_queue_state {
 	NVMET_TCP_Q_CONNECTING,
+	NVMET_TCP_Q_TLS_HANDSHAKE,
 	NVMET_TCP_Q_LIVE,
 	NVMET_TCP_Q_DISCONNECTING,
 };
 
 struct nvmet_tcp_queue {
+	struct kref		kref;
 	struct socket		*sock;
 	struct nvmet_tcp_port	*port;
 	struct work_struct	io_work;
@@ -155,6 +171,10 @@  struct nvmet_tcp_queue {
 	struct ahash_request	*snd_hash;
 	struct ahash_request	*rcv_hash;
 
+	/* TLS state */
+	key_serial_t		tls_pskid;
+	struct delayed_work	tls_handshake_work;
+
 	unsigned long           poll_end;
 
 	spinlock_t		state_lock;
@@ -1283,12 +1303,21 @@  static int nvmet_tcp_try_recv(struct nvmet_tcp_queue *queue,
 	return ret;
 }
 
+static void nvmet_tcp_release_queue(struct kref *kref)
+{
+	struct nvmet_tcp_queue *queue =
+		container_of(kref, struct nvmet_tcp_queue, kref);
+
+	WARN_ON(queue->state != NVMET_TCP_Q_DISCONNECTING);
+	queue_work(nvmet_wq, &queue->release_work);
+}
+
 static void nvmet_tcp_schedule_release_queue(struct nvmet_tcp_queue *queue)
 {
 	spin_lock_bh(&queue->state_lock);
 	if (queue->state != NVMET_TCP_Q_DISCONNECTING) {
 		queue->state = NVMET_TCP_Q_DISCONNECTING;
-		queue_work(nvmet_wq, &queue->release_work);
+		kref_put(&queue->kref, nvmet_tcp_release_queue);
 	}
 	spin_unlock_bh(&queue->state_lock);
 }
@@ -1485,6 +1514,8 @@  static void nvmet_tcp_release_queue_work(struct work_struct *w)
 	mutex_unlock(&nvmet_tcp_queue_mutex);
 
 	nvmet_tcp_restore_socket_callbacks(queue);
+	tls_handshake_cancel(queue->sock->sk);
+	cancel_delayed_work_sync(&queue->tls_handshake_work);
 	cancel_work_sync(&queue->io_work);
 	/* stop accepting incoming data */
 	queue->rcv_state = NVMET_TCP_RECV_ERR;
@@ -1512,8 +1543,13 @@  static void nvmet_tcp_data_ready(struct sock *sk)
 
 	read_lock_bh(&sk->sk_callback_lock);
 	queue = sk->sk_user_data;
-	if (likely(queue))
-		queue_work_on(queue_cpu(queue), nvmet_tcp_wq, &queue->io_work);
+	if (likely(queue)) {
+		if (queue->data_ready)
+			queue->data_ready(sk);
+		if (queue->state != NVMET_TCP_Q_TLS_HANDSHAKE)
+			queue_work_on(queue_cpu(queue), nvmet_tcp_wq,
+				      &queue->io_work);
+	}
 	read_unlock_bh(&sk->sk_callback_lock);
 }
 
@@ -1621,6 +1657,83 @@  static int nvmet_tcp_set_queue_sock(struct nvmet_tcp_queue *queue)
 	return ret;
 }
 
+#ifdef CONFIG_NVME_TARGET_TCP_TLS
+static void nvmet_tcp_tls_handshake_done(void *data, int status,
+					 key_serial_t peerid)
+{
+	struct nvmet_tcp_queue *queue = data;
+
+	pr_debug("queue %d: TLS handshake done, key %x, status %d\n",
+		 queue->idx, peerid, status);
+	spin_lock_bh(&queue->state_lock);
+	if (queue->state != NVMET_TCP_Q_TLS_HANDSHAKE) {
+		pr_warn("queue %d: TLS handshake already completed\n",
+			queue->idx);
+		spin_unlock_bh(&queue->state_lock);
+		kref_put(&queue->kref, nvmet_tcp_release_queue);
+		return;
+	}
+	if (!status)
+		queue->tls_pskid = peerid;
+	queue->state = NVMET_TCP_Q_CONNECTING;
+	spin_unlock_bh(&queue->state_lock);
+
+	cancel_delayed_work_sync(&queue->tls_handshake_work);
+	if (status) {
+		kernel_sock_shutdown(queue->sock, SHUT_RDWR);
+		kref_put(&queue->kref, nvmet_tcp_release_queue);
+		return;
+	}
+
+	pr_debug("queue %d: resetting queue callbacks after TLS handshake\n",
+		 queue->idx);
+	nvmet_tcp_set_queue_sock(queue);
+	kref_put(&queue->kref, nvmet_tcp_release_queue);
+}
+
+static void nvmet_tcp_tls_handshake_timeout_work(struct work_struct *w)
+{
+	struct nvmet_tcp_queue *queue = container_of(to_delayed_work(w),
+			struct nvmet_tcp_queue, tls_handshake_work);
+
+	pr_debug("queue %d: TLS handshake timeout\n", queue->idx);
+	if (!tls_handshake_cancel(queue->sock->sk))
+		return;
+	kernel_sock_shutdown(queue->sock, SHUT_RDWR);
+	kref_put(&queue->kref, nvmet_tcp_release_queue);
+}
+
+static int nvmet_tcp_tls_handshake(struct nvmet_tcp_queue *queue)
+{
+	int ret = -EOPNOTSUPP;
+	struct tls_handshake_args args;
+
+	if (queue->state != NVMET_TCP_Q_TLS_HANDSHAKE) {
+		pr_warn("cannot start TLS in state %d\n", queue->state);
+		return -EINVAL;
+	}
+
+	kref_get(&queue->kref);
+	pr_debug("queue %d: TLS ServerHello\n", queue->idx);
+	memset(&args, 0, sizeof(args));
+	args.ta_sock = queue->sock;
+	args.ta_done = nvmet_tcp_tls_handshake_done;
+	args.ta_data = queue;
+	args.ta_keyring = key_serial(queue->port->nport->keyring);
+	args.ta_timeout_ms = tls_handshake_timeout * 1000;
+
+	ret = tls_server_hello_psk(&args, GFP_KERNEL);
+	if (ret) {
+		kref_put(&queue->kref, nvmet_tcp_release_queue);
+		pr_err("failed to start TLS, err=%d\n", ret);
+	} else {
+		queue_delayed_work(nvmet_wq, &queue->tls_handshake_work,
+				   tls_handshake_timeout * HZ);
+	}
+	return ret;
+}
+#endif
+
 static void nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port,
 		struct socket *newsock)
 {
@@ -1636,11 +1749,16 @@  static void nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port,
 
 	INIT_WORK(&queue->release_work, nvmet_tcp_release_queue_work);
 	INIT_WORK(&queue->io_work, nvmet_tcp_io_work);
+	kref_init(&queue->kref);
 	queue->sock = newsock;
 	queue->port = port;
 	queue->nr_cmds = 0;
 	spin_lock_init(&queue->state_lock);
-	queue->state = NVMET_TCP_Q_CONNECTING;
+	if (queue->port->nport->disc_addr.tsas.tcp.sectype ==
+	    NVMF_TCP_SECTYPE_TLS13)
+		queue->state = NVMET_TCP_Q_TLS_HANDSHAKE;
+	else
+		queue->state = NVMET_TCP_Q_CONNECTING;
 	INIT_LIST_HEAD(&queue->free_list);
 	init_llist_head(&queue->resp_list);
 	INIT_LIST_HEAD(&queue->resp_send_list);
@@ -1671,12 +1789,32 @@  static void nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port,
 	list_add_tail(&queue->queue_list, &nvmet_tcp_queue_list);
 	mutex_unlock(&nvmet_tcp_queue_mutex);
 
+#ifdef CONFIG_NVME_TARGET_TCP_TLS
+	INIT_DELAYED_WORK(&queue->tls_handshake_work,
+			  nvmet_tcp_tls_handshake_timeout_work);
+	if (queue->state == NVMET_TCP_Q_TLS_HANDSHAKE) {
+		struct sock *sk = queue->sock->sk;
+
+		/* Restore the default callbacks before starting upcall */
+		read_lock_bh(&sk->sk_callback_lock);
+		sk->sk_user_data = NULL;
+		sk->sk_data_ready = port->data_ready;
+		read_unlock_bh(&sk->sk_callback_lock);
+		if (!nvmet_tcp_tls_handshake(queue))
+			return;
+
+		/* TLS handshake failed, terminate the connection */
+		goto out_destroy_sq;
+	}
+#endif
+
 	ret = nvmet_tcp_set_queue_sock(queue);
 	if (ret)
 		goto out_destroy_sq;
 
 	return;
 out_destroy_sq:
+	queue->state = NVMET_TCP_Q_DISCONNECTING;
 	mutex_lock(&nvmet_tcp_queue_mutex);
 	list_del_init(&queue->queue_list);
 	mutex_unlock(&nvmet_tcp_queue_mutex);