Message ID | 20230814111943.68325-16-hare@suse.de (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | nvme: In-kernel TLS support for TCP | expand |
Context | Check | Description |
---|---|---|
netdev/tree_selection | success | Not a local patch, async |
On 8/14/23 14:19, Hannes Reinecke wrote: > Add functions to start the TLS handshake upcall when > the TCP TSAS sectype is set to 'tls1.3' and add a config > option NVME_TARGET_TCP_TLS. Need to document the refcount added. Also the general design with upcalling tls handshake in userspace and continue from there... > > Signed-off-by: Hannes Reinecke <hare@suse.de> > --- > drivers/nvme/target/Kconfig | 15 ++++ > drivers/nvme/target/configfs.c | 21 +++++ > drivers/nvme/target/nvmet.h | 1 + > drivers/nvme/target/tcp.c | 146 ++++++++++++++++++++++++++++++++- > 4 files changed, 179 insertions(+), 4 deletions(-) > > diff --git a/drivers/nvme/target/Kconfig b/drivers/nvme/target/Kconfig > index 79fc64035ee3..8a6c9cae804c 100644 > --- a/drivers/nvme/target/Kconfig > +++ b/drivers/nvme/target/Kconfig > @@ -84,6 +84,21 @@ config NVME_TARGET_TCP > > If unsure, say N. > > +config NVME_TARGET_TCP_TLS > + bool "NVMe over Fabrics TCP target TLS encryption support" > + depends on NVME_TARGET_TCP > + select NVME_COMMON > + select NVME_KEYRING > + select NET_HANDSHAKE > + select KEYS > + help > + Enables TLS encryption for the NVMe TCP target using the netlink handshake API. > + > + The TLS handshake daemon is availble at > + https://github.com/oracle/ktls-utils. > + > + If unsure, say N. 
> + > config NVME_TARGET_AUTH > bool "NVMe over Fabrics In-band Authentication support" > depends on NVME_TARGET > diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c > index efbfed310370..ad1fb32c7387 100644 > --- a/drivers/nvme/target/configfs.c > +++ b/drivers/nvme/target/configfs.c > @@ -15,6 +15,7 @@ > #ifdef CONFIG_NVME_TARGET_AUTH > #include <linux/nvme-auth.h> > #endif > +#include <linux/nvme-keyring.h> > #include <crypto/hash.h> > #include <crypto/kpp.h> > > @@ -397,6 +398,17 @@ static ssize_t nvmet_addr_tsas_store(struct config_item *item, > return -EINVAL; > > found: > + if (sectype == NVMF_TCP_SECTYPE_TLS13) { > + if (!IS_ENABLED(CONFIG_NVME_TARGET_TCP_TLS)) { > + pr_err("TLS is not supported\n"); > + return -EINVAL; > + } > + if (!port->keyring) { > + pr_err("TLS keyring not configured\n"); > + return -EINVAL; > + } > + } > + > nvmet_port_init_tsas_tcp(port, sectype); > /* > * The TLS implementation currently does not support > @@ -1815,6 +1827,7 @@ static void nvmet_port_release(struct config_item *item) > flush_workqueue(nvmet_wq); > list_del(&port->global_entry); > > + key_put(port->keyring); > kfree(port->ana_state); > kfree(port); > } > @@ -1864,6 +1877,14 @@ static struct config_group *nvmet_ports_make(struct config_group *group, > return ERR_PTR(-ENOMEM); > } > > + if (nvme_keyring_id()) { > + port->keyring = key_lookup(nvme_keyring_id()); > + if (IS_ERR(port->keyring)) { > + pr_warn("NVMe keyring not available, disabling TLS\n"); > + port->keyring = NULL; why setting this to NULL? 
> + } > + } > + > for (i = 1; i <= NVMET_MAX_ANAGRPS; i++) { > if (i == NVMET_DEFAULT_ANA_GRPID) > port->ana_state[1] = NVME_ANA_OPTIMIZED; > diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h > index 8cfd60f3b564..7f9ae53c1df5 100644 > --- a/drivers/nvme/target/nvmet.h > +++ b/drivers/nvme/target/nvmet.h > @@ -158,6 +158,7 @@ struct nvmet_port { > struct config_group ana_groups_group; > struct nvmet_ana_group ana_default_group; > enum nvme_ana_state *ana_state; > + struct key *keyring; > void *priv; > bool enabled; > int inline_data_size; > diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c > index f19ea9d923fd..77fa339008e1 100644 > --- a/drivers/nvme/target/tcp.c > +++ b/drivers/nvme/target/tcp.c > @@ -8,9 +8,13 @@ > #include <linux/init.h> > #include <linux/slab.h> > #include <linux/err.h> > +#include <linux/key.h> > #include <linux/nvme-tcp.h> > +#include <linux/nvme-keyring.h> > #include <net/sock.h> > #include <net/tcp.h> > +#include <net/tls.h> > +#include <net/handshake.h> > #include <linux/inet.h> > #include <linux/llist.h> > #include <crypto/hash.h> > @@ -66,6 +70,16 @@ device_param_cb(idle_poll_period_usecs, &set_param_ops, > MODULE_PARM_DESC(idle_poll_period_usecs, > "nvmet tcp io_work poll till idle time period in usecs: Default 0"); > > +#ifdef CONFIG_NVME_TARGET_TCP_TLS > +/* > + * TLS handshake timeout > + */ > +static int tls_handshake_timeout = 10; > +module_param(tls_handshake_timeout, int, 0644); > +MODULE_PARM_DESC(tls_handshake_timeout, > + "nvme TLS handshake timeout in seconds (default 10)"); > +#endif > + > #define NVMET_TCP_RECV_BUDGET 8 > #define NVMET_TCP_SEND_BUDGET 8 > #define NVMET_TCP_IO_WORK_BUDGET 64 > @@ -122,11 +136,13 @@ struct nvmet_tcp_cmd { > > enum nvmet_tcp_queue_state { > NVMET_TCP_Q_CONNECTING, > + NVMET_TCP_Q_TLS_HANDSHAKE, > NVMET_TCP_Q_LIVE, > NVMET_TCP_Q_DISCONNECTING, > }; > > struct nvmet_tcp_queue { > + struct kref kref; Why is kref the first member of the struct? 
> struct socket *sock; > struct nvmet_tcp_port *port; > struct work_struct io_work; > @@ -155,6 +171,10 @@ struct nvmet_tcp_queue { > struct ahash_request *snd_hash; > struct ahash_request *rcv_hash; > > + /* TLS state */ > + key_serial_t tls_pskid; > + struct delayed_work tls_handshake_work; > + > unsigned long poll_end; > > spinlock_t state_lock; > @@ -1283,12 +1303,21 @@ static int nvmet_tcp_try_recv(struct nvmet_tcp_queue *queue, > return ret; > } > > +static void nvmet_tcp_release_queue(struct kref *kref) > +{ > + struct nvmet_tcp_queue *queue = > + container_of(kref, struct nvmet_tcp_queue, kref); > + > + WARN_ON(queue->state != NVMET_TCP_Q_DISCONNECTING); > + queue_work(nvmet_wq, &queue->release_work); > +} > + > static void nvmet_tcp_schedule_release_queue(struct nvmet_tcp_queue *queue) > { > spin_lock_bh(&queue->state_lock); > if (queue->state != NVMET_TCP_Q_DISCONNECTING) { > queue->state = NVMET_TCP_Q_DISCONNECTING; > - queue_work(nvmet_wq, &queue->release_work); > + kref_put(&queue->kref, nvmet_tcp_release_queue); > } > spin_unlock_bh(&queue->state_lock); > } > @@ -1485,6 +1514,8 @@ static void nvmet_tcp_release_queue_work(struct work_struct *w) > mutex_unlock(&nvmet_tcp_queue_mutex); > > nvmet_tcp_restore_socket_callbacks(queue); > + tls_handshake_cancel(queue->sock->sk); > + cancel_delayed_work_sync(&queue->tls_handshake_work); We should call it tls_handshake_tmo_work or something to make it clear it is a timeout work. 
> cancel_work_sync(&queue->io_work); > /* stop accepting incoming data */ > queue->rcv_state = NVMET_TCP_RECV_ERR; > @@ -1512,8 +1543,13 @@ static void nvmet_tcp_data_ready(struct sock *sk) > > read_lock_bh(&sk->sk_callback_lock); > queue = sk->sk_user_data; > - if (likely(queue)) > - queue_work_on(queue_cpu(queue), nvmet_tcp_wq, &queue->io_work); > + if (likely(queue)) { > + if (queue->data_ready) > + queue->data_ready(sk); > + if (queue->state != NVMET_TCP_Q_TLS_HANDSHAKE) > + queue_work_on(queue_cpu(queue), nvmet_tcp_wq, > + &queue->io_work); > + } > read_unlock_bh(&sk->sk_callback_lock); > } > > @@ -1621,6 +1657,83 @@ static int nvmet_tcp_set_queue_sock(struct nvmet_tcp_queue *queue) > return ret; > } > > +#ifdef CONFIG_NVME_TARGET_TCP_TLS > +static void nvmet_tcp_tls_handshake_done(void *data, int status, > + key_serial_t peerid) > +{ > + struct nvmet_tcp_queue *queue = data; > + > + pr_debug("queue %d: TLS handshake done, key %x, status %d\n", > + queue->idx, peerid, status); > + spin_lock_bh(&queue->state_lock); > + if (queue->state != NVMET_TCP_Q_TLS_HANDSHAKE) { Is this even possible? > + pr_warn("queue %d: TLS handshake already completed\n", > + queue->idx); > + spin_unlock_bh(&queue->state_lock); > + kref_put(&queue->kref, nvmet_tcp_release_queue); How can we get here? > + return; > + } > + if (!status) > + queue->tls_pskid = peerid; > + queue->state = NVMET_TCP_Q_CONNECTING; > + spin_unlock_bh(&queue->state_lock); > + > + cancel_delayed_work_sync(&queue->tls_handshake_work); > + if (status) { Wait, did we assign the sk_state_change at this stage? What will sock shutdown trigger? > + kernel_sock_shutdown(queue->sock, SHUT_RDWR); Probably the put can be moved to an out: label at the end. 
> + kref_put(&queue->kref, nvmet_tcp_release_queue); > + return; > + } > + > + pr_debug("queue %d: resetting queue callbacks after TLS handshake\n", > + queue->idx); > + nvmet_tcp_set_queue_sock(queue); > + kref_put(&queue->kref, nvmet_tcp_release_queue); > +} > + > +static void nvmet_tcp_tls_handshake_timeout_work(struct work_struct *w) > +{ > + struct nvmet_tcp_queue *queue = container_of(to_delayed_work(w), > + struct nvmet_tcp_queue, tls_handshake_work); > + > + pr_debug("queue %d: TLS handshake timeout\n", queue->idx); Probably it's better to make this pr_warn... > + if (!tls_handshake_cancel(queue->sock->sk)) > + return; > + kernel_sock_shutdown(queue->sock, SHUT_RDWR); Same question here, did we assign sk_state_change yet? > + kref_put(&queue->kref, nvmet_tcp_release_queue); > +} > + > +static int nvmet_tcp_tls_handshake(struct nvmet_tcp_queue *queue) > +{ > + int ret = -EOPNOTSUPP; > + struct tls_handshake_args args; > + > + if (queue->state != NVMET_TCP_Q_TLS_HANDSHAKE) { > + pr_warn("cannot start TLS in state %d\n", queue->state); > + return -EINVAL; > + } > + > + kref_get(&queue->kref); > + pr_debug("queue %d: TLS ServerHello\n", queue->idx); > + memset(&args, 0, sizeof(args)); > + args.ta_sock = queue->sock; > + args.ta_done = nvmet_tcp_tls_handshake_done; > + args.ta_data = queue; > + args.ta_keyring = key_serial(queue->port->nport->keyring); > + args.ta_timeout_ms = tls_handshake_timeout * 1000; > + > + ret = tls_server_hello_psk(&args, GFP_KERNEL); > + if (ret) { > + kref_put(&queue->kref, nvmet_tcp_release_queue); > + pr_err("failed to start TLS, err=%d\n", ret); > + } else { > + queue_delayed_work(nvmet_wq, &queue->tls_handshake_work, > + tls_handshake_timeout * HZ); > + } > + return ret; > +} > +#endif > + > static void nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port, > struct socket *newsock) > { > @@ -1636,11 +1749,16 @@ static void nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port, > > INIT_WORK(&queue->release_work, 
nvmet_tcp_release_queue_work); > INIT_WORK(&queue->io_work, nvmet_tcp_io_work); > + kref_init(&queue->kref); > queue->sock = newsock; > queue->port = port; > queue->nr_cmds = 0; > spin_lock_init(&queue->state_lock); > - queue->state = NVMET_TCP_Q_CONNECTING; > + if (queue->port->nport->disc_addr.tsas.tcp.sectype == > + NVMF_TCP_SECTYPE_TLS13) > + queue->state = NVMET_TCP_Q_TLS_HANDSHAKE; > + else > + queue->state = NVMET_TCP_Q_CONNECTING; > INIT_LIST_HEAD(&queue->free_list); > init_llist_head(&queue->resp_list); > INIT_LIST_HEAD(&queue->resp_send_list); > @@ -1671,12 +1789,32 @@ static void nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port, > list_add_tail(&queue->queue_list, &nvmet_tcp_queue_list); > mutex_unlock(&nvmet_tcp_queue_mutex); > > +#ifdef CONFIG_NVME_TARGET_TCP_TLS > + INIT_DELAYED_WORK(&queue->tls_handshake_work, > + nvmet_tcp_tls_handshake_timeout_work); > + if (queue->state == NVMET_TCP_Q_TLS_HANDSHAKE) { > + struct sock *sk = queue->sock->sk; > + > + /* Restore the default callbacks before starting upcall */ > + read_lock_bh(&sk->sk_callback_lock); > + sk->sk_user_data = NULL; > + sk->sk_data_ready = port->data_ready; > + read_unlock_bh(&sk->sk_callback_lock); > + if (!nvmet_tcp_tls_handshake(queue)) > + return; > + > + /* TLS handshake failed, terminate the connection */ > + goto out_destroy_sq; > + } > +#endif > + > ret = nvmet_tcp_set_queue_sock(queue); > if (ret) > goto out_destroy_sq; > > return; > out_destroy_sq: > + queue->state = NVMET_TCP_Q_DISCONNECTING; Can you clarify what this is used for? > mutex_lock(&nvmet_tcp_queue_mutex); > list_del_init(&queue->queue_list); > mutex_unlock(&nvmet_tcp_queue_mutex);
On 8/14/23 14:48, Sagi Grimberg wrote: > > > On 8/14/23 14:19, Hannes Reinecke wrote: >> Add functions to start the TLS handshake upcall when >> the TCP TSAS sectype is set to 'tls1.3' and add a config >> option NVME_TARGET_TCP_TLS. > > Need to document the refcount added. > Also the general design with upcalling tls handshake in > userspace and continue from there... > Okay. >> >> Signed-off-by: Hannes Reinecke <hare@suse.de> >> --- >> drivers/nvme/target/Kconfig | 15 ++++ >> drivers/nvme/target/configfs.c | 21 +++++ >> drivers/nvme/target/nvmet.h | 1 + >> drivers/nvme/target/tcp.c | 146 ++++++++++++++++++++++++++++++++- >> 4 files changed, 179 insertions(+), 4 deletions(-) >> >> diff --git a/drivers/nvme/target/Kconfig b/drivers/nvme/target/Kconfig >> index 79fc64035ee3..8a6c9cae804c 100644 >> --- a/drivers/nvme/target/Kconfig >> +++ b/drivers/nvme/target/Kconfig >> @@ -84,6 +84,21 @@ config NVME_TARGET_TCP >> If unsure, say N. >> +config NVME_TARGET_TCP_TLS >> + bool "NVMe over Fabrics TCP target TLS encryption support" >> + depends on NVME_TARGET_TCP >> + select NVME_COMMON >> + select NVME_KEYRING >> + select NET_HANDSHAKE >> + select KEYS >> + help >> + Enables TLS encryption for the NVMe TCP target using the >> netlink handshake API. >> + >> + The TLS handshake daemon is availble at >> + https://github.com/oracle/ktls-utils. >> + >> + If unsure, say N. 
>> + >> config NVME_TARGET_AUTH >> bool "NVMe over Fabrics In-band Authentication support" >> depends on NVME_TARGET >> diff --git a/drivers/nvme/target/configfs.c >> b/drivers/nvme/target/configfs.c >> index efbfed310370..ad1fb32c7387 100644 >> --- a/drivers/nvme/target/configfs.c >> +++ b/drivers/nvme/target/configfs.c >> @@ -15,6 +15,7 @@ >> #ifdef CONFIG_NVME_TARGET_AUTH >> #include <linux/nvme-auth.h> >> #endif >> +#include <linux/nvme-keyring.h> >> #include <crypto/hash.h> >> #include <crypto/kpp.h> >> @@ -397,6 +398,17 @@ static ssize_t nvmet_addr_tsas_store(struct >> config_item *item, >> return -EINVAL; >> found: >> + if (sectype == NVMF_TCP_SECTYPE_TLS13) { >> + if (!IS_ENABLED(CONFIG_NVME_TARGET_TCP_TLS)) { >> + pr_err("TLS is not supported\n"); >> + return -EINVAL; >> + } >> + if (!port->keyring) { >> + pr_err("TLS keyring not configured\n"); >> + return -EINVAL; >> + } >> + } >> + >> nvmet_port_init_tsas_tcp(port, sectype); >> /* >> * The TLS implementation currently does not support >> @@ -1815,6 +1827,7 @@ static void nvmet_port_release(struct >> config_item *item) >> flush_workqueue(nvmet_wq); >> list_del(&port->global_entry); >> + key_put(port->keyring); >> kfree(port->ana_state); >> kfree(port); >> } >> @@ -1864,6 +1877,14 @@ static struct config_group >> *nvmet_ports_make(struct config_group *group, >> return ERR_PTR(-ENOMEM); >> } >> + if (nvme_keyring_id()) { >> + port->keyring = key_lookup(nvme_keyring_id()); >> + if (IS_ERR(port->keyring)) { >> + pr_warn("NVMe keyring not available, disabling TLS\n"); >> + port->keyring = NULL; > > why setting this to NULL? > It's checked when changing TSAS; we can only enable TLS if the nvme keyring is available. 
>> + } >> + } >> + >> for (i = 1; i <= NVMET_MAX_ANAGRPS; i++) { >> if (i == NVMET_DEFAULT_ANA_GRPID) >> port->ana_state[1] = NVME_ANA_OPTIMIZED; >> diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h >> index 8cfd60f3b564..7f9ae53c1df5 100644 >> --- a/drivers/nvme/target/nvmet.h >> +++ b/drivers/nvme/target/nvmet.h >> @@ -158,6 +158,7 @@ struct nvmet_port { >> struct config_group ana_groups_group; >> struct nvmet_ana_group ana_default_group; >> enum nvme_ana_state *ana_state; >> + struct key *keyring; >> void *priv; >> bool enabled; >> int inline_data_size; >> diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c >> index f19ea9d923fd..77fa339008e1 100644 >> --- a/drivers/nvme/target/tcp.c >> +++ b/drivers/nvme/target/tcp.c >> @@ -8,9 +8,13 @@ >> #include <linux/init.h> >> #include <linux/slab.h> >> #include <linux/err.h> >> +#include <linux/key.h> >> #include <linux/nvme-tcp.h> >> +#include <linux/nvme-keyring.h> >> #include <net/sock.h> >> #include <net/tcp.h> >> +#include <net/tls.h> >> +#include <net/handshake.h> >> #include <linux/inet.h> >> #include <linux/llist.h> >> #include <crypto/hash.h> >> @@ -66,6 +70,16 @@ device_param_cb(idle_poll_period_usecs, >> &set_param_ops, >> MODULE_PARM_DESC(idle_poll_period_usecs, >> "nvmet tcp io_work poll till idle time period in usecs: >> Default 0"); >> +#ifdef CONFIG_NVME_TARGET_TCP_TLS >> +/* >> + * TLS handshake timeout >> + */ >> +static int tls_handshake_timeout = 10; >> +module_param(tls_handshake_timeout, int, 0644); >> +MODULE_PARM_DESC(tls_handshake_timeout, >> + "nvme TLS handshake timeout in seconds (default 10)"); >> +#endif >> + >> #define NVMET_TCP_RECV_BUDGET 8 >> #define NVMET_TCP_SEND_BUDGET 8 >> #define NVMET_TCP_IO_WORK_BUDGET 64 >> @@ -122,11 +136,13 @@ struct nvmet_tcp_cmd { >> enum nvmet_tcp_queue_state { >> NVMET_TCP_Q_CONNECTING, >> + NVMET_TCP_Q_TLS_HANDSHAKE, >> NVMET_TCP_Q_LIVE, >> NVMET_TCP_Q_DISCONNECTING, >> }; >> struct nvmet_tcp_queue { >> + struct kref 
kref; > > Why is kref the first member of the struct? > Habit. I don't mind where it'll end up. >> struct socket *sock; >> struct nvmet_tcp_port *port; >> struct work_struct io_work; >> @@ -155,6 +171,10 @@ struct nvmet_tcp_queue { >> struct ahash_request *snd_hash; >> struct ahash_request *rcv_hash; >> + /* TLS state */ >> + key_serial_t tls_pskid; >> + struct delayed_work tls_handshake_work; >> + >> unsigned long poll_end; >> spinlock_t state_lock; >> @@ -1283,12 +1303,21 @@ static int nvmet_tcp_try_recv(struct >> nvmet_tcp_queue *queue, >> return ret; >> } >> +static void nvmet_tcp_release_queue(struct kref *kref) >> +{ >> + struct nvmet_tcp_queue *queue = >> + container_of(kref, struct nvmet_tcp_queue, kref); >> + >> + WARN_ON(queue->state != NVMET_TCP_Q_DISCONNECTING); >> + queue_work(nvmet_wq, &queue->release_work); >> +} >> + >> static void nvmet_tcp_schedule_release_queue(struct nvmet_tcp_queue >> *queue) >> { >> spin_lock_bh(&queue->state_lock); >> if (queue->state != NVMET_TCP_Q_DISCONNECTING) { >> queue->state = NVMET_TCP_Q_DISCONNECTING; >> - queue_work(nvmet_wq, &queue->release_work); >> + kref_put(&queue->kref, nvmet_tcp_release_queue); >> } >> spin_unlock_bh(&queue->state_lock); >> } >> @@ -1485,6 +1514,8 @@ static void nvmet_tcp_release_queue_work(struct >> work_struct *w) >> mutex_unlock(&nvmet_tcp_queue_mutex); >> nvmet_tcp_restore_socket_callbacks(queue); >> + tls_handshake_cancel(queue->sock->sk); >> + cancel_delayed_work_sync(&queue->tls_handshake_work); > > We should call it tls_handshake_tmo_work or something to make it > clear it is a timeout work. > Okay. 
>> cancel_work_sync(&queue->io_work); >> /* stop accepting incoming data */ >> queue->rcv_state = NVMET_TCP_RECV_ERR; >> @@ -1512,8 +1543,13 @@ static void nvmet_tcp_data_ready(struct sock *sk) >> read_lock_bh(&sk->sk_callback_lock); >> queue = sk->sk_user_data; >> - if (likely(queue)) >> - queue_work_on(queue_cpu(queue), nvmet_tcp_wq, &queue->io_work); >> + if (likely(queue)) { >> + if (queue->data_ready) >> + queue->data_ready(sk); >> + if (queue->state != NVMET_TCP_Q_TLS_HANDSHAKE) >> + queue_work_on(queue_cpu(queue), nvmet_tcp_wq, >> + &queue->io_work); >> + } >> read_unlock_bh(&sk->sk_callback_lock); >> } >> @@ -1621,6 +1657,83 @@ static int nvmet_tcp_set_queue_sock(struct >> nvmet_tcp_queue *queue) >> return ret; >> } >> +#ifdef CONFIG_NVME_TARGET_TCP_TLS >> +static void nvmet_tcp_tls_handshake_done(void *data, int status, >> + key_serial_t peerid) >> +{ >> + struct nvmet_tcp_queue *queue = data; >> + >> + pr_debug("queue %d: TLS handshake done, key %x, status %d\n", >> + queue->idx, peerid, status); >> + spin_lock_bh(&queue->state_lock); >> + if (queue->state != NVMET_TCP_Q_TLS_HANDSHAKE) { > > Is this even possible? > I guess it can happen when the socket closes during handshake; the daemon might still be sending a 'done' event but nvmet_tcp_schedule_release_queue() has been called. >> + pr_warn("queue %d: TLS handshake already completed\n", >> + queue->idx); >> + spin_unlock_bh(&queue->state_lock); >> + kref_put(&queue->kref, nvmet_tcp_release_queue); > > How can we get here? > See above. >> + return; >> + } >> + if (!status) >> + queue->tls_pskid = peerid; >> + queue->state = NVMET_TCP_Q_CONNECTING; >> + spin_unlock_bh(&queue->state_lock); >> + >> + cancel_delayed_work_sync(&queue->tls_handshake_work); >> + if (status) { > > Wait, did we assign the sk_state_change in this stage? What will > sock shutdown trigger? > That, however is a good point. You might be right. Will be checking. 
>> + kernel_sock_shutdown(queue->sock, SHUT_RDWR); > > Probably the put can be moved to a out: label in the end. > Probably. >> + kref_put(&queue->kref, nvmet_tcp_release_queue); >> + return; >> + } >> + >> + pr_debug("queue %d: resetting queue callbacks after TLS >> handshake\n", >> + queue->idx); >> + nvmet_tcp_set_queue_sock(queue); >> + kref_put(&queue->kref, nvmet_tcp_release_queue); >> +} >> + >> +static void nvmet_tcp_tls_handshake_timeout_work(struct work_struct *w) >> +{ >> + struct nvmet_tcp_queue *queue = container_of(to_delayed_work(w), >> + struct nvmet_tcp_queue, tls_handshake_work); >> + >> + pr_debug("queue %d: TLS handshake timeout\n", queue->idx); > > Probably its better to make this pr_warn... > Ok. >> + if (!tls_handshake_cancel(queue->sock->sk)) >> + return; >> + kernel_sock_shutdown(queue->sock, SHUT_RDWR); > > Same question here, did we assign sk_state_change yet? > Will be checking. >> + kref_put(&queue->kref, nvmet_tcp_release_queue); >> +} >> + >> +static int nvmet_tcp_tls_handshake(struct nvmet_tcp_queue *queue) >> +{ >> + int ret = -EOPNOTSUPP; >> + struct tls_handshake_args args; >> + >> + if (queue->state != NVMET_TCP_Q_TLS_HANDSHAKE) { >> + pr_warn("cannot start TLS in state %d\n", queue->state); >> + return -EINVAL; >> + } >> + >> + kref_get(&queue->kref); >> + pr_debug("queue %d: TLS ServerHello\n", queue->idx); >> + memset(&args, 0, sizeof(args)); >> + args.ta_sock = queue->sock; >> + args.ta_done = nvmet_tcp_tls_handshake_done; >> + args.ta_data = queue; >> + args.ta_keyring = key_serial(queue->port->nport->keyring); >> + args.ta_timeout_ms = tls_handshake_timeout * 1000; >> + >> + ret = tls_server_hello_psk(&args, GFP_KERNEL); >> + if (ret) { >> + kref_put(&queue->kref, nvmet_tcp_release_queue); >> + pr_err("failed to start TLS, err=%d\n", ret); >> + } else { >> + queue_delayed_work(nvmet_wq, &queue->tls_handshake_work, >> + tls_handshake_timeout * HZ); >> + } >> + return ret; >> +} >> +#endif >> + >> static void 
nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port, >> struct socket *newsock) >> { >> @@ -1636,11 +1749,16 @@ static void nvmet_tcp_alloc_queue(struct >> nvmet_tcp_port *port, >> INIT_WORK(&queue->release_work, nvmet_tcp_release_queue_work); >> INIT_WORK(&queue->io_work, nvmet_tcp_io_work); >> + kref_init(&queue->kref); >> queue->sock = newsock; >> queue->port = port; >> queue->nr_cmds = 0; >> spin_lock_init(&queue->state_lock); >> - queue->state = NVMET_TCP_Q_CONNECTING; >> + if (queue->port->nport->disc_addr.tsas.tcp.sectype == >> + NVMF_TCP_SECTYPE_TLS13) >> + queue->state = NVMET_TCP_Q_TLS_HANDSHAKE; >> + else >> + queue->state = NVMET_TCP_Q_CONNECTING; >> INIT_LIST_HEAD(&queue->free_list); >> init_llist_head(&queue->resp_list); >> INIT_LIST_HEAD(&queue->resp_send_list); >> @@ -1671,12 +1789,32 @@ static void nvmet_tcp_alloc_queue(struct >> nvmet_tcp_port *port, >> list_add_tail(&queue->queue_list, &nvmet_tcp_queue_list); >> mutex_unlock(&nvmet_tcp_queue_mutex); >> +#ifdef CONFIG_NVME_TARGET_TCP_TLS >> + INIT_DELAYED_WORK(&queue->tls_handshake_work, >> + nvmet_tcp_tls_handshake_timeout_work); >> + if (queue->state == NVMET_TCP_Q_TLS_HANDSHAKE) { >> + struct sock *sk = queue->sock->sk; >> + >> + /* Restore the default callbacks before starting upcall */ >> + read_lock_bh(&sk->sk_callback_lock); >> + sk->sk_user_data = NULL; >> + sk->sk_data_ready = port->data_ready; >> + read_unlock_bh(&sk->sk_callback_lock); >> + if (!nvmet_tcp_tls_handshake(queue)) >> + return; >> + >> + /* TLS handshake failed, terminate the connection */ >> + goto out_destroy_sq; >> + } >> +#endif >> + >> ret = nvmet_tcp_set_queue_sock(queue); >> if (ret) >> goto out_destroy_sq; >> return; >> out_destroy_sq: >> + queue->state = NVMET_TCP_Q_DISCONNECTING; > > Can you clarify what this is used for? > Primarily for debugging, to signal that we really are disconnecting. But yeah, not really required. Cheers, Hannes
>>> @@ -1864,6 +1877,14 @@ static struct config_group >>> *nvmet_ports_make(struct config_group *group, >>> return ERR_PTR(-ENOMEM); >>> } >>> + if (nvme_keyring_id()) { >>> + port->keyring = key_lookup(nvme_keyring_id()); >>> + if (IS_ERR(port->keyring)) { >>> + pr_warn("NVMe keyring not available, disabling TLS\n"); >>> + port->keyring = NULL; >> >> why setting this to NULL? >> > It's check when changing TSAS; we can only enable TLS if the nvme > keyring is available. ok > >>> + } >>> + } >>> + >>> for (i = 1; i <= NVMET_MAX_ANAGRPS; i++) { >>> if (i == NVMET_DEFAULT_ANA_GRPID) >>> port->ana_state[1] = NVME_ANA_OPTIMIZED; >>> diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h >>> index 8cfd60f3b564..7f9ae53c1df5 100644 >>> --- a/drivers/nvme/target/nvmet.h >>> +++ b/drivers/nvme/target/nvmet.h >>> @@ -158,6 +158,7 @@ struct nvmet_port { >>> struct config_group ana_groups_group; >>> struct nvmet_ana_group ana_default_group; >>> enum nvme_ana_state *ana_state; >>> + struct key *keyring; >>> void *priv; >>> bool enabled; >>> int inline_data_size; >>> diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c >>> index f19ea9d923fd..77fa339008e1 100644 >>> --- a/drivers/nvme/target/tcp.c >>> +++ b/drivers/nvme/target/tcp.c >>> @@ -8,9 +8,13 @@ >>> #include <linux/init.h> >>> #include <linux/slab.h> >>> #include <linux/err.h> >>> +#include <linux/key.h> >>> #include <linux/nvme-tcp.h> >>> +#include <linux/nvme-keyring.h> >>> #include <net/sock.h> >>> #include <net/tcp.h> >>> +#include <net/tls.h> >>> +#include <net/handshake.h> >>> #include <linux/inet.h> >>> #include <linux/llist.h> >>> #include <crypto/hash.h> >>> @@ -66,6 +70,16 @@ device_param_cb(idle_poll_period_usecs, >>> &set_param_ops, >>> MODULE_PARM_DESC(idle_poll_period_usecs, >>> "nvmet tcp io_work poll till idle time period in usecs: >>> Default 0"); >>> +#ifdef CONFIG_NVME_TARGET_TCP_TLS >>> +/* >>> + * TLS handshake timeout >>> + */ >>> +static int tls_handshake_timeout = 10; 
>>> +module_param(tls_handshake_timeout, int, 0644); >>> +MODULE_PARM_DESC(tls_handshake_timeout, >>> + "nvme TLS handshake timeout in seconds (default 10)"); >>> +#endif >>> + >>> #define NVMET_TCP_RECV_BUDGET 8 >>> #define NVMET_TCP_SEND_BUDGET 8 >>> #define NVMET_TCP_IO_WORK_BUDGET 64 >>> @@ -122,11 +136,13 @@ struct nvmet_tcp_cmd { >>> enum nvmet_tcp_queue_state { >>> NVMET_TCP_Q_CONNECTING, >>> + NVMET_TCP_Q_TLS_HANDSHAKE, >>> NVMET_TCP_Q_LIVE, >>> NVMET_TCP_Q_DISCONNECTING, >>> }; >>> struct nvmet_tcp_queue { >>> + struct kref kref; >> >> Why is kref the first member of the struct? >> > Habit. > I don't mind where it'll end up. Move it to the back together with the tls section. > >>> struct socket *sock; >>> struct nvmet_tcp_port *port; >>> struct work_struct io_work; >>> @@ -155,6 +171,10 @@ struct nvmet_tcp_queue { >>> struct ahash_request *snd_hash; >>> struct ahash_request *rcv_hash; >>> + /* TLS state */ >>> + key_serial_t tls_pskid; >>> + struct delayed_work tls_handshake_work; >>> + >>> unsigned long poll_end; >>> spinlock_t state_lock; >>> @@ -1283,12 +1303,21 @@ static int nvmet_tcp_try_recv(struct >>> nvmet_tcp_queue *queue, >>> return ret; >>> } >>> +static void nvmet_tcp_release_queue(struct kref *kref) >>> +{ >>> + struct nvmet_tcp_queue *queue = >>> + container_of(kref, struct nvmet_tcp_queue, kref); >>> + >>> + WARN_ON(queue->state != NVMET_TCP_Q_DISCONNECTING); >>> + queue_work(nvmet_wq, &queue->release_work); >>> +} >>> + >>> static void nvmet_tcp_schedule_release_queue(struct nvmet_tcp_queue >>> *queue) >>> { >>> spin_lock_bh(&queue->state_lock); >>> if (queue->state != NVMET_TCP_Q_DISCONNECTING) { >>> queue->state = NVMET_TCP_Q_DISCONNECTING; >>> - queue_work(nvmet_wq, &queue->release_work); >>> + kref_put(&queue->kref, nvmet_tcp_release_queue); >>> } >>> spin_unlock_bh(&queue->state_lock); >>> } >>> @@ -1485,6 +1514,8 @@ static void nvmet_tcp_release_queue_work(struct >>> work_struct *w) >>> mutex_unlock(&nvmet_tcp_queue_mutex); >>> 
nvmet_tcp_restore_socket_callbacks(queue); >>> + tls_handshake_cancel(queue->sock->sk); >>> + cancel_delayed_work_sync(&queue->tls_handshake_work); >> >> We should call it tls_handshake_tmo_work or something to make it >> clear it is a timeout work. >> > Okay. > >>> cancel_work_sync(&queue->io_work); >>> /* stop accepting incoming data */ >>> queue->rcv_state = NVMET_TCP_RECV_ERR; >>> @@ -1512,8 +1543,13 @@ static void nvmet_tcp_data_ready(struct sock *sk) >>> read_lock_bh(&sk->sk_callback_lock); >>> queue = sk->sk_user_data; >>> - if (likely(queue)) >>> - queue_work_on(queue_cpu(queue), nvmet_tcp_wq, &queue->io_work); >>> + if (likely(queue)) { >>> + if (queue->data_ready) >>> + queue->data_ready(sk); >>> + if (queue->state != NVMET_TCP_Q_TLS_HANDSHAKE) >>> + queue_work_on(queue_cpu(queue), nvmet_tcp_wq, >>> + &queue->io_work); >>> + } >>> read_unlock_bh(&sk->sk_callback_lock); >>> } >>> @@ -1621,6 +1657,83 @@ static int nvmet_tcp_set_queue_sock(struct >>> nvmet_tcp_queue *queue) >>> return ret; >>> } >>> +#ifdef CONFIG_NVME_TARGET_TCP_TLS >>> +static void nvmet_tcp_tls_handshake_done(void *data, int status, >>> + key_serial_t peerid) >>> +{ >>> + struct nvmet_tcp_queue *queue = data; >>> + >>> + pr_debug("queue %d: TLS handshake done, key %x, status %d\n", >>> + queue->idx, peerid, status); >>> + spin_lock_bh(&queue->state_lock); >>> + if (queue->state != NVMET_TCP_Q_TLS_HANDSHAKE) { >> >> Is this even possible? >> > I guess it can happen when the socket closes during handshake; the > daemon might still be sending a 'done' event but > nvmet_tcp_schedule_release_queue() has been called. Umm, if the socket closes during the handshake then the state is NVMET_TCP_Q_TLS_HANDSHAKE. p.s. you call handshake cancel in the release flow so you should be fenced properly no?
On 8/14/23 21:12, Sagi Grimberg wrote: > >>>> @@ -1864,6 +1877,14 @@ static struct config_group >>>> *nvmet_ports_make(struct config_group *group, >>>> return ERR_PTR(-ENOMEM); >>>> } >>>> + if (nvme_keyring_id()) { >>>> + port->keyring = key_lookup(nvme_keyring_id()); >>>> + if (IS_ERR(port->keyring)) { >>>> + pr_warn("NVMe keyring not available, disabling TLS\n"); >>>> + port->keyring = NULL; >>> >>> why setting this to NULL? >>> >> It's check when changing TSAS; we can only enable TLS if the nvme >> keyring is available. > > ok > >> >>>> + } >>>> + } >>>> + >>>> for (i = 1; i <= NVMET_MAX_ANAGRPS; i++) { >>>> if (i == NVMET_DEFAULT_ANA_GRPID) >>>> port->ana_state[1] = NVME_ANA_OPTIMIZED; >>>> diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h >>>> index 8cfd60f3b564..7f9ae53c1df5 100644 >>>> --- a/drivers/nvme/target/nvmet.h >>>> +++ b/drivers/nvme/target/nvmet.h >>>> @@ -158,6 +158,7 @@ struct nvmet_port { >>>> struct config_group ana_groups_group; >>>> struct nvmet_ana_group ana_default_group; >>>> enum nvme_ana_state *ana_state; >>>> + struct key *keyring; >>>> void *priv; >>>> bool enabled; >>>> int inline_data_size; >>>> diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c >>>> index f19ea9d923fd..77fa339008e1 100644 >>>> --- a/drivers/nvme/target/tcp.c >>>> +++ b/drivers/nvme/target/tcp.c >>>> @@ -8,9 +8,13 @@ >>>> #include <linux/init.h> >>>> #include <linux/slab.h> >>>> #include <linux/err.h> >>>> +#include <linux/key.h> >>>> #include <linux/nvme-tcp.h> >>>> +#include <linux/nvme-keyring.h> >>>> #include <net/sock.h> >>>> #include <net/tcp.h> >>>> +#include <net/tls.h> >>>> +#include <net/handshake.h> >>>> #include <linux/inet.h> >>>> #include <linux/llist.h> >>>> #include <crypto/hash.h> >>>> @@ -66,6 +70,16 @@ device_param_cb(idle_poll_period_usecs, >>>> &set_param_ops, >>>> MODULE_PARM_DESC(idle_poll_period_usecs, >>>> "nvmet tcp io_work poll till idle time period in usecs: >>>> Default 0"); >>>> +#ifdef 
CONFIG_NVME_TARGET_TCP_TLS >>>> +/* >>>> + * TLS handshake timeout >>>> + */ >>>> +static int tls_handshake_timeout = 10; >>>> +module_param(tls_handshake_timeout, int, 0644); >>>> +MODULE_PARM_DESC(tls_handshake_timeout, >>>> + "nvme TLS handshake timeout in seconds (default 10)"); >>>> +#endif >>>> + >>>> #define NVMET_TCP_RECV_BUDGET 8 >>>> #define NVMET_TCP_SEND_BUDGET 8 >>>> #define NVMET_TCP_IO_WORK_BUDGET 64 >>>> @@ -122,11 +136,13 @@ struct nvmet_tcp_cmd { >>>> enum nvmet_tcp_queue_state { >>>> NVMET_TCP_Q_CONNECTING, >>>> + NVMET_TCP_Q_TLS_HANDSHAKE, >>>> NVMET_TCP_Q_LIVE, >>>> NVMET_TCP_Q_DISCONNECTING, >>>> }; >>>> struct nvmet_tcp_queue { >>>> + struct kref kref; >>> >>> Why is kref the first member of the struct? >>> >> Habit. >> I don't mind where it'll end up. > > Move it to the back together with the tls section. > >> >>>> struct socket *sock; >>>> struct nvmet_tcp_port *port; >>>> struct work_struct io_work; >>>> @@ -155,6 +171,10 @@ struct nvmet_tcp_queue { >>>> struct ahash_request *snd_hash; >>>> struct ahash_request *rcv_hash; >>>> + /* TLS state */ >>>> + key_serial_t tls_pskid; >>>> + struct delayed_work tls_handshake_work; >>>> + >>>> unsigned long poll_end; >>>> spinlock_t state_lock; >>>> @@ -1283,12 +1303,21 @@ static int nvmet_tcp_try_recv(struct >>>> nvmet_tcp_queue *queue, >>>> return ret; >>>> } >>>> +static void nvmet_tcp_release_queue(struct kref *kref) >>>> +{ >>>> + struct nvmet_tcp_queue *queue = >>>> + container_of(kref, struct nvmet_tcp_queue, kref); >>>> + >>>> + WARN_ON(queue->state != NVMET_TCP_Q_DISCONNECTING); >>>> + queue_work(nvmet_wq, &queue->release_work); >>>> +} >>>> + >>>> static void nvmet_tcp_schedule_release_queue(struct >>>> nvmet_tcp_queue *queue) >>>> { >>>> spin_lock_bh(&queue->state_lock); >>>> if (queue->state != NVMET_TCP_Q_DISCONNECTING) { >>>> queue->state = NVMET_TCP_Q_DISCONNECTING; >>>> - queue_work(nvmet_wq, &queue->release_work); >>>> + kref_put(&queue->kref, nvmet_tcp_release_queue); >>>> } >>>> 
spin_unlock_bh(&queue->state_lock); >>>> } >>>> @@ -1485,6 +1514,8 @@ static void >>>> nvmet_tcp_release_queue_work(struct work_struct *w) >>>> mutex_unlock(&nvmet_tcp_queue_mutex); >>>> nvmet_tcp_restore_socket_callbacks(queue); >>>> + tls_handshake_cancel(queue->sock->sk); >>>> + cancel_delayed_work_sync(&queue->tls_handshake_work); >>> >>> We should call it tls_handshake_tmo_work or something to make it >>> clear it is a timeout work. >>> >> Okay. >> >>>> cancel_work_sync(&queue->io_work); >>>> /* stop accepting incoming data */ >>>> queue->rcv_state = NVMET_TCP_RECV_ERR; >>>> @@ -1512,8 +1543,13 @@ static void nvmet_tcp_data_ready(struct sock >>>> *sk) >>>> read_lock_bh(&sk->sk_callback_lock); >>>> queue = sk->sk_user_data; >>>> - if (likely(queue)) >>>> - queue_work_on(queue_cpu(queue), nvmet_tcp_wq, >>>> &queue->io_work); >>>> + if (likely(queue)) { >>>> + if (queue->data_ready) >>>> + queue->data_ready(sk); >>>> + if (queue->state != NVMET_TCP_Q_TLS_HANDSHAKE) >>>> + queue_work_on(queue_cpu(queue), nvmet_tcp_wq, >>>> + &queue->io_work); >>>> + } >>>> read_unlock_bh(&sk->sk_callback_lock); >>>> } >>>> @@ -1621,6 +1657,83 @@ static int nvmet_tcp_set_queue_sock(struct >>>> nvmet_tcp_queue *queue) >>>> return ret; >>>> } >>>> +#ifdef CONFIG_NVME_TARGET_TCP_TLS >>>> +static void nvmet_tcp_tls_handshake_done(void *data, int status, >>>> + key_serial_t peerid) >>>> +{ >>>> + struct nvmet_tcp_queue *queue = data; >>>> + >>>> + pr_debug("queue %d: TLS handshake done, key %x, status %d\n", >>>> + queue->idx, peerid, status); >>>> + spin_lock_bh(&queue->state_lock); >>>> + if (queue->state != NVMET_TCP_Q_TLS_HANDSHAKE) { >>> >>> Is this even possible? >>> >> I guess it can happen when the socket closes during handshake; the >> daemon might still be sending a 'done' event but >> nvmet_tcp_schedule_release_queue() has been called. > > Umm, if the socket closes during the handshake then the state > is NVMET_TCP_Q_TLS_HANDSHAKE. 
> But there's a race window between setting it to NVMET_TCP_Q_DISCONNECTING and calling tls_handshake_cancel(). > p.s. you call handshake cancel in the release flow so you should be > fenced properly no? Not really. But I'll check if I can fix it up. Cheers, Hannes
>>>>> @@ -1864,6 +1877,14 @@ static struct config_group >>>>> *nvmet_ports_make(struct config_group *group, >>>>> return ERR_PTR(-ENOMEM); >>>>> } >>>>> + if (nvme_keyring_id()) { >>>>> + port->keyring = key_lookup(nvme_keyring_id()); >>>>> + if (IS_ERR(port->keyring)) { >>>>> + pr_warn("NVMe keyring not available, disabling TLS\n"); >>>>> + port->keyring = NULL; >>>> >>>> why setting this to NULL? >>>> >>> It's check when changing TSAS; we can only enable TLS if the nvme >>> keyring is available. >> >> ok >> >>> >>>>> + } >>>>> + } >>>>> + >>>>> for (i = 1; i <= NVMET_MAX_ANAGRPS; i++) { >>>>> if (i == NVMET_DEFAULT_ANA_GRPID) >>>>> port->ana_state[1] = NVME_ANA_OPTIMIZED; >>>>> diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h >>>>> index 8cfd60f3b564..7f9ae53c1df5 100644 >>>>> --- a/drivers/nvme/target/nvmet.h >>>>> +++ b/drivers/nvme/target/nvmet.h >>>>> @@ -158,6 +158,7 @@ struct nvmet_port { >>>>> struct config_group ana_groups_group; >>>>> struct nvmet_ana_group ana_default_group; >>>>> enum nvme_ana_state *ana_state; >>>>> + struct key *keyring; >>>>> void *priv; >>>>> bool enabled; >>>>> int inline_data_size; >>>>> diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c >>>>> index f19ea9d923fd..77fa339008e1 100644 >>>>> --- a/drivers/nvme/target/tcp.c >>>>> +++ b/drivers/nvme/target/tcp.c >>>>> @@ -8,9 +8,13 @@ >>>>> #include <linux/init.h> >>>>> #include <linux/slab.h> >>>>> #include <linux/err.h> >>>>> +#include <linux/key.h> >>>>> #include <linux/nvme-tcp.h> >>>>> +#include <linux/nvme-keyring.h> >>>>> #include <net/sock.h> >>>>> #include <net/tcp.h> >>>>> +#include <net/tls.h> >>>>> +#include <net/handshake.h> >>>>> #include <linux/inet.h> >>>>> #include <linux/llist.h> >>>>> #include <crypto/hash.h> >>>>> @@ -66,6 +70,16 @@ device_param_cb(idle_poll_period_usecs, >>>>> &set_param_ops, >>>>> MODULE_PARM_DESC(idle_poll_period_usecs, >>>>> "nvmet tcp io_work poll till idle time period in usecs: >>>>> Default 0"); >>>>> 
+#ifdef CONFIG_NVME_TARGET_TCP_TLS >>>>> +/* >>>>> + * TLS handshake timeout >>>>> + */ >>>>> +static int tls_handshake_timeout = 10; >>>>> +module_param(tls_handshake_timeout, int, 0644); >>>>> +MODULE_PARM_DESC(tls_handshake_timeout, >>>>> + "nvme TLS handshake timeout in seconds (default 10)"); >>>>> +#endif >>>>> + >>>>> #define NVMET_TCP_RECV_BUDGET 8 >>>>> #define NVMET_TCP_SEND_BUDGET 8 >>>>> #define NVMET_TCP_IO_WORK_BUDGET 64 >>>>> @@ -122,11 +136,13 @@ struct nvmet_tcp_cmd { >>>>> enum nvmet_tcp_queue_state { >>>>> NVMET_TCP_Q_CONNECTING, >>>>> + NVMET_TCP_Q_TLS_HANDSHAKE, >>>>> NVMET_TCP_Q_LIVE, >>>>> NVMET_TCP_Q_DISCONNECTING, >>>>> }; >>>>> struct nvmet_tcp_queue { >>>>> + struct kref kref; >>>> >>>> Why is kref the first member of the struct? >>>> >>> Habit. >>> I don't mind where it'll end up. >> >> Move it to the back together with the tls section. >> >>> >>>>> struct socket *sock; >>>>> struct nvmet_tcp_port *port; >>>>> struct work_struct io_work; >>>>> @@ -155,6 +171,10 @@ struct nvmet_tcp_queue { >>>>> struct ahash_request *snd_hash; >>>>> struct ahash_request *rcv_hash; >>>>> + /* TLS state */ >>>>> + key_serial_t tls_pskid; >>>>> + struct delayed_work tls_handshake_work; >>>>> + >>>>> unsigned long poll_end; >>>>> spinlock_t state_lock; >>>>> @@ -1283,12 +1303,21 @@ static int nvmet_tcp_try_recv(struct >>>>> nvmet_tcp_queue *queue, >>>>> return ret; >>>>> } >>>>> +static void nvmet_tcp_release_queue(struct kref *kref) >>>>> +{ >>>>> + struct nvmet_tcp_queue *queue = >>>>> + container_of(kref, struct nvmet_tcp_queue, kref); >>>>> + >>>>> + WARN_ON(queue->state != NVMET_TCP_Q_DISCONNECTING); >>>>> + queue_work(nvmet_wq, &queue->release_work); >>>>> +} >>>>> + >>>>> static void nvmet_tcp_schedule_release_queue(struct >>>>> nvmet_tcp_queue *queue) >>>>> { >>>>> spin_lock_bh(&queue->state_lock); >>>>> if (queue->state != NVMET_TCP_Q_DISCONNECTING) { >>>>> queue->state = NVMET_TCP_Q_DISCONNECTING; >>>>> - queue_work(nvmet_wq, &queue->release_work); 
>>>>> + kref_put(&queue->kref, nvmet_tcp_release_queue); >>>>> } >>>>> spin_unlock_bh(&queue->state_lock); >>>>> } >>>>> @@ -1485,6 +1514,8 @@ static void >>>>> nvmet_tcp_release_queue_work(struct work_struct *w) >>>>> mutex_unlock(&nvmet_tcp_queue_mutex); >>>>> nvmet_tcp_restore_socket_callbacks(queue); >>>>> + tls_handshake_cancel(queue->sock->sk); >>>>> + cancel_delayed_work_sync(&queue->tls_handshake_work); >>>> >>>> We should call it tls_handshake_tmo_work or something to make it >>>> clear it is a timeout work. >>>> >>> Okay. >>> >>>>> cancel_work_sync(&queue->io_work); >>>>> /* stop accepting incoming data */ >>>>> queue->rcv_state = NVMET_TCP_RECV_ERR; >>>>> @@ -1512,8 +1543,13 @@ static void nvmet_tcp_data_ready(struct sock >>>>> *sk) >>>>> read_lock_bh(&sk->sk_callback_lock); >>>>> queue = sk->sk_user_data; >>>>> - if (likely(queue)) >>>>> - queue_work_on(queue_cpu(queue), nvmet_tcp_wq, >>>>> &queue->io_work); >>>>> + if (likely(queue)) { >>>>> + if (queue->data_ready) >>>>> + queue->data_ready(sk); >>>>> + if (queue->state != NVMET_TCP_Q_TLS_HANDSHAKE) >>>>> + queue_work_on(queue_cpu(queue), nvmet_tcp_wq, >>>>> + &queue->io_work); >>>>> + } >>>>> read_unlock_bh(&sk->sk_callback_lock); >>>>> } >>>>> @@ -1621,6 +1657,83 @@ static int nvmet_tcp_set_queue_sock(struct >>>>> nvmet_tcp_queue *queue) >>>>> return ret; >>>>> } >>>>> +#ifdef CONFIG_NVME_TARGET_TCP_TLS >>>>> +static void nvmet_tcp_tls_handshake_done(void *data, int status, >>>>> + key_serial_t peerid) >>>>> +{ >>>>> + struct nvmet_tcp_queue *queue = data; >>>>> + >>>>> + pr_debug("queue %d: TLS handshake done, key %x, status %d\n", >>>>> + queue->idx, peerid, status); >>>>> + spin_lock_bh(&queue->state_lock); >>>>> + if (queue->state != NVMET_TCP_Q_TLS_HANDSHAKE) { >>>> >>>> Is this even possible? >>>> >>> I guess it can happen when the socket closes during handshake; the >>> daemon might still be sending a 'done' event but >>> nvmet_tcp_schedule_release_queue() has been called. 
>> >> Umm, if the socket closes during the handshake then the state >> is NVMET_TCP_Q_TLS_HANDSHAKE. >> > But there's a race window between setting it to > NVMET_TCP_Q_DISCONNECTING and calling tls_handshake_cancel(). > >> p.s. you call handshake cancel in the release flow so you should be >> fenced properly no? > Not really. But I'll check if I can fix it up. The teardown handling feels complicated to me. How are you testing it btw?
On 8/15/23 09:01, Sagi Grimberg wrote: > >>>>>> @@ -1864,6 +1877,14 @@ static struct config_group >>>>>> *nvmet_ports_make(struct config_group *group, >>>>>> return ERR_PTR(-ENOMEM); >>>>>> } >>>>>> + if (nvme_keyring_id()) { >>>>>> + port->keyring = key_lookup(nvme_keyring_id()); >>>>>> + if (IS_ERR(port->keyring)) { >>>>>> + pr_warn("NVMe keyring not available, disabling TLS\n"); >>>>>> + port->keyring = NULL; >>>>> >>>>> why setting this to NULL? >>>>> >>>> It's check when changing TSAS; we can only enable TLS if the nvme >>>> keyring is available. >>> >>> ok >>> >>>> >>>>>> + } >>>>>> + } >>>>>> + >>>>>> for (i = 1; i <= NVMET_MAX_ANAGRPS; i++) { >>>>>> if (i == NVMET_DEFAULT_ANA_GRPID) >>>>>> port->ana_state[1] = NVME_ANA_OPTIMIZED; >>>>>> diff --git a/drivers/nvme/target/nvmet.h >>>>>> b/drivers/nvme/target/nvmet.h >>>>>> index 8cfd60f3b564..7f9ae53c1df5 100644 >>>>>> --- a/drivers/nvme/target/nvmet.h >>>>>> +++ b/drivers/nvme/target/nvmet.h >>>>>> @@ -158,6 +158,7 @@ struct nvmet_port { >>>>>> struct config_group ana_groups_group; >>>>>> struct nvmet_ana_group ana_default_group; >>>>>> enum nvme_ana_state *ana_state; >>>>>> + struct key *keyring; >>>>>> void *priv; >>>>>> bool enabled; >>>>>> int inline_data_size; >>>>>> diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c >>>>>> index f19ea9d923fd..77fa339008e1 100644 >>>>>> --- a/drivers/nvme/target/tcp.c >>>>>> +++ b/drivers/nvme/target/tcp.c >>>>>> @@ -8,9 +8,13 @@ >>>>>> #include <linux/init.h> >>>>>> #include <linux/slab.h> >>>>>> #include <linux/err.h> >>>>>> +#include <linux/key.h> >>>>>> #include <linux/nvme-tcp.h> >>>>>> +#include <linux/nvme-keyring.h> >>>>>> #include <net/sock.h> >>>>>> #include <net/tcp.h> >>>>>> +#include <net/tls.h> >>>>>> +#include <net/handshake.h> >>>>>> #include <linux/inet.h> >>>>>> #include <linux/llist.h> >>>>>> #include <crypto/hash.h> >>>>>> @@ -66,6 +70,16 @@ device_param_cb(idle_poll_period_usecs, >>>>>> &set_param_ops, >>>>>> 
MODULE_PARM_DESC(idle_poll_period_usecs, >>>>>> "nvmet tcp io_work poll till idle time period in usecs: >>>>>> Default 0"); >>>>>> +#ifdef CONFIG_NVME_TARGET_TCP_TLS >>>>>> +/* >>>>>> + * TLS handshake timeout >>>>>> + */ >>>>>> +static int tls_handshake_timeout = 10; >>>>>> +module_param(tls_handshake_timeout, int, 0644); >>>>>> +MODULE_PARM_DESC(tls_handshake_timeout, >>>>>> + "nvme TLS handshake timeout in seconds (default 10)"); >>>>>> +#endif >>>>>> + >>>>>> #define NVMET_TCP_RECV_BUDGET 8 >>>>>> #define NVMET_TCP_SEND_BUDGET 8 >>>>>> #define NVMET_TCP_IO_WORK_BUDGET 64 >>>>>> @@ -122,11 +136,13 @@ struct nvmet_tcp_cmd { >>>>>> enum nvmet_tcp_queue_state { >>>>>> NVMET_TCP_Q_CONNECTING, >>>>>> + NVMET_TCP_Q_TLS_HANDSHAKE, >>>>>> NVMET_TCP_Q_LIVE, >>>>>> NVMET_TCP_Q_DISCONNECTING, >>>>>> }; >>>>>> struct nvmet_tcp_queue { >>>>>> + struct kref kref; >>>>> >>>>> Why is kref the first member of the struct? >>>>> >>>> Habit. >>>> I don't mind where it'll end up. >>> >>> Move it to the back together with the tls section. 
>>> >>>> >>>>>> struct socket *sock; >>>>>> struct nvmet_tcp_port *port; >>>>>> struct work_struct io_work; >>>>>> @@ -155,6 +171,10 @@ struct nvmet_tcp_queue { >>>>>> struct ahash_request *snd_hash; >>>>>> struct ahash_request *rcv_hash; >>>>>> + /* TLS state */ >>>>>> + key_serial_t tls_pskid; >>>>>> + struct delayed_work tls_handshake_work; >>>>>> + >>>>>> unsigned long poll_end; >>>>>> spinlock_t state_lock; >>>>>> @@ -1283,12 +1303,21 @@ static int nvmet_tcp_try_recv(struct >>>>>> nvmet_tcp_queue *queue, >>>>>> return ret; >>>>>> } >>>>>> +static void nvmet_tcp_release_queue(struct kref *kref) >>>>>> +{ >>>>>> + struct nvmet_tcp_queue *queue = >>>>>> + container_of(kref, struct nvmet_tcp_queue, kref); >>>>>> + >>>>>> + WARN_ON(queue->state != NVMET_TCP_Q_DISCONNECTING); >>>>>> + queue_work(nvmet_wq, &queue->release_work); >>>>>> +} >>>>>> + >>>>>> static void nvmet_tcp_schedule_release_queue(struct >>>>>> nvmet_tcp_queue *queue) >>>>>> { >>>>>> spin_lock_bh(&queue->state_lock); >>>>>> if (queue->state != NVMET_TCP_Q_DISCONNECTING) { >>>>>> queue->state = NVMET_TCP_Q_DISCONNECTING; >>>>>> - queue_work(nvmet_wq, &queue->release_work); >>>>>> + kref_put(&queue->kref, nvmet_tcp_release_queue); >>>>>> } >>>>>> spin_unlock_bh(&queue->state_lock); >>>>>> } >>>>>> @@ -1485,6 +1514,8 @@ static void >>>>>> nvmet_tcp_release_queue_work(struct work_struct *w) >>>>>> mutex_unlock(&nvmet_tcp_queue_mutex); >>>>>> nvmet_tcp_restore_socket_callbacks(queue); >>>>>> + tls_handshake_cancel(queue->sock->sk); >>>>>> + cancel_delayed_work_sync(&queue->tls_handshake_work); >>>>> >>>>> We should call it tls_handshake_tmo_work or something to make it >>>>> clear it is a timeout work. >>>>> >>>> Okay. 
>>>> >>>>>> cancel_work_sync(&queue->io_work); >>>>>> /* stop accepting incoming data */ >>>>>> queue->rcv_state = NVMET_TCP_RECV_ERR; >>>>>> @@ -1512,8 +1543,13 @@ static void nvmet_tcp_data_ready(struct >>>>>> sock *sk) >>>>>> read_lock_bh(&sk->sk_callback_lock); >>>>>> queue = sk->sk_user_data; >>>>>> - if (likely(queue)) >>>>>> - queue_work_on(queue_cpu(queue), nvmet_tcp_wq, >>>>>> &queue->io_work); >>>>>> + if (likely(queue)) { >>>>>> + if (queue->data_ready) >>>>>> + queue->data_ready(sk); >>>>>> + if (queue->state != NVMET_TCP_Q_TLS_HANDSHAKE) >>>>>> + queue_work_on(queue_cpu(queue), nvmet_tcp_wq, >>>>>> + &queue->io_work); >>>>>> + } >>>>>> read_unlock_bh(&sk->sk_callback_lock); >>>>>> } >>>>>> @@ -1621,6 +1657,83 @@ static int nvmet_tcp_set_queue_sock(struct >>>>>> nvmet_tcp_queue *queue) >>>>>> return ret; >>>>>> } >>>>>> +#ifdef CONFIG_NVME_TARGET_TCP_TLS >>>>>> +static void nvmet_tcp_tls_handshake_done(void *data, int status, >>>>>> + key_serial_t peerid) >>>>>> +{ >>>>>> + struct nvmet_tcp_queue *queue = data; >>>>>> + >>>>>> + pr_debug("queue %d: TLS handshake done, key %x, status %d\n", >>>>>> + queue->idx, peerid, status); >>>>>> + spin_lock_bh(&queue->state_lock); >>>>>> + if (queue->state != NVMET_TCP_Q_TLS_HANDSHAKE) { >>>>> >>>>> Is this even possible? >>>>> >>>> I guess it can happen when the socket closes during handshake; the >>>> daemon might still be sending a 'done' event but >>>> nvmet_tcp_schedule_release_queue() has been called. >>> >>> Umm, if the socket closes during the handshake then the state >>> is NVMET_TCP_Q_TLS_HANDSHAKE. >>> >> But there's a race window between setting it to >> NVMET_TCP_Q_DISCONNECTING and calling tls_handshake_cancel(). >> >>> p.s. you call handshake cancel in the release flow so you should be >>> fenced properly no? >> Not really. But I'll check if I can fix it up. > > The teardown handling feels complicated to me. > You tell me. TLS timeout handling always gets in the way. 
But I've reworked it now to look slightly better. > How are you testing it btw? As outlined in the patchset description. I've a target configuration running over the loopback interface. Will expand to have two VMs talking to each other; however, that needs more fiddling with the PSK deployment. Cheers, Hannes
>> How are you testing it btw? > > As outlined in the patchset description. > I've a target configuration running over the loopback interface. > > Will expand to have two VMs talking to each other; however, that > needs more fiddling with the PSK deployment. I was referring to the timeout part. It would maybe make sense to run with a very short timeout to see that it is behaving...
On 8/15/23 15:34, Sagi Grimberg wrote: > >>> How are you testing it btw? >> >> As outlined in the patchset description. >> I've a target configuration running over the loopback interface. >> >> Will expand to have two VMs talking to each other; however, that >> needs more fiddling with the PSK deployment. > > Was referring to the timeout part. Would maybe make sense to > run a very short timeouts to see that is behaving... I'll see about patching it into tlshd. I used to trigger it quite easily during development, but now that things have stabilised of course it doesn't happen anymore. Kinda the point, I guess :-) Cheers, Hannes
diff --git a/drivers/nvme/target/Kconfig b/drivers/nvme/target/Kconfig index 79fc64035ee3..8a6c9cae804c 100644 --- a/drivers/nvme/target/Kconfig +++ b/drivers/nvme/target/Kconfig @@ -84,6 +84,21 @@ config NVME_TARGET_TCP If unsure, say N. +config NVME_TARGET_TCP_TLS + bool "NVMe over Fabrics TCP target TLS encryption support" + depends on NVME_TARGET_TCP + select NVME_COMMON + select NVME_KEYRING + select NET_HANDSHAKE + select KEYS + help + Enables TLS encryption for the NVMe TCP target using the netlink handshake API. + + The TLS handshake daemon is availble at + https://github.com/oracle/ktls-utils. + + If unsure, say N. + config NVME_TARGET_AUTH bool "NVMe over Fabrics In-band Authentication support" depends on NVME_TARGET diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index efbfed310370..ad1fb32c7387 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -15,6 +15,7 @@ #ifdef CONFIG_NVME_TARGET_AUTH #include <linux/nvme-auth.h> #endif +#include <linux/nvme-keyring.h> #include <crypto/hash.h> #include <crypto/kpp.h> @@ -397,6 +398,17 @@ static ssize_t nvmet_addr_tsas_store(struct config_item *item, return -EINVAL; found: + if (sectype == NVMF_TCP_SECTYPE_TLS13) { + if (!IS_ENABLED(CONFIG_NVME_TARGET_TCP_TLS)) { + pr_err("TLS is not supported\n"); + return -EINVAL; + } + if (!port->keyring) { + pr_err("TLS keyring not configured\n"); + return -EINVAL; + } + } + nvmet_port_init_tsas_tcp(port, sectype); /* * The TLS implementation currently does not support @@ -1815,6 +1827,7 @@ static void nvmet_port_release(struct config_item *item) flush_workqueue(nvmet_wq); list_del(&port->global_entry); + key_put(port->keyring); kfree(port->ana_state); kfree(port); } @@ -1864,6 +1877,14 @@ static struct config_group *nvmet_ports_make(struct config_group *group, return ERR_PTR(-ENOMEM); } + if (nvme_keyring_id()) { + port->keyring = key_lookup(nvme_keyring_id()); + if (IS_ERR(port->keyring)) { + pr_warn("NVMe keyring 
not available, disabling TLS\n"); + port->keyring = NULL; + } + } + for (i = 1; i <= NVMET_MAX_ANAGRPS; i++) { if (i == NVMET_DEFAULT_ANA_GRPID) port->ana_state[1] = NVME_ANA_OPTIMIZED; diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 8cfd60f3b564..7f9ae53c1df5 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -158,6 +158,7 @@ struct nvmet_port { struct config_group ana_groups_group; struct nvmet_ana_group ana_default_group; enum nvme_ana_state *ana_state; + struct key *keyring; void *priv; bool enabled; int inline_data_size; diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index f19ea9d923fd..77fa339008e1 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -8,9 +8,13 @@ #include <linux/init.h> #include <linux/slab.h> #include <linux/err.h> +#include <linux/key.h> #include <linux/nvme-tcp.h> +#include <linux/nvme-keyring.h> #include <net/sock.h> #include <net/tcp.h> +#include <net/tls.h> +#include <net/handshake.h> #include <linux/inet.h> #include <linux/llist.h> #include <crypto/hash.h> @@ -66,6 +70,16 @@ device_param_cb(idle_poll_period_usecs, &set_param_ops, MODULE_PARM_DESC(idle_poll_period_usecs, "nvmet tcp io_work poll till idle time period in usecs: Default 0"); +#ifdef CONFIG_NVME_TARGET_TCP_TLS +/* + * TLS handshake timeout + */ +static int tls_handshake_timeout = 10; +module_param(tls_handshake_timeout, int, 0644); +MODULE_PARM_DESC(tls_handshake_timeout, + "nvme TLS handshake timeout in seconds (default 10)"); +#endif + #define NVMET_TCP_RECV_BUDGET 8 #define NVMET_TCP_SEND_BUDGET 8 #define NVMET_TCP_IO_WORK_BUDGET 64 @@ -122,11 +136,13 @@ struct nvmet_tcp_cmd { enum nvmet_tcp_queue_state { NVMET_TCP_Q_CONNECTING, + NVMET_TCP_Q_TLS_HANDSHAKE, NVMET_TCP_Q_LIVE, NVMET_TCP_Q_DISCONNECTING, }; struct nvmet_tcp_queue { + struct kref kref; struct socket *sock; struct nvmet_tcp_port *port; struct work_struct io_work; @@ -155,6 +171,10 @@ struct nvmet_tcp_queue { 
struct ahash_request *snd_hash; struct ahash_request *rcv_hash; + /* TLS state */ + key_serial_t tls_pskid; + struct delayed_work tls_handshake_work; + unsigned long poll_end; spinlock_t state_lock; @@ -1283,12 +1303,21 @@ static int nvmet_tcp_try_recv(struct nvmet_tcp_queue *queue, return ret; } +static void nvmet_tcp_release_queue(struct kref *kref) +{ + struct nvmet_tcp_queue *queue = + container_of(kref, struct nvmet_tcp_queue, kref); + + WARN_ON(queue->state != NVMET_TCP_Q_DISCONNECTING); + queue_work(nvmet_wq, &queue->release_work); +} + static void nvmet_tcp_schedule_release_queue(struct nvmet_tcp_queue *queue) { spin_lock_bh(&queue->state_lock); if (queue->state != NVMET_TCP_Q_DISCONNECTING) { queue->state = NVMET_TCP_Q_DISCONNECTING; - queue_work(nvmet_wq, &queue->release_work); + kref_put(&queue->kref, nvmet_tcp_release_queue); } spin_unlock_bh(&queue->state_lock); } @@ -1485,6 +1514,8 @@ static void nvmet_tcp_release_queue_work(struct work_struct *w) mutex_unlock(&nvmet_tcp_queue_mutex); nvmet_tcp_restore_socket_callbacks(queue); + tls_handshake_cancel(queue->sock->sk); + cancel_delayed_work_sync(&queue->tls_handshake_work); cancel_work_sync(&queue->io_work); /* stop accepting incoming data */ queue->rcv_state = NVMET_TCP_RECV_ERR; @@ -1512,8 +1543,13 @@ static void nvmet_tcp_data_ready(struct sock *sk) read_lock_bh(&sk->sk_callback_lock); queue = sk->sk_user_data; - if (likely(queue)) - queue_work_on(queue_cpu(queue), nvmet_tcp_wq, &queue->io_work); + if (likely(queue)) { + if (queue->data_ready) + queue->data_ready(sk); + if (queue->state != NVMET_TCP_Q_TLS_HANDSHAKE) + queue_work_on(queue_cpu(queue), nvmet_tcp_wq, + &queue->io_work); + } read_unlock_bh(&sk->sk_callback_lock); } @@ -1621,6 +1657,83 @@ static int nvmet_tcp_set_queue_sock(struct nvmet_tcp_queue *queue) return ret; } +#ifdef CONFIG_NVME_TARGET_TCP_TLS +static void nvmet_tcp_tls_handshake_done(void *data, int status, + key_serial_t peerid) +{ + struct nvmet_tcp_queue *queue = data; + + 
pr_debug("queue %d: TLS handshake done, key %x, status %d\n", + queue->idx, peerid, status); + spin_lock_bh(&queue->state_lock); + if (queue->state != NVMET_TCP_Q_TLS_HANDSHAKE) { + pr_warn("queue %d: TLS handshake already completed\n", + queue->idx); + spin_unlock_bh(&queue->state_lock); + kref_put(&queue->kref, nvmet_tcp_release_queue); + return; + } + if (!status) + queue->tls_pskid = peerid; + queue->state = NVMET_TCP_Q_CONNECTING; + spin_unlock_bh(&queue->state_lock); + + cancel_delayed_work_sync(&queue->tls_handshake_work); + if (status) { + kernel_sock_shutdown(queue->sock, SHUT_RDWR); + kref_put(&queue->kref, nvmet_tcp_release_queue); + return; + } + + pr_debug("queue %d: resetting queue callbacks after TLS handshake\n", + queue->idx); + nvmet_tcp_set_queue_sock(queue); + kref_put(&queue->kref, nvmet_tcp_release_queue); +} + +static void nvmet_tcp_tls_handshake_timeout_work(struct work_struct *w) +{ + struct nvmet_tcp_queue *queue = container_of(to_delayed_work(w), + struct nvmet_tcp_queue, tls_handshake_work); + + pr_debug("queue %d: TLS handshake timeout\n", queue->idx); + if (!tls_handshake_cancel(queue->sock->sk)) + return; + kernel_sock_shutdown(queue->sock, SHUT_RDWR); + kref_put(&queue->kref, nvmet_tcp_release_queue); +} + +static int nvmet_tcp_tls_handshake(struct nvmet_tcp_queue *queue) +{ + int ret = -EOPNOTSUPP; + struct tls_handshake_args args; + + if (queue->state != NVMET_TCP_Q_TLS_HANDSHAKE) { + pr_warn("cannot start TLS in state %d\n", queue->state); + return -EINVAL; + } + + kref_get(&queue->kref); + pr_debug("queue %d: TLS ServerHello\n", queue->idx); + memset(&args, 0, sizeof(args)); + args.ta_sock = queue->sock; + args.ta_done = nvmet_tcp_tls_handshake_done; + args.ta_data = queue; + args.ta_keyring = key_serial(queue->port->nport->keyring); + args.ta_timeout_ms = tls_handshake_timeout * 1000; + + ret = tls_server_hello_psk(&args, GFP_KERNEL); + if (ret) { + kref_put(&queue->kref, nvmet_tcp_release_queue); + pr_err("failed to start TLS, 
err=%d\n", ret); + } else { + queue_delayed_work(nvmet_wq, &queue->tls_handshake_work, + tls_handshake_timeout * HZ); + } + return ret; +} +#endif + static void nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port, struct socket *newsock) { @@ -1636,11 +1749,16 @@ static void nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port, INIT_WORK(&queue->release_work, nvmet_tcp_release_queue_work); INIT_WORK(&queue->io_work, nvmet_tcp_io_work); + kref_init(&queue->kref); queue->sock = newsock; queue->port = port; queue->nr_cmds = 0; spin_lock_init(&queue->state_lock); - queue->state = NVMET_TCP_Q_CONNECTING; + if (queue->port->nport->disc_addr.tsas.tcp.sectype == + NVMF_TCP_SECTYPE_TLS13) + queue->state = NVMET_TCP_Q_TLS_HANDSHAKE; + else + queue->state = NVMET_TCP_Q_CONNECTING; INIT_LIST_HEAD(&queue->free_list); init_llist_head(&queue->resp_list); INIT_LIST_HEAD(&queue->resp_send_list); @@ -1671,12 +1789,32 @@ static void nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port, list_add_tail(&queue->queue_list, &nvmet_tcp_queue_list); mutex_unlock(&nvmet_tcp_queue_mutex); +#ifdef CONFIG_NVME_TARGET_TCP_TLS + INIT_DELAYED_WORK(&queue->tls_handshake_work, + nvmet_tcp_tls_handshake_timeout_work); + if (queue->state == NVMET_TCP_Q_TLS_HANDSHAKE) { + struct sock *sk = queue->sock->sk; + + /* Restore the default callbacks before starting upcall */ + read_lock_bh(&sk->sk_callback_lock); + sk->sk_user_data = NULL; + sk->sk_data_ready = port->data_ready; + read_unlock_bh(&sk->sk_callback_lock); + if (!nvmet_tcp_tls_handshake(queue)) + return; + + /* TLS handshake failed, terminate the connection */ + goto out_destroy_sq; + } +#endif + ret = nvmet_tcp_set_queue_sock(queue); if (ret) goto out_destroy_sq; return; out_destroy_sq: + queue->state = NVMET_TCP_Q_DISCONNECTING; mutex_lock(&nvmet_tcp_queue_mutex); list_del_init(&queue->queue_list); mutex_unlock(&nvmet_tcp_queue_mutex);
Add functions to start the TLS handshake upcall when the TCP TSAS sectype is set to 'tls1.3' and add a config option NVME_TARGET_TCP_TLS. Signed-off-by: Hannes Reinecke <hare@suse.de> --- drivers/nvme/target/Kconfig | 15 ++++ drivers/nvme/target/configfs.c | 21 +++++ drivers/nvme/target/nvmet.h | 1 + drivers/nvme/target/tcp.c | 146 ++++++++++++++++++++++++++++++++- 4 files changed, 179 insertions(+), 4 deletions(-)