Message ID | 20230721143523.56906-7-hare@suse.de (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | net/tls: fixes for NVMe-over-TLS | expand |
On 7/21/23 17:35, Hannes Reinecke wrote: > Implement ->read_sock() function for use with nvme-tcp. > > Signed-off-by: Hannes Reinecke <hare@suse.de> > Reviewed-by: Sagi Grimberg <sagi@grimberg.me> > Cc: Boris Pismenny <boris.pismenny@gmail.com> > Cc: Jakub Kicinski <kuba@kernel.org> > Cc: netdev@vger.kernel.org > --- > net/tls/tls.h | 2 ++ > net/tls/tls_main.c | 2 ++ > net/tls/tls_sw.c | 89 ++++++++++++++++++++++++++++++++++++++++++++++ > 3 files changed, 93 insertions(+) > > diff --git a/net/tls/tls.h b/net/tls/tls.h > index 86cef1c68e03..7e4d45537deb 100644 > --- a/net/tls/tls.h > +++ b/net/tls/tls.h > @@ -110,6 +110,8 @@ bool tls_sw_sock_is_readable(struct sock *sk); > ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos, > struct pipe_inode_info *pipe, > size_t len, unsigned int flags); > +int tls_sw_read_sock(struct sock *sk, read_descriptor_t *desc, > + sk_read_actor_t read_actor); > > int tls_device_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); > void tls_device_splice_eof(struct socket *sock); > diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c > index b6896126bb92..7dbb8cd8f809 100644 > --- a/net/tls/tls_main.c > +++ b/net/tls/tls_main.c > @@ -962,10 +962,12 @@ static void build_proto_ops(struct proto_ops ops[TLS_NUM_CONFIG][TLS_NUM_CONFIG] > ops[TLS_BASE][TLS_SW ] = ops[TLS_BASE][TLS_BASE]; > ops[TLS_BASE][TLS_SW ].splice_read = tls_sw_splice_read; > ops[TLS_BASE][TLS_SW ].poll = tls_sk_poll; > + ops[TLS_BASE][TLS_SW ].read_sock = tls_sw_read_sock; > > ops[TLS_SW ][TLS_SW ] = ops[TLS_SW ][TLS_BASE]; > ops[TLS_SW ][TLS_SW ].splice_read = tls_sw_splice_read; > ops[TLS_SW ][TLS_SW ].poll = tls_sk_poll; > + ops[TLS_SW ][TLS_SW ].read_sock = tls_sw_read_sock; > > #ifdef CONFIG_TLS_DEVICE > ops[TLS_HW ][TLS_BASE] = ops[TLS_BASE][TLS_BASE]; > diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c > index d0636ea13009..f7ffbe7620cb 100644 > --- a/net/tls/tls_sw.c > +++ b/net/tls/tls_sw.c > @@ -2202,6 +2202,95 @@ ssize_t tls_sw_splice_read(struct 
socket *sock, loff_t *ppos, > goto splice_read_end; > } > > +int tls_sw_read_sock(struct sock *sk, read_descriptor_t *desc, > + sk_read_actor_t read_actor) > +{ > + struct tls_context *tls_ctx = tls_get_ctx(sk); > + struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); > + struct strp_msg *rxm = NULL; > + struct sk_buff *skb = NULL; > + struct sk_psock *psock; > + struct tls_msg *tlm; > + ssize_t copied = 0; > + int err, used; > + > + psock = sk_psock_get(sk); > + if (psock) { > + sk_psock_put(sk, psock); > + return -EINVAL; > + } > + err = tls_rx_reader_acquire(sk, ctx, true); > + if (err < 0) > + return err; > + > + /* If crypto failed the connection is broken */ > + err = ctx->async_wait.err; > + if (err) > + goto read_sock_end; > + > + do { > + if (!skb_queue_empty(&ctx->rx_list)) { > + skb = __skb_dequeue(&ctx->rx_list); > + rxm = strp_msg(skb); > + tlm = tls_msg(skb); > + } else { > + struct tls_decrypt_arg darg; > + > + err = tls_rx_rec_wait(sk, NULL, true, true); > + if (err <= 0) > + goto read_sock_end; > + > + memset(&darg.inargs, 0, sizeof(darg.inargs)); > + > + rxm = strp_msg(tls_strp_msg(ctx)); > + tlm = tls_msg(tls_strp_msg(ctx)); > + > + err = tls_rx_one_record(sk, NULL, &darg); > + if (err < 0) { > + tls_err_abort(sk, -EBADMSG); > + goto read_sock_end; > + } > + > + sk_flush_backlog(sk); Question: based on Jakub's comment, the flush is better spaced out. Why not just do it once at the end? Or alternatively, call tls_read_flush_backlog()? Or just count by hand and flush every 4 records or 128K (and once at the end)? I don't really know what the impact would be, but you are effectively releasing and re-acquiring the socket to flush the backlog on every record...
On 7/24/23 14:59, Sagi Grimberg wrote: > > > On 7/21/23 17:35, Hannes Reinecke wrote: >> Implement ->read_sock() function for use with nvme-tcp. >> >> Signed-off-by: Hannes Reinecke <hare@suse.de> >> Reviewed-by: Sagi Grimberg <sagi@grimberg.me> >> Cc: Boris Pismenny <boris.pismenny@gmail.com> >> Cc: Jakub Kicinski <kuba@kernel.org> >> Cc: netdev@vger.kernel.org >> --- >> net/tls/tls.h | 2 ++ >> net/tls/tls_main.c | 2 ++ >> net/tls/tls_sw.c | 89 ++++++++++++++++++++++++++++++++++++++++++++++ >> 3 files changed, 93 insertions(+) >> >> diff --git a/net/tls/tls.h b/net/tls/tls.h >> index 86cef1c68e03..7e4d45537deb 100644 >> --- a/net/tls/tls.h >> +++ b/net/tls/tls.h >> @@ -110,6 +110,8 @@ bool tls_sw_sock_is_readable(struct sock *sk); >> ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos, >> struct pipe_inode_info *pipe, >> size_t len, unsigned int flags); >> +int tls_sw_read_sock(struct sock *sk, read_descriptor_t *desc, >> + sk_read_actor_t read_actor); >> int tls_device_sendmsg(struct sock *sk, struct msghdr *msg, size_t >> size); >> void tls_device_splice_eof(struct socket *sock); >> diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c >> index b6896126bb92..7dbb8cd8f809 100644 >> --- a/net/tls/tls_main.c >> +++ b/net/tls/tls_main.c >> @@ -962,10 +962,12 @@ static void build_proto_ops(struct proto_ops >> ops[TLS_NUM_CONFIG][TLS_NUM_CONFIG] >> ops[TLS_BASE][TLS_SW ] = ops[TLS_BASE][TLS_BASE]; >> ops[TLS_BASE][TLS_SW ].splice_read = tls_sw_splice_read; >> ops[TLS_BASE][TLS_SW ].poll = tls_sk_poll; >> + ops[TLS_BASE][TLS_SW ].read_sock = tls_sw_read_sock; >> ops[TLS_SW ][TLS_SW ] = ops[TLS_SW ][TLS_BASE]; >> ops[TLS_SW ][TLS_SW ].splice_read = tls_sw_splice_read; >> ops[TLS_SW ][TLS_SW ].poll = tls_sk_poll; >> + ops[TLS_SW ][TLS_SW ].read_sock = tls_sw_read_sock; >> #ifdef CONFIG_TLS_DEVICE >> ops[TLS_HW ][TLS_BASE] = ops[TLS_BASE][TLS_BASE]; >> diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c >> index d0636ea13009..f7ffbe7620cb 100644 >> --- 
a/net/tls/tls_sw.c >> +++ b/net/tls/tls_sw.c >> @@ -2202,6 +2202,95 @@ ssize_t tls_sw_splice_read(struct socket >> *sock, loff_t *ppos, >> goto splice_read_end; >> } >> +int tls_sw_read_sock(struct sock *sk, read_descriptor_t *desc, >> + sk_read_actor_t read_actor) >> +{ >> + struct tls_context *tls_ctx = tls_get_ctx(sk); >> + struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); >> + struct strp_msg *rxm = NULL; >> + struct sk_buff *skb = NULL; >> + struct sk_psock *psock; >> + struct tls_msg *tlm; >> + ssize_t copied = 0; >> + int err, used; >> + >> + psock = sk_psock_get(sk); >> + if (psock) { >> + sk_psock_put(sk, psock); >> + return -EINVAL; >> + } >> + err = tls_rx_reader_acquire(sk, ctx, true); >> + if (err < 0) >> + return err; >> + >> + /* If crypto failed the connection is broken */ >> + err = ctx->async_wait.err; >> + if (err) >> + goto read_sock_end; >> + >> + do { >> + if (!skb_queue_empty(&ctx->rx_list)) { >> + skb = __skb_dequeue(&ctx->rx_list); >> + rxm = strp_msg(skb); >> + tlm = tls_msg(skb); >> + } else { >> + struct tls_decrypt_arg darg; >> + >> + err = tls_rx_rec_wait(sk, NULL, true, true); >> + if (err <= 0) >> + goto read_sock_end; >> + >> + memset(&darg.inargs, 0, sizeof(darg.inargs)); >> + >> + rxm = strp_msg(tls_strp_msg(ctx)); >> + tlm = tls_msg(tls_strp_msg(ctx)); >> + >> + err = tls_rx_one_record(sk, NULL, &darg); >> + if (err < 0) { >> + tls_err_abort(sk, -EBADMSG); >> + goto read_sock_end; >> + } >> + >> + sk_flush_backlog(sk); > > Question, > Based on Jakub's comment, the flush is better spaced out. > Why not just do it once at the end? Or alternatively, > call tls_read_flush_backlog() ? Or just count by hand > every 4 records or 128K (and once in the end)? > > I don't really know what would be the impact though, but > you are effectively releasing and re-acquiring the socket > flushing the backlog every record... I really have no idea. I'll look into modifying it to use tls_read_flush_backlog(). Cheers, Hannes
diff --git a/net/tls/tls.h b/net/tls/tls.h index 86cef1c68e03..7e4d45537deb 100644 --- a/net/tls/tls.h +++ b/net/tls/tls.h @@ -110,6 +110,8 @@ bool tls_sw_sock_is_readable(struct sock *sk); ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags); +int tls_sw_read_sock(struct sock *sk, read_descriptor_t *desc, + sk_read_actor_t read_actor); int tls_device_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); void tls_device_splice_eof(struct socket *sock); diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index b6896126bb92..7dbb8cd8f809 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -962,10 +962,12 @@ static void build_proto_ops(struct proto_ops ops[TLS_NUM_CONFIG][TLS_NUM_CONFIG] ops[TLS_BASE][TLS_SW ] = ops[TLS_BASE][TLS_BASE]; ops[TLS_BASE][TLS_SW ].splice_read = tls_sw_splice_read; ops[TLS_BASE][TLS_SW ].poll = tls_sk_poll; + ops[TLS_BASE][TLS_SW ].read_sock = tls_sw_read_sock; ops[TLS_SW ][TLS_SW ] = ops[TLS_SW ][TLS_BASE]; ops[TLS_SW ][TLS_SW ].splice_read = tls_sw_splice_read; ops[TLS_SW ][TLS_SW ].poll = tls_sk_poll; + ops[TLS_SW ][TLS_SW ].read_sock = tls_sw_read_sock; #ifdef CONFIG_TLS_DEVICE ops[TLS_HW ][TLS_BASE] = ops[TLS_BASE][TLS_BASE]; diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index d0636ea13009..f7ffbe7620cb 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -2202,6 +2202,95 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos, goto splice_read_end; } +int tls_sw_read_sock(struct sock *sk, read_descriptor_t *desc, + sk_read_actor_t read_actor) +{ + struct tls_context *tls_ctx = tls_get_ctx(sk); + struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); + struct strp_msg *rxm = NULL; + struct sk_buff *skb = NULL; + struct sk_psock *psock; + struct tls_msg *tlm; + ssize_t copied = 0; + int err, used; + + psock = sk_psock_get(sk); + if (psock) { + sk_psock_put(sk, psock); + return -EINVAL; + } + err = tls_rx_reader_acquire(sk, ctx, true); + 
if (err < 0) + return err; + + /* If crypto failed the connection is broken */ + err = ctx->async_wait.err; + if (err) + goto read_sock_end; + + do { + if (!skb_queue_empty(&ctx->rx_list)) { + skb = __skb_dequeue(&ctx->rx_list); + rxm = strp_msg(skb); + tlm = tls_msg(skb); + } else { + struct tls_decrypt_arg darg; + + err = tls_rx_rec_wait(sk, NULL, true, true); + if (err <= 0) + goto read_sock_end; + + memset(&darg.inargs, 0, sizeof(darg.inargs)); + + rxm = strp_msg(tls_strp_msg(ctx)); + tlm = tls_msg(tls_strp_msg(ctx)); + + err = tls_rx_one_record(sk, NULL, &darg); + if (err < 0) { + tls_err_abort(sk, -EBADMSG); + goto read_sock_end; + } + + sk_flush_backlog(sk); + skb = darg.skb; + + tls_rx_rec_done(ctx); + } + + /* read_sock does not support reading control messages */ + if (tlm->control != TLS_RECORD_TYPE_DATA) { + err = -EINVAL; + goto read_sock_requeue; + } + + used = read_actor(desc, skb, rxm->offset, rxm->full_len); + if (used <= 0) { + if (!copied) + err = used; + goto read_sock_requeue; + } + copied += used; + if (used < rxm->full_len) { + rxm->offset += used; + rxm->full_len -= used; + if (!desc->count) + goto read_sock_requeue; + } else { + consume_skb(skb); + if (!desc->count) + skb = NULL; + } + } while (skb); + +read_sock_end: + tls_rx_reader_release(sk, ctx); + return copied ? : err; + +read_sock_requeue: + __skb_queue_head(&ctx->rx_list, skb); + goto read_sock_end; +} + bool tls_sw_sock_is_readable(struct sock *sk) { struct tls_context *tls_ctx = tls_get_ctx(sk);