diff mbox series

[6/6] net/tls: implement ->read_sock()

Message ID 20230721143523.56906-7-hare@suse.de (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series net/tls: fixes for NVMe-over-TLS | expand

Checks

Context Check Description
netdev/series_format warning Target tree name not specified in the subject
netdev/tree_selection success Guessed tree name to be net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 1342 this patch: 1342
netdev/cc_maintainers warning 3 maintainers not CCed: borisp@nvidia.com john.fastabend@gmail.com davem@davemloft.net
netdev/build_clang success Errors and warnings before: 1365 this patch: 1365
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 1365 this patch: 1365
netdev/checkpatch fail ERROR: space prohibited before that close square bracket ']'
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Hannes Reinecke July 21, 2023, 2:35 p.m. UTC
Implement ->read_sock() function for use with nvme-tcp.

Signed-off-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Cc: Boris Pismenny <boris.pismenny@gmail.com>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: netdev@vger.kernel.org
---
 net/tls/tls.h      |  2 ++
 net/tls/tls_main.c |  2 ++
 net/tls/tls_sw.c   | 89 ++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 93 insertions(+)

Comments

Sagi Grimberg July 24, 2023, 12:59 p.m. UTC | #1
On 7/21/23 17:35, Hannes Reinecke wrote:
> Implement ->read_sock() function for use with nvme-tcp.
> 
> Signed-off-by: Hannes Reinecke <hare@suse.de>
> Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
> Cc: Boris Pismenny <boris.pismenny@gmail.com>
> Cc: Jakub Kicinski <kuba@kernel.org>
> Cc: netdev@vger.kernel.org
> ---
>   net/tls/tls.h      |  2 ++
>   net/tls/tls_main.c |  2 ++
>   net/tls/tls_sw.c   | 89 ++++++++++++++++++++++++++++++++++++++++++++++
>   3 files changed, 93 insertions(+)
> 
> diff --git a/net/tls/tls.h b/net/tls/tls.h
> index 86cef1c68e03..7e4d45537deb 100644
> --- a/net/tls/tls.h
> +++ b/net/tls/tls.h
> @@ -110,6 +110,8 @@ bool tls_sw_sock_is_readable(struct sock *sk);
>   ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos,
>   			   struct pipe_inode_info *pipe,
>   			   size_t len, unsigned int flags);
> +int tls_sw_read_sock(struct sock *sk, read_descriptor_t *desc,
> +		     sk_read_actor_t read_actor);
>   
>   int tls_device_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
>   void tls_device_splice_eof(struct socket *sock);
> diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
> index b6896126bb92..7dbb8cd8f809 100644
> --- a/net/tls/tls_main.c
> +++ b/net/tls/tls_main.c
> @@ -962,10 +962,12 @@ static void build_proto_ops(struct proto_ops ops[TLS_NUM_CONFIG][TLS_NUM_CONFIG]
>   	ops[TLS_BASE][TLS_SW  ] = ops[TLS_BASE][TLS_BASE];
>   	ops[TLS_BASE][TLS_SW  ].splice_read	= tls_sw_splice_read;
>   	ops[TLS_BASE][TLS_SW  ].poll		= tls_sk_poll;
> +	ops[TLS_BASE][TLS_SW  ].read_sock	= tls_sw_read_sock;
>   
>   	ops[TLS_SW  ][TLS_SW  ] = ops[TLS_SW  ][TLS_BASE];
>   	ops[TLS_SW  ][TLS_SW  ].splice_read	= tls_sw_splice_read;
>   	ops[TLS_SW  ][TLS_SW  ].poll		= tls_sk_poll;
> +	ops[TLS_SW  ][TLS_SW  ].read_sock	= tls_sw_read_sock;
>   
>   #ifdef CONFIG_TLS_DEVICE
>   	ops[TLS_HW  ][TLS_BASE] = ops[TLS_BASE][TLS_BASE];
> diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
> index d0636ea13009..f7ffbe7620cb 100644
> --- a/net/tls/tls_sw.c
> +++ b/net/tls/tls_sw.c
> @@ -2202,6 +2202,95 @@ ssize_t tls_sw_splice_read(struct socket *sock,  loff_t *ppos,
>   	goto splice_read_end;
>   }
>   
> +int tls_sw_read_sock(struct sock *sk, read_descriptor_t *desc,
> +		     sk_read_actor_t read_actor)
> +{
> +	struct tls_context *tls_ctx = tls_get_ctx(sk);
> +	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
> +	struct strp_msg *rxm = NULL;
> +	struct sk_buff *skb = NULL;
> +	struct sk_psock *psock;
> +	struct tls_msg *tlm;
> +	ssize_t copied = 0;
> +	int err, used;
> +
> +	psock = sk_psock_get(sk);
> +	if (psock) {
> +		sk_psock_put(sk, psock);
> +		return -EINVAL;
> +	}
> +	err = tls_rx_reader_acquire(sk, ctx, true);
> +	if (err < 0)
> +		return err;
> +
> +	/* If crypto failed the connection is broken */
> +	err = ctx->async_wait.err;
> +	if (err)
> +		goto read_sock_end;
> +
> +	do {
> +		if (!skb_queue_empty(&ctx->rx_list)) {
> +			skb = __skb_dequeue(&ctx->rx_list);
> +			rxm = strp_msg(skb);
> +			tlm = tls_msg(skb);
> +		} else {
> +			struct tls_decrypt_arg darg;
> +
> +			err = tls_rx_rec_wait(sk, NULL, true, true);
> +			if (err <= 0)
> +				goto read_sock_end;
> +
> +			memset(&darg.inargs, 0, sizeof(darg.inargs));
> +
> +			rxm = strp_msg(tls_strp_msg(ctx));
> +			tlm = tls_msg(tls_strp_msg(ctx));
> +
> +			err = tls_rx_one_record(sk, NULL, &darg);
> +			if (err < 0) {
> +				tls_err_abort(sk, -EBADMSG);
> +				goto read_sock_end;
> +			}
> +
> +			sk_flush_backlog(sk);

Question,
Based on Jakub's comment, the flush is better spaced out.
Why not just do it once at the end? Or alternatively,
call tls_read_flush_backlog()? Or just count by hand
every 4 records or 128K (and once at the end)?

I don't really know what would be the impact though, but
you are effectively releasing and re-acquiring the socket
flushing the backlog every record...
Hannes Reinecke July 24, 2023, 1:47 p.m. UTC | #2
On 7/24/23 14:59, Sagi Grimberg wrote:
> 
> 
> On 7/21/23 17:35, Hannes Reinecke wrote:
>> Implement ->read_sock() function for use with nvme-tcp.
>>
>> Signed-off-by: Hannes Reinecke <hare@suse.de>
>> Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
>> Cc: Boris Pismenny <boris.pismenny@gmail.com>
>> Cc: Jakub Kicinski <kuba@kernel.org>
>> Cc: netdev@vger.kernel.org
>> ---
>>   net/tls/tls.h      |  2 ++
>>   net/tls/tls_main.c |  2 ++
>>   net/tls/tls_sw.c   | 89 ++++++++++++++++++++++++++++++++++++++++++++++
>>   3 files changed, 93 insertions(+)
>>
>> diff --git a/net/tls/tls.h b/net/tls/tls.h
>> index 86cef1c68e03..7e4d45537deb 100644
>> --- a/net/tls/tls.h
>> +++ b/net/tls/tls.h
>> @@ -110,6 +110,8 @@ bool tls_sw_sock_is_readable(struct sock *sk);
>>   ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos,
>>                  struct pipe_inode_info *pipe,
>>                  size_t len, unsigned int flags);
>> +int tls_sw_read_sock(struct sock *sk, read_descriptor_t *desc,
>> +             sk_read_actor_t read_actor);
>>   int tls_device_sendmsg(struct sock *sk, struct msghdr *msg, size_t 
>> size);
>>   void tls_device_splice_eof(struct socket *sock);
>> diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
>> index b6896126bb92..7dbb8cd8f809 100644
>> --- a/net/tls/tls_main.c
>> +++ b/net/tls/tls_main.c
>> @@ -962,10 +962,12 @@ static void build_proto_ops(struct proto_ops 
>> ops[TLS_NUM_CONFIG][TLS_NUM_CONFIG]
>>       ops[TLS_BASE][TLS_SW  ] = ops[TLS_BASE][TLS_BASE];
>>       ops[TLS_BASE][TLS_SW  ].splice_read    = tls_sw_splice_read;
>>       ops[TLS_BASE][TLS_SW  ].poll        = tls_sk_poll;
>> +    ops[TLS_BASE][TLS_SW  ].read_sock    = tls_sw_read_sock;
>>       ops[TLS_SW  ][TLS_SW  ] = ops[TLS_SW  ][TLS_BASE];
>>       ops[TLS_SW  ][TLS_SW  ].splice_read    = tls_sw_splice_read;
>>       ops[TLS_SW  ][TLS_SW  ].poll        = tls_sk_poll;
>> +    ops[TLS_SW  ][TLS_SW  ].read_sock    = tls_sw_read_sock;
>>   #ifdef CONFIG_TLS_DEVICE
>>       ops[TLS_HW  ][TLS_BASE] = ops[TLS_BASE][TLS_BASE];
>> diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
>> index d0636ea13009..f7ffbe7620cb 100644
>> --- a/net/tls/tls_sw.c
>> +++ b/net/tls/tls_sw.c
>> @@ -2202,6 +2202,95 @@ ssize_t tls_sw_splice_read(struct socket 
>> *sock,  loff_t *ppos,
>>       goto splice_read_end;
>>   }
>> +int tls_sw_read_sock(struct sock *sk, read_descriptor_t *desc,
>> +             sk_read_actor_t read_actor)
>> +{
>> +    struct tls_context *tls_ctx = tls_get_ctx(sk);
>> +    struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
>> +    struct strp_msg *rxm = NULL;
>> +    struct sk_buff *skb = NULL;
>> +    struct sk_psock *psock;
>> +    struct tls_msg *tlm;
>> +    ssize_t copied = 0;
>> +    int err, used;
>> +
>> +    psock = sk_psock_get(sk);
>> +    if (psock) {
>> +        sk_psock_put(sk, psock);
>> +        return -EINVAL;
>> +    }
>> +    err = tls_rx_reader_acquire(sk, ctx, true);
>> +    if (err < 0)
>> +        return err;
>> +
>> +    /* If crypto failed the connection is broken */
>> +    err = ctx->async_wait.err;
>> +    if (err)
>> +        goto read_sock_end;
>> +
>> +    do {
>> +        if (!skb_queue_empty(&ctx->rx_list)) {
>> +            skb = __skb_dequeue(&ctx->rx_list);
>> +            rxm = strp_msg(skb);
>> +            tlm = tls_msg(skb);
>> +        } else {
>> +            struct tls_decrypt_arg darg;
>> +
>> +            err = tls_rx_rec_wait(sk, NULL, true, true);
>> +            if (err <= 0)
>> +                goto read_sock_end;
>> +
>> +            memset(&darg.inargs, 0, sizeof(darg.inargs));
>> +
>> +            rxm = strp_msg(tls_strp_msg(ctx));
>> +            tlm = tls_msg(tls_strp_msg(ctx));
>> +
>> +            err = tls_rx_one_record(sk, NULL, &darg);
>> +            if (err < 0) {
>> +                tls_err_abort(sk, -EBADMSG);
>> +                goto read_sock_end;
>> +            }
>> +
>> +            sk_flush_backlog(sk);
> 
> Question,
> Based on Jakub's comment, the flush is better spaced out.
> Why not just do it once at the end? Or alternatively,
> call tls_read_flush_backlog()? Or just count by hand
> every 4 records or 128K (and once at the end)?
> 
> I don't really know what would be the impact though, but
> you are effectively releasing and re-acquiring the socket
> flushing the backlog every record...

I really have no idea.
I'll see about modifying it to use tls_read_flush_backlog().

Cheers,

Hannes
diff mbox series

Patch

diff --git a/net/tls/tls.h b/net/tls/tls.h
index 86cef1c68e03..7e4d45537deb 100644
--- a/net/tls/tls.h
+++ b/net/tls/tls.h
@@ -110,6 +110,8 @@  bool tls_sw_sock_is_readable(struct sock *sk);
 ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos,
 			   struct pipe_inode_info *pipe,
 			   size_t len, unsigned int flags);
+int tls_sw_read_sock(struct sock *sk, read_descriptor_t *desc,
+		     sk_read_actor_t read_actor);
 
 int tls_device_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
 void tls_device_splice_eof(struct socket *sock);
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index b6896126bb92..7dbb8cd8f809 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -962,10 +962,12 @@  static void build_proto_ops(struct proto_ops ops[TLS_NUM_CONFIG][TLS_NUM_CONFIG]
 	ops[TLS_BASE][TLS_SW  ] = ops[TLS_BASE][TLS_BASE];
 	ops[TLS_BASE][TLS_SW  ].splice_read	= tls_sw_splice_read;
 	ops[TLS_BASE][TLS_SW  ].poll		= tls_sk_poll;
+	ops[TLS_BASE][TLS_SW  ].read_sock	= tls_sw_read_sock;
 
 	ops[TLS_SW  ][TLS_SW  ] = ops[TLS_SW  ][TLS_BASE];
 	ops[TLS_SW  ][TLS_SW  ].splice_read	= tls_sw_splice_read;
 	ops[TLS_SW  ][TLS_SW  ].poll		= tls_sk_poll;
+	ops[TLS_SW  ][TLS_SW  ].read_sock	= tls_sw_read_sock;
 
 #ifdef CONFIG_TLS_DEVICE
 	ops[TLS_HW  ][TLS_BASE] = ops[TLS_BASE][TLS_BASE];
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index d0636ea13009..f7ffbe7620cb 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -2202,6 +2202,95 @@  ssize_t tls_sw_splice_read(struct socket *sock,  loff_t *ppos,
 	goto splice_read_end;
 }
 
+int tls_sw_read_sock(struct sock *sk, read_descriptor_t *desc,
+		     sk_read_actor_t read_actor)
+{
+	struct tls_context *tls_ctx = tls_get_ctx(sk);
+	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
+	struct strp_msg *rxm = NULL;
+	struct sk_buff *skb = NULL;
+	struct sk_psock *psock;
+	struct tls_msg *tlm;
+	ssize_t copied = 0;
+	int err, used;
+
+	psock = sk_psock_get(sk);
+	if (psock) {
+		sk_psock_put(sk, psock);
+		return -EINVAL;
+	}
+	err = tls_rx_reader_acquire(sk, ctx, true);
+	if (err < 0)
+		return err;
+
+	/* If crypto failed the connection is broken */
+	err = ctx->async_wait.err;
+	if (err)
+		goto read_sock_end;
+
+	do {
+		if (!skb_queue_empty(&ctx->rx_list)) {
+			skb = __skb_dequeue(&ctx->rx_list);
+			rxm = strp_msg(skb);
+			tlm = tls_msg(skb);
+		} else {
+			struct tls_decrypt_arg darg;
+
+			err = tls_rx_rec_wait(sk, NULL, true, true);
+			if (err <= 0)
+				goto read_sock_end;
+
+			memset(&darg.inargs, 0, sizeof(darg.inargs));
+
+			rxm = strp_msg(tls_strp_msg(ctx));
+			tlm = tls_msg(tls_strp_msg(ctx));
+
+			err = tls_rx_one_record(sk, NULL, &darg);
+			if (err < 0) {
+				tls_err_abort(sk, -EBADMSG);
+				goto read_sock_end;
+			}
+
+			sk_flush_backlog(sk);
+			skb = darg.skb;
+
+			tls_rx_rec_done(ctx);
+		}
+
+		/* read_sock does not support reading control messages */
+		if (tlm->control != TLS_RECORD_TYPE_DATA) {
+			err = -EINVAL;
+			goto read_sock_requeue;
+		}
+
+		used = read_actor(desc, skb, rxm->offset, rxm->full_len);
+		if (used <= 0) {
+			if (!copied)
+				err = used;
+			goto read_sock_requeue;
+		}
+		copied += used;
+		if (used < rxm->full_len) {
+			rxm->offset += used;
+			rxm->full_len -= used;
+			if (!desc->count)
+				goto read_sock_requeue;
+		} else {
+			consume_skb(skb);
+			if (!desc->count)
+				skb = NULL;
+		}
+	} while (skb);
+
+read_sock_end:
+	tls_rx_reader_release(sk, ctx);
+	return copied ? : err;
+
+read_sock_requeue:
+	__skb_queue_head(&ctx->rx_list, skb);
+	goto read_sock_end;
+}
+
 bool tls_sw_sock_is_readable(struct sock *sk)
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);