Message ID | 167932228666.3131.1680559749292527734.stgit@manet.1015granger.net (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | NFSD support for RPC-with-TLS | expand |
On Mon, 2023-03-20 at 10:24 -0400, Chuck Lever wrote: > From: Chuck Lever <chuck.lever@oracle.com> > > This patch adds opportunitistic RPC-with-TLS to the Linux in-kernel > NFS server. If the client requests RPC-with-TLS and the user space > handshake agent is running, the server will set up a TLS session. > > There are no policy settings yet. For example, the server cannot > yet require the use of RPC-with-TLS to access its data. > > Signed-off-by: Chuck Lever <chuck.lever@oracle.com> > --- > include/linux/sunrpc/svc_xprt.h | 5 ++ > include/linux/sunrpc/svcsock.h | 2 + > include/trace/events/sunrpc.h | 16 ++++++- > net/sunrpc/svc_xprt.c | 5 ++ > net/sunrpc/svcauth_unix.c | 11 ++++- > net/sunrpc/svcsock.c | 91 +++++++++++++++++++++++++++++++++++++++ > 6 files changed, 125 insertions(+), 5 deletions(-) > > diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h > index 775368802762..867479204840 100644 > --- a/include/linux/sunrpc/svc_xprt.h > +++ b/include/linux/sunrpc/svc_xprt.h > @@ -27,7 +27,7 @@ struct svc_xprt_ops { > void (*xpo_detach)(struct svc_xprt *); > void (*xpo_free)(struct svc_xprt *); > void (*xpo_kill_temp_xprt)(struct svc_xprt *); > - void (*xpo_start_tls)(struct svc_xprt *); > + void (*xpo_handshake)(struct svc_xprt *xprt); > }; > > struct svc_xprt_class { > @@ -70,6 +70,9 @@ struct svc_xprt { > #define XPT_LOCAL 12 /* connection from loopback interface */ > #define XPT_KILL_TEMP 13 /* call xpo_kill_temp_xprt before closing */ > #define XPT_CONG_CTRL 14 /* has congestion control */ > +#define XPT_HANDSHAKE 15 /* xprt requests a handshake */ > +#define XPT_TLS_SESSION 16 /* transport-layer security established */ > +#define XPT_PEER_AUTH 17 /* peer has been authenticated */ > > struct svc_serv *xpt_server; /* service for transport */ > atomic_t xpt_reserved; /* space on outq that is rsvd */ > diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h > index bcc555c7ae9c..1175e1c38bac 100644 > --- a/include/linux/sunrpc/svcsock.h > +++ b/include/linux/sunrpc/svcsock.h > @@ -38,6 +38,8 @@ struct svc_sock { > /* Number of queued send requests */ > atomic_t sk_sendqlen; > > + struct completion sk_handshake_done; > + > struct page * sk_pages[RPCSVC_MAXPAGES]; /* received data */ > }; > > diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h > index cf286a0a17d0..2667a8db4811 100644 > --- a/include/trace/events/sunrpc.h > +++ b/include/trace/events/sunrpc.h > @@ -1948,7 +1948,10 @@ TRACE_EVENT(svc_stats_latency, > { BIT(XPT_CACHE_AUTH), "CACHE_AUTH" }, \ > { BIT(XPT_LOCAL), "LOCAL" }, \ > { BIT(XPT_KILL_TEMP), "KILL_TEMP" }, \ > - { BIT(XPT_CONG_CTRL), "CONG_CTRL" }) > + { BIT(XPT_CONG_CTRL), "CONG_CTRL" }, \ > + { BIT(XPT_HANDSHAKE), "HANDSHAKE" }, \ > + { BIT(XPT_TLS_SESSION), "TLS_SESSION" }, \ > + { BIT(XPT_PEER_AUTH), "PEER_AUTH" }) > > TRACE_EVENT(svc_xprt_create_err, > TP_PROTO( > @@ -2081,6 +2084,17 @@ DEFINE_SVC_XPRT_EVENT(close); > DEFINE_SVC_XPRT_EVENT(detach); > DEFINE_SVC_XPRT_EVENT(free); > > +#define DEFINE_SVC_TLS_EVENT(name) \ > + DEFINE_EVENT(svc_xprt_event, svc_tls_##name, \ > + TP_PROTO(const struct svc_xprt *xprt), \ > + TP_ARGS(xprt)) > + > +DEFINE_SVC_TLS_EVENT(start); > +DEFINE_SVC_TLS_EVENT(upcall); > +DEFINE_SVC_TLS_EVENT(unavailable); > +DEFINE_SVC_TLS_EVENT(not_started); > +DEFINE_SVC_TLS_EVENT(timed_out); > + > TRACE_EVENT(svc_xprt_accept, > TP_PROTO( > const struct svc_xprt *xprt, > diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c > index ba629297da4e..b68c04dbf876 100644 > --- a/net/sunrpc/svc_xprt.c > +++ b/net/sunrpc/svc_xprt.c > @@ -427,7 +427,7 @@ static bool svc_xprt_ready(struct svc_xprt *xprt) > > if (xpt_flags & BIT(XPT_BUSY)) > return false; > - if (xpt_flags & (BIT(XPT_CONN) | BIT(XPT_CLOSE))) > + if (xpt_flags & (BIT(XPT_CONN) | BIT(XPT_CLOSE) | BIT(XPT_HANDSHAKE))) > return true; > if (xpt_flags & (BIT(XPT_DATA) | BIT(XPT_DEFERRED))) { > if (xprt->xpt_ops->xpo_has_wspace(xprt) && > @@ -829,6 +829,9 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt) > module_put(xprt->xpt_class->xcl_owner); > } > svc_xprt_received(xprt); > + } else if (test_bit(XPT_HANDSHAKE, &xprt->xpt_flags)) { > + xprt->xpt_ops->xpo_handshake(xprt); > + svc_xprt_received(xprt); > } else if (svc_xprt_reserve_slot(rqstp, xprt)) { > /* XPT_DATA|XPT_DEFERRED case: */ > dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n", > diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c > index 983c5891cb56..374995201df4 100644 > --- a/net/sunrpc/svcauth_unix.c > +++ b/net/sunrpc/svcauth_unix.c > @@ -17,8 +17,9 @@ > #include <net/ipv6.h> > #include <linux/kernel.h> > #include <linux/user_namespace.h> > -#define RPCDBG_FACILITY RPCDBG_AUTH > +#include <trace/events/sunrpc.h> > > +#define RPCDBG_FACILITY RPCDBG_AUTH > > #include "netns.h" > > @@ -823,6 +824,7 @@ svcauth_tls_accept(struct svc_rqst *rqstp) > { > struct xdr_stream *xdr = &rqstp->rq_arg_stream; > struct svc_cred *cred = &rqstp->rq_cred; > + struct svc_xprt *xprt = rqstp->rq_xprt; > u32 flavor, len; > void *body; > __be32 *p; > @@ -856,14 +858,19 @@ svcauth_tls_accept(struct svc_rqst *rqstp) > if (cred->cr_group_info == NULL) > return SVC_CLOSE; > > - if (rqstp->rq_xprt->xpt_ops->xpo_start_tls) { > + if (xprt->xpt_ops->xpo_handshake) { > p = xdr_reserve_space(&rqstp->rq_res_stream, XDR_UNIT * 2 + 8); > if (!p) > return SVC_CLOSE; > + trace_svc_tls_start(xprt); > *p++ = rpc_auth_null; > *p++ = cpu_to_be32(8); > memcpy(p, "STARTTLS", 8); > + > + set_bit(XPT_HANDSHAKE, &xprt->xpt_flags); > + svc_xprt_enqueue(xprt); > } else { > + trace_svc_tls_unavailable(xprt); > if (xdr_stream_encode_opaque_auth(&rqstp->rq_res_stream, > RPC_AUTH_NULL, NULL, 0) < 0) > return SVC_CLOSE; > diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c > index b6df73cb706a..16ba8d6ab20e 100644 > --- a/net/sunrpc/svcsock.c > +++ b/net/sunrpc/svcsock.c > @@ -44,9 +44,11 @@ > #include <net/tcp.h> > #include <net/tcp_states.h> > #include <net/tls.h> > +#include <net/handshake.h> > #include <linux/uaccess.h> > #include <linux/highmem.h> > #include <asm/ioctls.h> > +#include <linux/key.h> > > #include <linux/sunrpc/types.h> > #include <linux/sunrpc/clnt.h> > @@ -64,6 +66,7 @@ > > #define RPCDBG_FACILITY RPCDBG_SVCXPRT > > +#define SVC_HANDSHAKE_TO (20U * HZ) > > static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *, > int flags); > @@ -360,6 +363,8 @@ static void svc_data_ready(struct sock *sk) > rmb(); > svsk->sk_odata(sk); > trace_svcsock_data_ready(&svsk->sk_xprt, 0); > + if (test_bit(XPT_HANDSHAKE, &svsk->sk_xprt.xpt_flags)) > + return; > if (!test_and_set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags)) > svc_xprt_enqueue(&svsk->sk_xprt); > } > @@ -397,6 +402,89 @@ static void svc_tcp_kill_temp_xprt(struct svc_xprt *xprt) > sock_no_linger(svsk->sk_sock->sk); > } > > +/** > + * svc_tcp_handshake_done - Handshake completion handler > + * @data: address of xprt to wake > + * @status: status of handshake > + * @peerid: serial number of key containing the remote peer's identity > + * > + * If a security policy is specified as an export option, we don't > + * have a specific export here to check. So we set a "TLS session > + * is present" flag on the xprt and let an upper layer enforce local > + * security policy. > + */ > +static void svc_tcp_handshake_done(void *data, int status, key_serial_t peerid) > +{ > + struct svc_xprt *xprt = data; > + struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); > + > + if (!status) { > + if (peerid != TLS_NO_PEERID) > + set_bit(XPT_PEER_AUTH, &xprt->xpt_flags); > + set_bit(XPT_TLS_SESSION, &xprt->xpt_flags); > + } > + clear_bit(XPT_HANDSHAKE, &xprt->xpt_flags); > + complete_all(&svsk->sk_handshake_done); > +} > + > +/** > + * svc_tcp_handshake - Perform a transport-layer security handshake > + * @xprt: connected transport endpoint > + * > + */ > +static void svc_tcp_handshake(struct svc_xprt *xprt) > +{ > + struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); > + struct tls_handshake_args args = { > + .ta_sock = svsk->sk_sock, > + .ta_done = svc_tcp_handshake_done, > + .ta_data = xprt, > + }; > + int ret; > + > + trace_svc_tls_upcall(xprt); > + > + clear_bit(XPT_TLS_SESSION, &xprt->xpt_flags); > + init_completion(&svsk->sk_handshake_done); > + smp_wmb(); > + > + ret = tls_server_hello_x509(&args, GFP_KERNEL); > + if (ret) { > + trace_svc_tls_not_started(xprt); > + goto out_failed; > + } > + > + ret = wait_for_completion_interruptible_timeout(&svsk->sk_handshake_done, > + SVC_HANDSHAKE_TO); Just curious: is this 20s timeout mandated by the spec? It seems like a long time to block a kernel thread if so. Do we need to be concerned with DoS attacks here? Could a client initiate handshakes and then stop communicating, forcing the server to tie up threads with these 20s waits? > + if (ret <= 0) { > + if (tls_handshake_cancel(svsk->sk_sock)) { > + trace_svc_tls_timed_out(xprt); > + goto out_close; > + } > + } > + > + if (!test_bit(XPT_TLS_SESSION, &xprt->xpt_flags)) { > + trace_svc_tls_unavailable(xprt); > + goto out_close; > + } > + > + /* > + * Mark the transport ready in case the remote sent RPC > + * traffic before the kernel received the handshake > + * completion downcall. > + */ > + set_bit(XPT_DATA, &xprt->xpt_flags); > + svc_xprt_enqueue(xprt); > + return; > + > +out_close: > + set_bit(XPT_CLOSE, &xprt->xpt_flags); > +out_failed: > + clear_bit(XPT_HANDSHAKE, &xprt->xpt_flags); > + set_bit(XPT_DATA, &xprt->xpt_flags); > + svc_xprt_enqueue(xprt); > +} > + > /* > * See net/ipv6/ip_sockglue.c : ip_cmsg_recv_pktinfo > */ > @@ -1260,6 +1348,7 @@ static const struct svc_xprt_ops svc_tcp_ops = { > .xpo_has_wspace = svc_tcp_has_wspace, > .xpo_accept = svc_tcp_accept, > .xpo_kill_temp_xprt = svc_tcp_kill_temp_xprt, > + .xpo_handshake = svc_tcp_handshake, > }; > > static struct svc_xprt_class svc_tcp_class = { > @@ -1584,6 +1673,8 @@ static void svc_sock_free(struct svc_xprt *xprt) > { > struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); > > + tls_handshake_cancel(svsk->sk_sock); > + > if (svsk->sk_sock->file) > sockfd_put(svsk->sk_sock); > else > >
> On Mar 21, 2023, at 7:43 AM, Jeff Layton <jlayton@kernel.org> wrote: > > On Mon, 2023-03-20 at 10:24 -0400, Chuck Lever wrote: >> From: Chuck Lever <chuck.lever@oracle.com> >> >> This patch adds opportunitistic RPC-with-TLS to the Linux in-kernel >> NFS server. If the client requests RPC-with-TLS and the user space >> handshake agent is running, the server will set up a TLS session. >> >> There are no policy settings yet. For example, the server cannot >> yet require the use of RPC-with-TLS to access its data. >> >> Signed-off-by: Chuck Lever <chuck.lever@oracle.com> >> --- >> include/linux/sunrpc/svc_xprt.h | 5 ++ >> include/linux/sunrpc/svcsock.h | 2 + >> include/trace/events/sunrpc.h | 16 ++++++- >> net/sunrpc/svc_xprt.c | 5 ++ >> net/sunrpc/svcauth_unix.c | 11 ++++- >> net/sunrpc/svcsock.c | 91 +++++++++++++++++++++++++++++++++++++++ >> 6 files changed, 125 insertions(+), 5 deletions(-) >> >> diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h >> index 775368802762..867479204840 100644 >> --- a/include/linux/sunrpc/svc_xprt.h >> +++ b/include/linux/sunrpc/svc_xprt.h >> @@ -27,7 +27,7 @@ struct svc_xprt_ops { >> void (*xpo_detach)(struct svc_xprt *); >> void (*xpo_free)(struct svc_xprt *); >> void (*xpo_kill_temp_xprt)(struct svc_xprt *); >> - void (*xpo_start_tls)(struct svc_xprt *); >> + void (*xpo_handshake)(struct svc_xprt *xprt); >> }; >> >> struct svc_xprt_class { >> @@ -70,6 +70,9 @@ struct svc_xprt { >> #define XPT_LOCAL 12 /* connection from loopback interface */ >> #define XPT_KILL_TEMP 13 /* call xpo_kill_temp_xprt before closing */ >> #define XPT_CONG_CTRL 14 /* has congestion control */ >> +#define XPT_HANDSHAKE 15 /* xprt requests a handshake */ >> +#define XPT_TLS_SESSION 16 /* transport-layer security established */ >> +#define XPT_PEER_AUTH 17 /* peer has been authenticated */ >> >> struct svc_serv *xpt_server; /* service for transport */ >> atomic_t xpt_reserved; /* space on outq that is rsvd */ >> diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h >> index bcc555c7ae9c..1175e1c38bac 100644 >> --- a/include/linux/sunrpc/svcsock.h >> +++ b/include/linux/sunrpc/svcsock.h >> @@ -38,6 +38,8 @@ struct svc_sock { >> /* Number of queued send requests */ >> atomic_t sk_sendqlen; >> >> + struct completion sk_handshake_done; >> + >> struct page * sk_pages[RPCSVC_MAXPAGES]; /* received data */ >> }; >> >> diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h >> index cf286a0a17d0..2667a8db4811 100644 >> --- a/include/trace/events/sunrpc.h >> +++ b/include/trace/events/sunrpc.h >> @@ -1948,7 +1948,10 @@ TRACE_EVENT(svc_stats_latency, >> { BIT(XPT_CACHE_AUTH), "CACHE_AUTH" }, \ >> { BIT(XPT_LOCAL), "LOCAL" }, \ >> { BIT(XPT_KILL_TEMP), "KILL_TEMP" }, \ >> - { BIT(XPT_CONG_CTRL), "CONG_CTRL" }) >> + { BIT(XPT_CONG_CTRL), "CONG_CTRL" }, \ >> + { BIT(XPT_HANDSHAKE), "HANDSHAKE" }, \ >> + { BIT(XPT_TLS_SESSION), "TLS_SESSION" }, \ >> + { BIT(XPT_PEER_AUTH), "PEER_AUTH" }) >> >> TRACE_EVENT(svc_xprt_create_err, >> TP_PROTO( >> @@ -2081,6 +2084,17 @@ DEFINE_SVC_XPRT_EVENT(close); >> DEFINE_SVC_XPRT_EVENT(detach); >> DEFINE_SVC_XPRT_EVENT(free); >> >> +#define DEFINE_SVC_TLS_EVENT(name) \ >> + DEFINE_EVENT(svc_xprt_event, svc_tls_##name, \ >> + TP_PROTO(const struct svc_xprt *xprt), \ >> + TP_ARGS(xprt)) >> + >> +DEFINE_SVC_TLS_EVENT(start); >> +DEFINE_SVC_TLS_EVENT(upcall); >> +DEFINE_SVC_TLS_EVENT(unavailable); >> +DEFINE_SVC_TLS_EVENT(not_started); >> +DEFINE_SVC_TLS_EVENT(timed_out); >> + >> TRACE_EVENT(svc_xprt_accept, >> TP_PROTO( >> const struct svc_xprt *xprt, >> diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c >> index ba629297da4e..b68c04dbf876 100644 >> --- a/net/sunrpc/svc_xprt.c >> +++ b/net/sunrpc/svc_xprt.c >> @@ -427,7 +427,7 @@ static bool svc_xprt_ready(struct svc_xprt *xprt) >> >> if (xpt_flags & BIT(XPT_BUSY)) >> return false; >> - if (xpt_flags & (BIT(XPT_CONN) | BIT(XPT_CLOSE))) >> + if (xpt_flags & (BIT(XPT_CONN) | BIT(XPT_CLOSE) | BIT(XPT_HANDSHAKE))) >> return true; >> if (xpt_flags & (BIT(XPT_DATA) | BIT(XPT_DEFERRED))) { >> if (xprt->xpt_ops->xpo_has_wspace(xprt) && >> @@ -829,6 +829,9 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt) >> module_put(xprt->xpt_class->xcl_owner); >> } >> svc_xprt_received(xprt); >> + } else if (test_bit(XPT_HANDSHAKE, &xprt->xpt_flags)) { >> + xprt->xpt_ops->xpo_handshake(xprt); >> + svc_xprt_received(xprt); >> } else if (svc_xprt_reserve_slot(rqstp, xprt)) { >> /* XPT_DATA|XPT_DEFERRED case: */ >> dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n", >> diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c >> index 983c5891cb56..374995201df4 100644 >> --- a/net/sunrpc/svcauth_unix.c >> +++ b/net/sunrpc/svcauth_unix.c >> @@ -17,8 +17,9 @@ >> #include <net/ipv6.h> >> #include <linux/kernel.h> >> #include <linux/user_namespace.h> >> -#define RPCDBG_FACILITY RPCDBG_AUTH >> +#include <trace/events/sunrpc.h> >> >> +#define RPCDBG_FACILITY RPCDBG_AUTH >> >> #include "netns.h" >> >> @@ -823,6 +824,7 @@ svcauth_tls_accept(struct svc_rqst *rqstp) >> { >> struct xdr_stream *xdr = &rqstp->rq_arg_stream; >> struct svc_cred *cred = &rqstp->rq_cred; >> + struct svc_xprt *xprt = rqstp->rq_xprt; >> u32 flavor, len; >> void *body; >> __be32 *p; >> @@ -856,14 +858,19 @@ svcauth_tls_accept(struct svc_rqst *rqstp) >> if (cred->cr_group_info == NULL) >> return SVC_CLOSE; >> >> - if (rqstp->rq_xprt->xpt_ops->xpo_start_tls) { >> + if (xprt->xpt_ops->xpo_handshake) { >> p = xdr_reserve_space(&rqstp->rq_res_stream, XDR_UNIT * 2 + 8); >> if (!p) >> return SVC_CLOSE; >> + trace_svc_tls_start(xprt); >> *p++ = rpc_auth_null; >> *p++ = cpu_to_be32(8); >> memcpy(p, "STARTTLS", 8); >> + >> + set_bit(XPT_HANDSHAKE, &xprt->xpt_flags); >> + svc_xprt_enqueue(xprt); >> } else { >> + trace_svc_tls_unavailable(xprt); >> if (xdr_stream_encode_opaque_auth(&rqstp->rq_res_stream, >> RPC_AUTH_NULL, NULL, 0) < 0) >> return SVC_CLOSE; >> diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c >> index b6df73cb706a..16ba8d6ab20e 100644 >> --- a/net/sunrpc/svcsock.c >> +++ b/net/sunrpc/svcsock.c >> @@ -44,9 +44,11 @@ >> #include <net/tcp.h> >> #include <net/tcp_states.h> >> #include <net/tls.h> >> +#include <net/handshake.h> >> #include <linux/uaccess.h> >> #include <linux/highmem.h> >> #include <asm/ioctls.h> >> +#include <linux/key.h> >> >> #include <linux/sunrpc/types.h> >> #include <linux/sunrpc/clnt.h> >> @@ -64,6 +66,7 @@ >> >> #define RPCDBG_FACILITY RPCDBG_SVCXPRT >> >> +#define SVC_HANDSHAKE_TO (20U * HZ) >> >> static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *, >> int flags); >> @@ -360,6 +363,8 @@ static void svc_data_ready(struct sock *sk) >> rmb(); >> svsk->sk_odata(sk); >> trace_svcsock_data_ready(&svsk->sk_xprt, 0); >> + if (test_bit(XPT_HANDSHAKE, &svsk->sk_xprt.xpt_flags)) >> + return; >> if (!test_and_set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags)) >> svc_xprt_enqueue(&svsk->sk_xprt); >> } >> @@ -397,6 +402,89 @@ static void svc_tcp_kill_temp_xprt(struct svc_xprt *xprt) >> sock_no_linger(svsk->sk_sock->sk); >> } >> >> +/** >> + * svc_tcp_handshake_done - Handshake completion handler >> + * @data: address of xprt to wake >> + * @status: status of handshake >> + * @peerid: serial number of key containing the remote peer's identity >> + * >> + * If a security policy is specified as an export option, we don't >> + * have a specific export here to check. So we set a "TLS session >> + * is present" flag on the xprt and let an upper layer enforce local >> + * security policy. >> + */ >> +static void svc_tcp_handshake_done(void *data, int status, key_serial_t peerid) >> +{ >> + struct svc_xprt *xprt = data; >> + struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); >> + >> + if (!status) { >> + if (peerid != TLS_NO_PEERID) >> + set_bit(XPT_PEER_AUTH, &xprt->xpt_flags); >> + set_bit(XPT_TLS_SESSION, &xprt->xpt_flags); >> + } >> + clear_bit(XPT_HANDSHAKE, &xprt->xpt_flags); >> + complete_all(&svsk->sk_handshake_done); >> +} >> + >> +/** >> + * svc_tcp_handshake - Perform a transport-layer security handshake >> + * @xprt: connected transport endpoint >> + * >> + */ >> +static void svc_tcp_handshake(struct svc_xprt *xprt) >> +{ >> + struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); >> + struct tls_handshake_args args = { >> + .ta_sock = svsk->sk_sock, >> + .ta_done = svc_tcp_handshake_done, >> + .ta_data = xprt, >> + }; >> + int ret; >> + >> + trace_svc_tls_upcall(xprt); >> + >> + clear_bit(XPT_TLS_SESSION, &xprt->xpt_flags); >> + init_completion(&svsk->sk_handshake_done); >> + smp_wmb(); >> + >> + ret = tls_server_hello_x509(&args, GFP_KERNEL); >> + if (ret) { >> + trace_svc_tls_not_started(xprt); >> + goto out_failed; >> + } >> + >> + ret = wait_for_completion_interruptible_timeout(&svsk->sk_handshake_done, >> + SVC_HANDSHAKE_TO); > > Just curious: is this 20s timeout mandated by the spec? The spec doesn't mandate a timeout. I simply wanted to guarantee forward progress. > It seems like a long time to block a kernel thread if so. It's about the same as the client side timeout, fwiw. > Do we need to be concerned > with DoS attacks here? Could a client initiate handshakes and then stop > communicating, forcing the server to tie up threads with these 20s > waits? I think a malicious client can do all kinds of similar things already. Do you have a particular timeout value in mind, or is there some other mechanism we can use to better bullet- proof this aspect of the handshake? I'm open to suggestion. >> + if (ret <= 0) { >> + if (tls_handshake_cancel(svsk->sk_sock)) { >> + trace_svc_tls_timed_out(xprt); >> + goto out_close; >> + } >> + } >> + >> + if (!test_bit(XPT_TLS_SESSION, &xprt->xpt_flags)) { >> + trace_svc_tls_unavailable(xprt); >> + goto out_close; >> + } >> + >> + /* >> + * Mark the transport ready in case the remote sent RPC >> + * traffic before the kernel received the handshake >> + * completion downcall. >> + */ >> + set_bit(XPT_DATA, &xprt->xpt_flags); >> + svc_xprt_enqueue(xprt); >> + return; >> + >> +out_close: >> + set_bit(XPT_CLOSE, &xprt->xpt_flags); >> +out_failed: >> + clear_bit(XPT_HANDSHAKE, &xprt->xpt_flags); >> + set_bit(XPT_DATA, &xprt->xpt_flags); >> + svc_xprt_enqueue(xprt); >> +} >> + >> /* >> * See net/ipv6/ip_sockglue.c : ip_cmsg_recv_pktinfo >> */ >> @@ -1260,6 +1348,7 @@ static const struct svc_xprt_ops svc_tcp_ops = { >> .xpo_has_wspace = svc_tcp_has_wspace, >> .xpo_accept = svc_tcp_accept, >> .xpo_kill_temp_xprt = svc_tcp_kill_temp_xprt, >> + .xpo_handshake = svc_tcp_handshake, >> }; >> >> static struct svc_xprt_class svc_tcp_class = { >> @@ -1584,6 +1673,8 @@ static void svc_sock_free(struct svc_xprt *xprt) >> { >> struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); >> >> + tls_handshake_cancel(svsk->sk_sock); >> + >> if (svsk->sk_sock->file) >> sockfd_put(svsk->sk_sock); >> else >> >> > > -- > Jeff Layton <jlayton@kernel.org> -- Chuck Lever
On Tue, 2023-03-21 at 14:03 +0000, Chuck Lever III wrote: > > > On Mar 21, 2023, at 7:43 AM, Jeff Layton <jlayton@kernel.org> wrote: > > > > On Mon, 2023-03-20 at 10:24 -0400, Chuck Lever wrote: > > > From: Chuck Lever <chuck.lever@oracle.com> > > > > > > This patch adds opportunitistic RPC-with-TLS to the Linux in-kernel > > > NFS server. If the client requests RPC-with-TLS and the user space > > > handshake agent is running, the server will set up a TLS session. > > > > > > There are no policy settings yet. For example, the server cannot > > > yet require the use of RPC-with-TLS to access its data. > > > > > > Signed-off-by: Chuck Lever <chuck.lever@oracle.com> > > > --- > > > include/linux/sunrpc/svc_xprt.h | 5 ++ > > > include/linux/sunrpc/svcsock.h | 2 + > > > include/trace/events/sunrpc.h | 16 ++++++- > > > net/sunrpc/svc_xprt.c | 5 ++ > > > net/sunrpc/svcauth_unix.c | 11 ++++- > > > net/sunrpc/svcsock.c | 91 +++++++++++++++++++++++++++++++++++++++ > > > 6 files changed, 125 insertions(+), 5 deletions(-) > > > > > > diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h > > > index 775368802762..867479204840 100644 > > > --- a/include/linux/sunrpc/svc_xprt.h > > > +++ b/include/linux/sunrpc/svc_xprt.h > > > @@ -27,7 +27,7 @@ struct svc_xprt_ops { > > > void (*xpo_detach)(struct svc_xprt *); > > > void (*xpo_free)(struct svc_xprt *); > > > void (*xpo_kill_temp_xprt)(struct svc_xprt *); > > > - void (*xpo_start_tls)(struct svc_xprt *); > > > + void (*xpo_handshake)(struct svc_xprt *xprt); > > > }; > > > > > > struct svc_xprt_class { > > > @@ -70,6 +70,9 @@ struct svc_xprt { > > > #define XPT_LOCAL 12 /* connection from loopback interface */ > > > #define XPT_KILL_TEMP 13 /* call xpo_kill_temp_xprt before closing */ > > > #define XPT_CONG_CTRL 14 /* has congestion control */ > > > +#define XPT_HANDSHAKE 15 /* xprt requests a handshake */ > > > +#define XPT_TLS_SESSION 16 /* transport-layer security established */ > > > +#define XPT_PEER_AUTH 17 /* peer has been authenticated */ > > > > > > struct svc_serv *xpt_server; /* service for transport */ > > > atomic_t xpt_reserved; /* space on outq that is rsvd */ > > > diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h > > > index bcc555c7ae9c..1175e1c38bac 100644 > > > --- a/include/linux/sunrpc/svcsock.h > > > +++ b/include/linux/sunrpc/svcsock.h > > > @@ -38,6 +38,8 @@ struct svc_sock { > > > /* Number of queued send requests */ > > > atomic_t sk_sendqlen; > > > > > > + struct completion sk_handshake_done; > > > + > > > struct page * sk_pages[RPCSVC_MAXPAGES]; /* received data */ > > > }; > > > > > > diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h > > > index cf286a0a17d0..2667a8db4811 100644 > > > --- a/include/trace/events/sunrpc.h > > > +++ b/include/trace/events/sunrpc.h > > > @@ -1948,7 +1948,10 @@ TRACE_EVENT(svc_stats_latency, > > > { BIT(XPT_CACHE_AUTH), "CACHE_AUTH" }, \ > > > { BIT(XPT_LOCAL), "LOCAL" }, \ > > > { BIT(XPT_KILL_TEMP), "KILL_TEMP" }, \ > > > - { BIT(XPT_CONG_CTRL), "CONG_CTRL" }) > > > + { BIT(XPT_CONG_CTRL), "CONG_CTRL" }, \ > > > + { BIT(XPT_HANDSHAKE), "HANDSHAKE" }, \ > > > + { BIT(XPT_TLS_SESSION), "TLS_SESSION" }, \ > > > + { BIT(XPT_PEER_AUTH), "PEER_AUTH" }) > > > > > > TRACE_EVENT(svc_xprt_create_err, > > > TP_PROTO( > > > @@ -2081,6 +2084,17 @@ DEFINE_SVC_XPRT_EVENT(close); > > > DEFINE_SVC_XPRT_EVENT(detach); > > > DEFINE_SVC_XPRT_EVENT(free); > > > > > > +#define DEFINE_SVC_TLS_EVENT(name) \ > > > + DEFINE_EVENT(svc_xprt_event, svc_tls_##name, \ > > > + TP_PROTO(const struct svc_xprt *xprt), \ > > > + TP_ARGS(xprt)) > > > + > > > +DEFINE_SVC_TLS_EVENT(start); > > > +DEFINE_SVC_TLS_EVENT(upcall); > > > +DEFINE_SVC_TLS_EVENT(unavailable); > > > +DEFINE_SVC_TLS_EVENT(not_started); > > > +DEFINE_SVC_TLS_EVENT(timed_out); > > > + > > > TRACE_EVENT(svc_xprt_accept, > > > TP_PROTO( > > > const struct svc_xprt *xprt, > > > diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c > > > index ba629297da4e..b68c04dbf876 100644 > > > --- a/net/sunrpc/svc_xprt.c > > > +++ b/net/sunrpc/svc_xprt.c > > > @@ -427,7 +427,7 @@ static bool svc_xprt_ready(struct svc_xprt *xprt) > > > > > > if (xpt_flags & BIT(XPT_BUSY)) > > > return false; > > > - if (xpt_flags & (BIT(XPT_CONN) | BIT(XPT_CLOSE))) > > > + if (xpt_flags & (BIT(XPT_CONN) | BIT(XPT_CLOSE) | BIT(XPT_HANDSHAKE))) > > > return true; > > > if (xpt_flags & (BIT(XPT_DATA) | BIT(XPT_DEFERRED))) { > > > if (xprt->xpt_ops->xpo_has_wspace(xprt) && > > > @@ -829,6 +829,9 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt) > > > module_put(xprt->xpt_class->xcl_owner); > > > } > > > svc_xprt_received(xprt); > > > + } else if (test_bit(XPT_HANDSHAKE, &xprt->xpt_flags)) { > > > + xprt->xpt_ops->xpo_handshake(xprt); > > > + svc_xprt_received(xprt); > > > } else if (svc_xprt_reserve_slot(rqstp, xprt)) { > > > /* XPT_DATA|XPT_DEFERRED case: */ > > > dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n", > > > diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c > > > index 983c5891cb56..374995201df4 100644 > > > --- a/net/sunrpc/svcauth_unix.c > > > +++ b/net/sunrpc/svcauth_unix.c > > > @@ -17,8 +17,9 @@ > > > #include <net/ipv6.h> > > > #include <linux/kernel.h> > > > #include <linux/user_namespace.h> > > > -#define RPCDBG_FACILITY RPCDBG_AUTH > > > +#include <trace/events/sunrpc.h> > > > > > > +#define RPCDBG_FACILITY RPCDBG_AUTH > > > > > > #include "netns.h" > > > > > > @@ -823,6 +824,7 @@ svcauth_tls_accept(struct svc_rqst *rqstp) > > > { > > > struct xdr_stream *xdr = &rqstp->rq_arg_stream; > > > struct svc_cred *cred = &rqstp->rq_cred; > > > + struct svc_xprt *xprt = rqstp->rq_xprt; > > > u32 flavor, len; > > > void *body; > > > __be32 *p; > > > @@ -856,14 +858,19 @@ svcauth_tls_accept(struct svc_rqst *rqstp) > > > if (cred->cr_group_info == NULL) > > > return SVC_CLOSE; > > > > > > - if (rqstp->rq_xprt->xpt_ops->xpo_start_tls) { > > > + if (xprt->xpt_ops->xpo_handshake) { > > > p = xdr_reserve_space(&rqstp->rq_res_stream, XDR_UNIT * 2 + 8); > > > if (!p) > > > return SVC_CLOSE; > > > + trace_svc_tls_start(xprt); > > > *p++ = rpc_auth_null; > > > *p++ = cpu_to_be32(8); > > > memcpy(p, "STARTTLS", 8); > > > + > > > + set_bit(XPT_HANDSHAKE, &xprt->xpt_flags); > > > + svc_xprt_enqueue(xprt); > > > } else { > > > + trace_svc_tls_unavailable(xprt); > > > if (xdr_stream_encode_opaque_auth(&rqstp->rq_res_stream, > > > RPC_AUTH_NULL, NULL, 0) < 0) > > > return SVC_CLOSE; > > > diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c > > > index b6df73cb706a..16ba8d6ab20e 100644 > > > --- a/net/sunrpc/svcsock.c > > > +++ b/net/sunrpc/svcsock.c > > > @@ -44,9 +44,11 @@ > > > #include <net/tcp.h> > > > #include <net/tcp_states.h> > > > #include <net/tls.h> > > > +#include <net/handshake.h> > > > #include <linux/uaccess.h> > > > #include <linux/highmem.h> > > > #include <asm/ioctls.h> > > > +#include <linux/key.h> > > > > > > #include <linux/sunrpc/types.h> > > > #include <linux/sunrpc/clnt.h> > > > @@ -64,6 +66,7 @@ > > > > > > #define RPCDBG_FACILITY RPCDBG_SVCXPRT > > > > > > +#define SVC_HANDSHAKE_TO (20U * HZ) > > > > > > static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *, > > > int flags); > > > @@ -360,6 +363,8 @@ static void svc_data_ready(struct sock *sk) > > > rmb(); > > > svsk->sk_odata(sk); > > > trace_svcsock_data_ready(&svsk->sk_xprt, 0); > > > + if (test_bit(XPT_HANDSHAKE, &svsk->sk_xprt.xpt_flags)) > > > + return; > > > if (!test_and_set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags)) > > > svc_xprt_enqueue(&svsk->sk_xprt); > > > } > > > @@ -397,6 +402,89 @@ static void svc_tcp_kill_temp_xprt(struct svc_xprt *xprt) > > > sock_no_linger(svsk->sk_sock->sk); > > > } > > > > > > +/** > > > + * svc_tcp_handshake_done - Handshake completion handler > > > + * @data: address of xprt to wake > > > + * @status: status of handshake > > > + * @peerid: serial number of key containing the remote peer's identity > > > + * > > > + * If a security policy is specified as an export option, we don't > > > + * have a specific export here to check. So we set a "TLS session > > > + * is present" flag on the xprt and let an upper layer enforce local > > > + * security policy. > > > + */ > > > +static void svc_tcp_handshake_done(void *data, int status, key_serial_t peerid) > > > +{ > > > + struct svc_xprt *xprt = data; > > > + struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); > > > + > > > + if (!status) { > > > + if (peerid != TLS_NO_PEERID) > > > + set_bit(XPT_PEER_AUTH, &xprt->xpt_flags); > > > + set_bit(XPT_TLS_SESSION, &xprt->xpt_flags); > > > + } > > > + clear_bit(XPT_HANDSHAKE, &xprt->xpt_flags); > > > + complete_all(&svsk->sk_handshake_done); > > > +} > > > + > > > +/** > > > + * svc_tcp_handshake - Perform a transport-layer security handshake > > > + * @xprt: connected transport endpoint > > > + * > > > + */ > > > +static void svc_tcp_handshake(struct svc_xprt *xprt) > > > +{ > > > + struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); > > > + struct tls_handshake_args args = { > > > + .ta_sock = svsk->sk_sock, > > > + .ta_done = svc_tcp_handshake_done, > > > + .ta_data = xprt, > > > + }; > > > + int ret; > > > + > > > + trace_svc_tls_upcall(xprt); > > > + > > > + clear_bit(XPT_TLS_SESSION, &xprt->xpt_flags); > > > + init_completion(&svsk->sk_handshake_done); > > > + smp_wmb(); > > > + > > > + ret = tls_server_hello_x509(&args, GFP_KERNEL); > > > + if (ret) { > > > + trace_svc_tls_not_started(xprt); > > > + goto out_failed; > > > + } > > > + > > > + ret = wait_for_completion_interruptible_timeout(&svsk->sk_handshake_done, > > > + SVC_HANDSHAKE_TO); > > > > Just curious: is this 20s timeout mandated by the spec? > > The spec doesn't mandate a timeout. I simply wanted > to guarantee forward progress. > > > > It seems like a long time to block a kernel thread if so. > > It's about the same as the client side timeout, fwiw. > > > > Do we need to be concerned > > with DoS attacks here? Could a client initiate handshakes and then stop > > communicating, forcing the server to tie up threads with these 20s > > waits? > > I think a malicious client can do all kinds of similar things > already. Do you have a particular timeout value in mind, or > is there some other mechanism we can use to better bullet- > proof this aspect of the handshake? I'm open to suggestion. > I don't have any suggestions, just trying to speculate about ways this could break. The only alternative I could see would be to defer the connection somehow until the reply comes in so that the thread can do other stuff in the meantime. That's something we can potentially add later if we decide it's necessary though. > > > > + if (ret <= 0) { > > > + if (tls_handshake_cancel(svsk->sk_sock)) { > > > + trace_svc_tls_timed_out(xprt); > > > + goto out_close; > > > + } > > > + } > > > + > > > + if (!test_bit(XPT_TLS_SESSION, &xprt->xpt_flags)) { > > > + trace_svc_tls_unavailable(xprt); > > > + goto out_close; > > > + } > > > + > > > + /* > > > + * Mark the transport ready in case the remote sent RPC > > > + * traffic before the kernel received the handshake > > > + * completion downcall. > > > + */ > > > + set_bit(XPT_DATA, &xprt->xpt_flags); > > > + svc_xprt_enqueue(xprt); > > > + return; > > > + > > > +out_close: > > > + set_bit(XPT_CLOSE, &xprt->xpt_flags); > > > +out_failed: > > > + clear_bit(XPT_HANDSHAKE, &xprt->xpt_flags); > > > + set_bit(XPT_DATA, &xprt->xpt_flags); > > > + svc_xprt_enqueue(xprt); > > > +} > > > + > > > /* > > > * See net/ipv6/ip_sockglue.c : ip_cmsg_recv_pktinfo > > > */ > > > @@ -1260,6 +1348,7 @@ static const struct svc_xprt_ops svc_tcp_ops = { > > > .xpo_has_wspace = svc_tcp_has_wspace, > > > .xpo_accept = svc_tcp_accept, > > > .xpo_kill_temp_xprt = svc_tcp_kill_temp_xprt, > > > + .xpo_handshake = svc_tcp_handshake, > > > }; > > > > > > static struct svc_xprt_class svc_tcp_class = { > > > @@ -1584,6 +1673,8 @@ static void svc_sock_free(struct svc_xprt *xprt) > > > { > > > struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); > > > > > > + tls_handshake_cancel(svsk->sk_sock); > > > + > > > if (svsk->sk_sock->file) > > > sockfd_put(svsk->sk_sock); > > > else > > > > > > > > > > -- > > Jeff Layton <jlayton@kernel.org> > > -- > Chuck Lever > >
> On Mar 21, 2023, at 10:56 AM, Jeff Layton <jlayton@kernel.org> wrote: > > On Tue, 2023-03-21 at 14:03 +0000, Chuck Lever III wrote: >> >>> On Mar 21, 2023, at 7:43 AM, Jeff Layton <jlayton@kernel.org> wrote: >>> >>> On Mon, 2023-03-20 at 10:24 -0400, Chuck Lever wrote: >>>> From: Chuck Lever <chuck.lever@oracle.com> >>>> >>>> This patch adds opportunitistic RPC-with-TLS to the Linux in-kernel >>>> NFS server. If the client requests RPC-with-TLS and the user space >>>> handshake agent is running, the server will set up a TLS session. >>>> >>>> There are no policy settings yet. For example, the server cannot >>>> yet require the use of RPC-with-TLS to access its data. >>>> >>>> Signed-off-by: Chuck Lever <chuck.lever@oracle.com> >>>> --- >>>> include/linux/sunrpc/svc_xprt.h | 5 ++ >>>> include/linux/sunrpc/svcsock.h | 2 + >>>> include/trace/events/sunrpc.h | 16 ++++++- >>>> net/sunrpc/svc_xprt.c | 5 ++ >>>> net/sunrpc/svcauth_unix.c | 11 ++++- >>>> net/sunrpc/svcsock.c | 91 +++++++++++++++++++++++++++++++++++++++ >>>> 6 files changed, 125 insertions(+), 5 deletions(-) >>>> >>>> diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h >>>> index 775368802762..867479204840 100644 >>>> --- a/include/linux/sunrpc/svc_xprt.h >>>> +++ b/include/linux/sunrpc/svc_xprt.h >>>> @@ -27,7 +27,7 @@ struct svc_xprt_ops { >>>> void (*xpo_detach)(struct svc_xprt *); >>>> void (*xpo_free)(struct svc_xprt *); >>>> void (*xpo_kill_temp_xprt)(struct svc_xprt *); >>>> - void (*xpo_start_tls)(struct svc_xprt *); >>>> + void (*xpo_handshake)(struct svc_xprt *xprt); >>>> }; >>>> >>>> struct svc_xprt_class { >>>> @@ -70,6 +70,9 @@ struct svc_xprt { >>>> #define XPT_LOCAL 12 /* connection from loopback interface */ >>>> #define XPT_KILL_TEMP 13 /* call xpo_kill_temp_xprt before closing */ >>>> #define XPT_CONG_CTRL 14 /* has congestion control */ >>>> +#define XPT_HANDSHAKE 15 /* xprt requests a handshake */ >>>> +#define XPT_TLS_SESSION 16 /* transport-layer security established */ >>>> +#define XPT_PEER_AUTH 17 /* peer has been authenticated */ >>>> >>>> struct svc_serv *xpt_server; /* service for transport */ >>>> atomic_t xpt_reserved; /* space on outq that is rsvd */ >>>> diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h >>>> index bcc555c7ae9c..1175e1c38bac 100644 >>>> --- a/include/linux/sunrpc/svcsock.h >>>> +++ b/include/linux/sunrpc/svcsock.h >>>> @@ -38,6 +38,8 @@ struct svc_sock { >>>> /* Number of queued send requests */ >>>> atomic_t sk_sendqlen; >>>> >>>> + struct completion sk_handshake_done; >>>> + >>>> struct page * sk_pages[RPCSVC_MAXPAGES]; /* received data */ >>>> }; >>>> >>>> diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h >>>> index cf286a0a17d0..2667a8db4811 100644 >>>> --- a/include/trace/events/sunrpc.h >>>> +++ b/include/trace/events/sunrpc.h >>>> @@ -1948,7 +1948,10 @@ TRACE_EVENT(svc_stats_latency, >>>> { BIT(XPT_CACHE_AUTH), "CACHE_AUTH" }, \ >>>> { BIT(XPT_LOCAL), "LOCAL" }, \ >>>> { BIT(XPT_KILL_TEMP), "KILL_TEMP" }, \ >>>> - { BIT(XPT_CONG_CTRL), "CONG_CTRL" }) >>>> + { BIT(XPT_CONG_CTRL), "CONG_CTRL" }, \ >>>> + { BIT(XPT_HANDSHAKE), "HANDSHAKE" }, \ >>>> + { BIT(XPT_TLS_SESSION), "TLS_SESSION" }, \ >>>> + { BIT(XPT_PEER_AUTH), "PEER_AUTH" }) >>>> >>>> TRACE_EVENT(svc_xprt_create_err, >>>> TP_PROTO( >>>> @@ -2081,6 +2084,17 @@ DEFINE_SVC_XPRT_EVENT(close); >>>> DEFINE_SVC_XPRT_EVENT(detach); >>>> DEFINE_SVC_XPRT_EVENT(free); >>>> >>>> +#define DEFINE_SVC_TLS_EVENT(name) \ >>>> + DEFINE_EVENT(svc_xprt_event, svc_tls_##name, \ >>>> + TP_PROTO(const struct svc_xprt *xprt), \ >>>> + TP_ARGS(xprt)) >>>> + >>>> +DEFINE_SVC_TLS_EVENT(start); >>>> +DEFINE_SVC_TLS_EVENT(upcall); >>>> +DEFINE_SVC_TLS_EVENT(unavailable); >>>> +DEFINE_SVC_TLS_EVENT(not_started); >>>> +DEFINE_SVC_TLS_EVENT(timed_out); >>>> + >>>> TRACE_EVENT(svc_xprt_accept, >>>> TP_PROTO( >>>> const struct svc_xprt *xprt, >>>> diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c >>>> index ba629297da4e..b68c04dbf876 100644 >>>> --- a/net/sunrpc/svc_xprt.c >>>> +++ b/net/sunrpc/svc_xprt.c >>>> @@ -427,7 +427,7 @@ static bool svc_xprt_ready(struct svc_xprt *xprt) >>>> >>>> if (xpt_flags & BIT(XPT_BUSY)) >>>> return false; >>>> - if (xpt_flags & (BIT(XPT_CONN) | BIT(XPT_CLOSE))) >>>> + if (xpt_flags & (BIT(XPT_CONN) | BIT(XPT_CLOSE) | BIT(XPT_HANDSHAKE))) >>>> return true; >>>> if (xpt_flags & (BIT(XPT_DATA) | BIT(XPT_DEFERRED))) { >>>> if (xprt->xpt_ops->xpo_has_wspace(xprt) && >>>> @@ -829,6 +829,9 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt) >>>> module_put(xprt->xpt_class->xcl_owner); >>>> } >>>> svc_xprt_received(xprt); >>>> + } else if (test_bit(XPT_HANDSHAKE, &xprt->xpt_flags)) { >>>> + xprt->xpt_ops->xpo_handshake(xprt); >>>> + svc_xprt_received(xprt); >>>> } else if (svc_xprt_reserve_slot(rqstp, xprt)) { >>>> /* XPT_DATA|XPT_DEFERRED case: */ >>>> dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n", >>>> diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c >>>> index 983c5891cb56..374995201df4 100644 >>>> --- a/net/sunrpc/svcauth_unix.c >>>> +++ b/net/sunrpc/svcauth_unix.c >>>> @@ -17,8 +17,9 @@ >>>> #include <net/ipv6.h> >>>> #include <linux/kernel.h> >>>> #include <linux/user_namespace.h> >>>> -#define RPCDBG_FACILITY RPCDBG_AUTH >>>> +#include <trace/events/sunrpc.h> >>>> >>>> +#define RPCDBG_FACILITY RPCDBG_AUTH >>>> >>>> #include "netns.h" >>>> >>>> @@ -823,6 +824,7 @@ svcauth_tls_accept(struct svc_rqst *rqstp) >>>> { >>>> struct xdr_stream *xdr = &rqstp->rq_arg_stream; >>>> struct svc_cred *cred = &rqstp->rq_cred; >>>> + struct svc_xprt *xprt = rqstp->rq_xprt; >>>> u32 flavor, len; >>>> void *body; >>>> __be32 *p; >>>> @@ -856,14 +858,19 @@ svcauth_tls_accept(struct svc_rqst *rqstp) >>>> if (cred->cr_group_info == NULL) >>>> return SVC_CLOSE; >>>> >>>> - if (rqstp->rq_xprt->xpt_ops->xpo_start_tls) { >>>> + if (xprt->xpt_ops->xpo_handshake) { >>>> p = xdr_reserve_space(&rqstp->rq_res_stream, XDR_UNIT * 2 + 8); >>>> if (!p) >>>> return SVC_CLOSE; >>>> + trace_svc_tls_start(xprt); >>>> *p++ = rpc_auth_null; >>>> *p++ = cpu_to_be32(8); >>>> memcpy(p, "STARTTLS", 8); >>>> + >>>> + set_bit(XPT_HANDSHAKE, &xprt->xpt_flags); >>>> + svc_xprt_enqueue(xprt); >>>> } else { >>>> + trace_svc_tls_unavailable(xprt); >>>> if (xdr_stream_encode_opaque_auth(&rqstp->rq_res_stream, >>>> RPC_AUTH_NULL, NULL, 0) < 0) >>>> return SVC_CLOSE; >>>> diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c >>>> index b6df73cb706a..16ba8d6ab20e 100644 >>>> --- a/net/sunrpc/svcsock.c >>>> +++ b/net/sunrpc/svcsock.c >>>> @@ -44,9 +44,11 @@ >>>> #include <net/tcp.h> >>>> #include <net/tcp_states.h> >>>> #include <net/tls.h> >>>> +#include <net/handshake.h> >>>> #include <linux/uaccess.h> >>>> #include <linux/highmem.h> >>>> #include <asm/ioctls.h> >>>> +#include <linux/key.h> >>>> >>>> #include <linux/sunrpc/types.h> >>>> #include <linux/sunrpc/clnt.h> >>>> @@ -64,6 +66,7 @@ >>>> >>>> #define RPCDBG_FACILITY RPCDBG_SVCXPRT >>>> >>>> +#define SVC_HANDSHAKE_TO (20U * HZ) >>>> >>>> static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *, >>>> int flags); >>>> @@ -360,6 +363,8 @@ static void svc_data_ready(struct sock *sk) >>>> rmb(); >>>> svsk->sk_odata(sk); >>>> trace_svcsock_data_ready(&svsk->sk_xprt, 0); >>>> + if (test_bit(XPT_HANDSHAKE, &svsk->sk_xprt.xpt_flags)) >>>> + return; >>>> if (!test_and_set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags)) >>>> svc_xprt_enqueue(&svsk->sk_xprt); >>>> } >>>> @@ -397,6 +402,89 @@ static void svc_tcp_kill_temp_xprt(struct svc_xprt *xprt) >>>> sock_no_linger(svsk->sk_sock->sk); >>>> } >>>> >>>> +/** >>>> + * svc_tcp_handshake_done - Handshake completion handler >>>> + * @data: address of xprt to wake >>>> + * @status: status of handshake >>>> + * @peerid: serial number of key containing the remote peer's identity >>>> + * >>>> + * If a security policy is specified as an export option, we don't >>>> + * have a specific export here to check. So we set a "TLS session >>>> + * is present" flag on the xprt and let an upper layer enforce local >>>> + * security policy. >>>> + */ >>>> +static void svc_tcp_handshake_done(void *data, int status, key_serial_t peerid) >>>> +{ >>>> + struct svc_xprt *xprt = data; >>>> + struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); >>>> + >>>> + if (!status) { >>>> + if (peerid != TLS_NO_PEERID) >>>> + set_bit(XPT_PEER_AUTH, &xprt->xpt_flags); >>>> + set_bit(XPT_TLS_SESSION, &xprt->xpt_flags); >>>> + } >>>> + clear_bit(XPT_HANDSHAKE, &xprt->xpt_flags); >>>> + complete_all(&svsk->sk_handshake_done); >>>> +} >>>> + >>>> +/** >>>> + * svc_tcp_handshake - Perform a transport-layer security handshake >>>> + * @xprt: connected transport endpoint >>>> + * >>>> + */ >>>> +static void svc_tcp_handshake(struct svc_xprt *xprt) >>>> +{ >>>> + struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); >>>> + struct tls_handshake_args args = { >>>> + .ta_sock = svsk->sk_sock, >>>> + .ta_done = svc_tcp_handshake_done, >>>> + .ta_data = xprt, >>>> + }; >>>> + int ret; >>>> + >>>> + trace_svc_tls_upcall(xprt); >>>> + >>>> + clear_bit(XPT_TLS_SESSION, &xprt->xpt_flags); >>>> + init_completion(&svsk->sk_handshake_done); >>>> + smp_wmb(); >>>> + >>>> + ret = tls_server_hello_x509(&args, GFP_KERNEL); >>>> + if (ret) { >>>> + trace_svc_tls_not_started(xprt); >>>> + goto out_failed; >>>> + } >>>> + >>>> + ret = wait_for_completion_interruptible_timeout(&svsk->sk_handshake_done, >>>> + SVC_HANDSHAKE_TO); >>> >>> Just curious: is this 20s timeout mandated by the spec? >> >> The spec doesn't mandate a timeout. I simply wanted >> to guarantee forward progress. >> >> >>> It seems like a long time to block a kernel thread if so. >> >> It's about the same as the client side timeout, fwiw. >> >> >>> Do we need to be concerned >>> with DoS attacks here? Could a client initiate handshakes and then stop >>> communicating, forcing the server to tie up threads with these 20s >>> waits? >> >> I think a malicious client can do all kinds of similar things >> already. Do you have a particular timeout value in mind, or >> is there some other mechanism we can use to better bullet- >> proof this aspect of the handshake? I'm open to suggestion. >> > > I don't have any suggestions, just trying to speculate about ways this > could break. The only alternative I could see would be to defer the > connection somehow until the reply comes in so that the thread can do > other stuff in the meantime. The server has a deferral mechanism, funnily enough. ;-) But we could also just use a system workqueue. Note that the handshake upcall mechanism itself has a limit on the concurrent number of pending handshakes it will allow. That might be enough to prevent a DoS. > That's something we can potentially add > later if we decide it's necessary though. I shortened the timeout and added a comment suggesting this as future work. >>>> + if (ret <= 0) { >>>> + if (tls_handshake_cancel(svsk->sk_sock)) { >>>> + trace_svc_tls_timed_out(xprt); >>>> + goto out_close; >>>> + } >>>> + } >>>> + >>>> + if (!test_bit(XPT_TLS_SESSION, &xprt->xpt_flags)) { >>>> + trace_svc_tls_unavailable(xprt); >>>> + goto out_close; >>>> + } >>>> + >>>> + /* >>>> + * Mark the transport ready in case the remote sent RPC >>>> + * traffic before the kernel received the handshake >>>> + * completion downcall. >>>> + */ >>>> + set_bit(XPT_DATA, &xprt->xpt_flags); >>>> + svc_xprt_enqueue(xprt); >>>> + return; >>>> + >>>> +out_close: >>>> + set_bit(XPT_CLOSE, &xprt->xpt_flags); >>>> +out_failed: >>>> + clear_bit(XPT_HANDSHAKE, &xprt->xpt_flags); >>>> + set_bit(XPT_DATA, &xprt->xpt_flags); >>>> + svc_xprt_enqueue(xprt); >>>> +} >>>> + >>>> /* >>>> * See net/ipv6/ip_sockglue.c : ip_cmsg_recv_pktinfo >>>> */ >>>> @@ -1260,6 +1348,7 @@ static const struct svc_xprt_ops svc_tcp_ops = { >>>> .xpo_has_wspace = svc_tcp_has_wspace, >>>> .xpo_accept = svc_tcp_accept, >>>> .xpo_kill_temp_xprt = svc_tcp_kill_temp_xprt, >>>> + .xpo_handshake = svc_tcp_handshake, >>>> }; >>>> >>>> static struct svc_xprt_class svc_tcp_class = { >>>> @@ -1584,6 +1673,8 @@ static void svc_sock_free(struct svc_xprt *xprt) >>>> { >>>> struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); >>>> >>>> + tls_handshake_cancel(svsk->sk_sock); >>>> + >>>> if (svsk->sk_sock->file) >>>> sockfd_put(svsk->sk_sock); >>>> else >>>> >>>> >>> >>> -- >>> Jeff Layton <jlayton@kernel.org> >> >> -- >> Chuck Lever >> >> > > -- > Jeff Layton <jlayton@kernel.org> -- Chuck Lever
On Tue, 2023-03-21 at 16:09 +0000, Chuck Lever III wrote: > > > On Mar 21, 2023, at 10:56 AM, Jeff Layton <jlayton@kernel.org> wrote: > > > > On Tue, 2023-03-21 at 14:03 +0000, Chuck Lever III wrote: > > > > > > > On Mar 21, 2023, at 7:43 AM, Jeff Layton <jlayton@kernel.org> wrote: > > > > > > > > On Mon, 2023-03-20 at 10:24 -0400, Chuck Lever wrote: > > > > > From: Chuck Lever <chuck.lever@oracle.com> > > > > > > > > > > This patch adds opportunitistic RPC-with-TLS to the Linux in-kernel > > > > > NFS server. If the client requests RPC-with-TLS and the user space > > > > > handshake agent is running, the server will set up a TLS session. > > > > > > > > > > There are no policy settings yet. For example, the server cannot > > > > > yet require the use of RPC-with-TLS to access its data. > > > > > > > > > > Signed-off-by: Chuck Lever <chuck.lever@oracle.com> > > > > > --- > > > > > include/linux/sunrpc/svc_xprt.h | 5 ++ > > > > > include/linux/sunrpc/svcsock.h | 2 + > > > > > include/trace/events/sunrpc.h | 16 ++++++- > > > > > net/sunrpc/svc_xprt.c | 5 ++ > > > > > net/sunrpc/svcauth_unix.c | 11 ++++- > > > > > net/sunrpc/svcsock.c | 91 +++++++++++++++++++++++++++++++++++++++ > > > > > 6 files changed, 125 insertions(+), 5 deletions(-) > > > > > > > > > > diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h > > > > > index 775368802762..867479204840 100644 > > > > > --- a/include/linux/sunrpc/svc_xprt.h > > > > > +++ b/include/linux/sunrpc/svc_xprt.h > > > > > @@ -27,7 +27,7 @@ struct svc_xprt_ops { > > > > > void (*xpo_detach)(struct svc_xprt *); > > > > > void (*xpo_free)(struct svc_xprt *); > > > > > void (*xpo_kill_temp_xprt)(struct svc_xprt *); > > > > > - void (*xpo_start_tls)(struct svc_xprt *); > > > > > + void (*xpo_handshake)(struct svc_xprt *xprt); > > > > > }; > > > > > > > > > > struct svc_xprt_class { > > > > > @@ -70,6 +70,9 @@ struct svc_xprt { > > > > > #define XPT_LOCAL 12 /* connection from loopback interface */ > > > > > #define XPT_KILL_TEMP 13 /* call xpo_kill_temp_xprt before closing */ > > > > > #define XPT_CONG_CTRL 14 /* has congestion control */ > > > > > +#define XPT_HANDSHAKE 15 /* xprt requests a handshake */ > > > > > +#define XPT_TLS_SESSION 16 /* transport-layer security established */ > > > > > +#define XPT_PEER_AUTH 17 /* peer has been authenticated */ > > > > > > > > > > struct svc_serv *xpt_server; /* service for transport */ > > > > > atomic_t xpt_reserved; /* space on outq that is rsvd */ > > > > > diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h > > > > > index bcc555c7ae9c..1175e1c38bac 100644 > > > > > --- a/include/linux/sunrpc/svcsock.h > > > > > +++ b/include/linux/sunrpc/svcsock.h > > > > > @@ -38,6 +38,8 @@ struct svc_sock { > > > > > /* Number of queued send requests */ > > > > > atomic_t sk_sendqlen; > > > > > > > > > > + struct completion sk_handshake_done; > > > > > + > > > > > struct page * sk_pages[RPCSVC_MAXPAGES]; /* received data */ > > > > > }; > > > > > > > > > > diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h > > > > > index cf286a0a17d0..2667a8db4811 100644 > > > > > --- a/include/trace/events/sunrpc.h > > > > > +++ b/include/trace/events/sunrpc.h > > > > > @@ -1948,7 +1948,10 @@ TRACE_EVENT(svc_stats_latency, > > > > > { BIT(XPT_CACHE_AUTH), "CACHE_AUTH" }, \ > > > > > { BIT(XPT_LOCAL), "LOCAL" }, \ > > > > > { BIT(XPT_KILL_TEMP), "KILL_TEMP" }, \ > > > > > - { BIT(XPT_CONG_CTRL), "CONG_CTRL" }) > > > > > + { BIT(XPT_CONG_CTRL), "CONG_CTRL" }, \ > > > > > + { BIT(XPT_HANDSHAKE), "HANDSHAKE" }, \ > > > > > + { BIT(XPT_TLS_SESSION), "TLS_SESSION" }, \ > > > > > + { BIT(XPT_PEER_AUTH), "PEER_AUTH" }) > > > > > > > > > > TRACE_EVENT(svc_xprt_create_err, > > > > > TP_PROTO( > > > > > @@ -2081,6 +2084,17 @@ DEFINE_SVC_XPRT_EVENT(close); > > > > > DEFINE_SVC_XPRT_EVENT(detach); > > > > > DEFINE_SVC_XPRT_EVENT(free); > > > > > > > > > > +#define DEFINE_SVC_TLS_EVENT(name) \ > > > > > + DEFINE_EVENT(svc_xprt_event, svc_tls_##name, \ > > > > > + TP_PROTO(const struct svc_xprt *xprt), \ > > > > > + TP_ARGS(xprt)) > > > > > + > > > > > +DEFINE_SVC_TLS_EVENT(start); > > > > > +DEFINE_SVC_TLS_EVENT(upcall); > > > > > +DEFINE_SVC_TLS_EVENT(unavailable); > > > > > +DEFINE_SVC_TLS_EVENT(not_started); > > > > > +DEFINE_SVC_TLS_EVENT(timed_out); > > > > > + > > > > > TRACE_EVENT(svc_xprt_accept, > > > > > TP_PROTO( > > > > > const struct svc_xprt *xprt, > > > > > diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c > > > > > index ba629297da4e..b68c04dbf876 100644 > > > > > --- a/net/sunrpc/svc_xprt.c > > > > > +++ b/net/sunrpc/svc_xprt.c > > > > > @@ -427,7 +427,7 @@ static bool svc_xprt_ready(struct svc_xprt *xprt) > > > > > > > > > > if (xpt_flags & BIT(XPT_BUSY)) > > > > > return false; > > > > > - if (xpt_flags & (BIT(XPT_CONN) | BIT(XPT_CLOSE))) > > > > > + if (xpt_flags & (BIT(XPT_CONN) | BIT(XPT_CLOSE) | BIT(XPT_HANDSHAKE))) > > > > > return true; > > > > > if (xpt_flags & (BIT(XPT_DATA) | BIT(XPT_DEFERRED))) { > > > > > if (xprt->xpt_ops->xpo_has_wspace(xprt) && > > > > > @@ -829,6 +829,9 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt) > > > > > module_put(xprt->xpt_class->xcl_owner); > > > > > } > > > > > svc_xprt_received(xprt); > > > > > + } else if (test_bit(XPT_HANDSHAKE, &xprt->xpt_flags)) { > > > > > + xprt->xpt_ops->xpo_handshake(xprt); > > > > > + svc_xprt_received(xprt); > > > > > } else if (svc_xprt_reserve_slot(rqstp, xprt)) { > > > > > /* XPT_DATA|XPT_DEFERRED case: */ > > > > > dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n", > > > > > diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c > > > > > index 983c5891cb56..374995201df4 100644 > > > > > --- a/net/sunrpc/svcauth_unix.c > > > > > +++ b/net/sunrpc/svcauth_unix.c > > > > > @@ -17,8 +17,9 @@ > > > > > #include <net/ipv6.h> > > > > > #include <linux/kernel.h> > > > > > #include <linux/user_namespace.h> > > > > > -#define RPCDBG_FACILITY RPCDBG_AUTH > > > > > +#include <trace/events/sunrpc.h> > > > > > > > > > > +#define RPCDBG_FACILITY RPCDBG_AUTH > > > > > > > > > > #include "netns.h" > > > > > > > > > > @@ -823,6 +824,7 @@ svcauth_tls_accept(struct svc_rqst *rqstp) > > > > > { > > > > > struct xdr_stream *xdr = &rqstp->rq_arg_stream; > > > > > struct svc_cred *cred = &rqstp->rq_cred; > > > > > + struct svc_xprt *xprt = rqstp->rq_xprt; > > > > > u32 flavor, len; > > > > > void *body; > > > > > __be32 *p; > > > > > @@ -856,14 +858,19 @@ svcauth_tls_accept(struct svc_rqst *rqstp) > > > > > if (cred->cr_group_info == NULL) > > > > > return SVC_CLOSE; > > > > > > > > > > - if (rqstp->rq_xprt->xpt_ops->xpo_start_tls) { > > > > > + if (xprt->xpt_ops->xpo_handshake) { > > > > > p = xdr_reserve_space(&rqstp->rq_res_stream, XDR_UNIT * 2 + 8); > > > > > if (!p) > > > > > return SVC_CLOSE; > > > > > + trace_svc_tls_start(xprt); > > > > > *p++ = rpc_auth_null; > > > > > *p++ = cpu_to_be32(8); > > > > > memcpy(p, "STARTTLS", 8); > > > > > + > > > > > + set_bit(XPT_HANDSHAKE, &xprt->xpt_flags); > > > > > + svc_xprt_enqueue(xprt); > > > > > } else { > > > > > + trace_svc_tls_unavailable(xprt); > > > > > if (xdr_stream_encode_opaque_auth(&rqstp->rq_res_stream, > > > > > RPC_AUTH_NULL, NULL, 0) < 0) > > > > > return SVC_CLOSE; > > > > > diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c > > > > > index b6df73cb706a..16ba8d6ab20e 100644 > > > > > --- a/net/sunrpc/svcsock.c > > > > > +++ b/net/sunrpc/svcsock.c > > > > > @@ -44,9 +44,11 @@ > > > > > #include <net/tcp.h> > > > > > #include <net/tcp_states.h> > > > > > #include <net/tls.h> > > > > > +#include <net/handshake.h> > > > > > #include <linux/uaccess.h> > > > > > #include <linux/highmem.h> > > > > > #include <asm/ioctls.h> > > > > > +#include <linux/key.h> > > > > > > > > > > #include <linux/sunrpc/types.h> > > > > > #include <linux/sunrpc/clnt.h> > > > > > @@ -64,6 +66,7 @@ > > > > > > > > > > #define RPCDBG_FACILITY RPCDBG_SVCXPRT > > > > > > > > > > +#define SVC_HANDSHAKE_TO (20U * HZ) > > > > > > > > > > static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *, > > > > > int flags); > > > > > @@ -360,6 +363,8 @@ static void svc_data_ready(struct sock *sk) > > > > > rmb(); > > > > > svsk->sk_odata(sk); > > > > > trace_svcsock_data_ready(&svsk->sk_xprt, 0); > > > > > + if (test_bit(XPT_HANDSHAKE, &svsk->sk_xprt.xpt_flags)) > > > > > + return; > > > > > if (!test_and_set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags)) > > > > > svc_xprt_enqueue(&svsk->sk_xprt); > > > > > } > > > > > @@ -397,6 +402,89 @@ static void svc_tcp_kill_temp_xprt(struct svc_xprt *xprt) > > > > > sock_no_linger(svsk->sk_sock->sk); > > > > > } > > > > > > > > > > +/** > > > > > + * svc_tcp_handshake_done - Handshake completion handler > > > > > + * @data: address of xprt to wake > > > > > + * @status: status of handshake > > > > > + * @peerid: serial number of key containing the remote peer's identity > > > > > + * > > > > > + * If a security policy is specified as an export option, we don't > > > > > + * have a specific export here to check. So we set a "TLS session > > > > > + * is present" flag on the xprt and let an upper layer enforce local > > > > > + * security policy. > > > > > + */ > > > > > +static void svc_tcp_handshake_done(void *data, int status, key_serial_t peerid) > > > > > +{ > > > > > + struct svc_xprt *xprt = data; > > > > > + struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); > > > > > + > > > > > + if (!status) { > > > > > + if (peerid != TLS_NO_PEERID) > > > > > + set_bit(XPT_PEER_AUTH, &xprt->xpt_flags); > > > > > + set_bit(XPT_TLS_SESSION, &xprt->xpt_flags); > > > > > + } > > > > > + clear_bit(XPT_HANDSHAKE, &xprt->xpt_flags); > > > > > + complete_all(&svsk->sk_handshake_done); > > > > > +} > > > > > + > > > > > +/** > > > > > + * svc_tcp_handshake - Perform a transport-layer security handshake > > > > > + * @xprt: connected transport endpoint > > > > > + * > > > > > + */ > > > > > +static void svc_tcp_handshake(struct svc_xprt *xprt) > > > > > +{ > > > > > + struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); > > > > > + struct tls_handshake_args args = { > > > > > + .ta_sock = svsk->sk_sock, > > > > > + .ta_done = svc_tcp_handshake_done, > > > > > + .ta_data = xprt, > > > > > + }; > > > > > + int ret; > > > > > + > > > > > + trace_svc_tls_upcall(xprt); > > > > > + > > > > > + clear_bit(XPT_TLS_SESSION, &xprt->xpt_flags); > > > > > + init_completion(&svsk->sk_handshake_done); > > > > > + smp_wmb(); > > > > > + > > > > > + ret = tls_server_hello_x509(&args, GFP_KERNEL); > > > > > + if (ret) { > > > > > + trace_svc_tls_not_started(xprt); > > > > > + goto out_failed; > > > > > + } > > > > > + > > > > > + ret = wait_for_completion_interruptible_timeout(&svsk->sk_handshake_done, > > > > > + SVC_HANDSHAKE_TO); > > > > > > > > Just curious: is this 20s timeout mandated by the spec? > > > > > > The spec doesn't mandate a timeout. I simply wanted > > > to guarantee forward progress. > > > > > > > > > > It seems like a long time to block a kernel thread if so. > > > > > > It's about the same as the client side timeout, fwiw. > > > > > > > > > > Do we need to be concerned > > > > with DoS attacks here? Could a client initiate handshakes and then stop > > > > communicating, forcing the server to tie up threads with these 20s > > > > waits? > > > > > > I think a malicious client can do all kinds of similar things > > > already. Do you have a particular timeout value in mind, or > > > is there some other mechanism we can use to better bullet- > > > proof this aspect of the handshake? I'm open to suggestion. > > > > > > > I don't have any suggestions, just trying to speculate about ways this > > could break. The only alternative I could see would be to defer the > > connection somehow until the reply comes in so that the thread can do > > other stuff in the meantime. > > The server has a deferral mechanism, funnily enough. ;-) But > we could also just use a system workqueue. > Yeah. I wasn't sure if it was suitable for use here, but using the deferral infrastructure might be the way to go. That code is quite difficult to follow though. It's one of sunrpc's dustier corners. I'm not sure I follow what you're thinking on workqueues. Do you mean "defer" the svc_rqst and then queue it to a workqueue when the reply comes in? That might be ok too. We did have those nfsd workqueue patches that we could base the design on. > Note that the handshake upcall mechanism itself has a limit > on the concurrent number of pending handshakes it will allow. > That might be enough to prevent a DoS. > What happens when you hit the limit? Would it be possible for someone malicious to block legitimate TLS connections from getting through by stalling the handshake on a bunch of connections? It might be worthwhile to consider some sort of per-client-address limit as well. Nonetheless, I don't see this as a show stopper that would keep us from merging a working implementation. We can add this sort of mitigation later too. > > That's something we can potentially add > > later if we decide it's necessary though. > > I shortened the timeout and added a comment suggesting this > as future work. > Sounds good. > > > > > > + if (ret <= 0) { > > > > > + if (tls_handshake_cancel(svsk->sk_sock)) { > > > > > + trace_svc_tls_timed_out(xprt); > > > > > + goto out_close; > > > > > + } > > > > > + } > > > > > + > > > > > + if (!test_bit(XPT_TLS_SESSION, &xprt->xpt_flags)) { > > > > > + trace_svc_tls_unavailable(xprt); > > > > > + goto out_close; > > > > > + } > > > > > + > > > > > + /* > > > > > + * Mark the transport ready in case the remote sent RPC > > > > > + * traffic before the kernel received the handshake > > > > > + * completion downcall. > > > > > + */ > > > > > + set_bit(XPT_DATA, &xprt->xpt_flags); > > > > > + svc_xprt_enqueue(xprt); > > > > > + return; > > > > > + > > > > > +out_close: > > > > > + set_bit(XPT_CLOSE, &xprt->xpt_flags); > > > > > +out_failed: > > > > > + clear_bit(XPT_HANDSHAKE, &xprt->xpt_flags); > > > > > + set_bit(XPT_DATA, &xprt->xpt_flags); > > > > > + svc_xprt_enqueue(xprt); > > > > > +} > > > > > + > > > > > /* > > > > > * See net/ipv6/ip_sockglue.c : ip_cmsg_recv_pktinfo > > > > > */ > > > > > @@ -1260,6 +1348,7 @@ static const struct svc_xprt_ops svc_tcp_ops = { > > > > > .xpo_has_wspace = svc_tcp_has_wspace, > > > > > .xpo_accept = svc_tcp_accept, > > > > > .xpo_kill_temp_xprt = svc_tcp_kill_temp_xprt, > > > > > + .xpo_handshake = svc_tcp_handshake, > > > > > }; > > > > > > > > > > static struct svc_xprt_class svc_tcp_class = { > > > > > @@ -1584,6 +1673,8 @@ static void svc_sock_free(struct svc_xprt *xprt) > > > > > { > > > > > struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); > > > > > > > > > > + tls_handshake_cancel(svsk->sk_sock); > > > > > + > > > > > if (svsk->sk_sock->file) > > > > > sockfd_put(svsk->sk_sock); > > > > > else > > > > > > > > > > > > > > > > > > -- > > > > Jeff Layton <jlayton@kernel.org> > > > > > > -- > > > Chuck Lever > > > > > > > > > > -- > > Jeff Layton <jlayton@kernel.org> > > -- > Chuck Lever > > >
diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 775368802762..867479204840 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -27,7 +27,7 @@ struct svc_xprt_ops { void (*xpo_detach)(struct svc_xprt *); void (*xpo_free)(struct svc_xprt *); void (*xpo_kill_temp_xprt)(struct svc_xprt *); - void (*xpo_start_tls)(struct svc_xprt *); + void (*xpo_handshake)(struct svc_xprt *xprt); }; struct svc_xprt_class { @@ -70,6 +70,9 @@ struct svc_xprt { #define XPT_LOCAL 12 /* connection from loopback interface */ #define XPT_KILL_TEMP 13 /* call xpo_kill_temp_xprt before closing */ #define XPT_CONG_CTRL 14 /* has congestion control */ +#define XPT_HANDSHAKE 15 /* xprt requests a handshake */ +#define XPT_TLS_SESSION 16 /* transport-layer security established */ +#define XPT_PEER_AUTH 17 /* peer has been authenticated */ struct svc_serv *xpt_server; /* service for transport */ atomic_t xpt_reserved; /* space on outq that is rsvd */ diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h index bcc555c7ae9c..1175e1c38bac 100644 --- a/include/linux/sunrpc/svcsock.h +++ b/include/linux/sunrpc/svcsock.h @@ -38,6 +38,8 @@ struct svc_sock { /* Number of queued send requests */ atomic_t sk_sendqlen; + struct completion sk_handshake_done; + struct page * sk_pages[RPCSVC_MAXPAGES]; /* received data */ }; diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index cf286a0a17d0..2667a8db4811 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -1948,7 +1948,10 @@ TRACE_EVENT(svc_stats_latency, { BIT(XPT_CACHE_AUTH), "CACHE_AUTH" }, \ { BIT(XPT_LOCAL), "LOCAL" }, \ { BIT(XPT_KILL_TEMP), "KILL_TEMP" }, \ - { BIT(XPT_CONG_CTRL), "CONG_CTRL" }) + { BIT(XPT_CONG_CTRL), "CONG_CTRL" }, \ + { BIT(XPT_HANDSHAKE), "HANDSHAKE" }, \ + { BIT(XPT_TLS_SESSION), "TLS_SESSION" }, \ + { BIT(XPT_PEER_AUTH), "PEER_AUTH" }) TRACE_EVENT(svc_xprt_create_err, TP_PROTO( @@ -2081,6 +2084,17 @@ DEFINE_SVC_XPRT_EVENT(close); DEFINE_SVC_XPRT_EVENT(detach); DEFINE_SVC_XPRT_EVENT(free); +#define DEFINE_SVC_TLS_EVENT(name) \ + DEFINE_EVENT(svc_xprt_event, svc_tls_##name, \ + TP_PROTO(const struct svc_xprt *xprt), \ + TP_ARGS(xprt)) + +DEFINE_SVC_TLS_EVENT(start); +DEFINE_SVC_TLS_EVENT(upcall); +DEFINE_SVC_TLS_EVENT(unavailable); +DEFINE_SVC_TLS_EVENT(not_started); +DEFINE_SVC_TLS_EVENT(timed_out); + TRACE_EVENT(svc_xprt_accept, TP_PROTO( const struct svc_xprt *xprt, diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index ba629297da4e..b68c04dbf876 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -427,7 +427,7 @@ static bool svc_xprt_ready(struct svc_xprt *xprt) if (xpt_flags & BIT(XPT_BUSY)) return false; - if (xpt_flags & (BIT(XPT_CONN) | BIT(XPT_CLOSE))) + if (xpt_flags & (BIT(XPT_CONN) | BIT(XPT_CLOSE) | BIT(XPT_HANDSHAKE))) return true; if (xpt_flags & (BIT(XPT_DATA) | BIT(XPT_DEFERRED))) { if (xprt->xpt_ops->xpo_has_wspace(xprt) && @@ -829,6 +829,9 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt) module_put(xprt->xpt_class->xcl_owner); } svc_xprt_received(xprt); + } else if (test_bit(XPT_HANDSHAKE, &xprt->xpt_flags)) { + xprt->xpt_ops->xpo_handshake(xprt); + svc_xprt_received(xprt); } else if (svc_xprt_reserve_slot(rqstp, xprt)) { /* XPT_DATA|XPT_DEFERRED case: */ dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n", diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 983c5891cb56..374995201df4 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -17,8 +17,9 @@ #include <net/ipv6.h> #include <linux/kernel.h> #include <linux/user_namespace.h> -#define RPCDBG_FACILITY RPCDBG_AUTH +#include <trace/events/sunrpc.h> +#define RPCDBG_FACILITY RPCDBG_AUTH #include "netns.h" @@ -823,6 +824,7 @@ svcauth_tls_accept(struct svc_rqst *rqstp) { struct xdr_stream *xdr = &rqstp->rq_arg_stream; struct svc_cred *cred = &rqstp->rq_cred; + struct svc_xprt *xprt = rqstp->rq_xprt; u32 flavor, len; void *body; __be32 *p; @@ -856,14 +858,19 @@ svcauth_tls_accept(struct svc_rqst *rqstp) if (cred->cr_group_info == NULL) return SVC_CLOSE; - if (rqstp->rq_xprt->xpt_ops->xpo_start_tls) { + if (xprt->xpt_ops->xpo_handshake) { p = xdr_reserve_space(&rqstp->rq_res_stream, XDR_UNIT * 2 + 8); if (!p) return SVC_CLOSE; + trace_svc_tls_start(xprt); *p++ = rpc_auth_null; *p++ = cpu_to_be32(8); memcpy(p, "STARTTLS", 8); + + set_bit(XPT_HANDSHAKE, &xprt->xpt_flags); + svc_xprt_enqueue(xprt); } else { + trace_svc_tls_unavailable(xprt); if (xdr_stream_encode_opaque_auth(&rqstp->rq_res_stream, RPC_AUTH_NULL, NULL, 0) < 0) return SVC_CLOSE; diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index b6df73cb706a..16ba8d6ab20e 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -44,9 +44,11 @@ #include <net/tcp.h> #include <net/tcp_states.h> #include <net/tls.h> +#include <net/handshake.h> #include <linux/uaccess.h> #include <linux/highmem.h> #include <asm/ioctls.h> +#include <linux/key.h> #include <linux/sunrpc/types.h> #include <linux/sunrpc/clnt.h> @@ -64,6 +66,7 @@ #define RPCDBG_FACILITY RPCDBG_SVCXPRT +#define SVC_HANDSHAKE_TO (20U * HZ) static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *, int flags); @@ -360,6 +363,8 @@ static void svc_data_ready(struct sock *sk) rmb(); svsk->sk_odata(sk); trace_svcsock_data_ready(&svsk->sk_xprt, 0); + if (test_bit(XPT_HANDSHAKE, &svsk->sk_xprt.xpt_flags)) + return; if (!test_and_set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags)) svc_xprt_enqueue(&svsk->sk_xprt); } @@ -397,6 +402,89 @@ static void svc_tcp_kill_temp_xprt(struct svc_xprt *xprt) sock_no_linger(svsk->sk_sock->sk); } +/** + * svc_tcp_handshake_done - Handshake completion handler + * @data: address of xprt to wake + * @status: status of handshake + * @peerid: serial number of key containing the remote peer's identity + * + * If a security policy is specified as an export option, we don't + * have a specific export here to check. So we set a "TLS session + * is present" flag on the xprt and let an upper layer enforce local + * security policy. + */ +static void svc_tcp_handshake_done(void *data, int status, key_serial_t peerid) +{ + struct svc_xprt *xprt = data; + struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); + + if (!status) { + if (peerid != TLS_NO_PEERID) + set_bit(XPT_PEER_AUTH, &xprt->xpt_flags); + set_bit(XPT_TLS_SESSION, &xprt->xpt_flags); + } + clear_bit(XPT_HANDSHAKE, &xprt->xpt_flags); + complete_all(&svsk->sk_handshake_done); +} + +/** + * svc_tcp_handshake - Perform a transport-layer security handshake + * @xprt: connected transport endpoint + * + */ +static void svc_tcp_handshake(struct svc_xprt *xprt) +{ + struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); + struct tls_handshake_args args = { + .ta_sock = svsk->sk_sock, + .ta_done = svc_tcp_handshake_done, + .ta_data = xprt, + }; + int ret; + + trace_svc_tls_upcall(xprt); + + clear_bit(XPT_TLS_SESSION, &xprt->xpt_flags); + init_completion(&svsk->sk_handshake_done); + smp_wmb(); + + ret = tls_server_hello_x509(&args, GFP_KERNEL); + if (ret) { + trace_svc_tls_not_started(xprt); + goto out_failed; + } + + ret = wait_for_completion_interruptible_timeout(&svsk->sk_handshake_done, + SVC_HANDSHAKE_TO); + if (ret <= 0) { + if (tls_handshake_cancel(svsk->sk_sock)) { + trace_svc_tls_timed_out(xprt); + goto out_close; + } + } + + if (!test_bit(XPT_TLS_SESSION, &xprt->xpt_flags)) { + trace_svc_tls_unavailable(xprt); + goto out_close; + } + + /* + * Mark the transport ready in case the remote sent RPC + * traffic before the kernel received the handshake + * completion downcall. + */ + set_bit(XPT_DATA, &xprt->xpt_flags); + svc_xprt_enqueue(xprt); + return; + +out_close: + set_bit(XPT_CLOSE, &xprt->xpt_flags); +out_failed: + clear_bit(XPT_HANDSHAKE, &xprt->xpt_flags); + set_bit(XPT_DATA, &xprt->xpt_flags); + svc_xprt_enqueue(xprt); +} + /* * See net/ipv6/ip_sockglue.c : ip_cmsg_recv_pktinfo */ @@ -1260,6 +1348,7 @@ static const struct svc_xprt_ops svc_tcp_ops = { .xpo_has_wspace = svc_tcp_has_wspace, .xpo_accept = svc_tcp_accept, .xpo_kill_temp_xprt = svc_tcp_kill_temp_xprt, + .xpo_handshake = svc_tcp_handshake, }; static struct svc_xprt_class svc_tcp_class = { @@ -1584,6 +1673,8 @@ static void svc_sock_free(struct svc_xprt *xprt) { struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); + tls_handshake_cancel(svsk->sk_sock); + if (svsk->sk_sock->file) sockfd_put(svsk->sk_sock); else