diff mbox

[v2] crypto: AF_ALG - race-free access of encryption flag

Message ID 1943686.EAKghbSqDq@positron.chronox.de (mailing list archive)
State Changes Requested
Headers show

Commit Message

Stephan Mueller Nov. 28, 2017, 9:33 p.m. UTC
Hi Herbert,

I verified the correctnes of the patch with Eric's test program.
Without the patch, the issue is present. With the patch, the kernel
happily lives ever after.

Changes v2: change the submission into a proper patch

Ciao
Stephan

---8<---

The function af_alg_get_rsgl may sleep to wait for additional data. In
this case, the socket lock may be dropped. This allows user space to
change values in the socket data structure. Hence, all variables of the
context that are needed before and after the af_alg_get_rsgl must be
copied into a local variable.

This issue applies to the ctx->enc variable only. Therefore, this
value is copied into a local variable that is used for all operations
before and after the potential sleep and lock release. This implies that
any changes on this variable while the kernel waits for data will only
be picked up in another recvmsg operation.

Reported-by: syzbot <syzkaller@googlegroups.com>
Cc: <stable@vger.kernel.org> # v4.14+
Signed-off-by: Stephan Mueller <smueller@chronox.de>
---
 crypto/algif_aead.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

Comments

Eric Biggers Nov. 28, 2017, 10:40 p.m. UTC | #1
On Tue, Nov 28, 2017 at 10:33:09PM +0100, Stephan Müller wrote:
> Hi Herbert,
> 
> I verified the correctnes of the patch with Eric's test program.
> Without the patch, the issue is present. With the patch, the kernel
> happily lives ever after.
> 
> Changes v2: change the submission into a proper patch
> 
> Ciao
> Stephan
> 
> ---8<---
> 
> The function af_alg_get_rsgl may sleep to wait for additional data. In
> this case, the socket lock may be dropped. This allows user space to
> change values in the socket data structure. Hence, all variables of the
> context that are needed before and after the af_alg_get_rsgl must be
> copied into a local variable.
> 
> This issue applies to the ctx->enc variable only. Therefore, this
> value is copied into a local variable that is used for all operations
> before and after the potential sleep and lock release. This implies that
> any changes on this variable while the kernel waits for data will only
> be picked up in another recvmsg operation.
> 

This isn't enough because ->aead_assoclen is also affected.  Below is a program
which causes a crash even with your patch applied.

Also I feel this is just papering over the real problem which is that the wait
is in the wrong place -- and therefore the socket lock is being dropped in the
wrong place.  If it's necessary at all it seems it should happen earlier, before
the stuff from the 'ctx' starts being used.  At the very least, it is very
difficult to understand whether the current code is correct or not.  (Which
usually means it's not.)

#include <linux/if_alg.h>
#include <stdlib.h>
#include <sys/socket.h>
#include <sys/wait.h>
#include <time.h>
#include <unistd.h>

#define SOL_ALG 0x117
#define ALG_SET_AEAD_ASSOCLEN 4

int main()
{
	srand(time(NULL));
        for (;;) {
                int algfd, reqfd;
                char buf[12];
                char key[16] = { 0 };
                struct sockaddr_alg addr = {
                        .salg_type = "aead",
                        .salg_name = "gcm(aes)",
                };
                struct {
                        struct cmsghdr hdr;
                        __u32 op;
                } set_op = {
                        .hdr = {
                                .cmsg_len = sizeof(set_op),
                                .cmsg_level = SOL_ALG,
                                .cmsg_type = ALG_SET_OP,
                        },
			.op = ALG_OP_ENCRYPT,
                };
                struct msghdr set_op_msg = {
                        .msg_control = &set_op,
                        .msg_controllen = sizeof(set_op),
                };
                struct {
                        struct cmsghdr hdr1;
                        __u32 assoclen;
                        struct cmsghdr hdr2;
                        __u32 op;
                } set_assoclen = {
                        .hdr1 = {
                                .cmsg_len = sizeof(set_assoclen),
                                .cmsg_level = SOL_ALG,
                                .cmsg_type = ALG_SET_AEAD_ASSOCLEN,
                        },
			.assoclen = 1000,
                        .hdr2 = {
                                .cmsg_len = sizeof(set_assoclen),
                                .cmsg_level = SOL_ALG,
                                .cmsg_type = ALG_SET_OP,
                        },
			.op = ALG_OP_ENCRYPT,
                };
                struct msghdr set_assoclen_msg = {
			.msg_iov = &(struct iovec) { .iov_base = buf,
						     .iov_len = 64 },
			.msg_iovlen = 1,
                        .msg_control = &set_assoclen,
                        .msg_controllen = sizeof(set_assoclen),
                };

                algfd = socket(AF_ALG, SOCK_SEQPACKET, 0);

                bind(algfd, (void *)&addr, sizeof(addr));

                setsockopt(algfd, SOL_ALG, ALG_SET_KEY, key, sizeof(key));

                reqfd = accept(algfd, NULL, NULL);

                sendmsg(reqfd, &set_op_msg, 0);

                if (fork() == 0) {
			usleep(rand() % 10000);
                        sendmsg(reqfd, &set_assoclen_msg, 0);
                        break;
                }

		usleep(rand() % 10000);
                read(reqfd, buf, sizeof(buf));
                wait(NULL);
		close(algfd);
		close(reqfd);
        }
}
Herbert Xu Nov. 28, 2017, 11:02 p.m. UTC | #2
On Tue, Nov 28, 2017 at 10:33:09PM +0100, Stephan Müller wrote:
> Hi Herbert,
> 
> I verified the correctnes of the patch with Eric's test program.
> Without the patch, the issue is present. With the patch, the kernel
> happily lives ever after.
> 
> Changes v2: change the submission into a proper patch
> 
> Ciao
> Stephan
> 
> ---8<---
> 
> The function af_alg_get_rsgl may sleep to wait for additional data. In
> this case, the socket lock may be dropped. This allows user space to
> change values in the socket data structure. Hence, all variables of the
> context that are needed before and after the af_alg_get_rsgl must be
> copied into a local variable.
> 
> This issue applies to the ctx->enc variable only. Therefore, this
> value is copied into a local variable that is used for all operations
> before and after the potential sleep and lock release. This implies that
> any changes on this variable while the kernel waits for data will only
> be picked up in another recvmsg operation.
> 
> Reported-by: syzbot <syzkaller@googlegroups.com>
> Cc: <stable@vger.kernel.org> # v4.14+
> Signed-off-by: Stephan Mueller <smueller@chronox.de>

> ---
>  crypto/algif_aead.c | 9 +++++----
>  1 file changed, 5 insertions(+), 4 deletions(-)
> 
> diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c
> index 7d2d162666e5..4ba13c0b97ca 100644
> --- a/crypto/algif_aead.c
> +++ b/crypto/algif_aead.c
> @@ -110,6 +110,7 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg,
>  	size_t outlen = 0;		/* [out] RX bufs produced by kernel */
>  	size_t usedpages = 0;		/* [in]  RX bufs to be used from user */
>  	size_t processed = 0;		/* [in]  TX bufs to be consumed */
> +	bool enc = ctx->enc;		/* prevent race if sock lock dropped */

This is wrong.  You can't fetch ctx->enc before you wait.  It has
to be done after the wait as otherwise ctx->enc may not even have
been initialised.

In fact the position of the wait call is just wrong.  You should only
ever do at most one wait at the start of the recvmsg call.  Once you
have enough data from that wait no further waits should be done for
that recvmsg call.  The original commit had it right with just one
wait at the start of recvmsg (400c40cf78da).

Fetching of ctx state should be performed after this wait.

Cheers,
Stephan Mueller Nov. 29, 2017, 6:48 a.m. UTC | #3
Am Mittwoch, 29. November 2017, 00:02:40 CET schrieb Herbert Xu:

Hi Herbert,

> > --- a/crypto/algif_aead.c
> > +++ b/crypto/algif_aead.c
> > @@ -110,6 +110,7 @@ static int _aead_recvmsg(struct socket *sock, struct
> > msghdr *msg,> 
> >  	size_t outlen = 0;		/* [out] RX bufs produced by kernel */
> >  	size_t usedpages = 0;		/* [in]  RX bufs to be used from user */
> >  	size_t processed = 0;		/* [in]  TX bufs to be consumed */
> > 
> > +	bool enc = ctx->enc;		/* prevent race if sock lock dropped */
> 
> This is wrong.  You can't fetch ctx->enc before you wait.  It has
> to be done after the wait as otherwise ctx->enc may not even have
> been initialised.

All ctx variables are initialized in aead_accept_parent_nokey. Thus, if no 
sendmsg call is invoked by user space, at least a valid operation is 
performed.
> 
> In fact the position of the wait call is just wrong.  You should only
> ever do at most one wait at the start of the recvmsg call.  Once you
> have enough data from that wait no further waits should be done for
> that recvmsg call.  The original commit had it right with just one
> wait at the start of recvmsg (400c40cf78da).

I am wondering about the wait call inside the while () in af_alg_get_rsgl: 
this has been taken from algif_skcipher. It would seem that moving the wait 
call to the beginning of the recvmsg call would be applicable to skcipher as 
well. I.e. the wait call should be removed from af_alg_get_rsgl entirely and 
placed at the beginning the recvmsg function in both, the aead and skcipher 
code paths.
> 
> Fetching of ctx state should be performed after this wait.
> 
> Cheers,



Ciao
Stephan
Herbert Xu Nov. 29, 2017, 7:10 a.m. UTC | #4
On Wed, Nov 29, 2017 at 07:48:53AM +0100, Stephan Mueller wrote:
> Am Mittwoch, 29. November 2017, 00:02:40 CET schrieb Herbert Xu:
>
> > This is wrong.  You can't fetch ctx->enc before you wait.  It has
> > to be done after the wait as otherwise ctx->enc may not even have
> > been initialised.
> 
> All ctx variables are initialized in aead_accept_parent_nokey. Thus, if no 
> sendmsg call is invoked by user space, at least a valid operation is 
> performed.

I didn't mean initialised literally, I meant the value the user
specified in sendmsg.  If the recvmsg call precedes sendmsg then
you can't possibly get the value that the user supplied.

> I am wondering about the wait call inside the while () in af_alg_get_rsgl: 
> this has been taken from algif_skcipher. It would seem that moving the wait 
> call to the beginning of the recvmsg call would be applicable to skcipher as 
> well. I.e. the wait call should be removed from af_alg_get_rsgl entirely and 
> placed at the beginning the recvmsg function in both, the aead and skcipher 
> code paths.

It sort of worked for skcipher because it didn't care if ctx->enc
or even ctx->iv changed midstream.  But even there I don't think
we need to wait a second time.  In fact waiting a second time could
result in a dead-lock if no sendmsg call came around.

So we should definitely wait only once for both aead and skcipher.

Back to the original problem of ctx->enc changing midstream.  I
suppose it could actually be valid, e.g., when one request ended
and you wish to sendmsg a second request while the first is yet
to be started by recvmsg.

So perhaps we can fix it by actually putting such request-specific
information into a list along with the data rather than storing
it in ctx directly as we do now.

But anyway this isn't suitable for stable where we should just fix
it by making it not crash.

Thanks,
Stephan Mueller Nov. 29, 2017, 7:17 a.m. UTC | #5
Am Mittwoch, 29. November 2017, 08:10:49 CET schrieb Herbert Xu:

Hi Herbert,

> 
> But anyway this isn't suitable for stable where we should just fix
> it by making it not crash.

I will send a patch right away moving the wait out.

Later on I will elaborate on your suggestion to move the context-specific 
information on a list.

Ciao
Stephan
Stephan Mueller Nov. 29, 2017, 11:05 a.m. UTC | #6
Am Mittwoch, 29. November 2017, 08:10:49 CET schrieb Herbert Xu:

Hi Herbert,

> 
> It sort of worked for skcipher because it didn't care if ctx->enc
> or even ctx->iv changed midstream.  But even there I don't think
> we need to wait a second time.  In fact waiting a second time could
> result in a dead-lock if no sendmsg call came around.

Shouldn't we then create a patch for the pre-4.14 algif_skcipher code that 
moves the wait out of the while loop to the beginning of the function in 
recvmsg?

Ciao
Stephan
Herbert Xu Nov. 29, 2017, 12:17 p.m. UTC | #7
On Wed, Nov 29, 2017 at 12:05:13PM +0100, Stephan Müller wrote:
>
> Shouldn't we then create a patch for the pre-4.14 algif_skcipher code that 
> moves the wait out of the while loop to the beginning of the function in 
> recvmsg?

When I said dead-lock I just meant that the recvmsg will block
indefinitely.  It can still be interrupted from user-space.

As the behaviour is currently undefined anyway, I don't think we
need to backport this fix.  At least not until we maintain a list
of ctx->enc like we discussed.

Cheers,
diff mbox

Patch

diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c
index 7d2d162666e5..4ba13c0b97ca 100644
--- a/crypto/algif_aead.c
+++ b/crypto/algif_aead.c
@@ -110,6 +110,7 @@  static int _aead_recvmsg(struct socket *sock, struct msghdr *msg,
 	size_t outlen = 0;		/* [out] RX bufs produced by kernel */
 	size_t usedpages = 0;		/* [in]  RX bufs to be used from user */
 	size_t processed = 0;		/* [in]  TX bufs to be consumed */
+	bool enc = ctx->enc;		/* prevent race if sock lock dropped */
 
 	/*
 	 * Data length provided by caller via sendmsg/sendpage that has not
@@ -137,7 +138,7 @@  static int _aead_recvmsg(struct socket *sock, struct msghdr *msg,
 	 * buffer provides the tag which is consumed resulting in only the
 	 * plaintext without a buffer for the tag returned to the caller.
 	 */
-	if (ctx->enc)
+	if (enc)
 		outlen = used + as;
 	else
 		outlen = used - as;
@@ -211,7 +212,7 @@  static int _aead_recvmsg(struct socket *sock, struct msghdr *msg,
 	/* Use the RX SGL as source (and destination) for crypto op. */
 	rsgl_src = areq->first_rsgl.sgl.sg;
 
-	if (ctx->enc) {
+	if (enc) {
 		/*
 		 * Encryption operation - The in-place cipher operation is
 		 * achieved by the following operation:
@@ -288,7 +289,7 @@  static int _aead_recvmsg(struct socket *sock, struct msghdr *msg,
 		aead_request_set_callback(&areq->cra_u.aead_req,
 					  CRYPTO_TFM_REQ_MAY_BACKLOG,
 					  af_alg_async_cb, areq);
-		err = ctx->enc ? crypto_aead_encrypt(&areq->cra_u.aead_req) :
+		err = enc ? crypto_aead_encrypt(&areq->cra_u.aead_req) :
 				 crypto_aead_decrypt(&areq->cra_u.aead_req);
 
 		/* AIO operation in progress */
@@ -305,7 +306,7 @@  static int _aead_recvmsg(struct socket *sock, struct msghdr *msg,
 		aead_request_set_callback(&areq->cra_u.aead_req,
 					  CRYPTO_TFM_REQ_MAY_BACKLOG,
 					  crypto_req_done, &ctx->wait);
-		err = crypto_wait_req(ctx->enc ?
+		err = crypto_wait_req(enc ?
 				crypto_aead_encrypt(&areq->cra_u.aead_req) :
 				crypto_aead_decrypt(&areq->cra_u.aead_req),
 				&ctx->wait);