diff mbox

[v2,2/2] crypto: algif - change algif_skcipher to be asynchronous

Message ID 20150309205525.9648.96093.stgit@tstruk-mobl1 (mailing list archive)
State RFC
Delegated to: Herbert Xu
Headers show

Commit Message

Tadeusz Struk March 9, 2015, 8:55 p.m. UTC
The way the algif_skcipher works currently is that on sendmsg/sendpage it
builds an sgl for the input data and then on read/recvmsg it sends the job
for encryption putting the user to sleep till the data is processed.
This way it can only handle one job at a given time.
This patch changes it to be asynchronous by adding AIO support.

Signed-off-by: Tadeusz Struk <tadeusz.struk@intel.com>
---
 crypto/algif_skcipher.c |  233 ++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 226 insertions(+), 7 deletions(-)


--
To unsubscribe from this list: send the line "unsubscribe linux-crypto" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Stephan Mueller March 10, 2015, 7:41 a.m. UTC | #1
Am Montag, 9. März 2015, 13:55:25 schrieb Tadeusz Struk:

Hi Tadeusz,


>The way the algif_skcipher works currently is that on sendmsg/sendpage
>it builds an sgl for the input data and then on read/recvmsg it sends
>the job for encryption putting the user to sleep till the data is
>processed. This way it can only handle one job at a given time.
>This patch changes it to be asynchronous by adding AIO support.
>
>Signed-off-by: Tadeusz Struk <tadeusz.struk@intel.com>
>---
> crypto/algif_skcipher.c |  233
>++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 226
>insertions(+), 7 deletions(-)
>
>diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c
>index 0c8a1e5..790ed35 100644
>--- a/crypto/algif_skcipher.c
>+++ b/crypto/algif_skcipher.c
>@@ -39,6 +39,7 @@ struct skcipher_ctx {
>
> 	struct af_alg_completion completion;
>
>+	atomic_t inflight;
> 	unsigned used;
>
> 	unsigned int len;
>@@ -49,9 +50,65 @@ struct skcipher_ctx {
> 	struct ablkcipher_request req;
> };
>
>+struct skcipher_async_rsgl {
>+	struct af_alg_sgl sgl;
>+	struct list_head list;
>+};
>+
>+struct skcipher_async_req {
>+	struct kiocb *iocb;
>+	struct skcipher_async_rsgl first_sgl;
>+	struct list_head list;
>+	struct scatterlist *tsg;
>+	char iv[];
>+};
>+
>+#define GET_SREQ(areq, ctx) (struct skcipher_async_req *)((char *)areq
>+ \ +	crypto_ablkcipher_reqsize(crypto_ablkcipher_reqtfm(&ctx->req)))
>+
>+#define GET_REQ_SIZE(ctx) \
>+	crypto_ablkcipher_reqsize(crypto_ablkcipher_reqtfm(&ctx->req))
>+
>+#define GET_IV_SIZE(ctx) \
>+	crypto_ablkcipher_ivsize(crypto_ablkcipher_reqtfm(&ctx->req))

Wouldn't be an inline function better?
>+
> #define MAX_SGL_ENTS ((4096 - sizeof(struct skcipher_sg_list)) / \
> 		      sizeof(struct scatterlist) - 1)
>
>+static void skcipher_free_async_sgls(struct skcipher_async_req *sreq)
>+{
>+	struct skcipher_async_rsgl *rsgl, *tmp;
>+	struct scatterlist *sgl;
>+	struct scatterlist *sg;
>+	int i, n;
>+
>+	list_for_each_entry_safe(rsgl, tmp, &sreq->list, list) {
>+		af_alg_free_sg(&rsgl->sgl);
>+		if (rsgl != &sreq->first_sgl)
>+			kfree(rsgl);
>+	}
>+	sgl = sreq->tsg;
>+	n = sg_nents(sgl);
>+	for_each_sg(sgl, sg, n, i)
>+		put_page(sg_page(sg));
>+
>+	kfree(sreq->tsg);
>+}
>+
>+static void skcipher_async_cb(struct crypto_async_request *req, int
>err) +{
>+	struct sock *sk = req->data;
>+	struct alg_sock *ask = alg_sk(sk);
>+	struct skcipher_ctx *ctx = ask->private;
>+	struct skcipher_async_req *sreq = GET_SREQ(req, ctx);
>+	struct kiocb *iocb = sreq->iocb;
>+
>+	atomic_dec(&ctx->inflight);
>+	skcipher_free_async_sgls(sreq);
>+	kfree(req);
>+	aio_complete(iocb, err, err);
>+}
>+
> static inline int skcipher_sndbuf(struct sock *sk)
> {
> 	struct alg_sock *ask = alg_sk(sk);
>@@ -96,7 +153,7 @@ static int skcipher_alloc_sgl(struct sock *sk)
> 	return 0;
> }
>
>-static void skcipher_pull_sgl(struct sock *sk, int used)
>+static void skcipher_pull_sgl(struct sock *sk, int used, int put)
> {
> 	struct alg_sock *ask = alg_sk(sk);
> 	struct skcipher_ctx *ctx = ask->private;
>@@ -123,8 +180,8 @@ static void skcipher_pull_sgl(struct sock *sk, int
>used)
>
> 			if (sg[i].length)
> 				return;
>-
>-			put_page(sg_page(sg + i));
>+			if (put)
>+				put_page(sg_page(sg + i));
> 			sg_assign_page(sg + i, NULL);
> 		}
>
>@@ -143,7 +200,7 @@ static void skcipher_free_sgl(struct sock *sk)
> 	struct alg_sock *ask = alg_sk(sk);
> 	struct skcipher_ctx *ctx = ask->private;
>
>-	skcipher_pull_sgl(sk, ctx->used);
>+	skcipher_pull_sgl(sk, ctx->used, 1);
> }
>
> static int skcipher_wait_for_wmem(struct sock *sk, unsigned flags)
>@@ -424,8 +481,149 @@ unlock:
> 	return err ?: size;
> }
>
>-static int skcipher_recvmsg(struct kiocb *unused, struct socket *sock,
>-			    struct msghdr *msg, size_t ignored, int 
flags)
>+static int skcipher_all_sg_nents(struct skcipher_ctx *ctx)
>+{
>+	struct skcipher_sg_list *sgl;
>+	struct scatterlist *sg;
>+	int nents = 0;
>+
>+	list_for_each_entry(sgl, &ctx->tsgl, list) {
>+		sg = sgl->sg;
>+
>+		while (!sg->length)
>+			sg++;
>+
>+		nents += sg_nents(sg);
>+	}
>+	return nents;
>+}
>+
>+static int skcipher_recvmsg_async(struct kiocb *iocb, struct socket
>*sock, +				  struct msghdr *msg, int flags)
>+{
>+	struct sock *sk = sock->sk;
>+	struct alg_sock *ask = alg_sk(sk);
>+	struct skcipher_ctx *ctx = ask->private;
>+	struct skcipher_sg_list *sgl;
>+	struct scatterlist *sg;
>+	struct skcipher_async_req *sreq;
>+	struct ablkcipher_request *req;
>+	struct skcipher_async_rsgl *last_rsgl = NULL;
>+	unsigned int len = 0, tx_nents = skcipher_all_sg_nents(ctx);
>+	unsigned int reqlen = sizeof(struct skcipher_async_req) +
>+				GET_REQ_SIZE(ctx) + GET_IV_SIZE(ctx);
>+	int i = 0;
>+	int err = -ENOMEM;
>+
>+	lock_sock(sk);
>+	req = kmalloc(reqlen, GFP_KERNEL);
>+	if (unlikely(!req))
>+		goto unlock;
>+
>+	sreq = GET_SREQ(req, ctx);
>+	sreq->iocb = iocb;
>+	memset(&sreq->first_sgl, '\0', sizeof(struct 
skcipher_async_rsgl));
>+	INIT_LIST_HEAD(&sreq->list);
>+	sreq->tsg = kcalloc(tx_nents, sizeof(*sg), GFP_KERNEL);

Why do you use kcalloc here and not kmalloc (why is there a memset 
needed considering that sg_init_table is called)?

>+	if (unlikely(!sreq->tsg)) {
>+		kfree(req);
>+		goto unlock;
>+	}
>+	sg_init_table(sreq->tsg, tx_nents);
>+	memcpy(sreq->iv, ctx->iv, GET_IV_SIZE(ctx));
>+	ablkcipher_request_set_tfm(req, crypto_ablkcipher_reqtfm(&ctx-
>req));
>+	ablkcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
>+					skcipher_async_cb, sk);
>+
>+	while (iov_iter_count(&msg->msg_iter)) {
>+		struct skcipher_async_rsgl *rsgl;
>+		unsigned long used;
>+
>+		if (!ctx->used) {
>+			err = skcipher_wait_for_data(sk, flags);
>+			if (err)
>+				goto free;
>+		}
>+		sgl = list_first_entry(&ctx->tsgl,
>+				       struct skcipher_sg_list, list);
>+		sg = sgl->sg;
>+
>+		while (!sg->length)
>+			sg++;
>+
>+		used = min_t(unsigned long, ctx->used,
>+			     iov_iter_count(&msg->msg_iter));
>+		used = min_t(unsigned long, used, sg->length);
>+
>+		if (i == tx_nents) {
>+			struct scatterlist *tmp;
>+			int x;
>+			/* Ran out of tx slots in async request
>+			 * need to expand */
>+			tmp = kcalloc(tx_nents * 2, sizeof(*tmp),
>+				      GFP_KERNEL);

Why is the memory increased by a factor of two?

Could you please point me to the logic that limits the allocation to 
prevent user space eating up kernel memory?

Same as above: Why do you use kcalloc here and not kmalloc (why is there 
a memset needed considering that sg_init_table is called)?

>+			if (!tmp)
>+				goto free;
>+
>+			sg_init_table(tmp, tx_nents * 2);
>+			for (x = 0; x < tx_nents; x++)
>+				sg_set_page(&tmp[x], sg_page(&sreq-
>tsg[x]),
>+					    sreq->tsg[x].length,
>+					    sreq->tsg[x].offset);
>+			kfree(sreq->tsg);

It seems I missed a point here, but the code is freeing the existing 
sreq->tsg scatterlist just to replace it with the scatterlist that is twice 
as big. The kernel had to fill the scatterlist that is twice as big with 
seemingly the same data we already filled our list with (according to 
the sg_set_page call below).

If I understand that right, isn't that unnecessary work? Why not just 
allocate just one chunk of memory for a scatterlist holding the new 
data, fill it and chain it with the existing scatterlist?

>+			sreq->tsg = tmp;
>+			tx_nents *= 2;
>+		}
>+		/* Need to take over the tx sgl from ctx
>+		 * to the asynch req - these sgls will be freed later */
>+		sg_set_page(sreq->tsg + i++, sg_page(sg), sg->length,
>+			    sg->offset);
>+
>+		if (list_empty(&sreq->list)) {
>+			rsgl = &sreq->first_sgl;
>+			list_add_tail(&rsgl->list, &sreq->list);
>+		} else {
>+			rsgl = kzalloc(sizeof(*rsgl), GFP_KERNEL);

Why is kzalloc called? Shouldn't kmalloc suffice?

>+			if (!rsgl) {
>+				err = -ENOMEM;
>+				goto free;
>+			}
>+			list_add_tail(&rsgl->list, &sreq->list);
>+		}
>+
>+		used = af_alg_make_sg(&rsgl->sgl, &msg->msg_iter, used);
>+		err = used;
>+		if (used < 0)
>+			goto free;
>+		if (last_rsgl)
>+			af_alg_link_sg(&last_rsgl->sgl, &rsgl->sgl);
>+
>+		last_rsgl = rsgl;
>+		len += used;
>+		skcipher_pull_sgl(sk, used, 0);
>+		iov_iter_advance(&msg->msg_iter, used);
>+	}
>+
>+	ablkcipher_request_set_crypt(req, sreq->tsg, sreq-
>first_sgl.sgl.sg,
>+				     len, sreq->iv);
>+	err = ctx->enc ? crypto_ablkcipher_encrypt(req) :
>+			 crypto_ablkcipher_decrypt(req);
>+	if (err == -EINPROGRESS) {
>+		atomic_inc(&ctx->inflight);
>+		err = -EIOCBQUEUED;
>+		goto unlock;
>+	}

To avoid races, shouldn't the atomic_inc of inflight happen before the 
encryption / decryption call?

>+free:
>+	skcipher_free_async_sgls(sreq);
>+	kfree(req);
>+unlock:
>+	skcipher_wmem_wakeup(sk);
>+	release_sock(sk);
>+	return err;
>+}
>+
>+static int skcipher_recvmsg_sync(struct socket *sock, struct msghdr
>*msg, +				 int flags)
> {
> 	struct sock *sk = sock->sk;
> 	struct alg_sock *ask = alg_sk(sk);
>@@ -484,7 +682,7 @@ free:
> 			goto unlock;
>
> 		copied += used;
>-		skcipher_pull_sgl(sk, used);
>+		skcipher_pull_sgl(sk, used, 1);
> 		iov_iter_advance(&msg->msg_iter, used);
> 	}
>
>@@ -497,6 +695,13 @@ unlock:
> 	return copied ?: err;
> }
>
>+static int skcipher_recvmsg(struct kiocb *iocb, struct socket *sock,
>+			    struct msghdr *msg, size_t ignored, int 
flags)
>+{
>+	return is_sync_kiocb(iocb) ?
>+		skcipher_recvmsg_sync(sock, msg, flags) :
>+		skcipher_recvmsg_async(iocb, sock, msg, flags);
>+}
>
> static unsigned int skcipher_poll(struct file *file, struct socket
>*sock, poll_table *wait)
>@@ -555,12 +760,25 @@ static int skcipher_setkey(void *private, const
>u8 *key, unsigned int keylen) return crypto_ablkcipher_setkey(private,
>key, keylen);
> }
>
>+static void skcipher_wait(struct sock *sk)
>+{
>+	struct alg_sock *ask = alg_sk(sk);
>+	struct skcipher_ctx *ctx = ask->private;
>+	int ctr = 0;
>+
>+	while (atomic_read(&ctx->inflight) && ctr++ < 100)
>+		msleep(100);

What is the reason for taking 100? Why not 50 or 200?
>+}
>+
> static void skcipher_sock_destruct(struct sock *sk)
> {
> 	struct alg_sock *ask = alg_sk(sk);
> 	struct skcipher_ctx *ctx = ask->private;
> 	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(&ctx-
>req);
>
>+	if (atomic_read(&ctx->inflight))
>+		skcipher_wait(sk);
>+
> 	skcipher_free_sgl(sk);
> 	sock_kzfree_s(sk, ctx->iv, crypto_ablkcipher_ivsize(tfm));
> 	sock_kfree_s(sk, ctx, ctx->len);
>@@ -592,6 +810,7 @@ static int skcipher_accept_parent(void *private,
>struct sock *sk) ctx->more = 0;
> 	ctx->merge = 0;
> 	ctx->enc = 0;
>+	atomic_set(&ctx->inflight, 0);
> 	af_alg_init_completion(&ctx->completion);
>
> 	ask->private = ctx;
>
>--
>To unsubscribe from this list: send the line "unsubscribe linux-crypto"
>in the body of a message to majordomo@vger.kernel.org
>More majordomo info at  http://vger.kernel.org/majordomo-info.html


Ciao
Stephan
--
To unsubscribe from this list: send the line "unsubscribe linux-crypto" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Tadeusz Struk March 10, 2015, 6:26 p.m. UTC | #2
Hi Stephan,
On 03/10/2015 12:41 AM, Stephan Mueller wrote:
>> +#define GET_SREQ(areq, ctx) (struct skcipher_async_req *)((char *)areq
>> >+ \ +	crypto_ablkcipher_reqsize(crypto_ablkcipher_reqtfm(&ctx->req)))
>> >+
>> >+#define GET_REQ_SIZE(ctx) \
>> >+	crypto_ablkcipher_reqsize(crypto_ablkcipher_reqtfm(&ctx->req))
>> >+
>> >+#define GET_IV_SIZE(ctx) \
>> >+	crypto_ablkcipher_ivsize(crypto_ablkcipher_reqtfm(&ctx->req))
> Wouldn't be an inline function better?

This is rather an ideological question ;)

>> +	INIT_LIST_HEAD(&sreq->list);
>> >+	sreq->tsg = kcalloc(tx_nents, sizeof(*sg), GFP_KERNEL);
> Why do you use kcalloc here and not kmalloc (why is there a memset 
> needed considering that sg_init_table is called?
> 

This is not only because of the list, but also because of the af_alg_sgl and scatterlists inside the skcipher_async_rsgl struct.
I will need to zero them all anyway so it's easier to just use kcalloc().

>> +		if (i == tx_nents) {
>> >+			struct scatterlist *tmp;
>> >+			int x;
>> >+			/* Ran out of tx slots in async request
>> >+			 * need to expand */
>> >+			tmp = kcalloc(tx_nents * 2, sizeof(*tmp),
>> >+				      GFP_KERNEL);
> Why is the memory increased by a factor of two?

The scenario here is as follows - one has written some amount of data, say 64 bytes. We have built an src sgl in skcipher_sendpage() or skcipher_sendmsg(), and now one is calling aio_read(), which triggers the skcipher_recvmsg_async(), and wants to read let's say 128 bytes. We process the first 64 bytes, and by process I mean we put it to the async req, and we get block on:
if (!ctx->used)
	skcipher_wait_for_data(sk, flags);

Then the user writes more data, from a separate thread, and we proceed further, but because we have only allocated just enough sgl space for the first 64 bytes of src data in the async request, we need to allocate more now. We don't know how much data has been requested in the aio_read() until we reach the end of the iov_iter, so we just want to make it big enough.

> 
> Could you please point me to the logic that limits the allocation to 
> prevent user space eating up kernel memory?

To submit an aio read request you need to provide a valid ptr & size or a valid iov vector. This is validated in fs/aio.c
This becomes the encryption destination buffer.
The src buffer for encryption is built in sendmsg()/sendpage() functions.
These are processed and freed every time the skcipher_recvmsg_async() is called or in the skcipher_async_cb().

> 
> Same as above: Why do you use kcalloc here and not kmalloc (why is there 
> a memset needed considering that sg_init_table is called?
> 

Same as above i.e. because of the af_alg_sgl and scatterlists inside the skcipher_async_rsgl struct.

>> >+			if (!tmp)
>> >+				goto free;
>> >+
>> >+			sg_init_table(tmp, tx_nents * 2);
>> >+			for (x = 0; x < tx_nents; x++)
>> >+				sg_set_page(&tmp[x], sg_page(&sreq-
>> >tsg[x]),
>> >+					    sreq->tsg[x].length,
>> >+					    sreq->tsg[x].offset);
>> >+			kfree(sreq->tsg);
> It seems I miss a point here, but the code is freeing the existing sreg-
>> >tsg scatterlist just to replace it with the scatterlist that is twice 
> as big. The kernel had to fill the scatterlist that is twice as big with 
> seemingly the same data we already filled our list with (according to 
> the sg_set_page call below).
> 
> If I understand that right, isn't that unnecessary work? Why not just 
> allocate just one chunk of memory for a scatterlist holding the new 
> data, fill it and chain it with the existing scatterlist?
> 

That's a valid point, but as I said, the situation is that someone supplied X bytes of plain text for encryption, and tried to read/encrypt Y bytes where Y > X.
At this point we don't know how big the Y is so, we can loop through the whole iov_iter allocating new sgls and linking them together,
or just allocate "big enough" buffer once and try to proceed until this new buffer runs out.
Keep in mind that this situation only occurs when one tries to read more than has been written, followed by a subsequent write that supplies more data.
This whole situation is far from optimal.

>> +		if (list_empty(&sreq->list)) {
>> >+			rsgl = &sreq->first_sgl;
>> >+			list_add_tail(&rsgl->list, &sreq->list);
>> >+		} else {
>> >+			rsgl = kzalloc(sizeof(*rsgl), GFP_KERNEL);
> Why is kzalloc called? Shouldn't kmalloc suffice?
> 

You are correct, in this case it will be better to use kmalloc. af_alg_make_sg() does all the initialization.

>> +	err = ctx->enc ? crypto_ablkcipher_encrypt(req) :
>> >+			 crypto_ablkcipher_decrypt(req);
>> >+	if (err == -EINPROGRESS) {
>> >+		atomic_inc(&ctx->inflight);
>> >+		err = -EIOCBQUEUED;
>> >+		goto unlock;
>> >+	}
> To avoid races, shouldn't the atomic_inc of inflight happen before the 
> encryption / decryption call?
> 

We only increase it when the request has been enqueued to be processed asynchronously, and decrease it in the asynch callback, which is safe.
Incrementing it before will require an else block here that will just decrement it for any other case and that is an overhead.
Also using atomic type guarantees that there will not be any race conditions.

>> +	while (atomic_read(&ctx->inflight) && ctr++ < 100)
>> >+		msleep(100);
> What is the reason for taking 100? Why not 50 or 200?

This is an arbitrary value. We want to wait for any outstanding request before we close the socket.
We don't want to wait too long, but we also want to make sure that we do get all responses back.
At this point there should be no outstanding requests really, but if there are any we will wait between 100ms and 10 sec, which should cater for all cases.

Thanks for taking the time to review my patch and for all your feedback.
I'll send a v3 that uses kmalloc instead of kzalloc to allocate the rsgl.
Regards,
Tadeusz



--
To unsubscribe from this list: send the line "unsubscribe linux-crypto" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c
index 0c8a1e5..790ed35 100644
--- a/crypto/algif_skcipher.c
+++ b/crypto/algif_skcipher.c
@@ -39,6 +39,7 @@  struct skcipher_ctx {
 
 	struct af_alg_completion completion;
 
+	atomic_t inflight;
 	unsigned used;
 
 	unsigned int len;
@@ -49,9 +50,65 @@  struct skcipher_ctx {
 	struct ablkcipher_request req;
 };
 
+struct skcipher_async_rsgl {
+	struct af_alg_sgl sgl;
+	struct list_head list;
+};
+
+struct skcipher_async_req {
+	struct kiocb *iocb;
+	struct skcipher_async_rsgl first_sgl;
+	struct list_head list;
+	struct scatterlist *tsg;
+	char iv[];
+};
+
+#define GET_SREQ(areq, ctx) (struct skcipher_async_req *)((char *)areq + \
+	crypto_ablkcipher_reqsize(crypto_ablkcipher_reqtfm(&ctx->req)))
+
+#define GET_REQ_SIZE(ctx) \
+	crypto_ablkcipher_reqsize(crypto_ablkcipher_reqtfm(&ctx->req))
+
+#define GET_IV_SIZE(ctx) \
+	crypto_ablkcipher_ivsize(crypto_ablkcipher_reqtfm(&ctx->req))
+
 #define MAX_SGL_ENTS ((4096 - sizeof(struct skcipher_sg_list)) / \
 		      sizeof(struct scatterlist) - 1)
 
+static void skcipher_free_async_sgls(struct skcipher_async_req *sreq)
+{
+	struct skcipher_async_rsgl *rsgl, *tmp;
+	struct scatterlist *sgl;
+	struct scatterlist *sg;
+	int i, n;
+
+	list_for_each_entry_safe(rsgl, tmp, &sreq->list, list) {
+		af_alg_free_sg(&rsgl->sgl);
+		if (rsgl != &sreq->first_sgl)
+			kfree(rsgl);
+	}
+	sgl = sreq->tsg;
+	n = sg_nents(sgl);
+	for_each_sg(sgl, sg, n, i)
+		put_page(sg_page(sg));
+
+	kfree(sreq->tsg);
+}
+
+static void skcipher_async_cb(struct crypto_async_request *req, int err)
+{
+	struct sock *sk = req->data;
+	struct alg_sock *ask = alg_sk(sk);
+	struct skcipher_ctx *ctx = ask->private;
+	struct skcipher_async_req *sreq = GET_SREQ(req, ctx);
+	struct kiocb *iocb = sreq->iocb;
+
+	atomic_dec(&ctx->inflight);
+	skcipher_free_async_sgls(sreq);
+	kfree(req);
+	aio_complete(iocb, err, err);
+}
+
 static inline int skcipher_sndbuf(struct sock *sk)
 {
 	struct alg_sock *ask = alg_sk(sk);
@@ -96,7 +153,7 @@  static int skcipher_alloc_sgl(struct sock *sk)
 	return 0;
 }
 
-static void skcipher_pull_sgl(struct sock *sk, int used)
+static void skcipher_pull_sgl(struct sock *sk, int used, int put)
 {
 	struct alg_sock *ask = alg_sk(sk);
 	struct skcipher_ctx *ctx = ask->private;
@@ -123,8 +180,8 @@  static void skcipher_pull_sgl(struct sock *sk, int used)
 
 			if (sg[i].length)
 				return;
-
-			put_page(sg_page(sg + i));
+			if (put)
+				put_page(sg_page(sg + i));
 			sg_assign_page(sg + i, NULL);
 		}
 
@@ -143,7 +200,7 @@  static void skcipher_free_sgl(struct sock *sk)
 	struct alg_sock *ask = alg_sk(sk);
 	struct skcipher_ctx *ctx = ask->private;
 
-	skcipher_pull_sgl(sk, ctx->used);
+	skcipher_pull_sgl(sk, ctx->used, 1);
 }
 
 static int skcipher_wait_for_wmem(struct sock *sk, unsigned flags)
@@ -424,8 +481,149 @@  unlock:
 	return err ?: size;
 }
 
-static int skcipher_recvmsg(struct kiocb *unused, struct socket *sock,
-			    struct msghdr *msg, size_t ignored, int flags)
+static int skcipher_all_sg_nents(struct skcipher_ctx *ctx)
+{
+	struct skcipher_sg_list *sgl;
+	struct scatterlist *sg;
+	int nents = 0;
+
+	list_for_each_entry(sgl, &ctx->tsgl, list) {
+		sg = sgl->sg;
+
+		while (!sg->length)
+			sg++;
+
+		nents += sg_nents(sg);
+	}
+	return nents;
+}
+
+static int skcipher_recvmsg_async(struct kiocb *iocb, struct socket *sock,
+				  struct msghdr *msg, int flags)
+{
+	struct sock *sk = sock->sk;
+	struct alg_sock *ask = alg_sk(sk);
+	struct skcipher_ctx *ctx = ask->private;
+	struct skcipher_sg_list *sgl;
+	struct scatterlist *sg;
+	struct skcipher_async_req *sreq;
+	struct ablkcipher_request *req;
+	struct skcipher_async_rsgl *last_rsgl = NULL;
+	unsigned int len = 0, tx_nents = skcipher_all_sg_nents(ctx);
+	unsigned int reqlen = sizeof(struct skcipher_async_req) +
+				GET_REQ_SIZE(ctx) + GET_IV_SIZE(ctx);
+	int i = 0;
+	int err = -ENOMEM;
+
+	lock_sock(sk);
+	req = kmalloc(reqlen, GFP_KERNEL);
+	if (unlikely(!req))
+		goto unlock;
+
+	sreq = GET_SREQ(req, ctx);
+	sreq->iocb = iocb;
+	memset(&sreq->first_sgl, '\0', sizeof(struct skcipher_async_rsgl));
+	INIT_LIST_HEAD(&sreq->list);
+	sreq->tsg = kcalloc(tx_nents, sizeof(*sg), GFP_KERNEL);
+	if (unlikely(!sreq->tsg)) {
+		kfree(req);
+		goto unlock;
+	}
+	sg_init_table(sreq->tsg, tx_nents);
+	memcpy(sreq->iv, ctx->iv, GET_IV_SIZE(ctx));
+	ablkcipher_request_set_tfm(req, crypto_ablkcipher_reqtfm(&ctx->req));
+	ablkcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+					skcipher_async_cb, sk);
+
+	while (iov_iter_count(&msg->msg_iter)) {
+		struct skcipher_async_rsgl *rsgl;
+		unsigned long used;
+
+		if (!ctx->used) {
+			err = skcipher_wait_for_data(sk, flags);
+			if (err)
+				goto free;
+		}
+		sgl = list_first_entry(&ctx->tsgl,
+				       struct skcipher_sg_list, list);
+		sg = sgl->sg;
+
+		while (!sg->length)
+			sg++;
+
+		used = min_t(unsigned long, ctx->used,
+			     iov_iter_count(&msg->msg_iter));
+		used = min_t(unsigned long, used, sg->length);
+
+		if (i == tx_nents) {
+			struct scatterlist *tmp;
+			int x;
+			/* Ran out of tx slots in async request
+			 * need to expand */
+			tmp = kcalloc(tx_nents * 2, sizeof(*tmp),
+				      GFP_KERNEL);
+			if (!tmp)
+				goto free;
+
+			sg_init_table(tmp, tx_nents * 2);
+			for (x = 0; x < tx_nents; x++)
+				sg_set_page(&tmp[x], sg_page(&sreq->tsg[x]),
+					    sreq->tsg[x].length,
+					    sreq->tsg[x].offset);
+			kfree(sreq->tsg);
+			sreq->tsg = tmp;
+			tx_nents *= 2;
+		}
+		/* Need to take over the tx sgl from ctx
+		 * to the asynch req - these sgls will be freed later */
+		sg_set_page(sreq->tsg + i++, sg_page(sg), sg->length,
+			    sg->offset);
+
+		if (list_empty(&sreq->list)) {
+			rsgl = &sreq->first_sgl;
+			list_add_tail(&rsgl->list, &sreq->list);
+		} else {
+			rsgl = kzalloc(sizeof(*rsgl), GFP_KERNEL);
+			if (!rsgl) {
+				err = -ENOMEM;
+				goto free;
+			}
+			list_add_tail(&rsgl->list, &sreq->list);
+		}
+
+		used = af_alg_make_sg(&rsgl->sgl, &msg->msg_iter, used);
+		err = used;
+		if (used < 0)
+			goto free;
+		if (last_rsgl)
+			af_alg_link_sg(&last_rsgl->sgl, &rsgl->sgl);
+
+		last_rsgl = rsgl;
+		len += used;
+		skcipher_pull_sgl(sk, used, 0);
+		iov_iter_advance(&msg->msg_iter, used);
+	}
+
+	ablkcipher_request_set_crypt(req, sreq->tsg, sreq->first_sgl.sgl.sg,
+				     len, sreq->iv);
+	err = ctx->enc ? crypto_ablkcipher_encrypt(req) :
+			 crypto_ablkcipher_decrypt(req);
+	if (err == -EINPROGRESS) {
+		atomic_inc(&ctx->inflight);
+		err = -EIOCBQUEUED;
+		goto unlock;
+	}
+free:
+	skcipher_free_async_sgls(sreq);
+	kfree(req);
+unlock:
+	skcipher_wmem_wakeup(sk);
+	release_sock(sk);
+	return err;
+}
+
+static int skcipher_recvmsg_sync(struct socket *sock, struct msghdr *msg,
+				 int flags)
 {
 	struct sock *sk = sock->sk;
 	struct alg_sock *ask = alg_sk(sk);
@@ -484,7 +682,7 @@  free:
 			goto unlock;
 
 		copied += used;
-		skcipher_pull_sgl(sk, used);
+		skcipher_pull_sgl(sk, used, 1);
 		iov_iter_advance(&msg->msg_iter, used);
 	}
 
@@ -497,6 +695,13 @@  unlock:
 	return copied ?: err;
 }
 
+static int skcipher_recvmsg(struct kiocb *iocb, struct socket *sock,
+			    struct msghdr *msg, size_t ignored, int flags)
+{
+	return is_sync_kiocb(iocb) ?
+		skcipher_recvmsg_sync(sock, msg, flags) :
+		skcipher_recvmsg_async(iocb, sock, msg, flags);
+}
 
 static unsigned int skcipher_poll(struct file *file, struct socket *sock,
 				  poll_table *wait)
@@ -555,12 +760,25 @@  static int skcipher_setkey(void *private, const u8 *key, unsigned int keylen)
 	return crypto_ablkcipher_setkey(private, key, keylen);
 }
 
+static void skcipher_wait(struct sock *sk)
+{
+	struct alg_sock *ask = alg_sk(sk);
+	struct skcipher_ctx *ctx = ask->private;
+	int ctr = 0;
+
+	while (atomic_read(&ctx->inflight) && ctr++ < 100)
+		msleep(100);
+}
+
 static void skcipher_sock_destruct(struct sock *sk)
 {
 	struct alg_sock *ask = alg_sk(sk);
 	struct skcipher_ctx *ctx = ask->private;
 	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(&ctx->req);
 
+	if (atomic_read(&ctx->inflight))
+		skcipher_wait(sk);
+
 	skcipher_free_sgl(sk);
 	sock_kzfree_s(sk, ctx->iv, crypto_ablkcipher_ivsize(tfm));
 	sock_kfree_s(sk, ctx, ctx->len);
@@ -592,6 +810,7 @@  static int skcipher_accept_parent(void *private, struct sock *sk)
 	ctx->more = 0;
 	ctx->merge = 0;
 	ctx->enc = 0;
+	atomic_set(&ctx->inflight, 0);
 	af_alg_init_completion(&ctx->completion);
 
 	ask->private = ctx;