
[v2,09/10] crypto: caam - add crypto_engine support for RSA algorithms

Message ID 1578013373-1956-10-git-send-email-iuliana.prodan@nxp.com (mailing list archive)
State Changes Requested
Delegated to: Herbert Xu
Series crypto: caam - backlogging support

Commit Message

Iuliana Prodan Jan. 3, 2020, 1:02 a.m. UTC
Add crypto_engine support for RSA algorithms, to make use of
the engine queue.
Requests that have the backlog flag set are queued in the
crypto-engine queue and processed by CAAM when it becomes free. If
the queue is empty, the request is sent directly to CAAM.
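
For reference (not part of this patch), a caller opts in to backlogging
by setting CRYPTO_TFM_REQ_MAY_BACKLOG on the request and treating
-EBUSY as "queued"; a minimal sketch, with my_cb/my_done as placeholder
names:

	/* mark the request as one the driver may backlog */
	akcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
				      my_cb, &my_done);

	ret = crypto_akcipher_encrypt(req);
	if (ret == -EINPROGRESS || ret == -EBUSY)
		wait_for_completion(&my_done);	/* my_cb completes my_done */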

Signed-off-by: Iuliana Prodan <iuliana.prodan@nxp.com>
---
 drivers/crypto/caam/caampkc.c | 144 ++++++++++++++++++++++++++++++++----------
 drivers/crypto/caam/caampkc.h |   8 +++
 2 files changed, 120 insertions(+), 32 deletions(-)

Comments

Horia Geanta Jan. 10, 2020, 8:46 a.m. UTC | #1
On 1/3/2020 3:03 AM, Iuliana Prodan wrote:
> +static int akcipher_enqueue_req(struct device *jrdev, u32 *desc,
> +				void (*cbk)(struct device *jrdev, u32 *desc,
> +					    u32 err, void *context),
> +				struct akcipher_request *req,
> +				struct rsa_edesc *edesc)
> +{
> +	struct caam_drv_private_jr *jrpriv = dev_get_drvdata(jrdev);
> +	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
> +	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
> +	struct caam_rsa_key *key = &ctx->key;
> +	int ret;
> +
> +	if (req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)
> +		return crypto_transfer_akcipher_request_to_engine(jrpriv->engine,
> +								  req);
Resource leak in case transfer fails.
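One way to fix it (untested sketch) - fall through to the existing
cleanup below instead of returning, keeping resources only for
-EINPROGRESS and for backlogged requests (-EBUSY):

	ret = crypto_transfer_akcipher_request_to_engine(jrpriv->engine,
							 req);
	...
	/* keep resources for -EINPROGRESS and for backlogged (-EBUSY) */
	if (ret != -EINPROGRESS && ret != -EBUSY) {
		/* existing unmap + kfree(edesc) cleanup */
	}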

> +	else
> +		ret = caam_jr_enqueue(jrdev, desc, cbk, &edesc->jrentry);
What's the problem with transferring all requests to crypto engine?

> +
> +	if (ret != -EINPROGRESS) {
> +		switch (key->priv_form) {
> +		case FORM1:
> +			rsa_priv_f1_unmap(jrdev, edesc, req);
> +			break;
> +		case FORM2:
> +			rsa_priv_f2_unmap(jrdev, edesc, req);
> +			break;
> +		case FORM3:
> +			rsa_priv_f3_unmap(jrdev, edesc, req);
> +			break;
> +		default:
> +			rsa_pub_unmap(jrdev, edesc, req);
> +		}
> +		rsa_io_unmap(jrdev, edesc, req);
> +		kfree(edesc);
> +	}
> +
> +	return ret;
> +}
> +
>  static int caam_rsa_enc(struct akcipher_request *req)
>  {
>  	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
>  	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
> +	struct caam_rsa_req_ctx *req_ctx = akcipher_request_ctx(req);
>  	struct caam_rsa_key *key = &ctx->key;
>  	struct device *jrdev = ctx->dev;
>  	struct rsa_edesc *edesc;
> @@ -637,13 +726,9 @@ static int caam_rsa_enc(struct akcipher_request *req)
>  	/* Initialize Job Descriptor */
>  	init_rsa_pub_desc(edesc->hw_desc, &edesc->pdb.pub);
>  
> -	ret = caam_jr_enqueue(jrdev, edesc->hw_desc, rsa_pub_done,
> -			      &edesc->jrentry);
> -	if (ret == -EINPROGRESS)
> -		return ret;
> -
> -	rsa_pub_unmap(jrdev, edesc, req);
> -
> +	req_ctx->akcipher_op_done = rsa_pub_done;
This initialization could be moved into akcipher_enqueue_req().

> +	return akcipher_enqueue_req(jrdev, edesc->hw_desc, rsa_pub_done,
> +				    req, edesc);
edesc, edesc->hw_desc parameters not needed - can be deduced internally
via req -> req_ctx -> edesc -> hw_desc.
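
I.e. a sketch of the simplified helper (also folding in the
akcipher_op_done initialization mentioned above):

	static int akcipher_enqueue_req(struct device *jrdev,
					void (*cbk)(struct device *jrdev,
						    u32 *desc, u32 err,
						    void *context),
					struct akcipher_request *req)
	{
		struct caam_rsa_req_ctx *req_ctx = akcipher_request_ctx(req);
		struct rsa_edesc *edesc = req_ctx->edesc;
		u32 *desc = edesc->hw_desc;

		req_ctx->akcipher_op_done = cbk;
		/* ... transfer / enqueue / cleanup as before ... */
	}

and the call sites shrink to:

	return akcipher_enqueue_req(jrdev, rsa_pub_done, req);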

Horia
Iuliana Prodan Jan. 13, 2020, 9:48 a.m. UTC | #2
On 1/10/2020 10:46 AM, Horia Geanta wrote:
> On 1/3/2020 3:03 AM, Iuliana Prodan wrote:
>> +static int akcipher_enqueue_req(struct device *jrdev, u32 *desc,
>> +				void (*cbk)(struct device *jrdev, u32 *desc,
>> +					    u32 err, void *context),
>> +				struct akcipher_request *req,
>> +				struct rsa_edesc *edesc)
>> +{
>> +	struct caam_drv_private_jr *jrpriv = dev_get_drvdata(jrdev);
>> +	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
>> +	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
>> +	struct caam_rsa_key *key = &ctx->key;
>> +	int ret;
>> +
>> +	if (req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)
>> +		return crypto_transfer_akcipher_request_to_engine(jrpriv->engine,
>> +								  req);
> Resource leak in case transfer fails.
> 
>> +	else
>> +		ret = caam_jr_enqueue(jrdev, desc, cbk, &edesc->jrentry);
> What's the problem with transferring all requests to crypto engine?
> 
I'll address all your comments in v3.

Regarding transferring all requests to crypto-engine: if all requests 
are sent to crypto-engine, the multibuffer tests fail for 
non-backlogging requests after only 10 requests, since the 
crypto-engine queue has 10 entries.
Here's an example:
root@imx6qpdlsolox:~# insmod tcrypt.ko mode=422 num_mb=1024
insmod: ERROR: could not insert module tcrypt.ko: Resource temporarily 
unavailable
root@imx6qpdlsolox:~#
root@imx6qpdlsolox:~# dmesg
...
testing speed of multibuffer sha1 (sha1-caam)
tcrypt: test  0 (   16 byte blocks,   16 bytes per update,   1 updates):
tcrypt: concurrent request 11 error -28
tcrypt: concurrent request 13 error -28
tcrypt: concurrent request 14 error -28
tcrypt: concurrent request 16 error -28
tcrypt: concurrent request 18 error -28
tcrypt: concurrent request 20 error -28
tcrypt: concurrent request 22 error -28
tcrypt: concurrent request 24 error -28
tcrypt: concurrent request 26 error -28
tcrypt: concurrent request 28 error -28
tcrypt: concurrent request 30 error -28
tcrypt: concurrent request 32 error -28
tcrypt: concurrent request 34 error -28

tcrypt: concurrent request 1020 error -28
tcrypt: concurrent request 1022 error -28
tcrypt: At least one hashing failed ret=-28
root@imx6qpdlsolox:~#

If only the backlog requests are sent to crypto-engine, and the 
non-backlog ones directly to CAAM, these tests have a better chance to 
pass since the JR has 1024 entries.

I will need to update crypto-engine: make the queue length 
configurable at crypto-engine initialization, and remove the 
serialization of requests in crypto-engine. But, until then, I would 
like to have a backlogging solution in the CAAM driver.
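
For reference, the -28 (-ENOSPC) errors above come from
crypto_enqueue_request() rejecting non-backlog requests once the queue
is full, and crypto-engine hardcodes that limit; from
crypto/crypto_engine.c and crypto/algapi.c in current mainline:

	#define CRYPTO_ENGINE_MAX_QLEN 10
	...
	crypto_init_queue(&engine->queue, CRYPTO_ENGINE_MAX_QLEN);

	/* crypto_enqueue_request() */
	if (unlikely(queue->qlen >= queue->max_qlen)) {
		if (!(request->flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) {
			err = -ENOSPC;
			goto out;
		}
		...
	}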

Iulia

>> +
>> +	if (ret != -EINPROGRESS) {
>> +		switch (key->priv_form) {
>> +		case FORM1:
>> +			rsa_priv_f1_unmap(jrdev, edesc, req);
>> +			break;
>> +		case FORM2:
>> +			rsa_priv_f2_unmap(jrdev, edesc, req);
>> +			break;
>> +		case FORM3:
>> +			rsa_priv_f3_unmap(jrdev, edesc, req);
>> +			break;
>> +		default:
>> +			rsa_pub_unmap(jrdev, edesc, req);
>> +		}
>> +		rsa_io_unmap(jrdev, edesc, req);
>> +		kfree(edesc);
>> +	}
>> +
>> +	return ret;
>> +}
>> +
>>   static int caam_rsa_enc(struct akcipher_request *req)
>>   {
>>   	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
>>   	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
>> +	struct caam_rsa_req_ctx *req_ctx = akcipher_request_ctx(req);
>>   	struct caam_rsa_key *key = &ctx->key;
>>   	struct device *jrdev = ctx->dev;
>>   	struct rsa_edesc *edesc;
>> @@ -637,13 +726,9 @@ static int caam_rsa_enc(struct akcipher_request *req)
>>   	/* Initialize Job Descriptor */
>>   	init_rsa_pub_desc(edesc->hw_desc, &edesc->pdb.pub);
>>   
>> -	ret = caam_jr_enqueue(jrdev, edesc->hw_desc, rsa_pub_done,
>> -			      &edesc->jrentry);
>> -	if (ret == -EINPROGRESS)
>> -		return ret;
>> -
>> -	rsa_pub_unmap(jrdev, edesc, req);
>> -
>> +	req_ctx->akcipher_op_done = rsa_pub_done;
> This initialization could be moved into akcipher_enqueue_req().
> 
>> +	return akcipher_enqueue_req(jrdev, edesc->hw_desc, rsa_pub_done,
>> +				    req, edesc);
> edesc, edesc->hw_desc parameters not needed - can be deduced internally
>> via req -> req_ctx -> edesc -> hw_desc.
> 
> Horia
>
Horia Geanta Jan. 13, 2020, 12:21 p.m. UTC | #3
On 1/13/2020 11:48 AM, Iuliana Prodan wrote:
> On 1/10/2020 10:46 AM, Horia Geanta wrote:
>> On 1/3/2020 3:03 AM, Iuliana Prodan wrote:
>>> +static int akcipher_enqueue_req(struct device *jrdev, u32 *desc,
>>> +				void (*cbk)(struct device *jrdev, u32 *desc,
>>> +					    u32 err, void *context),
>>> +				struct akcipher_request *req,
>>> +				struct rsa_edesc *edesc)
>>> +{
>>> +	struct caam_drv_private_jr *jrpriv = dev_get_drvdata(jrdev);
>>> +	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
>>> +	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
>>> +	struct caam_rsa_key *key = &ctx->key;
>>> +	int ret;
>>> +
>>> +	if (req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)
>>> +		return crypto_transfer_akcipher_request_to_engine(jrpriv->engine,
>>> +								  req);
>> Resource leak in case transfer fails.
>>
>>> +	else
>>> +		ret = caam_jr_enqueue(jrdev, desc, cbk, &edesc->jrentry);
>> What's the problem with transferring all requests to crypto engine?
>>
> I'll address all your comments in v3.
> 
> Regarding transferring all requests to crypto-engine: if all requests 
> are sent to crypto-engine, the multibuffer tests fail for 
> non-backlogging requests after only 10 requests, since the 
> crypto-engine queue has 10 entries.
> Here's an example:
> root@imx6qpdlsolox:~# insmod tcrypt.ko mode=422 num_mb=1024
> insmod: ERROR: could not insert module tcrypt.ko: Resource temporarily 
> unavailable
> root@imx6qpdlsolox:~#
> root@imx6qpdlsolox:~# dmesg
> ...
> testing speed of multibuffer sha1 (sha1-caam)
> tcrypt: test  0 (   16 byte blocks,   16 bytes per update,   1 updates):
> tcrypt: concurrent request 11 error -28
> tcrypt: concurrent request 13 error -28
> tcrypt: concurrent request 14 error -28
> tcrypt: concurrent request 16 error -28
> tcrypt: concurrent request 18 error -28
> tcrypt: concurrent request 20 error -28
> tcrypt: concurrent request 22 error -28
> tcrypt: concurrent request 24 error -28
> tcrypt: concurrent request 26 error -28
> tcrypt: concurrent request 28 error -28
> tcrypt: concurrent request 30 error -28
> tcrypt: concurrent request 32 error -28
> tcrypt: concurrent request 34 error -28
> 
> tcrypt: concurrent request 1020 error -28
> tcrypt: concurrent request 1022 error -28
> tcrypt: At least one hashing failed ret=-28
> root@imx6qpdlsolox:~#
> 
> If only the backlog requests are sent to crypto-engine, and the 
> non-backlog ones directly to CAAM, these tests have a better chance to 
> pass since the JR has 1024 entries.
> 
> I will need to update crypto-engine: make the queue length 
> configurable at crypto-engine initialization, and remove the 
> serialization of requests in crypto-engine. But, until then, I would 
> like to have a backlogging solution in the CAAM driver.
> 
My point is you need to add details about the current limitations
in the commit message (even in the source code, it wouldn't hurt),
justifying the choice of not using crypto engine for all requests.
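
E.g. a comment along these lines above akcipher_enqueue_req() (the
wording is just a suggestion):

	/*
	 * Only requests with the backlog flag set are routed through
	 * crypto-engine; the rest go directly to CAAM. This works around
	 * crypto-engine's fixed 10-entry queue, which would reject
	 * non-backlog requests with -ENOSPC long before the 1024-entry
	 * JR is full.
	 */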

Horia
Iuliana Prodan Jan. 13, 2020, 1:06 p.m. UTC | #4
On 1/13/2020 2:21 PM, Horia Geanta wrote:
> On 1/13/2020 11:48 AM, Iuliana Prodan wrote:
>> On 1/10/2020 10:46 AM, Horia Geanta wrote:
>>> On 1/3/2020 3:03 AM, Iuliana Prodan wrote:
>>>> +static int akcipher_enqueue_req(struct device *jrdev, u32 *desc,
>>>> +				void (*cbk)(struct device *jrdev, u32 *desc,
>>>> +					    u32 err, void *context),
>>>> +				struct akcipher_request *req,
>>>> +				struct rsa_edesc *edesc)
>>>> +{
>>>> +	struct caam_drv_private_jr *jrpriv = dev_get_drvdata(jrdev);
>>>> +	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
>>>> +	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
>>>> +	struct caam_rsa_key *key = &ctx->key;
>>>> +	int ret;
>>>> +
>>>> +	if (req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)
>>>> +		return crypto_transfer_akcipher_request_to_engine(jrpriv->engine,
>>>> +								  req);
>>> Resource leak in case transfer fails.
>>>
>>>> +	else
>>>> +		ret = caam_jr_enqueue(jrdev, desc, cbk, &edesc->jrentry);
>>> What's the problem with transferring all requests to crypto engine?
>>>
>> I'll address all your comments in v3.
>>
>> Regarding transferring all requests to crypto-engine: if all requests
>> are sent to crypto-engine, the multibuffer tests fail for
>> non-backlogging requests after only 10 requests, since the
>> crypto-engine queue has 10 entries.
>> Here's an example:
>> root@imx6qpdlsolox:~# insmod tcrypt.ko mode=422 num_mb=1024
>> insmod: ERROR: could not insert module tcrypt.ko: Resource temporarily
>> unavailable
>> root@imx6qpdlsolox:~#
>> root@imx6qpdlsolox:~# dmesg
>> ...
>> testing speed of multibuffer sha1 (sha1-caam)
>> tcrypt: test  0 (   16 byte blocks,   16 bytes per update,   1 updates):
>> tcrypt: concurrent request 11 error -28
>> tcrypt: concurrent request 13 error -28
>> tcrypt: concurrent request 14 error -28
>> tcrypt: concurrent request 16 error -28
>> tcrypt: concurrent request 18 error -28
>> tcrypt: concurrent request 20 error -28
>> tcrypt: concurrent request 22 error -28
>> tcrypt: concurrent request 24 error -28
>> tcrypt: concurrent request 26 error -28
>> tcrypt: concurrent request 28 error -28
>> tcrypt: concurrent request 30 error -28
>> tcrypt: concurrent request 32 error -28
>> tcrypt: concurrent request 34 error -28
>>
>> tcrypt: concurrent request 1020 error -28
>> tcrypt: concurrent request 1022 error -28
>> tcrypt: At least one hashing failed ret=-28
>> root@imx6qpdlsolox:~#
>>
>> If only the backlog requests are sent to crypto-engine, and the
>> non-backlog ones directly to CAAM, these tests have a better chance
>> to pass since the JR has 1024 entries.
>>
>> I will need to update crypto-engine: make the queue length
>> configurable at crypto-engine initialization, and remove the
>> serialization of requests in crypto-engine. But, until then, I would
>> like to have a backlogging solution in the CAAM driver.
>>
> My point is you need to add details about the current limitations
> in the commit message (even in the source code, it wouldn't hurt),
> justifying the choice of not using crypto engine for all requests.
> 
Yes, I understand your point and, as I mentioned above, I'll address all 
comments, from all patches, in v3:
- update commit messages;
- handle resource leak in case of crypto-engine transfer;
- remove unnecessary variables in some structs;
- will remove patch #6.

Iulia
Herbert Xu Jan. 14, 2020, 12:14 a.m. UTC | #5
On Mon, Jan 13, 2020 at 09:48:11AM +0000, Iuliana Prodan wrote:
>
> Regarding transferring all requests to crypto-engine: if all requests 
> are sent to crypto-engine, the multibuffer tests fail for 
> non-backlogging requests after only 10 requests, since the 
> crypto-engine queue has 10 entries.

That isn't right.  The crypto engine should never refuse to accept
a request unless the hardware queue is really full.  Perhaps the
crypto engine code needs to be fixed?

Cheers,
Iuliana Prodan Jan. 14, 2020, 10:40 a.m. UTC | #6
On 1/14/2020 2:14 AM, Herbert Xu wrote:
> On Mon, Jan 13, 2020 at 09:48:11AM +0000, Iuliana Prodan wrote:
>>
>> Regarding transferring all requests to crypto-engine: if all requests
>> are sent to crypto-engine, the multibuffer tests fail for
>> non-backlogging requests after only 10 requests, since the
>> crypto-engine queue has 10 entries.
> 
> That isn't right.  The crypto engine should never refuse to accept
> a request 
Crypto-engine accepts all requests that have the backlog flag; the 
non-backlog ones are accepted only up to the configured limit (of 10).

> unless the hardware queue is really full.  
Crypto-engine doesn't check the status of the hardware queue.
Non-backlog requests are dropped after 10 entries.

> Perhaps the
> crypto engine code needs to be fixed?
To me, crypto-engine seems to be made for backlogged requests; that's 
why I'm sending the non-backlog ones directly to CAAM. The implicit 
serialization of requests in crypto-engine is the bottleneck.

But, as I said before, I want to update crypto-engine to set the queue 
length when initializing crypto-engine, and to remove the serialization 
of requests in crypto-engine by adding knowledge about the underlying 
hw accelerator (the number of requests that can be processed in 
parallel).
I'll send an RFC with my proposal for crypto-engine enhancements.
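
Roughly what I have in mind (a sketch; the interface is hypothetical
and the names are not final):

	/* hypothetical: queue length and retry support chosen at init */
	struct crypto_engine *
	crypto_engine_alloc_init_and_set(struct device *dev,
					 bool retry_support, bool rt,
					 int qlen);

	/* CAAM JR ring has 1024 entries; size the engine queue to match */
	jrpriv->engine = crypto_engine_alloc_init_and_set(jrdev, true,
							  false, 1024);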

But, until then, I would like to have a backlogging solution in the CAAM driver.

Thanks,
Iulia
Corentin Labbe Jan. 14, 2020, 1:53 p.m. UTC | #7
On Tue, Jan 14, 2020 at 10:40:53AM +0000, Iuliana Prodan wrote:
> On 1/14/2020 2:14 AM, Herbert Xu wrote:
> > On Mon, Jan 13, 2020 at 09:48:11AM +0000, Iuliana Prodan wrote:
> >>
> >> Regarding transferring all requests to crypto-engine: if all requests
> >> are sent to crypto-engine, the multibuffer tests fail for
> >> non-backlogging requests after only 10 requests, since the
> >> crypto-engine queue has 10 entries.
> > 
> > That isn't right.  The crypto engine should never refuse to accept
> > a request 
> Crypto-engine accepts all requests that have the backlog flag; the 
> non-backlog ones are accepted only up to the configured limit (of 10).
> 
> > unless the hardware queue is really full.  
> Crypto-engine doesn't check the status of the hardware queue.
> Non-backlog requests are dropped after 10 entries.
> 
> > Perhaps the
> > crypto engine code needs to be fixed?
> To me, crypto-engine seems to be made for backlogged requests; that's 
> why I'm sending the non-backlog ones directly to CAAM. The implicit 
> serialization of requests in crypto-engine is the bottleneck.
> 
> But, as I said before, I want to update crypto-engine to set the queue 
> length when initializing crypto-engine, and to remove the serialization 
> of requests in crypto-engine by adding knowledge about the underlying 
> hw accelerator (the number of requests that can be processed in 
> parallel).
> I'll send an RFC with my proposal for crypto-engine enhancements.
> 
> But, until then, I would like to have a backlogging solution in the CAAM driver.
> 

Hello

I have already something for queue length and processing in parallel.
I will send it soon.

Regards

Patch

diff --git a/drivers/crypto/caam/caampkc.c b/drivers/crypto/caam/caampkc.c
index 858cc95..82d5b55 100644
--- a/drivers/crypto/caam/caampkc.c
+++ b/drivers/crypto/caam/caampkc.c
@@ -118,19 +118,28 @@  static void rsa_pub_done(struct device *dev, u32 *desc, u32 err, void *context)
 {
 	struct caam_akcipher_request_entry *jrentry = context;
 	struct akcipher_request *req = jrentry->base;
+	struct caam_rsa_req_ctx *req_ctx = akcipher_request_ctx(req);
+	struct caam_drv_private_jr *jrp = dev_get_drvdata(dev);
 	struct rsa_edesc *edesc;
 	int ecode = 0;
 
 	if (err)
 		ecode = caam_jr_strstatus(dev, err);
 
-	edesc = container_of(desc, struct rsa_edesc, hw_desc[0]);
+	edesc = req_ctx->edesc;
 
 	rsa_pub_unmap(dev, edesc, req);
 	rsa_io_unmap(dev, edesc, req);
 	kfree(edesc);
 
-	akcipher_request_complete(req, ecode);
+	/*
+	 * If no backlog flag, the completion of the request is done
+	 * by CAAM, not crypto engine.
+	 */
+	if (!jrentry->bklog)
+		akcipher_request_complete(req, ecode);
+	else
+		crypto_finalize_akcipher_request(jrp->engine, req, ecode);
 }
 
 static void rsa_priv_f_done(struct device *dev, u32 *desc, u32 err,
@@ -139,15 +148,17 @@  static void rsa_priv_f_done(struct device *dev, u32 *desc, u32 err,
 	struct caam_akcipher_request_entry *jrentry = context;
 	struct akcipher_request *req = jrentry->base;
 	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
+	struct caam_drv_private_jr *jrp = dev_get_drvdata(dev);
 	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
 	struct caam_rsa_key *key = &ctx->key;
+	struct caam_rsa_req_ctx *req_ctx = akcipher_request_ctx(req);
 	struct rsa_edesc *edesc;
 	int ecode = 0;
 
 	if (err)
 		ecode = caam_jr_strstatus(dev, err);
 
-	edesc = container_of(desc, struct rsa_edesc, hw_desc[0]);
+	edesc = req_ctx->edesc;
 
 	switch (key->priv_form) {
 	case FORM1:
@@ -163,7 +174,14 @@  static void rsa_priv_f_done(struct device *dev, u32 *desc, u32 err,
 	rsa_io_unmap(dev, edesc, req);
 	kfree(edesc);
 
-	akcipher_request_complete(req, ecode);
+	/*
+	 * If no backlog flag, the completion of the request is done
+	 * by CAAM, not crypto engine.
+	 */
+	if (!jrentry->bklog)
+		akcipher_request_complete(req, ecode);
+	else
+		crypto_finalize_akcipher_request(jrp->engine, req, ecode);
 }
 
 /**
@@ -312,6 +330,7 @@  static struct rsa_edesc *rsa_edesc_alloc(struct akcipher_request *req,
 	edesc->dst_nents = dst_nents;
 
 	edesc->jrentry.base = req;
+	req_ctx->edesc = edesc;
 
 	if (!sec4_sg_bytes)
 		return edesc;
@@ -343,6 +362,36 @@  static struct rsa_edesc *rsa_edesc_alloc(struct akcipher_request *req,
 	return ERR_PTR(-ENOMEM);
 }
 
+static int akcipher_do_one_req(struct crypto_engine *engine, void *areq)
+{
+	struct akcipher_request *req = container_of(areq,
+						    struct akcipher_request,
+						    base);
+	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
+	struct caam_rsa_req_ctx *req_ctx = akcipher_request_ctx(req);
+	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
+	struct caam_akcipher_request_entry *jrentry;
+	struct device *jrdev = ctx->dev;
+	u32 *desc = req_ctx->edesc->hw_desc;
+	int ret;
+
+	jrentry = &req_ctx->edesc->jrentry;
+	jrentry->bklog = true;
+
+	ret = caam_jr_enqueue(jrdev, desc, req_ctx->akcipher_op_done,
+			      jrentry);
+
+	if (ret != -EINPROGRESS) {
+		rsa_pub_unmap(jrdev, req_ctx->edesc, req);
+		rsa_io_unmap(jrdev, req_ctx->edesc, req);
+		kfree(req_ctx->edesc);
+	} else {
+		ret = 0;
+	}
+
+	return ret;
+}
+
 static int set_rsa_pub_pdb(struct akcipher_request *req,
 			   struct rsa_edesc *edesc)
 {
@@ -606,10 +655,50 @@  static int set_rsa_priv_f3_pdb(struct akcipher_request *req,
 	return -ENOMEM;
 }
 
+static int akcipher_enqueue_req(struct device *jrdev, u32 *desc,
+				void (*cbk)(struct device *jrdev, u32 *desc,
+					    u32 err, void *context),
+				struct akcipher_request *req,
+				struct rsa_edesc *edesc)
+{
+	struct caam_drv_private_jr *jrpriv = dev_get_drvdata(jrdev);
+	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
+	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
+	struct caam_rsa_key *key = &ctx->key;
+	int ret;
+
+	if (req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)
+		return crypto_transfer_akcipher_request_to_engine(jrpriv->engine,
+								  req);
+	else
+		ret = caam_jr_enqueue(jrdev, desc, cbk, &edesc->jrentry);
+
+	if (ret != -EINPROGRESS) {
+		switch (key->priv_form) {
+		case FORM1:
+			rsa_priv_f1_unmap(jrdev, edesc, req);
+			break;
+		case FORM2:
+			rsa_priv_f2_unmap(jrdev, edesc, req);
+			break;
+		case FORM3:
+			rsa_priv_f3_unmap(jrdev, edesc, req);
+			break;
+		default:
+			rsa_pub_unmap(jrdev, edesc, req);
+		}
+		rsa_io_unmap(jrdev, edesc, req);
+		kfree(edesc);
+	}
+
+	return ret;
+}
+
 static int caam_rsa_enc(struct akcipher_request *req)
 {
 	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
 	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
+	struct caam_rsa_req_ctx *req_ctx = akcipher_request_ctx(req);
 	struct caam_rsa_key *key = &ctx->key;
 	struct device *jrdev = ctx->dev;
 	struct rsa_edesc *edesc;
@@ -637,13 +726,9 @@  static int caam_rsa_enc(struct akcipher_request *req)
 	/* Initialize Job Descriptor */
 	init_rsa_pub_desc(edesc->hw_desc, &edesc->pdb.pub);
 
-	ret = caam_jr_enqueue(jrdev, edesc->hw_desc, rsa_pub_done,
-			      &edesc->jrentry);
-	if (ret == -EINPROGRESS)
-		return ret;
-
-	rsa_pub_unmap(jrdev, edesc, req);
-
+	req_ctx->akcipher_op_done = rsa_pub_done;
+	return akcipher_enqueue_req(jrdev, edesc->hw_desc, rsa_pub_done,
+				    req, edesc);
 init_fail:
 	rsa_io_unmap(jrdev, edesc, req);
 	kfree(edesc);
@@ -654,6 +739,7 @@  static int caam_rsa_dec_priv_f1(struct akcipher_request *req)
 {
 	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
 	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
+	struct caam_rsa_req_ctx *req_ctx = akcipher_request_ctx(req);
 	struct device *jrdev = ctx->dev;
 	struct rsa_edesc *edesc;
 	int ret;
@@ -671,13 +757,9 @@  static int caam_rsa_dec_priv_f1(struct akcipher_request *req)
 	/* Initialize Job Descriptor */
 	init_rsa_priv_f1_desc(edesc->hw_desc, &edesc->pdb.priv_f1);
 
-	ret = caam_jr_enqueue(jrdev, edesc->hw_desc, rsa_priv_f_done,
-			      &edesc->jrentry);
-	if (ret == -EINPROGRESS)
-		return ret;
-
-	rsa_priv_f1_unmap(jrdev, edesc, req);
-
+	req_ctx->akcipher_op_done = rsa_priv_f_done;
+	return akcipher_enqueue_req(jrdev, edesc->hw_desc, rsa_priv_f_done,
+				    req, edesc);
 init_fail:
 	rsa_io_unmap(jrdev, edesc, req);
 	kfree(edesc);
@@ -688,6 +770,7 @@  static int caam_rsa_dec_priv_f2(struct akcipher_request *req)
 {
 	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
 	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
+	struct caam_rsa_req_ctx *req_ctx = akcipher_request_ctx(req);
 	struct device *jrdev = ctx->dev;
 	struct rsa_edesc *edesc;
 	int ret;
@@ -705,13 +788,9 @@  static int caam_rsa_dec_priv_f2(struct akcipher_request *req)
 	/* Initialize Job Descriptor */
 	init_rsa_priv_f2_desc(edesc->hw_desc, &edesc->pdb.priv_f2);
 
-	ret = caam_jr_enqueue(jrdev, edesc->hw_desc, rsa_priv_f_done,
-			      &edesc->jrentry);
-	if (ret == -EINPROGRESS)
-		return ret;
-
-	rsa_priv_f2_unmap(jrdev, edesc, req);
-
+	req_ctx->akcipher_op_done = rsa_priv_f_done;
+	return akcipher_enqueue_req(jrdev, edesc->hw_desc, rsa_priv_f_done,
+				    req, edesc);
 init_fail:
 	rsa_io_unmap(jrdev, edesc, req);
 	kfree(edesc);
@@ -722,6 +801,7 @@  static int caam_rsa_dec_priv_f3(struct akcipher_request *req)
 {
 	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
 	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
+	struct caam_rsa_req_ctx *req_ctx = akcipher_request_ctx(req);
 	struct device *jrdev = ctx->dev;
 	struct rsa_edesc *edesc;
 	int ret;
@@ -739,13 +819,9 @@  static int caam_rsa_dec_priv_f3(struct akcipher_request *req)
 	/* Initialize Job Descriptor */
 	init_rsa_priv_f3_desc(edesc->hw_desc, &edesc->pdb.priv_f3);
 
-	ret = caam_jr_enqueue(jrdev, edesc->hw_desc, rsa_priv_f_done,
-			      &edesc->jrentry);
-	if (ret == -EINPROGRESS)
-		return ret;
-
-	rsa_priv_f3_unmap(jrdev, edesc, req);
-
+	req_ctx->akcipher_op_done = rsa_priv_f_done;
+	return akcipher_enqueue_req(jrdev, edesc->hw_desc, rsa_priv_f_done,
+				    req, edesc);
 init_fail:
 	rsa_io_unmap(jrdev, edesc, req);
 	kfree(edesc);
@@ -1037,6 +1113,10 @@  static int caam_rsa_init_tfm(struct crypto_akcipher *tfm)
 		return -ENOMEM;
 	}
 
+	ctx->enginectx.op.do_one_request = akcipher_do_one_req;
+
+	akcipher_set_reqsize(tfm, sizeof(struct caam_rsa_req_ctx));
+
 	return 0;
 }
 
diff --git a/drivers/crypto/caam/caampkc.h b/drivers/crypto/caam/caampkc.h
index e0b1076..8e6d7e0 100644
--- a/drivers/crypto/caam/caampkc.h
+++ b/drivers/crypto/caam/caampkc.h
@@ -13,6 +13,7 @@ 
 #include "compat.h"
 #include "intern.h"
 #include "pdb.h"
+#include <crypto/engine.h>
 
 /**
  * caam_priv_key_form - CAAM RSA private key representation
@@ -88,11 +89,13 @@  struct caam_rsa_key {
 
 /**
  * caam_rsa_ctx - per session context.
+ * @enginectx   : crypto engine context
  * @key         : RSA key in DMA zone
  * @dev         : device structure
  * @padding_dma : dma address of padding, for adding it to the input
  */
 struct caam_rsa_ctx {
+	struct crypto_engine_ctx enginectx;
 	struct caam_rsa_key key;
 	struct device *dev;
 	dma_addr_t padding_dma;
@@ -104,11 +107,16 @@  struct caam_rsa_ctx {
  * @src           : input scatterlist (stripped of leading zeros)
  * @fixup_src     : input scatterlist (that might be stripped of leading zeros)
  * @fixup_src_len : length of the fixup_src input scatterlist
+ * @edesc         : s/w-extended rsa descriptor
+ * @akcipher_op_done : callback used when operation is done
  */
 struct caam_rsa_req_ctx {
 	struct scatterlist src[2];
 	struct scatterlist *fixup_src;
 	unsigned int fixup_src_len;
+	struct rsa_edesc *edesc;
+	void (*akcipher_op_done)(struct device *jrdev, u32 *desc, u32 err,
+				 void *context);
 };
 
 /*