[v2,3/7] crypto: acomp - Add request chaining and virtual addresses

Message ID a11883ded326c4f4f80dcf0307ad05fd8e31abc7.1741080140.git.herbert@gondor.apana.org.au (mailing list archive)
State New
Series crypto: acomp - Add request chaining and virtual address support

Commit Message

Herbert Xu March 4, 2025, 9:25 a.m. UTC
This adds request chaining and virtual address support to the
acomp interface.

It is identical to the ahash interface, except that a new flag
CRYPTO_ACOMP_REQ_NONDMA has been added to indicate that the
virtual addresses are not suitable for DMA.  This is because
all existing and potential acomp users can provide memory that
is suitable for DMA so there is no need for a fall-back copy
path.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/acompress.c                  | 201 ++++++++++++++++++++++++++++
 include/crypto/acompress.h          |  89 ++++++++++--
 include/crypto/internal/acompress.h |  22 +++
 3 files changed, 299 insertions(+), 13 deletions(-)
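
As a usage illustration (not part of the patch), here is a minimal sketch of
a synchronous caller driving the new virtual-address path; "lzo" is only an
example algorithm name and error handling is trimmed:

	#include <crypto/acompress.h>
	#include <linux/crypto.h>

	static int example_compress_virt(const u8 *src, unsigned int slen,
					 u8 *dst, unsigned int dlen)
	{
		struct crypto_acomp *tfm;
		struct acomp_req *req;
		DECLARE_CRYPTO_WAIT(wait);
		int err;

		tfm = crypto_alloc_acomp("lzo", 0, 0);
		if (IS_ERR(tfm))
			return PTR_ERR(tfm);

		req = acomp_request_alloc(tfm);
		if (!req) {
			crypto_free_acomp(tfm);
			return -ENOMEM;
		}

		acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
					   crypto_req_done, &wait);
		/* DMA-capable virtual addresses; sets CRYPTO_ACOMP_REQ_VIRT. */
		acomp_request_set_virt(req, src, dst, slen, dlen);

		err = crypto_wait_req(crypto_acomp_compress(req), &wait);

		acomp_request_free(req);
		crypto_free_acomp(tfm);
		return err;
	}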

Comments

Sridhar, Kanchana P March 4, 2025, 9:59 p.m. UTC | #1
> -----Original Message-----
> From: Herbert Xu <herbert@gondor.apana.org.au>
> Sent: Tuesday, March 4, 2025 1:25 AM
> To: Linux Crypto Mailing List <linux-crypto@vger.kernel.org>
> Cc: linux-mm@kvack.org; Yosry Ahmed <yosry.ahmed@linux.dev>; Sridhar,
> Kanchana P <kanchana.p.sridhar@intel.com>
> Subject: [v2 PATCH 3/7] crypto: acomp - Add request chaining and virtual
> addresses
> 
> This adds request chaining and virtual address support to the
> acomp interface.
> 
> It is identical to the ahash interface, except that a new flag
> CRYPTO_ACOMP_REQ_NONDMA has been added to indicate that the
> virtual addresses are not suitable for DMA.  This is because
> all existing and potential acomp users can provide memory that
> is suitable for DMA so there is no need for a fall-back copy
> path.
> 
> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
> ---
>  crypto/acompress.c                  | 201 ++++++++++++++++++++++++++++
>  include/crypto/acompress.h          |  89 ++++++++++--
>  include/crypto/internal/acompress.h |  22 +++
>  3 files changed, 299 insertions(+), 13 deletions(-)
> 
> diff --git a/crypto/acompress.c b/crypto/acompress.c
> index 30176316140a..d2103d4e42cc 100644
> --- a/crypto/acompress.c
> +++ b/crypto/acompress.c
> @@ -23,6 +23,8 @@ struct crypto_scomp;
> 
>  static const struct crypto_type crypto_acomp_type;
> 
> +static void acomp_reqchain_done(void *data, int err);
> +
>  static inline struct acomp_alg *__crypto_acomp_alg(struct crypto_alg *alg)
>  {
>  	return container_of(alg, struct acomp_alg, calg.base);
> @@ -153,6 +155,205 @@ void acomp_request_free(struct acomp_req *req)
>  }
>  EXPORT_SYMBOL_GPL(acomp_request_free);
> 
> +static bool acomp_request_has_nondma(struct acomp_req *req)
> +{
> +	struct acomp_req *r2;
> +
> +	if (acomp_request_isnondma(req))
> +		return true;
> +
> +	list_for_each_entry(r2, &req->base.list, base.list)
> +		if (acomp_request_isnondma(r2))
> +			return true;
> +
> +	return false;
> +}
> +
> +static void acomp_save_req(struct acomp_req *req, crypto_completion_t cplt)
> +{
> +	struct crypto_acomp *tfm = crypto_acomp_reqtfm(req);
> +	struct acomp_req_chain *state = &req->chain;
> +
> +	if (!acomp_is_async(tfm))
> +		return;
> +
> +	state->compl = req->base.complete;
> +	state->data = req->base.data;
> +	req->base.complete = cplt;
> +	req->base.data = state;
> +	state->req0 = req;
> +}
> +
> +static void acomp_restore_req(struct acomp_req_chain *state)
> +{
> +	struct acomp_req *req = state->req0;
> +	struct crypto_acomp *tfm;
> +
> +	tfm = crypto_acomp_reqtfm(req);
> +	if (!acomp_is_async(tfm))
> +		return;
> +
> +	req->base.complete = state->compl;
> +	req->base.data = state->data;
> +}
> +
> +static void acomp_reqchain_virt(struct acomp_req_chain *state, int err)
> +{
> +	struct acomp_req *req = state->cur;
> +	unsigned int slen = req->slen;
> +	unsigned int dlen = req->dlen;
> +
> +	req->base.err = err;
> +	if (!state->src)
> +		return;
> +
> +	acomp_request_set_virt(req, state->src, state->dst, slen, dlen);
> +	state->src = NULL;
> +}
> +
> +static int acomp_reqchain_finish(struct acomp_req_chain *state,
> +				 int err, u32 mask)
> +{
> +	struct acomp_req *req0 = state->req0;
> +	struct acomp_req *req = state->cur;
> +	struct acomp_req *n;
> +
> +	acomp_reqchain_virt(state, err);

Unless I am missing something, this seems to be future-proofing, based
on the initial checks you've implemented in acomp_do_req_chain().

> +
> +	if (req != req0)
> +		list_add_tail(&req->base.list, &req0->base.list);
> +
> +	list_for_each_entry_safe(req, n, &state->head, base.list) {
> +		list_del_init(&req->base.list);
> +
> +		req->base.flags &= mask;
> +		req->base.complete = acomp_reqchain_done;
> +		req->base.data = state;
> +		state->cur = req;
> +
> +		if (acomp_request_isvirt(req)) {
> +			unsigned int slen = req->slen;
> +			unsigned int dlen = req->dlen;
> +			const u8 *svirt = req->svirt;
> +			u8 *dvirt = req->dvirt;
> +
> +			state->src = svirt;
> +			state->dst = dvirt;
> +
> +			sg_init_one(&state->ssg, svirt, slen);
> +			sg_init_one(&state->dsg, dvirt, dlen);
> +
> +			acomp_request_set_params(req, &state->ssg, &state->dsg,
> +						 slen, dlen);
> +		}
> +
> +		err = state->op(req);
> +
> +		if (err == -EINPROGRESS) {
> +			if (!list_empty(&state->head))
> +				err = -EBUSY;
> +			goto out;
> +		}
> +
> +		if (err == -EBUSY)
> +			goto out;

This is a fully synchronous way of processing the request chain, and
will not work for iaa_crypto's submit-then-poll-for-completions paradigm,
essential for us to process the compressions in parallel in hardware.
Without parallelism, we will not derive the full benefits of IAA.

Would you be willing to incorporate the acomp_do_async_req_chain()
that I have implemented in v8 of my patch series [1], to enable the
iaa_crypto driver's asynchronous way of processing the request chain and
get the parallelism, and/or adapt your implementation to enable this?

Better still, if you agree that the virtual address support is entirely
future-proofing, I would ask you to consider reviewing and improving my
well-validated implementation of request chaining in [1], with the goal of
merging it with parallel/series support for the request chain, and
introducing virtual address support at a later time.

[1] https://patchwork.kernel.org/project/linux-mm/patch/20250303084724.6490-2-kanchana.p.sridhar@intel.com/


> +
> +		acomp_reqchain_virt(state, err);

Is this really needed? From what I can understand, the important thing this
call does for the implementation is to set req->base.err. It seems like
compute overhead (which matters for kernel users like zswap) just to set
the request's error status.

In general, the calls related to virtual address support are a bit confusing,
since you check right up front in acomp_do_req_chain() with
"if (acomp_request_has_nondma(req)) return -EINVAL".

Imo, it appears that this is all we need until there are in-kernel users that
require the virtual address future-proofing. Please correct me if I am missing
something significant.

Also, is my understanding correct that the zswap code that sets up the SG
lists for compress/decompress is not impacted by this?
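
For reference, the pre-existing SG-list path is left in place by this patch:
acomp_request_set_params() clears CRYPTO_ACOMP_REQ_VIRT, so SG users keep
taking the scatterlist route. A minimal sketch (src_buf/dst_buf are
placeholder buffers):

	struct scatterlist ssg, dsg;

	sg_init_one(&ssg, src_buf, slen);
	sg_init_one(&dsg, dst_buf, dlen);
	/* Clears CRYPTO_ACOMP_REQ_VIRT, selecting the SG-list path. */
	acomp_request_set_params(req, &ssg, &dsg, slen, dlen);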


> +		list_add_tail(&req->base.list, &req0->base.list);
> +	}
> +
> +	acomp_restore_req(state);
> +
> +out:
> +	return err;
> +}
> +
> +static void acomp_reqchain_done(void *data, int err)
> +{
> +	struct acomp_req_chain *state = data;
> +	crypto_completion_t compl = state->compl;
> +
> +	data = state->data;
> +
> +	if (err == -EINPROGRESS) {
> +		if (!list_empty(&state->head))
> +			return;
> +		goto notify;
> +	}
> +
> +	err = acomp_reqchain_finish(state, err, CRYPTO_TFM_REQ_MAY_BACKLOG);
> +	if (err == -EBUSY)
> +		return;
> +
> +notify:
> +	compl(data, err);
> +}
> +
> +static int acomp_do_req_chain(struct acomp_req *req,
> +			      int (*op)(struct acomp_req *req))
> +{
> +	struct crypto_acomp *tfm = crypto_acomp_reqtfm(req);
> +	struct acomp_req_chain *state = &req->chain;
> +	int err;
> +
> +	if (crypto_acomp_req_chain(tfm) ||
> +	    (!acomp_request_chained(req) && !acomp_request_isvirt(req)))
> +		return op(req);

Isn't this a bug? If an algorithm opts in and sets CRYPTO_ALG_REQ_CHAIN
in its cra_flags, the above statement will always be true, the "op" will be
called only on the first request, and this will return. Am I missing something?

> +
> +	/*
> +	 * There are no in-kernel users that do this.  If ever
> +	 * such users come into being then we could add a fall-back
> +	 * path.
> +	 */
> +	if (acomp_request_has_nondma(req))
> +		return -EINVAL;

As mentioned earlier, is this sufficient for now, and is the virtual address
support really future-proofing?

> +
> +	if (acomp_is_async(tfm)) {
> +		acomp_save_req(req, acomp_reqchain_done);
> +		state = req->base.data;
> +	}
> +
> +	state->op = op;
> +	state->cur = req;
> +	state->src = NULL;
> +	INIT_LIST_HEAD(&state->head);
> +	list_splice_init(&req->base.list, &state->head);
> +
> +	if (acomp_request_isvirt(req)) {

Based on the above check for acomp_request_has_nondma(), it should never
get here, IIUC?

In general, can you shed some light on how you envision zswap code to
change based on this patchset?

Thanks,
Kanchana

> +		unsigned int slen = req->slen;
> +		unsigned int dlen = req->dlen;
> +		const u8 *svirt = req->svirt;
> +		u8 *dvirt = req->dvirt;
> +
> +		state->src = svirt;
> +		state->dst = dvirt;
> +
> +		sg_init_one(&state->ssg, svirt, slen);
> +		sg_init_one(&state->dsg, dvirt, dlen);
> +
> +		acomp_request_set_params(req, &state->ssg, &state->dsg,
> +					 slen, dlen);
> +	}
> +
> +	err = op(req);
> +	if (err == -EBUSY || err == -EINPROGRESS)
> +		return -EBUSY;
> +
> +	return acomp_reqchain_finish(state, err, ~0);
> +}
> +
> +int crypto_acomp_compress(struct acomp_req *req)
> +{
> +	return acomp_do_req_chain(req, crypto_acomp_reqtfm(req)->compress);
> +}
> +EXPORT_SYMBOL_GPL(crypto_acomp_compress);
> +
> +int crypto_acomp_decompress(struct acomp_req *req)
> +{
> +	return acomp_do_req_chain(req, crypto_acomp_reqtfm(req)->decompress);
> +}
> +EXPORT_SYMBOL_GPL(crypto_acomp_decompress);
> +
>  void comp_prepare_alg(struct comp_alg_common *alg)
>  {
>  	struct crypto_alg *base = &alg->base;
> diff --git a/include/crypto/acompress.h b/include/crypto/acompress.h
> index b6d5136e689d..15bb13e47f8b 100644
> --- a/include/crypto/acompress.h
> +++ b/include/crypto/acompress.h
> @@ -12,10 +12,34 @@
>  #include <linux/atomic.h>
>  #include <linux/container_of.h>
>  #include <linux/crypto.h>
> +#include <linux/scatterlist.h>
> +#include <linux/types.h>
> 
>  #define CRYPTO_ACOMP_ALLOC_OUTPUT	0x00000001
> +
> +/* Set this bit to use a virtual address instead of an SG list. */
> +#define CRYPTO_ACOMP_REQ_VIRT		0x00000002
> +
> +/* Set this bit if the virtual address buffer cannot be used for DMA. */
> +#define CRYPTO_ACOMP_REQ_NONDMA		0x00000004
> +
>  #define CRYPTO_ACOMP_DST_MAX		131072
> 
> +struct acomp_req;
> +
> +struct acomp_req_chain {
> +	struct list_head head;
> +	struct acomp_req *req0;
> +	struct acomp_req *cur;
> +	int (*op)(struct acomp_req *req);
> +	crypto_completion_t compl;
> +	void *data;
> +	struct scatterlist ssg;
> +	struct scatterlist dsg;
> +	const u8 *src;
> +	u8 *dst;
> +};
> +
>  /**
>   * struct acomp_req - asynchronous (de)compression request
>   *
> @@ -24,14 +48,24 @@
>   * @dst:	Destination data
>   * @slen:	Size of the input buffer
>   * @dlen:	Size of the output buffer and number of bytes produced
> + * @chain:	Private API code data, do not use
>   * @__ctx:	Start of private context data
>   */
>  struct acomp_req {
>  	struct crypto_async_request base;
> -	struct scatterlist *src;
> -	struct scatterlist *dst;
> +	union {
> +		struct scatterlist *src;
> +		const u8 *svirt;
> +	};
> +	union {
> +		struct scatterlist *dst;
> +		u8 *dvirt;
> +	};
>  	unsigned int slen;
>  	unsigned int dlen;
> +
> +	struct acomp_req_chain chain;
> +
>  	void *__ctx[] CRYPTO_MINALIGN_ATTR;
>  };
> 
> @@ -200,10 +234,14 @@ static inline void acomp_request_set_callback(struct acomp_req *req,
>  					      crypto_completion_t cmpl,
>  					      void *data)
>  {
> +	u32 keep = CRYPTO_ACOMP_ALLOC_OUTPUT | CRYPTO_ACOMP_REQ_VIRT;
> +
>  	req->base.complete = cmpl;
>  	req->base.data = data;
> -	req->base.flags &= CRYPTO_ACOMP_ALLOC_OUTPUT;
> -	req->base.flags |= flgs & ~CRYPTO_ACOMP_ALLOC_OUTPUT;
> +	req->base.flags &= keep;
> +	req->base.flags |= flgs & ~keep;
> +
> +	crypto_reqchain_init(&req->base);
>  }
> 
>  /**
> @@ -230,11 +268,42 @@ static inline void acomp_request_set_params(struct acomp_req *req,
>  	req->slen = slen;
>  	req->dlen = dlen;
> 
> -	req->base.flags &= ~CRYPTO_ACOMP_ALLOC_OUTPUT;
> +	req->base.flags &= ~(CRYPTO_ACOMP_ALLOC_OUTPUT | CRYPTO_ACOMP_REQ_VIRT);
>  	if (!req->dst)
>  		req->base.flags |= CRYPTO_ACOMP_ALLOC_OUTPUT;
>  }
> 
> +/**
> + * acomp_request_set_virt() -- Sets virtual address request parameters
> + *
> + * Sets virtual address parameters required by an acomp operation
> + *
> + * @req:	asynchronous compress request
> + * @src:	virtual address pointer to input buffer
> + * @dst:	virtual address pointer to output buffer.
> + * @slen:	size of the input buffer
> + * @dlen:	size of the output buffer.
> + */
> +static inline void acomp_request_set_virt(struct acomp_req *req,
> +					  const u8 *src, u8 *dst,
> +					  unsigned int slen,
> +					  unsigned int dlen)
> +{
> +	req->svirt = src;
> +	req->dvirt = dst;
> +	req->slen = slen;
> +	req->dlen = dlen;
> +
> +	req->base.flags &= ~CRYPTO_ACOMP_ALLOC_OUTPUT;
> +	req->base.flags |= CRYPTO_ACOMP_REQ_VIRT;
> +}
> +
> +static inline void acomp_request_chain(struct acomp_req *req,
> +				       struct acomp_req *head)
> +{
> +	crypto_request_chain(&req->base, &head->base);
> +}
> +
>  /**
>   * crypto_acomp_compress() -- Invoke asynchronous compress operation
>   *
> @@ -244,10 +313,7 @@ static inline void acomp_request_set_params(struct acomp_req *req,
>   *
>   * Return:	zero on success; error code in case of error
>   */
> -static inline int crypto_acomp_compress(struct acomp_req *req)
> -{
> -	return crypto_acomp_reqtfm(req)->compress(req);
> -}
> +int crypto_acomp_compress(struct acomp_req *req);
> 
>  /**
>  * crypto_acomp_decompress() -- Invoke asynchronous decompress operation
> @@ -258,9 +324,6 @@ static inline int crypto_acomp_compress(struct acomp_req *req)
>   *
>   * Return:	zero on success; error code in case of error
>   */
> -static inline int crypto_acomp_decompress(struct acomp_req *req)
> -{
> -	return crypto_acomp_reqtfm(req)->decompress(req);
> -}
> +int crypto_acomp_decompress(struct acomp_req *req);
> 
>  #endif
> diff --git a/include/crypto/internal/acompress.h b/include/crypto/internal/acompress.h
> index 8831edaafc05..b3b48dea7f2f 100644
> --- a/include/crypto/internal/acompress.h
> +++ b/include/crypto/internal/acompress.h
> @@ -109,4 +109,26 @@ void crypto_unregister_acomp(struct acomp_alg *alg);
>  int crypto_register_acomps(struct acomp_alg *algs, int count);
>  void crypto_unregister_acomps(struct acomp_alg *algs, int count);
> 
> +static inline bool acomp_request_chained(struct acomp_req *req)
> +{
> +	return crypto_request_chained(&req->base);
> +}
> +
> +static inline bool acomp_request_isvirt(struct acomp_req *req)
> +{
> +	return req->base.flags & CRYPTO_ACOMP_REQ_VIRT;
> +}
> +
> +static inline bool acomp_request_isnondma(struct acomp_req *req)
> +{
> +	return (req->base.flags &
> +		(CRYPTO_ACOMP_REQ_NONDMA | CRYPTO_ACOMP_REQ_VIRT)) ==
> +	       (CRYPTO_ACOMP_REQ_NONDMA | CRYPTO_ACOMP_REQ_VIRT);
> +
> +static inline bool crypto_acomp_req_chain(struct crypto_acomp *tfm)
> +{
> +	return crypto_tfm_req_chain(&tfm->base);
> +}
> +
>  #endif
> --
> 2.39.5
Herbert Xu March 5, 2025, 1:51 a.m. UTC | #2
On Tue, Mar 04, 2025 at 09:59:59PM +0000, Sridhar, Kanchana P wrote:
>
> > +static int acomp_reqchain_finish(struct acomp_req_chain *state,
> > +				 int err, u32 mask)
> > +{
> > +	struct acomp_req *req0 = state->req0;
> > +	struct acomp_req *req = state->cur;
> > +	struct acomp_req *n;
> > +
> > +	acomp_reqchain_virt(state, err);
> 
> Unless I am missing something, this seems to be future-proofing, based
> on the initial checks you've implemented in acomp_do_req_chain().
> 
> > +
> > +	if (req != req0)
> > +		list_add_tail(&req->base.list, &req0->base.list);
> > +
> > +	list_for_each_entry_safe(req, n, &state->head, base.list) {
> > +		list_del_init(&req->base.list);
> > +
> > +		req->base.flags &= mask;
> > +		req->base.complete = acomp_reqchain_done;
> > +		req->base.data = state;
> > +		state->cur = req;
> > +
> > +		if (acomp_request_isvirt(req)) {
> > +			unsigned int slen = req->slen;
> > +			unsigned int dlen = req->dlen;
> > +			const u8 *svirt = req->svirt;
> > +			u8 *dvirt = req->dvirt;
> > +
> > +			state->src = svirt;
> > +			state->dst = dvirt;
> > +
> > +			sg_init_one(&state->ssg, svirt, slen);
> > +			sg_init_one(&state->dsg, dvirt, dlen);
> > +
> > +			acomp_request_set_params(req, &state->ssg, &state->dsg,
> > +						 slen, dlen);
> > +		}
> > +
> > +		err = state->op(req);
> > +
> > +		if (err == -EINPROGRESS) {
> > +			if (!list_empty(&state->head))
> > +				err = -EBUSY;
> > +			goto out;
> > +		}
> > +
> > +		if (err == -EBUSY)
> > +			goto out;
> 
> This is a fully synchronous way of processing the request chain, and
> will not work for iaa_crypto's submit-then-poll-for-completions paradigm,
> essential for us to process the compressions in parallel in hardware.
> Without parallelism, we will not derive the full benefits of IAA.

This function is not for chaining drivers at all.  It's for existing
drivers that do *not* support chaining.

If your driver supports chaining, then it should not come through
acomp_reqchain_finish in the first place.  The acomp_reqchain code
translates chained requests to simple unchained ones for the
existing drivers.  If the driver supports chaining natively, then
it will bypass all this and go straight to the driver, where you can do
whatever you want with the chained request.
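
For illustration, a minimal caller-side sketch of chaining (not from the
patch; req0 and req1 are assumed to be two fully set-up acomp requests on
the same tfm):

	/* Chain req1 onto the head request req0; only req0 is submitted. */
	acomp_request_chain(req1, req0);

	err = crypto_acomp_compress(req0);
	/*
	 * A driver that sets CRYPTO_ALG_REQ_CHAIN sees the whole chain in
	 * its ->compress(); otherwise the acomp core walks the list and
	 * issues each request individually, recording per-request status
	 * in req->base.err.
	 */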

Cheers,
Sridhar, Kanchana P March 5, 2025, 8:09 p.m. UTC | #3
> -----Original Message-----
> From: Herbert Xu <herbert@gondor.apana.org.au>
> Sent: Tuesday, March 4, 2025 5:51 PM
> To: Sridhar, Kanchana P <kanchana.p.sridhar@intel.com>
> Cc: Linux Crypto Mailing List <linux-crypto@vger.kernel.org>; linux-
> mm@kvack.org; Yosry Ahmed <yosry.ahmed@linux.dev>
> Subject: Re: [v2 PATCH 3/7] crypto: acomp - Add request chaining and virtual
> addresses
> 
> On Tue, Mar 04, 2025 at 09:59:59PM +0000, Sridhar, Kanchana P wrote:
> >
> > > +static int acomp_reqchain_finish(struct acomp_req_chain *state,
> > > +				 int err, u32 mask)
> > > +{
> > > +	struct acomp_req *req0 = state->req0;
> > > +	struct acomp_req *req = state->cur;
> > > +	struct acomp_req *n;
> > > +
> > > +	acomp_reqchain_virt(state, err);
> >
> > Unless I am missing something, this seems to be future-proofing, based
> > on the initial checks you've implemented in acomp_do_req_chain().
> >
> > > +
> > > +	if (req != req0)
> > > +		list_add_tail(&req->base.list, &req0->base.list);
> > > +
> > > +	list_for_each_entry_safe(req, n, &state->head, base.list) {
> > > +		list_del_init(&req->base.list);
> > > +
> > > +		req->base.flags &= mask;
> > > +		req->base.complete = acomp_reqchain_done;
> > > +		req->base.data = state;
> > > +		state->cur = req;
> > > +
> > > +		if (acomp_request_isvirt(req)) {
> > > +			unsigned int slen = req->slen;
> > > +			unsigned int dlen = req->dlen;
> > > +			const u8 *svirt = req->svirt;
> > > +			u8 *dvirt = req->dvirt;
> > > +
> > > +			state->src = svirt;
> > > +			state->dst = dvirt;
> > > +
> > > +			sg_init_one(&state->ssg, svirt, slen);
> > > +			sg_init_one(&state->dsg, dvirt, dlen);
> > > +
> > > +			acomp_request_set_params(req, &state->ssg, &state->dsg,
> > > +						 slen, dlen);
> > > +		}
> > > +
> > > +		err = state->op(req);
> > > +
> > > +		if (err == -EINPROGRESS) {
> > > +			if (!list_empty(&state->head))
> > > +				err = -EBUSY;
> > > +			goto out;
> > > +		}
> > > +
> > > +		if (err == -EBUSY)
> > > +			goto out;
> >
> > This is a fully synchronous way of processing the request chain, and
> > will not work for iaa_crypto's submit-then-poll-for-completions paradigm,
> > essential for us to process the compressions in parallel in hardware.
> > Without parallelism, we will not derive the full benefits of IAA.
> 
> This function is not for chaining drivers at all.  It's for existing
> drivers that do *not* support chaining.
> 
> If your driver supports chaining, then it should not come through
> acomp_reqchain_finish in the first place.  The acomp_reqchain code
> translates chained requests to simple unchained ones for the
> existing drivers.  If the driver supports chaining natively, then
> it will bypass all this and go straight to the driver, where you can do
> whatever you want with the chained request.

Hi Herbert,

Can you please take a look at patches 1 (only the acomp_do_async_req_chain()
interface), 2 and 4 in my latest v8 "zswap IAA compress batching" series [2],
wherein I have tried to address your comments [1] from v6, and let me know if
this implements batching with request chaining as you envision?

[1] https://patchwork.kernel.org/comment/26246560/
[2] https://patchwork.kernel.org/project/linux-mm/list/?series=939487

If this architecture looks OK from your perspective, then can you please
let me know whether "acomp_do_async_req_chain()" would be helpful in general,
outside of the iaa_crypto driver, or whether you would recommend keeping it
specific to iaa_crypto?

Thanks,
Kanchana

> 
> Cheers,
> --
> Email: Herbert Xu <herbert@gondor.apana.org.au>
> Home Page: http://gondor.apana.org.au/~herbert/
> PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

Patch

diff --git a/crypto/acompress.c b/crypto/acompress.c
index 30176316140a..d2103d4e42cc 100644
--- a/crypto/acompress.c
+++ b/crypto/acompress.c
@@ -23,6 +23,8 @@  struct crypto_scomp;
 
 static const struct crypto_type crypto_acomp_type;
 
+static void acomp_reqchain_done(void *data, int err);
+
 static inline struct acomp_alg *__crypto_acomp_alg(struct crypto_alg *alg)
 {
 	return container_of(alg, struct acomp_alg, calg.base);
@@ -153,6 +155,205 @@  void acomp_request_free(struct acomp_req *req)
 }
 EXPORT_SYMBOL_GPL(acomp_request_free);
 
+static bool acomp_request_has_nondma(struct acomp_req *req)
+{
+	struct acomp_req *r2;
+
+	if (acomp_request_isnondma(req))
+		return true;
+
+	list_for_each_entry(r2, &req->base.list, base.list)
+		if (acomp_request_isnondma(r2))
+			return true;
+
+	return false;
+}
+
+static void acomp_save_req(struct acomp_req *req, crypto_completion_t cplt)
+{
+	struct crypto_acomp *tfm = crypto_acomp_reqtfm(req);
+	struct acomp_req_chain *state = &req->chain;
+
+	if (!acomp_is_async(tfm))
+		return;
+
+	state->compl = req->base.complete;
+	state->data = req->base.data;
+	req->base.complete = cplt;
+	req->base.data = state;
+	state->req0 = req;
+}
+
+static void acomp_restore_req(struct acomp_req_chain *state)
+{
+	struct acomp_req *req = state->req0;
+	struct crypto_acomp *tfm;
+
+	tfm = crypto_acomp_reqtfm(req);
+	if (!acomp_is_async(tfm))
+		return;
+
+	req->base.complete = state->compl;
+	req->base.data = state->data;
+}
+
+static void acomp_reqchain_virt(struct acomp_req_chain *state, int err)
+{
+	struct acomp_req *req = state->cur;
+	unsigned int slen = req->slen;
+	unsigned int dlen = req->dlen;
+
+	req->base.err = err;
+	if (!state->src)
+		return;
+
+	acomp_request_set_virt(req, state->src, state->dst, slen, dlen);
+	state->src = NULL;
+}
+
+static int acomp_reqchain_finish(struct acomp_req_chain *state,
+				 int err, u32 mask)
+{
+	struct acomp_req *req0 = state->req0;
+	struct acomp_req *req = state->cur;
+	struct acomp_req *n;
+
+	acomp_reqchain_virt(state, err);
+
+	if (req != req0)
+		list_add_tail(&req->base.list, &req0->base.list);
+
+	list_for_each_entry_safe(req, n, &state->head, base.list) {
+		list_del_init(&req->base.list);
+
+		req->base.flags &= mask;
+		req->base.complete = acomp_reqchain_done;
+		req->base.data = state;
+		state->cur = req;
+
+		if (acomp_request_isvirt(req)) {
+			unsigned int slen = req->slen;
+			unsigned int dlen = req->dlen;
+			const u8 *svirt = req->svirt;
+			u8 *dvirt = req->dvirt;
+
+			state->src = svirt;
+			state->dst = dvirt;
+
+			sg_init_one(&state->ssg, svirt, slen);
+			sg_init_one(&state->dsg, dvirt, dlen);
+
+			acomp_request_set_params(req, &state->ssg, &state->dsg,
+						 slen, dlen);
+		}
+
+		err = state->op(req);
+
+		if (err == -EINPROGRESS) {
+			if (!list_empty(&state->head))
+				err = -EBUSY;
+			goto out;
+		}
+
+		if (err == -EBUSY)
+			goto out;
+
+		acomp_reqchain_virt(state, err);
+		list_add_tail(&req->base.list, &req0->base.list);
+	}
+
+	acomp_restore_req(state);
+
+out:
+	return err;
+}
+
+static void acomp_reqchain_done(void *data, int err)
+{
+	struct acomp_req_chain *state = data;
+	crypto_completion_t compl = state->compl;
+
+	data = state->data;
+
+	if (err == -EINPROGRESS) {
+		if (!list_empty(&state->head))
+			return;
+		goto notify;
+	}
+
+	err = acomp_reqchain_finish(state, err, CRYPTO_TFM_REQ_MAY_BACKLOG);
+	if (err == -EBUSY)
+		return;
+
+notify:
+	compl(data, err);
+}
+
+static int acomp_do_req_chain(struct acomp_req *req,
+			      int (*op)(struct acomp_req *req))
+{
+	struct crypto_acomp *tfm = crypto_acomp_reqtfm(req);
+	struct acomp_req_chain *state = &req->chain;
+	int err;
+
+	if (crypto_acomp_req_chain(tfm) ||
+	    (!acomp_request_chained(req) && !acomp_request_isvirt(req)))
+		return op(req);
+
+	/*
+	 * There are no in-kernel users that do this.  If ever
+	 * such users come into being then we could add a fall-back
+	 * path.
+	 */
+	if (acomp_request_has_nondma(req))
+		return -EINVAL;
+
+	if (acomp_is_async(tfm)) {
+		acomp_save_req(req, acomp_reqchain_done);
+		state = req->base.data;
+	}
+
+	state->op = op;
+	state->cur = req;
+	state->src = NULL;
+	INIT_LIST_HEAD(&state->head);
+	list_splice_init(&req->base.list, &state->head);
+
+	if (acomp_request_isvirt(req)) {
+		unsigned int slen = req->slen;
+		unsigned int dlen = req->dlen;
+		const u8 *svirt = req->svirt;
+		u8 *dvirt = req->dvirt;
+
+		state->src = svirt;
+		state->dst = dvirt;
+
+		sg_init_one(&state->ssg, svirt, slen);
+		sg_init_one(&state->dsg, dvirt, dlen);
+
+		acomp_request_set_params(req, &state->ssg, &state->dsg,
+					 slen, dlen);
+	}
+
+	err = op(req);
+	if (err == -EBUSY || err == -EINPROGRESS)
+		return -EBUSY;
+
+	return acomp_reqchain_finish(state, err, ~0);
+}
+
+int crypto_acomp_compress(struct acomp_req *req)
+{
+	return acomp_do_req_chain(req, crypto_acomp_reqtfm(req)->compress);
+}
+EXPORT_SYMBOL_GPL(crypto_acomp_compress);
+
+int crypto_acomp_decompress(struct acomp_req *req)
+{
+	return acomp_do_req_chain(req, crypto_acomp_reqtfm(req)->decompress);
+}
+EXPORT_SYMBOL_GPL(crypto_acomp_decompress);
+
 void comp_prepare_alg(struct comp_alg_common *alg)
 {
 	struct crypto_alg *base = &alg->base;
diff --git a/include/crypto/acompress.h b/include/crypto/acompress.h
index b6d5136e689d..15bb13e47f8b 100644
--- a/include/crypto/acompress.h
+++ b/include/crypto/acompress.h
@@ -12,10 +12,34 @@ 
 #include <linux/atomic.h>
 #include <linux/container_of.h>
 #include <linux/crypto.h>
+#include <linux/scatterlist.h>
+#include <linux/types.h>
 
 #define CRYPTO_ACOMP_ALLOC_OUTPUT	0x00000001
+
+/* Set this bit to use a virtual address instead of an SG list. */
+#define CRYPTO_ACOMP_REQ_VIRT		0x00000002
+
+/* Set this bit if the virtual address buffer cannot be used for DMA. */
+#define CRYPTO_ACOMP_REQ_NONDMA		0x00000004
+
 #define CRYPTO_ACOMP_DST_MAX		131072
 
+struct acomp_req;
+
+struct acomp_req_chain {
+	struct list_head head;
+	struct acomp_req *req0;
+	struct acomp_req *cur;
+	int (*op)(struct acomp_req *req);
+	crypto_completion_t compl;
+	void *data;
+	struct scatterlist ssg;
+	struct scatterlist dsg;
+	const u8 *src;
+	u8 *dst;
+};
+
 /**
  * struct acomp_req - asynchronous (de)compression request
  *
@@ -24,14 +48,24 @@ 
  * @dst:	Destination data
  * @slen:	Size of the input buffer
  * @dlen:	Size of the output buffer and number of bytes produced
+ * @chain:	Private API code data, do not use
  * @__ctx:	Start of private context data
  */
 struct acomp_req {
 	struct crypto_async_request base;
-	struct scatterlist *src;
-	struct scatterlist *dst;
+	union {
+		struct scatterlist *src;
+		const u8 *svirt;
+	};
+	union {
+		struct scatterlist *dst;
+		u8 *dvirt;
+	};
 	unsigned int slen;
 	unsigned int dlen;
+
+	struct acomp_req_chain chain;
+
 	void *__ctx[] CRYPTO_MINALIGN_ATTR;
 };
 
@@ -200,10 +234,14 @@  static inline void acomp_request_set_callback(struct acomp_req *req,
 					      crypto_completion_t cmpl,
 					      void *data)
 {
+	u32 keep = CRYPTO_ACOMP_ALLOC_OUTPUT | CRYPTO_ACOMP_REQ_VIRT;
+
 	req->base.complete = cmpl;
 	req->base.data = data;
-	req->base.flags &= CRYPTO_ACOMP_ALLOC_OUTPUT;
-	req->base.flags |= flgs & ~CRYPTO_ACOMP_ALLOC_OUTPUT;
+	req->base.flags &= keep;
+	req->base.flags |= flgs & ~keep;
+
+	crypto_reqchain_init(&req->base);
 }
 
 /**
@@ -230,11 +268,42 @@  static inline void acomp_request_set_params(struct acomp_req *req,
 	req->slen = slen;
 	req->dlen = dlen;
 
-	req->base.flags &= ~CRYPTO_ACOMP_ALLOC_OUTPUT;
+	req->base.flags &= ~(CRYPTO_ACOMP_ALLOC_OUTPUT | CRYPTO_ACOMP_REQ_VIRT);
 	if (!req->dst)
 		req->base.flags |= CRYPTO_ACOMP_ALLOC_OUTPUT;
 }
 
+/**
+ * acomp_request_set_virt() -- Sets virtual address request parameters
+ *
+ * Sets virtual address parameters required by an acomp operation
+ *
+ * @req:	asynchronous compress request
+ * @src:	virtual address pointer to input buffer
+ * @dst:	virtual address pointer to output buffer.
+ * @slen:	size of the input buffer
+ * @dlen:	size of the output buffer.
+ */
+static inline void acomp_request_set_virt(struct acomp_req *req,
+					  const u8 *src, u8 *dst,
+					  unsigned int slen,
+					  unsigned int dlen)
+{
+	req->svirt = src;
+	req->dvirt = dst;
+	req->slen = slen;
+	req->dlen = dlen;
+
+	req->base.flags &= ~CRYPTO_ACOMP_ALLOC_OUTPUT;
+	req->base.flags |= CRYPTO_ACOMP_REQ_VIRT;
+}
+
+static inline void acomp_request_chain(struct acomp_req *req,
+				       struct acomp_req *head)
+{
+	crypto_request_chain(&req->base, &head->base);
+}
+
 /**
  * crypto_acomp_compress() -- Invoke asynchronous compress operation
  *
@@ -244,10 +313,7 @@  static inline void acomp_request_set_params(struct acomp_req *req,
  *
  * Return:	zero on success; error code in case of error
  */
-static inline int crypto_acomp_compress(struct acomp_req *req)
-{
-	return crypto_acomp_reqtfm(req)->compress(req);
-}
+int crypto_acomp_compress(struct acomp_req *req);
 
 /**
  * crypto_acomp_decompress() -- Invoke asynchronous decompress operation
@@ -258,9 +324,6 @@  static inline int crypto_acomp_compress(struct acomp_req *req)
  *
  * Return:	zero on success; error code in case of error
  */
-static inline int crypto_acomp_decompress(struct acomp_req *req)
-{
-	return crypto_acomp_reqtfm(req)->decompress(req);
-}
+int crypto_acomp_decompress(struct acomp_req *req);
 
 #endif
diff --git a/include/crypto/internal/acompress.h b/include/crypto/internal/acompress.h
index 8831edaafc05..b3b48dea7f2f 100644
--- a/include/crypto/internal/acompress.h
+++ b/include/crypto/internal/acompress.h
@@ -109,4 +109,26 @@  void crypto_unregister_acomp(struct acomp_alg *alg);
 int crypto_register_acomps(struct acomp_alg *algs, int count);
 void crypto_unregister_acomps(struct acomp_alg *algs, int count);
 
+static inline bool acomp_request_chained(struct acomp_req *req)
+{
+	return crypto_request_chained(&req->base);
+}
+
+static inline bool acomp_request_isvirt(struct acomp_req *req)
+{
+	return req->base.flags & CRYPTO_ACOMP_REQ_VIRT;
+}
+
+static inline bool acomp_request_isnondma(struct acomp_req *req)
+{
+	return (req->base.flags &
+		(CRYPTO_ACOMP_REQ_NONDMA | CRYPTO_ACOMP_REQ_VIRT)) ==
+	       (CRYPTO_ACOMP_REQ_NONDMA | CRYPTO_ACOMP_REQ_VIRT);
+}
+
+static inline bool crypto_acomp_req_chain(struct crypto_acomp *tfm)
+{
+	return crypto_tfm_req_chain(&tfm->base);
+}
+
 #endif