@@ -23,6 +23,19 @@ struct crypto_scomp;
static const struct crypto_type crypto_acomp_type;
+struct acomp_save_req_state {
+	struct list_head head;
+	struct acomp_req *req0;
+	struct acomp_req *cur;
+	int (*op)(struct acomp_req *req);
+	crypto_completion_t compl;
+	void *data;
+};
+
+static void acomp_reqchain_done(void *data, int err);
+static int acomp_save_req(struct acomp_req *req, crypto_completion_t cplt);
+static void acomp_restore_req(struct acomp_req *req);
+
static inline struct acomp_alg *__crypto_acomp_alg(struct crypto_alg *alg)
{
	return container_of(alg, struct acomp_alg, calg.base);
@@ -123,6 +136,277 @@ struct crypto_acomp *crypto_alloc_acomp_node(const char *alg_name, u32 type,
}
EXPORT_SYMBOL_GPL(crypto_alloc_acomp_node);
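+/*
+ * If the transform is async, stash the head request's original completion
+ * callback and context in a freshly allocated acomp_save_req_state and
+ * redirect completion to @cplt so that chain processing can intercept it.
+ */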
+static int acomp_save_req(struct acomp_req *req, crypto_completion_t cplt)
+{
+	struct crypto_acomp *tfm = crypto_acomp_reqtfm(req);
+	struct acomp_save_req_state *state;
+	gfp_t gfp;
+	u32 flags;
+
+	if (!acomp_is_async(tfm))
+		return 0;
+
+	flags = acomp_request_flags(req);
+	gfp = (flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? GFP_KERNEL : GFP_ATOMIC;
+	state = kmalloc(sizeof(*state), gfp);
+	if (!state)
+		return -ENOMEM;
+
+	state->compl = req->base.complete;
+	state->data = req->base.data;
+	state->req0 = req;
+
+	req->base.complete = cplt;
+	req->base.data = state;
+
+	return 0;
+}
+
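+/* Undo acomp_save_req(): restore the caller's completion callback and data. */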
+static void acomp_restore_req(struct acomp_req *req)
+{
+	struct crypto_acomp *tfm = crypto_acomp_reqtfm(req);
+	struct acomp_save_req_state *state;
+
+	if (!acomp_is_async(tfm))
+		return;
+
+	state = req->base.data;
+
+	req->base.complete = state->compl;
+	req->base.data = state->data;
+	kfree(state);
+}
+
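+/*
+ * Record the result of the request that just finished, then issue the
+ * remaining requests on the chain one at a time.  Completed requests are
+ * collected back on the head request's list; if an operation goes
+ * asynchronous, processing resumes from acomp_reqchain_done().
+ */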
+static int acomp_reqchain_finish(struct acomp_save_req_state *state,
+				 int err, u32 mask)
+{
+	struct acomp_req *req0 = state->req0;
+	struct acomp_req *req = state->cur;
+	struct acomp_req *n;
+
+	req->base.err = err;
+
+	if (req == req0)
+		INIT_LIST_HEAD(&req->base.list);
+	else
+		list_add_tail(&req->base.list, &req0->base.list);
+
+	list_for_each_entry_safe(req, n, &state->head, base.list) {
+		list_del_init(&req->base.list);
+
+		req->base.flags &= mask;
+		req->base.complete = acomp_reqchain_done;
+		req->base.data = state;
+		state->cur = req;
+		err = state->op(req);
+
+		if (err == -EINPROGRESS) {
+			if (!list_empty(&state->head))
+				err = -EBUSY;
+			goto out;
+		}
+
+		if (err == -EBUSY)
+			goto out;
+
+		req->base.err = err;
+		list_add_tail(&req->base.list, &req0->base.list);
+	}
+
+	acomp_restore_req(req0);
+
+out:
+	return err;
+}
+
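+/* Completion callback installed on every request while a chain is live. */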
+static void acomp_reqchain_done(void *data, int err)
+{
+	struct acomp_save_req_state *state = data;
+	crypto_completion_t compl = state->compl;
+
+	data = state->data;
+
+	if (err == -EINPROGRESS) {
+		if (!list_empty(&state->head))
+			return;
+		goto notify;
+	}
+
+	err = acomp_reqchain_finish(state, err, CRYPTO_TFM_REQ_MAY_BACKLOG);
+	if (err == -EBUSY)
+		return;
+
+notify:
+	compl(data, err);
+}
+
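+/**
+ * acomp_do_req_chain() - Process a request chain synchronously, in series.
+ * @req: head request of the chain
+ * @op: the transform's compress or decompress operation
+ *
+ * Falls back to a plain op(@req) if @req is not chained or the transform
+ * does not support request chaining.
+ */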
+int acomp_do_req_chain(struct acomp_req *req,
+		       int (*op)(struct acomp_req *req))
+{
+	struct crypto_acomp *tfm = crypto_acomp_reqtfm(req);
+	struct acomp_save_req_state *state;
+	struct acomp_save_req_state state0;
+	int err = 0;
+
+	if (!acomp_request_chained(req) || list_empty(&req->base.list) ||
+	    !crypto_acomp_req_chain(tfm))
+		return op(req);
+
+	state = &state0;
+
+	if (acomp_is_async(tfm)) {
+		err = acomp_save_req(req, acomp_reqchain_done);
+		if (err) {
+			struct acomp_req *r2;
+
+			req->base.err = err;
+			list_for_each_entry(r2, &req->base.list, base.list)
+				r2->base.err = err;
+
+			return err;
+		}
+
+		state = req->base.data;
+	}
+
+	state->op = op;
+	state->cur = req;
+	INIT_LIST_HEAD(&state->head);
+	list_splice(&req->base.list, &state->head);
+
+	err = op(req);
+	if (err == -EBUSY || err == -EINPROGRESS)
+		return -EBUSY;
+
+	return acomp_reqchain_finish(state, err, ~0);
+}
+EXPORT_SYMBOL_GPL(acomp_do_req_chain);
+
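+/*
+ * Busy-poll all submitted requests until each has completed, recording
+ * per-request errors and collecting finished requests on the head
+ * request's list.  The head request itself is polled to completion last
+ * if it is still outstanding when the list drains.
+ */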
+static void acomp_async_reqchain_done(struct acomp_req *req0,
+				      struct list_head *state,
+				      int (*op_poll)(struct acomp_req *req))
+{
+	struct acomp_req *req, *n;
+	bool req0_done = false;
+	int err;
+
+	while (!list_empty(state)) {
+		if (!req0_done) {
+			err = op_poll(req0);
+			if (!(err == -EAGAIN || err == -EINPROGRESS ||
+			      err == -EBUSY)) {
+				req0->base.err = err;
+				req0_done = true;
+			}
+		}
+
+		list_for_each_entry_safe(req, n, state, base.list) {
+			err = op_poll(req);
+
+			if (err == -EAGAIN || err == -EINPROGRESS ||
+			    err == -EBUSY)
+				continue;
+
+			req->base.err = err;
+			list_del_init(&req->base.list);
+			list_add_tail(&req->base.list, &req0->base.list);
+		}
+	}
+
+	while (!req0_done) {
+		err = op_poll(req0);
+		if (!(err == -EAGAIN || err == -EINPROGRESS ||
+		      err == -EBUSY)) {
+			req0->base.err = err;
+			break;
+		}
+	}
+}
+
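+/*
+ * Submit every request chained to @req0, then poll the whole chain to
+ * completion.  Returns the head request's error status.
+ */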
+static int acomp_async_reqchain_finish(struct acomp_req *req0,
+				       struct list_head *state,
+				       int (*op_submit)(struct acomp_req *req),
+				       int (*op_poll)(struct acomp_req *req))
+{
+	struct acomp_req *req, *n;
+	int err = 0;
+
+	INIT_LIST_HEAD(&req0->base.list);
+
+	list_for_each_entry_safe(req, n, state, base.list) {
+		BUG_ON(req == req0);
+
+		err = op_submit(req);
+
+		if (!(err == -EINPROGRESS || err == -EBUSY)) {
+			req->base.err = err;
+			list_del_init(&req->base.list);
+			list_add_tail(&req->base.list, &req0->base.list);
+		}
+	}
+
+	acomp_async_reqchain_done(req0, state, op_poll);
+
+	return req0->base.err;
+}
+
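+/**
+ * acomp_do_async_req_chain() - Process a request chain in parallel using a
+ * submit-poll paradigm.
+ * @req: head request of the chain
+ * @op_submit: driver callback to submit one request to the hardware
+ * @op_poll: driver callback to poll one submitted request for completion
+ *
+ * If @req is not chained, or the transform is synchronous or does not
+ * support chaining, the single request is submitted and polled to
+ * completion by itself.
+ */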
+int acomp_do_async_req_chain(struct acomp_req *req,
+			     int (*op_submit)(struct acomp_req *req),
+			     int (*op_poll)(struct acomp_req *req))
+{
+	struct crypto_acomp *tfm = crypto_acomp_reqtfm(req);
+	struct list_head state;
+	struct acomp_req *r2;
+	int err = 0;
+	void *req0_data = req->base.data;
+
+	if (!acomp_request_chained(req) || list_empty(&req->base.list) ||
+	    !acomp_is_async(tfm) || !crypto_acomp_req_chain(tfm)) {
+		err = op_submit(req);
+
+		if (err == -EINPROGRESS || err == -EBUSY) {
+			bool req0_done = false;
+
+			while (!req0_done) {
+				err = op_poll(req);
+				if (!(err == -EAGAIN ||
+				      err == -EINPROGRESS ||
+				      err == -EBUSY)) {
+					req->base.err = err;
+					break;
+				}
+			}
+		} else {
+			req->base.err = err;
+		}
+
+		req->base.data = req0_data;
+		if (acomp_is_async(tfm))
+			req->base.complete(req->base.data, req->base.err);
+
+		return err;
+	}
+
+	err = op_submit(req);
+	req->base.err = err;
+
+	if (err && !(err == -EINPROGRESS || err == -EBUSY))
+		goto err_prop;
+
+	INIT_LIST_HEAD(&state);
+	list_splice(&req->base.list, &state);
+
+	err = acomp_async_reqchain_finish(req, &state, op_submit, op_poll);
+	req->base.data = req0_data;
+	req->base.complete(req->base.data, req->base.err);
+
+	return err;
+
+err_prop:
+	list_for_each_entry(r2, &req->base.list, base.list)
+		r2->base.err = err;
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(acomp_do_async_req_chain);
+
struct acomp_req *acomp_request_alloc(struct crypto_acomp *acomp)
{
	struct crypto_tfm *tfm = crypto_acomp_tfm(acomp);
@@ -206,6 +206,7 @@ static inline void acomp_request_set_callback(struct acomp_req *req,
	req->base.data = data;
	req->base.flags &= CRYPTO_ACOMP_ALLOC_OUTPUT;
	req->base.flags |= flgs & ~CRYPTO_ACOMP_ALLOC_OUTPUT;
+	req->base.flags &= ~CRYPTO_TFM_REQ_CHAIN;
}
/**
@@ -237,6 +238,46 @@ static inline void acomp_request_set_params(struct acomp_req *req,
	req->flags |= CRYPTO_ACOMP_ALLOC_OUTPUT;
}
+static inline u32 acomp_request_flags(struct acomp_req *req)
+{
+	return req->base.flags;
+}
+
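+/* Begin a request chain: @req becomes the head that others are linked to. */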
+static inline void acomp_reqchain_init(struct acomp_req *req,
+				       u32 flags, crypto_completion_t compl,
+				       void *data)
+{
+	acomp_request_set_callback(req, flags, compl, data);
+	crypto_reqchain_init(&req->base);
+}
+
+static inline void acomp_reqchain_clear(struct acomp_req *req, void *data)
+{
+	struct crypto_wait *wait = (struct crypto_wait *)data;
+
+	reinit_completion(&wait->completion);
+	crypto_reqchain_clear(&req->base);
+	acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+				   crypto_req_done, data);
+}
+
+static inline void acomp_request_chain(struct acomp_req *req,
+				       struct acomp_req *head)
+{
+	crypto_request_chain(&req->base, &head->base);
+}
+
+int acomp_do_req_chain(struct acomp_req *req,
+		       int (*op)(struct acomp_req *req));
+
+int acomp_do_async_req_chain(struct acomp_req *req,
+			     int (*op_submit)(struct acomp_req *req),
+			     int (*op_poll)(struct acomp_req *req));
+
+static inline int acomp_request_err(struct acomp_req *req)
+{
+	return req->base.err;
+}
+
/**
 * crypto_acomp_compress() -- Invoke asynchronous compress operation
 *
@@ -271,4 +271,14 @@ static inline u32 crypto_tfm_alg_type(struct crypto_tfm *tfm)
	return tfm->__crt_alg->cra_flags & CRYPTO_ALG_TYPE_MASK;
}
+static inline bool crypto_request_chained(struct crypto_async_request *req)
+{
+	return req->flags & CRYPTO_TFM_REQ_CHAIN;
+}
+
+static inline bool crypto_tfm_req_chain(struct crypto_tfm *tfm)
+{
+	return tfm->__crt_alg->cra_flags & CRYPTO_ALG_REQ_CHAIN;
+}
+
#endif /* _CRYPTO_ALGAPI_H */
@@ -84,6 +84,16 @@ static inline void __acomp_request_free(struct acomp_req *req)
	kfree_sensitive(req);
}
+static inline bool acomp_request_chained(struct acomp_req *req)
+{
+	return crypto_request_chained(&req->base);
+}
+
+static inline bool crypto_acomp_req_chain(struct crypto_acomp *tfm)
+{
+	return crypto_tfm_req_chain(&tfm->base);
+}
+
/**
 * crypto_register_acomp() -- Register asynchronous compression algorithm
 *
@@ -13,6 +13,8 @@
#define _LINUX_CRYPTO_H
#include <linux/completion.h>
+#include <linux/errno.h>
+#include <linux/list.h>
#include <linux/refcount.h>
#include <linux/slab.h>
#include <linux/types.h>
@@ -124,6 +126,9 @@
 */
#define CRYPTO_ALG_FIPS_INTERNAL 0x00020000
+/* Set if the algorithm supports request chains. */
+#define CRYPTO_ALG_REQ_CHAIN 0x00040000
+
/*
 * Transform masks and values (for crt_flags).
 */
@@ -133,6 +138,7 @@
#define CRYPTO_TFM_REQ_FORBID_WEAK_KEYS 0x00000100
#define CRYPTO_TFM_REQ_MAY_SLEEP 0x00000200
#define CRYPTO_TFM_REQ_MAY_BACKLOG 0x00000400
+#define CRYPTO_TFM_REQ_CHAIN 0x00000800
/*
 * Miscellaneous stuff.
@@ -174,6 +180,7 @@ struct crypto_async_request {
	struct crypto_tfm *tfm;
	u32 flags;
+	int err;
};
/**
@@ -540,5 +547,29 @@ int crypto_comp_decompress(struct crypto_comp *tfm,
			   const u8 *src, unsigned int slen,
			   u8 *dst, unsigned int *dlen);
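+/* Mark @req as the head of a request chain and initialise its chain list. */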
+static inline void crypto_reqchain_init(struct crypto_async_request *req)
+{
+	req->err = -EINPROGRESS;
+	req->flags |= CRYPTO_TFM_REQ_CHAIN;
+	INIT_LIST_HEAD(&req->list);
+}
+
+static inline void crypto_reqchain_clear(struct crypto_async_request *req)
+{
+	req->flags &= ~CRYPTO_TFM_REQ_CHAIN;
+}
+
+static inline void crypto_request_chain(struct crypto_async_request *req,
+					struct crypto_async_request *head)
+{
+	req->err = -EINPROGRESS;
+	list_add_tail(&req->list, &head->list);
+}
+
+static inline bool crypto_tfm_is_async(struct crypto_tfm *tfm)
+{
+	return tfm->__crt_alg->cra_flags & CRYPTO_ALG_ASYNC;
+}
+
#endif /* _LINUX_CRYPTO_H */
This patch is based on Herbert Xu's request chaining for ahash ("[PATCH
2/6] crypto: hash - Add request chaining API") [1]. The generic
framework for request chaining provided in the ahash implementation has
been used as a reference to develop a similar synchronous request
chaining framework for crypto_acomp.

Furthermore, this commit develops an asynchronous request chaining
framework and API that iaa_crypto can use for request chaining with
parallelism, in order to fully benefit from Intel IAA's multiple
compress/decompress engines in hardware. This allows us to gain
significant latency improvements with IAA batching as compared to
synchronous request chaining.

Usage of acomp request chaining API:
====================================

Any crypto_acomp compressor can avail of request chaining as follows:

Step 1: Create the request chain:

  Request 0 (the first request in the chain):

    void acomp_reqchain_init(struct acomp_req *req, u32 flags,
                             crypto_completion_t compl, void *data);

  Subsequent requests:

    void acomp_request_chain(struct acomp_req *req,
                             struct acomp_req *head);

Step 2: Process the request chain using the specified compress/decompress
"op":

  2.a) Synchronous: the chain of requests is processed in series:

    int acomp_do_req_chain(struct acomp_req *req,
                           int (*op)(struct acomp_req *req));

  2.b) Asynchronous: the chain of requests is processed in parallel
       using a submit-poll paradigm:

    int acomp_do_async_req_chain(struct acomp_req *req,
                                 int (*op_submit)(struct acomp_req *req),
                                 int (*op_poll)(struct acomp_req *req));

Request chaining will be used in subsequent patches to implement
compress/decompress batching in the iaa_crypto driver for the two
supported IAA driver sync_modes:

  sync_mode = 'sync' will use (2.a),
  sync_mode = 'async' will use (2.b).

These files are directly re-used from [1], which is not yet merged:

  include/crypto/algapi.h
  include/linux/crypto.h

Hence, I am adding Herbert as the co-developer of this acomp request
chaining patch.

[1]: https://lore.kernel.org/linux-crypto/677614fbdc70b31df2e26483c8d2cd1510c8af91.1730021644.git.herbert@gondor.apana.org.au/

Suggested-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Kanchana P Sridhar <kanchana.p.sridhar@intel.com>
Co-developed-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by:
---
 crypto/acompress.c                  | 284 ++++++++++++++++++++++++++++
 include/crypto/acompress.h          |  41 ++++
 include/crypto/algapi.h             |  10 +
 include/crypto/internal/acompress.h |  10 +
 include/linux/crypto.h              |  31 +++
 5 files changed, 376 insertions(+)
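
A minimal usage sketch (illustration only, not part of this patch; the
my_*() helper names are hypothetical and error handling is elided). The
caller builds the chain and submits the head request; a driver that sets
CRYPTO_ALG_REQ_CHAIN routes its compress entry point through
acomp_do_req_chain() with its single-request routine as the "op":

  /* Caller side: chain nr_reqs requests and wait for the whole chain. */
  static int my_compress_batch(struct acomp_req *reqs[], int nr_reqs)
  {
          DECLARE_CRYPTO_WAIT(wait);
          int i, err;

          /* Step 1: reqs[0] heads the chain; link the rest to it. */
          acomp_reqchain_init(reqs[0], CRYPTO_TFM_REQ_MAY_BACKLOG,
                              crypto_req_done, &wait);
          for (i = 1; i < nr_reqs; i++)
                  acomp_request_chain(reqs[i], reqs[0]);

          /* Step 2: submit the head; the driver walks the chain. */
          err = crypto_wait_req(crypto_acomp_compress(reqs[0]), &wait);

          /* Per-request status is available via acomp_request_err(). */
          return err;
  }

  /* Driver side (2.a): hand chained requests to the synchronous
   * chaining core, with "op" being the driver's one-request routine. */
  static int my_driver_compress_one(struct acomp_req *req);

  static int my_driver_compress(struct acomp_req *req)
  {
          return acomp_do_req_chain(req, my_driver_compress_one);
  }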