@@ -53,6 +53,7 @@ struct caam_drv_private_jr {
struct caam_jrentry_info *entinfo; /* Alloc'ed 1 per ring entry */
spinlock_t inplock ____cacheline_aligned; /* Input ring index lock */
int inp_ring_write_index; /* Input index "tail" */
+ u32 inpring_avail; /* Number of free entries in input ring */
int head; /* entinfo (s/w ring) head index */
dma_addr_t *inpring; /* Base of input ring, alloc DMA-safe */
spinlock_t outlock ____cacheline_aligned; /* Output ring index lock */
@@ -170,8 +170,10 @@ static void caam_jr_dequeue(unsigned long devarg)
void (*usercall)(struct device *dev, u32 *desc, u32 status, void *arg);
u32 *userdesc, userstatus;
void *userarg;
+ u32 outring_used = 0;
- while (rd_reg32(&jrp->rregs->outring_used)) {
+ while (outring_used ||
+ (outring_used = rd_reg32(&jrp->rregs->outring_used))) {
head = READ_ONCE(jrp->head);
@@ -236,6 +238,7 @@ static void caam_jr_dequeue(unsigned long devarg)
/* Finally, execute user's callback */
usercall(dev, userdesc, userstatus, userarg);
+ outring_used--;
}
/* reenable / unmask IRQs */
@@ -345,7 +348,7 @@ int caam_jr_enqueue(struct device *dev, u32 *desc,
head = jrp->head;
tail = READ_ONCE(jrp->tail);
- if (!rd_reg32(&jrp->rregs->inpring_avail) ||
+ if (!jrp->inpring_avail ||
CIRC_SPACE(head, tail, JOBR_DEPTH) <= 0) {
spin_unlock_bh(&jrp->inplock);
dma_unmap_single(dev, desc_dma, desc_size, DMA_TO_DEVICE);
@@ -380,6 +383,10 @@ int caam_jr_enqueue(struct device *dev, u32 *desc,
wr_reg32(&jrp->rregs->inpring_jobadd, 1);
+ jrp->inpring_avail--;
+ if (!jrp->inpring_avail)
+ jrp->inpring_avail = rd_reg32(&jrp->rregs->inpring_avail);
+
spin_unlock_bh(&jrp->inplock);
return 0;
@@ -442,6 +449,7 @@ static int caam_jr_init(struct device *dev)
wr_reg32(&jrp->rregs->outring_size, JOBR_DEPTH);
jrp->ringsize = JOBR_DEPTH;
+ jrp->inpring_avail = JOBR_DEPTH;
spin_lock_init(&jrp->inplock);
spin_lock_init(&jrp->outlock);
Instead of reading job ring's occupancy registers for every req/rsp enqueued/dequeued respectively, we read these registers once and store them in memory. After completing a job enqueue/dequeue, we decrement these values. When these values become zero, we refresh the snapshot of job ring's occupancy registers. This eliminates need of expensive device register read operations for every job enqueued and dequeued and hence makes caam_jr_enqueue() and caam_jr_dequeue() faster. Signed-off-by: Vakul Garg <vakul.garg@nxp.com> --- Changes since v1: - Changed 'i/p' in comment to 'input' - Placed variable 'inpring_avail' near other enqueue related variables. drivers/crypto/caam/intern.h | 1 + drivers/crypto/caam/jr.c | 12 ++++++++++-- 2 files changed, 11 insertions(+), 2 deletions(-)