[rdma-next,v5,08/12] RDMA/efa: Implement functions that submit and complete admin commands

Message ID	1555330841-7904-9-git-send-email-galpress@amazon.com (mailing list archive)
State	Superseded
Headers	show Return-Path: <linux-rdma-owner@kernel.org> From: Gal Pressman <galpress@amazon.com> To: Jason Gunthorpe <jgg@ziepe.ca>, Doug Ledford <dledford@redhat.com> CC: Yossi Leybovich <sleybo@amazon.com>, Alexander Matushevsky <matua@amazon.com>, Leah Shalev <shalevl@amazon.com>, Dave Goodell <goodell@amazon.com>, Brian Barrett <bbarrett@amazon.com>, <linux-rdma@vger.kernel.org>, Sean Hefty <sean.hefty@intel.com>, "Dennis Dalessandro" <dennis.dalessandro@intel.com>, Leon Romanovsky <leon@kernel.org>, Christoph Hellwig <hch@infradead.org>, Parav Pandit <parav@mellanox.com>, Sagi Grimberg <sagi@grimberg.me>, Steve Wise <larrystevenwise@gmail.com>, Shiraz Saleem <shiraz.saleem@intel.com>, "Gal Pressman" <galpress@amazon.com> Subject: [PATCH rdma-next v5 08/12] RDMA/efa: Implement functions that submit and complete admin commands Date: Mon, 15 Apr 2019 15:20:37 +0300 Message-ID: <1555330841-7904-9-git-send-email-galpress@amazon.com> In-Reply-To: <1555330841-7904-1-git-send-email-galpress@amazon.com> References: <1555330841-7904-1-git-send-email-galpress@amazon.com> MIME-Version: 1.0 Content-Type: text/plain Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk
Series	RDMA/efa: Elastic Fabric Adapter (EFA) driver \| expand [rdma-next,v5,00/12] RDMA/efa: Elastic Fabric Adapter (EFA) driver [rdma-next,v5,01/12] RDMA/core: Introduce RDMA subsystem ibdev_* print functions [rdma-next,v5,02/12] RDMA: Add EFA related definitions [rdma-next,v5,03/12] RDMA/efa: Add EFA device definitions [rdma-next,v5,04/12] RDMA/efa: Add the efa.h header file [rdma-next,v5,05/12] RDMA/efa: Add the efa_com.h file [rdma-next,v5,06/12] RDMA/efa: Add the com service API definitions [rdma-next,v5,07/12] RDMA/efa: Add the ABI definitions [rdma-next,v5,08/12] RDMA/efa: Implement functions that submit and complete admin commands [rdma-next,v5,09/12] RDMA/efa: Add common command handlers [rdma-next,v5,10/12] RDMA/efa: Add EFA verbs implementation [rdma-next,v5,11/12] RDMA/efa: Add the efa module [rdma-next,v5,12/12] RDMA/efa: Add driver to Kconfig/Makefile

diff --git a/drivers/infiniband/hw/efa/efa_com.c b/drivers/infiniband/hw/efa/efa_com.c new file mode 100644 index 000000000000..630f2b10003a --- /dev/null +++ b/drivers/infiniband/hw/efa/efa_com.c @@ -0,0 +1,1162 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause +/* + * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved. + */ + +#include "efa_com.h" +#include "efa_regs_defs.h" + +#define ADMIN_CMD_TIMEOUT_US 30000000 /* usecs */ + +#define EFA_REG_READ_TIMEOUT_US 50000 /* usecs */ +#define EFA_MMIO_READ_INVALID 0xffffffff + +#define EFA_POLL_INTERVAL_MS 100 /* msecs */ + +#define EFA_ASYNC_QUEUE_DEPTH 16 +#define EFA_ADMIN_QUEUE_DEPTH 32 + +#define MIN_EFA_VER\ + ((EFA_ADMIN_API_VERSION_MAJOR << EFA_REGS_VERSION_MAJOR_VERSION_SHIFT) | \ + (EFA_ADMIN_API_VERSION_MINOR & EFA_REGS_VERSION_MINOR_VERSION_MASK)) + +#define EFA_CTRL_MAJOR 0 +#define EFA_CTRL_MINOR 0 +#define EFA_CTRL_SUB_MINOR 1 + +#define MIN_EFA_CTRL_VER \ + (((EFA_CTRL_MAJOR) << \ + (EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_SHIFT)) | \ + ((EFA_CTRL_MINOR) << \ + (EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION_SHIFT)) | \ + (EFA_CTRL_SUB_MINOR)) + +#define EFA_DMA_ADDR_TO_UINT32_LOW(x) ((u32)((u64)(x))) +#define EFA_DMA_ADDR_TO_UINT32_HIGH(x) ((u32)(((u64)(x)) >> 32)) + +#define EFA_REGS_ADMIN_INTR_MASK 1 + +enum efa_cmd_status { + EFA_CMD_SUBMITTED, + EFA_CMD_COMPLETED, + /* Abort - canceled by the driver */ + EFA_CMD_ABORTED, +}; + +struct efa_comp_ctx { + struct completion wait_event; + struct efa_admin_acq_entry *user_cqe; + u32 comp_size; + enum efa_cmd_status status; + /* status from the device */ + u8 comp_status; + u8 cmd_opcode; + u8 occupied; +}; + +static const char *efa_com_cmd_str(u8 cmd) +{ +#define EFA_CMD_STR_CASE(_cmd) case EFA_ADMIN_##_cmd: return #_cmd + + switch (cmd) { + EFA_CMD_STR_CASE(CREATE_QP); + EFA_CMD_STR_CASE(MODIFY_QP); + EFA_CMD_STR_CASE(QUERY_QP); + EFA_CMD_STR_CASE(DESTROY_QP); + EFA_CMD_STR_CASE(CREATE_AH); + EFA_CMD_STR_CASE(DESTROY_AH); + EFA_CMD_STR_CASE(REG_MR); + EFA_CMD_STR_CASE(DEREG_MR); + EFA_CMD_STR_CASE(CREATE_CQ); + EFA_CMD_STR_CASE(DESTROY_CQ); + EFA_CMD_STR_CASE(GET_FEATURE); + EFA_CMD_STR_CASE(SET_FEATURE); + EFA_CMD_STR_CASE(GET_STATS); + EFA_CMD_STR_CASE(ALLOC_PD); + EFA_CMD_STR_CASE(DEALLOC_PD); + EFA_CMD_STR_CASE(ALLOC_UAR); + EFA_CMD_STR_CASE(DEALLOC_UAR); + default: return "unknown command opcode"; + } +#undef EFA_CMD_STR_CASE +} + +static u32 efa_com_reg_read32(struct efa_com_dev *edev, u16 offset) +{ + struct efa_com_mmio_read *mmio_read = &edev->mmio_read; + struct efa_admin_mmio_req_read_less_resp *read_resp; + unsigned long exp_time; + u32 mmio_read_reg; + u32 err; + + read_resp = mmio_read->read_resp; + + spin_lock(&mmio_read->lock); + mmio_read->seq_num++; + + /* trash DMA req_id to identify when hardware is done */ + read_resp->req_id = mmio_read->seq_num + 0x9aL; + mmio_read_reg = (offset << EFA_REGS_MMIO_REG_READ_REG_OFF_SHIFT) & + EFA_REGS_MMIO_REG_READ_REG_OFF_MASK; + mmio_read_reg |= mmio_read->seq_num & + EFA_REGS_MMIO_REG_READ_REQ_ID_MASK; + + writel(mmio_read_reg, edev->reg_bar + EFA_REGS_MMIO_REG_READ_OFF); + + exp_time = jiffies + usecs_to_jiffies(mmio_read->mmio_read_timeout); + do { + if (READ_ONCE(read_resp->req_id) == mmio_read->seq_num) + break; + udelay(1); + } while (time_is_after_jiffies(exp_time)); + + if (read_resp->req_id != mmio_read->seq_num) { + ibdev_err(edev->efa_dev, + "Reading register timed out. expected: req id[%u] offset[%#x] actual: req id[%u] offset[%#x]\n", + mmio_read->seq_num, offset, read_resp->req_id, + read_resp->reg_off); + err = EFA_MMIO_READ_INVALID; + goto out; + } + + if (read_resp->reg_off != offset) { + ibdev_err(edev->efa_dev, + "Reading register failed: wrong offset provided\n"); + err = EFA_MMIO_READ_INVALID; + goto out; + } + + err = read_resp->reg_val; +out: + spin_unlock(&mmio_read->lock); + return err; +} + +static int efa_com_admin_init_sq(struct efa_com_dev *edev) +{ + struct efa_com_admin_queue *aq = &edev->aq; + struct efa_com_admin_sq *sq = &aq->sq; + u16 size = aq->depth * sizeof(*sq->entries); + u32 addr_high; + u32 addr_low; + u32 aq_caps; + + sq->entries = + dma_alloc_coherent(aq->dmadev, size, &sq->dma_addr, GFP_KERNEL); + if (!sq->entries) + return -ENOMEM; + + spin_lock_init(&sq->lock); + + sq->cc = 0; + sq->pc = 0; + sq->phase = 1; + + sq->db_addr = (u32 __iomem *)(edev->reg_bar + EFA_REGS_AQ_PROD_DB_OFF); + + addr_high = EFA_DMA_ADDR_TO_UINT32_HIGH(sq->dma_addr); + addr_low = EFA_DMA_ADDR_TO_UINT32_LOW(sq->dma_addr); + + writel(addr_low, edev->reg_bar + EFA_REGS_AQ_BASE_LO_OFF); + writel(addr_high, edev->reg_bar + EFA_REGS_AQ_BASE_HI_OFF); + + aq_caps = aq->depth & EFA_REGS_AQ_CAPS_AQ_DEPTH_MASK; + aq_caps |= (sizeof(struct efa_admin_aq_entry) << + EFA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_SHIFT) & + EFA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_MASK; + + writel(aq_caps, edev->reg_bar + EFA_REGS_AQ_CAPS_OFF); + + return 0; +} + +static int efa_com_admin_init_cq(struct efa_com_dev *edev) +{ + struct efa_com_admin_queue *aq = &edev->aq; + struct efa_com_admin_cq *cq = &aq->cq; + u16 size = aq->depth * sizeof(*cq->entries); + u32 addr_high; + u32 addr_low; + u32 acq_caps; + + cq->entries = + dma_alloc_coherent(aq->dmadev, size, &cq->dma_addr, GFP_KERNEL); + if (!cq->entries) + return -ENOMEM; + + spin_lock_init(&cq->lock); + + cq->cc = 0; + cq->phase = 1; + + addr_high = EFA_DMA_ADDR_TO_UINT32_HIGH(cq->dma_addr); + addr_low = EFA_DMA_ADDR_TO_UINT32_LOW(cq->dma_addr); + + writel(addr_low, edev->reg_bar + EFA_REGS_ACQ_BASE_LO_OFF); + writel(addr_high, edev->reg_bar + EFA_REGS_ACQ_BASE_HI_OFF); + + acq_caps = aq->depth & EFA_REGS_ACQ_CAPS_ACQ_DEPTH_MASK; + acq_caps |= (sizeof(struct efa_admin_acq_entry) << + EFA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_SHIFT) & + EFA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_MASK; + acq_caps |= (aq->msix_vector_idx << + EFA_REGS_ACQ_CAPS_ACQ_MSIX_VECTOR_SHIFT) & + EFA_REGS_ACQ_CAPS_ACQ_MSIX_VECTOR_MASK; + + writel(acq_caps, edev->reg_bar + EFA_REGS_ACQ_CAPS_OFF); + + return 0; +} + +static int efa_com_admin_init_aenq(struct efa_com_dev *edev, + struct efa_aenq_handlers *aenq_handlers) +{ + struct efa_com_aenq *aenq = &edev->aenq; + u32 addr_low, addr_high, aenq_caps; + u16 size; + + if (!aenq_handlers) { + ibdev_err(edev->efa_dev, "aenq handlers pointer is NULL\n"); + return -EINVAL; + } + + size = EFA_ASYNC_QUEUE_DEPTH * sizeof(*aenq->entries); + aenq->entries = dma_alloc_coherent(edev->dmadev, size, &aenq->dma_addr, + GFP_KERNEL); + if (!aenq->entries) + return -ENOMEM; + + aenq->aenq_handlers = aenq_handlers; + aenq->depth = EFA_ASYNC_QUEUE_DEPTH; + aenq->cc = 0; + aenq->phase = 1; + + addr_low = EFA_DMA_ADDR_TO_UINT32_LOW(aenq->dma_addr); + addr_high = EFA_DMA_ADDR_TO_UINT32_HIGH(aenq->dma_addr); + + writel(addr_low, edev->reg_bar + EFA_REGS_AENQ_BASE_LO_OFF); + writel(addr_high, edev->reg_bar + EFA_REGS_AENQ_BASE_HI_OFF); + + aenq_caps = aenq->depth & EFA_REGS_AENQ_CAPS_AENQ_DEPTH_MASK; + aenq_caps |= (sizeof(struct efa_admin_aenq_entry) << + EFA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_SHIFT) & + EFA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_MASK; + aenq_caps |= (aenq->msix_vector_idx + << EFA_REGS_AENQ_CAPS_AENQ_MSIX_VECTOR_SHIFT) & + EFA_REGS_AENQ_CAPS_AENQ_MSIX_VECTOR_MASK; + writel(aenq_caps, edev->reg_bar + EFA_REGS_AENQ_CAPS_OFF); + + /* + * Init cons_db to mark that all entries in the queue + * are initially available + */ + writel(edev->aenq.cc, edev->reg_bar + EFA_REGS_AENQ_CONS_DB_OFF); + + return 0; +} + +/* ID to be used with efa_com_get_comp_ctx */ +static u16 efa_com_alloc_ctx_id(struct efa_com_admin_queue *aq) +{ + u16 ctx_id; + + spin_lock(&aq->comp_ctx_lock); + ctx_id = aq->comp_ctx_pool[aq->comp_ctx_pool_next]; + aq->comp_ctx_pool_next++; + spin_unlock(&aq->comp_ctx_lock); + + return ctx_id; +} + +static void efa_com_dealloc_ctx_id(struct efa_com_admin_queue *aq, + u16 ctx_id) +{ + spin_lock(&aq->comp_ctx_lock); + aq->comp_ctx_pool_next--; + aq->comp_ctx_pool[aq->comp_ctx_pool_next] = ctx_id; + spin_unlock(&aq->comp_ctx_lock); +} + +static inline void efa_com_put_comp_ctx(struct efa_com_admin_queue *aq, + struct efa_comp_ctx *comp_ctx) +{ + u16 comp_id = comp_ctx->user_cqe->acq_common_descriptor.command & + EFA_ADMIN_ACQ_COMMON_DESC_COMMAND_ID_MASK; + + ibdev_dbg(aq->efa_dev, "Putting completion command_id %d\n", comp_id); + comp_ctx->occupied = 0; + efa_com_dealloc_ctx_id(aq, comp_id); +} + +static struct efa_comp_ctx *efa_com_get_comp_ctx(struct efa_com_admin_queue *aq, + u16 command_id, bool capture) +{ + if (command_id >= aq->depth) { + ibdev_err(aq->efa_dev, + "command id is larger than the queue size. cmd_id: %u queue size %d\n", + command_id, aq->depth); + return NULL; + } + + if (aq->comp_ctx[command_id].occupied && capture) { + ibdev_err(aq->efa_dev, "Completion context is occupied\n"); + return NULL; + } + + if (capture) { + aq->comp_ctx[command_id].occupied = 1; + ibdev_dbg(aq->efa_dev, "Taking completion ctxt command_id %d\n", + command_id); + } + + return &aq->comp_ctx[command_id]; +} + +static struct efa_comp_ctx *__efa_com_submit_admin_cmd(struct efa_com_admin_queue *aq, + struct efa_admin_aq_entry *cmd, + size_t cmd_size_in_bytes, + struct efa_admin_acq_entry *comp, + size_t comp_size_in_bytes) +{ + struct efa_comp_ctx *comp_ctx; + u16 queue_size_mask; + u16 ctx_id; + u16 pi; + + queue_size_mask = aq->depth - 1; + pi = aq->sq.pc & queue_size_mask; + + ctx_id = efa_com_alloc_ctx_id(aq); + + cmd->aq_common_descriptor.flags |= aq->sq.phase & + EFA_ADMIN_AQ_COMMON_DESC_PHASE_MASK; + + cmd->aq_common_descriptor.command_id |= ctx_id & + EFA_ADMIN_AQ_COMMON_DESC_COMMAND_ID_MASK; + + comp_ctx = efa_com_get_comp_ctx(aq, ctx_id, true); + if (!comp_ctx) { + efa_com_dealloc_ctx_id(aq, ctx_id); + return ERR_PTR(-EINVAL); + } + + comp_ctx->status = EFA_CMD_SUBMITTED; + comp_ctx->comp_size = comp_size_in_bytes; + comp_ctx->user_cqe = comp; + comp_ctx->cmd_opcode = cmd->aq_common_descriptor.opcode; + + reinit_completion(&comp_ctx->wait_event); + + memcpy(&aq->sq.entries[pi], cmd, cmd_size_in_bytes); + + aq->sq.pc++; + atomic64_inc(&aq->stats.submitted_cmd); + + if ((aq->sq.pc & queue_size_mask) == 0) + aq->sq.phase = !aq->sq.phase; + + /* barrier not needed in case of writel */ + writel(aq->sq.pc, aq->sq.db_addr); + + return comp_ctx; +} + +static inline int efa_com_init_comp_ctxt(struct efa_com_admin_queue *aq) +{ + size_t pool_size = aq->depth * sizeof(*aq->comp_ctx_pool); + size_t size = aq->depth * sizeof(struct efa_comp_ctx); + struct efa_comp_ctx *comp_ctx; + u16 i; + + aq->comp_ctx = devm_kzalloc(aq->dmadev, size, GFP_KERNEL); + aq->comp_ctx_pool = devm_kzalloc(aq->dmadev, pool_size, GFP_KERNEL); + if (!aq->comp_ctx || !aq->comp_ctx_pool) { + devm_kfree(aq->dmadev, aq->comp_ctx_pool); + devm_kfree(aq->dmadev, aq->comp_ctx); + return -ENOMEM; + } + + for (i = 0; i < aq->depth; i++) { + comp_ctx = efa_com_get_comp_ctx(aq, i, false); + if (comp_ctx) + init_completion(&comp_ctx->wait_event); + + aq->comp_ctx_pool[i] = i; + } + + spin_lock_init(&aq->comp_ctx_lock); + + aq->comp_ctx_pool_next = 0; + + return 0; +} + +static struct efa_comp_ctx *efa_com_submit_admin_cmd(struct efa_com_admin_queue *aq, + struct efa_admin_aq_entry *cmd, + size_t cmd_size_in_bytes, + struct efa_admin_acq_entry *comp, + size_t comp_size_in_bytes) +{ + struct efa_comp_ctx *comp_ctx; + + spin_lock(&aq->sq.lock); + if (!test_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state)) { + ibdev_err(aq->efa_dev, "Admin queue is closed\n"); + spin_unlock(&aq->sq.lock); + return ERR_PTR(-ENODEV); + } + + comp_ctx = __efa_com_submit_admin_cmd(aq, cmd, cmd_size_in_bytes, comp, + comp_size_in_bytes); + spin_unlock(&aq->sq.lock); + if (IS_ERR(comp_ctx)) + clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state); + + return comp_ctx; +} + +static void efa_com_handle_single_admin_completion(struct efa_com_admin_queue *aq, + struct efa_admin_acq_entry *cqe) +{ + struct efa_comp_ctx *comp_ctx; + u16 cmd_id; + + cmd_id = cqe->acq_common_descriptor.command & + EFA_ADMIN_ACQ_COMMON_DESC_COMMAND_ID_MASK; + + comp_ctx = efa_com_get_comp_ctx(aq, cmd_id, false); + if (!comp_ctx) { + ibdev_err(aq->efa_dev, + "comp_ctx is NULL. Changing the admin queue running state\n"); + clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state); + return; + } + + comp_ctx->status = EFA_CMD_COMPLETED; + comp_ctx->comp_status = cqe->acq_common_descriptor.status; + if (comp_ctx->user_cqe) + memcpy(comp_ctx->user_cqe, cqe, comp_ctx->comp_size); + + if (!test_bit(EFA_AQ_STATE_POLLING_BIT, &aq->state)) + complete(&comp_ctx->wait_event); +} + +static void efa_com_handle_admin_completion(struct efa_com_admin_queue *aq) +{ + struct efa_admin_acq_entry *cqe; + u16 queue_size_mask; + u16 comp_num = 0; + u8 phase; + u16 ci; + + queue_size_mask = aq->depth - 1; + + ci = aq->cq.cc & queue_size_mask; + phase = aq->cq.phase; + + cqe = &aq->cq.entries[ci]; + + /* Go over all the completions */ + while ((READ_ONCE(cqe->acq_common_descriptor.flags) & + EFA_ADMIN_ACQ_COMMON_DESC_PHASE_MASK) == phase) { + /* + * Do not read the rest of the completion entry before the + * phase bit was validated + */ + dma_rmb(); + efa_com_handle_single_admin_completion(aq, cqe); + + ci++; + comp_num++; + if (ci == aq->depth) { + ci = 0; + phase = !phase; + } + + cqe = &aq->cq.entries[ci]; + } + + aq->cq.cc += comp_num; + aq->cq.phase = phase; + aq->sq.cc += comp_num; + atomic64_add(comp_num, &aq->stats.completed_cmd); +} + +static int efa_com_comp_status_to_errno(struct efa_com_admin_queue *aq, + u8 comp_status) +{ + switch (comp_status) { + case EFA_ADMIN_SUCCESS: + return 0; + case EFA_ADMIN_RESOURCE_ALLOCATION_FAILURE: + return -ENOMEM; + case EFA_ADMIN_UNSUPPORTED_OPCODE: + return -EOPNOTSUPP; + case EFA_ADMIN_BAD_OPCODE: + case EFA_ADMIN_MALFORMED_REQUEST: + case EFA_ADMIN_ILLEGAL_PARAMETER: + case EFA_ADMIN_UNKNOWN_ERROR: + return -EINVAL; + default: + return -EINVAL; + } +} + +static int efa_com_wait_and_process_admin_cq_polling(struct efa_comp_ctx *comp_ctx, + struct efa_com_admin_queue *aq) +{ + unsigned long timeout; + unsigned long flags; + int err; + + timeout = jiffies + usecs_to_jiffies(aq->completion_timeout); + + while (1) { + spin_lock_irqsave(&aq->cq.lock, flags); + efa_com_handle_admin_completion(aq); + spin_unlock_irqrestore(&aq->cq.lock, flags); + + if (comp_ctx->status != EFA_CMD_SUBMITTED) + break; + + if (time_is_before_jiffies(timeout)) { + ibdev_err(aq->efa_dev, + "Wait for completion (polling) timeout\n"); + /* EFA didn't have any completion */ + atomic64_inc(&aq->stats.no_completion); + + clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state); + err = -ETIME; + goto out; + } + + msleep(aq->poll_interval); + } + + if (comp_ctx->status == EFA_CMD_ABORTED) { + ibdev_err(aq->efa_dev, "Command was aborted\n"); + atomic64_inc(&aq->stats.aborted_cmd); + err = -ENODEV; + goto out; + } + + WARN_ONCE(comp_ctx->status != EFA_CMD_COMPLETED, + "Invalid completion status %d\n", comp_ctx->status); + + err = efa_com_comp_status_to_errno(aq, comp_ctx->comp_status); +out: + efa_com_put_comp_ctx(aq, comp_ctx); + return err; +} + +static int efa_com_wait_and_process_admin_cq_interrupts(struct efa_comp_ctx *comp_ctx, + struct efa_com_admin_queue *aq) +{ + unsigned long flags; + int err; + + wait_for_completion_timeout(&comp_ctx->wait_event, + usecs_to_jiffies(aq->completion_timeout)); + + /* + * In case the command wasn't completed find out the root cause. + * There might be 2 kinds of errors + * 1) No completion (timeout reached) + * 2) There is completion but the device didn't get any msi-x interrupt. + */ + if (comp_ctx->status == EFA_CMD_SUBMITTED) { + spin_lock_irqsave(&aq->cq.lock, flags); + efa_com_handle_admin_completion(aq); + spin_unlock_irqrestore(&aq->cq.lock, flags); + + atomic64_inc(&aq->stats.no_completion); + + if (comp_ctx->status == EFA_CMD_COMPLETED) + ibdev_err(aq->efa_dev, + "The device sent a completion but the driver didn't receive any MSI-X interrupt for admin cmd %s(%d) status %d (ctx: 0x%p, sq producer: %d, sq consumer: %d, cq consumer: %d)\n", + efa_com_cmd_str(comp_ctx->cmd_opcode), + comp_ctx->cmd_opcode, comp_ctx->status, + comp_ctx, aq->sq.pc, aq->sq.cc, aq->cq.cc); + else + ibdev_err(aq->efa_dev, + "The device didn't send any completion for admin cmd %s(%d) status %d (ctx 0x%p, sq producer: %d, sq consumer: %d, cq consumer: %d)\n", + efa_com_cmd_str(comp_ctx->cmd_opcode), + comp_ctx->cmd_opcode, comp_ctx->status, + comp_ctx, aq->sq.pc, aq->sq.cc, aq->cq.cc); + + clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state); + err = -ETIME; + goto out; + } + + err = efa_com_comp_status_to_errno(aq, comp_ctx->comp_status); +out: + efa_com_put_comp_ctx(aq, comp_ctx); + return err; +} + +/* + * There are two types to wait for completion. + * Polling mode - wait until the completion is available. + * Async mode - wait on wait queue until the completion is ready + * (or the timeout expired). + * It is expected that the IRQ called efa_com_handle_admin_completion + * to mark the completions. + */ +static int efa_com_wait_and_process_admin_cq(struct efa_comp_ctx *comp_ctx, + struct efa_com_admin_queue *aq) +{ + if (test_bit(EFA_AQ_STATE_POLLING_BIT, &aq->state)) + return efa_com_wait_and_process_admin_cq_polling(comp_ctx, aq); + + return efa_com_wait_and_process_admin_cq_interrupts(comp_ctx, aq); +} + +/** + * efa_com_cmd_exec - Execute admin command + * @aq: admin queue. + * @cmd: the admin command to execute. + * @cmd_size: the command size. + * @comp: command completion return entry. + * @comp_size: command completion size. + * Submit an admin command and then wait until the device will return a + * completion. + * The completion will be copied into comp. + * + * @return - 0 on success, negative value on failure. + */ +int efa_com_cmd_exec(struct efa_com_admin_queue *aq, + struct efa_admin_aq_entry *cmd, + size_t cmd_size, + struct efa_admin_acq_entry *comp, + size_t comp_size) +{ + struct efa_comp_ctx *comp_ctx; + int err; + + might_sleep(); + + /* In case of queue FULL */ + down(&aq->avail_cmds); + + ibdev_dbg(aq->efa_dev, "%s (opcode %d)\n", + efa_com_cmd_str(cmd->aq_common_descriptor.opcode), + cmd->aq_common_descriptor.opcode); + comp_ctx = efa_com_submit_admin_cmd(aq, cmd, cmd_size, comp, comp_size); + if (IS_ERR(comp_ctx)) { + ibdev_err(aq->efa_dev, + "Failed to submit command %s (opcode %u) err %ld\n", + efa_com_cmd_str(cmd->aq_common_descriptor.opcode), + cmd->aq_common_descriptor.opcode, PTR_ERR(comp_ctx)); + + up(&aq->avail_cmds); + return PTR_ERR(comp_ctx); + } + + err = efa_com_wait_and_process_admin_cq(comp_ctx, aq); + if (err) + ibdev_err(aq->efa_dev, + "Failed to process command %s (opcode %u) comp_status %d err %d\n", + efa_com_cmd_str(cmd->aq_common_descriptor.opcode), + cmd->aq_common_descriptor.opcode, + comp_ctx->comp_status, err); + + up(&aq->avail_cmds); + + return err; +} + +/** + * efa_com_abort_admin_commands - Abort all the outstanding admin commands. + * @edev: EFA communication layer struct + * + * This method aborts all the outstanding admin commands. + * The caller should then call efa_com_wait_for_abort_completion to make sure + * all the commands were completed. + */ +static void efa_com_abort_admin_commands(struct efa_com_dev *edev) +{ + struct efa_com_admin_queue *aq = &edev->aq; + struct efa_comp_ctx *comp_ctx; + unsigned long flags; + u16 i; + + spin_lock(&aq->sq.lock); + spin_lock_irqsave(&aq->cq.lock, flags); + for (i = 0; i < aq->depth; i++) { + comp_ctx = efa_com_get_comp_ctx(aq, i, false); + if (!comp_ctx) + break; + + comp_ctx->status = EFA_CMD_ABORTED; + + complete(&comp_ctx->wait_event); + } + spin_unlock_irqrestore(&aq->cq.lock, flags); + spin_unlock(&aq->sq.lock); +} + +/** + * efa_com_wait_for_abort_completion - Wait for admin commands abort. + * @edev: EFA communication layer struct + * + * This method wait until all the outstanding admin commands will be completed. + */ +static void efa_com_wait_for_abort_completion(struct efa_com_dev *edev) +{ + struct efa_com_admin_queue *aq = &edev->aq; + int i; + + /* all mine */ + for (i = 0; i < aq->depth; i++) + down(&aq->avail_cmds); + + /* let it go */ + for (i = 0; i < aq->depth; i++) + up(&aq->avail_cmds); +} + +static void efa_com_admin_flush(struct efa_com_dev *edev) +{ + struct efa_com_admin_queue *aq = &edev->aq; + + clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state); + + efa_com_abort_admin_commands(edev); + efa_com_wait_for_abort_completion(edev); +} + +/** + * efa_com_admin_destroy - Destroy the admin and the async events queues. + * @edev: EFA communication layer struct + */ +void efa_com_admin_destroy(struct efa_com_dev *edev) +{ + struct efa_com_admin_queue *aq = &edev->aq; + struct efa_com_aenq *aenq = &edev->aenq; + struct efa_com_admin_cq *cq = &aq->cq; + struct efa_com_admin_sq *sq = &aq->sq; + u16 size; + + efa_com_admin_flush(edev); + + devm_kfree(edev->dmadev, aq->comp_ctx_pool); + devm_kfree(edev->dmadev, aq->comp_ctx); + + size = aq->depth * sizeof(*sq->entries); + dma_free_coherent(edev->dmadev, size, sq->entries, sq->dma_addr); + + size = aq->depth * sizeof(*cq->entries); + dma_free_coherent(edev->dmadev, size, cq->entries, cq->dma_addr); + + size = aenq->depth * sizeof(*aenq->entries); + dma_free_coherent(edev->dmadev, size, aenq->entries, aenq->dma_addr); +} + +/** + * efa_com_set_admin_polling_mode - Set the admin completion queue polling mode + * @edev: EFA communication layer struct + * @polling: Enable/Disable polling mode + * + * Set the admin completion mode. + */ +void efa_com_set_admin_polling_mode(struct efa_com_dev *edev, bool polling) +{ + u32 mask_value = 0; + + if (polling) + mask_value = EFA_REGS_ADMIN_INTR_MASK; + + writel(mask_value, edev->reg_bar + EFA_REGS_INTR_MASK_OFF); + if (polling) + set_bit(EFA_AQ_STATE_POLLING_BIT, &edev->aq.state); + else + clear_bit(EFA_AQ_STATE_POLLING_BIT, &edev->aq.state); +} + +static void efa_com_stats_init(struct efa_com_dev *edev) +{ + atomic64_t *s = (atomic64_t *)&edev->aq.stats; + int i; + + for (i = 0; i < sizeof(edev->aq.stats) / sizeof(*s); i++, s++) + atomic64_set(s, 0); +} + +/** + * efa_com_admin_init - Init the admin and the async queues + * @edev: EFA communication layer struct + * @aenq_handlers: Those handlers to be called upon event. + * + * Initialize the admin submission and completion queues. + * Initialize the asynchronous events notification queues. + * + * @return - 0 on success, negative value on failure. + */ +int efa_com_admin_init(struct efa_com_dev *edev, + struct efa_aenq_handlers *aenq_handlers) +{ + struct efa_com_admin_queue *aq = &edev->aq; + u32 timeout; + u32 dev_sts; + u32 cap; + int err; + + dev_sts = efa_com_reg_read32(edev, EFA_REGS_DEV_STS_OFF); + if (!(dev_sts & EFA_REGS_DEV_STS_READY_MASK)) { + ibdev_err(edev->efa_dev, + "Device isn't ready, abort com init 0x%08x\n", + dev_sts); + return -ENODEV; + } + + aq->depth = EFA_ADMIN_QUEUE_DEPTH; + + aq->dmadev = edev->dmadev; + aq->efa_dev = edev->efa_dev; + set_bit(EFA_AQ_STATE_POLLING_BIT, &aq->state); + + sema_init(&aq->avail_cmds, aq->depth); + + efa_com_stats_init(edev); + + err = efa_com_init_comp_ctxt(aq); + if (err) + return err; + + err = efa_com_admin_init_sq(edev); + if (err) + goto err_destroy_comp_ctxt; + + err = efa_com_admin_init_cq(edev); + if (err) + goto err_destroy_sq; + + efa_com_set_admin_polling_mode(edev, false); + + err = efa_com_admin_init_aenq(edev, aenq_handlers); + if (err) + goto err_destroy_cq; + + cap = efa_com_reg_read32(edev, EFA_REGS_CAPS_OFF); + timeout = (cap & EFA_REGS_CAPS_ADMIN_CMD_TO_MASK) >> + EFA_REGS_CAPS_ADMIN_CMD_TO_SHIFT; + if (timeout) + /* the resolution of timeout reg is 100ms */ + aq->completion_timeout = timeout * 100000; + else + aq->completion_timeout = ADMIN_CMD_TIMEOUT_US; + + aq->poll_interval = EFA_POLL_INTERVAL_MS; + + set_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state); + + return 0; + +err_destroy_cq: + dma_free_coherent(edev->dmadev, aq->depth * sizeof(*aq->cq.entries), + aq->cq.entries, aq->cq.dma_addr); +err_destroy_sq: + dma_free_coherent(edev->dmadev, aq->depth * sizeof(*aq->sq.entries), + aq->sq.entries, aq->sq.dma_addr); +err_destroy_comp_ctxt: + devm_kfree(edev->dmadev, aq->comp_ctx); + + return err; +} + +/** + * efa_com_admin_q_comp_intr_handler - admin queue interrupt handler + * @edev: EFA communication layer struct + * + * This method goes over the admin completion queue and wakes up + * all the pending threads that wait on the commands wait event. + * + * @note: Should be called after MSI-X interrupt. + */ +void efa_com_admin_q_comp_intr_handler(struct efa_com_dev *edev) +{ + unsigned long flags; + + spin_lock_irqsave(&edev->aq.cq.lock, flags); + efa_com_handle_admin_completion(&edev->aq); + spin_unlock_irqrestore(&edev->aq.cq.lock, flags); +} + +/* + * efa_handle_specific_aenq_event: + * return the handler that is relevant to the specific event group + */ +static efa_aenq_handler efa_com_get_specific_aenq_cb(struct efa_com_dev *edev, + u16 group) +{ + struct efa_aenq_handlers *aenq_handlers = edev->aenq.aenq_handlers; + + if (group < EFA_MAX_HANDLERS && aenq_handlers->handlers[group]) + return aenq_handlers->handlers[group]; + + return aenq_handlers->unimplemented_handler; +} + +/** + * efa_com_aenq_intr_handler - AENQ interrupt handler + * @edev: EFA communication layer struct + * @data: Data of interrupt handler. + * + * Go over the async event notification queue and call the proper aenq handler. + */ +void efa_com_aenq_intr_handler(struct efa_com_dev *edev, void *data) +{ + struct efa_admin_aenq_common_desc *aenq_common; + struct efa_com_aenq *aenq = &edev->aenq; + struct efa_admin_aenq_entry *aenq_e; + efa_aenq_handler handler_cb; + u32 processed = 0; + u8 phase; + u32 ci; + + ci = aenq->cc & (aenq->depth - 1); + phase = aenq->phase; + aenq_e = &aenq->entries[ci]; /* Get first entry */ + aenq_common = &aenq_e->aenq_common_desc; + + /* Go over all the events */ + while ((READ_ONCE(aenq_common->flags) & + EFA_ADMIN_AENQ_COMMON_DESC_PHASE_MASK) == phase) { + /* + * Do not read the rest of the completion entry before the + * phase bit was validated + */ + dma_rmb(); + + /* Handle specific event*/ + handler_cb = efa_com_get_specific_aenq_cb(edev, + aenq_common->group); + handler_cb(data, aenq_e); /* call the actual event handler*/ + + /* Get next event entry */ + ci++; + processed++; + + if (ci == aenq->depth) { + ci = 0; + phase = !phase; + } + aenq_e = &aenq->entries[ci]; + aenq_common = &aenq_e->aenq_common_desc; + } + + aenq->cc += processed; + aenq->phase = phase; + + /* Don't update aenq doorbell if there weren't any processed events */ + if (!processed) + return; + + /* barrier not needed in case of writel */ + writel(aenq->cc, edev->reg_bar + EFA_REGS_AENQ_CONS_DB_OFF); +} + +static void efa_com_mmio_reg_read_resp_addr_init(struct efa_com_dev *edev) +{ + struct efa_com_mmio_read *mmio_read = &edev->mmio_read; + u32 addr_high; + u32 addr_low; + + /* dma_addr_bits is unknown at this point */ + addr_high = (mmio_read->read_resp_dma_addr >> 32) & GENMASK(31, 0); + addr_low = mmio_read->read_resp_dma_addr & GENMASK(31, 0); + + writel(addr_high, edev->reg_bar + EFA_REGS_MMIO_RESP_HI_OFF); + writel(addr_low, edev->reg_bar + EFA_REGS_MMIO_RESP_LO_OFF); +} + +int efa_com_mmio_reg_read_init(struct efa_com_dev *edev) +{ + struct efa_com_mmio_read *mmio_read = &edev->mmio_read; + + spin_lock_init(&mmio_read->lock); + mmio_read->read_resp = + dma_alloc_coherent(edev->dmadev, sizeof(*mmio_read->read_resp), + &mmio_read->read_resp_dma_addr, GFP_KERNEL); + if (!mmio_read->read_resp) + return -ENOMEM; + + efa_com_mmio_reg_read_resp_addr_init(edev); + + mmio_read->read_resp->req_id = 0; + mmio_read->seq_num = 0; + mmio_read->mmio_read_timeout = EFA_REG_READ_TIMEOUT_US; + + return 0; +} + +void efa_com_mmio_reg_read_destroy(struct efa_com_dev *edev) +{ + struct efa_com_mmio_read *mmio_read = &edev->mmio_read; + + dma_free_coherent(edev->dmadev, sizeof(*mmio_read->read_resp), + mmio_read->read_resp, mmio_read->read_resp_dma_addr); +} + +int efa_com_validate_version(struct efa_com_dev *edev) +{ + u32 ctrl_ver_masked; + u32 ctrl_ver; + u32 ver; + + /* + * Make sure the EFA version and the controller version are at least + * as the driver expects + */ + ver = efa_com_reg_read32(edev, EFA_REGS_VERSION_OFF); + ctrl_ver = efa_com_reg_read32(edev, + EFA_REGS_CONTROLLER_VERSION_OFF); + + ibdev_dbg(edev->efa_dev, "efa device version: %d.%d\n", + (ver & EFA_REGS_VERSION_MAJOR_VERSION_MASK) >> + EFA_REGS_VERSION_MAJOR_VERSION_SHIFT, + ver & EFA_REGS_VERSION_MINOR_VERSION_MASK); + + if (ver < MIN_EFA_VER) { + ibdev_err(edev->efa_dev, + "EFA version is lower than the minimal version the driver supports\n"); + return -EOPNOTSUPP; + } + + ibdev_dbg(edev->efa_dev, + "efa controller version: %d.%d.%d implementation version %d\n", + (ctrl_ver & EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_MASK) >> + EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_SHIFT, + (ctrl_ver & EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION_MASK) >> + EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION_SHIFT, + (ctrl_ver & EFA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION_MASK), + (ctrl_ver & EFA_REGS_CONTROLLER_VERSION_IMPL_ID_MASK) >> + EFA_REGS_CONTROLLER_VERSION_IMPL_ID_SHIFT); + + ctrl_ver_masked = + (ctrl_ver & EFA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_MASK) | + (ctrl_ver & EFA_REGS_CONTROLLER_VERSION_MINOR_VERSION_MASK) | + (ctrl_ver & EFA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION_MASK); + + /* Validate the ctrl version without the implementation ID */ + if (ctrl_ver_masked < MIN_EFA_CTRL_VER) { + ibdev_err(edev->efa_dev, + "EFA ctrl version is lower than the minimal ctrl version the driver supports\n"); + return -EOPNOTSUPP; + } + + return 0; +} + +/** + * efa_com_get_dma_width - Retrieve physical dma address width the device + * supports. + * @edev: EFA communication layer struct + * + * Retrieve the maximum physical address bits the device can handle. + * + * @return: > 0 on Success and negative value otherwise. + */ +int efa_com_get_dma_width(struct efa_com_dev *edev) +{ + u32 caps = efa_com_reg_read32(edev, EFA_REGS_CAPS_OFF); + int width; + + width = (caps & EFA_REGS_CAPS_DMA_ADDR_WIDTH_MASK) >> + EFA_REGS_CAPS_DMA_ADDR_WIDTH_SHIFT; + + ibdev_dbg(edev->efa_dev, "DMA width: %d\n", width); + + if (width < 32 || width > 64) { + ibdev_err(edev->efa_dev, "DMA width illegal value: %d\n", width); + return -EINVAL; + } + + edev->dma_addr_bits = width; + + return width; +} + +static int wait_for_reset_state(struct efa_com_dev *edev, u32 timeout, + u16 exp_state) +{ + u32 val, i; + + for (i = 0; i < timeout; i++) { + val = efa_com_reg_read32(edev, EFA_REGS_DEV_STS_OFF); + + if ((val & EFA_REGS_DEV_STS_RESET_IN_PROGRESS_MASK) == + exp_state) + return 0; + + ibdev_dbg(edev->efa_dev, "Reset indication val %d\n", val); + msleep(EFA_POLL_INTERVAL_MS); + } + + return -ETIME; +} + +/** + * efa_com_dev_reset - Perform device FLR to the device. + * @edev: EFA communication layer struct + * @reset_reason: Specify what is the trigger for the reset in case of an error. + * + * @return - 0 on success, negative value on failure. + */ +int efa_com_dev_reset(struct efa_com_dev *edev, + enum efa_regs_reset_reason_types reset_reason) +{ + u32 stat, timeout, cap, reset_val; + int err; + + stat = efa_com_reg_read32(edev, EFA_REGS_DEV_STS_OFF); + cap = efa_com_reg_read32(edev, EFA_REGS_CAPS_OFF); + + if (!(stat & EFA_REGS_DEV_STS_READY_MASK)) { + ibdev_err(edev->efa_dev, + "Device isn't ready, can't reset device\n"); + return -EINVAL; + } + + timeout = (cap & EFA_REGS_CAPS_RESET_TIMEOUT_MASK) >> + EFA_REGS_CAPS_RESET_TIMEOUT_SHIFT; + if (!timeout) { + ibdev_err(edev->efa_dev, "Invalid timeout value\n"); + return -EINVAL; + } + + /* start reset */ + reset_val = EFA_REGS_DEV_CTL_DEV_RESET_MASK; + reset_val |= (reset_reason << EFA_REGS_DEV_CTL_RESET_REASON_SHIFT) & + EFA_REGS_DEV_CTL_RESET_REASON_MASK; + writel(reset_val, edev->reg_bar + EFA_REGS_DEV_CTL_OFF); + + /* reset clears the mmio readless address, restore it */ + efa_com_mmio_reg_read_resp_addr_init(edev); + + err = wait_for_reset_state(edev, timeout, + EFA_REGS_DEV_STS_RESET_IN_PROGRESS_MASK); + if (err) { + ibdev_err(edev->efa_dev, "Reset indication didn't turn on\n"); + return err; + } + + /* reset done */ + writel(0, edev->reg_bar + EFA_REGS_DEV_CTL_OFF); + err = wait_for_reset_state(edev, timeout, 0); + if (err) { + ibdev_err(edev->efa_dev, "Reset indication didn't turn off\n"); + return err; + } + + timeout = (cap & EFA_REGS_CAPS_ADMIN_CMD_TO_MASK) >> + EFA_REGS_CAPS_ADMIN_CMD_TO_SHIFT; + if (timeout) + /* the resolution of timeout reg is 100ms */ + edev->aq.completion_timeout = timeout * 100000; + else + edev->aq.completion_timeout = ADMIN_CMD_TIMEOUT_US; + + return 0; +}

[rdma-next,v5,08/12] RDMA/efa: Implement functions that submit and complete admin commands

Commit Message

Patch