diff mbox series

[v2,4/4] hw/pvrdma: Add support for SRQ

Message ID 20190326125433.475-5-kamalheib1@gmail.com (mailing list archive)
State New, archived
Headers show
Series pvrdma: Add support for SRQ | expand

Commit Message

Kamal Heib March 26, 2019, 12:54 p.m. UTC
Implement the pvrdma device commands for supporting SRQ

Signed-off-by: Kamal Heib <kamalheib1@gmail.com>
---
 hw/rdma/vmw/pvrdma_cmd.c    | 147 ++++++++++++++++++++++++++++++++++++
 hw/rdma/vmw/pvrdma_main.c   |  16 ++++
 hw/rdma/vmw/pvrdma_qp_ops.c |  46 ++++++++++-
 hw/rdma/vmw/pvrdma_qp_ops.h |   1 +
 4 files changed, 209 insertions(+), 1 deletion(-)

Comments

Yuval Shaia March 27, 2019, 4:16 p.m. UTC | #1
On Tue, Mar 26, 2019 at 02:54:33PM +0200, Kamal Heib wrote:
> Implement the pvrdma device commands for supporting SRQ
> 
> Signed-off-by: Kamal Heib <kamalheib1@gmail.com>
> ---
>  hw/rdma/vmw/pvrdma_cmd.c    | 147 ++++++++++++++++++++++++++++++++++++
>  hw/rdma/vmw/pvrdma_main.c   |  16 ++++
>  hw/rdma/vmw/pvrdma_qp_ops.c |  46 ++++++++++-
>  hw/rdma/vmw/pvrdma_qp_ops.h |   1 +
>  4 files changed, 209 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/rdma/vmw/pvrdma_cmd.c b/hw/rdma/vmw/pvrdma_cmd.c
> index 510062f05476..fb075048c90c 100644
> --- a/hw/rdma/vmw/pvrdma_cmd.c
> +++ b/hw/rdma/vmw/pvrdma_cmd.c
> @@ -615,6 +615,149 @@ static int destroy_uc(PVRDMADev *dev, union pvrdma_cmd_req *req,
>      return 0;
>  }
>  
> +static int create_srq_ring(PCIDevice *pci_dev, PvrdmaRing **ring,
> +                           uint64_t pdir_dma, uint32_t max_wr,
> +                           uint32_t max_sge, uint32_t nchunks)
> +{
> +    uint64_t *dir = NULL, *tbl = NULL;
> +    PvrdmaRing *r;
> +    int rc = -EINVAL;
> +    char ring_name[MAX_RING_NAME_SZ];
> +    uint32_t wqe_sz;
> +
> +    if (!nchunks || nchunks > PVRDMA_MAX_FAST_REG_PAGES) {
> +        rdma_error_report("Got invalid page count for SRQ ring: %d",
> +                          nchunks);
> +        return rc;
> +    }
> +
> +    dir = rdma_pci_dma_map(pci_dev, pdir_dma, TARGET_PAGE_SIZE);
> +    if (!dir) {
> +        rdma_error_report("Failed to map to SRQ page directory");
> +        goto out;
> +    }
> +
> +    tbl = rdma_pci_dma_map(pci_dev, dir[0], TARGET_PAGE_SIZE);
> +    if (!tbl) {
> +        rdma_error_report("Failed to map to SRQ page table");
> +        goto out;
> +    }
> +
> +    r = g_malloc(sizeof(*r));
> +    *ring = r;
> +
> +    r->ring_state = (struct pvrdma_ring *)
> +            rdma_pci_dma_map(pci_dev, tbl[0], TARGET_PAGE_SIZE);
> +    if (!r->ring_state) {
> +        rdma_error_report("Failed to map tp SRQ ring state");
> +        goto out_free_ring_mem;
> +    }
> +
> +    wqe_sz = pow2ceil(sizeof(struct pvrdma_rq_wqe_hdr) +
> +                      sizeof(struct pvrdma_sge) * max_sge - 1);
> +    sprintf(ring_name, "srq_ring_%" PRIx64, pdir_dma);
> +    rc = pvrdma_ring_init(r, ring_name, pci_dev, &r->ring_state[1], max_wr,
> +                          wqe_sz, (dma_addr_t *)&tbl[1], nchunks - 1);
> +    if (rc) {
> +        goto out_unmap_ring_state;
> +    }
> +
> +    goto out;
> +
> +out_unmap_ring_state:
> +    rdma_pci_dma_unmap(pci_dev, r->ring_state, TARGET_PAGE_SIZE);
> +
> +out_free_ring_mem:
> +    g_free(r);
> +
> +out:
> +    rdma_pci_dma_unmap(pci_dev, tbl, TARGET_PAGE_SIZE);
> +    rdma_pci_dma_unmap(pci_dev, dir, TARGET_PAGE_SIZE);
> +
> +    return rc;
> +}
> +
> +static void destroy_srq_ring(PvrdmaRing *ring)
> +{
> +    pvrdma_ring_free(ring);
> +    rdma_pci_dma_unmap(ring->dev, ring->ring_state, TARGET_PAGE_SIZE);
> +    g_free(ring);
> +}
> +
> +static int create_srq(PVRDMADev *dev, union pvrdma_cmd_req *req,
> +                      union pvrdma_cmd_resp *rsp)
> +{
> +    struct pvrdma_cmd_create_srq *cmd = &req->create_srq;
> +    struct pvrdma_cmd_create_srq_resp *resp = &rsp->create_srq_resp;
> +    PvrdmaRing *ring = NULL;
> +    int rc;
> +
> +    memset(resp, 0, sizeof(*resp));
> +
> +    rc = create_srq_ring(PCI_DEVICE(dev), &ring, cmd->pdir_dma,
> +                         cmd->attrs.max_wr, cmd->attrs.max_sge,
> +                         cmd->nchunks);
> +    if (rc) {
> +        return rc;
> +    }
> +
> +    rc = rdma_rm_alloc_srq(&dev->rdma_dev_res, cmd->pd_handle,
> +                           cmd->attrs.max_wr, cmd->attrs.max_sge,
> +                           cmd->attrs.srq_limit, &resp->srqn, ring);
> +    if (rc) {
> +        destroy_srq_ring(ring);
> +        return rc;
> +    }
> +
> +    return 0;
> +}
> +
> +static int query_srq(PVRDMADev *dev, union pvrdma_cmd_req *req,
> +                     union pvrdma_cmd_resp *rsp)
> +{
> +    struct pvrdma_cmd_query_srq *cmd = &req->query_srq;
> +    struct pvrdma_cmd_query_srq_resp *resp = &rsp->query_srq_resp;
> +
> +    memset(resp, 0, sizeof(*resp));
> +
> +    return rdma_rm_query_srq(&dev->rdma_dev_res, cmd->srq_handle,
> +                             (struct ibv_srq_attr *)&resp->attrs);
> +}
> +
> +static int modify_srq(PVRDMADev *dev, union pvrdma_cmd_req *req,
> +                      union pvrdma_cmd_resp *rsp)
> +{
> +    struct pvrdma_cmd_modify_srq *cmd = &req->modify_srq;
> +
> +    /* Only support SRQ limit */
> +    if (!(cmd->attr_mask & IBV_SRQ_LIMIT) ||
> +        (cmd->attr_mask & IBV_SRQ_MAX_WR))
> +            return -EINVAL;
> +
> +    return rdma_rm_modify_srq(&dev->rdma_dev_res, cmd->srq_handle,
> +                              (struct ibv_srq_attr *)&cmd->attrs,
> +                              cmd->attr_mask);
> +}
> +
> +static int destroy_srq(PVRDMADev *dev, union pvrdma_cmd_req *req,
> +                       union pvrdma_cmd_resp *rsp)
> +{
> +    struct pvrdma_cmd_destroy_srq *cmd = &req->destroy_srq;
> +    RdmaRmSRQ *srq;
> +    PvrdmaRing *ring;
> +
> +    srq = rdma_rm_get_srq(&dev->rdma_dev_res, cmd->srq_handle);
> +    if (!srq) {
> +        return -EINVAL;
> +    }
> +
> +    rdma_rm_dealloc_srq(&dev->rdma_dev_res, cmd->srq_handle);
> +    ring = (PvrdmaRing *)srq->opaque;

Use after free.
(i know that destroy_qp suffers from the same problem, appreciate if you
could take care of it too).

> +    destroy_srq_ring(ring);
> +
> +    return 0;
> +}
> +
>  struct cmd_handler {
>      uint32_t cmd;
>      uint32_t ack;
> @@ -640,6 +783,10 @@ static struct cmd_handler cmd_handlers[] = {
>      {PVRDMA_CMD_DESTROY_UC,   PVRDMA_CMD_DESTROY_UC_RESP_NOOP,   destroy_uc},
>      {PVRDMA_CMD_CREATE_BIND,  PVRDMA_CMD_CREATE_BIND_RESP_NOOP,  create_bind},
>      {PVRDMA_CMD_DESTROY_BIND, PVRDMA_CMD_DESTROY_BIND_RESP_NOOP, destroy_bind},
> +    {PVRDMA_CMD_CREATE_SRQ,   PVRDMA_CMD_CREATE_SRQ_RESP,        create_srq},
> +    {PVRDMA_CMD_QUERY_SRQ,    PVRDMA_CMD_QUERY_SRQ_RESP,         query_srq},
> +    {PVRDMA_CMD_MODIFY_SRQ,   PVRDMA_CMD_MODIFY_SRQ_RESP,        modify_srq},
> +    {PVRDMA_CMD_DESTROY_SRQ,  PVRDMA_CMD_DESTROY_SRQ_RESP,       destroy_srq},

Thanks for adding more power to this device!!

>  };
>  
>  int pvrdma_exec_cmd(PVRDMADev *dev)
> diff --git a/hw/rdma/vmw/pvrdma_main.c b/hw/rdma/vmw/pvrdma_main.c
> index 0b46561bad11..e0d01a3199c1 100644
> --- a/hw/rdma/vmw/pvrdma_main.c
> +++ b/hw/rdma/vmw/pvrdma_main.c
> @@ -53,6 +53,7 @@ static Property pvrdma_dev_properties[] = {
>      DEFINE_PROP_INT32("dev-caps-max-qp-init-rd-atom", PVRDMADev,
>                        dev_attr.max_qp_init_rd_atom, MAX_QP_INIT_RD_ATOM),
>      DEFINE_PROP_INT32("dev-caps-max-ah", PVRDMADev, dev_attr.max_ah, MAX_AH),
> +    DEFINE_PROP_INT32("dev-caps-max-srq", PVRDMADev, dev_attr.max_srq, MAX_SRQ),
>      DEFINE_PROP_CHR("mad-chardev", PVRDMADev, mad_chr),
>      DEFINE_PROP_END_OF_LIST(),
>  };
> @@ -261,6 +262,9 @@ static void init_dsr_dev_caps(PVRDMADev *dev)
>      dsr->caps.max_mr = dev->dev_attr.max_mr;
>      dsr->caps.max_pd = dev->dev_attr.max_pd;
>      dsr->caps.max_ah = dev->dev_attr.max_ah;
> +    dsr->caps.max_srq = dev->dev_attr.max_srq;
> +    dsr->caps.max_srq_wr = dev->dev_attr.max_srq_wr;
> +    dsr->caps.max_srq_sge = dev->dev_attr.max_srq_sge;
>      dsr->caps.gid_tbl_len = MAX_GIDS;
>      dsr->caps.sys_image_guid = 0;
>      dsr->caps.node_guid = dev->node_guid;
> @@ -485,6 +489,13 @@ static void pvrdma_uar_write(void *opaque, hwaddr addr, uint64_t val,
>              pvrdma_cq_poll(&dev->rdma_dev_res, val & PVRDMA_UAR_HANDLE_MASK);
>          }
>          break;
> +    case PVRDMA_UAR_SRQ_OFFSET:
> +        if (val & PVRDMA_UAR_SRQ_RECV) {
> +            trace_pvrdma_uar_write(addr, val, "QP", "SRQ",
> +                                   val & PVRDMA_UAR_HANDLE_MASK, 0);
> +        pvrdma_srq_recv(dev, val & PVRDMA_UAR_HANDLE_MASK);

Indent is needed here.

> +        }
> +        break;
>      default:
>          rdma_error_report("Unsupported command, addr=0x%"PRIx64", val=0x%"PRIx64,
>                            addr, val);
> @@ -554,6 +565,11 @@ static void init_dev_caps(PVRDMADev *dev)
>  
>      dev->dev_attr.max_cqe = pg_tbl_bytes / sizeof(struct pvrdma_cqe) -
>                              TARGET_PAGE_SIZE; /* First page is ring state */
> +
> +    dev->dev_attr.max_srq_wr = pg_tbl_bytes /
> +                                ((sizeof(struct pvrdma_rq_wqe_hdr) +
> +                                sizeof(struct pvrdma_sge)) *
> +                                dev->dev_attr.max_sge) - TARGET_PAGE_SIZE;
>  }
>  
>  static int pvrdma_check_ram_shared(Object *obj, void *opaque)
> diff --git a/hw/rdma/vmw/pvrdma_qp_ops.c b/hw/rdma/vmw/pvrdma_qp_ops.c
> index 5b9786efbe4b..bd6db858de32 100644
> --- a/hw/rdma/vmw/pvrdma_qp_ops.c
> +++ b/hw/rdma/vmw/pvrdma_qp_ops.c
> @@ -70,7 +70,7 @@ static int pvrdma_post_cqe(PVRDMADev *dev, uint32_t cq_handle,
>  
>      memset(cqe1, 0, sizeof(*cqe1));
>      cqe1->wr_id = cqe->wr_id;
> -    cqe1->qp = cqe->qp;
> +    cqe1->qp = cqe->qp ? cqe->qp : wc->qp_num;
>      cqe1->opcode = cqe->opcode;
>      cqe1->status = wc->status;
>      cqe1->byte_len = wc->byte_len;
> @@ -241,6 +241,50 @@ void pvrdma_qp_recv(PVRDMADev *dev, uint32_t qp_handle)
>      }
>  }
>  
> +void pvrdma_srq_recv(PVRDMADev *dev, uint32_t srq_handle)
> +{
> +    RdmaRmSRQ *srq;
> +    PvrdmaRqWqe *wqe;
> +    PvrdmaRing *ring;
> +
> +    srq = rdma_rm_get_srq(&dev->rdma_dev_res, srq_handle);
> +    if (unlikely(!srq)) {
> +        return;
> +    }
> +
> +    ring = (PvrdmaRing *)srq->opaque;
> +
> +    wqe = (struct PvrdmaRqWqe *)pvrdma_ring_next_elem_read(ring);
> +    while (wqe) {
> +        CompHandlerCtx *comp_ctx;
> +
> +        /* Prepare CQE */
> +        comp_ctx = g_malloc(sizeof(CompHandlerCtx));
> +        comp_ctx->dev = dev;
> +        comp_ctx->cq_handle = srq->recv_cq_handle;
> +        comp_ctx->cqe.wr_id = wqe->hdr.wr_id;
> +        comp_ctx->cqe.qp = 0;
> +        comp_ctx->cqe.opcode = IBV_WC_RECV;
> +
> +        if (wqe->hdr.num_sge > dev->dev_attr.max_sge) {
> +            rdma_error_report("Invalid num_sge=%d (max %d)", wqe->hdr.num_sge,
> +                              dev->dev_attr.max_sge);
> +            complete_with_error(VENDOR_ERR_INV_NUM_SGE, comp_ctx);
> +            continue;
> +        }
> +
> +        rdma_backend_post_srq_recv(&dev->backend_dev, &srq->backend_srq,
> +                                   (struct ibv_sge *)&wqe->sge[0],
> +                                   wqe->hdr.num_sge,
> +                                   comp_ctx);
> +
> +        pvrdma_ring_read_inc(ring);
> +
> +        wqe = pvrdma_ring_next_elem_read(ring);
> +    }
> +
> +}
> +
>  void pvrdma_cq_poll(RdmaDeviceResources *dev_res, uint32_t cq_handle)
>  {
>      RdmaRmCQ *cq;
> diff --git a/hw/rdma/vmw/pvrdma_qp_ops.h b/hw/rdma/vmw/pvrdma_qp_ops.h
> index 31cb48ba29f1..82e720a76fb4 100644
> --- a/hw/rdma/vmw/pvrdma_qp_ops.h
> +++ b/hw/rdma/vmw/pvrdma_qp_ops.h
> @@ -22,6 +22,7 @@ int pvrdma_qp_ops_init(void);
>  void pvrdma_qp_ops_fini(void);
>  void pvrdma_qp_send(PVRDMADev *dev, uint32_t qp_handle);
>  void pvrdma_qp_recv(PVRDMADev *dev, uint32_t qp_handle);
> +void pvrdma_srq_recv(PVRDMADev *dev, uint32_t srq_handle);
>  void pvrdma_cq_poll(RdmaDeviceResources *dev_res, uint32_t cq_handle);
>  
>  #endif
> -- 
> 2.20.1
> 
>
Kamal Heib April 1, 2019, 6:35 a.m. UTC | #2
On 3/27/19 6:16 PM, Yuval Shaia wrote:
> On Tue, Mar 26, 2019 at 02:54:33PM +0200, Kamal Heib wrote:
>> Implement the pvrdma device commands for supporting SRQ
>>
>> Signed-off-by: Kamal Heib <kamalheib1@gmail.com>
>> ---
>>  hw/rdma/vmw/pvrdma_cmd.c    | 147 ++++++++++++++++++++++++++++++++++++
>>  hw/rdma/vmw/pvrdma_main.c   |  16 ++++
>>  hw/rdma/vmw/pvrdma_qp_ops.c |  46 ++++++++++-
>>  hw/rdma/vmw/pvrdma_qp_ops.h |   1 +
>>  4 files changed, 209 insertions(+), 1 deletion(-)
>>
>> diff --git a/hw/rdma/vmw/pvrdma_cmd.c b/hw/rdma/vmw/pvrdma_cmd.c
>> index 510062f05476..fb075048c90c 100644
>> --- a/hw/rdma/vmw/pvrdma_cmd.c
>> +++ b/hw/rdma/vmw/pvrdma_cmd.c
>> @@ -615,6 +615,149 @@ static int destroy_uc(PVRDMADev *dev, union pvrdma_cmd_req *req,
>>      return 0;
>>  }
>>  
>> +static int create_srq_ring(PCIDevice *pci_dev, PvrdmaRing **ring,
>> +                           uint64_t pdir_dma, uint32_t max_wr,
>> +                           uint32_t max_sge, uint32_t nchunks)
>> +{
>> +    uint64_t *dir = NULL, *tbl = NULL;
>> +    PvrdmaRing *r;
>> +    int rc = -EINVAL;
>> +    char ring_name[MAX_RING_NAME_SZ];
>> +    uint32_t wqe_sz;
>> +
>> +    if (!nchunks || nchunks > PVRDMA_MAX_FAST_REG_PAGES) {
>> +        rdma_error_report("Got invalid page count for SRQ ring: %d",
>> +                          nchunks);
>> +        return rc;
>> +    }
>> +
>> +    dir = rdma_pci_dma_map(pci_dev, pdir_dma, TARGET_PAGE_SIZE);
>> +    if (!dir) {
>> +        rdma_error_report("Failed to map to SRQ page directory");
>> +        goto out;
>> +    }
>> +
>> +    tbl = rdma_pci_dma_map(pci_dev, dir[0], TARGET_PAGE_SIZE);
>> +    if (!tbl) {
>> +        rdma_error_report("Failed to map to SRQ page table");
>> +        goto out;
>> +    }
>> +
>> +    r = g_malloc(sizeof(*r));
>> +    *ring = r;
>> +
>> +    r->ring_state = (struct pvrdma_ring *)
>> +            rdma_pci_dma_map(pci_dev, tbl[0], TARGET_PAGE_SIZE);
>> +    if (!r->ring_state) {
>> +        rdma_error_report("Failed to map tp SRQ ring state");
>> +        goto out_free_ring_mem;
>> +    }
>> +
>> +    wqe_sz = pow2ceil(sizeof(struct pvrdma_rq_wqe_hdr) +
>> +                      sizeof(struct pvrdma_sge) * max_sge - 1);
>> +    sprintf(ring_name, "srq_ring_%" PRIx64, pdir_dma);
>> +    rc = pvrdma_ring_init(r, ring_name, pci_dev, &r->ring_state[1], max_wr,
>> +                          wqe_sz, (dma_addr_t *)&tbl[1], nchunks - 1);
>> +    if (rc) {
>> +        goto out_unmap_ring_state;
>> +    }
>> +
>> +    goto out;
>> +
>> +out_unmap_ring_state:
>> +    rdma_pci_dma_unmap(pci_dev, r->ring_state, TARGET_PAGE_SIZE);
>> +
>> +out_free_ring_mem:
>> +    g_free(r);
>> +
>> +out:
>> +    rdma_pci_dma_unmap(pci_dev, tbl, TARGET_PAGE_SIZE);
>> +    rdma_pci_dma_unmap(pci_dev, dir, TARGET_PAGE_SIZE);
>> +
>> +    return rc;
>> +}
>> +
>> +static void destroy_srq_ring(PvrdmaRing *ring)
>> +{
>> +    pvrdma_ring_free(ring);
>> +    rdma_pci_dma_unmap(ring->dev, ring->ring_state, TARGET_PAGE_SIZE);
>> +    g_free(ring);
>> +}
>> +
>> +static int create_srq(PVRDMADev *dev, union pvrdma_cmd_req *req,
>> +                      union pvrdma_cmd_resp *rsp)
>> +{
>> +    struct pvrdma_cmd_create_srq *cmd = &req->create_srq;
>> +    struct pvrdma_cmd_create_srq_resp *resp = &rsp->create_srq_resp;
>> +    PvrdmaRing *ring = NULL;
>> +    int rc;
>> +
>> +    memset(resp, 0, sizeof(*resp));
>> +
>> +    rc = create_srq_ring(PCI_DEVICE(dev), &ring, cmd->pdir_dma,
>> +                         cmd->attrs.max_wr, cmd->attrs.max_sge,
>> +                         cmd->nchunks);
>> +    if (rc) {
>> +        return rc;
>> +    }
>> +
>> +    rc = rdma_rm_alloc_srq(&dev->rdma_dev_res, cmd->pd_handle,
>> +                           cmd->attrs.max_wr, cmd->attrs.max_sge,
>> +                           cmd->attrs.srq_limit, &resp->srqn, ring);
>> +    if (rc) {
>> +        destroy_srq_ring(ring);
>> +        return rc;
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +static int query_srq(PVRDMADev *dev, union pvrdma_cmd_req *req,
>> +                     union pvrdma_cmd_resp *rsp)
>> +{
>> +    struct pvrdma_cmd_query_srq *cmd = &req->query_srq;
>> +    struct pvrdma_cmd_query_srq_resp *resp = &rsp->query_srq_resp;
>> +
>> +    memset(resp, 0, sizeof(*resp));
>> +
>> +    return rdma_rm_query_srq(&dev->rdma_dev_res, cmd->srq_handle,
>> +                             (struct ibv_srq_attr *)&resp->attrs);
>> +}
>> +
>> +static int modify_srq(PVRDMADev *dev, union pvrdma_cmd_req *req,
>> +                      union pvrdma_cmd_resp *rsp)
>> +{
>> +    struct pvrdma_cmd_modify_srq *cmd = &req->modify_srq;
>> +
>> +    /* Only support SRQ limit */
>> +    if (!(cmd->attr_mask & IBV_SRQ_LIMIT) ||
>> +        (cmd->attr_mask & IBV_SRQ_MAX_WR))
>> +            return -EINVAL;
>> +
>> +    return rdma_rm_modify_srq(&dev->rdma_dev_res, cmd->srq_handle,
>> +                              (struct ibv_srq_attr *)&cmd->attrs,
>> +                              cmd->attr_mask);
>> +}
>> +
>> +static int destroy_srq(PVRDMADev *dev, union pvrdma_cmd_req *req,
>> +                       union pvrdma_cmd_resp *rsp)
>> +{
>> +    struct pvrdma_cmd_destroy_srq *cmd = &req->destroy_srq;
>> +    RdmaRmSRQ *srq;
>> +    PvrdmaRing *ring;
>> +
>> +    srq = rdma_rm_get_srq(&dev->rdma_dev_res, cmd->srq_handle);
>> +    if (!srq) {
>> +        return -EINVAL;
>> +    }
>> +
>> +    rdma_rm_dealloc_srq(&dev->rdma_dev_res, cmd->srq_handle);
>> +    ring = (PvrdmaRing *)srq->opaque;
> 
> Use after free.
> (i know that destroy_qp suffers from the same problem, appreciate if you
> could take care of it too).
> 

Correct, I'll fix in v3.

>> +    destroy_srq_ring(ring);
>> +
>> +    return 0;
>> +}
>> +
>>  struct cmd_handler {
>>      uint32_t cmd;
>>      uint32_t ack;
>> @@ -640,6 +783,10 @@ static struct cmd_handler cmd_handlers[] = {
>>      {PVRDMA_CMD_DESTROY_UC,   PVRDMA_CMD_DESTROY_UC_RESP_NOOP,   destroy_uc},
>>      {PVRDMA_CMD_CREATE_BIND,  PVRDMA_CMD_CREATE_BIND_RESP_NOOP,  create_bind},
>>      {PVRDMA_CMD_DESTROY_BIND, PVRDMA_CMD_DESTROY_BIND_RESP_NOOP, destroy_bind},
>> +    {PVRDMA_CMD_CREATE_SRQ,   PVRDMA_CMD_CREATE_SRQ_RESP,        create_srq},
>> +    {PVRDMA_CMD_QUERY_SRQ,    PVRDMA_CMD_QUERY_SRQ_RESP,         query_srq},
>> +    {PVRDMA_CMD_MODIFY_SRQ,   PVRDMA_CMD_MODIFY_SRQ_RESP,        modify_srq},
>> +    {PVRDMA_CMD_DESTROY_SRQ,  PVRDMA_CMD_DESTROY_SRQ_RESP,       destroy_srq},
> 
> Thanks for adding more power to this device!!

Sure :-)
> 
>>  };
>>  
>>  int pvrdma_exec_cmd(PVRDMADev *dev)
>> diff --git a/hw/rdma/vmw/pvrdma_main.c b/hw/rdma/vmw/pvrdma_main.c
>> index 0b46561bad11..e0d01a3199c1 100644
>> --- a/hw/rdma/vmw/pvrdma_main.c
>> +++ b/hw/rdma/vmw/pvrdma_main.c
>> @@ -53,6 +53,7 @@ static Property pvrdma_dev_properties[] = {
>>      DEFINE_PROP_INT32("dev-caps-max-qp-init-rd-atom", PVRDMADev,
>>                        dev_attr.max_qp_init_rd_atom, MAX_QP_INIT_RD_ATOM),
>>      DEFINE_PROP_INT32("dev-caps-max-ah", PVRDMADev, dev_attr.max_ah, MAX_AH),
>> +    DEFINE_PROP_INT32("dev-caps-max-srq", PVRDMADev, dev_attr.max_srq, MAX_SRQ),
>>      DEFINE_PROP_CHR("mad-chardev", PVRDMADev, mad_chr),
>>      DEFINE_PROP_END_OF_LIST(),
>>  };
>> @@ -261,6 +262,9 @@ static void init_dsr_dev_caps(PVRDMADev *dev)
>>      dsr->caps.max_mr = dev->dev_attr.max_mr;
>>      dsr->caps.max_pd = dev->dev_attr.max_pd;
>>      dsr->caps.max_ah = dev->dev_attr.max_ah;
>> +    dsr->caps.max_srq = dev->dev_attr.max_srq;
>> +    dsr->caps.max_srq_wr = dev->dev_attr.max_srq_wr;
>> +    dsr->caps.max_srq_sge = dev->dev_attr.max_srq_sge;
>>      dsr->caps.gid_tbl_len = MAX_GIDS;
>>      dsr->caps.sys_image_guid = 0;
>>      dsr->caps.node_guid = dev->node_guid;
>> @@ -485,6 +489,13 @@ static void pvrdma_uar_write(void *opaque, hwaddr addr, uint64_t val,
>>              pvrdma_cq_poll(&dev->rdma_dev_res, val & PVRDMA_UAR_HANDLE_MASK);
>>          }
>>          break;
>> +    case PVRDMA_UAR_SRQ_OFFSET:
>> +        if (val & PVRDMA_UAR_SRQ_RECV) {
>> +            trace_pvrdma_uar_write(addr, val, "QP", "SRQ",
>> +                                   val & PVRDMA_UAR_HANDLE_MASK, 0);
>> +        pvrdma_srq_recv(dev, val & PVRDMA_UAR_HANDLE_MASK);
> 
> Indent is needed here.

I'll fix it in v3.
> 
>> +        }
>> +        break;
>>      default:
>>          rdma_error_report("Unsupported command, addr=0x%"PRIx64", val=0x%"PRIx64,
>>                            addr, val);
>> @@ -554,6 +565,11 @@ static void init_dev_caps(PVRDMADev *dev)
>>  
>>      dev->dev_attr.max_cqe = pg_tbl_bytes / sizeof(struct pvrdma_cqe) -
>>                              TARGET_PAGE_SIZE; /* First page is ring state */
>> +
>> +    dev->dev_attr.max_srq_wr = pg_tbl_bytes /
>> +                                ((sizeof(struct pvrdma_rq_wqe_hdr) +
>> +                                sizeof(struct pvrdma_sge)) *
>> +                                dev->dev_attr.max_sge) - TARGET_PAGE_SIZE;
>>  }
>>  
>>  static int pvrdma_check_ram_shared(Object *obj, void *opaque)
>> diff --git a/hw/rdma/vmw/pvrdma_qp_ops.c b/hw/rdma/vmw/pvrdma_qp_ops.c
>> index 5b9786efbe4b..bd6db858de32 100644
>> --- a/hw/rdma/vmw/pvrdma_qp_ops.c
>> +++ b/hw/rdma/vmw/pvrdma_qp_ops.c
>> @@ -70,7 +70,7 @@ static int pvrdma_post_cqe(PVRDMADev *dev, uint32_t cq_handle,
>>  
>>      memset(cqe1, 0, sizeof(*cqe1));
>>      cqe1->wr_id = cqe->wr_id;
>> -    cqe1->qp = cqe->qp;
>> +    cqe1->qp = cqe->qp ? cqe->qp : wc->qp_num;
>>      cqe1->opcode = cqe->opcode;
>>      cqe1->status = wc->status;
>>      cqe1->byte_len = wc->byte_len;
>> @@ -241,6 +241,50 @@ void pvrdma_qp_recv(PVRDMADev *dev, uint32_t qp_handle)
>>      }
>>  }
>>  
>> +void pvrdma_srq_recv(PVRDMADev *dev, uint32_t srq_handle)
>> +{
>> +    RdmaRmSRQ *srq;
>> +    PvrdmaRqWqe *wqe;
>> +    PvrdmaRing *ring;
>> +
>> +    srq = rdma_rm_get_srq(&dev->rdma_dev_res, srq_handle);
>> +    if (unlikely(!srq)) {
>> +        return;
>> +    }
>> +
>> +    ring = (PvrdmaRing *)srq->opaque;
>> +
>> +    wqe = (struct PvrdmaRqWqe *)pvrdma_ring_next_elem_read(ring);
>> +    while (wqe) {
>> +        CompHandlerCtx *comp_ctx;
>> +
>> +        /* Prepare CQE */
>> +        comp_ctx = g_malloc(sizeof(CompHandlerCtx));
>> +        comp_ctx->dev = dev;
>> +        comp_ctx->cq_handle = srq->recv_cq_handle;
>> +        comp_ctx->cqe.wr_id = wqe->hdr.wr_id;
>> +        comp_ctx->cqe.qp = 0;
>> +        comp_ctx->cqe.opcode = IBV_WC_RECV;
>> +
>> +        if (wqe->hdr.num_sge > dev->dev_attr.max_sge) {
>> +            rdma_error_report("Invalid num_sge=%d (max %d)", wqe->hdr.num_sge,
>> +                              dev->dev_attr.max_sge);
>> +            complete_with_error(VENDOR_ERR_INV_NUM_SGE, comp_ctx);
>> +            continue;
>> +        }
>> +
>> +        rdma_backend_post_srq_recv(&dev->backend_dev, &srq->backend_srq,
>> +                                   (struct ibv_sge *)&wqe->sge[0],
>> +                                   wqe->hdr.num_sge,
>> +                                   comp_ctx);
>> +
>> +        pvrdma_ring_read_inc(ring);
>> +
>> +        wqe = pvrdma_ring_next_elem_read(ring);
>> +    }
>> +
>> +}
>> +
>>  void pvrdma_cq_poll(RdmaDeviceResources *dev_res, uint32_t cq_handle)
>>  {
>>      RdmaRmCQ *cq;
>> diff --git a/hw/rdma/vmw/pvrdma_qp_ops.h b/hw/rdma/vmw/pvrdma_qp_ops.h
>> index 31cb48ba29f1..82e720a76fb4 100644
>> --- a/hw/rdma/vmw/pvrdma_qp_ops.h
>> +++ b/hw/rdma/vmw/pvrdma_qp_ops.h
>> @@ -22,6 +22,7 @@ int pvrdma_qp_ops_init(void);
>>  void pvrdma_qp_ops_fini(void);
>>  void pvrdma_qp_send(PVRDMADev *dev, uint32_t qp_handle);
>>  void pvrdma_qp_recv(PVRDMADev *dev, uint32_t qp_handle);
>> +void pvrdma_srq_recv(PVRDMADev *dev, uint32_t srq_handle);
>>  void pvrdma_cq_poll(RdmaDeviceResources *dev_res, uint32_t cq_handle);
>>  
>>  #endif
>> -- 
>> 2.20.1
>>
>>
diff mbox series

Patch

diff --git a/hw/rdma/vmw/pvrdma_cmd.c b/hw/rdma/vmw/pvrdma_cmd.c
index 510062f05476..fb075048c90c 100644
--- a/hw/rdma/vmw/pvrdma_cmd.c
+++ b/hw/rdma/vmw/pvrdma_cmd.c
@@ -615,6 +615,149 @@  static int destroy_uc(PVRDMADev *dev, union pvrdma_cmd_req *req,
     return 0;
 }
 
+static int create_srq_ring(PCIDevice *pci_dev, PvrdmaRing **ring,
+                           uint64_t pdir_dma, uint32_t max_wr,
+                           uint32_t max_sge, uint32_t nchunks)
+{
+    uint64_t *dir = NULL, *tbl = NULL;
+    PvrdmaRing *r;
+    int rc = -EINVAL;
+    char ring_name[MAX_RING_NAME_SZ];
+    uint32_t wqe_sz;
+
+    if (!nchunks || nchunks > PVRDMA_MAX_FAST_REG_PAGES) {
+        rdma_error_report("Got invalid page count for SRQ ring: %d",
+                          nchunks);
+        return rc;
+    }
+
+    dir = rdma_pci_dma_map(pci_dev, pdir_dma, TARGET_PAGE_SIZE);
+    if (!dir) {
+        rdma_error_report("Failed to map to SRQ page directory");
+        goto out;
+    }
+
+    tbl = rdma_pci_dma_map(pci_dev, dir[0], TARGET_PAGE_SIZE);
+    if (!tbl) {
+        rdma_error_report("Failed to map to SRQ page table");
+        goto out;
+    }
+
+    r = g_malloc(sizeof(*r));
+    *ring = r;
+
+    r->ring_state = (struct pvrdma_ring *)
+            rdma_pci_dma_map(pci_dev, tbl[0], TARGET_PAGE_SIZE);
+    if (!r->ring_state) {
+        rdma_error_report("Failed to map tp SRQ ring state");
+        goto out_free_ring_mem;
+    }
+
+    wqe_sz = pow2ceil(sizeof(struct pvrdma_rq_wqe_hdr) +
+                      sizeof(struct pvrdma_sge) * max_sge - 1);
+    sprintf(ring_name, "srq_ring_%" PRIx64, pdir_dma);
+    rc = pvrdma_ring_init(r, ring_name, pci_dev, &r->ring_state[1], max_wr,
+                          wqe_sz, (dma_addr_t *)&tbl[1], nchunks - 1);
+    if (rc) {
+        goto out_unmap_ring_state;
+    }
+
+    goto out;
+
+out_unmap_ring_state:
+    rdma_pci_dma_unmap(pci_dev, r->ring_state, TARGET_PAGE_SIZE);
+
+out_free_ring_mem:
+    g_free(r);
+
+out:
+    rdma_pci_dma_unmap(pci_dev, tbl, TARGET_PAGE_SIZE);
+    rdma_pci_dma_unmap(pci_dev, dir, TARGET_PAGE_SIZE);
+
+    return rc;
+}
+
+static void destroy_srq_ring(PvrdmaRing *ring)
+{
+    pvrdma_ring_free(ring);
+    rdma_pci_dma_unmap(ring->dev, ring->ring_state, TARGET_PAGE_SIZE);
+    g_free(ring);
+}
+
+static int create_srq(PVRDMADev *dev, union pvrdma_cmd_req *req,
+                      union pvrdma_cmd_resp *rsp)
+{
+    struct pvrdma_cmd_create_srq *cmd = &req->create_srq;
+    struct pvrdma_cmd_create_srq_resp *resp = &rsp->create_srq_resp;
+    PvrdmaRing *ring = NULL;
+    int rc;
+
+    memset(resp, 0, sizeof(*resp));
+
+    rc = create_srq_ring(PCI_DEVICE(dev), &ring, cmd->pdir_dma,
+                         cmd->attrs.max_wr, cmd->attrs.max_sge,
+                         cmd->nchunks);
+    if (rc) {
+        return rc;
+    }
+
+    rc = rdma_rm_alloc_srq(&dev->rdma_dev_res, cmd->pd_handle,
+                           cmd->attrs.max_wr, cmd->attrs.max_sge,
+                           cmd->attrs.srq_limit, &resp->srqn, ring);
+    if (rc) {
+        destroy_srq_ring(ring);
+        return rc;
+    }
+
+    return 0;
+}
+
+static int query_srq(PVRDMADev *dev, union pvrdma_cmd_req *req,
+                     union pvrdma_cmd_resp *rsp)
+{
+    struct pvrdma_cmd_query_srq *cmd = &req->query_srq;
+    struct pvrdma_cmd_query_srq_resp *resp = &rsp->query_srq_resp;
+
+    memset(resp, 0, sizeof(*resp));
+
+    return rdma_rm_query_srq(&dev->rdma_dev_res, cmd->srq_handle,
+                             (struct ibv_srq_attr *)&resp->attrs);
+}
+
+static int modify_srq(PVRDMADev *dev, union pvrdma_cmd_req *req,
+                      union pvrdma_cmd_resp *rsp)
+{
+    struct pvrdma_cmd_modify_srq *cmd = &req->modify_srq;
+
+    /* Only support SRQ limit */
+    if (!(cmd->attr_mask & IBV_SRQ_LIMIT) ||
+        (cmd->attr_mask & IBV_SRQ_MAX_WR))
+            return -EINVAL;
+
+    return rdma_rm_modify_srq(&dev->rdma_dev_res, cmd->srq_handle,
+                              (struct ibv_srq_attr *)&cmd->attrs,
+                              cmd->attr_mask);
+}
+
+static int destroy_srq(PVRDMADev *dev, union pvrdma_cmd_req *req,
+                       union pvrdma_cmd_resp *rsp)
+{
+    struct pvrdma_cmd_destroy_srq *cmd = &req->destroy_srq;
+    RdmaRmSRQ *srq;
+    PvrdmaRing *ring;
+
+    srq = rdma_rm_get_srq(&dev->rdma_dev_res, cmd->srq_handle);
+    if (!srq) {
+        return -EINVAL;
+    }
+
+    rdma_rm_dealloc_srq(&dev->rdma_dev_res, cmd->srq_handle);
+    ring = (PvrdmaRing *)srq->opaque;
+    destroy_srq_ring(ring);
+
+    return 0;
+}
+
 struct cmd_handler {
     uint32_t cmd;
     uint32_t ack;
@@ -640,6 +783,10 @@  static struct cmd_handler cmd_handlers[] = {
     {PVRDMA_CMD_DESTROY_UC,   PVRDMA_CMD_DESTROY_UC_RESP_NOOP,   destroy_uc},
     {PVRDMA_CMD_CREATE_BIND,  PVRDMA_CMD_CREATE_BIND_RESP_NOOP,  create_bind},
     {PVRDMA_CMD_DESTROY_BIND, PVRDMA_CMD_DESTROY_BIND_RESP_NOOP, destroy_bind},
+    {PVRDMA_CMD_CREATE_SRQ,   PVRDMA_CMD_CREATE_SRQ_RESP,        create_srq},
+    {PVRDMA_CMD_QUERY_SRQ,    PVRDMA_CMD_QUERY_SRQ_RESP,         query_srq},
+    {PVRDMA_CMD_MODIFY_SRQ,   PVRDMA_CMD_MODIFY_SRQ_RESP,        modify_srq},
+    {PVRDMA_CMD_DESTROY_SRQ,  PVRDMA_CMD_DESTROY_SRQ_RESP,       destroy_srq},
 };
 
 int pvrdma_exec_cmd(PVRDMADev *dev)
diff --git a/hw/rdma/vmw/pvrdma_main.c b/hw/rdma/vmw/pvrdma_main.c
index 0b46561bad11..e0d01a3199c1 100644
--- a/hw/rdma/vmw/pvrdma_main.c
+++ b/hw/rdma/vmw/pvrdma_main.c
@@ -53,6 +53,7 @@  static Property pvrdma_dev_properties[] = {
     DEFINE_PROP_INT32("dev-caps-max-qp-init-rd-atom", PVRDMADev,
                       dev_attr.max_qp_init_rd_atom, MAX_QP_INIT_RD_ATOM),
     DEFINE_PROP_INT32("dev-caps-max-ah", PVRDMADev, dev_attr.max_ah, MAX_AH),
+    DEFINE_PROP_INT32("dev-caps-max-srq", PVRDMADev, dev_attr.max_srq, MAX_SRQ),
     DEFINE_PROP_CHR("mad-chardev", PVRDMADev, mad_chr),
     DEFINE_PROP_END_OF_LIST(),
 };
@@ -261,6 +262,9 @@  static void init_dsr_dev_caps(PVRDMADev *dev)
     dsr->caps.max_mr = dev->dev_attr.max_mr;
     dsr->caps.max_pd = dev->dev_attr.max_pd;
     dsr->caps.max_ah = dev->dev_attr.max_ah;
+    dsr->caps.max_srq = dev->dev_attr.max_srq;
+    dsr->caps.max_srq_wr = dev->dev_attr.max_srq_wr;
+    dsr->caps.max_srq_sge = dev->dev_attr.max_srq_sge;
     dsr->caps.gid_tbl_len = MAX_GIDS;
     dsr->caps.sys_image_guid = 0;
     dsr->caps.node_guid = dev->node_guid;
@@ -485,6 +489,13 @@  static void pvrdma_uar_write(void *opaque, hwaddr addr, uint64_t val,
             pvrdma_cq_poll(&dev->rdma_dev_res, val & PVRDMA_UAR_HANDLE_MASK);
         }
         break;
+    case PVRDMA_UAR_SRQ_OFFSET:
+        if (val & PVRDMA_UAR_SRQ_RECV) {
+            trace_pvrdma_uar_write(addr, val, "QP", "SRQ",
+                                   val & PVRDMA_UAR_HANDLE_MASK, 0);
+        pvrdma_srq_recv(dev, val & PVRDMA_UAR_HANDLE_MASK);
+        }
+        break;
     default:
         rdma_error_report("Unsupported command, addr=0x%"PRIx64", val=0x%"PRIx64,
                           addr, val);
@@ -554,6 +565,11 @@  static void init_dev_caps(PVRDMADev *dev)
 
     dev->dev_attr.max_cqe = pg_tbl_bytes / sizeof(struct pvrdma_cqe) -
                             TARGET_PAGE_SIZE; /* First page is ring state */
+
+    dev->dev_attr.max_srq_wr = pg_tbl_bytes /
+                                ((sizeof(struct pvrdma_rq_wqe_hdr) +
+                                sizeof(struct pvrdma_sge)) *
+                                dev->dev_attr.max_sge) - TARGET_PAGE_SIZE;
 }
 
 static int pvrdma_check_ram_shared(Object *obj, void *opaque)
diff --git a/hw/rdma/vmw/pvrdma_qp_ops.c b/hw/rdma/vmw/pvrdma_qp_ops.c
index 5b9786efbe4b..bd6db858de32 100644
--- a/hw/rdma/vmw/pvrdma_qp_ops.c
+++ b/hw/rdma/vmw/pvrdma_qp_ops.c
@@ -70,7 +70,7 @@  static int pvrdma_post_cqe(PVRDMADev *dev, uint32_t cq_handle,
 
     memset(cqe1, 0, sizeof(*cqe1));
     cqe1->wr_id = cqe->wr_id;
-    cqe1->qp = cqe->qp;
+    cqe1->qp = cqe->qp ? cqe->qp : wc->qp_num;
     cqe1->opcode = cqe->opcode;
     cqe1->status = wc->status;
     cqe1->byte_len = wc->byte_len;
@@ -241,6 +241,50 @@  void pvrdma_qp_recv(PVRDMADev *dev, uint32_t qp_handle)
     }
 }
 
+void pvrdma_srq_recv(PVRDMADev *dev, uint32_t srq_handle)
+{
+    RdmaRmSRQ *srq;
+    PvrdmaRqWqe *wqe;
+    PvrdmaRing *ring;
+
+    srq = rdma_rm_get_srq(&dev->rdma_dev_res, srq_handle);
+    if (unlikely(!srq)) {
+        return;
+    }
+
+    ring = (PvrdmaRing *)srq->opaque;
+
+    wqe = (struct PvrdmaRqWqe *)pvrdma_ring_next_elem_read(ring);
+    while (wqe) {
+        CompHandlerCtx *comp_ctx;
+
+        /* Prepare CQE */
+        comp_ctx = g_malloc(sizeof(CompHandlerCtx));
+        comp_ctx->dev = dev;
+        comp_ctx->cq_handle = srq->recv_cq_handle;
+        comp_ctx->cqe.wr_id = wqe->hdr.wr_id;
+        comp_ctx->cqe.qp = 0;
+        comp_ctx->cqe.opcode = IBV_WC_RECV;
+
+        if (wqe->hdr.num_sge > dev->dev_attr.max_sge) {
+            rdma_error_report("Invalid num_sge=%d (max %d)", wqe->hdr.num_sge,
+                              dev->dev_attr.max_sge);
+            complete_with_error(VENDOR_ERR_INV_NUM_SGE, comp_ctx);
+            continue;
+        }
+
+        rdma_backend_post_srq_recv(&dev->backend_dev, &srq->backend_srq,
+                                   (struct ibv_sge *)&wqe->sge[0],
+                                   wqe->hdr.num_sge,
+                                   comp_ctx);
+
+        pvrdma_ring_read_inc(ring);
+
+        wqe = pvrdma_ring_next_elem_read(ring);
+    }
+
+}
+
 void pvrdma_cq_poll(RdmaDeviceResources *dev_res, uint32_t cq_handle)
 {
     RdmaRmCQ *cq;
diff --git a/hw/rdma/vmw/pvrdma_qp_ops.h b/hw/rdma/vmw/pvrdma_qp_ops.h
index 31cb48ba29f1..82e720a76fb4 100644
--- a/hw/rdma/vmw/pvrdma_qp_ops.h
+++ b/hw/rdma/vmw/pvrdma_qp_ops.h
@@ -22,6 +22,7 @@  int pvrdma_qp_ops_init(void);
 void pvrdma_qp_ops_fini(void);
 void pvrdma_qp_send(PVRDMADev *dev, uint32_t qp_handle);
 void pvrdma_qp_recv(PVRDMADev *dev, uint32_t qp_handle);
+void pvrdma_srq_recv(PVRDMADev *dev, uint32_t srq_handle);
 void pvrdma_cq_poll(RdmaDeviceResources *dev_res, uint32_t cq_handle);
 
 #endif