From patchwork Mon Apr 13 11:58:06 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Weihang Li X-Patchwork-Id: 11485473 X-Patchwork-Delegate: jgg@ziepe.ca Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id BB6B5912 for ; Mon, 13 Apr 2020 11:58:25 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id A85202073E for ; Mon, 13 Apr 2020 11:58:25 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1729296AbgDML6Y (ORCPT ); Mon, 13 Apr 2020 07:58:24 -0400 Received: from szxga07-in.huawei.com ([45.249.212.35]:53284 "EHLO huawei.com" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1728560AbgDML6Y (ORCPT ); Mon, 13 Apr 2020 07:58:24 -0400 Received: from DGGEMS403-HUB.china.huawei.com (unknown [172.30.72.58]) by Forcepoint Email with ESMTP id 879E278E8D87A7611CB4; Mon, 13 Apr 2020 19:58:17 +0800 (CST) Received: from localhost.localdomain (10.67.165.24) by DGGEMS403-HUB.china.huawei.com (10.3.19.203) with Microsoft SMTP Server id 14.3.487.0; Mon, 13 Apr 2020 19:58:10 +0800 From: Weihang Li To: , CC: , , Subject: [PATCH for-next 1/6] RDMA/hns: Add support for addressing when hopnum is 0 Date: Mon, 13 Apr 2020 19:58:06 +0800 Message-ID: <1586779091-51410-2-git-send-email-liweihang@huawei.com> X-Mailer: git-send-email 2.8.1 In-Reply-To: <1586779091-51410-1-git-send-email-liweihang@huawei.com> References: <1586779091-51410-1-git-send-email-liweihang@huawei.com> MIME-Version: 1.0 X-Originating-IP: [10.67.165.24] X-CFilter-Loop: Reflected Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org From: Xi Wang Currently, WQE and EQE table have already used the mtr interface to config and access memory by multi-hop addressing when hopnum is from 1 to 3. But if hopnum is 0, each table need write its own but repetitive logic, and many duplicate code exists in the mtr interfaces invoke process. So wraps the public logic as 3 functions: hns_roce_mtr_create(), hns_roce_mtr_destroy() and hns_roce_mtr_map() to support hopnum ranges from 0 to 3. In addition, makes the mtr interfaces easier to use. Signed-off-by: Xi Wang Signed-off-by: Weihang Li --- drivers/infiniband/hw/hns/hns_roce_device.h | 46 ++- drivers/infiniband/hw/hns/hns_roce_hem.c | 9 +- drivers/infiniband/hw/hns/hns_roce_hem.h | 5 +- drivers/infiniband/hw/hns/hns_roce_mr.c | 423 ++++++++++++++++++++++++++-- 4 files changed, 450 insertions(+), 33 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index f6b3cf6..4a7afec 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -271,6 +271,9 @@ enum { #define PAGE_ADDR_SHIFT 12 +/* The minimum page count for hardware access page directly. */ +#define HNS_HW_DIRECT_PAGE_COUNT 2 + struct hns_roce_uar { u64 pfn; unsigned long index; @@ -357,13 +360,32 @@ struct hns_roce_hem_list { struct list_head mid_bt[HNS_ROCE_MAX_BT_REGION][HNS_ROCE_MAX_BT_LEVEL]; struct list_head btm_bt; /* link all bottom bt in @mid_bt */ dma_addr_t root_ba; /* pointer to the root ba table */ - int bt_pg_shift; +}; + +struct hns_roce_buf_attr { + struct { + size_t size; /* region size */ + int hopnum; /* multi-hop addressing hop num */ + } region[HNS_ROCE_MAX_BT_REGION]; + int region_count; /* valid region count */ + int page_shift; /* buffer page shift */ + bool fixed_page; /* decide page shift is fixed-size or maximum size */ + int user_access; /* umem access flag */ + bool mtt_only; /* only alloc buffer-required MTT memory */ }; /* memory translate region */ struct hns_roce_mtr { - struct hns_roce_hem_list hem_list; - int buf_pg_shift; + struct hns_roce_hem_list hem_list; /* multi-hop addressing resource */ + struct ib_umem *umem; /* user space buffer */ + struct hns_roce_buf *kmem; /* kernel space buffer */ + struct { + dma_addr_t root_ba; /* root BA table's address */ + bool is_direct; /* addressing without BA table */ + int ba_pg_shift; /* BA table page shift */ + int buf_pg_shift; /* buffer page shift */ + int buf_pg_count; /* buffer page count */ + } hem_cfg; /* config for hardware addressing */ }; struct hns_roce_mw { @@ -1113,6 +1135,16 @@ static inline void *hns_roce_buf_offset(struct hns_roce_buf *buf, int offset) (offset & (page_size - 1)); } +static inline u64 to_hr_hw_page_addr(u64 addr) +{ + return addr >> PAGE_ADDR_SHIFT; +} + +static inline u32 to_hr_hw_page_shift(u32 page_shift) +{ + return page_shift - PAGE_ADDR_SHIFT; +} + int hns_roce_init_uar_table(struct hns_roce_dev *dev); int hns_roce_uar_alloc(struct hns_roce_dev *dev, struct hns_roce_uar *uar); void hns_roce_uar_free(struct hns_roce_dev *dev, struct hns_roce_uar *uar); @@ -1144,6 +1176,14 @@ void hns_roce_mtr_cleanup(struct hns_roce_dev *hr_dev, #define MTT_MIN_COUNT 2 int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, int offset, u64 *mtt_buf, int mtt_max, u64 *base_addr); +int hns_roce_mtr_create(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, + struct hns_roce_buf_attr *buf_attr, int page_shift, + struct ib_udata *udata, unsigned long user_addr); +void hns_roce_mtr_destroy(struct hns_roce_dev *hr_dev, + struct hns_roce_mtr *mtr); +int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, + struct hns_roce_buf_region *regions, int region_cnt, + dma_addr_t *pages, int page_cnt); int hns_roce_init_pd_table(struct hns_roce_dev *hr_dev); int hns_roce_init_mr_table(struct hns_roce_dev *hr_dev); diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index 263338b..a245e75 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -1505,7 +1505,7 @@ static int hem_list_alloc_root_bt(struct hns_roce_dev *hr_dev, int hns_roce_hem_list_request(struct hns_roce_dev *hr_dev, struct hns_roce_hem_list *hem_list, const struct hns_roce_buf_region *regions, - int region_cnt) + int region_cnt, int bt_pg_shift) { const struct hns_roce_buf_region *r; int ofs, end; @@ -1519,7 +1519,7 @@ int hns_roce_hem_list_request(struct hns_roce_dev *hr_dev, return -EINVAL; } - unit = (1 << hem_list->bt_pg_shift) / BA_BYTE_LEN; + unit = (1 << bt_pg_shift) / BA_BYTE_LEN; for (i = 0; i < region_cnt; i++) { r = ®ions[i]; if (!r->count) @@ -1566,8 +1566,7 @@ void hns_roce_hem_list_release(struct hns_roce_dev *hr_dev, hem_list->root_ba = 0; } -void hns_roce_hem_list_init(struct hns_roce_hem_list *hem_list, - int bt_page_order) +void hns_roce_hem_list_init(struct hns_roce_hem_list *hem_list) { int i, j; @@ -1576,8 +1575,6 @@ void hns_roce_hem_list_init(struct hns_roce_hem_list *hem_list, for (i = 0; i < HNS_ROCE_MAX_BT_REGION; i++) for (j = 0; j < HNS_ROCE_MAX_BT_LEVEL; j++) INIT_LIST_HEAD(&hem_list->mid_bt[i][j]); - - hem_list->bt_pg_shift = bt_page_order; } void *hns_roce_hem_list_find_mtt(struct hns_roce_dev *hr_dev, diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.h b/drivers/infiniband/hw/hns/hns_roce_hem.h index 3bb8f78..a00b6c2 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.h +++ b/drivers/infiniband/hw/hns/hns_roce_hem.h @@ -133,14 +133,13 @@ int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev, struct hns_roce_hem_mhop *mhop); bool hns_roce_check_whether_mhop(struct hns_roce_dev *hr_dev, u32 type); -void hns_roce_hem_list_init(struct hns_roce_hem_list *hem_list, - int bt_page_order); +void hns_roce_hem_list_init(struct hns_roce_hem_list *hem_list); int hns_roce_hem_list_calc_root_ba(const struct hns_roce_buf_region *regions, int region_cnt, int unit); int hns_roce_hem_list_request(struct hns_roce_dev *hr_dev, struct hns_roce_hem_list *hem_list, const struct hns_roce_buf_region *regions, - int region_cnt); + int region_cnt, int bt_pg_shift); void hns_roce_hem_list_release(struct hns_roce_dev *hr_dev, struct hns_roce_hem_list *hem_list); void *hns_roce_hem_list_find_mtt(struct hns_roce_dev *hr_dev, diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 176f346..b3af369 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -1567,8 +1567,9 @@ int hns_roce_dealloc_mw(struct ib_mw *ibmw) void hns_roce_mtr_init(struct hns_roce_mtr *mtr, int bt_pg_shift, int buf_pg_shift) { - hns_roce_hem_list_init(&mtr->hem_list, bt_pg_shift); - mtr->buf_pg_shift = buf_pg_shift; + hns_roce_hem_list_init(&mtr->hem_list); + mtr->hem_cfg.buf_pg_shift = buf_pg_shift; + mtr->hem_cfg.ba_pg_shift = bt_pg_shift; } void hns_roce_mtr_cleanup(struct hns_roce_dev *hr_dev, @@ -1577,19 +1578,23 @@ void hns_roce_mtr_cleanup(struct hns_roce_dev *hr_dev, hns_roce_hem_list_release(hr_dev, &mtr->hem_list); } -static int hns_roce_write_mtr(struct hns_roce_dev *hr_dev, - struct hns_roce_mtr *mtr, dma_addr_t *bufs, - struct hns_roce_buf_region *r) +static int mtr_map_region(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, + dma_addr_t *pages, struct hns_roce_buf_region *region) { + __le64 *mtts; int offset; int count; int npage; - u64 *mtts; + u64 addr; int end; int i; - offset = r->offset; - end = offset + r->count; + /* if hopnum is 0, buffer cannot store BAs, so skip write mtt */ + if (!region->hopnum) + return 0; + + offset = region->offset; + end = offset + region->count; npage = 0; while (offset < end) { mtts = hns_roce_hem_list_find_mtt(hr_dev, &mtr->hem_list, @@ -1597,13 +1602,13 @@ static int hns_roce_write_mtr(struct hns_roce_dev *hr_dev, if (!mtts) return -ENOBUFS; - /* Save page addr, low 12 bits : 0 */ for (i = 0; i < count; i++) { if (hr_dev->hw_rev == HNS_ROCE_HW_VER1) - mtts[i] = bufs[npage] >> PAGE_ADDR_SHIFT; + addr = to_hr_hw_page_addr(pages[npage]); else - mtts[i] = bufs[npage]; + addr = pages[npage]; + mtts[i] = cpu_to_le64(addr); npage++; } offset += count; @@ -1621,13 +1626,14 @@ int hns_roce_mtr_attach(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, int i; ret = hns_roce_hem_list_request(hr_dev, &mtr->hem_list, regions, - region_cnt); + region_cnt, mtr->hem_cfg.ba_pg_shift); if (ret) return ret; + mtr->hem_cfg.root_ba = mtr->hem_list.root_ba; for (i = 0; i < region_cnt; i++) { r = ®ions[i]; - ret = hns_roce_write_mtr(hr_dev, mtr, bufs[i], r); + ret = mtr_map_region(hr_dev, mtr, bufs[i], r); if (ret) { dev_err(hr_dev->dev, "write mtr[%d/%d] err %d,offset=%d.\n", @@ -1644,37 +1650,412 @@ int hns_roce_mtr_attach(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, return ret; } +static inline bool mtr_has_mtt(struct hns_roce_buf_attr *attr) +{ + int i; + + for (i = 0; i < attr->region_count; i++) + if (attr->region[i].hopnum != HNS_ROCE_HOP_NUM_0 && + attr->region[i].hopnum > 0) + return true; + + /* because the mtr only one root base address, when hopnum is 0 means + * root base address equals the first buffer address, thus all alloced + * memory must in a continuous space accessed by direct mode. + */ + return false; +} + +static inline size_t mtr_bufs_size(struct hns_roce_buf_attr *attr) +{ + size_t size = 0; + int i; + + for (i = 0; i < attr->region_count; i++) + size += attr->region[i].size; + + return size; +} + +static inline int mtr_umem_page_count(struct ib_umem *umem, int page_shift) +{ + int count = ib_umem_page_count(umem); + + if (page_shift >= PAGE_SHIFT) + count >>= page_shift - PAGE_SHIFT; + else + count <<= PAGE_SHIFT - page_shift; + + return count; +} + +static inline size_t mtr_kmem_direct_size(bool is_direct, size_t alloc_size, + int page_shift) +{ + if (is_direct) + return ALIGN(alloc_size, 1 << page_shift); + else + return HNS_HW_DIRECT_PAGE_COUNT << page_shift; +} + +/* + * check the given pages in continuous address space + * Returns 0 on success, or the error page num. + */ +static inline int mtr_check_direct_pages(dma_addr_t *pages, int page_count, + int page_shift) +{ + size_t page_size = 1 << page_shift; + int i; + + for (i = 1; i < page_count; i++) + if (pages[i] - pages[i - 1] != page_size) + return i; + + return 0; +} + +static void mtr_free_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr) +{ + /* release user buffers */ + if (mtr->umem) { + ib_umem_release(mtr->umem); + mtr->umem = NULL; + } + + /* release kernel buffers */ + if (mtr->kmem) { + hns_roce_buf_free(hr_dev, mtr->kmem); + kfree(mtr->kmem); + mtr->kmem = NULL; + } +} + +static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, + struct hns_roce_buf_attr *buf_attr, bool is_direct, + struct ib_udata *udata, unsigned long user_addr) +{ + struct ib_device *ibdev = &hr_dev->ib_dev; + int max_pg_shift = buf_attr->page_shift; + int best_pg_shift = 0; + int all_pg_count = 0; + size_t direct_size; + size_t total_size; + unsigned long tmp; + int ret = 0; + + total_size = mtr_bufs_size(buf_attr); + if (total_size < 1) { + ibdev_err(ibdev, "Failed to check mtr size\n"); + return -EINVAL; + } + + if (udata) { + mtr->kmem = NULL; + mtr->umem = ib_umem_get(ibdev, user_addr, total_size, + buf_attr->user_access); + if (IS_ERR_OR_NULL(mtr->umem)) { + ibdev_err(ibdev, "Failed to get umem, ret %ld\n", + PTR_ERR(mtr->umem)); + return -ENOMEM; + } + if (buf_attr->fixed_page) { + best_pg_shift = max_pg_shift; + } else { + tmp = GENMASK(max_pg_shift, 0); + ret = ib_umem_find_best_pgsz(mtr->umem, tmp, user_addr); + best_pg_shift = (ret <= PAGE_SIZE) ? + PAGE_SHIFT : ilog2(ret); + } + all_pg_count = mtr_umem_page_count(mtr->umem, best_pg_shift); + ret = 0; + } else { + mtr->umem = NULL; + mtr->kmem = kzalloc(sizeof(*mtr->kmem), GFP_KERNEL); + if (!mtr->kmem) { + ibdev_err(ibdev, "Failed to alloc kmem\n"); + return -ENOMEM; + } + direct_size = mtr_kmem_direct_size(is_direct, total_size, + max_pg_shift); + ret = hns_roce_buf_alloc(hr_dev, total_size, direct_size, + mtr->kmem, max_pg_shift); + if (ret) { + ibdev_err(ibdev, "Failed to alloc kmem, ret %d\n", ret); + goto err_alloc_mem; + } else { + best_pg_shift = max_pg_shift; + all_pg_count = mtr->kmem->npages; + } + } + + /* must bigger than minimum hardware page shift */ + if (best_pg_shift < PAGE_ADDR_SHIFT || all_pg_count < 1) { + ret = -EINVAL; + ibdev_err(ibdev, "Failed to check mtr page shift %d count %d\n", + best_pg_shift, all_pg_count); + goto err_alloc_mem; + } + + mtr->hem_cfg.buf_pg_shift = best_pg_shift; + mtr->hem_cfg.buf_pg_count = all_pg_count; + + return 0; +err_alloc_mem: + mtr_free_bufs(hr_dev, mtr); + return ret; +} + +static int mtr_get_pages(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, + dma_addr_t *pages, int count, int page_shift) +{ + struct ib_device *ibdev = &hr_dev->ib_dev; + int npage; + int err; + + if (mtr->umem) + npage = hns_roce_get_umem_bufs(hr_dev, pages, count, 0, + mtr->umem, page_shift); + else + npage = hns_roce_get_kmem_bufs(hr_dev, pages, count, 0, + mtr->kmem); + + if (mtr->hem_cfg.is_direct && npage > 1) { + err = mtr_check_direct_pages(pages, npage, page_shift); + if (err) { + ibdev_err(ibdev, "Failed to check %s direct page-%d\n", + mtr->umem ? "user" : "kernel", err); + npage = err; + } + } + + return npage; +} + +int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, + struct hns_roce_buf_region *regions, int region_cnt, + dma_addr_t *pages, int page_cnt) +{ + struct ib_device *ibdev = &hr_dev->ib_dev; + struct hns_roce_buf_region *r; + int err; + int i; + + for (i = 0; i < region_cnt; i++) { + r = ®ions[i]; + if (r->offset + r->count > page_cnt) { + err = -EINVAL; + ibdev_err(ibdev, + "Failed to check mtr%d end %d + %d, max %d\n", + i, r->offset, r->count, page_cnt); + return err; + } + + err = mtr_map_region(hr_dev, mtr, &pages[r->offset], r); + if (err) { + ibdev_err(ibdev, + "Failed to map mtr%d offset %d, err %d\n", + i, r->offset, err); + return err; + } + } + + return 0; +} + int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, int offset, u64 *mtt_buf, int mtt_max, u64 *base_addr) { - u64 *mtts = mtt_buf; int mtt_count; int total = 0; - u64 *addr; + __le64 *mtts; int npage; + u64 addr; int left; - if (mtts == NULL || mtt_max < 1) + if (!mtt_buf || mtt_max < 1) goto done; + /* no mtt memory in direct mode, so just return the buffer address */ + if (mtr->hem_cfg.is_direct) { + npage = offset; + for (total = 0; total < mtt_max; total++, npage++) { + addr = mtr->hem_cfg.root_ba + + (npage << mtr->hem_cfg.buf_pg_shift); + + if (hr_dev->hw_rev == HNS_ROCE_HW_VER1) + mtt_buf[total] = to_hr_hw_page_addr(addr); + else + mtt_buf[total] = addr; + } + + goto done; + } + left = mtt_max; while (left > 0) { mtt_count = 0; - addr = hns_roce_hem_list_find_mtt(hr_dev, &mtr->hem_list, + mtts = hns_roce_hem_list_find_mtt(hr_dev, &mtr->hem_list, offset + total, &mtt_count, NULL); - if (!addr || !mtt_count) + if (!mtts || !mtt_count) goto done; npage = min(mtt_count, left); - memcpy(&mtts[total], addr, BA_BYTE_LEN * npage); left -= npage; - total += npage; + for (mtt_count = 0; mtt_count < npage; mtt_count++) + mtt_buf[total++] = le64_to_cpu(mtts[mtt_count]); } done: if (base_addr) - *base_addr = mtr->hem_list.root_ba; + *base_addr = mtr->hem_cfg.root_ba; return total; } + +/* convert buffer size to page index and page count */ +static int mtr_init_region(struct hns_roce_buf_attr *attr, int page_cnt, + struct hns_roce_buf_region *regions, int region_cnt, + int page_shift) +{ + unsigned int page_size = 1 << page_shift; + int max_region = attr->region_count; + struct hns_roce_buf_region *r; + int page_idx = 0; + int i = 0; + + for (; i < region_cnt && i < max_region && page_idx < page_cnt; i++) { + r = ®ions[i]; + r->hopnum = attr->region[i].hopnum == HNS_ROCE_HOP_NUM_0 ? + 0 : attr->region[i].hopnum; + r->offset = page_idx; + r->count = DIV_ROUND_UP(attr->region[i].size, page_size); + page_idx += r->count; + } + + return i; +} + +/** + * hns_roce_mtr_create - Create hns memory translate region. + * + * @mtr: memory translate region + * @init_attr: init attribute for creating mtr + * @page_shift: page shift for multi-hop base address table + * @udata: user space context, if it's NULL, means kernel space + * @user_addr: userspace virtual address to start at + * @buf_alloced: mtr has private buffer, true means need to alloc + */ +int hns_roce_mtr_create(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, + struct hns_roce_buf_attr *buf_attr, int page_shift, + struct ib_udata *udata, unsigned long user_addr) +{ + struct hns_roce_buf_region regions[HNS_ROCE_MAX_BT_REGION] = {}; + struct ib_device *ibdev = &hr_dev->ib_dev; + dma_addr_t *pages = NULL; + int region_cnt = 0; + int all_pg_cnt; + int get_pg_cnt; + bool has_mtt; + int err = 0; + + has_mtt = mtr_has_mtt(buf_attr); + /* if buffer only need mtt, just init the hem cfg */ + if (buf_attr->mtt_only) { + mtr->hem_cfg.buf_pg_shift = buf_attr->page_shift; + mtr->hem_cfg.buf_pg_count = mtr_bufs_size(buf_attr) >> + buf_attr->page_shift; + mtr->umem = NULL; + mtr->kmem = NULL; + } else { + err = mtr_alloc_bufs(hr_dev, mtr, buf_attr, !has_mtt, udata, + user_addr); + if (err) { + ibdev_err(ibdev, "Failed to alloc mtr bufs, err %d\n", + err); + return err; + } + } + + /* alloc mtt memory */ + all_pg_cnt = mtr->hem_cfg.buf_pg_count; + hns_roce_hem_list_init(&mtr->hem_list); + mtr->hem_cfg.is_direct = !has_mtt; + mtr->hem_cfg.ba_pg_shift = page_shift; + if (has_mtt) { + region_cnt = mtr_init_region(buf_attr, all_pg_cnt, + regions, ARRAY_SIZE(regions), + mtr->hem_cfg.buf_pg_shift); + if (region_cnt < 1) { + err = -ENOBUFS; + ibdev_err(ibdev, "Failed to init mtr region %d\n", + region_cnt); + goto err_alloc_bufs; + } + err = hns_roce_hem_list_request(hr_dev, &mtr->hem_list, + regions, region_cnt, + page_shift); + if (err) { + ibdev_err(ibdev, "Failed to request mtr hem, err %d\n", + err); + goto err_alloc_bufs; + } + mtr->hem_cfg.root_ba = mtr->hem_list.root_ba; + } + + /* no buffer to map */ + if (buf_attr->mtt_only) + return 0; + + /* alloc a tmp array to store buffer's dma address */ + pages = kvcalloc(all_pg_cnt, sizeof(dma_addr_t), GFP_KERNEL); + if (!pages) { + err = -ENOMEM; + ibdev_err(ibdev, "Failed to alloc mtr page list %d\n", + all_pg_cnt); + goto err_alloc_hem_list; + } + + get_pg_cnt = mtr_get_pages(hr_dev, mtr, pages, all_pg_cnt, + mtr->hem_cfg.buf_pg_shift); + if (get_pg_cnt != all_pg_cnt) { + ibdev_err(ibdev, "Failed to get mtr page %d != %d\n", + get_pg_cnt, all_pg_cnt); + err = -ENOBUFS; + goto err_alloc_page_list; + } + + if (!has_mtt) { + mtr->hem_cfg.root_ba = pages[0]; + } else { + /* write buffer's dma address to BA table */ + err = hns_roce_mtr_map(hr_dev, mtr, regions, region_cnt, pages, + all_pg_cnt); + if (err) { + ibdev_err(ibdev, "Failed to map mtr pages, err %d\n", + err); + goto err_alloc_page_list; + } + } + + /* drop tmp array */ + kvfree(pages); + return 0; +err_alloc_page_list: + kvfree(pages); +err_alloc_hem_list: + hns_roce_hem_list_release(hr_dev, &mtr->hem_list); +err_alloc_bufs: + mtr_free_bufs(hr_dev, mtr); + return err; +} + +void hns_roce_mtr_destroy(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr) +{ + /* release multi-hop addressing resource */ + hns_roce_hem_list_release(hr_dev, &mtr->hem_list); + + /* free buffers */ + mtr_free_bufs(hr_dev, mtr); +} From patchwork Mon Apr 13 11:58:07 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Weihang Li X-Patchwork-Id: 11485467 X-Patchwork-Delegate: jgg@ziepe.ca Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id A60CF912 for ; Mon, 13 Apr 2020 11:58:24 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 8D7412073E for ; Mon, 13 Apr 2020 11:58:24 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1728335AbgDML6X (ORCPT ); Mon, 13 Apr 2020 07:58:23 -0400 Received: from szxga07-in.huawei.com ([45.249.212.35]:53194 "EHLO huawei.com" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1729302AbgDML6U (ORCPT ); Mon, 13 Apr 2020 07:58:20 -0400 Received: from DGGEMS403-HUB.china.huawei.com (unknown [172.30.72.58]) by Forcepoint Email with ESMTP id 8D108E2429A7FEBA1851; Mon, 13 Apr 2020 19:58:17 +0800 (CST) Received: from localhost.localdomain (10.67.165.24) by DGGEMS403-HUB.china.huawei.com (10.3.19.203) with Microsoft SMTP Server id 14.3.487.0; Mon, 13 Apr 2020 19:58:10 +0800 From: Weihang Li To: , CC: , , Subject: [PATCH for-next 2/6] RDMA/hns: Optimize hns buffer allocation flow Date: Mon, 13 Apr 2020 19:58:07 +0800 Message-ID: <1586779091-51410-3-git-send-email-liweihang@huawei.com> X-Mailer: git-send-email 2.8.1 In-Reply-To: <1586779091-51410-1-git-send-email-liweihang@huawei.com> References: <1586779091-51410-1-git-send-email-liweihang@huawei.com> MIME-Version: 1.0 X-Originating-IP: [10.67.165.24] X-CFilter-Loop: Reflected Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org From: Xi Wang When the value of nbufs is 1, the buffer is in direct mode, which may cause confusion. So optimizes current codes to make it easier to maintain and understand. Signed-off-by: Xi Wang Signed-off-by: Lang Cheng Signed-off-by: Weihang Li --- drivers/infiniband/hw/hns/hns_roce_alloc.c | 103 +++++++++++++--------------- drivers/infiniband/hw/hns/hns_roce_cq.c | 18 ++--- drivers/infiniband/hw/hns/hns_roce_device.h | 32 ++++++--- drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 2 +- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 43 +++++------- drivers/infiniband/hw/hns/hns_roce_mr.c | 8 +-- drivers/infiniband/hw/hns/hns_roce_qp.c | 6 +- drivers/infiniband/hw/hns/hns_roce_srq.c | 37 +++++----- 8 files changed, 121 insertions(+), 128 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_alloc.c b/drivers/infiniband/hw/hns/hns_roce_alloc.c index da574c2..e04e759 100644 --- a/drivers/infiniband/hw/hns/hns_roce_alloc.c +++ b/drivers/infiniband/hw/hns/hns_roce_alloc.c @@ -157,84 +157,78 @@ void hns_roce_bitmap_cleanup(struct hns_roce_bitmap *bitmap) kfree(bitmap->table); } -void hns_roce_buf_free(struct hns_roce_dev *hr_dev, u32 size, - struct hns_roce_buf *buf) +void hns_roce_buf_free(struct hns_roce_dev *hr_dev, struct hns_roce_buf *buf) { - int i; struct device *dev = hr_dev->dev; + u32 size = buf->size; + int i; + + if (size == 0) + return; + + buf->size = 0; - if (buf->nbufs == 1) { + if (hns_roce_buf_is_direct(buf)) { dma_free_coherent(dev, size, buf->direct.buf, buf->direct.map); } else { - for (i = 0; i < buf->nbufs; ++i) + for (i = 0; i < buf->npages; ++i) if (buf->page_list[i].buf) dma_free_coherent(dev, 1 << buf->page_shift, buf->page_list[i].buf, buf->page_list[i].map); kfree(buf->page_list); + buf->page_list = NULL; } } int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct, struct hns_roce_buf *buf, u32 page_shift) { - int i = 0; - dma_addr_t t; + struct hns_roce_buf_list *buf_list; struct device *dev = hr_dev->dev; - u32 page_size = 1 << page_shift; - u32 order; + u32 page_size; + int i; + + /* The minimum shift of the page accessed by hw is PAGE_ADDR_SHIFT */ + buf->page_shift = max_t(int, PAGE_ADDR_SHIFT, page_shift); - /* SQ/RQ buf lease than one page, SQ + RQ = 8K */ + page_size = 1 << buf->page_shift; + buf->npages = DIV_ROUND_UP(size, page_size); + + /* required size is not bigger than one trunk size */ if (size <= max_direct) { - buf->nbufs = 1; - /* Npages calculated by page_size */ - order = get_order(size); - if (order <= page_shift - PAGE_SHIFT) - order = 0; - else - order -= page_shift - PAGE_SHIFT; - buf->npages = 1 << order; - buf->page_shift = page_shift; - /* MTT PA must be recorded in 4k alignment, t is 4k aligned */ - buf->direct.buf = dma_alloc_coherent(dev, size, &t, + buf->page_list = NULL; + buf->direct.buf = dma_alloc_coherent(dev, size, + &buf->direct.map, GFP_KERNEL); if (!buf->direct.buf) return -ENOMEM; - - buf->direct.map = t; - - while (t & ((1 << buf->page_shift) - 1)) { - --buf->page_shift; - buf->npages *= 2; - } } else { - buf->nbufs = (size + page_size - 1) / page_size; - buf->npages = buf->nbufs; - buf->page_shift = page_shift; - buf->page_list = kcalloc(buf->nbufs, sizeof(*buf->page_list), - GFP_KERNEL); - - if (!buf->page_list) + buf_list = kcalloc(buf->npages, sizeof(*buf_list), GFP_KERNEL); + if (!buf_list) return -ENOMEM; - for (i = 0; i < buf->nbufs; ++i) { - buf->page_list[i].buf = dma_alloc_coherent(dev, - page_size, - &t, - GFP_KERNEL); - - if (!buf->page_list[i].buf) - goto err_free; + for (i = 0; i < buf->npages; i++) { + buf_list[i].buf = dma_alloc_coherent(dev, page_size, + &buf_list[i].map, + GFP_KERNEL); + if (!buf_list[i].buf) + break; + } - buf->page_list[i].map = t; + if (i != buf->npages && i > 0) { + while (i-- > 0) + dma_free_coherent(dev, page_size, + buf_list[i].buf, + buf_list[i].map); + kfree(buf_list); + return -ENOMEM; } + buf->page_list = buf_list; } + buf->size = size; return 0; - -err_free: - hns_roce_buf_free(hr_dev, size, buf); - return -ENOMEM; } int hns_roce_get_kmem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs, @@ -246,18 +240,14 @@ int hns_roce_get_kmem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs, end = start + buf_cnt; if (end > buf->npages) { dev_err(hr_dev->dev, - "invalid kmem region,offset %d,buf_cnt %d,total %d!\n", + "Failed to check kmem bufs, end %d + %d total %d!\n", start, buf_cnt, buf->npages); return -EINVAL; } total = 0; for (i = start; i < end; i++) - if (buf->nbufs == 1) - bufs[total++] = buf->direct.map + - ((dma_addr_t)i << buf->page_shift); - else - bufs[total++] = buf->page_list[i].map; + bufs[total++] = hns_roce_buf_page(buf, i); return total; } @@ -271,8 +261,9 @@ int hns_roce_get_umem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs, int idx = 0; u64 addr; - if (page_shift < PAGE_SHIFT) { - dev_err(hr_dev->dev, "invalid page shift %d!\n", page_shift); + if (page_shift < PAGE_ADDR_SHIFT) { + dev_err(hr_dev->dev, "Failed to check umem page shift %d!\n", + page_shift); return -EINVAL; } diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index 5bfb52f..92798ff 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -157,13 +157,12 @@ static int get_cq_umem(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, struct hns_roce_ib_create_cq ucmd, struct ib_udata *udata) { - struct hns_roce_buf *buf = &hr_cq->buf; struct hns_roce_mtt *mtt = &hr_cq->mtt; struct ib_umem **umem = &hr_cq->umem; u32 npages; int ret; - *umem = ib_umem_get(&hr_dev->ib_dev, ucmd.buf_addr, buf->size, + *umem = ib_umem_get(&hr_dev->ib_dev, ucmd.buf_addr, hr_cq->buf_size, IB_ACCESS_LOCAL_WRITE); if (IS_ERR(*umem)) return PTR_ERR(*umem); @@ -175,7 +174,7 @@ static int get_cq_umem(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, npages = DIV_ROUND_UP(ib_umem_page_count(*umem), 1 << hr_dev->caps.cqe_buf_pg_sz); - ret = hns_roce_mtt_init(hr_dev, npages, buf->page_shift, mtt); + ret = hns_roce_mtt_init(hr_dev, npages, hr_cq->page_shift, mtt); if (ret) goto err_buf; @@ -199,8 +198,9 @@ static int alloc_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) struct hns_roce_mtt *mtt = &hr_cq->mtt; int ret; - ret = hns_roce_buf_alloc(hr_dev, buf->size, (1 << buf->page_shift) * 2, - buf, buf->page_shift); + ret = hns_roce_buf_alloc(hr_dev, hr_cq->buf_size, + (1 << hr_cq->page_shift) * 2, + buf, hr_cq->page_shift); if (ret) goto out; @@ -223,7 +223,7 @@ static int alloc_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) hns_roce_mtt_cleanup(hr_dev, mtt); err_buf: - hns_roce_buf_free(hr_dev, buf->size, buf); + hns_roce_buf_free(hr_dev, buf); out: return ret; @@ -231,7 +231,7 @@ static int alloc_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) static void free_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) { - hns_roce_buf_free(hr_dev, hr_cq->buf.size, &hr_cq->buf); + hns_roce_buf_free(hr_dev, &hr_cq->buf); } static int create_user_cq(struct hns_roce_dev *hr_dev, @@ -367,8 +367,8 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, hr_cq->ib_cq.cqe = cq_entries - 1; /* used as cqe index */ hr_cq->cq_depth = cq_entries; hr_cq->vector = vector; - hr_cq->buf.size = hr_cq->cq_depth * hr_dev->caps.cq_entry_sz; - hr_cq->buf.page_shift = PAGE_SHIFT + hr_dev->caps.cqe_buf_pg_sz; + hr_cq->buf_size = hr_cq->cq_depth * hr_dev->caps.cq_entry_sz; + hr_cq->page_shift = PAGE_SHIFT + hr_dev->caps.cqe_buf_pg_sz; spin_lock_init(&hr_cq->lock); INIT_LIST_HEAD(&hr_cq->sq_list); INIT_LIST_HEAD(&hr_cq->rq_list); diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 4a7afec..c37617d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -468,7 +468,6 @@ struct hns_roce_buf_list { struct hns_roce_buf { struct hns_roce_buf_list direct; struct hns_roce_buf_list *page_list; - int nbufs; u32 npages; u32 size; int page_shift; @@ -510,6 +509,8 @@ struct hns_roce_cq { u8 db_en; spinlock_t lock; struct ib_umem *umem; + u32 buf_size; + int page_shift; u32 cq_depth; u32 cons_index; u32 *set_ci_db; @@ -549,6 +550,8 @@ struct hns_roce_srq { struct hns_roce_buf buf; u64 *wrid; struct ib_umem *umem; + u32 buf_size; + int page_shift; struct hns_roce_mtt mtt; struct hns_roce_idx_que idx_que; spinlock_t lock; @@ -1124,15 +1127,29 @@ static inline struct hns_roce_qp return xa_load(&hr_dev->qp_table_xa, qpn & (hr_dev->caps.num_qps - 1)); } +static inline bool hns_roce_buf_is_direct(struct hns_roce_buf *buf) +{ + if (buf->page_list) + return false; + + return true; +} + static inline void *hns_roce_buf_offset(struct hns_roce_buf *buf, int offset) { - u32 page_size = 1 << buf->page_shift; + if (hns_roce_buf_is_direct(buf)) + return (char *)(buf->direct.buf) + (offset & (buf->size - 1)); - if (buf->nbufs == 1) - return (char *)(buf->direct.buf) + offset; + return (char *)(buf->page_list[offset >> buf->page_shift].buf) + + (offset & ((1 << buf->page_shift) - 1)); +} + +static inline dma_addr_t hns_roce_buf_page(struct hns_roce_buf *buf, int idx) +{ + if (hns_roce_buf_is_direct(buf)) + return buf->direct.map + ((dma_addr_t)idx << buf->page_shift); else - return (char *)(buf->page_list[offset >> buf->page_shift].buf) + - (offset & (page_size - 1)); + return buf->page_list[idx].map; } static inline u64 to_hr_hw_page_addr(u64 addr) @@ -1240,8 +1257,7 @@ struct ib_mw *hns_roce_alloc_mw(struct ib_pd *pd, enum ib_mw_type, struct ib_udata *udata); int hns_roce_dealloc_mw(struct ib_mw *ibmw); -void hns_roce_buf_free(struct hns_roce_dev *hr_dev, u32 size, - struct hns_roce_buf *buf); +void hns_roce_buf_free(struct hns_roce_dev *hr_dev, struct hns_roce_buf *buf); int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct, struct hns_roce_buf *buf, u32 page_shift); diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 5ff028d..4b54906 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -3666,7 +3666,7 @@ static void hns_roce_v1_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) ib_umem_release(hr_cq->umem); if (!udata) { /* Free the buff of stored cq */ - hns_roce_buf_free(hr_dev, hr_cq->buf.size, &hr_cq->buf); + hns_roce_buf_free(hr_dev, &hr_cq->buf); } } diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index c331667..998015d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -4989,24 +4989,14 @@ static void set_eq_cons_index_v2(struct hns_roce_eq *eq) hns_roce_write64(hr_dev, doorbell, eq->doorbell); } -static inline void *get_eqe_buf(struct hns_roce_eq *eq, unsigned long offset) -{ - u32 buf_chk_sz; - - buf_chk_sz = 1 << (eq->eqe_buf_pg_sz + PAGE_SHIFT); - if (eq->buf.nbufs == 1) - return eq->buf.direct.buf + offset % buf_chk_sz; - else - return eq->buf.page_list[offset / buf_chk_sz].buf + - offset % buf_chk_sz; -} - static struct hns_roce_aeqe *next_aeqe_sw_v2(struct hns_roce_eq *eq) { struct hns_roce_aeqe *aeqe; - aeqe = get_eqe_buf(eq, (eq->cons_index & (eq->entries - 1)) * - HNS_ROCE_AEQ_ENTRY_SIZE); + aeqe = hns_roce_buf_offset(&eq->buf, + (eq->cons_index & (eq->entries - 1)) * + HNS_ROCE_AEQ_ENTRY_SIZE); + return (roce_get_bit(aeqe->asyn, HNS_ROCE_V2_AEQ_AEQE_OWNER_S) ^ !!(eq->cons_index & eq->entries)) ? aeqe : NULL; } @@ -5103,8 +5093,9 @@ static struct hns_roce_ceqe *next_ceqe_sw_v2(struct hns_roce_eq *eq) { struct hns_roce_ceqe *ceqe; - ceqe = get_eqe_buf(eq, (eq->cons_index & (eq->entries - 1)) * - HNS_ROCE_CEQ_ENTRY_SIZE); + ceqe = hns_roce_buf_offset(&eq->buf, + (eq->cons_index & (eq->entries - 1)) * + HNS_ROCE_CEQ_ENTRY_SIZE); return (!!(roce_get_bit(ceqe->comp, HNS_ROCE_V2_CEQ_CEQE_OWNER_S))) ^ (!!(eq->cons_index & eq->entries)) ? ceqe : NULL; } @@ -5265,7 +5256,7 @@ static void free_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq) { if (!eq->hop_num || eq->hop_num == HNS_ROCE_HOP_NUM_0) hns_roce_mtr_cleanup(hr_dev, &eq->mtr); - hns_roce_buf_free(hr_dev, eq->buf.size, &eq->buf); + hns_roce_buf_free(hr_dev, &eq->buf); } static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev, @@ -5290,7 +5281,8 @@ static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev, eq->eqe_buf_pg_sz = hr_dev->caps.eqe_buf_pg_sz; eq->shift = ilog2((unsigned int)eq->entries); - /* if not muti-hop, eqe buffer only use one trunk */ + /* if not multi-hop, eqe buffer only use one trunk */ + eq->eqe_buf_pg_sz = eq->buf.page_shift - PAGE_ADDR_SHIFT; if (!eq->hop_num || eq->hop_num == HNS_ROCE_HOP_NUM_0) { eq->eqe_ba = eq->buf.direct.map; eq->cur_eqe_ba = eq->eqe_ba; @@ -5432,7 +5424,7 @@ static int map_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq, goto done; } - hns_roce_mtr_init(&eq->mtr, PAGE_SHIFT + hr_dev->caps.eqe_ba_pg_sz, + hns_roce_mtr_init(&eq->mtr, PAGE_ADDR_SHIFT + hr_dev->caps.eqe_ba_pg_sz, page_shift); ret = hns_roce_mtr_attach(hr_dev, &eq->mtr, &buf_list, ®ion, 1); if (ret) @@ -5454,23 +5446,24 @@ static int alloc_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq) u32 page_shift; u32 mhop_num; u32 max_size; + u32 buf_size; int ret; - page_shift = PAGE_SHIFT + hr_dev->caps.eqe_buf_pg_sz; + page_shift = PAGE_ADDR_SHIFT + hr_dev->caps.eqe_buf_pg_sz; mhop_num = hr_dev->caps.eqe_hop_num; if (!mhop_num) { max_size = 1 << page_shift; - buf->size = max_size; + buf_size = max_size; } else if (mhop_num == HNS_ROCE_HOP_NUM_0) { max_size = eq->entries * eq->eqe_size; - buf->size = max_size; + buf_size = max_size; } else { max_size = 1 << page_shift; - buf->size = PAGE_ALIGN(eq->entries * eq->eqe_size); + buf_size = round_up(eq->entries * eq->eqe_size, max_size); is_mhop = true; } - ret = hns_roce_buf_alloc(hr_dev, buf->size, max_size, buf, page_shift); + ret = hns_roce_buf_alloc(hr_dev, buf_size, max_size, buf, page_shift); if (ret) { dev_err(hr_dev->dev, "alloc eq buf error\n"); return ret; @@ -5486,7 +5479,7 @@ static int alloc_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq) return 0; err_alloc: - hns_roce_buf_free(hr_dev, buf->size, buf); + hns_roce_buf_free(hr_dev, buf); return ret; } diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index b3af369..99e3876 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -900,13 +900,9 @@ int hns_roce_buf_write_mtt(struct hns_roce_dev *hr_dev, if (!page_list) return -ENOMEM; - for (i = 0; i < buf->npages; ++i) { - if (buf->nbufs == 1) - page_list[i] = buf->direct.map + (i << buf->page_shift); - else - page_list[i] = buf->page_list[i].map; + for (i = 0; i < buf->npages; ++i) + page_list[i] = hns_roce_buf_page(buf, i); - } ret = hns_roce_write_mtt(hr_dev, mtt, 0, buf->npages, page_list); kfree(page_list); diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 6317901..1667f37 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -839,7 +839,7 @@ static int alloc_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, ib_umem_release(hr_qp->umem); hr_qp->umem = NULL; } else { - hns_roce_buf_free(hr_dev, hr_qp->buff_size, &hr_qp->hr_buf); + hns_roce_buf_free(hr_dev, &hr_qp->hr_buf); } ibdev_err(ibdev, "Failed to alloc WQE buffer, ret %d.\n", ret); @@ -855,8 +855,8 @@ static void free_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) hr_qp->umem = NULL; } - if (hr_qp->hr_buf.nbufs > 0) - hns_roce_buf_free(hr_dev, hr_qp->buff_size, &hr_qp->hr_buf); + if (hr_qp->hr_buf.npages > 0) + hns_roce_buf_free(hr_dev, &hr_qp->hr_buf); if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) && hr_qp->rq.wqe_cnt) diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index 5b3dd1a..9851d76 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -180,7 +180,8 @@ static int create_user_srq(struct hns_roce_srq *srq, struct ib_udata *udata, { struct hns_roce_dev *hr_dev = to_hr_dev(srq->ibsrq.device); struct hns_roce_ib_create_srq ucmd; - struct hns_roce_buf *buf; + int page_shift; + int page_count; int ret; if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) @@ -191,12 +192,11 @@ static int create_user_srq(struct hns_roce_srq *srq, struct ib_udata *udata, if (IS_ERR(srq->umem)) return PTR_ERR(srq->umem); - buf = &srq->buf; - buf->npages = (ib_umem_page_count(srq->umem) + - (1 << hr_dev->caps.srqwqe_buf_pg_sz) - 1) / + page_count = (ib_umem_page_count(srq->umem) + + (1 << hr_dev->caps.srqwqe_buf_pg_sz) - 1) / (1 << hr_dev->caps.srqwqe_buf_pg_sz); - buf->page_shift = PAGE_SHIFT + hr_dev->caps.srqwqe_buf_pg_sz; - ret = hns_roce_mtt_init(hr_dev, buf->npages, buf->page_shift, + page_shift = PAGE_SHIFT + hr_dev->caps.srqwqe_buf_pg_sz; + ret = hns_roce_mtt_init(hr_dev, page_count, page_shift, &srq->mtt); if (ret) goto err_user_buf; @@ -214,11 +214,10 @@ static int create_user_srq(struct hns_roce_srq *srq, struct ib_udata *udata, goto err_user_srq_mtt; } - buf = &srq->idx_que.idx_buf; - buf->npages = DIV_ROUND_UP(ib_umem_page_count(srq->idx_que.umem), - 1 << hr_dev->caps.idx_buf_pg_sz); - buf->page_shift = PAGE_SHIFT + hr_dev->caps.idx_buf_pg_sz; - ret = hns_roce_mtt_init(hr_dev, buf->npages, buf->page_shift, + page_count = DIV_ROUND_UP(ib_umem_page_count(srq->idx_que.umem), + 1 << hr_dev->caps.idx_buf_pg_sz); + page_shift = PAGE_SHIFT + hr_dev->caps.idx_buf_pg_sz; + ret = hns_roce_mtt_init(hr_dev, page_count, page_shift, &srq->idx_que.mtt); if (ret) { dev_err(hr_dev->dev, "hns_roce_mtt_init error for idx que\n"); @@ -325,15 +324,14 @@ static int create_kernel_srq(struct hns_roce_srq *srq, int srq_buf_size) hns_roce_mtt_cleanup(hr_dev, &srq->idx_que.mtt); err_kernel_create_idx: - hns_roce_buf_free(hr_dev, srq->idx_que.buf_size, - &srq->idx_que.idx_buf); + hns_roce_buf_free(hr_dev, &srq->idx_que.idx_buf); kfree(srq->idx_que.bitmap); err_kernel_srq_mtt: hns_roce_mtt_cleanup(hr_dev, &srq->mtt); err_kernel_buf: - hns_roce_buf_free(hr_dev, srq_buf_size, &srq->buf); + hns_roce_buf_free(hr_dev, &srq->buf); return ret; } @@ -348,14 +346,14 @@ static void destroy_user_srq(struct hns_roce_dev *hr_dev, } static void destroy_kernel_srq(struct hns_roce_dev *hr_dev, - struct hns_roce_srq *srq, int srq_buf_size) + struct hns_roce_srq *srq) { kvfree(srq->wrid); hns_roce_mtt_cleanup(hr_dev, &srq->idx_que.mtt); - hns_roce_buf_free(hr_dev, srq->idx_que.buf_size, &srq->idx_que.idx_buf); + hns_roce_buf_free(hr_dev, &srq->idx_que.idx_buf); kfree(srq->idx_que.bitmap); hns_roce_mtt_cleanup(hr_dev, &srq->mtt); - hns_roce_buf_free(hr_dev, srq_buf_size, &srq->buf); + hns_roce_buf_free(hr_dev, &srq->buf); } int hns_roce_create_srq(struct ib_srq *ib_srq, @@ -437,7 +435,7 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, if (udata) destroy_user_srq(hr_dev, srq); else - destroy_kernel_srq(hr_dev, srq, srq_buf_size); + destroy_kernel_srq(hr_dev, srq); err_srq: return ret; @@ -455,8 +453,7 @@ void hns_roce_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) hns_roce_mtt_cleanup(hr_dev, &srq->idx_que.mtt); } else { kvfree(srq->wrid); - hns_roce_buf_free(hr_dev, srq->wqe_cnt << srq->wqe_shift, - &srq->buf); + hns_roce_buf_free(hr_dev, &srq->buf); } ib_umem_release(srq->idx_que.umem); ib_umem_release(srq->umem); From patchwork Mon Apr 13 11:58:08 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Weihang Li X-Patchwork-Id: 11485463 X-Patchwork-Delegate: jgg@ziepe.ca Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 1777415AB for ; Mon, 13 Apr 2020 11:58:21 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 049D620753 for ; Mon, 13 Apr 2020 11:58:21 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1729304AbgDML6U (ORCPT ); Mon, 13 Apr 2020 07:58:20 -0400 Received: from szxga07-in.huawei.com ([45.249.212.35]:53142 "EHLO huawei.com" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1729296AbgDML6T (ORCPT ); Mon, 13 Apr 2020 07:58:19 -0400 Received: from DGGEMS403-HUB.china.huawei.com (unknown [172.30.72.58]) by Forcepoint Email with ESMTP id 764E1D8929F56313FD06; Mon, 13 Apr 2020 19:58:17 +0800 (CST) Received: from localhost.localdomain (10.67.165.24) by DGGEMS403-HUB.china.huawei.com (10.3.19.203) with Microsoft SMTP Server id 14.3.487.0; Mon, 13 Apr 2020 19:58:11 +0800 From: Weihang Li To: , CC: , , Subject: [PATCH for-next 3/6] RDMA/hns: Optimize 0 hop addressing for EQE buffer Date: Mon, 13 Apr 2020 19:58:08 +0800 Message-ID: <1586779091-51410-4-git-send-email-liweihang@huawei.com> X-Mailer: git-send-email 2.8.1 In-Reply-To: <1586779091-51410-1-git-send-email-liweihang@huawei.com> References: <1586779091-51410-1-git-send-email-liweihang@huawei.com> MIME-Version: 1.0 X-Originating-IP: [10.67.165.24] X-CFilter-Loop: Reflected Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org From: Xi Wang Use the new mtr interface to simple the hop 0 addressing and multihop addressing process. Signed-off-by: Xi Wang Signed-off-by: Lang Cheng Signed-off-by: Weihang Li --- drivers/infiniband/hw/hns/hns_roce_device.h | 6 - drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 171 ++++++++-------------------- 2 files changed, 48 insertions(+), 129 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index c37617d..39577c2 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -794,17 +794,11 @@ struct hns_roce_eq { int over_ignore; int coalesce; int arm_st; - u64 eqe_ba; - int eqe_ba_pg_sz; - int eqe_buf_pg_sz; int hop_num; struct hns_roce_mtr mtr; - struct hns_roce_buf buf; int eq_max_cnt; int eq_period; int shift; - dma_addr_t cur_eqe_ba; - dma_addr_t nxt_eqe_ba; int event_type; int sub_type; }; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 998015d..335a637 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -4993,7 +4993,7 @@ static struct hns_roce_aeqe *next_aeqe_sw_v2(struct hns_roce_eq *eq) { struct hns_roce_aeqe *aeqe; - aeqe = hns_roce_buf_offset(&eq->buf, + aeqe = hns_roce_buf_offset(eq->mtr.kmem, (eq->cons_index & (eq->entries - 1)) * HNS_ROCE_AEQ_ENTRY_SIZE); @@ -5093,7 +5093,7 @@ static struct hns_roce_ceqe *next_ceqe_sw_v2(struct hns_roce_eq *eq) { struct hns_roce_ceqe *ceqe; - ceqe = hns_roce_buf_offset(&eq->buf, + ceqe = hns_roce_buf_offset(eq->mtr.kmem, (eq->cons_index & (eq->entries - 1)) * HNS_ROCE_CEQ_ENTRY_SIZE); return (!!(roce_get_bit(ceqe->comp, HNS_ROCE_V2_CEQ_CEQE_OWNER_S))) ^ @@ -5254,17 +5254,15 @@ static void hns_roce_v2_destroy_eqc(struct hns_roce_dev *hr_dev, int eqn) static void free_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq) { - if (!eq->hop_num || eq->hop_num == HNS_ROCE_HOP_NUM_0) - hns_roce_mtr_cleanup(hr_dev, &eq->mtr); - hns_roce_buf_free(hr_dev, &eq->buf); + hns_roce_mtr_destroy(hr_dev, &eq->mtr); } -static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev, - struct hns_roce_eq *eq, - void *mb_buf) +static int config_eqc(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq, + void *mb_buf) { + u64 eqe_ba[MTT_MIN_COUNT] = { 0 }; struct hns_roce_eq_context *eqc; - u64 ba[MTT_MIN_COUNT] = { 0 }; + u64 bt_ba = 0; int count; eqc = mb_buf; @@ -5272,32 +5270,18 @@ static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev, /* init eqc */ eq->doorbell = hr_dev->reg_base + ROCEE_VF_EQ_DB_CFG0_REG; - eq->hop_num = hr_dev->caps.eqe_hop_num; eq->cons_index = 0; eq->over_ignore = HNS_ROCE_V2_EQ_OVER_IGNORE_0; eq->coalesce = HNS_ROCE_V2_EQ_COALESCE_0; eq->arm_st = HNS_ROCE_V2_EQ_ALWAYS_ARMED; - eq->eqe_ba_pg_sz = hr_dev->caps.eqe_ba_pg_sz; - eq->eqe_buf_pg_sz = hr_dev->caps.eqe_buf_pg_sz; eq->shift = ilog2((unsigned int)eq->entries); /* if not multi-hop, eqe buffer only use one trunk */ - eq->eqe_buf_pg_sz = eq->buf.page_shift - PAGE_ADDR_SHIFT; - if (!eq->hop_num || eq->hop_num == HNS_ROCE_HOP_NUM_0) { - eq->eqe_ba = eq->buf.direct.map; - eq->cur_eqe_ba = eq->eqe_ba; - if (eq->buf.npages > 1) - eq->nxt_eqe_ba = eq->eqe_ba + (1 << eq->eqe_buf_pg_sz); - else - eq->nxt_eqe_ba = eq->eqe_ba; - } else { - count = hns_roce_mtr_find(hr_dev, &eq->mtr, 0, ba, - MTT_MIN_COUNT, &eq->eqe_ba); - eq->cur_eqe_ba = ba[0]; - if (count > 1) - eq->nxt_eqe_ba = ba[1]; - else - eq->nxt_eqe_ba = ba[0]; + count = hns_roce_mtr_find(hr_dev, &eq->mtr, 0, eqe_ba, MTT_MIN_COUNT, + &bt_ba); + if (count < 1) { + dev_err(hr_dev->dev, "failed to find EQE mtr\n"); + return -ENOBUFS; } /* set eqc state */ @@ -5331,12 +5315,12 @@ static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev, /* set eqe_ba_pg_sz */ roce_set_field(eqc->byte_8, HNS_ROCE_EQC_BA_PG_SZ_M, HNS_ROCE_EQC_BA_PG_SZ_S, - eq->eqe_ba_pg_sz + PG_SHIFT_OFFSET); + to_hr_hw_page_shift(eq->mtr.hem_cfg.ba_pg_shift)); /* set eqe_buf_pg_sz */ roce_set_field(eqc->byte_8, HNS_ROCE_EQC_BUF_PG_SZ_M, HNS_ROCE_EQC_BUF_PG_SZ_S, - eq->eqe_buf_pg_sz + PG_SHIFT_OFFSET); + to_hr_hw_page_shift(eq->mtr.hem_cfg.buf_pg_shift)); /* set eq_producer_idx */ roce_set_field(eqc->byte_8, HNS_ROCE_EQC_PROD_INDX_M, @@ -5355,13 +5339,13 @@ static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev, HNS_ROCE_EQC_REPORT_TIMER_S, HNS_ROCE_EQ_INIT_REPORT_TIMER); - /* set eqe_ba [34:3] */ + /* set bt_ba [34:3] */ roce_set_field(eqc->eqe_ba0, HNS_ROCE_EQC_EQE_BA_L_M, - HNS_ROCE_EQC_EQE_BA_L_S, eq->eqe_ba >> 3); + HNS_ROCE_EQC_EQE_BA_L_S, bt_ba >> 3); - /* set eqe_ba [64:35] */ + /* set bt_ba [64:35] */ roce_set_field(eqc->eqe_ba1, HNS_ROCE_EQC_EQE_BA_H_M, - HNS_ROCE_EQC_EQE_BA_H_S, eq->eqe_ba >> 35); + HNS_ROCE_EQC_EQE_BA_H_S, bt_ba >> 35); /* set eq shift */ roce_set_field(eqc->byte_28, HNS_ROCE_EQC_SHIFT_M, HNS_ROCE_EQC_SHIFT_S, @@ -5373,15 +5357,15 @@ static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev, /* set cur_eqe_ba [27:12] */ roce_set_field(eqc->byte_28, HNS_ROCE_EQC_CUR_EQE_BA_L_M, - HNS_ROCE_EQC_CUR_EQE_BA_L_S, eq->cur_eqe_ba >> 12); + HNS_ROCE_EQC_CUR_EQE_BA_L_S, eqe_ba[0] >> 12); /* set cur_eqe_ba [59:28] */ roce_set_field(eqc->byte_32, HNS_ROCE_EQC_CUR_EQE_BA_M_M, - HNS_ROCE_EQC_CUR_EQE_BA_M_S, eq->cur_eqe_ba >> 28); + HNS_ROCE_EQC_CUR_EQE_BA_M_S, eqe_ba[0] >> 28); /* set cur_eqe_ba [63:60] */ roce_set_field(eqc->byte_36, HNS_ROCE_EQC_CUR_EQE_BA_H_M, - HNS_ROCE_EQC_CUR_EQE_BA_H_S, eq->cur_eqe_ba >> 60); + HNS_ROCE_EQC_CUR_EQE_BA_H_S, eqe_ba[0] >> 60); /* set eq consumer idx */ roce_set_field(eqc->byte_36, HNS_ROCE_EQC_CONS_INDX_M, @@ -5389,98 +5373,38 @@ static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev, /* set nex_eqe_ba[43:12] */ roce_set_field(eqc->nxt_eqe_ba0, HNS_ROCE_EQC_NXT_EQE_BA_L_M, - HNS_ROCE_EQC_NXT_EQE_BA_L_S, eq->nxt_eqe_ba >> 12); + HNS_ROCE_EQC_NXT_EQE_BA_L_S, eqe_ba[1] >> 12); /* set nex_eqe_ba[63:44] */ roce_set_field(eqc->nxt_eqe_ba1, HNS_ROCE_EQC_NXT_EQE_BA_H_M, - HNS_ROCE_EQC_NXT_EQE_BA_H_S, eq->nxt_eqe_ba >> 44); -} + HNS_ROCE_EQC_NXT_EQE_BA_H_S, eqe_ba[1] >> 44); -static int map_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq, - u32 page_shift) -{ - struct hns_roce_buf_region region = {}; - dma_addr_t *buf_list = NULL; - int ba_num; - int ret; - - ba_num = DIV_ROUND_UP(PAGE_ALIGN(eq->entries * eq->eqe_size), - 1 << page_shift); - hns_roce_init_buf_region(®ion, hr_dev->caps.eqe_hop_num, 0, ba_num); - - /* alloc a tmp list for storing eq buf address */ - ret = hns_roce_alloc_buf_list(®ion, &buf_list, 1); - if (ret) { - dev_err(hr_dev->dev, "alloc eq buf_list error\n"); - return ret; - } - - ba_num = hns_roce_get_kmem_bufs(hr_dev, buf_list, region.count, - region.offset, &eq->buf); - if (ba_num != region.count) { - dev_err(hr_dev->dev, "get eqe buf err,expect %d,ret %d.\n", - region.count, ba_num); - ret = -ENOBUFS; - goto done; - } - - hns_roce_mtr_init(&eq->mtr, PAGE_ADDR_SHIFT + hr_dev->caps.eqe_ba_pg_sz, - page_shift); - ret = hns_roce_mtr_attach(hr_dev, &eq->mtr, &buf_list, ®ion, 1); - if (ret) - dev_err(hr_dev->dev, "mtr attach error for eqe\n"); - - goto done; - - hns_roce_mtr_cleanup(hr_dev, &eq->mtr); -done: - hns_roce_free_buf_list(&buf_list, 1); - - return ret; + return 0; } static int alloc_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq) { - struct hns_roce_buf *buf = &eq->buf; - bool is_mhop = false; - u32 page_shift; - u32 mhop_num; - u32 max_size; - u32 buf_size; - int ret; + struct hns_roce_buf_attr buf_attr = {}; + int err; - page_shift = PAGE_ADDR_SHIFT + hr_dev->caps.eqe_buf_pg_sz; - mhop_num = hr_dev->caps.eqe_hop_num; - if (!mhop_num) { - max_size = 1 << page_shift; - buf_size = max_size; - } else if (mhop_num == HNS_ROCE_HOP_NUM_0) { - max_size = eq->entries * eq->eqe_size; - buf_size = max_size; - } else { - max_size = 1 << page_shift; - buf_size = round_up(eq->entries * eq->eqe_size, max_size); - is_mhop = true; - } + if (hr_dev->caps.eqe_hop_num == HNS_ROCE_HOP_NUM_0) + eq->hop_num = 0; + else + eq->hop_num = hr_dev->caps.eqe_hop_num; - ret = hns_roce_buf_alloc(hr_dev, buf_size, max_size, buf, page_shift); - if (ret) { - dev_err(hr_dev->dev, "alloc eq buf error\n"); - return ret; - } + buf_attr.page_shift = hr_dev->caps.eqe_buf_pg_sz + PAGE_ADDR_SHIFT; + buf_attr.region[0].size = eq->entries * eq->eqe_size; + buf_attr.region[0].hopnum = eq->hop_num; + buf_attr.region_count = 1; + buf_attr.fixed_page = true; - if (is_mhop) { - ret = map_eq_buf(hr_dev, eq, page_shift); - if (ret) { - dev_err(hr_dev->dev, "map roce buf error\n"); - goto err_alloc; - } - } + err = hns_roce_mtr_create(hr_dev, &eq->mtr, &buf_attr, + hr_dev->caps.srqwqe_ba_pg_sz + + PAGE_ADDR_SHIFT, NULL, 0); + if (err) + dev_err(hr_dev->dev, "Failed to alloc EQE mtr, err %d\n", err); - return 0; -err_alloc: - hns_roce_buf_free(hr_dev, buf); - return ret; + return err; } static int hns_roce_v2_create_eq(struct hns_roce_dev *hr_dev, @@ -5492,15 +5416,16 @@ static int hns_roce_v2_create_eq(struct hns_roce_dev *hr_dev, /* Allocate mailbox memory */ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); - if (IS_ERR(mailbox)) - return PTR_ERR(mailbox); + if (IS_ERR_OR_NULL(mailbox)) + return -ENOMEM; ret = alloc_eq_buf(hr_dev, eq); - if (ret) { - ret = -ENOMEM; + if (ret) goto free_cmd_mbox; - } - hns_roce_config_eqc(hr_dev, eq, mailbox->buf); + + ret = config_eqc(hr_dev, eq, mailbox->buf); + if (ret) + goto err_cmd_mbox; ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, eq->eqn, 0, eq_cmd, HNS_ROCE_CMD_TIMEOUT_MSECS); From patchwork Mon Apr 13 11:58:09 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Weihang Li X-Patchwork-Id: 11485469 X-Patchwork-Delegate: jgg@ziepe.ca Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id C12D517D4 for ; Mon, 13 Apr 2020 11:58:24 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id B1F062073E for ; Mon, 13 Apr 2020 11:58:24 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1728591AbgDML6Y (ORCPT ); Mon, 13 Apr 2020 07:58:24 -0400 Received: from szxga07-in.huawei.com ([45.249.212.35]:53174 "EHLO huawei.com" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1727907AbgDML6V (ORCPT ); Mon, 13 Apr 2020 07:58:21 -0400 Received: from DGGEMS403-HUB.china.huawei.com (unknown [172.30.72.58]) by Forcepoint Email with ESMTP id 81EF83785452A375314A; Mon, 13 Apr 2020 19:58:17 +0800 (CST) Received: from localhost.localdomain (10.67.165.24) by DGGEMS403-HUB.china.huawei.com (10.3.19.203) with Microsoft SMTP Server id 14.3.487.0; Mon, 13 Apr 2020 19:58:11 +0800 From: Weihang Li To: , CC: , , Subject: [PATCH for-next 4/6] RDMA/hns: Support 0 hop addressing for WQE buffer Date: Mon, 13 Apr 2020 19:58:09 +0800 Message-ID: <1586779091-51410-5-git-send-email-liweihang@huawei.com> X-Mailer: git-send-email 2.8.1 In-Reply-To: <1586779091-51410-1-git-send-email-liweihang@huawei.com> References: <1586779091-51410-1-git-send-email-liweihang@huawei.com> MIME-Version: 1.0 X-Originating-IP: [10.67.165.24] X-CFilter-Loop: Reflected Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org From: Xi Wang Add the zero hop addressing support by using new mtr interface for WQE buffer and simple mtr invoking process, so WQE buffer can support hopnum between 0 to 3. Signed-off-by: Xi Wang Signed-off-by: Weihang Li --- drivers/infiniband/hw/hns/hns_roce_device.h | 4 - drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 80 ++++++------ drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 36 +++--- drivers/infiniband/hw/hns/hns_roce_qp.c | 182 +++++++--------------------- 4 files changed, 106 insertions(+), 196 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 39577c2..5df4ee3 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -681,7 +681,6 @@ struct hns_roce_work { struct hns_roce_qp { struct ib_qp ibqp; - struct hns_roce_buf hr_buf; struct hns_roce_wq rq; struct hns_roce_db rdb; struct hns_roce_db sdb; @@ -691,10 +690,7 @@ struct hns_roce_qp { u32 sq_signal_bits; struct hns_roce_wq sq; - struct ib_umem *umem; - struct hns_roce_mtt mtt; struct hns_roce_mtr mtr; - int wqe_bt_pg_shift; u32 buff_size; struct mutex mutex; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 4b54906..ddf2a45 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -2479,7 +2479,6 @@ static int hns_roce_v1_clear_hem(struct hns_roce_dev *hr_dev, } static int hns_roce_v1_qp_modify(struct hns_roce_dev *hr_dev, - struct hns_roce_mtt *mtt, enum hns_roce_qp_state cur_state, enum hns_roce_qp_state new_state, struct hns_roce_qp_context *context, @@ -2560,6 +2559,29 @@ static int hns_roce_v1_qp_modify(struct hns_roce_dev *hr_dev, return ret; } +static int find_wqe_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, + u64 *sq_ba, u64 *rq_ba, dma_addr_t *bt_ba) +{ + struct ib_device *ibdev = &hr_dev->ib_dev; + int rq_pa_start; + int count; + + count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, 0, sq_ba, 1, bt_ba); + if (count < 1) { + ibdev_err(ibdev, "Failed to find SQ ba\n"); + return -ENOBUFS; + } + rq_pa_start = hr_qp->rq.offset >> hr_qp->mtr.hem_cfg.buf_pg_shift; + count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, rq_pa_start, rq_ba, 1, + NULL); + if (!count) { + ibdev_err(ibdev, "Failed to find RQ ba\n"); + return -ENOBUFS; + } + + return 0; +} + static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, int attr_mask, enum ib_qp_state cur_state, enum ib_qp_state new_state) @@ -2567,25 +2589,20 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); struct hns_roce_sqp_context *context; - struct device *dev = &hr_dev->pdev->dev; dma_addr_t dma_handle = 0; u32 __iomem *addr; - int rq_pa_start; + u64 sq_ba = 0; + u64 rq_ba = 0; __le32 tmp; u32 reg_val; - u64 *mtts; context = kzalloc(sizeof(*context), GFP_KERNEL); if (!context) return -ENOMEM; /* Search QP buf's MTTs */ - mtts = hns_roce_table_find(hr_dev, &hr_dev->mr_table.mtt_table, - hr_qp->mtt.first_seg, &dma_handle); - if (!mtts) { - dev_err(dev, "qp buf pa find failed\n"); + if (find_wqe_mtt(hr_dev, hr_qp, &sq_ba, &rq_ba, &dma_handle)) goto out; - } if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { roce_set_field(context->qp1c_bytes_4, @@ -2599,11 +2616,11 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, roce_set_field(context->qp1c_bytes_4, QP1C_BYTES_4_PD_M, QP1C_BYTES_4_PD_S, to_hr_pd(ibqp->pd)->pdn); - context->sq_rq_bt_l = cpu_to_le32((u32)(dma_handle)); + context->sq_rq_bt_l = cpu_to_le32(dma_handle); roce_set_field(context->qp1c_bytes_12, QP1C_BYTES_12_SQ_RQ_BT_H_M, QP1C_BYTES_12_SQ_RQ_BT_H_S, - ((u32)(dma_handle >> 32))); + upper_32_bits(dma_handle)); roce_set_field(context->qp1c_bytes_16, QP1C_BYTES_16_RQ_HEAD_M, QP1C_BYTES_16_RQ_HEAD_S, hr_qp->rq.head); @@ -2624,14 +2641,12 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, roce_set_field(context->qp1c_bytes_20, QP1C_BYTES_20_PKEY_IDX_M, QP1C_BYTES_20_PKEY_IDX_S, attr->pkey_index); - rq_pa_start = (u32)hr_qp->rq.offset / PAGE_SIZE; - context->cur_rq_wqe_ba_l = - cpu_to_le32((u32)(mtts[rq_pa_start])); + context->cur_rq_wqe_ba_l = cpu_to_le32(rq_ba); roce_set_field(context->qp1c_bytes_28, QP1C_BYTES_28_CUR_RQ_WQE_BA_H_M, QP1C_BYTES_28_CUR_RQ_WQE_BA_H_S, - (mtts[rq_pa_start]) >> 32); + upper_32_bits(rq_ba)); roce_set_field(context->qp1c_bytes_28, QP1C_BYTES_28_RQ_CUR_IDX_M, QP1C_BYTES_28_RQ_CUR_IDX_S, 0); @@ -2645,12 +2660,12 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, QP1C_BYTES_32_TX_CQ_NUM_S, to_hr_cq(ibqp->send_cq)->cqn); - context->cur_sq_wqe_ba_l = cpu_to_le32((u32)mtts[0]); + context->cur_sq_wqe_ba_l = cpu_to_le32(sq_ba); roce_set_field(context->qp1c_bytes_40, QP1C_BYTES_40_CUR_SQ_WQE_BA_H_M, QP1C_BYTES_40_CUR_SQ_WQE_BA_H_S, - (mtts[0]) >> 32); + upper_32_bits(sq_ba)); roce_set_field(context->qp1c_bytes_40, QP1C_BYTES_40_SQ_CUR_IDX_M, QP1C_BYTES_40_SQ_CUR_IDX_S, 0); @@ -2716,10 +2731,10 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, dma_addr_t dma_handle_2 = 0; dma_addr_t dma_handle = 0; __le32 doorbell[2] = {0}; - int rq_pa_start = 0; u64 *mtts_2 = NULL; int ret = -EINVAL; - u64 *mtts = NULL; + u64 sq_ba = 0; + u64 rq_ba = 0; int port; u8 port_num; u8 *dmac; @@ -2730,12 +2745,8 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, return -ENOMEM; /* Search qp buf's mtts */ - mtts = hns_roce_table_find(hr_dev, &hr_dev->mr_table.mtt_table, - hr_qp->mtt.first_seg, &dma_handle); - if (mtts == NULL) { - dev_err(dev, "qp buf pa find failed\n"); + if (find_wqe_mtt(hr_dev, hr_qp, &sq_ba, &rq_ba, &dma_handle)) goto out; - } /* Search IRRL's mtts */ mtts_2 = hns_roce_table_find(hr_dev, &hr_dev->qp_table.irrl_table, @@ -2890,11 +2901,11 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, dmac = (u8 *)attr->ah_attr.roce.dmac; - context->sq_rq_bt_l = cpu_to_le32((u32)(dma_handle)); + context->sq_rq_bt_l = cpu_to_le32(dma_handle); roce_set_field(context->qpc_bytes_24, QP_CONTEXT_QPC_BYTES_24_SQ_RQ_BT_H_M, QP_CONTEXT_QPC_BYTES_24_SQ_RQ_BT_H_S, - ((u32)(dma_handle >> 32))); + upper_32_bits(dma_handle)); roce_set_bit(context->qpc_bytes_24, QP_CONTEXT_QPC_BYTE_24_REMOTE_ENABLE_E2E_CREDITS_S, 1); @@ -2993,14 +3004,12 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, QP_CONTEXT_QPC_BYTES_68_RQ_CUR_INDEX_M, QP_CONTEXT_QPC_BYTES_68_RQ_CUR_INDEX_S, 0); - rq_pa_start = (u32)hr_qp->rq.offset / PAGE_SIZE; - context->cur_rq_wqe_ba_l = - cpu_to_le32((u32)(mtts[rq_pa_start])); + context->cur_rq_wqe_ba_l = cpu_to_le32(rq_ba); roce_set_field(context->qpc_bytes_76, QP_CONTEXT_QPC_BYTES_76_CUR_RQ_WQE_BA_H_M, QP_CONTEXT_QPC_BYTES_76_CUR_RQ_WQE_BA_H_S, - mtts[rq_pa_start] >> 32); + upper_32_bits(rq_ba)); roce_set_field(context->qpc_bytes_76, QP_CONTEXT_QPC_BYTES_76_RX_REQ_MSN_M, QP_CONTEXT_QPC_BYTES_76_RX_REQ_MSN_S, 0); @@ -3075,12 +3084,12 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, goto out; } - context->rx_cur_sq_wqe_ba_l = cpu_to_le32((u32)(mtts[0])); + context->rx_cur_sq_wqe_ba_l = cpu_to_le32(sq_ba); roce_set_field(context->qpc_bytes_120, QP_CONTEXT_QPC_BYTES_120_RX_CUR_SQ_WQE_BA_H_M, QP_CONTEXT_QPC_BYTES_120_RX_CUR_SQ_WQE_BA_H_S, - (mtts[0]) >> 32); + upper_32_bits(sq_ba)); roce_set_field(context->qpc_bytes_124, QP_CONTEXT_QPC_BYTES_124_RX_ACK_MSN_M, @@ -3223,12 +3232,12 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, QP_CONTEXT_QPC_BYTES_180_SQ_HEAD_M, QP_CONTEXT_QPC_BYTES_180_SQ_HEAD_S, 0); - context->tx_cur_sq_wqe_ba_l = cpu_to_le32((u32)(mtts[0])); + context->tx_cur_sq_wqe_ba_l = cpu_to_le32(sq_ba); roce_set_field(context->qpc_bytes_188, QP_CONTEXT_QPC_BYTES_188_TX_CUR_SQ_WQE_BA_H_M, QP_CONTEXT_QPC_BYTES_188_TX_CUR_SQ_WQE_BA_H_S, - (mtts[0]) >> 32); + upper_32_bits(sq_ba)); roce_set_bit(context->qpc_bytes_188, QP_CONTEXT_QPC_BYTES_188_PKT_RETRY_FLG_S, 0); roce_set_field(context->qpc_bytes_188, @@ -3253,8 +3262,7 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, QP_CONTEXT_QPC_BYTES_144_QP_STATE_S, new_state); /* SW pass context to HW */ - ret = hns_roce_v1_qp_modify(hr_dev, &hr_qp->mtt, - to_hns_roce_state(cur_state), + ret = hns_roce_v1_qp_modify(hr_dev, to_hns_roce_state(cur_state), to_hns_roce_state(new_state), context, hr_qp); if (ret) { diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 335a637..42bca0a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -164,7 +164,7 @@ static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr, num_in_wqe = HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE; extend_sge_num = valid_num_sge - num_in_wqe; sg = wr->sg_list + num_in_wqe; - shift = qp->hr_buf.page_shift; + shift = qp->mtr.hem_cfg.buf_pg_shift; /* * Check whether wr->num_sge sges are in the same page. If not, we @@ -3757,7 +3757,7 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, int port; /* Search qp buf's mtts */ - page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT); + page_size = 1 << hr_qp->mtr.hem_cfg.buf_pg_shift; count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, hr_qp->rq.offset / page_size, mtts, MTT_MIN_COUNT, &wqe_sge_ba); @@ -3831,7 +3831,7 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, roce_set_field(context->byte_16_buf_ba_pg_sz, V2_QPC_BYTE_16_WQE_SGE_BA_PG_SZ_M, V2_QPC_BYTE_16_WQE_SGE_BA_PG_SZ_S, - hr_qp->wqe_bt_pg_shift + PG_SHIFT_OFFSET); + to_hr_hw_page_shift(hr_qp->mtr.hem_cfg.ba_pg_shift)); roce_set_field(qpc_mask->byte_16_buf_ba_pg_sz, V2_QPC_BYTE_16_WQE_SGE_BA_PG_SZ_M, V2_QPC_BYTE_16_WQE_SGE_BA_PG_SZ_S, 0); @@ -3839,29 +3839,29 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, roce_set_field(context->byte_16_buf_ba_pg_sz, V2_QPC_BYTE_16_WQE_SGE_BUF_PG_SZ_M, V2_QPC_BYTE_16_WQE_SGE_BUF_PG_SZ_S, - hr_dev->caps.mtt_buf_pg_sz + PG_SHIFT_OFFSET); + to_hr_hw_page_shift(hr_qp->mtr.hem_cfg.buf_pg_shift)); roce_set_field(qpc_mask->byte_16_buf_ba_pg_sz, V2_QPC_BYTE_16_WQE_SGE_BUF_PG_SZ_M, V2_QPC_BYTE_16_WQE_SGE_BUF_PG_SZ_S, 0); - context->rq_cur_blk_addr = cpu_to_le32(mtts[0] >> PAGE_ADDR_SHIFT); + context->rq_cur_blk_addr = cpu_to_le32(to_hr_hw_page_addr(mtts[0])); qpc_mask->rq_cur_blk_addr = 0; roce_set_field(context->byte_92_srq_info, V2_QPC_BYTE_92_RQ_CUR_BLK_ADDR_M, V2_QPC_BYTE_92_RQ_CUR_BLK_ADDR_S, - mtts[0] >> (32 + PAGE_ADDR_SHIFT)); + upper_32_bits(to_hr_hw_page_addr(mtts[0]))); roce_set_field(qpc_mask->byte_92_srq_info, V2_QPC_BYTE_92_RQ_CUR_BLK_ADDR_M, V2_QPC_BYTE_92_RQ_CUR_BLK_ADDR_S, 0); - context->rq_nxt_blk_addr = cpu_to_le32(mtts[1] >> PAGE_ADDR_SHIFT); + context->rq_nxt_blk_addr = cpu_to_le32(to_hr_hw_page_addr(mtts[1])); qpc_mask->rq_nxt_blk_addr = 0; roce_set_field(context->byte_104_rq_sge, V2_QPC_BYTE_104_RQ_NXT_BLK_ADDR_M, V2_QPC_BYTE_104_RQ_NXT_BLK_ADDR_S, - mtts[1] >> (32 + PAGE_ADDR_SHIFT)); + upper_32_bits(to_hr_hw_page_addr(mtts[1]))); roce_set_field(qpc_mask->byte_104_rq_sge, V2_QPC_BYTE_104_RQ_NXT_BLK_ADDR_M, V2_QPC_BYTE_104_RQ_NXT_BLK_ADDR_S, 0); @@ -3995,18 +3995,18 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp, /* Search qp buf's mtts */ count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, 0, &sq_cur_blk, 1, NULL); if (count < 1) { - ibdev_err(ibdev, "failed to find buf pa of QP(0x%lx)\n", + ibdev_err(ibdev, "failed to find QP(0x%lx) SQ buf\n", hr_qp->qpn); return -EINVAL; } if (hr_qp->sge.offset) { - page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT); + page_size = 1 << hr_qp->mtr.hem_cfg.buf_pg_shift; count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, hr_qp->sge.offset / page_size, &sge_cur_blk, 1, NULL); if (count < 1) { - ibdev_err(ibdev, "failed to find sge pa of QP(0x%lx)\n", + ibdev_err(ibdev, "failed to find QP(0x%lx) SGE buf\n", hr_qp->qpn); return -EINVAL; } @@ -4024,11 +4024,11 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp, * we should set all bits of the relevant fields in context mask to * 0 at the same time, else set them to 0x1. */ - context->sq_cur_blk_addr = cpu_to_le32(sq_cur_blk >> PAGE_ADDR_SHIFT); + context->sq_cur_blk_addr = cpu_to_le32(to_hr_hw_page_addr(sq_cur_blk)); roce_set_field(context->byte_168_irrl_idx, V2_QPC_BYTE_168_SQ_CUR_BLK_ADDR_M, V2_QPC_BYTE_168_SQ_CUR_BLK_ADDR_S, - sq_cur_blk >> (32 + PAGE_ADDR_SHIFT)); + upper_32_bits(to_hr_hw_page_addr(sq_cur_blk))); qpc_mask->sq_cur_blk_addr = 0; roce_set_field(qpc_mask->byte_168_irrl_idx, V2_QPC_BYTE_168_SQ_CUR_BLK_ADDR_M, @@ -4036,26 +4036,24 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp, context->sq_cur_sge_blk_addr = ((ibqp->qp_type == IB_QPT_GSI) || hr_qp->sq.max_gs > HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE) ? - cpu_to_le32(sge_cur_blk >> - PAGE_ADDR_SHIFT) : 0; + cpu_to_le32(to_hr_hw_page_addr(sge_cur_blk)) : 0; roce_set_field(context->byte_184_irrl_idx, V2_QPC_BYTE_184_SQ_CUR_SGE_BLK_ADDR_M, V2_QPC_BYTE_184_SQ_CUR_SGE_BLK_ADDR_S, ((ibqp->qp_type == IB_QPT_GSI) || hr_qp->sq.max_gs > HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE) ? - (sge_cur_blk >> - (32 + PAGE_ADDR_SHIFT)) : 0); + upper_32_bits(to_hr_hw_page_addr(sge_cur_blk)) : 0); qpc_mask->sq_cur_sge_blk_addr = 0; roce_set_field(qpc_mask->byte_184_irrl_idx, V2_QPC_BYTE_184_SQ_CUR_SGE_BLK_ADDR_M, V2_QPC_BYTE_184_SQ_CUR_SGE_BLK_ADDR_S, 0); context->rx_sq_cur_blk_addr = - cpu_to_le32(sq_cur_blk >> PAGE_ADDR_SHIFT); + cpu_to_le32(to_hr_hw_page_addr(sq_cur_blk)); roce_set_field(context->byte_232_irrl_sge, V2_QPC_BYTE_232_RX_SQ_CUR_BLK_ADDR_M, V2_QPC_BYTE_232_RX_SQ_CUR_BLK_ADDR_S, - sq_cur_blk >> (32 + PAGE_ADDR_SHIFT)); + upper_32_bits(to_hr_hw_page_addr(sq_cur_blk))); qpc_mask->rx_sq_cur_blk_addr = 0; roce_set_field(qpc_mask->byte_232_irrl_sge, V2_QPC_BYTE_232_RX_SQ_CUR_BLK_ADDR_M, diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 1667f37..d05d3cb 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -512,63 +512,57 @@ static int set_user_sq_size(struct hns_roce_dev *hr_dev, static int split_wqe_buf_region(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, - struct hns_roce_buf_region *regions, - int region_max, int page_shift) + struct hns_roce_buf_attr *buf_attr) { - int page_size = 1 << page_shift; bool is_extend_sge; - int region_cnt = 0; int buf_size; - int buf_cnt; + int idx = 0; - if (hr_qp->buff_size < 1 || region_max < 1) - return region_cnt; + if (hr_qp->buff_size < 1) + return -EINVAL; + + buf_attr->page_shift = PAGE_ADDR_SHIFT + hr_dev->caps.mtt_buf_pg_sz; + buf_attr->fixed_page = true; + buf_attr->region_count = 0; if (hr_qp->sge.sge_cnt > 0) is_extend_sge = true; else is_extend_sge = false; - /* sq region */ + /* SQ WQE */ if (is_extend_sge) buf_size = hr_qp->sge.offset - hr_qp->sq.offset; else buf_size = hr_qp->rq.offset - hr_qp->sq.offset; - if (buf_size > 0 && region_cnt < region_max) { - buf_cnt = DIV_ROUND_UP(buf_size, page_size); - hns_roce_init_buf_region(®ions[region_cnt], - hr_dev->caps.wqe_sq_hop_num, - hr_qp->sq.offset / page_size, - buf_cnt); - region_cnt++; + if (buf_size > 0 && idx < ARRAY_SIZE(buf_attr->region)) { + buf_attr->region[idx].size = buf_size; + buf_attr->region[idx].hopnum = hr_dev->caps.wqe_sq_hop_num; + idx++; } - /* sge region */ - if (is_extend_sge) { - buf_size = hr_qp->rq.offset - hr_qp->sge.offset; - if (buf_size > 0 && region_cnt < region_max) { - buf_cnt = DIV_ROUND_UP(buf_size, page_size); - hns_roce_init_buf_region(®ions[region_cnt], - hr_dev->caps.wqe_sge_hop_num, - hr_qp->sge.offset / page_size, - buf_cnt); - region_cnt++; - } + /* extend SGE in SQ WQE */ + buf_size = hr_qp->rq.offset - hr_qp->sge.offset; + if (buf_size > 0 && is_extend_sge && + idx < ARRAY_SIZE(buf_attr->region)) { + buf_attr->region[idx].size = buf_size; + buf_attr->region[idx].hopnum = + hr_dev->caps.wqe_sge_hop_num; + idx++; } - /* rq region */ + /* RQ WQE */ buf_size = hr_qp->buff_size - hr_qp->rq.offset; - if (buf_size > 0) { - buf_cnt = DIV_ROUND_UP(buf_size, page_size); - hns_roce_init_buf_region(®ions[region_cnt], - hr_dev->caps.wqe_rq_hop_num, - hr_qp->rq.offset / page_size, - buf_cnt); - region_cnt++; + if (buf_size > 0 && idx < ARRAY_SIZE(buf_attr->region)) { + buf_attr->region[idx].size = buf_size; + buf_attr->region[idx].hopnum = hr_dev->caps.wqe_rq_hop_num; + idx++; } - return region_cnt; + buf_attr->region_count = idx; + + return 0; } static int set_extend_sge_param(struct hns_roce_dev *hr_dev, @@ -731,72 +725,12 @@ static void free_rq_inline_buf(struct hns_roce_qp *hr_qp) kfree(hr_qp->rq_inl_buf.wqe_list); } -static int map_wqe_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, - u32 page_shift, bool is_user) -{ -/* WQE buffer include 3 parts: SQ, extend SGE and RQ. */ -#define HNS_ROCE_WQE_REGION_MAX 3 - struct hns_roce_buf_region regions[HNS_ROCE_WQE_REGION_MAX] = {}; - dma_addr_t *buf_list[HNS_ROCE_WQE_REGION_MAX] = {}; - struct ib_device *ibdev = &hr_dev->ib_dev; - struct hns_roce_buf_region *r; - int region_count; - int buf_count; - int ret; - int i; - - region_count = split_wqe_buf_region(hr_dev, hr_qp, regions, - ARRAY_SIZE(regions), page_shift); - - /* alloc a tmp list to store WQE buffers address */ - ret = hns_roce_alloc_buf_list(regions, buf_list, region_count); - if (ret) { - ibdev_err(ibdev, "Failed to alloc WQE buffer list\n"); - return ret; - } - - for (i = 0; i < region_count; i++) { - r = ®ions[i]; - if (is_user) - buf_count = hns_roce_get_umem_bufs(hr_dev, buf_list[i], - r->count, r->offset, hr_qp->umem, - page_shift); - else - buf_count = hns_roce_get_kmem_bufs(hr_dev, buf_list[i], - r->count, r->offset, &hr_qp->hr_buf); - - if (buf_count != r->count) { - ibdev_err(ibdev, "Failed to get %s WQE buf, expect %d = %d.\n", - is_user ? "user" : "kernel", - r->count, buf_count); - ret = -ENOBUFS; - goto done; - } - } - - hr_qp->wqe_bt_pg_shift = hr_dev->caps.mtt_ba_pg_sz; - hns_roce_mtr_init(&hr_qp->mtr, PAGE_SHIFT + hr_qp->wqe_bt_pg_shift, - page_shift); - ret = hns_roce_mtr_attach(hr_dev, &hr_qp->mtr, buf_list, regions, - region_count); - if (ret) - ibdev_err(ibdev, "Failed to attach WQE's mtr\n"); - - goto done; - - hns_roce_mtr_cleanup(hr_dev, &hr_qp->mtr); -done: - hns_roce_free_buf_list(buf_list, region_count); - - return ret; -} - static int alloc_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, struct ib_qp_init_attr *init_attr, struct ib_udata *udata, unsigned long addr) { - u32 page_shift = PAGE_SHIFT + hr_dev->caps.mtt_buf_pg_sz; struct ib_device *ibdev = &hr_dev->ib_dev; + struct hns_roce_buf_attr buf_attr = {}; bool is_rq_buf_inline; int ret; @@ -810,54 +744,30 @@ static int alloc_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, } } - if (udata) { - hr_qp->umem = ib_umem_get(ibdev, addr, hr_qp->buff_size, 0); - if (IS_ERR(hr_qp->umem)) { - ret = PTR_ERR(hr_qp->umem); - goto err_inline; - } - } else { - ret = hns_roce_buf_alloc(hr_dev, hr_qp->buff_size, - (1 << page_shift) * 2, - &hr_qp->hr_buf, page_shift); - if (ret) - goto err_inline; + ret = split_wqe_buf_region(hr_dev, hr_qp, &buf_attr); + if (ret) { + ibdev_err(ibdev, "Failed to split WQE buf, ret %d\n", ret); + goto err_inline; + } + ret = hns_roce_mtr_create(hr_dev, &hr_qp->mtr, &buf_attr, + PAGE_ADDR_SHIFT + hr_dev->caps.mtt_ba_pg_sz, + udata, addr); + if (ret) { + ibdev_err(ibdev, "Failed to create WQE mtr, ret %d\n", ret); + goto err_inline; } - - ret = map_wqe_buf(hr_dev, hr_qp, page_shift, udata); - if (ret) - goto err_alloc; return 0; - err_inline: if (is_rq_buf_inline) free_rq_inline_buf(hr_qp); -err_alloc: - if (udata) { - ib_umem_release(hr_qp->umem); - hr_qp->umem = NULL; - } else { - hns_roce_buf_free(hr_dev, &hr_qp->hr_buf); - } - - ibdev_err(ibdev, "Failed to alloc WQE buffer, ret %d.\n", ret); - return ret; } static void free_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) { - hns_roce_mtr_cleanup(hr_dev, &hr_qp->mtr); - if (hr_qp->umem) { - ib_umem_release(hr_qp->umem); - hr_qp->umem = NULL; - } - - if (hr_qp->hr_buf.npages > 0) - hns_roce_buf_free(hr_dev, &hr_qp->hr_buf); - + hns_roce_mtr_destroy(hr_dev, &hr_qp->mtr); if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) && hr_qp->rq.wqe_cnt) free_rq_inline_buf(hr_qp); @@ -1431,10 +1341,9 @@ void hns_roce_unlock_cqs(struct hns_roce_cq *send_cq, } } -static void *get_wqe(struct hns_roce_qp *hr_qp, int offset) +static inline void *get_wqe(struct hns_roce_qp *hr_qp, int offset) { - - return hns_roce_buf_offset(&hr_qp->hr_buf, offset); + return hns_roce_buf_offset(hr_qp->mtr.kmem, offset); } void *hns_roce_get_recv_wqe(struct hns_roce_qp *hr_qp, int n) @@ -1449,8 +1358,7 @@ void *hns_roce_get_send_wqe(struct hns_roce_qp *hr_qp, int n) void *hns_roce_get_extend_sge(struct hns_roce_qp *hr_qp, int n) { - return hns_roce_buf_offset(&hr_qp->hr_buf, hr_qp->sge.offset + - (n << hr_qp->sge.sge_shift)); + return get_wqe(hr_qp, hr_qp->sge.offset + (n << hr_qp->sge.sge_shift)); } bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, int nreq, From patchwork Mon Apr 13 11:58:10 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Weihang Li X-Patchwork-Id: 11485471 X-Patchwork-Delegate: jgg@ziepe.ca Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 38EEA15AB for ; Mon, 13 Apr 2020 11:58:25 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 299422063A for ; Mon, 13 Apr 2020 11:58:25 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727907AbgDML6Y (ORCPT ); Mon, 13 Apr 2020 07:58:24 -0400 Received: from szxga07-in.huawei.com ([45.249.212.35]:53158 "EHLO huawei.com" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1729303AbgDML6V (ORCPT ); Mon, 13 Apr 2020 07:58:21 -0400 Received: from DGGEMS403-HUB.china.huawei.com (unknown [172.30.72.58]) by Forcepoint Email with ESMTP id 7BF0B187178FE4FF3C79; Mon, 13 Apr 2020 19:58:17 +0800 (CST) Received: from localhost.localdomain (10.67.165.24) by DGGEMS403-HUB.china.huawei.com (10.3.19.203) with Microsoft SMTP Server id 14.3.487.0; Mon, 13 Apr 2020 19:58:11 +0800 From: Weihang Li To: , CC: , , Subject: [PATCH for-next 5/6] RDMA/hns: Support 0 hop addressing for SRQ buffer Date: Mon, 13 Apr 2020 19:58:10 +0800 Message-ID: <1586779091-51410-6-git-send-email-liweihang@huawei.com> X-Mailer: git-send-email 2.8.1 In-Reply-To: <1586779091-51410-1-git-send-email-liweihang@huawei.com> References: <1586779091-51410-1-git-send-email-liweihang@huawei.com> MIME-Version: 1.0 X-Originating-IP: [10.67.165.24] X-CFilter-Loop: Reflected Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org From: Xi Wang Add the zero hop addressing support by using mtr interface for SRQ buffer, so the hns driver can support addressing hopnum between 0 to 3 for SRQ. Signed-off-by: Xi Wang Signed-off-by: Weihang Li --- drivers/infiniband/hw/hns/hns_roce_device.h | 12 +- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 20 +- drivers/infiniband/hw/hns/hns_roce_srq.c | 365 +++++++++++----------------- 3 files changed, 153 insertions(+), 244 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 5df4ee3..39af736 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -528,11 +528,8 @@ struct hns_roce_cq { }; struct hns_roce_idx_que { - struct hns_roce_buf idx_buf; + struct hns_roce_mtr mtr; int entry_sz; - u32 buf_size; - struct ib_umem *umem; - struct hns_roce_mtt mtt; unsigned long *bitmap; }; @@ -547,12 +544,9 @@ struct hns_roce_srq { atomic_t refcount; struct completion free; - struct hns_roce_buf buf; + struct hns_roce_mtr buf_mtr; + u64 *wrid; - struct ib_umem *umem; - u32 buf_size; - int page_shift; - struct hns_roce_mtt mtt; struct hns_roce_idx_que idx_que; spinlock_t lock; int head; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 42bca0a..42be8a2 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -2699,7 +2699,7 @@ static struct hns_roce_v2_cqe *next_cqe_sw_v2(struct hns_roce_cq *hr_cq) static void *get_srq_wqe(struct hns_roce_srq *srq, int n) { - return hns_roce_buf_offset(&srq->buf, n << srq->wqe_shift); + return hns_roce_buf_offset(srq->buf_mtr.kmem, n << srq->wqe_shift); } static void hns_roce_free_srq_wqe(struct hns_roce_srq *srq, int wqe_index) @@ -5699,11 +5699,11 @@ static void hns_roce_v2_write_srqc(struct hns_roce_dev *hr_dev, dma_handle_idx >> 35); srq_context->idx_cur_blk_addr = - cpu_to_le32(mtts_idx[0] >> PAGE_ADDR_SHIFT); + cpu_to_le32(to_hr_hw_page_addr(mtts_idx[0])); roce_set_field(srq_context->byte_44_idxbufpgsz_addr, SRQC_BYTE_44_SRQ_IDX_CUR_BLK_ADDR_M, SRQC_BYTE_44_SRQ_IDX_CUR_BLK_ADDR_S, - mtts_idx[0] >> (32 + PAGE_ADDR_SHIFT)); + upper_32_bits(to_hr_hw_page_addr(mtts_idx[0]))); roce_set_field(srq_context->byte_44_idxbufpgsz_addr, SRQC_BYTE_44_SRQ_IDX_HOP_NUM_M, SRQC_BYTE_44_SRQ_IDX_HOP_NUM_S, @@ -5713,29 +5713,29 @@ static void hns_roce_v2_write_srqc(struct hns_roce_dev *hr_dev, roce_set_field(srq_context->byte_44_idxbufpgsz_addr, SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_M, SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_S, - hr_dev->caps.idx_ba_pg_sz + PG_SHIFT_OFFSET); + to_hr_hw_page_shift(srq->idx_que.mtr.hem_cfg.ba_pg_shift)); roce_set_field(srq_context->byte_44_idxbufpgsz_addr, SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_M, SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_S, - hr_dev->caps.idx_buf_pg_sz + PG_SHIFT_OFFSET); + to_hr_hw_page_shift(srq->idx_que.mtr.hem_cfg.buf_pg_shift)); srq_context->idx_nxt_blk_addr = - cpu_to_le32(mtts_idx[1] >> PAGE_ADDR_SHIFT); + cpu_to_le32(to_hr_hw_page_addr(mtts_idx[1])); roce_set_field(srq_context->rsv_idxnxtblkaddr, SRQC_BYTE_52_SRQ_IDX_NXT_BLK_ADDR_M, SRQC_BYTE_52_SRQ_IDX_NXT_BLK_ADDR_S, - mtts_idx[1] >> (32 + PAGE_ADDR_SHIFT)); + upper_32_bits(to_hr_hw_page_addr(mtts_idx[1]))); roce_set_field(srq_context->byte_56_xrc_cqn, SRQC_BYTE_56_SRQ_XRC_CQN_M, SRQC_BYTE_56_SRQ_XRC_CQN_S, cqn); roce_set_field(srq_context->byte_56_xrc_cqn, SRQC_BYTE_56_SRQ_WQE_BA_PG_SZ_M, SRQC_BYTE_56_SRQ_WQE_BA_PG_SZ_S, - hr_dev->caps.srqwqe_ba_pg_sz + PG_SHIFT_OFFSET); + to_hr_hw_page_shift(srq->buf_mtr.hem_cfg.ba_pg_shift)); roce_set_field(srq_context->byte_56_xrc_cqn, SRQC_BYTE_56_SRQ_WQE_BUF_PG_SZ_M, SRQC_BYTE_56_SRQ_WQE_BUF_PG_SZ_S, - hr_dev->caps.srqwqe_buf_pg_sz + PG_SHIFT_OFFSET); + to_hr_hw_page_shift(srq->buf_mtr.hem_cfg.buf_pg_shift)); roce_set_bit(srq_context->db_record_addr_record_en, SRQC_BYTE_60_SRQ_RECORD_EN_S, 0); @@ -5847,7 +5847,7 @@ static void fill_idx_queue(struct hns_roce_idx_que *idx_que, { unsigned int *addr; - addr = (unsigned int *)hns_roce_buf_offset(&idx_que->idx_buf, + addr = (unsigned int *)hns_roce_buf_offset(idx_que->mtr.kmem, cur_idx * idx_que->entry_sz); *addr = wqe_idx; } diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index 9851d76..e413a97 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -77,56 +77,56 @@ static int hns_roce_hw_destroy_srq(struct hns_roce_dev *dev, HNS_ROCE_CMD_TIMEOUT_MSECS); } -static int hns_roce_srq_alloc(struct hns_roce_dev *hr_dev, u32 pdn, u32 cqn, - u16 xrcd, struct hns_roce_mtt *hr_mtt, - u64 db_rec_addr, struct hns_roce_srq *srq) +static int alloc_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, + u32 pdn, u32 cqn, u16 xrcd, u64 db_rec_addr) { struct hns_roce_srq_table *srq_table = &hr_dev->srq_table; + struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_cmd_mailbox *mailbox; - dma_addr_t dma_handle_wqe; - dma_addr_t dma_handle_idx; - u64 *mtts_wqe; - u64 *mtts_idx; + u64 mtts_wqe[MTT_MIN_COUNT] = { 0 }; + u64 mtts_idx[MTT_MIN_COUNT] = { 0 }; + dma_addr_t dma_handle_wqe = 0; + dma_addr_t dma_handle_idx = 0; int ret; /* Get the physical address of srq buf */ - mtts_wqe = hns_roce_table_find(hr_dev, - &hr_dev->mr_table.mtt_srqwqe_table, - srq->mtt.first_seg, - &dma_handle_wqe); - if (!mtts_wqe) { - dev_err(hr_dev->dev, "Failed to find mtt for srq buf.\n"); - return -EINVAL; + ret = hns_roce_mtr_find(hr_dev, &srq->buf_mtr, 0, mtts_wqe, + ARRAY_SIZE(mtts_wqe), &dma_handle_wqe); + if (ret < 1) { + ibdev_err(ibdev, "Failed to find mtr for SRQ WQE\n"); + return -ENOBUFS; } /* Get physical address of idx que buf */ - mtts_idx = hns_roce_table_find(hr_dev, &hr_dev->mr_table.mtt_idx_table, - srq->idx_que.mtt.first_seg, - &dma_handle_idx); - if (!mtts_idx) { - dev_err(hr_dev->dev, - "Failed to find mtt for srq idx queue buf.\n"); - return -EINVAL; + ret = hns_roce_mtr_find(hr_dev, &srq->idx_que.mtr, 0, mtts_idx, + ARRAY_SIZE(mtts_idx), &dma_handle_idx); + if (ret < 1) { + ibdev_err(ibdev, "Failed to find mtr for SRQ idx\n"); + return -ENOBUFS; } ret = hns_roce_bitmap_alloc(&srq_table->bitmap, &srq->srqn); if (ret) { - dev_err(hr_dev->dev, - "Failed to alloc a bit from srq bitmap.\n"); + ibdev_err(ibdev, "Failed to alloc SRQ number, err %d\n", ret); return -ENOMEM; } ret = hns_roce_table_get(hr_dev, &srq_table->table, srq->srqn); - if (ret) + if (ret) { + ibdev_err(ibdev, "Failed to get SRQC table, err %d\n", ret); goto err_out; + } ret = xa_err(xa_store(&srq_table->xa, srq->srqn, srq, GFP_KERNEL)); - if (ret) + if (ret) { + ibdev_err(ibdev, "Failed to store SRQC, err %d\n", ret); goto err_put; + } mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); - if (IS_ERR(mailbox)) { - ret = PTR_ERR(mailbox); + if (IS_ERR_OR_NULL(mailbox)) { + ret = -ENOMEM; + ibdev_err(ibdev, "Failed to alloc mailbox for SRQC\n"); goto err_xa; } @@ -136,8 +136,10 @@ static int hns_roce_srq_alloc(struct hns_roce_dev *hr_dev, u32 pdn, u32 cqn, ret = hns_roce_hw_create_srq(hr_dev, mailbox, srq->srqn); hns_roce_free_cmd_mailbox(hr_dev, mailbox); - if (ret) + if (ret) { + ibdev_err(ibdev, "Failed to config SRQC, err %d\n", ret); goto err_xa; + } atomic_set(&srq->refcount, 1); init_completion(&srq->free); @@ -154,8 +156,7 @@ static int hns_roce_srq_alloc(struct hns_roce_dev *hr_dev, u32 pdn, u32 cqn, return ret; } -static void hns_roce_srq_free(struct hns_roce_dev *hr_dev, - struct hns_roce_srq *srq) +static void free_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) { struct hns_roce_srq_table *srq_table = &hr_dev->srq_table; int ret; @@ -175,185 +176,104 @@ static void hns_roce_srq_free(struct hns_roce_dev *hr_dev, hns_roce_bitmap_free(&srq_table->bitmap, srq->srqn, BITMAP_NO_RR); } -static int create_user_srq(struct hns_roce_srq *srq, struct ib_udata *udata, - int srq_buf_size) +static int alloc_srq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, + struct ib_udata *udata, unsigned long addr) { - struct hns_roce_dev *hr_dev = to_hr_dev(srq->ibsrq.device); - struct hns_roce_ib_create_srq ucmd; - int page_shift; - int page_count; - int ret; - - if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) - return -EFAULT; + struct ib_device *ibdev = &hr_dev->ib_dev; + struct hns_roce_buf_attr buf_attr = {}; + int sge_size; + int err; - srq->umem = - ib_umem_get(srq->ibsrq.device, ucmd.buf_addr, srq_buf_size, 0); - if (IS_ERR(srq->umem)) - return PTR_ERR(srq->umem); + sge_size = roundup_pow_of_two(max(HNS_ROCE_SGE_SIZE, + HNS_ROCE_SGE_SIZE * srq->max_gs)); - page_count = (ib_umem_page_count(srq->umem) + - (1 << hr_dev->caps.srqwqe_buf_pg_sz) - 1) / - (1 << hr_dev->caps.srqwqe_buf_pg_sz); - page_shift = PAGE_SHIFT + hr_dev->caps.srqwqe_buf_pg_sz; - ret = hns_roce_mtt_init(hr_dev, page_count, page_shift, - &srq->mtt); - if (ret) - goto err_user_buf; + srq->wqe_shift = ilog2(sge_size); - ret = hns_roce_ib_umem_write_mtt(hr_dev, &srq->mtt, srq->umem); - if (ret) - goto err_user_srq_mtt; - - /* config index queue BA */ - srq->idx_que.umem = ib_umem_get(srq->ibsrq.device, ucmd.que_addr, - srq->idx_que.buf_size, 0); - if (IS_ERR(srq->idx_que.umem)) { - dev_err(hr_dev->dev, "ib_umem_get error for index queue\n"); - ret = PTR_ERR(srq->idx_que.umem); - goto err_user_srq_mtt; - } + buf_attr.page_shift = hr_dev->caps.srqwqe_buf_pg_sz + PAGE_ADDR_SHIFT; + buf_attr.region[0].size = srq->wqe_cnt * sge_size; + buf_attr.region[0].hopnum = hr_dev->caps.srqwqe_hop_num; + buf_attr.region_count = 1; + buf_attr.fixed_page = true; - page_count = DIV_ROUND_UP(ib_umem_page_count(srq->idx_que.umem), - 1 << hr_dev->caps.idx_buf_pg_sz); - page_shift = PAGE_SHIFT + hr_dev->caps.idx_buf_pg_sz; - ret = hns_roce_mtt_init(hr_dev, page_count, page_shift, - &srq->idx_que.mtt); - if (ret) { - dev_err(hr_dev->dev, "hns_roce_mtt_init error for idx que\n"); - goto err_user_idx_mtt; - } - - ret = hns_roce_ib_umem_write_mtt(hr_dev, &srq->idx_que.mtt, - srq->idx_que.umem); - if (ret) { - dev_err(hr_dev->dev, - "hns_roce_ib_umem_write_mtt error for idx que\n"); - goto err_user_idx_buf; - } + err = hns_roce_mtr_create(hr_dev, &srq->buf_mtr, &buf_attr, + hr_dev->caps.srqwqe_ba_pg_sz + + PAGE_ADDR_SHIFT, udata, addr); + if (err) + ibdev_err(ibdev, "Failed to alloc SRQ buf mtr, err %d\n", err); - return 0; - -err_user_idx_buf: - hns_roce_mtt_cleanup(hr_dev, &srq->idx_que.mtt); - -err_user_idx_mtt: - ib_umem_release(srq->idx_que.umem); - -err_user_srq_mtt: - hns_roce_mtt_cleanup(hr_dev, &srq->mtt); - -err_user_buf: - ib_umem_release(srq->umem); + return err; +} - return ret; +static void free_srq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) +{ + hns_roce_mtr_destroy(hr_dev, &srq->buf_mtr); } -static int hns_roce_create_idx_que(struct ib_pd *pd, struct hns_roce_srq *srq, - u32 page_shift) +static int alloc_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, + struct ib_udata *udata, unsigned long addr) { - struct hns_roce_dev *hr_dev = to_hr_dev(pd->device); struct hns_roce_idx_que *idx_que = &srq->idx_que; + struct ib_device *ibdev = &hr_dev->ib_dev; + struct hns_roce_buf_attr buf_attr = {}; + int err; - idx_que->bitmap = bitmap_zalloc(srq->wqe_cnt, GFP_KERNEL); - if (!idx_que->bitmap) - return -ENOMEM; + srq->idx_que.entry_sz = HNS_ROCE_IDX_QUE_ENTRY_SZ; + + buf_attr.page_shift = hr_dev->caps.idx_buf_pg_sz + PAGE_ADDR_SHIFT; + buf_attr.region[0].size = srq->wqe_cnt * HNS_ROCE_IDX_QUE_ENTRY_SZ; + buf_attr.region[0].hopnum = hr_dev->caps.idx_hop_num; + buf_attr.region_count = 1; + buf_attr.fixed_page = true; + + err = hns_roce_mtr_create(hr_dev, &idx_que->mtr, &buf_attr, + hr_dev->caps.idx_ba_pg_sz + PAGE_ADDR_SHIFT, + udata, addr); + if (err) { + ibdev_err(ibdev, "Failed to alloc SRQ idx mtr, err %d\n", err); + return err; + } - idx_que->buf_size = srq->idx_que.buf_size; + if (!udata) { + idx_que->bitmap = bitmap_zalloc(srq->wqe_cnt, GFP_KERNEL); + if (!idx_que->bitmap) { + ibdev_err(ibdev, "Failed to alloc SRQ idx bitmap\n"); + err = -ENOMEM; + goto err_idx_mtr; + } - if (hns_roce_buf_alloc(hr_dev, idx_que->buf_size, (1 << page_shift) * 2, - &idx_que->idx_buf, page_shift)) { - bitmap_free(idx_que->bitmap); - return -ENOMEM; } return 0; +err_idx_mtr: + hns_roce_mtr_destroy(hr_dev, &idx_que->mtr); + + return err; } -static int create_kernel_srq(struct hns_roce_srq *srq, int srq_buf_size) +static void free_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) { - struct hns_roce_dev *hr_dev = to_hr_dev(srq->ibsrq.device); - u32 page_shift = PAGE_SHIFT + hr_dev->caps.srqwqe_buf_pg_sz; - int ret; + struct hns_roce_idx_que *idx_que = &srq->idx_que; - if (hns_roce_buf_alloc(hr_dev, srq_buf_size, (1 << page_shift) * 2, - &srq->buf, page_shift)) - return -ENOMEM; + bitmap_free(idx_que->bitmap); + idx_que->bitmap = NULL; + hns_roce_mtr_destroy(hr_dev, &idx_que->mtr); +} +static int alloc_srq_wrid(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) +{ srq->head = 0; srq->tail = srq->wqe_cnt - 1; - - ret = hns_roce_mtt_init(hr_dev, srq->buf.npages, srq->buf.page_shift, - &srq->mtt); - if (ret) - goto err_kernel_buf; - - ret = hns_roce_buf_write_mtt(hr_dev, &srq->mtt, &srq->buf); - if (ret) - goto err_kernel_srq_mtt; - - page_shift = PAGE_SHIFT + hr_dev->caps.idx_buf_pg_sz; - ret = hns_roce_create_idx_que(srq->ibsrq.pd, srq, page_shift); - if (ret) { - dev_err(hr_dev->dev, "Create idx queue fail(%d)!\n", ret); - goto err_kernel_srq_mtt; - } - - /* Init mtt table for idx_que */ - ret = hns_roce_mtt_init(hr_dev, srq->idx_que.idx_buf.npages, - srq->idx_que.idx_buf.page_shift, - &srq->idx_que.mtt); - if (ret) - goto err_kernel_create_idx; - - /* Write buffer address into the mtt table */ - ret = hns_roce_buf_write_mtt(hr_dev, &srq->idx_que.mtt, - &srq->idx_que.idx_buf); - if (ret) - goto err_kernel_idx_buf; - srq->wrid = kvmalloc_array(srq->wqe_cnt, sizeof(u64), GFP_KERNEL); - if (!srq->wrid) { - ret = -ENOMEM; - goto err_kernel_idx_buf; - } + if (!srq->wrid) + return -ENOMEM; return 0; - -err_kernel_idx_buf: - hns_roce_mtt_cleanup(hr_dev, &srq->idx_que.mtt); - -err_kernel_create_idx: - hns_roce_buf_free(hr_dev, &srq->idx_que.idx_buf); - kfree(srq->idx_que.bitmap); - -err_kernel_srq_mtt: - hns_roce_mtt_cleanup(hr_dev, &srq->mtt); - -err_kernel_buf: - hns_roce_buf_free(hr_dev, &srq->buf); - - return ret; -} - -static void destroy_user_srq(struct hns_roce_dev *hr_dev, - struct hns_roce_srq *srq) -{ - hns_roce_mtt_cleanup(hr_dev, &srq->idx_que.mtt); - ib_umem_release(srq->idx_que.umem); - hns_roce_mtt_cleanup(hr_dev, &srq->mtt); - ib_umem_release(srq->umem); } -static void destroy_kernel_srq(struct hns_roce_dev *hr_dev, - struct hns_roce_srq *srq) +static void free_srq_wrid(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) { - kvfree(srq->wrid); - hns_roce_mtt_cleanup(hr_dev, &srq->idx_que.mtt); - hns_roce_buf_free(hr_dev, &srq->idx_que.idx_buf); - kfree(srq->idx_que.bitmap); - hns_roce_mtt_cleanup(hr_dev, &srq->mtt); - hns_roce_buf_free(hr_dev, &srq->buf); + kfree(srq->wrid); + srq->wrid = NULL; } int hns_roce_create_srq(struct ib_srq *ib_srq, @@ -363,8 +283,8 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, struct hns_roce_dev *hr_dev = to_hr_dev(ib_srq->device); struct hns_roce_ib_create_srq_resp resp = {}; struct hns_roce_srq *srq = to_hr_srq(ib_srq); - int srq_desc_size; - int srq_buf_size; + struct ib_device *ibdev = &hr_dev->ib_dev; + struct hns_roce_ib_create_srq ucmd = {}; int ret = 0; u32 cqn; @@ -379,41 +299,45 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, srq->wqe_cnt = roundup_pow_of_two(init_attr->attr.max_wr + 1); srq->max_gs = init_attr->attr.max_sge; - srq_desc_size = roundup_pow_of_two(max(HNS_ROCE_SGE_SIZE, - HNS_ROCE_SGE_SIZE * srq->max_gs)); - - srq->wqe_shift = ilog2(srq_desc_size); - - srq_buf_size = srq->wqe_cnt * srq_desc_size; - - srq->idx_que.entry_sz = HNS_ROCE_IDX_QUE_ENTRY_SZ; - srq->idx_que.buf_size = srq->wqe_cnt * srq->idx_que.entry_sz; - srq->mtt.mtt_type = MTT_TYPE_SRQWQE; - srq->idx_que.mtt.mtt_type = MTT_TYPE_IDX; - if (udata) { - ret = create_user_srq(srq, udata, srq_buf_size); + ret = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)); if (ret) { - dev_err(hr_dev->dev, "Create user srq failed\n"); - goto err_srq; + ibdev_err(ibdev, "Failed to copy SRQ udata, err %d\n", + ret); + return ret; } - } else { - ret = create_kernel_srq(srq, srq_buf_size); + } + + ret = alloc_srq_buf(hr_dev, srq, udata, ucmd.buf_addr); + if (ret) { + ibdev_err(ibdev, "Failed to alloc SRQ buffer, err %d\n", ret); + return ret; + } + + ret = alloc_srq_idx(hr_dev, srq, udata, ucmd.que_addr); + if (ret) { + ibdev_err(ibdev, "Failed to alloc SRQ idx, err %d\n", ret); + goto err_buf_alloc; + } + + if (!udata) { + ret = alloc_srq_wrid(hr_dev, srq); if (ret) { - dev_err(hr_dev->dev, "Create kernel srq failed\n"); - goto err_srq; + ibdev_err(ibdev, "Failed to alloc SRQ wrid, err %d\n", + ret); + goto err_idx_alloc; } } cqn = ib_srq_has_cq(init_attr->srq_type) ? to_hr_cq(init_attr->ext.cq)->cqn : 0; - srq->db_reg_l = hr_dev->reg_base + SRQ_DB_REG; - ret = hns_roce_srq_alloc(hr_dev, to_hr_pd(ib_srq->pd)->pdn, cqn, 0, - &srq->mtt, 0, srq); - if (ret) - goto err_wrid; + ret = alloc_srqc(hr_dev, srq, to_hr_pd(ib_srq->pd)->pdn, cqn, 0, 0); + if (ret) { + ibdev_err(ibdev, "Failed to alloc SRQ context, err %d\n", ret); + goto err_wrid_alloc; + } srq->event = hns_roce_ib_srq_event; resp.srqn = srq->srqn; @@ -429,15 +353,13 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, return 0; err_srqc_alloc: - hns_roce_srq_free(hr_dev, srq); - -err_wrid: - if (udata) - destroy_user_srq(hr_dev, srq); - else - destroy_kernel_srq(hr_dev, srq); - -err_srq: + free_srqc(hr_dev, srq); +err_wrid_alloc: + free_srq_wrid(hr_dev, srq); +err_idx_alloc: + free_srq_idx(hr_dev, srq); +err_buf_alloc: + free_srq_buf(hr_dev, srq); return ret; } @@ -446,17 +368,10 @@ void hns_roce_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device); struct hns_roce_srq *srq = to_hr_srq(ibsrq); - hns_roce_srq_free(hr_dev, srq); - hns_roce_mtt_cleanup(hr_dev, &srq->mtt); - - if (udata) { - hns_roce_mtt_cleanup(hr_dev, &srq->idx_que.mtt); - } else { - kvfree(srq->wrid); - hns_roce_buf_free(hr_dev, &srq->buf); - } - ib_umem_release(srq->idx_que.umem); - ib_umem_release(srq->umem); + free_srqc(hr_dev, srq); + free_srq_idx(hr_dev, srq); + free_srq_wrid(hr_dev, srq); + free_srq_buf(hr_dev, srq); } int hns_roce_init_srq_table(struct hns_roce_dev *hr_dev) From patchwork Mon Apr 13 11:58:11 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Weihang Li X-Patchwork-Id: 11485475 X-Patchwork-Delegate: jgg@ziepe.ca Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id E37F4912 for ; Mon, 13 Apr 2020 11:58:26 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id D00B12073E for ; Mon, 13 Apr 2020 11:58:26 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1728560AbgDML6Z (ORCPT ); Mon, 13 Apr 2020 07:58:25 -0400 Received: from szxga06-in.huawei.com ([45.249.212.32]:38642 "EHLO huawei.com" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1728558AbgDML6Y (ORCPT ); Mon, 13 Apr 2020 07:58:24 -0400 Received: from DGGEMS403-HUB.china.huawei.com (unknown [172.30.72.60]) by Forcepoint Email with ESMTP id 80E1E3D70D63EC37E379; Mon, 13 Apr 2020 19:58:22 +0800 (CST) Received: from localhost.localdomain (10.67.165.24) by DGGEMS403-HUB.china.huawei.com (10.3.19.203) with Microsoft SMTP Server id 14.3.487.0; Mon, 13 Apr 2020 19:58:12 +0800 From: Weihang Li To: , CC: , , Subject: [PATCH for-next 6/6] RDMA/hns: Support 0 hop addressing for CQE buffer Date: Mon, 13 Apr 2020 19:58:11 +0800 Message-ID: <1586779091-51410-7-git-send-email-liweihang@huawei.com> X-Mailer: git-send-email 2.8.1 In-Reply-To: <1586779091-51410-1-git-send-email-liweihang@huawei.com> References: <1586779091-51410-1-git-send-email-liweihang@huawei.com> MIME-Version: 1.0 X-Originating-IP: [10.67.165.24] X-CFilter-Loop: Reflected Sender: linux-rdma-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-rdma@vger.kernel.org From: Xi Wang Add the zero hop addressing support by using mtr interface for CQE buffer, so the hns driver can support addressing hopnum between 0 to 3 for CQE. Signed-off-by: Xi Wang Signed-off-by: Weihang Li --- drivers/infiniband/hw/hns/hns_roce_cq.c | 351 +++++++++------------------- drivers/infiniband/hw/hns/hns_roce_device.h | 8 +- drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 13 +- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 15 +- 4 files changed, 122 insertions(+), 265 deletions(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index 92798ff..d2d7074 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -39,51 +39,40 @@ #include #include "hns_roce_common.h" -static int hns_roce_alloc_cqc(struct hns_roce_dev *hr_dev, - struct hns_roce_cq *hr_cq) +static int alloc_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) { struct hns_roce_cmd_mailbox *mailbox; - struct hns_roce_hem_table *mtt_table; struct hns_roce_cq_table *cq_table; - struct device *dev = hr_dev->dev; + struct ib_device *ibdev = &hr_dev->ib_dev; + u64 mtts[MTT_MIN_COUNT] = { 0 }; dma_addr_t dma_handle; - u64 *mtts; int ret; - cq_table = &hr_dev->cq_table; - - /* Get the physical address of cq buf */ - if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE)) - mtt_table = &hr_dev->mr_table.mtt_cqe_table; - else - mtt_table = &hr_dev->mr_table.mtt_table; - - mtts = hns_roce_table_find(hr_dev, mtt_table, hr_cq->mtt.first_seg, - &dma_handle); - - if (!mtts) { - dev_err(dev, "Failed to find mtt for CQ buf.\n"); + ret = hns_roce_mtr_find(hr_dev, &hr_cq->mtr, 0, mtts, ARRAY_SIZE(mtts), + &dma_handle); + if (ret < 1) { + ibdev_err(ibdev, "Failed to find CQ mtr\n"); return -EINVAL; } + cq_table = &hr_dev->cq_table; ret = hns_roce_bitmap_alloc(&cq_table->bitmap, &hr_cq->cqn); if (ret) { - dev_err(dev, "Num of CQ out of range.\n"); + ibdev_err(ibdev, "Failed to alloc CQ bitmap, err %d\n", ret); return ret; } /* Get CQC memory HEM(Hardware Entry Memory) table */ ret = hns_roce_table_get(hr_dev, &cq_table->table, hr_cq->cqn); if (ret) { - dev_err(dev, - "Get context mem failed(%d) when CQ(0x%lx) alloc.\n", - ret, hr_cq->cqn); + ibdev_err(ibdev, "Failed to get CQ(0x%lx) context, err %d\n", + hr_cq->cqn, ret); goto err_out; } ret = xa_err(xa_store(&cq_table->array, hr_cq->cqn, hr_cq, GFP_KERNEL)); if (ret) { - dev_err(dev, "Failed to xa_store CQ.\n"); + ibdev_err(ibdev, "Failed to xa_store CQ\n"); goto err_put; } @@ -101,9 +90,9 @@ static int hns_roce_alloc_cqc(struct hns_roce_dev *hr_dev, HNS_ROCE_CMD_CREATE_CQC, HNS_ROCE_CMD_TIMEOUT_MSECS); hns_roce_free_cmd_mailbox(hr_dev, mailbox); if (ret) { - dev_err(dev, - "Send cmd mailbox failed(%d) when CQ(0x%lx) alloc.\n", - ret, hr_cq->cqn); + ibdev_err(ibdev, + "Failed to send create cmd for CQ(0x%lx), err %d\n", + hr_cq->cqn, ret); goto err_xa; } @@ -126,7 +115,7 @@ static int hns_roce_alloc_cqc(struct hns_roce_dev *hr_dev, return ret; } -void hns_roce_free_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) +static void free_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) { struct hns_roce_cq_table *cq_table = &hr_dev->cq_table; struct device *dev = hr_dev->dev; @@ -153,190 +142,86 @@ void hns_roce_free_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) hns_roce_bitmap_free(&cq_table->bitmap, hr_cq->cqn, BITMAP_NO_RR); } -static int get_cq_umem(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, - struct hns_roce_ib_create_cq ucmd, - struct ib_udata *udata) -{ - struct hns_roce_mtt *mtt = &hr_cq->mtt; - struct ib_umem **umem = &hr_cq->umem; - u32 npages; - int ret; - - *umem = ib_umem_get(&hr_dev->ib_dev, ucmd.buf_addr, hr_cq->buf_size, - IB_ACCESS_LOCAL_WRITE); - if (IS_ERR(*umem)) - return PTR_ERR(*umem); - - if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE)) - mtt->mtt_type = MTT_TYPE_CQE; - else - mtt->mtt_type = MTT_TYPE_WQE; - - npages = DIV_ROUND_UP(ib_umem_page_count(*umem), - 1 << hr_dev->caps.cqe_buf_pg_sz); - ret = hns_roce_mtt_init(hr_dev, npages, hr_cq->page_shift, mtt); - if (ret) - goto err_buf; - - ret = hns_roce_ib_umem_write_mtt(hr_dev, mtt, *umem); - if (ret) - goto err_mtt; - - return 0; - -err_mtt: - hns_roce_mtt_cleanup(hr_dev, mtt); - -err_buf: - ib_umem_release(*umem); - return ret; -} - -static int alloc_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) +static int alloc_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, + struct ib_udata *udata, unsigned long addr) { - struct hns_roce_buf *buf = &hr_cq->buf; - struct hns_roce_mtt *mtt = &hr_cq->mtt; - int ret; - - ret = hns_roce_buf_alloc(hr_dev, hr_cq->buf_size, - (1 << hr_cq->page_shift) * 2, - buf, hr_cq->page_shift); - if (ret) - goto out; - - if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE)) - mtt->mtt_type = MTT_TYPE_CQE; - else - mtt->mtt_type = MTT_TYPE_WQE; - - ret = hns_roce_mtt_init(hr_dev, buf->npages, buf->page_shift, mtt); - if (ret) - goto err_buf; - - ret = hns_roce_buf_write_mtt(hr_dev, mtt, buf); - if (ret) - goto err_mtt; - - return 0; - -err_mtt: - hns_roce_mtt_cleanup(hr_dev, mtt); - -err_buf: - hns_roce_buf_free(hr_dev, buf); - -out: - return ret; + struct ib_device *ibdev = &hr_dev->ib_dev; + struct hns_roce_buf_attr buf_attr = {}; + int err; + + buf_attr.page_shift = hr_dev->caps.cqe_buf_pg_sz + PAGE_ADDR_SHIFT; + buf_attr.region[0].size = hr_cq->cq_depth * hr_dev->caps.cq_entry_sz; + buf_attr.region[0].hopnum = hr_dev->caps.cqe_hop_num; + buf_attr.region_count = 1; + buf_attr.fixed_page = true; + + err = hns_roce_mtr_create(hr_dev, &hr_cq->mtr, &buf_attr, + hr_dev->caps.cqe_ba_pg_sz + PAGE_ADDR_SHIFT, + udata, addr); + if (err) + ibdev_err(ibdev, "Failed to alloc CQ mtr, err %d\n", err); + + return err; } static void free_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) { - hns_roce_buf_free(hr_dev, &hr_cq->buf); + hns_roce_mtr_destroy(hr_dev, &hr_cq->mtr); } -static int create_user_cq(struct hns_roce_dev *hr_dev, - struct hns_roce_cq *hr_cq, - struct ib_udata *udata, - struct hns_roce_ib_create_cq_resp *resp) +static int alloc_cq_db(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, + struct ib_udata *udata, unsigned long addr, + struct hns_roce_ib_create_cq_resp *resp) { - struct hns_roce_ib_create_cq ucmd; - struct device *dev = hr_dev->dev; - int ret; - struct hns_roce_ucontext *context = rdma_udata_to_drv_context( - udata, struct hns_roce_ucontext, ibucontext); - - if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) { - dev_err(dev, "Failed to copy_from_udata.\n"); - return -EFAULT; - } + bool has_db = hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB; + struct hns_roce_ucontext *uctx; + int err; - /* Get user space address, write it into mtt table */ - ret = get_cq_umem(hr_dev, hr_cq, ucmd, udata); - if (ret) { - dev_err(dev, "Failed to get_cq_umem.\n"); - return ret; - } - - if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB && - udata->outlen >= offsetofend(typeof(*resp), cap_flags)) { - ret = hns_roce_db_map_user(context, udata, ucmd.db_addr, - &hr_cq->db); - if (ret) { - dev_err(dev, "cq record doorbell map failed!\n"); - goto err_mtt; + if (udata) { + if (has_db && + udata->outlen >= offsetofend(typeof(*resp), cap_flags)) { + uctx = rdma_udata_to_drv_context(udata, + struct hns_roce_ucontext, ibucontext); + err = hns_roce_db_map_user(uctx, udata, addr, + &hr_cq->db); + if (err) + return err; + hr_cq->db_en = 1; + resp->cap_flags |= HNS_ROCE_SUPPORT_CQ_RECORD_DB; } - hr_cq->db_en = 1; - resp->cap_flags |= HNS_ROCE_SUPPORT_CQ_RECORD_DB; - } - - return 0; - -err_mtt: - hns_roce_mtt_cleanup(hr_dev, &hr_cq->mtt); - ib_umem_release(hr_cq->umem); - - return ret; -} - -static int create_kernel_cq(struct hns_roce_dev *hr_dev, - struct hns_roce_cq *hr_cq) -{ - struct device *dev = hr_dev->dev; - int ret; - - if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) { - ret = hns_roce_alloc_db(hr_dev, &hr_cq->db, 1); - if (ret) - return ret; - - hr_cq->set_ci_db = hr_cq->db.db_record; - *hr_cq->set_ci_db = 0; - hr_cq->db_en = 1; - } - - /* Init mtt table and write buff address to mtt table */ - ret = alloc_cq_buf(hr_dev, hr_cq); - if (ret) { - dev_err(dev, "Failed to alloc_cq_buf.\n"); - goto err_db; + } else { + if (has_db) { + err = hns_roce_alloc_db(hr_dev, &hr_cq->db, 1); + if (err) + return err; + hr_cq->set_ci_db = hr_cq->db.db_record; + *hr_cq->set_ci_db = 0; + hr_cq->db_en = 1; + } + hr_cq->cq_db_l = hr_dev->reg_base + hr_dev->odb_offset + + DB_REG_OFFSET * hr_dev->priv_uar.index; } - hr_cq->cq_db_l = hr_dev->reg_base + hr_dev->odb_offset + - DB_REG_OFFSET * hr_dev->priv_uar.index; - return 0; - -err_db: - if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) - hns_roce_free_db(hr_dev, &hr_cq->db); - - return ret; } -static void destroy_user_cq(struct hns_roce_dev *hr_dev, - struct hns_roce_cq *hr_cq, - struct ib_udata *udata, - struct hns_roce_ib_create_cq_resp *resp) +static void free_cq_db(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, + struct ib_udata *udata) { - struct hns_roce_ucontext *context = rdma_udata_to_drv_context( - udata, struct hns_roce_ucontext, ibucontext); + struct hns_roce_ucontext *uctx; - if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB && - udata->outlen >= offsetofend(typeof(*resp), cap_flags)) - hns_roce_db_unmap_user(context, &hr_cq->db); - - hns_roce_mtt_cleanup(hr_dev, &hr_cq->mtt); - ib_umem_release(hr_cq->umem); -} - -static void destroy_kernel_cq(struct hns_roce_dev *hr_dev, - struct hns_roce_cq *hr_cq) -{ - hns_roce_mtt_cleanup(hr_dev, &hr_cq->mtt); - free_cq_buf(hr_dev, hr_cq); + if (!hr_cq->db_en) + return; - if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) + hr_cq->db_en = 0; + if (udata) { + uctx = rdma_udata_to_drv_context(udata, + struct hns_roce_ucontext, + ibucontext); + hns_roce_db_unmap_user(uctx, &hr_cq->db); + } else { hns_roce_free_db(hr_dev, &hr_cq->db); + } } int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, @@ -345,20 +230,21 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device); struct hns_roce_ib_create_cq_resp resp = {}; struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq); - struct device *dev = hr_dev->dev; + struct ib_device *ibdev = &hr_dev->ib_dev; + struct hns_roce_ib_create_cq ucmd = {}; int vector = attr->comp_vector; u32 cq_entries = attr->cqe; int ret; if (cq_entries < 1 || cq_entries > hr_dev->caps.max_cqes) { - dev_err(dev, "Create CQ failed. entries=%d, max=%d\n", - cq_entries, hr_dev->caps.max_cqes); + ibdev_err(ibdev, "Failed to check CQ count %d max=%d\n", + cq_entries, hr_dev->caps.max_cqes); return -EINVAL; } if (vector >= hr_dev->caps.num_comp_vectors) { - dev_err(dev, "Create CQ failed, vector=%d, max=%d\n", - vector, hr_dev->caps.num_comp_vectors); + ibdev_err(ibdev, "Failed to check CQ vector=%d max=%d\n", + vector, hr_dev->caps.num_comp_vectors); return -EINVAL; } @@ -367,30 +253,35 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, hr_cq->ib_cq.cqe = cq_entries - 1; /* used as cqe index */ hr_cq->cq_depth = cq_entries; hr_cq->vector = vector; - hr_cq->buf_size = hr_cq->cq_depth * hr_dev->caps.cq_entry_sz; - hr_cq->page_shift = PAGE_SHIFT + hr_dev->caps.cqe_buf_pg_sz; spin_lock_init(&hr_cq->lock); INIT_LIST_HEAD(&hr_cq->sq_list); INIT_LIST_HEAD(&hr_cq->rq_list); if (udata) { - ret = create_user_cq(hr_dev, hr_cq, udata, &resp); - if (ret) { - dev_err(dev, "Create cq failed in user mode!\n"); - goto err_cq; - } - } else { - ret = create_kernel_cq(hr_dev, hr_cq); + ret = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)); if (ret) { - dev_err(dev, "Create cq failed in kernel mode!\n"); - goto err_cq; + ibdev_err(ibdev, "Failed to copy CQ udata, err %d\n", + ret); + return ret; } } - ret = hns_roce_alloc_cqc(hr_dev, hr_cq); + ret = alloc_cq_buf(hr_dev, hr_cq, udata, ucmd.buf_addr); + if (ret) { + ibdev_err(ibdev, "Failed to alloc CQ buf, err %d\n", ret); + return ret; + } + + ret = alloc_cq_db(hr_dev, hr_cq, udata, ucmd.db_addr, &resp); if (ret) { - dev_err(dev, "Alloc CQ failed(%d).\n", ret); - goto err_dbmap; + ibdev_err(ibdev, "Failed to alloc CQ db, err %d\n", ret); + goto err_cq_buf; + } + + ret = alloc_cqc(hr_dev, hr_cq); + if (ret) { + ibdev_err(ibdev, "Failed to alloc CQ context, err %d\n", ret); + goto err_cq_db; } /* @@ -412,15 +303,11 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, return 0; err_cqc: - hns_roce_free_cqc(hr_dev, hr_cq); - -err_dbmap: - if (udata) - destroy_user_cq(hr_dev, hr_cq, udata, &resp); - else - destroy_kernel_cq(hr_dev, hr_cq); - -err_cq: + free_cqc(hr_dev, hr_cq); +err_cq_db: + free_cq_db(hr_dev, hr_cq, udata); +err_cq_buf: + free_cq_buf(hr_dev, hr_cq); return ret; } @@ -429,28 +316,12 @@ void hns_roce_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device); struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq); - if (hr_dev->hw->destroy_cq) { + if (hr_dev->hw->destroy_cq) hr_dev->hw->destroy_cq(ib_cq, udata); - return; - } - - hns_roce_free_cqc(hr_dev, hr_cq); - hns_roce_mtt_cleanup(hr_dev, &hr_cq->mtt); - ib_umem_release(hr_cq->umem); - if (udata) { - if (hr_cq->db_en == 1) - hns_roce_db_unmap_user(rdma_udata_to_drv_context( - udata, - struct hns_roce_ucontext, - ibucontext), - &hr_cq->db); - } else { - /* Free the buff of stored cq */ - free_cq_buf(hr_dev, hr_cq); - if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) - hns_roce_free_db(hr_dev, &hr_cq->db); - } + free_cq_buf(hr_dev, hr_cq); + free_cq_db(hr_dev, hr_cq, udata); + free_cqc(hr_dev, hr_cq); } void hns_roce_cq_completion(struct hns_roce_dev *hr_dev, u32 cqn) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 39af736..ecbfeb6 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -503,14 +503,10 @@ struct hns_roce_db { struct hns_roce_cq { struct ib_cq ib_cq; - struct hns_roce_buf buf; - struct hns_roce_mtt mtt; + struct hns_roce_mtr mtr; struct hns_roce_db db; u8 db_en; spinlock_t lock; - struct ib_umem *umem; - u32 buf_size; - int page_shift; u32 cq_depth; u32 cons_index; u32 *set_ci_db; @@ -1294,8 +1290,6 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, struct ib_udata *udata); void hns_roce_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata); -void hns_roce_free_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq); - int hns_roce_db_map_user(struct hns_roce_ucontext *context, struct ib_udata *udata, unsigned long virt, struct hns_roce_db *db); diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index ddf2a45..a1f053c 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -1972,7 +1972,8 @@ static int hns_roce_v1_write_mtpt(void *mb_buf, struct hns_roce_mr *mr, static void *get_cqe(struct hns_roce_cq *hr_cq, int n) { - return hns_roce_buf_offset(&hr_cq->buf, n * HNS_ROCE_V1_CQE_ENTRY_SIZE); + return hns_roce_buf_offset(hr_cq->mtr.kmem, + n * HNS_ROCE_V1_CQE_ENTRY_SIZE); } static void *get_sw_cqe(struct hns_roce_cq *hr_cq, int n) @@ -3644,8 +3645,6 @@ static void hns_roce_v1_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) u32 cqe_cnt_cur; int wait_time = 0; - hns_roce_free_cqc(hr_dev, hr_cq); - /* * Before freeing cq buffer, we need to ensure that the outstanding CQE * have been written by checking the CQE counter. @@ -3668,14 +3667,6 @@ static void hns_roce_v1_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) } wait_time++; } - - hns_roce_mtt_cleanup(hr_dev, &hr_cq->mtt); - - ib_umem_release(hr_cq->umem); - if (!udata) { - /* Free the buff of stored cq */ - hns_roce_buf_free(hr_dev, &hr_cq->buf); - } } static void set_eq_cons_index_v1(struct hns_roce_eq *eq, int req_not) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 42be8a2..a1f7e36 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -2680,7 +2680,8 @@ static int hns_roce_v2_mw_write_mtpt(void *mb_buf, struct hns_roce_mw *mw) static void *get_cqe_v2(struct hns_roce_cq *hr_cq, int n) { - return hns_roce_buf_offset(&hr_cq->buf, n * HNS_ROCE_V2_CQE_ENTRY_SIZE); + return hns_roce_buf_offset(hr_cq->mtr.kmem, + n * HNS_ROCE_V2_CQE_ENTRY_SIZE); } static void *get_sw_cqe_v2(struct hns_roce_cq *hr_cq, int n) @@ -2801,30 +2802,30 @@ static void hns_roce_v2_write_cqc(struct hns_roce_dev *hr_dev, roce_set_field(cq_context->byte_8_cqn, V2_CQC_BYTE_8_CQN_M, V2_CQC_BYTE_8_CQN_S, hr_cq->cqn); - cq_context->cqe_cur_blk_addr = cpu_to_le32(mtts[0] >> PAGE_ADDR_SHIFT); + cq_context->cqe_cur_blk_addr = cpu_to_le32(to_hr_hw_page_addr(mtts[0])); roce_set_field(cq_context->byte_16_hop_addr, V2_CQC_BYTE_16_CQE_CUR_BLK_ADDR_M, V2_CQC_BYTE_16_CQE_CUR_BLK_ADDR_S, - mtts[0] >> (32 + PAGE_ADDR_SHIFT)); + upper_32_bits(to_hr_hw_page_addr(mtts[0]))); roce_set_field(cq_context->byte_16_hop_addr, V2_CQC_BYTE_16_CQE_HOP_NUM_M, V2_CQC_BYTE_16_CQE_HOP_NUM_S, hr_dev->caps.cqe_hop_num == HNS_ROCE_HOP_NUM_0 ? 0 : hr_dev->caps.cqe_hop_num); - cq_context->cqe_nxt_blk_addr = cpu_to_le32(mtts[1] >> PAGE_ADDR_SHIFT); + cq_context->cqe_nxt_blk_addr = cpu_to_le32(to_hr_hw_page_addr(mtts[1])); roce_set_field(cq_context->byte_24_pgsz_addr, V2_CQC_BYTE_24_CQE_NXT_BLK_ADDR_M, V2_CQC_BYTE_24_CQE_NXT_BLK_ADDR_S, - mtts[1] >> (32 + PAGE_ADDR_SHIFT)); + upper_32_bits(to_hr_hw_page_addr(mtts[1]))); roce_set_field(cq_context->byte_24_pgsz_addr, V2_CQC_BYTE_24_CQE_BA_PG_SZ_M, V2_CQC_BYTE_24_CQE_BA_PG_SZ_S, - hr_dev->caps.cqe_ba_pg_sz + PG_SHIFT_OFFSET); + to_hr_hw_page_shift(hr_cq->mtr.hem_cfg.ba_pg_shift)); roce_set_field(cq_context->byte_24_pgsz_addr, V2_CQC_BYTE_24_CQE_BUF_PG_SZ_M, V2_CQC_BYTE_24_CQE_BUF_PG_SZ_S, - hr_dev->caps.cqe_buf_pg_sz + PG_SHIFT_OFFSET); + to_hr_hw_page_shift(hr_cq->mtr.hem_cfg.buf_pg_shift)); cq_context->cqe_ba = cpu_to_le32(dma_handle >> 3);