From patchwork Mon Mar 18 13:59:48 2019
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: "Saleem, Shiraz"
X-Patchwork-Id: 10857691
From: Shiraz Saleem
To: dledford@redhat.com, jgg@ziepe.ca
Cc: linux-rdma@vger.kernel.org, Shiraz Saleem
Subject: [PATCH v2 rdma-next] RDMA/umem: Combine contiguous PAGE_SIZE regions in SGEs
Date: Mon, 18 Mar 2019 08:59:48 -0500
Message-Id: <20190318135948.17348-1-shiraz.saleem@intel.com>
X-Mailer: git-send-email 2.8.3
X-Mailing-List: linux-rdma@vger.kernel.org

Combine contiguous regions of PAGE_SIZE pages into single scatter list
entries while adding to the scatter table. This minimizes the number of
entries in the scatter list and reduces the DMA mapping overhead,
particularly when an IOMMU is in use.

This patch should be applied after
https://patchwork.kernel.org/cover/10857607/

Suggested-by: Jason Gunthorpe
Reviewed-by: Michael J. Ruhl
Signed-off-by: Shiraz Saleem
Reviewed-by: Ira Weiny
---
This patch is decoupled from series
https://patchwork.kernel.org/patch/10819993/ as it can stand alone.
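
For reviewers who want the coalescing idea in isolation, the snippet below is
illustrative only and is not part of the patch. It is a stand-alone user-space
sketch (all names in it, such as pfn_run and coalesce_pfns, are invented) of
the same merging that ib_umem_add_sg_table() performs on struct page pointers:
physically contiguous page frames collapse into a single (start, length) run.

/* Illustrative only -- not part of the patch. Hypothetical names. */
#include <stdio.h>
#include <stddef.h>

struct pfn_run {
	unsigned long start_pfn;	/* first page frame in the run */
	unsigned long npages;		/* number of contiguous pages  */
};

/* Collapse pfns[] into contiguous runs[]; returns the number of runs. */
static size_t coalesce_pfns(const unsigned long *pfns, size_t npfns,
			    struct pfn_run *runs)
{
	size_t nruns = 0;
	size_t i;

	for (i = 0; i < npfns; i++) {
		/* Extend the current run if this frame follows the last one */
		if (nruns && runs[nruns - 1].start_pfn +
			     runs[nruns - 1].npages == pfns[i]) {
			runs[nruns - 1].npages++;
			continue;
		}
		/* Otherwise start a new run, like starting a new SGE */
		runs[nruns].start_pfn = pfns[i];
		runs[nruns].npages = 1;
		nruns++;
	}
	return nruns;
}

int main(void)
{
	/* Frames 100-102 and 400-401 are contiguous; 250 stands alone */
	unsigned long pfns[] = { 100, 101, 102, 250, 400, 401 };
	struct pfn_run runs[6];
	size_t i, n = coalesce_pfns(pfns, 6, runs);

	for (i = 0; i < n; i++)
		printf("run %zu: pfn %lu, %lu page(s)\n",
		       i, runs[i].start_pfn, runs[i].npages);
	return 0;
}

With the six example frames above it prints three runs instead of six
single-page entries, which is exactly the reduction in scatterlist entries
the patch is after.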
Changes since v1:
* made ib_umem_add_sg_table() static
* simplified bool variable 'first' initialization

 drivers/infiniband/core/umem.c | 92 ++++++++++++++++++++++++++++++++++--------
 include/rdma/ib_umem.h         | 17 ++++----
 2 files changed, 84 insertions(+), 25 deletions(-)

diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index fe55515..0b71821 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -39,25 +39,22 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #include "uverbs.h"
 
-
 static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty)
 {
-	struct scatterlist *sg;
+	struct sg_page_iter sg_iter;
 	struct page *page;
-	int i;
 
 	if (umem->nmap > 0)
-		ib_dma_unmap_sg(dev, umem->sg_head.sgl,
-				umem->npages,
+		ib_dma_unmap_sg(dev, umem->sg_head.sgl, umem->sg_nents,
 				DMA_BIDIRECTIONAL);
 
-	for_each_sg(umem->sg_head.sgl, sg, umem->npages, i) {
-
-		page = sg_page(sg);
+	for_each_sg_page(umem->sg_head.sgl, &sg_iter, umem->sg_nents, 0) {
+		page = sg_page_iter_page(&sg_iter);
 		if (!PageDirty(page) && umem->writable && dirty)
 			set_page_dirty_lock(page);
 		put_page(page);
@@ -66,6 +63,67 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d
 	sg_free_table(&umem->sg_head);
 }
 
+/* ib_umem_add_sg_table - Add N contiguous pages to scatter table
+ *
+ * sg: current scatterlist entry
+ * page_list: array of npage struct page pointers
+ * npages: number of pages in page_list
+ * nents: [out] number of entries in the scatterlist
+ *
+ * Return new end of scatterlist
+ */
+static struct scatterlist *ib_umem_add_sg_table(struct scatterlist *sg,
+						struct page **page_list,
+						unsigned long npages,
+						int *nents)
+{
+	unsigned long first_pfn;
+	unsigned long i = 0;
+	bool update_cur_sg = false;
+	bool first = !sg_page(sg);
+
+	/* Check if new page_list is contiguous with end of previous page_list
+	 */
+	if (!first &&
+	    (page_to_pfn(sg_page(sg)) + (sg->length >> PAGE_SHIFT) ==
+	     page_to_pfn(page_list[0])))
+		update_cur_sg = true;
+
+	while (i != npages) {
+		unsigned long len;
+		struct page *first_page = page_list[i];
+
+		first_pfn = page_to_pfn(first_page);
+
+		/* Compute the number of contiguous pages we have starting
+		 * at i
+		 */
+		for (len = 0; i != npages &&
+			      first_pfn + len == page_to_pfn(page_list[i]);
+		     i++, len++)
+			;
+
+		/* Squash N contiguous pages from page_list into current sge */
+		if (update_cur_sg) {
+			sg->length += len << PAGE_SHIFT;
+			sg_set_page(sg, sg_page(sg), sg->length, 0);
+			update_cur_sg = false;
+			continue;
+		}
+
+		/* Squash N contiguous pages into next sge or first sge */
+		if (!first)
+			sg = sg_next(sg);
+
+		(*nents)++;
+		sg->length = len << PAGE_SHIFT;
+		sg_set_page(sg, first_page, sg->length, 0);
+		first = false;
+	}
+
+	return sg;
+}
+
 /**
  * ib_umem_get - Pin and DMA map userspace memory.
  *
@@ -93,7 +151,7 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
 	int ret;
 	int i;
 	unsigned long dma_attrs = 0;
-	struct scatterlist *sg, *sg_list_start;
+	struct scatterlist *sg;
 	unsigned int gup_flags = FOLL_WRITE;
 
 	if (!udata)
@@ -185,7 +243,7 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
 	if (!umem->writable)
 		gup_flags |= FOLL_FORCE;
 
-	sg_list_start = umem->sg_head.sgl;
+	sg = umem->sg_head.sgl;
 
 	while (npages) {
 		down_read(&mm->mmap_sem);
@@ -202,24 +260,24 @@ struct ib_umem *ib_umem_get(struct ib_udata *udata, unsigned long addr,
 		cur_base += ret * PAGE_SIZE;
 		npages   -= ret;
 
+		sg = ib_umem_add_sg_table(sg, page_list, ret, &umem->sg_nents);
+
 		/* Continue to hold the mmap_sem as vma_list access
 		 * needs to be protected.
 		 */
-		for_each_sg(sg_list_start, sg, ret, i) {
+		for (i = 0; i < ret && umem->hugetlb; i++) {
 			if (vma_list && !is_vm_hugetlb_page(vma_list[i]))
 				umem->hugetlb = 0;
-
-			sg_set_page(sg, page_list[i], PAGE_SIZE, 0);
 		}
-		up_read(&mm->mmap_sem);
 
-		/* preparing for next loop */
-		sg_list_start = sg;
+		up_read(&mm->mmap_sem);
 	}
 
+	sg_mark_end(sg);
+
 	umem->nmap = ib_dma_map_sg_attrs(context->device,
 				  umem->sg_head.sgl,
-				  umem->npages,
+				  umem->sg_nents,
 				  DMA_BIDIRECTIONAL,
 				  dma_attrs);

diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h
index 73af05d..18407a4 100644
--- a/include/rdma/ib_umem.h
+++ b/include/rdma/ib_umem.h
@@ -42,18 +42,19 @@ struct ib_umem_odp;
 
 struct ib_umem {
-	struct ib_ucontext     *context;
-	struct mm_struct       *owning_mm;
-	size_t			length;
-	unsigned long		address;
-	int			page_shift;
+	struct ib_ucontext *context;
+	struct mm_struct *owning_mm;
+	size_t length;
+	unsigned long address;
+	int page_shift;
 	u32 writable : 1;
 	u32 hugetlb : 1;
 	u32 is_odp : 1;
-	struct work_struct	work;
+	struct work_struct work;
 	struct sg_table sg_head;
-	int nmap;
-	int npages;
+	int sg_nents;
+	int nmap;
+	int npages;
 };
 
 /* Returns the offset of the umem start relative to the first page. */
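
A usage note, added here for illustration and not part of the patch: because
an SGE can now cover more than one page, provider drivers walking the umem
must take their lengths from sg_dma_len() rather than assuming PAGE_SIZE per
entry. The sketch below shows one way a driver might do that; my_build_pbl()
and its arguments are hypothetical, while ib_umem_get() (with the udata-based
signature visible in the diff above), for_each_sg(), sg_dma_address(),
sg_dma_len() and ib_umem_release() are existing kernel interfaces.

/*
 * Illustrative only -- a hypothetical driver helper, not part of the patch.
 * Shows how a verbs provider might walk the coalesced scatter list.
 */
#include <linux/err.h>
#include <linux/printk.h>
#include <linux/scatterlist.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_verbs.h>

static int my_build_pbl(struct ib_udata *udata, unsigned long start,
			size_t length, int access)
{
	struct ib_umem *umem;
	struct scatterlist *sg;
	int i;

	/* Pin and DMA map the user buffer; SGEs arrive already coalesced */
	umem = ib_umem_get(udata, start, length, access, 0);
	if (IS_ERR(umem))
		return PTR_ERR(umem);

	/*
	 * Each mapped entry may now span several pages, so size the device
	 * translation from sg_dma_len() instead of assuming PAGE_SIZE.
	 */
	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i)
		pr_debug("sge %d: dma %pad len %u\n",
			 i, &sg_dma_address(sg), sg_dma_len(sg));

	ib_umem_release(umem);
	return 0;
}

The sketch walks umem->nmap DMA-mapped entries, matching how the patch now
passes umem->sg_nents to ib_dma_map_sg_attrs(); real providers would typically
fold these entries into their own hardware page lists rather than just print
them.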