Message ID | 20200927064647.3106737-5-leon@kernel.org (mailing list archive) |
---|---|
State | New, archived |
Series | Dynamically allocate SG table from the pages |
On Sun, Sep 27, 2020 at 09:46:47AM +0300, Leon Romanovsky wrote:
> @@ -296,11 +223,17 @@ static struct ib_umem *__ib_umem_get(struct ib_device *device,
>  			goto umem_release;
>
>  		cur_base += ret * PAGE_SIZE;
> -		npages -= ret;
> -
> -		sg = ib_umem_add_sg_table(sg, page_list, ret,
> -			dma_get_max_seg_size(device->dma_device),
> -			&umem->sg_nents);
> +		npages -= ret;
> +		sg = __sg_alloc_table_from_pages(
> +			&umem->sg_head, page_list, ret, 0, ret << PAGE_SHIFT,
> +			dma_get_max_seg_size(device->dma_device), sg, npages,
> +			GFP_KERNEL);
> +		umem->sg_nents = umem->sg_head.nents;
> +		if (IS_ERR(sg)) {
> +			unpin_user_pages_dirty_lock(page_list, ret, 0);
> +			ret = PTR_ERR(sg);
> +			goto umem_release;
> +		}
>  	}
>
>  	sg_mark_end(sg);

Does it still need the sg_mark_end?

Jason
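For context on the question: a scatterlist's end marker is what tells consumers where to stop walking the table, so its position matters whenever fewer entries are populated than were allocated. Below is a minimal standalone model of that failure mode; `fake_sg` and `is_end` are made-up stand-ins for illustration, not the kernel's scatterlist API.

```c
/*
 * Model only: a table sized for the worst case (one entry per page)
 * where page merging left some entries unpopulated. If the terminator
 * stays on the last *allocated* entry, a walker visits empty entries.
 */
#include <stdio.h>

struct fake_sg {
	unsigned int length;	/* 0 means "never populated" */
	int is_end;		/* stands in for the SG_END bit */
};

int main(void)
{
	/* 4 entries allocated, only 2 populated by the pinning loop */
	struct fake_sg tbl[4] = { { 4096, 0 }, { 8192, 0 }, { 0, 0 }, { 0, 1 } };

	/* walking to the allocator-placed terminator visits empty entries */
	for (int i = 0; ; i++) {
		printf("entry %d: %u bytes\n", i, tbl[i].length);
		if (tbl[i].is_end)
			break;
	}

	/* the caller's sg_mark_end() equivalent: re-terminate at the
	 * last populated entry instead */
	tbl[3].is_end = 0;
	tbl[1].is_end = 1;
	return 0;
}
```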
On Tue, Sep 29, 2020 at 04:59:29PM -0300, Jason Gunthorpe wrote:
> On Sun, Sep 27, 2020 at 09:46:47AM +0300, Leon Romanovsky wrote:
> > @@ -296,11 +223,17 @@ static struct ib_umem *__ib_umem_get(struct ib_device *device,
> >  			goto umem_release;
> >
> >  		cur_base += ret * PAGE_SIZE;
> > -		npages -= ret;
> > -
> > -		sg = ib_umem_add_sg_table(sg, page_list, ret,
> > -			dma_get_max_seg_size(device->dma_device),
> > -			&umem->sg_nents);
> > +		npages -= ret;
> > +		sg = __sg_alloc_table_from_pages(
> > +			&umem->sg_head, page_list, ret, 0, ret << PAGE_SHIFT,
> > +			dma_get_max_seg_size(device->dma_device), sg, npages,
> > +			GFP_KERNEL);
> > +		umem->sg_nents = umem->sg_head.nents;
> > +		if (IS_ERR(sg)) {
> > +			unpin_user_pages_dirty_lock(page_list, ret, 0);
> > +			ret = PTR_ERR(sg);
> > +			goto umem_release;
> > +		}
> >  	}
> >
> >  	sg_mark_end(sg);
>
> Does it still need the sg_mark_end?

It is preserved here for correctness; the release logic doesn't rely on
this marker, but it is better to leave it.

Thanks

> Jason
On Wed, Sep 30, 2020 at 12:53:21PM +0300, Leon Romanovsky wrote:
> On Tue, Sep 29, 2020 at 04:59:29PM -0300, Jason Gunthorpe wrote:
> > On Sun, Sep 27, 2020 at 09:46:47AM +0300, Leon Romanovsky wrote:
> > > @@ -296,11 +223,17 @@ static struct ib_umem *__ib_umem_get(struct ib_device *device,
> > >  			goto umem_release;
> > >
> > >  		cur_base += ret * PAGE_SIZE;
> > > -		npages -= ret;
> > > -
> > > -		sg = ib_umem_add_sg_table(sg, page_list, ret,
> > > -			dma_get_max_seg_size(device->dma_device),
> > > -			&umem->sg_nents);
> > > +		npages -= ret;
> > > +		sg = __sg_alloc_table_from_pages(
> > > +			&umem->sg_head, page_list, ret, 0, ret << PAGE_SHIFT,
> > > +			dma_get_max_seg_size(device->dma_device), sg, npages,
> > > +			GFP_KERNEL);
> > > +		umem->sg_nents = umem->sg_head.nents;
> > > +		if (IS_ERR(sg)) {
> > > +			unpin_user_pages_dirty_lock(page_list, ret, 0);
> > > +			ret = PTR_ERR(sg);
> > > +			goto umem_release;
> > > +		}
> > >  	}
> > >
> > >  	sg_mark_end(sg);
> >
> > Does it still need the sg_mark_end?
>
> It is preserved here for correctness; the release logic doesn't rely on
> this marker, but it is better to leave it.

I mean, my read of __sg_alloc_table_from_pages() is that it already
placed it; the final __alloc_table() does it?

Jason
On 9/30/2020 2:45 PM, Jason Gunthorpe wrote:
> On Wed, Sep 30, 2020 at 12:53:21PM +0300, Leon Romanovsky wrote:
>> On Tue, Sep 29, 2020 at 04:59:29PM -0300, Jason Gunthorpe wrote:
>>> On Sun, Sep 27, 2020 at 09:46:47AM +0300, Leon Romanovsky wrote:
>>>> @@ -296,11 +223,17 @@ static struct ib_umem *__ib_umem_get(struct ib_device *device,
>>>>  			goto umem_release;
>>>>
>>>>  		cur_base += ret * PAGE_SIZE;
>>>> -		npages -= ret;
>>>> -
>>>> -		sg = ib_umem_add_sg_table(sg, page_list, ret,
>>>> -			dma_get_max_seg_size(device->dma_device),
>>>> -			&umem->sg_nents);
>>>> +		npages -= ret;
>>>> +		sg = __sg_alloc_table_from_pages(
>>>> +			&umem->sg_head, page_list, ret, 0, ret << PAGE_SHIFT,
>>>> +			dma_get_max_seg_size(device->dma_device), sg, npages,
>>>> +			GFP_KERNEL);
>>>> +		umem->sg_nents = umem->sg_head.nents;
>>>> +		if (IS_ERR(sg)) {
>>>> +			unpin_user_pages_dirty_lock(page_list, ret, 0);
>>>> +			ret = PTR_ERR(sg);
>>>> +			goto umem_release;
>>>> +		}
>>>>  	}
>>>>
>>>>  	sg_mark_end(sg);
>>> Does it still need the sg_mark_end?
>> It is preserved here for correctness; the release logic doesn't rely on
>> this marker, but it is better to leave it.
> I mean, my read of __sg_alloc_table_from_pages() is that it already
> placed it; the final __alloc_table() does it?
>
> Jason

It marks the last allocated sge, but not the last populated sge (with page).
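The distinction, sketched: the table can be allocated for the worst case of one entry per pinned page, while physically contiguous pages get merged, so fewer entries end up populated than were allocated. Below is a standalone model of that merge count; `count_chunks` and the per-entry page cap are simplifications with illustrative names, not the lib/scatterlist code.

```c
/*
 * Model only: count how many table entries a run of pages needs when
 * physically contiguous pages merge, capped at max_seg pages per entry.
 */
#include <stddef.h>
#include <stdio.h>

static size_t count_chunks(const unsigned long *pfns, size_t npages,
			   size_t max_seg)
{
	size_t chunks = 0;
	size_t i = 0;

	while (i < npages) {
		size_t len = 1;

		/* extend the run while the next page is contiguous */
		while (i + len < npages &&
		       pfns[i + len] == pfns[i] + len &&
		       len < max_seg)
			len++;
		chunks++;
		i += len;
	}
	return chunks;
}

int main(void)
{
	/* six pinned pages, of which the first four are contiguous */
	unsigned long pfns[] = { 100, 101, 102, 103, 500, 900 };

	/* prints 3: a table sized for six entries populates only three */
	printf("chunks = %zu\n", count_chunks(pfns, 6, 16));
	return 0;
}
```

So a terminator placed when the table was sized for six pages would sit past the last populated entry, which is what the final sg_mark_end(sg) in the loop corrects.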
On Wed, Sep 30, 2020 at 02:53:58PM +0300, Maor Gottlieb wrote:
>
> On 9/30/2020 2:45 PM, Jason Gunthorpe wrote:
> > On Wed, Sep 30, 2020 at 12:53:21PM +0300, Leon Romanovsky wrote:
> > > On Tue, Sep 29, 2020 at 04:59:29PM -0300, Jason Gunthorpe wrote:
> > > > On Sun, Sep 27, 2020 at 09:46:47AM +0300, Leon Romanovsky wrote:
> > > > > @@ -296,11 +223,17 @@ static struct ib_umem *__ib_umem_get(struct ib_device *device,
> > > > >  			goto umem_release;
> > > > >
> > > > >  		cur_base += ret * PAGE_SIZE;
> > > > > -		npages -= ret;
> > > > > -
> > > > > -		sg = ib_umem_add_sg_table(sg, page_list, ret,
> > > > > -			dma_get_max_seg_size(device->dma_device),
> > > > > -			&umem->sg_nents);
> > > > > +		npages -= ret;
> > > > > +		sg = __sg_alloc_table_from_pages(
> > > > > +			&umem->sg_head, page_list, ret, 0, ret << PAGE_SHIFT,
> > > > > +			dma_get_max_seg_size(device->dma_device), sg, npages,
> > > > > +			GFP_KERNEL);
> > > > > +		umem->sg_nents = umem->sg_head.nents;
> > > > > +		if (IS_ERR(sg)) {
> > > > > +			unpin_user_pages_dirty_lock(page_list, ret, 0);
> > > > > +			ret = PTR_ERR(sg);
> > > > > +			goto umem_release;
> > > > > +		}
> > > > >  	}
> > > > >
> > > > >  	sg_mark_end(sg);
> > > > Does it still need the sg_mark_end?
> > > It is preserved here for correctness; the release logic doesn't rely on
> > > this marker, but it is better to leave it.
> > I mean, my read of __sg_alloc_table_from_pages() is that it already
> > placed it; the final __alloc_table() does it?
>
> It marks the last allocated sge, but not the last populated sge (with page).

Why are those different?

It looks like the last iteration calls __alloc_table() with an exact
number of sges

+	if (!prv) {
+		/* Only the last allocation could be less than the maximum */
+		table_size = left_pages ? SG_MAX_SINGLE_ALLOC : chunks;
+		ret = sg_alloc_table(sgt, table_size, gfp_mask);
+		if (unlikely(ret))
+			return ERR_PTR(ret);
+	}

Jason
On 9/30/2020 2:58 PM, Jason Gunthorpe wrote:
> On Wed, Sep 30, 2020 at 02:53:58PM +0300, Maor Gottlieb wrote:
>> On 9/30/2020 2:45 PM, Jason Gunthorpe wrote:
>>> On Wed, Sep 30, 2020 at 12:53:21PM +0300, Leon Romanovsky wrote:
>>>> On Tue, Sep 29, 2020 at 04:59:29PM -0300, Jason Gunthorpe wrote:
>>>>> On Sun, Sep 27, 2020 at 09:46:47AM +0300, Leon Romanovsky wrote:
>>>>>> @@ -296,11 +223,17 @@ static struct ib_umem *__ib_umem_get(struct ib_device *device,
>>>>>>  			goto umem_release;
>>>>>>
>>>>>>  		cur_base += ret * PAGE_SIZE;
>>>>>> -		npages -= ret;
>>>>>> -
>>>>>> -		sg = ib_umem_add_sg_table(sg, page_list, ret,
>>>>>> -			dma_get_max_seg_size(device->dma_device),
>>>>>> -			&umem->sg_nents);
>>>>>> +		npages -= ret;
>>>>>> +		sg = __sg_alloc_table_from_pages(
>>>>>> +			&umem->sg_head, page_list, ret, 0, ret << PAGE_SHIFT,
>>>>>> +			dma_get_max_seg_size(device->dma_device), sg, npages,
>>>>>> +			GFP_KERNEL);
>>>>>> +		umem->sg_nents = umem->sg_head.nents;
>>>>>> +		if (IS_ERR(sg)) {
>>>>>> +			unpin_user_pages_dirty_lock(page_list, ret, 0);
>>>>>> +			ret = PTR_ERR(sg);
>>>>>> +			goto umem_release;
>>>>>> +		}
>>>>>>  	}
>>>>>>
>>>>>>  	sg_mark_end(sg);
>>>>> Does it still need the sg_mark_end?
>>>> It is preserved here for correctness; the release logic doesn't rely on
>>>> this marker, but it is better to leave it.
>>> I mean, my read of __sg_alloc_table_from_pages() is that it already
>>> placed it; the final __alloc_table() does it?
>> It marks the last allocated sge, but not the last populated sge (with page).
> Why are those different?
>
> It looks like the last iteration calls __alloc_table() with an exact
> number of sges
>
> +	if (!prv) {
> +		/* Only the last allocation could be less than the maximum */
> +		table_size = left_pages ? SG_MAX_SINGLE_ALLOC : chunks;
> +		ret = sg_alloc_table(sgt, table_size, gfp_mask);
> +		if (unlikely(ret))
> +			return ERR_PTR(ret);
> +	}
>
> Jason

This is right only for the last iteration. E.g. in the first iteration,
in case there are more pages (left_pages), we allocate
SG_MAX_SINGLE_ALLOC. We don't know how many pages from the second
iteration will be squashed into the SGE from the first iteration.
On Wed, Sep 30, 2020 at 06:05:15PM +0300, Maor Gottlieb wrote:
> This is right only for the last iteration. E.g. in the first iteration,
> in case there are more pages (left_pages), we allocate
> SG_MAX_SINGLE_ALLOC. We don't know how many pages from the second
> iteration will be squashed into the SGE from the first iteration.

Well, it is 0 or 1 SGEs. Check if the first page is mergeable and
subtract one from the required length?

I dislike this sg_mark_end(); it is something that should be internal,
IMHO.

Jason
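Jason's "0 or 1" point, sketched: a new batch of pinned pages can merge with the previous batch at exactly one seam, its first page. The standalone model below deliberately ignores the max segment size cap (a real merge is also refused when the previous entry is already full), and all names are illustrative rather than kernel API.

```c
/*
 * Model only: how many new table entries a batch needs, given whether
 * its first page continues the previous batch's last entry.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

static size_t entries_needed(size_t chunks_in_batch, bool have_prv,
			     unsigned long prv_end_pfn,
			     unsigned long first_pfn)
{
	/* at most one chunk can be squashed into the previous entry */
	if (have_prv && first_pfn == prv_end_pfn + 1)
		return chunks_in_batch - 1;
	return chunks_in_batch;
}

int main(void)
{
	/* previous batch ended at pfn 103; this batch starts at 104:
	 * its first chunk merges, so 3 chunks need only 2 new entries */
	printf("%zu\n", entries_needed(3, true, 103, 104));

	/* non-contiguous start: all 3 chunks need their own entry */
	printf("%zu\n", entries_needed(3, true, 103, 200));
	return 0;
}
```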
On 9/30/2020 6:14 PM, Jason Gunthorpe wrote:
> On Wed, Sep 30, 2020 at 06:05:15PM +0300, Maor Gottlieb wrote:
>> This is right only for the last iteration. E.g. in the first iteration,
>> in case there are more pages (left_pages), we allocate
>> SG_MAX_SINGLE_ALLOC. We don't know how many pages from the second
>> iteration will be squashed into the SGE from the first iteration.
> Well, it is 0 or 1 SGEs. Check if the first page is mergeable and
> subtract one from the required length?
>
> I dislike this sg_mark_end(); it is something that should be internal,
> IMHO.

I can move it to __sg_alloc_table_from_pages:

 	sgt->nents = tmp_nents;
+	if (!left_pages)
+		sg_mark_end(s);
 out:
 	return s;

>
> Jason
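A minimal model of that suggestion: if termination moves inside the allocator and keys off left_pages reaching zero, the caller's sg_mark_end() becomes unnecessary because only the final append call places the marker, on the last populated entry. The `fake_sg`/`append_batch` names below are stand-ins for illustration, not the lib/scatterlist implementation.

```c
/*
 * Model only: across repeated append calls, terminate the table on the
 * call where the caller reports that no pages remain.
 */
#include <assert.h>
#include <stddef.h>

struct fake_sg {
	int end;	/* stands in for the SG_END bit */
};

static struct fake_sg *append_batch(struct fake_sg *tbl, size_t *used,
				    size_t batch_entries, size_t left_pages)
{
	struct fake_sg *last;

	*used += batch_entries;
	last = &tbl[*used - 1];
	if (!left_pages)
		last->end = 1;	/* the internal sg_mark_end() equivalent */
	return last;
}

int main(void)
{
	struct fake_sg tbl[8] = { { 0 } };
	size_t used = 0;

	append_batch(tbl, &used, 3, 5);	/* pages still pending: no end mark */
	assert(!tbl[used - 1].end);

	append_batch(tbl, &used, 2, 0);	/* final batch: last entry marked */
	assert(tbl[used - 1].end);
	return 0;
}
```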
On Wed, Sep 30, 2020 at 12:14:06PM -0300, Jason Gunthorpe wrote:
> On Wed, Sep 30, 2020 at 06:05:15PM +0300, Maor Gottlieb wrote:
> > This is right only for the last iteration. E.g. in the first iteration,
> > in case there are more pages (left_pages), we allocate
> > SG_MAX_SINGLE_ALLOC. We don't know how many pages from the second
> > iteration will be squashed into the SGE from the first iteration.
>
> Well, it is 0 or 1 SGEs. Check if the first page is mergeable and
> subtract one from the required length?
>
> I dislike this sg_mark_end(); it is something that should be internal,
> IMHO.

I don't think so, but Maor provided a possible solution.

Can you take the patches?

Thanks

>
> Jason
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 01b680b62846..0ef736970aba 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -63,73 +63,6 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d
 	sg_free_table(&umem->sg_head);
 }
 
-/* ib_umem_add_sg_table - Add N contiguous pages to scatter table
- *
- * sg: current scatterlist entry
- * page_list: array of npage struct page pointers
- * npages: number of pages in page_list
- * max_seg_sz: maximum segment size in bytes
- * nents: [out] number of entries in the scatterlist
- *
- * Return new end of scatterlist
- */
-static struct scatterlist *ib_umem_add_sg_table(struct scatterlist *sg,
-						struct page **page_list,
-						unsigned long npages,
-						unsigned int max_seg_sz,
-						int *nents)
-{
-	unsigned long first_pfn;
-	unsigned long i = 0;
-	bool update_cur_sg = false;
-	bool first = !sg_page(sg);
-
-	/* Check if new page_list is contiguous with end of previous page_list.
-	 * sg->length here is a multiple of PAGE_SIZE and sg->offset is 0.
-	 */
-	if (!first && (page_to_pfn(sg_page(sg)) + (sg->length >> PAGE_SHIFT) ==
-		       page_to_pfn(page_list[0])))
-		update_cur_sg = true;
-
-	while (i != npages) {
-		unsigned long len;
-		struct page *first_page = page_list[i];
-
-		first_pfn = page_to_pfn(first_page);
-
-		/* Compute the number of contiguous pages we have starting
-		 * at i
-		 */
-		for (len = 0; i != npages &&
-			      first_pfn + len == page_to_pfn(page_list[i]) &&
-			      len < (max_seg_sz >> PAGE_SHIFT);
-		     len++)
-			i++;
-
-		/* Squash N contiguous pages from page_list into current sge */
-		if (update_cur_sg) {
-			if ((max_seg_sz - sg->length) >= (len << PAGE_SHIFT)) {
-				sg_set_page(sg, sg_page(sg),
-					    sg->length + (len << PAGE_SHIFT),
-					    0);
-				update_cur_sg = false;
-				continue;
-			}
-			update_cur_sg = false;
-		}
-
-		/* Squash N contiguous pages into next sge or first sge */
-		if (!first)
-			sg = sg_next(sg);
-
-		(*nents)++;
-		sg_set_page(sg, first_page, len << PAGE_SHIFT, 0);
-		first = false;
-	}
-
-	return sg;
-}
-
 /**
  * ib_umem_find_best_pgsz - Find best HW page size to use for this MR
  *
@@ -221,7 +154,7 @@ static struct ib_umem *__ib_umem_get(struct ib_device *device,
 	struct mm_struct *mm;
 	unsigned long npages;
 	int ret;
-	struct scatterlist *sg;
+	struct scatterlist *sg = NULL;
 	unsigned int gup_flags = FOLL_WRITE;
 
 	/*
@@ -276,15 +209,9 @@ static struct ib_umem *__ib_umem_get(struct ib_device *device,
 
 	cur_base = addr & PAGE_MASK;
 
-	ret = sg_alloc_table(&umem->sg_head, npages, GFP_KERNEL);
-	if (ret)
-		goto vma;
-
 	if (!umem->writable)
 		gup_flags |= FOLL_FORCE;
 
-	sg = umem->sg_head.sgl;
-
 	while (npages) {
 		cond_resched();
 		ret = pin_user_pages_fast(cur_base,
@@ -296,11 +223,17 @@ static struct ib_umem *__ib_umem_get(struct ib_device *device,
 			goto umem_release;
 
 		cur_base += ret * PAGE_SIZE;
-		npages -= ret;
-
-		sg = ib_umem_add_sg_table(sg, page_list, ret,
-			dma_get_max_seg_size(device->dma_device),
-			&umem->sg_nents);
+		npages -= ret;
+		sg = __sg_alloc_table_from_pages(
+			&umem->sg_head, page_list, ret, 0, ret << PAGE_SHIFT,
+			dma_get_max_seg_size(device->dma_device), sg, npages,
+			GFP_KERNEL);
+		umem->sg_nents = umem->sg_head.nents;
+		if (IS_ERR(sg)) {
+			unpin_user_pages_dirty_lock(page_list, ret, 0);
+			ret = PTR_ERR(sg);
+			goto umem_release;
+		}
 	}
 
 	sg_mark_end(sg);
@@ -322,7 +255,6 @@ static struct ib_umem *__ib_umem_get(struct ib_device *device,
 
 umem_release:
 	__ib_umem_release(device, umem, 0);
-vma:
 	atomic64_sub(ib_umem_num_pages(umem), &mm->pinned_vm);
 out:
 	free_page((unsigned long) page_list);