Message ID | 1516168494-13506-1-git-send-email-Hongbo.He@amd.com (mailing list archive)
---|---
State | New, archived
Am 17.01.2018 um 06:54 schrieb Roger He:
> add this for correctly updating global mem count in ttm_mem_zone.
> before that when ttm_mem_global_alloc_page fails, we would update all
> dma_page's global mem count in ttm_dma->pages_list. but actually here
> we should not update for the last dma_page.
>
> Signed-off-by: Roger He <Hongbo.He@amd.com>
> ---
> drivers/gpu/drm/ttm/ttm_page_alloc_dma.c | 57 +++++++++++++++++---------------
> 1 file changed, 31 insertions(+), 26 deletions(-)
>
> diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
> index c7f01a4..3e78ea4 100644
> --- a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
> +++ b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
> @@ -121,6 +121,8 @@ struct dma_pool {
> * @vaddr: The virtual address of the page and a flag if the page belongs to a
> * huge pool
> * @dma: The bus address of the page. If the page is not allocated
> + * @updated_glob_count: to indicate whether we already updated global
> + * memory count in ttm_mem_zone
> * via the DMA API, it will be -1.
> */
> struct dma_page {
> @@ -128,6 +130,7 @@ struct dma_page {
> unsigned long vaddr;
> struct page *p;
> dma_addr_t dma;
> + bool updated_glob_count;

That is not a good idea at all because it massively increases how much memory we will use.

Please put that in the vaddr instead. The lower bits should always be zero, so we already put the VADDR_FLAG_HUGE_POOL flag in there.

Apart from that it looks good to me.

Regards,
Christian.

> };
>
> /*
> @@ -874,18 +877,18 @@ static int ttm_dma_page_pool_fill_locked(struct dma_pool *pool,
> }
>
> /*
> - * @return count of pages still required to fulfill the request.
> * The populate list is actually a stack (not that is matters as TTM
> * allocates one page at a time.
> + * return dma_page pointer if success, otherwise NULL.
> */
> -static int ttm_dma_pool_get_pages(struct dma_pool *pool,
> +static struct dma_page *ttm_dma_pool_get_pages(struct dma_pool *pool,
> struct ttm_dma_tt *ttm_dma,
> unsigned index)
> {
> - struct dma_page *d_page;
> + struct dma_page *d_page = NULL;
> struct ttm_tt *ttm = &ttm_dma->ttm;
> unsigned long irq_flags;
> - int count, r = -ENOMEM;
> + int count;
>
> spin_lock_irqsave(&pool->lock, irq_flags);
> count = ttm_dma_page_pool_fill_locked(pool, &irq_flags);
> @@ -894,12 +897,11 @@ static int ttm_dma_pool_get_pages(struct dma_pool *pool,
> ttm->pages[index] = d_page->p;
> ttm_dma->dma_address[index] = d_page->dma;
> list_move_tail(&d_page->page_list, &ttm_dma->pages_list);
> - r = 0;
> pool->npages_in_use += 1;
> pool->npages_free -= 1;
> }
> spin_unlock_irqrestore(&pool->lock, irq_flags);
> - return r;
> + return d_page;
> }
>
> static gfp_t ttm_dma_pool_gfp_flags(struct ttm_dma_tt *ttm_dma, bool huge)
> @@ -934,6 +936,7 @@ int ttm_dma_populate(struct ttm_dma_tt *ttm_dma, struct device *dev,
> struct ttm_mem_global *mem_glob = ttm->glob->mem_glob;
> unsigned long num_pages = ttm->num_pages;
> struct dma_pool *pool;
> + struct dma_page *d_page;
> enum pool_type type;
> unsigned i;
> int ret;
> @@ -962,8 +965,8 @@ int ttm_dma_populate(struct ttm_dma_tt *ttm_dma, struct device *dev,
> while (num_pages >= HPAGE_PMD_NR) {
> unsigned j;
>
> - ret = ttm_dma_pool_get_pages(pool, ttm_dma, i);
> - if (ret != 0)
> + d_page = ttm_dma_pool_get_pages(pool, ttm_dma, i);
> + if (!d_page)
> break;
>
> ret = ttm_mem_global_alloc_page(mem_glob, ttm->pages[i],
> @@ -973,6 +976,7 @@ int ttm_dma_populate(struct ttm_dma_tt *ttm_dma, struct device *dev,
> return -ENOMEM;
> }
>
> + d_page->updated_glob_count = true;
> for (j = i + 1; j < (i + HPAGE_PMD_NR); ++j) {
> ttm->pages[j] = ttm->pages[j - 1] + 1;
> ttm_dma->dma_address[j] = ttm_dma->dma_address[j - 1] +
> @@ -996,8 +1000,8 @@ int ttm_dma_populate(struct ttm_dma_tt *ttm_dma, struct device *dev,
> }
>
> while (num_pages) {
> - ret = ttm_dma_pool_get_pages(pool, ttm_dma, i);
> - if (ret != 0) {
> + d_page = ttm_dma_pool_get_pages(pool, ttm_dma, i);
> + if (!d_page) {
> ttm_dma_unpopulate(ttm_dma, dev);
> return -ENOMEM;
> }
> @@ -1009,6 +1013,7 @@ int ttm_dma_populate(struct ttm_dma_tt *ttm_dma, struct device *dev,
> return -ENOMEM;
> }
>
> + d_page->updated_glob_count = true;
> ++i;
> --num_pages;
> }
> @@ -1049,8 +1054,11 @@ void ttm_dma_unpopulate(struct ttm_dma_tt *ttm_dma, struct device *dev)
> continue;
>
> count++;
> - ttm_mem_global_free_page(ttm->glob->mem_glob,
> - d_page->p, pool->size);
> + if (d_page->updated_glob_count) {
> + ttm_mem_global_free_page(ttm->glob->mem_glob,
> + d_page->p, pool->size);
> + d_page->updated_glob_count = false;
> + }
> ttm_dma_page_put(pool, d_page);
> }
>
> @@ -1070,9 +1078,19 @@ void ttm_dma_unpopulate(struct ttm_dma_tt *ttm_dma, struct device *dev)
>
> /* make sure pages array match list and count number of pages */
> count = 0;
> - list_for_each_entry(d_page, &ttm_dma->pages_list, page_list) {
> + list_for_each_entry_safe(d_page, next, &ttm_dma->pages_list,
> + page_list) {
> ttm->pages[count] = d_page->p;
> count++;
> +
> + if (d_page->updated_glob_count) {
> + ttm_mem_global_free_page(ttm->glob->mem_glob,
> + d_page->p, pool->size);
> + d_page->updated_glob_count = false;
> + }
> +
> + if (is_cached)
> + ttm_dma_page_put(pool, d_page);
> }
>
> spin_lock_irqsave(&pool->lock, irq_flags);
> @@ -1092,19 +1110,6 @@ void ttm_dma_unpopulate(struct ttm_dma_tt *ttm_dma, struct device *dev)
> }
>
> spin_unlock_irqrestore(&pool->lock, irq_flags);
>
> - if (is_cached) {
> - list_for_each_entry_safe(d_page, next, &ttm_dma->pages_list, page_list) {
> - ttm_mem_global_free_page(ttm->glob->mem_glob,
> - d_page->p, pool->size);
> - ttm_dma_page_put(pool, d_page);
> - }
> - } else {
> - for (i = 0; i < count; i++) {
> - ttm_mem_global_free_page(ttm->glob->mem_glob,
> - ttm->pages[i], pool->size);
> - }
> - }
> -
> INIT_LIST_HEAD(&ttm_dma->pages_list);
> for (i = 0; i < ttm->num_pages; i++) {
> ttm->pages[i] = NULL;
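A minimal sketch of the vaddr-flag approach Christian suggests above. This is illustrative only, not the actual follow-up patch: the flag name VADDR_FLAG_UPDATED_COUNT and the helper functions are assumptions chosen for the example.

```c
/* Sketch: keep the "global count already updated" state in the low bits of
 * dma_page::vaddr, next to the existing VADDR_FLAG_HUGE_POOL bit, instead of
 * growing the structure with a bool.  Names below are illustrative.
 */
#include <linux/types.h>

#define VADDR_FLAG_HUGE_POOL		1UL
#define VADDR_FLAG_UPDATED_COUNT	2UL	/* assumed second flag bit */
#define VADDR_FLAG_MASK			(VADDR_FLAG_HUGE_POOL | \
					 VADDR_FLAG_UPDATED_COUNT)

struct dma_page_sketch {
	unsigned long vaddr;	/* page-aligned kernel address plus flag bits */
};

/* The real pointer, with the flag bits stripped off. */
static inline void *dma_page_vaddr(const struct dma_page_sketch *d)
{
	return (void *)(d->vaddr & ~VADDR_FLAG_MASK);
}

/* Mark the page as charged against the global memory zone. */
static inline void dma_page_set_updated(struct dma_page_sketch *d)
{
	d->vaddr |= VADDR_FLAG_UPDATED_COUNT;
}

/* Returns true exactly once per charge, so unpopulate never double-frees. */
static inline bool dma_page_test_and_clear_updated(struct dma_page_sketch *d)
{
	bool updated = d->vaddr & VADDR_FLAG_UPDATED_COUNT;

	d->vaddr &= ~VADDR_FLAG_UPDATED_COUNT;
	return updated;
}
```

Because the stored virtual address is page aligned, its low bits are always zero, so both flags fit in vaddr without enlarging struct dma_page.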
-----Original Message-----
From: Koenig, Christian
Sent: Wednesday, January 17, 2018 3:37 PM
To: He, Roger <Hongbo.He@amd.com>; dri-devel@lists.freedesktop.org
Cc: Grodzovsky, Andrey <Andrey.Grodzovsky@amd.com>
Subject: Re: [PATCH] drm/ttm: add updated_glob_count in dma_page

Am 17.01.2018 um 06:54 schrieb Roger He:
> add this for correctly updating global mem count in ttm_mem_zone.
> before that when ttm_mem_global_alloc_page fails, we would update all
> dma_page's global mem count in ttm_dma->pages_list. but actually here
> we should not update for the last dma_page.
>
> Signed-off-by: Roger He <Hongbo.He@amd.com>
> ---
> drivers/gpu/drm/ttm/ttm_page_alloc_dma.c | 57 +++++++++++++++++---------------
> 1 file changed, 31 insertions(+), 26 deletions(-)
>
> diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
> index c7f01a4..3e78ea4 100644
> --- a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
> +++ b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
> @@ -121,6 +121,8 @@ struct dma_pool {
> * @vaddr: The virtual address of the page and a flag if the page belongs to a
> * huge pool
> * @dma: The bus address of the page. If the page is not allocated
> + * @updated_glob_count: to indicate whether we already updated global
> + * memory count in ttm_mem_zone
> * via the DMA API, it will be -1.
> */
> struct dma_page {
> @@ -128,6 +130,7 @@ struct dma_page {
> unsigned long vaddr;
> struct page *p;
> dma_addr_t dma;
> + bool updated_glob_count;

That is not a good idea at all because it massively increases how much memory we will use.

Please put that in the vaddr instead. The lower bits should always be zero, so we already put the VADDR_FLAG_HUGE_POOL flag in there.

Good idea, I will try.

Thanks
Roger(Hongbo.He)

> };
>
> /*
> @@ -874,18 +877,18 @@ static int ttm_dma_page_pool_fill_locked(struct dma_pool *pool,
> }
>
> /*
> - * @return count of pages still required to fulfill the request.
> * The populate list is actually a stack (not that is matters as TTM
> * allocates one page at a time.
> + * return dma_page pointer if success, otherwise NULL.
> */
> -static int ttm_dma_pool_get_pages(struct dma_pool *pool,
> +static struct dma_page *ttm_dma_pool_get_pages(struct dma_pool *pool,
> struct ttm_dma_tt *ttm_dma,
> unsigned index)
> {
> - struct dma_page *d_page;
> + struct dma_page *d_page = NULL;
> struct ttm_tt *ttm = &ttm_dma->ttm;
> unsigned long irq_flags;
> - int count, r = -ENOMEM;
> + int count;
>
> spin_lock_irqsave(&pool->lock, irq_flags);
> count = ttm_dma_page_pool_fill_locked(pool, &irq_flags);
> @@ -894,12 +897,11 @@ static int ttm_dma_pool_get_pages(struct dma_pool *pool,
> ttm->pages[index] = d_page->p;
> ttm_dma->dma_address[index] = d_page->dma;
> list_move_tail(&d_page->page_list, &ttm_dma->pages_list);
> - r = 0;
> pool->npages_in_use += 1;
> pool->npages_free -= 1;
> }
> spin_unlock_irqrestore(&pool->lock, irq_flags);
> - return r;
> + return d_page;
> }
>
> static gfp_t ttm_dma_pool_gfp_flags(struct ttm_dma_tt *ttm_dma, bool huge)
> @@ -934,6 +936,7 @@ int ttm_dma_populate(struct ttm_dma_tt *ttm_dma, struct device *dev,
> struct ttm_mem_global *mem_glob = ttm->glob->mem_glob;
> unsigned long num_pages = ttm->num_pages;
> struct dma_pool *pool;
> + struct dma_page *d_page;
> enum pool_type type;
> unsigned i;
> int ret;
> @@ -962,8 +965,8 @@ int ttm_dma_populate(struct ttm_dma_tt *ttm_dma, struct device *dev,
> while (num_pages >= HPAGE_PMD_NR) {
> unsigned j;
>
> - ret = ttm_dma_pool_get_pages(pool, ttm_dma, i);
> - if (ret != 0)
> + d_page = ttm_dma_pool_get_pages(pool, ttm_dma, i);
> + if (!d_page)
> break;
>
> ret = ttm_mem_global_alloc_page(mem_glob, ttm->pages[i],
> @@ -973,6 +976,7 @@ int ttm_dma_populate(struct ttm_dma_tt *ttm_dma, struct device *dev,
> return -ENOMEM;
> }
>
> + d_page->updated_glob_count = true;
> for (j = i + 1; j < (i + HPAGE_PMD_NR); ++j) {
> ttm->pages[j] = ttm->pages[j - 1] + 1;
> ttm_dma->dma_address[j] = ttm_dma->dma_address[j - 1] +
> @@ -996,8 +1000,8 @@ int ttm_dma_populate(struct ttm_dma_tt *ttm_dma, struct device *dev,
> }
>
> while (num_pages) {
> - ret = ttm_dma_pool_get_pages(pool, ttm_dma, i);
> - if (ret != 0) {
> + d_page = ttm_dma_pool_get_pages(pool, ttm_dma, i);
> + if (!d_page) {
> ttm_dma_unpopulate(ttm_dma, dev);
> return -ENOMEM;
> }
> @@ -1009,6 +1013,7 @@ int ttm_dma_populate(struct ttm_dma_tt *ttm_dma, struct device *dev,
> return -ENOMEM;
> }
>
> + d_page->updated_glob_count = true;
> ++i;
> --num_pages;
> }
> @@ -1049,8 +1054,11 @@ void ttm_dma_unpopulate(struct ttm_dma_tt *ttm_dma, struct device *dev)
> continue;
>
> count++;
> - ttm_mem_global_free_page(ttm->glob->mem_glob,
> - d_page->p, pool->size);
> + if (d_page->updated_glob_count) {
> + ttm_mem_global_free_page(ttm->glob->mem_glob,
> + d_page->p, pool->size);
> + d_page->updated_glob_count = false;
> + }
> ttm_dma_page_put(pool, d_page);
> }
>
> @@ -1070,9 +1078,19 @@ void ttm_dma_unpopulate(struct ttm_dma_tt *ttm_dma, struct device *dev)
>
> /* make sure pages array match list and count number of pages */
> count = 0;
> - list_for_each_entry(d_page, &ttm_dma->pages_list, page_list) {
> + list_for_each_entry_safe(d_page, next, &ttm_dma->pages_list,
> + page_list) {
> ttm->pages[count] = d_page->p;
> count++;
> +
> + if (d_page->updated_glob_count) {
> + ttm_mem_global_free_page(ttm->glob->mem_glob,
> + d_page->p, pool->size);
> + d_page->updated_glob_count = false;
> + }
> +
> + if (is_cached)
> + ttm_dma_page_put(pool, d_page);
> }
>
> spin_lock_irqsave(&pool->lock, irq_flags);
> @@ -1092,19 +1110,6 @@ void ttm_dma_unpopulate(struct ttm_dma_tt *ttm_dma, struct device *dev)
> }
>
> spin_unlock_irqrestore(&pool->lock, irq_flags);
>
> - if (is_cached) {
> - list_for_each_entry_safe(d_page, next, &ttm_dma->pages_list, page_list) {
> - ttm_mem_global_free_page(ttm->glob->mem_glob,
> - d_page->p, pool->size);
> - ttm_dma_page_put(pool, d_page);
> - }
> - } else {
> - for (i = 0; i < count; i++) {
> - ttm_mem_global_free_page(ttm->glob->mem_glob,
> - ttm->pages[i], pool->size);
> - }
> - }
> -
> INIT_LIST_HEAD(&ttm_dma->pages_list);
> for (i = 0; i < ttm->num_pages; i++) {
> ttm->pages[i] = NULL;
diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
index c7f01a4..3e78ea4 100644
--- a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
+++ b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
@@ -121,6 +121,8 @@ struct dma_pool {
  * @vaddr: The virtual address of the page and a flag if the page belongs to a
  * huge pool
  * @dma: The bus address of the page. If the page is not allocated
+ * @updated_glob_count: to indicate whether we already updated global
+ * memory count in ttm_mem_zone
  * via the DMA API, it will be -1.
  */
 struct dma_page {
@@ -128,6 +130,7 @@ struct dma_page {
         unsigned long vaddr;
         struct page *p;
         dma_addr_t dma;
+        bool updated_glob_count;
 };
 
 /*
@@ -874,18 +877,18 @@ static int ttm_dma_page_pool_fill_locked(struct dma_pool *pool,
 }
 
 /*
- * @return count of pages still required to fulfill the request.
  * The populate list is actually a stack (not that is matters as TTM
  * allocates one page at a time.
+ * return dma_page pointer if success, otherwise NULL.
  */
-static int ttm_dma_pool_get_pages(struct dma_pool *pool,
+static struct dma_page *ttm_dma_pool_get_pages(struct dma_pool *pool,
                                   struct ttm_dma_tt *ttm_dma,
                                   unsigned index)
 {
-        struct dma_page *d_page;
+        struct dma_page *d_page = NULL;
         struct ttm_tt *ttm = &ttm_dma->ttm;
         unsigned long irq_flags;
-        int count, r = -ENOMEM;
+        int count;
 
         spin_lock_irqsave(&pool->lock, irq_flags);
         count = ttm_dma_page_pool_fill_locked(pool, &irq_flags);
@@ -894,12 +897,11 @@ static int ttm_dma_pool_get_pages(struct dma_pool *pool,
                 ttm->pages[index] = d_page->p;
                 ttm_dma->dma_address[index] = d_page->dma;
                 list_move_tail(&d_page->page_list, &ttm_dma->pages_list);
-                r = 0;
                 pool->npages_in_use += 1;
                 pool->npages_free -= 1;
         }
         spin_unlock_irqrestore(&pool->lock, irq_flags);
-        return r;
+        return d_page;
 }
 
 static gfp_t ttm_dma_pool_gfp_flags(struct ttm_dma_tt *ttm_dma, bool huge)
@@ -934,6 +936,7 @@ int ttm_dma_populate(struct ttm_dma_tt *ttm_dma, struct device *dev,
         struct ttm_mem_global *mem_glob = ttm->glob->mem_glob;
         unsigned long num_pages = ttm->num_pages;
         struct dma_pool *pool;
+        struct dma_page *d_page;
         enum pool_type type;
         unsigned i;
         int ret;
@@ -962,8 +965,8 @@ int ttm_dma_populate(struct ttm_dma_tt *ttm_dma, struct device *dev,
         while (num_pages >= HPAGE_PMD_NR) {
                 unsigned j;
 
-                ret = ttm_dma_pool_get_pages(pool, ttm_dma, i);
-                if (ret != 0)
+                d_page = ttm_dma_pool_get_pages(pool, ttm_dma, i);
+                if (!d_page)
                         break;
 
                 ret = ttm_mem_global_alloc_page(mem_glob, ttm->pages[i],
@@ -973,6 +976,7 @@ int ttm_dma_populate(struct ttm_dma_tt *ttm_dma, struct device *dev,
                         return -ENOMEM;
                 }
 
+                d_page->updated_glob_count = true;
                 for (j = i + 1; j < (i + HPAGE_PMD_NR); ++j) {
                         ttm->pages[j] = ttm->pages[j - 1] + 1;
                         ttm_dma->dma_address[j] = ttm_dma->dma_address[j - 1] +
@@ -996,8 +1000,8 @@ int ttm_dma_populate(struct ttm_dma_tt *ttm_dma, struct device *dev,
         }
 
         while (num_pages) {
-                ret = ttm_dma_pool_get_pages(pool, ttm_dma, i);
-                if (ret != 0) {
+                d_page = ttm_dma_pool_get_pages(pool, ttm_dma, i);
+                if (!d_page) {
                         ttm_dma_unpopulate(ttm_dma, dev);
                         return -ENOMEM;
                 }
@@ -1009,6 +1013,7 @@ int ttm_dma_populate(struct ttm_dma_tt *ttm_dma, struct device *dev,
                         return -ENOMEM;
                 }
 
+                d_page->updated_glob_count = true;
                 ++i;
                 --num_pages;
         }
@@ -1049,8 +1054,11 @@ void ttm_dma_unpopulate(struct ttm_dma_tt *ttm_dma, struct device *dev)
                         continue;
 
                 count++;
-                ttm_mem_global_free_page(ttm->glob->mem_glob,
-                                         d_page->p, pool->size);
+                if (d_page->updated_glob_count) {
+                        ttm_mem_global_free_page(ttm->glob->mem_glob,
+                                                 d_page->p, pool->size);
+                        d_page->updated_glob_count = false;
+                }
                 ttm_dma_page_put(pool, d_page);
         }
 
@@ -1070,9 +1078,19 @@ void ttm_dma_unpopulate(struct ttm_dma_tt *ttm_dma, struct device *dev)
 
         /* make sure pages array match list and count number of pages */
         count = 0;
-        list_for_each_entry(d_page, &ttm_dma->pages_list, page_list) {
+        list_for_each_entry_safe(d_page, next, &ttm_dma->pages_list,
+                                 page_list) {
                 ttm->pages[count] = d_page->p;
                 count++;
+
+                if (d_page->updated_glob_count) {
+                        ttm_mem_global_free_page(ttm->glob->mem_glob,
+                                                 d_page->p, pool->size);
+                        d_page->updated_glob_count = false;
+                }
+
+                if (is_cached)
+                        ttm_dma_page_put(pool, d_page);
         }
 
         spin_lock_irqsave(&pool->lock, irq_flags);
@@ -1092,19 +1110,6 @@ void ttm_dma_unpopulate(struct ttm_dma_tt *ttm_dma, struct device *dev)
         }
 
         spin_unlock_irqrestore(&pool->lock, irq_flags);
 
-        if (is_cached) {
-                list_for_each_entry_safe(d_page, next, &ttm_dma->pages_list, page_list) {
-                        ttm_mem_global_free_page(ttm->glob->mem_glob,
-                                                 d_page->p, pool->size);
-                        ttm_dma_page_put(pool, d_page);
-                }
-        } else {
-                for (i = 0; i < count; i++) {
-                        ttm_mem_global_free_page(ttm->glob->mem_glob,
-                                                 ttm->pages[i], pool->size);
-                }
-        }
-
         INIT_LIST_HEAD(&ttm_dma->pages_list);
         for (i = 0; i < ttm->num_pages; i++) {
                 ttm->pages[i] = NULL;
add this for correctly updating global mem count in ttm_mem_zone.
before that when ttm_mem_global_alloc_page fails, we would update all
dma_page's global mem count in ttm_dma->pages_list. but actually here
we should not update for the last dma_page.

Signed-off-by: Roger He <Hongbo.He@amd.com>
---
 drivers/gpu/drm/ttm/ttm_page_alloc_dma.c | 57 +++++++++++++++++---------------
 1 file changed, 31 insertions(+), 26 deletions(-)
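For reference, a small standalone model of the accounting imbalance described above. This is plain C, not TTM code; every name in it is hypothetical and chosen only to illustrate why the unwind path must skip pages whose global accounting call failed.

```c
#include <stdbool.h>
#include <stdio.h>

#define NPAGES		5
#define ZONE_LIMIT	4	/* pretend the memory zone only accepts 4 pages */

struct fake_page {
	bool accounted;		/* set only once the global counter accepted it */
};

static long global_count;

/* Stand-in for ttm_mem_global_alloc_page(): may fail on the last page. */
static int fake_global_alloc(struct fake_page *p)
{
	if (global_count >= ZONE_LIMIT)
		return -1;
	global_count++;
	p->accounted = true;
	return 0;
}

/* Stand-in for ttm_mem_global_free_page(): only undo a real charge. */
static void fake_global_free(struct fake_page *p)
{
	if (p->accounted) {
		global_count--;
		p->accounted = false;
	}
}

int main(void)
{
	struct fake_page pages[NPAGES] = { 0 };
	int i, on_list = 0;

	for (i = 0; i < NPAGES; i++) {
		on_list++;			/* page joins the list first ...   */
		if (fake_global_alloc(&pages[i]))
			break;			/* ... then accounting may fail    */
	}

	/* Unwind walks the whole list.  Without the per-page flag the last,
	 * never-accounted page would be "freed" too and the counter would end
	 * up at -1 instead of 0. */
	for (i = 0; i < on_list; i++)
		fake_global_free(&pages[i]);

	printf("global_count after unpopulate: %ld\n", global_count);
	return 0;
}
```

With the flag the counter returns to zero after the unwind; without it the model frees five charges after only four successful allocations, which is the ttm_mem_zone miscount this patch avoids.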