diff mbox

[1/2] drm/ttm: Allow page allocations w/o triggering OOM..

Message ID MWHPR1201MB0127C35FF325E58DD4C7B25CFDEA0@MWHPR1201MB0127.namprd12.prod.outlook.com (mailing list archive)
State New, archived
Headers show

Commit Message

He, Hongbo Jan. 16, 2018, 6:02 a.m. UTC
-----Original Message-----
From: Andrey Grodzovsky [mailto:andrey.grodzovsky@amd.com] 
Sent: Saturday, January 13, 2018 6:29 AM
To: dri-devel@lists.freedesktop.org; amd-gfx@lists.freedesktop.org
Cc: Koenig, Christian <Christian.Koenig@amd.com>; He, Roger <Hongbo.He@amd.com>; Grodzovsky, Andrey <Andrey.Grodzovsky@amd.com>
Subject: [PATCH 1/2] drm/ttm: Allow page allocations w/o triggering OOM..

This is to allow drivers to choose to avoid OOM invocation and handle page allocation failures instead.

Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
---
 drivers/gpu/drm/ttm/ttm_bo.c             |  3 +++
 drivers/gpu/drm/ttm/ttm_page_alloc.c     |  6 ++++++
 drivers/gpu/drm/ttm/ttm_page_alloc_dma.c |  3 +++
 drivers/gpu/drm/ttm/ttm_tt.c             | 13 +++++++++++--
 include/drm/ttm/ttm_bo_api.h             |  1 +
 include/drm/ttm/ttm_bo_driver.h          |  4 ++++
 6 files changed, 28 insertions(+), 2 deletions(-)

--
2.7.4

Comments

Christian König Jan. 16, 2018, 8:53 a.m. UTC | #1
Am 16.01.2018 um 07:02 schrieb He, Roger:
>
> -----Original Message-----
> From: Andrey Grodzovsky [mailto:andrey.grodzovsky@amd.com]
> Sent: Saturday, January 13, 2018 6:29 AM
> To: dri-devel@lists.freedesktop.org; amd-gfx@lists.freedesktop.org
> Cc: Koenig, Christian <Christian.Koenig@amd.com>; He, Roger <Hongbo.He@amd.com>; Grodzovsky, Andrey <Andrey.Grodzovsky@amd.com>
> Subject: [PATCH 1/2] drm/ttm: Allow page allocations w/o triggering OOM..
>
> This is to allow drivers to choose to avoid OOM invocation and handle page allocation failures instead.
>
> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
> ---
>   drivers/gpu/drm/ttm/ttm_bo.c             |  3 +++
>   drivers/gpu/drm/ttm/ttm_page_alloc.c     |  6 ++++++
>   drivers/gpu/drm/ttm/ttm_page_alloc_dma.c |  3 +++
>   drivers/gpu/drm/ttm/ttm_tt.c             | 13 +++++++++++--
>   include/drm/ttm/ttm_bo_api.h             |  1 +
>   include/drm/ttm/ttm_bo_driver.h          |  4 ++++
>   6 files changed, 28 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
> index 2eb71ff..f32aab1 100644
> --- a/drivers/gpu/drm/ttm/ttm_bo.c
> +++ b/drivers/gpu/drm/ttm/ttm_bo.c
> @@ -234,6 +234,9 @@ static int ttm_bo_add_ttm(struct ttm_buffer_object *bo, bool zero_alloc)
>   	if (bdev->need_dma32)
>   		page_flags |= TTM_PAGE_FLAG_DMA32;
>   
> +	if (bdev->no_retry)
> +		page_flags |= TTM_PAGE_FLAG_NO_RETRY;
> +
>   	switch (bo->type) {
>   	case ttm_bo_type_device:
>   		if (zero_alloc)
> diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c
> index 0eab24e..f34c843 100644
> --- a/drivers/gpu/drm/ttm/ttm_page_alloc.c
> +++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c
> @@ -741,6 +741,9 @@ static int ttm_page_pool_get_pages(struct ttm_page_pool *pool,
>   		if (ttm_flags & TTM_PAGE_FLAG_ZERO_ALLOC)
>   			gfp_flags |= __GFP_ZERO;
>   
> +		if (ttm_flags & TTM_PAGE_FLAG_NO_RETRY)
> +			gfp_flags |= __GFP_RETRY_MAYFAIL;
> +
>   		/* ttm_alloc_new_pages doesn't reference pool so we can run
>   		 * multiple requests in parallel.
>   		 **/
> @@ -893,6 +896,9 @@ static int ttm_get_pages(struct page **pages, unsigned npages, int flags,
>   		if (flags & TTM_PAGE_FLAG_ZERO_ALLOC)
>   			gfp_flags |= __GFP_ZERO;
>   
> +		if (flags & TTM_PAGE_FLAG_NO_RETRY)
> +			gfp_flags |= __GFP_RETRY_MAYFAIL;
> +
>   		if (flags & TTM_PAGE_FLAG_DMA32)
>   			gfp_flags |= GFP_DMA32;
>   		else
> diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
> index c7f01a4..6949ef7 100644
> --- a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
> +++ b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
> @@ -920,6 +920,9 @@ static gfp_t ttm_dma_pool_gfp_flags(struct ttm_dma_tt *ttm_dma, bool huge)
>   		gfp_flags &= ~__GFP_COMP;
>   	}
>   
> +	if (ttm->page_flags & TTM_PAGE_FLAG_NO_RETRY)
> +		gfp_flags |= __GFP_RETRY_MAYFAIL;
> +
>   	return gfp_flags;
>   }
>   
> diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
> index 5a046a3..9e4d43d 100644
> --- a/drivers/gpu/drm/ttm/ttm_tt.c
> +++ b/drivers/gpu/drm/ttm/ttm_tt.c
> @@ -301,7 +301,11 @@ int ttm_tt_swapin(struct ttm_tt *ttm)
>   	swap_space = swap_storage->f_mapping;
>   
>   	for (i = 0; i < ttm->num_pages; ++i) {
> -		from_page = shmem_read_mapping_page(swap_space, i);
> +		gfp_t gfp_mask = mapping_gfp_mask(swap_space);
> +
> +		gfp_mask |= (ttm->page_flags & TTM_PAGE_FLAG_NO_RETRY ? __GFP_RETRY_MAYFAIL : 0);
> +		from_page = shmem_read_mapping_page_gfp(swap_space, i, gfp_mask);
> +
>   		if (IS_ERR(from_page)) {
>   			ret = PTR_ERR(from_page);
>   			goto out_err;
> @@ -350,10 +354,15 @@ int ttm_tt_swapout(struct ttm_tt *ttm, struct file *persistent_swap_storage)
>   	swap_space = swap_storage->f_mapping;
>   
>   	for (i = 0; i < ttm->num_pages; ++i) {
> +		gfp_t gfp_mask = mapping_gfp_mask(swap_space);
> +
> +		gfp_mask |= (ttm->page_flags & TTM_PAGE_FLAG_NO_RETRY ? __GFP_RETRY_MAYFAIL : 0);
> +
>   		from_page = ttm->pages[i];
>   		if (unlikely(from_page == NULL))
>   			continue;
> -		to_page = shmem_read_mapping_page(swap_space, i);
> +
> +		to_page = shmem_read_mapping_page_gfp(swap_space, i, gfp_mask);
>   		if (IS_ERR(to_page)) {
>   			ret = PTR_ERR(to_page);
>   			goto out_err;
> diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
> index 2cd025c..099f24b 100644
> --- a/include/drm/ttm/ttm_bo_api.h
> +++ b/include/drm/ttm/ttm_bo_api.h
> @@ -176,6 +176,7 @@ struct ttm_buffer_object {
>   	unsigned long num_pages;
>   	size_t acc_size;
>   
> +
> Please remove this newline here.
> Apart from that,   this patch is Reviewed-by: Roger He <Hongbo.He@amd.com>

Yup, agreed. Apart from the nitpick, the patch is
Reviewed-by: Christian König <christian.koenig@amd.com> as well.

Christian.

>
> Thanks
> Roger(Hongbo.He)
>
>   	/**
>   	* Members not needing protection.
>   	*/
> diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
> index 94064b1..9b417eb 100644
> --- a/include/drm/ttm/ttm_bo_driver.h
> +++ b/include/drm/ttm/ttm_bo_driver.h
> @@ -86,6 +86,7 @@ struct ttm_backend_func {
>   #define TTM_PAGE_FLAG_ZERO_ALLOC      (1 << 6)
>   #define TTM_PAGE_FLAG_DMA32           (1 << 7)
>   #define TTM_PAGE_FLAG_SG              (1 << 8)
> +#define TTM_PAGE_FLAG_NO_RETRY	       (1 << 9)
>   
>   enum ttm_caching_state {
>   	tt_uncached,
> @@ -556,6 +557,7 @@ struct ttm_bo_global {
>    * @dev_mapping: A pointer to the struct address_space representing the
>    * device address space.
>    * @wq: Work queue structure for the delayed delete workqueue.
> + * @no_retry: Don't retry allocation if it fails
>    *
>    */
>   
> @@ -592,6 +594,8 @@ struct ttm_bo_device {
>   	struct delayed_work wq;
>   
>   	bool need_dma32;
> +
> +	bool no_retry;
>   };
>   
>   /**
> --
> 2.7.4
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
diff mbox

Patch

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 2eb71ff..f32aab1 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -234,6 +234,9 @@  static int ttm_bo_add_ttm(struct ttm_buffer_object *bo, bool zero_alloc)
 	if (bdev->need_dma32)
 		page_flags |= TTM_PAGE_FLAG_DMA32;
 
+	if (bdev->no_retry)
+		page_flags |= TTM_PAGE_FLAG_NO_RETRY;
+
 	switch (bo->type) {
 	case ttm_bo_type_device:
 		if (zero_alloc)
diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c
index 0eab24e..f34c843 100644
--- a/drivers/gpu/drm/ttm/ttm_page_alloc.c
+++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c
@@ -741,6 +741,9 @@  static int ttm_page_pool_get_pages(struct ttm_page_pool *pool,
 		if (ttm_flags & TTM_PAGE_FLAG_ZERO_ALLOC)
 			gfp_flags |= __GFP_ZERO;
 
+		if (ttm_flags & TTM_PAGE_FLAG_NO_RETRY)
+			gfp_flags |= __GFP_RETRY_MAYFAIL;
+
 		/* ttm_alloc_new_pages doesn't reference pool so we can run
 		 * multiple requests in parallel.
 		 **/
@@ -893,6 +896,9 @@  static int ttm_get_pages(struct page **pages, unsigned npages, int flags,
 		if (flags & TTM_PAGE_FLAG_ZERO_ALLOC)
 			gfp_flags |= __GFP_ZERO;
 
+		if (flags & TTM_PAGE_FLAG_NO_RETRY)
+			gfp_flags |= __GFP_RETRY_MAYFAIL;
+
 		if (flags & TTM_PAGE_FLAG_DMA32)
 			gfp_flags |= GFP_DMA32;
 		else
diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
index c7f01a4..6949ef7 100644
--- a/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
+++ b/drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
@@ -920,6 +920,9 @@  static gfp_t ttm_dma_pool_gfp_flags(struct ttm_dma_tt *ttm_dma, bool huge)
 		gfp_flags &= ~__GFP_COMP;
 	}
 
+	if (ttm->page_flags & TTM_PAGE_FLAG_NO_RETRY)
+		gfp_flags |= __GFP_RETRY_MAYFAIL;
+
 	return gfp_flags;
 }
 
diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
index 5a046a3..9e4d43d 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -301,7 +301,11 @@  int ttm_tt_swapin(struct ttm_tt *ttm)
 	swap_space = swap_storage->f_mapping;
 
 	for (i = 0; i < ttm->num_pages; ++i) {
-		from_page = shmem_read_mapping_page(swap_space, i);
+		gfp_t gfp_mask = mapping_gfp_mask(swap_space);
+
+		gfp_mask |= (ttm->page_flags & TTM_PAGE_FLAG_NO_RETRY ? __GFP_RETRY_MAYFAIL : 0);
+		from_page = shmem_read_mapping_page_gfp(swap_space, i, gfp_mask);
+
 		if (IS_ERR(from_page)) {
 			ret = PTR_ERR(from_page);
 			goto out_err;
@@ -350,10 +354,15 @@  int ttm_tt_swapout(struct ttm_tt *ttm, struct file *persistent_swap_storage)
 	swap_space = swap_storage->f_mapping;
 
 	for (i = 0; i < ttm->num_pages; ++i) {
+		gfp_t gfp_mask = mapping_gfp_mask(swap_space);
+
+		gfp_mask |= (ttm->page_flags & TTM_PAGE_FLAG_NO_RETRY ? __GFP_RETRY_MAYFAIL : 0);
+
 		from_page = ttm->pages[i];
 		if (unlikely(from_page == NULL))
 			continue;
-		to_page = shmem_read_mapping_page(swap_space, i);
+
+		to_page = shmem_read_mapping_page_gfp(swap_space, i, gfp_mask);
 		if (IS_ERR(to_page)) {
 			ret = PTR_ERR(to_page);
 			goto out_err;
diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
index 2cd025c..099f24b 100644
--- a/include/drm/ttm/ttm_bo_api.h
+++ b/include/drm/ttm/ttm_bo_api.h
@@ -176,6 +176,7 @@  struct ttm_buffer_object {
 	unsigned long num_pages;
 	size_t acc_size;
 
+
Please remove this newline here.
Apart from that,   this patch is Reviewed-by: Roger He <Hongbo.He@amd.com>

Thanks
Roger(Hongbo.He)

 	/**
 	* Members not needing protection.
 	*/
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index 94064b1..9b417eb 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -86,6 +86,7 @@  struct ttm_backend_func {
 #define TTM_PAGE_FLAG_ZERO_ALLOC      (1 << 6)
 #define TTM_PAGE_FLAG_DMA32           (1 << 7)
 #define TTM_PAGE_FLAG_SG              (1 << 8)
+#define TTM_PAGE_FLAG_NO_RETRY	       (1 << 9)
 
 enum ttm_caching_state {
 	tt_uncached,
@@ -556,6 +557,7 @@  struct ttm_bo_global {
  * @dev_mapping: A pointer to the struct address_space representing the
  * device address space.
  * @wq: Work queue structure for the delayed delete workqueue.
+ * @no_retry: Don't retry allocation if it fails
  *
  */
 
@@ -592,6 +594,8 @@  struct ttm_bo_device {
 	struct delayed_work wq;
 
 	bool need_dma32;
+
+	bool no_retry;
 };
 
 /**