diff mbox series

[20/25] drm/amdgpu: DC also loves to allocate stuff where it shouldn't

Message ID 20200707201229.472834-21-daniel.vetter@ffwll.ch (mailing list archive)
State Not Applicable
Headers show
Series dma-fence annotations, round 3 | expand

Commit Message

Daniel Vetter July 7, 2020, 8:12 p.m. UTC
Not going to bother with a complete&pretty commit message, just
offending backtrace:

        kvmalloc_node+0x47/0x80
        dc_create_state+0x1f/0x60 [amdgpu]
        dc_commit_state+0xcb/0x9b0 [amdgpu]
        amdgpu_dm_atomic_commit_tail+0xd31/0x2010 [amdgpu]
        commit_tail+0xa4/0x140 [drm_kms_helper]
        drm_atomic_helper_commit+0x152/0x180 [drm_kms_helper]
        drm_client_modeset_commit_atomic+0x1ea/0x250 [drm]
        drm_client_modeset_commit_locked+0x55/0x190 [drm]
        drm_client_modeset_commit+0x24/0x40 [drm]

v2: Found more in DC code, I'm just going to pile them all up.

Cc: linux-media@vger.kernel.org
Cc: linaro-mm-sig@lists.linaro.org
Cc: linux-rdma@vger.kernel.org
Cc: amd-gfx@lists.freedesktop.org
Cc: intel-gfx@lists.freedesktop.org
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: Christian König <christian.koenig@amd.com>
Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
---
 drivers/gpu/drm/amd/amdgpu/atom.c                 | 2 +-
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 +-
 drivers/gpu/drm/amd/display/dc/core/dc.c          | 4 +++-
 3 files changed, 5 insertions(+), 3 deletions(-)

Comments

Daniel Vetter July 14, 2020, 11:12 a.m. UTC | #1
On Tue, Jul 07, 2020 at 10:12:24PM +0200, Daniel Vetter wrote:
> Not going to bother with a complete&pretty commit message, just
> offending backtrace:
> 
>         kvmalloc_node+0x47/0x80
>         dc_create_state+0x1f/0x60 [amdgpu]
>         dc_commit_state+0xcb/0x9b0 [amdgpu]
>         amdgpu_dm_atomic_commit_tail+0xd31/0x2010 [amdgpu]
>         commit_tail+0xa4/0x140 [drm_kms_helper]
>         drm_atomic_helper_commit+0x152/0x180 [drm_kms_helper]
>         drm_client_modeset_commit_atomic+0x1ea/0x250 [drm]
>         drm_client_modeset_commit_locked+0x55/0x190 [drm]
>         drm_client_modeset_commit+0x24/0x40 [drm]
> 
> v2: Found more in DC code, I'm just going to pile them all up.
> 
> Cc: linux-media@vger.kernel.org
> Cc: linaro-mm-sig@lists.linaro.org
> Cc: linux-rdma@vger.kernel.org
> Cc: amd-gfx@lists.freedesktop.org
> Cc: intel-gfx@lists.freedesktop.org
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
> Cc: Christian König <christian.koenig@amd.com>
> Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>

Anyone from amdgpu DC team started to look into this and the subsequent
patches in DC? Note that the last one isn't needed anymore because it's
now fix in upstream with

commit cdaae8371aa9d4ea1648a299b1a75946b9556944
Author: Bhawanpreet Lakha <Bhawanpreet.Lakha@amd.com>
Date:   Mon May 11 14:21:17 2020 -0400

    drm/amd/display: Handle GPU reset for DC block

But that patch has a ton of memory allocations in the reset path now, so
you just replaced one deadlock with another one ...

Note that since amdgpu has it's private atomic_commit_tail implemenation
this won't hold up the generic atomic annotations, but I think it will
hold up the tdr annotations at least. Plus would be nice to fix this
somehow.
-Daniel

> ---
>  drivers/gpu/drm/amd/amdgpu/atom.c                 | 2 +-
>  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 +-
>  drivers/gpu/drm/amd/display/dc/core/dc.c          | 4 +++-
>  3 files changed, 5 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/atom.c b/drivers/gpu/drm/amd/amdgpu/atom.c
> index 4cfc786699c7..1b0c674fab25 100644
> --- a/drivers/gpu/drm/amd/amdgpu/atom.c
> +++ b/drivers/gpu/drm/amd/amdgpu/atom.c
> @@ -1226,7 +1226,7 @@ static int amdgpu_atom_execute_table_locked(struct atom_context *ctx, int index,
>  	ectx.abort = false;
>  	ectx.last_jump = 0;
>  	if (ws)
> -		ectx.ws = kcalloc(4, ws, GFP_KERNEL);
> +		ectx.ws = kcalloc(4, ws, GFP_ATOMIC);
>  	else
>  		ectx.ws = NULL;
>  
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> index 6afcc33ff846..3d41eddc7908 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> @@ -6872,7 +6872,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
>  		struct dc_stream_update stream_update;
>  	} *bundle;
>  
> -	bundle = kzalloc(sizeof(*bundle), GFP_KERNEL);
> +	bundle = kzalloc(sizeof(*bundle), GFP_ATOMIC);
>  
>  	if (!bundle) {
>  		dm_error("Failed to allocate update bundle\n");
> diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
> index 942ceb0f6383..f9a58509efb2 100644
> --- a/drivers/gpu/drm/amd/display/dc/core/dc.c
> +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
> @@ -1475,8 +1475,10 @@ bool dc_post_update_surfaces_to_stream(struct dc *dc)
>  
>  struct dc_state *dc_create_state(struct dc *dc)
>  {
> +	/* No you really cant allocate random crap here this late in
> +	 * atomic_commit_tail. */
>  	struct dc_state *context = kvzalloc(sizeof(struct dc_state),
> -					    GFP_KERNEL);
> +					    GFP_ATOMIC);
>  
>  	if (!context)
>  		return NULL;
> -- 
> 2.27.0
>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/amd/amdgpu/atom.c b/drivers/gpu/drm/amd/amdgpu/atom.c
index 4cfc786699c7..1b0c674fab25 100644
--- a/drivers/gpu/drm/amd/amdgpu/atom.c
+++ b/drivers/gpu/drm/amd/amdgpu/atom.c
@@ -1226,7 +1226,7 @@  static int amdgpu_atom_execute_table_locked(struct atom_context *ctx, int index,
 	ectx.abort = false;
 	ectx.last_jump = 0;
 	if (ws)
-		ectx.ws = kcalloc(4, ws, GFP_KERNEL);
+		ectx.ws = kcalloc(4, ws, GFP_ATOMIC);
 	else
 		ectx.ws = NULL;
 
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 6afcc33ff846..3d41eddc7908 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -6872,7 +6872,7 @@  static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
 		struct dc_stream_update stream_update;
 	} *bundle;
 
-	bundle = kzalloc(sizeof(*bundle), GFP_KERNEL);
+	bundle = kzalloc(sizeof(*bundle), GFP_ATOMIC);
 
 	if (!bundle) {
 		dm_error("Failed to allocate update bundle\n");
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
index 942ceb0f6383..f9a58509efb2 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -1475,8 +1475,10 @@  bool dc_post_update_surfaces_to_stream(struct dc *dc)
 
 struct dc_state *dc_create_state(struct dc *dc)
 {
+	/* No you really cant allocate random crap here this late in
+	 * atomic_commit_tail. */
 	struct dc_state *context = kvzalloc(sizeof(struct dc_state),
-					    GFP_KERNEL);
+					    GFP_ATOMIC);
 
 	if (!context)
 		return NULL;