Patchwork drm/atomic-helper: Fix leak in disable_all

login
register
mail settings
Submitter Daniel Vetter
Date July 15, 2017, 9:31 a.m.
Message ID <20170715093106.19873-1-daniel.vetter@ffwll.ch>
Download mbox | patch
Permalink /patch/9842217/
State New
Headers show

Comments

Daniel Vetter - July 15, 2017, 9:31 a.m.
The legacy plane->fb pointer is refcounted by calling
drm_atomic_clean_old_fb().

In practice this isn't a real problem because:
- The caller in the i915 gpu reset code restores the original state
  again, which means the plane->fb pointer won't change, hence can't
  leak.
- Drivers using drm_atomic_helper_shutdown call the fbdev cleanup
  first, and that usually cleans up the fb through
  drm_remove_framebuffer, which does this correctly.
- Without fbdev the only framebuffers are from userspace, and those
  get cleaned up (again using drm_remove_framebuffer) befor the driver
  can even be unloaded.

But in i915 I've switched the cleanup sequence around so that the
_shutdown() calls happens after the drm_remove_framebuffer(), which is
how I discovered this issue.

v2: My analysis why the current code was ok for gpu reset and
suspend/resume was correct, but then I totally failed to realize that
we better keep this symmetric. Thanksfully CI noticed that for
balance, a refcounting bug must exist at 2 places if previously there
was no issue ...

v3: Don't be lazy and compute the plane_mask in
commit_duplicated_state properly too, instead of just using ~0U.

Cc: martin.peres@free.fr
Cc: chris@chris-wilson.co.uk
Acked-by: Dave Airlie <airlied@gmail.com> (v1)
Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
---
 drivers/gpu/drm/drm_atomic_helper.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)
Maarten Lankhorst - July 17, 2017, 9:39 a.m.
Op 15-07-17 om 11:31 schreef Daniel Vetter:
> The legacy plane->fb pointer is refcounted by calling
> drm_atomic_clean_old_fb().
>
> In practice this isn't a real problem because:
> - The caller in the i915 gpu reset code restores the original state
>   again, which means the plane->fb pointer won't change, hence can't
>   leak.
> - Drivers using drm_atomic_helper_shutdown call the fbdev cleanup
>   first, and that usually cleans up the fb through
>   drm_remove_framebuffer, which does this correctly.
> - Without fbdev the only framebuffers are from userspace, and those
>   get cleaned up (again using drm_remove_framebuffer) befor the driver
>   can even be unloaded.
>
> But in i915 I've switched the cleanup sequence around so that the
> _shutdown() calls happens after the drm_remove_framebuffer(), which is
> how I discovered this issue.
>
> v2: My analysis why the current code was ok for gpu reset and
> suspend/resume was correct, but then I totally failed to realize that
> we better keep this symmetric. Thanksfully CI noticed that for
> balance, a refcounting bug must exist at 2 places if previously there
> was no issue ...
>
> v3: Don't be lazy and compute the plane_mask in
> commit_duplicated_state properly too, instead of just using ~0U.
>
> Cc: martin.peres@free.fr
> Cc: chris@chris-wilson.co.uk
> Acked-by: Dave Airlie <airlied@gmail.com> (v1)
> Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
> ---
>  drivers/gpu/drm/drm_atomic_helper.c | 18 ++++++++++++++++--
>  1 file changed, 16 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c
> index b07fc30372d3..545328a9a769 100644
> --- a/drivers/gpu/drm/drm_atomic_helper.c
> +++ b/drivers/gpu/drm/drm_atomic_helper.c
> @@ -2726,6 +2726,7 @@ int drm_atomic_helper_disable_all(struct drm_device *dev,
>  	struct drm_plane *plane;
>  	struct drm_crtc_state *crtc_state;
>  	struct drm_crtc *crtc;
> +	unsigned plane_mask = 0;
>  	int ret, i;
>  
>  	state = drm_atomic_state_alloc(dev);
> @@ -2768,10 +2769,14 @@ int drm_atomic_helper_disable_all(struct drm_device *dev,
>  			goto free;
>  
>  		drm_atomic_set_fb_for_plane(plane_state, NULL);
> +		plane_mask |= BIT(drm_plane_index(plane));
> +		plane->old_fb = plane->fb;
>  	}
>  
>  	ret = drm_atomic_commit(state);
>  free:
> +	if (plane_mask)
> +		drm_atomic_clean_old_fb(dev, plane_mask, ret);
>  	drm_atomic_state_put(state);
>  	return ret;
>  }
> @@ -2902,11 +2907,16 @@ int drm_atomic_helper_commit_duplicated_state(struct drm_atomic_state *state,
>  	struct drm_connector_state *new_conn_state;
>  	struct drm_crtc *crtc;
>  	struct drm_crtc_state *new_crtc_state;
> +	unsigned plane_mask = 0;
> +	struct drm_device *dev = state->dev;
> +	int ret;
>  
>  	state->acquire_ctx = ctx;
>  
> -	for_each_new_plane_in_state(state, plane, new_plane_state, i)
> +	for_each_new_plane_in_state(state, plane, new_plane_state, i) {
> +		plane_mask |= BIT(drm_plane_index(plane));
We should really add a drm_plane_mask/drm_connector_mask at some point, to complement drm_crtc_mask.
>  		state->planes[i].old_state = plane->state;
> +	}
>  
>  	for_each_new_crtc_in_state(state, crtc, new_crtc_state, i)
>  		state->crtcs[i].old_state = crtc->state;
> @@ -2914,7 +2924,11 @@ int drm_atomic_helper_commit_duplicated_state(struct drm_atomic_state *state,
>  	for_each_new_connector_in_state(state, connector, new_conn_state, i)
>  		state->connectors[i].old_state = connector->state;
>  
> -	return drm_atomic_commit(state);
> +	ret = drm_atomic_commit(state);
> +	if (plane_mask)
> +		drm_atomic_clean_old_fb(dev, plane_mask, ret);
Kill the if () part, and make it unconditional? On second thought, I should have done the same in drm_framebuffer.c
> +	return ret;
>  }
>  EXPORT_SYMBOL(drm_atomic_helper_commit_duplicated_state);
>
Daniel Vetter - July 17, 2017, 3:21 p.m.
On Mon, Jul 17, 2017 at 11:39:56AM +0200, Maarten Lankhorst wrote:
> Op 15-07-17 om 11:31 schreef Daniel Vetter:
> > The legacy plane->fb pointer is refcounted by calling
> > drm_atomic_clean_old_fb().
> >
> > In practice this isn't a real problem because:
> > - The caller in the i915 gpu reset code restores the original state
> >   again, which means the plane->fb pointer won't change, hence can't
> >   leak.
> > - Drivers using drm_atomic_helper_shutdown call the fbdev cleanup
> >   first, and that usually cleans up the fb through
> >   drm_remove_framebuffer, which does this correctly.
> > - Without fbdev the only framebuffers are from userspace, and those
> >   get cleaned up (again using drm_remove_framebuffer) befor the driver
> >   can even be unloaded.
> >
> > But in i915 I've switched the cleanup sequence around so that the
> > _shutdown() calls happens after the drm_remove_framebuffer(), which is
> > how I discovered this issue.
> >
> > v2: My analysis why the current code was ok for gpu reset and
> > suspend/resume was correct, but then I totally failed to realize that
> > we better keep this symmetric. Thanksfully CI noticed that for
> > balance, a refcounting bug must exist at 2 places if previously there
> > was no issue ...
> >
> > v3: Don't be lazy and compute the plane_mask in
> > commit_duplicated_state properly too, instead of just using ~0U.
> >
> > Cc: martin.peres@free.fr
> > Cc: chris@chris-wilson.co.uk
> > Acked-by: Dave Airlie <airlied@gmail.com> (v1)
> > Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
> > ---
> >  drivers/gpu/drm/drm_atomic_helper.c | 18 ++++++++++++++++--
> >  1 file changed, 16 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c
> > index b07fc30372d3..545328a9a769 100644
> > --- a/drivers/gpu/drm/drm_atomic_helper.c
> > +++ b/drivers/gpu/drm/drm_atomic_helper.c
> > @@ -2726,6 +2726,7 @@ int drm_atomic_helper_disable_all(struct drm_device *dev,
> >  	struct drm_plane *plane;
> >  	struct drm_crtc_state *crtc_state;
> >  	struct drm_crtc *crtc;
> > +	unsigned plane_mask = 0;
> >  	int ret, i;
> >  
> >  	state = drm_atomic_state_alloc(dev);
> > @@ -2768,10 +2769,14 @@ int drm_atomic_helper_disable_all(struct drm_device *dev,
> >  			goto free;
> >  
> >  		drm_atomic_set_fb_for_plane(plane_state, NULL);
> > +		plane_mask |= BIT(drm_plane_index(plane));
> > +		plane->old_fb = plane->fb;
> >  	}
> >  
> >  	ret = drm_atomic_commit(state);
> >  free:
> > +	if (plane_mask)
> > +		drm_atomic_clean_old_fb(dev, plane_mask, ret);
> >  	drm_atomic_state_put(state);
> >  	return ret;
> >  }
> > @@ -2902,11 +2907,16 @@ int drm_atomic_helper_commit_duplicated_state(struct drm_atomic_state *state,
> >  	struct drm_connector_state *new_conn_state;
> >  	struct drm_crtc *crtc;
> >  	struct drm_crtc_state *new_crtc_state;
> > +	unsigned plane_mask = 0;
> > +	struct drm_device *dev = state->dev;
> > +	int ret;
> >  
> >  	state->acquire_ctx = ctx;
> >  
> > -	for_each_new_plane_in_state(state, plane, new_plane_state, i)
> > +	for_each_new_plane_in_state(state, plane, new_plane_state, i) {
> > +		plane_mask |= BIT(drm_plane_index(plane));
> We should really add a drm_plane_mask/drm_connector_mask at some point, to complement drm_crtc_mask.
> >  		state->planes[i].old_state = plane->state;
> > +	}
> >  
> >  	for_each_new_crtc_in_state(state, crtc, new_crtc_state, i)
> >  		state->crtcs[i].old_state = crtc->state;
> > @@ -2914,7 +2924,11 @@ int drm_atomic_helper_commit_duplicated_state(struct drm_atomic_state *state,
> >  	for_each_new_connector_in_state(state, connector, new_conn_state, i)
> >  		state->connectors[i].old_state = connector->state;
> >  
> > -	return drm_atomic_commit(state);
> > +	ret = drm_atomic_commit(state);
> > +	if (plane_mask)
> > +		drm_atomic_clean_old_fb(dev, plane_mask, ret);
> Kill the if () part, and make it unconditional? On second thought, I should have done the same in drm_framebuffer.c

I'd prefer to not bikeshed this stuff too much and just go with what we do
everywhere else (i.e. rmfb code and atomic commit) for consistency.
clean_old_fb is definitely a horrible function with misleading kerneldoc
on top, and I think the best way to clean that up would be to:

- Move the plane_mask computation in drm_atmic_commit and also put the
  clean_old_fb call in there. Maybe give it a more descriptive name like
  update_legacy_fb or whatever. Unexport them.

- This would break the compat helpers, where drm_atomic_commit must not
  update the legacy refcounts, because for set_config, page_flip and the
  plane hooks the core does that already. Create a
  drm_atomic_commit_legacy for these.

But since one of my patches caused an existing issue to pop up as a
regression, and this series fixes it, I'd like to first get the minimal
fix in through drm-intel. And then bikeshed it properly in drm-misc.

Typing the patches is also a bit annoying right now since I have a few
other atomic_commit patches in-flight ...

Thanks, Daniel

> > +	return ret;
> >  }
> >  EXPORT_SYMBOL(drm_atomic_helper_commit_duplicated_state);
> >  
> 
>
Maarten Lankhorst - July 19, 2017, 10:15 a.m.
Op 17-07-17 om 17:21 schreef Daniel Vetter:
> On Mon, Jul 17, 2017 at 11:39:56AM +0200, Maarten Lankhorst wrote:
>> Op 15-07-17 om 11:31 schreef Daniel Vetter:
>>> The legacy plane->fb pointer is refcounted by calling
>>> drm_atomic_clean_old_fb().
>>>
>>> In practice this isn't a real problem because:
>>> - The caller in the i915 gpu reset code restores the original state
>>>   again, which means the plane->fb pointer won't change, hence can't
>>>   leak.
>>> - Drivers using drm_atomic_helper_shutdown call the fbdev cleanup
>>>   first, and that usually cleans up the fb through
>>>   drm_remove_framebuffer, which does this correctly.
>>> - Without fbdev the only framebuffers are from userspace, and those
>>>   get cleaned up (again using drm_remove_framebuffer) befor the driver
>>>   can even be unloaded.
>>>
>>> But in i915 I've switched the cleanup sequence around so that the
>>> _shutdown() calls happens after the drm_remove_framebuffer(), which is
>>> how I discovered this issue.
>>>
>>> v2: My analysis why the current code was ok for gpu reset and
>>> suspend/resume was correct, but then I totally failed to realize that
>>> we better keep this symmetric. Thanksfully CI noticed that for
>>> balance, a refcounting bug must exist at 2 places if previously there
>>> was no issue ...
>>>
>>> v3: Don't be lazy and compute the plane_mask in
>>> commit_duplicated_state properly too, instead of just using ~0U.
>>>
>>> Cc: martin.peres@free.fr
>>> Cc: chris@chris-wilson.co.uk
>>> Acked-by: Dave Airlie <airlied@gmail.com> (v1)
>>> Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
>>> ---
>>>  drivers/gpu/drm/drm_atomic_helper.c | 18 ++++++++++++++++--
>>>  1 file changed, 16 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c
>>> index b07fc30372d3..545328a9a769 100644
>>> --- a/drivers/gpu/drm/drm_atomic_helper.c
>>> +++ b/drivers/gpu/drm/drm_atomic_helper.c
>>> @@ -2726,6 +2726,7 @@ int drm_atomic_helper_disable_all(struct drm_device *dev,
>>>  	struct drm_plane *plane;
>>>  	struct drm_crtc_state *crtc_state;
>>>  	struct drm_crtc *crtc;
>>> +	unsigned plane_mask = 0;
>>>  	int ret, i;
>>>  
>>>  	state = drm_atomic_state_alloc(dev);
>>> @@ -2768,10 +2769,14 @@ int drm_atomic_helper_disable_all(struct drm_device *dev,
>>>  			goto free;
>>>  
>>>  		drm_atomic_set_fb_for_plane(plane_state, NULL);
>>> +		plane_mask |= BIT(drm_plane_index(plane));
>>> +		plane->old_fb = plane->fb;
>>>  	}
>>>  
>>>  	ret = drm_atomic_commit(state);
>>>  free:
>>> +	if (plane_mask)
>>> +		drm_atomic_clean_old_fb(dev, plane_mask, ret);
>>>  	drm_atomic_state_put(state);
>>>  	return ret;
>>>  }
>>> @@ -2902,11 +2907,16 @@ int drm_atomic_helper_commit_duplicated_state(struct drm_atomic_state *state,
>>>  	struct drm_connector_state *new_conn_state;
>>>  	struct drm_crtc *crtc;
>>>  	struct drm_crtc_state *new_crtc_state;
>>> +	unsigned plane_mask = 0;
>>> +	struct drm_device *dev = state->dev;
>>> +	int ret;
>>>  
>>>  	state->acquire_ctx = ctx;
>>>  
>>> -	for_each_new_plane_in_state(state, plane, new_plane_state, i)
>>> +	for_each_new_plane_in_state(state, plane, new_plane_state, i) {
>>> +		plane_mask |= BIT(drm_plane_index(plane));
>> We should really add a drm_plane_mask/drm_connector_mask at some point, to complement drm_crtc_mask.
>>>  		state->planes[i].old_state = plane->state;
>>> +	}
>>>  
>>>  	for_each_new_crtc_in_state(state, crtc, new_crtc_state, i)
>>>  		state->crtcs[i].old_state = crtc->state;
>>> @@ -2914,7 +2924,11 @@ int drm_atomic_helper_commit_duplicated_state(struct drm_atomic_state *state,
>>>  	for_each_new_connector_in_state(state, connector, new_conn_state, i)
>>>  		state->connectors[i].old_state = connector->state;
>>>  
>>> -	return drm_atomic_commit(state);
>>> +	ret = drm_atomic_commit(state);
>>> +	if (plane_mask)
>>> +		drm_atomic_clean_old_fb(dev, plane_mask, ret);
>> Kill the if () part, and make it unconditional? On second thought, I should have done the same in drm_framebuffer.c
> I'd prefer to not bikeshed this stuff too much and just go with what we do
> everywhere else (i.e. rmfb code and atomic commit) for consistency.
> clean_old_fb is definitely a horrible function with misleading kerneldoc
> on top, and I think the best way to clean that up would be to:
>
> - Move the plane_mask computation in drm_atmic_commit and also put the
>   clean_old_fb call in there. Maybe give it a more descriptive name like
>   update_legacy_fb or whatever. Unexport them.
>
> - This would break the compat helpers, where drm_atomic_commit must not
>   update the legacy refcounts, because for set_config, page_flip and the
>   plane hooks the core does that already. Create a
>   drm_atomic_commit_legacy for these.
>
> But since one of my patches caused an existing issue to pop up as a
> regression, and this series fixes it, I'd like to first get the minimal
> fix in through drm-intel. And then bikeshed it properly in drm-misc.
>
> Typing the patches is also a bit annoying right now since I have a few
> other atomic_commit patches in-flight ...
I'd prefer a bright gray bikeshed,

but hey..

Reviewed-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>

Patch

diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c
index b07fc30372d3..545328a9a769 100644
--- a/drivers/gpu/drm/drm_atomic_helper.c
+++ b/drivers/gpu/drm/drm_atomic_helper.c
@@ -2726,6 +2726,7 @@  int drm_atomic_helper_disable_all(struct drm_device *dev,
 	struct drm_plane *plane;
 	struct drm_crtc_state *crtc_state;
 	struct drm_crtc *crtc;
+	unsigned plane_mask = 0;
 	int ret, i;
 
 	state = drm_atomic_state_alloc(dev);
@@ -2768,10 +2769,14 @@  int drm_atomic_helper_disable_all(struct drm_device *dev,
 			goto free;
 
 		drm_atomic_set_fb_for_plane(plane_state, NULL);
+		plane_mask |= BIT(drm_plane_index(plane));
+		plane->old_fb = plane->fb;
 	}
 
 	ret = drm_atomic_commit(state);
 free:
+	if (plane_mask)
+		drm_atomic_clean_old_fb(dev, plane_mask, ret);
 	drm_atomic_state_put(state);
 	return ret;
 }
@@ -2902,11 +2907,16 @@  int drm_atomic_helper_commit_duplicated_state(struct drm_atomic_state *state,
 	struct drm_connector_state *new_conn_state;
 	struct drm_crtc *crtc;
 	struct drm_crtc_state *new_crtc_state;
+	unsigned plane_mask = 0;
+	struct drm_device *dev = state->dev;
+	int ret;
 
 	state->acquire_ctx = ctx;
 
-	for_each_new_plane_in_state(state, plane, new_plane_state, i)
+	for_each_new_plane_in_state(state, plane, new_plane_state, i) {
+		plane_mask |= BIT(drm_plane_index(plane));
 		state->planes[i].old_state = plane->state;
+	}
 
 	for_each_new_crtc_in_state(state, crtc, new_crtc_state, i)
 		state->crtcs[i].old_state = crtc->state;
@@ -2914,7 +2924,11 @@  int drm_atomic_helper_commit_duplicated_state(struct drm_atomic_state *state,
 	for_each_new_connector_in_state(state, connector, new_conn_state, i)
 		state->connectors[i].old_state = connector->state;
 
-	return drm_atomic_commit(state);
+	ret = drm_atomic_commit(state);
+	if (plane_mask)
+		drm_atomic_clean_old_fb(dev, plane_mask, ret);
+
+	return ret;
 }
 EXPORT_SYMBOL(drm_atomic_helper_commit_duplicated_state);