diff mbox

[V2] drm/tegra: Fix crash caused by reference count imbalance

Message ID 1463569889-14632-1-git-send-email-jonathanh@nvidia.com (mailing list archive)
State New, archived
Headers show

Commit Message

Jon Hunter May 18, 2016, 11:11 a.m. UTC
Commit d2307dea14a4 ("drm/atomic: use connector references (v3)") added
reference counting for DRM connectors and this caused a crash when
exercising system suspend on Tegra114 Dalmore.

The Tegra DSI driver implements a Tegra specific function,
tegra_dsi_connector_duplicate_state(), to duplicate the connector state
and destroys the state using the generic helper function,
drm_atomic_helper_connector_destroy_state(). Following commit
d2307dea14a4 ("drm/atomic: use connector references (v3)") there is
now an imbalance in the connector reference count because the Tegra
function to duplicate state does not take a reference when duplicating
the state information. However, the generic helper function to destroy
the state information assumes a reference has been taken and during
system suspend, when the connector state is destroyed, this leads to a
crash because we attempt to put the reference for an object that has
already been freed.

Fix this by calling __drm_atomic_helper_connector_duplicate_state() from
tegra_dsi_connector_duplicate_state() to ensure that we take a reference
on a connector if crtc is set. Note that this will also copy the
connector state a 2nd time, but this should be harmless.

By fixing tegra_dsi_connector_duplicate_state() to take a reference,
although a crash was no longer seen, it was then observed that after
each system suspend-resume cycle, the reference would be one greater
than before the suspend-resume cycle. Following commit d2307dea14a4
("drm/atomic: use connector references (v3)"), it was found that we
also need to put the reference when calling the function
tegra_dsi_connector_reset() before freeing the state. Fix this by
updating tegra_dsi_connector_reset() to call the function
__drm_atomic_helper_connector_destroy_state() in order to put the
reference for the connector.

Finally, add a warning if allocating memory for the state information
fails in tegra_dsi_connector_reset().

Fixes: d2307dea14a4 ("drm/atomic: use connector references (v3)")

Signed-off-by: Jon Hunter <jonathanh@nvidia.com>
---

V2 changes:
- Updated to next-20160518
- Replaced open coding of call to drm_connector_reference() with
  __drm_atomic_helper_connector_duplicate_state() per Daniel's feedback.

 drivers/gpu/drm/tegra/dsi.c | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

Comments

Daniel Vetter May 18, 2016, 12:17 p.m. UTC | #1
On Wed, May 18, 2016 at 12:11:29PM +0100, Jon Hunter wrote:
> Commit d2307dea14a4 ("drm/atomic: use connector references (v3)") added
> reference counting for DRM connectors and this caused a crash when
> exercising system suspend on Tegra114 Dalmore.
> 
> The Tegra DSI driver implements a Tegra specific function,
> tegra_dsi_connector_duplicate_state(), to duplicate the connector state
> and destroys the state using the generic helper function,
> drm_atomic_helper_connector_destroy_state(). Following commit
> d2307dea14a4 ("drm/atomic: use connector references (v3)") there is
> now an imbalance in the connector reference count because the Tegra
> function to duplicate state does not take a reference when duplicating
> the state information. However, the generic helper function to destroy
> the state information assumes a reference has been taken and during
> system suspend, when the connector state is destroyed, this leads to a
> crash because we attempt to put the reference for an object that has
> already been freed.
> 
> Fix this by calling __drm_atomic_helper_connector_duplicate_state() from
> tegra_dsi_connector_duplicate_state() to ensure that we take a reference
> on a connector if crtc is set. Note that this will also copy the
> connector state a 2nd time, but this should be harmless.
> 
> By fixing tegra_dsi_connector_duplicate_state() to take a reference,
> although a crash was no longer seen, it was then observed that after
> each system suspend-resume cycle, the reference would be one greater
> than before the suspend-resume cycle. Following commit d2307dea14a4
> ("drm/atomic: use connector references (v3)"), it was found that we
> also need to put the reference when calling the function
> tegra_dsi_connector_reset() before freeing the state. Fix this by
> updating tegra_dsi_connector_reset() to call the function
> __drm_atomic_helper_connector_destroy_state() in order to put the
> reference for the connector.
> 
> Finally, add a warning if allocating memory for the state information
> fails in tegra_dsi_connector_reset().
> 
> Fixes: d2307dea14a4 ("drm/atomic: use connector references (v3)")
> 
> Signed-off-by: Jon Hunter <jonathanh@nvidia.com>

Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>

Thierry, since Dave hasn't pulled in the drm-misc pull with the
refactoring, should I apply this to drm-misc?
-Daniel


> ---
> 
> V2 changes:
> - Updated to next-20160518
> - Replaced open coding of call to drm_connector_reference() with
>   __drm_atomic_helper_connector_duplicate_state() per Daniel's feedback.
> 
>  drivers/gpu/drm/tegra/dsi.c | 15 +++++++++++----
>  1 file changed, 11 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/gpu/drm/tegra/dsi.c b/drivers/gpu/drm/tegra/dsi.c
> index 44e102799195..a49bb006182d 100644
> --- a/drivers/gpu/drm/tegra/dsi.c
> +++ b/drivers/gpu/drm/tegra/dsi.c
> @@ -745,13 +745,17 @@ static void tegra_dsi_soft_reset(struct tegra_dsi *dsi)
>  
>  static void tegra_dsi_connector_reset(struct drm_connector *connector)
>  {
> -	struct tegra_dsi_state *state =
> -		kzalloc(sizeof(*state), GFP_KERNEL);
> +	struct tegra_dsi_state *state = kzalloc(sizeof(*state), GFP_KERNEL);
>  
> -	if (state) {
> +	if (WARN_ON(!state))
> +		return;
> +
> +	if (connector->state) {
> +		__drm_atomic_helper_connector_destroy_state(connector->state);
>  		kfree(connector->state);
> -		__drm_atomic_helper_connector_reset(connector, &state->base);
>  	}
> +
> +	__drm_atomic_helper_connector_reset(connector, &state->base);
>  }
>  
>  static struct drm_connector_state *
> @@ -764,6 +768,9 @@ tegra_dsi_connector_duplicate_state(struct drm_connector *connector)
>  	if (!copy)
>  		return NULL;
>  
> +	__drm_atomic_helper_connector_duplicate_state(connector,
> +						      &copy->base);
> +
>  	return &copy->base;
>  }
>  
> -- 
> 2.1.4
>
Thierry Reding May 18, 2016, 2:12 p.m. UTC | #2
On Wed, May 18, 2016 at 12:11:29PM +0100, Jon Hunter wrote:
> Commit d2307dea14a4 ("drm/atomic: use connector references (v3)") added
> reference counting for DRM connectors and this caused a crash when
> exercising system suspend on Tegra114 Dalmore.
> 
> The Tegra DSI driver implements a Tegra specific function,
> tegra_dsi_connector_duplicate_state(), to duplicate the connector state
> and destroys the state using the generic helper function,
> drm_atomic_helper_connector_destroy_state(). Following commit
> d2307dea14a4 ("drm/atomic: use connector references (v3)") there is
> now an imbalance in the connector reference count because the Tegra
> function to duplicate state does not take a reference when duplicating
> the state information. However, the generic helper function to destroy
> the state information assumes a reference has been taken and during
> system suspend, when the connector state is destroyed, this leads to a
> crash because we attempt to put the reference for an object that has
> already been freed.
> 
> Fix this by calling __drm_atomic_helper_connector_duplicate_state() from
> tegra_dsi_connector_duplicate_state() to ensure that we take a reference
> on a connector if crtc is set. Note that this will also copy the
> connector state a 2nd time, but this should be harmless.

Initially when I wrote these subclassing helpers the idea was that
drivers would allocate a new structure, call the subclassing helpers and
then duplicate the driver-specific data. Here's what the implementation
would look like the way I imagined it at the time:

	copy = kzalloc(sizeof(*state), GFP_KERNEL);
	if (!copy)
		return NULL;

	__drm_atomic_helper_connector_duplicate_state(connector, &copy->base);
	copy->timing = state->timing;
	copy->period = state->period;
	copy->vrefresh = state->vrefresh;
	copy->lanes = state->lanes;
	copy->pclk = state->pclk;
	copy->bclk = state->bclk;
	copy->format = state->format;
	copy->mul = state->mul;
	copy->div = state->div;

	return &copy->base;

Of course that has the slight drawback of having to remember that this
implementation needs to be updated when the state structure is changed.
On the other hand that might not be a bad thing, because some of the
data may end up being non-trivial to copy (reference count, ...).

The above has the advantage of avoiding the extra copy, but at the same
time it's a little verbose. I don't feel very strongly either way, so
the current proposal is fine with me.

> By fixing tegra_dsi_connector_duplicate_state() to take a reference,
> although a crash was no longer seen, it was then observed that after
> each system suspend-resume cycle, the reference would be one greater
> than before the suspend-resume cycle. Following commit d2307dea14a4
> ("drm/atomic: use connector references (v3)"), it was found that we
> also need to put the reference when calling the function
> tegra_dsi_connector_reset() before freeing the state. Fix this by
> updating tegra_dsi_connector_reset() to call the function
> __drm_atomic_helper_connector_destroy_state() in order to put the
> reference for the connector.
> 
> Finally, add a warning if allocating memory for the state information
> fails in tegra_dsi_connector_reset().

I don't think that's necessary. The allocator will already provide a
strong warning if the allocation fails, so an additional WARN_ON() is
not going to help much, and in the worst case may even add confusion.

> diff --git a/drivers/gpu/drm/tegra/dsi.c b/drivers/gpu/drm/tegra/dsi.c
> index 44e102799195..a49bb006182d 100644
> --- a/drivers/gpu/drm/tegra/dsi.c
> +++ b/drivers/gpu/drm/tegra/dsi.c
> @@ -745,13 +745,17 @@ static void tegra_dsi_soft_reset(struct tegra_dsi *dsi)
>  
>  static void tegra_dsi_connector_reset(struct drm_connector *connector)
>  {
> -	struct tegra_dsi_state *state =
> -		kzalloc(sizeof(*state), GFP_KERNEL);
> +	struct tegra_dsi_state *state = kzalloc(sizeof(*state), GFP_KERNEL);
>  
> -	if (state) {
> +	if (WARN_ON(!state))
> +		return;
> +
> +	if (connector->state) {
> +		__drm_atomic_helper_connector_destroy_state(connector->state);
>  		kfree(connector->state);
> -		__drm_atomic_helper_connector_reset(connector, &state->base);
>  	}
> +
> +	__drm_atomic_helper_connector_reset(connector, &state->base);
>  }
>  
>  static struct drm_connector_state *
> @@ -764,6 +768,9 @@ tegra_dsi_connector_duplicate_state(struct drm_connector *connector)
>  	if (!copy)
>  		return NULL;
>  
> +	__drm_atomic_helper_connector_duplicate_state(connector,
> +						      &copy->base);
> +
>  	return &copy->base;
>  }
>  

With the WARN_ON() dropped this looks good to me:

Acked-by: Thierry Reding <treding@nvidia.com>
diff mbox

Patch

diff --git a/drivers/gpu/drm/tegra/dsi.c b/drivers/gpu/drm/tegra/dsi.c
index 44e102799195..a49bb006182d 100644
--- a/drivers/gpu/drm/tegra/dsi.c
+++ b/drivers/gpu/drm/tegra/dsi.c
@@ -745,13 +745,17 @@  static void tegra_dsi_soft_reset(struct tegra_dsi *dsi)
 
 static void tegra_dsi_connector_reset(struct drm_connector *connector)
 {
-	struct tegra_dsi_state *state =
-		kzalloc(sizeof(*state), GFP_KERNEL);
+	struct tegra_dsi_state *state = kzalloc(sizeof(*state), GFP_KERNEL);
 
-	if (state) {
+	if (WARN_ON(!state))
+		return;
+
+	if (connector->state) {
+		__drm_atomic_helper_connector_destroy_state(connector->state);
 		kfree(connector->state);
-		__drm_atomic_helper_connector_reset(connector, &state->base);
 	}
+
+	__drm_atomic_helper_connector_reset(connector, &state->base);
 }
 
 static struct drm_connector_state *
@@ -764,6 +768,9 @@  tegra_dsi_connector_duplicate_state(struct drm_connector *connector)
 	if (!copy)
 		return NULL;
 
+	__drm_atomic_helper_connector_duplicate_state(connector,
+						      &copy->base);
+
 	return &copy->base;
 }