diff mbox

[RESEND,5/6] drm/i915: Remove pci private pointer after destroying the device private

Message ID 20180716080332.32283-5-chris@chris-wilson.co.uk (mailing list archive)
State New, archived
Headers show

Commit Message

Chris Wilson July 16, 2018, 8:03 a.m. UTC
On an aborted module load, we unwind and free our device private - but
we left a dangling pointer to our privates inside the pci_device. After
the attempted aborted unload, we may still get a call to i915_pci_remove()
when the module is removed, potentially chasing stale data.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.c |  1 +
 drivers/gpu/drm/i915/i915_pci.c | 13 ++++++++++++-
 2 files changed, 13 insertions(+), 1 deletion(-)

Comments

Tvrtko Ursulin July 16, 2018, 10:04 a.m. UTC | #1
On 16/07/2018 09:03, Chris Wilson wrote:
> On an aborted module load, we unwind and free our device private - but
> we left a dangling pointer to our privates inside the pci_device. After
> the attempted aborted unload, we may still get a call to i915_pci_remove()
> when the module is removed, potentially chasing stale data.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>   drivers/gpu/drm/i915/i915_drv.c |  1 +
>   drivers/gpu/drm/i915/i915_pci.c | 13 ++++++++++++-
>   2 files changed, 13 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index 700ffb611187..3834bd758a2e 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -1424,6 +1424,7 @@ int i915_driver_load(struct pci_dev *pdev, const struct pci_device_id *ent)
>   	drm_dev_fini(&dev_priv->drm);
>   out_free:
>   	kfree(dev_priv);
> +	pci_set_drvdata(pdev, NULL);
>   	return ret;
>   }
>   
> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
> index 55543f1b0236..6a4d1388ad2d 100644
> --- a/drivers/gpu/drm/i915/i915_pci.c
> +++ b/drivers/gpu/drm/i915/i915_pci.c
> @@ -674,10 +674,16 @@ MODULE_DEVICE_TABLE(pci, pciidlist);
>   
>   static void i915_pci_remove(struct pci_dev *pdev)
>   {
> -	struct drm_device *dev = pci_get_drvdata(pdev);
> +	struct drm_device *dev;
> +
> +	dev = pci_get_drvdata(pdev);
> +	if (!dev) /* driver load aborted, nothing to cleanup */
> +		return;
>   
>   	i915_driver_unload(dev);
>   	drm_dev_put(dev);
> +
> +	pci_set_drvdata(pdev, NULL);
>   }
>   
>   static int i915_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
> @@ -712,6 +718,11 @@ static int i915_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
>   	if (err)
>   		return err;
>   
> +	if (i915_inject_load_failure()) {
> +		i915_pci_remove(pdev);
> +		return -ENODEV;
> +	}
> +
>   	err = i915_live_selftests(pdev);
>   	if (err) {
>   		i915_pci_remove(pdev);
> 

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko
Michal Wajdeczko July 16, 2018, 10:32 a.m. UTC | #2
On Mon, 16 Jul 2018 10:03:31 +0200, Chris Wilson  
<chris@chris-wilson.co.uk> wrote:

> On an aborted module load, we unwind and free our device private - but
> we left a dangling pointer to our privates inside the pci_device. After
> the attempted aborted unload, we may still get a call to  
> i915_pci_remove()
> when the module is removed, potentially chasing stale data.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_drv.c |  1 +
>  drivers/gpu/drm/i915/i915_pci.c | 13 ++++++++++++-
>  2 files changed, 13 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.c  
> b/drivers/gpu/drm/i915/i915_drv.c
> index 700ffb611187..3834bd758a2e 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -1424,6 +1424,7 @@ int i915_driver_load(struct pci_dev *pdev, const  
> struct pci_device_id *ent)
>  	drm_dev_fini(&dev_priv->drm);
>  out_free:
>  	kfree(dev_priv);
> +	pci_set_drvdata(pdev, NULL);
>  	return ret;
>  }
> diff --git a/drivers/gpu/drm/i915/i915_pci.c  
> b/drivers/gpu/drm/i915/i915_pci.c
> index 55543f1b0236..6a4d1388ad2d 100644
> --- a/drivers/gpu/drm/i915/i915_pci.c
> +++ b/drivers/gpu/drm/i915/i915_pci.c
> @@ -674,10 +674,16 @@ MODULE_DEVICE_TABLE(pci, pciidlist);
> static void i915_pci_remove(struct pci_dev *pdev)
>  {
> -	struct drm_device *dev = pci_get_drvdata(pdev);
> +	struct drm_device *dev;
> +
> +	dev = pci_get_drvdata(pdev);
> +	if (!dev) /* driver load aborted, nothing to cleanup */
> +		return;
> 	i915_driver_unload(dev);
>  	drm_dev_put(dev);
> +
> +	pci_set_drvdata(pdev, NULL);

Why we can't put this inside i915_driver_release() ?
Then it will be in sync with kfree()

Michal

>  }
> static int i915_pci_probe(struct pci_dev *pdev, const struct  
> pci_device_id *ent)
> @@ -712,6 +718,11 @@ static int i915_pci_probe(struct pci_dev *pdev,  
> const struct pci_device_id *ent)
>  	if (err)
>  		return err;
> +	if (i915_inject_load_failure()) {
> +		i915_pci_remove(pdev);
> +		return -ENODEV;
> +	}
> +
>  	err = i915_live_selftests(pdev);
>  	if (err) {
>  		i915_pci_remove(pdev);
Chris Wilson July 16, 2018, 10:37 a.m. UTC | #3
Quoting Michal Wajdeczko (2018-07-16 11:32:22)
> On Mon, 16 Jul 2018 10:03:31 +0200, Chris Wilson  
> <chris@chris-wilson.co.uk> wrote:
> 
> > On an aborted module load, we unwind and free our device private - but
> > we left a dangling pointer to our privates inside the pci_device. After
> > the attempted aborted unload, we may still get a call to  
> > i915_pci_remove()
> > when the module is removed, potentially chasing stale data.
> >
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > ---
> >  drivers/gpu/drm/i915/i915_drv.c |  1 +
> >  drivers/gpu/drm/i915/i915_pci.c | 13 ++++++++++++-
> >  2 files changed, 13 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_drv.c  
> > b/drivers/gpu/drm/i915/i915_drv.c
> > index 700ffb611187..3834bd758a2e 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.c
> > +++ b/drivers/gpu/drm/i915/i915_drv.c
> > @@ -1424,6 +1424,7 @@ int i915_driver_load(struct pci_dev *pdev, const  
> > struct pci_device_id *ent)
> >       drm_dev_fini(&dev_priv->drm);
> >  out_free:
> >       kfree(dev_priv);
> > +     pci_set_drvdata(pdev, NULL);
> >       return ret;
> >  }
> > diff --git a/drivers/gpu/drm/i915/i915_pci.c  
> > b/drivers/gpu/drm/i915/i915_pci.c
> > index 55543f1b0236..6a4d1388ad2d 100644
> > --- a/drivers/gpu/drm/i915/i915_pci.c
> > +++ b/drivers/gpu/drm/i915/i915_pci.c
> > @@ -674,10 +674,16 @@ MODULE_DEVICE_TABLE(pci, pciidlist);
> > static void i915_pci_remove(struct pci_dev *pdev)
> >  {
> > -     struct drm_device *dev = pci_get_drvdata(pdev);
> > +     struct drm_device *dev;
> > +
> > +     dev = pci_get_drvdata(pdev);
> > +     if (!dev) /* driver load aborted, nothing to cleanup */
> > +             return;
> >       i915_driver_unload(dev);
> >       drm_dev_put(dev);
> > +
> > +     pci_set_drvdata(pdev, NULL);
> 
> Why we can't put this inside i915_driver_release() ?
> Then it will be in sync with kfree()

It pairs here with the pci remove. The driver release callback (being
the kref_put release) can happen later. That was my other choice, but I
went with doing it here because this we have the pci device here and the
synergy with the get+test followed set. My thinking was that if we tried
to dig out the pci device in a later release function (or pci callback
after remove), that should be clearly erroneous.
-Chris
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 700ffb611187..3834bd758a2e 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1424,6 +1424,7 @@  int i915_driver_load(struct pci_dev *pdev, const struct pci_device_id *ent)
 	drm_dev_fini(&dev_priv->drm);
 out_free:
 	kfree(dev_priv);
+	pci_set_drvdata(pdev, NULL);
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 55543f1b0236..6a4d1388ad2d 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -674,10 +674,16 @@  MODULE_DEVICE_TABLE(pci, pciidlist);
 
 static void i915_pci_remove(struct pci_dev *pdev)
 {
-	struct drm_device *dev = pci_get_drvdata(pdev);
+	struct drm_device *dev;
+
+	dev = pci_get_drvdata(pdev);
+	if (!dev) /* driver load aborted, nothing to cleanup */
+		return;
 
 	i915_driver_unload(dev);
 	drm_dev_put(dev);
+
+	pci_set_drvdata(pdev, NULL);
 }
 
 static int i915_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
@@ -712,6 +718,11 @@  static int i915_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (err)
 		return err;
 
+	if (i915_inject_load_failure()) {
+		i915_pci_remove(pdev);
+		return -ENODEV;
+	}
+
 	err = i915_live_selftests(pdev);
 	if (err) {
 		i915_pci_remove(pdev);