diff mbox series

dma-buf: add attachments empty check for dma_buf_release

Message ID 20211019122345.160555-1-guangming.cao@mediatek.com (mailing list archive)
State New, archived
Headers show
Series dma-buf: add attachments empty check for dma_buf_release | expand

Commit Message

Guangming.Cao Oct. 19, 2021, 12:23 p.m. UTC
From: Guangming Cao <Guangming.Cao@mediatek.com>

Since there is no mandatory inspection for attachments in dma_buf_release.
There will be a case that dma_buf already released but attachment is still
in use, which can points to the dmabuf, and it maybe cause
some unexpected issues.

With IOMMU, when this cases occurs, there will have IOMMU address
translation fault(s) followed by this warning,
I think it's useful for dma devices to debug issue.

Signed-off-by: Guangming Cao <Guangming.Cao@mediatek.com>
---
 drivers/dma-buf/dma-buf.c | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

Comments

Daniel Vetter Oct. 19, 2021, 12:41 p.m. UTC | #1
On Tue, Oct 19, 2021 at 08:23:45PM +0800, guangming.cao@mediatek.com wrote:
> From: Guangming Cao <Guangming.Cao@mediatek.com>
> 
> Since there is no mandatory inspection for attachments in dma_buf_release.
> There will be a case that dma_buf already released but attachment is still
> in use, which can points to the dmabuf, and it maybe cause
> some unexpected issues.
> 
> With IOMMU, when this cases occurs, there will have IOMMU address
> translation fault(s) followed by this warning,
> I think it's useful for dma devices to debug issue.
> 
> Signed-off-by: Guangming Cao <Guangming.Cao@mediatek.com>

This feels a lot like hand-rolling kobject debugging. If you want to do
this then I think adding kobject debug support to
dma_buf/dma_buf_attachment would be better than hand-rolling something
bespoke here.

Also on the patch itself: You don't need the trylock. For correctly
working code non one else can get at the dma-buf, so no locking needed to
iterate through the attachment list. For incorrect code the kernel will be
on fire pretty soon anyway, trying to do locking won't help :-) And
without the trylock we can catch more bugs (e.g. if you also forgot to
unlock and not just forgot to detach).
-Daniel

> ---
>  drivers/dma-buf/dma-buf.c | 23 +++++++++++++++++++++++
>  1 file changed, 23 insertions(+)
> 
> diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
> index 511fe0d217a0..672404857d6a 100644
> --- a/drivers/dma-buf/dma-buf.c
> +++ b/drivers/dma-buf/dma-buf.c
> @@ -74,6 +74,29 @@ static void dma_buf_release(struct dentry *dentry)
>  	 */
>  	BUG_ON(dmabuf->cb_shared.active || dmabuf->cb_excl.active);
>  
> +	/* attachment check */
> +	if (dma_resv_trylock(dmabuf->resv) && WARN(!list_empty(&dmabuf->attachments),
> +	    "%s err, inode:%08lu size:%08zu name:%s exp_name:%s flags:0x%08x mode:0x%08x, %s\n",
> +	    __func__, file_inode(dmabuf->file)->i_ino, dmabuf->size,
> +	    dmabuf->name, dmabuf->exp_name,
> +	    dmabuf->file->f_flags, dmabuf->file->f_mode,
> +	    "Release dmabuf before detach all attachments, dump attach:\n")) {
> +		int attach_cnt = 0;
> +		dma_addr_t dma_addr;
> +		struct dma_buf_attachment *attach_obj;
> +		/* dump all attachment info */
> +		list_for_each_entry(attach_obj, &dmabuf->attachments, node) {
> +			dma_addr = (dma_addr_t)0;
> +			if (attach_obj->sgt)
> +				dma_addr = sg_dma_address(attach_obj->sgt->sgl);
> +			pr_err("attach[%d]: dev:%s dma_addr:0x%-12lx\n",
> +			       attach_cnt, dev_name(attach_obj->dev), dma_addr);
> +			attach_cnt++;
> +		}
> +		pr_err("Total %d devices attached\n\n", attach_cnt);
> +		dma_resv_unlock(dmabuf->resv);
> +	}
> +
>  	dmabuf->ops->release(dmabuf);
>  
>  	if (dmabuf->resv == (struct dma_resv *)&dmabuf[1])
> -- 
> 2.17.1
>
Christian König Oct. 19, 2021, 3:37 p.m. UTC | #2
Am 19.10.21 um 14:41 schrieb Daniel Vetter:
> On Tue, Oct 19, 2021 at 08:23:45PM +0800, guangming.cao@mediatek.com wrote:
>> From: Guangming Cao <Guangming.Cao@mediatek.com>
>>
>> Since there is no mandatory inspection for attachments in dma_buf_release.
>> There will be a case that dma_buf already released but attachment is still
>> in use, which can points to the dmabuf, and it maybe cause
>> some unexpected issues.
>>
>> With IOMMU, when this cases occurs, there will have IOMMU address
>> translation fault(s) followed by this warning,
>> I think it's useful for dma devices to debug issue.
>>
>> Signed-off-by: Guangming Cao <Guangming.Cao@mediatek.com>
> This feels a lot like hand-rolling kobject debugging. If you want to do
> this then I think adding kobject debug support to
> dma_buf/dma_buf_attachment would be better than hand-rolling something
> bespoke here.

Well I would call that overkill.

>
> Also on the patch itself: You don't need the trylock. For correctly
> working code non one else can get at the dma-buf, so no locking needed to
> iterate through the attachment list. For incorrect code the kernel will be
> on fire pretty soon anyway, trying to do locking won't help :-) And
> without the trylock we can catch more bugs (e.g. if you also forgot to
> unlock and not just forgot to detach).

You also don't need the WARN(!list_empty...) because a few line below we 
already have a "WARN_ON(!list_empty(&dmabuf->attachments));".

Christian.

> -Daniel
>
>> ---
>>   drivers/dma-buf/dma-buf.c | 23 +++++++++++++++++++++++
>>   1 file changed, 23 insertions(+)
>>
>> diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
>> index 511fe0d217a0..672404857d6a 100644
>> --- a/drivers/dma-buf/dma-buf.c
>> +++ b/drivers/dma-buf/dma-buf.c
>> @@ -74,6 +74,29 @@ static void dma_buf_release(struct dentry *dentry)
>>   	 */
>>   	BUG_ON(dmabuf->cb_shared.active || dmabuf->cb_excl.active);
>>   
>> +	/* attachment check */
>> +	if (dma_resv_trylock(dmabuf->resv) && WARN(!list_empty(&dmabuf->attachments),
>> +	    "%s err, inode:%08lu size:%08zu name:%s exp_name:%s flags:0x%08x mode:0x%08x, %s\n",
>> +	    __func__, file_inode(dmabuf->file)->i_ino, dmabuf->size,
>> +	    dmabuf->name, dmabuf->exp_name,
>> +	    dmabuf->file->f_flags, dmabuf->file->f_mode,
>> +	    "Release dmabuf before detach all attachments, dump attach:\n")) {
>> +		int attach_cnt = 0;
>> +		dma_addr_t dma_addr;
>> +		struct dma_buf_attachment *attach_obj;
>> +		/* dump all attachment info */
>> +		list_for_each_entry(attach_obj, &dmabuf->attachments, node) {
>> +			dma_addr = (dma_addr_t)0;
>> +			if (attach_obj->sgt)
>> +				dma_addr = sg_dma_address(attach_obj->sgt->sgl);
>> +			pr_err("attach[%d]: dev:%s dma_addr:0x%-12lx\n",
>> +			       attach_cnt, dev_name(attach_obj->dev), dma_addr);
>> +			attach_cnt++;
>> +		}
>> +		pr_err("Total %d devices attached\n\n", attach_cnt);
>> +		dma_resv_unlock(dmabuf->resv);
>> +	}
>> +
>>   	dmabuf->ops->release(dmabuf);
>>   
>>   	if (dmabuf->resv == (struct dma_resv *)&dmabuf[1])
>> -- 
>> 2.17.1
>>
Daniel Vetter Oct. 19, 2021, 9:11 p.m. UTC | #3
On Tue, Oct 19, 2021 at 05:37:27PM +0200, Christian König wrote:
> 
> 
> Am 19.10.21 um 14:41 schrieb Daniel Vetter:
> > On Tue, Oct 19, 2021 at 08:23:45PM +0800, guangming.cao@mediatek.com wrote:
> > > From: Guangming Cao <Guangming.Cao@mediatek.com>
> > > 
> > > Since there is no mandatory inspection for attachments in dma_buf_release.
> > > There will be a case that dma_buf already released but attachment is still
> > > in use, which can points to the dmabuf, and it maybe cause
> > > some unexpected issues.
> > > 
> > > With IOMMU, when this cases occurs, there will have IOMMU address
> > > translation fault(s) followed by this warning,
> > > I think it's useful for dma devices to debug issue.
> > > 
> > > Signed-off-by: Guangming Cao <Guangming.Cao@mediatek.com>
> > This feels a lot like hand-rolling kobject debugging. If you want to do
> > this then I think adding kobject debug support to
> > dma_buf/dma_buf_attachment would be better than hand-rolling something
> > bespoke here.
> 
> Well I would call that overkill.

I think if done right the object debug stuff should be able to give you a
backtrace. Which might be useful if you have a dma-buf heaps design where
you really have no clue why a buffer was allocated/attached without some
hints.

> > Also on the patch itself: You don't need the trylock. For correctly
> > working code non one else can get at the dma-buf, so no locking needed to
> > iterate through the attachment list. For incorrect code the kernel will be
> > on fire pretty soon anyway, trying to do locking won't help :-) And
> > without the trylock we can catch more bugs (e.g. if you also forgot to
> > unlock and not just forgot to detach).
> 
> You also don't need the WARN(!list_empty...) because a few line below we
> already have a "WARN_ON(!list_empty(&dmabuf->attachments));".

Yeah this patch here alone isn't really that useful I think. Maybe we
could add the dmabuf->exp_name or so to that warning, but otherwise the
info printed here isn't all that useful for debugging. Grabbing a
backtrace of the allocator or attacher otoh should fairly immedialy point
at the buggy code.
-Daniel

> 
> Christian.
> 
> > -Daniel
> > 
> > > ---
> > >   drivers/dma-buf/dma-buf.c | 23 +++++++++++++++++++++++
> > >   1 file changed, 23 insertions(+)
> > > 
> > > diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
> > > index 511fe0d217a0..672404857d6a 100644
> > > --- a/drivers/dma-buf/dma-buf.c
> > > +++ b/drivers/dma-buf/dma-buf.c
> > > @@ -74,6 +74,29 @@ static void dma_buf_release(struct dentry *dentry)
> > >   	 */
> > >   	BUG_ON(dmabuf->cb_shared.active || dmabuf->cb_excl.active);
> > > +	/* attachment check */
> > > +	if (dma_resv_trylock(dmabuf->resv) && WARN(!list_empty(&dmabuf->attachments),
> > > +	    "%s err, inode:%08lu size:%08zu name:%s exp_name:%s flags:0x%08x mode:0x%08x, %s\n",
> > > +	    __func__, file_inode(dmabuf->file)->i_ino, dmabuf->size,
> > > +	    dmabuf->name, dmabuf->exp_name,
> > > +	    dmabuf->file->f_flags, dmabuf->file->f_mode,
> > > +	    "Release dmabuf before detach all attachments, dump attach:\n")) {
> > > +		int attach_cnt = 0;
> > > +		dma_addr_t dma_addr;
> > > +		struct dma_buf_attachment *attach_obj;
> > > +		/* dump all attachment info */
> > > +		list_for_each_entry(attach_obj, &dmabuf->attachments, node) {
> > > +			dma_addr = (dma_addr_t)0;
> > > +			if (attach_obj->sgt)
> > > +				dma_addr = sg_dma_address(attach_obj->sgt->sgl);
> > > +			pr_err("attach[%d]: dev:%s dma_addr:0x%-12lx\n",
> > > +			       attach_cnt, dev_name(attach_obj->dev), dma_addr);
> > > +			attach_cnt++;
> > > +		}
> > > +		pr_err("Total %d devices attached\n\n", attach_cnt);
> > > +		dma_resv_unlock(dmabuf->resv);
> > > +	}
> > > +
> > >   	dmabuf->ops->release(dmabuf);
> > >   	if (dmabuf->resv == (struct dma_resv *)&dmabuf[1])
> > > -- 
> > > 2.17.1
> > > 
>
Guangming.Cao Oct. 26, 2021, 8:52 a.m. UTC | #4
From: Guangming Cao <Guangming.Cao@mediatek.com>

On Tue, 2021-10-19 at 23:11 +0200, Daniel Vetter wrote:
> On Tue, Oct 19, 2021 at 05:37:27PM +0200, Christian K鰊ig wrote:
> > 
> > 
> > Am 19.10.21 um 14:41 schrieb Daniel Vetter:
> > > On Tue, Oct 19, 2021 at 08:23:45PM +0800, 
> > > guangming.cao@mediatek.com wrote:
> > > > From: Guangming Cao <Guangming.Cao@mediatek.com>
> > > > 
> > > > Since there is no mandatory inspection for attachments in
> > > > dma_buf_release.
> > > > There will be a case that dma_buf already released but
> > > > attachment is still
> > > > in use, which can points to the dmabuf, and it maybe cause
> > > > some unexpected issues.
> > > > 
> > > > With IOMMU, when this cases occurs, there will have IOMMU
> > > > address
> > > > translation fault(s) followed by this warning,
> > > > I think it's useful for dma devices to debug issue.
> > > > 
> > > > Signed-off-by: Guangming Cao <Guangming.Cao@mediatek.com>
> > > 
> > > This feels a lot like hand-rolling kobject debugging. If you want
> > > to do
> > > this then I think adding kobject debug support to
> > > dma_buf/dma_buf_attachment would be better than hand-rolling
> > > something
> > > bespoke here.
> > 
> > Well I would call that overkill.
> 
> I think if done right the object debug stuff should be able to give
> you a
> backtrace. Which might be useful if you have a dma-buf heaps design
> where
> you really have no clue why a buffer was allocated/attached without
> some
> hints.
Well, I think it's the finally solution, for current thinking, it maybe bring a high
overloading. Just as this revert patch: 
https://lore.kernel.org/lkml/CA+wgaPMHA+8+LxfGNL+q4=XrdXqfu4TXoWLX7e28z9Z7kPsf-w@mail.gmail.com/
So, we need to find a lightweight way to do this.

Guangming
> 
> > > Also on the patch itself: You don't need the trylock. For
> > > correctly
> > > working code non one else can get at the dma-buf, so no locking
> > > needed to
> > > iterate through the attachment list. For incorrect code the
> > > kernel will be
> > > on fire pretty soon anyway, trying to do locking won't help :-)
> > > And
> > > without the trylock we can catch more bugs (e.g. if you also
> > > forgot to
> > > unlock and not just forgot to detach).

Yes, It's also a error case, I will remove to lock at next version patch. Thanks!

Guangming

> > 
> > You also don't need the WARN(!list_empty...) because a few line
> > below we
> > already have a "WARN_ON(!list_empty(&dmabuf->attachments));".

Sorry, could you tell me wich function will check it?
I didn't found it so I submit this patch.

Guangming
> 
> Yeah this patch here alone isn't really that useful I think. Maybe we
> could add the dmabuf->exp_name or so to that warning, but otherwise
> the
> info printed here isn't all that useful for debugging. Grabbing a

I also printed dmabuf->exp_name in warn message.

The reason adding it here is that some users on ANDROID of dma-buf is not familiar
with linux dma-buf or maybe write some problematic code, add this check can find
who lost call get_dma_buf or any other api can let let the dma-buf lifecycle is
under users' expectation.
Add it just like check in dma-fence:
https://github.com/torvalds/linux/blob/master/drivers/dma-buf/dma-fence.c#L519

Do you have any suggestion to debug this part?

Guangming

> backtrace of the allocator or attacher otoh should fairly immedialy
> point
> at the buggy code.
> -Daniel
> 
> > 
> > Christian.
> > 
> > > -Daniel
> > > 
> > > > ---
> > > >   drivers/dma-buf/dma-buf.c | 23 +++++++++++++++++++++++
> > > >   1 file changed, 23 insertions(+)
> > > > 
> > > > diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-
> > > > buf.c
> > > > index 511fe0d217a0..672404857d6a 100644
> > > > --- a/drivers/dma-buf/dma-buf.c
> > > > +++ b/drivers/dma-buf/dma-buf.c
> > > > @@ -74,6 +74,29 @@ static void dma_buf_release(struct dentry
> > > > *dentry)
> > > >   	 */
> > > >   	BUG_ON(dmabuf->cb_shared.active || dmabuf-
> > > > >cb_excl.active);
> > > > +	/* attachment check */
> > > > +	if (dma_resv_trylock(dmabuf->resv) &&
> > > > WARN(!list_empty(&dmabuf->attachments),
> > > > +	    "%s err, inode:%08lu size:%08zu name:%s exp_name:%s
> > > > flags:0x%08x mode:0x%08x, %s\n",
> > > > +	    __func__, file_inode(dmabuf->file)->i_ino, dmabuf-
> > > > >size,
> > > > +	    dmabuf->name, dmabuf->exp_name,
> > > > +	    dmabuf->file->f_flags, dmabuf->file->f_mode,
> > > > +	    "Release dmabuf before detach all attachments, dump
> > > > attach:\n")) {
> > > > +		int attach_cnt = 0;
> > > > +		dma_addr_t dma_addr;
> > > > +		struct dma_buf_attachment *attach_obj;
> > > > +		/* dump all attachment info */
> > > > +		list_for_each_entry(attach_obj, &dmabuf-
> > > > >attachments, node) {
> > > > +			dma_addr = (dma_addr_t)0;
> > > > +			if (attach_obj->sgt)
> > > > +				dma_addr =
> > > > sg_dma_address(attach_obj->sgt->sgl);
> > > > +			pr_err("attach[%d]: dev:%s
> > > > dma_addr:0x%-12lx\n",
> > > > +			       attach_cnt, dev_name(attach_obj-
> > > > >dev), dma_addr);
> > > > +			attach_cnt++;
> > > > +		}
> > > > +		pr_err("Total %d devices attached\n\n",
> > > > attach_cnt);
> > > > +		dma_resv_unlock(dmabuf->resv);
> > > > +	}
> > > > +
> > > >   	dmabuf->ops->release(dmabuf);
> > > >   	if (dmabuf->resv == (struct dma_resv *)&dmabuf[1])
> > > > -- 
> > > > 2.17.1
> > > > 
> 
>
diff mbox series

Patch

diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index 511fe0d217a0..672404857d6a 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -74,6 +74,29 @@  static void dma_buf_release(struct dentry *dentry)
 	 */
 	BUG_ON(dmabuf->cb_shared.active || dmabuf->cb_excl.active);
 
+	/* attachment check */
+	if (dma_resv_trylock(dmabuf->resv) && WARN(!list_empty(&dmabuf->attachments),
+	    "%s err, inode:%08lu size:%08zu name:%s exp_name:%s flags:0x%08x mode:0x%08x, %s\n",
+	    __func__, file_inode(dmabuf->file)->i_ino, dmabuf->size,
+	    dmabuf->name, dmabuf->exp_name,
+	    dmabuf->file->f_flags, dmabuf->file->f_mode,
+	    "Release dmabuf before detach all attachments, dump attach:\n")) {
+		int attach_cnt = 0;
+		dma_addr_t dma_addr;
+		struct dma_buf_attachment *attach_obj;
+		/* dump all attachment info */
+		list_for_each_entry(attach_obj, &dmabuf->attachments, node) {
+			dma_addr = (dma_addr_t)0;
+			if (attach_obj->sgt)
+				dma_addr = sg_dma_address(attach_obj->sgt->sgl);
+			pr_err("attach[%d]: dev:%s dma_addr:0x%-12lx\n",
+			       attach_cnt, dev_name(attach_obj->dev), dma_addr);
+			attach_cnt++;
+		}
+		pr_err("Total %d devices attached\n\n", attach_cnt);
+		dma_resv_unlock(dmabuf->resv);
+	}
+
 	dmabuf->ops->release(dmabuf);
 
 	if (dmabuf->resv == (struct dma_resv *)&dmabuf[1])