diff mbox series

btrfs: fix lockdep warnings on io_uring encoded reads

Message ID 20241112160055.1829361-1-maharmstone@fb.com (mailing list archive)
State New
Headers show
Series btrfs: fix lockdep warnings on io_uring encoded reads | expand

Commit Message

Mark Harmstone Nov. 12, 2024, 4 p.m. UTC
Lockdep doesn't like the fact that btrfs_uring_read_extent() returns to
userspace still holding the inode lock, even though we release it once
the I/O finishes. Add calls to rwsem_release() and rwsem_acquire_read() to
work round this.

Signed-off-by: Mark Harmstone <maharmstone@fb.com>
Reported-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
---
 fs/btrfs/ioctl.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

Comments

Johannes Thumshirn Nov. 13, 2024, 8:21 a.m. UTC | #1
On 12.11.24 17:01, Mark Harmstone wrote:
> Lockdep doesn't like the fact that btrfs_uring_read_extent() returns to
> userspace still holding the inode lock, even though we release it once
> the I/O finishes. Add calls to rwsem_release() and rwsem_acquire_read() to
> work round this.
> 
> Signed-off-by: Mark Harmstone <maharmstone@fb.com>
> Reported-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
> ---
>   fs/btrfs/ioctl.c | 14 ++++++++++++++
>   1 file changed, 14 insertions(+)
> 
> diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
> index 1fdeb216bf6c..6ea01e4f940e 100644
> --- a/fs/btrfs/ioctl.c
> +++ b/fs/btrfs/ioctl.c
> @@ -4752,6 +4752,11 @@ static void btrfs_uring_read_finished(struct io_uring_cmd *cmd, unsigned int iss
>   	size_t page_offset;
>   	ssize_t ret;
>   
> +#ifdef CONFIG_DEBUG_LOCK_ALLOC
> +	/* The inode lock has already been acquired in btrfs_uring_read_extent.  */
> +	rwsem_acquire_read(&inode->vfs_inode.i_rwsem.dep_map, 0, 0, _THIS_IP_);
> +#endif
> +
>   	if (priv->err) {
>   		ret = priv->err;
>   		goto out;
> @@ -4860,6 +4865,15 @@ static int btrfs_uring_read_extent(struct kiocb *iocb, struct iov_iter *iter,
>   	 * and inode and freeing the allocations.
>   	 */
>   
> +#ifdef CONFIG_DEBUG_LOCK_ALLOC
> +	/*
> +	 * We're returning to userspace with the inode lock held, and that's
> +	 * okay - it'll get unlocked in a kthread.  Call rwsem_release to
> +	 * avoid confusing lockdep.
> +	 */
> +	rwsem_release(&inode->vfs_inode.i_rwsem.dep_map, _THIS_IP_);
> +#endif
> +
>   	return -EIOCBQUEUED;
>   
>   out_fail:

Can't say anything about the correctness (as I have no clue), but we 
have wrappers around rwsem_release (btrfs_lockdep_release()) and 
rwsem_acquire_read (btrfs_lockdep_acquire()) that I think serve the 
documentation purpose.
Mark Harmstone Nov. 13, 2024, 10:24 a.m. UTC | #2
On 13/11/24 08:21, Johannes Thumshirn wrote:
> > 
> On 12.11.24 17:01, Mark Harmstone wrote:
>> Lockdep doesn't like the fact that btrfs_uring_read_extent() returns to
>> userspace still holding the inode lock, even though we release it once
>> the I/O finishes. Add calls to rwsem_release() and rwsem_acquire_read() to
>> work round this.
>>
>> Signed-off-by: Mark Harmstone <maharmstone@fb.com>
>> Reported-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
>> ---
>>    fs/btrfs/ioctl.c | 14 ++++++++++++++
>>    1 file changed, 14 insertions(+)
>>
>> diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
>> index 1fdeb216bf6c..6ea01e4f940e 100644
>> --- a/fs/btrfs/ioctl.c
>> +++ b/fs/btrfs/ioctl.c
>> @@ -4752,6 +4752,11 @@ static void btrfs_uring_read_finished(struct io_uring_cmd *cmd, unsigned int iss
>>    	size_t page_offset;
>>    	ssize_t ret;
>>    
>> +#ifdef CONFIG_DEBUG_LOCK_ALLOC
>> +	/* The inode lock has already been acquired in btrfs_uring_read_extent.  */
>> +	rwsem_acquire_read(&inode->vfs_inode.i_rwsem.dep_map, 0, 0, _THIS_IP_);
>> +#endif
>> +
>>    	if (priv->err) {
>>    		ret = priv->err;
>>    		goto out;
>> @@ -4860,6 +4865,15 @@ static int btrfs_uring_read_extent(struct kiocb *iocb, struct iov_iter *iter,
>>    	 * and inode and freeing the allocations.
>>    	 */
>>    
>> +#ifdef CONFIG_DEBUG_LOCK_ALLOC
>> +	/*
>> +	 * We're returning to userspace with the inode lock held, and that's
>> +	 * okay - it'll get unlocked in a kthread.  Call rwsem_release to
>> +	 * avoid confusing lockdep.
>> +	 */
>> +	rwsem_release(&inode->vfs_inode.i_rwsem.dep_map, _THIS_IP_);
>> +#endif
>> +
>>    	return -EIOCBQUEUED;
>>    
>>    out_fail:
> 
> Can't say anything about the correctness (as I have no clue), but we
> have wrappers around rwsem_release (btrfs_lockdep_release()) and
> rwsem_acquire_read (btrfs_lockdep_acquire()) that I think serve the
> documentation purpose.

btrfs_lockdep_acquire(a,b) calls rwsem_acquire_read on &a->b_map, so 
can't be used for something located at &inode->vfs_inode.i_rwsem.dep_map.

Mark
Johannes Thumshirn Nov. 13, 2024, 10:26 a.m. UTC | #3
On 13.11.24 11:24, Mark Harmstone wrote:
> On 13/11/24 08:21, Johannes Thumshirn wrote:
>>>
>> On 12.11.24 17:01, Mark Harmstone wrote:
>>> Lockdep doesn't like the fact that btrfs_uring_read_extent() returns to
>>> userspace still holding the inode lock, even though we release it once
>>> the I/O finishes. Add calls to rwsem_release() and rwsem_acquire_read() to
>>> work round this.
>>>
>>> Signed-off-by: Mark Harmstone <maharmstone@fb.com>
>>> Reported-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
>>> ---
>>>     fs/btrfs/ioctl.c | 14 ++++++++++++++
>>>     1 file changed, 14 insertions(+)
>>>
>>> diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
>>> index 1fdeb216bf6c..6ea01e4f940e 100644
>>> --- a/fs/btrfs/ioctl.c
>>> +++ b/fs/btrfs/ioctl.c
>>> @@ -4752,6 +4752,11 @@ static void btrfs_uring_read_finished(struct io_uring_cmd *cmd, unsigned int iss
>>>     	size_t page_offset;
>>>     	ssize_t ret;
>>>     
>>> +#ifdef CONFIG_DEBUG_LOCK_ALLOC
>>> +	/* The inode lock has already been acquired in btrfs_uring_read_extent.  */
>>> +	rwsem_acquire_read(&inode->vfs_inode.i_rwsem.dep_map, 0, 0, _THIS_IP_);
>>> +#endif
>>> +
>>>     	if (priv->err) {
>>>     		ret = priv->err;
>>>     		goto out;
>>> @@ -4860,6 +4865,15 @@ static int btrfs_uring_read_extent(struct kiocb *iocb, struct iov_iter *iter,
>>>     	 * and inode and freeing the allocations.
>>>     	 */
>>>     
>>> +#ifdef CONFIG_DEBUG_LOCK_ALLOC
>>> +	/*
>>> +	 * We're returning to userspace with the inode lock held, and that's
>>> +	 * okay - it'll get unlocked in a kthread.  Call rwsem_release to
>>> +	 * avoid confusing lockdep.
>>> +	 */
>>> +	rwsem_release(&inode->vfs_inode.i_rwsem.dep_map, _THIS_IP_);
>>> +#endif
>>> +
>>>     	return -EIOCBQUEUED;
>>>     
>>>     out_fail:
>>
>> Can't say anything about the correctness (as I have no clue), but we
>> have wrappers around rwsem_release (btrfs_lockdep_release()) and
>> rwsem_acquire_read (btrfs_lockdep_acquire()) that I think serve the
>> documentation purpose.
> 
> btrfs_lockdep_acquire(a,b) calls rwsem_acquire_read on &a->b_map, so
> can't be used for something located at &inode->vfs_inode.i_rwsem.dep_map.

Ah my bad, sorry.
David Sterba Nov. 14, 2024, 3:06 p.m. UTC | #4
On Tue, Nov 12, 2024 at 04:00:49PM +0000, Mark Harmstone wrote:
> Lockdep doesn't like the fact that btrfs_uring_read_extent() returns to
> userspace still holding the inode lock, even though we release it once
> the I/O finishes. Add calls to rwsem_release() and rwsem_acquire_read() to
> work round this.
> 
> Signed-off-by: Mark Harmstone <maharmstone@fb.com>
> Reported-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
> ---
>  fs/btrfs/ioctl.c | 14 ++++++++++++++
>  1 file changed, 14 insertions(+)
> 
> diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
> index 1fdeb216bf6c..6ea01e4f940e 100644
> --- a/fs/btrfs/ioctl.c
> +++ b/fs/btrfs/ioctl.c
> @@ -4752,6 +4752,11 @@ static void btrfs_uring_read_finished(struct io_uring_cmd *cmd, unsigned int iss
>  	size_t page_offset;
>  	ssize_t ret;
>  
> +#ifdef CONFIG_DEBUG_LOCK_ALLOC
> +	/* The inode lock has already been acquired in btrfs_uring_read_extent.  */
> +	rwsem_acquire_read(&inode->vfs_inode.i_rwsem.dep_map, 0, 0, _THIS_IP_);
> +#endif

Please put that to a wrapper, we want to avoid #ifdefs in the middle of
functions (there are exceptions), especially when the macro is for lock
debugging.

The wrapper name can follow naming and syntax of the other
btrfs_lockde_* wrappers, so like btrfs_lockdep_inode_acquire(owner,
lock). There is only one rwsem in inode, but for clarity and consistency
I think it makes sense.

btrfs_lockdep_inode_acquire(inode, i_rwsem);
diff mbox series

Patch

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 1fdeb216bf6c..6ea01e4f940e 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -4752,6 +4752,11 @@  static void btrfs_uring_read_finished(struct io_uring_cmd *cmd, unsigned int iss
 	size_t page_offset;
 	ssize_t ret;
 
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	/* The inode lock has already been acquired in btrfs_uring_read_extent.  */
+	rwsem_acquire_read(&inode->vfs_inode.i_rwsem.dep_map, 0, 0, _THIS_IP_);
+#endif
+
 	if (priv->err) {
 		ret = priv->err;
 		goto out;
@@ -4860,6 +4865,15 @@  static int btrfs_uring_read_extent(struct kiocb *iocb, struct iov_iter *iter,
 	 * and inode and freeing the allocations.
 	 */
 
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	/*
+	 * We're returning to userspace with the inode lock held, and that's
+	 * okay - it'll get unlocked in a kthread.  Call rwsem_release to
+	 * avoid confusing lockdep.
+	 */
+	rwsem_release(&inode->vfs_inode.i_rwsem.dep_map, _THIS_IP_);
+#endif
+
 	return -EIOCBQUEUED;
 
 out_fail: