
block: work around sparse in queue_limits_commit_update

Message ID 20240405085018.243260-1-hch@lst.de (mailing list archive)
State New

Commit Message

Christoph Hellwig April 5, 2024, 8:50 a.m. UTC
The sparse lock context tracking warns about the mutex unlock in
queue_limits_commit_update despite the __releases annotation:

block/blk-settings.c:263:9: warning: context imbalance in 'queue_limits_commit_update' - wrong count at exit

As far as I can tell that is because the sparse lock tracking code is
busted for inline functions.  Work around it by splitting an inline
wrapper out of queue_limits_commit_update and doing the unlock there.

Reported-by: John Garry <john.g.garry@oracle.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 block/blk-settings.c   | 40 +++++++++++++++++-----------------------
 include/linux/blkdev.h | 32 +++++++++++++++++++++++++++++---
 2 files changed, 46 insertions(+), 26 deletions(-)

Comments

John Garry April 5, 2024, 12:31 p.m. UTC | #1
On 05/04/2024 09:50, Christoph Hellwig wrote:
> The sparse lock context tracking warns about the mutex unlock in
> queue_limits_commit_update despite the __releases annotation:
> 
> block/blk-settings.c:263:9: warning: context imbalance in 'queue_limits_commit_update' - wrong count at exit
> 
> As far as I can tell that is because the sparse lock tracking code is
> busted for inline functions.  Work around it by splitting an inline
> wrapper out of queue_limits_commit_update and doing the unlock there.

I find that just removing the __releases() from 
queue_limits_commit_update makes it go away.

I have been playing around with this and I can't see why.

I'm not convinced that mutexes are handled properly by sparse.
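
For reference, this is roughly what those annotations expand to when sparse 
(__CHECKER__) is running - paraphrased from include/linux/compiler_types.h 
from memory, so treat the exact form as illustrative:

# define __acquires(x)	__attribute__((context(x, 0, 1)))	/* exit count = entry + 1 */
# define __releases(x)	__attribute__((context(x, 1, 0)))	/* exit count = entry - 1 */
# define __acquire(x)	__context__(x, 1)	/* bump the count inside a body */
# define __release(x)	__context__(x, -1)	/* drop the count inside a body */

So sparse only balances what is explicitly annotated: if the mutex entry 
points themselves carry nothing, a mutex_unlock() in the body does not touch 
the count and only the function's own declaration gets checked.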

Here is a similar issue for net code:

john@localhost:~/mnt_sda4/john/mkp-scsi> make C=2 net/core/sock.o
   CHECK   scripts/mod/empty.c
   CALL    scripts/checksyscalls.sh
   DESCEND objtool
   INSTALL libsubcmd_headers
   CHECK   net/core/sock.c
net/core/sock.c:2393:9: warning: context imbalance in 'sk_clone_lock' - wrong count at exit
net/core/sock.c:2397:6: warning: context imbalance in 'sk_free_unlock_clone' - unexpected unlock
net/core/sock.c:4034:13: warning: context imbalance in 'proto_seq_start' - wrong count at exit
net/core/sock.c:4046:13: warning: context imbalance in 'proto_seq_stop' - wrong count at exit
john@localhost:~/mnt_sda4/john/mkp-scsi>


static void proto_seq_stop(struct seq_file *seq, void *v)
	__releases(proto_list_mutex)
{
	mutex_unlock(&proto_list_mutex);
}

That seems similar to the queue_limits_commit_update problem, but no 
inlining.
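
Just to show what sparse is tracking (not a proposed fix), manually 
balancing the count that the __releases() declaration promises seems to 
silence that one:

static void proto_seq_stop(struct seq_file *seq, void *v)
	__releases(proto_list_mutex)
{
	mutex_unlock(&proto_list_mutex);
	/* mutex_unlock() is unannotated, so drop the context count by hand */
	__release(proto_list_mutex);
}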

Changing the q->limits_lock to a semaphore seems to make the issue go 
away; but how to annotate queue_limits_set() is tricky regardless, as it 
acquires and then releases silently.

Anyway, changing the code, below, for sparse when it seems somewhat 
broken/unreliable may not be the best approach.

Thanks,
John

> 
> Reported-by: John Garry <john.g.garry@oracle.com>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>   block/blk-settings.c   | 40 +++++++++++++++++-----------------------
>   include/linux/blkdev.h | 32 +++++++++++++++++++++++++++++---
>   2 files changed, 46 insertions(+), 26 deletions(-)
> 
> diff --git a/block/blk-settings.c b/block/blk-settings.c
> index cdbaef159c4bc3..9ef52b80965dc1 100644
> --- a/block/blk-settings.c
> +++ b/block/blk-settings.c
> @@ -239,31 +239,20 @@ int blk_set_default_limits(struct queue_limits *lim)
>   	return blk_validate_limits(lim);
>   }
>   
> -/**
> - * queue_limits_commit_update - commit an atomic update of queue limits
> - * @q:		queue to update
> - * @lim:	limits to apply
> - *
> - * Apply the limits in @lim that were obtained from queue_limits_start_update()
> - * and updated by the caller to @q.
> - *
> - * Returns 0 if successful, else a negative error code.
> - */
> -int queue_limits_commit_update(struct request_queue *q,
> +int __queue_limits_commit_update(struct request_queue *q,
>   		struct queue_limits *lim)
> -	__releases(q->limits_lock)
>   {
> -	int error = blk_validate_limits(lim);
> -
> -	if (!error) {
> -		q->limits = *lim;
> -		if (q->disk)
> -			blk_apply_bdi_limits(q->disk->bdi, lim);
> -	}
> -	mutex_unlock(&q->limits_lock);
> -	return error;
> +	int error;
> +
> +	error = blk_validate_limits(lim);
> +	if (error)
> +		return error;
> +	q->limits = *lim;
> +	if (q->disk)
> +		blk_apply_bdi_limits(q->disk->bdi, lim);
> +	return 0;
>   }
> -EXPORT_SYMBOL_GPL(queue_limits_commit_update);
> +EXPORT_SYMBOL_GPL(__queue_limits_commit_update);
>   
>   /**
>    * queue_limits_set - apply queue limits to queue
> @@ -278,8 +267,13 @@ EXPORT_SYMBOL_GPL(queue_limits_commit_update);
>    */
>   int queue_limits_set(struct request_queue *q, struct queue_limits *lim)
>   {
> +	int error;
> +
>   	mutex_lock(&q->limits_lock);
> -	return queue_limits_commit_update(q, lim);
> +	error = __queue_limits_commit_update(q, lim);
> +	mutex_unlock(&q->limits_lock);
> +
> +	return error;
>   }
>   EXPORT_SYMBOL_GPL(queue_limits_set);
>   
> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> index c3e8f7cf96be9e..99f1d2fcec4a2a 100644
> --- a/include/linux/blkdev.h
> +++ b/include/linux/blkdev.h
> @@ -869,6 +869,15 @@ static inline unsigned int blk_chunk_sectors_left(sector_t offset,
>   	return chunk_sectors - (offset & (chunk_sectors - 1));
>   }
>   
> +/*
> + * Note: we want queue_limits_start_update to be inline to avoid passing a huge
> + * struct by value.  This in turn requires the part of queue_limits_commit_update
> + * that unlocks the mutex to be inline as well to not confuse the sparse lock
> + * context tracking.  Never use __queue_limits_commit_update directly.
> + */
> +int __queue_limits_commit_update(struct request_queue *q,
> +		struct queue_limits *lim);
> +
>   /**
>    * queue_limits_start_update - start an atomic update of queue limits
>    * @q:		queue to update
> @@ -883,13 +892,30 @@ static inline unsigned int blk_chunk_sectors_left(sector_t offset,
>    */
>   static inline struct queue_limits
>   queue_limits_start_update(struct request_queue *q)
> -	__acquires(q->limits_lock)
>   {
>   	mutex_lock(&q->limits_lock);
>   	return q->limits;
>   }
> -int queue_limits_commit_update(struct request_queue *q,
> -		struct queue_limits *lim);
> +
> +/**
> + * queue_limits_commit_update - commit an atomic update of queue limits
> + * @q:		queue to update
> + * @lim:	limits to apply
> + *
> + * Apply the limits in @lim that were obtained from queue_limits_start_update()
> + * and updated by the caller to @q.
> + *
> + * Returns 0 if successful, else a negative error code.
> + */
> +static inline int queue_limits_commit_update(struct request_queue *q,
> +		struct queue_limits *lim)
> +{
> +	int error = __queue_limits_commit_update(q, lim);
> +
> +	mutex_unlock(&q->limits_lock);
> +	return error;
> +}
> +
>   int queue_limits_set(struct request_queue *q, struct queue_limits *lim);
>   
>   /*
Christoph Hellwig April 5, 2024, 2:38 p.m. UTC | #2
On Fri, Apr 05, 2024 at 01:31:10PM +0100, John Garry wrote:
> Anyway, changing the code, below, for sparse when it seems somewhat 
> broken/unreliable may not be the best approach.

Ok, let's skip this and I'll report a bug to the sparse maintainers
(unless you want to do that, in which case I'll happily leave it to
you).
John Garry April 5, 2024, 3:13 p.m. UTC | #3
On 05/04/2024 15:38, Christoph Hellwig wrote:
> On Fri, Apr 05, 2024 at 01:31:10PM +0100, John Garry wrote:
>> Anyway, changing the code, below, for sparse when it seems somewhat
>> broken/unreliable may not be the best approach.
> Ok, let's skip this and I'll report a bug to the sparse maintainers
> (unless you want to do that, in which case I'll happily leave it to
> you).

I can look at this issue a bit more and report a bug if I can't find the 
real cause of the problem.
John Garry April 5, 2024, 4:43 p.m. UTC | #4
On 05/04/2024 15:38, Christoph Hellwig wrote:
> On Fri, Apr 05, 2024 at 01:31:10PM +0100, John Garry wrote:
>> Anyway, changing the code, below, for sparse when it seems somewhat
>> broken/unreliable may not be the best approach.
> Ok, let's skip this and I'll report a bug to the sparse maintainers
> (unless you want to do that, in which case I'll happily leave it to
> you).

This actually looks like a kernel issue: the mutex API is not annotated 
for lock checking.

For a reference, see this:
https://lore.kernel.org/lkml/cover.1579893447.git.jbi.octave@gmail.com/T/#mbb8bda6c0a7ca7ce19f46df976a8e3b489745488

As a quick hack to prove it, you can try this when building blk-settings.c:

---->8----
diff --git a/block/blk-settings.c b/block/blk-settings.c
index cdbaef159c4b..c9da5549f3c2 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -277,6 +277,7 @@ EXPORT_SYMBOL_GPL(queue_limits_commit_update);
   * Returns 0 if successful, else a negative error code.
   */
  int queue_limits_set(struct request_queue *q, struct queue_limits *lim)
+__acquires(q->limits_lock)
  {
   mutex_lock(&q->limits_lock);
   return queue_limits_commit_update(q, lim);
diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index 67edc4ca2bee..af5d3e20553b 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -143,7 +143,7 @@ do { \
  } while (0)

  #else
-extern void mutex_lock(struct mutex *lock);
+extern __lockfunc void mutex_lock(struct mutex *lock) __acquires(lock);
  extern int __must_check mutex_lock_interruptible(struct mutex *lock);
  extern int __must_check mutex_lock_killable(struct mutex *lock);
  extern void mutex_lock_io(struct mutex *lock);
@@ -162,7 +162,7 @@ extern void mutex_lock_io(struct mutex *lock);
  * Returns 1 if the mutex has been acquired successfully, and 0 on contention.
   */
  extern int mutex_trylock(struct mutex *lock);
-extern void mutex_unlock(struct mutex *lock);
+extern __lockfunc void mutex_unlock(struct mutex *lock) __releases(lock);

  extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock);

----8<----

I would need to investigate further whether there has been any progress in 
adding that lock checking to the mutex API, but it did not look promising 
from that patchset. For now I suppose you can either:
a. remove the current annotation (see the sketch after this list).
b. change to a spinlock - I don't think that anything requiring 
scheduling is happening when updating the limits, but would need to 
audit to be sure.
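
To be concrete, option (a) would just be the current code with the sparse 
annotations dropped - a sketch based on the hunks above, nothing more:

static inline struct queue_limits
queue_limits_start_update(struct request_queue *q)
{
	mutex_lock(&q->limits_lock);
	return q->limits;
}

int queue_limits_commit_update(struct request_queue *q,
		struct queue_limits *lim)
{
	int error = blk_validate_limits(lim);

	if (!error) {
		q->limits = *lim;
		if (q->disk)
			blk_apply_bdi_limits(q->disk->bdi, lim);
	}
	mutex_unlock(&q->limits_lock);
	return error;
}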

BTW, as for lock checking for inline functions in the header, I don't 
think sparse does any checking there at all.
Christoph Hellwig April 5, 2024, 5:13 p.m. UTC | #5
On Fri, Apr 05, 2024 at 05:43:24PM +0100, John Garry wrote:
> This actually looks like a kernel issue: the mutex API is not annotated
> for lock checking.

Oh.  Yeah, that would explain the weird behavior.

> I would need to investigate further for any progress in adding that lock 
> checking to the mutex API, but it did not look promising from that 
> patchset. For now I suppose you can either:
> a. remove current annotation.

I can send a patch for that.

> b. change to a spinlock - I don't think that anything requiring scheduling 
> is happening when updating the limits, but would need to audit to be sure.

With SCSI we'll hold it over the new ->device_configure, which must
be able to sleep.
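
I.e. the start/commit window is deliberately held open across code that can 
sleep, so the lock has to stay a mutex.  A minimal sketch of such a caller - 
the helper name and the limit tweak are made up, purely illustrative:

static int example_update_limits(struct request_queue *q)
{
	struct queue_limits lim;

	lim = queue_limits_start_update(q);	/* takes q->limits_lock */
	/*
	 * Anything in this window may sleep - for the SCSI conversion this
	 * is where commands go out to the device to probe its limits -
	 * which is what rules out a spinlock here.
	 */
	lim.max_sectors = min(lim.max_sectors, 128U);
	return queue_limits_commit_update(q, &lim);	/* drops q->limits_lock */
}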

Patch

diff --git a/block/blk-settings.c b/block/blk-settings.c
index cdbaef159c4bc3..9ef52b80965dc1 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -239,31 +239,20 @@  int blk_set_default_limits(struct queue_limits *lim)
 	return blk_validate_limits(lim);
 }
 
-/**
- * queue_limits_commit_update - commit an atomic update of queue limits
- * @q:		queue to update
- * @lim:	limits to apply
- *
- * Apply the limits in @lim that were obtained from queue_limits_start_update()
- * and updated by the caller to @q.
- *
- * Returns 0 if successful, else a negative error code.
- */
-int queue_limits_commit_update(struct request_queue *q,
+int __queue_limits_commit_update(struct request_queue *q,
 		struct queue_limits *lim)
-	__releases(q->limits_lock)
 {
-	int error = blk_validate_limits(lim);
-
-	if (!error) {
-		q->limits = *lim;
-		if (q->disk)
-			blk_apply_bdi_limits(q->disk->bdi, lim);
-	}
-	mutex_unlock(&q->limits_lock);
-	return error;
+	int error;
+
+	error = blk_validate_limits(lim);
+	if (error)
+		return error;
+	q->limits = *lim;
+	if (q->disk)
+		blk_apply_bdi_limits(q->disk->bdi, lim);
+	return 0;
 }
-EXPORT_SYMBOL_GPL(queue_limits_commit_update);
+EXPORT_SYMBOL_GPL(__queue_limits_commit_update);
 
 /**
  * queue_limits_set - apply queue limits to queue
@@ -278,8 +267,13 @@  EXPORT_SYMBOL_GPL(queue_limits_commit_update);
  */
 int queue_limits_set(struct request_queue *q, struct queue_limits *lim)
 {
+	int error;
+
 	mutex_lock(&q->limits_lock);
-	return queue_limits_commit_update(q, lim);
+	error = __queue_limits_commit_update(q, lim);
+	mutex_unlock(&q->limits_lock);
+
+	return error;
 }
 EXPORT_SYMBOL_GPL(queue_limits_set);
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index c3e8f7cf96be9e..99f1d2fcec4a2a 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -869,6 +869,15 @@  static inline unsigned int blk_chunk_sectors_left(sector_t offset,
 	return chunk_sectors - (offset & (chunk_sectors - 1));
 }
 
+/*
+ * Note: we want queue_limits_start_update to be inline to avoid passing a huge
+ * struct by value.  This in turn requires the part of queue_limits_commit_update
+ * that unlocks the mutex to be inline as well to not confuse the sparse lock
+ * context tracking.  Never use __queue_limits_commit_update directly.
+ */
+int __queue_limits_commit_update(struct request_queue *q,
+		struct queue_limits *lim);
+
 /**
  * queue_limits_start_update - start an atomic update of queue limits
  * @q:		queue to update
@@ -883,13 +892,30 @@  static inline unsigned int blk_chunk_sectors_left(sector_t offset,
  */
 static inline struct queue_limits
 queue_limits_start_update(struct request_queue *q)
-	__acquires(q->limits_lock)
 {
 	mutex_lock(&q->limits_lock);
 	return q->limits;
 }
-int queue_limits_commit_update(struct request_queue *q,
-		struct queue_limits *lim);
+
+/**
+ * queue_limits_commit_update - commit an atomic update of queue limits
+ * @q:		queue to update
+ * @lim:	limits to apply
+ *
+ * Apply the limits in @lim that were obtained from queue_limits_start_update()
+ * and updated by the caller to @q.
+ *
+ * Returns 0 if successful, else a negative error code.
+ */
+static inline int queue_limits_commit_update(struct request_queue *q,
+		struct queue_limits *lim)
+{
+	int error = __queue_limits_commit_update(q, lim);
+
+	mutex_unlock(&q->limits_lock);
+	return error;
+}
+
 int queue_limits_set(struct request_queue *q, struct queue_limits *lim);
 
 /*