diff mbox

[for-4.16,v3,2/3] block: allow gendisk's request_queue registration to be deferred

Message ID 20180111021256.37490-3-snitzer@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Mike Snitzer Jan. 11, 2018, 2:12 a.m. UTC
Since I can remember DM has forced the block layer to allow the
allocation and initialization of the request_queue to be distinct
operations.  Reason for this is block/genhd.c:add_disk() has requires
that the request_queue (and associated bdi) be tied to the gendisk
before add_disk() is called -- because add_disk() also deals with
exposing the request_queue via blk_register_queue().

DM's dynamic creation of arbitrary device types (and associated
request_queue types) requires the DM device's gendisk be available so
that DM table loads can establish a master/slave relationship with
subordinate devices that are referenced by loaded DM tables -- using
bd_link_disk_holder().  But until these DM tables, and their associated
subordinate devices, are known DM cannot know what type of request_queue
it needs -- nor what its queue_limits should be.

This chicken and egg scenario has created all manner of problems for DM
and, at times, the block layer.

Summary of changes:

- Add QUEUE_FLAG_DEFER_REG that a block driver can set to defer the
  required blk_register_queue() until the block driver's request_queue
  is fully initialized.

- Return early from blk_unregister_queue() if QUEUE_FLAG_REGISTERED
  is not set.  It won't be set if a request_queue with
  QUEUE_FLAG_DEFER_REG set never called blk_register_queue() -- as can
  happen if a driver encounters an error and must del_gendisk() before
  calling blk_register_queue().

- Export blk_register_queue().

These changes allow DM to use device_add_disk() to anchor its gendisk as
the "master" for master/slave relationships DM must establish with
subordinate devices referenced in DM tables that get loaded.  Once all
"slave" devices for a DM device are known a request_queue can be
properly initialized and then advertised via sysfs -- important
improvement being that no request_queue resource initialization is
missed -- before these changes DM was known to be missing blk-mq debugfs
and proper block throttle initialization.

Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 block/blk-sysfs.c      | 4 ++++
 block/genhd.c          | 4 +++-
 include/linux/blkdev.h | 1 +
 3 files changed, 8 insertions(+), 1 deletion(-)

Comments

Ming Lei Jan. 11, 2018, 2:54 a.m. UTC | #1
On Wed, Jan 10, 2018 at 09:12:55PM -0500, Mike Snitzer wrote:
> Since I can remember DM has forced the block layer to allow the
> allocation and initialization of the request_queue to be distinct
> operations.  Reason for this is block/genhd.c:add_disk() has requires
> that the request_queue (and associated bdi) be tied to the gendisk
> before add_disk() is called -- because add_disk() also deals with
> exposing the request_queue via blk_register_queue().
> 
> DM's dynamic creation of arbitrary device types (and associated
> request_queue types) requires the DM device's gendisk be available so
> that DM table loads can establish a master/slave relationship with
> subordinate devices that are referenced by loaded DM tables -- using
> bd_link_disk_holder().  But until these DM tables, and their associated
> subordinate devices, are known DM cannot know what type of request_queue
> it needs -- nor what its queue_limits should be.
> 
> This chicken and egg scenario has created all manner of problems for DM
> and, at times, the block layer.
> 
> Summary of changes:
> 
> - Add QUEUE_FLAG_DEFER_REG that a block driver can set to defer the
>   required blk_register_queue() until the block driver's request_queue
>   is fully initialized.
> 
> - Return early from blk_unregister_queue() if QUEUE_FLAG_REGISTERED
>   is not set.  It won't be set if a request_queue with
>   QUEUE_FLAG_DEFER_REG set never called blk_register_queue() -- as can
>   happen if a driver encounters an error and must del_gendisk() before
>   calling blk_register_queue().
> 
> - Export blk_register_queue().
> 
> These changes allow DM to use device_add_disk() to anchor its gendisk as
> the "master" for master/slave relationships DM must establish with
> subordinate devices referenced in DM tables that get loaded.  Once all
> "slave" devices for a DM device are known a request_queue can be
> properly initialized and then advertised via sysfs -- important
> improvement being that no request_queue resource initialization is
> missed -- before these changes DM was known to be missing blk-mq debugfs
> and proper block throttle initialization.
> 
> Signed-off-by: Mike Snitzer <snitzer@redhat.com>
> ---
>  block/blk-sysfs.c      | 4 ++++
>  block/genhd.c          | 4 +++-
>  include/linux/blkdev.h | 1 +
>  3 files changed, 8 insertions(+), 1 deletion(-)
> 
> diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
> index 870484eaed1f..2395122875b4 100644
> --- a/block/blk-sysfs.c
> +++ b/block/blk-sysfs.c
> @@ -921,6 +921,7 @@ int blk_register_queue(struct gendisk *disk)
>  	mutex_unlock(&q->sysfs_lock);
>  	return ret;
>  }
> +EXPORT_SYMBOL_GPL(blk_register_queue);
>  
>  void blk_unregister_queue(struct gendisk *disk)
>  {
> @@ -929,6 +930,9 @@ void blk_unregister_queue(struct gendisk *disk)
>  	if (WARN_ON(!q))
>  		return;
>  
> +	if (!test_bit(QUEUE_FLAG_REGISTERED, &disk->queue->queue_flags))
> +		return;
> +
>  	mutex_lock(&q->sysfs_lock);
>  	queue_flag_clear_unlocked(QUEUE_FLAG_REGISTERED, q);
>  	mutex_unlock(&q->sysfs_lock);
> diff --git a/block/genhd.c b/block/genhd.c
> index 00620e01e043..3912a82ecc4f 100644
> --- a/block/genhd.c
> +++ b/block/genhd.c
> @@ -682,7 +682,6 @@ void device_add_disk(struct device *parent, struct gendisk *disk)
>  				    exact_match, exact_lock, disk);
>  	}
>  	register_disk(parent, disk);
> -	blk_register_queue(disk);
>  
>  	/*
>  	 * Take an extra ref on queue which will be put on disk_release()
> @@ -692,6 +691,9 @@ void device_add_disk(struct device *parent, struct gendisk *disk)
>  
>  	disk_add_events(disk);
>  	blk_integrity_add(disk);
> +
> +	if (!test_bit(QUEUE_FLAG_DEFER_REG, &disk->queue->queue_flags))
> +		blk_register_queue(disk);
>  }
>  EXPORT_SYMBOL(device_add_disk);
>  
> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> index 71a9371c8182..84d144c7b025 100644
> --- a/include/linux/blkdev.h
> +++ b/include/linux/blkdev.h
> @@ -681,6 +681,7 @@ struct request_queue {
>  #define QUEUE_FLAG_SCSI_PASSTHROUGH 27	/* queue supports SCSI commands */
>  #define QUEUE_FLAG_QUIESCED    28	/* queue has been quiesced */
>  #define QUEUE_FLAG_PREEMPT_ONLY	29	/* only process REQ_PREEMPT requests */
> +#define QUEUE_FLAG_DEFER_REG	30	/* defer registering queue to a disk */
>  
>  #define QUEUE_FLAG_DEFAULT	((1 << QUEUE_FLAG_IO_STAT) |		\
>  				 (1 << QUEUE_FLAG_SAME_COMP)	|	\
> -- 
> 2.15.0

This way is safe for all other devices, even for DM it is safe too since
block does deal with NULL .make_request_fn well for legacy path.

Maybe QUEUE_FLAG_DEFER_REG can be renamed as QUEUE_FLAG_REG_BY_DRIVER,
but not a big deal, so

	Reviewed-by: Ming Lei <ming.lei@redhat.com>
Hannes Reinecke Jan. 11, 2018, 7:46 a.m. UTC | #2
On 01/11/2018 03:12 AM, Mike Snitzer wrote:
> Since I can remember DM has forced the block layer to allow the
> allocation and initialization of the request_queue to be distinct
> operations.  Reason for this is block/genhd.c:add_disk() has requires
> that the request_queue (and associated bdi) be tied to the gendisk
> before add_disk() is called -- because add_disk() also deals with
> exposing the request_queue via blk_register_queue().
> 
> DM's dynamic creation of arbitrary device types (and associated
> request_queue types) requires the DM device's gendisk be available so
> that DM table loads can establish a master/slave relationship with
> subordinate devices that are referenced by loaded DM tables -- using
> bd_link_disk_holder().  But until these DM tables, and their associated
> subordinate devices, are known DM cannot know what type of request_queue
> it needs -- nor what its queue_limits should be.
> 
> This chicken and egg scenario has created all manner of problems for DM
> and, at times, the block layer.
> 
> Summary of changes:
> 
> - Add QUEUE_FLAG_DEFER_REG that a block driver can set to defer the
>   required blk_register_queue() until the block driver's request_queue
>   is fully initialized.
> 
> - Return early from blk_unregister_queue() if QUEUE_FLAG_REGISTERED
>   is not set.  It won't be set if a request_queue with
>   QUEUE_FLAG_DEFER_REG set never called blk_register_queue() -- as can
>   happen if a driver encounters an error and must del_gendisk() before
>   calling blk_register_queue().
> 
> - Export blk_register_queue().
> 
> These changes allow DM to use device_add_disk() to anchor its gendisk as
> the "master" for master/slave relationships DM must establish with
> subordinate devices referenced in DM tables that get loaded.  Once all
> "slave" devices for a DM device are known a request_queue can be
> properly initialized and then advertised via sysfs -- important
> improvement being that no request_queue resource initialization is
> missed -- before these changes DM was known to be missing blk-mq debugfs
> and proper block throttle initialization.
> 
> Signed-off-by: Mike Snitzer <snitzer@redhat.com>
> ---
>  block/blk-sysfs.c      | 4 ++++
>  block/genhd.c          | 4 +++-
>  include/linux/blkdev.h | 1 +
>  3 files changed, 8 insertions(+), 1 deletion(-)
> 
> diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
> index 870484eaed1f..2395122875b4 100644
> --- a/block/blk-sysfs.c
> +++ b/block/blk-sysfs.c
> @@ -921,6 +921,7 @@ int blk_register_queue(struct gendisk *disk)
>  	mutex_unlock(&q->sysfs_lock);
>  	return ret;
>  }
> +EXPORT_SYMBOL_GPL(blk_register_queue);
>  
>  void blk_unregister_queue(struct gendisk *disk)
>  {
> @@ -929,6 +930,9 @@ void blk_unregister_queue(struct gendisk *disk)
>  	if (WARN_ON(!q))
>  		return;
>  
> +	if (!test_bit(QUEUE_FLAG_REGISTERED, &disk->queue->queue_flags))
> +		return;
> +
>  	mutex_lock(&q->sysfs_lock);
>  	queue_flag_clear_unlocked(QUEUE_FLAG_REGISTERED, q);
>  	mutex_unlock(&q->sysfs_lock);
Why can't we use test_and_clear_bit() here?
Shouldn't that relieve the need for the mutex_lock() here, too?

> diff --git a/block/genhd.c b/block/genhd.c
> index 00620e01e043..3912a82ecc4f 100644
> --- a/block/genhd.c
> +++ b/block/genhd.c
> @@ -682,7 +682,6 @@ void device_add_disk(struct device *parent, struct gendisk *disk)
>  				    exact_match, exact_lock, disk);
>  	}
>  	register_disk(parent, disk);
> -	blk_register_queue(disk);
>  
>  	/*
>  	 * Take an extra ref on queue which will be put on disk_release()
> @@ -692,6 +691,9 @@ void device_add_disk(struct device *parent, struct gendisk *disk)
>  
>  	disk_add_events(disk);
>  	blk_integrity_add(disk);
> +
> +	if (!test_bit(QUEUE_FLAG_DEFER_REG, &disk->queue->queue_flags))
> +		blk_register_queue(disk);
>  }
>  EXPORT_SYMBOL(device_add_disk);
>  
> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> index 71a9371c8182..84d144c7b025 100644
> --- a/include/linux/blkdev.h
> +++ b/include/linux/blkdev.h
> @@ -681,6 +681,7 @@ struct request_queue {
>  #define QUEUE_FLAG_SCSI_PASSTHROUGH 27	/* queue supports SCSI commands */
>  #define QUEUE_FLAG_QUIESCED    28	/* queue has been quiesced */
>  #define QUEUE_FLAG_PREEMPT_ONLY	29	/* only process REQ_PREEMPT requests */
> +#define QUEUE_FLAG_DEFER_REG	30	/* defer registering queue to a disk */
>  
>  #define QUEUE_FLAG_DEFAULT	((1 << QUEUE_FLAG_IO_STAT) |		\
>  				 (1 << QUEUE_FLAG_SAME_COMP)	|	\
> 
Where is this flag used?

Cheers,

Hannes
Hannes Reinecke Jan. 11, 2018, 7:56 a.m. UTC | #3
On 01/11/2018 03:12 AM, Mike Snitzer wrote:
> Since I can remember DM has forced the block layer to allow the
> allocation and initialization of the request_queue to be distinct
> operations.  Reason for this is block/genhd.c:add_disk() has requires
> that the request_queue (and associated bdi) be tied to the gendisk
> before add_disk() is called -- because add_disk() also deals with
> exposing the request_queue via blk_register_queue().
> 
> DM's dynamic creation of arbitrary device types (and associated
> request_queue types) requires the DM device's gendisk be available so
> that DM table loads can establish a master/slave relationship with
> subordinate devices that are referenced by loaded DM tables -- using
> bd_link_disk_holder().  But until these DM tables, and their associated
> subordinate devices, are known DM cannot know what type of request_queue
> it needs -- nor what its queue_limits should be.
> 
> This chicken and egg scenario has created all manner of problems for DM
> and, at times, the block layer.
> 
> Summary of changes:
> 
> - Add QUEUE_FLAG_DEFER_REG that a block driver can set to defer the
>   required blk_register_queue() until the block driver's request_queue
>   is fully initialized.
> 
> - Return early from blk_unregister_queue() if QUEUE_FLAG_REGISTERED
>   is not set.  It won't be set if a request_queue with
>   QUEUE_FLAG_DEFER_REG set never called blk_register_queue() -- as can
>   happen if a driver encounters an error and must del_gendisk() before
>   calling blk_register_queue().
> 
> - Export blk_register_queue().
> 
> These changes allow DM to use device_add_disk() to anchor its gendisk as
> the "master" for master/slave relationships DM must establish with
> subordinate devices referenced in DM tables that get loaded.  Once all
> "slave" devices for a DM device are known a request_queue can be
> properly initialized and then advertised via sysfs -- important
> improvement being that no request_queue resource initialization is
> missed -- before these changes DM was known to be missing blk-mq debugfs
> and proper block throttle initialization.
> 
> Signed-off-by: Mike Snitzer <snitzer@redhat.com>
> ---
>  block/blk-sysfs.c      | 4 ++++
>  block/genhd.c          | 4 +++-
>  include/linux/blkdev.h | 1 +
>  3 files changed, 8 insertions(+), 1 deletion(-)
> 
> diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
> index 870484eaed1f..2395122875b4 100644
> --- a/block/blk-sysfs.c
> +++ b/block/blk-sysfs.c
> @@ -921,6 +921,7 @@ int blk_register_queue(struct gendisk *disk)
>  	mutex_unlock(&q->sysfs_lock);
>  	return ret;
>  }
> +EXPORT_SYMBOL_GPL(blk_register_queue);
>  
>  void blk_unregister_queue(struct gendisk *disk)
>  {
> @@ -929,6 +930,9 @@ void blk_unregister_queue(struct gendisk *disk)
>  	if (WARN_ON(!q))
>  		return;
>  
> +	if (!test_bit(QUEUE_FLAG_REGISTERED, &disk->queue->queue_flags))
> +		return;
> +
>  	mutex_lock(&q->sysfs_lock);
>  	queue_flag_clear_unlocked(QUEUE_FLAG_REGISTERED, q);
>  	mutex_unlock(&q->sysfs_lock);
> diff --git a/block/genhd.c b/block/genhd.c
> index 00620e01e043..3912a82ecc4f 100644
> --- a/block/genhd.c
> +++ b/block/genhd.c
> @@ -682,7 +682,6 @@ void device_add_disk(struct device *parent, struct gendisk *disk)
>  				    exact_match, exact_lock, disk);
>  	}
>  	register_disk(parent, disk);
> -	blk_register_queue(disk);
>  
>  	/*
>  	 * Take an extra ref on queue which will be put on disk_release()
> @@ -692,6 +691,9 @@ void device_add_disk(struct device *parent, struct gendisk *disk)
>  
>  	disk_add_events(disk);
>  	blk_integrity_add(disk);
> +
> +	if (!test_bit(QUEUE_FLAG_DEFER_REG, &disk->queue->queue_flags))
> +		blk_register_queue(disk);
>  }
>  EXPORT_SYMBOL(device_add_disk);
>  
> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> index 71a9371c8182..84d144c7b025 100644
> --- a/include/linux/blkdev.h
> +++ b/include/linux/blkdev.h
> @@ -681,6 +681,7 @@ struct request_queue {
>  #define QUEUE_FLAG_SCSI_PASSTHROUGH 27	/* queue supports SCSI commands */
>  #define QUEUE_FLAG_QUIESCED    28	/* queue has been quiesced */
>  #define QUEUE_FLAG_PREEMPT_ONLY	29	/* only process REQ_PREEMPT requests */
> +#define QUEUE_FLAG_DEFER_REG	30	/* defer registering queue to a disk */
>  
>  #define QUEUE_FLAG_DEFAULT	((1 << QUEUE_FLAG_IO_STAT) |		\
>  				 (1 << QUEUE_FLAG_SAME_COMP)	|	\
> 
Thinking of this a bit more, wouldn't it be better to modify add_disk()
(or, rather, device_add_disk()) to handle this case?
You already moved the call to 'blk_register_queue()' to the end of
device_add_disk(), so it would be trivial to make device_add_disk() a
wrapper function like

void device_add_disk(struct device *parent, struct gendisk *disk) {
  blk_add_disk(parent, disk);
  blk_register_queue(disk);
}

and then call blk_add_disk() from the dm-core.
That would save us the magic 'you have to set this flag before
registering' dance in dm.c...

Cheers,

Hannes
Mike Snitzer Jan. 11, 2018, 5:04 p.m. UTC | #4
On Thu, Jan 11 2018 at  2:46am -0500,
Hannes Reinecke <hare@suse.de> wrote:

> On 01/11/2018 03:12 AM, Mike Snitzer wrote:
> > 
> > diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
> > index 870484eaed1f..2395122875b4 100644
> > --- a/block/blk-sysfs.c
> > +++ b/block/blk-sysfs.c
> > @@ -929,6 +930,9 @@ void blk_unregister_queue(struct gendisk *disk)
> >  	if (WARN_ON(!q))
> >  		return;
> >  
> > +	if (!test_bit(QUEUE_FLAG_REGISTERED, &disk->queue->queue_flags))
> > +		return;
> > +
> >  	mutex_lock(&q->sysfs_lock);
> >  	queue_flag_clear_unlocked(QUEUE_FLAG_REGISTERED, q);
> >  	mutex_unlock(&q->sysfs_lock);
> Why can't we use test_and_clear_bit() here?
> Shouldn't that relieve the need for the mutex_lock() here, too?

FYI, I looked at this and then got concerned with it enough to chat with
Mikulas (cc'd) about it, here is what he said:

11:54 <mikulas> if (!test_and_clear_bit(QUEUE_FLAG_REGISTERED, &q->queue_flags))
11:55 <mikulas> this is buggy - the operation test_and_clear_bit is atomic - but if it races with some non-atomic read-modify-write operation on q->queue_flags, then the atomic modification would be lost
11:56 <mikulas> you must use always atomic accesses on q->queue_flags or always non-atomic accesses inside a mutex
11:56 <mikulas> queue_flag_clear_unlocked uses non-atomic __clear_bit
11:57 <mikulas> other functions in include/linux/blkdev.h also use non-atomic operations - so you cannot mix them with atomic operations

So my v4, that I'll send out shortly, won't be using test_and_clear_bit()

Thanks,
Mike
Bart Van Assche Jan. 11, 2018, 5:18 p.m. UTC | #5
On Thu, 2018-01-11 at 12:04 -0500, Mike Snitzer wrote:
> So my v4, that I'll send out shortly, won't be using test_and_clear_bit()


Please use queue_flag_set(), queue_flag_clear(), queue_flag_test_and_clear() and/or
queue_flag_test_and_set() to manipulate queue flags.

Thanks,

Bart.
Mike Snitzer Jan. 11, 2018, 5:29 p.m. UTC | #6
On Thu, Jan 11 2018 at 12:18pm -0500,
Bart Van Assche <Bart.VanAssche@wdc.com> wrote:

> On Thu, 2018-01-11 at 12:04 -0500, Mike Snitzer wrote:
> > So my v4, that I'll send out shortly, won't be using test_and_clear_bit()
> 
> Please use queue_flag_set(), queue_flag_clear(), queue_flag_test_and_clear() and/or
> queue_flag_test_and_set() to manipulate queue flags.

Can you please expand on this?  My patch is only using test_bit().

So your comment isn't relevant.

Mike
Bart Van Assche Jan. 11, 2018, 5:47 p.m. UTC | #7
On Thu, 2018-01-11 at 12:29 -0500, Mike Snitzer wrote:
> On Thu, Jan 11 2018 at 12:18pm -0500,

> Bart Van Assche <Bart.VanAssche@wdc.com> wrote:

> 

> > On Thu, 2018-01-11 at 12:04 -0500, Mike Snitzer wrote:

> > > So my v4, that I'll send out shortly, won't be using test_and_clear_bit()

> > 

> > Please use queue_flag_set(), queue_flag_clear(), queue_flag_test_and_clear() and/or

> > queue_flag_test_and_set() to manipulate queue flags.

> 

> Can you please expand on this?  My patch is only using test_bit().


Hello Mike,

I was referring to the following code, which apparenly is existing code:

        mutex_lock(&q->sysfs_lock);
        queue_flag_clear_unlocked(QUEUE_FLAG_REGISTERED, q);
        mutex_unlock(&q->sysfs_lock);

The above code is wrong. Other code that changes the queue flags protects
these changes with the the queue lock. The above code should be changed into
the following:

	spin_lock_irq(q->queue_lock);
	queue_flag_clear(QUEUE_FLAG_REGISTERED, q);
	spin_unlock_irq(q->queue_lock);

The only functions from which it is safe to call queue_flag_(set|clear)_unlocked()
without holding the queue lock are blk_alloc_queue_node() and
__blk_release_queue() because for these functions it is guaranteed that no queue
flag changes can happen from another context, e.g. through a blk_set_queue_dying()
call.

Bart.
Mike Snitzer Jan. 11, 2018, 7:20 p.m. UTC | #8
On Thu, Jan 11 2018 at 12:47pm -0500,
Bart Van Assche <Bart.VanAssche@wdc.com> wrote:

> On Thu, 2018-01-11 at 12:29 -0500, Mike Snitzer wrote:
> > On Thu, Jan 11 2018 at 12:18pm -0500,
> > Bart Van Assche <Bart.VanAssche@wdc.com> wrote:
> > 
> > > On Thu, 2018-01-11 at 12:04 -0500, Mike Snitzer wrote:
> > > > So my v4, that I'll send out shortly, won't be using test_and_clear_bit()
> > > 
> > > Please use queue_flag_set(), queue_flag_clear(), queue_flag_test_and_clear() and/or
> > > queue_flag_test_and_set() to manipulate queue flags.
> > 
> > Can you please expand on this?  My patch is only using test_bit().
> 
> Hello Mike,
> 
> I was referring to the following code, which apparenly is existing code:
> 
>         mutex_lock(&q->sysfs_lock);
>         queue_flag_clear_unlocked(QUEUE_FLAG_REGISTERED, q);
>         mutex_unlock(&q->sysfs_lock);
> 
> The above code is wrong. Other code that changes the queue flags protects
> these changes with the the queue lock. The above code should be changed into
> the following:
> 
> 	spin_lock_irq(q->queue_lock);
> 	queue_flag_clear(QUEUE_FLAG_REGISTERED, q);
> 	spin_unlock_irq(q->queue_lock);
> 
> The only functions from which it is safe to call queue_flag_(set|clear)_unlocked()
> without holding the queue lock are blk_alloc_queue_node() and
> __blk_release_queue() because for these functions it is guaranteed that no queue
> flag changes can happen from another context, e.g. through a blk_set_queue_dying()
> call.

Yes, I noticed the use of sysfs_lock also.  I've fixed it up earlier in
my v4 patchset and build on it.  I'll add your Reported-by too.

Mike
Bart Van Assche Jan. 11, 2018, 7:32 p.m. UTC | #9
On Thu, 2018-01-11 at 14:20 -0500, Mike Snitzer wrote:
> Yes, I noticed the use of sysfs_lock also.  I've fixed it up earlier in

> my v4 patchset and build on it.  I'll add your Reported-by too.


Hello Mike,

There are many more inconsistencies with regard to queue flag manipulation
in the block layer core and in block drivers. I'm working on a series to
address all inconsistencies in the block layer and in the sd driver.

Bart.
Mike Snitzer Jan. 11, 2018, 7:50 p.m. UTC | #10
On Thu, Jan 11 2018 at  2:32pm -0500,
Bart Van Assche <Bart.VanAssche@wdc.com> wrote:

> On Thu, 2018-01-11 at 14:20 -0500, Mike Snitzer wrote:
> > Yes, I noticed the use of sysfs_lock also.  I've fixed it up earlier in
> > my v4 patchset and build on it.  I'll add your Reported-by too.
> 
> Hello Mike,
> 
> There are many more inconsistencies with regard to queue flag manipulation
> in the block layer core and in block drivers. I'm working on a series to
> address all inconsistencies in the block layer and in the sd driver.

OK, well I needed to fix this anyway to build my changes up properly.
I think my v4 changes are ready for Jens to pick up, so you'll just have
to rebase.

I'll send out v4 shortly.

Thanks,
Mike
diff mbox

Patch

diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 870484eaed1f..2395122875b4 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -921,6 +921,7 @@  int blk_register_queue(struct gendisk *disk)
 	mutex_unlock(&q->sysfs_lock);
 	return ret;
 }
+EXPORT_SYMBOL_GPL(blk_register_queue);
 
 void blk_unregister_queue(struct gendisk *disk)
 {
@@ -929,6 +930,9 @@  void blk_unregister_queue(struct gendisk *disk)
 	if (WARN_ON(!q))
 		return;
 
+	if (!test_bit(QUEUE_FLAG_REGISTERED, &disk->queue->queue_flags))
+		return;
+
 	mutex_lock(&q->sysfs_lock);
 	queue_flag_clear_unlocked(QUEUE_FLAG_REGISTERED, q);
 	mutex_unlock(&q->sysfs_lock);
diff --git a/block/genhd.c b/block/genhd.c
index 00620e01e043..3912a82ecc4f 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -682,7 +682,6 @@  void device_add_disk(struct device *parent, struct gendisk *disk)
 				    exact_match, exact_lock, disk);
 	}
 	register_disk(parent, disk);
-	blk_register_queue(disk);
 
 	/*
 	 * Take an extra ref on queue which will be put on disk_release()
@@ -692,6 +691,9 @@  void device_add_disk(struct device *parent, struct gendisk *disk)
 
 	disk_add_events(disk);
 	blk_integrity_add(disk);
+
+	if (!test_bit(QUEUE_FLAG_DEFER_REG, &disk->queue->queue_flags))
+		blk_register_queue(disk);
 }
 EXPORT_SYMBOL(device_add_disk);
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 71a9371c8182..84d144c7b025 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -681,6 +681,7 @@  struct request_queue {
 #define QUEUE_FLAG_SCSI_PASSTHROUGH 27	/* queue supports SCSI commands */
 #define QUEUE_FLAG_QUIESCED    28	/* queue has been quiesced */
 #define QUEUE_FLAG_PREEMPT_ONLY	29	/* only process REQ_PREEMPT requests */
+#define QUEUE_FLAG_DEFER_REG	30	/* defer registering queue to a disk */
 
 #define QUEUE_FLAG_DEFAULT	((1 << QUEUE_FLAG_IO_STAT) |		\
 				 (1 << QUEUE_FLAG_SAME_COMP)	|	\