| Message ID | 20240215070300.2200308-18-hch@lst.de (mailing list archive) |
|---|---|
| State | New, archived |
| Series | [01/17] ubd: pass queue_limits to blk_mq_alloc_disk |
On Thu, 15 Feb 2024 at 08:04, Christoph Hellwig <hch@lst.de> wrote:
>
> Pass the queue limit set at initialization time directly to
> blk_mq_alloc_disk instead of updating it right after the allocation.
>
> This requires refactoring the code a bit so that what was mmc_setup_queue
> before also allocates the gendisk now and actually sets all limits.
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Looks like $subject patch, patch11 and patch12 have already been
queued up as they are cooking in linux-next. Normally I prefer to funnel
these via my mmc tree, to avoid potential conflicts (mostly for mmc,
where more active developments are ongoing).

Let's leave this as is for the moment, but if we encounter non-trivial
conflicts, I assume you can drop the patches from your tree?

Kind regards
Uffe

> [...]
On Thu, Feb 15, 2024 at 05:40:37PM +0100, Ulf Hansson wrote:
> Looks like $subject patch, patch11 and patch12 have already been
> queued up as they are cooking in linux-next. Normally I prefer to funnel
> these via my mmc tree, to avoid potential conflicts (mostly for mmc,
> where more active developments are ongoing).

None of this is in my fresh linux-next pull, which would be rather
surprising anyway as I've just sent them out and Jens isn't that
quick to merge unreviewed series :)

That being said it depends on prep patches in the block tree and thus
I'd prefer merging this entire series through that tree.
On Thu, 15 Feb 2024 at 17:49, Christoph Hellwig <hch@lst.de> wrote:
>
> On Thu, Feb 15, 2024 at 05:40:37PM +0100, Ulf Hansson wrote:
> > Looks like $subject patch, patch11 and patch12 have already been
> > queued up as they are cooking in linux-next. Normally I prefer to funnel
> > these via my mmc tree, to avoid potential conflicts (mostly for mmc,
> > where more active developments are ongoing).
>
> None of this is in my fresh linux-next pull, which would be rather
> surprising anyway as I've just sent them out and Jens isn't that
> quick to merge unreviewed series :)

Weird. :-)

> That being said it depends on prep patches in the block tree and thus
> I'd prefer merging this entire series through that tree.

Okay, in that case, np! Next time, it would be nice to get that
information upfront in a cover letter or similar.

If not too late, feel free to add my acks on the three patches for mmc
and memstick.

Kind regards
Uffe
Hi Christoph,

On Thu, Feb 15, 2024 at 9:16 AM Christoph Hellwig <hch@lst.de> wrote:
> Pass the queue limit set at initialization time directly to
> blk_mq_alloc_disk instead of updating it right after the allocation.
>
> This requires refactoring the code a bit so that what was mmc_setup_queue
> before also allocates the gendisk now and actually sets all limits.
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Thanks for your patch, which is now commit 616f876617927732 ("mmc: pass
queue_limits to blk_mq_alloc_disk") in block/for-next.

I have bisected the following failure on White-Hawk (also seen on
other R-Car Gen3/4 systems) to this commit:

renesas_sdhi_internal_dmac ee140000.mmc: mmc0 base at 0x00000000ee140000, max clock rate 200 MHz
mmc0: new HS400 MMC card at address 0001
------------[ cut here ]------------
WARNING: CPU: 1 PID: 20 at block/blk-settings.c:202 blk_validate_limits+0x12c/0x1e0
Modules linked in:
CPU: 1 PID: 20 Comm: kworker/1:0 Not tainted 6.8.0-rc3-white-hawk-00084-g616f87661792 #223
Hardware name: Renesas White Hawk CPU and Breakout boards based on r8a779g0 (DT)
Workqueue: events_freezable mmc_rescan
pstate: 00000005 (nzcv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
pc : blk_validate_limits+0x12c/0x1e0
lr : blk_set_default_limits+0x14/0x1c
sp : ffffffc0829336f0
x29: ffffffc0829336f0 x28: 0000000000000000 x27: 0000000000000000
Micrel KSZ9031 Gigabit PHY e6800000.ethernet-ffffffff:00: attached PHY driver (mii_bus:phy_addr=e6800000.ethernet-ffffffff:00, irq=30)
x26: ffffff8443afd808 x25: ffffffc0825bd000 x24: ffffffc082933878
x23: 00000000ffffffff x22: ffffffc08147b258 x21: ffffff8443ab0c10
x20: 000000000000001a x19: ffffff8443b00000 x18: 0000000043789380
x17: ffffffc0806b2ea8 x16: ffffffc0803ab8b4 x15: ffffffc0803ab444
x14: ffffffc08039c26c x13: ffffffc080506854 x12: ffffffc080506700
x11: ffffffc08050669c x10: ffffffc080506478 x9 : ffffffc0803ad1a0
x8 : ffffff8443019120 x7 : ffffffc0803ad1a0 x6 : 0000000000000000
x5 : 0000000000000000 x4 : 0000000000000a00 x3 : 0000000000000fff
x2 : 0000000000000200 x1 : 0000000000010000 x0 : ffffffc082933878
Call trace:
 blk_validate_limits+0x12c/0x1e0
 blk_alloc_queue+0x7c/0x244
 blk_mq_alloc_queue+0x4c/0xac
 __blk_mq_alloc_disk+0x1c/0xc0
 mmc_alloc_disk+0x134/0x2b4
 mmc_init_queue+0x114/0x12c
 mmc_blk_alloc_req+0xf0/0x34c
 mmc_blk_probe+0x230/0x5b8
 mmc_bus_probe+0x18/0x20
 really_probe+0x138/0x270
 __driver_probe_device+0xec/0x104
 driver_probe_device+0x4c/0xf8
 __device_attach_driver+0xa8/0xc8
 bus_for_each_drv+0xa4/0xc8
 __device_attach+0xe4/0x144
 device_initial_probe+0x10/0x18
 bus_probe_device+0x38/0xa0
 device_add+0x520/0x654
 mmc_add_card+0x12c/0x28c
 mmc_attach_mmc+0xb8/0x154
 mmc_rescan+0x208/0x250
 process_scheduled_works+0x2b8/0x41c
 worker_thread+0x1cc/0x24c
 kthread+0xd8/0xe8
 ret_from_fork+0x10/0x20
irq event stamp: 434
hardirqs last enabled at (433): [<ffffffc0808e0ac0>] _raw_spin_unlock_irq+0x2c/0x40
hardirqs last disabled at (434): [<ffffffc0808dae28>] __schedule+0x1cc/0x84c
softirqs last enabled at (192): [<ffffffc080010300>] __do_softirq+0x1ac/0x360
softirqs last disabled at (185): [<ffffffc08001550c>] ____do_softirq+0xc/0x14
---[ end trace 0000000000000000 ]---
mmcblk: probe of mmc0:0001 failed with error -22

Reverting this commit fixes the issue.
> [...]
On Tue, Feb 20, 2024 at 11:01 PM Geert Uytterhoeven <geert@linux-m68k.org> wrote:
> On Thu, Feb 15, 2024 at 9:16 AM Christoph Hellwig <hch@lst.de> wrote:
> > Pass the queue limit set at initialization time directly to
> > blk_mq_alloc_disk instead of updating it right after the allocation.
> >
> > This requires refactoring the code a bit so that what was mmc_setup_queue
> > before also allocates the gendisk now and actually sets all limits.
> >
> > Signed-off-by: Christoph Hellwig <hch@lst.de>
>
> Thanks for your patch, which is now commit 616f876617927732 ("mmc: pass
> queue_limits to blk_mq_alloc_disk") in block/for-next.
>
> I have bisected the following failure on White-Hawk (also seen on
> other R-Car Gen3/4 systems) to this commit:
>
> renesas_sdhi_internal_dmac ee140000.mmc: mmc0 base at
> 0x00000000ee140000, max clock rate 200 MHz
> mmc0: new HS400 MMC card at address 0001
> ------------[ cut here ]------------
> WARNING: CPU: 1 PID: 20 at block/blk-settings.c:202
> blk_validate_limits+0x12c/0x1e0

Actual capacity should be:

mmc0: new HS400 MMC card at address 0001
mmcblk0: mmc0:0001 G1M15L 29.6 GiB
mmcblk0boot0: mmc0:0001 G1M15L 31.5 MiB
mmcblk0boot1: mmc0:0001 G1M15L 31.5 MiB
mmcblk0rpmb: mmc0:0001 G1M15L 4.00 MiB, chardev (245:0)

Gr{oetje,eeting}s,

Geert
On Tue, Feb 20, 2024 at 11:01:05PM +0100, Geert Uytterhoeven wrote:
> Hi Christoph,
>
> On Thu, Feb 15, 2024 at 9:16 AM Christoph Hellwig <hch@lst.de> wrote:
> > Pass the queue limit set at initialization time directly to
> > blk_mq_alloc_disk instead of updating it right after the allocation.
> >
> > This requires refactoring the code a bit so that what was mmc_setup_queue
> > before also allocates the gendisk now and actually sets all limits.
> >
> > Signed-off-by: Christoph Hellwig <hch@lst.de>
>
> Thanks for your patch, which is now commit 616f876617927732 ("mmc: pass
> queue_limits to blk_mq_alloc_disk") in block/for-next.
>
> I have bisected the following failure on White-Hawk (also seen on
> other R-Car Gen3/4 systems) to this commit:
>
> renesas_sdhi_internal_dmac ee140000.mmc: mmc0 base at
> 0x00000000ee140000, max clock rate 200 MHz
> mmc0: new HS400 MMC card at address 0001
> ------------[ cut here ]------------
> WARNING: CPU: 1 PID: 20 at block/blk-settings.c:202
> blk_validate_limits+0x12c/0x1e0

This is:

	if (lim->virt_boundary_mask) {
		if (WARN_ON_ONCE(lim->max_segment_size &&
				 lim->max_segment_size != UINT_MAX))
			return -EINVAL;

so we end up here with both a virt_boundary_mask and a
max_segment_size set, which is rather bogus. I think the
problem is the order of checks in the core blk_validate_limits
that artificially causes this. Can you try this patch?

diff --git a/block/blk-settings.c b/block/blk-settings.c
index c4406aacc0efc6..2120b6f9fef8ea 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -182,16 +182,6 @@ static int blk_validate_limits(struct queue_limits *lim)
 	if (WARN_ON_ONCE(lim->seg_boundary_mask < PAGE_SIZE - 1))
 		return -EINVAL;
 
-	/*
-	 * The maximum segment size has an odd historic 64k default that
-	 * drivers probably should override. Just like the I/O size we
-	 * require drivers to at least handle a full page per segment.
-	 */
-	if (!lim->max_segment_size)
-		lim->max_segment_size = BLK_MAX_SEGMENT_SIZE;
-	if (WARN_ON_ONCE(lim->max_segment_size < PAGE_SIZE))
-		return -EINVAL;
-
 	/*
 	 * Devices that require a virtual boundary do not support scatter/gather
 	 * I/O natively, but instead require a descriptor list entry for each
@@ -203,6 +193,16 @@
 				 lim->max_segment_size != UINT_MAX))
 			return -EINVAL;
 		lim->max_segment_size = UINT_MAX;
+	} else {
+		/*
+		 * The maximum segment size has an odd historic 64k default that
+		 * drivers probably should override. Just like the I/O size we
+		 * require drivers to at least handle a full page per segment.
+		 */
+		if (!lim->max_segment_size)
+			lim->max_segment_size = BLK_MAX_SEGMENT_SIZE;
+		if (WARN_ON_ONCE(lim->max_segment_size < PAGE_SIZE))
+			return -EINVAL;
 	}
 
 	/*
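[Editor's note: a minimal user-space model of the ordering problem described above; the struct limits, validate, and default_first names are illustrative, not kernel code. Applying the 64k max_segment_size default before the virt_boundary_mask check makes a limit the driver never set look like a conflicting one.]

/*
 * Simplified model of the ordering bug: the MMC host sets
 * virt_boundary_mask but leaves max_segment_size at 0. If validation
 * applies the 64k default *before* the virt_boundary check, the check
 * sees 64k != UINT_MAX and fails.
 */
#include <limits.h>
#include <stdio.h>

#define BLK_MAX_SEGMENT_SIZE	65536U	/* historic 64k default */

struct limits {
	unsigned long virt_boundary_mask;
	unsigned int max_segment_size;
};

static int validate(struct limits *lim, int default_first)
{
	if (default_first && !lim->max_segment_size)
		lim->max_segment_size = BLK_MAX_SEGMENT_SIZE;

	if (lim->virt_boundary_mask) {
		/* a virt_boundary implies an unlimited segment size */
		if (lim->max_segment_size &&
		    lim->max_segment_size != UINT_MAX)
			return -1;	/* the WARN_ON_ONCE in the trace */
		lim->max_segment_size = UINT_MAX;
	} else if (!lim->max_segment_size) {
		lim->max_segment_size = BLK_MAX_SEGMENT_SIZE;
	}
	return 0;
}

int main(void)
{
	struct limits a = { .virt_boundary_mask = 0xfff }, b = a;

	printf("default first:  %d\n", validate(&a, 1));	/* -1: rejected */
	printf("boundary first: %d\n", validate(&b, 0));	/*  0: ok */
	return 0;
}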
Hi Christoph,

On Wed, Feb 21, 2024 at 6:44 AM Christoph Hellwig <hch@lst.de> wrote:
> On Tue, Feb 20, 2024 at 11:01:05PM +0100, Geert Uytterhoeven wrote:
> > Thanks for your patch, which is now commit 616f876617927732 ("mmc: pass
> > queue_limits to blk_mq_alloc_disk") in block/for-next.
> >
> > I have bisected the following failure on White-Hawk (also seen on
> > other R-Car Gen3/4 systems) to this commit:
> >
> > renesas_sdhi_internal_dmac ee140000.mmc: mmc0 base at
> > 0x00000000ee140000, max clock rate 200 MHz
> > mmc0: new HS400 MMC card at address 0001
> > ------------[ cut here ]------------
> > WARNING: CPU: 1 PID: 20 at block/blk-settings.c:202
> > blk_validate_limits+0x12c/0x1e0
>
> This is:
>
> 	if (lim->virt_boundary_mask) {
> 		if (WARN_ON_ONCE(lim->max_segment_size &&
> 				 lim->max_segment_size != UINT_MAX))
> 			return -EINVAL;
>
> so we end up here with both a virt_boundary_mask and a
> max_segment_size set, which is rather bogus. I think the
> problem is the order of checks in the core blk_validate_limits
> that artificially causes this. Can you try this patch?

Thanks, good thinking, as that fixed the issue for me!
Tested-by: Geert Uytterhoeven <geert+renesas@glider.be>

Gr{oetje,eeting}s,

Geert
Hi Christoph,

On 15/02/2024 07:03, Christoph Hellwig wrote:
> Pass the queue limit set at initialization time directly to
> blk_mq_alloc_disk instead of updating it right after the allocation.
>
> This requires refactoring the code a bit so that what was mmc_setup_queue
> before also allocates the gendisk now and actually sets all limits.
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  drivers/mmc/core/queue.c | 97 +++++++++++++++++++++-------------------
>  1 file changed, 52 insertions(+), 45 deletions(-)
>
> [...]

We have just noticed that, since Linux v6.9 was released, if we build
the kernel with 64kB MMU pages, then we see the following WARNING and
probe failure ...
[ 2.828612] ------------[ cut here ]------------
[ 2.829243] WARNING: CPU: 1 PID: 87 at block/blk-settings.c:206 blk_validate_limits+0x1cc/0x234
[ 2.830417] Modules linked in:
[ 2.830963] CPU: 1 PID: 87 Comm: kworker/1:1 Not tainted 6.10.0-rc5 #1
[ 2.831773] Hardware name: NVIDIA Jetson AGX Xavier Developer Kit (DT)
[ 2.832538] Workqueue: events_freezable mmc_rescan
[ 2.833341] pstate: 80400009 (Nzcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
[ 2.834263] pc : blk_validate_limits+0x1cc/0x234
[ 2.834927] lr : blk_set_default_limits+0x18/0x24
[ 2.835654] sp : ffff800084c4f730
[ 2.836161] x29: ffff800084c4f730 x28: 0000000000000000 x27: 0000000000000000
[ 2.837448] x26: 0000000000000000 x25: ffff000084130008 x24: 00000000ffffffff
[ 2.838607] x23: ffff800082d84000 x22: ffff8000828b79c8 x21: ffff800084c4f8b8
[ 2.839754] x20: 0000000000000008 x19: ffff000084170000 x18: 0000000000000000
[ 2.840831] x17: 00000000900163fe x16: 000000008802ed71 x15: 00000000000003e8
[ 2.842228] x14: 0000000000000002 x13: 00000000000372ef x12: 0000000000002bb0
[ 2.843536] x11: 0000000000000000 x10: 0000000000004400 x9 : 0000000000000000
[ 2.847832] x8 : ffff0000841703a0 x7 : 0000000000000000 x6 : 0000000000000003
[ 2.855075] x5 : ffff000081279e00 x4 : 0000000000000000 x3 : 0000000000000200
[ 2.862140] x2 : 000000000000ffff x1 : 000000000000fe00 x0 : ffff800084c4f8b8
[ 2.869633] Call trace:
[ 2.872055]  blk_validate_limits+0x1cc/0x234
[ 2.876278]  blk_alloc_queue+0x7c/0x260
[ 2.880038]  blk_mq_alloc_queue+0x54/0xbc
[ 2.884504]  __blk_mq_alloc_disk+0x20/0x190
[ 2.888266]  mmc_alloc_disk+0xbc/0x250
[ 2.892062]  mmc_init_queue+0xe8/0x114
[ 2.896091]  mmc_blk_alloc_req+0xb4/0x374
[ 2.900237]  mmc_blk_probe+0x1d4/0x650
[ 2.904194]  mmc_bus_probe+0x20/0x2c
[ 2.907434]  really_probe+0xbc/0x2a4
[ 2.911391]  __driver_probe_device+0x78/0x12c
[ 2.915627]  driver_probe_device+0x40/0x160
[ 2.919587]  __device_attach_driver+0xb8/0x134
[ 2.923881]  bus_for_each_drv+0x80/0xdc
[ 2.927664]  __device_attach+0xa8/0x1b0
[ 2.931578]  device_initial_probe+0x14/0x20
[ 2.935845]  bus_probe_device+0xa8/0xac
[ 2.939606]  device_add+0x590/0x754
[ 2.942864]  mmc_add_card+0x238/0x2dc
[ 2.946691]  mmc_attach_mmc+0x12c/0x174
[ 2.950494]  mmc_rescan+0x27c/0x318
[ 2.954172]  process_one_work+0x154/0x298
[ 2.957919]  worker_thread+0x304/0x408
[ 2.961931]  kthread+0x118/0x11c
[ 2.965187]  ret_from_fork+0x10/0x20
[ 2.968835] ---[ end trace 0000000000000000 ]---
[ 2.974440] mmcblk mmc0:0001: probe with driver mmcblk failed with error -22

Bisect points to this commit. This is very similar to the issue Geert
reported [0] but we are still seeing this on the latest mainline with
v6.10-rc5. When building with 4kB MMU pages we don't see this issue when
testing on the same hardware.

Let me know if you have any thoughts?

Thanks
Jon

[0] https://lore.kernel.org/linux-mmc/CAMuHMdWV4nWQHUpBKM2gHWeH9j9c0Di4bhg+F4-SAPEAmZuNSA@mail.gmail.com/
On Thu, Jun 27, 2024 at 10:43:24AM +0100, Jon Hunter wrote:
> We have just noticed that, since Linux v6.9 was released, if we build
> the kernel with 64kB MMU pages, then we see the following WARNING and
> probe failure ...

The old code upgraded the limits to the PAGE_SIZE for this case after
issuing a warning. Your driver probably incorrectly advertised the
lower max_segment_size. Try setting it to 64k. I would have sent you
a patch for that, but I can't see what mmc host driver you are using.
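[Editor's note: a minimal user-space sketch of the arithmetic behind that failure; the 65535-byte host limit is an assumed value, and round_down here is a simplified stand-in for the kernel macro.]

#include <stdio.h>

#define round_down(x, y)	((x) / (y) * (y))

int main(void)
{
	unsigned int page_size = 65536;		/* 64kB MMU pages */
	unsigned int max_seg_size = 65535;	/* assumed host limit */
	unsigned int block_size = 512;

	/* what mmc_alloc_disk() feeds into lim.max_segment_size */
	unsigned int seg = round_down(max_seg_size, block_size);

	/* prints: max_segment_size = 65024 < PAGE_SIZE (65536): -EINVAL */
	printf("max_segment_size = %u < PAGE_SIZE (%u): %s\n",
	       seg, page_size, seg < page_size ? "-EINVAL" : "ok");
	return 0;
}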
On 27/06/2024 10:49, Christoph Hellwig wrote:
> On Thu, Jun 27, 2024 at 10:43:24AM +0100, Jon Hunter wrote:
> > We have just noticed that, since Linux v6.9 was released, if we build
> > the kernel with 64kB MMU pages, then we see the following WARNING and
> > probe failure ...
>
> The old code upgraded the limits to the PAGE_SIZE for this case after
> issuing a warning. Your driver probably incorrectly advertised the
> lower max_segment_size. Try setting it to 64k. I would have sent you
> a patch for that, but I can't see what mmc host driver you are using.

We are using the sdhci-tegra.c driver. I don't see it set in there, but
I see references to max_seg_size in the main sdhci.c driver.

Thanks,
Jon
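[Editor's note: a hypothetical sketch of the kind of host-side change Christoph suggests, not the actual sdhci/sdhci-tegra fix; the function name and probe-time placement are assumptions.]

#include <linux/minmax.h>
#include <linux/mmc/host.h>

/*
 * Clamp the advertised maximum segment size so it covers at least one
 * MMU page; blk_validate_limits() rejects max_segment_size < PAGE_SIZE.
 */
static void example_fixup_seg_size(struct mmc_host *mmc)
{
	mmc->max_seg_size = max_t(unsigned int, mmc->max_seg_size, PAGE_SIZE);
}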
diff --git a/drivers/mmc/core/queue.c b/drivers/mmc/core/queue.c
index 67ad186d132a69..2ae60d208cdf1e 100644
--- a/drivers/mmc/core/queue.c
+++ b/drivers/mmc/core/queue.c
@@ -174,8 +174,8 @@ static struct scatterlist *mmc_alloc_sg(unsigned short sg_len, gfp_t gfp)
 	return sg;
 }
 
-static void mmc_queue_setup_discard(struct request_queue *q,
-		struct mmc_card *card)
+static void mmc_queue_setup_discard(struct mmc_card *card,
+		struct queue_limits *lim)
 {
 	unsigned max_discard;
 
@@ -183,15 +183,17 @@ static void mmc_queue_setup_discard(struct request_queue *q,
 	if (!max_discard)
 		return;
 
-	blk_queue_max_discard_sectors(q, max_discard);
-	q->limits.discard_granularity = card->pref_erase << 9;
-	/* granularity must not be greater than max. discard */
-	if (card->pref_erase > max_discard)
-		q->limits.discard_granularity = SECTOR_SIZE;
+	lim->max_hw_discard_sectors = max_discard;
 	if (mmc_can_secure_erase_trim(card))
-		blk_queue_max_secure_erase_sectors(q, max_discard);
+		lim->max_secure_erase_sectors = max_discard;
 	if (mmc_can_trim(card) && card->erased_byte == 0)
-		blk_queue_max_write_zeroes_sectors(q, max_discard);
+		lim->max_write_zeroes_sectors = max_discard;
+
+	/* granularity must not be greater than max. discard */
+	if (card->pref_erase > max_discard)
+		lim->discard_granularity = SECTOR_SIZE;
+	else
+		lim->discard_granularity = card->pref_erase << 9;
 }
 
 static unsigned short mmc_get_max_segments(struct mmc_host *host)
@@ -341,40 +343,53 @@ static const struct blk_mq_ops mmc_mq_ops = {
 	.timeout = mmc_mq_timed_out,
 };
 
-static void mmc_setup_queue(struct mmc_queue *mq, struct mmc_card *card)
+static struct gendisk *mmc_alloc_disk(struct mmc_queue *mq,
+		struct mmc_card *card)
 {
 	struct mmc_host *host = card->host;
-	unsigned block_size = 512;
+	struct queue_limits lim = { };
+	struct gendisk *disk;
 
-	blk_queue_flag_set(QUEUE_FLAG_NONROT, mq->queue);
-	blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, mq->queue);
 	if (mmc_can_erase(card))
-		mmc_queue_setup_discard(mq->queue, card);
+		mmc_queue_setup_discard(card, &lim);
 
 	if (!mmc_dev(host)->dma_mask || !*mmc_dev(host)->dma_mask)
-		blk_queue_bounce_limit(mq->queue, BLK_BOUNCE_HIGH);
-	blk_queue_max_hw_sectors(mq->queue,
-		min(host->max_blk_count, host->max_req_size / 512));
-	if (host->can_dma_map_merge)
-		WARN(!blk_queue_can_use_dma_map_merging(mq->queue,
-							mmc_dev(host)),
-		     "merging was advertised but not possible");
-	blk_queue_max_segments(mq->queue, mmc_get_max_segments(host));
-
-	if (mmc_card_mmc(card) && card->ext_csd.data_sector_size) {
-		block_size = card->ext_csd.data_sector_size;
-		WARN_ON(block_size != 512 && block_size != 4096);
-	}
+		lim.bounce = BLK_BOUNCE_HIGH;
+
+	lim.max_hw_sectors = min(host->max_blk_count, host->max_req_size / 512);
+
+	if (mmc_card_mmc(card) && card->ext_csd.data_sector_size)
+		lim.logical_block_size = card->ext_csd.data_sector_size;
+	else
+		lim.logical_block_size = 512;
+
+	WARN_ON_ONCE(lim.logical_block_size != 512 &&
+		     lim.logical_block_size != 4096);
 
-	blk_queue_logical_block_size(mq->queue, block_size);
 	/*
-	 * After blk_queue_can_use_dma_map_merging() was called with succeed,
-	 * since it calls blk_queue_virt_boundary(), the mmc should not call
-	 * both blk_queue_max_segment_size().
+	 * Setting a virt_boundary implicity sets a max_segment_size, so try
+	 * to set the hardware one here.
 	 */
-	if (!host->can_dma_map_merge)
-		blk_queue_max_segment_size(mq->queue,
-			round_down(host->max_seg_size, block_size));
+	if (host->can_dma_map_merge) {
+		lim.virt_boundary_mask = dma_get_merge_boundary(mmc_dev(host));
+		lim.max_segments = MMC_DMA_MAP_MERGE_SEGMENTS;
+	} else {
+		lim.max_segment_size =
+			round_down(host->max_seg_size, lim.logical_block_size);
+		lim.max_segments = host->max_segs;
+	}
+
+	disk = blk_mq_alloc_disk(&mq->tag_set, &lim, mq);
+	if (IS_ERR(disk))
+		return disk;
+	mq->queue = disk->queue;
+
+	if (mmc_host_is_spi(host) && host->use_spi_crc)
+		blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, mq->queue);
+	blk_queue_rq_timeout(mq->queue, 60 * HZ);
+
+	blk_queue_flag_set(QUEUE_FLAG_NONROT, mq->queue);
+	blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, mq->queue);
 
 	dma_set_max_seg_size(mmc_dev(host), queue_max_segment_size(mq->queue));
 
@@ -386,6 +401,7 @@ static void mmc_setup_queue(struct mmc_queue *mq, struct mmc_card *card)
 	init_waitqueue_head(&mq->wait);
 
 	mmc_crypto_setup_queue(mq->queue, host);
+	return disk;
 }
 
 static inline bool mmc_merge_capable(struct mmc_host *host)
@@ -447,18 +463,9 @@ struct gendisk *mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card)
 		return ERR_PTR(ret);
 
 
-	disk = blk_mq_alloc_disk(&mq->tag_set, NULL, mq);
-	if (IS_ERR(disk)) {
+	disk = mmc_alloc_disk(mq, card);
+	if (IS_ERR(disk))
 		blk_mq_free_tag_set(&mq->tag_set);
-		return disk;
-	}
-	mq->queue = disk->queue;
-
-	if (mmc_host_is_spi(host) && host->use_spi_crc)
-		blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, mq->queue);
-	blk_queue_rq_timeout(mq->queue, 60 * HZ);
-
-	mmc_setup_queue(mq, card);
 	return disk;
 }
Pass the queue limit set at initialization time directly to
blk_mq_alloc_disk instead of updating it right after the allocation.

This requires refactoring the code a bit so that what was mmc_setup_queue
before also allocates the gendisk now and actually sets all limits.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/mmc/core/queue.c | 97 +++++++++++++++++++++-------------------
 1 file changed, 52 insertions(+), 45 deletions(-)
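[Editor's note: for context, a minimal sketch of the pattern this series converts drivers to, with assumed example limits; not a complete driver, and it relies on the post-series blk_mq_alloc_disk() signature shown in the diff above.]

#include <linux/blk-mq.h>

/*
 * Collect every limit up front in a stack-local queue_limits and pass
 * it to blk_mq_alloc_disk(), instead of mutating the queue with
 * blk_queue_* setters after allocation. The limits are then validated
 * once, atomically, at allocation time.
 */
static struct gendisk *example_alloc_disk(struct blk_mq_tag_set *set,
					  void *queuedata)
{
	struct queue_limits lim = {
		.logical_block_size	= 512,	/* assumed device geometry */
		.max_hw_sectors		= 256,
		.max_segments		= 64,
	};

	return blk_mq_alloc_disk(set, &lim, queuedata);
}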