diff mbox series

[2/2] Remove bdi_congested() and wb_congested() and related functions

Message ID 163936886727.23860.5245364396572576756.stgit@noble.brown (mailing list archive)
State New
Headers show
Series Remove some 'congested' tests | expand

Commit Message

NeilBrown Dec. 13, 2021, 4:14 a.m. UTC
These functions are no longer useful as the only bdis that report
congestion are in ceph, fuse, and nfs.  None of those bdis can be the
target of the calls in drbd, ext2, nilfs2, or xfs.

Removing the test on bdi_write_congested() in current_may_throttle()
could cause a small change in behaviour, but only when PF_LOCAL_THROTTLE
is set.

So replace the calls by 'false' and simplify the code - and remove the
functions.

Signed-off-by: NeilBrown <neilb@suse.de>
---
 drivers/block/drbd/drbd_int.h |    3 ---
 drivers/block/drbd/drbd_req.c |    3 +--
 fs/ext2/ialloc.c              |    2 --
 fs/nilfs2/segbuf.c            |   11 -----------
 fs/xfs/xfs_buf.c              |    3 ---
 include/linux/backing-dev.h   |   26 --------------------------
 mm/vmscan.c                   |    4 +---
 7 files changed, 2 insertions(+), 50 deletions(-)

Comments

Dave Chinner Dec. 13, 2021, 5:07 a.m. UTC | #1
On Mon, Dec 13, 2021 at 03:14:27PM +1100, NeilBrown wrote:
> These functions are no longer useful as the only bdis that report
> congestion are in ceph, fuse, and nfs.  None of those bdis can be the
> target of the calls in drbd, ext2, nilfs2, or xfs.
> 
> Removing the test on bdi_write_congested() in current_may_throttle()
> could cause a small change in behaviour, but only when PF_LOCAL_THROTTLE
> is set.
> 
> So replace the calls by 'false' and simplify the code - and remove the
> functions.
> 
> Signed-off-by: NeilBrown <neilb@suse.de>
....
> diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
> index 631c5a61d89b..22f73b3e888e 100644
> --- a/fs/xfs/xfs_buf.c
> +++ b/fs/xfs/xfs_buf.c
> @@ -843,9 +843,6 @@ xfs_buf_readahead_map(
>  {
>  	struct xfs_buf		*bp;
>  
> -	if (bdi_read_congested(target->bt_bdev->bd_disk->bdi))
> -		return;

Ok, but this isn't a "throttle writeback" test here - it's trying to
avoid having speculative readahead blocking on a full request queue
instead of just skipping the readahead IO. i.e. prevent readahead
thrashing and/or adding unnecessary read load when we already have a
full read queue...

So what is the replacement for that? We want to skip the entire
buffer lookup/setup/read overhead if we're likely to block on IO
submission - is there anything we can use to do this these days?

Cheers,

Dave.
kernel test robot Dec. 13, 2021, 5:56 a.m. UTC | #2
Hi NeilBrown,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on hnaz-mm/master]
[also build test WARNING on axboe-block/for-next konis-nilfs2/upstream xfs-linux/for-next linus/master v5.16-rc5]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting a patch, we suggest using '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/NeilBrown/Remove-some-congested-tests/20211213-121653
base:   https://github.com/hnaz/linux-mm master
config: arc-randconfig-r015-20211213 (https://download.01.org/0day-ci/archive/20211213/202112131323.fj31o6EV-lkp@intel.com/config)
compiler: arceb-elf-gcc (GCC) 11.2.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/0day-ci/linux/commit/41802b6debbde3d5553a8067ba2deb2035e6da6e
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review NeilBrown/Remove-some-congested-tests/20211213-121653
        git checkout 41802b6debbde3d5553a8067ba2deb2035e6da6e
        # save the config file to linux build tree
        mkdir build_dir
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-11.2.0 make.cross O=build_dir ARCH=arc SHELL=/bin/bash fs/ext2/

If you fix the issue, kindly add the following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

   fs/ext2/ialloc.c: In function 'ext2_preread_inode':
>> fs/ext2/ialloc.c:173:34: warning: variable 'bdi' set but not used [-Wunused-but-set-variable]
     173 |         struct backing_dev_info *bdi;
         |                                  ^~~


vim +/bdi +173 fs/ext2/ialloc.c

^1da177e4c3f41 Linus Torvalds    2005-04-16  154  
^1da177e4c3f41 Linus Torvalds    2005-04-16  155  /*
^1da177e4c3f41 Linus Torvalds    2005-04-16  156   * We perform asynchronous prereading of the new inode's inode block when
^1da177e4c3f41 Linus Torvalds    2005-04-16  157   * we create the inode, in the expectation that the inode will be written
^1da177e4c3f41 Linus Torvalds    2005-04-16  158   * back soon.  There are two reasons:
^1da177e4c3f41 Linus Torvalds    2005-04-16  159   *
^1da177e4c3f41 Linus Torvalds    2005-04-16  160   * - When creating a large number of files, the async prereads will be
^1da177e4c3f41 Linus Torvalds    2005-04-16  161   *   nicely merged into large reads
^1da177e4c3f41 Linus Torvalds    2005-04-16  162   * - When writing out a large number of inodes, we don't need to keep on
^1da177e4c3f41 Linus Torvalds    2005-04-16  163   *   stalling the writes while we read the inode block.
^1da177e4c3f41 Linus Torvalds    2005-04-16  164   *
^1da177e4c3f41 Linus Torvalds    2005-04-16  165   * FIXME: ext2_get_group_desc() needs to be simplified.
^1da177e4c3f41 Linus Torvalds    2005-04-16  166   */
^1da177e4c3f41 Linus Torvalds    2005-04-16  167  static void ext2_preread_inode(struct inode *inode)
^1da177e4c3f41 Linus Torvalds    2005-04-16  168  {
^1da177e4c3f41 Linus Torvalds    2005-04-16  169  	unsigned long block_group;
^1da177e4c3f41 Linus Torvalds    2005-04-16  170  	unsigned long offset;
^1da177e4c3f41 Linus Torvalds    2005-04-16  171  	unsigned long block;
^1da177e4c3f41 Linus Torvalds    2005-04-16  172  	struct ext2_group_desc * gdp;
^1da177e4c3f41 Linus Torvalds    2005-04-16 @173  	struct backing_dev_info *bdi;
^1da177e4c3f41 Linus Torvalds    2005-04-16  174  
de1414a654e66b Christoph Hellwig 2015-01-14  175  	bdi = inode_to_bdi(inode);
^1da177e4c3f41 Linus Torvalds    2005-04-16  176  
^1da177e4c3f41 Linus Torvalds    2005-04-16  177  	block_group = (inode->i_ino - 1) / EXT2_INODES_PER_GROUP(inode->i_sb);
ef2fb67989d30f Eric Sandeen      2007-10-16  178  	gdp = ext2_get_group_desc(inode->i_sb, block_group, NULL);
^1da177e4c3f41 Linus Torvalds    2005-04-16  179  	if (gdp == NULL)
^1da177e4c3f41 Linus Torvalds    2005-04-16  180  		return;
^1da177e4c3f41 Linus Torvalds    2005-04-16  181  
^1da177e4c3f41 Linus Torvalds    2005-04-16  182  	/*
^1da177e4c3f41 Linus Torvalds    2005-04-16  183  	 * Figure out the offset within the block group inode table
^1da177e4c3f41 Linus Torvalds    2005-04-16  184  	 */
^1da177e4c3f41 Linus Torvalds    2005-04-16  185  	offset = ((inode->i_ino - 1) % EXT2_INODES_PER_GROUP(inode->i_sb)) *
^1da177e4c3f41 Linus Torvalds    2005-04-16  186  				EXT2_INODE_SIZE(inode->i_sb);
^1da177e4c3f41 Linus Torvalds    2005-04-16  187  	block = le32_to_cpu(gdp->bg_inode_table) +
^1da177e4c3f41 Linus Torvalds    2005-04-16  188  				(offset >> EXT2_BLOCK_SIZE_BITS(inode->i_sb));
^1da177e4c3f41 Linus Torvalds    2005-04-16  189  	sb_breadahead(inode->i_sb, block);
^1da177e4c3f41 Linus Torvalds    2005-04-16  190  }
^1da177e4c3f41 Linus Torvalds    2005-04-16  191  

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
NeilBrown Dec. 13, 2021, 7:04 a.m. UTC | #3
On Mon, 13 Dec 2021, Dave Chinner wrote:
> On Mon, Dec 13, 2021 at 03:14:27PM +1100, NeilBrown wrote:
> > These functions are no longer useful as the only bdis that report
> > congestion are in ceph, fuse, and nfs.  None of those bdis can be the
> > target of the calls in drbd, ext2, nilfs2, or xfs.
> > 
> > Removing the test on bdi_write_congested() in current_may_throttle()
> > could cause a small change in behaviour, but only when PF_LOCAL_THROTTLE
> > is set.
> > 
> > So replace the calls by 'false' and simplify the code - and remove the
> > functions.
> > 
> > Signed-off-by: NeilBrown <neilb@suse.de>
> ....
> > diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
> > index 631c5a61d89b..22f73b3e888e 100644
> > --- a/fs/xfs/xfs_buf.c
> > +++ b/fs/xfs/xfs_buf.c
> > @@ -843,9 +843,6 @@ xfs_buf_readahead_map(
> >  {
> >  	struct xfs_buf		*bp;
> >  
> > -	if (bdi_read_congested(target->bt_bdev->bd_disk->bdi))
> > -		return;
> 
> Ok, but this isn't a "throttle writeback" test here - it's trying to
> avoid having speculative readahead blocking on a full request queue
> instead of just skipping the readahead IO. i.e. prevent readahead
> thrashing and/or adding unnecessary read load when we already have a
> full read queue...
> 
> So what is the replacement for that? We want to skip the entire
> buffer lookup/setup/read overhead if we're likely to block on IO
> submission - is there anything we can use to do this these days?

I don't think there is a concept of a "full read queue" any more.
There are things that can block an IO submission though.
There is allocation of the bio from a mempool, and there is
rq_qos_throttle, and there are probably other places where submission
can block.  I don't think you can tell in advance if a submission is
likely to block.

I think the idea is that the top level of the submission stack should
rate-limit based on the estimated throughput of the stack.  I think
write-back does this.  I don't know about read-ahead.

NeilBrown
diff mbox series

Patch

diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index f27d5b0f9a0b..f804b1bfb3e6 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -638,9 +638,6 @@  enum {
 	STATE_SENT,		/* Do not change state/UUIDs while this is set */
 	CALLBACK_PENDING,	/* Whether we have a call_usermodehelper(, UMH_WAIT_PROC)
 				 * pending, from drbd worker context.
-				 * If set, bdi_write_congested() returns true,
-				 * so shrink_page_list() would not recurse into,
-				 * and potentially deadlock on, this drbd worker.
 				 */
 	DISCONNECT_SENT,
 
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 3235532ae077..2e5fb7e442e3 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -909,8 +909,7 @@  static bool remote_due_to_read_balancing(struct drbd_device *device, sector_t se
 
 	switch (rbm) {
 	case RB_CONGESTED_REMOTE:
-		return bdi_read_congested(
-			device->ldev->backing_bdev->bd_disk->bdi);
+		return 0;
 	case RB_LEAST_PENDING:
 		return atomic_read(&device->local_cnt) >
 			atomic_read(&device->ap_pending_cnt) + atomic_read(&device->rs_pending_cnt);
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index df14e750e9fe..d632764da240 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -173,8 +173,6 @@  static void ext2_preread_inode(struct inode *inode)
 	struct backing_dev_info *bdi;
 
 	bdi = inode_to_bdi(inode);
-	if (bdi_rw_congested(bdi))
-		return;
 
 	block_group = (inode->i_ino - 1) / EXT2_INODES_PER_GROUP(inode->i_sb);
 	gdp = ext2_get_group_desc(inode->i_sb, block_group, NULL);
diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c
index 43287b0d3e9b..d1ebc9da7130 100644
--- a/fs/nilfs2/segbuf.c
+++ b/fs/nilfs2/segbuf.c
@@ -343,17 +343,6 @@  static int nilfs_segbuf_submit_bio(struct nilfs_segment_buffer *segbuf,
 	struct bio *bio = wi->bio;
 	int err;
 
-	if (segbuf->sb_nbio > 0 &&
-	    bdi_write_congested(segbuf->sb_super->s_bdi)) {
-		wait_for_completion(&segbuf->sb_bio_event);
-		segbuf->sb_nbio--;
-		if (unlikely(atomic_read(&segbuf->sb_err))) {
-			bio_put(bio);
-			err = -EIO;
-			goto failed;
-		}
-	}
-
 	bio->bi_end_io = nilfs_end_bio_write;
 	bio->bi_private = segbuf;
 	bio_set_op_attrs(bio, mode, mode_flags);
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 631c5a61d89b..22f73b3e888e 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -843,9 +843,6 @@  xfs_buf_readahead_map(
 {
 	struct xfs_buf		*bp;
 
-	if (bdi_read_congested(target->bt_bdev->bd_disk->bdi))
-		return;
-
 	xfs_buf_read_map(target, map, nmaps,
 		     XBF_TRYLOCK | XBF_ASYNC | XBF_READ_AHEAD, &bp, ops,
 		     __this_address);
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 860b675c2929..2d764566280c 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -135,11 +135,6 @@  static inline bool writeback_in_progress(struct bdi_writeback *wb)
 
 struct backing_dev_info *inode_to_bdi(struct inode *inode);
 
-static inline int wb_congested(struct bdi_writeback *wb, int cong_bits)
-{
-	return wb->congested & cong_bits;
-}
-
 long congestion_wait(int sync, long timeout);
 
 static inline bool mapping_can_writeback(struct address_space *mapping)
@@ -391,27 +386,6 @@  static inline void wb_blkcg_offline(struct blkcg *blkcg)
 
 #endif	/* CONFIG_CGROUP_WRITEBACK */
 
-static inline int bdi_congested(struct backing_dev_info *bdi, int cong_bits)
-{
-	return wb_congested(&bdi->wb, cong_bits);
-}
-
-static inline int bdi_read_congested(struct backing_dev_info *bdi)
-{
-	return bdi_congested(bdi, 1 << WB_sync_congested);
-}
-
-static inline int bdi_write_congested(struct backing_dev_info *bdi)
-{
-	return bdi_congested(bdi, 1 << WB_async_congested);
-}
-
-static inline int bdi_rw_congested(struct backing_dev_info *bdi)
-{
-	return bdi_congested(bdi, (1 << WB_sync_congested) |
-				  (1 << WB_async_congested));
-}
-
 const char *bdi_dev_name(struct backing_dev_info *bdi);
 
 #endif	/* _LINUX_BACKING_DEV_H */
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 540aa0ea67ff..f46a7a17dc49 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2321,9 +2321,7 @@  static unsigned int move_pages_to_lru(struct lruvec *lruvec,
  */
 static int current_may_throttle(void)
 {
-	return !(current->flags & PF_LOCAL_THROTTLE) ||
-		current->backing_dev_info == NULL ||
-		bdi_write_congested(current->backing_dev_info);
+	return !(current->flags & PF_LOCAL_THROTTLE);
 }
 
 /*