diff mbox series

[09/23] MM: submit multipage reads for SWP_FS_OPS swap-space

Message ID 164299611278.26253.14950274629759580371.stgit@noble.brown (mailing list archive)
State New
Headers show
Series Repair SWAP-over_NFS | expand

Commit Message

NeilBrown Jan. 24, 2022, 3:48 a.m. UTC
swap_readpage() is given one page at a time, but maybe called repeatedly
in succession.
For block-device swapspace, the blk_plug functionality allows the
multiple pages to be combined together at lower layers.
That cannot be used for SWP_FS_OPS as blk_plug may not exist - it is
only active when CONFIG_BLOCK=y.  Consequently all swap reads over NFS
are single page reads.

With this patch we pass in a pointer-to-pointer when swap_readpage can
store state between calls - much like the effect of blk_plug.  After
calling swap_readpage() some number of times, the state will be passed
to swap_read_unplug() which can submit the combined request.

Some caller currently call blk_finish_plug() *before* the final call to
swap_readpage(), so the last page cannot be included.  This patch moves
blk_finish_plug() to after the last call, and calls swap_read_unplug()
there too.

Signed-off-by: NeilBrown <neilb@suse.de>
---
 mm/madvise.c    |    8 +++-
 mm/memory.c     |    2 +
 mm/page_io.c    |  102 +++++++++++++++++++++++++++++++++++--------------------
 mm/swap.h       |   16 +++++++--
 mm/swap_state.c |   19 +++++++---
 5 files changed, 98 insertions(+), 49 deletions(-)

Comments

kernel test robot Jan. 24, 2022, 8:25 a.m. UTC | #1
Hi NeilBrown,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on linus/master]
[also build test ERROR on v5.17-rc1 next-20220124]
[cannot apply to trondmy-nfs/linux-next cifs/for-next hnaz-mm/master]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/NeilBrown/Repair-SWAP-over_NFS/20220124-115716
base:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git dd81e1c7d5fb126e5fbc5c9e334d7b3ec29a16a0
config: powerpc-allnoconfig (https://download.01.org/0day-ci/archive/20220124/202201241613.8J5z5arQ-lkp@intel.com/config)
compiler: powerpc-linux-gcc (GCC) 11.2.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/0day-ci/linux/commit/63bff668aa0537d7ccef9ed428809fc16c1a6b6c
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review NeilBrown/Repair-SWAP-over_NFS/20220124-115716
        git checkout 63bff668aa0537d7ccef9ed428809fc16c1a6b6c
        # save the config file to linux build tree
        mkdir build_dir
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-11.2.0 make.cross O=build_dir ARCH=powerpc SHELL=/bin/bash

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All error/warnings (new ones prefixed by >>):

   In file included from mm/vmscan.c:61:
   mm/swap.h:66:40: warning: 'struct swap_iocb' declared inside parameter list will not be visible outside of this definition or declaration
      66 |                                 struct swap_iocb **plug);
         |                                        ^~~~~~~~~
>> mm/swap.h:67:1: error: expected identifier or '(' before '{' token
      67 | {
         | ^
   mm/swap.h:65:19: warning: 'swap_readpage' declared 'static' but never defined [-Wunused-function]
      65 | static inline int swap_readpage(struct page *page, bool do_poll,
         |                   ^~~~~~~~~~~~~
--
   In file included from mm/memory.c:89:
   mm/swap.h:66:40: warning: 'struct swap_iocb' declared inside parameter list will not be visible outside of this definition or declaration
      66 |                                 struct swap_iocb **plug);
         |                                        ^~~~~~~~~
>> mm/swap.h:67:1: error: expected identifier or '(' before '{' token
      67 | {
         | ^
>> mm/swap.h:65:19: warning: 'swap_readpage' used but never defined
      65 | static inline int swap_readpage(struct page *page, bool do_poll,
         |                   ^~~~~~~~~~~~~
--
   In file included from mm/page_alloc.c:84:
   mm/swap.h:66:40: warning: 'struct swap_iocb' declared inside parameter list will not be visible outside of this definition or declaration
      66 |                                 struct swap_iocb **plug);
         |                                        ^~~~~~~~~
>> mm/swap.h:67:1: error: expected identifier or '(' before '{' token
      67 | {
         | ^
   mm/page_alloc.c:3821:15: warning: no previous prototype for 'should_fail_alloc_page' [-Wmissing-prototypes]
    3821 | noinline bool should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
         |               ^~~~~~~~~~~~~~~~~~~~~~
   In file included from mm/page_alloc.c:84:
   mm/swap.h:65:19: warning: 'swap_readpage' declared 'static' but never defined [-Wunused-function]
      65 | static inline int swap_readpage(struct page *page, bool do_poll,
         |                   ^~~~~~~~~~~~~


vim +67 mm/swap.h

50dceef273a619 NeilBrown 2022-01-24  45  
50dceef273a619 NeilBrown 2022-01-24  46  struct page *read_swap_cache_async(swp_entry_t, gfp_t,
50dceef273a619 NeilBrown 2022-01-24  47  				   struct vm_area_struct *vma,
50dceef273a619 NeilBrown 2022-01-24  48  				   unsigned long addr,
63bff668aa0537 NeilBrown 2022-01-24  49  				   bool do_poll,
63bff668aa0537 NeilBrown 2022-01-24  50  				   struct swap_iocb **plug);
50dceef273a619 NeilBrown 2022-01-24  51  struct page *__read_swap_cache_async(swp_entry_t, gfp_t,
50dceef273a619 NeilBrown 2022-01-24  52  				     struct vm_area_struct *vma,
50dceef273a619 NeilBrown 2022-01-24  53  				     unsigned long addr,
50dceef273a619 NeilBrown 2022-01-24  54  				     bool *new_page_allocated);
50dceef273a619 NeilBrown 2022-01-24  55  struct page *swap_cluster_readahead(swp_entry_t entry, gfp_t flag,
50dceef273a619 NeilBrown 2022-01-24  56  				    struct vm_fault *vmf);
50dceef273a619 NeilBrown 2022-01-24  57  struct page *swapin_readahead(swp_entry_t entry, gfp_t flag,
50dceef273a619 NeilBrown 2022-01-24  58  			      struct vm_fault *vmf);
50dceef273a619 NeilBrown 2022-01-24  59  
12cf545fe71035 NeilBrown 2022-01-24  60  static inline unsigned int page_swap_flags(struct page *page)
12cf545fe71035 NeilBrown 2022-01-24  61  {
12cf545fe71035 NeilBrown 2022-01-24  62  	return page_swap_info(page)->flags;
12cf545fe71035 NeilBrown 2022-01-24  63  }
50dceef273a619 NeilBrown 2022-01-24  64  #else /* CONFIG_SWAP */
63bff668aa0537 NeilBrown 2022-01-24 @65  static inline int swap_readpage(struct page *page, bool do_poll,
63bff668aa0537 NeilBrown 2022-01-24 @66  				struct swap_iocb **plug);
50dceef273a619 NeilBrown 2022-01-24 @67  {
50dceef273a619 NeilBrown 2022-01-24  68  	return 0;
50dceef273a619 NeilBrown 2022-01-24  69  }
50dceef273a619 NeilBrown 2022-01-24  70  

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
Christoph Hellwig Jan. 24, 2022, 8:52 a.m. UTC | #2
On Mon, Jan 24, 2022 at 02:48:32PM +1100, NeilBrown wrote:
> swap_readpage() is given one page at a time, but maybe called repeatedly
> in succession.
> For block-device swapspace, the blk_plug functionality allows the
> multiple pages to be combined together at lower layers.
> That cannot be used for SWP_FS_OPS as blk_plug may not exist - it is
> only active when CONFIG_BLOCK=y.  Consequently all swap reads over NFS
> are single page reads.
> 
> With this patch we pass in a pointer-to-pointer when swap_readpage can
> store state between calls - much like the effect of blk_plug.  After
> calling swap_readpage() some number of times, the state will be passed
> to swap_read_unplug() which can submit the combined request.
> 
> Some caller currently call blk_finish_plug() *before* the final call to
> swap_readpage(), so the last page cannot be included.  This patch moves
> blk_finish_plug() to after the last call, and calls swap_read_unplug()
> there too.

Buildbot complais that we might need a forward declaration of
struct swap_iocb, but otherwise this looks good:

Reviewed-by: Christoph Hellwig <hch@lst.de>
kernel test robot Jan. 24, 2022, 9:27 a.m. UTC | #3
Hi NeilBrown,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on linus/master]
[also build test ERROR on v5.17-rc1 next-20220124]
[cannot apply to trondmy-nfs/linux-next cifs/for-next]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/NeilBrown/Repair-SWAP-over_NFS/20220124-115716
base:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git dd81e1c7d5fb126e5fbc5c9e334d7b3ec29a16a0
config: i386-randconfig-a011-20220124 (https://download.01.org/0day-ci/archive/20220124/202201241747.X9gXaaeP-lkp@intel.com/config)
compiler: clang version 14.0.0 (https://github.com/llvm/llvm-project 9006bf424847bf91f0a624ffc27ad165c7b804c4)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/0day-ci/linux/commit/63bff668aa0537d7ccef9ed428809fc16c1a6b6c
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review NeilBrown/Repair-SWAP-over_NFS/20220124-115716
        git checkout 63bff668aa0537d7ccef9ed428809fc16c1a6b6c
        # save the config file to linux build tree
        mkdir build_dir
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=i386 SHELL=/bin/bash

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   In file included from mm/vmscan.c:61:
   mm/swap.h:66:12: warning: declaration of 'struct swap_iocb' will not be visible outside of this function [-Wvisibility]
                                   struct swap_iocb **plug);
                                          ^
>> mm/swap.h:67:1: error: expected identifier or '('
   {
   ^
   1 warning and 1 error generated.
--
   In file included from mm/shmem.c:41:
   mm/swap.h:66:12: warning: declaration of 'struct swap_iocb' will not be visible outside of this function [-Wvisibility]
                                   struct swap_iocb **plug);
                                          ^
>> mm/swap.h:67:1: error: expected identifier or '('
   {
   ^
   In file included from mm/shmem.c:56:
   include/linux/mman.h:158:9: warning: division by zero is undefined [-Wdivision-by-zero]
                  _calc_vm_trans(flags, MAP_SYNC,       VM_SYNC      ) |
                  ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/mman.h:136:21: note: expanded from macro '_calc_vm_trans'
      : ((x) & (bit1)) / ((bit1) / (bit2))))
                       ^ ~~~~~~~~~~~~~~~~~
   2 warnings and 1 error generated.
--
   In file included from mm/page_alloc.c:84:
   mm/swap.h:66:12: warning: declaration of 'struct swap_iocb' will not be visible outside of this function [-Wvisibility]
                                   struct swap_iocb **plug);
                                          ^
>> mm/swap.h:67:1: error: expected identifier or '('
   {
   ^
   mm/page_alloc.c:3821:15: warning: no previous prototype for function 'should_fail_alloc_page' [-Wmissing-prototypes]
   noinline bool should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
                 ^
   mm/page_alloc.c:3821:10: note: declare 'static' if the function is not intended to be used outside of this translation unit
   noinline bool should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
            ^
            static 
   2 warnings and 1 error generated.


vim +67 mm/swap.h

50dceef273a619 NeilBrown 2022-01-24  45  
50dceef273a619 NeilBrown 2022-01-24  46  struct page *read_swap_cache_async(swp_entry_t, gfp_t,
50dceef273a619 NeilBrown 2022-01-24  47  				   struct vm_area_struct *vma,
50dceef273a619 NeilBrown 2022-01-24  48  				   unsigned long addr,
63bff668aa0537 NeilBrown 2022-01-24  49  				   bool do_poll,
63bff668aa0537 NeilBrown 2022-01-24  50  				   struct swap_iocb **plug);
50dceef273a619 NeilBrown 2022-01-24  51  struct page *__read_swap_cache_async(swp_entry_t, gfp_t,
50dceef273a619 NeilBrown 2022-01-24  52  				     struct vm_area_struct *vma,
50dceef273a619 NeilBrown 2022-01-24  53  				     unsigned long addr,
50dceef273a619 NeilBrown 2022-01-24  54  				     bool *new_page_allocated);
50dceef273a619 NeilBrown 2022-01-24  55  struct page *swap_cluster_readahead(swp_entry_t entry, gfp_t flag,
50dceef273a619 NeilBrown 2022-01-24  56  				    struct vm_fault *vmf);
50dceef273a619 NeilBrown 2022-01-24  57  struct page *swapin_readahead(swp_entry_t entry, gfp_t flag,
50dceef273a619 NeilBrown 2022-01-24  58  			      struct vm_fault *vmf);
50dceef273a619 NeilBrown 2022-01-24  59  
12cf545fe71035 NeilBrown 2022-01-24  60  static inline unsigned int page_swap_flags(struct page *page)
12cf545fe71035 NeilBrown 2022-01-24  61  {
12cf545fe71035 NeilBrown 2022-01-24  62  	return page_swap_info(page)->flags;
12cf545fe71035 NeilBrown 2022-01-24  63  }
50dceef273a619 NeilBrown 2022-01-24  64  #else /* CONFIG_SWAP */
63bff668aa0537 NeilBrown 2022-01-24  65  static inline int swap_readpage(struct page *page, bool do_poll,
63bff668aa0537 NeilBrown 2022-01-24 @66  				struct swap_iocb **plug);
50dceef273a619 NeilBrown 2022-01-24 @67  {
50dceef273a619 NeilBrown 2022-01-24  68  	return 0;
50dceef273a619 NeilBrown 2022-01-24  69  }
50dceef273a619 NeilBrown 2022-01-24  70  

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
Mark Hemment Jan. 24, 2022, 1:16 p.m. UTC | #4
On Mon, 24 Jan 2022 at 03:52, NeilBrown <neilb@suse.de> wrote:
>
> swap_readpage() is given one page at a time, but maybe called repeatedly
> in succession.
> For block-device swapspace, the blk_plug functionality allows the
> multiple pages to be combined together at lower layers.
> That cannot be used for SWP_FS_OPS as blk_plug may not exist - it is
> only active when CONFIG_BLOCK=y.  Consequently all swap reads over NFS
> are single page reads.
>
> With this patch we pass in a pointer-to-pointer when swap_readpage can
> store state between calls - much like the effect of blk_plug.  After
> calling swap_readpage() some number of times, the state will be passed
> to swap_read_unplug() which can submit the combined request.
>
> Some caller currently call blk_finish_plug() *before* the final call to
> swap_readpage(), so the last page cannot be included.  This patch moves
> blk_finish_plug() to after the last call, and calls swap_read_unplug()
> there too.
>
> Signed-off-by: NeilBrown <neilb@suse.de>
> ---
>  mm/madvise.c    |    8 +++-
>  mm/memory.c     |    2 +
>  mm/page_io.c    |  102 +++++++++++++++++++++++++++++++++++--------------------
>  mm/swap.h       |   16 +++++++--
>  mm/swap_state.c |   19 +++++++---
>  5 files changed, 98 insertions(+), 49 deletions(-)
>
...
> diff --git a/mm/page_io.c b/mm/page_io.c
> index 6e32ca35d9b6..bcf655d650c8 100644
> --- a/mm/page_io.c
> +++ b/mm/page_io.c
> @@ -390,46 +391,60 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc,
>  static void sio_read_complete(struct kiocb *iocb, long ret)
>  {
>         struct swap_iocb *sio = container_of(iocb, struct swap_iocb, iocb);
> -       struct page *page = sio->bvec.bv_page;
> -
> -       if (ret != 0 && ret != PAGE_SIZE) {
> -               SetPageError(page);
> -               ClearPageUptodate(page);
> -               pr_alert_ratelimited("Read-error on swap-device\n");
> -       } else {
> -               SetPageUptodate(page);
> -               count_vm_event(PSWPIN);
> +       int p;
> +
> +       for (p = 0; p < sio->pages; p++) {
> +               struct page *page = sio->bvec[p].bv_page;
> +               if (ret != 0 && ret != PAGE_SIZE * sio->pages) {
> +                       SetPageError(page);
> +                       ClearPageUptodate(page);
> +                       pr_alert_ratelimited("Read-error on swap-device\n");
> +               } else {
> +                       SetPageUptodate(page);
> +                       count_vm_event(PSWPIN);
> +               }
> +               unlock_page(page);
>         }
> -       unlock_page(page);
>         mempool_free(sio, sio_pool);
>  }

Trivial: on success, could be single call to count_vm_events(PSWPIN,
sio->pages).
Similar comment for PSWPOUT in sio_write_complete()

>
> -static int swap_readpage_fs(struct page *page)
> +static void swap_readpage_fs(struct page *page,
> +                            struct swap_iocb **plug)
>  {
>         struct swap_info_struct *sis = page_swap_info(page);
> -       struct file *swap_file = sis->swap_file;
> -       struct address_space *mapping = swap_file->f_mapping;
> -       struct iov_iter from;
> -       struct swap_iocb *sio;
> +       struct swap_iocb *sio = NULL;
>         loff_t pos = page_file_offset(page);
> -       int ret;
> -
> -       sio = mempool_alloc(sio_pool, GFP_KERNEL);
> -       init_sync_kiocb(&sio->iocb, swap_file);
> -       sio->iocb.ki_pos = pos;
> -       sio->iocb.ki_complete = sio_read_complete;
> -       sio->bvec.bv_page = page;
> -       sio->bvec.bv_len = PAGE_SIZE;
> -       sio->bvec.bv_offset = 0;
>
> -       iov_iter_bvec(&from, READ, &sio->bvec, 1, PAGE_SIZE);
> -       ret = mapping->a_ops->swap_rw(&sio->iocb, &from);
> -       if (ret != -EIOCBQUEUED)
> -               sio_read_complete(&sio->iocb, ret);
> -       return ret;
> +       if (*plug)
> +               sio = *plug;

'plug' can be NULL when called from do_swap_page();
        if (plug && *plug)

Cheers,
Mark
NeilBrown Jan. 26, 2022, 10:04 p.m. UTC | #5
On Tue, 25 Jan 2022, Mark Hemment wrote:
> On Mon, 24 Jan 2022 at 03:52, NeilBrown <neilb@suse.de> wrote:
> >
> > swap_readpage() is given one page at a time, but maybe called repeatedly
> > in succession.
> > For block-device swapspace, the blk_plug functionality allows the
> > multiple pages to be combined together at lower layers.
> > That cannot be used for SWP_FS_OPS as blk_plug may not exist - it is
> > only active when CONFIG_BLOCK=y.  Consequently all swap reads over NFS
> > are single page reads.
> >
> > With this patch we pass in a pointer-to-pointer when swap_readpage can
> > store state between calls - much like the effect of blk_plug.  After
> > calling swap_readpage() some number of times, the state will be passed
> > to swap_read_unplug() which can submit the combined request.
> >
> > Some caller currently call blk_finish_plug() *before* the final call to
> > swap_readpage(), so the last page cannot be included.  This patch moves
> > blk_finish_plug() to after the last call, and calls swap_read_unplug()
> > there too.
> >
> > Signed-off-by: NeilBrown <neilb@suse.de>
> > ---
> >  mm/madvise.c    |    8 +++-
> >  mm/memory.c     |    2 +
> >  mm/page_io.c    |  102 +++++++++++++++++++++++++++++++++++--------------------
> >  mm/swap.h       |   16 +++++++--
> >  mm/swap_state.c |   19 +++++++---
> >  5 files changed, 98 insertions(+), 49 deletions(-)
> >
> ...
> > diff --git a/mm/page_io.c b/mm/page_io.c
> > index 6e32ca35d9b6..bcf655d650c8 100644
> > --- a/mm/page_io.c
> > +++ b/mm/page_io.c
> > @@ -390,46 +391,60 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc,
> >  static void sio_read_complete(struct kiocb *iocb, long ret)
> >  {
> >         struct swap_iocb *sio = container_of(iocb, struct swap_iocb, iocb);
> > -       struct page *page = sio->bvec.bv_page;
> > -
> > -       if (ret != 0 && ret != PAGE_SIZE) {
> > -               SetPageError(page);
> > -               ClearPageUptodate(page);
> > -               pr_alert_ratelimited("Read-error on swap-device\n");
> > -       } else {
> > -               SetPageUptodate(page);
> > -               count_vm_event(PSWPIN);
> > +       int p;
> > +
> > +       for (p = 0; p < sio->pages; p++) {
> > +               struct page *page = sio->bvec[p].bv_page;
> > +               if (ret != 0 && ret != PAGE_SIZE * sio->pages) {
> > +                       SetPageError(page);
> > +                       ClearPageUptodate(page);
> > +                       pr_alert_ratelimited("Read-error on swap-device\n");
> > +               } else {
> > +                       SetPageUptodate(page);
> > +                       count_vm_event(PSWPIN);
> > +               }
> > +               unlock_page(page);
> >         }
> > -       unlock_page(page);
> >         mempool_free(sio, sio_pool);
> >  }
> 
> Trivial: on success, could be single call to count_vm_events(PSWPIN,
> sio->pages).
> Similar comment for PSWPOUT in sio_write_complete()
> 
> >
> > -static int swap_readpage_fs(struct page *page)
> > +static void swap_readpage_fs(struct page *page,
> > +                            struct swap_iocb **plug)
> >  {
> >         struct swap_info_struct *sis = page_swap_info(page);
> > -       struct file *swap_file = sis->swap_file;
> > -       struct address_space *mapping = swap_file->f_mapping;
> > -       struct iov_iter from;
> > -       struct swap_iocb *sio;
> > +       struct swap_iocb *sio = NULL;
> >         loff_t pos = page_file_offset(page);
> > -       int ret;
> > -
> > -       sio = mempool_alloc(sio_pool, GFP_KERNEL);
> > -       init_sync_kiocb(&sio->iocb, swap_file);
> > -       sio->iocb.ki_pos = pos;
> > -       sio->iocb.ki_complete = sio_read_complete;
> > -       sio->bvec.bv_page = page;
> > -       sio->bvec.bv_len = PAGE_SIZE;
> > -       sio->bvec.bv_offset = 0;
> >
> > -       iov_iter_bvec(&from, READ, &sio->bvec, 1, PAGE_SIZE);
> > -       ret = mapping->a_ops->swap_rw(&sio->iocb, &from);
> > -       if (ret != -EIOCBQUEUED)
> > -               sio_read_complete(&sio->iocb, ret);
> > -       return ret;
> > +       if (*plug)
> > +               sio = *plug;
> 
> 'plug' can be NULL when called from do_swap_page();
>         if (plug && *plug)

Thanks for catching that!  I actually want it to be
   if (plug)
       sio = *plug;

which nicely balances the
   if (plug)
       *plug = sio;
at the end of the function.

Thanks,
NeilBrown
Geert Uytterhoeven Feb. 8, 2022, 11:07 a.m. UTC | #6
Hi Neil,

On Mon, Jan 24, 2022 at 5:49 PM NeilBrown <neilb@suse.de> wrote:
> swap_readpage() is given one page at a time, but maybe called repeatedly
> in succession.
> For block-device swapspace, the blk_plug functionality allows the
> multiple pages to be combined together at lower layers.
> That cannot be used for SWP_FS_OPS as blk_plug may not exist - it is
> only active when CONFIG_BLOCK=y.  Consequently all swap reads over NFS
> are single page reads.
>
> With this patch we pass in a pointer-to-pointer when swap_readpage can
> store state between calls - much like the effect of blk_plug.  After
> calling swap_readpage() some number of times, the state will be passed
> to swap_read_unplug() which can submit the combined request.
>
> Some caller currently call blk_finish_plug() *before* the final call to
> swap_readpage(), so the last page cannot be included.  This patch moves
> blk_finish_plug() to after the last call, and calls swap_read_unplug()
> there too.
>
> Signed-off-by: NeilBrown <neilb@suse.de>

Thanks for your patch!

> --- a/mm/swap.h
> +++ b/mm/swap.h

> @@ -53,7 +62,8 @@ static inline unsigned int page_swap_flags(struct page *page)
>         return page_swap_info(page)->flags;
>  }
>  #else /* CONFIG_SWAP */
> -static inline int swap_readpage(struct page *page, bool do_poll)
> +static inline int swap_readpage(struct page *page, bool do_poll,
> +                               struct swap_iocb **plug);

Bogus semicolon.

>  {
>         return 0;
>  }

Gr{oetje,eeting}s,

                        Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
                                -- Linus Torvalds
diff mbox series

Patch

diff --git a/mm/madvise.c b/mm/madvise.c
index 1ee4b7583379..2b1ab30af141 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -225,6 +225,7 @@  static int swapin_walk_pmd_entry(pmd_t *pmd, unsigned long start,
 	pte_t *orig_pte;
 	struct vm_area_struct *vma = walk->private;
 	unsigned long index;
+	struct swap_iocb *splug = NULL;
 
 	if (pmd_none_or_trans_huge_or_clear_bad(pmd))
 		return 0;
@@ -246,10 +247,11 @@  static int swapin_walk_pmd_entry(pmd_t *pmd, unsigned long start,
 			continue;
 
 		page = read_swap_cache_async(entry, GFP_HIGHUSER_MOVABLE,
-							vma, index, false);
+					     vma, index, false, &splug);
 		if (page)
 			put_page(page);
 	}
+	swap_read_unplug(splug);
 
 	return 0;
 }
@@ -265,6 +267,7 @@  static void force_shm_swapin_readahead(struct vm_area_struct *vma,
 	XA_STATE(xas, &mapping->i_pages, linear_page_index(vma, start));
 	pgoff_t end_index = linear_page_index(vma, end + PAGE_SIZE - 1);
 	struct page *page;
+	struct swap_iocb *splug = NULL;
 
 	rcu_read_lock();
 	xas_for_each(&xas, page, end_index) {
@@ -277,13 +280,14 @@  static void force_shm_swapin_readahead(struct vm_area_struct *vma,
 
 		swap = radix_to_swp_entry(page);
 		page = read_swap_cache_async(swap, GFP_HIGHUSER_MOVABLE,
-							NULL, 0, false);
+					     NULL, 0, false, &splug);
 		if (page)
 			put_page(page);
 
 		rcu_read_lock();
 	}
 	rcu_read_unlock();
+	swap_read_unplug(splug);
 
 	lru_add_drain();	/* Push any new pages onto the LRU now */
 }
diff --git a/mm/memory.c b/mm/memory.c
index d25372340107..8bd18c54eaa4 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3559,7 +3559,7 @@  vm_fault_t do_swap_page(struct vm_fault *vmf)
 
 				/* To provide entry to swap_readpage() */
 				set_page_private(page, entry.val);
-				swap_readpage(page, true);
+				swap_readpage(page, true, NULL);
 				set_page_private(page, 0);
 			}
 		} else {
diff --git a/mm/page_io.c b/mm/page_io.c
index 6e32ca35d9b6..bcf655d650c8 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -286,7 +286,8 @@  static void bio_associate_blkg_from_page(struct bio *bio, struct page *page)
 
 struct swap_iocb {
 	struct kiocb		iocb;
-	struct bio_vec		bvec;
+	struct bio_vec		bvec[SWAP_CLUSTER_MAX];
+	int			pages;
 };
 static mempool_t *sio_pool;
 
@@ -306,7 +307,7 @@  int sio_pool_init(void)
 static void sio_write_complete(struct kiocb *iocb, long ret)
 {
 	struct swap_iocb *sio = container_of(iocb, struct swap_iocb, iocb);
-	struct page *page = sio->bvec.bv_page;
+	struct page *page = sio->bvec[0].bv_page;
 
 	if (ret != 0 && ret != PAGE_SIZE) {
 		/*
@@ -344,10 +345,10 @@  static int swap_writepage_fs(struct page *page, struct writeback_control *wbc)
 	init_sync_kiocb(&sio->iocb, swap_file);
 	sio->iocb.ki_complete = sio_write_complete;
 	sio->iocb.ki_pos = page_file_offset(page);
-	sio->bvec.bv_page = page;
-	sio->bvec.bv_len = PAGE_SIZE;
-	sio->bvec.bv_offset = 0;
-	iov_iter_bvec(&from, WRITE, &sio->bvec, 1, PAGE_SIZE);
+	sio->bvec[0].bv_page = page;
+	sio->bvec[0].bv_len = PAGE_SIZE;
+	sio->bvec[0].bv_offset = 0;
+	iov_iter_bvec(&from, WRITE, &sio->bvec[0], 1, PAGE_SIZE);
 	ret = mapping->a_ops->swap_rw(&sio->iocb, &from);
 	if (ret != -EIOCBQUEUED)
 		sio_write_complete(&sio->iocb, ret);
@@ -390,46 +391,60 @@  int __swap_writepage(struct page *page, struct writeback_control *wbc,
 static void sio_read_complete(struct kiocb *iocb, long ret)
 {
 	struct swap_iocb *sio = container_of(iocb, struct swap_iocb, iocb);
-	struct page *page = sio->bvec.bv_page;
-
-	if (ret != 0 && ret != PAGE_SIZE) {
-		SetPageError(page);
-		ClearPageUptodate(page);
-		pr_alert_ratelimited("Read-error on swap-device\n");
-	} else {
-		SetPageUptodate(page);
-		count_vm_event(PSWPIN);
+	int p;
+
+	for (p = 0; p < sio->pages; p++) {
+		struct page *page = sio->bvec[p].bv_page;
+		if (ret != 0 && ret != PAGE_SIZE * sio->pages) {
+			SetPageError(page);
+			ClearPageUptodate(page);
+			pr_alert_ratelimited("Read-error on swap-device\n");
+		} else {
+			SetPageUptodate(page);
+			count_vm_event(PSWPIN);
+		}
+		unlock_page(page);
 	}
-	unlock_page(page);
 	mempool_free(sio, sio_pool);
 }
 
-static int swap_readpage_fs(struct page *page)
+static void swap_readpage_fs(struct page *page,
+			     struct swap_iocb **plug)
 {
 	struct swap_info_struct *sis = page_swap_info(page);
-	struct file *swap_file = sis->swap_file;
-	struct address_space *mapping = swap_file->f_mapping;
-	struct iov_iter from;
-	struct swap_iocb *sio;
+	struct swap_iocb *sio = NULL;
 	loff_t pos = page_file_offset(page);
-	int ret;
-
-	sio = mempool_alloc(sio_pool, GFP_KERNEL);
-	init_sync_kiocb(&sio->iocb, swap_file);
-	sio->iocb.ki_pos = pos;
-	sio->iocb.ki_complete = sio_read_complete;
-	sio->bvec.bv_page = page;
-	sio->bvec.bv_len = PAGE_SIZE;
-	sio->bvec.bv_offset = 0;
 
-	iov_iter_bvec(&from, READ, &sio->bvec, 1, PAGE_SIZE);
-	ret = mapping->a_ops->swap_rw(&sio->iocb, &from);
-	if (ret != -EIOCBQUEUED)
-		sio_read_complete(&sio->iocb, ret);
-	return ret;
+	if (*plug)
+		sio = *plug;
+	if (sio) {
+		if (sio->iocb.ki_filp != sis->swap_file ||
+		    sio->iocb.ki_pos + sio->pages * PAGE_SIZE != pos) {
+			swap_read_unplug(sio);
+			sio = NULL;
+		}
+	}
+	if (!sio) {
+		sio = mempool_alloc(sio_pool, GFP_KERNEL);
+		init_sync_kiocb(&sio->iocb, sis->swap_file);
+		sio->iocb.ki_pos = pos;
+		sio->iocb.ki_complete = sio_read_complete;
+		sio->pages = 0;
+	}
+	sio->bvec[sio->pages].bv_page = page;
+	sio->bvec[sio->pages].bv_len = PAGE_SIZE;
+	sio->bvec[sio->pages].bv_offset = 0;
+	sio->pages += 1;
+	if (sio->pages == ARRAY_SIZE(sio->bvec) || !plug) {
+		swap_read_unplug(sio);
+		sio = NULL;
+	}
+	if (plug)
+		*plug = sio;
 }
 
-int swap_readpage(struct page *page, bool synchronous)
+int swap_readpage(struct page *page, bool synchronous,
+		  struct swap_iocb **plug)
 {
 	struct bio *bio;
 	int ret = 0;
@@ -455,7 +470,7 @@  int swap_readpage(struct page *page, bool synchronous)
 	}
 
 	if (data_race(sis->flags & SWP_FS_OPS)) {
-		ret = swap_readpage_fs(page);
+		swap_readpage_fs(page, plug);
 		goto out;
 	}
 
@@ -507,3 +522,16 @@  int swap_readpage(struct page *page, bool synchronous)
 	delayacct_swapin_end();
 	return ret;
 }
+
+void __swap_read_unplug(struct swap_iocb *sio)
+{
+	struct iov_iter from;
+	struct address_space *mapping = sio->iocb.ki_filp->f_mapping;
+	int ret;
+
+	iov_iter_bvec(&from, READ, sio->bvec, sio->pages,
+		      PAGE_SIZE * sio->pages);
+	ret = mapping->a_ops->swap_rw(&sio->iocb, &from);
+	if (ret != -EIOCBQUEUED)
+		sio_read_complete(&sio->iocb, ret);
+}
diff --git a/mm/swap.h b/mm/swap.h
index e8ee995cf8d8..0c79b2478f3f 100644
--- a/mm/swap.h
+++ b/mm/swap.h
@@ -4,7 +4,15 @@ 
 
 /* linux/mm/page_io.c */
 int sio_pool_init(void);
-int swap_readpage(struct page *page, bool do_poll);
+struct swap_iocb;
+int swap_readpage(struct page *page, bool do_poll,
+		  struct swap_iocb **plug);
+void __swap_read_unplug(struct swap_iocb *plug);
+static inline void swap_read_unplug(struct swap_iocb *plug)
+{
+	if (unlikely(plug))
+		__swap_read_unplug(plug);
+}
 int swap_writepage(struct page *page, struct writeback_control *wbc);
 void end_swap_bio_write(struct bio *bio);
 int __swap_writepage(struct page *page, struct writeback_control *wbc,
@@ -38,7 +46,8 @@  struct page *find_get_incore_page(struct address_space *mapping, pgoff_t index);
 struct page *read_swap_cache_async(swp_entry_t, gfp_t,
 				   struct vm_area_struct *vma,
 				   unsigned long addr,
-				   bool do_poll);
+				   bool do_poll,
+				   struct swap_iocb **plug);
 struct page *__read_swap_cache_async(swp_entry_t, gfp_t,
 				     struct vm_area_struct *vma,
 				     unsigned long addr,
@@ -53,7 +62,8 @@  static inline unsigned int page_swap_flags(struct page *page)
 	return page_swap_info(page)->flags;
 }
 #else /* CONFIG_SWAP */
-static inline int swap_readpage(struct page *page, bool do_poll)
+static inline int swap_readpage(struct page *page, bool do_poll,
+				struct swap_iocb **plug);
 {
 	return 0;
 }
diff --git a/mm/swap_state.c b/mm/swap_state.c
index d541594be1c3..5cb2c75fa247 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -520,14 +520,16 @@  struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
  * the swap entry is no longer in use.
  */
 struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
-		struct vm_area_struct *vma, unsigned long addr, bool do_poll)
+				   struct vm_area_struct *vma,
+				   unsigned long addr, bool do_poll,
+				   struct swap_iocb **plug)
 {
 	bool page_was_allocated;
 	struct page *retpage = __read_swap_cache_async(entry, gfp_mask,
 			vma, addr, &page_was_allocated);
 
 	if (page_was_allocated)
-		swap_readpage(retpage, do_poll);
+		swap_readpage(retpage, do_poll, plug);
 
 	return retpage;
 }
@@ -621,6 +623,7 @@  struct page *swap_cluster_readahead(swp_entry_t entry, gfp_t gfp_mask,
 	unsigned long mask;
 	struct swap_info_struct *si = swp_swap_info(entry);
 	struct blk_plug plug;
+	struct swap_iocb *splug = NULL;
 	bool do_poll = true, page_allocated;
 	struct vm_area_struct *vma = vmf->vma;
 	unsigned long addr = vmf->address;
@@ -647,7 +650,7 @@  struct page *swap_cluster_readahead(swp_entry_t entry, gfp_t gfp_mask,
 		if (!page)
 			continue;
 		if (page_allocated) {
-			swap_readpage(page, false);
+			swap_readpage(page, false, &splug);
 			if (offset != entry_offset) {
 				SetPageReadahead(page);
 				count_vm_event(SWAP_RA);
@@ -658,8 +661,10 @@  struct page *swap_cluster_readahead(swp_entry_t entry, gfp_t gfp_mask,
 
 	lru_add_drain();	/* Push any new pages onto the LRU now */
 skip:
-	page = read_swap_cache_async(entry, gfp_mask, vma, addr, do_poll);
+	page = read_swap_cache_async(entry, gfp_mask, vma, addr, do_poll,
+				     &splug);
 	blk_finish_plug(&plug);
+	swap_read_unplug(splug);
 	return page;
 }
 
@@ -791,6 +796,7 @@  static struct page *swap_vma_readahead(swp_entry_t fentry, gfp_t gfp_mask,
 				       struct vm_fault *vmf)
 {
 	struct blk_plug plug;
+	struct swap_iocb *splug = NULL;
 	struct vm_area_struct *vma = vmf->vma;
 	struct page *page;
 	pte_t *pte, pentry;
@@ -821,7 +827,7 @@  static struct page *swap_vma_readahead(swp_entry_t fentry, gfp_t gfp_mask,
 		if (!page)
 			continue;
 		if (page_allocated) {
-			swap_readpage(page, false);
+			swap_readpage(page, false, &splug);
 			if (i != ra_info.offset) {
 				SetPageReadahead(page);
 				count_vm_event(SWAP_RA);
@@ -832,8 +838,9 @@  static struct page *swap_vma_readahead(swp_entry_t fentry, gfp_t gfp_mask,
 	lru_add_drain();
 skip:
 	page = read_swap_cache_async(fentry, gfp_mask, vma, vmf->address,
-				     ra_info.win == 1);
+				     ra_info.win == 1, &splug);
 	blk_finish_plug(&plug);
+	swap_read_unplug(splug);
 	return page;
 }