diff mbox series

[2/2] mm: tlb: add tlb swap entries batch async release

Message ID 20240730114426.511-3-justinjiang@vivo.com (mailing list archive)
State New
Headers show
Series mm: tlb swap entries batch async release | expand

Commit Message

zhiguojiang July 30, 2024, 11:44 a.m. UTC
The main reasons for the prolonged exit of a background process is the
time-consuming release of its swap entries. The proportion of swap memory
occupied by the background process increases with its duration in the
background, and after a period of time, this value can reach 60% or more.
Additionally, the relatively lengthy path for releasing swap entries
further contributes to the longer time required for the background process
to release its swap entries.

In the multiple background applications scenario, when launching a large
memory application such as a camera, system may enter a low memory state,
which will triggers the killing of multiple background processes at the
same time. Due to multiple exiting processes occupying multiple CPUs for
concurrent execution, the current foreground application's CPU resources
are tight and may cause issues such as lagging.

To solve this problem, we have introduced the multiple exiting process
asynchronous swap memory release mechanism, which isolates and caches
swap entries occupied by multiple exit processes, and hands them over
to an asynchronous kworker to complete the release. This allows the
exiting processes to complete quickly and release CPU resources. We have
validated this modification on the products and achieved the expected
benefits.

It offers several benefits:
1. Alleviate the high system cpu load caused by multiple exiting
   processes running simultaneously.
2. Reduce lock competition in swap entry free path by an asynchronous
   kworker instead of multiple exiting processes parallel execution.
3. Release memory occupied by exiting processes more efficiently.

Signed-off-by: Zhiguo Jiang <justinjiang@vivo.com>
---
 include/asm-generic/tlb.h |  50 +++++++
 include/linux/mm_types.h  |  58 ++++++++
 mm/memory.c               |   3 +-
 mm/mmu_gather.c           | 297 ++++++++++++++++++++++++++++++++++++++
 4 files changed, 407 insertions(+), 1 deletion(-)

Comments

kernel test robot July 31, 2024, 9:55 a.m. UTC | #1
Hi Zhiguo,

kernel test robot noticed the following build errors:

[auto build test ERROR on akpm-mm/mm-everything]

url:    https://github.com/intel-lab-lkp/linux/commits/Zhiguo-Jiang/mm-move-task_is_dying-to-h-headfile/20240730-215136
base:   https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
patch link:    https://lore.kernel.org/r/20240730114426.511-3-justinjiang%40vivo.com
patch subject: [PATCH 2/2] mm: tlb: add tlb swap entries batch async release
config: s390-allnoconfig (https://download.01.org/0day-ci/archive/20240731/202407311703.8q8sDQ2p-lkp@intel.com/config)
compiler: clang version 20.0.0git (https://github.com/llvm/llvm-project ccae7b461be339e717d02f99ac857cf0bc7d17fc)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240731/202407311703.8q8sDQ2p-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202407311703.8q8sDQ2p-lkp@intel.com/

All error/warnings (new ones prefixed by >>):

   In file included from arch/s390/mm/init.c:17:
   In file included from include/linux/ptrace.h:10:
   In file included from include/linux/pid_namespace.h:7:
   In file included from include/linux/mm.h:2234:
   include/linux/vmstat.h:514:36: warning: arithmetic between different enumeration types ('enum node_stat_item' and 'enum lru_list') [-Wenum-enum-conversion]
     514 |         return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_"
         |                               ~~~~~~~~~~~ ^ ~~~
   In file included from arch/s390/mm/init.c:25:
   In file included from include/linux/memblock.h:13:
   In file included from arch/s390/include/asm/dma.h:5:
   In file included from include/linux/io.h:14:
   In file included from arch/s390/include/asm/io.h:93:
   include/asm-generic/io.h:548:31: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     548 |         val = __raw_readb(PCI_IOBASE + addr);
         |                           ~~~~~~~~~~ ^
   include/asm-generic/io.h:561:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     561 |         val = __le16_to_cpu((__le16 __force)__raw_readw(PCI_IOBASE + addr));
         |                                                         ~~~~~~~~~~ ^
   include/uapi/linux/byteorder/big_endian.h:37:59: note: expanded from macro '__le16_to_cpu'
      37 | #define __le16_to_cpu(x) __swab16((__force __u16)(__le16)(x))
         |                                                           ^
   include/uapi/linux/swab.h:102:54: note: expanded from macro '__swab16'
     102 | #define __swab16(x) (__u16)__builtin_bswap16((__u16)(x))
         |                                                      ^
   In file included from arch/s390/mm/init.c:25:
   In file included from include/linux/memblock.h:13:
   In file included from arch/s390/include/asm/dma.h:5:
   In file included from include/linux/io.h:14:
   In file included from arch/s390/include/asm/io.h:93:
   include/asm-generic/io.h:574:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     574 |         val = __le32_to_cpu((__le32 __force)__raw_readl(PCI_IOBASE + addr));
         |                                                         ~~~~~~~~~~ ^
   include/uapi/linux/byteorder/big_endian.h:35:59: note: expanded from macro '__le32_to_cpu'
      35 | #define __le32_to_cpu(x) __swab32((__force __u32)(__le32)(x))
         |                                                           ^
   include/uapi/linux/swab.h:115:54: note: expanded from macro '__swab32'
     115 | #define __swab32(x) (__u32)__builtin_bswap32((__u32)(x))
         |                                                      ^
   In file included from arch/s390/mm/init.c:25:
   In file included from include/linux/memblock.h:13:
   In file included from arch/s390/include/asm/dma.h:5:
   In file included from include/linux/io.h:14:
   In file included from arch/s390/include/asm/io.h:93:
   include/asm-generic/io.h:585:33: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     585 |         __raw_writeb(value, PCI_IOBASE + addr);
         |                             ~~~~~~~~~~ ^
   include/asm-generic/io.h:595:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     595 |         __raw_writew((u16 __force)cpu_to_le16(value), PCI_IOBASE + addr);
         |                                                       ~~~~~~~~~~ ^
   include/asm-generic/io.h:605:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     605 |         __raw_writel((u32 __force)cpu_to_le32(value), PCI_IOBASE + addr);
         |                                                       ~~~~~~~~~~ ^
   include/asm-generic/io.h:693:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     693 |         readsb(PCI_IOBASE + addr, buffer, count);
         |                ~~~~~~~~~~ ^
   include/asm-generic/io.h:701:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     701 |         readsw(PCI_IOBASE + addr, buffer, count);
         |                ~~~~~~~~~~ ^
   include/asm-generic/io.h:709:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     709 |         readsl(PCI_IOBASE + addr, buffer, count);
         |                ~~~~~~~~~~ ^
   include/asm-generic/io.h:718:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     718 |         writesb(PCI_IOBASE + addr, buffer, count);
         |                 ~~~~~~~~~~ ^
   include/asm-generic/io.h:727:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     727 |         writesw(PCI_IOBASE + addr, buffer, count);
         |                 ~~~~~~~~~~ ^
   include/asm-generic/io.h:736:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     736 |         writesl(PCI_IOBASE + addr, buffer, count);
         |                 ~~~~~~~~~~ ^
   In file included from arch/s390/mm/init.c:42:
   In file included from arch/s390/include/asm/tlb.h:39:
>> include/asm-generic/tlb.h:327:6: warning: no previous prototype for function '__tlb_remove_swap_entries' [-Wmissing-prototypes]
     327 | bool __tlb_remove_swap_entries(struct mmu_gather *tlb,
         |      ^
   include/asm-generic/tlb.h:327:1: note: declare 'static' if the function is not intended to be used outside of this translation unit
     327 | bool __tlb_remove_swap_entries(struct mmu_gather *tlb,
         | ^
         | static 
   14 warnings generated.
--
   In file included from arch/s390/mm/pgtable.c:11:
   In file included from include/linux/mm.h:2234:
   include/linux/vmstat.h:514:36: warning: arithmetic between different enumeration types ('enum node_stat_item' and 'enum lru_list') [-Wenum-enum-conversion]
     514 |         return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_"
         |                               ~~~~~~~~~~~ ^ ~~~
   In file included from arch/s390/mm/pgtable.c:22:
   In file included from arch/s390/include/asm/tlb.h:39:
>> include/asm-generic/tlb.h:327:6: warning: no previous prototype for function '__tlb_remove_swap_entries' [-Wmissing-prototypes]
     327 | bool __tlb_remove_swap_entries(struct mmu_gather *tlb,
         |      ^
   include/asm-generic/tlb.h:327:1: note: declare 'static' if the function is not intended to be used outside of this translation unit
     327 | bool __tlb_remove_swap_entries(struct mmu_gather *tlb,
         | ^
         | static 
   2 warnings generated.
--
   In file included from mm/memory.c:44:
   In file included from include/linux/mm.h:2234:
   include/linux/vmstat.h:514:36: warning: arithmetic between different enumeration types ('enum node_stat_item' and 'enum lru_list') [-Wenum-enum-conversion]
     514 |         return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_"
         |                               ~~~~~~~~~~~ ^ ~~~
   In file included from mm/memory.c:45:
   include/linux/mm_inline.h:47:41: warning: arithmetic between different enumeration types ('enum node_stat_item' and 'enum lru_list') [-Wenum-enum-conversion]
      47 |         __mod_lruvec_state(lruvec, NR_LRU_BASE + lru, nr_pages);
         |                                    ~~~~~~~~~~~ ^ ~~~
   include/linux/mm_inline.h:49:22: warning: arithmetic between different enumeration types ('enum zone_stat_item' and 'enum lru_list') [-Wenum-enum-conversion]
      49 |                                 NR_ZONE_LRU_BASE + lru, nr_pages);
         |                                 ~~~~~~~~~~~~~~~~ ^ ~~~
   In file included from mm/memory.c:84:
   In file included from arch/s390/include/asm/io.h:93:
   include/asm-generic/io.h:548:31: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     548 |         val = __raw_readb(PCI_IOBASE + addr);
         |                           ~~~~~~~~~~ ^
   include/asm-generic/io.h:561:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     561 |         val = __le16_to_cpu((__le16 __force)__raw_readw(PCI_IOBASE + addr));
         |                                                         ~~~~~~~~~~ ^
   include/uapi/linux/byteorder/big_endian.h:37:59: note: expanded from macro '__le16_to_cpu'
      37 | #define __le16_to_cpu(x) __swab16((__force __u16)(__le16)(x))
         |                                                           ^
   include/uapi/linux/swab.h:102:54: note: expanded from macro '__swab16'
     102 | #define __swab16(x) (__u16)__builtin_bswap16((__u16)(x))
         |                                                      ^
   In file included from mm/memory.c:84:
   In file included from arch/s390/include/asm/io.h:93:
   include/asm-generic/io.h:574:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     574 |         val = __le32_to_cpu((__le32 __force)__raw_readl(PCI_IOBASE + addr));
         |                                                         ~~~~~~~~~~ ^
   include/uapi/linux/byteorder/big_endian.h:35:59: note: expanded from macro '__le32_to_cpu'
      35 | #define __le32_to_cpu(x) __swab32((__force __u32)(__le32)(x))
         |                                                           ^
   include/uapi/linux/swab.h:115:54: note: expanded from macro '__swab32'
     115 | #define __swab32(x) (__u32)__builtin_bswap32((__u32)(x))
         |                                                      ^
   In file included from mm/memory.c:84:
   In file included from arch/s390/include/asm/io.h:93:
   include/asm-generic/io.h:585:33: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     585 |         __raw_writeb(value, PCI_IOBASE + addr);
         |                             ~~~~~~~~~~ ^
   include/asm-generic/io.h:595:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     595 |         __raw_writew((u16 __force)cpu_to_le16(value), PCI_IOBASE + addr);
         |                                                       ~~~~~~~~~~ ^
   include/asm-generic/io.h:605:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     605 |         __raw_writel((u32 __force)cpu_to_le32(value), PCI_IOBASE + addr);
         |                                                       ~~~~~~~~~~ ^
   include/asm-generic/io.h:693:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     693 |         readsb(PCI_IOBASE + addr, buffer, count);
         |                ~~~~~~~~~~ ^
   include/asm-generic/io.h:701:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     701 |         readsw(PCI_IOBASE + addr, buffer, count);
         |                ~~~~~~~~~~ ^
   include/asm-generic/io.h:709:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     709 |         readsl(PCI_IOBASE + addr, buffer, count);
         |                ~~~~~~~~~~ ^
   include/asm-generic/io.h:718:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     718 |         writesb(PCI_IOBASE + addr, buffer, count);
         |                 ~~~~~~~~~~ ^
   include/asm-generic/io.h:727:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     727 |         writesw(PCI_IOBASE + addr, buffer, count);
         |                 ~~~~~~~~~~ ^
   include/asm-generic/io.h:736:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     736 |         writesl(PCI_IOBASE + addr, buffer, count);
         |                 ~~~~~~~~~~ ^
   In file included from mm/memory.c:88:
   In file included from arch/s390/include/asm/tlb.h:39:
>> include/asm-generic/tlb.h:327:6: warning: no previous prototype for function '__tlb_remove_swap_entries' [-Wmissing-prototypes]
     327 | bool __tlb_remove_swap_entries(struct mmu_gather *tlb,
         |      ^
   include/asm-generic/tlb.h:327:1: note: declare 'static' if the function is not intended to be used outside of this translation unit
     327 | bool __tlb_remove_swap_entries(struct mmu_gather *tlb,
         | ^
         | static 
   16 warnings generated.
--
   In file included from mm/mmap.c:14:
   In file included from include/linux/backing-dev.h:15:
   In file included from include/linux/device.h:32:
   In file included from include/linux/device/driver.h:21:
   In file included from include/linux/module.h:19:
   In file included from include/linux/elf.h:6:
   In file included from arch/s390/include/asm/elf.h:181:
   In file included from arch/s390/include/asm/mmu_context.h:11:
   In file included from arch/s390/include/asm/pgalloc.h:18:
   In file included from include/linux/mm.h:2234:
   include/linux/vmstat.h:514:36: warning: arithmetic between different enumeration types ('enum node_stat_item' and 'enum lru_list') [-Wenum-enum-conversion]
     514 |         return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_"
         |                               ~~~~~~~~~~~ ^ ~~~
   In file included from mm/mmap.c:16:
   include/linux/mm_inline.h:47:41: warning: arithmetic between different enumeration types ('enum node_stat_item' and 'enum lru_list') [-Wenum-enum-conversion]
      47 |         __mod_lruvec_state(lruvec, NR_LRU_BASE + lru, nr_pages);
         |                                    ~~~~~~~~~~~ ^ ~~~
   include/linux/mm_inline.h:49:22: warning: arithmetic between different enumeration types ('enum zone_stat_item' and 'enum lru_list') [-Wenum-enum-conversion]
      49 |                                 NR_ZONE_LRU_BASE + lru, nr_pages);
         |                                 ~~~~~~~~~~~~~~~~ ^ ~~~
   In file included from mm/mmap.c:53:
   In file included from arch/s390/include/asm/tlb.h:39:
>> include/asm-generic/tlb.h:327:6: warning: no previous prototype for function '__tlb_remove_swap_entries' [-Wmissing-prototypes]
     327 | bool __tlb_remove_swap_entries(struct mmu_gather *tlb,
         |      ^
   include/asm-generic/tlb.h:327:1: note: declare 'static' if the function is not intended to be used outside of this translation unit
     327 | bool __tlb_remove_swap_entries(struct mmu_gather *tlb,
         | ^
         | static 
   4 warnings generated.
--
   In file included from mm/mremap.c:11:
   In file included from include/linux/mm.h:2234:
   include/linux/vmstat.h:514:36: warning: arithmetic between different enumeration types ('enum node_stat_item' and 'enum lru_list') [-Wenum-enum-conversion]
     514 |         return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_"
         |                               ~~~~~~~~~~~ ^ ~~~
   In file included from mm/mremap.c:12:
   include/linux/mm_inline.h:47:41: warning: arithmetic between different enumeration types ('enum node_stat_item' and 'enum lru_list') [-Wenum-enum-conversion]
      47 |         __mod_lruvec_state(lruvec, NR_LRU_BASE + lru, nr_pages);
         |                                    ~~~~~~~~~~~ ^ ~~~
   include/linux/mm_inline.h:49:22: warning: arithmetic between different enumeration types ('enum zone_stat_item' and 'enum lru_list') [-Wenum-enum-conversion]
      49 |                                 NR_ZONE_LRU_BASE + lru, nr_pages);
         |                                 ~~~~~~~~~~~~~~~~ ^ ~~~
   In file included from mm/mremap.c:30:
   In file included from arch/s390/include/asm/tlb.h:39:
>> include/asm-generic/tlb.h:327:6: warning: no previous prototype for function '__tlb_remove_swap_entries' [-Wmissing-prototypes]
     327 | bool __tlb_remove_swap_entries(struct mmu_gather *tlb,
         |      ^
   include/asm-generic/tlb.h:327:1: note: declare 'static' if the function is not intended to be used outside of this translation unit
     327 | bool __tlb_remove_swap_entries(struct mmu_gather *tlb,
         | ^
         | static 
   mm/mremap.c:228:20: warning: unused function 'arch_supports_page_table_move' [-Wunused-function]
     228 | static inline bool arch_supports_page_table_move(void)
         |                    ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   mm/mremap.c:227:39: note: expanded from macro 'arch_supports_page_table_move'
     227 | #define arch_supports_page_table_move arch_supports_page_table_move
         |                                       ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   5 warnings generated.
--
   In file included from kernel/sched/core.c:10:
   In file included from include/linux/highmem.h:10:
   In file included from include/linux/mm.h:2234:
   include/linux/vmstat.h:514:36: warning: arithmetic between different enumeration types ('enum node_stat_item' and 'enum lru_list') [-Wenum-enum-conversion]
     514 |         return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_"
         |                               ~~~~~~~~~~~ ^ ~~~
   In file included from kernel/sched/core.c:34:
   In file included from include/linux/sched/isolation.h:7:
   In file included from include/linux/tick.h:8:
   In file included from include/linux/clockchips.h:14:
   In file included from include/linux/clocksource.h:22:
   In file included from arch/s390/include/asm/io.h:93:
   include/asm-generic/io.h:548:31: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     548 |         val = __raw_readb(PCI_IOBASE + addr);
         |                           ~~~~~~~~~~ ^
   include/asm-generic/io.h:561:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     561 |         val = __le16_to_cpu((__le16 __force)__raw_readw(PCI_IOBASE + addr));
         |                                                         ~~~~~~~~~~ ^
   include/uapi/linux/byteorder/big_endian.h:37:59: note: expanded from macro '__le16_to_cpu'
      37 | #define __le16_to_cpu(x) __swab16((__force __u16)(__le16)(x))
         |                                                           ^
   include/uapi/linux/swab.h:102:54: note: expanded from macro '__swab16'
     102 | #define __swab16(x) (__u16)__builtin_bswap16((__u16)(x))
         |                                                      ^
   In file included from kernel/sched/core.c:34:
   In file included from include/linux/sched/isolation.h:7:
   In file included from include/linux/tick.h:8:
   In file included from include/linux/clockchips.h:14:
   In file included from include/linux/clocksource.h:22:
   In file included from arch/s390/include/asm/io.h:93:
   include/asm-generic/io.h:574:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     574 |         val = __le32_to_cpu((__le32 __force)__raw_readl(PCI_IOBASE + addr));
         |                                                         ~~~~~~~~~~ ^
   include/uapi/linux/byteorder/big_endian.h:35:59: note: expanded from macro '__le32_to_cpu'
      35 | #define __le32_to_cpu(x) __swab32((__force __u32)(__le32)(x))
         |                                                           ^
   include/uapi/linux/swab.h:115:54: note: expanded from macro '__swab32'
     115 | #define __swab32(x) (__u32)__builtin_bswap32((__u32)(x))
         |                                                      ^
   In file included from kernel/sched/core.c:34:
   In file included from include/linux/sched/isolation.h:7:
   In file included from include/linux/tick.h:8:
   In file included from include/linux/clockchips.h:14:
   In file included from include/linux/clocksource.h:22:
   In file included from arch/s390/include/asm/io.h:93:
   include/asm-generic/io.h:585:33: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     585 |         __raw_writeb(value, PCI_IOBASE + addr);
         |                             ~~~~~~~~~~ ^
   include/asm-generic/io.h:595:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     595 |         __raw_writew((u16 __force)cpu_to_le16(value), PCI_IOBASE + addr);
         |                                                       ~~~~~~~~~~ ^
   include/asm-generic/io.h:605:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     605 |         __raw_writel((u32 __force)cpu_to_le32(value), PCI_IOBASE + addr);
         |                                                       ~~~~~~~~~~ ^
   include/asm-generic/io.h:693:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     693 |         readsb(PCI_IOBASE + addr, buffer, count);
         |                ~~~~~~~~~~ ^
   include/asm-generic/io.h:701:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     701 |         readsw(PCI_IOBASE + addr, buffer, count);
         |                ~~~~~~~~~~ ^
   include/asm-generic/io.h:709:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     709 |         readsl(PCI_IOBASE + addr, buffer, count);
         |                ~~~~~~~~~~ ^
   include/asm-generic/io.h:718:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     718 |         writesb(PCI_IOBASE + addr, buffer, count);
         |                 ~~~~~~~~~~ ^
   include/asm-generic/io.h:727:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     727 |         writesw(PCI_IOBASE + addr, buffer, count);
         |                 ~~~~~~~~~~ ^
   include/asm-generic/io.h:736:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
     736 |         writesl(PCI_IOBASE + addr, buffer, count);
         |                 ~~~~~~~~~~ ^
   In file included from kernel/sched/core.c:80:
   In file included from arch/s390/include/asm/tlb.h:39:
>> include/asm-generic/tlb.h:327:6: warning: no previous prototype for function '__tlb_remove_swap_entries' [-Wmissing-prototypes]
     327 | bool __tlb_remove_swap_entries(struct mmu_gather *tlb,
         |      ^
   include/asm-generic/tlb.h:327:1: note: declare 'static' if the function is not intended to be used outside of this translation unit
     327 | bool __tlb_remove_swap_entries(struct mmu_gather *tlb,
         | ^
         | static 
   kernel/sched/core.c:6330:20: warning: unused function 'sched_core_cpu_deactivate' [-Wunused-function]
    6330 | static inline void sched_core_cpu_deactivate(unsigned int cpu) {}
         |                    ^~~~~~~~~~~~~~~~~~~~~~~~~
   15 warnings generated.
..
kernel test robot July 31, 2024, 11:49 a.m. UTC | #2
Hi Zhiguo,

kernel test robot noticed the following build warnings:

[auto build test WARNING on akpm-mm/mm-everything]

url:    https://github.com/intel-lab-lkp/linux/commits/Zhiguo-Jiang/mm-move-task_is_dying-to-h-headfile/20240730-215136
base:   https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
patch link:    https://lore.kernel.org/r/20240730114426.511-3-justinjiang%40vivo.com
patch subject: [PATCH 2/2] mm: tlb: add tlb swap entries batch async release
config: s390-allyesconfig (https://download.01.org/0day-ci/archive/20240731/202407311947.VPJNRqad-lkp@intel.com/config)
compiler: s390-linux-gcc (GCC) 14.1.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240731/202407311947.VPJNRqad-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202407311947.VPJNRqad-lkp@intel.com/

All warnings (new ones prefixed by >>):

   In file included from arch/s390/include/asm/tlb.h:39,
                    from mm/oom_kill.c:49:
>> include/asm-generic/tlb.h:327:6: warning: no previous prototype for '__tlb_remove_swap_entries' [-Wmissing-prototypes]
     327 | bool __tlb_remove_swap_entries(struct mmu_gather *tlb,
         |      ^~~~~~~~~~~~~~~~~~~~~~~~~


vim +/__tlb_remove_swap_entries +327 include/asm-generic/tlb.h

   323	
   324	extern bool __tlb_remove_swap_entries(struct mmu_gather *tlb,
   325			swp_entry_t entry, int nr);
   326	#else
 > 327	bool __tlb_remove_swap_entries(struct mmu_gather *tlb,
   328			swp_entry_t entry, int nr)
   329	{
   330		return false;
   331	}
   332	#endif
   333
Arnd Bergmann July 31, 2024, 12:08 p.m. UTC | #3
On Tue, Jul 30, 2024, at 13:44, Zhiguo Jiang wrote:

> +
> +
> +extern bool __tlb_remove_swap_entries(struct mmu_gather *tlb,
> +		swp_entry_t entry, int nr);
> +#else
> +bool __tlb_remove_swap_entries(struct mmu_gather *tlb,
> +		swp_entry_t entry, int nr)
> +{
> +	return false;
> +}
> +#endif

To address the reported build regression, that function must
be annotated as 'static inline'.

     Arnd
zhiguojiang July 31, 2024, 1:42 p.m. UTC | #4
在 2024/7/31 17:55, kernel test robot 写道:
> [Some people who received this message don't often get email from lkp@intel.com. Learn why this is important at https://aka.ms/LearnAboutSenderIdentification ]
>
> Hi Zhiguo,
>
> kernel test robot noticed the following build errors:
>
> [auto build test ERROR on akpm-mm/mm-everything]
>
> url:    https://github.com/intel-lab-lkp/linux/commits/Zhiguo-Jiang/mm-move-task_is_dying-to-h-headfile/20240730-215136
> base:   https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
> patch link:    https://lore.kernel.org/r/20240730114426.511-3-justinjiang%40vivo.com
> patch subject: [PATCH 2/2] mm: tlb: add tlb swap entries batch async release
> config: s390-allnoconfig (https://download.01.org/0day-ci/archive/20240731/202407311703.8q8sDQ2p-lkp@intel.com/config)
> compiler: clang version 20.0.0git (https://github.com/llvm/llvm-project ccae7b461be339e717d02f99ac857cf0bc7d17fc)
> reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240731/202407311703.8q8sDQ2p-lkp@intel.com/reproduce)
>
> If you fix the issue in a separate patch/commit (i.e. not just a new version of
> the same patch/commit), kindly add following tags
> | Reported-by: kernel test robot <lkp@intel.com>
> | Closes: https://lore.kernel.org/oe-kbuild-all/202407311703.8q8sDQ2p-lkp@intel.com/
Hi,

I have fixed the arch s390 config compilation warning in v2. Please 
kindly help to test again.
https://lore.kernel.org/linux-mm/20240731133318.527-1-justinjiang@vivo.com/T/#t

Thanks
> All error/warnings (new ones prefixed by >>):
>
>     In file included from arch/s390/mm/init.c:17:
>     In file included from include/linux/ptrace.h:10:
>     In file included from include/linux/pid_namespace.h:7:
>     In file included from include/linux/mm.h:2234:
>     include/linux/vmstat.h:514:36: warning: arithmetic between different enumeration types ('enum node_stat_item' and 'enum lru_list') [-Wenum-enum-conversion]
>       514 |         return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_"
>           |                               ~~~~~~~~~~~ ^ ~~~
>     In file included from arch/s390/mm/init.c:25:
>     In file included from include/linux/memblock.h:13:
>     In file included from arch/s390/include/asm/dma.h:5:
>     In file included from include/linux/io.h:14:
>     In file included from arch/s390/include/asm/io.h:93:
>     include/asm-generic/io.h:548:31: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
>       548 |         val = __raw_readb(PCI_IOBASE + addr);
>           |                           ~~~~~~~~~~ ^
>     include/asm-generic/io.h:561:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
>       561 |         val = __le16_to_cpu((__le16 __force)__raw_readw(PCI_IOBASE + addr));
>           |                                                         ~~~~~~~~~~ ^
>     include/uapi/linux/byteorder/big_endian.h:37:59: note: expanded from macro '__le16_to_cpu'
>        37 | #define __le16_to_cpu(x) __swab16((__force __u16)(__le16)(x))
>           |                                                           ^
>     include/uapi/linux/swab.h:102:54: note: expanded from macro '__swab16'
>       102 | #define __swab16(x) (__u16)__builtin_bswap16((__u16)(x))
>           |                                                      ^
>     In file included from arch/s390/mm/init.c:25:
>     In file included from include/linux/memblock.h:13:
>     In file included from arch/s390/include/asm/dma.h:5:
>     In file included from include/linux/io.h:14:
>     In file included from arch/s390/include/asm/io.h:93:
>     include/asm-generic/io.h:574:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
>       574 |         val = __le32_to_cpu((__le32 __force)__raw_readl(PCI_IOBASE + addr));
>           |                                                         ~~~~~~~~~~ ^
>     include/uapi/linux/byteorder/big_endian.h:35:59: note: expanded from macro '__le32_to_cpu'
>        35 | #define __le32_to_cpu(x) __swab32((__force __u32)(__le32)(x))
>           |                                                           ^
>     include/uapi/linux/swab.h:115:54: note: expanded from macro '__swab32'
>       115 | #define __swab32(x) (__u32)__builtin_bswap32((__u32)(x))
>           |                                                      ^
>     In file included from arch/s390/mm/init.c:25:
>     In file included from include/linux/memblock.h:13:
>     In file included from arch/s390/include/asm/dma.h:5:
>     In file included from include/linux/io.h:14:
>     In file included from arch/s390/include/asm/io.h:93:
>     include/asm-generic/io.h:585:33: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
>       585 |         __raw_writeb(value, PCI_IOBASE + addr);
>           |                             ~~~~~~~~~~ ^
>     include/asm-generic/io.h:595:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
>       595 |         __raw_writew((u16 __force)cpu_to_le16(value), PCI_IOBASE + addr);
>           |                                                       ~~~~~~~~~~ ^
>     include/asm-generic/io.h:605:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
>       605 |         __raw_writel((u32 __force)cpu_to_le32(value), PCI_IOBASE + addr);
>           |                                                       ~~~~~~~~~~ ^
>     include/asm-generic/io.h:693:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
>       693 |         readsb(PCI_IOBASE + addr, buffer, count);
>           |                ~~~~~~~~~~ ^
>     include/asm-generic/io.h:701:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
>       701 |         readsw(PCI_IOBASE + addr, buffer, count);
>           |                ~~~~~~~~~~ ^
>     include/asm-generic/io.h:709:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
>       709 |         readsl(PCI_IOBASE + addr, buffer, count);
>           |                ~~~~~~~~~~ ^
>     include/asm-generic/io.h:718:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
>       718 |         writesb(PCI_IOBASE + addr, buffer, count);
>           |                 ~~~~~~~~~~ ^
>     include/asm-generic/io.h:727:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
>       727 |         writesw(PCI_IOBASE + addr, buffer, count);
>           |                 ~~~~~~~~~~ ^
>     include/asm-generic/io.h:736:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
>       736 |         writesl(PCI_IOBASE + addr, buffer, count);
>           |                 ~~~~~~~~~~ ^
>     In file included from arch/s390/mm/init.c:42:
>     In file included from arch/s390/include/asm/tlb.h:39:
>>> include/asm-generic/tlb.h:327:6: warning: no previous prototype for function '__tlb_remove_swap_entries' [-Wmissing-prototypes]
>       327 | bool __tlb_remove_swap_entries(struct mmu_gather *tlb,
>           |      ^
>     include/asm-generic/tlb.h:327:1: note: declare 'static' if the function is not intended to be used outside of this translation unit
>       327 | bool __tlb_remove_swap_entries(struct mmu_gather *tlb,
>           | ^
>           | static
>     14 warnings generated.
> --
>     In file included from arch/s390/mm/pgtable.c:11:
>     In file included from include/linux/mm.h:2234:
>     include/linux/vmstat.h:514:36: warning: arithmetic between different enumeration types ('enum node_stat_item' and 'enum lru_list') [-Wenum-enum-conversion]
>       514 |         return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_"
>           |                               ~~~~~~~~~~~ ^ ~~~
>     In file included from arch/s390/mm/pgtable.c:22:
>     In file included from arch/s390/include/asm/tlb.h:39:
>>> include/asm-generic/tlb.h:327:6: warning: no previous prototype for function '__tlb_remove_swap_entries' [-Wmissing-prototypes]
>       327 | bool __tlb_remove_swap_entries(struct mmu_gather *tlb,
>           |      ^
>     include/asm-generic/tlb.h:327:1: note: declare 'static' if the function is not intended to be used outside of this translation unit
>       327 | bool __tlb_remove_swap_entries(struct mmu_gather *tlb,
>           | ^
>           | static
>     2 warnings generated.
> --
>     In file included from mm/memory.c:44:
>     In file included from include/linux/mm.h:2234:
>     include/linux/vmstat.h:514:36: warning: arithmetic between different enumeration types ('enum node_stat_item' and 'enum lru_list') [-Wenum-enum-conversion]
>       514 |         return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_"
>           |                               ~~~~~~~~~~~ ^ ~~~
>     In file included from mm/memory.c:45:
>     include/linux/mm_inline.h:47:41: warning: arithmetic between different enumeration types ('enum node_stat_item' and 'enum lru_list') [-Wenum-enum-conversion]
>        47 |         __mod_lruvec_state(lruvec, NR_LRU_BASE + lru, nr_pages);
>           |                                    ~~~~~~~~~~~ ^ ~~~
>     include/linux/mm_inline.h:49:22: warning: arithmetic between different enumeration types ('enum zone_stat_item' and 'enum lru_list') [-Wenum-enum-conversion]
>        49 |                                 NR_ZONE_LRU_BASE + lru, nr_pages);
>           |                                 ~~~~~~~~~~~~~~~~ ^ ~~~
>     In file included from mm/memory.c:84:
>     In file included from arch/s390/include/asm/io.h:93:
>     include/asm-generic/io.h:548:31: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
>       548 |         val = __raw_readb(PCI_IOBASE + addr);
>           |                           ~~~~~~~~~~ ^
>     include/asm-generic/io.h:561:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
>       561 |         val = __le16_to_cpu((__le16 __force)__raw_readw(PCI_IOBASE + addr));
>           |                                                         ~~~~~~~~~~ ^
>     include/uapi/linux/byteorder/big_endian.h:37:59: note: expanded from macro '__le16_to_cpu'
>        37 | #define __le16_to_cpu(x) __swab16((__force __u16)(__le16)(x))
>           |                                                           ^
>     include/uapi/linux/swab.h:102:54: note: expanded from macro '__swab16'
>       102 | #define __swab16(x) (__u16)__builtin_bswap16((__u16)(x))
>           |                                                      ^
>     In file included from mm/memory.c:84:
>     In file included from arch/s390/include/asm/io.h:93:
>     include/asm-generic/io.h:574:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
>       574 |         val = __le32_to_cpu((__le32 __force)__raw_readl(PCI_IOBASE + addr));
>           |                                                         ~~~~~~~~~~ ^
>     include/uapi/linux/byteorder/big_endian.h:35:59: note: expanded from macro '__le32_to_cpu'
>        35 | #define __le32_to_cpu(x) __swab32((__force __u32)(__le32)(x))
>           |                                                           ^
>     include/uapi/linux/swab.h:115:54: note: expanded from macro '__swab32'
>       115 | #define __swab32(x) (__u32)__builtin_bswap32((__u32)(x))
>           |                                                      ^
>     In file included from mm/memory.c:84:
>     In file included from arch/s390/include/asm/io.h:93:
>     include/asm-generic/io.h:585:33: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
>       585 |         __raw_writeb(value, PCI_IOBASE + addr);
>           |                             ~~~~~~~~~~ ^
>     include/asm-generic/io.h:595:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
>       595 |         __raw_writew((u16 __force)cpu_to_le16(value), PCI_IOBASE + addr);
>           |                                                       ~~~~~~~~~~ ^
>     include/asm-generic/io.h:605:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
>       605 |         __raw_writel((u32 __force)cpu_to_le32(value), PCI_IOBASE + addr);
>           |                                                       ~~~~~~~~~~ ^
>     include/asm-generic/io.h:693:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
>       693 |         readsb(PCI_IOBASE + addr, buffer, count);
>           |                ~~~~~~~~~~ ^
>     include/asm-generic/io.h:701:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
>       701 |         readsw(PCI_IOBASE + addr, buffer, count);
>           |                ~~~~~~~~~~ ^
>     include/asm-generic/io.h:709:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
>       709 |         readsl(PCI_IOBASE + addr, buffer, count);
>           |                ~~~~~~~~~~ ^
>     include/asm-generic/io.h:718:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
>       718 |         writesb(PCI_IOBASE + addr, buffer, count);
>           |                 ~~~~~~~~~~ ^
>     include/asm-generic/io.h:727:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
>       727 |         writesw(PCI_IOBASE + addr, buffer, count);
>           |                 ~~~~~~~~~~ ^
>     include/asm-generic/io.h:736:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
>       736 |         writesl(PCI_IOBASE + addr, buffer, count);
>           |                 ~~~~~~~~~~ ^
>     In file included from mm/memory.c:88:
>     In file included from arch/s390/include/asm/tlb.h:39:
>>> include/asm-generic/tlb.h:327:6: warning: no previous prototype for function '__tlb_remove_swap_entries' [-Wmissing-prototypes]
>       327 | bool __tlb_remove_swap_entries(struct mmu_gather *tlb,
>           |      ^
>     include/asm-generic/tlb.h:327:1: note: declare 'static' if the function is not intended to be used outside of this translation unit
>       327 | bool __tlb_remove_swap_entries(struct mmu_gather *tlb,
>           | ^
>           | static
>     16 warnings generated.
> --
>     In file included from mm/mmap.c:14:
>     In file included from include/linux/backing-dev.h:15:
>     In file included from include/linux/device.h:32:
>     In file included from include/linux/device/driver.h:21:
>     In file included from include/linux/module.h:19:
>     In file included from include/linux/elf.h:6:
>     In file included from arch/s390/include/asm/elf.h:181:
>     In file included from arch/s390/include/asm/mmu_context.h:11:
>     In file included from arch/s390/include/asm/pgalloc.h:18:
>     In file included from include/linux/mm.h:2234:
>     include/linux/vmstat.h:514:36: warning: arithmetic between different enumeration types ('enum node_stat_item' and 'enum lru_list') [-Wenum-enum-conversion]
>       514 |         return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_"
>           |                               ~~~~~~~~~~~ ^ ~~~
>     In file included from mm/mmap.c:16:
>     include/linux/mm_inline.h:47:41: warning: arithmetic between different enumeration types ('enum node_stat_item' and 'enum lru_list') [-Wenum-enum-conversion]
>        47 |         __mod_lruvec_state(lruvec, NR_LRU_BASE + lru, nr_pages);
>           |                                    ~~~~~~~~~~~ ^ ~~~
>     include/linux/mm_inline.h:49:22: warning: arithmetic between different enumeration types ('enum zone_stat_item' and 'enum lru_list') [-Wenum-enum-conversion]
>        49 |                                 NR_ZONE_LRU_BASE + lru, nr_pages);
>           |                                 ~~~~~~~~~~~~~~~~ ^ ~~~
>     In file included from mm/mmap.c:53:
>     In file included from arch/s390/include/asm/tlb.h:39:
>>> include/asm-generic/tlb.h:327:6: warning: no previous prototype for function '__tlb_remove_swap_entries' [-Wmissing-prototypes]
>       327 | bool __tlb_remove_swap_entries(struct mmu_gather *tlb,
>           |      ^
>     include/asm-generic/tlb.h:327:1: note: declare 'static' if the function is not intended to be used outside of this translation unit
>       327 | bool __tlb_remove_swap_entries(struct mmu_gather *tlb,
>           | ^
>           | static
>     4 warnings generated.
> --
>     In file included from mm/mremap.c:11:
>     In file included from include/linux/mm.h:2234:
>     include/linux/vmstat.h:514:36: warning: arithmetic between different enumeration types ('enum node_stat_item' and 'enum lru_list') [-Wenum-enum-conversion]
>       514 |         return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_"
>           |                               ~~~~~~~~~~~ ^ ~~~
>     In file included from mm/mremap.c:12:
>     include/linux/mm_inline.h:47:41: warning: arithmetic between different enumeration types ('enum node_stat_item' and 'enum lru_list') [-Wenum-enum-conversion]
>        47 |         __mod_lruvec_state(lruvec, NR_LRU_BASE + lru, nr_pages);
>           |                                    ~~~~~~~~~~~ ^ ~~~
>     include/linux/mm_inline.h:49:22: warning: arithmetic between different enumeration types ('enum zone_stat_item' and 'enum lru_list') [-Wenum-enum-conversion]
>        49 |                                 NR_ZONE_LRU_BASE + lru, nr_pages);
>           |                                 ~~~~~~~~~~~~~~~~ ^ ~~~
>     In file included from mm/mremap.c:30:
>     In file included from arch/s390/include/asm/tlb.h:39:
>>> include/asm-generic/tlb.h:327:6: warning: no previous prototype for function '__tlb_remove_swap_entries' [-Wmissing-prototypes]
>       327 | bool __tlb_remove_swap_entries(struct mmu_gather *tlb,
>           |      ^
>     include/asm-generic/tlb.h:327:1: note: declare 'static' if the function is not intended to be used outside of this translation unit
>       327 | bool __tlb_remove_swap_entries(struct mmu_gather *tlb,
>           | ^
>           | static
>     mm/mremap.c:228:20: warning: unused function 'arch_supports_page_table_move' [-Wunused-function]
>       228 | static inline bool arch_supports_page_table_move(void)
>           |                    ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~
>     mm/mremap.c:227:39: note: expanded from macro 'arch_supports_page_table_move'
>       227 | #define arch_supports_page_table_move arch_supports_page_table_move
>           |                                       ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~
>     5 warnings generated.
> --
>     In file included from kernel/sched/core.c:10:
>     In file included from include/linux/highmem.h:10:
>     In file included from include/linux/mm.h:2234:
>     include/linux/vmstat.h:514:36: warning: arithmetic between different enumeration types ('enum node_stat_item' and 'enum lru_list') [-Wenum-enum-conversion]
>       514 |         return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_"
>           |                               ~~~~~~~~~~~ ^ ~~~
>     In file included from kernel/sched/core.c:34:
>     In file included from include/linux/sched/isolation.h:7:
>     In file included from include/linux/tick.h:8:
>     In file included from include/linux/clockchips.h:14:
>     In file included from include/linux/clocksource.h:22:
>     In file included from arch/s390/include/asm/io.h:93:
>     include/asm-generic/io.h:548:31: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
>       548 |         val = __raw_readb(PCI_IOBASE + addr);
>           |                           ~~~~~~~~~~ ^
>     include/asm-generic/io.h:561:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
>       561 |         val = __le16_to_cpu((__le16 __force)__raw_readw(PCI_IOBASE + addr));
>           |                                                         ~~~~~~~~~~ ^
>     include/uapi/linux/byteorder/big_endian.h:37:59: note: expanded from macro '__le16_to_cpu'
>        37 | #define __le16_to_cpu(x) __swab16((__force __u16)(__le16)(x))
>           |                                                           ^
>     include/uapi/linux/swab.h:102:54: note: expanded from macro '__swab16'
>       102 | #define __swab16(x) (__u16)__builtin_bswap16((__u16)(x))
>           |                                                      ^
>     In file included from kernel/sched/core.c:34:
>     In file included from include/linux/sched/isolation.h:7:
>     In file included from include/linux/tick.h:8:
>     In file included from include/linux/clockchips.h:14:
>     In file included from include/linux/clocksource.h:22:
>     In file included from arch/s390/include/asm/io.h:93:
>     include/asm-generic/io.h:574:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
>       574 |         val = __le32_to_cpu((__le32 __force)__raw_readl(PCI_IOBASE + addr));
>           |                                                         ~~~~~~~~~~ ^
>     include/uapi/linux/byteorder/big_endian.h:35:59: note: expanded from macro '__le32_to_cpu'
>        35 | #define __le32_to_cpu(x) __swab32((__force __u32)(__le32)(x))
>           |                                                           ^
>     include/uapi/linux/swab.h:115:54: note: expanded from macro '__swab32'
>       115 | #define __swab32(x) (__u32)__builtin_bswap32((__u32)(x))
>           |                                                      ^
>     In file included from kernel/sched/core.c:34:
>     In file included from include/linux/sched/isolation.h:7:
>     In file included from include/linux/tick.h:8:
>     In file included from include/linux/clockchips.h:14:
>     In file included from include/linux/clocksource.h:22:
>     In file included from arch/s390/include/asm/io.h:93:
>     include/asm-generic/io.h:585:33: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
>       585 |         __raw_writeb(value, PCI_IOBASE + addr);
>           |                             ~~~~~~~~~~ ^
>     include/asm-generic/io.h:595:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
>       595 |         __raw_writew((u16 __force)cpu_to_le16(value), PCI_IOBASE + addr);
>           |                                                       ~~~~~~~~~~ ^
>     include/asm-generic/io.h:605:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
>       605 |         __raw_writel((u32 __force)cpu_to_le32(value), PCI_IOBASE + addr);
>           |                                                       ~~~~~~~~~~ ^
>     include/asm-generic/io.h:693:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
>       693 |         readsb(PCI_IOBASE + addr, buffer, count);
>           |                ~~~~~~~~~~ ^
>     include/asm-generic/io.h:701:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
>       701 |         readsw(PCI_IOBASE + addr, buffer, count);
>           |                ~~~~~~~~~~ ^
>     include/asm-generic/io.h:709:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
>       709 |         readsl(PCI_IOBASE + addr, buffer, count);
>           |                ~~~~~~~~~~ ^
>     include/asm-generic/io.h:718:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
>       718 |         writesb(PCI_IOBASE + addr, buffer, count);
>           |                 ~~~~~~~~~~ ^
>     include/asm-generic/io.h:727:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
>       727 |         writesw(PCI_IOBASE + addr, buffer, count);
>           |                 ~~~~~~~~~~ ^
>     include/asm-generic/io.h:736:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
>       736 |         writesl(PCI_IOBASE + addr, buffer, count);
>           |                 ~~~~~~~~~~ ^
>     In file included from kernel/sched/core.c:80:
>     In file included from arch/s390/include/asm/tlb.h:39:
>>> include/asm-generic/tlb.h:327:6: warning: no previous prototype for function '__tlb_remove_swap_entries' [-Wmissing-prototypes]
>       327 | bool __tlb_remove_swap_entries(struct mmu_gather *tlb,
>           |      ^
>     include/asm-generic/tlb.h:327:1: note: declare 'static' if the function is not intended to be used outside of this translation unit
>       327 | bool __tlb_remove_swap_entries(struct mmu_gather *tlb,
>           | ^
>           | static
>     kernel/sched/core.c:6330:20: warning: unused function 'sched_core_cpu_deactivate' [-Wunused-function]
>      6330 | static inline void sched_core_cpu_deactivate(unsigned int cpu) {}
>           |                    ^~~~~~~~~~~~~~~~~~~~~~~~~
>     15 warnings generated.
> ..
>
> --
> 0-DAY CI Kernel Test Service
> https://github.com/intel/lkp-tests/wiki
zhiguojiang July 31, 2024, 1:47 p.m. UTC | #5
在 2024/7/31 20:08, Arnd Bergmann 写道:
> On Tue, Jul 30, 2024, at 13:44, Zhiguo Jiang wrote:
>
>> +
>> +
>> +extern bool __tlb_remove_swap_entries(struct mmu_gather *tlb,
>> +		swp_entry_t entry, int nr);
>> +#else
>> +bool __tlb_remove_swap_entries(struct mmu_gather *tlb,
>> +		swp_entry_t entry, int nr)
>> +{
>> +	return false;
>> +}
>> +#endif
> To address the reported build regression, that function must
> be annotated as 'static inline'.
>
>       Arnd
Hi Arnd,

I have added 'static inline' for __tlb_remove_swap_entries() in [PATCH 
v2 3/3].
Thanks for your nice guidance.
https://lore.kernel.org/linux-mm/20240731133318.527-1-justinjiang@vivo.com/T/#t

Thanks
Zhiguo
kernel test robot July 31, 2024, 6:19 p.m. UTC | #6
Hi Zhiguo,

kernel test robot noticed the following build warnings:

[auto build test WARNING on akpm-mm/mm-everything]

url:    https://github.com/intel-lab-lkp/linux/commits/Zhiguo-Jiang/mm-move-task_is_dying-to-h-headfile/20240730-215136
base:   https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
patch link:    https://lore.kernel.org/r/20240730114426.511-3-justinjiang%40vivo.com
patch subject: [PATCH 2/2] mm: tlb: add tlb swap entries batch async release
config: i386-randconfig-061-20240731 (https://download.01.org/0day-ci/archive/20240801/202408010150.13yZScv6-lkp@intel.com/config)
compiler: clang version 18.1.5 (https://github.com/llvm/llvm-project 617a15a9eac96088ae5e9134248d8236e34b91b1)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240801/202408010150.13yZScv6-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202408010150.13yZScv6-lkp@intel.com/

sparse warnings: (new ones prefixed by >>)
>> mm/mmu_gather.c:54:10: sparse: sparse: symbol 'nr_exiting_processes' was not declared. Should it be static?
   mm/mmu_gather.c: note: in included file (through include/linux/mmzone.h, include/linux/gfp.h):
   include/linux/page-flags.h:235:46: sparse: sparse: self-comparison always evaluates to false
   include/linux/page-flags.h:235:46: sparse: sparse: self-comparison always evaluates to false

vim +/nr_exiting_processes +54 mm/mmu_gather.c

    53	
  > 54	atomic_t nr_exiting_processes = ATOMIC_INIT(0);
    55	static struct kmem_cache *swap_gather_cachep;
    56	static struct workqueue_struct *swapfree_wq;
    57	static DEFINE_STATIC_KEY_TRUE(tlb_swap_asyncfree_disabled);
    58
zhiguojiang Aug. 1, 2024, 6:54 a.m. UTC | #7
在 2024/8/1 2:19, kernel test robot 写道:
> Hi Zhiguo,
>
> kernel test robot noticed the following build warnings:
>
> [auto build test WARNING on akpm-mm/mm-everything]
>
> url:    https://github.com/intel-lab-lkp/linux/commits/Zhiguo-Jiang/mm-move-task_is_dying-to-h-headfile/20240730-215136
> base:   https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
> patch link:    https://lore.kernel.org/r/20240730114426.511-3-justinjiang%40vivo.com
> patch subject: [PATCH 2/2] mm: tlb: add tlb swap entries batch async release
> config: i386-randconfig-061-20240731 (https://download.01.org/0day-ci/archive/20240801/202408010150.13yZScv6-lkp@intel.com/config)
> compiler: clang version 18.1.5 (https://github.com/llvm/llvm-project 617a15a9eac96088ae5e9134248d8236e34b91b1)
> reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240801/202408010150.13yZScv6-lkp@intel.com/reproduce)
>
> If you fix the issue in a separate patch/commit (i.e. not just a new version of
> the same patch/commit), kindly add following tags
> | Reported-by: kernel test robot <lkp@intel.com>
> | Closes: https://lore.kernel.org/oe-kbuild-all/202408010150.13yZScv6-lkp@intel.com/
>
> sparse warnings: (new ones prefixed by >>)
Hi,

I have fix this compilation warning issue in the bellow patch. Please
kindly help to test with this patch.
https://lore.kernel.org/linux-mm/20240801065035.621-1-justinjiang@vivo.com/T/#u

Thanks
Zhiguo
>>> mm/mmu_gather.c:54:10: sparse: sparse: symbol 'nr_exiting_processes' was not declared. Should it be static?
>     mm/mmu_gather.c: note: in included file (through include/linux/mmzone.h, include/linux/gfp.h):
>     include/linux/page-flags.h:235:46: sparse: sparse: self-comparison always evaluates to false
>     include/linux/page-flags.h:235:46: sparse: sparse: self-comparison always evaluates to false
>
> vim +/nr_exiting_processes +54 mm/mmu_gather.c
>
>      53	
>    > 54	atomic_t nr_exiting_processes = ATOMIC_INIT(0);
>      55	static struct kmem_cache *swap_gather_cachep;
>      56	static struct workqueue_struct *swapfree_wq;
>      57	static DEFINE_STATIC_KEY_TRUE(tlb_swap_asyncfree_disabled);
>      58	
>
diff mbox series

Patch

diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index 709830274b75..aab43f1cf97f
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -294,6 +294,43 @@  extern void tlb_flush_rmaps(struct mmu_gather *tlb, struct vm_area_struct *vma);
 static inline void tlb_flush_rmaps(struct mmu_gather *tlb, struct vm_area_struct *vma) { }
 #endif
 
+#ifndef CONFIG_MMU_GATHER_NO_GATHER
+struct mmu_swap_batch {
+	struct mmu_swap_batch *next;
+	unsigned int nr;
+	unsigned int max;
+	encoded_swpentry_t encoded_entrys[];
+};
+
+#define MAX_SWAP_GATHER_BATCH	\
+	((PAGE_SIZE - sizeof(struct mmu_swap_batch)) / sizeof(void *))
+
+#define MAX_SWAP_GATHER_BATCH_COUNT	(10000UL / MAX_SWAP_GATHER_BATCH)
+
+struct mmu_swap_gather {
+	/*
+	 * the asynchronous kworker to batch
+	 * release swap entries
+	 */
+	struct work_struct free_work;
+
+	/* batch cache swap entries */
+	unsigned int batch_count;
+	struct mmu_swap_batch *active;
+	struct mmu_swap_batch local;
+	encoded_swpentry_t __encoded_entrys[MMU_GATHER_BUNDLE];
+};
+
+extern bool __tlb_remove_swap_entries(struct mmu_gather *tlb,
+		swp_entry_t entry, int nr);
+#else
+bool __tlb_remove_swap_entries(struct mmu_gather *tlb,
+		swp_entry_t entry, int nr)
+{
+	return false;
+}
+#endif
+
 /*
  * struct mmu_gather is an opaque type used by the mm code for passing around
  * any data needed by arch specific code for tlb_remove_page.
@@ -343,6 +380,18 @@  struct mmu_gather {
 	unsigned int		vma_exec : 1;
 	unsigned int		vma_huge : 1;
 	unsigned int		vma_pfn  : 1;
+#ifndef CONFIG_MMU_GATHER_NO_GATHER
+	/*
+	 * Two states of releasing swap entries
+	 * asynchronously:
+	 * swp_freeable - have opportunity to
+	 * release asynchronously future
+	 * swp_freeing - be releasing asynchronously.
+	 */
+	unsigned int		swp_freeable : 1;
+	unsigned int		swp_freeing : 1;
+	unsigned int		swp_disable : 1;
+#endif
 
 	unsigned int		batch_count;
 
@@ -354,6 +403,7 @@  struct mmu_gather {
 #ifdef CONFIG_MMU_GATHER_PAGE_SIZE
 	unsigned int page_size;
 #endif
+	struct mmu_swap_gather *swp;
 #endif
 };
 
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 485424979254..f26fbff93ff4
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -283,6 +283,64 @@  typedef struct {
 	unsigned long val;
 } swp_entry_t;
 
+/*
+ * encoded_swpentry_t - a type marking the encoded swp_entry_t.
+ *
+ * An 'encoded_swpentry_t' represents a 'swp_enrty_t' with its the highest
+ * bit indicating extra context-dependent information. Only used in swp_entry
+ * asynchronous release path by mmu_swap_gather.
+ */
+typedef struct {
+	unsigned long val;
+} encoded_swpentry_t;
+
+/*
+ * The next item in an encoded_swpentry_t array is the "nr" argument, specifying the
+ * total number of consecutive swap entries associated with the same folio. If this
+ * bit is not set, "nr" is implicitly 1.
+ *
+ * Refer to include\asm\pgtable.h, swp_offset bits: 0 ~ 57, swp_type bits: 58 ~ 62.
+ * Bit63 can be used here.
+ */
+#define ENCODED_SWPENTRY_BIT_NR_ENTRYS_NEXT (1UL << (BITS_PER_LONG - 1))
+
+static __always_inline encoded_swpentry_t
+encode_swpentry(swp_entry_t entry, unsigned long flags)
+{
+	encoded_swpentry_t ret;
+
+	VM_WARN_ON_ONCE(flags & ~ENCODED_SWPENTRY_BIT_NR_ENTRYS_NEXT);
+	ret.val = flags | entry.val;
+	return ret;
+}
+
+static inline unsigned long encoded_swpentry_flags(encoded_swpentry_t entry)
+{
+	return ENCODED_SWPENTRY_BIT_NR_ENTRYS_NEXT & entry.val;
+}
+
+static inline swp_entry_t encoded_swpentry_data(encoded_swpentry_t entry)
+{
+	swp_entry_t ret;
+
+	ret.val = ~ENCODED_SWPENTRY_BIT_NR_ENTRYS_NEXT & entry.val;
+	return ret;
+}
+
+static __always_inline encoded_swpentry_t encode_nr_swpentrys(unsigned long nr)
+{
+	encoded_swpentry_t ret;
+
+	VM_WARN_ON_ONCE(nr & ENCODED_SWPENTRY_BIT_NR_ENTRYS_NEXT);
+	ret.val = nr;
+	return ret;
+}
+
+static __always_inline unsigned long encoded_nr_swpentrys(encoded_swpentry_t entry)
+{
+	return ((~ENCODED_SWPENTRY_BIT_NR_ENTRYS_NEXT) & entry.val);
+}
+
 /**
  * struct folio - Represents a contiguous set of bytes.
  * @flags: Identical to the page flags.
diff --git a/mm/memory.c b/mm/memory.c
index b9f5cc0db3eb..bfa1995558d2
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1650,7 +1650,8 @@  static unsigned long zap_pte_range(struct mmu_gather *tlb,
 			if (!should_zap_cows(details))
 				continue;
 			rss[MM_SWAPENTS] -= nr;
-			free_swap_and_cache_nr(entry, nr);
+			if (!__tlb_remove_swap_entries(tlb, entry, nr))
+				free_swap_and_cache_nr(entry, nr);
 		} else if (is_migration_entry(entry)) {
 			folio = pfn_swap_entry_folio(entry);
 			if (!should_zap_folio(details, folio))
diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c
index 99b3e9408aa0..2bb413d052bd
--- a/mm/mmu_gather.c
+++ b/mm/mmu_gather.c
@@ -9,11 +9,304 @@ 
 #include <linux/smp.h>
 #include <linux/swap.h>
 #include <linux/rmap.h>
+#include <linux/oom.h>
 
 #include <asm/pgalloc.h>
 #include <asm/tlb.h>
 
 #ifndef CONFIG_MMU_GATHER_NO_GATHER
+/*
+ * The swp_entry asynchronous release mechanism for multiple processes exiting
+ * simultaneously.
+ *
+ * During the multiple exiting processes releasing their own mm simultaneously,
+ * the swap entries in the exiting processes are handled by isolating, caching
+ * and handing over to an asynchronous kworker to complete the release.
+ *
+ * The conditions for the exiting process entering the swp_entry asynchronous
+ * release path:
+ * 1. The exiting process's MM_SWAPENTS count is >= SWAP_CLUSTER_MAX, avoiding
+ *    to alloc struct mmu_swap_gather frequently.
+ * 2. The number of exiting processes is >= NR_MIN_EXITING_PROCESSES.
+ *
+ * Since the time for determining the number of exiting processes is dynamic,
+ * the exiting process may start to enter the swp_entry asynchronous release
+ * at the beginning or middle stage of the exiting process's swp_entry release
+ * path.
+ *
+ * Once an exiting process enters the swp_entry asynchronous release, all remaining
+ * swap entries in this exiting process need to be fully released by asynchronous
+ * kworker theoretically.
+ *
+ * The function of the swp_entry asynchronous release:
+ * 1. Alleviate the high system cpu load caused by multiple exiting processes
+ *    running simultaneously.
+ * 2. Reduce lock competition in swap entry free path by an asynchronous kworker
+ *    instead of multiple exiting processes parallel execution.
+ * 3. Release memory occupied by exiting processes more efficiently.
+ */
+
+/*
+ * The min number of exiting processes required for swp_entry asynchronous release
+ */
+#define NR_MIN_EXITING_PROCESSES 2
+
+atomic_t nr_exiting_processes = ATOMIC_INIT(0);
+static struct kmem_cache *swap_gather_cachep;
+static struct workqueue_struct *swapfree_wq;
+static DEFINE_STATIC_KEY_TRUE(tlb_swap_asyncfree_disabled);
+
+static int __init tlb_swap_async_free_setup(void)
+{
+	swapfree_wq = alloc_workqueue("smfree_wq", WQ_UNBOUND |
+		WQ_HIGHPRI | WQ_MEM_RECLAIM, 1);
+	if (!swapfree_wq)
+		goto fail;
+
+	swap_gather_cachep = kmem_cache_create("swap_gather",
+		sizeof(struct mmu_swap_gather),
+		0, SLAB_TYPESAFE_BY_RCU | SLAB_PANIC | SLAB_ACCOUNT,
+		NULL);
+	if (!swap_gather_cachep)
+		goto kcache_fail;
+
+	static_branch_disable(&tlb_swap_asyncfree_disabled);
+	return 0;
+
+kcache_fail:
+	destroy_workqueue(swapfree_wq);
+fail:
+	return -ENOMEM;
+}
+postcore_initcall(tlb_swap_async_free_setup);
+
+static void __tlb_swap_gather_free(struct mmu_swap_gather *swap_gather)
+{
+	struct mmu_swap_batch *swap_batch, *next;
+
+	for (swap_batch = swap_gather->local.next; swap_batch; swap_batch = next) {
+		next = swap_batch->next;
+		free_page((unsigned long)swap_batch);
+	}
+	swap_gather->local.next = NULL;
+	kmem_cache_free(swap_gather_cachep, swap_gather);
+}
+
+static void tlb_swap_async_free_work(struct work_struct *w)
+{
+	int i, nr_multi, nr_free;
+	swp_entry_t start_entry;
+	struct mmu_swap_batch *swap_batch;
+	struct mmu_swap_gather *swap_gather = container_of(w,
+		struct mmu_swap_gather, free_work);
+
+	/* Release swap entries cached in mmu_swap_batch. */
+	for (swap_batch = &swap_gather->local; swap_batch && swap_batch->nr;
+	    swap_batch = swap_batch->next) {
+		nr_free = 0;
+		for (i = 0; i < swap_batch->nr; i++) {
+			if (unlikely(encoded_swpentry_flags(swap_batch->encoded_entrys[i]) &
+			    ENCODED_SWPENTRY_BIT_NR_ENTRYS_NEXT)) {
+				start_entry = encoded_swpentry_data(swap_batch->encoded_entrys[i]);
+				nr_multi = encoded_nr_swpentrys(swap_batch->encoded_entrys[++i]);
+				free_swap_and_cache_nr(start_entry, nr_multi);
+				nr_free += 2;
+			} else {
+				start_entry = encoded_swpentry_data(swap_batch->encoded_entrys[i]);
+				free_swap_and_cache_nr(start_entry, 1);
+				nr_free++;
+			}
+		}
+		swap_batch->nr -= nr_free;
+		WARN_ON_ONCE(swap_batch->nr);
+	}
+	__tlb_swap_gather_free(swap_gather);
+}
+
+static bool __tlb_swap_gather_mmu_check(struct mmu_gather *tlb)
+{
+	/*
+	 * Only the exiting processes with the MM_SWAPENTS counter >=
+	 * SWAP_CLUSTER_MAX have the opportunity to release their swap
+	 * entries by asynchronous kworker.
+	 */
+	if (!task_is_dying() ||
+	    get_mm_counter(tlb->mm, MM_SWAPENTS) < SWAP_CLUSTER_MAX)
+		return true;
+
+	atomic_inc(&nr_exiting_processes);
+	if (atomic_read(&nr_exiting_processes) < NR_MIN_EXITING_PROCESSES)
+		tlb->swp_freeable = 1;
+	else
+		tlb->swp_freeing = 1;
+
+	return false;
+}
+
+/**
+ * __tlb_swap_gather_init - Initialize an mmu_swap_gather structure
+ * for swp_entry tear-down.
+ * @tlb: the mmu_swap_gather structure belongs to tlb
+ */
+static bool __tlb_swap_gather_init(struct mmu_gather *tlb)
+{
+	tlb->swp = kmem_cache_alloc(swap_gather_cachep, GFP_ATOMIC | GFP_NOWAIT);
+	if (unlikely(!tlb->swp))
+		return false;
+
+	tlb->swp->local.next  = NULL;
+	tlb->swp->local.nr    = 0;
+	tlb->swp->local.max   = ARRAY_SIZE(tlb->swp->__encoded_entrys);
+
+	tlb->swp->active      = &tlb->swp->local;
+	tlb->swp->batch_count = 0;
+
+	INIT_WORK(&tlb->swp->free_work, tlb_swap_async_free_work);
+	return true;
+}
+
+static void __tlb_swap_gather_mmu(struct mmu_gather *tlb)
+{
+	if (static_branch_unlikely(&tlb_swap_asyncfree_disabled))
+		return;
+
+	tlb->swp = NULL;
+	tlb->swp_freeable = 0;
+	tlb->swp_freeing = 0;
+	tlb->swp_disable = 0;
+
+	if (__tlb_swap_gather_mmu_check(tlb))
+		return;
+
+	/*
+	 * If the exiting process meets the conditions of
+	 * swp_entry asynchronous release, an mmu_swap_gather
+	 * structure will be initialized.
+	 */
+	if (tlb->swp_freeing)
+		__tlb_swap_gather_init(tlb);
+}
+
+static void __tlb_swap_gather_queuework(struct mmu_gather *tlb, bool finish)
+{
+	queue_work(swapfree_wq, &tlb->swp->free_work);
+	tlb->swp = NULL;
+	if (!finish)
+		__tlb_swap_gather_init(tlb);
+}
+
+static bool __tlb_swap_next_batch(struct mmu_gather *tlb)
+{
+	struct mmu_swap_batch *swap_batch;
+
+	if (tlb->swp->batch_count == MAX_SWAP_GATHER_BATCH_COUNT)
+		goto free;
+
+	swap_batch = (void *)__get_free_page(GFP_ATOMIC | GFP_NOWAIT);
+	if (unlikely(!swap_batch))
+		goto free;
+
+	swap_batch->next = NULL;
+	swap_batch->nr   = 0;
+	swap_batch->max  = MAX_SWAP_GATHER_BATCH;
+
+	tlb->swp->active->next = swap_batch;
+	tlb->swp->active = swap_batch;
+	tlb->swp->batch_count++;
+	return true;
+free:
+	/* batch move to wq */
+	__tlb_swap_gather_queuework(tlb, false);
+	return false;
+}
+
+/**
+ * __tlb_remove_swap_entries - the swap entries in exiting process are
+ * isolated, batch cached in struct mmu_swap_batch.
+ * @tlb: the current mmu_gather
+ * @entry: swp_entry to be isolated and cached
+ * @nr: the number of consecutive entries starting from entry parameter.
+ */
+bool __tlb_remove_swap_entries(struct mmu_gather *tlb,
+			     swp_entry_t entry, int nr)
+{
+	struct mmu_swap_batch *swap_batch;
+	unsigned long flags = 0;
+	bool ret = false;
+
+	if (tlb->swp_disable)
+		return ret;
+
+	if (!tlb->swp_freeable && !tlb->swp_freeing)
+		return ret;
+
+
+	if (tlb->swp_freeable) {
+		if (atomic_read(&nr_exiting_processes) <
+		    NR_MIN_EXITING_PROCESSES)
+			return ret;
+		/*
+		 * If the current number of exiting processes
+		 * is >= NR_MIN_EXITING_PROCESSES, the exiting
+		 * process with swp_freeable state will enter
+		 * swp_freeing state to start releasing its
+		 * remaining swap entries by the asynchronous
+		 * kworker.
+		 */
+		tlb->swp_freeable = 0;
+		tlb->swp_freeing = 1;
+	}
+
+	VM_BUG_ON(tlb->swp_freeable || !tlb->swp_freeing);
+	if (!tlb->swp && !__tlb_swap_gather_init(tlb))
+		return ret;
+
+	swap_batch = tlb->swp->active;
+	if (unlikely(swap_batch->nr >= swap_batch->max - 1)) {
+		__tlb_swap_gather_queuework(tlb, false);
+		return ret;
+	}
+
+	if (likely(nr == 1)) {
+		swap_batch->encoded_entrys[swap_batch->nr++] = encode_swpentry(entry, flags);
+	} else {
+		flags |= ENCODED_SWPENTRY_BIT_NR_ENTRYS_NEXT;
+		swap_batch->encoded_entrys[swap_batch->nr++] = encode_swpentry(entry, flags);
+		swap_batch->encoded_entrys[swap_batch->nr++] = encode_nr_swpentrys(nr);
+	}
+	ret = true;
+
+	if (swap_batch->nr >= swap_batch->max - 1) {
+		if (!__tlb_swap_next_batch(tlb))
+			goto exit;
+		swap_batch = tlb->swp->active;
+	}
+	VM_BUG_ON(swap_batch->nr > swap_batch->max - 1);
+exit:
+	return ret;
+}
+
+static void __tlb_batch_swap_finish(struct mmu_gather *tlb)
+{
+	if (tlb->swp_disable)
+		return;
+
+	if (!tlb->swp_freeable && !tlb->swp_freeing)
+		return;
+
+	if (tlb->swp_freeable) {
+		tlb->swp_freeable = 0;
+		VM_BUG_ON(tlb->swp_freeing);
+		goto exit;
+	}
+	tlb->swp_freeing = 0;
+	if (unlikely(!tlb->swp))
+		goto exit;
+
+	__tlb_swap_gather_queuework(tlb, true);
+exit:
+	atomic_dec(&nr_exiting_processes);
+}
 
 static bool tlb_next_batch(struct mmu_gather *tlb)
 {
@@ -386,6 +679,9 @@  static void __tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
 	tlb->local.max  = ARRAY_SIZE(tlb->__pages);
 	tlb->active     = &tlb->local;
 	tlb->batch_count = 0;
+
+	tlb->swp_disable = 1;
+	__tlb_swap_gather_mmu(tlb);
 #endif
 	tlb->delayed_rmap = 0;
 
@@ -466,6 +762,7 @@  void tlb_finish_mmu(struct mmu_gather *tlb)
 
 #ifndef CONFIG_MMU_GATHER_NO_GATHER
 	tlb_batch_list_free(tlb);
+	__tlb_batch_swap_finish(tlb);
 #endif
 	dec_tlb_flush_pending(tlb->mm);
 }