diff mbox series

[v5,8/8] mm: huge_memory: enable debugfs to split huge pages to any order.

Message ID 20240226205534.1603748-9-zi.yan@sent.com (mailing list archive)
State New
Headers show
Series Split a folio to any lower order folios | expand

Commit Message

Zi Yan Feb. 26, 2024, 8:55 p.m. UTC
From: Zi Yan <ziy@nvidia.com>

It is used to test split_huge_page_to_list_to_order for pagecache THPs.
Also add test cases for split_huge_page_to_list_to_order via both
debugfs.

Signed-off-by: Zi Yan <ziy@nvidia.com>
---
 mm/huge_memory.c                              |  34 ++++--
 .../selftests/mm/split_huge_page_test.c       | 115 +++++++++++++++++-
 2 files changed, 131 insertions(+), 18 deletions(-)

Comments

Aishwarya TCV March 1, 2024, 9:51 a.m. UTC | #1
On 26/02/2024 20:55, Zi Yan wrote:
> From: Zi Yan <ziy@nvidia.com>
> 
> It is used to test split_huge_page_to_list_to_order for pagecache THPs.
> Also add test cases for split_huge_page_to_list_to_order via both
> debugfs.
> 
> Signed-off-by: Zi Yan <ziy@nvidia.com>
> ---
>  mm/huge_memory.c                              |  34 ++++--
>  .../selftests/mm/split_huge_page_test.c       | 115 +++++++++++++++++-
>  2 files changed, 131 insertions(+), 18 deletions(-)
> 

Hi Zi,

When booting the kernel against next-master(20240228)with Arm64 on
Marvell Thunder X2 (TX2), the kselftest-mm test 'split_huge_page_test'
is failing in our CI (with rootfs over NFS). I can send the full logs if
required.

A bisect (full log below) identified this patch as introducing the
failure. Bisected it on the tag "next-20240228" at repo
"https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git".

This works fine on  Linux version 6.8.0-rc6


Sample log from failure against run on TX2:
------
07:17:34.056125  # # ------------------------------
07:17:34.056543  # # running ./split_huge_page_test
07:17:34.056839  # # ------------------------------
07:17:34.057114  # # TAP version 13
07:17:34.058564  # # 1..12
07:17:34.156822  # # ok 1 Split huge pages successful
07:17:34.214074  # # ok 2 Split PTE-mapped huge pages successful
07:17:34.215630  # # # Please enable pr_debug in
split_huge_pages_in_file() for more info.
07:17:34.225503  # # # Please check dmesg for more information
07:17:34.225862  # # ok 3 File-backed THP split test done
07:17:34.236944  # # Bail out! Failed to create a file at /mnt/thp_fs#
Planned tests != run tests (12 != 3)
07:17:34.237307  # # # Totals: pass:3 fail:0 xfail:0 xpass:0 skip:0 error:0
07:17:34.237620  # # [FAIL]
07:17:34.246430  # not ok 51 split_huge_page_test # exit=1


Bisect log:
------
git bisect start
# good: [d206a76d7d2726f3b096037f2079ce0bd3ba329b] Linux 6.8-rc6
git bisect good d206a76d7d2726f3b096037f2079ce0bd3ba329b
# bad: [20af1ca418d2c0b11bc2a1fe8c0c88f67bcc2a7e] Add linux-next
specific files for 20240228
git bisect bad 20af1ca418d2c0b11bc2a1fe8c0c88f67bcc2a7e
# bad: [1322f1801e59dddce10591d602d246c1bf49990c] Merge branch 'main' of
git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git
git bisect bad 1322f1801e59dddce10591d602d246c1bf49990c
# bad: [a82f70041487790b7b09fe4bb45436e1b57021d3] Merge branch 'dev' of
git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4.git
git bisect bad a82f70041487790b7b09fe4bb45436e1b57021d3
# bad: [ce90480b9352ba2bebe8946dad9223e3f24c6e9a] Merge branch
'for-next' of
git://git.kernel.org/pub/scm/linux/kernel/git/tmlind/linux-omap.git
git bisect bad ce90480b9352ba2bebe8946dad9223e3f24c6e9a
# bad: [5daac92ed3881fd0c656478a301a4e1d124100ee] Merge branch
'mm-everything' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
git bisect bad 5daac92ed3881fd0c656478a301a4e1d124100ee
# good: [acc2643d9e988c63dd4629a9af380ad9ac69c54a] Merge branch
'mm-stable' into mm-unstable
git bisect good acc2643d9e988c63dd4629a9af380ad9ac69c54a
# good: [0294de8fe7d7c1a7eddc979cbf4c1886406e36b7] Merge branch 'fixes'
of git://git.kernel.org/pub/scm/linux/kernel/git/sre/linux-power-supply.git
git bisect good 0294de8fe7d7c1a7eddc979cbf4c1886406e36b7
# good: [83e0c8f0e777a1ef0977b2f8189101765703b32d] Merge branch
'mm-nonmm-stable' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
git bisect good 83e0c8f0e777a1ef0977b2f8189101765703b32d
# good: [a739cbe236e0dd3b6ff26a01fa1d31c73d4fac93] mm: memcg: make memcg
huge page split support any order split
git bisect good a739cbe236e0dd3b6ff26a01fa1d31c73d4fac93
# bad: [efb520aa333b2f11daaaaa13f4a598b5ae4ae823] mm: allow non-hugetlb
large folios to be batch processed
git bisect bad efb520aa333b2f11daaaaa13f4a598b5ae4ae823
# bad: [2258bdebb55e3ad3d30fd3849ddb955ff36825de] mm/zsmalloc: don't
hold locks of all pages when free_zspage()
git bisect bad 2258bdebb55e3ad3d30fd3849ddb955ff36825de
# bad: [7fc0be45acf2878cbacc4dba56923c34c3fd8b1e] mm: remove
total_mapcount()
git bisect bad 7fc0be45acf2878cbacc4dba56923c34c3fd8b1e
# good: [d55fac55da2f87ad5a99178e107df09770bbc411] mm: thp: split huge
page to any lower order pages
git bisect good d55fac55da2f87ad5a99178e107df09770bbc411
# bad: [4050d591c1aaf9336c08511fa5984827186e9ad1] mm/memfd: refactor
memfd_tag_pins() and memfd_wait_for_pins()
git bisect bad 4050d591c1aaf9336c08511fa5984827186e9ad1
# bad: [c0ba89c29ef559c95273feb481b049f622c43c17] mm: huge_memory:
enable debugfs to split huge pages to any order
git bisect bad c0ba89c29ef559c95273feb481b049f622c43c17
# first bad commit: [c0ba89c29ef559c95273feb481b049f622c43c17] mm:
huge_memory: enable debugfs to split huge pages to any order


Thanks,
Aishwarya
Ryan Roberts March 1, 2024, 10:33 a.m. UTC | #2
On 01/03/2024 09:51, Aishwarya TCV wrote:
> 
> 
> On 26/02/2024 20:55, Zi Yan wrote:
>> From: Zi Yan <ziy@nvidia.com>
>>
>> It is used to test split_huge_page_to_list_to_order for pagecache THPs.
>> Also add test cases for split_huge_page_to_list_to_order via both
>> debugfs.
>>
>> Signed-off-by: Zi Yan <ziy@nvidia.com>
>> ---
>>  mm/huge_memory.c                              |  34 ++++--
>>  .../selftests/mm/split_huge_page_test.c       | 115 +++++++++++++++++-
>>  2 files changed, 131 insertions(+), 18 deletions(-)
>>
> 
> Hi Zi,
> 
> When booting the kernel against next-master(20240228)with Arm64 on
> Marvell Thunder X2 (TX2), the kselftest-mm test 'split_huge_page_test'
> is failing in our CI (with rootfs over NFS). I can send the full logs if
> required.

Just to add, I took a quick eyeball and I think there a couple of potential issues:

  - In create_pagecache_thp_and_fd() you do *fd = open(testfile, O_CREAT ...);
    where testfile is /mnt/thp_fs/testfile. So if /mnt/thp_fs doesn't exist,
    then the open will fail I think? I'm pretty sure that's what's happening on
    our CI. Suggest the test needs to setup this dir itself. Is thp_fs a mounted
    fs or just a dir? If the latter can you just mktemp()?

  - Later in create_pagecache_thp_and_fd() you fail the test if you don't have a
    filesystem that supports large folios. Can we turn that into a skip? That
    would reduce noise on the CI.

Thanks,
Ryan

> 
> A bisect (full log below) identified this patch as introducing the
> failure. Bisected it on the tag "next-20240228" at repo
> "https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git".
> 
> This works fine on  Linux version 6.8.0-rc6
> 
> 
> Sample log from failure against run on TX2:
> ------
> 07:17:34.056125  # # ------------------------------
> 07:17:34.056543  # # running ./split_huge_page_test
> 07:17:34.056839  # # ------------------------------
> 07:17:34.057114  # # TAP version 13
> 07:17:34.058564  # # 1..12
> 07:17:34.156822  # # ok 1 Split huge pages successful
> 07:17:34.214074  # # ok 2 Split PTE-mapped huge pages successful
> 07:17:34.215630  # # # Please enable pr_debug in
> split_huge_pages_in_file() for more info.
> 07:17:34.225503  # # # Please check dmesg for more information
> 07:17:34.225862  # # ok 3 File-backed THP split test done
> 07:17:34.236944  # # Bail out! Failed to create a file at /mnt/thp_fs#
> Planned tests != run tests (12 != 3)
> 07:17:34.237307  # # # Totals: pass:3 fail:0 xfail:0 xpass:0 skip:0 error:0
> 07:17:34.237620  # # [FAIL]
> 07:17:34.246430  # not ok 51 split_huge_page_test # exit=1
> 
> 
> Bisect log:
> ------
> git bisect start
> # good: [d206a76d7d2726f3b096037f2079ce0bd3ba329b] Linux 6.8-rc6
> git bisect good d206a76d7d2726f3b096037f2079ce0bd3ba329b
> # bad: [20af1ca418d2c0b11bc2a1fe8c0c88f67bcc2a7e] Add linux-next
> specific files for 20240228
> git bisect bad 20af1ca418d2c0b11bc2a1fe8c0c88f67bcc2a7e
> # bad: [1322f1801e59dddce10591d602d246c1bf49990c] Merge branch 'main' of
> git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git
> git bisect bad 1322f1801e59dddce10591d602d246c1bf49990c
> # bad: [a82f70041487790b7b09fe4bb45436e1b57021d3] Merge branch 'dev' of
> git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4.git
> git bisect bad a82f70041487790b7b09fe4bb45436e1b57021d3
> # bad: [ce90480b9352ba2bebe8946dad9223e3f24c6e9a] Merge branch
> 'for-next' of
> git://git.kernel.org/pub/scm/linux/kernel/git/tmlind/linux-omap.git
> git bisect bad ce90480b9352ba2bebe8946dad9223e3f24c6e9a
> # bad: [5daac92ed3881fd0c656478a301a4e1d124100ee] Merge branch
> 'mm-everything' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
> git bisect bad 5daac92ed3881fd0c656478a301a4e1d124100ee
> # good: [acc2643d9e988c63dd4629a9af380ad9ac69c54a] Merge branch
> 'mm-stable' into mm-unstable
> git bisect good acc2643d9e988c63dd4629a9af380ad9ac69c54a
> # good: [0294de8fe7d7c1a7eddc979cbf4c1886406e36b7] Merge branch 'fixes'
> of git://git.kernel.org/pub/scm/linux/kernel/git/sre/linux-power-supply.git
> git bisect good 0294de8fe7d7c1a7eddc979cbf4c1886406e36b7
> # good: [83e0c8f0e777a1ef0977b2f8189101765703b32d] Merge branch
> 'mm-nonmm-stable' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
> git bisect good 83e0c8f0e777a1ef0977b2f8189101765703b32d
> # good: [a739cbe236e0dd3b6ff26a01fa1d31c73d4fac93] mm: memcg: make memcg
> huge page split support any order split
> git bisect good a739cbe236e0dd3b6ff26a01fa1d31c73d4fac93
> # bad: [efb520aa333b2f11daaaaa13f4a598b5ae4ae823] mm: allow non-hugetlb
> large folios to be batch processed
> git bisect bad efb520aa333b2f11daaaaa13f4a598b5ae4ae823
> # bad: [2258bdebb55e3ad3d30fd3849ddb955ff36825de] mm/zsmalloc: don't
> hold locks of all pages when free_zspage()
> git bisect bad 2258bdebb55e3ad3d30fd3849ddb955ff36825de
> # bad: [7fc0be45acf2878cbacc4dba56923c34c3fd8b1e] mm: remove
> total_mapcount()
> git bisect bad 7fc0be45acf2878cbacc4dba56923c34c3fd8b1e
> # good: [d55fac55da2f87ad5a99178e107df09770bbc411] mm: thp: split huge
> page to any lower order pages
> git bisect good d55fac55da2f87ad5a99178e107df09770bbc411
> # bad: [4050d591c1aaf9336c08511fa5984827186e9ad1] mm/memfd: refactor
> memfd_tag_pins() and memfd_wait_for_pins()
> git bisect bad 4050d591c1aaf9336c08511fa5984827186e9ad1
> # bad: [c0ba89c29ef559c95273feb481b049f622c43c17] mm: huge_memory:
> enable debugfs to split huge pages to any order
> git bisect bad c0ba89c29ef559c95273feb481b049f622c43c17
> # first bad commit: [c0ba89c29ef559c95273feb481b049f622c43c17] mm:
> huge_memory: enable debugfs to split huge pages to any order
> 
> 
> Thanks,
> Aishwarya
Mark Brown March 1, 2024, 12:11 p.m. UTC | #3
On Fri, Mar 01, 2024 at 10:33:15AM +0000, Ryan Roberts wrote:

>   - In create_pagecache_thp_and_fd() you do *fd = open(testfile, O_CREAT ...);
>     where testfile is /mnt/thp_fs/testfile. So if /mnt/thp_fs doesn't exist,
>     then the open will fail I think? I'm pretty sure that's what's happening on
>     our CI. Suggest the test needs to setup this dir itself. Is thp_fs a mounted
>     fs or just a dir? If the latter can you just mktemp()?

Mounting on /mnt would also be a bit of an issue, that's something
people are relatively likely to have used for something so could be
disruptive.  If the test is going to do a new mount it's probably better
to do something like make a temporary directory then mount on top of that.
Zi Yan March 1, 2024, 12:52 p.m. UTC | #4
On 1 Mar 2024, at 5:33, Ryan Roberts wrote:

> On 01/03/2024 09:51, Aishwarya TCV wrote:
>>
>>
>> On 26/02/2024 20:55, Zi Yan wrote:
>>> From: Zi Yan <ziy@nvidia.com>
>>>
>>> It is used to test split_huge_page_to_list_to_order for pagecache THPs.
>>> Also add test cases for split_huge_page_to_list_to_order via both
>>> debugfs.
>>>
>>> Signed-off-by: Zi Yan <ziy@nvidia.com>
>>> ---
>>>  mm/huge_memory.c                              |  34 ++++--
>>>  .../selftests/mm/split_huge_page_test.c       | 115 +++++++++++++++++-
>>>  2 files changed, 131 insertions(+), 18 deletions(-)
>>>
>>
>> Hi Zi,
>>
>> When booting the kernel against next-master(20240228)with Arm64 on
>> Marvell Thunder X2 (TX2), the kselftest-mm test 'split_huge_page_test'
>> is failing in our CI (with rootfs over NFS). I can send the full logs if
>> required.
>
> Just to add, I took a quick eyeball and I think there a couple of potential issues:
>
>   - In create_pagecache_thp_and_fd() you do *fd = open(testfile, O_CREAT ...);
>     where testfile is /mnt/thp_fs/testfile. So if /mnt/thp_fs doesn't exist,
>     then the open will fail I think? I'm pretty sure that's what's happening on
>     our CI. Suggest the test needs to setup this dir itself. Is thp_fs a mounted
>     fs or just a dir? If the latter can you just mktemp()?

The former. the page cache folio split tests require a file system supporting
large folio and I used XFS.

>   - Later in create_pagecache_thp_and_fd() you fail the test if you don't have a
>     filesystem that supports large folios. Can we turn that into a skip? That
>     would reduce noise on the CI.

I can do that. But is this a new requirement that self tests have to be finish
in CI/CD environment? Can you provide a guideline for it? Since I always assume
selftests are just ran by human who can set up environment. In addition, I do
not think it is realistic to make the test file to set up all the environment,
since everyone's machine is different. It is much easier to make the CI/CD
environment to make the mount.

>
> Thanks,
> Ryan
>
>>
>> A bisect (full log below) identified this patch as introducing the
>> failure. Bisected it on the tag "next-20240228" at repo
>> "https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git".
>>
>> This works fine on  Linux version 6.8.0-rc6
>>
>>
>> Sample log from failure against run on TX2:
>> ------
>> 07:17:34.056125  # # ------------------------------
>> 07:17:34.056543  # # running ./split_huge_page_test
>> 07:17:34.056839  # # ------------------------------
>> 07:17:34.057114  # # TAP version 13
>> 07:17:34.058564  # # 1..12
>> 07:17:34.156822  # # ok 1 Split huge pages successful
>> 07:17:34.214074  # # ok 2 Split PTE-mapped huge pages successful
>> 07:17:34.215630  # # # Please enable pr_debug in
>> split_huge_pages_in_file() for more info.
>> 07:17:34.225503  # # # Please check dmesg for more information
>> 07:17:34.225862  # # ok 3 File-backed THP split test done
>> 07:17:34.236944  # # Bail out! Failed to create a file at /mnt/thp_fs#
>> Planned tests != run tests (12 != 3)
>> 07:17:34.237307  # # # Totals: pass:3 fail:0 xfail:0 xpass:0 skip:0 error:0
>> 07:17:34.237620  # # [FAIL]
>> 07:17:34.246430  # not ok 51 split_huge_page_test # exit=1
>>
>>
>> Bisect log:
>> ------
>> git bisect start
>> # good: [d206a76d7d2726f3b096037f2079ce0bd3ba329b] Linux 6.8-rc6
>> git bisect good d206a76d7d2726f3b096037f2079ce0bd3ba329b
>> # bad: [20af1ca418d2c0b11bc2a1fe8c0c88f67bcc2a7e] Add linux-next
>> specific files for 20240228
>> git bisect bad 20af1ca418d2c0b11bc2a1fe8c0c88f67bcc2a7e
>> # bad: [1322f1801e59dddce10591d602d246c1bf49990c] Merge branch 'main' of
>> git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git
>> git bisect bad 1322f1801e59dddce10591d602d246c1bf49990c
>> # bad: [a82f70041487790b7b09fe4bb45436e1b57021d3] Merge branch 'dev' of
>> git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4.git
>> git bisect bad a82f70041487790b7b09fe4bb45436e1b57021d3
>> # bad: [ce90480b9352ba2bebe8946dad9223e3f24c6e9a] Merge branch
>> 'for-next' of
>> git://git.kernel.org/pub/scm/linux/kernel/git/tmlind/linux-omap.git
>> git bisect bad ce90480b9352ba2bebe8946dad9223e3f24c6e9a
>> # bad: [5daac92ed3881fd0c656478a301a4e1d124100ee] Merge branch
>> 'mm-everything' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
>> git bisect bad 5daac92ed3881fd0c656478a301a4e1d124100ee
>> # good: [acc2643d9e988c63dd4629a9af380ad9ac69c54a] Merge branch
>> 'mm-stable' into mm-unstable
>> git bisect good acc2643d9e988c63dd4629a9af380ad9ac69c54a
>> # good: [0294de8fe7d7c1a7eddc979cbf4c1886406e36b7] Merge branch 'fixes'
>> of git://git.kernel.org/pub/scm/linux/kernel/git/sre/linux-power-supply.git
>> git bisect good 0294de8fe7d7c1a7eddc979cbf4c1886406e36b7
>> # good: [83e0c8f0e777a1ef0977b2f8189101765703b32d] Merge branch
>> 'mm-nonmm-stable' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
>> git bisect good 83e0c8f0e777a1ef0977b2f8189101765703b32d
>> # good: [a739cbe236e0dd3b6ff26a01fa1d31c73d4fac93] mm: memcg: make memcg
>> huge page split support any order split
>> git bisect good a739cbe236e0dd3b6ff26a01fa1d31c73d4fac93
>> # bad: [efb520aa333b2f11daaaaa13f4a598b5ae4ae823] mm: allow non-hugetlb
>> large folios to be batch processed
>> git bisect bad efb520aa333b2f11daaaaa13f4a598b5ae4ae823
>> # bad: [2258bdebb55e3ad3d30fd3849ddb955ff36825de] mm/zsmalloc: don't
>> hold locks of all pages when free_zspage()
>> git bisect bad 2258bdebb55e3ad3d30fd3849ddb955ff36825de
>> # bad: [7fc0be45acf2878cbacc4dba56923c34c3fd8b1e] mm: remove
>> total_mapcount()
>> git bisect bad 7fc0be45acf2878cbacc4dba56923c34c3fd8b1e
>> # good: [d55fac55da2f87ad5a99178e107df09770bbc411] mm: thp: split huge
>> page to any lower order pages
>> git bisect good d55fac55da2f87ad5a99178e107df09770bbc411
>> # bad: [4050d591c1aaf9336c08511fa5984827186e9ad1] mm/memfd: refactor
>> memfd_tag_pins() and memfd_wait_for_pins()
>> git bisect bad 4050d591c1aaf9336c08511fa5984827186e9ad1
>> # bad: [c0ba89c29ef559c95273feb481b049f622c43c17] mm: huge_memory:
>> enable debugfs to split huge pages to any order
>> git bisect bad c0ba89c29ef559c95273feb481b049f622c43c17
>> # first bad commit: [c0ba89c29ef559c95273feb481b049f622c43c17] mm:
>> huge_memory: enable debugfs to split huge pages to any order
>>
>>
>> Thanks,
>> Aishwarya


--
Best Regards,
Yan, Zi
Zi Yan March 1, 2024, 12:56 p.m. UTC | #5
On 1 Mar 2024, at 7:11, Mark Brown wrote:

> On Fri, Mar 01, 2024 at 10:33:15AM +0000, Ryan Roberts wrote:
>
>>   - In create_pagecache_thp_and_fd() you do *fd = open(testfile, O_CREAT ...);
>>     where testfile is /mnt/thp_fs/testfile. So if /mnt/thp_fs doesn't exist,
>>     then the open will fail I think? I'm pretty sure that's what's happening on
>>     our CI. Suggest the test needs to setup this dir itself. Is thp_fs a mounted
>>     fs or just a dir? If the latter can you just mktemp()?
>
> Mounting on /mnt would also be a bit of an issue, that's something
> people are relatively likely to have used for something so could be
> disruptive.  If the test is going to do a new mount it's probably better
> to do something like make a temporary directory then mount on top of that.

To move it to a temp folder for mounting, the test needs to do the mount.
But it is impossible to know if the running environment has the required FS or not
and where the FS is. Should I add that as a parameter to the test binary?

--
Best Regards,
Yan, Zi
Ryan Roberts March 1, 2024, 1:09 p.m. UTC | #6
On 01/03/2024 12:52, Zi Yan wrote:
> On 1 Mar 2024, at 5:33, Ryan Roberts wrote:
> 
>> On 01/03/2024 09:51, Aishwarya TCV wrote:
>>>
>>>
>>> On 26/02/2024 20:55, Zi Yan wrote:
>>>> From: Zi Yan <ziy@nvidia.com>
>>>>
>>>> It is used to test split_huge_page_to_list_to_order for pagecache THPs.
>>>> Also add test cases for split_huge_page_to_list_to_order via both
>>>> debugfs.
>>>>
>>>> Signed-off-by: Zi Yan <ziy@nvidia.com>
>>>> ---
>>>>  mm/huge_memory.c                              |  34 ++++--
>>>>  .../selftests/mm/split_huge_page_test.c       | 115 +++++++++++++++++-
>>>>  2 files changed, 131 insertions(+), 18 deletions(-)
>>>>
>>>
>>> Hi Zi,
>>>
>>> When booting the kernel against next-master(20240228)with Arm64 on
>>> Marvell Thunder X2 (TX2), the kselftest-mm test 'split_huge_page_test'
>>> is failing in our CI (with rootfs over NFS). I can send the full logs if
>>> required.
>>
>> Just to add, I took a quick eyeball and I think there a couple of potential issues:
>>
>>   - In create_pagecache_thp_and_fd() you do *fd = open(testfile, O_CREAT ...);
>>     where testfile is /mnt/thp_fs/testfile. So if /mnt/thp_fs doesn't exist,
>>     then the open will fail I think? I'm pretty sure that's what's happening on
>>     our CI. Suggest the test needs to setup this dir itself. Is thp_fs a mounted
>>     fs or just a dir? If the latter can you just mktemp()?
> 
> The former. the page cache folio split tests require a file system supporting
> large folio and I used XFS.

OK got it.

> 
>>   - Later in create_pagecache_thp_and_fd() you fail the test if you don't have a
>>     filesystem that supports large folios. Can we turn that into a skip? That
>>     would reduce noise on the CI.
> 
> I can do that. But is this a new requirement that self tests have to be finish
> in CI/CD environment? Can you provide a guideline for it? 

I'm not sure what's written down, but certainly anyone should be able to run the
selftests with as little knowledge as possible, and they should only fail if
they detect a real problem. By convention a test should be skipped if the
environment (or kernel) isn't compatible. There are lots of examples of that in
mm selftests (just grep ksft_test_result_skip). mm selftests also has
run_vmtests.sh which does a lot of environment setup (e.g. reserving hugetlb
pages, etc) before actually running the tests.

> Since I always assume
> selftests are just ran by human who can set up environment. 

I believe kernelci have been running mm skeftests on x86 for a long time. We
have started running them against arm64 on our CI for the last couple of months
and it had found a number of real issues in the kernel in -next, so this is
helping find and fix things early. So there is definitely benefit to keeping
these tests clean and robust.

> In addition, I do
> not think it is realistic to make the test file to set up all the environment,
> since everyone's machine is different. It is much easier to make the CI/CD
> environment to make the mount.

That's reasonable, but then the requirements should be documented and you
probably would want to be able to optionally pass the mount on the command line.

Thanks,
Ryan

> 
>>
>> Thanks,
>> Ryan
>>
>>>
>>> A bisect (full log below) identified this patch as introducing the
>>> failure. Bisected it on the tag "next-20240228" at repo
>>> "https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git".
>>>
>>> This works fine on  Linux version 6.8.0-rc6
>>>
>>>
>>> Sample log from failure against run on TX2:
>>> ------
>>> 07:17:34.056125  # # ------------------------------
>>> 07:17:34.056543  # # running ./split_huge_page_test
>>> 07:17:34.056839  # # ------------------------------
>>> 07:17:34.057114  # # TAP version 13
>>> 07:17:34.058564  # # 1..12
>>> 07:17:34.156822  # # ok 1 Split huge pages successful
>>> 07:17:34.214074  # # ok 2 Split PTE-mapped huge pages successful
>>> 07:17:34.215630  # # # Please enable pr_debug in
>>> split_huge_pages_in_file() for more info.
>>> 07:17:34.225503  # # # Please check dmesg for more information
>>> 07:17:34.225862  # # ok 3 File-backed THP split test done
>>> 07:17:34.236944  # # Bail out! Failed to create a file at /mnt/thp_fs#
>>> Planned tests != run tests (12 != 3)
>>> 07:17:34.237307  # # # Totals: pass:3 fail:0 xfail:0 xpass:0 skip:0 error:0
>>> 07:17:34.237620  # # [FAIL]
>>> 07:17:34.246430  # not ok 51 split_huge_page_test # exit=1
>>>
>>>
>>> Bisect log:
>>> ------
>>> git bisect start
>>> # good: [d206a76d7d2726f3b096037f2079ce0bd3ba329b] Linux 6.8-rc6
>>> git bisect good d206a76d7d2726f3b096037f2079ce0bd3ba329b
>>> # bad: [20af1ca418d2c0b11bc2a1fe8c0c88f67bcc2a7e] Add linux-next
>>> specific files for 20240228
>>> git bisect bad 20af1ca418d2c0b11bc2a1fe8c0c88f67bcc2a7e
>>> # bad: [1322f1801e59dddce10591d602d246c1bf49990c] Merge branch 'main' of
>>> git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git
>>> git bisect bad 1322f1801e59dddce10591d602d246c1bf49990c
>>> # bad: [a82f70041487790b7b09fe4bb45436e1b57021d3] Merge branch 'dev' of
>>> git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4.git
>>> git bisect bad a82f70041487790b7b09fe4bb45436e1b57021d3
>>> # bad: [ce90480b9352ba2bebe8946dad9223e3f24c6e9a] Merge branch
>>> 'for-next' of
>>> git://git.kernel.org/pub/scm/linux/kernel/git/tmlind/linux-omap.git
>>> git bisect bad ce90480b9352ba2bebe8946dad9223e3f24c6e9a
>>> # bad: [5daac92ed3881fd0c656478a301a4e1d124100ee] Merge branch
>>> 'mm-everything' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
>>> git bisect bad 5daac92ed3881fd0c656478a301a4e1d124100ee
>>> # good: [acc2643d9e988c63dd4629a9af380ad9ac69c54a] Merge branch
>>> 'mm-stable' into mm-unstable
>>> git bisect good acc2643d9e988c63dd4629a9af380ad9ac69c54a
>>> # good: [0294de8fe7d7c1a7eddc979cbf4c1886406e36b7] Merge branch 'fixes'
>>> of git://git.kernel.org/pub/scm/linux/kernel/git/sre/linux-power-supply.git
>>> git bisect good 0294de8fe7d7c1a7eddc979cbf4c1886406e36b7
>>> # good: [83e0c8f0e777a1ef0977b2f8189101765703b32d] Merge branch
>>> 'mm-nonmm-stable' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
>>> git bisect good 83e0c8f0e777a1ef0977b2f8189101765703b32d
>>> # good: [a739cbe236e0dd3b6ff26a01fa1d31c73d4fac93] mm: memcg: make memcg
>>> huge page split support any order split
>>> git bisect good a739cbe236e0dd3b6ff26a01fa1d31c73d4fac93
>>> # bad: [efb520aa333b2f11daaaaa13f4a598b5ae4ae823] mm: allow non-hugetlb
>>> large folios to be batch processed
>>> git bisect bad efb520aa333b2f11daaaaa13f4a598b5ae4ae823
>>> # bad: [2258bdebb55e3ad3d30fd3849ddb955ff36825de] mm/zsmalloc: don't
>>> hold locks of all pages when free_zspage()
>>> git bisect bad 2258bdebb55e3ad3d30fd3849ddb955ff36825de
>>> # bad: [7fc0be45acf2878cbacc4dba56923c34c3fd8b1e] mm: remove
>>> total_mapcount()
>>> git bisect bad 7fc0be45acf2878cbacc4dba56923c34c3fd8b1e
>>> # good: [d55fac55da2f87ad5a99178e107df09770bbc411] mm: thp: split huge
>>> page to any lower order pages
>>> git bisect good d55fac55da2f87ad5a99178e107df09770bbc411
>>> # bad: [4050d591c1aaf9336c08511fa5984827186e9ad1] mm/memfd: refactor
>>> memfd_tag_pins() and memfd_wait_for_pins()
>>> git bisect bad 4050d591c1aaf9336c08511fa5984827186e9ad1
>>> # bad: [c0ba89c29ef559c95273feb481b049f622c43c17] mm: huge_memory:
>>> enable debugfs to split huge pages to any order
>>> git bisect bad c0ba89c29ef559c95273feb481b049f622c43c17
>>> # first bad commit: [c0ba89c29ef559c95273feb481b049f622c43c17] mm:
>>> huge_memory: enable debugfs to split huge pages to any order
>>>
>>>
>>> Thanks,
>>> Aishwarya
> 
> 
> --
> Best Regards,
> Yan, Zi
Zi Yan March 1, 2024, 1:53 p.m. UTC | #7
On 1 Mar 2024, at 8:09, Ryan Roberts wrote:

> On 01/03/2024 12:52, Zi Yan wrote:
>> On 1 Mar 2024, at 5:33, Ryan Roberts wrote:
>>
>>> On 01/03/2024 09:51, Aishwarya TCV wrote:
>>>>
>>>>
>>>> On 26/02/2024 20:55, Zi Yan wrote:
>>>>> From: Zi Yan <ziy@nvidia.com>
>>>>>
>>>>> It is used to test split_huge_page_to_list_to_order for pagecache THPs.
>>>>> Also add test cases for split_huge_page_to_list_to_order via both
>>>>> debugfs.
>>>>>
>>>>> Signed-off-by: Zi Yan <ziy@nvidia.com>
>>>>> ---
>>>>>  mm/huge_memory.c                              |  34 ++++--
>>>>>  .../selftests/mm/split_huge_page_test.c       | 115 +++++++++++++++++-
>>>>>  2 files changed, 131 insertions(+), 18 deletions(-)
>>>>>
>>>>
>>>> Hi Zi,
>>>>
>>>> When booting the kernel against next-master(20240228)with Arm64 on
>>>> Marvell Thunder X2 (TX2), the kselftest-mm test 'split_huge_page_test'
>>>> is failing in our CI (with rootfs over NFS). I can send the full logs if
>>>> required.
>>>
>>> Just to add, I took a quick eyeball and I think there a couple of potential issues:
>>>
>>>   - In create_pagecache_thp_and_fd() you do *fd = open(testfile, O_CREAT ...);
>>>     where testfile is /mnt/thp_fs/testfile. So if /mnt/thp_fs doesn't exist,
>>>     then the open will fail I think? I'm pretty sure that's what's happening on
>>>     our CI. Suggest the test needs to setup this dir itself. Is thp_fs a mounted
>>>     fs or just a dir? If the latter can you just mktemp()?
>>
>> The former. the page cache folio split tests require a file system supporting
>> large folio and I used XFS.
>
> OK got it.
>
>>
>>>   - Later in create_pagecache_thp_and_fd() you fail the test if you don't have a
>>>     filesystem that supports large folios. Can we turn that into a skip? That
>>>     would reduce noise on the CI.
>>
>> I can do that. But is this a new requirement that self tests have to be finish
>> in CI/CD environment? Can you provide a guideline for it?
>
> I'm not sure what's written down, but certainly anyone should be able to run the
> selftests with as little knowledge as possible, and they should only fail if
> they detect a real problem. By convention a test should be skipped if the
> environment (or kernel) isn't compatible. There are lots of examples of that in
> mm selftests (just grep ksft_test_result_skip). mm selftests also has
> run_vmtests.sh which does a lot of environment setup (e.g. reserving hugetlb
> pages, etc) before actually running the tests.

Got it. I will send a fixup to skip the page cache split test when the mount
is not ready, then send a separate patch to set up XFS in run_vmtests.sh and
pass it to this test.

>
>> Since I always assume
>> selftests are just ran by human who can set up environment.
>
> I believe kernelci have been running mm skeftests on x86 for a long time. We
> have started running them against arm64 on our CI for the last couple of months
> and it had found a number of real issues in the kernel in -next, so this is
> helping find and fix things early. So there is definitely benefit to keeping
> these tests clean and robust.

Got it. Make sense.

>
>> In addition, I do
>> not think it is realistic to make the test file to set up all the environment,
>> since everyone's machine is different. It is much easier to make the CI/CD
>> environment to make the mount.
>
> That's reasonable, but then the requirements should be documented and you
> probably would want to be able to optionally pass the mount on the command line.

Will do.

Thank you for the explanation.

>>>
>>> Thanks,
>>> Ryan
>>>
>>>>
>>>> A bisect (full log below) identified this patch as introducing the
>>>> failure. Bisected it on the tag "next-20240228" at repo
>>>> "https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git".
>>>>
>>>> This works fine on  Linux version 6.8.0-rc6
>>>>
>>>>
>>>> Sample log from failure against run on TX2:
>>>> ------
>>>> 07:17:34.056125  # # ------------------------------
>>>> 07:17:34.056543  # # running ./split_huge_page_test
>>>> 07:17:34.056839  # # ------------------------------
>>>> 07:17:34.057114  # # TAP version 13
>>>> 07:17:34.058564  # # 1..12
>>>> 07:17:34.156822  # # ok 1 Split huge pages successful
>>>> 07:17:34.214074  # # ok 2 Split PTE-mapped huge pages successful
>>>> 07:17:34.215630  # # # Please enable pr_debug in
>>>> split_huge_pages_in_file() for more info.
>>>> 07:17:34.225503  # # # Please check dmesg for more information
>>>> 07:17:34.225862  # # ok 3 File-backed THP split test done
>>>> 07:17:34.236944  # # Bail out! Failed to create a file at /mnt/thp_fs#
>>>> Planned tests != run tests (12 != 3)
>>>> 07:17:34.237307  # # # Totals: pass:3 fail:0 xfail:0 xpass:0 skip:0 error:0
>>>> 07:17:34.237620  # # [FAIL]
>>>> 07:17:34.246430  # not ok 51 split_huge_page_test # exit=1
>>>>
>>>>
>>>> Bisect log:
>>>> ------
>>>> git bisect start
>>>> # good: [d206a76d7d2726f3b096037f2079ce0bd3ba329b] Linux 6.8-rc6
>>>> git bisect good d206a76d7d2726f3b096037f2079ce0bd3ba329b
>>>> # bad: [20af1ca418d2c0b11bc2a1fe8c0c88f67bcc2a7e] Add linux-next
>>>> specific files for 20240228
>>>> git bisect bad 20af1ca418d2c0b11bc2a1fe8c0c88f67bcc2a7e
>>>> # bad: [1322f1801e59dddce10591d602d246c1bf49990c] Merge branch 'main' of
>>>> git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git
>>>> git bisect bad 1322f1801e59dddce10591d602d246c1bf49990c
>>>> # bad: [a82f70041487790b7b09fe4bb45436e1b57021d3] Merge branch 'dev' of
>>>> git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4.git
>>>> git bisect bad a82f70041487790b7b09fe4bb45436e1b57021d3
>>>> # bad: [ce90480b9352ba2bebe8946dad9223e3f24c6e9a] Merge branch
>>>> 'for-next' of
>>>> git://git.kernel.org/pub/scm/linux/kernel/git/tmlind/linux-omap.git
>>>> git bisect bad ce90480b9352ba2bebe8946dad9223e3f24c6e9a
>>>> # bad: [5daac92ed3881fd0c656478a301a4e1d124100ee] Merge branch
>>>> 'mm-everything' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
>>>> git bisect bad 5daac92ed3881fd0c656478a301a4e1d124100ee
>>>> # good: [acc2643d9e988c63dd4629a9af380ad9ac69c54a] Merge branch
>>>> 'mm-stable' into mm-unstable
>>>> git bisect good acc2643d9e988c63dd4629a9af380ad9ac69c54a
>>>> # good: [0294de8fe7d7c1a7eddc979cbf4c1886406e36b7] Merge branch 'fixes'
>>>> of git://git.kernel.org/pub/scm/linux/kernel/git/sre/linux-power-supply.git
>>>> git bisect good 0294de8fe7d7c1a7eddc979cbf4c1886406e36b7
>>>> # good: [83e0c8f0e777a1ef0977b2f8189101765703b32d] Merge branch
>>>> 'mm-nonmm-stable' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
>>>> git bisect good 83e0c8f0e777a1ef0977b2f8189101765703b32d
>>>> # good: [a739cbe236e0dd3b6ff26a01fa1d31c73d4fac93] mm: memcg: make memcg
>>>> huge page split support any order split
>>>> git bisect good a739cbe236e0dd3b6ff26a01fa1d31c73d4fac93
>>>> # bad: [efb520aa333b2f11daaaaa13f4a598b5ae4ae823] mm: allow non-hugetlb
>>>> large folios to be batch processed
>>>> git bisect bad efb520aa333b2f11daaaaa13f4a598b5ae4ae823
>>>> # bad: [2258bdebb55e3ad3d30fd3849ddb955ff36825de] mm/zsmalloc: don't
>>>> hold locks of all pages when free_zspage()
>>>> git bisect bad 2258bdebb55e3ad3d30fd3849ddb955ff36825de
>>>> # bad: [7fc0be45acf2878cbacc4dba56923c34c3fd8b1e] mm: remove
>>>> total_mapcount()
>>>> git bisect bad 7fc0be45acf2878cbacc4dba56923c34c3fd8b1e
>>>> # good: [d55fac55da2f87ad5a99178e107df09770bbc411] mm: thp: split huge
>>>> page to any lower order pages
>>>> git bisect good d55fac55da2f87ad5a99178e107df09770bbc411
>>>> # bad: [4050d591c1aaf9336c08511fa5984827186e9ad1] mm/memfd: refactor
>>>> memfd_tag_pins() and memfd_wait_for_pins()
>>>> git bisect bad 4050d591c1aaf9336c08511fa5984827186e9ad1
>>>> # bad: [c0ba89c29ef559c95273feb481b049f622c43c17] mm: huge_memory:
>>>> enable debugfs to split huge pages to any order
>>>> git bisect bad c0ba89c29ef559c95273feb481b049f622c43c17
>>>> # first bad commit: [c0ba89c29ef559c95273feb481b049f622c43c17] mm:
>>>> huge_memory: enable debugfs to split huge pages to any order
>>>>
>>>>
>>>> Thanks,
>>>> Aishwarya
>>
>>
>> --
>> Best Regards,
>> Yan, Zi


--
Best Regards,
Yan, Zi
Zi Yan March 1, 2024, 2 p.m. UTC | #8
On 1 Mar 2024, at 4:51, Aishwarya TCV wrote:

> On 26/02/2024 20:55, Zi Yan wrote:
>> From: Zi Yan <ziy@nvidia.com>
>>
>> It is used to test split_huge_page_to_list_to_order for pagecache THPs.
>> Also add test cases for split_huge_page_to_list_to_order via both
>> debugfs.
>>
>> Signed-off-by: Zi Yan <ziy@nvidia.com>
>> ---
>>  mm/huge_memory.c                              |  34 ++++--
>>  .../selftests/mm/split_huge_page_test.c       | 115 +++++++++++++++++-
>>  2 files changed, 131 insertions(+), 18 deletions(-)
>>
>
> Hi Zi,
>
> When booting the kernel against next-master(20240228)with Arm64 on
> Marvell Thunder X2 (TX2), the kselftest-mm test 'split_huge_page_test'
> is failing in our CI (with rootfs over NFS). I can send the full logs if
> required.
>
> A bisect (full log below) identified this patch as introducing the
> failure. Bisected it on the tag "next-20240228" at repo
> "https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git".
>
> This works fine on  Linux version 6.8.0-rc6

Hi Aishwarya,

I am trying to fix the issue. When I am compiling selftests/mm, I encountered
the error below when I run make under the folder. Am I missing any configuration?
Since you are able to run the test, I assume you know what is happening. Thanks.

vm_util.c: In function ‘__pagemap_scan_get_categories’:
vm_util.c:34:28: error: storage size of ‘arg’ isn’t known
   34 |         struct pm_scan_arg arg;
      |                            ^~~
vm_util.c:41:27: error: invalid application of ‘sizeof’ to incomplete type ‘struct pm_scan_arg’
   41 |         arg.size = sizeof(struct pm_scan_arg);
      |                           ^~~~~~
vm_util.c:45:35: error: ‘PAGE_IS_WPALLOWED’ undeclared (first use in this function)
   45 |         arg.category_anyof_mask = PAGE_IS_WPALLOWED | PAGE_IS_WRITTEN | PAGE_IS_FILE |
      |                                   ^~~~~~~~~~~~~~~~~
vm_util.c:45:35: note: each undeclared identifier is reported only once for each function it appears in
vm_util.c:45:55: error: ‘PAGE_IS_WRITTEN’ undeclared (first use in this function)
   45 |         arg.category_anyof_mask = PAGE_IS_WPALLOWED | PAGE_IS_WRITTEN | PAGE_IS_FILE |
      |                                                       ^~~~~~~~~~~~~~~
vm_util.c:45:73: error: ‘PAGE_IS_FILE’ undeclared (first use in this function)
   45 |         arg.category_anyof_mask = PAGE_IS_WPALLOWED | PAGE_IS_WRITTEN | PAGE_IS_FILE |
      |                                                                         ^~~~~~~~~~~~
vm_util.c:46:35: error: ‘PAGE_IS_PRESENT’ undeclared (first use in this function); did you mean ‘PAGEMAP_PRESENT’?
   46 |                                   PAGE_IS_PRESENT | PAGE_IS_SWAPPED | PAGE_IS_PFNZERO |
      |                                   ^~~~~~~~~~~~~~~
      |                                   PAGEMAP_PRESENT
vm_util.c:46:53: error: ‘PAGE_IS_SWAPPED’ undeclared (first use in this function)
   46 |                                   PAGE_IS_PRESENT | PAGE_IS_SWAPPED | PAGE_IS_PFNZERO |
      |                                                     ^~~~~~~~~~~~~~~
vm_util.c:46:71: error: ‘PAGE_IS_PFNZERO’ undeclared (first use in this function)
   46 |                                   PAGE_IS_PRESENT | PAGE_IS_SWAPPED | PAGE_IS_PFNZERO |
      |                                                                       ^~~~~~~~~~~~~~~
vm_util.c:47:35: error: ‘PAGE_IS_HUGE’ undeclared (first use in this function)
   47 |                                   PAGE_IS_HUGE | PAGE_IS_SOFT_DIRTY;
      |                                   ^~~~~~~~~~~~
vm_util.c:47:50: error: ‘PAGE_IS_SOFT_DIRTY’ undeclared (first use in this function); did you mean ‘PM_SOFT_DIRTY’?
   47 |                                   PAGE_IS_HUGE | PAGE_IS_SOFT_DIRTY;
      |                                                  ^~~~~~~~~~~~~~~~~~
      |                                                  PM_SOFT_DIRTY
vm_util.c:50:26: error: ‘PAGEMAP_SCAN’ undeclared (first use in this function); did you mean ‘PAGEMAP_PFN’?
   50 |         return ioctl(fd, PAGEMAP_SCAN, &arg);
      |                          ^~~~~~~~~~~~
      |                          PAGEMAP_PFN

--
Best Regards,
Yan, Zi
Mark Brown March 1, 2024, 2:14 p.m. UTC | #9
On Fri, Mar 01, 2024 at 07:56:41AM -0500, Zi Yan wrote:
> On 1 Mar 2024, at 7:11, Mark Brown wrote:

> > Mounting on /mnt would also be a bit of an issue, that's something
> > people are relatively likely to have used for something so could be
> > disruptive.  If the test is going to do a new mount it's probably better
> > to do something like make a temporary directory then mount on top of that.

> To move it to a temp folder for mounting, the test needs to do the mount.
> But it is impossible to know if the running environment has the required FS or not
> and where the FS is. Should I add that as a parameter to the test binary?

You can check the supported filesystem types in /proc/filesystems
(possibly also elsewhere, that's just my first thought).  There's some
standard APIs for getting/naming a temporary file or directory which
should pay attention to any system settings - mktemp() is a generally
available one for C code IIRC.
Ryan Roberts March 1, 2024, 2:18 p.m. UTC | #10
On 01/03/2024 13:53, Zi Yan wrote:
> On 1 Mar 2024, at 8:09, Ryan Roberts wrote:
> 
>> On 01/03/2024 12:52, Zi Yan wrote:
>>> On 1 Mar 2024, at 5:33, Ryan Roberts wrote:
>>>
>>>> On 01/03/2024 09:51, Aishwarya TCV wrote:
>>>>>
>>>>>
>>>>> On 26/02/2024 20:55, Zi Yan wrote:
>>>>>> From: Zi Yan <ziy@nvidia.com>
>>>>>>
>>>>>> It is used to test split_huge_page_to_list_to_order for pagecache THPs.
>>>>>> Also add test cases for split_huge_page_to_list_to_order via both
>>>>>> debugfs.
>>>>>>
>>>>>> Signed-off-by: Zi Yan <ziy@nvidia.com>
>>>>>> ---
>>>>>>  mm/huge_memory.c                              |  34 ++++--
>>>>>>  .../selftests/mm/split_huge_page_test.c       | 115 +++++++++++++++++-
>>>>>>  2 files changed, 131 insertions(+), 18 deletions(-)
>>>>>>
>>>>>
>>>>> Hi Zi,
>>>>>
>>>>> When booting the kernel against next-master(20240228)with Arm64 on
>>>>> Marvell Thunder X2 (TX2), the kselftest-mm test 'split_huge_page_test'
>>>>> is failing in our CI (with rootfs over NFS). I can send the full logs if
>>>>> required.
>>>>
>>>> Just to add, I took a quick eyeball and I think there a couple of potential issues:
>>>>
>>>>   - In create_pagecache_thp_and_fd() you do *fd = open(testfile, O_CREAT ...);
>>>>     where testfile is /mnt/thp_fs/testfile. So if /mnt/thp_fs doesn't exist,
>>>>     then the open will fail I think? I'm pretty sure that's what's happening on
>>>>     our CI. Suggest the test needs to setup this dir itself. Is thp_fs a mounted
>>>>     fs or just a dir? If the latter can you just mktemp()?
>>>
>>> The former. the page cache folio split tests require a file system supporting
>>> large folio and I used XFS.
>>
>> OK got it.
>>
>>>
>>>>   - Later in create_pagecache_thp_and_fd() you fail the test if you don't have a
>>>>     filesystem that supports large folios. Can we turn that into a skip? That
>>>>     would reduce noise on the CI.
>>>
>>> I can do that. But is this a new requirement that self tests have to be finish
>>> in CI/CD environment? Can you provide a guideline for it?
>>
>> I'm not sure what's written down, but certainly anyone should be able to run the
>> selftests with as little knowledge as possible, and they should only fail if
>> they detect a real problem. By convention a test should be skipped if the
>> environment (or kernel) isn't compatible. There are lots of examples of that in
>> mm selftests (just grep ksft_test_result_skip). mm selftests also has
>> run_vmtests.sh which does a lot of environment setup (e.g. reserving hugetlb
>> pages, etc) before actually running the tests.
> 
> Got it. I will send a fixup to skip the page cache split test when the mount
> is not ready, then send a separate patch to set up XFS in run_vmtests.sh and
> pass it to this test.

Thanks - appeciated!

Although I agree it might be a tall order create and mount an XFS fs in
run_vmtests.sh. Perhaps it might be good enough to add an optional param to the
test to pass a path when running the test manually, and if that's not provided,
just try to create a temp file in the current dir and skip if its not the right
sort of fs?

> 
>>
>>> Since I always assume
>>> selftests are just ran by human who can set up environment.
>>
>> I believe kernelci have been running mm skeftests on x86 for a long time. We
>> have started running them against arm64 on our CI for the last couple of months
>> and it had found a number of real issues in the kernel in -next, so this is
>> helping find and fix things early. So there is definitely benefit to keeping
>> these tests clean and robust.
> 
> Got it. Make sense.
> 
>>
>>> In addition, I do
>>> not think it is realistic to make the test file to set up all the environment,
>>> since everyone's machine is different. It is much easier to make the CI/CD
>>> environment to make the mount.
>>
>> That's reasonable, but then the requirements should be documented and you
>> probably would want to be able to optionally pass the mount on the command line.
> 
> Will do.
> 
> Thank you for the explanation.
> 
>>>>
>>>> Thanks,
>>>> Ryan
>>>>
>>>>>
>>>>> A bisect (full log below) identified this patch as introducing the
>>>>> failure. Bisected it on the tag "next-20240228" at repo
>>>>> "https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git".
>>>>>
>>>>> This works fine on  Linux version 6.8.0-rc6
>>>>>
>>>>>
>>>>> Sample log from failure against run on TX2:
>>>>> ------
>>>>> 07:17:34.056125  # # ------------------------------
>>>>> 07:17:34.056543  # # running ./split_huge_page_test
>>>>> 07:17:34.056839  # # ------------------------------
>>>>> 07:17:34.057114  # # TAP version 13
>>>>> 07:17:34.058564  # # 1..12
>>>>> 07:17:34.156822  # # ok 1 Split huge pages successful
>>>>> 07:17:34.214074  # # ok 2 Split PTE-mapped huge pages successful
>>>>> 07:17:34.215630  # # # Please enable pr_debug in
>>>>> split_huge_pages_in_file() for more info.
>>>>> 07:17:34.225503  # # # Please check dmesg for more information
>>>>> 07:17:34.225862  # # ok 3 File-backed THP split test done
>>>>> 07:17:34.236944  # # Bail out! Failed to create a file at /mnt/thp_fs#
>>>>> Planned tests != run tests (12 != 3)
>>>>> 07:17:34.237307  # # # Totals: pass:3 fail:0 xfail:0 xpass:0 skip:0 error:0
>>>>> 07:17:34.237620  # # [FAIL]
>>>>> 07:17:34.246430  # not ok 51 split_huge_page_test # exit=1
>>>>>
>>>>>
>>>>> Bisect log:
>>>>> ------
>>>>> git bisect start
>>>>> # good: [d206a76d7d2726f3b096037f2079ce0bd3ba329b] Linux 6.8-rc6
>>>>> git bisect good d206a76d7d2726f3b096037f2079ce0bd3ba329b
>>>>> # bad: [20af1ca418d2c0b11bc2a1fe8c0c88f67bcc2a7e] Add linux-next
>>>>> specific files for 20240228
>>>>> git bisect bad 20af1ca418d2c0b11bc2a1fe8c0c88f67bcc2a7e
>>>>> # bad: [1322f1801e59dddce10591d602d246c1bf49990c] Merge branch 'main' of
>>>>> git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git
>>>>> git bisect bad 1322f1801e59dddce10591d602d246c1bf49990c
>>>>> # bad: [a82f70041487790b7b09fe4bb45436e1b57021d3] Merge branch 'dev' of
>>>>> git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4.git
>>>>> git bisect bad a82f70041487790b7b09fe4bb45436e1b57021d3
>>>>> # bad: [ce90480b9352ba2bebe8946dad9223e3f24c6e9a] Merge branch
>>>>> 'for-next' of
>>>>> git://git.kernel.org/pub/scm/linux/kernel/git/tmlind/linux-omap.git
>>>>> git bisect bad ce90480b9352ba2bebe8946dad9223e3f24c6e9a
>>>>> # bad: [5daac92ed3881fd0c656478a301a4e1d124100ee] Merge branch
>>>>> 'mm-everything' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
>>>>> git bisect bad 5daac92ed3881fd0c656478a301a4e1d124100ee
>>>>> # good: [acc2643d9e988c63dd4629a9af380ad9ac69c54a] Merge branch
>>>>> 'mm-stable' into mm-unstable
>>>>> git bisect good acc2643d9e988c63dd4629a9af380ad9ac69c54a
>>>>> # good: [0294de8fe7d7c1a7eddc979cbf4c1886406e36b7] Merge branch 'fixes'
>>>>> of git://git.kernel.org/pub/scm/linux/kernel/git/sre/linux-power-supply.git
>>>>> git bisect good 0294de8fe7d7c1a7eddc979cbf4c1886406e36b7
>>>>> # good: [83e0c8f0e777a1ef0977b2f8189101765703b32d] Merge branch
>>>>> 'mm-nonmm-stable' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
>>>>> git bisect good 83e0c8f0e777a1ef0977b2f8189101765703b32d
>>>>> # good: [a739cbe236e0dd3b6ff26a01fa1d31c73d4fac93] mm: memcg: make memcg
>>>>> huge page split support any order split
>>>>> git bisect good a739cbe236e0dd3b6ff26a01fa1d31c73d4fac93
>>>>> # bad: [efb520aa333b2f11daaaaa13f4a598b5ae4ae823] mm: allow non-hugetlb
>>>>> large folios to be batch processed
>>>>> git bisect bad efb520aa333b2f11daaaaa13f4a598b5ae4ae823
>>>>> # bad: [2258bdebb55e3ad3d30fd3849ddb955ff36825de] mm/zsmalloc: don't
>>>>> hold locks of all pages when free_zspage()
>>>>> git bisect bad 2258bdebb55e3ad3d30fd3849ddb955ff36825de
>>>>> # bad: [7fc0be45acf2878cbacc4dba56923c34c3fd8b1e] mm: remove
>>>>> total_mapcount()
>>>>> git bisect bad 7fc0be45acf2878cbacc4dba56923c34c3fd8b1e
>>>>> # good: [d55fac55da2f87ad5a99178e107df09770bbc411] mm: thp: split huge
>>>>> page to any lower order pages
>>>>> git bisect good d55fac55da2f87ad5a99178e107df09770bbc411
>>>>> # bad: [4050d591c1aaf9336c08511fa5984827186e9ad1] mm/memfd: refactor
>>>>> memfd_tag_pins() and memfd_wait_for_pins()
>>>>> git bisect bad 4050d591c1aaf9336c08511fa5984827186e9ad1
>>>>> # bad: [c0ba89c29ef559c95273feb481b049f622c43c17] mm: huge_memory:
>>>>> enable debugfs to split huge pages to any order
>>>>> git bisect bad c0ba89c29ef559c95273feb481b049f622c43c17
>>>>> # first bad commit: [c0ba89c29ef559c95273feb481b049f622c43c17] mm:
>>>>> huge_memory: enable debugfs to split huge pages to any order
>>>>>
>>>>>
>>>>> Thanks,
>>>>> Aishwarya
>>>
>>>
>>> --
>>> Best Regards,
>>> Yan, Zi
> 
> 
> --
> Best Regards,
> Yan, Zi
Ryan Roberts March 1, 2024, 2:23 p.m. UTC | #11
On 01/03/2024 14:00, Zi Yan wrote:
> On 1 Mar 2024, at 4:51, Aishwarya TCV wrote:
> 
>> On 26/02/2024 20:55, Zi Yan wrote:
>>> From: Zi Yan <ziy@nvidia.com>
>>>
>>> It is used to test split_huge_page_to_list_to_order for pagecache THPs.
>>> Also add test cases for split_huge_page_to_list_to_order via both
>>> debugfs.
>>>
>>> Signed-off-by: Zi Yan <ziy@nvidia.com>
>>> ---
>>>  mm/huge_memory.c                              |  34 ++++--
>>>  .../selftests/mm/split_huge_page_test.c       | 115 +++++++++++++++++-
>>>  2 files changed, 131 insertions(+), 18 deletions(-)
>>>
>>
>> Hi Zi,
>>
>> When booting the kernel against next-master(20240228)with Arm64 on
>> Marvell Thunder X2 (TX2), the kselftest-mm test 'split_huge_page_test'
>> is failing in our CI (with rootfs over NFS). I can send the full logs if
>> required.
>>
>> A bisect (full log below) identified this patch as introducing the
>> failure. Bisected it on the tag "next-20240228" at repo
>> "https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git".
>>
>> This works fine on  Linux version 6.8.0-rc6
> 
> Hi Aishwarya,
> 
> I am trying to fix the issue. When I am compiling selftests/mm, I encountered
> the error below when I run make under the folder. Am I missing any configuration?
> Since you are able to run the test, I assume you know what is happening. Thanks.

for what its worth, I usually compile from the top level directory with:

# make headers_install
# make -C tools/testing/selftests TARGETS=mm install INSTALL_PATH=~/kself

Perhaps the below is due to the headers not being exported properly. Bad things definitely happen if you omit the headers_install step.

> 
> vm_util.c: In function ‘__pagemap_scan_get_categories’:
> vm_util.c:34:28: error: storage size of ‘arg’ isn’t known
>    34 |         struct pm_scan_arg arg;
>       |                            ^~~
> vm_util.c:41:27: error: invalid application of ‘sizeof’ to incomplete type ‘struct pm_scan_arg’
>    41 |         arg.size = sizeof(struct pm_scan_arg);
>       |                           ^~~~~~
> vm_util.c:45:35: error: ‘PAGE_IS_WPALLOWED’ undeclared (first use in this function)
>    45 |         arg.category_anyof_mask = PAGE_IS_WPALLOWED | PAGE_IS_WRITTEN | PAGE_IS_FILE |
>       |                                   ^~~~~~~~~~~~~~~~~
> vm_util.c:45:35: note: each undeclared identifier is reported only once for each function it appears in
> vm_util.c:45:55: error: ‘PAGE_IS_WRITTEN’ undeclared (first use in this function)
>    45 |         arg.category_anyof_mask = PAGE_IS_WPALLOWED | PAGE_IS_WRITTEN | PAGE_IS_FILE |
>       |                                                       ^~~~~~~~~~~~~~~
> vm_util.c:45:73: error: ‘PAGE_IS_FILE’ undeclared (first use in this function)
>    45 |         arg.category_anyof_mask = PAGE_IS_WPALLOWED | PAGE_IS_WRITTEN | PAGE_IS_FILE |
>       |                                                                         ^~~~~~~~~~~~
> vm_util.c:46:35: error: ‘PAGE_IS_PRESENT’ undeclared (first use in this function); did you mean ‘PAGEMAP_PRESENT’?
>    46 |                                   PAGE_IS_PRESENT | PAGE_IS_SWAPPED | PAGE_IS_PFNZERO |
>       |                                   ^~~~~~~~~~~~~~~
>       |                                   PAGEMAP_PRESENT
> vm_util.c:46:53: error: ‘PAGE_IS_SWAPPED’ undeclared (first use in this function)
>    46 |                                   PAGE_IS_PRESENT | PAGE_IS_SWAPPED | PAGE_IS_PFNZERO |
>       |                                                     ^~~~~~~~~~~~~~~
> vm_util.c:46:71: error: ‘PAGE_IS_PFNZERO’ undeclared (first use in this function)
>    46 |                                   PAGE_IS_PRESENT | PAGE_IS_SWAPPED | PAGE_IS_PFNZERO |
>       |                                                                       ^~~~~~~~~~~~~~~
> vm_util.c:47:35: error: ‘PAGE_IS_HUGE’ undeclared (first use in this function)
>    47 |                                   PAGE_IS_HUGE | PAGE_IS_SOFT_DIRTY;
>       |                                   ^~~~~~~~~~~~
> vm_util.c:47:50: error: ‘PAGE_IS_SOFT_DIRTY’ undeclared (first use in this function); did you mean ‘PM_SOFT_DIRTY’?
>    47 |                                   PAGE_IS_HUGE | PAGE_IS_SOFT_DIRTY;
>       |                                                  ^~~~~~~~~~~~~~~~~~
>       |                                                  PM_SOFT_DIRTY
> vm_util.c:50:26: error: ‘PAGEMAP_SCAN’ undeclared (first use in this function); did you mean ‘PAGEMAP_PFN’?
>    50 |         return ioctl(fd, PAGEMAP_SCAN, &arg);
>       |                          ^~~~~~~~~~~~
>       |                          PAGEMAP_PFN
> 
> --
> Best Regards,
> Yan, Zi
Mark Brown March 1, 2024, 2:24 p.m. UTC | #12
On Fri, Mar 01, 2024 at 01:09:04PM +0000, Ryan Roberts wrote:
> On 01/03/2024 12:52, Zi Yan wrote:

> > I can do that. But is this a new requirement that self tests have to be finish
> > in CI/CD environment? Can you provide a guideline for it? 

> I'm not sure what's written down, but certainly anyone should be able to run the
> selftests with as little knowledge as possible, and they should only fail if
> they detect a real problem. By convention a test should be skipped if the

It does get mentioned in talks and things but TBH I think it's just
generally a good practice thing.

> > Since I always assume
> > selftests are just ran by human who can set up environment. 

> I believe kernelci have been running mm skeftests on x86 for a long time. We
> have started running them against arm64 on our CI for the last couple of months
> and it had found a number of real issues in the kernel in -next, so this is
> helping find and fix things early. So there is definitely benefit to keeping
> these tests clean and robust.

They're also being routinely run on at least some platforms by LKFT
(though from a quick check it seems not with the magic command line
options to set up secretmem and huge pages) and CKI, possibly also some
of the other CI systems I'm less aware of.
Mark Brown March 1, 2024, 2:27 p.m. UTC | #13
On Fri, Mar 01, 2024 at 02:18:16PM +0000, Ryan Roberts wrote:

> Although I agree it might be a tall order create and mount an XFS fs in
> run_vmtests.sh. Perhaps it might be good enough to add an optional param to the
> test to pass a path when running the test manually, and if that's not provided,
> just try to create a temp file in the current dir and skip if its not the right
> sort of fs?

Yeah, if it needs to be a specific kind of on disk filesystem then that
needs a lot more integration with CI systems (a lot of them run entirely
from nfsroot by default!).  Being able to specify the location via an
environment variable would also be good, it could fall back to the
current directory if one isn't set up.
Zi Yan March 1, 2024, 2:33 p.m. UTC | #14
On 1 Mar 2024, at 9:23, Ryan Roberts wrote:

> On 01/03/2024 14:00, Zi Yan wrote:
>> On 1 Mar 2024, at 4:51, Aishwarya TCV wrote:
>>
>>> On 26/02/2024 20:55, Zi Yan wrote:
>>>> From: Zi Yan <ziy@nvidia.com>
>>>>
>>>> It is used to test split_huge_page_to_list_to_order for pagecache THPs.
>>>> Also add test cases for split_huge_page_to_list_to_order via both
>>>> debugfs.
>>>>
>>>> Signed-off-by: Zi Yan <ziy@nvidia.com>
>>>> ---
>>>>  mm/huge_memory.c                              |  34 ++++--
>>>>  .../selftests/mm/split_huge_page_test.c       | 115 +++++++++++++++++-
>>>>  2 files changed, 131 insertions(+), 18 deletions(-)
>>>>
>>>
>>> Hi Zi,
>>>
>>> When booting the kernel against next-master(20240228)with Arm64 on
>>> Marvell Thunder X2 (TX2), the kselftest-mm test 'split_huge_page_test'
>>> is failing in our CI (with rootfs over NFS). I can send the full logs if
>>> required.
>>>
>>> A bisect (full log below) identified this patch as introducing the
>>> failure. Bisected it on the tag "next-20240228" at repo
>>> "https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git".
>>>
>>> This works fine on  Linux version 6.8.0-rc6
>>
>> Hi Aishwarya,
>>
>> I am trying to fix the issue. When I am compiling selftests/mm, I encountered
>> the error below when I run make under the folder. Am I missing any configuration?
>> Since you are able to run the test, I assume you know what is happening. Thanks.
>
> for what its worth, I usually compile from the top level directory with:
>
> # make headers_install
> # make -C tools/testing/selftests TARGETS=mm install INSTALL_PATH=~/kself
>
> Perhaps the below is due to the headers not being exported properly. Bad things definitely happen if you omit the headers_install step.

It works. Thanks a lot!

>>
>> vm_util.c: In function ‘__pagemap_scan_get_categories’:
>> vm_util.c:34:28: error: storage size of ‘arg’ isn’t known
>>    34 |         struct pm_scan_arg arg;
>>       |                            ^~~
>> vm_util.c:41:27: error: invalid application of ‘sizeof’ to incomplete type ‘struct pm_scan_arg’
>>    41 |         arg.size = sizeof(struct pm_scan_arg);
>>       |                           ^~~~~~
>> vm_util.c:45:35: error: ‘PAGE_IS_WPALLOWED’ undeclared (first use in this function)
>>    45 |         arg.category_anyof_mask = PAGE_IS_WPALLOWED | PAGE_IS_WRITTEN | PAGE_IS_FILE |
>>       |                                   ^~~~~~~~~~~~~~~~~
>> vm_util.c:45:35: note: each undeclared identifier is reported only once for each function it appears in
>> vm_util.c:45:55: error: ‘PAGE_IS_WRITTEN’ undeclared (first use in this function)
>>    45 |         arg.category_anyof_mask = PAGE_IS_WPALLOWED | PAGE_IS_WRITTEN | PAGE_IS_FILE |
>>       |                                                       ^~~~~~~~~~~~~~~
>> vm_util.c:45:73: error: ‘PAGE_IS_FILE’ undeclared (first use in this function)
>>    45 |         arg.category_anyof_mask = PAGE_IS_WPALLOWED | PAGE_IS_WRITTEN | PAGE_IS_FILE |
>>       |                                                                         ^~~~~~~~~~~~
>> vm_util.c:46:35: error: ‘PAGE_IS_PRESENT’ undeclared (first use in this function); did you mean ‘PAGEMAP_PRESENT’?
>>    46 |                                   PAGE_IS_PRESENT | PAGE_IS_SWAPPED | PAGE_IS_PFNZERO |
>>       |                                   ^~~~~~~~~~~~~~~
>>       |                                   PAGEMAP_PRESENT
>> vm_util.c:46:53: error: ‘PAGE_IS_SWAPPED’ undeclared (first use in this function)
>>    46 |                                   PAGE_IS_PRESENT | PAGE_IS_SWAPPED | PAGE_IS_PFNZERO |
>>       |                                                     ^~~~~~~~~~~~~~~
>> vm_util.c:46:71: error: ‘PAGE_IS_PFNZERO’ undeclared (first use in this function)
>>    46 |                                   PAGE_IS_PRESENT | PAGE_IS_SWAPPED | PAGE_IS_PFNZERO |
>>       |                                                                       ^~~~~~~~~~~~~~~
>> vm_util.c:47:35: error: ‘PAGE_IS_HUGE’ undeclared (first use in this function)
>>    47 |                                   PAGE_IS_HUGE | PAGE_IS_SOFT_DIRTY;
>>       |                                   ^~~~~~~~~~~~
>> vm_util.c:47:50: error: ‘PAGE_IS_SOFT_DIRTY’ undeclared (first use in this function); did you mean ‘PM_SOFT_DIRTY’?
>>    47 |                                   PAGE_IS_HUGE | PAGE_IS_SOFT_DIRTY;
>>       |                                                  ^~~~~~~~~~~~~~~~~~
>>       |                                                  PM_SOFT_DIRTY
>> vm_util.c:50:26: error: ‘PAGEMAP_SCAN’ undeclared (first use in this function); did you mean ‘PAGEMAP_PFN’?
>>    50 |         return ioctl(fd, PAGEMAP_SCAN, &arg);
>>       |                          ^~~~~~~~~~~~
>>       |                          PAGEMAP_PFN
>>
>> --
>> Best Regards,
>> Yan, Zi


--
Best Regards,
Yan, Zi
Zi Yan March 1, 2024, 3:21 p.m. UTC | #15
On 1 Mar 2024, at 9:27, Mark Brown wrote:

> On Fri, Mar 01, 2024 at 02:18:16PM +0000, Ryan Roberts wrote:
>
>> Although I agree it might be a tall order create and mount an XFS fs in
>> run_vmtests.sh. Perhaps it might be good enough to add an optional param to the
>> test to pass a path when running the test manually, and if that's not provided,
>> just try to create a temp file in the current dir and skip if its not the right
>> sort of fs?
>
> Yeah, if it needs to be a specific kind of on disk filesystem then that
> needs a lot more integration with CI systems (a lot of them run entirely
> from nfsroot by default!).  Being able to specify the location via an
> environment variable would also be good, it could fall back to the
> current directory if one isn't set up.

Sure. lkp creates a XFS image and mount it. I will give it a try. If it is too
hard to do, I will do what Ryan suggested above.

--
Best Regards,
Yan, Zi
Zi Yan March 1, 2024, 7:37 p.m. UTC | #16
On 1 Mar 2024, at 4:51, Aishwarya TCV wrote:

> On 26/02/2024 20:55, Zi Yan wrote:
>> From: Zi Yan <ziy@nvidia.com>
>>
>> It is used to test split_huge_page_to_list_to_order for pagecache THPs.
>> Also add test cases for split_huge_page_to_list_to_order via both
>> debugfs.
>>
>> Signed-off-by: Zi Yan <ziy@nvidia.com>
>> ---
>>  mm/huge_memory.c                              |  34 ++++--
>>  .../selftests/mm/split_huge_page_test.c       | 115 +++++++++++++++++-
>>  2 files changed, 131 insertions(+), 18 deletions(-)
>>
>
> Hi Zi,
>
> When booting the kernel against next-master(20240228)with Arm64 on
> Marvell Thunder X2 (TX2), the kselftest-mm test 'split_huge_page_test'
> is failing in our CI (with rootfs over NFS). I can send the full logs if
> required.
>
> A bisect (full log below) identified this patch as introducing the
> failure. Bisected it on the tag "next-20240228" at repo
> "https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git".
>
> This works fine on  Linux version 6.8.0-rc6

Hi Aishwarya,

Can you try the attached patch and see if it fixes the failure? I changed
the test to accept XFS dev as input, mount XFS on a temp folder under /tmp,
and skip if no XFS is mounted.

Thanks.

--
Best Regards,
Yan, Zi
diff --git a/tools/testing/selftests/mm/split_huge_page_test.c b/tools/testing/selftests/mm/split_huge_page_test.c
index cf09fdc9ef22..047883473b84 100644
--- a/tools/testing/selftests/mm/split_huge_page_test.c
+++ b/tools/testing/selftests/mm/split_huge_page_test.c
@@ -26,7 +26,6 @@ uint64_t pmd_pagesize;
 
 #define SPLIT_DEBUGFS "/sys/kernel/debug/split_huge_pages"
 #define SMAP_PATH "/proc/self/smaps"
-#define THP_FS_PATH "/mnt/thp_fs"
 #define INPUT_MAX 80
 
 #define PID_FMT "%d,0x%lx,0x%lx,%d"
@@ -268,7 +267,45 @@ void split_file_backed_thp(void)
 	ksft_exit_fail_msg("Error occurred\n");
 }
 
-void create_pagecache_thp_and_fd(const char *testfile, size_t fd_size, int *fd, char **addr)
+bool prepare_thp_fs(const char *xfs_dev_loc, char *thp_fs_template,
+		const char **thp_fs_loc)
+{
+	bool mounted = false;
+
+	*thp_fs_loc = mkdtemp(thp_fs_template);
+
+	if (!*thp_fs_loc)
+		ksft_exit_fail_msg("cannot create temp folder\n");
+
+	if (xfs_dev_loc) {
+		int status = mount(xfs_dev_loc, *thp_fs_loc, "xfs", 0, NULL);
+
+		if (status)
+			ksft_exit_fail_msg("Unable to mount xfs for testing\n");
+		mounted = true;
+	}
+	return mounted;
+}
+
+void cleanup_thp_fs(const char *thp_fs_loc, bool mounted)
+{
+	int status;
+
+	if (mounted) {
+		status = umount(thp_fs_loc);
+		if (status)
+			ksft_exit_fail_msg("Unable to umount %s\n",
+					   thp_fs_loc);
+	}
+
+	status = rmdir(thp_fs_loc);
+	if (status)
+		ksft_exit_fail_msg("cannot remove tmp dir: %s\n",
+				   strerror(errno));
+}
+
+int create_pagecache_thp_and_fd(const char *testfile, size_t fd_size, int *fd,
+		char **addr)
 {
 	size_t i;
 	int dummy;
@@ -277,7 +314,7 @@ void create_pagecache_thp_and_fd(const char *testfile, size_t fd_size, int *fd,
 
 	*fd = open(testfile, O_CREAT | O_RDWR, 0664);
 	if (*fd == -1)
-		ksft_exit_fail_msg("Failed to create a file at "THP_FS_PATH);
+		ksft_exit_fail_msg("Failed to create a file at %s\n", testfile);
 
 	for (i = 0; i < fd_size; i++) {
 		unsigned char byte = (unsigned char)i;
@@ -299,7 +336,7 @@ void create_pagecache_thp_and_fd(const char *testfile, size_t fd_size, int *fd,
 
 	*fd = open(testfile, O_RDWR);
 	if (*fd == -1) {
-		ksft_perror("Failed to open a file at "THP_FS_PATH);
+		ksft_perror("Failed to open testfile\n");
 		goto err_out_unlink;
 	}
 
@@ -314,26 +351,37 @@ void create_pagecache_thp_and_fd(const char *testfile, size_t fd_size, int *fd,
 		dummy += *(*addr + i);
 
 	if (!check_huge_file(*addr, fd_size / pmd_pagesize, pmd_pagesize)) {
-		ksft_print_msg("No large pagecache folio generated, please mount a filesystem supporting large folio at "THP_FS_PATH"\n");
-		goto err_out_close;
+		ksft_print_msg("No large pagecache folio generated, please provide a filesystem supporting large folio\n");
+		unlink(testfile);
+		ksft_test_result_skip("Pagecache folio split skipped\n");
+		return -2;
 	}
-	return;
+	return 0;
 err_out_close:
 	close(*fd);
 err_out_unlink:
 	unlink(testfile);
 	ksft_exit_fail_msg("Failed to create large pagecache folios\n");
+	return -1;
 }
 
-void split_thp_in_pagecache_to_order(size_t fd_size, int order)
+void split_thp_in_pagecache_to_order(size_t fd_size, int order, const char *fs_loc)
 {
 	int fd;
 	char *addr;
 	size_t i;
-	const char testfile[] = THP_FS_PATH "/test";
+	char testfile[INPUT_MAX];
 	int err = 0;
 
-	create_pagecache_thp_and_fd(testfile, fd_size, &fd, &addr);
+	err = snprintf(testfile, INPUT_MAX, "%s/test", fs_loc);
+
+	if (err < 0)
+		ksft_exit_fail_msg("cannot generate right test file name\n");
+
+	err = create_pagecache_thp_and_fd(testfile, fd_size, &fd, &addr);
+	if (err)
+		return;
+	err = 0;
 
 	write_debugfs(PID_FMT, getpid(), (uint64_t)addr, (uint64_t)addr + fd_size, order);
 
@@ -351,6 +399,7 @@ void split_thp_in_pagecache_to_order(size_t fd_size, int order)
 	}
 
 out:
+	munmap(addr, fd_size);
 	close(fd);
 	unlink(testfile);
 	if (err)
@@ -360,8 +409,11 @@ void split_thp_in_pagecache_to_order(size_t fd_size, int order)
 
 int main(int argc, char **argv)
 {
-	int i;
+	int i, mounted;
 	size_t fd_size;
+	char *optional_xfs_dev_loc = NULL;
+	char fs_loc_template[] = "/tmp/thp_fs_XXXXXX";
+	const char *fs_loc;
 
 	ksft_print_header();
 
@@ -370,6 +422,9 @@ int main(int argc, char **argv)
 		ksft_finished();
 	}
 
+	if (argc > 1)
+		optional_xfs_dev_loc = argv[1];
+
 	ksft_set_plan(3+9);
 
 	pagesize = getpagesize();
@@ -384,8 +439,11 @@ int main(int argc, char **argv)
 	split_pte_mapped_thp();
 	split_file_backed_thp();
 
+	mounted = prepare_thp_fs(optional_xfs_dev_loc, fs_loc_template,
+				 &fs_loc);
 	for (i = 8; i >= 0; i--)
-		split_thp_in_pagecache_to_order(fd_size, i);
+		split_thp_in_pagecache_to_order(fd_size, i, fs_loc);
+	cleanup_thp_fs(fs_loc, mounted);
 
 	ksft_finished();
Zi Yan March 1, 2024, 7:41 p.m. UTC | #17
On 1 Mar 2024, at 9:27, Mark Brown wrote:

> On Fri, Mar 01, 2024 at 02:18:16PM +0000, Ryan Roberts wrote:
>
>> Although I agree it might be a tall order create and mount an XFS fs in
>> run_vmtests.sh. Perhaps it might be good enough to add an optional param to the
>> test to pass a path when running the test manually, and if that's not provided,
>> just try to create a temp file in the current dir and skip if its not the right
>> sort of fs?
>
> Yeah, if it needs to be a specific kind of on disk filesystem then that
> needs a lot more integration with CI systems (a lot of them run entirely
> from nfsroot by default!).  Being able to specify the location via an
> environment variable would also be good, it could fall back to the
> current directory if one isn't set up.
Hi Mark,

I have changed the test[1] to:
1. accept XFS dev as an input,
2. mount XFS on a temp folder,
3. skip the test instead of fail if no XFS is mounted.

Let me know if the change looks good. Thanks.

[1] https://lore.kernel.org/linux-mm/A111EB95-0AF5-4715-82A4-70B8AD900A93@nvidia.com/T/#u

--
Best Regards,
Yan, Zi
Zi Yan March 1, 2024, 8:02 p.m. UTC | #18
On 1 Mar 2024, at 14:37, Zi Yan wrote:

> On 1 Mar 2024, at 4:51, Aishwarya TCV wrote:
>
>> On 26/02/2024 20:55, Zi Yan wrote:
>>> From: Zi Yan <ziy@nvidia.com>
>>>
>>> It is used to test split_huge_page_to_list_to_order for pagecache THPs.
>>> Also add test cases for split_huge_page_to_list_to_order via both
>>> debugfs.
>>>
>>> Signed-off-by: Zi Yan <ziy@nvidia.com>
>>> ---
>>>  mm/huge_memory.c                              |  34 ++++--
>>>  .../selftests/mm/split_huge_page_test.c       | 115 +++++++++++++++++-
>>>  2 files changed, 131 insertions(+), 18 deletions(-)
>>>
>>
>> Hi Zi,
>>
>> When booting the kernel against next-master(20240228)with Arm64 on
>> Marvell Thunder X2 (TX2), the kselftest-mm test 'split_huge_page_test'
>> is failing in our CI (with rootfs over NFS). I can send the full logs if
>> required.
>>
>> A bisect (full log below) identified this patch as introducing the
>> failure. Bisected it on the tag "next-20240228" at repo
>> "https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git".
>>
>> This works fine on  Linux version 6.8.0-rc6
>
> Hi Aishwarya,
>
> Can you try the attached patch and see if it fixes the failure? I changed
> the test to accept XFS dev as input, mount XFS on a temp folder under /tmp,
> and skip if no XFS is mounted.

Please try this updated one. It allows you to specify a XFS device path
in SPLIT_HUGE_PAGE_TEST_XFS_PATH env variable, which is passed to
split_huge_page_test in run_vmtests.sh. It at least allow CI/CD to run
the test without too much change.

--
Best Regards,
Yan, Zi
diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh
index fe140a9f4f9d..22e45207cf6b 100755
--- a/tools/testing/selftests/mm/run_vmtests.sh
+++ b/tools/testing/selftests/mm/run_vmtests.sh
@@ -412,7 +412,7 @@ CATEGORY="thp" run_test ./khugepaged -s 2
 
 CATEGORY="thp" run_test ./transhuge-stress -d 20
 
-CATEGORY="thp" run_test ./split_huge_page_test
+CATEGORY="thp" run_test ./split_huge_page_test ${SPLIT_HUGE_PAGE_TEST_XFS_PATH}
 
 CATEGORY="migration" run_test ./migration
 
diff --git a/tools/testing/selftests/mm/split_huge_page_test.c b/tools/testing/selftests/mm/split_huge_page_test.c
index cf09fdc9ef22..047883473b84 100644
--- a/tools/testing/selftests/mm/split_huge_page_test.c
+++ b/tools/testing/selftests/mm/split_huge_page_test.c
@@ -26,7 +26,6 @@ uint64_t pmd_pagesize;
 
 #define SPLIT_DEBUGFS "/sys/kernel/debug/split_huge_pages"
 #define SMAP_PATH "/proc/self/smaps"
-#define THP_FS_PATH "/mnt/thp_fs"
 #define INPUT_MAX 80
 
 #define PID_FMT "%d,0x%lx,0x%lx,%d"
@@ -268,7 +267,45 @@ void split_file_backed_thp(void)
 	ksft_exit_fail_msg("Error occurred\n");
 }
 
-void create_pagecache_thp_and_fd(const char *testfile, size_t fd_size, int *fd, char **addr)
+bool prepare_thp_fs(const char *xfs_dev_loc, char *thp_fs_template,
+		const char **thp_fs_loc)
+{
+	bool mounted = false;
+
+	*thp_fs_loc = mkdtemp(thp_fs_template);
+
+	if (!*thp_fs_loc)
+		ksft_exit_fail_msg("cannot create temp folder\n");
+
+	if (xfs_dev_loc) {
+		int status = mount(xfs_dev_loc, *thp_fs_loc, "xfs", 0, NULL);
+
+		if (status)
+			ksft_exit_fail_msg("Unable to mount xfs for testing\n");
+		mounted = true;
+	}
+	return mounted;
+}
+
+void cleanup_thp_fs(const char *thp_fs_loc, bool mounted)
+{
+	int status;
+
+	if (mounted) {
+		status = umount(thp_fs_loc);
+		if (status)
+			ksft_exit_fail_msg("Unable to umount %s\n",
+					   thp_fs_loc);
+	}
+
+	status = rmdir(thp_fs_loc);
+	if (status)
+		ksft_exit_fail_msg("cannot remove tmp dir: %s\n",
+				   strerror(errno));
+}
+
+int create_pagecache_thp_and_fd(const char *testfile, size_t fd_size, int *fd,
+		char **addr)
 {
 	size_t i;
 	int dummy;
@@ -277,7 +314,7 @@ void create_pagecache_thp_and_fd(const char *testfile, size_t fd_size, int *fd,
 
 	*fd = open(testfile, O_CREAT | O_RDWR, 0664);
 	if (*fd == -1)
-		ksft_exit_fail_msg("Failed to create a file at "THP_FS_PATH);
+		ksft_exit_fail_msg("Failed to create a file at %s\n", testfile);
 
 	for (i = 0; i < fd_size; i++) {
 		unsigned char byte = (unsigned char)i;
@@ -299,7 +336,7 @@ void create_pagecache_thp_and_fd(const char *testfile, size_t fd_size, int *fd,
 
 	*fd = open(testfile, O_RDWR);
 	if (*fd == -1) {
-		ksft_perror("Failed to open a file at "THP_FS_PATH);
+		ksft_perror("Failed to open testfile\n");
 		goto err_out_unlink;
 	}
 
@@ -314,26 +351,37 @@ void create_pagecache_thp_and_fd(const char *testfile, size_t fd_size, int *fd,
 		dummy += *(*addr + i);
 
 	if (!check_huge_file(*addr, fd_size / pmd_pagesize, pmd_pagesize)) {
-		ksft_print_msg("No large pagecache folio generated, please mount a filesystem supporting large folio at "THP_FS_PATH"\n");
-		goto err_out_close;
+		ksft_print_msg("No large pagecache folio generated, please provide a filesystem supporting large folio\n");
+		unlink(testfile);
+		ksft_test_result_skip("Pagecache folio split skipped\n");
+		return -2;
 	}
-	return;
+	return 0;
 err_out_close:
 	close(*fd);
 err_out_unlink:
 	unlink(testfile);
 	ksft_exit_fail_msg("Failed to create large pagecache folios\n");
+	return -1;
 }
 
-void split_thp_in_pagecache_to_order(size_t fd_size, int order)
+void split_thp_in_pagecache_to_order(size_t fd_size, int order, const char *fs_loc)
 {
 	int fd;
 	char *addr;
 	size_t i;
-	const char testfile[] = THP_FS_PATH "/test";
+	char testfile[INPUT_MAX];
 	int err = 0;
 
-	create_pagecache_thp_and_fd(testfile, fd_size, &fd, &addr);
+	err = snprintf(testfile, INPUT_MAX, "%s/test", fs_loc);
+
+	if (err < 0)
+		ksft_exit_fail_msg("cannot generate right test file name\n");
+
+	err = create_pagecache_thp_and_fd(testfile, fd_size, &fd, &addr);
+	if (err)
+		return;
+	err = 0;
 
 	write_debugfs(PID_FMT, getpid(), (uint64_t)addr, (uint64_t)addr + fd_size, order);
 
@@ -351,6 +399,7 @@ void split_thp_in_pagecache_to_order(size_t fd_size, int order)
 	}
 
 out:
+	munmap(addr, fd_size);
 	close(fd);
 	unlink(testfile);
 	if (err)
@@ -360,8 +409,11 @@ void split_thp_in_pagecache_to_order(size_t fd_size, int order)
 
 int main(int argc, char **argv)
 {
-	int i;
+	int i, mounted;
 	size_t fd_size;
+	char *optional_xfs_dev_loc = NULL;
+	char fs_loc_template[] = "/tmp/thp_fs_XXXXXX";
+	const char *fs_loc;
 
 	ksft_print_header();
 
@@ -370,6 +422,9 @@ int main(int argc, char **argv)
 		ksft_finished();
 	}
 
+	if (argc > 1)
+		optional_xfs_dev_loc = argv[1];
+
 	ksft_set_plan(3+9);
 
 	pagesize = getpagesize();
@@ -384,8 +439,11 @@ int main(int argc, char **argv)
 	split_pte_mapped_thp();
 	split_file_backed_thp();
 
+	mounted = prepare_thp_fs(optional_xfs_dev_loc, fs_loc_template,
+				 &fs_loc);
 	for (i = 8; i >= 0; i--)
-		split_thp_in_pagecache_to_order(fd_size, i);
+		split_thp_in_pagecache_to_order(fd_size, i, fs_loc);
+	cleanup_thp_fs(fs_loc, mounted);
 
 	ksft_finished();
Zi Yan March 1, 2024, 9:10 p.m. UTC | #19
On 1 Mar 2024, at 15:02, Zi Yan wrote:

> On 1 Mar 2024, at 14:37, Zi Yan wrote:
>
>> On 1 Mar 2024, at 4:51, Aishwarya TCV wrote:
>>
>>> On 26/02/2024 20:55, Zi Yan wrote:
>>>> From: Zi Yan <ziy@nvidia.com>
>>>>
>>>> It is used to test split_huge_page_to_list_to_order for pagecache THPs.
>>>> Also add test cases for split_huge_page_to_list_to_order via both
>>>> debugfs.
>>>>
>>>> Signed-off-by: Zi Yan <ziy@nvidia.com>
>>>> ---
>>>>  mm/huge_memory.c                              |  34 ++++--
>>>>  .../selftests/mm/split_huge_page_test.c       | 115 +++++++++++++++++-
>>>>  2 files changed, 131 insertions(+), 18 deletions(-)
>>>>
>>>
>>> Hi Zi,
>>>
>>> When booting the kernel against next-master(20240228)with Arm64 on
>>> Marvell Thunder X2 (TX2), the kselftest-mm test 'split_huge_page_test'
>>> is failing in our CI (with rootfs over NFS). I can send the full logs if
>>> required.
>>>
>>> A bisect (full log below) identified this patch as introducing the
>>> failure. Bisected it on the tag "next-20240228" at repo
>>> "https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git".
>>>
>>> This works fine on  Linux version 6.8.0-rc6
>>
>> Hi Aishwarya,
>>
>> Can you try the attached patch and see if it fixes the failure? I changed
>> the test to accept XFS dev as input, mount XFS on a temp folder under /tmp,
>> and skip if no XFS is mounted.
>
> Please try this updated one. It allows you to specify a XFS device path
> in SPLIT_HUGE_PAGE_TEST_XFS_PATH env variable, which is passed to
> split_huge_page_test in run_vmtests.sh. It at least allow CI/CD to run
> the test without too much change.

OK. This hopefully will be my last churn. Now split_huge_page_test accepts
a path that is backed by XFS and run_vmtest.sh creates a XFS image in /tmp,
mounts it in /tmp, and gives the path to split_huge_page_test. I tested
it locally and it works. Let me know if you have any issue. Thanks.

--
Best Regards,
Yan, Zi
diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh
index fe140a9f4f9d..ffdec5dc0b03 100755
--- a/tools/testing/selftests/mm/run_vmtests.sh
+++ b/tools/testing/selftests/mm/run_vmtests.sh
@@ -412,7 +412,27 @@ CATEGORY="thp" run_test ./khugepaged -s 2
 
 CATEGORY="thp" run_test ./transhuge-stress -d 20
 
-CATEGORY="thp" run_test ./split_huge_page_test
+# Try to create XFS if not provided
+if [ -z "${SPLIT_HUGE_PAGE_TEST_XFS_PATH}" ]; then
+    if test_selected "thp"; then
+        if grep xfs /proc/filesystems &>/dev/null; then
+            XFS_IMG=$(mktemp /tmp/xfs_img_XXXXXX)
+            SPLIT_HUGE_PAGE_TEST_XFS_PATH=$(mktemp -d /tmp/xfs_dir_XXXXXX)
+            truncate -s 314572800 ${XFS_IMG}
+            mkfs.xfs -q ${XFS_IMG}
+            mount -o loop ${XFS_IMG} ${SPLIT_HUGE_PAGE_TEST_XFS_PATH}
+            MOUNTED_XFS=1
+        fi
+    fi
+fi
+
+CATEGORY="thp" run_test ./split_huge_page_test ${SPLIT_HUGE_PAGE_TEST_XFS_PATH}
+
+if [ -n "${MOUNTED_XFS}" ]; then
+    umount ${SPLIT_HUGE_PAGE_TEST_XFS_PATH}
+    rmdir ${SPLIT_HUGE_PAGE_TEST_XFS_PATH}
+    rm -f ${XFS_IMG}
+fi
 
 CATEGORY="migration" run_test ./migration
 
diff --git a/tools/testing/selftests/mm/split_huge_page_test.c b/tools/testing/selftests/mm/split_huge_page_test.c
index cf09fdc9ef22..0b367affaa0d 100644
--- a/tools/testing/selftests/mm/split_huge_page_test.c
+++ b/tools/testing/selftests/mm/split_huge_page_test.c
@@ -26,7 +26,6 @@ uint64_t pmd_pagesize;
 
 #define SPLIT_DEBUGFS "/sys/kernel/debug/split_huge_pages"
 #define SMAP_PATH "/proc/self/smaps"
-#define THP_FS_PATH "/mnt/thp_fs"
 #define INPUT_MAX 80
 
 #define PID_FMT "%d,0x%lx,0x%lx,%d"
@@ -268,7 +267,37 @@ void split_file_backed_thp(void)
 	ksft_exit_fail_msg("Error occurred\n");
 }
 
-void create_pagecache_thp_and_fd(const char *testfile, size_t fd_size, int *fd, char **addr)
+bool prepare_thp_fs(const char *xfs_path, char *thp_fs_template,
+		const char **thp_fs_loc)
+{
+	if (xfs_path) {
+		*thp_fs_loc = xfs_path;
+		return false;
+	}
+
+	*thp_fs_loc = mkdtemp(thp_fs_template);
+
+	if (!*thp_fs_loc)
+		ksft_exit_fail_msg("cannot create temp folder\n");
+
+	return true;
+}
+
+void cleanup_thp_fs(const char *thp_fs_loc, bool created_tmp)
+{
+	int status;
+
+	if (!created_tmp)
+		return;
+
+	status = rmdir(thp_fs_loc);
+	if (status)
+		ksft_exit_fail_msg("cannot remove tmp dir: %s\n",
+				   strerror(errno));
+}
+
+int create_pagecache_thp_and_fd(const char *testfile, size_t fd_size, int *fd,
+		char **addr)
 {
 	size_t i;
 	int dummy;
@@ -277,7 +306,7 @@ void create_pagecache_thp_and_fd(const char *testfile, size_t fd_size, int *fd,
 
 	*fd = open(testfile, O_CREAT | O_RDWR, 0664);
 	if (*fd == -1)
-		ksft_exit_fail_msg("Failed to create a file at "THP_FS_PATH);
+		ksft_exit_fail_msg("Failed to create a file at %s\n", testfile);
 
 	for (i = 0; i < fd_size; i++) {
 		unsigned char byte = (unsigned char)i;
@@ -299,7 +328,7 @@ void create_pagecache_thp_and_fd(const char *testfile, size_t fd_size, int *fd,
 
 	*fd = open(testfile, O_RDWR);
 	if (*fd == -1) {
-		ksft_perror("Failed to open a file at "THP_FS_PATH);
+		ksft_perror("Failed to open testfile\n");
 		goto err_out_unlink;
 	}
 
@@ -314,26 +343,37 @@ void create_pagecache_thp_and_fd(const char *testfile, size_t fd_size, int *fd,
 		dummy += *(*addr + i);
 
 	if (!check_huge_file(*addr, fd_size / pmd_pagesize, pmd_pagesize)) {
-		ksft_print_msg("No large pagecache folio generated, please mount a filesystem supporting large folio at "THP_FS_PATH"\n");
-		goto err_out_close;
+		ksft_print_msg("No large pagecache folio generated, please provide a filesystem supporting large folio\n");
+		unlink(testfile);
+		ksft_test_result_skip("Pagecache folio split skipped\n");
+		return -2;
 	}
-	return;
+	return 0;
 err_out_close:
 	close(*fd);
 err_out_unlink:
 	unlink(testfile);
 	ksft_exit_fail_msg("Failed to create large pagecache folios\n");
+	return -1;
 }
 
-void split_thp_in_pagecache_to_order(size_t fd_size, int order)
+void split_thp_in_pagecache_to_order(size_t fd_size, int order, const char *fs_loc)
 {
 	int fd;
 	char *addr;
 	size_t i;
-	const char testfile[] = THP_FS_PATH "/test";
+	char testfile[INPUT_MAX];
 	int err = 0;
 
-	create_pagecache_thp_and_fd(testfile, fd_size, &fd, &addr);
+	err = snprintf(testfile, INPUT_MAX, "%s/test", fs_loc);
+
+	if (err < 0)
+		ksft_exit_fail_msg("cannot generate right test file name\n");
+
+	err = create_pagecache_thp_and_fd(testfile, fd_size, &fd, &addr);
+	if (err)
+		return;
+	err = 0;
 
 	write_debugfs(PID_FMT, getpid(), (uint64_t)addr, (uint64_t)addr + fd_size, order);
 
@@ -351,6 +391,7 @@ void split_thp_in_pagecache_to_order(size_t fd_size, int order)
 	}
 
 out:
+	munmap(addr, fd_size);
 	close(fd);
 	unlink(testfile);
 	if (err)
@@ -362,6 +403,10 @@ int main(int argc, char **argv)
 {
 	int i;
 	size_t fd_size;
+	char *optional_xfs_path = NULL;
+	char fs_loc_template[] = "/tmp/thp_fs_XXXXXX";
+	const char *fs_loc;
+	bool created_tmp;
 
 	ksft_print_header();
 
@@ -370,6 +415,9 @@ int main(int argc, char **argv)
 		ksft_finished();
 	}
 
+	if (argc > 1)
+		optional_xfs_path = argv[1];
+
 	ksft_set_plan(3+9);
 
 	pagesize = getpagesize();
@@ -384,8 +432,11 @@ int main(int argc, char **argv)
 	split_pte_mapped_thp();
 	split_file_backed_thp();
 
+	created_tmp = prepare_thp_fs(optional_xfs_path, fs_loc_template,
+			&fs_loc);
 	for (i = 8; i >= 0; i--)
-		split_thp_in_pagecache_to_order(fd_size, i);
+		split_thp_in_pagecache_to_order(fd_size, i, fs_loc);
+	cleanup_thp_fs(fs_loc, created_tmp);
 
 	ksft_finished();
Aishwarya TCV March 4, 2024, 9:50 a.m. UTC | #20
On 01/03/2024 21:10, Zi Yan wrote:
> On 1 Mar 2024, at 15:02, Zi Yan wrote:
> 
>> On 1 Mar 2024, at 14:37, Zi Yan wrote:
>>
>>> On 1 Mar 2024, at 4:51, Aishwarya TCV wrote:
>>>
>>>> On 26/02/2024 20:55, Zi Yan wrote:
>>>>> From: Zi Yan <ziy@nvidia.com>
>>>>>
>>>>> It is used to test split_huge_page_to_list_to_order for pagecache THPs.
>>>>> Also add test cases for split_huge_page_to_list_to_order via both
>>>>> debugfs.
>>>>>
>>>>> Signed-off-by: Zi Yan <ziy@nvidia.com>
>>>>> ---
>>>>>  mm/huge_memory.c                              |  34 ++++--
>>>>>  .../selftests/mm/split_huge_page_test.c       | 115 +++++++++++++++++-
>>>>>  2 files changed, 131 insertions(+), 18 deletions(-)
>>>>>
>>>>
>>>> Hi Zi,
>>>>
>>>> When booting the kernel against next-master(20240228)with Arm64 on
>>>> Marvell Thunder X2 (TX2), the kselftest-mm test 'split_huge_page_test'
>>>> is failing in our CI (with rootfs over NFS). I can send the full logs if
>>>> required.
>>>>
>>>> A bisect (full log below) identified this patch as introducing the
>>>> failure. Bisected it on the tag "next-20240228" at repo
>>>> "https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git".
>>>>
>>>> This works fine on  Linux version 6.8.0-rc6
>>>
>>> Hi Aishwarya,
>>>
>>> Can you try the attached patch and see if it fixes the failure? I changed
>>> the test to accept XFS dev as input, mount XFS on a temp folder under /tmp,
>>> and skip if no XFS is mounted.
>>
>> Please try this updated one. It allows you to specify a XFS device path
>> in SPLIT_HUGE_PAGE_TEST_XFS_PATH env variable, which is passed to
>> split_huge_page_test in run_vmtests.sh. It at least allow CI/CD to run
>> the test without too much change.
> 
> OK. This hopefully will be my last churn. Now split_huge_page_test accepts
> a path that is backed by XFS and run_vmtest.sh creates a XFS image in /tmp,
> mounts it in /tmp, and gives the path to split_huge_page_test. I tested
> it locally and it works. Let me know if you have any issue. Thanks.
> 
> --
> Best Regards,
> Yan, Zi

Hi Zi,

Tested the patch by applying it on next-20240304. Logs from our CI with
rootfs over nfs is attached below. "Bail out! cannot remove tmp dir:
Directory not empty" is still observed.


Test run log:
# # ------------------------------
# # running ./split_huge_page_test
# # ------------------------------
# # TAP version 13
# # 1..12
# # ok 1 Split huge pages successful
# # ok 2 Split PTE-mapped huge pages successful
# # # Please enable pr_debug in split_huge_pages_in_file() for more info.
# # # Please check dmesg for more information
# # ok 3 File-backed THP split test done
<6>[  639.821657] split_huge_page (111099): drop_caches: 3
<6>[  639.821657] split_huge_page (111099): drop_caches: 3
# # # No large pagecache folio generated, please provide a filesystem
supporting large folio
# # ok 4 # SKIP Pagecache folio split skipped
<6>[  645.392184] split_huge_page (111099): drop_caches: 3
<6>[  645.392184] split_huge_page (111099): drop_caches: 3
# # # No large pagecache folio generated, please provide a filesystem
supporting large folio
# # ok 5 # SKIP Pagecache folio split skipped
<6>[  650.938248] split_huge_page (111099): drop_caches: 3
<6>[  650.938248] split_huge_page (111099): drop_caches: 3
# # # No large pagecache folio generated, please provide a filesystem
supporting large folio
# # ok 6 # SKIP Pagecache folio split skipped
<6>[  656.500149] split_huge_page (111099): drop_caches: 3
<6>[  656.500149] split_huge_page (111099): drop_caches: 3
# # # No large pagecache folio generated, please provide a filesystem
supporting large folio
# # ok 7 # SKIP Pagecache folio split skipped
<6>[  662.044085] split_huge_page (111099): drop_caches: 3
<6>[  662.044085] split_huge_page (111099): drop_caches: 3
# # # No large pagecache folio generated, please provide a filesystem
supporting large folio
# # ok 8 # SKIP Pagecache folio split skipped
<6>[  667.591841] split_huge_page (111099): drop_caches: 3
<6>[  667.591841] split_huge_page (111099): drop_caches: 3
# # # No large pagecache folio generated, please provide a filesystem
supporting large folio
# # ok 9 # SKIP Pagecache folio split skipped
<6>[  673.172441] split_huge_page (111099): drop_caches: 3
<6>[  673.172441] split_huge_page (111099): drop_caches: 3
# # # No large pagecache folio generated, please provide a filesystem
supporting large folio
# # ok 10 # SKIP Pagecache folio split skipped
<6>[  678.726263] split_huge_page (111099): drop_caches: 3
<6>[  678.726263] split_huge_page (111099): drop_caches: 3
# # # No large pagecache folio generated, please provide a filesystem
supporting large folio
# # ok 11 # SKIP Pagecache folio split skipped
<6>[  684.272851] split_huge_page (111099): drop_caches: 3
<6>[  684.272851] split_huge_page (111099): drop_caches: 3
# # # No large pagecache folio generated, please provide a filesystem
supporting large folio
# # ok 12 # SKIP Pagecache folio split skipped
# # Bail out! cannot remove tmp dir: Directory not empty
# # # Totals: pass:3 fail:0 xfail:0 xpass:0 skip:9 error:0
# # [FAIL]
# not ok 51 split_huge_page_test # exit=1
# # ------------------

Thanks,
Aishwarya
Zi Yan March 4, 2024, 2:58 p.m. UTC | #21
On 4 Mar 2024, at 4:50, Aishwarya TCV wrote:

> On 01/03/2024 21:10, Zi Yan wrote:
>> On 1 Mar 2024, at 15:02, Zi Yan wrote:
>>
>>> On 1 Mar 2024, at 14:37, Zi Yan wrote:
>>>
>>>> On 1 Mar 2024, at 4:51, Aishwarya TCV wrote:
>>>>
>>>>> On 26/02/2024 20:55, Zi Yan wrote:
>>>>>> From: Zi Yan <ziy@nvidia.com>
>>>>>>
>>>>>> It is used to test split_huge_page_to_list_to_order for pagecache THPs.
>>>>>> Also add test cases for split_huge_page_to_list_to_order via both
>>>>>> debugfs.
>>>>>>
>>>>>> Signed-off-by: Zi Yan <ziy@nvidia.com>
>>>>>> ---
>>>>>>  mm/huge_memory.c                              |  34 ++++--
>>>>>>  .../selftests/mm/split_huge_page_test.c       | 115 +++++++++++++++++-
>>>>>>  2 files changed, 131 insertions(+), 18 deletions(-)
>>>>>>
>>>>>
>>>>> Hi Zi,
>>>>>
>>>>> When booting the kernel against next-master(20240228)with Arm64 on
>>>>> Marvell Thunder X2 (TX2), the kselftest-mm test 'split_huge_page_test'
>>>>> is failing in our CI (with rootfs over NFS). I can send the full logs if
>>>>> required.
>>>>>
>>>>> A bisect (full log below) identified this patch as introducing the
>>>>> failure. Bisected it on the tag "next-20240228" at repo
>>>>> "https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git".
>>>>>
>>>>> This works fine on  Linux version 6.8.0-rc6
>>>>
>>>> Hi Aishwarya,
>>>>
>>>> Can you try the attached patch and see if it fixes the failure? I changed
>>>> the test to accept XFS dev as input, mount XFS on a temp folder under /tmp,
>>>> and skip if no XFS is mounted.
>>>
>>> Please try this updated one. It allows you to specify a XFS device path
>>> in SPLIT_HUGE_PAGE_TEST_XFS_PATH env variable, which is passed to
>>> split_huge_page_test in run_vmtests.sh. It at least allow CI/CD to run
>>> the test without too much change.
>>
>> OK. This hopefully will be my last churn. Now split_huge_page_test accepts
>> a path that is backed by XFS and run_vmtest.sh creates a XFS image in /tmp,
>> mounts it in /tmp, and gives the path to split_huge_page_test. I tested
>> it locally and it works. Let me know if you have any issue. Thanks.
>>
>> --
>> Best Regards,
>> Yan, Zi
>
> Hi Zi,
>
> Tested the patch by applying it on next-20240304. Logs from our CI with
> rootfs over nfs is attached below. "Bail out! cannot remove tmp dir:
> Directory not empty" is still observed.

Hi Aishwarya,

Do you have the config file for the CI kernel? And /tmp is also on nfs?
Any detailed information about CI machine environment? I cannot reproduce
the error locally, either on bare metal or VM. Maybe because my /tmp is
not NFS mounted?

>
>
> Test run log:
> # # ------------------------------
> # # running ./split_huge_page_test
> # # ------------------------------
> # # TAP version 13
> # # 1..12
> # # ok 1 Split huge pages successful
> # # ok 2 Split PTE-mapped huge pages successful
> # # # Please enable pr_debug in split_huge_pages_in_file() for more info.
> # # # Please check dmesg for more information
> # # ok 3 File-backed THP split test done
> <6>[  639.821657] split_huge_page (111099): drop_caches: 3
> <6>[  639.821657] split_huge_page (111099): drop_caches: 3
> # # # No large pagecache folio generated, please provide a filesystem
> supporting large folio
> # # ok 4 # SKIP Pagecache folio split skipped
> <6>[  645.392184] split_huge_page (111099): drop_caches: 3
> <6>[  645.392184] split_huge_page (111099): drop_caches: 3
> # # # No large pagecache folio generated, please provide a filesystem
> supporting large folio
> # # ok 5 # SKIP Pagecache folio split skipped
> <6>[  650.938248] split_huge_page (111099): drop_caches: 3
> <6>[  650.938248] split_huge_page (111099): drop_caches: 3
> # # # No large pagecache folio generated, please provide a filesystem
> supporting large folio
> # # ok 6 # SKIP Pagecache folio split skipped
> <6>[  656.500149] split_huge_page (111099): drop_caches: 3
> <6>[  656.500149] split_huge_page (111099): drop_caches: 3
> # # # No large pagecache folio generated, please provide a filesystem
> supporting large folio
> # # ok 7 # SKIP Pagecache folio split skipped
> <6>[  662.044085] split_huge_page (111099): drop_caches: 3
> <6>[  662.044085] split_huge_page (111099): drop_caches: 3
> # # # No large pagecache folio generated, please provide a filesystem
> supporting large folio
> # # ok 8 # SKIP Pagecache folio split skipped
> <6>[  667.591841] split_huge_page (111099): drop_caches: 3
> <6>[  667.591841] split_huge_page (111099): drop_caches: 3
> # # # No large pagecache folio generated, please provide a filesystem
> supporting large folio
> # # ok 9 # SKIP Pagecache folio split skipped
> <6>[  673.172441] split_huge_page (111099): drop_caches: 3
> <6>[  673.172441] split_huge_page (111099): drop_caches: 3
> # # # No large pagecache folio generated, please provide a filesystem
> supporting large folio
> # # ok 10 # SKIP Pagecache folio split skipped
> <6>[  678.726263] split_huge_page (111099): drop_caches: 3
> <6>[  678.726263] split_huge_page (111099): drop_caches: 3
> # # # No large pagecache folio generated, please provide a filesystem
> supporting large folio
> # # ok 11 # SKIP Pagecache folio split skipped
> <6>[  684.272851] split_huge_page (111099): drop_caches: 3
> <6>[  684.272851] split_huge_page (111099): drop_caches: 3
> # # # No large pagecache folio generated, please provide a filesystem
> supporting large folio
> # # ok 12 # SKIP Pagecache folio split skipped
> # # Bail out! cannot remove tmp dir: Directory not empty
> # # # Totals: pass:3 fail:0 xfail:0 xpass:0 skip:9 error:0
> # # [FAIL]
> # not ok 51 split_huge_page_test # exit=1
> # # ------------------
>
> Thanks,
> Aishwarya


--
Best Regards,
Yan, Zi
Aishwarya TCV March 4, 2024, 3:44 p.m. UTC | #22
On 04/03/2024 14:58, Zi Yan wrote:
> On 4 Mar 2024, at 4:50, Aishwarya TCV wrote:
> 
>> On 01/03/2024 21:10, Zi Yan wrote:
>>> On 1 Mar 2024, at 15:02, Zi Yan wrote:
>>>
>>>> On 1 Mar 2024, at 14:37, Zi Yan wrote:
>>>>
>>>>> On 1 Mar 2024, at 4:51, Aishwarya TCV wrote:
>>>>>
>>>>>> On 26/02/2024 20:55, Zi Yan wrote:
>>>>>>> From: Zi Yan <ziy@nvidia.com>
>>>>>>>
>>>>>>> It is used to test split_huge_page_to_list_to_order for pagecache THPs.
>>>>>>> Also add test cases for split_huge_page_to_list_to_order via both
>>>>>>> debugfs.
>>>>>>>
>>>>>>> Signed-off-by: Zi Yan <ziy@nvidia.com>
>>>>>>> ---
>>>>>>>  mm/huge_memory.c                              |  34 ++++--
>>>>>>>  .../selftests/mm/split_huge_page_test.c       | 115 +++++++++++++++++-
>>>>>>>  2 files changed, 131 insertions(+), 18 deletions(-)
>>>>>>>
>>>>>>
>>>>>> Hi Zi,
>>>>>>
>>>>>> When booting the kernel against next-master(20240228)with Arm64 on
>>>>>> Marvell Thunder X2 (TX2), the kselftest-mm test 'split_huge_page_test'
>>>>>> is failing in our CI (with rootfs over NFS). I can send the full logs if
>>>>>> required.
>>>>>>
>>>>>> A bisect (full log below) identified this patch as introducing the
>>>>>> failure. Bisected it on the tag "next-20240228" at repo
>>>>>> "https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git".
>>>>>>
>>>>>> This works fine on  Linux version 6.8.0-rc6
>>>>>
>>>>> Hi Aishwarya,
>>>>>
>>>>> Can you try the attached patch and see if it fixes the failure? I changed
>>>>> the test to accept XFS dev as input, mount XFS on a temp folder under /tmp,
>>>>> and skip if no XFS is mounted.
>>>>
>>>> Please try this updated one. It allows you to specify a XFS device path
>>>> in SPLIT_HUGE_PAGE_TEST_XFS_PATH env variable, which is passed to
>>>> split_huge_page_test in run_vmtests.sh. It at least allow CI/CD to run
>>>> the test without too much change.
>>>
>>> OK. This hopefully will be my last churn. Now split_huge_page_test accepts
>>> a path that is backed by XFS and run_vmtest.sh creates a XFS image in /tmp,
>>> mounts it in /tmp, and gives the path to split_huge_page_test. I tested
>>> it locally and it works. Let me know if you have any issue. Thanks.
>>>
>>> --
>>> Best Regards,
>>> Yan, Zi
>>
>> Hi Zi,
>>
>> Tested the patch by applying it on next-20240304. Logs from our CI with
>> rootfs over nfs is attached below. "Bail out! cannot remove tmp dir:
>> Directory not empty" is still observed.
> 
> Hi Aishwarya,
> 
> Do you have the config file for the CI kernel? And /tmp is also on nfs?
> Any detailed information about CI machine environment? I cannot reproduce
> the error locally, either on bare metal or VM. Maybe because my /tmp is
> not NFS mounted?
> 

Hi Zi,

Please find the details below. Hope it helps.

Do you have the config file for the CI kernel?
- We are using:
defconfig+https://github.com/torvalds/linux/blob/master/tools/testing/selftests/mm/config

And /tmp is also on nfs?
- Yes

Any detailed information about CI machine environment?
- We are running the test using LAVA device Cavium Thunder X2 (TX2),
- We have very similar rootfs as - nfsrootfs:
https://storage.kernelci.org/images/rootfs/debian/bullseye-kselftest/20240129.0/arm64/full.rootfs.tar.xz
- We are using grub boot method over nfs
- Additionally Ryan mentioned "Looks like it is failing because he is
trying to delete the temp dir with rmdir() but rmdir() requires the
directory to be empty, which it is not."

Thanks,
Aishwarya

>>
>>
>> Test run log:
>> # # ------------------------------
>> # # running ./split_huge_page_test
>> # # ------------------------------
>> # # TAP version 13
>> # # 1..12
>> # # ok 1 Split huge pages successful
>> # # ok 2 Split PTE-mapped huge pages successful
>> # # # Please enable pr_debug in split_huge_pages_in_file() for more info.
>> # # # Please check dmesg for more information
>> # # ok 3 File-backed THP split test done
>> <6>[  639.821657] split_huge_page (111099): drop_caches: 3
>> <6>[  639.821657] split_huge_page (111099): drop_caches: 3
>> # # # No large pagecache folio generated, please provide a filesystem
>> supporting large folio
>> # # ok 4 # SKIP Pagecache folio split skipped
>> <6>[  645.392184] split_huge_page (111099): drop_caches: 3
>> <6>[  645.392184] split_huge_page (111099): drop_caches: 3
>> # # # No large pagecache folio generated, please provide a filesystem
>> supporting large folio
>> # # ok 5 # SKIP Pagecache folio split skipped
>> <6>[  650.938248] split_huge_page (111099): drop_caches: 3
>> <6>[  650.938248] split_huge_page (111099): drop_caches: 3
>> # # # No large pagecache folio generated, please provide a filesystem
>> supporting large folio
>> # # ok 6 # SKIP Pagecache folio split skipped
>> <6>[  656.500149] split_huge_page (111099): drop_caches: 3
>> <6>[  656.500149] split_huge_page (111099): drop_caches: 3
>> # # # No large pagecache folio generated, please provide a filesystem
>> supporting large folio
>> # # ok 7 # SKIP Pagecache folio split skipped
>> <6>[  662.044085] split_huge_page (111099): drop_caches: 3
>> <6>[  662.044085] split_huge_page (111099): drop_caches: 3
>> # # # No large pagecache folio generated, please provide a filesystem
>> supporting large folio
>> # # ok 8 # SKIP Pagecache folio split skipped
>> <6>[  667.591841] split_huge_page (111099): drop_caches: 3
>> <6>[  667.591841] split_huge_page (111099): drop_caches: 3
>> # # # No large pagecache folio generated, please provide a filesystem
>> supporting large folio
>> # # ok 9 # SKIP Pagecache folio split skipped
>> <6>[  673.172441] split_huge_page (111099): drop_caches: 3
>> <6>[  673.172441] split_huge_page (111099): drop_caches: 3
>> # # # No large pagecache folio generated, please provide a filesystem
>> supporting large folio
>> # # ok 10 # SKIP Pagecache folio split skipped
>> <6>[  678.726263] split_huge_page (111099): drop_caches: 3
>> <6>[  678.726263] split_huge_page (111099): drop_caches: 3
>> # # # No large pagecache folio generated, please provide a filesystem
>> supporting large folio
>> # # ok 11 # SKIP Pagecache folio split skipped
>> <6>[  684.272851] split_huge_page (111099): drop_caches: 3
>> <6>[  684.272851] split_huge_page (111099): drop_caches: 3
>> # # # No large pagecache folio generated, please provide a filesystem
>> supporting large folio
>> # # ok 12 # SKIP Pagecache folio split skipped
>> # # Bail out! cannot remove tmp dir: Directory not empty
>> # # # Totals: pass:3 fail:0 xfail:0 xpass:0 skip:9 error:0
>> # # [FAIL]
>> # not ok 51 split_huge_page_test # exit=1
>> # # ------------------
>>
>> Thanks,
>> Aishwarya
> 
> 
> --
> Best Regards,
> Yan, Zi
Zi Yan March 4, 2024, 3:57 p.m. UTC | #23
On 4 Mar 2024, at 10:44, Aishwarya TCV wrote:

> On 04/03/2024 14:58, Zi Yan wrote:
>> On 4 Mar 2024, at 4:50, Aishwarya TCV wrote:
>>
>>> On 01/03/2024 21:10, Zi Yan wrote:
>>>> On 1 Mar 2024, at 15:02, Zi Yan wrote:
>>>>
>>>>> On 1 Mar 2024, at 14:37, Zi Yan wrote:
>>>>>
>>>>>> On 1 Mar 2024, at 4:51, Aishwarya TCV wrote:
>>>>>>
>>>>>>> On 26/02/2024 20:55, Zi Yan wrote:
>>>>>>>> From: Zi Yan <ziy@nvidia.com>
>>>>>>>>
>>>>>>>> It is used to test split_huge_page_to_list_to_order for pagecache THPs.
>>>>>>>> Also add test cases for split_huge_page_to_list_to_order via both
>>>>>>>> debugfs.
>>>>>>>>
>>>>>>>> Signed-off-by: Zi Yan <ziy@nvidia.com>
>>>>>>>> ---
>>>>>>>>  mm/huge_memory.c                              |  34 ++++--
>>>>>>>>  .../selftests/mm/split_huge_page_test.c       | 115 +++++++++++++++++-
>>>>>>>>  2 files changed, 131 insertions(+), 18 deletions(-)
>>>>>>>>
>>>>>>>
>>>>>>> Hi Zi,
>>>>>>>
>>>>>>> When booting the kernel against next-master(20240228)with Arm64 on
>>>>>>> Marvell Thunder X2 (TX2), the kselftest-mm test 'split_huge_page_test'
>>>>>>> is failing in our CI (with rootfs over NFS). I can send the full logs if
>>>>>>> required.
>>>>>>>
>>>>>>> A bisect (full log below) identified this patch as introducing the
>>>>>>> failure. Bisected it on the tag "next-20240228" at repo
>>>>>>> "https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git".
>>>>>>>
>>>>>>> This works fine on  Linux version 6.8.0-rc6
>>>>>>
>>>>>> Hi Aishwarya,
>>>>>>
>>>>>> Can you try the attached patch and see if it fixes the failure? I changed
>>>>>> the test to accept XFS dev as input, mount XFS on a temp folder under /tmp,
>>>>>> and skip if no XFS is mounted.
>>>>>
>>>>> Please try this updated one. It allows you to specify a XFS device path
>>>>> in SPLIT_HUGE_PAGE_TEST_XFS_PATH env variable, which is passed to
>>>>> split_huge_page_test in run_vmtests.sh. It at least allow CI/CD to run
>>>>> the test without too much change.
>>>>
>>>> OK. This hopefully will be my last churn. Now split_huge_page_test accepts
>>>> a path that is backed by XFS and run_vmtest.sh creates a XFS image in /tmp,
>>>> mounts it in /tmp, and gives the path to split_huge_page_test. I tested
>>>> it locally and it works. Let me know if you have any issue. Thanks.
>>>>
>>>> --
>>>> Best Regards,
>>>> Yan, Zi
>>>
>>> Hi Zi,
>>>
>>> Tested the patch by applying it on next-20240304. Logs from our CI with
>>> rootfs over nfs is attached below. "Bail out! cannot remove tmp dir:
>>> Directory not empty" is still observed.
>>
>> Hi Aishwarya,
>>
>> Do you have the config file for the CI kernel? And /tmp is also on nfs?
>> Any detailed information about CI machine environment? I cannot reproduce
>> the error locally, either on bare metal or VM. Maybe because my /tmp is
>> not NFS mounted?
>>
>
> Hi Zi,
>
> Please find the details below. Hope it helps.
>
> Do you have the config file for the CI kernel?
> - We are using:
> defconfig+https://github.com/torvalds/linux/blob/master/tools/testing/selftests/mm/config
>
> And /tmp is also on nfs?
> - Yes
>
> Any detailed information about CI machine environment?
> - We are running the test using LAVA device Cavium Thunder X2 (TX2),
> - We have very similar rootfs as - nfsrootfs:
> https://storage.kernelci.org/images/rootfs/debian/bullseye-kselftest/20240129.0/arm64/full.rootfs.tar.xz
> - We are using grub boot method over nfs
> - Additionally Ryan mentioned "Looks like it is failing because he is
> trying to delete the temp dir with rmdir() but rmdir() requires the
> directory to be empty, which it is not."

Hi Aishwarya,

Thank you for the information and I am able to reproduce it on a NFS folder.
The error comes from that the opened test files are not munmapped and their
file descriptors are not closed in the skip path. NFS creates .nfsXXX files
for them, making the temp folder not empty.

The attached patch cleans up properly and works on a NFS folder. Let me know
if it works on your side. Thanks.

--
Best Regards,
Yan, Zi
diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh
index fe140a9f4f9d..ffdec5dc0b03 100755
--- a/tools/testing/selftests/mm/run_vmtests.sh
+++ b/tools/testing/selftests/mm/run_vmtests.sh
@@ -412,7 +412,27 @@ CATEGORY="thp" run_test ./khugepaged -s 2
 
 CATEGORY="thp" run_test ./transhuge-stress -d 20
 
-CATEGORY="thp" run_test ./split_huge_page_test
+# Try to create XFS if not provided
+if [ -z "${SPLIT_HUGE_PAGE_TEST_XFS_PATH}" ]; then
+    if test_selected "thp"; then
+        if grep xfs /proc/filesystems &>/dev/null; then
+            XFS_IMG=$(mktemp /tmp/xfs_img_XXXXXX)
+            SPLIT_HUGE_PAGE_TEST_XFS_PATH=$(mktemp -d /tmp/xfs_dir_XXXXXX)
+            truncate -s 314572800 ${XFS_IMG}
+            mkfs.xfs -q ${XFS_IMG}
+            mount -o loop ${XFS_IMG} ${SPLIT_HUGE_PAGE_TEST_XFS_PATH}
+            MOUNTED_XFS=1
+        fi
+    fi
+fi
+
+CATEGORY="thp" run_test ./split_huge_page_test ${SPLIT_HUGE_PAGE_TEST_XFS_PATH}
+
+if [ -n "${MOUNTED_XFS}" ]; then
+    umount ${SPLIT_HUGE_PAGE_TEST_XFS_PATH}
+    rmdir ${SPLIT_HUGE_PAGE_TEST_XFS_PATH}
+    rm -f ${XFS_IMG}
+fi
 
 CATEGORY="migration" run_test ./migration
 
diff --git a/tools/testing/selftests/mm/split_huge_page_test.c b/tools/testing/selftests/mm/split_huge_page_test.c
index cf09fdc9ef22..856662d2f87a 100644
--- a/tools/testing/selftests/mm/split_huge_page_test.c
+++ b/tools/testing/selftests/mm/split_huge_page_test.c
@@ -26,7 +26,6 @@ uint64_t pmd_pagesize;
 
 #define SPLIT_DEBUGFS "/sys/kernel/debug/split_huge_pages"
 #define SMAP_PATH "/proc/self/smaps"
-#define THP_FS_PATH "/mnt/thp_fs"
 #define INPUT_MAX 80
 
 #define PID_FMT "%d,0x%lx,0x%lx,%d"
@@ -268,7 +267,37 @@ void split_file_backed_thp(void)
 	ksft_exit_fail_msg("Error occurred\n");
 }
 
-void create_pagecache_thp_and_fd(const char *testfile, size_t fd_size, int *fd, char **addr)
+bool prepare_thp_fs(const char *xfs_path, char *thp_fs_template,
+		const char **thp_fs_loc)
+{
+	if (xfs_path) {
+		*thp_fs_loc = xfs_path;
+		return false;
+	}
+
+	*thp_fs_loc = mkdtemp(thp_fs_template);
+
+	if (!*thp_fs_loc)
+		ksft_exit_fail_msg("cannot create temp folder\n");
+
+	return true;
+}
+
+void cleanup_thp_fs(const char *thp_fs_loc, bool created_tmp)
+{
+	int status;
+
+	if (!created_tmp)
+		return;
+
+	status = rmdir(thp_fs_loc);
+	if (status)
+		ksft_exit_fail_msg("cannot remove tmp dir: %s\n",
+				   strerror(errno));
+}
+
+int create_pagecache_thp_and_fd(const char *testfile, size_t fd_size, int *fd,
+		char **addr)
 {
 	size_t i;
 	int dummy;
@@ -277,7 +306,7 @@ void create_pagecache_thp_and_fd(const char *testfile, size_t fd_size, int *fd,
 
 	*fd = open(testfile, O_CREAT | O_RDWR, 0664);
 	if (*fd == -1)
-		ksft_exit_fail_msg("Failed to create a file at "THP_FS_PATH);
+		ksft_exit_fail_msg("Failed to create a file at %s\n", testfile);
 
 	for (i = 0; i < fd_size; i++) {
 		unsigned char byte = (unsigned char)i;
@@ -299,7 +328,7 @@ void create_pagecache_thp_and_fd(const char *testfile, size_t fd_size, int *fd,
 
 	*fd = open(testfile, O_RDWR);
 	if (*fd == -1) {
-		ksft_perror("Failed to open a file at "THP_FS_PATH);
+		ksft_perror("Failed to open testfile\n");
 		goto err_out_unlink;
 	}
 
@@ -314,26 +343,39 @@ void create_pagecache_thp_and_fd(const char *testfile, size_t fd_size, int *fd,
 		dummy += *(*addr + i);
 
 	if (!check_huge_file(*addr, fd_size / pmd_pagesize, pmd_pagesize)) {
-		ksft_print_msg("No large pagecache folio generated, please mount a filesystem supporting large folio at "THP_FS_PATH"\n");
-		goto err_out_close;
+		ksft_print_msg("No large pagecache folio generated, please provide a filesystem supporting large folio\n");
+		munmap(*addr, fd_size);
+		close(*fd);
+		unlink(testfile);
+		ksft_test_result_skip("Pagecache folio split skipped\n");
+		return -2;
 	}
-	return;
+	return 0;
 err_out_close:
 	close(*fd);
 err_out_unlink:
 	unlink(testfile);
 	ksft_exit_fail_msg("Failed to create large pagecache folios\n");
+	return -1;
 }
 
-void split_thp_in_pagecache_to_order(size_t fd_size, int order)
+void split_thp_in_pagecache_to_order(size_t fd_size, int order, const char *fs_loc)
 {
 	int fd;
 	char *addr;
 	size_t i;
-	const char testfile[] = THP_FS_PATH "/test";
+	char testfile[INPUT_MAX];
 	int err = 0;
 
-	create_pagecache_thp_and_fd(testfile, fd_size, &fd, &addr);
+	err = snprintf(testfile, INPUT_MAX, "%s/test", fs_loc);
+
+	if (err < 0)
+		ksft_exit_fail_msg("cannot generate right test file name\n");
+
+	err = create_pagecache_thp_and_fd(testfile, fd_size, &fd, &addr);
+	if (err)
+		return;
+	err = 0;
 
 	write_debugfs(PID_FMT, getpid(), (uint64_t)addr, (uint64_t)addr + fd_size, order);
 
@@ -351,6 +393,7 @@ void split_thp_in_pagecache_to_order(size_t fd_size, int order)
 	}
 
 out:
+	munmap(addr, fd_size);
 	close(fd);
 	unlink(testfile);
 	if (err)
@@ -362,6 +405,10 @@ int main(int argc, char **argv)
 {
 	int i;
 	size_t fd_size;
+	char *optional_xfs_path = NULL;
+	char fs_loc_template[] = "/tmp/thp_fs_XXXXXX";
+	const char *fs_loc;
+	bool created_tmp;
 
 	ksft_print_header();
 
@@ -370,6 +417,9 @@ int main(int argc, char **argv)
 		ksft_finished();
 	}
 
+	if (argc > 1)
+		optional_xfs_path = argv[1];
+
 	ksft_set_plan(3+9);
 
 	pagesize = getpagesize();
@@ -384,8 +434,11 @@ int main(int argc, char **argv)
 	split_pte_mapped_thp();
 	split_file_backed_thp();
 
+	created_tmp = prepare_thp_fs(optional_xfs_path, fs_loc_template,
+			&fs_loc);
 	for (i = 8; i >= 0; i--)
-		split_thp_in_pagecache_to_order(fd_size, i);
+		split_thp_in_pagecache_to_order(fd_size, i, fs_loc);
+	cleanup_thp_fs(fs_loc, created_tmp);
 
 	ksft_finished();
Aishwarya TCV March 4, 2024, 6:25 p.m. UTC | #24
On 04/03/2024 15:57, Zi Yan wrote:
> On 4 Mar 2024, at 10:44, Aishwarya TCV wrote:
> 
>> On 04/03/2024 14:58, Zi Yan wrote:
>>> On 4 Mar 2024, at 4:50, Aishwarya TCV wrote:
>>>
>>>> On 01/03/2024 21:10, Zi Yan wrote:
>>>>> On 1 Mar 2024, at 15:02, Zi Yan wrote:
>>>>>
>>>>>> On 1 Mar 2024, at 14:37, Zi Yan wrote:
>>>>>>
>>>>>>> On 1 Mar 2024, at 4:51, Aishwarya TCV wrote:
>>>>>>>
>>>>>>>> On 26/02/2024 20:55, Zi Yan wrote:
>>>>>>>>> From: Zi Yan <ziy@nvidia.com>
>>>>>>>>>
>>>>>>>>> It is used to test split_huge_page_to_list_to_order for pagecache THPs.
>>>>>>>>> Also add test cases for split_huge_page_to_list_to_order via both
>>>>>>>>> debugfs.
>>>>>>>>>
>>>>>>>>> Signed-off-by: Zi Yan <ziy@nvidia.com>
>>>>>>>>> ---
>>>>>>>>>  mm/huge_memory.c                              |  34 ++++--
>>>>>>>>>  .../selftests/mm/split_huge_page_test.c       | 115 +++++++++++++++++-
>>>>>>>>>  2 files changed, 131 insertions(+), 18 deletions(-)
>>>>>>>>>
>>>>>>>>
>>>>>>>> Hi Zi,
>>>>>>>>
>>>>>>>> When booting the kernel against next-master(20240228)with Arm64 on
>>>>>>>> Marvell Thunder X2 (TX2), the kselftest-mm test 'split_huge_page_test'
>>>>>>>> is failing in our CI (with rootfs over NFS). I can send the full logs if
>>>>>>>> required.
>>>>>>>>
>>>>>>>> A bisect (full log below) identified this patch as introducing the
>>>>>>>> failure. Bisected it on the tag "next-20240228" at repo
>>>>>>>> "https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git".
>>>>>>>>
>>>>>>>> This works fine on  Linux version 6.8.0-rc6
>>>>>>>
>>>>>>> Hi Aishwarya,
>>>>>>>
>>>>>>> Can you try the attached patch and see if it fixes the failure? I changed
>>>>>>> the test to accept XFS dev as input, mount XFS on a temp folder under /tmp,
>>>>>>> and skip if no XFS is mounted.
>>>>>>
>>>>>> Please try this updated one. It allows you to specify a XFS device path
>>>>>> in SPLIT_HUGE_PAGE_TEST_XFS_PATH env variable, which is passed to
>>>>>> split_huge_page_test in run_vmtests.sh. It at least allow CI/CD to run
>>>>>> the test without too much change.
>>>>>
>>>>> OK. This hopefully will be my last churn. Now split_huge_page_test accepts
>>>>> a path that is backed by XFS and run_vmtest.sh creates a XFS image in /tmp,
>>>>> mounts it in /tmp, and gives the path to split_huge_page_test. I tested
>>>>> it locally and it works. Let me know if you have any issue. Thanks.
>>>>>
>>>>> --
>>>>> Best Regards,
>>>>> Yan, Zi
>>>>
>>>> Hi Zi,
>>>>
>>>> Tested the patch by applying it on next-20240304. Logs from our CI with
>>>> rootfs over nfs is attached below. "Bail out! cannot remove tmp dir:
>>>> Directory not empty" is still observed.
>>>
>>> Hi Aishwarya,
>>>
>>> Do you have the config file for the CI kernel? And /tmp is also on nfs?
>>> Any detailed information about CI machine environment? I cannot reproduce
>>> the error locally, either on bare metal or VM. Maybe because my /tmp is
>>> not NFS mounted?
>>>
>>
>> Hi Zi,
>>
>> Please find the details below. Hope it helps.
>>
>> Do you have the config file for the CI kernel?
>> - We are using:
>> defconfig+https://github.com/torvalds/linux/blob/master/tools/testing/selftests/mm/config
>>
>> And /tmp is also on nfs?
>> - Yes
>>
>> Any detailed information about CI machine environment?
>> - We are running the test using LAVA device Cavium Thunder X2 (TX2),
>> - We have very similar rootfs as - nfsrootfs:
>> https://storage.kernelci.org/images/rootfs/debian/bullseye-kselftest/20240129.0/arm64/full.rootfs.tar.xz
>> - We are using grub boot method over nfs
>> - Additionally Ryan mentioned "Looks like it is failing because he is
>> trying to delete the temp dir with rmdir() but rmdir() requires the
>> directory to be empty, which it is not."
> 
> Hi Aishwarya,
> 
> Thank you for the information and I am able to reproduce it on a NFS folder.
> The error comes from that the opened test files are not munmapped and their
> file descriptors are not closed in the skip path. NFS creates .nfsXXX files
> for them, making the temp folder not empty.
> 
> The attached patch cleans up properly and works on a NFS folder. Let me know
> if it works on your side. Thanks.
> 
> --
> Best Regards,
> Yan, Zi

Hi Zi,

Tested the attached patch on next-20240304. Confirming that the test is
running fine. Test run log is attached below.

Test run log:
# # ------------------------------
# # running ./split_huge_page_test
# # ------------------------------
# # TAP version 13
# # 1..12
# # ok 1 Split huge pages successful
# # ok 2 Split PTE-mapped huge pages successful
# # # Please enable pr_debug in split_huge_pages_in_file() for more info.
# # # Please check dmesg for more information
# # ok 3 File-backed THP split test done
<6>[ 1769.710429] split_huge_page (111119): drop_caches: 3
<6>[ 1769.710429] split_huge_page (111119): drop_caches: 3
# # # No large pagecache folio generated, please provide a filesystem
supporting large folio
# # ok 4 # SKIP Pagecache folio split skipped
<6>[ 1775.302315] split_huge_page (111119): drop_caches: 3
<6>[ 1775.302315] split_huge_page (111119): drop_caches: 3
# # # No large pagecache folio generated, please provide a filesystem
supporting large folio
# # ok 5 # SKIP Pagecache folio split skipped
<6>[ 1780.924147] split_huge_page (111119): drop_caches: 3
<6>[ 1780.924147] split_huge_page (111119): drop_caches: 3
# # # No large pagecache folio generated, please provide a filesystem
supporting large folio
# # ok 6 # SKIP Pagecache folio split skipped
<6>[ 1786.524931] split_huge_page (111119): drop_caches: 3
<6>[ 1786.524931] split_huge_page (111119): drop_caches: 3
# # # No large pagecache folio generated, please provide a filesystem
supporting large folio
# # ok 7 # SKIP Pagecache folio split skipped
<6>[ 1792.112869] split_huge_page (111119): drop_caches: 3
<6>[ 1792.112869] split_huge_page (111119): drop_caches: 3
# # # No large pagecache folio generated, please provide a filesystem
supporting large folio
# # ok 8 # SKIP Pagecache folio split skipped
<6>[ 1797.718863] split_huge_page (111119): drop_caches: 3
<6>[ 1797.718863] split_huge_page (111119): drop_caches: 3
# # # No large pagecache folio generated, please provide a filesystem
supporting large folio
# # ok 9 # SKIP Pagecache folio split skipped
<6>[ 1803.332343] split_huge_page (111119): drop_caches: 3
<6>[ 1803.332343] split_huge_page (111119): drop_caches: 3
# # # No large pagecache folio generated, please provide a filesystem
supporting large folio
# # ok 10 # SKIP Pagecache folio split skipped
<6>[ 1808.947913] split_huge_page (111119): drop_caches: 3
<6>[ 1808.947913] split_huge_page (111119): drop_caches: 3
# # # No large pagecache folio generated, please provide a filesystem
supporting large folio
# # ok 11 # SKIP Pagecache folio split skipped
<6>[ 1814.537995] split_huge_page (111119): drop_caches: 3
<6>[ 1814.537995] split_huge_page (111119): drop_caches: 3
# # # No large pagecache folio generated, please provide a filesystem
supporting large folio
# # ok 12 # SKIP Pagecache folio split skipped
# # # Totals: pass:3 fail:0 xfail:0 xpass:0 skip:9 error:0
# # [PASS]
# ok 51 split_huge_page_test
# # -------------------

Thanks,
Aishwarya
Zi Yan March 4, 2024, 6:26 p.m. UTC | #25
On 4 Mar 2024, at 13:25, Aishwarya TCV wrote:

> On 04/03/2024 15:57, Zi Yan wrote:
>> On 4 Mar 2024, at 10:44, Aishwarya TCV wrote:
>>
>>> On 04/03/2024 14:58, Zi Yan wrote:
>>>> On 4 Mar 2024, at 4:50, Aishwarya TCV wrote:
>>>>
>>>>> On 01/03/2024 21:10, Zi Yan wrote:
>>>>>> On 1 Mar 2024, at 15:02, Zi Yan wrote:
>>>>>>
>>>>>>> On 1 Mar 2024, at 14:37, Zi Yan wrote:
>>>>>>>
>>>>>>>> On 1 Mar 2024, at 4:51, Aishwarya TCV wrote:
>>>>>>>>
>>>>>>>>> On 26/02/2024 20:55, Zi Yan wrote:
>>>>>>>>>> From: Zi Yan <ziy@nvidia.com>
>>>>>>>>>>
>>>>>>>>>> It is used to test split_huge_page_to_list_to_order for pagecache THPs.
>>>>>>>>>> Also add test cases for split_huge_page_to_list_to_order via both
>>>>>>>>>> debugfs.
>>>>>>>>>>
>>>>>>>>>> Signed-off-by: Zi Yan <ziy@nvidia.com>
>>>>>>>>>> ---
>>>>>>>>>>  mm/huge_memory.c                              |  34 ++++--
>>>>>>>>>>  .../selftests/mm/split_huge_page_test.c       | 115 +++++++++++++++++-
>>>>>>>>>>  2 files changed, 131 insertions(+), 18 deletions(-)
>>>>>>>>>>
>>>>>>>>>
>>>>>>>>> Hi Zi,
>>>>>>>>>
>>>>>>>>> When booting the kernel against next-master(20240228)with Arm64 on
>>>>>>>>> Marvell Thunder X2 (TX2), the kselftest-mm test 'split_huge_page_test'
>>>>>>>>> is failing in our CI (with rootfs over NFS). I can send the full logs if
>>>>>>>>> required.
>>>>>>>>>
>>>>>>>>> A bisect (full log below) identified this patch as introducing the
>>>>>>>>> failure. Bisected it on the tag "next-20240228" at repo
>>>>>>>>> "https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git".
>>>>>>>>>
>>>>>>>>> This works fine on  Linux version 6.8.0-rc6
>>>>>>>>
>>>>>>>> Hi Aishwarya,
>>>>>>>>
>>>>>>>> Can you try the attached patch and see if it fixes the failure? I changed
>>>>>>>> the test to accept XFS dev as input, mount XFS on a temp folder under /tmp,
>>>>>>>> and skip if no XFS is mounted.
>>>>>>>
>>>>>>> Please try this updated one. It allows you to specify a XFS device path
>>>>>>> in SPLIT_HUGE_PAGE_TEST_XFS_PATH env variable, which is passed to
>>>>>>> split_huge_page_test in run_vmtests.sh. It at least allow CI/CD to run
>>>>>>> the test without too much change.
>>>>>>
>>>>>> OK. This hopefully will be my last churn. Now split_huge_page_test accepts
>>>>>> a path that is backed by XFS and run_vmtest.sh creates a XFS image in /tmp,
>>>>>> mounts it in /tmp, and gives the path to split_huge_page_test. I tested
>>>>>> it locally and it works. Let me know if you have any issue. Thanks.
>>>>>>
>>>>>> --
>>>>>> Best Regards,
>>>>>> Yan, Zi
>>>>>
>>>>> Hi Zi,
>>>>>
>>>>> Tested the patch by applying it on next-20240304. Logs from our CI with
>>>>> rootfs over nfs is attached below. "Bail out! cannot remove tmp dir:
>>>>> Directory not empty" is still observed.
>>>>
>>>> Hi Aishwarya,
>>>>
>>>> Do you have the config file for the CI kernel? And /tmp is also on nfs?
>>>> Any detailed information about CI machine environment? I cannot reproduce
>>>> the error locally, either on bare metal or VM. Maybe because my /tmp is
>>>> not NFS mounted?
>>>>
>>>
>>> Hi Zi,
>>>
>>> Please find the details below. Hope it helps.
>>>
>>> Do you have the config file for the CI kernel?
>>> - We are using:
>>> defconfig+https://github.com/torvalds/linux/blob/master/tools/testing/selftests/mm/config
>>>
>>> And /tmp is also on nfs?
>>> - Yes
>>>
>>> Any detailed information about CI machine environment?
>>> - We are running the test using LAVA device Cavium Thunder X2 (TX2),
>>> - We have very similar rootfs as - nfsrootfs:
>>> https://storage.kernelci.org/images/rootfs/debian/bullseye-kselftest/20240129.0/arm64/full.rootfs.tar.xz
>>> - We are using grub boot method over nfs
>>> - Additionally Ryan mentioned "Looks like it is failing because he is
>>> trying to delete the temp dir with rmdir() but rmdir() requires the
>>> directory to be empty, which it is not."
>>
>> Hi Aishwarya,
>>
>> Thank you for the information and I am able to reproduce it on a NFS folder.
>> The error comes from that the opened test files are not munmapped and their
>> file descriptors are not closed in the skip path. NFS creates .nfsXXX files
>> for them, making the temp folder not empty.
>>
>> The attached patch cleans up properly and works on a NFS folder. Let me know
>> if it works on your side. Thanks.
>>
>> --
>> Best Regards,
>> Yan, Zi
>
> Hi Zi,
>
> Tested the attached patch on next-20240304. Confirming that the test is
> running fine. Test run log is attached below.
>
> Test run log:
> # # ------------------------------
> # # running ./split_huge_page_test
> # # ------------------------------
> # # TAP version 13
> # # 1..12
> # # ok 1 Split huge pages successful
> # # ok 2 Split PTE-mapped huge pages successful
> # # # Please enable pr_debug in split_huge_pages_in_file() for more info.
> # # # Please check dmesg for more information
> # # ok 3 File-backed THP split test done
> <6>[ 1769.710429] split_huge_page (111119): drop_caches: 3
> <6>[ 1769.710429] split_huge_page (111119): drop_caches: 3
> # # # No large pagecache folio generated, please provide a filesystem
> supporting large folio
> # # ok 4 # SKIP Pagecache folio split skipped
> <6>[ 1775.302315] split_huge_page (111119): drop_caches: 3
> <6>[ 1775.302315] split_huge_page (111119): drop_caches: 3
> # # # No large pagecache folio generated, please provide a filesystem
> supporting large folio
> # # ok 5 # SKIP Pagecache folio split skipped
> <6>[ 1780.924147] split_huge_page (111119): drop_caches: 3
> <6>[ 1780.924147] split_huge_page (111119): drop_caches: 3
> # # # No large pagecache folio generated, please provide a filesystem
> supporting large folio
> # # ok 6 # SKIP Pagecache folio split skipped
> <6>[ 1786.524931] split_huge_page (111119): drop_caches: 3
> <6>[ 1786.524931] split_huge_page (111119): drop_caches: 3
> # # # No large pagecache folio generated, please provide a filesystem
> supporting large folio
> # # ok 7 # SKIP Pagecache folio split skipped
> <6>[ 1792.112869] split_huge_page (111119): drop_caches: 3
> <6>[ 1792.112869] split_huge_page (111119): drop_caches: 3
> # # # No large pagecache folio generated, please provide a filesystem
> supporting large folio
> # # ok 8 # SKIP Pagecache folio split skipped
> <6>[ 1797.718863] split_huge_page (111119): drop_caches: 3
> <6>[ 1797.718863] split_huge_page (111119): drop_caches: 3
> # # # No large pagecache folio generated, please provide a filesystem
> supporting large folio
> # # ok 9 # SKIP Pagecache folio split skipped
> <6>[ 1803.332343] split_huge_page (111119): drop_caches: 3
> <6>[ 1803.332343] split_huge_page (111119): drop_caches: 3
> # # # No large pagecache folio generated, please provide a filesystem
> supporting large folio
> # # ok 10 # SKIP Pagecache folio split skipped
> <6>[ 1808.947913] split_huge_page (111119): drop_caches: 3
> <6>[ 1808.947913] split_huge_page (111119): drop_caches: 3
> # # # No large pagecache folio generated, please provide a filesystem
> supporting large folio
> # # ok 11 # SKIP Pagecache folio split skipped
> <6>[ 1814.537995] split_huge_page (111119): drop_caches: 3
> <6>[ 1814.537995] split_huge_page (111119): drop_caches: 3
> # # # No large pagecache folio generated, please provide a filesystem
> supporting large folio
> # # ok 12 # SKIP Pagecache folio split skipped
> # # # Totals: pass:3 fail:0 xfail:0 xpass:0 skip:9 error:0
> # # [PASS]
> # ok 51 split_huge_page_test
> # # -------------------

Thanks a lot for testing.

--
Best Regards,
Yan, Zi
Zi Yan March 4, 2024, 6:31 p.m. UTC | #26
On 4 Mar 2024, at 10:57, Zi Yan wrote:

> On 4 Mar 2024, at 10:44, Aishwarya TCV wrote:
>
>> On 04/03/2024 14:58, Zi Yan wrote:
>>> On 4 Mar 2024, at 4:50, Aishwarya TCV wrote:
>>>
>>>> On 01/03/2024 21:10, Zi Yan wrote:
>>>>> On 1 Mar 2024, at 15:02, Zi Yan wrote:
>>>>>
>>>>>> On 1 Mar 2024, at 14:37, Zi Yan wrote:
>>>>>>
>>>>>>> On 1 Mar 2024, at 4:51, Aishwarya TCV wrote:
>>>>>>>
>>>>>>>> On 26/02/2024 20:55, Zi Yan wrote:
>>>>>>>>> From: Zi Yan <ziy@nvidia.com>
>>>>>>>>>
>>>>>>>>> It is used to test split_huge_page_to_list_to_order for pagecache THPs.
>>>>>>>>> Also add test cases for split_huge_page_to_list_to_order via both
>>>>>>>>> debugfs.
>>>>>>>>>
>>>>>>>>> Signed-off-by: Zi Yan <ziy@nvidia.com>
>>>>>>>>> ---
>>>>>>>>>  mm/huge_memory.c                              |  34 ++++--
>>>>>>>>>  .../selftests/mm/split_huge_page_test.c       | 115 +++++++++++++++++-
>>>>>>>>>  2 files changed, 131 insertions(+), 18 deletions(-)
>>>>>>>>>
>>>>>>>>
>>>>>>>> Hi Zi,
>>>>>>>>
>>>>>>>> When booting the kernel against next-master(20240228)with Arm64 on
>>>>>>>> Marvell Thunder X2 (TX2), the kselftest-mm test 'split_huge_page_test'
>>>>>>>> is failing in our CI (with rootfs over NFS). I can send the full logs if
>>>>>>>> required.
>>>>>>>>
>>>>>>>> A bisect (full log below) identified this patch as introducing the
>>>>>>>> failure. Bisected it on the tag "next-20240228" at repo
>>>>>>>> "https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git".
>>>>>>>>
>>>>>>>> This works fine on  Linux version 6.8.0-rc6
>>>>>>>
>>>>>>> Hi Aishwarya,
>>>>>>>
>>>>>>> Can you try the attached patch and see if it fixes the failure? I changed
>>>>>>> the test to accept XFS dev as input, mount XFS on a temp folder under /tmp,
>>>>>>> and skip if no XFS is mounted.
>>>>>>
>>>>>> Please try this updated one. It allows you to specify a XFS device path
>>>>>> in SPLIT_HUGE_PAGE_TEST_XFS_PATH env variable, which is passed to
>>>>>> split_huge_page_test in run_vmtests.sh. It at least allow CI/CD to run
>>>>>> the test without too much change.
>>>>>
>>>>> OK. This hopefully will be my last churn. Now split_huge_page_test accepts
>>>>> a path that is backed by XFS and run_vmtest.sh creates a XFS image in /tmp,
>>>>> mounts it in /tmp, and gives the path to split_huge_page_test. I tested
>>>>> it locally and it works. Let me know if you have any issue. Thanks.
>>>>>
>>>>> --
>>>>> Best Regards,
>>>>> Yan, Zi
>>>>
>>>> Hi Zi,
>>>>
>>>> Tested the patch by applying it on next-20240304. Logs from our CI with
>>>> rootfs over nfs is attached below. "Bail out! cannot remove tmp dir:
>>>> Directory not empty" is still observed.
>>>
>>> Hi Aishwarya,
>>>
>>> Do you have the config file for the CI kernel? And /tmp is also on nfs?
>>> Any detailed information about CI machine environment? I cannot reproduce
>>> the error locally, either on bare metal or VM. Maybe because my /tmp is
>>> not NFS mounted?
>>>
>>
>> Hi Zi,
>>
>> Please find the details below. Hope it helps.
>>
>> Do you have the config file for the CI kernel?
>> - We are using:
>> defconfig+https://github.com/torvalds/linux/blob/master/tools/testing/selftests/mm/config
>>
>> And /tmp is also on nfs?
>> - Yes
>>
>> Any detailed information about CI machine environment?
>> - We are running the test using LAVA device Cavium Thunder X2 (TX2),
>> - We have very similar rootfs as - nfsrootfs:
>> https://storage.kernelci.org/images/rootfs/debian/bullseye-kselftest/20240129.0/arm64/full.rootfs.tar.xz
>> - We are using grub boot method over nfs
>> - Additionally Ryan mentioned "Looks like it is failing because he is
>> trying to delete the temp dir with rmdir() but rmdir() requires the
>> directory to be empty, which it is not."
>
> Hi Aishwarya,
>
> Thank you for the information and I am able to reproduce it on a NFS folder.
> The error comes from that the opened test files are not munmapped and their
> file descriptors are not closed in the skip path. NFS creates .nfsXXX files
> for them, making the temp folder not empty.
>
> The attached patch cleans up properly and works on a NFS folder. Let me know
> if it works on your side. Thanks.

Hi Andrew,

Could you fold the patch from [1] to this one (i.e., Patch 8)? Let me know
if you want me to send it separately or resend the entire patchset. Thanks.

[1] https://lore.kernel.org/linux-mm/57259BE4-D4A1-4F57-8AEA-DE526C43DE44@nvidia.com/

--
Best Regards,
Yan, Zi
Zi Yan March 7, 2024, 3:06 p.m. UTC | #27
On 26 Feb 2024, at 15:55, Zi Yan wrote:

> From: Zi Yan <ziy@nvidia.com>
>
> It is used to test split_huge_page_to_list_to_order for pagecache THPs.
> Also add test cases for split_huge_page_to_list_to_order via both
> debugfs.
>
> Signed-off-by: Zi Yan <ziy@nvidia.com>
> ---
>  mm/huge_memory.c                              |  34 ++++--
>  .../selftests/mm/split_huge_page_test.c       | 115 +++++++++++++++++-
>  2 files changed, 131 insertions(+), 18 deletions(-)

Hi Andrew,

This is the fixup for patch 8. It is based on the discussion
with Dan Carpenter at https://lore.kernel.org/linux-mm/7dda9283-b437-4cf8-ab0d-83c330deb9c0@moroto.mountain/. It checks new_order input from
debugfs and skips folios early if new_order is greater than the folio order.

Thanks.

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index a81a09236c16..42d4f62d7760 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -3484,6 +3484,9 @@ static int split_huge_pages_pid(int pid, unsigned long vaddr_start,
                        goto next;

                total++;
+
+               if (new_order >= folio_order(folio))
+                       goto next;
                /*
                 * For folios with private, split_huge_page_to_list_to_order()
                 * will try to drop it before split and then check if the folio
@@ -3550,6 +3553,9 @@ static int split_huge_pages_in_file(const char *file_path, pgoff_t off_start,
                total++;
                nr_pages = folio_nr_pages(folio);

+               if (new_order >= folio_order(folio))
+                       goto next;
+
                if (!folio_trylock(folio))
                        goto next;



--
Best Regards,
Yan, Zi
diff mbox series

Patch

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 8b47a96a28f9..50d146eb248f 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -3422,7 +3422,7 @@  static inline bool vma_not_suitable_for_thp_split(struct vm_area_struct *vma)
 }
 
 static int split_huge_pages_pid(int pid, unsigned long vaddr_start,
-				unsigned long vaddr_end)
+				unsigned long vaddr_end, unsigned int new_order)
 {
 	int ret = 0;
 	struct task_struct *task;
@@ -3486,13 +3486,19 @@  static int split_huge_pages_pid(int pid, unsigned long vaddr_start,
 			goto next;
 
 		total++;
-		if (!can_split_folio(folio, NULL))
+		/*
+		 * For folios with private, split_huge_page_to_list_to_order()
+		 * will try to drop it before split and then check if the folio
+		 * can be split or not. So skip the check here.
+		 */
+		if (!folio_test_private(folio) &&
+		    !can_split_folio(folio, NULL))
 			goto next;
 
 		if (!folio_trylock(folio))
 			goto next;
 
-		if (!split_folio(folio))
+		if (!split_folio_to_order(folio, new_order))
 			split++;
 
 		folio_unlock(folio);
@@ -3510,7 +3516,7 @@  static int split_huge_pages_pid(int pid, unsigned long vaddr_start,
 }
 
 static int split_huge_pages_in_file(const char *file_path, pgoff_t off_start,
-				pgoff_t off_end)
+				pgoff_t off_end, unsigned int new_order)
 {
 	struct filename *file;
 	struct file *candidate;
@@ -3549,7 +3555,7 @@  static int split_huge_pages_in_file(const char *file_path, pgoff_t off_start,
 		if (!folio_trylock(folio))
 			goto next;
 
-		if (!split_folio(folio))
+		if (!split_folio_to_order(folio, new_order))
 			split++;
 
 		folio_unlock(folio);
@@ -3574,10 +3580,14 @@  static ssize_t split_huge_pages_write(struct file *file, const char __user *buf,
 {
 	static DEFINE_MUTEX(split_debug_mutex);
 	ssize_t ret;
-	/* hold pid, start_vaddr, end_vaddr or file_path, off_start, off_end */
+	/*
+	 * hold pid, start_vaddr, end_vaddr, new_order or
+	 * file_path, off_start, off_end, new_order
+	 */
 	char input_buf[MAX_INPUT_BUF_SZ];
 	int pid;
 	unsigned long vaddr_start, vaddr_end;
+	unsigned int new_order = 0;
 
 	ret = mutex_lock_interruptible(&split_debug_mutex);
 	if (ret)
@@ -3606,29 +3616,29 @@  static ssize_t split_huge_pages_write(struct file *file, const char __user *buf,
 			goto out;
 		}
 
-		ret = sscanf(buf, "0x%lx,0x%lx", &off_start, &off_end);
-		if (ret != 2) {
+		ret = sscanf(buf, "0x%lx,0x%lx,%d", &off_start, &off_end, &new_order);
+		if (ret != 2 && ret != 3) {
 			ret = -EINVAL;
 			goto out;
 		}
-		ret = split_huge_pages_in_file(file_path, off_start, off_end);
+		ret = split_huge_pages_in_file(file_path, off_start, off_end, new_order);
 		if (!ret)
 			ret = input_len;
 
 		goto out;
 	}
 
-	ret = sscanf(input_buf, "%d,0x%lx,0x%lx", &pid, &vaddr_start, &vaddr_end);
+	ret = sscanf(input_buf, "%d,0x%lx,0x%lx,%d", &pid, &vaddr_start, &vaddr_end, &new_order);
 	if (ret == 1 && pid == 1) {
 		split_huge_pages_all();
 		ret = strlen(input_buf);
 		goto out;
-	} else if (ret != 3) {
+	} else if (ret != 3 && ret != 4) {
 		ret = -EINVAL;
 		goto out;
 	}
 
-	ret = split_huge_pages_pid(pid, vaddr_start, vaddr_end);
+	ret = split_huge_pages_pid(pid, vaddr_start, vaddr_end, new_order);
 	if (!ret)
 		ret = strlen(input_buf);
 out:
diff --git a/tools/testing/selftests/mm/split_huge_page_test.c b/tools/testing/selftests/mm/split_huge_page_test.c
index 7b698a848bab..cf09fdc9ef22 100644
--- a/tools/testing/selftests/mm/split_huge_page_test.c
+++ b/tools/testing/selftests/mm/split_huge_page_test.c
@@ -16,6 +16,7 @@ 
 #include <sys/mount.h>
 #include <malloc.h>
 #include <stdbool.h>
+#include <time.h>
 #include "vm_util.h"
 #include "../kselftest.h"
 
@@ -24,10 +25,12 @@  unsigned int pageshift;
 uint64_t pmd_pagesize;
 
 #define SPLIT_DEBUGFS "/sys/kernel/debug/split_huge_pages"
+#define SMAP_PATH "/proc/self/smaps"
+#define THP_FS_PATH "/mnt/thp_fs"
 #define INPUT_MAX 80
 
-#define PID_FMT "%d,0x%lx,0x%lx"
-#define PATH_FMT "%s,0x%lx,0x%lx"
+#define PID_FMT "%d,0x%lx,0x%lx,%d"
+#define PATH_FMT "%s,0x%lx,0x%lx,%d"
 
 #define PFN_MASK     ((1UL<<55)-1)
 #define KPF_THP      (1UL<<22)
@@ -102,7 +105,7 @@  void split_pmd_thp(void)
 
 	/* split all THPs */
 	write_debugfs(PID_FMT, getpid(), (uint64_t)one_page,
-		(uint64_t)one_page + len);
+		(uint64_t)one_page + len, 0);
 
 	for (i = 0; i < len; i++)
 		if (one_page[i] != (char)i)
@@ -177,7 +180,7 @@  void split_pte_mapped_thp(void)
 
 	/* split all remapped THPs */
 	write_debugfs(PID_FMT, getpid(), (uint64_t)pte_mapped,
-		      (uint64_t)pte_mapped + pagesize * 4);
+		      (uint64_t)pte_mapped + pagesize * 4, 0);
 
 	/* smap does not show THPs after mremap, use kpageflags instead */
 	thp_size = 0;
@@ -237,7 +240,7 @@  void split_file_backed_thp(void)
 	}
 
 	/* split the file-backed THP */
-	write_debugfs(PATH_FMT, testfile, pgoff_start, pgoff_end);
+	write_debugfs(PATH_FMT, testfile, pgoff_start, pgoff_end, 0);
 
 	status = unlink(testfile);
 	if (status) {
@@ -265,8 +268,101 @@  void split_file_backed_thp(void)
 	ksft_exit_fail_msg("Error occurred\n");
 }
 
+void create_pagecache_thp_and_fd(const char *testfile, size_t fd_size, int *fd, char **addr)
+{
+	size_t i;
+	int dummy;
+
+	srand(time(NULL));
+
+	*fd = open(testfile, O_CREAT | O_RDWR, 0664);
+	if (*fd == -1)
+		ksft_exit_fail_msg("Failed to create a file at "THP_FS_PATH);
+
+	for (i = 0; i < fd_size; i++) {
+		unsigned char byte = (unsigned char)i;
+
+		write(*fd, &byte, sizeof(byte));
+	}
+	close(*fd);
+	sync();
+	*fd = open("/proc/sys/vm/drop_caches", O_WRONLY);
+	if (*fd == -1) {
+		ksft_perror("open drop_caches");
+		goto err_out_unlink;
+	}
+	if (write(*fd, "3", 1) != 1) {
+		ksft_perror("write to drop_caches");
+		goto err_out_unlink;
+	}
+	close(*fd);
+
+	*fd = open(testfile, O_RDWR);
+	if (*fd == -1) {
+		ksft_perror("Failed to open a file at "THP_FS_PATH);
+		goto err_out_unlink;
+	}
+
+	*addr = mmap(NULL, fd_size, PROT_READ|PROT_WRITE, MAP_SHARED, *fd, 0);
+	if (*addr == (char *)-1) {
+		ksft_perror("cannot mmap");
+		goto err_out_close;
+	}
+	madvise(*addr, fd_size, MADV_HUGEPAGE);
+
+	for (size_t i = 0; i < fd_size; i++)
+		dummy += *(*addr + i);
+
+	if (!check_huge_file(*addr, fd_size / pmd_pagesize, pmd_pagesize)) {
+		ksft_print_msg("No large pagecache folio generated, please mount a filesystem supporting large folio at "THP_FS_PATH"\n");
+		goto err_out_close;
+	}
+	return;
+err_out_close:
+	close(*fd);
+err_out_unlink:
+	unlink(testfile);
+	ksft_exit_fail_msg("Failed to create large pagecache folios\n");
+}
+
+void split_thp_in_pagecache_to_order(size_t fd_size, int order)
+{
+	int fd;
+	char *addr;
+	size_t i;
+	const char testfile[] = THP_FS_PATH "/test";
+	int err = 0;
+
+	create_pagecache_thp_and_fd(testfile, fd_size, &fd, &addr);
+
+	write_debugfs(PID_FMT, getpid(), (uint64_t)addr, (uint64_t)addr + fd_size, order);
+
+	for (i = 0; i < fd_size; i++)
+		if (*(addr + i) != (char)i) {
+			ksft_print_msg("%lu byte corrupted in the file\n", i);
+			err = EXIT_FAILURE;
+			goto out;
+		}
+
+	if (!check_huge_file(addr, 0, pmd_pagesize)) {
+		ksft_print_msg("Still FilePmdMapped not split\n");
+		err = EXIT_FAILURE;
+		goto out;
+	}
+
+out:
+	close(fd);
+	unlink(testfile);
+	if (err)
+		ksft_exit_fail_msg("Split PMD-mapped pagecache folio to order %d failed\n", order);
+	ksft_test_result_pass("Split PMD-mapped pagecache folio to order %d passed\n", order);
+}
+
 int main(int argc, char **argv)
 {
+	int i;
+	size_t fd_size;
+
 	ksft_print_header();
 
 	if (geteuid() != 0) {
@@ -274,7 +370,7 @@  int main(int argc, char **argv)
 		ksft_finished();
 	}
 
-	ksft_set_plan(3);
+	ksft_set_plan(3+9);
 
 	pagesize = getpagesize();
 	pageshift = ffs(pagesize) - 1;
@@ -282,9 +378,16 @@  int main(int argc, char **argv)
 	if (!pmd_pagesize)
 		ksft_exit_fail_msg("Reading PMD pagesize failed\n");
 
+	fd_size = 2 * pmd_pagesize;
+
 	split_pmd_thp();
 	split_pte_mapped_thp();
 	split_file_backed_thp();
 
+	for (i = 8; i >= 0; i--)
+		split_thp_in_pagecache_to_order(fd_size, i);
+
 	ksft_finished();
+
+	return 0;
 }