diff mbox series

[v2,bpf] lib/buildid: handle memfd_secret() files in build_id_parse()

Message ID 20241016221629.1043883-1-andrii@kernel.org (mailing list archive)
State Superseded
Delegated to: BPF
Headers show
Series [v2,bpf] lib/buildid: handle memfd_secret() files in build_id_parse() | expand

Checks

Context Check Description
netdev/series_format success Single patches do not need cover letters
netdev/tree_selection success Clearly marked for bpf
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag present in non-next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 9 this patch: 9
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers fail 1 blamed authors not CCed: eddyz87@gmail.com; 10 maintainers not CCed: song@kernel.org haoluo@google.com akpm@linux-foundation.org john.fastabend@gmail.com martin.lau@linux.dev sdf@fomichev.me kpsingh@kernel.org yonghong.song@linux.dev eddyz87@gmail.com jolsa@kernel.org
netdev/build_clang success Errors and warnings before: 7 this patch: 7
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success Fixes tag looks correct
netdev/build_allmodconfig_warn success Errors and warnings before: 7 this patch: 7
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 17 lines checked
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-VM_Test-7 success Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-14 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-VM_Test-15 success Logs for set-matrix
bpf/vmtest-bpf-VM_Test-9 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-8 success Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-11 fail Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-VM_Test-10 success Logs for aarch64-gcc / veristat
bpf/vmtest-bpf-VM_Test-13 success Logs for s390x-gcc / test
bpf/vmtest-bpf-VM_Test-12 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-PR fail PR summary
bpf/vmtest-bpf-VM_Test-17 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-VM_Test-16 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-VM_Test-6 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-VM_Test-19 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-18 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-20 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-22 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-24 success Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-26 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17-O2
bpf/vmtest-bpf-VM_Test-25 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-VM_Test-23 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-27 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-VM_Test-28 success Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-VM_Test-29 success Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-VM_Test-21 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-VM_Test-31 success Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-VM_Test-34 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-VM_Test-32 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-VM_Test-33 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18-O2
bpf/vmtest-bpf-VM_Test-30 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-VM_Test-36 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-VM_Test-37 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
bpf/vmtest-bpf-VM_Test-39 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-VM_Test-38 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-VM_Test-35 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18

Commit Message

Andrii Nakryiko Oct. 16, 2024, 10:16 p.m. UTC
From memfd_secret(2) manpage:

  The memory areas backing the file created with memfd_secret(2) are
  visible only to the processes that have access to the file descriptor.
  The memory region is removed from the kernel page tables and only the
  page tables of the processes holding the file descriptor map the
  corresponding physical memory. (Thus, the pages in the region can't be
  accessed by the kernel itself, so that, for example, pointers to the
  region can't be passed to system calls.)

So folios backed by such secretmem files are not mapped into kernel
address space and shouldn't be accessed, in general.

To make this a bit more generic of a fix and prevent regression in the
future for similar special mappings, do a generic check of whether the
folio we got is mapped with kernel_page_present(), as suggested in [1].
This will handle secretmem, and any future special cases that use
a similar approach.

Original report and repro can be found in [0].

  [0] https://lore.kernel.org/bpf/ZwyG8Uro%2FSyTXAni@ly-workstation/
  [1] https://lore.kernel.org/bpf/CAJD7tkbpEMx-eC4A-z8Jm1ikrY_KJVjWO+mhhz1_fni4x+COKw@mail.gmail.com/

Reported-by: Yi Lai <yi1.lai@intel.com>
Suggested-by: Yosry Ahmed <yosryahmed@google.com>
Fixes: de3ec364c3c3 ("lib/buildid: add single folio-based file reader abstraction")
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
---
 lib/buildid.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

Comments

Yosry Ahmed Oct. 16, 2024, 10:21 p.m. UTC | #1
On Wed, Oct 16, 2024 at 3:16 PM Andrii Nakryiko <andrii@kernel.org> wrote:
>
> From memfd_secret(2) manpage:
>
>   The memory areas backing the file created with memfd_secret(2) are
>   visible only to the processes that have access to the file descriptor.
>   The memory region is removed from the kernel page tables and only the
>   page tables of the processes holding the file descriptor map the
>   corresponding physical memory. (Thus, the pages in the region can't be
>   accessed by the kernel itself, so that, for example, pointers to the
>   region can't be passed to system calls.)
>
> So folios backed by such secretmem files are not mapped into kernel
> address space and shouldn't be accessed, in general.
>
> To make this a bit more generic of a fix and prevent regression in the
> future for similar special mappings, do a generic check of whether the
> folio we got is mapped with kernel_page_present(), as suggested in [1].
> This will handle secretmem, and any future special cases that use
> a similar approach.
>
> Original report and repro can be found in [0].
>
>   [0] https://lore.kernel.org/bpf/ZwyG8Uro%2FSyTXAni@ly-workstation/
>   [1] https://lore.kernel.org/bpf/CAJD7tkbpEMx-eC4A-z8Jm1ikrY_KJVjWO+mhhz1_fni4x+COKw@mail.gmail.com/
>
> Reported-by: Yi Lai <yi1.lai@intel.com>
> Suggested-by: Yosry Ahmed <yosryahmed@google.com>
> Fixes: de3ec364c3c3 ("lib/buildid: add single folio-based file reader abstraction")
> Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
> ---
>  lib/buildid.c | 5 ++++-
>  1 file changed, 4 insertions(+), 1 deletion(-)
>
> diff --git a/lib/buildid.c b/lib/buildid.c
> index 290641d92ac1..90df64fd64c1 100644
> --- a/lib/buildid.c
> +++ b/lib/buildid.c
> @@ -5,6 +5,7 @@
>  #include <linux/elf.h>
>  #include <linux/kernel.h>
>  #include <linux/pagemap.h>
> +#include <linux/set_memory.h>
>
>  #define BUILD_ID 3
>
> @@ -74,7 +75,9 @@ static int freader_get_folio(struct freader *r, loff_t file_off)
>                 filemap_invalidate_unlock_shared(r->file->f_mapping);
>         }
>
> -       if (IS_ERR(r->folio) || !folio_test_uptodate(r->folio)) {
> +       if (IS_ERR(r->folio) ||
> +           !kernel_page_present(&r->folio->page) ||
> +           !folio_test_uptodate(r->folio)) {

Do we need a comment here about the kernel_page_present() check to
make it clear that it is handling things like secretmem?

>                 if (!IS_ERR(r->folio))
>                         folio_put(r->folio);
>                 r->folio = NULL;
> --
> 2.43.5
>
Shakeel Butt Oct. 16, 2024, 11:57 p.m. UTC | #2
On Wed, Oct 16, 2024 at 03:16:29PM GMT, Andrii Nakryiko wrote:
> From memfd_secret(2) manpage:
> 
>   The memory areas backing the file created with memfd_secret(2) are
>   visible only to the processes that have access to the file descriptor.
>   The memory region is removed from the kernel page tables and only the
>   page tables of the processes holding the file descriptor map the
>   corresponding physical memory. (Thus, the pages in the region can't be
>   accessed by the kernel itself, so that, for example, pointers to the
>   region can't be passed to system calls.)
> 
> So folios backed by such secretmem files are not mapped into kernel
> address space and shouldn't be accessed, in general.
> 
> To make this a bit more generic of a fix and prevent regression in the
> future for similar special mappings, do a generic check of whether the
> folio we got is mapped with kernel_page_present(), as suggested in [1].
> This will handle secretmem, and any future special cases that use
> a similar approach.
> 
> Original report and repro can be found in [0].
> 
>   [0] https://lore.kernel.org/bpf/ZwyG8Uro%2FSyTXAni@ly-workstation/
>   [1] https://lore.kernel.org/bpf/CAJD7tkbpEMx-eC4A-z8Jm1ikrY_KJVjWO+mhhz1_fni4x+COKw@mail.gmail.com/
> 
> Reported-by: Yi Lai <yi1.lai@intel.com>
> Suggested-by: Yosry Ahmed <yosryahmed@google.com>
> Fixes: de3ec364c3c3 ("lib/buildid: add single folio-based file reader abstraction")
> Signed-off-by: Andrii Nakryiko <andrii@kernel.org>

Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
Daniel Borkmann Oct. 17, 2024, 8:59 a.m. UTC | #3
On 10/17/24 12:16 AM, Andrii Nakryiko wrote:
>  From memfd_secret(2) manpage:
> 
>    The memory areas backing the file created with memfd_secret(2) are
>    visible only to the processes that have access to the file descriptor.
>    The memory region is removed from the kernel page tables and only the
>    page tables of the processes holding the file descriptor map the
>    corresponding physical memory. (Thus, the pages in the region can't be
>    accessed by the kernel itself, so that, for example, pointers to the
>    region can't be passed to system calls.)
> 
> So folios backed by such secretmem files are not mapped into kernel
> address space and shouldn't be accessed, in general.
> 
> To make this a bit more generic of a fix and prevent regression in the
> future for similar special mappings, do a generic check of whether the
> folio we got is mapped with kernel_page_present(), as suggested in [1].
> This will handle secretmem, and any future special cases that use
> a similar approach.
> 
> Original report and repro can be found in [0].
> 
>    [0] https://lore.kernel.org/bpf/ZwyG8Uro%2FSyTXAni@ly-workstation/
>    [1] https://lore.kernel.org/bpf/CAJD7tkbpEMx-eC4A-z8Jm1ikrY_KJVjWO+mhhz1_fni4x+COKw@mail.gmail.com/
> 
> Reported-by: Yi Lai <yi1.lai@intel.com>
> Suggested-by: Yosry Ahmed <yosryahmed@google.com>
> Fixes: de3ec364c3c3 ("lib/buildid: add single folio-based file reader abstraction")
> Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
> ---
>   lib/buildid.c | 5 ++++-
>   1 file changed, 4 insertions(+), 1 deletion(-)
> 
> diff --git a/lib/buildid.c b/lib/buildid.c
> index 290641d92ac1..90df64fd64c1 100644
> --- a/lib/buildid.c
> +++ b/lib/buildid.c
> @@ -5,6 +5,7 @@
>   #include <linux/elf.h>
>   #include <linux/kernel.h>
>   #include <linux/pagemap.h>
> +#include <linux/set_memory.h>
>   
>   #define BUILD_ID 3
>   
> @@ -74,7 +75,9 @@ static int freader_get_folio(struct freader *r, loff_t file_off)
>   		filemap_invalidate_unlock_shared(r->file->f_mapping);
>   	}
>   
> -	if (IS_ERR(r->folio) || !folio_test_uptodate(r->folio)) {
> +	if (IS_ERR(r->folio) ||
> +	    !kernel_page_present(&r->folio->page) ||
> +	    !folio_test_uptodate(r->folio)) {

BPF CI fails to build this on s390 (+ Ilya & others):

   [...]
     CC      crypto/ctr.o
   ../lib/buildid.c: In function ‘freader_get_folio’:
   ../lib/buildid.c:79:14: error: implicit declaration of function ‘kernel_page_present’ [-Werror=implicit-function-declaration]
      79 |             !kernel_page_present(&r->folio->page) ||
         |              ^~~~~~~~~~~~~~~~~~~
     CC      net/sched/cls_bpf.o
   [...]

Interestingly, the generic kernel_page_present() which returns true under
!CONFIG_ARCH_HAS_SET_DIRECT_MAP is not used here since s390 selects the
CONFIG_ARCH_HAS_SET_DIRECT_MAP, but does not provide an implementation of
the function compared to the others which select it (x86, arm64, riscv).
Relevant commit is 0490d6d7ba0a ("s390/mm: enable ARCH_HAS_SET_DIRECT_MAP").

>   		if (!IS_ERR(r->folio))
>   			folio_put(r->folio);
>   		r->folio = NULL;
David Hildenbrand Oct. 17, 2024, 9:18 a.m. UTC | #4
On 17.10.24 00:16, Andrii Nakryiko wrote:
>  From memfd_secret(2) manpage:
> 
>    The memory areas backing the file created with memfd_secret(2) are
>    visible only to the processes that have access to the file descriptor.
>    The memory region is removed from the kernel page tables and only the
>    page tables of the processes holding the file descriptor map the
>    corresponding physical memory. (Thus, the pages in the region can't be
>    accessed by the kernel itself, so that, for example, pointers to the
>    region can't be passed to system calls.)
> 
> So folios backed by such secretmem files are not mapped into kernel
> address space and shouldn't be accessed, in general.
> 
> To make this a bit more generic of a fix and prevent regression in the
> future for similar special mappings, do a generic check of whether the
> folio we got is mapped with kernel_page_present(), as suggested in [1].
> This will handle secretmem, and any future special cases that use
> a similar approach.
> 
> Original report and repro can be found in [0].
> 
>    [0] https://lore.kernel.org/bpf/ZwyG8Uro%2FSyTXAni@ly-workstation/
>    [1] https://lore.kernel.org/bpf/CAJD7tkbpEMx-eC4A-z8Jm1ikrY_KJVjWO+mhhz1_fni4x+COKw@mail.gmail.com/
> 
> Reported-by: Yi Lai <yi1.lai@intel.com>
> Suggested-by: Yosry Ahmed <yosryahmed@google.com>
> Fixes: de3ec364c3c3 ("lib/buildid: add single folio-based file reader abstraction")
> Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
> ---
>   lib/buildid.c | 5 ++++-
>   1 file changed, 4 insertions(+), 1 deletion(-)
> 
> diff --git a/lib/buildid.c b/lib/buildid.c
> index 290641d92ac1..90df64fd64c1 100644
> --- a/lib/buildid.c
> +++ b/lib/buildid.c
> @@ -5,6 +5,7 @@
>   #include <linux/elf.h>
>   #include <linux/kernel.h>
>   #include <linux/pagemap.h>
> +#include <linux/set_memory.h>
>   
>   #define BUILD_ID 3
>   
> @@ -74,7 +75,9 @@ static int freader_get_folio(struct freader *r, loff_t file_off)
>   		filemap_invalidate_unlock_shared(r->file->f_mapping);
>   	}
>   
> -	if (IS_ERR(r->folio) || !folio_test_uptodate(r->folio)) {
> +	if (IS_ERR(r->folio) ||
> +	    !kernel_page_present(&r->folio->page) ||
> +	    !folio_test_uptodate(r->folio)) {
>   		if (!IS_ERR(r->folio))
>   			folio_put(r->folio);
>   		r->folio = NULL;

As replied elsewhere, can't we take a look at the mapping?

We do the same thing in gup_fast_folio_allowed() where we check 
secretmem_mapping().
kernel test robot Oct. 17, 2024, 11:07 a.m. UTC | #5
Hi Andrii,

kernel test robot noticed the following build errors:

[auto build test ERROR on bpf/master]

url:    https://github.com/intel-lab-lkp/linux/commits/Andrii-Nakryiko/lib-buildid-handle-memfd_secret-files-in-build_id_parse/20241017-061747
base:   https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf.git master
patch link:    https://lore.kernel.org/r/20241016221629.1043883-1-andrii%40kernel.org
patch subject: [PATCH v2 bpf] lib/buildid: handle memfd_secret() files in build_id_parse()
config: s390-allnoconfig (https://download.01.org/0day-ci/archive/20241017/202410171803.RRmMX9xL-lkp@intel.com/config)
compiler: clang version 20.0.0git (https://github.com/llvm/llvm-project bfe84f7085d82d06d61c632a7bad1e692fd159e4)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20241017/202410171803.RRmMX9xL-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202410171803.RRmMX9xL-lkp@intel.com/

All errors (new ones prefixed by >>):

   In file included from lib/buildid.c:5:
   In file included from include/linux/elf.h:6:
   In file included from arch/s390/include/asm/elf.h:181:
   In file included from arch/s390/include/asm/mmu_context.h:11:
   In file included from arch/s390/include/asm/pgalloc.h:18:
   In file included from include/linux/mm.h:2213:
   include/linux/vmstat.h:518:36: warning: arithmetic between different enumeration types ('enum node_stat_item' and 'enum lru_list') [-Wenum-enum-conversion]
     518 |         return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_"
         |                               ~~~~~~~~~~~ ^ ~~~
>> lib/buildid.c:79:7: error: call to undeclared function 'kernel_page_present'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration]
      79 |             !kernel_page_present(&r->folio->page) ||
         |              ^
   1 warning and 1 error generated.


vim +/kernel_page_present +79 lib/buildid.c

    58	
    59	static int freader_get_folio(struct freader *r, loff_t file_off)
    60	{
    61		/* check if we can just reuse current folio */
    62		if (r->folio && file_off >= r->folio_off &&
    63		    file_off < r->folio_off + folio_size(r->folio))
    64			return 0;
    65	
    66		freader_put_folio(r);
    67	
    68		r->folio = filemap_get_folio(r->file->f_mapping, file_off >> PAGE_SHIFT);
    69	
    70		/* if sleeping is allowed, wait for the page, if necessary */
    71		if (r->may_fault && (IS_ERR(r->folio) || !folio_test_uptodate(r->folio))) {
    72			filemap_invalidate_lock_shared(r->file->f_mapping);
    73			r->folio = read_cache_folio(r->file->f_mapping, file_off >> PAGE_SHIFT,
    74						    NULL, r->file);
    75			filemap_invalidate_unlock_shared(r->file->f_mapping);
    76		}
    77	
    78		if (IS_ERR(r->folio) ||
  > 79		    !kernel_page_present(&r->folio->page) ||
    80		    !folio_test_uptodate(r->folio)) {
    81			if (!IS_ERR(r->folio))
    82				folio_put(r->folio);
    83			r->folio = NULL;
    84			return -EFAULT;
    85		}
    86	
    87		r->folio_off = folio_pos(r->folio);
    88		r->addr = kmap_local_folio(r->folio, 0);
    89	
    90		return 0;
    91	}
    92
kernel test robot Oct. 17, 2024, 11:59 a.m. UTC | #6
Hi Andrii,

kernel test robot noticed the following build errors:

[auto build test ERROR on bpf/master]

url:    https://github.com/intel-lab-lkp/linux/commits/Andrii-Nakryiko/lib-buildid-handle-memfd_secret-files-in-build_id_parse/20241017-061747
base:   https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf.git master
patch link:    https://lore.kernel.org/r/20241016221629.1043883-1-andrii%40kernel.org
patch subject: [PATCH v2 bpf] lib/buildid: handle memfd_secret() files in build_id_parse()
config: s390-allyesconfig (https://download.01.org/0day-ci/archive/20241017/202410171938.aMzLRpxM-lkp@intel.com/config)
compiler: s390-linux-gcc (GCC) 14.1.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20241017/202410171938.aMzLRpxM-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202410171938.aMzLRpxM-lkp@intel.com/

All errors (new ones prefixed by >>):

   lib/buildid.c: In function 'freader_get_folio':
>> lib/buildid.c:79:14: error: implicit declaration of function 'kernel_page_present' [-Wimplicit-function-declaration]
      79 |             !kernel_page_present(&r->folio->page) ||
         |              ^~~~~~~~~~~~~~~~~~~


vim +/kernel_page_present +79 lib/buildid.c

    58	
    59	static int freader_get_folio(struct freader *r, loff_t file_off)
    60	{
    61		/* check if we can just reuse current folio */
    62		if (r->folio && file_off >= r->folio_off &&
    63		    file_off < r->folio_off + folio_size(r->folio))
    64			return 0;
    65	
    66		freader_put_folio(r);
    67	
    68		r->folio = filemap_get_folio(r->file->f_mapping, file_off >> PAGE_SHIFT);
    69	
    70		/* if sleeping is allowed, wait for the page, if necessary */
    71		if (r->may_fault && (IS_ERR(r->folio) || !folio_test_uptodate(r->folio))) {
    72			filemap_invalidate_lock_shared(r->file->f_mapping);
    73			r->folio = read_cache_folio(r->file->f_mapping, file_off >> PAGE_SHIFT,
    74						    NULL, r->file);
    75			filemap_invalidate_unlock_shared(r->file->f_mapping);
    76		}
    77	
    78		if (IS_ERR(r->folio) ||
  > 79		    !kernel_page_present(&r->folio->page) ||
    80		    !folio_test_uptodate(r->folio)) {
    81			if (!IS_ERR(r->folio))
    82				folio_put(r->folio);
    83			r->folio = NULL;
    84			return -EFAULT;
    85		}
    86	
    87		r->folio_off = folio_pos(r->folio);
    88		r->addr = kmap_local_folio(r->folio, 0);
    89	
    90		return 0;
    91	}
    92
Shakeel Butt Oct. 17, 2024, 4:35 p.m. UTC | #7
On Thu, Oct 17, 2024 at 11:18:34AM GMT, David Hildenbrand wrote:
> On 17.10.24 00:16, Andrii Nakryiko wrote:
> >  From memfd_secret(2) manpage:
> > 
> >    The memory areas backing the file created with memfd_secret(2) are
> >    visible only to the processes that have access to the file descriptor.
> >    The memory region is removed from the kernel page tables and only the
> >    page tables of the processes holding the file descriptor map the
> >    corresponding physical memory. (Thus, the pages in the region can't be
> >    accessed by the kernel itself, so that, for example, pointers to the
> >    region can't be passed to system calls.)
> > 
> > So folios backed by such secretmem files are not mapped into kernel
> > address space and shouldn't be accessed, in general.
> > 
> > To make this a bit more generic of a fix and prevent regression in the
> > future for similar special mappings, do a generic check of whether the
> > folio we got is mapped with kernel_page_present(), as suggested in [1].
> > This will handle secretmem, and any future special cases that use
> > a similar approach.
> > 
> > Original report and repro can be found in [0].
> > 
> >    [0] https://lore.kernel.org/bpf/ZwyG8Uro%2FSyTXAni@ly-workstation/
> >    [1] https://lore.kernel.org/bpf/CAJD7tkbpEMx-eC4A-z8Jm1ikrY_KJVjWO+mhhz1_fni4x+COKw@mail.gmail.com/
> > 
> > Reported-by: Yi Lai <yi1.lai@intel.com>
> > Suggested-by: Yosry Ahmed <yosryahmed@google.com>
> > Fixes: de3ec364c3c3 ("lib/buildid: add single folio-based file reader abstraction")
> > Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
> > ---
> >   lib/buildid.c | 5 ++++-
> >   1 file changed, 4 insertions(+), 1 deletion(-)
> > 
> > diff --git a/lib/buildid.c b/lib/buildid.c
> > index 290641d92ac1..90df64fd64c1 100644
> > --- a/lib/buildid.c
> > +++ b/lib/buildid.c
> > @@ -5,6 +5,7 @@
> >   #include <linux/elf.h>
> >   #include <linux/kernel.h>
> >   #include <linux/pagemap.h>
> > +#include <linux/set_memory.h>
> >   #define BUILD_ID 3
> > @@ -74,7 +75,9 @@ static int freader_get_folio(struct freader *r, loff_t file_off)
> >   		filemap_invalidate_unlock_shared(r->file->f_mapping);
> >   	}
> > -	if (IS_ERR(r->folio) || !folio_test_uptodate(r->folio)) {
> > +	if (IS_ERR(r->folio) ||
> > +	    !kernel_page_present(&r->folio->page) ||
> > +	    !folio_test_uptodate(r->folio)) {
> >   		if (!IS_ERR(r->folio))
> >   			folio_put(r->folio);
> >   		r->folio = NULL;
> 
> As replied elsewhere, can't we take a look at the mapping?
> 
> We do the same thing in gup_fast_folio_allowed() where we check
> secretmem_mapping().

Responded on the v1 but I think we can go with v1 of this work as
whoever will be working on unmapping folios from direct map will need to
fix gup_fast_folio_allowed(), they can fix this code as well. Also it
seems like some arch don't have kernel_page_present() and builds are
failing.

Andrii, let's move forward with the v1 patch.

> 
> 
> -- 
> Cheers,
> 
> David / dhildenb
>
Andrii Nakryiko Oct. 17, 2024, 5:35 p.m. UTC | #8
On Thu, Oct 17, 2024 at 9:35 AM Shakeel Butt <shakeel.butt@linux.dev> wrote:
>
> On Thu, Oct 17, 2024 at 11:18:34AM GMT, David Hildenbrand wrote:
> > On 17.10.24 00:16, Andrii Nakryiko wrote:
> > >  From memfd_secret(2) manpage:
> > >
> > >    The memory areas backing the file created with memfd_secret(2) are
> > >    visible only to the processes that have access to the file descriptor.
> > >    The memory region is removed from the kernel page tables and only the
> > >    page tables of the processes holding the file descriptor map the
> > >    corresponding physical memory. (Thus, the pages in the region can't be
> > >    accessed by the kernel itself, so that, for example, pointers to the
> > >    region can't be passed to system calls.)
> > >
> > > So folios backed by such secretmem files are not mapped into kernel
> > > address space and shouldn't be accessed, in general.
> > >
> > > To make this a bit more generic of a fix and prevent regression in the
> > > future for similar special mappings, do a generic check of whether the
> > > folio we got is mapped with kernel_page_present(), as suggested in [1].
> > > This will handle secretmem, and any future special cases that use
> > > a similar approach.
> > >
> > > Original report and repro can be found in [0].
> > >
> > >    [0] https://lore.kernel.org/bpf/ZwyG8Uro%2FSyTXAni@ly-workstation/
> > >    [1] https://lore.kernel.org/bpf/CAJD7tkbpEMx-eC4A-z8Jm1ikrY_KJVjWO+mhhz1_fni4x+COKw@mail.gmail.com/
> > >
> > > Reported-by: Yi Lai <yi1.lai@intel.com>
> > > Suggested-by: Yosry Ahmed <yosryahmed@google.com>
> > > Fixes: de3ec364c3c3 ("lib/buildid: add single folio-based file reader abstraction")
> > > Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
> > > ---
> > >   lib/buildid.c | 5 ++++-
> > >   1 file changed, 4 insertions(+), 1 deletion(-)
> > >
> > > diff --git a/lib/buildid.c b/lib/buildid.c
> > > index 290641d92ac1..90df64fd64c1 100644
> > > --- a/lib/buildid.c
> > > +++ b/lib/buildid.c
> > > @@ -5,6 +5,7 @@
> > >   #include <linux/elf.h>
> > >   #include <linux/kernel.h>
> > >   #include <linux/pagemap.h>
> > > +#include <linux/set_memory.h>
> > >   #define BUILD_ID 3
> > > @@ -74,7 +75,9 @@ static int freader_get_folio(struct freader *r, loff_t file_off)
> > >             filemap_invalidate_unlock_shared(r->file->f_mapping);
> > >     }
> > > -   if (IS_ERR(r->folio) || !folio_test_uptodate(r->folio)) {
> > > +   if (IS_ERR(r->folio) ||
> > > +       !kernel_page_present(&r->folio->page) ||
> > > +       !folio_test_uptodate(r->folio)) {
> > >             if (!IS_ERR(r->folio))
> > >                     folio_put(r->folio);
> > >             r->folio = NULL;
> >
> > As replied elsewhere, can't we take a look at the mapping?
> >
> > We do the same thing in gup_fast_folio_allowed() where we check
> > secretmem_mapping().
>
> Responded on the v1 but I think we can go with v1 of this work as
> whoever will be working on unmapping folios from direct map will need to
> fix gup_fast_folio_allowed(), they can fix this code as well. Also it
> seems like some arch don't have kernel_page_present() and builds are
> failing.
>

Yeah, we are lucky that BPF CI tested s390x and caught this issue.

> Andrii, let's move forward with the v1 patch.

Let me post v3 based on v1 (checking for secretmem_mapping()), but
I'll change return code to -EFAULT, so in the future this can be
rolled into generic error handling code path with no change in error
code.

>
> >
> >
> > --
> > Cheers,
> >
> > David / dhildenb
> >
Heiko Carstens Oct. 17, 2024, 5:54 p.m. UTC | #9
On Thu, Oct 17, 2024 at 10:35:27AM -0700, Andrii Nakryiko wrote:
> On Thu, Oct 17, 2024 at 9:35 AM Shakeel Butt <shakeel.butt@linux.dev> wrote:
> > On Thu, Oct 17, 2024 at 11:18:34AM GMT, David Hildenbrand wrote:
> > > As replied elsewhere, can't we take a look at the mapping?
> > >
> > > We do the same thing in gup_fast_folio_allowed() where we check
> > > secretmem_mapping().
> >
> > Responded on the v1 but I think we can go with v1 of this work as
> > whoever will be working on unmapping folios from direct map will need to
> > fix gup_fast_folio_allowed(), they can fix this code as well. Also it
> > seems like some arch don't have kernel_page_present() and builds are
> > failing.
> >
> 
> Yeah, we are lucky that BPF CI tested s390x and caught this issue.
> 
> > Andrii, let's move forward with the v1 patch.
> 
> Let me post v3 based on v1 (checking for secretmem_mapping()), but
> I'll change return code to -EFAULT, so in the future this can be
> rolled into generic error handling code path with no change in error
> code.

Ok, I've seen that you don't need kernel_page_present() anymore, just
after I implemented it for s390. I guess I'll send the patch below
(with a different commit message) upstream anyway, just in case
somebody else comes up with a similar use case.

From b625edc35de64293b728b030c62f7aaa65c8627e Mon Sep 17 00:00:00 2001
From: Heiko Carstens <hca@linux.ibm.com>
Date: Thu, 17 Oct 2024 19:41:07 +0200
Subject: [PATCH] s390/pageattr: Implement missing kernel_page_present()

kernel_page_present() was intentionally not implemented when adding
ARCH_HAS_SET_DIRECT_MAP support, since it was only used for suspend/resume
which is not supported anymore on s390.

However a new bpf use case now leads to a compile error specific to
s390. Implement kernel_page_present() to fix this.

Reported-by: Daniel Borkmann <daniel@iogearbox.net>
Closes: https://lore.kernel.org/all/045de961-ac69-40cc-b141-ab70ec9377ec@iogearbox.net
Fixes: 0490d6d7ba0a ("s390/mm: enable ARCH_HAS_SET_DIRECT_MAP")
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 arch/s390/include/asm/set_memory.h |  1 +
 arch/s390/mm/pageattr.c            | 15 +++++++++++++++
 2 files changed, 16 insertions(+)

diff --git a/arch/s390/include/asm/set_memory.h b/arch/s390/include/asm/set_memory.h
index 06fbabe2f66c..cb4cc0f59012 100644
--- a/arch/s390/include/asm/set_memory.h
+++ b/arch/s390/include/asm/set_memory.h
@@ -62,5 +62,6 @@ __SET_MEMORY_FUNC(set_memory_4k, SET_MEMORY_4K)
 
 int set_direct_map_invalid_noflush(struct page *page);
 int set_direct_map_default_noflush(struct page *page);
+bool kernel_page_present(struct page *page);
 
 #endif
diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c
index 5f805ad42d4c..aec9eb16b6f7 100644
--- a/arch/s390/mm/pageattr.c
+++ b/arch/s390/mm/pageattr.c
@@ -406,6 +406,21 @@ int set_direct_map_default_noflush(struct page *page)
 	return __set_memory((unsigned long)page_to_virt(page), 1, SET_MEMORY_DEF);
 }
 
+bool kernel_page_present(struct page *page)
+{
+	unsigned long addr;
+	unsigned int cc;
+
+	addr = (unsigned long)page_address(page);
+	asm volatile(
+		"	lra	%[addr],0(%[addr])\n"
+		"	ipm	%[cc]\n"
+		: [cc] "=d" (cc), [addr] "+a" (addr)
+		:
+		: "cc");
+	return (cc >> 28) == 0;
+}
+
 #if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE)
 
 static void ipte_range(pte_t *pte, unsigned long address, int nr)
Andrii Nakryiko Oct. 17, 2024, 6:23 p.m. UTC | #10
On Thu, Oct 17, 2024 at 10:54 AM Heiko Carstens <hca@linux.ibm.com> wrote:
>
> On Thu, Oct 17, 2024 at 10:35:27AM -0700, Andrii Nakryiko wrote:
> > On Thu, Oct 17, 2024 at 9:35 AM Shakeel Butt <shakeel.butt@linux.dev> wrote:
> > > On Thu, Oct 17, 2024 at 11:18:34AM GMT, David Hildenbrand wrote:
> > > > As replied elsewhere, can't we take a look at the mapping?
> > > >
> > > > We do the same thing in gup_fast_folio_allowed() where we check
> > > > secretmem_mapping().
> > >
> > > Responded on the v1 but I think we can go with v1 of this work as
> > > whoever will be working on unmapping folios from direct map will need to
> > > fix gup_fast_folio_allowed(), they can fix this code as well. Also it
> > > seems like some arch don't have kernel_page_present() and builds are
> > > failing.
> > >
> >
> > Yeah, we are lucky that BPF CI tested s390x and caught this issue.
> >
> > > Andrii, let's move forward with the v1 patch.
> >
> > Let me post v3 based on v1 (checking for secretmem_mapping()), but
> > I'll change return code to -EFAULT, so in the future this can be
> > rolled into generic error handling code path with no change in error
> > code.
>
> Ok, I've seen that you don't need kernel_page_present() anymore, just
> after I implemented it for s390. I guess I'll send the patch below
> (with a different commit message) upstream anyway, just in case
> somebody else comes up with a similar use case.

Please do send a patch, yes. It's good to have complete implementation
of this API regardless. We can then switch to either
kernel_page_present() or an alternative approach mentioned in [0] by
David Hildenbrand, in the next release cycle, for instance. Thanks.

  [0] https://lore.kernel.org/all/c87a4ba0-b9c4-4044-b0c3-c1112601494f@redhat.com/

>
> From b625edc35de64293b728b030c62f7aaa65c8627e Mon Sep 17 00:00:00 2001
> From: Heiko Carstens <hca@linux.ibm.com>
> Date: Thu, 17 Oct 2024 19:41:07 +0200
> Subject: [PATCH] s390/pageattr: Implement missing kernel_page_present()
>
> kernel_page_present() was intentionally not implemented when adding
> ARCH_HAS_SET_DIRECT_MAP support, since it was only used for suspend/resume
> which is not supported anymore on s390.
>
> However a new bpf use case now leads to a compile error specific to
> s390. Implement kernel_page_present() to fix this.
>
> Reported-by: Daniel Borkmann <daniel@iogearbox.net>
> Closes: https://lore.kernel.org/all/045de961-ac69-40cc-b141-ab70ec9377ec@iogearbox.net
> Fixes: 0490d6d7ba0a ("s390/mm: enable ARCH_HAS_SET_DIRECT_MAP")
> Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
> ---
>  arch/s390/include/asm/set_memory.h |  1 +
>  arch/s390/mm/pageattr.c            | 15 +++++++++++++++
>  2 files changed, 16 insertions(+)
>
> diff --git a/arch/s390/include/asm/set_memory.h b/arch/s390/include/asm/set_memory.h
> index 06fbabe2f66c..cb4cc0f59012 100644
> --- a/arch/s390/include/asm/set_memory.h
> +++ b/arch/s390/include/asm/set_memory.h
> @@ -62,5 +62,6 @@ __SET_MEMORY_FUNC(set_memory_4k, SET_MEMORY_4K)
>
>  int set_direct_map_invalid_noflush(struct page *page);
>  int set_direct_map_default_noflush(struct page *page);
> +bool kernel_page_present(struct page *page);
>
>  #endif
> diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c
> index 5f805ad42d4c..aec9eb16b6f7 100644
> --- a/arch/s390/mm/pageattr.c
> +++ b/arch/s390/mm/pageattr.c
> @@ -406,6 +406,21 @@ int set_direct_map_default_noflush(struct page *page)
>         return __set_memory((unsigned long)page_to_virt(page), 1, SET_MEMORY_DEF);
>  }
>
> +bool kernel_page_present(struct page *page)
> +{
> +       unsigned long addr;
> +       unsigned int cc;
> +
> +       addr = (unsigned long)page_address(page);
> +       asm volatile(
> +               "       lra     %[addr],0(%[addr])\n"
> +               "       ipm     %[cc]\n"
> +               : [cc] "=d" (cc), [addr] "+a" (addr)
> +               :
> +               : "cc");
> +       return (cc >> 28) == 0;
> +}
> +
>  #if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE)
>
>  static void ipte_range(pte_t *pte, unsigned long address, int nr)
> --
> 2.45.2
>
diff mbox series

Patch

diff --git a/lib/buildid.c b/lib/buildid.c
index 290641d92ac1..90df64fd64c1 100644
--- a/lib/buildid.c
+++ b/lib/buildid.c
@@ -5,6 +5,7 @@ 
 #include <linux/elf.h>
 #include <linux/kernel.h>
 #include <linux/pagemap.h>
+#include <linux/set_memory.h>
 
 #define BUILD_ID 3
 
@@ -74,7 +75,9 @@  static int freader_get_folio(struct freader *r, loff_t file_off)
 		filemap_invalidate_unlock_shared(r->file->f_mapping);
 	}
 
-	if (IS_ERR(r->folio) || !folio_test_uptodate(r->folio)) {
+	if (IS_ERR(r->folio) ||
+	    !kernel_page_present(&r->folio->page) ||
+	    !folio_test_uptodate(r->folio)) {
 		if (!IS_ERR(r->folio))
 			folio_put(r->folio);
 		r->folio = NULL;