[RFC,bpf-next,2/4] bpf: Add three kfunc helpers for bpf fs inode iterator

Message ID 20230507040107.3755166-3-houtao@huaweicloud.com (mailing list archive)
State RFC
Delegated to: BPF
Series Introduce bpf iterators for file-system

Checks

Context Check Description
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/apply fail Patch does not apply to bpf-next
bpf/vmtest-bpf-next-PR fail PR summary
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ${{ matrix.test }} on ${{ matrix.arch }} with ${{ matrix.toolchain_full }}
bpf/vmtest-bpf-next-VM_Test-2 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-3 fail Logs for build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-4 fail Logs for build for aarch64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-5 fail Logs for build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-6 success Logs for build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-7 success Logs for build for x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-8 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-9 success Logs for veristat

Commit Message

Hou Tao May 7, 2023, 4:01 a.m. UTC
From: Hou Tao <houtao1@huawei.com>

Add kfunc helpers for the bpf fs inode iterator to inspect the details of
an inode's page cache:
1) bpf_filemap_cachestat. Basically copied from the cachestat patchset by
   Nhat Pham [0]. It returns the number of cached, dirty, writeback and
   (recently) evicted pages in the passed inode (see the sketch below).
2) bpf_filemap_find_present & bpf_filemap_get_order. These two helpers
   are used to find the order of the folios present in the page cache.

The following is the output from the bpf selftest when showing the
cached status and folio orders of an xfs inode:

  sb: bsize 4096 s_op xfs_super_operations s_type xfs_fs_type name xfs
  ino: inode nlink 1 inum 131 size 10485760, name inode.test
  cache: cached 2560 dirty 0 wb 0 evicted 0
  orders:
    page offset 0 order 2
    page offset 4 order 2
    page offset 8 order 2
    page offset 12 order 2
    page offset 16 order 4
    page offset 32 order 4
    page offset 48 order 4
    page offset 64 order 5
    page offset 96 order 4
    page offset 112 order 4
    ......
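
(For reference, an order-N entry covers 2^N base pages, so with 4 KiB
pages the "page offset 64 order 5" line above is a 32-page (128 KiB)
folio starting at byte offset 64 * 4096 = 256 KiB, and "cached 2560"
matches the fully cached 10485760-byte file: 10485760 / 4096 = 2560
pages.)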

[0]: https://lore.kernel.org/linux-mm/20230503013608.2431726-1-nphamcs@gmail.com/T/#t

Signed-off-by: Hou Tao <houtao1@huawei.com>
---
 include/linux/fs.h        |  4 ++
 include/uapi/linux/mman.h |  8 ++++
 kernel/bpf/helpers.c      | 26 +++++++++++++
 mm/filemap.c              | 77 +++++++++++++++++++++++++++++++++++++++
 4 files changed, 115 insertions(+)

Patch

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 67495ef79bb2..5ce17e87c4f6 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -46,6 +46,7 @@ 
 
 #include <asm/byteorder.h>
 #include <uapi/linux/fs.h>
+#include <uapi/linux/mman.h>
 
 struct backing_dev_info;
 struct bdi_writeback;
@@ -3191,4 +3192,7 @@  extern int vfs_fadvise(struct file *file, loff_t offset, loff_t len,
 extern int generic_fadvise(struct file *file, loff_t offset, loff_t len,
 			   int advice);
 
+extern void filemap_cachestat(struct address_space *mapping, pgoff_t first_index,
+			      pgoff_t last_index, struct cachestat *cs);
+
 #endif /* _LINUX_FS_H */
diff --git a/include/uapi/linux/mman.h b/include/uapi/linux/mman.h
index f55bc680b5b0..6e9aa23aa124 100644
--- a/include/uapi/linux/mman.h
+++ b/include/uapi/linux/mman.h
@@ -4,6 +4,7 @@ 
 
 #include <asm/mman.h>
 #include <asm-generic/hugetlb_encode.h>
+#include <linux/types.h>
 
 #define MREMAP_MAYMOVE		1
 #define MREMAP_FIXED		2
@@ -41,4 +42,11 @@ 
 #define MAP_HUGE_2GB	HUGETLB_FLAG_ENCODE_2GB
 #define MAP_HUGE_16GB	HUGETLB_FLAG_ENCODE_16GB
 
+struct cachestat {
+	__u64 nr_cache;
+	__u64 nr_dirty;
+	__u64 nr_writeback;
+	__u64 nr_evicted;
+};
+
 #endif /* _UAPI_LINUX_MMAN_H */
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index bb6b4637ebf2..95174d1ef5bb 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -22,6 +22,7 @@ 
 #include <linux/security.h>
 #include <linux/btf_ids.h>
 #include <linux/bpf_mem_alloc.h>
+#include <uapi/linux/mman.h>
 
 #include "../../lib/kstrtox.h"
 
@@ -2170,6 +2171,27 @@  __bpf_kfunc struct task_struct *bpf_task_from_pid(s32 pid)
 	return p;
 }
 
+__bpf_kfunc void bpf_filemap_cachestat(struct inode *inode, unsigned long from,
+				       unsigned long last, struct cachestat *cs)
+{
+	filemap_cachestat(inode->i_mapping, from, last, cs);
+}
+
+__bpf_kfunc long bpf_filemap_find_present(struct inode *inode, unsigned long from,
+					  unsigned long last)
+{
+	unsigned long index = from;
+
+	if (!xa_find(&inode->i_mapping->i_pages, &index, last, XA_PRESENT))
+		return ULONG_MAX;
+	return index;
+}
+
+__bpf_kfunc long bpf_filemap_get_order(struct inode *inode, unsigned long index)
+{
+	return xa_get_order(&inode->i_mapping->i_pages, index);
+}
+
 /**
  * bpf_dynptr_slice() - Obtain a read-only pointer to the dynptr data.
  * @ptr: The dynptr whose data slice to retrieve
@@ -2402,6 +2424,10 @@  BTF_ID_FLAGS(func, bpf_cgroup_ancestor, KF_ACQUIRE | KF_RCU | KF_RET_NULL)
 BTF_ID_FLAGS(func, bpf_cgroup_from_id, KF_ACQUIRE | KF_RET_NULL)
 #endif
 BTF_ID_FLAGS(func, bpf_task_from_pid, KF_ACQUIRE | KF_RET_NULL)
+/* TODO: KF_TRUSTED_ARGS is missing */
+BTF_ID_FLAGS(func, bpf_filemap_cachestat);
+BTF_ID_FLAGS(func, bpf_filemap_find_present);
+BTF_ID_FLAGS(func, bpf_filemap_get_order);
 BTF_SET8_END(generic_btf_ids)
 
 static const struct btf_kfunc_id_set generic_kfunc_set = {
diff --git a/mm/filemap.c b/mm/filemap.c
index 2723104cc06a..fc63a02a9b0d 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -4122,3 +4122,80 @@  bool filemap_release_folio(struct folio *folio, gfp_t gfp)
 	return try_to_free_buffers(folio);
 }
 EXPORT_SYMBOL(filemap_release_folio);
+
+/**
+ * filemap_cachestat() - compute the page cache statistics of a mapping
+ * @mapping:	The mapping to compute the statistics for.
+ * @first_index:	The starting page cache index.
+ * @last_index:	The final page index (inclusive).
+ * @cs:	the cachestat struct to write the result to.
+ *
+ * This will query the page cache statistics of a mapping in the
+ * page range of [first_index, last_index] (inclusive). The statistics
+ * queried include: number of dirty pages, number of pages marked for
+ * writeback, and the number of (recently) evicted pages.
+ */
+void filemap_cachestat(struct address_space *mapping, pgoff_t first_index,
+		       pgoff_t last_index, struct cachestat *cs)
+{
+	XA_STATE(xas, &mapping->i_pages, first_index);
+	struct folio *folio;
+
+	rcu_read_lock();
+	xas_for_each(&xas, folio, last_index) {
+		unsigned long nr_pages;
+		pgoff_t folio_first_index, folio_last_index;
+
+		if (xas_retry(&xas, folio))
+			continue;
+
+		if (xa_is_value(folio)) {
+			/* page is evicted */
+			void *shadow = (void *)folio;
+			bool workingset; /* not used */
+			int order = xa_get_order(xas.xa, xas.xa_index);
+
+			nr_pages = 1 << order;
+			/* rounds down to the nearest multiple of 2^order */
+			folio_first_index = xas.xa_index >> order << order;
+			folio_last_index = folio_first_index + nr_pages - 1;
+
+			/* Folios might straddle the range boundaries, only count covered pages */
+			if (folio_first_index < first_index)
+				nr_pages -= first_index - folio_first_index;
+
+			if (folio_last_index > last_index)
+				nr_pages -= folio_last_index - last_index;
+
+			cs->nr_evicted += nr_pages;
+			goto resched;
+		}
+
+		nr_pages = folio_nr_pages(folio);
+		folio_first_index = folio_pgoff(folio);
+		folio_last_index = folio_first_index + nr_pages - 1;
+
+		/* Folios might straddle the range boundaries, only count covered pages */
+		if (folio_first_index < first_index)
+			nr_pages -= first_index - folio_first_index;
+
+		if (folio_last_index > last_index)
+			nr_pages -= folio_last_index - last_index;
+
+		/* page is in cache */
+		cs->nr_cache += nr_pages;
+
+		if (folio_test_dirty(folio))
+			cs->nr_dirty += nr_pages;
+
+		if (folio_test_writeback(folio))
+			cs->nr_writeback += nr_pages;
+
+resched:
+		if (need_resched()) {
+			xas_pause(&xas);
+			cond_resched_rcu();
+		}
+	}
+	rcu_read_unlock();
+}
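
Again not part of the patch, but for illustration: the "orders" listing
in the commit message could be produced by pairing the two lookup
kfuncs, e.g. as below (reusing the includes, license and the placeholder
context/section names from the earlier sketch):

  extern long bpf_filemap_find_present(struct inode *inode, unsigned long from,
                                       unsigned long last) __ksym;
  extern long bpf_filemap_get_order(struct inode *inode, unsigned long index) __ksym;

  SEC("iter/fs_inode")              /* placeholder section name */
  int dump_orders(struct bpf_iter__fs_inode *ctx)
  {
          struct inode *inode = ctx->inode;
          unsigned long index = 0;
          int i;

          if (!inode)
                  return 0;

          /* bounded loop to keep the verifier happy; 64 entries is arbitrary */
          for (i = 0; i < 64; i++) {
                  long found, order;

                  /* ULONG_MAX (reads as -1 in a long) means nothing is present */
                  found = bpf_filemap_find_present(inode, index, (unsigned long)-1);
                  if (found == -1)
                          break;
                  order = bpf_filemap_get_order(inode, found);
                  BPF_SEQ_PRINTF(ctx->meta->seq, "  page offset %lu order %ld\n",
                                 (unsigned long)found, order);
                  /* skip past the folio that starts at 'found' */
                  index = found + (1UL << order);
          }
          return 0;
  }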