diff mbox series

[v2] fs/proc/kcore.c: add mmap interface

Message ID 20210601082241.13378-1-zhoufeng.zf@bytedance.com (mailing list archive)
State New, archived
Headers show
Series [v2] fs/proc/kcore.c: add mmap interface | expand

Commit Message

Feng Zhou June 1, 2021, 8:22 a.m. UTC
From: ZHOUFENG <zhoufeng.zf@bytedance.com>

While monitoring the kernel with DRGN (https://github.com/osandov/drgn)
to access kernel data structures, we found that it issues a very large
number of system calls. DRGN is implemented by reading /proc/kcore.
Looking at the kcore code, we found that kcore does not implement mmap,
so every access goes through read, which causes frequent context
switches. Therefore, we want to add an mmap interface to improve
performance. Since the vmalloc and module areas change as memory is
allocated and released, consistency cannot be guaranteed for them, so
the mmap interface only maps KCORE_TEXT and KCORE_RAM.

The test results:
1. the default version of kcore
real 11.00
user 8.53
sys 3.59

% time     seconds  usecs/call     calls    errors syscall
------ ----------- ----------- --------- --------- ----------------
99.64  128.578319          12  11168701           pread64
...
------ ----------- ----------- --------- --------- ----------------
100.00  129.042853              11193748       966 total

2. kcore with the mmap interface added
real 6.44
user 7.32
sys 0.24

% time     seconds  usecs/call     calls    errors syscall
------ ----------- ----------- --------- --------- ----------------
32.94    0.130120          24      5317       315 futex
11.66    0.046077          21      2231         1 lstat
 9.23    0.036449         177       206           mmap
...
------ ----------- ----------- --------- --------- ----------------
100.00    0.395077                 25435       971 total

The test results show that the number of system calls and time
consumption are significantly reduced.

Thanks to Andrew Morton for his advice.

Co-developed-by: CHENYING <chenying.kernel@bytedance.com>
Signed-off-by: CHENYING <chenying.kernel@bytedance.com>
Signed-off-by: ZHOUFENG <zhoufeng.zf@bytedance.com>
---
Updates since v1:
- Replace EAGAIN with the return value of remap_pfn_range(). More details
can be found here:
https://lore.kernel.org/patchwork/patch/1436352/

 fs/proc/kcore.c | 67 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 67 insertions(+)

Comments

Andrew Morton June 2, 2021, 2:22 a.m. UTC | #1
On Tue,  1 Jun 2021 16:22:41 +0800 Feng zhou <zhoufeng.zf@bytedance.com> wrote:

> From: ZHOUFENG <zhoufeng.zf@bytedance.com>
> 
> When we do the kernel monitor, use the DRGN
> (https://github.com/osandov/drgn) access to kernel data structures,
> found that the system calls a lot. DRGN is implemented by reading
> /proc/kcore. After looking at the kcore code, it is found that kcore
> does not implement mmap, resulting in frequent context switching
> triggered by read. Therefore, we want to add mmap interface to optimize
> performance. Since vmalloc and module areas will change with allocation
> and release, consistency cannot be guaranteed, so mmap interface only
> maps KCORE_TEXT and KCORE_RAM.
> 
> ...
>
> +static int mmap_kcore(struct file *file, struct vm_area_struct *vma)
> +{
> +	size_t size = vma->vm_end - vma->vm_start;
> +	u64 start, pfn;
> +	int nphdr;
> +	size_t data_offset;
> +	size_t phdrs_len, notes_len;
> +	struct kcore_list *m = NULL;
> +	int ret = 0;
> +
> +	down_read(&kclist_lock);
> +
> +	get_kcore_size(&nphdr, &phdrs_len, &notes_len, &data_offset);
> +
> +	start = kc_offset_to_vaddr(((u64)vma->vm_pgoff << PAGE_SHIFT) -
> +		((data_offset >> PAGE_SHIFT) << PAGE_SHIFT));
> +
> +	list_for_each_entry(m, &kclist_head, list) {
> +		if (start >= m->addr && size <= m->size)
> +			break;
> +	}
> +
> +	if (&m->list == &kclist_head) {
> +		ret = -EINVAL;
> +		goto out;
> +	}
> +
> +	if (vma->vm_flags & (VM_WRITE | VM_EXEC)) {
> +		ret = -EPERM;
> +		goto out;
> +	}
> +
> +	vma->vm_flags &= ~(VM_MAYWRITE | VM_MAYEXEC);
> +	vma->vm_flags |= VM_MIXEDMAP;
> +	vma->vm_ops = &kcore_mmap_ops;
> +
> +	if (kern_addr_valid(start)) {
> +		if (m->type == KCORE_RAM || m->type == KCORE_REMAP)

KCORE_REMAP was removed by
https://lkml.kernel.org/r/20210526093041.8800-2-david@redhat.com

I did this:

--- a/fs/proc/kcore.c~fs-proc-kcorec-add-mmap-interface-fix
+++ a/fs/proc/kcore.c
@@ -660,7 +660,7 @@ static int mmap_kcore(struct file *file,
 	vma->vm_ops = &kcore_mmap_ops;
 
 	if (kern_addr_valid(start)) {
-		if (m->type == KCORE_RAM || m->type == KCORE_REMAP)
+		if (m->type == KCORE_RAM)
 			pfn = __pa(start) >> PAGE_SHIFT;
 		else if (m->type == KCORE_TEXT)
 			pfn = __pa_symbol(start) >> PAGE_SHIFT;
Feng Zhou June 2, 2021, 2:31 a.m. UTC | #2
在 2021/6/2 上午10:22, Andrew Morton 写道:
> On Tue,  1 Jun 2021 16:22:41 +0800 Feng zhou <zhoufeng.zf@bytedance.com> wrote:
> 
>> From: ZHOUFENG <zhoufeng.zf@bytedance.com>
>>
>> When we do the kernel monitor, use the DRGN
>> (https://github.com/osandov/drgn) access to kernel data structures,
>> found that the system calls a lot. DRGN is implemented by reading
>> /proc/kcore. After looking at the kcore code, it is found that kcore
>> does not implement mmap, resulting in frequent context switching
>> triggered by read. Therefore, we want to add mmap interface to optimize
>> performance. Since vmalloc and module areas will change with allocation
>> and release, consistency cannot be guaranteed, so mmap interface only
>> maps KCORE_TEXT and KCORE_RAM.
>>
>> ...
>>
>> +static int mmap_kcore(struct file *file, struct vm_area_struct *vma)
>> +{
>> +	size_t size = vma->vm_end - vma->vm_start;
>> +	u64 start, pfn;
>> +	int nphdr;
>> +	size_t data_offset;
>> +	size_t phdrs_len, notes_len;
>> +	struct kcore_list *m = NULL;
>> +	int ret = 0;
>> +
>> +	down_read(&kclist_lock);
>> +
>> +	get_kcore_size(&nphdr, &phdrs_len, &notes_len, &data_offset);
>> +
>> +	start = kc_offset_to_vaddr(((u64)vma->vm_pgoff << PAGE_SHIFT) -
>> +		((data_offset >> PAGE_SHIFT) << PAGE_SHIFT));
>> +
>> +	list_for_each_entry(m, &kclist_head, list) {
>> +		if (start >= m->addr && size <= m->size)
>> +			break;
>> +	}
>> +
>> +	if (&m->list == &kclist_head) {
>> +		ret = -EINVAL;
>> +		goto out;
>> +	}
>> +
>> +	if (vma->vm_flags & (VM_WRITE | VM_EXEC)) {
>> +		ret = -EPERM;
>> +		goto out;
>> +	}
>> +
>> +	vma->vm_flags &= ~(VM_MAYWRITE | VM_MAYEXEC);
>> +	vma->vm_flags |= VM_MIXEDMAP;
>> +	vma->vm_ops = &kcore_mmap_ops;
>> +
>> +	if (kern_addr_valid(start)) {
>> +		if (m->type == KCORE_RAM || m->type == KCORE_REMAP)
> 
> KCORE_REMAP was removed by
> https://lkml.kernel.org/r/20210526093041.8800-2-david@redhat.com
> 
> I did this:
> 
> --- a/fs/proc/kcore.c~fs-proc-kcorec-add-mmap-interface-fix
> +++ a/fs/proc/kcore.c
> @@ -660,7 +660,7 @@ static int mmap_kcore(struct file *file,
>   	vma->vm_ops = &kcore_mmap_ops;
>   
>   	if (kern_addr_valid(start)) {
> -		if (m->type == KCORE_RAM || m->type == KCORE_REMAP)
> +		if (m->type == KCORE_RAM)
>   			pfn = __pa(start) >> PAGE_SHIFT;
>   		else if (m->type == KCORE_TEXT)
>   			pfn = __pa_symbol(start) >> PAGE_SHIFT;
> 

   Thank you very much.
diff mbox series

Patch

diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 4d2e64e9016c..91b19f63a298 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -573,11 +573,78 @@  static int release_kcore(struct inode *inode, struct file *file)
 	return 0;
 }
 
+static vm_fault_t mmap_kcore_fault(struct vm_fault *vmf)
+{
+	return VM_FAULT_SIGBUS;	/* unconditional: vmf is never examined */
+}
+/* VMA operations installed by mmap_kcore(); the only hook set is .fault. */
+static const struct vm_operations_struct kcore_mmap_ops = {
+	.fault = mmap_kcore_fault,
+};
+/* mmap handler for /proc/kcore: remaps part of one kclist entry into a non-writable, non-executable VMA; returns 0 or a negative errno. */
+static int mmap_kcore(struct file *file, struct vm_area_struct *vma)
+{
+	size_t size = vma->vm_end - vma->vm_start;
+	u64 start, pfn;
+	int nphdr;
+	size_t data_offset;
+	size_t phdrs_len, notes_len;
+	struct kcore_list *m = NULL;
+	int ret = 0;
+	/* kclist_lock is held for reading across the lookup and the remap; it is dropped at out:. */
+	down_read(&kclist_lock);
+	/* Only data_offset is used below; nphdr, phdrs_len and notes_len are ignored. */
+	get_kcore_size(&nphdr, &phdrs_len, &notes_len, &data_offset);
+	/* Byte offset of the mapping (vm_pgoff pages) minus data_offset rounded down to a page, passed through kc_offset_to_vaddr(). */
+	start = kc_offset_to_vaddr(((u64)vma->vm_pgoff << PAGE_SHIFT) -
+		((data_offset >> PAGE_SHIFT) << PAGE_SHIFT));
+	/* Stop at the first entry with m->addr <= start and m->size >= size. NOTE(review): start + size <= m->addr + m->size is not checked -- confirm. */
+	list_for_each_entry(m, &kclist_head, list) {
+		if (start >= m->addr && size <= m->size)
+			break;
+	}
+	/* The iterator ran off the end of the list (m aliases the list head): no entry matched. */
+	if (&m->list == &kclist_head) {
+		ret = -EINVAL;
+		goto out;
+	}
+	/* Refuse mappings that were requested writable or executable. */
+	if (vma->vm_flags & (VM_WRITE | VM_EXEC)) {
+		ret = -EPERM;
+		goto out;
+	}
+	/* Also clear VM_MAYWRITE/VM_MAYEXEC; presumably so the flags cannot be added later via mprotect() -- TODO confirm. */
+	vma->vm_flags &= ~(VM_MAYWRITE | VM_MAYEXEC);
+	vma->vm_flags |= VM_MIXEDMAP;
+	vma->vm_ops = &kcore_mmap_ops;
+	/* KCORE_RAM/KCORE_REMAP use __pa(), KCORE_TEXT uses __pa_symbol(); other types get -EFAULT. NOTE(review): confirm KCORE_REMAP still exists in the target tree. */
+	if (kern_addr_valid(start)) {
+		if (m->type == KCORE_RAM || m->type == KCORE_REMAP)
+			pfn = __pa(start) >> PAGE_SHIFT;
+		else if (m->type == KCORE_TEXT)
+			pfn = __pa_symbol(start) >> PAGE_SHIFT;
+		else {
+			ret = -EFAULT;
+			goto out;
+		}
+		/* Remap size bytes starting at pfn from vma->vm_start; the result of remap_pfn_range() becomes the return value. */
+		ret = remap_pfn_range(vma, vma->vm_start, pfn, size,
+				vma->vm_page_prot);
+	} else {
+		ret = -EFAULT;
+	}
+	/* Common exit: release kclist_lock and return ret. */
+out:
+	up_read(&kclist_lock);
+	return ret;
+}
+
 static const struct proc_ops kcore_proc_ops = {
 	.proc_read	= read_kcore,
 	.proc_open	= open_kcore,
 	.proc_release	= release_kcore,
 	.proc_lseek	= default_llseek,
+	.proc_mmap	= mmap_kcore,	/* the one entry this hunk adds */
 };
 
 /* just remember that we have to update kcore */