Message ID | 20210601082241.13378-1-zhoufeng.zf@bytedance.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [v2] fs/proc/kcore.c: add mmap interface | expand |
On Tue, 1 Jun 2021 16:22:41 +0800 Feng zhou <zhoufeng.zf@bytedance.com> wrote: > From: ZHOUFENG <zhoufeng.zf@bytedance.com> > > When we do the kernel monitor, use the DRGN > (https://github.com/osandov/drgn) access to kernel data structures, > found that the system calls a lot. DRGN is implemented by reading > /proc/kcore. After looking at the kcore code, it is found that kcore > does not implement mmap, resulting in frequent context switching > triggered by read. Therefore, we want to add mmap interface to optimize > performance. Since vmalloc and module areas will change with allocation > and release, consistency cannot be guaranteed, so mmap interface only > maps KCORE_TEXT and KCORE_RAM. > > ... > > +static int mmap_kcore(struct file *file, struct vm_area_struct *vma) > +{ > + size_t size = vma->vm_end - vma->vm_start; > + u64 start, pfn; > + int nphdr; > + size_t data_offset; > + size_t phdrs_len, notes_len; > + struct kcore_list *m = NULL; > + int ret = 0; > + > + down_read(&kclist_lock); > + > + get_kcore_size(&nphdr, &phdrs_len, &notes_len, &data_offset); > + > + start = kc_offset_to_vaddr(((u64)vma->vm_pgoff << PAGE_SHIFT) - > + ((data_offset >> PAGE_SHIFT) << PAGE_SHIFT)); > + > + list_for_each_entry(m, &kclist_head, list) { > + if (start >= m->addr && size <= m->size) > + break; > + } > + > + if (&m->list == &kclist_head) { > + ret = -EINVAL; > + goto out; > + } > + > + if (vma->vm_flags & (VM_WRITE | VM_EXEC)) { > + ret = -EPERM; > + goto out; > + } > + > + vma->vm_flags &= ~(VM_MAYWRITE | VM_MAYEXEC); > + vma->vm_flags |= VM_MIXEDMAP; > + vma->vm_ops = &kcore_mmap_ops; > + > + if (kern_addr_valid(start)) { > + if (m->type == KCORE_RAM || m->type == KCORE_REMAP) KCORE_REMAP was removed by https://lkml.kernel.org/r/20210526093041.8800-2-david@redhat.com I did this: --- a/fs/proc/kcore.c~fs-proc-kcorec-add-mmap-interface-fix +++ a/fs/proc/kcore.c @@ -660,7 +660,7 @@ static int mmap_kcore(struct file *file, vma->vm_ops = &kcore_mmap_ops; if 
(kern_addr_valid(start)) { - if (m->type == KCORE_RAM || m->type == KCORE_REMAP) + if (m->type == KCORE_RAM) pfn = __pa(start) >> PAGE_SHIFT; else if (m->type == KCORE_TEXT) pfn = __pa_symbol(start) >> PAGE_SHIFT;
在 2021/6/2 上午10:22, Andrew Morton 写道: > On Tue, 1 Jun 2021 16:22:41 +0800 Feng zhou <zhoufeng.zf@bytedance.com> wrote: > >> From: ZHOUFENG <zhoufeng.zf@bytedance.com> >> >> When we do the kernel monitor, use the DRGN >> (https://github.com/osandov/drgn) access to kernel data structures, >> found that the system calls a lot. DRGN is implemented by reading >> /proc/kcore. After looking at the kcore code, it is found that kcore >> does not implement mmap, resulting in frequent context switching >> triggered by read. Therefore, we want to add mmap interface to optimize >> performance. Since vmalloc and module areas will change with allocation >> and release, consistency cannot be guaranteed, so mmap interface only >> maps KCORE_TEXT and KCORE_RAM. >> >> ... >> >> +static int mmap_kcore(struct file *file, struct vm_area_struct *vma) >> +{ >> + size_t size = vma->vm_end - vma->vm_start; >> + u64 start, pfn; >> + int nphdr; >> + size_t data_offset; >> + size_t phdrs_len, notes_len; >> + struct kcore_list *m = NULL; >> + int ret = 0; >> + >> + down_read(&kclist_lock); >> + >> + get_kcore_size(&nphdr, &phdrs_len, &notes_len, &data_offset); >> + >> + start = kc_offset_to_vaddr(((u64)vma->vm_pgoff << PAGE_SHIFT) - >> + ((data_offset >> PAGE_SHIFT) << PAGE_SHIFT)); >> + >> + list_for_each_entry(m, &kclist_head, list) { >> + if (start >= m->addr && size <= m->size) >> + break; >> + } >> + >> + if (&m->list == &kclist_head) { >> + ret = -EINVAL; >> + goto out; >> + } >> + >> + if (vma->vm_flags & (VM_WRITE | VM_EXEC)) { >> + ret = -EPERM; >> + goto out; >> + } >> + >> + vma->vm_flags &= ~(VM_MAYWRITE | VM_MAYEXEC); >> + vma->vm_flags |= VM_MIXEDMAP; >> + vma->vm_ops = &kcore_mmap_ops; >> + >> + if (kern_addr_valid(start)) { >> + if (m->type == KCORE_RAM || m->type == KCORE_REMAP) > > KCORE_REMAP was removed by > https://lkml.kernel.org/r/20210526093041.8800-2-david@redhat.com > > I did this: > > --- a/fs/proc/kcore.c~fs-proc-kcorec-add-mmap-interface-fix > +++ a/fs/proc/kcore.c > 
@@ -660,7 +660,7 @@ static int mmap_kcore(struct file *file, > vma->vm_ops = &kcore_mmap_ops; > > if (kern_addr_valid(start)) { > - if (m->type == KCORE_RAM || m->type == KCORE_REMAP) > + if (m->type == KCORE_RAM) > pfn = __pa(start) >> PAGE_SHIFT; > else if (m->type == KCORE_TEXT) > pfn = __pa_symbol(start) >> PAGE_SHIFT; > Thank you very much.
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index 4d2e64e9016c..91b19f63a298 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -573,11 +573,78 @@ static int release_kcore(struct inode *inode, struct file *file) return 0; } +static vm_fault_t mmap_kcore_fault(struct vm_fault *vmf) +{ + return VM_FAULT_SIGBUS; +} + +static const struct vm_operations_struct kcore_mmap_ops = { + .fault = mmap_kcore_fault, +}; + +static int mmap_kcore(struct file *file, struct vm_area_struct *vma) +{ + size_t size = vma->vm_end - vma->vm_start; + u64 start, pfn; + int nphdr; + size_t data_offset; + size_t phdrs_len, notes_len; + struct kcore_list *m = NULL; + int ret = 0; + + down_read(&kclist_lock); + + get_kcore_size(&nphdr, &phdrs_len, &notes_len, &data_offset); + + start = kc_offset_to_vaddr(((u64)vma->vm_pgoff << PAGE_SHIFT) - + ((data_offset >> PAGE_SHIFT) << PAGE_SHIFT)); + + list_for_each_entry(m, &kclist_head, list) { + if (start >= m->addr && size <= m->size) + break; + } + + if (&m->list == &kclist_head) { + ret = -EINVAL; + goto out; + } + + if (vma->vm_flags & (VM_WRITE | VM_EXEC)) { + ret = -EPERM; + goto out; + } + + vma->vm_flags &= ~(VM_MAYWRITE | VM_MAYEXEC); + vma->vm_flags |= VM_MIXEDMAP; + vma->vm_ops = &kcore_mmap_ops; + + if (kern_addr_valid(start)) { + if (m->type == KCORE_RAM || m->type == KCORE_REMAP) + pfn = __pa(start) >> PAGE_SHIFT; + else if (m->type == KCORE_TEXT) + pfn = __pa_symbol(start) >> PAGE_SHIFT; + else { + ret = -EFAULT; + goto out; + } + + ret = remap_pfn_range(vma, vma->vm_start, pfn, size, + vma->vm_page_prot); + } else { + ret = -EFAULT; + } + +out: + up_read(&kclist_lock); + return ret; +} + static const struct proc_ops kcore_proc_ops = { .proc_read = read_kcore, .proc_open = open_kcore, .proc_release = release_kcore, .proc_lseek = default_llseek, + .proc_mmap = mmap_kcore, }; /* just remember that we have to update kcore */