Message ID | 20190912231820.590276-1-lucian@fb.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | mm: memory: fix /proc/meminfo reporting for MLOCK_ONFAULT | expand |
On Fri, Sep 13, 2019 at 4:49 AM Lucian Adrian Grijincu <lucian@fb.com> wrote: > > As pages are faulted in MLOCK_ONFAULT correctly updates > /proc/self/smaps, but doesn't update /proc/meminfo's Mlocked field. > > - Before this /proc/meminfo fields didn't change as pages were faulted in: > > ``` > = Start = > /proc/meminfo > Unevictable: 10128 kB > Mlocked: 10132 kB > = Creating testfile = > > = after mlock2(MLOCK_ONFAULT) = > /proc/meminfo > Unevictable: 10128 kB > Mlocked: 10132 kB > /proc/self/smaps > 7f8714000000-7f8754000000 rw-s 00000000 08:04 50857050 /root/testfile > Locked: 0 kB > > = after reading half of the file = > /proc/meminfo > Unevictable: 10128 kB > Mlocked: 10132 kB > /proc/self/smaps > 7f8714000000-7f8754000000 rw-s 00000000 08:04 50857050 /root/testfile > Locked: 524288 kB > > = after reading the entire the file = > /proc/meminfo > Unevictable: 10128 kB > Mlocked: 10132 kB > /proc/self/smaps > 7f8714000000-7f8754000000 rw-s 00000000 08:04 50857050 /root/testfile > Locked: 1048576 kB > > = after munmap = > /proc/meminfo > Unevictable: 10128 kB > Mlocked: 10132 kB > /proc/self/smaps > ``` > > - After: /proc/meminfo fields are properly updated as pages are touched: > > ``` > = Start = > /proc/meminfo > Unevictable: 60 kB > Mlocked: 60 kB > = Creating testfile = > > = after mlock2(MLOCK_ONFAULT) = > /proc/meminfo > Unevictable: 60 kB > Mlocked: 60 kB > /proc/self/smaps > 7f2b9c600000-7f2bdc600000 rw-s 00000000 08:04 63045798 /root/testfile > Locked: 0 kB > > = after reading half of the file = > /proc/meminfo > Unevictable: 524220 kB > Mlocked: 524220 kB > /proc/self/smaps > 7f2b9c600000-7f2bdc600000 rw-s 00000000 08:04 63045798 /root/testfile > Locked: 524288 kB > > = after reading the entire the file = > /proc/meminfo > Unevictable: 1048496 kB > Mlocked: 1048508 kB > /proc/self/smaps > 7f2b9c600000-7f2bdc600000 rw-s 00000000 08:04 63045798 /root/testfile > Locked: 1048576 kB > > = after munmap = > /proc/meminfo > Unevictable: 176 kB > Mlocked: 60 kB > /proc/self/smaps > ``` > > Repro code. > --- > > int mlock2wrap(const void* addr, size_t len, int flags) { > return syscall(SYS_mlock2, addr, len, flags); > } > > void smaps() { > char smapscmd[1000]; > snprintf( > smapscmd, > sizeof(smapscmd) - 1, > "grep testfile -A 20 /proc/%d/smaps | grep -E '(testfile|Locked)'", > getpid()); > printf("/proc/self/smaps\n"); > fflush(stdout); > system(smapscmd); > } > > void meminfo() { > const char* meminfocmd = "grep -E '(Mlocked|Unevictable)' /proc/meminfo"; > printf("/proc/meminfo\n"); > fflush(stdout); > system(meminfocmd); > } > > { \ > int rc = (call); \ > if (rc != 0) { \ > printf("error %d %s\n", rc, strerror(errno)); \ > exit(1); \ > } \ > } > int main(int argc, char* argv[]) { > printf("= Start =\n"); > meminfo(); > > printf("= Creating testfile =\n"); > size_t size = 1 << 30; // 1 GiB > int fd = open("testfile", O_CREAT | O_RDWR, 0666); > { > void* buf = malloc(size); > write(fd, buf, size); > free(buf); > } > int ret = 0; > void* addr = NULL; > addr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); > > if (argc > 1) { > PCHECK(mlock2wrap(addr, size, MLOCK_ONFAULT)); > printf("= after mlock2(MLOCK_ONFAULT) =\n"); > meminfo(); > smaps(); > > for (size_t i = 0; i < size / 2; i += 4096) { > ret += ((char*)addr)[i]; > } > printf("= after reading half of the file =\n"); > meminfo(); > smaps(); > > for (size_t i = 0; i < size; i += 4096) { > ret += ((char*)addr)[i]; > } > printf("= after reading the entire the file =\n"); > meminfo(); > smaps(); > > } else { > PCHECK(mlock(addr, size)); > printf("= after mlock =\n"); > meminfo(); > smaps(); > } > > PCHECK(munmap(addr, size)); > printf("= after munmap =\n"); > meminfo(); > smaps(); > > return ret; > } > > --- > > Signed-off-by: Lucian Adrian Grijincu <lucian@fb.com> > --- > mm/memory.c | 3 +++ > 1 file changed, 3 insertions(+) > > diff --git a/mm/memory.c b/mm/memory.c > index e0c232fe81d9..7e8dc3ed4e89 100644 > --- a/mm/memory.c > +++ b/mm/memory.c > @@ -3311,6 +3311,9 @@ vm_fault_t alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg, > } else { > inc_mm_counter_fast(vma->vm_mm, mm_counter_file(page)); > page_add_file_rmap(page, false); > + if ((vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) == VM_LOCKED && > + !PageTransCompound(page)) Do we need to check against VM_SPECIAL ? > + mlock_vma_page(page); > } > set_pte_at(vma->vm_mm, vmf->address, vmf->pte, entry); > > -- > 2.17.1 > >
On 9/13/19, 04:18, "Souptick Joarder" <jrdr.linux@gmail.com> wrote: On Fri, Sep 13, 2019 at 4:49 AM Lucian Adrian Grijincu <lucian@fb.com> wrote: > > As pages are faulted in MLOCK_ONFAULT correctly updates > /proc/self/smaps, but doesn't update /proc/meminfo's Mlocked field. > > - Before this /proc/meminfo fields didn't change as pages were faulted in: > diff --git a/mm/memory.c b/mm/memory.c > index e0c232fe81d9..7e8dc3ed4e89 100644 > --- a/mm/memory.c > +++ b/mm/memory.c > @@ -3311,6 +3311,9 @@ vm_fault_t alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg, > } else { > inc_mm_counter_fast(vma->vm_mm, mm_counter_file(page)); > page_add_file_rmap(page, false); > + if ((vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) == VM_LOCKED && > + !PageTransCompound(page)) Do we need to check against VM_SPECIAL ? I think you're right. mlock/mlock2 already checks and doesn't set VM_LOCKED if VM_SPECIAL is set: https://github.com/torvalds/linux/blob/v5.2/mm/mlock.c#L519-L533 /* * mlock_fixup - handle mlock[all]/munlock[all] requests. * * Filters out "special" vmas -- VM_LOCKED never gets set for these, and * munlock is a no-op. However, for some special vmas, we go ahead and * populate the ptes. * * For vmas that pass the filters, merge/split as appropriate. */ static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev, unsigned long start, unsigned long end, vm_flags_t newflags) { struct mm_struct *mm = vma->vm_mm; pgoff_t pgoff; int nr_pages; int ret = 0; int lock = !!(newflags & VM_LOCKED); vm_flags_t old_flags = vma->vm_flags; if (newflags == vma->vm_flags || (vma->vm_flags & VM_SPECIAL) || is_vm_hugetlb_page(vma) || vma == get_gate_vma(current->mm) || vma_is_dax(vma)) /* don't set VM_LOCKED or VM_LOCKONFAULT and don't count */ goto out; I got thrown off by this check https://github.com/torvalds/linux/blob/v5.2/mm/swap.c#L454-L469 void lru_cache_add_active_or_unevictable(struct page *page, struct vm_area_struct *vma) { VM_BUG_ON_PAGE(PageLRU(page), page); if (likely((vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) != VM_LOCKED)) SetPageActive(page); else if (!TestSetPageMlocked(page)) { /* * We use the irq-unsafe __mod_zone_page_stat because this * counter is not modified from interrupt context, and the pte * lock is held(spinlock), which implies preemption disabled. */ __mod_zone_page_state(page_zone(page), NR_MLOCK, hpage_nr_pages(page)); count_vm_event(UNEVICTABLE_PGMLOCKED); I'll remove VM_SPECIAL and re-submit. -- Lucian
diff --git a/mm/memory.c b/mm/memory.c index e0c232fe81d9..7e8dc3ed4e89 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3311,6 +3311,9 @@ vm_fault_t alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg, } else { inc_mm_counter_fast(vma->vm_mm, mm_counter_file(page)); page_add_file_rmap(page, false); + if ((vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) == VM_LOCKED && + !PageTransCompound(page)) + mlock_vma_page(page); } set_pte_at(vma->vm_mm, vmf->address, vmf->pte, entry);
As pages are faulted in MLOCK_ONFAULT correctly updates /proc/self/smaps, but doesn't update /proc/meminfo's Mlocked field. - Before this /proc/meminfo fields didn't change as pages were faulted in: ``` = Start = /proc/meminfo Unevictable: 10128 kB Mlocked: 10132 kB = Creating testfile = = after mlock2(MLOCK_ONFAULT) = /proc/meminfo Unevictable: 10128 kB Mlocked: 10132 kB /proc/self/smaps 7f8714000000-7f8754000000 rw-s 00000000 08:04 50857050 /root/testfile Locked: 0 kB = after reading half of the file = /proc/meminfo Unevictable: 10128 kB Mlocked: 10132 kB /proc/self/smaps 7f8714000000-7f8754000000 rw-s 00000000 08:04 50857050 /root/testfile Locked: 524288 kB = after reading the entire the file = /proc/meminfo Unevictable: 10128 kB Mlocked: 10132 kB /proc/self/smaps 7f8714000000-7f8754000000 rw-s 00000000 08:04 50857050 /root/testfile Locked: 1048576 kB = after munmap = /proc/meminfo Unevictable: 10128 kB Mlocked: 10132 kB /proc/self/smaps ``` - After: /proc/meminfo fields are properly updated as pages are touched: ``` = Start = /proc/meminfo Unevictable: 60 kB Mlocked: 60 kB = Creating testfile = = after mlock2(MLOCK_ONFAULT) = /proc/meminfo Unevictable: 60 kB Mlocked: 60 kB /proc/self/smaps 7f2b9c600000-7f2bdc600000 rw-s 00000000 08:04 63045798 /root/testfile Locked: 0 kB = after reading half of the file = /proc/meminfo Unevictable: 524220 kB Mlocked: 524220 kB /proc/self/smaps 7f2b9c600000-7f2bdc600000 rw-s 00000000 08:04 63045798 /root/testfile Locked: 524288 kB = after reading the entire the file = /proc/meminfo Unevictable: 1048496 kB Mlocked: 1048508 kB /proc/self/smaps 7f2b9c600000-7f2bdc600000 rw-s 00000000 08:04 63045798 /root/testfile Locked: 1048576 kB = after munmap = /proc/meminfo Unevictable: 176 kB Mlocked: 60 kB /proc/self/smaps ``` Repro code. --- int mlock2wrap(const void* addr, size_t len, int flags) { return syscall(SYS_mlock2, addr, len, flags); } void smaps() { char smapscmd[1000]; snprintf( smapscmd, sizeof(smapscmd) - 1, "grep testfile -A 20 /proc/%d/smaps | grep -E '(testfile|Locked)'", getpid()); printf("/proc/self/smaps\n"); fflush(stdout); system(smapscmd); } void meminfo() { const char* meminfocmd = "grep -E '(Mlocked|Unevictable)' /proc/meminfo"; printf("/proc/meminfo\n"); fflush(stdout); system(meminfocmd); } { \ int rc = (call); \ if (rc != 0) { \ printf("error %d %s\n", rc, strerror(errno)); \ exit(1); \ } \ } int main(int argc, char* argv[]) { printf("= Start =\n"); meminfo(); printf("= Creating testfile =\n"); size_t size = 1 << 30; // 1 GiB int fd = open("testfile", O_CREAT | O_RDWR, 0666); { void* buf = malloc(size); write(fd, buf, size); free(buf); } int ret = 0; void* addr = NULL; addr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); if (argc > 1) { PCHECK(mlock2wrap(addr, size, MLOCK_ONFAULT)); printf("= after mlock2(MLOCK_ONFAULT) =\n"); meminfo(); smaps(); for (size_t i = 0; i < size / 2; i += 4096) { ret += ((char*)addr)[i]; } printf("= after reading half of the file =\n"); meminfo(); smaps(); for (size_t i = 0; i < size; i += 4096) { ret += ((char*)addr)[i]; } printf("= after reading the entire the file =\n"); meminfo(); smaps(); } else { PCHECK(mlock(addr, size)); printf("= after mlock =\n"); meminfo(); smaps(); } PCHECK(munmap(addr, size)); printf("= after munmap =\n"); meminfo(); smaps(); return ret; } --- Signed-off-by: Lucian Adrian Grijincu <lucian@fb.com> --- mm/memory.c | 3 +++ 1 file changed, 3 insertions(+)