Message ID | 20250410000022.1901-5-sj@kernel.org (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | mm/madvise: batch tlb flushes for MADV_DONTNEED and MADV_FREE | expand |
On Wed, Apr 09, 2025 at 05:00:22PM -0700, SeongJae Park wrote: > MADV_DONTNEED[_LOCKED] handling for [process_]madvise() flushes tlb for > each vma of each address range. Update the logic to do tlb flushes in a > batched way. Initialize an mmu_gather object from do_madvise() and > vector_madvise(), which are the entry level functions for > [process_]madvise(), respectively. And pass those objects to the > function for per-vma work, via madvise_behavior struct. Make the > per-vma logic not flushes tlb on their own but just saves the tlb > entries to the received mmu_gather object. For this internal logic > change, make zap_page_range_single_batched() non-static and use it > directly from madvise_dontneed_single_vma(). Finally, the entry level > functions flush the tlb entries that gathered for the entire user > request, at once. > > Signed-off-by: SeongJae Park <sj@kernel.org> Thanks, as usual always a pleasure to review your series :) Cheers for these changes! Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> > --- > mm/internal.h | 3 +++ > mm/madvise.c | 11 ++++++++--- > mm/memory.c | 4 ++-- > 3 files changed, 13 insertions(+), 5 deletions(-) > > diff --git a/mm/internal.h b/mm/internal.h > index ef92e88738fe..c5f9dd007215 100644 > --- a/mm/internal.h > +++ b/mm/internal.h > @@ -435,6 +435,9 @@ void unmap_page_range(struct mmu_gather *tlb, > struct vm_area_struct *vma, > unsigned long addr, unsigned long end, > struct zap_details *details); > +void zap_page_range_single_batched(struct mmu_gather *tlb, > + struct vm_area_struct *vma, unsigned long addr, > + unsigned long size, struct zap_details *details); > int folio_unmap_invalidate(struct address_space *mapping, struct folio *folio, > gfp_t gfp); > > diff --git a/mm/madvise.c b/mm/madvise.c > index 951038a9f36f..8433ac9b27e0 100644 > --- a/mm/madvise.c > +++ b/mm/madvise.c > @@ -851,7 +851,8 @@ static int madvise_free_single_vma(struct madvise_behavior *madv_behavior, > * An interface that causes the 
system to free clean pages and flush > * dirty pages is already available as msync(MS_INVALIDATE). > */ > -static long madvise_dontneed_single_vma(struct vm_area_struct *vma, > +static long madvise_dontneed_single_vma(struct madvise_behavior *madv_behavior, > + struct vm_area_struct *vma, > unsigned long start, unsigned long end) > { > struct zap_details details = { > @@ -859,7 +860,8 @@ static long madvise_dontneed_single_vma(struct vm_area_struct *vma, > .even_cows = true, > }; > > - zap_page_range_single(vma, start, end - start, &details); > + zap_page_range_single_batched( > + madv_behavior->tlb, vma, start, end - start, &details); > return 0; > } > > @@ -950,7 +952,8 @@ static long madvise_dontneed_free(struct vm_area_struct *vma, > } > > if (behavior == MADV_DONTNEED || behavior == MADV_DONTNEED_LOCKED) > - return madvise_dontneed_single_vma(vma, start, end); > + return madvise_dontneed_single_vma( > + madv_behavior, vma, start, end); > else if (behavior == MADV_FREE) > return madvise_free_single_vma(madv_behavior, vma, start, end); > else > @@ -1628,6 +1631,8 @@ static void madvise_unlock(struct mm_struct *mm, int behavior) > static bool madvise_batch_tlb_flush(int behavior) > { > switch (behavior) { > + case MADV_DONTNEED: > + case MADV_DONTNEED_LOCKED: > case MADV_FREE: > return true; > default: > diff --git a/mm/memory.c b/mm/memory.c > index 690695643dfb..559f3e194438 100644 > --- a/mm/memory.c > +++ b/mm/memory.c > @@ -1998,7 +1998,7 @@ void unmap_vmas(struct mmu_gather *tlb, struct ma_state *mas, > mmu_notifier_invalidate_range_end(&range); > } > > -/* > +/** > * zap_page_range_single_batched - remove user pages in a given range > * @tlb: pointer to the caller's struct mmu_gather > * @vma: vm_area_struct holding the applicable pages > @@ -2009,7 +2009,7 @@ void unmap_vmas(struct mmu_gather *tlb, struct ma_state *mas, > * @tlb shouldn't be NULL. The range must fit into one VMA. 
If @vma is for > * hugetlb, @tlb is flushed and re-initialized by this function. > */ > -static void zap_page_range_single_batched(struct mmu_gather *tlb, > +void zap_page_range_single_batched(struct mmu_gather *tlb, > struct vm_area_struct *vma, unsigned long address, > unsigned long size, struct zap_details *details) > { > -- > 2.39.5
diff --git a/mm/internal.h b/mm/internal.h index ef92e88738fe..c5f9dd007215 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -435,6 +435,9 @@ void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long addr, unsigned long end, struct zap_details *details); +void zap_page_range_single_batched(struct mmu_gather *tlb, + struct vm_area_struct *vma, unsigned long addr, + unsigned long size, struct zap_details *details); int folio_unmap_invalidate(struct address_space *mapping, struct folio *folio, gfp_t gfp); diff --git a/mm/madvise.c b/mm/madvise.c index 951038a9f36f..8433ac9b27e0 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -851,7 +851,8 @@ static int madvise_free_single_vma(struct madvise_behavior *madv_behavior, * An interface that causes the system to free clean pages and flush * dirty pages is already available as msync(MS_INVALIDATE). */ -static long madvise_dontneed_single_vma(struct vm_area_struct *vma, +static long madvise_dontneed_single_vma(struct madvise_behavior *madv_behavior, + struct vm_area_struct *vma, unsigned long start, unsigned long end) { struct zap_details details = { @@ -859,7 +860,8 @@ static long madvise_dontneed_single_vma(struct vm_area_struct *vma, .even_cows = true, }; - zap_page_range_single(vma, start, end - start, &details); + zap_page_range_single_batched( + madv_behavior->tlb, vma, start, end - start, &details); return 0; } @@ -950,7 +952,8 @@ static long madvise_dontneed_free(struct vm_area_struct *vma, } if (behavior == MADV_DONTNEED || behavior == MADV_DONTNEED_LOCKED) - return madvise_dontneed_single_vma(vma, start, end); + return madvise_dontneed_single_vma( + madv_behavior, vma, start, end); else if (behavior == MADV_FREE) return madvise_free_single_vma(madv_behavior, vma, start, end); else @@ -1628,6 +1631,8 @@ static void madvise_unlock(struct mm_struct *mm, int behavior) static bool madvise_batch_tlb_flush(int behavior) { switch (behavior) { + case MADV_DONTNEED: + case MADV_DONTNEED_LOCKED: 
case MADV_FREE: return true; default: diff --git a/mm/memory.c b/mm/memory.c index 690695643dfb..559f3e194438 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1998,7 +1998,7 @@ void unmap_vmas(struct mmu_gather *tlb, struct ma_state *mas, mmu_notifier_invalidate_range_end(&range); } -/* +/** * zap_page_range_single_batched - remove user pages in a given range * @tlb: pointer to the caller's struct mmu_gather * @vma: vm_area_struct holding the applicable pages @@ -2009,7 +2009,7 @@ void unmap_vmas(struct mmu_gather *tlb, struct ma_state *mas, * @tlb shouldn't be NULL. The range must fit into one VMA. If @vma is for * hugetlb, @tlb is flushed and re-initialized by this function. */ -static void zap_page_range_single_batched(struct mmu_gather *tlb, +void zap_page_range_single_batched(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long address, unsigned long size, struct zap_details *details) {
MADV_DONTNEED[_LOCKED] handling for [process_]madvise() flushes the tlb for each vma of each address range. Update the logic to do tlb flushes in a batched way. Initialize an mmu_gather object from do_madvise() and vector_madvise(), which are the entry level functions for madvise() and process_madvise(), respectively. Pass those objects to the function for per-vma work, via the madvise_behavior struct. Make the per-vma logic not flush the tlb on its own but just save the tlb entries to the received mmu_gather object. For this internal logic change, make zap_page_range_single_batched() non-static and use it directly from madvise_dontneed_single_vma(). Finally, the entry level functions flush the tlb entries that were gathered for the entire user request, at once. Signed-off-by: SeongJae Park <sj@kernel.org> --- mm/internal.h | 3 +++ mm/madvise.c | 11 ++++++++--- mm/memory.c | 4 ++-- 3 files changed, 13 insertions(+), 5 deletions(-)