Message ID | 20250310172318.653630-6-sj@kernel.org (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | mm/madvise: batch tlb flushes for MADV_DONTNEED and MADV_FREE | expand |
On Mon, Mar 10, 2025 at 10:23:14AM -0700, SeongJae Park wrote: > To implement batched tlb flushes for MADV_DONTNEED[_LOCKED] and > MADV_FREE, an mmu_gather object in addition to the behavior integer need > to be passed to the internal logics. Using a struct can make it easy > without increasing the number of parameters of all code paths towards > the internal logic. Define a struct for the purpose and use it on the > code path that starts from madvise_do_behavior() and ends on > madvise_dontneed_free(). Oh a helper struct! I like these! Nitty but... I wonder if we should just add the the mmu_gather field immediately even if it isn't used yet? Also I feel like this patch and 6 should be swapped around, as you are laying the groundwork here for patch 7 but then doing something unrelated in 6, unless I'm missing something. Also maybe add a bit in commit msg about changing the madvise_walk_vmas() visitor type signature (I wonder if that'd be better as a typedef tbh?) However, this change looks fine aside from nits (and you know, helper struct and I'm sold obviously ;) so: Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> > > Signed-off-by: SeongJae Park <sj@kernel.org> > --- > mm/madvise.c | 36 ++++++++++++++++++++++++------------ > 1 file changed, 24 insertions(+), 12 deletions(-) > > diff --git a/mm/madvise.c b/mm/madvise.c > index 469c25690a0e..ba2a78795207 100644 > --- a/mm/madvise.c > +++ b/mm/madvise.c > @@ -890,11 +890,16 @@ static bool madvise_dontneed_free_valid_vma(struct vm_area_struct *vma, > return true; > } > > +struct madvise_behavior { > + int behavior; > +}; > + > static long madvise_dontneed_free(struct vm_area_struct *vma, > struct vm_area_struct **prev, > unsigned long start, unsigned long end, > - int behavior) > + struct madvise_behavior *madv_behavior) Nitty, but not sure about the need for 'madv_' here. I think keeping this as 'behavior' is fine, as the type is very clear. > { > + int behavior = madv_behavior->behavior; > struct mm_struct *mm = vma->vm_mm; > > *prev = vma; > @@ -1249,8 +1254,10 @@ static long madvise_guard_remove(struct vm_area_struct *vma, > static int madvise_vma_behavior(struct vm_area_struct *vma, > struct vm_area_struct **prev, > unsigned long start, unsigned long end, > - unsigned long behavior) > + void *behavior_arg) > { > + struct madvise_behavior *arg = behavior_arg; > + int behavior = arg->behavior; > int error; > struct anon_vma_name *anon_name; > unsigned long new_flags = vma->vm_flags; > @@ -1270,7 +1277,7 @@ static int madvise_vma_behavior(struct vm_area_struct *vma, > case MADV_FREE: > case MADV_DONTNEED: > case MADV_DONTNEED_LOCKED: > - return madvise_dontneed_free(vma, prev, start, end, behavior); > + return madvise_dontneed_free(vma, prev, start, end, arg); > case MADV_NORMAL: > new_flags = new_flags & ~VM_RAND_READ & ~VM_SEQ_READ; > break; > @@ -1487,10 +1494,10 @@ static bool process_madvise_remote_valid(int behavior) > */ > static > int madvise_walk_vmas(struct mm_struct *mm, unsigned long start, > - unsigned long end, unsigned long arg, > + unsigned long end, void *arg, > int (*visit)(struct vm_area_struct *vma, > struct vm_area_struct **prev, unsigned long start, > - unsigned long end, unsigned long arg)) > + unsigned long end, void *arg)) > { > struct vm_area_struct *vma; > struct vm_area_struct *prev; > @@ -1548,7 +1555,7 @@ int madvise_walk_vmas(struct mm_struct *mm, unsigned long start, > static int madvise_vma_anon_name(struct vm_area_struct *vma, > struct vm_area_struct **prev, > unsigned long start, unsigned long end, > - unsigned long anon_name) > + void *anon_name) > { > int error; > > @@ -1557,7 +1564,7 @@ static int madvise_vma_anon_name(struct vm_area_struct *vma, > return -EBADF; > > error = madvise_update_vma(vma, prev, start, end, vma->vm_flags, > - (struct anon_vma_name *)anon_name); > + anon_name); > > /* > * madvise() returns EAGAIN if kernel resources, such as > @@ -1589,7 +1596,7 @@ int madvise_set_anon_name(struct mm_struct *mm, unsigned long start, > if (end == start) > return 0; > > - return madvise_walk_vmas(mm, start, end, (unsigned long)anon_name, > + return madvise_walk_vmas(mm, start, end, anon_name, > madvise_vma_anon_name); > } > #endif /* CONFIG_ANON_VMA_NAME */ > @@ -1673,8 +1680,10 @@ static bool is_madvise_populate(int behavior) > } > > static int madvise_do_behavior(struct mm_struct *mm, > - unsigned long start, size_t len_in, int behavior) > + unsigned long start, size_t len_in, > + struct madvise_behavior *madv_behavior) > { > + int behavior = madv_behavior->behavior; > struct blk_plug plug; > unsigned long end; > int error; > @@ -1688,7 +1697,7 @@ static int madvise_do_behavior(struct mm_struct *mm, > if (is_madvise_populate(behavior)) > error = madvise_populate(mm, start, end, behavior); > else > - error = madvise_walk_vmas(mm, start, end, behavior, > + error = madvise_walk_vmas(mm, start, end, madv_behavior, > madvise_vma_behavior); > blk_finish_plug(&plug); > return error; > @@ -1769,13 +1778,14 @@ static int madvise_do_behavior(struct mm_struct *mm, > int do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, int behavior) > { > int error; > + struct madvise_behavior madv_behavior = {.behavior = behavior}; > > if (madvise_should_skip(start, len_in, behavior, &error)) > return error; > error = madvise_lock(mm, behavior); > if (error) > return error; > - error = madvise_do_behavior(mm, start, len_in, behavior); > + error = madvise_do_behavior(mm, start, len_in, &madv_behavior); > madvise_unlock(mm, behavior); > > return error; > @@ -1792,6 +1802,7 @@ static ssize_t vector_madvise(struct mm_struct *mm, struct iov_iter *iter, > { > ssize_t ret = 0; > size_t total_len; > + struct madvise_behavior madv_behavior = {.behavior = behavior}; > > total_len = iov_iter_count(iter); > > @@ -1807,7 +1818,8 @@ static ssize_t vector_madvise(struct mm_struct *mm, struct iov_iter *iter, > if (madvise_should_skip(start, len_in, behavior, &error)) > ret = error; > else > - ret = madvise_do_behavior(mm, start, len_in, behavior); > + ret = madvise_do_behavior(mm, start, len_in, > + &madv_behavior); > /* > * An madvise operation is attempting to restart the syscall, > * but we cannot proceed as it would not be correct to repeat > -- > 2.39.5
On Tue, 11 Mar 2025 12:17:40 +0000 Lorenzo Stoakes <lorenzo.stoakes@oracle.com> wrote: > On Mon, Mar 10, 2025 at 10:23:14AM -0700, SeongJae Park wrote: > > To implement batched tlb flushes for MADV_DONTNEED[_LOCKED] and > > MADV_FREE, an mmu_gather object in addition to the behavior integer need > > to be passed to the internal logics. Using a struct can make it easy > > without increasing the number of parameters of all code paths towards > > the internal logic. Define a struct for the purpose and use it on the > > code path that starts from madvise_do_behavior() and ends on > > madvise_dontneed_free(). > > Oh a helper struct! I like these! > > Nitty but... > > I wonder if we should just add the the mmu_gather field immediately even if > it isn't used yet? I will do so in the next spin. > > Also I feel like this patch and 6 should be swapped around, as you are > laying the groundwork here for patch 7 but then doing something unrelated > in 6, unless I'm missing something. I actually introduced patch 6 before this one initially. But I started thinking this patch could help not only tlb flushes batching but potential future madvise behavior extensions, and ended up chaging the order in current way. I have no strong opinion and your suggestion also sounds nice to me. I will swap those in the next version unless someone makes voice. > > Also maybe add a bit in commit msg about changing the madvise_walk_vmas() > visitor type signature I will do so, in the next version. > (I wonder if that'd be better as a typedef tbh?) Something like below? typedef void *madvise_walk_arg; I think that could make the code easier to read. But I feel the void pointer is also not very bad for the current simple static functions use case, so I'd like keep this as is if you don't mind. Please let me know if I'm missing your point. > > However, this change looks fine aside from nits (and you know, helper > struct and I'm sold obviously ;) so: > > Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> Thank you! :) > > > > > Signed-off-by: SeongJae Park <sj@kernel.org> > > --- > > mm/madvise.c | 36 ++++++++++++++++++++++++------------ > > 1 file changed, 24 insertions(+), 12 deletions(-) > > > > diff --git a/mm/madvise.c b/mm/madvise.c > > index 469c25690a0e..ba2a78795207 100644 > > --- a/mm/madvise.c > > +++ b/mm/madvise.c > > @@ -890,11 +890,16 @@ static bool madvise_dontneed_free_valid_vma(struct vm_area_struct *vma, > > return true; > > } > > > > +struct madvise_behavior { > > + int behavior; > > +}; > > + > > static long madvise_dontneed_free(struct vm_area_struct *vma, > > struct vm_area_struct **prev, > > unsigned long start, unsigned long end, > > - int behavior) > > + struct madvise_behavior *madv_behavior) > > Nitty, but not sure about the need for 'madv_' here. I think keeping this as > 'behavior' is fine, as the type is very clear. Agreed. I wanted to reduce the name conflict causing diff lines, but I think your suggestion is the right thing to do for long term. Thanks, SJ [...]
On Tue, Mar 11, 2025 at 01:56:17PM -0700, SeongJae Park wrote: > On Tue, 11 Mar 2025 12:17:40 +0000 Lorenzo Stoakes <lorenzo.stoakes@oracle.com> wrote: > > > On Mon, Mar 10, 2025 at 10:23:14AM -0700, SeongJae Park wrote: > > > To implement batched tlb flushes for MADV_DONTNEED[_LOCKED] and > > > MADV_FREE, an mmu_gather object in addition to the behavior integer need > > > to be passed to the internal logics. Using a struct can make it easy > > > without increasing the number of parameters of all code paths towards > > > the internal logic. Define a struct for the purpose and use it on the > > > code path that starts from madvise_do_behavior() and ends on > > > madvise_dontneed_free(). > > > > Oh a helper struct! I like these! > > > > Nitty but... > > > > I wonder if we should just add the the mmu_gather field immediately even if > > it isn't used yet? > > I will do so in the next spin. > > > > > Also I feel like this patch and 6 should be swapped around, as you are > > laying the groundwork here for patch 7 but then doing something unrelated > > in 6, unless I'm missing something. > > I actually introduced patch 6 before this one initially. But I started > thinking this patch could help not only tlb flushes batching but potential > future madvise behavior extensions, and ended up chaging the order in current > way. I have no strong opinion and your suggestion also sounds nice to me. I > will swap those in the next version unless someone makes voice. > > > > > Also maybe add a bit in commit msg about changing the madvise_walk_vmas() > > visitor type signature > > I will do so, in the next version. > > > (I wonder if that'd be better as a typedef tbh?) > > Something like below? > > typedef void *madvise_walk_arg; > > I think that could make the code easier to read. But I feel the void pointer > is also not very bad for the current simple static functions use case, so I'd > like keep this as is if you don't mind. > > Please let me know if I'm missing your point. No to be clear I meant the: int (*visit)(struct vm_area_struct *vma, struct vm_area_struct **prev, unsigned long start, unsigned long end, unsigned long arg) Function pointer. But this is not a big deal and let's leave it as-is for now, we can address this later potentially! :) > > > > > However, this change looks fine aside from nits (and you know, helper > > struct and I'm sold obviously ;) so: > > > > Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> > > Thank you! :) > > > > > > > > > Signed-off-by: SeongJae Park <sj@kernel.org> > > > --- > > > mm/madvise.c | 36 ++++++++++++++++++++++++------------ > > > 1 file changed, 24 insertions(+), 12 deletions(-) > > > > > > diff --git a/mm/madvise.c b/mm/madvise.c > > > index 469c25690a0e..ba2a78795207 100644 > > > --- a/mm/madvise.c > > > +++ b/mm/madvise.c > > > @@ -890,11 +890,16 @@ static bool madvise_dontneed_free_valid_vma(struct vm_area_struct *vma, > > > return true; > > > } > > > > > > +struct madvise_behavior { > > > + int behavior; > > > +}; > > > + > > > static long madvise_dontneed_free(struct vm_area_struct *vma, > > > struct vm_area_struct **prev, > > > unsigned long start, unsigned long end, > > > - int behavior) > > > + struct madvise_behavior *madv_behavior) > > > > Nitty, but not sure about the need for 'madv_' here. I think keeping this as > > 'behavior' is fine, as the type is very clear. > > Agreed. I wanted to reduce the name conflict causing diff lines, but I think > your suggestion is the right thing to do for long term. > > > Thanks, > SJ > > [...] Thanks for being so flexible on the feedback! Appreciated :>)
diff --git a/mm/madvise.c b/mm/madvise.c index 469c25690a0e..ba2a78795207 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -890,11 +890,16 @@ static bool madvise_dontneed_free_valid_vma(struct vm_area_struct *vma, return true; } +struct madvise_behavior { + int behavior; +}; + static long madvise_dontneed_free(struct vm_area_struct *vma, struct vm_area_struct **prev, unsigned long start, unsigned long end, - int behavior) + struct madvise_behavior *madv_behavior) { + int behavior = madv_behavior->behavior; struct mm_struct *mm = vma->vm_mm; *prev = vma; @@ -1249,8 +1254,10 @@ static long madvise_guard_remove(struct vm_area_struct *vma, static int madvise_vma_behavior(struct vm_area_struct *vma, struct vm_area_struct **prev, unsigned long start, unsigned long end, - unsigned long behavior) + void *behavior_arg) { + struct madvise_behavior *arg = behavior_arg; + int behavior = arg->behavior; int error; struct anon_vma_name *anon_name; unsigned long new_flags = vma->vm_flags; @@ -1270,7 +1277,7 @@ static int madvise_vma_behavior(struct vm_area_struct *vma, case MADV_FREE: case MADV_DONTNEED: case MADV_DONTNEED_LOCKED: - return madvise_dontneed_free(vma, prev, start, end, behavior); + return madvise_dontneed_free(vma, prev, start, end, arg); case MADV_NORMAL: new_flags = new_flags & ~VM_RAND_READ & ~VM_SEQ_READ; break; @@ -1487,10 +1494,10 @@ static bool process_madvise_remote_valid(int behavior) */ static int madvise_walk_vmas(struct mm_struct *mm, unsigned long start, - unsigned long end, unsigned long arg, + unsigned long end, void *arg, int (*visit)(struct vm_area_struct *vma, struct vm_area_struct **prev, unsigned long start, - unsigned long end, unsigned long arg)) + unsigned long end, void *arg)) { struct vm_area_struct *vma; struct vm_area_struct *prev; @@ -1548,7 +1555,7 @@ int madvise_walk_vmas(struct mm_struct *mm, unsigned long start, static int madvise_vma_anon_name(struct vm_area_struct *vma, struct vm_area_struct **prev, unsigned long start, unsigned long end, - unsigned long anon_name) + void *anon_name) { int error; @@ -1557,7 +1564,7 @@ static int madvise_vma_anon_name(struct vm_area_struct *vma, return -EBADF; error = madvise_update_vma(vma, prev, start, end, vma->vm_flags, - (struct anon_vma_name *)anon_name); + anon_name); /* * madvise() returns EAGAIN if kernel resources, such as @@ -1589,7 +1596,7 @@ int madvise_set_anon_name(struct mm_struct *mm, unsigned long start, if (end == start) return 0; - return madvise_walk_vmas(mm, start, end, (unsigned long)anon_name, + return madvise_walk_vmas(mm, start, end, anon_name, madvise_vma_anon_name); } #endif /* CONFIG_ANON_VMA_NAME */ @@ -1673,8 +1680,10 @@ static bool is_madvise_populate(int behavior) } static int madvise_do_behavior(struct mm_struct *mm, - unsigned long start, size_t len_in, int behavior) + unsigned long start, size_t len_in, + struct madvise_behavior *madv_behavior) { + int behavior = madv_behavior->behavior; struct blk_plug plug; unsigned long end; int error; @@ -1688,7 +1697,7 @@ static int madvise_do_behavior(struct mm_struct *mm, if (is_madvise_populate(behavior)) error = madvise_populate(mm, start, end, behavior); else - error = madvise_walk_vmas(mm, start, end, behavior, + error = madvise_walk_vmas(mm, start, end, madv_behavior, madvise_vma_behavior); blk_finish_plug(&plug); return error; @@ -1769,13 +1778,14 @@ static int madvise_do_behavior(struct mm_struct *mm, int do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, int behavior) { int error; + struct madvise_behavior madv_behavior = {.behavior = behavior}; if (madvise_should_skip(start, len_in, behavior, &error)) return error; error = madvise_lock(mm, behavior); if (error) return error; - error = madvise_do_behavior(mm, start, len_in, behavior); + error = madvise_do_behavior(mm, start, len_in, &madv_behavior); madvise_unlock(mm, behavior); return error; @@ -1792,6 +1802,7 @@ static ssize_t vector_madvise(struct mm_struct *mm, struct iov_iter *iter, { ssize_t ret = 0; size_t total_len; + struct madvise_behavior madv_behavior = {.behavior = behavior}; total_len = iov_iter_count(iter); @@ -1807,7 +1818,8 @@ static ssize_t vector_madvise(struct mm_struct *mm, struct iov_iter *iter, if (madvise_should_skip(start, len_in, behavior, &error)) ret = error; else - ret = madvise_do_behavior(mm, start, len_in, behavior); + ret = madvise_do_behavior(mm, start, len_in, + &madv_behavior); /* * An madvise operation is attempting to restart the syscall, * but we cannot proceed as it would not be correct to repeat
To implement batched tlb flushes for MADV_DONTNEED[_LOCKED] and MADV_FREE, an mmu_gather object in addition to the behavior integer need to be passed to the internal logics. Using a struct can make it easy without increasing the number of parameters of all code paths towards the internal logic. Define a struct for the purpose and use it on the code path that starts from madvise_do_behavior() and ends on madvise_dontneed_free(). Signed-off-by: SeongJae Park <sj@kernel.org> --- mm/madvise.c | 36 ++++++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 12 deletions(-)