Message ID | 20240906230512.124643-1-shakeel.butt@linux.dev (mailing list archive) |
---|---|
State | New |
Series | mm: replace xa_get_order with xas_get_order where appropriate |
* Shakeel Butt <shakeel.butt@linux.dev> [240906 19:05]:
> The tracing of invalidation and truncation operations on large files
> showed that xa_get_order() is among the top functions where the kernel
> spends a lot of CPU time. xa_get_order() needs to traverse the tree to
> reach the right node for a given index and then extract the order of
> the entry. However, in many places it is called from within an already
> ongoing tree traversal, where there is no need for another one. Just
> use xas_get_order() at those places.
>
> Signed-off-by: Shakeel Butt <shakeel.butt@linux.dev>

This change alters areas that already do the RCU locking, and the
internal state of the xas isn't altered, so the external loops are not
affected, afaict.

Looks good to me.

Reviewed-by: Liam R. Howlett <Liam.Howlett@oracle.com>

> ---
>  mm/filemap.c | 6 +++---
>  mm/shmem.c   | 2 +-
>  2 files changed, 4 insertions(+), 4 deletions(-)
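For context, the pattern Liam is describing looks roughly like the
sketch below: the caller is already inside an xas_for_each() walk under
rcu_read_lock(), so the order of the current entry can be read straight
from the cursor instead of restarting a lookup from the root with
xa_get_order(). This is only an illustrative sketch modelled on
shmem_partial_swap_usage(); count_value_pages() is a made-up name, not
a kernel function.

static unsigned long count_value_pages(struct address_space *mapping,
				       pgoff_t start, pgoff_t end)
{
	XA_STATE(xas, &mapping->i_pages, start);	/* cursor for the walk */
	unsigned long pages = 0;
	void *entry;

	rcu_read_lock();
	xas_for_each(&xas, entry, end) {
		if (xas_retry(&xas, entry))
			continue;
		if (xa_is_value(entry))
			/*
			 * xas_get_order() only inspects the node and offset
			 * the cursor already points at; it neither moves the
			 * cursor nor drops RCU, so the outer loop is
			 * unaffected.
			 */
			pages += 1UL << xas_get_order(&xas);
	}
	rcu_read_unlock();

	return pages;
}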
On 2024/9/7 07:05, Shakeel Butt wrote:
> The tracing of invalidation and truncation operations on large files
> showed that xa_get_order() is among the top functions where the kernel
> spends a lot of CPU time. xa_get_order() needs to traverse the tree to
> reach the right node for a given index and then extract the order of
> the entry. However, in many places it is called from within an already
> ongoing tree traversal, where there is no need for another one. Just
> use xas_get_order() at those places.
>
> Signed-off-by: Shakeel Butt <shakeel.butt@linux.dev>

LGTM. Thanks.

Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com>
On Fri, Sep 6, 2024 at 4:05 PM Shakeel Butt <shakeel.butt@linux.dev> wrote:
>
> The tracing of invalidation and truncation operations on large files
> showed that xa_get_order() is among the top functions where the kernel
> spends a lot of CPU time. xa_get_order() needs to traverse the tree to
> reach the right node for a given index and then extract the order of
> the entry. However, in many places it is called from within an already
> ongoing tree traversal, where there is no need for another one. Just
> use xas_get_order() at those places.
>
> Signed-off-by: Shakeel Butt <shakeel.butt@linux.dev>
[...]
> @@ -4297,7 +4297,7 @@ static void filemap_cachestat(struct address_space *mapping,
>  		if (xas_retry(&xas, folio))
>  			continue;
>
> -		order = xa_get_order(xas.xa, xas.xa_index);
> +		order = xas_get_order(&xas);

Yikesy, that's my bad. This is late, but FWIW:

Reviewed-by: Nhat Pham <nphamcs@gmail.com>
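For reference, the call site Nhat is acking derives the folio's span
from the entry the cursor currently sits on. A minimal sketch of that
bookkeeping, in the style of filemap_cachestat(); account_span() is a
made-up helper name used only for illustration:

/* Called from inside an existing xas_for_each() walk, under RCU. */
static void account_span(struct xa_state *xas, unsigned long *nr_pages_out,
			 pgoff_t *first, pgoff_t *last)
{
	unsigned int order = xas_get_order(xas);	/* no second tree lookup */
	unsigned long nr_pages = 1UL << order;

	*first = round_down(xas->xa_index, nr_pages);
	*last = *first + nr_pages - 1;
	*nr_pages_out = nr_pages;
}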
diff --git a/mm/filemap.c b/mm/filemap.c
index 070dee9791a9..7e3412941a8d 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2112,7 +2112,7 @@ unsigned find_lock_entries(struct address_space *mapping, pgoff_t *start,
 			VM_BUG_ON_FOLIO(!folio_contains(folio, xas.xa_index),
 					folio);
 		} else {
-			nr = 1 << xa_get_order(&mapping->i_pages, xas.xa_index);
+			nr = 1 << xas_get_order(&xas);
 			base = xas.xa_index & ~(nr - 1);
 			/* Omit order>0 value which begins before the start */
 			if (base < *start)
@@ -3001,7 +3001,7 @@ static inline loff_t folio_seek_hole_data(struct xa_state *xas,
 static inline size_t seek_folio_size(struct xa_state *xas, struct folio *folio)
 {
 	if (xa_is_value(folio))
-		return PAGE_SIZE << xa_get_order(xas->xa, xas->xa_index);
+		return PAGE_SIZE << xas_get_order(xas);
 	return folio_size(folio);
 }
 
@@ -4297,7 +4297,7 @@ static void filemap_cachestat(struct address_space *mapping,
 		if (xas_retry(&xas, folio))
 			continue;
 
-		order = xa_get_order(xas.xa, xas.xa_index);
+		order = xas_get_order(&xas);
 		nr_pages = 1 << order;
 		folio_first_index = round_down(xas.xa_index, 1 << order);
 		folio_last_index = folio_first_index + nr_pages - 1;
diff --git a/mm/shmem.c b/mm/shmem.c
index 866d46d0c43d..4002c4f47d4d 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -893,7 +893,7 @@ unsigned long shmem_partial_swap_usage(struct address_space *mapping,
 		if (xas_retry(&xas, page))
 			continue;
 		if (xa_is_value(page))
-			swapped += 1 << xa_get_order(xas.xa, xas.xa_index);
+			swapped += 1 << xas_get_order(&xas);
 		if (xas.xa_index == max)
 			break;
 		if (need_resched()) {
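The saving comes from what xa_get_order() has to do on every call: set
up a fresh cursor and walk from the root down to the index before the
order can be read. Roughly, and only as a simplified sketch of the
lib/xarray.c helper rather than a verbatim copy:

int xa_get_order(struct xarray *xa, unsigned long index)
{
	XA_STATE(xas, xa, index);	/* fresh cursor, starts at the root */
	int order = 0;
	void *entry;

	rcu_read_lock();
	entry = xas_load(&xas);		/* walks the tree down to @index */
	if (entry)
		order = xas_get_order(&xas);
	rcu_read_unlock();

	return order;
}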
The tracing of invalidation and truncation operations on large files
showed that xa_get_order() is among the top functions where the kernel
spends a lot of CPU time. xa_get_order() needs to traverse the tree to
reach the right node for a given index and then extract the order of
the entry. However, in many places it is called from within an already
ongoing tree traversal, where there is no need for another one. Just
use xas_get_order() at those places.

Signed-off-by: Shakeel Butt <shakeel.butt@linux.dev>
---
 mm/filemap.c | 6 +++---
 mm/shmem.c   | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)
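Each converted call site follows the same shape; an illustrative
before/after fragment, assuming the caller is already positioned on the
entry by an ongoing xas walk:

	/* Before: re-walks mapping->i_pages from the root to xas.xa_index. */
	nr = 1 << xa_get_order(&mapping->i_pages, xas.xa_index);

	/* After: reads the order of the entry the cursor already points at. */
	nr = 1 << xas_get_order(&xas);

Both expressions return the same order for the same entry; the second
simply avoids the redundant descent.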