@@ -661,21 +661,20 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
{
struct hstate *h = hstate_inode(inode);
struct address_space *mapping = &inode->i_data;
- const pgoff_t start = lstart >> huge_page_shift(h);
- const pgoff_t end = lend >> huge_page_shift(h);
+ const pgoff_t end = lend >> PAGE_SHIFT;
struct folio_batch fbatch;
pgoff_t next, index;
int i, freed = 0;
bool truncate_op = (lend == LLONG_MAX);
folio_batch_init(&fbatch);
- next = start;
+ next = lstart >> PAGE_SHIFT;
while (filemap_get_folios(mapping, &next, end - 1, &fbatch)) {
for (i = 0; i < folio_batch_count(&fbatch); ++i) {
struct folio *folio = fbatch.folios[i];
u32 hash = 0;
- index = folio->index;
+ index = folio->index >> huge_page_order(h);
hash = hugetlb_fault_mutex_hash(mapping, index);
mutex_lock(&hugetlb_fault_mutex_table[hash]);
@@ -693,7 +692,9 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
}
if (truncate_op)
- (void)hugetlb_unreserve_pages(inode, start, LONG_MAX, freed);
+ (void)hugetlb_unreserve_pages(inode,
+ lstart >> huge_page_shift(h),
+ LONG_MAX, freed);
}
static void hugetlbfs_evict_inode(struct inode *inode)
@@ -741,7 +742,7 @@ static void hugetlbfs_zero_partial_page(struct hstate *h,
pgoff_t idx = start >> huge_page_shift(h);
struct folio *folio;
- folio = filemap_lock_folio(mapping, idx);
+ folio = filemap_lock_hugetlb_folio(h, mapping, idx);
if (IS_ERR(folio))
return;
@@ -886,7 +887,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
mutex_lock(&hugetlb_fault_mutex_table[hash]);
/* See if already present in mapping to avoid alloc/free */
- folio = filemap_get_folio(mapping, index);
+ folio = filemap_get_folio(mapping, index << huge_page_order(h));
if (!IS_ERR(folio)) {
folio_put(folio);
mutex_unlock(&hugetlb_fault_mutex_table[hash]);
@@ -811,6 +811,12 @@ static inline unsigned int blocks_per_huge_page(struct hstate *h)
return huge_page_size(h) / 512;
}
+static inline struct folio *filemap_lock_hugetlb_folio(struct hstate *h,
+ struct address_space *mapping, pgoff_t idx)
+{
+ return filemap_lock_folio(mapping, idx << huge_page_order(h));
+}
+
#include <asm/hugetlb.h>
#ifndef is_hugepage_only_range
@@ -1005,6 +1011,12 @@ static inline struct hugepage_subpool *hugetlb_folio_subpool(struct folio *folio
return NULL;
}
+static inline struct folio *filemap_lock_hugetlb_folio(struct hstate *h,
+ struct address_space *mapping, pgoff_t idx)
+{
+ return NULL;
+}
+
static inline int isolate_or_dissolve_huge_page(struct page *page,
struct list_head *list)
{
@@ -721,9 +721,6 @@ static inline pgoff_t folio_next_index(struct folio *folio)
*/
static inline struct page *folio_file_page(struct folio *folio, pgoff_t index)
{
- /* HugeTLBfs indexes the page cache in units of hpage_size */
- if (folio_test_hugetlb(folio))
- return &folio->page;
return folio_page(folio, index & (folio_nr_pages(folio) - 1));
}
@@ -739,9 +736,6 @@ static inline struct page *folio_file_page(struct folio *folio, pgoff_t index)
*/
static inline bool folio_contains(struct folio *folio, pgoff_t index)
{
- /* HugeTLBfs indexes the page cache in units of hpage_size */
- if (folio_test_hugetlb(folio))
- return folio->index == index;
return index - folio_index(folio) < folio_nr_pages(folio);
}
@@ -799,10 +793,9 @@ static inline struct folio *read_mapping_folio(struct address_space *mapping,
}
/*
- * Get index of the page within radix-tree (but not for hugetlb pages).
- * (TODO: remove once hugetlb pages will have ->index in PAGE_SIZE)
+ * Get the offset in PAGE_SIZE (even for hugetlb pages).
*/
-static inline pgoff_t page_to_index(struct page *page)
+static inline pgoff_t page_to_pgoff(struct page *page)
{
struct page *head;
@@ -817,19 +810,6 @@ static inline pgoff_t page_to_index(struct page *page)
return head->index + page - head;
}
-extern pgoff_t hugetlb_basepage_index(struct page *page);
-
-/*
- * Get the offset in PAGE_SIZE (even for hugetlb pages).
- * (TODO: hugetlb pages should have ->index in PAGE_SIZE)
- */
-static inline pgoff_t page_to_pgoff(struct page *page)
-{
- if (unlikely(PageHuge(page)))
- return hugetlb_basepage_index(page);
- return page_to_index(page);
-}
-
/*
* Return byte-offset into filesystem object for page.
*/
@@ -866,12 +846,9 @@ static inline loff_t folio_file_pos(struct folio *folio)
/*
* Get the offset in PAGE_SIZE (even for hugetlb folios).
- * (TODO: hugetlb folios should have ->index in PAGE_SIZE)
*/
static inline pgoff_t folio_pgoff(struct folio *folio)
{
- if (unlikely(folio_test_hugetlb(folio)))
- return hugetlb_basepage_index(&folio->page);
return folio->index;
}
@@ -882,8 +859,6 @@ static inline pgoff_t linear_page_index(struct vm_area_struct *vma,
unsigned long address)
{
pgoff_t pgoff;
- if (unlikely(is_vm_hugetlb_page(vma)))
- return linear_hugepage_index(vma, address);
pgoff = (address - vma->vm_start) >> PAGE_SHIFT;
pgoff += vma->vm_pgoff;
return pgoff;
@@ -134,11 +134,8 @@ static void page_cache_delete(struct address_space *mapping,
mapping_set_update(&xas, mapping);
- /* hugetlb pages are represented by a single entry in the xarray */
- if (!folio_test_hugetlb(folio)) {
- xas_set_order(&xas, folio->index, folio_order(folio));
- nr = folio_nr_pages(folio);
- }
+ xas_set_order(&xas, folio->index, folio_order(folio));
+ nr = folio_nr_pages(folio);
VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
@@ -237,7 +234,7 @@ void filemap_free_folio(struct address_space *mapping, struct folio *folio)
if (free_folio)
free_folio(folio);
- if (folio_test_large(folio) && !folio_test_hugetlb(folio))
+ if (folio_test_large(folio))
refs = folio_nr_pages(folio);
folio_put_refs(folio, refs);
}
@@ -858,14 +855,15 @@ noinline int __filemap_add_folio(struct address_space *mapping,
if (!huge) {
int error = mem_cgroup_charge(folio, NULL, gfp);
- VM_BUG_ON_FOLIO(index & (folio_nr_pages(folio) - 1), folio);
if (error)
return error;
charged = true;
- xas_set_order(&xas, index, folio_order(folio));
- nr = folio_nr_pages(folio);
}
+ VM_BUG_ON_FOLIO(index & (folio_nr_pages(folio) - 1), folio);
+ xas_set_order(&xas, index, folio_order(folio));
+ nr = folio_nr_pages(folio);
+
gfp &= GFP_RECLAIM_MASK;
folio_ref_add(folio, nr);
folio->mapping = mapping;
@@ -2038,7 +2036,7 @@ unsigned find_get_entries(struct address_space *mapping, pgoff_t *start,
int idx = folio_batch_count(fbatch) - 1;
folio = fbatch->folios[idx];
- if (!xa_is_value(folio) && !folio_test_hugetlb(folio))
+ if (!xa_is_value(folio))
nr = folio_nr_pages(folio);
*start = indices[idx] + nr;
}
@@ -2102,7 +2100,7 @@ unsigned find_lock_entries(struct address_space *mapping, pgoff_t *start,
int idx = folio_batch_count(fbatch) - 1;
folio = fbatch->folios[idx];
- if (!xa_is_value(folio) && !folio_test_hugetlb(folio))
+ if (!xa_is_value(folio))
nr = folio_nr_pages(folio);
*start = indices[idx] + nr;
}
@@ -2143,9 +2141,6 @@ unsigned filemap_get_folios(struct address_space *mapping, pgoff_t *start,
continue;
if (!folio_batch_add(fbatch, folio)) {
unsigned long nr = folio_nr_pages(folio);
-
- if (folio_test_hugetlb(folio))
- nr = 1;
*start = folio->index + nr;
goto out;
}
@@ -2171,7 +2166,7 @@ EXPORT_SYMBOL(filemap_get_folios);
static inline
bool folio_more_pages(struct folio *folio, pgoff_t index, pgoff_t max)
{
- if (!folio_test_large(folio) || folio_test_hugetlb(folio))
+ if (!folio_test_large(folio))
return false;
if (index >= max)
return false;
@@ -2221,9 +2216,6 @@ unsigned filemap_get_folios_contig(struct address_space *mapping,
if (!folio_batch_add(fbatch, folio)) {
nr = folio_nr_pages(folio);
-
- if (folio_test_hugetlb(folio))
- nr = 1;
*start = folio->index + nr;
goto out;
}
@@ -2240,10 +2232,7 @@ unsigned filemap_get_folios_contig(struct address_space *mapping,
if (nr) {
folio = fbatch->folios[nr - 1];
- if (folio_test_hugetlb(folio))
- *start = folio->index + 1;
- else
- *start = folio_next_index(folio);
+ *start = folio->index + folio_nr_pages(folio);
}
out:
rcu_read_unlock();
@@ -2281,9 +2270,6 @@ unsigned filemap_get_folios_tag(struct address_space *mapping, pgoff_t *start,
continue;
if (!folio_batch_add(fbatch, folio)) {
unsigned long nr = folio_nr_pages(folio);
-
- if (folio_test_hugetlb(folio))
- nr = 1;
*start = folio->index + nr;
goto out;
}
@@ -952,7 +952,7 @@ static long region_count(struct resv_map *resv, long f, long t)
/*
* Convert the address within this vma to the page offset within
- * the mapping, in pagecache page units; huge pages here.
+ * the mapping, huge page units here.
*/
static pgoff_t vma_hugecache_offset(struct hstate *h,
struct vm_area_struct *vma, unsigned long address)
@@ -5750,7 +5750,7 @@ static bool hugetlbfs_pagecache_present(struct hstate *h,
struct vm_area_struct *vma, unsigned long address)
{
struct address_space *mapping = vma->vm_file->f_mapping;
- pgoff_t idx = vma_hugecache_offset(h, vma, address);
+ pgoff_t idx = linear_page_index(vma, address);
struct folio *folio;
folio = filemap_get_folio(mapping, idx);
@@ -5767,6 +5767,7 @@ int hugetlb_add_to_page_cache(struct folio *folio, struct address_space *mapping
struct hstate *h = hstate_inode(inode);
int err;
+ idx <<= huge_page_order(h);
__folio_set_locked(folio);
err = __filemap_add_folio(mapping, folio, idx, GFP_KERNEL, NULL);
@@ -5874,7 +5875,7 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
* before we get page_table_lock.
*/
new_folio = false;
- folio = filemap_lock_folio(mapping, idx);
+ folio = filemap_lock_hugetlb_folio(h, mapping, idx);
if (IS_ERR(folio)) {
size = i_size_read(mapping->host) >> huge_page_shift(h);
if (idx >= size)
@@ -6183,7 +6184,7 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
/* Just decrements count, does not deallocate */
vma_end_reservation(h, vma, haddr);
- pagecache_folio = filemap_lock_folio(mapping, idx);
+ pagecache_folio = filemap_lock_hugetlb_folio(h, mapping, idx);
if (IS_ERR(pagecache_folio))
pagecache_folio = NULL;
}
@@ -6315,7 +6316,7 @@ int hugetlb_mfill_atomic_pte(pte_t *dst_pte,
if (is_continue) {
ret = -EFAULT;
- folio = filemap_lock_folio(mapping, idx);
+ folio = filemap_lock_hugetlb_folio(h, mapping, idx);
if (IS_ERR(folio))
goto out;
folio_in_pagecache = true;
This patchset attempts to implement a listed filemap TODO which is changing hugetlb folios to have ->index in PAGE_SIZE. This simplifies many functions within filemap.c as they have to special case hugetlb pages. New wrappers for hugetlb code are used to interact with the page cache using a linear index. Signed-off-by: Sidhartha Kumar <sidhartha.kumar@oracle.com> --- RFC v2[1] -> v1[2] -change direction of series to maintain both huge and base page size index rather than try to get rid of all references to a huge page sized index. v1 -> v2[3] - squash seperate filemap and hugetlb changes into one patch to allow for bisection per Matthew - get rid of page_to_index() - fix errors in hugetlb_fallocate() and remove_inode_hugepages() v2 -> v3 - gather performance data per Mike Kravetz - remove start variable in remove_inode_hugepages() per Mike Kravetz - remove hugetlb special case within folio_file_page() ========================= PERFORMANCE ====================================== Perf was used to check the performance differences after the patch. Overall the performance is similar to mainline with a very small larger overhead that occurs in __filemap_add_folio() and hugetlb_add_to_page_cache(). This is because of the larger overhead that occurs in xa_load() and xa_store() as the xarray is now using more entriese to store hugetlb folios in the page cache. aarch64: workload - fallocate a 700GB file backed by huge pages 6.5-rc3 + this patch: 2MB Page Size: --100.00%--__arm64_sys_fallocate ksys_fallocate vfs_fallocate hugetlbfs_fallocate | |--95.04%--__pi_clear_page | |--3.57%--clear_huge_page | | | |--2.63%--rcu_all_qs | | | --0.91%--__cond_resched | --0.67%--__cond_resched 0.17% 0.00% 0 fallocate [kernel.vmlinux] [k] hugetlb_add_to_page_cache 0.14% 0.10% 11 fallocate [kernel.vmlinux] [k] __filemap_add_folio 1GB Page Size: --100.00%--__arm64_sys_fallocate ksys_fallocate vfs_fallocate hugetlbfs_fallocate | |--94.79%--__pi_clear_page | |--4.34%--clear_huge_page | | | |--3.27%--rcu_all_qs | | | --1.05%--__cond_resched | --0.86%--__cond_resched 0.01% 0.01% 1 fallocate [kernel.vmlinux] [k] __filemap_add_folio 0.01% 0.00% 0 fallocate [kernel.vmlinux] [k] hugetlb_add_to_page_cache 6.5-rc3 2MB Page Size: --100.00%--__arm64_sys_fallocate ksys_fallocate vfs_fallocate hugetlbfs_fallocate | |--94.91%--__pi_clear_page | |--4.11%--clear_huge_page | | | |--3.00%--rcu_all_qs | | | --1.10%--__cond_resched | --0.59%--__cond_resched 0.08% 0.01% 1 fallocate [kernel.kallsyms] [k] hugetlb_add_to_page_cache 0.05% 0.03% 3 fallocate [kernel.kallsyms] [k] __filemap_add_folio 1GB Page Size: --100.00%--__arm64_sys_fallocate ksys_fallocate vfs_fallocate hugetlbfs_fallocate | |--94.01%--__pi_clear_page | |--5.11%--clear_huge_page | | | |--3.89%--rcu_all_qs | | | --1.20%--__cond_resched | --0.88%--__cond_resched x86 workload - fallocate a 100GB file backed by huge pages 6.5-rc3 + this patch: 2MB Page Size: hugetlbfs_fallocate | --99.57%--clear_huge_page | --98.47%--clear_page_erms | --0.53%--asm_sysvec_apic_timer_interrupt 0.04% 0.04% 1 fallocate [kernel.kallsyms] [k] xa_load 0.04% 0.00% 0 fallocate [kernel.kallsyms] [k] hugetlb_add_to_page_cache 0.04% 0.00% 0 fallocate [kernel.kallsyms] [k] __filemap_add_folio 0.04% 0.00% 0 fallocate [kernel.kallsyms] [k] xas_store 1GB Page Size: --99.95%--__x64_sys_fallocate vfs_fallocate hugetlbfs_fallocate clear_huge_page | --98.84%--clear_page_erms 6.5-rc3 2MB Page Size: --99.93%--__x64_sys_fallocate vfs_fallocate hugetlbfs_fallocate | --99.38%--clear_huge_page | |--98.40%--clear_page_erms | --0.59%--__cond_resched 0.03% 0.03% 1 fallocate [kernel.kallsyms] [k] __filemap_add_folio 1GB Page Size: --99.95%--__x64_sys_fallocate vfs_fallocate hugetlbfs_fallocate | --99.90%--clear_huge_page | --99.38%--clear_page_erms ========================= TESTING ====================================== This patch passes libhugetlbfs tests and LTP hugetlb tests ********** TEST SUMMARY * 2M * 32-bit 64-bit * Total testcases: 110 113 * Skipped: 0 0 * PASS: 107 113 * FAIL: 0 0 * Killed by signal: 3 0 * Bad configuration: 0 0 * Expected FAIL: 0 0 * Unexpected PASS: 0 0 * Test not present: 0 0 * Strange test result: 0 0 ********** ############################################################### Done executing testcases. LTP Version: 20220527-178-g2761a81c4 ############################################################### rebased on mm-unstable 08/02/23 [1]: https://lore.kernel.org/linux-mm/20230519220142.212051-1-sidhartha.kumar@oracle.com/T/ [2]: https://lore.kernel.org/lkml/20230609194947.37196-1-sidhartha.kumar@oracle.com/ [3]: https://lore.kernel.org/lkml/ZLtVlJA+V2+2yjxc@casper.infradead.org/T/ fs/hugetlbfs/inode.c | 15 ++++++++------- include/linux/hugetlb.h | 12 ++++++++++++ include/linux/pagemap.h | 29 ++--------------------------- mm/filemap.c | 36 +++++++++++------------------------- mm/hugetlb.c | 11 ++++++----- 5 files changed, 39 insertions(+), 64 deletions(-)