@@ -406,13 +406,96 @@ static int try_context_readahead(struct address_space *mapping,
return 1;
}
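+/*
+ * Allocate a page of the given order and add it to the page cache at
+ * @offset.  If the readahead marker @mark falls within this page, set
+ * PageReadahead so a later access kicks off the next readahead batch.
+ */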
+static inline int ra_alloc_page(struct address_space *mapping, pgoff_t offset,
+ pgoff_t mark, unsigned int order, gfp_t gfp)
+{
+ int err;
+ struct page *page = __page_cache_alloc_order(gfp, order);
+
+ if (!page)
+ return -ENOMEM;
+ if (mark - offset < (1UL << order))
+ SetPageReadahead(page);
+ err = add_to_page_cache_lru(page, mapping, offset, gfp);
+ if (err)
+ put_page(page);
+ return err;
+}
+
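+/* The largest order we allocate: enough pages to fill one PMD entry */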
+#define PMD_ORDER (PMD_SHIFT - PAGE_SHIFT)
+
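+/*
+ * Fill the readahead window starting at ra->start with pages of up to
+ * PMD order, falling back to ra_submit() if any allocation or page
+ * cache insertion fails along the way.
+ */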
+static unsigned long page_cache_readahead_order(struct address_space *mapping,
+ struct file_ra_state *ra, struct file *file, unsigned int order)
+{
+ struct readahead_control rac = {
+ .mapping = mapping,
+ .file = file,
+ .start = ra->start,
+ .nr_pages = 0,
+ };
+ unsigned int old_order = order;
+ pgoff_t offset = ra->start;
+ pgoff_t limit = (i_size_read(mapping->host) - 1) >> PAGE_SHIFT;
+ pgoff_t mark = offset + ra->size - ra->async_size;
+ int err = 0;
+ gfp_t gfp = readahead_gfp_mask(mapping);
+
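+ /* Clamp the last page index to both EOF and the end of the window */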
+ limit = min(limit, offset + ra->size - 1);
+
+ /* Grow page size up to PMD size */
+ if (order < PMD_ORDER) {
+ order += 2;
+ if (order > PMD_ORDER)
+ order = PMD_ORDER;
+ while ((1 << order) > ra->size)
+ order--;
+ }
+
+ /* If the start of the window is misaligned, fill with order-0 pages */
+ while (!err && offset & ((1UL << old_order) - 1)) {
+ err = ra_alloc_page(mapping, offset++, mark, 0, gfp);
+ if (!err)
+ rac.nr_pages++;
+ }
+
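+ /* Step up to the final order's alignment using old-order pages */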
+ while (!err && offset & ((1UL << order) - 1)) {
+ err = ra_alloc_page(mapping, offset, mark, old_order, gfp);
+ if (!err)
+ rac.nr_pages += 1UL << old_order;
+ offset += 1UL << old_order;
+ }
+
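+ /* Now aligned; fill the rest of the window with large pages */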
+ while (!err && offset <= limit) {
+ err = ra_alloc_page(mapping, offset, mark, order, gfp);
+ if (!err)
+ rac.nr_pages += 1UL << order;
+ offset += 1UL << order;
+ }
+
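+ /*
+ * Rounding up to a large page may overshoot the window; grow it to
+ * cover the pages we actually allocated.
+ */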
+ if (offset > limit) {
+ ra->size += offset - limit - 1;
+ ra->async_size += offset - limit - 1;
+ }
+
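+ /* Start I/O on everything we added to the page cache */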
+ read_pages(&rac, NULL);
+
+ /*
+ * If there were already pages in the page cache, then we may have
+ * left some gaps. Let the regular readahead code take care of this
+ * situation.
+ */
+ if (err)
+ return ra_submit(ra, mapping, file);
+ return 0;
+}
+
/*
* A minimal readahead algorithm for trivial sequential/random reads.
*/
static unsigned long
ondemand_readahead(struct address_space *mapping,
struct file_ra_state *ra, struct file *filp,
- bool hit_readahead_marker, pgoff_t offset,
+ struct page *page, pgoff_t offset,
unsigned long req_size)
{
struct backing_dev_info *bdi = inode_to_bdi(mapping->host);
@@ -451,7 +534,7 @@ ondemand_readahead(struct address_space *mapping,
 * Query the pagecache for async_size, which normally equals the
 * readahead size. Ramp it up and use it as the new readahead size.
*/
- if (hit_readahead_marker) {
+ if (page) {
pgoff_t start;
rcu_read_lock();
@@ -520,7 +603,12 @@ ondemand_readahead(struct address_space *mapping,
}
}
- return ra_submit(ra, mapping, filp);
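+ /*
+ * Only use the large page path when THP is enabled, the filesystem
+ * has opted in with FS_LARGE_PAGES, and the async path gave us a
+ * page to take the initial order from.
+ */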
+ if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) || !page ||
+ !(mapping->host->i_sb->s_type->fs_flags & FS_LARGE_PAGES))
+ return ra_submit(ra, mapping, filp);
+
+ return page_cache_readahead_order(mapping, ra, filp,
+ compound_order(page));
}
/**
@@ -555,7 +643,7 @@ void page_cache_sync_readahead(struct address_space *mapping,
}
/* do read-ahead */
- ondemand_readahead(mapping, ra, filp, false, offset, req_size);
+ ondemand_readahead(mapping, ra, filp, NULL, offset, req_size);
}
EXPORT_SYMBOL_GPL(page_cache_sync_readahead);
@@ -602,7 +690,7 @@ page_cache_async_readahead(struct address_space *mapping,
return;
/* do read-ahead */
- ondemand_readahead(mapping, ra, filp, true, offset, req_size);
+ ondemand_readahead(mapping, ra, filp, page, offset, req_size);
}
EXPORT_SYMBOL_GPL(page_cache_async_readahead);