diff mbox series

[v2,15/25] netfs: Use new folio_queue data type and iterator instead of xarray iter

Message ID 20240814203850.2240469-16-dhowells@redhat.com (mailing list archive)
State New
Headers show
Series netfs: Read/write improvements | expand

Commit Message

David Howells Aug. 14, 2024, 8:38 p.m. UTC
Make the netfs write-side routines use the new folio_queue struct to hold a
rolling buffer of folios, with the issuer adding folios at the tail and the
collector removing them from the head as they're processed instead of using
an xarray.

This will allow a subsequent patch to simplify the write collector.

The primary mark (as tested by folioq_is_marked()) is used to note if the
corresponding folio needs putting.

Signed-off-by: David Howells <dhowells@redhat.com>
cc: Jeff Layton <jlayton@kernel.org>
cc: netfs@lists.linux.dev
cc: linux-fsdevel@vger.kernel.org
---
 fs/netfs/internal.h          |  9 +++-
 fs/netfs/misc.c              | 76 ++++++++++++++++++++++++++++++++
 fs/netfs/objects.c           |  1 +
 fs/netfs/stats.c             |  4 +-
 fs/netfs/write_collect.c     | 84 +++++++++++++++++++-----------------
 fs/netfs/write_issue.c       | 28 ++++++------
 include/linux/netfs.h        |  8 ++--
 include/trace/events/netfs.h |  1 +
 8 files changed, 150 insertions(+), 61 deletions(-)

Comments

Leon Romanovsky Sept. 24, 2024, 9:48 a.m. UTC | #1
On Wed, Aug 14, 2024 at 09:38:35PM +0100, David Howells wrote:
> Make the netfs write-side routines use the new folio_queue struct to hold a
> rolling buffer of folios, with the issuer adding folios at the tail and the
> collector removing them from the head as they're processed instead of using
> an xarray.
> 
> This will allow a subsequent patch to simplify the write collector.
> 
> The primary mark (as tested by folioq_is_marked()) is used to note if the
> corresponding folio needs putting.
> 
> Signed-off-by: David Howells <dhowells@redhat.com>
> cc: Jeff Layton <jlayton@kernel.org>
> cc: netfs@lists.linux.dev
> cc: linux-fsdevel@vger.kernel.org
> ---
>  fs/netfs/internal.h          |  9 +++-
>  fs/netfs/misc.c              | 76 ++++++++++++++++++++++++++++++++
>  fs/netfs/objects.c           |  1 +
>  fs/netfs/stats.c             |  4 +-
>  fs/netfs/write_collect.c     | 84 +++++++++++++++++++-----------------
>  fs/netfs/write_issue.c       | 28 ++++++------
>  include/linux/netfs.h        |  8 ++--
>  include/trace/events/netfs.h |  1 +
>  8 files changed, 150 insertions(+), 61 deletions(-)

According to git bisect, this commit causes the following kernel splat
when booting a system whose root is on a 9p filesystem.

#
# Caches
#
CONFIG_NETFS_SUPPORT=y
# CONFIG_NETFS_STATS is not set
# CONFIG_NETFS_DEBUG is not set
# CONFIG_FSCACHE is not set
# end of Caches

...
CONFIG_9P_FS=y
...

[    1.510725][    T1] Run /sbin/init as init process
[    1.510937][    T1]   with arguments:
[    1.511060][    T1]     /sbin/init
[    1.511233][    T1]   with environment:
[    1.511332][    T1]     HOME=/
[    1.511448][    T1]     TERM=linux
[    1.516066][    T1] page: refcount:0 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x6ce48
[    1.516920][    T1] flags: 0x4000000000000000(zone=1)
[    1.517112][    T1] raw: 4000000000000000 ffffea0001b39248 ffffea00001583c8 0000000000000000
[    1.517374][    T1] raw: 0000000000000000 0000000000000000 00000000ffffffff 0000000000000000
[    1.517767][    T1] page dumped because: VM_BUG_ON_FOLIO(((unsigned int) folio_ref_count(folio) + 127u <= 127u))
[    1.518144][    T1] ------------[ cut here ]------------
[    1.518311][    T1] kernel BUG at include/linux/mm.h:1444!
[    1.518488][    T1] Oops: invalid opcode: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN
[    1.518738][    T1] CPU: 1 UID: 0 PID: 1 Comm: swapper/0 Not tainted 6.11.0+ #2488
[    1.518990][    T1] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014
[    1.519325][    T1] RIP: 0010:__iov_iter_get_pages_alloc+0x16d4/0x2210
[    1.519540][    T1] Code: 84 f2 fa ff ff 48 89 ef e8 49 28 98 ff e9 e5 fa ff ff 48 8d 48 ff e9 2c fe ff ff 48 c7 c6 20 ee 21 83 48 89 cf e8 7c 2d 8a ff <0f> 0b 48 b8 00 00 00 00 00 fc ff df 4c 8b 74 24 68 44 8b 5c 24 30
[    1.520110][    T1] RSP: 0000:ffff8880060f6e40 EFLAGS: 00010286
[    1.520317][    T1] RAX: 000000000000005c RBX: ffffea0001b39234 RCX: 0000000000000000
[    1.520547][    T1] RDX: 000000000000005c RSI: 0000000000000004 RDI: ffffed1000c1edbb
[    1.520776][    T1] RBP: dffffc0000000000 R08: 0000000000000000 R09: fffffbfff0718ce0
[    1.521027][    T1] R10: 0000000000000003 R11: 0000000000000001 R12: ffff8880065bd7e0
[    1.521252][    T1] R13: ffff888006644000 R14: 0000000000000002 R15: 0000000000001000
[    1.521475][    T1] FS:  0000000000000000(0000) GS:ffff88806ce80000(0000) knlGS:0000000000000000
[    1.521761][    T1] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[    1.521961][    T1] CR2: 0000000000000000 CR3: 0000000003881001 CR4: 0000000000370eb0
[    1.522200][    T1] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[    1.522418][    T1] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[    1.522636][    T1] Call Trace:
[    1.522750][    T1]  <TASK>
[    1.522823][    T1]  ? __die+0x52/0x8f
[    1.522939][    T1]  ? die+0x2a/0x50
[    1.523061][    T1]  ? do_trap+0x1d9/0x2c0
[    1.523163][    T1]  ? __iov_iter_get_pages_alloc+0x16d4/0x2210
[    1.523334][    T1]  ? do_error_trap+0xa3/0x160
[    1.523465][    T1]  ? __iov_iter_get_pages_alloc+0x16d4/0x2210
[    1.523633][    T1]  ? handle_invalid_op+0x2c/0x30
[    1.523765][    T1]  ? __iov_iter_get_pages_alloc+0x16d4/0x2210
[    1.523942][    T1]  ? exc_invalid_op+0x29/0x40
[    1.524087][    T1]  ? asm_exc_invalid_op+0x16/0x20
[    1.524238][    T1]  ? __iov_iter_get_pages_alloc+0x16d4/0x2210
[    1.524426][    T1]  ? iov_iter_extract_pages+0x1ee0/0x1ee0
[    1.524575][    T1]  ? radix_tree_node_alloc.constprop.0+0x16a/0x2c0
[    1.524762][    T1]  ? lock_acquire+0xe2/0x500
[    1.524916][    T1]  ? mark_lock+0xfc/0x2dc0
[    1.525071][    T1]  iov_iter_get_pages_alloc2+0x3d/0xe0
[    1.525208][    T1]  ? print_usage_bug.part.0+0x600/0x600
[    1.525392][    T1]  p9_get_mapped_pages.part.0.constprop.0+0x3bf/0x6c0
[    1.525595][    T1]  ? p9pdu_vwritef+0x320/0x1f20
[    1.525756][    T1]  ? p9_virtio_request+0x550/0x550
[    1.525918][    T1]  ? pdu_read+0xc0/0xc0
[    1.526056][    T1]  ? lock_release+0x220/0x780
[    1.526218][    T1]  ? pdu_read+0xc0/0xc0
[    1.526341][    T1]  p9_virtio_zc_request+0x728/0x1020
[    1.526501][    T1]  ? p9pdu_vwritef+0x320/0x1f20
[    1.526662][    T1]  ? p9_virtio_probe+0xa20/0xa20
[    1.526824][    T1]  ? netfs_read_to_pagecache+0x601/0xd50
[    1.526990][    T1]  ? mark_lock+0xfc/0x2dc0
[    1.527159][    T1]  ? p9pdu_finalize+0xdc/0x1d0
[    1.527321][    T1]  ? p9_client_prepare_req+0x235/0x360
[    1.527483][    T1]  ? p9_tag_alloc+0x6e0/0x6e0
[    1.527644][    T1]  ? lock_release+0x220/0x780
[    1.527806][    T1]  p9_client_zc_rpc.constprop.0+0x236/0x7d0
[    1.528013][    T1]  ? __create_object+0x5e/0x80
[    1.528175][    T1]  ? p9_client_flush.isra.0+0x390/0x390
[    1.528345][    T1]  ? lockdep_hardirqs_on_prepare+0x268/0x3e0
[    1.528544][    T1]  ? __call_rcu_common.constprop.0+0x475/0xc80
[    1.528785][    T1]  ? p9_req_put+0x17a/0x200
[    1.528944][    T1]  p9_client_read_once+0x343/0x840
[    1.529114][    T1]  ? p9_client_getlock_dotl+0x3c0/0x3c0
[    1.529274][    T1]  p9_client_read+0xf1/0x150
[    1.529440][    T1]  v9fs_issue_read+0x107/0x2c0
[    1.529608][    T1]  ? v9fs_issue_write+0x140/0x140
[    1.529736][    T1]  netfs_read_to_pagecache+0x601/0xd50
[    1.529858][    T1]  netfs_readahead+0x6af/0xbe0
[    1.530000][    T1]  read_pages+0x17b/0xaf0
[    1.530136][    T1]  ? lru_move_tail+0x8f0/0x8f0
[    1.530299][    T1]  ? file_ra_state_init+0xd0/0xd0
[    1.530479][    T1]  page_cache_ra_unbounded+0x324/0x5f0
[    1.530638][    T1]  filemap_get_pages+0x597/0x16b0
[    1.530801][    T1]  ? filemap_add_folio+0x140/0x140
[    1.530957][    T1]  ? lock_is_held_type+0x81/0xe0
[    1.531121][    T1]  filemap_read+0x2ec/0xa90
[    1.531282][    T1]  ? filemap_get_pages+0x16b0/0x16b0
[    1.531443][    T1]  ? 0xffffffff81000000
[    1.531565][    T1]  ? find_held_lock+0x2d/0x110
[    1.531720][    T1]  ? lock_is_held_type+0x81/0xe0
[    1.531888][    T1]  ? down_read_interruptible+0x1f6/0x490
[    1.532062][    T1]  ? down_read+0x450/0x450
[    1.532229][    T1]  ? lockdep_hardirqs_on_prepare+0x3e0/0x3e0
[    1.532435][    T1]  ? find_held_lock+0x2d/0x110
[    1.532594][    T1]  netfs_buffered_read_iter+0xe2/0x130
[    1.532755][    T1]  ? netfs_file_read_iter+0xb2/0x130
[    1.532904][    T1]  __kernel_read+0x2db/0x8a0
[    1.533066][    T1]  ? __x64_sys_lseek+0x1d0/0x1d0
[    1.533221][    T1]  bprm_execve+0x548/0x1410
[    1.533381][    T1]  ? setup_arg_pages+0xb40/0xb40
[    1.533534][    T1]  ? __cond_resched+0x17/0x70
[    1.533684][    T1]  kernel_execve+0x26a/0x2f0
[    1.533808][    T1]  try_to_run_init_process+0xf/0x30
[    1.533933][    T1]  ? rest_init+0x1b0/0x1b0
[    1.534064][    T1]  kernel_init+0xe2/0x140
[    1.534160][    T1]  ? _raw_spin_unlock_irq+0x24/0x30
[    1.534285][    T1]  ret_from_fork+0x2d/0x70
[    1.534415][    T1]  ? rest_init+0x1b0/0x1b0
[    1.534558][    T1]  ret_from_fork_asm+0x11/0x20
[    1.534730][    T1]  </TASK>
[    1.534858][    T1] Modules linked in:
[    1.535016][    T1] ---[ end trace 0000000000000000 ]---
[    1.535173][    T1] RIP: 0010:__iov_iter_get_pages_alloc+0x16d4/0x2210
[    1.535385][    T1] Code: 84 f2 fa ff ff 48 89 ef e8 49 28 98 ff e9 e5 fa ff ff 48 8d 48 ff e9 2c fe ff ff 48 c7 c6 20 ee 21 83 48 89 cf e8 7c 2d 8a ff <0f> 0b 48 b8 00 00 00 00 00 fc ff df 4c 8b 74 24 68 44 8b 5c 24 30
[    1.535967][    T1] RSP: 0000:ffff8880060f6e40 EFLAGS: 00010286
[    1.536183][    T1] RAX: 000000000000005c RBX: ffffea0001b39234 RCX: 0000000000000000
[    1.536426][    T1] RDX: 000000000000005c RSI: 0000000000000004 RDI: ffffed1000c1edbb
[    1.536667][    T1] RBP: dffffc0000000000 R08: 0000000000000000 R09: fffffbfff0718ce0
[    1.536914][    T1] R10: 0000000000000003 R11: 0000000000000001 R12: ffff8880065bd7e0
[    1.537163][    T1] R13: ffff888006644000 R14: 0000000000000002 R15: 0000000000001000
[    1.537409][    T1] FS:  0000000000000000(0000) GS:ffff88806ce80000(0000) knlGS:0000000000000000
[    1.537842][    T1] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[    1.538035][    T1] CR2: 0000000000000000 CR3: 0000000003881001 CR4: 0000000000370eb0
[    1.538281][    T1] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[    1.538519][    T1] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[    1.538779][    T1] ------------[ cut here ]------------
[    1.538904][    T1] WARNING: CPU: 1 PID: 1 at kernel/exit.c:886 do_exit+0x17c4/0x23a0
[    1.539110][    T1] Modules linked in:
[    1.539229][    T1] CPU: 1 UID: 0 PID: 1 Comm: swapper/0 Tainted: G      D            6.11.0+ #2488
[    1.539459][    T1] Tainted: [D]=DIE
[    1.539567][    T1] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014
[    1.539860][    T1] RIP: 0010:do_exit+0x17c4/0x23a0
[    1.540008][    T1] Code: bb 68 04 00 00 31 f6 e8 5a 92 ff ff e9 d6 f8 ff ff 4c 89 fe bf 05 06 00 00 e8 c8 53 02 00 e9 5c ec ff ff 0f 0b e9 b7 e8 ff ff <0f> 0b e9 27 ea ff ff 48 89 df e8 ad 90 ff ff 48 85 c0 49 89 c7 0f
[    1.540502][    T1] RSP: 0000:ffff8880060f7e68 EFLAGS: 00010286
[    1.540657][    T1] RAX: dffffc0000000000 RBX: ffff8880060e8000 RCX: 1ffffffff07aebdf
[    1.540860][    T1] RDX: 1ffff11000c1d20b RSI: 0000000000000008 RDI: ffff8880060e9058
[    1.541078][    T1] RBP: ffff8880060e8708 R08: 0000000000000000 R09: fffffbfff07ae5c1
[    1.541261][    T1] R10: 0000000000000000 R11: 0000000000000001 R12: ffff888006108000
[    1.541437][    T1] R13: ffff8880060e8710 R14: ffff888006100000 R15: 000000000000000b
[    1.541645][    T1] FS:  0000000000000000(0000) GS:ffff88806ce80000(0000) knlGS:0000000000000000
[    1.541875][    T1] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[    1.542043][    T1] CR2: 0000000000000000 CR3: 0000000003881001 CR4: 0000000000370eb0
[    1.542237][    T1] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[    1.542432][    T1] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[    1.542622][    T1] Call Trace:
[    1.542733][    T1]  <TASK>
[    1.542801][    T1]  ? __warn.cold+0x5f/0x1ed
[    1.542930][    T1]  ? do_exit+0x17c4/0x23a0
[    1.543063][    T1]  ? report_bug+0x1e6/0x290
[    1.543190][    T1]  ? handle_bug+0x4f/0x90
[    1.543290][    T1]  ? exc_invalid_op+0x13/0x40
[    1.543418][    T1]  ? asm_exc_invalid_op+0x16/0x20
[    1.543545][    T1]  ? do_exit+0x17c4/0x23a0
[    1.543676][    T1]  ? do_exit+0x1c2/0x23a0
[    1.543774][    T1]  ? __cond_resched+0x17/0x70
[    1.543904][    T1]  ? is_current_pgrp_orphaned+0x90/0x90
[    1.544040][    T1]  ? kernel_execve+0x26a/0x2f0
[    1.544169][    T1]  ? __iov_iter_get_pages_alloc+0x16d4/0x2210
[    1.544329][    T1]  make_task_dead+0xf0/0x110
[    1.544462][    T1]  rewind_stack_and_make_dead+0x16/0x20
[    1.544595][    T1] RIP: 0000:0x0
[    1.544708][    T1] Code: Unable to access opcode bytes at 0xffffffffffffffd6.
[    1.544903][    T1] RSP: 0000:0000000000000000 EFLAGS: 00000000 ORIG_RAX: 0000000000000000
[    1.545098][    T1] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000
[    1.545286][    T1] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000
[    1.545489][    T1] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000
[    1.545699][    T1] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
[    1.545898][    T1] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
[    1.546099][    T1]  </TASK>
[    1.546198][    T1] Kernel panic - not syncing: kernel: panic_on_warn set ...
[    1.546654][    T1] Kernel Offset: disabled
[    1.546769][    T1] ---[ end Kernel panic - not syncing: kernel: panic_on_warn set ... ]---

Thanks
diff mbox series

Patch

diff --git a/fs/netfs/internal.h b/fs/netfs/internal.h
index f2920b4ee726..e1149e05a5c8 100644
--- a/fs/netfs/internal.h
+++ b/fs/netfs/internal.h
@@ -7,6 +7,7 @@ 
 
 #include <linux/slab.h>
 #include <linux/seq_file.h>
+#include <linux/folio_queue.h>
 #include <linux/netfs.h>
 #include <linux/fscache.h>
 #include <linux/fscache-cache.h>
@@ -64,6 +65,10 @@  static inline void netfs_proc_del_rreq(struct netfs_io_request *rreq) {}
 /*
  * misc.c
  */
+int netfs_buffer_append_folio(struct netfs_io_request *rreq, struct folio *folio,
+			      bool needs_put);
+struct folio_queue *netfs_delete_buffer_head(struct netfs_io_request *wreq);
+void netfs_clear_buffer(struct netfs_io_request *rreq);
 
 /*
  * objects.c
@@ -120,6 +125,7 @@  extern atomic_t netfs_n_wh_write_done;
 extern atomic_t netfs_n_wh_write_failed;
 extern atomic_t netfs_n_wb_lock_skip;
 extern atomic_t netfs_n_wb_lock_wait;
+extern atomic_t netfs_n_folioq;
 
 int netfs_stats_show(struct seq_file *m, void *v);
 
@@ -153,7 +159,8 @@  struct netfs_io_request *netfs_create_write_req(struct address_space *mapping,
 						loff_t start,
 						enum netfs_io_origin origin);
 void netfs_reissue_write(struct netfs_io_stream *stream,
-			 struct netfs_io_subrequest *subreq);
+			 struct netfs_io_subrequest *subreq,
+			 struct iov_iter *source);
 int netfs_advance_write(struct netfs_io_request *wreq,
 			struct netfs_io_stream *stream,
 			loff_t start, size_t len, bool to_eof);
diff --git a/fs/netfs/misc.c b/fs/netfs/misc.c
index 554a1a4615ad..e642e5cacb8d 100644
--- a/fs/netfs/misc.c
+++ b/fs/netfs/misc.c
@@ -8,6 +8,82 @@ 
 #include <linux/swap.h>
 #include "internal.h"
 
+/*
+ * Append a folio to the rolling queue.
+ */
+int netfs_buffer_append_folio(struct netfs_io_request *rreq, struct folio *folio,
+			      bool needs_put)
+{
+	struct folio_queue *tail = rreq->buffer_tail;
+	unsigned int slot, order = folio_order(folio);
+
+	if (WARN_ON_ONCE(!rreq->buffer && tail) ||
+	    WARN_ON_ONCE(rreq->buffer && !tail))
+		return -EIO;
+
+	if (!tail || folioq_full(tail)) {
+		tail = kmalloc(sizeof(*tail), GFP_NOFS);
+		if (!tail)
+			return -ENOMEM;
+		netfs_stat(&netfs_n_folioq);
+		folioq_init(tail);
+		tail->prev = rreq->buffer_tail;
+		if (tail->prev)
+			tail->prev->next = tail;
+		rreq->buffer_tail = tail;
+		if (!rreq->buffer) {
+			rreq->buffer = tail;
+			iov_iter_folio_queue(&rreq->io_iter, ITER_SOURCE, tail, 0, 0, 0);
+		}
+		rreq->buffer_tail_slot = 0;
+	}
+
+	rreq->io_iter.count += PAGE_SIZE << order;
+
+	slot = folioq_append(tail, folio);
+	/* Store the counter after setting the slot. */
+	smp_store_release(&rreq->buffer_tail_slot, slot);
+	return 0;
+}
+
+/*
+ * Delete the head of a rolling queue.
+ */
+struct folio_queue *netfs_delete_buffer_head(struct netfs_io_request *wreq)
+{
+	struct folio_queue *head = wreq->buffer, *next = head->next;
+
+	if (next)
+		next->prev = NULL;
+	netfs_stat_d(&netfs_n_folioq);
+	kfree(head);
+	wreq->buffer = next;
+	return next;
+}
+
+/*
+ * Clear out a rolling queue.
+ */
+void netfs_clear_buffer(struct netfs_io_request *rreq)
+{
+	struct folio_queue *p;
+
+	while ((p = rreq->buffer)) {
+		rreq->buffer = p->next;
+		for (int slot = 0; slot < folioq_nr_slots(p); slot++) {
+			struct folio *folio = folioq_folio(p, slot);
+			if (!folio)
+				continue;
+			if (folioq_is_marked(p, slot)) {
+				trace_netfs_folio(folio, netfs_folio_trace_put);
+				folio_put(folio);
+			}
+		}
+		netfs_stat_d(&netfs_n_folioq);
+		kfree(p);
+	}
+}
+
 /**
  * netfs_dirty_folio - Mark folio dirty and pin a cache object for writeback
  * @mapping: The mapping the folio belongs to.
diff --git a/fs/netfs/objects.c b/fs/netfs/objects.c
index d6e9785ce7a3..4291cd405fc1 100644
--- a/fs/netfs/objects.c
+++ b/fs/netfs/objects.c
@@ -141,6 +141,7 @@  static void netfs_free_request(struct work_struct *work)
 		}
 		kvfree(rreq->direct_bv);
 	}
+	netfs_clear_buffer(rreq);
 
 	if (atomic_dec_and_test(&ictx->io_count))
 		wake_up_var(&ictx->io_count);
diff --git a/fs/netfs/stats.c b/fs/netfs/stats.c
index 5fe1c396e24f..5065289f5555 100644
--- a/fs/netfs/stats.c
+++ b/fs/netfs/stats.c
@@ -41,6 +41,7 @@  atomic_t netfs_n_wh_write_done;
 atomic_t netfs_n_wh_write_failed;
 atomic_t netfs_n_wb_lock_skip;
 atomic_t netfs_n_wb_lock_wait;
+atomic_t netfs_n_folioq;
 
 int netfs_stats_show(struct seq_file *m, void *v)
 {
@@ -76,9 +77,10 @@  int netfs_stats_show(struct seq_file *m, void *v)
 		   atomic_read(&netfs_n_wh_write),
 		   atomic_read(&netfs_n_wh_write_done),
 		   atomic_read(&netfs_n_wh_write_failed));
-	seq_printf(m, "Objs   : rr=%u sr=%u wsc=%u\n",
+	seq_printf(m, "Objs   : rr=%u sr=%u foq=%u wsc=%u\n",
 		   atomic_read(&netfs_n_rh_rreq),
 		   atomic_read(&netfs_n_rh_sreq),
+		   atomic_read(&netfs_n_folioq),
 		   atomic_read(&netfs_n_wh_wstream_conflict));
 	seq_printf(m, "WbLock : skip=%u wait=%u\n",
 		   atomic_read(&netfs_n_wb_lock_skip),
diff --git a/fs/netfs/write_collect.c b/fs/netfs/write_collect.c
index 5f504b03a1e7..1521a23077c3 100644
--- a/fs/netfs/write_collect.c
+++ b/fs/netfs/write_collect.c
@@ -74,42 +74,6 @@  int netfs_folio_written_back(struct folio *folio)
 	return gcount;
 }
 
-/*
- * Get hold of a folio we have under writeback.  We don't want to get the
- * refcount on it.
- */
-static struct folio *netfs_writeback_lookup_folio(struct netfs_io_request *wreq, loff_t pos)
-{
-	XA_STATE(xas, &wreq->mapping->i_pages, pos / PAGE_SIZE);
-	struct folio *folio;
-
-	rcu_read_lock();
-
-	for (;;) {
-		xas_reset(&xas);
-		folio = xas_load(&xas);
-		if (xas_retry(&xas, folio))
-			continue;
-
-		if (!folio || xa_is_value(folio))
-			kdebug("R=%08x: folio %lx (%llx) not present",
-			       wreq->debug_id, xas.xa_index, pos / PAGE_SIZE);
-		BUG_ON(!folio || xa_is_value(folio));
-
-		if (folio == xas_reload(&xas))
-			break;
-	}
-
-	rcu_read_unlock();
-
-	if (WARN_ONCE(!folio_test_writeback(folio),
-		      "R=%08x: folio %lx is not under writeback\n",
-		      wreq->debug_id, folio->index)) {
-		trace_netfs_folio(folio, netfs_folio_trace_not_under_wback);
-	}
-	return folio;
-}
-
 /*
  * Unlock any folios we've finished with.
  */
@@ -117,13 +81,25 @@  static void netfs_writeback_unlock_folios(struct netfs_io_request *wreq,
 					  unsigned long long collected_to,
 					  unsigned int *notes)
 {
+	struct folio_queue *folioq = wreq->buffer;
+	unsigned int slot = wreq->buffer_head_slot;
+
+	if (slot >= folioq_nr_slots(folioq)) {
+		folioq = netfs_delete_buffer_head(wreq);
+		slot = 0;
+	}
+
 	for (;;) {
 		struct folio *folio;
 		struct netfs_folio *finfo;
 		unsigned long long fpos, fend;
 		size_t fsize, flen;
 
-		folio = netfs_writeback_lookup_folio(wreq, wreq->cleaned_to);
+		folio = folioq_folio(folioq, slot);
+		if (WARN_ONCE(!folio_test_writeback(folio),
+			      "R=%08x: folio %lx is not under writeback\n",
+			      wreq->debug_id, folio->index))
+			trace_netfs_folio(folio, netfs_folio_trace_not_under_wback);
 
 		fpos = folio_pos(folio);
 		fsize = folio_size(folio);
@@ -148,9 +124,25 @@  static void netfs_writeback_unlock_folios(struct netfs_io_request *wreq,
 		wreq->cleaned_to = fpos + fsize;
 		*notes |= MADE_PROGRESS;
 
+		/* Clean up the head folioq.  If we clear an entire folioq, then
+		 * we can get rid of it provided it's not also the tail folioq
+		 * being filled by the issuer.
+		 */
+		folioq_clear(folioq, slot);
+		slot++;
+		if (slot >= folioq_nr_slots(folioq)) {
+			if (READ_ONCE(wreq->buffer_tail) == folioq)
+				break;
+			folioq = netfs_delete_buffer_head(wreq);
+			slot = 0;
+		}
+
 		if (fpos + fsize >= collected_to)
 			break;
 	}
+
+	wreq->buffer = folioq;
+	wreq->buffer_head_slot = slot;
 }
 
 /*
@@ -181,9 +173,12 @@  static void netfs_retry_write_stream(struct netfs_io_request *wreq,
 			if (test_bit(NETFS_SREQ_FAILED, &subreq->flags))
 				break;
 			if (__test_and_clear_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags)) {
+				struct iov_iter source = subreq->io_iter;
+
+				iov_iter_revert(&source, subreq->len - source.count);
 				__set_bit(NETFS_SREQ_RETRYING, &subreq->flags);
 				netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
-				netfs_reissue_write(stream, subreq);
+				netfs_reissue_write(stream, subreq, &source);
 			}
 		}
 		return;
@@ -193,6 +188,7 @@  static void netfs_retry_write_stream(struct netfs_io_request *wreq,
 
 	do {
 		struct netfs_io_subrequest *subreq = NULL, *from, *to, *tmp;
+		struct iov_iter source;
 		unsigned long long start, len;
 		size_t part;
 		bool boundary = false;
@@ -220,6 +216,14 @@  static void netfs_retry_write_stream(struct netfs_io_request *wreq,
 			len += to->len;
 		}
 
+		/* Determine the set of buffers we're going to use.  Each
+		 * subreq gets a subset of a single overall contiguous buffer.
+		 */
+		source = from->io_iter;
+		iov_iter_revert(&source, subreq->len - source.count);
+		iov_iter_advance(&source, from->transferred);
+		source.count = len;
+
 		/* Work through the sublist. */
 		subreq = from;
 		list_for_each_entry_from(subreq, &stream->subrequests, rreq_link) {
@@ -242,7 +246,7 @@  static void netfs_retry_write_stream(struct netfs_io_request *wreq,
 				boundary = true;
 
 			netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
-			netfs_reissue_write(stream, subreq);
+			netfs_reissue_write(stream, subreq, &source);
 			if (subreq == to)
 				break;
 		}
@@ -309,7 +313,7 @@  static void netfs_retry_write_stream(struct netfs_io_request *wreq,
 				boundary = false;
 			}
 
-			netfs_reissue_write(stream, subreq);
+			netfs_reissue_write(stream, subreq, &source);
 			if (!len)
 				break;
 
diff --git a/fs/netfs/write_issue.c b/fs/netfs/write_issue.c
index 7880a586343f..a75b62b202c5 100644
--- a/fs/netfs/write_issue.c
+++ b/fs/netfs/write_issue.c
@@ -213,9 +213,11 @@  static void netfs_prepare_write(struct netfs_io_request *wreq,
  * netfs_write_subrequest_terminated() when complete.
  */
 static void netfs_do_issue_write(struct netfs_io_stream *stream,
-				 struct netfs_io_subrequest *subreq)
+				 struct netfs_io_subrequest *subreq,
+				 struct iov_iter *source)
 {
 	struct netfs_io_request *wreq = subreq->rreq;
+	size_t size = subreq->len - subreq->transferred;
 
 	_enter("R=%x[%x],%zx", wreq->debug_id, subreq->debug_index, subreq->len);
 
@@ -223,27 +225,20 @@  static void netfs_do_issue_write(struct netfs_io_stream *stream,
 		return netfs_write_subrequest_terminated(subreq, subreq->error, false);
 
 	// TODO: Use encrypted buffer
-	if (test_bit(NETFS_RREQ_USE_IO_ITER, &wreq->flags)) {
-		subreq->io_iter = wreq->io_iter;
-		iov_iter_advance(&subreq->io_iter,
-				 subreq->start + subreq->transferred - wreq->start);
-		iov_iter_truncate(&subreq->io_iter,
-				 subreq->len - subreq->transferred);
-	} else {
-		iov_iter_xarray(&subreq->io_iter, ITER_SOURCE, &wreq->mapping->i_pages,
-				subreq->start + subreq->transferred,
-				subreq->len   - subreq->transferred);
-	}
+	subreq->io_iter = *source;
+	iov_iter_advance(source, size);
+	iov_iter_truncate(&subreq->io_iter, size);
 
 	trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
 	stream->issue_write(subreq);
 }
 
 void netfs_reissue_write(struct netfs_io_stream *stream,
-			 struct netfs_io_subrequest *subreq)
+			 struct netfs_io_subrequest *subreq,
+			 struct iov_iter *source)
 {
 	__set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags);
-	netfs_do_issue_write(stream, subreq);
+	netfs_do_issue_write(stream, subreq, source);
 }
 
 static void netfs_issue_write(struct netfs_io_request *wreq,
@@ -257,7 +252,7 @@  static void netfs_issue_write(struct netfs_io_request *wreq,
 
 	if (subreq->start + subreq->len > wreq->start + wreq->submitted)
 		WRITE_ONCE(wreq->submitted, subreq->start + subreq->len - wreq->start);
-	netfs_do_issue_write(stream, subreq);
+	netfs_do_issue_write(stream, subreq, &wreq->io_iter);
 }
 
 /*
@@ -422,6 +417,9 @@  static int netfs_write_folio(struct netfs_io_request *wreq,
 		trace_netfs_folio(folio, netfs_folio_trace_store_plus);
 	}
 
+	/* Attach the folio to the rolling buffer. */
+	netfs_buffer_append_folio(wreq, folio, false);
+
 	/* Move the submission point forward to allow for write-streaming data
 	 * not starting at the front of the page.  We don't do write-streaming
 	 * with the cache as the cache requires DIO alignment.
diff --git a/include/linux/netfs.h b/include/linux/netfs.h
index ae4abf121d97..6428be9d99ba 100644
--- a/include/linux/netfs.h
+++ b/include/linux/netfs.h
@@ -38,10 +38,6 @@  static inline void folio_start_private_2(struct folio *folio)
 	folio_set_private_2(folio);
 }
 
-/* Marks used on xarray-based buffers */
-#define NETFS_BUF_PUT_MARK	XA_MARK_0	/* - Page needs putting  */
-#define NETFS_BUF_PAGECACHE_MARK XA_MARK_1	/* - Page needs wb/dirty flag wrangling */
-
 enum netfs_io_source {
 	NETFS_SOURCE_UNKNOWN,
 	NETFS_FILL_WITH_ZEROES,
@@ -232,6 +228,8 @@  struct netfs_io_request {
 	struct netfs_io_stream	io_streams[2];	/* Streams of parallel I/O operations */
 #define NR_IO_STREAMS 2 //wreq->nr_io_streams
 	struct netfs_group	*group;		/* Writeback group being written back */
+	struct folio_queue	*buffer;	/* Head of I/O buffer */
+	struct folio_queue	*buffer_tail;	/* Tail of I/O buffer */
 	struct iov_iter		iter;		/* Unencrypted-side iterator */
 	struct iov_iter		io_iter;	/* I/O (Encrypted-side) iterator */
 	void			*netfs_priv;	/* Private data for the netfs */
@@ -253,6 +251,8 @@  struct netfs_io_request {
 	short			error;		/* 0 or error that occurred */
 	enum netfs_io_origin	origin;		/* Origin of the request */
 	bool			direct_bv_unpin; /* T if direct_bv[] must be unpinned */
+	u8			buffer_head_slot; /* First slot in ->buffer */
+	u8			buffer_tail_slot; /* Next slot in ->buffer_tail */
 	unsigned long long	i_size;		/* Size of the file */
 	unsigned long long	start;		/* Start position */
 	atomic64_t		issued_to;	/* Write issuer folio cursor */
diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h
index 47cd11aaccac..4e13774a06e6 100644
--- a/include/trace/events/netfs.h
+++ b/include/trace/events/netfs.h
@@ -153,6 +153,7 @@ 
 	EM(netfs_folio_trace_mkwrite,		"mkwrite")	\
 	EM(netfs_folio_trace_mkwrite_plus,	"mkwrite+")	\
 	EM(netfs_folio_trace_not_under_wback,	"!wback")	\
+	EM(netfs_folio_trace_put,		"put")		\
 	EM(netfs_folio_trace_read_gaps,		"read-gaps")	\
 	EM(netfs_folio_trace_redirtied,		"redirtied")	\
 	EM(netfs_folio_trace_store,		"store")	\