Message ID: 20210715033704.692967-102-willy@infradead.org
State:      New
Series:     Memory folios
On Thu, Jul 15, 2021 at 04:36:27AM +0100, Matthew Wilcox (Oracle) wrote:
> If we write to any page in a folio, we have to mark the entire
> folio as dirty, and potentially COW the entire folio, because it'll
> all get written back as one unit.
>
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> ---
>  fs/iomap/buffered-io.c | 42 +++++++++++++++++++++---------------------
>  1 file changed, 21 insertions(+), 21 deletions(-)
>
> diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
> index 7c702d6c2f64..a3fe0d36c739 100644
> --- a/fs/iomap/buffered-io.c
> +++ b/fs/iomap/buffered-io.c
> @@ -951,23 +951,23 @@ iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
>  }
>  EXPORT_SYMBOL_GPL(iomap_truncate_page);
>
> -static loff_t
> -iomap_page_mkwrite_actor(struct inode *inode, loff_t pos, loff_t length,
> -		void *data, struct iomap *iomap, struct iomap *srcmap)
> +static loff_t iomap_folio_mkwrite_actor(struct inode *inode, loff_t pos,
> +		loff_t length, void *data, struct iomap *iomap,
> +		struct iomap *srcmap)
>  {
> -	struct page *page = data;
> -	struct folio *folio = page_folio(page);
> +	struct folio *folio = data;
>  	int ret;
>
>  	if (iomap->flags & IOMAP_F_BUFFER_HEAD) {
> -		ret = __block_write_begin_int(page, pos, length, NULL, iomap);
> +		ret = __block_write_begin_int(&folio->page, pos, length, NULL,
> +				iomap);
>  		if (ret)
>  			return ret;
> -		block_commit_write(page, 0, length);
> +		block_commit_write(&folio->page, 0, length);
>  	} else {
> -		WARN_ON_ONCE(!PageUptodate(page));
> +		WARN_ON_ONCE(!folio_test_uptodate(folio));
>  		iomap_page_create(inode, folio);
> -		set_page_dirty(page);
> +		folio_mark_dirty(folio);
>  	}
>
>  	return length;
> @@ -975,33 +975,33 @@ iomap_page_mkwrite_actor(struct inode *inode, loff_t pos, loff_t length,
>
>  vm_fault_t iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops)
>  {
> -	struct page *page = vmf->page;
> +	struct folio *folio = page_folio(vmf->page);

If before the page fault the folio was a compound 2M page, will the
memory manager have split it into 4k pages before passing it to us?

That's a roundabout way of asking if we should expect folio_mkwrite at
some point. ;)

The conversion looks pretty straightforward though.

Reviewed-by: Darrick J. Wong <djwong@kernel.org>

--D

>  	struct inode *inode = file_inode(vmf->vma->vm_file);
> -	unsigned long length;
> -	loff_t offset;
> +	size_t length;
> +	loff_t pos;
>  	ssize_t ret;
>
> -	lock_page(page);
> -	ret = page_mkwrite_check_truncate(page, inode);
> +	folio_lock(folio);
> +	ret = folio_mkwrite_check_truncate(folio, inode);
>  	if (ret < 0)
>  		goto out_unlock;
>  	length = ret;
>
> -	offset = page_offset(page);
> +	pos = folio_pos(folio);
>  	while (length > 0) {
> -		ret = iomap_apply(inode, offset, length,
> -				IOMAP_WRITE | IOMAP_FAULT, ops, page,
> -				iomap_page_mkwrite_actor);
> +		ret = iomap_apply(inode, pos, length,
> +				IOMAP_WRITE | IOMAP_FAULT, ops, folio,
> +				iomap_folio_mkwrite_actor);
>  		if (unlikely(ret <= 0))
>  			goto out_unlock;
> -		offset += ret;
> +		pos += ret;
>  		length -= ret;
>  	}
>
> -	wait_for_stable_page(page);
> +	folio_wait_stable(folio);
>  	return VM_FAULT_LOCKED;
>  out_unlock:
> -	unlock_page(page);
> +	folio_unlock(folio);
>  	return block_page_mkwrite_return(ret);
>  }
>  EXPORT_SYMBOL_GPL(iomap_page_mkwrite);
> --
> 2.30.2
On Thu, Jul 15, 2021 at 02:41:06PM -0700, Darrick J. Wong wrote:
> > @@ -975,33 +975,33 @@ iomap_page_mkwrite_actor(struct inode *inode, loff_t pos, loff_t length,
> >
> >  vm_fault_t iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops)
> >  {
> > -	struct page *page = vmf->page;
> > +	struct folio *folio = page_folio(vmf->page);
>
> If before the page fault the folio was a compound 2M page, will the
> memory manager have split it into 4k pages before passing it to us?
>
> That's a roundabout way of asking if we should expect folio_mkwrite at
> some point. ;)

Faults are tricky.  For ->fault, we need to know the precise page on
which the fault occurred (this detail is handled for you by
filemap_fault()).

For mkwrite(), the page will not be split, so it's going to be a matter
of just marking the entire compound page as dirty (in the head page)
and making sure the filesystem is able to write back the entire folio.
Yes, there's going to be some write amplification here.  I believe this
will turn out to be a worthwhile tradeoff.  If I'm wrong, we can
implement some kind of split-on-fault.
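To make the "mark the entire compound page dirty" point above concrete, here is a minimal sketch (not part of the patch; the function name is invented) using only helpers that appear in this series: page_folio() resolves any tail page to its folio, and folio_mark_dirty() tags the whole folio as one writeback unit.

#include <linux/mm.h>
#include <linux/pagemap.h>

/*
 * Illustrative sketch, not from the patch.  Whichever 4k page inside
 * a compound folio takes the write fault, page_folio() resolves it to
 * the containing folio, and folio_mark_dirty() dirties that folio as
 * a single unit -- this is the write amplification discussed above.
 */
static void sketch_mkwrite_dirty(struct page *faulted_page)
{
        struct folio *folio = page_folio(faulted_page);

        folio_lock(folio);              /* mkwrite runs with the folio locked */
        folio_mark_dirty(folio);        /* dirties the folio, head to tail */
        folio_unlock(folio);
}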
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 7c702d6c2f64..a3fe0d36c739 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -951,23 +951,23 @@ iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
 }
 EXPORT_SYMBOL_GPL(iomap_truncate_page);
 
-static loff_t
-iomap_page_mkwrite_actor(struct inode *inode, loff_t pos, loff_t length,
-		void *data, struct iomap *iomap, struct iomap *srcmap)
+static loff_t iomap_folio_mkwrite_actor(struct inode *inode, loff_t pos,
+		loff_t length, void *data, struct iomap *iomap,
+		struct iomap *srcmap)
 {
-	struct page *page = data;
-	struct folio *folio = page_folio(page);
+	struct folio *folio = data;
 	int ret;
 
 	if (iomap->flags & IOMAP_F_BUFFER_HEAD) {
-		ret = __block_write_begin_int(page, pos, length, NULL, iomap);
+		ret = __block_write_begin_int(&folio->page, pos, length, NULL,
+				iomap);
 		if (ret)
 			return ret;
-		block_commit_write(page, 0, length);
+		block_commit_write(&folio->page, 0, length);
 	} else {
-		WARN_ON_ONCE(!PageUptodate(page));
+		WARN_ON_ONCE(!folio_test_uptodate(folio));
 		iomap_page_create(inode, folio);
-		set_page_dirty(page);
+		folio_mark_dirty(folio);
 	}
 
 	return length;
@@ -975,33 +975,33 @@ iomap_page_mkwrite_actor(struct inode *inode, loff_t pos, loff_t length,
 
 vm_fault_t iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops)
 {
-	struct page *page = vmf->page;
+	struct folio *folio = page_folio(vmf->page);
 	struct inode *inode = file_inode(vmf->vma->vm_file);
-	unsigned long length;
-	loff_t offset;
+	size_t length;
+	loff_t pos;
 	ssize_t ret;
 
-	lock_page(page);
-	ret = page_mkwrite_check_truncate(page, inode);
+	folio_lock(folio);
+	ret = folio_mkwrite_check_truncate(folio, inode);
 	if (ret < 0)
 		goto out_unlock;
 	length = ret;
 
-	offset = page_offset(page);
+	pos = folio_pos(folio);
 	while (length > 0) {
-		ret = iomap_apply(inode, offset, length,
-				IOMAP_WRITE | IOMAP_FAULT, ops, page,
-				iomap_page_mkwrite_actor);
+		ret = iomap_apply(inode, pos, length,
+				IOMAP_WRITE | IOMAP_FAULT, ops, folio,
+				iomap_folio_mkwrite_actor);
 		if (unlikely(ret <= 0))
 			goto out_unlock;
-		offset += ret;
+		pos += ret;
 		length -= ret;
 	}
 
-	wait_for_stable_page(page);
+	folio_wait_stable(folio);
 	return VM_FAULT_LOCKED;
 out_unlock:
-	unlock_page(page);
+	folio_unlock(folio);
 	return block_page_mkwrite_return(ret);
 }
 EXPORT_SYMBOL_GPL(iomap_page_mkwrite);
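For context, iomap_page_mkwrite() is called from a filesystem's ->page_mkwrite handler in its vm_operations_struct.  A hedged sketch of that wiring follows; the myfs_* names are invented for illustration (XFS does something broadly similar around sb_start_pagefault()/sb_end_pagefault()), while the VM and iomap calls themselves are real.

#include <linux/fs.h>
#include <linux/iomap.h>
#include <linux/mm.h>

/* "myfs" names are hypothetical; only the iomap/VM entry points are real. */
extern const struct iomap_ops myfs_iomap_ops;

static vm_fault_t myfs_page_mkwrite(struct vm_fault *vmf)
{
        struct inode *inode = file_inode(vmf->vma->vm_file);
        vm_fault_t ret;

        sb_start_pagefault(inode->i_sb);        /* hold off fs freeze while dirtying */
        ret = iomap_page_mkwrite(vmf, &myfs_iomap_ops);
        sb_end_pagefault(inode->i_sb);

        return ret;
}

static const struct vm_operations_struct myfs_file_vm_ops = {
        .fault          = filemap_fault,        /* finds the precise faulting page */
        .map_pages      = filemap_map_pages,
        .page_mkwrite   = myfs_page_mkwrite,
};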
If we write to any page in a folio, we have to mark the entire
folio as dirty, and potentially COW the entire folio, because it'll
all get written back as one unit.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 fs/iomap/buffered-io.c | 42 +++++++++++++++++++++---------------------
 1 file changed, 21 insertions(+), 21 deletions(-)
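One way to quantify the "written back as one unit" tradeoff (an illustration, not from the patch): a single-byte store into a 2MB folio dirties the whole folio, so writeback issues 2MB of I/O where a per-page implementation would issue 4k, a 512x worst-case amplification.  A tiny sketch, assuming the folio_size() helper from earlier in this series:

#include <linux/mm.h>

/*
 * Illustrative only: after this patch the writeback unit is the folio.
 * For an order-9 (2MB) folio, folio_size() returns 512 * 4096 bytes,
 * so one dirtied byte can cost 2MB of writeback I/O.
 */
static size_t sketch_writeback_unit(struct folio *folio)
{
        return folio_size(folio);       /* PAGE_SIZE << folio_order(folio) */
}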