From patchwork Tue Mar 26 19:02:53 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Goldwyn Rodrigues X-Patchwork-Id: 10872071 Return-Path: Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org [172.30.200.125]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 2573F17E6 for ; Tue, 26 Mar 2019 19:03:38 +0000 (UTC) Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 182DA28C19 for ; Tue, 26 Mar 2019 19:03:38 +0000 (UTC) Received: by mail.wl.linuxfoundation.org (Postfix, from userid 486) id 0BC1F28CFF; Tue, 26 Mar 2019 19:03:38 +0000 (UTC) X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on pdx-wl-mail.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-7.9 required=2.0 tests=BAYES_00,MAILING_LIST_MULTI, RCVD_IN_DNSWL_HI autolearn=unavailable version=3.3.1 Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 8C25028C58 for ; Tue, 26 Mar 2019 19:03:37 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1732688AbfCZTDg (ORCPT ); Tue, 26 Mar 2019 15:03:36 -0400 Received: from mx2.suse.de ([195.135.220.15]:36998 "EHLO mx1.suse.de" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1732636AbfCZTDd (ORCPT ); Tue, 26 Mar 2019 15:03:33 -0400 X-Virus-Scanned: by amavisd-new at test-mx.suse.de Received: from relay2.suse.de (unknown [195.135.220.254]) by mx1.suse.de (Postfix) with ESMTP id 255BDAEE6; Tue, 26 Mar 2019 19:03:31 +0000 (UTC) From: Goldwyn Rodrigues To: linux-btrfs@vger.kernel.org Cc: linux-fsdevel@vger.kernel.org, Goldwyn Rodrigues Subject: [PATCH 07/15] btrfs: add dax write support Date: Tue, 26 Mar 2019 14:02:53 -0500 Message-Id: <20190326190301.32365-8-rgoldwyn@suse.de> X-Mailer: git-send-email 2.16.4 In-Reply-To: <20190326190301.32365-1-rgoldwyn@suse.de> References: <20190326190301.32365-1-rgoldwyn@suse.de> Sender: linux-fsdevel-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-fsdevel@vger.kernel.org X-Virus-Scanned: ClamAV using ClamSMTP From: Goldwyn Rodrigues IOMAP_F_COW allows to inform the dax code, to first perform a copy which are not page-aligned before performing the write. A new struct btrfs_iomap is passed from iomap_begin() to iomap_end(), which contains all the accounting and locking information for CoW based writes. For writing to a hole, iomap->cow_addr is set to zero. Would this be better handled by a flag or can a valid filesystem block be at offset zero of the device? Signed-off-by: Goldwyn Rodrigues --- fs/btrfs/ctree.h | 6 +++ fs/btrfs/dax.c | 119 +++++++++++++++++++++++++++++++++++++++++++++++++++++-- fs/btrfs/file.c | 4 +- 3 files changed, 124 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index a3543a4a063d..3bcd2a4959c1 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3801,6 +3801,12 @@ int btree_readahead_hook(struct extent_buffer *eb, int err); #ifdef CONFIG_FS_DAX /* dax.c */ ssize_t btrfs_file_dax_read(struct kiocb *iocb, struct iov_iter *to); +ssize_t btrfs_file_dax_write(struct kiocb *iocb, struct iov_iter *from); +#else +static inline ssize_t btrfs_file_dax_write(struct kiocb *iocb, struct iov_iter *from) +{ + return 0; +} #endif /* CONFIG_FS_DAX */ static inline int is_fstree(u64 rootid) diff --git a/fs/btrfs/dax.c b/fs/btrfs/dax.c index bf3d46b0acb6..49619fe3f94f 100644 --- a/fs/btrfs/dax.c +++ b/fs/btrfs/dax.c @@ -9,30 +9,124 @@ #ifdef CONFIG_FS_DAX #include #include +#include #include "ctree.h" #include "btrfs_inode.h" +struct btrfs_iomap { + u64 start; + u64 end; + int nocow; + struct extent_changeset *data_reserved; + struct extent_state *cached_state; +}; + static int btrfs_iomap_begin(struct inode *inode, loff_t pos, loff_t length, unsigned flags, struct iomap *iomap) { struct extent_map *em; struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); + em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, pos, length, 0); + + if (flags & IOMAP_WRITE) { + int ret = 0, nocow; + struct extent_map *map = em; + struct btrfs_iomap *bi; + + bi = kzalloc(sizeof(struct btrfs_iomap), GFP_NOFS); + if (!bi) + return -ENOMEM; + + bi->start = round_down(pos, PAGE_SIZE); + bi->end = round_up(pos + length, PAGE_SIZE); + + iomap->private = bi; + + /* Wait for existing ordered extents in range to finish */ + btrfs_wait_ordered_range(inode, bi->start, bi->end - bi->start); + + lock_extent_bits(&BTRFS_I(inode)->io_tree, bi->start, bi->end, &bi->cached_state); + + ret = btrfs_delalloc_reserve_space(inode, &bi->data_reserved, + bi->start, bi->end - bi->start); + if (ret) { + unlock_extent_cached(&BTRFS_I(inode)->io_tree, bi->start, bi->end, + &bi->cached_state); + kfree(bi); + return ret; + } + + refcount_inc(&map->refs); + ret = btrfs_get_extent_map_write(&em, NULL, + inode, bi->start, bi->end - bi->start, &nocow); + if (ret) { + unlock_extent_cached(&BTRFS_I(inode)->io_tree, bi->start, bi->end, + &bi->cached_state); + btrfs_delalloc_release_space(inode, + bi->data_reserved, bi->start, + bi->end - bi->start, true); + extent_changeset_free(bi->data_reserved); + kfree(bi); + return ret; + } + if (!nocow) { + iomap->flags |= IOMAP_F_COW; + if (map->block_start != EXTENT_MAP_HOLE) { + iomap->cow_addr = map->block_start; + iomap->cow_pos = map->start; + } + } else { + bi->nocow = 1; + } + free_extent_map(map); + } + + iomap->offset = em->start; + iomap->length = em->len; + iomap->bdev = em->bdev; + iomap->dax_dev = fs_info->dax_dev; + if (em->block_start == EXTENT_MAP_HOLE) { iomap->type = IOMAP_HOLE; return 0; } + iomap->type = IOMAP_MAPPED; - iomap->bdev = em->bdev; - iomap->dax_dev = fs_info->dax_dev; - iomap->offset = em->start; - iomap->length = em->len; iomap->addr = em->block_start; return 0; } +static int btrfs_iomap_end(struct inode *inode, loff_t pos, + loff_t length, ssize_t written, unsigned flags, + struct iomap *iomap) +{ + struct btrfs_iomap *bi = iomap->private; + u64 wend; + + if (!bi) + return 0; + + unlock_extent_cached(&BTRFS_I(inode)->io_tree, bi->start, bi->end, + &bi->cached_state); + + wend = round_up(pos + written, PAGE_SIZE); + if (wend < bi->end) { + btrfs_delalloc_release_space(inode, + bi->data_reserved, wend, + bi->end - wend, true); + } + + btrfs_update_ordered_extent(inode, bi->start, wend - bi->start, true); + btrfs_delalloc_release_extents(BTRFS_I(inode), wend - bi->start, false); + extent_changeset_free(bi->data_reserved); + kfree(bi); + return 0; +} + static const struct iomap_ops btrfs_iomap_ops = { .iomap_begin = btrfs_iomap_begin, + .iomap_end = btrfs_iomap_end, }; ssize_t btrfs_file_dax_read(struct kiocb *iocb, struct iov_iter *to) @@ -46,4 +140,21 @@ ssize_t btrfs_file_dax_read(struct kiocb *iocb, struct iov_iter *to) return ret; } + +ssize_t btrfs_file_dax_write(struct kiocb *iocb, struct iov_iter *iter) +{ + ssize_t ret = 0; + u64 pos = iocb->ki_pos; + struct inode *inode = file_inode(iocb->ki_filp); + + ret = dax_iomap_rw(iocb, iter, &btrfs_iomap_ops); + + if (ret > 0) { + pos += ret; + if (pos > i_size_read(inode)) + i_size_write(inode, pos); + iocb->ki_pos = pos; + } + return ret; +} #endif /* CONFIG_FS_DAX */ diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index b620f4e718b2..3b320d0ab495 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1964,7 +1964,9 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb, if (sync) atomic_inc(&BTRFS_I(inode)->sync_writers); - if (iocb->ki_flags & IOCB_DIRECT) { + if (IS_DAX(inode)) { + num_written = btrfs_file_dax_write(iocb, from); + } else if (iocb->ki_flags & IOCB_DIRECT) { num_written = __btrfs_direct_write(iocb, from); } else { num_written = btrfs_buffered_write(iocb, from);