@@ -1979,7 +1979,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
btrfs_init_log_ctx(&ctx);
- ret = btrfs_log_dentry_safe(trans, root, dentry, &ctx);
+ ret = btrfs_log_dentry_safe(trans, root, dentry, start, end, &ctx);
if (ret < 0) {
/* Fallthrough and commit/free transaction. */
ret = 1;
@@ -94,8 +94,10 @@
#define LOG_WALK_REPLAY_ALL 3
static int btrfs_log_inode(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct inode *inode,
- int inode_only);
+ struct btrfs_root *root, struct inode *inode,
+ int inode_only,
+ const loff_t start,
+ const loff_t end);
static int link_to_fixup_dir(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path, u64 objectid);
@@ -3856,8 +3858,10 @@ process:
* This handles both files and directories.
*/
static int btrfs_log_inode(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct inode *inode,
- int inode_only)
+ struct btrfs_root *root, struct inode *inode,
+ int inode_only,
+ const loff_t start,
+ const loff_t end)
{
struct btrfs_path *path;
struct btrfs_path *dst_path;
@@ -4050,8 +4054,31 @@ log_extents:
struct extent_map *em, *n;
write_lock(&tree->lock);
- list_for_each_entry_safe(em, n, &tree->modified_extents, list)
+ /*
+ * We can't just remove every em if we're called for a ranged
+ * fsync - that is, one that doesn't cover the whole possible
+ * file range (0 to LLONG_MAX). This is because we can have
+ * em's that fall outside the range we're logging and therefore
+ * their ordered operations haven't completed yet
+ * (btrfs_finish_ordered_io() not invoked yet). This means we
+ * didn't get their respective file extent item in the fs/subvol
+ * tree yet, and need to let the next fast fsync (one which
+ * consults the list of modified extent maps) find the em so
+ * that it logs a matching file extent item and waits for the
+ * respective ordered operation to complete (if it's still
+ * running).
+ *
+ * Removing every em outside the range we're logging would make
+ * the next fast fsync not log their matching file extent items,
+ * therefore making us lose data after a log replay.
+ */
+ list_for_each_entry_safe(em, n, &tree->modified_extents, list) {
+ if (em->mod_start > end)
+ continue;
+ if (em->mod_start + em->mod_len <= start)
+ continue;
list_del_init(&em->list);
+ }
write_unlock(&tree->lock);
}
@@ -4158,7 +4185,10 @@ out:
*/
static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct inode *inode,
- struct dentry *parent, int exists_only,
+ struct dentry *parent,
+ const loff_t start,
+ const loff_t end,
+ int exists_only,
struct btrfs_log_ctx *ctx)
{
int inode_only = exists_only ? LOG_INODE_EXISTS : LOG_INODE_ALL;
@@ -4204,7 +4234,7 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
if (ret)
goto end_no_trans;
- ret = btrfs_log_inode(trans, root, inode, inode_only);
+ ret = btrfs_log_inode(trans, root, inode, inode_only, start, end);
if (ret)
goto end_trans;
@@ -4232,7 +4262,8 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
if (BTRFS_I(inode)->generation >
root->fs_info->last_trans_committed) {
- ret = btrfs_log_inode(trans, root, inode, inode_only);
+ ret = btrfs_log_inode(trans, root, inode, inode_only,
+ 0, LLONG_MAX);
if (ret)
goto end_trans;
}
@@ -4266,13 +4297,15 @@ end_no_trans:
*/
int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct dentry *dentry,
+ const loff_t start,
+ const loff_t end,
struct btrfs_log_ctx *ctx)
{
struct dentry *parent = dget_parent(dentry);
int ret;
ret = btrfs_log_inode_parent(trans, root, dentry->d_inode, parent,
- 0, ctx);
+ start, end, 0, ctx);
dput(parent);
return ret;
@@ -4509,6 +4542,7 @@ int btrfs_log_new_name(struct btrfs_trans_handle *trans,
root->fs_info->last_trans_committed))
return 0;
- return btrfs_log_inode_parent(trans, root, inode, parent, 1, NULL);
+ return btrfs_log_inode_parent(trans, root, inode, parent, 0,
+ LLONG_MAX, 1, NULL);
}
@@ -59,6 +59,8 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans,
int btrfs_recover_log_trees(struct btrfs_root *tree_root);
int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct dentry *dentry,
+ const loff_t start,
+ const loff_t end,
struct btrfs_log_ctx *ctx);
int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
While we're doing a full fsync (when the inode has the flag BTRFS_INODE_NEEDS_FULL_SYNC set) that is ranged too (covers only a portion of the file), we might have ordered operations that are started before or while we're logging the inode and that fall outside the fsync range. Therefore when a full ranged fsync finishes don't remove every extent map from the list of modified extent maps - as for some of them, that fall outside our fsync range, their respective ordered operation hasn't finished yet, meaning the corresponding file extent item wasn't inserted into the fs/subvol tree yet and therefore we didn't log it, and we must let the next fast fsync (one that checks only the modified list) see this extent map and log a matching file extent item to the log btree and wait for its ordered operation to finish (if it's still ongoing). Signed-off-by: Filipe Manana <fdmanana@suse.com> --- V2: No code change, only updated the changelog and the comment, to make them more clear and accurate. fs/btrfs/file.c | 2 +- fs/btrfs/tree-log.c | 54 +++++++++++++++++++++++++++++++++++++++++++---------- fs/btrfs/tree-log.h | 2 ++ 3 files changed, 47 insertions(+), 11 deletions(-)