@@ -552,16 +552,30 @@ static const struct ovl_copy_up_ops ovl_copy_up_tmpfile_ops = {
*/
static int ovl_copy_up_indexdir_aquire(struct ovl_copy_up_ctx *ctx)
{
- /* TODO: handle race of concurrent lower hardlinks copy up */
return ovl_copy_up_start(ctx->dentry);
}
+/*
+ * Set ctx->temp to a positive dentry with the index inode.
+ *
+ * Return 0 if entry was created by us, in which case, we also got
+ * inode_inuse lock and we will release it on copy_up_commit after
+ * copying the inode data.
+ *
+ * Return 1 if we found an index inode, in which case, we do not need
+ * to copy the inode data.
+ *
+ * May return -EEXISTS/-ENOENT/-EBUSY on various races of concurrent
+ * lower hardlinks copy up on the same lower inode.
+ */
static int ovl_copy_up_indexdir_prepare(struct ovl_copy_up_ctx *ctx)
{
+ struct inode *dir = d_inode(ctx->tempdir);
struct dentry *upper;
struct dentry *index;
struct inode *inode;
int err;
+ bool retried = false;
struct cattr cattr = {
/* Can't properly set mode on creation because of the umask */
.mode = ctx->stat->mode & S_IFMT,
@@ -576,6 +590,7 @@ static int ovl_copy_up_indexdir_prepare(struct ovl_copy_up_ctx *ctx)
index = dget(ovl_dentry_index(ctx->dentry));
BUG_ON(!index);
+retry:
inode = d_inode(index);
if (inode) {
/* Another lower hardlink already copied-up? */
@@ -583,18 +598,32 @@ static int ovl_copy_up_indexdir_prepare(struct ovl_copy_up_ctx *ctx)
if ((inode->i_mode & S_IFMT) != cattr.mode)
goto out_dput;
- err = -ENOENT;
+ /*
+ * We need to be carefull not to link with a copy in-progress
+ * index inode. Testing 'inuse' bit with indexdir i_mutex held
+ * protect us from the time of creating the index entry below
+ * to the time of ovl_copy_up_indexdir_commit. We also need to
+ * test i_nlink with indexdir i_mutex held to make sure that
+ * index has not been unlinked by ovl_copy_up_indexdir_cancel,
+ * right after clearing 'inuse' bit.
+ */
+ inode_lock_nested(dir, I_MUTEX_PARENT);
if (!inode->i_nlink)
+ err = -ENOENT;
+ else
+ err = wait_on_inode_inuse(inode, TASK_KILLABLE);
+ inode_unlock(dir);
+ if (err)
goto out_dput;
/*
* Verify that found index is a copy up of lower inode.
* If index inode doesn't point back to lower inode via
- * origin file handle, then this is either an in-progress
- * copy up or leftover from index dir before copying layers.
+ * origin file handle, then this is either a leftover from
+ * failed copy up or an index dir entry before copying layers.
* In both cases, we cannot use this index and must fail the
- * copy up. The in-progress case will return -EEXISTS and the
- * leftover case will return -ESTALE.
+ * copy up. The failed copy up case will return -EEXISTS and
+ * the copying layers case will return -ESTALE.
*/
err = ovl_verify_origin(index, ctx->lowerpath->mnt,
ctx->lowerpath->dentry, false);
@@ -630,11 +659,29 @@ static int ovl_copy_up_indexdir_prepare(struct ovl_copy_up_ctx *ctx)
goto out;
}
- inode_lock_nested(d_inode(ctx->tempdir), I_MUTEX_PARENT);
- err = ovl_create_real(d_inode(ctx->tempdir), index, &cattr, NULL, true);
+ /* Raced on creating index and not found on retry? */
+ err = -ENOENT;
+ if (retried)
+ goto out_dput;
+
+ inode_lock_nested(dir, I_MUTEX_PARENT);
+ err = ovl_create_real(dir, index, &cattr, NULL, true);
if (!err)
ctx->created = true;
- inode_unlock(d_inode(ctx->tempdir));
+ /*
+ * Mark index inode 'inuse' before copying inode data to avoid racing
+ * with copy up of another lower hardlink of the same lower inode.
+ */
+ if (!err && !inode_inuse_trylock(index->d_inode))
+ err = -EBUSY;
+ inode_unlock(dir);
+
+ /* Raced with copy up of another lower hardlink of the same inode? */
+ if (err == -EEXIST) {
+ retried = true;
+ goto retry;
+ }
+
if (err)
goto out_dput;
@@ -679,6 +726,10 @@ static int ovl_copy_up_indexdir_commit(struct ovl_copy_up_ctx *ctx)
}
inode_unlock(d_inode(ctx->upperdir));
+ /* Allow other lower hardlinks to link with our creation */
+ if (ctx->created)
+ inode_inuse_unlock(d_inode(ctx->temp));
+
return err;
}
@@ -696,18 +747,20 @@ static int ovl_copy_up_indexdir_cancel(struct ovl_copy_up_ctx *ctx)
inode_lock_nested(d_inode(ctx->tempdir), I_MUTEX_PARENT);
/*
- * We must not cleanup an already hardlinked index.
+ * We must clear 'inuse' flag before unlink, but we need to take care
+ * because this could allow another copy up to race with this cleanup,
+ * find the unborn index inode that looks like an orphan and link it,
+ * before we unlink the unborn index entry. So before linking an index
+ * inode we must take indexdir i_mutex, wait for 'inuse' flag to be
+ * cleared and test that inode i_nlink is positive.
*/
- if (inode->i_nlink != 1)
- goto out_unlock;
-
- pr_warn_ratelimited("overlayfs: cleanup bad index (%pd2, ino=%lu)\n",
- ctx->temp, inode->i_ino);
+ inode_inuse_unlock(inode);
+ pr_warn_ratelimited("overlayfs: cleanup bad index (%pd2, ino=%lu, nlink=%u)\n",
+ ctx->temp, inode->i_ino, inode->i_nlink);
ovl_cleanup(d_inode(ctx->tempdir), ctx->temp);
/* Bad index inode is still cached in overlay dentry */
d_drop(ctx->dentry);
-out_unlock:
inode_unlock(d_inode(ctx->tempdir));
return 0;
}
Make sure that task B does not link to an index inode created by task A, before task A completed copying the inode data. Task A marks the index inode 'inuse' and task B waits for 'inuse' flag to be cleared. Take care of the race between task A canceling the copy up and unlinking the index inode and task B trying to link to it. Signed-off-by: Amir Goldstein <amir73il@gmail.com> --- fs/overlayfs/copy_up.c | 85 ++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 69 insertions(+), 16 deletions(-)