Message ID | 20191212142717.23656-1-jlayton@kernel.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | ceph: don't clear I_NEW until inode metadata is fully populated | expand |
On Thu, 2019-12-12 at 09:27 -0500, Jeff Layton wrote: > Currently, we could have an open-by-handle (or NFS server) call > into the filesystem and start working with an inode before it's > properly filled out. > > Don't clear I_NEW until we have filled out the inode, and discard it > properly if that fails. Note that we occasionally take an extra > reference to the inode to ensure that we don't put the last reference in > discard_new_inode, but rather leave it for ceph_async_iput. > > Signed-off-by: Jeff Layton <jlayton@kernel.org> > --- > fs/ceph/inode.c | 25 +++++++++++++++++++++---- > 1 file changed, 21 insertions(+), 4 deletions(-) > > diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c > index 5bdc1afc2bee..11672f8192b9 100644 > --- a/fs/ceph/inode.c > +++ b/fs/ceph/inode.c > @@ -55,11 +55,9 @@ struct inode *ceph_get_inode(struct super_block *sb, struct ceph_vino vino) > inode = iget5_locked(sb, t, ceph_ino_compare, ceph_set_ino_cb, &vino); > if (!inode) > return ERR_PTR(-ENOMEM); > - if (inode->i_state & I_NEW) { > + if (inode->i_state & I_NEW) > dout("get_inode created new inode %p %llx.%llx ino %llx\n", > inode, ceph_vinop(inode), (u64)inode->i_ino); > - unlock_new_inode(inode); > - } > > dout("get_inode on %lu=%llx.%llx got %p\n", inode->i_ino, vino.ino, > vino.snap, inode); > @@ -88,6 +86,10 @@ struct inode *ceph_get_snapdir(struct inode *parent) > inode->i_fop = &ceph_snapdir_fops; > ci->i_snap_caps = CEPH_CAP_PIN; /* so we can open */ > ci->i_rbytes = 0; > + > + if (inode->i_state & I_NEW) > + unlock_new_inode(inode); > + > return inode; > } > > @@ -1301,7 +1303,6 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) > err = PTR_ERR(in); > goto done; > } > - req->r_target_inode = in; > > err = fill_inode(in, req->r_locked_page, &rinfo->targeti, NULL, > session, > @@ -1311,8 +1312,13 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) > if (err < 0) { > pr_err("fill_inode badness %p %llx.%llx\n", > in, 
ceph_vinop(in)); > + if (in->i_state & I_NEW) > + discard_new_inode(in); > goto done; > } > + req->r_target_inode = in; > + if (in->i_state & I_NEW) > + unlock_new_inode(in); > } > > /* > @@ -1496,7 +1502,12 @@ static int readdir_prepopulate_inodes_only(struct ceph_mds_request *req, > if (rc < 0) { > pr_err("fill_inode badness on %p got %d\n", in, rc); > err = rc; > + ihold(in); > + discard_new_inode(in); > + } else if (in->i_state & I_NEW) { > + unlock_new_inode(in); > } > + > /* avoid calling iput_final() in mds dispatch threads */ > ceph_async_iput(in); > } > @@ -1698,12 +1709,18 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req, > if (d_really_is_negative(dn)) { > /* avoid calling iput_final() in mds > * dispatch threads */ > + if (in->i_state & I_NEW) { > + ihold(in); > + discard_new_inode(in); > + } > ceph_async_iput(in); > } Note that I have a related concern about this function. We're gating whether to put a reference or not on whether the dentry is negative. I'm not sure anything guarantees that that won't change between where we took the reference and this point. We might want to rework the refcounting there. > d_drop(dn); > err = ret; > goto next_item; > } > + if (in->i_state & I_NEW) > + unlock_new_inode(in); > > if (d_really_is_negative(dn)) { > if (ceph_security_xattr_deadlock(in)) {
On 2019/12/12 22:27, Jeff Layton wrote: > Currently, we could have an open-by-handle (or NFS server) call > into the filesystem and start working with an inode before it's > properly filled out. > > Don't clear I_NEW until we have filled out the inode, and discard it > properly if that fails. Note that we occasionally take an extra > reference to the inode to ensure that we don't put the last reference in > discard_new_inode, but rather leave it for ceph_async_iput. > > Signed-off-by: Jeff Layton <jlayton@kernel.org> > --- > fs/ceph/inode.c | 25 +++++++++++++++++++++---- > 1 file changed, 21 insertions(+), 4 deletions(-) > > diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c > index 5bdc1afc2bee..11672f8192b9 100644 > --- a/fs/ceph/inode.c > +++ b/fs/ceph/inode.c > @@ -55,11 +55,9 @@ struct inode *ceph_get_inode(struct super_block *sb, struct ceph_vino vino) > inode = iget5_locked(sb, t, ceph_ino_compare, ceph_set_ino_cb, &vino); > if (!inode) > return ERR_PTR(-ENOMEM); > - if (inode->i_state & I_NEW) { > + if (inode->i_state & I_NEW) > dout("get_inode created new inode %p %llx.%llx ino %llx\n", > inode, ceph_vinop(inode), (u64)inode->i_ino); > - unlock_new_inode(inode); > - } > > dout("get_inode on %lu=%llx.%llx got %p\n", inode->i_ino, vino.ino, > vino.snap, inode); > @@ -88,6 +86,10 @@ struct inode *ceph_get_snapdir(struct inode *parent) > inode->i_fop = &ceph_snapdir_fops; > ci->i_snap_caps = CEPH_CAP_PIN; /* so we can open */ > ci->i_rbytes = 0; > + > + if (inode->i_state & I_NEW) > + unlock_new_inode(inode); > + > return inode; > } > > @@ -1301,7 +1303,6 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) > err = PTR_ERR(in); > goto done; > } > - req->r_target_inode = in; > > err = fill_inode(in, req->r_locked_page, &rinfo->targeti, NULL, > session, > @@ -1311,8 +1312,13 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) > if (err < 0) { > pr_err("fill_inode badness %p %llx.%llx\n", > in, ceph_vinop(in)); > + if 
(in->i_state & I_NEW) > + discard_new_inode(in); > goto done; > } > + req->r_target_inode = in; > + if (in->i_state & I_NEW) > + unlock_new_inode(in); > } > > /* > @@ -1496,7 +1502,12 @@ static int readdir_prepopulate_inodes_only(struct ceph_mds_request *req, > if (rc < 0) { > pr_err("fill_inode badness on %p got %d\n", in, rc); > err = rc; > + ihold(in); > + discard_new_inode(in); Will it trigger a warning when the inode is not a new one? > + } else if (in->i_state & I_NEW) { > + unlock_new_inode(in); > } > + Maybe, to keep it simple, we can switch ihold() & discard_new_inode() to unlock_new_inode() instead all over the ceph code, since we do not care about the I_CREATING state. > /* avoid calling iput_final() in mds dispatch threads */ > ceph_async_iput(in); > } > @@ -1698,12 +1709,18 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req, > if (d_really_is_negative(dn)) { > /* avoid calling iput_final() in mds > * dispatch threads */ > + if (in->i_state & I_NEW) { > + ihold(in); > + discard_new_inode(in); > + } > ceph_async_iput(in); > } > d_drop(dn); > err = ret; > goto next_item; > } > + if (in->i_state & I_NEW) > + unlock_new_inode(in); > > if (d_really_is_negative(dn)) { > if (ceph_security_xattr_deadlock(in)) {
On Thu, Dec 12, 2019 at 10:28 PM Jeff Layton <jlayton@kernel.org> wrote: > > Currently, we could have an open-by-handle (or NFS server) call > into the filesystem and start working with an inode before it's > properly filled out. > > Don't clear I_NEW until we have filled out the inode, and discard it > properly if that fails. Note that we occasionally take an extra > reference to the inode to ensure that we don't put the last reference in > discard_new_inode, but rather leave it for ceph_async_iput. > > Signed-off-by: Jeff Layton <jlayton@kernel.org> > --- > fs/ceph/inode.c | 25 +++++++++++++++++++++---- > 1 file changed, 21 insertions(+), 4 deletions(-) > > diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c > index 5bdc1afc2bee..11672f8192b9 100644 > --- a/fs/ceph/inode.c > +++ b/fs/ceph/inode.c > @@ -55,11 +55,9 @@ struct inode *ceph_get_inode(struct super_block *sb, struct ceph_vino vino) > inode = iget5_locked(sb, t, ceph_ino_compare, ceph_set_ino_cb, &vino); > if (!inode) > return ERR_PTR(-ENOMEM); > - if (inode->i_state & I_NEW) { > + if (inode->i_state & I_NEW) > dout("get_inode created new inode %p %llx.%llx ino %llx\n", > inode, ceph_vinop(inode), (u64)inode->i_ino); > - unlock_new_inode(inode); > - } > > dout("get_inode on %lu=%llx.%llx got %p\n", inode->i_ino, vino.ino, > vino.snap, inode); > @@ -88,6 +86,10 @@ struct inode *ceph_get_snapdir(struct inode *parent) > inode->i_fop = &ceph_snapdir_fops; > ci->i_snap_caps = CEPH_CAP_PIN; /* so we can open */ > ci->i_rbytes = 0; > + > + if (inode->i_state & I_NEW) > + unlock_new_inode(inode); > + > return inode; > } > > @@ -1301,7 +1303,6 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) > err = PTR_ERR(in); > goto done; > } > - req->r_target_inode = in; > > err = fill_inode(in, req->r_locked_page, &rinfo->targeti, NULL, > session, > @@ -1311,8 +1312,13 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) > if (err < 0) { > pr_err("fill_inode badness %p 
%llx.%llx\n", > in, ceph_vinop(in)); > + if (in->i_state & I_NEW) > + discard_new_inode(in); > goto done; > } > + req->r_target_inode = in; > + if (in->i_state & I_NEW) > + unlock_new_inode(in); > } > > /* > @@ -1496,7 +1502,12 @@ static int readdir_prepopulate_inodes_only(struct ceph_mds_request *req, > if (rc < 0) { > pr_err("fill_inode badness on %p got %d\n", in, rc); > err = rc; > + ihold(in); > + discard_new_inode(in); no check for I_NEW? > + } else if (in->i_state & I_NEW) { > + unlock_new_inode(in); > } > + > /* avoid calling iput_final() in mds dispatch threads */ > ceph_async_iput(in); > } > @@ -1698,12 +1709,18 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req, > if (d_really_is_negative(dn)) { > /* avoid calling iput_final() in mds > * dispatch threads */ > + if (in->i_state & I_NEW) { > + ihold(in); > + discard_new_inode(in); > + } > ceph_async_iput(in); > } > d_drop(dn); > err = ret; > goto next_item; > } > + if (in->i_state & I_NEW) > + unlock_new_inode(in); > > if (d_really_is_negative(dn)) { > if (ceph_security_xattr_deadlock(in)) { > -- > 2.23.0 >
On Tue, 2019-12-17 at 19:59 +0800, Yan, Zheng wrote: > On Thu, Dec 12, 2019 at 10:28 PM Jeff Layton <jlayton@kernel.org> wrote: > > Currently, we could have an open-by-handle (or NFS server) call > > into the filesystem and start working with an inode before it's > > properly filled out. > > > > Don't clear I_NEW until we have filled out the inode, and discard it > > properly if that fails. Note that we occasionally take an extra > > reference to the inode to ensure that we don't put the last reference in > > discard_new_inode, but rather leave it for ceph_async_iput. > > > > Signed-off-by: Jeff Layton <jlayton@kernel.org> > > --- > > fs/ceph/inode.c | 25 +++++++++++++++++++++---- > > 1 file changed, 21 insertions(+), 4 deletions(-) > > > > diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c > > index 5bdc1afc2bee..11672f8192b9 100644 > > --- a/fs/ceph/inode.c > > +++ b/fs/ceph/inode.c > > @@ -55,11 +55,9 @@ struct inode *ceph_get_inode(struct super_block *sb, struct ceph_vino vino) > > inode = iget5_locked(sb, t, ceph_ino_compare, ceph_set_ino_cb, &vino); > > if (!inode) > > return ERR_PTR(-ENOMEM); > > - if (inode->i_state & I_NEW) { > > + if (inode->i_state & I_NEW) > > dout("get_inode created new inode %p %llx.%llx ino %llx\n", > > inode, ceph_vinop(inode), (u64)inode->i_ino); > > - unlock_new_inode(inode); > > - } > > > > dout("get_inode on %lu=%llx.%llx got %p\n", inode->i_ino, vino.ino, > > vino.snap, inode); > > @@ -88,6 +86,10 @@ struct inode *ceph_get_snapdir(struct inode *parent) > > inode->i_fop = &ceph_snapdir_fops; > > ci->i_snap_caps = CEPH_CAP_PIN; /* so we can open */ > > ci->i_rbytes = 0; > > + > > + if (inode->i_state & I_NEW) > > + unlock_new_inode(inode); > > + > > return inode; > > } > > > > @@ -1301,7 +1303,6 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) > > err = PTR_ERR(in); > > goto done; > > } > > - req->r_target_inode = in; > > > > err = fill_inode(in, req->r_locked_page, &rinfo->targeti, NULL, > > session, > > 
@@ -1311,8 +1312,13 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) > > if (err < 0) { > > pr_err("fill_inode badness %p %llx.%llx\n", > > in, ceph_vinop(in)); > > + if (in->i_state & I_NEW) > > + discard_new_inode(in); > > goto done; > > } > > + req->r_target_inode = in; > > + if (in->i_state & I_NEW) > > + unlock_new_inode(in); > > } > > > > /* > > @@ -1496,7 +1502,12 @@ static int readdir_prepopulate_inodes_only(struct ceph_mds_request *req, > > if (rc < 0) { > > pr_err("fill_inode badness on %p got %d\n", in, rc); > > err = rc; > > + ihold(in); > > + discard_new_inode(in); > no check for I_NEW? > Good catch. Those two lines should be inside a check for I_NEW. > > + } else if (in->i_state & I_NEW) { > > + unlock_new_inode(in); > > } > > + > > /* avoid calling iput_final() in mds dispatch threads */ > > ceph_async_iput(in); > > } > > @@ -1698,12 +1709,18 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req, > > if (d_really_is_negative(dn)) { > > /* avoid calling iput_final() in mds > > * dispatch threads */ > > + if (in->i_state & I_NEW) { > > + ihold(in); > > + discard_new_inode(in); > > + } > > ceph_async_iput(in); > > } > > d_drop(dn); > > err = ret; > > goto next_item; > > } > > + if (in->i_state & I_NEW) > > + unlock_new_inode(in); > > > > if (d_really_is_negative(dn)) { > > if (ceph_security_xattr_deadlock(in)) { > > -- > > 2.23.0 > >
On 2019/12/17 20:06, Jeff Layton wrote: > On Tue, 2019-12-17 at 19:59 +0800, Yan, Zheng wrote: >> On Thu, Dec 12, 2019 at 10:28 PM Jeff Layton <jlayton@kernel.org> wrote: >>> Currently, we could have an open-by-handle (or NFS server) call >>> into the filesystem and start working with an inode before it's >>> properly filled out. >>> >>> Don't clear I_NEW until we have filled out the inode, and discard it >>> properly if that fails. Note that we occasionally take an extra >>> reference to the inode to ensure that we don't put the last reference in >>> discard_new_inode, but rather leave it for ceph_async_iput. >>> >>> Signed-off-by: Jeff Layton <jlayton@kernel.org> >>> --- >>> fs/ceph/inode.c | 25 +++++++++++++++++++++---- >>> 1 file changed, 21 insertions(+), 4 deletions(-) >>> >>> diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c >>> index 5bdc1afc2bee..11672f8192b9 100644 >>> --- a/fs/ceph/inode.c >>> +++ b/fs/ceph/inode.c >>> @@ -55,11 +55,9 @@ struct inode *ceph_get_inode(struct super_block *sb, struct ceph_vino vino) >>> inode = iget5_locked(sb, t, ceph_ino_compare, ceph_set_ino_cb, &vino); >>> if (!inode) >>> return ERR_PTR(-ENOMEM); >>> - if (inode->i_state & I_NEW) { >>> + if (inode->i_state & I_NEW) >>> dout("get_inode created new inode %p %llx.%llx ino %llx\n", >>> inode, ceph_vinop(inode), (u64)inode->i_ino); >>> - unlock_new_inode(inode); >>> - } >>> >>> dout("get_inode on %lu=%llx.%llx got %p\n", inode->i_ino, vino.ino, >>> vino.snap, inode); >>> @@ -88,6 +86,10 @@ struct inode *ceph_get_snapdir(struct inode *parent) >>> inode->i_fop = &ceph_snapdir_fops; >>> ci->i_snap_caps = CEPH_CAP_PIN; /* so we can open */ >>> ci->i_rbytes = 0; >>> + >>> + if (inode->i_state & I_NEW) >>> + unlock_new_inode(inode); >>> + >>> return inode; >>> } >>> >>> @@ -1301,7 +1303,6 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) >>> err = PTR_ERR(in); >>> goto done; >>> } >>> - req->r_target_inode = in; >>> >>> err = fill_inode(in, 
req->r_locked_page, &rinfo->targeti, NULL, >>> session, >>> @@ -1311,8 +1312,13 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) >>> if (err < 0) { >>> pr_err("fill_inode badness %p %llx.%llx\n", >>> in, ceph_vinop(in)); >>> + if (in->i_state & I_NEW) >>> + discard_new_inode(in); >>> goto done; >>> } >>> + req->r_target_inode = in; >>> + if (in->i_state & I_NEW) >>> + unlock_new_inode(in); >>> } >>> >>> /* >>> @@ -1496,7 +1502,12 @@ static int readdir_prepopulate_inodes_only(struct ceph_mds_request *req, >>> if (rc < 0) { >>> pr_err("fill_inode badness on %p got %d\n", in, rc); >>> err = rc; >>> + ihold(in); >>> + discard_new_inode(in); >> no check for I_NEW? >> > Good catch. Those two lines should be inside a check for I_NEW. Without the I_NEW check we may hit the warning in discard_new_inode(). >>> + } else if (in->i_state & I_NEW) { >>> + unlock_new_inode(in); >>> } >>> + >>> /* avoid calling iput_final() in mds dispatch threads */ >>> ceph_async_iput(in); >>> } >>> @@ -1698,12 +1709,18 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req, >>> if (d_really_is_negative(dn)) { >>> /* avoid calling iput_final() in mds >>> * dispatch threads */ >>> + if (in->i_state & I_NEW) { >>> + ihold(in); >>> + discard_new_inode(in); >>> + } >>> ceph_async_iput(in); >>> } >>> d_drop(dn); >>> err = ret; >>> goto next_item; >>> } >>> + if (in->i_state & I_NEW) >>> + unlock_new_inode(in); >>> >>> if (d_really_is_negative(dn)) { >>> if (ceph_security_xattr_deadlock(in)) { >>> -- >>> 2.23.0 >>>
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 5bdc1afc2bee..11672f8192b9 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -55,11 +55,9 @@ struct inode *ceph_get_inode(struct super_block *sb, struct ceph_vino vino) inode = iget5_locked(sb, t, ceph_ino_compare, ceph_set_ino_cb, &vino); if (!inode) return ERR_PTR(-ENOMEM); - if (inode->i_state & I_NEW) { + if (inode->i_state & I_NEW) dout("get_inode created new inode %p %llx.%llx ino %llx\n", inode, ceph_vinop(inode), (u64)inode->i_ino); - unlock_new_inode(inode); - } dout("get_inode on %lu=%llx.%llx got %p\n", inode->i_ino, vino.ino, vino.snap, inode); @@ -88,6 +86,10 @@ struct inode *ceph_get_snapdir(struct inode *parent) inode->i_fop = &ceph_snapdir_fops; ci->i_snap_caps = CEPH_CAP_PIN; /* so we can open */ ci->i_rbytes = 0; + + if (inode->i_state & I_NEW) + unlock_new_inode(inode); + return inode; } @@ -1301,7 +1303,6 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) err = PTR_ERR(in); goto done; } - req->r_target_inode = in; err = fill_inode(in, req->r_locked_page, &rinfo->targeti, NULL, session, @@ -1311,8 +1312,13 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) if (err < 0) { pr_err("fill_inode badness %p %llx.%llx\n", in, ceph_vinop(in)); + if (in->i_state & I_NEW) + discard_new_inode(in); goto done; } + req->r_target_inode = in; + if (in->i_state & I_NEW) + unlock_new_inode(in); } /* @@ -1496,7 +1502,12 @@ static int readdir_prepopulate_inodes_only(struct ceph_mds_request *req, if (rc < 0) { pr_err("fill_inode badness on %p got %d\n", in, rc); err = rc; + ihold(in); + discard_new_inode(in); + } else if (in->i_state & I_NEW) { + unlock_new_inode(in); } + /* avoid calling iput_final() in mds dispatch threads */ ceph_async_iput(in); } @@ -1698,12 +1709,18 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req, if (d_really_is_negative(dn)) { /* avoid calling iput_final() in mds * dispatch threads */ + if (in->i_state & I_NEW) { + 
ihold(in); + discard_new_inode(in); + } ceph_async_iput(in); } d_drop(dn); err = ret; goto next_item; } + if (in->i_state & I_NEW) + unlock_new_inode(in); if (d_really_is_negative(dn)) { if (ceph_security_xattr_deadlock(in)) {
Currently, we could have an open-by-handle (or NFS server) call into the filesystem and start working with an inode before it's properly filled out. Don't clear I_NEW until we have filled out the inode, and discard it properly if that fails. Note that we occasionally take an extra reference to the inode to ensure that we don't put the last reference in discard_new_inode, but rather leave it for ceph_async_iput. Signed-off-by: Jeff Layton <jlayton@kernel.org> --- fs/ceph/inode.c | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-)