Message ID | 20190801202605.18172-6-jlayton@kernel.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | ceph: add asynchronous unlink support | expand |
On Fri, Aug 2, 2019 at 4:26 AM Jeff Layton <jlayton@kernel.org> wrote: > > Ensure that we wait on replies from any pending directory operations > involving children before we allow synchronous operations involving > that directory to proceed. > This patch is not needed because mds does the job. For current implementation, we need to make inode operations (getattr/setattr/...) wait until getting reply for async create. > Signed-off-by: Jeff Layton <jlayton@kernel.org> > --- > fs/ceph/dir.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++-- > fs/ceph/file.c | 4 ++++ > fs/ceph/super.h | 1 + > 3 files changed, 51 insertions(+), 2 deletions(-) > > diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c > index aab29f48c62d..35797ff895e7 100644 > --- a/fs/ceph/dir.c > +++ b/fs/ceph/dir.c > @@ -1036,6 +1036,38 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir, > return err; > } > > +int ceph_async_dirop_request_wait(struct inode *inode) > +{ > + struct ceph_inode_info *ci = ceph_inode(inode); > + struct ceph_mds_request *cur, *req; > + int ret = 0; > + > + /* Only applicable for directories */ > + if (!inode || !S_ISDIR(inode->i_mode)) > + return 0; > +retry: > + spin_lock(&ci->i_unsafe_lock); > + req = NULL; > + list_for_each_entry(cur, &ci->i_unsafe_dirops, r_unsafe_dir_item) { > + if (!test_bit(CEPH_MDS_R_GOT_UNSAFE, &cur->r_req_flags) && > + !test_bit(CEPH_MDS_R_GOT_SAFE, &cur->r_req_flags)) { > + req = cur; > + ceph_mdsc_get_request(req); > + break; > + } > + } > + spin_unlock(&ci->i_unsafe_lock); > + if (req) { > + dout("%s %lx wait on tid %llu\n", __func__, inode->i_ino, > + req->r_tid); > + ret = wait_for_completion_killable(&req->r_completion); > + ceph_mdsc_put_request(req); > + if (!ret) > + goto retry; > + } > + return ret; > +} > + > /* > * rmdir and unlink are differ only by the metadata op code > */ > @@ -1059,6 +1091,12 @@ static int ceph_unlink(struct inode *dir, struct dentry *dentry) > CEPH_MDS_OP_RMDIR : CEPH_MDS_OP_UNLINK; > } else > goto out; > + > + /* Wait for any requests involving children to get a reply */ > + err = ceph_async_dirop_request_wait(inode); > + if (err) > + goto out; > + > req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS); > if (IS_ERR(req)) { > err = PTR_ERR(req); > @@ -1105,8 +1143,14 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry, > (!ceph_quota_is_same_realm(old_dir, new_dir))) > return -EXDEV; > > - dout("rename dir %p dentry %p to dir %p dentry %p\n", > - old_dir, old_dentry, new_dir, new_dentry); > + err = ceph_async_dirop_request_wait(d_inode(old_dentry)); > + if (err) > + return err; > + > + err = ceph_async_dirop_request_wait(d_inode(new_dentry)); > + if (err) > + return err; > + > req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS); > if (IS_ERR(req)) > return PTR_ERR(req); > diff --git a/fs/ceph/file.c b/fs/ceph/file.c > index 3c0b5247818f..75bce889305c 100644 > --- a/fs/ceph/file.c > +++ b/fs/ceph/file.c > @@ -449,6 +449,10 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry, > dir, dentry, dentry, > d_unhashed(dentry) ? "unhashed" : "hashed", flags, mode); > > + err = ceph_async_dirop_request_wait(dir); > + if (err) > + return err; > + > if (dentry->d_name.len > NAME_MAX) > return -ENAMETOOLONG; > > diff --git a/fs/ceph/super.h b/fs/ceph/super.h > index a9aa3e358226..77ed6c5900be 100644 > --- a/fs/ceph/super.h > +++ b/fs/ceph/super.h > @@ -1113,6 +1113,7 @@ extern int ceph_handle_snapdir(struct ceph_mds_request *req, > struct dentry *dentry, int err); > extern struct dentry *ceph_finish_lookup(struct ceph_mds_request *req, > struct dentry *dentry, int err); > +extern int ceph_async_dirop_request_wait(struct inode *inode); > > extern void __ceph_dentry_lease_touch(struct ceph_dentry_info *di); > extern void __ceph_dentry_dir_lease_touch(struct ceph_dentry_info *di); > -- > 2.21.0 >
On Tue, Jan 7, 2020 at 10:50 PM Yan, Zheng <ukernel@gmail.com> wrote: > > On Fri, Aug 2, 2019 at 4:26 AM Jeff Layton <jlayton@kernel.org> wrote: > > > > Ensure that we wait on replies from any pending directory operations > > involving children before we allow synchronous operations involving > > that directory to proceed. > > > > This patch is not needed because mds does the job. For current > implementation, we need to make inode operations (getattr/setattr/...) > wait until getting reply for async create. > I think about this again. Maybe it's better to do all these waits in MDS > > > Signed-off-by: Jeff Layton <jlayton@kernel.org> > > --- > > fs/ceph/dir.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++-- > > fs/ceph/file.c | 4 ++++ > > fs/ceph/super.h | 1 + > > 3 files changed, 51 insertions(+), 2 deletions(-) > > > > diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c > > index aab29f48c62d..35797ff895e7 100644 > > --- a/fs/ceph/dir.c > > +++ b/fs/ceph/dir.c > > @@ -1036,6 +1036,38 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir, > > return err; > > } > > > > +int ceph_async_dirop_request_wait(struct inode *inode) > > +{ > > + struct ceph_inode_info *ci = ceph_inode(inode); > > + struct ceph_mds_request *cur, *req; > > + int ret = 0; > > + > > + /* Only applicable for directories */ > > + if (!inode || !S_ISDIR(inode->i_mode)) > > + return 0; > > +retry: > > + spin_lock(&ci->i_unsafe_lock); > > + req = NULL; > > + list_for_each_entry(cur, &ci->i_unsafe_dirops, r_unsafe_dir_item) { > > + if (!test_bit(CEPH_MDS_R_GOT_UNSAFE, &cur->r_req_flags) && > > + !test_bit(CEPH_MDS_R_GOT_SAFE, &cur->r_req_flags)) { > > + req = cur; > > + ceph_mdsc_get_request(req); > > + break; > > + } > > + } > > + spin_unlock(&ci->i_unsafe_lock); > > + if (req) { > > + dout("%s %lx wait on tid %llu\n", __func__, inode->i_ino, > > + req->r_tid); > > + ret = wait_for_completion_killable(&req->r_completion); > > + ceph_mdsc_put_request(req); > > + if (!ret) > > + goto retry; > > + } > > + return ret; > > +} > > + > > /* > > * rmdir and unlink are differ only by the metadata op code > > */ > > @@ -1059,6 +1091,12 @@ static int ceph_unlink(struct inode *dir, struct dentry *dentry) > > CEPH_MDS_OP_RMDIR : CEPH_MDS_OP_UNLINK; > > } else > > goto out; > > + > > + /* Wait for any requests involving children to get a reply */ > > + err = ceph_async_dirop_request_wait(inode); > > + if (err) > > + goto out; > > + > > req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS); > > if (IS_ERR(req)) { > > err = PTR_ERR(req); > > @@ -1105,8 +1143,14 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry, > > (!ceph_quota_is_same_realm(old_dir, new_dir))) > > return -EXDEV; > > > > - dout("rename dir %p dentry %p to dir %p dentry %p\n", > > - old_dir, old_dentry, new_dir, new_dentry); > > + err = ceph_async_dirop_request_wait(d_inode(old_dentry)); > > + if (err) > > + return err; > > + > > + err = ceph_async_dirop_request_wait(d_inode(new_dentry)); > > + if (err) > > + return err; > > + > > req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS); > > if (IS_ERR(req)) > > return PTR_ERR(req); > > diff --git a/fs/ceph/file.c b/fs/ceph/file.c > > index 3c0b5247818f..75bce889305c 100644 > > --- a/fs/ceph/file.c > > +++ b/fs/ceph/file.c > > @@ -449,6 +449,10 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry, > > dir, dentry, dentry, > > d_unhashed(dentry) ? "unhashed" : "hashed", flags, mode); > > > > + err = ceph_async_dirop_request_wait(dir); > > + if (err) > > + return err; > > + > > if (dentry->d_name.len > NAME_MAX) > > return -ENAMETOOLONG; > > > > diff --git a/fs/ceph/super.h b/fs/ceph/super.h > > index a9aa3e358226..77ed6c5900be 100644 > > --- a/fs/ceph/super.h > > +++ b/fs/ceph/super.h > > @@ -1113,6 +1113,7 @@ extern int ceph_handle_snapdir(struct ceph_mds_request *req, > > struct dentry *dentry, int err); > > extern struct dentry *ceph_finish_lookup(struct ceph_mds_request *req, > > struct dentry *dentry, int err); > > +extern int ceph_async_dirop_request_wait(struct inode *inode); > > > > extern void __ceph_dentry_lease_touch(struct ceph_dentry_info *di); > > extern void __ceph_dentry_dir_lease_touch(struct ceph_dentry_info *di); > > -- > > 2.21.0 > >
On Tue, 2020-01-07 at 23:45 +0800, Yan, Zheng wrote: > On Tue, Jan 7, 2020 at 10:50 PM Yan, Zheng <ukernel@gmail.com> wrote: > > On Fri, Aug 2, 2019 at 4:26 AM Jeff Layton <jlayton@kernel.org> wrote: > > > Ensure that we wait on replies from any pending directory operations > > > involving children before we allow synchronous operations involving > > > that directory to proceed. > > > > > > > This patch is not needed because mds does the job. For current > > implementation, we need to make inode operations (getattr/setattr/...) > > wait until getting reply for async create. > > > > I think about this again. Maybe it's better to do all these waits in MDS > This patch isn't in the latest set (sent yesterday). FWIW, the current async dirops patchset I'm playing with is here: https://github.com/ceph/ceph-client/commits/wip-async-dirops It's still a bit rough in places, but it seems to be working fairly well now. I hope to have some good numbers to share in the near future. > > > Signed-off-by: Jeff Layton <jlayton@kernel.org> > > > --- > > > fs/ceph/dir.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++-- > > > fs/ceph/file.c | 4 ++++ > > > fs/ceph/super.h | 1 + > > > 3 files changed, 51 insertions(+), 2 deletions(-) > > > > > > diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c > > > index aab29f48c62d..35797ff895e7 100644 > > > --- a/fs/ceph/dir.c > > > +++ b/fs/ceph/dir.c > > > @@ -1036,6 +1036,38 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir, > > > return err; > > > } > > > > > > +int ceph_async_dirop_request_wait(struct inode *inode) > > > +{ > > > + struct ceph_inode_info *ci = ceph_inode(inode); > > > + struct ceph_mds_request *cur, *req; > > > + int ret = 0; > > > + > > > + /* Only applicable for directories */ > > > + if (!inode || !S_ISDIR(inode->i_mode)) > > > + return 0; > > > +retry: > > > + spin_lock(&ci->i_unsafe_lock); > > > + req = NULL; > > > + list_for_each_entry(cur, &ci->i_unsafe_dirops, r_unsafe_dir_item) { > > > + if (!test_bit(CEPH_MDS_R_GOT_UNSAFE, &cur->r_req_flags) && > > > + !test_bit(CEPH_MDS_R_GOT_SAFE, &cur->r_req_flags)) { > > > + req = cur; > > > + ceph_mdsc_get_request(req); > > > + break; > > > + } > > > + } > > > + spin_unlock(&ci->i_unsafe_lock); > > > + if (req) { > > > + dout("%s %lx wait on tid %llu\n", __func__, inode->i_ino, > > > + req->r_tid); > > > + ret = wait_for_completion_killable(&req->r_completion); > > > + ceph_mdsc_put_request(req); > > > + if (!ret) > > > + goto retry; > > > + } > > > + return ret; > > > +} > > > + > > > /* > > > * rmdir and unlink are differ only by the metadata op code > > > */ > > > @@ -1059,6 +1091,12 @@ static int ceph_unlink(struct inode *dir, struct dentry *dentry) > > > CEPH_MDS_OP_RMDIR : CEPH_MDS_OP_UNLINK; > > > } else > > > goto out; > > > + > > > + /* Wait for any requests involving children to get a reply */ > > > + err = ceph_async_dirop_request_wait(inode); > > > + if (err) > > > + goto out; > > > + > > > req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS); > > > if (IS_ERR(req)) { > > > err = PTR_ERR(req); > > > @@ -1105,8 +1143,14 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry, > > > (!ceph_quota_is_same_realm(old_dir, new_dir))) > > > return -EXDEV; > > > > > > - dout("rename dir %p dentry %p to dir %p dentry %p\n", > > > - old_dir, old_dentry, new_dir, new_dentry); > > > + err = ceph_async_dirop_request_wait(d_inode(old_dentry)); > > > + if (err) > > > + return err; > > > + > > > + err = ceph_async_dirop_request_wait(d_inode(new_dentry)); > > > + if (err) > > > + return err; > > > + > > > req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS); > > > if (IS_ERR(req)) > > > return PTR_ERR(req); > > > diff --git a/fs/ceph/file.c b/fs/ceph/file.c > > > index 3c0b5247818f..75bce889305c 100644 > > > --- a/fs/ceph/file.c > > > +++ b/fs/ceph/file.c > > > @@ -449,6 +449,10 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry, > > > dir, dentry, dentry, > > > d_unhashed(dentry) ? "unhashed" : "hashed", flags, mode); > > > > > > + err = ceph_async_dirop_request_wait(dir); > > > + if (err) > > > + return err; > > > + > > > if (dentry->d_name.len > NAME_MAX) > > > return -ENAMETOOLONG; > > > > > > diff --git a/fs/ceph/super.h b/fs/ceph/super.h > > > index a9aa3e358226..77ed6c5900be 100644 > > > --- a/fs/ceph/super.h > > > +++ b/fs/ceph/super.h > > > @@ -1113,6 +1113,7 @@ extern int ceph_handle_snapdir(struct ceph_mds_request *req, > > > struct dentry *dentry, int err); > > > extern struct dentry *ceph_finish_lookup(struct ceph_mds_request *req, > > > struct dentry *dentry, int err); > > > +extern int ceph_async_dirop_request_wait(struct inode *inode); > > > > > > extern void __ceph_dentry_lease_touch(struct ceph_dentry_info *di); > > > extern void __ceph_dentry_dir_lease_touch(struct ceph_dentry_info *di); > > > -- > > > 2.21.0 > > >
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index aab29f48c62d..35797ff895e7 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -1036,6 +1036,38 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir, return err; } +int ceph_async_dirop_request_wait(struct inode *inode) +{ + struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_mds_request *cur, *req; + int ret = 0; + + /* Only applicable for directories */ + if (!inode || !S_ISDIR(inode->i_mode)) + return 0; +retry: + spin_lock(&ci->i_unsafe_lock); + req = NULL; + list_for_each_entry(cur, &ci->i_unsafe_dirops, r_unsafe_dir_item) { + if (!test_bit(CEPH_MDS_R_GOT_UNSAFE, &cur->r_req_flags) && + !test_bit(CEPH_MDS_R_GOT_SAFE, &cur->r_req_flags)) { + req = cur; + ceph_mdsc_get_request(req); + break; + } + } + spin_unlock(&ci->i_unsafe_lock); + if (req) { + dout("%s %lx wait on tid %llu\n", __func__, inode->i_ino, + req->r_tid); + ret = wait_for_completion_killable(&req->r_completion); + ceph_mdsc_put_request(req); + if (!ret) + goto retry; + } + return ret; +} + /* * rmdir and unlink are differ only by the metadata op code */ @@ -1059,6 +1091,12 @@ static int ceph_unlink(struct inode *dir, struct dentry *dentry) CEPH_MDS_OP_RMDIR : CEPH_MDS_OP_UNLINK; } else goto out; + + /* Wait for any requests involving children to get a reply */ + err = ceph_async_dirop_request_wait(inode); + if (err) + goto out; + req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS); if (IS_ERR(req)) { err = PTR_ERR(req); @@ -1105,8 +1143,14 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry, (!ceph_quota_is_same_realm(old_dir, new_dir))) return -EXDEV; - dout("rename dir %p dentry %p to dir %p dentry %p\n", - old_dir, old_dentry, new_dir, new_dentry); + err = ceph_async_dirop_request_wait(d_inode(old_dentry)); + if (err) + return err; + + err = ceph_async_dirop_request_wait(d_inode(new_dentry)); + if (err) + return err; + req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS); if (IS_ERR(req)) return PTR_ERR(req); diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 3c0b5247818f..75bce889305c 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -449,6 +449,10 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry, dir, dentry, dentry, d_unhashed(dentry) ? "unhashed" : "hashed", flags, mode); + err = ceph_async_dirop_request_wait(dir); + if (err) + return err; + if (dentry->d_name.len > NAME_MAX) return -ENAMETOOLONG; diff --git a/fs/ceph/super.h b/fs/ceph/super.h index a9aa3e358226..77ed6c5900be 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -1113,6 +1113,7 @@ extern int ceph_handle_snapdir(struct ceph_mds_request *req, struct dentry *dentry, int err); extern struct dentry *ceph_finish_lookup(struct ceph_mds_request *req, struct dentry *dentry, int err); +extern int ceph_async_dirop_request_wait(struct inode *inode); extern void __ceph_dentry_lease_touch(struct ceph_dentry_info *di); extern void __ceph_dentry_dir_lease_touch(struct ceph_dentry_info *di);
Ensure that we wait on replies from any pending directory operations involving children before we allow synchronous operations involving that directory to proceed. Signed-off-by: Jeff Layton <jlayton@kernel.org> --- fs/ceph/dir.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++-- fs/ceph/file.c | 4 ++++ fs/ceph/super.h | 1 + 3 files changed, 51 insertions(+), 2 deletions(-)