diff mbox series

[5/9] ceph: wait for async dir ops to complete before doing synchronous dir ops

Message ID 20190801202605.18172-6-jlayton@kernel.org (mailing list archive)
State New, archived
Headers show
Series ceph: add asynchronous unlink support | expand

Commit Message

Jeff Layton Aug. 1, 2019, 8:26 p.m. UTC
Ensure that we wait on replies from any pending directory operations
involving children before we allow synchronous operations involving
that directory to proceed.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
---
 fs/ceph/dir.c   | 48 ++++++++++++++++++++++++++++++++++++++++++++++--
 fs/ceph/file.c  |  4 ++++
 fs/ceph/super.h |  1 +
 3 files changed, 51 insertions(+), 2 deletions(-)

Comments

Yan, Zheng Jan. 7, 2020, 2:50 p.m. UTC | #1
On Fri, Aug 2, 2019 at 4:26 AM Jeff Layton <jlayton@kernel.org> wrote:
>
> Ensure that we wait on replies from any pending directory operations
> involving children before we allow synchronous operations involving
> that directory to proceed.
>

This patch is not needed because mds does the job.  For current
implementation, we need to make inode operations (getattr/setattr/...)
wait until getting reply for async create.


> Signed-off-by: Jeff Layton <jlayton@kernel.org>
> ---
>  fs/ceph/dir.c   | 48 ++++++++++++++++++++++++++++++++++++++++++++++--
>  fs/ceph/file.c  |  4 ++++
>  fs/ceph/super.h |  1 +
>  3 files changed, 51 insertions(+), 2 deletions(-)
>
> diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
> index aab29f48c62d..35797ff895e7 100644
> --- a/fs/ceph/dir.c
> +++ b/fs/ceph/dir.c
> @@ -1036,6 +1036,38 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir,
>         return err;
>  }
>
> +int ceph_async_dirop_request_wait(struct inode *inode)
> +{
> +       struct ceph_inode_info *ci = ceph_inode(inode);
> +       struct ceph_mds_request *cur, *req;
> +       int ret = 0;
> +
> +       /* Only applicable for directories */
> +       if (!inode || !S_ISDIR(inode->i_mode))
> +               return 0;
> +retry:
> +       spin_lock(&ci->i_unsafe_lock);
> +       req = NULL;
> +       list_for_each_entry(cur, &ci->i_unsafe_dirops, r_unsafe_dir_item) {
> +               if (!test_bit(CEPH_MDS_R_GOT_UNSAFE, &cur->r_req_flags) &&
> +                   !test_bit(CEPH_MDS_R_GOT_SAFE, &cur->r_req_flags)) {
> +                       req = cur;
> +                       ceph_mdsc_get_request(req);
> +                       break;
> +               }
> +       }
> +       spin_unlock(&ci->i_unsafe_lock);
> +       if (req) {
> +               dout("%s %lx wait on tid %llu\n", __func__, inode->i_ino,
> +                    req->r_tid);
> +               ret = wait_for_completion_killable(&req->r_completion);
> +               ceph_mdsc_put_request(req);
> +               if (!ret)
> +                       goto retry;
> +       }
> +       return ret;
> +}
> +
>  /*
>   * rmdir and unlink are differ only by the metadata op code
>   */
> @@ -1059,6 +1091,12 @@ static int ceph_unlink(struct inode *dir, struct dentry *dentry)
>                         CEPH_MDS_OP_RMDIR : CEPH_MDS_OP_UNLINK;
>         } else
>                 goto out;
> +
> +       /* Wait for any requests involving children to get a reply */
> +       err = ceph_async_dirop_request_wait(inode);
> +       if (err)
> +               goto out;
> +
>         req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
>         if (IS_ERR(req)) {
>                 err = PTR_ERR(req);
> @@ -1105,8 +1143,14 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
>             (!ceph_quota_is_same_realm(old_dir, new_dir)))
>                 return -EXDEV;
>
> -       dout("rename dir %p dentry %p to dir %p dentry %p\n",
> -            old_dir, old_dentry, new_dir, new_dentry);
> +       err = ceph_async_dirop_request_wait(d_inode(old_dentry));
> +       if (err)
> +               return err;
> +
> +       err = ceph_async_dirop_request_wait(d_inode(new_dentry));
> +       if (err)
> +               return err;
> +
>         req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
>         if (IS_ERR(req))
>                 return PTR_ERR(req);
> diff --git a/fs/ceph/file.c b/fs/ceph/file.c
> index 3c0b5247818f..75bce889305c 100644
> --- a/fs/ceph/file.c
> +++ b/fs/ceph/file.c
> @@ -449,6 +449,10 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
>              dir, dentry, dentry,
>              d_unhashed(dentry) ? "unhashed" : "hashed", flags, mode);
>
> +       err = ceph_async_dirop_request_wait(dir);
> +       if (err)
> +               return err;
> +
>         if (dentry->d_name.len > NAME_MAX)
>                 return -ENAMETOOLONG;
>
> diff --git a/fs/ceph/super.h b/fs/ceph/super.h
> index a9aa3e358226..77ed6c5900be 100644
> --- a/fs/ceph/super.h
> +++ b/fs/ceph/super.h
> @@ -1113,6 +1113,7 @@ extern int ceph_handle_snapdir(struct ceph_mds_request *req,
>                                struct dentry *dentry, int err);
>  extern struct dentry *ceph_finish_lookup(struct ceph_mds_request *req,
>                                          struct dentry *dentry, int err);
> +extern int ceph_async_dirop_request_wait(struct inode *inode);
>
>  extern void __ceph_dentry_lease_touch(struct ceph_dentry_info *di);
>  extern void __ceph_dentry_dir_lease_touch(struct ceph_dentry_info *di);
> --
> 2.21.0
>
Yan, Zheng Jan. 7, 2020, 3:45 p.m. UTC | #2
On Tue, Jan 7, 2020 at 10:50 PM Yan, Zheng <ukernel@gmail.com> wrote:
>
> On Fri, Aug 2, 2019 at 4:26 AM Jeff Layton <jlayton@kernel.org> wrote:
> >
> > Ensure that we wait on replies from any pending directory operations
> > involving children before we allow synchronous operations involving
> > that directory to proceed.
> >
>
> This patch is not needed because mds does the job.  For current
> implementation, we need to make inode operations (getattr/setattr/...)
> wait until getting reply for async create.
>

I think about this again. Maybe it's better to do all these waits in MDS

>
> > Signed-off-by: Jeff Layton <jlayton@kernel.org>
> > ---
> >  fs/ceph/dir.c   | 48 ++++++++++++++++++++++++++++++++++++++++++++++--
> >  fs/ceph/file.c  |  4 ++++
> >  fs/ceph/super.h |  1 +
> >  3 files changed, 51 insertions(+), 2 deletions(-)
> >
> > diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
> > index aab29f48c62d..35797ff895e7 100644
> > --- a/fs/ceph/dir.c
> > +++ b/fs/ceph/dir.c
> > @@ -1036,6 +1036,38 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir,
> >         return err;
> >  }
> >
> > +int ceph_async_dirop_request_wait(struct inode *inode)
> > +{
> > +       struct ceph_inode_info *ci = ceph_inode(inode);
> > +       struct ceph_mds_request *cur, *req;
> > +       int ret = 0;
> > +
> > +       /* Only applicable for directories */
> > +       if (!inode || !S_ISDIR(inode->i_mode))
> > +               return 0;
> > +retry:
> > +       spin_lock(&ci->i_unsafe_lock);
> > +       req = NULL;
> > +       list_for_each_entry(cur, &ci->i_unsafe_dirops, r_unsafe_dir_item) {
> > +               if (!test_bit(CEPH_MDS_R_GOT_UNSAFE, &cur->r_req_flags) &&
> > +                   !test_bit(CEPH_MDS_R_GOT_SAFE, &cur->r_req_flags)) {
> > +                       req = cur;
> > +                       ceph_mdsc_get_request(req);
> > +                       break;
> > +               }
> > +       }
> > +       spin_unlock(&ci->i_unsafe_lock);
> > +       if (req) {
> > +               dout("%s %lx wait on tid %llu\n", __func__, inode->i_ino,
> > +                    req->r_tid);
> > +               ret = wait_for_completion_killable(&req->r_completion);
> > +               ceph_mdsc_put_request(req);
> > +               if (!ret)
> > +                       goto retry;
> > +       }
> > +       return ret;
> > +}
> > +
> >  /*
> >   * rmdir and unlink are differ only by the metadata op code
> >   */
> > @@ -1059,6 +1091,12 @@ static int ceph_unlink(struct inode *dir, struct dentry *dentry)
> >                         CEPH_MDS_OP_RMDIR : CEPH_MDS_OP_UNLINK;
> >         } else
> >                 goto out;
> > +
> > +       /* Wait for any requests involving children to get a reply */
> > +       err = ceph_async_dirop_request_wait(inode);
> > +       if (err)
> > +               goto out;
> > +
> >         req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
> >         if (IS_ERR(req)) {
> >                 err = PTR_ERR(req);
> > @@ -1105,8 +1143,14 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
> >             (!ceph_quota_is_same_realm(old_dir, new_dir)))
> >                 return -EXDEV;
> >
> > -       dout("rename dir %p dentry %p to dir %p dentry %p\n",
> > -            old_dir, old_dentry, new_dir, new_dentry);
> > +       err = ceph_async_dirop_request_wait(d_inode(old_dentry));
> > +       if (err)
> > +               return err;
> > +
> > +       err = ceph_async_dirop_request_wait(d_inode(new_dentry));
> > +       if (err)
> > +               return err;
> > +
> >         req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
> >         if (IS_ERR(req))
> >                 return PTR_ERR(req);
> > diff --git a/fs/ceph/file.c b/fs/ceph/file.c
> > index 3c0b5247818f..75bce889305c 100644
> > --- a/fs/ceph/file.c
> > +++ b/fs/ceph/file.c
> > @@ -449,6 +449,10 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
> >              dir, dentry, dentry,
> >              d_unhashed(dentry) ? "unhashed" : "hashed", flags, mode);
> >
> > +       err = ceph_async_dirop_request_wait(dir);
> > +       if (err)
> > +               return err;
> > +
> >         if (dentry->d_name.len > NAME_MAX)
> >                 return -ENAMETOOLONG;
> >
> > diff --git a/fs/ceph/super.h b/fs/ceph/super.h
> > index a9aa3e358226..77ed6c5900be 100644
> > --- a/fs/ceph/super.h
> > +++ b/fs/ceph/super.h
> > @@ -1113,6 +1113,7 @@ extern int ceph_handle_snapdir(struct ceph_mds_request *req,
> >                                struct dentry *dentry, int err);
> >  extern struct dentry *ceph_finish_lookup(struct ceph_mds_request *req,
> >                                          struct dentry *dentry, int err);
> > +extern int ceph_async_dirop_request_wait(struct inode *inode);
> >
> >  extern void __ceph_dentry_lease_touch(struct ceph_dentry_info *di);
> >  extern void __ceph_dentry_dir_lease_touch(struct ceph_dentry_info *di);
> > --
> > 2.21.0
> >
Jeff Layton Jan. 7, 2020, 9:43 p.m. UTC | #3
On Tue, 2020-01-07 at 23:45 +0800, Yan, Zheng wrote:
> On Tue, Jan 7, 2020 at 10:50 PM Yan, Zheng <ukernel@gmail.com> wrote:
> > On Fri, Aug 2, 2019 at 4:26 AM Jeff Layton <jlayton@kernel.org> wrote:
> > > Ensure that we wait on replies from any pending directory operations
> > > involving children before we allow synchronous operations involving
> > > that directory to proceed.
> > > 
> > 
> > This patch is not needed because mds does the job.  For current
> > implementation, we need to make inode operations (getattr/setattr/...)
> > wait until getting reply for async create.
> > 
> 
> I think about this again. Maybe it's better to do all these waits in MDS
> 

This patch isn't in the latest set (sent yesterday).

FWIW, the current async dirops patchset I'm playing with is here:

https://github.com/ceph/ceph-client/commits/wip-async-dirops

It's still a bit rough in places, but it seems to be working fairly well
now. I hope to have some good numbers to share in the near future.

> > > Signed-off-by: Jeff Layton <jlayton@kernel.org>
> > > ---
> > >  fs/ceph/dir.c   | 48 ++++++++++++++++++++++++++++++++++++++++++++++--
> > >  fs/ceph/file.c  |  4 ++++
> > >  fs/ceph/super.h |  1 +
> > >  3 files changed, 51 insertions(+), 2 deletions(-)
> > > 
> > > diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
> > > index aab29f48c62d..35797ff895e7 100644
> > > --- a/fs/ceph/dir.c
> > > +++ b/fs/ceph/dir.c
> > > @@ -1036,6 +1036,38 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir,
> > >         return err;
> > >  }
> > > 
> > > +int ceph_async_dirop_request_wait(struct inode *inode)
> > > +{
> > > +       struct ceph_inode_info *ci = ceph_inode(inode);
> > > +       struct ceph_mds_request *cur, *req;
> > > +       int ret = 0;
> > > +
> > > +       /* Only applicable for directories */
> > > +       if (!inode || !S_ISDIR(inode->i_mode))
> > > +               return 0;
> > > +retry:
> > > +       spin_lock(&ci->i_unsafe_lock);
> > > +       req = NULL;
> > > +       list_for_each_entry(cur, &ci->i_unsafe_dirops, r_unsafe_dir_item) {
> > > +               if (!test_bit(CEPH_MDS_R_GOT_UNSAFE, &cur->r_req_flags) &&
> > > +                   !test_bit(CEPH_MDS_R_GOT_SAFE, &cur->r_req_flags)) {
> > > +                       req = cur;
> > > +                       ceph_mdsc_get_request(req);
> > > +                       break;
> > > +               }
> > > +       }
> > > +       spin_unlock(&ci->i_unsafe_lock);
> > > +       if (req) {
> > > +               dout("%s %lx wait on tid %llu\n", __func__, inode->i_ino,
> > > +                    req->r_tid);
> > > +               ret = wait_for_completion_killable(&req->r_completion);
> > > +               ceph_mdsc_put_request(req);
> > > +               if (!ret)
> > > +                       goto retry;
> > > +       }
> > > +       return ret;
> > > +}
> > > +
> > >  /*
> > >   * rmdir and unlink are differ only by the metadata op code
> > >   */
> > > @@ -1059,6 +1091,12 @@ static int ceph_unlink(struct inode *dir, struct dentry *dentry)
> > >                         CEPH_MDS_OP_RMDIR : CEPH_MDS_OP_UNLINK;
> > >         } else
> > >                 goto out;
> > > +
> > > +       /* Wait for any requests involving children to get a reply */
> > > +       err = ceph_async_dirop_request_wait(inode);
> > > +       if (err)
> > > +               goto out;
> > > +
> > >         req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
> > >         if (IS_ERR(req)) {
> > >                 err = PTR_ERR(req);
> > > @@ -1105,8 +1143,14 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
> > >             (!ceph_quota_is_same_realm(old_dir, new_dir)))
> > >                 return -EXDEV;
> > > 
> > > -       dout("rename dir %p dentry %p to dir %p dentry %p\n",
> > > -            old_dir, old_dentry, new_dir, new_dentry);
> > > +       err = ceph_async_dirop_request_wait(d_inode(old_dentry));
> > > +       if (err)
> > > +               return err;
> > > +
> > > +       err = ceph_async_dirop_request_wait(d_inode(new_dentry));
> > > +       if (err)
> > > +               return err;
> > > +
> > >         req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
> > >         if (IS_ERR(req))
> > >                 return PTR_ERR(req);
> > > diff --git a/fs/ceph/file.c b/fs/ceph/file.c
> > > index 3c0b5247818f..75bce889305c 100644
> > > --- a/fs/ceph/file.c
> > > +++ b/fs/ceph/file.c
> > > @@ -449,6 +449,10 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
> > >              dir, dentry, dentry,
> > >              d_unhashed(dentry) ? "unhashed" : "hashed", flags, mode);
> > > 
> > > +       err = ceph_async_dirop_request_wait(dir);
> > > +       if (err)
> > > +               return err;
> > > +
> > >         if (dentry->d_name.len > NAME_MAX)
> > >                 return -ENAMETOOLONG;
> > > 
> > > diff --git a/fs/ceph/super.h b/fs/ceph/super.h
> > > index a9aa3e358226..77ed6c5900be 100644
> > > --- a/fs/ceph/super.h
> > > +++ b/fs/ceph/super.h
> > > @@ -1113,6 +1113,7 @@ extern int ceph_handle_snapdir(struct ceph_mds_request *req,
> > >                                struct dentry *dentry, int err);
> > >  extern struct dentry *ceph_finish_lookup(struct ceph_mds_request *req,
> > >                                          struct dentry *dentry, int err);
> > > +extern int ceph_async_dirop_request_wait(struct inode *inode);
> > > 
> > >  extern void __ceph_dentry_lease_touch(struct ceph_dentry_info *di);
> > >  extern void __ceph_dentry_dir_lease_touch(struct ceph_dentry_info *di);
> > > --
> > > 2.21.0
> > >
diff mbox series

Patch

diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index aab29f48c62d..35797ff895e7 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -1036,6 +1036,38 @@  static int ceph_link(struct dentry *old_dentry, struct inode *dir,
 	return err;
 }
 
+int ceph_async_dirop_request_wait(struct inode *inode)
+{
+	struct ceph_inode_info *ci = ceph_inode(inode);
+	struct ceph_mds_request *cur, *req;
+	int ret = 0;
+
+	/* Only applicable for directories */
+	if (!inode || !S_ISDIR(inode->i_mode))
+		return 0;
+retry:
+	spin_lock(&ci->i_unsafe_lock);
+	req = NULL;
+	list_for_each_entry(cur, &ci->i_unsafe_dirops, r_unsafe_dir_item) {
+		if (!test_bit(CEPH_MDS_R_GOT_UNSAFE, &cur->r_req_flags) &&
+		    !test_bit(CEPH_MDS_R_GOT_SAFE, &cur->r_req_flags)) {
+			req = cur;
+			ceph_mdsc_get_request(req);
+			break;
+		}
+	}
+	spin_unlock(&ci->i_unsafe_lock);
+	if (req) {
+		dout("%s %lx wait on tid %llu\n", __func__, inode->i_ino,
+		     req->r_tid);
+		ret = wait_for_completion_killable(&req->r_completion);
+		ceph_mdsc_put_request(req);
+		if (!ret)
+			goto retry;
+	}
+	return ret;
+}
+
 /*
  * rmdir and unlink are differ only by the metadata op code
  */
@@ -1059,6 +1091,12 @@  static int ceph_unlink(struct inode *dir, struct dentry *dentry)
 			CEPH_MDS_OP_RMDIR : CEPH_MDS_OP_UNLINK;
 	} else
 		goto out;
+
+	/* Wait for any requests involving children to get a reply */
+	err = ceph_async_dirop_request_wait(inode);
+	if (err)
+		goto out;
+
 	req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
 	if (IS_ERR(req)) {
 		err = PTR_ERR(req);
@@ -1105,8 +1143,14 @@  static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
 	    (!ceph_quota_is_same_realm(old_dir, new_dir)))
 		return -EXDEV;
 
-	dout("rename dir %p dentry %p to dir %p dentry %p\n",
-	     old_dir, old_dentry, new_dir, new_dentry);
+	err = ceph_async_dirop_request_wait(d_inode(old_dentry));
+	if (err)
+		return err;
+
+	err = ceph_async_dirop_request_wait(d_inode(new_dentry));
+	if (err)
+		return err;
+
 	req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
 	if (IS_ERR(req))
 		return PTR_ERR(req);
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 3c0b5247818f..75bce889305c 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -449,6 +449,10 @@  int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
 	     dir, dentry, dentry,
 	     d_unhashed(dentry) ? "unhashed" : "hashed", flags, mode);
 
+	err = ceph_async_dirop_request_wait(dir);
+	if (err)
+		return err;
+
 	if (dentry->d_name.len > NAME_MAX)
 		return -ENAMETOOLONG;
 
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index a9aa3e358226..77ed6c5900be 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -1113,6 +1113,7 @@  extern int ceph_handle_snapdir(struct ceph_mds_request *req,
 			       struct dentry *dentry, int err);
 extern struct dentry *ceph_finish_lookup(struct ceph_mds_request *req,
 					 struct dentry *dentry, int err);
+extern int ceph_async_dirop_request_wait(struct inode *inode);
 
 extern void __ceph_dentry_lease_touch(struct ceph_dentry_info *di);
 extern void __ceph_dentry_dir_lease_touch(struct ceph_dentry_info *di);