diff mbox series

ceph: trigger to flush the buffer when making snapshot

Message ID 20230511100911.361132-1-xiubli@redhat.com (mailing list archive)
State New, archived
Headers show
Series ceph: trigger to flush the buffer when making snapshot | expand

Commit Message

Xiubo Li May 11, 2023, 10:09 a.m. UTC
From: Xiubo Li <xiubli@redhat.com>

The 'i_wr_ref' is used to track the 'Fb' caps, while whenever the 'Fb'
caps is took the kclient will always take the 'Fw' caps at the same
time. That means it will always be a false check in __ceph_finish_cap_snap().

When writing to buffer the kclient will take both 'Fb|Fw' caps and then
write the contents to the buffer pages by increasing the 'i_wrbuffer_ref'
and then just release both 'Fb|Fw'. This is different with the user
space libcephfs, which will keep the 'Fb' being took and use 'i_wr_ref'
instead of 'i_wrbuffer_ref' to track this until the buffer is flushed
to Rados.

We need to defer flushing the capsnap until the corresponding buffer
pages are all flushed to Rados, and at the same time just trigger to
flush the buffer pages immediately.

URL: https://tracker.ceph.com/issues/59343
URL: https://tracker.ceph.com/issues/48640
Signed-off-by: Xiubo Li <xiubli@redhat.com>
---
 fs/ceph/caps.c | 6 ++++++
 fs/ceph/snap.c | 9 ++++++---
 2 files changed, 12 insertions(+), 3 deletions(-)

Comments

Milind Changire June 6, 2023, 5:39 a.m. UTC | #1
Looks good to me.

Reviewed-by: Milind Changire <mchangir@redhat.com>

On Thu, May 11, 2023 at 3:43 PM <xiubli@redhat.com> wrote:
>
> From: Xiubo Li <xiubli@redhat.com>
>
> The 'i_wr_ref' is used to track the 'Fb' caps, while whenever the 'Fb'
> caps is took the kclient will always take the 'Fw' caps at the same
> time. That means it will always be a false check in __ceph_finish_cap_snap().
>
> When writing to buffer the kclient will take both 'Fb|Fw' caps and then
> write the contents to the buffer pages by increasing the 'i_wrbuffer_ref'
> and then just release both 'Fb|Fw'. This is different with the user
> space libcephfs, which will keep the 'Fb' being took and use 'i_wr_ref'
> instead of 'i_wrbuffer_ref' to track this until the buffer is flushed
> to Rados.
>
> We need to defer flushing the capsnap until the corresponding buffer
> pages are all flushed to Rados, and at the same time just trigger to
> flush the buffer pages immediately.
>
> URL: https://tracker.ceph.com/issues/59343
> URL: https://tracker.ceph.com/issues/48640
> Signed-off-by: Xiubo Li <xiubli@redhat.com>
> ---
>  fs/ceph/caps.c | 6 ++++++
>  fs/ceph/snap.c | 9 ++++++---
>  2 files changed, 12 insertions(+), 3 deletions(-)
>
> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
> index 2732f46532ec..feabf4cc0c4f 100644
> --- a/fs/ceph/caps.c
> +++ b/fs/ceph/caps.c
> @@ -3168,6 +3168,12 @@ static void __ceph_put_cap_refs(struct ceph_inode_info *ci, int had,
>         }
>         if (had & CEPH_CAP_FILE_WR) {
>                 if (--ci->i_wr_ref == 0) {
> +                       /*
> +                        * The Fb caps will always be took and released
> +                        * together with the Fw caps.
> +                        */
> +                       WARN_ON_ONCE(ci->i_wb_ref);
> +
>                         last++;
>                         check_flushsnaps = true;
>                         if (ci->i_wrbuffer_ref_head == 0 &&
> diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
> index 23b31600ee3c..0e59e95a96d9 100644
> --- a/fs/ceph/snap.c
> +++ b/fs/ceph/snap.c
> @@ -676,14 +676,17 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci,
>                 return 0;
>         }
>
> -       /* Fb cap still in use, delay it */
> -       if (ci->i_wb_ref) {
> +       /*
> +        * Defer flushing the capsnap if the dirty buffer not flushed yet.
> +        * And trigger to flush the buffer immediately.
> +        */
> +       if (ci->i_wrbuffer_ref) {
>                 dout("%s %p %llx.%llx cap_snap %p snapc %p %llu %s s=%llu "
>                      "used WRBUFFER, delaying\n", __func__, inode,
>                      ceph_vinop(inode), capsnap, capsnap->context,
>                      capsnap->context->seq, ceph_cap_string(capsnap->dirty),
>                      capsnap->size);
> -               capsnap->writing = 1;
> +               ceph_queue_writeback(inode);
>                 return 0;
>         }
>
> --
> 2.40.0
>
diff mbox series

Patch

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 2732f46532ec..feabf4cc0c4f 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -3168,6 +3168,12 @@  static void __ceph_put_cap_refs(struct ceph_inode_info *ci, int had,
 	}
 	if (had & CEPH_CAP_FILE_WR) {
 		if (--ci->i_wr_ref == 0) {
+			/*
+			 * The Fb caps will always be took and released
+			 * together with the Fw caps.
+			 */
+			WARN_ON_ONCE(ci->i_wb_ref);
+
 			last++;
 			check_flushsnaps = true;
 			if (ci->i_wrbuffer_ref_head == 0 &&
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 23b31600ee3c..0e59e95a96d9 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -676,14 +676,17 @@  int __ceph_finish_cap_snap(struct ceph_inode_info *ci,
 		return 0;
 	}
 
-	/* Fb cap still in use, delay it */
-	if (ci->i_wb_ref) {
+	/*
+	 * Defer flushing the capsnap if the dirty buffer not flushed yet.
+	 * And trigger to flush the buffer immediately.
+	 */
+	if (ci->i_wrbuffer_ref) {
 		dout("%s %p %llx.%llx cap_snap %p snapc %p %llu %s s=%llu "
 		     "used WRBUFFER, delaying\n", __func__, inode,
 		     ceph_vinop(inode), capsnap, capsnap->context,
 		     capsnap->context->seq, ceph_cap_string(capsnap->dirty),
 		     capsnap->size);
-		capsnap->writing = 1;
+		ceph_queue_writeback(inode);
 		return 0;
 	}