diff mbox series

[02/10] xfs: fix potential log item leak

Message ID 20220503221728.185449-3-david@fromorbit.com (mailing list archive)
State Accepted
Headers show
Series xfs: intent whiteouts | expand

Commit Message

Dave Chinner May 3, 2022, 10:17 p.m. UTC
From: Dave Chinner <dchinner@redhat.com>

Ever since we added shadown format buffers to the log items, log
items need to handle the item being released with shadow buffers
attached. Due to the fact this requirement was added at the same
time we added new rmap/reflink intents, we missed the cleanup of
those items.

In theory, this means shadow buffers can be leaked in a very small
window when a shutdown is initiated. Testing with KASAN shows this
leak does not happen in practice - we haven't identified a single
leak in several years of shutdown testing since ~v4.8 kernels.

However, the intent whiteout cleanup mechanism results in every
cancelled intent in exactly the same state as this tiny race window
creates and so if intents down clean up shadow buffers on final
release we will leak the shadow buffer for just about every intent
we create.

Hence we start with this patch to close this condition off and
ensure that when whiteouts start to be used we don't leak lots of
memory.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
 fs/xfs/xfs_bmap_item.c     | 2 ++
 fs/xfs/xfs_icreate_item.c  | 1 +
 fs/xfs/xfs_refcount_item.c | 2 ++
 fs/xfs/xfs_rmap_item.c     | 2 ++
 4 files changed, 7 insertions(+)

Comments

Allison Henderson May 3, 2022, 10:42 p.m. UTC | #1
On Wed, 2022-05-04 at 08:17 +1000, Dave Chinner wrote:
> From: Dave Chinner <dchinner@redhat.com>
> 
> Ever since we added shadown format buffers to the log items, log
> items need to handle the item being released with shadow buffers
> attached. Due to the fact this requirement was added at the same
> time we added new rmap/reflink intents, we missed the cleanup of
> those items.
> 
> In theory, this means shadow buffers can be leaked in a very small
> window when a shutdown is initiated. Testing with KASAN shows this
> leak does not happen in practice - we haven't identified a single
> leak in several years of shutdown testing since ~v4.8 kernels.
> 
> However, the intent whiteout cleanup mechanism results in every
> cancelled intent in exactly the same state as this tiny race window
> creates and so if intents down clean up shadow buffers on final
> release we will leak the shadow buffer for just about every intent
> we create.
> 
> Hence we start with this patch to close this condition off and
> ensure that when whiteouts start to be used we don't leak lots of
> memory.
> 
> Signed-off-by: Dave Chinner <dchinner@redhat.com>
Ok, looks good to me
Reviewed-by: Allison Henderson <allison.henderson@oracle.com>

> ---
>  fs/xfs/xfs_bmap_item.c     | 2 ++
>  fs/xfs/xfs_icreate_item.c  | 1 +
>  fs/xfs/xfs_refcount_item.c | 2 ++
>  fs/xfs/xfs_rmap_item.c     | 2 ++
>  4 files changed, 7 insertions(+)
> 
> diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c
> index 593ac29cffc7..2c8b686e2a11 100644
> --- a/fs/xfs/xfs_bmap_item.c
> +++ b/fs/xfs/xfs_bmap_item.c
> @@ -39,6 +39,7 @@ STATIC void
>  xfs_bui_item_free(
>  	struct xfs_bui_log_item	*buip)
>  {
> +	kmem_free(buip->bui_item.li_lv_shadow);
>  	kmem_cache_free(xfs_bui_cache, buip);
>  }
>  
> @@ -198,6 +199,7 @@ xfs_bud_item_release(
>  	struct xfs_bud_log_item	*budp = BUD_ITEM(lip);
>  
>  	xfs_bui_release(budp->bud_buip);
> +	kmem_free(budp->bud_item.li_lv_shadow);
>  	kmem_cache_free(xfs_bud_cache, budp);
>  }
>  
> diff --git a/fs/xfs/xfs_icreate_item.c b/fs/xfs/xfs_icreate_item.c
> index 508e184e3b8f..b05314d48176 100644
> --- a/fs/xfs/xfs_icreate_item.c
> +++ b/fs/xfs/xfs_icreate_item.c
> @@ -63,6 +63,7 @@ STATIC void
>  xfs_icreate_item_release(
>  	struct xfs_log_item	*lip)
>  {
> +	kmem_free(ICR_ITEM(lip)->ic_item.li_lv_shadow);
>  	kmem_cache_free(xfs_icreate_cache, ICR_ITEM(lip));
>  }
>  
> diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c
> index 0d868c93144d..10474fe389e1 100644
> --- a/fs/xfs/xfs_refcount_item.c
> +++ b/fs/xfs/xfs_refcount_item.c
> @@ -35,6 +35,7 @@ STATIC void
>  xfs_cui_item_free(
>  	struct xfs_cui_log_item	*cuip)
>  {
> +	kmem_free(cuip->cui_item.li_lv_shadow);
>  	if (cuip->cui_format.cui_nextents > XFS_CUI_MAX_FAST_EXTENTS)
>  		kmem_free(cuip);
>  	else
> @@ -204,6 +205,7 @@ xfs_cud_item_release(
>  	struct xfs_cud_log_item	*cudp = CUD_ITEM(lip);
>  
>  	xfs_cui_release(cudp->cud_cuip);
> +	kmem_free(cudp->cud_item.li_lv_shadow);
>  	kmem_cache_free(xfs_cud_cache, cudp);
>  }
>  
> diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c
> index a22b2d19ef91..6c0b56ebdbe1 100644
> --- a/fs/xfs/xfs_rmap_item.c
> +++ b/fs/xfs/xfs_rmap_item.c
> @@ -35,6 +35,7 @@ STATIC void
>  xfs_rui_item_free(
>  	struct xfs_rui_log_item	*ruip)
>  {
> +	kmem_free(ruip->rui_item.li_lv_shadow);
>  	if (ruip->rui_format.rui_nextents > XFS_RUI_MAX_FAST_EXTENTS)
>  		kmem_free(ruip);
>  	else
> @@ -227,6 +228,7 @@ xfs_rud_item_release(
>  	struct xfs_rud_log_item	*rudp = RUD_ITEM(lip);
>  
>  	xfs_rui_release(rudp->rud_ruip);
> +	kmem_free(rudp->rud_item.li_lv_shadow);
>  	kmem_cache_free(xfs_rud_cache, rudp);
>  }
>
Darrick J. Wong May 3, 2022, 10:44 p.m. UTC | #2
On Wed, May 04, 2022 at 08:17:20AM +1000, Dave Chinner wrote:
> From: Dave Chinner <dchinner@redhat.com>
> 
> Ever since we added shadown format buffers to the log items, log
> items need to handle the item being released with shadow buffers
> attached. Due to the fact this requirement was added at the same
> time we added new rmap/reflink intents, we missed the cleanup of
> those items.
> 
> In theory, this means shadow buffers can be leaked in a very small
> window when a shutdown is initiated. Testing with KASAN shows this
> leak does not happen in practice - we haven't identified a single
> leak in several years of shutdown testing since ~v4.8 kernels.
> 
> However, the intent whiteout cleanup mechanism results in every
> cancelled intent in exactly the same state as this tiny race window
> creates and so if intents down clean up shadow buffers on final
> release we will leak the shadow buffer for just about every intent
> we create.
> 
> Hence we start with this patch to close this condition off and
> ensure that when whiteouts start to be used we don't leak lots of
> memory.
> 
> Signed-off-by: Dave Chinner <dchinner@redhat.com>
> ---
>  fs/xfs/xfs_bmap_item.c     | 2 ++
>  fs/xfs/xfs_icreate_item.c  | 1 +
>  fs/xfs/xfs_refcount_item.c | 2 ++
>  fs/xfs/xfs_rmap_item.c     | 2 ++
>  4 files changed, 7 insertions(+)
> 
> diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c
> index 593ac29cffc7..2c8b686e2a11 100644
> --- a/fs/xfs/xfs_bmap_item.c
> +++ b/fs/xfs/xfs_bmap_item.c
> @@ -39,6 +39,7 @@ STATIC void
>  xfs_bui_item_free(
>  	struct xfs_bui_log_item	*buip)
>  {
> +	kmem_free(buip->bui_item.li_lv_shadow);
>  	kmem_cache_free(xfs_bui_cache, buip);
>  }
>  
> @@ -198,6 +199,7 @@ xfs_bud_item_release(
>  	struct xfs_bud_log_item	*budp = BUD_ITEM(lip);
>  
>  	xfs_bui_release(budp->bud_buip);
> +	kmem_free(budp->bud_item.li_lv_shadow);
>  	kmem_cache_free(xfs_bud_cache, budp);
>  }
>  
> diff --git a/fs/xfs/xfs_icreate_item.c b/fs/xfs/xfs_icreate_item.c
> index 508e184e3b8f..b05314d48176 100644
> --- a/fs/xfs/xfs_icreate_item.c
> +++ b/fs/xfs/xfs_icreate_item.c
> @@ -63,6 +63,7 @@ STATIC void
>  xfs_icreate_item_release(
>  	struct xfs_log_item	*lip)
>  {
> +	kmem_free(ICR_ITEM(lip)->ic_item.li_lv_shadow);

Oh hey, I missed one.  Good catch!

Reviewed-by: Darrick J. Wong <djwong@kernel.org>

--D

>  	kmem_cache_free(xfs_icreate_cache, ICR_ITEM(lip));
>  }
>  
> diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c
> index 0d868c93144d..10474fe389e1 100644
> --- a/fs/xfs/xfs_refcount_item.c
> +++ b/fs/xfs/xfs_refcount_item.c
> @@ -35,6 +35,7 @@ STATIC void
>  xfs_cui_item_free(
>  	struct xfs_cui_log_item	*cuip)
>  {
> +	kmem_free(cuip->cui_item.li_lv_shadow);
>  	if (cuip->cui_format.cui_nextents > XFS_CUI_MAX_FAST_EXTENTS)
>  		kmem_free(cuip);
>  	else
> @@ -204,6 +205,7 @@ xfs_cud_item_release(
>  	struct xfs_cud_log_item	*cudp = CUD_ITEM(lip);
>  
>  	xfs_cui_release(cudp->cud_cuip);
> +	kmem_free(cudp->cud_item.li_lv_shadow);
>  	kmem_cache_free(xfs_cud_cache, cudp);
>  }
>  
> diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c
> index a22b2d19ef91..6c0b56ebdbe1 100644
> --- a/fs/xfs/xfs_rmap_item.c
> +++ b/fs/xfs/xfs_rmap_item.c
> @@ -35,6 +35,7 @@ STATIC void
>  xfs_rui_item_free(
>  	struct xfs_rui_log_item	*ruip)
>  {
> +	kmem_free(ruip->rui_item.li_lv_shadow);
>  	if (ruip->rui_format.rui_nextents > XFS_RUI_MAX_FAST_EXTENTS)
>  		kmem_free(ruip);
>  	else
> @@ -227,6 +228,7 @@ xfs_rud_item_release(
>  	struct xfs_rud_log_item	*rudp = RUD_ITEM(lip);
>  
>  	xfs_rui_release(rudp->rud_ruip);
> +	kmem_free(rudp->rud_item.li_lv_shadow);
>  	kmem_cache_free(xfs_rud_cache, rudp);
>  }
>  
> -- 
> 2.35.1
>
Christoph Hellwig May 10, 2022, 12:48 p.m. UTC | #3
Looks good:

Reviewed-by: Christoph Hellwig <hch@lst.de>
diff mbox series

Patch

diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c
index 593ac29cffc7..2c8b686e2a11 100644
--- a/fs/xfs/xfs_bmap_item.c
+++ b/fs/xfs/xfs_bmap_item.c
@@ -39,6 +39,7 @@  STATIC void
 xfs_bui_item_free(
 	struct xfs_bui_log_item	*buip)
 {
+	kmem_free(buip->bui_item.li_lv_shadow);
 	kmem_cache_free(xfs_bui_cache, buip);
 }
 
@@ -198,6 +199,7 @@  xfs_bud_item_release(
 	struct xfs_bud_log_item	*budp = BUD_ITEM(lip);
 
 	xfs_bui_release(budp->bud_buip);
+	kmem_free(budp->bud_item.li_lv_shadow);
 	kmem_cache_free(xfs_bud_cache, budp);
 }
 
diff --git a/fs/xfs/xfs_icreate_item.c b/fs/xfs/xfs_icreate_item.c
index 508e184e3b8f..b05314d48176 100644
--- a/fs/xfs/xfs_icreate_item.c
+++ b/fs/xfs/xfs_icreate_item.c
@@ -63,6 +63,7 @@  STATIC void
 xfs_icreate_item_release(
 	struct xfs_log_item	*lip)
 {
+	kmem_free(ICR_ITEM(lip)->ic_item.li_lv_shadow);
 	kmem_cache_free(xfs_icreate_cache, ICR_ITEM(lip));
 }
 
diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c
index 0d868c93144d..10474fe389e1 100644
--- a/fs/xfs/xfs_refcount_item.c
+++ b/fs/xfs/xfs_refcount_item.c
@@ -35,6 +35,7 @@  STATIC void
 xfs_cui_item_free(
 	struct xfs_cui_log_item	*cuip)
 {
+	kmem_free(cuip->cui_item.li_lv_shadow);
 	if (cuip->cui_format.cui_nextents > XFS_CUI_MAX_FAST_EXTENTS)
 		kmem_free(cuip);
 	else
@@ -204,6 +205,7 @@  xfs_cud_item_release(
 	struct xfs_cud_log_item	*cudp = CUD_ITEM(lip);
 
 	xfs_cui_release(cudp->cud_cuip);
+	kmem_free(cudp->cud_item.li_lv_shadow);
 	kmem_cache_free(xfs_cud_cache, cudp);
 }
 
diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c
index a22b2d19ef91..6c0b56ebdbe1 100644
--- a/fs/xfs/xfs_rmap_item.c
+++ b/fs/xfs/xfs_rmap_item.c
@@ -35,6 +35,7 @@  STATIC void
 xfs_rui_item_free(
 	struct xfs_rui_log_item	*ruip)
 {
+	kmem_free(ruip->rui_item.li_lv_shadow);
 	if (ruip->rui_format.rui_nextents > XFS_RUI_MAX_FAST_EXTENTS)
 		kmem_free(ruip);
 	else
@@ -227,6 +228,7 @@  xfs_rud_item_release(
 	struct xfs_rud_log_item	*rudp = RUD_ITEM(lip);
 
 	xfs_rui_release(rudp->rud_ruip);
+	kmem_free(rudp->rud_item.li_lv_shadow);
 	kmem_cache_free(xfs_rud_cache, rudp);
 }