ocfs2: retry on ENOSPC if sufficient space in truncate log

Message ID	1466586469-5541-1-git-send-email-zren@suse.com (mailing list archive)
State	New, archived
Headers	show Return-Path: <ocfs2-devel-bounces@oss.oracle.com> From: Eric Ren <zren@suse.com> To: akpm@linux-foundation.org Date: Wed, 22 Jun 2016 17:07:49 +0800 Message-Id: <1466586469-5541-1-git-send-email-zren@suse.com> Cc: mfasheh@suse.com, jack@suse.cz, ocfs2-devel@oss.oracle.com Subject: [Ocfs2-devel] [PATCH] ocfs2: retry on ENOSPC if sufficient space in truncate log Precedence: list MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Sender: ocfs2-devel-bounces@oss.oracle.com Errors-To: ocfs2-devel-bounces@oss.oracle.com

Message ID

1466586469-5541-1-git-send-email-zren@suse.com (mailing list archive)

State

New, archived

Headers

From: Eric Ren <zren@suse.com>
To: akpm@linux-foundation.org
Date: Wed, 22 Jun 2016 17:07:49 +0800
Message-Id: <1466586469-5541-1-git-send-email-zren@suse.com>
Cc: mfasheh@suse.com, jack@suse.cz, ocfs2-devel@oss.oracle.com
Subject: [Ocfs2-devel] [PATCH] ocfs2: retry on ENOSPC if sufficient space in
	truncate log
Precedence: list
MIME-Version: 1.0
Content-Type: text/plain; charset="us-ascii"
Content-Transfer-Encoding: 7bit
Sender: ocfs2-devel-bounces@oss.oracle.com
Errors-To: ocfs2-devel-bounces@oss.oracle.com

Commit Message

Zhen Ren June 22, 2016, 9:07 a.m. UTC

The testcase "mmaptruncate" in the ocfs2 test suite always fails with
an ENOSPC error on small volumes (say, less than 10G). This testcase
creates 2 threads, T1 and T2, which race to "truncate"/"extend" the same
file repeatedly. Specifically, T1 truncates a small file to 1/2 of its
size while T2 extends it back to 100% of its size. The main bitmap will
quickly run out of space because the "truncate" code prevents the
truncate log from being flushed by
ocfs2_schedule_truncate_log_flush(osb, 1), while the truncate log may
have cached lots of clusters.

So retry the allocation after flushing the truncate log when ENOSPC is
returned. Note that we cannot reuse the deleted blocks before the
transaction is committed. Fortunately, we already have a function that
does this - ocfs2_try_to_free_truncate_log(). We just need to remove the
"static" modifier and move it to the right place.

Signed-off-by: Eric Ren <zren@suse.com>
---
 fs/ocfs2/alloc.c    | 37 +++++++++++++++++++++++++++++++++++++
 fs/ocfs2/alloc.h    |  2 ++
 fs/ocfs2/aops.c     | 37 -------------------------------------
 fs/ocfs2/suballoc.c | 17 ++++++++++++++++-
 4 files changed, 55 insertions(+), 38 deletions(-)

Comments

Gang He June 23, 2016, 9:25 a.m. UTC | #1

Reviewed-by: Gang He <ghe@suse.com>

Thanks
Gang


>>> 
> The testcase "mmaptruncate" in ocfs2 test suite always fails with
> ENOSPC error on small volume (say less than 10G). This testcase
> creates 2 threads T1/T2 which race to "truncate"/"extend" a same
> file repeatedly. Specifically, T1 truncates 1/2 size of a small file
> while T2 extend to 100% size. The main bitmap will quickly run out
> of space because the "truncate" code prevent truncate log from being
> flushed by ocfs2_schedule_truncate_log_flush(osb, 1), while truncate
> log may have cached lots of clusters.
> 
> So retry to allocate after flushing truncate log when ENOSPC is
> returned. And we cannot reuse the deleted blocks before the transaction
> committed. Fortunately, we already have a function to do this -
> ocfs2_try_to_free_truncate_log(). Just need to remove the "static"
> modifier and put it into a right place.
> 
> Signed-off-by: Eric Ren <zren@suse.com>
> ---
>  fs/ocfs2/alloc.c    | 37 +++++++++++++++++++++++++++++++++++++
>  fs/ocfs2/alloc.h    |  2 ++
>  fs/ocfs2/aops.c     | 37 -------------------------------------
>  fs/ocfs2/suballoc.c | 17 ++++++++++++++++-
>  4 files changed, 55 insertions(+), 38 deletions(-)
> 
> diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
> index 460c0ce..7dabbc3 100644
> --- a/fs/ocfs2/alloc.c
> +++ b/fs/ocfs2/alloc.c
> @@ -6106,6 +6106,43 @@ void ocfs2_schedule_truncate_log_flush(struct 
> ocfs2_super *osb,
>  	}
>  }
>  
> +/*
> + * Try to flush truncate logs if we can free enough clusters from it.
> + * As for return value, "< 0" means error, "0" no space and "1" means
> + * we have freed enough spaces and let the caller try to allocate again.
> + */
> +int ocfs2_try_to_free_truncate_log(struct ocfs2_super *osb,
> +					unsigned int needed)
> +{
> +	tid_t target;
> +	int ret = 0;
> +	unsigned int truncated_clusters;
> +
> +	inode_lock(osb->osb_tl_inode);
> +	truncated_clusters = osb->truncated_clusters;
> +	inode_unlock(osb->osb_tl_inode);
> +
> +	/*
> +	 * Check whether we can succeed in allocating if we free
> +	 * the truncate log.
> +	 */
> +	if (truncated_clusters < needed)
> +		goto out;
> +
> +	ret = ocfs2_flush_truncate_log(osb);
> +	if (ret) {
> +		mlog_errno(ret);
> +		goto out;
> +	}
> +
> +	if (jbd2_journal_start_commit(osb->journal->j_journal, &target)) {
> +		jbd2_log_wait_commit(osb->journal->j_journal, target);
> +		ret = 1;
> +	}
> +out:
> +	return ret;
> +}
> +
>  static int ocfs2_get_truncate_log_info(struct ocfs2_super *osb,
>  				       int slot_num,
>  				       struct inode **tl_inode,
> diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
> index f3dc1b0..4a5152e 100644
> --- a/fs/ocfs2/alloc.h
> +++ b/fs/ocfs2/alloc.h
> @@ -188,6 +188,8 @@ int ocfs2_truncate_log_append(struct ocfs2_super *osb,
>  			      u64 start_blk,
>  			      unsigned int num_clusters);
>  int __ocfs2_flush_truncate_log(struct ocfs2_super *osb);
> +int ocfs2_try_to_free_truncate_log(struct ocfs2_super *osb,
> +				   unsigned int needed);
>  
>  /*
>   * Process local structure which describes the block unlinks done
> diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
> index c034edf..1802aef 100644
> --- a/fs/ocfs2/aops.c
> +++ b/fs/ocfs2/aops.c
> @@ -1645,43 +1645,6 @@ static int ocfs2_zero_tail(struct inode *inode, struct 
> buffer_head *di_bh,
>  	return ret;
>  }
>  
> -/*
> - * Try to flush truncate logs if we can free enough clusters from it.
> - * As for return value, "< 0" means error, "0" no space and "1" means
> - * we have freed enough spaces and let the caller try to allocate again.
> - */
> -static int ocfs2_try_to_free_truncate_log(struct ocfs2_super *osb,
> -					  unsigned int needed)
> -{
> -	tid_t target;
> -	int ret = 0;
> -	unsigned int truncated_clusters;
> -
> -	inode_lock(osb->osb_tl_inode);
> -	truncated_clusters = osb->truncated_clusters;
> -	inode_unlock(osb->osb_tl_inode);
> -
> -	/*
> -	 * Check whether we can succeed in allocating if we free
> -	 * the truncate log.
> -	 */
> -	if (truncated_clusters < needed)
> -		goto out;
> -
> -	ret = ocfs2_flush_truncate_log(osb);
> -	if (ret) {
> -		mlog_errno(ret);
> -		goto out;
> -	}
> -
> -	if (jbd2_journal_start_commit(osb->journal->j_journal, &target)) {
> -		jbd2_log_wait_commit(osb->journal->j_journal, target);
> -		ret = 1;
> -	}
> -out:
> -	return ret;
> -}
> -
>  int ocfs2_write_begin_nolock(struct address_space *mapping,
>  			     loff_t pos, unsigned len, ocfs2_write_type_t type,
>  			     struct page **pagep, void **fsdata,
> diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
> index 2f19aee..9f7f3b6 100644
> --- a/fs/ocfs2/suballoc.c
> +++ b/fs/ocfs2/suballoc.c
> @@ -1164,7 +1164,8 @@ static int ocfs2_reserve_clusters_with_limit(struct 
> ocfs2_super *osb,
>  					     int flags,
>  					     struct ocfs2_alloc_context **ac)
>  {
> -	int status;
> +	int status, ret = 0;
> +	int retried = 0;
>  
>  	*ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
>  	if (!(*ac)) {
> @@ -1189,7 +1190,21 @@ static int ocfs2_reserve_clusters_with_limit(struct 
> ocfs2_super *osb,
>  	}
>  
>  	if (status == -ENOSPC) {
> +retry:
>  		status = ocfs2_reserve_cluster_bitmap_bits(osb, *ac);
> +		/* Retry if there is sufficient space cached in truncate log */
> +		if (status == -ENOSPC && !retried) {
> +			retried = 1;
> +			ocfs2_inode_unlock((*ac)->ac_inode, 1);
> +			inode_unlock((*ac)->ac_inode);
> +
> +			ret = ocfs2_try_to_free_truncate_log(osb, bits_wanted);
> +			if (ret == 1)
> +				goto retry;
> +
> +			if (ret < 0)
> +				mlog_errno(ret);
> +		}
>  		if (status < 0) {
>  			if (status != -ENOSPC)
>  				mlog_errno(status);
> -- 
> 2.6.6
> 
> 
> _______________________________________________
> Ocfs2-devel mailing list
> Ocfs2-devel@oss.oracle.com 
> https://oss.oracle.com/mailman/listinfo/ocfs2-devel

Joseph Qi June 29, 2016, 3:45 a.m. UTC | #2

Looks good to me, thanks.
Reviewed-by: Joseph Qi <joseph.qi@huawei.com>

On 2016/6/22 17:07, Eric Ren wrote:
> The testcase "mmaptruncate" in ocfs2 test suite always fails with
> ENOSPC error on small volume (say less than 10G). This testcase
> creates 2 threads T1/T2 which race to "truncate"/"extend" a same
> file repeatedly. Specifically, T1 truncates 1/2 size of a small file
> while T2 extend to 100% size. The main bitmap will quickly run out
> of space because the "truncate" code prevent truncate log from being
> flushed by ocfs2_schedule_truncate_log_flush(osb, 1), while truncate
> log may have cached lots of clusters.
> 
> So retry to allocate after flushing truncate log when ENOSPC is
> returned. And we cannot reuse the deleted blocks before the transaction
> committed. Fortunately, we already have a function to do this -
> ocfs2_try_to_free_truncate_log(). Just need to remove the "static"
> modifier and put it into a right place.
> 
> Signed-off-by: Eric Ren <zren@suse.com>
> ---
>  fs/ocfs2/alloc.c    | 37 +++++++++++++++++++++++++++++++++++++
>  fs/ocfs2/alloc.h    |  2 ++
>  fs/ocfs2/aops.c     | 37 -------------------------------------
>  fs/ocfs2/suballoc.c | 17 ++++++++++++++++-
>  4 files changed, 55 insertions(+), 38 deletions(-)
> 
> diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
> index 460c0ce..7dabbc3 100644
> --- a/fs/ocfs2/alloc.c
> +++ b/fs/ocfs2/alloc.c
> @@ -6106,6 +6106,43 @@ void ocfs2_schedule_truncate_log_flush(struct ocfs2_super *osb,
>  	}
>  }
>  
> +/*
> + * Try to flush truncate logs if we can free enough clusters from it.
> + * As for return value, "< 0" means error, "0" no space and "1" means
> + * we have freed enough spaces and let the caller try to allocate again.
> + */
> +int ocfs2_try_to_free_truncate_log(struct ocfs2_super *osb,
> +					unsigned int needed)
> +{
> +	tid_t target;
> +	int ret = 0;
> +	unsigned int truncated_clusters;
> +
> +	inode_lock(osb->osb_tl_inode);
> +	truncated_clusters = osb->truncated_clusters;
> +	inode_unlock(osb->osb_tl_inode);
> +
> +	/*
> +	 * Check whether we can succeed in allocating if we free
> +	 * the truncate log.
> +	 */
> +	if (truncated_clusters < needed)
> +		goto out;
> +
> +	ret = ocfs2_flush_truncate_log(osb);
> +	if (ret) {
> +		mlog_errno(ret);
> +		goto out;
> +	}
> +
> +	if (jbd2_journal_start_commit(osb->journal->j_journal, &target)) {
> +		jbd2_log_wait_commit(osb->journal->j_journal, target);
> +		ret = 1;
> +	}
> +out:
> +	return ret;
> +}
> +
>  static int ocfs2_get_truncate_log_info(struct ocfs2_super *osb,
>  				       int slot_num,
>  				       struct inode **tl_inode,
> diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
> index f3dc1b0..4a5152e 100644
> --- a/fs/ocfs2/alloc.h
> +++ b/fs/ocfs2/alloc.h
> @@ -188,6 +188,8 @@ int ocfs2_truncate_log_append(struct ocfs2_super *osb,
>  			      u64 start_blk,
>  			      unsigned int num_clusters);
>  int __ocfs2_flush_truncate_log(struct ocfs2_super *osb);
> +int ocfs2_try_to_free_truncate_log(struct ocfs2_super *osb,
> +				   unsigned int needed);
>  
>  /*
>   * Process local structure which describes the block unlinks done
> diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
> index c034edf..1802aef 100644
> --- a/fs/ocfs2/aops.c
> +++ b/fs/ocfs2/aops.c
> @@ -1645,43 +1645,6 @@ static int ocfs2_zero_tail(struct inode *inode, struct buffer_head *di_bh,
>  	return ret;
>  }
>  
> -/*
> - * Try to flush truncate logs if we can free enough clusters from it.
> - * As for return value, "< 0" means error, "0" no space and "1" means
> - * we have freed enough spaces and let the caller try to allocate again.
> - */
> -static int ocfs2_try_to_free_truncate_log(struct ocfs2_super *osb,
> -					  unsigned int needed)
> -{
> -	tid_t target;
> -	int ret = 0;
> -	unsigned int truncated_clusters;
> -
> -	inode_lock(osb->osb_tl_inode);
> -	truncated_clusters = osb->truncated_clusters;
> -	inode_unlock(osb->osb_tl_inode);
> -
> -	/*
> -	 * Check whether we can succeed in allocating if we free
> -	 * the truncate log.
> -	 */
> -	if (truncated_clusters < needed)
> -		goto out;
> -
> -	ret = ocfs2_flush_truncate_log(osb);
> -	if (ret) {
> -		mlog_errno(ret);
> -		goto out;
> -	}
> -
> -	if (jbd2_journal_start_commit(osb->journal->j_journal, &target)) {
> -		jbd2_log_wait_commit(osb->journal->j_journal, target);
> -		ret = 1;
> -	}
> -out:
> -	return ret;
> -}
> -
>  int ocfs2_write_begin_nolock(struct address_space *mapping,
>  			     loff_t pos, unsigned len, ocfs2_write_type_t type,
>  			     struct page **pagep, void **fsdata,
> diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
> index 2f19aee..9f7f3b6 100644
> --- a/fs/ocfs2/suballoc.c
> +++ b/fs/ocfs2/suballoc.c
> @@ -1164,7 +1164,8 @@ static int ocfs2_reserve_clusters_with_limit(struct ocfs2_super *osb,
>  					     int flags,
>  					     struct ocfs2_alloc_context **ac)
>  {
> -	int status;
> +	int status, ret = 0;
> +	int retried = 0;
>  
>  	*ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
>  	if (!(*ac)) {
> @@ -1189,7 +1190,21 @@ static int ocfs2_reserve_clusters_with_limit(struct ocfs2_super *osb,
>  	}
>  
>  	if (status == -ENOSPC) {
> +retry:
>  		status = ocfs2_reserve_cluster_bitmap_bits(osb, *ac);
> +		/* Retry if there is sufficient space cached in truncate log */
> +		if (status == -ENOSPC && !retried) {
> +			retried = 1;
> +			ocfs2_inode_unlock((*ac)->ac_inode, 1);
> +			inode_unlock((*ac)->ac_inode);
> +
> +			ret = ocfs2_try_to_free_truncate_log(osb, bits_wanted);
> +			if (ret == 1)
> +				goto retry;
> +
> +			if (ret < 0)
> +				mlog_errno(ret);
> +		}
>  		if (status < 0) {
>  			if (status != -ENOSPC)
>  				mlog_errno(status);
>

Joseph Qi July 6, 2016, 4:21 a.m. UTC | #3

NAK. If ocfs2_try_to_free_truncate_log() fails, this will lead to a
double ocfs2_inode_unlock() and then a BUG.

On 2016/6/22 17:07, Eric Ren wrote:
> The testcase "mmaptruncate" in ocfs2 test suite always fails with
> ENOSPC error on small volume (say less than 10G). This testcase
> creates 2 threads T1/T2 which race to "truncate"/"extend" a same
> file repeatedly. Specifically, T1 truncates 1/2 size of a small file
> while T2 extend to 100% size. The main bitmap will quickly run out
> of space because the "truncate" code prevent truncate log from being
> flushed by ocfs2_schedule_truncate_log_flush(osb, 1), while truncate
> log may have cached lots of clusters.
> 
> So retry to allocate after flushing truncate log when ENOSPC is
> returned. And we cannot reuse the deleted blocks before the transaction
> committed. Fortunately, we already have a function to do this -
> ocfs2_try_to_free_truncate_log(). Just need to remove the "static"
> modifier and put it into a right place.
> 
> Signed-off-by: Eric Ren <zren@suse.com>
> ---
>  fs/ocfs2/alloc.c    | 37 +++++++++++++++++++++++++++++++++++++
>  fs/ocfs2/alloc.h    |  2 ++
>  fs/ocfs2/aops.c     | 37 -------------------------------------
>  fs/ocfs2/suballoc.c | 17 ++++++++++++++++-
>  4 files changed, 55 insertions(+), 38 deletions(-)
> 
> diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
> index 460c0ce..7dabbc3 100644
> --- a/fs/ocfs2/alloc.c
> +++ b/fs/ocfs2/alloc.c
> @@ -6106,6 +6106,43 @@ void ocfs2_schedule_truncate_log_flush(struct ocfs2_super *osb,
>  	}
>  }
>  
> +/*
> + * Try to flush truncate logs if we can free enough clusters from it.
> + * As for return value, "< 0" means error, "0" no space and "1" means
> + * we have freed enough spaces and let the caller try to allocate again.
> + */
> +int ocfs2_try_to_free_truncate_log(struct ocfs2_super *osb,
> +					unsigned int needed)
> +{
> +	tid_t target;
> +	int ret = 0;
> +	unsigned int truncated_clusters;
> +
> +	inode_lock(osb->osb_tl_inode);
> +	truncated_clusters = osb->truncated_clusters;
> +	inode_unlock(osb->osb_tl_inode);
> +
> +	/*
> +	 * Check whether we can succeed in allocating if we free
> +	 * the truncate log.
> +	 */
> +	if (truncated_clusters < needed)
> +		goto out;
> +
> +	ret = ocfs2_flush_truncate_log(osb);
> +	if (ret) {
> +		mlog_errno(ret);
> +		goto out;
> +	}
> +
> +	if (jbd2_journal_start_commit(osb->journal->j_journal, &target)) {
> +		jbd2_log_wait_commit(osb->journal->j_journal, target);
> +		ret = 1;
> +	}
> +out:
> +	return ret;
> +}
> +
>  static int ocfs2_get_truncate_log_info(struct ocfs2_super *osb,
>  				       int slot_num,
>  				       struct inode **tl_inode,
> diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
> index f3dc1b0..4a5152e 100644
> --- a/fs/ocfs2/alloc.h
> +++ b/fs/ocfs2/alloc.h
> @@ -188,6 +188,8 @@ int ocfs2_truncate_log_append(struct ocfs2_super *osb,
>  			      u64 start_blk,
>  			      unsigned int num_clusters);
>  int __ocfs2_flush_truncate_log(struct ocfs2_super *osb);
> +int ocfs2_try_to_free_truncate_log(struct ocfs2_super *osb,
> +				   unsigned int needed);
>  
>  /*
>   * Process local structure which describes the block unlinks done
> diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
> index c034edf..1802aef 100644
> --- a/fs/ocfs2/aops.c
> +++ b/fs/ocfs2/aops.c
> @@ -1645,43 +1645,6 @@ static int ocfs2_zero_tail(struct inode *inode, struct buffer_head *di_bh,
>  	return ret;
>  }
>  
> -/*
> - * Try to flush truncate logs if we can free enough clusters from it.
> - * As for return value, "< 0" means error, "0" no space and "1" means
> - * we have freed enough spaces and let the caller try to allocate again.
> - */
> -static int ocfs2_try_to_free_truncate_log(struct ocfs2_super *osb,
> -					  unsigned int needed)
> -{
> -	tid_t target;
> -	int ret = 0;
> -	unsigned int truncated_clusters;
> -
> -	inode_lock(osb->osb_tl_inode);
> -	truncated_clusters = osb->truncated_clusters;
> -	inode_unlock(osb->osb_tl_inode);
> -
> -	/*
> -	 * Check whether we can succeed in allocating if we free
> -	 * the truncate log.
> -	 */
> -	if (truncated_clusters < needed)
> -		goto out;
> -
> -	ret = ocfs2_flush_truncate_log(osb);
> -	if (ret) {
> -		mlog_errno(ret);
> -		goto out;
> -	}
> -
> -	if (jbd2_journal_start_commit(osb->journal->j_journal, &target)) {
> -		jbd2_log_wait_commit(osb->journal->j_journal, target);
> -		ret = 1;
> -	}
> -out:
> -	return ret;
> -}
> -
>  int ocfs2_write_begin_nolock(struct address_space *mapping,
>  			     loff_t pos, unsigned len, ocfs2_write_type_t type,
>  			     struct page **pagep, void **fsdata,
> diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
> index 2f19aee..9f7f3b6 100644
> --- a/fs/ocfs2/suballoc.c
> +++ b/fs/ocfs2/suballoc.c
> @@ -1164,7 +1164,8 @@ static int ocfs2_reserve_clusters_with_limit(struct ocfs2_super *osb,
>  					     int flags,
>  					     struct ocfs2_alloc_context **ac)
>  {
> -	int status;
> +	int status, ret = 0;
> +	int retried = 0;
>  
>  	*ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
>  	if (!(*ac)) {
> @@ -1189,7 +1190,21 @@ static int ocfs2_reserve_clusters_with_limit(struct ocfs2_super *osb,
>  	}
>  
>  	if (status == -ENOSPC) {
> +retry:
>  		status = ocfs2_reserve_cluster_bitmap_bits(osb, *ac);
> +		/* Retry if there is sufficient space cached in truncate log */
> +		if (status == -ENOSPC && !retried) {
> +			retried = 1;
> +			ocfs2_inode_unlock((*ac)->ac_inode, 1);
> +			inode_unlock((*ac)->ac_inode);
> +
> +			ret = ocfs2_try_to_free_truncate_log(osb, bits_wanted);
> +			if (ret == 1)
> +				goto retry;
> +
> +			if (ret < 0)
> +				mlog_errno(ret);
> +		}
>  		if (status < 0) {
>  			if (status != -ENOSPC)
>  				mlog_errno(status);
>

Zhen Ren July 6, 2016, 8:56 a.m. UTC | #4

Hi Joseph,

On 07/06/2016 12:21 PM, Joseph Qi wrote:
> NAK, if ocfs2_try_to_free_truncate_log fails, it will lead to double
> ocfs2_inode_unlock and then BUG.

Thanks for pointing this out! I will fix it and resend.

Eric

>
> On 2016/6/22 17:07, Eric Ren wrote:
>> The testcase "mmaptruncate" in ocfs2 test suite always fails with
>> ENOSPC error on small volume (say less than 10G). This testcase
>> creates 2 threads T1/T2 which race to "truncate"/"extend" a same
>> file repeatedly. Specifically, T1 truncates 1/2 size of a small file
>> while T2 extend to 100% size. The main bitmap will quickly run out
>> of space because the "truncate" code prevent truncate log from being
>> flushed by ocfs2_schedule_truncate_log_flush(osb, 1), while truncate
>> log may have cached lots of clusters.
>>
>> So retry to allocate after flushing truncate log when ENOSPC is
>> returned. And we cannot reuse the deleted blocks before the transaction
>> committed. Fortunately, we already have a function to do this -
>> ocfs2_try_to_free_truncate_log(). Just need to remove the "static"
>> modifier and put it into a right place.
>>
>> Signed-off-by: Eric Ren <zren@suse.com>
>> ---
>>   fs/ocfs2/alloc.c    | 37 +++++++++++++++++++++++++++++++++++++
>>   fs/ocfs2/alloc.h    |  2 ++
>>   fs/ocfs2/aops.c     | 37 -------------------------------------
>>   fs/ocfs2/suballoc.c | 17 ++++++++++++++++-
>>   4 files changed, 55 insertions(+), 38 deletions(-)
>>
>> diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
>> index 460c0ce..7dabbc3 100644
>> --- a/fs/ocfs2/alloc.c
>> +++ b/fs/ocfs2/alloc.c
>> @@ -6106,6 +6106,43 @@ void ocfs2_schedule_truncate_log_flush(struct ocfs2_super *osb,
>>   	}
>>   }
>>
>> +/*
>> + * Try to flush truncate logs if we can free enough clusters from it.
>> + * As for return value, "< 0" means error, "0" no space and "1" means
>> + * we have freed enough spaces and let the caller try to allocate again.
>> + */
>> +int ocfs2_try_to_free_truncate_log(struct ocfs2_super *osb,
>> +					unsigned int needed)
>> +{
>> +	tid_t target;
>> +	int ret = 0;
>> +	unsigned int truncated_clusters;
>> +
>> +	inode_lock(osb->osb_tl_inode);
>> +	truncated_clusters = osb->truncated_clusters;
>> +	inode_unlock(osb->osb_tl_inode);
>> +
>> +	/*
>> +	 * Check whether we can succeed in allocating if we free
>> +	 * the truncate log.
>> +	 */
>> +	if (truncated_clusters < needed)
>> +		goto out;
>> +
>> +	ret = ocfs2_flush_truncate_log(osb);
>> +	if (ret) {
>> +		mlog_errno(ret);
>> +		goto out;
>> +	}
>> +
>> +	if (jbd2_journal_start_commit(osb->journal->j_journal, &target)) {
>> +		jbd2_log_wait_commit(osb->journal->j_journal, target);
>> +		ret = 1;
>> +	}
>> +out:
>> +	return ret;
>> +}
>> +
>>   static int ocfs2_get_truncate_log_info(struct ocfs2_super *osb,
>>   				       int slot_num,
>>   				       struct inode **tl_inode,
>> diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
>> index f3dc1b0..4a5152e 100644
>> --- a/fs/ocfs2/alloc.h
>> +++ b/fs/ocfs2/alloc.h
>> @@ -188,6 +188,8 @@ int ocfs2_truncate_log_append(struct ocfs2_super *osb,
>>   			      u64 start_blk,
>>   			      unsigned int num_clusters);
>>   int __ocfs2_flush_truncate_log(struct ocfs2_super *osb);
>> +int ocfs2_try_to_free_truncate_log(struct ocfs2_super *osb,
>> +				   unsigned int needed);
>>
>>   /*
>>    * Process local structure which describes the block unlinks done
>> diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
>> index c034edf..1802aef 100644
>> --- a/fs/ocfs2/aops.c
>> +++ b/fs/ocfs2/aops.c
>> @@ -1645,43 +1645,6 @@ static int ocfs2_zero_tail(struct inode *inode, struct buffer_head *di_bh,
>>   	return ret;
>>   }
>>
>> -/*
>> - * Try to flush truncate logs if we can free enough clusters from it.
>> - * As for return value, "< 0" means error, "0" no space and "1" means
>> - * we have freed enough spaces and let the caller try to allocate again.
>> - */
>> -static int ocfs2_try_to_free_truncate_log(struct ocfs2_super *osb,
>> -					  unsigned int needed)
>> -{
>> -	tid_t target;
>> -	int ret = 0;
>> -	unsigned int truncated_clusters;
>> -
>> -	inode_lock(osb->osb_tl_inode);
>> -	truncated_clusters = osb->truncated_clusters;
>> -	inode_unlock(osb->osb_tl_inode);
>> -
>> -	/*
>> -	 * Check whether we can succeed in allocating if we free
>> -	 * the truncate log.
>> -	 */
>> -	if (truncated_clusters < needed)
>> -		goto out;
>> -
>> -	ret = ocfs2_flush_truncate_log(osb);
>> -	if (ret) {
>> -		mlog_errno(ret);
>> -		goto out;
>> -	}
>> -
>> -	if (jbd2_journal_start_commit(osb->journal->j_journal, &target)) {
>> -		jbd2_log_wait_commit(osb->journal->j_journal, target);
>> -		ret = 1;
>> -	}
>> -out:
>> -	return ret;
>> -}
>> -
>>   int ocfs2_write_begin_nolock(struct address_space *mapping,
>>   			     loff_t pos, unsigned len, ocfs2_write_type_t type,
>>   			     struct page **pagep, void **fsdata,
>> diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
>> index 2f19aee..9f7f3b6 100644
>> --- a/fs/ocfs2/suballoc.c
>> +++ b/fs/ocfs2/suballoc.c
>> @@ -1164,7 +1164,8 @@ static int ocfs2_reserve_clusters_with_limit(struct ocfs2_super *osb,
>>   					     int flags,
>>   					     struct ocfs2_alloc_context **ac)
>>   {
>> -	int status;
>> +	int status, ret = 0;
>> +	int retried = 0;
>>
>>   	*ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
>>   	if (!(*ac)) {
>> @@ -1189,7 +1190,21 @@ static int ocfs2_reserve_clusters_with_limit(struct ocfs2_super *osb,
>>   	}
>>
>>   	if (status == -ENOSPC) {
>> +retry:
>>   		status = ocfs2_reserve_cluster_bitmap_bits(osb, *ac);
>> +		/* Retry if there is sufficient space cached in truncate log */
>> +		if (status == -ENOSPC && !retried) {
>> +			retried = 1;
>> +			ocfs2_inode_unlock((*ac)->ac_inode, 1);
>> +			inode_unlock((*ac)->ac_inode);
>> +
>> +			ret = ocfs2_try_to_free_truncate_log(osb, bits_wanted);
>> +			if (ret == 1)
>> +				goto retry;
>> +
>> +			if (ret < 0)
>> +				mlog_errno(ret);
>> +		}
>>   		if (status < 0) {
>>   			if (status != -ENOSPC)
>>   				mlog_errno(status);
>>
>
>
>

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 460c0ce..7dabbc3 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -6106,6 +6106,43 @@  void ocfs2_schedule_truncate_log_flush(struct ocfs2_super *osb,
 	}
 }
 
+/*
+ * Try to flush truncate logs if we can free enough clusters from it.
+ * As for return value, "< 0" means error, "0" no space and "1" means
+ * we have freed enough spaces and let the caller try to allocate again.
+ */
+int ocfs2_try_to_free_truncate_log(struct ocfs2_super *osb,
+					unsigned int needed)
+{
+	tid_t target;
+	int ret = 0;
+	unsigned int truncated_clusters;
+
+	inode_lock(osb->osb_tl_inode);
+	truncated_clusters = osb->truncated_clusters;
+	inode_unlock(osb->osb_tl_inode);
+
+	/*
+	 * Check whether we can succeed in allocating if we free
+	 * the truncate log.
+	 */
+	if (truncated_clusters < needed)
+		goto out;
+
+	ret = ocfs2_flush_truncate_log(osb);
+	if (ret) {
+		mlog_errno(ret);
+		goto out;
+	}
+
+	if (jbd2_journal_start_commit(osb->journal->j_journal, &target)) {
+		jbd2_log_wait_commit(osb->journal->j_journal, target);
+		ret = 1;
+	}
+out:
+	return ret;
+}
+
 static int ocfs2_get_truncate_log_info(struct ocfs2_super *osb,
 				       int slot_num,
 				       struct inode **tl_inode,
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
index f3dc1b0..4a5152e 100644
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
@@ -188,6 +188,8 @@  int ocfs2_truncate_log_append(struct ocfs2_super *osb,
 			      u64 start_blk,
 			      unsigned int num_clusters);
 int __ocfs2_flush_truncate_log(struct ocfs2_super *osb);
+int ocfs2_try_to_free_truncate_log(struct ocfs2_super *osb,
+				   unsigned int needed);
 
 /*
  * Process local structure which describes the block unlinks done
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index c034edf..1802aef 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -1645,43 +1645,6 @@  static int ocfs2_zero_tail(struct inode *inode, struct buffer_head *di_bh,
 	return ret;
 }
 
-/*
- * Try to flush truncate logs if we can free enough clusters from it.
- * As for return value, "< 0" means error, "0" no space and "1" means
- * we have freed enough spaces and let the caller try to allocate again.
- */
-static int ocfs2_try_to_free_truncate_log(struct ocfs2_super *osb,
-					  unsigned int needed)
-{
-	tid_t target;
-	int ret = 0;
-	unsigned int truncated_clusters;
-
-	inode_lock(osb->osb_tl_inode);
-	truncated_clusters = osb->truncated_clusters;
-	inode_unlock(osb->osb_tl_inode);
-
-	/*
-	 * Check whether we can succeed in allocating if we free
-	 * the truncate log.
-	 */
-	if (truncated_clusters < needed)
-		goto out;
-
-	ret = ocfs2_flush_truncate_log(osb);
-	if (ret) {
-		mlog_errno(ret);
-		goto out;
-	}
-
-	if (jbd2_journal_start_commit(osb->journal->j_journal, &target)) {
-		jbd2_log_wait_commit(osb->journal->j_journal, target);
-		ret = 1;
-	}
-out:
-	return ret;
-}
-
 int ocfs2_write_begin_nolock(struct address_space *mapping,
 			     loff_t pos, unsigned len, ocfs2_write_type_t type,
 			     struct page **pagep, void **fsdata,
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index 2f19aee..9f7f3b6 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -1164,7 +1164,8 @@  static int ocfs2_reserve_clusters_with_limit(struct ocfs2_super *osb,
 					     int flags,
 					     struct ocfs2_alloc_context **ac)
 {
-	int status;
+	int status, ret = 0;
+	int retried = 0;
 
 	*ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
 	if (!(*ac)) {
@@ -1189,7 +1190,21 @@  static int ocfs2_reserve_clusters_with_limit(struct ocfs2_super *osb,
 	}
 
 	if (status == -ENOSPC) {
+retry:
 		status = ocfs2_reserve_cluster_bitmap_bits(osb, *ac);
+		/* Retry if there is sufficient space cached in truncate log */
+		if (status == -ENOSPC && !retried) {
+			retried = 1;
+			ocfs2_inode_unlock((*ac)->ac_inode, 1);
+			inode_unlock((*ac)->ac_inode);
+
+			ret = ocfs2_try_to_free_truncate_log(osb, bits_wanted);
+			if (ret == 1)
+				goto retry;
+
+			if (ret < 0)
+				mlog_errno(ret);
+		}
 		if (status < 0) {
 			if (status != -ENOSPC)
 				mlog_errno(status);

ocfs2: retry on ENOSPC if sufficient space in truncate log

Commit Message

Comments

Patch