ocfs2: the ip_alloc_sem should be taken in ocfs2_get_block()
diff mbox

Message ID 59E9BBEF.1060601@huawei.com
State New
Headers show

Commit Message

zhendong chen Oct. 20, 2017, 9:03 a.m. UTC
The ip_alloc_sem should be taken in ocfs2_get_block() when reading a file
in DIRECT mode to prevent concurrent access to the extent tree with
ocfs2_dio_end_io_write(), which may trigger the BUG_ON in
ocfs2_get_clusters_nocache()->BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos))

Signed-off-by: Alex Chen <alex.chen@huawei.com>
Reviewed-by: Jun Piao <piaojun@huawei.com>

---
 fs/ocfs2/aops.c | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

-- 1.9.5.msysgit.1

Comments

Changwei Ge Oct. 20, 2017, 9:23 a.m. UTC | #1
Hi Alex,

Are you able to provide a way to reproduce this issue?
I'm very interested in it.

Thanks,
Changwei.

On 2017/10/20 17:08, alex chen wrote:
> The ip_alloc_sem should be taken in ocfs2_get_block() when reading file
> in DIRECT mode to prevent concurrent access to extent tree with
> ocfs2_dio_end_io_write(), which may cause BUGON in
> ocfs2_get_clusters_nocache()->BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos))
> 
> Signed-off-by: Alex Chen <alex.chen@huawei.com>
> Reviewed-by: Jun Piao <piaojun@huawei.com>
> 
> ---
>   fs/ocfs2/aops.c | 21 +++++++++++++++------
>   1 file changed, 15 insertions(+), 6 deletions(-)
> 
> diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
> index 88a31e9..5cb939f 100644
> --- a/fs/ocfs2/aops.c
> +++ b/fs/ocfs2/aops.c
> @@ -134,6 +134,19 @@ static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock,
>   	return err;
>   }
> 
> +static int ocfs2_get_block_lock(struct inode *inode, sector_t iblock,
> +		    struct buffer_head *bh_result, int create)
> +{
> +	int ret;
> +	struct ocfs2_inode_info *oi = OCFS2_I(inode);
> +
> +	down_read(&oi->ip_alloc_sem);
> +	ret = ocfs2_get_block(inode, iblock, bh_result, create);
> +	up_read(&oi->ip_alloc_sem);
> +
> +	return ret;
> +}
> +
>   int ocfs2_get_block(struct inode *inode, sector_t iblock,
>   		    struct buffer_head *bh_result, int create)
>   {
> @@ -2154,12 +2167,8 @@ static int ocfs2_dio_get_block(struct inode *inode, sector_t iblock,
>   	 * while file size will be changed.
>   	 */
>   	if (pos + total_len <= i_size_read(inode)) {
> -		down_read(&oi->ip_alloc_sem);
>   		/* This is the fast path for re-write. */
> -		ret = ocfs2_get_block(inode, iblock, bh_result, create);
> -
> -		up_read(&oi->ip_alloc_sem);
> -
> +		ret = ocfs2_get_block_lock(inode, iblock, bh_result, create);
>   		if (buffer_mapped(bh_result) &&
>   		    !buffer_new(bh_result) &&
>   		    ret == 0)
> @@ -2424,7 +2433,7 @@ static ssize_t ocfs2_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
>   		return 0;
> 
>   	if (iov_iter_rw(iter) == READ)
> -		get_block = ocfs2_get_block;
> +		get_block = ocfs2_get_block_lock;
>   	else
>   		get_block = ocfs2_dio_get_block;
> 
> -- 1.9.5.msysgit.1
> 
> 
> 
> _______________________________________________
> Ocfs2-devel mailing list
> Ocfs2-devel@oss.oracle.com
> https://oss.oracle.com/mailman/listinfo/ocfs2-devel
>
Changwei Ge Oct. 21, 2017, 3:27 a.m. UTC | #2
Hi,
Actually, we encountered the same issue months ago and we also posted a
patch to fix it back then; however, we never got a 'Reviewed-by' :-(

Your patch has a better change-log and the logic looks fine to me.
I think it deserves to be merged.

But it seems that your editor is set to a tab width of 4 spaces, which
violates the existing code style. Can you adjust it to 8 spaces
and re-send this patch?

Otherwise I think this is a good fix.

Moreover, this issue was introduced by commit c15471f79506 ("ocfs2: fix 
sparse file & data ordering issue in direct io")

Adding a *Fixes* tag to your change-log is encouraged.

Also I CC to ocfs2 maintainers so that they can provide some helpful 
comments.

Thanks,
Changwei.


On 2017/10/20 17:08, alex chen wrote:
> The ip_alloc_sem should be taken in ocfs2_get_block() when reading file
> in DIRECT mode to prevent concurrent access to extent tree with
> ocfs2_dio_end_io_write(), which may cause BUGON in
> ocfs2_get_clusters_nocache()->BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos))
> 
> Signed-off-by: Alex Chen <alex.chen@huawei.com>
> Reviewed-by: Jun Piao <piaojun@huawei.com>

Acked-by: Changwei Ge <ge.changwei@h3c.com>

> 
> ---
>   fs/ocfs2/aops.c | 21 +++++++++++++++------
>   1 file changed, 15 insertions(+), 6 deletions(-)
> 
> diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
> index 88a31e9..5cb939f 100644
> --- a/fs/ocfs2/aops.c
> +++ b/fs/ocfs2/aops.c
> @@ -134,6 +134,19 @@ static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock,
>   	return err;
>   }
> 
> +static int ocfs2_get_block_lock(struct inode *inode, sector_t iblock,
> +		    struct buffer_head *bh_result, int create)
> +{
> +	int ret;
> +	struct ocfs2_inode_info *oi = OCFS2_I(inode);
> +
> +	down_read(&oi->ip_alloc_sem);
> +	ret = ocfs2_get_block(inode, iblock, bh_result, create);
> +	up_read(&oi->ip_alloc_sem);
> +
> +	return ret;
> +}
> +
>   int ocfs2_get_block(struct inode *inode, sector_t iblock,
>   		    struct buffer_head *bh_result, int create)
>   {
> @@ -2154,12 +2167,8 @@ static int ocfs2_dio_get_block(struct inode *inode, sector_t iblock,
>   	 * while file size will be changed.
>   	 */
>   	if (pos + total_len <= i_size_read(inode)) {
> -		down_read(&oi->ip_alloc_sem);
>   		/* This is the fast path for re-write. */
> -		ret = ocfs2_get_block(inode, iblock, bh_result, create);
> -
> -		up_read(&oi->ip_alloc_sem);
> -
> +		ret = ocfs2_get_block_lock(inode, iblock, bh_result, create);
>   		if (buffer_mapped(bh_result) &&
>   		    !buffer_new(bh_result) &&
>   		    ret == 0)
> @@ -2424,7 +2433,7 @@ static ssize_t ocfs2_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
>   		return 0;
> 
>   	if (iov_iter_rw(iter) == READ)
> -		get_block = ocfs2_get_block;
> +		get_block = ocfs2_get_block_lock;
>   	else
>   		get_block = ocfs2_dio_get_block;
> 
> -- 1.9.5.msysgit.1
> 
> 
> 
> _______________________________________________
> Ocfs2-devel mailing list
> Ocfs2-devel@oss.oracle.com
> https://oss.oracle.com/mailman/listinfo/ocfs2-devel
>
zhendong chen Oct. 23, 2017, 2:39 a.m. UTC | #3
Hi Changwei,

On 2017/10/20 17:23, Changwei Ge wrote:
> Hi Alex,
> 
> Are you able to provide a way to reproduce this issue?
> I'm very interested in it.
> 

You can reproduce the BUG with the following steps:
1. touch "/mnt/ocfs2/test";
2. fallocate -l 1G "/mnt/ocfs2/test";
3. write to the file "/mnt/ocfs2/test" using io_submit();
4. at the same time, create another process that reads this file from
   a random location;

> Thanks,
> Changwei.
> 
> On 2017/10/20 17:08, alex chen wrote:
>> The ip_alloc_sem should be taken in ocfs2_get_block() when reading file
>> in DIRECT mode to prevent concurrent access to extent tree with
>> ocfs2_dio_end_io_write(), which may cause BUGON in
>> ocfs2_get_clusters_nocache()->BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos))
>>
>> Signed-off-by: Alex Chen <alex.chen@huawei.com>
>> Reviewed-by: Jun Piao <piaojun@huawei.com>
>>
>> ---
>>   fs/ocfs2/aops.c | 21 +++++++++++++++------
>>   1 file changed, 15 insertions(+), 6 deletions(-)
>>
>> diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
>> index 88a31e9..5cb939f 100644
>> --- a/fs/ocfs2/aops.c
>> +++ b/fs/ocfs2/aops.c
>> @@ -134,6 +134,19 @@ static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock,
>>   	return err;
>>   }
>>
>> +static int ocfs2_get_block_lock(struct inode *inode, sector_t iblock,
>> +		    struct buffer_head *bh_result, int create)
>> +{
>> +	int ret;
>> +	struct ocfs2_inode_info *oi = OCFS2_I(inode);
>> +
>> +	down_read(&oi->ip_alloc_sem);
>> +	ret = ocfs2_get_block(inode, iblock, bh_result, create);
>> +	up_read(&oi->ip_alloc_sem);
>> +
>> +	return ret;
>> +}
>> +
>>   int ocfs2_get_block(struct inode *inode, sector_t iblock,
>>   		    struct buffer_head *bh_result, int create)
>>   {
>> @@ -2154,12 +2167,8 @@ static int ocfs2_dio_get_block(struct inode *inode, sector_t iblock,
>>   	 * while file size will be changed.
>>   	 */
>>   	if (pos + total_len <= i_size_read(inode)) {
>> -		down_read(&oi->ip_alloc_sem);
>>   		/* This is the fast path for re-write. */
>> -		ret = ocfs2_get_block(inode, iblock, bh_result, create);
>> -
>> -		up_read(&oi->ip_alloc_sem);
>> -
>> +		ret = ocfs2_get_block_lock(inode, iblock, bh_result, create);
>>   		if (buffer_mapped(bh_result) &&
>>   		    !buffer_new(bh_result) &&
>>   		    ret == 0)
>> @@ -2424,7 +2433,7 @@ static ssize_t ocfs2_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
>>   		return 0;
>>
>>   	if (iov_iter_rw(iter) == READ)
>> -		get_block = ocfs2_get_block;
>> +		get_block = ocfs2_get_block_lock;
>>   	else
>>   		get_block = ocfs2_dio_get_block;
>>
>> -- 1.9.5.msysgit.1
>>
>>
>>
>> _______________________________________________
>> Ocfs2-devel mailing list
>> Ocfs2-devel@oss.oracle.com
>> https://oss.oracle.com/mailman/listinfo/ocfs2-devel
>>
> 
> 
> .
>
zhendong chen Oct. 23, 2017, 3:09 a.m. UTC | #4
Hi Changwei,

Thanks for your suggestion. I will modify this patch and resend it.

Thanks
Alex

On 2017/10/21 11:27, Changwei Ge wrote:
> Hi,
> Actually we encountered the same issue months ago and we also ever 
> posted a patch to fix it, however, we never got a 'reviewed-by':-(
> 
> Your patch has a better change-log and the logic is fine to me.
> I think it is deserved to be merged.
> 
> But it seems that your editor has '4 white spaces' occupied tab, it 
> violates the exited code style. Can you adjust it to '8 white spaces' 
> and re-send this patch?
> 
> Otherwise I thinks this is a good fix.
> 
> Moreover, this issue was introduced by commit c15471f79506 ("ocfs2: fix 
> sparse file & data ordering issue in direct io")
> 
> Adding a *Fixs* tag to your change-log is encouraged.
> 
> Also I CC to ocfs2 maintainers so that they can provide some helpful 
> comments.
> 
> Thanks,
> Changwei.
> 
> 
> On 2017/10/20 17:08, alex chen wrote:
>> The ip_alloc_sem should be taken in ocfs2_get_block() when reading file
>> in DIRECT mode to prevent concurrent access to extent tree with
>> ocfs2_dio_end_io_write(), which may cause BUGON in
>> ocfs2_get_clusters_nocache()->BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos))
>>
>> Signed-off-by: Alex Chen <alex.chen@huawei.com>
>> Reviewed-by: Jun Piao <piaojun@huawei.com>
> 
> Acked-by: Changwei Ge <ge.changwei@h3c.com>
> 
>>
>> ---
>>   fs/ocfs2/aops.c | 21 +++++++++++++++------
>>   1 file changed, 15 insertions(+), 6 deletions(-)
>>
>> diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
>> index 88a31e9..5cb939f 100644
>> --- a/fs/ocfs2/aops.c
>> +++ b/fs/ocfs2/aops.c
>> @@ -134,6 +134,19 @@ static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock,
>>   	return err;
>>   }
>>
>> +static int ocfs2_get_block_lock(struct inode *inode, sector_t iblock,
>> +		    struct buffer_head *bh_result, int create)
>> +{
>> +	int ret;
>> +	struct ocfs2_inode_info *oi = OCFS2_I(inode);
>> +
>> +	down_read(&oi->ip_alloc_sem);
>> +	ret = ocfs2_get_block(inode, iblock, bh_result, create);
>> +	up_read(&oi->ip_alloc_sem);
>> +
>> +	return ret;
>> +}
>> +
>>   int ocfs2_get_block(struct inode *inode, sector_t iblock,
>>   		    struct buffer_head *bh_result, int create)
>>   {
>> @@ -2154,12 +2167,8 @@ static int ocfs2_dio_get_block(struct inode *inode, sector_t iblock,
>>   	 * while file size will be changed.
>>   	 */
>>   	if (pos + total_len <= i_size_read(inode)) {
>> -		down_read(&oi->ip_alloc_sem);
>>   		/* This is the fast path for re-write. */
>> -		ret = ocfs2_get_block(inode, iblock, bh_result, create);
>> -
>> -		up_read(&oi->ip_alloc_sem);
>> -
>> +		ret = ocfs2_get_block_lock(inode, iblock, bh_result, create);
>>   		if (buffer_mapped(bh_result) &&
>>   		    !buffer_new(bh_result) &&
>>   		    ret == 0)
>> @@ -2424,7 +2433,7 @@ static ssize_t ocfs2_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
>>   		return 0;
>>
>>   	if (iov_iter_rw(iter) == READ)
>> -		get_block = ocfs2_get_block;
>> +		get_block = ocfs2_get_block_lock;
>>   	else
>>   		get_block = ocfs2_dio_get_block;
>>
>> -- 1.9.5.msysgit.1
>>
>>
>>
>> _______________________________________________
>> Ocfs2-devel mailing list
>> Ocfs2-devel@oss.oracle.com
>> https://oss.oracle.com/mailman/listinfo/ocfs2-devel
>>
> 
> 
> .
>
Joseph Qi Oct. 23, 2017, 3:31 a.m. UTC | #5
On 17/10/20 17:03, alex chen wrote:
> The ip_alloc_sem should be taken in ocfs2_get_block() when reading file
> in DIRECT mode to prevent concurrent access to extent tree with
> ocfs2_dio_end_io_write(), which may cause BUGON in
> ocfs2_get_clusters_nocache()->BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos))
> 
> Signed-off-by: Alex Chen <alex.chen@huawei.com>
> Reviewed-by: Jun Piao <piaojun@huawei.com>
> 
> ---
>  fs/ocfs2/aops.c | 21 +++++++++++++++------
>  1 file changed, 15 insertions(+), 6 deletions(-)
> 
> diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
> index 88a31e9..5cb939f 100644
> --- a/fs/ocfs2/aops.c
> +++ b/fs/ocfs2/aops.c
> @@ -134,6 +134,19 @@ static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock,
>  	return err;
>  }
> 
> +static int ocfs2_get_block_lock(struct inode *inode, sector_t iblock,
> +		    struct buffer_head *bh_result, int create)
> +{
> +	int ret;
> +	struct ocfs2_inode_info *oi = OCFS2_I(inode);
> +
> +	down_read(&oi->ip_alloc_sem);
> +	ret = ocfs2_get_block(inode, iblock, bh_result, create);
> +	up_read(&oi->ip_alloc_sem);
> +
> +	return ret;
> +}
> +
>  int ocfs2_get_block(struct inode *inode, sector_t iblock,
>  		    struct buffer_head *bh_result, int create)
>  {
> @@ -2154,12 +2167,8 @@ static int ocfs2_dio_get_block(struct inode *inode, sector_t iblock,
>  	 * while file size will be changed.
>  	 */
>  	if (pos + total_len <= i_size_read(inode)) {
> -		down_read(&oi->ip_alloc_sem);
>  		/* This is the fast path for re-write. */
> -		ret = ocfs2_get_block(inode, iblock, bh_result, create);
> -
> -		up_read(&oi->ip_alloc_sem);
> -
> +		ret = ocfs2_get_block_lock(inode, iblock, bh_result, create);
>  		if (buffer_mapped(bh_result) &&
>  		    !buffer_new(bh_result) &&
>  		    ret == 0)
> @@ -2424,7 +2433,7 @@ static ssize_t ocfs2_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
>  		return 0;
> 
>  	if (iov_iter_rw(iter) == READ)
> -		get_block = ocfs2_get_block;
> +		get_block = ocfs2_get_block_lock;
The name ocfs2_lock_get_block may be better.

Thanks,
Joseph

>  	else
>  		get_block = ocfs2_dio_get_block;
> 
> -- 1.9.5.msysgit.1
> 
>
Zhen Ren Oct. 23, 2017, 4:06 a.m. UTC | #6
Hi,

On 10/20/2017 05:03 PM, alex chen wrote:
> The ip_alloc_sem should be taken in ocfs2_get_block() when reading file
> in DIRECT mode to prevent concurrent access to extent tree with
> ocfs2_dio_end_io_write(), which may cause BUGON in
> ocfs2_get_clusters_nocache()->BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos))

This may seem an obvious fix, but it would be great if you could
write a more detailed commit log, e.g. paste the crash backtrace
here, so that people can pick up this fix easily when they see the same issue.

Thanks,
Eric
>
> Signed-off-by: Alex Chen <alex.chen@huawei.com>
> Reviewed-by: Jun Piao <piaojun@huawei.com>
>
> ---
>   fs/ocfs2/aops.c | 21 +++++++++++++++------
>   1 file changed, 15 insertions(+), 6 deletions(-)
>
> diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
> index 88a31e9..5cb939f 100644
> --- a/fs/ocfs2/aops.c
> +++ b/fs/ocfs2/aops.c
> @@ -134,6 +134,19 @@ static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock,
>   	return err;
>   }
>
> +static int ocfs2_get_block_lock(struct inode *inode, sector_t iblock,
> +		    struct buffer_head *bh_result, int create)
> +{
> +	int ret;
> +	struct ocfs2_inode_info *oi = OCFS2_I(inode);
> +
> +	down_read(&oi->ip_alloc_sem);
> +	ret = ocfs2_get_block(inode, iblock, bh_result, create);
> +	up_read(&oi->ip_alloc_sem);
> +
> +	return ret;
> +}
> +
>   int ocfs2_get_block(struct inode *inode, sector_t iblock,
>   		    struct buffer_head *bh_result, int create)
>   {
> @@ -2154,12 +2167,8 @@ static int ocfs2_dio_get_block(struct inode *inode, sector_t iblock,
>   	 * while file size will be changed.
>   	 */
>   	if (pos + total_len <= i_size_read(inode)) {
> -		down_read(&oi->ip_alloc_sem);
>   		/* This is the fast path for re-write. */
> -		ret = ocfs2_get_block(inode, iblock, bh_result, create);
> -
> -		up_read(&oi->ip_alloc_sem);
> -
> +		ret = ocfs2_get_block_lock(inode, iblock, bh_result, create);
>   		if (buffer_mapped(bh_result) &&
>   		    !buffer_new(bh_result) &&
>   		    ret == 0)
> @@ -2424,7 +2433,7 @@ static ssize_t ocfs2_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
>   		return 0;
>
>   	if (iov_iter_rw(iter) == READ)
> -		get_block = ocfs2_get_block;
> +		get_block = ocfs2_get_block_lock;
>   	else
>   		get_block = ocfs2_dio_get_block;
>
> -- 1.9.5.msysgit.1
>
>
>

Patch
diff mbox

diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 88a31e9..5cb939f 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -134,6 +134,19 @@  static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock,
 	return err;
 }

+static int ocfs2_get_block_lock(struct inode *inode, sector_t iblock,
+		    struct buffer_head *bh_result, int create)
+{
+	int ret;
+	struct ocfs2_inode_info *oi = OCFS2_I(inode);
+
+	down_read(&oi->ip_alloc_sem);
+	ret = ocfs2_get_block(inode, iblock, bh_result, create);
+	up_read(&oi->ip_alloc_sem);
+
+	return ret;
+}
+
 int ocfs2_get_block(struct inode *inode, sector_t iblock,
 		    struct buffer_head *bh_result, int create)
 {
@@ -2154,12 +2167,8 @@  static int ocfs2_dio_get_block(struct inode *inode, sector_t iblock,
 	 * while file size will be changed.
 	 */
 	if (pos + total_len <= i_size_read(inode)) {
-		down_read(&oi->ip_alloc_sem);
 		/* This is the fast path for re-write. */
-		ret = ocfs2_get_block(inode, iblock, bh_result, create);
-
-		up_read(&oi->ip_alloc_sem);
-
+		ret = ocfs2_get_block_lock(inode, iblock, bh_result, create);
 		if (buffer_mapped(bh_result) &&
 		    !buffer_new(bh_result) &&
 		    ret == 0)
@@ -2424,7 +2433,7 @@  static ssize_t ocfs2_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
 		return 0;

 	if (iov_iter_rw(iter) == READ)
-		get_block = ocfs2_get_block;
+		get_block = ocfs2_get_block_lock;
 	else
 		get_block = ocfs2_dio_get_block;