diff mbox series

exfat: do not clear VolumeDirty in writeback

Message ID HK2PR04MB38914869B1FEE326CFE11779812D9@HK2PR04MB3891.apcprd04.prod.outlook.com (mailing list archive)
State New, archived
Headers show
Series exfat: do not clear VolumeDirty in writeback | expand

Commit Message

Yuezhang.Mo@sony.com Feb. 8, 2022, 5:18 a.m. UTC
Before this commit, VolumeDirty will be cleared first in
writeback if 'dirsync' or 'sync' is not enabled. If the power
is suddenly cut off after clearing VolumeDirty but other
updates are not written, the exFAT filesystem will not be able
to detect the power failure in the next mount.

In addition, VolumeDirty will be set again when updating the parent
directory. This means that the BootSector will be written twice in each
writeback, which will shorten the life of the device.

Reviewed-by: Andy.Wu <Andy.Wu@sony.com>
Reviewed-by: Aoyama, Wataru <wataru.aoyama@sony.com>
Signed-off-by: Yuezhang.Mo <Yuezhang.Mo@sony.com>
---
 fs/exfat/super.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

Comments

Yuezhang.Mo@sony.com Feb. 8, 2022, 6:43 a.m. UTC | #1
From the exFAT spec, VolumeDirty should be cleared after updating the directory entry.

1. Set the value of the VolumeDirty field to 1
2. Update the active FAT, if necessary
3. Update the active Allocation Bitmap
4. Create or update the directory entry, if necessary
5. Clear the value of the VolumeDirty field to 0, if its value prior to the first step was 0

But VolumeDirty will be cleared first in writeback if 'dirsync' or 'sync' is not enabled.
Refer to the blktrace log of 'mkdir /mnt/test/dir1' as an example.

179,3    0        1     0.000000000    10  C  WS 2623488 + 1 [0]         <= Set VolumeDirty
179,3    3        1     5.052262001    26  C   W 2623488 + 1 [0]         <= Clear VolumeDirty
179,3    3        2     5.054685667    26  C   W 2627584 + 1 [0]         <= Bitmap
179,3    3        3     5.056795667    26  C   W 2628352 + 1 [0]         <= Body data of /test/
179,3    3        4     5.066790001    26  C   W 2628608 + 128 [0]       <= Body data of /test/dir1/
179,3    3        5     5.075998667    26  C   W 2628736 + 128 [0]       <= Body data of /test/dir1/
179,3    3        6     5.078409334     0  C  WS 2623488 + 1 [0]         <= Set VolumeDirty
179,3    3        7    20.239486002     0  C   W 2628096 + 1 [0]         <= Body data of /

After applying this patch, VolumeDirty will not be cleared until 'sync' or 'umount' is performed.

179,3    2        1     0.000000000     0  C  WS 2623488 + 1 [0]         <= Set VolumeDirty
179,3    0        1    30.221475670     9  C   W 2627584 + 1 [0]         <= Bitmap
179,3    0        2    30.223794337     9  C   W 2628352 + 1 [0]         <= Body data of /test/
179,3    0        3    30.233164003     9  C   W 2629376 + 128 [0]       <= Body data of /test/dir1/
179,3    0        4    30.242449670     9  C   W 2629504 + 128 [0]       <= Body data of /test/dir1/
179,3    0        5    60.445985007     9  C   W 2628096 + 1 [0]         <= Body data of /


-----Original Message-----
From: Mo, Yuezhang 
Sent: Tuesday, February 8, 2022 1:19 PM
To: linkinjeon@kernel.org; sj1557.seo@samsung.com
Cc: linux-fsdevel@vger.kernel.org; linux-kernel@vger.kernel.org
Subject: [PATCH] exfat: do not clear VolumeDirty in writeback

Before this commit, VolumeDirty will be cleared first in writeback if 'dirsync' or 'sync' is not enabled. If the power is suddenly cut off after cleaning VolumeDirty but other updates are not written, the exFAT filesystem will not be able to detect the power failure in the next mount.

And VolumeDirty will be set again when updating the parent directory. It means that BootSector will be written twice in each writeback, that will shorten the life of the device.

Reviewed-by: Andy.Wu <Andy.Wu@sony.com>
Reviewed-by: Aoyama, Wataru <wataru.aoyama@sony.com>
Signed-off-by: Yuezhang.Mo <Yuezhang.Mo@sony.com>
---
 fs/exfat/super.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/fs/exfat/super.c b/fs/exfat/super.c index 8c9fb7dcec16..f4906c17475e 100644
--- a/fs/exfat/super.c
+++ b/fs/exfat/super.c
@@ -25,6 +25,8 @@
 static char exfat_default_iocharset[] = CONFIG_EXFAT_DEFAULT_IOCHARSET;  static struct kmem_cache *exfat_inode_cachep;
 
+static int __exfat_clear_volume_dirty(struct super_block *sb);
+
 static void exfat_free_iocharset(struct exfat_sb_info *sbi)  {
 	if (sbi->options.iocharset != exfat_default_iocharset) @@ -64,7 +66,7 @@ static int exfat_sync_fs(struct super_block *sb, int wait)
 	/* If there are some dirty buffers in the bdev inode */
 	mutex_lock(&sbi->s_lock);
 	sync_blockdev(sb->s_bdev);
-	if (exfat_clear_volume_dirty(sb))
+	if (__exfat_clear_volume_dirty(sb))
 		err = -EIO;
 	mutex_unlock(&sbi->s_lock);
 	return err;
@@ -139,13 +141,21 @@ int exfat_set_volume_dirty(struct super_block *sb)
 	return exfat_set_vol_flags(sb, sbi->vol_flags | VOLUME_DIRTY);  }
 
-int exfat_clear_volume_dirty(struct super_block *sb)
+static int __exfat_clear_volume_dirty(struct super_block *sb)
 {
 	struct exfat_sb_info *sbi = EXFAT_SB(sb);
 
 	return exfat_set_vol_flags(sb, sbi->vol_flags & ~VOLUME_DIRTY);  }
 
+int exfat_clear_volume_dirty(struct super_block *sb) {
+	if (sb->s_flags & (SB_SYNCHRONOUS | SB_DIRSYNC))
+		return __exfat_clear_volume_dirty(sb);
+
+	return 0;
+}
+
 static int exfat_show_options(struct seq_file *m, struct dentry *root)  {
 	struct super_block *sb = root->d_sb;
--
2.25.1
Yuezhang.Mo@sony.com Feb. 28, 2022, 3:24 a.m. UTC | #2
Hi Namjae and Sungjong,

May I have your comments for this patch?

Best Regards,
Yuezhang,Mo
Kohada.Tetsuhiro@dc.MitsubishiElectric.co.jp Feb. 28, 2022, 7:11 a.m. UTC | #3
Hi, Yuezhang.

> And VolumeDirty will be set again when updating the parent directory. It means that BootSector will be written twice in
> each writeback, that will shorten the life of the device.

I have the same concern.
From a lifespan point of view, we should probably clear dirty with just sync_fs().

>  	sync_blockdev(sb->s_bdev);
> -	if (exfat_clear_volume_dirty(sb))
> +	if (__exfat_clear_volume_dirty(sb))

If SB_SYNCHRONOUS or SB_DIRSYNC is not present, isn't dirty cleared?

> +int exfat_clear_volume_dirty(struct super_block *sb) {
> +	if (sb->s_flags & (SB_SYNCHRONOUS | SB_DIRSYNC))
> +		return __exfat_clear_volume_dirty(sb);

Even when only one of SB or DIR is synced, dirty will be cleared.
Isn't it necessary to have both SB_SYNCHRONOUS and SB_DIRSYNC?
And, I think it would be better to use IS_SYNC or IS_DIRSYNC macro here.

BR
Yuezhang.Mo@sony.com Feb. 28, 2022, 10:51 a.m. UTC | #4
Hi, Kohada.Tetsuhiro.

Thanks for your comments.

>> And VolumeDirty will be set again when updating the parent directory. 
>> It means that BootSector will be written twice in each writeback, that will shorten the life of the device.
> 
> I have the same concern.
> From a lifespan point of view, we should probably clear dirty with just sync_fs().

If it is acceptable for VolumeDirty to remain dirty after all updates are complete, I think it is a good idea.
(PS: The original logic is to clear VolumeDirty after BitMap, FAT and directory entries are updated.)

>>  	sync_blockdev(sb->s_bdev);
>> -	if (exfat_clear_volume_dirty(sb))
>> +	if (__exfat_clear_volume_dirty(sb))
> 
> If SB_SYNCHRONOUS or SB_DIRSYNC is not present, isn't dirty cleared?

With this patch, exfat_clear_volume_dirty() will not clear VolumeDirty if SB_SYNCHRONOUS or SB_DIRSYNC is not present, and __exfat_clear_volume_dirty() will clear VolumeDirty unconditionally.

>> +int exfat_clear_volume_dirty(struct super_block *sb) {
>> +	if (sb->s_flags & (SB_SYNCHRONOUS | SB_DIRSYNC))
>> +		return __exfat_clear_volume_dirty(sb);
> 
> Even when only one of SB or DIR is synced, dirty will be cleared.
> Isn't it necessary to have both SB_SYNCHRONOUS and SB_DIRSYNC?

VolumeDirty will be cleared if one of SB_SYNCHRONOUS and SB_DIRSYNC is set.
The condition of (sb->s_flags & (SB_SYNCHRONOUS | SB_DIRSYNC)) is exactly that.

> And, I think it would be better to use IS_SYNC or IS_DIRSYNC macro here.

If we used IS_SYNC or IS_DIRSYNC, we would have to pass `inode` as an argument, which would be a big change to the code.
And if a file is opened with O_SYNC, IS_DIRSYNC and IS_SYNC will be true, and VolumeDirty will be cleared.
So I think it is not necessary to use IS_DIRSYNC and IS_SYNC.

Best Regards,
Yuezhang,Mo
Namjae Jeon March 1, 2022, 9:19 a.m. UTC | #5
2022-02-28 12:24 GMT+09:00, Yuezhang.Mo@sony.com <Yuezhang.Mo@sony.com>:
> Hi Namjae and Sungjong,
Hi Yuezhang,
>
> May I have your comments for this patch?
Sorry for the late response. I will check it within this week:)
>
> Best Regards,
> Yuezhang,Mo
Thanks!
>
>
Kohada.Tetsuhiro@dc.MitsubishiElectric.co.jp March 2, 2022, 9:29 a.m. UTC | #6
Hi, Yuezhang,Mo

Thanks for your comments.

>>> And VolumeDirty will be set again when updating the parent directory. 
>>> It means that BootSector will be written twice in each writeback, that will shorten the life of the device.
>> 
>> I have the same concern.
>> From a lifespan point of view, we should probably clear dirty with just sync_fs().
>
>If it is acceptable for VolumeDirty to remain dirty after all updates are complete, I think it is a good idea.

With the default mount options, this patch will keep VOL_DIRTY set until sync_fs or umount.
This is a preferable change for device life and VOL_DIRTY integrity.
On the other hand, we should think more about the behavior when SB_SYNCHRONOUS is set.
For example, FATFS keeps VOL_DIRTY set until umount regardless of SB_SYNCHRONOUS.
When SB_SYNCHRONOUS is enabled, updating VOL_DIRTY every time will increase the number of writes to the boot-sector.
For NAND flash devices, mounts with 'sync' are a dangerous option that can drastically wear out their lifespan.


>(PS: The original logic is to clear VolumeDirty after BitMap, FAT and directory entries are updated.)

However, the writing order was not guaranteed.
More synchronous writes are needed to guarantee the write order.


>>>  	sync_blockdev(sb->s_bdev);
>>> -	if (exfat_clear_volume_dirty(sb))
>>> +	if (__exfat_clear_volume_dirty(sb))
>> 
>> If SB_SYNCHRONOUS or SB_DIRSYNC is not present, isn't dirty cleared?
>
>With this patch, exfat_clear_volume_dirty() will not clear VolumeDirty if SB_SYNCHRONOUS or SB_DIRSYNC is not present, and __exfat_clear_volume_dirty() will clear VolumeDirty unconditionally.

__exfat_clear_volume_dirty() only calls mark_buffer_dirty() on the boot sector; it doesn't sync.
It should sync here or in exfat_set_vol_flags().


>>> +int exfat_clear_volume_dirty(struct super_block *sb) {
>>> +	if (sb->s_flags & (SB_SYNCHRONOUS | SB_DIRSYNC))
>>> +		return __exfat_clear_volume_dirty(sb);
>> 
>> Even when only one of SB or DIR is synced, dirty will be cleared.
>> Isn't it necessary to have both SB_SYNCHRONOUS and SB_DIRSYNC?
>
>VolumeDirty will be cleared if one of SB_SYNCHRONOUS and SB_DIRSYNC is set.
>The condition of (sb->s_flags & (SB_SYNCHRONOUS | SB_DIRSYNC)) is exactly that.

Even if the dir-entries are synced, dirty must not be cleared when FAT / mirrorFAT is not synced.
Also, it is not necessary to clear VOL_DIRTY even if SB_DIRSYNC is set.
I don't think it is necessary to check SB_DIRSYNC.


>> And, I think it would be better to use IS_SYNC or IS_DIRSYNC macro here.
>
>If use IS_SYNC or IS_DIRSYNC, we should pass `inode` as an argument, it will be a big change for code.
>And if open a file with O_SYNC, IS_DIRSYNC and IS_SYNC will be true, VolumeDirty will be cleared. 
>So I think it is not necessary to use IS_DIRSYNC and IS_SYNC.

exactly.


BR
Yuezhang.Mo@sony.com March 3, 2022, 7:08 a.m. UTC | #7
Hi, Kohada.Tetsuhiro

> >(PS: The original logic is to clear VolumeDirty after BitMap, FAT and directory
> entries are updated.)
> 
> However, the writing order was not guaranteed.
> More synchronous writes are needed to guarantee the write order.

If "dirsync" or "sync" is enabled, BitMap, FAT and directory entries are guaranteed to be written in order.
This is the reason to keep clearing VolumeDirty.


Best Regards,
Yuezhang Mo
Namjae Jeon March 5, 2022, 6:03 a.m. UTC | #8
2022-02-08 14:18 GMT+09:00, Yuezhang.Mo@sony.com <Yuezhang.Mo@sony.com>:
Hi Yuezhang,

> Before this commit, VolumeDirty will be cleared first in
> writeback if 'dirsync' or 'sync' is not enabled. If the power
> is suddenly cut off after cleaning VolumeDirty but other
> updates are not written, the exFAT filesystem will not be able
> to detect the power failure in the next mount.
>
> And VolumeDirty will be set again when updating the parent
> directory. It means that BootSector will be written twice in each
> writeback, that will shorten the life of the device.
>
> Reviewed-by: Andy.Wu <Andy.Wu@sony.com>
> Reviewed-by: Aoyama, Wataru <wataru.aoyama@sony.com>
> Signed-off-by: Yuezhang.Mo <Yuezhang.Mo@sony.com>
> ---
>  fs/exfat/super.c | 14 ++++++++++++--
>  1 file changed, 12 insertions(+), 2 deletions(-)
>
> diff --git a/fs/exfat/super.c b/fs/exfat/super.c
> index 8c9fb7dcec16..f4906c17475e 100644
> --- a/fs/exfat/super.c
> +++ b/fs/exfat/super.c
> @@ -25,6 +25,8 @@
>  static char exfat_default_iocharset[] = CONFIG_EXFAT_DEFAULT_IOCHARSET;
>  static struct kmem_cache *exfat_inode_cachep;
>
> +static int __exfat_clear_volume_dirty(struct super_block *sb);
> +
>  static void exfat_free_iocharset(struct exfat_sb_info *sbi)
>  {
>  	if (sbi->options.iocharset != exfat_default_iocharset)
> @@ -64,7 +66,7 @@ static int exfat_sync_fs(struct super_block *sb, int wait)
>  	/* If there are some dirty buffers in the bdev inode */
>  	mutex_lock(&sbi->s_lock);
>  	sync_blockdev(sb->s_bdev);
> -	if (exfat_clear_volume_dirty(sb))
> +	if (__exfat_clear_volume_dirty(sb))
>  		err = -EIO;
>  	mutex_unlock(&sbi->s_lock);
>  	return err;
> @@ -139,13 +141,21 @@ int exfat_set_volume_dirty(struct super_block *sb)
>  	return exfat_set_vol_flags(sb, sbi->vol_flags | VOLUME_DIRTY);
>  }
>
> -int exfat_clear_volume_dirty(struct super_block *sb)
> +static int __exfat_clear_volume_dirty(struct super_block *sb)
>  {
>  	struct exfat_sb_info *sbi = EXFAT_SB(sb);
>
>  	return exfat_set_vol_flags(sb, sbi->vol_flags & ~VOLUME_DIRTY);
>  }
>
> +int exfat_clear_volume_dirty(struct super_block *sb)
> +{
> +	if (sb->s_flags & (SB_SYNCHRONOUS | SB_DIRSYNC))
How about moving exfat_clear_volume_dirty() into the IS_DIRSYNC() check in
each operation instead of this check?

> +		return __exfat_clear_volume_dirty(sb);
> +
> +	return 0;
> +}
> +
>  static int exfat_show_options(struct seq_file *m, struct dentry *root)
>  {
>  	struct super_block *sb = root->d_sb;
> --
> 2.25.1
Kohada.Tetsuhiro@dc.MitsubishiElectric.co.jp March 7, 2022, 5:34 a.m. UTC | #9
Hi, Yuezhang,Mo

> > >(PS: The original logic is to clear VolumeDirty after BitMap, FAT and directory
> > entries are updated.)
> >
> > However, the writing order was not guaranteed.
> > More synchronous writes are needed to guarantee the write order.
> 
> If "dirsync" or "sync" is enabled, BitMap, FAT and directory entries are guaranteed to be written in order.
> This is the reason to keep clearing VolumeDirty.

SB_DIRSYNC requests synchronization of the inode of the current file/dir.
The exfat implementation updates and syncs the dir-entries of the current file/dir.
If only SB_DIRSYNC is set and SB_SYNCHRONOUS is not set, it cannot be guaranteed that FAT/mirrorFAT is synchronized.

BR
T .Kohada
Yuezhang.Mo@sony.com March 8, 2022, 10:55 a.m. UTC | #10
Hi Namjae Jeon,

> > +int exfat_clear_volume_dirty(struct super_block *sb) {
> > +	if (sb->s_flags & (SB_SYNCHRONOUS | SB_DIRSYNC))
> How about moving exfat_clear_volume_dirty() to IS_DIRSYNC() check in each
> operations instead of this check?

I found that VolumeDirty stays set until sync or umount regardless of whether sync or dirsync is enabled,
because there is no paired call to exfat_set_volume_dirty()/exfat_clear_volume_dirty() in __exfat_write_inode().

If exfat_set_volume_dirty()/exfat_clear_volume_dirty() were called in pairs in __exfat_write_inode(),
it would cause frequent writing of the boot sector.

So, how about removing exfat_clear_volume_dirty() from each operation, except in exfat_sync_fs()?


Best Regards,
Yuezhang Mo
Namjae Jeon March 9, 2022, 11:14 a.m. UTC | #11
2022-03-08 19:55 GMT+09:00, Yuezhang.Mo@sony.com <Yuezhang.Mo@sony.com>:
> Hi Namjae Jeon,
>
>> > +int exfat_clear_volume_dirty(struct super_block *sb) {
>> > +	if (sb->s_flags & (SB_SYNCHRONOUS | SB_DIRSYNC))
>> How about moving exfat_clear_volume_dirty() to IS_DIRSYNC() check in each
>> operations instead of this check?
>
> I found that VolumeDirty keeps VOL_DIRTY until sync or umount regardless of
> sync or dirsync enabled,
> because there is no paired call to
> exfat_set_volume_dirty()/exfat_clear_volume_dirty() in
> __exfat_write_inode().
>
> If exfat_set_volume_dirty()/exfat_clear_volume_dirty() is called in pairs in
> __exfat_write_inode(),
> it will cause frequent writing of bootsector.
>
> So, how about removing exfat_clear_volume_dirty() from each operations,
> except in exfat_sync_fs()?
Okay. Please send the patch for this.

Thanks!
>
>
> Best Regards,
> Yuezhang Mo
>
diff mbox series

Patch

diff --git a/fs/exfat/super.c b/fs/exfat/super.c
index 8c9fb7dcec16..f4906c17475e 100644
--- a/fs/exfat/super.c
+++ b/fs/exfat/super.c
@@ -25,6 +25,8 @@ 
 static char exfat_default_iocharset[] = CONFIG_EXFAT_DEFAULT_IOCHARSET;
 static struct kmem_cache *exfat_inode_cachep;
 
+static int __exfat_clear_volume_dirty(struct super_block *sb);
+
 static void exfat_free_iocharset(struct exfat_sb_info *sbi)
 {
 	if (sbi->options.iocharset != exfat_default_iocharset)
@@ -64,7 +66,7 @@  static int exfat_sync_fs(struct super_block *sb, int wait)
 	/* If there are some dirty buffers in the bdev inode */
 	mutex_lock(&sbi->s_lock);
 	sync_blockdev(sb->s_bdev);
-	if (exfat_clear_volume_dirty(sb))
+	if (__exfat_clear_volume_dirty(sb))
 		err = -EIO;
 	mutex_unlock(&sbi->s_lock);
 	return err;
@@ -139,13 +141,21 @@  int exfat_set_volume_dirty(struct super_block *sb)
 	return exfat_set_vol_flags(sb, sbi->vol_flags | VOLUME_DIRTY);
 }
 
-int exfat_clear_volume_dirty(struct super_block *sb)
+static int __exfat_clear_volume_dirty(struct super_block *sb)
 {
 	struct exfat_sb_info *sbi = EXFAT_SB(sb);
 
 	return exfat_set_vol_flags(sb, sbi->vol_flags & ~VOLUME_DIRTY);
 }
 
+int exfat_clear_volume_dirty(struct super_block *sb)
+{
+	if (sb->s_flags & (SB_SYNCHRONOUS | SB_DIRSYNC))
+		return __exfat_clear_volume_dirty(sb);
+
+	return 0;
+}
+
 static int exfat_show_options(struct seq_file *m, struct dentry *root)
 {
 	struct super_block *sb = root->d_sb;