diff mbox series

exfat/balloc: using ffs instead of internal logic

Message ID 20231207234701.566133-1-sanpeqf@gmail.com (mailing list archive)
State New
Headers show
Series exfat/balloc: using ffs instead of internal logic | expand

Commit Message

John Sanpe Dec. 7, 2023, 11:47 p.m. UTC
Replaced the internal table lookup algorithm with ffs of
the bitops library with better performance.

Use it to increase the single processing length of the
exfat_find_free_bitmap function, from single-byte search to long type.

Signed-off-by: John Sanpe <sanpeqf@gmail.com>
---
 fs/exfat/balloc.c   | 41 +++++++++++++++--------------------------
 fs/exfat/exfat_fs.h |  3 +--
 2 files changed, 16 insertions(+), 28 deletions(-)

Comments

Sungjong Seo Dec. 12, 2023, 1:15 a.m. UTC | #1
> Replaced the internal table lookup algorithm with ffs of the bitops
> library with better performance.
> 
> Use it to increase the single processing length of the
> exfat_find_free_bitmap function, from single-byte search to long type.
> 
> Signed-off-by: John Sanpe <sanpeqf@gmail.com>

Looks good. Thanks for your patch.
Acked-by: Sungjong Seo <sj1557.seo@samsung.com>

> ---
>  fs/exfat/balloc.c   | 41 +++++++++++++++--------------------------
>  fs/exfat/exfat_fs.h |  3 +--
>  2 files changed, 16 insertions(+), 28 deletions(-)
> 
> diff --git a/fs/exfat/balloc.c b/fs/exfat/balloc.c index
> 3e3e9e4cce2f..4bacbb0cf5da 100644
> --- a/fs/exfat/balloc.c
> +++ b/fs/exfat/balloc.c
> @@ -14,29 +14,15 @@
>  #if BITS_PER_LONG == 32
>  #define __le_long __le32
>  #define lel_to_cpu(A) le32_to_cpu(A)
> +#define cpu_to_lel(A) cpu_to_le32(A)
>  #elif BITS_PER_LONG == 64
>  #define __le_long __le64
>  #define lel_to_cpu(A) le64_to_cpu(A)
> +#define cpu_to_lel(A) cpu_to_le64(A)
>  #else
>  #error "BITS_PER_LONG not 32 or 64"
>  #endif
> 
> -static const unsigned char free_bit[] = {
> -	0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2,/*  0 ~
> 19*/
> -	0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3,/* 20 ~
> 39*/
> -	0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2,/* 40 ~
> 59*/
> -	0, 1, 0, 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4,/* 60 ~
> 79*/
> -	0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2,/* 80 ~
> 99*/
> -	0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3,/*100 ~
> 119*/
> -	0, 1, 0, 2, 0, 1, 0, 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2,/*120 ~
> 139*/
> -	0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5,/*140 ~
> 159*/
> -	0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2,/*160 ~
> 179*/
> -	0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 6, 0, 1, 0, 2, 0, 1, 0, 3,/*180 ~
> 199*/
> -	0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2,/*200 ~
> 219*/
> -	0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4,/*220 ~
> 239*/
> -	0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0                /*240 ~
> 254*/
> -};
> -
>  /*
>   *  Allocation Bitmap Management Functions
>   */
> @@ -195,32 +181,35 @@ unsigned int exfat_find_free_bitmap(struct
> super_block *sb, unsigned int clu)  {
>  	unsigned int i, map_i, map_b, ent_idx;
>  	unsigned int clu_base, clu_free;
> -	unsigned char k, clu_mask;
> +	unsigned long clu_bits, clu_mask;
>  	struct exfat_sb_info *sbi = EXFAT_SB(sb);
> +	__le_long bitval;
> 
>  	WARN_ON(clu < EXFAT_FIRST_CLUSTER);
> -	ent_idx = CLUSTER_TO_BITMAP_ENT(clu);
> -	clu_base = BITMAP_ENT_TO_CLUSTER(ent_idx & ~(BITS_PER_BYTE_MASK));
> +	ent_idx = ALIGN_DOWN(CLUSTER_TO_BITMAP_ENT(clu), BITS_PER_LONG);
> +	clu_base = BITMAP_ENT_TO_CLUSTER(ent_idx);
>  	clu_mask = IGNORED_BITS_REMAINED(clu, clu_base);
> 
>  	map_i = BITMAP_OFFSET_SECTOR_INDEX(sb, ent_idx);
>  	map_b = BITMAP_OFFSET_BYTE_IN_SECTOR(sb, ent_idx);
> 
>  	for (i = EXFAT_FIRST_CLUSTER; i < sbi->num_clusters;
> -	     i += BITS_PER_BYTE) {
> -		k = *(sbi->vol_amap[map_i]->b_data + map_b);
> +	     i += BITS_PER_LONG) {
> +		bitval = *(__le_long *)(sbi->vol_amap[map_i]->b_data +
> map_b);
>  		if (clu_mask > 0) {
> -			k |= clu_mask;
> +			bitval |= cpu_to_lel(clu_mask);
>  			clu_mask = 0;
>  		}
> -		if (k < 0xFF) {
> -			clu_free = clu_base + free_bit[k];
> +		if (bitval != ULONG_MAX) {
> +			clu_bits = lel_to_cpu(bitval);
> +			clu_free = clu_base + ffz(clu_bits);
>  			if (clu_free < sbi->num_clusters)
>  				return clu_free;
>  		}
> -		clu_base += BITS_PER_BYTE;
> +		clu_base += BITS_PER_LONG;
> +		map_b += sizeof(long);
> 
> -		if (++map_b >= sb->s_blocksize ||
> +		if (map_b >= sb->s_blocksize ||
>  		    clu_base >= sbi->num_clusters) {
>  			if (++map_i >= sbi->map_sectors) {
>  				clu_base = EXFAT_FIRST_CLUSTER;
> diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h index
> a7a2c35d74fb..8030780a199b 100644
> --- a/fs/exfat/exfat_fs.h
> +++ b/fs/exfat/exfat_fs.h
> @@ -135,8 +135,7 @@ enum {
>  #define BITMAP_OFFSET_BIT_IN_SECTOR(sb, ent) (ent &
> BITS_PER_SECTOR_MASK(sb))  #define BITMAP_OFFSET_BYTE_IN_SECTOR(sb, ent) \
>  	((ent / BITS_PER_BYTE) & ((sb)->s_blocksize - 1))
> -#define BITS_PER_BYTE_MASK	0x7
> -#define IGNORED_BITS_REMAINED(clu, clu_base) ((1 << ((clu) - (clu_base)))
> - 1)
> +#define IGNORED_BITS_REMAINED(clu, clu_base) ((1UL << ((clu) -
> +(clu_base))) - 1)
> 
>  #define ES_ENTRY_NUM(name_len)	(ES_IDX_LAST_FILENAME(name_len) + 1)
>  /* 19 entries = 1 file entry + 1 stream entry + 17 filename entries */
> --
> 2.43.0
Matthew Wilcox Dec. 12, 2023, 1:53 a.m. UTC | #2
On Fri, Dec 08, 2023 at 07:47:01AM +0800, John Sanpe wrote:
>  	WARN_ON(clu < EXFAT_FIRST_CLUSTER);
> -	ent_idx = CLUSTER_TO_BITMAP_ENT(clu);
> -	clu_base = BITMAP_ENT_TO_CLUSTER(ent_idx & ~(BITS_PER_BYTE_MASK));
> +	ent_idx = ALIGN_DOWN(CLUSTER_TO_BITMAP_ENT(clu), BITS_PER_LONG);
> +	clu_base = BITMAP_ENT_TO_CLUSTER(ent_idx);
>  	clu_mask = IGNORED_BITS_REMAINED(clu, clu_base);
>  
>  	map_i = BITMAP_OFFSET_SECTOR_INDEX(sb, ent_idx);
>  	map_b = BITMAP_OFFSET_BYTE_IN_SECTOR(sb, ent_idx);
>  
>  	for (i = EXFAT_FIRST_CLUSTER; i < sbi->num_clusters;
> -	     i += BITS_PER_BYTE) {
> -		k = *(sbi->vol_amap[map_i]->b_data + map_b);
> +	     i += BITS_PER_LONG) {
> +		bitval = *(__le_long *)(sbi->vol_amap[map_i]->b_data + map_b);

Is this guaranteed to be word-aligned, or might we end up doing
misaligned loads here?
John Sanpe Dec. 12, 2023, 4:10 p.m. UTC | #3
On Fri, Dec 08, 2023 at 07:47:01AM +0800, John Sanpe wrote:
>       WARN_ON(clu < EXFAT_FIRST_CLUSTER);
> -     ent_idx = CLUSTER_TO_BITMAP_ENT(clu);
> -     clu_base = BITMAP_ENT_TO_CLUSTER(ent_idx & ~(BITS_PER_BYTE_MASK));
> +     ent_idx = ALIGN_DOWN(CLUSTER_TO_BITMAP_ENT(clu), BITS_PER_LONG);

Thanks a lot for this question, the clusters are aligned here,
so the final calculated "map_b" is word-aligned.

On Tue, Dec 12, 2023 at 9:53 AM Matthew Wilcox <willy@infradead.org> wrote:
>
> On Fri, Dec 08, 2023 at 07:47:01AM +0800, John Sanpe wrote:
> >       WARN_ON(clu < EXFAT_FIRST_CLUSTER);
> > -     ent_idx = CLUSTER_TO_BITMAP_ENT(clu);
> > -     clu_base = BITMAP_ENT_TO_CLUSTER(ent_idx & ~(BITS_PER_BYTE_MASK));
> > +     ent_idx = ALIGN_DOWN(CLUSTER_TO_BITMAP_ENT(clu), BITS_PER_LONG);
> > +     clu_base = BITMAP_ENT_TO_CLUSTER(ent_idx);
> >       clu_mask = IGNORED_BITS_REMAINED(clu, clu_base);
> >
> >       map_i = BITMAP_OFFSET_SECTOR_INDEX(sb, ent_idx);
> >       map_b = BITMAP_OFFSET_BYTE_IN_SECTOR(sb, ent_idx);
> >
> >       for (i = EXFAT_FIRST_CLUSTER; i < sbi->num_clusters;
> > -          i += BITS_PER_BYTE) {
> > -             k = *(sbi->vol_amap[map_i]->b_data + map_b);
> > +          i += BITS_PER_LONG) {
> > +             bitval = *(__le_long *)(sbi->vol_amap[map_i]->b_data + map_b);
>
> Is this guaranteed to be word-aligned, or might we end up doing
> misaligned loads here?
>
Namjae Jeon Dec. 13, 2023, 1:29 p.m. UTC | #4
2023-12-08 8:47 GMT+09:00, John Sanpe <sanpeqf@gmail.com>:
> Replaced the internal table lookup algorithm with ffs of
> the bitops library with better performance.
>
> Use it to increase the single processing length of the
> exfat_find_free_bitmap function, from single-byte search to long type.
>
> Signed-off-by: John Sanpe <sanpeqf@gmail.com>
Applied it to #dev.
Thanks for your patch!
diff mbox series

Patch

diff --git a/fs/exfat/balloc.c b/fs/exfat/balloc.c
index 3e3e9e4cce2f..4bacbb0cf5da 100644
--- a/fs/exfat/balloc.c
+++ b/fs/exfat/balloc.c
@@ -14,29 +14,15 @@ 
 #if BITS_PER_LONG == 32
 #define __le_long __le32
 #define lel_to_cpu(A) le32_to_cpu(A)
+#define cpu_to_lel(A) cpu_to_le32(A)
 #elif BITS_PER_LONG == 64
 #define __le_long __le64
 #define lel_to_cpu(A) le64_to_cpu(A)
+#define cpu_to_lel(A) cpu_to_le64(A)
 #else
 #error "BITS_PER_LONG not 32 or 64"
 #endif
 
-static const unsigned char free_bit[] = {
-	0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2,/*  0 ~  19*/
-	0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3,/* 20 ~  39*/
-	0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2,/* 40 ~  59*/
-	0, 1, 0, 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4,/* 60 ~  79*/
-	0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2,/* 80 ~  99*/
-	0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3,/*100 ~ 119*/
-	0, 1, 0, 2, 0, 1, 0, 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2,/*120 ~ 139*/
-	0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5,/*140 ~ 159*/
-	0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2,/*160 ~ 179*/
-	0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 6, 0, 1, 0, 2, 0, 1, 0, 3,/*180 ~ 199*/
-	0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2,/*200 ~ 219*/
-	0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4,/*220 ~ 239*/
-	0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0                /*240 ~ 254*/
-};
-
 /*
  *  Allocation Bitmap Management Functions
  */
@@ -195,32 +181,35 @@  unsigned int exfat_find_free_bitmap(struct super_block *sb, unsigned int clu)
 {
 	unsigned int i, map_i, map_b, ent_idx;
 	unsigned int clu_base, clu_free;
-	unsigned char k, clu_mask;
+	unsigned long clu_bits, clu_mask;
 	struct exfat_sb_info *sbi = EXFAT_SB(sb);
+	__le_long bitval;
 
 	WARN_ON(clu < EXFAT_FIRST_CLUSTER);
-	ent_idx = CLUSTER_TO_BITMAP_ENT(clu);
-	clu_base = BITMAP_ENT_TO_CLUSTER(ent_idx & ~(BITS_PER_BYTE_MASK));
+	ent_idx = ALIGN_DOWN(CLUSTER_TO_BITMAP_ENT(clu), BITS_PER_LONG);
+	clu_base = BITMAP_ENT_TO_CLUSTER(ent_idx);
 	clu_mask = IGNORED_BITS_REMAINED(clu, clu_base);
 
 	map_i = BITMAP_OFFSET_SECTOR_INDEX(sb, ent_idx);
 	map_b = BITMAP_OFFSET_BYTE_IN_SECTOR(sb, ent_idx);
 
 	for (i = EXFAT_FIRST_CLUSTER; i < sbi->num_clusters;
-	     i += BITS_PER_BYTE) {
-		k = *(sbi->vol_amap[map_i]->b_data + map_b);
+	     i += BITS_PER_LONG) {
+		bitval = *(__le_long *)(sbi->vol_amap[map_i]->b_data + map_b);
 		if (clu_mask > 0) {
-			k |= clu_mask;
+			bitval |= cpu_to_lel(clu_mask);
 			clu_mask = 0;
 		}
-		if (k < 0xFF) {
-			clu_free = clu_base + free_bit[k];
+		if (bitval != ULONG_MAX) {
+			clu_bits = lel_to_cpu(bitval);
+			clu_free = clu_base + ffz(clu_bits);
 			if (clu_free < sbi->num_clusters)
 				return clu_free;
 		}
-		clu_base += BITS_PER_BYTE;
+		clu_base += BITS_PER_LONG;
+		map_b += sizeof(long);
 
-		if (++map_b >= sb->s_blocksize ||
+		if (map_b >= sb->s_blocksize ||
 		    clu_base >= sbi->num_clusters) {
 			if (++map_i >= sbi->map_sectors) {
 				clu_base = EXFAT_FIRST_CLUSTER;
diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h
index a7a2c35d74fb..8030780a199b 100644
--- a/fs/exfat/exfat_fs.h
+++ b/fs/exfat/exfat_fs.h
@@ -135,8 +135,7 @@  enum {
 #define BITMAP_OFFSET_BIT_IN_SECTOR(sb, ent) (ent & BITS_PER_SECTOR_MASK(sb))
 #define BITMAP_OFFSET_BYTE_IN_SECTOR(sb, ent) \
 	((ent / BITS_PER_BYTE) & ((sb)->s_blocksize - 1))
-#define BITS_PER_BYTE_MASK	0x7
-#define IGNORED_BITS_REMAINED(clu, clu_base) ((1 << ((clu) - (clu_base))) - 1)
+#define IGNORED_BITS_REMAINED(clu, clu_base) ((1UL << ((clu) - (clu_base))) - 1)
 
 #define ES_ENTRY_NUM(name_len)	(ES_IDX_LAST_FILENAME(name_len) + 1)
 /* 19 entries = 1 file entry + 1 stream entry + 17 filename entries */