Message ID | 20231207234701.566133-1-sanpeqf@gmail.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | exfat/balloc: using ffs instead of internal logic | expand |
> Replaced the internal table lookup algorithm with ffs of the bitops > library with better performance. > > Use it to increase the single processing length of the > exfat_find_free_bitmap function, from single-byte search to long type. > > Signed-off-by: John Sanpe <sanpeqf@gmail.com> Looks good. Thanks for your patch. Acked-by: Sungjong Seo <sj1557.seo@samsung.com> > --- > fs/exfat/balloc.c | 41 +++++++++++++++-------------------------- > fs/exfat/exfat_fs.h | 3 +-- > 2 files changed, 16 insertions(+), 28 deletions(-) > > diff --git a/fs/exfat/balloc.c b/fs/exfat/balloc.c index > 3e3e9e4cce2f..4bacbb0cf5da 100644 > --- a/fs/exfat/balloc.c > +++ b/fs/exfat/balloc.c > @@ -14,29 +14,15 @@ > #if BITS_PER_LONG == 32 > #define __le_long __le32 > #define lel_to_cpu(A) le32_to_cpu(A) > +#define cpu_to_lel(A) cpu_to_le32(A) > #elif BITS_PER_LONG == 64 > #define __le_long __le64 > #define lel_to_cpu(A) le64_to_cpu(A) > +#define cpu_to_lel(A) cpu_to_le64(A) > #else > #error "BITS_PER_LONG not 32 or 64" > #endif > > -static const unsigned char free_bit[] = { > - 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2,/* 0 ~ > 19*/ > - 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3,/* 20 ~ > 39*/ > - 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2,/* 40 ~ > 59*/ > - 0, 1, 0, 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4,/* 60 ~ > 79*/ > - 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2,/* 80 ~ > 99*/ > - 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3,/*100 ~ > 119*/ > - 0, 1, 0, 2, 0, 1, 0, 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2,/*120 ~ > 139*/ > - 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5,/*140 ~ > 159*/ > - 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2,/*160 ~ > 179*/ > - 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 6, 0, 1, 0, 2, 0, 1, 0, 3,/*180 ~ > 199*/ > - 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2,/*200 ~ > 219*/ > - 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4,/*220 ~ > 239*/ > - 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0 /*240 ~ > 254*/ > -}; > - > /* > * Allocation Bitmap Management Functions > */ > @@ -195,32 +181,35 @@ unsigned int exfat_find_free_bitmap(struct > super_block *sb, unsigned int clu) { > unsigned int i, map_i, map_b, ent_idx; > unsigned int clu_base, clu_free; > - unsigned char k, clu_mask; > + unsigned long clu_bits, clu_mask; > struct exfat_sb_info *sbi = EXFAT_SB(sb); > + __le_long bitval; > > WARN_ON(clu < EXFAT_FIRST_CLUSTER); > - ent_idx = CLUSTER_TO_BITMAP_ENT(clu); > - clu_base = BITMAP_ENT_TO_CLUSTER(ent_idx & ~(BITS_PER_BYTE_MASK)); > + ent_idx = ALIGN_DOWN(CLUSTER_TO_BITMAP_ENT(clu), BITS_PER_LONG); > + clu_base = BITMAP_ENT_TO_CLUSTER(ent_idx); > clu_mask = IGNORED_BITS_REMAINED(clu, clu_base); > > map_i = BITMAP_OFFSET_SECTOR_INDEX(sb, ent_idx); > map_b = BITMAP_OFFSET_BYTE_IN_SECTOR(sb, ent_idx); > > for (i = EXFAT_FIRST_CLUSTER; i < sbi->num_clusters; > - i += BITS_PER_BYTE) { > - k = *(sbi->vol_amap[map_i]->b_data + map_b); > + i += BITS_PER_LONG) { > + bitval = *(__le_long *)(sbi->vol_amap[map_i]->b_data + > map_b); > if (clu_mask > 0) { > - k |= clu_mask; > + bitval |= cpu_to_lel(clu_mask); > clu_mask = 0; > } > - if (k < 0xFF) { > - clu_free = clu_base + free_bit[k]; > + if (bitval != ULONG_MAX) { > + clu_bits = lel_to_cpu(bitval); > + clu_free = clu_base + ffz(clu_bits); > if (clu_free < sbi->num_clusters) > return clu_free; > } > - clu_base += BITS_PER_BYTE; > + clu_base += BITS_PER_LONG; > + map_b += sizeof(long); > > - if (++map_b >= sb->s_blocksize || > + if (map_b >= sb->s_blocksize || > clu_base >= sbi->num_clusters) { > if (++map_i >= sbi->map_sectors) { > clu_base = EXFAT_FIRST_CLUSTER; > diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h index > a7a2c35d74fb..8030780a199b 100644 > --- a/fs/exfat/exfat_fs.h > +++ b/fs/exfat/exfat_fs.h > @@ -135,8 +135,7 @@ enum { > #define BITMAP_OFFSET_BIT_IN_SECTOR(sb, ent) (ent & > BITS_PER_SECTOR_MASK(sb)) #define BITMAP_OFFSET_BYTE_IN_SECTOR(sb, ent) \ > ((ent / BITS_PER_BYTE) & ((sb)->s_blocksize - 1)) > -#define BITS_PER_BYTE_MASK 0x7 > -#define IGNORED_BITS_REMAINED(clu, clu_base) ((1 << ((clu) - (clu_base))) > - 1) > +#define IGNORED_BITS_REMAINED(clu, clu_base) ((1UL << ((clu) - > +(clu_base))) - 1) > > #define ES_ENTRY_NUM(name_len) (ES_IDX_LAST_FILENAME(name_len) + 1) > /* 19 entries = 1 file entry + 1 stream entry + 17 filename entries */ > -- > 2.43.0
On Fri, Dec 08, 2023 at 07:47:01AM +0800, John Sanpe wrote: > WARN_ON(clu < EXFAT_FIRST_CLUSTER); > - ent_idx = CLUSTER_TO_BITMAP_ENT(clu); > - clu_base = BITMAP_ENT_TO_CLUSTER(ent_idx & ~(BITS_PER_BYTE_MASK)); > + ent_idx = ALIGN_DOWN(CLUSTER_TO_BITMAP_ENT(clu), BITS_PER_LONG); > + clu_base = BITMAP_ENT_TO_CLUSTER(ent_idx); > clu_mask = IGNORED_BITS_REMAINED(clu, clu_base); > > map_i = BITMAP_OFFSET_SECTOR_INDEX(sb, ent_idx); > map_b = BITMAP_OFFSET_BYTE_IN_SECTOR(sb, ent_idx); > > for (i = EXFAT_FIRST_CLUSTER; i < sbi->num_clusters; > - i += BITS_PER_BYTE) { > - k = *(sbi->vol_amap[map_i]->b_data + map_b); > + i += BITS_PER_LONG) { > + bitval = *(__le_long *)(sbi->vol_amap[map_i]->b_data + map_b); Is this guaranteed to be word-aligned, or might we end up doing misaligned loads here?
On Fri, Dec 08, 2023 at 07:47:01AM +0800, John Sanpe wrote: > WARN_ON(clu < EXFAT_FIRST_CLUSTER); > - ent_idx = CLUSTER_TO_BITMAP_ENT(clu); > - clu_base = BITMAP_ENT_TO_CLUSTER(ent_idx & ~(BITS_PER_BYTE_MASK)); > + ent_idx = ALIGN_DOWN(CLUSTER_TO_BITMAP_ENT(clu), BITS_PER_LONG); Thanks a lot for this question, the clusters are aligned here, so the final calculated "map_b" is word-aligned. On Tue, Dec 12, 2023 at 9:53 AM Matthew Wilcox <willy@infradead.org> wrote: > > On Fri, Dec 08, 2023 at 07:47:01AM +0800, John Sanpe wrote: > > WARN_ON(clu < EXFAT_FIRST_CLUSTER); > > - ent_idx = CLUSTER_TO_BITMAP_ENT(clu); > > - clu_base = BITMAP_ENT_TO_CLUSTER(ent_idx & ~(BITS_PER_BYTE_MASK)); > > + ent_idx = ALIGN_DOWN(CLUSTER_TO_BITMAP_ENT(clu), BITS_PER_LONG); > > + clu_base = BITMAP_ENT_TO_CLUSTER(ent_idx); > > clu_mask = IGNORED_BITS_REMAINED(clu, clu_base); > > > > map_i = BITMAP_OFFSET_SECTOR_INDEX(sb, ent_idx); > > map_b = BITMAP_OFFSET_BYTE_IN_SECTOR(sb, ent_idx); > > > > for (i = EXFAT_FIRST_CLUSTER; i < sbi->num_clusters; > > - i += BITS_PER_BYTE) { > > - k = *(sbi->vol_amap[map_i]->b_data + map_b); > > + i += BITS_PER_LONG) { > > + bitval = *(__le_long *)(sbi->vol_amap[map_i]->b_data + map_b); > > Is this guaranteed to be word-aligned, or might we end up doing > misaligned loads here? >
2023-12-08 8:47 GMT+09:00, John Sanpe <sanpeqf@gmail.com>: > Replaced the internal table lookup algorithm with ffs of > the bitops library with better performance. > > Use it to increase the single processing length of the > exfat_find_free_bitmap function, from single-byte search to long type. > > Signed-off-by: John Sanpe <sanpeqf@gmail.com> Applied it to #dev. Thanks for your patch!
diff --git a/fs/exfat/balloc.c b/fs/exfat/balloc.c index 3e3e9e4cce2f..4bacbb0cf5da 100644 --- a/fs/exfat/balloc.c +++ b/fs/exfat/balloc.c @@ -14,29 +14,15 @@ #if BITS_PER_LONG == 32 #define __le_long __le32 #define lel_to_cpu(A) le32_to_cpu(A) +#define cpu_to_lel(A) cpu_to_le32(A) #elif BITS_PER_LONG == 64 #define __le_long __le64 #define lel_to_cpu(A) le64_to_cpu(A) +#define cpu_to_lel(A) cpu_to_le64(A) #else #error "BITS_PER_LONG not 32 or 64" #endif -static const unsigned char free_bit[] = { - 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2,/* 0 ~ 19*/ - 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3,/* 20 ~ 39*/ - 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2,/* 40 ~ 59*/ - 0, 1, 0, 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4,/* 60 ~ 79*/ - 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2,/* 80 ~ 99*/ - 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3,/*100 ~ 119*/ - 0, 1, 0, 2, 0, 1, 0, 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2,/*120 ~ 139*/ - 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5,/*140 ~ 159*/ - 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2,/*160 ~ 179*/ - 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 6, 0, 1, 0, 2, 0, 1, 0, 3,/*180 ~ 199*/ - 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2,/*200 ~ 219*/ - 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4,/*220 ~ 239*/ - 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0 /*240 ~ 254*/ -}; - /* * Allocation Bitmap Management Functions */ @@ -195,32 +181,35 @@ unsigned int exfat_find_free_bitmap(struct super_block *sb, unsigned int clu) { unsigned int i, map_i, map_b, ent_idx; unsigned int clu_base, clu_free; - unsigned char k, clu_mask; + unsigned long clu_bits, clu_mask; struct exfat_sb_info *sbi = EXFAT_SB(sb); + __le_long bitval; WARN_ON(clu < EXFAT_FIRST_CLUSTER); - ent_idx = CLUSTER_TO_BITMAP_ENT(clu); - clu_base = BITMAP_ENT_TO_CLUSTER(ent_idx & ~(BITS_PER_BYTE_MASK)); + ent_idx = ALIGN_DOWN(CLUSTER_TO_BITMAP_ENT(clu), BITS_PER_LONG); + clu_base = BITMAP_ENT_TO_CLUSTER(ent_idx); clu_mask = IGNORED_BITS_REMAINED(clu, clu_base); map_i = BITMAP_OFFSET_SECTOR_INDEX(sb, ent_idx); map_b = BITMAP_OFFSET_BYTE_IN_SECTOR(sb, ent_idx); for (i = EXFAT_FIRST_CLUSTER; i < sbi->num_clusters; - i += BITS_PER_BYTE) { - k = *(sbi->vol_amap[map_i]->b_data + map_b); + i += BITS_PER_LONG) { + bitval = *(__le_long *)(sbi->vol_amap[map_i]->b_data + map_b); if (clu_mask > 0) { - k |= clu_mask; + bitval |= cpu_to_lel(clu_mask); clu_mask = 0; } - if (k < 0xFF) { - clu_free = clu_base + free_bit[k]; + if (bitval != ULONG_MAX) { + clu_bits = lel_to_cpu(bitval); + clu_free = clu_base + ffz(clu_bits); if (clu_free < sbi->num_clusters) return clu_free; } - clu_base += BITS_PER_BYTE; + clu_base += BITS_PER_LONG; + map_b += sizeof(long); - if (++map_b >= sb->s_blocksize || + if (map_b >= sb->s_blocksize || clu_base >= sbi->num_clusters) { if (++map_i >= sbi->map_sectors) { clu_base = EXFAT_FIRST_CLUSTER; diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h index a7a2c35d74fb..8030780a199b 100644 --- a/fs/exfat/exfat_fs.h +++ b/fs/exfat/exfat_fs.h @@ -135,8 +135,7 @@ enum { #define BITMAP_OFFSET_BIT_IN_SECTOR(sb, ent) (ent & BITS_PER_SECTOR_MASK(sb)) #define BITMAP_OFFSET_BYTE_IN_SECTOR(sb, ent) \ ((ent / BITS_PER_BYTE) & ((sb)->s_blocksize - 1)) -#define BITS_PER_BYTE_MASK 0x7 -#define IGNORED_BITS_REMAINED(clu, clu_base) ((1 << ((clu) - (clu_base))) - 1) +#define IGNORED_BITS_REMAINED(clu, clu_base) ((1UL << ((clu) - (clu_base))) - 1) #define ES_ENTRY_NUM(name_len) (ES_IDX_LAST_FILENAME(name_len) + 1) /* 19 entries = 1 file entry + 1 stream entry + 17 filename entries */
Replaced the internal table lookup algorithm with ffs of the bitops library with better performance. Use it to increase the single processing length of the exfat_find_free_bitmap function, from single-byte search to long type. Signed-off-by: John Sanpe <sanpeqf@gmail.com> --- fs/exfat/balloc.c | 41 +++++++++++++++-------------------------- fs/exfat/exfat_fs.h | 3 +-- 2 files changed, 16 insertions(+), 28 deletions(-)