diff mbox series

[v2,5/7] buffer: Fix various functions for block size > PAGE_SIZE

Message ID 20231109210608.2252323-6-willy@infradead.org (mailing list archive)
State New
Headers show
Series More buffer_head cleanups | expand

Commit Message

Matthew Wilcox Nov. 9, 2023, 9:06 p.m. UTC
If i_blkbits is larger than PAGE_SHIFT, we shift by a negative number,
which is undefined.  It is safe to shift the block left as a block
device must be smaller than MAX_LFS_FILESIZE, which is guaranteed to
fit in loff_t.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Pankaj Raghav <p.raghav@samsung.com>
---
 fs/buffer.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

Comments

Ryusuke Konishi Nov. 10, 2023, 7:48 a.m. UTC | #1
On Fri, Nov 10, 2023 at 6:06 AM Matthew Wilcox (Oracle) wrote:
>
> If i_blkbits is larger than PAGE_SHIFT, we shift by a negative number,
> which is undefined.  It is safe to shift the block left as a block
> device must be smaller than MAX_LFS_FILESIZE, which is guaranteed to
> fit in loff_t.
>
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> Reviewed-by: Pankaj Raghav <p.raghav@samsung.com>
> ---
>  fs/buffer.c | 10 +++++-----
>  1 file changed, 5 insertions(+), 5 deletions(-)
>
> diff --git a/fs/buffer.c b/fs/buffer.c
> index ab345fd4da12..faf1916200c2 100644
> --- a/fs/buffer.c
> +++ b/fs/buffer.c
> @@ -199,7 +199,7 @@ __find_get_block_slow(struct block_device *bdev, sector_t block)
>         int all_mapped = 1;
>         static DEFINE_RATELIMIT_STATE(last_warned, HZ, 1);
>
> -       index = block >> (PAGE_SHIFT - bd_inode->i_blkbits);
> +       index = ((loff_t)block << bd_inode->i_blkbits) / PAGE_SIZE;

Multiple 64-bit divisions are used in this patch, but why not use two
stage shifts as shown below if there is no left shift overflow and the
sign bit will not be set ?

       index = ((loff_t)block << bd_inode->i_blkbits) >> PAGE_SHIFT;

Regards,
Ryusuke Konishi

>         folio = __filemap_get_folio(bd_mapping, index, FGP_ACCESSED, 0);
>         if (IS_ERR(folio))
>                 goto out;
> @@ -1693,13 +1693,13 @@ void clean_bdev_aliases(struct block_device *bdev, sector_t block, sector_t len)
>         struct inode *bd_inode = bdev->bd_inode;
>         struct address_space *bd_mapping = bd_inode->i_mapping;
>         struct folio_batch fbatch;
> -       pgoff_t index = block >> (PAGE_SHIFT - bd_inode->i_blkbits);
> +       pgoff_t index = ((loff_t)block << bd_inode->i_blkbits) / PAGE_SIZE;
>         pgoff_t end;
>         int i, count;
>         struct buffer_head *bh;
>         struct buffer_head *head;
>
> -       end = (block + len - 1) >> (PAGE_SHIFT - bd_inode->i_blkbits);
> +       end = ((loff_t)(block + len - 1) << bd_inode->i_blkbits) / PAGE_SIZE;
>         folio_batch_init(&fbatch);
>         while (filemap_get_folios(bd_mapping, &index, end, &fbatch)) {
>                 count = folio_batch_count(&fbatch);
> @@ -2660,8 +2660,8 @@ int block_truncate_page(struct address_space *mapping,
>                 return 0;
>
>         length = blocksize - length;
> -       iblock = (sector_t)index << (PAGE_SHIFT - inode->i_blkbits);
> -
> +       iblock = ((loff_t)index * PAGE_SIZE) >> inode->i_blkbits;
> +
>         folio = filemap_grab_folio(mapping, index);
>         if (IS_ERR(folio))
>                 return PTR_ERR(folio);
> --
> 2.42.0
>
>
Matthew Wilcox Nov. 10, 2023, 1:36 p.m. UTC | #2
On Fri, Nov 10, 2023 at 04:48:02PM +0900, Ryusuke Konishi wrote:
> On Fri, Nov 10, 2023 at 6:06 AM Matthew Wilcox (Oracle) wrote:
> > +++ b/fs/buffer.c
> > @@ -199,7 +199,7 @@ __find_get_block_slow(struct block_device *bdev, sector_t block)
> >         int all_mapped = 1;
> >         static DEFINE_RATELIMIT_STATE(last_warned, HZ, 1);
> >
> > -       index = block >> (PAGE_SHIFT - bd_inode->i_blkbits);
> > +       index = ((loff_t)block << bd_inode->i_blkbits) / PAGE_SIZE;
> 
> Multiple 64-bit divisions are used in this patch, but why not use two
> stage shifts as shown below if there is no left shift overflow and the
> sign bit will not be set ?
> 
>        index = ((loff_t)block << bd_inode->i_blkbits) >> PAGE_SHIFT;

Here's what the compiler turns that into:

    3223:       49 8b 86 80 00 00 00    mov    0x80(%r14),%rax
    322a:       4c 89 ee                mov    %r13,%rsi
    322d:       ba 01 00 00 00          mov    $0x1,%edx
    3232:       0f b6 88 c2 00 00 00    movzbl 0xc2(%rax),%ecx
^ this is where we load i_blkbits into ecx
    3239:       48 89 45 d0             mov    %rax,-0x30(%rbp)
    323d:       4c 8b 60 30             mov    0x30(%rax),%r12
    3241:       48 d3 e6                shl    %cl,%rsi
^ shift left by %cl (the bottom byte of ecx)
    3244:       31 c9                   xor    %ecx,%ecx
    3246:       48 c1 ee 0c             shr    $0xc,%rsi
^ shift right by 12
    324a:       4c 89 e7                mov    %r12,%rdi
    324d:       e8 00 00 00 00          call   3252 <__find_get_block+0x82>
                        324e: R_X86_64_PLT32    __filemap_get_folio-0x4
Ryusuke Konishi Nov. 10, 2023, 3:23 p.m. UTC | #3
On Fri, Nov 10, 2023 at 10:36 PM Matthew Wilcox wrote:
>
> On Fri, Nov 10, 2023 at 04:48:02PM +0900, Ryusuke Konishi wrote:
> > On Fri, Nov 10, 2023 at 6:06 AM Matthew Wilcox (Oracle) wrote:
> > > +++ b/fs/buffer.c
> > > @@ -199,7 +199,7 @@ __find_get_block_slow(struct block_device *bdev, sector_t block)
> > >         int all_mapped = 1;
> > >         static DEFINE_RATELIMIT_STATE(last_warned, HZ, 1);
> > >
> > > -       index = block >> (PAGE_SHIFT - bd_inode->i_blkbits);
> > > +       index = ((loff_t)block << bd_inode->i_blkbits) / PAGE_SIZE;
> >
> > Multiple 64-bit divisions are used in this patch, but why not use two
> > stage shifts as shown below if there is no left shift overflow and the
> > sign bit will not be set ?
> >
> >        index = ((loff_t)block << bd_inode->i_blkbits) >> PAGE_SHIFT;
>
> Here's what the compiler turns that into:
>
>     3223:       49 8b 86 80 00 00 00    mov    0x80(%r14),%rax
>     322a:       4c 89 ee                mov    %r13,%rsi
>     322d:       ba 01 00 00 00          mov    $0x1,%edx
>     3232:       0f b6 88 c2 00 00 00    movzbl 0xc2(%rax),%ecx
> ^ this is where we load i_blkbits into ecx
>     3239:       48 89 45 d0             mov    %rax,-0x30(%rbp)
>     323d:       4c 8b 60 30             mov    0x30(%rax),%r12
>     3241:       48 d3 e6                shl    %cl,%rsi
> ^ shift left by %cl (the bottom byte of ecx)
>     3244:       31 c9                   xor    %ecx,%ecx
>     3246:       48 c1 ee 0c             shr    $0xc,%rsi
> ^ shift right by 12
>     324a:       4c 89 e7                mov    %r12,%rdi
>     324d:       e8 00 00 00 00          call   3252 <__find_get_block+0x82>
>                         324e: R_X86_64_PLT32    __filemap_get_folio-0x4

Ah, similarly in my environment, these divisions by constant are
optimized (and no link errors happen even on a 32-bit machine).
Sorry for taking up your time with unnecessary comments along with
that for patch 3/7.

Regards,
Ryusuke Konishi
diff mbox series

Patch

diff --git a/fs/buffer.c b/fs/buffer.c
index ab345fd4da12..faf1916200c2 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -199,7 +199,7 @@  __find_get_block_slow(struct block_device *bdev, sector_t block)
 	int all_mapped = 1;
 	static DEFINE_RATELIMIT_STATE(last_warned, HZ, 1);
 
-	index = block >> (PAGE_SHIFT - bd_inode->i_blkbits);
+	index = ((loff_t)block << bd_inode->i_blkbits) / PAGE_SIZE;
 	folio = __filemap_get_folio(bd_mapping, index, FGP_ACCESSED, 0);
 	if (IS_ERR(folio))
 		goto out;
@@ -1693,13 +1693,13 @@  void clean_bdev_aliases(struct block_device *bdev, sector_t block, sector_t len)
 	struct inode *bd_inode = bdev->bd_inode;
 	struct address_space *bd_mapping = bd_inode->i_mapping;
 	struct folio_batch fbatch;
-	pgoff_t index = block >> (PAGE_SHIFT - bd_inode->i_blkbits);
+	pgoff_t index = ((loff_t)block << bd_inode->i_blkbits) / PAGE_SIZE;
 	pgoff_t end;
 	int i, count;
 	struct buffer_head *bh;
 	struct buffer_head *head;
 
-	end = (block + len - 1) >> (PAGE_SHIFT - bd_inode->i_blkbits);
+	end = ((loff_t)(block + len - 1) << bd_inode->i_blkbits) / PAGE_SIZE;
 	folio_batch_init(&fbatch);
 	while (filemap_get_folios(bd_mapping, &index, end, &fbatch)) {
 		count = folio_batch_count(&fbatch);
@@ -2660,8 +2660,8 @@  int block_truncate_page(struct address_space *mapping,
 		return 0;
 
 	length = blocksize - length;
-	iblock = (sector_t)index << (PAGE_SHIFT - inode->i_blkbits);
-	
+	iblock = ((loff_t)index * PAGE_SIZE) >> inode->i_blkbits;
+
 	folio = filemap_grab_folio(mapping, index);
 	if (IS_ERR(folio))
 		return PTR_ERR(folio);