diff mbox

btrfs-progs: image: handle superblocks correctly on fs with big blocks

Message ID 1367874680-9502-1-git-send-email-dsterba@suse.cz (mailing list archive)
State Under Review, archived
Headers show

Commit Message

David Sterba May 6, 2013, 9:11 p.m. UTC
Superblock is always 4k, but metadata blocks may be larger. We have to
use the appropriate block size when doing checksums, otherwise they're
wrong.

Signed-off-by: David Sterba <dsterba@suse.cz>
---
 btrfs-image.c | 27 +++++++++++++++++++++++----
 1 file changed, 23 insertions(+), 4 deletions(-)

Comments

Stefan Behrens May 7, 2013, 8:44 a.m. UTC | #1
On Mon,  6 May 2013 23:11:20 +0200, David Sterba wrote:
> Superblock is always 4k, but metadata blocks may be larger. We have to
> use the appropriate block size when doing checksums, otherwise they're
> wrong.
> 
> Signed-off-by: David Sterba <dsterba@suse.cz>
> ---
>  btrfs-image.c | 27 +++++++++++++++++++++++----
>  1 file changed, 23 insertions(+), 4 deletions(-)
> 
> diff --git a/btrfs-image.c b/btrfs-image.c
> index 188291c..dca7a28 100644
> --- a/btrfs-image.c
> +++ b/btrfs-image.c
> @@ -469,6 +469,16 @@ static int read_data_extent(struct metadump_struct *md,
>  	return 0;
>  }
>  
> +static int is_sb_offset(u64 offset) {
> +	switch (offset) {
> +		case 65536:
> +		case 67108864:
> +		case 274877906944:

Using btrfs_sb_offset() and an if statement would produce the same code
and would be more readable.
Additionally, the last huge number will cause a warning on 32-bit
systems, I assume.


> +			return 1;
> +	}
> +	return 0;
> +}
> +
>  static int flush_pending(struct metadump_struct *md, int done)
>  {
>  	struct async_work *async = NULL;
> @@ -506,7 +516,16 @@ static int flush_pending(struct metadump_struct *md, int done)
>  		}
>  
>  		while (!md->data && size > 0) {
> -			eb = read_tree_block(md->root, start, blocksize, 0);
> +			/*
> +			 * We must differentiate between superblock and
> +			 * metadata on filesystems with blocksize > 4k,
> +			 * otherwise the checksum fails for superblock
> +			 */
> +			int bs = blocksize;
> +
> +			if (is_sb_offset(start))
> +				bs = BTRFS_SUPER_INFO_SIZE;
> +			eb = read_tree_block(md->root, start, bs, 0);
>  			if (!eb) {
>  				free(async->buffer);
>  				free(async);
> @@ -516,9 +535,9 @@ static int flush_pending(struct metadump_struct *md, int done)
>  			}
>  			copy_buffer(async->buffer + offset, eb);
>  			free_extent_buffer(eb);
> -			start += blocksize;
> -			offset += blocksize;
> -			size -= blocksize;
> +			start += bs;
> +			offset += bs;
> +			size -= bs;
>  		}
>  
>  		md->pending_start = (u64)-1;
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Sterba May 10, 2013, 11:20 a.m. UTC | #2
On Tue, May 07, 2013 at 10:44:05AM +0200, Stefan Behrens wrote:
> On Mon,  6 May 2013 23:11:20 +0200, David Sterba wrote:
> > Superblock is always 4k, but metadata blocks may be larger. We have to
> > use the appropriate block size when doing checksums, otherwise they're
> > wrong.
> > 
> > Signed-off-by: David Sterba <dsterba@suse.cz>
> > ---
> >  btrfs-image.c | 27 +++++++++++++++++++++++----
> >  1 file changed, 23 insertions(+), 4 deletions(-)
> > 
> > diff --git a/btrfs-image.c b/btrfs-image.c
> > index 188291c..dca7a28 100644
> > --- a/btrfs-image.c
> > +++ b/btrfs-image.c
> > @@ -469,6 +469,16 @@ static int read_data_extent(struct metadump_struct *md,
> >  	return 0;
> >  }
> >  
> > +static int is_sb_offset(u64 offset) {
> > +	switch (offset) {
> > +		case 65536:
> > +		case 67108864:
> > +		case 274877906944:
> 
> Using btrfs_sb_offset() and an if statement would produce the same code
> and would be more readable.

It was there originally, but this function is called for each block and
I've optimized it a bit right away. I'll add a comment.

> Additionally, the last huge number will cause a warning on 32-bit
> systems, I assume.

No warning with -m32 -Wall.

david
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Stefan Behrens May 10, 2013, 2:26 p.m. UTC | #3
On 05/10/2013 13:20, David Sterba wrote:
> On Tue, May 07, 2013 at 10:44:05AM +0200, Stefan Behrens wrote:
>> On Mon,  6 May 2013 23:11:20 +0200, David Sterba wrote:
>>> Superblock is always 4k, but metadata blocks may be larger. We have to
>>> use the appropriate block size when doing checksums, otherwise they're
>>> wrong.
>>>
>>> Signed-off-by: David Sterba <dsterba@suse.cz>
>>> ---
>>>   btrfs-image.c | 27 +++++++++++++++++++++++----
>>>   1 file changed, 23 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/btrfs-image.c b/btrfs-image.c
>>> index 188291c..dca7a28 100644
>>> --- a/btrfs-image.c
>>> +++ b/btrfs-image.c
>>> @@ -469,6 +469,16 @@ static int read_data_extent(struct metadump_struct *md,
>>>   	return 0;
>>>   }
>>>
>>> +static int is_sb_offset(u64 offset) {
>>> +	switch (offset) {
>>> +		case 65536:
>>> +		case 67108864:
>>> +		case 274877906944:
>>
>> Using btrfs_sb_offset() and an if statement would produce the same code
>> and would be more readable.
>
> It was there originally, but this function is called for each block and
> I've optimized it a bit right away. I'll add a comment.

You have not optimized it. gcc does not generate a jump table with 274 
billion entries (I have verified it). The code is the same in both cases.

Here is the C code diff:

*** sw.c        2013-05-10 16:14:48.692299000 +0200
--- if.c        2013-05-10 16:15:03.496639000 +0200
***************
*** 471,482 ****

   static int is_sb_offset(u64 offset)
   {
!       switch (offset) {
!       case 65536:
!       case 67108864:
!       case 274877906944:
                 return 1;
-       }
         return 0;
   }

--- 471,480 ----

   static int is_sb_offset(u64 offset)
   {
!       if (offset == btrfs_sb_offset(0) ||
!           offset == btrfs_sb_offset(1) ||
!           offset == btrfs_sb_offset(2))
                 return 1;
         return 0;
   }


And this is the generated amd64 assembler code diff:

*** /tmp/sw     2013-05-10 15:57:49.096976480 +0200
--- /tmp/if     2013-05-10 16:00:41.712927262 +0200
***************
*** 1378,1397 ****
!     16c1:     49 81 fe 00 00 00 04    cmp    $0x4000000,%r14
       16c8:     74 20                   je     16ea <flush_pending+0x305>
!     16ca:     48 b8 00 00 00 00 40    mov    $0x4000000000,%rax
!     16d1:     00 00 00
!     16d4:     49 39 c6                cmp    %rax,%r14
!     16d7:     74 11                   je     16ea <flush_pending+0x305>
!     16d9:     8b 54 24 4c             mov    0x4c(%rsp),%edx
!     16dd:     89 54 24 20             mov    %edx,0x20(%rsp)
!     16e1:     49 81 fe 00 00 01 00    cmp    $0x10000,%r14
       16e8:     75 08                   jne    16f2 <flush_pending+0x30d>
!     16ea:     c7 44 24 20 00 10 00    movl   $0x1000,0x20(%rsp)
       16f1:     00
       16f2:     b9 00 00 00 00          mov    $0x0,%ecx
!     16f7:     8b 54 24 20             mov    0x20(%rsp),%edx
--- 1378,1397 ----
!     16c1:     49 81 fe 00 00 01 00    cmp    $0x10000,%r14
       16c8:     74 20                   je     16ea <flush_pending+0x305>
!     16ca:     49 81 fe 00 00 00 04    cmp    $0x4000000,%r14
!     16d1:     74 17                   je     16ea <flush_pending+0x305>
!     16d3:     8b 44 24 4c             mov    0x4c(%rsp),%eax
!     16d7:     89 44 24 10             mov    %eax,0x10(%rsp)
!     16db:     48 ba 00 00 00 00 40    mov    $0x4000000000,%rdx
!     16e2:     00 00 00
!     16e5:     49 39 d6                cmp    %rdx,%r14
       16e8:     75 08                   jne    16f2 <flush_pending+0x30d>
!     16ea:     c7 44 24 10 00 10 00    movl   $0x1000,0x10(%rsp)
       16f1:     00
       16f2:     b9 00 00 00 00          mov    $0x0,%ecx
!     16f7:     8b 54 24 10             mov    0x10(%rsp),%edx

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Sterba May 10, 2013, 3:03 p.m. UTC | #4
On Fri, May 10, 2013 at 04:26:32PM +0200, Stefan Behrens wrote:
> On 05/10/2013 13:20, David Sterba wrote:
> >On Tue, May 07, 2013 at 10:44:05AM +0200, Stefan Behrens wrote:
> >>On Mon,  6 May 2013 23:11:20 +0200, David Sterba wrote:
> >>>Superblock is always 4k, but metadata blocks may be larger. We have to
> >>>use the appropriate block size when doing checksums, otherwise they're
> >>>wrong.
> >>>
> >>>Signed-off-by: David Sterba <dsterba@suse.cz>
> >>>---
> >>>  btrfs-image.c | 27 +++++++++++++++++++++++----
> >>>  1 file changed, 23 insertions(+), 4 deletions(-)
> >>>
> >>>diff --git a/btrfs-image.c b/btrfs-image.c
> >>>index 188291c..dca7a28 100644
> >>>--- a/btrfs-image.c
> >>>+++ b/btrfs-image.c
> >>>@@ -469,6 +469,16 @@ static int read_data_extent(struct metadump_struct *md,
> >>>  	return 0;
> >>>  }
> >>>
> >>>+static int is_sb_offset(u64 offset) {
> >>>+	switch (offset) {
> >>>+		case 65536:
> >>>+		case 67108864:
> >>>+		case 274877906944:
> >>
> >>Using btrfs_sb_offset() and an if statement would produce the same code
> >>and would be more readable.
> >
> >It was there originally, but this function is called for each block and
> >I've optimized it a bit right away. I'll add a comment.
> 
> You have not optimized it. gcc does not generate a jump table with 274
> billion entries (I have verified it). The code is the same in both cases.

Point for you. I'll use btrfs_sb_offset.

david
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Chris Mason May 10, 2013, 3:08 p.m. UTC | #5
Quoting David Sterba (2013-05-06 17:11:20)
> Superblock is always 4k, but metadata blocks may be larger. We have to
> use the appropriate block size when doing checksums, otherwise they're
> wrong.

The size coming in from the md should be correct.  See this commit from
my integration branch

https://git.kernel.org/cgit/linux/kernel/git/mason/btrfs-progs.git/commit/?h=integration&id=9c821327408803229e93a788e032e8e9caf11686

-chris
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/btrfs-image.c b/btrfs-image.c
index 188291c..dca7a28 100644
--- a/btrfs-image.c
+++ b/btrfs-image.c
@@ -469,6 +469,16 @@  static int read_data_extent(struct metadump_struct *md,
 	return 0;
 }
 
+static int is_sb_offset(u64 offset) {
+	switch (offset) {
+		case 65536:
+		case 67108864:
+		case 274877906944:
+			return 1;
+	}
+	return 0;
+}
+
 static int flush_pending(struct metadump_struct *md, int done)
 {
 	struct async_work *async = NULL;
@@ -506,7 +516,16 @@  static int flush_pending(struct metadump_struct *md, int done)
 		}
 
 		while (!md->data && size > 0) {
-			eb = read_tree_block(md->root, start, blocksize, 0);
+			/*
+			 * We must differentiate between superblock and
+			 * metadata on filesystems with blocksize > 4k,
+			 * otherwise the checksum fails for superblock
+			 */
+			int bs = blocksize;
+
+			if (is_sb_offset(start))
+				bs = BTRFS_SUPER_INFO_SIZE;
+			eb = read_tree_block(md->root, start, bs, 0);
 			if (!eb) {
 				free(async->buffer);
 				free(async);
@@ -516,9 +535,9 @@  static int flush_pending(struct metadump_struct *md, int done)
 			}
 			copy_buffer(async->buffer + offset, eb);
 			free_extent_buffer(eb);
-			start += blocksize;
-			offset += blocksize;
-			size -= blocksize;
+			start += bs;
+			offset += bs;
+			size -= bs;
 		}
 
 		md->pending_start = (u64)-1;