From patchwork Wed Jan 26 07:46:08 2011 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: =?utf-8?b?Q3lyaWxsZSBDaMOpcMOpbG92?= X-Patchwork-Id: 508281 Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by demeter1.kernel.org (8.14.4/8.14.3) with ESMTP id p0Q7kNba015941 for ; Wed, 26 Jan 2011 07:46:24 GMT Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1751954Ab1AZHqV (ORCPT ); Wed, 26 Jan 2011 02:46:21 -0500 Received: from smtp5-g21.free.fr ([212.27.42.5]:46759 "EHLO smtp5-g21.free.fr" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751629Ab1AZHqU (ORCPT ); Wed, 26 Jan 2011 02:46:20 -0500 Received: from venilia.domus (unknown [82.228.34.28]) by smtp5-g21.free.fr (Postfix) with ESMTP id B5808D48054; Wed, 26 Jan 2011 08:46:12 +0100 (CET) Received: from localhost (localhost [127.0.0.1]) by venilia.domus (Postfix) with ESMTP id 9C13D465E0; Wed, 26 Jan 2011 08:46:12 +0100 (CET) Received: from venilia.domus ([127.0.0.1]) by localhost (venilia.chepelov.org [127.0.0.1]) (amavisd-new, port 10024) with ESMTP id g8A43ulv4R+F; Wed, 26 Jan 2011 08:46:12 +0100 (CET) Received: from [192.168.250.65] (hestia [192.168.250.65]) (Authenticated sender: cyrille) by venilia.domus (Postfix) with ESMTPSA id 6127F465DB; Wed, 26 Jan 2011 08:46:12 +0100 (CET) Subject: Re: full btrfs partition, became unmountable (+ a solution that thankfully worked for me) From: Cyrille =?ISO-8859-1?Q?Ch=E9p=E9lov?= To: Shawn Stricker Cc: linux-btrfs@vger.kernel.org In-Reply-To: References: <1295981200.29117.19.camel@hestia> Date: Wed, 26 Jan 2011 08:46:08 +0100 Message-ID: <1296027969.29117.26.camel@hestia> Mime-Version: 1.0 X-Mailer: Evolution 2.30.3 Sender: linux-btrfs-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-btrfs@vger.kernel.org X-Greylist: IP, sender and recipient auto-whitelisted, not delayed by milter-greylist-4.2.6 (demeter1.kernel.org [140.211.167.41]); 
Wed, 26 Jan 2011 07:46:24 +0000 (UTC) diff --git a/btrfsck.c b/btrfsck.c index 63e44d1..1e6bc32 100644 --- a/btrfsck.c +++ b/btrfsck.c @@ -2823,13 +2823,17 @@ int main(int ac, char **av) u64 bytenr = 0; int ret; int num; + int emergency_super = 0; while(1) { int c; - c = getopt(ac, av, "s:"); + c = getopt(ac, av, "es:"); if (c < 0) break; switch(c) { + case 'e': + emergency_super = 1; + break; case 's': num = atol(optarg); bytenr = btrfs_sb_offset(num); @@ -2861,6 +2865,12 @@ int main(int ac, char **av) if (root == NULL) return 1; + if (root->fs_info->emergency_root_tree && (!emergency_super)) { + printf("DANGEROUS: had to use a synthetic super. Please run with '-e' flag if you know why you do it.\n"); + printf(" ... and have perfect backups.\n"); + return 1; + } + ret = check_extents(root); if (ret) goto out; @@ -2869,6 +2879,27 @@ int main(int ac, char **av) goto out; ret = check_root_refs(root, &root_cache); + if (ret) + goto out; + + + if (root->fs_info->emergency_root_tree) { + printf("DANGEROUS: had to use a scavenged root. Apparently could figure out the primary trees ?\n" + " Now writing supers, knock wood.\n"); + free_root_recs(&root_cache); + close_ctree(root); + + cache_tree_init(&root_cache); + root = open_ctree(av[optind], bytenr, 1 /* WRITES! 
*/); + + ret = write_all_supers(root); + if (ret) { + printf("Error writing superblocks.\n"); + goto out; + } + printf("Wrote back superblocks.\n"); + } + out: free_root_recs(&root_cache); close_ctree(root); @@ -2897,6 +2928,7 @@ out: printf("file data blocks allocated: %llu\n referenced %llu\n", (unsigned long long)data_bytes_allocated, (unsigned long long)data_bytes_referenced); + printf("%s\n", BTRFS_BUILD_VERSION); return ret; } diff --git a/ctree.h b/ctree.h index b79e238..7439d87 100644 --- a/ctree.h +++ b/ctree.h @@ -728,6 +728,7 @@ struct btrfs_fs_info { struct list_head space_info; int system_allocs; int readonly; + int emergency_root_tree; }; /* diff --git a/disk-io.c b/disk-io.c index a6e1000..0b4e7f9 100644 --- a/disk-io.c +++ b/disk-io.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include "kerncompat.h" @@ -41,8 +42,11 @@ static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf) struct btrfs_fs_devices *fs_devices; int ret = 1; - if (buf->start != btrfs_header_bytenr(buf)) + if (buf->start != btrfs_header_bytenr(buf)) { + fprintf(stderr, "start place mismatch, buf says %llu btrfs_hdr says %llu\n", + buf->start, btrfs_header_bytenr(buf)); return ret; + } fs_devices = root->fs_info->fs_devices; while (fs_devices) { @@ -204,16 +208,26 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, eb->dev_bytenr = multi->stripes[0].physical; kfree(multi); ret = read_extent_from_disk(eb); - if (ret == 0 && check_tree_block(root, eb) == 0 && - csum_tree_block(root, eb, 1) == 0 && - verify_parent_transid(eb->tree, eb, parent_transid) == 0) { - btrfs_set_buffer_uptodate(eb); - return eb; + if (ret == 0) { + + if (check_tree_block(root, eb) == 0) { + /* fprintf(stderr, "checked tree block %p for %p -- %llu \n", root, eb, eb->start);*/ + if (csum_tree_block(root, eb, 1) == 0) { + /* fprintf(stderr, "tree block csum %p for %p--%llu is OK\n", root, eb, eb->start); */ + if 
(verify_parent_transid(eb->tree, eb, parent_transid) == 0) { + /*fprintf(stderr, "tree block %p--%llu has correct transid, setting uptodate\n", eb, eb->start); */ + + btrfs_set_buffer_uptodate(eb); + return eb; + } + } + } + } num_copies = btrfs_num_copies(&root->fs_info->mapping_tree, eb->start, eb->len); if (num_copies == 1) { - break; + //break; } mirror_num++; if (mirror_num > num_copies) { @@ -581,7 +595,7 @@ struct btrfs_root *open_ctree(const char *filename, u64 sb_bytenr, int writes) fp = open(filename, flags, 0600); if (fp < 0) { - fprintf (stderr, "Could not open %s\n", filename); + fprintf (stderr, "Could not open %s: %s\n", filename, strerror(errno)); return NULL; } root = open_ctree_fd(fp, filename, sb_bytenr, writes); @@ -590,6 +604,102 @@ struct btrfs_root *open_ctree(const char *filename, u64 sb_bytenr, int writes) return root; } +#define MAX_EMERG_BUCKETS 30 /* number of slots in the per-owner best_gen[] / best_bytenr[] tables used by the scan below */ +static int try_emergency_tree_fixup(struct btrfs_super_block *disk_super, + struct btrfs_root *chunk_root, + struct btrfs_root *tree_root) +{ /* Last-resort recovery helper. Reads the fd of chunk_root->node in blocksize steps, starting at btrfs_super_bytenr(disk_super) and stopping at btrfs_super_total_bytes(disk_super), and looks for block headers whose bytenr field equals the offset they were read from and whose fsid matches the one taken from chunk_root->node. For every owner id below MAX_EMERG_BUCKETS it remembers the highest generation seen and where it was found. If the best generations recorded for BTRFS_CSUM_TREE_OBJECTID and BTRFS_ROOT_TREE_OBJECTID are equal, it stores the root tree generation and bytenr into the in-memory disk_super, sets tree_root->fs_info->emergency_root_tree and returns 0; otherwise it returns 1. Nothing is written to disk here. */ + struct btrfs_header* buf; + u64 ofs, oldofs; + u32 blocksize; + u64 generation; /* NOTE(review): assigned below but never read in this function */ + int ret, i; + + u64 best_gen[MAX_EMERG_BUCKETS]; + u64 best_bytenr[MAX_EMERG_BUCKETS]; + + memset(&best_gen, 0, sizeof(best_gen)); + memset(&best_bytenr, 0, sizeof(best_bytenr)); + + blocksize = btrfs_level_size(tree_root, + btrfs_super_chunk_root_level(disk_super)); + generation = btrfs_super_chunk_root_generation(disk_super); + buf = (struct btrfs_header*)malloc(blocksize); /* NOTE(review): the malloc() result is not checked before pread() writes into buf */ + + oldofs = btrfs_super_bytenr(disk_super); + for (ofs = oldofs; + ofs < btrfs_super_total_bytes(disk_super); + ofs += blocksize) { /* one candidate block per iteration */ + + ret = pread(chunk_root->node->fd, buf, blocksize, ofs); + if (ret == blocksize) { + u64 blockofs = le64_to_cpu(buf->bytenr); + if (blockofs != ofs) continue; /* the header must claim the offset it was read from */ + + char* src = chunk_root->node->data + (long)btrfs_header_fsid(chunk_root->node); + char* dst = (char*) &(buf->fsid); + if (memcmp(src, dst, sizeof(buf->fsid)) != 0) + continue; /* the fsid must match the bytes taken from chunk_root->node */ + + u64 blockgen = 
le64_to_cpu(buf->generation); + u64 blockowner = le64_to_cpu(buf->owner); + u32 blocknritems = le32_to_cpu(buf->nritems); + u8 blocklevel = le8_to_cpu(buf->level); + + /* + fprintf(stderr, + " found valid header at %llu(+%llu) -- gen=%llu owner=%llu nritems=%u level=%u ", + ofs, ofs-oldofs, blockgen, blockowner, blocknritems, blocklevel); + */ + + if ((blockowner < 0ull) && (blockowner > -11ull)) { blockowner += 30; /* hack */ } /* NOTE(review): blockowner is a u64, so (blockowner < 0ull) can never be true and this remap never runs; presumably it was meant to fold owners -1..-10 into buckets 20..29 -- confirm intent */ + + if ((blockowner >= 0) && (blockowner < MAX_EMERG_BUCKETS)) { /* (blockowner >= 0) always holds for a u64; only the upper bound filters */ + if (blockgen > best_gen[blockowner]) { /* keep only the highest generation seen for each owner */ + best_gen[blockowner] = blockgen; + best_bytenr[blockowner] = ofs; + + fprintf(stderr, + " found valid header at %llu(+%llu) -- gen=%llu owner=%llu nritems=%u level=%u ", + ofs, ofs-oldofs, blockgen, blockowner, blocknritems, blocklevel); + + fprintf(stderr, + " ... new best gen for ObjectID %llu at %llu\n", blockowner, ofs); + } else { + } + } else { + } + + oldofs = ofs; /* the (+N) value logged above is therefore the distance from the previous matching header */ + } + } + fprintf(stderr,"*** done scanning, at offset %llu ***\n", ofs); + for ( i = 0; i < MAX_EMERG_BUCKETS; ++i) { + fprintf(stderr," for ObjectID: %d, max gen=%llu at %llu\n", i, best_gen[i], best_bytenr[i]); + } + + + free(buf); + +#if 1 + if (best_gen[BTRFS_CSUM_TREE_OBJECTID] == best_gen[BTRFS_ROOT_TREE_OBJECTID]) { /* NOTE(review): both tables start zeroed, so this also holds (0 == 0) when neither owner was found, and the super root is then set to bytenr 0 */ + /* now we try to do the repair */ + fprintf(stderr, + " ATTEMPTING DANGEROUS REPAIR with root gen=%llu bytenr=%llu\n", + best_gen[BTRFS_ROOT_TREE_OBJECTID], best_bytenr[BTRFS_ROOT_TREE_OBJECTID]); + btrfs_set_super_generation(disk_super, best_gen[BTRFS_ROOT_TREE_OBJECTID]); + btrfs_set_super_root(disk_super, best_bytenr[BTRFS_ROOT_TREE_OBJECTID]); + + /* not actually changing the on-disk super. 
Debug should proceed, + fsck should eventually rewrite the super ?*/ + tree_root->fs_info->emergency_root_tree = 1; /* this flag is tested by main() in btrfsck.c and by close_all_devices() elsewhere in this patch */ + return 0; + } +#endif + + return 1; /* nothing done */ +} + struct btrfs_root *open_ctree_fd(int fp, const char *path, u64 sb_bytenr, int writes) { @@ -736,7 +846,26 @@ struct btrfs_root *open_ctree_fd(int fp, const char *path, u64 sb_bytenr, tree_root->node = read_tree_block(tree_root, btrfs_super_root(disk_super), blocksize, generation); + if (!tree_root->node) { + fprintf(stderr,"*** did not find a root, about to abort ***\n"); + fprintf(stderr,"*** will attempt to find useful trees before bailing out anyway: ***\n"); + ret = try_emergency_tree_fixup(disk_super, chunk_root, tree_root); + if (!ret) { + fprintf(stderr,"a repair happened, trying again (once):\n"); + + generation = btrfs_super_generation(disk_super); + tree_root->node = read_tree_block(tree_root, + btrfs_super_root(disk_super), + blocksize, generation); + if (!tree_root->node) { + fprintf(stderr,"*** again, did not find a root, about to abort, for good. ***\n"); + } + } + } BUG_ON(!tree_root->node); + if (!tree_root->node) { + fprintf(stderr,"*** huh? 
***\n"); + } ret = find_and_setup_root(tree_root, fs_info, BTRFS_EXTENT_TREE_OBJECTID, extent_root); BUG_ON(ret); @@ -774,12 +903,15 @@ struct btrfs_root *open_ctree_fd(int fp, const char *path, u64 sb_bytenr, int btrfs_read_dev_super(int fd, struct btrfs_super_block *sb, u64 sb_bytenr) { u8 fsid[BTRFS_FSID_SIZE]; + char up_fsid[37], up_ofsid[37]; struct btrfs_super_block buf; int i; int ret; u64 transid = 0; u64 bytenr; + + if (sb_bytenr != BTRFS_SUPER_INFO_OFFSET) { ret = pread64(fd, &buf, sizeof(buf), sb_bytenr); if (ret < sizeof(buf)) @@ -796,24 +928,63 @@ int btrfs_read_dev_super(int fd, struct btrfs_super_block *sb, u64 sb_bytenr) for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { bytenr = btrfs_sb_offset(i); + fprintf(stderr, "trying potential super #%d at bytenr %llu \n", + i, bytenr); + + ret = pread64(fd, &buf, sizeof(buf), bytenr); - if (ret < sizeof(buf)) + if (ret < sizeof(buf)) { + fprintf(stderr, " got only %d bytes instead of %lu\n", + ret, sizeof(buf)); break; + } - if (btrfs_super_bytenr(&buf) != bytenr || - strncmp((char *)(&buf.magic), BTRFS_MAGIC, - sizeof(buf.magic))) + + if (btrfs_super_bytenr(&buf) != bytenr) { + fprintf(stderr, " misplaced block thinks it's at %llu\n", + btrfs_super_bytenr(&buf)); + continue; + } + + if (strncmp((char *)(&buf.magic), BTRFS_MAGIC, + sizeof(buf.magic))) { + fprintf(stderr, " invalid magic\n"); continue; + } if (i == 0) memcpy(fsid, buf.fsid, sizeof(fsid)); - else if (memcmp(fsid, buf.fsid, sizeof(fsid))) + else if (memcmp(fsid, buf.fsid, sizeof(fsid))) { + uuid_unparse(fsid, up_fsid); + uuid_unparse(buf.fsid, up_ofsid); + fprintf(stderr, " wrong fsid %s expected %s \n", up_fsid, up_ofsid); + continue; + } + + if (btrfs_super_generation(&buf) < transid) { + fprintf(stderr, "super #%d at bytenr %llu has older generation %llu than %llu, skipping\n", + i, bytenr, btrfs_super_generation(&buf), transid); + continue; + } + + if (btrfs_super_generation(&buf) == transid) { + fprintf(stderr, "super #%d at bytenr %llu has 
same generation %llu than %llu, skipping\n", + i, bytenr, btrfs_super_generation(&buf), transid); - if (btrfs_super_generation(&buf) > transid) { - memcpy(sb, &buf, sizeof(*sb)); - transid = btrfs_super_generation(&buf); + if (memcmp(sb, &buf, sizeof(*sb))) { + fprintf(stderr, " warning: super #%d at bytenr %llu has different contents!\n", + i, bytenr); + } + continue; } + + /* btrfs_super_generation(&buf) > transid */ + fprintf(stderr, "super #%d at bytenr %llu has better generation %llu than %llu, using that\n", + i, bytenr, btrfs_super_generation(&buf), transid); + + memcpy(sb, &buf, sizeof(*sb)); + transid = btrfs_super_generation(&buf); } return transid > 0 ? 0 : -1; @@ -930,7 +1101,12 @@ static int close_all_devices(struct btrfs_fs_info *fs_info) struct list_head *next; struct btrfs_device *device; - return 0; + if (!fs_info->emergency_root_tree) { + /* huh? there was a "return 0" sitting here. Yes we leaked fd's. + Leaving it on when not doing funky desperate things. + */ + return 0; + } list = &fs_info->fs_devices->devices; list_for_each(next, list) { diff --git a/disk-io.h b/disk-io.h index 49e5692..0af98b4 100644 --- a/disk-io.h +++ b/disk-io.h @@ -64,6 +64,7 @@ int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid); int btrfs_set_buffer_uptodate(struct extent_buffer *buf); int wait_on_tree_block_writeback(struct btrfs_root *root, struct extent_buffer *buf); +int write_all_supers(struct btrfs_root *root); u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len); void btrfs_csum_final(u32 crc, char *result);