From patchwork Fri Aug 17 20:11:31 2012 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Alexandre Oliva X-Patchwork-Id: 1339571 Return-Path: X-Original-To: patchwork-ceph-devel@patchwork.kernel.org Delivered-To: patchwork-process-083081@patchwork2.kernel.org Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by patchwork2.kernel.org (Postfix) with ESMTP id 451BEDF280 for ; Fri, 17 Aug 2012 20:11:55 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1758796Ab2HQULy (ORCPT ); Fri, 17 Aug 2012 16:11:54 -0400 Received: from lsd-gw.ic.unicamp.br ([143.106.7.165]:45060 "EHLO boneca.lsd.ic.unicamp.br" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1755665Ab2HQULx (ORCPT ); Fri, 17 Aug 2012 16:11:53 -0400 Received: from freie (gw-to-emilia.oliva.athome.lsd.ic.unicamp.br [172.31.160.17] (may be forged)) by boneca.lsd.ic.unicamp.br (8.14.5/8.14.5) with ESMTP id q7HKBfIp006496; Fri, 17 Aug 2012 17:11:44 -0300 Received: from livre.localdomain (livre-to-gw.oliva.athome.lsd.ic.unicamp.br [172.31.160.19]) by freie (8.14.5/8.14.5) with ESMTP id q7HKBc39026764; Fri, 17 Aug 2012 17:11:38 -0300 Received: from livre.localdomain (aoliva@localhost.localdomain [127.0.0.1]) by livre.localdomain (8.14.3/8.14.3/Debian-5+lenny1) with ESMTP id q7HKBbr7016417; Fri, 17 Aug 2012 17:11:37 -0300 Received: (from aoliva@localhost) by livre.localdomain (8.14.3/8.14.3/Submit) id q7HKBVrb016413; Fri, 17 Aug 2012 17:11:31 -0300 X-Authentication-Warning: livre.localdomain: aoliva set sender to oliva@lsd.ic.unicamp.br using -f From: Alexandre Oliva To: Sage Weil Cc: ceph-devel@vger.kernel.org Subject: Re: Preserve dir default_file_layout in encoded inode Organization: Free thinker, not speaking for University of Campinas References: Date: Fri, 17 Aug 2012 17:11:31 -0300 In-Reply-To: (Alexandre Oliva's message of "Fri, 17 Aug 2012 02:51:45 -0300") Message-ID: User-Agent: 
Gnus/5.13 (Gnus v5.13) Emacs/23.1 (gnu/linux) MIME-Version: 1.0 Sender: ceph-devel-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: ceph-devel@vger.kernel.org On Aug 17, 2012, Alexandre Oliva wrote: > On Aug 16, 2012, Sage Weil wrote: >> I've dusted it off and repushed it.. can you take a look? >> wip-mds-layout. > The patch looks good, but testing it is going to be a bit of a challenge > because of the on-disk format change and the current unavailability of > cluster snapshots. I'll figure something out, once I'm done with an > ongoing cluster operation. I gather some of the changes would have required additional versioning changes to avoid a fully incompatible format change (as in, failure to properly decode an existing filesystem). So I ended up applying this patch on top of yours, to keep the on-disk format unchanged, so I could roll back to the prior format. The combination of the patches is something I'd be comfortable applying in a stable branch. I haven't yet completed testing (getting some layouts flushed from the cache and loaded back in), but it's looking good so far. From d1bae416891d5fa0289dcad4565365892b062e42 Mon Sep 17 00:00:00 2001 From: Alexandre Oliva Date: Fri, 17 Aug 2012 16:26:05 -0300 Subject: mds: restore on-disk format for dir layouts Revert on-disk and protocol changes introduced in 37485df86984. 
Signed-off-by: Alexandre Oliva --- src/mds/CInode.cc | 33 +++++++++++++++++++++------------ src/mds/CInode.h | 40 +++++++++++++++++++++++++++++++++++----- src/mds/MDS.h | 2 +- src/mds/events/EMetaBlob.h | 23 ++++++++++++++++------- 4 files changed, 73 insertions(+), 25 deletions(-) diff --git a/src/mds/CInode.cc b/src/mds/CInode.cc index 2b29f01..53f9e69 100644 --- a/src/mds/CInode.cc +++ b/src/mds/CInode.cc @@ -1100,14 +1100,12 @@ void CInode::encode_lock_state(int type, bufferlist& bl) case CEPH_LOCK_IFILE: if (is_auth()) { + ::encode(inode.layout, bl); + ::encode(inode.size, bl); ::encode(inode.mtime, bl); ::encode(inode.atime, bl); ::encode(inode.time_warp_seq, bl); - if (!is_dir()) { - ::encode(inode.layout, bl); - ::encode(inode.size, bl); - ::encode(inode.client_ranges, bl); - } + ::encode(inode.client_ranges, bl); } else { bool dirty = filelock.is_dirty(); ::encode(dirty, bl); @@ -1189,7 +1187,13 @@ void CInode::encode_lock_state(int type, bufferlist& bl) case CEPH_LOCK_IPOLICY: if (inode.is_dir()) { - ::encode(inode.layout, bl); + bool has_layout = inode.has_layout(); + ::encode(has_layout, bl); + if (has_layout) { + __u8 lv = 1; + ::encode(lv, bl); + ::encode(inode.layout, bl); + } } break; @@ -1283,14 +1287,12 @@ void CInode::decode_lock_state(int type, bufferlist& bl) case CEPH_LOCK_IFILE: if (!is_auth()) { + ::decode(inode.layout, p); + ::decode(inode.size, p); ::decode(inode.mtime, p); ::decode(inode.atime, p); ::decode(inode.time_warp_seq, p); - if (!is_dir()) { - ::decode(inode.layout, p); - ::decode(inode.size, p); - ::decode(inode.client_ranges, p); - } + ::decode(inode.client_ranges, p); } else { bool replica_dirty; ::decode(replica_dirty, p); @@ -1432,7 +1434,14 @@ void CInode::decode_lock_state(int type, bufferlist& bl) case CEPH_LOCK_IPOLICY: if (inode.is_dir()) { - ::decode(inode.layout, p); + bool has_layout; + ::decode(has_layout, p); + if (has_layout) { + __u8 lv; + ::decode(lv, p); + assert(lv == 1); + ::decode(inode.layout, p); + } } 
break; diff --git a/src/mds/CInode.h b/src/mds/CInode.h index d4c0aa8..0b74b64 100644 --- a/src/mds/CInode.h +++ b/src/mds/CInode.h @@ -530,7 +530,7 @@ private: unsigned encode_parent_mutation(ObjectOperation& m); void encode_store(bufferlist& bl) { - __u8 struct_v = 3; + __u8 struct_v = 2; ::encode(struct_v, bl); ::encode(inode, bl); if (is_symlink()) @@ -541,6 +541,15 @@ private: encode_snap_blob(snapbl); ::encode(snapbl, bl); ::encode(old_inodes, bl); + if (struct_v == 2 && inode.is_dir()) { // FIXME: this is redundant + bool has_layout = inode.has_layout(); + ::encode(has_layout, bl); + if (has_layout) { + __u8 lv = 1; + ::encode(lv, bl); + ::encode(inode.layout, bl); + } + } } void decode_store(bufferlist::iterator& bl) { __u8 struct_v; @@ -555,10 +564,12 @@ private: decode_snap_blob(snapbl); ::decode(old_inodes, bl); if (struct_v == 2 && inode.is_dir()) { - bool default_layout_exists; - ::decode(default_layout_exists, bl); - if (default_layout_exists) { - ::decode(struct_v, bl); + bool has_layout; + ::decode(has_layout, bl); + if (has_layout) { + __u8 lv; + ::decode(lv, bl); + assert (lv == 1); ::decode(inode.layout, bl); } } @@ -576,6 +587,15 @@ private: _encode_base(bl); _encode_locks_state_for_replica(bl); + if (inode.is_dir()) { // FIXME: this is redundant + bool has_layout = inode.has_layout(); + ::encode(has_layout, bl); + if (has_layout) { + __u8 lv = 1; + ::encode(lv, bl); + ::encode(inode.layout, bl); + } + } } void decode_replica(bufferlist::iterator& p, bool is_new) { __u32 nonce; @@ -584,6 +604,16 @@ private: _decode_base(p); _decode_locks_state(p, is_new); + if (inode.is_dir()) { // FIXME: this is redundant + bool has_layout; + ::decode(has_layout, p); + if (has_layout) { + __u8 lv; + ::decode(lv, p); + assert(lv == 1); + ::decode(inode.layout, p); + } + } } diff --git a/src/mds/MDS.h b/src/mds/MDS.h index 71106c3..67a5102 100644 --- a/src/mds/MDS.h +++ b/src/mds/MDS.h @@ -35,7 +35,7 @@ #include "SessionMap.h" -#define CEPH_MDS_PROTOCOL 15 /* 
cluster internal */ +#define CEPH_MDS_PROTOCOL 14 /* cluster internal */ enum { diff --git a/src/mds/events/EMetaBlob.h b/src/mds/events/EMetaBlob.h index f63266e..38131d6 100644 --- a/src/mds/events/EMetaBlob.h +++ b/src/mds/events/EMetaBlob.h @@ -97,6 +97,15 @@ public: if (i.is_dir()) { ::encode(dft, _enc); ::encode(sbl, _enc); + if (struct_v >= 2 && struct_v <= 3) { // FIXME: this is redundant + bool has_layout = i.has_layout(); + ::encode(has_layout, _enc); + if (has_layout) { + __u8 lv = 1; + ::encode(lv, _enc); + ::encode(inode.layout, _enc); + } + } } ::encode(dr, _enc); ::encode(oi ? true : false, _enc); @@ -110,8 +119,6 @@ public: ~fullbit() {} void encode(bufferlist& bl) const { - __u8 struct_v = 3; - ::encode(struct_v, bl); assert(_enc.length()); bl.append(_enc); } @@ -129,11 +136,13 @@ public: if (inode.is_dir()) { ::decode(dirfragtree, bl); ::decode(snapbl, bl); - if (struct_v == 2) { - bool dir_layout_exists; - ::decode(dir_layout_exists, bl); - if (dir_layout_exists) { - ::decode(struct_v, bl); + if (struct_v >= 2 && struct_v <= 3) { + bool has_layout; + ::decode(has_layout, bl); + if (has_layout) { + __u8 lv; + ::decode(lv, bl); + assert(lv == 1); ::decode(inode.layout, bl); } }