From patchwork Sun Mar 17 14:51:16 2013 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Yan, Zheng" X-Patchwork-Id: 2283741 Return-Path: X-Original-To: patchwork-ceph-devel@patchwork.kernel.org Delivered-To: patchwork-process-083081@patchwork2.kernel.org Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by patchwork2.kernel.org (Postfix) with ESMTP id 5BBD0E016C for ; Sun, 17 Mar 2013 15:06:04 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S932615Ab3CQOwe (ORCPT ); Sun, 17 Mar 2013 10:52:34 -0400 Received: from mga03.intel.com ([143.182.124.21]:34328 "EHLO mga03.intel.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S932610Ab3CQOwe (ORCPT ); Sun, 17 Mar 2013 10:52:34 -0400 Received: from azsmga002.ch.intel.com ([10.2.17.35]) by azsmga101.ch.intel.com with ESMTP; 17 Mar 2013 07:52:33 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="4.84,859,1355126400"; d="scan'208";a="215704303" Received: from unknown (HELO zyan5-mobl.ccr.corp.intel.com) ([10.255.20.118]) by AZSMGA002.ch.intel.com with ESMTP; 17 Mar 2013 07:52:30 -0700 From: "Yan, Zheng" To: ceph-devel@vger.kernel.org Cc: sage@inktank.com, greg@inktank.com, "Yan, Zheng" Subject: [PATCH 13/39] mds: don't send resolve message between active MDS Date: Sun, 17 Mar 2013 22:51:16 +0800 Message-Id: <1363531902-24909-14-git-send-email-zheng.z.yan@intel.com> X-Mailer: git-send-email 1.7.11.7 In-Reply-To: <1363531902-24909-1-git-send-email-zheng.z.yan@intel.com> References: <1363531902-24909-1-git-send-email-zheng.z.yan@intel.com> Sender: ceph-devel-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: ceph-devel@vger.kernel.org From: "Yan, Zheng" When MDS cluster is resolving, current behavior is sending subtree resolve message to all other MDS and waiting for all other MDS' resolve message. The problem is that active MDS can have different subtree map due to rename. 
Besides, gathering active MDS's resolve messages is also racy. The only function of these messages is to disambiguate other MDS' imports. We can replace them with an import finish notification. Signed-off-by: Yan, Zheng Reviewed-by: Greg Farnum --- src/mds/MDCache.cc | 12 +++++++++--- src/mds/Migrator.cc | 25 +++++++++++++++++++++++-- src/mds/Migrator.h | 3 ++- 3 files changed, 34 insertions(+), 6 deletions(-) diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index c455a20..73c1d59 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -2517,7 +2517,8 @@ void MDCache::send_subtree_resolves() ++p) { if (*p == mds->whoami) continue; - resolves[*p] = new MMDSResolve; + if (mds->is_resolve() || mds->mdsmap->is_resolve(*p)) + resolves[*p] = new MMDSResolve; } // known @@ -2837,7 +2838,7 @@ void MDCache::handle_resolve(MMDSResolve *m) migrator->import_reverse(dir); } else { dout(7) << "ambiguous import succeeded on " << *dir << dendl; - migrator->import_finish(dir); + migrator->import_finish(dir, true); } my_ambiguous_imports.erase(p); // no longer ambiguous. } @@ -3432,7 +3433,12 @@ void MDCache::rejoin_send_rejoins() ++p) { CDir *dir = p->first; assert(dir->is_subtree_root()); - assert(!dir->is_ambiguous_dir_auth()); + if (dir->is_ambiguous_dir_auth()) { + // exporter is recovering, importer is survivor. + assert(rejoins.count(dir->authority().first)); + assert(!rejoins.count(dir->authority().second)); + continue; + } // my subtree? 
if (dir->is_auth()) diff --git a/src/mds/Migrator.cc b/src/mds/Migrator.cc index 5e53803..833df12 100644 --- a/src/mds/Migrator.cc +++ b/src/mds/Migrator.cc @@ -2088,6 +2088,23 @@ void Migrator::import_reverse(CDir *dir) } } +void Migrator::import_notify_finish(CDir *dir, set& bounds) +{ + dout(7) << "import_notify_finish " << *dir << dendl; + + for (set::iterator p = import_bystanders[dir].begin(); + p != import_bystanders[dir].end(); + ++p) { + MExportDirNotify *notify = + new MExportDirNotify(dir->dirfrag(), false, + pair(import_peer[dir->dirfrag()], mds->get_nodeid()), + pair(mds->get_nodeid(), CDIR_AUTH_UNKNOWN)); + for (set::iterator i = bounds.begin(); i != bounds.end(); i++) + notify->get_bounds().push_back((*i)->dirfrag()); + mds->send_message_mds(notify, *p); + } +} + void Migrator::import_notify_abort(CDir *dir, set& bounds) { dout(7) << "import_notify_abort " << *dir << dendl; @@ -2183,11 +2200,11 @@ void Migrator::handle_export_finish(MExportDirFinish *m) CDir *dir = cache->get_dirfrag(m->get_dirfrag()); assert(dir); dout(7) << "handle_export_finish on " << *dir << dendl; - import_finish(dir); + import_finish(dir, false); m->put(); } -void Migrator::import_finish(CDir *dir) +void Migrator::import_finish(CDir *dir, bool notify) { dout(7) << "import_finish on " << *dir << dendl; @@ -2205,6 +2222,10 @@ void Migrator::import_finish(CDir *dir) // remove pins set bounds; cache->get_subtree_bounds(dir, bounds); + + if (notify) + import_notify_finish(dir, bounds); + import_remove_pins(dir, bounds); map > cap_imports; diff --git a/src/mds/Migrator.h b/src/mds/Migrator.h index 7988f32..2889a74 100644 --- a/src/mds/Migrator.h +++ b/src/mds/Migrator.h @@ -273,12 +273,13 @@ protected: void import_reverse_unfreeze(CDir *dir); void import_reverse_final(CDir *dir); void import_notify_abort(CDir *dir, set& bounds); + void import_notify_finish(CDir *dir, set& bounds); void import_logged_start(dirfrag_t df, CDir *dir, int from, map &imported_client_map, map& sseqmap); 
void handle_export_finish(MExportDirFinish *m); public: - void import_finish(CDir *dir); + void import_finish(CDir *dir, bool notify); protected: void handle_export_caps(MExportCaps *m);