From patchwork Tue Jul 29 02:29:11 2014 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Min Chen X-Patchwork-Id: 4638201 Return-Path: X-Original-To: patchwork-ceph-devel@patchwork.kernel.org Delivered-To: patchwork-parsemail@patchwork2.web.kernel.org Received: from mail.kernel.org (mail.kernel.org [198.145.19.201]) by patchwork2.web.kernel.org (Postfix) with ESMTP id CCB86C0338 for ; Tue, 29 Jul 2014 02:35:02 +0000 (UTC) Received: from mail.kernel.org (localhost [127.0.0.1]) by mail.kernel.org (Postfix) with ESMTP id 8FE6B20155 for ; Tue, 29 Jul 2014 02:35:01 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.kernel.org (Postfix) with ESMTP id 729FC2015D for ; Tue, 29 Jul 2014 02:35:00 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752595AbaG2Ce6 (ORCPT ); Mon, 28 Jul 2014 22:34:58 -0400 Received: from m59-178.qiye.163.com ([123.58.178.59]:59491 "EHLO m59-178.qiye.163.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1752528AbaG2Cez (ORCPT ); Mon, 28 Jul 2014 22:34:55 -0400 Received: from localhost.localdomain (unknown [202.197.9.8]) by m59-178.qiye.163.com (HMail) with ESMTPA id 2EA7A1481A98; Tue, 29 Jul 2014 10:29:20 +0800 (CST) From: Min Chen To: josh.durgin@inktank.com Cc: sage@inktank.com, ceph-devel@vger.kernel.org Subject: [PATCH 3/3] librbd: v3 copy-on-read for clones, write entire object into child asynchronously Date: Tue, 29 Jul 2014 10:29:11 +0800 Message-Id: X-Mailer: git-send-email 1.7.10.4 In-Reply-To: References: In-Reply-To: References: X-HM-Spam-Status: e1koWUFPN1dZCBgUCR5ZQUpOVUNJQkJCQkJJSExLTUtOTldZCQ4XHghZQV koKz0kKDQ9Lz06MjckMjUkMz46Pz4pQUtVS0A2IyQiPigkMjUkMz46Pz4pQUtVS0ArLykkNTQkMj UkMz46Pz4pQUlVS0A*IjU6NjI4JDIrJDU0JDI1JDM#Oj8#KUFLVUtANi43LzIkKTgrLyQ*Mj09Pi k#NS8kMjUkMz46Pz4pQUlVS0AyKyRISyQ2MjUuLz4kODUvJEskTktBS1VLQDIrJC80PzoiJDg1Ly 
RLJEpLS0FLVUtAMiskSiQ2MjUuLz4kODUvJEskSktBS1VLQDIrJEokMzQuKSQ4NS8kSyRKS0tBS1 VLQDIrJE4kNjI1Li8#JDg1LyRLJEpLQUtVS0A1NC8kPTo2NC4oJD80NjoyNSQoKz0kPToyN0FKS1 VLQCguOSQ#QUpVTk5APTUkKC45JD41LDQpPygkMzcxJEpLS0lLSkFLVUlDWQY+ X-HM-Sender-Digest: e1kSHx4VD1lBWUc6ORA6Nyo*Pzo0LjpLKj8yOjQuOjQwChVVSlVKT0tN TUtLQk1KTkhDVTMWGhIXVRYSFRgTHhU7DhkOFQ8OEAIXEhVVGBQWRVlXWQweGVlBHRoXCB5XWQgB WUFCTEtPN1dZEgtZQVlJS0lVSkJMVUJVQ1kG Sender: ceph-devel-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: ceph-devel@vger.kernel.org X-Spam-Status: No, score=-7.6 required=5.0 tests=BAYES_00, RCVD_IN_DNSWL_HI, RP_MATCHES_RCVD, UNPARSEABLE_RELAY autolearn=ham version=3.3.1 X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on mail.kernel.org X-Virus-Scanned: ClamAV using ClamSMTP The object has already been saved in the m_entire_object bufferlist. Send a "copyup" request with m_entire_object; the cls_rbd copyup method will then write the object data to the child. An AioCompletion is used to track each in-flight copyup aio operation, and it is removed after the copyup request has finished. Add an accessor, xlist::iterator::get_cur() (item *get_cur() const { return cur; }), to include/xlist.h to support removing an item from an xlist. 
Signed-off-by: Min Chen Signed-off-by: Li Wang Signed-off-by: Yunchuan Wen --- src/include/xlist.h | 1 + src/librbd/AioRequest.cc | 33 ++++++++++++++++++++++ src/librbd/AioRequest.h | 1 + src/librbd/ImageCtx.cc | 68 ++++++++++++++++++++++++++++++++++++++++++++++ src/librbd/ImageCtx.h | 6 ++++ src/librbd/internal.cc | 4 +++ 6 files changed, 113 insertions(+) diff --git a/src/include/xlist.h b/src/include/xlist.h index 5384561..3932c40 100644 --- a/src/include/xlist.h +++ b/src/include/xlist.h @@ -157,6 +157,7 @@ public: return *this; } bool end() const { return cur == 0; } + item *get_cur() const { return cur; } }; iterator begin() { return iterator(_front); } diff --git a/src/librbd/AioRequest.cc b/src/librbd/AioRequest.cc index 767fa75..ee0a98e 100644 --- a/src/librbd/AioRequest.cc +++ b/src/librbd/AioRequest.cc @@ -71,6 +71,38 @@ namespace librbd { /** read **/ + //copy-on-read: after read entire object, just write it into child + ssize_t AioRead::write_cor() + { + ldout(m_ictx->cct, 20) << "write_cor" << dendl; + int ret = 0; + + m_ictx->snap_lock.get_read(); + ::SnapContext snapc = m_ictx->snapc; + m_ictx->snap_lock.put_read(); + + librados::ObjectWriteOperation copyup_cor; + copyup_cor.exec("rbd", "copyup", m_entire_object); + + std::vector m_snaps; + for (std::vector::const_iterator it = snapc.snaps.begin(); + it != snapc.snaps.end(); ++it) { + m_snaps.push_back(it->val); + } + + librados::AioCompletion *cor_completion = + librados::Rados::aio_create_completion(m_ictx, librbd::cor_completion_callback, NULL); + + xlist::item *comp = + new xlist::item(cor_completion); + + m_ictx->add_cor_completion(comp);//add cor_completion to xlist + //asynchronously write object + ret = m_ictx->md_ctx.aio_operate(m_oid, cor_completion, ©up_cor, snapc.seq.val, m_snaps); + + return ret; + } + bool AioRead::should_complete(int r) { ldout(m_ictx->cct, 20) << "should_complete " << this << " " << m_oid << " " << m_object_off << "~" << m_object_len @@ -128,6 +160,7 @@ namespace 
librbd { m_ictx->prune_parent_extents(image_extents, image_overlap); // copy the read range to m_read_data m_read_data.substr_of(m_entire_object, m_object_off, m_object_len); + write_cor(); } } diff --git a/src/librbd/AioRequest.h b/src/librbd/AioRequest.h index 42301a5..4e40024 100644 --- a/src/librbd/AioRequest.h +++ b/src/librbd/AioRequest.h @@ -75,6 +75,7 @@ namespace librbd { m_tried_parent(false), m_sparse(sparse) { } virtual ~AioRead() {} + ssize_t write_cor(); virtual bool should_complete(int r); virtual int send(); diff --git a/src/librbd/ImageCtx.cc b/src/librbd/ImageCtx.cc index 6477e8d..f74eafb 100644 --- a/src/librbd/ImageCtx.cc +++ b/src/librbd/ImageCtx.cc @@ -45,6 +45,7 @@ namespace librbd { snap_lock("librbd::ImageCtx::snap_lock"), parent_lock("librbd::ImageCtx::parent_lock"), refresh_lock("librbd::ImageCtx::refresh_lock"), + cor_lock("librbd::ImageCtx::cor_lock"), extra_read_flags(0), old_format(true), order(0), size(0), features(0), @@ -96,6 +97,7 @@ namespace librbd { object_set->return_enoent = true; object_cacher->start(); } + cor_completions = new xlist(); } ImageCtx::~ImageCtx() { @@ -112,6 +114,10 @@ namespace librbd { delete object_set; object_set = NULL; } + if (cor_completions) { + delete cor_completions; + cor_completions = NULL; + } delete[] format_string; } @@ -648,4 +654,66 @@ namespace librbd { << " from image extents " << objectx << dendl; return len; } + + void ImageCtx::add_cor_completion(xlist::item *comp) + { + if(!comp) + return; + + cor_lock.Lock(); + cor_completions->push_back(comp); + cor_lock.Unlock(); + + ldout(cct, 10) << "add_cor_completion:: size = "<< cor_completions->size() << dendl; + } + + void ImageCtx::wait_last_completions() + { + ldout(cct, 10) << "wait_last_completions:: cor_completions = " << cor_completions << " size = " << cor_completions->size() << dendl; + xlist::iterator itr; + xlist::item *ptr; + + while (!cor_completions->empty()){ + cor_lock.Lock(); + librados::AioCompletion *comp = 
cor_completions->front(); + comp->wait_for_complete(); + itr = cor_completions->begin(); + ptr = itr.get_cur(); + cor_completions->pop_front(); + delete ptr; + ptr = NULL; + cor_lock.Unlock(); + } + ldout(cct, 10) << "wait_last_completions:: after clear cor_completions = " << cor_completions << " size = " << cor_completions->size() << dendl; + } + + void cor_completion_callback(librados::completion_t aio_completion_impl, void *arg) + { + librbd::ImageCtx * ictx = (librbd::ImageCtx *)arg; + + ictx->cor_lock.Lock(); + xlist *completions = ictx->cor_completions; + ictx->cor_lock.Unlock(); + + ldout(ictx->cct, 10) << "cor_completion_callback:: cor_completions = " << completions << " size = "<< completions->size() << dendl; + if (!completions) + return; + + //find current AioCompletion item in xlist, and remove it + for (xlist::iterator itr = completions->begin(); !(itr.end()); ++itr) { + if (aio_completion_impl == (*itr)->pc){ + xlist::item *ptr = itr.get_cur(); + + ictx->cor_lock.Lock(); + completions->remove(ptr); + ictx->cor_lock.Unlock(); + + delete ptr;//delete xlist::item * + ptr = NULL; + break; + } + } + ldout(ictx->cct, 10) << "cor_completion_callback:: after remove item, size = " << completions->size() << dendl; + } + } diff --git a/src/librbd/ImageCtx.h b/src/librbd/ImageCtx.h index 026a3e0..e1d08c9 100644 --- a/src/librbd/ImageCtx.h +++ b/src/librbd/ImageCtx.h @@ -68,6 +68,7 @@ namespace librbd { RWLock snap_lock; // protects snapshot-related member variables: RWLock parent_lock; // protects parent_md and parent Mutex refresh_lock; // protects refresh_seq and last_refresh + Mutex cor_lock; //protects cor_completions for copy-on-read unsigned extra_read_flags; @@ -89,6 +90,8 @@ namespace librbd { LibrbdWriteback *writeback_handler; ObjectCacher::ObjectSet *object_set; + xlist *cor_completions; //copy-on-read AioCompletions + /** * Either image_name or image_id must be set. 
* If id is not known, pass the empty std::string, @@ -148,7 +151,10 @@ namespace librbd { uint64_t prune_parent_extents(vector >& objectx, uint64_t overlap); + void add_cor_completion(xlist::item *comp); + void wait_last_completions();//wait for uncompleted asynchronous write which is still in xlist }; + void cor_completion_callback(librados::completion_t aio_completion_impl, void *arg); } #endif diff --git a/src/librbd/internal.cc b/src/librbd/internal.cc index 127be38..d676b77 100644 --- a/src/librbd/internal.cc +++ b/src/librbd/internal.cc @@ -2101,6 +2101,10 @@ reprotect_and_return_err: void close_image(ImageCtx *ictx) { ldout(ictx->cct, 20) << "close_image " << ictx << dendl; + + if (ictx->cor_completions) + ictx->wait_last_completions();//copy-on-read: wait for unfinished AioCompletion requests + if (ictx->object_cacher) ictx->shutdown_cache(); // implicitly flushes else