From patchwork Mon Mar 18 09:15:31 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Dongsheng Yang X-Patchwork-Id: 10857027 Return-Path: Received: from mail.wl.linuxfoundation.org (pdx-wl-mail.web.codeaurora.org [172.30.200.125]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 3B5CC6C2 for ; Mon, 18 Mar 2019 09:25:22 +0000 (UTC) Received: from mail.wl.linuxfoundation.org (localhost [127.0.0.1]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 1D96B29117 for ; Mon, 18 Mar 2019 09:25:22 +0000 (UTC) Received: by mail.wl.linuxfoundation.org (Postfix, from userid 486) id 10D9829142; Mon, 18 Mar 2019 09:25:22 +0000 (UTC) X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on pdx-wl-mail.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-7.9 required=2.0 tests=BAYES_00,MAILING_LIST_MULTI, RCVD_IN_DNSWL_HI autolearn=ham version=3.3.1 Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.wl.linuxfoundation.org (Postfix) with ESMTP id 4F19A29117 for ; Mon, 18 Mar 2019 09:25:21 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727247AbfCRJZT (ORCPT ); Mon, 18 Mar 2019 05:25:19 -0400 Received: from m97134.mail.qiye.163.com ([220.181.97.134]:55481 "EHLO m97134.mail.qiye.163.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726691AbfCRJZT (ORCPT ); Mon, 18 Mar 2019 05:25:19 -0400 Received: from atest-guest.localdomain (unknown [218.94.118.90]) by smtp5 (Coremail) with SMTP id huCowABHr4m4YY9cO8zEAg--.331S15; Mon, 18 Mar 2019 17:15:42 +0800 (CST) From: Dongsheng Yang To: idryomov@gmail.com, jdillama@redhat.com, sage@redhat.com, elder@kernel.org Cc: ceph-devel@vger.kernel.org, Dongsheng Yang Subject: [PATCH v2 13/16] rbd: introduce journal in rbd_device Date: Mon, 18 Mar 2019 05:15:31 -0400 Message-Id: <1552900534-29026-14-git-send-email-dongsheng.yang@easystack.cn> X-Mailer: git-send-email 1.8.3.1 
In-Reply-To: <1552900534-29026-1-git-send-email-dongsheng.yang@easystack.cn> References: <1552900534-29026-1-git-send-email-dongsheng.yang@easystack.cn> X-CM-TRANSID: huCowABHr4m4YY9cO8zEAg--.331S15 X-Coremail-Antispam: 1Uf129KBjvJXoW3AryxuF18Xr17AF4DJr17Awb_yoWfZFW3pF WDJFyFkrWUZr17W3yxXFs8ArWjqa40y34DWr9Ik3s7K3Z3JrZxta4IkFyDJrW7AFyUCa1k Jr45Jw4UC3yUKrDanT9S1TB71UUUUUJqnTZGkaVYY2UrUUUUjbIjqfuFe4nvWSU5nxnvy2 9KBjDUYxBIdaVFxhVjvjDU0xZFpf9x0JbGT5JUUUUU= X-Originating-IP: [218.94.118.90] X-CM-SenderInfo: 5grqw2pkhqwhp1dqwq5hdv52pwdfyhdfq/1tbihx97eltVf3Z3hAAAst Sender: ceph-devel-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: ceph-devel@vger.kernel.org X-Virus-Scanned: ClamAV using ClamSMTP This commit introduces rbd_journal into rbd_device. With the journaling feature enabled, we open the journal after the exclusive lock is acquired and close it before the exclusive lock is released. Signed-off-by: Dongsheng Yang --- drivers/block/rbd.c | 237 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 237 insertions(+) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index cc0642c..bd90c17 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -28,16 +28,19 @@ */ +#include #include #include #include #include #include #include +#include #include #include #include +#include #include #include #include @@ -378,6 +381,8 @@ struct rbd_device { atomic_t parent_ref; struct rbd_device *parent; + struct rbd_journal *journal; + /* Block layer tags. 
*/ struct blk_mq_tag_set tag_set; @@ -408,6 +413,22 @@ enum rbd_dev_flags { RBD_DEV_FLAG_BLACKLISTED, /* our ceph_client is blacklisted */ }; +#define LOCAL_MIRROR_UUID "" /* mirror uuid put into the tag data by rbd_journal_allocate_tag() */ +#define LOCAL_CLIENT_ID "" /* client id passed to ceph_journaler_create() and ceph_journaler_get_cached_client() */ + +enum rbd_journal_state { + RBD_JOURNAL_STATE_INITIALIZED, /* set when rbd_dev_open_journal() allocates the journal */ + RBD_JOURNAL_STATE_OPENED, /* set by rbd_journal_open() once open, replay start and tag allocation succeeded */ + RBD_JOURNAL_STATE_CLOSED, /* set by rbd_journal_close() */ +}; + +struct rbd_journal { + struct ceph_journaler *journaler; /* created in rbd_dev_open_journal(), destroyed together with the journal */ + uint64_t tag_tid; /* tid of the tag from ceph_journaler_allocate_tag(); reset to 0 on close */ + /* state is protected by rbd_dev->lock_rwsem */ + enum rbd_journal_state state; +}; + static DEFINE_MUTEX(client_mutex); /* Serialize client creation */ static LIST_HEAD(rbd_dev_list); /* devices */ @@ -2681,6 +2702,7 @@ static void __rbd_lock(struct rbd_device *rbd_dev, const char *cookie) /* * lock_rwsem must be held for write */ +static int rbd_dev_open_journal(struct rbd_device *rbd_dev); static int rbd_lock(struct rbd_device *rbd_dev) { struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; @@ -2697,6 +2719,15 @@ static int rbd_lock(struct rbd_device *rbd_dev) if (ret) return ret; + if (rbd_dev->header.features & RBD_FEATURE_JOURNALING) { /* journal is opened only for images with the journaling feature */ + ret = rbd_dev_open_journal(rbd_dev); + if (ret) { + rbd_warn(rbd_dev, "open journal failed: %d", ret); + set_disk_ro(rbd_dev->disk, true); + set_bit(RBD_DEV_FLAG_BLACKLISTED, &rbd_dev->flags); + return -EBLACKLISTED; /* NOTE(review): a journal-open failure is reported as -EBLACKLISTED and sets RBD_DEV_FLAG_BLACKLISTED; whether the lock taken earlier in rbd_lock() is released on this path is not visible here - confirm */ + } + } rbd_dev->lock_state = RBD_LOCK_STATE_LOCKED; __rbd_lock(rbd_dev, cookie); return 0; @@ -2705,6 +2736,7 @@ static int rbd_lock(struct rbd_device *rbd_dev) /* * lock_rwsem must be held for write */ +static void rbd_journal_close(struct rbd_journal *journal); static void rbd_unlock(struct rbd_device *rbd_dev) { struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; @@ -2713,6 +2745,9 @@ static void rbd_unlock(struct rbd_device *rbd_dev) WARN_ON(!__rbd_is_lock_owner(rbd_dev) || rbd_dev->lock_cookie[0] == '\0'); + if (rbd_dev->journal) /* close the journal before ceph_cls_unlock() drops the lock */ + rbd_journal_close(rbd_dev->journal); + ret = ceph_cls_unlock(osdc, &rbd_dev->header_oid, &rbd_dev->header_oloc, RBD_LOCK_NAME, rbd_dev->lock_cookie); if 
(ret && ret != -ENOENT)
@@ -5750,6 +5785,256 @@ static int rbd_dev_header_name(struct rbd_device *rbd_dev)
 	return ret;
 }
 
+static int rbd_journal_allocate_tag(struct rbd_journal *journal);
+
+/*
+ * Open the journaler, start replay and allocate a tag for this client.
+ * On success the journal is marked RBD_JOURNAL_STATE_OPENED; on a failure
+ * after ceph_journaler_open() succeeded the journaler is closed again.
+ *
+ * Returns 0 on success, otherwise the non-zero result of the failed step.
+ */
+static int rbd_journal_open(struct rbd_journal *journal)
+{
+	struct ceph_journaler *journaler = journal->journaler;
+	int ret;
+
+	ret = ceph_journaler_open(journaler);
+	if (ret)
+		return ret;
+
+	ret = ceph_journaler_start_replay(journaler);
+	if (ret)
+		goto err_close_journaler;
+
+	ret = rbd_journal_allocate_tag(journal);
+	if (ret)
+		goto err_close_journaler;
+
+	journal->state = RBD_JOURNAL_STATE_OPENED;
+	return 0;
+
+err_close_journaler:
+	ceph_journaler_close(journaler);
+	return ret;
+}
+
+/*
+ * Make sure rbd_dev has an opened journal, creating the rbd_journal and
+ * its journaler on first use.  Called from rbd_lock().
+ *
+ * On failure the journaler is destroyed, the rbd_journal is freed and
+ * rbd_dev->journal is reset to NULL, no matter whether the journal was
+ * created by this call or was an existing, closed one.
+ *
+ * Returns 0 on success (or if already opened), -ENOMEM on allocation
+ * failure, or the result of rbd_journal_open().
+ */
+static int rbd_dev_open_journal(struct rbd_device *rbd_dev)
+{
+	struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
+	struct rbd_journal *journal = rbd_dev->journal;
+	int ret;
+
+	if (journal && journal->state == RBD_JOURNAL_STATE_OPENED)
+		return 0;
+
+	if (!journal) {
+		journal = kzalloc(sizeof(*journal), GFP_KERNEL);
+		if (!journal)
+			return -ENOMEM;
+
+		journal->state = RBD_JOURNAL_STATE_INITIALIZED;
+		journal->journaler = ceph_journaler_create(osdc,
+						&rbd_dev->header_oloc,
+						rbd_dev->spec->image_id,
+						LOCAL_CLIENT_ID);
+		if (!journal->journaler) {
+			/* not stored in rbd_dev yet: free the local one */
+			kfree(journal);
+			return -ENOMEM;
+		}
+
+		journal->journaler->entry_handler = rbd_dev;
+		journal->journaler->handle_entry = rbd_journal_replay;
+		rbd_dev->journal = journal;
+	}
+
+	ret = rbd_journal_open(journal);
+	if (ret)
+		goto err_destroy_journal;
+
+	return 0;
+
+err_destroy_journal:
+	/*
+	 * Use journal->journaler, not a local that is only set on the
+	 * creation path, so that a reused journal is torn down as well.
+	 */
+	ceph_journaler_destroy(journal->journaler);
+	kfree(journal);
+	rbd_dev->journal = NULL;
+	return ret;
+}
+
+/*
+ * Close the journaler and mark the journal RBD_JOURNAL_STATE_CLOSED.
+ * A journal that is already closed is left alone, so calling this from
+ * rbd_unlock() and again from rbd_dev_close_journal() is fine.
+ */
+static void rbd_journal_close(struct rbd_journal *journal)
+{
+	if (journal->state == RBD_JOURNAL_STATE_CLOSED)
+		return;
+
+	ceph_journaler_close(journal->journaler);
+	journal->tag_tid = 0;
+	journal->state = RBD_JOURNAL_STATE_CLOSED;
+}
+
+/*
+ * Close and free the journal of rbd_dev, if it has one.  Called from
+ * do_rbd_remove() right after rbd_dev_image_unlock().
+ */
+static void rbd_dev_close_journal(struct rbd_device *rbd_dev)
+{
+	struct rbd_journal *journal = rbd_dev->journal;
+
+	if (!journal)
+		return;
+
+	rbd_journal_close(journal);
+	ceph_journaler_destroy(journal->journaler);
+	kfree(journal);
+	rbd_dev->journal = NULL;
+}
+
+/* Predecessor part of the tag data, see predecessor_encode(). */
+typedef struct rbd_journal_tag_predecessor {
+	bool commit_valid;
+	uint64_t tag_tid;
+	uint64_t entry_tid;
+	uint32_t uuid_len;
+	char *mirror_uuid;
+} rbd_journal_tag_predecessor;
+
+/* Tag payload that rbd_journal_allocate_tag() encodes for the journaler. */
+typedef struct rbd_journal_tag_data {
+	struct rbd_journal_tag_predecessor predecessor;
+	uint32_t uuid_len;
+	char *mirror_uuid;
+} rbd_journal_tag_data;
+
+/*
+ * Number of bytes rbd_journal_encode_tag_data() needs for tag_data: two
+ * strings with a 4-byte length prefix each, the 1-byte commit_valid flag
+ * and the 8-byte tag_tid and entry_tid.
+ */
+static uint32_t tag_data_encoding_size(struct rbd_journal_tag_data *tag_data)
+{
+	return 4 + tag_data->uuid_len +
+	       4 + tag_data->predecessor.uuid_len +
+	       1 + 8 + 8;
+}
+
+/* Encode predecessor at *p; end marks the end of the buffer. */
+static void predecessor_encode(void **p, void *end,
+			       struct rbd_journal_tag_predecessor *predecessor)
+{
+	ceph_encode_string(p, end, predecessor->mirror_uuid,
+			   predecessor->uuid_len);
+	ceph_encode_8(p, predecessor->commit_valid);
+	ceph_encode_64(p, predecessor->tag_tid);
+	ceph_encode_64(p, predecessor->entry_tid);
+}
+
+/*
+ * Encode tag_data at *p: its mirror uuid followed by the predecessor.
+ * Always returns 0.
+ */
+static int rbd_journal_encode_tag_data(void **p, void *end,
+				struct rbd_journal_tag_data *tag_data)
+{
+	ceph_encode_string(p, end, tag_data->mirror_uuid, tag_data->uuid_len);
+	predecessor_encode(p, end, &tag_data->predecessor);
+
+	return 0;
+}
+
+/*
+ * Allocate a new journal tag and remember its tid in journal->tag_tid.
+ * The tag's predecessor is taken from the first object position cached
+ * for the local client, if there is one.
+ *
+ * Returns 0 on success, -ENOMEM if the encode buffer cannot be allocated,
+ * or the non-zero result of the journaler call that failed.
+ */
+static int rbd_journal_allocate_tag(struct rbd_journal *journal)
+{
+	struct ceph_journaler *journaler = journal->journaler;
+	struct rbd_journal_tag_data tag_data = {};
+	struct rbd_journal_tag_predecessor *predecessor;
+	struct ceph_journaler_object_pos *position;
+	struct ceph_journaler_client *client;
+	struct ceph_journaler_tag tag = {};
+	void *buf, *p, *end;
+	uint32_t buf_len;
+	int ret;
+
+	ret = ceph_journaler_get_cached_client(journaler, LOCAL_CLIENT_ID,
+					       &client);
+	if (ret)
+		return ret;
+
+	predecessor = &tag_data.predecessor;
+
+	/*
+	 * list_first_entry() must not be used on an empty list.  Without a
+	 * cached position the predecessor keeps its zeroed fields, i.e.
+	 * commit_valid stays false.
+	 */
+	if (!list_empty(&client->object_positions)) {
+		position = list_first_entry(&client->object_positions,
+					    struct ceph_journaler_object_pos,
+					    node);
+		predecessor->commit_valid = true;
+		predecessor->tag_tid = position->tag_tid;
+		predecessor->entry_tid = position->entry_tid;
+	}
+	predecessor->uuid_len = 0;
+	predecessor->mirror_uuid = LOCAL_MIRROR_UUID;
+
+	tag_data.uuid_len = 0;
+	tag_data.mirror_uuid = LOCAL_MIRROR_UUID;
+
+	buf_len = tag_data_encoding_size(&tag_data);
+	buf = kmalloc(buf_len, GFP_KERNEL);
+	if (!buf) {
+		pr_err("failed to allocate tag data\n");
+		return -ENOMEM;
+	}
+
+	/* p is the encode cursor, buf keeps pointing at the start */
+	p = buf;
+	end = buf + buf_len;
+	ret = rbd_journal_encode_tag_data(&p, end, &tag_data);
+	if (ret) {
+		pr_err("error in tag data\n");
+		goto free_buf;
+	}
+
+	ret = ceph_journaler_allocate_tag(journaler, 0, buf, buf_len, &tag);
+	if (ret)
+		goto free_data;
+
+	journal->tag_tid = tag.tid;
+free_data:
+	kfree(tag.data);	/* kfree(NULL) is a no-op */
+free_buf:
+	kfree(buf);
+	return ret;
+}
+
 static void rbd_dev_image_release(struct rbd_device *rbd_dev)
 {
 	rbd_dev_unprobe(rbd_dev);
@@ -6074,6 +6359,7 @@ static ssize_t do_rbd_remove(struct bus_type *bus,
 	device_del(&rbd_dev->dev);
 
 	rbd_dev_image_unlock(rbd_dev);
+	rbd_dev_close_journal(rbd_dev);
 	rbd_dev_device_release(rbd_dev);
 	rbd_dev_image_release(rbd_dev);
 	rbd_dev_destroy(rbd_dev);