diff mbox

[01/33] TCMU PR: first commit to implement TCMU PR

Message ID bdebb349-672c-9bae-0cca-f8ee7b8eaf7e@suse.de (mailing list archive)
State New, archived
Headers show

Commit Message

Zhu Lingshan June 15, 2018, 6:32 p.m. UTC
Hi,

I am replying to my own thread. If you want to test these commits, you
will need my tcmu-runner patch, which I have attached; it works fine with
PR register, reserve, clear, release and preempt. Alternatively, you can
use my repo directly: https://github.com/ls-zhu/tcmu-runner/tree/rbd_v2_pr

It only works with Ceph RBD as the backend for now, because other TCMU 
storage such as qcow or a plain file does not have anything like metadata. 
If we want such support for devices other than RBD, we could change their 
handlers in tcmu-runner, e.g. allocate some sectors to act as metadata. 
I know gluster may support this; we can implement it after the kernel side 
has stabilized.

Thanks,
BR
Zhu Lingshan


On 2018/6/16 2:23, Zhu Lingshan wrote:
> These commits and the following intend to implement Persistent
> Reservation operations for TCMU devices.
>
> This series of commits would implement such PR operations:
> PR_Out_Register, PR_Out_Reserve, PR_Out_Clear, PR_Out_Preempt,
> PR_Out_Release and PR_In_ReadKeys.
>
> Next wave of patches will contain the other PR operations.
>
> This patch added a struct tcmu_pr_info to store PR information
> for the handling functions, added command codes and attrs for
> netlink interfaces.
>
> Design note:
> In order to get consistent Persistent Reservation results from
> multiple targets hosting the same TCMU device(like Ceph RBD),
> this solution stores a string on the device itself(like RBD metadata).
>
> Every time the kernel receives a PR request against a TCMU device,
> it queries this string (a netlink attr carried by a netlink cmd),
> then decides whether the PR request should be performed. After
> processing, it updates this string.
>
> For example:
> When it receives a PR Reserve request, the kernel sends a netlink
> message to tcmu-runner to get the string, and tcmu-runner
> responds by sending the PR info string to the kernel. The kernel then
> decodes the string, finds information such as the key and reservation
> holder, and processes the request. After processing, it updates the
> string and sends the updated string to tcmu-runner, so that tcmu-runner
> can write it back to the device (like RBD metadata).
>
> So we make the device itself a "single" response point; (with
> lock protection) we will get a consistent result even when more than
> one initiator sends multiple PR requests via multiple targets.
>
> Signed-off-by: Zhu Lingshan <lszhu@suse.com>
> ---
>   include/uapi/linux/target_core_user.h | 19 +++++++++++++++++++
>   1 file changed, 19 insertions(+)
>
> diff --git a/include/uapi/linux/target_core_user.h b/include/uapi/linux/target_core_user.h
> index 0be80f72646b..2d5c3e55d3f8 100644
> --- a/include/uapi/linux/target_core_user.h
> +++ b/include/uapi/linux/target_core_user.h
> @@ -132,9 +132,13 @@ enum tcmu_genl_cmd {
>   	TCMU_CMD_ADDED_DEVICE,
>   	TCMU_CMD_REMOVED_DEVICE,
>   	TCMU_CMD_RECONFIG_DEVICE,
> +	TCMU_CMD_GET_PR_INFO,
> +	TCMU_CMD_SET_PR_INFO,
>   	TCMU_CMD_ADDED_DEVICE_DONE,
>   	TCMU_CMD_REMOVED_DEVICE_DONE,
>   	TCMU_CMD_RECONFIG_DEVICE_DONE,
> +	TCMU_CMD_GET_PR_INFO_DONE,
> +	TCMU_CMD_SET_PR_INFO_DONE,
>   	TCMU_CMD_SET_FEATURES,
>   	__TCMU_CMD_MAX,
>   };
> @@ -151,8 +155,23 @@ enum tcmu_genl_attr {
>   	TCMU_ATTR_CMD_STATUS,
>   	TCMU_ATTR_DEVICE_ID,
>   	TCMU_ATTR_SUPP_KERN_CMD_REPLY,
> +	TCMU_ATTR_PR_INFO,
>   	__TCMU_ATTR_MAX,
>   };
>   #define TCMU_ATTR_MAX (__TCMU_ATTR_MAX - 1)
>   
> +/* Holds the Persistent Reservation state currently being handled; it is
> + * converted to and from the string buffer in "struct tcmu_dev_pr_info".
> + * NOTE(review): u32 and struct list_head are kernel-internal types, but this is a uapi header - confirm placement.
> + */
> +struct tcmu_pr_info {
> +	u32 vers;		/* on disk format version number */
> +	u32 seq;		/* sequence number bumped every xattr write */
> +	struct tcmu_scsi2_rsv *scsi2_rsv; /* SCSI2 reservation if any */
> +	u32 gen;		/* PR generation number */
> +	struct tcmu_pr_rsv *rsv;	/* SCSI3 reservation if any */
> +	u32 num_regs;		/* number of registrations */
> +	struct list_head regs;	/* list of registrations */
> +};
> +
>   #endif
From 6a7029f03d092a86c9126bb4a72ab7c44b5abd6c Mon Sep 17 00:00:00 2001
From: Zhu Lingshan <lszhu@suse.com>
Date: Fri, 11 May 2018 16:54:51 +0800
Subject: [PATCH] pr_out_register and pr_in_readkeys can work

Signed-off-by: Zhu Lingshan <lszhu@suse.com>
---
 libtcmu.c                | 98 +++++++++++++++++++++++++++++++++++++++-
 libtcmu.h                |  4 ++
 main.c                   |  2 +
 rbd.c                    | 36 +++++++++++++++
 target_core_user_local.h |  7 ++-
 tcmu-runner.h            |  3 ++
 6 files changed, 147 insertions(+), 3 deletions(-)
diff mbox

Patch

diff --git a/libtcmu.c b/libtcmu.c
index d413020..826f482 100644
--- a/libtcmu.c
+++ b/libtcmu.c
@@ -84,6 +84,20 @@  static struct genl_cmd tcmu_cmds[] = {
 		.c_maxattr	= TCMU_ATTR_MAX,
 		.c_attr_policy	= tcmu_attr_policy,
 	},
+	{
+		.c_id		= TCMU_CMD_GET_PR_INFO,
+		.c_name		= "GET PR_INFO",
+		.c_msg_parser	= handle_netlink,
+		.c_maxattr	= TCMU_ATTR_MAX,
+		.c_attr_policy	= tcmu_attr_policy,
+	},
+	{
+		.c_id		= TCMU_CMD_SET_PR_INFO,
+		.c_name		= "SET PR_INFO",
+		.c_msg_parser	= handle_netlink,
+		.c_maxattr	= TCMU_ATTR_MAX,
+		.c_attr_policy	= tcmu_attr_policy,
+	},
 };
 
 static struct genl_ops tcmu_ops = {
@@ -93,7 +107,7 @@  static struct genl_ops tcmu_ops = {
 };
 
 static int send_netlink_reply(struct tcmulib_context *ctx, int reply_cmd,
-			      uint32_t dev_id, int status)
+			      uint32_t dev_id, int status, char *data)
 {
 	struct nl_sock *sock = ctx->nl_sock;
 	struct nl_msg *msg;
@@ -117,6 +131,12 @@  static int send_netlink_reply(struct tcmulib_context *ctx, int reply_cmd,
 	if (ret < 0)
 		goto free_msg;
 
+	if (reply_cmd == TCMU_CMD_GET_PR_INFO_DONE) {
+		ret = nla_put_string(msg, TCMU_ATTR_PR_INFO, data);
+		if (ret < 0)
+			goto free_msg;
+	}
+
 	/* Ignore ack. There is nothing we can do. */
 	ret = nl_send_auto(sock, msg);
 free_msg:
@@ -197,12 +217,77 @@  static int reconfig_device(struct tcmulib_context *ctx, char *dev_name,
 	return 0;
 }
 
+
+/*
+ * Handle TCMU_CMD_SET_PR_INFO: pass the TCMU_ATTR_PR_INFO string from the
+ * netlink message to the handler's set_pr_info() callback. Returns 0, or
+ * -ENODEV (unknown dev_name), -EOPNOTSUPP (no callback), -ENODATA (attribute
+ * missing), or the callback's negative return value.
+ */
+static int set_pr_info(struct tcmulib_context *ctx, char *dev_name,
+		       struct genl_info *info)
+{
+	int idx, rc;
+	char *pr_str;
+	struct tcmu_device *dev = lookup_dev_by_name(ctx, dev_name, &idx);
+
+	if (!dev) {
+		tcmu_err("Could not set PR info, device %s: not found.\n",
+			 dev_name);
+		return -ENODEV;
+	}
+	if (!dev->handler->set_pr_info) {
+		tcmu_dev_err(dev, "Setting PR info is not supported with this device.\n");
+		return -EOPNOTSUPP;
+	}
+	if (!info->attrs[TCMU_ATTR_PR_INFO]) {
+		tcmu_dev_err(dev, "Failed to obtain PR info from netlink.\n");
+		return -ENODATA;
+	}
+
+	pr_str = nla_get_string(info->attrs[TCMU_ATTR_PR_INFO]);
+	rc = dev->handler->set_pr_info(dev, pr_str);
+	if (rc < 0) {
+		tcmu_dev_err(dev, "Failed to store PR info with error %d.\n",
+			     rc);
+		return rc;
+	}
+	return 0;
+}
+
+/*
+ * Handle TCMU_CMD_GET_PR_INFO: pass buf through to the handler's
+ * get_pr_info() callback. Returns 0, or -ENODEV (unknown dev_name),
+ * -EOPNOTSUPP (no callback), or the callback's negative return value.
+ */
+static int get_pr_info(struct tcmulib_context *ctx, char *dev_name, char **buf)
+{
+	int idx, rc;
+	struct tcmu_device *dev = lookup_dev_by_name(ctx, dev_name, &idx);
+
+	if (!dev) {
+		tcmu_err("Could not get PR info, device %s: not found.\n", dev_name);
+		return -ENODEV;
+	}
+	if (!dev->handler->get_pr_info) {
+		tcmu_dev_err(dev, "Getting PR info is not supported with this device.\n");
+		return -EOPNOTSUPP;
+	}
+
+	rc = dev->handler->get_pr_info(dev, buf);
+	if (rc >= 0)
+		return 0;
+	tcmu_dev_err(dev, "Failed to get PR info with error %d.\n", rc);
+	return rc;
+}
+
 static int handle_netlink(struct nl_cache_ops *unused, struct genl_cmd *cmd,
 			  struct genl_info *info, void *arg)
 {
 	struct tcmulib_context *ctx = arg;
 	int ret, reply_cmd, version = info->genlhdr->version;
 	char buf[32];
+	char *data = NULL;
 
 	tcmu_dbg("cmd %d. Got header version %d. Supported %d.\n",
 		 cmd->c_id, info->genlhdr->version, TCMU_NL_VERSION);
@@ -237,6 +322,15 @@  static int handle_netlink(struct nl_cache_ops *unused, struct genl_cmd *cmd,
 		reply_cmd = TCMU_CMD_RECONFIG_DEVICE_DONE;
 		ret = reconfig_device(ctx, buf, info);
 		break;
+	case TCMU_CMD_GET_PR_INFO:
+		reply_cmd = TCMU_CMD_GET_PR_INFO_DONE;
+		ret = get_pr_info(ctx, buf, &data);
+		break;
+	case TCMU_CMD_SET_PR_INFO:
+		reply_cmd = TCMU_CMD_SET_PR_INFO_DONE;
+		ret = set_pr_info(ctx, buf, info);
+		break;
+
 	default:
 		tcmu_err("Unknown netlink command %d. Netlink header received version %d. libtcmu supports %d\n",
 			 cmd->c_id, version, TCMU_NL_VERSION);
@@ -246,7 +340,7 @@  static int handle_netlink(struct nl_cache_ops *unused, struct genl_cmd *cmd,
 	if (version > 1)
 		ret = send_netlink_reply(ctx, reply_cmd,
 				nla_get_u32(info->attrs[TCMU_ATTR_DEVICE_ID]),
-				ret);
+				ret, data);
 
 	return ret;
 }
diff --git a/libtcmu.h b/libtcmu.h
index ee7f63d..a43bbbe 100644
--- a/libtcmu.h
+++ b/libtcmu.h
@@ -76,6 +76,10 @@  struct tcmulib_handler {
 	int (*added)(struct tcmu_device *dev);
 	void (*removed)(struct tcmu_device *dev);
 
+	int (*set_pr_info)(struct tcmu_device *dev, char *pr_info);
+	int (*get_pr_info)(struct tcmu_device *dev, char **buf);
+
+
 	void *hm_private; /* private ptr for handler module */
 	void *connection; /* private, dbus connection for this subtype */
 };
diff --git a/main.c b/main.c
index 2e33741..3b12e21 100644
--- a/main.c
+++ b/main.c
@@ -1084,6 +1084,8 @@  int main(int argc, char **argv)
 		tmp_handler.subtype = (*tmp_r_handler)->subtype;
 		tmp_handler.cfg_desc = (*tmp_r_handler)->cfg_desc;
 		tmp_handler.check_config = (*tmp_r_handler)->check_config;
+		tmp_handler.set_pr_info = (*tmp_r_handler)->set_pr_info;
+		tmp_handler.get_pr_info = (*tmp_r_handler)->get_pr_info;
 		tmp_handler.reconfig = dev_reconfig;
 		tmp_handler.added = dev_added;
 		tmp_handler.removed = dev_removed;
diff --git a/rbd.c b/rbd.c
index 0d1ffe9..bc722e1 100644
--- a/rbd.c
+++ b/rbd.c
@@ -78,6 +78,9 @@ 
 #define TCMU_RBD_LOCKER_TAG_FMT "tcmu_tag=%hu,rbd_client=%s"
 #define TCMU_RBD_LOCKER_BUF_LEN 256
 
+#define TCMU_RBD_PR_INFO_MAX_SIZE	8192	/* size of the buffer passed to rbd_metadata_get() */
+#define TCMU_PR_INFO_KEY		"pr_info"	/* RBD metadata key used for the PR info string */
+
 struct tcmu_rbd_state {
 	rados_t cluster;
 	rados_ioctx_t io_ctx;
@@ -801,6 +804,36 @@  static int tcmu_rbd_check_image_size(struct tcmu_device *dev, uint64_t new_size)
 	return 0;
 }
 
+static rbd_image_t tcmu_dev_to_image(struct tcmu_device *dev);
+
+/*
+ * Store the PR info string buf in the RBD image's metadata under
+ * TCMU_PR_INFO_KEY.
+ * Returns the result of rbd_metadata_set() unchanged.
+ */
+static int tcmu_rbd_pr_set(struct tcmu_device *dev, char *buf)
+{
+	return rbd_metadata_set(tcmu_dev_to_image(dev), TCMU_PR_INFO_KEY, buf);
+}
+
+/* Read the TCMU_PR_INFO_KEY metadata into a zeroed heap buffer stored in *buf
+ * (not freed here). Returns -ENOMEM or the result of rbd_metadata_get(). */
+static int tcmu_rbd_pr_get(struct tcmu_device *dev, char **buf)
+{
+	int ret;
+	size_t len = TCMU_RBD_PR_INFO_MAX_SIZE;
+	rbd_image_t image = tcmu_dev_to_image(dev);
+	char *pr_info_str = calloc(1, TCMU_RBD_PR_INFO_MAX_SIZE);
+
+	if (!pr_info_str) {	/* must be checked before the buffer is touched */
+		tcmu_err("Not enough memory for getting PR info.\n");
+		return -ENOMEM;
+	}
+	ret = rbd_metadata_get(image, TCMU_PR_INFO_KEY, pr_info_str, &len);
+	*buf = pr_info_str;	/* set even on failure: the reply path sends it */
+	return ret;
+}
+
 static int tcmu_rbd_open(struct tcmu_device *dev, bool reopen)
 {
 	rbd_image_info_t image_info;
@@ -904,6 +937,7 @@  static int tcmu_rbd_open(struct tcmu_device *dev, bool reopen)
 	tcmu_set_dev_write_cache_enabled(dev, 0);
 
 	free(dev_cfg_dup);
+	tcmu_rbd_pr_set(dev, "");
 	return 0;
 
 stop_image:
@@ -1431,6 +1465,8 @@  struct tcmur_handler tcmu_rbd_handler = {
 	.read	       = tcmu_rbd_read,
 	.write	       = tcmu_rbd_write,
 	.reconfig      = tcmu_rbd_reconfig,
+	.set_pr_info   = tcmu_rbd_pr_set,
+	.get_pr_info   = tcmu_rbd_pr_get,
 #ifdef LIBRBD_SUPPORTS_AIO_FLUSH
 	.flush	       = tcmu_rbd_flush,
 #endif
diff --git a/target_core_user_local.h b/target_core_user_local.h
index 2cdb3e5..aefa688 100644
--- a/target_core_user_local.h
+++ b/target_core_user_local.h
@@ -4,7 +4,7 @@ 
 /* This header will be used by application too */
 
 #include <linux/types.h>
-#include <linux/uio.h>
+//#include <linux/uio.h>
 
 #define TCMU_VERSION "2.0"
 
@@ -131,9 +131,13 @@  enum tcmu_genl_cmd {
 	TCMU_CMD_ADDED_DEVICE,
 	TCMU_CMD_REMOVED_DEVICE,
 	TCMU_CMD_RECONFIG_DEVICE,
+	TCMU_CMD_GET_PR_INFO,
+	TCMU_CMD_SET_PR_INFO,
 	TCMU_CMD_ADDED_DEVICE_DONE,
 	TCMU_CMD_REMOVED_DEVICE_DONE,
 	TCMU_CMD_RECONFIG_DEVICE_DONE,
+	TCMU_CMD_GET_PR_INFO_DONE,
+	TCMU_CMD_SET_PR_INFO_DONE,
 	TCMU_CMD_SET_FEATURES,
 	__TCMU_CMD_MAX,
 };
@@ -150,6 +154,7 @@  enum tcmu_genl_attr {
 	TCMU_ATTR_CMD_STATUS,
 	TCMU_ATTR_DEVICE_ID,
 	TCMU_ATTR_SUPP_KERN_CMD_REPLY,
+	TCMU_ATTR_PR_INFO,
 	__TCMU_ATTR_MAX,
 };
 #define TCMU_ATTR_MAX (__TCMU_ATTR_MAX - 1)
diff --git a/tcmu-runner.h b/tcmu-runner.h
index b423fc5..792ba7f 100644
--- a/tcmu-runner.h
+++ b/tcmu-runner.h
@@ -133,6 +133,9 @@  struct tcmur_handler {
 	 * indicating success/failure.
 	 */
 	int (*get_lock_tag)(struct tcmu_device *dev, uint16_t *tag);
+	int (*set_pr_info)(struct tcmu_device *dev, char *pr_info);
+	int (*get_pr_info)(struct tcmu_device *dev, char **buf);
+
 
 	/*
 	 * internal field, don't touch this