diff mbox

[08/16] rbd: don't store pool name in struct rbd_dev

Message ID 4FFD875B.3060608@inktank.com (mailing list archive)
State New, archived
Headers show

Commit Message

Alex Elder July 11, 2012, 2:02 p.m. UTC
An rbd device's pool name is used to specify the pool to use for an
rbd image when it gets mapped (via rbd_add()).  However, only the
pool id can be relied on to be invariant--the name of a pool can
change at any time.

This means that the name of the pool is potentially stale as soon as
the image is mapped, so it's a bad idea to record this information.
So only store the pool id, not its name, in an rbd_dev.

Here are a few specific notes about this change:
    - The "pool" name device attribute (/sys/bus/rbd/devices/<N>/pool)
      goes away.  In its place is a "pool_id" attribute that provide
      the numeric pool id for the rbd image.
    - rbd_add_parse_args() now returns a pointer to a dynamically-
      allocated copy of the pool name taken from the arguments.
    - rbd_add() has been changed to handle freeing the pool name,
      both in error cases and in the normal case after the pool id
      has been recorded.

Signed-off-by: Alex Elder <elder@inktank.com>
---
 drivers/block/rbd.c |   74
+++++++++++++++++++++++++++++++-------------------
 1 files changed, 46 insertions(+), 28 deletions(-)

 static ssize_t rbd_name_show(struct device *dev,
@@ -1898,7 +1896,7 @@ static ssize_t rbd_image_refresh(struct device *dev,
 static DEVICE_ATTR(size, S_IRUGO, rbd_size_show, NULL);
 static DEVICE_ATTR(major, S_IRUGO, rbd_major_show, NULL);
 static DEVICE_ATTR(client_id, S_IRUGO, rbd_client_id_show, NULL);
-static DEVICE_ATTR(pool, S_IRUGO, rbd_pool_show, NULL);
+static DEVICE_ATTR(pool_id, S_IRUGO, rbd_pool_id_show, NULL);
 static DEVICE_ATTR(name, S_IRUGO, rbd_name_show, NULL);
 static DEVICE_ATTR(refresh, S_IWUSR, NULL, rbd_image_refresh);
 static DEVICE_ATTR(current_snap, S_IRUGO, rbd_snap_show, NULL);
@@ -1908,7 +1906,7 @@ static struct attribute *rbd_attrs[] = {
 	&dev_attr_size.attr,
 	&dev_attr_major.attr,
 	&dev_attr_client_id.attr,
-	&dev_attr_pool.attr,
+	&dev_attr_pool_id.attr,
 	&dev_attr_name.attr,
 	&dev_attr_current_snap.attr,
 	&dev_attr_refresh.attr,
@@ -2329,25 +2327,31 @@ static inline char *dup_token(const char **buf,
size_t *lenp)
 }

 /*
- * This fills in the pool_name, obj, obj_len, snap_name, obj_len,
- * rbd_dev, rbd_md_name, and name fields of the given rbd_dev, based
- * on the list of monitor addresses and other options provided via
- * /sys/bus/rbd/add.
+ * This fills in the obj, obj_len, snap_name, rbd_dev, rbd_md_name,
+ * and name fields of the given rbd_dev, based on the list of
+ * monitor addresses and other options provided via /sys/bus/rbd/add.
+ *
+ * Returns a pointer to a dynamically-allocated buffer containing
+ * the pool name provided, or a pointer-coded errno if an error
+ * occurs.
  */
-static int rbd_add_parse_args(struct rbd_device *rbd_dev,
+static char *rbd_add_parse_args(struct rbd_device *rbd_dev,
 			      const char *buf,
 			      const char **mon_addrs,
 			      size_t *mon_addrs_size,
 			      char *options,
 			      size_t options_size)
 {
-	size_t	len;
+	size_t len;
+	int ret = -EINVAL;
+	char *pool_name = NULL;

 	/* The first four tokens are required */

 	len = next_token(&buf);
 	if (!len)
-		return -EINVAL;
+		goto out_err;
+
 	*mon_addrs_size = len + 1;
 	*mon_addrs = buf;

@@ -2355,15 +2359,17 @@ static int rbd_add_parse_args(struct rbd_device
*rbd_dev,

 	len = copy_token(&buf, options, options_size);
 	if (!len || len >= options_size)
-		return -EINVAL;
+		goto out_err;

-	len = copy_token(&buf, rbd_dev->pool_name, sizeof (rbd_dev->pool_name));
-	if (!len || len >= sizeof (rbd_dev->pool_name))
-		return -EINVAL;
+	pool_name = dup_token(&buf, NULL);
+	if (!pool_name) {
+		ret = -ENOMEM;
+		goto out_err;
+	}

 	len = copy_token(&buf, rbd_dev->obj, sizeof (rbd_dev->obj));
 	if (!len || len >= sizeof (rbd_dev->obj))
-		return -EINVAL;
+		goto out_err;

 	/* We have the object length in hand, save it. */

@@ -2382,9 +2388,14 @@ static int rbd_add_parse_args(struct rbd_device
*rbd_dev,
 		memcpy(rbd_dev->snap_name, RBD_SNAP_HEAD_NAME,
 			sizeof (RBD_SNAP_HEAD_NAME));
 	else if (len >= sizeof (rbd_dev->snap_name))
-		return -EINVAL;
+		goto out_err;

-	return 0;
+	return pool_name;
+
+out_err:
+	kfree(pool_name);
+
+	return ERR_PTR(ret);
 }

 static ssize_t rbd_add(struct bus_type *bus,
@@ -2396,6 +2407,7 @@ static ssize_t rbd_add(struct bus_type *bus,
 	size_t mon_addrs_size = 0;
 	char *options = NULL;
 	struct ceph_osd_client *osdc;
+	char *pool_name;
 	int rc = -ENOMEM;

 	if (!try_module_get(THIS_MODULE))
@@ -2425,10 +2437,14 @@ static ssize_t rbd_add(struct bus_type *bus,
 	sprintf(rbd_dev->name, "%s%d", RBD_DRV_NAME, rbd_dev->id);

 	/* parse add command */
-	rc = rbd_add_parse_args(rbd_dev, buf, &mon_addrs, &mon_addrs_size,
-				options, count);
-	if (rc)
+	pool_name = rbd_add_parse_args(rbd_dev, buf,
+					&mon_addrs, &mon_addrs_size,
+					options, count);
+	if (IS_ERR(pool_name)) {
+		rc = PTR_ERR(pool_name);
+		pool_name = NULL;
 		goto err_put_id;
+	}

 	rbd_dev->rbd_client = rbd_get_client(mon_addrs, mon_addrs_size - 1,
 						options);
@@ -2439,10 +2455,10 @@ static ssize_t rbd_add(struct bus_type *bus,

 	/* pick the pool */
 	osdc = &rbd_dev->rbd_client->client->osdc;
-	rc = ceph_pg_poolid_by_name(osdc->osdmap, rbd_dev->pool_name);
+	rc = ceph_pg_poolid_by_name(osdc->osdmap, pool_name);
 	if (rc < 0)
 		goto err_out_client;
-	rbd_dev->poolid = rc;
+	rbd_dev->pool_id = rc;

 	/* register our block device */
 	rc = register_blkdev(0, rbd_dev->name);
@@ -2453,6 +2469,7 @@ static ssize_t rbd_add(struct bus_type *bus,
 	rc = rbd_bus_add_dev(rbd_dev);
 	if (rc)
 		goto err_out_blkdev;
+	kfree(pool_name);

 	/*
 	 * At this point cleanup in the event of an error is the job
@@ -2482,6 +2499,7 @@ err_out_blkdev:
 err_out_client:
 	rbd_put_client(rbd_dev);
 err_put_id:
+	kfree(pool_name);
 	rbd_id_put(rbd_dev);
 err_nomem:
 	kfree(options);

Comments

Josh Durgin July 11, 2012, 7:36 p.m. UTC | #1
On 07/11/2012 07:02 AM, Alex Elder wrote:
> An rbd device's pool name is used to specify the pool to use for an
> rbd image when it gets mapped (via rbd_add()).  However, only the
> pool id can be relied on to be invariant--the name of a pool can
> change at any time.
>
> This means that the name of the pool is potentially stale as soon as
> the image is mapped, so it's a bad idea to record this information.
> So only store the pool id, not its name, in an rbd_dev.
>
> Here are a few specific notes about this change:
>      - The "pool" name device attribute (/sys/bus/rbd/devices/<N>/pool)
>        goes away.  In its place is a "pool_id" attribute that provide
>        the numeric pool id for the rbd image.

We're using the pool name for udev to provide a predictable device name
(/dev/rbd/poolname/imagename[@snapname]), so we probably want to keep
the sysfs attribute. I don't think there's a good way for us to detect
pool renames right now though, so we should document that the pool
name reported does not reflect any potential renames.

>      - rbd_add_parse_args() now returns a pointer to a dynamically-
>        allocated copy of the pool name taken from the arguments.
>      - rbd_add() has been changed to handle freeing the pool name,
>        both in error cases and in the normal case after the pool id
>        has been recorded.
>
> Signed-off-by: Alex Elder<elder@inktank.com>
> ---
>   drivers/block/rbd.c |   74
> +++++++++++++++++++++++++++++++-------------------
>   1 files changed, 46 insertions(+), 28 deletions(-)
>
> diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
> index 3aa0ca0..76e978c 100644
> --- a/drivers/block/rbd.c
> +++ b/drivers/block/rbd.c
> @@ -56,7 +56,6 @@
>   #define RBD_MINORS_PER_MAJOR	256		/* max minors per blkdev */
>
>   #define RBD_MAX_MD_NAME_LEN	(RBD_MAX_OBJ_NAME_LEN + sizeof(RBD_SUFFIX))
> -#define RBD_MAX_POOL_NAME_LEN	64
>   #define RBD_MAX_SNAP_NAME_LEN	32
>   #define RBD_MAX_OPT_LEN		1024
>
> @@ -166,8 +165,7 @@ struct rbd_device {
>   	char			obj[RBD_MAX_OBJ_NAME_LEN]; /* rbd image name */
>   	int			obj_len;
>   	char			obj_md_name[RBD_MAX_MD_NAME_LEN]; /* hdr nm. */
> -	char			pool_name[RBD_MAX_POOL_NAME_LEN];
> -	int			poolid;
> +	int			pool_id;
>
>   	struct ceph_osd_event   *watch_event;
>   	struct ceph_osd_request *watch_request;
> @@ -930,7 +928,7 @@ static int rbd_do_request(struct request *rq,
>   	layout->fl_stripe_unit = cpu_to_le32(1<<  RBD_MAX_OBJ_ORDER);
>   	layout->fl_stripe_count = cpu_to_le32(1);
>   	layout->fl_object_size = cpu_to_le32(1<<  RBD_MAX_OBJ_ORDER);
> -	layout->fl_pg_pool = cpu_to_le32(dev->poolid);
> +	layout->fl_pg_pool = cpu_to_le32(dev->pool_id);
>   	ceph_calc_raw_layout(osdc, layout, snapid, ofs,&len,&bno,
>   				req, ops);
>
> @@ -1653,7 +1651,7 @@ static int rbd_header_add_snap(struct rbd_device *dev,
>   		return -EINVAL;
>
>   	monc =&dev->rbd_client->client->monc;
> -	ret = ceph_monc_create_snapid(monc, dev->poolid,&new_snapid);
> +	ret = ceph_monc_create_snapid(monc, dev->pool_id,&new_snapid);
>   	dout("created snapid=%lld\n", new_snapid);
>   	if (ret<  0)
>   		return ret;
> @@ -1851,12 +1849,12 @@ static ssize_t rbd_client_id_show(struct device
> *dev,
>   			ceph_client_id(rbd_dev->rbd_client->client));
>   }
>
> -static ssize_t rbd_pool_show(struct device *dev,
> +static ssize_t rbd_pool_id_show(struct device *dev,
>   			     struct device_attribute *attr, char *buf)
>   {
>   	struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
>
> -	return sprintf(buf, "%s\n", rbd_dev->pool_name);
> +	return sprintf(buf, "%d\n", rbd_dev->pool_id);
>   }
>
>   static ssize_t rbd_name_show(struct device *dev,
> @@ -1898,7 +1896,7 @@ static ssize_t rbd_image_refresh(struct device *dev,
>   static DEVICE_ATTR(size, S_IRUGO, rbd_size_show, NULL);
>   static DEVICE_ATTR(major, S_IRUGO, rbd_major_show, NULL);
>   static DEVICE_ATTR(client_id, S_IRUGO, rbd_client_id_show, NULL);
> -static DEVICE_ATTR(pool, S_IRUGO, rbd_pool_show, NULL);
> +static DEVICE_ATTR(pool_id, S_IRUGO, rbd_pool_id_show, NULL);
>   static DEVICE_ATTR(name, S_IRUGO, rbd_name_show, NULL);
>   static DEVICE_ATTR(refresh, S_IWUSR, NULL, rbd_image_refresh);
>   static DEVICE_ATTR(current_snap, S_IRUGO, rbd_snap_show, NULL);
> @@ -1908,7 +1906,7 @@ static struct attribute *rbd_attrs[] = {
>   	&dev_attr_size.attr,
>   	&dev_attr_major.attr,
>   	&dev_attr_client_id.attr,
> -	&dev_attr_pool.attr,
> +	&dev_attr_pool_id.attr,
>   	&dev_attr_name.attr,
>   	&dev_attr_current_snap.attr,
>   	&dev_attr_refresh.attr,
> @@ -2329,25 +2327,31 @@ static inline char *dup_token(const char **buf,
> size_t *lenp)
>   }
>
>   /*
> - * This fills in the pool_name, obj, obj_len, snap_name, obj_len,
> - * rbd_dev, rbd_md_name, and name fields of the given rbd_dev, based
> - * on the list of monitor addresses and other options provided via
> - * /sys/bus/rbd/add.
> + * This fills in the obj, obj_len, snap_name, rbd_dev, rbd_md_name,
> + * and name fields of the given rbd_dev, based on the list of
> + * monitor addresses and other options provided via /sys/bus/rbd/add.
> + *
> + * Returns a pointer to a dynamically-allocated buffer containing
> + * the pool name provided, or a pointer-coded errno if an error
> + * occurs.
>    */
> -static int rbd_add_parse_args(struct rbd_device *rbd_dev,
> +static char *rbd_add_parse_args(struct rbd_device *rbd_dev,
>   			      const char *buf,
>   			      const char **mon_addrs,
>   			      size_t *mon_addrs_size,
>   			      char *options,
>   			      size_t options_size)
>   {
> -	size_t	len;
> +	size_t len;
> +	int ret = -EINVAL;
> +	char *pool_name = NULL;
>
>   	/* The first four tokens are required */
>
>   	len = next_token(&buf);
>   	if (!len)
> -		return -EINVAL;
> +		goto out_err;
> +
>   	*mon_addrs_size = len + 1;
>   	*mon_addrs = buf;
>
> @@ -2355,15 +2359,17 @@ static int rbd_add_parse_args(struct rbd_device
> *rbd_dev,
>
>   	len = copy_token(&buf, options, options_size);
>   	if (!len || len>= options_size)
> -		return -EINVAL;
> +		goto out_err;
>
> -	len = copy_token(&buf, rbd_dev->pool_name, sizeof (rbd_dev->pool_name));
> -	if (!len || len>= sizeof (rbd_dev->pool_name))
> -		return -EINVAL;
> +	pool_name = dup_token(&buf, NULL);
> +	if (!pool_name) {
> +		ret = -ENOMEM;
> +		goto out_err;
> +	}
>
>   	len = copy_token(&buf, rbd_dev->obj, sizeof (rbd_dev->obj));
>   	if (!len || len>= sizeof (rbd_dev->obj))
> -		return -EINVAL;
> +		goto out_err;
>
>   	/* We have the object length in hand, save it. */
>
> @@ -2382,9 +2388,14 @@ static int rbd_add_parse_args(struct rbd_device
> *rbd_dev,
>   		memcpy(rbd_dev->snap_name, RBD_SNAP_HEAD_NAME,
>   			sizeof (RBD_SNAP_HEAD_NAME));
>   	else if (len>= sizeof (rbd_dev->snap_name))
> -		return -EINVAL;
> +		goto out_err;
>
> -	return 0;
> +	return pool_name;
> +
> +out_err:
> +	kfree(pool_name);
> +
> +	return ERR_PTR(ret);
>   }
>
>   static ssize_t rbd_add(struct bus_type *bus,
> @@ -2396,6 +2407,7 @@ static ssize_t rbd_add(struct bus_type *bus,
>   	size_t mon_addrs_size = 0;
>   	char *options = NULL;
>   	struct ceph_osd_client *osdc;
> +	char *pool_name;
>   	int rc = -ENOMEM;
>
>   	if (!try_module_get(THIS_MODULE))
> @@ -2425,10 +2437,14 @@ static ssize_t rbd_add(struct bus_type *bus,
>   	sprintf(rbd_dev->name, "%s%d", RBD_DRV_NAME, rbd_dev->id);
>
>   	/* parse add command */
> -	rc = rbd_add_parse_args(rbd_dev, buf,&mon_addrs,&mon_addrs_size,
> -				options, count);
> -	if (rc)
> +	pool_name = rbd_add_parse_args(rbd_dev, buf,
> +					&mon_addrs,&mon_addrs_size,
> +					options, count);
> +	if (IS_ERR(pool_name)) {
> +		rc = PTR_ERR(pool_name);
> +		pool_name = NULL;
>   		goto err_put_id;
> +	}
>
>   	rbd_dev->rbd_client = rbd_get_client(mon_addrs, mon_addrs_size - 1,
>   						options);
> @@ -2439,10 +2455,10 @@ static ssize_t rbd_add(struct bus_type *bus,
>
>   	/* pick the pool */
>   	osdc =&rbd_dev->rbd_client->client->osdc;
> -	rc = ceph_pg_poolid_by_name(osdc->osdmap, rbd_dev->pool_name);
> +	rc = ceph_pg_poolid_by_name(osdc->osdmap, pool_name);
>   	if (rc<  0)
>   		goto err_out_client;
> -	rbd_dev->poolid = rc;
> +	rbd_dev->pool_id = rc;
>
>   	/* register our block device */
>   	rc = register_blkdev(0, rbd_dev->name);
> @@ -2453,6 +2469,7 @@ static ssize_t rbd_add(struct bus_type *bus,
>   	rc = rbd_bus_add_dev(rbd_dev);
>   	if (rc)
>   		goto err_out_blkdev;
> +	kfree(pool_name);
>
>   	/*
>   	 * At this point cleanup in the event of an error is the job
> @@ -2482,6 +2499,7 @@ err_out_blkdev:
>   err_out_client:
>   	rbd_put_client(rbd_dev);
>   err_put_id:
> +	kfree(pool_name);
>   	rbd_id_put(rbd_dev);
>   err_nomem:
>   	kfree(options);

--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Sage Weil July 11, 2012, 8:19 p.m. UTC | #2
On Wed, 11 Jul 2012, Josh Durgin wrote:
> On 07/11/2012 07:02 AM, Alex Elder wrote:
> > An rbd device's pool name is used to specify the pool to use for an
> > rbd image when it gets mapped (via rbd_add()).  However, only the
> > pool id can be relied on to be invariant--the name of a pool can
> > change at any time.
> > 
> > This means that the name of the pool is potentially stale as soon as
> > the image is mapped, so it's a bad idea to record this information.
> > So only store the pool id, not its name, in an rbd_dev.
> > 
> > Here are a few specific notes about this change:
> >      - The "pool" name device attribute (/sys/bus/rbd/devices/<N>/pool)
> >        goes away.  In its place is a "pool_id" attribute that provide
> >        the numeric pool id for the rbd image.
> 
> We're using the pool name for udev to provide a predictable device name
> (/dev/rbd/poolname/imagename[@snapname]), so we probably want to keep
> the sysfs attribute. I don't think there's a good way for us to detect
> pool renames right now though, so we should document that the pool
> name reported does not reflect any potential renames.

Whoops.  Let's just make sure that we don't use it for anything other than 
sysfs.

> 
> >      - rbd_add_parse_args() now returns a pointer to a dynamically-
> >        allocated copy of the pool name taken from the arguments.
> >      - rbd_add() has been changed to handle freeing the pool name,
> >        both in error cases and in the normal case after the pool id
> >        has been recorded.
> > 
> > Signed-off-by: Alex Elder<elder@inktank.com>
> > ---
> >   drivers/block/rbd.c |   74
> > +++++++++++++++++++++++++++++++-------------------
> >   1 files changed, 46 insertions(+), 28 deletions(-)
> > 
> > diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
> > index 3aa0ca0..76e978c 100644
> > --- a/drivers/block/rbd.c
> > +++ b/drivers/block/rbd.c
> > @@ -56,7 +56,6 @@
> >   #define RBD_MINORS_PER_MAJOR	256		/* max minors per
> > blkdev */
> > 
> >   #define RBD_MAX_MD_NAME_LEN	(RBD_MAX_OBJ_NAME_LEN + sizeof(RBD_SUFFIX))
> > -#define RBD_MAX_POOL_NAME_LEN	64
> >   #define RBD_MAX_SNAP_NAME_LEN	32
> >   #define RBD_MAX_OPT_LEN		1024
> > 
> > @@ -166,8 +165,7 @@ struct rbd_device {
> >   	char			obj[RBD_MAX_OBJ_NAME_LEN]; /* rbd image name
> > */
> >   	int			obj_len;
> >   	char			obj_md_name[RBD_MAX_MD_NAME_LEN]; /* hdr nm.
> > */
> > -	char			pool_name[RBD_MAX_POOL_NAME_LEN];
> > -	int			poolid;
> > +	int			pool_id;
> > 
> >   	struct ceph_osd_event   *watch_event;
> >   	struct ceph_osd_request *watch_request;
> > @@ -930,7 +928,7 @@ static int rbd_do_request(struct request *rq,
> >   	layout->fl_stripe_unit = cpu_to_le32(1<<  RBD_MAX_OBJ_ORDER);
> >   	layout->fl_stripe_count = cpu_to_le32(1);
> >   	layout->fl_object_size = cpu_to_le32(1<<  RBD_MAX_OBJ_ORDER);
> > -	layout->fl_pg_pool = cpu_to_le32(dev->poolid);
> > +	layout->fl_pg_pool = cpu_to_le32(dev->pool_id);
> >   	ceph_calc_raw_layout(osdc, layout, snapid, ofs,&len,&bno,
> >   				req, ops);
> > 
> > @@ -1653,7 +1651,7 @@ static int rbd_header_add_snap(struct rbd_device *dev,
> >   		return -EINVAL;
> > 
> >   	monc =&dev->rbd_client->client->monc;
> > -	ret = ceph_monc_create_snapid(monc, dev->poolid,&new_snapid);
> > +	ret = ceph_monc_create_snapid(monc, dev->pool_id,&new_snapid);
> >   	dout("created snapid=%lld\n", new_snapid);
> >   	if (ret<  0)
> >   		return ret;
> > @@ -1851,12 +1849,12 @@ static ssize_t rbd_client_id_show(struct device
> > *dev,
> >   			ceph_client_id(rbd_dev->rbd_client->client));
> >   }
> > 
> > -static ssize_t rbd_pool_show(struct device *dev,
> > +static ssize_t rbd_pool_id_show(struct device *dev,
> >   			     struct device_attribute *attr, char *buf)
> >   {
> >   	struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
> > 
> > -	return sprintf(buf, "%s\n", rbd_dev->pool_name);
> > +	return sprintf(buf, "%d\n", rbd_dev->pool_id);
> >   }
> > 
> >   static ssize_t rbd_name_show(struct device *dev,
> > @@ -1898,7 +1896,7 @@ static ssize_t rbd_image_refresh(struct device *dev,
> >   static DEVICE_ATTR(size, S_IRUGO, rbd_size_show, NULL);
> >   static DEVICE_ATTR(major, S_IRUGO, rbd_major_show, NULL);
> >   static DEVICE_ATTR(client_id, S_IRUGO, rbd_client_id_show, NULL);
> > -static DEVICE_ATTR(pool, S_IRUGO, rbd_pool_show, NULL);
> > +static DEVICE_ATTR(pool_id, S_IRUGO, rbd_pool_id_show, NULL);
> >   static DEVICE_ATTR(name, S_IRUGO, rbd_name_show, NULL);
> >   static DEVICE_ATTR(refresh, S_IWUSR, NULL, rbd_image_refresh);
> >   static DEVICE_ATTR(current_snap, S_IRUGO, rbd_snap_show, NULL);
> > @@ -1908,7 +1906,7 @@ static struct attribute *rbd_attrs[] = {
> >   	&dev_attr_size.attr,
> >   	&dev_attr_major.attr,
> >   	&dev_attr_client_id.attr,
> > -	&dev_attr_pool.attr,
> > +	&dev_attr_pool_id.attr,
> >   	&dev_attr_name.attr,
> >   	&dev_attr_current_snap.attr,
> >   	&dev_attr_refresh.attr,
> > @@ -2329,25 +2327,31 @@ static inline char *dup_token(const char **buf,
> > size_t *lenp)
> >   }
> > 
> >   /*
> > - * This fills in the pool_name, obj, obj_len, snap_name, obj_len,
> > - * rbd_dev, rbd_md_name, and name fields of the given rbd_dev, based
> > - * on the list of monitor addresses and other options provided via
> > - * /sys/bus/rbd/add.
> > + * This fills in the obj, obj_len, snap_name, rbd_dev, rbd_md_name,
> > + * and name fields of the given rbd_dev, based on the list of
> > + * monitor addresses and other options provided via /sys/bus/rbd/add.
> > + *
> > + * Returns a pointer to a dynamically-allocated buffer containing
> > + * the pool name provided, or a pointer-coded errno if an error
> > + * occurs.
> >    */
> > -static int rbd_add_parse_args(struct rbd_device *rbd_dev,
> > +static char *rbd_add_parse_args(struct rbd_device *rbd_dev,
> >   			      const char *buf,
> >   			      const char **mon_addrs,
> >   			      size_t *mon_addrs_size,
> >   			      char *options,
> >   			      size_t options_size)
> >   {
> > -	size_t	len;
> > +	size_t len;
> > +	int ret = -EINVAL;
> > +	char *pool_name = NULL;
> > 
> >   	/* The first four tokens are required */
> > 
> >   	len = next_token(&buf);
> >   	if (!len)
> > -		return -EINVAL;
> > +		goto out_err;
> > +
> >   	*mon_addrs_size = len + 1;
> >   	*mon_addrs = buf;
> > 
> > @@ -2355,15 +2359,17 @@ static int rbd_add_parse_args(struct rbd_device
> > *rbd_dev,
> > 
> >   	len = copy_token(&buf, options, options_size);
> >   	if (!len || len>= options_size)
> > -		return -EINVAL;
> > +		goto out_err;
> > 
> > -	len = copy_token(&buf, rbd_dev->pool_name, sizeof
> > (rbd_dev->pool_name));
> > -	if (!len || len>= sizeof (rbd_dev->pool_name))
> > -		return -EINVAL;
> > +	pool_name = dup_token(&buf, NULL);
> > +	if (!pool_name) {
> > +		ret = -ENOMEM;
> > +		goto out_err;
> > +	}
> > 
> >   	len = copy_token(&buf, rbd_dev->obj, sizeof (rbd_dev->obj));
> >   	if (!len || len>= sizeof (rbd_dev->obj))
> > -		return -EINVAL;
> > +		goto out_err;
> > 
> >   	/* We have the object length in hand, save it. */
> > 
> > @@ -2382,9 +2388,14 @@ static int rbd_add_parse_args(struct rbd_device
> > *rbd_dev,
> >   		memcpy(rbd_dev->snap_name, RBD_SNAP_HEAD_NAME,
> >   			sizeof (RBD_SNAP_HEAD_NAME));
> >   	else if (len>= sizeof (rbd_dev->snap_name))
> > -		return -EINVAL;
> > +		goto out_err;
> > 
> > -	return 0;
> > +	return pool_name;
> > +
> > +out_err:
> > +	kfree(pool_name);
> > +
> > +	return ERR_PTR(ret);
> >   }
> > 
> >   static ssize_t rbd_add(struct bus_type *bus,
> > @@ -2396,6 +2407,7 @@ static ssize_t rbd_add(struct bus_type *bus,
> >   	size_t mon_addrs_size = 0;
> >   	char *options = NULL;
> >   	struct ceph_osd_client *osdc;
> > +	char *pool_name;
> >   	int rc = -ENOMEM;
> > 
> >   	if (!try_module_get(THIS_MODULE))
> > @@ -2425,10 +2437,14 @@ static ssize_t rbd_add(struct bus_type *bus,
> >   	sprintf(rbd_dev->name, "%s%d", RBD_DRV_NAME, rbd_dev->id);
> > 
> >   	/* parse add command */
> > -	rc = rbd_add_parse_args(rbd_dev, buf,&mon_addrs,&mon_addrs_size,
> > -				options, count);
> > -	if (rc)
> > +	pool_name = rbd_add_parse_args(rbd_dev, buf,
> > +					&mon_addrs,&mon_addrs_size,
> > +					options, count);
> > +	if (IS_ERR(pool_name)) {
> > +		rc = PTR_ERR(pool_name);
> > +		pool_name = NULL;
> >   		goto err_put_id;
> > +	}
> > 
> >   	rbd_dev->rbd_client = rbd_get_client(mon_addrs, mon_addrs_size - 1,
> >   						options);
> > @@ -2439,10 +2455,10 @@ static ssize_t rbd_add(struct bus_type *bus,
> > 
> >   	/* pick the pool */
> >   	osdc =&rbd_dev->rbd_client->client->osdc;
> > -	rc = ceph_pg_poolid_by_name(osdc->osdmap, rbd_dev->pool_name);
> > +	rc = ceph_pg_poolid_by_name(osdc->osdmap, pool_name);
> >   	if (rc<  0)
> >   		goto err_out_client;
> > -	rbd_dev->poolid = rc;
> > +	rbd_dev->pool_id = rc;
> > 
> >   	/* register our block device */
> >   	rc = register_blkdev(0, rbd_dev->name);
> > @@ -2453,6 +2469,7 @@ static ssize_t rbd_add(struct bus_type *bus,
> >   	rc = rbd_bus_add_dev(rbd_dev);
> >   	if (rc)
> >   		goto err_out_blkdev;
> > +	kfree(pool_name);
> > 
> >   	/*
> >   	 * At this point cleanup in the event of an error is the job
> > @@ -2482,6 +2499,7 @@ err_out_blkdev:
> >   err_out_client:
> >   	rbd_put_client(rbd_dev);
> >   err_put_id:
> > +	kfree(pool_name);
> >   	rbd_id_put(rbd_dev);
> >   err_nomem:
> >   	kfree(options);
> 
> --
> To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 
> 
--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Alex Elder July 11, 2012, 10:25 p.m. UTC | #3
On 07/11/2012 02:36 PM, Josh Durgin wrote:
> On 07/11/2012 07:02 AM, Alex Elder wrote:
>> An rbd device's pool name is used to specify the pool to use for an
>> rbd image when it gets mapped (via rbd_add()).  However, only the
>> pool id can be relied on to be invariant--the name of a pool can
>> change at any time.
>>
>> This means that the name of the pool is potentially stale as soon as
>> the image is mapped, so it's a bad idea to record this information.
>> So only store the pool id, not its name, in an rbd_dev.
>>
>> Here are a few specific notes about this change:
>>      - The "pool" name device attribute (/sys/bus/rbd/devices/<N>/pool)
>>        goes away.  In its place is a "pool_id" attribute that provide
>>        the numeric pool id for the rbd image.
> 
> We're using the pool name for udev to provide a predictable device name
> (/dev/rbd/poolname/imagename[@snapname]), so we probably want to keep
> the sysfs attribute. I don't think there's a good way for us to detect
> pool renames right now though, so we should document that the pool
> name reported does not reflect any potential renames.

OK.  I'll put the "pool" name entry back, but will still include the
new "pool_id" entry as well.  Do you want me to re-post after that,
or can I consider this reviewed?

					-Alex

>>      - rbd_add_parse_args() now returns a pointer to a dynamically-
>>        allocated copy of the pool name taken from the arguments.
>>      - rbd_add() has been changed to handle freeing the pool name,
>>        both in error cases and in the normal case after the pool id
>>        has been recorded.
>>
>> Signed-off-by: Alex Elder<elder@inktank.com>

. . .
--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Josh Durgin July 11, 2012, 11:32 p.m. UTC | #4
On 07/11/2012 03:25 PM, Alex Elder wrote:
> On 07/11/2012 02:36 PM, Josh Durgin wrote:
>> On 07/11/2012 07:02 AM, Alex Elder wrote:
>>> An rbd device's pool name is used to specify the pool to use for an
>>> rbd image when it gets mapped (via rbd_add()).  However, only the
>>> pool id can be relied on to be invariant--the name of a pool can
>>> change at any time.
>>>
>>> This means that the name of the pool is potentially stale as soon as
>>> the image is mapped, so it's a bad idea to record this information.
>>> So only store the pool id, not its name, in an rbd_dev.
>>>
>>> Here are a few specific notes about this change:
>>>       - The "pool" name device attribute (/sys/bus/rbd/devices/<N>/pool)
>>>         goes away.  In its place is a "pool_id" attribute that provide
>>>         the numeric pool id for the rbd image.
>>
>> We're using the pool name for udev to provide a predictable device name
>> (/dev/rbd/poolname/imagename[@snapname]), so we probably want to keep
>> the sysfs attribute. I don't think there's a good way for us to detect
>> pool renames right now though, so we should document that the pool
>> name reported does not reflect any potential renames.
>
> OK.  I'll put the "pool" name entry back, but will still include the
> new "pool_id" entry as well.  Do you want me to re-post after that,
> or can I consider this reviewed?

Consider it reviewed. The same goes for the rest of this series,
since it was just minor changes.
--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Alex Elder July 12, 2012, 2:59 a.m. UTC | #5
On 07/11/2012 02:36 PM, Josh Durgin wrote:
> On 07/11/2012 07:02 AM, Alex Elder wrote:
>> An rbd device's pool name is used to specify the pool to use for an
>> rbd image when it gets mapped (via rbd_add()).  However, only the
>> pool id can be relied on to be invariant--the name of a pool can
>> change at any time.
>>
>> This means that the name of the pool is potentially stale as soon as
>> the image is mapped, so it's a bad idea to record this information.
>> So only store the pool id, not its name, in an rbd_dev.
>>
>> Here are a few specific notes about this change:
>>      - The "pool" name device attribute (/sys/bus/rbd/devices/<N>/pool)
>>        goes away.  In its place is a "pool_id" attribute that provide
>>        the numeric pool id for the rbd image.
> 
> We're using the pool name for udev to provide a predictable device name
> (/dev/rbd/poolname/imagename[@snapname]), so we probably want to keep
> the sysfs attribute. I don't think there's a good way for us to detect
> pool renames right now though, so we should document that the pool
> name reported does not reflect any potential renames.

OK, now that I've thought about this...

I'm going to pull this patch from the series entirely.
In its place, I'll do a patch that makes the pool name
get allocated dynamically (like the patches that follow
this one).  I will also put in place a patch that exposes
the pool id via the /sys/bus/rbd/devices/<N>/pool_id though.

Long term we could switch to using the pool id for the
predictable device name.  All format 1 images would
share the same pool (which would need to be assigned a
reserved pool id different from the empty string).

Which brings up another thing...
It would be good to provide an efficient tool to allow
a customer to convert a format 1 rbd image into format 2.
It seems like it would mostly involve object renames,
however they would all have to be done transactionally.

					-Alex

>>      - rbd_add_parse_args() now returns a pointer to a dynamically-
>>        allocated copy of the pool name taken from the arguments.
>>      - rbd_add() has been changed to handle freeing the pool name,
>>        both in error cases and in the normal case after the pool id
>>        has been recorded.
>>
>> Signed-off-by: Alex Elder<elder@inktank.com>
>> ---
>>   drivers/block/rbd.c |   74
>> +++++++++++++++++++++++++++++++-------------------
>>   1 files changed, 46 insertions(+), 28 deletions(-)
>>
>> diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
>> index 3aa0ca0..76e978c 100644
>> --- a/drivers/block/rbd.c
>> +++ b/drivers/block/rbd.c
>> @@ -56,7 +56,6 @@
>>   #define RBD_MINORS_PER_MAJOR    256        /* max minors per blkdev */
>>
>>   #define RBD_MAX_MD_NAME_LEN    (RBD_MAX_OBJ_NAME_LEN +
>> sizeof(RBD_SUFFIX))
>> -#define RBD_MAX_POOL_NAME_LEN    64
>>   #define RBD_MAX_SNAP_NAME_LEN    32
>>   #define RBD_MAX_OPT_LEN        1024
>>
>> @@ -166,8 +165,7 @@ struct rbd_device {
>>       char            obj[RBD_MAX_OBJ_NAME_LEN]; /* rbd image name */
>>       int            obj_len;
>>       char            obj_md_name[RBD_MAX_MD_NAME_LEN]; /* hdr nm. */
>> -    char            pool_name[RBD_MAX_POOL_NAME_LEN];
>> -    int            poolid;
>> +    int            pool_id;
>>
>>       struct ceph_osd_event   *watch_event;
>>       struct ceph_osd_request *watch_request;
>> @@ -930,7 +928,7 @@ static int rbd_do_request(struct request *rq,
>>       layout->fl_stripe_unit = cpu_to_le32(1<<  RBD_MAX_OBJ_ORDER);
>>       layout->fl_stripe_count = cpu_to_le32(1);
>>       layout->fl_object_size = cpu_to_le32(1<<  RBD_MAX_OBJ_ORDER);
>> -    layout->fl_pg_pool = cpu_to_le32(dev->poolid);
>> +    layout->fl_pg_pool = cpu_to_le32(dev->pool_id);
>>       ceph_calc_raw_layout(osdc, layout, snapid, ofs,&len,&bno,
>>                   req, ops);
>>
>> @@ -1653,7 +1651,7 @@ static int rbd_header_add_snap(struct rbd_device
>> *dev,
>>           return -EINVAL;
>>
>>       monc =&dev->rbd_client->client->monc;
>> -    ret = ceph_monc_create_snapid(monc, dev->poolid,&new_snapid);
>> +    ret = ceph_monc_create_snapid(monc, dev->pool_id,&new_snapid);
>>       dout("created snapid=%lld\n", new_snapid);
>>       if (ret<  0)
>>           return ret;
>> @@ -1851,12 +1849,12 @@ static ssize_t rbd_client_id_show(struct device
>> *dev,
>>               ceph_client_id(rbd_dev->rbd_client->client));
>>   }
>>
>> -static ssize_t rbd_pool_show(struct device *dev,
>> +static ssize_t rbd_pool_id_show(struct device *dev,
>>                    struct device_attribute *attr, char *buf)
>>   {
>>       struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
>>
>> -    return sprintf(buf, "%s\n", rbd_dev->pool_name);
>> +    return sprintf(buf, "%d\n", rbd_dev->pool_id);
>>   }
>>
>>   static ssize_t rbd_name_show(struct device *dev,
>> @@ -1898,7 +1896,7 @@ static ssize_t rbd_image_refresh(struct device
>> *dev,
>>   static DEVICE_ATTR(size, S_IRUGO, rbd_size_show, NULL);
>>   static DEVICE_ATTR(major, S_IRUGO, rbd_major_show, NULL);
>>   static DEVICE_ATTR(client_id, S_IRUGO, rbd_client_id_show, NULL);
>> -static DEVICE_ATTR(pool, S_IRUGO, rbd_pool_show, NULL);
>> +static DEVICE_ATTR(pool_id, S_IRUGO, rbd_pool_id_show, NULL);
>>   static DEVICE_ATTR(name, S_IRUGO, rbd_name_show, NULL);
>>   static DEVICE_ATTR(refresh, S_IWUSR, NULL, rbd_image_refresh);
>>   static DEVICE_ATTR(current_snap, S_IRUGO, rbd_snap_show, NULL);
>> @@ -1908,7 +1906,7 @@ static struct attribute *rbd_attrs[] = {
>>       &dev_attr_size.attr,
>>       &dev_attr_major.attr,
>>       &dev_attr_client_id.attr,
>> -    &dev_attr_pool.attr,
>> +    &dev_attr_pool_id.attr,
>>       &dev_attr_name.attr,
>>       &dev_attr_current_snap.attr,
>>       &dev_attr_refresh.attr,
>> @@ -2329,25 +2327,31 @@ static inline char *dup_token(const char **buf,
>> size_t *lenp)
>>   }
>>
>>   /*
>> - * This fills in the pool_name, obj, obj_len, snap_name, obj_len,
>> - * rbd_dev, rbd_md_name, and name fields of the given rbd_dev, based
>> - * on the list of monitor addresses and other options provided via
>> - * /sys/bus/rbd/add.
>> + * This fills in the obj, obj_len, snap_name, rbd_dev, rbd_md_name,
>> + * and name fields of the given rbd_dev, based on the list of
>> + * monitor addresses and other options provided via /sys/bus/rbd/add.
>> + *
>> + * Returns a pointer to a dynamically-allocated buffer containing
>> + * the pool name provided, or a pointer-coded errno if an error
>> + * occurs.
>>    */
>> -static int rbd_add_parse_args(struct rbd_device *rbd_dev,
>> +static char *rbd_add_parse_args(struct rbd_device *rbd_dev,
>>                     const char *buf,
>>                     const char **mon_addrs,
>>                     size_t *mon_addrs_size,
>>                     char *options,
>>                     size_t options_size)
>>   {
>> -    size_t    len;
>> +    size_t len;
>> +    int ret = -EINVAL;
>> +    char *pool_name = NULL;
>>
>>       /* The first four tokens are required */
>>
>>       len = next_token(&buf);
>>       if (!len)
>> -        return -EINVAL;
>> +        goto out_err;
>> +
>>       *mon_addrs_size = len + 1;
>>       *mon_addrs = buf;
>>
>> @@ -2355,15 +2359,17 @@ static int rbd_add_parse_args(struct rbd_device
>> *rbd_dev,
>>
>>       len = copy_token(&buf, options, options_size);
>>       if (!len || len>= options_size)
>> -        return -EINVAL;
>> +        goto out_err;
>>
>> -    len = copy_token(&buf, rbd_dev->pool_name, sizeof
>> (rbd_dev->pool_name));
>> -    if (!len || len>= sizeof (rbd_dev->pool_name))
>> -        return -EINVAL;
>> +    pool_name = dup_token(&buf, NULL);
>> +    if (!pool_name) {
>> +        ret = -ENOMEM;
>> +        goto out_err;
>> +    }
>>
>>       len = copy_token(&buf, rbd_dev->obj, sizeof (rbd_dev->obj));
>>       if (!len || len>= sizeof (rbd_dev->obj))
>> -        return -EINVAL;
>> +        goto out_err;
>>
>>       /* We have the object length in hand, save it. */
>>
>> @@ -2382,9 +2388,14 @@ static int rbd_add_parse_args(struct rbd_device
>> *rbd_dev,
>>           memcpy(rbd_dev->snap_name, RBD_SNAP_HEAD_NAME,
>>               sizeof (RBD_SNAP_HEAD_NAME));
>>       else if (len>= sizeof (rbd_dev->snap_name))
>> -        return -EINVAL;
>> +        goto out_err;
>>
>> -    return 0;
>> +    return pool_name;
>> +
>> +out_err:
>> +    kfree(pool_name);
>> +
>> +    return ERR_PTR(ret);
>>   }
>>
>>   static ssize_t rbd_add(struct bus_type *bus,
>> @@ -2396,6 +2407,7 @@ static ssize_t rbd_add(struct bus_type *bus,
>>       size_t mon_addrs_size = 0;
>>       char *options = NULL;
>>       struct ceph_osd_client *osdc;
>> +    char *pool_name;
>>       int rc = -ENOMEM;
>>
>>       if (!try_module_get(THIS_MODULE))
>> @@ -2425,10 +2437,14 @@ static ssize_t rbd_add(struct bus_type *bus,
>>       sprintf(rbd_dev->name, "%s%d", RBD_DRV_NAME, rbd_dev->id);
>>
>>       /* parse add command */
>> -    rc = rbd_add_parse_args(rbd_dev, buf,&mon_addrs,&mon_addrs_size,
>> -                options, count);
>> -    if (rc)
>> +    pool_name = rbd_add_parse_args(rbd_dev, buf,
>> +                    &mon_addrs,&mon_addrs_size,
>> +                    options, count);
>> +    if (IS_ERR(pool_name)) {
>> +        rc = PTR_ERR(pool_name);
>> +        pool_name = NULL;
>>           goto err_put_id;
>> +    }
>>
>>       rbd_dev->rbd_client = rbd_get_client(mon_addrs, mon_addrs_size - 1,
>>                           options);
>> @@ -2439,10 +2455,10 @@ static ssize_t rbd_add(struct bus_type *bus,
>>
>>       /* pick the pool */
>>       osdc =&rbd_dev->rbd_client->client->osdc;
>> -    rc = ceph_pg_poolid_by_name(osdc->osdmap, rbd_dev->pool_name);
>> +    rc = ceph_pg_poolid_by_name(osdc->osdmap, pool_name);
>>       if (rc<  0)
>>           goto err_out_client;
>> -    rbd_dev->poolid = rc;
>> +    rbd_dev->pool_id = rc;
>>
>>       /* register our block device */
>>       rc = register_blkdev(0, rbd_dev->name);
>> @@ -2453,6 +2469,7 @@ static ssize_t rbd_add(struct bus_type *bus,
>>       rc = rbd_bus_add_dev(rbd_dev);
>>       if (rc)
>>           goto err_out_blkdev;
>> +    kfree(pool_name);
>>
>>       /*
>>        * At this point cleanup in the event of an error is the job
>> @@ -2482,6 +2499,7 @@ err_out_blkdev:
>>   err_out_client:
>>       rbd_put_client(rbd_dev);
>>   err_put_id:
>> +    kfree(pool_name);
>>       rbd_id_put(rbd_dev);
>>   err_nomem:
>>       kfree(options);
> 
> 
> 


--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Josh Durgin July 12, 2012, 4:19 a.m. UTC | #6
On 07/11/2012 07:59 PM, Alex Elder wrote:
> On 07/11/2012 02:36 PM, Josh Durgin wrote:
>> On 07/11/2012 07:02 AM, Alex Elder wrote:
>>> An rbd device's pool name is used to specify the pool to use for an
>>> rbd image when it gets mapped (via rbd_add()).  However, only the
>>> pool id can be relied on to be invariant--the name of a pool can
>>> change at any time.
>>>
>>> This means that the name of the pool is potentially stale as soon as
>>> the image is mapped, so it's a bad idea to record this information.
>>> So only store the pool id, not its name, in an rbd_dev.
>>>
>>> Here are a few specific notes about this change:
>>>       - The "pool" name device attribute (/sys/bus/rbd/devices/<N>/pool)
>>>         goes away.  In its place is a "pool_id" attribute that provide
>>>         the numeric pool id for the rbd image.
>>
>> We're using the pool name for udev to provide a predictable device name
>> (/dev/rbd/poolname/imagename[@snapname]), so we probably want to keep
>> the sysfs attribute. I don't think there's a good way for us to detect
>> pool renames right now though, so we should document that the pool
>> name reported does not reflect any potential renames.
>
> OK, now that I've thought about this...
>
> I'm going to pull this patch from the series entirely.
> In its place, I'll do a patch that makes the pool name
> get allocated dynamically (like the patches that follow
> this one).  I will also put in place a patch that exposes
> the pool id via the /sys/bus/rbd/devices/<N>/pool_id though.

Sounds good.

> Long term we could switch to using the pool id for the
> predictable device name.  All format 1 images would
> share the same pool (which would need to be assigned a
> reserved pool id different from the empty string).

We could do that. If so, we should use image id as well.
I'm not sure it would be a good idea though. All user (and
management software) interaction with rbd images is done via names,
so if you ever need to reopen an image, like with live migration,
renaming is going to cause the destination to fail to find a renamed
image anyway. In general, renaming things while they're in use
is probably going to be problematic in any case, since management
layers above rbd would need to be aware of the rename as well,
and that's not something any of them support (that I'm aware of).

> Which brings up another thing...
> It would be good to provide an efficient tool to allow
> a customer to convert a format 1 rbd image into format 2.
> It seems like it would mostly involve object renames,
> however they would all have to be done transactionally.

There's no such thing as an object rename, since objects are placed
into pgs based on a hash of their name. A conversion tool wouldn't
make the process more efficient. Instead, you can just export the
format 1 image and import it as a format 2 image.

Josh

>
> 					-Alex
>
>>>       - rbd_add_parse_args() now returns a pointer to a dynamically-
>>>         allocated copy of the pool name taken from the arguments.
>>>       - rbd_add() has been changed to handle freeing the pool name,
>>>         both in error cases and in the normal case after the pool id
>>>         has been recorded.
>>>
>>> Signed-off-by: Alex Elder<elder@inktank.com>
>>> ---
>>>    drivers/block/rbd.c |   74
>>> +++++++++++++++++++++++++++++++-------------------
>>>    1 files changed, 46 insertions(+), 28 deletions(-)
>>>
>>> diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
>>> index 3aa0ca0..76e978c 100644
>>> --- a/drivers/block/rbd.c
>>> +++ b/drivers/block/rbd.c
>>> @@ -56,7 +56,6 @@
>>>    #define RBD_MINORS_PER_MAJOR    256        /* max minors per blkdev */
>>>
>>>    #define RBD_MAX_MD_NAME_LEN    (RBD_MAX_OBJ_NAME_LEN +
>>> sizeof(RBD_SUFFIX))
>>> -#define RBD_MAX_POOL_NAME_LEN    64
>>>    #define RBD_MAX_SNAP_NAME_LEN    32
>>>    #define RBD_MAX_OPT_LEN        1024
>>>
>>> @@ -166,8 +165,7 @@ struct rbd_device {
>>>        char            obj[RBD_MAX_OBJ_NAME_LEN]; /* rbd image name */
>>>        int            obj_len;
>>>        char            obj_md_name[RBD_MAX_MD_NAME_LEN]; /* hdr nm. */
>>> -    char            pool_name[RBD_MAX_POOL_NAME_LEN];
>>> -    int            poolid;
>>> +    int            pool_id;
>>>
>>>        struct ceph_osd_event   *watch_event;
>>>        struct ceph_osd_request *watch_request;
>>> @@ -930,7 +928,7 @@ static int rbd_do_request(struct request *rq,
>>>        layout->fl_stripe_unit = cpu_to_le32(1<<  RBD_MAX_OBJ_ORDER);
>>>        layout->fl_stripe_count = cpu_to_le32(1);
>>>        layout->fl_object_size = cpu_to_le32(1<<  RBD_MAX_OBJ_ORDER);
>>> -    layout->fl_pg_pool = cpu_to_le32(dev->poolid);
>>> +    layout->fl_pg_pool = cpu_to_le32(dev->pool_id);
>>>        ceph_calc_raw_layout(osdc, layout, snapid, ofs,&len,&bno,
>>>                    req, ops);
>>>
>>> @@ -1653,7 +1651,7 @@ static int rbd_header_add_snap(struct rbd_device
>>> *dev,
>>>            return -EINVAL;
>>>
>>>        monc =&dev->rbd_client->client->monc;
>>> -    ret = ceph_monc_create_snapid(monc, dev->poolid,&new_snapid);
>>> +    ret = ceph_monc_create_snapid(monc, dev->pool_id,&new_snapid);
>>>        dout("created snapid=%lld\n", new_snapid);
>>>        if (ret<  0)
>>>            return ret;
>>> @@ -1851,12 +1849,12 @@ static ssize_t rbd_client_id_show(struct device
>>> *dev,
>>>                ceph_client_id(rbd_dev->rbd_client->client));
>>>    }
>>>
>>> -static ssize_t rbd_pool_show(struct device *dev,
>>> +static ssize_t rbd_pool_id_show(struct device *dev,
>>>                     struct device_attribute *attr, char *buf)
>>>    {
>>>        struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
>>>
>>> -    return sprintf(buf, "%s\n", rbd_dev->pool_name);
>>> +    return sprintf(buf, "%d\n", rbd_dev->pool_id);
>>>    }
>>>
>>>    static ssize_t rbd_name_show(struct device *dev,
>>> @@ -1898,7 +1896,7 @@ static ssize_t rbd_image_refresh(struct device
>>> *dev,
>>>    static DEVICE_ATTR(size, S_IRUGO, rbd_size_show, NULL);
>>>    static DEVICE_ATTR(major, S_IRUGO, rbd_major_show, NULL);
>>>    static DEVICE_ATTR(client_id, S_IRUGO, rbd_client_id_show, NULL);
>>> -static DEVICE_ATTR(pool, S_IRUGO, rbd_pool_show, NULL);
>>> +static DEVICE_ATTR(pool_id, S_IRUGO, rbd_pool_id_show, NULL);
>>>    static DEVICE_ATTR(name, S_IRUGO, rbd_name_show, NULL);
>>>    static DEVICE_ATTR(refresh, S_IWUSR, NULL, rbd_image_refresh);
>>>    static DEVICE_ATTR(current_snap, S_IRUGO, rbd_snap_show, NULL);
>>> @@ -1908,7 +1906,7 @@ static struct attribute *rbd_attrs[] = {
>>>        &dev_attr_size.attr,
>>>        &dev_attr_major.attr,
>>>        &dev_attr_client_id.attr,
>>> -    &dev_attr_pool.attr,
>>> +    &dev_attr_pool_id.attr,
>>>        &dev_attr_name.attr,
>>>        &dev_attr_current_snap.attr,
>>>        &dev_attr_refresh.attr,
>>> @@ -2329,25 +2327,31 @@ static inline char *dup_token(const char **buf,
>>> size_t *lenp)
>>>    }
>>>
>>>    /*
>>> - * This fills in the pool_name, obj, obj_len, snap_name, obj_len,
>>> - * rbd_dev, rbd_md_name, and name fields of the given rbd_dev, based
>>> - * on the list of monitor addresses and other options provided via
>>> - * /sys/bus/rbd/add.
>>> + * This fills in the obj, obj_len, snap_name, rbd_dev, rbd_md_name,
>>> + * and name fields of the given rbd_dev, based on the list of
>>> + * monitor addresses and other options provided via /sys/bus/rbd/add.
>>> + *
>>> + * Returns a pointer to a dynamically-allocated buffer containing
>>> + * the pool name provided, or a pointer-coded errno if an error
>>> + * occurs.
>>>     */
>>> -static int rbd_add_parse_args(struct rbd_device *rbd_dev,
>>> +static char *rbd_add_parse_args(struct rbd_device *rbd_dev,
>>>                      const char *buf,
>>>                      const char **mon_addrs,
>>>                      size_t *mon_addrs_size,
>>>                      char *options,
>>>                      size_t options_size)
>>>    {
>>> -    size_t    len;
>>> +    size_t len;
>>> +    int ret = -EINVAL;
>>> +    char *pool_name = NULL;
>>>
>>>        /* The first four tokens are required */
>>>
>>>        len = next_token(&buf);
>>>        if (!len)
>>> -        return -EINVAL;
>>> +        goto out_err;
>>> +
>>>        *mon_addrs_size = len + 1;
>>>        *mon_addrs = buf;
>>>
>>> @@ -2355,15 +2359,17 @@ static int rbd_add_parse_args(struct rbd_device
>>> *rbd_dev,
>>>
>>>        len = copy_token(&buf, options, options_size);
>>>        if (!len || len>= options_size)
>>> -        return -EINVAL;
>>> +        goto out_err;
>>>
>>> -    len = copy_token(&buf, rbd_dev->pool_name, sizeof
>>> (rbd_dev->pool_name));
>>> -    if (!len || len>= sizeof (rbd_dev->pool_name))
>>> -        return -EINVAL;
>>> +    pool_name = dup_token(&buf, NULL);
>>> +    if (!pool_name) {
>>> +        ret = -ENOMEM;
>>> +        goto out_err;
>>> +    }
>>>
>>>        len = copy_token(&buf, rbd_dev->obj, sizeof (rbd_dev->obj));
>>>        if (!len || len>= sizeof (rbd_dev->obj))
>>> -        return -EINVAL;
>>> +        goto out_err;
>>>
>>>        /* We have the object length in hand, save it. */
>>>
>>> @@ -2382,9 +2388,14 @@ static int rbd_add_parse_args(struct rbd_device
>>> *rbd_dev,
>>>            memcpy(rbd_dev->snap_name, RBD_SNAP_HEAD_NAME,
>>>                sizeof (RBD_SNAP_HEAD_NAME));
>>>        else if (len>= sizeof (rbd_dev->snap_name))
>>> -        return -EINVAL;
>>> +        goto out_err;
>>>
>>> -    return 0;
>>> +    return pool_name;
>>> +
>>> +out_err:
>>> +    kfree(pool_name);
>>> +
>>> +    return ERR_PTR(ret);
>>>    }
>>>
>>>    static ssize_t rbd_add(struct bus_type *bus,
>>> @@ -2396,6 +2407,7 @@ static ssize_t rbd_add(struct bus_type *bus,
>>>        size_t mon_addrs_size = 0;
>>>        char *options = NULL;
>>>        struct ceph_osd_client *osdc;
>>> +    char *pool_name;
>>>        int rc = -ENOMEM;
>>>
>>>        if (!try_module_get(THIS_MODULE))
>>> @@ -2425,10 +2437,14 @@ static ssize_t rbd_add(struct bus_type *bus,
>>>        sprintf(rbd_dev->name, "%s%d", RBD_DRV_NAME, rbd_dev->id);
>>>
>>>        /* parse add command */
>>> -    rc = rbd_add_parse_args(rbd_dev, buf,&mon_addrs,&mon_addrs_size,
>>> -                options, count);
>>> -    if (rc)
>>> +    pool_name = rbd_add_parse_args(rbd_dev, buf,
>>> +                    &mon_addrs,&mon_addrs_size,
>>> +                    options, count);
>>> +    if (IS_ERR(pool_name)) {
>>> +        rc = PTR_ERR(pool_name);
>>> +        pool_name = NULL;
>>>            goto err_put_id;
>>> +    }
>>>
>>>        rbd_dev->rbd_client = rbd_get_client(mon_addrs, mon_addrs_size - 1,
>>>                            options);
>>> @@ -2439,10 +2455,10 @@ static ssize_t rbd_add(struct bus_type *bus,
>>>
>>>        /* pick the pool */
>>>        osdc =&rbd_dev->rbd_client->client->osdc;
>>> -    rc = ceph_pg_poolid_by_name(osdc->osdmap, rbd_dev->pool_name);
>>> +    rc = ceph_pg_poolid_by_name(osdc->osdmap, pool_name);
>>>        if (rc<  0)
>>>            goto err_out_client;
>>> -    rbd_dev->poolid = rc;
>>> +    rbd_dev->pool_id = rc;
>>>
>>>        /* register our block device */
>>>        rc = register_blkdev(0, rbd_dev->name);
>>> @@ -2453,6 +2469,7 @@ static ssize_t rbd_add(struct bus_type *bus,
>>>        rc = rbd_bus_add_dev(rbd_dev);
>>>        if (rc)
>>>            goto err_out_blkdev;
>>> +    kfree(pool_name);
>>>
>>>        /*
>>>         * At this point cleanup in the event of an error is the job
>>> @@ -2482,6 +2499,7 @@ err_out_blkdev:
>>>    err_out_client:
>>>        rbd_put_client(rbd_dev);
>>>    err_put_id:
>>> +    kfree(pool_name);
>>>        rbd_id_put(rbd_dev);
>>>    err_nomem:
>>>        kfree(options);
>>
>>
>>
>
>


--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Alex Elder July 12, 2012, 5:05 p.m. UTC | #7
I said I would pull this patch and replace it with two others--one
that just creates an entry in /sys/bus/rbd/devices/<N>/pool_id,
and one that makes the pool name get allocated dynamically.  I'm
about to post those for review.  They fit in about the same place
in the original series but I'm not going to pollute the list by
re-posting everything again.

					-Alex
--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 3aa0ca0..76e978c 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -56,7 +56,6 @@ 
 #define RBD_MINORS_PER_MAJOR	256		/* max minors per blkdev */

 #define RBD_MAX_MD_NAME_LEN	(RBD_MAX_OBJ_NAME_LEN + sizeof(RBD_SUFFIX))
-#define RBD_MAX_POOL_NAME_LEN	64
 #define RBD_MAX_SNAP_NAME_LEN	32
 #define RBD_MAX_OPT_LEN		1024

@@ -166,8 +165,7 @@  struct rbd_device {
 	char			obj[RBD_MAX_OBJ_NAME_LEN]; /* rbd image name */
 	int			obj_len;
 	char			obj_md_name[RBD_MAX_MD_NAME_LEN]; /* hdr nm. */
-	char			pool_name[RBD_MAX_POOL_NAME_LEN];
-	int			poolid;
+	int			pool_id;

 	struct ceph_osd_event   *watch_event;
 	struct ceph_osd_request *watch_request;
@@ -930,7 +928,7 @@  static int rbd_do_request(struct request *rq,
 	layout->fl_stripe_unit = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER);
 	layout->fl_stripe_count = cpu_to_le32(1);
 	layout->fl_object_size = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER);
-	layout->fl_pg_pool = cpu_to_le32(dev->poolid);
+	layout->fl_pg_pool = cpu_to_le32(dev->pool_id);
 	ceph_calc_raw_layout(osdc, layout, snapid, ofs, &len, &bno,
 				req, ops);

@@ -1653,7 +1651,7 @@  static int rbd_header_add_snap(struct rbd_device *dev,
 		return -EINVAL;

 	monc = &dev->rbd_client->client->monc;
-	ret = ceph_monc_create_snapid(monc, dev->poolid, &new_snapid);
+	ret = ceph_monc_create_snapid(monc, dev->pool_id, &new_snapid);
 	dout("created snapid=%lld\n", new_snapid);
 	if (ret < 0)
 		return ret;
@@ -1851,12 +1849,12 @@  static ssize_t rbd_client_id_show(struct device
*dev,
 			ceph_client_id(rbd_dev->rbd_client->client));
 }

-static ssize_t rbd_pool_show(struct device *dev,
+static ssize_t rbd_pool_id_show(struct device *dev,
 			     struct device_attribute *attr, char *buf)
 {
 	struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);

-	return sprintf(buf, "%s\n", rbd_dev->pool_name);
+	return sprintf(buf, "%d\n", rbd_dev->pool_id);
 }