diff mbox

[1/8] dax: add region-available-size attribute

Message ID 148143771052.10950.7622110904173815243.stgit@dwillia2-desk3.amr.corp.intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Dan Williams Dec. 11, 2016, 6:28 a.m. UTC
In preparation for a facility that enables dax regions to be
sub-divided, introduce a 'dax/available_size' attribute.  This attribute
appears under the parent device that registered the device-dax region,
and it assumes that the device-dax-core owns the driver-data for that
device.

'dax/available_size' adjusts dynamically as dax-device instances are
registered and unregistered.

As a side effect of using __request_region() to reserve capacity from
the dax_region we now track pointers to those returned resources rather
than duplicating the passed in resource array.

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/dax/dax.c |  135 ++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 123 insertions(+), 12 deletions(-)

Comments

Johannes Thumshirn Dec. 14, 2016, 2:38 p.m. UTC | #1
Hi Dan,

On Sat, Dec 10, 2016 at 10:28:30PM -0800, Dan Williams wrote:
> In preparation for a facility that enables dax regions to be
> sub-divided, introduce a 'dax/available_size' attribute.  This attribute
> appears under the parent device that registered the device-dax region,
> and it assumes that the device-dax-core owns the driver-data for that
> device.
> 
> 'dax/available_size' adjusts dynamically as dax-device instances are
> registered and unregistered.
> 
> As a side effect of using __request_region() to reserve capacity from
> the dax_region we now track pointers to those returned resources rather
> than duplicating the passed in resource array.
> 
> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
> ---

[...]

> +static const struct attribute_group *dax_region_attribute_groups[] = {
> +	&dax_region_attribute_group,
> +	NULL,
>  };
>  
>  static struct inode *dax_alloc_inode(struct super_block *sb)
> @@ -200,12 +251,27 @@ void dax_region_put(struct dax_region *dax_region)
>  }
>  EXPORT_SYMBOL_GPL(dax_region_put);
>  
> +

Stray extra newline?

[...]

>  struct dax_region *alloc_dax_region(struct device *parent, int region_id,
>  		struct resource *res, unsigned int align, void *addr,
>  		unsigned long pfn_flags)
>  {
>  	struct dax_region *dax_region;
>  
> +	if (dev_get_drvdata(parent)) {
> +		dev_WARN(parent, "dax core found drvdata already in use\n");
> +		return NULL;
> +	}
> +

My first thought was, it might be interesting to see who already claimed
the drvdata. Then I figured, how are multiple sub-regions of a dax-device
supposed to work? What am I missing here?

>  	if (!IS_ALIGNED(res->start, align)
>  			|| !IS_ALIGNED(resource_size(res), align))
>  		return NULL;
> @@ -213,16 +279,26 @@ struct dax_region *alloc_dax_region(struct device *parent, int region_id,
>  	dax_region = kzalloc(sizeof(*dax_region), GFP_KERNEL);
>  	if (!dax_region)
>  		return NULL;
> -
> -	memcpy(&dax_region->res, res, sizeof(*res));
> +	dev_set_drvdata(parent, dax_region);
> +	dax_region->res.name = dev_name(parent);
> +	dax_region->res.start = res->start;
> +	dax_region->res.end = res->end;
>  	dax_region->pfn_flags = pfn_flags;
> +	mutex_init(&dax_region->lock);
>  	kref_init(&dax_region->kref);
>  	dax_region->id = region_id;
>  	ida_init(&dax_region->ida);
>  	dax_region->align = align;
>  	dax_region->dev = parent;
>  	dax_region->base = addr;
> +	if (sysfs_create_groups(&parent->kobj, dax_region_attribute_groups)) {
> +		kfree(dax_region);
> +		return NULL;;
> +	}
>  
> +	kref_get(&dax_region->kref);
> +	if (devm_add_action_or_reset(parent, dax_region_unregister, dax_region))
> +		return NULL;
>  	return dax_region;
>  }
>  EXPORT_SYMBOL_GPL(alloc_dax_region);

[...]

> @@ -568,28 +654,42 @@ struct dax_dev *devm_create_dax_dev(struct dax_region *dax_region,
>  	struct cdev *cdev;
>  	dev_t dev_t;
>  
> -	dax_dev = kzalloc(sizeof(*dax_dev) + sizeof(*res) * count, GFP_KERNEL);
> +	dax_dev = kzalloc(sizeof(*dax_dev), GFP_KERNEL);
>  	if (!dax_dev)
>  		return ERR_PTR(-ENOMEM);
>  
> +	dax_dev->res = kzalloc(sizeof(res) * count, GFP_KERNEL);

	dax_dev->res = kcalloc(sizeof(res), count, GFP_KERNEL); ?

> +	if (!dax_dev->res)
> +		goto err_res;
> +

Byte,
	Johannes
Dan Williams Dec. 14, 2016, 3:53 p.m. UTC | #2
On Wed, Dec 14, 2016 at 6:38 AM, Johannes Thumshirn <jthumshirn@suse.de> wrote:
> Hi Dan,
>
> On Sat, Dec 10, 2016 at 10:28:30PM -0800, Dan Williams wrote:
>> In preparation for a facility that enables dax regions to be
>> sub-divided, introduce a 'dax/available_size' attribute.  This attribute
>> appears under the parent device that registered the device-dax region,
>> and it assumes that the device-dax-core owns the driver-data for that
>> device.
>>
>> 'dax/available_size' adjusts dynamically as dax-device instances are
>> registered and unregistered.
>>
>> As a side effect of using __request_region() to reserve capacity from
>> the dax_region we now track pointers to those returned resources rather
>> than duplicating the passed in resource array.
>>
>> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
>> ---
>
> [...]
>
>> +static const struct attribute_group *dax_region_attribute_groups[] = {
>> +     &dax_region_attribute_group,
>> +     NULL,
>>  };
>>
>>  static struct inode *dax_alloc_inode(struct super_block *sb)
>> @@ -200,12 +251,27 @@ void dax_region_put(struct dax_region *dax_region)
>>  }
>>  EXPORT_SYMBOL_GPL(dax_region_put);
>>
>> +
>
> Stray extra newline?
>
> [...]
>
>>  struct dax_region *alloc_dax_region(struct device *parent, int region_id,
>>               struct resource *res, unsigned int align, void *addr,
>>               unsigned long pfn_flags)
>>  {
>>       struct dax_region *dax_region;
>>
>> +     if (dev_get_drvdata(parent)) {
>> +             dev_WARN(parent, "dax core found drvdata already in use\n");
>> +             return NULL;
>> +     }
>> +
>
> My first thought was, it might be interesting to see who already claimed
> the drvdata. Then I figured, how are multiple sub-regions of a dax-device
> supposed to work? What am I missing here?

This is a check similar to the -EBUSY return you would get from
request_mem_region(). In fact if all dax drivers are correctly calling
request_mem_region() before alloc_dax_region() then it would be
impossible for this check to ever fire. It's already impossible
because there's only one dax driver upstream (dax_pmem). It's not
really benefiting the kernel at all until we have multiple dax
drivers, I'll remove it.
Dan Williams Dec. 15, 2016, 6:47 a.m. UTC | #3
On Wed, Dec 14, 2016 at 7:53 AM, Dan Williams <dan.j.williams@intel.com> wrote:
> On Wed, Dec 14, 2016 at 6:38 AM, Johannes Thumshirn <jthumshirn@suse.de> wrote:
>> Hi Dan,
>>
>> On Sat, Dec 10, 2016 at 10:28:30PM -0800, Dan Williams wrote:
>>> In preparation for a facility that enables dax regions to be
>>> sub-divided, introduce a 'dax/available_size' attribute.  This attribute
>>> appears under the parent device that registered the device-dax region,
>>> and it assumes that the device-dax-core owns the driver-data for that
>>> device.
>>>
>>> 'dax/available_size' adjusts dynamically as dax-device instances are
>>> registered and unregistered.
>>>
>>> As a side effect of using __request_region() to reserve capacity from
>>> the dax_region we now track pointers to those returned resources rather
>>> than duplicating the passed in resource array.
>>>
>>> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
>>> ---
>>
>> [...]
>>
>>> +static const struct attribute_group *dax_region_attribute_groups[] = {
>>> +     &dax_region_attribute_group,
>>> +     NULL,
>>>  };
>>>
>>>  static struct inode *dax_alloc_inode(struct super_block *sb)
>>> @@ -200,12 +251,27 @@ void dax_region_put(struct dax_region *dax_region)
>>>  }
>>>  EXPORT_SYMBOL_GPL(dax_region_put);
>>>
>>> +
>>
>> Stray extra newline?
>>
>> [...]
>>
>>>  struct dax_region *alloc_dax_region(struct device *parent, int region_id,
>>>               struct resource *res, unsigned int align, void *addr,
>>>               unsigned long pfn_flags)
>>>  {
>>>       struct dax_region *dax_region;
>>>
>>> +     if (dev_get_drvdata(parent)) {
>>> +             dev_WARN(parent, "dax core found drvdata already in use\n");
>>> +             return NULL;
>>> +     }
>>> +
>>
>> My first thought was, it might be interesting to see who already claimed
>> the drvdata. Then I figured, how are multiple sub-regions of a dax-device
>> supposed to work? What am I missing here?
>
> This is a check similar to the -EBUSY return you would get from
> request_mem_region(). In fact if all dax drivers are correctly calling
> request_mem_region() before alloc_dax_region() then it would be
> impossible for this check to ever fire. It's already impossible
> because there's only one dax driver upstream (dax_pmem). It's not
> really benefiting the kernel at all until we have multiple dax
> drivers, I'll remove it.

No, I went to go delete this and remembered the real reason this was
added. A device driver that calls alloc_dax_region() commits to
letting the dax core own dev->driver_data. Since this wasn't even
clear to me, I'll go fix up the comment.
diff mbox

Patch

diff --git a/drivers/dax/dax.c b/drivers/dax/dax.c
index 3d94ff20fdca..cc7c4aa4bcc2 100644
--- a/drivers/dax/dax.c
+++ b/drivers/dax/dax.c
@@ -38,6 +38,7 @@  MODULE_PARM_DESC(nr_dax, "max number of device-dax instances");
  * @id: kernel-wide unique region for a memory range
  * @base: linear address corresponding to @res
  * @kref: to pin while other agents have a need to do lookups
+ * @lock: synchronize changes / consistent-access to the resource tree (@res)
  * @dev: parent device backing this region
  * @align: allocation and mapping alignment for child dax devices
  * @res: physical address range of the region
@@ -48,6 +49,7 @@  struct dax_region {
 	struct ida ida;
 	void *base;
 	struct kref kref;
+	struct mutex lock;
 	struct device *dev;
 	unsigned int align;
 	struct resource res;
@@ -72,7 +74,56 @@  struct dax_dev {
 	bool alive;
 	int id;
 	int num_resources;
-	struct resource res[0];
+	struct resource **res;
+};
+
+#define for_each_dax_region_resource(dax_region, res) \
+	for (res = (dax_region)->res.child; res; res = res->sibling)
+
+static unsigned long long dax_region_avail_size(
+		struct dax_region *dax_region)
+{
+	unsigned long long size;
+	struct resource *res;
+
+	mutex_lock(&dax_region->lock);
+	size = resource_size(&dax_region->res);
+	for_each_dax_region_resource(dax_region, res)
+		size -= resource_size(res);
+	mutex_unlock(&dax_region->lock);
+
+	return size;
+}
+
+static ssize_t available_size_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct dax_region *dax_region;
+	ssize_t rc = -ENXIO;
+
+	device_lock(dev);
+	dax_region = dev_get_drvdata(dev);
+	if (dax_region)
+		rc = sprintf(buf, "%llu\n", dax_region_avail_size(dax_region));
+	device_unlock(dev);
+
+	return rc;
+}
+static DEVICE_ATTR_RO(available_size);
+
+static struct attribute *dax_region_attributes[] = {
+	&dev_attr_available_size.attr,
+	NULL,
+};
+
+static const struct attribute_group dax_region_attribute_group = {
+	.name = "dax_region",
+	.attrs = dax_region_attributes,
+};
+
+static const struct attribute_group *dax_region_attribute_groups[] = {
+	&dax_region_attribute_group,
+	NULL,
 };
 
 static struct inode *dax_alloc_inode(struct super_block *sb)
@@ -200,12 +251,27 @@  void dax_region_put(struct dax_region *dax_region)
 }
 EXPORT_SYMBOL_GPL(dax_region_put);
 
+
+static void dax_region_unregister(void *region)
+{
+	struct dax_region *dax_region = region;
+
+	sysfs_remove_groups(&dax_region->dev->kobj,
+			dax_region_attribute_groups);
+	dax_region_put(dax_region);
+}
+
 struct dax_region *alloc_dax_region(struct device *parent, int region_id,
 		struct resource *res, unsigned int align, void *addr,
 		unsigned long pfn_flags)
 {
 	struct dax_region *dax_region;
 
+	if (dev_get_drvdata(parent)) {
+		dev_WARN(parent, "dax core found drvdata already in use\n");
+		return NULL;
+	}
+
 	if (!IS_ALIGNED(res->start, align)
 			|| !IS_ALIGNED(resource_size(res), align))
 		return NULL;
@@ -213,16 +279,26 @@  struct dax_region *alloc_dax_region(struct device *parent, int region_id,
 	dax_region = kzalloc(sizeof(*dax_region), GFP_KERNEL);
 	if (!dax_region)
 		return NULL;
-
-	memcpy(&dax_region->res, res, sizeof(*res));
+	dev_set_drvdata(parent, dax_region);
+	dax_region->res.name = dev_name(parent);
+	dax_region->res.start = res->start;
+	dax_region->res.end = res->end;
 	dax_region->pfn_flags = pfn_flags;
+	mutex_init(&dax_region->lock);
 	kref_init(&dax_region->kref);
 	dax_region->id = region_id;
 	ida_init(&dax_region->ida);
 	dax_region->align = align;
 	dax_region->dev = parent;
 	dax_region->base = addr;
+	if (sysfs_create_groups(&parent->kobj, dax_region_attribute_groups)) {
+		kfree(dax_region);
+		return NULL;;
+	}
 
+	kref_get(&dax_region->kref);
+	if (devm_add_action_or_reset(parent, dax_region_unregister, dax_region))
+		return NULL;
 	return dax_region;
 }
 EXPORT_SYMBOL_GPL(alloc_dax_region);
@@ -240,7 +316,7 @@  static ssize_t size_show(struct device *dev,
 	int i;
 
 	for (i = 0; i < dax_dev->num_resources; i++)
-		size += resource_size(&dax_dev->res[i]);
+		size += resource_size(dax_dev->res[i]);
 
 	return sprintf(buf, "%llu\n", size);
 }
@@ -309,7 +385,7 @@  static phys_addr_t pgoff_to_phys(struct dax_dev *dax_dev, pgoff_t pgoff,
 	int i;
 
 	for (i = 0; i < dax_dev->num_resources; i++) {
-		res = &dax_dev->res[i];
+		res = dax_dev->res[i];
 		phys = pgoff * PAGE_SIZE + res->start;
 		if (phys >= res->start && phys <= res->end)
 			break;
@@ -317,7 +393,7 @@  static phys_addr_t pgoff_to_phys(struct dax_dev *dax_dev, pgoff_t pgoff,
 	}
 
 	if (i < dax_dev->num_resources) {
-		res = &dax_dev->res[i];
+		res = dax_dev->res[i];
 		if (phys + size - 1 <= res->end)
 			return phys;
 	}
@@ -534,13 +610,16 @@  static void dax_dev_release(struct device *dev)
 	ida_simple_remove(&dax_minor_ida, MINOR(dev->devt));
 	dax_region_put(dax_region);
 	iput(dax_dev->inode);
+	kfree(dax_dev->res);
 	kfree(dax_dev);
 }
 
 static void unregister_dax_dev(void *dev)
 {
 	struct dax_dev *dax_dev = to_dax_dev(dev);
+	struct dax_region *dax_region = dax_dev->region;
 	struct cdev *cdev = &dax_dev->cdev;
+	int i;
 
 	dev_dbg(dev, "%s\n", __func__);
 
@@ -554,6 +633,13 @@  static void unregister_dax_dev(void *dev)
 	dax_dev->alive = false;
 	synchronize_rcu();
 	unmap_mapping_range(dax_dev->inode->i_mapping, 0, 0, 1);
+
+	mutex_lock(&dax_region->lock);
+	for (i = 0; i < dax_dev->num_resources; i++)
+		__release_region(&dax_region->res, dax_dev->res[i]->start,
+				resource_size(dax_dev->res[i]));
+	mutex_unlock(&dax_region->lock);
+
 	cdev_del(cdev);
 	device_unregister(dev);
 }
@@ -568,28 +654,42 @@  struct dax_dev *devm_create_dax_dev(struct dax_region *dax_region,
 	struct cdev *cdev;
 	dev_t dev_t;
 
-	dax_dev = kzalloc(sizeof(*dax_dev) + sizeof(*res) * count, GFP_KERNEL);
+	dax_dev = kzalloc(sizeof(*dax_dev), GFP_KERNEL);
 	if (!dax_dev)
 		return ERR_PTR(-ENOMEM);
 
+	dax_dev->res = kzalloc(sizeof(res) * count, GFP_KERNEL);
+	if (!dax_dev->res)
+		goto err_res;
+
 	for (i = 0; i < count; i++) {
+		struct resource *dax_res;
+
 		if (!IS_ALIGNED(res[i].start, dax_region->align)
 				|| !IS_ALIGNED(resource_size(&res[i]),
 					dax_region->align)) {
 			rc = -EINVAL;
 			break;
 		}
-		dax_dev->res[i].start = res[i].start;
-		dax_dev->res[i].end = res[i].end;
+
+		mutex_lock(&dax_region->lock);
+		dax_res = __request_region(&dax_region->res, res[i].start,
+				resource_size(&res[i]), NULL, 0);
+		mutex_unlock(&dax_region->lock);
+		if (!dax_res) {
+			rc = -EBUSY;
+			break;
+		}
+		dax_dev->res[i] = dax_res;
 	}
 
 	if (i < count)
-		goto err_id;
+		goto err_request_region;
 
 	dax_dev->id = ida_simple_get(&dax_region->ida, 0, 0, GFP_KERNEL);
 	if (dax_dev->id < 0) {
 		rc = dax_dev->id;
-		goto err_id;
+		goto err_request_region;
 	}
 
 	minor = ida_simple_get(&dax_minor_ida, 0, 0, GFP_KERNEL);
@@ -629,6 +729,10 @@  struct dax_dev *devm_create_dax_dev(struct dax_region *dax_region,
 	dev->groups = dax_attribute_groups;
 	dev->release = dax_dev_release;
 	dev_set_name(dev, "dax%d.%d", dax_region->id, dax_dev->id);
+	/* update resource names now that the owner device is named */
+	for (i = 0; i < dax_dev->num_resources; i++)
+		dax_dev->res[i]->name = dev_name(dev);
+
 	rc = device_add(dev);
 	if (rc) {
 		put_device(dev);
@@ -647,7 +751,14 @@  struct dax_dev *devm_create_dax_dev(struct dax_region *dax_region,
 	ida_simple_remove(&dax_minor_ida, minor);
  err_minor:
 	ida_simple_remove(&dax_region->ida, dax_dev->id);
- err_id:
+ err_request_region:
+	mutex_lock(&dax_region->lock);
+	for (i--; i >= 0; i--)
+		__release_region(&dax_region->res, dax_dev->res[i]->start,
+				resource_size(dax_dev->res[i]));
+	mutex_unlock(&dax_region->lock);
+	kfree(dax_dev->res);
+ err_res:
 	kfree(dax_dev);
 
 	return ERR_PTR(rc);