diff mbox series

[01/19] char_dev: replace cdev_map with an xarray

Message ID 20200826062446.31860-2-hch@lst.de (mailing list archive)
State New, archived
Headers show
Series [01/19] char_dev: replace cdev_map with an xarray | expand

Commit Message

Christoph Hellwig Aug. 26, 2020, 6:24 a.m. UTC
None of the complicated overlapping regions bits of the kobj_map are
required for the character device lookup, so just a trivial xarray
instead.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/char_dev.c | 94 +++++++++++++++++++++++++--------------------------
 fs/dcache.c   |  1 -
 fs/internal.h |  5 ---
 3 files changed, 46 insertions(+), 54 deletions(-)

Comments

Greg Kroah-Hartman Aug. 26, 2020, 8:19 a.m. UTC | #1
On Wed, Aug 26, 2020 at 08:24:28AM +0200, Christoph Hellwig wrote:
> None of the complicated overlapping regions bits of the kobj_map are
> required for the character device lookup, so just a trivial xarray
> instead.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Really?  This is ok to use and just as fast?  If so, wonderful, it would
be great to clean up kobj_map users.

But I don't think you got all of the needed bits here:

> ---
>  fs/char_dev.c | 94 +++++++++++++++++++++++++--------------------------
>  fs/dcache.c   |  1 -
>  fs/internal.h |  5 ---
>  3 files changed, 46 insertions(+), 54 deletions(-)
> 
> diff --git a/fs/char_dev.c b/fs/char_dev.c
> index ba0ded7842a779..6c4d6c4938f14b 100644
> --- a/fs/char_dev.c
> +++ b/fs/char_dev.c
> @@ -17,7 +17,6 @@
>  #include <linux/seq_file.h>
>  
>  #include <linux/kobject.h>
> -#include <linux/kobj_map.h>
>  #include <linux/cdev.h>
>  #include <linux/mutex.h>
>  #include <linux/backing-dev.h>
> @@ -25,8 +24,7 @@
>  
>  #include "internal.h"
>  
> -static struct kobj_map *cdev_map;
> -
> +static DEFINE_XARRAY(cdev_map);
>  static DEFINE_MUTEX(chrdevs_lock);
>  
>  #define CHRDEV_MAJOR_HASH_SIZE 255
> @@ -367,6 +365,29 @@ void cdev_put(struct cdev *p)
>  	}
>  }
>  
> +static struct cdev *cdev_lookup(dev_t dev)
> +{
> +	struct cdev *cdev;
> +
> +retry:
> +	mutex_lock(&chrdevs_lock);
> +	cdev = xa_load(&cdev_map, dev);
> +	if (!cdev) {
> +		mutex_unlock(&chrdevs_lock);
> +
> +		if (request_module("char-major-%d-%d",
> +				   MAJOR(dev), MINOR(dev)) > 0)
> +			/* Make old-style 2.4 aliases work */
> +			request_module("char-major-%d", MAJOR(dev));
> +		goto retry;
> +	}
> +
> +	if (!cdev_get(cdev))
> +		cdev = NULL;
> +	mutex_unlock(&chrdevs_lock);
> +	return cdev;
> +}
> +
>  /*
>   * Called every time a character special file is opened
>   */
> @@ -380,13 +401,10 @@ static int chrdev_open(struct inode *inode, struct file *filp)
>  	spin_lock(&cdev_lock);
>  	p = inode->i_cdev;
>  	if (!p) {
> -		struct kobject *kobj;
> -		int idx;
>  		spin_unlock(&cdev_lock);
> -		kobj = kobj_lookup(cdev_map, inode->i_rdev, &idx);
> -		if (!kobj)
> +		new = cdev_lookup(inode->i_rdev);
> +		if (!new)
>  			return -ENXIO;
> -		new = container_of(kobj, struct cdev, kobj);
>  		spin_lock(&cdev_lock);
>  		/* Check i_cdev again in case somebody beat us to it while
>  		   we dropped the lock. */
> @@ -454,18 +472,6 @@ const struct file_operations def_chr_fops = {
>  	.llseek = noop_llseek,
>  };
>  
> -static struct kobject *exact_match(dev_t dev, int *part, void *data)
> -{
> -	struct cdev *p = data;
> -	return &p->kobj;
> -}
> -
> -static int exact_lock(dev_t dev, void *data)
> -{
> -	struct cdev *p = data;
> -	return cdev_get(p) ? 0 : -1;
> -}
> -
>  /**
>   * cdev_add() - add a char device to the system
>   * @p: the cdev structure for the device
> @@ -478,7 +484,7 @@ static int exact_lock(dev_t dev, void *data)
>   */
>  int cdev_add(struct cdev *p, dev_t dev, unsigned count)
>  {
> -	int error;
> +	int error, i;
>  
>  	p->dev = dev;
>  	p->count = count;
> @@ -486,14 +492,22 @@ int cdev_add(struct cdev *p, dev_t dev, unsigned count)
>  	if (WARN_ON(dev == WHITEOUT_DEV))
>  		return -EBUSY;
>  
> -	error = kobj_map(cdev_map, dev, count, NULL,
> -			 exact_match, exact_lock, p);
> -	if (error)
> -		return error;
> +	mutex_lock(&chrdevs_lock);
> +	for (i = 0; i < count; i++) {
> +		error = xa_insert(&cdev_map, dev + i, p, GFP_KERNEL);
> +		if (error)
> +			goto out_unwind;
> +	}
> +	mutex_unlock(&chrdevs_lock);
>  
>  	kobject_get(p->kobj.parent);

Can't you drop this kobject_get() too?

And also the "struct kobj" in struct cdev can be gone as well, as the
kobj_map was the only "real" user of this structure.  I know some
drivers liked to touch that field as well, but it never actually did
anything for them, so it was pointless, but it will take some 'make
allmodconfig' builds to flush them out.

thanks,

greg k-h
Hannes Reinecke Aug. 27, 2020, 7:25 a.m. UTC | #2
On 8/26/20 8:24 AM, Christoph Hellwig wrote:
> None of the complicated overlapping regions bits of the kobj_map are
> required for the character device lookup, so just a trivial xarray
> instead.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  fs/char_dev.c | 94 +++++++++++++++++++++++++--------------------------
>  fs/dcache.c   |  1 -
>  fs/internal.h |  5 ---
>  3 files changed, 46 insertions(+), 54 deletions(-)
> 
> diff --git a/fs/char_dev.c b/fs/char_dev.c
> index ba0ded7842a779..6c4d6c4938f14b 100644
> --- a/fs/char_dev.c
> +++ b/fs/char_dev.c
> @@ -17,7 +17,6 @@
>  #include <linux/seq_file.h>
>  
>  #include <linux/kobject.h>
> -#include <linux/kobj_map.h>
>  #include <linux/cdev.h>
>  #include <linux/mutex.h>
>  #include <linux/backing-dev.h>
> @@ -25,8 +24,7 @@
>  
>  #include "internal.h"
>  
> -static struct kobj_map *cdev_map;
> -
> +static DEFINE_XARRAY(cdev_map);
>  static DEFINE_MUTEX(chrdevs_lock);
>  
>  #define CHRDEV_MAJOR_HASH_SIZE 255
> @@ -367,6 +365,29 @@ void cdev_put(struct cdev *p)
>  	}
>  }
>  
> +static struct cdev *cdev_lookup(dev_t dev)
> +{
> +	struct cdev *cdev;
> +
> +retry:
> +	mutex_lock(&chrdevs_lock);
> +	cdev = xa_load(&cdev_map, dev);
> +	if (!cdev) {
> +		mutex_unlock(&chrdevs_lock);
> +
> +		if (request_module("char-major-%d-%d",
> +				   MAJOR(dev), MINOR(dev)) > 0)
> +			/* Make old-style 2.4 aliases work */
> +			request_module("char-major-%d", MAJOR(dev));
> +		goto retry;
> +	}
> +
> +	if (!cdev_get(cdev))
> +		cdev = NULL;
> +	mutex_unlock(&chrdevs_lock);
> +	return cdev;
> +}
> +
>  /*
>   * Called every time a character special file is opened
>   */
> @@ -380,13 +401,10 @@ static int chrdev_open(struct inode *inode, struct file *filp)
>  	spin_lock(&cdev_lock);
>  	p = inode->i_cdev;
>  	if (!p) {
> -		struct kobject *kobj;
> -		int idx;
>  		spin_unlock(&cdev_lock);
> -		kobj = kobj_lookup(cdev_map, inode->i_rdev, &idx);
> -		if (!kobj)
> +		new = cdev_lookup(inode->i_rdev);
> +		if (!new)
>  			return -ENXIO;
> -		new = container_of(kobj, struct cdev, kobj);
>  		spin_lock(&cdev_lock);
>  		/* Check i_cdev again in case somebody beat us to it while
>  		   we dropped the lock. */
> @@ -454,18 +472,6 @@ const struct file_operations def_chr_fops = {
>  	.llseek = noop_llseek,
>  };
>  
> -static struct kobject *exact_match(dev_t dev, int *part, void *data)
> -{
> -	struct cdev *p = data;
> -	return &p->kobj;
> -}
> -
> -static int exact_lock(dev_t dev, void *data)
> -{
> -	struct cdev *p = data;
> -	return cdev_get(p) ? 0 : -1;
> -}
> -
>  /**
>   * cdev_add() - add a char device to the system
>   * @p: the cdev structure for the device
> @@ -478,7 +484,7 @@ static int exact_lock(dev_t dev, void *data)
>   */
>  int cdev_add(struct cdev *p, dev_t dev, unsigned count)
>  {
> -	int error;
> +	int error, i;
>  
>  	p->dev = dev;
>  	p->count = count;
> @@ -486,14 +492,22 @@ int cdev_add(struct cdev *p, dev_t dev, unsigned count)
>  	if (WARN_ON(dev == WHITEOUT_DEV))
>  		return -EBUSY;
>  
> -	error = kobj_map(cdev_map, dev, count, NULL,
> -			 exact_match, exact_lock, p);
> -	if (error)
> -		return error;
> +	mutex_lock(&chrdevs_lock);
> +	for (i = 0; i < count; i++) {
> +		error = xa_insert(&cdev_map, dev + i, p, GFP_KERNEL);
> +		if (error)
> +			goto out_unwind;
> +	}
> +	mutex_unlock(&chrdevs_lock);
>  
>  	kobject_get(p->kobj.parent);
> -
>  	return 0;
> +
> +out_unwind:
> +	while (--i >= 0)
> +		xa_erase(&cdev_map, dev + i);
> +	mutex_unlock(&chrdevs_lock);
> +	return error;
>  }
>  
>  /**
Do you really need the mutex?
Wouldn't xa_store_range() be better and avoid the mutex?

Cheers,

Hannes
Christoph Hellwig Aug. 27, 2020, 8:53 a.m. UTC | #3
On Wed, Aug 26, 2020 at 10:19:05AM +0200, Greg Kroah-Hartman wrote:
> On Wed, Aug 26, 2020 at 08:24:28AM +0200, Christoph Hellwig wrote:
> > None of the complicated overlapping regions bits of the kobj_map are
> > required for the character device lookup, so just a trivial xarray
> > instead.
> > 
> > Signed-off-by: Christoph Hellwig <hch@lst.de>
> 
> Really?  This is ok to use and just as fast?  If so, wonderful, it would
> be great to clean up kobj_map users.

Xarrays provide pretty efficient as long as we have a unsigned long
or smaller index (check, dev_t is small) and the indices are reasonable
clustered (check, minors for the same major).  Memory usage will go down
vs the probes, and lookup speed up.

> > +	mutex_lock(&chrdevs_lock);
> > +	for (i = 0; i < count; i++) {
> > +		error = xa_insert(&cdev_map, dev + i, p, GFP_KERNEL);
> > +		if (error)
> > +			goto out_unwind;
> > +	}
> > +	mutex_unlock(&chrdevs_lock);
> >  
> >  	kobject_get(p->kobj.parent);
> 
> Can't you drop this kobject_get() too?

I'll have to drop it or add back the put on the delete side.  And
I think the latter is safer for now, because..

> 
> And also the "struct kobj" in struct cdev can be gone as well, as the
> kobj_map was the only "real" user of this structure.  I know some
> drivers liked to touch that field as well, but it never actually did
> anything for them, so it was pointless, but it will take some 'make
> allmodconfig' builds to flush them out.

I looked at it, but it does get registered and shows up in sysfs.
I don't really dare to touch this for now, as it can have huge
implications.  Better done in a separate series (if we can actually do
it at all).
Christoph Hellwig Aug. 27, 2020, 8:55 a.m. UTC | #4
On Thu, Aug 27, 2020 at 09:25:07AM +0200, Hannes Reinecke wrote:
> Do you really need the mutex?
> Wouldn't xa_store_range() be better and avoid the mutex?

We need the mutex as we need to grab the kobject reference under it.

xa_store_range is only available with a separate config option, and
has really strange calling conventions.  So I'd rather not pull it in
here, especially as most cdev_add callers are for a single minor only
anyway.
Greg Kroah-Hartman Aug. 27, 2020, 9:18 a.m. UTC | #5
On Thu, Aug 27, 2020 at 10:53:53AM +0200, Christoph Hellwig wrote:
> On Wed, Aug 26, 2020 at 10:19:05AM +0200, Greg Kroah-Hartman wrote:
> > On Wed, Aug 26, 2020 at 08:24:28AM +0200, Christoph Hellwig wrote:
> > > None of the complicated overlapping regions bits of the kobj_map are
> > > required for the character device lookup, so just a trivial xarray
> > > instead.
> > > 
> > > Signed-off-by: Christoph Hellwig <hch@lst.de>
> > 
> > Really?  This is ok to use and just as fast?  If so, wonderful, it would
> > be great to clean up kobj_map users.
> 
> Xarrays provide pretty efficient as long as we have a unsigned long
> or smaller index (check, dev_t is small) and the indices are reasonable
> clustered (check, minors for the same major).  Memory usage will go down
> vs the probes, and lookup speed up.

Ok, great!

xarrays weren't around when this code was written (back in the 2.5
days).

> > > +	mutex_lock(&chrdevs_lock);
> > > +	for (i = 0; i < count; i++) {
> > > +		error = xa_insert(&cdev_map, dev + i, p, GFP_KERNEL);
> > > +		if (error)
> > > +			goto out_unwind;
> > > +	}
> > > +	mutex_unlock(&chrdevs_lock);
> > >  
> > >  	kobject_get(p->kobj.parent);
> > 
> > Can't you drop this kobject_get() too?
> 
> I'll have to drop it or add back the put on the delete side.  And
> I think the latter is safer for now, because..
> 
> > 
> > And also the "struct kobj" in struct cdev can be gone as well, as the
> > kobj_map was the only "real" user of this structure.  I know some
> > drivers liked to touch that field as well, but it never actually did
> > anything for them, so it was pointless, but it will take some 'make
> > allmodconfig' builds to flush them out.
> 
> I looked at it, but it does get registered and shows up in sysfs.

It does?  Where does that happen?  I see a bunch of kobject_init()
calls, but nothing that registers it in sysfs that I can see.

Note, this is not the kobject that shows up in /sys/dev/char/ as a
symlink, that comes from the driver core logic and is independent of the
cdev code.

The kobject does handle the structure lifetime rules, but that should be
able to be replaced with a simple kref instead.

> I don't really dare to touch this for now, as it can have huge
> implications.  Better done in a separate series (if we can actually do
> it at all).

Fair enough, I will be willing to tackle that once this gets merged, so
this is fine as-is.

thanks,

greg k-h
Christoph Hellwig Aug. 27, 2020, 9:39 a.m. UTC | #6
On Thu, Aug 27, 2020 at 11:18:59AM +0200, Greg Kroah-Hartman wrote:
> > I looked at it, but it does get registered and shows up in sysfs.
> 
> It does?  Where does that happen?  I see a bunch of kobject_init()
> calls, but nothing that registers it in sysfs that I can see.

Hmm, true.

> 
> Note, this is not the kobject that shows up in /sys/dev/char/ as a
> symlink, that comes from the driver core logic and is independent of the
> cdev code.
> 
> The kobject does handle the structure lifetime rules, but that should be
> able to be replaced with a simple kref instead.

Yeah.  I'll let you handle this stuff, as you obviously know the area
better than I do.
diff mbox series

Patch

diff --git a/fs/char_dev.c b/fs/char_dev.c
index ba0ded7842a779..6c4d6c4938f14b 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -17,7 +17,6 @@ 
 #include <linux/seq_file.h>
 
 #include <linux/kobject.h>
-#include <linux/kobj_map.h>
 #include <linux/cdev.h>
 #include <linux/mutex.h>
 #include <linux/backing-dev.h>
@@ -25,8 +24,7 @@ 
 
 #include "internal.h"
 
-static struct kobj_map *cdev_map;
-
+static DEFINE_XARRAY(cdev_map);
 static DEFINE_MUTEX(chrdevs_lock);
 
 #define CHRDEV_MAJOR_HASH_SIZE 255
@@ -367,6 +365,29 @@  void cdev_put(struct cdev *p)
 	}
 }
 
+static struct cdev *cdev_lookup(dev_t dev)
+{
+	struct cdev *cdev;
+
+retry:
+	mutex_lock(&chrdevs_lock);
+	cdev = xa_load(&cdev_map, dev);
+	if (!cdev) {
+		mutex_unlock(&chrdevs_lock);
+
+		if (request_module("char-major-%d-%d",
+				   MAJOR(dev), MINOR(dev)) > 0)
+			/* Make old-style 2.4 aliases work */
+			request_module("char-major-%d", MAJOR(dev));
+		goto retry;
+	}
+
+	if (!cdev_get(cdev))
+		cdev = NULL;
+	mutex_unlock(&chrdevs_lock);
+	return cdev;
+}
+
 /*
  * Called every time a character special file is opened
  */
@@ -380,13 +401,10 @@  static int chrdev_open(struct inode *inode, struct file *filp)
 	spin_lock(&cdev_lock);
 	p = inode->i_cdev;
 	if (!p) {
-		struct kobject *kobj;
-		int idx;
 		spin_unlock(&cdev_lock);
-		kobj = kobj_lookup(cdev_map, inode->i_rdev, &idx);
-		if (!kobj)
+		new = cdev_lookup(inode->i_rdev);
+		if (!new)
 			return -ENXIO;
-		new = container_of(kobj, struct cdev, kobj);
 		spin_lock(&cdev_lock);
 		/* Check i_cdev again in case somebody beat us to it while
 		   we dropped the lock. */
@@ -454,18 +472,6 @@  const struct file_operations def_chr_fops = {
 	.llseek = noop_llseek,
 };
 
-static struct kobject *exact_match(dev_t dev, int *part, void *data)
-{
-	struct cdev *p = data;
-	return &p->kobj;
-}
-
-static int exact_lock(dev_t dev, void *data)
-{
-	struct cdev *p = data;
-	return cdev_get(p) ? 0 : -1;
-}
-
 /**
  * cdev_add() - add a char device to the system
  * @p: the cdev structure for the device
@@ -478,7 +484,7 @@  static int exact_lock(dev_t dev, void *data)
  */
 int cdev_add(struct cdev *p, dev_t dev, unsigned count)
 {
-	int error;
+	int error, i;
 
 	p->dev = dev;
 	p->count = count;
@@ -486,14 +492,22 @@  int cdev_add(struct cdev *p, dev_t dev, unsigned count)
 	if (WARN_ON(dev == WHITEOUT_DEV))
 		return -EBUSY;
 
-	error = kobj_map(cdev_map, dev, count, NULL,
-			 exact_match, exact_lock, p);
-	if (error)
-		return error;
+	mutex_lock(&chrdevs_lock);
+	for (i = 0; i < count; i++) {
+		error = xa_insert(&cdev_map, dev + i, p, GFP_KERNEL);
+		if (error)
+			goto out_unwind;
+	}
+	mutex_unlock(&chrdevs_lock);
 
 	kobject_get(p->kobj.parent);
-
 	return 0;
+
+out_unwind:
+	while (--i >= 0)
+		xa_erase(&cdev_map, dev + i);
+	mutex_unlock(&chrdevs_lock);
+	return error;
 }
 
 /**
@@ -575,11 +589,6 @@  void cdev_device_del(struct cdev *cdev, struct device *dev)
 		cdev_del(cdev);
 }
 
-static void cdev_unmap(dev_t dev, unsigned count)
-{
-	kobj_unmap(cdev_map, dev, count);
-}
-
 /**
  * cdev_del() - remove a cdev from the system
  * @p: the cdev structure to be removed
@@ -593,10 +602,13 @@  static void cdev_unmap(dev_t dev, unsigned count)
  */
 void cdev_del(struct cdev *p)
 {
-	cdev_unmap(p->dev, p->count);
-	kobject_put(&p->kobj);
-}
+	int i;
 
+	mutex_lock(&chrdevs_lock);
+	for (i = 0; i < p->count; i++)
+		xa_erase(&cdev_map, p->dev + i);
+	mutex_unlock(&chrdevs_lock);
+}
 
 static void cdev_default_release(struct kobject *kobj)
 {
@@ -656,20 +668,6 @@  void cdev_init(struct cdev *cdev, const struct file_operations *fops)
 	cdev->ops = fops;
 }
 
-static struct kobject *base_probe(dev_t dev, int *part, void *data)
-{
-	if (request_module("char-major-%d-%d", MAJOR(dev), MINOR(dev)) > 0)
-		/* Make old-style 2.4 aliases work */
-		request_module("char-major-%d", MAJOR(dev));
-	return NULL;
-}
-
-void __init chrdev_init(void)
-{
-	cdev_map = kobj_map_init(base_probe, &chrdevs_lock);
-}
-
-
 /* Let modules do char dev stuff */
 EXPORT_SYMBOL(register_chrdev_region);
 EXPORT_SYMBOL(unregister_chrdev_region);
diff --git a/fs/dcache.c b/fs/dcache.c
index ea0485861d9377..55e534ad6f8f7f 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -3233,5 +3233,4 @@  void __init vfs_caches_init(void)
 	files_maxfiles_init();
 	mnt_init();
 	bdev_cache_init();
-	chrdev_init();
 }
diff --git a/fs/internal.h b/fs/internal.h
index 10517ece45167f..110e952e75a8aa 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -54,11 +54,6 @@  static inline void bd_forget(struct inode *inode)
 extern int __block_write_begin_int(struct page *page, loff_t pos, unsigned len,
 		get_block_t *get_block, struct iomap *iomap);
 
-/*
- * char_dev.c
- */
-extern void __init chrdev_init(void);
-
 /*
  * fs_context.c
  */