diff mbox series

[v2] mm: cma: support sysfs

Message ID 20210208180142.2765456-1-minchan@kernel.org (mailing list archive)
State New, archived
Headers show
Series [v2] mm: cma: support sysfs | expand

Commit Message

Minchan Kim Feb. 8, 2021, 6:01 p.m. UTC
Since CMA is getting used more widely, it's more important to
keep monitoring CMA statistics for system health since it's
directly related to user experience.

This patch introduces a sysfs interface for CMA and exposes the stats
below so that they can be monitored for telemetry in the system.

 * the number of CMA page allocation attempts
 * the number of CMA page allocation failures

With those per-CMA statistics, we can determine the CMA allocation
failure rate for each use case.

e.g.)
  /sys/kernel/mm/cma/WIFI/cma_alloc_pages_[attempt|fail]
  /sys/kernel/mm/cma/SENSOR/cma_alloc_pages_[attempt|fail]
  /sys/kernel/mm/cma/BLUETOOTH/cma_alloc_pages_[attempt|fail]

Signed-off-by: Minchan Kim <minchan@kernel.org>
---

From v1 - https://lore.kernel.org/linux-mm/20210203155001.4121868-1-minchan@kernel.org/
 * fix sysfs build and refactoring - willy
 * rename and drop some attributes - jhubbard

 Documentation/ABI/testing/sysfs-kernel-mm-cma |  25 ++++
 mm/Kconfig                                    |   7 ++
 mm/Makefile                                   |   1 +
 mm/cma.c                                      |   6 +-
 mm/cma.h                                      |  18 +++
 mm/cma_sysfs.c                                | 114 ++++++++++++++++++
 6 files changed, 170 insertions(+), 1 deletion(-)
 create mode 100644 Documentation/ABI/testing/sysfs-kernel-mm-cma
 create mode 100644 mm/cma_sysfs.c

Comments

John Hubbard Feb. 8, 2021, 9:34 p.m. UTC | #1
On 2/8/21 10:01 AM, Minchan Kim wrote:
> Since CMA is getting used more widely, it's more important to
> keep monitoring CMA statistics for system health since it's
> directly related to user experience.
> 
> This patch introduces sysfs for the CMA and exposes stats below
> to keep monitor for telemetric in the system.

Or:

This patch introduces sysfs statistics for CMA, in order to provide
some basic monitoring of the CMA allocator.

> 
>   * the number of CMA page allocation attempts
>   * the number of CMA page allocation failures
> 
> With those per-CMA statistics, we could know how CMA allocadtion
> failure rate for each usecases.

Maybe:

These two values allow the user to calculate the allocation
failure rate for each CMA area.

> 
> e.g.)
>    /sys/kernel/mm/cma/WIFI/cma_alloc_pages_[attempt|fail]
>    /sys/kernel/mm/cma/SENSOR/cma_alloc_pages_[attempt|fail]
>    /sys/kernel/mm/cma/BLUETOOTH/cma_alloc_pages_[attempt|fail]
> 
> Signed-off-by: Minchan Kim <minchan@kernel.org>
> ---
> 
>  From v1 - https://lore.kernel.org/linux-mm/20210203155001.4121868-1-minchan@kernel.org/
>   * fix sysfs build and refactoring - willy
>   * rename and drop some attributes - jhubbard
> 
>   Documentation/ABI/testing/sysfs-kernel-mm-cma |  25 ++++
>   mm/Kconfig                                    |   7 ++
>   mm/Makefile                                   |   1 +
>   mm/cma.c                                      |   6 +-
>   mm/cma.h                                      |  18 +++
>   mm/cma_sysfs.c                                | 114 ++++++++++++++++++
>   6 files changed, 170 insertions(+), 1 deletion(-)
>   create mode 100644 Documentation/ABI/testing/sysfs-kernel-mm-cma
>   create mode 100644 mm/cma_sysfs.c
> 
> diff --git a/Documentation/ABI/testing/sysfs-kernel-mm-cma b/Documentation/ABI/testing/sysfs-kernel-mm-cma
> new file mode 100644
> index 000000000000..68bdcc8c7681
> --- /dev/null
> +++ b/Documentation/ABI/testing/sysfs-kernel-mm-cma
> @@ -0,0 +1,25 @@
> +What:		/sys/kernel/mm/cma/
> +Date:		Feb 2021
> +Contact:	Minchan Kim <minchan@kernel.org>
> +Description:
> +		/sys/kernel/mm/cma/ contains a number of subdirectories by
> +		cma-heap name. The subdirectory contains a number of files
> +		to represent cma allocation statistics.
> +
> +		There are number of files under
> +				/sys/kernel/mm/cma/<cma-heap-name> directory
> +
> +			- cma_alloc_pages_attempt
> +			- cma_alloc_pages_fail

How about this instead:
Description:
		/sys/kernel/mm/cma/ contains a subdirectory for each CMA heap name (also
         sometimes called CMA areas).

         Each CMA heap subdirectory (that is, each
         /sys/kernel/mm/cma/<cma-heap-name> directory) contains the following
         items:

			cma_alloc_pages_attempt
			cma_alloc_pages_fail


> +
> +What:		/sys/kernel/mm/cma/<cma-heap-name>/cma_alloc_pages_attempt

Actually, shall we change that from "attempt" to "attempts"? Otherwise, the
language is a little odd there.

> +Date:		Feb 2021
> +Contact:	Minchan Kim <minchan@kernel.org>
> +Description:
> +		the number of pages CMA API tried to allocate
> +
> +What:		/sys/kernel/mm/cma/<cma-heap-name>/cma_alloc_pages_fail
> +Date:		Feb 2021
> +Contact:	Minchan Kim <minchan@kernel.org>
> +Description:
> +		the number of pages CMA API failed to allocate
> diff --git a/mm/Kconfig b/mm/Kconfig
> index ec35bf406439..ad7e9c065657 100644
> --- a/mm/Kconfig
> +++ b/mm/Kconfig
> @@ -513,6 +513,13 @@ config CMA_DEBUGFS
>   	help
>   	  Turns on the DebugFS interface for CMA.
>   
> +config CMA_SYSFS
> +	bool "CMA information through sysfs interface"
> +	depends on CMA && SYSFS
> +	help
> +	  This option exposes some sysfs attributes to get information
> +	  from CMA.
> +
>   config CMA_AREAS
>   	int "Maximum count of the CMA areas"
>   	depends on CMA
> diff --git a/mm/Makefile b/mm/Makefile
> index b2a564eec27f..0ae764e5b1a8 100644
> --- a/mm/Makefile
> +++ b/mm/Makefile
> @@ -109,6 +109,7 @@ obj-$(CONFIG_CMA)	+= cma.o
>   obj-$(CONFIG_MEMORY_BALLOON) += balloon_compaction.o
>   obj-$(CONFIG_PAGE_EXTENSION) += page_ext.o
>   obj-$(CONFIG_CMA_DEBUGFS) += cma_debug.o
> +obj-$(CONFIG_CMA_SYSFS)	+= cma_sysfs.o

Remove the unnecessary tab there, none of the other nearby lines have one.

>   obj-$(CONFIG_USERFAULTFD) += userfaultfd.o
>   obj-$(CONFIG_IDLE_PAGE_TRACKING) += page_idle.o
>   obj-$(CONFIG_DEBUG_PAGE_REF) += debug_page_ref.o
> diff --git a/mm/cma.c b/mm/cma.c
> index 23d4a97c834a..0611202d6e7d 100644
> --- a/mm/cma.c
> +++ b/mm/cma.c
> @@ -447,9 +447,10 @@ struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align,
>   	offset = cma_bitmap_aligned_offset(cma, align);
>   	bitmap_maxno = cma_bitmap_maxno(cma);
>   	bitmap_count = cma_bitmap_pages_to_bits(cma, count);
> +	cma_sysfs_alloc_count(cma, count);
>   
>   	if (bitmap_count > bitmap_maxno)
> -		return NULL;
> +		goto out;
>   
>   	for (;;) {
>   		mutex_lock(&cma->lock);
> @@ -504,6 +505,9 @@ struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align,
>   			__func__, count, ret);
>   		cma_debug_show_areas(cma);
>   	}
> +out:
> +	if (!page)
> +		cma_sysfs_fail(cma, count);
>   
>   	pr_debug("%s(): returned %p\n", __func__, page);
>   	return page;
> diff --git a/mm/cma.h b/mm/cma.h
> index 42ae082cb067..49a8ceddd9e8 100644
> --- a/mm/cma.h
> +++ b/mm/cma.h
> @@ -3,6 +3,14 @@
>   #define __MM_CMA_H__
>   
>   #include <linux/debugfs.h>
> +#include <linux/kobject.h>
> +
> +struct cma_stat {
> +	spinlock_t lock;
> +	unsigned long pages_attempt;	/* the number of CMA page allocation attempts */
> +	unsigned long pages_fail;	/* the number of CMA page allocation failures */
> +	struct kobject kobj;
> +};
>   
>   struct cma {
>   	unsigned long   base_pfn;
> @@ -16,6 +24,9 @@ struct cma {
>   	struct debugfs_u32_array dfs_bitmap;
>   #endif
>   	char name[CMA_MAX_NAME];
> +#ifdef CONFIG_CMA_SYSFS
> +	struct cma_stat	*stat;

This should not be a pointer. By making it a pointer, you've added a bunch of pointless
extra code to the implementation.

Here's a diff to implement the non-pointer way, and also to fix a build error in this
patch (missing semicolon):

diff --git a/Documentation/ABI/testing/sysfs-kernel-mm-cma 
b/Documentation/ABI/testing/sysfs-kernel-mm-cma
index 68bdcc8c7681..f3769b4e1a3c 100644
--- a/Documentation/ABI/testing/sysfs-kernel-mm-cma
+++ b/Documentation/ABI/testing/sysfs-kernel-mm-cma
@@ -2,15 +2,15 @@ What:		/sys/kernel/mm/cma/
  Date:		Feb 2021
  Contact:	Minchan Kim <minchan@kernel.org>
  Description:
-		/sys/kernel/mm/cma/ contains a number of subdirectories by
-		cma-heap name. The subdirectory contains a number of files
-		to represent cma allocation statistics.
+		/sys/kernel/mm/cma/ contains a subdirectory for each CMA heap name (also
+        sometimes called CMA areas).

-		There are number of files under
-				/sys/kernel/mm/cma/<cma-heap-name> directory
+        Each CMA heap subdirectory (that is, each
+        /sys/kernel/mm/cma/<cma-heap-name> directory) contains the following
+        items:

-			- cma_alloc_pages_attempt
-			- cma_alloc_pages_fail
+			cma_alloc_pages_attempt
+			cma_alloc_pages_fail

  What:		/sys/kernel/mm/cma/<cma-heap-name>/cma_alloc_pages_attempt
  Date:		Feb 2021
diff --git a/mm/cma.h b/mm/cma.h
index 49a8ceddd9e8..1e109830f553 100644
--- a/mm/cma.h
+++ b/mm/cma.h
@@ -25,7 +25,7 @@ struct cma {
  #endif
  	char name[CMA_MAX_NAME];
  #ifdef CONFIG_CMA_SYSFS
-	struct cma_stat	*stat;
+	struct cma_stat	stat;
  #endif
  };

diff --git a/mm/cma_sysfs.c b/mm/cma_sysfs.c
index 1f6b9f785825..52905694b6b7 100644
--- a/mm/cma_sysfs.c
+++ b/mm/cma_sysfs.c
@@ -11,20 +11,18 @@

  #include "cma.h"

-static struct cma_stat *cma_stats;
-
  void cma_sysfs_alloc_count(struct cma *cma, size_t count)
  {
-	spin_lock(&cma->stat->lock);
-	cma->stat->pages_attempt += count;
-	spin_unlock(&cma->stat->lock);
+	spin_lock(&cma->stat.lock);
+	cma->stat.pages_attempt += count;
+	spin_unlock(&cma->stat.lock);
  }

  void cma_sysfs_fail(struct cma *cma, size_t count)
  {
-	spin_lock(&cma->stat->lock);
-	cma->stat->pages_fail += count;
-	spin_unlock(&cma->stat->lock);
+	spin_lock(&cma->stat.lock);
+	cma->stat.pages_fail += count;
+	spin_unlock(&cma->stat.lock);
  }

  #define CMA_ATTR_RO(_name) \
@@ -50,13 +48,6 @@ static ssize_t cma_alloc_pages_fail_show(struct kobject *kobj,
  }
  CMA_ATTR_RO(cma_alloc_pages_fail);

-static void cma_kobj_release(struct kobject *kobj)
-{
-	struct cma_stat *stat = container_of(kobj, struct cma_stat, kobj);
-
-	kfree(stat);
-}
-
  static struct attribute *cma_attrs[] = {
  	&cma_alloc_pages_attempt_attr.attr,
  	&cma_alloc_pages_fail_attr.attr,
@@ -65,7 +56,6 @@ static struct attribute *cma_attrs[] = {
  ATTRIBUTE_GROUPS(cma);

  static struct kobj_type cma_ktype = {
-	.release = cma_kobj_release,
  	.sysfs_ops = &kobj_sysfs_ops,
  	.default_groups = cma_groups
  };
@@ -81,32 +71,23 @@ static int __init cma_sysfs_init(void)
  		return -ENOMEM;
  	}

-	cma_stats = kzalloc(array_size(sizeof(struct cma_stat),
-				cma_area_count), GFP_KERNEL);
-	if (!cma_stats) {
-		pr_err("failed to create cma_stats\n");
-		goto out;
-	}
-
  	do {
  		cma = &cma_areas[i];
-		cma->stat = &cma_stats[i];
-		spin_lock_init(&cma->stat->lock);
-		if (kobject_init_and_add(&cma->stat->kobj, &cma_ktype,
+		spin_lock_init(&cma->stat.lock);
+		if (kobject_init_and_add(&cma->stat.kobj, &cma_ktype,
  					cma_kobj, "%s", cma->name)) {
-			kobject_put(&cma->stat->kobj);
+			kobject_put(&cma->stat.kobj);
  			goto out;
  		}
-	} while (++i < cma_area_count)
+	} while (++i < cma_area_count);

  	return 0;
  out:
  	while (--i >= 0) {
  		cma = &cma_areas[i];
-		kobject_put(&cma->stat->kobj);
+		kobject_put(&cma->stat.kobj);
  	}

-	kfree(cma_stats);
  	kobject_put(cma_kobj);

  	return -ENOMEM;

> +#endif
>   };
>   
>   extern struct cma cma_areas[MAX_CMA_AREAS];
> @@ -26,4 +37,11 @@ static inline unsigned long cma_bitmap_maxno(struct cma *cma)
>   	return cma->count >> cma->order_per_bit;
>   }
>   
> +#ifdef CONFIG_CMA_SYSFS
> +void cma_sysfs_alloc_count(struct cma *cma, size_t count);
> +void cma_sysfs_fail(struct cma *cma, size_t count);
> +#else
> +static inline void cma_sysfs_alloc_count(struct cma *cma, size_t count) {};
> +static inline void cma_sysfs_fail(struct cma *cma, size_t count) {};
> +#endif
>   #endif
> diff --git a/mm/cma_sysfs.c b/mm/cma_sysfs.c
> new file mode 100644
> index 000000000000..1f6b9f785825
> --- /dev/null
> +++ b/mm/cma_sysfs.c
> @@ -0,0 +1,114 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * CMA SysFS Interface
> + *
> + * Copyright (c) 2021 Minchan Kim <minchan@kernel.org>
> + */
> +
> +#include <linux/cma.h>
> +#include <linux/kernel.h>
> +#include <linux/slab.h>
> +
> +#include "cma.h"
> +
> +static struct cma_stat *cma_stats;

I don't know what that's for but it definitely is not needed if you make cma.stat
not a pointer, and not in any other case either.

> +
> +void cma_sysfs_alloc_count(struct cma *cma, size_t count)
> +{
> +	spin_lock(&cma->stat->lock);
> +	cma->stat->pages_attempt += count;
> +	spin_unlock(&cma->stat->lock);
> +}
> +
> +void cma_sysfs_fail(struct cma *cma, size_t count)
> +{
> +	spin_lock(&cma->stat->lock);
> +	cma->stat->pages_fail += count;
> +	spin_unlock(&cma->stat->lock);
> +}
> +
> +#define CMA_ATTR_RO(_name) \
> +	static struct kobj_attribute _name##_attr = __ATTR_RO(_name)
> +
> +static struct kobject *cma_kobj;
> +
> +static ssize_t cma_alloc_pages_attempt_show(struct kobject *kobj,
> +			struct kobj_attribute *attr, char *buf)
> +{
> +	struct cma_stat *stat = container_of(kobj, struct cma_stat, kobj);
> +
> +	return sysfs_emit(buf, "%lu\n", stat->pages_attempt);
> +}
> +CMA_ATTR_RO(cma_alloc_pages_attempt);
> +
> +static ssize_t cma_alloc_pages_fail_show(struct kobject *kobj,
> +			struct kobj_attribute *attr, char *buf)
> +{
> +	struct cma_stat *stat = container_of(kobj, struct cma_stat, kobj);
> +
> +	return sysfs_emit(buf, "%lu\n", stat->pages_fail);
> +}
> +CMA_ATTR_RO(cma_alloc_pages_fail);
> +
> +static void cma_kobj_release(struct kobject *kobj)
> +{
> +	struct cma_stat *stat = container_of(kobj, struct cma_stat, kobj);
> +
> +	kfree(stat);
> +}
> +
> +static struct attribute *cma_attrs[] = {
> +	&cma_alloc_pages_attempt_attr.attr,
> +	&cma_alloc_pages_fail_attr.attr,
> +	NULL,
> +};
> +ATTRIBUTE_GROUPS(cma);
> +
> +static struct kobj_type cma_ktype = {
> +	.release = cma_kobj_release,
> +	.sysfs_ops = &kobj_sysfs_ops,
> +	.default_groups = cma_groups
> +};
> +
> +static int __init cma_sysfs_init(void)
> +{
> +	int i = 0;
> +	struct cma *cma;
> +
> +	cma_kobj = kobject_create_and_add("cma", mm_kobj);
> +	if (!cma_kobj) {
> +		pr_err("failed to create cma kobject\n");
> +		return -ENOMEM;
> +	}
> +
> +	cma_stats = kzalloc(array_size(sizeof(struct cma_stat),
> +				cma_area_count), GFP_KERNEL);
> +	if (!cma_stats) {
> +		pr_err("failed to create cma_stats\n");
> +		goto out;
> +	}
> +
> +	do {
> +		cma = &cma_areas[i];
> +		cma->stat = &cma_stats[i];
> +		spin_lock_init(&cma->stat->lock);
> +		if (kobject_init_and_add(&cma->stat->kobj, &cma_ktype,
> +					cma_kobj, "%s", cma->name)) {
> +			kobject_put(&cma->stat->kobj);
> +			goto out;
> +		}
> +	} while (++i < cma_area_count)

This clearly is not going to compile! Don't forget to build and test the
patches.

> +
> +	return 0;
> +out:
> +	while (--i >= 0) {
> +		cma = &cma_areas[i];
> +		kobject_put(&cma->stat->kobj);
> +	}
> +
> +	kfree(cma_stats);
> +	kobject_put(cma_kobj);
> +
> +	return -ENOMEM;
> +}
> +subsys_initcall(cma_sysfs_init);
> 

thanks,
Minchan Kim Feb. 8, 2021, 11:36 p.m. UTC | #2
On Mon, Feb 08, 2021 at 01:34:06PM -0800, John Hubbard wrote:
> On 2/8/21 10:01 AM, Minchan Kim wrote:
> > Since CMA is getting used more widely, it's more important to
> > keep monitoring CMA statistics for system health since it's
> > directly related to user experience.
> > 
> > This patch introduces sysfs for the CMA and exposes stats below
> > to keep monitor for telemetric in the system.
> 
> Or:
> 
> This patch introduces sysfs statistics for CMA, in order to provide
> some basic monitoring of the CMA allocator.

Yub, take it.

> 
> > 
> >   * the number of CMA page allocation attempts
> >   * the number of CMA page allocation failures
> > 
> > With those per-CMA statistics, we could know how CMA allocadtion
> > failure rate for each usecases.
> 
> Maybe:
> 
> These two values allow the user to calcuate the allocation
> failure rate for each CMA area.

Good to me.

> 
> > 
> > e.g.)
> >    /sys/kernel/mm/cma/WIFI/cma_alloc_pages_[attempt|fail]
> >    /sys/kernel/mm/cma/SENSOR/cma_alloc_pages_[attempt|fail]
> >    /sys/kernel/mm/cma/BLUETOOTH/cma_alloc_pages_[attempt|fail]
> > 
> > Signed-off-by: Minchan Kim <minchan@kernel.org>
> > ---
> > 
> >  From v1 - https://lore.kernel.org/linux-mm/20210203155001.4121868-1-minchan@kernel.org/
> >   * fix sysfs build and refactoring - willy
> >   * rename and drop some attributes - jhubbard
> > 
> >   Documentation/ABI/testing/sysfs-kernel-mm-cma |  25 ++++
> >   mm/Kconfig                                    |   7 ++
> >   mm/Makefile                                   |   1 +
> >   mm/cma.c                                      |   6 +-
> >   mm/cma.h                                      |  18 +++
> >   mm/cma_sysfs.c                                | 114 ++++++++++++++++++
> >   6 files changed, 170 insertions(+), 1 deletion(-)
> >   create mode 100644 Documentation/ABI/testing/sysfs-kernel-mm-cma
> >   create mode 100644 mm/cma_sysfs.c
> > 
> > diff --git a/Documentation/ABI/testing/sysfs-kernel-mm-cma b/Documentation/ABI/testing/sysfs-kernel-mm-cma
> > new file mode 100644
> > index 000000000000..68bdcc8c7681
> > --- /dev/null
> > +++ b/Documentation/ABI/testing/sysfs-kernel-mm-cma
> > @@ -0,0 +1,25 @@
> > +What:		/sys/kernel/mm/cma/
> > +Date:		Feb 2021
> > +Contact:	Minchan Kim <minchan@kernel.org>
> > +Description:
> > +		/sys/kernel/mm/cma/ contains a number of subdirectories by
> > +		cma-heap name. The subdirectory contains a number of files
> > +		to represent cma allocation statistics.
> > +
> > +		There are number of files under
> > +				/sys/kernel/mm/cma/<cma-heap-name> directory
> > +
> > +			- cma_alloc_pages_attempt
> > +			- cma_alloc_pages_fail
> 
> How about this instead:
> Description:
> 		/sys/kernel/mm/cma/ contains a subdirectory for each CMA heap name (also
>         sometimes called CMA areas).
> 
>         Each CMA heap subdirectory (that is, each
>         /sys/kernel/mm/cma/<cma-heap-name> directory) contains the following
>         items:
> 
> 			cma_alloc_pages_attempt
> 			cma_alloc_pages_fail
> 

Yub.

> 
> > +
> > +What:		/sys/kernel/mm/cma/<cma-heap-name>/cma_alloc_pages_attempt
> 
> Actually, shall we change that from "attempt" to "attempts"? Otherwise, the
> language is a little odd there.

Sure.

> 
> > +Date:		Feb 2021
> > +Contact:	Minchan Kim <minchan@kernel.org>
> > +Description:
> > +		the number of pages CMA API tried to allocate
> > +
> > +What:		/sys/kernel/mm/cma/<cma-heap-name>/cma_alloc_pages_fail
> > +Date:		Feb 2021
> > +Contact:	Minchan Kim <minchan@kernel.org>
> > +Description:
> > +		the number of pages CMA API failed to allocate
> > diff --git a/mm/Kconfig b/mm/Kconfig
> > index ec35bf406439..ad7e9c065657 100644
> > --- a/mm/Kconfig
> > +++ b/mm/Kconfig
> > @@ -513,6 +513,13 @@ config CMA_DEBUGFS
> >   	help
> >   	  Turns on the DebugFS interface for CMA.
> > +config CMA_SYSFS
> > +	bool "CMA information through sysfs interface"
> > +	depends on CMA && SYSFS
> > +	help
> > +	  This option exposes some sysfs attributes to get information
> > +	  from CMA.
> > +
> >   config CMA_AREAS
> >   	int "Maximum count of the CMA areas"
> >   	depends on CMA
> > diff --git a/mm/Makefile b/mm/Makefile
> > index b2a564eec27f..0ae764e5b1a8 100644
> > --- a/mm/Makefile
> > +++ b/mm/Makefile
> > @@ -109,6 +109,7 @@ obj-$(CONFIG_CMA)	+= cma.o
> >   obj-$(CONFIG_MEMORY_BALLOON) += balloon_compaction.o
> >   obj-$(CONFIG_PAGE_EXTENSION) += page_ext.o
> >   obj-$(CONFIG_CMA_DEBUGFS) += cma_debug.o
> > +obj-$(CONFIG_CMA_SYSFS)	+= cma_sysfs.o
> 
> Remove the unnecessary tab there, none of the other nearby lines have one.

Oops.

> 
> >   obj-$(CONFIG_USERFAULTFD) += userfaultfd.o
> >   obj-$(CONFIG_IDLE_PAGE_TRACKING) += page_idle.o
> >   obj-$(CONFIG_DEBUG_PAGE_REF) += debug_page_ref.o
> > diff --git a/mm/cma.c b/mm/cma.c
> > index 23d4a97c834a..0611202d6e7d 100644
> > --- a/mm/cma.c
> > +++ b/mm/cma.c
> > @@ -447,9 +447,10 @@ struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align,
> >   	offset = cma_bitmap_aligned_offset(cma, align);
> >   	bitmap_maxno = cma_bitmap_maxno(cma);
> >   	bitmap_count = cma_bitmap_pages_to_bits(cma, count);
> > +	cma_sysfs_alloc_count(cma, count);
> >   	if (bitmap_count > bitmap_maxno)
> > -		return NULL;
> > +		goto out;
> >   	for (;;) {
> >   		mutex_lock(&cma->lock);
> > @@ -504,6 +505,9 @@ struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align,
> >   			__func__, count, ret);
> >   		cma_debug_show_areas(cma);
> >   	}
> > +out:
> > +	if (!page)
> > +		cma_sysfs_fail(cma, count);
> >   	pr_debug("%s(): returned %p\n", __func__, page);
> >   	return page;
> > diff --git a/mm/cma.h b/mm/cma.h
> > index 42ae082cb067..49a8ceddd9e8 100644
> > --- a/mm/cma.h
> > +++ b/mm/cma.h
> > @@ -3,6 +3,14 @@
> >   #define __MM_CMA_H__
> >   #include <linux/debugfs.h>
> > +#include <linux/kobject.h>
> > +
> > +struct cma_stat {
> > +	spinlock_t lock;
> > +	unsigned long pages_attempt;	/* the number of CMA page allocation attempts */
> > +	unsigned long pages_fail;	/* the number of CMA page allocation failures */
> > +	struct kobject kobj;
> > +};
> >   struct cma {
> >   	unsigned long   base_pfn;
> > @@ -16,6 +24,9 @@ struct cma {
> >   	struct debugfs_u32_array dfs_bitmap;
> >   #endif
> >   	char name[CMA_MAX_NAME];
> > +#ifdef CONFIG_CMA_SYSFS
> > +	struct cma_stat	*stat;
> 
> This should not be a pointer. By making it a pointer, you've added a bunch of pointless
> extra code to the implementation.

Originally, I tied the object lifetime to struct cma, as you
suggested, to keep the code simple. However, Greg KH wanted a
release method for the kobj_type, since that is consistent with
other kobject handling.


> 
> Here's a diff to implement the non-pointer way, and also to fix a build error in this
> patch (missing semicolon):
> 
> diff --git a/Documentation/ABI/testing/sysfs-kernel-mm-cma
> b/Documentation/ABI/testing/sysfs-kernel-mm-cma
> index 68bdcc8c7681..f3769b4e1a3c 100644
> --- a/Documentation/ABI/testing/sysfs-kernel-mm-cma
> +++ b/Documentation/ABI/testing/sysfs-kernel-mm-cma
> @@ -2,15 +2,15 @@ What:		/sys/kernel/mm/cma/
>  Date:		Feb 2021
>  Contact:	Minchan Kim <minchan@kernel.org>
>  Description:
> -		/sys/kernel/mm/cma/ contains a number of subdirectories by
> -		cma-heap name. The subdirectory contains a number of files
> -		to represent cma allocation statistics.
> +		/sys/kernel/mm/cma/ contains a subdirectory for each CMA heap name (also
> +        sometimes called CMA areas).
> 
> -		There are number of files under
> -				/sys/kernel/mm/cma/<cma-heap-name> directory
> +        Each CMA heap subdirectory (that is, each
> +        /sys/kernel/mm/cma/<cma-heap-name> directory) contains the following
> +        items:
> 
> -			- cma_alloc_pages_attempt
> -			- cma_alloc_pages_fail
> +			cma_alloc_pages_attempt
> +			cma_alloc_pages_fail
> 
>  What:		/sys/kernel/mm/cma/<cma-heap-name>/cma_alloc_pages_attempt
>  Date:		Feb 2021
> diff --git a/mm/cma.h b/mm/cma.h
> index 49a8ceddd9e8..1e109830f553 100644
> --- a/mm/cma.h
> +++ b/mm/cma.h
> @@ -25,7 +25,7 @@ struct cma {
>  #endif
>  	char name[CMA_MAX_NAME];
>  #ifdef CONFIG_CMA_SYSFS
> -	struct cma_stat	*stat;
> +	struct cma_stat	stat;
>  #endif
>  };
> 
> diff --git a/mm/cma_sysfs.c b/mm/cma_sysfs.c
> index 1f6b9f785825..52905694b6b7 100644
> --- a/mm/cma_sysfs.c
> +++ b/mm/cma_sysfs.c
> @@ -11,20 +11,18 @@
> 
>  #include "cma.h"
> 
> -static struct cma_stat *cma_stats;
> -
>  void cma_sysfs_alloc_count(struct cma *cma, size_t count)
>  {
> -	spin_lock(&cma->stat->lock);
> -	cma->stat->pages_attempt += count;
> -	spin_unlock(&cma->stat->lock);
> +	spin_lock(&cma->stat.lock);
> +	cma->stat.pages_attempt += count;
> +	spin_unlock(&cma->stat.lock);
>  }
> 
>  void cma_sysfs_fail(struct cma *cma, size_t count)
>  {
> -	spin_lock(&cma->stat->lock);
> -	cma->stat->pages_fail += count;
> -	spin_unlock(&cma->stat->lock);
> +	spin_lock(&cma->stat.lock);
> +	cma->stat.pages_fail += count;
> +	spin_unlock(&cma->stat.lock);
>  }
> 
>  #define CMA_ATTR_RO(_name) \
> @@ -50,13 +48,6 @@ static ssize_t cma_alloc_pages_fail_show(struct kobject *kobj,
>  }
>  CMA_ATTR_RO(cma_alloc_pages_fail);
> 
> -static void cma_kobj_release(struct kobject *kobj)
> -{
> -	struct cma_stat *stat = container_of(kobj, struct cma_stat, kobj);
> -
> -	kfree(stat);
> -}
> -
>  static struct attribute *cma_attrs[] = {
>  	&cma_alloc_pages_attempt_attr.attr,
>  	&cma_alloc_pages_fail_attr.attr,
> @@ -65,7 +56,6 @@ static struct attribute *cma_attrs[] = {
>  ATTRIBUTE_GROUPS(cma);
> 
>  static struct kobj_type cma_ktype = {
> -	.release = cma_kobj_release,
>  	.sysfs_ops = &kobj_sysfs_ops,
>  	.default_groups = cma_groups
>  };
> @@ -81,32 +71,23 @@ static int __init cma_sysfs_init(void)
>  		return -ENOMEM;
>  	}
> 
> -	cma_stats = kzalloc(array_size(sizeof(struct cma_stat),
> -				cma_area_count), GFP_KERNEL);
> -	if (!cma_stats) {
> -		pr_err("failed to create cma_stats\n");
> -		goto out;
> -	}
> -
>  	do {
>  		cma = &cma_areas[i];
> -		cma->stat = &cma_stats[i];
> -		spin_lock_init(&cma->stat->lock);
> -		if (kobject_init_and_add(&cma->stat->kobj, &cma_ktype,
> +		spin_lock_init(&cma->stat.lock);
> +		if (kobject_init_and_add(&cma->stat.kobj, &cma_ktype,
>  					cma_kobj, "%s", cma->name)) {
> -			kobject_put(&cma->stat->kobj);
> +			kobject_put(&cma->stat.kobj);
>  			goto out;
>  		}
> -	} while (++i < cma_area_count)
> +	} while (++i < cma_area_count);
> 
>  	return 0;
>  out:
>  	while (--i >= 0) {
>  		cma = &cma_areas[i];
> -		kobject_put(&cma->stat->kobj);
> +		kobject_put(&cma->stat.kobj);
>  	}
> 
> -	kfree(cma_stats);
>  	kobject_put(cma_kobj);
> 
>  	return -ENOMEM;
> 
> > +#endif
> >   };
> >   extern struct cma cma_areas[MAX_CMA_AREAS];
> > @@ -26,4 +37,11 @@ static inline unsigned long cma_bitmap_maxno(struct cma *cma)
> >   	return cma->count >> cma->order_per_bit;
> >   }
> > +#ifdef CONFIG_CMA_SYSFS
> > +void cma_sysfs_alloc_count(struct cma *cma, size_t count);
> > +void cma_sysfs_fail(struct cma *cma, size_t count);
> > +#else
> > +static inline void cma_sysfs_alloc_count(struct cma *cma, size_t count) {};
> > +static inline void cma_sysfs_fail(struct cma *cma, size_t count) {};
> > +#endif
> >   #endif
> > diff --git a/mm/cma_sysfs.c b/mm/cma_sysfs.c
> > new file mode 100644
> > index 000000000000..1f6b9f785825
> > --- /dev/null
> > +++ b/mm/cma_sysfs.c
> > @@ -0,0 +1,114 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +/*
> > + * CMA SysFS Interface
> > + *
> > + * Copyright (c) 2021 Minchan Kim <minchan@kernel.org>
> > + */
> > +
> > +#include <linux/cma.h>
> > +#include <linux/kernel.h>
> > +#include <linux/slab.h>
> > +
> > +#include "cma.h"
> > +
> > +static struct cma_stat *cma_stats;
> 
> I don't know what that's for but it definitely is not needed if you make cma.stat
> not a pointer, and not in any other case either.
> 
> > +
> > +void cma_sysfs_alloc_count(struct cma *cma, size_t count)
> > +{
> > +	spin_lock(&cma->stat->lock);
> > +	cma->stat->pages_attempt += count;
> > +	spin_unlock(&cma->stat->lock);
> > +}
> > +
> > +void cma_sysfs_fail(struct cma *cma, size_t count)
> > +{
> > +	spin_lock(&cma->stat->lock);
> > +	cma->stat->pages_fail += count;
> > +	spin_unlock(&cma->stat->lock);
> > +}
> > +
> > +#define CMA_ATTR_RO(_name) \
> > +	static struct kobj_attribute _name##_attr = __ATTR_RO(_name)
> > +
> > +static struct kobject *cma_kobj;
> > +
> > +static ssize_t cma_alloc_pages_attempt_show(struct kobject *kobj,
> > +			struct kobj_attribute *attr, char *buf)
> > +{
> > +	struct cma_stat *stat = container_of(kobj, struct cma_stat, kobj);
> > +
> > +	return sysfs_emit(buf, "%lu\n", stat->pages_attempt);
> > +}
> > +CMA_ATTR_RO(cma_alloc_pages_attempt);
> > +
> > +static ssize_t cma_alloc_pages_fail_show(struct kobject *kobj,
> > +			struct kobj_attribute *attr, char *buf)
> > +{
> > +	struct cma_stat *stat = container_of(kobj, struct cma_stat, kobj);
> > +
> > +	return sysfs_emit(buf, "%lu\n", stat->pages_fail);
> > +}
> > +CMA_ATTR_RO(cma_alloc_pages_fail);
> > +
> > +static void cma_kobj_release(struct kobject *kobj)
> > +{
> > +	struct cma_stat *stat = container_of(kobj, struct cma_stat, kobj);
> > +
> > +	kfree(stat);
> > +}
> > +
> > +static struct attribute *cma_attrs[] = {
> > +	&cma_alloc_pages_attempt_attr.attr,
> > +	&cma_alloc_pages_fail_attr.attr,
> > +	NULL,
> > +};
> > +ATTRIBUTE_GROUPS(cma);
> > +
> > +static struct kobj_type cma_ktype = {
> > +	.release = cma_kobj_release,
> > +	.sysfs_ops = &kobj_sysfs_ops,
> > +	.default_groups = cma_groups
> > +};
> > +
> > +static int __init cma_sysfs_init(void)
> > +{
> > +	int i = 0;
> > +	struct cma *cma;
> > +
> > +	cma_kobj = kobject_create_and_add("cma", mm_kobj);
> > +	if (!cma_kobj) {
> > +		pr_err("failed to create cma kobject\n");
> > +		return -ENOMEM;
> > +	}
> > +
> > +	cma_stats = kzalloc(array_size(sizeof(struct cma_stat),
> > +				cma_area_count), GFP_KERNEL);
> > +	if (!cma_stats) {
> > +		pr_err("failed to create cma_stats\n");
> > +		goto out;
> > +	}
> > +
> > +	do {
> > +		cma = &cma_areas[i];
> > +		cma->stat = &cma_stats[i];
> > +		spin_lock_init(&cma->stat->lock);
> > +		if (kobject_init_and_add(&cma->stat->kobj, &cma_ktype,
> > +					cma_kobj, "%s", cma->name)) {
> > +			kobject_put(&cma->stat->kobj);
> > +			goto out;
> > +		}
> > +	} while (++i < cma_area_count)
> 
> This clearly is not going to compile! Don't forget to build and test the
> patches.
> 
> > +
> > +	return 0;
> > +out:
> > +	while (--i >= 0) {
> > +		cma = &cma_areas[i];
> > +		kobject_put(&cma->stat->kobj);
> > +	}
> > +
> > +	kfree(cma_stats);
> > +	kobject_put(cma_kobj);
> > +
> > +	return -ENOMEM;
> > +}
> > +subsys_initcall(cma_sysfs_init);
> > 
> 
> thanks,
> -- 
> John Hubbard
> NVIDIA
John Hubbard Feb. 9, 2021, 1:57 a.m. UTC | #3
On 2/8/21 3:36 PM, Minchan Kim wrote:
...
>>>    	char name[CMA_MAX_NAME];
>>> +#ifdef CONFIG_CMA_SYSFS
>>> +	struct cma_stat	*stat;
>>
>> This should not be a pointer. By making it a pointer, you've added a bunch of pointless
>> extra code to the implementation.
> 
> Originally, I went with the object lifetime with struct cma as you
> suggested to make code simple. However, Greg KH wanted to have
> release for kobj_type since it is consistent with other kboject
> handling.

Are you talking about the kobj in your new struct cma_stat? That seems
like circular logic if so. I'm guessing Greg just wanted kobj methods
to be used *if* you are dealing with kobjects. That's a narrower point.

I can't imagine that he would have insisted on having additional
allocations just so that kobj freeing methods could be used. :)


thanks,
Minchan Kim Feb. 9, 2021, 4:19 a.m. UTC | #4
On Mon, Feb 08, 2021 at 05:57:17PM -0800, John Hubbard wrote:
> On 2/8/21 3:36 PM, Minchan Kim wrote:
> ...
> > > >    	char name[CMA_MAX_NAME];
> > > > +#ifdef CONFIG_CMA_SYSFS
> > > > +	struct cma_stat	*stat;
> > > 
> > > This should not be a pointer. By making it a pointer, you've added a bunch of pointless
> > > extra code to the implementation.
> > 
> > Originally, I went with the object lifetime with struct cma as you
> > suggested to make code simple. However, Greg KH wanted to have
> > release for kobj_type since it is consistent with other kboject
> > handling.
> 
> Are you talking about the kobj in your new struct cma_stat? That seems
> like circular logic if so. I'm guessing Greg just wanted kobj methods
> to be used *if* you are dealing with kobjects. That's a narrower point.
> 
> I can't imagine that he would have insisted on having additional
> allocations just so that kobj freeing methods could be used. :)

I have no objection if Greg agrees that a static kobject is okay in
this case. Greg?
John Hubbard Feb. 9, 2021, 5:18 a.m. UTC | #5
On 2/8/21 8:19 PM, Minchan Kim wrote:
> On Mon, Feb 08, 2021 at 05:57:17PM -0800, John Hubbard wrote:
>> On 2/8/21 3:36 PM, Minchan Kim wrote:
>> ...
>>>>>     	char name[CMA_MAX_NAME];
>>>>> +#ifdef CONFIG_CMA_SYSFS
>>>>> +	struct cma_stat	*stat;
>>>>
>>>> This should not be a pointer. By making it a pointer, you've added a bunch of pointless
>>>> extra code to the implementation.
>>>
>>> Originally, I went with the object lifetime with struct cma as you
>>> suggested to make code simple. However, Greg KH wanted to have
>>> release for kobj_type since it is consistent with other kboject
>>> handling.
>>
>> Are you talking about the kobj in your new struct cma_stat? That seems
>> like circular logic if so. I'm guessing Greg just wanted kobj methods
>> to be used *if* you are dealing with kobjects. That's a narrower point.
>>
>> I can't imagine that he would have insisted on having additional
>> allocations just so that kobj freeing methods could be used. :)
> 
> I have no objection if Greg agree static kobject is okay in this
> case. Greg?
> 

What I meant is, no kobject at all in the struct cma_stat member
variable. The lifetime of the cma_stat member is the same as the
containing struct, so no point in putting a kobject into it.

thanks,
John Hubbard Feb. 9, 2021, 5:27 a.m. UTC | #6
On 2/8/21 9:18 PM, John Hubbard wrote:
> On 2/8/21 8:19 PM, Minchan Kim wrote:
>> On Mon, Feb 08, 2021 at 05:57:17PM -0800, John Hubbard wrote:
>>> On 2/8/21 3:36 PM, Minchan Kim wrote:
>>> ...
>>>>>>         char name[CMA_MAX_NAME];
>>>>>> +#ifdef CONFIG_CMA_SYSFS
>>>>>> +    struct cma_stat    *stat;
>>>>>
>>>>> This should not be a pointer. By making it a pointer, you've added a bunch of 
>>>>> pointless
>>>>> extra code to the implementation.
>>>>
>>>> Originally, I went with the object lifetime with struct cma as you
>>>> suggested to make code simple. However, Greg KH wanted to have
>>>> release for kobj_type since it is consistent with other kboject
>>>> handling.
>>>
>>> Are you talking about the kobj in your new struct cma_stat? That seems
>>> like circular logic if so. I'm guessing Greg just wanted kobj methods
>>> to be used *if* you are dealing with kobjects. That's a narrower point.
>>>
>>> I can't imagine that he would have insisted on having additional
>>> allocations just so that kobj freeing methods could be used. :)
>>
>> I have no objection if Greg agree static kobject is okay in this
>> case. Greg?
>>
> 
> What I meant is, no kobject at all in the struct cma_stat member
> variable. The lifetime of the cma_stat member is the same as the
> containing struct, so no point in putting a kobject into it.
> 

...unless...are you actually *wanting* to keep the lifetimes separate?
Hmmm, given the short nature of sysfs reads, though, I'd be inclined
to just let the parent object own the lifetime. But maybe I'm missing
some design point here?

thanks,
Greg Kroah-Hartman Feb. 9, 2021, 6:13 a.m. UTC | #7
On Mon, Feb 08, 2021 at 05:57:17PM -0800, John Hubbard wrote:
> On 2/8/21 3:36 PM, Minchan Kim wrote:
> ...
> > > >    	char name[CMA_MAX_NAME];
> > > > +#ifdef CONFIG_CMA_SYSFS
> > > > +	struct cma_stat	*stat;
> > > 
> > > This should not be a pointer. By making it a pointer, you've added a bunch of pointless
> > > extra code to the implementation.
> > 
> > Originally, I went with the object lifetime with struct cma as you
> > suggested to make code simple. However, Greg KH wanted to have
> > release for kobj_type since it is consistent with other kboject
> > handling.
> 
> Are you talking about the kobj in your new struct cma_stat? That seems
> like circular logic if so. I'm guessing Greg just wanted kobj methods
> to be used *if* you are dealing with kobjects. That's a narrower point.
> 
> I can't imagine that he would have insisted on having additional
> allocations just so that kobj freeing methods could be used. :)

Um, yes, I was :)

You can not add a kobject to a structure and then somehow think you can
just ignore the reference counting issues involved.  If a kobject is
part of a structure then the kobject is responsible for controlling the
lifespan of the memory, nothing else can be.

So by making the kobject dynamic, you properly handle that memory
lifespan of the object, instead of having to worry about the lifespan of
the larger object (which the original patch was not doing.)

Does that make sense?

thanks,

greg k-h
John Hubbard Feb. 9, 2021, 6:27 a.m. UTC | #8
On 2/8/21 10:13 PM, Greg KH wrote:
> On Mon, Feb 08, 2021 at 05:57:17PM -0800, John Hubbard wrote:
>> On 2/8/21 3:36 PM, Minchan Kim wrote:
>> ...
>>>>>     	char name[CMA_MAX_NAME];
>>>>> +#ifdef CONFIG_CMA_SYSFS
>>>>> +	struct cma_stat	*stat;
>>>>
>>>> This should not be a pointer. By making it a pointer, you've added a bunch of pointless
>>>> extra code to the implementation.
>>>
>>> Originally, I went with the object lifetime with struct cma as you
>>> suggested to make code simple. However, Greg KH wanted to have
>>> release for kobj_type since it is consistent with other kboject
>>> handling.
>>
>> Are you talking about the kobj in your new struct cma_stat? That seems
>> like circular logic if so. I'm guessing Greg just wanted kobj methods
>> to be used *if* you are dealing with kobjects. That's a narrower point.
>>
>> I can't imagine that he would have insisted on having additional
>> allocations just so that kobj freeing methods could be used. :)
> 
> Um, yes, I was :)
> 
> You can not add a kobject to a structure and then somehow think you can
> just ignore the reference counting issues involved.  If a kobject is
> part of a structure then the kobject is responsible for controling the
> lifespan of the memory, nothing else can be.
> 
> So by making the kobject dynamic, you properly handle that memory
> lifespan of the object, instead of having to worry about the lifespan of
> the larger object (which the original patch was not doing.)
> 
> Does that make sense?
> 
That part makes sense, yes, thanks. The part that I'm trying to straighten
out is, why was kobject even added to the struct cma_stat in the first
place? Why not just leave .stat as a static member variable, without
a kobject in it, and done?

thanks,
John Hubbard Feb. 9, 2021, 6:34 a.m. UTC | #9
On 2/8/21 10:27 PM, John Hubbard wrote:
> On 2/8/21 10:13 PM, Greg KH wrote:
>> On Mon, Feb 08, 2021 at 05:57:17PM -0800, John Hubbard wrote:
>>> On 2/8/21 3:36 PM, Minchan Kim wrote:
>>> ...
>>>>>>         char name[CMA_MAX_NAME];
>>>>>> +#ifdef CONFIG_CMA_SYSFS
>>>>>> +    struct cma_stat    *stat;
>>>>>
>>>>> This should not be a pointer. By making it a pointer, you've added a bunch of pointless
>>>>> extra code to the implementation.
>>>>
>>>> Originally, I went with the object lifetime with struct cma as you
>>>> suggested to make code simple. However, Greg KH wanted to have
>>>> release for kobj_type since it is consistent with other kboject
>>>> handling.
>>>
>>> Are you talking about the kobj in your new struct cma_stat? That seems
>>> like circular logic if so. I'm guessing Greg just wanted kobj methods
>>> to be used *if* you are dealing with kobjects. That's a narrower point.
>>>
>>> I can't imagine that he would have insisted on having additional
>>> allocations just so that kobj freeing methods could be used. :)
>>
>> Um, yes, I was :)
>>
>> You can not add a kobject to a structure and then somehow think you can
>> just ignore the reference counting issues involved.  If a kobject is
>> part of a structure then the kobject is responsible for controling the
>> lifespan of the memory, nothing else can be.
>>
>> So by making the kobject dynamic, you properly handle that memory
>> lifespan of the object, instead of having to worry about the lifespan of
>> the larger object (which the original patch was not doing.)
>>
>> Does that make sense?
>>
> That part makes sense, yes, thanks. The part that I'm trying to straighten
> out is, why was kobject even added to the struct cma_stat in the first
> place? Why not just leave .stat as a static member variable, without
> a kobject in it, and done?
> 

Sorry, I think I get it now: this is in order to allow a separate lifetime
for the .stat member. I was sort of implicitly assuming that the "right"
way to do it is just have the whole object use one lifetime management,
but as you say, there is no kobject being added to the parent.

I still feel odd about the allocation and freeing of something that seems
to be logically the same lifetime (other than perhaps a few, briefly pending
sysfs reads, at the end of life). So I'd still think that the kobject should
be added to the parent...

thanks,
Greg Kroah-Hartman Feb. 9, 2021, 6:56 a.m. UTC | #10
On Mon, Feb 08, 2021 at 10:34:51PM -0800, John Hubbard wrote:
> On 2/8/21 10:27 PM, John Hubbard wrote:
> > On 2/8/21 10:13 PM, Greg KH wrote:
> > > On Mon, Feb 08, 2021 at 05:57:17PM -0800, John Hubbard wrote:
> > > > On 2/8/21 3:36 PM, Minchan Kim wrote:
> > > > ...
> > > > > > >         char name[CMA_MAX_NAME];
> > > > > > > +#ifdef CONFIG_CMA_SYSFS
> > > > > > > +    struct cma_stat    *stat;
> > > > > > 
> > > > > > This should not be a pointer. By making it a pointer, you've added a bunch of pointless
> > > > > > extra code to the implementation.
> > > > > 
> > > > > Originally, I went with the object lifetime with struct cma as you
> > > > > suggested to make code simple. However, Greg KH wanted to have
> > > > > release for kobj_type since it is consistent with other kboject
> > > > > handling.
> > > > 
> > > > Are you talking about the kobj in your new struct cma_stat? That seems
> > > > like circular logic if so. I'm guessing Greg just wanted kobj methods
> > > > to be used *if* you are dealing with kobjects. That's a narrower point.
> > > > 
> > > > I can't imagine that he would have insisted on having additional
> > > > allocations just so that kobj freeing methods could be used. :)
> > > 
> > > Um, yes, I was :)
> > > 
> > > You can not add a kobject to a structure and then somehow think you can
> > > just ignore the reference counting issues involved.  If a kobject is
> > > part of a structure then the kobject is responsible for controling the
> > > lifespan of the memory, nothing else can be.
> > > 
> > > So by making the kobject dynamic, you properly handle that memory
> > > lifespan of the object, instead of having to worry about the lifespan of
> > > the larger object (which the original patch was not doing.)
> > > 
> > > Does that make sense?
> > > 
> > That part makes sense, yes, thanks. The part that I'm trying to straighten
> > out is, why was kobject even added to the struct cma_stat in the first
> > place? Why not just leave .stat as a static member variable, without
> > a kobject in it, and done?
> > 
> 
> Sorry, I think I get it now: this is in order to allow a separate lifetime
> for the .stat member. I was sort of implicitly assuming that the "right"
> way to do it is just have the whole object use one lifetime management,
> but as you say, there is no kobject being added to the parent.
> 
> I still feel odd about the allocation and freeing of something that seems
> to be logically the same lifetime (other than perhaps a few, briefly pending
> sysfs reads, at the end of life). So I'd still think that the kobject should
> be added to the parent...

That's fine if you want to add it to the parent.  If so, then the
kobject controls the lifetime of the structure, nothing else can.

Either is fine with me, what is "forbidden" is having a kobject and
somehow thinking that it does not control the lifetime of the structure.

thanks,

greg k-h
Minchan Kim Feb. 9, 2021, 3:55 p.m. UTC | #11
On Tue, Feb 09, 2021 at 07:56:30AM +0100, Greg KH wrote:
> On Mon, Feb 08, 2021 at 10:34:51PM -0800, John Hubbard wrote:
> > On 2/8/21 10:27 PM, John Hubbard wrote:
> > > On 2/8/21 10:13 PM, Greg KH wrote:
> > > > On Mon, Feb 08, 2021 at 05:57:17PM -0800, John Hubbard wrote:
> > > > > On 2/8/21 3:36 PM, Minchan Kim wrote:
> > > > > ...
> > > > > > > >         char name[CMA_MAX_NAME];
> > > > > > > > +#ifdef CONFIG_CMA_SYSFS
> > > > > > > > +    struct cma_stat    *stat;
> > > > > > > 
> > > > > > > This should not be a pointer. By making it a pointer, you've added a bunch of pointless
> > > > > > > extra code to the implementation.
> > > > > > 
> > > > > > Originally, I went with the object lifetime with struct cma as you
> > > > > > suggested to make code simple. However, Greg KH wanted to have
> > > > > > release for kobj_type since it is consistent with other kboject
> > > > > > handling.
> > > > > 
> > > > > Are you talking about the kobj in your new struct cma_stat? That seems
> > > > > like circular logic if so. I'm guessing Greg just wanted kobj methods
> > > > > to be used *if* you are dealing with kobjects. That's a narrower point.
> > > > > 
> > > > > I can't imagine that he would have insisted on having additional
> > > > > allocations just so that kobj freeing methods could be used. :)
> > > > 
> > > > Um, yes, I was :)
> > > > 
> > > > You can not add a kobject to a structure and then somehow think you can
> > > > just ignore the reference counting issues involved.  If a kobject is
> > > > part of a structure then the kobject is responsible for controling the
> > > > lifespan of the memory, nothing else can be.
> > > > 
> > > > So by making the kobject dynamic, you properly handle that memory
> > > > lifespan of the object, instead of having to worry about the lifespan of
> > > > the larger object (which the original patch was not doing.)
> > > > 
> > > > Does that make sense?
> > > > 
> > > That part makes sense, yes, thanks. The part that I'm trying to straighten
> > > out is, why was kobject even added to the struct cma_stat in the first
> > > place? Why not just leave .stat as a static member variable, without
> > > a kobject in it, and done?
> > > 
> > 
> > Sorry, I think I get it now: this is in order to allow a separate lifetime
> > for the .stat member. I was sort of implicitly assuming that the "right"
> > way to do it is just have the whole object use one lifetime management,
> > but as you say, there is no kobject being added to the parent.
> > 
> > I still feel odd about the allocation and freeing of something that seems
> > to be logically the same lifetime (other than perhaps a few, briefly pending
> > sysfs reads, at the end of life). So I'd still think that the kobject should
> > be added to the parent...

struct cma_stat {
	spinlock_t lock;
	unsigned long pages_attempt;
	unsigned long pages_fail;
};

struct cma {
	..
	..
	struct kobject kobj;
	struct cma_stat stat;
};

I guess this is what John suggested. I agree with it.

> 
> That's fine if you want to add it to the parent.  If so, then the
> kobject controls the lifetime of the structure, nothing else can.

The problem was that the parent object (i.e., struct cma cma_areas) is a
static array, so the kobj->release function will be NULL or just a
dummy. Is that okay? I thought that was one of the things you wanted
to avoid.

> 
> Either is fine with me, what is "forbidden" is having a kobject and
> somehow thinking that it does not control the lifetime of the structure.

Since the parent object is a static array, there is no need to control the
lifetime, so I am curious whether the parent object approach is okay from a
kobject handling point of view.

If it's no problem, I am happy to change it.
Greg Kroah-Hartman Feb. 9, 2021, 5:49 p.m. UTC | #12
On Tue, Feb 09, 2021 at 07:55:59AM -0800, Minchan Kim wrote:
> On Tue, Feb 09, 2021 at 07:56:30AM +0100, Greg KH wrote:
> > On Mon, Feb 08, 2021 at 10:34:51PM -0800, John Hubbard wrote:
> > > On 2/8/21 10:27 PM, John Hubbard wrote:
> > > > On 2/8/21 10:13 PM, Greg KH wrote:
> > > > > On Mon, Feb 08, 2021 at 05:57:17PM -0800, John Hubbard wrote:
> > > > > > On 2/8/21 3:36 PM, Minchan Kim wrote:
> > > > > > ...
> > > > > > > > >         char name[CMA_MAX_NAME];
> > > > > > > > > +#ifdef CONFIG_CMA_SYSFS
> > > > > > > > > +    struct cma_stat    *stat;
> > > > > > > > 
> > > > > > > > This should not be a pointer. By making it a pointer, you've added a bunch of pointless
> > > > > > > > extra code to the implementation.
> > > > > > > 
> > > > > > > Originally, I went with the object lifetime with struct cma as you
> > > > > > > suggested to make code simple. However, Greg KH wanted to have
> > > > > > > release for kobj_type since it is consistent with other kboject
> > > > > > > handling.
> > > > > > 
> > > > > > Are you talking about the kobj in your new struct cma_stat? That seems
> > > > > > like circular logic if so. I'm guessing Greg just wanted kobj methods
> > > > > > to be used *if* you are dealing with kobjects. That's a narrower point.
> > > > > > 
> > > > > > I can't imagine that he would have insisted on having additional
> > > > > > allocations just so that kobj freeing methods could be used. :)
> > > > > 
> > > > > Um, yes, I was :)
> > > > > 
> > > > > You can not add a kobject to a structure and then somehow think you can
> > > > > just ignore the reference counting issues involved.  If a kobject is
> > > > > part of a structure then the kobject is responsible for controling the
> > > > > lifespan of the memory, nothing else can be.
> > > > > 
> > > > > So by making the kobject dynamic, you properly handle that memory
> > > > > lifespan of the object, instead of having to worry about the lifespan of
> > > > > the larger object (which the original patch was not doing.)
> > > > > 
> > > > > Does that make sense?
> > > > > 
> > > > That part makes sense, yes, thanks. The part that I'm trying to straighten
> > > > out is, why was kobject even added to the struct cma_stat in the first
> > > > place? Why not just leave .stat as a static member variable, without
> > > > a kobject in it, and done?
> > > > 
> > > 
> > > Sorry, I think I get it now: this is in order to allow a separate lifetime
> > > for the .stat member. I was sort of implicitly assuming that the "right"
> > > way to do it is just have the whole object use one lifetime management,
> > > but as you say, there is no kobject being added to the parent.
> > > 
> > > I still feel odd about the allocation and freeing of something that seems
> > > to be logically the same lifetime (other than perhaps a few, briefly pending
> > > sysfs reads, at the end of life). So I'd still think that the kobject should
> > > be added to the parent...
> 
> sruct cma_stat {
> 	spinlock_t lock;
> 	unsigned long pages_attemtp;
> 	unsigned long pages_fail;
> };
> 
> struct cma {
> 	..
> 	..
> 	struct kobject kobj;
> 	struct cma_stat stat;
> };
> 
> I guess this is what Johan suggested. I agree with it.
> 
> > 
> > That's fine if you want to add it to the parent.  If so, then the
> > kobject controls the lifetime of the structure, nothing else can.
> 
> The problem was parent object(i.e., struct cma cma_areas) is
> static arrary so kobj->release function will be NULL or just
> dummy. Is it okay? I thought it was one of the what you wanted
> to avoid it.

No, that is not ok.

> > Either is fine with me, what is "forbidden" is having a kobject and
> > somehow thinking that it does not control the lifetime of the structure.
> 
> Since parent object is static arrary, there is no need to control the
> lifetime so I am curious if parent object approach is okay from kobject
> handling point of view.

So the array is _NEVER_ freed?  If not, fine, don't provide a release
function for the kobject, but ick, just make a dynamic kobject I don't
see the problem for something so tiny and not very many...

I worry that any static kobject might be copied/pasted as someone might
think this is an ok thing to do.  And it's not an ok thing to do.

thanks,

greg k-h
John Hubbard Feb. 9, 2021, 8:11 p.m. UTC | #13
On 2/9/21 9:49 AM, Greg KH wrote:
>>> That's fine if you want to add it to the parent.  If so, then the
>>> kobject controls the lifetime of the structure, nothing else can.
>>
>> The problem was parent object(i.e., struct cma cma_areas) is
>> static arrary so kobj->release function will be NULL or just
>> dummy. Is it okay? I thought it was one of the what you wanted
>> to avoid it.
> 
> No, that is not ok.
> 
>>> Either is fine with me, what is "forbidden" is having a kobject and
>>> somehow thinking that it does not control the lifetime of the structure.
>>
>> Since parent object is static arrary, there is no need to control the
>> lifetime so I am curious if parent object approach is okay from kobject
>> handling point of view.
> 
> So the array is _NEVER_ freed?  If not, fine, don't provide a release
> function for the kobject, but ick, just make a dynamic kobject I don't
> see the problem for something so tiny and not very many...
> 

Yeah, I wasn't trying to generate so much discussion, I initially thought it
would be a minor comment: "just use an embedded struct and avoid some extra
code", at first.

> I worry that any static kobject might be copied/pasted as someone might
> think this is an ok thing to do.  And it's not an ok thing to do.
> 

Overall, then, we're seeing that there is a small design hole: in order
to use sysfs most naturally, you either must provide a dynamically allocated
item for it, or you must use the static kobject, and the second one sets a
bad example.

I think we should just use a static kobject, with a cautionary comment to
would-be copy-pasters, that explains the design constraints above. That way,
we still get a nice, less-code implementation, a safe design, and it only
really costs us a single carefully written comment.

thanks,
Minchan Kim Feb. 9, 2021, 9:13 p.m. UTC | #14
On Tue, Feb 09, 2021 at 12:11:20PM -0800, John Hubbard wrote:
> On 2/9/21 9:49 AM, Greg KH wrote:
> > > > That's fine if you want to add it to the parent.  If so, then the
> > > > kobject controls the lifetime of the structure, nothing else can.
> > > 
> > > The problem was parent object(i.e., struct cma cma_areas) is
> > > static arrary so kobj->release function will be NULL or just
> > > dummy. Is it okay? I thought it was one of the what you wanted
> > > to avoid it.
> > 
> > No, that is not ok.
> > 
> > > > Either is fine with me, what is "forbidden" is having a kobject and
> > > > somehow thinking that it does not control the lifetime of the structure.
> > > 
> > > Since parent object is static arrary, there is no need to control the
> > > lifetime so I am curious if parent object approach is okay from kobject
> > > handling point of view.
> > 
> > So the array is _NEVER_ freed?  If not, fine, don't provide a release
> > function for the kobject, but ick, just make a dynamic kobject I don't
> > see the problem for something so tiny and not very many...
> > 
> 
> Yeah, I wasn't trying to generate so much discussion, I initially thought it
> would be a minor comment: "just use an embedded struct and avoid some extra
> code", at first.
> 
> > I worry that any static kobject might be copied/pasted as someone might
> > think this is an ok thing to do.  And it's not an ok thing to do.
> > 
> 
> Overall, then, we're seeing that there is a small design hole: in order
> to use sysfs most naturally, you either much provide a dynamically allocated
> item for it, or you must use the static kobject, and the second one sets a
> bad example.
> 
> I think we should just use a static kobject, with a cautionary comment to
> would-be copy-pasters, that explains the design constraints above. That way,
> we still get a nice, less-code implementation, a safe design, and it only
> really costs us a single carefully written comment.
> 
> thanks,

Agreed. How about this for the warning part?

+
+/*
+ * note: kobj_type should provide a release function to free dynamically
+ * allocated object since kobject is responsible for controlling lifespan
+ * of the object. However, cma_area is static object so technically, it
+ * doesn't need release function. It's very exceptional case so please
+ * do not follow this model.
+ */
 static struct kobj_type cma_ktype = {
        .sysfs_ops = &kobj_sysfs_ops,
        .default_groups = cma_groups
+       .release = NULL, /* do not follow. See above */
 };
Greg Kroah-Hartman Feb. 10, 2021, 6:43 a.m. UTC | #15
On Tue, Feb 09, 2021 at 01:13:17PM -0800, Minchan Kim wrote:
> On Tue, Feb 09, 2021 at 12:11:20PM -0800, John Hubbard wrote:
> > On 2/9/21 9:49 AM, Greg KH wrote:
> > > > > That's fine if you want to add it to the parent.  If so, then the
> > > > > kobject controls the lifetime of the structure, nothing else can.
> > > > 
> > > > The problem was parent object(i.e., struct cma cma_areas) is
> > > > static arrary so kobj->release function will be NULL or just
> > > > dummy. Is it okay? I thought it was one of the what you wanted
> > > > to avoid it.
> > > 
> > > No, that is not ok.
> > > 
> > > > > Either is fine with me, what is "forbidden" is having a kobject and
> > > > > somehow thinking that it does not control the lifetime of the structure.
> > > > 
> > > > Since parent object is static arrary, there is no need to control the
> > > > lifetime so I am curious if parent object approach is okay from kobject
> > > > handling point of view.
> > > 
> > > So the array is _NEVER_ freed?  If not, fine, don't provide a release
> > > function for the kobject, but ick, just make a dynamic kobject I don't
> > > see the problem for something so tiny and not very many...
> > > 
> > 
> > Yeah, I wasn't trying to generate so much discussion, I initially thought it
> > would be a minor comment: "just use an embedded struct and avoid some extra
> > code", at first.
> > 
> > > I worry that any static kobject might be copied/pasted as someone might
> > > think this is an ok thing to do.  And it's not an ok thing to do.
> > > 
> > 
> > Overall, then, we're seeing that there is a small design hole: in order
> > to use sysfs most naturally, you either much provide a dynamically allocated
> > item for it, or you must use the static kobject, and the second one sets a
> > bad example.
> > 
> > I think we should just use a static kobject, with a cautionary comment to
> > would-be copy-pasters, that explains the design constraints above. That way,
> > we still get a nice, less-code implementation, a safe design, and it only
> > really costs us a single carefully written comment.
> > 
> > thanks,
> 
> Agreed. How about this for the warning part?
> 
> +
> +/*
> + * note: kobj_type should provide a release function to free dynamically
> + * allocated object since kobject is responsible for controlling lifespan
> + * of the object. However, cma_area is static object so technially, it
> + * doesn't need release function. It's very exceptional case so pleaes
> + * do not follow this model.
> + */
>  static struct kobj_type cma_ktype = {
>         .sysfs_ops = &kobj_sysfs_ops,
>         .default_groups = cma_groups
> +       .release = NULL, /* do not follow. See above */
>  };
> 

No, please no.  Just do it the correct way, what is the objection to
creating a few dynamic kobjects from the heap?  How many of these are
you going to have that it will somehow be "wasteful"?

Please do it properly.

thanks,

greg k-h
Minchan Kim Feb. 10, 2021, 7:12 a.m. UTC | #16
On Wed, Feb 10, 2021 at 07:43:37AM +0100, Greg KH wrote:
> On Tue, Feb 09, 2021 at 01:13:17PM -0800, Minchan Kim wrote:
> > On Tue, Feb 09, 2021 at 12:11:20PM -0800, John Hubbard wrote:
> > > On 2/9/21 9:49 AM, Greg KH wrote:
> > > > > > That's fine if you want to add it to the parent.  If so, then the
> > > > > > kobject controls the lifetime of the structure, nothing else can.
> > > > > 
> > > > > The problem was parent object(i.e., struct cma cma_areas) is
> > > > > static arrary so kobj->release function will be NULL or just
> > > > > dummy. Is it okay? I thought it was one of the what you wanted
> > > > > to avoid it.
> > > > 
> > > > No, that is not ok.
> > > > 
> > > > > > Either is fine with me, what is "forbidden" is having a kobject and
> > > > > > somehow thinking that it does not control the lifetime of the structure.
> > > > > 
> > > > > Since parent object is static arrary, there is no need to control the
> > > > > lifetime so I am curious if parent object approach is okay from kobject
> > > > > handling point of view.
> > > > 
> > > > So the array is _NEVER_ freed?  If not, fine, don't provide a release
> > > > function for the kobject, but ick, just make a dynamic kobject I don't
> > > > see the problem for something so tiny and not very many...
> > > > 
> > > 
> > > Yeah, I wasn't trying to generate so much discussion, I initially thought it
> > > would be a minor comment: "just use an embedded struct and avoid some extra
> > > code", at first.
> > > 
> > > > I worry that any static kobject might be copied/pasted as someone might
> > > > think this is an ok thing to do.  And it's not an ok thing to do.
> > > > 
> > > 
> > > Overall, then, we're seeing that there is a small design hole: in order
> > > to use sysfs most naturally, you either much provide a dynamically allocated
> > > item for it, or you must use the static kobject, and the second one sets a
> > > bad example.
> > > 
> > > I think we should just use a static kobject, with a cautionary comment to
> > > would-be copy-pasters, that explains the design constraints above. That way,
> > > we still get a nice, less-code implementation, a safe design, and it only
> > > really costs us a single carefully written comment.
> > > 
> > > thanks,
> > 
> > Agreed. How about this for the warning part?
> > 
> > +
> > +/*
> > + * note: kobj_type should provide a release function to free dynamically
> > + * allocated object since kobject is responsible for controlling lifespan
> > + * of the object. However, cma_area is static object so technially, it
> > + * doesn't need release function. It's very exceptional case so pleaes
> > + * do not follow this model.
> > + */
> >  static struct kobj_type cma_ktype = {
> >         .sysfs_ops = &kobj_sysfs_ops,
> >         .default_groups = cma_groups
> > +       .release = NULL, /* do not follow. See above */
> >  };
> > 
> 
> No, please no.  Just do it the correct way, what is the objection to
> creating a few dynamic kobjects from the heap?  How many of these are
> you going to have that it will somehow be "wasteful"?
> 
> Please do it properly.

Oh, I misunderstood your words "don't provide a release function for the
kobject", so I thought you agreed with John. If you didn't, we are stuck again:
IIUC, John's objection was that the cma_stat lifetime should be tied to the parent
object, which is reasonable and makes the code simple.
Frankly speaking, I don't have a strong opinion about either approach.
John?
John Hubbard Feb. 10, 2021, 7:16 a.m. UTC | #17
On 2/9/21 11:12 PM, Minchan Kim wrote:
...
>>> Agreed. How about this for the warning part?
>>>
>>> +
>>> +/*
>>> + * note: kobj_type should provide a release function to free dynamically
>>> + * allocated object since kobject is responsible for controlling lifespan
>>> + * of the object. However, cma_area is static object so technially, it
>>> + * doesn't need release function. It's very exceptional case so pleaes
>>> + * do not follow this model.
>>> + */
>>>   static struct kobj_type cma_ktype = {
>>>          .sysfs_ops = &kobj_sysfs_ops,
>>>          .default_groups = cma_groups
>>> +       .release = NULL, /* do not follow. See above */
>>>   };
>>>
>>
>> No, please no.  Just do it the correct way, what is the objection to
>> creating a few dynamic kobjects from the heap?  How many of these are
>> you going to have that it will somehow be "wasteful"?
>>
>> Please do it properly.
> 
> Oh, I misunderstood your word "don't provide a release function for the
> kobject" so thought you agreed on John. If you didn't, we are stuck again:
> IIUC, the objection from John was the cma_stat lifetime should be on parent
> object, which is reasonable and make code simple.
> Frankly speaking, I don't have strong opinion about either approach.
> John?
> 

We should do it as Greg requests, now that it's quite clear that he's insisting
on this. Not a big deal.

I just am not especially happy about the inability to do natural, efficient
things here, such as use a statically allocated set of things with sysfs. And
I remain convinced that the above is not "improper"; it's a reasonable
step, given the limitations of the current sysfs design. I just wanted to say
that out loud, as my proposal sinks to the bottom of the trench here. haha :)


thanks,
Greg Kroah-Hartman Feb. 10, 2021, 7:26 a.m. UTC | #18
On Tue, Feb 09, 2021 at 11:16:07PM -0800, John Hubbard wrote:
> On 2/9/21 11:12 PM, Minchan Kim wrote:
> ...
> > > > Agreed. How about this for the warning part?
> > > > 
> > > > +
> > > > +/*
> > > > + * note: kobj_type should provide a release function to free dynamically
> > > > + * allocated object since kobject is responsible for controlling lifespan
> > > > + * of the object. However, cma_area is static object so technially, it
> > > > + * doesn't need release function. It's very exceptional case so pleaes
> > > > + * do not follow this model.
> > > > + */
> > > >   static struct kobj_type cma_ktype = {
> > > >          .sysfs_ops = &kobj_sysfs_ops,
> > > >          .default_groups = cma_groups
> > > > +       .release = NULL, /* do not follow. See above */
> > > >   };
> > > > 
> > > 
> > > No, please no.  Just do it the correct way, what is the objection to
> > > creating a few dynamic kobjects from the heap?  How many of these are
> > > you going to have that it will somehow be "wasteful"?
> > > 
> > > Please do it properly.
> > 
> > Oh, I misunderstood your word "don't provide a release function for the
> > kobject" so thought you agreed on John. If you didn't, we are stuck again:
> > IIUC, the objection from John was the cma_stat lifetime should be on parent
> > object, which is reasonable and make code simple.
> > Frankly speaking, I don't have strong opinion about either approach.
> > John?
> > 
> 
> We should do it as Greg requests, now that it's quite clear that he's insisting
> on this. Not a big deal.
> 
> I just am not especially happy about the inability to do natural, efficient
> things here, such as use a statically allocated set of things with sysfs. And
> I remain convinced that the above is not "improper"; it's a reasonable
> step, given the limitations of the current sysfs design. I just wanted to say
> that out loud, as my proposal sinks to the bottom of the trench here. haha :)

What is "odd" is that you are creating an object in the kernel that you
_never_ free.  That's not normal at all in the kernel, and so, your wish
to have a kobject that you never free represent this object also is not
normal :)

thanks,

greg k-h
John Hubbard Feb. 10, 2021, 7:50 a.m. UTC | #19
On 2/9/21 11:26 PM, Greg KH wrote:
...
>> I just am not especially happy about the inability to do natural, efficient
>> things here, such as use a statically allocated set of things with sysfs. And
>> I remain convinced that the above is not "improper"; it's a reasonable
>> step, given the limitations of the current sysfs design. I just wanted to say
>> that out loud, as my proposal sinks to the bottom of the trench here. haha :)
> 
> What is "odd" is that you are creating an object in the kernel that you
> _never_ free.  That's not normal at all in the kernel, and so, your wish
> to have a kobject that you never free represent this object also is not
> normal :)
> 

OK, thanks for taking the time to explain that, much appreciated!


thanks,
diff mbox series

Patch

diff --git a/Documentation/ABI/testing/sysfs-kernel-mm-cma b/Documentation/ABI/testing/sysfs-kernel-mm-cma
new file mode 100644
index 000000000000..68bdcc8c7681
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-kernel-mm-cma
@@ -0,0 +1,25 @@ 
+What:		/sys/kernel/mm/cma/
+Date:		Feb 2021
+Contact:	Minchan Kim <minchan@kernel.org>
+Description:
+		/sys/kernel/mm/cma/ contains a number of subdirectories by
+		cma-heap name. The subdirectory contains a number of files
+		to represent cma allocation statistics.
+
+		There are a number of files under
+				/sys/kernel/mm/cma/<cma-heap-name> directory
+
+			- cma_alloc_pages_attempt
+			- cma_alloc_pages_fail
+
+What:		/sys/kernel/mm/cma/<cma-heap-name>/cma_alloc_pages_attempt
+Date:		Feb 2021
+Contact:	Minchan Kim <minchan@kernel.org>
+Description:
+		the number of pages CMA API tried to allocate
+
+What:		/sys/kernel/mm/cma/<cma-heap-name>/cma_alloc_pages_fail
+Date:		Feb 2021
+Contact:	Minchan Kim <minchan@kernel.org>
+Description:
+		the number of pages CMA API failed to allocate
diff --git a/mm/Kconfig b/mm/Kconfig
index ec35bf406439..ad7e9c065657 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -513,6 +513,13 @@  config CMA_DEBUGFS
 	help
 	  Turns on the DebugFS interface for CMA.
 
+config CMA_SYSFS
+	bool "CMA information through sysfs interface"
+	depends on CMA && SYSFS
+	help
+	  This option exposes some sysfs attributes to get information
+	  from CMA.
+
 config CMA_AREAS
 	int "Maximum count of the CMA areas"
 	depends on CMA
diff --git a/mm/Makefile b/mm/Makefile
index b2a564eec27f..0ae764e5b1a8 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -109,6 +109,7 @@  obj-$(CONFIG_CMA)	+= cma.o
 obj-$(CONFIG_MEMORY_BALLOON) += balloon_compaction.o
 obj-$(CONFIG_PAGE_EXTENSION) += page_ext.o
 obj-$(CONFIG_CMA_DEBUGFS) += cma_debug.o
+obj-$(CONFIG_CMA_SYSFS)	+= cma_sysfs.o
 obj-$(CONFIG_USERFAULTFD) += userfaultfd.o
 obj-$(CONFIG_IDLE_PAGE_TRACKING) += page_idle.o
 obj-$(CONFIG_DEBUG_PAGE_REF) += debug_page_ref.o
diff --git a/mm/cma.c b/mm/cma.c
index 23d4a97c834a..0611202d6e7d 100644
--- a/mm/cma.c
+++ b/mm/cma.c
@@ -447,9 +447,10 @@  struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align,
 	offset = cma_bitmap_aligned_offset(cma, align);
 	bitmap_maxno = cma_bitmap_maxno(cma);
 	bitmap_count = cma_bitmap_pages_to_bits(cma, count);
+	cma_sysfs_alloc_count(cma, count);
 
 	if (bitmap_count > bitmap_maxno)
-		return NULL;
+		goto out;
 
 	for (;;) {
 		mutex_lock(&cma->lock);
@@ -504,6 +505,9 @@  struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align,
 			__func__, count, ret);
 		cma_debug_show_areas(cma);
 	}
+out:
+	if (!page)
+		cma_sysfs_fail(cma, count);
 
 	pr_debug("%s(): returned %p\n", __func__, page);
 	return page;
diff --git a/mm/cma.h b/mm/cma.h
index 42ae082cb067..49a8ceddd9e8 100644
--- a/mm/cma.h
+++ b/mm/cma.h
@@ -3,6 +3,14 @@ 
 #define __MM_CMA_H__
 
 #include <linux/debugfs.h>
+#include <linux/kobject.h>
+
+struct cma_stat {
+	spinlock_t lock;	/* taken around updates of the two counters below */
+	unsigned long pages_attempt;	/* sum of the page counts passed to cma_sysfs_alloc_count() */
+	unsigned long pages_fail;	/* sum of the page counts passed to cma_sysfs_fail() */
+	struct kobject kobj;	/* embedded kobject; users get back to cma_stat via container_of() */
+};
 
 struct cma {
 	unsigned long   base_pfn;
@@ -16,6 +24,9 @@  struct cma {
 	struct debugfs_u32_array dfs_bitmap;
 #endif
 	char name[CMA_MAX_NAME];
+#ifdef CONFIG_CMA_SYSFS
+	struct cma_stat	*stat;
+#endif
 };
 
 extern struct cma cma_areas[MAX_CMA_AREAS];
@@ -26,4 +37,11 @@  static inline unsigned long cma_bitmap_maxno(struct cma *cma)
 	return cma->count >> cma->order_per_bit;
 }
 
+#ifdef CONFIG_CMA_SYSFS	/* accounting hooks implemented in mm/cma_sysfs.c */
+void cma_sysfs_alloc_count(struct cma *cma, size_t count);
+void cma_sysfs_fail(struct cma *cma, size_t count);
+#else	/* !CONFIG_CMA_SYSFS: the hooks compile away to empty inline stubs */
+static inline void cma_sysfs_alloc_count(struct cma *cma, size_t count) {}
+static inline void cma_sysfs_fail(struct cma *cma, size_t count) {}
+#endif	/* CONFIG_CMA_SYSFS */
 #endif
diff --git a/mm/cma_sysfs.c b/mm/cma_sysfs.c
new file mode 100644
index 000000000000..1f6b9f785825
--- /dev/null
+++ b/mm/cma_sysfs.c
@@ -0,0 +1,157 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * CMA SysFS Interface
+ *
+ * Copyright (c) 2021 Minchan Kim <minchan@kernel.org>
+ */
+
+#include <linux/cma.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+
+#include "cma.h"
+
+/*
+ * Account @count pages as requested from @cma.
+ *
+ * cma->stat is only set by cma_sysfs_init() once the area is registered
+ * and is reset to NULL if that init fails, so requests made without stats
+ * are simply not counted instead of dereferencing an invalid pointer.
+ */
+void cma_sysfs_alloc_count(struct cma *cma, size_t count)
+{
+	struct cma_stat *stat = cma->stat;
+
+	if (!stat)
+		return;
+
+	spin_lock(&stat->lock);
+	stat->pages_attempt += count;
+	spin_unlock(&stat->lock);
+}
+
+/* Account @count pages as failed for @cma; no-op while cma->stat is NULL. */
+void cma_sysfs_fail(struct cma *cma, size_t count)
+{
+	struct cma_stat *stat = cma->stat;
+
+	if (!stat)
+		return;
+
+	spin_lock(&stat->lock);
+	stat->pages_fail += count;
+	spin_unlock(&stat->lock);
+}
+
+#define CMA_ATTR_RO(_name) \
+	static struct kobj_attribute _name##_attr = __ATTR_RO(_name)
+
+/* "cma" directory created under mm_kobj; parent of the per-area kobjects. */
+static struct kobject *cma_kobj;
+
+/* sysfs show handler: emits the area's pages_attempt counter. */
+static ssize_t cma_alloc_pages_attempt_show(struct kobject *kobj,
+			struct kobj_attribute *attr, char *buf)
+{
+	struct cma_stat *stat = container_of(kobj, struct cma_stat, kobj);
+
+	return sysfs_emit(buf, "%lu\n", stat->pages_attempt);
+}
+CMA_ATTR_RO(cma_alloc_pages_attempt);
+
+/* sysfs show handler: emits the area's pages_fail counter. */
+static ssize_t cma_alloc_pages_fail_show(struct kobject *kobj,
+			struct kobj_attribute *attr, char *buf)
+{
+	struct cma_stat *stat = container_of(kobj, struct cma_stat, kobj);
+
+	return sysfs_emit(buf, "%lu\n", stat->pages_fail);
+}
+CMA_ATTR_RO(cma_alloc_pages_fail);
+
+/*
+ * kobject release callback. Each cma_stat is its own kzalloc() allocation
+ * (see cma_sysfs_init()), so freeing the containing structure is valid.
+ */
+static void cma_kobj_release(struct kobject *kobj)
+{
+	struct cma_stat *stat = container_of(kobj, struct cma_stat, kobj);
+
+	kfree(stat);
+}
+
+static struct attribute *cma_attrs[] = {
+	&cma_alloc_pages_attempt_attr.attr,
+	&cma_alloc_pages_fail_attr.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(cma);
+
+static struct kobj_type cma_ktype = {
+	.release = cma_kobj_release,
+	.sysfs_ops = &kobj_sysfs_ops,
+	.default_groups = cma_groups,
+};
+
+/*
+ * Create /sys/kernel/mm/cma and one subdirectory per CMA area.
+ *
+ * Returns 0 on success. On failure every kobject registered so far is
+ * dropped, cma->stat is cleared again and a negative errno is returned.
+ */
+static int __init cma_sysfs_init(void)
+{
+	struct cma_stat *stat;
+	struct cma *cma;
+	int err;
+	int i;
+
+	cma_kobj = kobject_create_and_add("cma", mm_kobj);
+	if (!cma_kobj) {
+		pr_err("failed to create cma kobject\n");
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < cma_area_count; i++) {
+		cma = &cma_areas[i];
+
+		/*
+		 * One allocation per area: cma_kobj_release() kfree()s the
+		 * structure, which must therefore not be an array element.
+		 */
+		stat = kzalloc(sizeof(*stat), GFP_KERNEL);
+		if (!stat) {
+			pr_err("failed to create cma_stats\n");
+			err = -ENOMEM;
+			goto out;
+		}
+
+		spin_lock_init(&stat->lock);
+		err = kobject_init_and_add(&stat->kobj, &cma_ktype,
+					   cma_kobj, "%s", cma->name);
+		if (err) {
+			/* The put runs ->release, which frees stat. */
+			kobject_put(&stat->kobj);
+			goto out;
+		}
+
+		/* Only expose stats whose kobject was registered. */
+		cma->stat = stat;
+	}
+
+	return 0;
+out:
+	/* Unwind the areas [0, i) that were fully set up. */
+	while (--i >= 0) {
+		cma = &cma_areas[i];
+		stat = cma->stat;
+		cma->stat = NULL;
+		kobject_put(&stat->kobj);
+	}
+
+	kobject_put(cma_kobj);
+	cma_kobj = NULL;
+
+	return err;
+}
+subsys_initcall(cma_sysfs_init);