diff mbox series

[v2] memcg: replace memcg ID idr with xarray

Message ID 20240815155402.3630804-1-shakeel.butt@linux.dev (mailing list archive)
State New
Headers show
Series [v2] memcg: replace memcg ID idr with xarray | expand

Commit Message

Shakeel Butt Aug. 15, 2024, 3:54 p.m. UTC
At the moment memcg IDs are managed through IDR which requires external
synchronization mechanisms and makes the allocation code a bit awkward.
Let's switch to xarray and make the code simpler.

Signed-off-by: Shakeel Butt <shakeel.butt@linux.dev>
Suggested-by: Matthew Wilcox <willy@infradead.org>
Reviewed-by: Roman Gushchin <roman.gushchin@linux.dev>
Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Muchun Song <muchun.song@linux.dev>
Acked-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---

Changes since v1:
- Fix error path in mem_cgroup_alloc (Dan Carpenter)

 mm/memcontrol.c | 39 ++++++++++-----------------------------
 1 file changed, 10 insertions(+), 29 deletions(-)

Comments

Roman Gushchin Aug. 15, 2024, 7:31 p.m. UTC | #1
On Thu, Aug 15, 2024 at 08:54:02AM -0700, Shakeel Butt wrote:
> At the moment memcg IDs are managed through IDR which requires external
> synchronization mechanisms and makes the allocation code a bit awkward.
> Let's switch to xarray and make the code simpler.
> 
> Signed-off-by: Shakeel Butt <shakeel.butt@linux.dev>
> Suggested-by: Matthew Wilcox <willy@infradead.org>
> Reviewed-by: Roman Gushchin <roman.gushchin@linux.dev>
> Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> Acked-by: Johannes Weiner <hannes@cmpxchg.org>
> Reviewed-by: Muchun Song <muchun.song@linux.dev>
> Acked-by: Michal Hocko <mhocko@suse.com>
> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
> ---
> 
> Changes since v1:
> - Fix error path in mem_cgroup_alloc (Dan Carpenter)
> 
>  mm/memcontrol.c | 39 ++++++++++-----------------------------
>  1 file changed, 10 insertions(+), 29 deletions(-)
> 
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index df84683a0e1c..e8e03a5e1e5e 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -3408,29 +3408,12 @@ static void memcg_wb_domain_size_changed(struct mem_cgroup *memcg)
>   */
>  
>  #define MEM_CGROUP_ID_MAX	((1UL << MEM_CGROUP_ID_SHIFT) - 1)
> -static DEFINE_IDR(mem_cgroup_idr);
> -static DEFINE_SPINLOCK(memcg_idr_lock);
> -
> -static int mem_cgroup_alloc_id(void)
> -{
> -	int ret;
> -
> -	idr_preload(GFP_KERNEL);
> -	spin_lock(&memcg_idr_lock);
> -	ret = idr_alloc(&mem_cgroup_idr, NULL, 1, MEM_CGROUP_ID_MAX + 1,
> -			GFP_NOWAIT);
> -	spin_unlock(&memcg_idr_lock);
> -	idr_preload_end();
> -	return ret;
> -}
> +static DEFINE_XARRAY_ALLOC1(mem_cgroup_ids);
>  
>  static void mem_cgroup_id_remove(struct mem_cgroup *memcg)
>  {
>  	if (memcg->id.id > 0) {
> -		spin_lock(&memcg_idr_lock);
> -		idr_remove(&mem_cgroup_idr, memcg->id.id);
> -		spin_unlock(&memcg_idr_lock);
> -
> +		xa_erase(&mem_cgroup_ids, memcg->id.id);
>  		memcg->id.id = 0;
>  	}
>  }
> @@ -3465,7 +3448,7 @@ static inline void mem_cgroup_id_put(struct mem_cgroup *memcg)
>  struct mem_cgroup *mem_cgroup_from_id(unsigned short id)
>  {
>  	WARN_ON_ONCE(!rcu_read_lock_held());
> -	return idr_find(&mem_cgroup_idr, id);
> +	return xa_load(&mem_cgroup_ids, id);
>  }
>  
>  #ifdef CONFIG_SHRINKER_DEBUG
> @@ -3558,17 +3541,17 @@ static struct mem_cgroup *mem_cgroup_alloc(struct mem_cgroup *parent)
>  	struct mem_cgroup *memcg;
>  	int node, cpu;
>  	int __maybe_unused i;
> -	long error = -ENOMEM;
> +	long error;
>  
>  	memcg = kzalloc(struct_size(memcg, nodeinfo, nr_node_ids), GFP_KERNEL);
>  	if (!memcg)
> -		return ERR_PTR(error);
> +		return ERR_PTR(-ENOMEM);
>  
> -	memcg->id.id = mem_cgroup_alloc_id();
> -	if (memcg->id.id < 0) {
> -		error = memcg->id.id;
> +	error = xa_alloc(&mem_cgroup_ids, &memcg->id.id, NULL,
> +			 XA_LIMIT(1, MEM_CGROUP_ID_MAX), GFP_KERNEL);
> +	if (error)
>  		goto fail;
> -	}
> +	error = -ENOMEM;

There is another subtle change here: xa_alloc() returns -EBUSY in the case
of ID space exhaustion, while the old code returned -ENOSPC.
It's unlikely to be a big practical problem.
Matthew Wilcox Aug. 15, 2024, 9:16 p.m. UTC | #2
On Thu, Aug 15, 2024 at 07:31:43PM +0000, Roman Gushchin wrote:
> There is another subtle change here: xa_alloc() returns -EBUSY in the case
> of the address space exhaustion, while the old code returned -ENOSPC.
> It's unlikely a big practical problem.

I decided that EBUSY was the right errno for this situation;

#define EBUSY           16      /* Device or resource busy */
#define ENOSPC          28      /* No space left on device */

ENOSPC seemed wrong; the device isn't out of space.
Michal Hocko Aug. 16, 2024, 7:19 a.m. UTC | #3
On Thu 15-08-24 22:16:27, Matthew Wilcox wrote:
> On Thu, Aug 15, 2024 at 07:31:43PM +0000, Roman Gushchin wrote:
> > There is another subtle change here: xa_alloc() returns -EBUSY in the case
> > of the address space exhaustion, while the old code returned -ENOSPC.
> > It's unlikely a big practical problem.
> 
> I decided that EBUSY was the right errno for this situation;
> 
> #define EBUSY           16      /* Device or resource busy */
> #define ENOSPC          28      /* No space left on device */
> 
> ENOSPC seemed wrong; the device isn't out of space.

The thing is that this is observable by userspace - mkdir would return a
different and potentially unexpected errno. We can try and see whether
anybody complains or just translate the error.
Andrew Morton Aug. 16, 2024, 7:43 a.m. UTC | #4
On Fri, 16 Aug 2024 09:19:58 +0200 Michal Hocko <mhocko@suse.com> wrote:

> On Thu 15-08-24 22:16:27, Matthew Wilcox wrote:
> > On Thu, Aug 15, 2024 at 07:31:43PM +0000, Roman Gushchin wrote:
> > > There is another subtle change here: xa_alloc() returns -EBUSY in the case
> > > of the address space exhaustion, while the old code returned -ENOSPC.
> > > It's unlikely a big practical problem.
> > 
> > I decided that EBUSY was the right errno for this situation;
> > 
> > #define EBUSY           16      /* Device or resource busy */
> > #define ENOSPC          28      /* No space left on device */
> > 
> > ENOSPC seemed wrong; the device isn't out of space.
> 
> The thing is that this is observable by userspace - mkdir would return a
> different and potentially unexpected errno. We can try and see whether
> anybody complains or just translate the error.

The mkdir(2) manpage doesn't list EBUSY.  Maybe ENOMEM is close enough.
Michal Hocko Aug. 16, 2024, 8:18 a.m. UTC | #5
On Fri 16-08-24 00:43:34, Andrew Morton wrote:
> On Fri, 16 Aug 2024 09:19:58 +0200 Michal Hocko <mhocko@suse.com> wrote:
> 
> > On Thu 15-08-24 22:16:27, Matthew Wilcox wrote:
> > > On Thu, Aug 15, 2024 at 07:31:43PM +0000, Roman Gushchin wrote:
> > > > There is another subtle change here: xa_alloc() returns -EBUSY in the case
> > > > of the address space exhaustion, while the old code returned -ENOSPC.
> > > > It's unlikely a big practical problem.
> > > 
> > > I decided that EBUSY was the right errno for this situation;
> > > 
> > > #define EBUSY           16      /* Device or resource busy */
> > > #define ENOSPC          28      /* No space left on device */
> > > 
> > > ENOSPC seemed wrong; the device isn't out of space.
> > 
> > The thing is that this is observable by userspace - mkdir would return a
> > different and potentially unexpected errno. We can try and see whether
> > anybody complains or just translate the error.
> 
> The mkdir(2) manpage doesn't list EBUSY.  Maybe ENOMEM is close enough.

It used to report ENOSPC with IDR.
diff mbox series

Patch

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index df84683a0e1c..e8e03a5e1e5e 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3408,29 +3408,12 @@  static void memcg_wb_domain_size_changed(struct mem_cgroup *memcg)
  */
 
 #define MEM_CGROUP_ID_MAX	((1UL << MEM_CGROUP_ID_SHIFT) - 1)
-static DEFINE_IDR(mem_cgroup_idr);
-static DEFINE_SPINLOCK(memcg_idr_lock);
-
-static int mem_cgroup_alloc_id(void)
-{
-	int ret;
-
-	idr_preload(GFP_KERNEL);
-	spin_lock(&memcg_idr_lock);
-	ret = idr_alloc(&mem_cgroup_idr, NULL, 1, MEM_CGROUP_ID_MAX + 1,
-			GFP_NOWAIT);
-	spin_unlock(&memcg_idr_lock);
-	idr_preload_end();
-	return ret;
-}
+static DEFINE_XARRAY_ALLOC1(mem_cgroup_ids);
 
 static void mem_cgroup_id_remove(struct mem_cgroup *memcg)
 {
 	if (memcg->id.id > 0) {
-		spin_lock(&memcg_idr_lock);
-		idr_remove(&mem_cgroup_idr, memcg->id.id);
-		spin_unlock(&memcg_idr_lock);
-
+		xa_erase(&mem_cgroup_ids, memcg->id.id);
 		memcg->id.id = 0;
 	}
 }
@@ -3465,7 +3448,7 @@  static inline void mem_cgroup_id_put(struct mem_cgroup *memcg)
 struct mem_cgroup *mem_cgroup_from_id(unsigned short id)
 {
 	WARN_ON_ONCE(!rcu_read_lock_held());
-	return idr_find(&mem_cgroup_idr, id);
+	return xa_load(&mem_cgroup_ids, id);
 }
 
 #ifdef CONFIG_SHRINKER_DEBUG
@@ -3558,17 +3541,17 @@  static struct mem_cgroup *mem_cgroup_alloc(struct mem_cgroup *parent)
 	struct mem_cgroup *memcg;
 	int node, cpu;
 	int __maybe_unused i;
-	long error = -ENOMEM;
+	long error;
 
 	memcg = kzalloc(struct_size(memcg, nodeinfo, nr_node_ids), GFP_KERNEL);
 	if (!memcg)
-		return ERR_PTR(error);
+		return ERR_PTR(-ENOMEM);
 
-	memcg->id.id = mem_cgroup_alloc_id();
-	if (memcg->id.id < 0) {
-		error = memcg->id.id;
+	error = xa_alloc(&mem_cgroup_ids, &memcg->id.id, NULL,
+			 XA_LIMIT(1, MEM_CGROUP_ID_MAX), GFP_KERNEL);
+	if (error)
 		goto fail;
-	}
+	error = -ENOMEM;
 
 	memcg->vmstats = kzalloc(sizeof(struct memcg_vmstats),
 				 GFP_KERNEL_ACCOUNT);
@@ -3709,9 +3692,7 @@  static int mem_cgroup_css_online(struct cgroup_subsys_state *css)
 	 * publish it here at the end of onlining. This matches the
 	 * regular ID destruction during offlining.
 	 */
-	spin_lock(&memcg_idr_lock);
-	idr_replace(&mem_cgroup_idr, memcg, memcg->id.id);
-	spin_unlock(&memcg_idr_lock);
+	xa_store(&mem_cgroup_ids, memcg->id.id, memcg, GFP_KERNEL);
 
 	return 0;
 offline_kmem: