diff mbox

[3/9] blkio-cgroup-v9: The new page_cgroup framework

Message ID 20090721.231211.71098738.ryov@valinux.co.jp (mailing list archive)
State Superseded, archived
Headers show

Commit Message

Ryo Tsuruta July 21, 2009, 2:12 p.m. UTC
This patch makes the page_cgroup framework be able to be used even if
the compile option of the cgroup memory controller is off.
So blkio-cgroup can use this framework without the memory controller.

Signed-off-by: Hirokazu Takahashi <taka@valinux.co.jp>
Signed-off-by: Ryo Tsuruta <ryov@valinux.co.jp>

---
 include/linux/memcontrol.h  |    6 ++++++
 include/linux/mmzone.h      |    4 ++--
 include/linux/page_cgroup.h |    8 +++++---
 init/Kconfig                |    4 ++++
 mm/Makefile                 |    3 ++-
 mm/memcontrol.c             |    6 ++++++
 mm/page_cgroup.c            |    3 +--
 7 files changed, 26 insertions(+), 8 deletions(-)


--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel

Comments

Balbir Singh July 21, 2009, 3:56 p.m. UTC | #1
* Ryo Tsuruta <ryov@valinux.co.jp> [2009-07-21 23:12:11]:

> This patch makes the page_cgroup framework be able to be used even if
> the compile option of the cgroup memory controller is off.
> So blkio-cgroup can use this framework without the memory controller.
> 
> Signed-off-by: Hirokazu Takahashi <taka@valinux.co.jp>
> Signed-off-by: Ryo Tsuruta <ryov@valinux.co.jp>
> 
> ---
>  include/linux/memcontrol.h  |    6 ++++++
>  include/linux/mmzone.h      |    4 ++--
>  include/linux/page_cgroup.h |    8 +++++---
>  init/Kconfig                |    4 ++++
>  mm/Makefile                 |    3 ++-
>  mm/memcontrol.c             |    6 ++++++
>  mm/page_cgroup.c            |    3 +--
>  7 files changed, 26 insertions(+), 8 deletions(-)
> 
> Index: linux-2.6.31-rc3/include/linux/memcontrol.h
> ===================================================================
> --- linux-2.6.31-rc3.orig/include/linux/memcontrol.h
> +++ linux-2.6.31-rc3/include/linux/memcontrol.h
> @@ -37,6 +37,8 @@ struct mm_struct;
>   * (Of course, if memcg does memory allocation in future, GFP_KERNEL is sane.)
>   */
> 
> +extern void __init_mem_page_cgroup(struct page_cgroup *pc);
> +
>  extern int mem_cgroup_newpage_charge(struct page *page, struct mm_struct *mm,
>  				gfp_t gfp_mask);
>  /* for swap handling */
> @@ -121,6 +123,10 @@ void mem_cgroup_update_mapped_file_stat(
>  #else /* CONFIG_CGROUP_MEM_RES_CTLR */
>  struct mem_cgroup;
> 
> +static inline void __init_mem_page_cgroup(struct page_cgroup *pc)
> +{
> +}
> +
>  static inline int mem_cgroup_newpage_charge(struct page *page,
>  					struct mm_struct *mm, gfp_t gfp_mask)
>  {
> Index: linux-2.6.31-rc3/include/linux/mmzone.h
> ===================================================================
> --- linux-2.6.31-rc3.orig/include/linux/mmzone.h
> +++ linux-2.6.31-rc3/include/linux/mmzone.h
> @@ -605,7 +605,7 @@ typedef struct pglist_data {
>  	int nr_zones;
>  #ifdef CONFIG_FLAT_NODE_MEM_MAP	/* means !SPARSEMEM */
>  	struct page *node_mem_map;
> -#ifdef CONFIG_CGROUP_MEM_RES_CTLR
> +#ifdef CONFIG_CGROUP_PAGE
>  	struct page_cgroup *node_page_cgroup;
>  #endif
>  #endif
> @@ -956,7 +956,7 @@ struct mem_section {
> 
>  	/* See declaration of similar field in struct zone */
>  	unsigned long *pageblock_flags;
> -#ifdef CONFIG_CGROUP_MEM_RES_CTLR
> +#ifdef CONFIG_CGROUP_PAGE
>  	/*
>  	 * If !SPARSEMEM, pgdat doesn't have page_cgroup pointer. We use
>  	 * section. (see memcontrol.h/page_cgroup.h about this.)
> Index: linux-2.6.31-rc3/include/linux/page_cgroup.h
> ===================================================================
> --- linux-2.6.31-rc3.orig/include/linux/page_cgroup.h
> +++ linux-2.6.31-rc3/include/linux/page_cgroup.h
> @@ -1,7 +1,7 @@
>  #ifndef __LINUX_PAGE_CGROUP_H
>  #define __LINUX_PAGE_CGROUP_H
> 
> -#ifdef CONFIG_CGROUP_MEM_RES_CTLR
> +#ifdef CONFIG_CGROUP_PAGE
>  #include <linux/bit_spinlock.h>
>  /*
>   * Page Cgroup can be considered as an extended mem_map.
> @@ -12,9 +12,11 @@
>   */
>  struct page_cgroup {
>  	unsigned long flags;
> -	struct mem_cgroup *mem_cgroup;
>  	struct page *page;
> +#ifdef CONFIG_CGROUP_MEM_RES_CTLR
> +	struct mem_cgroup *mem_cgroup;
>  	struct list_head lru;		/* per cgroup LRU list */
> +#endif
>  };

If CONFIG_CGROUP_MEM_RES_CTLR is not enabled and CGROUP_PAGE is
(assuming that the depends on below is refactored), what would this
change buy us? What is page_cgroup helping us track, the mem_cgroup is
factored out, so we are interested in the flags only?


> 
>  void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat);
> @@ -83,7 +85,7 @@ static inline void unlock_page_cgroup(st
>  	bit_spin_unlock(PCG_LOCK, &pc->flags);
>  }
> 
> -#else /* CONFIG_CGROUP_MEM_RES_CTLR */
> +#else /* CONFIG_CGROUP_PAGE */
>  struct page_cgroup;
> 
>  static inline void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat)
> Index: linux-2.6.31-rc3/init/Kconfig
> ===================================================================
> --- linux-2.6.31-rc3.orig/init/Kconfig
> +++ linux-2.6.31-rc3/init/Kconfig
> @@ -614,6 +614,10 @@ config CGROUP_MEM_RES_CTLR_SWAP
> 
>  endif # CGROUPS
> 
> +config CGROUP_PAGE
> +	def_bool y

Should def_bool be "y"? Shouldn't the CGROUP_MEM_RES_CTLR select it.

> +	depends on CGROUP_MEM_RES_CTLR
> +
>  config MM_OWNER
>  	bool
> 
> Index: linux-2.6.31-rc3/mm/Makefile
> ===================================================================
> --- linux-2.6.31-rc3.orig/mm/Makefile
> +++ linux-2.6.31-rc3/mm/Makefile
> @@ -39,6 +39,7 @@ else
>  obj-$(CONFIG_SMP) += allocpercpu.o
>  endif
>  obj-$(CONFIG_QUICKLIST) += quicklist.o
> -obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o page_cgroup.o
> +obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o
> +obj-$(CONFIG_CGROUP_PAGE) += page_cgroup.o
>  obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o
>  obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak-test.o
> Index: linux-2.6.31-rc3/mm/memcontrol.c
> ===================================================================
> --- linux-2.6.31-rc3.orig/mm/memcontrol.c
> +++ linux-2.6.31-rc3/mm/memcontrol.c
> @@ -129,6 +129,12 @@ struct mem_cgroup_lru_info {
>  	struct mem_cgroup_per_node *nodeinfo[MAX_NUMNODES];
>  };
> 
> +void __meminit __init_mem_page_cgroup(struct page_cgroup *pc)
> +{
> +	pc->mem_cgroup = NULL;
> +	INIT_LIST_HEAD(&pc->lru);
> +}
> +
>  /*
>   * The memory controller data structure. The memory controller controls both
>   * page cache and RSS per cgroup. We would eventually like to provide
> Index: linux-2.6.31-rc3/mm/page_cgroup.c
> ===================================================================
> --- linux-2.6.31-rc3.orig/mm/page_cgroup.c
> +++ linux-2.6.31-rc3/mm/page_cgroup.c
> @@ -14,9 +14,8 @@ static void __meminit
>  __init_page_cgroup(struct page_cgroup *pc, unsigned long pfn)
>  {
>  	pc->flags = 0;
> -	pc->mem_cgroup = NULL;
>  	pc->page = pfn_to_page(pfn);
> -	INIT_LIST_HEAD(&pc->lru);
> +	__init_mem_page_cgroup(pc);
>  }
>  static unsigned long total_usage;
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
>
KAMEZAWA Hiroyuki July 22, 2009, 1:20 a.m. UTC | #2
On Tue, 21 Jul 2009 21:26:36 +0530
Balbir Singh <balbir@linux.vnet.ibm.com> wrote:

> * Ryo Tsuruta <ryov@valinux.co.jp> [2009-07-21 23:12:11]:
> 
> > This patch makes the page_cgroup framework be able to be used even if
> > the compile option of the cgroup memory controller is off.
> > So blkio-cgroup can use this framework without the memory controller.
> > 
> > Signed-off-by: Hirokazu Takahashi <taka@valinux.co.jp>
> > Signed-off-by: Ryo Tsuruta <ryov@valinux.co.jp>
> > 
> > ---
> >  include/linux/memcontrol.h  |    6 ++++++
> >  include/linux/mmzone.h      |    4 ++--
> >  include/linux/page_cgroup.h |    8 +++++---
> >  init/Kconfig                |    4 ++++
> >  mm/Makefile                 |    3 ++-
> >  mm/memcontrol.c             |    6 ++++++
> >  mm/page_cgroup.c            |    3 +--
> >  7 files changed, 26 insertions(+), 8 deletions(-)
> > 
> > Index: linux-2.6.31-rc3/include/linux/memcontrol.h
> > ===================================================================
> > --- linux-2.6.31-rc3.orig/include/linux/memcontrol.h
> > +++ linux-2.6.31-rc3/include/linux/memcontrol.h
> > @@ -37,6 +37,8 @@ struct mm_struct;
> >   * (Of course, if memcg does memory allocation in future, GFP_KERNEL is sane.)
> >   */
> > 
> > +extern void __init_mem_page_cgroup(struct page_cgroup *pc);
> > +
> >  extern int mem_cgroup_newpage_charge(struct page *page, struct mm_struct *mm,
> >  				gfp_t gfp_mask);
> >  /* for swap handling */
> > @@ -121,6 +123,10 @@ void mem_cgroup_update_mapped_file_stat(
> >  #else /* CONFIG_CGROUP_MEM_RES_CTLR */
> >  struct mem_cgroup;
> > 
> > +static inline void __init_mem_page_cgroup(struct page_cgroup *pc)
> > +{
> > +}
> > +
> >  static inline int mem_cgroup_newpage_charge(struct page *page,
> >  					struct mm_struct *mm, gfp_t gfp_mask)
> >  {
> > Index: linux-2.6.31-rc3/include/linux/mmzone.h
> > ===================================================================
> > --- linux-2.6.31-rc3.orig/include/linux/mmzone.h
> > +++ linux-2.6.31-rc3/include/linux/mmzone.h
> > @@ -605,7 +605,7 @@ typedef struct pglist_data {
> >  	int nr_zones;
> >  #ifdef CONFIG_FLAT_NODE_MEM_MAP	/* means !SPARSEMEM */
> >  	struct page *node_mem_map;
> > -#ifdef CONFIG_CGROUP_MEM_RES_CTLR
> > +#ifdef CONFIG_CGROUP_PAGE
> >  	struct page_cgroup *node_page_cgroup;
> >  #endif
> >  #endif
> > @@ -956,7 +956,7 @@ struct mem_section {
> > 
> >  	/* See declaration of similar field in struct zone */
> >  	unsigned long *pageblock_flags;
> > -#ifdef CONFIG_CGROUP_MEM_RES_CTLR
> > +#ifdef CONFIG_CGROUP_PAGE
> >  	/*
> >  	 * If !SPARSEMEM, pgdat doesn't have page_cgroup pointer. We use
> >  	 * section. (see memcontrol.h/page_cgroup.h about this.)
> > Index: linux-2.6.31-rc3/include/linux/page_cgroup.h
> > ===================================================================
> > --- linux-2.6.31-rc3.orig/include/linux/page_cgroup.h
> > +++ linux-2.6.31-rc3/include/linux/page_cgroup.h
> > @@ -1,7 +1,7 @@
> >  #ifndef __LINUX_PAGE_CGROUP_H
> >  #define __LINUX_PAGE_CGROUP_H
> > 
> > -#ifdef CONFIG_CGROUP_MEM_RES_CTLR
> > +#ifdef CONFIG_CGROUP_PAGE
> >  #include <linux/bit_spinlock.h>
> >  /*
> >   * Page Cgroup can be considered as an extended mem_map.
> > @@ -12,9 +12,11 @@
> >   */
> >  struct page_cgroup {
> >  	unsigned long flags;
> > -	struct mem_cgroup *mem_cgroup;
> >  	struct page *page;
> > +#ifdef CONFIG_CGROUP_MEM_RES_CTLR
> > +	struct mem_cgroup *mem_cgroup;
> >  	struct list_head lru;		/* per cgroup LRU list */
> > +#endif
> >  };
> 
> If CONFIG_CGROUP_MEM_RES_CTLR is not enabled and CGROUP_PAGE is
> (assuming that the depends on below is refactored), what would this
> change buy us? What is page_cgroup helping us track, the mem_cgroup is
> factored out, so we are interested in the flags only?
> 
plz remove CONFIG. This jsut makes code complicated.
or plz use your own infrastructure, not depends on page_cgroup.

Thanks,
-Kame

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel
KAMEZAWA Hiroyuki July 22, 2009, 2:07 a.m. UTC | #3
On Wed, 22 Jul 2009 10:20:58 +0900
KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> wrote:

> On Tue, 21 Jul 2009 21:26:36 +0530
> Balbir Singh <balbir@linux.vnet.ibm.com> wrote:
> 
> > * Ryo Tsuruta <ryov@valinux.co.jp> [2009-07-21 23:12:11]:
> > 
> > > This patch makes the page_cgroup framework be able to be used even if
> > > the compile option of the cgroup memory controller is off.
> > > So blkio-cgroup can use this framework without the memory controller.
> > > 
> > > Signed-off-by: Hirokazu Takahashi <taka@valinux.co.jp>
> > > Signed-off-by: Ryo Tsuruta <ryov@valinux.co.jp>
> > > 
> > > ---
> > >  include/linux/memcontrol.h  |    6 ++++++
> > >  include/linux/mmzone.h      |    4 ++--
> > >  include/linux/page_cgroup.h |    8 +++++---
> > >  init/Kconfig                |    4 ++++
> > >  mm/Makefile                 |    3 ++-
> > >  mm/memcontrol.c             |    6 ++++++
> > >  mm/page_cgroup.c            |    3 +--
> > >  7 files changed, 26 insertions(+), 8 deletions(-)
> > > 
> > > Index: linux-2.6.31-rc3/include/linux/memcontrol.h
> > > ===================================================================
> > > --- linux-2.6.31-rc3.orig/include/linux/memcontrol.h
> > > +++ linux-2.6.31-rc3/include/linux/memcontrol.h
> > > @@ -37,6 +37,8 @@ struct mm_struct;
> > >   * (Of course, if memcg does memory allocation in future, GFP_KERNEL is sane.)
> > >   */
> > > 
> > > +extern void __init_mem_page_cgroup(struct page_cgroup *pc);
> > > +
> > >  extern int mem_cgroup_newpage_charge(struct page *page, struct mm_struct *mm,
> > >  				gfp_t gfp_mask);
> > >  /* for swap handling */
> > > @@ -121,6 +123,10 @@ void mem_cgroup_update_mapped_file_stat(
> > >  #else /* CONFIG_CGROUP_MEM_RES_CTLR */
> > >  struct mem_cgroup;
> > > 
> > > +static inline void __init_mem_page_cgroup(struct page_cgroup *pc)
> > > +{
> > > +}
> > > +
> > >  static inline int mem_cgroup_newpage_charge(struct page *page,
> > >  					struct mm_struct *mm, gfp_t gfp_mask)
> > >  {
> > > Index: linux-2.6.31-rc3/include/linux/mmzone.h
> > > ===================================================================
> > > --- linux-2.6.31-rc3.orig/include/linux/mmzone.h
> > > +++ linux-2.6.31-rc3/include/linux/mmzone.h
> > > @@ -605,7 +605,7 @@ typedef struct pglist_data {
> > >  	int nr_zones;
> > >  #ifdef CONFIG_FLAT_NODE_MEM_MAP	/* means !SPARSEMEM */
> > >  	struct page *node_mem_map;
> > > -#ifdef CONFIG_CGROUP_MEM_RES_CTLR
> > > +#ifdef CONFIG_CGROUP_PAGE
> > >  	struct page_cgroup *node_page_cgroup;
> > >  #endif
> > >  #endif
> > > @@ -956,7 +956,7 @@ struct mem_section {
> > > 
> > >  	/* See declaration of similar field in struct zone */
> > >  	unsigned long *pageblock_flags;
> > > -#ifdef CONFIG_CGROUP_MEM_RES_CTLR
> > > +#ifdef CONFIG_CGROUP_PAGE
> > >  	/*
> > >  	 * If !SPARSEMEM, pgdat doesn't have page_cgroup pointer. We use
> > >  	 * section. (see memcontrol.h/page_cgroup.h about this.)
> > > Index: linux-2.6.31-rc3/include/linux/page_cgroup.h
> > > ===================================================================
> > > --- linux-2.6.31-rc3.orig/include/linux/page_cgroup.h
> > > +++ linux-2.6.31-rc3/include/linux/page_cgroup.h
> > > @@ -1,7 +1,7 @@
> > >  #ifndef __LINUX_PAGE_CGROUP_H
> > >  #define __LINUX_PAGE_CGROUP_H
> > > 
> > > -#ifdef CONFIG_CGROUP_MEM_RES_CTLR
> > > +#ifdef CONFIG_CGROUP_PAGE
> > >  #include <linux/bit_spinlock.h>
> > >  /*
> > >   * Page Cgroup can be considered as an extended mem_map.
> > > @@ -12,9 +12,11 @@
> > >   */
> > >  struct page_cgroup {
> > >  	unsigned long flags;
> > > -	struct mem_cgroup *mem_cgroup;
> > >  	struct page *page;
> > > +#ifdef CONFIG_CGROUP_MEM_RES_CTLR
> > > +	struct mem_cgroup *mem_cgroup;
> > >  	struct list_head lru;		/* per cgroup LRU list */
> > > +#endif
> > >  };
> > 
> > If CONFIG_CGROUP_MEM_RES_CTLR is not enabled and CGROUP_PAGE is
> > (assuming that the depends on below is refactored), what would this
> > change buy us? What is page_cgroup helping us track, the mem_cgroup is
> > factored out, so we are interested in the flags only?
> > 
> plz remove CONFIG. This jsut makes code complicated.
> or plz use your own infrastructure, not depends on page_cgroup.
> 

BTW, you can't modify page_cgroup->flags bit without cmpxchg.
Then, patch [5/9] is completely broken, now because new bit is used
with atomic bit ops but without lock_page_cgroup(). (see mmotm)

Why struct page's flags bit can includes zone id etc...is just because
it's initalized before using. Anyway, this is "flags" bit. If you want
to modify multiple bit at once, plz use cmpxchg.
Then, I buy patch [8/9] and just skip this patch.

But, following is more straightforward. (and what you do is not different
from this.)
==
struct page {
	.....
#ifdef CONFIG_BLOCKIO_CGROUP
	void *blockio_cgroup;
#endif
}
==


Regards,
-Kame



--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel
Ryo Tsuruta July 22, 2009, 8:28 a.m. UTC | #4
KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> wrote:
> > > > Index: linux-2.6.31-rc3/include/linux/page_cgroup.h
> > > > ===================================================================
> > > > --- linux-2.6.31-rc3.orig/include/linux/page_cgroup.h
> > > > +++ linux-2.6.31-rc3/include/linux/page_cgroup.h
> > > > @@ -1,7 +1,7 @@
> > > >  #ifndef __LINUX_PAGE_CGROUP_H
> > > >  #define __LINUX_PAGE_CGROUP_H
> > > > 
> > > > -#ifdef CONFIG_CGROUP_MEM_RES_CTLR
> > > > +#ifdef CONFIG_CGROUP_PAGE
> > > >  #include <linux/bit_spinlock.h>
> > > >  /*
> > > >   * Page Cgroup can be considered as an extended mem_map.
> > > > @@ -12,9 +12,11 @@
> > > >   */
> > > >  struct page_cgroup {
> > > >  	unsigned long flags;
> > > > -	struct mem_cgroup *mem_cgroup;
> > > >  	struct page *page;
> > > > +#ifdef CONFIG_CGROUP_MEM_RES_CTLR
> > > > +	struct mem_cgroup *mem_cgroup;
> > > >  	struct list_head lru;		/* per cgroup LRU list */
> > > > +#endif
> > > >  };
> > > 
> > > If CONFIG_CGROUP_MEM_RES_CTLR is not enabled and CGROUP_PAGE is
> > > (assuming that the depends on below is refactored), what would this
> > > change buy us? What is page_cgroup helping us track, the mem_cgroup is
> > > factored out, so we are interested in the flags only?
> > > 
> > plz remove CONFIG. This jsut makes code complicated.
> > or plz use your own infrastructure, not depends on page_cgroup.

Thanks for reviewing the patch.
Do you mean that remove only CONFIG_CGROUP_MEM_RES_CTR in struct
page_cgroup? Is it OK to define CONFIG_CGROUP_PAGE?

> BTW, you can't modify page_cgroup->flags bit without cmpxchg.
> Then, patch [5/9] is completely broken, now because new bit is used
> with atomic bit ops but without lock_page_cgroup(). (see mmotm)
> 
> Why struct page's flags bit can includes zone id etc...is just because
> it's initalized before using. Anyway, this is "flags" bit. If you want
> to modify multiple bit at once, plz use cmpxchg.
> Then, I buy patch [8/9] and just skip this patch.

O.K. I'll use cmpxchg.
 
> But, following is more straightforward. (and what you do is not different
> from this.)
> ==
> struct page {
> 	.....
> #ifdef CONFIG_BLOCKIO_CGROUP
> 	void *blockio_cgroup;
> #endif
> }
> ==

This increases the size of struct page. Could I get a consensus on
this approach?

Thanks,
Ryo Tsuruta

> Regards,
> -Kame
> 
> 

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel
Balbir Singh July 22, 2009, 8:37 a.m. UTC | #5
* Ryo Tsuruta <ryov@valinux.co.jp> [2009-07-22 17:28:43]:

> KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> wrote:
> > > > > Index: linux-2.6.31-rc3/include/linux/page_cgroup.h
> > > > > ===================================================================
> > > > > --- linux-2.6.31-rc3.orig/include/linux/page_cgroup.h
> > > > > +++ linux-2.6.31-rc3/include/linux/page_cgroup.h
> > > > > @@ -1,7 +1,7 @@
> > > > >  #ifndef __LINUX_PAGE_CGROUP_H
> > > > >  #define __LINUX_PAGE_CGROUP_H
> > > > > 
> > > > > -#ifdef CONFIG_CGROUP_MEM_RES_CTLR
> > > > > +#ifdef CONFIG_CGROUP_PAGE
> > > > >  #include <linux/bit_spinlock.h>
> > > > >  /*
> > > > >   * Page Cgroup can be considered as an extended mem_map.
> > > > > @@ -12,9 +12,11 @@
> > > > >   */
> > > > >  struct page_cgroup {
> > > > >  	unsigned long flags;
> > > > > -	struct mem_cgroup *mem_cgroup;
> > > > >  	struct page *page;
> > > > > +#ifdef CONFIG_CGROUP_MEM_RES_CTLR
> > > > > +	struct mem_cgroup *mem_cgroup;
> > > > >  	struct list_head lru;		/* per cgroup LRU list */
> > > > > +#endif
> > > > >  };
> > > > 
> > > > If CONFIG_CGROUP_MEM_RES_CTLR is not enabled and CGROUP_PAGE is
> > > > (assuming that the depends on below is refactored), what would this
> > > > change buy us? What is page_cgroup helping us track, the mem_cgroup is
> > > > factored out, so we are interested in the flags only?
> > > > 
> > > plz remove CONFIG. This jsut makes code complicated.
> > > or plz use your own infrastructure, not depends on page_cgroup.
> 
> Thanks for reviewing the patch.
> Do you mean that remove only CONFIG_CGROUP_MEM_RES_CTR in struct
> page_cgroup? Is it OK to define CONFIG_CGROUP_PAGE?
> 
> > BTW, you can't modify page_cgroup->flags bit without cmpxchg.
> > Then, patch [5/9] is completely broken, now because new bit is used
> > with atomic bit ops but without lock_page_cgroup(). (see mmotm)
> > 
> > Why struct page's flags bit can includes zone id etc...is just because
> > it's initalized before using. Anyway, this is "flags" bit. If you want
> > to modify multiple bit at once, plz use cmpxchg.
> > Then, I buy patch [8/9] and just skip this patch.
> 
> O.K. I'll use cmpxchg.
> 
> > But, following is more straightforward. (and what you do is not different
> > from this.)
> > ==
> > struct page {
> > 	.....
> > #ifdef CONFIG_BLOCKIO_CGROUP
> > 	void *blockio_cgroup;
> > #endif
> > }
> > ==
> 
> This increases the size of struct page. Could I get a consensus on
> this approach?
>


This defeats the entire purpose of page_cgroup, IMHO. You need to add
the cgroup pointer to page_cgroup or use css id's there.
 
> Thanks,
> Ryo Tsuruta
> 
> > Regards,
> > -Kame
> > 
> >
KAMEZAWA Hiroyuki July 22, 2009, 8:46 a.m. UTC | #6
On Wed, 22 Jul 2009 14:07:22 +0530
Balbir Singh <balbir@linux.vnet.ibm.com> wrote:

> * Ryo Tsuruta <ryov@valinux.co.jp> [2009-07-22 17:28:43]:
> 
> > KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> wrote:
> > > > > > Index: linux-2.6.31-rc3/include/linux/page_cgroup.h
> > > > > > ===================================================================
> > > > > > --- linux-2.6.31-rc3.orig/include/linux/page_cgroup.h
> > > > > > +++ linux-2.6.31-rc3/include/linux/page_cgroup.h
> > > > > > @@ -1,7 +1,7 @@
> > > > > >  #ifndef __LINUX_PAGE_CGROUP_H
> > > > > >  #define __LINUX_PAGE_CGROUP_H
> > > > > > 
> > > > > > -#ifdef CONFIG_CGROUP_MEM_RES_CTLR
> > > > > > +#ifdef CONFIG_CGROUP_PAGE
> > > > > >  #include <linux/bit_spinlock.h>
> > > > > >  /*
> > > > > >   * Page Cgroup can be considered as an extended mem_map.
> > > > > > @@ -12,9 +12,11 @@
> > > > > >   */
> > > > > >  struct page_cgroup {
> > > > > >  	unsigned long flags;
> > > > > > -	struct mem_cgroup *mem_cgroup;
> > > > > >  	struct page *page;
> > > > > > +#ifdef CONFIG_CGROUP_MEM_RES_CTLR
> > > > > > +	struct mem_cgroup *mem_cgroup;
> > > > > >  	struct list_head lru;		/* per cgroup LRU list */
> > > > > > +#endif
> > > > > >  };
> > > > > 
> > > > > If CONFIG_CGROUP_MEM_RES_CTLR is not enabled and CGROUP_PAGE is
> > > > > (assuming that the depends on below is refactored), what would this
> > > > > change buy us? What is page_cgroup helping us track, the mem_cgroup is
> > > > > factored out, so we are interested in the flags only?
> > > > > 
> > > > plz remove CONFIG. This jsut makes code complicated.
> > > > or plz use your own infrastructure, not depends on page_cgroup.
> > 
> > Thanks for reviewing the patch.
> > Do you mean that remove only CONFIG_CGROUP_MEM_RES_CTR in struct
> > page_cgroup? Is it OK to define CONFIG_CGROUP_PAGE?
> > 
> > > BTW, you can't modify page_cgroup->flags bit without cmpxchg.
> > > Then, patch [5/9] is completely broken, now because new bit is used
> > > with atomic bit ops but without lock_page_cgroup(). (see mmotm)
> > > 
> > > Why struct page's flags bit can includes zone id etc...is just because
> > > it's initalized before using. Anyway, this is "flags" bit. If you want
> > > to modify multiple bit at once, plz use cmpxchg.
> > > Then, I buy patch [8/9] and just skip this patch.
> > 
> > O.K. I'll use cmpxchg.
> > 
> > > But, following is more straightforward. (and what you do is not different
> > > from this.)
> > > ==
> > > struct page {
> > > 	.....
> > > #ifdef CONFIG_BLOCKIO_CGROUP
> > > 	void *blockio_cgroup;
> > > #endif
> > > }
> > > ==
> > 
> > This increases the size of struct page. Could I get a consensus on
> > this approach?
> >
> 
> 
> This defeats the entire purpose of page_cgroup, IMHO. You need to add
> the cgroup pointer to page_cgroup or use css id's there.
>  
My point is
 - increasing size of struct page is verrry difficult.
 - increasing size of struct page_cgroup should be.
Any diffrence ?

Then, plz don't go this way without enough amounts of effort.
plz see my patch to reduce size struct page_cgroup, it's _a_ effort.

Thanks,
-Kame



--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel
diff mbox

Patch

Index: linux-2.6.31-rc3/include/linux/memcontrol.h
===================================================================
--- linux-2.6.31-rc3.orig/include/linux/memcontrol.h
+++ linux-2.6.31-rc3/include/linux/memcontrol.h
@@ -37,6 +37,8 @@  struct mm_struct;
  * (Of course, if memcg does memory allocation in future, GFP_KERNEL is sane.)
  */
 
+extern void __init_mem_page_cgroup(struct page_cgroup *pc);
+
 extern int mem_cgroup_newpage_charge(struct page *page, struct mm_struct *mm,
 				gfp_t gfp_mask);
 /* for swap handling */
@@ -121,6 +123,10 @@  void mem_cgroup_update_mapped_file_stat(
 #else /* CONFIG_CGROUP_MEM_RES_CTLR */
 struct mem_cgroup;
 
+static inline void __init_mem_page_cgroup(struct page_cgroup *pc)
+{
+}
+
 static inline int mem_cgroup_newpage_charge(struct page *page,
 					struct mm_struct *mm, gfp_t gfp_mask)
 {
Index: linux-2.6.31-rc3/include/linux/mmzone.h
===================================================================
--- linux-2.6.31-rc3.orig/include/linux/mmzone.h
+++ linux-2.6.31-rc3/include/linux/mmzone.h
@@ -605,7 +605,7 @@  typedef struct pglist_data {
 	int nr_zones;
 #ifdef CONFIG_FLAT_NODE_MEM_MAP	/* means !SPARSEMEM */
 	struct page *node_mem_map;
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR
+#ifdef CONFIG_CGROUP_PAGE
 	struct page_cgroup *node_page_cgroup;
 #endif
 #endif
@@ -956,7 +956,7 @@  struct mem_section {
 
 	/* See declaration of similar field in struct zone */
 	unsigned long *pageblock_flags;
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR
+#ifdef CONFIG_CGROUP_PAGE
 	/*
 	 * If !SPARSEMEM, pgdat doesn't have page_cgroup pointer. We use
 	 * section. (see memcontrol.h/page_cgroup.h about this.)
Index: linux-2.6.31-rc3/include/linux/page_cgroup.h
===================================================================
--- linux-2.6.31-rc3.orig/include/linux/page_cgroup.h
+++ linux-2.6.31-rc3/include/linux/page_cgroup.h
@@ -1,7 +1,7 @@ 
 #ifndef __LINUX_PAGE_CGROUP_H
 #define __LINUX_PAGE_CGROUP_H
 
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR
+#ifdef CONFIG_CGROUP_PAGE
 #include <linux/bit_spinlock.h>
 /*
  * Page Cgroup can be considered as an extended mem_map.
@@ -12,9 +12,11 @@ 
  */
 struct page_cgroup {
 	unsigned long flags;
-	struct mem_cgroup *mem_cgroup;
 	struct page *page;
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR
+	struct mem_cgroup *mem_cgroup;
 	struct list_head lru;		/* per cgroup LRU list */
+#endif
 };
 
 void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat);
@@ -83,7 +85,7 @@  static inline void unlock_page_cgroup(st
 	bit_spin_unlock(PCG_LOCK, &pc->flags);
 }
 
-#else /* CONFIG_CGROUP_MEM_RES_CTLR */
+#else /* CONFIG_CGROUP_PAGE */
 struct page_cgroup;
 
 static inline void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat)
Index: linux-2.6.31-rc3/init/Kconfig
===================================================================
--- linux-2.6.31-rc3.orig/init/Kconfig
+++ linux-2.6.31-rc3/init/Kconfig
@@ -614,6 +614,10 @@  config CGROUP_MEM_RES_CTLR_SWAP
 
 endif # CGROUPS
 
+config CGROUP_PAGE
+	def_bool y
+	depends on CGROUP_MEM_RES_CTLR
+
 config MM_OWNER
 	bool
 
Index: linux-2.6.31-rc3/mm/Makefile
===================================================================
--- linux-2.6.31-rc3.orig/mm/Makefile
+++ linux-2.6.31-rc3/mm/Makefile
@@ -39,6 +39,7 @@  else
 obj-$(CONFIG_SMP) += allocpercpu.o
 endif
 obj-$(CONFIG_QUICKLIST) += quicklist.o
-obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o page_cgroup.o
+obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o
+obj-$(CONFIG_CGROUP_PAGE) += page_cgroup.o
 obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o
 obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak-test.o
Index: linux-2.6.31-rc3/mm/memcontrol.c
===================================================================
--- linux-2.6.31-rc3.orig/mm/memcontrol.c
+++ linux-2.6.31-rc3/mm/memcontrol.c
@@ -129,6 +129,12 @@  struct mem_cgroup_lru_info {
 	struct mem_cgroup_per_node *nodeinfo[MAX_NUMNODES];
 };
 
+void __meminit __init_mem_page_cgroup(struct page_cgroup *pc)
+{
+	pc->mem_cgroup = NULL;
+	INIT_LIST_HEAD(&pc->lru);
+}
+
 /*
  * The memory controller data structure. The memory controller controls both
  * page cache and RSS per cgroup. We would eventually like to provide
Index: linux-2.6.31-rc3/mm/page_cgroup.c
===================================================================
--- linux-2.6.31-rc3.orig/mm/page_cgroup.c
+++ linux-2.6.31-rc3/mm/page_cgroup.c
@@ -14,9 +14,8 @@  static void __meminit
 __init_page_cgroup(struct page_cgroup *pc, unsigned long pfn)
 {
 	pc->flags = 0;
-	pc->mem_cgroup = NULL;
 	pc->page = pfn_to_page(pfn);
-	INIT_LIST_HEAD(&pc->lru);
+	__init_mem_page_cgroup(pc);
 }
 static unsigned long total_usage;