Message ID | 20190227144736.5872-2-aneesh.kumar@linux.ibm.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | mm/kvm/vfio/ppc64: Migrate compound pages out of CMA region | expand |
On 2/27/19 3:47 PM, Aneesh Kumar K.V wrote: > This patch adds PF_MEMALLOC_NOCMA which makes sure any allocation in that context > is marked non-movable and hence cannot be satisfied by the CMA region. > > This is useful with get_user_pages_longterm where we want to take a page pin by > migrating pages from the CMA region. Marking the section PF_MEMALLOC_NOCMA ensures > that we avoid unnecessary page migration later. > > Suggested-by: Andrea Arcangeli <aarcange@redhat.com> > Reviewed-by: Andrea Arcangeli <aarcange@redhat.com> > Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> +CC scheduler guys Do we really take the last available PF flag just so that "we avoid unnecessary page migration later"? If yes, that's a third PF_MEMALLOC flag; should we get a separate variable for gfp context at this point? Also I don't like the name PF_MEMALLOC_NOCMA, as it's unnecessarily tied to CMA. If anything it should be e.g. PF_MEMALLOC_NOMOVABLE. Thanks. > --- > include/linux/sched.h | 1 + > include/linux/sched/mm.h | 48 +++++++++++++++++++++++++++++++++------- > 2 files changed, 41 insertions(+), 8 deletions(-) > > diff --git a/include/linux/sched.h b/include/linux/sched.h > index f9b43c989577..dfa90088ba08 100644 > --- a/include/linux/sched.h > +++ b/include/linux/sched.h > @@ -1403,6 +1403,7 @@ extern struct pid *cad_pid; > #define PF_UMH 0x02000000 /* I'm an Usermodehelper process */ > #define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_allowed */ > #define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */ > +#define PF_MEMALLOC_NOCMA 0x10000000 /* All allocation request will have _GFP_MOVABLE cleared */ > #define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */ > #define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */ > #define PF_SUSPEND_TASK 0x80000000 /* This thread called freeze_processes() and should not be frozen */ > diff --git a/include/linux/sched/mm.h 
b/include/linux/sched/mm.h > index 3bfa6a0cbba4..0cd9f10423fb 100644 > --- a/include/linux/sched/mm.h > +++ b/include/linux/sched/mm.h > @@ -148,17 +148,25 @@ static inline bool in_vfork(struct task_struct *tsk) > * Applies per-task gfp context to the given allocation flags. > * PF_MEMALLOC_NOIO implies GFP_NOIO > * PF_MEMALLOC_NOFS implies GFP_NOFS > + * PF_MEMALLOC_NOCMA implies no allocation from CMA region. > */ > static inline gfp_t current_gfp_context(gfp_t flags) > { > - /* > - * NOIO implies both NOIO and NOFS and it is a weaker context > - * so always make sure it makes precedence > - */ > - if (unlikely(current->flags & PF_MEMALLOC_NOIO)) > - flags &= ~(__GFP_IO | __GFP_FS); > - else if (unlikely(current->flags & PF_MEMALLOC_NOFS)) > - flags &= ~__GFP_FS; > + if (unlikely(current->flags & > + (PF_MEMALLOC_NOIO | PF_MEMALLOC_NOFS | PF_MEMALLOC_NOCMA))) { > + /* > + * NOIO implies both NOIO and NOFS and it is a weaker context > + * so always make sure it makes precedence > + */ > + if (current->flags & PF_MEMALLOC_NOIO) > + flags &= ~(__GFP_IO | __GFP_FS); > + else if (current->flags & PF_MEMALLOC_NOFS) > + flags &= ~__GFP_FS; > +#ifdef CONFIG_CMA > + if (current->flags & PF_MEMALLOC_NOCMA) > + flags &= ~__GFP_MOVABLE; > +#endif > + } > return flags; > } > > @@ -248,6 +256,30 @@ static inline void memalloc_noreclaim_restore(unsigned int flags) > current->flags = (current->flags & ~PF_MEMALLOC) | flags; > } > > +#ifdef CONFIG_CMA > +static inline unsigned int memalloc_nocma_save(void) > +{ > + unsigned int flags = current->flags & PF_MEMALLOC_NOCMA; > + > + current->flags |= PF_MEMALLOC_NOCMA; > + return flags; > +} > + > +static inline void memalloc_nocma_restore(unsigned int flags) > +{ > + current->flags = (current->flags & ~PF_MEMALLOC_NOCMA) | flags; > +} > +#else > +static inline unsigned int memalloc_nocma_save(void) > +{ > + return 0; > +} > + > +static inline void memalloc_nocma_restore(unsigned int flags) > +{ > +} > +#endif > + > #ifdef 
CONFIG_MEMCG > /** > * memalloc_use_memcg - Starts the remote memcg charging scope. >
On Thu 28-02-19 13:20:03, Vlastimil Babka wrote: > On 2/27/19 3:47 PM, Aneesh Kumar K.V wrote: > > This patch adds PF_MEMALLOC_NOCMA which makes sure any allocation in that context > > is marked non-movable and hence cannot be satisfied by the CMA region. > > > > This is useful with get_user_pages_longterm where we want to take a page pin by > > migrating pages from the CMA region. Marking the section PF_MEMALLOC_NOCMA ensures > > that we avoid unnecessary page migration later. > > > > Suggested-by: Andrea Arcangeli <aarcange@redhat.com> > > Reviewed-by: Andrea Arcangeli <aarcange@redhat.com> > > Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> > > +CC scheduler guys > > Do we really take the last available PF flag just so that "we avoid > unnecessary page migration later"? > If yes, that's a third PF_MEMALLOC flag; should we get a separate variable > for gfp context at this point? Yes, that sounds like a reasonable thing to do. Just note that xfs still uses the current_{set,restore}* API to handle PF_MEMALLOC_NOFS, so that would have to be moved over to the memalloc_nofs_{save,restore} API.
diff --git a/include/linux/sched.h b/include/linux/sched.h index f9b43c989577..dfa90088ba08 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1403,6 +1403,7 @@ extern struct pid *cad_pid; #define PF_UMH 0x02000000 /* I'm an Usermodehelper process */ #define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_allowed */ #define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */ +#define PF_MEMALLOC_NOCMA 0x10000000 /* All allocation request will have _GFP_MOVABLE cleared */ #define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */ #define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */ #define PF_SUSPEND_TASK 0x80000000 /* This thread called freeze_processes() and should not be frozen */ diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index 3bfa6a0cbba4..0cd9f10423fb 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -148,17 +148,25 @@ static inline bool in_vfork(struct task_struct *tsk) * Applies per-task gfp context to the given allocation flags. * PF_MEMALLOC_NOIO implies GFP_NOIO * PF_MEMALLOC_NOFS implies GFP_NOFS + * PF_MEMALLOC_NOCMA implies no allocation from CMA region. 
*/ static inline gfp_t current_gfp_context(gfp_t flags) { - /* - * NOIO implies both NOIO and NOFS and it is a weaker context - * so always make sure it makes precedence - */ - if (unlikely(current->flags & PF_MEMALLOC_NOIO)) - flags &= ~(__GFP_IO | __GFP_FS); - else if (unlikely(current->flags & PF_MEMALLOC_NOFS)) - flags &= ~__GFP_FS; + if (unlikely(current->flags & + (PF_MEMALLOC_NOIO | PF_MEMALLOC_NOFS | PF_MEMALLOC_NOCMA))) { + /* + * NOIO implies both NOIO and NOFS and it is a weaker context + * so always make sure it makes precedence + */ + if (current->flags & PF_MEMALLOC_NOIO) + flags &= ~(__GFP_IO | __GFP_FS); + else if (current->flags & PF_MEMALLOC_NOFS) + flags &= ~__GFP_FS; +#ifdef CONFIG_CMA + if (current->flags & PF_MEMALLOC_NOCMA) + flags &= ~__GFP_MOVABLE; +#endif + } return flags; } @@ -248,6 +256,30 @@ static inline void memalloc_noreclaim_restore(unsigned int flags) current->flags = (current->flags & ~PF_MEMALLOC) | flags; } +#ifdef CONFIG_CMA +static inline unsigned int memalloc_nocma_save(void) +{ + unsigned int flags = current->flags & PF_MEMALLOC_NOCMA; + + current->flags |= PF_MEMALLOC_NOCMA; + return flags; +} + +static inline void memalloc_nocma_restore(unsigned int flags) +{ + current->flags = (current->flags & ~PF_MEMALLOC_NOCMA) | flags; +} +#else +static inline unsigned int memalloc_nocma_save(void) +{ + return 0; +} + +static inline void memalloc_nocma_restore(unsigned int flags) +{ +} +#endif + #ifdef CONFIG_MEMCG /** * memalloc_use_memcg - Starts the remote memcg charging scope.