Message ID | 20200120131909.813-1-cai@lca.pw (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [-mm,v2] mm/page_isolation: fix potential warning from user | expand |
On 20.01.20 14:19, Qian Cai wrote: > It makes sense to call the WARN_ON_ONCE(zone_idx(zone) == ZONE_MOVABLE) > from start_isolate_page_range(), but should avoid triggering it from > userspace, i.e, from is_mem_section_removable() because it could be a > DoS if warn_on_panic is set. > > While at it, simplify the code a bit by removing an unnecessary jump > label and a local variable, so set_migratetype_isolate() could really > return a bool. > > Suggested-by: Michal Hocko <mhocko@kernel.org> > Signed-off-by: Qian Cai <cai@lca.pw> > --- > > v2: Improve the commit log. > Warn for all start_isolate_page_range() users not just offlining. > > mm/page_alloc.c | 11 ++++------- > mm/page_isolation.c | 30 +++++++++++++++++------------- > 2 files changed, 21 insertions(+), 20 deletions(-) > > diff --git a/mm/page_alloc.c b/mm/page_alloc.c > index 621716a25639..3c4eb750a199 100644 > --- a/mm/page_alloc.c > +++ b/mm/page_alloc.c > @@ -8231,7 +8231,7 @@ struct page *has_unmovable_pages(struct zone *zone, struct page *page, > if (is_migrate_cma(migratetype)) > return NULL; > > - goto unmovable; > + return page; > } > > for (; iter < pageblock_nr_pages; iter++) { > @@ -8241,7 +8241,7 @@ struct page *has_unmovable_pages(struct zone *zone, struct page *page, > page = pfn_to_page(pfn + iter); > > if (PageReserved(page)) > - goto unmovable; > + return page; > > /* > * If the zone is movable and we have ruled out all reserved > @@ -8261,7 +8261,7 @@ struct page *has_unmovable_pages(struct zone *zone, struct page *page, > unsigned int skip_pages; > > if (!hugepage_migration_supported(page_hstate(head))) > - goto unmovable; > + return page; > > skip_pages = compound_nr(head) - (page - head); > iter += skip_pages - 1; > @@ -8303,12 +8303,9 @@ struct page *has_unmovable_pages(struct zone *zone, struct page *page, > * is set to both of a memory hole page and a _used_ kernel > * page at boot. > */ > - goto unmovable; > + return page; > } > return NULL; > -unmovable: > - WARN_ON_ONCE(zone_idx(zone) == ZONE_MOVABLE); > - return pfn_to_page(pfn + iter); > } > > #ifdef CONFIG_CONTIG_ALLOC > diff --git a/mm/page_isolation.c b/mm/page_isolation.c > index e70586523ca3..31f5516f5d54 100644 > --- a/mm/page_isolation.c > +++ b/mm/page_isolation.c > @@ -15,12 +15,12 @@ > #define CREATE_TRACE_POINTS > #include <trace/events/page_isolation.h> > > -static int set_migratetype_isolate(struct page *page, int migratetype, int isol_flags) > +static bool set_migratetype_isolate(struct page *page, int migratetype, > + int isol_flags) Why this change? > { > - struct page *unmovable = NULL; > + struct page *unmovable = ERR_PTR(-EBUSY); Also, why this change? > struct zone *zone; > unsigned long flags; > - int ret = -EBUSY; > > zone = page_zone(page); > > @@ -49,21 +49,25 @@ static int set_migratetype_isolate(struct page *page, int migratetype, int isol_ > NULL); > > __mod_zone_freepage_state(zone, -nr_pages, mt); > - ret = 0; > } > > out: > spin_unlock_irqrestore(&zone->lock, flags); > - if (!ret) > + > + if (!unmovable) { > drain_all_pages(zone); > - else if ((isol_flags & REPORT_FAILURE) && unmovable) > - /* > - * printk() with zone->lock held will guarantee to trigger a > - * lockdep splat, so defer it here. > - */ > - dump_page(unmovable, "unmovable page"); > - > - return ret; > + } else { > + WARN_ON_ONCE(zone_idx(zone) == ZONE_MOVABLE); > + > + if ((isol_flags & REPORT_FAILURE) && !IS_ERR(unmovable)) > + /* Why this change? (!IS_ERR) Some things here look unrelated - or I am missing something :)
> On Jan 20, 2020, at 8:30 AM, David Hildenbrand <david@redhat.com> wrote: > > On 20.01.20 14:19, Qian Cai wrote: >> It makes sense to call the WARN_ON_ONCE(zone_idx(zone) == ZONE_MOVABLE) >> from start_isolate_page_range(), but should avoid triggering it from >> userspace, i.e, from is_mem_section_removable() because it could be a >> DoS if warn_on_panic is set. >> >> While at it, simplify the code a bit by removing an unnecessary jump >> label and a local variable, so set_migratetype_isolate() could really >> return a bool. >> >> Suggested-by: Michal Hocko <mhocko@kernel.org> >> Signed-off-by: Qian Cai <cai@lca.pw> >> --- >> >> v2: Improve the commit log. >> Warn for all start_isolate_page_range() users not just offlining. >> >> mm/page_alloc.c | 11 ++++------- >> mm/page_isolation.c | 30 +++++++++++++++++------------- >> 2 files changed, 21 insertions(+), 20 deletions(-) >> >> diff --git a/mm/page_alloc.c b/mm/page_alloc.c >> index 621716a25639..3c4eb750a199 100644 >> --- a/mm/page_alloc.c >> +++ b/mm/page_alloc.c >> @@ -8231,7 +8231,7 @@ struct page *has_unmovable_pages(struct zone *zone, struct page *page, >> if (is_migrate_cma(migratetype)) >> return NULL; >> >> - goto unmovable; >> + return page; >> } >> >> for (; iter < pageblock_nr_pages; iter++) { >> @@ -8241,7 +8241,7 @@ struct page *has_unmovable_pages(struct zone *zone, struct page *page, >> page = pfn_to_page(pfn + iter); >> >> if (PageReserved(page)) >> - goto unmovable; >> + return page; >> >> /* >> * If the zone is movable and we have ruled out all reserved >> @@ -8261,7 +8261,7 @@ struct page *has_unmovable_pages(struct zone *zone, struct page *page, >> unsigned int skip_pages; >> >> if (!hugepage_migration_supported(page_hstate(head))) >> - goto unmovable; >> + return page; >> >> skip_pages = compound_nr(head) - (page - head); >> iter += skip_pages - 1; >> @@ -8303,12 +8303,9 @@ struct page *has_unmovable_pages(struct zone *zone, struct page *page, >> * is set to both of a memory hole page and a _used_ kernel >> * page at boot. >> */ >> - goto unmovable; >> + return page; >> } >> return NULL; >> -unmovable: >> - WARN_ON_ONCE(zone_idx(zone) == ZONE_MOVABLE); >> - return pfn_to_page(pfn + iter); >> } >> >> #ifdef CONFIG_CONTIG_ALLOC >> diff --git a/mm/page_isolation.c b/mm/page_isolation.c >> index e70586523ca3..31f5516f5d54 100644 >> --- a/mm/page_isolation.c >> +++ b/mm/page_isolation.c >> @@ -15,12 +15,12 @@ >> #define CREATE_TRACE_POINTS >> #include <trace/events/page_isolation.h> >> >> -static int set_migratetype_isolate(struct page *page, int migratetype, int isol_flags) >> +static bool set_migratetype_isolate(struct page *page, int migratetype, >> + int isol_flags) > > Why this change? > >> { >> - struct page *unmovable = NULL; >> + struct page *unmovable = ERR_PTR(-EBUSY); > > Also, why this change? > >> struct zone *zone; >> unsigned long flags; >> - int ret = -EBUSY; >> >> zone = page_zone(page); >> >> @@ -49,21 +49,25 @@ static int set_migratetype_isolate(struct page *page, int migratetype, int isol_ >> NULL); >> >> __mod_zone_freepage_state(zone, -nr_pages, mt); >> - ret = 0; >> } >> >> out: >> spin_unlock_irqrestore(&zone->lock, flags); >> - if (!ret) >> + >> + if (!unmovable) { >> drain_all_pages(zone); >> - else if ((isol_flags & REPORT_FAILURE) && unmovable) >> - /* >> - * printk() with zone->lock held will guarantee to trigger a >> - * lockdep splat, so defer it here. >> - */ >> - dump_page(unmovable, "unmovable page"); >> - >> - return ret; >> + } else { >> + WARN_ON_ONCE(zone_idx(zone) == ZONE_MOVABLE); >> + >> + if ((isol_flags & REPORT_FAILURE) && !IS_ERR(unmovable)) >> + /* > > Why this change? (!IS_ERR) > > > Some things here look unrelated - or I am missing something :) The original “ret” variable looks ugly to me, so I just removed that and consolidated with the “unmovable” pointer to always be able to report an error. Since this cleanup is really small, I did not bother send a separate patch for it.
On 20.01.20 14:30, David Hildenbrand wrote: > On 20.01.20 14:19, Qian Cai wrote: >> It makes sense to call the WARN_ON_ONCE(zone_idx(zone) == ZONE_MOVABLE) >> from start_isolate_page_range(), but should avoid triggering it from >> userspace, i.e, from is_mem_section_removable() because it could be a >> DoS if warn_on_panic is set. >> >> While at it, simplify the code a bit by removing an unnecessary jump >> label and a local variable, so set_migratetype_isolate() could really >> return a bool. >> >> Suggested-by: Michal Hocko <mhocko@kernel.org> >> Signed-off-by: Qian Cai <cai@lca.pw> >> --- >> >> v2: Improve the commit log. >> Warn for all start_isolate_page_range() users not just offlining. >> >> mm/page_alloc.c | 11 ++++------- >> mm/page_isolation.c | 30 +++++++++++++++++------------- >> 2 files changed, 21 insertions(+), 20 deletions(-) >> >> diff --git a/mm/page_alloc.c b/mm/page_alloc.c >> index 621716a25639..3c4eb750a199 100644 >> --- a/mm/page_alloc.c >> +++ b/mm/page_alloc.c >> @@ -8231,7 +8231,7 @@ struct page *has_unmovable_pages(struct zone *zone, struct page *page, >> if (is_migrate_cma(migratetype)) >> return NULL; >> >> - goto unmovable; >> + return page; >> } >> >> for (; iter < pageblock_nr_pages; iter++) { >> @@ -8241,7 +8241,7 @@ struct page *has_unmovable_pages(struct zone *zone, struct page *page, >> page = pfn_to_page(pfn + iter); >> >> if (PageReserved(page)) >> - goto unmovable; >> + return page; >> >> /* >> * If the zone is movable and we have ruled out all reserved >> @@ -8261,7 +8261,7 @@ struct page *has_unmovable_pages(struct zone *zone, struct page *page, >> unsigned int skip_pages; >> >> if (!hugepage_migration_supported(page_hstate(head))) >> - goto unmovable; >> + return page; >> >> skip_pages = compound_nr(head) - (page - head); >> iter += skip_pages - 1; >> @@ -8303,12 +8303,9 @@ struct page *has_unmovable_pages(struct zone *zone, struct page *page, >> * is set to both of a memory hole page and a _used_ kernel >> * page at boot. >> */ >> - goto unmovable; >> + return page; >> } >> return NULL; >> -unmovable: >> - WARN_ON_ONCE(zone_idx(zone) == ZONE_MOVABLE); >> - return pfn_to_page(pfn + iter); >> } >> >> #ifdef CONFIG_CONTIG_ALLOC >> diff --git a/mm/page_isolation.c b/mm/page_isolation.c >> index e70586523ca3..31f5516f5d54 100644 >> --- a/mm/page_isolation.c >> +++ b/mm/page_isolation.c >> @@ -15,12 +15,12 @@ >> #define CREATE_TRACE_POINTS >> #include <trace/events/page_isolation.h> >> >> -static int set_migratetype_isolate(struct page *page, int migratetype, int isol_flags) >> +static bool set_migratetype_isolate(struct page *page, int migratetype, >> + int isol_flags) > > Why this change? > >> { >> - struct page *unmovable = NULL; >> + struct page *unmovable = ERR_PTR(-EBUSY); > > Also, why this change? > >> struct zone *zone; >> unsigned long flags; >> - int ret = -EBUSY; >> >> zone = page_zone(page); >> >> @@ -49,21 +49,25 @@ static int set_migratetype_isolate(struct page *page, int migratetype, int isol_ >> NULL); >> >> __mod_zone_freepage_state(zone, -nr_pages, mt); >> - ret = 0; >> } >> >> out: >> spin_unlock_irqrestore(&zone->lock, flags); >> - if (!ret) >> + >> + if (!unmovable) { >> drain_all_pages(zone); >> - else if ((isol_flags & REPORT_FAILURE) && unmovable) >> - /* >> - * printk() with zone->lock held will guarantee to trigger a >> - * lockdep splat, so defer it here. >> - */ >> - dump_page(unmovable, "unmovable page"); >> - >> - return ret; >> + } else { >> + WARN_ON_ONCE(zone_idx(zone) == ZONE_MOVABLE); >> + >> + if ((isol_flags & REPORT_FAILURE) && !IS_ERR(unmovable)) >> + /* > > Why this change? (!IS_ERR) > > > Some things here look unrelated - or I am missing something :) > FWIW, I'd prefer this change without any such cleanups (e.g., I don't like returning a bool from this function and the IS_ERR handling, makes the function harder to read than before)
> On Jan 20, 2020, at 8:38 AM, David Hildenbrand <david@redhat.com> wrote: > > On 20.01.20 14:30, David Hildenbrand wrote: >> On 20.01.20 14:19, Qian Cai wrote: >>> It makes sense to call the WARN_ON_ONCE(zone_idx(zone) == ZONE_MOVABLE) >>> from start_isolate_page_range(), but should avoid triggering it from >>> userspace, i.e, from is_mem_section_removable() because it could be a >>> DoS if warn_on_panic is set. >>> >>> While at it, simplify the code a bit by removing an unnecessary jump >>> label and a local variable, so set_migratetype_isolate() could really >>> return a bool. >>> >>> Suggested-by: Michal Hocko <mhocko@kernel.org> >>> Signed-off-by: Qian Cai <cai@lca.pw> >>> --- >>> >>> v2: Improve the commit log. >>> Warn for all start_isolate_page_range() users not just offlining. >>> >>> mm/page_alloc.c | 11 ++++------- >>> mm/page_isolation.c | 30 +++++++++++++++++------------- >>> 2 files changed, 21 insertions(+), 20 deletions(-) >>> >>> diff --git a/mm/page_alloc.c b/mm/page_alloc.c >>> index 621716a25639..3c4eb750a199 100644 >>> --- a/mm/page_alloc.c >>> +++ b/mm/page_alloc.c >>> @@ -8231,7 +8231,7 @@ struct page *has_unmovable_pages(struct zone *zone, struct page *page, >>> if (is_migrate_cma(migratetype)) >>> return NULL; >>> >>> - goto unmovable; >>> + return page; >>> } >>> >>> for (; iter < pageblock_nr_pages; iter++) { >>> @@ -8241,7 +8241,7 @@ struct page *has_unmovable_pages(struct zone *zone, struct page *page, >>> page = pfn_to_page(pfn + iter); >>> >>> if (PageReserved(page)) >>> - goto unmovable; >>> + return page; >>> >>> /* >>> * If the zone is movable and we have ruled out all reserved >>> @@ -8261,7 +8261,7 @@ struct page *has_unmovable_pages(struct zone *zone, struct page *page, >>> unsigned int skip_pages; >>> >>> if (!hugepage_migration_supported(page_hstate(head))) >>> - goto unmovable; >>> + return page; >>> >>> skip_pages = compound_nr(head) - (page - head); >>> iter += skip_pages - 1; >>> @@ -8303,12 +8303,9 @@ struct page *has_unmovable_pages(struct zone *zone, struct page *page, >>> * is set to both of a memory hole page and a _used_ kernel >>> * page at boot. >>> */ >>> - goto unmovable; >>> + return page; >>> } >>> return NULL; >>> -unmovable: >>> - WARN_ON_ONCE(zone_idx(zone) == ZONE_MOVABLE); >>> - return pfn_to_page(pfn + iter); >>> } >>> >>> #ifdef CONFIG_CONTIG_ALLOC >>> diff --git a/mm/page_isolation.c b/mm/page_isolation.c >>> index e70586523ca3..31f5516f5d54 100644 >>> --- a/mm/page_isolation.c >>> +++ b/mm/page_isolation.c >>> @@ -15,12 +15,12 @@ >>> #define CREATE_TRACE_POINTS >>> #include <trace/events/page_isolation.h> >>> >>> -static int set_migratetype_isolate(struct page *page, int migratetype, int isol_flags) >>> +static bool set_migratetype_isolate(struct page *page, int migratetype, >>> + int isol_flags) >> >> Why this change? >> >>> { >>> - struct page *unmovable = NULL; >>> + struct page *unmovable = ERR_PTR(-EBUSY); >> >> Also, why this change? >> >>> struct zone *zone; >>> unsigned long flags; >>> - int ret = -EBUSY; >>> >>> zone = page_zone(page); >>> >>> @@ -49,21 +49,25 @@ static int set_migratetype_isolate(struct page *page, int migratetype, int isol_ >>> NULL); >>> >>> __mod_zone_freepage_state(zone, -nr_pages, mt); >>> - ret = 0; >>> } >>> >>> out: >>> spin_unlock_irqrestore(&zone->lock, flags); >>> - if (!ret) >>> + >>> + if (!unmovable) { >>> drain_all_pages(zone); >>> - else if ((isol_flags & REPORT_FAILURE) && unmovable) >>> - /* >>> - * printk() with zone->lock held will guarantee to trigger a >>> - * lockdep splat, so defer it here. >>> - */ >>> - dump_page(unmovable, "unmovable page"); >>> - >>> - return ret; >>> + } else { >>> + WARN_ON_ONCE(zone_idx(zone) == ZONE_MOVABLE); >>> + >>> + if ((isol_flags & REPORT_FAILURE) && !IS_ERR(unmovable)) >>> + /* >> >> Why this change? (!IS_ERR) >> >> >> Some things here look unrelated - or I am missing something :) >> > > FWIW, I'd prefer this change without any such cleanups (e.g., I don't > like returning a bool from this function and the IS_ERR handling, makes > the function harder to read than before) What is Michal or Andrew’s opinion? BTW, a bonus point to return a bool is that it helps the code robustness in general, as UBSAN will be able to catch any abuse.
On 20.01.20 14:56, Qian Cai wrote: > > >> On Jan 20, 2020, at 8:38 AM, David Hildenbrand <david@redhat.com> wrote: >> >> On 20.01.20 14:30, David Hildenbrand wrote: >>> On 20.01.20 14:19, Qian Cai wrote: >>>> It makes sense to call the WARN_ON_ONCE(zone_idx(zone) == ZONE_MOVABLE) >>>> from start_isolate_page_range(), but should avoid triggering it from >>>> userspace, i.e, from is_mem_section_removable() because it could be a >>>> DoS if warn_on_panic is set. >>>> >>>> While at it, simplify the code a bit by removing an unnecessary jump >>>> label and a local variable, so set_migratetype_isolate() could really >>>> return a bool. >>>> >>>> Suggested-by: Michal Hocko <mhocko@kernel.org> >>>> Signed-off-by: Qian Cai <cai@lca.pw> >>>> --- >>>> >>>> v2: Improve the commit log. >>>> Warn for all start_isolate_page_range() users not just offlining. >>>> >>>> mm/page_alloc.c | 11 ++++------- >>>> mm/page_isolation.c | 30 +++++++++++++++++------------- >>>> 2 files changed, 21 insertions(+), 20 deletions(-) >>>> >>>> diff --git a/mm/page_alloc.c b/mm/page_alloc.c >>>> index 621716a25639..3c4eb750a199 100644 >>>> --- a/mm/page_alloc.c >>>> +++ b/mm/page_alloc.c >>>> @@ -8231,7 +8231,7 @@ struct page *has_unmovable_pages(struct zone *zone, struct page *page, >>>> if (is_migrate_cma(migratetype)) >>>> return NULL; >>>> >>>> - goto unmovable; >>>> + return page; >>>> } >>>> >>>> for (; iter < pageblock_nr_pages; iter++) { >>>> @@ -8241,7 +8241,7 @@ struct page *has_unmovable_pages(struct zone *zone, struct page *page, >>>> page = pfn_to_page(pfn + iter); >>>> >>>> if (PageReserved(page)) >>>> - goto unmovable; >>>> + return page; >>>> >>>> /* >>>> * If the zone is movable and we have ruled out all reserved >>>> @@ -8261,7 +8261,7 @@ struct page *has_unmovable_pages(struct zone *zone, struct page *page, >>>> unsigned int skip_pages; >>>> >>>> if (!hugepage_migration_supported(page_hstate(head))) >>>> - goto unmovable; >>>> + return page; >>>> >>>> skip_pages = compound_nr(head) - (page - head); >>>> iter += skip_pages - 1; >>>> @@ -8303,12 +8303,9 @@ struct page *has_unmovable_pages(struct zone *zone, struct page *page, >>>> * is set to both of a memory hole page and a _used_ kernel >>>> * page at boot. >>>> */ >>>> - goto unmovable; >>>> + return page; >>>> } >>>> return NULL; >>>> -unmovable: >>>> - WARN_ON_ONCE(zone_idx(zone) == ZONE_MOVABLE); >>>> - return pfn_to_page(pfn + iter); >>>> } >>>> >>>> #ifdef CONFIG_CONTIG_ALLOC >>>> diff --git a/mm/page_isolation.c b/mm/page_isolation.c >>>> index e70586523ca3..31f5516f5d54 100644 >>>> --- a/mm/page_isolation.c >>>> +++ b/mm/page_isolation.c >>>> @@ -15,12 +15,12 @@ >>>> #define CREATE_TRACE_POINTS >>>> #include <trace/events/page_isolation.h> >>>> >>>> -static int set_migratetype_isolate(struct page *page, int migratetype, int isol_flags) >>>> +static bool set_migratetype_isolate(struct page *page, int migratetype, >>>> + int isol_flags) >>> >>> Why this change? >>> >>>> { >>>> - struct page *unmovable = NULL; >>>> + struct page *unmovable = ERR_PTR(-EBUSY); >>> >>> Also, why this change? >>> >>>> struct zone *zone; >>>> unsigned long flags; >>>> - int ret = -EBUSY; >>>> >>>> zone = page_zone(page); >>>> >>>> @@ -49,21 +49,25 @@ static int set_migratetype_isolate(struct page *page, int migratetype, int isol_ >>>> NULL); >>>> >>>> __mod_zone_freepage_state(zone, -nr_pages, mt); >>>> - ret = 0; >>>> } >>>> >>>> out: >>>> spin_unlock_irqrestore(&zone->lock, flags); >>>> - if (!ret) >>>> + >>>> + if (!unmovable) { >>>> drain_all_pages(zone); >>>> - else if ((isol_flags & REPORT_FAILURE) && unmovable) >>>> - /* >>>> - * printk() with zone->lock held will guarantee to trigger a >>>> - * lockdep splat, so defer it here. >>>> - */ >>>> - dump_page(unmovable, "unmovable page"); >>>> - >>>> - return ret; >>>> + } else { >>>> + WARN_ON_ONCE(zone_idx(zone) == ZONE_MOVABLE); >>>> + >>>> + if ((isol_flags & REPORT_FAILURE) && !IS_ERR(unmovable)) >>>> + /* >>> >>> Why this change? (!IS_ERR) >>> >>> >>> Some things here look unrelated - or I am missing something :) >>> >> >> FWIW, I'd prefer this change without any such cleanups (e.g., I don't >> like returning a bool from this function and the IS_ERR handling, makes >> the function harder to read than before) > > What is Michal or Andrew’s opinion? BTW, a bonus point to return a bool > is that it helps the code robustness in general, as UBSAN will be able to > catch any abuse. > A return type of bool on a function that does not test a property ("has_...", "is"...") is IMHO confusing. If we have an int, it is clear that "0" means "success". With a bool (true/false), it is not clear.
On Mon 20-01-20 08:19:09, Qian Cai wrote: > It makes sense to call the WARN_ON_ONCE(zone_idx(zone) == ZONE_MOVABLE) > from start_isolate_page_range(), but should avoid triggering it from > userspace, i.e, from is_mem_section_removable() because it could be a > DoS if warn_on_panic is set. Let's just make it clear that this mostly a pre-cautious because a real DoS should be pretty much impossible. But let's see whether somebody want to make a CVE out of it ;) > While at it, simplify the code a bit by removing an unnecessary jump > label and a local variable, so set_migratetype_isolate() could really > return a bool. > > Suggested-by: Michal Hocko <mhocko@kernel.org> > Signed-off-by: Qian Cai <cai@lca.pw> Acked-by: Michal Hocko <mhocko@suse.com> Thanks! > --- > > v2: Improve the commit log. > Warn for all start_isolate_page_range() users not just offlining. > > mm/page_alloc.c | 11 ++++------- > mm/page_isolation.c | 30 +++++++++++++++++------------- > 2 files changed, 21 insertions(+), 20 deletions(-) > > diff --git a/mm/page_alloc.c b/mm/page_alloc.c > index 621716a25639..3c4eb750a199 100644 > --- a/mm/page_alloc.c > +++ b/mm/page_alloc.c > @@ -8231,7 +8231,7 @@ struct page *has_unmovable_pages(struct zone *zone, struct page *page, > if (is_migrate_cma(migratetype)) > return NULL; > > - goto unmovable; > + return page; > } > > for (; iter < pageblock_nr_pages; iter++) { > @@ -8241,7 +8241,7 @@ struct page *has_unmovable_pages(struct zone *zone, struct page *page, > page = pfn_to_page(pfn + iter); > > if (PageReserved(page)) > - goto unmovable; > + return page; > > /* > * If the zone is movable and we have ruled out all reserved > @@ -8261,7 +8261,7 @@ struct page *has_unmovable_pages(struct zone *zone, struct page *page, > unsigned int skip_pages; > > if (!hugepage_migration_supported(page_hstate(head))) > - goto unmovable; > + return page; > > skip_pages = compound_nr(head) - (page - head); > iter += skip_pages - 1; > @@ -8303,12 +8303,9 @@ struct page *has_unmovable_pages(struct zone *zone, struct page *page, > * is set to both of a memory hole page and a _used_ kernel > * page at boot. > */ > - goto unmovable; > + return page; > } > return NULL; > -unmovable: > - WARN_ON_ONCE(zone_idx(zone) == ZONE_MOVABLE); > - return pfn_to_page(pfn + iter); > } > > #ifdef CONFIG_CONTIG_ALLOC > diff --git a/mm/page_isolation.c b/mm/page_isolation.c > index e70586523ca3..31f5516f5d54 100644 > --- a/mm/page_isolation.c > +++ b/mm/page_isolation.c > @@ -15,12 +15,12 @@ > #define CREATE_TRACE_POINTS > #include <trace/events/page_isolation.h> > > -static int set_migratetype_isolate(struct page *page, int migratetype, int isol_flags) > +static bool set_migratetype_isolate(struct page *page, int migratetype, > + int isol_flags) > { > - struct page *unmovable = NULL; > + struct page *unmovable = ERR_PTR(-EBUSY); > struct zone *zone; > unsigned long flags; > - int ret = -EBUSY; > > zone = page_zone(page); > > @@ -49,21 +49,25 @@ static int set_migratetype_isolate(struct page *page, int migratetype, int isol_ > NULL); > > __mod_zone_freepage_state(zone, -nr_pages, mt); > - ret = 0; > } > > out: > spin_unlock_irqrestore(&zone->lock, flags); > - if (!ret) > + > + if (!unmovable) { > drain_all_pages(zone); > - else if ((isol_flags & REPORT_FAILURE) && unmovable) > - /* > - * printk() with zone->lock held will guarantee to trigger a > - * lockdep splat, so defer it here. > - */ > - dump_page(unmovable, "unmovable page"); > - > - return ret; > + } else { > + WARN_ON_ONCE(zone_idx(zone) == ZONE_MOVABLE); > + > + if ((isol_flags & REPORT_FAILURE) && !IS_ERR(unmovable)) > + /* > + * printk() with zone->lock held will likely trigger a > + * lockdep splat, so defer it here. > + */ > + dump_page(unmovable, "unmovable page"); > + } > + > + return !!unmovable; > } > > static void unset_migratetype_isolate(struct page *page, unsigned migratetype) > -- > 2.21.0 (Apple Git-122.2)
> On Jan 20, 2020, at 9:01 AM, David Hildenbrand <david@redhat.com> wrote: > > On 20.01.20 14:56, Qian Cai wrote: >> >> >>> On Jan 20, 2020, at 8:38 AM, David Hildenbrand <david@redhat.com> wrote: >>> >>> On 20.01.20 14:30, David Hildenbrand wrote: >>>> On 20.01.20 14:19, Qian Cai wrote: >>>>> It makes sense to call the WARN_ON_ONCE(zone_idx(zone) == ZONE_MOVABLE) >>>>> from start_isolate_page_range(), but should avoid triggering it from >>>>> userspace, i.e, from is_mem_section_removable() because it could be a >>>>> DoS if warn_on_panic is set. >>>>> >>>>> While at it, simplify the code a bit by removing an unnecessary jump >>>>> label and a local variable, so set_migratetype_isolate() could really >>>>> return a bool. >>>>> >>>>> Suggested-by: Michal Hocko <mhocko@kernel.org> >>>>> Signed-off-by: Qian Cai <cai@lca.pw> >>>>> --- >>>>> >>>>> v2: Improve the commit log. >>>>> Warn for all start_isolate_page_range() users not just offlining. >>>>> >>>>> mm/page_alloc.c | 11 ++++------- >>>>> mm/page_isolation.c | 30 +++++++++++++++++------------- >>>>> 2 files changed, 21 insertions(+), 20 deletions(-) >>>>> >>>>> diff --git a/mm/page_alloc.c b/mm/page_alloc.c >>>>> index 621716a25639..3c4eb750a199 100644 >>>>> --- a/mm/page_alloc.c >>>>> +++ b/mm/page_alloc.c >>>>> @@ -8231,7 +8231,7 @@ struct page *has_unmovable_pages(struct zone *zone, struct page *page, >>>>> if (is_migrate_cma(migratetype)) >>>>> return NULL; >>>>> >>>>> - goto unmovable; >>>>> + return page; >>>>> } >>>>> >>>>> for (; iter < pageblock_nr_pages; iter++) { >>>>> @@ -8241,7 +8241,7 @@ struct page *has_unmovable_pages(struct zone *zone, struct page *page, >>>>> page = pfn_to_page(pfn + iter); >>>>> >>>>> if (PageReserved(page)) >>>>> - goto unmovable; >>>>> + return page; >>>>> >>>>> /* >>>>> * If the zone is movable and we have ruled out all reserved >>>>> @@ -8261,7 +8261,7 @@ struct page *has_unmovable_pages(struct zone *zone, struct page *page, >>>>> unsigned int skip_pages; >>>>> >>>>> if (!hugepage_migration_supported(page_hstate(head))) >>>>> - goto unmovable; >>>>> + return page; >>>>> >>>>> skip_pages = compound_nr(head) - (page - head); >>>>> iter += skip_pages - 1; >>>>> @@ -8303,12 +8303,9 @@ struct page *has_unmovable_pages(struct zone *zone, struct page *page, >>>>> * is set to both of a memory hole page and a _used_ kernel >>>>> * page at boot. >>>>> */ >>>>> - goto unmovable; >>>>> + return page; >>>>> } >>>>> return NULL; >>>>> -unmovable: >>>>> - WARN_ON_ONCE(zone_idx(zone) == ZONE_MOVABLE); >>>>> - return pfn_to_page(pfn + iter); >>>>> } >>>>> >>>>> #ifdef CONFIG_CONTIG_ALLOC >>>>> diff --git a/mm/page_isolation.c b/mm/page_isolation.c >>>>> index e70586523ca3..31f5516f5d54 100644 >>>>> --- a/mm/page_isolation.c >>>>> +++ b/mm/page_isolation.c >>>>> @@ -15,12 +15,12 @@ >>>>> #define CREATE_TRACE_POINTS >>>>> #include <trace/events/page_isolation.h> >>>>> >>>>> -static int set_migratetype_isolate(struct page *page, int migratetype, int isol_flags) >>>>> +static bool set_migratetype_isolate(struct page *page, int migratetype, >>>>> + int isol_flags) >>>> >>>> Why this change? >>>> >>>>> { >>>>> - struct page *unmovable = NULL; >>>>> + struct page *unmovable = ERR_PTR(-EBUSY); >>>> >>>> Also, why this change? >>>> >>>>> struct zone *zone; >>>>> unsigned long flags; >>>>> - int ret = -EBUSY; >>>>> >>>>> zone = page_zone(page); >>>>> >>>>> @@ -49,21 +49,25 @@ static int set_migratetype_isolate(struct page *page, int migratetype, int isol_ >>>>> NULL); >>>>> >>>>> __mod_zone_freepage_state(zone, -nr_pages, mt); >>>>> - ret = 0; >>>>> } >>>>> >>>>> out: >>>>> spin_unlock_irqrestore(&zone->lock, flags); >>>>> - if (!ret) >>>>> + >>>>> + if (!unmovable) { >>>>> drain_all_pages(zone); >>>>> - else if ((isol_flags & REPORT_FAILURE) && unmovable) >>>>> - /* >>>>> - * printk() with zone->lock held will guarantee to trigger a >>>>> - * lockdep splat, so defer it here. >>>>> - */ >>>>> - dump_page(unmovable, "unmovable page"); >>>>> - >>>>> - return ret; >>>>> + } else { >>>>> + WARN_ON_ONCE(zone_idx(zone) == ZONE_MOVABLE); >>>>> + >>>>> + if ((isol_flags & REPORT_FAILURE) && !IS_ERR(unmovable)) >>>>> + /* >>>> >>>> Why this change? (!IS_ERR) >>>> >>>> >>>> Some things here look unrelated - or I am missing something :) >>>> >>> >>> FWIW, I'd prefer this change without any such cleanups (e.g., I don't >>> like returning a bool from this function and the IS_ERR handling, makes >>> the function harder to read than before) >> >> What is Michal or Andrew’s opinion? BTW, a bonus point to return a bool >> is that it helps the code robustness in general, as UBSAN will be able to >> catch any abuse. >> > > A return type of bool on a function that does not test a property > ("has_...", "is"...") is IMHO confusing. That is fine. It could be renamed to set_migratetype_is_isolate() or is_set_migratetype_isolate() which seems pretty minor because we have no consistency in the naming of this in linux kernel at all, i.e., many existing bool function names without those test of properties. > > If we have an int, it is clear that "0" means "success". With a bool > (true/false), it is not clear. > > -- > Thanks, > > David / dhildenb >
On 20.01.20 15:11, Qian Cai wrote: > > >> On Jan 20, 2020, at 9:01 AM, David Hildenbrand <david@redhat.com> wrote: >> >> On 20.01.20 14:56, Qian Cai wrote: >>> >>> >>>> On Jan 20, 2020, at 8:38 AM, David Hildenbrand <david@redhat.com> wrote: >>>> >>>> On 20.01.20 14:30, David Hildenbrand wrote: >>>>> On 20.01.20 14:19, Qian Cai wrote: >>>>>> It makes sense to call the WARN_ON_ONCE(zone_idx(zone) == ZONE_MOVABLE) >>>>>> from start_isolate_page_range(), but should avoid triggering it from >>>>>> userspace, i.e, from is_mem_section_removable() because it could be a >>>>>> DoS if warn_on_panic is set. >>>>>> >>>>>> While at it, simplify the code a bit by removing an unnecessary jump >>>>>> label and a local variable, so set_migratetype_isolate() could really >>>>>> return a bool. >>>>>> >>>>>> Suggested-by: Michal Hocko <mhocko@kernel.org> >>>>>> Signed-off-by: Qian Cai <cai@lca.pw> >>>>>> --- >>>>>> >>>>>> v2: Improve the commit log. >>>>>> Warn for all start_isolate_page_range() users not just offlining. >>>>>> >>>>>> mm/page_alloc.c | 11 ++++------- >>>>>> mm/page_isolation.c | 30 +++++++++++++++++------------- >>>>>> 2 files changed, 21 insertions(+), 20 deletions(-) >>>>>> >>>>>> diff --git a/mm/page_alloc.c b/mm/page_alloc.c >>>>>> index 621716a25639..3c4eb750a199 100644 >>>>>> --- a/mm/page_alloc.c >>>>>> +++ b/mm/page_alloc.c >>>>>> @@ -8231,7 +8231,7 @@ struct page *has_unmovable_pages(struct zone *zone, struct page *page, >>>>>> if (is_migrate_cma(migratetype)) >>>>>> return NULL; >>>>>> >>>>>> - goto unmovable; >>>>>> + return page; >>>>>> } >>>>>> >>>>>> for (; iter < pageblock_nr_pages; iter++) { >>>>>> @@ -8241,7 +8241,7 @@ struct page *has_unmovable_pages(struct zone *zone, struct page *page, >>>>>> page = pfn_to_page(pfn + iter); >>>>>> >>>>>> if (PageReserved(page)) >>>>>> - goto unmovable; >>>>>> + return page; >>>>>> >>>>>> /* >>>>>> * If the zone is movable and we have ruled out all reserved >>>>>> @@ -8261,7 +8261,7 @@ struct page *has_unmovable_pages(struct zone *zone, struct page *page, >>>>>> unsigned int skip_pages; >>>>>> >>>>>> if (!hugepage_migration_supported(page_hstate(head))) >>>>>> - goto unmovable; >>>>>> + return page; >>>>>> >>>>>> skip_pages = compound_nr(head) - (page - head); >>>>>> iter += skip_pages - 1; >>>>>> @@ -8303,12 +8303,9 @@ struct page *has_unmovable_pages(struct zone *zone, struct page *page, >>>>>> * is set to both of a memory hole page and a _used_ kernel >>>>>> * page at boot. >>>>>> */ >>>>>> - goto unmovable; >>>>>> + return page; >>>>>> } >>>>>> return NULL; >>>>>> -unmovable: >>>>>> - WARN_ON_ONCE(zone_idx(zone) == ZONE_MOVABLE); >>>>>> - return pfn_to_page(pfn + iter); >>>>>> } >>>>>> >>>>>> #ifdef CONFIG_CONTIG_ALLOC >>>>>> diff --git a/mm/page_isolation.c b/mm/page_isolation.c >>>>>> index e70586523ca3..31f5516f5d54 100644 >>>>>> --- a/mm/page_isolation.c >>>>>> +++ b/mm/page_isolation.c >>>>>> @@ -15,12 +15,12 @@ >>>>>> #define CREATE_TRACE_POINTS >>>>>> #include <trace/events/page_isolation.h> >>>>>> >>>>>> -static int set_migratetype_isolate(struct page *page, int migratetype, int isol_flags) >>>>>> +static bool set_migratetype_isolate(struct page *page, int migratetype, >>>>>> + int isol_flags) >>>>> >>>>> Why this change? >>>>> >>>>>> { >>>>>> - struct page *unmovable = NULL; >>>>>> + struct page *unmovable = ERR_PTR(-EBUSY); >>>>> >>>>> Also, why this change? >>>>> >>>>>> struct zone *zone; >>>>>> unsigned long flags; >>>>>> - int ret = -EBUSY; >>>>>> >>>>>> zone = page_zone(page); >>>>>> >>>>>> @@ -49,21 +49,25 @@ static int set_migratetype_isolate(struct page *page, int migratetype, int isol_ >>>>>> NULL); >>>>>> >>>>>> __mod_zone_freepage_state(zone, -nr_pages, mt); >>>>>> - ret = 0; >>>>>> } >>>>>> >>>>>> out: >>>>>> spin_unlock_irqrestore(&zone->lock, flags); >>>>>> - if (!ret) >>>>>> + >>>>>> + if (!unmovable) { >>>>>> drain_all_pages(zone); >>>>>> - else if ((isol_flags & REPORT_FAILURE) && unmovable) >>>>>> - /* >>>>>> - * printk() with zone->lock held will guarantee to trigger a >>>>>> - * lockdep splat, so defer it here. >>>>>> - */ >>>>>> - dump_page(unmovable, "unmovable page"); >>>>>> - >>>>>> - return ret; >>>>>> + } else { >>>>>> + WARN_ON_ONCE(zone_idx(zone) == ZONE_MOVABLE); >>>>>> + >>>>>> + if ((isol_flags & REPORT_FAILURE) && !IS_ERR(unmovable)) >>>>>> + /* >>>>> >>>>> Why this change? (!IS_ERR) >>>>> >>>>> >>>>> Some things here look unrelated - or I am missing something :) >>>>> >>>> >>>> FWIW, I'd prefer this change without any such cleanups (e.g., I don't >>>> like returning a bool from this function and the IS_ERR handling, makes >>>> the function harder to read than before) >>> >>> What is Michal or Andrew’s opinion? BTW, a bonus point to return a bool >>> is that it helps the code robustness in general, as UBSAN will be able to >>> catch any abuse. >>> >> >> A return type of bool on a function that does not test a property >> ("has_...", "is"...") is IMHO confusing. > > That is fine. It could be renamed to set_migratetype_is_isolate() or > is_set_migratetype_isolate() which seems pretty minor because we > have no consistency in the naming of this in linux kernel at all, i.e., > many existing bool function names without those test of properties. It does not query a property, so "is_set_migratetype_isolate()" is plain wrong. Anyhow, Michal does not seem to care.
On Mon 20-01-20 15:13:54, David Hildenbrand wrote: > On 20.01.20 15:11, Qian Cai wrote: > >> On Jan 20, 2020, at 9:01 AM, David Hildenbrand <david@redhat.com> wrote: > >> On 20.01.20 14:56, Qian Cai wrote: [...] > >>>> FWIW, I'd prefer this change without any such cleanups (e.g., I don't > >>>> like returning a bool from this function and the IS_ERR handling, makes > >>>> the function harder to read than before) > >>> > >>> What is Michal or Andrew’s opinion? BTW, a bonus point to return a bool > >>> is that it helps the code robustness in general, as UBSAN will be able to > >>> catch any abuse. > >>> > >> > >> A return type of bool on a function that does not test a property > >> ("has_...", "is"...") is IMHO confusing. > > > > That is fine. It could be renamed to set_migratetype_is_isolate() or > > is_set_migratetype_isolate() which seems pretty minor because we > > have no consistency in the naming of this in linux kernel at all, i.e., > > many existing bool function names without those test of properties. > > It does not query a property, so "is_set_migratetype_isolate()" is plain > wrong. > > Anyhow, Michal does not seem to care. Well, TBH I have missed this change. My bad. I have mostly checked that the WARN_ONCE is not gated by the check and didn't expect more changes. But I have likely missed that change in the previous version already. You guys are too quick with new version to my standard. Anyway, I do agree that bool is clumsy here. Returning false on success is just head scratching. Nobody really consumes the errno value but I would just leave it that way or if there is a strong need to change then do it in a separate patch.
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 621716a25639..3c4eb750a199 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -8231,7 +8231,7 @@ struct page *has_unmovable_pages(struct zone *zone, struct page *page, if (is_migrate_cma(migratetype)) return NULL; - goto unmovable; + return page; } for (; iter < pageblock_nr_pages; iter++) { @@ -8241,7 +8241,7 @@ struct page *has_unmovable_pages(struct zone *zone, struct page *page, page = pfn_to_page(pfn + iter); if (PageReserved(page)) - goto unmovable; + return page; /* * If the zone is movable and we have ruled out all reserved @@ -8261,7 +8261,7 @@ struct page *has_unmovable_pages(struct zone *zone, struct page *page, unsigned int skip_pages; if (!hugepage_migration_supported(page_hstate(head))) - goto unmovable; + return page; skip_pages = compound_nr(head) - (page - head); iter += skip_pages - 1; @@ -8303,12 +8303,9 @@ struct page *has_unmovable_pages(struct zone *zone, struct page *page, * is set to both of a memory hole page and a _used_ kernel * page at boot. */ - goto unmovable; + return page; } return NULL; -unmovable: - WARN_ON_ONCE(zone_idx(zone) == ZONE_MOVABLE); - return pfn_to_page(pfn + iter); } #ifdef CONFIG_CONTIG_ALLOC diff --git a/mm/page_isolation.c b/mm/page_isolation.c index e70586523ca3..31f5516f5d54 100644 --- a/mm/page_isolation.c +++ b/mm/page_isolation.c @@ -15,12 +15,12 @@ #define CREATE_TRACE_POINTS #include <trace/events/page_isolation.h> -static int set_migratetype_isolate(struct page *page, int migratetype, int isol_flags) +static bool set_migratetype_isolate(struct page *page, int migratetype, + int isol_flags) { - struct page *unmovable = NULL; + struct page *unmovable = ERR_PTR(-EBUSY); struct zone *zone; unsigned long flags; - int ret = -EBUSY; zone = page_zone(page); @@ -49,21 +49,25 @@ static int set_migratetype_isolate(struct page *page, int migratetype, int isol_ NULL); __mod_zone_freepage_state(zone, -nr_pages, mt); - ret = 0; } out: spin_unlock_irqrestore(&zone->lock, flags); - if (!ret) + + if (!unmovable) { drain_all_pages(zone); - else if ((isol_flags & REPORT_FAILURE) && unmovable) - /* - * printk() with zone->lock held will guarantee to trigger a - * lockdep splat, so defer it here. - */ - dump_page(unmovable, "unmovable page"); - - return ret; + } else { + WARN_ON_ONCE(zone_idx(zone) == ZONE_MOVABLE); + + if ((isol_flags & REPORT_FAILURE) && !IS_ERR(unmovable)) + /* + * printk() with zone->lock held will likely trigger a + * lockdep splat, so defer it here. + */ + dump_page(unmovable, "unmovable page"); + } + + return !!unmovable; } static void unset_migratetype_isolate(struct page *page, unsigned migratetype)
It makes sense to call the WARN_ON_ONCE(zone_idx(zone) == ZONE_MOVABLE) from start_isolate_page_range(), but should avoid triggering it from userspace, i.e, from is_mem_section_removable() because it could be a DoS if warn_on_panic is set. While at it, simplify the code a bit by removing an unnecessary jump label and a local variable, so set_migratetype_isolate() could really return a bool. Suggested-by: Michal Hocko <mhocko@kernel.org> Signed-off-by: Qian Cai <cai@lca.pw> --- v2: Improve the commit log. Warn for all start_isolate_page_range() users not just offlining. mm/page_alloc.c | 11 ++++------- mm/page_isolation.c | 30 +++++++++++++++++------------- 2 files changed, 21 insertions(+), 20 deletions(-)