Message ID | 20200203105654.22998-4-pdurrant@amazon.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | purge free_shared_domheap_page() | expand |
Hi, I am sorry to jump that late in the conversation. On 03/02/2020 10:56, Paul Durrant wrote: > > - if ( unlikely(domain_adjust_tot_pages(d, 1 << order) == (1 << order)) ) > + if ( !(memflags & MEMF_no_refcount) && > + unlikely(domain_adjust_tot_pages(d, 1 << order) == (1 << order)) ) > get_knownalive_domain(d); > - } > > for ( i = 0; i < (1 << order); i++ ) > { > ASSERT(page_get_owner(&pg[i]) == NULL); > - ASSERT(!pg[i].count_info); > page_set_owner(&pg[i], d); > smp_wmb(); /* Domain pointer must be visible before updating refcnt. */ > - pg[i].count_info = PGC_allocated | 1; > + pg[i].count_info = > + (pg[i].count_info & PGC_extra) | PGC_allocated | 1; This is technically incorrect because we blindly assume the state of the page is inuse (which is thankfully equal to 0). See the discussion [1]. This is already an existing bug in the code base and I will be taking care of it. However... > page_list_add_tail(&pg[i], &d->page_list); > } > > @@ -2315,11 +2338,6 @@ struct page_info *alloc_domheap_pages( > > if ( memflags & MEMF_no_owner ) > memflags |= MEMF_no_refcount; > - else if ( (memflags & MEMF_no_refcount) && d ) > - { > - ASSERT(!(memflags & MEMF_no_refcount)); > - return NULL; > - } > > if ( !dma_bitsize ) > memflags &= ~MEMF_no_dma; > @@ -2332,11 +2350,23 @@ struct page_info *alloc_domheap_pages( > memflags, d)) == NULL)) ) > return NULL; > > - if ( d && !(memflags & MEMF_no_owner) && > - assign_pages(d, pg, order, memflags) ) > + if ( d && !(memflags & MEMF_no_owner) ) > { > - free_heap_pages(pg, order, memflags & MEMF_no_scrub); > - return NULL; > + if ( memflags & MEMF_no_refcount ) > + { > + unsigned long i; > + > + for ( i = 0; i < (1ul << order); i++ ) > + { > + ASSERT(!pg[i].count_info); > + pg[i].count_info = PGC_extra; ... this is pursuing the wrongness of the code above and not safe against offlining. We could argue this is an already existing bug, however I am a bit uneasy to add more abuse in the code. Jan, what do you think? 
> + } > + } > + if ( assign_pages(d, pg, order, memflags) ) > + { > + free_heap_pages(pg, order, memflags & MEMF_no_scrub); > + return NULL; > + } > } Cheers, [1] https://lore.kernel.org/xen-devel/20200204133357.32101-1-julien@xen.org/
> -----Original Message----- > From: Julien Grall <julien@xen.org> > Sent: 06 February 2020 10:04 > To: Durrant, Paul <pdurrant@amazon.co.uk>; xen-devel@lists.xenproject.org > Cc: Jan Beulich <jbeulich@suse.com>; Andrew Cooper > <andrew.cooper3@citrix.com>; George Dunlap <George.Dunlap@eu.citrix.com>; > Ian Jackson <ian.jackson@eu.citrix.com>; Konrad Rzeszutek Wilk > <konrad.wilk@oracle.com>; Stefano Stabellini <sstabellini@kernel.org>; Wei > Liu <wl@xen.org>; Volodymyr Babchuk <Volodymyr_Babchuk@epam.com>; Roger > Pau Monné <roger.pau@citrix.com> > Subject: Re: [PATCH v9 3/4] mm: make pages allocated with MEMF_no_refcount > safe to assign > > Hi, > > I am sorry to jump that late in the conversation. > > On 03/02/2020 10:56, Paul Durrant wrote: > > > > - if ( unlikely(domain_adjust_tot_pages(d, 1 << order) == (1 << > order)) ) > > + if ( !(memflags & MEMF_no_refcount) && > > + unlikely(domain_adjust_tot_pages(d, 1 << order) == (1 << > order)) ) > > get_knownalive_domain(d); > > - } > > > > for ( i = 0; i < (1 << order); i++ ) > > { > > ASSERT(page_get_owner(&pg[i]) == NULL); > > - ASSERT(!pg[i].count_info); > > page_set_owner(&pg[i], d); > > smp_wmb(); /* Domain pointer must be visible before updating > refcnt. */ > > - pg[i].count_info = PGC_allocated | 1; > > + pg[i].count_info = > > + (pg[i].count_info & PGC_extra) | PGC_allocated | 1; > > This is technically incorrect because we blindly assume the state of the > page is inuse (which is thankfully equal to 0). Assuming the page is inuse seems reasonable at this point. > > See the discussion [1]. This is already an existing bug in the code base > and I will be taking care of it. Fair enough; it's a very long standing bug. > However... 
> > > page_list_add_tail(&pg[i], &d->page_list); > > } > > > > @@ -2315,11 +2338,6 @@ struct page_info *alloc_domheap_pages( > > > > if ( memflags & MEMF_no_owner ) > > memflags |= MEMF_no_refcount; > > - else if ( (memflags & MEMF_no_refcount) && d ) > > - { > > - ASSERT(!(memflags & MEMF_no_refcount)); > > - return NULL; > > - } > > > > if ( !dma_bitsize ) > > memflags &= ~MEMF_no_dma; > > @@ -2332,11 +2350,23 @@ struct page_info *alloc_domheap_pages( > > memflags, d)) == NULL)) ) > > return NULL; > > > > - if ( d && !(memflags & MEMF_no_owner) && > > - assign_pages(d, pg, order, memflags) ) > > + if ( d && !(memflags & MEMF_no_owner) ) > > { > > - free_heap_pages(pg, order, memflags & MEMF_no_scrub); > > - return NULL; > > + if ( memflags & MEMF_no_refcount ) > > + { > > + unsigned long i; > > + > > + for ( i = 0; i < (1ul << order); i++ ) > > + { > > + ASSERT(!pg[i].count_info); > > + pg[i].count_info = PGC_extra; > > ... this is pursuing the wrongness of the code above and not safe > against offlining. > > We could argue this is an already existing bug, however I am a bit > unease to add more abuse in the code. Jan, what do you think? > I'd consider a straightforward patch-clash. If this patch goes in after yours then it needs to be modified accordingly, or vice versa. Paul > > + } > > + } > > + if ( assign_pages(d, pg, order, memflags) ) > > + { > > + free_heap_pages(pg, order, memflags & MEMF_no_scrub); > > + return NULL; > > + } > > } > > Cheers, > > [1] https://lore.kernel.org/xen-devel/20200204133357.32101-1- > julien@xen.org/ > > -- > Julien Grall
On 06.02.2020 11:12, Durrant, Paul wrote: >> From: Julien Grall <julien@xen.org> >> Sent: 06 February 2020 10:04 >> >> On 03/02/2020 10:56, Paul Durrant wrote: >>> @@ -2332,11 +2350,23 @@ struct page_info *alloc_domheap_pages( >>> memflags, d)) == NULL)) ) >>> return NULL; >>> >>> - if ( d && !(memflags & MEMF_no_owner) && >>> - assign_pages(d, pg, order, memflags) ) >>> + if ( d && !(memflags & MEMF_no_owner) ) >>> { >>> - free_heap_pages(pg, order, memflags & MEMF_no_scrub); >>> - return NULL; >>> + if ( memflags & MEMF_no_refcount ) >>> + { >>> + unsigned long i; >>> + >>> + for ( i = 0; i < (1ul << order); i++ ) >>> + { >>> + ASSERT(!pg[i].count_info); >>> + pg[i].count_info = PGC_extra; >> >> ... this is pursuing the wrongness of the code above and not safe >> against offlining. >> >> We could argue this is an already existing bug, however I am a bit >> unease to add more abuse in the code. Jan, what do you think? >> > > I'd consider a straightforward patch-clash. If this patch goes in > after yours then it needs to be modified accordingly, or vice versa. While generally I advocate for not widening existing issues, I agree with Paul here. His patch should not be penalized by us _later_ having found an issue (which is quite a bit wider). Jan
On 06/02/2020 11:43, Jan Beulich wrote: > On 06.02.2020 11:12, Durrant, Paul wrote: >>> From: Julien Grall <julien@xen.org> >>> Sent: 06 February 2020 10:04 >>> >>> On 03/02/2020 10:56, Paul Durrant wrote: >>>> @@ -2332,11 +2350,23 @@ struct page_info *alloc_domheap_pages( >>>> memflags, d)) == NULL)) ) >>>> return NULL; >>>> >>>> - if ( d && !(memflags & MEMF_no_owner) && >>>> - assign_pages(d, pg, order, memflags) ) >>>> + if ( d && !(memflags & MEMF_no_owner) ) >>>> { >>>> - free_heap_pages(pg, order, memflags & MEMF_no_scrub); >>>> - return NULL; >>>> + if ( memflags & MEMF_no_refcount ) >>>> + { >>>> + unsigned long i; >>>> + >>>> + for ( i = 0; i < (1ul << order); i++ ) >>>> + { >>>> + ASSERT(!pg[i].count_info); >>>> + pg[i].count_info = PGC_extra; >>> >>> ... this is pursuing the wrongness of the code above and not safe >>> against offlining. >>> >>> We could argue this is an already existing bug, however I am a bit >>> unease to add more abuse in the code. Jan, what do you think? >>> >> >> I'd consider a straightforward patch-clash. If this patch goes in >> after yours then it needs to be modified accordingly, or vice versa. > > While generally I advocate for not widening existing issues, I agree > with Paul here. His patch should not be penalized by us _later_ > having found an issue (which is quite a bit wider). Fair enough. For the Arm bits: Acked-by: Julien Grall <julien@xen.org> Cheers,
diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c index e1b041e2df..fd134edcde 100644 --- a/xen/arch/x86/mm.c +++ b/xen/arch/x86/mm.c @@ -4217,7 +4217,8 @@ int steal_page( if ( !(owner = page_get_owner_and_reference(page)) ) goto fail; - if ( owner != d || is_xen_heap_page(page) ) + if ( owner != d || is_xen_heap_page(page) || + (page->count_info & PGC_extra) ) goto fail_put; /* diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c index bbd3163909..1ac9d9c719 100644 --- a/xen/common/page_alloc.c +++ b/xen/common/page_alloc.c @@ -2267,7 +2267,29 @@ int assign_pages( goto out; } - if ( !(memflags & MEMF_no_refcount) ) +#ifndef NDEBUG + { + unsigned int extra_pages = 0; + + for ( i = 0; i < (1ul << order); i++ ) + { + ASSERT(!(pg[i].count_info & ~PGC_extra)); + if ( pg[i].count_info & PGC_extra ) + extra_pages++; + } + + ASSERT(!extra_pages || + ((memflags & MEMF_no_refcount) && + extra_pages == 1u << order)); + } +#endif + + if ( pg[0].count_info & PGC_extra ) + { + d->extra_pages += 1u << order; + memflags &= ~MEMF_no_refcount; + } + else if ( !(memflags & MEMF_no_refcount) ) { unsigned int tot_pages = domain_tot_pages(d) + (1 << order); @@ -2278,18 +2300,19 @@ int assign_pages( rc = -E2BIG; goto out; } + } - if ( unlikely(domain_adjust_tot_pages(d, 1 << order) == (1 << order)) ) + if ( !(memflags & MEMF_no_refcount) && + unlikely(domain_adjust_tot_pages(d, 1 << order) == (1 << order)) ) get_knownalive_domain(d); - } for ( i = 0; i < (1 << order); i++ ) { ASSERT(page_get_owner(&pg[i]) == NULL); - ASSERT(!pg[i].count_info); page_set_owner(&pg[i], d); smp_wmb(); /* Domain pointer must be visible before updating refcnt. 
*/ - pg[i].count_info = PGC_allocated | 1; + pg[i].count_info = + (pg[i].count_info & PGC_extra) | PGC_allocated | 1; page_list_add_tail(&pg[i], &d->page_list); } @@ -2315,11 +2338,6 @@ struct page_info *alloc_domheap_pages( if ( memflags & MEMF_no_owner ) memflags |= MEMF_no_refcount; - else if ( (memflags & MEMF_no_refcount) && d ) - { - ASSERT(!(memflags & MEMF_no_refcount)); - return NULL; - } if ( !dma_bitsize ) memflags &= ~MEMF_no_dma; @@ -2332,11 +2350,23 @@ struct page_info *alloc_domheap_pages( memflags, d)) == NULL)) ) return NULL; - if ( d && !(memflags & MEMF_no_owner) && - assign_pages(d, pg, order, memflags) ) + if ( d && !(memflags & MEMF_no_owner) ) { - free_heap_pages(pg, order, memflags & MEMF_no_scrub); - return NULL; + if ( memflags & MEMF_no_refcount ) + { + unsigned long i; + + for ( i = 0; i < (1ul << order); i++ ) + { + ASSERT(!pg[i].count_info); + pg[i].count_info = PGC_extra; + } + } + if ( assign_pages(d, pg, order, memflags) ) + { + free_heap_pages(pg, order, memflags & MEMF_no_scrub); + return NULL; + } } return pg; @@ -2384,6 +2414,11 @@ void free_domheap_pages(struct page_info *pg, unsigned int order) BUG(); } arch_free_heap_page(d, &pg[i]); + if ( pg[i].count_info & PGC_extra ) + { + ASSERT(d->extra_pages); + d->extra_pages--; + } } drop_dom_ref = !domain_adjust_tot_pages(d, -(1 << order)); diff --git a/xen/include/asm-arm/mm.h b/xen/include/asm-arm/mm.h index 333efd3a60..7df91280bc 100644 --- a/xen/include/asm-arm/mm.h +++ b/xen/include/asm-arm/mm.h @@ -119,9 +119,12 @@ struct page_info #define PGC_state_offlined PG_mask(2, 9) #define PGC_state_free PG_mask(3, 9) #define page_state_is(pg, st) (((pg)->count_info&PGC_state) == PGC_state_##st) +/* Page is not reference counted */ +#define _PGC_extra PG_shift(10) +#define PGC_extra PG_mask(1, 10) /* Count of references to this frame. 
*/ -#define PGC_count_width PG_shift(9) +#define PGC_count_width PG_shift(10) #define PGC_count_mask ((1UL<<PGC_count_width)-1) /* diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h index 2ca8882ad0..06d64d494d 100644 --- a/xen/include/asm-x86/mm.h +++ b/xen/include/asm-x86/mm.h @@ -77,9 +77,12 @@ #define PGC_state_offlined PG_mask(2, 9) #define PGC_state_free PG_mask(3, 9) #define page_state_is(pg, st) (((pg)->count_info&PGC_state) == PGC_state_##st) +/* Page is not reference counted */ +#define _PGC_extra PG_shift(10) +#define PGC_extra PG_mask(1, 10) - /* Count of references to this frame. */ -#define PGC_count_width PG_shift(9) +/* Count of references to this frame. */ +#define PGC_count_width PG_shift(10) #define PGC_count_mask ((1UL<<PGC_count_width)-1) /* diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h index 1b6d7b941f..21b5f4cebd 100644 --- a/xen/include/xen/sched.h +++ b/xen/include/xen/sched.h @@ -374,6 +374,7 @@ struct domain unsigned int xenheap_pages; /* pages allocated from Xen heap */ unsigned int outstanding_pages; /* pages claimed but not possessed */ unsigned int max_pages; /* maximum value for domain_tot_pages() */ + unsigned int extra_pages; /* pages not included in domain_tot_pages() */ atomic_t shr_pages; /* shared pages */ atomic_t paged_pages; /* paged-out pages */ @@ -548,7 +549,9 @@ struct domain /* Return number of pages currently possessed by the domain */ static inline unsigned int domain_tot_pages(const struct domain *d) { - return d->tot_pages; + ASSERT(d->extra_pages <= d->tot_pages); + + return d->tot_pages - d->extra_pages; } /* Protect updates/reads (resp.) of domain_list and domain_hash. */