diff mbox series

x86/mm: do not mark IO regions as Xen heap

Message ID 20200910133514.82155-1-roger.pau@citrix.com
State New
Headers show
Series x86/mm: do not mark IO regions as Xen heap | expand

Commit Message

Roger Pau Monné Sept. 10, 2020, 1:35 p.m. UTC
arch_init_memory will treat all the gaps on the physical memory map
between RAM regions as MMIO and use share_xen_page_with_guest in order
to assign them to dom_io. This has the side effect of setting the Xen
heap flag on such pages, and thus is_special_page would then return
true which is an issue in epte_get_entry_emt because such pages will
be forced to use write-back cache attributes.

Fix this by introducing a new helper to assign the MMIO regions to
dom_io without setting the Xen heap flag on the pages, so that
is_special_page will return false and the pages won't be forced to use
write-back cache attributes.

Fixes: 81fd0d3ca4b2cd ('x86/hvm: simplify 'mmio_direct' check in epte_get_entry_emt()')
Suggested-by: Jan Beulich <jbeulich@suse.com>
Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
---
Cc: Paul Durrant <paul@xen.org>
---
 xen/arch/x86/mm.c | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

Comments

Paul Durrant Sept. 10, 2020, 2:23 p.m. UTC | #1
> -----Original Message-----
> From: Roger Pau Monne <roger.pau@citrix.com>
> Sent: 10 September 2020 14:35
> To: xen-devel@lists.xenproject.org
> Cc: Roger Pau Monne <roger.pau@citrix.com>; Jan Beulich <jbeulich@suse.com>; Andrew Cooper
> <andrew.cooper3@citrix.com>; Wei Liu <wl@xen.org>; Paul Durrant <paul@xen.org>
> Subject: [PATCH] x86/mm: do not mark IO regions as Xen heap
> 
> arch_init_memory will treat all the gaps on the physical memory map
> between RAM regions as MMIO and use share_xen_page_with_guest in order
> to assign them to dom_io. This has the side effect of setting the Xen
> heap flag on such pages, and thus is_special_page would then return
> true which is an issue in epte_get_entry_emt because such pages will
> be forced to use write-back cache attributes.
> 
> Fix this by introducing a new helper to assign the MMIO regions to
> dom_io without setting the Xen heap flag on the pages, so that
> is_special_page will return false and the pages won't be forced to use
> write-back cache attributes.
> 
> Fixes: 81fd0d3ca4b2cd ('x86/hvm: simplify 'mmio_direct' check in epte_get_entry_emt()')
> Suggested-by: Jan Beulich <jbeulich@suse.com>
> Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
> ---
> Cc: Paul Durrant <paul@xen.org>
> ---
>  xen/arch/x86/mm.c | 16 ++++++++++++++--
>  1 file changed, 14 insertions(+), 2 deletions(-)
> 
> diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
> index 35ec0e11f6..4daf4e038a 100644
> --- a/xen/arch/x86/mm.c
> +++ b/xen/arch/x86/mm.c
> @@ -271,6 +271,18 @@ static l4_pgentry_t __read_mostly split_l4e;
>  #define root_pgt_pv_xen_slots ROOT_PAGETABLE_PV_XEN_SLOTS
>  #endif
> 
> +static void __init assign_io_page(struct page_info *page)
> +{
> +    set_gpfn_from_mfn(mfn_x(page_to_mfn(page)), INVALID_M2P_ENTRY);
> +
> +    /* The incremented type count pins as writable. */
> +    page->u.inuse.type_info = PGT_writable_page | PGT_validated | 1;
> +
> +    page_set_owner(page, dom_io);
> +
> +    page->count_info |= PGC_allocated | 1;
> +}
> +
>  void __init arch_init_memory(void)
>  {
>      unsigned long i, pfn, rstart_pfn, rend_pfn, iostart_pfn, ioend_pfn;
> @@ -291,7 +303,7 @@ void __init arch_init_memory(void)
>       */
>      BUG_ON(pvh_boot && trampoline_phys != 0x1000);
>      for ( i = 0; i < 0x100; i++ )
> -        share_xen_page_with_guest(mfn_to_page(_mfn(i)), dom_io, SHARE_rw);
> +        assign_io_page(mfn_to_page(_mfn(i)));
> 
>      /* Any areas not specified as RAM by the e820 map are considered I/O. */
>      for ( i = 0, pfn = 0; pfn < max_page; i++ )
> @@ -332,7 +344,7 @@ void __init arch_init_memory(void)
>              if ( !mfn_valid(_mfn(pfn)) )
>                  continue;
> 
> -            share_xen_page_with_guest(mfn_to_page(_mfn(pfn)), dom_io, SHARE_rw);
> +            assign_io_page(mfn_to_page(_mfn(pfn)));

Now that these calls to share_xen_page_with_guest() are gone, can we change share_xen_page_with_guest() to ASSERT that PGC_xen_heap is already set, and avoid (needlessly) ORing it in?

  Paul


>          }
> 
>          /* Skip the RAM region. */
> --
> 2.28.0
Jan Beulich Sept. 10, 2020, 2:36 p.m. UTC | #2
On 10.09.2020 16:23, Paul Durrant wrote:
>> From: Roger Pau Monne <roger.pau@citrix.com>
>> Sent: 10 September 2020 14:35
>>
>> @@ -291,7 +303,7 @@ void __init arch_init_memory(void)
>>       */
>>      BUG_ON(pvh_boot && trampoline_phys != 0x1000);
>>      for ( i = 0; i < 0x100; i++ )
>> -        share_xen_page_with_guest(mfn_to_page(_mfn(i)), dom_io, SHARE_rw);
>> +        assign_io_page(mfn_to_page(_mfn(i)));
>>
>>      /* Any areas not specified as RAM by the e820 map are considered I/O. */
>>      for ( i = 0, pfn = 0; pfn < max_page; i++ )
>> @@ -332,7 +344,7 @@ void __init arch_init_memory(void)
>>              if ( !mfn_valid(_mfn(pfn)) )
>>                  continue;
>>
>> -            share_xen_page_with_guest(mfn_to_page(_mfn(pfn)), dom_io, SHARE_rw);
>> +            assign_io_page(mfn_to_page(_mfn(pfn)));
> 
> Now these calls to share_xen_page_with_guest() are gone, can we
> change share_xen_page_with_guest() to ASSERT that PGC_xen_heap
> is already set, and avoid (needlessly) ORing it in?

At least for the first use from pv_shim_setup_dom() I'm not sure
this holds. I'm also uncertain we want to enforce this - it
ought to be fine to hand a page allocated via alloc_domheap_page()
to this function. The choice of alloc_xenheap_page() should
merely depend on whether an always mapped page is wanted (without
extra effort to map).

Jan
Jan Beulich Sept. 10, 2020, 2:41 p.m. UTC | #3
On 10.09.2020 15:35, Roger Pau Monne wrote:
> arch_init_memory will treat all the gaps on the physical memory map
> between RAM regions as MMIO and use share_xen_page_with_guest in order
> to assign them to dom_io. This has the side effect of setting the Xen
> heap flag on such pages, and thus is_special_page would then return
> true which is an issue in epte_get_entry_emt because such pages will
> be forced to use write-back cache attributes.
> 
> Fix this by introducing a new helper to assign the MMIO regions to
> dom_io without setting the Xen heap flag on the pages, so that
> is_special_page will return false and the pages won't be forced to use
> write-back cache attributes.
> 
> Fixes: 81fd0d3ca4b2cd ('x86/hvm: simplify 'mmio_direct' check in epte_get_entry_emt()')
> Suggested-by: Jan Beulich <jbeulich@suse.com>
> Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>

Reviewed-by: Jan Beulich <jbeulich@suse.com>
albeit I'm inclined to add, while committing, a comment ...

> --- a/xen/arch/x86/mm.c
> +++ b/xen/arch/x86/mm.c
> @@ -271,6 +271,18 @@ static l4_pgentry_t __read_mostly split_l4e;
>  #define root_pgt_pv_xen_slots ROOT_PAGETABLE_PV_XEN_SLOTS
>  #endif
>  
> +static void __init assign_io_page(struct page_info *page)
> +{
> +    set_gpfn_from_mfn(mfn_x(page_to_mfn(page)), INVALID_M2P_ENTRY);
> +
> +    /* The incremented type count pins as writable. */
> +    page->u.inuse.type_info = PGT_writable_page | PGT_validated | 1;
> +
> +    page_set_owner(page, dom_io);
> +
> +    page->count_info |= PGC_allocated | 1;
> +}

... clarifying its relationship with share_xen_page_with_guest().

Jan
Roger Pau Monné Sept. 10, 2020, 5:27 p.m. UTC | #4
On Thu, Sep 10, 2020 at 04:41:41PM +0200, Jan Beulich wrote:
> On 10.09.2020 15:35, Roger Pau Monne wrote:
> > arch_init_memory will treat all the gaps on the physical memory map
> > between RAM regions as MMIO and use share_xen_page_with_guest in order
> > to assign them to dom_io. This has the side effect of setting the Xen
> > heap flag on such pages, and thus is_special_page would then return
> > true which is an issue in epte_get_entry_emt because such pages will
> > be forced to use write-back cache attributes.
> > 
> > Fix this by introducing a new helper to assign the MMIO regions to
> > dom_io without setting the Xen heap flag on the pages, so that
> > is_special_page will return false and the pages won't be forced to use
> > write-back cache attributes.
> > 
> > Fixes: 81fd0d3ca4b2cd ('x86/hvm: simplify 'mmio_direct' check in epte_get_entry_emt()')
> > Suggested-by: Jan Beulich <jbeulich@suse.com>
> > Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
> 
> Reviewed-by: Jan Beulich <jbeulich@suse.com>
> albeit I'm inclined to add, while committing, a comment ...
> 
> > --- a/xen/arch/x86/mm.c
> > +++ b/xen/arch/x86/mm.c
> > @@ -271,6 +271,18 @@ static l4_pgentry_t __read_mostly split_l4e;
> >  #define root_pgt_pv_xen_slots ROOT_PAGETABLE_PV_XEN_SLOTS
> >  #endif
> >  
> > +static void __init assign_io_page(struct page_info *page)
> > +{
> > +    set_gpfn_from_mfn(mfn_x(page_to_mfn(page)), INVALID_M2P_ENTRY);
> > +
> > +    /* The incremented type count pins as writable. */
> > +    page->u.inuse.type_info = PGT_writable_page | PGT_validated | 1;
> > +
> > +    page_set_owner(page, dom_io);
> > +
> > +    page->count_info |= PGC_allocated | 1;
> > +}
> 
> ... clarifying its relationship with share_xen_page_with_guest().

Sure, I think the relation is quite vague now, since this is no longer
sharing the page (as it's not added to xenpage_list) but rather
assigning it to dom_io.

In general I feel this is all quite confusing, and would benefit from
having some clear rules about what flags and assigned domains non-RAM
pages have, and how they should be added to the p2m.

Thanks, Roger.
Jan Beulich Sept. 11, 2020, 6:25 a.m. UTC | #5
On 10.09.2020 19:27, Roger Pau Monné wrote:
> On Thu, Sep 10, 2020 at 04:41:41PM +0200, Jan Beulich wrote:
>> On 10.09.2020 15:35, Roger Pau Monne wrote:
>>> --- a/xen/arch/x86/mm.c
>>> +++ b/xen/arch/x86/mm.c
>>> @@ -271,6 +271,18 @@ static l4_pgentry_t __read_mostly split_l4e;
>>>  #define root_pgt_pv_xen_slots ROOT_PAGETABLE_PV_XEN_SLOTS
>>>  #endif
>>>  
>>> +static void __init assign_io_page(struct page_info *page)
>>> +{
>>> +    set_gpfn_from_mfn(mfn_x(page_to_mfn(page)), INVALID_M2P_ENTRY);
>>> +
>>> +    /* The incremented type count pins as writable. */
>>> +    page->u.inuse.type_info = PGT_writable_page | PGT_validated | 1;
>>> +
>>> +    page_set_owner(page, dom_io);
>>> +
>>> +    page->count_info |= PGC_allocated | 1;
>>> +}
>>
>> ... clarifying its relationship with share_xen_page_with_guest().
> 
> Sure, I think the relation is quite vague now, since this is no longer
> sharing the page (as it's not added to xenpage_list) but rather
> assigning it to dom_io.
> 
> In general I feel this is all quite confusing, and would benefit from
> having some clear rules about what flags and assigned domains non-RAM
> pages have, and how they should be added to the p2m.

Some might call this "organically grown" ... ;-)

Jan
Jan Beulich Sept. 11, 2020, 11:55 a.m. UTC | #6
On 10.09.2020 16:41, Jan Beulich wrote:
> On 10.09.2020 15:35, Roger Pau Monne wrote:
>> arch_init_memory will treat all the gaps on the physical memory map
>> between RAM regions as MMIO and use share_xen_page_with_guest in order
>> to assign them to dom_io. This has the side effect of setting the Xen
>> heap flag on such pages, and thus is_special_page would then return
>> true which is an issue in epte_get_entry_emt because such pages will
>> be forced to use write-back cache attributes.
>>
>> Fix this by introducing a new helper to assign the MMIO regions to
>> dom_io without setting the Xen heap flag on the pages, so that
>> is_special_page will return false and the pages won't be forced to use
>> write-back cache attributes.
>>
>> Fixes: 81fd0d3ca4b2cd ('x86/hvm: simplify 'mmio_direct' check in epte_get_entry_emt()')
>> Suggested-by: Jan Beulich <jbeulich@suse.com>
>> Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
> 
> Reviewed-by: Jan Beulich <jbeulich@suse.com>
> albeit I'm inclined to add, while committing, a comment ...
> 
>> --- a/xen/arch/x86/mm.c
>> +++ b/xen/arch/x86/mm.c
>> @@ -271,6 +271,18 @@ static l4_pgentry_t __read_mostly split_l4e;
>>  #define root_pgt_pv_xen_slots ROOT_PAGETABLE_PV_XEN_SLOTS
>>  #endif
>>  
>> +static void __init assign_io_page(struct page_info *page)
>> +{
>> +    set_gpfn_from_mfn(mfn_x(page_to_mfn(page)), INVALID_M2P_ENTRY);
>> +
>> +    /* The incremented type count pins as writable. */
>> +    page->u.inuse.type_info = PGT_writable_page | PGT_validated | 1;
>> +
>> +    page_set_owner(page, dom_io);
>> +
>> +    page->count_info |= PGC_allocated | 1;
>> +}
> 
> ... clarifying its relationship with share_xen_page_with_guest().

I'm also going to add an assertion to share_xen_page_with_guest() to
document and make sure dom_io won't again be passed there.

Jan
Jan Beulich Sept. 17, 2020, 2:12 p.m. UTC | #7
On 10.09.2020 15:35, Roger Pau Monne wrote:
> arch_init_memory will treat all the gaps on the physical memory map
> between RAM regions as MMIO and use share_xen_page_with_guest in order
> to assign them to dom_io. This has the side effect of setting the Xen
> heap flag on such pages, and thus is_special_page would then return
> true which is an issue in epte_get_entry_emt because such pages will
> be forced to use write-back cache attributes.
> 
> Fix this by introducing a new helper to assign the MMIO regions to
> dom_io without setting the Xen heap flag on the pages, so that
> is_special_page will return false and the pages won't be forced to use
> write-back cache attributes.
> 
> Fixes: 81fd0d3ca4b2cd ('x86/hvm: simplify 'mmio_direct' check in epte_get_entry_emt()')
> Suggested-by: Jan Beulich <jbeulich@suse.com>
> Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>

I'm sorry for noticing this only now, but there is a place where
we actually build on these pages being marked "special": In
xenmem_add_to_physmap_one() we have

    if ( mfn_valid(prev_mfn) )
    {
        if ( is_special_page(mfn_to_page(prev_mfn)) )
            /* Special pages are simply unhooked from this phys slot. */
            rc = guest_physmap_remove_page(d, gpfn, prev_mfn, PAGE_ORDER_4K);
        else
            /* Normal domain memory is freed, to avoid leaking memory. */
            rc = guest_remove_page(d, gfn_x(gpfn));
    }

As you'll notice MMIO pages not satisfying mfn_valid() will simply
bypass any updates here, but the subsequent guest_physmap_add_page()
will have the P2M entry updated anyway. MMIO pages which satisfy
mfn_valid(), however, would previously have been passed into
guest_physmap_remove_page() (which generally would succeed) while
now guest_remove_page() will (afaict) fail (get_page() there won't
succeed).

I haven't come to a clear conclusion yet how best to address this.

Jan
Roger Pau Monné Sept. 17, 2020, 2:28 p.m. UTC | #8
On Thu, Sep 17, 2020 at 04:12:23PM +0200, Jan Beulich wrote:
> On 10.09.2020 15:35, Roger Pau Monne wrote:
> > arch_init_memory will treat all the gaps on the physical memory map
> > between RAM regions as MMIO and use share_xen_page_with_guest in order
> > to assign them to dom_io. This has the side effect of setting the Xen
> > heap flag on such pages, and thus is_special_page would then return
> > true which is an issue in epte_get_entry_emt because such pages will
> > be forced to use write-back cache attributes.
> > 
> > Fix this by introducing a new helper to assign the MMIO regions to
> > dom_io without setting the Xen heap flag on the pages, so that
> > is_special_page will return false and the pages won't be forced to use
> > write-back cache attributes.
> > 
> > Fixes: 81fd0d3ca4b2cd ('x86/hvm: simplify 'mmio_direct' check in epte_get_entry_emt()')
> > Suggested-by: Jan Beulich <jbeulich@suse.com>
> > Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
> 
> I'm sorry for noticing this only now, but there is a place where
> we actually build on these pages being marked "special": In
> xenmem_add_to_physmap_one() we have
> 
>     if ( mfn_valid(prev_mfn) )
>     {
>         if ( is_special_page(mfn_to_page(prev_mfn)) )
>             /* Special pages are simply unhooked from this phys slot. */
>             rc = guest_physmap_remove_page(d, gpfn, prev_mfn, PAGE_ORDER_4K);
>         else
>             /* Normal domain memory is freed, to avoid leaking memory. */
>             rc = guest_remove_page(d, gfn_x(gpfn));
>     }
> 
> As you'll notice MMIO pages not satisfying mfn_valid() will simply
> bypass any updates here, but the subsequent guest_physmap_add_page()
> will have the P2M entry updated anyway. MMIO pages which satisfy
> mfn_valid(), however, would previously have been passed into
> guest_physmap_remove_page() (which generally would succeed) while
> now guest_remove_page() will (afaict) fail (get_page() there won't
> succeed).

Would Xen even get to the get_page in guest_remove_page in that case?

There's a p2m_mmio_direct type check that will succeed for MMIO
ranges, and that just clears the p2m entry and returns before doing
any get_page.

Roger.
Jan Beulich Sept. 17, 2020, 2:38 p.m. UTC | #9
On 17.09.2020 16:28, Roger Pau Monné wrote:
> On Thu, Sep 17, 2020 at 04:12:23PM +0200, Jan Beulich wrote:
>> On 10.09.2020 15:35, Roger Pau Monne wrote:
>>> arch_init_memory will treat all the gaps on the physical memory map
>>> between RAM regions as MMIO and use share_xen_page_with_guest in order
>>> to assign them to dom_io. This has the side effect of setting the Xen
>>> heap flag on such pages, and thus is_special_page would then return
>>> true which is an issue in epte_get_entry_emt because such pages will
>>> be forced to use write-back cache attributes.
>>>
>>> Fix this by introducing a new helper to assign the MMIO regions to
>>> dom_io without setting the Xen heap flag on the pages, so that
>>> is_special_page will return false and the pages won't be forced to use
>>> write-back cache attributes.
>>>
>>> Fixes: 81fd0d3ca4b2cd ('x86/hvm: simplify 'mmio_direct' check in epte_get_entry_emt()')
>>> Suggested-by: Jan Beulich <jbeulich@suse.com>
>>> Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
>>
>> I'm sorry for noticing this only now, but there is a place where
>> we actually build on these pages being marked "special": In
>> xenmem_add_to_physmap_one() we have
>>
>>     if ( mfn_valid(prev_mfn) )
>>     {
>>         if ( is_special_page(mfn_to_page(prev_mfn)) )
>>             /* Special pages are simply unhooked from this phys slot. */
>>             rc = guest_physmap_remove_page(d, gpfn, prev_mfn, PAGE_ORDER_4K);
>>         else
>>             /* Normal domain memory is freed, to avoid leaking memory. */
>>             rc = guest_remove_page(d, gfn_x(gpfn));
>>     }
>>
>> As you'll notice MMIO pages not satisfying mfn_valid() will simply
>> bypass any updates here, but the subsequent guest_physmap_add_page()
>> will have the P2M entry updated anyway. MMIO pages which satisfy
>> mfn_valid(), however, would previously have been passed into
>> guest_physmap_remove_page() (which generally would succeed) while
>> now guest_remove_page() will (afaict) fail (get_page() there won't
>> succeed).
> 
> Would Xen even get to the get_page in guest_remove_page on that case?
> 
> There's a p2m_mmio_direct type check that will succeed for MMIO
> ranges, and that just clears the p2m entry and returns before doing
> any get_page.

Oh, I did overlook this indeed.

Jan
Jan Beulich Sept. 21, 2020, 2:22 p.m. UTC | #10
On 10.09.2020 15:35, Roger Pau Monne wrote:
> arch_init_memory will treat all the gaps on the physical memory map
> between RAM regions as MMIO and use share_xen_page_with_guest in order
> to assign them to dom_io. This has the side effect of setting the Xen
> heap flag on such pages, and thus is_special_page would then return
> true which is an issue in epte_get_entry_emt because such pages will
> be forced to use write-back cache attributes.
> 
> Fix this by introducing a new helper to assign the MMIO regions to
> dom_io without setting the Xen heap flag on the pages, so that
> is_special_page will return false and the pages won't be forced to use
> write-back cache attributes.
> 
> Fixes: 81fd0d3ca4b2cd ('x86/hvm: simplify 'mmio_direct' check in epte_get_entry_emt()')

Is this really the correct commit? I had this queued for backport,
and ended up discarding it from the queue based on this tag, only
to then notice that I remembered correctly that I did backport
ca24b2ffdbd9 ("x86/hvm: set 'ipat' in EPT for special pages") to
the stable trees already. Isn't it _this_ commit which the change
here fixes?

Jan
Roger Pau Monné Sept. 21, 2020, 3:35 p.m. UTC | #11
On Mon, Sep 21, 2020 at 04:22:26PM +0200, Jan Beulich wrote:
> On 10.09.2020 15:35, Roger Pau Monne wrote:
> > arch_init_memory will treat all the gaps on the physical memory map
> > between RAM regions as MMIO and use share_xen_page_with_guest in order
> > to assign them to dom_io. This has the side effect of setting the Xen
> > heap flag on such pages, and thus is_special_page would then return
> > true which is an issue in epte_get_entry_emt because such pages will
> > be forced to use write-back cache attributes.
> > 
> > Fix this by introducing a new helper to assign the MMIO regions to
> > dom_io without setting the Xen heap flag on the pages, so that
> > is_special_page will return false and the pages won't be forced to use
> > write-back cache attributes.
> > 
> > Fixes: 81fd0d3ca4b2cd ('x86/hvm: simplify 'mmio_direct' check in epte_get_entry_emt()')
> 
> Is this really the correct commit? I had this queued for backport,
> and ended up discarding it from the queue based on this tag, just
> to then noticing that I remembered correctly that I did backport
> ca24b2ffdbd9 ("x86/hvm: set 'ipat' in EPT for special pages") to
> the stable trees already. Isn't it _this_ commit which the change
> here fixes?

My bisection pointed to that exact commit as the one that broke my PVH
dom0 setup, so yes, I'm quite sure that's the commit, at least on the
box on which I bisected it.

ca24b2ffdbd9 was still fine because prior to the is_special_page
check loop there was still the `if ( direct_mmio ) ...` handling,
which made all MMIO regions except for the APIC access page forcefully
have their cache attributes set to UC.

Roger.
Jan Beulich Sept. 21, 2020, 3:49 p.m. UTC | #12
On 21.09.2020 17:35, Roger Pau Monné wrote:
> On Mon, Sep 21, 2020 at 04:22:26PM +0200, Jan Beulich wrote:
>> On 10.09.2020 15:35, Roger Pau Monne wrote:
>>> arch_init_memory will treat all the gaps on the physical memory map
>>> between RAM regions as MMIO and use share_xen_page_with_guest in order
>>> to assign them to dom_io. This has the side effect of setting the Xen
>>> heap flag on such pages, and thus is_special_page would then return
>>> true which is an issue in epte_get_entry_emt because such pages will
>>> be forced to use write-back cache attributes.
>>>
>>> Fix this by introducing a new helper to assign the MMIO regions to
>>> dom_io without setting the Xen heap flag on the pages, so that
>>> is_special_page will return false and the pages won't be forced to use
>>> write-back cache attributes.
>>>
>>> Fixes: 81fd0d3ca4b2cd ('x86/hvm: simplify 'mmio_direct' check in epte_get_entry_emt()')
>>
>> Is this really the correct commit? I had this queued for backport,
>> and ended up discarding it from the queue based on this tag, just
>> to then noticing that I remembered correctly that I did backport
>> ca24b2ffdbd9 ("x86/hvm: set 'ipat' in EPT for special pages") to
>> the stable trees already. Isn't it _this_ commit which the change
>> here fixes?
> 
> My bisection pointed to that exact commit as the one that broke my PVH
> dom0 setup, so yes, I'm quite sure that's the commit at least on the
> box that I've bisected it.
> 
> ca24b2ffdbd9 was still fine because previous to the is_special_page
> check loop there was still the `if ( direct_mmio ) ...` handling,
> which made all MMIO regions except for the APIC access page forcefully
> have it's cache attributes set to UC.

Ah yes, I see - thanks. Makes me less sure whether the patch
here really wants backporting. It's certainly an improvement in
its own right to remove the difference between mfn_valid() and
!mfn_valid() MMIO pages, leading e.g. to different treatment by
_sh_propagate(). Will need to give this some more thought, and
of course your and others' thoughts would also be appreciated.

Jan
Roger Pau Monné Sept. 22, 2020, 8:43 a.m. UTC | #13
On Mon, Sep 21, 2020 at 05:49:45PM +0200, Jan Beulich wrote:
> On 21.09.2020 17:35, Roger Pau Monné wrote:
> > On Mon, Sep 21, 2020 at 04:22:26PM +0200, Jan Beulich wrote:
> >> On 10.09.2020 15:35, Roger Pau Monne wrote:
> >>> arch_init_memory will treat all the gaps on the physical memory map
> >>> between RAM regions as MMIO and use share_xen_page_with_guest in order
> >>> to assign them to dom_io. This has the side effect of setting the Xen
> >>> heap flag on such pages, and thus is_special_page would then return
> >>> true which is an issue in epte_get_entry_emt because such pages will
> >>> be forced to use write-back cache attributes.
> >>>
> >>> Fix this by introducing a new helper to assign the MMIO regions to
> >>> dom_io without setting the Xen heap flag on the pages, so that
> >>> is_special_page will return false and the pages won't be forced to use
> >>> write-back cache attributes.
> >>>
> >>> Fixes: 81fd0d3ca4b2cd ('x86/hvm: simplify 'mmio_direct' check in epte_get_entry_emt()')
> >>
> >> Is this really the correct commit? I had this queued for backport,
> >> and ended up discarding it from the queue based on this tag, just
> >> to then noticing that I remembered correctly that I did backport
> >> ca24b2ffdbd9 ("x86/hvm: set 'ipat' in EPT for special pages") to
> >> the stable trees already. Isn't it _this_ commit which the change
> >> here fixes?
> > 
> > My bisection pointed to that exact commit as the one that broke my PVH
> > dom0 setup, so yes, I'm quite sure that's the commit at least on the
> > box that I've bisected it.
> > 
> > ca24b2ffdbd9 was still fine because previous to the is_special_page
> > check loop there was still the `if ( direct_mmio ) ...` handling,
> > which made all MMIO regions except for the APIC access page forcefully
> > have it's cache attributes set to UC.
> 
> Ah yes, I see - thanks. Makes me less sure whether the patch
> here really wants backporting.

As long as 81fd0d3ca4b2cd is not backported, I would argue for not
backporting this either. I don't see much benefit in backporting this
alone, and there is a risk of introducing an unintended functional
change as a result of no longer marking MMIO pages as Xen heap.

Thanks, Roger.
diff mbox series

Patch

diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index 35ec0e11f6..4daf4e038a 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -271,6 +271,18 @@  static l4_pgentry_t __read_mostly split_l4e;
 #define root_pgt_pv_xen_slots ROOT_PAGETABLE_PV_XEN_SLOTS
 #endif
 
+static void __init assign_io_page(struct page_info *page)
+{
+    set_gpfn_from_mfn(mfn_x(page_to_mfn(page)), INVALID_M2P_ENTRY);
+
+    /* The incremented type count pins as writable. */
+    page->u.inuse.type_info = PGT_writable_page | PGT_validated | 1;
+
+    page_set_owner(page, dom_io);
+
+    page->count_info |= PGC_allocated | 1;
+}
+
 void __init arch_init_memory(void)
 {
     unsigned long i, pfn, rstart_pfn, rend_pfn, iostart_pfn, ioend_pfn;
@@ -291,7 +303,7 @@  void __init arch_init_memory(void)
      */
     BUG_ON(pvh_boot && trampoline_phys != 0x1000);
     for ( i = 0; i < 0x100; i++ )
-        share_xen_page_with_guest(mfn_to_page(_mfn(i)), dom_io, SHARE_rw);
+        assign_io_page(mfn_to_page(_mfn(i)));
 
     /* Any areas not specified as RAM by the e820 map are considered I/O. */
     for ( i = 0, pfn = 0; pfn < max_page; i++ )
@@ -332,7 +344,7 @@  void __init arch_init_memory(void)
             if ( !mfn_valid(_mfn(pfn)) )
                 continue;
 
-            share_xen_page_with_guest(mfn_to_page(_mfn(pfn)), dom_io, SHARE_rw);
+            assign_io_page(mfn_to_page(_mfn(pfn)));
         }
 
         /* Skip the RAM region. */