diff mbox series

[v3,2/9] x86: introduce a new set of APIs to manage Xen page tables

Message ID 177843fa29560291b8af90db5daffe4852ea96b7.1570034362.git.hongyax@amazon.com (mailing list archive)
State New, archived
Headers show
Series Add alternative API for Xen PTEs | expand

Commit Message

Xia, Hongyan Oct. 2, 2019, 5:16 p.m. UTC
From: Wei Liu <wei.liu2@citrix.com>

We are going to switch to using domheap page for page tables.
A new set of APIs is introduced to allocate, map, unmap and free pages
for page tables.

The allocation and deallocation work on mfn_t but not page_info,
because they are required to work even before frame table is set up.

Implement the old functions with the new ones. We will rewrite, site
by site, other mm functions that manipulate page tables to use the new
APIs.

Note these new APIs still use xenheap page underneath and no actual
map and unmap is done so that we don't break xen half way. They will
be switched to use domheap and dynamic mappings when usage of old APIs
is eliminated.

No functional change intended in this patch.

Signed-off-by: Wei Liu <wei.liu2@citrix.com>
---
 xen/arch/x86/mm.c        | 39 ++++++++++++++++++++++++++++++++++-----
 xen/include/asm-x86/mm.h | 11 +++++++++++
 2 files changed, 45 insertions(+), 5 deletions(-)

Comments

Jan Beulich Nov. 15, 2019, 3:23 p.m. UTC | #1
On 02.10.2019 19:16, Hongyan Xia wrote:
> @@ -4847,22 +4848,50 @@ int mmcfg_intercept_write(
>  }
>  
>  void *alloc_xen_pagetable(void)
> +{
> +    mfn_t mfn;
> +
> +    mfn = alloc_xen_pagetable_new();
> +    ASSERT(!mfn_eq(mfn, INVALID_MFN));
> +
> +    return map_xen_pagetable_new(mfn);
> +}
> +
> +void free_xen_pagetable(void *v)
> +{
> +    if ( system_state != SYS_STATE_early_boot )
> +        free_xen_pagetable_new(virt_to_mfn(v));
> +}
> +
> +mfn_t alloc_xen_pagetable_new(void)
>  {
>      if ( system_state != SYS_STATE_early_boot )
>      {
>          void *ptr = alloc_xenheap_page();
>  
>          BUG_ON(!hardware_domain && !ptr);
> -        return ptr;
> +        return virt_to_mfn(ptr);
>      }
>  
> -    return mfn_to_virt(mfn_x(alloc_boot_pages(1, 1)));
> +    return alloc_boot_pages(1, 1);
>  }
>  
> -void free_xen_pagetable(void *v)
> +void *map_xen_pagetable_new(mfn_t mfn)

There's no need for the map/unmap functions to have a _new
suffix, is there?

>  {
> -    if ( system_state != SYS_STATE_early_boot )
> -        free_xenheap_page(v);
> +    return mfn_to_virt(mfn_x(mfn));
> +}
> +
> +/* v can point to an entry within a table or be NULL */
> +void unmap_xen_pagetable_new(void *v)

Can this please take const void *, such that callers needing
mappings just for read purposes can have their pointer const-
qualified as well?

> +{
> +    /* XXX still using xenheap page, no need to do anything.  */

I wonder if it wouldn't be a good idea to at least have some
leak detection here temporarily, such that we have a chance of
noticing paths not properly doing the necessary unmapping.

The again a question is why you introduce such a map/unmap pair
in the first place. This is going to be a thin wrapper around
{,un}map_domain_page() in the end anyway, and hence callers
could as well be switched to calling those function directly,
as they're no-ops on Xen heap pages.

> --- a/xen/include/asm-x86/mm.h
> +++ b/xen/include/asm-x86/mm.h
> @@ -633,6 +633,17 @@ int arch_acquire_resource(struct domain *d, unsigned int type,
>  /* Allocator functions for Xen pagetables. */
>  void *alloc_xen_pagetable(void);
>  void free_xen_pagetable(void *v);
> +mfn_t alloc_xen_pagetable_new(void);
> +void *map_xen_pagetable_new(mfn_t mfn);
> +void unmap_xen_pagetable_new(void *v);
> +void free_xen_pagetable_new(mfn_t mfn);
> +
> +#define UNMAP_XEN_PAGETABLE_NEW(ptr)    \
> +    do {                                \
> +        unmap_xen_pagetable_new((ptr)); \

Stray double parentheses.

Jan
Wei Liu Nov. 15, 2019, 5:16 p.m. UTC | #2
On Fri, Nov 15, 2019 at 04:23:30PM +0100, Jan Beulich wrote:
> On 02.10.2019 19:16, Hongyan Xia wrote:
> > @@ -4847,22 +4848,50 @@ int mmcfg_intercept_write(
> >  }
> >  
> >  void *alloc_xen_pagetable(void)
> > +{
> > +    mfn_t mfn;
> > +
> > +    mfn = alloc_xen_pagetable_new();
> > +    ASSERT(!mfn_eq(mfn, INVALID_MFN));
> > +
> > +    return map_xen_pagetable_new(mfn);
> > +}
> > +
> > +void free_xen_pagetable(void *v)
> > +{
> > +    if ( system_state != SYS_STATE_early_boot )
> > +        free_xen_pagetable_new(virt_to_mfn(v));
> > +}
> > +
> > +mfn_t alloc_xen_pagetable_new(void)
> >  {
> >      if ( system_state != SYS_STATE_early_boot )
> >      {
> >          void *ptr = alloc_xenheap_page();
> >  
> >          BUG_ON(!hardware_domain && !ptr);
> > -        return ptr;
> > +        return virt_to_mfn(ptr);
> >      }
> >  
> > -    return mfn_to_virt(mfn_x(alloc_boot_pages(1, 1)));
> > +    return alloc_boot_pages(1, 1);
> >  }
> >  
> > -void free_xen_pagetable(void *v)
> > +void *map_xen_pagetable_new(mfn_t mfn)
> 
> There's no need for the map/unmap functions to have a _new
> suffix, is there?
> 

It is more consistent.

> >  {
> > -    if ( system_state != SYS_STATE_early_boot )
> > -        free_xenheap_page(v);
> > +    return mfn_to_virt(mfn_x(mfn));
> > +}
> > +
[...]
> 
> > +{
> > +    /* XXX still using xenheap page, no need to do anything.  */
> 
> I wonder if it wouldn't be a good idea to at least have some
> leak detection here temporarily, such that we have a chance of
> noticing paths not properly doing the necessary unmapping.
> 
> The again a question is why you introduce such a map/unmap pair
> in the first place. This is going to be a thin wrapper around
> {,un}map_domain_page() in the end anyway, and hence callers
> could as well be switched to calling those function directly,
> as they're no-ops on Xen heap pages.


All roads lead to Rome, but some roads are easier than others.  Having a
set of APIs to deal with page tables make the code easier to follow IMO.

And we can potentially do more stuff in this function, for example, make
the unmap function test if the page is really a page table to avoid
misuse; or like you said, have some leak detection check there.

Also, please consider there are dozens of patches that are built on top
of this set of new APIs.  Having to rewrite them just for mechanical
changes is not fun for Hongyan. I would suggest we be more pragmatic
here.

Wei.
Jan Beulich Nov. 18, 2019, 9:50 a.m. UTC | #3
On 15.11.2019 18:16, Wei Liu wrote:
> On Fri, Nov 15, 2019 at 04:23:30PM +0100, Jan Beulich wrote:
>> On 02.10.2019 19:16, Hongyan Xia wrote:
>>> @@ -4847,22 +4848,50 @@ int mmcfg_intercept_write(
>>>  }
>>>  
>>>  void *alloc_xen_pagetable(void)
>>> +{
>>> +    mfn_t mfn;
>>> +
>>> +    mfn = alloc_xen_pagetable_new();
>>> +    ASSERT(!mfn_eq(mfn, INVALID_MFN));
>>> +
>>> +    return map_xen_pagetable_new(mfn);
>>> +}
>>> +
>>> +void free_xen_pagetable(void *v)
>>> +{
>>> +    if ( system_state != SYS_STATE_early_boot )
>>> +        free_xen_pagetable_new(virt_to_mfn(v));
>>> +}
>>> +
>>> +mfn_t alloc_xen_pagetable_new(void)
>>>  {
>>>      if ( system_state != SYS_STATE_early_boot )
>>>      {
>>>          void *ptr = alloc_xenheap_page();
>>>  
>>>          BUG_ON(!hardware_domain && !ptr);
>>> -        return ptr;
>>> +        return virt_to_mfn(ptr);
>>>      }
>>>  
>>> -    return mfn_to_virt(mfn_x(alloc_boot_pages(1, 1)));
>>> +    return alloc_boot_pages(1, 1);
>>>  }
>>>  
>>> -void free_xen_pagetable(void *v)
>>> +void *map_xen_pagetable_new(mfn_t mfn)
>>
>> There's no need for the map/unmap functions to have a _new
>> suffix, is there?
>>
> 
> It is more consistent.

But will require touching all callers again when the _new suffixes
get dropped.

>>>  {
>>> -    if ( system_state != SYS_STATE_early_boot )
>>> -        free_xenheap_page(v);
>>> +    return mfn_to_virt(mfn_x(mfn));
>>> +}
>>> +
> [...]
>>
>>> +{
>>> +    /* XXX still using xenheap page, no need to do anything.  */
>>
>> I wonder if it wouldn't be a good idea to at least have some
>> leak detection here temporarily, such that we have a chance of
>> noticing paths not properly doing the necessary unmapping.
>>
>> The again a question is why you introduce such a map/unmap pair
>> in the first place. This is going to be a thin wrapper around
>> {,un}map_domain_page() in the end anyway, and hence callers
>> could as well be switched to calling those function directly,
>> as they're no-ops on Xen heap pages.
> 
> 
> All roads lead to Rome, but some roads are easier than others.  Having a
> set of APIs to deal with page tables make the code easier to follow IMO.
> 
> And we can potentially do more stuff in this function, for example, make
> the unmap function test if the page is really a page table to avoid
> misuse; or like you said, have some leak detection check there.
> 
> Also, please consider there are dozens of patches that are built on top
> of this set of new APIs.  Having to rewrite them just for mechanical
> changes is not fun for Hongyan. I would suggest we be more pragmatic
> here.

Whether to use separate functions depends - as you say - on the
longer term plans. If there's a good reasons to have these separate
(and that reason is stated in the description), then yes, I'll be
fine with having them. But introducing them just for the sake of
doing so isn't appropriate imo.

As to dozens of patches on top - I'm sorry to say it this bluntly,
but that's the risk anyone takes when compiling large series
without sufficient up front agreement. I've too been suffering from
such a penalty in a few cases; that's simply the way it is.

Jan
Wei Liu Nov. 18, 2019, 1:02 p.m. UTC | #4
On Mon, Nov 18, 2019 at 10:50:47AM +0100, Jan Beulich wrote:
> On 15.11.2019 18:16, Wei Liu wrote:
> > On Fri, Nov 15, 2019 at 04:23:30PM +0100, Jan Beulich wrote:
> >> On 02.10.2019 19:16, Hongyan Xia wrote:
> >>> @@ -4847,22 +4848,50 @@ int mmcfg_intercept_write(
> >>>  }
> >>>  
> >>>  void *alloc_xen_pagetable(void)
> >>> +{
> >>> +    mfn_t mfn;
> >>> +
> >>> +    mfn = alloc_xen_pagetable_new();
> >>> +    ASSERT(!mfn_eq(mfn, INVALID_MFN));
> >>> +
> >>> +    return map_xen_pagetable_new(mfn);
> >>> +}
> >>> +
> >>> +void free_xen_pagetable(void *v)
> >>> +{
> >>> +    if ( system_state != SYS_STATE_early_boot )
> >>> +        free_xen_pagetable_new(virt_to_mfn(v));
> >>> +}
> >>> +
> >>> +mfn_t alloc_xen_pagetable_new(void)
> >>>  {
> >>>      if ( system_state != SYS_STATE_early_boot )
> >>>      {
> >>>          void *ptr = alloc_xenheap_page();
> >>>  
> >>>          BUG_ON(!hardware_domain && !ptr);
> >>> -        return ptr;
> >>> +        return virt_to_mfn(ptr);
> >>>      }
> >>>  
> >>> -    return mfn_to_virt(mfn_x(alloc_boot_pages(1, 1)));
> >>> +    return alloc_boot_pages(1, 1);
> >>>  }
> >>>  
> >>> -void free_xen_pagetable(void *v)
> >>> +void *map_xen_pagetable_new(mfn_t mfn)
> >>
> >> There's no need for the map/unmap functions to have a _new
> >> suffix, is there?
> >>
> > 
> > It is more consistent.
> 
> But will require touching all callers again when the _new suffixes
> get dropped.

Yes but that's just a mechanical change that's very easy to review, so I
didn't think that's a big deal.

> 
> >>>  {
> >>> -    if ( system_state != SYS_STATE_early_boot )
> >>> -        free_xenheap_page(v);
> >>> +    return mfn_to_virt(mfn_x(mfn));
> >>> +}
> >>> +
> > [...]
> >>
> >>> +{
> >>> +    /* XXX still using xenheap page, no need to do anything.  */
> >>
> >> I wonder if it wouldn't be a good idea to at least have some
> >> leak detection here temporarily, such that we have a chance of
> >> noticing paths not properly doing the necessary unmapping.
> >>
> >> The again a question is why you introduce such a map/unmap pair
> >> in the first place. This is going to be a thin wrapper around
> >> {,un}map_domain_page() in the end anyway, and hence callers
> >> could as well be switched to calling those function directly,
> >> as they're no-ops on Xen heap pages.
> > 
> > 
> > All roads lead to Rome, but some roads are easier than others.  Having a
> > set of APIs to deal with page tables make the code easier to follow IMO.
> > 
> > And we can potentially do more stuff in this function, for example, make
> > the unmap function test if the page is really a page table to avoid
> > misuse; or like you said, have some leak detection check there.
> > 
> > Also, please consider there are dozens of patches that are built on top
> > of this set of new APIs.  Having to rewrite them just for mechanical
> > changes is not fun for Hongyan. I would suggest we be more pragmatic
> > here.
> 
> Whether to use separate functions depends - as you say - on the
> longer term plans. If there's a good reasons to have these separate
> (and that reason is stated in the description), then yes, I'll be
> fine with having them. But introducing them just for the sake of
> doing so isn't appropriate imo.
> 
> As to dozens of patches on top - I'm sorry to say it this bluntly,
> but that's the risk anyone takes when compiling large series
> without sufficient up front agreement. I've too been suffering from
> such a penalty in a few cases; that's simply the way it is.

The first paragraph illustrates why it is difficult to get sufficient
agreement up front. There will always be some changes that need
weighting benefits against long term and short term goal.  There will
always be differences in opinions in what is worthwhile or not.

Also, it is often said that a decision can't be made until patches are
written and posted, hence everyone has a significant risk if he/she
wants to work on something complex. You're well aware of this given the
second paragraph.

You've been on both sides of this. I'm sure you don't think that sort of
experience is nice. I just want to say, things don't have to be that
way.

My high-level modus operandi has been:

 * If a patch (series) achieves no apparent short term or long term
   goal, or it achieves one but actively works against the other, it
   should be rejected.
 * If a patch (series) achieves one of the two, also doesn't hinder
   further progress of the other, it should be accepted.
 * If a patch (series) achieves both at the same time, it should be
   accepted with open arms.

This applies regardless of how _I_ think things should be. If I have
very strong opinions, I will of course express them appropriately. But
if it is things are only internal to a component, I will be more relaxed
and let the contributors take the driver's seat. Sometimes I even
actively look beyond what is said in the commit message for positive
impact of the code that the patch author is not aware of.

I understand everyone has their own working style. That's fine. But
please consider what I said above. If that looks workable to you, it can
potentially make your life easier both as a reviewer and a
contributor. ;-)

Wei.

> 
> Jan
diff mbox series

Patch

diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index 99816fc67c..88a15ecdf2 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -119,6 +119,7 @@ 
 #include <xen/efi.h>
 #include <xen/grant_table.h>
 #include <xen/hypercall.h>
+#include <xen/mm.h>
 #include <asm/paging.h>
 #include <asm/shadow.h>
 #include <asm/page.h>
@@ -4847,22 +4848,50 @@  int mmcfg_intercept_write(
 }
 
 void *alloc_xen_pagetable(void)
+{
+    mfn_t mfn;
+
+    mfn = alloc_xen_pagetable_new();
+    ASSERT(!mfn_eq(mfn, INVALID_MFN));
+
+    return map_xen_pagetable_new(mfn);
+}
+
+void free_xen_pagetable(void *v)
+{
+    if ( system_state != SYS_STATE_early_boot )
+        free_xen_pagetable_new(virt_to_mfn(v));
+}
+
+mfn_t alloc_xen_pagetable_new(void)
 {
     if ( system_state != SYS_STATE_early_boot )
     {
         void *ptr = alloc_xenheap_page();
 
         BUG_ON(!hardware_domain && !ptr);
-        return ptr;
+        return virt_to_mfn(ptr);
     }
 
-    return mfn_to_virt(mfn_x(alloc_boot_pages(1, 1)));
+    return alloc_boot_pages(1, 1);
 }
 
-void free_xen_pagetable(void *v)
+void *map_xen_pagetable_new(mfn_t mfn)
 {
-    if ( system_state != SYS_STATE_early_boot )
-        free_xenheap_page(v);
+    return mfn_to_virt(mfn_x(mfn));
+}
+
+/* v can point to an entry within a table or be NULL */
+void unmap_xen_pagetable_new(void *v)
+{
+    /* XXX still using xenheap page, no need to do anything.  */
+}
+
+/* mfn can be INVALID_MFN */
+void free_xen_pagetable_new(mfn_t mfn)
+{
+    if ( system_state != SYS_STATE_early_boot && !mfn_eq(mfn, INVALID_MFN) )
+        free_xenheap_page(mfn_to_virt(mfn_x(mfn)));
 }
 
 static DEFINE_SPINLOCK(map_pgdir_lock);
diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h
index 2800106327..80173eb4c3 100644
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -633,6 +633,17 @@  int arch_acquire_resource(struct domain *d, unsigned int type,
 /* Allocator functions for Xen pagetables. */
 void *alloc_xen_pagetable(void);
 void free_xen_pagetable(void *v);
+mfn_t alloc_xen_pagetable_new(void);
+void *map_xen_pagetable_new(mfn_t mfn);
+void unmap_xen_pagetable_new(void *v);
+void free_xen_pagetable_new(mfn_t mfn);
+
+#define UNMAP_XEN_PAGETABLE_NEW(ptr)    \
+    do {                                \
+        unmap_xen_pagetable_new((ptr)); \
+        (ptr) = NULL;                   \
+    } while (0)
+
 l1_pgentry_t *virt_to_xen_l1e(unsigned long v);
 
 #endif /* __ASM_X86_MM_H__ */