Message ID | 20240513134046.82605-10-eliasely@amazon.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | Remove the directmap | expand |
On Mon, May 13, 2024 at 01:40:36PM +0000, Elias El Yandouzi wrote: > From: Hongyan Xia <hongyxia@amazon.com> > > When mfn is not in direct map, never use mfn_to_virt for any mappings. > > We replace mfn_x(mfn) <= PFN_DOWN(__pa(HYPERVISOR_VIRT_END - 1)) with > arch_mfns_in_directmap(mfn, 1) because these two are equivalent. The > extra comparison in arch_mfns_in_directmap() looks different but because > DIRECTMAP_VIRT_END is always higher, it does not make any difference. > > Lastly, domain_page_map_to_mfn() needs to gain a special case for > the PMAP. > > Signed-off-by: Hongyan Xia <hongyxia@amazon.com> > Signed-off-by: Julien Grall <jgrall@amazon.com> > > ---- > > Changes since Hongyan's version: > * arch_mfn_in_direct_map() was renamed to arch_mfns_in_directmap() > * add a special case for the PMAP in domain_page_map_to_mfn() > > diff --git a/xen/arch/x86/domain_page.c b/xen/arch/x86/domain_page.c > index 55e337aaf7..89caefc8a2 100644 > --- a/xen/arch/x86/domain_page.c > +++ b/xen/arch/x86/domain_page.c > @@ -14,8 +14,10 @@ > #include <xen/sched.h> > #include <xen/vmap.h> > #include <asm/current.h> > +#include <asm/fixmap.h> > #include <asm/flushtlb.h> > #include <asm/hardirq.h> > +#include <asm/pmap.h> > #include <asm/setup.h> > > static DEFINE_PER_CPU(struct vcpu *, override); > @@ -35,10 +37,11 @@ static inline struct vcpu *mapcache_current_vcpu(void) > /* > * When using efi runtime page tables, we have the equivalent of the idle > * domain's page tables but current may point at another domain's VCPU. > - * Return NULL as though current is not properly set up yet. > + * Return the idle domains's vcpu on that core because the efi per-domain > + * region (where the mapcache is) is in-sync with the idle domain. > */ > if ( efi_rs_using_pgtables() ) > - return NULL; > + return idle_vcpu[smp_processor_id()]; There's already an existing instance of idle_vcpu[smp_processor_id()] down in the function; it might make sense to put this in a local variable. 
> > /* > * If guest_table is NULL, and we are running a paravirtualised guest, > @@ -77,18 +80,24 @@ void *map_domain_page(mfn_t mfn) > struct vcpu_maphash_entry *hashent; > > #ifdef NDEBUG > - if ( mfn_x(mfn) <= PFN_DOWN(__pa(HYPERVISOR_VIRT_END - 1)) ) > + if ( arch_mfns_in_directmap(mfn_x(mfn), 1) ) > return mfn_to_virt(mfn_x(mfn)); > #endif > > v = mapcache_current_vcpu(); > - if ( !v ) > - return mfn_to_virt(mfn_x(mfn)); > + if ( !v || !v->domain->arch.mapcache.inuse ) > + { > + if ( arch_mfns_in_directmap(mfn_x(mfn), 1) ) > + return mfn_to_virt(mfn_x(mfn)); > + else > + { > + BUG_ON(system_state >= SYS_STATE_smp_boot); > + return pmap_map(mfn); > + } > + } > > dcache = &v->domain->arch.mapcache; > vcache = &v->arch.mapcache; > - if ( !dcache->inuse ) > - return mfn_to_virt(mfn_x(mfn)); > > perfc_incr(map_domain_page_count); > > @@ -184,6 +193,12 @@ void unmap_domain_page(const void *ptr) > if ( !va || va >= DIRECTMAP_VIRT_START ) > return; > > + if ( va >= FIXADDR_START && va < FIXADDR_TOP ) This should be a fixmap helper IMO. virt_is_fixmap(addr) or similar. There's already an existing instance in virt_to_fix(). 
> + { > + pmap_unmap((void *)ptr); > + return; > + } > + > ASSERT(va >= MAPCACHE_VIRT_START && va < MAPCACHE_VIRT_END); > > v = mapcache_current_vcpu(); > @@ -237,7 +252,7 @@ int mapcache_domain_init(struct domain *d) > unsigned int bitmap_pages; > > #ifdef NDEBUG > - if ( !mem_hotplug && max_page <= PFN_DOWN(__pa(HYPERVISOR_VIRT_END - 1)) ) > + if ( !mem_hotplug && arch_mfn_in_directmap(0, max_page) ) > return 0; > #endif > > @@ -308,7 +323,7 @@ void *map_domain_page_global(mfn_t mfn) > local_irq_is_enabled())); > > #ifdef NDEBUG > - if ( mfn_x(mfn) <= PFN_DOWN(__pa(HYPERVISOR_VIRT_END - 1)) ) > + if ( arch_mfn_in_directmap(mfn_x(mfn, 1)) ) > return mfn_to_virt(mfn_x(mfn)); > #endif > > @@ -335,6 +350,23 @@ mfn_t domain_page_map_to_mfn(const void *ptr) > if ( va >= DIRECTMAP_VIRT_START ) > return _mfn(virt_to_mfn(ptr)); > > + /* > + * The fixmap is stealing the top-end of the VMAP. So the check for > + * the PMAP *must* happen first. > + * > + * Also, the fixmap translate a slot to an address backwards. The > + * logic will rely on it to avoid any complexity. So check at > + * compile time this will always hold. > + */ > + BUILD_BUG_ON(fix_to_virt(FIX_PMAP_BEGIN) < fix_to_virt(FIX_PMAP_END)); > + > + if ( ((unsigned long)fix_to_virt(FIX_PMAP_END) <= va) && > + ((va & PAGE_MASK) <= (unsigned long)fix_to_virt(FIX_PMAP_BEGIN)) ) > + { Can we place this as some kind of helper in fixmap.h? It's already quite ugly, and could be useful in other places. bool virt_in_fixmap_range(addr, start idx, end idx) Or something similar. Thanks, Roger.
On 13.05.2024 15:40, Elias El Yandouzi wrote: > @@ -77,18 +80,24 @@ void *map_domain_page(mfn_t mfn) > struct vcpu_maphash_entry *hashent; > > #ifdef NDEBUG > - if ( mfn_x(mfn) <= PFN_DOWN(__pa(HYPERVISOR_VIRT_END - 1)) ) > + if ( arch_mfns_in_directmap(mfn_x(mfn), 1) ) > return mfn_to_virt(mfn_x(mfn)); > #endif > > v = mapcache_current_vcpu(); > - if ( !v ) > - return mfn_to_virt(mfn_x(mfn)); > + if ( !v || !v->domain->arch.mapcache.inuse ) > + { > + if ( arch_mfns_in_directmap(mfn_x(mfn), 1) ) > + return mfn_to_virt(mfn_x(mfn)); > + else > + { > + BUG_ON(system_state >= SYS_STATE_smp_boot); > + return pmap_map(mfn); > + } > + } > > dcache = &v->domain->arch.mapcache; > vcache = &v->arch.mapcache; > - if ( !dcache->inuse ) > - return mfn_to_virt(mfn_x(mfn)); Is this case (the logic for which you move up) actually possible? I.e. can we observe a domain here which hasn't made it through mapcache_domain_init() (where ->inuse is set)? > @@ -184,6 +193,12 @@ void unmap_domain_page(const void *ptr) > if ( !va || va >= DIRECTMAP_VIRT_START ) > return; > > + if ( va >= FIXADDR_START && va < FIXADDR_TOP ) > + { > + pmap_unmap((void *)ptr); MISRA is going to object to this casting away of const. It's rather pmap_unmap() which wants changing, to accept a pointer-to-const. > @@ -335,6 +350,23 @@ mfn_t domain_page_map_to_mfn(const void *ptr) > if ( va >= DIRECTMAP_VIRT_START ) > return _mfn(virt_to_mfn(ptr)); > > + /* > + * The fixmap is stealing the top-end of the VMAP. So the check for > + * the PMAP *must* happen first. Not really. You could also ... > + * Also, the fixmap translate a slot to an address backwards. The > + * logic will rely on it to avoid any complexity. So check at > + * compile time this will always hold. 
> + */ > + BUILD_BUG_ON(fix_to_virt(FIX_PMAP_BEGIN) < fix_to_virt(FIX_PMAP_END)); > + > + if ( ((unsigned long)fix_to_virt(FIX_PMAP_END) <= va) && > + ((va & PAGE_MASK) <= (unsigned long)fix_to_virt(FIX_PMAP_BEGIN)) ) > + { > + BUG_ON(system_state >= SYS_STATE_smp_boot); > + return l1e_get_mfn(l1_fixmap[l1_table_offset(va)]); > + } > + > if ( va >= VMAP_VIRT_START && va < VMAP_VIRT_END ) ... put it into the body of this if() then. Which might be preferable to keep the non-global-mapping case straight / quick. Jan > return vmap_to_mfn(va); >
diff --git a/xen/arch/x86/domain_page.c b/xen/arch/x86/domain_page.c index 55e337aaf7..89caefc8a2 100644 --- a/xen/arch/x86/domain_page.c +++ b/xen/arch/x86/domain_page.c @@ -14,8 +14,10 @@ #include <xen/sched.h> #include <xen/vmap.h> #include <asm/current.h> +#include <asm/fixmap.h> #include <asm/flushtlb.h> #include <asm/hardirq.h> +#include <asm/pmap.h> #include <asm/setup.h> static DEFINE_PER_CPU(struct vcpu *, override); @@ -35,10 +37,11 @@ static inline struct vcpu *mapcache_current_vcpu(void) /* * When using efi runtime page tables, we have the equivalent of the idle * domain's page tables but current may point at another domain's VCPU. - * Return NULL as though current is not properly set up yet. + * Return the idle domains's vcpu on that core because the efi per-domain + * region (where the mapcache is) is in-sync with the idle domain. */ if ( efi_rs_using_pgtables() ) - return NULL; + return idle_vcpu[smp_processor_id()]; /* * If guest_table is NULL, and we are running a paravirtualised guest, @@ -77,18 +80,24 @@ void *map_domain_page(mfn_t mfn) struct vcpu_maphash_entry *hashent; #ifdef NDEBUG - if ( mfn_x(mfn) <= PFN_DOWN(__pa(HYPERVISOR_VIRT_END - 1)) ) + if ( arch_mfns_in_directmap(mfn_x(mfn), 1) ) return mfn_to_virt(mfn_x(mfn)); #endif v = mapcache_current_vcpu(); - if ( !v ) - return mfn_to_virt(mfn_x(mfn)); + if ( !v || !v->domain->arch.mapcache.inuse ) + { + if ( arch_mfns_in_directmap(mfn_x(mfn), 1) ) + return mfn_to_virt(mfn_x(mfn)); + else + { + BUG_ON(system_state >= SYS_STATE_smp_boot); + return pmap_map(mfn); + } + } dcache = &v->domain->arch.mapcache; vcache = &v->arch.mapcache; - if ( !dcache->inuse ) - return mfn_to_virt(mfn_x(mfn)); perfc_incr(map_domain_page_count); @@ -184,6 +193,12 @@ void unmap_domain_page(const void *ptr) if ( !va || va >= DIRECTMAP_VIRT_START ) return; + if ( va >= FIXADDR_START && va < FIXADDR_TOP ) + { + pmap_unmap((void *)ptr); + return; + } + ASSERT(va >= MAPCACHE_VIRT_START && va < MAPCACHE_VIRT_END); v = 
mapcache_current_vcpu(); @@ -237,7 +252,7 @@ int mapcache_domain_init(struct domain *d) unsigned int bitmap_pages; #ifdef NDEBUG - if ( !mem_hotplug && max_page <= PFN_DOWN(__pa(HYPERVISOR_VIRT_END - 1)) ) + if ( !mem_hotplug && arch_mfn_in_directmap(0, max_page) ) return 0; #endif @@ -308,7 +323,7 @@ void *map_domain_page_global(mfn_t mfn) local_irq_is_enabled())); #ifdef NDEBUG - if ( mfn_x(mfn) <= PFN_DOWN(__pa(HYPERVISOR_VIRT_END - 1)) ) + if ( arch_mfn_in_directmap(mfn_x(mfn, 1)) ) return mfn_to_virt(mfn_x(mfn)); #endif @@ -335,6 +350,23 @@ mfn_t domain_page_map_to_mfn(const void *ptr) if ( va >= DIRECTMAP_VIRT_START ) return _mfn(virt_to_mfn(ptr)); + /* + * The fixmap is stealing the top-end of the VMAP. So the check for + * the PMAP *must* happen first. + * + * Also, the fixmap translate a slot to an address backwards. The + * logic will rely on it to avoid any complexity. So check at + * compile time this will always hold. + */ + BUILD_BUG_ON(fix_to_virt(FIX_PMAP_BEGIN) < fix_to_virt(FIX_PMAP_END)); + + if ( ((unsigned long)fix_to_virt(FIX_PMAP_END) <= va) && + ((va & PAGE_MASK) <= (unsigned long)fix_to_virt(FIX_PMAP_BEGIN)) ) + { + BUG_ON(system_state >= SYS_STATE_smp_boot); + return l1e_get_mfn(l1_fixmap[l1_table_offset(va)]); + } + if ( va >= VMAP_VIRT_START && va < VMAP_VIRT_END ) return vmap_to_mfn(va);