Message ID | 20240910103932.7634-6-jgross@suse.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | xen: fix dom0 PV boot on some AMD machines | expand |
On 10.09.2024 12:39, Juergen Gross wrote: > When running as a Xen PV dom0 it can happen that the kernel is being > loaded to a guest physical address conflicting with the host memory > map. > > In order to be able to resolve this conflict, add the capability to > remap non-RAM areas to different guest PFNs. A function to use this > remapping information for other purposes than doing the remap will be > added when needed. > > As the number of conflicts should be rather low (currently only > machines with max. 1 conflict are known), save the remap data in a > small statically allocated array. > > Signed-off-by: Juergen Gross <jgross@suse.com> Reviewed-by: Jan Beulich <jbeulich@suse.com> with two cosmetic remarks: > @@ -792,6 +793,70 @@ int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops, > return ret; > } > > +/* Remapped non-RAM areas */ > +#define NR_NONRAM_REMAP 4 > +static struct nonram_remap { > + phys_addr_t maddr; > + phys_addr_t paddr; > + size_t size; > +} xen_nonram_remap[NR_NONRAM_REMAP] __ro_after_init; > +static unsigned int nr_nonram_remap __ro_after_init; I take it this is the canonical placement of section attributes in the kernel? (In Xen I'd ask for the attributes to be moved ahead of the identifiers being declared.) > +/* > + * Do the real remapping of non-RAM regions as specified in the > + * xen_nonram_remap[] array. > + * In case of an error just crash the system. > + */ > +void __init xen_do_remap_nonram(void) > +{ > + unsigned int i; > + unsigned int remapped = 0; > + const struct nonram_remap *remap = xen_nonram_remap; > + unsigned long pfn, mfn, end_pfn; > + > + for (i = 0; i < nr_nonram_remap; i++) { > + end_pfn = PFN_UP(remap->paddr + remap->size); > + pfn = PFN_DOWN(remap->paddr); > + mfn = PFN_DOWN(remap->maddr); > + while (pfn < end_pfn) { > + if (!set_phys_to_machine(pfn, mfn)) { > + pr_err("Failed to set p2m mapping for pfn=%lx mfn=%lx\n", > + pfn, mfn); > + BUG(); Wouldn't panic() get you both in one operation? Or do you expect the call trace / register state to be of immediate relevance for analysis? Jan
On 10.09.24 14:26, Jan Beulich wrote: > On 10.09.2024 12:39, Juergen Gross wrote: >> When running as a Xen PV dom0 it can happen that the kernel is being >> loaded to a guest physical address conflicting with the host memory >> map. >> >> In order to be able to resolve this conflict, add the capability to >> remap non-RAM areas to different guest PFNs. A function to use this >> remapping information for other purposes than doing the remap will be >> added when needed. >> >> As the number of conflicts should be rather low (currently only >> machines with max. 1 conflict are known), save the remap data in a >> small statically allocated array. >> >> Signed-off-by: Juergen Gross <jgross@suse.com> > > Reviewed-by: Jan Beulich <jbeulich@suse.com> > with two cosmetic remarks: > >> @@ -792,6 +793,70 @@ int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops, >> return ret; >> } >> >> +/* Remapped non-RAM areas */ >> +#define NR_NONRAM_REMAP 4 >> +static struct nonram_remap { >> + phys_addr_t maddr; >> + phys_addr_t paddr; >> + size_t size; >> +} xen_nonram_remap[NR_NONRAM_REMAP] __ro_after_init; >> +static unsigned int nr_nonram_remap __ro_after_init; > > I take it this is the canonical placement of section attributes in the > kernel? (In Xen I'd ask for the attributes to be moved ahead of the > identifiers being declared.) I didn't find an explicit mentioning in the coding style, but most examples I've found place the section attribute after the name of the variable. > >> +/* >> + * Do the real remapping of non-RAM regions as specified in the >> + * xen_nonram_remap[] array. >> + * In case of an error just crash the system. >> + */ >> +void __init xen_do_remap_nonram(void) >> +{ >> + unsigned int i; >> + unsigned int remapped = 0; >> + const struct nonram_remap *remap = xen_nonram_remap; >> + unsigned long pfn, mfn, end_pfn; >> + >> + for (i = 0; i < nr_nonram_remap; i++) { >> + end_pfn = PFN_UP(remap->paddr + remap->size); >> + pfn = PFN_DOWN(remap->paddr); >> + mfn = PFN_DOWN(remap->maddr); >> + while (pfn < end_pfn) { >> + if (!set_phys_to_machine(pfn, mfn)) { >> + pr_err("Failed to set p2m mapping for pfn=%lx mfn=%lx\n", >> + pfn, mfn); >> + BUG(); > > Wouldn't panic() get you both in one operation? Or do you expect the call > trace / register state to be of immediate relevance for analysis? You are right, in this case panic() is a better option. Juergen
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 7c735b730acd..5b2aeae6f9e4 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -80,6 +80,7 @@ #include <asm/xen/hypervisor.h> #include <xen/balloon.h> #include <xen/grant_table.h> +#include <xen/hvc-console.h> #include "xen-ops.h" @@ -792,6 +793,70 @@ int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops, return ret; } +/* Remapped non-RAM areas */ +#define NR_NONRAM_REMAP 4 +static struct nonram_remap { + phys_addr_t maddr; + phys_addr_t paddr; + size_t size; +} xen_nonram_remap[NR_NONRAM_REMAP] __ro_after_init; +static unsigned int nr_nonram_remap __ro_after_init; + +/* + * Do the real remapping of non-RAM regions as specified in the + * xen_nonram_remap[] array. + * In case of an error just crash the system. + */ +void __init xen_do_remap_nonram(void) +{ + unsigned int i; + unsigned int remapped = 0; + const struct nonram_remap *remap = xen_nonram_remap; + unsigned long pfn, mfn, end_pfn; + + for (i = 0; i < nr_nonram_remap; i++) { + end_pfn = PFN_UP(remap->paddr + remap->size); + pfn = PFN_DOWN(remap->paddr); + mfn = PFN_DOWN(remap->maddr); + while (pfn < end_pfn) { + if (!set_phys_to_machine(pfn, mfn)) { + pr_err("Failed to set p2m mapping for pfn=%lx mfn=%lx\n", + pfn, mfn); + BUG(); + } + + pfn++; + mfn++; + remapped++; + } + + remap++; + } + + pr_info("Remapped %u non-RAM page(s)\n", remapped); +} + +/* + * Add a new non-RAM remap entry. + * In case of no free entry found, just crash the system. + */ +void __init xen_add_remap_nonram(phys_addr_t maddr, phys_addr_t paddr, + unsigned long size) +{ + BUG_ON((maddr & ~PAGE_MASK) != (paddr & ~PAGE_MASK)); + + if (nr_nonram_remap == NR_NONRAM_REMAP) { + xen_raw_console_write("Number of required E820 entry remapping actions exceed maximum value\n"); + BUG(); + } + + xen_nonram_remap[nr_nonram_remap].maddr = maddr; + xen_nonram_remap[nr_nonram_remap].paddr = paddr; + xen_nonram_remap[nr_nonram_remap].size = size; + + nr_nonram_remap++; +} + #ifdef CONFIG_XEN_DEBUG_FS #include <linux/debugfs.h> static int p2m_dump_show(struct seq_file *m, void *v) diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 9a27d1d653d3..e1b782e823e6 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -47,6 +47,9 @@ void xen_mm_unpin_all(void); #ifdef CONFIG_X86_64 void __init xen_relocate_p2m(void); #endif +void __init xen_do_remap_nonram(void); +void __init xen_add_remap_nonram(phys_addr_t maddr, phys_addr_t paddr, + unsigned long size); void __init xen_chk_is_e820_usable(phys_addr_t start, phys_addr_t size, const char *component);