diff mbox series

[v2,5/7] xen: add capability to remap non-RAM pages to different PFNs

Message ID 20240820082012.31316-6-jgross@suse.com (mailing list archive)
State Superseded
Headers show
Series xen: fix dom0 PV boot on some AMD machines | expand

Commit Message

Juergen Gross Aug. 20, 2024, 8:20 a.m. UTC
When running as a Xen PV dom0 it can happen that the kernel is being
loaded to a guest physical address conflicting with the host memory
map.

In order to be able to resolve this conflict, add the capability to
remap non-RAM areas to different guest PFNs. A function to use this
remapping information for other purposes than doing the remap will be
added when needed.

As the number of conflicts should be rather low (currently only
machines with max. 1 conflict are known), save the remap data in a
small statically allocated array.

Signed-off-by: Juergen Gross <jgross@suse.com>
---
V2:
- split off from patch 5 of V1 of the series
- moved to p2m.c
---
 arch/x86/xen/p2m.c     | 65 ++++++++++++++++++++++++++++++++++++++++++
 arch/x86/xen/xen-ops.h |  3 ++
 2 files changed, 68 insertions(+)

Comments

Jan Beulich Aug. 20, 2024, 9:38 a.m. UTC | #1
On 20.08.2024 10:20, Juergen Gross wrote:
> When running as a Xen PV dom0 it can happen that the kernel is being
> loaded to a guest physical address conflicting with the host memory
> map.
> 
> In order to be able to resolve this conflict, add the capability to
> remap non-RAM areas to different guest PFNs. A function to use this
> remapping information for other purposes than doing the remap will be
> added when needed.
> 
> As the number of conflicts should be rather low (currently only
> machines with max. 1 conflict are known), save the remap data in a
> small statically allocated array.
> 
> Signed-off-by: Juergen Gross <jgross@suse.com>
> ---
> V2:
> - split off from patch 5 of V1 of the series
> - moved to p2m.c
> ---
>  arch/x86/xen/p2m.c     | 65 ++++++++++++++++++++++++++++++++++++++++++
>  arch/x86/xen/xen-ops.h |  3 ++
>  2 files changed, 68 insertions(+)
> 
> diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
> index 7c735b730acd..bb55e0fe1a04 100644
> --- a/arch/x86/xen/p2m.c
> +++ b/arch/x86/xen/p2m.c
> @@ -80,6 +80,7 @@
>  #include <asm/xen/hypervisor.h>
>  #include <xen/balloon.h>
>  #include <xen/grant_table.h>
> +#include <xen/hvc-console.h>
>  
>  #include "xen-ops.h"
>  
> @@ -792,6 +793,70 @@ int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops,
>  	return ret;
>  }
>  
> +/* Remapped non-RAM areas */
> +#define NR_NONRAM_REMAP 4
> +static struct nonram_remap {
> +	phys_addr_t maddr;
> +	phys_addr_t paddr;
> +	unsigned long size;

size_t?

> +} xen_nonram_remap[NR_NONRAM_REMAP];
> +static unsigned int nr_nonram_remap;

Both __initdata? Or, considering patch 6, at least __ro_after_init?

> +/*
> + * Do the real remapping of non-RAM regions as specified in the
> + * xen_nonram_remap[] array.
> + * In case of an error just crash the system.
> + */
> +void __init xen_do_remap_nonram(void)
> +{
> +	unsigned int i;
> +	unsigned int remapped = 0;
> +	struct nonram_remap *remap = xen_nonram_remap;
> +	unsigned long pfn, mfn, len;
> +
> +	if (!nr_nonram_remap)
> +		return;
> +
> +	for (i = 0; i < nr_nonram_remap; i++) {
> +		pfn = PFN_DOWN(remap->paddr);
> +		mfn = PFN_DOWN(remap->maddr);
> +		for (len = 0; len < remap->size; len += PAGE_SIZE) {
> +			if (!set_phys_to_machine(pfn, mfn)) {
> +				pr_err("Failed to set p2m mapping for pfn=%ld mfn=%ld\n",

I'm not convinced that frame numbers logged in decimal are overly useful.

> +				       pfn, mfn);
> +				BUG();
> +			}
> +
> +			pfn++;
> +			mfn++;
> +			remapped++;
> +		}
> +
> +		remap++;
> +	}
> +
> +	pr_info("Remapped %u non-RAM page(s)\n", remapped);

This message may be useful in a log also when nothing was remapped - maybe
drop the initial if()?

> +}
> +
> +/*
> + * Add a new non-RAM remap entry.
> + * In case of no free entry found, just crash the system.
> + */
> +void __init xen_add_remap_nonram(phys_addr_t maddr, phys_addr_t paddr,
> +				 unsigned long size)
> +{
> +	if (nr_nonram_remap == NR_NONRAM_REMAP) {
> +		xen_raw_console_write("Number of required E820 entry remapping actions exceed maximum value\n");
> +		BUG();
> +	}
> +
> +	xen_nonram_remap[nr_nonram_remap].maddr = maddr;
> +	xen_nonram_remap[nr_nonram_remap].paddr = paddr;
> +	xen_nonram_remap[nr_nonram_remap].size = size;
> +
> +	nr_nonram_remap++;
> +}

You don't enforce any constraints on the addresses / size here. With
this the loop in xen_do_remap_nonram() may terminate too early if non-
page-aligned values were passed into here. Both addresses not having
the same offset-into-page may also end up anomalous. Might be worth
switching to frame numbers / number-of-pages for the tracking struct.

Jan
Juergen Gross Sept. 10, 2024, 7:59 a.m. UTC | #2
On 20.08.24 11:38, Jan Beulich wrote:
> On 20.08.2024 10:20, Juergen Gross wrote:
>> When running as a Xen PV dom0 it can happen that the kernel is being
>> loaded to a guest physical address conflicting with the host memory
>> map.
>>
>> In order to be able to resolve this conflict, add the capability to
>> remap non-RAM areas to different guest PFNs. A function to use this
>> remapping information for other purposes than doing the remap will be
>> added when needed.
>>
>> As the number of conflicts should be rather low (currently only
>> machines with max. 1 conflict are known), save the remap data in a
>> small statically allocated array.
>>
>> Signed-off-by: Juergen Gross <jgross@suse.com>
>> ---
>> V2:
>> - split off from patch 5 of V1 of the series
>> - moved to p2m.c
>> ---
>>   arch/x86/xen/p2m.c     | 65 ++++++++++++++++++++++++++++++++++++++++++
>>   arch/x86/xen/xen-ops.h |  3 ++
>>   2 files changed, 68 insertions(+)
>>
>> diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
>> index 7c735b730acd..bb55e0fe1a04 100644
>> --- a/arch/x86/xen/p2m.c
>> +++ b/arch/x86/xen/p2m.c
>> @@ -80,6 +80,7 @@
>>   #include <asm/xen/hypervisor.h>
>>   #include <xen/balloon.h>
>>   #include <xen/grant_table.h>
>> +#include <xen/hvc-console.h>
>>   
>>   #include "xen-ops.h"
>>   
>> @@ -792,6 +793,70 @@ int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops,
>>   	return ret;
>>   }
>>   
>> +/* Remapped non-RAM areas */
>> +#define NR_NONRAM_REMAP 4
>> +static struct nonram_remap {
>> +	phys_addr_t maddr;
>> +	phys_addr_t paddr;
>> +	unsigned long size;
> 
> size_t?

Fine with me.

> 
>> +} xen_nonram_remap[NR_NONRAM_REMAP];
>> +static unsigned int nr_nonram_remap;
> 
> Both __initdata? Or, considering patch 6, at least __ro_after_init?

__ro_after_init should be fine.

> 
>> +/*
>> + * Do the real remapping of non-RAM regions as specified in the
>> + * xen_nonram_remap[] array.
>> + * In case of an error just crash the system.
>> + */
>> +void __init xen_do_remap_nonram(void)
>> +{
>> +	unsigned int i;
>> +	unsigned int remapped = 0;
>> +	struct nonram_remap *remap = xen_nonram_remap;
>> +	unsigned long pfn, mfn, len;
>> +
>> +	if (!nr_nonram_remap)
>> +		return;
>> +
>> +	for (i = 0; i < nr_nonram_remap; i++) {
>> +		pfn = PFN_DOWN(remap->paddr);
>> +		mfn = PFN_DOWN(remap->maddr);
>> +		for (len = 0; len < remap->size; len += PAGE_SIZE) {
>> +			if (!set_phys_to_machine(pfn, mfn)) {
>> +				pr_err("Failed to set p2m mapping for pfn=%ld mfn=%ld\n",
> 
> I'm not convinced that frame numbers logged in decimal are overly useful.

I agree. Will switch to hex.

> 
>> +				       pfn, mfn);
>> +				BUG();
>> +			}
>> +
>> +			pfn++;
>> +			mfn++;
>> +			remapped++;
>> +		}
>> +
>> +		remap++;
>> +	}
>> +
>> +	pr_info("Remapped %u non-RAM page(s)\n", remapped);
> 
> This message may be useful in a log also when nothing was remapped - maybe
> drop the initial if()?

Fine with me.

> 
>> +}
>> +
>> +/*
>> + * Add a new non-RAM remap entry.
>> + * In case of no free entry found, just crash the system.
>> + */
>> +void __init xen_add_remap_nonram(phys_addr_t maddr, phys_addr_t paddr,
>> +				 unsigned long size)
>> +{
>> +	if (nr_nonram_remap == NR_NONRAM_REMAP) {
>> +		xen_raw_console_write("Number of required E820 entry remapping actions exceed maximum value\n");
>> +		BUG();
>> +	}
>> +
>> +	xen_nonram_remap[nr_nonram_remap].maddr = maddr;
>> +	xen_nonram_remap[nr_nonram_remap].paddr = paddr;
>> +	xen_nonram_remap[nr_nonram_remap].size = size;
>> +
>> +	nr_nonram_remap++;
>> +}
> 
> You don't enforce any constraints on the addresses / size here. With
> this the loop in xen_do_remap_nonram() may terminate too early if non-
> page-aligned values were passed into here. Both addresses not having
> the same offset-into-page may also end up anomalous. Might be worth
> switching to frame numbers / number-of-pages for the tracking struct.

Hmm, I'd like to at least WARN() in case someone tries to access a remapped
area out of bounds, which requires to keep the original non-page-aligned
values.

I'll fix the xen_do_remap_nonram() loop.


Juergen
diff mbox series

Patch

diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 7c735b730acd..bb55e0fe1a04 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -80,6 +80,7 @@ 
 #include <asm/xen/hypervisor.h>
 #include <xen/balloon.h>
 #include <xen/grant_table.h>
+#include <xen/hvc-console.h>
 
 #include "xen-ops.h"
 
@@ -792,6 +793,70 @@  int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops,
 	return ret;
 }
 
+/* Remapped non-RAM areas; written only by __init code, read-only later. */
+#define NR_NONRAM_REMAP 4
+static struct nonram_remap {
+	phys_addr_t maddr;	/* machine address of the area */
+	phys_addr_t paddr;	/* guest physical address it is remapped to */
+	size_t size;		/* length of the area in bytes */
+} xen_nonram_remap[NR_NONRAM_REMAP] __ro_after_init;
+static unsigned int nr_nonram_remap __ro_after_init;
+
+/*
+ * Do the real remapping of non-RAM regions as specified in the
+ * xen_nonram_remap[] array: each entry's guest frames are pointed at the
+ * entry's machine frames. The range is rounded out to whole pages so
+ * that entries which are not page aligned are covered completely.
+ * In case of an error just crash the system.
+ */
+void __init xen_do_remap_nonram(void)
+{
+	unsigned int i;
+	unsigned int remapped = 0;
+	const struct nonram_remap *remap = xen_nonram_remap;
+	unsigned long pfn, mfn, end_pfn;
+
+	for (i = 0; i < nr_nonram_remap; i++) {
+		end_pfn = PFN_UP(remap->paddr + remap->size);
+		pfn = PFN_DOWN(remap->paddr);
+		mfn = PFN_DOWN(remap->maddr);
+		while (pfn < end_pfn) {
+			if (!set_phys_to_machine(pfn, mfn)) {
+				pr_err("Failed to set p2m mapping for pfn=%lx mfn=%lx\n",
+				       pfn, mfn);
+				BUG();
+			}
+
+			pfn++;
+			mfn++;
+			remapped++;
+		}
+
+		remap++;
+	}
+
+	pr_info("Remapped %u non-RAM page(s)\n", remapped);
+}
+
+/*
+ * Add a new non-RAM remap entry. maddr and paddr must share the same
+ * offset into their page. A violation, or no free entry, crashes the system.
+ */
+void __init xen_add_remap_nonram(phys_addr_t maddr, phys_addr_t paddr,
+				 unsigned long size)
+{
+	BUG_ON((maddr & ~PAGE_MASK) != (paddr & ~PAGE_MASK));
+	if (nr_nonram_remap == NR_NONRAM_REMAP) {
+		xen_raw_console_write("Number of required E820 entry remapping actions exceed maximum value\n");
+		BUG();
+	}
+
+	xen_nonram_remap[nr_nonram_remap].maddr = maddr;
+	xen_nonram_remap[nr_nonram_remap].paddr = paddr;
+	xen_nonram_remap[nr_nonram_remap].size = size;
+	nr_nonram_remap++;
+}
+
 #ifdef CONFIG_XEN_DEBUG_FS
 #include <linux/debugfs.h>
 static int p2m_dump_show(struct seq_file *m, void *v)
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 9a27d1d653d3..e1b782e823e6 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -47,6 +47,9 @@  void xen_mm_unpin_all(void);
 #ifdef CONFIG_X86_64
 void __init xen_relocate_p2m(void);
 #endif
+void __init xen_do_remap_nonram(void);
+void __init xen_add_remap_nonram(phys_addr_t maddr, phys_addr_t paddr,
+				 unsigned long size);
 
 void __init xen_chk_is_e820_usable(phys_addr_t start, phys_addr_t size,
 				   const char *component);