diff mbox series

[v5] x86/PVH: improve Dom0 memory size calculation

Message ID 5d675c6c-8cb1-f218-75d7-cb13f8b2e669@suse.com (mailing list archive)
State New, archived
Headers show
Series [v5] x86/PVH: improve Dom0 memory size calculation | expand

Commit Message

Jan Beulich Dec. 3, 2021, 10:27 a.m. UTC
Assuming that the accounting for IOMMU page tables will also take care
of the P2M needs was wrong: dom0_paging_pages() can determine a far
higher value, high enough for the system to run out of memory while
setting up Dom0. Hence in the case of shared page tables the larger of
the two values needs to be used (without shared page tables the sum of
both continues to be applicable).

To not further complicate the logic, eliminate the up-to-2-iteration
loop in favor of doing a few calculations twice (before and after
calling dom0_paging_pages()). While this will lead to slightly too high
a value in "cpu_pages", it is deemed better to account a few too many
than a few too little.

As a result the calculation is now deemed good enough to no longer
warrant the warning message, which therefore gets dropped.

Also uniformly use paging_mode_enabled(), not is_hvm_domain().

While there also account for two further aspects in the PV case: With
"iommu=dom0-passthrough" no IOMMU page tables would get allocated, so
none need accounting for. And if shadow mode is to be enabled (including
only potentially, because of "pv-l1tf=dom0"), setting aside a suitable
amount for the P2M pool to get populated is also necessary (i.e. similar
to the non-shared-page-tables case of PVH).

Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
v5: Fix the case where no command line (or built-in) option was given.
    Further re-work of what was done for v4. Also check
    opt_pv_l1tf_hwdom. Actually remove the warning message.
v4: Rework to eliminate the up-to-2-iteration loop.

Comments

Roger Pau Monné Dec. 14, 2021, 10:48 a.m. UTC | #1
On Fri, Dec 03, 2021 at 11:27:20AM +0100, Jan Beulich wrote:
> Assuming that the accounting for IOMMU page tables will also take care
> of the P2M needs was wrong: dom0_paging_pages() can determine a far
> higher value, high enough for the system to run out of memory while
> setting up Dom0. Hence in the case of shared page tables the larger of
> the two values needs to be used (without shared page tables the sum of
> both continues to be applicable).
> 
> To not further complicate the logic, eliminate the up-to-2-iteration
> loop in favor of doing a few calculations twice (before and after
> calling dom0_paging_pages()). While this will lead to slightly too high
> a value in "cpu_pages", it is deemed better to account a few too many
> than a few too little.
> 
> As a result the calculation is now deemed good enough to no longer
> warrant the warning message, which therefore gets dropped.
> 
> Also uniformly use paging_mode_enabled(), not is_hvm_domain().
> 
> While there also account for two further aspects in the PV case: With
> "iommu=dom0-passthrough" no IOMMU page tables would get allocated, so
> none need accounting for. And if shadow mode is to be enabled (including
> only potentially, because of "pv-l1tf=dom0"), setting aside a suitable
> amount for the P2M pool to get populated is also necessary (i.e. similar
> to the non-shared-page-tables case of PVH).
> 
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>

Thanks, Roger.
diff mbox series

Patch

--- a/xen/arch/x86/dom0_build.c
+++ b/xen/arch/x86/dom0_build.c
@@ -19,6 +19,7 @@ 
 #include <asm/io_apic.h>
 #include <asm/p2m.h>
 #include <asm/setup.h>
+#include <asm/spec_ctrl.h>
 
 struct memsize {
     long nr_pages;
@@ -321,12 +322,22 @@  unsigned long __init dom0_paging_pages(c
     return ((memkb + 1023) / 1024) << (20 - PAGE_SHIFT);
 }
 
+
+/*
+ * If allocation isn't specified, reserve 1/16th of available memory for
+ * things like DMA buffers. This reservation is clamped to a maximum of 128MB.
+ */
+static unsigned long __init default_nr_pages(unsigned long avail)
+{
+    return avail - (pv_shim ? pv_shim_mem(avail)
+                            : min(avail / 16, 128UL << (20 - PAGE_SHIFT)));
+}
+
 unsigned long __init dom0_compute_nr_pages(
     struct domain *d, struct elf_dom_parms *parms, unsigned long initrd_len)
 {
     nodeid_t node;
-    unsigned long avail = 0, nr_pages, min_pages, max_pages;
-    bool need_paging;
+    unsigned long avail = 0, nr_pages, min_pages, max_pages, iommu_pages = 0;
 
     /* The ordering of operands is to work around a clang5 issue. */
     if ( CONFIG_DOM0_MEM[0] && !dom0_mem_set )
@@ -344,53 +355,47 @@  unsigned long __init dom0_compute_nr_pag
         avail -= d->max_vcpus - 1;
 
     /* Reserve memory for iommu_dom0_init() (rough estimate). */
-    if ( is_iommu_enabled(d) )
+    if ( is_iommu_enabled(d) && !iommu_hwdom_passthrough )
     {
         unsigned int s;
 
         for ( s = 9; s < BITS_PER_LONG; s += 9 )
-            avail -= max_pdx >> s;
+            iommu_pages += max_pdx >> s;
+
+        avail -= iommu_pages;
     }
 
-    need_paging = is_hvm_domain(d) &&
-        (!iommu_use_hap_pt(d) || !paging_mode_hap(d));
-    for ( ; ; need_paging = false )
+    if ( paging_mode_enabled(d) || opt_dom0_shadow || opt_pv_l1tf_hwdom )
     {
-        nr_pages = get_memsize(&dom0_size, avail);
-        min_pages = get_memsize(&dom0_min_size, avail);
-        max_pages = get_memsize(&dom0_max_size, avail);
+        unsigned long cpu_pages;
+
+        nr_pages = get_memsize(&dom0_size, avail) ?: default_nr_pages(avail);
 
         /*
-         * If allocation isn't specified, reserve 1/16th of available memory
-         * for things like DMA buffers. This reservation is clamped to a
-         * maximum of 128MB.
+         * Clamp according to min/max limits and available memory
+         * (preliminary).
          */
-        if ( !nr_pages )
-        {
-            nr_pages = avail - (pv_shim ? pv_shim_mem(avail)
-                                 : min(avail / 16, 128UL << (20 - PAGE_SHIFT)));
-            if ( is_hvm_domain(d) && !need_paging )
-                /*
-                 * Temporary workaround message until internal (paging) memory
-                 * accounting required to build a pvh dom0 is improved.
-                 */
-                printk("WARNING: PVH dom0 without dom0_mem set is still unstable. "
-                       "If you get crashes during boot, try adding a dom0_mem parameter\n");
-        }
-
-
-        /* Clamp according to min/max limits and available memory. */
-        nr_pages = max(nr_pages, min_pages);
-        nr_pages = min(nr_pages, max_pages);
+        nr_pages = max(nr_pages, get_memsize(&dom0_min_size, avail));
+        nr_pages = min(nr_pages, get_memsize(&dom0_max_size, avail));
         nr_pages = min(nr_pages, avail);
 
-        if ( !need_paging )
-            break;
+        cpu_pages = dom0_paging_pages(d, nr_pages);
 
-        /* Reserve memory for shadow or HAP. */
-        avail -= dom0_paging_pages(d, nr_pages);
+        if ( !iommu_use_hap_pt(d) )
+            avail -= cpu_pages;
+        else if ( cpu_pages > iommu_pages )
+            avail -= cpu_pages - iommu_pages;
     }
 
+    nr_pages = get_memsize(&dom0_size, avail) ?: default_nr_pages(avail);
+    min_pages = get_memsize(&dom0_min_size, avail);
+    max_pages = get_memsize(&dom0_max_size, avail);
+
+    /* Clamp according to min/max limits and available memory (final). */
+    nr_pages = max(nr_pages, min_pages);
+    nr_pages = min(nr_pages, max_pages);
+    nr_pages = min(nr_pages, avail);
+
     if ( is_pv_domain(d) &&
          (parms->p2m_base == UNSET_ADDR) && !memsize_gt_zero(&dom0_size) &&
          (!memsize_gt_zero(&dom0_min_size) || (nr_pages > min_pages)) )