diff mbox series

[7/8] x86/mm: make code robust to future PAT changes

Message ID 33f3896ba4cdf50ceb0377f071682ac5d3f576c4.1670300446.git.demi@invisiblethingslab.com (mailing list archive)
State Superseded
Headers show
Series Make PAT handling less brittle | expand

Commit Message

Demi Marie Obenour Dec. 6, 2022, 4:33 a.m. UTC
It may be desirable to change Xen's PAT for various reasons.  This
requires changes to several _PAGE_* macros as well.  Add static
assertions to check that XEN_MSR_PAT is consistent with the _PAGE_*
macros.

Additionally, Xen has two unused entries in the PAT.  Currently these
are UC, but this will change if the hardware ever supports additional
memory types.  To avoid future problems, this adds a check in debug
builds that injects #GP into a guest that tries to use one of these
entries, along with returning -EINVAL from the hypercall.  Future
versions of Xen will refuse to use these entries even in release builds.

Signed-off-by: Demi Marie Obenour <demi@invisiblethingslab.com>
---
 xen/arch/x86/mm.c | 58 +++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 54 insertions(+), 4 deletions(-)

Comments

Jan Beulich Dec. 6, 2022, 12:01 p.m. UTC | #1
On 06.12.2022 05:33, Demi Marie Obenour wrote:
> It may be desirable to change Xen's PAT for various reasons.  This
> requires changes to several _PAGE_* macros as well.  Add static
> assertions to check that XEN_MSR_PAT is consistent with the _PAGE_*
> macros.
> 
> Additionally, Xen has two unused entries in the PAT.  Currently these
> are UC, but this will change if the hardware ever supports additional
> memory types.  To avoid future problems, this adds a check in debug
> builds that injects #GP into a guest that tries to use one of these

I realize we already have a case of injecting of #GP from a hypercall
handling path, but I'm afraid this isn't really sane. _If_ #GP was
possible at all for any hypercall, it should imo be raised on the
syscall instruction (or the int $0x82 for compat guests), not on the
instruction following it. Plus, as a major difference, in the
existing case there's no other means to return an error indication.

> entries, along with returning -EINVAL from the hypercall.  Future
> versions of Xen will refuse to use these entries even in release builds.

Mind me asking where you're taking "will" from? (I might view "may" as
appropriate here.)

> @@ -961,13 +1000,24 @@ get_page_from_l1e(
>  
>          switch ( l1f & PAGE_CACHE_ATTRS )
>          {
> -        case _PAGE_WB:
> +        default:
> +#ifndef NDEBUG
> +            printk(XENLOG_G_WARNING
> +                   "d%d: Guest tried to use bad cachability attribute %u for MFN %lx\n",
> +                   l1e_owner->domain_id, l1f & PAGE_CACHE_ATTRS, mfn);
> +            pv_inject_hw_exception(TRAP_gp_fault, 0);
> +            return -EINVAL;
> +#endif
>          case _PAGE_WT:
>          case _PAGE_WP:
> -            flip |= (l1f & PAGE_CACHE_ATTRS) ^ _PAGE_UC;
> +        case _PAGE_WB:
> +            /* Force this to be uncachable */
> +            return flip | ( (l1f & PAGE_CACHE_ATTRS) ^ _PAGE_UC );
> +        case _PAGE_WC:
> +        case _PAGE_UC:
> +        case _PAGE_UCM:
> +            return flip;
>          }
> -
> -        return flip;
>      }

May I ask that you leave the basic structure here as is, merely adding
the default block you care about and the three case labels which you
need to add to compensate? Also please use %pd in new code logging
domain IDs. I'm also not convinced l1e_owner is the most appropriate
domain to actually blame here, at least with the wording you've chosen
to use.

Jan
Andrew Cooper Dec. 6, 2022, 12:06 p.m. UTC | #2
On 06/12/2022 04:33, Demi Marie Obenour wrote:
> It may be desirable to change Xen's PAT for various reasons.  This
> requires changes to several _PAGE_* macros as well.  Add static
> assertions to check that XEN_MSR_PAT is consistent with the _PAGE_*
> macros.
>
> Additionally, Xen has two unused entries in the PAT.  Currently these
> are UC, but this will change if the hardware ever supports additional
> memory types.  To avoid future problems, this adds a check in debug
> builds that injects #GP into a guest that tries to use one of these
> entries, along with returning -EINVAL from the hypercall.  Future
> versions of Xen will refuse to use these entries even in release builds.
>
> Signed-off-by: Demi Marie Obenour <demi@invisiblethingslab.com>
> ---
>  xen/arch/x86/mm.c | 58 +++++++++++++++++++++++++++++++++++++++++++----
>  1 file changed, 54 insertions(+), 4 deletions(-)
>
> diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
> index 5d05399c3a841bf03991a3bed63df9a815c1e891..517fccee699b2a673ba537e47933aefc80017aa5 100644
> --- a/xen/arch/x86/mm.c
> +++ b/xen/arch/x86/mm.c
> @@ -849,6 +849,45 @@ static int cf_check print_mmio_emul_range(
>  }
>  #endif
>  
> +static void __init __maybe_unused build_assertions(void)

This wants to be at the very bottom of the file.  (And also in the
previous patch to remove the _Static_assert())

> +{
> +    /* A bunch of static assertions to check that the XEN_MSR_PAT is valid
> +     * and consistent with the _PAGE_* macros */
> +#define PAT_VALUE(v) (0xFF & (XEN_MSR_PAT >> (8 * (v))))
> +#define BAD_VALUE(v) ((v) < 0 || (v) > 7 ||                                    \
> +                      (v) == MSR_PAT_RESERVED_1 || (v) == MSR_PAT_RESERVED_2)
> +#define BAD_PAT_VALUE(v) BUILD_BUG_ON(BAD_VALUE(PAT_VALUE(v)))
> +    BAD_PAT_VALUE(0);
> +    BAD_PAT_VALUE(1);
> +    BAD_PAT_VALUE(2);
> +    BAD_PAT_VALUE(3);
> +    BAD_PAT_VALUE(4);
> +    BAD_PAT_VALUE(5);
> +    BAD_PAT_VALUE(6);
> +    BAD_PAT_VALUE(7);
> +#undef BAD_PAT_VALUE
> +#undef BAD_VALUE

Given that you've reworked the PAT declaration to be of the form (MT <<
shift), I'm not sure how much value this check is.

> +#define PAT_SHIFT(page_value) (((page_value) & _PAGE_PAT) >> 5 |               \
> +                               ((page_value) & (_PAGE_PCD | _PAGE_PWT)) >> 3)

pte_flags_to_cacheattr()

> +#define CHECK_PAGE_VALUE(page_value) do {                                      \
> +    /* Check that the _PAGE_* macros only use bits from PAGE_CACHE_ATTRS */    \
> +    BUILD_BUG_ON(((_PAGE_##page_value) & PAGE_CACHE_ATTRS) !=                  \
> +                  (_PAGE_##page_value));                                       \
> +    /* Check that the _PAGE_* are consistent with XEN_MSR_PAT */               \
> +    BUILD_BUG_ON(PAT_VALUE(PAT_SHIFT(_PAGE_##page_value)) !=                   \
> +                 (MSR_PAT_##page_value));                                      \
> +} while (0)
> +    CHECK_PAGE_VALUE(WT);
> +    CHECK_PAGE_VALUE(WB);
> +    CHECK_PAGE_VALUE(WC);
> +    CHECK_PAGE_VALUE(UC);
> +    CHECK_PAGE_VALUE(UCM);
> +    CHECK_PAGE_VALUE(WP);
> +#undef CHECK_PAGE_VALUE
> +#undef PAT_SHIFT
> +#undef PAT_VALUE
> +}
> +
>  /*
>   * get_page_from_l1e returns:
>   *   0  => success (page not present also counts as such)
> @@ -961,13 +1000,24 @@ get_page_from_l1e(
>  
>          switch ( l1f & PAGE_CACHE_ATTRS )
>          {
> -        case _PAGE_WB:
> +        default:
> +#ifndef NDEBUG
> +            printk(XENLOG_G_WARNING
> +                   "d%d: Guest tried to use bad cachability attribute %u for MFN %lx\n",
> +                   l1e_owner->domain_id, l1f & PAGE_CACHE_ATTRS, mfn);

%pd.  You absolutely want to convert the PTE bits to a PAT value before
priniting (decimal on a PTE value is useless), and %PRI_mfn.

> +            pv_inject_hw_exception(TRAP_gp_fault, 0);

As I said on IRC, we do want this, but I'm not certain if we can get
away with just enabling it in debug builds.  _PAGE_GNTTAB was ok because
it has always been like that, but there's a non-trivial chance that
there are existing dom0 kernels which violate this constraint.

> +            return -EINVAL;
> +#endif
>          case _PAGE_WT:
>          case _PAGE_WP:
> -            flip |= (l1f & PAGE_CACHE_ATTRS) ^ _PAGE_UC;
> +        case _PAGE_WB:
> +            /* Force this to be uncachable */
> +            return flip | ( (l1f & PAGE_CACHE_ATTRS) ^ _PAGE_UC );
> +        case _PAGE_WC:
> +        case _PAGE_UC:
> +        case _PAGE_UCM:
> +            return flip;
>          }
> -
> -        return flip;

This wants reworking over Jan's suggestion in patch 1, and modifying to
reduce churn.  (Keep _PAGE_WB in the same order WRT _PAGE_WT, the
uncached memory types should simply break, and default should be at the
end.)

~Andrew
Demi Marie Obenour Dec. 6, 2022, 5:55 p.m. UTC | #3
On Tue, Dec 06, 2022 at 12:06:24PM +0000, Andrew Cooper wrote:
> On 06/12/2022 04:33, Demi Marie Obenour wrote:
> > It may be desirable to change Xen's PAT for various reasons.  This
> > requires changes to several _PAGE_* macros as well.  Add static
> > assertions to check that XEN_MSR_PAT is consistent with the _PAGE_*
> > macros.
> >
> > Additionally, Xen has two unused entries in the PAT.  Currently these
> > are UC, but this will change if the hardware ever supports additional
> > memory types.  To avoid future problems, this adds a check in debug
> > builds that injects #GP into a guest that tries to use one of these
> > entries, along with returning -EINVAL from the hypercall.  Future
> > versions of Xen will refuse to use these entries even in release builds.
> >
> > Signed-off-by: Demi Marie Obenour <demi@invisiblethingslab.com>
> > ---
> >  xen/arch/x86/mm.c | 58 +++++++++++++++++++++++++++++++++++++++++++----
> >  1 file changed, 54 insertions(+), 4 deletions(-)
> >
> > diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
> > index 5d05399c3a841bf03991a3bed63df9a815c1e891..517fccee699b2a673ba537e47933aefc80017aa5 100644
> > --- a/xen/arch/x86/mm.c
> > +++ b/xen/arch/x86/mm.c
> > @@ -849,6 +849,45 @@ static int cf_check print_mmio_emul_range(
> >  }
> >  #endif
> >  
> > +static void __init __maybe_unused build_assertions(void)
> 
> This wants to be at the very bottom of the file.  (And also in the
> previous patch to remove the _Static_assert())
> 
> > +{
> > +    /* A bunch of static assertions to check that the XEN_MSR_PAT is valid
> > +     * and consistent with the _PAGE_* macros */
> > +#define PAT_VALUE(v) (0xFF & (XEN_MSR_PAT >> (8 * (v))))
> > +#define BAD_VALUE(v) ((v) < 0 || (v) > 7 ||                                    \
> > +                      (v) == MSR_PAT_RESERVED_1 || (v) == MSR_PAT_RESERVED_2)
> > +#define BAD_PAT_VALUE(v) BUILD_BUG_ON(BAD_VALUE(PAT_VALUE(v)))
> > +    BAD_PAT_VALUE(0);
> > +    BAD_PAT_VALUE(1);
> > +    BAD_PAT_VALUE(2);
> > +    BAD_PAT_VALUE(3);
> > +    BAD_PAT_VALUE(4);
> > +    BAD_PAT_VALUE(5);
> > +    BAD_PAT_VALUE(6);
> > +    BAD_PAT_VALUE(7);
> > +#undef BAD_PAT_VALUE
> > +#undef BAD_VALUE
> 
> Given that you've reworked the PAT declaration to be of the form (MT <<
> shift), I'm not sure how much value this check is.

One of my goals with this patch set is that it should be possible to
choose *any* value for XEN_MSR_PAT and for the PAT-related _PAGE_*, with
all bad values caught at compile-time.  This would allow making it a
Kconfig option.

> > +#define PAT_SHIFT(page_value) (((page_value) & _PAGE_PAT) >> 5 |               \
> > +                               ((page_value) & (_PAGE_PCD | _PAGE_PWT)) >> 3)
> 
> pte_flags_to_cacheattr()

That’s a function, not a macro (and for good reason), so it can’t be
used in BUILD_BUG_ON().

> > +#define CHECK_PAGE_VALUE(page_value) do {                                      \
> > +    /* Check that the _PAGE_* macros only use bits from PAGE_CACHE_ATTRS */    \
> > +    BUILD_BUG_ON(((_PAGE_##page_value) & PAGE_CACHE_ATTRS) !=                  \
> > +                  (_PAGE_##page_value));                                       \
> > +    /* Check that the _PAGE_* are consistent with XEN_MSR_PAT */               \
> > +    BUILD_BUG_ON(PAT_VALUE(PAT_SHIFT(_PAGE_##page_value)) !=                   \
> > +                 (MSR_PAT_##page_value));                                      \
> > +} while (0)
> > +    CHECK_PAGE_VALUE(WT);
> > +    CHECK_PAGE_VALUE(WB);
> > +    CHECK_PAGE_VALUE(WC);
> > +    CHECK_PAGE_VALUE(UC);
> > +    CHECK_PAGE_VALUE(UCM);
> > +    CHECK_PAGE_VALUE(WP);
> > +#undef CHECK_PAGE_VALUE
> > +#undef PAT_SHIFT
> > +#undef PAT_VALUE
> > +}
> > +
> >  /*
> >   * get_page_from_l1e returns:
> >   *   0  => success (page not present also counts as such)
> > @@ -961,13 +1000,24 @@ get_page_from_l1e(
> >  
> >          switch ( l1f & PAGE_CACHE_ATTRS )
> >          {
> > -        case _PAGE_WB:
> > +        default:
> > +#ifndef NDEBUG
> > +            printk(XENLOG_G_WARNING
> > +                   "d%d: Guest tried to use bad cachability attribute %u for MFN %lx\n",
> > +                   l1e_owner->domain_id, l1f & PAGE_CACHE_ATTRS, mfn);
> 
> %pd.  You absolutely want to convert the PTE bits to a PAT value before
> priniting (decimal on a PTE value is useless), and %PRI_mfn.

I’ll have to look at the rest of the Xen tree to see how to use this.

> > +            pv_inject_hw_exception(TRAP_gp_fault, 0);
> 
> As I said on IRC, we do want this, but I'm not certain if we can get
> away with just enabling it in debug builds.  _PAGE_GNTTAB was ok because
> it has always been like that, but there's a non-trivial chance that
> there are existing dom0 kernels which violate this constraint.

I chose this approach because it is very simple to implement.  Anything
more complex ought to be in a separate patch, IMO.

> > +            return -EINVAL;
> > +#endif
> >          case _PAGE_WT:
> >          case _PAGE_WP:
> > -            flip |= (l1f & PAGE_CACHE_ATTRS) ^ _PAGE_UC;
> > +        case _PAGE_WB:
> > +            /* Force this to be uncachable */
> > +            return flip | ( (l1f & PAGE_CACHE_ATTRS) ^ _PAGE_UC );
> > +        case _PAGE_WC:
> > +        case _PAGE_UC:
> > +        case _PAGE_UCM:
> > +            return flip;
> >          }
> > -
> > -        return flip;
> 
> This wants reworking over Jan's suggestion in patch 1, and modifying to
> reduce churn.  (Keep _PAGE_WB in the same order WRT _PAGE_WT, the
> uncached memory types should simply break, and default should be at the
> end.)

I put the default in the middle to keep the preprocessor conditionals
simple and avoid duplication.  I will have the default be treated as
cachable memory.
Jan Beulich Dec. 7, 2022, 9:41 a.m. UTC | #4
On 06.12.2022 18:55, Demi Marie Obenour wrote:
> On Tue, Dec 06, 2022 at 12:06:24PM +0000, Andrew Cooper wrote:
>> On 06/12/2022 04:33, Demi Marie Obenour wrote:
>>> @@ -961,13 +1000,24 @@ get_page_from_l1e(
>>>  
>>>          switch ( l1f & PAGE_CACHE_ATTRS )
>>>          {
>>> -        case _PAGE_WB:
>>> +        default:
>>> +#ifndef NDEBUG
>>> +            printk(XENLOG_G_WARNING
>>> +                   "d%d: Guest tried to use bad cachability attribute %u for MFN %lx\n",
>>> +                   l1e_owner->domain_id, l1f & PAGE_CACHE_ATTRS, mfn);
>>
>> %pd.  You absolutely want to convert the PTE bits to a PAT value before
>> priniting (decimal on a PTE value is useless), and %PRI_mfn.
> 
> I’ll have to look at the rest of the Xen tree to see how to use this.
> 
>>> +            pv_inject_hw_exception(TRAP_gp_fault, 0);
>>
>> As I said on IRC, we do want this, but I'm not certain if we can get
>> away with just enabling it in debug builds.  _PAGE_GNTTAB was ok because
>> it has always been like that, but there's a non-trivial chance that
>> there are existing dom0 kernels which violate this constraint.
> 
> I chose this approach because it is very simple to implement.  Anything
> more complex ought to be in a separate patch, IMO.
> 
>>> +            return -EINVAL;
>>> +#endif

As to "complex": Just the "return -EINVAL" would be yet less complex.
Irrespective of the remark my understanding of Andrew's response is that
the concern extends to the error return as well.

Jan
Andrew Cooper Dec. 7, 2022, 12:14 p.m. UTC | #5
On 07/12/2022 09:41, Jan Beulich wrote:
> On 06.12.2022 18:55, Demi Marie Obenour wrote:
>> On Tue, Dec 06, 2022 at 12:06:24PM +0000, Andrew Cooper wrote:
>>> On 06/12/2022 04:33, Demi Marie Obenour wrote:
>>>> @@ -961,13 +1000,24 @@ get_page_from_l1e(
>>>>  
>>>>          switch ( l1f & PAGE_CACHE_ATTRS )
>>>>          {
>>>> -        case _PAGE_WB:
>>>> +        default:
>>>> +#ifndef NDEBUG
>>>> +            printk(XENLOG_G_WARNING
>>>> +                   "d%d: Guest tried to use bad cachability attribute %u for MFN %lx\n",
>>>> +                   l1e_owner->domain_id, l1f & PAGE_CACHE_ATTRS, mfn);
>>> %pd.  You absolutely want to convert the PTE bits to a PAT value before
>>> priniting (decimal on a PTE value is useless), and %PRI_mfn.
>> I’ll have to look at the rest of the Xen tree to see how to use this.
>>
>>>> +            pv_inject_hw_exception(TRAP_gp_fault, 0);
>>> As I said on IRC, we do want this, but I'm not certain if we can get
>>> away with just enabling it in debug builds.  _PAGE_GNTTAB was ok because
>>> it has always been like that, but there's a non-trivial chance that
>>> there are existing dom0 kernels which violate this constraint.
>> I chose this approach because it is very simple to implement.  Anything
>> more complex ought to be in a separate patch, IMO.
>>
>>>> +            return -EINVAL;
>>>> +#endif
> As to "complex": Just the "return -EINVAL" would be yet less complex.
> Irrespective of the remark my understanding of Andrew's response is that
> the concern extends to the error return as well.

It's a tradeoff.

From a debugging point of view, #GP is absolutely the way to go, because
you get a full backtrace out in Linux.  It doesn't matter in the
slightest which side of the SYSCALL instruction it points at, because
that's not the interesting information to look at.

Returning -EINVAL stops the problematic cache attributes from being
used, but is far more variable in terms of noticeable change inside
Linux.  It ranges from hitting a BUG(), to having the return code lost.


In this case, I'd err on the side of a #GP fault because it is a
definite error in the guest needing debugging and fixing.  But as I said
originally, it probably needs to be on-by-default with a knob to disable.

~Andrew
diff mbox series

Patch

diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
index 5d05399c3a841bf03991a3bed63df9a815c1e891..517fccee699b2a673ba537e47933aefc80017aa5 100644
--- a/xen/arch/x86/mm.c
+++ b/xen/arch/x86/mm.c
@@ -849,6 +849,45 @@  static int cf_check print_mmio_emul_range(
 }
 #endif
 
+static void __init __maybe_unused build_assertions(void)
+{
+    /* A bunch of static assertions to check that the XEN_MSR_PAT is valid
+     * and consistent with the _PAGE_* macros */
+#define PAT_VALUE(v) (0xFF & (XEN_MSR_PAT >> (8 * (v))))
+#define BAD_VALUE(v) ((v) < 0 || (v) > 7 ||                                    \
+                      (v) == MSR_PAT_RESERVED_1 || (v) == MSR_PAT_RESERVED_2)
+#define BAD_PAT_VALUE(v) BUILD_BUG_ON(BAD_VALUE(PAT_VALUE(v)))
+    BAD_PAT_VALUE(0);
+    BAD_PAT_VALUE(1);
+    BAD_PAT_VALUE(2);
+    BAD_PAT_VALUE(3);
+    BAD_PAT_VALUE(4);
+    BAD_PAT_VALUE(5);
+    BAD_PAT_VALUE(6);
+    BAD_PAT_VALUE(7);
+#undef BAD_PAT_VALUE
+#undef BAD_VALUE
+#define PAT_SHIFT(page_value) (((page_value) & _PAGE_PAT) >> 5 |               \
+                               ((page_value) & (_PAGE_PCD | _PAGE_PWT)) >> 3)
+#define CHECK_PAGE_VALUE(page_value) do {                                      \
+    /* Check that the _PAGE_* macros only use bits from PAGE_CACHE_ATTRS */    \
+    BUILD_BUG_ON(((_PAGE_##page_value) & PAGE_CACHE_ATTRS) !=                  \
+                  (_PAGE_##page_value));                                       \
+    /* Check that the _PAGE_* are consistent with XEN_MSR_PAT */               \
+    BUILD_BUG_ON(PAT_VALUE(PAT_SHIFT(_PAGE_##page_value)) !=                   \
+                 (MSR_PAT_##page_value));                                      \
+} while (0)
+    CHECK_PAGE_VALUE(WT);
+    CHECK_PAGE_VALUE(WB);
+    CHECK_PAGE_VALUE(WC);
+    CHECK_PAGE_VALUE(UC);
+    CHECK_PAGE_VALUE(UCM);
+    CHECK_PAGE_VALUE(WP);
+#undef CHECK_PAGE_VALUE
+#undef PAT_SHIFT
+#undef PAT_VALUE
+}
+
 /*
  * get_page_from_l1e returns:
  *   0  => success (page not present also counts as such)
@@ -961,13 +1000,24 @@  get_page_from_l1e(
 
         switch ( l1f & PAGE_CACHE_ATTRS )
         {
-        case _PAGE_WB:
+        default:
+#ifndef NDEBUG
+            printk(XENLOG_G_WARNING
+                   "d%d: Guest tried to use bad cachability attribute %u for MFN %lx\n",
+                   l1e_owner->domain_id, l1f & PAGE_CACHE_ATTRS, mfn);
+            pv_inject_hw_exception(TRAP_gp_fault, 0);
+            return -EINVAL;
+#endif
         case _PAGE_WT:
         case _PAGE_WP:
-            flip |= (l1f & PAGE_CACHE_ATTRS) ^ _PAGE_UC;
+        case _PAGE_WB:
+            /* Force this to be uncachable */
+            return flip | ( (l1f & PAGE_CACHE_ATTRS) ^ _PAGE_UC );
+        case _PAGE_WC:
+        case _PAGE_UC:
+        case _PAGE_UCM:
+            return flip;
         }
-
-        return flip;
     }
 
     if ( unlikely((real_pg_owner != pg_owner) &&