diff mbox series

[10/12] docs/migration: Specify X86_{CPUID, MSR}_POLICY records

Message ID 20191224151932.6304-11-andrew.cooper3@citrix.com (mailing list archive)
State Superseded
Headers show
Series Support CPUID/MSR data in migration streams | expand

Commit Message

Andrew Cooper Dec. 24, 2019, 3:19 p.m. UTC
These two records move blobs from the XEN_DOMCTL_{get,set}_cpu_policy
hypercall.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: George Dunlap <George.Dunlap@eu.citrix.com>
CC: Ian Jackson <ian.jackson@citrix.com>
CC: Jan Beulich <JBeulich@suse.com>
CC: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
CC: Stefano Stabellini <sstabellini@kernel.org>
CC: Tim Deegan <tim@xen.org>
CC: Wei Liu <wl@xen.org>
CC: Julien Grall <julien.grall@arm.com>
CC: Marek Marczykowski-Górecki <marmarek@invisiblethingslab.com>
---
 docs/specs/libxc-migration-stream.pandoc | 42 +++++++++++++++++++++++++++++++
 tools/libxc/xc_sr_common.c               |  2 ++
 tools/libxc/xc_sr_stream_format.h        |  2 ++
 tools/python/xen/migration/libxc.py      | 43 ++++++++++++++++++++++++++++++++
 4 files changed, 89 insertions(+)

Comments

Jan Beulich Jan. 3, 2020, 2:49 p.m. UTC | #1
On 24.12.2019 16:19, Andrew Cooper wrote:
> @@ -439,6 +449,34 @@ def verify_record_static_data_end(self, content):
>              raise RecordError("Static data end record found in v2 stream")
>  
>  
> +    def verify_record_x86_cpuid_policy(self, content):
> +        """ x86 CPUID policy record """
> +
> +        if self.version < 3:
> +            raise RecordError("x86 CPUID policy record found in v2 stream")
> +
> +        sz = calcsize(X86_CPUID_POLICY_FORMAT)
> +        contentsz = len(content)
> +
> +        if contentsz < sz or (contentsz % sz) != 0:
> +            raise RecordError("Record length %u, expected multiple of %u" %
> +                              (contentsz, sz))
> +
> +
> +    def verify_record_x86_msr_policy(self, content):
> +        """ x86 MSR policy record """
> +
> +        if self.version < 3:
> +            raise RecordError("x86 MSR policy record found in v2 stream")
> +
> +        sz = calcsize(X86_MSR_POLICY_FORMAT)
> +        contentsz = len(content)
> +
> +        if contentsz < sz or (contentsz % sz) != 0:
> +            raise RecordError("Record length %u, expected multiple of %u" %
> +                              (contentsz, sz))

While I can't even see a theoretical case of the CPUID array
having zero elements, is it really entirely implausible to have
an empty MSRs array? I.e. wouldn't the left side of the "or"
better go away?

Jan
Andrew Cooper Jan. 3, 2020, 2:55 p.m. UTC | #2
On 03/01/2020 14:49, Jan Beulich wrote:
> On 24.12.2019 16:19, Andrew Cooper wrote:
>> @@ -439,6 +449,34 @@ def verify_record_static_data_end(self, content):
>>              raise RecordError("Static data end record found in v2 stream")
>>  
>>  
>> +    def verify_record_x86_cpuid_policy(self, content):
>> +        """ x86 CPUID policy record """
>> +
>> +        if self.version < 3:
>> +            raise RecordError("x86 CPUID policy record found in v2 stream")
>> +
>> +        sz = calcsize(X86_CPUID_POLICY_FORMAT)
>> +        contentsz = len(content)
>> +
>> +        if contentsz < sz or (contentsz % sz) != 0:
>> +            raise RecordError("Record length %u, expected multiple of %u" %
>> +                              (contentsz, sz))
>> +
>> +
>> +    def verify_record_x86_msr_policy(self, content):
>> +        """ x86 MSR policy record """
>> +
>> +        if self.version < 3:
>> +            raise RecordError("x86 MSR policy record found in v2 stream")
>> +
>> +        sz = calcsize(X86_MSR_POLICY_FORMAT)
>> +        contentsz = len(content)
>> +
>> +        if contentsz < sz or (contentsz % sz) != 0:
>> +            raise RecordError("Record length %u, expected multiple of %u" %
>> +                              (contentsz, sz))
> While I can't even see a theoretical case of the CPUID array
> having zero elements, is it really entirely implausible to have
> an empty MSRs array? I.e. wouldn't the left side of the "or"
> better go away?

MSRs will never have 0 entries, because unlike CPUID, we can't omit
records with 0s as their content.  This becomes ambiguous when the
policy default is nonzero.

When we do gain more MSRs, I will see about organising elision based on
CPUID features, so we don't have to send a 0 for every single MSR in the
policy, but MSRs without CPUID enumeration must always be sent.

This means that the one MSR we have currently (MSR_INTEL_PLATFORM_INFO
for CPUID Faulting, which we also virtualise on AMD hardware) shall
unconditionally be present forever more.

~Andrew
Jan Beulich Jan. 3, 2020, 3:30 p.m. UTC | #3
On 03.01.2020 15:55, Andrew Cooper wrote:
> On 03/01/2020 14:49, Jan Beulich wrote:
>> On 24.12.2019 16:19, Andrew Cooper wrote:
>>> @@ -439,6 +449,34 @@ def verify_record_static_data_end(self, content):
>>>              raise RecordError("Static data end record found in v2 stream")
>>>  
>>>  
>>> +    def verify_record_x86_cpuid_policy(self, content):
>>> +        """ x86 CPUID policy record """
>>> +
>>> +        if self.version < 3:
>>> +            raise RecordError("x86 CPUID policy record found in v2 stream")
>>> +
>>> +        sz = calcsize(X86_CPUID_POLICY_FORMAT)
>>> +        contentsz = len(content)
>>> +
>>> +        if contentsz < sz or (contentsz % sz) != 0:
>>> +            raise RecordError("Record length %u, expected multiple of %u" %
>>> +                              (contentsz, sz))
>>> +
>>> +
>>> +    def verify_record_x86_msr_policy(self, content):
>>> +        """ x86 MSR policy record """
>>> +
>>> +        if self.version < 3:
>>> +            raise RecordError("x86 MSR policy record found in v2 stream")
>>> +
>>> +        sz = calcsize(X86_MSR_POLICY_FORMAT)
>>> +        contentsz = len(content)
>>> +
>>> +        if contentsz < sz or (contentsz % sz) != 0:
>>> +            raise RecordError("Record length %u, expected multiple of %u" %
>>> +                              (contentsz, sz))
>> While I can't even see a theoretical case of the CPUID array
>> having zero elements, is it really entirely implausible to have
>> an empty MSRs array? I.e. wouldn't the left side of the "or"
>> better go away?
> 
> MSRs will never have 0 entries, because unlike CPUID, we can't omit
> records with 0s as their content.  This becomes ambiguous when the
> policy default is nonzero.

Isn't the same true for CPUID, in particular some of the non-boolean
fields?

> When we do gain more MSRs, I will see about organising elision based on
> CPUID features, so we don't have to send a 0 for every single MSR in the
> policy, but MSRs without CPUID enumeration must always be sent.
> 
> This means that the one MSR we have currently (MSR_INTEL_PLATFORM_INFO
> for CPUID Faulting, which we also virtualise on AMD hardware) shall
> unconditionally be present forever more.

Hmm, yes. Still the special casing of there needing to be at least
one entry looks a little odd here (and also for CPUID). I would
find it more logical if there was just the remainder-must-be-zero
check. But this is libxc code, so I'm not the one to really judge
anyway.

Jan
Andrew Cooper Jan. 9, 2020, 3:30 p.m. UTC | #4
On 03/01/2020 15:30, Jan Beulich wrote:
> On 03.01.2020 15:55, Andrew Cooper wrote:
>> On 03/01/2020 14:49, Jan Beulich wrote:
>>> On 24.12.2019 16:19, Andrew Cooper wrote:
>>>> @@ -439,6 +449,34 @@ def verify_record_static_data_end(self, content):
>>>>              raise RecordError("Static data end record found in v2 stream")
>>>>  
>>>>  
>>>> +    def verify_record_x86_cpuid_policy(self, content):
>>>> +        """ x86 CPUID policy record """
>>>> +
>>>> +        if self.version < 3:
>>>> +            raise RecordError("x86 CPUID policy record found in v2 stream")
>>>> +
>>>> +        sz = calcsize(X86_CPUID_POLICY_FORMAT)
>>>> +        contentsz = len(content)
>>>> +
>>>> +        if contentsz < sz or (contentsz % sz) != 0:
>>>> +            raise RecordError("Record length %u, expected multiple of %u" %
>>>> +                              (contentsz, sz))
>>>> +
>>>> +
>>>> +    def verify_record_x86_msr_policy(self, content):
>>>> +        """ x86 MSR policy record """
>>>> +
>>>> +        if self.version < 3:
>>>> +            raise RecordError("x86 MSR policy record found in v2 stream")
>>>> +
>>>> +        sz = calcsize(X86_MSR_POLICY_FORMAT)
>>>> +        contentsz = len(content)
>>>> +
>>>> +        if contentsz < sz or (contentsz % sz) != 0:
>>>> +            raise RecordError("Record length %u, expected multiple of %u" %
>>>> +                              (contentsz, sz))
>>> While I can't even see a theoretical case of the CPUID array
>>> having zero elements, is it really entirely implausible to have
>>> an empty MSRs array? I.e. wouldn't the left side of the "or"
>>> better go away?
>> MSRs will never have 0 entries, because unlike CPUID, we can't omit
>> records with 0s as their content.  This becomes ambiguous when the
>> policy default is nonzero.
> Isn't the same true for CPUID, in particular some of the non-boolean
> fields?

I perhaps misspoke.  We can omit CPUID leave(s) based on information
already sent (max_leaf and/or the absence of an enumerating feature). 
In these cases, the destination side can still fill the remainder in
with 0's.

Top level leaves without an encompassing feature do need sending in full.

The best practical example is for not sending all 63 subleaves of the
xsave state leaf, when most of them are outside of the policy XCR0|XSS bits.

>
>> When we do gain more MSRs, I will see about organising elision based on
>> CPUID features, so we don't have to send a 0 for every single MSR in the
>> policy, but MSRs without CPUID enumeration must always be sent.
>>
>> This means that the one MSR we have currently (MSR_INTEL_PLATFORM_INFO
>> for CPUID Faulting, which we also virtualise on AMD hardware) shall
>> unconditionally be present forever more.
> Hmm, yes. Still the special casing of there needing to be at least
> one entry looks a little odd here (and also for CPUID). I would
> find it more logical if there was just the remainder-must-be-zero
> check. But this is libxc code, so I'm not the one to really judge
> anyway.

The migration stream is split into records with no playload (markers
with external control flow meaning), and data records, which have a payload.

It is an error for a data record to have no payload, because it means
there is a source side generation bug.  In the case of Xen returning 0
MSRs, the record would be omitted entirely, rather than be sent with 0
MSRs worth of data.

~Andrew
Ian Jackson Jan. 14, 2020, 4:08 p.m. UTC | #5
Andrew Cooper writes ("[PATCH 10/12] docs/migration: Specify X86_{CPUID,MSR}_POLICY records"):
> These two records move blobs from the XEN_DOMCTL_{get,set}_cpu_policy
> hypercall.

We had an extensive IRL discussion recently about the compatibility
implications of this.  Is that written down somewhere ?  I was
expecting to see it in this patch.

Ian.
Ian Jackson Jan. 14, 2020, 4:12 p.m. UTC | #6
Andrew Cooper writes ("Re: [PATCH 10/12] docs/migration: Specify X86_{CPUID,MSR}_POLICY records"):
> The migration stream is split into records with no playload (markers
> with external control flow meaning), and data records, which have a payload.

I remember thinking at the time you specified this (some time ago, in
migration v2) that this was anomalous.

At the time it made no difference in practice, because all existing
payload types required nonempty payloads anyway, so I didn't argue.

> It is an error for a data record to have no payload, because it means
> there is a source side generation bug.  In the case of Xen returning 0
> MSRs, the record would be omitted entirely, rather than be sent with 0
> MSRs worth of data.

I think it would be better to say instead that data records may have a
0-length payload.  This allows a record with a 0-length payload to
have a different semantic meaning ("here is this information and the
information is the empty set") from an omitted record ("this
information is not available/provided").

Whether a record is a marker ought to be inferred from its type.

Ian.
Andrew Cooper Jan. 15, 2020, 3:36 p.m. UTC | #7
On 14/01/2020 16:08, Ian Jackson wrote:
> Andrew Cooper writes ("[PATCH 10/12] docs/migration: Specify X86_{CPUID,MSR}_POLICY records"):
>> These two records move blobs from the XEN_DOMCTL_{get,set}_cpu_policy
>> hypercall.
> We had an extensive IRL discussion recently about the compatibility
> implications of this.  Is that written down somewhere ?  I was
> expecting to see it in this patch.

Sadly clairvoyance isn't a skill I'm terribly good at.  (This email
predates our conversation by 2 weeks or so.)

v2 of the series will have appropriate adjustments, although none of it
was relevant to this patch specifically.

~Andrew
Andrew Cooper Jan. 15, 2020, 3:48 p.m. UTC | #8
On 14/01/2020 16:12, Ian Jackson wrote:
> Andrew Cooper writes ("Re: [PATCH 10/12] docs/migration: Specify X86_{CPUID,MSR}_POLICY records"):
>> The migration stream is split into records with no playload (markers
>> with external control flow meaning), and data records, which have a payload.
> I remember thinking at the time you specified this (some time ago, in
> migration v2) that this was anomalous.

It was, and remains, very deliberate.

> Whether a record is a marker ought to be inferred from its type.

All records have explicit semantics as specified by their types.  This
includes the semantics as to whether it shall have zero or non-zero payload.

A data record with no payload is nonsensical.  It is prohibited
specifically because it helps the protocol verification logic spot bugs,
and we really did spot several hypercall (preexiting) and save-side bugs
because of this rule.

If a plausible use for payload-less data appears, then we can take a
judgement call as to whether it outweighs the utility of improved error
detection.  Making this change would require a change to the spec, and
an adjustment to the pre-exiting receive side logic.

~Andrew
diff mbox series

Patch

diff --git a/docs/specs/libxc-migration-stream.pandoc b/docs/specs/libxc-migration-stream.pandoc
index 22ff306e0b..3a0915b795 100644
--- a/docs/specs/libxc-migration-stream.pandoc
+++ b/docs/specs/libxc-migration-stream.pandoc
@@ -634,6 +634,46 @@  The end record contains no fields; its body_length is 0.
 
 \clearpage
 
+X86_CPUID_POLICY
+----------------
+
+CPUID policy content, as accessed by the XEN_DOMCTL_{get,set}_cpu_policy
+hypercall sub-ops.
+
+     0     1     2     3     4     5     6     7 octet
+    +-------------------------------------------------+
+    | CPUID_policy                                    |
+    ...
+    +-------------------------------------------------+
+
+--------------------------------------------------------------------
+Field            Description
+------------     ---------------------------------------------------
+CPUID_policy     Array of xen_cpuid_leaf_t[]'s
+--------------------------------------------------------------------
+
+\clearpage
+
+X86_MSR_POLICY
+--------------
+
+MSR policy content, as accessed by the XEN_DOMCTL_{get,set}_cpu_policy
+hypercall sub-ops.
+
+     0     1     2     3     4     5     6     7 octet
+    +-------------------------------------------------+
+    | MSR_policy                                      |
+    ...
+    +-------------------------------------------------+
+
+--------------------------------------------------------------------
+Field            Description
+----------       ---------------------------------------------------
+MSR_policy       Array of xen_msr_entry_t[]'s
+--------------------------------------------------------------------
+
+\clearpage
+
 
 Layout
 ======
@@ -656,6 +696,7 @@  A typical save record for an x86 PV guest image would look like:
 * Domain header
 * Static data records:
     * X86_PV_INFO record
+    * X86_{CPUID,MSR}_POLICY
     * STATIC_DATA_END
 * X86_PV_P2M_FRAMES record
 * Many PAGE_DATA records
@@ -685,6 +726,7 @@  A typical save record for an x86 HVM guest image would look like:
 * Image header
 * Domain header
 * Static data records:
+    * X86_{CPUID,MSR}_POLICY
     * STATIC_DATA_END
 * Many PAGE_DATA records
 * X86_TSC_INFO
diff --git a/tools/libxc/xc_sr_common.c b/tools/libxc/xc_sr_common.c
index 7f22cf0365..7c54b03414 100644
--- a/tools/libxc/xc_sr_common.c
+++ b/tools/libxc/xc_sr_common.c
@@ -37,6 +37,8 @@  static const char *const mandatory_rec_types[] =
     [REC_TYPE_CHECKPOINT]                   = "Checkpoint",
     [REC_TYPE_CHECKPOINT_DIRTY_PFN_LIST]    = "Checkpoint dirty pfn list",
     [REC_TYPE_STATIC_DATA_END]              = "Static data end",
+    [REC_TYPE_X86_CPUID_POLICY]             = "x86 CPUID policy",
+    [REC_TYPE_X86_MSR_POLICY]               = "x86 MSR policy",
 };
 
 const char *rec_type_to_str(uint32_t type)
diff --git a/tools/libxc/xc_sr_stream_format.h b/tools/libxc/xc_sr_stream_format.h
index 81c9765b0a..8a0da26f75 100644
--- a/tools/libxc/xc_sr_stream_format.h
+++ b/tools/libxc/xc_sr_stream_format.h
@@ -74,6 +74,8 @@  struct xc_sr_rhdr
 #define REC_TYPE_CHECKPOINT                 0x0000000eU
 #define REC_TYPE_CHECKPOINT_DIRTY_PFN_LIST  0x0000000fU
 #define REC_TYPE_STATIC_DATA_END            0x00000010U
+#define REC_TYPE_X86_CPUID_POLICY           0x00000011U
+#define REC_TYPE_X86_MSR_POLICY             0x00000012U
 
 #define REC_TYPE_OPTIONAL             0x80000000U
 
diff --git a/tools/python/xen/migration/libxc.py b/tools/python/xen/migration/libxc.py
index 5fb51b56ac..9881f5ced4 100644
--- a/tools/python/xen/migration/libxc.py
+++ b/tools/python/xen/migration/libxc.py
@@ -57,6 +57,8 @@ 
 REC_TYPE_checkpoint                 = 0x0000000e
 REC_TYPE_checkpoint_dirty_pfn_list  = 0x0000000f
 REC_TYPE_static_data_end            = 0x00000010
+REC_TYPE_x86_cpuid_policy           = 0x00000011
+REC_TYPE_x86_msr_policy             = 0x00000012
 
 rec_type_to_str = {
     REC_TYPE_end                        : "End",
@@ -76,6 +78,8 @@ 
     REC_TYPE_checkpoint                 : "Checkpoint",
     REC_TYPE_checkpoint_dirty_pfn_list  : "Checkpoint dirty pfn list",
     REC_TYPE_static_data_end            : "Static data end",
+    REC_TYPE_x86_cpuid_policy           : "x86 CPUID policy",
+    REC_TYPE_x86_msr_policy             : "x86 MSR policy",
 }
 
 # page_data
@@ -113,6 +117,12 @@ 
 HVM_PARAMS_ENTRY_FORMAT   = "QQ"
 HVM_PARAMS_FORMAT         = "II"
 
+# x86_cpuid_policy => xen_cpuid_leaf_t[]
+X86_CPUID_POLICY_FORMAT   = "IIIIII"
+
+# x86_msr_policy => xen_msr_entry_t[]
+X86_MSR_POLICY_FORMAT     = "QII"
+
 class VerifyLibxc(VerifyBase):
     """ Verify a Libxc v2 (or later) stream """
 
@@ -439,6 +449,34 @@  def verify_record_static_data_end(self, content):
             raise RecordError("Static data end record found in v2 stream")
 
 
+    def verify_record_x86_cpuid_policy(self, content):
+        """ x86 CPUID policy record """
+
+        if self.version < 3:
+            raise RecordError("x86 CPUID policy record found in v2 stream")
+
+        sz = calcsize(X86_CPUID_POLICY_FORMAT)
+        contentsz = len(content)
+
+        if contentsz < sz or (contentsz % sz) != 0:
+            raise RecordError("Record length %u, expected multiple of %u" %
+                              (contentsz, sz))
+
+
+    def verify_record_x86_msr_policy(self, content):
+        """ x86 MSR policy record """
+
+        if self.version < 3:
+            raise RecordError("x86 MSR policy record found in v2 stream")
+
+        sz = calcsize(X86_MSR_POLICY_FORMAT)
+        contentsz = len(content)
+
+        if contentsz < sz or (contentsz % sz) != 0:
+            raise RecordError("Record length %u, expected multiple of %u" %
+                              (contentsz, sz))
+
+
 record_verifiers = {
     REC_TYPE_end:
         VerifyLibxc.verify_record_end,
@@ -483,4 +521,9 @@  def verify_record_static_data_end(self, content):
 
     REC_TYPE_static_data_end:
         VerifyLibxc.verify_record_static_data_end,
+
+    REC_TYPE_x86_cpuid_policy:
+        VerifyLibxc.verify_record_x86_cpuid_policy,
+    REC_TYPE_x86_msr_policy:
+        VerifyLibxc.verify_record_x86_msr_policy,
     }