[v2,04/12] x86/hvm: Reduce stack usage from HVMTRACE_ND()

Message ID 20210920172529.24932-5-andrew.cooper3@citrix.com (mailing list archive)
State New, archived
Series xen/trace: Fix leakage of uninitialised stack into the tracebuffer

Commit Message

Andrew Cooper Sept. 20, 2021, 5:25 p.m. UTC
It is pointless to write all 6 entries when only the useful subset gets
consumed.  bloat-o-meter shows quite how obscene the overhead is: 12% of
vmx_vmexit_handler() is code arranging unread zeroes on the stack, and 8% of
svm_vmexit_handler().

  add/remove: 0/0 grow/shrink: 0/20 up/down: 0/-1929 (-1929)
  Function                                     old     new   delta
  hvm_msr_write_intercept                     1049    1033     -16
  vmx_enable_intr_window                       238     214     -24
  svm_enable_intr_window                       337     313     -24
  hvmemul_write_xcr                            115      91     -24
  hvmemul_write_cr                             350     326     -24
  hvmemul_read_xcr                             115      91     -24
  hvmemul_read_cr                              146     122     -24
  hvm_mov_to_cr                                438     414     -24
  hvm_mov_from_cr                              253     229     -24
  vmx_intr_assist                             1150    1118     -32
  svm_intr_assist                              459     427     -32
  hvm_rdtsc_intercept                          138     106     -32
  hvm_msr_read_intercept                       898     866     -32
  vmx_vmenter_helper                          1142    1094     -48
  vmx_inject_event                             813     765     -48
  svm_vmenter_helper                           238     187     -51
  hvm_hlt                                      197     146     -51
  svm_inject_event                            1678    1614     -64
  svm_vmexit_handler                          5880    5392    -488
  vmx_vmexit_handler                          7281    6438    -843
  Total: Before=3644277, After=3642348, chg -0.05%

Adjust all users of HVMTRACE_ND(), using TRC_PAR_LONG() where appropriate
instead of opencoding it.

The 0 case needs a little help.  All objects in C must have a unique address,
and _d is passed by pointer.  Explicitly permit the optimiser to drop the
array.
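
For readers less familiar with the trick, here is a minimal standalone sketch
of the same technique (TRACE_ND and emit_record are made-up names for
illustration, not the Xen code; the empty argument list and the resulting
zero-length array rely on GNU C extensions, as accepted by GCC and Clang):

  #include <stddef.h>
  #include <stdint.h>
  #include <stdio.h>

  /* Stand-in for __trace_var(): reports how much extra data it was handed. */
  static void emit_record(uint32_t event, size_t len, const void *extra)
  {
      printf("event %#x: %zu bytes of extra data%s\n",
             (unsigned)event, len, extra ? "" : " (NULL pointer)");
  }

  #define TRACE_ND(event, ...)                                          \
      do {                                                              \
          uint32_t _d[] = { __VA_ARGS__ }; /* sized by the arguments */ \
          /* With no varargs, sizeof(_d) is 0: pass NULL so the */      \
          /* (zero-length) array need not exist at all.         */      \
          emit_record(event, sizeof(_d), sizeof(_d) ? _d : NULL);       \
      } while ( 0 )

  int main(void)
  {
      TRACE_ND(0x101);            /* 0 bytes of payload, NULL pointer */
      TRACE_ND(0x102, 42);        /* 4 bytes                          */
      TRACE_ND(0x103, 1, 2, 3);   /* 12 bytes                         */
      return 0;
  }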

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Jan Beulich <JBeulich@suse.com>
CC: Roger Pau Monné <roger.pau@citrix.com>
CC: Wei Liu <wl@xen.org>

v2:
 * Adjust callers of HVMTRACE_ND() too
 * Drop _d[] for the 0 case.

Normally I wouldn't recommend patches like this for backport, but
{vmx,svm}_vmexit_handler() are fastpaths and this is a *lot* of I-cache lines
dropped...
---
 xen/arch/x86/hvm/svm/svm.c      |  8 +++-----
 xen/arch/x86/hvm/vmx/vmx.c      |  9 ++++-----
 xen/include/asm-x86/hvm/trace.h | 28 ++++++++++------------------
 3 files changed, 17 insertions(+), 28 deletions(-)

Comments

Jan Beulich Sept. 21, 2021, 11 a.m. UTC | #1
On 20.09.2021 19:25, Andrew Cooper wrote:
> v2:
>  * Adjust callers of HVMTRACE_ND() too

What does this refer to? The sole difference to v1 that I can spot
is ...

>  * Drop _d[] for the 0 case.

... the one corresponding to this line, i.e. ...

> --- a/xen/include/asm-x86/hvm/trace.h
> +++ b/xen/include/asm-x86/hvm/trace.h
> @@ -67,38 +67,30 @@
>  #define TRACE_2_LONG_4D(_e, d1, d2, d3, d4, ...) \
>      TRACE_6D(_e, d1, d2, d3, d4)
>  
> -#define HVMTRACE_ND(evt, modifier, cycles, count, d1, d2, d3, d4, d5, d6) \
> +#define HVMTRACE_ND(evt, modifier, cycles, ...)                           \
>      do {                                                                  \
>          if ( unlikely(tb_init_done) && DO_TRC_HVM_ ## evt )               \
>          {                                                                 \
> -            struct {                                                      \
> -                u32 d[6];                                                 \
> -            } _d;                                                         \
> -            _d.d[0]=(d1);                                                 \
> -            _d.d[1]=(d2);                                                 \
> -            _d.d[2]=(d3);                                                 \
> -            _d.d[3]=(d4);                                                 \
> -            _d.d[4]=(d5);                                                 \
> -            _d.d[5]=(d6);                                                 \
> +            uint32_t _d[] = { __VA_ARGS__ };                              \
>              __trace_var(TRC_HVM_ ## evt | (modifier), cycles,             \
> -                        sizeof(*_d.d) * count, &_d);                      \
> +                        sizeof(_d), sizeof(_d) ? _d : NULL);              \

... the addition of a conditional operator here (which I assume was
something a particular compiler didn't like in v1). FAOD - I'm fine
with the change, but I fear I'm overlooking something (again).

Jan
Andrew Cooper Sept. 21, 2021, 3:38 p.m. UTC | #2
On 21/09/2021 12:00, Jan Beulich wrote:
> On 20.09.2021 19:25, Andrew Cooper wrote:
>> v2:
>>  * Adjust callers of HVMTRACE_ND() too
> What does this refer to? The sole difference to v1 that I can spot
> is ...

Oh - it's me who was confused.

I thought I had failed to include the changes in vmx.c/svm.c in v1.  In
which case, no change to that in v2.

>>  * Drop _d[] for the 0 case.
> ... the one corresponding to this line, i.e. ...
>
>> --- a/xen/include/asm-x86/hvm/trace.h
>> +++ b/xen/include/asm-x86/hvm/trace.h
>> @@ -67,38 +67,30 @@
>>  #define TRACE_2_LONG_4D(_e, d1, d2, d3, d4, ...) \
>>      TRACE_6D(_e, d1, d2, d3, d4)
>>  
>> -#define HVMTRACE_ND(evt, modifier, cycles, count, d1, d2, d3, d4, d5, d6) \
>> +#define HVMTRACE_ND(evt, modifier, cycles, ...)                           \
>>      do {                                                                  \
>>          if ( unlikely(tb_init_done) && DO_TRC_HVM_ ## evt )               \
>>          {                                                                 \
>> -            struct {                                                      \
>> -                u32 d[6];                                                 \
>> -            } _d;                                                         \
>> -            _d.d[0]=(d1);                                                 \
>> -            _d.d[1]=(d2);                                                 \
>> -            _d.d[2]=(d3);                                                 \
>> -            _d.d[3]=(d4);                                                 \
>> -            _d.d[4]=(d5);                                                 \
>> -            _d.d[5]=(d6);                                                 \
>> +            uint32_t _d[] = { __VA_ARGS__ };                              \
>>              __trace_var(TRC_HVM_ ## evt | (modifier), cycles,             \
>> -                        sizeof(*_d.d) * count, &_d);                      \
>> +                        sizeof(_d), sizeof(_d) ? _d : NULL);              \
> ... the addition of a conditional operator here (which I assume was
> something a particular compiler didn't like in v1).

And was covered in the commit message:

> The 0 case needs a little help.  All objects in C must have a unique address,
> and _d is passed by pointer.  Explicitly permit the optimiser to drop the
> array.



> FAOD - I'm fine
> with the change, but I fear I'm overlooking something (again).

Thanks,

~Andrew
Jan Beulich Sept. 21, 2021, 3:40 p.m. UTC | #3
On 21.09.2021 17:38, Andrew Cooper wrote:
> On 21/09/2021 12:00, Jan Beulich wrote:
>> On 20.09.2021 19:25, Andrew Cooper wrote:
>>> v2:
>>>  * Adjust callers of HVMTRACE_ND() too
>> What does this refer to? The sole difference to v1 that I can spot
>> is ...
> 
> Oh - it's me who was confused.
> 
> I thought I had failed to include the changes in vmx.c/svm.c in v1.  In
> which case, no change to that in v2.

Good:
Reviewed-by: Jan Beulich <jbeulich@suse.com>

>>>  * Drop _d[] for the 0 case.
>> ... the one corresponding to this line, i.e. ...
>>
>>> --- a/xen/include/asm-x86/hvm/trace.h
>>> +++ b/xen/include/asm-x86/hvm/trace.h
>>> @@ -67,38 +67,30 @@
>>>  #define TRACE_2_LONG_4D(_e, d1, d2, d3, d4, ...) \
>>>      TRACE_6D(_e, d1, d2, d3, d4)
>>>  
>>> -#define HVMTRACE_ND(evt, modifier, cycles, count, d1, d2, d3, d4, d5, d6) \
>>> +#define HVMTRACE_ND(evt, modifier, cycles, ...)                           \
>>>      do {                                                                  \
>>>          if ( unlikely(tb_init_done) && DO_TRC_HVM_ ## evt )               \
>>>          {                                                                 \
>>> -            struct {                                                      \
>>> -                u32 d[6];                                                 \
>>> -            } _d;                                                         \
>>> -            _d.d[0]=(d1);                                                 \
>>> -            _d.d[1]=(d2);                                                 \
>>> -            _d.d[2]=(d3);                                                 \
>>> -            _d.d[3]=(d4);                                                 \
>>> -            _d.d[4]=(d5);                                                 \
>>> -            _d.d[5]=(d6);                                                 \
>>> +            uint32_t _d[] = { __VA_ARGS__ };                              \
>>>              __trace_var(TRC_HVM_ ## evt | (modifier), cycles,             \
>>> -                        sizeof(*_d.d) * count, &_d);                      \
>>> +                        sizeof(_d), sizeof(_d) ? _d : NULL);              \
>> ... the addition of a conditional operator here (which I assume was
>> something a particular compiler didn't like in v1).
> 
> And was covered in the commit message:
> 
>> The 0 case needs a little help.  All objects in C must have a unique address,
>> and _d is passed by pointer.  Explicitly permit the optimiser to drop the
>> array.

Right, I had associated the text and the change this way. It was really just
the revision log which was confusing.

Jan
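
Before the patch proper, a quick standalone illustration of the size effect
discussed above, using the VMEXIT64 call sites from the svm.c/vmx.c hunks
below as the example.  This is a sketch rather than Xen code: SPLIT64 merely
stands in for TRC_PAR_LONG(), which is defined elsewhere in Xen's trace
headers.

  #include <stdint.h>
  #include <stdio.h>

  /* Stand-in for TRC_PAR_LONG(): low 32 bits first, then the high 32 bits. */
  #define SPLIT64(x)  ((uint32_t)(x)), ((uint32_t)((x) >> 32))

  int main(void)
  {
      uint32_t exit_reason = 0x12;
      uint64_t rip = 0xffff82d040000000ull;   /* arbitrary example value */

      /* Old expansion: a fixed six-slot struct is filled, but only
       * 'count' (3 here) of the entries were ever copied out. */
      struct { uint32_t d[6]; } old_d = {
          { exit_reason, (uint32_t)rip, (uint32_t)(rip >> 32), 0, 0, 0 }
      };

      /* New expansion: the initialiser list itself sizes the array. */
      uint32_t new_d[] = { exit_reason, SPLIT64(rip) };

      printf("old: %zu bytes initialised on the stack\n", sizeof(old_d));
      printf("new: %zu bytes initialised on the stack\n", sizeof(new_d));
      return 0;
  }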

Patch

diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
index afb1ccb342c2..f0e10dec046e 100644
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -1052,7 +1052,7 @@  void svm_vmenter_helper(const struct cpu_user_regs *regs)
     if ( unlikely(tb_init_done) )
         HVMTRACE_ND(VMENTRY,
                     nestedhvm_vcpu_in_guestmode(curr) ? TRC_HVM_NESTEDFLAG : 0,
-                    1/*cycles*/, 0, 0, 0, 0, 0, 0, 0);
+                    1/*cycles*/);
 
     svm_sync_vmcb(curr, vmcb_needs_vmsave);
 
@@ -2565,12 +2565,10 @@  void svm_vmexit_handler(struct cpu_user_regs *regs)
 
     if ( hvm_long_mode_active(v) )
         HVMTRACE_ND(VMEXIT64, vcpu_guestmode ? TRC_HVM_NESTEDFLAG : 0,
-                    1/*cycles*/, 3, exit_reason,
-                    regs->eip, regs->rip >> 32, 0, 0, 0);
+                    1/*cycles*/, exit_reason, TRC_PAR_LONG(regs->rip));
     else
         HVMTRACE_ND(VMEXIT, vcpu_guestmode ? TRC_HVM_NESTEDFLAG : 0,
-                    1/*cycles*/, 2, exit_reason,
-                    regs->eip, 0, 0, 0, 0);
+                    1/*cycles*/, exit_reason, regs->eip);
 
     if ( vcpu_guestmode )
     {
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index b0a42d05f86a..d403e2d8060a 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -3864,11 +3864,10 @@  void vmx_vmexit_handler(struct cpu_user_regs *regs)
     __vmread(VM_EXIT_REASON, &exit_reason);
 
     if ( hvm_long_mode_active(v) )
-        HVMTRACE_ND(VMEXIT64, 0, 1/*cycles*/, 3, exit_reason,
-                    regs->eip, regs->rip >> 32, 0, 0, 0);
+        HVMTRACE_ND(VMEXIT64, 0, 1/*cycles*/, exit_reason,
+                    TRC_PAR_LONG(regs->rip));
     else
-        HVMTRACE_ND(VMEXIT, 0, 1/*cycles*/, 2, exit_reason,
-                    regs->eip, 0, 0, 0, 0);
+        HVMTRACE_ND(VMEXIT, 0, 1/*cycles*/, exit_reason, regs->eip);
 
     perfc_incra(vmexits, exit_reason);
 
@@ -4645,7 +4644,7 @@  bool vmx_vmenter_helper(const struct cpu_user_regs *regs)
     if ( unlikely(curr->arch.hvm.vmx.lbr_flags & LBR_FIXUP_MASK) )
         lbr_fixup();
 
-    HVMTRACE_ND(VMENTRY, 0, 1/*cycles*/, 0, 0, 0, 0, 0, 0, 0);
+    HVMTRACE_ND(VMENTRY, 0, 1/*cycles*/);
 
     __vmwrite(GUEST_RIP,    regs->rip);
     __vmwrite(GUEST_RSP,    regs->rsp);
diff --git a/xen/include/asm-x86/hvm/trace.h b/xen/include/asm-x86/hvm/trace.h
index 5cd459b855b7..145b59f6ac65 100644
--- a/xen/include/asm-x86/hvm/trace.h
+++ b/xen/include/asm-x86/hvm/trace.h
@@ -67,38 +67,30 @@ 
 #define TRACE_2_LONG_4D(_e, d1, d2, d3, d4, ...) \
     TRACE_6D(_e, d1, d2, d3, d4)
 
-#define HVMTRACE_ND(evt, modifier, cycles, count, d1, d2, d3, d4, d5, d6) \
+#define HVMTRACE_ND(evt, modifier, cycles, ...)                           \
     do {                                                                  \
         if ( unlikely(tb_init_done) && DO_TRC_HVM_ ## evt )               \
         {                                                                 \
-            struct {                                                      \
-                u32 d[6];                                                 \
-            } _d;                                                         \
-            _d.d[0]=(d1);                                                 \
-            _d.d[1]=(d2);                                                 \
-            _d.d[2]=(d3);                                                 \
-            _d.d[3]=(d4);                                                 \
-            _d.d[4]=(d5);                                                 \
-            _d.d[5]=(d6);                                                 \
+            uint32_t _d[] = { __VA_ARGS__ };                              \
             __trace_var(TRC_HVM_ ## evt | (modifier), cycles,             \
-                        sizeof(*_d.d) * count, &_d);                      \
+                        sizeof(_d), sizeof(_d) ? _d : NULL);              \
         }                                                                 \
     } while(0)
 
 #define HVMTRACE_6D(evt, d1, d2, d3, d4, d5, d6)    \
-    HVMTRACE_ND(evt, 0, 0, 6, d1, d2, d3, d4, d5, d6)
+    HVMTRACE_ND(evt, 0, 0, d1, d2, d3, d4, d5, d6)
 #define HVMTRACE_5D(evt, d1, d2, d3, d4, d5)        \
-    HVMTRACE_ND(evt, 0, 0, 5, d1, d2, d3, d4, d5,  0)
+    HVMTRACE_ND(evt, 0, 0, d1, d2, d3, d4, d5)
 #define HVMTRACE_4D(evt, d1, d2, d3, d4)            \
-    HVMTRACE_ND(evt, 0, 0, 4, d1, d2, d3, d4,  0,  0)
+    HVMTRACE_ND(evt, 0, 0, d1, d2, d3, d4)
 #define HVMTRACE_3D(evt, d1, d2, d3)                \
-    HVMTRACE_ND(evt, 0, 0, 3, d1, d2, d3,  0,  0,  0)
+    HVMTRACE_ND(evt, 0, 0, d1, d2, d3)
 #define HVMTRACE_2D(evt, d1, d2)                    \
-    HVMTRACE_ND(evt, 0, 0, 2, d1, d2,  0,  0,  0,  0)
+    HVMTRACE_ND(evt, 0, 0, d1, d2)
 #define HVMTRACE_1D(evt, d1)                        \
-    HVMTRACE_ND(evt, 0, 0, 1, d1,  0,  0,  0,  0,  0)
+    HVMTRACE_ND(evt, 0, 0, d1)
 #define HVMTRACE_0D(evt)                            \
-    HVMTRACE_ND(evt, 0, 0, 0,  0,  0,  0,  0,  0,  0)
+    HVMTRACE_ND(evt, 0, 0)
 
 #define HVMTRACE_LONG_1D(evt, d1)                  \
                    HVMTRACE_2D(evt ## 64, (d1) & 0xFFFFFFFF, (d1) >> 32)