diff mbox series

[4/5] xen/wait: Use relative stack adjustments

Message ID 20220718071825.22113-5-andrew.cooper3@citrix.com (mailing list archive)
State New, archived
Headers show
Series xen/wait: Improvements | expand

Commit Message

Andrew Cooper July 18, 2022, 7:18 a.m. UTC
The waitqueue's esp field is overloaded.  It serves both as an indication that
the waitqueue is in use, and as a direction to check_wakeup_from_wait() as to
where to adjust the stack pointer to, but using an absolute pointer comes with
a cost if requiring the vCPU to wake up on the same pCPU it went to sleep on.

Instead, have the waitqueue just keep track of how much data is on wqv->stack.
This is no practical change in __prepare_to_wait() (it already calculated the
delta) but split the result out of the (also overloaded) %rsi output parameter
by using a separate register instead.

check_wakeup_from_wait() has a bit more work to do.  It now needs to calculate
the adjustment to %rsp rather than having the new %rsp provided as a
parameter.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
CC: Jan Beulich <JBeulich@suse.com>
CC: Roger Pau Monné <roger.pau@citrix.com>
CC: Wei Liu <wl@xen.org>
---
 xen/common/wait.c | 44 ++++++++++++++++++++++++++++----------------
 1 file changed, 28 insertions(+), 16 deletions(-)

Comments

Jan Beulich July 18, 2022, 10:29 a.m. UTC | #1
On 18.07.2022 09:18, Andrew Cooper wrote:
> @@ -121,11 +121,11 @@ void wake_up_all(struct waitqueue_head *wq)
>  
>  static void __prepare_to_wait(struct waitqueue_vcpu *wqv)
>  {
> -    struct cpu_info *cpu_info = get_cpu_info();
>      struct vcpu *curr = current;
>      unsigned long dummy;
> +    unsigned int used;
>  
> -    ASSERT(wqv->esp == 0);
> +    ASSERT(wqv->used == 0);

Minor: Use ! like you do further down?

> @@ -154,24 +154,25 @@ static void __prepare_to_wait(struct waitqueue_vcpu *wqv)
>          "push %%rbx; push %%rbp; push %%r12;"
>          "push %%r13; push %%r14; push %%r15;"
>  
> -        "sub %%esp,%%ecx;"
> +        "sub %%esp, %%ecx;" /* ecx = delta to cpu_info */
>          "cmp %[sz], %%ecx;"
>          "ja .L_skip;"       /* Bail if >4k */

According to the inputs, %eax is still 0 when bailing here, so the
check below won't find "used > PAGE_SIZE". I further wonder why you
don't store directly into wqv->used, and go through %eax instead.

> -        "mov %%rsp,%%rsi;"
> +
> +        "mov %%ecx, %%eax;"
> +        "mov %%rsp, %%rsi;" /* Copy from the stack, into wqv->stack */
>  
>          /* check_wakeup_from_wait() longjmp()'s to this point. */
>          ".L_wq_resume: rep movsb;"
> -        "mov %%rsp,%%rsi;"
>  
>          ".L_skip:"
>          "pop %%r15; pop %%r14; pop %%r13;"
>          "pop %%r12; pop %%rbp; pop %%rbx;"
> -        : "=&S" (wqv->esp), "=&c" (dummy), "=&D" (dummy)
> -        : "0" (0), "1" (cpu_info), "2" (wqv->stack),
> +        : "=a" (used), "=D" (dummy),     "=c" (dummy),         "=&S" (dummy)

You can't validly drop & from =D and =c. If you want to stick to
going through %eax, I think that one wants & added as well and ...

> +        : "a" (0),     "D" (wqv->stack), "c" (get_cpu_info()),

... the (unused) input here dropped.

> @@ -220,14 +224,22 @@ void check_wakeup_from_wait(void)
>       * the rep movs in __prepare_to_wait(), it copies from wqv->stack over the
>       * active stack.
>       *
> +     * We are also bound by __prepare_to_wait()'s output constraints, so %eax
> +     * needs to be wqv->used.
> +     *
>       * All other GPRs are available for use; they're either restored from
>       * wqv->stack or explicitly clobbered.
>       */
> -    asm volatile ( "mov %%rdi, %%rsp;"
> +    asm volatile ( "sub %%esp, %k[var];" /* var = delta to cpu_info */
> +                   "neg %k[var];"
> +                   "add %%ecx, %k[var];" /* var = -delta + wqv->used */
> +
> +                   "sub %[var], %%rsp;"  /* Adjust %rsp down to make room */
> +                   "mov %%rsp, %%rdi;"   /* Copy from wqv->stack, into the stack */
>                     "jmp .L_wq_resume;"
> -                   :
> -                   : "S" (wqv->stack), "D" (wqv->esp),
> -                     "c" ((char *)get_cpu_info() - (char *)wqv->esp)
> +                   : "=D" (tmp), [var] "=&r" (tmp)
> +                   : "S" (wqv->stack), "c" (wqv->used), "a" (wqv->used),

If you want to stick to going through %eax, then I think you need to
make it an output here: "+a" (wqv->used), so it is clear that the
register is blocked from any other use throughout the asm(). Or you
could use "=&a" and copy %ecx into %eax inside the asm(). Both may
cause the compiler to emit dead code updating wqv->used right after
the asm(), so I think not going through %eax is the more desirable
approach (but I may well be overlooking a reason why directly
dealing with wqv->used in __prepare_to_wait() isn't an option).

Strictly speaking (in particular if [right now] there wasn't just a
branch after updating %rdi) you also again want "=&D" here.

Jan
Andrew Cooper July 18, 2022, 10:41 a.m. UTC | #2
On 18/07/2022 11:29, Jan Beulich wrote:
> On 18.07.2022 09:18, Andrew Cooper wrote:
>> -        "mov %%rsp,%%rsi;"
>> +
>> +        "mov %%ecx, %%eax;"
>> +        "mov %%rsp, %%rsi;" /* Copy from the stack, into wqv->stack */
>>  
>>          /* check_wakeup_from_wait() longjmp()'s to this point. */
>>          ".L_wq_resume: rep movsb;"
>> -        "mov %%rsp,%%rsi;"
>>  
>>          ".L_skip:"
>>          "pop %%r15; pop %%r14; pop %%r13;"
>>          "pop %%r12; pop %%rbp; pop %%rbx;"
>> -        : "=&S" (wqv->esp), "=&c" (dummy), "=&D" (dummy)
>> -        : "0" (0), "1" (cpu_info), "2" (wqv->stack),
>> +        : "=a" (used), "=D" (dummy),     "=c" (dummy),         "=&S" (dummy)
> You can't validly drop & from =D and =c.

On the contrary, GCC demands it.

& is only relevant, and only permitted, when there is not an explicit
input tied to the same register.

When there is an explicit input tied to the same register, of course it
can't be reused for another input before being used as an output.

https://godbolt.org/z/4vWP4PKc5

~Andrew
Jan Beulich July 18, 2022, 10:51 a.m. UTC | #3
On 18.07.2022 12:41, Andrew Cooper wrote:
> On 18/07/2022 11:29, Jan Beulich wrote:
>> On 18.07.2022 09:18, Andrew Cooper wrote:
>>> -        "mov %%rsp,%%rsi;"
>>> +
>>> +        "mov %%ecx, %%eax;"
>>> +        "mov %%rsp, %%rsi;" /* Copy from the stack, into wqv->stack */
>>>  
>>>          /* check_wakeup_from_wait() longjmp()'s to this point. */
>>>          ".L_wq_resume: rep movsb;"
>>> -        "mov %%rsp,%%rsi;"
>>>  
>>>          ".L_skip:"
>>>          "pop %%r15; pop %%r14; pop %%r13;"
>>>          "pop %%r12; pop %%rbp; pop %%rbx;"
>>> -        : "=&S" (wqv->esp), "=&c" (dummy), "=&D" (dummy)
>>> -        : "0" (0), "1" (cpu_info), "2" (wqv->stack),
>>> +        : "=a" (used), "=D" (dummy),     "=c" (dummy),         "=&S" (dummy)
>> You can't validly drop & from =D and =c.
> 
> On the contrary, GCC demands it.
> 
> & is only relevant, and only permitted, when there is not an explicit
> input tied to the same register.
> 
> When there is an explicit input tied to the same register, of course it
> can't be reused for another input before being used as an output.

Oh, sorry - I neglected to take into account this adding of inputs.

Jan
diff mbox series

Patch

diff --git a/xen/common/wait.c b/xen/common/wait.c
index 4bc030d1a09d..4f1daf650bc4 100644
--- a/xen/common/wait.c
+++ b/xen/common/wait.c
@@ -32,8 +32,8 @@  struct waitqueue_vcpu {
      * Xen/x86 does not have per-vcpu hypervisor stacks. So we must save the
      * hypervisor context before sleeping (descheduling), setjmp/longjmp-style.
      */
-    void *esp;
     char *stack;
+    unsigned int used;
 #endif
 };
 
@@ -121,11 +121,11 @@  void wake_up_all(struct waitqueue_head *wq)
 
 static void __prepare_to_wait(struct waitqueue_vcpu *wqv)
 {
-    struct cpu_info *cpu_info = get_cpu_info();
     struct vcpu *curr = current;
     unsigned long dummy;
+    unsigned int used;
 
-    ASSERT(wqv->esp == 0);
+    ASSERT(wqv->used == 0);
 
     /* Save current VCPU affinity; force wakeup on *this* CPU only. */
     if ( vcpu_temporary_affinity(curr, smp_processor_id(), VCPU_AFFINITY_WAIT) )
@@ -154,24 +154,25 @@  static void __prepare_to_wait(struct waitqueue_vcpu *wqv)
         "push %%rbx; push %%rbp; push %%r12;"
         "push %%r13; push %%r14; push %%r15;"
 
-        "sub %%esp,%%ecx;"
+        "sub %%esp, %%ecx;" /* ecx = delta to cpu_info */
         "cmp %[sz], %%ecx;"
         "ja .L_skip;"       /* Bail if >4k */
-        "mov %%rsp,%%rsi;"
+
+        "mov %%ecx, %%eax;"
+        "mov %%rsp, %%rsi;" /* Copy from the stack, into wqv->stack */
 
         /* check_wakeup_from_wait() longjmp()'s to this point. */
         ".L_wq_resume: rep movsb;"
-        "mov %%rsp,%%rsi;"
 
         ".L_skip:"
         "pop %%r15; pop %%r14; pop %%r13;"
         "pop %%r12; pop %%rbp; pop %%rbx;"
-        : "=&S" (wqv->esp), "=&c" (dummy), "=&D" (dummy)
-        : "0" (0), "1" (cpu_info), "2" (wqv->stack),
+        : "=a" (used), "=D" (dummy),     "=c" (dummy),         "=&S" (dummy)
+        : "a" (0),     "D" (wqv->stack), "c" (get_cpu_info()),
           [sz] "i" (PAGE_SIZE)
-        : "memory", "rax", "rdx", "r8", "r9", "r10", "r11" );
+        : "memory", "rdx", "r8", "r9", "r10", "r11" );
 
-    if ( unlikely(wqv->esp == 0) )
+    if ( unlikely(used > PAGE_SIZE) )
     {
         gdprintk(XENLOG_ERR, "Stack too large in %s\n", __func__);
         domain_crash(curr->domain);
@@ -179,11 +180,13 @@  static void __prepare_to_wait(struct waitqueue_vcpu *wqv)
         for ( ; ; )
             do_softirq();
     }
+
+    wqv->used = used;
 }
 
 static void __finish_wait(struct waitqueue_vcpu *wqv)
 {
-    wqv->esp = NULL;
+    wqv->used = 0;
     vcpu_temporary_affinity(current, NR_CPUS, VCPU_AFFINITY_WAIT);
 }
 
@@ -191,10 +194,11 @@  void check_wakeup_from_wait(void)
 {
     struct vcpu *curr = current;
     struct waitqueue_vcpu *wqv = curr->waitqueue_vcpu;
+    unsigned long tmp;
 
     ASSERT(list_empty(&wqv->list));
 
-    if ( likely(wqv->esp == NULL) )
+    if ( likely(!wqv->used) )
         return;
 
     /* Check if we are still pinned. */
@@ -220,14 +224,22 @@  void check_wakeup_from_wait(void)
      * the rep movs in __prepare_to_wait(), it copies from wqv->stack over the
      * active stack.
      *
+     * We are also bound by __prepare_to_wait()'s output constraints, so %eax
+     * needs to be wqv->used.
+     *
      * All other GPRs are available for use; they're either restored from
      * wqv->stack or explicitly clobbered.
      */
-    asm volatile ( "mov %%rdi, %%rsp;"
+    asm volatile ( "sub %%esp, %k[var];" /* var = delta to cpu_info */
+                   "neg %k[var];"
+                   "add %%ecx, %k[var];" /* var = -delta + wqv->used */
+
+                   "sub %[var], %%rsp;"  /* Adjust %rsp down to make room */
+                   "mov %%rsp, %%rdi;"   /* Copy from wqv->stack, into the stack */
                    "jmp .L_wq_resume;"
-                   :
-                   : "S" (wqv->stack), "D" (wqv->esp),
-                     "c" ((char *)get_cpu_info() - (char *)wqv->esp)
+                   : "=D" (tmp), [var] "=&r" (tmp)
+                   : "S" (wqv->stack), "c" (wqv->used), "a" (wqv->used),
+                     "[var]" (get_cpu_info())
                    : "memory" );
     unreachable();
 }