diff mbox series

[5/8] xen/sched: use keyhandler locks when dumping data to console

Message ID 20200213125449.14226-6-jgross@suse.com (mailing list archive)
State New, archived
Headers show
Series xen: don't let keyhandlers block indefinitely on locks | expand

Commit Message

Jürgen Groß Feb. 13, 2020, 12:54 p.m. UTC
Instead of using the normal locks, use the keyhandler-provided trylocks
with timeouts. This requires a special primitive for the scheduler
lock.

Signed-off-by: Juergen Gross <jgross@suse.com>
---
 xen/common/sched/core.c    |  7 +++++++
 xen/common/sched/cpupool.c |  4 +++-
 xen/common/sched/credit.c  | 25 ++++++++++++++++++-------
 xen/common/sched/credit2.c | 17 +++++++++++------
 xen/common/sched/null.c    | 42 +++++++++++++++++++++++++-----------------
 xen/common/sched/private.h |  1 +
 xen/common/sched/rt.c      |  7 +++++--
 7 files changed, 70 insertions(+), 33 deletions(-)

Comments

Dario Faggioli Feb. 19, 2020, 2:31 p.m. UTC | #1
On Thu, 2020-02-13 at 13:54 +0100, Juergen Gross wrote:
> Instead of using the normal locks use the keyhandler provided
> trylocks
> with timeouts. This requires a special primitive for the scheduler
> lock.
> 
So, FWIW, I tend to agree with Andrew on the general aspects of this.
I.e., I personally don't think that the added complexity, however small
one may judge it to be, is worth it... I'm not even sure not using
regular locks is really an improvement.

When you mentioned, in your other mail, that having something like this
would have saved a lot of reboots during the development of core-
scheduling, would _just_ disabling the watchdog have achieved the same?

Anyway, I've had a look at this patch. _If_ we go with this new lock
thing, the modifications to the scheduler code done here seem fine to
me.

Regards
Jürgen Groß Feb. 19, 2020, 3:09 p.m. UTC | #2
On 19.02.20 15:31, Dario Faggioli wrote:
> On Thu, 2020-02-13 at 13:54 +0100, Juergen Gross wrote:
>> Instead of using the normal locks use the keyhandler provided
>> trylocks
>> with timeouts. This requires a special primitive for the scheduler
>> lock.
>>
> So, FWIW, I tend to agree with Andrew on the general aspects of this.
> I.e., I personally don't think that the added complexity, however small
> one may judge it to be, is worth it... I'm not even sure not using
> regular locks is really an improvement.
> 
> When you mentioned, in your other mail, that having something like this
> would have saved a lot of reboots during the development of core-
> scheduling, would _just_ disabling the watchdog have achieved the same?

No.

I was hit by the keyhandler just waiting for a lock which would never
be freed. This blocked printing information for cpus I would have
liked to see (chances were about 50% in my case that the "interesting"
cpu had a higher cpu number than the locked one).

> 
> Anyway, I've had a look at this patch. _If_ we go with this new lock
> thing, the modifications to the scheduler code done here seem fine to
> me.

Thanks.


Juergen
diff mbox series

Patch

diff --git a/xen/common/sched/core.c b/xen/common/sched/core.c
index d4e8944e0e..7b8b0fe80e 100644
--- a/xen/common/sched/core.c
+++ b/xen/common/sched/core.c
@@ -21,6 +21,7 @@ 
 #include <xen/domain.h>
 #include <xen/delay.h>
 #include <xen/event.h>
+#include <xen/keyhandler.h>
 #include <xen/time.h>
 #include <xen/timer.h>
 #include <xen/perfc.h>
@@ -3302,6 +3303,12 @@  void __init sched_setup_dom0_vcpus(struct domain *d)
 }
 #endif
 
+spinlock_t *keyhandler_pcpu_lock(unsigned int cpu)
+{
+    keyhandler_lock_body(spinlock_t *, pcpu_schedule_trylock(cpu),
+                         "could not get pcpu lock, cpu=%u\n", cpu);
+}
+
 #ifdef CONFIG_COMPAT
 #include "compat.c"
 #endif
diff --git a/xen/common/sched/cpupool.c b/xen/common/sched/cpupool.c
index 476916c6ea..5c181e9772 100644
--- a/xen/common/sched/cpupool.c
+++ b/xen/common/sched/cpupool.c
@@ -893,7 +893,9 @@  void dump_runq(unsigned char key)
     s_time_t         now = NOW();
     struct cpupool **c;
 
-    spin_lock(&cpupool_lock);
+    if ( !keyhandler_spin_lock(&cpupool_lock, "could not get cpupools") )
+        return;
+
     local_irq_save(flags);
 
     printk("sched_smt_power_savings: %s\n",
diff --git a/xen/common/sched/credit.c b/xen/common/sched/credit.c
index dee87e7fe2..165ff26bb8 100644
--- a/xen/common/sched/credit.c
+++ b/xen/common/sched/credit.c
@@ -2057,8 +2057,15 @@  csched_dump_pcpu(const struct scheduler *ops, int cpu)
      * - we scan through the runqueue, so we need the proper runqueue
      *   lock (the one of the runqueue of this cpu).
      */
-    spin_lock(&prv->lock);
-    lock = pcpu_schedule_lock(cpu);
+    if ( !keyhandler_spin_lock(&prv->lock, "could not get credit data") )
+        return;
+
+    lock = keyhandler_pcpu_lock(cpu);
+    if ( !lock )
+    {
+        spin_unlock(&prv->lock);
+        return;
+    }
 
     spc = CSCHED_PCPU(cpu);
     runq = &spc->runq;
@@ -2098,7 +2105,8 @@  csched_dump(const struct scheduler *ops)
     struct csched_private *prv = CSCHED_PRIV(ops);
     int loop;
 
-    spin_lock(&prv->lock);
+    if ( !keyhandler_spin_lock(&prv->lock, "could not get credit data") )
+        return;
 
     printk("info:\n"
            "\tncpus              = %u\n"
@@ -2142,12 +2150,15 @@  csched_dump(const struct scheduler *ops)
             spinlock_t *lock;
 
             svc = list_entry(iter_svc, struct csched_unit, active_unit_elem);
-            lock = unit_schedule_lock(svc->unit);
+            lock = keyhandler_pcpu_lock(svc->unit->res->master_cpu);
 
-            printk("\t%3d: ", ++loop);
-            csched_dump_unit(svc);
+            if ( lock )
+            {
+                printk("\t%3d: ", ++loop);
+                csched_dump_unit(svc);
 
-            unit_schedule_unlock(lock, svc->unit);
+                pcpu_schedule_unlock(lock, svc->unit->res->master_cpu);
+            }
         }
     }
 
diff --git a/xen/common/sched/credit2.c b/xen/common/sched/credit2.c
index e76d2ed543..28b03fe744 100644
--- a/xen/common/sched/credit2.c
+++ b/xen/common/sched/credit2.c
@@ -3655,7 +3655,8 @@  csched2_dump(const struct scheduler *ops)
      * We need the private scheduler lock as we access global
      * scheduler data and (below) the list of active domains.
      */
-    read_lock(&prv->lock);
+    if ( !keyhandler_read_lock(&prv->lock, "could not get credit2 data") )
+        return;
 
     printk("Active queues: %d\n"
            "\tdefault-weight     = %d\n",
@@ -3711,12 +3712,15 @@  csched2_dump(const struct scheduler *ops)
             struct csched2_unit * const svc = csched2_unit(unit);
             spinlock_t *lock;
 
-            lock = unit_schedule_lock(unit);
+            lock = keyhandler_pcpu_lock(unit->res->master_cpu);
 
-            printk("\t%3d: ", ++loop);
-            csched2_dump_unit(prv, svc);
+            if ( lock )
+            {
+                printk("\t%3d: ", ++loop);
+                csched2_dump_unit(prv, svc);
 
-            unit_schedule_unlock(lock, unit);
+                pcpu_schedule_unlock(lock, unit->res->master_cpu);
+            }
         }
     }
 
@@ -3727,7 +3731,8 @@  csched2_dump(const struct scheduler *ops)
         int loop = 0;
 
         /* We need the lock to scan the runqueue. */
-        spin_lock(&rqd->lock);
+        if ( !keyhandler_spin_lock(&rqd->lock, "could not get runq") )
+            continue;
 
         printk("Runqueue %d:\n", i);
 
diff --git a/xen/common/sched/null.c b/xen/common/sched/null.c
index 3b31703d7e..fe59ce17fe 100644
--- a/xen/common/sched/null.c
+++ b/xen/common/sched/null.c
@@ -28,6 +28,7 @@ 
  * if the scheduler is used inside a cpupool.
  */
 
+#include <xen/keyhandler.h>
 #include <xen/sched.h>
 #include <xen/softirq.h>
 #include <xen/trace.h>
@@ -982,7 +983,8 @@  static void null_dump(const struct scheduler *ops)
     struct list_head *iter;
     unsigned int loop;
 
-    spin_lock(&prv->lock);
+    if ( !keyhandler_spin_lock(&prv->lock, "could not get null data") )
+        return;
 
     printk("\tcpus_free = %*pbl\n", CPUMASK_PR(&prv->cpus_free));
 
@@ -1001,31 +1003,37 @@  static void null_dump(const struct scheduler *ops)
             struct null_unit * const nvc = null_unit(unit);
             spinlock_t *lock;
 
-            lock = unit_schedule_lock(unit);
+            lock = keyhandler_pcpu_lock(unit->res->master_cpu);
 
-            printk("\t%3d: ", ++loop);
-            dump_unit(prv, nvc);
-            printk("\n");
+            if ( lock )
+            {
+                printk("\t%3d: ", ++loop);
+                dump_unit(prv, nvc);
+                printk("\n");
 
-            unit_schedule_unlock(lock, unit);
+                pcpu_schedule_unlock(lock, unit->res->master_cpu);
+            }
         }
     }
 
     printk("Waitqueue: ");
     loop = 0;
-    spin_lock(&prv->waitq_lock);
-    list_for_each( iter, &prv->waitq )
+    if ( keyhandler_spin_lock(&prv->waitq_lock, "could not get waitq") )
     {
-        struct null_unit *nvc = list_entry(iter, struct null_unit, waitq_elem);
-
-        if ( loop++ != 0 )
-            printk(", ");
-        if ( loop % 24 == 0 )
-            printk("\n\t");
-        printk("%pdv%d", nvc->unit->domain, nvc->unit->unit_id);
+        list_for_each( iter, &prv->waitq )
+        {
+            struct null_unit *nvc = list_entry(iter, struct null_unit,
+                                               waitq_elem);
+
+            if ( loop++ != 0 )
+                printk(", ");
+            if ( loop % 24 == 0 )
+                printk("\n\t");
+            printk("%pdv%d", nvc->unit->domain, nvc->unit->unit_id);
+        }
+        printk("\n");
+        spin_unlock(&prv->waitq_lock);
     }
-    printk("\n");
-    spin_unlock(&prv->waitq_lock);
 
     spin_unlock(&prv->lock);
 }
diff --git a/xen/common/sched/private.h b/xen/common/sched/private.h
index 2a94179baa..6723f74d28 100644
--- a/xen/common/sched/private.h
+++ b/xen/common/sched/private.h
@@ -631,5 +631,6 @@  struct cpupool *cpupool_get_by_id(int poolid);
 void cpupool_put(struct cpupool *pool);
 int cpupool_add_domain(struct domain *d, int poolid);
 void cpupool_rm_domain(struct domain *d);
+spinlock_t *keyhandler_pcpu_lock(unsigned int cpu);
 
 #endif /* __XEN_SCHED_IF_H__ */
diff --git a/xen/common/sched/rt.c b/xen/common/sched/rt.c
index 16379cb2d2..d4b17e0f8b 100644
--- a/xen/common/sched/rt.c
+++ b/xen/common/sched/rt.c
@@ -354,7 +354,9 @@  rt_dump_pcpu(const struct scheduler *ops, int cpu)
     struct rt_private *prv = rt_priv(ops);
     const struct rt_unit *svc;
 
-    spin_lock(&prv->lock);
+    if ( !keyhandler_spin_lock(&prv->lock, "could not get rt data") )
+        return;
+
     printk("CPU[%02d]\n", cpu);
     /* current UNIT (nothing to say if that's the idle unit). */
     svc = rt_unit(curr_on_cpu(cpu));
@@ -373,7 +375,8 @@  rt_dump(const struct scheduler *ops)
     const struct rt_unit *svc;
     const struct rt_dom *sdom;
 
-    spin_lock(&prv->lock);
+    if ( !keyhandler_spin_lock(&prv->lock, "could not get rt data") )
+        return;
 
     if ( list_empty(&prv->sdom) )
         goto out;