
[v2,6/6] x86/time: relax barriers

Message ID 57A2080202000078001023AA@prv-mh.provo.novell.com (mailing list archive)
State New, archived

Commit Message

Jan Beulich Aug. 3, 2016, 1:04 p.m. UTC
On x86 there's no need for full barriers in loops waiting for some
memory location to change. Nor do we need full barriers between two
reads and two writes - SMP ones fully suffice (and I actually think
they could in fact be dropped, since atomic_*() operations should
already provide enough ordering).
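
[ As an aside, a minimal user-space sketch of the two wait-loop styles the
  patch switches between. The macro definitions below are simplified
  stand-ins for the x86 primitives (a full fence versus the PAUSE hint),
  not Xen's actual ones. ]

/* Toy stand-ins for the x86 primitives -- not Xen's actual definitions. */
#define mb()         asm volatile ( "mfence" : : : "memory" )
#define cpu_relax()  asm volatile ( "rep; nop" : : : "memory" ) /* PAUSE */

static volatile int flag;

/* Old style: a full fence on every poll iteration, which serialises all
 * outstanding loads and stores but does nothing to help the loop exit. */
static void wait_heavy(void)
{
    while ( !flag )
        mb();
}

/* New style: only hint to the CPU that this is a spin-wait; the "memory"
 * clobber still forces the compiler to re-read 'flag' each iteration. */
static void wait_light(void)
{
    while ( !flag )
        cpu_relax();
}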

--- a/xen/arch/x86/time.c
+++ b/xen/arch/x86/time.c
@@ -1206,7 +1206,7 @@ static void tsc_check_slave(void *unused
     unsigned int cpu = smp_processor_id();
     local_irq_disable();
     while ( !cpumask_test_cpu(cpu, &tsc_check_cpumask) )
-        mb();
+        cpu_relax();
     check_tsc_warp(cpu_khz, &tsc_max_warp);
     cpumask_clear_cpu(cpu, &tsc_check_cpumask);
     local_irq_enable();
@@ -1271,7 +1271,7 @@ static void time_calibration_tsc_rendezv
         if ( smp_processor_id() == 0 )
         {
             while ( atomic_read(&r->semaphore) != (total_cpus - 1) )
-                mb();
+                cpu_relax();
 
             if ( r->master_stime == 0 )
             {
@@ -1284,21 +1284,21 @@ static void time_calibration_tsc_rendezv
                 write_tsc(r->master_tsc_stamp);
 
             while ( atomic_read(&r->semaphore) != (2*total_cpus - 1) )
-                mb();
+                cpu_relax();
             atomic_set(&r->semaphore, 0);
         }
         else
         {
             atomic_inc(&r->semaphore);
             while ( atomic_read(&r->semaphore) < total_cpus )
-                mb();
+                cpu_relax();
 
             if ( i == 0 )
                 write_tsc(r->master_tsc_stamp);
 
             atomic_inc(&r->semaphore);
             while ( atomic_read(&r->semaphore) > total_cpus )
-                mb();
+                cpu_relax();
         }
     }
 
@@ -1316,7 +1316,7 @@ static void time_calibration_std_rendezv
         while ( atomic_read(&r->semaphore) != (total_cpus - 1) )
             cpu_relax();
         r->master_stime = read_platform_stime();
-        mb(); /* write r->master_stime /then/ signal */
+        smp_wmb(); /* write r->master_stime /then/ signal */
         atomic_inc(&r->semaphore);
     }
     else
@@ -1324,7 +1324,7 @@ static void time_calibration_std_rendezv
         atomic_inc(&r->semaphore);
         while ( atomic_read(&r->semaphore) != total_cpus )
             cpu_relax();
-        mb(); /* receive signal /then/ read r->master_stime */
+        smp_rmb(); /* receive signal /then/ read r->master_stime */
     }
 
     time_calibration_rendezvous_tail(r);

Comments

Andrew Cooper Aug. 3, 2016, 1:11 p.m. UTC | #1
On 03/08/16 14:04, Jan Beulich wrote:
> On x86 there's no need for full barriers in loops waiting for some
> memory location to change. Nor do we need full barriers between two
> reads and two writes - SMP ones fully suffice (and I actually think
> they could in fact be dropped, since atomic_*() operations should
> already provide enough ordering).

Missing a SoB,

Which "ones" are you referring to?  atomic_*() is only ordered with
respect to the atomic_t used.

Overall, I think the change is correct, so Reviewed-by: Andrew Cooper
<andrew.cooper3@citrix.com>

There are definitely more mis-uses of mandatory barriers in Xen,
although I haven't done a full audit yet.

~Andrew
Jan Beulich Aug. 3, 2016, 1:22 p.m. UTC | #2
>>> On 03.08.16 at 15:11, <andrew.cooper3@citrix.com> wrote:
> On 03/08/16 14:04, Jan Beulich wrote:
>> On x86 there's no need for full barriers in loops waiting for some
>> memory location to change. Nor do we need full barriers between two
>> reads and two writes - SMP ones fully suffice (and I actually think
>> they could in fact be dropped, since atomic_*() operations should
>> already provide enough ordering).
> 
> Missing a SoB,

Oops.

> Which "ones" are you referring to?  atomic_*() is only ordered with
> respect to the atomic_t used.

Oh, right - the one followed by atomic_inc() could be converted
to plain barrier() (but not dropped entirely), while the other one
follows an atomic_read() only, and hence can't be dropped. I'll
remove that part of the description.
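
[ A rough sketch of that reasoning, with simplified stand-ins rather than
  Xen's real atomic_t/atomic_inc(): the writer's barrier precedes a
  lock-prefixed increment, which x86 already treats as a full hardware
  barrier, so only compiler reordering still needs fencing; the reader's
  barrier follows a plain polling load and therefore has to stay. ]

/* Simplified stand-ins -- not Xen's actual atomic implementation. */
typedef struct { volatile int counter; } atomic_t;

#define barrier()       asm volatile ( "" : : : "memory" )
#define atomic_read(v)  ((v)->counter)

static inline void atomic_inc(atomic_t *v)
{
    /* The LOCK prefix already acts as a full barrier at the hardware level. */
    asm volatile ( "lock incl %0" : "+m" (v->counter) );
}

static atomic_t sem;
static unsigned long master_stime;

static void signaller(unsigned long now)
{
    master_stime = now;
    barrier();            /* compiler barrier suffices: the locked increment
                             below already orders the store for other CPUs */
    atomic_inc(&sem);
}

static unsigned long waiter(int total_cpus)
{
    while ( atomic_read(&sem) != total_cpus )
        ;                 /* cpu_relax() in the real code */
    barrier();            /* cannot be dropped: atomic_read() is a plain load,
                             so stop the compiler hoisting the read below
                             ahead of (or out of) the poll loop */
    return master_stime;
}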

> Overall, I think the change is correct, so Reviewed-by: Andrew Cooper
> <andrew.cooper3@citrix.com>

Thanks.

Jan