diff mbox

[v3,0/2] Use blocked_lock_lock only to protect blocked_hash

Message ID 54FEA948.2040209@bmw-carit.de (mailing list archive)
State New, archived
Headers show

Commit Message

Daniel Wagner March 10, 2015, 8:20 a.m. UTC
Hi,

On 03/07/2015 03:00 PM, Jeff Layton wrote:
> On Fri,  6 Mar 2015 08:53:30 +0100
> Daniel Wagner <daniel.wagner@bmw-carit.de> wrote:
> 
>> Hi,
>>
>> Finally, I got a bigger machine and did a quick test round. I expected
>> to see some improvements but the resutls do not show any real gain. So
>> they are merely refactoring patches.
>>
> 
> Ok, in that case is there any point in merging these? I'm all for
> breaking up global locks when it makes sense, but if you can't
> demonstrate a clear benefit then I'm less inclined to take the churn.
> 
> Perhaps we should wait to see if a benefit emerges when/if you convert
> the lglock code to use normal spinlocks (like Andi suggested)? That
> seems like a rather simple conversion, and I don't think it's
> "cheating" in any sense of the word.
> 
> I do however wonder why Nick used arch_spinlock_t there when he wrote
> the lglock code instead of normal spinlocks. Was it simply memory usage
> considerations or something else?

I did a complete test run with 4.0.0-rc3, the two patches from this
thread (fs-locks-v10), the spinlock_t conversion (lglock-v0)
and fs-locks-v10 and lglock-v0 combined. Some of the tests take rather
long on my machine (12 minutes per run) so I tweaked it a bit to get
more samples. Each test was run 100 times.

The raw data and scripts are here: http://www.monom.org/lglock/data/

flock01
                             mean   variance      sigma        max        min
                4.0.0-rc3  8930.8708 282098.1663   531.1291  9612.7085  4681.8674
             fs-locks-v10  9081.6789 43493.0287   208.5498  9747.8491  8072.6508
                lglock-v0  9004.9252 12339.3832   111.0828  9489.5420  8493.0763
   fs-locks-v10+lglock-v0  9053.6680 17714.5623   133.0961  9588.7413  8555.0727


flock02
                             mean   variance      sigma        max        min
                4.0.0-rc3   553.1720  1057.6026    32.5208   606.2989   479.5528
             fs-locks-v10   596.0683  1486.8345    38.5595   662.6566   512.4865
                lglock-v0   595.2150   976.8544    31.2547   646.7506   527.2517
   fs-locks-v10+lglock-v0   587.5492   989.9467    31.4634   646.2098   509.6020


lease01
                             mean   variance      sigma        max        min
                4.0.0-rc3   505.2654   780.7545    27.9420   564.2530   433.7727
             fs-locks-v10   523.6855   705.2606    26.5567   570.3401   442.6539
                lglock-v0   516.7525  1026.7596    32.0431   573.8766   433.4124
   fs-locks-v10+lglock-v0   513.6507   751.1674    27.4074   567.0972   435.6154


lease02
                             mean   variance      sigma        max        min
                4.0.0-rc3  3588.2069 26736.0422   163.5116  3769.7430  3154.8405
             fs-locks-v10  3566.0658 34225.6410   185.0017  3747.6039  3188.5478
                lglock-v0  3585.0648 28720.1679   169.4703  3758.7240  3150.9310
   fs-locks-v10+lglock-v0  3621.9347 17706.2354   133.0648  3744.0095  3174.0998


posix01
                             mean   variance      sigma        max        min
                4.0.0-rc3  9297.5030 26911.6602   164.0477  9941.8094  8963.3528
             fs-locks-v10  9462.8665 44762.9316   211.5725 10504.3205  9202.5748
                lglock-v0  9320.4716 47168.9903   217.1842 10401.6565  9054.1950
   fs-locks-v10+lglock-v0  9458.1463 58231.8844   241.3128 10564.2086  9193.1177


posix02
                             mean   variance      sigma        max        min
                4.0.0-rc3   920.6533  2648.1293    51.4600   983.4213   790.1792
             fs-locks-v10   915.3972  4384.6821    66.2169  1004.2339   795.4129
                lglock-v0   888.1910  4644.0478    68.1473   983.8412   777.4851
   fs-locks-v10+lglock-v0   926.4184  1834.6481    42.8328   975.8544   794.4582


posix03
                             mean   variance      sigma        max        min
                4.0.0-rc3     7.5202     0.0456     0.2136     7.9697     6.9803
             fs-locks-v10     7.5203     0.0640     0.2529     7.9619     7.0063
                lglock-v0     7.4738     0.0349     0.1867     7.8011     7.0973
   fs-locks-v10+lglock-v0     7.5856     0.0595     0.2439     8.1098     6.8695


posix04
                             mean   variance      sigma        max        min
                4.0.0-rc3     0.6699     0.1091     0.3303     3.3845     0.5247
             fs-locks-v10     0.6320     0.0026     0.0511     0.9064     0.5195
                lglock-v0     0.6460     0.0039     0.0623     1.0830     0.5438
   fs-locks-v10+lglock-v0     0.6589     0.0338     0.1838     2.0346     0.5393


Hmm, not really convincing numbers. I hoped to see scaling effects but nope, no fun.

cheers,
daniel



The spinlock_t conversion patch (lglock-v0) I used:



--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Jeff Layton March 14, 2015, 12:40 p.m. UTC | #1
On Tue, 10 Mar 2015 09:20:24 +0100
Daniel Wagner <daniel.wagner@bmw-carit.de> wrote:

> Hi,
> 
> On 03/07/2015 03:00 PM, Jeff Layton wrote:
> > On Fri,  6 Mar 2015 08:53:30 +0100
> > Daniel Wagner <daniel.wagner@bmw-carit.de> wrote:
> > 
> >> Hi,
> >>
> >> Finally, I got a bigger machine and did a quick test round. I expected
> >> to see some improvements but the resutls do not show any real gain. So
> >> they are merely refactoring patches.
> >>
> > 
> > Ok, in that case is there any point in merging these? I'm all for
> > breaking up global locks when it makes sense, but if you can't
> > demonstrate a clear benefit then I'm less inclined to take the churn.
> > 
> > Perhaps we should wait to see if a benefit emerges when/if you convert
> > the lglock code to use normal spinlocks (like Andi suggested)? That
> > seems like a rather simple conversion, and I don't think it's
> > "cheating" in any sense of the word.
> > 
> > I do however wonder why Nick used arch_spinlock_t there when he wrote
> > the lglock code instead of normal spinlocks. Was it simply memory usage
> > considerations or something else?
> 
> I did a complete test run with 4.0.0-rc3, the two patches from this
> thread (fs-locks-v10), the spinlock_t conversion (lglock-v0)
> and fs-locks-v10 and lglock-v0 combined. Some of the test take rather
> long on my machine (12 minutes per run) so I tweaked it a bit to get
> more samples. Each test was run 100 times.
> 
> The raw data and scripts are here: http://www.monom.org/lglock/data/
> 
> flock01
>                              mean   variance      sigma        max        min
>                 4.0.0-rc3  8930.8708 282098.1663   531.1291  9612.7085  4681.8674
>              fs-locks-v10  9081.6789 43493.0287   208.5498  9747.8491  8072.6508
>                 lglock-v0  9004.9252 12339.3832   111.0828  9489.5420  8493.0763
>    fs-locks-v10+lglock-v0  9053.6680 17714.5623   133.0961  9588.7413  8555.0727
> 
> 
> flock02
>                              mean   variance      sigma        max        min
>                 4.0.0-rc3   553.1720  1057.6026    32.5208   606.2989   479.5528
>              fs-locks-v10   596.0683  1486.8345    38.5595   662.6566   512.4865
>                 lglock-v0   595.2150   976.8544    31.2547   646.7506   527.2517
>    fs-locks-v10+lglock-v0   587.5492   989.9467    31.4634   646.2098   509.6020
> 
> 
> lease01
>                              mean   variance      sigma        max        min
>                 4.0.0-rc3   505.2654   780.7545    27.9420   564.2530   433.7727
>              fs-locks-v10   523.6855   705.2606    26.5567   570.3401   442.6539
>                 lglock-v0   516.7525  1026.7596    32.0431   573.8766   433.4124
>    fs-locks-v10+lglock-v0   513.6507   751.1674    27.4074   567.0972   435.6154
> 
> 
> lease02
>                              mean   variance      sigma        max        min
>                 4.0.0-rc3  3588.2069 26736.0422   163.5116  3769.7430  3154.8405
>              fs-locks-v10  3566.0658 34225.6410   185.0017  3747.6039  3188.5478
>                 lglock-v0  3585.0648 28720.1679   169.4703  3758.7240  3150.9310
>    fs-locks-v10+lglock-v0  3621.9347 17706.2354   133.0648  3744.0095  3174.0998
> 
> 
> posix01
>                              mean   variance      sigma        max        min
>                 4.0.0-rc3  9297.5030 26911.6602   164.0477  9941.8094  8963.3528
>              fs-locks-v10  9462.8665 44762.9316   211.5725 10504.3205  9202.5748
>                 lglock-v0  9320.4716 47168.9903   217.1842 10401.6565  9054.1950
>    fs-locks-v10+lglock-v0  9458.1463 58231.8844   241.3128 10564.2086  9193.1177
> 
> 
> posix02
>                              mean   variance      sigma        max        min
>                 4.0.0-rc3   920.6533  2648.1293    51.4600   983.4213   790.1792
>              fs-locks-v10   915.3972  4384.6821    66.2169  1004.2339   795.4129
>                 lglock-v0   888.1910  4644.0478    68.1473   983.8412   777.4851
>    fs-locks-v10+lglock-v0   926.4184  1834.6481    42.8328   975.8544   794.4582
> 
> 
> posix03
>                              mean   variance      sigma        max        min
>                 4.0.0-rc3     7.5202     0.0456     0.2136     7.9697     6.9803
>              fs-locks-v10     7.5203     0.0640     0.2529     7.9619     7.0063
>                 lglock-v0     7.4738     0.0349     0.1867     7.8011     7.0973
>    fs-locks-v10+lglock-v0     7.5856     0.0595     0.2439     8.1098     6.8695
> 
> 
> posix04
>                              mean   variance      sigma        max        min
>                 4.0.0-rc3     0.6699     0.1091     0.3303     3.3845     0.5247
>              fs-locks-v10     0.6320     0.0026     0.0511     0.9064     0.5195
>                 lglock-v0     0.6460     0.0039     0.0623     1.0830     0.5438
>    fs-locks-v10+lglock-v0     0.6589     0.0338     0.1838     2.0346     0.5393
> 
> 
> Hmm, not really convincing numbers. I hoped to see scaling effects but nope, no fun.
> 

Yeah...

That said, the lglock-v0 numbers do look a little better. Perhaps it
would make sense to go ahead and consider that change? It's not clear
to me why the lglock code uses arch_spinlock_t. Was it just the extra
memory usage or was there some other reason?

You had mentioned at one point that lglocks didn't play well with the
-rt kernels. What's the actual problem there?

> cheers,
> daniel
> 
> 
> 
> The spinlock_t conversion patch (lglock-v0) I used:
> 
> diff --git a/include/linux/lglock.h b/include/linux/lglock.h
> index 0081f00..ea97f74 100644
> --- a/include/linux/lglock.h
> +++ b/include/linux/lglock.h
> @@ -34,7 +34,7 @@
>  #endif
>  
>  struct lglock {
> -	arch_spinlock_t __percpu *lock;
> +	spinlock_t __percpu *lock;
>  #ifdef CONFIG_DEBUG_LOCK_ALLOC
>  	struct lock_class_key lock_key;
>  	struct lockdep_map    lock_dep_map;
> @@ -42,13 +42,13 @@ struct lglock {
>  };
>  
>  #define
> DEFINE_LGLOCK(name)						\
> -	static DEFINE_PER_CPU(arch_spinlock_t, name ##
> _lock)		\
> -	=
> __ARCH_SPIN_LOCK_UNLOCKED;					\
> +	static DEFINE_PER_CPU(spinlock_t, name ##
> _lock)		\
> +	= __SPIN_LOCK_UNLOCKED(name ##
> _lock);				\ struct lglock name = { .lock
> = &name ## _lock } 
>  #define
> DEFINE_STATIC_LGLOCK(name)					\
> -	static DEFINE_PER_CPU(arch_spinlock_t, name ##
> _lock)		\
> -	=
> __ARCH_SPIN_LOCK_UNLOCKED;					\
> +	static DEFINE_PER_CPU(spinlock_t, name ##
> _lock)		\
> +	= __SPIN_LOCK_UNLOCKED(name ##
> _lock);				\ static struct lglock name =
> { .lock = &name ## _lock } 
>  void lg_lock_init(struct lglock *lg, char *name);
> diff --git a/kernel/locking/lglock.c b/kernel/locking/lglock.c
> index 86ae2ae..34077a7 100644
> --- a/kernel/locking/lglock.c
> +++ b/kernel/locking/lglock.c
> @@ -18,44 +18,44 @@ EXPORT_SYMBOL(lg_lock_init);
>  
>  void lg_local_lock(struct lglock *lg)
>  {
> -	arch_spinlock_t *lock;
> +	spinlock_t *lock;
>  
>  	preempt_disable();
>  	lock_acquire_shared(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_);
>  	lock = this_cpu_ptr(lg->lock);
> -	arch_spin_lock(lock);
> +	spin_lock(lock);
>  }
>  EXPORT_SYMBOL(lg_local_lock);
>  
>  void lg_local_unlock(struct lglock *lg)
>  {
> -	arch_spinlock_t *lock;
> +	spinlock_t *lock;
>  
>  	lock_release(&lg->lock_dep_map, 1, _RET_IP_);
>  	lock = this_cpu_ptr(lg->lock);
> -	arch_spin_unlock(lock);
> +	spin_unlock(lock);
>  	preempt_enable();
>  }
>  EXPORT_SYMBOL(lg_local_unlock);
>  
>  void lg_local_lock_cpu(struct lglock *lg, int cpu)
>  {
> -	arch_spinlock_t *lock;
> +	spinlock_t *lock;
>  
>  	preempt_disable();
>  	lock_acquire_shared(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_);
>  	lock = per_cpu_ptr(lg->lock, cpu);
> -	arch_spin_lock(lock);
> +	spin_lock(lock);
>  }
>  EXPORT_SYMBOL(lg_local_lock_cpu);
>  
>  void lg_local_unlock_cpu(struct lglock *lg, int cpu)
>  {
> -	arch_spinlock_t *lock;
> +	spinlock_t *lock;
>  
>  	lock_release(&lg->lock_dep_map, 1, _RET_IP_);
>  	lock = per_cpu_ptr(lg->lock, cpu);
> -	arch_spin_unlock(lock);
> +	spin_unlock(lock);
>  	preempt_enable();
>  }
>  EXPORT_SYMBOL(lg_local_unlock_cpu);
> @@ -67,9 +67,9 @@ void lg_global_lock(struct lglock *lg)
>  	preempt_disable();
>  	lock_acquire_exclusive(&lg->lock_dep_map, 0, 0, NULL,
> _RET_IP_); for_each_possible_cpu(i) {
> -		arch_spinlock_t *lock;
> +		spinlock_t *lock;
>  		lock = per_cpu_ptr(lg->lock, i);
> -		arch_spin_lock(lock);
> +		spin_lock(lock);
>  	}
>  }
>  EXPORT_SYMBOL(lg_global_lock);
> @@ -80,9 +80,9 @@ void lg_global_unlock(struct lglock *lg)
>  
>  	lock_release(&lg->lock_dep_map, 1, _RET_IP_);
>  	for_each_possible_cpu(i) {
> -		arch_spinlock_t *lock;
> +		spinlock_t *lock;
>  		lock = per_cpu_ptr(lg->lock, i);
> -		arch_spin_unlock(lock);
> +		spin_unlock(lock);
>  	}
>  	preempt_enable();
>  }
> 
>
Daniel Wagner March 26, 2015, 10:11 a.m. UTC | #2
Hi Jeff,

Sorry for the long delay. I was away for a week on holiday and the testing
took a bit longer than I expected.

On 03/14/2015 01:40 PM, Jeff Layton wrote:
> On Tue, 10 Mar 2015 09:20:24 +0100
> Daniel Wagner <daniel.wagner@bmw-carit.de> wrote:
>> On 03/07/2015 03:00 PM, Jeff Layton wrote:
>>> On Fri,  6 Mar 2015 08:53:30 +0100
>>> Daniel Wagner <daniel.wagner@bmw-carit.de> wrote:
>>>
>>>> Hi,
>>>>
>>>> Finally, I got a bigger machine and did a quick test round. I expected
>>>> to see some improvements but the resutls do not show any real gain. So
>>>> they are merely refactoring patches.
>>>>
>>>
>>> Ok, in that case is there any point in merging these? I'm all for
>>> breaking up global locks when it makes sense, but if you can't
>>> demonstrate a clear benefit then I'm less inclined to take the churn.
>>>
>>> Perhaps we should wait to see if a benefit emerges when/if you convert
>>> the lglock code to use normal spinlocks (like Andi suggested)? That
>>> seems like a rather simple conversion, and I don't think it's
>>> "cheating" in any sense of the word.
>>>
>>> I do however wonder why Nick used arch_spinlock_t there when he wrote
>>> the lglock code instead of normal spinlocks. Was it simply memory usage
>>> considerations or something else?
>>
>> I did a complete test run with 4.0.0-rc3, the two patches from this
>> thread (fs-locks-v10), the spinlock_t conversion (lglock-v0)
>> and fs-locks-v10 and lglock-v0 combined. Some of the test take rather
>> long on my machine (12 minutes per run) so I tweaked it a bit to get
>> more samples. Each test was run 100 times.
>>
>> The raw data and scripts are here: http://www.monom.org/lglock/data/
>>
>> flock01
>>                              mean   variance      sigma        max        min
>>                 4.0.0-rc3  8930.8708 282098.1663   531.1291  9612.7085  4681.8674
>>              fs-locks-v10  9081.6789 43493.0287   208.5498  9747.8491  8072.6508
>>                 lglock-v0  9004.9252 12339.3832   111.0828  9489.5420  8493.0763
>>    fs-locks-v10+lglock-v0  9053.6680 17714.5623   133.0961  9588.7413  8555.0727
>>
>>
>> flock02
>>                              mean   variance      sigma        max        min
>>                 4.0.0-rc3   553.1720  1057.6026    32.5208   606.2989   479.5528
>>              fs-locks-v10   596.0683  1486.8345    38.5595   662.6566   512.4865
>>                 lglock-v0   595.2150   976.8544    31.2547   646.7506   527.2517
>>    fs-locks-v10+lglock-v0   587.5492   989.9467    31.4634   646.2098   509.6020
>>
>>
>> lease01
>>                              mean   variance      sigma        max        min
>>                 4.0.0-rc3   505.2654   780.7545    27.9420   564.2530   433.7727
>>              fs-locks-v10   523.6855   705.2606    26.5567   570.3401   442.6539
>>                 lglock-v0   516.7525  1026.7596    32.0431   573.8766   433.4124
>>    fs-locks-v10+lglock-v0   513.6507   751.1674    27.4074   567.0972   435.6154
>>
>>
>> lease02
>>                              mean   variance      sigma        max        min
>>                 4.0.0-rc3  3588.2069 26736.0422   163.5116  3769.7430  3154.8405
>>              fs-locks-v10  3566.0658 34225.6410   185.0017  3747.6039  3188.5478
>>                 lglock-v0  3585.0648 28720.1679   169.4703  3758.7240  3150.9310
>>    fs-locks-v10+lglock-v0  3621.9347 17706.2354   133.0648  3744.0095  3174.0998
>>
>>
>> posix01
>>                              mean   variance      sigma        max        min
>>                 4.0.0-rc3  9297.5030 26911.6602   164.0477  9941.8094  8963.3528
>>              fs-locks-v10  9462.8665 44762.9316   211.5725 10504.3205  9202.5748
>>                 lglock-v0  9320.4716 47168.9903   217.1842 10401.6565  9054.1950
>>    fs-locks-v10+lglock-v0  9458.1463 58231.8844   241.3128 10564.2086  9193.1177
>>
>>
>> posix02
>>                              mean   variance      sigma        max        min
>>                 4.0.0-rc3   920.6533  2648.1293    51.4600   983.4213   790.1792
>>              fs-locks-v10   915.3972  4384.6821    66.2169  1004.2339   795.4129
>>                 lglock-v0   888.1910  4644.0478    68.1473   983.8412   777.4851
>>    fs-locks-v10+lglock-v0   926.4184  1834.6481    42.8328   975.8544   794.4582
>>
>>
>> posix03
>>                              mean   variance      sigma        max        min
>>                 4.0.0-rc3     7.5202     0.0456     0.2136     7.9697     6.9803
>>              fs-locks-v10     7.5203     0.0640     0.2529     7.9619     7.0063
>>                 lglock-v0     7.4738     0.0349     0.1867     7.8011     7.0973
>>    fs-locks-v10+lglock-v0     7.5856     0.0595     0.2439     8.1098     6.8695
>>
>>
>> posix04
>>                              mean   variance      sigma        max        min
>>                 4.0.0-rc3     0.6699     0.1091     0.3303     3.3845     0.5247
>>              fs-locks-v10     0.6320     0.0026     0.0511     0.9064     0.5195
>>                 lglock-v0     0.6460     0.0039     0.0623     1.0830     0.5438
>>    fs-locks-v10+lglock-v0     0.6589     0.0338     0.1838     2.0346     0.5393
>>
>>
>> Hmm, not really convincing numbers. I hoped to see scaling effects but nope, no fun.
>>
> 
> Yeah...
> 
> That said, the lglock-v0 numbers do look a little better. Perhaps it
> would make sense to go ahead and consider that change? It's not clear
> to me why the lglock code uses arch_spinlock_t. Was it just the extra
> memory usage or was there some other reason?

If my reading is correct, the main difference between spinlock_t 
and arch_spinlock_t is the avoidance of the trylock path:

 	spin_lock(&lock)
	  raw_spin_lock(&lock)
	    _raw_spin_lock(&lock)
	      __raw_spin_lock(&lock)

	static inline void __raw_spin_lock(raw_spinlock_t *lock)
	{
		preempt_disable();
		spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
		LOCK_CONTENDED(lock, do_raw_spin_trylock, do_raw_spin_lock);
	}

	static inline int do_raw_spin_trylock(raw_spinlock_t *lock)
	{
		return arch_spin_trylock(&(lock)->raw_lock);
	}
	
	static inline void do_raw_spin_lock(raw_spinlock_t *lock) __acquires(lock)
	{
		__acquire(lock);
		arch_spin_lock(&lock->raw_lock);
	}
	

So by using arch_spin_lock(&lock), lglock is shortcutting the
path slightly.

The memory consumption is even less using spinlock_t:

4.0.0-rc5

   text    data     bss     dec     hex filename
  19941    2409    1088   23438    5b8e fs/locks.o
    822       0       0     822     336 kernel/locking/lglock.o

[    0.000000] setup_percpu: NR_CPUS:64 nr_cpumask_bits:64 nr_cpu_ids:64 nr_node_ids:4
[    0.000000] PERCPU: Embedded 32 pages/cpu @ffff881fbfc00000 s92888 r8192 d29992 u131072
[    0.000000] pcpu-alloc: s92888 r8192 d29992 u131072 alloc=1*2097152
[    0.000000] pcpu-alloc: [0] 00 04 08 12 16 20 24 28 32 36 40 44 48 52 56 60 
[    0.000000] pcpu-alloc: [1] 01 05 09 13 17 21 25 29 33 37 41 45 49 53 57 61 
[    0.000000] pcpu-alloc: [2] 02 06 10 14 18 22 26 30 34 38 42 46 50 54 58 62 
[    0.000000] pcpu-alloc: [3] 03 07 11 15 19 23 27 31 35 39 43 47 51 55 59 63 
[    0.000000] Built 4 zonelists in Node order, mobility grouping on.  Total pages: 132109066


4.0.0-rc5-lglock-v0

   text    data     bss     dec     hex filename
  19941    2409    1088   23438    5b8e fs/locks.o
    620       0       0     620     26c kernel/locking/lglock.o

[  +0.000000] setup_percpu: NR_CPUS:64 nr_cpumask_bits:64 nr_cpu_ids:64 nr_node_ids:4
[  +0.000000] PERCPU: Embedded 32 pages/cpu @ffff881fbfc00000 s92888 r8192 d29992 u131072
[  +0.000000] pcpu-alloc: s92888 r8192 d29992 u131072 alloc=1*2097152
[  +0.000000] pcpu-alloc: [0] 00 04 08 12 16 20 24 28 32 36 40 44 48 52 56 60 
[  +0.000000] pcpu-alloc: [1] 01 05 09 13 17 21 25 29 33 37 41 45 49 53 57 61 
[  +0.000000] pcpu-alloc: [2] 02 06 10 14 18 22 26 30 34 38 42 46 50 54 58 62 
[  +0.000000] pcpu-alloc: [3] 03 07 11 15 19 23 27 31 35 39 43 47 51 55 59 63 
[  +0.000000] Built 4 zonelists in Node order, mobility grouping on.  Total pages: 132109066


Legend: s: static size, r: reserved size, d: dynamic size, u: unit size


I did another round of measurements with different parameters and saw 
some unexpected things:

 - flock01: The number of child processes is close to the number
   of cores (eg. 128 processes on 64 cores) and the number of test
   iterations is relatively low (32 iterations). In this configuration
   the results are not stable:

	while true; do 
		rm -rf /tmp/a; flock01 -n 128 -l 32 /tmp/a; 
	done

	38.392769508
	1.054781151
	113.731122000
	66.362571593
	97.581588309
	0.015311589
	117.311633231
	0.015412247
	0.014909320
	0.015469361
	0.015481439
	38.779573512
	101.239880635
	0.822888216
	...

   I see this on 4.0.0-rc5 with or without the lglock-v0 patch.
   It looks like, if we are lucky, the children of flock01 get
   along without interfering, and that results in low numbers.

   If the system is not idle (kernel build in background
   'make -j200') then the numbers get more consistent:

	0.034442009
	0.035964874
	0.026305154
	0.030738657
	0.024400840
	0.028685513
	0.025869458
	0.027475024
	0.023971313
	0.026113323
	0.033676295
	....

 - I also played with lockdep detection. With lglock-v0 applied
   some tests like flock02 and posix02 get considerably worse
   results. The difference between flock01 and flock02 is that
   the children of flock01 fight over one file lock versus
   the children of flock02 lock and unlock their own lock.
   My best guess is that the lockdep tracing is adding
   far more to the per child lock configuration. I didn't find
   any other explanation than this. Although I have to admit
   I can't find a good argument why this makes a difference
   between arch_spinlock_t and spinlock_t. 


With lockdep enabled:

flock01
                                     mean   variance      sigma        max        min
                     4.0.0-rc5   339.6094  2174.4070    46.6305   446.6720   219.1196
           4.0.0-rc5-lglock-v0   499.1753  8123.7092    90.1316   666.3474   315.5089


flock02
                                     mean   variance      sigma        max        min
                     4.0.0-rc5   201.2253    64.5758     8.0359   219.6059   179.1278
           4.0.0-rc5-lglock-v0   785.1701  1049.7690    32.4001   844.4298   715.6951


lease01
                                     mean   variance      sigma        max        min
                     4.0.0-rc5     8.6606     4.2222     2.0548    13.3409     4.2273
           4.0.0-rc5-lglock-v0    12.1898     3.5871     1.8940    16.5744     9.2004


lease02
                                     mean   variance      sigma        max        min
                     4.0.0-rc5    42.0945     1.2928     1.1370    44.8503    37.0932
           4.0.0-rc5-lglock-v0    38.5887     0.4077     0.6385    39.8308    36.3703


posix01
                                     mean   variance      sigma        max        min
                     4.0.0-rc5   407.8706  3005.7657    54.8249   581.1921   293.4723
           4.0.0-rc5-lglock-v0   613.6238  5604.3537    74.8622   781.7903   487.7466


posix02
                                     mean   variance      sigma        max        min
                     4.0.0-rc5   340.7774   186.4059    13.6531   365.8146   315.1692
           4.0.0-rc5-lglock-v0  1319.7676   726.9997    26.9629  1377.5981  1242.2350


posix03
                                     mean   variance      sigma        max        min
                     4.0.0-rc5     0.9615     0.0040     0.0629     1.1086     0.8280
           4.0.0-rc5-lglock-v0     1.2682     0.0009     0.0299     1.3415     1.1902


posix04
                                     mean   variance      sigma        max        min
                     4.0.0-rc5     0.0527     0.0003     0.0172     0.1156     0.0237
           4.0.0-rc5-lglock-v0     0.0365     0.0001     0.0101     0.0887     0.0249




Without lockdep:

flock01
                                     mean   variance      sigma        max        min
                     4.0.0-rc5   448.0287 15417.8359   124.1686   527.8083     0.0081
           4.0.0-rc5-lglock-v0   395.1646 28713.4347   169.4504   520.7507     0.0075


flock02
                                     mean   variance      sigma        max        min
                     4.0.0-rc5     6.9185     0.8830     0.9397    10.6138     4.9707
           4.0.0-rc5-lglock-v0     6.2474     0.9234     0.9610     9.5478     4.3703


lease01
                                     mean   variance      sigma        max        min
                     4.0.0-rc5     7.7040     0.3930     0.6269     9.1874     5.4179
           4.0.0-rc5-lglock-v0     7.6862     0.7794     0.8828     9.0623     1.3639


lease02
                                     mean   variance      sigma        max        min
                     4.0.0-rc5    16.3074     0.1418     0.3766    17.1600    15.0240
           4.0.0-rc5-lglock-v0    16.2698     0.2772     0.5265    17.2221    13.4127


posix01
                                     mean   variance      sigma        max        min
                     4.0.0-rc5   531.5151   181.3078    13.4651   596.5883   501.2940
           4.0.0-rc5-lglock-v0   531.3600   209.0023    14.4569   600.7317   507.1767


posix02
                                     mean   variance      sigma        max        min
                     4.0.0-rc5    13.8395     2.9768     1.7253    22.0783     9.9136
           4.0.0-rc5-lglock-v0    12.6822     3.1645     1.7789    18.1258     9.0030


posix03
                                     mean   variance      sigma        max        min
                     4.0.0-rc5     0.8939     0.0006     0.0242     0.9392     0.8360
           4.0.0-rc5-lglock-v0     0.9050     0.0006     0.0254     0.9617     0.8454


posix04
                                     mean   variance      sigma        max        min
                     4.0.0-rc5     0.0122     0.0000     0.0023     0.0227     0.0083
           4.0.0-rc5-lglock-v0     0.0115     0.0000     0.0016     0.0165     0.0091

> You had mentioned at one point that lglocks didn't play well with the
> -rt kernels. What's the actual problem there?

-rt kernels like to preempt everything possible. One means to achieve
this is by exchanging normal spinlock_t with rt_mutex. arch_spinlock_t
does not get this treatment automatically via the lock framework. 
For this following patch is carried around:

https://git.kernel.org/cgit/linux/kernel/git/rt/linux-stable-rt.git/commit/?h=v3.14-rt-rebase&id=da1cbed0dcf6ab22a4b50b0c5606271067749aef

 struct lglock {
+#ifndef CONFIG_PREEMPT_RT_FULL
        arch_spinlock_t __percpu *lock;
+#else
+       struct rt_mutex __percpu *lock;
+#endif
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
        struct lock_class_key lock_key;
        struct lockdep_map    lock_dep_map;
 #endif
 };

[...]


I have modified version of the above patch ontop of of the lglock-v0
which drops all the ifdefing around arch_spinlock_t and rt_mutex. The
results are identical.

If there aren't any objection I send the lglock-v0 patch with a proper
commit message.

cheers,
daniel
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jeff Layton March 26, 2015, 1:17 p.m. UTC | #3
On Thu, 26 Mar 2015 11:11:19 +0100
Daniel Wagner <daniel.wagner@bmw-carit.de> wrote:

> Hi Jeff,
> 
> Sorry for the long delay. Was I week on holiday and the testing
> took a bit longer than I expected.
> 
> On 03/14/2015 01:40 PM, Jeff Layton wrote:
> > On Tue, 10 Mar 2015 09:20:24 +0100
> > Daniel Wagner <daniel.wagner@bmw-carit.de> wrote:
> >> On 03/07/2015 03:00 PM, Jeff Layton wrote:
> >>> On Fri,  6 Mar 2015 08:53:30 +0100
> >>> Daniel Wagner <daniel.wagner@bmw-carit.de> wrote:
> >>>
> >>>> Hi,
> >>>>
> >>>> Finally, I got a bigger machine and did a quick test round. I expected
> >>>> to see some improvements but the resutls do not show any real gain. So
> >>>> they are merely refactoring patches.
> >>>>
> >>>
> >>> Ok, in that case is there any point in merging these? I'm all for
> >>> breaking up global locks when it makes sense, but if you can't
> >>> demonstrate a clear benefit then I'm less inclined to take the churn.
> >>>
> >>> Perhaps we should wait to see if a benefit emerges when/if you convert
> >>> the lglock code to use normal spinlocks (like Andi suggested)? That
> >>> seems like a rather simple conversion, and I don't think it's
> >>> "cheating" in any sense of the word.
> >>>
> >>> I do however wonder why Nick used arch_spinlock_t there when he wrote
> >>> the lglock code instead of normal spinlocks. Was it simply memory usage
> >>> considerations or something else?
> >>
> >> I did a complete test run with 4.0.0-rc3, the two patches from this
> >> thread (fs-locks-v10), the spinlock_t conversion (lglock-v0)
> >> and fs-locks-v10 and lglock-v0 combined. Some of the test take rather
> >> long on my machine (12 minutes per run) so I tweaked it a bit to get
> >> more samples. Each test was run 100 times.
> >>
> >> The raw data and scripts are here: http://www.monom.org/lglock/data/
> >>
> >> flock01
> >>                              mean   variance      sigma        max        min
> >>                 4.0.0-rc3  8930.8708 282098.1663   531.1291  9612.7085  4681.8674
> >>              fs-locks-v10  9081.6789 43493.0287   208.5498  9747.8491  8072.6508
> >>                 lglock-v0  9004.9252 12339.3832   111.0828  9489.5420  8493.0763
> >>    fs-locks-v10+lglock-v0  9053.6680 17714.5623   133.0961  9588.7413  8555.0727
> >>
> >>
> >> flock02
> >>                              mean   variance      sigma        max        min
> >>                 4.0.0-rc3   553.1720  1057.6026    32.5208   606.2989   479.5528
> >>              fs-locks-v10   596.0683  1486.8345    38.5595   662.6566   512.4865
> >>                 lglock-v0   595.2150   976.8544    31.2547   646.7506   527.2517
> >>    fs-locks-v10+lglock-v0   587.5492   989.9467    31.4634   646.2098   509.6020
> >>
> >>
> >> lease01
> >>                              mean   variance      sigma        max        min
> >>                 4.0.0-rc3   505.2654   780.7545    27.9420   564.2530   433.7727
> >>              fs-locks-v10   523.6855   705.2606    26.5567   570.3401   442.6539
> >>                 lglock-v0   516.7525  1026.7596    32.0431   573.8766   433.4124
> >>    fs-locks-v10+lglock-v0   513.6507   751.1674    27.4074   567.0972   435.6154
> >>
> >>
> >> lease02
> >>                              mean   variance      sigma        max        min
> >>                 4.0.0-rc3  3588.2069 26736.0422   163.5116  3769.7430  3154.8405
> >>              fs-locks-v10  3566.0658 34225.6410   185.0017  3747.6039  3188.5478
> >>                 lglock-v0  3585.0648 28720.1679   169.4703  3758.7240  3150.9310
> >>    fs-locks-v10+lglock-v0  3621.9347 17706.2354   133.0648  3744.0095  3174.0998
> >>
> >>
> >> posix01
> >>                              mean   variance      sigma        max        min
> >>                 4.0.0-rc3  9297.5030 26911.6602   164.0477  9941.8094  8963.3528
> >>              fs-locks-v10  9462.8665 44762.9316   211.5725 10504.3205  9202.5748
> >>                 lglock-v0  9320.4716 47168.9903   217.1842 10401.6565  9054.1950
> >>    fs-locks-v10+lglock-v0  9458.1463 58231.8844   241.3128 10564.2086  9193.1177
> >>
> >>
> >> posix02
> >>                              mean   variance      sigma        max        min
> >>                 4.0.0-rc3   920.6533  2648.1293    51.4600   983.4213   790.1792
> >>              fs-locks-v10   915.3972  4384.6821    66.2169  1004.2339   795.4129
> >>                 lglock-v0   888.1910  4644.0478    68.1473   983.8412   777.4851
> >>    fs-locks-v10+lglock-v0   926.4184  1834.6481    42.8328   975.8544   794.4582
> >>
> >>
> >> posix03
> >>                              mean   variance      sigma        max        min
> >>                 4.0.0-rc3     7.5202     0.0456     0.2136     7.9697     6.9803
> >>              fs-locks-v10     7.5203     0.0640     0.2529     7.9619     7.0063
> >>                 lglock-v0     7.4738     0.0349     0.1867     7.8011     7.0973
> >>    fs-locks-v10+lglock-v0     7.5856     0.0595     0.2439     8.1098     6.8695
> >>
> >>
> >> posix04
> >>                              mean   variance      sigma        max        min
> >>                 4.0.0-rc3     0.6699     0.1091     0.3303     3.3845     0.5247
> >>              fs-locks-v10     0.6320     0.0026     0.0511     0.9064     0.5195
> >>                 lglock-v0     0.6460     0.0039     0.0623     1.0830     0.5438
> >>    fs-locks-v10+lglock-v0     0.6589     0.0338     0.1838     2.0346     0.5393
> >>
> >>
> >> Hmm, not really convincing numbers. I hoped to see scaling effects but nope, no fun.
> >>
> > 
> > Yeah...
> > 
> > That said, the lglock-v0 numbers do look a little better. Perhaps it
> > would make sense to go ahead and consider that change? It's not clear
> > to me why the lglock code uses arch_spinlock_t. Was it just the extra
> > memory usage or was there some other reason?
> 
> If my reading is correct, the main difference between spinlock_t 
> and arch_spinlock_t is the avoidance of the trylock path:
> 
>  	spin_lock(&lock)
> 	  raw_spin_lock(&lock)
> 	    _raw_spin_lock(&lock)
> 	      __raw_spin_lock(&lock)
> 
> 	static inline void __raw_spin_lock(raw_spinlock_t *lock)
> 	{
> 		preempt_disable();
> 		spin_acquire(&lock->dep_map, 0, 0, _RET_IP_);
> 		LOCK_CONTENDED(lock, do_raw_spin_trylock, do_raw_spin_lock);
> 	}
> 
> 	static inline int do_raw_spin_trylock(raw_spinlock_t *lock)
> 	{
> 		return arch_spin_trylock(&(lock)->raw_lock);
> 	}
> 	
> 	static inline void do_raw_spin_lock(raw_spinlock_t *lock) __acquires(lock)
> 	{
> 		__acquire(lock);
> 		arch_spin_lock(&lock->raw_lock);
> 	}
> 	
> 
> So with using arch_spin_lock(&lock) lglock is short cutting the
> path slightly.
> 
> The memory consumption is even less using spinlock_t:
> 
> 4.0.0-rc5
> 
>    text    data     bss     dec     hex filename
>   19941    2409    1088   23438    5b8e fs/locks.o
>     822       0       0     822     336 kernel/locking/lglock.o
> 
> [    0.000000] setup_percpu: NR_CPUS:64 nr_cpumask_bits:64 nr_cpu_ids:64 nr_node_ids:4
> [    0.000000] PERCPU: Embedded 32 pages/cpu @ffff881fbfc00000 s92888 r8192 d29992 u131072
> [    0.000000] pcpu-alloc: s92888 r8192 d29992 u131072 alloc=1*2097152
> [    0.000000] pcpu-alloc: [0] 00 04 08 12 16 20 24 28 32 36 40 44 48 52 56 60 
> [    0.000000] pcpu-alloc: [1] 01 05 09 13 17 21 25 29 33 37 41 45 49 53 57 61 
> [    0.000000] pcpu-alloc: [2] 02 06 10 14 18 22 26 30 34 38 42 46 50 54 58 62 
> [    0.000000] pcpu-alloc: [3] 03 07 11 15 19 23 27 31 35 39 43 47 51 55 59 63 
> [    0.000000] Built 4 zonelists in Node order, mobility grouping on.  Total pages: 132109066
> 
> 
> 4.0.0-rc5-lglock-v0
> 
>    text    data     bss     dec     hex filename
>   19941    2409    1088   23438    5b8e fs/locks.o
>     620       0       0     620     26c kernel/locking/lglock.o
> 
> [  +0.000000] setup_percpu: NR_CPUS:64 nr_cpumask_bits:64 nr_cpu_ids:64 nr_node_ids:4
> [  +0.000000] PERCPU: Embedded 32 pages/cpu @ffff881fbfc00000 s92888 r8192 d29992 u131072
> [  +0.000000] pcpu-alloc: s92888 r8192 d29992 u131072 alloc=1*2097152
> [  +0.000000] pcpu-alloc: [0] 00 04 08 12 16 20 24 28 32 36 40 44 48 52 56 60 
> [  +0.000000] pcpu-alloc: [1] 01 05 09 13 17 21 25 29 33 37 41 45 49 53 57 61 
> [  +0.000000] pcpu-alloc: [2] 02 06 10 14 18 22 26 30 34 38 42 46 50 54 58 62 
> [  +0.000000] pcpu-alloc: [3] 03 07 11 15 19 23 27 31 35 39 43 47 51 55 59 63 
> [  +0.000000] Built 4 zonelists in Node order, mobility grouping on.  Total pages: 132109066
> 
> 
> Legend: s: static size, r: reserved size, d: dynamic size, u: unit size
> 

Memory consumption here really shouldn't be much of a factor. These are
global objects after all, so we really aren't looking at much memory in
the big scheme of things.

> 
> I did another round of measurements with different parameters and saw 
> some unexpected things:
> 
>  - flock01: The number of child processes is close to the number
>    of cores (eg. 128 processes on 64 cores) and the number of test
>    iterations is relatively low (32 iterations). In this configuration
>    the results are not stable:
> 
> 	while true; do 
> 		rm -rf /tmp/a; flock01 -n 128 -l 32 /tmp/a; 
> 	done
> 
> 	38.392769508
> 	1.054781151
> 	113.731122000
> 	66.362571593
> 	97.581588309
> 	0.015311589
> 	117.311633231
> 	0.015412247
> 	0.014909320
> 	0.015469361
> 	0.015481439
> 	38.779573512
> 	101.239880635
> 	0.822888216
> 	...
> 
>    I see this on 4.0.0-rc5 with or without the lglock-v0 patch.
>    This looks like if we are lucky the children of flock01 get
>    along without interfering and that results in low numbers.
> 

Yep. That test is more or less at the mercy of the scheduler.

>    If the system is not idle (kernel build in background
>    'make -j200') then the numbers get more consistent:
> 
> 	0.034442009
> 	0.035964874
> 	0.026305154
> 	0.030738657
> 	0.024400840
> 	0.028685513
> 	0.025869458
> 	0.027475024
> 	0.023971313
> 	0.026113323
> 	0.033676295
> 	....
> 

Interesting.

>  - I also played with lockdep detection. With lglock-v0 applied
>    some tests like flock02 and posix02 get considerable worse
>    results. The difference between flock01 and flock02 is that
>    the children of flock01 fight over one file lock versus
>    the children of flock02 lock and unlock their own lock.
>    My best guess is that the lockdep tracing is adding
>    far more to the per child lock configuration. I didn't find
>    any other explanation than this. Although I have to admit
>    I can't find a good argument why this makes a difference
>    between arch_spinlock_t and spinlock_t. 
> 
> 
> With lockdep enabled:
> 
> flock01
>                                      mean   variance      sigma        max        min
>                      4.0.0-rc5   339.6094  2174.4070    46.6305   446.6720   219.1196
>            4.0.0-rc5-lglock-v0   499.1753  8123.7092    90.1316   666.3474   315.5089
> 
> 
> flock02
>                                      mean   variance      sigma        max        min
>                      4.0.0-rc5   201.2253    64.5758     8.0359   219.6059   179.1278
>            4.0.0-rc5-lglock-v0   785.1701  1049.7690    32.4001   844.4298   715.6951
> 
> 
> lease01
>                                      mean   variance      sigma        max        min
>                      4.0.0-rc5     8.6606     4.2222     2.0548    13.3409     4.2273
>            4.0.0-rc5-lglock-v0    12.1898     3.5871     1.8940    16.5744     9.2004
> 
> 
> lease02
>                                      mean   variance      sigma        max        min
>                      4.0.0-rc5    42.0945     1.2928     1.1370    44.8503    37.0932
>            4.0.0-rc5-lglock-v0    38.5887     0.4077     0.6385    39.8308    36.3703
> 
> 
> posix01
>                                      mean   variance      sigma        max        min
>                      4.0.0-rc5   407.8706  3005.7657    54.8249   581.1921   293.4723
>            4.0.0-rc5-lglock-v0   613.6238  5604.3537    74.8622   781.7903   487.7466
> 
> 
> posix02
>                                      mean   variance      sigma        max        min
>                      4.0.0-rc5   340.7774   186.4059    13.6531   365.8146   315.1692
>            4.0.0-rc5-lglock-v0  1319.7676   726.9997    26.9629  1377.5981  1242.2350
> 
> 
> posix03
>                                      mean   variance      sigma        max        min
>                      4.0.0-rc5     0.9615     0.0040     0.0629     1.1086     0.8280
>            4.0.0-rc5-lglock-v0     1.2682     0.0009     0.0299     1.3415     1.1902
> 
> 
> posix04
>                                      mean   variance      sigma        max        min
>                      4.0.0-rc5     0.0527     0.0003     0.0172     0.1156     0.0237
>            4.0.0-rc5-lglock-v0     0.0365     0.0001     0.0101     0.0887     0.0249
> 
> 
> 
> 
> Without lockdep:
> 
> flock01
>                                      mean   variance      sigma        max        min
>                      4.0.0-rc5   448.0287 15417.8359   124.1686   527.8083     0.0081
>            4.0.0-rc5-lglock-v0   395.1646 28713.4347   169.4504   520.7507     0.0075
> 
> 
> flock02
>                                      mean   variance      sigma        max        min
>                      4.0.0-rc5     6.9185     0.8830     0.9397    10.6138     4.9707
>            4.0.0-rc5-lglock-v0     6.2474     0.9234     0.9610     9.5478     4.3703
> 
> 
> lease01
>                                      mean   variance      sigma        max        min
>                      4.0.0-rc5     7.7040     0.3930     0.6269     9.1874     5.4179
>            4.0.0-rc5-lglock-v0     7.6862     0.7794     0.8828     9.0623     1.3639
> 
> 
> lease02
>                                      mean   variance      sigma        max        min
>                      4.0.0-rc5    16.3074     0.1418     0.3766    17.1600    15.0240
>            4.0.0-rc5-lglock-v0    16.2698     0.2772     0.5265    17.2221    13.4127
> 
> 
> posix01
>                                      mean   variance      sigma        max        min
>                      4.0.0-rc5   531.5151   181.3078    13.4651   596.5883   501.2940
>            4.0.0-rc5-lglock-v0   531.3600   209.0023    14.4569   600.7317   507.1767
> 
> 
> posix02
>                                      mean   variance      sigma        max        min
>                      4.0.0-rc5    13.8395     2.9768     1.7253    22.0783     9.9136
>            4.0.0-rc5-lglock-v0    12.6822     3.1645     1.7789    18.1258     9.0030
> 
> 
> posix03
>                                      mean   variance      sigma        max        min
>                      4.0.0-rc5     0.8939     0.0006     0.0242     0.9392     0.8360
>            4.0.0-rc5-lglock-v0     0.9050     0.0006     0.0254     0.9617     0.8454
> 
> 
> posix04
>                                      mean   variance      sigma        max        min
>                      4.0.0-rc5     0.0122     0.0000     0.0023     0.0227     0.0083
>            4.0.0-rc5-lglock-v0     0.0115     0.0000     0.0016     0.0165     0.0091
> 

lockdep has overhead, and when you move from arch_spinlock_t to
"normal" spinlock_t's you end up with per-spinlock lockdep structures.
I wouldn't worry much about performance with lockdep enabled.

> > You had mentioned at one point that lglocks didn't play well with the
> > -rt kernels. What's the actual problem there?
> 
> -rt kernels like to preempt everything possible. One means to achieve
> this is by exchanging normal spinlock_t with rt_mutex. arch_spinlock_t
> does not get this treatment automatically via the lock framework. 
> For this following patch is carried around:
> 
> https://git.kernel.org/cgit/linux/kernel/git/rt/linux-stable-rt.git/commit/?h=v3.14-rt-rebase&id=da1cbed0dcf6ab22a4b50b0c5606271067749aef
> 
>  struct lglock {
> +#ifndef CONFIG_PREEMPT_RT_FULL
>         arch_spinlock_t __percpu *lock;
> +#else
> +       struct rt_mutex __percpu *lock;
> +#endif
>  #ifdef CONFIG_DEBUG_LOCK_ALLOC
>         struct lock_class_key lock_key;
>         struct lockdep_map    lock_dep_map;
>  #endif
>  };
> 
> [...]
> 

Ok. Is that approach problematic in some way? I'm trying to understand
the exact problem that you're trying to solve for -rt with this effort.

> 
> I have modified version of the above patch ontop of of the lglock-v0
> which drops all the ifdefing around arch_spinlock_t and rt_mutex. The
> results are identical.
> 
> If there aren't any objection I send the lglock-v0 patch with a proper
> commit message.
> 

I'll be happy to take a look.
Daniel Wagner March 26, 2015, 1:55 p.m. UTC | #4
>>  - I also played with lockdep detection. With lglock-v0 applied
>>    some tests like flock02 and posix02 get considerable worse
>>    results. The difference between flock01 and flock02 is that
>>    the children of flock01 fight over one file lock versus
>>    the children of flock02 lock and unlock their own lock.
>>    My best guess is that the lockdep tracing is adding
>>    far more to the per child lock configuration. I didn't find
>>    any other explanation than this. Although I have to admit
>>    I can't find a good argument why this makes a difference
>>    between arch_spinlock_t and spinlock_t. 
>>

[...]

> lockdep has overhead, and when you move from arch_spinlock_t to
> "normal" spinlock_t's you end up with per-spinlock lockdep structures.
> I wouldn't worry much about performance with lockdep enabled.

That was the missing piece. Okay, that explains the performance degradation.

>>> You had mentioned at one point that lglocks didn't play well with the
>>> -rt kernels. What's the actual problem there?
>>
>> -rt kernels like to preempt everything possible. One means to achieve
>> this is by exchanging normal spinlock_t with rt_mutex. arch_spinlock_t
>> does not get this treatment automatically via the lock framework. 
>> For this following patch is carried around:
>>
>> https://git.kernel.org/cgit/linux/kernel/git/rt/linux-stable-rt.git/commit/?h=v3.14-rt-rebase&id=da1cbed0dcf6ab22a4b50b0c5606271067749aef
>>
>>  struct lglock {
>> +#ifndef CONFIG_PREEMPT_RT_FULL
>>         arch_spinlock_t __percpu *lock;
>> +#else
>> +       struct rt_mutex __percpu *lock;
>> +#endif
>>  #ifdef CONFIG_DEBUG_LOCK_ALLOC
>>         struct lock_class_key lock_key;
>>         struct lockdep_map    lock_dep_map;
>>  #endif
>>  };
>>
>> [...]
>>
> 
> Ok. Is that approach problematic in some way?

I expect that mainline won't accept such a patch :).

> I'm trying to understand the exact problem that you're
> trying to solve for -rt with this effort.

My aim is to rid of the -rt patches and mainline the features. This here
is just my small contribution to the whole -rt effort.

cheers,
daniel
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/include/linux/lglock.h b/include/linux/lglock.h
index 0081f00..ea97f74 100644
--- a/include/linux/lglock.h
+++ b/include/linux/lglock.h
@@ -34,7 +34,7 @@ 
 #endif
 
 struct lglock {
-	arch_spinlock_t __percpu *lock;
+	spinlock_t __percpu *lock;
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 	struct lock_class_key lock_key;
 	struct lockdep_map    lock_dep_map;
@@ -42,13 +42,13 @@  struct lglock {
 };
 
 #define DEFINE_LGLOCK(name)						\
-	static DEFINE_PER_CPU(arch_spinlock_t, name ## _lock)		\
-	= __ARCH_SPIN_LOCK_UNLOCKED;					\
+	static DEFINE_PER_CPU(spinlock_t, name ## _lock)		\
+	= __SPIN_LOCK_UNLOCKED(name ## _lock);				\
 	struct lglock name = { .lock = &name ## _lock }
 
 #define DEFINE_STATIC_LGLOCK(name)					\
-	static DEFINE_PER_CPU(arch_spinlock_t, name ## _lock)		\
-	= __ARCH_SPIN_LOCK_UNLOCKED;					\
+	static DEFINE_PER_CPU(spinlock_t, name ## _lock)		\
+	= __SPIN_LOCK_UNLOCKED(name ## _lock);				\
 	static struct lglock name = { .lock = &name ## _lock }
 
 void lg_lock_init(struct lglock *lg, char *name);
diff --git a/kernel/locking/lglock.c b/kernel/locking/lglock.c
index 86ae2ae..34077a7 100644
--- a/kernel/locking/lglock.c
+++ b/kernel/locking/lglock.c
@@ -18,44 +18,44 @@  EXPORT_SYMBOL(lg_lock_init);
 
 void lg_local_lock(struct lglock *lg)
 {
-	arch_spinlock_t *lock;
+	spinlock_t *lock;
 
 	preempt_disable();
 	lock_acquire_shared(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_);
 	lock = this_cpu_ptr(lg->lock);
-	arch_spin_lock(lock);
+	spin_lock(lock);
 }
 EXPORT_SYMBOL(lg_local_lock);
 
 void lg_local_unlock(struct lglock *lg)
 {
-	arch_spinlock_t *lock;
+	spinlock_t *lock;
 
 	lock_release(&lg->lock_dep_map, 1, _RET_IP_);
 	lock = this_cpu_ptr(lg->lock);
-	arch_spin_unlock(lock);
+	spin_unlock(lock);
 	preempt_enable();
 }
 EXPORT_SYMBOL(lg_local_unlock);
 
 void lg_local_lock_cpu(struct lglock *lg, int cpu)
 {
-	arch_spinlock_t *lock;
+	spinlock_t *lock;
 
 	preempt_disable();
 	lock_acquire_shared(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_);
 	lock = per_cpu_ptr(lg->lock, cpu);
-	arch_spin_lock(lock);
+	spin_lock(lock);
 }
 EXPORT_SYMBOL(lg_local_lock_cpu);
 
 void lg_local_unlock_cpu(struct lglock *lg, int cpu)
 {
-	arch_spinlock_t *lock;
+	spinlock_t *lock;
 
 	lock_release(&lg->lock_dep_map, 1, _RET_IP_);
 	lock = per_cpu_ptr(lg->lock, cpu);
-	arch_spin_unlock(lock);
+	spin_unlock(lock);
 	preempt_enable();
 }
 EXPORT_SYMBOL(lg_local_unlock_cpu);
@@ -67,9 +67,9 @@  void lg_global_lock(struct lglock *lg)
 	preempt_disable();
 	lock_acquire_exclusive(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_);
 	for_each_possible_cpu(i) {
-		arch_spinlock_t *lock;
+		spinlock_t *lock;
 		lock = per_cpu_ptr(lg->lock, i);
-		arch_spin_lock(lock);
+		spin_lock(lock);
 	}
 }
 EXPORT_SYMBOL(lg_global_lock);
@@ -80,9 +80,9 @@  void lg_global_unlock(struct lglock *lg)
 
 	lock_release(&lg->lock_dep_map, 1, _RET_IP_);
 	for_each_possible_cpu(i) {
-		arch_spinlock_t *lock;
+		spinlock_t *lock;
 		lock = per_cpu_ptr(lg->lock, i);
-		arch_spin_unlock(lock);
+		spin_unlock(lock);
 	}
 	preempt_enable();
 }