diff mbox series

ring-buffer: Prevent inconsistent operation on cpu_buffer->resize_disabled

Message ID 20230408052226.25268-1-Tze-nan.Wu@mediatek.com (mailing list archive)
State Superseded
Headers show
Series ring-buffer: Prevent inconsistent operation on cpu_buffer->resize_disabled | expand

Commit Message

Tze-nan Wu April 8, 2023, 5:22 a.m. UTC
Sometimes, writes to buffer_size_kb can fail permanently if
cpu_online_mask changes between the two for_each_online_buffer_cpu loops
in function ring_buffer_reset_online_cpus.

The number of increments and decrements of cpu_buffer->resize_disabled
may then be inconsistent, leaving resize_disabled on some CPUs
non-zero after ring_buffer_reset_online_cpus returns.

This issue can be reproduced by running "echo 0 > trace" while
hotplugging a CPU at the same time. Once it has been reproduced, every
subsequent attempt to write to the buffer_size_kb node fails.

This patch prevents the inconsistent increments and decrements of
cpu_buffer->resize_disabled by copying the cpu_online_mask at the
beginning of the function.

However, I wonder whether this patch has any side effects, since the
behavior changes: if a CPU is brought online between the two loops,
reset_disabled_cpu_buffer() will no longer be run for that CPU as it was
before, meaning the cpu_buffer of the newly onlined CPU will not be cleaned up.

Cc: stable@vger.kernel.org
Signed-off-by: Tze-nan Wu <Tze-nan.Wu@mediatek.com>
---
 kernel/trace/ring_buffer.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

Comments

kernel test robot April 8, 2023, 8:35 a.m. UTC | #1
Hi Tze-nan,

kernel test robot noticed the following build warnings:

[auto build test WARNING on linus/master]
[also build test WARNING on rostedt-trace/for-next v6.3-rc5 next-20230406]
[cannot apply to rostedt-trace/for-next-urgent]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Tze-nan-Wu/ring-buffer-Prevent-inconsistent-operation-on-cpu_buffer-resize_disabled/20230408-132502
patch link:    https://lore.kernel.org/r/20230408052226.25268-1-Tze-nan.Wu%40mediatek.com
patch subject: [PATCH] ring-buffer: Prevent inconsistent operation on cpu_buffer->resize_disabled
config: x86_64-randconfig-a002-20230403 (https://download.01.org/0day-ci/archive/20230408/202304081615.eiaqpbV8-lkp@intel.com/config)
compiler: clang version 14.0.6 (https://github.com/llvm/llvm-project f28c006a5895fc0e329fe15fead81e37457cb1d1)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/d404bc0af0a4bde3aa20704642d69a78bdc154f8
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Tze-nan-Wu/ring-buffer-Prevent-inconsistent-operation-on-cpu_buffer-resize_disabled/20230408-132502
        git checkout d404bc0af0a4bde3aa20704642d69a78bdc154f8
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=x86_64 olddefconfig
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=x86_64 SHELL=/bin/bash kernel/trace/

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>
| Link: https://lore.kernel.org/oe-kbuild-all/202304081615.eiaqpbV8-lkp@intel.com/

All warnings (new ones prefixed by >>):

>> kernel/trace/ring_buffer.c:5359:15: warning: variable 'reset_online_mask' is uninitialized when used here [-Wuninitialized]
           cpumask_copy(reset_online_mask, cpu_online_mask);
                        ^~~~~~~~~~~~~~~~~
   kernel/trace/ring_buffer.c:5353:33: note: initialize the variable 'reset_online_mask' to silence this warning
           cpumask_var_t reset_online_mask;
                                          ^
                                           = NULL
   1 warning generated.


vim +/reset_online_mask +5359 kernel/trace/ring_buffer.c

  5344	
  5345	/**
  5346	 * ring_buffer_reset_online_cpus - reset a ring buffer per CPU buffer
  5347	 * @buffer: The ring buffer to reset a per cpu buffer of
  5348	 * @cpu: The CPU buffer to be reset
  5349	 */
  5350	void ring_buffer_reset_online_cpus(struct trace_buffer *buffer)
  5351	{
  5352		struct ring_buffer_per_cpu *cpu_buffer;
  5353		cpumask_var_t reset_online_mask;
  5354		int cpu;
  5355	
  5356		/* prevent another thread from changing buffer sizes */
  5357		mutex_lock(&buffer->mutex);
  5358	
> 5359		cpumask_copy(reset_online_mask, cpu_online_mask);
  5360	
  5361		for_each_cpu_and(cpu, buffer->cpumask, reset_online_mask) {
  5362			cpu_buffer = buffer->buffers[cpu];
  5363	
  5364			atomic_inc(&cpu_buffer->resize_disabled);
  5365			atomic_inc(&cpu_buffer->record_disabled);
  5366		}
  5367	
  5368		/* Make sure all commits have finished */
  5369		synchronize_rcu();
  5370	
  5371		for_each_cpu_and(cpu, buffer->cpumask, reset_online_mask) {
  5372			cpu_buffer = buffer->buffers[cpu];
  5373	
  5374			reset_disabled_cpu_buffer(cpu_buffer);
  5375	
  5376			atomic_dec(&cpu_buffer->record_disabled);
  5377			atomic_dec(&cpu_buffer->resize_disabled);
  5378		}
  5379	
  5380		mutex_unlock(&buffer->mutex);
  5381	}
  5382
kernel test robot April 8, 2023, 12:20 p.m. UTC | #2
Hi Tze-nan,

kernel test robot noticed the following build warnings:

[auto build test WARNING on linus/master]
[cannot apply to rostedt-trace/for-next-urgent]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Tze-nan-Wu/ring-buffer-Prevent-inconsistent-operation-on-cpu_buffer-resize_disabled/20230408-132502
patch link:    https://lore.kernel.org/r/20230408052226.25268-1-Tze-nan.Wu%40mediatek.com
patch subject: [PATCH] ring-buffer: Prevent inconsistent operation on cpu_buffer->resize_disabled
config: s390-allyesconfig (https://download.01.org/0day-ci/archive/20230408/202304082051.Dp50upfS-lkp@intel.com/config)
compiler: s390-linux-gcc (GCC) 12.1.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/d404bc0af0a4bde3aa20704642d69a78bdc154f8
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Tze-nan-Wu/ring-buffer-Prevent-inconsistent-operation-on-cpu_buffer-resize_disabled/20230408-132502
        git checkout d404bc0af0a4bde3aa20704642d69a78bdc154f8
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=s390 olddefconfig
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=s390 SHELL=/bin/bash kernel/

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>
| Link: https://lore.kernel.org/oe-kbuild-all/202304082051.Dp50upfS-lkp@intel.com/

All warnings (new ones prefixed by >>):

   kernel/trace/ring_buffer.c: In function 'ring_buffer_reset_online_cpus':
>> kernel/trace/ring_buffer.c:5359:9: warning: 'reset_online_mask' is used uninitialized [-Wuninitialized]
    5359 |         cpumask_copy(reset_online_mask, cpu_online_mask);
         |         ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   kernel/trace/ring_buffer.c:5353:23: note: 'reset_online_mask' was declared here
    5353 |         cpumask_var_t reset_online_mask;
         |                       ^~~~~~~~~~~~~~~~~


vim +/reset_online_mask +5359 kernel/trace/ring_buffer.c

  5344	
  5345	/**
  5346	 * ring_buffer_reset_online_cpus - reset a ring buffer per CPU buffer
  5347	 * @buffer: The ring buffer to reset a per cpu buffer of
  5348	 * @cpu: The CPU buffer to be reset
  5349	 */
  5350	void ring_buffer_reset_online_cpus(struct trace_buffer *buffer)
  5351	{
  5352		struct ring_buffer_per_cpu *cpu_buffer;
  5353		cpumask_var_t reset_online_mask;
  5354		int cpu;
  5355	
  5356		/* prevent another thread from changing buffer sizes */
  5357		mutex_lock(&buffer->mutex);
  5358	
> 5359		cpumask_copy(reset_online_mask, cpu_online_mask);
  5360	
  5361		for_each_cpu_and(cpu, buffer->cpumask, reset_online_mask) {
  5362			cpu_buffer = buffer->buffers[cpu];
  5363	
  5364			atomic_inc(&cpu_buffer->resize_disabled);
  5365			atomic_inc(&cpu_buffer->record_disabled);
  5366		}
  5367	
  5368		/* Make sure all commits have finished */
  5369		synchronize_rcu();
  5370	
  5371		for_each_cpu_and(cpu, buffer->cpumask, reset_online_mask) {
  5372			cpu_buffer = buffer->buffers[cpu];
  5373	
  5374			reset_disabled_cpu_buffer(cpu_buffer);
  5375	
  5376			atomic_dec(&cpu_buffer->record_disabled);
  5377			atomic_dec(&cpu_buffer->resize_disabled);
  5378		}
  5379	
  5380		mutex_unlock(&buffer->mutex);
  5381	}
  5382
diff mbox series

Patch

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 76a2d91eecad..468f46bba71e 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -288,9 +288,6 @@  EXPORT_SYMBOL_GPL(ring_buffer_event_data);
 #define for_each_buffer_cpu(buffer, cpu)		\
 	for_each_cpu(cpu, buffer->cpumask)
 
-#define for_each_online_buffer_cpu(buffer, cpu)		\
-	for_each_cpu_and(cpu, buffer->cpumask, cpu_online_mask)
-
 #define TS_SHIFT	27
 #define TS_MASK		((1ULL << TS_SHIFT) - 1)
 #define TS_DELTA_TEST	(~TS_MASK)
@@ -5353,12 +5350,15 @@  EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu);
 void ring_buffer_reset_online_cpus(struct trace_buffer *buffer)
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
+	cpumask_var_t reset_online_mask;
 	int cpu;
 
 	/* prevent another thread from changing buffer sizes */
 	mutex_lock(&buffer->mutex);
 
-	for_each_online_buffer_cpu(buffer, cpu) {
+	cpumask_copy(reset_online_mask, cpu_online_mask);
+
+	for_each_cpu_and(cpu, buffer->cpumask, reset_online_mask) {
 		cpu_buffer = buffer->buffers[cpu];
 
 		atomic_inc(&cpu_buffer->resize_disabled);
@@ -5368,7 +5368,7 @@  void ring_buffer_reset_online_cpus(struct trace_buffer *buffer)
 	/* Make sure all commits have finished */
 	synchronize_rcu();
 
-	for_each_online_buffer_cpu(buffer, cpu) {
+	for_each_cpu_and(cpu, buffer->cpumask, reset_online_mask) {
 		cpu_buffer = buffer->buffers[cpu];
 
 		reset_disabled_cpu_buffer(cpu_buffer);